Markopy
Utilizing Markov Models for brute forcing attacks
modelMatrix.h
Go to the documentation of this file.
1 /** @file modelMatrix.h
2  * @brief An extension of Markov::API::MarkovPasswords
3  * @authors Ata Hakçıl
4  *
5  * This class shows superior performance compared to the traditional model at Markov::API::MarkovPasswords
6  *
7  * @copydoc Markov::API::ModelMatrix
8  *
9  */
10 
11 #include "markovPasswords.h"
12 #include <mutex>
13 
14 namespace Markov::API{
15 
16  /** @brief Class to flatten and reduce Markov::Model to a Matrix
17  *
18  * Matrix level operations can be used for Generation events, with a significant performance optimization at the cost of O(N) memory complexity (O(1) memory space for slow mode)
19  *
20  * To limit the maximum memory usage, each generation operation is partitioned into 50M chunks for allocation. Threads are synchronized and files are flushed every 50M operations.
21  *
22  */
24  public:
25  ModelMatrix();
26 
27  /** @brief Construct the matrix data for the model.
28  *
29  * This operation can be used after importing/training to allocate and populate the matrix content.
30  *
31  * This will initialize:
32  * - char** edgeMatrix -> a 2D array mapping the left and right connections of each edge.
33  * - long int **valueMatrix -> a 2D array representing the edge weights.
34  * - int matrixSize -> size of the matrix, i.e. the total number of nodes.
35  * - char* matrixIndex -> order of the nodes in the model.
36  * - long int *totalEdgeWeights -> total edge weight of each node.
37  *
38  * @returns True if constructed. False if already constructed.
39  */
40  bool ConstructMatrix();
41 
42 
43  /** @brief Debug function to dump the model to a JSON file.
44  *
45  * Might not work 100%. Not meant for production use.
46  */
47  void DumpJSON();
48 
49 
50  /** @brief Random walk on the Matrix-reduced Markov::Model
51  *
52  * This has O(N) memory complexity. To limit the maximum usage, requests with n>50M are partitioned using Markov::API::ModelMatrix::FastRandomWalkPartition.
53  *
54  * If n>50M, threads are synchronized, files are flushed, and buffers are reallocated every 50M generations.
55  * This comes at a minor performance penalty.
56  *
57  * While it has the same functionality, this operation reduces Markov::API::MarkovPasswords::Generate runtime by 96.5%.
58  *
59  * This function deprecates Markov::API::MarkovPasswords::Generate, and will eventually replace it.
60  *
61  * @param n - Number of passwords to generate.
62  * @param wordlistFileName - Filename to write to
63  * @param minLen - Minimum password length to generate
64  * @param maxLen - Maximum password length to generate
65  * @param threads - Number of OS threads to spawn
66  * @param bFileIO - If false, the filename is ignored and output is written to stdout.
67  * @returns Integer result; its meaning is not visible from this header - TODO confirm against the implementation.
68  *
69  * @code{.cpp}
70  * Markov::API::ModelMatrix mp;
71  * mp.Import("models/finished.mdl");
72  * mp.FastRandomWalk(50000000,"./wordlist.txt",6,12,25, true);
73  * @endcode
74  *
75  */
76  int FastRandomWalk(unsigned long int n, const char* wordlistFileName, int minLen=6, int maxLen=12, int threads=20, bool bFileIO=true);
77 
78  /** @copydoc Markov::Model::Import(const char *filename)
79  * Construct the matrix when done.
80  *
81  */
82  void Import(const char *filename);
83 
84  /** @copydoc Markov::API::MarkovPasswords::Train(const char *datasetFileName, char delimiter, int threads)
85  * Construct the matrix when done.
86  *
87  */
88  void Train(const char *datasetFileName, char delimiter, int threads);
89 
90  protected:
91 
92  /** @brief Random walk on the Matrix-reduced Markov::Model, writing through a caller-supplied std::ofstream
93  *
94  * This has O(N) memory complexity. To limit the maximum usage, requests with n>50M are partitioned using Markov::API::ModelMatrix::FastRandomWalkPartition.
95  *
96  * If n>50M, threads are synchronized, files are flushed, and buffers are reallocated every 50M generations.
97  * This comes at a minor performance penalty.
98  *
99  * While it has the same functionality, this operation reduces Markov::API::MarkovPasswords::Generate runtime by 96.5%.
100  *
101  * This function deprecates Markov::API::MarkovPasswords::Generate, and will eventually replace it.
102  *
103  * @param n - Number of passwords to generate.
104  * @param wordlist - Pointer to the output file stream to write to
105  * @param minLen - Minimum password length to generate
106  * @param maxLen - Maximum password length to generate
107  * @param threads - Number of OS threads to spawn
108  * @param bFileIO - If false, the wordlist file is ignored and output is written to stdout.
109  *
110  * Example (calls the public filename overload; this overload is protected):
111  * @code{.cpp}
112  * Markov::API::ModelMatrix mp;
113  * mp.Import("models/finished.mdl");
114  * mp.FastRandomWalk(50000000,"./wordlist.txt",6,12,25, true);
115  * @endcode
116  *
117  */
118  int FastRandomWalk(unsigned long int n, std::ofstream *wordlist, int minLen=6, int maxLen=12, int threads=20, bool bFileIO=true);
119 
120 
121  /** @brief A single partition of a FastRandomWalk event
122  *
123  * Since FastRandomWalk has to allocate its output buffer before operation starts and writes data in chunks,
124  * large n parameters would lead to huge memory allocations.
125  * @b Without @b Partitioning:
126  * - 50M results 12 characters max -> 550 MB memory allocation
127  *
128  * - 5B results 12 characters max -> 55 GB memory allocation
129  *
130  * - 50B results 12 characters max -> 550 GB memory allocation
131  *
132  * Instead, FastRandomWalk is partitioned per 50M generations to limit the peak memory need.
133  *
134  * @param mlock - Mutex lock to distribute to child threads
135  * @param wordlist - Pointer to the wordlist file stream to write to
136  * @param n - Number of passwords to generate.
137  * @param minLen - Minimum password length to generate
138  * @param maxLen - Maximum password length to generate
139  * @param bFileIO - If false, the wordlist file is ignored and output is written to stdout.
140  * @param threads - Number of OS threads to spawn
141  *
142  *
143  *
144  */
145  void FastRandomWalkPartition(std::mutex *mlock, std::ofstream *wordlist, unsigned long int n, int minLen, int maxLen, bool bFileIO, int threads);
146 
147  /** @brief A single thread of a single partition of FastRandomWalk
148  *
149  * FastRandomWalkPartition starts as many instances of this function as the requested thread count.
150  *
151  * This function contains the bulk of the generation algorithm.
152  *
153  * @param mlock - Mutex lock handed down by FastRandomWalkPartition
154  * @param wordlist - Pointer to the wordlist file stream to write to
155  * @param n - Number of passwords to generate.
156  * @param minLen - Minimum password length to generate
157  * @param maxLen - Maximum password length to generate
158  * @param id - @b DEPRECATED Thread id - No longer used
159  * @param bFileIO - If false, the wordlist file is ignored and output is written to stdout.
160  *
161  *
162  *
163  */
164  void FastRandomWalkThread(std::mutex *mlock, std::ofstream *wordlist, unsigned long int n, int minLen, int maxLen, int id, bool bFileIO);
165 
166  /** @brief Deallocate matrix and make it ready for re-construction
167  *
168  * @returns True if deallocated. False if matrix was not initialized
169  */
170  bool DeallocateMatrix();
171 
172  /**
173  @brief 2-D Character array for the edge Matrix (The characters of Nodes)
174  */
175  char** edgeMatrix;
176 
177  /**
178  @brief 2-d Integer array for the value Matrix (For the weights of Edges)
179  */
180  long int **valueMatrix;
181 
182  /**
183  @brief Holds the matrix size (total number of nodes)
184  */
186 
187  /**
188  @brief Holds the matrix index (the order of nodes used by the 2-D arrays)
189  */
190  char* matrixIndex;
191 
192  /**
193  @brief Array of the Total Edge Weights
194  */
195  long int *totalEdgeWeights;
196 
197  /**
198  @brief True when matrix is constructed. False if not.
199  */
200  bool ready;
201  };
202 
203 
204 
205 };