Markopy
Utilizing Markov Models for brute forcing attacks
model.h
Go to the documentation of this file.
1 /** @file model.h
2  * @brief Model class template
3  * @authors Ata Hakçıl, Osman Ömer Yıldıztugay
4  *
5  * @copydoc Markov::Model
6  */
7 
8 
9 
10 #pragma once
11 #include <map>
12 #include <vector>
13 #include <fstream>
14 #include <assert.h>
15 #include <string>
16 #include <algorithm>
17 #include "node.h"
18 #include "edge.h"
19 
20 /**
21  @brief Namespace for the markov-model related classes.
22  Contains Model, Node and Edge classes
23 */
24 namespace Markov {
25 
26  template <typename NodeStorageType>
27  class Node;
28 
29  template <typename NodeStorageType>
30  class Edge;
31 
32  template <typename NodeStorageType>
33 
34  /** @brief class for the final Markov Model, constructed from nodes and edges.
35  *
36  * Each atomic piece of the generation result is stored in a node, while edges contain the relation weights.
37  * *Extending:*
38  * To extend the class, implement the template and inherit from it, as "class MyModel : public Markov::Model<char>".
39  * For a complete demonstration of how to extend the class, see MarkovPasswords.
40  *
41  * Whole model can be defined as a list of the edges, as dangling nodes are pointless. This approach is used for the import/export operations.
42  * For more information on importing/exporting model, check out the github readme and wiki page.
43  *
44  */
45  class Model {
46  public:
47 
48  /** @brief Initialize a model with only start and end nodes.
49  *
50  * Initialize an empty model with only a starterNode
51  * Starter node is a special kind of node that has constant 0x00 value, and will be used to initiate the generation execution from.
52  */
53  Model<NodeStorageType>();
54 
55  /** @brief Do a random walk on this model.
56  *
57  * Start from the starter node, on each node, invoke RandomNext using the random engine on current node, until terminator node is reached.
58  * If terminator node is reached before minimum length criateria is reached, ignore the last selection and re-invoke randomNext
59  *
60  * If maximum length criteria is reached but final node is not, cut off the generation and proceed to the final node.
61  * This function takes Markov::Random::RandomEngine as a parameter to generate pseudo random numbers from
62  *
63  * This library is shipped with two random engines, Marsaglia and Mersenne. While mersenne output is higher in entropy, most use cases
64  * don't really need super high entropy output, so Markov::Random::Marsaglia is preferable for better performance.
65  *
66  * This function WILL NOT reallocate buffer. Make sure no out of bound writes are happening via maximum length criteria.
67  *
68  * @b Example @b Use: Generate 10 lines, with 5 to 10 characters, and print the output. Use Marsaglia
69  * @code{.cpp}
70  * Markov::Model<char> model;
71  * Model.import("model.mdl");
72  * char* res = new char[11];
73  * Markov::Random::Marsaglia MarsagliaRandomEngine;
74  * for (int i = 0; i < 10; i++) {
75  * this->RandomWalk(&MarsagliaRandomEngine, 5, 10, res);
76  * std::cout << res << "\n";
77  * }
78  * @endcode
79  *
80  * @param randomEngine Random Engine to use for the random walks. For examples, see Markov::Random::Mersenne and Markov::Random::Marsaglia
81  * @param minSetting Minimum number of characters to generate
82  * @param maxSetting Maximum number of character to generate
83  * @param buffer buffer to write the result to
84  * @return Null terminated string that was generated.
85  */
86  NodeStorageType* RandomWalk(Markov::Random::RandomEngine* randomEngine, int minSetting, int maxSetting, NodeStorageType* buffer);
87 
88  /** @brief Adjust the model with a single string.
89  *
90  * Start from the starter node, and for each character, AdjustEdge the edge EdgeWeight from current node to the next, until NULL character is reached.
91  *
92  * Then, update the edge EdgeWeight from current node, to the terminator node.
93  *
94  * This function is used for training purposes, as it can be used for adjusting the model with each line of the corpus file.
95  *
96  * @b Example @b Use: Create an empty model and train it with string: "testdata"
97  * @code{.cpp}
98  * Markov::Model<char> model;
99  * char test[] = "testdata";
100  * model.AdjustEdge(test, 15);
101  * @endcode
102  *
103  *
104  * @param string - String that is passed from the training, and will be used to AdjustEdge the model with
105  * @param occurrence - Occurrence of this string.
106  *
107  *
108  */
109  void AdjustEdge(const NodeStorageType* payload, long int occurrence);
110 
111  /** @brief Import a file to construct the model.
112  *
113  * File contains a list of edges. For more info on the file format, check out the wiki and github readme pages.
114  * Format is: Left_repr;EdgeWeight;right_repr
115  *
116  * Iterate over this list, and construct nodes and edges accordingly.
117  * @return True if successful, False for incomplete models or corrupt file formats
118  *
119  * @b Example @b Use: Import a file from ifstream
120  * @code{.cpp}
121  * Markov::Model<char> model;
122  * std::ifstream file("test.mdl");
123  * model.Import(&file);
124  * @endcode
125  */
126  bool Import(std::ifstream*);
127 
128  /** @brief Open a file to import with filename, and call bool Model::Import with std::ifstream
129  * @return True if successful, False for incomplete models or corrupt file formats
130  *
131  * @b Example @b Use: Import a file with filename
132  * @code{.cpp}
133  * Markov::Model<char> model;
134  * model.Import("test.mdl");
135  * @endcode
136  */
137  bool Import(const char* filename);
138 
139  /** @brief Export a file of the model.
140  *
141  * File contains a list of edges.
142  * Format is: Left_repr;EdgeWeight;right_repr.
143  * For more information on the format, check out the project wiki or github readme.
144  *
145  * Iterate over this vertices, and their edges, and write them to file.
146  * @return True if successful, False for incomplete models.
147  *
148  * @b Example @b Use: Export file to ofstream
149  * @code{.cpp}
150  * Markov::Model<char> model;
151  * std::ofstream file("test.mdl");
152  * model.Export(&file);
153  * @endcode
154  */
155  bool Export(std::ofstream*);
156 
157  /** @brief Open a file to export with filename, and call bool Model::Export with std::ofstream
158  * @return True if successful, False for incomplete models or corrupt file formats
159  *
160  * @b Example @b Use: Export file to filename
161  * @code{.cpp}
162  * Markov::Model<char> model;
163  * model.Export("test.mdl");
164  * @endcode
165  */
166  bool Export(const char* filename);
167 
168  /** @brief Return starter Node
169  * @return starter node with 00 NodeValue
170  */
171  Node<NodeStorageType>* StarterNode(){ return starterNode;}
172 
173  /** @brief Return a vector of all the edges in the model
174  * @return vector of edges
175  */
176  std::vector<Edge<NodeStorageType>*>* Edges(){ return &edges;}
177 
178  /** @brief Return starter Node
179  * @return starter node with 00 NodeValue
180  */
181  std::map<NodeStorageType, Node<NodeStorageType>*>* Nodes(){ return &nodes;}
182 
183  /** @brief Sort edges of all nodes in the model ordered by edge weights
184  *
185  */
187 
188  private:
189  /**
190  @brief Map LeftNode is the Nodes NodeValue
191  * Map RightNode is the node pointer
192  */
193  std::map<NodeStorageType, Node<NodeStorageType>*> nodes;
194 
195  /**
196  @brief Starter Node of this model.
197  */
198  Node<NodeStorageType>* starterNode;
199 
200 
201  /**
202  @brief A list of all edges in this model.
203  */
204  std::vector<Edge<NodeStorageType>*> edges;
205  };
206 
207 };
208 
209 template <typename NodeStorageType>
210 Markov::Model<NodeStorageType>::Model() {
211  this->starterNode = new Markov::Node<NodeStorageType>(0);
212  this->nodes.insert({ 0, this->starterNode });
213 }
214 
215 template <typename NodeStorageType>
216 bool Markov::Model<NodeStorageType>::Import(std::ifstream* f) {
217  std::string cell;
218 
219  char src;
220  char target;
221  long int oc;
222 
223  while (std::getline(*f, cell)) {
224  //std::cout << "cell: " << cell << std::endl;
225  src = cell[0];
226  target = cell[cell.length() - 1];
227  char* j;
228  oc = std::strtol(cell.substr(2, cell.length() - 2).c_str(),&j,10);
229  //std::cout << oc << "\n";
230  Markov::Node<NodeStorageType>* srcN;
231  Markov::Node<NodeStorageType>* targetN;
232  Markov::Edge<NodeStorageType>* e;
233  if (this->nodes.find(src) == this->nodes.end()) {
234  srcN = new Markov::Node<NodeStorageType>(src);
235  this->nodes.insert(std::pair<char, Markov::Node<NodeStorageType>*>(src, srcN));
236  //std::cout << "Creating new node at start.\n";
237  }
238  else {
239  srcN = this->nodes.find(src)->second;
240  }
241 
242  if (this->nodes.find(target) == this->nodes.end()) {
243  targetN = new Markov::Node<NodeStorageType>(target);
244  this->nodes.insert(std::pair<char, Markov::Node<NodeStorageType>*>(target, targetN));
245  //std::cout << "Creating new node at end.\n";
246  }
247  else {
248  targetN = this->nodes.find(target)->second;
249  }
250  e = srcN->Link(targetN);
251  e->AdjustEdge(oc);
252  this->edges.push_back(e);
253 
254  //std::cout << int(srcN->NodeValue()) << " --" << e->EdgeWeight() << "--> " << int(targetN->NodeValue()) << "\n";
255 
256 
257  }
258 
259  this->OptimizeEdgeOrder();
260 
261  return true;
262 }
263 
264 template <typename NodeStorageType>
265 void Markov::Model<NodeStorageType>::OptimizeEdgeOrder(){
266  for (std::pair<unsigned char, Markov::Node<NodeStorageType>*> const& x : this->nodes) {
267  //std::cout << "Total edges in EdgesV: " << x.second->edgesV.size() << "\n";
268  std::sort (x.second->edgesV.begin(), x.second->edgesV.end(), [](Edge<NodeStorageType> *lhs, Edge<NodeStorageType> *rhs)->bool{
269  return lhs->EdgeWeight() > rhs->EdgeWeight();
270  });
271  //for(int i=0;i<x.second->edgesV.size();i++)
272  // std::cout << x.second->edgesV[i]->EdgeWeight() << ", ";
273  //std::cout << "\n";
274  }
275  //std::cout << "Total number of nodes: " << this->nodes.size() << std::endl;
276  //std::cout << "Total number of edges: " << this->edges.size() << std::endl;
277 }
278 
279 template <typename NodeStorageType>
280 bool Markov::Model<NodeStorageType>::Import(const char* filename) {
281  std::ifstream importfile;
282  importfile.open(filename);
283  return this->Import(&importfile);
284 
285 }
286 
287 template <typename NodeStorageType>
288 bool Markov::Model<NodeStorageType>::Export(std::ofstream* f) {
289  Markov::Edge<NodeStorageType>* e;
290  for (std::vector<int>::size_type i = 0; i != this->edges.size(); i++) {
291  e = this->edges[i];
292  //std::cout << e->LeftNode()->NodeValue() << "," << e->EdgeWeight() << "," << e->RightNode()->NodeValue() << "\n";
293  *f << e->LeftNode()->NodeValue() << "," << e->EdgeWeight() << "," << e->RightNode()->NodeValue() << "\n";
294  }
295 
296  return true;
297 }
298 
299 template <typename NodeStorageType>
300 bool Markov::Model<NodeStorageType>::Export(const char* filename) {
301  std::ofstream exportfile;
302  exportfile.open(filename);
303  return this->Export(&exportfile);
304 }
305 
306 template <typename NodeStorageType>
307 NodeStorageType* Markov::Model<NodeStorageType>::RandomWalk(Markov::Random::RandomEngine* randomEngine, int minSetting, int maxSetting, NodeStorageType* buffer) {
308  Markov::Node<NodeStorageType>* n = this->starterNode;
309  int len = 0;
310  Markov::Node<NodeStorageType>* temp_node;
311  while (true) {
312  temp_node = n->RandomNext(randomEngine);
313  if (len >= maxSetting) {
314  break;
315  }
316  else if ((temp_node == NULL) && (len < minSetting)) {
317  continue;
318  }
319 
320  else if (temp_node == NULL){
321  break;
322  }
323 
324  n = temp_node;
325 
326  buffer[len++] = n->NodeValue();
327  }
328 
329  //null terminate the string
330  buffer[len] = 0x00;
331 
332  //do something with the generated string
333  return buffer; //for now
334 }
335 
336 template <typename NodeStorageType>
337 void Markov::Model<NodeStorageType>::AdjustEdge(const NodeStorageType* payload, long int occurrence) {
338  NodeStorageType p = payload[0];
339  Markov::Node<NodeStorageType>* curnode = this->starterNode;
340  Markov::Edge<NodeStorageType>* e;
341  int i = 0;
342 
343  if (p == 0) return;
344  while (p != 0) {
345  e = curnode->FindEdge(p);
346  if (e == NULL) return;
347  e->AdjustEdge(occurrence);
348  curnode = e->RightNode();
349  p = payload[++i];
350  }
351 
352  e = curnode->FindEdge('\xff');
353  e->AdjustEdge(occurrence);
354  return;
355 }