Markopy
Utilizing Markov Models for brute forcing attacks
base.py
Go to the documentation of this file.
1 #!/usr/bin/python3
2 
3 
7 
8 import argparse
9 import allogate as logging
10 import os
11 from abc import abstractmethod
12 from termcolor import colored
13 from mm import MarkovModel
14 
15 
class BaseCLI():
    """! @brief Base CLI class to handle user interactions
    @belongsto Python::Markopy

    Owns the argparse parser (self.parser), the parsed arguments (self.args,
    available after parse_arguments()) and the MarkovModel instance (self.model).

    Several methods carry @abstractmethod. BaseCLI itself does not use ABCMeta,
    so on this class the decorator is only a marker for "subclasses are expected
    to extend this"; the bodies below are the default implementations.
    """

    def __init__(self, add_help : bool=True):
        """!
        @brief initialize base CLI
        @param add_help forwarded to argparse.ArgumentParser; pass False when the caller provides its own help handling
        """
        # basename() instead of split("/") so the script name is also correct
        # with Windows path separators.
        script = os.path.basename(__file__)
        epilog = f"""{colored("Sample runs:", "yellow")}
    {script} train untrained.mdl -d dataset.dat -s "\\t" -o trained.mdl
        Import untrained.mdl, train it with dataset.dat which has tab delimited data, output resulting model to trained.mdl\n

    {script} generate trained.mdl -n 500 -w output.txt
        Import trained.mdl, and generate 500 lines to output.txt

    {script} combine untrained.mdl -d dataset.dat -s "\\t" -n 500 -w output.txt
        Train and immediately generate 500 lines to output.txt. Do not export trained model.

    {script} combine untrained.mdl -d dataset.dat -s "\\t" -n 500 -w output.txt -o trained.mdl
        Train and immediately generate 500 lines to output.txt. Export trained model.
    """
        self.parser = argparse.ArgumentParser(
            description="Python wrapper for MarkovPasswords.",
            epilog=epilog,
            add_help=add_help,
            # Raw formatter keeps the line breaks of the sample runs above.
            formatter_class=argparse.RawTextHelpFormatter,
        )
        self.print_help = self.parser.print_help
        self.model = MarkovModel()

    @abstractmethod
    def add_arguments(self):
        "! @brief Add command line arguments to the parser"
        self.parser.add_argument("mode", help="Process mode. Either 'Train', 'Generate', or 'Combine'.")
        # Help text previously duplicated the --count description; the value is
        # passed as the thread count to both Train() and Generate().
        self.parser.add_argument("-t", "--threads", default=10, help="Number of threads to use.")
        self.parser.add_argument("-v", "--verbosity", action="count", help="Output verbosity.")
        self.parser.add_argument("-b", "--bulk", action="store_true", help="Bulk generate or bulk train every corpus/model in the folder.")

    @abstractmethod
    def help(self):
        "! @brief Handle help strings. Defaults to argparse's help"
        self.print_help()

    def parse(self):
        "! @brief add, parse and hook arguments"
        self.add_arguments()
        self.parse_arguments()
        self.init_post_arguments()

    @abstractmethod
    def init_post_arguments(self):
        "! @brief set up stuff that is collected from command line arguments"
        logging.VERBOSITY = 0
        # getattr() keeps this best-effort (missing args / missing option are
        # simply ignored) without a bare except hiding unrelated errors.
        verbosity = getattr(getattr(self, "args", None), "verbosity", None)
        if verbosity:
            logging.VERBOSITY = verbosity
            logging.pprint(f"Verbosity set to {verbosity}.", 2)

    @abstractmethod
    def parse_arguments(self):
        "! @brief trigger parser"
        # parse_known_args() so that options unknown to this parser do not abort.
        self.args = self.parser.parse_known_args()[0]

    def import_model(self, filename : str):
        """!
        @brief Import a model file
        @param filename filename to import
        @return True when the model was imported, False when the file was not found
        """
        logging.pprint("Importing model file.", 1)

        if not self.check_import_path(filename):
            logging.pprint(f"Model file at {filename} not found. Check the file path, or working directory")
            return False

        self.model.Import(filename)
        logging.pprint("Model imported successfully.", 2)
        return True

    def train(self, dataset : str, seperator : str, output : str, output_forced : bool=False, bulk : bool=False):
        """!
        @brief Train a model via CLI parameters
        @param dataset filename for the dataset
        @param seperator seperator used with the dataset
        @param output output filename
        @param output_forced when True, an output filename is mandatory
        @param bulk marks bulk operation with directories
        @return True on success, False when a parameter or path check fails
        @note terminates the process with exit status 4 when the seperator is not a single character
        """
        logging.pprint("Training.")

        if not (dataset and seperator and (output or not output_forced)):
            logging.pprint(f"Training mode requires -d/--dataset{', -o/--output' if output_forced else''} and -s/--seperator parameters. Exiting.")
            return False

        if not bulk and not self.check_corpus_path(dataset):
            logging.pprint(f"{dataset} doesn't exists. Check the file path, or working directory")
            return False

        if not self.check_export_path(output):
            logging.pprint(f"Cannot create output at {output}")
            return False

        # A literal backslash followed by 't' on the command line means a tab.
        if seperator == '\\t':
            logging.pprint("Escaping seperator.", 3)
            seperator = '\t'

        if len(seperator) != 1:
            logging.pprint(f'Delimiter must be a single character, and "{seperator}" is not accepted.')
            # Same exception the exit() builtin raises, without depending on
            # the site module having installed that builtin.
            raise SystemExit(4)

        logging.pprint('Starting training.', 3)
        self.model.Train(dataset, seperator, int(self.args.threads))
        logging.pprint('Training completed.', 2)

        if output:
            logging.pprint(f'Exporting model to {output}', 2)
            self.export(output)
        else:
            logging.pprint('Model will not be exported.', 1)

        return True

    def export(self, filename : str):
        """!
        @brief Export model to a file
        @param filename filename to export to
        """
        self.model.Export(filename)

    def generate(self, wordlist : str, bulk : bool=False):
        """!
        @brief Generate strings from the model
        @param wordlist wordlist filename
        @param bulk marks bulk operation with directories
        @return True when generation was started, False when both wordlist and count are missing
        """
        # NOTE(review): this only rejects the call when BOTH values are missing,
        # although the message reads as if both were required. Left unchanged
        # because _generate() is an override point and a subclass may need only
        # one of them - confirm before tightening to "and".
        if not (wordlist or self.args.count):
            logging.pprint("Generation mode requires -w/--wordlist and -n/--count parameters. Exiting.")
            return False

        # "wordlist and" guards os.path.isfile() against a missing filename.
        if bulk and wordlist and os.path.isfile(wordlist):
            logging.pprint(f"{wordlist} exists and will be overwritten.", 1)
        self._generate(wordlist)
        return True

    @abstractmethod
    def _generate(self, wordlist : str):
        """!
        @brief wrapper for generate function. This can be overloaded by other models
        @param wordlist filename to generate to
        """
        self.model.Generate(int(self.args.count), wordlist, int(self.args.min), int(self.args.max), int(self.args.threads))

    @staticmethod
    def check_import_path(filename : str):
        """!
        @brief check import path for validity
        @param filename filename to check
        @return True when filename is an existing regular file
        """
        return os.path.isfile(filename)

    @staticmethod
    def check_corpus_path(filename : str):
        """!
        @brief check corpus path for validity
        @param filename filename to check
        @return True when filename is an existing regular file
        """
        return os.path.isfile(filename)

    @staticmethod
    def check_export_path(filename : str):
        """!
        @brief check export path for validity
        @param filename filename to check
        @return always True; an existing file is accepted (it will be overwritten) and so is a missing or empty filename
        """
        # NOTE(review): no real validation happens here; both outcomes of the
        # former isfile() test returned True. Kept permissive for callers.
        return True

    def process(self):
        """!
        @brief Process parameters for operation

        Dispatches on self.args.mode (case-insensitive) and self.args.bulk.
        Terminates with exit status 5 on an unknown mode in non-bulk operation,
        and with exit status 1 when bulk training is not given directories.
        """
        mode = self.args.mode.lower()

        if self.args.bulk:
            logging.pprint("Bulk mode operation chosen.", 4)
            if mode == "train":
                self._bulk_train()
            elif mode == "generate":
                self._bulk_generate()
            else:
                # Previously a silent no-op.
                logging.pprint("Bulk mode supports only 'Train' and 'Generate' modes.")
            return

        self.import_model(self.args.input)
        if mode == "generate":
            self.generate(self.args.wordlist)
        elif mode == "train":
            self.train(self.args.dataset, self.args.seperator, self.args.output, output_forced=True)
        elif mode == "combine":
            # Do not generate from a model whose training was rejected.
            # "is False" so an override of train() returning None still proceeds.
            if self.train(self.args.dataset, self.args.seperator, self.args.output) is False:
                return
            self.generate(self.args.wordlist)
        else:
            logging.pprint("Invalid mode arguement given.")
            logging.pprint("Accepted modes: 'Generate', 'Train', 'Combine'")
            raise SystemExit(5)

    @staticmethod
    def _is_directory(path):
        "! @brief True when path is a non-empty string naming an existing directory"
        # bool(path) first: os.path.isdir(None) raises TypeError when the
        # corresponding command line option was omitted.
        return bool(path) and os.path.isdir(path)

    def _bulk_train(self):
        "! @brief Train the input model once per file of the dataset directory, exporting each result into the output directory"
        dataset_dir = self.args.dataset
        output_dir = self.args.output
        if not (self._is_directory(output_dir) and self._is_directory(dataset_dir)):
            logging.pprint("In bulk training, output and dataset should be a directory.")
            raise SystemExit(1)

        # Exported models reuse the extension of the input model. splitext()
        # ignores dots in directory names and yields "" (no trailing dot) when
        # the input model has no extension.
        model_extension = os.path.splitext(self.args.input)[1]

        for corpus in sorted(os.listdir(dataset_dir)):
            # Re-import for every corpus so each training starts from the same model.
            self.import_model(self.args.input)
            logging.pprint(f"Training {self.args.input} with {corpus}", 2)
            self.train(
                os.path.join(dataset_dir, corpus),
                self.args.seperator,
                os.path.join(output_dir, f"{corpus}{model_extension}"),
                output_forced=True,
                bulk=True,
            )

    def _bulk_generate(self):
        "! @brief Generate one wordlist per model file of the input directory"
        model_dir = self.args.input
        wordlist_dir = self.args.wordlist
        if not (self._is_directory(wordlist_dir) and self._is_directory(model_dir)):
            logging.pprint("In bulk generation, input and wordlist should be directory.")
            return

        model_list = sorted(os.listdir(model_dir))
        logging.pprint(f"Models found: {model_list}", 3)
        for model_name in model_list:
            model_path = os.path.join(model_dir, model_name)
            # Output is always "<model file name>.txt", as the log line states.
            # The former code inspected the directory path for a "." and then
            # took split(".")[1] of the file name, which mapped every
            # "<name>.<ext>" model onto the same "<ext>.txt" wordlist.
            wordlist_path = os.path.join(wordlist_dir, f"{model_name}.txt")
            logging.pprint(f"Generating from {model_path} to {wordlist_path}", 2)
            self.import_model(model_path)
            self.generate(wordlist_path, bulk=True)
256 
258  """!
259  @brief abstract class for generation capable models
260  @belongsto Python::Markopy
261  @extends Python::Markopy::BaseCLI
262  """
263  @abstractmethod
264  def add_arguments(self):
265  "Add command line arguements to the parser"
266  super().add_arguments()
267  self.parserparser.add_argument("input", help="Input model file. This model will be imported before starting operation.")
268  self.parserparser.add_argument("-w", "--wordlist", help="Wordlist file path to export generation results to. Will be ignored for training mode")
269  self.parserparser.add_argument("--min", default=6, help="Minimum length that is allowed during generation")
270  self.parserparser.add_argument("--max", default=12, help="Maximum length that is allowed during generation")
271  self.parserparser.add_argument("-n", "--count", help="Number of lines to generate. Ignored in training mode.")
272 
273 
275  """!
276  @brief abstract class for training capable models
277  @belongsto Python::Markopy
278  @extends Python::Markopy::BaseCLI
279  @extends Python::Markopy::AbstractGenerationModelCLI
280  """
281  @abstractmethod
282  def add_arguments(self):
283  "Add command line arguements to the parser"
284  self.parserparser.add_argument("-o", "--output", help="Output model file. This model will be exported when done. Will be ignored for generation mode.")
285  self.parserparser.add_argument("-d", "--dataset", help="Dataset file to read input from for training. Will be ignored for generation mode.")
286  self.parserparser.add_argument("-s", "--seperator", help="Seperator character to use with training data.(character between occurrence and value)")
287  super().add_arguments()
abstract class for generation capable models
Definition: base.py:257
abstract class for training capable models
Definition: base.py:274
Base CLI class to handle user interactions
Definition: base.py:16
def check_import_path(str filename)
check import path for validity
Definition: base.py:169
def parse(self)
Definition: base.py:55
def check_corpus_path(str filename)
check corpus path for validity
Definition: base.py:181
def init_post_arguments(self)
Definition: base.py:62
def train(self, str dataset, str seperator, str output, bool output_forced=False, bool bulk=False)
Train a model via CLI parameters.
Definition: base.py:94
def import_model(self, str filename)
Import a model file.
Definition: base.py:77
def __init__(self, bool add_help=True)
initialize base CLI
Definition: base.py:20
def add_arguments(self)
Definition: base.py:43
def parse_arguments(self)
Definition: base.py:73
def process(self)
Process parameters for operation.
Definition: base.py:202
def help(self)
Definition: base.py:51
def check_export_path(str filename)
check export path for validity
Definition: base.py:192
def _generate(self, str wordlist)
wrapper for generate function.
Definition: base.py:161
def generate(self, str wordlist, bool bulk=False)
Generate strings from the model.
Definition: base.py:145
def export(self, str filename)
Export model to a file.
Definition: base.py:138
Abstract representation of a markov model.
Definition: mm.py:13