import argparse
import os
from abc import abstractmethod

import allogate as logging
from termcolor import colored

from mm import MarkovModel
17 """! @brief Base CLI class to handle user interactions
18 @belongsto Python::Markopy
22 @brief initialize base CLI
23 @param add_help decide to overload the help function or not
25 self.
parserparser = argparse.ArgumentParser(description=
"Python wrapper for MarkovPasswords.",
26 epilog=f
"""{colored("Sample runs:", "yellow")}
27 {__file__.split("/")[-1]} train untrained.mdl -d dataset.dat -s "\\t" -o trained.mdl
28 Import untrained.mdl, train it with dataset.dat which has tab delimited data, output resulting model to trained.mdl\n
30 {__file__.split("/")[-1]} generate trained.mdl -n 500 -w output.txt
31 Import trained.mdl, and generate 500 lines to output.txt
33 {__file__.split("/")[-1]} combine untrained.mdl -d dataset.dat -s "\\t" -n 500 -w output.txt
34 Train and immediately generate 500 lines to output.txt. Do not export trained model.
36 {__file__.split("/")[-1]} combine untrained.mdl -d dataset.dat -s "\\t" -n 500 -w output.txt -o trained.mdl
37 Train and immediately generate 500 lines to output.txt. Export trained model.
38 """, add_help=add_help, formatter_class=argparse.RawTextHelpFormatter)
def add_arguments(self):
    """! @brief Register the command line arguments shared by every mode."""
    self.parser.add_argument("mode", help="Process mode. Either 'Train', 'Generate', or 'Combine'.")
    # BUGFIX: the original help text was copy-pasted from -n/--count
    # ("Number of lines to generate...") although -t sets the thread count.
    self.parser.add_argument("-t", "--threads", default=10, help="Number of threads to use.")
    # action="count" means the attribute is None when -v is never given.
    self.parser.add_argument("-v", "--verbosity", action="count", help="Output verbosity.")
    self.parser.add_argument("-b", "--bulk", action="store_true",
                             help="Bulk generate or bulk train every corpus/model in the folder.")
def help(self):
    """! @brief Handle help strings. Defaults to argparse's help."""
    # NOTE(review): the body is lost in the mangled source; delegating to
    # argparse's print_help is the conventional default — confirm upstream.
    self.parser.print_help()
def parse(self):
    """! @brief Add, parse and hook the command line arguments."""
    # NOTE(review): body lost in the mangled source; reconstructed as the
    # standard register -> parse -> post-process sequence — confirm upstream.
    self.add_arguments()
    self.parse_arguments()
    self.init_post_arguments()
def init_post_arguments(self):
    """! @brief Set up state collected from the command line arguments."""
    # -v/--verbosity uses action="count", so it is None (falsy) when absent;
    # only then is the global log verbosity left at its default.
    if self.args.verbosity:
        logging.VERBOSITY = self.args.verbosity
        logging.pprint(f"Verbosity set to {self.args.verbosity}.", 2)
def parse_arguments(self):
    """! @brief Trigger the parser and store the parsed namespace."""
    # NOTE(review): body lost in the mangled source. parse_known_args (rather
    # than parse_args) tolerates extra argv entries added by wrappers —
    # confirm against upstream.
    self.args = self.parser.parse_known_args()[0]
def import_model(self, filename: str):
    """!
    @brief Import a model file.
    @param filename filename to import
    @return False when the file is missing, True on success.
    """
    logging.pprint("Importing model file.", 1)
    if not self.check_import_path(filename):
        # Extraction had replaced the interpolated path with "(unknown)";
        # restored to the obvious {filename} placeholder.
        logging.pprint(f"Model file at {filename} not found. Check the file path, or working directory")
        return False
    self.model.Import(filename)
    logging.pprint("Model imported successfully.", 2)
    return True
def train(self, dataset: str, seperator: str, output: str, output_forced: bool = False, bulk: bool = False):
    """!
    @brief Train a model via CLI parameters.
    @param dataset filename for the dataset
    @param seperator seperator used with the dataset
    @param output output filename
    @param output_forced force overwrite
    @param bulk marks bulk operation with directories
    @return False on invalid parameters or delimiter, True when training finished.
    """
    logging.pprint("Training.")

    # -o/--output only becomes mandatory when the caller forces an export.
    if not (dataset and seperator and (output or not output_forced)):
        logging.pprint(f"Training mode requires -d/--dataset{', -o/--output' if output_forced else''} and -s/--seperator parameters. Exiting.")
        return False

    if not self.check_corpus_path(dataset):
        logging.pprint(f"{dataset} doesn't exists. Check the file path, or working directory")
        return False

    if not self.check_export_path(output):
        logging.pprint(f"Cannot create output at {output}")
        return False

    # A literal backslash-t typed in the shell is unescaped to a real tab.
    if seperator == '\\t':
        logging.pprint("Escaping seperator.", 3)
        seperator = '\t'

    if len(seperator) != 1:
        logging.pprint(f'Delimiter must be a single character, and "{seperator}" is not accepted.')
        # NOTE(review): the original action here is lost in the mangled
        # source (may have been a hard exit); failing soft — confirm upstream.
        return False

    logging.pprint('Starting training.', 3)
    self.model.Train(dataset, seperator, int(self.args.threads))
    logging.pprint('Training completed.', 2)

    if output:
        logging.pprint(f'Exporting model to {output}', 2)
        self.export(output)
    else:
        logging.pprint('Model will not be exported.', 1)
    return True
def export(self, filename: str):
    """!
    @brief Export model to a file.
    @param filename filename to export to
    """
    self.model.Export(filename)
def generate(self, wordlist: str, bulk: bool = False):
    """!
    @brief Generate strings from the model.
    @param wordlist wordlist filename
    @param bulk marks bulk operation with directories
    @return False when parameters are missing, otherwise None.
    """
    # NOTE(review): the guard uses `or`, so either -w or -n alone satisfies
    # it even though the message demands both — preserved as in the source.
    if not (wordlist or self.args.count):
        logging.pprint("Generation mode requires -w/--wordlist and -n/--count parameters. Exiting.")
        return False
    if bulk and os.path.isfile(wordlist):
        logging.pprint(f"{wordlist} exists and will be overwritten.", 1)
    self._generate(wordlist)
def _generate(self, wordlist: str):
    """!
    @brief Wrapper for the generate call. This can be overloaded by other models.
    @param wordlist filename to generate to
    """
    # argparse values may arrive as strings; coerce every numeric knob once.
    self.model.Generate(int(self.args.count), wordlist,
                        int(self.args.min), int(self.args.max),
                        int(self.args.threads))
def check_import_path(filename: str):
    """!
    @brief Check an import path for validity.
    @param filename filename to check
    @return True when the file exists, False otherwise.
    """
    if not os.path.isfile(filename):
        return False
    return True
def check_corpus_path(filename: str):
    """!
    @brief Check a training corpus path for validity.
    @param filename filename to check
    @return True when the corpus file exists, False otherwise.
    """
    if not os.path.isfile(filename):
        return False
    return True
def check_export_path(filename: str):
    """!
    @brief Check an export path for validity.
    @param filename filename to check
    @return True — an existing target is permitted (it will be overwritten).
    """
    # NOTE(review): the branch body is lost in the mangled source; presumably
    # an existing target only warranted an overwrite warning, and the check
    # always succeeded — confirm against upstream before relying on this.
    if filename and os.path.isfile(filename):
        pass
    return True
def process(self):
    """!
    @brief Process parameters for operation.

    Bulk mode iterates over a directory of corpora/models; single mode
    operates on one input file. Mode names match case-insensitively.
    """
    if self.args.bulk:
        logging.pprint("Bulk mode operation chosen.", 4)
        if self.args.mode.lower() == "train":
            # Bulk training needs directories (not files) on both sides.
            if ((os.path.isdir(self.args.output) and not os.path.isfile(self.args.output))
                    and (os.path.isdir(self.args.dataset) and not os.path.isfile(self.args.dataset))):
                corpus_list = os.listdir(self.args.dataset)
                for corpus in corpus_list:
                    # Re-import the untrained base model before each corpus so
                    # every output starts from the same starting point.
                    self.import_model(self.args.input)
                    logging.pprint(f"Training {self.args.input} with {corpus}", 2)
                    model_extension = ""
                    if "." in self.args.input:
                        model_extension = self.args.input.split(".")[-1]
                    self.train(f"{self.args.dataset}/{corpus}", self.args.seperator,
                               f"{self.args.output}/{corpus}.{model_extension}",
                               output_forced=True, bulk=True)
            else:
                logging.pprint("In bulk training, output and dataset should be a directory.")
        elif self.args.mode.lower() == "generate":
            if ((os.path.isdir(self.args.wordlist) and not os.path.isfile(self.args.wordlist))
                    and (os.path.isdir(self.args.input) and not os.path.isfile(self.args.input))):
                model_list = os.listdir(self.args.input)
                # Renamed loop variable (was `input`) to stop shadowing the builtin.
                for model_file in model_list:
                    logging.pprint(f"Generating from {self.args.input}/{model_file} to {self.args.wordlist}/{model_file}.txt", 2)
                    self.import_model(f"{self.args.input}/{model_file}")
                    model_base = model_file
                    if "." in self.args.input:
                        # NOTE(review): the source splits on "." and takes
                        # index [1] (not [0]/[-1]); preserved verbatim, but it
                        # looks suspicious — confirm intent upstream.
                        model_base = model_file.split(".")[1]
                    self.generate(f"{self.args.wordlist}/{model_base}.txt", bulk=True)
            else:
                logging.pprint("In bulk generation, input and wordlist should be directory.")
    else:
        # Single-file mode: import once, then dispatch on the chosen mode.
        self.import_model(self.args.input)
        if self.args.mode.lower() == "generate":
            self.generate(self.args.wordlist)
        elif self.args.mode.lower() == "train":
            self.train(self.args.dataset, self.args.seperator, self.args.output, output_forced=True)
        elif self.args.mode.lower() == "combine":
            # Combine trains without forcing an export, then generates.
            self.train(self.args.dataset, self.args.seperator, self.args.output)
            self.generate(self.args.wordlist)
        else:
            logging.pprint("Invalid mode arguement given.")
            logging.pprint("Accepted modes: 'Generate', 'Train', 'Combine'")
259 @brief abstract class for generation capable models
260 @belongsto Python::Markopy
261 @extends Python::Markopy::BaseCLI
def add_arguments(self):
    """! @brief Add generation-related command line arguments to the parser."""
    self.parser.add_argument("input", help="Input model file. This model will be imported before starting operation.")
    self.parser.add_argument("-w", "--wordlist", help="Wordlist file path to export generation results to. Will be ignored for training mode")
    self.parser.add_argument("--min", default=6, help="Minimum length that is allowed during generation")
    self.parser.add_argument("--max", default=12, help="Maximum length that is allowed during generation")
    self.parser.add_argument("-n", "--count", help="Number of lines to generate. Ignored in training mode.")
276 @brief abstract class for training capable models
277 @belongsto Python::Markopy
278 @extends Python::Markopy::BaseCLI
279 @extends Python::Markopy::AbstractGenerationModelCLI
def add_arguments(self):
    """! @brief Add training-related command line arguments to the parser."""
    self.parser.add_argument("-o", "--output", help="Output model file. This model will be exported when done. Will be ignored for generation mode.")
    self.parser.add_argument("-d", "--dataset", help="Dataset file to read input from for training. Will be ignored for generation mode.")
    self.parser.add_argument("-s", "--seperator", help="Seperator character to use with training data.(character between occurrence and value)")
abstract class for generation capable models
abstract class for training capable models
Base CLI class to handle user interactions
def check_import_path(str filename)
check import path for validity
def check_corpus_path(str filename)
check corpus path for validity
def init_post_arguments(self)
def train(self, str dataset, str seperator, str output, bool output_forced=False, bool bulk=False)
Train a model via CLI parameters.
def import_model(self, str filename)
Import a model file.
def __init__(self, bool add_help=True)
initialize base CLI
def parse_arguments(self)
def process(self)
Process parameters for operation.
def check_export_path(str filename)
check export path for validity
def _generate(self, str wordlist)
wrapper for generate function.
def generate(self, str wordlist, bool bulk=False)
Generate strings from the model.
def export(self, str filename)
Export model to a file.
Abstract representation of a markov model.