from abc import abstractmethod

# modules used by the evaluation code below
import glob
import os
import re
import statistics
from copy import copy

import allogate as logging
 
    @brief Abstract class to evaluate and score integrity/validity
    @belongsto Python::Markopy::Evaluation

        @brief default constructor for evaluator
        @param filename filename to evaluate. Can be a pattern
            self.files = glob.glob(filename)

            self.files.append(filename)
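
The two constructor fragments above suggest glob expansion of the filename pattern with a literal-filename fallback. A minimal sketch of that idea, assuming the fallback is taken when the pattern matches nothing (the exact branching and the self.checks initialization are assumptions, not the project's code):

    def __init__(self, filename: str):
        # assumption: expand the argument as a glob pattern first
        self.files = glob.glob(filename)
        if not self.files:
            # assumption: fall back to treating the argument as a literal path
            self.files.append(filename)
        # assumed container of (checkname, passed) pairs later printed by finalize()
        self.checks = []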
 
        "! @brief base evaluation function"
        for file in self.files:

        self.check_funcs = [func for func in dir(self) if (callable(getattr(self, func)) and func.startswith("check_"))]
 
        @brief internal evaluation function for a single file
        @param file filename to evaluate

        if(not os.path.isfile(file)):
            logging.pprint(f"Given file {file} is not a valid filename")

            return open(file, "rb").read().split(b"\n")
 
        @brief pass a test
        @param checkname text to display with the check

        @brief fail a test
        @param checkname text to display with the check
        "! @brief finalize an evaluation and print checks"
        print("\n################ Checks ################ ")
        for test in self.checks:
            logging.pprint(f"{test[0]:30}:{test[1]} ")
 
    @brief evaluate a model
    @belongsto Python::Markopy::Evaluation
    @extends Python::Markopy::Evaluation::Evaluator

        "! @brief default constructor"

        "! @brief evaluate a model"

        logging.SHOW_STACK_THRESHOLD=3
 
        for file in self.files:
            logging.pprint(f"Model: {file.split('/')[-1]}: ",2)

                    self.ews.append(int(edge[2:-2:1]))
                    if(e[0] not in self.lnodes):

                        self.lnodes[e[0]]+=1
                    if(e[-1] not in self.rnodes):
                        self.rnodes[e[-1]]=1

                        self.rnodes[e[-1]]+=1
                except Exception as e:

                    logging.pprint(f"Model file is corrupted.", 0)
 
            logging.pprint(f"total edges: {self.edge_count}", 1)
            logging.pprint(f"unique left nodes: {self.lnode_count}", 1)
            logging.pprint(f"unique right nodes: {self.rnode_count}", 1)
 
                    self.__getattribute__(check)()
                except Exception as e:

                    self.fail(f"Exception in {check}")
 
        "! @brief check if model has dangling nodes"

            self.success("No dangling nodes")

            logging.pprint(f"Dangling nodes found, lnodes and rnodes do not match", 0)
            self.fail("No dangling nodes")
 
        "! @brief check model structure for validity"

            self.success("Model structure")

            logging.pprint(f"Model did not satisfy structural integrity check (lnode_count-1) * (rnode_count-1) + 2*(lnode_count-1)", 0)
            self.fail("Model structure")
 
        "! @brief check model standard deviation between edge weights"
        mean = sum(self.ews) / len(self.ews)
        variance = sum([((x - mean) ** 2) for x in self.ews]) / len(self.ews)
        res = variance ** 0.5
 
            logging.pprint(f"Model seems to be untrained", 0)
            self.fail("Model has any training")

            self.success("Model has any training")

            logging.pprint(f"Model is not adequately trained. Might result in inadequate results", 1)
            self.fail("Model has training")
            self.fail(f"Model training score: {round(self.stdev,2)}")

            self.success("Model has training")
            self.success(f"Model training score: {round(self.stdev)}")
 
        "! @brief check 0 edge weights distribution"

        for ew in self.ews:

            self.fail("Too many 0 edges")
            logging.pprint(f"0 weighted edges are dangerous and may halt the model.", 0)

            self.success("0 edges below threshold")
 
        "! @brief check minimum 10% of the edges"

        avg = sum(self.ews) / len(self.ews)

        med = statistics.median(self.ews)
 
        "! @brief check which way model is leaning. Left, or right"

        avg = sum(self.ews) / len(self.ews)
        med = statistics.median(self.ews)

            logging.pprint("Median is too left leaning and might indicate high entropy")
            self.fail("Median too left leaning")

            self.success("Median in expected ratio")

            logging.pprint("Least probable 10% too close to average, might indicate inadequate training")
            self.fail("Bad bottom 10%")

            self.success("Good bottom 10%")
 
        sorted_ews = copy(self.ews)
        sorted_ews.sort(reverse=True)
        ratio1 = sorted_ews[0]/sorted_ews[int(self.edge_count/2)]
        ratio2 = sorted_ews[int(self.edge_count/2)]/sorted_ews[int(self.edge_count*0.1)]
 
    @brief evaluate a corpus
    @belongsto Python::Markopy::Evaluation
    @extends Python::Markopy::Evaluation::Evaluator

        @brief default constructor
        @param filename filename or pattern to check

        "! @brief evaluate a corpus. Might take a long time"
        logging.pprint("WARNING: This takes a while with larger corpus files", 2)

        logging.SHOW_STACK_THRESHOLD=3
 
        for file in self.files:

            bDelimiterConflict=False
            logging.pprint(f"Corpus: {file.split('/')[-1]}: ",2)
            with open(file, "rb") as corpus:

                    match = re.match(r"([0-9]+)(.)(.*)\n", line.decode()).groups()
                    if(delimiter and delimiter!=match[1]):
                        bDelimiterConflict = True

                        logging.pprint(f"Delimiter is: {delimiter.encode()}")

                    total_chars += len(match[2])
                    if(int(match[0])>max):

                if(bDelimiterConflict):
                    self.fail("Incorrect delimiter found")

                    self.success("No structural conflicts")

                logging.pprint(f"Total number of lines: {lines_count}")
                logging.pprint(f"Sum of all string weights: {sum}")
                logging.pprint(f"Character total: {total_chars}")
                logging.pprint(f"Average length: {total_chars/lines_count}")
                logging.pprint(f"Average weight: {sum/lines_count}")
 
        @brief evaluate a single file. Skips the base class's whole-file read because a corpus should be read line by line.
        @param file corpus filename to evaluate

        if(not os.path.isfile(file)):
            logging.pprint(f"Given file {file} is not a valid filename")
 
None __init__(self, str filename)
default constructor
 
list _evaluate(self, file)
evaluate a single file.
 
Abstract class to evaluate and score integrity/validity.
 
def success(self, checkname)
pass a test
 
None __init__(self, str filename)
default constructor for evaluator
 
list _evaluate(self, file)
internal evaluation function for a single file
 
def fail(self, checkname)
fail a test
 
def check_structure(self)
 
def check_weight_deviation(self)
 
def check_min_10percent(self)
 
None __init__(self, str filename)
default constructor for evaluator