8 from abc
import abstractmethod
10 import allogate
as logging
24 @brief Abstract class to evaluate and score integrity/validity
25 @belongsto Python::Markopy::Evaluation
29 @brief default constructor for evaluator
30 @param filename filename to evaluate. Can be a pattern
39 self.
filesfiles = glob.glob(filename)
41 self.
filesfiles.append(filename)
45 "! @brief base evaluation function"
46 for file
in self.
filesfiles:
49 self.
check_funcscheck_funcs = [func
for func
in dir(self)
if (callable(getattr(self, func))
and func.startswith(
"check_"))]
54 @brief internal evaluation function for a single file
55 @param file filename to evaluate
57 if(
not os.path.isfile(file)):
58 logging.pprint(f
"Given file {file} is not a valid filename")
61 return open(file,
"rb").read().split(b
"\n")
68 @param checkname text to display with the check
75 @param checkname text to display with the check
82 "! @brief finalize an evaluation and print checks"
83 print(
"\n################ Checks ################ ")
84 for test
in self.
checkschecks:
85 logging.pprint(f
"{test[0]:30}:{test[1]} ")
92 @brief evaluate a model
93 @belongsto Python::Markopy::Evaluation
94 @extends Python::Markopy::Evaluation::Evaluator
97 "! @brief default constructor"
104 "! @brief evaluate a model"
106 logging.SHOW_STACK_THRESHOLD=3
108 for file
in self.
filesfiles:
109 logging.pprint(f
"Model: {file.split('/')[-1]}: ",2)
123 self.
ewsews.append(int(edge[2:-2:1]))
124 if(e[0]
not in self.
lnodeslnodes):
127 self.
lnodeslnodes[e[0]]+=1
128 if(e[-1]
not in self.
rnodesrnodes):
129 self.
rnodesrnodes[e[-1]]=1
131 self.
rnodesrnodes[e[-1]]+=1
132 except Exception
as e:
134 logging.pprint(f
"Model file is corrupted.", 0)
139 logging.pprint(f
"total edges: {self.edge_count}", 1)
140 logging.pprint(f
"unique left nodes: {self.lnode_count}", 1)
141 logging.pprint(f
"unique right nodes: {self.rnode_count}", 1)
145 self.__getattribute__(check)()
146 except Exception
as e:
148 self.
failfail(f
"Exceptionn in {check}")
152 "! @brief check if model has dangling nodes"
154 self.
successsuccess(
"No dangling nodes")
156 logging.pprint(f
"Dangling nodes found, lnodes and rnodes do not match", 0)
157 self.
failfail(
"No dangling nodes")
160 "! @brief check model structure for validity"
162 self.
successsuccess(
"Model structure")
164 logging.pprint(f
"Model did not satisfy structural integrity check (lnode_count-1) * (rnode_count-1) + 2*(lnode_count-1)", 0)
165 self.
failfail(
"Model structure")
168 "! @brief check model standard deviation between edge weights"
169 mean = sum(self.
ewsews) / len(self.
ewsews)
170 variance = sum([((x - mean) ** 2)
for x
in self.
ewsews]) / len(self.
ewsews)
171 res = variance ** 0.5
174 logging.pprint(f
"Model seems to be untrained", 0)
175 self.
failfail(
"Model has any training")
177 self.
successsuccess(
"Model has any training")
179 logging.pprint(f
"Model is not adequately trained. Might result in inadequate results", 1)
180 self.
failfail(
"Model has training")
181 self.
failfail(f
"Model training score: {round(self.stdev,2)}")
183 self.
successsuccess(
"Model has training")
184 self.
successsuccess(f
"Model training score: {round(self.stdev)}")
187 "! @brief check 0 edge weights distribution"
189 for ew
in self.
ewsews:
193 self.
failfail(
"Too many 0 edges")
194 logging.pprint(f
"0 weighted edges are dangerous and may halt the model.", 0)
196 self.
successsuccess(
"0 edges below threshold")
199 "! @brief check minimum 10% of the edges"
202 avg = sum(self.
ewsews) / len(self.
ewsews)
204 med = statistics.median(self.
ewsews)
208 "! @brief check which way model is leaning. Left, or right"
210 avg = sum(self.
ewsews) / len(self.
ewsews)
211 med = statistics.median(self.
ewsews)
214 logging.pprint(
"Median is too left leaning and might indicate high entropy")
215 self.
failfail(
"Median too left leaning")
217 self.
successsuccess(
"Median in expected ratio")
221 logging.pprint(
"Least probable 10% too close to average, might indicate inadequate training")
222 self.
failfail(
"Bad bottom 10%")
224 self.
successsuccess(
"Good bottom 10%")
230 sorted_ews = copy(self.
ewsews)
231 sorted_ews.sort(reverse=
True)
232 ratio1 = sorted_ews[0]/sorted_ews[int(self.
edge_countedge_count/2)]
233 ratio2 = sorted_ews[int(self.
edge_countedge_count/2)]/sorted_ews[int(self.
edge_countedge_count*0.1)]
240 @brief evaluate a corpus
241 @belongsto Python::Markopy::Evaluation
242 @extends Python::Markopy::Evaluation::Evaluator
246 @brief default constructor
247 @param filename filename or pattern to check
254 "! @brief evaluate a corpus. Might take a long time"
255 logging.pprint(
"WARNING: This takes a while with larger corpus files", 2)
257 logging.SHOW_STACK_THRESHOLD=3
259 for file
in self.
filesfiles:
266 bDelimiterConflict=
False
267 logging.pprint(f
"Corpus: {file.split('/')[-1]}: ",2)
268 with open(file,
"rb")
as corpus:
271 match = re.match(
r"([0-9]+)(.)(.*)\n", line.decode()).groups()
272 if(delimiter
and delimiter!=match[1]):
273 bDelimiterConflict =
True
277 logging.pprint(f
"Delimiter is: {delimiter.encode()}")
279 total_chars += len(match[2])
280 if(int(match[0])>max):
283 if(bDelimiterConflict):
284 self.
failfail(
"Incorrect delimiter found")
286 self.
successsuccess(
"No structural conflicts")
288 logging.pprint(f
"Total number of lines: {lines_count}")
289 logging.pprint(f
"Sum of all string weights: {sum}")
290 logging.pprint(f
"Character total: {total_chars}")
291 logging.pprint(f
"Average length: {total_chars/lines_count}")
292 logging.pprint(f
"Average weight: {sum/lines_count}")
297 @brief evaluate a single file. Remove reading file because it should be read line by line.
298 @param file corpus filename to evaluate
300 if(
not os.path.isfile(file)):
301 logging.pprint(f
"Given file {file} is not a valid filename")
None __init__(self, str filename)
default constructor
list _evaluate(self, file)
evaluate a single file.
Abstract class to evaluate and score integrity/validity.
def success(self, checkname)
pass a test
None __init__(self, str filename)
default constructor for evaluator
list _evaluate(self, file)
internal evaluation function for a single file
def fail(self, checkname)
fail a test
def check_structure(self)
def check_weight_deviation(self)
def check_min_10percent(self)
None __init__(self, str filename)
default constructor for evaluator