archivesOfToyLang/tshunhue/nfa_graph.py

268 lines
7.4 KiB
Python

#!/usr/bin/env python3
from copy import copy
start_point = 1
'''graph_list = list of links represented by (orig, dest, value)'''
graph_list = [(1, 2, "a"),
(2, 3, "g"),
(2, 4, "c"),
(2, 5, ("SET", "F", "G", "H")),
(4, 2, ("NOT", ""))]
def graph_list2dict(graph_list):
graph_dict = dict()
for item in graph_list:
orig = item[0]
dest = item[1]
value = item[2]
if orig in graph_dict:
if value in graph_dict[orig]:
raise Exception("Each 2 links from the same link shouldn't have the same value.")
'''
else:
for link_value in graph_dict[orig]:
if dest == graph_dict[orig][link_value]:
raise Exception("The orig-dest pair are duplicated.")'''
else:
graph_dict[orig] = dict()
graph_dict[orig][value] = dest
return graph_dict
graph_dict = graph_list2dict(graph_list)
print(graph_dict)
string1 = "ag"
string2 = "acxcxcxc"
def char_pattern_list_match(value, c):
'''Match a char with char pattern list "value". Called with pattern match'''
head = value[0]
# char pattern ["NOT", "a"] = [^a]
if head == "NOT":
if len(value) > 2:
raise Exception("in char pattern NOT contains only 1 sub-pattern")
return not (value_match(value[1], c))
# char pattern ["SET", "a", "b", ...] = [ab...]
elif head == "SET":
if len(value) == 1:
raise Exception("in char pattern SET contains at least 1 sub-pattern")
for i in value[1:]:
if value_match(i, c):
return True
return False
elif head == "RANGE":
if len(value) != 3 or not (isinstance(value[1], str) and isinstance(value[2], str)):
raise Exception("in char pattern RANGE contains only 2 chars")
elif len(value[1]) > 1 or len(value[2]) > 1:
raise Exception("in char pattern RANGE contains no string of which length > 1")
else:
lower_bound = ord(value[1])
upper_bound = ord(value[2])
if lower_bound > upper_bound:
raise Exception(
"in char pattern RANGE the lower_bound must not bigger than upper bound.")
if ord(c) >= lower_bound and ord(c) <= upper_bound:
return True
else:
return False
else:
raise Exception("The head of a char pattern list is not acceptable.")
def value_match(value, c):
if isinstance(value, str):
is_meta_character = (len(value) == 2 and value[1] == "\\")
if len(value) > 1 and not is_meta_character:
raise Exception("Can't compare a char with 2 or more chars.")
elif value == c:
return True
else:
return False
elif isinstance(value, tuple):
return char_pattern_list_match(value, c)
else:
raise Exception("The argument called with match_value is not available.")
print(value_match(("SET", ("RANGE", "a", "g")), "k"))
def transit_string_in_graph(graph_dict, start_point, string):
status = start_point
for c in string:
for value in graph_dict[status]:
if value_match(value, c):
status = graph_dict[status][value]
return status
print(transit_string_in_graph(graph_dict, start_point, string2))
def id_gen():
id_num = 0
while True:
new_value = (yield id_num)
if new_value != None:
id_num = new_value
else:
id_num += 1
real_id_generator = id_gen()
class NFA():
'''Definition of NFA graph'''
def __init__(self):
self.start = None
self.end = []
self.graph = [] # list of triary tuple
def __repr__(self):
return_string = ""
return_string += "Start: %d\n" % self.start
if self.graph == []:
return_string += "EMPTY PATH"
else:
for path in self.graph:
path_string = "%s -> %s : %s\n" % (path[0], path[1], path[2])
return_string += path_string
end_string = "End: " + str(self.end)
return_string += end_string
return return_string
def make_simple_nfa(pattern):
nfa = NFA()
nfa.start = next(real_id_generator)
nfa.end.append(next(real_id_generator))
nfa.graph.append(tuple([nfa.start, nfa.end[0], pattern]))
return nfa
def nfa_concat(nfa1, nfa2):
"""xy"""
new_nfa = NFA()
new_nfa.start = copy(nfa1.start)
connecting_point_pair = [nfa1.end, nfa2.start]
new_graph = copy(nfa1.graph)
new_end = []
for path in nfa2.graph:
if path[0] == connecting_point_pair[1]:
for n in connecting_point_pair[0]:
new_graph.append(tuple([n] + list(path[1:3])))
else:
new_graph.append(path)
for e in nfa2.end:
if e == connecting_point_pair[1]:
new_end += nfa1.end
else:
new_end.append(e)
new_nfa.graph = new_graph
new_nfa.end = new_end
return new_nfa
def nfa_or(nfa1, nfa2):
"""x|y"""
new_nfa = NFA()
new_nfa.start = copy(nfa1.start)
new_end = copy(nfa1.end)
new_graph = copy(nfa1.graph)
for path in nfa2.graph:
if path[0] == nfa2.start:
appended_link = tuple([nfa1.start] + list(path[1:3]))
new_graph.append(appended_link)
else:
new_graph.append(path)
for e in nfa2.end:
if e == nfa2.start:
new_end.append(nfa1.start)
else:
new_end.append(e)
new_nfa.graph = new_graph
new_nfa.end = new_end
return new_nfa
def nfa_once_or_none(nfa1, nfa2):
""""xy?"""
new_nfa = NFA()
new_nfa.start = copy(nfa1.start)
new_nfa.end = nfa1.end + filter(lambda x : x != nfa2.start, nfa2.end)
new_graph = copy(nfa1.graph)
for path in nfa2.graph:
generated_links = []
if path[0] == nfa2.start:
for new_link_orig in nfa1.end:
appended_link = tuple([new_link_orig] + list(path[1:]))
generated_links.append(appended_link)
else:
generated_links = [path]
new_graph += generated_links
new_nfa.graph = new_graph
return new_nfa
def nfa_repeat_or_none(nfa1, nfa2):
"""xy*"""
new_nfa = NFA()
new_nfa.start = copy(nfa1.start)
new_nfa.end = copy(nfa1.end)
new_graph = copy(nfa1.graph)
for path in nfa2.graph:
temp_links = []
if path[0] == nfa2.start:
for new_link_orig in nfa1.end:
appended_link = tuple([new_link_orig] + list(path[1:]))
temp_links.append(appended_link)
else:
temp_links = [path]
generated_new_links = []
for link in temp_links:
if link[1] in nfa2.end:
for new_link_end in nfa1.end:
appended_link = tuple([link[0]]+[new_link_end]+[link[2]])
generated_new_links.append(appended_link)
else:
generated_new_links.append(link)
new_graph += generated_new_links
new_nfa.graph = new_graph
return new_nfa
nfa1 = make_simple_nfa("a")
nfa2 = make_simple_nfa(("NOT", "b"))
nfa3 = nfa_or(nfa1, nfa2)
print(nfa3)
print(nfa_repeat_or_none(nfa1, nfa2).graph)
print(nfa_once_or_none(nfa1, nfa2).graph)