268 lines
7.4 KiB
Python
268 lines
7.4 KiB
Python
#!/usr/bin/env python3
|
|
from copy import copy
|
|
|
|
start_point = 1
|
|
'''graph_list = list of links represented by (orig, dest, value)'''
|
|
graph_list = [(1, 2, "a"),
|
|
(2, 3, "g"),
|
|
(2, 4, "c"),
|
|
(2, 5, ("SET", "F", "G", "H")),
|
|
(4, 2, ("NOT", ""))]
|
|
|
|
def graph_list2dict(graph_list):
|
|
graph_dict = dict()
|
|
|
|
for item in graph_list:
|
|
orig = item[0]
|
|
dest = item[1]
|
|
value = item[2]
|
|
|
|
if orig in graph_dict:
|
|
if value in graph_dict[orig]:
|
|
raise Exception("Each 2 links from the same link shouldn't have the same value.")
|
|
'''
|
|
else:
|
|
for link_value in graph_dict[orig]:
|
|
if dest == graph_dict[orig][link_value]:
|
|
raise Exception("The orig-dest pair are duplicated.")'''
|
|
|
|
else:
|
|
graph_dict[orig] = dict()
|
|
|
|
graph_dict[orig][value] = dest
|
|
|
|
return graph_dict
|
|
|
|
graph_dict = graph_list2dict(graph_list)
|
|
print(graph_dict)
|
|
|
|
string1 = "ag"
|
|
string2 = "acxcxcxc"
|
|
|
|
|
|
def char_pattern_list_match(value, c):
|
|
'''Match a char with char pattern list "value". Called with pattern match'''
|
|
head = value[0]
|
|
# char pattern ["NOT", "a"] = [^a]
|
|
if head == "NOT":
|
|
if len(value) > 2:
|
|
raise Exception("in char pattern NOT contains only 1 sub-pattern")
|
|
return not (value_match(value[1], c))
|
|
|
|
# char pattern ["SET", "a", "b", ...] = [ab...]
|
|
elif head == "SET":
|
|
if len(value) == 1:
|
|
raise Exception("in char pattern SET contains at least 1 sub-pattern")
|
|
for i in value[1:]:
|
|
if value_match(i, c):
|
|
return True
|
|
return False
|
|
|
|
elif head == "RANGE":
|
|
if len(value) != 3 or not (isinstance(value[1], str) and isinstance(value[2], str)):
|
|
raise Exception("in char pattern RANGE contains only 2 chars")
|
|
elif len(value[1]) > 1 or len(value[2]) > 1:
|
|
raise Exception("in char pattern RANGE contains no string of which length > 1")
|
|
else:
|
|
lower_bound = ord(value[1])
|
|
upper_bound = ord(value[2])
|
|
if lower_bound > upper_bound:
|
|
raise Exception(
|
|
"in char pattern RANGE the lower_bound must not bigger than upper bound.")
|
|
|
|
if ord(c) >= lower_bound and ord(c) <= upper_bound:
|
|
return True
|
|
else:
|
|
return False
|
|
else:
|
|
raise Exception("The head of a char pattern list is not acceptable.")
|
|
|
|
def value_match(value, c):
|
|
if isinstance(value, str):
|
|
is_meta_character = (len(value) == 2 and value[1] == "\\")
|
|
if len(value) > 1 and not is_meta_character:
|
|
raise Exception("Can't compare a char with 2 or more chars.")
|
|
elif value == c:
|
|
return True
|
|
else:
|
|
return False
|
|
|
|
elif isinstance(value, tuple):
|
|
return char_pattern_list_match(value, c)
|
|
|
|
else:
|
|
raise Exception("The argument called with match_value is not available.")
|
|
|
|
print(value_match(("SET", ("RANGE", "a", "g")), "k"))
|
|
|
|
def transit_string_in_graph(graph_dict, start_point, string):
|
|
status = start_point
|
|
for c in string:
|
|
for value in graph_dict[status]:
|
|
if value_match(value, c):
|
|
status = graph_dict[status][value]
|
|
|
|
return status
|
|
|
|
print(transit_string_in_graph(graph_dict, start_point, string2))
|
|
|
|
|
|
def id_gen():
|
|
id_num = 0
|
|
while True:
|
|
new_value = (yield id_num)
|
|
if new_value != None:
|
|
id_num = new_value
|
|
else:
|
|
id_num += 1
|
|
|
|
real_id_generator = id_gen()
|
|
|
|
class NFA():
|
|
'''Definition of NFA graph'''
|
|
def __init__(self):
|
|
self.start = None
|
|
self.end = []
|
|
self.graph = [] # list of triary tuple
|
|
|
|
def __repr__(self):
|
|
return_string = ""
|
|
return_string += "Start: %d\n" % self.start
|
|
|
|
if self.graph == []:
|
|
return_string += "EMPTY PATH"
|
|
|
|
else:
|
|
for path in self.graph:
|
|
path_string = "%s -> %s : %s\n" % (path[0], path[1], path[2])
|
|
return_string += path_string
|
|
|
|
end_string = "End: " + str(self.end)
|
|
return_string += end_string
|
|
|
|
return return_string
|
|
|
|
def make_simple_nfa(pattern):
|
|
nfa = NFA()
|
|
nfa.start = next(real_id_generator)
|
|
nfa.end.append(next(real_id_generator))
|
|
|
|
nfa.graph.append(tuple([nfa.start, nfa.end[0], pattern]))
|
|
|
|
return nfa
|
|
|
|
def nfa_concat(nfa1, nfa2):
|
|
"""xy"""
|
|
new_nfa = NFA()
|
|
new_nfa.start = copy(nfa1.start)
|
|
|
|
connecting_point_pair = [nfa1.end, nfa2.start]
|
|
|
|
new_graph = copy(nfa1.graph)
|
|
new_end = []
|
|
for path in nfa2.graph:
|
|
if path[0] == connecting_point_pair[1]:
|
|
for n in connecting_point_pair[0]:
|
|
new_graph.append(tuple([n] + list(path[1:3])))
|
|
else:
|
|
new_graph.append(path)
|
|
|
|
for e in nfa2.end:
|
|
if e == connecting_point_pair[1]:
|
|
new_end += nfa1.end
|
|
else:
|
|
new_end.append(e)
|
|
|
|
new_nfa.graph = new_graph
|
|
new_nfa.end = new_end
|
|
|
|
return new_nfa
|
|
|
|
def nfa_or(nfa1, nfa2):
|
|
"""x|y"""
|
|
new_nfa = NFA()
|
|
new_nfa.start = copy(nfa1.start)
|
|
|
|
new_end = copy(nfa1.end)
|
|
|
|
new_graph = copy(nfa1.graph)
|
|
|
|
for path in nfa2.graph:
|
|
if path[0] == nfa2.start:
|
|
appended_link = tuple([nfa1.start] + list(path[1:3]))
|
|
new_graph.append(appended_link)
|
|
else:
|
|
new_graph.append(path)
|
|
|
|
for e in nfa2.end:
|
|
if e == nfa2.start:
|
|
new_end.append(nfa1.start)
|
|
else:
|
|
new_end.append(e)
|
|
|
|
new_nfa.graph = new_graph
|
|
new_nfa.end = new_end
|
|
|
|
return new_nfa
|
|
|
|
def nfa_once_or_none(nfa1, nfa2):
|
|
""""xy?"""
|
|
new_nfa = NFA()
|
|
new_nfa.start = copy(nfa1.start)
|
|
new_nfa.end = nfa1.end + filter(lambda x : x != nfa2.start, nfa2.end)
|
|
|
|
new_graph = copy(nfa1.graph)
|
|
for path in nfa2.graph:
|
|
generated_links = []
|
|
|
|
if path[0] == nfa2.start:
|
|
for new_link_orig in nfa1.end:
|
|
appended_link = tuple([new_link_orig] + list(path[1:]))
|
|
generated_links.append(appended_link)
|
|
else:
|
|
generated_links = [path]
|
|
|
|
new_graph += generated_links
|
|
new_nfa.graph = new_graph
|
|
|
|
return new_nfa
|
|
|
|
|
|
def nfa_repeat_or_none(nfa1, nfa2):
|
|
"""xy*"""
|
|
new_nfa = NFA()
|
|
new_nfa.start = copy(nfa1.start)
|
|
new_nfa.end = copy(nfa1.end)
|
|
|
|
new_graph = copy(nfa1.graph)
|
|
|
|
for path in nfa2.graph:
|
|
temp_links = []
|
|
|
|
if path[0] == nfa2.start:
|
|
for new_link_orig in nfa1.end:
|
|
appended_link = tuple([new_link_orig] + list(path[1:]))
|
|
temp_links.append(appended_link)
|
|
else:
|
|
temp_links = [path]
|
|
|
|
generated_new_links = []
|
|
for link in temp_links:
|
|
if link[1] in nfa2.end:
|
|
for new_link_end in nfa1.end:
|
|
appended_link = tuple([link[0]]+[new_link_end]+[link[2]])
|
|
generated_new_links.append(appended_link)
|
|
else:
|
|
generated_new_links.append(link)
|
|
|
|
new_graph += generated_new_links
|
|
new_nfa.graph = new_graph
|
|
|
|
return new_nfa
|
|
|
|
nfa1 = make_simple_nfa("a")
|
|
nfa2 = make_simple_nfa(("NOT", "b"))
|
|
nfa3 = nfa_or(nfa1, nfa2)
|
|
print(nfa3)
|
|
print(nfa_repeat_or_none(nfa1, nfa2).graph)
|
|
print(nfa_once_or_none(nfa1, nfa2).graph)
|