fix 儂->わし bug

This commit is contained in:
Tan, Kian-ting 2021-08-14 23:13:04 +08:00
parent 3eb0440b58
commit bddf12b89c

View file

@ -1,3 +1,4 @@
import functools
import MeCab
import sys
import re
@ -28,6 +29,24 @@ def parse(sentence):
return result
def is_hira(string):
    """Return True when every character of *string* is hiragana.

    Args:
        string: A ``str`` or a sequence of single-character strings.

    Returns:
        ``False`` for empty input; otherwise ``True`` only if each
        character lies in the hiragana range checked below.
    """
    chars = list(string)
    if not chars:
        return False
    # NOTE(review): in the reviewed text both range bounds were empty string
    # literals, which makes the comparison false for every character. They
    # are restored here to the assigned part of the Unicode Hiragana block
    # (U+3041 .. U+309F) -- confirm against the intended range.
    #
    # all() replaces the previous functools.reduce fold, which passed its own
    # boolean result back into is_hira() and raised TypeError for any input
    # longer than two characters.
    return all("\u3041" <= ch <= "\u309f" for ch in chars)
def contain_kanji(str):
    """Return True when *str* contains at least one kanji character.

    A character counts as kanji when it matches the class ``[一-龯]``.

    Args:
        str: The text to inspect. The parameter name shadows the builtin;
            it is kept unchanged so existing callers keep working.

    Returns:
        ``True`` if any character matches, ``False`` otherwise (including
        for empty input).
    """
    # One regex scan replaces the previous functools.reduce fold, which fed
    # its accumulated Match/None result back into contain_kanji() and raised
    # TypeError for any input longer than two characters.
    return re.search(r"[一-龯]", str) is not None
# ひらがなを削除する関数
# Function to delete hiragana.
@ -64,10 +83,18 @@ if __name__ == "__main__":
elif token['lemma'] == '':
prime = ""
else:
prime = token["lemma"]
else:
prime = token["lemma"]
print(is_hira(token['lemma']))
if is_hira(token['lemma']):
prime = token["form"]
else:
prime = token["lemma"]
else:
if is_hira(token["lemma"]) and contain_kanji(token["form"]):
prime=token["form"]
else:
prime = token["lemma"]
if (token['lemma'] == '' or token['lemma'] == '貴方' or token['lemma'] == 'お前'):
@ -91,6 +118,9 @@ if __name__ == "__main__":
else:
prime = prime + ""
result_list.append(hira_to_blank(prime))