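Fix bugs in the token replacement rules: iterate with enumerate so the previous token can be inspected, and append 之 to the existing replacement instead of overwriting it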
This commit is contained in:
parent
7cf211fa84
commit
5a12599eb9
1 changed files with 10 additions and 4 deletions
|
@ -1,5 +1,6 @@
|
|||
import MeCab
|
||||
import sys
|
||||
import re
|
||||
|
||||
|
||||
# 形態素解析する関数
|
||||
|
@ -46,7 +47,8 @@ if __name__ == "__main__":
|
|||
#print(parse_document)
|
||||
result_list = list()
|
||||
|
||||
for token in parse_document:
|
||||
for i, token in enumerate(parse_document):
|
||||
|
||||
# 形態素解析結果に置き換えルールを適用する
|
||||
if (token["pos"] != "助詞-格助詞"
|
||||
and token["pos"] != "助詞-接続助詞"
|
||||
|
@ -73,16 +75,20 @@ if __name__ == "__main__":
|
|||
if (token['lemma'] == '君' or token['lemma'] == '貴方' or token['lemma'] == 'お前'):
|
||||
prime = '你'
|
||||
|
||||
if token['lemma'] == '為る' and parse_document[i-1]['pos'] == '名詞-普通名詞-サ変可能':
|
||||
prime = ''
|
||||
|
||||
if token['lemma'] == '円-助数詞':
|
||||
prime = '円'
|
||||
|
||||
compound_matched = re.match("([^-]+)-([^-]+)", token['lemma'])
|
||||
if compound_matched:
|
||||
prime = compound_matched.group(1)
|
||||
|
||||
if len(token["features"]) != 0:
|
||||
if "連体形-一般" in token['features']:
|
||||
if token['lemma'] == 'ない':
|
||||
prime = "無之"
|
||||
else:
|
||||
prime = "之"
|
||||
prime = prime + "之"
|
||||
|
||||
result_list.append(hira_to_blank(prime))
|
||||
|
||||
|
|
Loading…
Reference in a new issue