From 63fa86aa4de9ea9fa04ee7b96e72e813852a6552 Mon Sep 17 00:00:00 2001 From: Date: Sun, 4 Sep 2011 16:49:02 +0800 Subject: [PATCH] fix the problem with the splitting of punctuation --- numbertomark.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/numbertomark.py b/numbertomark.py index 8bc9ecd..020de42 100755 --- a/numbertomark.py +++ b/numbertomark.py @@ -78,8 +78,12 @@ orig_content = orig_content[1:] #刪除頭一個字(空白) '''處理調號''' -#音節list(),包含連字號、空白 -syllable_list = re.split('([- ().,,。]+)', orig_content) +#i.e. punctuation_list,符合符號定義的字元 list。用 regexp 語法 +punctu_list = '([' + string.punctuation +\ + "、,。?!()「」『』《》〈〉【】〔〕;:…- \s" + '])' + +#音節list(),包含符號、標點、其他文字 +syllable_list = re.split(punctu_list, orig_content) result = "" #輸出的結果(預設為空)