diff --git a/example/ex1.ug b/example/ex1.ug index 139597f..e39a211 100644 --- a/example/ex1.ug +++ b/example/ex1.ug @@ -1,2 +1,11 @@ +貓咪的眼睛, +狐狸的耳朵。 + +我是 + +貓,還沒有名字。@foo%我是註釋 + +% +{@foo|@bar|12\|} \ No newline at end of file diff --git a/src/classes.jl b/src/classes.jl index 0d09350..f8c45ca 100644 --- a/src/classes.jl +++ b/src/classes.jl @@ -9,5 +9,5 @@ struct SPACE<:Node val end # space struct NL<:Node val end # newline struct PROG<:Node val end # all the program # pattern in regex form -struct PTN_RGX<:Node val::Regex end +#struct PTN_RGX<:Node val::Regex end end \ No newline at end of file diff --git a/src/parsing.jl b/src/parsing.jl index 5f8c749..7b208fb 100644 --- a/src/parsing.jl +++ b/src/parsing.jl @@ -16,7 +16,7 @@ space = p"[ \t]" > Passes.Classes.SPACE id_name = p"[_a-zA-Z][_0-9a-zA-Z]*" > Passes.Classes.ID id = E"@" + id_name -char = p"[^ \n\r\t\\]" |> Passes.Classes.CHAR #[1:2,:?] +char = p"[^ \n\r\t\\]" > Passes.Classes.CHAR #[1:2,:?] # chars should be preceded by "\" are \, {, }, |, @, % esc_char = p"[\{\|\}\@\%]" > Passes.Classes.ESC_CHAR @@ -67,9 +67,11 @@ end function match_unit(pair) pattern = pair[1] ast_item = pair[2] - - if typeof(pattern) == Passes.Classes.PTN_RGX - is_matched = match(pattern.val, ast_item.val) + #println(pattern, "~~~", ast_item) + if typeof(pattern) != typeof(ast_item) + return false + elseif typeof(pattern.val) == Regex + is_matched = occursin(pattern.val, ast_item.val) return is_matched else return pattern.val == ast_item.val @@ -86,9 +88,13 @@ function use_pass(ast_val, pass) if ast_pattern_matched(pass_pattern, ast_head) ast_head = pass.func(ast_head) - remained = use_pass([ast_head[2:end];ast_val[pass_pattern_length+1:end]], pass) + raw_remained = [ast_head[2:end];ast_val[pass_pattern_length+1:end]] + remained = use_pass(raw_remained, pass) ast_val = [ast_head[1]; remained] else + raw_remained = ast_val[2:end] + remained = use_pass(raw_remained, pass) + ast_val = [ast_head[1]; remained] return ast_val end end diff --git a/src/passes.jl b/src/passes.jl index dfe254d..1d08561 100644 --- a/src/passes.jl +++ b/src/passes.jl @@ -17,13 +17,24 @@ end function two_nl_to_par_pass_func(two_nl) return [Classes.SEQ([Classes.ID("par")])] end -two_nl_to_par_pattern = [Classes.NL([]), Classes.NL([])] #two continuous newline +two_nl_to_par_pattern = [Classes.NL([]), Classes.NL([])] #two continuous newline two_nl_to_par_pass = Pass(two_nl_to_par_pattern, two_nl_to_par_pass_func) - - push!(processed_passes, two_nl_to_par_pass) +# in 2 hanzi add glue. +function insert_hglue_in_adjacent_chinese(two_nl) + _0pt = Classes.SEQ([Classes.ID("pt"); Classes.CHAR(["0"])]) + inner = Classes.SEQ([Classes.ID("hglue"); _0pt]) + return [two_nl[1]; inner; two_nl[2]] +end +adjacent_chinese_pattern = [Classes.CHAR(r"[\p{Han},。!?:「」『』…]"), + Classes.CHAR(r"[\p{Han},。!?:「」『』…]")] + +adjacent_glue_pass = Pass(adjacent_chinese_pattern, +insert_hglue_in_adjacent_chinese) +push!(processed_passes, adjacent_glue_pass) + end \ No newline at end of file diff --git a/src/uahgi.jl b/src/uahgi.jl index d988340..07a29b5 100644 --- a/src/uahgi.jl +++ b/src/uahgi.jl @@ -12,7 +12,7 @@ function parse_commandline() @add_arg_table! s begin "FILE" help = "the file path to be converted." - #required = true + required = true end return parse_args(s) @@ -21,11 +21,13 @@ end function main() parsed_args = parse_commandline() - if parsed_args["FILE"] === nothing - file_path = "./example/ex1.ug" # for test - else - file_path = parsed_args["FILE"] - end + file_path = parsed_args["FILE"] + # for test + #if parsed_args["FILE"] === nothing + # file_path = "./example/ex1.ug" + #else + # file_path = parsed_args["FILE"] + #end file_content = open(f->read(f, String), file_path) Parsing.parse(file_content) end