uahgi/src/hyphenating.jl

149 lines
4.2 KiB
Julia
Raw Normal View History

2025-01-27 23:24:47 +08:00
"""
Hyphenation using the Knuth-Liang pattern-matching algorithm.
"""
module Hyphenating
using Match
c = Main.uahgi.Parsing.Passes.Classes
"""
    match_lang(item)

Return the captured text when `item` is a `SEQ` made of exactly an `ELE` holding
`ID("lang")` followed by an `ELE` holding a single `CHAR`; return `false` for any
other item.
"""
function match_lang(item)
    return @match item begin
        c.SEQ([c.ELE([c.ID("lang")]), c.ELE([c.CHAR(code)])]) => code
        _ => false
    end
end
"""
    check_lang(ast)

Return the language declared by the `.ug` document, or `nothing` when none is found.

Only the first item of `ast.val` is inspected (through `match_lang`). An empty
`ast.val` yields `nothing` rather than raising a `BoundsError`.
"""
function check_lang(ast)
    ast_tree = ast.val
    # A document without any item cannot carry a language declaration.
    isempty(ast_tree) && return nothing
    # Only the leading item decides the language, so the rest is not examined.
    lang = match_lang(first(ast_tree))
    return lang != false ? lang : nothing
end
"""
    remove_num_from_pattern(ptn)

Convert a hyphenation pattern into text usable as a regular expression: a leading
`.` becomes a start anchor, a trailing `.` becomes an end anchor, and every digit
is removed. The three rewrites are applied one after another, in that order.
"""
function remove_num_from_pattern(ptn)
    rewrites = (r"^\." => "^", r"\.$" => "\$", r"\d" => "")
    return foldl((text, rule) -> replace(text, rule), rewrites; init=ptn)
end
"""
    hyphenate_aux(chars, patterns)

Return `chars` with a `~` inserted after every character where a break is allowed.

# Arguments
- `chars`: the word to process.
- `patterns`: a collection of `(regex_source, pattern)` pairs. `pattern` is made of
  letters, digits and `.`; `regex_source` is the text handed to `Regex` to locate
  the pattern inside `chars`.

Every occurrence of every pattern is visited. Each digit in a pattern proposes a
level for the gap that follows the letters read so far, the highest level proposed
for a gap wins, and a `~` is emitted after a character whose final level is odd.
A digit that would apply before the first character is ignored.
"""
function hyphenate_aux(chars, patterns)
    levels = fill(0, length(chars))
    for (regex_source, pattern) in patterns
        # overlap=true: a pattern applies at every position where it occurs, including
        # occurrences that share characters (e.g. "aa" at both 1 and 2 in "aaa").
        for m in eachmatch(Regex(regex_source), chars; overlap=true)
            # m.offset is a byte index; turn it into a character index so it lines
            # up with `levels`, which holds one slot per character.
            start = length(chars, 1, m.offset - 1) + 1
            letters_seen = 0
            for ptn_char in pattern
                if ptn_char == '.'
                    letters_seen = 0
                elseif isdigit(ptn_char)
                    pos = start + letters_seen - 1
                    # pos == 0 is the gap before the first character: nothing to mark.
                    if checkbounds(Bool, levels, pos)
                        levels[pos] = max(levels[pos], parse(Int, ptn_char))
                    end
                else
                    # Anything that is neither a digit nor '.' counts as a letter.
                    letters_seen += 1
                end
            end
        end
    end
    @debug "hyphenation levels" chars levels
    out = IOBuffer()
    for (idx, char) in enumerate(chars)
        print(out, char)
        # Levels are never negative, so "odd" already implies "greater than zero".
        isodd(levels[idx]) && print(out, '~')
    end
    return String(take!(out))
end
"""
    match_char(ast_item, patterns)

Apply hyphenation to one AST item.

- A `CHAR` whose text contains at least one ASCII letter is run through
  `hyphenate_aux` and returned as a `Vector`: the pieces between break points as
  `CHAR` nodes, separated by a `disc` `SEQ` node whose last element holds `"-"`
  (presumably a discretionary hyphen; confirm against the consumer of `disc`).
- A `SEQ` is returned as a new `SEQ` whose children were each processed by
  `match_char`.
- Any other item is returned unchanged.

`patterns` is forwarded to `hyphenate_aux` as is.
"""
function match_char(ast_item, patterns)
    @match ast_item begin
        # The guard must produce a real Bool: comparing the result of `match` with
        # the type `Nothing` is always true and would send every CHAR down this arm.
        c.CHAR(chars), if occursin(r"[a-zA-Z]+", chars) end => begin
            pieces = split(hyphenate_aux(chars, patterns), "~")
            final = []
            for piece in pieces
                push!(final, c.CHAR(piece))
                push!(final, c.SEQ([c.ELE([c.ID("disc")]),
                                    c.ELE([]),
                                    c.ELE([]),
                                    c.ELE([c.CHAR("-")])]))
            end
            # A break marker was pushed after every piece; drop the trailing one.
            return final[1:end-1]
        end
        c.SEQ(v) => c.SEQ(map(x -> match_char(x, patterns), v))
        _ => ast_item
    end
end
"""
    hyphenate(ast)

Hyphenate the document `ast` and return the result as a `PROG` node.

When `check_lang` finds no language, `ast` is returned untouched. Otherwise the
file `hyphenRules/<lang>.jl` is included and `Hyphen.patterns` is read (presumably
defined by that file), every item of `ast.val` goes through `match_char`, array
results are spliced into one flat list, and each `CHAR` whose `val` matches
`[a-zA-Z]+` is split into single-character `CHAR` nodes. The incoming AST and the
final item list are printed to standard output.
"""
function hyphenate(ast)
    lang = check_lang(ast)
    if lang === nothing
        return ast
    end

    include("hyphenRules/$lang.jl")
    pattern_with_orig = map(Hyphen.patterns) do raw
        (remove_num_from_pattern(raw), raw)
    end
    println("AST=====", ast)

    processed = map(ast.val) do node
        match_char(node, pattern_with_orig)
    end

    # Splice the arrays produced by match_char into one flat list of items.
    flattened = []
    for entry in processed
        @match entry begin
            [x] => push!(flattened, x)
            [x, y] => append!(flattened, entry)
            [x, y..., z] => append!(flattened, entry)
            _ => push!(flattened, entry)
        end
    end

    # Break letter-bearing CHAR nodes into one CHAR per character.
    single_chars = []
    for node in flattened
        @match node begin
            c.CHAR(val=r"[a-zA-Z]+") => begin
                append!(single_chars, [c.CHAR(ch) for ch in split(node.val, "")])
            end
            _ => push!(single_chars, node)
        end
    end

    println("New AST Val=====", single_chars)
    return c.PROG(single_chars)
end
end