# uahgi/src/passes.jl
module Passes
include("classes.jl")
using .Classes
export processed_passes, Pass
# Registry of passes. Every pass defined in this module is appended to it with
# `push!`, so the element order is the definition order. The element type is
# `Any` because `Pass` is only declared after this line.
processed_passes = Any[]
"""
    Pass(pattern, func)

Pair a `pattern` with the function `func` that is registered for it.

In this module `pattern` is always a vector of `Classes` tokens and `func` is a
one-argument function that indexes into its argument and returns a vector of
tokens (presumably the tokens matched by `pattern`; the code that applies the
passes is not part of this file). Both fields accept any value.
"""
struct Pass
    pattern::Any
    func::Any
end
#### Definitions of passes ####
# Two consecutive newlines become @par{}
"""
    two_nl_to_par_pass_func(two_nl)

Return the replacement for a two-newline match: a one-element vector holding
`SEQ([ID("par")])`.

The argument `two_nl` is not read; the result is the same for every input.
"""
function two_nl_to_par_pass_func(two_nl)
    par_call = Classes.SEQ([Classes.ID("par")])
    return [par_call]
end
# Pattern: two consecutive newline tokens.
two_nl_to_par_pattern = [
    Classes.NL([]),
    Classes.NL([]),
]
two_nl_to_par_pass = Pass(two_nl_to_par_pattern, two_nl_to_par_pass_func)
push!(processed_passes, two_nl_to_par_pass)
# A hyphen between two characters becomes a `disc` call
"""
    discretize_hyphen(two_nl)

Replace the middle element of a three-element match with a `disc` call.

`two_nl` must have at least three elements (despite its name it is used here
for a `char - char` match). The result is `two_nl[1]`, then a `SEQ` of four
`ELE`s (`ID("disc")`, `CHAR("-")`, an empty `ELE`, `CHAR("-")`), then
`two_nl[3]`, joined with `vcat`. `two_nl[2]` is dropped.
"""
function discretize_hyphen(two_nl)
    disc_args = [
        Classes.ELE([Classes.ID("disc")]),
        Classes.ELE([Classes.CHAR("-")]),
        Classes.ELE([]),
        Classes.ELE([Classes.CHAR("-")]),
    ]
    disc_call = Classes.SEQ(disc_args)
    return vcat(two_nl[1], disc_call, two_nl[3])
end
# Pattern: a hyphen with a non-hyphen character on each side.
hyphen_pattern = [
    Classes.CHAR(r"[^\-]"),
    Classes.CHAR(r"[-]"),
    Classes.CHAR(r"[^\-]"),
]
hyphen_disc_pass = Pass(hyphen_pattern, discretize_hyphen)
push!(processed_passes, hyphen_disc_pass)
# Between CJK+Latin and Latin+CJK neighbours, add glue.
"""
    insert_hglue_in_adjacent_cjk_lat(two_nl)

Put an `ID("cjk_lat_spacing")` token between the first two elements of `two_nl`.

Returns `vcat(two_nl[1], ID("cjk_lat_spacing"), two_nl[2])`; `two_nl` must have
at least two elements.
"""
function insert_hglue_in_adjacent_cjk_lat(two_nl)
    spacing = Classes.ID("cjk_lat_spacing")
    return vcat(two_nl[1], spacing, two_nl[2])
end
# The second regex in each pattern below is one negated character class: a
# character that is neither Han nor one of the listed punctuation marks.
# `]` and `[` are escaped inside the class. An unescaped `]` ends the class at
# that point and turns the rest of the list into literal text that has to
# follow, which is not what a one-character pattern needs.
# NOTE(review): several of the listed marks are ASCII (comma, exclamation mark,
# question mark, colon, square brackets, braces) -- confirm whether their
# full-width forms were intended.

# Pattern: a Han character followed by a non-Han, non-punctuation character.
adjacent_cjk_lat_pattern = [
    Classes.CHAR(r"[\p{Han}]"),
    Classes.CHAR(r"[^\p{Han}·,。!?:」』》】\]〕』〗〉}…—「『《〔\[【『〖〈{]"),
]
# Pattern: the same pair in the opposite order.
adjacent_cjk_lat_pattern2 = [
    Classes.CHAR(r"[^\p{Han}·,。!?:」』》】\]〕』〗〉}…—「『《〔\[【『〖〈{]"),
    Classes.CHAR(r"[\p{Han}]"),
]

adjacent_cjk_lat_pass = Pass(adjacent_cjk_lat_pattern, insert_hglue_in_adjacent_cjk_lat)
push!(processed_passes, adjacent_cjk_lat_pass)

adjacent_cjk_lat2_pass = Pass(adjacent_cjk_lat_pattern2, insert_hglue_in_adjacent_cjk_lat)
push!(processed_passes, adjacent_cjk_lat2_pass)
# Between two adjacent hanzi, add glue.
"""
    insert_hglue_in_adjacent_cjk(two_nl)

Put an `ID("cjk_spacing")` token between the first two elements of `two_nl`.

Returns `vcat(two_nl[1], ID("cjk_spacing"), two_nl[2])`; `two_nl` must have at
least two elements.
"""
function insert_hglue_in_adjacent_cjk(two_nl)
    spacing = Classes.ID("cjk_spacing")
    return vcat(two_nl[1], spacing, two_nl[2])
end
# Pattern: two Han characters in a row.
adjacent_cjk_pattern = [
    Classes.CHAR(r"[\p{Han}]"),
    Classes.CHAR(r"[\p{Han}]"),
]
adjacent_cjk_pass = Pass(adjacent_cjk_pattern, insert_hglue_in_adjacent_cjk)
push!(processed_passes, adjacent_cjk_pass)
# CJK line-breaking rules (the Chinese "avoid line head/tail" rule, known in
# Japanese typesetting as kinsoku shori).
# NOTE(review): several of the listed marks are ASCII (comma, exclamation mark,
# question mark, colon, square brackets, braces) -- confirm whether their
# full-width forms were intended.

# Pattern 1: closing punctuation followed by anything that is not closing
# punctuation. The `]` is escaped so it is a member of the class; unescaped it
# would end the class early and the remaining marks would become literal text.
adjacent_cjk_punc_pattern = [
    Classes.CHAR(r"[·,。!?:」』》】\]〕』〗〉}]"),
    Classes.CHAR(r"[^·,。!?:」』》】\]〕』〗〉}]"),
]
# Pattern 2: anything that is not opening punctuation, followed by opening
# punctuation. `[` is escaped for symmetry with pattern 1.
adjacent_cjk_punc_pattern2 = [
    Classes.CHAR(r"[^「『《〔\[【『〖〈{]"),
    Classes.CHAR(r"[「『《〔\[【『〖〈{]"),
]
# Pattern 3: an ellipsis or dash followed by anything that is neither.
adjacent_cjk_punc_pattern3 = [
    Classes.CHAR(r"[…—]"),
    Classes.CHAR(r"[^…—]"),
]

# The first glue pass uses the closing-punctuation pattern. Two adjacent Han
# characters are already handled by their own pass (adjacent_cjk_pass), so
# registering adjacent_cjk_pattern here again would leave pattern 1 unused.
adjacent_glue_pass = Pass(adjacent_cjk_punc_pattern, insert_hglue_in_adjacent_cjk)
push!(processed_passes, adjacent_glue_pass)

adjacent_glue_pass2 = Pass(adjacent_cjk_punc_pattern2, insert_hglue_in_adjacent_cjk)
push!(processed_passes, adjacent_glue_pass2)

adjacent_glue_pass3 = Pass(adjacent_cjk_punc_pattern3, insert_hglue_in_adjacent_cjk)
push!(processed_passes, adjacent_glue_pass3)
end