97 lines
No EOL
3 KiB
Julia
97 lines
No EOL
3 KiB
Julia
|
|
module Passes
|
|
include("classes.jl")
|
|
using .Classes
|
|
|
|
export processed_passes, Pass
|
|
"""
    Pass(pattern, func)

A rewriting pass: a `pattern` paired with the `func` that rewrites a match.

# Fields
- `pattern`: what to look for. Every pass defined in this module stores a vector
  of `Classes` nodes here.
- `func`: a one-argument callable. The rewriters in this module index their
  argument positionally and return the replacement sequence.
"""
struct Pass
    pattern
    func
end

"""
    processed_passes

Registry of the [`Pass`](@ref) objects defined by this module, in the order they
were pushed.

Declared `const` with a concrete element type so that the global is not `Any`-typed
and only `Pass` values can be registered.
"""
const processed_passes = Pass[]
|
|
|
|
####definition of passes ####
|
|
|
|
"""
    two_nl_to_par_pass_func(two_nl)

Build the replacement for two consecutive newlines (`@par{}`): a one-element
vector holding a `Classes.SEQ` that wraps the identifier `par`.

The matched nodes in `two_nl` are not inspected.
"""
function two_nl_to_par_pass_func(two_nl)
    par_id = Classes.ID("par")
    par_seq = Classes.SEQ([par_id])
    return [par_seq]
end
|
|
|
|
# Pattern: two newline nodes in a row.
two_nl_to_par_pattern = [
    Classes.NL([]),
    Classes.NL([]),
]

# Pair the pattern with its rewriter and add the pass to the registry.
two_nl_to_par_pass = Pass(two_nl_to_par_pattern, two_nl_to_par_pass_func)
push!(processed_passes, two_nl_to_par_pass)
|
|
|
|
"""
    discretize_hyphen(two_nl)

Rewrite a hyphen standing between two characters into a `disc` sequence.

Despite its name, `two_nl` is expected to hold three nodes: elements 1 and 3 are
kept as they are, and element 2 is replaced by a `Classes.SEQ` of four
`Classes.ELE` nodes holding, in order, the identifier `disc`, a `-` character,
nothing, and a `-` character.

NOTE(review): the three parts after `disc` are presumably the pre-break,
post-break and no-break texts of a discretionary hyphen; confirm against the
definition of `disc`.
"""
function discretize_hyphen(two_nl)
    disc_tag = Classes.ELE([Classes.ID("disc")])
    first_hyphen = Classes.ELE([Classes.CHAR("-")])
    empty_part = Classes.ELE([])
    second_hyphen = Classes.ELE([Classes.CHAR("-")])
    inner = Classes.SEQ([disc_tag, first_hyphen, empty_part, second_hyphen])
    return vcat(two_nl[1], inner, two_nl[3])
end
|
|
# Pattern: a hyphen with a non-hyphen character on each side.
hyphen_pattern = [
    Classes.CHAR(r"[^\-]"),
    Classes.CHAR(r"[-]"),
    Classes.CHAR(r"[^\-]"),
]

# Register the pass that rewrites such hyphens with `discretize_hyphen`.
hyphen_disc_pass = Pass(hyphen_pattern, discretize_hyphen)
push!(processed_passes, hyphen_disc_pass)
|
|
|
|
# Pattern: two Han characters side by side.
# NOTE(review): this binding is assigned again, with an identical value, right
# before `adjacent_cjk_pass` is built; one of the two definitions is redundant.
adjacent_cjk_pattern = [
    Classes.CHAR(r"[\p{Han}]"),
    Classes.CHAR(r"[\p{Han}]"),
]
|
|
|
|
"""
    insert_hglue_in_adjacent_cjk_lat(two_nl)

Place a `cjk_lat_spacing` identifier between the two matched nodes.

Used for a Han character next to a non-Han character, in either order. The result
is `two_nl[1]`, `Classes.ID("cjk_lat_spacing")` and `two_nl[2]` concatenated
vertically.
"""
function insert_hglue_in_adjacent_cjk_lat(two_nl)
    spacing = Classes.ID("cjk_lat_spacing")
    return vcat(two_nl[1], spacing, two_nl[2])
end
|
|
# Patterns for a Han character next to a character that is neither Han nor one of
# the listed punctuation marks: Han first, then the other way round.
#
# `]` and `[` are escaped inside the negated character class. An unescaped `]`
# closes the class early, so the remaining marks stop being members of the set and
# become a literal sequence instead, and the pattern no longer describes a single
# character.
adjacent_cjk_lat_pattern = [
    Classes.CHAR(r"[\p{Han}]"),
    Classes.CHAR(r"[^\p{Han}·,。!?:」』》】\]〕』〗〉}…—「『《〔\[【『〖〈{]"),
]
adjacent_cjk_lat_pattern2 = [
    Classes.CHAR(r"[^\p{Han}·,。!?:」』》】\]〕』〗〉}…—「『《〔\[【『〖〈{]"),
    Classes.CHAR(r"[\p{Han}]"),
]

# Both orders share the same rewriter; register one pass per order.
adjacent_cjk_lat_pass = Pass(
    adjacent_cjk_lat_pattern,
    insert_hglue_in_adjacent_cjk_lat,
)
push!(processed_passes, adjacent_cjk_lat_pass)

adjacent_cjk_lat2_pass = Pass(
    adjacent_cjk_lat_pattern2,
    insert_hglue_in_adjacent_cjk_lat,
)
push!(processed_passes, adjacent_cjk_lat2_pass)
|
|
|
|
"""
    insert_hglue_in_adjacent_cjk(two_nl)

Place a `cjk_spacing` identifier between the two matched nodes.

The result is `two_nl[1]`, `Classes.ID("cjk_spacing")` and `two_nl[2]`
concatenated vertically.
"""
function insert_hglue_in_adjacent_cjk(two_nl)
    spacing = Classes.ID("cjk_spacing")
    return vcat(two_nl[1], spacing, two_nl[2])
end
|
|
# Pattern: two Han characters side by side.
adjacent_cjk_pattern = [
    Classes.CHAR(r"[\p{Han}]"),
    Classes.CHAR(r"[\p{Han}]"),
]

# Register the pass that puts `cjk_spacing` between two adjacent Han characters.
adjacent_cjk_pass = Pass(adjacent_cjk_pattern, insert_hglue_in_adjacent_cjk)
push!(processed_passes, adjacent_cjk_pass)
|
|
|
|
# CJK line-breaking rules (kinsoku: marks that must not start or end a line).
#
# `]` and `[` are escaped inside the character classes. An unescaped `]` closes
# the class early and turns the marks after it into a literal sequence, so the
# pattern no longer describes a single character.
# NOTE(review): the comma, exclamation mark, question mark, colon, brackets and
# braces below are the ASCII forms; confirm whether full-width forms were meant.

# A closing mark followed by anything that is not a closing mark.
adjacent_cjk_punc_pattern = [
    Classes.CHAR(r"[·,。!?:」』》】\]〕』〗〉}]"),
    Classes.CHAR(r"[^·,。!?:」』》】\]〕』〗〉}]"),
]

# Anything that is not an opening mark followed by an opening mark.
adjacent_cjk_punc_pattern2 = [
    Classes.CHAR(r"[^「『《〔\[【『〖〈{]"),
    Classes.CHAR(r"[「『《〔\[【『〖〈{]"),
]

# An ellipsis or dash followed by anything that is neither.
adjacent_cjk_punc_pattern3 = [
    Classes.CHAR(r"[…—]"),
    Classes.CHAR(r"[^…—]"),
]
|
|
|
|
# Closing-punctuation pass. It is built from `adjacent_cjk_punc_pattern`, in line
# with `adjacent_glue_pass2` / `adjacent_glue_pass3` and their `...pattern2` /
# `...pattern3`. `adjacent_cjk_pattern` already has its own pass
# (`adjacent_cjk_pass`), so registering it here again left the closing-punctuation
# pattern without any pass.
adjacent_glue_pass = Pass(adjacent_cjk_punc_pattern, insert_hglue_in_adjacent_cjk)
push!(processed_passes, adjacent_glue_pass)
|
|
|
|
# Opening-punctuation and ellipsis/dash passes. Both reuse the rewriter that
# inserts `cjk_spacing`, and they are registered in this order.
adjacent_glue_pass2 = Pass(adjacent_cjk_punc_pattern2, insert_hglue_in_adjacent_cjk)
adjacent_glue_pass3 = Pass(adjacent_cjk_punc_pattern3, insert_hglue_in_adjacent_cjk)
push!(processed_passes, adjacent_glue_pass2, adjacent_glue_pass3)
|
|
|
|
|
|
end |