add some function incl. cjk+latn passes
Some checks failed
CI / Julia 1.6 - ubuntu-latest - x64 (push) Has been cancelled
CI / Julia 1.7 - ubuntu-latest - x64 (push) Has been cancelled
CI / Julia pre - ubuntu-latest - x64 (push) Has been cancelled

This commit is contained in:
Tan, Kian-ting 2025-02-04 00:52:37 +08:00
parent b2712101bd
commit f216906b41
7 changed files with 2436 additions and 461 deletions

File diff suppressed because one or more lines are too long

View file

@ -1,8 +1,12 @@
{@lang|en}% {@def|@lang|en}%
%{@def|@font|{@quote|FreeSans|AR PL UKai TW}}% %{@def|@font|{@quote|FreeSans|AR PL UKai TW}}%
%{@def|@fontsize|12}% %{@def|@fontsize|12}%
%{@def|@linewidth|200} %in px %{@def|@linewidth|400} %in px
%{@def|@spacing|{@hglue|{@ex|1}|2}}% %{@def|@hori_align|left} % left| right|center
%{@def|@cjk_spacing|{@hglue|{@ex|0}|0.001}}% %{@def|@spacing|{@hglue|{@ex|0.4}|0.2}}%
%{@def|@cjk_lat_spacing|{@hglue|{@ex|0.2}|0.2}}%
%{@def|@cjk_spacing|{@hglue|{@ex|0}|0.2}}%
%the processing of typesetting technology is so sophisticated that many people are afraid of %the processing of typesetting technology is so sophisticated that many people are afraid of
the implementation of typesetting language. However, it is good to give TEXBook a try.測試一段漢字,究竟漢字會不會斷行呢 the implementation of typesetting language.{@set|@fontsize|14} However, it is good to give TEXBook a try. 測試一段漢字,究竟會不會斷行呢?「『《臺灣連翹》是吳濁流的著作,涉及臺灣戰後歷史與政治。』」{@set|@font|{@quote|jf open 粉圓 2.0|AR PL UMing TW}}
願主耶穌ê恩典kap眾聖徒saⁿ-kap tī-leh。À-bēng。啟示錄bóe-á-cha̍t原起頭上帝創造天kap地是創世記頭chi̍t cha̍t。

View file

@ -54,7 +54,13 @@ function total_cost(items, n, linewidth, last_of_queue=false)
mininal_cost = +Inf mininal_cost = +Inf
prev_breakpoint = nothing prev_breakpoint = nothing
for j in 1:1:(n-1) for j in 1:1:(n-1)
tmp_cost = total_cost(items, j, linewidth, false) + cost(items, j+1, n, linewidth, last_of_queue) total_c = total_cost(items, j, linewidth, false)
cost_tail = cost(items, j+1, n, linewidth, last_of_queue)
if cost_tail < +Inf && last_of_queue == true
tmp_cost = total_c
else
tmp_cost = total_c + cost_tail
end
if tmp_cost < mininal_cost if tmp_cost < mininal_cost
mininal_cost = tmp_cost mininal_cost = tmp_cost
prev_breakpoint = j prev_breakpoint = j
@ -98,7 +104,11 @@ function arrange(vbox, env)
for (x, i) in enumerate(1:1:length(eles)) for (x, i) in enumerate(1:1:length(eles))
item = eles[x] item = eles[x]
if !(x in breakpoint_list_reversed) if !(x in breakpoint_list_reversed)
if typeof(item) == u.Disc if typeof(item) == u.Par
horizonal_align = env["hori_align"]
result_vbox_inner[end].eles = add_aligner(result_vbox_inner[end].eles, horizonal_align)
push!(result_vbox_inner, u.HBox([],nothing,nothing,nothing,nothing,nothing))
elseif typeof(item) == u.Disc
if item.orig != [] if item.orig != []
push!(result_vbox_inner[end].eles, item.orig) push!(result_vbox_inner[end].eles, item.orig)
end end
@ -107,10 +117,15 @@ function arrange(vbox, env)
end end
# x is the last one # x is the last one
elseif i == length(eles) elseif i == length(eles)
if typeof(item) !== u.Par
push!(result_vbox_inner[end].eles, item) push!(result_vbox_inner[end].eles, item)
end
# x in breakpoint_list_reversed # x in breakpoint_list_reversed
else else
if typeof(item) == u.Disc if typeof(item) == u.Par
push!(result_vbox_inner, u.HBox([],nothing,nothing,nothing,nothing,nothing))
elseif typeof(item) == u.Disc
if item.before != [] if item.before != []
push!(result_vbox_inner[end].eles, item.before) push!(result_vbox_inner[end].eles, item.before)
end end
@ -127,6 +142,17 @@ function arrange(vbox, env)
return result_vbox_inner return result_vbox_inner
end end
"""
    add_aligner(hbox_eles, horizonal_align)

Pad one line's elements with zero-width, highly stretchable glue so that the
line is pushed towards the requested side, and return the padded vector.

`hbox_eles` is modified in place for every alignment, so a caller that keeps
another reference to the same vector (for example `hbox.eles`) sees the aligner
as well.

# Arguments
- `hbox_eles`: vector of the elements of a single line.
- `horizonal_align`: `"left"`, `"right"` or `"center"`. `"middle"` is kept as an
  alias of `"center"`. Any other value is treated as `"left"`.

# Returns
The same vector, with the aligner glue added:
- `"right"`: one glue in front of the content,
- `"center"`: one glue on each side of the content,
- otherwise: one glue after the content.
"""
function add_aligner(hbox_eles, horizonal_align)
    # Width 0.0 and stretch 10000.0: the glue takes no room by itself; its large
    # stretch value is what the positioning step scales when placing elements.
    new_aligner() = u.HGlue(0.0, 10000.0)

    if horizonal_align == "right"
        pushfirst!(hbox_eles, new_aligner())
    elseif horizonal_align == "center" || horizonal_align == "middle"
        # Equal glue on both ends so the content sits between them.
        pushfirst!(hbox_eles, new_aligner())
        push!(hbox_eles, new_aligner())
    else # left/default
        push!(hbox_eles, new_aligner())
    end
    return hbox_eles
end
function position(box_inner, env#=to be used later=#) function position(box_inner, env#=to be used later=#)
pages = [] #a subarray is the content of a page pages = [] #a subarray is the content of a page
@ -135,19 +161,57 @@ function position(box_inner, env#=to be used later=#)
posX = orig_x #cursor x posX = orig_x #cursor x
posY = orig_y #cursor y posY = orig_y #cursor y
baselineskip = 30 # it can be derived from env baselineskip = 30 # it can be derived from env
for hbox in box_inner linewidth = env["linewidth"]
pages = position_chbox(hbox.eles, pages, posX, posY) horizonal_align = env["hori_align"]
for (idx, hbox) in enumerate(box_inner)
hbox_eles = hbox.eles
aligner_glue = u.HGlue(0.0,10000.0)
if idx == length(box_inner)
hbox_eles = add_aligner(hbox_eles, horizonal_align)
end
residual_space = parse(Float64, linewidth) - width_sum_of_eles(hbox.eles)
sum_of_stretch = reduce((x,y) -> x+y, map(hglue_stretch, hbox.eles))
space_stretch_ratio = residual_space / sum_of_stretch
pages = position_chbox(hbox_eles, pages, posX, posY, space_stretch_ratio)
posX = orig_x posX = orig_x
posY -= baselineskip posY -= baselineskip
end end
return pages return pages
end end
"""
    hglue_stretch(ele)

Return `ele.stretch` when `ele` is a `u.HGlue`; return `0` for any other element.
"""
function hglue_stretch(ele)
    return ele isa u.HGlue ? ele.stretch : 0
end
"""
    width_sum_of_eles(eles)

Add up the `wd` property of every element in `eles`.

Elements that have no `wd` property contribute `0.0`. An empty collection
yields the integer `0`.
"""
function width_sum_of_eles(eles)
    isempty(eles) && return 0

    widths = map(eles) do ele
        hasproperty(ele, :wd) ? ele.wd : 0.0
    end

    # A single element is returned as-is, without going through the reduction.
    return length(widths) == 1 ? widths[1] : reduce(+, widths)
end
"""positioning all the chboxes in a HBox""" """positioning all the chboxes in a HBox"""
function position_chbox(hbox_eles, pages, posX, posY) function position_chbox(hbox_eles, pages, posX, posY, space_stretch_ratio)
for i in hbox_eles for i in hbox_eles
if typeof(i) == u.HGlue if typeof(i) == u.HGlue
deltaX = i.wd deltaX = i.wd + (i.stretch * space_stretch_ratio)
posX += deltaX posX += deltaX
else #ChBox else #ChBox
deltaX = i.wd deltaX = i.wd

View file

@ -7,7 +7,7 @@ c = Main.uahgi.Parsing.Passes.Classes
function match_lang(item) function match_lang(item)
@match item begin @match item begin
c.SEQ([c.ELE([c.ID("lang")]), c.ELE([c.CHAR(v1)])]) => begin c.SEQ([c.ELE([c.ID("def")]),c.ELE([c.ID("lang")]), c.ELE([c.CHAR(v1)])]) => begin
return v1 return v1
end end
_ => false _ => false

View file

@ -35,9 +35,14 @@ function interp(ast, env, res_box, put_char=true_)
@match ast begin @match ast begin
c.ID(id) => c.ID(id) =>
begin begin
print("ID____", id) #print("ID____", id)
return interp(env[id], env, res_box, true_) return interp(env[id], env, res_box, true_)
end end
c.SEQ([c.ID("par")]) => begin
par_box = u.Par()
push!(res_box.eles[end].eles, par_box)
return interp("", env, res_box)
end
c.SEQ([c.ELE([c.ID("def")]), c.SEQ([c.ELE([c.ID("def")]),
c.ELE([c.ID(id)]),val]) => c.ELE([c.ID(id)]),val]) =>
begin begin
@ -76,11 +81,7 @@ function interp(ast, env, res_box, put_char=true_)
return (ele, env, res_box) return (ele, env, res_box)
end end
c.SEQ([c.ELE([c.ID("par")])]) =>begin
push!(res_box.eles, u.HBox([],
nothing, nothing, nothing, nothing, nothing))
return (ast, env, res_box)
end
c.SEQ([c.ELE([c.ID("ex")]), val]) =>begin c.SEQ([c.ELE([c.ID("ex")]), val]) =>begin
x = 'x' x = 'x'
font_family = select_font(x, env) font_family = select_font(x, env)
@ -137,7 +138,6 @@ function interp(ast, env, res_box, put_char=true_)
end end
end end
c.SEQ([c.ELE([c.ID("disc")]), c.SEQ([c.ELE([c.ID("disc")]),
c.ELE(before), c.ELE(before),
c.ELE(after), c.ELE(after),
@ -172,8 +172,10 @@ function interp(ast, env, res_box, put_char=true_)
end end
# empty item # empty item
[] => return (ast, env, res_box) [] => return (ast, env, res_box)
"" => return (ast, env, res_box)
_ => begin _ => begin
println("unknown token", ast) println("unknown token: ", ast)
val_evaled = nothing val_evaled = nothing
return (val_evaled, env, res_box) return (val_evaled, env, res_box)
end end

View file

@ -24,16 +24,74 @@ two_nl_to_par_pass = Pass(two_nl_to_par_pattern,
two_nl_to_par_pass_func) two_nl_to_par_pass_func)
push!(processed_passes, two_nl_to_par_pass) push!(processed_passes, two_nl_to_par_pass)
"""
    discretize_hyphen(two_nl)

Rewrite a three-item match (character, hyphen, character) so that the hyphen
becomes a `disc` sequence.

`two_nl[1]` and `two_nl[3]` are kept as they are; `two_nl[2]` (the hyphen) is
replaced by `SEQ([ELE([ID("disc")]), ELE([CHAR("-")]), ELE([]), ELE([CHAR("-")])])`.
"""
function discretize_hyphen(two_nl)
    disc_head = Classes.ELE([Classes.ID("disc")])
    # Arguments of `disc`, in order: a hyphen, an empty element, a hyphen.
    # NOTE(review): presumably before-break / after-break / unbroken text —
    # confirm against the `disc` handling in the interpreter.
    first_arg = Classes.ELE([Classes.CHAR("-")])
    second_arg = Classes.ELE([])
    third_arg = Classes.ELE([Classes.CHAR("-")])
    disc = Classes.SEQ([disc_head, first_arg, second_arg, third_arg])
    return vcat(two_nl[1], disc, two_nl[3])
end
# Three-item pattern: a non-hyphen character, a single hyphen, and another
# non-hyphen character. Runs of consecutive hyphens therefore do not match.
hyphen_pattern = [Classes.CHAR(r"[^\-]"),
Classes.CHAR(r"[-]"), Classes.CHAR(r"[^\-]")]
# Pair the pattern with `discretize_hyphen` and register the pass.
# NOTE(review): presumably passes run in the order they are pushed onto
# `processed_passes` — confirm before reordering.
hyphen_disc_pass = Pass(hyphen_pattern,
discretize_hyphen)
push!(processed_passes, hyphen_disc_pass)
# Two-item pattern: two adjacent characters that are both in the Han script.
adjacent_cjk_pattern = [Classes.CHAR(r"[\p{Han}]"),
Classes.CHAR(r"[\p{Han}]")]
"""
    insert_hglue_in_adjacent_cjk_lat(two_nl)

Insert the `cjk_lat_spacing` identifier between the two matched items, for both
the cjk+latin and the latin+cjk case.

Returns `two_nl[1]`, `ID("cjk_lat_spacing")` and `two_nl[2]` concatenated.
"""
function insert_hglue_in_adjacent_cjk_lat(two_nl)
    spacing_id = Classes.ID("cjk_lat_spacing")
    return vcat(two_nl[1], spacing_id, two_nl[2])
end
# Han character followed by a character that is neither Han nor one of the
# listed CJK punctuation marks. The second class is a plain negation, so it
# also accepts spaces, digits and any other non-listed character, not only
# Latin letters.
# NOTE(review): if the `]` that follows `】` inside the class is an ASCII
# bracket, it closes the character class at that point and the remaining
# characters become a literal sequence — confirm it is the intended full-width
# bracket, or escape it.
adjacent_cjk_lat_pattern = [Classes.CHAR(r"[\p{Han}]"),
Classes.CHAR(r"[^\p{Han}·,。!?:」』》】]〕』〗〉}…—「『《〔[【『〖〈{]")]
# Mirror image of the pattern above: the non-Han character comes first.
adjacent_cjk_lat_pattern2 = [Classes.CHAR(r"[^\p{Han}·,。!?:」』》】]〕』〗〉}…—「『《〔[【『〖〈{]"),
Classes.CHAR(r"[\p{Han}]")]
# Both directions use the same inserter, which places `cjk_lat_spacing`
# between the two matched characters.
adjacent_cjk_lat_pass = Pass(adjacent_cjk_lat_pattern,
insert_hglue_in_adjacent_cjk_lat)
push!(processed_passes, adjacent_cjk_lat_pass)
adjacent_cjk_lat2_pass = Pass(adjacent_cjk_lat_pattern2,
insert_hglue_in_adjacent_cjk_lat)
push!(processed_passes, adjacent_cjk_lat2_pass)
# in 2 hanzi add glue. # in 2 hanzi add glue.
function insert_hglue_in_adjacent_chinese(two_nl) function insert_hglue_in_adjacent_cjk(two_nl)
inner = Classes.ID("cjk_spacing") inner = Classes.ID("cjk_spacing")
return [two_nl[1]; inner; two_nl[2]] return [two_nl[1]; inner; two_nl[2]]
end end
adjacent_chinese_pattern = [Classes.CHAR(r"[\p{Han},。!?:「」『』…]"), adjacent_cjk_pattern = [Classes.CHAR(r"[\p{Han}]"),
Classes.CHAR(r"[\p{Han},。!?:「」『』…]")] Classes.CHAR(r"[\p{Han}]")]
adjacent_glue_pass = Pass(adjacent_chinese_pattern, adjacent_cjk_pass = Pass(adjacent_cjk_pattern,
insert_hglue_in_adjacent_chinese) insert_hglue_in_adjacent_cjk)
push!(processed_passes, adjacent_cjk_pass)
# CJK line-breaking prohibition rules (kinsoku shori): glue is inserted only at
# positions where a line is allowed to break around punctuation.
#
# Pattern 1: a closing/stop punctuation mark followed by a character that is
# not one of those marks (glue goes after the closing mark).
# NOTE(review): if the `]` that follows `】` inside the class is an ASCII
# bracket, it closes the character class early — confirm it is the intended
# full-width bracket, or escape it.
adjacent_cjk_punc_pattern = [Classes.CHAR(r"[·,。!?:」』》】]〕』〗〉}]"),
    Classes.CHAR(r"[^·,。!?:」』》】]〕』〗〉}]")]
# Pattern 2: a character that is not an opening bracket/quote followed by an
# opening bracket/quote (glue goes before the opening mark).
adjacent_cjk_punc_pattern2 = [Classes.CHAR(r"[^「『《〔[【『〖〈{]"),
    Classes.CHAR(r"[「『《〔[【『〖〈{]")]
# Pattern 3: an ellipsis or dash followed by a character that is neither.
adjacent_cjk_punc_pattern3 = [Classes.CHAR(r"[…—]"),
    Classes.CHAR(r"[^…—]")]
# Use the closing-punctuation pattern here. The Han+Han pattern
# (`adjacent_cjk_pattern`) already has its own pass, and reusing it for this
# pass left `adjacent_cjk_punc_pattern` without any pass at all.
adjacent_glue_pass = Pass(adjacent_cjk_punc_pattern,
    insert_hglue_in_adjacent_cjk)
push!(processed_passes, adjacent_glue_pass) push!(processed_passes, adjacent_glue_pass)
# Pass for `adjacent_cjk_punc_pattern2`, using the same inserter as the
# Han+Han pass (`insert_hglue_in_adjacent_cjk`).
adjacent_glue_pass2 = Pass(adjacent_cjk_punc_pattern2,
insert_hglue_in_adjacent_cjk)
push!(processed_passes, adjacent_glue_pass2)
# Pass for `adjacent_cjk_punc_pattern3`, again with the same inserter.
adjacent_glue_pass3 = Pass(adjacent_cjk_punc_pattern3,
insert_hglue_in_adjacent_cjk)
push!(processed_passes, adjacent_glue_pass3)
end end

View file

@ -45,13 +45,19 @@ end
""" """
Horizonal Glue (HGlue) Horizonal Glue (HGlue)
- wd : width - wd : width
- str : stretch - stretch : stretch
""" """
mutable struct HGlue<:Box mutable struct HGlue<:Box
wd wd
str stretch
end end
"""
    Par

Paragraph marker. A `Box` subtype with no fields: it carries no data and is
recognised only by its type.
"""
mutable struct Par <: Box end
""" """
Vertical Box Vertical Box