Add some functions, incl. CJK+Latin passes
Some checks failed
CI / Julia 1.6 - ubuntu-latest - x64 (push) Has been cancelled
CI / Julia 1.7 - ubuntu-latest - x64 (push) Has been cancelled
CI / Julia pre - ubuntu-latest - x64 (push) Has been cancelled

This commit is contained in:
Tan, Kian-ting 2025-02-04 00:52:37 +08:00
parent b2712101bd
commit f216906b41
7 changed files with 2436 additions and 461 deletions

File diff suppressed because one or more lines are too long

View file

@ -1,8 +1,12 @@
{@lang|en}%
{@def|@lang|en}%
%{@def|@font|{@quote|FreeSans|AR PL UKai TW}}%
%{@def|@fontsize|12}%
%{@def|@linewidth|200} %in px
%{@def|@spacing|{@hglue|{@ex|1}|2}}%
%{@def|@cjk_spacing|{@hglue|{@ex|0}|0.001}}%
%{@def|@linewidth|400} %in px
%{@def|@hori_align|left} % left| right|center
%{@def|@spacing|{@hglue|{@ex|0.4}|0.2}}%
%{@def|@cjk_lat_spacing|{@hglue|{@ex|0.2}|0.2}}%
%{@def|@cjk_spacing|{@hglue|{@ex|0}|0.2}}%
%the processing of typesetting technology is so sophisticated that many people are afraid of
the implementation of typesetting language. However, it is good to give TEXBook a try.測試一段漢字,究竟漢字會不會斷行呢
the implementation of typesetting language.{@set|@fontsize|14} However, it is good to give TEXBook a try. 測試一段漢字,究竟會不會斷行呢?「『《臺灣連翹》是吳濁流的著作,涉及臺灣戰後歷史與政治。』」{@set|@font|{@quote|jf open 粉圓 2.0|AR PL UMing TW}}
願主耶穌ê恩典kap眾聖徒saⁿ-kap tī-leh。À-bēng。啟示錄bóe-á-cha̍t原起頭上帝創造天kap地是創世記頭chi̍t cha̍t。

View file

@ -54,7 +54,13 @@ function total_cost(items, n, linewidth, last_of_queue=false)
mininal_cost = +Inf
prev_breakpoint = nothing
for j in 1:1:(n-1)
tmp_cost = total_cost(items, j, linewidth, false) + cost(items, j+1, n, linewidth, last_of_queue)
total_c = total_cost(items, j, linewidth, false)
cost_tail = cost(items, j+1, n, linewidth, last_of_queue)
if cost_tail < +Inf && last_of_queue == true
tmp_cost = total_c
else
tmp_cost = total_c + cost_tail
end
if tmp_cost < mininal_cost
mininal_cost = tmp_cost
prev_breakpoint = j
@ -98,7 +104,11 @@ function arrange(vbox, env)
for (x, i) in enumerate(1:1:length(eles))
item = eles[x]
if !(x in breakpoint_list_reversed)
if typeof(item) == u.Disc
if typeof(item) == u.Par
horizonal_align = env["hori_align"]
result_vbox_inner[end].eles = add_aligner(result_vbox_inner[end].eles, horizonal_align)
push!(result_vbox_inner, u.HBox([],nothing,nothing,nothing,nothing,nothing))
elseif typeof(item) == u.Disc
if item.orig != []
push!(result_vbox_inner[end].eles, item.orig)
end
@ -107,10 +117,15 @@ function arrange(vbox, env)
end
# x is the last one
elseif i == length(eles)
if typeof(item) !== u.Par
push!(result_vbox_inner[end].eles, item)
end
# x in breakpoint_list_reversed
else
if typeof(item) == u.Disc
if typeof(item) == u.Par
push!(result_vbox_inner, u.HBox([],nothing,nothing,nothing,nothing,nothing))
elseif typeof(item) == u.Disc
if item.before != []
push!(result_vbox_inner[end].eles, item.before)
end
@ -127,6 +142,17 @@ function arrange(vbox, env)
return result_vbox_inner
end
"""
    add_aligner(hbox_eles, horizonal_align)

Attach a filler glue (`u.HGlue(0.0, 10000.0)`) to a line's elements according to
`horizonal_align` and return the resulting element list.

- `"right"`: a new vector with the filler in front; `hbox_eles` itself is not modified.
- `"middle"`: `hbox_eles` is returned untouched.
- anything else (left/default): the filler is appended to `hbox_eles` in place.
"""
function add_aligner(hbox_eles, horizonal_align)
    filler = u.HGlue(0.0, 10000.0)
    # NOTE(review): the option appears to be documented elsewhere as
    # left|right|center, but only "middle" is matched here, so "center" would take
    # the left/default path — confirm which spelling is intended.
    horizonal_align == "middle" && return hbox_eles
    if horizonal_align == "right"
        # Work on a copy so the caller's vector is left as it was.
        return pushfirst!(copy(hbox_eles), filler)
    end
    # left/default: mutates the caller's vector.
    return push!(hbox_eles, filler)
end
function position(box_inner, env#=to be used later=#)
pages = [] #a subarray is the content of a page
@ -135,19 +161,57 @@ function position(box_inner, env#=to be used later=#)
posX = orig_x #cursor x
posY = orig_y #cursor y
baselineskip = 30 # it can be derived from env
for hbox in box_inner
pages = position_chbox(hbox.eles, pages, posX, posY)
linewidth = env["linewidth"]
horizonal_align = env["hori_align"]
for (idx, hbox) in enumerate(box_inner)
hbox_eles = hbox.eles
aligner_glue = u.HGlue(0.0,10000.0)
if idx == length(box_inner)
hbox_eles = add_aligner(hbox_eles, horizonal_align)
end
residual_space = parse(Float64, linewidth) - width_sum_of_eles(hbox.eles)
sum_of_stretch = reduce((x,y) -> x+y, map(hglue_stretch, hbox.eles))
space_stretch_ratio = residual_space / sum_of_stretch
pages = position_chbox(hbox_eles, pages, posX, posY, space_stretch_ratio)
posX = orig_x
posY -= baselineskip
end
return pages
end
"""
    hglue_stretch(ele)

Return `ele.stretch` when `ele` is exactly a `u.HGlue`; every other element
contributes `0`.
"""
function hglue_stretch(ele)
    typeof(ele) == u.HGlue || return 0
    return ele.stretch
end
"""
    width_sum_of_eles(eles)

Return the sum of the `wd` property over `eles`.

Elements without a `wd` property count as `0.0`. The accumulation starts from
`0.0`, so an empty collection yields `0.0` and the result type does not depend on
how many elements there are.
"""
function width_sum_of_eles(eles)
    width_of = ele -> hasproperty(ele, :wd) ? ele.wd : 0.0
    # `init` makes the empty and single-element cases fall out of the same fold.
    return mapreduce(width_of, +, eles; init=0.0)
end
"""positioning all the chboxes in a HBox"""
function position_chbox(hbox_eles, pages, posX, posY)
function position_chbox(hbox_eles, pages, posX, posY, space_stretch_ratio)
for i in hbox_eles
if typeof(i) == u.HGlue
deltaX = i.wd
deltaX = i.wd + (i.stretch * space_stretch_ratio)
posX += deltaX
else #ChBox
deltaX = i.wd

View file

@ -7,7 +7,7 @@ c = Main.uahgi.Parsing.Passes.Classes
function match_lang(item)
@match item begin
c.SEQ([c.ELE([c.ID("lang")]), c.ELE([c.CHAR(v1)])]) => begin
c.SEQ([c.ELE([c.ID("def")]),c.ELE([c.ID("lang")]), c.ELE([c.CHAR(v1)])]) => begin
return v1
end
_ => false

View file

@ -35,9 +35,14 @@ function interp(ast, env, res_box, put_char=true_)
@match ast begin
c.ID(id) =>
begin
print("ID____", id)
#print("ID____", id)
return interp(env[id], env, res_box, true_)
end
c.SEQ([c.ID("par")]) => begin
par_box = u.Par()
push!(res_box.eles[end].eles, par_box)
return interp("", env, res_box)
end
c.SEQ([c.ELE([c.ID("def")]),
c.ELE([c.ID(id)]),val]) =>
begin
@ -76,11 +81,7 @@ function interp(ast, env, res_box, put_char=true_)
return (ele, env, res_box)
end
c.SEQ([c.ELE([c.ID("par")])]) =>begin
push!(res_box.eles, u.HBox([],
nothing, nothing, nothing, nothing, nothing))
return (ast, env, res_box)
end
c.SEQ([c.ELE([c.ID("ex")]), val]) =>begin
x = 'x'
font_family = select_font(x, env)
@ -137,7 +138,6 @@ function interp(ast, env, res_box, put_char=true_)
end
end
c.SEQ([c.ELE([c.ID("disc")]),
c.ELE(before),
c.ELE(after),
@ -172,8 +172,10 @@ function interp(ast, env, res_box, put_char=true_)
end
# empty item
[] => return (ast, env, res_box)
"" => return (ast, env, res_box)
_ => begin
println("unknown token", ast)
println("unknown token: ", ast)
val_evaled = nothing
return (val_evaled, env, res_box)
end

View file

@ -24,16 +24,74 @@ two_nl_to_par_pass = Pass(two_nl_to_par_pattern,
two_nl_to_par_pass_func)
push!(processed_passes, two_nl_to_par_pass)
# hyphen between 2 chars to disc
"""
    discretize_hyphen(two_nl)

Replace the middle item of a three-item match with a `disc` sequence and return
the first item, that sequence, and the third item concatenated.

The sequence carries, after the `disc` identifier, a `-` element, an empty
element and another `-` element (presumably before / after / unbroken form —
confirm against the `disc` handling in the interpreter).
"""
function discretize_hyphen(two_nl)
    hyphen_ele() = Classes.ELE([Classes.CHAR("-")])
    disc_parts = [
        Classes.ELE([Classes.ID("disc")]),
        hyphen_ele(),
        Classes.ELE([]),
        hyphen_ele(),
    ]
    disc_seq = Classes.SEQ(disc_parts)
    # Despite the parameter name, items 1 and 3 of the match are used here.
    return [two_nl[1]; disc_seq; two_nl[3]]
end
# Three-item pattern: a hyphen with a non-hyphen character on each side.
hyphen_pattern = [
    Classes.CHAR(r"[^\-]"),
    Classes.CHAR(r"[-]"),
    Classes.CHAR(r"[^\-]"),
]

# Matches are rewritten by `discretize_hyphen`.
hyphen_disc_pass = Pass(hyphen_pattern, discretize_hyphen)
push!(processed_passes, hyphen_disc_pass)
# Two-item pattern: a Han character immediately followed by another Han character.
# NOTE(review): the same name is assigned an identical value again further down,
# before its first use — one of the two assignments looks redundant.
adjacent_cjk_pattern = [
    Classes.CHAR(r"[\p{Han}]"),
    Classes.CHAR(r"[\p{Han}]"),
]
# between adjacent latin+cjk or cjk+latin characters, add glue.
"""
    insert_hglue_in_adjacent_cjk_lat(two_nl)

Return the two matched items with a `cjk_lat_spacing` identifier spliced between
them.
"""
function insert_hglue_in_adjacent_cjk_lat(two_nl)
    spacing_ref = Classes.ID("cjk_lat_spacing")
    head, tail = two_nl[1], two_nl[2]
    return [head; spacing_ref; tail]
end
# A single character that is neither Han nor one of the listed CJK punctuation
# marks. The ASCII brackets are escaped: an unescaped `]` in the middle of the
# class would close it early and turn the remaining characters into a literal
# sequence, so the expression could never match one character.
non_han_non_punct = r"[^\p{Han}·,。!?:」』》】\]〕』〗〉}…—「『《〔\[【『〖〈{]"

# Han character followed by such a character ...
adjacent_cjk_lat_pattern = [
    Classes.CHAR(r"[\p{Han}]"),
    Classes.CHAR(non_han_non_punct),
]
# ... and the mirror image: such a character followed by a Han character.
adjacent_cjk_lat_pattern2 = [
    Classes.CHAR(non_han_non_punct),
    Classes.CHAR(r"[\p{Han}]"),
]

# Both directions use `insert_hglue_in_adjacent_cjk_lat` as the rewrite function.
adjacent_cjk_lat_pass = Pass(adjacent_cjk_lat_pattern, insert_hglue_in_adjacent_cjk_lat)
push!(processed_passes, adjacent_cjk_lat_pass)
adjacent_cjk_lat2_pass = Pass(adjacent_cjk_lat_pattern2, insert_hglue_in_adjacent_cjk_lat)
push!(processed_passes, adjacent_cjk_lat2_pass)
# add glue between two adjacent hanzi.
function insert_hglue_in_adjacent_chinese(two_nl)
function insert_hglue_in_adjacent_cjk(two_nl)
    # Splice a `cjk_spacing` identifier between the two matched items.
    spacing_ref = Classes.ID("cjk_spacing")
    head, tail = two_nl[1], two_nl[2]
    return [head; spacing_ref; tail]
end
# Two-item pattern: each side is a Han character or one of the listed punctuation marks.
adjacent_chinese_pattern = [Classes.CHAR(r"[\p{Han},。!?:「」『』…]"),
Classes.CHAR(r"[\p{Han},。!?:「」『』…]")]
# Two-item pattern: a Han character immediately followed by another Han character.
adjacent_cjk_pattern = [Classes.CHAR(r"[\p{Han}]"),
Classes.CHAR(r"[\p{Han}]")]
# NOTE(review): this pass is built but not pushed onto `processed_passes` in this
# stretch of code, and the name is assigned again further down — confirm whether
# this assignment is still needed.
adjacent_glue_pass = Pass(adjacent_chinese_pattern,
insert_hglue_in_adjacent_chinese)
# Pass over adjacent Han pairs, rewritten by `insert_hglue_in_adjacent_cjk`.
adjacent_cjk_pass = Pass(adjacent_cjk_pattern,
insert_hglue_in_adjacent_cjk)
push!(processed_passes, adjacent_cjk_pass)
# Line-breaking rules for CJK punctuation (避頭尾 / 禁則処理).
# A `cjk_spacing` identifier is inserted only at the positions matched below:
#   1. after a closing mark / stop, when the next character is not one of them;
#   2. before an opening mark, when the previous character is not an opening mark;
#   3. after an ellipsis or dash, when the next character is not one of them.
# ASCII `]` and `[` are escaped so they stay members of the character class
# instead of terminating it.
adjacent_cjk_punc_pattern = [
    Classes.CHAR(r"[·,。!?:」』》】\]〕』〗〉}]"),
    Classes.CHAR(r"[^·,。!?:」』》】\]〕』〗〉}]"),
]
adjacent_cjk_punc_pattern2 = [
    Classes.CHAR(r"[^「『《〔\[【『〖〈{]"),
    Classes.CHAR(r"[「『《〔\[【『〖〈{]"),
]
adjacent_cjk_punc_pattern3 = [
    Classes.CHAR(r"[…—]"),
    Classes.CHAR(r"[^…—]"),
]

# Kept as in the original: another pass over adjacent Han pairs.
adjacent_glue_pass = Pass(adjacent_cjk_pattern, insert_hglue_in_adjacent_cjk)
push!(processed_passes, adjacent_glue_pass)

# Rule 1 was defined but never handed to a pass; register it explicitly.
adjacent_cjk_punc_pass = Pass(adjacent_cjk_punc_pattern, insert_hglue_in_adjacent_cjk)
push!(processed_passes, adjacent_cjk_punc_pass)

adjacent_glue_pass2 = Pass(adjacent_cjk_punc_pattern2, insert_hglue_in_adjacent_cjk)
push!(processed_passes, adjacent_glue_pass2)

adjacent_glue_pass3 = Pass(adjacent_cjk_punc_pattern3, insert_hglue_in_adjacent_cjk)
push!(processed_passes, adjacent_glue_pass3)
end

View file

@ -44,14 +44,20 @@ end
"""
    HGlue(wd, stretch)

Horizontal Glue (HGlue)
- wd : width
- stretch : stretch; callers scale it by a per-line ratio and add it to `wd`
"""
mutable struct HGlue <: Box
    wd
    stretch
end
"""
    Par()

Paragraph marker: a field-less box used only to signal a paragraph boundary.
"""
mutable struct Par <: Box
end
"""
Vertical Box