misc-codes/linebreaking/LineBreaking.md

284 lines
8.5 KiB
Markdown
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

```OCaml
(*有關於LaTeX使用於斷行的演算法
首先我們要制定一個字元(包含斷行後新生的連字號,以及空白)在斷行前、斷行後的寬度
*)
```
```OCaml
(*假設這段字元 word segment list = wordSegListSP表達半形空白HY表達連字號*)
let wordSegList = ["no"; "HY"; "thing"; "SP"; "can"; "SP"; "stop";"SP"; "the"; "SP"; "cro"; "HY"; "co";"HY" ;"dile"; "cross"; "SP"; "it."] ;;
```
val wordSegList : string list =
["no"; "HY"; "thing"; "SP"; "can"; "SP"; "stop"; "SP"; "the"; "SP"; "cro";
"HY"; "co"; "HY"; "dile"; "cross"; "SP"; "it."]
```OCaml
(*現在轉成每個word segment都附帶長度的格式*)
(* sg: 文段 segment
ow : original width 原來的寬度
hw : hyphenated width 該處指定為斷字後的寬度*)
type segment_with_length = { sg: string; ow: float; hw: float}
```
type segment_with_length = { sg : string; ow : float; hw : float; }
```OCaml
(* 每個segment之長度*)
let segOwList = List.map (fun x -> match x with
| "SP" -> 1.0 (*SP 通常寬度為1*)
| "HY" -> 0.0 (*HY 連字點寬度為0*)
| _ -> float_of_int (String.length x)) (*以chars的長度來當做文字寬度 假設是等寬半形字元*)
wordSegList
```
val segOwList : float list =
[2.; 0.; 5.; 1.; 3.; 1.; 4.; 1.; 3.; 1.; 3.; 0.; 2.; 0.; 4.; 5.; 1.; 3.]
```OCaml
(*每個segment在其被斷行時的長度 *)
let segHwList = List.map (fun x -> match x with
| "SP" -> 0.0 (*SP 通常斷行後寬度為0*)
| "HY" -> 1.0 (*HY 連字點斷行後為1*)
| _ -> infinity)(*不可能斷行的地方寬度設做0*)
wordSegList
```
val segHwList : float list =
[infinity; 1.; infinity; 0.; infinity; 0.; infinity; 0.; infinity; 0.;
infinity; 1.; infinity; 1.; infinity; infinity; 0.; infinity]
```OCaml
(*3個列表組合 zip 在一起*)
let segListCombined = List.combine (List.combine wordSegList segOwList) segHwList;;
(*然後變成type segment_with_length的列表*)
let segWithLengthList = List.map (fun i -> match i with
| ((sg,ow),hw) -> {sg = sg; ow = ow; hw = hw}) segListCombined
```
val segListCombined : ((string * float) * float) list =
[(("no", 2.), infinity); (("HY", 0.), 1.); (("thing", 5.), infinity);
(("SP", 1.), 0.); (("can", 3.), infinity); (("SP", 1.), 0.);
(("stop", 4.), infinity); (("SP", 1.), 0.); (("the", 3.), infinity);
(("SP", 1.), 0.); (("cro", 3.), infinity); (("HY", 0.), 1.);
(("co", 2.), infinity); (("HY", 0.), 1.); (("dile", 4.), infinity);
(("cross", 5.), infinity); (("SP", 1.), 0.); (("it.", 3.), infinity)]
val segWithLengthList : segment_with_length list =
[{sg = "no"; ow = 2.; hw = infinity}; {sg = "HY"; ow = 0.; hw = 1.};
{sg = "thing"; ow = 5.; hw = infinity}; {sg = "SP"; ow = 1.; hw = 0.};
{sg = "can"; ow = 3.; hw = infinity}; {sg = "SP"; ow = 1.; hw = 0.};
{sg = "stop"; ow = 4.; hw = infinity}; {sg = "SP"; ow = 1.; hw = 0.};
{sg = "the"; ow = 3.; hw = infinity}; {sg = "SP"; ow = 1.; hw = 0.};
{sg = "cro"; ow = 3.; hw = infinity}; {sg = "HY"; ow = 0.; hw = 1.};
{sg = "co"; ow = 2.; hw = infinity}; {sg = "HY"; ow = 0.; hw = 1.};
{sg = "dile"; ow = 4.; hw = infinity};
{sg = "cross"; ow = 5.; hw = infinity}; {sg = "SP"; ow = 1.; hw = 0.};
{sg = "it."; ow = 3.; hw = infinity}]
```OCaml
(*
我們可以定義在第 n 處斷行=>除了斷行點以外的文字消失,的成本函數 cost(n),成本函數越小越好。
這時後需要用動態規劃解決。
badness (k, n)是指k~n-1處若塞於一行且n處斷行時的懲罰函數等下介紹越小越好
cost(n) = baness(0,n) 若其為有限,否則 min of k in 0...n-1 of badness(k, n) + cost(k)
懲罰函數badness定義是若lineWidth >= widthBetween(a,b),則為二者之差的三次方,否則是無限大。
k >= n
badness(k, n) = (lineWidth - widthBetween(k, n) )^3 if lineWidth >= widthBetween(k+1, n)
infinity elsewhere
widthBetween(a,b)係指 a到b 塞在一行時的寬度
widthBetween(a,b) = hw[b] + (sum{i=a...b-1} of ow[i]
*)
open Printf
let lineWidth = 12.0;; (*一行最大寬度*)
let widthBetween a b = if a> b then raise (Failure "Exception: widthBetween a b, a <=b ")
else (List.nth segWithLengthList b).hw +. (sumOfOw a (b-1) segWithLengthList);;
let badness k n = let remainedSpaceWidth = lineWidth -. (widthBetween k n) in
if remainedSpaceWidth >= 0. then
remainedSpaceWidth ** 3.
else infinity;;
let minIndex = ref 0;; (*cost(x)發生的最小的k)值*)
(*動態規劃存放 (min cost, 其中的 k 滿足 min cost) 之處*)
(*格式: n (minValue, minIndex) *)
let costKStorage = Hashtbl.create 10;;
let rec cost n =
if Hashtbl.mem costKStorage n then (*若是已經存儲了,即用裡面的值,避免重複運算*)
let (minValue, minIndex) = Hashtbl.find costKStorage n in
minValue
else if (badness 0 n) < infinity then (badness 0 n)
else
let compareList = List.init n (fun k -> (badness k n) +. cost k) in
(*找最小值*)
let findMin lst = List.fold_left min infinity lst in
let minValue = findMin compareList in (*最小值*)
(*找最小值所在的索引index值*)
let findMinIndex lst = List.fold_left
(fun pos i -> if (List.nth lst i) == minValue then i else pos)
(-1)
(List.init (List.length lst) (fun x -> x)) in
let minIndex = findMinIndex compareList in
let _ = Hashtbl.add costKStorage n (minValue, minIndex) in
minValue;;
```
val lineWidth : float = 12.
val widthBetween : int -> int -> float = <fun>
val badness : int -> int -> float = <fun>
val minIndex : int ref = {contents = 0}
val costKStorage : ('_weak11, '_weak12) Hashtbl.t = <abstr>
val cost : int -> float = <fun>
```OCaml
(*sumOfOw : 上文的(sum{i=a...b} of ow[i]*)
(* sumOfOwAux輔助函數*)
let rec sumOfOwAux i start final sum list =
if i < start then sumOfOwAux (i+1) start final sum list
else if (i>= start && i <= final) then sumOfOwAux (i+1) start final (sum +. (List.nth list i).ow) list
else sum ;;
let sumOfOw start final list = sumOfOwAux 0 start final 0.0 list;;
```
val sumOfOwAux :
int -> int -> int -> float -> segment_with_length list -> float = <fun>
val sumOfOw : int -> int -> segment_with_length list -> float = <fun>
```OCaml
(*算到第11之處的成本*)
(*結果
no thing
can stop
crocodile
...........^
最多只能塞到箭頭處*)
cost 11;;
```
- : float = 179.
```OCaml
(*找 costKStorage 目前的值*)
let a = ref "" in
let _ = (Hashtbl.iter (fun x y -> let (y1,y2) = y in a := !a ^ (sprintf "%d : %f %d\n" x y1 y2)) costKStorage) in !a;;
```
- : string =
"6 : inf -1\n2 : inf -1\n8 : inf -1\n7 : 152.000000 3\n13 : 153.000000 7\n12 : inf -1\n4 : inf -1\n9 : 28.000000 5\n11 : 179.000000 7\n0 : inf -1\n10 : inf -1\n"
```OCaml
(*找出每個斷行點回溯的搜尋HashTable*)
let rec findBreakPointAux res k =
if Hashtbl.mem costKStorage k then
let (minValue, minIndex) = Hashtbl.find costKStorage k in
findBreakPointAux (List.append res [k]) minIndex
else (List.append res [k]);;
let findBreakPoint n = findBreakPointAux [] n;;
```
val findBreakPointAux : int list -> int -> int list = <fun>
val findBreakPoint : int -> int list = <fun>
```OCaml
findBreakPoint 13;;
```
findBreakPoint <-- 13
findBreakPointAux <-- []
findBreakPointAux --> <fun>
findBreakPointAux* <-- 13
Hashtbl.find <-- <abstr>
Hashtbl.find --> <fun>
Hashtbl.find* <-- <poly>
Hashtbl.find* --> <poly>
findBreakPointAux <-- [13]
findBreakPointAux --> <fun>
findBreakPointAux* <-- 7
Hashtbl.find <-- <abstr>
Hashtbl.find --> <fun>
Hashtbl.find* <-- <poly>
Hashtbl.find* --> <poly>
findBreakPointAux <-- [13; 7]
findBreakPointAux --> <fun>
findBreakPointAux* <-- 3
findBreakPointAux* --> [13; 7; 3]
findBreakPointAux* --> [13; 7; 3]
findBreakPointAux* --> [13; 7; 3]
findBreakPoint --> [13; 7; 3]
- : int list = [13; 7; 3]