uahgi2/expander.rkt
Tan Kian-ting a7080a47b3
Some checks failed
CI / Build on Racket 'stable' (BC) (push) Has been cancelled
CI / Build on Racket 'stable' (CS) (push) Has been cancelled
CI / Build on Racket 'current' (BC) (push) Has been cancelled
CI / Build on Racket 'current' (CS) (push) Has been cancelled
add cjk-splitter pass
2025-10-23 05:52:34 +08:00

116 lines
No EOL
4.4 KiB
Racket
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

(module uahgi2 racket/base
(require racket/dict)
(require racket/list)
(define main-config (make-hash
'(["paper-width". 2480]
["paper-height". 3508]
["lang" . #f]
["font-family" . "FreeSerif"]
["font-family-CJK" . "Noto Sans CJK TC"]
["font-size" . 14]
["font-weight" . "regular"]
["font-style" . "normal"]
)))
(define main-frame (make-hash
`(["id" . 1]
["font-family" . ,(dict-ref main-config "font-family")]
["class" . "frame"]
["width" . 2000]
["height" . 3000]
["x" . 200]
["y" . 250]
["content" . #f])))
(define (set-main-config attr val)
(dict-set! main-config attr val))
(define-syntax text-series
(syntax-rules ()
[(_ x ...)
; delete the beginning newline
(if (eq? (caar `(x ...)) "\n")
`('text ,(cdar `(x ...)))
`('text ,(caar `(x ...)) ,(cdar `(x ...))))]))
(define plugin-list '())
(define [two-newlines-into-par-aux cont]
(display (list-ref cont 1))
(define (sexp-processing res remain)
[cond
[(eq? remain '()) (reverse res)]
[(and (equal? (car remain) "\n") (equal? (car res) "\n")) (sexp-processing (cons '(par) (cdr res)) [cdr remain])]
[else (sexp-processing(cons (car remain) res) [cdr remain])]])
(list-set cont 1 (sexp-processing '() (list-ref cont 1))))
(define two-newlines-into-par (make-hash `(["lang" . "all"] ; for all languages
["body" . ,two-newlines-into-par-aux])))
(define [is-cjk-matched? char]
(define cjk-range #rx"[\u4E00-\u9FFF\u3400-\u4DBF\uF900-\uFAFF\U00020000-\U0002EBEF\u3000-\u303F,。!.?()—:;]")
(regexp-match? cjk-range char)
)
(define [remove-newline-between-cjk-aux cont]
(display (list-ref cont 1))
(define (sexp-processing res remain)
[cond
[(eq? remain '()) (reverse res)]
[(and(equal? (car remain) "\n") (is-cjk-matched? (car res)) (= (length remain) 1) (sexp-processing(cons (car remain) res) [cdr remain]))]
[(and(equal? (car remain) "\n") (is-cjk-matched? (car res)) (is-cjk-matched? (cadr remain)))
(sexp-processing (cons (cadr remain) res) [cddr remain])]
[else (sexp-processing(cons (car remain) res) [cdr remain])]])
(list-set cont 1 (sexp-processing '() (list-ref cont 1))))
(define remove-newline-between-cjk (make-hash `(["lang" . "zh"]
["body" . ,remove-newline-between-cjk-aux])))
(define [splitter-cjk-aux cont]
(display (list-ref cont 1))
(define [splitter-cjk-unit x]
(define tmp (regexp-match* #rx"[\u4E00-\u9FFF\u3400-\u4DBF\uF900-\uFAFF\U00020000-\U0002EBEF\u3000-\u303F·」』》】』〗〉「『《【『〖〈…—]" x #:gap-select? #t))
(filter (lambda (x) [not (equal? x "")]) tmp))
(define [splitter-cjk-proc res rem]
(cond
[(eq? rem '()) (reverse res)]
[(string? [car rem]) [splitter-cjk-proc [append (reverse (splitter-cjk-unit [car rem])) res] (cdr rem)]]
[else [splitter-cjk-proc [cons (car rem) res] (cdr rem)]]
))
(list-set cont 1 (splitter-cjk-proc '() (list-ref cont 1))))
(define splitter-cjk (make-hash `(["lang" . "zh"]
["body" . ,splitter-cjk-aux])))
(define-syntax append-plugin!
(syntax-rules ()
[(_ ls plugin) (set! ls (reverse (cons plugin (reverse ls))))]))
[append-plugin! plugin-list two-newlines-into-par]
[append-plugin! plugin-list remove-newline-between-cjk]
[append-plugin! plugin-list splitter-cjk]
(define (main-text txt)
(begin
(dict-set! main-frame "content" txt)
(display plugin-list)
(for/list ([p plugin-list])
[if [or (memq (dict-ref p "lang") (dict-ref main-config "lang"))
(eq? (dict-ref p "lang") "all")]
[let ([tmp ((dict-ref p "body") (dict-ref main-frame "content"))])
(dict-set! main-frame "content" tmp)] #f])))
(provide text-series main-text main-frame main-config set-main-config set!
(all-from-out racket/base) (all-from-out racket/dict)
#%module-begin #%app #%datum #%expression #%top)
)