Optimise segmentation
Avoid the costly list append (`@`) on every step; cons onto the accumulator and reverse the list once at the end.
This commit is contained in:
parent
67acf25cd7
commit
30bb11b988
1 changed file with 15 additions and 16 deletions
|
@@ -33,23 +33,22 @@ let split_string_by_unicode_codepoint str =
|
||||||
(* Split an OCaml string [str] into a `string list` *)
|
(* Split an OCaml string [str] into a `string list` *)
|
||||||
let pred_codepoint = ref (-1) in
|
let pred_codepoint = ref (-1) in
|
||||||
let segmented_unit_list = ref [] in
|
let segmented_unit_list = ref [] in
|
||||||
let iterator x y _ =
|
let iterator () y _ =
|
||||||
let _ = if !pred_codepoint > -1 then
|
let () = if !pred_codepoint > -1 then
|
||||||
let current_codepoint = y in
|
let current_codepoint = y in
|
||||||
let pred_char_len = current_codepoint - !pred_codepoint in
|
let pred_char_len = current_codepoint - !pred_codepoint in
|
||||||
let unit_substring = Stdlib.String.sub x !pred_codepoint pred_char_len in
|
let unit_substring = Stdlib.String.sub str !pred_codepoint pred_char_len in
|
||||||
let _ = segmented_unit_list := !segmented_unit_list @ [unit_substring] in
|
segmented_unit_list := unit_substring :: !segmented_unit_list
|
||||||
unit_substring
|
in
|
||||||
else
|
let () = pred_codepoint := y in
|
||||||
"" in
|
()
|
||||||
let _ = pred_codepoint := y in x in
|
in
|
||||||
|
|
||||||
let _ = Uutf.String.fold_utf_8 iterator str str in
|
let _ = Uutf.String.fold_utf_8 iterator () str in
|
||||||
let last_char_len = (Stdlib.String.length str) - !pred_codepoint in
|
let last_char_len = (Stdlib.String.length str) - !pred_codepoint in
|
||||||
if last_char_len > 0 then
|
let () =
|
||||||
let unit_substring = Stdlib.String.sub str !pred_codepoint last_char_len in
|
if last_char_len > 0 then
|
||||||
let _ = segmented_unit_list := !segmented_unit_list @ [unit_substring] in
|
let unit_substring = Stdlib.String.sub str !pred_codepoint last_char_len in
|
||||||
!segmented_unit_list
|
segmented_unit_list := unit_substring :: !segmented_unit_list
|
||||||
else
|
in
|
||||||
!segmented_unit_list;;
|
List.rev !segmented_unit_list;;
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue