diff --git a/lib/stringCodepointSplitter.ml b/lib/stringCodepointSplitter.ml index 3c8b223..03451fc 100644 --- a/lib/stringCodepointSplitter.ml +++ b/lib/stringCodepointSplitter.ml @@ -33,23 +33,22 @@ let split_string_by_unicode_codepoint str = (*Split a Ocaml string [str] to a `str list` *) let pred_codepoint = ref (-1) in let segmented_unit_list = ref [] in - let iterator x y _ = - let _ = if !pred_codepoint > -1 then + let iterator () y _ = + let () = if !pred_codepoint > -1 then let current_codepoint = y in let pred_char_len = current_codepoint - !pred_codepoint in - let unit_substring = Stdlib.String.sub x !pred_codepoint pred_char_len in - let _ = segmented_unit_list := !segmented_unit_list @ [unit_substring] in - unit_substring - else - "" in - let _ = pred_codepoint := y in x in + let unit_substring = Stdlib.String.sub str !pred_codepoint pred_char_len in + segmented_unit_list := unit_substring :: !segmented_unit_list + in + let () = pred_codepoint := y in + () + in - let _ = Uutf.String.fold_utf_8 iterator str str in + let _ = Uutf.String.fold_utf_8 iterator () str in let last_char_len = (Stdlib.String.length str) - !pred_codepoint in - if last_char_len > 0 then - let unit_substring = Stdlib.String.sub str !pred_codepoint last_char_len in - let _ = segmented_unit_list := !segmented_unit_list @ [unit_substring] in - !segmented_unit_list - else - !segmented_unit_list;; - + let () = + if last_char_len > 0 then + let unit_substring = Stdlib.String.sub str !pred_codepoint last_char_len in + segmented_unit_list := unit_substring :: !segmented_unit_list + in + List.rev !segmented_unit_list;; diff --git a/test/stringCodepointSplitter.t b/test/stringCodepointSplitter.t index f50da59..5d17abf 100644 --- a/test/stringCodepointSplitter.t +++ b/test/stringCodepointSplitter.t @@ -9,3 +9,21 @@ Still simple but not just ASCII « — » + +Example from the docstring of the lib + $ ./stringCodepointSplitterTest.exe "m̄知 who you're." + m + ̄ + 知 + + w + h + o + + y + o + u + ' + r + e + .