Merge branch 'main' of github.com:Yoxem/stringCodepointSplitter

add changes
Update README.md
2023-08-31 19:13:32 +08:00 · 2023-08-31 19:12:14 +08:00 · 2023-08-31 07:19:18 +08:00 · 2023-08-30 00:02:54 +08:00 · 2023-08-29 23:10:21 +08:00 · 2023-08-29 22:32:58 +08:00
11 changed files with 68 additions and 30 deletions
--- a/4
+++ b/4
@ -1,2 +1,6 @@
+## v0.0.2 (2023-08-30)
+ - add more tests
+ - optimize the list processing
+ - disable dynamic linking to uutf
 ## v0.0.1 (2023-08-27)
 - add `split_string_by_unicode_codepoint` initially
--- a/2
+++ b/2
@ -1,4 +1,4 @@
-Copyright 2023 Tan Kian-ting
+Copyright 2023 Tan Kian-ting &  Raphaël Proust 

 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

--- a/README.md
+++ b/README.md
@ -5,7 +5,7 @@ Split a string to a list of strings of a character by the unicode codepoint.
 It requires module Uutf.

 ## Dependencies
- - OCaml >= 4.13
+ - OCaml >= 4.06
 - dune
 - uutf
 - findlib
--- a/5
+++ b/5
@ -1,7 +1,8 @@
 (lang dune 3.9)
+(cram enable)

 (name stringCodepointSplitter)
-(version 0.0.1)
+(version 0.0.2)
 (generate_opam_files true)

 (source
@ -21,7 +22,7 @@
 (description "Split a string to a list of strings of a character by the unicode codepoint.
 It requires module Uutf.")

- (depends ocaml ocamlfind dune uutf)
+ (depends (ocaml (>= 4.06)) ocamlfind dune uutf)
 (tags (string utf8)))

 ; See the complete stanza docs at https://dune.readthedocs.io/en/stable/dune-files.html#dune-project
--- a/lib/dune
+++ b/lib/dune
@ -1,4 +1,4 @@
 (library
 (name stringCodepointSplitter)
 (public_name stringCodepointSplitter)
- (libraries uutf findlib.dynload))
+ (libraries uutf))
--- a/lib/stringCodepointSplitter.ml
+++ b/lib/stringCodepointSplitter.ml
@ -1,6 +1,6 @@
 open Stdlib
 (*
-(c) Tan Kian-ting 2023
+(c) 2023 Tan Kian-ting (main author) & Raphaël Proust (PR giver)
 Under MIT License

 习包子 梁家河小学博士 清零宗 习炀帝 庆丰大帝
@ -13,9 +13,6 @@ Under MIT License
    It only contains [split_string_by_unicode_codepoint], which splits an OCaml string [str] to a [string list] 
 *)

-let _ = Findlib.init ();;
-Fl_dynload.load_packages ["uutf"];;
-

 (** Split an OCaml string [str] to a [string list] 

@ -36,23 +33,22 @@ let split_string_by_unicode_codepoint str =
 (* Split an OCaml string [str] into a [string list] *)
  let pred_codepoint = ref (-1) in
  let segmented_unit_list = ref [] in
-  let iterator x y _ =
-    let _ = if  !pred_codepoint > -1 then
+  let iterator () y _ =
+    let () = if  !pred_codepoint > -1 then
      let current_codepoint = y in
      let pred_char_len = current_codepoint - !pred_codepoint in
-      let unit_substring = Stdlib.String.sub x !pred_codepoint pred_char_len in
-      let _ = segmented_unit_list := !segmented_unit_list @ [unit_substring] in
-      unit_substring
-    else
-      "" in
-    let _ =  pred_codepoint := y in x in
+      let unit_substring = Stdlib.String.sub str !pred_codepoint pred_char_len in
+      segmented_unit_list := unit_substring :: !segmented_unit_list
+    in
+    let () =  pred_codepoint := y in
+    ()
+  in

-  let _ = Uutf.String.fold_utf_8 iterator str str in
+  let _ = Uutf.String.fold_utf_8 iterator () str in
  let last_char_len = (Stdlib.String.length str) - !pred_codepoint in
-  if last_char_len > 0 then
-    let unit_substring = Stdlib.String.sub str !pred_codepoint last_char_len in
-    let _ = segmented_unit_list := !segmented_unit_list @ [unit_substring] in
-    !segmented_unit_list
-  else
-    !segmented_unit_list;;
-
+  let () =
+    if last_char_len > 0 then
+      let unit_substring = Stdlib.String.sub str !pred_codepoint last_char_len in
+      segmented_unit_list := unit_substring :: !segmented_unit_list
+  in
+  List.rev !segmented_unit_list;;
--- a/stringCodepointSplitter.opam
+++ b/stringCodepointSplitter.opam
@ -1,6 +1,6 @@
 # This file is generated by dune, edit dune-project instead
 opam-version: "2.0"
-version: "0.0.1"
+version: "0.0.2"
 synopsis:
  "Split a string to a list of strings of a character by the unicode codepoint"
 description: """
@ -13,7 +13,7 @@ tags: ["string" "utf8"]
 homepage: "https://github.com/yoxem/stringCodepointSplitter"
 bug-reports: "https://github.com/Yoxem/stringCodepointSplitter/issues"
 depends: [
-  "ocaml"
+  "ocaml" {>= "4.06"}
  "ocamlfind"
  "dune" {>= "3.9"}
  "uutf"
--- a/test/dune
+++ b/test/dune
@ -1,2 +1,6 @@
-(test
- (name stringCodepointSplitter))
+(executable
+ (libraries stringCodepointSplitter)
+ (name stringCodepointSplitterTest))
+
+(cram
+ (deps ./stringCodepointSplitterTest.exe))
--- a/test/stringCodepointSplitter.ml
+++ b/test/stringCodepointSplitter.ml
--- a/test/stringCodepointSplitter.t
+++ b/test/stringCodepointSplitter.t
@ -0,0 +1,29 @@
+ASCII only
+  $ ./stringCodepointSplitterTest.exe abc
+  a
+  b
+  c
+
+Still simple but not just ASCII
+  $ ./stringCodepointSplitterTest.exe «—»
+  «
+  —
+  »
+
+Example from the docstring of the lib
+  $ ./stringCodepointSplitterTest.exe "m̄知 who you're."
+  m
+  ̄
+  知
+   
+  w
+  h
+  o
+   
+  y
+  o
+  u
+  '
+  r
+  e
+  .
--- a/test/stringCodepointSplitterTest.ml
+++ b/test/stringCodepointSplitterTest.ml
@ -0,0 +1,4 @@
+let () =
+  Sys.argv.(1)
+  |> StringCodepointSplitter.split_string_by_unicode_codepoint
+  |> List.iter print_endline
Author	SHA1	Message	Date
Tan Kian-ting	c1611f3b55	Merge branch 'main' of github.com:Yoxem/stringCodepointSplitter	2023-08-31 19:13:32 +08:00
Tan Kian-ting	8b93467985	add changes	2023-08-31 19:12:14 +08:00
Tan, Kian-ting	4226b6bb40	Update README.md dep version correcting	2023-08-31 07:19:18 +08:00
Tan, Kian-ting	6a1f3000c5	Merge pull request #2 from raphael-proust/main Improve performance of splitting function	2023-08-30 00:02:54 +08:00
Tan, Kian-ting	11fbe53e74	Merge pull request #1 from raphael-proust/main Remove findlib dynlink	2023-08-29 23:10:21 +08:00
Tan, Kian-ting	aeee3f79cc	Update stringCodepointSplitter.ml	2023-08-29 22:32:58 +08:00
Tan, Kian-ting	6ca99f7399	Update stringCodepointSplitter.ml	2023-08-29 22:30:30 +08:00
Tan, Kian-ting	0bb465f663	Update LICENSE	2023-08-29 22:30:05 +08:00
Raphaël Proust	a9659a7b7e	more test	2023-08-29 15:45:33 +02:00
Raphaël Proust	30bb11b988	Optimise segmentation Avoid costly `@` and prefer one-time reversal.	2023-08-29 15:45:04 +02:00
Raphaël Proust	67acf25cd7	Replacing dynlink by just using dune to link at compile time	2023-08-29 15:38:36 +02:00
Raphaël Proust	1a4afd5d41	Adding tests to make sure things work	2023-08-29 15:38:23 +02:00