From fa8a81e3c640980b8db8077f2d4b72a92554e579 Mon Sep 17 00:00:00 2001 From: Tan Kian-ting Date: Sun, 27 Aug 2023 01:46:01 +0800 Subject: [PATCH] 0.0.1 --- Makefile | 5 +++ docs/StringCodepointSplitter.html | 41 +++++++++++++++++++ docs/index.html | 24 ++++++++++++ docs/index_attributes.html | 18 +++++++++ docs/index_class_types.html | 18 +++++++++ docs/index_classes.html | 18 +++++++++ docs/index_exceptions.html | 18 +++++++++ docs/index_extensions.html | 18 +++++++++ docs/index_methods.html | 18 +++++++++ docs/index_module_types.html | 18 +++++++++ docs/index_modules.html | 21 ++++++++++ docs/index_types.html | 18 +++++++++ docs/index_values.html | 25 ++++++++++++ docs/style.css | 43 ++++++++++++++++++++ docs/type_StringCodepointSplitter.html | 11 ++++++ stringCodepointSplitter.ml | 54 ++++++++++++++++++++++++++ 16 files changed, 368 insertions(+) create mode 100644 Makefile create mode 100644 docs/StringCodepointSplitter.html create mode 100644 docs/index.html create mode 100644 docs/index_attributes.html create mode 100644 docs/index_class_types.html create mode 100644 docs/index_classes.html create mode 100644 docs/index_exceptions.html create mode 100644 docs/index_extensions.html create mode 100644 docs/index_methods.html create mode 100644 docs/index_module_types.html create mode 100644 docs/index_modules.html create mode 100644 docs/index_types.html create mode 100644 docs/index_values.html create mode 100644 docs/style.css create mode 100644 docs/type_StringCodepointSplitter.html create mode 100644 stringCodepointSplitter.ml diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f22a73b --- /dev/null +++ b/Makefile @@ -0,0 +1,5 @@ +doc : docs stringCodepointSplitter.ml + ocamlfind ocamldoc -package uutf -html -charset=utf-8 stringCodepointSplitter.ml -d docs + +docs : + mkdir docs \ No newline at end of file diff --git a/docs/StringCodepointSplitter.html b/docs/StringCodepointSplitter.html new file mode 100644 index 0000000..7454207 --- 
/dev/null +++ b/docs/StringCodepointSplitter.html @@ -0,0 +1,41 @@ + + + + + + + + + + +StringCodepointSplitter + + + +

Module StringCodepointSplitter

+ +
module StringCodepointSplitter: sig .. end

+

This module depends on the Uutf library.

+ +

It only contains split_string_by_unicode_codepoint, which splits an OCaml string str into a string list

+ +
val split_string_by_unicode_codepoint : string -> string list
+
+

Split an OCaml string str into a string list

+ +

Arguments

+
    +
  • str the string to be split. +
  • +
+ +

Example

+ +
let example = "m̄知 who you're." (*don't know who you are*) in
+
+List.map (fun x -> print_string (x ^ ", ")) (split_string_by_unicode_codepoint example);;
+  
+(*it will output : "m, ̄, 知,  , w, h, o,  , y, o, u, ', r, e, ., "*)
+
+ diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 0000000..987b93e --- /dev/null +++ b/docs/index.html @@ -0,0 +1,24 @@ + + + + + + + + + + + + +
+ +
+ + + +
StringCodepointSplitter
+ + diff --git a/docs/index_attributes.html b/docs/index_attributes.html new file mode 100644 index 0000000..eff9c8e --- /dev/null +++ b/docs/index_attributes.html @@ -0,0 +1,18 @@ + + + + + + + + +Index of class attributes + + + +

Index of class attributes

+ +
+ + diff --git a/docs/index_class_types.html b/docs/index_class_types.html new file mode 100644 index 0000000..69cfea9 --- /dev/null +++ b/docs/index_class_types.html @@ -0,0 +1,18 @@ + + + + + + + + +Index of class types + + + +

Index of class types

+ +
+ + diff --git a/docs/index_classes.html b/docs/index_classes.html new file mode 100644 index 0000000..33bf4b6 --- /dev/null +++ b/docs/index_classes.html @@ -0,0 +1,18 @@ + + + + + + + + +Index of classes + + + +

Index of classes

+ +
+ + diff --git a/docs/index_exceptions.html b/docs/index_exceptions.html new file mode 100644 index 0000000..cf9e16b --- /dev/null +++ b/docs/index_exceptions.html @@ -0,0 +1,18 @@ + + + + + + + + +Index of exceptions + + + +

Index of exceptions

+ +
+ + diff --git a/docs/index_extensions.html b/docs/index_extensions.html new file mode 100644 index 0000000..23ae9db --- /dev/null +++ b/docs/index_extensions.html @@ -0,0 +1,18 @@ + + + + + + + + +Index of extensions + + + +

Index of extensions

+ +
+ + diff --git a/docs/index_methods.html b/docs/index_methods.html new file mode 100644 index 0000000..ddfede4 --- /dev/null +++ b/docs/index_methods.html @@ -0,0 +1,18 @@ + + + + + + + + +Index of class methods + + + +

Index of class methods

+ +
+ + diff --git a/docs/index_module_types.html b/docs/index_module_types.html new file mode 100644 index 0000000..7a44842 --- /dev/null +++ b/docs/index_module_types.html @@ -0,0 +1,18 @@ + + + + + + + + +Index of module types + + + +

Index of module types

+ +
+ + diff --git a/docs/index_modules.html b/docs/index_modules.html new file mode 100644 index 0000000..f33fc5e --- /dev/null +++ b/docs/index_modules.html @@ -0,0 +1,21 @@ + + + + + + + + +Index of modules + + + +

Index of modules

+ + + + +
S
StringCodepointSplitter
+ + diff --git a/docs/index_types.html b/docs/index_types.html new file mode 100644 index 0000000..4ba29c5 --- /dev/null +++ b/docs/index_types.html @@ -0,0 +1,18 @@ + + + + + + + + +Index of types + + + +

Index of types

+ +
+ + diff --git a/docs/index_values.html b/docs/index_values.html new file mode 100644 index 0000000..cbc589b --- /dev/null +++ b/docs/index_values.html @@ -0,0 +1,25 @@ + + + + + + + + +Index of values + + + +

Index of values

+ + + + +
S
split_string_by_unicode_codepoint [StringCodepointSplitter]
+

Split an OCaml string str into a string list

+ +
+
+ + diff --git a/docs/style.css b/docs/style.css new file mode 100644 index 0000000..259bd49 --- /dev/null +++ b/docs/style.css @@ -0,0 +1,43 @@ +.keyword { font-weight : bold ; color : Red } +.keywordsign { color : #C04600 } +.comment { color : Green } +.constructor { color : Blue } +.type { color : #5C6585 } +.string { color : Maroon } +.warning { color : Red ; font-weight : bold } +.info { margin-left : 3em; margin-right: 3em } +.param_info { margin-top: 4px; margin-left : 3em; margin-right : 3em } +.code { color : #465F91 ; } +.typetable { border-style : hidden } +.paramstable { border-style : hidden ; padding: 5pt 5pt} +tr { background-color : White } +td.typefieldcomment { background-color : #FFFFFF ; font-size: smaller ;} +div.sig_block {margin-left: 2em} +*:target { background: yellow; } +body {font: 13px sans-serif; color: black; text-align: left; padding: 5px; margin: 0} +h1 { font-size : 20pt ; text-align: center; } +h2 { font-size : 20pt ; text-align: center; } +h3 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #90BDFF ;padding: 2px; } +h4 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #90DDFF ;padding: 2px; } +h5 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #90EDFF ;padding: 2px; } +h6 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #90FDFF ;padding: 2px; } +div.h7 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #90BDFF ; padding: 2px; } +div.h8 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #E0FFFF ; padding: 2px; } +div.h9 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 
2px;text-align: center; background-color: #F0FFFF ; padding: 2px; } +div.h10 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #FFFFFF ; padding: 2px; } +a {color: #416DFF; text-decoration: none} +a:hover {background-color: #ddd; text-decoration: underline} +pre { margin-bottom: 4px; font-family: monospace; } +pre.verbatim, pre.codepre { } +.indextable {border: 1px #ddd solid; border-collapse: collapse} +.indextable td, .indextable th {border: 1px #ddd solid; min-width: 80px} +.indextable td.module {background-color: #eee ; padding-left: 2px; padding-right: 2px} +.indextable td.module a {color: #4E6272; text-decoration: none; display: block; width: 100%} +.indextable td.module a:hover {text-decoration: underline; background-color: transparent} +.deprecated {color: #888; font-style: italic} +.indextable tr td div.info { margin-left: 2px; margin-right: 2px } +ul.indexlist { margin-left: 0; padding-left: 0;} +ul.indexlist li { list-style-type: none ; margin-left: 0; padding-left: 0; } +ul.info-attributes {list-style: none; margin: 0; padding: 0; } +div.info > p:first-child { margin-top:0; } +div.info-desc > p:first-child { margin-top:0; margin-bottom:0; } \ No newline at end of file diff --git a/docs/type_StringCodepointSplitter.html b/docs/type_StringCodepointSplitter.html new file mode 100644 index 0000000..2fadd25 --- /dev/null +++ b/docs/type_StringCodepointSplitter.html @@ -0,0 +1,11 @@ + + + + + + + +StringCodepointSplitter + + +sig end diff --git a/stringCodepointSplitter.ml b/stringCodepointSplitter.ml new file mode 100644 index 0000000..4464364 --- /dev/null +++ b/stringCodepointSplitter.ml @@ -0,0 +1,54 @@ +(*#use "topfind";;*) +open Stdlib +open Uutf + +(** + The Module needs [Uutf] Module. 
+
+  It only contains [split_string_by_unicode_codepoint], which splits an OCaml string [str] into a [string list]
+*)
+
+
+(** [split_string_by_unicode_codepoint str] splits the OCaml string [str] into
+    a [string list], one element per unit reported by
+    [Uutf.String.fold_utf_8].
+
+    Each element is the slice of [str] that runs from the byte offset reported
+    for one unit up to the offset reported for the next one (or up to the end
+    of [str] for the last unit). Elements are returned in their original
+    order. Combining marks are separate code points, so they end up in an
+    element of their own (see the example).
+
+    The empty string yields the empty list.
+
+{b Arguments}
+{ul
+  {- [str] the string to be split.
+  }}
+
+  {b Example}
+
+{[let example = "m̄知 who you're." (*don't know who you are*) in
+
+List.iter (fun x -> print_string (x ^ ", ")) (split_string_by_unicode_codepoint example);;
+
+(*it will output : "m, ̄, 知,  , w, h, o,  , y, o, u, ', r, e, ., "*)]}
+ *)
+let split_string_by_unicode_codepoint str =
+  (* [Stdlib.String] is spelled out because [open Uutf] at the top of the
+     file shadows [String] with [Uutf.String]. *)
+  let total_len = Stdlib.String.length str in
+  (* Byte offset at which each unit starts, last unit first. Only the offset
+     is needed, so the decoded value passed by the folder is ignored. *)
+  let starts_rev =
+    Uutf.String.fold_utf_8 (fun acc pos _ -> pos :: acc) [] str
+  in
+  (* Walk the offsets from the end of the string towards its start: a unit
+     spans from its own offset up to [stop], the offset of the unit that
+     follows it (initially the end of the string). Consing while walking
+     backwards yields the pieces in their original order without any list
+     append, and an empty offset list simply produces []. *)
+  let pieces, _ =
+    List.fold_left
+      (fun (pieces, stop) start ->
+        (Stdlib.String.sub str start (stop - start) :: pieces, start))
+      ([], total_len) starts_rev
+  in
+  pieces
+
+
+(* Demo that runs when the module is initialised; kept so that loading the
+   module prints exactly what it printed before. *)
+let () =
+  List.iter
+    (fun x -> print_string (x ^ ", "))
+    (split_string_by_unicode_codepoint "m̄知 who you're.")
+