This commit is contained in:
Tan, Kian-ting 2023-08-27 01:46:01 +08:00
parent 1564b60c64
commit fa8a81e3c6
16 changed files with 368 additions and 0 deletions

5
Makefile Normal file
View file

@ -0,0 +1,5 @@
# Generate the HTML API documentation for stringCodepointSplitter.ml into docs/.
# `doc` names an action rather than a file it produces, so it is declared phony.
.PHONY : doc
doc : docs stringCodepointSplitter.ml
	ocamlfind ocamldoc -package uutf -html -charset=utf-8 stringCodepointSplitter.ml -d docs

# Create the output directory; -p keeps the rule harmless if it already exists.
docs :
	mkdir -p docs

View file

@ -0,0 +1,41 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<link rel="stylesheet" href="style.css" type="text/css">
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="Start" href="index.html">
<link rel="Up" href="index.html">
<link title="Index of values" rel=Appendix href="index_values.html">
<link title="Index of modules" rel=Appendix href="index_modules.html">
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>StringCodepointSplitter</title>
</head>
<body>
<div class="navbar">&nbsp;<a class="up" href="index.html" title="Index">Up</a>
&nbsp;</div>
<h1>Module <a href="type_StringCodepointSplitter.html">StringCodepointSplitter</a></h1>
<pre><span id="MODULEStringCodepointSplitter"><span class="keyword">module</span> StringCodepointSplitter</span>: <code class="code">sig</code> <a href="StringCodepointSplitter.html">..</a> <code class="code">end</code></pre><hr width="100%">
<p>The Module needs <code class="code">Uutf</code> Module.</p>
<p>It only contains <code class="code">split_string_by_unicode_codepoint</code>, which splits an OCaml string <code class="code">str</code> to a <code class="code">string list</code></p>
<pre><span id="VALsplit_string_by_unicode_codepoint"><span class="keyword">val</span> split_string_by_unicode_codepoint</span> : <code class="type">string -&gt; string list</code></pre><div class="info ">
<div class="info-desc">
<p>Split an OCaml string <code class="code">str</code> to a <code class="code">string list</code></p>
<p><b>Arguments</b></p>
<ul>
<li><code class="code">str</code> the string to be split.
</li>
</ul>
<p><b>Example</b></p>
<pre class="codepre"><code class="code">let units = split_string_by_unicode_codepoint "m̄知 who you're." (*don't know who you are*) in
List.iter (fun x -&gt; print_string (x ^ ", ")) units;;
(*it will output : "m, ̄, 知,  , w, h, o,  , y, o, u, ', r, e, ., "*)</code></pre></div>
</div>
</body></html>

24
docs/index.html Normal file
View file

@ -0,0 +1,24 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<link rel="stylesheet" href="style.css" type="text/css">
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="Start" href="index.html">
<link title="Index of values" rel=Appendix href="index_values.html">
<link title="Index of modules" rel=Appendix href="index_modules.html">
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title></title>
</head>
<body>
<div class = "index-list">
<ul class="indexlist">
<li><a href="index_values.html">Index of values</a></li>
<li><a href="index_modules.html">Index of modules</a></li>
</ul>
</div>
<table class="indextable module-list">
<tr><td class="module"><a href="StringCodepointSplitter.html">StringCodepointSplitter</a></td><td></td></tr>
</table>
</body>
</html>

View file

@ -0,0 +1,18 @@
<html>
<head>
<link rel="stylesheet" href="style.css" type="text/css">
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="Start" href="index.html">
<link title="Index of values" rel=Appendix href="index_values.html">
<link title="Index of modules" rel=Appendix href="index_modules.html">
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>Index of class attributes</title>
</head>
<body>
<div class="navbar">&nbsp;<a class="up" href="index.html" title="Index">Up</a>
&nbsp;</div>
<h1>Index of class attributes</h1>
<table>
</table>
</body>
</html>

View file

@ -0,0 +1,18 @@
<html>
<head>
<link rel="stylesheet" href="style.css" type="text/css">
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="Start" href="index.html">
<link title="Index of values" rel=Appendix href="index_values.html">
<link title="Index of modules" rel=Appendix href="index_modules.html">
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>Index of class types</title>
</head>
<body>
<div class="navbar">&nbsp;<a class="up" href="index.html" title="Index">Up</a>
&nbsp;</div>
<h1>Index of class types</h1>
<table>
</table>
</body>
</html>

18
docs/index_classes.html Normal file
View file

@ -0,0 +1,18 @@
<html>
<head>
<link rel="stylesheet" href="style.css" type="text/css">
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="Start" href="index.html">
<link title="Index of values" rel=Appendix href="index_values.html">
<link title="Index of modules" rel=Appendix href="index_modules.html">
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>Index of classes</title>
</head>
<body>
<div class="navbar">&nbsp;<a class="up" href="index.html" title="Index">Up</a>
&nbsp;</div>
<h1>Index of classes</h1>
<table>
</table>
</body>
</html>

View file

@ -0,0 +1,18 @@
<html>
<head>
<link rel="stylesheet" href="style.css" type="text/css">
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="Start" href="index.html">
<link title="Index of values" rel=Appendix href="index_values.html">
<link title="Index of modules" rel=Appendix href="index_modules.html">
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>Index of exceptions</title>
</head>
<body>
<div class="navbar">&nbsp;<a class="up" href="index.html" title="Index">Up</a>
&nbsp;</div>
<h1>Index of exceptions</h1>
<table>
</table>
</body>
</html>

View file

@ -0,0 +1,18 @@
<html>
<head>
<link rel="stylesheet" href="style.css" type="text/css">
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="Start" href="index.html">
<link title="Index of values" rel=Appendix href="index_values.html">
<link title="Index of modules" rel=Appendix href="index_modules.html">
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>Index of extensions</title>
</head>
<body>
<div class="navbar">&nbsp;<a class="up" href="index.html" title="Index">Up</a>
&nbsp;</div>
<h1>Index of extensions</h1>
<table>
</table>
</body>
</html>

18
docs/index_methods.html Normal file
View file

@ -0,0 +1,18 @@
<html>
<head>
<link rel="stylesheet" href="style.css" type="text/css">
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="Start" href="index.html">
<link title="Index of values" rel=Appendix href="index_values.html">
<link title="Index of modules" rel=Appendix href="index_modules.html">
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>Index of class methods</title>
</head>
<body>
<div class="navbar">&nbsp;<a class="up" href="index.html" title="Index">Up</a>
&nbsp;</div>
<h1>Index of class methods</h1>
<table>
</table>
</body>
</html>

View file

@ -0,0 +1,18 @@
<html>
<head>
<link rel="stylesheet" href="style.css" type="text/css">
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="Start" href="index.html">
<link title="Index of values" rel=Appendix href="index_values.html">
<link title="Index of modules" rel=Appendix href="index_modules.html">
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>Index of module types</title>
</head>
<body>
<div class="navbar">&nbsp;<a class="up" href="index.html" title="Index">Up</a>
&nbsp;</div>
<h1>Index of module types</h1>
<table>
</table>
</body>
</html>

21
docs/index_modules.html Normal file
View file

@ -0,0 +1,21 @@
<html>
<head>
<link rel="stylesheet" href="style.css" type="text/css">
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="Start" href="index.html">
<link title="Index of values" rel=Appendix href="index_values.html">
<link title="Index of modules" rel=Appendix href="index_modules.html">
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>Index of modules</title>
</head>
<body>
<div class="navbar">&nbsp;<a class="up" href="index.html" title="Index">Up</a>
&nbsp;</div>
<h1>Index of modules</h1>
<table>
<tr><td align="left"><div>S</div></td></tr>
<tr><td><a href="StringCodepointSplitter.html">StringCodepointSplitter</a> </td>
<td></td></tr>
</table>
</body>
</html>

18
docs/index_types.html Normal file
View file

@ -0,0 +1,18 @@
<html>
<head>
<link rel="stylesheet" href="style.css" type="text/css">
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="Start" href="index.html">
<link title="Index of values" rel=Appendix href="index_values.html">
<link title="Index of modules" rel=Appendix href="index_modules.html">
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>Index of types</title>
</head>
<body>
<div class="navbar">&nbsp;<a class="up" href="index.html" title="Index">Up</a>
&nbsp;</div>
<h1>Index of types</h1>
<table>
</table>
</body>
</html>

25
docs/index_values.html Normal file
View file

@ -0,0 +1,25 @@
<html>
<head>
<link rel="stylesheet" href="style.css" type="text/css">
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="Start" href="index.html">
<link title="Index of values" rel=Appendix href="index_values.html">
<link title="Index of modules" rel=Appendix href="index_modules.html">
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>Index of values</title>
</head>
<body>
<div class="navbar">&nbsp;<a class="up" href="index.html" title="Index">Up</a>
&nbsp;</div>
<h1>Index of values</h1>
<table>
<tr><td align="left"><div>S</div></td></tr>
<tr><td><a href="StringCodepointSplitter.html#VALsplit_string_by_unicode_codepoint">split_string_by_unicode_codepoint</a> [<a href="StringCodepointSplitter.html">StringCodepointSplitter</a>]</td>
<td><div class="info">
<p>Split an OCaml string <code class="code">str</code> to a <code class="code">string list</code></p>
</div>
</td></tr>
</table>
</body>
</html>

43
docs/style.css Normal file
View file

@ -0,0 +1,43 @@
.keyword { font-weight : bold ; color : Red }
.keywordsign { color : #C04600 }
.comment { color : Green }
.constructor { color : Blue }
.type { color : #5C6585 }
.string { color : Maroon }
.warning { color : Red ; font-weight : bold }
.info { margin-left : 3em; margin-right: 3em }
.param_info { margin-top: 4px; margin-left : 3em; margin-right : 3em }
.code { color : #465F91 ; }
.typetable { border-style : hidden }
.paramstable { border-style : hidden ; padding: 5pt 5pt}
tr { background-color : White }
td.typefieldcomment { background-color : #FFFFFF ; font-size: smaller ;}
div.sig_block {margin-left: 2em}
*:target { background: yellow; }
body {font: 13px sans-serif; color: black; text-align: left; padding: 5px; margin: 0}
h1 { font-size : 20pt ; text-align: center; }
h2 { font-size : 20pt ; text-align: center; }
h3 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #90BDFF ;padding: 2px; }
h4 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #90DDFF ;padding: 2px; }
h5 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #90EDFF ;padding: 2px; }
h6 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #90FDFF ;padding: 2px; }
div.h7 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #90BDFF ; padding: 2px; }
div.h8 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #E0FFFF ; padding: 2px; }
div.h9 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #F0FFFF ; padding: 2px; }
div.h10 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #FFFFFF ; padding: 2px; }
a {color: #416DFF; text-decoration: none}
a:hover {background-color: #ddd; text-decoration: underline}
pre { margin-bottom: 4px; font-family: monospace; }
pre.verbatim, pre.codepre { }
.indextable {border: 1px #ddd solid; border-collapse: collapse}
.indextable td, .indextable th {border: 1px #ddd solid; min-width: 80px}
.indextable td.module {background-color: #eee ; padding-left: 2px; padding-right: 2px}
.indextable td.module a {color: #4E6272; text-decoration: none; display: block; width: 100%}
.indextable td.module a:hover {text-decoration: underline; background-color: transparent}
.deprecated {color: #888; font-style: italic}
.indextable tr td div.info { margin-left: 2px; margin-right: 2px }
ul.indexlist { margin-left: 0; padding-left: 0;}
ul.indexlist li { list-style-type: none ; margin-left: 0; padding-left: 0; }
ul.info-attributes {list-style: none; margin: 0; padding: 0; }
div.info > p:first-child { margin-top:0; }
div.info-desc > p:first-child { margin-top:0; margin-bottom:0; }

View file

@ -0,0 +1,11 @@
<html><head>
<link rel="stylesheet" href="style.css" type="text/css">
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="Start" href="index.html">
<link title="Index of values" rel=Appendix href="index_values.html">
<link title="Index of modules" rel=Appendix href="index_modules.html">
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>StringCodepointSplitter</title>
</head>
<body>
<code class="code"><span class="keyword">sig</span>&nbsp;<span class="keyword">end</span></code></body></html>

View file

@ -0,0 +1,54 @@
(*#use "topfind";;*)
open Stdlib
open Uutf
(**
This module requires the [Uutf] library.
It contains a single function, [split_string_by_unicode_codepoint], which splits an OCaml string [str] into a [string list] with one element per Unicode code point.
*)
(** Split an OCaml string [str] into a [string list] with one element per
unit reported by the UTF-8 decoder (one Unicode code point each).

Combining marks are code points of their own, so they become separate
elements; no grapheme clustering is performed. The empty string yields the
empty list.

{b Example}
{[let units = split_string_by_unicode_codepoint "m̄知 who you're." in
List.iter (fun x -> print_string (x ^ ", ")) units;;
(* prints "m, ̄, 知,  , w, h, o,  , y, o, u, ', r, e, ., " *)]}
@param str the string to be split, expected to be UTF-8 encoded.
@return the substrings of [str], in their original order. *)
let split_string_by_unicode_codepoint str =
  (* Collect the starting byte offset of every decoded unit. Consing keeps
     the fold linear; the offsets end up in reverse order (last unit first). *)
  let starts_rev =
    Uutf.String.fold_utf_8 (fun acc pos _ -> pos :: acc) [] str
  in
  (* Walk the offsets from the last unit back to the first. Each unit spans
     from its own start up to the start of the unit that follows it, or up to
     the end of the string for the last one. Consing while walking backwards
     rebuilds the result in forward order. With no offsets at all (empty
     input) the result is simply the empty list. *)
  let units, _ =
    List.fold_left
      (fun (acc, stop) start ->
        (Stdlib.String.sub str start (stop - start) :: acc, start))
      ([], Stdlib.String.length str)
      starts_rev
  in
  units;;
(* Demo executed when the module is initialised: prints every element of the
   split sample string, each followed by a comma and a space.
   NOTE(review): this runs in every program that links the module; consider
   moving it to a separate example or test file. *)
let () =
  List.iter
    (fun x -> print_string (x ^ ", "))
    (split_string_by_unicode_codepoint "m̄知 who you're.");;