0.0.1
This commit is contained in:
parent
1564b60c64
commit
fa8a81e3c6
16 changed files with 368 additions and 0 deletions
5
Makefile
Normal file
5
Makefile
Normal file
|
@ -0,0 +1,5 @@
|
|||
doc : docs stringCodepointSplitter.ml
|
||||
ocamlfind ocamldoc -package uutf -html -charset=utf-8 stringCodepointSplitter.ml -d docs
|
||||
|
||||
docs :
|
||||
mkdir docs
|
41
docs/StringCodepointSplitter.html
Normal file
41
docs/StringCodepointSplitter.html
Normal file
|
@ -0,0 +1,41 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<link rel="stylesheet" href="style.css" type="text/css">
|
||||
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="Start" href="index.html">
|
||||
<link rel="Up" href="index.html">
|
||||
<link title="Index of values" rel=Appendix href="index_values.html">
|
||||
<link title="Index of modules" rel=Appendix href="index_modules.html">
|
||||
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>StringCodepointSplitter</title>
|
||||
</head>
|
||||
<body>
|
||||
<div class="navbar"> <a class="up" href="index.html" title="Index">Up</a>
|
||||
</div>
|
||||
<h1>Module <a href="type_StringCodepointSplitter.html">StringCodepointSplitter</a></h1>
|
||||
|
||||
<pre><span id="MODULEStringCodepointSplitter"><span class="keyword">module</span> StringCodepointSplitter</span>: <code class="code">sig</code> <a href="StringCodepointSplitter.html">..</a> <code class="code">end</code></pre><hr width="100%">
|
||||
<p>The Module needs <code class="code">Uutf</code> Module.</p>
|
||||
|
||||
<p>It only contains <code class="code">split_string_by_unicode_codepoint</code>, which splits an OCaml string <code class="code">str</code> to a <code class="code">string list</code></p>
|
||||
|
||||
<pre><span id="VALsplit_string_by_unicode_codepoint"><span class="keyword">val</span> split_string_by_unicode_codepoint</span> : <code class="type">string -> string list</code></pre><div class="info ">
|
||||
<div class="info-desc">
|
||||
<p>Split an OCaml string <code class="code">str</code> to a <code class="code">string list</code></p>
|
||||
|
||||
<p><b>Arguments</b></p>
|
||||
<ul>
|
||||
<li><code class="code">str</code> the string to be splitted.
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<p><b>Example</b></p>
|
||||
|
||||
<pre class="codepre"><code class="code">let example= split_string_by_unicode_codepoint "m̄知 who you're." (*don't know who you are*) in
|
||||
|
||||
List.map (fun x -> print_string (x ^ ", ")) (split_string_by_unicode_codepoint example);;
|
||||
|
||||
(*it will output : "m, ̄, 知, , w, h, o, , y, o, u, ', r, e, ., "*)</code></pre></div>
|
||||
</div>
|
||||
</body></html>
|
24
docs/index.html
Normal file
24
docs/index.html
Normal file
|
@ -0,0 +1,24 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<link rel="stylesheet" href="style.css" type="text/css">
|
||||
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="Start" href="index.html">
|
||||
<link title="Index of values" rel=Appendix href="index_values.html">
|
||||
<link title="Index of modules" rel=Appendix href="index_modules.html">
|
||||
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title></title>
|
||||
</head>
|
||||
<body>
|
||||
<div class = "index-list">
|
||||
<ul class="indexlist">
|
||||
<li><a href="index_values.html">Index of values</a></li>
|
||||
<li><a href="index_modules.html">Index of modules</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<table class="indextable module-list">
|
||||
<tr><td class="module"><a href="StringCodepointSplitter.html">StringCodepointSplitter</a></td><td></td></tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
18
docs/index_attributes.html
Normal file
18
docs/index_attributes.html
Normal file
|
@ -0,0 +1,18 @@
|
|||
<html>
|
||||
<head>
|
||||
<link rel="stylesheet" href="style.css" type="text/css">
|
||||
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="Start" href="index.html">
|
||||
<link title="Index of values" rel=Appendix href="index_values.html">
|
||||
<link title="Index of modules" rel=Appendix href="index_modules.html">
|
||||
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>Index of class attributes</title>
|
||||
</head>
|
||||
<body>
|
||||
<div class="navbar"> <a class="up" href="index.html" title="Index">Up</a>
|
||||
</div>
|
||||
<h1>Index of class attributes</h1>
|
||||
<table>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
18
docs/index_class_types.html
Normal file
18
docs/index_class_types.html
Normal file
|
@ -0,0 +1,18 @@
|
|||
<html>
|
||||
<head>
|
||||
<link rel="stylesheet" href="style.css" type="text/css">
|
||||
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="Start" href="index.html">
|
||||
<link title="Index of values" rel=Appendix href="index_values.html">
|
||||
<link title="Index of modules" rel=Appendix href="index_modules.html">
|
||||
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>Index of class types</title>
|
||||
</head>
|
||||
<body>
|
||||
<div class="navbar"> <a class="up" href="index.html" title="Index">Up</a>
|
||||
</div>
|
||||
<h1>Index of class types</h1>
|
||||
<table>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
18
docs/index_classes.html
Normal file
18
docs/index_classes.html
Normal file
|
@ -0,0 +1,18 @@
|
|||
<html>
|
||||
<head>
|
||||
<link rel="stylesheet" href="style.css" type="text/css">
|
||||
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="Start" href="index.html">
|
||||
<link title="Index of values" rel=Appendix href="index_values.html">
|
||||
<link title="Index of modules" rel=Appendix href="index_modules.html">
|
||||
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>Index of classes</title>
|
||||
</head>
|
||||
<body>
|
||||
<div class="navbar"> <a class="up" href="index.html" title="Index">Up</a>
|
||||
</div>
|
||||
<h1>Index of classes</h1>
|
||||
<table>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
18
docs/index_exceptions.html
Normal file
18
docs/index_exceptions.html
Normal file
|
@ -0,0 +1,18 @@
|
|||
<html>
|
||||
<head>
|
||||
<link rel="stylesheet" href="style.css" type="text/css">
|
||||
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="Start" href="index.html">
|
||||
<link title="Index of values" rel=Appendix href="index_values.html">
|
||||
<link title="Index of modules" rel=Appendix href="index_modules.html">
|
||||
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>Index of exceptions</title>
|
||||
</head>
|
||||
<body>
|
||||
<div class="navbar"> <a class="up" href="index.html" title="Index">Up</a>
|
||||
</div>
|
||||
<h1>Index of exceptions</h1>
|
||||
<table>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
18
docs/index_extensions.html
Normal file
18
docs/index_extensions.html
Normal file
|
@ -0,0 +1,18 @@
|
|||
<html>
|
||||
<head>
|
||||
<link rel="stylesheet" href="style.css" type="text/css">
|
||||
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="Start" href="index.html">
|
||||
<link title="Index of values" rel=Appendix href="index_values.html">
|
||||
<link title="Index of modules" rel=Appendix href="index_modules.html">
|
||||
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>Index of extensions</title>
|
||||
</head>
|
||||
<body>
|
||||
<div class="navbar"> <a class="up" href="index.html" title="Index">Up</a>
|
||||
</div>
|
||||
<h1>Index of extensions</h1>
|
||||
<table>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
18
docs/index_methods.html
Normal file
18
docs/index_methods.html
Normal file
|
@ -0,0 +1,18 @@
|
|||
<html>
|
||||
<head>
|
||||
<link rel="stylesheet" href="style.css" type="text/css">
|
||||
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="Start" href="index.html">
|
||||
<link title="Index of values" rel=Appendix href="index_values.html">
|
||||
<link title="Index of modules" rel=Appendix href="index_modules.html">
|
||||
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>Index of class methods</title>
|
||||
</head>
|
||||
<body>
|
||||
<div class="navbar"> <a class="up" href="index.html" title="Index">Up</a>
|
||||
</div>
|
||||
<h1>Index of class methods</h1>
|
||||
<table>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
18
docs/index_module_types.html
Normal file
18
docs/index_module_types.html
Normal file
|
@ -0,0 +1,18 @@
|
|||
<html>
|
||||
<head>
|
||||
<link rel="stylesheet" href="style.css" type="text/css">
|
||||
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="Start" href="index.html">
|
||||
<link title="Index of values" rel=Appendix href="index_values.html">
|
||||
<link title="Index of modules" rel=Appendix href="index_modules.html">
|
||||
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>Index of module types</title>
|
||||
</head>
|
||||
<body>
|
||||
<div class="navbar"> <a class="up" href="index.html" title="Index">Up</a>
|
||||
</div>
|
||||
<h1>Index of module types</h1>
|
||||
<table>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
21
docs/index_modules.html
Normal file
21
docs/index_modules.html
Normal file
|
@ -0,0 +1,21 @@
|
|||
<html>
|
||||
<head>
|
||||
<link rel="stylesheet" href="style.css" type="text/css">
|
||||
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="Start" href="index.html">
|
||||
<link title="Index of values" rel=Appendix href="index_values.html">
|
||||
<link title="Index of modules" rel=Appendix href="index_modules.html">
|
||||
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>Index of modules</title>
|
||||
</head>
|
||||
<body>
|
||||
<div class="navbar"> <a class="up" href="index.html" title="Index">Up</a>
|
||||
</div>
|
||||
<h1>Index of modules</h1>
|
||||
<table>
|
||||
<tr><td align="left"><div>S</div></td></tr>
|
||||
<tr><td><a href="StringCodepointSplitter.html">StringCodepointSplitter</a> </td>
|
||||
<td></td></tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
18
docs/index_types.html
Normal file
18
docs/index_types.html
Normal file
|
@ -0,0 +1,18 @@
|
|||
<html>
|
||||
<head>
|
||||
<link rel="stylesheet" href="style.css" type="text/css">
|
||||
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="Start" href="index.html">
|
||||
<link title="Index of values" rel=Appendix href="index_values.html">
|
||||
<link title="Index of modules" rel=Appendix href="index_modules.html">
|
||||
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>Index of types</title>
|
||||
</head>
|
||||
<body>
|
||||
<div class="navbar"> <a class="up" href="index.html" title="Index">Up</a>
|
||||
</div>
|
||||
<h1>Index of types</h1>
|
||||
<table>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
25
docs/index_values.html
Normal file
25
docs/index_values.html
Normal file
|
@ -0,0 +1,25 @@
|
|||
<html>
|
||||
<head>
|
||||
<link rel="stylesheet" href="style.css" type="text/css">
|
||||
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="Start" href="index.html">
|
||||
<link title="Index of values" rel=Appendix href="index_values.html">
|
||||
<link title="Index of modules" rel=Appendix href="index_modules.html">
|
||||
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>Index of values</title>
|
||||
</head>
|
||||
<body>
|
||||
<div class="navbar"> <a class="up" href="index.html" title="Index">Up</a>
|
||||
</div>
|
||||
<h1>Index of values</h1>
|
||||
<table>
|
||||
<tr><td align="left"><div>S</div></td></tr>
|
||||
<tr><td><a href="StringCodepointSplitter.html#VALsplit_string_by_unicode_codepoint">split_string_by_unicode_codepoint</a> [<a href="StringCodepointSplitter.html">StringCodepointSplitter</a>]</td>
|
||||
<td><div class="info">
|
||||
<p>Split an OCaml string <code class="code">str</code> to a <code class="code">string list</code></p>
|
||||
|
||||
</div>
|
||||
</td></tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
43
docs/style.css
Normal file
43
docs/style.css
Normal file
|
@ -0,0 +1,43 @@
|
|||
.keyword { font-weight : bold ; color : Red }
|
||||
.keywordsign { color : #C04600 }
|
||||
.comment { color : Green }
|
||||
.constructor { color : Blue }
|
||||
.type { color : #5C6585 }
|
||||
.string { color : Maroon }
|
||||
.warning { color : Red ; font-weight : bold }
|
||||
.info { margin-left : 3em; margin-right: 3em }
|
||||
.param_info { margin-top: 4px; margin-left : 3em; margin-right : 3em }
|
||||
.code { color : #465F91 ; }
|
||||
.typetable { border-style : hidden }
|
||||
.paramstable { border-style : hidden ; padding: 5pt 5pt}
|
||||
tr { background-color : White }
|
||||
td.typefieldcomment { background-color : #FFFFFF ; font-size: smaller ;}
|
||||
div.sig_block {margin-left: 2em}
|
||||
*:target { background: yellow; }
|
||||
body {font: 13px sans-serif; color: black; text-align: left; padding: 5px; margin: 0}
|
||||
h1 { font-size : 20pt ; text-align: center; }
|
||||
h2 { font-size : 20pt ; text-align: center; }
|
||||
h3 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #90BDFF ;padding: 2px; }
|
||||
h4 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #90DDFF ;padding: 2px; }
|
||||
h5 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #90EDFF ;padding: 2px; }
|
||||
h6 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #90FDFF ;padding: 2px; }
|
||||
div.h7 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #90BDFF ; padding: 2px; }
|
||||
div.h8 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #E0FFFF ; padding: 2px; }
|
||||
div.h9 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #F0FFFF ; padding: 2px; }
|
||||
div.h10 { font-size : 20pt ; border: 1px solid #000000; margin-top: 5px; margin-bottom: 2px;text-align: center; background-color: #FFFFFF ; padding: 2px; }
|
||||
a {color: #416DFF; text-decoration: none}
|
||||
a:hover {background-color: #ddd; text-decoration: underline}
|
||||
pre { margin-bottom: 4px; font-family: monospace; }
|
||||
pre.verbatim, pre.codepre { }
|
||||
.indextable {border: 1px #ddd solid; border-collapse: collapse}
|
||||
.indextable td, .indextable th {border: 1px #ddd solid; min-width: 80px}
|
||||
.indextable td.module {background-color: #eee ; padding-left: 2px; padding-right: 2px}
|
||||
.indextable td.module a {color: #4E6272; text-decoration: none; display: block; width: 100%}
|
||||
.indextable td.module a:hover {text-decoration: underline; background-color: transparent}
|
||||
.deprecated {color: #888; font-style: italic}
|
||||
.indextable tr td div.info { margin-left: 2px; margin-right: 2px }
|
||||
ul.indexlist { margin-left: 0; padding-left: 0;}
|
||||
ul.indexlist li { list-style-type: none ; margin-left: 0; padding-left: 0; }
|
||||
ul.info-attributes {list-style: none; margin: 0; padding: 0; }
|
||||
div.info > p:first-child { margin-top:0; }
|
||||
div.info-desc > p:first-child { margin-top:0; margin-bottom:0; }
|
11
docs/type_StringCodepointSplitter.html
Normal file
11
docs/type_StringCodepointSplitter.html
Normal file
|
@ -0,0 +1,11 @@
|
|||
<html><head>
|
||||
<link rel="stylesheet" href="style.css" type="text/css">
|
||||
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="Start" href="index.html">
|
||||
<link title="Index of values" rel=Appendix href="index_values.html">
|
||||
<link title="Index of modules" rel=Appendix href="index_modules.html">
|
||||
<link title="StringCodepointSplitter" rel="Chapter" href="StringCodepointSplitter.html"><title>StringCodepointSplitter</title>
|
||||
</head>
|
||||
<body>
|
||||
<code class="code"><span class="keyword">sig</span> <span class="keyword">end</span></code></body></html>
|
54
stringCodepointSplitter.ml
Normal file
54
stringCodepointSplitter.ml
Normal file
|
@ -0,0 +1,54 @@
|
|||
(*#use "topfind";;*)
|
||||
open Stdlib
|
||||
open Uutf
|
||||
|
||||
(**
|
||||
The Module needs [Uutf] Module.
|
||||
|
||||
It only contains [split_string_by_unicode_codepoint], which splits an OCaml string [str] to a [string list]
|
||||
*)
|
||||
|
||||
|
||||
(** [split_string_by_unicode_codepoint str] splits the OCaml string [str]
    into a [string list] with one element per Unicode code point, in the
    order in which they occur in [str].

    [str] is walked with [Uutf.String.fold_utf_8]. Only the byte offsets
    reported by the decoder are used: each element is the byte slice of
    [str] between two consecutive offsets, and the last element extends to
    the end of [str]. Combining marks are code points of their own and are
    therefore returned as separate elements.

    {b Arguments}
    {ul
    {- [str] the string to be split.}}

    {b Returns} the list of substrings, or the empty list when [str] is
    empty.

    {b Example}

    {[
let pieces = split_string_by_unicode_codepoint "m̄知 who you're." in
List.iter (fun x -> print_string (x ^ ", ")) pieces
(* prints: "m, ̄, 知,  , w, h, o,  , y, o, u, ', r, e, ., " *)
    ]}
*)
let split_string_by_unicode_codepoint str =
  (* Byte slice of [str] from offset [start] (inclusive) to [stop]
     (exclusive). *)
  let slice start stop = Stdlib.String.sub str start (stop - start) in
  (* Accumulator: the offset at which the pending code point starts (if any)
     and the slices collected so far, most recent first. Consing and
     reversing once keeps the split linear, unlike repeated [@] appends. *)
  let step (pending, rev_slices) pos _decoded =
    match pending with
    | None -> (Some pos, rev_slices)
    | Some start -> (Some pos, slice start pos :: rev_slices)
  in
  match Uutf.String.fold_utf_8 step (None, []) str with
  | None, _ ->
    (* The folder never ran, i.e. [str] is empty: nothing to slice. The
       previous implementation raised [Invalid_argument] here. *)
    []
  | Some start, rev_slices ->
    (* The last code point runs from its start offset to the end of [str]. *)
    Stdlib.List.rev (slice start (Stdlib.String.length str) :: rev_slices);;
|
||||
|
||||
|
||||
(* Demo executed when the module is loaded: prints every code point of the
   sample string, each one followed by a comma and a space. [List.iter] is
   used because only the printing side effect matters. *)
let () =
  Stdlib.List.iter
    (fun x -> print_string (x ^ ", "))
    (split_string_by_unicode_codepoint "m̄知 who you're.");;
|
||||
|
||||
|
Loading…
Reference in a new issue