This commit is contained in:
Tan, Kian-ting 2023-09-30 22:11:17 +08:00
parent fa20cc5af2
commit f9f56b4e9b
6 changed files with 203 additions and 1 deletions

1
uann

@ -1 +0,0 @@
Subproject commit 513173b22fa4776c54cae4bb6b78dcc8ec9acac9

BIN
uann.tar.gz Normal file

Binary file not shown.

16
uann/Cargo.lock generated Normal file
View file

@ -0,0 +1,16 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "parsing"
version = "0.1.0"
dependencies = [
"unicode-segmentation",
]
[[package]]
name = "unicode-segmentation"
version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36"

9
uann/Cargo.toml Normal file
View file

@ -0,0 +1,9 @@
[package]
name = "parsing"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
unicode-segmentation = "1.6.0"

2
uann/README.md Normal file
View file

@ -0,0 +1,2 @@
# uann
A practice project for writing a compiler or a parser.

176
uann/src/main.rs Normal file
View file

@ -0,0 +1,176 @@
/// (c) 2023 Tan Kian-ting <yoxem@kianting.info>
/// Under MIT License
/// 习包子 梁家河小学博士 清零宗 习炀帝 庆丰大帝
/// 独裁国贼 新疆集中营 光复香港时代革命 祈翠 南蒙古独立 香港独立
///
///
/// Parsing state handed from one matcher to the next: a pair of strings.
///
/// - `matched`: the text accumulated by the matchers that have succeeded so far.
/// - `remained`: the text that still has to be matched.
#[derive(Debug, Clone, PartialEq)]
pub struct Matchee {
    matched: String,
    remained: String,
}
/// Shorthand that turns its argument into an owned `String` by expanding to
/// `<arg>.to_string()`.
///
/// # Example
/// ```
/// let a = "abc";
/// assert_eq!(string!(a), a.to_string());
/// ```
macro_rules! string {
    ($value:expr) => {
        $value.to_string()
    };
}
/// Chains matchers from left to right.
///
/// `then!(a, b [, c...]*)` is similar to `a ==> b ((==> c)...)*`; it expands
/// to nested calls, e.g. `then!(a, b, c)` becomes
/// `then_do(then_do(a, &b), &c)`.
///
/// - first argument: the starting value passed as the first argument of
///   `then_do`.
/// - every following argument: a matcher; the macro takes a reference to it
///   (`&`) before handing it to `then_do`.
///
/// A trailing comma after the last matcher is accepted.
///
/// Both arms expand to a plain expression with no trailing `;`, so the macro
/// can be used wherever a value is expected (for example as a `println!`
/// argument). A trailing semicolon inside the expansion is reported by rustc's
/// `semicolon_in_expressions_from_macros` lint in that position.
macro_rules! then {
    // Two or more matchers: apply the first, then recurse on the rest.
    ($item:expr, $closure1:expr, $($closure2:expr),+ $(,)?) => {
        then!(then_do($item, &$closure1), $($closure2),+)
    };
    // Exactly one matcher left: a single `then_do` step.
    ($item:expr, $closure1:expr $(,)?) => {
        then_do($item, &$closure1)
    };
}
/// Splits a string slice into its individual `char`s.
///
/// * `s` : the string slice to split
///
/// Returns the characters of `s` in their original order; an empty input
/// gives an empty vector.
///
/// # Example
///
/// ```
/// let s = "Lí 好!";
/// let char_vec: Vec<char> = str_to_char_vec(s);
/// assert_eq!(char_vec, vec!['L', 'í', ' ', '好', '!'])
/// ```
fn str_to_char_vec(s: &str) -> Vec<char> {
    let chars: Vec<char> = s.chars().collect();
    chars
}
/// Builds a matcher that accepts exactly the character `ch`.
///
/// This is `match_range` with `ch` as both the lower and the upper bound, so
/// the returned closure looks at the first char of `Matchee.remained`: if it
/// equals `ch` the closure returns the updated `Some(Matchee)`, otherwise it
/// returns `None`.
fn match_1_char(ch: char) -> Box<dyn Fn(Matchee) -> Option<Matchee>> {
    match_range(ch, ch)
}
/// Builds a matcher that accepts one character from an inclusive range.
///
/// - `lower_ch` : lower bound of the accepted range (inclusive)
/// - `upper_ch` : upper bound of the accepted range (inclusive)
///
/// The returned closure looks at the first char of `Matchee.remained`:
///
/// - if `remained` is empty, it returns `None`;
/// - if the char lies between `lower_ch` and `upper_ch` (compared by code
///   point), it returns `Some(Matchee)` in which that char has been appended
///   to `matched` and removed from the front of `remained`;
/// - otherwise it returns `None`.
fn match_range(lower_ch: char, upper_ch: char) -> Box<dyn Fn(Matchee) -> Option<Matchee>> {
    Box::new(move |x: Matchee| -> Option<Matchee> {
        // Only the first char is inspected, so walk the string lazily instead
        // of materialising every remaining char in a vector.
        let mut rest = x.remained.chars();
        let first = rest.next()?;

        // `char` ordering is code-point ordering.
        if !(lower_ch..=upper_ch).contains(&first) {
            return None;
        }

        let mut matched = x.matched;
        matched.push(first);
        Some(Matchee {
            matched,
            // Whatever the iterator has not yielded yet is the new remainder.
            remained: rest.as_str().to_string(),
        })
    })
}
/// Sequencing step, like the infix `==>` operator (`inputee ==> closure`)
/// in OCaml-style parser combinators.
///
/// - `inputee` : the current state, `Some(Matchee)`, or `None` if an earlier
///   step already failed
/// - `closure` : the matcher to apply to the state
///
/// Returns `closure`'s result when `inputee` is `Some`; a `None` input is
/// passed through without calling `closure`.
fn then_do(
    inputee: Option<Matchee>,
    closure: &dyn Fn(Matchee) -> Option<Matchee>,
) -> Option<Matchee> {
    inputee.and_then(closure)
}
/// Builds a matcher that applies `closure` zero or more times, similar to
/// `( closure )*`.
///
/// - `closure` : the matcher to repeat; the returned box borrows it.
///
/// The returned closure keeps feeding the latest state to `closure` until it
/// returns `None`, then yields the last successful state. It therefore always
/// returns `Some`: when `closure` fails on the very first attempt the input
/// comes back unchanged.
///
/// Repetition also stops when `closure` succeeds but hands back a state equal
/// to the one it was given. Without this check, a matcher that can succeed
/// without consuming anything (for example another zero-or-more matcher)
/// would make the loop spin forever.
fn zero_plus_times_do(
    closure: &dyn Fn(Matchee) -> Option<Matchee>,
) -> Box<dyn Fn(Matchee) -> Option<Matchee> + '_> {
    Box::new(move |inputee: Matchee| {
        let mut current = inputee;
        // `closure` takes its argument by value, so one clone per attempt is
        // needed to keep the last good state around.
        while let Some(next) = closure(current.clone()) {
            if next == current {
                // No progress was made; repeating would never terminate.
                break;
            }
            current = next;
        }
        Some(current)
    })
}
/// Builds an alternation matcher, i.e. `(closure1 || closure2)`.
///
/// - `closure1` : the matcher tried first
/// - `closure2` : the fallback matcher
///
/// The returned closure runs `closure1` on the input; if that yields
/// `Some`, the result is returned as is. Only when `closure1` yields `None`
/// is `closure2` run, on the same original input, and its result returned.
fn or_do(
    closure1: Box<dyn Fn(Matchee) -> Option<Matchee>>,
    closure2: Box<dyn Fn(Matchee) -> Option<Matchee>>,
) -> Box<dyn Fn(Matchee) -> Option<Matchee>> {
    Box::new(move |inputee: Matchee| {
        // The first alternative gets a copy so the original input is still
        // available for the fallback.
        if let Some(hit) = closure1(inputee.clone()) {
            return Some(hit);
        }
        closure2(inputee)
    })
}
/// Demo driver: runs the matchers above against two sample inputs and prints
/// every result with `{:?}`.
fn main() {
    // First sample: nothing matched yet, "112" still to be consumed.
    let ex1 = Matchee {
        matched: string!(""),
        remained: string!("112"),
    };
    // Matcher for a single char in '0'..='9', reused by most demos below.
    let digit = match_range('0', '9');

    // Three digit matches in a row, one `then_do` step at a time.
    let after_one = then_do(Some(ex1.clone()), &digit);
    let after_two = then_do(after_one, &digit);
    let after_three = then_do(after_two, &digit);
    println!("{:?}", after_three);

    // The untouched sample itself.
    println!("{:?}", ex1);

    // Single-step matchers applied directly to the sample.
    let two_to_nine = match_range('2', '9');
    println!("{:?}", two_to_nine(ex1.clone()));
    for ch in ['0', '1'] {
        println!("{:?}", match_1_char(ch)(ex1.clone()));
    }

    // Second sample.
    let ex2 = Matchee {
        matched: string!(""),
        remained: string!("1234"),
    };

    // ('1' or '0') followed by one digit.
    let one_or_zero = or_do(match_1_char('1'), match_1_char('0'));
    let after_alternative = then_do(Some(ex2.clone()), &one_or_zero);
    println!("~~~{:?}", then_do(after_alternative, &digit));

    // Zero or more digits, via the function and via the macro.
    let digits = zero_plus_times_do(&digit);
    println!("~~~{:?}", then_do(Some(ex2.clone()), &digits));
    println!("~~~{:?}", then!(Some(ex2.clone()), &digits));

    // Exactly three digits via the macro.
    println!("~~~{:?}", then!(Some(ex2.clone()), &digit, &digit, &digit));
}