diff --git a/uann b/uann deleted file mode 160000 index 513173b..0000000 --- a/uann +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 513173b22fa4776c54cae4bb6b78dcc8ec9acac9 diff --git a/uann.tar.gz b/uann.tar.gz new file mode 100644 index 0000000..a08eee2 Binary files /dev/null and b/uann.tar.gz differ diff --git a/uann/Cargo.lock b/uann/Cargo.lock new file mode 100644 index 0000000..82c757d --- /dev/null +++ b/uann/Cargo.lock @@ -0,0 +1,16 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "parsing" +version = "0.1.0" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "unicode-segmentation" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" diff --git a/uann/Cargo.toml b/uann/Cargo.toml new file mode 100644 index 0000000..90b18a6 --- /dev/null +++ b/uann/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "parsing" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +unicode-segmentation = "1.6.0" \ No newline at end of file diff --git a/uann/README.md b/uann/README.md new file mode 100644 index 0000000..5ca237b --- /dev/null +++ b/uann/README.md @@ -0,0 +1,2 @@ +# uann +A compiler practice or a parser's practice diff --git a/uann/src/main.rs b/uann/src/main.rs new file mode 100644 index 0000000..8afeb37 --- /dev/null +++ b/uann/src/main.rs @@ -0,0 +1,176 @@ +/// (c) 2023 Tan Kian-ting +/// Under MIT License +/// 习包子 梁家河小学博士 清零宗 习炀帝 庆丰大帝 +/// 独裁国贼 新疆集中营 光复香港时代革命 祈翠 南蒙古独立 香港独立 +/// + +/// +/// pairs of string for matching and parsing +/// +/// - `matched` : the string being accumulatedly matched. 
/// - `remained` : the string still to be matched
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Matchee {
    /// Characters consumed so far, in input order.
    matched: String,
    /// Input that has not been consumed yet.
    remained: String,
}

/// Converts a value to a `String`; `string!(x)` expands to `x.to_string()`.
///
/// # Example
/// ```ignore
/// let a = "abc";
/// assert_eq!(string!(a), a.to_string());
/// ```
macro_rules! string {
    ($name:expr) => {
        $name.to_string()
    };
}

/// Chains parsers left to right: `then!(a, b [, c ...])` is similar to
/// `a ==> b (==> c ...)`, i.e. nested calls to `then_do`.
///
/// The arms expand to expressions (no trailing `;`) because the macro is used
/// in expression position. A trailing comma after the last parser is accepted.
macro_rules! then {
    ($item:expr, $first:expr $(,)?) => {
        then_do($item, &$first)
    };
    ($item:expr, $first:expr, $($rest:expr),+ $(,)?) => {
        then!(then_do($item, &$first), $($rest),+)
    };
}

/// Converts a string slice into a vector of its `char`s.
///
/// * `s` : the input `str` reference
///
/// # Example
///
/// ```ignore
/// let s = "Lí 好!";
/// let char_vec: Vec<char> = str_to_char_vec(s);
/// assert_eq!(char_vec, vec!['L', 'í', ' ', '好', '!']);
/// ```
// `match_range` reads the first character straight from the string, so nothing
// in this file calls this helper any more; it is kept as a utility.
#[allow(dead_code)]
fn str_to_char_vec(s: &str) -> Vec<char> {
    s.chars().collect()
}

/// Returns a parser that accepts exactly the character `ch`.
///
/// If the first char of `Matchee.remained` equals `ch`, the parser returns
/// `Some` of the updated `Matchee`; otherwise it returns `None`.
fn match_1_char(ch: char) -> Box<dyn Fn(Matchee) -> Option<Matchee>> {
    match_range(ch, ch)
}

/// Returns a parser that accepts one character from an inclusive range.
///
/// If the code point of the first char of `Matchee.remained` lies between
/// that of `lower_ch` (lower bound) and that of `upper_ch` (upper bound), both
/// inclusive, the parser moves that char from `remained` to the end of
/// `matched` and returns `Some` of the updated `Matchee`.
/// Otherwise (including when `remained` is empty) it returns `None`.
fn match_range(lower_ch: char, upper_ch: char) -> Box<dyn Fn(Matchee) -> Option<Matchee>> {
    Box::new(move |x: Matchee| -> Option<Matchee> {
        let mut rest = x.remained.chars();
        // Empty input: nothing to match.
        let first = rest.next()?;

        // `char` ordering is code-point ordering, so this is the same test as
        // comparing the `u32` values.
        if !(lower_ch..=upper_ch).contains(&first) {
            return None;
        }

        let mut matched = x.matched;
        matched.push(first);
        Some(Matchee {
            matched,
            // Whatever the iterator has not yielded yet is the new remainder.
            remained: rest.as_str().to_string(),
        })
    })
}

/// Like the infix monadic bind `inputee ==> closure` in OCaml.
///
/// - `inputee` : the current parse state, `Some(..)` or `None`
/// - `closure` : the parser to apply to the state
///
/// Returns `closure(inner)` when `inputee` is `Some(inner)`, and `None` when
/// `inputee` is `None` (the closure is not called).
fn then_do(
    inputee: Option<Matchee>,
    closure: &dyn Fn(Matchee) -> Option<Matchee>,
) -> Option<Matchee> {
    inputee.and_then(closure)
}

/// Returns a parser that applies `closure` zero or more times, similar to
/// `( closure )*`.
///
/// The returned parser always yields `Some`: the state after the last
/// successful application, or the unchanged input if `closure` fails at once.
/// Repetition also stops when an application returns a state equal to the one
/// it was given; without that check a parser that succeeds without consuming
/// input (for instance a nested `( p* )*`) would loop forever.
fn zero_plus_times_do(
    closure: &dyn Fn(Matchee) -> Option<Matchee>,
) -> Box<dyn Fn(Matchee) -> Option<Matchee> + '_> {
    Box::new(move |inputee: Matchee| {
        let mut current = inputee;
        while let Some(next) = closure(current.clone()) {
            if next == current {
                // No progress was made; further applications would repeat.
                break;
            }
            current = next;
        }
        Some(current)
    })
}

/// Returns a combined parser: if `closure1` does not match, `closure2` is
/// tried on the same input, i.e. `(closure1 || closure2)`.
fn or_do(
    closure1: Box<dyn Fn(Matchee) -> Option<Matchee>>,
    closure2: Box<dyn Fn(Matchee) -> Option<Matchee>>,
) -> Box<dyn Fn(Matchee) -> Option<Matchee>> {
    Box::new(move |inputee: Matchee| {
        // Only the first attempt needs a copy; the fallback takes the original.
        closure1(inputee.clone()).or_else(|| closure2(inputee))
    })
}

/// Prints the results of a few sample parses to standard output.
fn main() {
    let ex1 = Matchee {
        matched: string!(""),
        remained: string!("112"),
    };

    let d = match_range('0', '9');
    println!("{:?}", then_do(then_do(then_do(Some(ex1.clone()), &d), &d), &d));

    println!("{:?}", ex1.clone());
    println!("{:?}", match_range('2', '9')(ex1.clone()));
    println!("{:?}", match_1_char('0')(ex1.clone()));
    println!("{:?}", match_1_char('1')(ex1.clone()));

    let ex2 = Matchee {
        matched: string!(""),
        remained: string!("1234"),
    };
    println!(
        "~~~{:?}",
        then_do(
            then_do(
                Some(ex2.clone()),
                &or_do(match_1_char('1'), match_1_char('0'))
            ),
            &d
        )
    );

    println!(
        "~~~{:?}",
        then_do(Some(ex2.clone()), &zero_plus_times_do(&d))
    );

    println!("~~~{:?}", then!(Some(ex2.clone()), &zero_plus_times_do(&d)));

    println!("~~~{:?}", then!(Some(ex2.clone()), &d, &d, &d));
}