From a722e739575012ebad8cd0a7d2c12df478a54eb8 Mon Sep 17 00:00:00 2001 From: Tan Kian-ting Date: Tue, 5 Sep 2023 23:57:57 +0800 Subject: [PATCH] add a basic tokenzier function & an int tokenizer --- src/index.js | 143 ++++++++++++++++++++++++++++++++++++++++++++- src/index.ts | 154 ++++++++++++++++++++++++++++++++++++++++++++++++- tests/index.js | 37 ++++++++++++ tests/index.ts | 45 +++++++++++++++ 4 files changed, 375 insertions(+), 4 deletions(-) diff --git a/src/index.js b/src/index.js index 712af45..a1ef267 100644 --- a/src/index.js +++ b/src/index.js @@ -1,7 +1,15 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.thenDo = exports.charToCodepoint = exports.matchRange = exports.match1Char = void 0; +exports.tokenize = exports.zeroOrOnceDo = exports.notDo = exports.zeroOrMoreDo = exports.orDo = exports.thenDo = exports.charToCodepoint = exports.matchRange = exports.matchAny = exports.match1Char = void 0; var fs = require('fs'); +/** + * wrap a x in a `Some(T)` + * @param x : variable to be wrapped. + * @returns wrapped `x`. + */ +function toSome(x) { + return { _tag: "Some", value: x }; +} /** * @description * it returns a function which test if the first char of the `remained` part of @@ -12,6 +20,9 @@ var fs = require('fs'); */ function match1Char(c) { return (m) => { + if (m.remained.length == 0) { + return { _tag: "None" }; + } const charToBeMatched = m.remained[0]; if (charToBeMatched === c) { return { _tag: "Some", value: { @@ -26,6 +37,24 @@ function match1Char(c) { } exports.match1Char = match1Char; ; +/** + * + * @param m : the `MatcheePair` to be consumed. + * @returns if the length of `m.remained` >= 1; consumes the matchee by 1 char and wraps it in `Some`, + * otherwise, returns `None`. + */ +function matchAny(m) { + if (m.remained.length >= 1) { + return { _tag: "Some", value: { + matched: m.matched + m.remained[0], + remained: m.remained.substring(1) + } }; + } + else { + return { _tag: "None" }; + } +} +exports.matchAny = matchAny; /** * @description * it returns a function which test if the first char of the `remained` part of @@ -42,6 +71,9 @@ function matchRange(l, u) { throw new Error("Error: the codepoint of `" + l + "` is not smaller than `" + u + "`)"); } return (m) => { + if (m.remained.length < 1) { + return { _tag: "None" }; + } const charToBeMatched = m.remained[0]; const codePointToBeMatched = charToCodepoint(charToBeMatched); if (codePointToBeMatched >= lCodepoint && codePointToBeMatched <= uCodepoint) { @@ -75,6 +107,10 @@ exports.charToCodepoint = charToCodepoint; /** * @description thendo(input, f, ...) like * a ==> f + * @param input: the wrapped input. + * @param f: the function to be applied. + * + * @returns:the applied wrapped result `MatcheePair`. */ function thenDo(input, f) { if (input._tag == "None") { @@ -86,3 +122,108 @@ function thenDo(input, f) { } } exports.thenDo = thenDo; +/** + * @description "or", like the regex `( f1 | f2 )` . + * It returns a function `f` of which the argument is`x`. + * if `f1(x)` is None, then `f` returns `f2(x)`. Otherwise, + * `F` returns `f1(x)`. + * @param f1 : 1st function to be compared + * @param f2 : 2nd function to be compared + * @returns:the combined function + */ +function orDo(f1, f2) { + return (x) => { + let f1x = (f1(x)); + { + if (f1x._tag == "None") { + return f2(x); + } + else { + return f1x; + } + } + }; +} +exports.orDo = orDo; +/** +* @description repeating matching function `f` +* zero or more times, like the asterisk `*` in regex `f*` . +* @param f : the function to be repeated 0+ times. +* @returns:the combined function +*/ +function zeroOrMoreDo(f) { + return (x) => { + var wrapped_old_x = { _tag: "Some", value: x }; + var wrapped_new_x = wrapped_old_x; + while (wrapped_new_x._tag != "None") { + wrapped_old_x = wrapped_new_x; + wrapped_new_x = thenDo(wrapped_old_x, f); + } + ; + return wrapped_old_x; + }; +} +exports.zeroOrMoreDo = zeroOrMoreDo; +/** +* @description Not. like the `^` inside regex of [^f]. +* returns a function `F(x)` such that if `f(x)` is `None`, +* returns the x consuming a char; if `f(x)` is not None, F(x) +* returns `None`. +* @param f: the function forbidden to be matched. +* @returns: combined function `F`. +*/ +function notDo(f) { + return (x) => { + let wrapped_x = { + _tag: "Some", + value: x + }; + let f_x = thenDo(wrapped_x, f); + if (f_x._tag != "None") { + return { _tag: "None" }; + } + else { + return thenDo(wrapped_x, matchAny); + } + }; +} +exports.notDo = notDo; +/** + * if `x` is matched by `f` once, returns `f(x)`. Otherwise, + * returns x + * similar to `?` in regex `f?`. + * @param f : the function to be matched + * @returns return wrapped f(x) + */ +function zeroOrOnceDo(f) { + return (x) => { + var wrapped_old_x = { _tag: "Some", value: x }; + var wrapped_new_x = thenDo(wrapped_old_x, f); + if (wrapped_new_x._tag != "None") { + return wrapped_new_x; + } + else { + return wrapped_old_x; + } + }; +} +exports.zeroOrOnceDo = zeroOrOnceDo; +function tokenize(input) { + var input_matchee_pair = toSome({ matched: "", + remained: input }); + // integer = ([+]|[-])\d\d? + let integer = (x) => { + let wrapped_x = toSome(x); + let plusMinus = orDo(match1Char('+'), match1Char('-')); // ([+]|[-]) + let d = matchRange('0', '9'); // \d + return thenDo(thenDo(thenDo(wrapped_x, zeroOrOnceDo(plusMinus)), d), zeroOrMoreDo(d)); + }; + console.log(input + ", result: "); + console.log(thenDo(input_matchee_pair, integer)); +} +exports.tokenize = tokenize; +tokenize("+123"); +tokenize("123"); +tokenize("-123"); +tokenize(" 123"); +tokenize("c123"); diff --git a/src/index.ts b/src/index.ts index 26cd10d..bcd3e10 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,8 +1,17 @@ var fs = require('fs'); -type Some = { _tag: "Some"; value: T }; -type None = {_tag: "None"}; +export type Some = { _tag: "Some"; value: T }; +export type None = {_tag: "None"}; + +/** + * wrap a x in a `Some(T)` + * @param x : variable to be wrapped. + * @returns wrapped `x`. + */ +function toSome(x: T): Some{ + return { _tag: "Some", value: x}; +} /** * @description Like the `Some(a)` and `None` in Rust. * @@ -33,6 +42,9 @@ export type MatcheePair = {matched : string; remained : string}; */ export function match1Char(c : string) : (m: MatcheePair) => Maybe { return (m : MatcheePair)=>{ + if (m.remained.length == 0){ + return { _tag: "None" }; + } const charToBeMatched = m.remained[0]; if (charToBeMatched === c){ return {_tag: "Some", value :{ @@ -45,6 +57,22 @@ export function match1Char(c : string) : (m: MatcheePair) => Maybe } }; +/** + * + * @param m : the `MatcheePair` to be consumed. + * @returns if the length of `m.remained` >= 1; consumes the matchee by 1 char and wraps it in `Some`, + * otherwise, returns `None`. + */ +export function matchAny(m : MatcheePair) : Maybe{ + if (m.remained.length >= 1){ + return {_tag: "Some", value :{ + matched : m.matched + m.remained[0], + remained : m.remained.substring(1)}}; + }else{ + return {_tag: "None"}; + } +} + /** * @description * it returns a function which test if the first char of the `remained` part of @@ -61,7 +89,9 @@ export function matchRange(l : string, u : string) : (m: MatcheePair) => Maybe{ - + if (m.remained.length < 1){ + return {_tag : "None"}; + } const charToBeMatched = m.remained[0]; const codePointToBeMatched = charToCodepoint(charToBeMatched); if (codePointToBeMatched >= lCodepoint && codePointToBeMatched <= uCodepoint){ @@ -92,6 +122,10 @@ export function charToCodepoint(s : string): number{ /** * @description thendo(input, f, ...) like * a ==> f + * @param input: the wrapped input. + * @param f: the function to be applied. + * + * @returns:the applied wrapped result `MatcheePair`. */ export function thenDo(input : Maybe, f : Function) : Maybe{ if (input._tag == "None"){ @@ -102,3 +136,117 @@ export function thenDo(input : Maybe, f : Function) : Maybe{ return f(inner); } } + +/** + * @description "or", like the regex `( f1 | f2 )` . + * It returns a function `f` of which the argument is`x`. + * if `f1(x)` is None, then `f` returns `f2(x)`. Otherwise, + * `F` returns `f1(x)`. + * @param f1 : 1st function to be compared + * @param f2 : 2nd function to be compared + * @returns:the combined function + */ +export function orDo(f1 : Function, f2: Function) : (x : T ) => Maybe{ + return (x) => { + let f1x : Maybe = (f1(x)); + { + if (f1x._tag == "None"){ + return f2(x); + } + else{ + return f1x; + } + } + }; +} + + +/** +* @description repeating matching function `f` +* zero or more times, like the asterisk `*` in regex `f*` . +* @param f : the function to be repeated 0+ times. +* @returns:the combined function +*/ +export function zeroOrMoreDo(f : Function): (x : T) => Maybe{ + return (x)=>{ + var wrapped_old_x : Maybe = {_tag: "Some", value : x}; + var wrapped_new_x : Maybe = wrapped_old_x; + + while (wrapped_new_x._tag != "None"){ + wrapped_old_x = wrapped_new_x; + wrapped_new_x = thenDo(wrapped_old_x, f); + }; + + return wrapped_old_x; + }; +} + +/** +* @description Not. like the `^` inside regex of [^f]. +* returns a function `F(x)` such that if `f(x)` is `None`, +* returns the x consuming a char; if `f(x)` is not None, F(x) +* returns `None`. +* @param f: the function forbidden to be matched. +* @returns: combined function `F`. +*/ +export function notDo(f : Function): (x : T) => Maybe{ + return (x)=>{ + let wrapped_x : Maybe = { + _tag : "Some", + value : x + }; + let f_x = thenDo(wrapped_x, f); + + if (f_x._tag != "None"){ + return {_tag:"None"}; + }else{ + return thenDo(wrapped_x, matchAny); + } + }; +} + +/** + * if `x` is matched by `f` once, returns `f(x)`. Otherwise, + * returns x + * similar to `?` in regex `f?`. + * @param f : the function to be matched + * @returns return wrapped f(x) + */ +export function zeroOrOnceDo(f : Function): (x : T) => Maybe{ + return (x)=>{ + var wrapped_old_x : Maybe = {_tag: "Some", value : x}; + var wrapped_new_x = thenDo(wrapped_old_x, f); + + if (wrapped_new_x._tag != "None"){ + return wrapped_new_x; + }else{ + return wrapped_old_x; + } + }; +} + + +export function tokenize(input : string){ + var input_matchee_pair : Maybe = toSome( + {matched:"", + remained: input}); + + // integer = ([+]|[-])\d\d? + let integer = (x : MatcheePair) => + { let wrapped_x = toSome(x); + let plusMinus = orDo(match1Char('+'), match1Char('-')); // ([+]|[-]) + let d = matchRange('0','9'); // \d + return thenDo(thenDo(thenDo(wrapped_x, + zeroOrOnceDo(plusMinus)),d), + zeroOrMoreDo(d)); + } + console.log(input+", result: "); + console.log(thenDo(input_matchee_pair, integer)); + +} + +tokenize("+123"); +tokenize("123"); +tokenize("-123"); +tokenize(" 123"); +tokenize("c123"); \ No newline at end of file diff --git a/tests/index.js b/tests/index.js index 933dfcc..891511c 100644 --- a/tests/index.js +++ b/tests/index.js @@ -1,4 +1,6 @@ "use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +const src_1 = require("../src"); let assert = require("assert"); let cloMain = require("../src"); let a = cloMain.match1Char("我"); @@ -9,6 +11,9 @@ assert(example1.value.remained == "的"); let example2 = a({ matched: "", remained: "妳的" }); assert(example2._tag == "None"); let thenDo = cloMain.thenDo; +let orDo = cloMain.orDo; +let zeroOrMoreDo = cloMain.zeroOrMoreDo; +let notDo = cloMain.notDo; // composed part x let compPart1 = cloMain.match1Char("我"); let compPart2 = cloMain.match1Char("的"); @@ -20,6 +25,38 @@ assert(doTestRes1.value.remained == "貓"); let doThenTestee2 = { _tag: "Some", value: { matched: "", remained: "我們" } }; let doTestRes2 = thenDo(thenDo(doThenTestee2, compPart1), compPart2); assert(doTestRes2._tag == "None"); +let doThenTestee3 = { _tag: "Some", value: { matched: "", remained: "我的貓" } }; +let doTestRes3 = thenDo(thenDo(doThenTestee3, orDo(compPart1, compPart2)), compPart2); +assert(doTestRes3._tag == "Some"); +assert(doTestRes3.value.matched == "我的"); +assert(doTestRes3.value.remained == "貓"); +let doThenTestee4 = { _tag: "Some", value: { matched: "", remained: "的的貓" } }; +let doTestRes4 = thenDo(thenDo(doThenTestee4, orDo(compPart1, compPart2)), compPart2); +assert(doTestRes4._tag == "Some"); +assert(doTestRes4.value.matched == "的的"); +assert(doTestRes4.value.remained == "貓"); +let doThenTestee5 = { _tag: "Some", value: { matched: "", remained: "的貓" } }; +let doTestRes5 = thenDo(thenDo(doThenTestee5, zeroOrMoreDo(compPart1)), compPart2); +assert(doTestRes5._tag == "Some"); +assert(doTestRes5.value.matched == "的"); +assert(doTestRes5.value.remained == "貓"); +let doThenTestee6 = { _tag: "Some", value: { matched: "", remained: "我我我的貓" } }; +let doTestRes6 = thenDo(thenDo(doThenTestee6, zeroOrMoreDo(compPart1)), compPart2); +assert(doTestRes6._tag == "Some"); +assert(doTestRes6.value.matched == "我我我的"); +assert(doTestRes6.value.remained == "貓"); +let doThenTestee7 = { _tag: "Some", value: { matched: "", remained: "我的" } }; +let doTestRes7 = thenDo(thenDo(doThenTestee7, notDo(compPart1)), compPart2); +assert(doTestRes7._tag == "None"); +let doThenTestee8 = { _tag: "Some", value: { matched: "", remained: "妳的" } }; +let doTestRes8 = thenDo(thenDo(doThenTestee8, notDo(compPart1)), compPart2); +assert(doTestRes8._tag == "Some"); +assert(doTestRes8.value.matched == "妳的"); +let doThenTestee9 = { _tag: "Some", value: { matched: "", remained: "妳的" } }; +let doTestRes9 = thenDo(doThenTestee9, src_1.matchAny); +assert(doTestRes9._tag == "Some"); +assert(doTestRes9.value.matched == "妳"); +assert(doTestRes9.value.remained == "的"); // harfbuzz test let harfbuzz = require("../src/harfbuzz.js"); harfbuzz.harfbuzzTest("123.abc"); diff --git a/tests/index.ts b/tests/index.ts index a7cefa5..653f3cf 100644 --- a/tests/index.ts +++ b/tests/index.ts @@ -1,3 +1,5 @@ +import { matchAny } from "../src"; + let assert = require("assert"); let cloMain = require("../src"); @@ -14,6 +16,10 @@ assert(example2._tag == "None"); let thenDo = cloMain.thenDo; +let orDo = cloMain.orDo; +let zeroOrMoreDo = cloMain.zeroOrMoreDo; +let notDo = cloMain.notDo; + // composed part x let compPart1 = cloMain.match1Char("我"); let compPart2 = cloMain.match1Char("的"); @@ -29,6 +35,45 @@ let doThenTestee2 = {_tag : "Some",value : {matched: "", remained: "我們"}}; let doTestRes2 = thenDo(thenDo(doThenTestee2, compPart1), compPart2); assert(doTestRes2._tag == "None"); +let doThenTestee3 = {_tag : "Some",value : {matched: "", remained: "我的貓"}}; +let doTestRes3 = thenDo(thenDo(doThenTestee3, orDo(compPart1, compPart2)), compPart2); +assert(doTestRes3._tag == "Some"); +assert(doTestRes3.value.matched == "我的"); +assert(doTestRes3.value.remained == "貓"); + +let doThenTestee4 = {_tag : "Some",value : {matched: "", remained: "的的貓"}}; +let doTestRes4 = thenDo(thenDo(doThenTestee4, orDo(compPart1, compPart2)), compPart2); +assert(doTestRes4._tag == "Some"); +assert(doTestRes4.value.matched == "的的"); +assert(doTestRes4.value.remained == "貓"); + +let doThenTestee5 = {_tag : "Some",value : {matched: "", remained: "的貓"}}; +let doTestRes5 = thenDo(thenDo(doThenTestee5, zeroOrMoreDo(compPart1)), compPart2); +assert(doTestRes5._tag == "Some"); +assert(doTestRes5.value.matched == "的"); +assert(doTestRes5.value.remained == "貓"); + +let doThenTestee6 = {_tag : "Some",value : {matched: "", remained: "我我我的貓"}}; +let doTestRes6 = thenDo(thenDo(doThenTestee6, zeroOrMoreDo(compPart1)), compPart2); +assert(doTestRes6._tag == "Some"); +assert(doTestRes6.value.matched == "我我我的"); +assert(doTestRes6.value.remained == "貓"); + +let doThenTestee7 = {_tag : "Some",value : {matched: "", remained: "我的"}}; +let doTestRes7 = thenDo(thenDo(doThenTestee7, notDo(compPart1)), compPart2); +assert(doTestRes7._tag == "None"); + +let doThenTestee8 = {_tag : "Some",value : {matched: "", remained: "妳的"}}; +let doTestRes8 = thenDo(thenDo(doThenTestee8, notDo(compPart1)), compPart2); +assert(doTestRes8._tag == "Some"); +assert(doTestRes8.value.matched == "妳的"); + +let doThenTestee9 = {_tag : "Some",value : {matched: "", remained: "妳的"}}; +let doTestRes9 = thenDo(doThenTestee9, matchAny); +assert(doTestRes9._tag == "Some"); +assert(doTestRes9.value.matched == "妳"); +assert(doTestRes9.value.remained == "的"); + // harfbuzz test let harfbuzz = require("../src/harfbuzz.js");