20230910 : add basic parser CONST rule, and add the grammar rule.

2023-09-10 23:45:01 +08:00 · 2023-09-10 23:45:01 +08:00 · 6f2e788329
commit 6f2e788329
parent cda463d890
6 changed files with 252 additions and 31 deletions
--- a/README.md
+++ b/README.md
@ -7,3 +7,4 @@ another personal draught of a typesetting language and engine.
   `matchAny`, `notDo`, `orDo`, `zeroOrMoreDo`, `zeroOrOnceDo`
 - 20230905-07:強化`tokenize`, 加強功能，加`Token`界面。
 - 20230907-08:強化`tokenize`。
 - 20230910 : add basic parser `CONST` rule, and add the grammar rule.
--- a/package.json
+++ b/package.json
@ -2,7 +2,7 @@
  "name": "clo",
  "version": "0.0.1",
  "description": "a little typesetting engine in TypeScript",
-  "main": "index.js",
+  "main": "src/index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
--- a/parser_rule.txt
+++ b/parser_rule.txt
@ -0,0 +1,66 @@
 let sqrtSum = (int x, int y) -> int {
 let x2 = x * x;
 let y2 = y * y;
 return x2+y2;
 }
 let pi = 3.14159;
 let _2p = (intToFLo 2) *. pi;
 let c = if (2 == 2) then 2 else 3;
 let aStr = "hello";
 let rec fac = (int n)-> int {
    if n == 0 then 1 else (fac (n - 1));};
 type student = Student {int id, string name};
 let alice = Student {id=20, name="Alice"};
 alice.name = "Siobhan";
 let customAnd = (@ 'a has age) ('a x, 'a y) => {'a > 'b};
 type list 'a = (Cons 'a (List 'a)) | Nil;
 import("alifbata.clo"); # 匯入檔案 alifbata #
 t of  import :== string -> Option string string
 Error("string") | Ok("import aaa")
 # 型別構造子統一大寫，型別小寫 #
 PROG ::= (STMT | COMMENT | STMT_COMMENT)*
 COMMENT ::= # COMMENT_INNER #
 COMMENT_INNER ::= [^#]+
 STMT ::= (TYPE_DEF | VAR_DEF | SET | EXPR  ) ";"
 TYPE_DEF ::= type ID "=" UNION
            | type ID TYPE_VARS+ "=" UNION
 TYPE_VARS ::= ' ID
 UNION ::= (REC "|" UNION) | REC
 REC ::= ID ( TYPES )
 TYPES ::= TYPE+
 TYPE ::= ID
 EXPR ::= if SUB_EXPR then IF_BRANCH else IF_BRANCH | SUB_EXPR
 IF_BRANCH ::= EXPR | { BLOCK }
 SUB_EXPR ::= COMPAREE| COMPAREE (LE|GE|LT|GT|EQ|NE) EXPR
 COMPAREE ::= FAC| (FAC(ADD | SUB) FAC)
 FAC ::= APPLY | (APPLY (MUL | DIV) APPLY)
 APPLY ::= "(" ID APPLYEE* ")" | APPLYEE
 APPLYEE ::= REF | CONST | EXPR | FUNC
 CONST ::= INT | STR | FLO | BOOL
 BOOL ::= "true" | "false"
 FUNC ::= FUNC_OPTION? ( ARGS? ) -> TYPE {BLOCK}
 BLOCK ::= PROG (return ID |noReturn) ; 
 ARGS ::= TYPE (TYPE_VARS | ID)
       | TYPE (TYPE_VARS | ID) , ARGS
 REF ::= VAR "." ID | VAR
 VAR ::= ID
 VAR_DEF  ::= "let" VAR "=" EXPR
 SET ::= VAR "=" EXPR
 FUNC_OPTION ::= ( @ TYPE_HAS (, TYPE_HAS)* )
 TYPE_HAS ::= TYPE_VARS "has" ID
--- a/src/index.js
+++ b/src/index.js
@ -23,26 +23,32 @@ var __importStar = (this && this.__importStar) || function (mod) {
    return result;
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.match1token = void 0;
+exports.m1TType = void 0;
 var fs = require('fs');
 const node_process_1 = require("node:process");
 const tk = __importStar(require("./tokenize.js"));
-let b = tk.tokenize("2+2");
+const util = __importStar(require("util"));
 /**
 * @description
- * it returns a function which test if the first char of the `remained` part of
+ * match one token type.
- *  the argument of the function is `c`, if it's true, update the `MatchedPair` wrapped
+ *
 * it returns a function which test if the type of first token of the `remained` part of
 *  the argument of the function is `typ` , if it's true, update the `TokenMatcheePair` wrapped
 * in `Some`. Otherwise, it returns `None`.
- *  * @param t : the char to be test.
+ *  * @param typ : the type to be test.
- * @returns the updated `MatchedPair` wrapped in `Some(x)` or `None`.
+ * @returns the updated `TokenMatcheePair` wrapped in `Some(x)` or `None`.
 */
-function match1token(t) {
+function m1TType(typ) {
    return (m) => {
        if (m.remained.length == 0) {
            return { _tag: "None" };
        }
-        const tokenToBeMatched = m.remained[0];
+        /**
-        if (tokenToBeMatched === t) {
+         * token to be matched
-            m.matched.push(tokenToBeMatched);
+         * */
        const ttbm = m.remained[0];
        if (ttbm.type == typ) {
            m.matched.push(ttbm);
            return {
                _tag: "Some", value: {
                    matched: m.matched,
@ -55,7 +61,59 @@ function match1token(t) {
        }
    };
 }
-exports.match1token = match1token;
+exports.m1TType = m1TType;
 ;
-let c = tk.toSome(b);
+let toSome = tk.toSome;
-console.log(thenDo(c, match1token(tk.tokenize("+")[0])));
+let thenDo = tk.thenDo;
 let orDo = tk.orDo;
 // Debug aid: echo every process argument as `index=value`.
 node_process_1.argv.forEach((val, index) => {
    console.log(`${index}=${val}`);
 });
 // The text to parse is read from argv[2]; it is `undefined` when that argument is absent.
 let commandInput = node_process_1.argv[2];
 // Tokenize the input and print the token list for inspection.
 let commandInputTokenized = tk.tokenize(commandInput);
 console.log(commandInputTokenized);
 /**
 * matchee pair of commandInputTokenized:
 * nothing has been matched yet and every token is still remaining.
 */
 let commandTPair = { matched: [],
    remained: commandInputTokenized };
 // Single-token matchers built with `m1TType`, one per literal token type.
 let tInt = m1TType(tk.TokenType.INT);
 let tFlo = m1TType(tk.TokenType.FLO);
 let tStr = m1TType(tk.TokenType.STR);
 /**
 * BOOL ::= "true" | "false"
 *
 * Succeeds only when the first remaining token has the text `true` or
 * `false`; the token is then handed to `m1TType(tk.TokenType.ID)`, so it
 * must also carry the `ID` token type.
 * @param x the matchee pair to test.
 * @returns the result of the `ID` matcher, or `None` when there is no
 * remaining token or its text is not a boolean literal.
 */
 function tBool(x) {
    // No token left: report a failed match instead of reading `.text` of `undefined`.
    if (x.remained.length == 0) {
        return { _tag: "None" };
    }
    let text = x.remained[0].text;
    if (text == "true" || text == "false") {
        return thenDo(toSome(x), m1TType(tk.TokenType.ID));
    }
    else {
        return { _tag: "None" };
    }
 }
 /**
 * define the right hand side of a grammar
 * eg. `LHS ::= a + b`
 * @param process  the right hand side processing : eg. `a + b` in `LHS`.
 * It is called with the matchee pair and must return a `Some`/`None` value.
 * @param arrange define the order (0 starting) of the elements of the result:
 * `arrange[i]` is the position in the result array that receives the i-th
 * element of `matched`. eg. matched `a + c` with `[1, 0, 2]` gives `(+ a c)`.
 * @returns a function that runs `process`; it returns the `None` value
 * unchanged on failure, otherwise the rearranged array.
 */
 function gramRHS(process, arrange) {
    return (m) => {
        let result = process(m);
        // Serialize explicitly: interpolating the object itself only prints "[object Object]".
        console.log(`result ${JSON.stringify(result)}`);
        if (result._tag == "None") {
            return result;
        }
        let matched = result.value.matched;
        let return_array = Array(arrange.length);
        // `target` is the destination slot of the index-th matched element.
        arrange.forEach((target, index) => {
            return_array[target] = matched[index];
        });
        return return_array;
    };
 }
 // CONST ::= INT | STR | FLO | BOOL
 // The alternatives are combined with nested `orDo` calls in the order
 // tInt, tFlo, tStr, tBool; `[0]` puts the single matched element at result position 0.
 var constParser = gramRHS((x) => { return thenDo(toSome(x), orDo(orDo(orDo(tInt, tFlo), tStr), tBool)); }, [0]);
 // Run the CONST parser on the command-line tokens and dump the result in full depth.
 let tree = constParser(commandTPair);
 console.log(util.inspect(tree, { showHidden: true, depth: null }));
--- a/src/index.ts
+++ b/src/index.ts
@ -1,32 +1,41 @@
 var fs = require('fs');
-
+import { argv } from 'node:process';
 import * as tk from './tokenize.js';
 import * as util from 'util';
-
+/**
-
+ * token tree type.
-let b : Array<tk.Token> = tk.tokenize("2+2");
+ */
 type tkTree = tk.Token[] | tk.Token
 export interface TokenMatcheePair {
-    matched: tk.Token[]
+    matched: tkTree[]
    remained: tk.Token[]
 }
 /**
 * @description
- * it returns a function which test if the first char of the `remained` part of
+ * match one token type.
- *  the argument of the function is `c`, if it's true, update the `MatchedPair` wrapped
+ * 
 * it returns a function which test if the type of first token of the `remained` part of
 *  the argument of the function is `typ` , if it's true, update the `TokenMatcheePair` wrapped
 * in `Some`. Otherwise, it returns `None`.
- *  * @param t : the char to be test.
+ *  * @param typ : the type to be test.
- * @returns the updated `MatchedPair` wrapped in `Some(x)` or `None`.
+ * @returns the updated `TokenMatcheePair` wrapped in `Some(x)` or `None`.
 */
-export function match1token(t: tk.Token): (m: TokenMatcheePair) => tk.Maybe<TokenMatcheePair> {
+export function m1TType(typ: tk.TokenType):
    (m: TokenMatcheePair) => tk.Maybe<TokenMatcheePair> {
    return (m: TokenMatcheePair) => {
        if (m.remained.length == 0) {
            return { _tag: "None" };
        }
-        const tokenToBeMatched = m.remained[0];
+        /**
-        if (tokenToBeMatched === t) {
+         * token to be matched
-            m.matched.push(tokenToBeMatched);
+         * */
        const ttbm = m.remained[0];
        if (ttbm.type == typ) {
            m.matched.push(ttbm);
            return {
                _tag: "Some", value: {
                    matched: m.matched,
@ -40,7 +49,72 @@ export function match1token(t: tk.Token): (m: TokenMatcheePair) => tk.Maybe<Toke
    }
 };
 let toSome = tk.toSome;
 let thenDo = tk.thenDo;
 let orDo = tk.orDo;
-let c = tk.toSome(b);
+argv.forEach((val, index) => {
-console.log(thenDo(c,match1token(tk.tokenize("+")[0])));
+    console.log(`${index}=${val}`);
 });
 // The text to parse is read from argv[2]; it is `undefined` when that argument is absent.
 let commandInput = argv[2];
 // Tokenize the input and print the token list for inspection.
 let commandInputTokenized = tk.tokenize(commandInput);
 console.log(commandInputTokenized);
 /**
 * matchee pair of commandInputTokenized:
 * nothing has been matched yet and every token is still remaining.
 */
 let commandTPair : TokenMatcheePair = {matched:[],
                                remained: commandInputTokenized};
 // Single-token matchers built with `m1TType`, one per literal token type.
 let tInt = m1TType(tk.TokenType.INT);
 let tFlo = m1TType(tk.TokenType.FLO);
 let tStr = m1TType(tk.TokenType.STR);
 /**
 * BOOL ::= "true" | "false"
 *
 * Succeeds only when the first remaining token has the text `true` or
 * `false`; the token is then handed to `m1TType(tk.TokenType.ID)`, so it
 * must also carry the `ID` token type.
 * @param x the matchee pair to test.
 * @returns the result of the `ID` matcher, or `None` when there is no
 * remaining token or its text is not a boolean literal.
 */
 function tBool (x : TokenMatcheePair) :tk.Maybe<TokenMatcheePair> {
    // No token left: report a failed match instead of reading `.text` of `undefined`.
    if (x.remained.length == 0){
        return {_tag : "None"};
    }
    let text = x.remained[0].text
    if (text == "true" || text == "false"){
        return thenDo(toSome(x), m1TType(tk.TokenType.ID));
    }else{
        return {_tag : "None"};
    }
 }
 /**
 * define the right hand side of a grammar
 * eg. `LHS ::= a + b`
 * @param process  the right hand side processing : eg. `a + b` in `LHS`.
 * It is called with the matchee pair and must return a `Some`/`None` value.
 * @param arrange define the order (0 starting) of the elements of the result:
 * `arrange[i]` is the position in the result array that receives the i-th
 * element of `matched`. eg. matched `a + c` with `[1, 0, 2]` gives `(+ a c)`.
 * @returns a function that runs `process`; it returns the `None` value
 * unchanged on failure, otherwise the rearranged array.
 */
 function gramRHS (process: Function, arrange : number[]){
    return (m : TokenMatcheePair)=>{
    let result : tk.Maybe<TokenMatcheePair> = process(m);
    // Serialize explicitly: interpolating the object itself only prints "[object Object]".
    console.log(`result ${JSON.stringify(result)}`)
    if (result._tag == "None"){
        return result;
    }
    let matched = result.value.matched;
    let return_array : tkTree[] = Array(arrange.length);
    // `target` is the destination slot of the index-th matched element.
    arrange.forEach((target, index) => {
        return_array[target] = matched[index];
    });
    return return_array;
    }
 }
 /**
 *  CONST ::= INT | STR | FLO | BOOL
 *
 *  The alternatives are combined with nested `orDo` calls in the order
 *  tInt, tFlo, tStr, tBool; `[0]` puts the single matched element at
 *  result position 0.
 */
 var constParser = gramRHS((x : TokenMatcheePair)=>
    {return thenDo(toSome(x),orDo(orDo(orDo(tInt,tFlo),tStr),tBool))}, [0]);
 // Run the CONST parser on the command-line tokens and dump the result in full depth.
 let tree = constParser(commandTPair);
 console.log(util.inspect(tree, { showHidden: true, depth: null })); 
--- a/src/tokenize.ts
+++ b/src/tokenize.ts
@ -98,7 +98,9 @@ export enum TokenType {
    NE, // <>
    APOS, // '
    R_ARROW, // ->
-
+    TRUE, // true
    FALSE, // false
    IF, // if
 }
 /**
@ -197,6 +199,25 @@ export function matchRange(l: string, u: string): (m: MatcheePair) => Maybe<Matc
    }
 };
 /**
 * Build a matcher for a whole word: the returned function feeds the
 * characters of `s`, one after another, through `match1Char`, threading
 * the intermediate state with `thenDo`.
 * @param s the word to check for; an empty word never matches.
 * @returns a function from a matchee pair `m` to `None`, or to whatever
 * the chained `match1Char` calls produce wrapped in `Some`.
 */
 export function matchWord(s: string, ): (m: MatcheePair) => Maybe<MatcheePair> {
    return (m)=>{
        // an empty word is rejected outright
        if (s.length==0){
            return { _tag: "None" };
        }
        // `split("")` walks the same UTF-16 code units that indexing `s[i]` would.
        return s.split("").reduce<Maybe<MatcheePair>>(
            (state, ch) => thenDo(state, match1Char(ch)),
            toSome(m));
    }
 }
 /**
 * convert the one-char string to codepoint.
 * @param s : the string to code point.
@ -444,6 +465,7 @@ export function tokenize(input: string): Array<Token> {
        thenDo(thenDo(x, match1Char("-")), match1Char(">")),
        TokenType.R_ARROW);
    /**
     * unary operator : generating the pattern of basic unary operator
     * @param char : unary char for the operator
@ -488,7 +510,7 @@ export function tokenize(input: string): Array<Token> {
            lParen, rParen, lBracket, rBracket, lBrace, rBrace,
            comma, dot, colon, semicolon, at, hash,
            set, greaterthan, lessthan, apos,
-            float, newline, space, integer, str, id];
+            float, newline, space,  id,  integer, str];
        let term_aux = term_list.reduce((x, y) => orDo(x, y));
        var new_x: Maybe<MatcheePair> = thenDo(old_x, term_aux);