From f801ef14fc25ae122aaef4aede639ddb9b37006a Mon Sep 17 00:00:00 2001 From: Tan Kian-ting Date: Tue, 19 Sep 2023 00:35:01 +0800 Subject: [PATCH] rebuild parser --- README.md | 1 + parserRule.txt | 1 + parser_rule.txt => parserRuleRefOnly.txt | 3 +- src/index.js | 220 ++++++++--------- src/index.ts | 293 ++++++++++++----------- src/tokenize.ts | 2 +- 6 files changed, 262 insertions(+), 258 deletions(-) create mode 100644 parserRule.txt rename parser_rule.txt => parserRuleRefOnly.txt (93%) diff --git a/README.md b/README.md index 7683ac3..1edfb8b 100644 --- a/README.md +++ b/README.md @@ -9,3 +9,4 @@ another personal draught of a typesetting language and engine. - 20230907-08:強化`tokenize`。 - 20230910 : add basic parser `CONST` rule, and add the grammar rule. - 20230914-15: 追加一寡 tokenizer ê 功能。 + - 20230918: 重新tuì下kàu頂起做parser. add rule diff --git a/parserRule.txt b/parserRule.txt new file mode 100644 index 0000000..0bce539 --- /dev/null +++ b/parserRule.txt @@ -0,0 +1 @@ +expr = int | int add int # expr1 and #expr2 \ No newline at end of file diff --git a/parser_rule.txt b/parserRuleRefOnly.txt similarity index 93% rename from parser_rule.txt rename to parserRuleRefOnly.txt index d7d1f9a..efff906 100644 --- a/parser_rule.txt +++ b/parserRuleRefOnly.txt @@ -56,8 +56,7 @@ APPLYEE ::= REF | CONST | EXPR | FUNC BOOL ::= "true" | "false" FUNC ::= FUNC_OPTION? ( ARGS? ) -> TYPE {BLOCK} BLOCK ::= PROG (return ID |noReturn) ; -ARGS ::= TYPE (TYPE_VARS | ID) - | TYPE (TYPE_VARS | TYPE_VARS) , ARGS + REF ::= VAR "." ID | VAR VAR ::= ID VAR_DEF ::= "let" VAR "=" EXPR diff --git a/src/index.js b/src/index.js index b73463e..bd62a48 100644 --- a/src/index.js +++ b/src/index.js @@ -28,6 +28,32 @@ var fs = require('fs'); const node_process_1 = require("node:process"); const tk = __importStar(require("./tokenize.js")); const util = __importStar(require("util")); +/** + * debug reprensenting + */ +let repr = (x) => { return util.inspect(x, { depth: null }); }; +/** + * concated 2 `tkTree`s + * @param x the array to be concated + * @param y the item or array to ve concated + * @returns concated tkTree array, or thrown error if can't be concated. + */ +function concat(x, y) { + if (Array.isArray(x)) { + return x.concat(y); + } + else { + throw new Error("the tkTree can't be concated, because it's not an array."); + } +} +function slice(x, index, end) { + if (Array.isArray(x)) { + return x.slice(index, end); + } + else { + throw new Error("the tkTree can't be concated, because it's not an array."); + } +} /** * @description * match one token type. @@ -52,7 +78,8 @@ function m1TType(typ) { let result = { _tag: "Some", value: { matched: new_matched, - remained: m.remained.slice(1) + remained: m.remained.slice(1), + ast: ([ttbm]), } }; return result; @@ -64,133 +91,108 @@ function m1TType(typ) { } exports.m1TType = m1TType; ; -let toSome = tk.toSome; -let thenDo = tk.thenDo; -let zeroOrOnceDo = tk.zeroOrOnceDo; -let orDo = tk.orDo; -let zeroOrMoreDo = tk.zeroOrMoreDo; +/** + * type int + */ +let tInt = m1TType(tk.TokenType.INT); +let tAdd = m1TType(tk.TokenType.I_ADD); +let tMul = m1TType(tk.TokenType.I_MUL); node_process_1.argv.forEach((val, index) => { console.log(`${index}=${val}`); }); -let commandInput = "int a str b"; //argv[2]; -let commandInputTokenized = tk.tokenize(commandInput); -let commandInputTokenizedFiltered = commandInputTokenized.filter((x) => { - return x.type != tk.TokenType.SP && - x.type != tk.TokenType.NL; -}); -console.log("aaa: " + util.inspect(commandInputTokenizedFiltered, { showHidden: true, depth: null })); /** - * matchee pair of commandInputTokenized + * like `m ==> f` in ocaml + * @param m matchee wrapped + * @param f matching function + * @returns wrapped result */ -let commandTPair = { matched: [], - remained: commandInputTokenizedFiltered }; -let tInt = m1TType(tk.TokenType.INT); -let tFlo = m1TType(tk.TokenType.FLO); -let tStr = m1TType(tk.TokenType.STR); -let tId = m1TType(tk.TokenType.ID); -let tApos = m1TType(tk.TokenType.APOS); -function tBool(x) { - let text = x.remained[0].text; - if (text == "true" || text == "false") { - return thenDo(toSome(x), m1TType(tk.TokenType.ID)); +function thenDo(m, f) { + if (m._tag == "None") { + return m; } else { - return { _tag: "None" }; + var a = f(m.value); + if (a._tag == "Some") { + a.value.ast = concat(m.value.ast, a.value.ast); + } + return a; } } /** - * define the right hand side of a grammar - * eg. `LHS ::= a + b` - * @param process the right hand side processing : eg. `a + b` in `LHS` - * @param arrange define the order (0 starting) of the elements of the result. - * ast. : eg. `a + c` is `1 0 2` `(+ a c)` - * @returns the processed ast. + * like `f1 | f2` in regex + * @param f1 the first tried function + * @param f2 the second tried function + * @returns wrapped result */ -function gramRHS(process, arrange) { - return (m) => { - let middle = process(m); - console.log("Middle" + util.inspect(middle, { showHidden: true, depth: null })); - if (middle._tag == "None") { - return middle; +function orDo(f1, f2) { + return (x) => { + let res1 = f1(x); + if (res1._tag == "Some") { + return res1; } else { - let matched = middle.value.matched; - let arrLength = arrange.length; - let returnRrray = Array(arrange.length); - arrange.forEach((val, index) => { - returnRrray[arrange[index]] = matched[index]; - }); - let matchedTmp1Length = matched.length - arrLength; - console.log(matchedTmp1Length); - var matchedTmp1 = matched - .slice(0, matchedTmp1Length); - console.log("matchedTmp1" + util.inspect(matchedTmp1, { showHidden: true, depth: null })); - console.log("returnRrray" + util.inspect(returnRrray, { showHidden: true, depth: null })); - matchedTmp1.push(returnRrray); - let result = { _tag: "Some", - value: { matched: matchedTmp1, - remained: middle.value.remained } }; - return result; + let res2 = f2(x); + return res2; } }; } -/** - * typeABS ::= "'" ID - */ -var typeABS = (x) => { - var result = thenDo(thenDo(toSome(x), tApos), tId); - if (result._tag == "Some" && "text" in result.value.matched[1]) { - var realToken = result.value.matched[1]; - realToken.text = "'" + realToken.text; - result.value.matched = [realToken]; - } - return result; -}; -/** - * TypeId ::= typeABS | ID - */ -var typeName = (x) => { - return thenDo(toSome(x), orDo(typeABS, tId)); -}; -/** - * CONST ::= INT | STR | FLO | BOOL - */ -/** - * TODO: 要用 debugger 檢查分析問題 - */ -var constParser = gramRHS((x) => { return thenDo(toSome(x), orDo(orDo(orDo(tInt, tFlo), tStr), tBool)); }, [0]); -/** - * storing the tree - */ -var astTree = []; -/** - * TYPE_PAIR ::= TYP_ID ID - */ -var typePair = (x) => { - let a = thenDo(thenDo(x.maybeTokens, typeName), tId); +let midfix = (f, signal) => (x) => { + var a = f(x); if (a._tag == "Some") { - let matched = a.value.matched; - let slice = matched.slice(matched.length - 2); - console.log("slice" + slice); - let b = { maybeTokens: a, ast: slice }; - return b; - } - else { - let b = { maybeTokens: a, ast: [] }; - return b; + let ast_head = slice(a.value.ast, 0, a.value.ast.length - 3); + let ast_tail = slice(a.value.ast, a.value.ast.length - 3); + let new_ast = [ast_tail]; + a.value.ast = new_ast; + console.log("+" + signal + "+" + repr(a)); } + return a; }; /** - * function's arguments - * FN_ARGS = TYPE_PAIR ("," TYPE_PAIR)+ + * + * fac1 = int MUL int */ -var fnArgs = (x) => { - let wrapper = { maybeTokens: toSome(x), ast: [] }; - let a = typePair(wrapper); - console.log("AAAAA" + util.inspect(a, { showHidden: true, depth: null })); - let abanibi = typePair(a); - console.log("ABNB" + util.inspect(abanibi, { showHidden: true, depth: null })); - return { maybeTokens: abanibi.maybeTokens, ast: [a.ast, abanibi.ast] }; +//let fac1 = midfix((x : TokenMatcheePair)=> +// thenDo(thenDo(thenDo(tk.toSome(x), tInt), tMul), tInt)); +let fac1 = (x) => { + let a = midfix((x) => thenDo(thenDo(thenDo(tk.toSome(x), tInt), tMul), tInt), "fac1")(x); + return a; }; -let tree = fnArgs(commandTPair); -console.log("CHRANN" + util.inspect(tree, { showHidden: true, depth: null })); +/** + * + * fac2 = int MUL int + */ +let fac2 = tInt; +/** + * fac = fac1 | fac2 + */ +let fac = orDo(fac1, fac2); +/** + * + * expr1 = fac ADD fac + */ +let expr1 = midfix((x) => thenDo(thenDo(thenDo(tk.toSome(x), fac), tAdd), fac), "expr1"); +/** + * expr2 = fac + */ +let expr2 = fac; +/** + * expr = expr1 | expr2 + */ +let expr = orDo(expr1, expr2); +let tokens = tk.tokenize("2+3"); //tk.tokenize(argv[2]); +let tokensFiltered = tokens.filter((x) => { + return (x.type != tk.TokenType.NL + && x.type != tk.TokenType.SP); +}); +let wrappedTokens = tk.toSome({ + matched: [], + remained: tokensFiltered, + ast: [] +}); +let beta = expr({ + matched: [], + remained: tokensFiltered, + ast: [] +}); +console.log(repr(wrappedTokens)); +console.log(repr(beta)); diff --git a/src/index.ts b/src/index.ts index 48d2c86..4c1ce91 100644 --- a/src/index.ts +++ b/src/index.ts @@ -2,21 +2,54 @@ var fs = require('fs'); import { argv, resourceUsage } from 'node:process'; import * as tk from './tokenize.js'; import * as util from 'util'; -import { reduceRotation } from 'pdf-lib'; +import { drawEllipsePath, reduceRotation } from 'pdf-lib'; +import { isTypedArray } from 'node:util/types'; +import { error } from 'node:console'; + +/** + * debug reprensenting + */ +let repr = (x : any)=>{return util.inspect(x, {depth: null})}; /** * token tree type. */ type tkTree = tkTree[] | tk.Token -export interface TokenMatcheePair { - matched: tkTree[] - remained: tk.Token[] +/** + * concated 2 `tkTree`s + * @param x the array to be concated + * @param y the item or array to ve concated + * @returns concated tkTree array, or thrown error if can't be concated. + */ +function concat(x: tkTree, y:tkTree): tkTree[] { + if (Array.isArray(x)){ + return x.concat(y); + }else{ + throw new Error("the tkTree can't be concated, because it's not an array."); + + } } -export interface MaybeTokensAST{ - maybeTokens: tk.Maybe; - ast: tkTree; +function slice(x: tkTree, index?:number, end?:number): tkTree[] { + if (Array.isArray(x)){ + return x.slice(index,end); + }else{ + throw new Error("the tkTree can't be concated, because it's not an array."); + + } +} + +/** + * TokenMatcheePair for tokens' parser combinator + * matched: the matched (now and before) tokens + * remained: tokens to be matched + * ast: abstract syntax tree + */ +export interface TokenMatcheePair { + matched: tk.Token[] + remained: tk.Token[] + ast : tkTree[] } /** @@ -45,7 +78,8 @@ export function m1TType(typ: tk.TokenType): let result : tk.Some = { _tag: "Some", value: { matched: new_matched, - remained: m.remained.slice(1) + remained: m.remained.slice(1), + ast: ([ttbm]), } }; return result; @@ -56,168 +90,135 @@ export function m1TType(typ: tk.TokenType): } }; -let toSome = tk.toSome; -let thenDo = tk.thenDo; -let zeroOrOnceDo = tk.zeroOrOnceDo; -let orDo = tk.orDo; -let zeroOrMoreDo = tk.zeroOrMoreDo; +/** + * type int + */ +let tInt = m1TType(tk.TokenType.INT); +let tAdd = m1TType(tk.TokenType.I_ADD); +let tMul = m1TType(tk.TokenType.I_MUL); argv.forEach((val, index) => { console.log(`${index}=${val}`); }); -let commandInput = "int a str b"//argv[2]; -let commandInputTokenized = tk.tokenize(commandInput); -let commandInputTokenizedFiltered = commandInputTokenized.filter( - (x : tk.Token)=>{return x.type != tk.TokenType.SP && - x.type != tk.TokenType.NL}); -console.log("aaa: "+util.inspect(commandInputTokenizedFiltered, { showHidden: true, depth: null })); /** - * matchee pair of commandInputTokenized + * like `m ==> f` in ocaml + * @param m matchee wrapped + * @param f matching function + * @returns wrapped result */ -let commandTPair : TokenMatcheePair = {matched:[], - remained: commandInputTokenizedFiltered}; - - -let tInt = m1TType(tk.TokenType.INT); -let tFlo = m1TType(tk.TokenType.FLO); -let tStr = m1TType(tk.TokenType.STR); -let tId = m1TType(tk.TokenType.ID); -let tApos = m1TType(tk.TokenType.APOS); - - -function tBool (x : TokenMatcheePair) :tk.Maybe { - let text = x.remained[0].text - if (text == "true" || text == "false"){ - return thenDo(toSome(x), m1TType(tk.TokenType.ID)); +function thenDo(m : tk.Maybe, f : Function){ + if (m._tag == "None"){ + return m; }else{ - return {_tag : "None"}; + var a : tk.Maybe = f(m.value); + if (a._tag == "Some"){ + a.value.ast = concat(m.value.ast, a.value.ast); + } + + return a; } } /** - * define the right hand side of a grammar - * eg. `LHS ::= a + b` - * @param process the right hand side processing : eg. `a + b` in `LHS` - * @param arrange define the order (0 starting) of the elements of the result. - * ast. : eg. `a + c` is `1 0 2` `(+ a c)` - * @returns the processed ast. + * like `f1 | f2` in regex + * @param f1 the first tried function + * @param f2 the second tried function + * @returns wrapped result */ -function gramRHS (process: Function, arrange : number[]){ - return (m : TokenMatcheePair)=>{ - - let middle : tk.Maybe = process(m); - - console.log("Middle"+util.inspect(middle, { showHidden: true, depth: null })); - - if (middle._tag == "None"){ - return middle; +function orDo(f1 : Function, f2 : Function){ + return (x : TokenMatcheePair) =>{ + let res1 : tk.Maybe = f1(x); + if (res1._tag == "Some"){ + return res1; + }else{ + let res2 : tk.Maybe = f2(x); + return res2; + } } - else{ - let matched = middle.value.matched; - let arrLength = arrange.length; - let returnRrray : tkTree[] = Array(arrange.length); - - arrange.forEach((val, index) => { - returnRrray[arrange[index]] = matched[index]; - }); - - let matchedTmp1Length = matched.length-arrLength; - console.log(matchedTmp1Length); - var matchedTmp1 : tkTree[] = matched - .slice(0,matchedTmp1Length); - - console.log("matchedTmp1"+util.inspect(matchedTmp1, { showHidden: true, depth: null })); - console.log("returnRrray"+util.inspect(returnRrray, { showHidden: true, depth: null })); - matchedTmp1.push(returnRrray); - - - let result : tk.Some = {_tag:"Some", - value : {matched : matchedTmp1, - remained : middle.value.remained}}; - return result; - } - } -} - -/** - * typeABS ::= "'" ID - */ -var typeABS = (x : TokenMatcheePair)=> -{ - var result = thenDo(thenDo(toSome(x),tApos),tId); - if (result._tag == "Some" && "text" in result.value.matched[1]){ - var realToken : tk.Token = result.value.matched[1]; - realToken.text = "'"+realToken.text; - result.value.matched = [realToken]; - } - return result; -} - -/** - * TypeId ::= typeABS | ID - */ -var typeName = (x : TokenMatcheePair)=> -{ - return thenDo(toSome(x), orDo(typeABS, tId)); -} - -/** - * CONST ::= INT | STR | FLO | BOOL - */ - -/** - * TODO: 要用 debugger 檢查分析問題 - */ -var constParser = gramRHS((x : TokenMatcheePair)=> - {return thenDo(toSome(x),orDo(orDo(orDo(tInt,tFlo),tStr),tBool))}, [0]); - -/** - * storing the tree - */ -var astTree : tkTree = []; - -/** - * TYPE_PAIR ::= TYP_ID ID - */ -var typePair = (x : MaybeTokensAST)=> -{ - - let a = thenDo(thenDo(x.maybeTokens, typeName), tId); +} + +let midfix = (f : Function, signal? : string) => (x : TokenMatcheePair)=>{ + var a = f(x); if (a._tag == "Some"){ - let matched = a.value.matched; - let slice = matched.slice(matched.length-2); - console.log("slice"+slice); + let ast_head : tkTree[] = slice(a.value.ast,0,a.value.ast.length-3); + let ast_tail : tkTree[] = slice(a.value.ast,a.value.ast.length-3); + let new_ast = [ast_tail]; + a.value.ast = new_ast; - let b : MaybeTokensAST = {maybeTokens : a, ast : slice}; - return b; + console.log("+"+signal+"+"+repr(a)); + + } - else{ - let b : MaybeTokensAST= {maybeTokens : a, ast : []}; - return b; - } -} + return a; +} /** - * function's arguments - * FN_ARGS = TYPE_PAIR ("," TYPE_PAIR)+ + * + * fac1 = int MUL int */ +//let fac1 = midfix((x : TokenMatcheePair)=> +// thenDo(thenDo(thenDo(tk.toSome(x), tInt), tMul), tInt)); -var fnArgs = (x : TokenMatcheePair)=> - { - let wrapper : MaybeTokensAST = {maybeTokens : toSome(x), ast : []}; - let a = typePair(wrapper); - console.log("AAAAA"+util.inspect(a, { showHidden: true, depth: null })); - let abanibi = typePair(a); - console.log("ABNB"+util.inspect(abanibi, { showHidden: true, depth: null })); +let fac1 = (x : TokenMatcheePair) => { + let a = midfix((x : TokenMatcheePair)=> + thenDo(thenDo(thenDo(tk.toSome(x), tInt), tMul), tInt), "fac1")(x); + + return a; +} + + +/** + * + * fac2 = int MUL int + */ +let fac2 = tInt; + +/** + * fac = fac1 | fac2 + */ +let fac = orDo(fac1, fac2); + + +/** + * + * expr1 = fac ADD fac + */ +let expr1 = midfix((x : TokenMatcheePair)=> + thenDo(thenDo(thenDo(tk.toSome(x), fac), tAdd), fac), "expr1"); +/** + * expr2 = fac + */ +let expr2 = fac; + +/** + * expr = expr1 | expr2 + */ +let expr = orDo(expr1, expr2); - return {maybeTokens : abanibi.maybeTokens, ast : [a.ast, abanibi.ast]}; - - }; -let tree = fnArgs(commandTPair); -console.log("CHRANN"+util.inspect(tree, { showHidden: true, depth: null })); + +let tokens = tk.tokenize("2+3*4");//tk.tokenize(argv[2]); +let tokensFiltered = tokens.filter( + (x)=>{return (x.type != tk.TokenType.NL + && x.type != tk.TokenType.SP)}); + +let wrappedTokens : tk.Maybe = + tk.toSome({ + matched : [] , + remained : tokensFiltered, + ast : []}); + +let beta = expr({ + matched : [] , + remained : tokensFiltered, + ast : []}); + +console.log(repr(wrappedTokens)); + +console.log(repr(beta)); + diff --git a/src/tokenize.ts b/src/tokenize.ts index 6fa22f4..144dbed 100644 --- a/src/tokenize.ts +++ b/src/tokenize.ts @@ -64,7 +64,7 @@ export interface MatcheePair { * SEMI_C// semi-colon */ export enum TokenType { - NL, // newlinw + NL, // newline SP, // half-width space and tab ID, // identifier STR, // string