rebuild parser

This commit is contained in:
Tan, Kian-ting 2023-09-19 00:35:01 +08:00
parent bf75beea2e
commit f801ef14fc
6 changed files with 262 additions and 258 deletions

View file

@@ -9,3 +9,4 @@ another personal draught of a typesetting language and engine.
- 20230907-08: enhance `tokenize`.
- 20230910 : add basic parser `CONST` rule, and add the grammar rule.
- 20230914-15: add some more tokenizer features.
+- 20230918: rebuild the parser from the bottom up. add rule

parserRule.txt Normal file
View file

@ -0,0 +1 @@
expr = int | int add int # expr1 and #expr2
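
The rule in parserRule.txt is the one the rebuilt combinators in parser.ts encode: the `int add int` alternative is tried before the bare `int` one, because an ordered-choice combinator commits to the first alternative that succeeds. Below is a minimal, self-contained TypeScript sketch of that encoding; the Token/State shapes and the helper names `one`, `then_`, `or_` are simplified stand-ins for illustration, not the repository's actual definitions.

// Hypothetical, simplified stand-ins for the structures used in parser.ts.
type Token = { type: "INT" | "ADD"; text: string };
type State = { remained: Token[]; ast: Token[] };
type Maybe<T> = { _tag: "Some"; value: T } | { _tag: "None" };

// Match one token of a given type (in the spirit of m1TType).
const one = (t: Token["type"]) => (s: State): Maybe<State> =>
    s.remained.length > 0 && s.remained[0].type === t
        ? { _tag: "Some", value: { remained: s.remained.slice(1), ast: s.ast.concat(s.remained[0]) } }
        : { _tag: "None" };

// Sequencing (like thenDo) and ordered choice (like orDo).
const then_ = (m: Maybe<State>, f: (s: State) => Maybe<State>): Maybe<State> =>
    m._tag === "None" ? m : f(m.value);
const or_ = (f1: (s: State) => Maybe<State>, f2: (s: State) => Maybe<State>) =>
    (s: State): Maybe<State> => {
        const r = f1(s);
        return r._tag === "Some" ? r : f2(s);
    };

// expr = int add int | int  -- longer alternative first, as parser.ts tries expr1 before expr2.
const intAddInt = (s: State) => then_(then_(one("INT")(s), one("ADD")), one("INT"));
const expr = or_(intAddInt, one("INT"));

const tokens: Token[] = [
    { type: "INT", text: "2" }, { type: "ADD", text: "+" }, { type: "INT", text: "3" }];
console.log(expr({ remained: tokens, ast: [] })); // Some, with all three tokens collected in ast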

View file

@@ -56,8 +56,7 @@ APPLYEE ::= REF | CONST | EXPR | FUNC
BOOL ::= "true" | "false"
FUNC ::= FUNC_OPTION? ( ARGS? ) -> TYPE {BLOCK}
BLOCK ::= PROG (return ID |noReturn) ;
ARGS ::= TYPE (TYPE_VARS | ID)
       | TYPE (TYPE_VARS | TYPE_VARS) , ARGS
REF ::= VAR "." ID | VAR
VAR ::= ID
VAR_DEF ::= "let" VAR "=" EXPR

View file

@@ -28,6 +28,32 @@ var fs = require('fs');
const node_process_1 = require("node:process");
const tk = __importStar(require("./tokenize.js"));
const util = __importStar(require("util"));
+/**
+ * debug reprensenting
+ */
+let repr = (x) => { return util.inspect(x, { depth: null }); };
+/**
+ * concated 2 `tkTree`s
+ * @param x the array to be concated
+ * @param y the item or array to ve concated
+ * @returns concated tkTree array, or thrown error if can't be concated.
+ */
+function concat(x, y) {
+    if (Array.isArray(x)) {
+        return x.concat(y);
+    }
+    else {
+        throw new Error("the tkTree can't be concated, because it's not an array.");
+    }
+}
+function slice(x, index, end) {
+    if (Array.isArray(x)) {
+        return x.slice(index, end);
+    }
+    else {
+        throw new Error("the tkTree can't be concated, because it's not an array.");
+    }
+}
/**
 * @description
 * match one token type.
@@ -52,7 +78,8 @@ function m1TType(typ) {
    let result = {
        _tag: "Some", value: {
            matched: new_matched,
-            remained: m.remained.slice(1)
+            remained: m.remained.slice(1),
+            ast: ([ttbm]),
        }
    };
    return result;
@@ -64,133 +91,108 @@ function m1TType(typ) {
    }
}
exports.m1TType = m1TType;
;
-let toSome = tk.toSome;
-let thenDo = tk.thenDo;
-let zeroOrOnceDo = tk.zeroOrOnceDo;
-let orDo = tk.orDo;
-let zeroOrMoreDo = tk.zeroOrMoreDo;
+/**
+ * type int
+ */
+let tInt = m1TType(tk.TokenType.INT);
+let tAdd = m1TType(tk.TokenType.I_ADD);
+let tMul = m1TType(tk.TokenType.I_MUL);
node_process_1.argv.forEach((val, index) => {
    console.log(`${index}=${val}`);
});
-let commandInput = "int a str b"; //argv[2];
-let commandInputTokenized = tk.tokenize(commandInput);
-let commandInputTokenizedFiltered = commandInputTokenized.filter((x) => {
-    return x.type != tk.TokenType.SP &&
-        x.type != tk.TokenType.NL;
-});
-console.log("aaa: " + util.inspect(commandInputTokenizedFiltered, { showHidden: true, depth: null }));
/**
- * matchee pair of commandInputTokenized
+ * like `m ==> f` in ocaml
+ * @param m matchee wrapped
+ * @param f matching function
+ * @returns wrapped result
 */
-let commandTPair = { matched: [],
-    remained: commandInputTokenizedFiltered };
-let tInt = m1TType(tk.TokenType.INT);
-let tFlo = m1TType(tk.TokenType.FLO);
-let tStr = m1TType(tk.TokenType.STR);
-let tId = m1TType(tk.TokenType.ID);
-let tApos = m1TType(tk.TokenType.APOS);
-function tBool(x) {
-    let text = x.remained[0].text;
-    if (text == "true" || text == "false") {
-        return thenDo(toSome(x), m1TType(tk.TokenType.ID));
+function thenDo(m, f) {
+    if (m._tag == "None") {
+        return m;
    }
    else {
-        return { _tag: "None" };
+        var a = f(m.value);
+        if (a._tag == "Some") {
+            a.value.ast = concat(m.value.ast, a.value.ast);
+        }
+        return a;
    }
}
/**
- * define the right hand side of a grammar
- * eg. `LHS ::= a + b`
- * @param process the right hand side processing : eg. `a + b` in `LHS`
- * @param arrange define the order (0 starting) of the elements of the result.
- * ast. : eg. `a + c` is `1 0 2` `(+ a c)`
- * @returns the processed ast.
+ * like `f1 | f2` in regex
+ * @param f1 the first tried function
+ * @param f2 the second tried function
+ * @returns wrapped result
 */
-function gramRHS(process, arrange) {
-    return (m) => {
-        let middle = process(m);
-        console.log("Middle" + util.inspect(middle, { showHidden: true, depth: null }));
-        if (middle._tag == "None") {
-            return middle;
+function orDo(f1, f2) {
+    return (x) => {
+        let res1 = f1(x);
+        if (res1._tag == "Some") {
+            return res1;
        }
        else {
-            let matched = middle.value.matched;
-            let arrLength = arrange.length;
-            let returnRrray = Array(arrange.length);
-            arrange.forEach((val, index) => {
-                returnRrray[arrange[index]] = matched[index];
-            });
-            let matchedTmp1Length = matched.length - arrLength;
-            console.log(matchedTmp1Length);
-            var matchedTmp1 = matched
-                .slice(0, matchedTmp1Length);
-            console.log("matchedTmp1" + util.inspect(matchedTmp1, { showHidden: true, depth: null }));
-            console.log("returnRrray" + util.inspect(returnRrray, { showHidden: true, depth: null }));
-            matchedTmp1.push(returnRrray);
-            let result = { _tag: "Some",
-                value: { matched: matchedTmp1,
-                    remained: middle.value.remained } };
-            return result;
+            let res2 = f2(x);
+            return res2;
        }
    };
}
-/**
- * typeABS ::= "'" ID
- */
-var typeABS = (x) => {
-    var result = thenDo(thenDo(toSome(x), tApos), tId);
-    if (result._tag == "Some" && "text" in result.value.matched[1]) {
-        var realToken = result.value.matched[1];
-        realToken.text = "'" + realToken.text;
-        result.value.matched = [realToken];
-    }
-    return result;
-};
-/**
- * TypeId ::= typeABS | ID
- */
-var typeName = (x) => {
-    return thenDo(toSome(x), orDo(typeABS, tId));
-};
-/**
- * CONST ::= INT | STR | FLO | BOOL
- */
-/**
- * TODO: 要用 debugger 檢查分析問題
- */
-var constParser = gramRHS((x) => { return thenDo(toSome(x), orDo(orDo(orDo(tInt, tFlo), tStr), tBool)); }, [0]);
-/**
- * storing the tree
- */
-var astTree = [];
-/**
- * TYPE_PAIR ::= TYP_ID ID
- */
-var typePair = (x) => {
-    let a = thenDo(thenDo(x.maybeTokens, typeName), tId);
+let midfix = (f, signal) => (x) => {
+    var a = f(x);
    if (a._tag == "Some") {
-        let matched = a.value.matched;
-        let slice = matched.slice(matched.length - 2);
-        console.log("slice" + slice);
-        let b = { maybeTokens: a, ast: slice };
-        return b;
-    }
-    else {
-        let b = { maybeTokens: a, ast: [] };
-        return b;
+        let ast_head = slice(a.value.ast, 0, a.value.ast.length - 3);
+        let ast_tail = slice(a.value.ast, a.value.ast.length - 3);
+        let new_ast = [ast_tail];
+        a.value.ast = new_ast;
+        console.log("+" + signal + "+" + repr(a));
    }
+    return a;
};
/**
- * function's arguments
- * FN_ARGS = TYPE_PAIR ("," TYPE_PAIR)+
+ *
+ * fac1 = int MUL int
 */
-var fnArgs = (x) => {
-    let wrapper = { maybeTokens: toSome(x), ast: [] };
-    let a = typePair(wrapper);
-    console.log("AAAAA" + util.inspect(a, { showHidden: true, depth: null }));
-    let abanibi = typePair(a);
-    console.log("ABNB" + util.inspect(abanibi, { showHidden: true, depth: null }));
-    return { maybeTokens: abanibi.maybeTokens, ast: [a.ast, abanibi.ast] };
+//let fac1 = midfix((x : TokenMatcheePair)=>
+//    thenDo(thenDo(thenDo(tk.toSome(x), tInt), tMul), tInt));
+let fac1 = (x) => {
+    let a = midfix((x) => thenDo(thenDo(thenDo(tk.toSome(x), tInt), tMul), tInt), "fac1")(x);
+    return a;
};
-let tree = fnArgs(commandTPair);
-console.log("CHRANN" + util.inspect(tree, { showHidden: true, depth: null }));
+/**
+ *
+ * fac2 = int MUL int
+ */
+let fac2 = tInt;
+/**
+ * fac = fac1 | fac2
+ */
+let fac = orDo(fac1, fac2);
+/**
+ *
+ * expr1 = fac ADD fac
+ */
+let expr1 = midfix((x) => thenDo(thenDo(thenDo(tk.toSome(x), fac), tAdd), fac), "expr1");
+/**
+ * expr2 = fac
+ */
+let expr2 = fac;
+/**
+ * expr = expr1 | expr2
+ */
+let expr = orDo(expr1, expr2);
+let tokens = tk.tokenize("2+3"); //tk.tokenize(argv[2]);
+let tokensFiltered = tokens.filter((x) => {
+    return (x.type != tk.TokenType.NL
+        && x.type != tk.TokenType.SP);
+});
+let wrappedTokens = tk.toSome({
+    matched: [],
+    remained: tokensFiltered,
+    ast: []
+});
+let beta = expr({
+    matched: [],
+    remained: tokensFiltered,
+    ast: []
+});
+console.log(repr(wrappedTokens));
+console.log(repr(beta));
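
Taken together, `thenDo` threads the `Maybe`-wrapped matchee along and concatenates each newly matched token onto `ast`, while `midfix` then regroups the last three `ast` entries into a single sub-list. Assuming `m1TType` answers `None` when no tokens remain, my reading of a run of `expr` over the filtered tokens of "2+3" is roughly the following; the token spellings are shorthand, not the actual printed output.

// hypothetical trace of expr on "2+3"
// fac (= orDo(fac1, fac2)) falls back to fac2 and matches INT "2"  -> ast: ["2"]
// tAdd matches I_ADD "+"                                           -> ast: ["2", "+"]
// fac matches INT "3"                                              -> ast: ["2", "+", "3"]
// midfix(..., "expr1") wraps the last three entries into one node  -> ast: [["2", "+", "3"]]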

View file

@@ -2,21 +2,54 @@ var fs = require('fs');
import { argv, resourceUsage } from 'node:process';
import * as tk from './tokenize.js';
import * as util from 'util';
-import { reduceRotation } from 'pdf-lib';
+import { drawEllipsePath, reduceRotation } from 'pdf-lib';
+import { isTypedArray } from 'node:util/types';
+import { error } from 'node:console';
+/**
+ * debug reprensenting
+ */
+let repr = (x : any)=>{return util.inspect(x, {depth: null})};
/**
 * token tree type.
 */
type tkTree = tkTree[] | tk.Token
-export interface TokenMatcheePair {
-    matched: tkTree[]
-    remained: tk.Token[]
+/**
+ * concated 2 `tkTree`s
+ * @param x the array to be concated
+ * @param y the item or array to ve concated
+ * @returns concated tkTree array, or thrown error if can't be concated.
+ */
+function concat(x: tkTree, y:tkTree): tkTree[] {
+    if (Array.isArray(x)){
+        return x.concat(y);
+    }else{
+        throw new Error("the tkTree can't be concated, because it's not an array.");
+    }
}
-export interface MaybeTokensAST{
-    maybeTokens: tk.Maybe<TokenMatcheePair>;
-    ast: tkTree;
+function slice(x: tkTree, index?:number, end?:number): tkTree[] {
+    if (Array.isArray(x)){
+        return x.slice(index,end);
+    }else{
+        throw new Error("the tkTree can't be concated, because it's not an array.");
+    }
+}
+/**
+ * TokenMatcheePair for tokens' parser combinator
+ * matched: the matched (now and before) tokens
+ * remained: tokens to be matched
+ * ast: abstract syntax tree
+ */
+export interface TokenMatcheePair {
+    matched: tk.Token[]
+    remained: tk.Token[]
+    ast : tkTree[]
}
/**
@@ -45,7 +78,8 @@ export function m1TType(typ: tk.TokenType):
    let result : tk.Some<TokenMatcheePair> = {
        _tag: "Some", value: {
            matched: new_matched,
-            remained: m.remained.slice(1)
+            remained: m.remained.slice(1),
+            ast: ([ttbm]),
        }
    };
    return result;
@@ -56,168 +90,135 @@ export function m1TType(typ: tk.TokenType):
    }
};
-let toSome = tk.toSome;
-let thenDo = tk.thenDo;
-let zeroOrOnceDo = tk.zeroOrOnceDo;
-let orDo = tk.orDo;
-let zeroOrMoreDo = tk.zeroOrMoreDo;
+/**
+ * type int
+ */
+let tInt = m1TType(tk.TokenType.INT);
+let tAdd = m1TType(tk.TokenType.I_ADD);
+let tMul = m1TType(tk.TokenType.I_MUL);
argv.forEach((val, index) => {
    console.log(`${index}=${val}`);
});
-let commandInput = "int a str b"//argv[2];
-let commandInputTokenized = tk.tokenize(commandInput);
-let commandInputTokenizedFiltered = commandInputTokenized.filter(
-    (x : tk.Token)=>{return x.type != tk.TokenType.SP &&
-        x.type != tk.TokenType.NL});
-console.log("aaa: "+util.inspect(commandInputTokenizedFiltered, { showHidden: true, depth: null }));
/**
- * matchee pair of commandInputTokenized
+ * like `m ==> f` in ocaml
+ * @param m matchee wrapped
+ * @param f matching function
+ * @returns wrapped result
 */
-let commandTPair : TokenMatcheePair = {matched:[],
-    remained: commandInputTokenizedFiltered};
-let tInt = m1TType(tk.TokenType.INT);
-let tFlo = m1TType(tk.TokenType.FLO);
-let tStr = m1TType(tk.TokenType.STR);
-let tId = m1TType(tk.TokenType.ID);
-let tApos = m1TType(tk.TokenType.APOS);
-function tBool (x : TokenMatcheePair) :tk.Maybe<TokenMatcheePair> {
-    let text = x.remained[0].text
-    if (text == "true" || text == "false"){
-        return thenDo(toSome(x), m1TType(tk.TokenType.ID));
+function thenDo(m : tk.Maybe<TokenMatcheePair>, f : Function){
+    if (m._tag == "None"){
+        return m;
    }else{
-        return {_tag : "None"};
+        var a : tk.Maybe<TokenMatcheePair> = f(m.value);
+        if (a._tag == "Some"){
+            a.value.ast = concat(m.value.ast, a.value.ast);
+        }
+        return a;
    }
}
/**
- * define the right hand side of a grammar
- * eg. `LHS ::= a + b`
- * @param process the right hand side processing : eg. `a + b` in `LHS`
- * @param arrange define the order (0 starting) of the elements of the result.
- * ast. : eg. `a + c` is `1 0 2` `(+ a c)`
- * @returns the processed ast.
+ * like `f1 | f2` in regex
+ * @param f1 the first tried function
+ * @param f2 the second tried function
+ * @returns wrapped result
 */
-function gramRHS (process: Function, arrange : number[]){
-    return (m : TokenMatcheePair)=>{
-        let middle : tk.Maybe<TokenMatcheePair> = process(m);
-        console.log("Middle"+util.inspect(middle, { showHidden: true, depth: null }));
-        if (middle._tag == "None"){
-            return middle;
-        }
-        else{
-            let matched = middle.value.matched;
-            let arrLength = arrange.length;
-            let returnRrray : tkTree[] = Array(arrange.length);
-            arrange.forEach((val, index) => {
-                returnRrray[arrange[index]] = matched[index];
-            });
-            let matchedTmp1Length = matched.length-arrLength;
-            console.log(matchedTmp1Length);
-            var matchedTmp1 : tkTree[] = matched
-                            .slice(0,matchedTmp1Length);
-            console.log("matchedTmp1"+util.inspect(matchedTmp1, { showHidden: true, depth: null }));
-            console.log("returnRrray"+util.inspect(returnRrray, { showHidden: true, depth: null }));
-            matchedTmp1.push(returnRrray);
-            let result : tk.Some<TokenMatcheePair> = {_tag:"Some",
-                value : {matched : matchedTmp1,
-                        remained : middle.value.remained}};
-            return result;
-        }
-    }
+function orDo(f1 : Function, f2 : Function){
+    return (x : TokenMatcheePair) =>{
+        let res1 : tk.Maybe<TokenMatcheePair> = f1(x);
+        if (res1._tag == "Some"){
+            return res1;
+        }else{
+            let res2 : tk.Maybe<TokenMatcheePair> = f2(x);
+            return res2;
+        }
+    }
}
-/**
- * typeABS ::= "'" ID
- */
-var typeABS = (x : TokenMatcheePair)=>
-{
-    var result = thenDo(thenDo(toSome(x),tApos),tId);
-    if (result._tag == "Some" && "text" in result.value.matched[1]){
-        var realToken : tk.Token = result.value.matched[1];
-        realToken.text = "'"+realToken.text;
-        result.value.matched = [realToken];
-    }
-    return result;
-}
-/**
- * TypeId ::= typeABS | ID
- */
-var typeName = (x : TokenMatcheePair)=>
-{
-    return thenDo(toSome(x), orDo(typeABS, tId));
-}
-/**
- * CONST ::= INT | STR | FLO | BOOL
- */
-/**
- * TODO: 要用 debugger
- */
-var constParser = gramRHS((x : TokenMatcheePair)=>
-    {return thenDo(toSome(x),orDo(orDo(orDo(tInt,tFlo),tStr),tBool))}, [0]);
-/**
- * storing the tree
- */
-var astTree : tkTree = [];
-/**
- * TYPE_PAIR ::= TYP_ID ID
- */
-var typePair = (x : MaybeTokensAST)=>
-{
-    let a = thenDo(thenDo(x.maybeTokens, typeName), tId);
+let midfix = (f : Function, signal? : string) => (x : TokenMatcheePair)=>{
+    var a = f(x);
    if (a._tag == "Some"){
-        let matched = a.value.matched;
-        let slice = matched.slice(matched.length-2);
-        console.log("slice"+slice);
-        let b : MaybeTokensAST = {maybeTokens : a, ast : slice};
-        return b;
-    }
-    else{
-        let b : MaybeTokensAST= {maybeTokens : a, ast : []};
-        return b;
+        let ast_head : tkTree[] = slice(a.value.ast,0,a.value.ast.length-3);
+        let ast_tail : tkTree[] = slice(a.value.ast,a.value.ast.length-3);
+        let new_ast = [ast_tail];
+        a.value.ast = new_ast;
+        console.log("+"+signal+"+"+repr(a));
    }
+    return a;
}
/**
- * function's arguments
- * FN_ARGS = TYPE_PAIR ("," TYPE_PAIR)+
+ *
+ * fac1 = int MUL int
 */
-var fnArgs = (x : TokenMatcheePair)=>
-{
-    let wrapper : MaybeTokensAST = {maybeTokens : toSome(x), ast : []};
-    let a = typePair(wrapper);
-    console.log("AAAAA"+util.inspect(a, { showHidden: true, depth: null }));
-    let abanibi = typePair(a);
-    console.log("ABNB"+util.inspect(abanibi, { showHidden: true, depth: null }));
-    return {maybeTokens : abanibi.maybeTokens, ast : [a.ast, abanibi.ast]};
-};
-let tree = fnArgs(commandTPair);
-console.log("CHRANN"+util.inspect(tree, { showHidden: true, depth: null }));
+//let fac1 = midfix((x : TokenMatcheePair)=>
+//    thenDo(thenDo(thenDo(tk.toSome(x), tInt), tMul), tInt));
+let fac1 = (x : TokenMatcheePair) => {
+    let a = midfix((x : TokenMatcheePair)=>
+        thenDo(thenDo(thenDo(tk.toSome(x), tInt), tMul), tInt), "fac1")(x);
+    return a;
+}
+/**
+ *
+ * fac2 = int MUL int
+ */
+let fac2 = tInt;
+/**
+ * fac = fac1 | fac2
+ */
+let fac = orDo(fac1, fac2);
+/**
+ *
+ * expr1 = fac ADD fac
+ */
+let expr1 = midfix((x : TokenMatcheePair)=>
+            thenDo(thenDo(thenDo(tk.toSome(x), fac), tAdd), fac), "expr1");
+/**
+ * expr2 = fac
+ */
+let expr2 = fac;
+/**
+ * expr = expr1 | expr2
+ */
+let expr = orDo(expr1, expr2);
+let tokens = tk.tokenize("2+3*4");//tk.tokenize(argv[2]);
+let tokensFiltered = tokens.filter(
+    (x)=>{return (x.type != tk.TokenType.NL
+    && x.type != tk.TokenType.SP)});
+let wrappedTokens : tk.Maybe<TokenMatcheePair> =
+    tk.toSome({
+        matched : [] ,
+        remained : tokensFiltered,
+        ast : []});
+let beta = expr({
+    matched : [] ,
+    remained : tokensFiltered,
+    ast : []});
+console.log(repr(wrappedTokens));
+console.log(repr(beta));

View file

@@ -64,7 +64,7 @@ export interface MatcheePair {
 * SEMI_C// semi-colon
 */
export enum TokenType {
-    NL, // newlinw
+    NL, // newline
    SP, // half-width space and tab
    ID, // identifier
    STR, // string