rebuild parser

This commit is contained in:
Tan, Kian-ting 2023-09-19 00:35:01 +08:00
parent bf75beea2e
commit f801ef14fc
6 changed files with 262 additions and 258 deletions

View file

@ -9,3 +9,4 @@ another personal draught of a typesetting language and engine.
- 20230907-08:強化`tokenize`。
- 20230910 : add basic parser `CONST` rule, and add the grammar rule.
- 20230914-15: 追加一寡 tokenizer ê 功能。
- 20230918: 重新tuì下kàu頂起做parser. add rule

parserRule.txt Normal file
View file

@ -0,0 +1 @@
expr = int | int add int # expr1 and #expr2

View file

@ -56,8 +56,7 @@ APPLYEE ::= REF | CONST | EXPR | FUNC
BOOL ::= "true" | "false"
BLOCK ::= PROG (return ID |noReturn) ;
REF ::= VAR "." ID | VAR
VAR ::= ID
VAR_DEF ::= "let" VAR "=" EXPR

View file

@ -28,6 +28,32 @@ var fs = require('fs');
const node_process_1 = require("node:process");
const tk = __importStar(require("./tokenize.js"));
const util = __importStar(require("util"));
* debug representation
let repr = (x) => { return util.inspect(x, { depth: null }); };
* concatenate 2 `tkTree`s
* @param x the array to be concatenated onto
* @param y the item or array to be concatenated
* @returns the concatenated tkTree array; throws an error if `x` is not an array.
function concat(x, y) {
if (Array.isArray(x)) {
return x.concat(y);
else {
throw new Error("the tkTree can't be concated, because it's not an array.");
function slice(x, index, end) {
if (Array.isArray(x)) {
return x.slice(index, end);
else {
throw new Error("the tkTree can't be concated, because it's not an array.");
* @description
* match one token type.
@ -52,7 +78,8 @@ function m1TType(typ) {
let result = {
_tag: "Some", value: {
matched: new_matched,
remained: m.remained.slice(1)
remained: m.remained.slice(1),
ast: ([ttbm]),
return result;
@ -64,133 +91,108 @@ function m1TType(typ) {
exports.m1TType = m1TType;
let toSome = tk.toSome;
let thenDo = tk.thenDo;
let zeroOrOnceDo = tk.zeroOrOnceDo;
let orDo = tk.orDo;
let zeroOrMoreDo = tk.zeroOrMoreDo;
* type int
let tInt = m1TType(tk.TokenType.INT);
let tAdd = m1TType(tk.TokenType.I_ADD);
let tMul = m1TType(tk.TokenType.I_MUL);
node_process_1.argv.forEach((val, index) => {
let commandInput = "int a str b"; //argv[2];
let commandInputTokenized = tk.tokenize(commandInput);
let commandInputTokenizedFiltered = commandInputTokenized.filter((x) => {
return x.type != tk.TokenType.SP &&
x.type != tk.TokenType.NL;
console.log("aaa: " + util.inspect(commandInputTokenizedFiltered, { showHidden: true, depth: null }));
* matchee pair of commandInputTokenized
* like `m ==> f` in ocaml
* @param m matchee wrapped
* @param f matching function
* @returns wrapped result
let commandTPair = { matched: [],
remained: commandInputTokenizedFiltered };
let tInt = m1TType(tk.TokenType.INT);
let tFlo = m1TType(tk.TokenType.FLO);
let tStr = m1TType(tk.TokenType.STR);
let tId = m1TType(tk.TokenType.ID);
let tApos = m1TType(tk.TokenType.APOS);
function tBool(x) {
let text = x.remained[0].text;
if (text == "true" || text == "false") {
return thenDo(toSome(x), m1TType(tk.TokenType.ID));
function thenDo(m, f) {
if (m._tag == "None") {
return m;
else {
return { _tag: "None" };
var a = f(m.value);
if (a._tag == "Some") {
a.value.ast = concat(m.value.ast, a.value.ast);
return a;
* define the right hand side of a grammar
* eg. `LHS ::= a + b`
* @param process the right hand side processing : eg. `a + b` in `LHS`
* @param arrange define the order (0 starting) of the elements of the result.
* ast. : eg. `a + c` is `1 0 2` `(+ a c)`
* @returns the processed ast.
* like `f1 | f2` in regex
* @param f1 the first tried function
* @param f2 the second tried function
* @returns wrapped result
function gramRHS(process, arrange) {
return (m) => {
let middle = process(m);
console.log("Middle" + util.inspect(middle, { showHidden: true, depth: null }));
if (middle._tag == "None") {
return middle;
function orDo(f1, f2) {
return (x) => {
let res1 = f1(x);
if (res1._tag == "Some") {
return res1;
else {
let matched = middle.value.matched;
let arrLength = arrange.length;
let returnRrray = Array(arrange.length);
arrange.forEach((val, index) => {
returnRrray[arrange[index]] = matched[index];
let matchedTmp1Length = matched.length - arrLength;
var matchedTmp1 = matched
.slice(0, matchedTmp1Length);
console.log("matchedTmp1" + util.inspect(matchedTmp1, { showHidden: true, depth: null }));
console.log("returnRrray" + util.inspect(returnRrray, { showHidden: true, depth: null }));
let result = { _tag: "Some",
value: { matched: matchedTmp1,
remained: middle.value.remained } };
return result;
let res2 = f2(x);
return res2;
* typeABS ::= "'" ID
var typeABS = (x) => {
var result = thenDo(thenDo(toSome(x), tApos), tId);
if (result._tag == "Some" && "text" in result.value.matched[1]) {
var realToken = result.value.matched[1];
realToken.text = "'" + realToken.text;
result.value.matched = [realToken];
return result;
* TypeId ::= typeABS | ID
var typeName = (x) => {
return thenDo(toSome(x), orDo(typeABS, tId));
* TODO: 要用 debugger 檢查分析問題
var constParser = gramRHS((x) => { return thenDo(toSome(x), orDo(orDo(orDo(tInt, tFlo), tStr), tBool)); }, [0]);
* storing the tree
var astTree = [];
var typePair = (x) => {
let a = thenDo(thenDo(x.maybeTokens, typeName), tId);
let midfix = (f, signal) => (x) => {
var a = f(x);
if (a._tag == "Some") {
let matched = a.value.matched;
let slice = matched.slice(matched.length - 2);
console.log("slice" + slice);
let b = { maybeTokens: a, ast: slice };
return b;
else {
let b = { maybeTokens: a, ast: [] };
return b;
let ast_head = slice(a.value.ast, 0, a.value.ast.length - 3);
let ast_tail = slice(a.value.ast, a.value.ast.length - 3);
let new_ast = [ast_tail];
a.value.ast = new_ast;
console.log("+" + signal + "+" + repr(a));
return a;
* function's arguments
* fac1 = int MUL int
var fnArgs = (x) => {
let wrapper = { maybeTokens: toSome(x), ast: [] };
let a = typePair(wrapper);
console.log("AAAAA" + util.inspect(a, { showHidden: true, depth: null }));
let abanibi = typePair(a);
console.log("ABNB" + util.inspect(abanibi, { showHidden: true, depth: null }));
return { maybeTokens: abanibi.maybeTokens, ast: [a.ast, abanibi.ast] };
//let fac1 = midfix((x : TokenMatcheePair)=>
// thenDo(thenDo(thenDo(tk.toSome(x), tInt), tMul), tInt));
let fac1 = (x) => {
let a = midfix((x) => thenDo(thenDo(thenDo(tk.toSome(x), tInt), tMul), tInt), "fac1")(x);
return a;
let tree = fnArgs(commandTPair);
console.log("CHRANN" + util.inspect(tree, { showHidden: true, depth: null }));
* fac2 = int
let fac2 = tInt;
* fac = fac1 | fac2
let fac = orDo(fac1, fac2);
* expr1 = fac ADD fac
let expr1 = midfix((x) => thenDo(thenDo(thenDo(tk.toSome(x), fac), tAdd), fac), "expr1");
* expr2 = fac
let expr2 = fac;
* expr = expr1 | expr2
let expr = orDo(expr1, expr2);
let tokens = tk.tokenize("2+3"); //tk.tokenize(argv[2]);
let tokensFiltered = tokens.filter((x) => {
return (x.type != tk.TokenType.NL
&& x.type != tk.TokenType.SP);
let wrappedTokens = tk.toSome({
matched: [],
remained: tokensFiltered,
ast: []
let beta = expr({
matched: [],
remained: tokensFiltered,
ast: []

View file

@ -2,21 +2,54 @@ var fs = require('fs');
import { argv, resourceUsage } from 'node:process';
import * as tk from './tokenize.js';
import * as util from 'util';
import { reduceRotation } from 'pdf-lib';
import { drawEllipsePath, reduceRotation } from 'pdf-lib';
import { isTypedArray } from 'node:util/types';
import { error } from 'node:console';
* debug representation
let repr = (x : any)=>{return util.inspect(x, {depth: null})};
* token tree type.
type tkTree = tkTree[] | tk.Token
export interface TokenMatcheePair {
matched: tkTree[]
remained: tk.Token[]
* concatenate 2 `tkTree`s
* @param x the array to be concatenated onto
* @param y the item or array to be concatenated
* @returns the concatenated tkTree array; throws an error if `x` is not an array.
function concat(x: tkTree, y:tkTree): tkTree[] {
if (Array.isArray(x)){
return x.concat(y);
throw new Error("the tkTree can't be concated, because it's not an array.");
export interface MaybeTokensAST{
maybeTokens: tk.Maybe<TokenMatcheePair>;
ast: tkTree;
function slice(x: tkTree, index?:number, end?:number): tkTree[] {
if (Array.isArray(x)){
return x.slice(index,end);
throw new Error("the tkTree can't be concated, because it's not an array.");
* TokenMatcheePair for tokens' parser combinator
* matched: the matched (now and before) tokens
* remained: tokens to be matched
* ast: abstract syntax tree
export interface TokenMatcheePair {
matched: tk.Token[]
remained: tk.Token[]
ast : tkTree[]
@ -45,7 +78,8 @@ export function m1TType(typ: tk.TokenType):
let result : tk.Some<TokenMatcheePair> = {
_tag: "Some", value: {
matched: new_matched,
remained: m.remained.slice(1)
remained: m.remained.slice(1),
ast: ([ttbm]),
return result;
@ -56,168 +90,135 @@ export function m1TType(typ: tk.TokenType):
let toSome = tk.toSome;
let thenDo = tk.thenDo;
let zeroOrOnceDo = tk.zeroOrOnceDo;
let orDo = tk.orDo;
let zeroOrMoreDo = tk.zeroOrMoreDo;
* type int
let tInt = m1TType(tk.TokenType.INT);
let tAdd = m1TType(tk.TokenType.I_ADD);
let tMul = m1TType(tk.TokenType.I_MUL);
argv.forEach((val, index) => {
let commandInput = "int a str b"//argv[2];
let commandInputTokenized = tk.tokenize(commandInput);
let commandInputTokenizedFiltered = commandInputTokenized.filter(
(x : tk.Token)=>{return x.type != tk.TokenType.SP &&
x.type != tk.TokenType.NL});
console.log("aaa: "+util.inspect(commandInputTokenizedFiltered, { showHidden: true, depth: null }));
* matchee pair of commandInputTokenized
* like `m ==> f` in ocaml
* @param m matchee wrapped
* @param f matching function
* @returns wrapped result
let commandTPair : TokenMatcheePair = {matched:[],
remained: commandInputTokenizedFiltered};
let tInt = m1TType(tk.TokenType.INT);
let tFlo = m1TType(tk.TokenType.FLO);
let tStr = m1TType(tk.TokenType.STR);
let tId = m1TType(tk.TokenType.ID);
let tApos = m1TType(tk.TokenType.APOS);
function tBool (x : TokenMatcheePair) :tk.Maybe<TokenMatcheePair> {
let text = x.remained[0].text
if (text == "true" || text == "false"){
return thenDo(toSome(x), m1TType(tk.TokenType.ID));
function thenDo(m : tk.Maybe<TokenMatcheePair>, f : Function){
if (m._tag == "None"){
return m;
return {_tag : "None"};
* define the right hand side of a grammar
* eg. `LHS ::= a + b`
* @param process the right hand side processing : eg. `a + b` in `LHS`
* @param arrange define the order (0 starting) of the elements of the result.
* ast. : eg. `a + c` is `1 0 2` `(+ a c)`
* @returns the processed ast.
function gramRHS (process: Function, arrange : number[]){
return (m : TokenMatcheePair)=>{
let middle : tk.Maybe<TokenMatcheePair> = process(m);
console.log("Middle"+util.inspect(middle, { showHidden: true, depth: null }));
if (middle._tag == "None"){
return middle;
let matched = middle.value.matched;
let arrLength = arrange.length;
let returnRrray : tkTree[] = Array(arrange.length);
arrange.forEach((val, index) => {
returnRrray[arrange[index]] = matched[index];
let matchedTmp1Length = matched.length-arrLength;
var matchedTmp1 : tkTree[] = matched
console.log("matchedTmp1"+util.inspect(matchedTmp1, { showHidden: true, depth: null }));
console.log("returnRrray"+util.inspect(returnRrray, { showHidden: true, depth: null }));
let result : tk.Some<TokenMatcheePair> = {_tag:"Some",
value : {matched : matchedTmp1,
remained : middle.value.remained}};
return result;
* typeABS ::= "'" ID
var typeABS = (x : TokenMatcheePair)=>
var result = thenDo(thenDo(toSome(x),tApos),tId);
if (result._tag == "Some" && "text" in result.value.matched[1]){
var realToken : tk.Token = result.value.matched[1];
realToken.text = "'"+realToken.text;
result.value.matched = [realToken];
return result;
* TypeId ::= typeABS | ID
var typeName = (x : TokenMatcheePair)=>
return thenDo(toSome(x), orDo(typeABS, tId));
* TODO: 要用 debugger
var constParser = gramRHS((x : TokenMatcheePair)=>
{return thenDo(toSome(x),orDo(orDo(orDo(tInt,tFlo),tStr),tBool))}, [0]);
* storing the tree
var astTree : tkTree = [];
var typePair = (x : MaybeTokensAST)=>
let a = thenDo(thenDo(x.maybeTokens, typeName), tId);
var a : tk.Maybe<TokenMatcheePair> = f(m.value);
if (a._tag == "Some"){
let matched = a.value.matched;
let slice = matched.slice(matched.length-2);
let b : MaybeTokensAST = {maybeTokens : a, ast : slice};
return b;
a.value.ast = concat(m.value.ast, a.value.ast);
let b : MaybeTokensAST= {maybeTokens : a, ast : []};
return b;
return a;
* function's arguments
* like `f1 | f2` in regex
* @param f1 the first tried function
* @param f2 the second tried function
* @returns wrapped result
function orDo(f1 : Function, f2 : Function){
return (x : TokenMatcheePair) =>{
let res1 : tk.Maybe<TokenMatcheePair> = f1(x);
if (res1._tag == "Some"){
return res1;
let res2 : tk.Maybe<TokenMatcheePair> = f2(x);
return res2;
var fnArgs = (x : TokenMatcheePair)=>
let wrapper : MaybeTokensAST = {maybeTokens : toSome(x), ast : []};
let a = typePair(wrapper);
console.log("AAAAA"+util.inspect(a, { showHidden: true, depth: null }));
let abanibi = typePair(a);
console.log("ABNB"+util.inspect(abanibi, { showHidden: true, depth: null }));
let midfix = (f : Function, signal? : string) => (x : TokenMatcheePair)=>{
var a = f(x);
if (a._tag == "Some"){
let ast_head : tkTree[] = slice(a.value.ast,0,a.value.ast.length-3);
let ast_tail : tkTree[] = slice(a.value.ast,a.value.ast.length-3);
let new_ast = [ast_tail];
a.value.ast = new_ast;
return {maybeTokens : abanibi.maybeTokens, ast : [a.ast, abanibi.ast]};
return a;
* fac1 = int MUL int
//let fac1 = midfix((x : TokenMatcheePair)=>
// thenDo(thenDo(thenDo(tk.toSome(x), tInt), tMul), tInt));
let fac1 = (x : TokenMatcheePair) => {
let a = midfix((x : TokenMatcheePair)=>
thenDo(thenDo(thenDo(tk.toSome(x), tInt), tMul), tInt), "fac1")(x);
return a;
* fac2 = int
let fac2 = tInt;
* fac = fac1 | fac2
let fac = orDo(fac1, fac2);
* expr1 = fac ADD fac
let expr1 = midfix((x : TokenMatcheePair)=>
thenDo(thenDo(thenDo(tk.toSome(x), fac), tAdd), fac), "expr1");
* expr2 = fac
let expr2 = fac;
* expr = expr1 | expr2
let expr = orDo(expr1, expr2);
let tokens = tk.tokenize("2+3*4");//tk.tokenize(argv[2]);
let tokensFiltered = tokens.filter(
(x)=>{return (x.type != tk.TokenType.NL
&& x.type != tk.TokenType.SP)});
let wrappedTokens : tk.Maybe<TokenMatcheePair> =
matched : [] ,
remained : tokensFiltered,
ast : []});
let beta = expr({
matched : [] ,
remained : tokensFiltered,
ast : []});
let tree = fnArgs(commandTPair);
console.log("CHRANN"+util.inspect(tree, { showHidden: true, depth: null }));

View file

@ -64,7 +64,7 @@ export interface MatcheePair {
* SEMI_C// semi-colon
export enum TokenType {
NL, // newlinw
NL, // newline
SP, // half-width space and tab
ID, // identifier
STR, // string