fix some basic function of parser - rebuild

This commit is contained in:
Tan, Kian-ting 2024-02-14 22:24:53 +08:00
parent 829ac29ac3
commit 950c4e0423
5 changed files with 163 additions and 521 deletions

View file

@ -44,7 +44,7 @@ C語言、Python語言就算有許多的關鍵字、操作符、符號或是常
## 決定語法
那我們要如何制定這個語言的語法這樣我們才能夠寫出符合這個語法的函數然後再用tokenizer和parser轉成AST樹。
不考慮` + - * /`這種運算子,以及向量的表示子,函數可以用`ID(arg1, arg2, ...)`這種方式來表示,其中`arg_x`是引數,`ID`是識別子identifier可以把它想成變函數的名字
函數可以用`ID arg1 arg2`這種方式來表示,其中`arg_x`是引數,`ID`是識別子identifier可以把它想成變函數的名字
變數可以是`ID``arg_n`可以是`ID`或常數(量)。
@ -56,56 +56,48 @@ C語言、Python語言就算有許多的關鍵字、操作符、符號或是常
- 字串:`'"' (不是「"」的任一字元|('\' '"')) '"'``.`表示任何一個字元)
然而我們還是需要綁定變數`let x = var in boby`(在`body`裡面,`x`指代`var`)、`set x = var`改變變數值、lambda`lambda (x)=>{body}`。另外為了要區別要在PDF印上去的一般字元在這個檔案的常數、變數、函數、關鍵字等前後需要加@表示但是函數、lambda裡面的變數不用。比如`@foo(a, b)@`、`@lambda(x)@`、`@"IAmAString"@`、`@2.2@`、`@3@`後三者應該很少用到可是若需在PDF印`@`時怎辦?那就用`\@`。比如`foo\@example.com`。
然而我們還是需要綁定變數`let int x = var in body`(在`body`裡面,`x`指代`var`、改變變數值、lambda`fn (int x) (int y) -> + x y`(採用前綴表示法,`+`在前)。另外為了要區別要在PDF印上去的一般字元在這個檔案的常數、變數、函數、關鍵字等前後需要加@表示但是函數、lambda裡面的變數不用。比如`@foo a b@`、`@let int x = 3 in toString (+ x 2)@`、`@"IAmAString"@`、`@2.2@`、`@3@`後三者應該很少用到可是若需在PDF印`@`時怎辦?那就用`\@`。比如`foo\@example.com`。
所以我們可以定義以下的BNF風文法
```
Language ::= MainTxt | Exprs | Comment
Comment ::= '/*' (不含'*/'的任何字元組合)* '*/'
MainTxt ::= (('\' '@')| 非@非空白字元)+ //顯示的文字。「我是一隻貓」或是「www\@example.com」
// Exprs 表示一群定義變數、常數、函數、函數套用的表達式
Exprs ::= @ Expr* @ // *表示前面的重複0次以上包含不出現
// Comment also included
// "(" and ")" only for applying function
Expr ::= (Letting | Setting | Lambda | Var | Const) | "(" Applying ")" | Comment
Letting ::= "let" Var "=" Expr "in" Expr // let foo = 12 in ...
Setting ::= Var ":=" Expr "in" Expr // foo := a in ...
// we force every function have at least 1 argument.
Lambda ::= "fn" LambdaArgs "->" Expr // fn x y -> 12
LambdaArgs ::= Var | Var LambdaArgs
Applying ::= Expr ExprArgs // foo 3 9 即foo(3, 9)
ExprArgs ::= Expr | (Expr ExprArgs)
Var ::= ID
Const ::= String | Float | Integer
ID ::= ("_" | [a-z] | [A-Z]) ("_" | [0-9] | [a-z] | [A-Z])+
Integer ::= [0-9]+
Float ::= [0-9]+ "." [0-9]+
String ::= '"' (不是「"」的任一字元|('\' '"')) '"'
FLO = \d+[.]\d+ // 浮點數
INT = \d+ // 整數
AT = '@' // @
ID = [_\w][_\d\w]* // 識別子
R_ARR = [-][>] // 右箭頭 ->
SEMICOLON = ";"
// 括號
L_PAR = '('
R_PAR = ')'
ASSIGN = '='
OP = [+-*/] | [=][=] | [!<>][=] // 運算子
HASH = [#]
COM = #[^#]*# # 註解 #
SPACE = \s+ # 空白字元
B_SLASH = [\\] // 反斜線
STR = \"([^"]|[\\\"])*\"
LIT_STR = ([^\\]?) // 文字模式的不貪婪模式
```
而上述的item可以被1個以上半形空白或tab`\t`以及1個「`\n`或`\r\n`」換行符號隔開。而為求簡化這些符號在MainTxt均指代一個半形空白。也就是空一個半形空白、兩個半形空白、一個tab、一個換行符號等等都會顯示如一個半形符號。而在Expr表達式區把它忽略掉。另外兩個換行符號設定為換行指令而這在Expr區會被忽略。所以要加另外兩條
```
Space = (' ' | '\t')* | '\n' | '\r\n'
NewPara = ('\n' |'\r' '\n' ) ('\n' |'\r' '\n' )
程式語法定義如下:
```BNF
Main ::= (LitStr | Prog)* ; 主體
LitStr ::= ( not(AT) | B_SLASH AT | B_SLASH HASH)+ ;基本文字模式
Prog ::= '@' BODY '@' ;程式模式
BODY ::= LET | EXPR | DEFINE
DEFINE ::= "define" TYPE VAR ASSIGN BODY SEMICOLON ; 全局定義
LET ::= "let" TYPE VAR ASSIGN "in" BODY ; 局域定義
EXPR ::= APPLY | FN | LIST | CONST | VAR | "(" EXPR ")"
APPLY ::= OP EXPR+ | EXPR EXPR+
FN ::= "fn" ARGS R_ARR BODY
ARGS ::= ARG | ARG ARGS
ARG ::= "(" TYPE VAR ")"
CONST ::= FLO | STR | INT
VAR ::= ID
TYPE ::= ID
LIST ::= [LIST_INNER]
LIST_INNER ::= EXPR | EXPR SEMICOLON LIST_INNER
```
## 用ts-parsec和regexp進行tokenize
@ -163,44 +155,7 @@ thenDo(thenDo(thenDo(sWrapped, match0to9), match0to9), match0to9)
我們編輯Node.js的進入點程式假設為src/index.js`底下為定義tokenizer的型別和regex pattern
```typescript
/** the type of token */
enum TokenKind {
Int, // 3
Flo, // 3.1416
Id, // foo, _123, etc
At, // @
Comt, // comment /*
Str, /** "foo" */
Assign, /** = */
Set, /** := */
Keyword, /** let, in */
LParen, /** ( */
RParen, /** ) */
Space, /** semi-width space tab, \r\n? */
NewPara, /** breaking paragraph, (\r\n?){2} */
MainTxt, /** used in main text */
}
// tokenizer
const tokenizer = parsec.buildLexer([
[true, /^\d+/g, TokenKind.Int],
[true, /^\d+\.\d+/g, TokenKind.Flo],
[true, /^(let|in)/g, TokenKind.Keyword], // let and in
[true, /^[_a-zA-Z][_0-9a-zA-Z]*/g, TokenKind.Id],
[true, /^\@/g, TokenKind.At],
/* inside comment, only accept 1. non / character
or 2. "/ + non * character" */
[true, /^\/\*(\/[^*]|[^\\]?)*\*\//g, TokenKind.Comt],
[true, /^\"(\\\"|[^\"]?)*\"/g, TokenKind.Str],
[true, /^\:\=/g, TokenKind.Set],
[true, /^\=/g, TokenKind.Assign],
[true, /^\(/g, TokenKind.LParen],
[true, /^\)/g, TokenKind.RParen],
[true, /^([ \t]+|\n)/g, TokenKind.Space],
[true, /^(\r?\n){2}/g, TokenKind.NewPara],
[true, /^(\\\@|[^@\s])+/g, TokenKind.MainTxt],
]);
```
### 常數parsing

View file

@ -1,23 +1,24 @@
import * as parsec from 'typescript-parsec';
import * as p from 'typescript-parsec';
/** the type of token */
declare enum TokenKind {
Int = 0,
Flo = 1,
Id = 2,
At = 3,
Comt = 4,
Str = /** "foo" */ 5,
Lambda = /** -> */ 6,
Assign = /** = */ 7,
Set = /** := */ 8,
Keyword = /** let, in */ 9,
LParen = /** ( */ 10,
RParen = /** ) */ 11,
Space = /** semi-width space tab, \r\n? */ 12,
NewPara = /** breaking paragraph, (\r\n?){2} */ 13,
MainTxt = /** used in main text */ 14
Flo = 0,
Int = 1,
At = 2,
Id = 3,
RArr = 4,
SColon = 5,
LPar = 6,
RPar = 7,
Assign = 8,
Op = 9,
Hash = 10,
Com = 11,
BSlash = 12,
Str = 13,
LitStr = 14,
Space = 15
}
export interface ASTNode extends parsec.Token<TokenKind> {
export interface ASTNode extends p.Token<TokenKind> {
actualValue?: bigint | number | string;
}
/** AST Tree */

View file

@ -1,27 +1,28 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.astToSExp = void 0;
const parsec = require("typescript-parsec"); // import parsec
const p = require("typescript-parsec"); // import p
/* for test */
const assert = require("assert");
/** the type of token */
var TokenKind;
(function (TokenKind) {
TokenKind[TokenKind["Int"] = 0] = "Int";
TokenKind[TokenKind["Flo"] = 1] = "Flo";
TokenKind[TokenKind["Id"] = 2] = "Id";
TokenKind[TokenKind["At"] = 3] = "At";
TokenKind[TokenKind["Comt"] = 4] = "Comt";
TokenKind[TokenKind["Str"] = 5] = "Str";
TokenKind[TokenKind["Lambda"] = 6] = "Lambda";
TokenKind[TokenKind["Assign"] = 7] = "Assign";
TokenKind[TokenKind["Set"] = 8] = "Set";
TokenKind[TokenKind["Keyword"] = 9] = "Keyword";
TokenKind[TokenKind["LParen"] = 10] = "LParen";
TokenKind[TokenKind["RParen"] = 11] = "RParen";
TokenKind[TokenKind["Space"] = 12] = "Space";
TokenKind[TokenKind["NewPara"] = 13] = "NewPara";
TokenKind[TokenKind["MainTxt"] = 14] = "MainTxt";
TokenKind[TokenKind["Flo"] = 0] = "Flo";
TokenKind[TokenKind["Int"] = 1] = "Int";
TokenKind[TokenKind["At"] = 2] = "At";
TokenKind[TokenKind["Id"] = 3] = "Id";
TokenKind[TokenKind["RArr"] = 4] = "RArr";
TokenKind[TokenKind["SColon"] = 5] = "SColon";
TokenKind[TokenKind["LPar"] = 6] = "LPar";
TokenKind[TokenKind["RPar"] = 7] = "RPar";
TokenKind[TokenKind["Assign"] = 8] = "Assign";
TokenKind[TokenKind["Op"] = 9] = "Op";
TokenKind[TokenKind["Hash"] = 10] = "Hash";
TokenKind[TokenKind["Com"] = 11] = "Com";
TokenKind[TokenKind["BSlash"] = 12] = "BSlash";
TokenKind[TokenKind["Str"] = 13] = "Str";
TokenKind[TokenKind["LitStr"] = 14] = "LitStr";
TokenKind[TokenKind["Space"] = 15] = "Space";
})(TokenKind || (TokenKind = {}));
/** from AST to S-exp */
function astToSExp(ast) {
@ -36,29 +37,29 @@ function astToSExp(ast) {
}
exports.astToSExp = astToSExp;
// tokenizer
const tokenizer = parsec.buildLexer([
const tokenizer = p.buildLexer([
[true, /^\d+[.]\d+/g, TokenKind.Flo],
[true, /^\d+/g, TokenKind.Int],
[true, /^\d+\.\d+/g, TokenKind.Flo],
[true, /^(let|in|fn)/g, TokenKind.Keyword],
[true, /^[_a-zA-Z][_0-9a-zA-Z]*/g, TokenKind.Id],
[true, /^\@/g, TokenKind.At],
/* inside comment, only accept 1. non / character
or 2. "/ + non * character" */
[true, /^\/\*(\/[^*]|[^\\]?)*\*\//g, TokenKind.Comt],
[true, /^\"(\\\"|[^\"]?)*\"/g, TokenKind.Str],
[true, /^\:\=/g, TokenKind.Set],
[true, /^\=/g, TokenKind.Assign],
[true, /^->/g, TokenKind.Lambda],
[true, /^\(/g, TokenKind.LParen],
[true, /^\)/g, TokenKind.RParen],
[true, /^([ \t]+|[ \t]*\r?\n[ \t]*)/g, TokenKind.Space],
[true, /^(\r?\n){2}/g, TokenKind.NewPara],
[true, /^(\\\@|[^@\s])/g, TokenKind.MainTxt],
[true, /^[@]/g, TokenKind.At],
[true, /^[_\w][_\d\w]*/g, TokenKind.Id],
[true, /^->/g, TokenKind.RArr],
[true, /^[;]/g, TokenKind.SColon],
[true, /^[(]/g, TokenKind.LPar],
[true, /^[)]/g, TokenKind.RPar],
[true, /^[=]/g, TokenKind.Assign],
[true, /^([\+\-\*\/]|[!<>=]=)/g, TokenKind.Op],
[true, /^#[^#]*#/g, TokenKind.Com],
[true, /^[\\]/g, TokenKind.BSlash],
[true, /^\"([^"]|[\\\"])*\"/g, TokenKind.Str],
[true, /^([^\\]+?)/g, TokenKind.LitStr],
[true, /^\s+/g, TokenKind.Space],
]);
/** ignore spaces ,new lines, and comments */
const _ = parsec.opt(parsec.alt(parsec.tok(TokenKind.Space), parsec.tok(TokenKind.NewPara),
// space or newPara + comment + space or newPara
parsec.seq(parsec.opt(parsec.alt(parsec.tok(TokenKind.Space), parsec.tok(TokenKind.NewPara))), parsec.tok(TokenKind.Comt), parsec.opt(parsec.alt(parsec.tok(TokenKind.Space), parsec.tok(TokenKind.NewPara))))));
//const _ = p.opt(p.alt(
// p.tok(TokenKind.Space),
// p.tok(TokenKind.Com),
// )
//);
function applyInteger(value) {
// extend value to ASTNode
const newNode = {
@ -82,112 +83,23 @@ function applyString(value) {
};
return newNode;
}
/** Lift an identifier token into an ASTNode; its text doubles as the value. */
function applyIdentifier(value) {
    // Spread after actualValue so every original token field survives.
    return { actualValue: value.text, ...value };
}
/** Fold a matched LETTING sequence into the AST shape (let (var value) expr). */
function applyLetting(input) {
    // Slots 0/2/6/10 carry the let keyword, variable, value and body;
    // the skipped slots are optional whitespace separators.
    const [letKw, , boundVar, , , , boundValue, , , , bodyExpr] = input;
    return [letKw, [boundVar, boundValue], bodyExpr];
}
/** Fold a matched SETTING sequence into (:= (var value) expr). */
function applySetting(input) {
    // Slots: 0 = variable, 2 = ":=" token, 4 = value, 8 = trailing expression.
    const [targetVar, , assignTok, , newValue, , , , restExpr] = input;
    // (:= (var val) expr) : set var = val in expr
    return [assignTok, [targetVar, newValue], restExpr];
}
/** Fold a matched LAMBDA sequence into (fn (args) body), Scheme-style. */
function applyLambda(input) {
    // Slot 1 is the mandatory first argument, slot 2 the remaining ones,
    // slot 6 the body expression after "->".
    const [fnKw, firstArg, restArgs, , , , body] = input;
    return [fnKw, [firstArg, ...restArgs], body];
}
/** Fold a function application into (caller (arg1 arg2 ...)). */
function applyApplying(input) {
    // Slot 0 is the applied function, slot 2 the first argument,
    // slot 3 the remaining arguments.
    const [caller, , headArg, tailArgs] = input;
    // foo 2 3 => (foo (2 3))
    return [caller, [headArg, ...tailArgs]];
}
/** define all the parser sentence */
const CONST = parsec.rule();
const VAR = parsec.rule();
const ARG = parsec.rule();
const EXPR = parsec.rule();
const LETTING = parsec.rule();
const SETTING = parsec.rule();
const LAMBDA = parsec.rule();
const APPLYING = parsec.rule();
/*
CONST ::= INT | FLOAT | STRING
*/
CONST.setPattern(parsec.alt(parsec.apply(parsec.tok(TokenKind.Int), applyInteger), parsec.apply(parsec.tok(TokenKind.Flo), applyFloat), parsec.apply(parsec.tok(TokenKind.Str), applyString)));
/** VAR = ID */
VAR.setPattern(parsec.apply(parsec.tok(TokenKind.Id), applyIdentifier));
/** ARG = ID */
ARG.setPattern(parsec.apply(parsec.tok(TokenKind.Id), applyIdentifier));
/**SETTING ::= VAR ":=" EXPR in EXPR
* and ignore the spaces and new lines with `_`
*/
SETTING.setPattern(parsec.apply(parsec.seq(VAR, _, parsec.str(":="), _, EXPR, _, parsec.str("in"), _, EXPR), applySetting));
/**LETTING ::= "let" VAR "=" EXPR in EXPR
* and ignore the spaces and new lines with `_`
*/
LETTING.setPattern(parsec.apply(parsec.seq(parsec.str("let"), _, VAR, _, parsec.str("="), _, EXPR, _, parsec.str("in"), _, EXPR), applyLetting));
/**LAMBDA ::= "fn" (Args)+ "->" EXPR
* and ignore the spaces and new lines with `_`
*/
LAMBDA.setPattern(parsec.apply(parsec.seq(parsec.str("fn"), parsec.kright(_, ARG), // arg SpaceNL
parsec.rep_sc(parsec.kright(_, ARG)), //other (arg SpaceNL), repeat 0+times
_, parsec.str("->"), _, EXPR), applyLambda));
// APPLYING = ( "(" APPLYING ")" |LAMBDA|VAR) APPLIEE+
APPLYING.setPattern(parsec.apply(parsec.seq(parsec.alt(LAMBDA, VAR, parsec.kmid(parsec.seq(parsec.str('('), _), APPLYING, parsec.seq(_, parsec.str(')')))), _, EXPR, parsec.rep_sc(parsec.kright(_, EXPR))), applyApplying));
/** EXPR = CONST | VAR
* | LETTING | SETTING
* | LAMBDA | APPLYING
* | "(" APPLYING ")" */
EXPR.setPattern(parsec.alt(CONST, VAR, LETTING, SETTING, LAMBDA, parsec.kmid(parsec.seq(parsec.str('('), _), APPLYING, parsec.seq(_, parsec.str(')')))));
const CONST = p.rule();
/*const VAR = p.rule<TokenKind, ASTNode>();
const ARG = p.rule<TokenKind, ASTNode>();
const EXPR = p.rule<TokenKind, AST>();
const LETTING = p.rule<TokenKind, AST>();
const LAMBDA = p.rule<TokenKind, AST>();
const APPLYING = p.rule<TokenKind, AST>(); */
CONST.setPattern(p.alt(p.apply(p.tok(TokenKind.Flo), applyFloat), p.apply(p.tok(TokenKind.Int), applyInteger), p.apply(p.tok(TokenKind.Str), applyString)));
/** Tokenize inputStr and parse it with the CONST rule, requiring the
 * whole input to be consumed and to yield exactly one result. */
function mainParse(inputStr) {
    // NOTE(review): a stale duplicate `return` using the old parsec/EXPR
    // pipeline preceded this one, making it unreachable (EXPR is commented
    // out in this revision); the dead line has been removed.
    return p.expectSingleResult(p.expectEOF(CONST.parse(tokenizer.parse(inputStr))));
}
// test
/** Smoke tests for the parser.
 * NOTE(review): this block interleaves pre- and post-rebuild assertions from
 * the diff. The let/:=/fn/apply cases exercise grammar rules (EXPR, LETTING,
 * LAMBDA, APPLYING) that this revision comments out, so only the trailing
 * constant cases match the CONST-only mainParse — confirm before relying on
 * the middle section. */
function main() {
// bigint has suffix `n`, so 123455667n below is a bigint literal
assert.strictEqual(mainParse('123455667').actualValue, 123455667n);
assert.strictEqual(mainParse('000').actualValue, 0n);
assert.strictEqual(mainParse('1.22').actualValue, 1.22);
assert.strictEqual(mainParse('0.0').actualValue, 0.0);
assert.strictEqual(mainParse(`""`).actualValue, "");
assert.strictEqual(mainParse(`"the little town"`).actualValue, `the little town`);
assert.strictEqual(mainParse(`"\\\"Alice\\\""`).actualValue, `"Alice"`);
assert.strictEqual(mainParse(`foo`).actualValue, "foo");
// NOTE(review): the assertions below target the removed let/:=/fn/apply rules.
assert.strictEqual(astToSExp(mainParse(`let x = 12 in 23`)), "(let (x 12) 23)");
assert.strictEqual(astToSExp(mainParse(`let y = 10 in let x = 12 in 23`)), "(let (y 10) (let (x 12) 23))");
assert.strictEqual(astToSExp(mainParse(`let y = 10 in y := 12 in 23`)), "(let (y 10) (:= (y 12) 23))");
assert.strictEqual(astToSExp(mainParse(`fn x y -> 234`)), "(fn (x y) 234)");
assert.strictEqual(astToSExp(mainParse(`(add 12 23 )`)), "(add (12 23))");
assert.strictEqual(astToSExp(mainParse(`(foo x y)`)), "(foo (x y))");
assert.strictEqual(astToSExp(mainParse(`((foo 6 7) bar)`)), "((foo (6 7)) (bar))");
assert.strictEqual(astToSExp(mainParse(`fn x y ->
/* foo bar */
(foo x y)`)), "(fn (x y) (foo (x y)))");
// Constant-only cases added by this commit: integer, float and string.
assert.strictEqual(mainParse("123").actualValue, 123n);
assert.strictEqual(mainParse("3.14").actualValue, 3.14);
assert.strictEqual(mainParse("\"foo\"").actualValue, "foo");
}
;
// Run the smoke tests on module load.
main();

File diff suppressed because one or more lines are too long

View file

@ -1,28 +1,29 @@
import * as parsec from 'typescript-parsec'; // import parsec
import * as p from 'typescript-parsec'; // import p
/* for test */
import * as assert from 'assert';
/** the type of token */
enum TokenKind {
Int, // 3
Flo, // 3.1416
Id, // foo, _123, etc
Flo,
Int,
At, // @
Comt, // comment /*
Str, /** "foo" */
Lambda, /** -> */
Assign, /** = */
Set, /** := */
Keyword, /** let, in */
LParen, /** ( */
RParen, /** ) */
Space, /** semi-width space tab, \r\n? */
NewPara, /** breaking paragraph, (\r\n?){2} */
MainTxt, /** used in main text */
Id, // identifier
RArr, // Right Arrow
SColon, // Semi colon
LPar, // left perenthesis
RPar, // right paranthesis
Assign, // =
Op, // +-*/...
Hash, // #
Com, // # comment
BSlash, // backslash\
Str, // "string"
LitStr, // literal string
Space, // Spaces
}
// add "actualValue" in the parsed Token
export interface ASTNode extends parsec.Token<TokenKind>{
export interface ASTNode extends p.Token<TokenKind>{
// number is for float number;
//it's optional. since keyword has no value
actualValue? : bigint | number | string;
@ -44,49 +45,36 @@ export function astToSExp(ast : AST){
// tokenizer
const tokenizer = parsec.buildLexer([
const tokenizer = p.buildLexer([
[true,/^\d+[.]\d+/g , TokenKind.Flo],
[true, /^\d+/g, TokenKind.Int],
[true, /^\d+\.\d+/g, TokenKind.Flo],
[true, /^(let|in|fn)/g, TokenKind.Keyword], // let, in, fn
[true, /^[_a-zA-Z][_0-9a-zA-Z]*/g, TokenKind.Id],
[true, /^\@/g, TokenKind.At],
/* inside comment, only accept 1. non / character
or 2. "/ + non * character" */
[true, /^\/\*(\/[^*]|[^\\]?)*\*\//g, TokenKind.Comt],
[true, /^\"(\\\"|[^\"]?)*\"/g, TokenKind.Str],
[true, /^\:\=/g, TokenKind.Set],
[true, /^\=/g, TokenKind.Assign],
[true, /^->/g, TokenKind.Lambda],
[true, /^\(/g, TokenKind.LParen],
[true, /^\)/g, TokenKind.RParen],
[true, /^([ \t]+|[ \t]*\r?\n[ \t]*)/g, TokenKind.Space],
[true, /^(\r?\n){2}/g, TokenKind.NewPara],
[true, /^(\\\@|[^@\s])/g, TokenKind.MainTxt],
[true,/^[@]/g, TokenKind.At ],
[true,/^[_\w][_\d\w]*/g, TokenKind.Id ],
[true,/^->/g , TokenKind.RArr ],
[true, /^[;]/g, TokenKind.SColon ],
[true, /^[(]/g, TokenKind.LPar ],
[true, /^[)]/g, TokenKind.RPar ],
[true, /^[=]/g, TokenKind.Assign ],
[true, /^([\+\-\*\/]|[!<>=]=)/g, TokenKind.Op ],
[true, /^#[^#]*#/g, TokenKind.Com ],
[true, /^[\\]/g, TokenKind.BSlash ],
[true,/^\"([^"]|[\\\"])*\"/g , TokenKind.Str ],
[true, /^([^\\]+?)/g, TokenKind.LitStr ],
[true, /^\s+/g, TokenKind.Space],
]);
/** ignore spaces ,new lines, and comments */
const _ = parsec.opt(parsec.alt(
parsec.tok(TokenKind.Space),
parsec.tok(TokenKind.NewPara),
// space or newPara + comment + space or newPara
parsec.seq(
parsec.opt(parsec.alt(
parsec.tok(TokenKind.Space),
parsec.tok(TokenKind.NewPara))),
parsec.tok(TokenKind.Comt),
parsec.opt(parsec.alt(
parsec.tok(TokenKind.Space),
parsec.tok(TokenKind.NewPara))),
)
)
);
//const _ = p.opt(p.alt(
// p.tok(TokenKind.Space),
// p.tok(TokenKind.Com),
// )
//);
function applyInteger(value: parsec.Token<TokenKind.Int>): ASTNode {
function applyInteger(value: p.Token<TokenKind.Int>): ASTNode {
// extend value to ASTNode
const newNode : ASTNode = {
actualValue : BigInt(value.text) ,
@ -94,14 +82,14 @@ function applyInteger(value: parsec.Token<TokenKind.Int>): ASTNode {
return newNode;
}
function applyFloat(value: parsec.Token<TokenKind.Flo>): ASTNode {
/** Turn a Flo token into an ASTNode carrying its parsed numeric value. */
function applyFloat(value: p.Token<TokenKind.Flo>): ASTNode {
    // Spread after actualValue so the token's own fields are kept intact.
    const node: ASTNode = { actualValue: parseFloat(value.text), ...value };
    return node;
}
function applyString(value: parsec.Token<TokenKind.Str>): ASTNode {
function applyString(value: p.Token<TokenKind.Str>): ASTNode {
const newNode : ASTNode = {
// get only text[1,2,...,the second last char]
actualValue : value.text.slice(1,value.text.length-1).replace(/\\\"/g, "\"") ,
@ -109,250 +97,36 @@ function applyString(value: parsec.Token<TokenKind.Str>): ASTNode {
return newNode;
}
/** Lift an Id token into an ASTNode; its text doubles as the value. */
function applyIdentifier(value: parsec.Token<TokenKind.Id>): ASTNode {
    // Spread after actualValue so every original token field survives.
    const node: ASTNode = { actualValue: value.text, ...value };
    return node;
}
/** Fold a matched LETTING sequence into the AST shape (let (var value) expr). */
function applyLetting(input: [parsec.Token<TokenKind>, // let
parsec.Token<TokenKind> | undefined, // space
ASTNode, // var
parsec.Token<TokenKind>| undefined, // space
parsec.Token<TokenKind>, // =
parsec.Token<TokenKind>| undefined, // space
AST, // val
parsec.Token<TokenKind>| undefined, // space
parsec.Token<TokenKind>, // in
parsec.Token<TokenKind>| undefined, // space
AST // expr
]): AST {
    // Only slots 0, 2, 6 and 10 carry content; the rest are separators.
    const letKw: ASTNode = input[0];
    const boundVar = input[2];
    const boundValue = input[6];
    const bodyExpr = input[10];
    // Assemble (let (var value) expr), Scheme-style.
    return [letKw, [boundVar, boundValue], bodyExpr];
}
/** Fold a matched SETTING sequence into (:= (var value) expr). */
function applySetting(input: [ASTNode, // var
parsec.Token<TokenKind>| undefined, // space
parsec.Token<TokenKind>, // :=
parsec.Token<TokenKind>| undefined, // space
AST, // val
parsec.Token<TokenKind>| undefined, // space
parsec.Token<TokenKind>, // in
parsec.Token<TokenKind>| undefined, // space
AST // expr
]): AST {
    // Slots: 0 = variable, 2 = ":=" token, 4 = value, 8 = trailing expression.
    const targetVar = input[0];
    const assignTok = input[2];
    const newValue = input[4];
    const restExpr = input[8];
    // (:= (var val) expr) : set var = val in expr
    return [assignTok, [targetVar, newValue], restExpr];
}
/** Fold a matched LAMBDA sequence into (fn (args) body), like Scheme's lambda. */
function applyLambda(input: [ASTNode, // fn
ASTNode, // arg
ASTNode[], // args
parsec.Token<TokenKind>| undefined, // space
parsec.Token<TokenKind>, // ->
parsec.Token<TokenKind>| undefined, // space
AST // expr
]): AST {
    // Mandatory first argument (slot 1) plus zero or more further ones (slot 2).
    const argList = [input[1], ...input[2]];
    // Slot 0 is the "fn" keyword token; slot 6 the body after "->".
    return [input[0], argList, input[6]];
}
/** Fold a function application into (caller (arg1 arg2 ...)). */
function applyApplying(input : [ASTNode, // caller
parsec.Token<TokenKind> |undefined, // space
ASTNode, // head of callee
AST[] // tail of callee
]){
    // foo 2 3 => (foo (2 3)): caller first, then the argument list.
    const callerNode = input[0];
    const calleeList = [input[2] as AST, ...input[3]];
    return [callerNode, calleeList];
}
/** define all the parser sentence */
const CONST = parsec.rule<TokenKind, ASTNode>();
const VAR = parsec.rule<TokenKind, ASTNode>();
const ARG = parsec.rule<TokenKind, ASTNode>();
const EXPR = parsec.rule<TokenKind, AST>();
const LETTING = parsec.rule<TokenKind, AST>();
const SETTING = parsec.rule<TokenKind, AST>();
const LAMBDA = parsec.rule<TokenKind, AST>();
const APPLYING = parsec.rule<TokenKind, AST>();
const CONST = p.rule<TokenKind, ASTNode>();
/*const VAR = p.rule<TokenKind, ASTNode>();
const ARG = p.rule<TokenKind, ASTNode>();
const EXPR = p.rule<TokenKind, AST>();
const LETTING = p.rule<TokenKind, AST>();
const LAMBDA = p.rule<TokenKind, AST>();
const APPLYING = p.rule<TokenKind, AST>(); */
/*
CONST ::= INT | FLOAT | STRING
*/
CONST.setPattern(
parsec.alt(
parsec.apply(parsec.tok(TokenKind.Int), applyInteger),
parsec.apply(parsec.tok(TokenKind.Flo), applyFloat),
parsec.apply(parsec.tok(TokenKind.Str), applyString),
p.alt(
p.apply(p.tok(TokenKind.Flo), applyFloat),
p.apply(p.tok(TokenKind.Int), applyInteger),
p.apply(p.tok(TokenKind.Str), applyString),
)
);
/** VAR = ID */
VAR.setPattern(
parsec.apply(parsec.tok(TokenKind.Id), applyIdentifier),
);
/** ARG = ID */
ARG.setPattern(
parsec.apply(parsec.tok(TokenKind.Id), applyIdentifier),
);
/**SETTING ::= VAR ":=" EXPR in EXPR
* and ignore the spaces and new lines with `_`
*/
SETTING.setPattern(
parsec.apply(
parsec.seq(
VAR,
_,
parsec.str(":="),
_,
EXPR,
_,
parsec.str("in"),
_,
EXPR), applySetting));
/**LETTING ::= "let" VAR "=" EXPR in EXPR
* and ignore the spaces and new lines with `_`
*/
LETTING.setPattern(
parsec.apply(
parsec.seq(
parsec.str("let"),
_,
VAR,
_,
parsec.str("="),
_,
EXPR,
_,
parsec.str("in"),
_,
EXPR), applyLetting));
/**LAMBDA ::= "fn" (Args)+ "->" EXPR
* and ignore the spaces and new lines with `_`
*/
LAMBDA.setPattern(
parsec.apply(
parsec.seq(
parsec.str("fn"),
parsec.kright(_, ARG), // arg SpaceNL
parsec.rep_sc(parsec.kright(_, ARG)), //other (arg SpaceNL), repeat 0+times
_,
parsec.str("->"),
_,
EXPR),
applyLambda)
)
// APPLYING = ( "(" APPLYING ")" |LAMBDA|VAR) APPLIEE+
APPLYING.setPattern(
parsec.apply(
parsec.seq(
parsec.alt(
LAMBDA,
VAR,
parsec.kmid(
parsec.seq(parsec.str('('), _),
APPLYING,
parsec.seq(_, parsec.str(')')))
),
_,
EXPR,
parsec.rep_sc(parsec.kright(_, EXPR))),
applyApplying));
/** EXPR = CONST | VAR
* | LETTING | SETTING
* | LAMBDA | APPLYING
* | "(" APPLYING ")" */
EXPR.setPattern(
parsec.alt(
CONST,
VAR,
LETTING,
SETTING,
LAMBDA,
parsec.kmid(
parsec.seq(parsec.str('('), _),
APPLYING,
parsec.seq(_, parsec.str(')')))
)
);
/** Tokenize inputStr and parse it with the CONST rule, requiring the
 * whole input to be consumed and to yield exactly one result. */
function mainParse(inputStr : string){
    // NOTE(review): removed an unreachable duplicate return that still
    // referenced the old parsec/EXPR pipeline — EXPR is commented out in
    // this revision, so the first return shadowed the intended one.
    return p.expectSingleResult(p.expectEOF(
        CONST.parse(tokenizer.parse(inputStr))));
}
// test
/** Smoke tests for the parser.
 * NOTE(review): this block interleaves pre- and post-rebuild assertions from
 * the diff. The let/:=/fn/apply cases exercise grammar rules (EXPR, LETTING,
 * LAMBDA, APPLYING) that this revision comments out, so only the trailing
 * constant cases match the CONST-only mainParse — confirm before relying on
 * the middle section. */
function main(){
// bigint has suffix `n`, so 123455667n below is a bigint literal
assert.strictEqual((<ASTNode>mainParse('123455667')).actualValue, 123455667n);
assert.strictEqual((<ASTNode>mainParse('000')).actualValue, 0n);
assert.strictEqual((<ASTNode>mainParse('1.22')).actualValue, 1.22);
assert.strictEqual((<ASTNode>mainParse('0.0')).actualValue, 0.0);
assert.strictEqual((<ASTNode>mainParse(`""`)).actualValue, "");
assert.strictEqual((<ASTNode>mainParse(`"the little town"`)).actualValue, `the little town`);
assert.strictEqual((<ASTNode>mainParse(`"\\\"Alice\\\""`)).actualValue, `"Alice"`);
assert.strictEqual((<ASTNode>mainParse(`foo`)).actualValue, "foo");
// NOTE(review): the assertions below target the removed let/:=/fn/apply rules.
assert.strictEqual(astToSExp(mainParse(`let x = 12 in 23`)), "(let (x 12) 23)");
assert.strictEqual(astToSExp(mainParse(`let y = 10 in let x = 12 in 23`)), "(let (y 10) (let (x 12) 23))");
assert.strictEqual(astToSExp(mainParse(`let y = 10 in y := 12 in 23`)), "(let (y 10) (:= (y 12) 23))");
assert.strictEqual(astToSExp(mainParse(`fn x y -> 234`)), "(fn (x y) 234)");
assert.strictEqual(astToSExp(mainParse(`(add 12 23 )`)), "(add (12 23))");
assert.strictEqual(astToSExp(mainParse(`(foo x y)`)), "(foo (x y))");
assert.strictEqual(astToSExp(mainParse(`((foo 6 7) bar)`)), "((foo (6 7)) (bar))");
assert.strictEqual(astToSExp(mainParse(`fn x y ->
/* foo bar */
(foo x y)`)), "(fn (x y) (foo (x y)))");
// Constant-only cases added by this commit: integer, float and string.
assert.strictEqual(<BigInt>mainParse("123").actualValue, 123n);
assert.strictEqual(<BigInt>mainParse("3.14").actualValue, 3.14);
assert.strictEqual(<BigInt>mainParse("\"foo\"").actualValue, "foo");
};