fix some basic functions of parser - rebuild
This commit is contained in:
parent
829ac29ac3
commit
950c4e0423
5 changed files with 163 additions and 521 deletions
|
@ -44,7 +44,7 @@ C語言、Python語言就算有許多的關鍵字、操作符、符號或是常
|
|||
## 決定語法
|
||||
那我們要如何制定這個語言的語法,這樣我們才能夠寫出符合這個語法的函數,然後再用tokenizer和parser轉成AST樹。
|
||||
|
||||
不考慮` + - * /`這種運算子,以及向量的表示子,函數可以用`ID(arg1, arg2, ...)`這種方式來表示,其中`arg_x`是引數,`ID`是識別子(identifier,可以把它想成變數或函數的名字)。
|
||||
函數可以用`ID arg1 arg2`這種方式來表示,其中`arg_x`是引數,`ID`是識別子(identifier,可以把它想成變數或函數的名字)。
|
||||
|
||||
變數可以是`ID`,`arg_n`可以是`ID`或常數(量)。
|
||||
|
||||
|
@ -56,56 +56,48 @@ C語言、Python語言就算有許多的關鍵字、操作符、符號或是常
|
|||
|
||||
- 字串:`'"' (不是「"」的任一字元|('\' '"')) '"'`(`.`表示任何一個字元)
|
||||
|
||||
然而我們還是需要綁定變數`let x = var in body`(在`body`裡面,`x`指代`var`)、`set x = var`(改變變數值)、lambda`lambda (x)=>{body}`。另外為了要區別要在PDF印上去的一般字元,在這個檔案的常數、變數、函數、關鍵字等前後需要加@表示(但是函數、lambda裡面的變數不用)。比如`@foo(a, b)@`、`@lambda(x)@`、`@"IAmAString"@`、`@2.2@`、`@3@`(後三者應該很少用到)可是若需在PDF印`@`時怎辦?那就用`\@`。比如`foo\@example.com`。
|
||||
然而我們還是需要綁定變數`let int x = var in body`(在`body`裡面,`x`指代`var`)、`set x = var`(改變變數值)、lambda`fn (int x) (int y) -> + x y`(採用前綴表示法,`+`在前)。另外為了要區別要在PDF印上去的一般字元,在這個檔案的常數、變數、函數、關鍵字等前後需要加@表示(但是函數、lambda裡面的變數不用)。比如`@foo a b@`、`@let int x = 3 in toString (+ x 2)@`、`@"IAmAString"@`、`@2.2@`、`@3@`(後三者應該很少用到)可是若需在PDF印`@`時怎辦?那就用`\@`。比如`foo\@example.com`。
|
||||
|
||||
所以我們可以定義以下的BNF風文法:
|
||||
|
||||
```
|
||||
Language ::= MainTxt | Exprs | Comment
|
||||
|
||||
Comment ::= '/*' (不含'*/'的任何字元組合)* '*/'
|
||||
|
||||
|
||||
MainTxt ::= (('\' '@')| 非@非空白字元)+ //顯示的文字。「我是一隻貓」或是「www\@example.com」
|
||||
|
||||
// Exprs 表示一群定義變數、常數、函數、函數套用的表達式
|
||||
Exprs ::= @ Expr* @ // *表示前面的重複0次以上(包含不出現)
|
||||
|
||||
// Comment also included
|
||||
// "(" and ")" only for applying function
|
||||
Expr ::= (Letting | Setting | Lambda | Var | Const) | "(" Applying ")" | Comment
|
||||
|
||||
Letting ::= "let" Var "=" Expr "in" Expr // let foo = 12 in ...
|
||||
|
||||
Setting ::= Var ":=" Expr "in" Expr // foo := a in ...
|
||||
|
||||
// we force every function have at least 1 argument.
|
||||
Lambda ::= "fn" LambdaArgs "->" Expr // fn x y -> 12
|
||||
|
||||
LambdaArgs ::= Var | Var LambdaArgs
|
||||
|
||||
Applying ::= Expr ExprArgs // foo 3 9 即foo(3, 9)
|
||||
|
||||
ExprArgs ::= Expr | (Expr ExprArgs)
|
||||
|
||||
Var ::= ID
|
||||
|
||||
Const ::= String | Float | Integer
|
||||
|
||||
ID ::= ("_" | [a-z] | [A-Z]) ("_" | [0-9] | [a-z] | [A-Z])+
|
||||
|
||||
Integer ::= [0-9]+
|
||||
|
||||
Float ::= [0-9]+ "." [0-9]+
|
||||
|
||||
String ::= '"' (不是「"」的任一字元|('\' '"')) '"'
|
||||
FLO = \d+[.]\d+ // 浮點數
|
||||
INT = \d+ // 整數
|
||||
AT = '@' // @
|
||||
ID = [_\w][_\d\w]* // 識別子
|
||||
R_ARR = [-][>] // 右箭頭 ->
|
||||
SEMICOLON = ";"
|
||||
// 括號
|
||||
L_PAR = '('
|
||||
R_PAR = ')'
|
||||
ASSIGN = '='
|
||||
OP = [+-*/] | [=][=] | [!<>][=] // 運算子
|
||||
HASH = [#]
|
||||
COM = #[^#]*# # 註解 #
|
||||
SPACE = \s+ # 空白字元
|
||||
B_SLASH = [\\] // 反斜線
|
||||
STR = \"([^"]|[\\\"])*\"
|
||||
LIT_STR = ([^\\]?) // 文字模式的不貪婪模式
|
||||
```
|
||||
|
||||
而上述的item可以被1個以上半形空白或tab(`\t`)以及1個「`\n`或`\r\n`」(換行符號)隔開。而為求簡化這些符號在MainTxt均指代一個半形空白。也就是空一個半形空白、兩個半形空白、一個tab、一個換行符號等等都會顯示如一個半形符號。而在Expr表達式區,把它忽略掉。另外兩個換行符號設定為換行指令,而這在Expr區會被忽略。所以要加另外兩條:
|
||||
|
||||
```
|
||||
Space = (' ' | '\t')* | '\n' | '\r\n'
|
||||
NewPara = ('\n' | '\r' '\n') ('\n' | '\r' '\n')
|
||||
程式語法定義如下:
|
||||
```BNF
|
||||
Main ::= (LitStr | Prog)* ; 主體
|
||||
LitStr ::= ( not(AT) | B_SLASH AT | B_SLASH HASH)+ ;基本文字模式
|
||||
Prog ::= '@' BODY '@' ;程式模式
|
||||
BODY ::= LET | EXPR | DEFINE
|
||||
DEFINE ::= "define" TYPE VAR ASSIGN BODY SEMICOLON ; 全局定義
|
||||
LET ::= "let" TYPE VAR ASSIGN BODY "in" BODY ; 局域定義
|
||||
EXPR ::= APPLY | FN | LIST | CONST | VAR | "(" EXPR ")"
|
||||
APPLY ::= OP EXPR+ | EXPR EXPR+
|
||||
FN ::= "fn" ARGS R_ARR BODY
|
||||
ARGS ::= ARG | ARG ARGS
|
||||
ARG ::= "(" TYPE VAR ")"
|
||||
CONST ::= FLO | STR | INT
|
||||
VAR ::= ID
|
||||
TYPE ::= ID
|
||||
LIST ::= [LIST_INNER]
|
||||
LIST_INNER ::= EXPR | EXPR SEMICOLON LIST_INNER
|
||||
```
|
||||
|
||||
## 用ts-parsec和regexp進行tokenize
|
||||
|
@ -163,44 +155,7 @@ thenDo(thenDo(thenDo(sWrapped, match0to9), match0to9), match0to9)
|
|||
我們編輯Node.js的進入點程式(假設為`src/index.js`),底下為定義tokenizer的型別和regex pattern:
|
||||
|
||||
```typescript
|
||||
/** the type of token */
|
||||
enum TokenKind {
|
||||
Int, // 3
|
||||
Flo, // 3.1416
|
||||
Id, // foo, _123, etc
|
||||
At, // @
|
||||
Comt, // comment /*
|
||||
Str, /** "foo" */
|
||||
Assign, /** = */
|
||||
Set, /** := */
|
||||
Keyword, /** let, in */
|
||||
LParen, /** ( */
|
||||
RParen, /** ) */
|
||||
Space, /** semi-width space tab, \r\n? */
|
||||
NewPara, /** breaking paragraph, (\r\n?){2} */
|
||||
MainTxt, /** used in main text */
|
||||
}
|
||||
|
||||
|
||||
// tokenizer
|
||||
const tokenizer = parsec.buildLexer([
|
||||
[true, /^\d+/g, TokenKind.Int],
|
||||
[true, /^\d+\.\d+/g, TokenKind.Flo],
|
||||
[true, /^(let|in)/g, TokenKind.Keyword], // let and in
|
||||
[true, /^[_a-zA-Z][_0-9a-zA-Z]*/g, TokenKind.Id],
|
||||
[true, /^\@/g, TokenKind.At],
|
||||
/* inside comment, only accept 1. non / character
|
||||
or 2. "/ + non * character" */
|
||||
[true, /^\/\*(\/[^*]|[^\\]?)*\*\//g, TokenKind.Comt],
|
||||
[true, /^\"(\\\"|[^\"]?)*\"/g, TokenKind.Str],
|
||||
[true, /^\:\=/g, TokenKind.Set],
|
||||
[true, /^\=/g, TokenKind.Assign],
|
||||
[true, /^\(/g, TokenKind.LParen],
|
||||
[true, /^\)/g, TokenKind.RParen],
|
||||
[true, /^([ \t]+|\n)/g, TokenKind.Space],
|
||||
[true, /^(\r?\n){2}/g, TokenKind.NewPara],
|
||||
[true, /^(\\\@|[^@\s])+/g, TokenKind.MainTxt],
|
||||
]);
|
||||
```
|
||||
|
||||
### 常數parsing
|
||||
|
|
35
src/ch1/src/index.d.ts
vendored
35
src/ch1/src/index.d.ts
vendored
|
@ -1,23 +1,24 @@
|
|||
import * as parsec from 'typescript-parsec';
|
||||
import * as p from 'typescript-parsec';
|
||||
/** the type of token */
|
||||
declare enum TokenKind {
|
||||
Int = 0,
|
||||
Flo = 1,
|
||||
Id = 2,
|
||||
At = 3,
|
||||
Comt = 4,
|
||||
Str = /** "foo" */ 5,
|
||||
Lambda = /** -> */ 6,
|
||||
Assign = /** = */ 7,
|
||||
Set = /** := */ 8,
|
||||
Keyword = /** let, in */ 9,
|
||||
LParen = /** ( */ 10,
|
||||
RParen = /** ) */ 11,
|
||||
Space = /** semi-width space tab, \r\n? */ 12,
|
||||
NewPara = /** breaking paragraph, (\r\n?){2} */ 13,
|
||||
MainTxt = /** used in main text */ 14
|
||||
Flo = 0,
|
||||
Int = 1,
|
||||
At = 2,
|
||||
Id = 3,
|
||||
RArr = 4,
|
||||
SColon = 5,
|
||||
LPar = 6,
|
||||
RPar = 7,
|
||||
Assign = 8,
|
||||
Op = 9,
|
||||
Hash = 10,
|
||||
Com = 11,
|
||||
BSlash = 12,
|
||||
Str = 13,
|
||||
LitStr = 14,
|
||||
Space = 15
|
||||
}
|
||||
export interface ASTNode extends parsec.Token<TokenKind> {
|
||||
export interface ASTNode extends p.Token<TokenKind> {
|
||||
actualValue?: bigint | number | string;
|
||||
}
|
||||
/** AST Tree */
|
||||
|
|
|
@ -1,27 +1,28 @@
|
|||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.astToSExp = void 0;
|
||||
const parsec = require("typescript-parsec"); // import parsec
|
||||
const p = require("typescript-parsec"); // import p
|
||||
/* for test */
|
||||
const assert = require("assert");
|
||||
/** the type of token */
|
||||
var TokenKind;
|
||||
(function (TokenKind) {
|
||||
TokenKind[TokenKind["Int"] = 0] = "Int";
|
||||
TokenKind[TokenKind["Flo"] = 1] = "Flo";
|
||||
TokenKind[TokenKind["Id"] = 2] = "Id";
|
||||
TokenKind[TokenKind["At"] = 3] = "At";
|
||||
TokenKind[TokenKind["Comt"] = 4] = "Comt";
|
||||
TokenKind[TokenKind["Str"] = 5] = "Str";
|
||||
TokenKind[TokenKind["Lambda"] = 6] = "Lambda";
|
||||
TokenKind[TokenKind["Assign"] = 7] = "Assign";
|
||||
TokenKind[TokenKind["Set"] = 8] = "Set";
|
||||
TokenKind[TokenKind["Keyword"] = 9] = "Keyword";
|
||||
TokenKind[TokenKind["LParen"] = 10] = "LParen";
|
||||
TokenKind[TokenKind["RParen"] = 11] = "RParen";
|
||||
TokenKind[TokenKind["Space"] = 12] = "Space";
|
||||
TokenKind[TokenKind["NewPara"] = 13] = "NewPara";
|
||||
TokenKind[TokenKind["MainTxt"] = 14] = "MainTxt";
|
||||
TokenKind[TokenKind["Flo"] = 0] = "Flo";
|
||||
TokenKind[TokenKind["Int"] = 1] = "Int";
|
||||
TokenKind[TokenKind["At"] = 2] = "At";
|
||||
TokenKind[TokenKind["Id"] = 3] = "Id";
|
||||
TokenKind[TokenKind["RArr"] = 4] = "RArr";
|
||||
TokenKind[TokenKind["SColon"] = 5] = "SColon";
|
||||
TokenKind[TokenKind["LPar"] = 6] = "LPar";
|
||||
TokenKind[TokenKind["RPar"] = 7] = "RPar";
|
||||
TokenKind[TokenKind["Assign"] = 8] = "Assign";
|
||||
TokenKind[TokenKind["Op"] = 9] = "Op";
|
||||
TokenKind[TokenKind["Hash"] = 10] = "Hash";
|
||||
TokenKind[TokenKind["Com"] = 11] = "Com";
|
||||
TokenKind[TokenKind["BSlash"] = 12] = "BSlash";
|
||||
TokenKind[TokenKind["Str"] = 13] = "Str";
|
||||
TokenKind[TokenKind["LitStr"] = 14] = "LitStr";
|
||||
TokenKind[TokenKind["Space"] = 15] = "Space";
|
||||
})(TokenKind || (TokenKind = {}));
|
||||
/** from AST to S-exp */
|
||||
function astToSExp(ast) {
|
||||
|
@ -36,29 +37,29 @@ function astToSExp(ast) {
|
|||
}
|
||||
exports.astToSExp = astToSExp;
|
||||
// tokenizer
|
||||
const tokenizer = parsec.buildLexer([
|
||||
const tokenizer = p.buildLexer([
|
||||
[true, /^\d+[.]\d+/g, TokenKind.Flo],
|
||||
[true, /^\d+/g, TokenKind.Int],
|
||||
[true, /^\d+\.\d+/g, TokenKind.Flo],
|
||||
[true, /^(let|in|fn)/g, TokenKind.Keyword],
|
||||
[true, /^[_a-zA-Z][_0-9a-zA-Z]*/g, TokenKind.Id],
|
||||
[true, /^\@/g, TokenKind.At],
|
||||
/* inside comment, only accept 1. non / character
|
||||
or 2. "/ + non * character" */
|
||||
[true, /^\/\*(\/[^*]|[^\\]?)*\*\//g, TokenKind.Comt],
|
||||
[true, /^\"(\\\"|[^\"]?)*\"/g, TokenKind.Str],
|
||||
[true, /^\:\=/g, TokenKind.Set],
|
||||
[true, /^\=/g, TokenKind.Assign],
|
||||
[true, /^->/g, TokenKind.Lambda],
|
||||
[true, /^\(/g, TokenKind.LParen],
|
||||
[true, /^\)/g, TokenKind.RParen],
|
||||
[true, /^([ \t]+|[ \t]*\r?\n[ \t]*)/g, TokenKind.Space],
|
||||
[true, /^(\r?\n){2}/g, TokenKind.NewPara],
|
||||
[true, /^(\\\@|[^@\s])/g, TokenKind.MainTxt],
|
||||
[true, /^[@]/g, TokenKind.At],
|
||||
[true, /^[_\w][_\d\w]*/g, TokenKind.Id],
|
||||
[true, /^->/g, TokenKind.RArr],
|
||||
[true, /^[;]/g, TokenKind.SColon],
|
||||
[true, /^[(]/g, TokenKind.LPar],
|
||||
[true, /^[)]/g, TokenKind.RPar],
|
||||
[true, /^[=]/g, TokenKind.Assign],
|
||||
[true, /^([\+\-\*\/]|[!<>=]=)/g, TokenKind.Op],
|
||||
[true, /^#[^#]*#/g, TokenKind.Com],
|
||||
[true, /^[\\]/g, TokenKind.BSlash],
|
||||
[true, /^\"([^"]|[\\\"])*\"/g, TokenKind.Str],
|
||||
[true, /^([^\\]+?)/g, TokenKind.LitStr],
|
||||
[true, /^\s+/g, TokenKind.Space],
|
||||
]);
|
||||
/** ignore spaces ,new lines, and comments */
|
||||
const _ = parsec.opt(parsec.alt(parsec.tok(TokenKind.Space), parsec.tok(TokenKind.NewPara),
|
||||
// space or newPara + comment + space or newPara
|
||||
parsec.seq(parsec.opt(parsec.alt(parsec.tok(TokenKind.Space), parsec.tok(TokenKind.NewPara))), parsec.tok(TokenKind.Comt), parsec.opt(parsec.alt(parsec.tok(TokenKind.Space), parsec.tok(TokenKind.NewPara))))));
|
||||
//const _ = p.opt(p.alt(
|
||||
// p.tok(TokenKind.Space),
|
||||
// p.tok(TokenKind.Com),
|
||||
// )
|
||||
//);
|
||||
function applyInteger(value) {
|
||||
// extend value to ASTNode
|
||||
const newNode = {
|
||||
|
@ -82,112 +83,23 @@ function applyString(value) {
|
|||
};
|
||||
return newNode;
|
||||
}
|
||||
function applyIdentifier(value) {
|
||||
const newNode = {
|
||||
actualValue: value.text,
|
||||
...value
|
||||
};
|
||||
return newNode;
|
||||
}
|
||||
/** apply LETTING.
|
||||
* returns [let, [var, x], expr] */
|
||||
function applyLetting(input) {
|
||||
// node representing let
|
||||
let letNode = input[0];
|
||||
let varNode = input[2];
|
||||
let valueNode = input[6];
|
||||
let exprAST = input[10];
|
||||
return [letNode, [varNode, valueNode], exprAST];
|
||||
}
|
||||
/** apply SETTING */
|
||||
function applySetting(input) {
|
||||
// node representing let
|
||||
let setNode = input[2];
|
||||
let varNode = input[0];
|
||||
let valueNode = input[4];
|
||||
let exprAST = input[8];
|
||||
// (:= (var val) expr) : set var = val in expr
|
||||
return [setNode, [varNode, valueNode], exprAST];
|
||||
}
|
||||
function applyLambda(input) {
|
||||
let lambdaNode = input[0];
|
||||
let argHead = input[1];
|
||||
let argTail = input[2];
|
||||
let body = input[6];
|
||||
let args = [argHead].concat(argTail);
|
||||
// return (fn (args) body) like lambda in Scheme
|
||||
return [lambdaNode, args, body];
|
||||
}
|
||||
function applyApplying(input) {
|
||||
let applier = input[0];
|
||||
let applieeHead = input[2];
|
||||
let applieeTail = input[3];
|
||||
let appliee = [(applieeHead)].concat(applieeTail);
|
||||
// foo 2 3 => (foo (2 3))
|
||||
return [applier, appliee];
|
||||
}
|
||||
/** define all the parser sentence */
|
||||
const CONST = parsec.rule();
|
||||
const VAR = parsec.rule();
|
||||
const ARG = parsec.rule();
|
||||
const EXPR = parsec.rule();
|
||||
const LETTING = parsec.rule();
|
||||
const SETTING = parsec.rule();
|
||||
const LAMBDA = parsec.rule();
|
||||
const APPLYING = parsec.rule();
|
||||
/*
|
||||
CONST ::= INT | FLOAT | STRING
|
||||
*/
|
||||
CONST.setPattern(parsec.alt(parsec.apply(parsec.tok(TokenKind.Int), applyInteger), parsec.apply(parsec.tok(TokenKind.Flo), applyFloat), parsec.apply(parsec.tok(TokenKind.Str), applyString)));
|
||||
/** VAR = ID */
|
||||
VAR.setPattern(parsec.apply(parsec.tok(TokenKind.Id), applyIdentifier));
|
||||
/** ARG = ID */
|
||||
ARG.setPattern(parsec.apply(parsec.tok(TokenKind.Id), applyIdentifier));
|
||||
/**SETTING ::= VAR ":=" EXPR in EXPR
|
||||
* and ignore the spaces and new lines with `_`
|
||||
*/
|
||||
SETTING.setPattern(parsec.apply(parsec.seq(VAR, _, parsec.str(":="), _, EXPR, _, parsec.str("in"), _, EXPR), applySetting));
|
||||
/**LETTING ::= "let" VAR "=" EXPR in EXPR
|
||||
* and ignore the spaces and new lines with `_`
|
||||
*/
|
||||
LETTING.setPattern(parsec.apply(parsec.seq(parsec.str("let"), _, VAR, _, parsec.str("="), _, EXPR, _, parsec.str("in"), _, EXPR), applyLetting));
|
||||
/**LAMBDA ::= "fn" (Args)+ "->" EXPR
|
||||
* and ignore the spaces and new lines with `_`
|
||||
*/
|
||||
LAMBDA.setPattern(parsec.apply(parsec.seq(parsec.str("fn"), parsec.kright(_, ARG), // arg SpaceNL
|
||||
parsec.rep_sc(parsec.kright(_, ARG)), //other (arg SpaceNL), repeat 0+times
|
||||
_, parsec.str("->"), _, EXPR), applyLambda));
|
||||
// APPLYING = ( "(" APPLYING ")" |LAMBDA|VAR) APPLIEE+
|
||||
APPLYING.setPattern(parsec.apply(parsec.seq(parsec.alt(LAMBDA, VAR, parsec.kmid(parsec.seq(parsec.str('('), _), APPLYING, parsec.seq(_, parsec.str(')')))), _, EXPR, parsec.rep_sc(parsec.kright(_, EXPR))), applyApplying));
|
||||
/** EXPR = CONST | VAR
|
||||
* | LETTING | SETTING
|
||||
* | LAMBDA | APPLYING
|
||||
* | "(" APPLYING ")" */
|
||||
EXPR.setPattern(parsec.alt(CONST, VAR, LETTING, SETTING, LAMBDA, parsec.kmid(parsec.seq(parsec.str('('), _), APPLYING, parsec.seq(_, parsec.str(')')))));
|
||||
const CONST = p.rule();
|
||||
/*const VAR = p.rule<TokenKind, ASTNode>();
|
||||
const ARG = p.rule<TokenKind, ASTNode>();
|
||||
const EXPR = p.rule<TokenKind, AST>();
|
||||
const LETTING = p.rule<TokenKind, AST>();
|
||||
const LAMBDA = p.rule<TokenKind, AST>();
|
||||
const APPLYING = p.rule<TokenKind, AST>(); */
|
||||
CONST.setPattern(p.alt(p.apply(p.tok(TokenKind.Flo), applyFloat), p.apply(p.tok(TokenKind.Int), applyInteger), p.apply(p.tok(TokenKind.Str), applyString)));
|
||||
function mainParse(inputStr) {
|
||||
return parsec.expectSingleResult(parsec.expectEOF(EXPR.parse(tokenizer.parse(inputStr))));
|
||||
return p.expectSingleResult(p.expectEOF(CONST.parse(tokenizer.parse(inputStr))));
|
||||
}
|
||||
// test
|
||||
function main() {
|
||||
// bigint has suffix `n`
|
||||
assert.strictEqual(mainParse('123455667').actualValue, 123455667n);
|
||||
assert.strictEqual(mainParse('000').actualValue, 0n);
|
||||
assert.strictEqual(mainParse('1.22').actualValue, 1.22);
|
||||
assert.strictEqual(mainParse('0.0').actualValue, 0.0);
|
||||
assert.strictEqual(mainParse(`""`).actualValue, "");
|
||||
assert.strictEqual(mainParse(`"the little town"`).actualValue, `the little town`);
|
||||
assert.strictEqual(mainParse(`"\\\"Alice\\\""`).actualValue, `"Alice"`);
|
||||
assert.strictEqual(mainParse(`foo`).actualValue, "foo");
|
||||
assert.strictEqual(astToSExp(mainParse(`let x = 12 in 23`)), "(let (x 12) 23)");
|
||||
assert.strictEqual(astToSExp(mainParse(`let y = 10 in let x = 12 in 23`)), "(let (y 10) (let (x 12) 23))");
|
||||
assert.strictEqual(astToSExp(mainParse(`let y = 10 in y := 12 in 23`)), "(let (y 10) (:= (y 12) 23))");
|
||||
assert.strictEqual(astToSExp(mainParse(`fn x y -> 234`)), "(fn (x y) 234)");
|
||||
assert.strictEqual(astToSExp(mainParse(`(add 12 23 )`)), "(add (12 23))");
|
||||
assert.strictEqual(astToSExp(mainParse(`(foo x y)`)), "(foo (x y))");
|
||||
assert.strictEqual(astToSExp(mainParse(`((foo 6 7) bar)`)), "((foo (6 7)) (bar))");
|
||||
assert.strictEqual(astToSExp(mainParse(`fn x y ->
|
||||
/* foo bar */
|
||||
(foo x y)`)), "(fn (x y) (foo (x y)))");
|
||||
assert.strictEqual(mainParse("123").actualValue, 123n);
|
||||
assert.strictEqual(mainParse("3.14").actualValue, 3.14);
|
||||
assert.strictEqual(mainParse("\"foo\"").actualValue, "foo");
|
||||
}
|
||||
;
|
||||
main();
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -1,28 +1,29 @@
|
|||
import * as parsec from 'typescript-parsec'; // import parsec
|
||||
import * as p from 'typescript-parsec'; // import p
|
||||
/* for test */
|
||||
import * as assert from 'assert';
|
||||
|
||||
/** the type of token */
|
||||
enum TokenKind {
|
||||
Int, // 3
|
||||
Flo, // 3.1416
|
||||
Id, // foo, _123, etc
|
||||
Flo,
|
||||
Int,
|
||||
At, // @
|
||||
Comt, // comment /*
|
||||
Str, /** "foo" */
|
||||
Lambda, /** -> */
|
||||
Assign, /** = */
|
||||
Set, /** := */
|
||||
Keyword, /** let, in */
|
||||
LParen, /** ( */
|
||||
RParen, /** ) */
|
||||
Space, /** semi-width space tab, \r\n? */
|
||||
NewPara, /** breaking paragraph, (\r\n?){2} */
|
||||
MainTxt, /** used in main text */
|
||||
Id, // identifier
|
||||
RArr, // Right Arrow
|
||||
SColon, // Semi colon
|
||||
LPar, // left perenthesis
|
||||
RPar, // right paranthesis
|
||||
Assign, // =
|
||||
Op, // +-*/...
|
||||
Hash, // #
|
||||
Com, // # comment
|
||||
BSlash, // backslash\
|
||||
Str, // "string"
|
||||
LitStr, // literal string
|
||||
Space, // Spaces
|
||||
}
|
||||
|
||||
// add "actualValue" in the parsed Token
|
||||
export interface ASTNode extends parsec.Token<TokenKind>{
|
||||
export interface ASTNode extends p.Token<TokenKind>{
|
||||
// number is for float number;
|
||||
//it's optional. since keyword has no value
|
||||
actualValue? : bigint | number | string;
|
||||
|
@ -44,49 +45,36 @@ export function astToSExp(ast : AST){
|
|||
|
||||
|
||||
// tokenizer
|
||||
const tokenizer = parsec.buildLexer([
|
||||
const tokenizer = p.buildLexer([
|
||||
[true,/^\d+[.]\d+/g , TokenKind.Flo],
|
||||
[true, /^\d+/g, TokenKind.Int],
|
||||
[true, /^\d+\.\d+/g, TokenKind.Flo],
|
||||
[true, /^(let|in|fn)/g, TokenKind.Keyword], // let, in, fn
|
||||
[true, /^[_a-zA-Z][_0-9a-zA-Z]*/g, TokenKind.Id],
|
||||
[true, /^\@/g, TokenKind.At],
|
||||
/* inside comment, only accept 1. non / character
|
||||
or 2. "/ + non * character" */
|
||||
[true, /^\/\*(\/[^*]|[^\\]?)*\*\//g, TokenKind.Comt],
|
||||
[true, /^\"(\\\"|[^\"]?)*\"/g, TokenKind.Str],
|
||||
[true, /^\:\=/g, TokenKind.Set],
|
||||
[true, /^\=/g, TokenKind.Assign],
|
||||
[true, /^->/g, TokenKind.Lambda],
|
||||
[true, /^\(/g, TokenKind.LParen],
|
||||
[true, /^\)/g, TokenKind.RParen],
|
||||
[true, /^([ \t]+|[ \t]*\r?\n[ \t]*)/g, TokenKind.Space],
|
||||
[true, /^(\r?\n){2}/g, TokenKind.NewPara],
|
||||
[true, /^(\\\@|[^@\s])/g, TokenKind.MainTxt],
|
||||
[true,/^[@]/g, TokenKind.At ],
|
||||
[true,/^[_\w][_\d\w]*/g, TokenKind.Id ],
|
||||
[true,/^->/g , TokenKind.RArr ],
|
||||
[true, /^[;]/g, TokenKind.SColon ],
|
||||
[true, /^[(]/g, TokenKind.LPar ],
|
||||
[true, /^[)]/g, TokenKind.RPar ],
|
||||
[true, /^[=]/g, TokenKind.Assign ],
|
||||
[true, /^([\+\-\*\/]|[!<>=]=)/g, TokenKind.Op ],
|
||||
[true, /^#[^#]*#/g, TokenKind.Com ],
|
||||
[true, /^[\\]/g, TokenKind.BSlash ],
|
||||
[true,/^\"([^"]|[\\\"])*\"/g , TokenKind.Str ],
|
||||
[true, /^([^\\]+?)/g, TokenKind.LitStr ],
|
||||
[true, /^\s+/g, TokenKind.Space],
|
||||
]);
|
||||
|
||||
|
||||
|
||||
/** ignore spaces ,new lines, and comments */
|
||||
const _ = parsec.opt(parsec.alt(
|
||||
parsec.tok(TokenKind.Space),
|
||||
parsec.tok(TokenKind.NewPara),
|
||||
|
||||
// space or newPara + comment + space or newPara
|
||||
parsec.seq(
|
||||
parsec.opt(parsec.alt(
|
||||
parsec.tok(TokenKind.Space),
|
||||
parsec.tok(TokenKind.NewPara))),
|
||||
parsec.tok(TokenKind.Comt),
|
||||
parsec.opt(parsec.alt(
|
||||
parsec.tok(TokenKind.Space),
|
||||
parsec.tok(TokenKind.NewPara))),
|
||||
)
|
||||
)
|
||||
);
|
||||
//const _ = p.opt(p.alt(
|
||||
// p.tok(TokenKind.Space),
|
||||
// p.tok(TokenKind.Com),
|
||||
// )
|
||||
//);
|
||||
|
||||
|
||||
|
||||
|
||||
function applyInteger(value: parsec.Token<TokenKind.Int>): ASTNode {
|
||||
function applyInteger(value: p.Token<TokenKind.Int>): ASTNode {
|
||||
// extend value to ASTNode
|
||||
const newNode : ASTNode = {
|
||||
actualValue : BigInt(value.text) ,
|
||||
|
@ -94,14 +82,14 @@ function applyInteger(value: parsec.Token<TokenKind.Int>): ASTNode {
|
|||
return newNode;
|
||||
}
|
||||
|
||||
function applyFloat(value: parsec.Token<TokenKind.Flo>): ASTNode {
|
||||
function applyFloat(value: p.Token<TokenKind.Flo>): ASTNode {
|
||||
const newNode : ASTNode = {
|
||||
actualValue : parseFloat(value.text) ,
|
||||
...value};
|
||||
return newNode;
|
||||
}
|
||||
|
||||
function applyString(value: parsec.Token<TokenKind.Str>): ASTNode {
|
||||
function applyString(value: p.Token<TokenKind.Str>): ASTNode {
|
||||
const newNode : ASTNode = {
|
||||
// get only text[1,2,...,the second last char]
|
||||
actualValue : value.text.slice(1,value.text.length-1).replace(/\\\"/g, "\"") ,
|
||||
|
@ -109,250 +97,36 @@ function applyString(value: parsec.Token<TokenKind.Str>): ASTNode {
|
|||
return newNode;
|
||||
}
|
||||
|
||||
function applyIdentifier(value: parsec.Token<TokenKind.Id>): ASTNode {
|
||||
const newNode : ASTNode = {
|
||||
actualValue : value.text,
|
||||
...value};
|
||||
return newNode;
|
||||
}
|
||||
|
||||
/** apply LETTING.
|
||||
* returns [let, [var, x], expr] */
|
||||
function applyLetting(input: [parsec.Token<TokenKind>, // let
|
||||
parsec.Token<TokenKind> | undefined, // space
|
||||
ASTNode, // var
|
||||
parsec.Token<TokenKind>| undefined, // space
|
||||
parsec.Token<TokenKind>, // =
|
||||
parsec.Token<TokenKind>| undefined, // space
|
||||
AST, // val
|
||||
parsec.Token<TokenKind>| undefined, // space
|
||||
parsec.Token<TokenKind>, // in
|
||||
parsec.Token<TokenKind>| undefined, // space
|
||||
AST // expr
|
||||
]): AST {
|
||||
// node representing let
|
||||
let letNode : ASTNode = input[0];
|
||||
let varNode = input[2];
|
||||
let valueNode = input[6];
|
||||
let exprAST = input[10];
|
||||
|
||||
return [letNode, [varNode, valueNode], exprAST];
|
||||
|
||||
}
|
||||
|
||||
/** apply SETTING */
|
||||
function applySetting(input: [ASTNode, // var
|
||||
parsec.Token<TokenKind>| undefined, // space
|
||||
parsec.Token<TokenKind>, // :=
|
||||
parsec.Token<TokenKind>| undefined, // space
|
||||
AST, // val
|
||||
parsec.Token<TokenKind>| undefined, // space
|
||||
parsec.Token<TokenKind>, // in
|
||||
parsec.Token<TokenKind>| undefined, // space
|
||||
AST // expr
|
||||
]): AST {
|
||||
// node representing let
|
||||
let setNode = input[2];
|
||||
let varNode = input[0];
|
||||
let valueNode = input[4];
|
||||
let exprAST = input[8];
|
||||
|
||||
// (:= (var val) expr) : set var = val in expr
|
||||
return [setNode, [varNode, valueNode], exprAST];
|
||||
|
||||
}
|
||||
|
||||
function applyLambda(input: [ASTNode, // fn
|
||||
ASTNode, // arg
|
||||
ASTNode[], // args
|
||||
parsec.Token<TokenKind>| undefined, // space
|
||||
parsec.Token<TokenKind>, // ->
|
||||
parsec.Token<TokenKind>| undefined, // space
|
||||
AST // expr
|
||||
]): AST {
|
||||
let lambdaNode = input[0];
|
||||
let argHead = input[1];
|
||||
let argTail = input[2];
|
||||
let body = input[6];
|
||||
let args = [argHead].concat(argTail)
|
||||
|
||||
// return (fn (args) body) like lambda in Scheme
|
||||
return [lambdaNode, args, body];
|
||||
}
|
||||
|
||||
|
||||
function applyApplying(input : [ASTNode, // caller
|
||||
parsec.Token<TokenKind> |undefined, // space
|
||||
ASTNode, // head of callee
|
||||
AST[] // tail of callee
|
||||
]){
|
||||
let applier = input[0];
|
||||
let applieeHead = input[2];
|
||||
let applieeTail = input[3];
|
||||
let appliee = [<AST>(applieeHead)].concat(applieeTail);
|
||||
|
||||
|
||||
// foo 2 3 => (foo (2 3))
|
||||
return [applier, appliee];
|
||||
|
||||
}
|
||||
|
||||
/** define all the parser sentence */
|
||||
const CONST = parsec.rule<TokenKind, ASTNode>();
|
||||
const VAR = parsec.rule<TokenKind, ASTNode>();
|
||||
const ARG = parsec.rule<TokenKind, ASTNode>();
|
||||
const EXPR = parsec.rule<TokenKind, AST>();
|
||||
const LETTING = parsec.rule<TokenKind, AST>();
|
||||
const SETTING = parsec.rule<TokenKind, AST>();
|
||||
const LAMBDA = parsec.rule<TokenKind, AST>();
|
||||
const APPLYING = parsec.rule<TokenKind, AST>();
|
||||
const CONST = p.rule<TokenKind, ASTNode>();
|
||||
/*const VAR = p.rule<TokenKind, ASTNode>();
|
||||
const ARG = p.rule<TokenKind, ASTNode>();
|
||||
const EXPR = p.rule<TokenKind, AST>();
|
||||
const LETTING = p.rule<TokenKind, AST>();
|
||||
const LAMBDA = p.rule<TokenKind, AST>();
|
||||
const APPLYING = p.rule<TokenKind, AST>(); */
|
||||
|
||||
|
||||
|
||||
/*
|
||||
CONST ::= INT | FLOAT | STRING
|
||||
*/
|
||||
CONST.setPattern(
|
||||
parsec.alt(
|
||||
parsec.apply(parsec.tok(TokenKind.Int), applyInteger),
|
||||
parsec.apply(parsec.tok(TokenKind.Flo), applyFloat),
|
||||
parsec.apply(parsec.tok(TokenKind.Str), applyString),
|
||||
p.alt(
|
||||
p.apply(p.tok(TokenKind.Flo), applyFloat),
|
||||
p.apply(p.tok(TokenKind.Int), applyInteger),
|
||||
p.apply(p.tok(TokenKind.Str), applyString),
|
||||
|
||||
)
|
||||
);
|
||||
|
||||
/** VAR = ID */
|
||||
VAR.setPattern(
|
||||
parsec.apply(parsec.tok(TokenKind.Id), applyIdentifier),
|
||||
);
|
||||
|
||||
/** ARG = ID */
|
||||
ARG.setPattern(
|
||||
parsec.apply(parsec.tok(TokenKind.Id), applyIdentifier),
|
||||
);
|
||||
|
||||
|
||||
|
||||
|
||||
/**SETTING ::= VAR ":=" EXPR in EXPR
|
||||
* and ignore the spaces and new lines with `_`
|
||||
*/
|
||||
|
||||
SETTING.setPattern(
|
||||
parsec.apply(
|
||||
parsec.seq(
|
||||
VAR,
|
||||
_,
|
||||
parsec.str(":="),
|
||||
_,
|
||||
EXPR,
|
||||
_,
|
||||
parsec.str("in"),
|
||||
_,
|
||||
EXPR), applySetting));
|
||||
|
||||
/**LETTING ::= "let" VAR "=" EXPR in EXPR
|
||||
* and ignore the spaces and new lines with `_`
|
||||
*/
|
||||
LETTING.setPattern(
|
||||
parsec.apply(
|
||||
parsec.seq(
|
||||
parsec.str("let"),
|
||||
_,
|
||||
VAR,
|
||||
_,
|
||||
parsec.str("="),
|
||||
_,
|
||||
EXPR,
|
||||
_,
|
||||
parsec.str("in"),
|
||||
_,
|
||||
EXPR), applyLetting));
|
||||
|
||||
|
||||
|
||||
/**LAMBDA ::= "fn" (Args)+ "->" EXPR
|
||||
* and ignore the spaces and new lines with `_`
|
||||
*/
|
||||
LAMBDA.setPattern(
|
||||
parsec.apply(
|
||||
parsec.seq(
|
||||
parsec.str("fn"),
|
||||
parsec.kright(_, ARG), // arg SpaceNL
|
||||
parsec.rep_sc(parsec.kright(_, ARG)), //other (arg SpaceNL), repeat 0+times
|
||||
_,
|
||||
parsec.str("->"),
|
||||
_,
|
||||
EXPR),
|
||||
applyLambda)
|
||||
)
|
||||
|
||||
// APPLYING = ( "(" APPLYING ")" |LAMBDA|VAR) APPLIEE+
|
||||
APPLYING.setPattern(
|
||||
parsec.apply(
|
||||
parsec.seq(
|
||||
parsec.alt(
|
||||
LAMBDA,
|
||||
VAR,
|
||||
parsec.kmid(
|
||||
parsec.seq(parsec.str('('), _),
|
||||
APPLYING,
|
||||
parsec.seq(_, parsec.str(')')))
|
||||
),
|
||||
_,
|
||||
EXPR,
|
||||
parsec.rep_sc(parsec.kright(_, EXPR))),
|
||||
applyApplying));
|
||||
|
||||
/** EXPR = CONST | VAR
|
||||
* | LETTING | SETTING
|
||||
* | LAMBDA | APPLYING
|
||||
* | "(" APPLYING ")" */
|
||||
EXPR.setPattern(
|
||||
parsec.alt(
|
||||
CONST,
|
||||
VAR,
|
||||
LETTING,
|
||||
SETTING,
|
||||
LAMBDA,
|
||||
parsec.kmid(
|
||||
parsec.seq(parsec.str('('), _),
|
||||
APPLYING,
|
||||
parsec.seq(_, parsec.str(')')))
|
||||
)
|
||||
);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
function mainParse(inputStr : string){
|
||||
return parsec.expectSingleResult(parsec.expectEOF(
|
||||
EXPR.parse(tokenizer.parse(inputStr))));
|
||||
return p.expectSingleResult(p.expectEOF(
|
||||
CONST.parse(tokenizer.parse(inputStr))));
|
||||
}
|
||||
|
||||
|
||||
// test
|
||||
function main(){
|
||||
// bigint has suffix `n`
|
||||
assert.strictEqual((<ASTNode>mainParse('123455667')).actualValue, 123455667n);
|
||||
assert.strictEqual((<ASTNode>mainParse('000')).actualValue, 0n);
|
||||
assert.strictEqual((<ASTNode>mainParse('1.22')).actualValue, 1.22);
|
||||
assert.strictEqual((<ASTNode>mainParse('0.0')).actualValue, 0.0);
|
||||
assert.strictEqual((<ASTNode>mainParse(`""`)).actualValue, "");
|
||||
assert.strictEqual((<ASTNode>mainParse(`"the little town"`)).actualValue, `the little town`);
|
||||
assert.strictEqual((<ASTNode>mainParse(`"\\\"Alice\\\""`)).actualValue, `"Alice"`);
|
||||
|
||||
assert.strictEqual((<ASTNode>mainParse(`foo`)).actualValue, "foo");
|
||||
assert.strictEqual(astToSExp(mainParse(`let x = 12 in 23`)), "(let (x 12) 23)");
|
||||
assert.strictEqual(astToSExp(mainParse(`let y = 10 in let x = 12 in 23`)), "(let (y 10) (let (x 12) 23))");
|
||||
assert.strictEqual(astToSExp(mainParse(`let y = 10 in y := 12 in 23`)), "(let (y 10) (:= (y 12) 23))");
|
||||
assert.strictEqual(astToSExp(mainParse(`fn x y -> 234`)), "(fn (x y) 234)");
|
||||
assert.strictEqual(astToSExp(mainParse(`(add 12 23 )`)), "(add (12 23))");
|
||||
assert.strictEqual(astToSExp(mainParse(`(foo x y)`)), "(foo (x y))");
|
||||
assert.strictEqual(astToSExp(mainParse(`((foo 6 7) bar)`)), "((foo (6 7)) (bar))");
|
||||
assert.strictEqual(astToSExp(mainParse(`fn x y ->
|
||||
/* foo bar */
|
||||
(foo x y)`)), "(fn (x y) (foo (x y)))");
|
||||
assert.strictEqual(<BigInt>mainParse("123").actualValue, 123n);
|
||||
assert.strictEqual(<BigInt>mainParse("3.14").actualValue, 3.14);
|
||||
assert.strictEqual(<BigInt>mainParse("\"foo\"").actualValue, "foo");
|
||||
|
||||
|
||||
};
|
||||
|
|
Loading…
Reference in a new issue