modify the language

Tan, Kian-ting 2024-03-30 17:34:51 +08:00
parent 4ab5276bdc
commit 68a21c773d
2 changed files with 209 additions and 109 deletions

@@ -10,10 +10,12 @@ var TokenKind;
TokenKind[TokenKind["Str"] = 3] = "Str";
TokenKind[TokenKind["LParen"] = 4] = "LParen";
TokenKind[TokenKind["RParen"] = 5] = "RParen";
TokenKind[TokenKind["SpaceNL"] = 6] = "SpaceNL";
TokenKind[TokenKind["At"] = 7] = "At";
TokenKind[TokenKind["BSlash"] = 8] = "BSlash";
TokenKind[TokenKind["Other"] = 9] = "Other";
TokenKind[TokenKind["LBrack"] = 6] = "LBrack";
TokenKind[TokenKind["RBrack"] = 7] = "RBrack";
TokenKind[TokenKind["SpaceNL"] = 8] = "SpaceNL";
TokenKind[TokenKind["BSlash"] = 9] = "BSlash";
TokenKind[TokenKind["Apos"] = 10] = "Apos";
TokenKind[TokenKind["Other"] = 11] = "Other";
})(TokenKind || (TokenKind = {}));
var ItemType;
(function (ItemType) {
@@ -25,18 +27,43 @@ var ItemType;
const tokenizer = (0, typescript_parsec_1.buildLexer)([
[true, /^\d+/g, TokenKind.Int],
[true, /^\d+\.\d+/g, TokenKind.Flo],
[true, /^[\+\-\*\\\w_][0-9\+\-\*\\\w]*/g, TokenKind.Id],
[true, /^[+\-*/a-zA-Z_][0-9+\-*/a-zA-Z_]*/g, TokenKind.Id],
[true, /^\"([^\"]|\\\")+\"/g, TokenKind.Str],
[true, /^\(/g, TokenKind.LParen],
[true, /^\)/g, TokenKind.RParen],
[true, /^[(]/g, TokenKind.LParen],
[true, /^[)]/g, TokenKind.RParen],
[true, /^\[/g, TokenKind.LBrack],
[true, /^\]/g, TokenKind.RBrack],
[true, /^'/g, TokenKind.Apos],
[true, /^(\s|\t|\r?\n)+/g, TokenKind.SpaceNL],
[true, /^\@/g, TokenKind.At],
[true, /^\\/g, TokenKind.BSlash],
[true, /^[.]+/g, TokenKind.Other],
[true, /^([^+\-*/a-zA-Z_0-9\[\]()'\s\t\r\n\\]+)/g, TokenKind.Other],
]);
/**
* ## BNF
LISP = SINGLE | LISPS | CON_STR
LISPS = "(" LISP* ")" | "'" "(" LISP* ")"
SINGLE = INT | FLO | STR | ID
CON_STR = "[" CON_STR_INNER* "]"
CON_STR_INNER = ID | INT | FLO | STR | OTHER | SPACE_NL | "\" ESCAPED_CHAR | LISPS
*/
const SINGLE = (0, typescript_parsec_1.rule)();
const SINGLES = (0, typescript_parsec_1.rule)();
const PROG_INNER = (0, typescript_parsec_1.rule)();
const LISPS = (0, typescript_parsec_1.rule)();
const LISP = (0, typescript_parsec_1.rule)();
const CON_STR = (0, typescript_parsec_1.rule)();
const CON_STR_INNER = (0, typescript_parsec_1.rule)();
function tokenToStr(value) {
return {
type: ItemType.Str,
str: value.text
};
}
function bSlashTokenToStr(value) {
return {
type: ItemType.Str,
str: value.text
};
}
function applyId(value) {
return {
type: ItemType.Id,
@@ -58,29 +85,56 @@ function applyFlo(value) {
function applyStr(value) {
return {
type: ItemType.Str,
str: value.text
str: value.text.slice(1, value.text.length - 1)
};
}
function applyList(value) {
return value;
}
function applyQuoted(value) {
let head = { type: ItemType.Id,
id: "quote" };
let merged = [head, value];
return merged;
}
function applyStrings(value) {
let head = [{ type: ItemType.Id,
id: "%concat" }];
let merged = head.concat(value);
return merged;
}
/** for convenience, to omit the spaces and newlines */
let __ = (0, typescript_parsec_2.opt)((0, typescript_parsec_2.tok)(TokenKind.SpaceNL));
function getInsideParathesis(value) {
return value[2];
LISP.setPattern((0, typescript_parsec_2.alt)((0, typescript_parsec_2.kleft)(SINGLE, __), (0, typescript_parsec_2.kleft)(LISPS, __), (0, typescript_parsec_2.kleft)(CON_STR, __)));
SINGLE.setPattern((0, typescript_parsec_2.alt)((0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Id), applyId), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Int), applyInt), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Flo), applyFlo), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Str), applyStr)));
LISPS.setPattern((0, typescript_parsec_2.alt)((0, typescript_parsec_2.apply)((0, typescript_parsec_2.kmid)((0, typescript_parsec_2.seq)((0, typescript_parsec_2.str)("("), __), (0, typescript_parsec_2.rep_sc)(LISP), (0, typescript_parsec_2.str)(")")), applyList), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.kright)((0, typescript_parsec_2.str)("'"), (0, typescript_parsec_2.kmid)((0, typescript_parsec_2.seq)((0, typescript_parsec_2.str)("("), __), (0, typescript_parsec_2.rep_sc)(LISP), (0, typescript_parsec_2.str)(")"))), applyQuoted)));
CON_STR_INNER.setPattern((0, typescript_parsec_2.alt)((0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Id), tokenToStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Int), tokenToStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Flo), tokenToStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Str), tokenToStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Other), tokenToStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.SpaceNL), tokenToStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.kright)((0, typescript_parsec_2.tok)(TokenKind.BSlash), (0, typescript_parsec_2.tok)(TokenKind.LParen)), tokenToStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.kright)((0, typescript_parsec_2.tok)(TokenKind.BSlash), (0, typescript_parsec_2.tok)(TokenKind.RParen)), tokenToStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.kright)((0, typescript_parsec_2.tok)(TokenKind.BSlash), (0, typescript_parsec_2.tok)(TokenKind.LBrack)), tokenToStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.kright)((0, typescript_parsec_2.tok)(TokenKind.BSlash), (0, typescript_parsec_2.tok)(TokenKind.RBrack)), tokenToStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.kright)((0, typescript_parsec_2.tok)(TokenKind.BSlash), (0, typescript_parsec_2.tok)(TokenKind.Apos)), tokenToStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.kright)((0, typescript_parsec_2.tok)(TokenKind.BSlash), (0, typescript_parsec_2.tok)(TokenKind.BSlash)), bSlashTokenToStr), LISPS));
CON_STR.setPattern((0, typescript_parsec_2.apply)((0, typescript_parsec_2.kmid)((0, typescript_parsec_2.str)("["), (0, typescript_parsec_2.rep_sc)(CON_STR_INNER), (0, typescript_parsec_2.str)("]")), applyStrings));
function printAST(ast) {
if (Array.isArray(ast)) {
let ast2 = ast.map(printAST);
return "(" + ast2.join(" ") + ")";
}
else {
if (ast.type == ItemType.Str) {
return "`" + ast.str + "`";
}
else if (ast.type == ItemType.Id) {
return ast.id;
}
else if (ast.type == ItemType.Flo) {
return ast.flo.toString();
}
else {
return ast.int.toString();
}
}
}
function giveAST(value) {
return value;
}
/** SINGLE ::= Int | Flo | Str | Id | "(" SP_NL? SINGLES ")" */
SINGLE.setPattern((0, typescript_parsec_2.alt)((0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Id), applyId), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Int), applyInt), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Flo), applyFlo), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Str), applyStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.seq)((0, typescript_parsec_2.tok)(TokenKind.LParen), __, SINGLES, (0, typescript_parsec_2.tok)(TokenKind.RParen)), getInsideParathesis)));
/** SINGLES ::= (SINGLE SP_NL?)* */
SINGLES.setPattern((0, typescript_parsec_2.apply)((0, typescript_parsec_2.rep_sc)((0, typescript_parsec_2.kleft)(SINGLE, __)), applyList));
/** PROG_INNER ::= "@" SINGLE "@" */
PROG_INNER.setPattern((0, typescript_parsec_2.apply)((0, typescript_parsec_2.kmid)((0, typescript_parsec_2.str)('@'), SINGLE, (0, typescript_parsec_2.str)('@')), giveAST));
function evaluate(expr) {
let a = (0, typescript_parsec_1.expectSingleResult)((0, typescript_parsec_1.expectEOF)(PROG_INNER.parse(tokenizer.parse(expr))));
console.log(a);
let a = (0, typescript_parsec_1.expectSingleResult)((0, typescript_parsec_1.expectEOF)(LISP.parse(tokenizer.parse(expr))));
const util = require('util');
console.log(printAST(a));
return a;
}
evaluate("@(let (a 17) (+ a 10))@");
evaluate(`(main '((text 12)) [ 快狐跳懶狗\\\\\\\[\\\]\\\(\\\)(italic "fox and dog") (bold [OK])])`);
//evaluate("@(let (a 17) (+ a 10))@")

@@ -9,9 +9,11 @@ enum TokenKind{
Str,
LParen,
RParen,
LBrack,
RBrack,
SpaceNL,
At,
BSlash,
Apos,
Other
}
@@ -23,13 +25,29 @@ enum ItemType{
Str,
}
interface Item{
type : ItemType,
int? : BigInt,
flo? : number,
str? : string,
id? : string,
type Item = ItemStr | ItemInt | ItemId | ItemFlo;
interface ItemStr{
type : ItemType.Str,
str : string,
}
interface ItemInt{
type : ItemType.Int,
int : BigInt,
}
interface ItemId{
type : ItemType.Id,
id : string,
}
interface ItemFlo{
type : ItemType.Flo,
flo : number,
}
type AST = Item | AST[];
@@ -37,26 +55,53 @@ type AST = Item | AST[];
const tokenizer = buildLexer([
[true, /^\d+/g, TokenKind.Int],
[true, /^\d+\.\d+/g, TokenKind.Flo],
[true, /^[\+\-\*\\\w_][0-9\+\-\*\\\w]*/g, TokenKind.Id],
[true, /^[+\-*/a-zA-Z_][0-9+\-*/a-zA-Z_]*/g, TokenKind.Id],
[true, /^\"([^\"]|\\\")+\"/g, TokenKind.Str],
[true, /^\(/g, TokenKind.LParen],
[true, /^\)/g, TokenKind.RParen],
[true, /^[(]/g, TokenKind.LParen],
[true, /^[)]/g, TokenKind.RParen],
[true, /^\[/g, TokenKind.LBrack],
[true, /^\]/g, TokenKind.RBrack],
[true, /^'/g, TokenKind.Apos],
[true, /^(\s|\t|\r?\n)+/g, TokenKind.SpaceNL],
[true, /^\@/g, TokenKind.At],
[true, /^\\/g, TokenKind.BSlash],
[true, /^[.]+/g, TokenKind.Other],
[true, /^([^+\-*/a-zA-Z_0-9\[\]()'\s\t\r\n\\]+)/g, TokenKind.Other],
]);
/**
* ## BNF
LISP = SINGLE | LISPS | CON_STR
LISPS = "(" LISP* ")" | "'" "(" LISP* ")"
SINGLE = INT | FLO | STR | ID
CON_STR = "[" CON_STR_INNER* "]"
CON_STR_INNER = ID | INT | FLO | STR | OTHER | SPACE_NL | "\" ESCAPED_CHAR | LISPS
*/
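// Editorial sketch (not part of the original source): how typical inputs map onto the
// grammar above, assuming the lexer rules defined earlier in this file.
//   (main 1 2)          LISPS: "(" LISP* ")", each element here a plain SINGLE
//   '(text 12)          the quoted LISPS branch; applyQuoted rewrites it to (quote ...)
//   [fox \[and\] dog]   CON_STR; applyStrings rewrites it to (%concat ...), with the
//                       escaped brackets handled by the BSlash cases of CON_STR_INNER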
const SINGLE = rule<TokenKind, AST>();
const SINGLES = rule<TokenKind, AST>();
const PROG_INNER = rule<TokenKind, AST>();
const STRINGS = rule<TokenKind, AST>();
const STRING = rule<TokenKind, AST>();
const LISPS = rule<TokenKind, AST>();
const LISP = rule<TokenKind, AST>();
const CON_STR = rule<TokenKind, AST>();
const CON_STR_INNER = rule<TokenKind, AST>();
function tokenToStr(value: Token<TokenKind>): Item {
return {
type : ItemType.Str,
str : value.text};
}
function bSlashTokenToStr(value: Token<TokenKind>): Item {
return {
type : ItemType.Str,
str : value.text};
}
function applyId(value: Token<TokenKind.Id>): Item {
return {
type : ItemType.Id,
type :ItemType.Id,
id : value.text};
}
@@ -75,105 +120,106 @@ function applyFlo(value: Token<TokenKind.Flo>): Item {
function applyStr(value: Token<TokenKind.Str>): Item {
return {
type : ItemType.Str,
str : value.text};
}
function applyInt(value: Token<TokenKind.Int>): Item {
return {
type : ItemType.Int,
int : BigInt(value.text)};
}
function applyFlo(value: Token<TokenKind.Flo>): Item {
return {
type : ItemType.Flo,
flo : +value.text};
}
function applyStr(value: Token<TokenKind.Str>): Item {
return {
type : ItemType.Str,
str : value.text};
str : value.text.slice(1,value.text.length-1)};
}
function applyList(value: AST[]):AST{
return value;
}
function applyQuoted(value: AST[]):AST{
let head : Item = {type : ItemType.Id,
id:"quote"}
let merged = [head, value];
return merged;
}
function applyStrings(value: AST[]):AST{
let head : AST[] = [{type : ItemType.Id,
id:"%concat"}]
let merged = head.concat(value);
return merged;
}
/** for convenience, to omit the spaces and newlines */
let __ = opt(tok(TokenKind.SpaceNL))
function getInsideParathesis (value: [Token<TokenKind>, Token<TokenKind>|undefined, AST, Token<TokenKind>]){
return value[2];
}
LISP.setPattern(
alt(
kleft(SINGLE, __),
kleft(LISPS, __),
kleft(CON_STR, __)
))
function giveAST (value: AST){
return value;
}
/** SINGLE ::= Int | Flo | Str | Id | "(" SP_NL? SINGLES ")" */
SINGLE.setPattern(
alt(
apply(tok(TokenKind.Id), applyId),
apply(tok(TokenKind.Int), applyInt),
apply(tok(TokenKind.Flo), applyFlo),
apply(tok(TokenKind.Str), applyStr),
apply(seq(tok(TokenKind.LParen),__, SINGLES,tok(TokenKind.RParen)),getInsideParathesis),
))
/** SINGLES ::= (SINGLE SP_NL?)* */
SINGLES.setPattern(
apply(rep_sc(kleft(SINGLE, __)), applyList))
LISPS.setPattern(
alt(
apply(kmid(seq(str("("), __),rep_sc(LISP),str(")")), applyList),
apply(kright(str("'"),
kmid(seq(str("("), __),rep_sc(LISP),str(")"))), applyQuoted),
))
/** PROG_INNER ::= "@" SINGLE "@" */
PROG_INNER.setPattern(
apply(
kmid(str('@'), SINGLE, str('@')),
giveAST
)
)
/** CON_STR_INNER ::= Id | Int | Flo | Str | Other | SP_NL | "\" ESCAPED_CHAR | LISPS */
STRING.setPattern(
CON_STR_INNER.setPattern(
alt(
apply(tok(TokenKind.Id),idToStr),
apply(tok(TokenKind.Float),fLoatToStr),
apply(tok(TokenKind.Id),tokenToStr),
apply(tok(TokenKind.Int),tokenToStr),
apply(tok(TokenKind.Flo),tokenToStr),
apply(tok(TokenKind.Str),tokenToStr),
apply(tok(TokenKind.Other),tokenToStr),
apply(tok(TokenKind.SpaceNL), tokenToStr),
apply(kright(tok(TokenKind.BSlash),tok(TokenKind.LParen)), tokenToStr),
apply(kright(tok(TokenKind.BSlash),tok(TokenKind.RParen)), tokenToStr),
apply(kright(tok(TokenKind.BSlash),tok(TokenKind.LBrack)), tokenToStr),
apply(kright(tok(TokenKind.BSlash),tok(TokenKind.RBrack)), tokenToStr),
apply(kright(tok(TokenKind.BSlash),tok(TokenKind.Apos)), tokenToStr),
apply(kright(tok(TokenKind.BSlash),tok(TokenKind.BSlash)), bSlashTokenToStr),
LISPS
))
CON_STR.setPattern(
apply(kmid(str("["),
rep_sc(CON_STR_INNER),
str("]")), applyStrings)
)
function printAST(ast : AST): string{
if (Array.isArray(ast)){
let ast2 = ast.map(printAST);
return "(" + ast2.join(" ") + ")";
}else{
if (ast.type==ItemType.Str){
return "`" + ast.str + "`";
}else if (ast.type==ItemType.Id){
return ast.id;
}else if (ast.type== ItemType.Flo){
return ast.flo.toString();
}else{
return ast.int.toString();
}
}
}
function evaluate(expr: string): AST {
let a = expectSingleResult(expectEOF(PROG_INNER.parse(tokenizer.parse(expr))));
console.log(a);
let a = expectSingleResult(expectEOF(LISP.parse(tokenizer.parse(expr))));
const util = require('util')
console.log(printAST(a))
return a;
}
evaluate("@(let (a 17) (+ a 10))@")
evaluate(`(main '((text 12)) [ 快狐跳懶狗\\\\\\\[\\\]\\\(\\\)(italic "fox and dog") (bold [OK])])`)
//evaluate("@(let (a 17) (+ a 10))@")
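// Editorial sketch of the expected result for a smaller input (an assumption derived from
// applyQuoted, applyStrings and printAST above; this output is not recorded in the commit):
//   evaluate("(main '(a b) [hi])")
//   // should print roughly: (main (quote (a b)) (%concat `hi`))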