rewrite parser
This commit is contained in:
parent
fed01e9044
commit
8d03cc503c
5 changed files with 167 additions and 266 deletions
|
@ -23,4 +23,5 @@ License: MIT
|
|||
```
|
||||
- 20230928:basically fix `issue1`。其他ê物件猶著做。
|
||||
- 20230929:add multi args parsing for `callee`.
|
||||
- 20230930:tîng-khí parser, using `js-token`.
|
||||
- 20230930:tîng khí parser, using `js-token`.
|
||||
- 20231016: tîng siá parser, using `ts-parsec`.
|
13
package-lock.json
generated
13
package-lock.json
generated
|
@ -13,7 +13,8 @@
|
|||
"harfbuzzjs": "^0.3.3",
|
||||
"js-tokens": "^8.0.2",
|
||||
"npx": "^3.0.0",
|
||||
"pdf-lib": "^1.17.1"
|
||||
"pdf-lib": "^1.17.1",
|
||||
"typescript-parsec": "^0.3.4"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/chai": "^4.3.5",
|
||||
|
@ -6411,6 +6412,11 @@
|
|||
"node": ">=14.17"
|
||||
}
|
||||
},
|
||||
"node_modules/typescript-parsec": {
|
||||
"version": "0.3.4",
|
||||
"resolved": "https://registry.npmjs.org/typescript-parsec/-/typescript-parsec-0.3.4.tgz",
|
||||
"integrity": "sha512-6RD4xOxp26BTZLopNbqT2iErqNhQZZWb5m5F07/UwGhldGvOAKOl41pZ3fxsFp04bNL+PbgMjNfb6IvJAC/uYQ=="
|
||||
},
|
||||
"node_modules/unbox-primitive": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/unbox-primitive/-/unbox-primitive-1.0.2.tgz",
|
||||
|
@ -11275,6 +11281,11 @@
|
|||
"integrity": "sha512-mI4WrpHsbCIcwT9cF4FZvr80QUeKvsUsUvKDoR+X/7XHQH98xYD8YHZg7ANtz2GtZt/CBq2QJ0thkGJMHfqc1w==",
|
||||
"dev": true
|
||||
},
|
||||
"typescript-parsec": {
|
||||
"version": "0.3.4",
|
||||
"resolved": "https://registry.npmjs.org/typescript-parsec/-/typescript-parsec-0.3.4.tgz",
|
||||
"integrity": "sha512-6RD4xOxp26BTZLopNbqT2iErqNhQZZWb5m5F07/UwGhldGvOAKOl41pZ3fxsFp04bNL+PbgMjNfb6IvJAC/uYQ=="
|
||||
},
|
||||
"unbox-primitive": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/unbox-primitive/-/unbox-primitive-1.0.2.tgz",
|
||||
|
|
|
@ -41,6 +41,7 @@
|
|||
"harfbuzzjs": "^0.3.3",
|
||||
"js-tokens": "^8.0.2",
|
||||
"npx": "^3.0.0",
|
||||
"pdf-lib": "^1.17.1"
|
||||
"pdf-lib": "^1.17.1",
|
||||
"typescript-parsec": "^0.3.4"
|
||||
}
|
||||
}
|
||||
|
|
183
src/index.js
183
src/index.js
|
@ -22,14 +22,11 @@ var __importStar = (this && this.__importStar) || function (mod) {
|
|||
__setModuleDefault(result, mod);
|
||||
return result;
|
||||
};
|
||||
var __importDefault = (this && this.__importDefault) || function (mod) {
|
||||
return (mod && mod.__esModule) ? mod : { "default": mod };
|
||||
};
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.matchAny = exports.tkTreeToSExp = void 0;
|
||||
exports.tkTreeToSExp = void 0;
|
||||
var fs = require('fs');
|
||||
const js_tokens_1 = __importDefault(require("js-tokens"));
|
||||
const util = __importStar(require("util"));
|
||||
const p = __importStar(require("typescript-parsec"));
|
||||
/**
|
||||
*
|
||||
* # REPRESENTATION
|
||||
|
@ -50,7 +47,7 @@ function tkTreeToSExp(t) {
|
|||
str = "%undefined";
|
||||
}
|
||||
else {
|
||||
str = t.value;
|
||||
str = t;
|
||||
}
|
||||
}
|
||||
return str;
|
||||
|
@ -58,129 +55,65 @@ function tkTreeToSExp(t) {
|
|||
exports.tkTreeToSExp = tkTreeToSExp;
|
||||
/**inspect the inner of the representation. */
|
||||
let repr = (x) => { return util.inspect(x, { depth: null }); };
|
||||
var TokenKind;
|
||||
(function (TokenKind) {
|
||||
TokenKind[TokenKind["Seperator"] = 0] = "Seperator";
|
||||
TokenKind[TokenKind["Semicolon"] = 1] = "Semicolon";
|
||||
TokenKind[TokenKind["Number"] = 2] = "Number";
|
||||
TokenKind[TokenKind["Op"] = 3] = "Op";
|
||||
TokenKind[TokenKind["ExprMark"] = 4] = "ExprMark";
|
||||
TokenKind[TokenKind["Paren"] = 5] = "Paren";
|
||||
TokenKind[TokenKind["SpaceNL"] = 6] = "SpaceNL";
|
||||
TokenKind[TokenKind["Id"] = 7] = "Id";
|
||||
TokenKind[TokenKind["Str"] = 8] = "Str";
|
||||
})(TokenKind || (TokenKind = {}));
|
||||
/**
|
||||
*
|
||||
* # PARSER UNITS
|
||||
* Parsing
|
||||
*/
|
||||
function toSome(x) {
|
||||
return { _tag: "Some", value: x };
|
||||
}
|
||||
/**
|
||||
* like `m ==> f` in ocaml
|
||||
* @param m matchee wrapped
|
||||
* @param f matching function
|
||||
* @returns wrapped result
|
||||
*/
|
||||
function thenDo(m, f) {
|
||||
if (m._tag == "None") {
|
||||
return m;
|
||||
}
|
||||
else {
|
||||
var a = f(m.value);
|
||||
if (a._tag == "Some") {
|
||||
a.value.ast = m.value.ast.concat(a.value.ast);
|
||||
}
|
||||
return a;
|
||||
}
|
||||
}
|
||||
/**
|
||||
*
|
||||
* @param m : the `TokenPair` to be consumed.
|
||||
* @returns if the length of `m.remained` >= 1; consumes the matchee by 1 token
|
||||
* and wraps it in `Some`,
|
||||
* otherwise, returns `None`.
|
||||
*/
|
||||
function matchAny(m) {
|
||||
if (m.remained.length >= 1) {
|
||||
return {
|
||||
_tag: "Some", value: {
|
||||
matched: m.matched.concat(m.remained[0]),
|
||||
remained: m.remained.slice(1),
|
||||
ast: [m.remained[0]],
|
||||
}
|
||||
};
|
||||
}
|
||||
else {
|
||||
return { _tag: "None" };
|
||||
}
|
||||
}
|
||||
exports.matchAny = matchAny;
|
||||
/**
|
||||
* like `f1 | f2` in regex
|
||||
* @param f1 the first tried function
|
||||
* @param f2 the second tried function
|
||||
* @returns wrapped result
|
||||
*/
|
||||
function orDo(f1, f2) {
|
||||
return (x) => {
|
||||
let res1 = f1(x);
|
||||
if (res1._tag == "Some") {
|
||||
return res1;
|
||||
}
|
||||
else {
|
||||
let res2 = f2(x);
|
||||
return res2;
|
||||
}
|
||||
};
|
||||
}
|
||||
/**
|
||||
* like regex [^c]
|
||||
* @param f input token function. one token only.
|
||||
* @returns combined function
|
||||
*/
|
||||
function notDo(f) {
|
||||
return (x) => {
|
||||
let res1 = f(x);
|
||||
if (res1._tag == "Some") {
|
||||
return { _tag: "None" };
|
||||
}
|
||||
else {
|
||||
let res2 = matchAny(x);
|
||||
return res2;
|
||||
}
|
||||
};
|
||||
}
|
||||
function matchToken(typeName, value) {
|
||||
return (t) => {
|
||||
let headToken = t.remained[0];
|
||||
if (headToken.type != typeName) {
|
||||
return { _tag: "None" };
|
||||
}
|
||||
else {
|
||||
if (value === undefined || value == headToken.value) {
|
||||
let newTokenPair = {
|
||||
matched: t.matched.concat(headToken),
|
||||
remained: t.remained.slice(1),
|
||||
ast: [headToken]
|
||||
};
|
||||
return { _tag: "Some", value: newTokenPair };
|
||||
}
|
||||
else {
|
||||
return { _tag: "None" };
|
||||
}
|
||||
}
|
||||
;
|
||||
};
|
||||
}
|
||||
;
|
||||
const lexer = p.buildLexer([
|
||||
[true, /^\d+(\.\d+)?/g, TokenKind.Number],
|
||||
[true, /^\;/g, TokenKind.Semicolon],
|
||||
[true, /^[-][-][-]/g, TokenKind.Seperator],
|
||||
[true, /^[\+\-\*\/\&\|\!\^\<\>\~\=\?]+/g, TokenKind.Op],
|
||||
[true, /^\@+/g, TokenKind.ExprMark],
|
||||
[true, /^[()\[\]{}]/g, TokenKind.Paren],
|
||||
[true, /^["]([\"]|[\\].)*["]/g, TokenKind.Str],
|
||||
[true, /^[']([\']|[\\].)*[']/g, TokenKind.Str],
|
||||
[true, /^[()\[\]{}]/g, TokenKind.Paren],
|
||||
[true, /^[^\s\n\t\r;]+/g, TokenKind.Id],
|
||||
[false, /^(\s|\n|\r|\t)+/g, TokenKind.SpaceNL]
|
||||
]);
|
||||
/**
|
||||
*
|
||||
* # TEST
|
||||
*/
|
||||
const tokens = Array.from((0, js_tokens_1.default)(`import; foo from\t 'bar';
|
||||
import * as util from 'util';
|
||||
|
||||
|
||||
花非花,霧\\{非霧 。{{foo();}}下
|
||||
一句`));
|
||||
console.log("RESULT=" + repr(tokens));
|
||||
var mainTokenPair = {
|
||||
matched: [],
|
||||
remained: tokens,
|
||||
ast: []
|
||||
};
|
||||
let a = thenDo(thenDo(toSome(mainTokenPair), matchToken('IdentifierName')), notDo(matchToken('Punctuator', ';')));
|
||||
console.log("RESULT=" + repr(a));
|
||||
if (a._tag == "Some") {
|
||||
console.log("SEXP=" + tkTreeToSExp(a.value.ast));
|
||||
const inputTxt = `import ast;
|
||||
---
|
||||
122`;
|
||||
const PROG = p.rule();
|
||||
const UNIT = p.rule();
|
||||
const IMPORTS = p.rule();
|
||||
const SEMICOLON = p.rule();
|
||||
let doubleMinus = { type: 'Punctuator', value: '--' };
|
||||
let doubleMinus2 = p.str('--');
|
||||
const TERM = p.rule();
|
||||
function applyUnit(value) {
|
||||
return value.text;
|
||||
}
|
||||
function applySemiColon(value) {
|
||||
return value.text;
|
||||
}
|
||||
function applyParts(first, second) {
|
||||
return ["%clo", first, second[1]];
|
||||
}
|
||||
PROG.setPattern(p.lrec_sc(IMPORTS, p.seq(p.str('---'), UNIT), applyParts));
|
||||
function applyImports(input) {
|
||||
let importTail = input[1].map(x => x.text);
|
||||
return ["import"].concat(importTail);
|
||||
}
|
||||
;
|
||||
IMPORTS.setPattern(p.apply(p.seq(p.str('import'), p.rep_sc(p.tok(TokenKind.Id)), SEMICOLON), applyImports));
|
||||
SEMICOLON.setPattern(p.apply(p.tok(TokenKind.Semicolon), applySemiColon));
|
||||
UNIT.setPattern(p.apply(p.tok(TokenKind.Number), applyUnit));
|
||||
let tree = p.expectSingleResult(p.expectEOF(PROG.parse(lexer.parse(inputTxt))));
|
||||
console.log("RESULT=" + tkTreeToSExp(tree));
|
||||
|
|
231
src/index.ts
231
src/index.ts
|
@ -1,7 +1,8 @@
|
|||
var fs = require('fs');
|
||||
import jsTokens from "js-tokens";
|
||||
import * as util from 'util';
|
||||
|
||||
import * as p from 'typescript-parsec';
|
||||
import { Token } from 'typescript-parsec';
|
||||
/**
|
||||
*
|
||||
* # REPRESENTATION
|
||||
|
@ -21,7 +22,7 @@ export function tkTreeToSExp(t: tkTree): string{
|
|||
if (t=== undefined){
|
||||
str = "%undefined"
|
||||
}else{
|
||||
str = t.value;
|
||||
str = t;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -35,156 +36,110 @@ let repr = (x : any)=>{return util.inspect(x, {depth: null})};
|
|||
* # TYPES
|
||||
*/
|
||||
|
||||
/**
|
||||
* TokenPair for tokens' parser combinator
|
||||
*
|
||||
* matched: the matched (now and before) tokens
|
||||
*
|
||||
* remained: tokens to be matched
|
||||
*
|
||||
* ast: abstract syntax tree
|
||||
*/
|
||||
export interface TokenPair {
|
||||
matched: jsTokens.Token[]
|
||||
remained: jsTokens.Token[]
|
||||
ast : tkTree[]
|
||||
}
|
||||
export type Some<T> = { _tag: "Some"; value: T };
|
||||
export type None = { _tag: "None" };
|
||||
export type Maybe<T> = Some<T> | None;
|
||||
|
||||
type Token = jsTokens.Token;
|
||||
type tkTree = Token | tkTree[];
|
||||
type tkTree = string | tkTree[];
|
||||
|
||||
/**
|
||||
*
|
||||
* # PARSER UNITS
|
||||
*/
|
||||
function toSome<T>(x:T): Maybe<T>{
|
||||
return {_tag: "Some", value: x};
|
||||
enum TokenKind {
|
||||
Seperator,
|
||||
Semicolon,
|
||||
Number,
|
||||
Op,
|
||||
ExprMark,
|
||||
Paren,
|
||||
SpaceNL,
|
||||
Id,
|
||||
Str,
|
||||
}
|
||||
|
||||
/**
|
||||
* like `m ==> f` in ocaml
|
||||
* @param m matchee wrapped
|
||||
* @param f matching function
|
||||
* @returns wrapped result
|
||||
* Parsing
|
||||
*/
|
||||
function thenDo(m : Maybe<TokenPair>, f : Function){
|
||||
if (m._tag == "None"){
|
||||
return m;
|
||||
}else{
|
||||
var a : Maybe<TokenPair> = f(m.value);
|
||||
if (a._tag == "Some"){
|
||||
a.value.ast = m.value.ast.concat(a.value.ast);
|
||||
}
|
||||
|
||||
return a;
|
||||
}
|
||||
}
|
||||
/**
|
||||
*
|
||||
* @param m : the `TokenPair` to be consumed.
|
||||
* @returns if the length of `m.remained` >= 1; consumes the matchee by 1 token
|
||||
* and wraps it in `Some`,
|
||||
* otherwise, returns `None`.
|
||||
*/
|
||||
export function matchAny(m: TokenPair): Maybe<TokenPair> {
|
||||
if (m.remained.length >= 1) {
|
||||
return {
|
||||
_tag: "Some", value: {
|
||||
matched: m.matched.concat(m.remained[0]),
|
||||
remained: m.remained.slice(1),
|
||||
ast : [m.remained[0]],
|
||||
}
|
||||
};
|
||||
} else {
|
||||
return { _tag: "None" };
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* like `f1 | f2` in regex
|
||||
* @param f1 the first tried function
|
||||
* @param f2 the second tried function
|
||||
* @returns wrapped result
|
||||
*/
|
||||
function orDo(f1 : Function, f2 : Function){
|
||||
return (x : TokenPair) =>{
|
||||
let res1 : Maybe<TokenPair> = f1(x);
|
||||
if (res1._tag == "Some"){
|
||||
return res1;
|
||||
}else{
|
||||
let res2 : Maybe<TokenPair> = f2(x);
|
||||
return res2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* like regex [^c]
|
||||
* @param f input token function. one token only.
|
||||
* @returns combined function
|
||||
*/
|
||||
function notDo(f : Function){
|
||||
return (x : TokenPair) =>{
|
||||
let res1 : Maybe<TokenPair> = f(x);
|
||||
if (res1._tag == "Some"){
|
||||
return {_tag:"None"};
|
||||
}else{
|
||||
let res2 = matchAny(x);
|
||||
return res2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function matchToken(typeName : string, value? : string):
|
||||
(t : TokenPair) => Maybe<TokenPair>{
|
||||
return (t)=>{
|
||||
let headToken = t.remained[0];
|
||||
if (headToken.type != typeName){
|
||||
return {_tag:"None"};
|
||||
}else{
|
||||
if (value === undefined || value == headToken.value){
|
||||
let newTokenPair = {
|
||||
matched: t.matched.concat(headToken),
|
||||
remained: t.remained.slice(1),
|
||||
ast : [headToken]
|
||||
};
|
||||
return {_tag : "Some", value : newTokenPair};
|
||||
}else{
|
||||
return {_tag:"None"};
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
const lexer = p.buildLexer([
|
||||
[true, /^\d+(\.\d+)?/g, TokenKind.Number],
|
||||
[true, /^\;/g, TokenKind.Semicolon],
|
||||
[true, /^[-][-][-]/g, TokenKind.Seperator],
|
||||
[true, /^[\+\-\*\/\&\|\!\^\<\>\~\=\?]+/g, TokenKind.Op],
|
||||
[true, /^\@+/g, TokenKind.ExprMark],
|
||||
[true, /^[()\[\]{}]/g, TokenKind.Paren],
|
||||
[true, /^["]([\"]|[\\].)*["]/g, TokenKind.Str],
|
||||
[true, /^[']([\']|[\\].)*[']/g, TokenKind.Str],
|
||||
[true, /^[()\[\]{}]/g, TokenKind.Paren],
|
||||
[true, /^[^\s\n\t\r;]+/g, TokenKind.Id],
|
||||
[false, /^(\s|\n|\r|\t)+/g, TokenKind.SpaceNL]
|
||||
]);
|
||||
|
||||
/**
|
||||
*
|
||||
* # TEST
|
||||
*/
|
||||
const tokens = Array.from(jsTokens(
|
||||
`import foo from\t 'bar';
|
||||
import * as util from 'util';
|
||||
const inputTxt=
|
||||
`import ast;
|
||||
---
|
||||
122`;
|
||||
|
||||
|
||||
花非花,霧\\{非霧 。{{foo();}}下
|
||||
一句`));
|
||||
|
||||
console.log("RESULT="+repr(tokens));
|
||||
const PROG = p.rule<TokenKind, tkTree>();
|
||||
const UNIT = p.rule<TokenKind, tkTree>();
|
||||
const IMPORTS = p.rule<TokenKind, tkTree>();
|
||||
const SEMICOLON = p.rule<TokenKind, tkTree>();
|
||||
|
||||
|
||||
var mainTokenPair : TokenPair = {
|
||||
matched : [] ,
|
||||
remained : tokens,
|
||||
ast : []};
|
||||
let doubleMinus = { type: 'Punctuator', value: '--' };
|
||||
let doubleMinus2 = p.str('--');
|
||||
const TERM = p.rule<TokenKind, tkTree>();
|
||||
|
||||
let a = thenDo(thenDo(toSome(mainTokenPair), matchToken('IdentifierName')),
|
||||
notDo(matchToken('Punctuator', ';')));
|
||||
|
||||
|
||||
console.log("RESULT="+repr(a));
|
||||
if (a._tag == "Some"){
|
||||
console.log("SEXP="+tkTreeToSExp(a.value.ast));
|
||||
function applyUnit(value: Token<TokenKind.Number>): tkTree{
|
||||
return value.text;
|
||||
}
|
||||
|
||||
function applySemiColon(value: Token<TokenKind.Semicolon>): tkTree{
|
||||
return value.text;
|
||||
}
|
||||
|
||||
function applyParts(first: tkTree,
|
||||
second: [Token<TokenKind>, tkTree]):tkTree {
|
||||
return ["%clo", first , second[1]];
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
function applyImports(input: [Token<TokenKind>,Token<TokenKind>[], tkTree]):tkTree{
|
||||
let importTail = input[1].map(x=>x.text);
|
||||
return ["import"].concat(importTail);
|
||||
};
|
||||
|
||||
/**
|
||||
* PROG : IMPORTS ('---' UNIT)*;
|
||||
*/
|
||||
PROG.setPattern(
|
||||
p.lrec_sc(IMPORTS, p.seq(p.str('---'), UNIT), applyParts)
|
||||
|
||||
)
|
||||
|
||||
/**
|
||||
* IMPORTS : 'import' Id* SEMICOLON;
|
||||
*/
|
||||
IMPORTS.setPattern(
|
||||
p.apply(p.seq(p.str('import'), p.rep_sc(p.tok(TokenKind.Id)), SEMICOLON) , applyImports)
|
||||
);
|
||||
|
||||
/**
|
||||
* SEMICOLON : ';';
|
||||
*/
|
||||
SEMICOLON.setPattern(
|
||||
p.apply(p.tok(TokenKind.Semicolon), applySemiColon)
|
||||
);
|
||||
|
||||
/**
|
||||
* UNIT : Number;
|
||||
*/
|
||||
UNIT.setPattern(
|
||||
p.apply(p.tok(TokenKind.Number), applyUnit)
|
||||
);
|
||||
|
||||
let tree = p.expectSingleResult(p.expectEOF(PROG.parse(lexer.parse(inputTxt))));
|
||||
|
||||
|
||||
|
||||
console.log("RESULT="+tkTreeToSExp(tree));
|
||||
|
|
Loading…
Reference in a new issue