20230910 : add basic parser CONST
rule, and add the grammar rule.
This commit is contained in:
parent
cda463d890
commit
6f2e788329
6 changed files with 252 additions and 31 deletions
|
@ -7,3 +7,4 @@ another personal draught of a typesetting language and engine.
|
||||||
`matchAny`, `notDo`, `orDo`, `zeroOrMoreDo`, `zeroOrOnceDo`
|
`matchAny`, `notDo`, `orDo`, `zeroOrMoreDo`, `zeroOrOnceDo`
|
||||||
- 20230905-07:強化`tokenize`, 加強功能,加`Token`界面。
|
- 20230905-07:強化`tokenize`, 加強功能,加`Token`界面。
|
||||||
- 20230907-08:強化`tokenize`。
|
- 20230907-08:強化`tokenize`。
|
||||||
|
- 20230910 : add basic parser `CONST` rule, and add the grammar rule.
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
"name": "clo",
|
"name": "clo",
|
||||||
"version": "0.0.1",
|
"version": "0.0.1",
|
||||||
"description": "a little typesetting engine in TypeScript",
|
"description": "a little typesetting engine in TypeScript",
|
||||||
"main": "index.js",
|
"main": "src/index.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"test": "echo \"Error: no test specified\" && exit 1"
|
"test": "echo \"Error: no test specified\" && exit 1"
|
||||||
},
|
},
|
||||||
|
|
66
parser_rule.txt
Normal file
66
parser_rule.txt
Normal file
|
@ -0,0 +1,66 @@
|
||||||
|
let sqrtSum = (int x, int y) -> int {
|
||||||
|
let x2 = x * x;
|
||||||
|
let y2 = y * y;
|
||||||
|
return x2+y2;
|
||||||
|
}
|
||||||
|
|
||||||
|
let pi = 3.14159;
|
||||||
|
let _2p = (intToFLo 2) *. pi;
|
||||||
|
|
||||||
|
let c = if (2 == 2) then 2 else 3;
|
||||||
|
|
||||||
|
let aStr = "hello";
|
||||||
|
|
||||||
|
let rec fac = (int n)-> int {
|
||||||
|
if n == 0 then 1 else (fac (n - 1));};
|
||||||
|
|
||||||
|
|
||||||
|
type student = Student {int id, string name};
|
||||||
|
|
||||||
|
let alice = Student {id=20, name="Alice"};
|
||||||
|
|
||||||
|
alice.name = "Siobhan";
|
||||||
|
|
||||||
|
let customAnd = (@ 'a has age) ('a x, 'a y) => {'a > 'b};
|
||||||
|
|
||||||
|
type list 'a = (Cons 'a (list 'a)) | Nil;
|
||||||
|
|
||||||
|
import("alifbata.clo"); # 匯入檔案 alifbata #
|
||||||
|
|
||||||
|
t of import :== string -> Option string string
|
||||||
|
Error("string") | Ok("import aaa")
|
||||||
|
# 型別構造子統一大寫,型別小寫 #
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
PROG ::= (STMT | COMMENT | STMT_COMMENT)*
|
||||||
|
COMMENT ::= # COMMENT_INNER #
|
||||||
|
COMMENT_INNER ::= [^#]+
|
||||||
|
STMT ::= (TYPE_DEF | VAR_DEF | SET | EXPR ) ";"
|
||||||
|
TYPE_DEF ::= type ID "=" UNION
|
||||||
|
| type ID TYPE_VARS+ "=" UNION
|
||||||
|
TYPE_VARS ::= ' ID
|
||||||
|
UNION ::= (REC "|" UNION) | REC
|
||||||
|
REC ::= ID ( TYPES )
|
||||||
|
TYPES ::= TYPE+
|
||||||
|
TYPE ::= ID
|
||||||
|
|
||||||
|
EXPR ::= if SUB_EXPR then IF_BRANCH else IF_BRANCH | SUB_EXPR
|
||||||
|
IF_BRANCH ::= EXPR | { BLOCK }
|
||||||
|
SUB_EXPR ::= COMPAREE| COMPAREE (LE|GE|LT|GT|EQ|NE) EXPR
|
||||||
|
COMPAREE ::= FAC| (FAC(ADD | SUB) FAC)
|
||||||
|
FAC ::= APPLY | (APPLY (MUL | DIV) APPLY)
|
||||||
|
APPLY ::= "(" ID APPLYEE* ")" | APPLYEE
|
||||||
|
APPLYEE ::= REF | CONST | EXPR | FUNC
|
||||||
|
CONST ::= INT | STR | FLO | BOOL
|
||||||
|
BOOL ::= "true" | "false"
|
||||||
|
FUNC ::= FUNC_OPTION? ( ARGS? ) -> TYPE {BLOCK}
|
||||||
|
BLOCK ::= PROG (return ID |noReturn) ;
|
||||||
|
ARGS ::= TYPE (TYPE_VARS | ID)
|
||||||
|
| TYPE (TYPE_VARS | ID) , ARGS
|
||||||
|
REF ::= VAR "." ID | VAR
|
||||||
|
VAR ::= ID
|
||||||
|
VAR_DEF ::= "let" VAR "=" EXPR
|
||||||
|
SET ::= VAR "=" EXPR
|
||||||
|
FUNC_OPTION ::= ( @ TYPE_HAS (, TYPE_HAS)* )
|
||||||
|
TYPE_HAS ::= TYPE_VARS "has" ID
|
84
src/index.js
84
src/index.js
|
@ -23,26 +23,32 @@ var __importStar = (this && this.__importStar) || function (mod) {
|
||||||
return result;
|
return result;
|
||||||
};
|
};
|
||||||
Object.defineProperty(exports, "__esModule", { value: true });
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||||||
exports.match1token = void 0;
|
exports.m1TType = void 0;
|
||||||
var fs = require('fs');
|
var fs = require('fs');
|
||||||
|
const node_process_1 = require("node:process");
|
||||||
const tk = __importStar(require("./tokenize.js"));
|
const tk = __importStar(require("./tokenize.js"));
|
||||||
let b = tk.tokenize("2+2");
|
const util = __importStar(require("util"));
|
||||||
/**
|
/**
|
||||||
* @description
|
* @description
|
||||||
* it returns a function which test if the first char of the `remained` part of
|
* match one token type.
|
||||||
* the argument of the function is `c`, if it's true, update the `MatchedPair` wrapped
|
*
|
||||||
|
* it returns a function which test if the type of first token of the `remained` part of
|
||||||
|
* the argument of the function is `typ` , if it's true, update the `TokenMatcheePair` wrapped
|
||||||
* in `Some`. Otherwise, it returns `None`.
|
* in `Some`. Otherwise, it returns `None`.
|
||||||
* * @param t : the char to be test.
|
* * @param typ : the type to be test.
|
||||||
* @returns the updated `MatchedPair` wrapped in `Some(x)` or `None`.
|
* @returns the updated `TokenMatcheePair` wrapped in `Some(x)` or `None`.
|
||||||
*/
|
*/
|
||||||
function match1token(t) {
|
function m1TType(typ) {
|
||||||
return (m) => {
|
return (m) => {
|
||||||
if (m.remained.length == 0) {
|
if (m.remained.length == 0) {
|
||||||
return { _tag: "None" };
|
return { _tag: "None" };
|
||||||
}
|
}
|
||||||
const tokenToBeMatched = m.remained[0];
|
/**
|
||||||
if (tokenToBeMatched === t) {
|
* token to be matched
|
||||||
m.matched.push(tokenToBeMatched);
|
* */
|
||||||
|
const ttbm = m.remained[0];
|
||||||
|
if (ttbm.type == typ) {
|
||||||
|
m.matched.push(ttbm);
|
||||||
return {
|
return {
|
||||||
_tag: "Some", value: {
|
_tag: "Some", value: {
|
||||||
matched: m.matched,
|
matched: m.matched,
|
||||||
|
@ -55,7 +61,59 @@ function match1token(t) {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
exports.match1token = match1token;
|
exports.m1TType = m1TType;
|
||||||
;
|
;
|
||||||
let c = tk.toSome(b);
|
let toSome = tk.toSome;
|
||||||
console.log(thenDo(c, match1token(tk.tokenize("+")[0])));
|
let thenDo = tk.thenDo;
|
||||||
|
let orDo = tk.orDo;
|
||||||
|
node_process_1.argv.forEach((val, index) => {
|
||||||
|
console.log(`${index}=${val}`);
|
||||||
|
});
|
||||||
|
let commandInput = node_process_1.argv[2];
|
||||||
|
let commandInputTokenized = tk.tokenize(commandInput);
|
||||||
|
console.log(commandInputTokenized);
|
||||||
|
/**
|
||||||
|
* matchee pair of commandInputTokenized
|
||||||
|
*/
|
||||||
|
let commandTPair = { matched: [],
|
||||||
|
remained: commandInputTokenized };
|
||||||
|
let tInt = m1TType(tk.TokenType.INT);
|
||||||
|
let tFlo = m1TType(tk.TokenType.FLO);
|
||||||
|
let tStr = m1TType(tk.TokenType.STR);
|
||||||
|
function tBool(x) {
|
||||||
|
let text = x.remained[0].text;
|
||||||
|
if (text == "true" || text == "false") {
|
||||||
|
return thenDo(toSome(x), m1TType(tk.TokenType.ID));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return { _tag: "None" };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* define the right hand side of a grammar
|
||||||
|
* eg. `LHS ::= a + b`
|
||||||
|
* @param process the right hand side processing : eg. `a + b` in `LHS`
|
||||||
|
* @param arrange define the order (0 starting) of the elements of the result.
|
||||||
|
* ast. : eg. `a + c` is `1 0 2` `(+ a c)`
|
||||||
|
* @returns the processed ast.
|
||||||
|
*/
|
||||||
|
function gramRHS(process, arrange) {
|
||||||
|
return (m) => {
|
||||||
|
let result = process(m);
|
||||||
|
console.log(`result ${result}`);
|
||||||
|
if (result._tag == "None") {
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
let matched = result.value.matched;
|
||||||
|
let return_array = Array(arrange.length);
|
||||||
|
arrange.forEach((val, index) => {
|
||||||
|
return_array[arrange[index]] = matched[index];
|
||||||
|
});
|
||||||
|
return return_array;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
var constParser = gramRHS((x) => { return thenDo(toSome(x), orDo(orDo(orDo(tInt, tFlo), tStr), tBool)); }, [0]);
|
||||||
|
let tree = constParser(commandTPair);
|
||||||
|
console.log(util.inspect(tree, { showHidden: true, depth: null }));
|
||||||
|
|
104
src/index.ts
104
src/index.ts
|
@ -1,32 +1,41 @@
|
||||||
var fs = require('fs');
|
var fs = require('fs');
|
||||||
|
import { argv } from 'node:process';
|
||||||
import * as tk from './tokenize.js';
|
import * as tk from './tokenize.js';
|
||||||
|
import * as util from 'util';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* token tree type.
|
||||||
let b : Array<tk.Token> = tk.tokenize("2+2");
|
*/
|
||||||
|
type tkTree = tk.Token[] | tk.Token
|
||||||
|
|
||||||
export interface TokenMatcheePair {
|
export interface TokenMatcheePair {
|
||||||
matched: tk.Token[]
|
matched: tkTree[]
|
||||||
remained: tk.Token[]
|
remained: tk.Token[]
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @description
|
* @description
|
||||||
* it returns a function which test if the first char of the `remained` part of
|
* match one token type.
|
||||||
* the argument of the function is `c`, if it's true, update the `MatchedPair` wrapped
|
*
|
||||||
|
* it returns a function which tests whether the type of the first token of the `remained` part of
|
||||||
|
* its argument is `typ`. If so, it returns the updated `TokenMatcheePair` wrapped
|
||||||
* in `Some`. Otherwise, it returns `None`.
|
* in `Some`. Otherwise, it returns `None`.
|
||||||
* * @param t : the char to be test.
|
* @param typ : the type to be tested.
|
||||||
* @returns the updated `MatchedPair` wrapped in `Some(x)` or `None`.
|
* @returns the updated `TokenMatcheePair` wrapped in `Some(x)` or `None`.
|
||||||
*/
|
*/
|
||||||
export function match1token(t: tk.Token): (m: TokenMatcheePair) => tk.Maybe<TokenMatcheePair> {
|
export function m1TType(typ: tk.TokenType):
|
||||||
|
(m: TokenMatcheePair) => tk.Maybe<TokenMatcheePair> {
|
||||||
return (m: TokenMatcheePair) => {
|
return (m: TokenMatcheePair) => {
|
||||||
if (m.remained.length == 0) {
|
if (m.remained.length == 0) {
|
||||||
return { _tag: "None" };
|
return { _tag: "None" };
|
||||||
}
|
}
|
||||||
const tokenToBeMatched = m.remained[0];
|
/**
|
||||||
if (tokenToBeMatched === t) {
|
* token to be matched
|
||||||
m.matched.push(tokenToBeMatched);
|
* */
|
||||||
|
const ttbm = m.remained[0];
|
||||||
|
|
||||||
|
if (ttbm.type == typ) {
|
||||||
|
m.matched.push(ttbm);
|
||||||
return {
|
return {
|
||||||
_tag: "Some", value: {
|
_tag: "Some", value: {
|
||||||
matched: m.matched,
|
matched: m.matched,
|
||||||
|
@ -40,7 +49,72 @@ export function match1token(t: tk.Token): (m: TokenMatcheePair) => tk.Maybe<Toke
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let toSome = tk.toSome;
|
||||||
|
let thenDo = tk.thenDo;
|
||||||
|
let orDo = tk.orDo;
|
||||||
|
|
||||||
|
|
||||||
let c = tk.toSome(b);
|
argv.forEach((val, index) => {
|
||||||
console.log(thenDo(c,match1token(tk.tokenize("+")[0])));
|
console.log(`${index}=${val}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
let commandInput = argv[2];
|
||||||
|
let commandInputTokenized = tk.tokenize(commandInput);
|
||||||
|
console.log(commandInputTokenized);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* matchee pair of commandInputTokenized
|
||||||
|
*/
|
||||||
|
let commandTPair : TokenMatcheePair = {matched:[],
|
||||||
|
remained: commandInputTokenized};
|
||||||
|
|
||||||
|
|
||||||
|
let tInt = m1TType(tk.TokenType.INT);
|
||||||
|
let tFlo = m1TType(tk.TokenType.FLO);
|
||||||
|
let tStr = m1TType(tk.TokenType.STR);
|
||||||
|
function tBool (x : TokenMatcheePair) :tk.Maybe<TokenMatcheePair> {
|
||||||
|
let text = x.remained[0].text
|
||||||
|
if (text == "true" || text == "false"){
|
||||||
|
return thenDo(toSome(x), m1TType(tk.TokenType.ID));
|
||||||
|
}else{
|
||||||
|
return {_tag : "None"};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* define the right hand side of a grammar
|
||||||
|
* eg. `LHS ::= a + b`
|
||||||
|
* @param process the right hand side processing : eg. `a + b` in `LHS`
|
||||||
|
* @param arrange define the order (0 starting) of the elements of the result.
|
||||||
|
* ast. : eg. `a + b` with `1 0 2` becomes `(+ a b)`
|
||||||
|
* @returns the processed ast.
|
||||||
|
*/
|
||||||
|
function gramRHS (process: Function, arrange : number[]){
|
||||||
|
return (m : TokenMatcheePair)=>{
|
||||||
|
|
||||||
|
let result : tk.Maybe<TokenMatcheePair> = process(m);
|
||||||
|
console.log(`result ${result}`)
|
||||||
|
if (result._tag == "None"){
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
let matched = result.value.matched;
|
||||||
|
let return_array : tkTree[] = Array(arrange.length);
|
||||||
|
|
||||||
|
arrange.forEach((val, index) => {
|
||||||
|
return_array[arrange[index]] = matched[index];
|
||||||
|
});
|
||||||
|
|
||||||
|
return return_array;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* CONST ::= INT | STR | FLO | BOOL
|
||||||
|
*/
|
||||||
|
var constParser = gramRHS((x : TokenMatcheePair)=>
|
||||||
|
{return thenDo(toSome(x),orDo(orDo(orDo(tInt,tFlo),tStr),tBool))}, [0]);
|
||||||
|
|
||||||
|
let tree = constParser(commandTPair);
|
||||||
|
console.log(util.inspect(tree, { showHidden: true, depth: null }));
|
||||||
|
|
|
@ -98,7 +98,9 @@ export enum TokenType {
|
||||||
NE, // <>
|
NE, // <>
|
||||||
APOS, // '
|
APOS, // '
|
||||||
R_ARROW, // ->
|
R_ARROW, // ->
|
||||||
|
TRUE, // true
|
||||||
|
FALSE, // false
|
||||||
|
IF, // if
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -197,6 +199,25 @@ export function matchRange(l: string, u: string): (m: MatcheePair) => Maybe<Matc
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* check if a matcheePair `m` matches a string `s`.
|
||||||
|
* @param s the checker string.
|
||||||
|
* @returns `None` or matched pair wrapped in `Some`
|
||||||
|
*/
|
||||||
|
export function matchWord(s: string, ): (m: MatcheePair) => Maybe<MatcheePair> {
|
||||||
|
return (m)=>{
|
||||||
|
if (s.length==0){
|
||||||
|
return { _tag: "None" };
|
||||||
|
}
|
||||||
|
var someM : Maybe<MatcheePair> = toSome(m);
|
||||||
|
for (var idx : number=0; idx<s.length; idx++){
|
||||||
|
someM = thenDo(someM, match1Char(s[idx]))
|
||||||
|
}
|
||||||
|
return someM;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* convert the one-char string to codepoint.
|
* convert the one-char string to codepoint.
|
||||||
* @param s : the string to code point.
|
* @param s : the string to code point.
|
||||||
|
@ -444,6 +465,7 @@ export function tokenize(input: string): Array<Token> {
|
||||||
thenDo(thenDo(x, match1Char("-")), match1Char(">")),
|
thenDo(thenDo(x, match1Char("-")), match1Char(">")),
|
||||||
TokenType.R_ARROW);
|
TokenType.R_ARROW);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* unary operator : generating the pattern of basic unary operator
|
* unary operator : generating the pattern of basic unary operator
|
||||||
* @param char : unary char for the operator
|
* @param char : unary char for the operator
|
||||||
|
@ -488,7 +510,7 @@ export function tokenize(input: string): Array<Token> {
|
||||||
lParen, rParen, lBracket, rBracket, lBrace, rBrace,
|
lParen, rParen, lBracket, rBracket, lBrace, rBrace,
|
||||||
comma, dot, colon, semicolon, at, hash,
|
comma, dot, colon, semicolon, at, hash,
|
||||||
set, greaterthan, lessthan, apos,
|
set, greaterthan, lessthan, apos,
|
||||||
float, newline, space, integer, str, id];
|
float, newline, space, id, integer, str];
|
||||||
let term_aux = term_list.reduce((x, y) => orDo(x, y));
|
let term_aux = term_list.reduce((x, y) => orDo(x, y));
|
||||||
|
|
||||||
var new_x: Maybe<MatcheePair> = thenDo(old_x, term_aux);
|
var new_x: Maybe<MatcheePair> = thenDo(old_x, term_aux);
|
||||||
|
|
Loading…
Reference in a new issue