add some tokenizer function

This commit is contained in:
Tan, Kian-ting 2023-09-15 00:34:20 +08:00
parent e8f894d994
commit ec563ca30f
3 changed files with 217 additions and 36 deletions

View file

@ -48,13 +48,14 @@ function m1TType(typ) {
* */ * */
const ttbm = m.remained[0]; const ttbm = m.remained[0];
if (ttbm.type == typ) { if (ttbm.type == typ) {
m.matched.push(ttbm); let new_matched = m.matched.concat(ttbm);
return { let result = {
_tag: "Some", value: { _tag: "Some", value: {
matched: m.matched, matched: new_matched,
remained: m.remained.slice(1) remained: m.remained.slice(1)
} }
}; };
return result;
} }
else { else {
return { _tag: "None" }; return { _tag: "None" };
@ -65,21 +66,29 @@ exports.m1TType = m1TType;
; ;
let toSome = tk.toSome; let toSome = tk.toSome;
let thenDo = tk.thenDo; let thenDo = tk.thenDo;
let zeroOrOnceDo = tk.zeroOrOnceDo;
let orDo = tk.orDo; let orDo = tk.orDo;
let zeroOrMoreDo = tk.zeroOrMoreDo;
node_process_1.argv.forEach((val, index) => { node_process_1.argv.forEach((val, index) => {
console.log(`${index}=${val}`); console.log(`${index}=${val}`);
}); });
let commandInput = node_process_1.argv[2]; let commandInput = "int a str b"; //argv[2];
let commandInputTokenized = tk.tokenize(commandInput); let commandInputTokenized = tk.tokenize(commandInput);
console.log(commandInputTokenized); let commandInputTokenizedFiltered = commandInputTokenized.filter((x) => {
return x.type != tk.TokenType.SP &&
x.type != tk.TokenType.NL;
});
console.log("aaa: " + util.inspect(commandInputTokenizedFiltered, { showHidden: true, depth: null }));
/** /**
* matchee pair of commandInputTokenized * matchee pair of commandInputTokenized
*/ */
let commandTPair = { matched: [], let commandTPair = { matched: [],
remained: commandInputTokenized }; remained: commandInputTokenizedFiltered };
let tInt = m1TType(tk.TokenType.INT); let tInt = m1TType(tk.TokenType.INT);
let tFlo = m1TType(tk.TokenType.FLO); let tFlo = m1TType(tk.TokenType.FLO);
let tStr = m1TType(tk.TokenType.STR); let tStr = m1TType(tk.TokenType.STR);
let tId = m1TType(tk.TokenType.ID);
let tApos = m1TType(tk.TokenType.APOS);
function tBool(x) { function tBool(x) {
let text = x.remained[0].text; let text = x.remained[0].text;
if (text == "true" || text == "false") { if (text == "true" || text == "false") {
@ -99,21 +108,89 @@ function tBool(x) {
*/ */
function gramRHS(process, arrange) { function gramRHS(process, arrange) {
return (m) => { return (m) => {
let result = process(m); let middle = process(m);
console.log(`result ${result}`); console.log("Middle" + util.inspect(middle, { showHidden: true, depth: null }));
if (result._tag == "None") { if (middle._tag == "None") {
return result; return middle;
} }
else { else {
let matched = result.value.matched; let matched = middle.value.matched;
let return_array = Array(arrange.length); let arrLength = arrange.length;
let returnRrray = Array(arrange.length);
arrange.forEach((val, index) => { arrange.forEach((val, index) => {
return_array[arrange[index]] = matched[index]; returnRrray[arrange[index]] = matched[index];
}); });
return return_array; let matchedTmp1Length = matched.length - arrLength;
console.log(matchedTmp1Length);
var matchedTmp1 = matched
.slice(0, matchedTmp1Length);
console.log("matchedTmp1" + util.inspect(matchedTmp1, { showHidden: true, depth: null }));
console.log("returnRrray" + util.inspect(returnRrray, { showHidden: true, depth: null }));
matchedTmp1.push(returnRrray);
let result = { _tag: "Some",
value: { matched: matchedTmp1,
remained: middle.value.remained } };
return result;
} }
}; };
} }
/**
 * typeABS ::= "'" ID
 *
 * Matches an apostrophe token followed by an ID token and collapses the two
 * into a single token whose text is the ID text prefixed with "'".
 *
 * Items that were already in `x.matched` before this rule ran are kept in
 * front of the collapsed token, and the ID token is copied rather than
 * modified, so the token still referenced by the input keeps its text.
 *
 * NOTE(review): assumes `thenDo` threads the pair through `tApos` and `tId`
 * so that each appends exactly one item to `matched` — confirm against
 * `tk.thenDo`.
 *
 * @param x the matched/remained pair to parse from.
 * @returns whatever `thenDo` returned; on "Some" its `matched` ends with the
 *          collapsed token.
 */
var typeABS = (x) => {
    var result = thenDo(thenDo(toSome(x), tApos), tId);
    if (result._tag == "Some") {
        var matched = result.value.matched;
        // The ID is the item matched last; the apostrophe sits right before it.
        var idToken = matched[matched.length - 1];
        if ("text" in idToken) {
            // Copy instead of mutating: the same object is still reachable
            // from the caller's token list.
            var absToken = Object.assign({}, idToken, { text: "'" + idToken.text });
            // Drop only the two items this rule consumed; keep earlier ones.
            result.value.matched = matched
                .slice(0, matched.length - 2)
                .concat([absToken]);
        }
    }
    return result;
};
/**
 * TypeId ::= typeABS | ID
 *
 * Wraps `x` with `toSome` and hands it, through `thenDo`, to the alternative
 * built by `orDo(typeABS, tId)`.
 *
 * @param x the matched/remained pair to parse from.
 * @returns the value produced by `thenDo`.
 */
var typeName = (x) => {
    const start = toSome(x);
    const absOrId = orDo(typeABS, tId);
    return thenDo(start, absOrId);
};
/**
* CONST ::= INT | STR | FLO | BOOL
*/
/**
* TODO: use a debugger to inspect and analyze the problem
*/
var constParser = gramRHS((x) => { return thenDo(toSome(x), orDo(orDo(orDo(tInt, tFlo), tStr), tBool)); }, [0]); var constParser = gramRHS((x) => { return thenDo(toSome(x), orDo(orDo(orDo(tInt, tFlo), tStr), tBool)); }, [0]);
let tree = constParser(commandTPair); /**
console.log(util.inspect(tree, { showHidden: true, depth: null })); * storing the tree
*/
var astTree = [];
/**
 * TYPE_PAIR ::= TYP_ID ID
 *
 * Applies `typeName` and then `tId` to `x.maybeTokens` through `thenDo`.
 * `x.ast` is not read.
 *
 * @param x wrapper holding the current Maybe-wrapped token pair.
 * @returns an object with `maybeTokens` set to the parse result and `ast`
 *          set to the last two matched items on "Some", or to an empty
 *          array otherwise.
 */
var typePair = (x) => {
    let a = thenDo(thenDo(x.maybeTokens, typeName), tId);
    if (a._tag == "Some") {
        let matched = a.value.matched;
        // The pair just parsed is the last two matched items.
        let slice = matched.slice(matched.length - 2);
        // Inspect rather than concatenate: "+" on an array of objects would
        // only print "[object Object],[object Object]".
        console.log("slice" + util.inspect(slice, { showHidden: true, depth: null }));
        return { maybeTokens: a, ast: slice };
    }
    return { maybeTokens: a, ast: [] };
};
/**
 * function's arguments
 * FN_ARGS = TYPE_PAIR ("," TYPE_PAIR)+
 *
 * NOTE(review): the body currently runs `typePair` exactly twice in a row
 * and does not look for a "," between the pairs, so it does not yet
 * implement the grammar above.
 *
 * @param x the matched/remained pair to parse from.
 * @returns an object whose `maybeTokens` is the second pair's parse state
 *          and whose `ast` is a two-element array of both pairs' `ast`.
 */
var fnArgs = (x) => {
    const initial = { maybeTokens: toSome(x), ast: [] };
    const firstPair = typePair(initial);
    console.log("AAAAA" + util.inspect(firstPair, { showHidden: true, depth: null }));
    const secondPair = typePair(firstPair);
    console.log("ABNB" + util.inspect(secondPair, { showHidden: true, depth: null }));
    const collected = [firstPair.ast, secondPair.ast];
    return { maybeTokens: secondPair.maybeTokens, ast: collected };
};
// Debug driver: parse the token pair built above with fnArgs and dump the
// whole result (no depth limit) to the console.
let tree = fnArgs(commandTPair);
console.log("CHRANN" + util.inspect(tree, { showHidden: true, depth: null }));

View file

@ -1,18 +1,24 @@
var fs = require('fs'); var fs = require('fs');
import { argv } from 'node:process'; import { argv, resourceUsage } from 'node:process';
import * as tk from './tokenize.js'; import * as tk from './tokenize.js';
import * as util from 'util'; import * as util from 'util';
import { reduceRotation } from 'pdf-lib';
/** /**
* token tree type. * token tree type.
*/ */
type tkTree = tk.Token[] | tk.Token type tkTree = tkTree[] | tk.Token
export interface TokenMatcheePair { export interface TokenMatcheePair {
matched: tkTree[] matched: tkTree[]
remained: tk.Token[] remained: tk.Token[]
} }
/**
 * A parse state paired with the tree collected for it.
 */
export interface MaybeTokensAST{
/** the Maybe-wrapped matched/remained token pair. */
maybeTokens: tk.Maybe<TokenMatcheePair>;
/** the token tree associated with `maybeTokens`. */
ast: tkTree;
}
/** /**
* @description * @description
* match one token type. * match one token type.
@ -35,13 +41,14 @@ export function m1TType(typ: tk.TokenType):
const ttbm = m.remained[0]; const ttbm = m.remained[0];
if (ttbm.type == typ) { if (ttbm.type == typ) {
m.matched.push(ttbm); let new_matched = m.matched.concat(ttbm);
return { let result : tk.Some<TokenMatcheePair> = {
_tag: "Some", value: { _tag: "Some", value: {
matched: m.matched, matched: new_matched,
remained: m.remained.slice(1) remained: m.remained.slice(1)
} }
}; };
return result;
} }
else { else {
return { _tag: "None" }; return { _tag: "None" };
@ -51,27 +58,36 @@ export function m1TType(typ: tk.TokenType):
let toSome = tk.toSome; let toSome = tk.toSome;
let thenDo = tk.thenDo; let thenDo = tk.thenDo;
let zeroOrOnceDo = tk.zeroOrOnceDo;
let orDo = tk.orDo; let orDo = tk.orDo;
let zeroOrMoreDo = tk.zeroOrMoreDo;
argv.forEach((val, index) => { argv.forEach((val, index) => {
console.log(`${index}=${val}`); console.log(`${index}=${val}`);
}); });
let commandInput = argv[2]; let commandInput = "int a str b"//argv[2];
let commandInputTokenized = tk.tokenize(commandInput); let commandInputTokenized = tk.tokenize(commandInput);
console.log(commandInputTokenized); let commandInputTokenizedFiltered = commandInputTokenized.filter(
(x : tk.Token)=>{return x.type != tk.TokenType.SP &&
x.type != tk.TokenType.NL});
console.log("aaa: "+util.inspect(commandInputTokenizedFiltered, { showHidden: true, depth: null }));
/** /**
* matchee pair of commandInputTokenized * matchee pair of commandInputTokenized
*/ */
let commandTPair : TokenMatcheePair = {matched:[], let commandTPair : TokenMatcheePair = {matched:[],
remained: commandInputTokenized}; remained: commandInputTokenizedFiltered};
let tInt = m1TType(tk.TokenType.INT); let tInt = m1TType(tk.TokenType.INT);
let tFlo = m1TType(tk.TokenType.FLO); let tFlo = m1TType(tk.TokenType.FLO);
let tStr = m1TType(tk.TokenType.STR); let tStr = m1TType(tk.TokenType.STR);
let tId = m1TType(tk.TokenType.ID);
let tApos = m1TType(tk.TokenType.APOS);
function tBool (x : TokenMatcheePair) :tk.Maybe<TokenMatcheePair> { function tBool (x : TokenMatcheePair) :tk.Maybe<TokenMatcheePair> {
let text = x.remained[0].text let text = x.remained[0].text
if (text == "true" || text == "false"){ if (text == "true" || text == "false"){
@ -92,29 +108,116 @@ function tBool (x : TokenMatcheePair) :tk.Maybe<TokenMatcheePair> {
function gramRHS (process: Function, arrange : number[]){ function gramRHS (process: Function, arrange : number[]){
return (m : TokenMatcheePair)=>{ return (m : TokenMatcheePair)=>{
let result : tk.Maybe<TokenMatcheePair> = process(m); let middle : tk.Maybe<TokenMatcheePair> = process(m);
console.log(`result ${result}`)
if (result._tag == "None"){ console.log("Middle"+util.inspect(middle, { showHidden: true, depth: null }));
return result;
if (middle._tag == "None"){
return middle;
} }
else{ else{
let matched = result.value.matched; let matched = middle.value.matched;
let return_array : tkTree[] = Array(arrange.length); let arrLength = arrange.length;
let returnRrray : tkTree[] = Array(arrange.length);
arrange.forEach((val, index) => { arrange.forEach((val, index) => {
return_array[arrange[index]] = matched[index]; returnRrray[arrange[index]] = matched[index];
}); });
return return_array; let matchedTmp1Length = matched.length-arrLength;
console.log(matchedTmp1Length);
var matchedTmp1 : tkTree[] = matched
.slice(0,matchedTmp1Length);
console.log("matchedTmp1"+util.inspect(matchedTmp1, { showHidden: true, depth: null }));
console.log("returnRrray"+util.inspect(returnRrray, { showHidden: true, depth: null }));
matchedTmp1.push(returnRrray);
let result : tk.Some<TokenMatcheePair> = {_tag:"Some",
value : {matched : matchedTmp1,
remained : middle.value.remained}};
return result;
} }
} }
} }
/**
 * typeABS ::= "'" ID
 *
 * Matches an apostrophe token followed by an ID token and collapses the two
 * into a single token whose text is the ID text prefixed with "'".
 *
 * Items that were already in `x.matched` before this rule ran are kept in
 * front of the collapsed token, and the ID token is copied rather than
 * modified, so the token still referenced by the input keeps its text.
 *
 * NOTE(review): assumes `thenDo` threads the pair through `tApos` and `tId`
 * so that each appends exactly one item to `matched` — confirm against
 * `tk.thenDo`.
 *
 * @param x the matched/remained pair to parse from.
 * @returns whatever `thenDo` returned; on "Some" its `matched` ends with the
 *          collapsed token.
 */
var typeABS = (x : TokenMatcheePair)=>
{
    var result = thenDo(thenDo(toSome(x),tApos),tId);
    if (result._tag == "Some"){
        const matched = result.value.matched;
        // The ID is the item matched last; the apostrophe sits right before it.
        const idToken = matched[matched.length-1];
        if ("text" in idToken){
            // Copy instead of mutating: the same object is still reachable
            // from the caller's token list.
            const absToken : tk.Token = {...idToken, text : "'"+idToken.text};
            // Drop only the two items this rule consumed; keep earlier ones.
            result.value.matched = matched
                .slice(0, matched.length-2)
                .concat([absToken]);
        }
    }
    return result;
}
/**
 * TypeId ::= typeABS | ID
 *
 * Wraps `x` with `toSome` and hands it, through `thenDo`, to the alternative
 * built by `orDo(typeABS, tId)`.
 *
 * @param x the matched/remained pair to parse from.
 * @returns the value produced by `thenDo`.
 */
var typeName = (x : TokenMatcheePair)=>
{
    const start = toSome(x);
    const absOrId = orDo(typeABS, tId);
    return thenDo(start, absOrId);
}
/** /**
* CONST ::= INT | STR | FLO | BOOL * CONST ::= INT | STR | FLO | BOOL
*/ */
/**
* TODO: use a debugger
*/
var constParser = gramRHS((x : TokenMatcheePair)=> var constParser = gramRHS((x : TokenMatcheePair)=>
{return thenDo(toSome(x),orDo(orDo(orDo(tInt,tFlo),tStr),tBool))}, [0]); {return thenDo(toSome(x),orDo(orDo(orDo(tInt,tFlo),tStr),tBool))}, [0]);
let tree = constParser(commandTPair); /**
console.log(util.inspect(tree, { showHidden: true, depth: null })); * storing the tree
*/
var astTree : tkTree = [];
/**
 * TYPE_PAIR ::= TYP_ID ID
 *
 * Applies `typeName` and then `tId` to `x.maybeTokens` through `thenDo`.
 * `x.ast` is not read.
 *
 * @param x wrapper holding the current Maybe-wrapped token pair.
 * @returns a `MaybeTokensAST` with `maybeTokens` set to the parse result and
 *          `ast` set to the last two matched items on "Some", or to an empty
 *          array otherwise.
 */
var typePair = (x : MaybeTokensAST)=>
{
    let a = thenDo(thenDo(x.maybeTokens, typeName), tId);
    if (a._tag == "Some"){
        let matched = a.value.matched;
        // The pair just parsed is the last two matched items.
        let slice = matched.slice(matched.length-2);
        // Inspect rather than concatenate: "+" on an array of objects would
        // only print "[object Object],[object Object]".
        console.log("slice"+util.inspect(slice, { showHidden: true, depth: null }));
        let b : MaybeTokensAST = {maybeTokens : a, ast : slice};
        return b;
    }
    else{
        let b : MaybeTokensAST= {maybeTokens : a, ast : []};
        return b;
    }
}
/**
 * function's arguments
 * FN_ARGS = TYPE_PAIR ("," TYPE_PAIR)+
 *
 * NOTE(review): the body currently runs `typePair` exactly twice in a row
 * and does not look for a "," between the pairs, so it does not yet
 * implement the grammar above.
 *
 * @param x the matched/remained pair to parse from.
 * @returns an object whose `maybeTokens` is the second pair's parse state
 *          and whose `ast` is a two-element array of both pairs' `ast`.
 */
var fnArgs = (x : TokenMatcheePair)=>
{
    const initial : MaybeTokensAST = {maybeTokens : toSome(x), ast : []};
    const firstPair = typePair(initial);
    console.log("AAAAA"+util.inspect(firstPair, { showHidden: true, depth: null }));
    const secondPair = typePair(firstPair);
    console.log("ABNB"+util.inspect(secondPair, { showHidden: true, depth: null }));
    const collected = [firstPair.ast, secondPair.ast];
    return {maybeTokens : secondPair.maybeTokens, ast : collected};
};
// Debug driver: parse the token pair built above with fnArgs and dump the
// whole result (no depth limit) to the console.
let tree = fnArgs(commandTPair);
console.log("CHRANN"+util.inspect(tree, { showHidden: true, depth: null }));

View file

@ -1,3 +1,4 @@
import * as util from 'util';
var fs = require('fs'); var fs = require('fs');
@ -205,7 +206,7 @@ export function matchRange(l: string, u: string): (m: MatcheePair) => Maybe<Matc
* @param s the checker string. * @param s the checker string.
* @returns `None` or matched pair wrapped in `Some` * @returns `None` or matched pair wrapped in `Some`
*/ */
export function matchWord(s: string, ): (m: MatcheePair) => Maybe<MatcheePair> { export function matchWord(s: string, ): (m: MatcheePair) => Maybe<MatcheePair> {
return (m)=>{ return (m)=>{
if (s.length==0){ if (s.length==0){
return { _tag: "None" }; return { _tag: "None" };
@ -377,7 +378,7 @@ export function tokenize(input: string): Array<Token> {
// space = [ \t]+ // space = [ \t]+
let space = bTerm((x: Maybe<MatcheePair>) => let space = bTerm((x: Maybe<MatcheePair>) =>
thenDo(thenDo(x, s_aux), zeroOrMoreDo(s_aux)), thenDo(thenDo(x, s_aux), zeroOrMoreDo(s_aux)),
TokenType.INT); TokenType.SP);
// newline = \r?\n // newline = \r?\n
let newline = bTerm((x: Maybe<MatcheePair>) => let newline = bTerm((x: Maybe<MatcheePair>) =>