add some tokenizer function

This commit is contained in:
Tan, Kian-ting 2023-09-15 00:34:20 +08:00
parent e8f894d994
commit ec563ca30f
3 changed files with 217 additions and 36 deletions

View file

@ -48,13 +48,14 @@ function m1TType(typ) {
* */
const ttbm = m.remained[0];
if (ttbm.type == typ) {
m.matched.push(ttbm);
return {
let new_matched = m.matched.concat(ttbm);
let result = {
_tag: "Some", value: {
matched: m.matched,
matched: new_matched,
remained: m.remained.slice(1)
}
};
return result;
}
else {
return { _tag: "None" };
@ -65,21 +66,29 @@ exports.m1TType = m1TType;
;
let toSome = tk.toSome;
let thenDo = tk.thenDo;
let zeroOrOnceDo = tk.zeroOrOnceDo;
let orDo = tk.orDo;
let zeroOrMoreDo = tk.zeroOrMoreDo;
node_process_1.argv.forEach((val, index) => {
console.log(`${index}=${val}`);
});
let commandInput = node_process_1.argv[2];
let commandInput = "int a str b"; //argv[2];
let commandInputTokenized = tk.tokenize(commandInput);
console.log(commandInputTokenized);
let commandInputTokenizedFiltered = commandInputTokenized.filter((x) => {
return x.type != tk.TokenType.SP &&
x.type != tk.TokenType.NL;
});
console.log("aaa: " + util.inspect(commandInputTokenizedFiltered, { showHidden: true, depth: null }));
/**
* matchee pair of commandInputTokenized
*/
let commandTPair = { matched: [],
remained: commandInputTokenized };
remained: commandInputTokenizedFiltered };
let tInt = m1TType(tk.TokenType.INT);
let tFlo = m1TType(tk.TokenType.FLO);
let tStr = m1TType(tk.TokenType.STR);
let tId = m1TType(tk.TokenType.ID);
let tApos = m1TType(tk.TokenType.APOS);
function tBool(x) {
let text = x.remained[0].text;
if (text == "true" || text == "false") {
@ -99,21 +108,89 @@ function tBool(x) {
*/
function gramRHS(process, arrange) {
return (m) => {
let result = process(m);
console.log(`result ${result}`);
if (result._tag == "None") {
return result;
let middle = process(m);
console.log("Middle" + util.inspect(middle, { showHidden: true, depth: null }));
if (middle._tag == "None") {
return middle;
}
else {
let matched = result.value.matched;
let return_array = Array(arrange.length);
let matched = middle.value.matched;
let arrLength = arrange.length;
let returnRrray = Array(arrange.length);
arrange.forEach((val, index) => {
return_array[arrange[index]] = matched[index];
returnRrray[arrange[index]] = matched[index];
});
return return_array;
let matchedTmp1Length = matched.length - arrLength;
console.log(matchedTmp1Length);
var matchedTmp1 = matched
.slice(0, matchedTmp1Length);
console.log("matchedTmp1" + util.inspect(matchedTmp1, { showHidden: true, depth: null }));
console.log("returnRrray" + util.inspect(returnRrray, { showHidden: true, depth: null }));
matchedTmp1.push(returnRrray);
let result = { _tag: "Some",
value: { matched: matchedTmp1,
remained: middle.value.remained } };
return result;
}
};
}
/**
* typeABS ::= "'" ID
*/
var typeABS = (x) => {
var result = thenDo(thenDo(toSome(x), tApos), tId);
if (result._tag == "Some" && "text" in result.value.matched[1]) {
var realToken = result.value.matched[1];
realToken.text = "'" + realToken.text;
result.value.matched = [realToken];
}
return result;
};
/**
* TypeId ::= typeABS | ID
*/
var typeName = (x) => {
return thenDo(toSome(x), orDo(typeABS, tId));
};
/**
* CONST ::= INT | STR | FLO | BOOL
*/
/**
* TODO: 要用 debugger 檢查分析問題
*/
var constParser = gramRHS((x) => { return thenDo(toSome(x), orDo(orDo(orDo(tInt, tFlo), tStr), tBool)); }, [0]);
let tree = constParser(commandTPair);
console.log(util.inspect(tree, { showHidden: true, depth: null }));
/**
* storing the tree
*/
var astTree = [];
/**
* TYPE_PAIR ::= TYP_ID ID
*/
var typePair = (x) => {
let a = thenDo(thenDo(x.maybeTokens, typeName), tId);
if (a._tag == "Some") {
let matched = a.value.matched;
let slice = matched.slice(matched.length - 2);
console.log("slice" + slice);
let b = { maybeTokens: a, ast: slice };
return b;
}
else {
let b = { maybeTokens: a, ast: [] };
return b;
}
};
/**
* function's arguments
* FN_ARGS = TYPE_PAIR ("," TYPE_PAIR)+
*/
var fnArgs = (x) => {
let wrapper = { maybeTokens: toSome(x), ast: [] };
let a = typePair(wrapper);
console.log("AAAAA" + util.inspect(a, { showHidden: true, depth: null }));
let abanibi = typePair(a);
console.log("ABNB" + util.inspect(abanibi, { showHidden: true, depth: null }));
return { maybeTokens: abanibi.maybeTokens, ast: [a.ast, abanibi.ast] };
};
let tree = fnArgs(commandTPair);
console.log("CHRANN" + util.inspect(tree, { showHidden: true, depth: null }));

View file

@ -1,18 +1,24 @@
var fs = require('fs');
import { argv } from 'node:process';
import { argv, resourceUsage } from 'node:process';
import * as tk from './tokenize.js';
import * as util from 'util';
import { reduceRotation } from 'pdf-lib';
/**
* token tree type.
*/
type tkTree = tk.Token[] | tk.Token
type tkTree = tkTree[] | tk.Token
export interface TokenMatcheePair {
matched: tkTree[]
remained: tk.Token[]
}
export interface MaybeTokensAST{
maybeTokens: tk.Maybe<TokenMatcheePair>;
ast: tkTree;
}
/**
* @description
* match one token type.
@ -35,13 +41,14 @@ export function m1TType(typ: tk.TokenType):
const ttbm = m.remained[0];
if (ttbm.type == typ) {
m.matched.push(ttbm);
return {
let new_matched = m.matched.concat(ttbm);
let result : tk.Some<TokenMatcheePair> = {
_tag: "Some", value: {
matched: m.matched,
matched: new_matched,
remained: m.remained.slice(1)
}
};
return result;
}
else {
return { _tag: "None" };
@ -51,27 +58,36 @@ export function m1TType(typ: tk.TokenType):
let toSome = tk.toSome;
let thenDo = tk.thenDo;
let zeroOrOnceDo = tk.zeroOrOnceDo;
let orDo = tk.orDo;
let zeroOrMoreDo = tk.zeroOrMoreDo;
argv.forEach((val, index) => {
console.log(`${index}=${val}`);
});
let commandInput = argv[2];
let commandInput = "int a str b"//argv[2];
let commandInputTokenized = tk.tokenize(commandInput);
console.log(commandInputTokenized);
let commandInputTokenizedFiltered = commandInputTokenized.filter(
(x : tk.Token)=>{return x.type != tk.TokenType.SP &&
x.type != tk.TokenType.NL});
console.log("aaa: "+util.inspect(commandInputTokenizedFiltered, { showHidden: true, depth: null }));
/**
* matchee pair of commandInputTokenized
*/
let commandTPair : TokenMatcheePair = {matched:[],
remained: commandInputTokenized};
remained: commandInputTokenizedFiltered};
let tInt = m1TType(tk.TokenType.INT);
let tFlo = m1TType(tk.TokenType.FLO);
let tStr = m1TType(tk.TokenType.STR);
let tId = m1TType(tk.TokenType.ID);
let tApos = m1TType(tk.TokenType.APOS);
function tBool (x : TokenMatcheePair) :tk.Maybe<TokenMatcheePair> {
let text = x.remained[0].text
if (text == "true" || text == "false"){
@ -92,29 +108,116 @@ function tBool (x : TokenMatcheePair) :tk.Maybe<TokenMatcheePair> {
function gramRHS (process: Function, arrange : number[]){
return (m : TokenMatcheePair)=>{
let result : tk.Maybe<TokenMatcheePair> = process(m);
console.log(`result ${result}`)
if (result._tag == "None"){
return result;
let middle : tk.Maybe<TokenMatcheePair> = process(m);
console.log("Middle"+util.inspect(middle, { showHidden: true, depth: null }));
if (middle._tag == "None"){
return middle;
}
else{
let matched = result.value.matched;
let return_array : tkTree[] = Array(arrange.length);
let matched = middle.value.matched;
let arrLength = arrange.length;
let returnRrray : tkTree[] = Array(arrange.length);
arrange.forEach((val, index) => {
return_array[arrange[index]] = matched[index];
returnRrray[arrange[index]] = matched[index];
});
return return_array;
let matchedTmp1Length = matched.length-arrLength;
console.log(matchedTmp1Length);
var matchedTmp1 : tkTree[] = matched
.slice(0,matchedTmp1Length);
console.log("matchedTmp1"+util.inspect(matchedTmp1, { showHidden: true, depth: null }));
console.log("returnRrray"+util.inspect(returnRrray, { showHidden: true, depth: null }));
matchedTmp1.push(returnRrray);
let result : tk.Some<TokenMatcheePair> = {_tag:"Some",
value : {matched : matchedTmp1,
remained : middle.value.remained}};
return result;
}
}
}
/**
* typeABS ::= "'" ID
*/
var typeABS = (x : TokenMatcheePair)=>
{
var result = thenDo(thenDo(toSome(x),tApos),tId);
if (result._tag == "Some" && "text" in result.value.matched[1]){
var realToken : tk.Token = result.value.matched[1];
realToken.text = "'"+realToken.text;
result.value.matched = [realToken];
}
return result;
}
/**
* TypeId ::= typeABS | ID
*/
var typeName = (x : TokenMatcheePair)=>
{
return thenDo(toSome(x), orDo(typeABS, tId));
}
/**
* CONST ::= INT | STR | FLO | BOOL
*/
/**
* TODO: 要用 debugger
*/
var constParser = gramRHS((x : TokenMatcheePair)=>
{return thenDo(toSome(x),orDo(orDo(orDo(tInt,tFlo),tStr),tBool))}, [0]);
let tree = constParser(commandTPair);
console.log(util.inspect(tree, { showHidden: true, depth: null }));
/**
* storing the tree
*/
var astTree : tkTree = [];
/**
* TYPE_PAIR ::= TYP_ID ID
*/
var typePair = (x : MaybeTokensAST)=>
{
let a = thenDo(thenDo(x.maybeTokens, typeName), tId);
if (a._tag == "Some"){
let matched = a.value.matched;
let slice = matched.slice(matched.length-2);
console.log("slice"+slice);
let b : MaybeTokensAST = {maybeTokens : a, ast : slice};
return b;
}
else{
let b : MaybeTokensAST= {maybeTokens : a, ast : []};
return b;
}
}
/**
* function's arguments
* FN_ARGS = TYPE_PAIR ("," TYPE_PAIR)+
*/
var fnArgs = (x : TokenMatcheePair)=>
{
let wrapper : MaybeTokensAST = {maybeTokens : toSome(x), ast : []};
let a = typePair(wrapper);
console.log("AAAAA"+util.inspect(a, { showHidden: true, depth: null }));
let abanibi = typePair(a);
console.log("ABNB"+util.inspect(abanibi, { showHidden: true, depth: null }));
return {maybeTokens : abanibi.maybeTokens, ast : [a.ast, abanibi.ast]};
};
let tree = fnArgs(commandTPair);
console.log("CHRANN"+util.inspect(tree, { showHidden: true, depth: null }));

View file

@ -1,3 +1,4 @@
import * as util from 'util';
var fs = require('fs');
@ -205,7 +206,7 @@ export function matchRange(l: string, u: string): (m: MatcheePair) => Maybe<Matc
* @param s the checker string.
* @returns `None` or matched pair wrapped in `Some`
*/
export function matchWord(s: string, ): (m: MatcheePair) => Maybe<MatcheePair> {
export function matchWord(s: string, ): (m: MatcheePair) => Maybe<MatcheePair> {
return (m)=>{
if (s.length==0){
return { _tag: "None" };
@ -377,7 +378,7 @@ export function tokenize(input: string): Array<Token> {
// space = [ \t]+
let space = bTerm((x: Maybe<MatcheePair>) =>
thenDo(thenDo(x, s_aux), zeroOrMoreDo(s_aux)),
TokenType.INT);
TokenType.SP);
// newline = \r?\n
let newline = bTerm((x: Maybe<MatcheePair>) =>