Parse lambda expressions and add Pass 1 (unifyVar)

This commit is contained in:
Tan, Kian-ting 2025-08-19 23:57:14 +08:00
parent dad707924d
commit 26305b7e0d
5 changed files with 207 additions and 57 deletions

View file

@ -2,3 +2,6 @@ name = "TComp"
uuid = "b858fc1c-1812-4b3c-a2e6-a1a64b2d44f1"
authors = ["Tan Kian-ting <chenjt30@gmail.com>"]
version = "0.1.0"
[deps]
Match = "7eb4fadd-790c-5f42-8a69-bfa0b872bfbf"

View file

@ -1,7 +1,75 @@
# TComp: compiler driver. Reads the program named by ARGS[1], parses it with
# Parser.totalParse and runs Pass 1 (unifyVar) on the result, printing each stage.
module TComp

include("./parser.jl")
using .Parser
using Match

inp = ARGS

# `read(path, String)` opens and closes the file itself, so the handle cannot leak
# when parsing or a later pass throws.
prog = read(ARGS[1], String)
print(prog)

parsed = Parser.totalParse(prog)
print(parsed)

tmp_var_no = 0

"""
    unifyVar(parsed, env)

Pass 1: replace every variable name in `parsed` by the position of its binding.

`env` is the list of names currently in scope, outermost binding first. A `%let`
node extends a *copy* of `env` with the bound name for the binder and the body,
while the bound value is resolved in the unextended `env` (letrec is not
supported). An identifier resolves to the index of its last occurrence in `env`,
so the innermost binding of a shadowed name wins.

Returns the rewritten tree. Any node shape not matched below yields the string
`"Error"`. Throws an `ErrorException` for an identifier that is not in `env`.
"""
function unifyVar(parsed, env)
    @match parsed begin
        # letrec is not considered
        #[("%let", "id"), [ty, var], val, [("%lambda", "id"), args, body]] => nothing
        [("%let", "id"), [ty, var], val, body] =>
            begin
                # Copy before extending: pushing onto `env` itself would leak the
                # new name into `val` and into the caller's environment.
                envNew = push!(copy(env), var[1]) # var = ("x", "id"), so var[1] is the name
                res = [("%let", "id"),
                       [ty, unifyVar(var, envNew)],
                       unifyVar(val, env),
                       unifyVar(body, envNew)]
                return res
            end
        (var, "id") =>
            begin
                # The last occurrence is the innermost binding.
                index = findlast(e -> e == var, env)
                if index === nothing
                    error("unbound variable: $var")
                end
                return (index, "id")
            end
        [(plus, "plus"), lhs, rhs] =>
            begin
                lhs_new = unifyVar(lhs, env)
                rhs_new = unifyVar(rhs, env)
                return [(plus, "plus"), lhs_new, rhs_new]
            end
        [(minus, "minus"), lhs, rhs] =>
            begin
                lhs_new = unifyVar(lhs, env)
                rhs_new = unifyVar(rhs, env)
                return [(minus, "minus"), lhs_new, rhs_new]
            end
        [("%call", "id"), callee, args...] =>
            begin
                unifiedCallee = unifyVar(callee, env)
                # `args` holds the rest of the node; its first element is the
                # argument list itself.
                unifiedArgs = map(x -> unifyVar(x, env), args[1])
                return vcat([("%call", "id"), unifiedCallee], [unifiedArgs])
            end
        (c, "int") => return parsed
        _ => "Error"
    end
end

emptyEnv = []
res = unifyVar(parsed, emptyEnv)
print(res)

greet() = print("Hello World!")

end # module

View file

@ -90,10 +90,11 @@ patternList = [("int", "\\d+"),
("lParen", "[\\(]"),
("rParen", "[\\)]"),
("plus", "[+]"),
("funType", "[-][\\>]"),
("minus", "[\\-]"),
("mul", "[\\*]"),
("div", "[\\/]"),
("sp", "\\s+"),
("sp", "[\\n\\r ]+"),
("comma", "[,]"),
("semicolon", "[;]"),
("lambda", "[=][\\>]"),
@ -112,21 +113,15 @@ matchEachItem = Regex(tmp)
matchAll = "^(" * tmp * ")+\$"
matchEntirely = Regex(matchAll)
print(matchEntirely)
#println(matchEntirely)
"""
((int -> int) -> int)
add = (x , y) => x + y;
inp = """
((a, b, c)=>(d=>e)) add = add;
int a = 8;
12 + foo(13*a,14,15) """
print("~~~\n")
isEntirelyMatched = match(matchEntirely, inp)
if isEntirelyMatched == false
print("illegal tokens contained.")
end
12 + foo(13*a,14,15)
"""
function prettyString(ele)
@ -146,66 +141,112 @@ function prettyString(ele)
else #number
return string(ele)
end
end
mI = eachmatch(matchEachItem, inp)
"""
    prettyStringLisp(ele)

Render `ele` as a Lisp-style string.

A `String` is returned unchanged, a `Tuple` is rendered by its first component,
an `Array` becomes its rendered elements joined by spaces inside parentheses, a
`ParserResult` is rendered by its `matched` field, and anything else goes
through `string`.
"""
function prettyStringLisp(ele)
    ele isa String && return ele
    ele isa Tuple && return prettyStringLisp(ele[1])
    if ele isa Array
        rendered = map(prettyStringLisp, ele)
        return "(" * join(rendered, " ") * ")"
    end
    ele isa ParserResult && return prettyStringLisp(ele.matched)
    return string(ele) # number
end
"""
    processKeys(x)

Return the first key of `x` whose value is not `nothing`.

`x` is indexed with each element of `keys(x)`. Throws a `BoundsError` when every
value is `nothing`.
"""
function processKeys(x)
    presentKeys = filter(k -> !isnothing(x[k]), keys(x))
    return presentKeys[1]
end
matchedList = map((x)->x.match, collect(mI))
groupNameList = map(processKeys, collect(mI))
zippedTokenList = collect(zip(matchedList, groupNameList))
print(zippedTokenList)
withoutSpaces = filter((x)-> x[2] != "sp", zippedTokenList)
initWrapped = ParserResult([], withoutSpaces)
test1 = initWrapped >> strng("123") >> (strng("+")|strng("-"))
test2 = initWrapped >> seq([strng("123"), strng("+")])
println(prettyString(test1))
println(prettyString(test2))
#test1 = initWrapped >> strng("123") >> (strng("+")|strng("-"))
#test2 = initWrapped >> seq([strng("123"), strng("+")])
#println(prettyString(test1))
#println(prettyString(test2))
"""
atom = int | id
unit = "(" unit ")" | atom
func = "(" fn_args ")" "=>" body
unit = func | "(" exp ")" | atom
args = unit ("," unit)*
factor = unit "(" args ")"
term = (factor (*|/) factor) | factor
exp = (term (+|-) term) | term
tyOfArgs = ty ("," ty)*
tyOfFn = "(" ty => ty ")"
ty = id | tyOfFn | "(" tyOfArgs ")"
letexp = ty id "=" exp ";" body
body = exp | letexp
"""
atom = typ("int") | typ("id")
function longUnitAux(input)
rawFunc = seq([typ("lParen"), unit, typ("rParen")])
"""
    fnArgItemAux(input)

Run the sequence `comma`, `id` on `input`.

On success return a `ParserResult` whose `matched` is the second element of the
sequence result (the comma is dropped) and whose `remained` is passed through
unchanged. Return `nothing` when the sequence does not match.
"""
function fnArgItemAux(input)
    rawFunc = seq([typ("comma"), typ("id")])
    rawRes = rawFunc.fun(input)
    rawRes === nothing && return nothing
    return ParserResult(rawRes.matched[2], rawRes.remained)
end
fnArgItem = Psr(fnArgItemAux)
"""
    fnArgsAux(input)

Run the sequence `id`, `many0(fnArgItem)` on `input`.

On success return a `ParserResult` whose `matched` is one flat list: the first
sequence element followed by the elements of the second. Return `nothing` when
the sequence does not match.
"""
function fnArgsAux(input)
    rawFunc = seq([typ("id"), many0(fnArgItem)])
    rawRes = rawFunc.fun(input)
    rawRes === nothing && return nothing
    # Flatten [first, [rest...]] into [first, rest...].
    params = vcat([rawRes.matched[1]], rawRes.matched[2])
    return ParserResult(params, rawRes.remained)
end
fnArgs = Psr(fnArgsAux)
"""
    funcAux(input)

Run the sequence `lParen`, `fnArgs`, `rParen`, `lambda`, `body` on `input`.

On success return a `ParserResult` whose `matched` is
`[("%lambda", "id"), <2nd sequence element>, <5th sequence element>]`.
Return `nothing` when the sequence does not match.
"""
function funcAux(input)
    rawFunc = seq([typ("lParen"), fnArgs, typ("rParen"), typ("lambda"), body])
    rawRes = rawFunc.fun(input)
    rawRes === nothing && return nothing
    params = rawRes.matched[2]
    lambdaBody = rawRes.matched[5]
    return ParserResult([("%lambda", "id"), params, lambdaBody], rawRes.remained)
end
"""
    longUnitAux(input)

Run the sequence `lParen`, `exp`, `rParen` on `input`.

On success return a `ParserResult` holding only the second sequence element (the
parentheses are dropped). Return `nothing` when the sequence does not match.
"""
function longUnitAux(input)
    rawFunc = seq([typ("lParen"), exp, typ("rParen")])
    rawRes = rawFunc.fun(input)
    rawRes === nothing && return nothing
    return ParserResult(rawRes.matched[2], rawRes.remained)
end
"""
    unitAux(input)

Apply the alternation `fun | longUnit | atom` to `input` and return its result,
where `fun` wraps `funcAux` and `longUnit` wraps `longUnitAux`.
"""
function unitAux(input)
    fun = Psr(funcAux)
    longUnit = Psr(longUnitAux)
    # The earlier `longUnit | atom` assignment was immediately overwritten and is
    # dropped; only the three-way alternation was ever used.
    rawFunc = fun | longUnit | atom
    return rawFunc.fun(input)
end
@ -220,7 +261,7 @@ function argItemAux(input)
res = ParserResult(matched, rawRes.remained)
return res
else
return rawRes
return nothing
end
end
argItem = Psr(argItemAux)
@ -245,7 +286,7 @@ function longFactorAux(input)
res = ParserResult(matched, rawRes.remained)
return res
else
return rawRes
return nothing
end
end
@ -266,7 +307,7 @@ function longTermAux(input)
res = ParserResult(matched, rawRes.remained)
return res
else
return rawRes
return nothing
end
end
@ -288,7 +329,7 @@ function longExpAux(input)
res = ParserResult(matched, rawRes.remained)
return res
else
return rawRes
return nothing
end
end
@ -302,48 +343,62 @@ exp = Psr(expAux)
"""tyOfArgs = "(" ty ("," ty)* ")"
tyOfFn = "(" ty => ty ")"
ty = id | tyOfFn | tyOfArgs """
tyHead = tyOfArgs | tyOfFn | id
tyOfFn = "(" tyHead -> ty ")"
ty = id | tyOfFn """
"""
    tyArgItemAux(input)

Run the sequence `comma`, `ty` on `input`.

On success return a `ParserResult` holding only the second sequence element (the
comma is dropped). Return `nothing` when the sequence does not match.
"""
function tyArgItemAux(input)
    # The superseded `seq([typ("comma"), typ("id")])` assignment was dead code and
    # is removed; only this parser was ever run.
    rawFunc = seq([typ("comma"), ty])
    rawRes = rawFunc.fun(input)
    rawRes === nothing && return nothing
    return ParserResult(rawRes.matched[2], rawRes.remained)
end
"""
    tyOfArgsAux(input)

Run the sequence `lParen`, `ty`, `many0(tyArgItem)`, `rParen` on `input`.

On success return a `ParserResult` whose `matched` is a flat list: the tag
`"%argType"`, then the second sequence element, then the elements of the third.
Return `nothing` when the sequence does not match.
"""
function tyOfArgsAux(input)
    tyArgItem = Psr(tyArgItemAux)
    # The superseded assignment using `typ("id")` for the first element was dead
    # code and is removed; only this parser was ever run.
    rawFunc = seq([typ("lParen"), ty, many0(tyArgItem), typ("rParen")])
    rawRes = rawFunc.fun(input)
    rawRes === nothing && return nothing
    # NOTE(review): ("%argType") is a parenthesised String, not a tuple like
    # ("%funType", "id"); kept as-is so the produced tree is unchanged. Confirm
    # whether a ("%argType", "id") tuple was intended.
    matched = vcat([("%argType")], vcat([rawRes.matched[2]], rawRes.matched[3]))
    return ParserResult(matched, rawRes.remained)
end
"""
    tyHeadAux(input)

Apply the alternation of `tyOfArgsAux`, `tyOfFnAux` (each wrapped in `Psr`) and
`typ("id")` to `input` and return its result.
"""
function tyHeadAux(input)
    alternatives = Psr(tyOfArgsAux) | Psr(tyOfFnAux) | typ("id")
    return alternatives.fun(input)
end
"""
    tyOfFnAux(input)

Run the sequence `lParen`, `tyHead`, `funType`, `ty`, `rParen` on `input`.

On success return a `ParserResult` whose `matched` is
`[("%funType", "id"), <2nd sequence element>, <4th sequence element>]`.
Return `nothing` when the sequence does not match.
"""
function tyOfFnAux(input)
    tyHead = Psr(tyHeadAux)
    # The superseded `ty "=>" ty` assignment was dead code and is removed; only
    # this parser was ever run.
    rawFunc = seq([typ("lParen"), tyHead, typ("funType"), ty, typ("rParen")])
    rawRes = rawFunc.fun(input)
    rawRes === nothing && return nothing
    headType = rawRes.matched[2]
    resultType = rawRes.matched[4]
    return ParserResult([("%funType", "id"), headType, resultType], rawRes.remained)
end
"""
    tyAux(input)

Apply the alternation `tyOfFn | typ("id")` to `input` and return its result,
where `tyOfFn` wraps `tyOfFnAux`.
"""
function tyAux(input)
    # The three superseded assignments (which also bound `tyOfFn`/`tyOfArgs` to
    # each other's functions) were overwritten before use and are removed.
    tyOfFn = Psr(tyOfFnAux)
    rawFunc = tyOfFn | typ("id")
    return rawFunc.fun(input)
end
@ -363,7 +418,7 @@ function letExpAux(input)
res = ParserResult(matched, rawRes.remained)
return res
else
return rawRes
return nothing
end
end
@ -371,8 +426,24 @@ letExp = Psr(letExpAux)
body = letExp | exp
test3 = initWrapped >> body
println(prettyString(test3))
"""
    totalParse(prog)

Tokenize the program text `prog` and parse it with `body`.

The token list and the Lisp-style rendering of the parse result are printed as a
side effect. Returns the `matched` field of the parse result.

Throws an `ErrorException` when `prog` does not match `matchEntirely`, or when
the parse result is `nothing`.
"""
function totalParse(prog)
    # `match` returns `nothing` (never `false`) when the regex does not match, so
    # comparing against `false` could never detect illegal input.
    if match(matchEntirely, prog) === nothing
        error("illegal tokens contained.")
    end

    tokenMatches = collect(eachmatch(matchEachItem, prog))
    matchedList = map(x -> x.match, tokenMatches)
    groupNameList = map(processKeys, tokenMatches)
    zippedTokenList = collect(zip(matchedList, groupNameList))
    print(zippedTokenList)

    # Tokens tagged "sp" carry no meaning for the parser.
    withoutSpaces = filter(x -> x[2] != "sp", zippedTokenList)
    initWrapped = ParserResult([], withoutSpaces)
    res = initWrapped >> body
    println(prettyStringLisp(res))

    # NOTE(review): presumably `>>` yields `nothing` when `body` fails, like the
    # individual parsers do; fail with a clear message instead of a field access
    # on `nothing`.
    if res === nothing
        error("parse failed.")
    end
    return res.matched
end
end

3
test/prog.tc Normal file
View file

@ -0,0 +1,3 @@
(int -> int)
addmulti2 = (x , y) => (x + y) * 2;
addmulti2(40,10)

5
test/prog1.tc Normal file
View file

@ -0,0 +1,5 @@
int a = 12;
int b = 12;
int a = 15;
int d = 20;
a + d((0 - a), b)