2025-07-28 22:40:06 +08:00
|
|
|
module Parser
|
|
|
|
|
2025-08-05 21:55:40 +08:00
|
|
|
# Outcome of one successful parsing step.
#   matched  - the value the parser produced for the input it consumed
#   remained - the input that is still left for the following parser
struct ParserResult
    matched::Any
    remained::Any
end
|
|
|
|
|
|
|
|
# A parse step either yields a ParserResult or `nothing` (failure).
OptParserResult = Union{ParserResult,Nothing}
|
|
|
|
|
|
|
|
# A parser: wraps a function `fun(input)` that returns a ParserResult on
# success or `nothing` on failure.
struct Psr
    fun::Any
end
|
|
|
|
|
|
|
|
# Build a parser that accepts the next token when its first component
# (the token text) equals `c`.
# On success the whole token is the match and the rest of the token list is
# the remainder; on an empty list or a mismatch the parser yields `nothing`.
function strng(c)
    function acceptText(tokens)
        if length(tokens) < 1
            return nothing
        end
        head = tokens[1]
        if head[1] == c
            return ParserResult(head, tokens[2:end])
        end
        return nothing
    end
    return Psr(acceptText)
end
|
|
|
|
|
2025-08-06 19:40:41 +08:00
|
|
|
# Build a parser that accepts the next token when its second component
# (the token type name) equals `t`.
# On success the whole token is the match and the rest of the token list is
# the remainder; on an empty list or a mismatch the parser yields `nothing`.
function typ(t)
    function acceptType(tokens)
        if length(tokens) < 1
            return nothing
        end
        head = tokens[1]
        if head[2] == t
            return ParserResult(head, tokens[2:end])
        end
        return nothing
    end
    return Psr(acceptType)
end
|
|
|
|
|
|
|
|
# `state >> parser` is shorthand for `then(state, parser)`.
(>>)(a::OptParserResult, b::Psr) = then(a, b)

# Run parser `b` on whatever input the previous step `a` left over.
# A failed previous step (`nothing`) is passed through unchanged.
function then(a, b)
    return a == nothing ? a : b.fun(a.remained)
end
|
|
|
|
|
|
|
|
# `p | q` is shorthand for `choice(p, q)`.
(|)(a::Psr, b::Psr) = choice(a, b)

# Ordered choice: build a parser that tries `a` first and falls back to `b`
# on the same input only when `a` fails (returns `nothing`).
#
# `a` is run exactly once per input. Running it a second time to fetch the
# result of a successful attempt would double the work at every choice, which
# compounds across the nested choices of a recursive grammar.
function choice(a, b)
    function choiceAux(x)
        firstAttempt = a.fun(x)
        return firstAttempt === nothing ? b.fun(x) : firstAttempt
    end
    return Psr(choiceAux)
end
|
|
|
|
|
|
|
|
|
2025-08-08 22:21:55 +08:00
|
|
|
# Build a parser that applies `parser` zero or more times.
#
# The match is the list of every individual match, in order (an empty list
# when `parser` fails immediately); the remainder is the input left after the
# last successful application. This parser itself never fails.
function many0(parser)
    function many0Aux(s)
        result = []
        tmp = parser.fun(s)
        while tmp !== nothing
            # A parser that succeeds without consuming anything would succeed
            # again on the very same input forever; stop instead of spinning.
            if isequal(tmp.remained, s)
                break
            end
            s = tmp.remained
            push!(result, tmp.matched)
            tmp = parser.fun(s)
        end
        return ParserResult(result, s)
    end

    return Psr(many0Aux)
end
|
2025-08-05 21:55:40 +08:00
|
|
|
|
|
|
|
# Build a parser that runs every parser of `parserLst` one after another,
# each on the input left over by the previous one.
#
# Succeeds only if all of them succeed: the match is the list of the
# individual matches (same order as `parserLst`) and the remainder is what the
# last parser left over. As soon as one parser fails the whole sequence
# yields `nothing`.
function seq(parserLst)
    function seqAux(s)
        result = []
        for p in parserLst
            step = p.fun(s)
            if step === nothing
                return nothing
            end
            s = step.remained
            push!(result, step.matched)
        end
        return ParserResult(result, s)
    end

    return Psr(seqAux)
end
|
|
|
|
|
|
|
|
|
|
|
|
|
2025-07-28 22:40:06 +08:00
|
|
|
# Token table: (token type name, regex source) pairs.
# The entries are joined into one alternation in this order, so an entry that
# is a prefix of another must come later ("funType" before "minus",
# "lambda" before "assign").
patternList = [
    # literals and names
    ("int", "\\d+"),
    ("id", "[_a-zA-Z][_0-9a-zA-Z]*"),
    # brackets
    ("lParen", "[\\(]"),
    ("rParen", "[\\)]"),
    # operators
    ("plus", "[+]"),
    ("funType", "[-][\\>]"),
    ("minus", "[\\-]"),
    ("mul", "[\\*]"),
    ("div", "[\\/]"),
    # whitespace
    ("sp", "[\\n\\r ]+"),
    # punctuation
    ("comma", "[,]"),
    ("semicolon", "[;]"),
    ("lambda", "[=][\\>]"),
    ("assign", "[=]"),
]
|
|
|
|
|
|
|
|
# Turn one (name, pattern) pair into the source text of a named capture
# group, e.g. ("int", "\\d+") becomes "(?P<int>\\d+)".
function combineUnit(tup)
    groupName = tup[1]
    groupPattern = tup[2]
    return "(?P<" * groupName * ">" * groupPattern * ")"
end
|
|
|
|
|
|
|
|
# Alternation of every token pattern, each wrapped in its named group.
tmp = join([combineUnit(entry) for entry in patternList], "|")
# Matches a single token; the group that captured tells the token type.
matchEachItem = Regex(tmp)

# Matches only text that consists of one or more tokens from start to end.
matchAll = string("^(", tmp, ")+\$")
matchEntirely = Regex(matchAll)
|
|
|
|
|
2025-08-19 23:57:14 +08:00
|
|
|
#println(matchEntirely)
|
2025-07-28 22:40:06 +08:00
|
|
|
|
2025-08-19 23:57:14 +08:00
|
|
|
"""
|
|
|
|
((int -> int) -> int)
|
|
|
|
add = (x , y) => x + y;
|
2025-07-28 22:40:06 +08:00
|
|
|
|
2025-08-19 23:57:14 +08:00
|
|
|
int a = 8;
|
|
|
|
12 + foo(13*a,14,15)
|
|
|
|
"""
|
2025-07-28 22:40:06 +08:00
|
|
|
|
|
|
|
|
2025-08-06 19:40:41 +08:00
|
|
|
# Render a parse value as a readable, structure-preserving string.
#
#   strings       -> wrapped in double quotes
#   tuples        -> "(a, b, ...)" with each element rendered recursively
#   arrays        -> "[a, b, ...]" with each element rendered recursively
#   ParserResult  -> "ParserResult(<matched>,<remained>)"
#   anything else -> `string(ele)`
#
# Strings are recognised as `AbstractString`, not just `String`: token texts
# taken from regex matches are `SubString`s and must be quoted as well.
function prettyString(ele)
    if isa(ele, AbstractString)
        return "\"" * ele * "\""
    elseif isa(ele, Tuple)
        mappedEle = map(prettyString, ele)
        return "(" * join(mappedEle, ", ") * ")"
    elseif isa(ele, Array)
        mappedEle = map(prettyString, ele)
        return "[" * join(mappedEle, ", ") * "]"
    elseif isa(ele, ParserResult)
        return "ParserResult(" * prettyString(ele.matched) * "," * prettyString(ele.remained) * ")"
    else # number or any other value
        return string(ele)
    end
end
|
2025-08-06 19:40:41 +08:00
|
|
|
|
2025-08-19 23:57:14 +08:00
|
|
|
# Render a parse value in a Lisp-like notation.
#
#   String        -> returned as is
#   tuple         -> only its first element is rendered (the token text)
#   array         -> "(a b ...)" with each element rendered recursively
#   ParserResult  -> rendering of its `matched` field
#   anything else -> `string(ele)`
function prettyStringLisp(ele)
    if ele isa String
        return ele
    end
    if ele isa Tuple
        return string(prettyStringLisp(ele[1]))
    end
    if ele isa Array
        parts = [prettyStringLisp(item) for item in ele]
        return "(" * join(parts, " ") * ")"
    end
    if ele isa ParserResult
        return prettyStringLisp(ele.matched)
    end
    # number or any other value
    return string(ele)
end
|
2025-07-28 22:40:06 +08:00
|
|
|
|
2025-08-19 23:57:14 +08:00
|
|
|
|
|
|
|
|
2025-07-28 22:40:06 +08:00
|
|
|
|
|
|
|
# Return the first key of `x` whose value is not `nothing`.
# For a regex match built from named groups this is the name of the group
# that captured. Indexing fails if no key has a value.
function processKeys(x)
    captured = [k for k in keys(x) if x[k] != nothing]
    return captured[1]
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2025-08-19 23:57:14 +08:00
|
|
|
#test1 = initWrapped >> strng("123") >> (strng("+")|strng("-"))
|
|
|
|
#test2 = initWrapped >> seq([strng("123"), strng("+")])
|
2025-08-05 21:55:40 +08:00
|
|
|
|
2025-08-19 23:57:14 +08:00
|
|
|
#println(prettyString(test1))
|
|
|
|
#println(prettyString(test2))
|
2025-08-06 19:40:41 +08:00
|
|
|
|
|
|
|
"""
|
2025-08-08 22:21:55 +08:00
|
|
|
atom = int | id
|
2025-08-19 23:57:14 +08:00
|
|
|
func = "(" fn_args ")" "=>" body
|
|
|
|
unit = func | "(" exp ")" | atom
|
2025-08-08 22:21:55 +08:00
|
|
|
args = exp ("," exp)*
|
|
|
|
factor = (unit "(" args ")") | unit
|
|
|
|
term = (factor (*|/) factor) | factor
|
2025-08-06 19:40:41 +08:00
|
|
|
exp = (term (+|-) term) | term
|
2025-08-08 22:21:55 +08:00
|
|
|
|
|
|
|
letexp = ty id "=" exp ";" body
|
|
|
|
body = exp | letexp
|
2025-08-06 19:40:41 +08:00
|
|
|
"""
|
2025-08-08 22:21:55 +08:00
|
|
|
# atom = int | id
atom = choice(typ("int"), typ("id"))
|
|
|
|
|
|
|
|
|
2025-08-19 23:57:14 +08:00
|
|
|
|
|
|
|
# One further lambda parameter: "," id
# The comma is dropped; only the id token is kept as the match.
function fnArgItemAux(input)
    parsed = seq([typ("comma"), typ("id")]).fun(input)
    if parsed === nothing
        return nothing
    end
    return ParserResult(parsed.matched[2], parsed.remained)
end
fnArgItem = Psr(fnArgItemAux)
|
|
|
|
|
|
|
|
# Lambda parameter list: id ("," id)*
# The match is one flat list holding the first id followed by the others.
function fnArgsAux(input)
    parsed = seq([typ("id"), many0(fnArgItem)]).fun(input)
    if parsed === nothing
        return nothing
    end
    firstParam = parsed.matched[1]
    otherParams = parsed.matched[2]
    return ParserResult(vcat([firstParam], otherParams), parsed.remained)
end
fnArgs = Psr(fnArgsAux)
|
|
|
|
|
|
|
|
# Lambda expression: "(" fnArgs ")" "=>" body
# The match is [("%lambda", "id"), <parameter list>, <body>].
function funcAux(input)
    lambdaParser = seq([typ("lParen"), fnArgs, typ("rParen"), typ("lambda"), body])
    parsed = lambdaParser.fun(input)
    if parsed === nothing
        return nothing
    end
    node = [("%lambda", "id"), parsed.matched[2], parsed.matched[5]]
    return ParserResult(node, parsed.remained)
end
|
|
|
|
|
2025-08-08 22:21:55 +08:00
|
|
|
# Parenthesised expression: "(" exp ")"
# The parentheses are dropped; the match is the inner expression.
function longUnitAux(input)
    parsed = seq([typ("lParen"), exp, typ("rParen")]).fun(input)
    if parsed === nothing
        return nothing
    end
    return ParserResult(parsed.matched[2], parsed.remained)
end
|
|
|
|
# unit = lambda | "(" exp ")" | atom, tried in that order.
function unitAux(input)
    unitParser = Psr(funcAux) | Psr(longUnitAux) | atom
    return unitParser.fun(input)
end
unit = Psr(unitAux)
|
|
|
|
|
|
|
|
|
|
|
|
# One further call argument: "," exp
# The comma is dropped; only the expression is kept as the match.
function argItemAux(input)
    parsed = seq([typ("comma"), exp]).fun(input)
    if parsed === nothing
        return nothing
    end
    return ParserResult(parsed.matched[2], parsed.remained)
end
argItem = Psr(argItemAux)
|
|
|
|
|
|
|
|
# Call argument list: exp ("," exp)*
# The match is one flat list holding the first expression followed by the
# others.
function argsAux(input)
    parsed = seq([exp, many0(argItem)]).fun(input)
    if parsed === nothing
        return parsed
    end
    firstArg = parsed.matched[1]
    otherArgs = parsed.matched[2]
    return ParserResult(vcat([firstArg], otherArgs), parsed.remained)
end
args = Psr(argsAux)
|
|
|
|
|
|
|
|
|
|
|
|
# Function call: unit "(" args ")"
# The match is [("%call", "id"), <callee>, <argument list>].
function longFactorAux(input)
    callParser = seq([unit, typ("lParen"), args, typ("rParen")])
    parsed = callParser.fun(input)
    if parsed === nothing
        return nothing
    end
    node = [("%call", "id"), parsed.matched[1], parsed.matched[3]]
    return ParserResult(node, parsed.remained)
end
|
|
|
|
|
|
|
|
# factor = function call | unit, tried in that order.
function factorAux(input)
    factorParser = Psr(longFactorAux) | unit
    return factorParser.fun(input)
end

factor = Psr(factorAux)
|
|
|
|
|
|
|
|
# Binary product: factor ("*" | "/") factor
# The match is [("%prime", "id"), <operator token>, [<left>, <right>]].
function longTermAux(input)
    productParser = seq([factor, (typ("mul") | typ("div")), factor])
    parsed = productParser.fun(input)
    if parsed === nothing
        return nothing
    end
    node = [("%prime", "id"), parsed.matched[2], [parsed.matched[1], parsed.matched[3]]]
    return ParserResult(node, parsed.remained)
end
|
|
|
|
|
|
|
|
# term = binary product | factor, tried in that order.
function termAux(input)
    termParser = Psr(longTermAux) | factor
    return termParser.fun(input)
end

term = Psr(termAux)
|
|
|
|
|
2025-08-06 19:40:41 +08:00
|
|
|
|
|
|
|
# Binary sum: term ("+" | "-") term
# The match is [("%prime", "id"), <operator token>, [<left>, <right>]].
function longExpAux(input)
    sumParser = seq([term, (typ("plus") | typ("minus")), term])
    parsed = sumParser.fun(input)
    if parsed === nothing
        return nothing
    end
    node = [("%prime", "id"), parsed.matched[2], [parsed.matched[1], parsed.matched[3]]]
    return ParserResult(node, parsed.remained)
end
|
|
|
|
|
|
|
|
# exp = binary sum | term, tried in that order.
function expAux(input)
    expParser = Psr(longExpAux) | term
    return expParser.fun(input)
end
exp = Psr(expAux)
|
2025-08-06 19:40:41 +08:00
|
|
|
|
2025-08-08 22:21:55 +08:00
|
|
|
|
|
|
|
"""tyOfArgs = "(" ty ("," ty)* ")"
|
2025-08-19 23:57:14 +08:00
|
|
|
tyHead = tyOfArgs | tyOfFn | id
|
|
|
|
tyOfFn = "(" tyHead -> ty ")"
|
|
|
|
ty = id | tyOfFn """
|
2025-08-08 22:21:55 +08:00
|
|
|
|
|
|
|
# One further argument type: "," ty
# The comma is dropped; only the type is kept as the match.
function tyArgItemAux(input)
    parsed = seq([typ("comma"), ty]).fun(input)
    if parsed === nothing
        return nothing
    end
    return ParserResult(parsed.matched[2], parsed.remained)
end
|
|
|
|
|
2025-08-19 23:57:14 +08:00
|
|
|
|
2025-08-08 22:21:55 +08:00
|
|
|
# Argument type list: "(" ty ("," ty)* ")"
# The match is a flat list: the marker "%argType" followed by every type.
function tyOfArgsAux(input)
    listParser = seq([typ("lParen"), ty, many0(Psr(tyArgItemAux)), typ("rParen")])
    parsed = listParser.fun(input)
    if parsed === nothing
        return nothing
    end
    types = vcat([parsed.matched[2]], parsed.matched[3])
    # NOTE(review): the marker is a plain string here, while the other node
    # markers in this file are tuples such as ("%funType", "id") - confirm
    # whether a tuple was intended before changing it.
    node = vcat(["%argType"], types)
    return ParserResult(node, parsed.remained)
end
|
|
|
|
|
|
|
|
# tyHead = argument type list | function type | id, tried in that order.
function tyHeadAux(input)
    headParser = Psr(tyOfArgsAux) | Psr(tyOfFnAux) | typ("id")
    return headParser.fun(input)
end
|
2025-08-06 19:40:41 +08:00
|
|
|
|
2025-08-08 22:21:55 +08:00
|
|
|
# Function type: "(" tyHead "->" ty ")"
# The match is [("%funType", "id"), <head type>, <result type>].
function tyOfFnAux(input)
    fnTypeParser = seq([typ("lParen"), Psr(tyHeadAux), typ("funType"), ty, typ("rParen")])
    parsed = fnTypeParser.fun(input)
    if parsed === nothing
        return nothing
    end
    node = [("%funType", "id"), parsed.matched[2], parsed.matched[4]]
    return ParserResult(node, parsed.remained)
end
|
|
|
|
|
2025-08-19 23:57:14 +08:00
|
|
|
|
|
|
|
|
2025-08-08 22:21:55 +08:00
|
|
|
# ty = function type | id, tried in that order.
function tyAux(input)
    typeParser = Psr(tyOfFnAux) | typ("id")
    return typeParser.fun(input)
end
ty = Psr(tyAux)
|
|
|
|
|
|
|
|
# Let expression: ty id "=" exp ";" body
# The match is [("%let", "id"), [<type>, <variable>], <value>, <body>].
function letExpAux(input)
    letParser = seq([ty, typ("id"), typ("assign"), exp, typ("semicolon"), body])
    parsed = letParser.fun(input)
    if parsed === nothing
        return nothing
    end

    parts = parsed.matched
    declaredType = parts[1]
    variable = parts[2]
    value = parts[4]
    rest = parts[6]

    node = [("%let", "id"), [declaredType, variable], value, rest]
    return ParserResult(node, parsed.remained)
end

letExp = Psr(letExpAux)
|
2025-08-06 19:40:41 +08:00
|
|
|
|
2025-08-08 22:21:55 +08:00
|
|
|
# body = let expression | exp, tried in that order.
body = choice(letExp, exp)
|
2025-08-06 19:40:41 +08:00
|
|
|
|
2025-08-19 23:57:14 +08:00
|
|
|
# Tokenise and parse the program text `prog`; return the parsed tree.
#
# Steps:
#   1. reject text that is not made of known tokens from start to end;
#   2. split the text into (token text, token type) pairs;
#   3. drop whitespace tokens;
#   4. run the `body` parser on the token list.
#
# Throws the string "illegal tokens contained." when step 1 fails, and an
# ErrorException when the token list cannot be parsed as a `body`.
# Tokens that `body` leaves unconsumed are not reported.
function totalParse(prog)
    # `match` signals "no match" with `nothing` (never `false`), so that is
    # the value the guard has to test for.
    if match(matchEntirely, prog) === nothing
        throw("illegal tokens contained.")
    end

    tokenMatches = collect(eachmatch(matchEachItem, prog))
    matchedList = map((x) -> x.match, tokenMatches)
    groupNameList = map(processKeys, tokenMatches)
    zippedTokenList = collect(zip(matchedList, groupNameList))

    withoutSpaces = filter((x) -> x[2] != "sp", zippedTokenList)
    initWrapped = ParserResult([], withoutSpaces)
    res = initWrapped >> body
    if res === nothing
        # Fail with a clear message instead of a field access on `nothing`.
        error("parse failed: the token sequence is not a valid program.")
    end
    return res.matched
end
|
2025-08-05 21:55:40 +08:00
|
|
|
|
|
|
|
|
2025-07-28 22:40:06 +08:00
|
|
|
end
|
|
|
|
|