From eea281bc38ab9a69cb71ac34be3b213e8f82b388 Mon Sep 17 00:00:00 2001 From: Tan Kian-ting Date: Mon, 1 Sep 2025 01:21:11 +0800 Subject: [PATCH] add pass 4, 5, 6 and let it can print basic assembly code --- README.md | 18 ++++ misc/vertexcoloring.jl | 59 +++++++++++++ passesInstrument.md | 7 +- src/TComp.jl | 182 +++++++++++++++++++++++++++++++++++++---- src/parser.jl | 2 - test/prog1.tc | 7 +- 6 files changed, 253 insertions(+), 22 deletions(-) create mode 100644 README.md create mode 100644 misc/vertexcoloring.jl diff --git a/README.md b/README.md new file mode 100644 index 0000000..08080c8 --- /dev/null +++ b/README.md @@ -0,0 +1,18 @@ +# TComp +A practice of Essentials of Compilation in Julia + +## Dependencies + - julia + - [Match.jl](https://github.com/JuliaServices/Match.jl) + +## instruction +`./src/TComp.jl [.tc file]` + +the output assembly code is `./a.s` in AT&T assembly language. + +to make it executable, please use `gcc`: `gcc ./a.s -o output.out` + +the example `.tc` file is in `./test` + +## Known issues + - parser for a + b + c .. 
and a * b * c \ No newline at end of file diff --git a/misc/vertexcoloring.jl b/misc/vertexcoloring.jl new file mode 100644 index 0000000..dab4f04 --- /dev/null +++ b/misc/vertexcoloring.jl @@ -0,0 +1,59 @@ +graph = [['a', 'b'], ['b', 'c'], ['e', 'd'], ['e', 'a'], ['a', 'c'], ['b','e'], ['e','c']] + + +function vertexColoring(graph) + notDefined = -1 + + function getColor(v, color) + if !(v in keys(color)) + return -1 + else + return color[v] + end + end + + vertices = Set(vcat(graph...)) + verticesList = collect(vertices) + + verticesMapping = map(x -> [x, Set()], verticesList) + + adjacentNodes = Dict(verticesMapping) + + for link in graph + a = link[1] + b = link[2] + push!(adjacentNodes[a], b) + push!(adjacentNodes[b], a) + end + + sort!(verticesList, by=x -> length(adjacentNodes[x]), rev=true) + + color = Dict() + + println(verticesList) + + + for i in verticesList + i_adjacents = adjacentNodes[i] + println(i_adjacents) + i_adjacents_color_set = Set(map(x -> getColor(x, color), collect(i_adjacents))) + i_adjacents_color_list = sort(collect(i_adjacents_color_set)) + + if i_adjacents_color_list == [notDefined] + color[i] = 0 + else + tmpId = 0 + for i in i_adjacents_color_list + if tmpId == i + tmpId += 1 + end + end + color[i] = tmpId + end + end + + return color + +end + +println(vertexColoring(graph)) \ No newline at end of file diff --git a/passesInstrument.md b/passesInstrument.md index bcb035e..3c03136 100644 --- a/passesInstrument.md +++ b/passesInstrument.md @@ -7,7 +7,10 @@ to hold the results of complex subexpressions. 3. **OK** explicate_control makes the execution order of the program explicit. It converts the abstract syntax tree representation into a graph in which each node is a labeled sequence of statements and the edges are goto statements. - 4. select_instructions handles the difference between LVar operations and x86 + 4. **OK** select_instructions handles the difference between LVar operations and x86 instructions. 
This pass converts each LVar operation to a short sequence of instructions that accomplishes the same task. -assign_homes \ No newline at end of file +assign_homes + 5. **OK**assign homes (register allocation) + 6. **OK**patch instructions + 7. **OK**prelude & conclusion diff --git a/src/TComp.jl b/src/TComp.jl index f768797..e18faa1 100644 --- a/src/TComp.jl +++ b/src/TComp.jl @@ -13,9 +13,9 @@ inp = ARGS f = open(ARGS[1], "r") prog = read(f, String) -print(prog) +#(prog) parsed = Parser.totalParse(prog) -print(parsed) +#print(parsed) tmp_var_no = 0 @@ -93,7 +93,6 @@ function explicitControlRemoveComplex(prog) varNo = res[2] newBind = res[1].binds if newBind != [] - println("NEW_BIND:", newBind) newResList = vcat(newResList, newBind) push!(new_exp_args, last(newBind)[2][2]) else @@ -102,8 +101,7 @@ function explicitControlRemoveComplex(prog) end push!(new_exp_body, new_exp_args) - - println(newResList) + newBindVar = [("int", "id"), ("tmp" * string(varNo) , "id")] varNo += 1 newBind = [("%let", "id"), newBindVar, new_exp_body] @@ -118,10 +116,17 @@ function explicitControlRemoveComplex(prog) end function splitLet(binds, exp, varNo) + if exp[1] == ("%let", "id") res = rmComplexAux1(exp[3], varNo) + binds = vcat(binds, res[1].binds) new_exp = res[1].body + + #fix bug[("int", "id"), ("tmp1", "id")] => ("tmp1", "id") + if new_exp[1] == ("int", "id") + new_exp = new_exp[2] + end new_bind = [("%let", "id"), exp[2], new_exp] push!(binds, new_bind) @@ -135,7 +140,17 @@ function explicitControlRemoveComplex(prog) raw_res = rmComplex(prog)[1] - res = push!(raw_res.binds, raw_res.body) + raw_res_body = raw_res.body + #fix bug[("int", "id"), ("tmp1", "id")] => ("tmp1", "id") + if raw_res_body[1] == ("int", "id") + raw_res_body = [("%return", "id"), raw_res_body[2]] + end + + if raw_res_body[2] == "int" # ("$8", "int") + raw_res_body = [("%return", "id"), raw_res_body] + end + + res = push!(raw_res.binds, raw_res_body) return res end @@ -143,17 +158,30 @@ end function 
assignInstruction(inp) resList = [] for i in inp - println(i) @match i begin + [("%return", "id"), (val, t_val)] => begin + if t_val == "int" + val = "\$" * val + end + push!(resList, ["movq", val, "%rax"]) + end [("%let", "id"), [_ty, (id, "id")], - [("%prime", "id"), (op, _), [(rhs, _), (lhs, _)]]] => + [("%prime", "id"), (op, _), [(lhs, lhs_t), (rhs, rhs_t)]]] => begin instr = "" ops = ["+", "-", "*", "/"] - instrs = ["addq", "subq", "mulq", "divq"] + instrs = ["addq", "subq", "imulq", "divq"] opIndex = findfirst(x -> x == op, ops) instr = instrs[opIndex] + if lhs_t == "int" + lhs = "\$" * lhs + end + + if rhs_t == "int" + rhs = "\$" * rhs + end + if rhs == id line1 = [instr, lhs, id] push!(resList, line1) @@ -169,13 +197,19 @@ function assignInstruction(inp) #TODO [("%call", "id"), (op, _), args] => ... end - [("%let", "id"), [_ty, (id, "id")], (val, _)] => + [("%let", "id"), [_ty, (id, "id")], (val, t_val)] => begin + if t_val == "int" + val = "\$" * val + end line = ["movq", val, id] push!(resList, line) end - (c, "int") => push!(resList, [c]) + (c, "int") => begin + c_modified = "\$" * c + push!(resList, [c_modified]) + end (v, "id") => push!(resList, [v]) _ => println("Error") @@ -189,16 +223,134 @@ end emptyEnv = [] res = uniquifyVar(parsed, emptyEnv) -println("PASS1", res) +#println("PASS1", res) res2 = explicitControlRemoveComplex(res) -println("PASS2", Parser.prettyStringLisp(res2)) +#println("PASS2", Parser.prettyStringLisp(res2)) res3 = assignInstruction(res2) -println("PASS3", res3) +#println("PASS3", res3) + + +# PASS4 assign home +function assignHomes(inp) + varRegex = r"(^[^\$%].*)" + res = [] + vars = [] + for i in inp + orig = i[2] + dest = i[3] + if match(varRegex, orig) != nothing # i.e. orig is a var and not a reg. + if !(orig in vars) + push!(vars, orig) + end + end + if match(varRegex, dest) != nothing # i.e. dest is a var and not a reg. 
+ if !(dest in vars) + push!(vars, dest) + end + end + end + #println("ALL_VAR", vars) + + + varsLength = length(vars) + for i in inp + instr = i[1] + orig = i[2] + dest = i[3] + + origIdx = findfirst(x -> x == orig,vars) + if origIdx != nothing + realAddressIdx = varsLength - origIdx + 1 + realAddress = "-$(realAddressIdx * 8)(%rbp)" + orig = realAddress + end + + destIdx = findfirst(x -> x == dest,vars) + if destIdx != nothing + realAddressIdx = varsLength - destIdx + 1 + realAddress = "-$(realAddressIdx * 8)(%rbp)" + dest = realAddress + end + + push!(res, [instr, orig, dest]) + end + return (res, varsLength) +end + + +# PASS5 patch instruction (ensure "instr x(rbp) y(rbp)" not happened) +function patchInstruction(inp) + memoryRegex = r".+[(]%rbp[)]$" + res = [] + + for i in inp + inst = i[1] + orig = i[2] + dest = i[3] + if (match(memoryRegex, orig) != nothing) & (match(memoryRegex, dest) != nothing) + cmd1 = ["movq", orig, "%rax"] + push!(res, cmd1) + + cmd2 = [inst, "%rax", dest] + push!(res, cmd2) + elseif (inst == "imulq") & (match(r"^%.+", dest) == nothing) + cmd1 = ["movq", dest, "%rax"] + cmd2 = ["imulq", orig, "%rax"] + cmd3 = ["movq", "%rax", dest] + push!(res, cmd1) + push!(res, cmd2) + push!(res, cmd3) + + else + push!(res, i) + end + end + return res +end + +res4 = assignHomes(res3) +res4_prog = res4[1] +varNumber = res4[2] +res5 = patchInstruction(res4_prog) +#println("PASS5",res5) + + +## PASS6 add prelude and conclude +function preludeConclude(prog, varNumber) + rspSubqMax = varNumber * 8 + + body = "start:\n" + + for i in prog + ln_cmd = "" + if length(i) == 3 + ln_cmd = "\t$(i[1])\t$(i[2]), $(i[3])\n" + body = body * ln_cmd + end + end + body *= "\tjmp\tconclusion\n\n\n" + + prelude = """ + .globl main + main: + pushq %rbp + movq %rsp, %rbp\n""" * "\tsubq \$$rspSubqMax, %rsp\n\tjmp start\n\n" + + conclude = """\nconclusion:\n""" * "\taddq \$$rspSubqMax, %rsp\n\tpopq %rbp\n\tretq" + + assemblyProg = prelude * body * conclude + return 
assemblyProg +end + +res6 = preludeConclude(res5, varNumber) +# println("PASS6",res6) # emit assembly code +f2 = open("./a.s", "w") +write(f2, res6) #write the assembly code close(f) - +close(f2) end # module diff --git a/src/parser.jl b/src/parser.jl index 608fc5f..33b948f 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -436,12 +436,10 @@ function totalParse(prog) matchedList = map((x)->x.match, collect(mI)) groupNameList = map(processKeys, collect(mI)) zippedTokenList = collect(zip(matchedList, groupNameList)) - print(zippedTokenList) withoutSpaces = filter((x)-> x[2] != "sp", zippedTokenList) initWrapped = ParserResult([], withoutSpaces) res = initWrapped >> body - println(prettyStringLisp(res)) return res.matched end diff --git a/test/prog1.tc b/test/prog1.tc index cbf2d92..53d5c51 100644 --- a/test/prog1.tc +++ b/test/prog1.tc @@ -1,5 +1,6 @@ -int a = 12; +int a = 10; +int a = 13; int b = (12 + (0 - a)); -int a = (15 + b); +int c = (14 + b); int d = 20; -a + d((0 - a), b) \ No newline at end of file +a * 2 + ((b - c) - d) \ No newline at end of file