(* Recursive-descent parser combinators over tokens from the Tokenizer module. *)
open List;;
|
|
open StringLabels;;
|
|
|
|
(* Alias for the token type produced by the external Tokenizer module. *)
type token = Tokenizer.token;;
|
|
(* AST built by the parser:
   - [ASTFail] marks a failed/invalid subtree,
   - [Item] wraps a single token (leaf),
   - [Ls] is an ordered list of subtrees (interior node). *)
type ast_tree =
  | ASTFail
  | Item of token
  | Ls of (ast_tree list);;
(* Sample AST for the token stream "12 + 2". *)
let ast_example =
  Ls
    [ Item (Tokenizer.Token ("12", "INT"));
      Item (Tokenizer.Token ("+", "OP"));
      Item (Tokenizer.Token ("2", "INT")) ];;
|
(* [not_empty_token tok] is [false] for whitespace tokens (type "SPACE"
   or "NL") and [true] for every other token. *)
let not_empty_token = function
  | Tokenizer.Token (_, ("SPACE" | "NL")) -> false
  | Tokenizer.Token (_, _) -> true;;
(* Result of one parsing step: either [Fail], or [Success (ast, rest)]
   where [ast] is the subtree matched so far and [rest] is the list of
   tokens not yet consumed. *)
type parseoutput =
  | Fail
  | Success of ast_tree * token list;;
|
(* Consume exactly one token: [Fail] on empty input, otherwise the head
   token wrapped as an [Item] leaf together with the remaining stream. *)
let consume1token = function
  | [] -> Fail
  | first :: rest -> Success (Item first, rest);;
|
(* [match_token_type ty] builds a parser that succeeds iff the next token
   has type name [ty] (e.g. "INT", "ID"); the token is consumed on success. *)
let match_token_type token_type token_ls =
  match consume1token token_ls with
  | Success (Item (Token (_, ty)), _) as consumed ->
    if String.equal ty token_type then consumed else Fail
  | _ -> Fail;;
|
(* [match_token_name_type nm ty] builds a parser that succeeds iff the
   next token has exactly name [nm] and type [ty]; the token is consumed
   on success. *)
let match_token_name_type token_name token_type token_ls =
  match consume1token token_ls with
  | Success (Item (Token (nm, tp)), _) as consumed ->
    if String.equal token_name nm && String.equal token_type tp
    then consumed
    else Fail
  | _ -> Fail;;
|
(* Fold step for rendering a token list: append the string form of
   [token] to the accumulator. *)
let parseoutput_list2string acc token =
  acc ^ Tokenizer.token_to_string token;;
|
(* Render an AST as a string: leaves via [Tokenizer.token_to_string],
   lists as "Ls( child1 child2 ...)" (one leading space per child). *)
let rec ast2string = function
  | ASTFail -> "ASTFail"
  | Item tok -> Tokenizer.token_to_string tok
  | Ls children ->
    let rendered =
      String.concat "" (List.map (fun child -> " " ^ ast2string child) children)
    in
    "Ls(" ^ rendered ^ ")";;
|
(* Render a [parseoutput]: "Fail", or "<matched ast>:::<remaining tokens>".
   Fix: the original declared this [let rec] but the body never recurses;
   the spurious [rec] (unused-rec warning 39) was dropped. *)
let parseoutput2string input =
  match input with
  | Fail -> "Fail"
  | Success (matched_ast, tkn_remained_ls) ->
    ast2string matched_ast
    ^ ":::"
    ^ List.fold_left parseoutput_list2string "" tkn_remained_ls
|
(* Print a parse result to stdout (no trailing newline). *)
let print_parseoutput input = input |> parseoutput2string |> print_string;;
|
(* Sequencing combinator: [po >>= p] runs parser [p] on the tokens left
   over from [po] and appends the new match to the accumulated AST.
   - If either side fails, the whole composition fails.
   - If [p] matches the empty list [Ls []], the original [po] is returned
     unchanged.  NOTE(review): any tokens [p] consumed in that case are
     discarded — presumably parsers yielding [Ls []] consume nothing;
     verify against the combinator callers.
   - Otherwise the new subtree is appended to the accumulated [Ls], or a
     fresh two-element [Ls] is built when the accumulator was a leaf. *)
let (>>=) parseoutput parser_unit =
  match parseoutput with
  | Fail -> Fail
  | Success(matched1 , remained1) ->
    let result = parser_unit remained1 in
    match result with
    | Fail -> Fail
    | Success (Ls([]) , remained2) ->
      (* empty match: keep the previous accumulator and remainder *)
      parseoutput
    | Success (matched2 , remained2) ->
      (* attach the new match to the accumulator *)
      match matched1 with
      | Ls(matched_list1) -> Success (Ls(append matched_list1 [matched2]), remained2)
      | Item(matched_item1) -> Success (Ls(append [matched1] [matched2]), remained2)
      | ASTFail -> Fail;;
|
(* Alternation combinator: try [parser_unit1]; if it fails, try
   [parser_unit2] on the same input.
   NOTE(review): this shadows the built-in boolean [(||)] for the rest of
   the file — kept as-is because the parsers below depend on it. *)
let (||) parser_unit1 parser_unit2 input =
  match parser_unit1 input with
  | Success (_, _) as first_hit -> first_hit
  | Fail -> parser_unit2 input;;
|
(* Kleene-star sequencing: repeatedly apply [parser] via [(>>=)] until it
   stops producing an [Ls] match, then return the last successful state
   (zero successful applications simply returns [input]).
   Fix: the original tested [input == Fail] with physical equality; a
   structural pattern match is the idiomatic and robust form. *)
let rec ( >>=* ) input parser =
  match input with
  | Fail -> Fail
  | Success (_, _) ->
    let middle0 = input >>= parser in
    match middle0 with
    | Success (Ls _, _) -> middle0 >>=* parser
    | _ -> input
|
(* Normalize combinator output into prefix form:
   - [Ls [lhs; Ls (op :: rhs)]] becomes [Ls [op; lhs; rhs']]: a binary
     operator is hoisted in front of its operands, recursing rightward,
   - a singleton [Ls [Item _]] collapses to the bare [Item],
   - a singleton [Ls [Ls _]] collapses one nesting level,
   - anything else is returned unchanged. *)
let rec correct_list ls =
  match ls with
  | Ls([lhs; Ls(op::rhs)]) -> Ls(op::lhs::[(correct_list(Ls(rhs)))])
  | Ls([Item(Token(id, typ))]) -> Item(Token(id, typ))
  | Ls([Ls(lst)]) -> (correct_list (Ls(lst)))
  | _ -> ls
|
|
|
|
(*item2 = (expr) | int | flo | id *)
(* Atomic expression: a parenthesized [expr], or a single FLO/INT/ID
   token.  The parenthesized form has its parens stripped and its payload
   normalized with [correct_list]. *)
let rec item2 token_list =
  (* each alternative starts from an empty accumulator over the same tokens *)
  let wrapper = Success(Ls([]), token_list) in
  let result1 = wrapper >>= ((fun i -> Success(Ls([]), i) >>= (match_token_name_type "(" "PAREN") >>= expr >>=(match_token_name_type ")" "PAREN"))
                             || (match_token_type "FLO")
                             || (match_token_type "INT")
                             || (match_token_type "ID")) in
  match result1 with
  (* "( x )": drop the paren tokens, keep the normalized payload *)
  | Success(Ls([Ls([Item(Token("(", "PAREN")); x; Item(Token(")", "PAREN"))])]), remained) -> Success((correct_list x), remained)
  | Success(ls, remained) -> Success((correct_list ls), remained)
  | _ -> result1
|
|
(*args = typ1 arg1, typ2 arg2... *)
(* Lambda parameter list: "(" [ID ID ("," ID ID)*] ")".  The raw match is
   rewritten to [Ls (%args :: Ls [typ; var] :: ...)], dropping the
   parentheses and the comma separators. *)
and args token_list =
  let wrapper = Success(Ls([]), token_list) in
  let result1 = wrapper >>= (match_token_name_type "(" "PAREN") >>=* (fun i -> Success(Ls([]), i) >>= (match_token_type "ID") >>= (match_token_type "ID")
                             >>=*(fun i -> Success(Ls([]), i) >>= (match_token_type "COMMA")
                                           >>= (match_token_type "ID") >>= (match_token_type "ID"))) >>= (match_token_name_type ")" "PAREN") in
  match result1 with
  | Success(Ls(left_paren::Ls(typ1::var1::other)::righ_paren), remained) ->
    (* each later group is parsed as [comma; typ; var] — strip the comma *)
    let remove_comma = fun ls -> match ls with Ls([l;typ;var]) -> Ls([typ;var]) | _ -> ls in
    let other_removed_comma = List.map remove_comma other in
    Success(Ls(Item(Token("%args", "ID"))::Ls([typ1;var1])::other_removed_comma), remained)
  (* "()" with no parameters at all *)
  | Success(Ls(left_paren::righ_paren), remained) -> Success(Ls([Item(Token("%args", "ID"))]), remained)
  | _ -> result1
|
|
(*item = item2 | lambda "(" args ")" {stmts} *)
(* Item: a lambda literal 'lambda (args) { stmts }' rewritten to
   [Ls [lambda; args; body]], or otherwise a plain [item2]. *)
and item token_list =
  let wrapper = Success(Ls([]), token_list) in
  let result1 = wrapper >>= ((fun i -> Success(Ls([]), i) >>= (match_token_name_type "lambda" "ID") >>= args
                             >>= (match_token_name_type "{" "BRACE") >>= stmts >>= (match_token_name_type "}" "BRACE"))
                             || fun i -> Success(Ls([]), i) >>= item2) in
  match result1 with
  (* lambda with an empty body: "{}" — body becomes Ls [] *)
  | Success(Ls([Ls(Item(Token("lambda", "ID"))::args::l_brace::[Item(Token("}", "BRACE"))])]) , remained) ->
    Success(Ls([Item(Token("lambda", "ID"));args;Ls([])]), remained)
  (* lambda with a body between the braces *)
  | Success(Ls([Ls(Item(Token("lambda", "ID"))::args::l_brace::body::r_brace)]) , remained) ->
    Success(Ls([Item(Token("lambda", "ID"));args;body]), remained)
  (* single-token item2 result: unwrap the extra nesting *)
  | Success(Ls([Ls([Item(Token(x,y))])]), remained) -> Success(Item(Token(x,y)), remained)
  | _ -> result1
|
|
|
|
(* Call-argument list: "(" item ("," item)* ")".  Rewritten to
   [Ls (%callee :: first_arg :: rest_args)], dropping the parentheses and
   the comma separators. *)
and factor_more_callees token_list =
  let wrapper = Success(Ls([]), token_list) in
  let result1 = wrapper >>= (match_token_name_type "(" "PAREN") >>= item >>=*
                (fun i -> Success(Ls([]), i) >>= (match_token_type "COMMA") >>= item)
                >>=(match_token_name_type ")" "PAREN") in
  match result1 with
  | Success(Ls(Item(Token("(", "PAREN"))::first_callee::rest_lst), y) ->
    (* drop bare Item nodes from the tail — presumably only the ")" token,
       since each comma group comes back as an Ls node; verify *)
    let lst_without_r_paren = filter (fun x ->
      match x with
      | Item(_) -> false
      | _ -> true) rest_lst in
    (* later arguments are parsed as [comma; arg] pairs — strip the comma *)
    let remove_comma = fun ls -> match ls with Ls([Item(Token(",", "COMMA")); x]) -> x | _ -> ls in
    let lst_removed_comma = List.map remove_comma lst_without_r_paren in
    Success(Ls(Item(Token("%callee", "ID"))::first_callee::lst_removed_comma), y)
  | _ -> result1
|
|
(*factor = item | item "(" morecollee ")" *)
(* Factor: a plain item, an empty call "f()", or a call with arguments
   "f(a, b, ...)".  Calls are rewritten into prefix form
   [Ls (%apply :: caller :: args)]. *)
and factor token_list =
  let wrapper = Success(Ls([]), token_list) in
  let result1 = wrapper >>= ((fun i -> Success(Ls([]), i) >>= item >>= (match_token_name_type "(" "PAREN") >>= (match_token_name_type ")" "PAREN"))
                             || (fun i -> Success(Ls([]), i) >>= item >>= factor_more_callees)
                             || item) in
  match result1 with
  | Success(ASTFail, _) -> result1
  | Fail -> Fail
  | Success(Item _, _) -> result1
  | Success(Ls(other), remained) ->
    (* normalize, then pattern-match on the resulting call shapes *)
    let result2 = Success((correct_list (Ls(other))), remained) in
    match result2 with
    (* empty call: f ( ) -> [%apply f] *)
    | Success(Ls[caller; Item(Token("(", "PAREN")); Item(Token(")", "PAREN"))], remained) ->
      Success(Ls[Item(Token("%apply", "ID")); caller], remained)
    (* call that still carries explicit parens around one callee *)
    | Success(Ls[caller; Item(Token("(", "PAREN")); callee ; Item(Token(")", "PAREN"))], remained) ->
      Success(Ls[Item(Token("%apply", "ID")); caller; callee], remained)
    (* call built by factor_more_callees: swap %callee for %apply and
       drop any empty Ls placeholders *)
    | Success(Ls(Item(Token("%callee", "ID"))::op::rest), remained) -> let l1 = Item(Token("%apply", "ID"))::op::rest in
      let l2 = List.filter (fun x -> match x with Ls([]) -> false | _ -> true) l1 in
      Success(Ls(l2), remained)
    | _ -> result2
|
|
|
|
(*
|
|
(* ( */ factor) *)
|
|
let rec factor_rest token_list =
|
|
let wrapper = Success(Ls([]), token_list) in
|
|
let result1 = wrapper >>= ((match_token_name_type "*" "OP") || (match_token_name_type "/" "OP")) >>= (match_token_type "INT") >>= term_rest in
|
|
match result1 with
|
|
| Success(Ls(_), remained_tokens) -> result1
|
|
| _ -> wrapper *)
|
|
|
|
|
|
|
|
(* ( */ factor) *)
(* Parse one "*" or "/" followed by a factor, then recurse for further
   operator/factor pairs.  On any failure an empty match over the
   original tokens is returned, which callers treat as "no continuation". *)
and term_rest token_list =
  let empty_match = Success (Ls [], token_list) in
  let attempt =
    empty_match
    >>= ((match_token_name_type "*" "OP") || (match_token_name_type "/" "OP"))
    >>= factor
    >>= term_rest
  in
  match attempt with
  | Success (Ls _, _) -> attempt
  | _ -> empty_match
|
|
(*term = factor ( */ factor)* *)
(* Term: a factor followed by any number of "*"/"/" factor pairs; the
   raw match is normalized into prefix form with [correct_list]. *)
and term token_list =
  let parsed = Success (Ls [], token_list) >>= factor >>= term_rest in
  match parsed with
  | Success (Ls contents, leftover) ->
    Success (correct_list (Ls contents), leftover)
  | _ -> parsed
|
|
|
|
(* (+- term) *)
(* Zero-or-more continuation of an expr: "+" or "-" followed by a term,
   then recursively more pairs.  A nested continuation [Ls lists] is
   flattened behind its operator; on failure an empty match is returned
   so the caller sees "no continuation". *)
and expr_rest token_list =
  let wrapper = Success(Ls([]), token_list) in
  let result1 = wrapper >>= ((match_token_name_type "+" "OP") || (match_token_name_type "-" "OP")) >>= term >>= expr_rest in
  match result1 with
  (* flatten [op; Ls rest] into [op :: rest] *)
  | Success(Ls([Item(x) ; Ls(lists)]), remained_tokens) -> Success(Ls((Item(x)::lists)), remained_tokens)
  | Success(Ls(_), remained_tokens) -> result1
  | _ -> wrapper
|
|
(*expr = term (+- term)* *)
(* Expression: a term followed by any number of "+"/"-" term pairs,
   normalized into prefix form with [correct_list]. *)
and expr token_list =
  let parsed = Success (Ls [], token_list) >>= term >>= expr_rest in
  match parsed with
  | Success (Ls contents, leftover) ->
    Success (correct_list (Ls contents), leftover)
  | _ -> parsed
|
|
|
|
|
|
(* type = id | ( type -> type ) *)
(* Type annotation: a bare ID, or a parenthesized arrow type
   "( t1 -> t2 )" rewritten to prefix form [Ls [->; t1; t2]]. *)
and type_ token_list =
  let wrapper = Success(Ls([]), token_list) in
  let result1 = wrapper >>= ((fun i -> Success(Ls([]), i) >>= (match_token_name_type "(" "PAREN") >>= type_ >>= (match_token_name_type "->" "IMPLY") >>= type_ >>= (match_token_name_type ")" "PAREN"))
                             || (match_token_type "ID")) in
  match result1 with
  (* bare ID: unwrap to a leaf *)
  | Success(Ls([Item(x)]), remained_tokens) -> Success(Item(x), remained_tokens)
  (* arrow form: drop the parens, hoist "->" in front of its operands *)
  | Success(Ls([Ls(l_paren::lhs::imply::rhs::r_paren)]), remained_tokens) -> Success(Ls(imply::lhs::[rhs]), remained_tokens)
  | _ -> result1
|
|
|
|
(* var_def = type id expr ;*)
(* Variable definition: type, identifier, ASSIGN token, expression —
   rewritten to [Ls (%def :: type :: id :: expr_nodes)].
   NOTE(review): on failure this returns the empty match [wrapper]
   rather than [Fail], like the *_rest fallbacks — confirm callers
   expect that. *)
and var_def token_list =
  let wrapper = Success(Ls([]), token_list) in
  let result1 = wrapper >>= type_ >>= (match_token_type "ID") >>= (match_token_type "ASSIGN") >>= expr in
  match result1 with
  (* [expr] here shadows the parser function of the same name: it binds
     the tail of matched nodes; the assign token is dropped from output *)
  | Success(Ls(typ::var::assign::expr), remained_tokens) -> Success(Ls(Item(Token("%def", "ID"))::typ::var::expr), remained_tokens)
  | _ -> wrapper
|
|
|
|
|
|
|
|
(* one_statement = var_def | expr ;*)
(* One statement: an expression or a variable definition terminated by
   ";".  Whitespace/newline tokens are filtered out first; the trailing
   semicolon is removed and the result normalized with [correct_list]. *)
and one_statement token_list =
  (* drop SPACE/NL tokens before parsing *)
  let token_list2 = List.filter (fun x -> match x with Tokenizer.Token(_, "SPACE") -> false | Tokenizer.Token(_, "NL") -> false | _ -> true) token_list in
  let wrapper = Success(Ls([]), token_list2) in
  let result1 = wrapper >>= ((fun i -> Success(Ls([]), i) >>= expr >>= (match_token_name_type ";" "SEMICO") )||(fun i -> Success(Ls([]), i) >>= var_def >>= (match_token_name_type ";" "SEMICO"))) in
  match result1 with
  | Success(Ls(lst), remained_tokens) -> let lst2 = (correct_list (Ls(lst))) in
    (* correct_list may collapse to a leaf; re-wrap so it can be filtered *)
    let lst2_inner = match lst2 with
      | Ls(lst2_inner) -> lst2_inner
      | _ -> [lst2] in
    (* remove the terminating ";" token, then renormalize *)
    let lst_remove_semicolon = List.filter (fun x -> match x with Item(Token(_, "SEMICO")) -> false | _ -> true) lst2_inner in
    Success((correct_list (Ls(lst_remove_semicolon))), remained_tokens)
  | _ -> result1
|
|
(* stmts = one_statement* *)
(* Statement sequence: zero or more statements via the Kleene-star
   combinator.  Fix: the original matched on the result only to return it
   unchanged in every branch; that redundant match was removed. *)
and stmts token_list =
  Success (Ls [], token_list) >>=* one_statement;;
|
|
|
|
(*examples
|
|
|
|
let ex_token_list = Tokenizer.total_parser "lambda(int y){12;};";;
|
|
|
|
List.iter Tokenizer.print_token ex_token_list;;
|
|
print_string "字串輸出結果";
|
|
|
|
print_parseoutput (one_statement ex_token_list);;
|
|
print_string "\n\n";;
|
|
|
|
|
|
|
|
let ex_token_list = Tokenizer.total_parser "(2);";;
|
|
|
|
(* List.iter Tokenizer.print_token ex_token_list;; *)
|
|
|
|
|
|
|
|
print_parseoutput (one_statement ex_token_list);;
|
|
print_string "\n\n";;
|
|
|
|
|
|
let ex_token_list = Tokenizer.total_parser "7/(5+6)*7;";;
|
|
print_parseoutput (stmts ex_token_list);;
|
|
|
|
print_string "\n\n";;
|
|
let ex_token_list = Tokenizer.total_parser "(7/(10-6)*a);";;
|
|
print_parseoutput (stmts ex_token_list);;
|
|
|
|
print_string "\n\n";;
|
|
let ex_token_list = Tokenizer.total_parser "a(b);";;
|
|
print_parseoutput (stmts ex_token_list);;
|
|
|
|
print_string "\n\n";;
|
|
let ex_token_list = Tokenizer.total_parser "a();";;
|
|
print_parseoutput (stmts ex_token_list);;
|
|
|
|
print_string "\n\n";;
|
|
let ex_token_list = Tokenizer.total_parser "a(b,c,a);";;
|
|
print_parseoutput (stmts ex_token_list);;
|
|
|
|
|
|
print_string "\n\n";;*)
|
|
(*let ex_token_list = Tokenizer.total_parser "(int -> int) a = 2+ 3;a + b;";;
|
|
print_parseoutput (stmts ex_token_list);;*)
|
|
(*print_string "\n\n";;
|
|
|
|
|
|
let ex_token_list = Tokenizer.total_parser "(int-> int) foo = lambda(int c){12;};foo(13);";;
|
|
print_string "ABACABRA";;
|
|
print_parseoutput (stmts ex_token_list);;
|
|
|
|
print_string "\n\n";;
|
|
let ex_token_list = Tokenizer.total_parser "lambda(a){12;};";;
|
|
print_parseoutput (stmts ex_token_list);;
|
|
|
|
print_string "\n\n";;
|
|
let ex_token_list = Tokenizer.total_parser "lambda(){};";;
|
|
print_parseoutput (stmts ex_token_list);;
|
|
|
|
print_string "\n\n";;
|
|
let ex_token_list = Tokenizer.total_parser "lambda(x){x;}(12);";;
|
|
print_parseoutput (stmts ex_token_list);;
|
|
|
|
*) |