diff --git a/uahgi-ng/a.txt b/uahgi-ng/a.txt new file mode 100644 index 0000000..095de78 --- /dev/null +++ b/uahgi-ng/a.txt @@ -0,0 +1,679 @@ +0 g1038660 = (g1038661) +1 g1038661 = (u-program EOF) +2 u-program = () +3 u-program = (u-expr u-program) +4 u-expr = (u-series) +5 u-expr = (u-atom) +6 u-expr = (u-converting-num) +7 u-expr = (L_PAREN u-sexps R_PAREN) +8 u-sexps = () +9 u-sexps = (u-expr u-sexps) +10 u-series-unit = (ID) +11 u-series-unit = (u-string) +12 u-series-unit = (NUM) +13 u-series-unit = (L_PAREN u-sexps R_PAREN) +14 u-series-unit = (newlines) +15 u-series = () +16 u-series = (u-series-unit u-series) +17 u-converting-num = (NUM_CONVERTER ID) +18 u-converting-num = (NUM_CONVERTER NUM) +19 u-atom = (ID) +20 u-atom = (u-string) +21 u-atom = (NUM) +22 u-atom = (newlines) +23 u-string = (CHAR) +24 u-string = (u-string CHAR) +25 newlines = (NEWLINE) +26 newlines = (NEWLINE newlines) +State 0 + g1038660 -> . g1038661 + +begin conflict: + NEWLINE shift 15 + NEWLINE reduce 15 +end conflict + g1038661 goto 1 + u-atom goto 7 +begin conflict: + CHAR shift 12 + CHAR reduce 15 +end conflict + u-converting-num goto 6 +begin conflict: + EOF reduce 15 + EOF reduce 2 +end conflict + u-string goto 8 + u-program goto 2 + u-series-unit goto 4 + u-expr goto 3 + newlines goto 9 + u-series goto 5 +begin conflict: + NUM shift 14 + NUM reduce 15 +end conflict +begin conflict: + ID shift 13 + ID reduce 15 +end conflict +begin conflict: + L_PAREN shift 10 + L_PAREN reduce 15 +end conflict +begin conflict: + NUM_CONVERTER shift 11 + NUM_CONVERTER reduce 15 +end conflict + +State 1 + g1038660 -> g1038661 . + + +State 2 + g1038661 -> u-program . EOF + + EOF accept + +State 3 + u-program -> u-expr . 
u-program + +begin conflict: + NEWLINE shift 15 + NEWLINE reduce 15 +end conflict + u-atom goto 7 +begin conflict: + CHAR shift 12 + CHAR reduce 15 +end conflict + u-converting-num goto 6 +begin conflict: + EOF reduce 15 + EOF reduce 2 +end conflict + u-string goto 8 + u-program goto 17 + u-series-unit goto 4 + u-expr goto 3 + newlines goto 9 + u-series goto 5 +begin conflict: + NUM shift 14 + NUM reduce 15 +end conflict +begin conflict: + ID shift 13 + ID reduce 15 +end conflict +begin conflict: + L_PAREN shift 10 + L_PAREN reduce 15 +end conflict +begin conflict: + NUM_CONVERTER shift 11 + NUM_CONVERTER reduce 15 +end conflict + +State 4 + u-series -> u-series-unit . u-series + +begin conflict: + NEWLINE shift 15 + NEWLINE reduce 15 +end conflict +begin conflict: + CHAR shift 12 + CHAR reduce 15 +end conflict + R_PAREN reduce 15 + EOF reduce 15 + u-string goto 19 + u-series-unit goto 4 + newlines goto 20 + u-series goto 18 +begin conflict: + NUM shift 23 + NUM reduce 15 +end conflict +begin conflict: + ID shift 22 + ID reduce 15 +end conflict +begin conflict: + L_PAREN shift 21 + L_PAREN reduce 15 +end conflict + NUM_CONVERTER reduce 15 + +State 5 + u-expr -> u-series . + + EOF reduce 4 + NEWLINE reduce 4 + CHAR reduce 4 + NUM reduce 4 + R_PAREN reduce 4 + L_PAREN reduce 4 + ID reduce 4 + NUM_CONVERTER reduce 4 + +State 6 + u-expr -> u-converting-num . + + EOF reduce 6 + NEWLINE reduce 6 + CHAR reduce 6 + NUM reduce 6 + R_PAREN reduce 6 + L_PAREN reduce 6 + ID reduce 6 + NUM_CONVERTER reduce 6 + +State 7 + u-expr -> u-atom . + + EOF reduce 5 + NEWLINE reduce 5 + CHAR reduce 5 + NUM reduce 5 + R_PAREN reduce 5 + L_PAREN reduce 5 + ID reduce 5 + NUM_CONVERTER reduce 5 + +State 8 + u-series-unit -> u-string . + u-atom -> u-string . + u-string -> u-string . 
CHAR + +begin conflict: + EOF reduce 11 + EOF reduce 20 +end conflict +begin conflict: + NEWLINE reduce 11 + NEWLINE reduce 20 +end conflict +begin conflict: + CHAR shift 24 + CHAR reduce 11 + CHAR reduce 20 +end conflict +begin conflict: + NUM reduce 11 + NUM reduce 20 +end conflict +begin conflict: + R_PAREN reduce 11 + R_PAREN reduce 20 +end conflict +begin conflict: + L_PAREN reduce 11 + L_PAREN reduce 20 +end conflict +begin conflict: + ID reduce 11 + ID reduce 20 +end conflict +begin conflict: + NUM_CONVERTER reduce 11 + NUM_CONVERTER reduce 20 +end conflict + +State 9 + u-series-unit -> newlines . + u-atom -> newlines . + +begin conflict: + EOF reduce 14 + EOF reduce 22 +end conflict +begin conflict: + NEWLINE reduce 14 + NEWLINE reduce 22 +end conflict +begin conflict: + CHAR reduce 14 + CHAR reduce 22 +end conflict +begin conflict: + NUM reduce 14 + NUM reduce 22 +end conflict +begin conflict: + R_PAREN reduce 14 + R_PAREN reduce 22 +end conflict +begin conflict: + L_PAREN reduce 14 + L_PAREN reduce 22 +end conflict +begin conflict: + ID reduce 14 + ID reduce 22 +end conflict +begin conflict: + NUM_CONVERTER reduce 14 + NUM_CONVERTER reduce 22 +end conflict + +State 10 + u-expr -> L_PAREN . u-sexps R_PAREN + u-series-unit -> L_PAREN . u-sexps R_PAREN + +begin conflict: + NEWLINE shift 15 + NEWLINE reduce 15 +end conflict + u-atom goto 7 +begin conflict: + CHAR shift 12 + CHAR reduce 15 +end conflict +begin conflict: + R_PAREN reduce 15 + R_PAREN reduce 8 +end conflict + u-sexps goto 26 + u-converting-num goto 6 + u-string goto 8 + u-series-unit goto 4 + u-expr goto 25 + newlines goto 9 + u-series goto 5 +begin conflict: + NUM shift 14 + NUM reduce 15 +end conflict +begin conflict: + ID shift 13 + ID reduce 15 +end conflict +begin conflict: + L_PAREN shift 10 + L_PAREN reduce 15 +end conflict +begin conflict: + NUM_CONVERTER shift 11 + NUM_CONVERTER reduce 15 +end conflict + +State 11 + u-converting-num -> NUM_CONVERTER . 
ID + u-converting-num -> NUM_CONVERTER . NUM + + NUM shift 28 + ID shift 27 + +State 12 + u-string -> CHAR . + + EOF reduce 23 + NEWLINE reduce 23 + CHAR reduce 23 + NUM reduce 23 + R_PAREN reduce 23 + L_PAREN reduce 23 + ID reduce 23 + NUM_CONVERTER reduce 23 + +State 13 + u-series-unit -> ID . + u-atom -> ID . + +begin conflict: + EOF reduce 10 + EOF reduce 19 +end conflict +begin conflict: + NEWLINE reduce 10 + NEWLINE reduce 19 +end conflict +begin conflict: + CHAR reduce 10 + CHAR reduce 19 +end conflict +begin conflict: + NUM reduce 10 + NUM reduce 19 +end conflict +begin conflict: + R_PAREN reduce 10 + R_PAREN reduce 19 +end conflict +begin conflict: + L_PAREN reduce 10 + L_PAREN reduce 19 +end conflict +begin conflict: + ID reduce 10 + ID reduce 19 +end conflict +begin conflict: + NUM_CONVERTER reduce 10 + NUM_CONVERTER reduce 19 +end conflict + +State 14 + u-series-unit -> NUM . + u-atom -> NUM . + +begin conflict: + EOF reduce 12 + EOF reduce 21 +end conflict +begin conflict: + NEWLINE reduce 12 + NEWLINE reduce 21 +end conflict +begin conflict: + CHAR reduce 12 + CHAR reduce 21 +end conflict +begin conflict: + NUM reduce 12 + NUM reduce 21 +end conflict +begin conflict: + R_PAREN reduce 12 + R_PAREN reduce 21 +end conflict +begin conflict: + L_PAREN reduce 12 + L_PAREN reduce 21 +end conflict +begin conflict: + ID reduce 12 + ID reduce 21 +end conflict +begin conflict: + NUM_CONVERTER reduce 12 + NUM_CONVERTER reduce 21 +end conflict + +State 15 + newlines -> NEWLINE . + newlines -> NEWLINE . newlines + +begin conflict: + NEWLINE shift 15 + NEWLINE reduce 25 +end conflict + CHAR reduce 25 + R_PAREN reduce 25 + EOF reduce 25 + NUM reduce 25 + newlines goto 29 + ID reduce 25 + L_PAREN reduce 25 + NUM_CONVERTER reduce 25 + +State 16 + g1038661 -> u-program EOF . + + +State 17 + u-program -> u-expr u-program . + + EOF reduce 3 + +State 18 + u-series -> u-series-unit u-series . 
+ + EOF reduce 16 + NEWLINE reduce 16 + CHAR reduce 16 + NUM reduce 16 + R_PAREN reduce 16 + L_PAREN reduce 16 + ID reduce 16 + NUM_CONVERTER reduce 16 + +State 19 + u-series-unit -> u-string . + u-string -> u-string . CHAR + + EOF reduce 11 + NEWLINE reduce 11 +begin conflict: + CHAR shift 24 + CHAR reduce 11 +end conflict + NUM reduce 11 + R_PAREN reduce 11 + L_PAREN reduce 11 + ID reduce 11 + NUM_CONVERTER reduce 11 + +State 20 + u-series-unit -> newlines . + + EOF reduce 14 + NEWLINE reduce 14 + CHAR reduce 14 + NUM reduce 14 + R_PAREN reduce 14 + L_PAREN reduce 14 + ID reduce 14 + NUM_CONVERTER reduce 14 + +State 21 + u-series-unit -> L_PAREN . u-sexps R_PAREN + +begin conflict: + NEWLINE shift 15 + NEWLINE reduce 15 +end conflict + u-atom goto 7 +begin conflict: + CHAR shift 12 + CHAR reduce 15 +end conflict +begin conflict: + R_PAREN reduce 15 + R_PAREN reduce 8 +end conflict + u-sexps goto 30 + u-converting-num goto 6 + u-string goto 8 + u-series-unit goto 4 + u-expr goto 25 + newlines goto 9 + u-series goto 5 +begin conflict: + NUM shift 14 + NUM reduce 15 +end conflict +begin conflict: + ID shift 13 + ID reduce 15 +end conflict +begin conflict: + L_PAREN shift 10 + L_PAREN reduce 15 +end conflict +begin conflict: + NUM_CONVERTER shift 11 + NUM_CONVERTER reduce 15 +end conflict + +State 22 + u-series-unit -> ID . + + EOF reduce 10 + NEWLINE reduce 10 + CHAR reduce 10 + NUM reduce 10 + R_PAREN reduce 10 + L_PAREN reduce 10 + ID reduce 10 + NUM_CONVERTER reduce 10 + +State 23 + u-series-unit -> NUM . + + EOF reduce 12 + NEWLINE reduce 12 + CHAR reduce 12 + NUM reduce 12 + R_PAREN reduce 12 + L_PAREN reduce 12 + ID reduce 12 + NUM_CONVERTER reduce 12 + +State 24 + u-string -> u-string CHAR . + + EOF reduce 24 + NEWLINE reduce 24 + CHAR reduce 24 + NUM reduce 24 + R_PAREN reduce 24 + L_PAREN reduce 24 + ID reduce 24 + NUM_CONVERTER reduce 24 + +State 25 + u-sexps -> u-expr . 
u-sexps + +begin conflict: + NEWLINE shift 15 + NEWLINE reduce 15 +end conflict + u-atom goto 7 +begin conflict: + CHAR shift 12 + CHAR reduce 15 +end conflict +begin conflict: + R_PAREN reduce 15 + R_PAREN reduce 8 +end conflict + u-sexps goto 31 + u-converting-num goto 6 + u-string goto 8 + u-series-unit goto 4 + u-expr goto 25 + newlines goto 9 + u-series goto 5 +begin conflict: + NUM shift 14 + NUM reduce 15 +end conflict +begin conflict: + ID shift 13 + ID reduce 15 +end conflict +begin conflict: + L_PAREN shift 10 + L_PAREN reduce 15 +end conflict +begin conflict: + NUM_CONVERTER shift 11 + NUM_CONVERTER reduce 15 +end conflict + +State 26 + u-expr -> L_PAREN u-sexps . R_PAREN + u-series-unit -> L_PAREN u-sexps . R_PAREN + + R_PAREN shift 32 + +State 27 + u-converting-num -> NUM_CONVERTER ID . + + EOF reduce 17 + NEWLINE reduce 17 + CHAR reduce 17 + NUM reduce 17 + R_PAREN reduce 17 + L_PAREN reduce 17 + ID reduce 17 + NUM_CONVERTER reduce 17 + +State 28 + u-converting-num -> NUM_CONVERTER NUM . + + EOF reduce 18 + NEWLINE reduce 18 + CHAR reduce 18 + NUM reduce 18 + R_PAREN reduce 18 + L_PAREN reduce 18 + ID reduce 18 + NUM_CONVERTER reduce 18 + +State 29 + newlines -> NEWLINE newlines . + + EOF reduce 26 + NEWLINE reduce 26 + CHAR reduce 26 + NUM reduce 26 + R_PAREN reduce 26 + L_PAREN reduce 26 + ID reduce 26 + NUM_CONVERTER reduce 26 + +State 30 + u-series-unit -> L_PAREN u-sexps . R_PAREN + + R_PAREN shift 33 + +State 31 + u-sexps -> u-expr u-sexps . + + R_PAREN reduce 9 + +State 32 + u-expr -> L_PAREN u-sexps R_PAREN . + u-series-unit -> L_PAREN u-sexps R_PAREN . 
+ +begin conflict: + EOF reduce 7 + EOF reduce 13 +end conflict +begin conflict: + NEWLINE reduce 7 + NEWLINE reduce 13 +end conflict +begin conflict: + CHAR reduce 7 + CHAR reduce 13 +end conflict +begin conflict: + NUM reduce 7 + NUM reduce 13 +end conflict +begin conflict: + R_PAREN reduce 7 + R_PAREN reduce 13 +end conflict +begin conflict: + L_PAREN reduce 7 + L_PAREN reduce 13 +end conflict +begin conflict: + ID reduce 7 + ID reduce 13 +end conflict +begin conflict: + NUM_CONVERTER reduce 7 + NUM_CONVERTER reduce 13 +end conflict + +State 33 + u-series-unit -> L_PAREN u-sexps R_PAREN . + + EOF reduce 13 + NEWLINE reduce 13 + CHAR reduce 13 + NUM reduce 13 + R_PAREN reduce 13 + L_PAREN reduce 13 + ID reduce 13 + NUM_CONVERTER reduce 13 + +38 shift/reduce conflicts +45 reduce/reduce conflicts
diff --git a/uahgi-ng/expander.rkt b/uahgi-ng/expander.rkt
new file mode 100644
index 0000000..25a7b19
--- /dev/null
+++ b/uahgi-ng/expander.rkt
@@ -0,0 +1,3 @@
+(module uahgi-ng racket/base ; expander module, written in racket/base
+  (provide lambda quote add1 begin #%module-begin #%app #%datum #%expression #%top) ; the only bindings this module exports
+  )
diff --git a/uahgi-ng/lexer.rkt b/uahgi-ng/lexer.rkt
new file mode 100644
index 0000000..5cf2bce
--- /dev/null
+++ b/uahgi-ng/lexer.rkt
@@ -0,0 +1,48 @@
+#lang racket
+(require parser-tools/lex)
+(require parser-tools/lex-sre)
+
+
+
+(define-empty-tokens unary-tokens (L_PAREN R_PAREN NUM_CONVERTER SEPERATOR EOF)) ; tokens that carry no value
+(define-tokens value-tokens ( CHAR ID NUM NEWLINE)) ; tokens that carry a value
+
+
+(define-lex-abbrev digits (+ (char-set "0123456789"))) ; one or more decimal digits
+(define-lex-abbrev raw-id (: (or "_" (/ "a" "z") (/ "A" "Z")) (* (or "_" (/ "0" "9") (/ "a" "z") (/ "A" "Z"))))) ; "_" or a letter, then any run of "_", digits and letters
+(define-lex-abbrev any-other-char (~ (char-set "0123456789@\\{}%|"))) ; one char that is neither a digit nor one of @ \ { } % |
+
+(define uahgi2-lexer ; lexer for uahgi source text; lexer-src-pos attaches source positions to the tokens
+  (lexer-src-pos
+   ["\n" (token-NEWLINE lexeme)] ; newline gets its own token (this rule precedes the whitespace rule)
+   [whitespace (token-CHAR lexeme)] ; whitespace is kept as a CHAR token
+   [(: "\\"(char-set "@\\{}%|")) (token-CHAR (substring lexeme
+                                             1 (string-length lexeme)))] ; backslash escape: CHAR holding the lexeme without its leading backslash
+   [(: "@" raw-id) (token-ID (string->symbol(substring lexeme
+                                            1 (string-length lexeme))))] ; @id: ID holding the name without "@", as a symbol
+   ["{" (token-L_PAREN)] ; {
+   ["}" (token-R_PAREN)] ; }
+   ["|" (token-SEPERATOR)] ; | separator
+   ["`" (token-NUM_CONVERTER)] ; ` convert to number
+   [digits (token-NUM (string->number lexeme))] ; 123 (digits only, no sign)
+   [(: (? (char-set "-")) digits "." digits) ; 123.456, with an optional leading "-"
+    (token-NUM (string->number lexeme))]
+   [ (: "%" (complement (: any-string "%" any-string)) "%") ; %COMMENT%: text between two "%" that itself contains no "%"
+     (return-without-pos (uahgi2-lexer input-port))] ; emit nothing for it: return the result of lexing what follows
+   [(eof) (token-EOF)] ; end of input
+   [any-other-char (token-CHAR lexeme)] ; every remaining char becomes a CHAR token
+))
+
+
+
+
+(define (looping-lex str) ; lex the whole string STR into a list of position-tokens; the EOF token is not included
+  (define in (open-input-string str))
+  (port-count-lines! in) ; enable line counting on the string port
+  (let loop ([v (uahgi2-lexer in)])
+    (cond [(void? (position-token-token v)) (loop (uahgi2-lexer in))] ; drop a void token and continue
+          [(eq? 'EOF (position-token-token v)) '()] ; EOF ends the list
+          [else (cons v (loop (uahgi2-lexer in)))]))) ; keep the token, then lex the rest
+
+
+(provide uahgi2-lexer unary-tokens value-tokens looping-lex)
\ No newline at end of file
diff --git a/uahgi-ng/main.rkt b/uahgi-ng/main.rkt
new file mode 100644
index 0000000..da73f2d
--- /dev/null
+++ b/uahgi-ng/main.rkt
@@ -0,0 +1,12 @@
+#lang racket/base
+(require "parser.rkt" "tokenizer.rkt")
+(require syntax/strip-context)
+
+(define (read-syntax path port) ; lex and parse PORT, then wrap the parse result in a module form
+  (define parse-tree (parse-tkns (make-tokenizer port path))) ; token list -> parsed program
+  (strip-context ; strip-context is applied to the generated module form
+   #`(module uahgi-ng-mod uahgi-ng/expander ; the generated module names uahgi-ng/expander as its language
+       #,parse-tree)))
+
+(module+ reader ; submodule `reader` exports read-syntax
+  (provide read-syntax))
\ No newline at end of file
diff --git a/uahgi-ng/parser.rkt b/uahgi-ng/parser.rkt
new file mode 100644
index 0000000..d9d3e1a
--- /dev/null
+++ b/uahgi-ng/parser.rkt
@@ -0,0 +1,86 @@
+#lang racket
+(require megaparsack megaparsack/text)
+(require uahgi-ng/lexer)
+(require megaparsack/parser-tools/lex)
+(require data/monad)
+(require data/applicative)
+
+(define newline/p (token/p 'NEWLINE)) ; one NEWLINE token
+(define num/p (do ; one NUM token, returned as a string
+                [num <- (token/p 'NUM)]
+                (pure (number->string num))))
+(define char/p (token/p 'CHAR)) ; one CHAR token
+(define id/p (token/p 'ID)) ; one ID token
+(define num-converter/p (token/p 'NUM_CONVERTER)) ; one NUM_CONVERTER token (not referenced again in this file)
+
+(define string/p ; one or more CHAR tokens, concatenated into a single string
+  (do
+    [chars <- (many+/p char/p)]
+    (pure (foldr string-append "" chars))))
+
+(define atom/p (or/p ; a number (as string), a string, or an id, tried in that order
+                num/p
+                string/p
+                id/p))
+
+(define converting-num/p ; NUM_CONVERTER followed by a number; yields the number as a number
+  (do
+    (token/p 'NUM_CONVERTER)
+    [num <- num/p]
+    (pure (string->number num)))) ; num/p returned a string, so convert it back
+
+
+(define sexp-elem/p ; one element of a braced form
+  (delay/p (or/p expr/p ; delay/p: expr/p and series/p are only defined further down
+                 series/p))) ; NOTE(review): expr/p is tried first, so series/p is reached only when expr/p fails - confirm this order is intended
+
+
+(define sexp/p ; L_PAREN, zero or more elements separated by SEPERATOR, R_PAREN; yields the list of elements
+  (do
+    (token/p 'L_PAREN)
+    [args <- (many/p sexp-elem/p #:sep (token/p 'SEPERATOR))]
+    (token/p 'R_PAREN)
+    (pure (list* args)))) ; list* with a single argument returns that argument itself
+
+(define expr/p ; an atom, a converted number, or a braced form
+  (or/p atom/p
+        converting-num/p
+        sexp/p))
+
+
+
+
+
+
+(define series-elem/p ; one element of a text series
+  (or/p newline/p
+        num/p
+        string/p
+        id/p
+        sexp/p))
+
+(define series/p ; at least two series elements, returned as (list 'text-series args)
+  (do [args <- (many/p series-elem/p #:min 2)]
+    (pure (list 'text-series args))))
+
+
+
+
+
+
+
+(define prog/p ; whole program: expressions wrapped in (begin ...); syntax/p makes the result a syntax object
+  (syntax/p
+   (do
+     (many/p (token/p 'NEWLINE)) ; leading NEWLINE tokens are discarded
+     [lines <- (many/p expr/p #:sep (many/p(token/p 'NEWLINE)))] ; expressions separated by zero or more NEWLINE tokens
+     (many/p (token/p 'NEWLINE)) ; trailing NEWLINE tokens are discarded
+     (pure `(begin ,@lines)))))
+
+(define (parse-tkns tokens) ; parse a list of tokens with prog/p and extract the result via parse-result!
+  (parse-result! (parse-tokens prog/p tokens)))
+
+(define (parse-string str) ; lex STR with looping-lex, then parse the tokens
+  (parse-tkns (looping-lex str)))
+
+(provide parse-tkns parse-string)
diff --git a/uahgi-ng/parser.txt b/uahgi-ng/parser.txt
new file mode 100644
index 0000000..1d76f16
--- /dev/null
+++ b/uahgi-ng/parser.txt
@@ -0,0 +1,16 @@
+#lang brag
+u-program: [/NEWLINE+] (u-expr [/NEWLINE+])*
+u-expr:
+ ; u-series ; string series
+ | u-atom
+ | u-converting-num
+ | u-sexp
+u-sexp:
+ /L_PAREN u-expr (/SEPERATOR u-expr)* /R_PAREN
+u-series: (u-atom | u-sexp)+
+~~~u-converting-num: /NUM_CONVERTER (u-id | u-number)
+~~~u-atom: u-id | u-string | u-number | (NEWLINE+)
+~~~u-number: INTEGER | DECIMAL
+u-id: ID
+@u-string: u-char+
+@u-char: CHAR | " "
diff --git a/uahgi-ng/test.ug b/uahgi-ng/test.ug
new file mode 100644
index 0000000..e078251
--- /dev/null
+++ b/uahgi-ng/test.ug
@@ -0,0 +1,2 @@
+#lang uahgi-ng
+{@add1|`3}
\ No newline at end of file
diff --git a/uahgi-ng/tokenizer.rkt b/uahgi-ng/tokenizer.rkt
new file mode 100644
index 0000000..f7e1988
--- /dev/null
+++ b/uahgi-ng/tokenizer.rkt
@@ -0,0 +1,9 @@
+#lang racket/base
+(require "lexer.rkt" parser-tools/lex racket/port)
+
+(define (make-tokenizer ip [path #f]) ; read all of port IP and return its tokens as a list; PATH is optional and defaults to #f
+  (port-count-lines! ip) ; enable line counting on IP itself
+  (file-path path) ; pass PATH to file-path (presumably the parser-tools/lex parameter - confirm)
+  (looping-lex (port->string ip))) ; the text is read into a string and lexed by looping-lex
+
+(provide make-tokenizer)
\ No newline at end of file