#lang racket
;; Lexer for the uahgi2 markup syntax.
;;
;; Provides:
;;   unary-tokens  - value-less tokens: L_PAREN R_PAREN NUM_CONVERTER SEPERATOR EOF
;;   value-tokens  - tokens carrying a value: CHAR ID NUM NEWLINE
;;   uahgi2-lexer  - input-port -> position-token (one token per call)
;;   looping-lex   - string -> list of position-tokens (EOF excluded)

(require parser-tools/lex
         ;; The SRE operators (+ * / or ? ...) share names with racket's own
         ;; bindings; importing them under a ":" prefix keeps the ordinary
         ;; arithmetic/boolean forms usable in the rest of this module.
         (prefix-in : parser-tools/lex-sre))

(define-empty-tokens unary-tokens
  (L_PAREN R_PAREN NUM_CONVERTER SEPERATOR EOF))

(define-tokens value-tokens
  (CHAR ID NUM NEWLINE))

;; One or more decimal digits.
(define-lex-abbrev digits
  (:+ (char-set "0123456789")))

;; Identifier body (the part after "@"): starts with "_" or an ASCII letter,
;; continues with "_", "-", "!", "?", ASCII digits or ASCII letters.
(define-lex-abbrev raw-id
  (:: (:or "_" (:/ "a" "z") (:/ "A" "Z"))
      (:* (:or "_" "-" "!" "?" (:/ "0" "9") (:/ "a" "z") (:/ "A" "Z")))))

;; Any single character that is neither a digit nor one of @ \ { } % |
(define-lex-abbrev any-other-char
  (:~ (char-set "0123456789@\\{}%|")))

;; uahgi2-lexer : input-port -> position-token
;; Reads one token from the port. Comments (%...%) are skipped by lexing
;; again and returning the following token instead.
;; NOTE: "@", "\" and "%" are excluded from any-other-char, so an occurrence
;; that does not begin an @id, a backslash escape or a %comment% matches none
;; of the rules below.
(define uahgi2-lexer
  (lexer-src-pos
   ;; newline (listed before `whitespace` so it gets its own token kind)
   ["\n" (token-NEWLINE lexeme)]
   ;; any other whitespace is kept as a literal character
   [whitespace (token-CHAR lexeme)]
   ;; backslash escape of a special character: drop the backslash
   [(:: "\\" (char-set "@\\{}%|"))
    (token-CHAR (substring lexeme 1))]
   ;; @id : drop the "@" and intern the rest as a symbol
   [(:: "@" raw-id)
    (token-ID (string->symbol (substring lexeme 1)))]
   ;; {
   ["{" (token-L_PAREN)]
   ;; }
   ["}" (token-R_PAREN)]
   ;; | separator
   ["|" (token-SEPERATOR)]
   ;; ` convert to number
   ["`" (token-NUM_CONVERTER)]
   ;; 123
   [digits (token-NUM (string->number lexeme))]
   ;; 123.456, with an optional leading "-"
   [(:: (:? (char-set "-")) digits "." digits)
    (token-NUM (string->number lexeme))]
   ;; %COMMENT% : the body may not contain "%". The recursive call already
   ;; yields a position-token, so return it without wrapping it a second time.
   [(:: "%" (complement (:: any-string "%" any-string)) "%")
    (return-without-pos (uahgi2-lexer input-port))]
   [(eof) (token-EOF)]
   ;; everything else becomes a one-character CHAR token
   [any-other-char (token-CHAR lexeme)]))

;; looping-lex : string -> (listof position-token)
;; Lexes the whole string with uahgi2-lexer and returns the tokens in order.
;; Tokens whose payload is void are skipped; the EOF token ends the list and
;; is not included in it.
(define (looping-lex str)
  (define in (open-input-string str))
  ;; enable line/column tracking for the source positions
  (port-count-lines! in)
  (let loop ([v (uahgi2-lexer in)])
    (cond
      [(void? (position-token-token v)) (loop (uahgi2-lexer in))]
      ;; empty tokens such as EOF are represented by their name symbol
      [(eq? 'EOF (position-token-token v)) '()]
      [else (cons v (loop (uahgi2-lexer in)))])))

(provide uahgi2-lexer unary-tokens value-tokens looping-lex)