add parser and lexer

parent 0dcd45f118
commit e8c94c857d

6 changed files with 146 additions and 4 deletions

.gitignore (vendored) | 3 +++

@@ -130,3 +130,6 @@ dmypy.json
 #backup file
 *~
+
+# VS Code
+launch.json

@@ -1,3 +1,5 @@
 #!/usr/bin/env python3
 #-*-coding:utf-8-*-
+
+import re
 from PyQt5.Qsci import QsciLexerCustom, QsciScintilla

@@ -31,6 +33,8 @@ class ClochurLexer(QsciLexerCustom):
         self.PRIMARY = ['define', 'let' , '#t', '#f', 'lambda', '@', 'cond', 'if', 'docu']
+
+        self.split_pattern = re.compile(r'(\s+|\\%|%|\\\[|\\\]|[[]|[]])')
 
         font = QFont()
         font.setFamily(parent.font_family)
         font.setPointSize(parent.font_size)

@@ -110,9 +114,7 @@ class ClochurLexer(QsciLexerCustom):
             line_utf8 = line.decode('utf-8')
 
-            split_pattern = re.compile(r'(\s+|\\%|%|\\\[|\\\]|[[]|[]])')
-            line_utf8_splitted = split_pattern.split(line_utf8)
+            line_utf8_splitted = self.split_pattern.split(line_utf8)
 
             line_utf8_splitted_len_pair = [{"str": item, "len" : len(bytearray(item, "utf-8"))} for item in line_utf8_splitted]
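
Note on the two hunks above: the split pattern is now compiled once in
__init__ and reused as self.split_pattern, instead of being recompiled
every time a line is styled. A minimal standalone sketch of what the
split yields (hypothetical input; re.split keeps the delimiters because
the whole pattern is one capturing group):

    import re

    split_pattern = re.compile(r'(\s+|\\%|%|\\\[|\\\]|[[]|[]])')
    print(split_pattern.split(r'[define \% foo]'))
    # roughly: ['', '[', 'define', ' ', '', '\\%', '', ' ', 'foo', ']', '']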

@@ -1,3 +1,6 @@
+#!/usr/bin/env python3
+#-*-coding:utf-8-*-
+
 from PyQt5.QtGui import *
 from PyQt5.Qsci import QsciScintilla

@@ -1,3 +1,6 @@
+#!/usr/bin/env python3
+#-*-coding:utf-8-*-
+
 import sys
 from PyQt5.QtWidgets import *
 from PyQt5.Qsci import QsciScintilla

@@ -1,3 +1,7 @@
+#!/usr/bin/env python3
+#-*-coding:utf-8-*-
+
+
 version_no = "0.0.1"
 about_info = '''An S-expression-like typesetting language powered by the SILE engine, with a simple text editor.
 http://yoxem.github.com

src/Interpreter/__init__.py (new file) | 127 +++++

@@ -0,0 +1,127 @@
+#-*-coding:utf-8-*-
+
+import re
+
+
+class Parser():
+
+    def __init__(self):
+        # token patterns: each named group doubles as the token's type
+        float_pattern = r"(?P<flo>[+-]?\d+[.]\d+)"
+        int_pattern = r"(?P<int>[+-]?\d+)"
+        symbol_pattern = r"(?P<sym>[_a-zA-Z][_0-9a-zA-Z]+)"
+        string_pattern = r"(?P<str>[\"]([^\"\\]|[\\][\"\\nt])+[\"])"
+        parenthesis_pattern = r"(?P<paren>[[]|[]])"
+        percent_pattern = r"(?P<percent>[%])"
+        space_pattern = r"(?P<space>[ \t]+)"
+        newline_pattern = r"(?P<nl>\n)"
+        inside_docu_pattern = r"(?P<other>([^%\[\]\n\s\\]|[\\][%\[\]]?)+)"
+
+        # order matters: float before int so "1.23" is not read as "1",
+        # and the catch-all "other" group after the more specific ones
+        self.total_pattern = re.compile("|".join([float_pattern, int_pattern,
+            symbol_pattern, string_pattern, parenthesis_pattern,
+            percent_pattern, inside_docu_pattern, space_pattern,
+            newline_pattern]))
+
+        self.clc_sexp = None
+        self.tokenized = None
+        self.index = None
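
Note: the tokenizer relies on named-group alternation; when the combined
pattern matches, match.lastgroup names the sub-pattern that fired, and
that name becomes the token type. A reduced standalone sketch (the
pattern is cut down to three groups for brevity):

    import re

    pattern = re.compile(r"(?P<flo>[+-]?\d+[.]\d+)"
                         r"|(?P<int>[+-]?\d+)"
                         r"|(?P<sym>[_a-zA-Z][_0-9a-zA-Z]+)")
    for m in pattern.finditer("foo 42 1.5"):
        print(m.lastgroup, m.group(0))
    # sym foo
    # int 42
    # flo 1.5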
+
+    def get_clc_sexp(self, clc):
+        # tokenize, strip comments, then parse and print the tree
+        self.clc_sexp = clc
+        self.tokenized = self.remove_comment(self.tokenize(self.clc_sexp))
+        print(self.parse_main(self.tokenized))
+
+    def tokenize(self, clc):
+        line_no = 1
+        column = 0
+        column_offset = 0
+        find_iterator = re.finditer(self.total_pattern, clc)
+        result = []
+        for i in find_iterator:
+            # column is counted from the start of the current line
+            column = i.start() - column_offset
+
+            item = {"token" : i.group(0), "line": line_no, "col" : column, "type": i.lastgroup}
+            if i.group(0) == '\n':
+                line_no += 1
+                column_offset = i.end()
+
+            result.append(item)
+
+        return result
+
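
Note: every token records its line and column, so later stages can point
at source positions. Assuming the class as defined above, a quick look at
the stream (hypothetical session):

    p = Parser()
    for tok in p.tokenize("[add 1 2]"):
        print(tok)
    # {'token': '[', 'line': 1, 'col': 0, 'type': 'paren'}
    # {'token': 'add', 'line': 1, 'col': 1, 'type': 'sym'}
    # ... whitespace is kept as tokens with type 'space'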
+    def remove_comment(self, series):
+        # drop every token from a '%' up to and including the newline
+        # that ends the comment
+        result = []
+        is_comment_token = False
+        for i in series:
+            if i["token"] == "%":
+                is_comment_token = True
+            elif i["token"] == "\n":
+                if is_comment_token:
+                    is_comment_token = False
+                else:
+                    result.append(i)
+            elif is_comment_token:
+                pass
+            else:
+                result.append(i)
+
+        return result
+
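
Note: a '%' token opens a comment that runs to the end of the line, and
the newline that closes it is dropped along with the commented tokens.
A small check (hypothetical session):

    p = Parser()
    toks = p.tokenize("abc % ignored\ndef")
    print([t["token"] for t in p.remove_comment(toks)])
    # roughly: ['abc', ' ', 'def']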
+    def move_forward(self):
+        self.index += 1
+
+    def parse_main(self, series):
+        self.index = 0
+
+        # wrap the stream in a synthetic [ ... ] pair so the whole
+        # document parses as one list
+        processed_series = [{"token": "[", "line": None, "col": None, "type": None}] + series + \
+                           [{"token": "]", "line": None, "col": None, "type": None}]
+        result = self.parse(processed_series)
+
+        if self.index < len(processed_series):
+            raise Exception("the parenthesis ] is not balanced.")
+        else:
+            return result
+
+    def atom(self, series):
+        # leaf token: convert int/flo literals, keep the rest as-is
+        result = series[self.index]
+        if result["type"] == "int":
+            result["token"] = int(result["token"])
+        elif result["type"] == "flo":
+            result["token"] = float(result["token"])
+        else:
+            pass
+        self.move_forward()
+        return result
+
+    def parse(self, series):
+        result = None
+        if series[self.index]["token"] == "[":
+            # list case: collect items until the matching ]
+            result = []
+            self.move_forward()
+            try:
+                while series[self.index]["token"] != "]":
+                    item = self.parse(series)
+                    result.append(item)
+
+                self.move_forward()  # step over the closing ]
+                return result
+            except IndexError:
+                raise Exception("the parenthesis [ is not balanced.")
+        else:
+            result = self.atom(series)
+        return result
+
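
Note: parse/parse_main form a small recursive-descent reader. parse_main
wraps the stream in a synthetic [ ... ] pair so a well-formed document
comes back as one list whose nesting mirrors the brackets; atoms stay as
token dicts (whitespace tokens included), with int/flo literals converted
to Python numbers by atom(). Sketch (hypothetical session):

    p = Parser()
    toks = p.remove_comment(p.tokenize("[add 1 [mul 2 3]]"))
    tree = p.parse_main(toks)
    # tree[0] mirrors "[add 1 [mul 2 3]]": tree[0][2]["token"] == 1,
    # and the inner [mul 2 3] appears as a nested Python list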
+'''test'''
+a = Parser()
+# the test text mixes CJK prose ("I am a cat, meow[meow]cat%. meow meow
+# %meow"), escaped \[ \] \% sequences, and a % line comment
+text = '''[[[ 123 1.23 abc "\\123\\"喵喵"] 我是貓,喵\[喵\]貓\%。喵喵%喵
+]]'''
+
+a.get_clc_sexp(text)