diff --git a/.gitignore b/.gitignore index 9aba52d..8676b5a 100644 --- a/.gitignore +++ b/.gitignore @@ -129,4 +129,7 @@ dmypy.json .pyre/ #backup file -*~ \ No newline at end of file +*~ + +# VS Code +launch.json \ No newline at end of file diff --git a/src/Editor/ClochurLexer.py b/src/Editor/ClochurLexer.py index 1f2374c..b70831c 100644 --- a/src/Editor/ClochurLexer.py +++ b/src/Editor/ClochurLexer.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 +#-*-coding:utf-8-*- import re from PyQt5.Qsci import QsciLexerCustom, QsciScintilla @@ -31,6 +33,8 @@ class ClochurLexer(QsciLexerCustom): self.PRIMARY = ['define', 'let' , '#t', '#f', 'lambda', '@', 'cond', 'if', 'docu'] + self.split_pattern = re.compile(r'(\s+|\\%|%|\\\[|\\\]|[[]|[]])') + font = QFont() font.setFamily(parent.font_family) font.setPointSize(parent.font_size) @@ -110,9 +114,7 @@ class ClochurLexer(QsciLexerCustom): line_utf8 = line.decode('utf-8') - split_pattern = re.compile(r'(\s+|\\%|%|\\\[|\\\]|[[]|[]])') - - line_utf8_splitted = split_pattern.split(line_utf8) + line_utf8_splitted = self.split_pattern.split(line_utf8) line_utf8_splitted_len_pair = [{"str": item, "len" : len(bytearray(item, "utf-8"))} for item in line_utf8_splitted] diff --git a/src/Editor/CustomQsciEditor.py b/src/Editor/CustomQsciEditor.py index 4b90732..c83bbb2 100644 --- a/src/Editor/CustomQsciEditor.py +++ b/src/Editor/CustomQsciEditor.py @@ -1,3 +1,6 @@ +#!/usr/bin/env python3 +#-*-coding:utf-8-*- + from PyQt5.QtGui import * from PyQt5.Qsci import QsciScintilla diff --git a/src/Editor/FindReplace.py b/src/Editor/FindReplace.py index 5caab70..02c80a7 100644 --- a/src/Editor/FindReplace.py +++ b/src/Editor/FindReplace.py @@ -1,3 +1,6 @@ +#!/usr/bin/env python3 +#-*-coding:utf-8-*- + import sys from PyQt5.QtWidgets import * from PyQt5.Qsci import QsciScintilla diff --git a/src/Editor/__about__.py b/src/Editor/__about__.py index e1abd2a..5e38d94 100644 --- a/src/Editor/__about__.py +++ b/src/Editor/__about__.py @@ -1,3 +1,7 @@ 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Tokenizer and S-expression parser for the Clochur typesetting language.

Token kinds: floats, ints, symbols, double-quoted strings, square
brackets, ``%`` line comments, whitespace/newlines, and free document
text (with backslash escapes for ``%``, ``[`` and ``]``).
"""

import re


class Parser():
    """Parse Clochur source text into nested Python lists of token dicts."""

    def __init__(self):
        # Named groups let ``Match.lastgroup`` report each token's type.
        # (The group names are required: ``atom`` dispatches on "int"/"flo".)
        float_pattern = r"(?P<flo>[+-]?\d+[.]\d+)"
        int_pattern = r"(?P<int>[+-]?\d+)"
        symbol_pattern = r"(?P<sym>[_a-zA-Z][_0-9a-zA-Z]+)"
        string_pattern = r"(?P<str>[\"]([^\"\\]|[\\][\"\\nt])+[\"])"
        parenthesis_pattern = r"(?P<paren>[[]|[]])"
        percent_pattern = r"(?P<percent>[%])"
        space_pattern = r"(?P<space>[ \t]+)"
        newline_pattern = r"(?P<nl>\n)"
        inside_docu_pattern = r"(?P<docu>([^%\[\]\n\s\\]|[\\][%\[\]]?)+)"

        # Alternation order matters: floats must be tried before ints, and
        # document text before plain whitespace.
        self.total_pattern = re.compile("|".join(
            [float_pattern, int_pattern, symbol_pattern, string_pattern,
             parenthesis_pattern, percent_pattern, inside_docu_pattern,
             space_pattern, newline_pattern]))

        self.clc_sexp = None   # raw source text of the last parse
        self.tokenized = None  # comment-free token stream
        self.index = None      # cursor into the token stream while parsing

    def get_clc_sexp(self, clc):
        """Tokenize *clc*, strip comments, parse, and return the parse tree.

        Returns the nested-list tree instead of only printing it, so callers
        can actually use the result (previously this returned ``None``).
        """
        self.clc_sexp = clc
        self.tokenized = self.remove_comment(self.tokenize(self.clc_sexp))
        return self.parse_main(self.tokenized)

    def tokenize(self, clc):
        """Split *clc* into token dicts carrying line/column/type info.

        BUGFIX: scan the *clc* argument, not the stale ``self.clc_sexp``
        (the parameter used to be ignored).
        """
        line_no = 1
        column_offset = 0  # byte offset of the start of the current line
        result = []
        for i in re.finditer(self.total_pattern, clc):
            column = i.start() - column_offset
            item = {"token": i.group(0), "line": line_no,
                    "col": column, "type": i.lastgroup}
            if i.group(0) == '\n':
                line_no += 1
                column_offset = i.end()
            result.append(item)
        return result

    def remove_comment(self, series):
        """Drop every token from a ``%`` up to (and including) its newline."""
        result = []
        is_comment_token = False
        for i in series:
            if i["token"] == "%":
                is_comment_token = True
            elif i["token"] == "\n":
                if is_comment_token:
                    # The newline terminates the comment and is dropped too.
                    is_comment_token = False
                else:
                    result.append(i)
            elif is_comment_token:
                pass  # token inside a comment — discard
            else:
                result.append(i)
        return result

    def move_forward(self):
        """Advance the parse cursor by one token."""
        self.index += 1

    def parse_main(self, series):
        """Parse *series*, wrapping it in one implicit ``[ ... ]`` pair.

        Raises Exception if closing brackets outnumber opening ones
        (leftover tokens after the implicit outer list is closed).
        """
        self.index = 0
        processed_series = (
            [{"token": "[", "line": None, "col": None, "type": None}]
            + series
            + [{"token": "]", "line": None, "col": None, "type": None}])
        result = self.parse(processed_series)
        if self.index < len(processed_series):
            raise Exception("the parenthesis ] is not balanced.")
        return result

    def atom(self, series):
        """Consume one non-bracket token, converting int/float literals in place."""
        result = series[self.index]
        if result["type"] == "int":
            result["token"] = int(result["token"])
        elif result["type"] == "flo":
            result["token"] = float(result["token"])
        self.move_forward()
        return result

    def parse(self, series):
        """Recursively parse one expression starting at ``self.index``.

        Raises Exception when an opening ``[`` is never closed.
        """
        if series[self.index]["token"] == "[":
            result = []
            self.move_forward()
            try:
                while series[self.index]["token"] != "]":
                    result.append(self.parse(series))
                self.move_forward()  # consume the closing "]"
                return result
            except IndexError:
                raise Exception("the parenthesis [ is not balanced.")
        return self.atom(series)


if __name__ == "__main__":
    # Smoke test: mixed s-expressions, document text, escapes and a comment.
    # Guarded so importing the package no longer runs the demo.
    a = Parser()
    text = '''[[[ 123 1.23 abc "\\123\\"喵喵"] 我是貓,喵\[喵\]貓\%。喵喵%喵
]]'''
    print(a.get_clc_sexp(text))