├── .github └── workflows │ └── gpt_summarizer.yml ├── .gitignore ├── Jenkinsfile ├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── config ├── constant_ast.json ├── decl_ast.json ├── funccall_ast.json ├── funcdef_ast.json └── id_ast.json ├── examples ├── math.c ├── math.hc ├── test.c └── test.hc ├── requirements.txt ├── secularize ├── __init__.py ├── char.py ├── parser.py ├── token.py └── utils.py ├── setup.cfg └── setup.py /.github/workflows/gpt_summarizer.yml: -------------------------------------------------------------------------------- 1 | name: GPT Commits summarizer 2 | # Summary: This action will write a comment about every commit in a pull request, as well as generate a summary for every file that was modified and add it to the review page, compile a PR summary from all commit summaries and file diff summaries, and delete outdated code review comments 3 | 4 | on: 5 | pull_request: 6 | types: [opened, synchronize] 7 | 8 | jobs: 9 | summarize: 10 | runs-on: self-hosted 11 | permissions: write-all # Some repositories need this line 12 | 13 | steps: 14 | - uses: KanHarI/gpt-commit-summarizer@master 15 | env: 16 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 17 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | venv 3 | .pyc 4 | .python-version 5 | .eggs 6 | run.sh 7 | build 8 | secularize.egg-info 9 | dist 10 | -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | pipeline { 2 | agent { 3 | docker { 4 | image 'python:3.6' 5 | args '-u root:sudo' 6 | } 7 | } 8 | 9 | stages { 10 | stage('Build') { 11 | steps { 12 | sh 'python setup.py install' 13 | } 14 | } 15 | stage('Test') { 16 | steps { 17 | echo 'you suck, there\'s no tests' 18 | } 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 James Albert 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include bin * 2 | recursive-include config * 3 | recursive-include secularize * 4 | 5 | include LICENSE.txt 6 | include README.md 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HolyC-for-Linux 2 | run HolyC on Linux secularly 3 | 4 | ![Build Status](https://img.shields.io/badge/build-passing-brightgreen) 5 | ![License](https://img.shields.io/github/license/jamesalbert/holyc-for-Linux?color=brightgreen&logoColor=brightgreen) 6 | [![PyPI version](https://badge.fury.io/py/secularize.svg)](https://badge.fury.io/py/secularize) 7 | 8 | #### Disclaimer 9 | 10 | This tool is in super-hella-mega alpha stage. If you use this, you will die. Or worse, your current operating system will be replaced with TempleOS. I've only tested this on `3.7-dev`. 11 | 12 | ## Install 13 | 14 | ``` 15 | pip install secularize 16 | ``` 17 | 18 | ## Translate 19 | 20 | The primary use is to translate holyc to c. Do this with: 21 | `secularize examples/test.hc` 22 | 23 | this turns `examples/test.hc` 24 | ```c 25 | F64 *s = 3; 26 | 27 | U0 test(I16 a, U8 b, F64 c) { 28 | Print("hello"); 29 | } 30 | 31 | F64 pest(I8 d) { 32 | Print("nothing"); 33 | } 34 | 35 | Print("%s %s", "hello", "world"); 36 | I64 b = 2.000; 37 | ``` 38 | 39 | into `examples/test.c` 40 | ```c 41 | void test(short a, unsigned char b, double c) 42 | { 43 | printf("hello"); 44 | } 45 | 46 | double pest(char d) 47 | { 48 | printf("nothing"); 49 | } 50 | 51 | int main() 52 | { 53 | double* s = 3; 54 | printf("%s %s", "hello", "world"); 55 | long b = 2.0; 56 | } 57 | ``` 58 | 59 | ## Debugging 60 | 61 | To add a feature, it's useful to get the AST of an expected target. To do this, write the C file you're trying to translate to, then run `secularize dump-ast name-of-file.c`. This will pretty print the AST to json. 
62 | 63 | ```sh 64 | $ cat examples/math.c 65 | int main() 66 | { 67 | long a = 3; 68 | long b = 2; 69 | } 70 | 71 | $ secularize dump-ast examples/math.c 72 | { 73 | "_nodetype": "FileAST", 74 | "coord": null, 75 | "ext": [ 76 | { 77 | "_nodetype": "FuncDef", 78 | "coord": "examples/math.c:1:5", 79 | "decl": { 80 | "_nodetype": "Decl", 81 | "name": "main", 82 | "quals": [], 83 | "storage": [], 84 | "funcspec": [], 85 | "coord": "examples/math.c:1:5", 86 | "type": { 87 | "_nodetype": "FuncDecl", 88 | "coord": "examples/math.c:1:5", 89 | "type": { 90 | "_nodetype": "TypeDecl", 91 | "declname": "main", 92 | "quals": [], 93 | "coord": "examples/math.c:1:5", 94 | "type": { 95 | "_nodetype": "IdentifierType", 96 | "names": [ 97 | "int" 98 | ], 99 | "coord": "examples/math.c:1:1" 100 | } 101 | }, 102 | "args": null 103 | }, 104 | "init": null, 105 | "bitsize": null 106 | }, 107 | "body": { 108 | "_nodetype": "Compound", 109 | "coord": "examples/math.c:2:1", 110 | "block_items": [ 111 | { 112 | "_nodetype": "Decl", 113 | "name": "a", 114 | "quals": [], 115 | "storage": [], 116 | "funcspec": [], 117 | "coord": "examples/math.c:3:8", 118 | "type": { 119 | "_nodetype": "TypeDecl", 120 | "declname": "a", 121 | "quals": [], 122 | "coord": "examples/math.c:3:8", 123 | "type": { 124 | "_nodetype": "IdentifierType", 125 | "names": [ 126 | "long" 127 | ], 128 | "coord": "examples/math.c:3:3" 129 | } 130 | }, 131 | "init": { 132 | "_nodetype": "Constant", 133 | "type": "int", 134 | "value": "3", 135 | "coord": "examples/math.c:3:12" 136 | }, 137 | "bitsize": null 138 | }, 139 | { 140 | "_nodetype": "Decl", 141 | "name": "b", 142 | "quals": [], 143 | "storage": [], 144 | "funcspec": [], 145 | "coord": "examples/math.c:4:8", 146 | "type": { 147 | "_nodetype": "TypeDecl", 148 | "declname": "b", 149 | "quals": [], 150 | "coord": "examples/math.c:4:8", 151 | "type": { 152 | "_nodetype": "IdentifierType", 153 | "names": [ 154 | "long" 155 | ], 156 | "coord": "examples/math.c:4:3" 157 | } 158 | }, 159 | "init": { 160 | "_nodetype": "Constant", 161 | "type": "int", 162 | "value": "2", 163 | "coord": "examples/math.c:4:12" 164 | }, 165 | "bitsize": null 166 | } 167 | ] 168 | }, 169 | "param_decls": null 170 | } 171 | ] 172 | } 173 | 174 | ``` 175 | 176 | ## What's Supported 177 | 178 | - print statements 179 | - primitive data types 180 | - basic functions 181 | 182 | ## What's Not Supported 183 | 184 | Everything else. Deal with it. 
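## Type Mapping

For reference, the primitive types (and `Print`) are translated using the `direct_trans` table in `secularize/token.py`:

- `U0` -> `void`
- `U8` -> `unsigned char`
- `U16` -> `unsigned short`
- `U32` -> `unsigned int`
- `U64` -> `unsigned long`
- `I8` -> `char`
- `I16` -> `short`
- `I32` -> `int`
- `I64` -> `long`
- `F64` -> `double`
- `Print` -> `printf`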
185 | -------------------------------------------------------------------------------- /config/constant_ast.json: -------------------------------------------------------------------------------- 1 | { 2 | "_nodetype": "Constant", 3 | "type": null, 4 | "value": null, 5 | "coord": null 6 | } 7 | -------------------------------------------------------------------------------- /config/decl_ast.json: -------------------------------------------------------------------------------- 1 | { 2 | "_nodetype": "Decl", 3 | "name": null, 4 | "quals": [], 5 | "storage": [], 6 | "funcspec": [], 7 | "coord": null, 8 | "type": { 9 | "_nodetype": "TypeDecl", 10 | "declname": null, 11 | "quals": [], 12 | "coord": null, 13 | "type": { 14 | "_nodetype": "IdentifierType", 15 | "names": null, 16 | "coord": null 17 | } 18 | }, 19 | "init": { 20 | "_nodetype": "Constant", 21 | "type": "int", 22 | "value": null, 23 | "coord": null 24 | }, 25 | "bitsize": null 26 | } 27 | -------------------------------------------------------------------------------- /config/funccall_ast.json: -------------------------------------------------------------------------------- 1 | { 2 | "_nodetype": "FuncCall", 3 | "coord": null, 4 | "name": { 5 | "_nodetype": "ID", 6 | "name": null, 7 | "coord": null 8 | }, 9 | "args": { 10 | "_nodetype": "ExprList", 11 | "coord": null, 12 | "exprs": [ 13 | { 14 | "_nodetype": "Constant", 15 | "type": "string", 16 | "value": "\"\"", 17 | "coord": null 18 | } 19 | ] 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /config/funcdef_ast.json: -------------------------------------------------------------------------------- 1 | { 2 | "_nodetype": "FuncDef", 3 | "decl": { 4 | "_nodetype": "Decl", 5 | "quals": [], 6 | "storage": [], 7 | "funcspec": [], 8 | "type": { 9 | "_nodetype": "FuncDecl", 10 | "type": { 11 | "_nodetype": "TypeDecl", 12 | "quals": [], 13 | "type": { 14 | "_nodetype": "IdentifierType" 15 | } 16 | }, 17 | "args": null 18 | }, 19 | "bitsize": null, 20 | "init": null 21 | }, 22 | "body": { 23 | "_nodetype": "Compound" 24 | }, 25 | "param_decls": null 26 | } 27 | -------------------------------------------------------------------------------- /config/id_ast.json: -------------------------------------------------------------------------------- 1 | { 2 | "_nodetype": "ID", 3 | "name": null, 4 | "coord": null 5 | } 6 | -------------------------------------------------------------------------------- /examples/math.c: -------------------------------------------------------------------------------- 1 | int main() 2 | { 3 | void a = 3; 4 | printf("%s = %s\n", a, "b"); 5 | } 6 | 7 | -------------------------------------------------------------------------------- /examples/math.hc: -------------------------------------------------------------------------------- 1 | U0 a = 3; 2 | Print("%s = %s\n", a, "b"); 3 | -------------------------------------------------------------------------------- /examples/test.c: -------------------------------------------------------------------------------- 1 | void test(short a, unsigned char b, double c) 2 | { 3 | printf("hello"); 4 | } 5 | 6 | double pest(char d) 7 | { 8 | printf("nothing"); 9 | } 10 | 11 | int main() 12 | { 13 | double* s = 3; 14 | printf("%s %s", "hello", "world"); 15 | long b = 2.0; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /examples/test.hc: -------------------------------------------------------------------------------- 1 | F64 *s = 3; 2 | 3 | 
U0 test(I16 a, U8 b, F64 c) { 4 | Print("hello"); 5 | } 6 | 7 | F64 pest(I8 d) { 8 | Print("nothing"); 9 | } 10 | 11 | Print("%s %s", "hello", "world"); 12 | I64 b = 2.000; 13 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pycparser==2.17 2 | docopt==0.6.2 3 | -------------------------------------------------------------------------------- /secularize/__init__.py: -------------------------------------------------------------------------------- 1 | """secularize 2 | 3 | Usage: 4 | secularize translate () 5 | secularize dump-ast () 6 | secularize (-h | --help) 7 | secularize --version 8 | 9 | Options: 10 | -h --help Show this screen. 11 | --version Show version. 12 | """ 13 | 14 | from .char import InputStream 15 | from .parser import Parser 16 | from .token import TokenStream 17 | 18 | from docopt import docopt 19 | from json import dumps, loads 20 | from pycparser import parse_file, c_parser, c_generator, c_ast 21 | from pycparser.plyparser import Coord 22 | import re 23 | import sys 24 | 25 | 26 | RE_CHILD_ARRAY = re.compile(r'(.*)\[(.*)\]') 27 | RE_INTERNAL_ATTR = re.compile('__.*__') 28 | 29 | 30 | class CJsonError(Exception): 31 | pass 32 | 33 | 34 | def memodict(fn): 35 | """ Fast memoization decorator for a function taking a single argument """ 36 | class memodict(dict): 37 | def __missing__(self, key): 38 | ret = self[key] = fn(key) 39 | return ret 40 | return memodict().__getitem__ 41 | 42 | 43 | @memodict 44 | def child_attrs_of(klass): 45 | """ 46 | Given a Node class, get a set of child attrs. 47 | Memoized to avoid highly repetitive string manipulation 48 | """ 49 | non_child_attrs = set(klass.attr_names) 50 | all_attrs = set([i for i in klass.__slots__ if not RE_INTERNAL_ATTR.match(i)]) 51 | return all_attrs - non_child_attrs 52 | 53 | 54 | def to_dict(node): 55 | """ Recursively convert an ast into dict representation. """ 56 | klass = node.__class__ 57 | 58 | result = {} 59 | 60 | # Metadata 61 | result['_nodetype'] = klass.__name__ 62 | 63 | # Local node attributes 64 | for attr in klass.attr_names: 65 | result[attr] = getattr(node, attr) 66 | 67 | # Coord object 68 | if node.coord: 69 | result['coord'] = str(node.coord) 70 | else: 71 | result['coord'] = None 72 | 73 | # Child attributes 74 | for child_name, child in node.children(): 75 | # Child strings are either simple (e.g. 'value') or arrays (e.g. 'block_items[1]') 76 | match = RE_CHILD_ARRAY.match(child_name) 77 | if match: 78 | array_name, array_index = match.groups() 79 | array_index = int(array_index) 80 | # arrays come in order, so we verify and append. 81 | result[array_name] = result.get(array_name, []) 82 | if array_index != len(result[array_name]): 83 | raise CJsonError('Internal ast error. Array {} out of order. ' 84 | 'Expected index {}, got {}'.format( 85 | array_name, len(result[array_name]), array_index)) 86 | result[array_name].append(to_dict(child)) 87 | else: 88 | result[child_name] = to_dict(child) 89 | 90 | # Any child attributes that were missing need "None" values in the json. 91 | for child_attr in child_attrs_of(klass): 92 | if child_attr not in result: 93 | result[child_attr] = None 94 | 95 | return result 96 | 97 | 98 | def _parse_coord(coord_str): 99 | """ Parse coord string (file:line[:column]) into Coord object. 
""" 100 | if coord_str is None: 101 | return None 102 | 103 | vals = coord_str.split(':') 104 | vals.extend([None] * 3) 105 | filename, line, column = vals[:3] 106 | return Coord(filename, line, column) 107 | 108 | 109 | def _convert_to_obj(value): 110 | """ 111 | Convert an object in the dict representation into an object. 112 | Note: Mutually recursive with from_dict. 113 | """ 114 | value_type = type(value) 115 | if value_type == dict: 116 | return from_dict(value) 117 | elif value_type == list: 118 | return [_convert_to_obj(item) for item in value] 119 | else: 120 | return value 121 | 122 | 123 | def from_dict(node_dict): 124 | """ Recursively build an ast from dict representation """ 125 | class_name = node_dict.pop('_nodetype') 126 | 127 | klass = getattr(c_ast, class_name) 128 | 129 | # Create a new dict containing the key-value pairs which we can pass 130 | # to node constructors. 131 | objs = {} 132 | for key, value in node_dict.items(): 133 | if key == 'coord': 134 | objs[key] = _parse_coord(value) 135 | else: 136 | objs[key] = _convert_to_obj(value) 137 | 138 | # Use keyword parameters, which works thanks to beautifully consistent 139 | # ast Node initializers. 140 | return klass(**objs) 141 | 142 | 143 | def from_json(ast_json): 144 | """ Build an ast from json string representation """ 145 | return from_dict(json.loads(ast_json)) 146 | 147 | 148 | def translate(filename): 149 | outfile = filename.replace('.hc', '.c') 150 | i = InputStream(filename) 151 | t = TokenStream(i) 152 | p = Parser(t) 153 | ast_json = dumps(p.out, indent=2, sort_keys=True) 154 | # print(ast_json) 155 | ast = from_dict(p.out) 156 | generator = c_generator.CGenerator() 157 | with open(outfile, 'w+') as out: 158 | out.write(generator.visit(ast)) 159 | 160 | 161 | def dump_ast(filename): 162 | with open(filename) as c_file: 163 | c_text = c_file.read() 164 | parser = c_parser.CParser() 165 | ast = parser.parse(c_text, filename=filename) 166 | print(dumps(to_dict(ast), sort_keys=False, indent=2)) 167 | 168 | 169 | def main(): 170 | args = docopt(__doc__, version='secularize') 171 | if args['dump-ast']: 172 | dump_ast(args['']) 173 | elif args['translate']: 174 | translate(args['']) 175 | -------------------------------------------------------------------------------- /secularize/char.py: -------------------------------------------------------------------------------- 1 | from re import split 2 | 3 | class InputStream(object): 4 | def __init__(self, filename): 5 | self.filename = filename 6 | self.input = open(filename).read() 7 | self.pos = 0 8 | self.line = 1 9 | self.col = 0 10 | self.cols = list() 11 | 12 | def next(self): 13 | ch = self.input[self.pos] 14 | self.pos += 1 15 | if ch == '\n': 16 | self.line += 1 17 | self.cols.append(self.col) 18 | self.col = 0 19 | else: 20 | self.col += 1 21 | return ch 22 | 23 | def prev(self): 24 | ch = self.input[self.pos] 25 | self.pos += 1 26 | if ch == '\n': 27 | self.line -= 1 28 | self.col = self.cols.pop() 29 | else: 30 | self.col -= 1 31 | return ch 32 | 33 | def peek(self): 34 | try: 35 | return self.input[self.pos] 36 | except IndexError: 37 | return None 38 | 39 | def peek_prev(self): 40 | try: 41 | return self.input[self.pos - 1] 42 | except IndexError: 43 | return None 44 | 45 | def bof(self): 46 | return self.line == self.col == 0 47 | 48 | def eof(self): 49 | return self.peek() is None 50 | 51 | def croak(self, message): 52 | raise Exception(f'{message} ({self.line}:{self.col})') 53 | 
-------------------------------------------------------------------------------- /secularize/parser.py: -------------------------------------------------------------------------------- 1 | from json import dumps 2 | 3 | 4 | FALSE = { 5 | 'type': 'bool', 6 | 'value': False 7 | } 8 | 9 | 10 | class Parser(object): 11 | def __init__(self, input_): 12 | self.input = input_ 13 | self.precedence = { 14 | '=': 1, 15 | '||': 2, 16 | '&&': 3, 17 | '<': 7, 18 | '>': 7, 19 | '<=': 7, 20 | '>=': 7, 21 | '==': 7, 22 | '!=': 7, 23 | '+': 10, 24 | '-': 10, 25 | '*': 20, 26 | '/': 20, 27 | '%': 20 28 | } 29 | self.toplevel_prog = list() 30 | self.out = self.parse_toplevel() 31 | 32 | def _is(self, _in, _type): 33 | tok = self.input.peek() 34 | return tok.get('type', False) and tok and tok['type'] == _type and\ 35 | (not _in or tok['value'] == _in) and tok 36 | 37 | def _skip(self, _in, _type): 38 | if getattr(self, f'is_{_type}')(_in): 39 | self.input.next() 40 | else: 41 | self.input.croak(f'Expecting punctuation: "{_in}"') 42 | 43 | def get_coord(self): 44 | return f'{self.input.input.filename}:{self.input.input.line}' 45 | 46 | def is_punc(self, ch): 47 | return self._is(ch, 'punc') 48 | 49 | def is_kw(self, kw): 50 | return self._is(kw, 'kw') 51 | 52 | def is_op(self, op=None): 53 | return self._is(op, 'op') 54 | 55 | def skip_punc(self, ch): 56 | return self._skip(ch, 'punc') 57 | 58 | def skip_kw(self, kw): 59 | return self._skip(kw, 'kw') 60 | 61 | def skip_op(self, op): 62 | return self._skip(op, 'op') 63 | 64 | def unexpected(self): 65 | self.input.croak(f'Unexpected token: {str(self.input.peek())}') 66 | 67 | def maybe_binary(self, left, my_prec): 68 | tok = self.is_op() 69 | if tok: 70 | his_prec = self.precedence[tok['value']] 71 | if his_prec > my_prec: 72 | self.input.next() 73 | return self.maybe_binary({ 74 | 'type': 'assign' if tok['value'] == "=" else 'binary', 75 | 'operator': tok['value'], 76 | 'left': left, 77 | 'right': self.maybe_binary(self.parse_atom(), his_prec) 78 | }, my_prec) 79 | return left 80 | 81 | def delimited(self, start, stop, sep, parser): 82 | a = list() 83 | first = True 84 | while not self.input.eof(): 85 | # print(f'delimited list: {a}') 86 | if self.is_punc(stop): 87 | break 88 | if first: 89 | first = False 90 | else: 91 | self.skip_punc(sep) 92 | if self.is_punc(stop): 93 | break 94 | a.append(parser()) 95 | self.skip_punc(stop) 96 | return a 97 | 98 | def parse_call(self, func): 99 | coord = self.get_coord() 100 | # print(f'parsing call: {func}') 101 | name = func['name']['name'] 102 | func['name']['name'] = name 103 | func['args']['exprs'] = [ 104 | arg for arg in self.delimited('(', ')', ',', self.parse_expression) 105 | ] 106 | # print(f'func: {func}') 107 | return func 108 | 109 | def parse_varname(self): 110 | type_ = self.input.next()['value'] 111 | name = self.input.next() 112 | if name['_nodetype'] != 'Decl': 113 | self.input.croak('Expecting variable name') 114 | name['type']['type']['names'].append(type_) 115 | name['init'] = None 116 | return name 117 | 118 | def parse_if(self): 119 | self.skip_kw('if') 120 | cond = self.parse_expression() 121 | if not self.is_punc('{'): 122 | self.skip_kw('then') 123 | then = self.parse_expression() 124 | ret = { 125 | 'type': 'if', 126 | 'cond': cond, 127 | 'then': then, 128 | } 129 | if self.is_kw("else"): 130 | self.input.next() 131 | ret['else'] = self.parse_expression() 132 | return ret 133 | 134 | def parse_lambda(self): 135 | coord = self.get_coord() 136 | vars_ = self.delimited('(', ')', ',', 
self.parse_varname) 137 | expr = self.parse_expression() 138 | expr['decl']['type']['args'] = { 139 | "_nodetype": "ParamList", 140 | "coord": coord, 141 | "params": vars_, 142 | } 143 | return expr 144 | 145 | def parse_bool(self): 146 | return { 147 | 'type': 'bool', 148 | 'value': self.input.next()['value'] == 'true' 149 | } 150 | 151 | def maybe_call(self, expr): 152 | expr = expr() 153 | # print(f'sub expr: {expr}') 154 | if self.input.peek().get('type') in ['string', 'num'] or \ 155 | self.input.peek().get('_nodetype') in ['ID']: 156 | return self.parse_call(expr) 157 | else: 158 | return expr 159 | 160 | def parse_atom(self): 161 | def anon(): 162 | if self.input.peek().get('_nodetype') == 'FuncCall': 163 | # print("found FuncCall") 164 | return self.input.next() 165 | if self.is_punc('('): 166 | # print("found (") 167 | self.input.next() 168 | # print("parsing expression after (") 169 | expr = self.parse_expression() 170 | # print("skipping )") 171 | self.skip_punc(')') 172 | return expr 173 | if self.is_punc('{'): 174 | return self.parse_prog() 175 | if self.is_kw('if'): 176 | return self.parse_if() 177 | if self.is_kw('true') or self.is_kw('false'): 178 | return self.parse_bool() 179 | tok = self.input.next() 180 | # print(f"got next token: {tok}") 181 | # print(f'tok: {dumps(tok, indent=2)}') 182 | # DATATYPE 183 | if tok.get('type') == 'datatype': 184 | # print("found datatype") 185 | var = self.input.next() 186 | if var['_nodetype'] == 'FuncDef': 187 | self.input.next() 188 | return self.parse_lambda() 189 | var['type']['type']['names'].append(tok['value']) 190 | tok = self.input.next() 191 | if tok['value'] == ';': 192 | var['init'] = None 193 | return var 194 | if tok['value'] != '=': 195 | self.input.croak('Expected = for variable declaration') 196 | tok = self.input.next() 197 | var['init']['type'] = type(tok['value']).__name__ 198 | var['init']['value'] = str(tok['value']) 199 | return var 200 | # CONSTANT 201 | if tok.get('type') in ['string', 'int'] or \ 202 | tok.get('_nodetype') in ['ID']: 203 | # print("found string or int") 204 | return tok 205 | self.unexpected() 206 | return self.maybe_call(anon) 207 | 208 | def build_ast(self, prog): 209 | return { 210 | '_nodetype': 'FileAST', 211 | 'coord': None, 212 | 'ext': [self.input.read_function('main', prog)] 213 | } 214 | 215 | def parse_toplevel(self): 216 | # prog = list() 217 | functions = list() 218 | while not self.input.eof(): 219 | expr = self.parse_expression() 220 | # print(f'toplevel expr: {expr}') 221 | if expr['_nodetype'] == 'FuncDef': 222 | functions.append(expr) 223 | else: 224 | self.toplevel_prog.append(expr) 225 | if not self.input.eof(): 226 | self.skip_punc(';') 227 | ast = self.build_ast(self.toplevel_prog) 228 | for func in functions[::-1]: 229 | ast['ext'].insert(0, func) 230 | return ast 231 | 232 | def parse_prog(self): 233 | coord = f'{self.input.input.filename}:{self.input.input.line}' 234 | name = str() 235 | type_ = str() 236 | for i, tok in enumerate(self.input.tokens[::-1]): 237 | if tok.get('_nodetype') == 'FuncDef': 238 | name = tok['decl']['name'] 239 | type_ = self.input.tokens[len(self.input.tokens) - i - 3]['value'] 240 | break 241 | self.input.next() 242 | prog = self.delimited('{', '}', ';', self.parse_expression) 243 | return self.input.read_function(name, prog, type_=[type_]) 244 | 245 | def parse_expression(self): 246 | return self.maybe_call(lambda: self.maybe_binary(self.parse_atom(), 0)) 247 | -------------------------------------------------------------------------------- 
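A note on the parser above: `Parser.__init__` immediately runs `parse_toplevel()`, and `maybe_binary` is a precedence climber over the `precedence` table, so an expression like `a = b + c * 2` nests as an `assign` node whose right side is `binary('+', b, binary('*', c, 2))`. Below is a minimal sketch of how the parser is wired up, mirroring `translate()` in `secularize/__init__.py`; run it from the repository root so the relative `config/` paths used by `populate_ast` resolve.

```python
# Sketch: characters -> tokens -> pycparser-style AST dict (same wiring as
# translate() in secularize/__init__.py).
from secularize.char import InputStream
from secularize.parser import Parser
from secularize.token import TokenStream

stream = InputStream('examples/test.hc')   # character stream with line/col tracking
tokens = TokenStream(stream)               # HolyC tokens and pycparser-style node dicts
ast_dict = Parser(tokens).out              # parse_toplevel() runs inside __init__

# translate() then rebuilds real pycparser nodes with from_dict(ast_dict) and
# renders C source via c_generator.CGenerator().visit(...).
```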
/secularize/token.py: -------------------------------------------------------------------------------- 1 | from json import load, dumps 2 | from .utils import populate_ast 3 | 4 | 5 | class TokenStream(object): 6 | def __init__(self, input_): 7 | self.input = input_ 8 | self.current = None 9 | self.keywords = 'if then else true false'.split() 10 | self.datatypes = ['U0', 'U8', 'U16', 'U32', 'U64', 11 | 'I8', 'I16', 'I32', 'I64', 'F64'] 12 | self.tokens = list() 13 | self.direct_trans = { 14 | 'Print': 'printf', 15 | 'U0': 'void', 16 | 'U8': 'unsigned char', 17 | 'U16': 'unsigned short', 18 | 'U32': 'unsigned int', 19 | 'U64': 'unsigned long', 20 | 'I8': 'char', 21 | 'I16': 'short', 22 | 'I32': 'int', 23 | 'I64': 'long', 24 | 'F64': 'double' 25 | } 26 | 27 | def croak(self, message): 28 | return self.input.croak(message + f'{dumps(self.tokens, indent=2)}') 29 | 30 | def is_keyword(self, word): 31 | return word in self.keywords 32 | 33 | def is_datatype(self, word): 34 | return word in self.datatypes 35 | 36 | def is_digit(self, ch): 37 | try: 38 | int(ch) 39 | return True 40 | except (ValueError, TypeError): 41 | return False 42 | 43 | def is_id_start(self, ch): 44 | try: 45 | return ch.isalpha() 46 | except AttributeError: 47 | return False 48 | 49 | def is_id(self, ch): 50 | return self.is_id_start(ch) or ch in '?!-<>=0123456789' 51 | 52 | def is_op_char(self, ch): 53 | return ch in '+-*/%=&|<>!' 54 | 55 | def is_punc(self, ch): 56 | return ch in ',;(){}[]' 57 | 58 | def is_whitespace(self, ch): 59 | return ch in ' _\t_\n'.split('_') 60 | 61 | def is_being_declared(self): 62 | return self.tokens and self.tokens[-1].get('type') != 'datatype' 63 | 64 | def is_not_builtin(self, id_): 65 | return id_ not in self.direct_trans 66 | 67 | def read_while(self, predicate): 68 | string = str() 69 | while not self.input.eof() and predicate(self.input.peek()): 70 | string += self.input.next() 71 | return string 72 | 73 | def read_while_prev(self, predicate): 74 | string = str() 75 | line = self.input.line 76 | col = self.input.col 77 | while not self.input.bof() and predicate(self.input.peek_prev()): 78 | string += self.input.prev() 79 | self.input.line = line 80 | self.input.col = col 81 | return string[::-1] 82 | 83 | def read_number(self): 84 | has_dot = False 85 | def anon(ch, has_dot): 86 | if ch == '.': 87 | if (has_dot): 88 | return False 89 | has_dot = True 90 | return True 91 | return self.is_digit(ch) 92 | number = self.read_while(lambda ch: anon(ch, has_dot)) 93 | try: 94 | number = int(number) 95 | except ValueError: 96 | number = float(number) 97 | self.tokens.append({ 98 | 'type': 'num', 99 | 'value': number 100 | }) 101 | return self.tokens[-1] 102 | 103 | 104 | def read_function(self, name, prog, type_=['int']): 105 | coord = f'{self.input.filename}:{self.input.line}' 106 | return populate_ast(self, 'funcdef', **{ 107 | 'coord': coord, 108 | 'body.coord': coord, 109 | 'body.block_items': prog, 110 | 'decl.name': name, 111 | 'decl.coord': coord, 112 | 'decl.type.coord': coord, 113 | 'decl.type.type.coord': coord, 114 | 'decl.type.type.declname': name, 115 | 'decl.type.type.type.names': type_, 116 | 'decl.type.type.type.coord': coord 117 | }) 118 | 119 | 120 | def read_ident(self): 121 | coord = f'{self.input.filename}:{self.input.line}' 122 | id_ = self.read_while(self.is_id) 123 | type_ = str() 124 | # print(f'id: {id_}') 125 | if self.is_keyword(id_): 126 | type_ = 'kw' 127 | elif self.is_datatype(id_): 128 | type_ = 'datatype' 129 | self.direct_trans[f'{id_}*'] = 
f'{self.direct_trans[id_]}*' 130 | maybe_pointer = self.read_while(lambda ch: ch in [' ', '*'])\ 131 | .replace(' ', str()) 132 | if maybe_pointer: 133 | id_ += maybe_pointer 134 | elif self.is_being_declared() and self.is_not_builtin(id_): 135 | # print(f"creating var out of {id_}") 136 | return populate_ast(self, 'id', **{ 137 | 'name': id_, 138 | 'coord': coord 139 | }) 140 | else: 141 | # function definition 142 | if self.tokens and self.tokens[-1].get('type') == 'datatype' and\ 143 | self.peek()['value'] == '(': 144 | return self.read_function(id_, list()) 145 | # function call 146 | if self.peek()['value'] == '(': 147 | return populate_ast(self, 'funccall', **{ 148 | 'coord': coord, 149 | 'name.name': self.direct_trans.get(id_, id_), 150 | 'name.coord': coord, 151 | 'args.coord': coord, 152 | 'args.exprs.coord': coord 153 | }) 154 | # function/variable declaration 155 | return populate_ast(self, 'decl', **{ 156 | 'name': id_, 157 | 'coord': coord, 158 | 'type.declname': id_, 159 | 'type.coord': coord, 160 | 'type.type.names': list(), 161 | 'type.type.coord': coord, 162 | 'init.coord': coord 163 | }) 164 | self.tokens.append({ 165 | 'type': type_, 166 | 'value': self.direct_trans.get(id_, id_) 167 | }) 168 | return self.tokens[-1] 169 | 170 | def read_escaped(self, end): 171 | escaped = False 172 | string = str() 173 | self.input.next() 174 | while not self.input.eof(): 175 | ch = self.input.next() 176 | if ch == end: 177 | break 178 | string += ch 179 | # if escaped: 180 | # string += ch 181 | # escaped = False 182 | # elif ch == '\\': 183 | # escaped = True 184 | # elif ch == end: 185 | # break 186 | # else: 187 | # string += ch 188 | return f'"{string}"' 189 | 190 | def read_string(self): 191 | self.tokens.append({ 192 | "_nodetype": "Constant", 193 | "type": "string", 194 | "value": self.read_escaped('"'), 195 | "coord": "examples/math.c:3:16" 196 | }) 197 | # print(f'found string: {self.tokens[-1]}') 198 | # self.tokens.append({ 199 | # 'type': 'str', 200 | # 'value': self.read_escaped('"') 201 | # }) 202 | return self.tokens[-1] 203 | 204 | def skip_comment(self): 205 | self.read_while(lambda ch: ch != "\n") 206 | self.input.next() 207 | 208 | def read_next(self): 209 | self.read_while(self.is_whitespace) 210 | if self.input.eof(): 211 | return None 212 | ch = self.input.peek() 213 | if ch == "//": 214 | self.skip_comment() 215 | return self.read_next() 216 | if ch == '"': 217 | return self.read_string() 218 | if self.is_digit(ch): 219 | return self.read_number() 220 | if self.is_id_start(ch): 221 | return self.read_ident() 222 | if self.is_punc(ch): 223 | self.tokens.append({ 224 | 'type': 'punc', 225 | 'value': self.input.next() 226 | }) 227 | return self.tokens[-1] 228 | if self.is_op_char(ch): 229 | self.tokens.append({ 230 | 'type': 'op', 231 | 'value': self.read_while(self.is_op_char) 232 | }) 233 | return self.tokens[-1] 234 | self.input.croak(f'Can\'t handle character: {ch}') 235 | 236 | def read_prev(self): 237 | self.read_while_prev(self.is_whitespace) 238 | if self.input.bof(): 239 | return None 240 | ch = self.input.peek() 241 | if ch == "//": 242 | self.skip_comment() 243 | return self.read_next() 244 | if ch == '"': 245 | return self.read_string() 246 | if self.is_digit(ch): 247 | return self.read_number() 248 | if self.is_id_start(ch): 249 | return self.read_ident() 250 | if self.is_punc(ch): 251 | self.tokens.append({ 252 | 'type': 'punc', 253 | 'value': self.input.next() 254 | }) 255 | return self.tokens 256 | if self.is_op_char(ch): 257 | self.tokens.append({ 
258 | 'type': 'op', 259 | 'value': self.read_while(self.is_op_char) 260 | }) 261 | return self.tokens[-1] 262 | self.input.croak(f'Can\'t handle character: {ch}') 263 | 264 | def peek(self): 265 | if self.current: 266 | return self.current 267 | self.current = self.read_next() 268 | return self.current 269 | 270 | def next(self): 271 | tok = self.current 272 | self.current = None 273 | return tok or self.read_next() 274 | 275 | def prev(self): 276 | return self.read_prev() 277 | 278 | def eof(self): 279 | return self.peek() is None 280 | -------------------------------------------------------------------------------- /secularize/utils.py: -------------------------------------------------------------------------------- 1 | from json import load 2 | 3 | 4 | def populate_ast(phase, ast_type, **kwargs): 5 | with open(f'config/{ast_type}_ast.json') as ast_file: 6 | ast_json = load(ast_file) 7 | for keypath, value in kwargs.items(): 8 | keys = keypath.split('.') 9 | last_key = keys.pop() 10 | if not keys: 11 | ast_json[last_key] = value 12 | else: 13 | sub_ast = ast_json[keys.pop(0)] 14 | ''' 15 | fix this ugliness 16 | ''' 17 | for key in keys: 18 | if isinstance(sub_ast, list): 19 | sub_ast = sub_ast[0][key] 20 | else: 21 | sub_ast = sub_ast[key] 22 | if isinstance(sub_ast, list): 23 | sub_ast[0][last_key] = value 24 | else: 25 | sub_ast[last_key] = value 26 | phase.tokens.append(ast_json) 27 | return phase.tokens[-1] 28 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages # Always prefer setuptools over distutils 2 | from codecs import open # To use a consistent encoding 3 | from os import path 4 | 5 | here = path.abspath(path.dirname(__file__)) 6 | 7 | # Get the long description from the relevant file 8 | with open(path.join(here, 'README.md'), encoding='utf-8') as f: 9 | long_description = f.read() 10 | 11 | setup( 12 | name='secularize', 13 | 14 | # Versions should comply with PEP440. For a discussion on single-sourcing 15 | # the version across setup.py and the project code, see 16 | # http://packaging.python.org/en/latest/tutorial.html#version 17 | version='0.0.1-7', 18 | 19 | description='run HolyC source on linux', 20 | long_description=long_description, #this is the 21 | long_description_content_type='text/markdown', 22 | 23 | # The project's main homepage. 24 | url='https://www.github.com/jamesalbert/HolyC-for-Linux', 25 | 26 | # Author details 27 | author='jamesalbert', 28 | author_email='jamesrobertalbert@gmail.com', 29 | 30 | # Choose your license 31 | license='MIT', 32 | 33 | entry_points={ 34 | 'console_scripts': [ 35 | 'secularize = secularize.__init__:main', 36 | ], 37 | }, 38 | 39 | install_requires=['pycparser', 'docopt'], 40 | 41 | # See https://PyPI.python.org/PyPI?%3Aaction=list_classifiers 42 | classifiers=[ 43 | # How mature is this project? 
Common values are 44 | # 3 - Alpha 45 | # 4 - Beta 46 | # 5 - Production/Stable 47 | 'Development Status :: 3 - Alpha', 48 | 49 | # Indicate who your project is intended for 50 | 'Intended Audience :: Developers', 51 | 'Topic :: Software Development :: Build Tools', 52 | 53 | # Pick your license as you wish (should match "license" above) 54 | 'License :: OSI Approved :: MIT License', 55 | 56 | # Specify the Python versions you support here. In particular, ensure 57 | # that you indicate whether you support Python 2, Python 3 or both. 58 | 'Programming Language :: Python :: 3.6', 59 | ], 60 | 61 | # What does your project relate to? 62 | keywords='holyc linux terry lord temple mlg', 63 | 64 | packages=["secularize"], 65 | 66 | ) 67 | --------------------------------------------------------------------------------
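The `config/*_ast.json` files earlier in the repository are the templates that `populate_ast` in `secularize/utils.py` fills in by dotted keypath before appending the result to the token list. A minimal sketch, assuming it is run from the repository root (the `config/` path is relative) and using a stand-in for `TokenStream`, the usual `phase` argument:

```python
from types import SimpleNamespace

from secularize.utils import populate_ast

# populate_ast() only touches phase.tokens, so a bare namespace works here.
phase = SimpleNamespace(tokens=[])

# Top-level keys are assigned directly; dotted keypaths (e.g. 'type.declname'
# in read_ident()) descend into the nested template dicts.
node = populate_ast(phase, 'id', **{'name': 'a', 'coord': 'examples/math.hc:2'})

assert node == {'_nodetype': 'ID', 'name': 'a', 'coord': 'examples/math.hc:2'}
assert phase.tokens[-1] is node
```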