├── .codecov.yml ├── .coveragerc ├── .flake8 ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── demo.gif ├── docker ├── Dockerfile └── shell ├── feature_tracking.txt ├── hooks └── pre-commit ├── setup.py ├── shivyc ├── __init__.py ├── asm_cmds.py ├── asm_gen.py ├── ctypes.py ├── errors.py ├── il_cmds │ ├── __init__.py │ ├── base.py │ ├── compare.py │ ├── control.py │ ├── math.py │ └── value.py ├── il_gen.py ├── include │ ├── ctype.h │ ├── stdbool.h │ ├── stdio.h │ ├── stdlib.h │ └── string.h ├── lexer.py ├── main.py ├── parser │ ├── __init__.py │ ├── declaration.py │ ├── expression.py │ ├── parser.py │ ├── statement.py │ └── utils.py ├── preproc.py ├── spots.py ├── token_kinds.py ├── tokens.py └── tree │ ├── __init__.py │ ├── decl_nodes.py │ ├── expr_nodes.py │ ├── nodes.py │ └── utils.py ├── tests ├── __init__.py ├── feature_tests │ ├── addition.c │ ├── array.c │ ├── assignment.c │ ├── bitwise_shifts.c │ ├── bool.c │ ├── break_continue.c │ ├── cast.c │ ├── comparison.c │ ├── compound_assign.c │ ├── compound_statement.c │ ├── declaration.c │ ├── division.c │ ├── else.c │ ├── error_array.c │ ├── error_bool.c │ ├── error_break_continue.c │ ├── error_cast.c │ ├── error_comparison.c │ ├── error_compound_assign.c │ ├── error_control_statements.c │ ├── error_declaration.c │ ├── error_extra_tokens.c │ ├── error_function_call.c │ ├── error_function_def.c │ ├── error_include.c │ ├── error_incomplete_types.c │ ├── error_incr_decr.c │ ├── error_initializer.c │ ├── error_large_literal.c │ ├── error_main1.c │ ├── error_main2.c │ ├── error_main3.c │ ├── error_main4.c │ ├── error_pointer.c │ ├── error_pointer_math.c │ ├── error_redefined_identifier.c │ ├── error_sizeof.c │ ├── error_string.c │ ├── error_struct.c │ ├── error_type_qualifier.c │ ├── error_typedef.c │ ├── error_typedef2.c │ ├── error_unary.c │ ├── error_unassignable_lvalue.c │ ├── error_undeclared_identifier.c │ ├── error_union.c │ ├── expr_comma.c │ ├── for.c │ ├── function_call.c │ ├── function_def.c │ ├── function_def_helper.c │ ├── if.c │ ├── implicit_cast.c │ ├── include.c │ ├── include_helper.h │ ├── include_helper_empty.h │ ├── incomplete_types.c │ ├── incr_decr.c │ ├── int_arithmetic.c │ ├── main_return.c │ ├── modulus.c │ ├── multiplication.c │ ├── pointer-1.c │ ├── pointer-2.c │ ├── pointer_math.c │ ├── sizeof.c │ ├── storage.c │ ├── storage_helper.c │ ├── string.c │ ├── struct.c │ ├── subtraction.c │ ├── type_qualifier.c │ ├── typedef.c │ ├── unary_arithmetic.c │ ├── union.c │ └── while.c ├── frontend_tests │ ├── empty_statement.c │ ├── error_bad_identifier.c │ ├── error_declaration.c │ ├── error_empty_struct.c │ ├── error_end_binop.c │ ├── error_if_close_paren.c │ ├── error_if_no_condition.c │ ├── error_if_open_paren.c │ ├── error_lexer.c │ ├── error_missing_final_brace.c │ ├── error_missing_quote.c │ ├── error_missing_semicolon.c │ ├── error_missing_semicolon_and_end.c │ ├── error_struct_initializer.c │ ├── error_tokens_in_array_size.c │ ├── identifier.c │ ├── lexer.c │ ├── spacing.c │ └── string.c ├── general_tests │ ├── count │ │ ├── .gitignore │ │ ├── Count.c │ │ └── input.c │ ├── pi │ │ └── pi.c │ └── trie │ │ ├── trie.c │ │ └── words.txt └── test_all.py └── tools ├── compile_asm ├── make_asm └── release /.codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | project: off 4 | patch: off 5 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | exclude_lines = 3 | pragma: no cover 4 | NotImplementedError 5 | 6 | [run] 7 | omit = 8 | shivyc/main.py 9 | setup.py -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E127, W503, E701, D105, D202, E731 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | out 6 | out.o 7 | out.s 8 | gcc_out 9 | a.out 10 | test.c 11 | \#*# 12 | tests/**/*.s 13 | tests/**/*.o 14 | shivyc.egg-info/* 15 | dist/* 16 | build/* 17 | gcc_output 18 | shivyc_output -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.6" 4 | # - "pypy3" # might be faster that CPython for big compiles 5 | install: 6 | - pip install codecov flake8==3.5 7 | addons: 8 | apt: 9 | packages: 10 | - binutils 11 | - gcc 12 | before_script: 13 | - flake8 . --count --show-source --statistics 14 | - flake8 . --exit-zero --max-complexity=15 15 | script: coverage run -m unittest discover 16 | after_success: codecov 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Shivam Sarodia 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ShivyC [![Build Status](https://travis-ci.org/ShivamSarodia/ShivyC.svg?branch=master)](https://travis-ci.org/ShivamSarodia/ShivyC) [![Code Coverage](https://codecov.io/gh/ShivamSarodia/ShivyC/branch/master/graph/badge.svg)](https://codecov.io/gh/ShivamSarodia/ShivyC) 2 | 3 | ### A hobby C compiler created in Python. 4 | 5 | ![ShivyC demo GIF.](https://raw.githubusercontent.com/ShivamSarodia/ShivyC/master/demo.gif) 6 | 7 | --- 8 | 9 | ShivyC is a hobby C compiler written in Python 3 that supports a subset of the C11 standard and generates reasonably efficient binaries, including some optimizations. ShivyC also generates helpful compile-time error messages. 10 | 11 | This [implementation of a trie](tests/general_tests/trie/trie.c) is an example of what ShivyC can compile today. For a more comprehensive list of features, see the [feature test directory](tests/feature_tests). 12 | 13 | ## Quickstart 14 | 15 | ### x86-64 Linux 16 | ShivyC requires only Python 3.6 or later to compile C code. Assembling and linking are done using the GNU binutils and glibc, which you almost certainly already have installed. 17 | 18 | To install ShivyC: 19 | ``` 20 | pip3 install shivyc 21 | ``` 22 | To create, compile, and run an example program: 23 | ```c 24 | $ vim hello.c 25 | $ cat hello.c 26 | 27 | #include 28 | int main() { 29 | printf("hello, world!\n"); 30 | } 31 | 32 | $ shivyc hello.c 33 | $ ./out 34 | hello, world! 35 | ``` 36 | To run the tests: 37 | ``` 38 | git clone https://github.com/ShivamSarodia/ShivyC.git 39 | cd ShivyC 40 | python3 -m unittest discover 41 | ``` 42 | 43 | ### Other Architectures 44 | For the convenience of those not running Linux, the [`docker/`](docker/) directory provides a Dockerfile that sets up an x86-64 Linux Ubuntu environment with everything necessary for ShivyC. To use this, run: 45 | ``` 46 | git clone https://github.com/ShivamSarodia/ShivyC.git 47 | cd ShivyC 48 | docker build -t shivyc docker/ 49 | docker/shell 50 | ``` 51 | This will open up a shell in an environment with ShivyC installed and ready to use with 52 | ``` 53 | shivyc any_c_file.c # to compile a file 54 | python3 -m unittest discover # to run tests 55 | ``` 56 | The Docker ShivyC executable will update live with any changes made in your local ShivyC directory. 57 | 58 | ## Implementation Overview 59 | #### Preprocessor 60 | ShivyC today has a very limited preprocessor that parses out comments and expands `#include` directives. These features are implemented between [`lexer.py`](shivyc/lexer.py) and [`preproc.py`](shivyc/lexer.py). 61 | 62 | #### Lexer 63 | The ShivyC lexer is implemented primarily in [`lexer.py`](shivyc/lexer.py). Additionally, [`tokens.py`](shivyc/tokens.py) contains definitions of the token classes used in the lexer and [`token_kinds.py`](shivyc/token_kinds.py) contains instances of recognized keyword and symbol tokens. 64 | 65 | #### Parser 66 | The ShivyC parser uses recursive descent techniques for all parsing. It is implemented in [`parser/*.py`](shivyc/parser/) and creates a parse tree of nodes defined in [`tree/nodes.py`](shivyc/tree/nodes.py) and [`tree/expr_nodes.py`](shivyc/tree/expr_nodes.py). 67 | 68 | #### IL generation 69 | ShivyC traverses the parse tree to generate a flat custom IL (intermediate language). The commands for this IL are in [`il_cmds/*.py`](shivyc/il_cmds/) . Objects used for IL generation are in [`il_gen.py`](shivyc/il_gen.py) , but most of the IL generating code is in the `make_code` function of each tree node in [`tree/*.py`](shivyc/tree/). 70 | 71 | #### ASM generation 72 | ShivyC sequentially reads the IL commands, converting each into Intel-format x86-64 assembly code. ShivyC performs register allocation using George and Appel’s iterated register coalescing algorithm (see References below). The general ASM generation functionality is in [`asm_gen.py`](shivyc/asm_gen.py) , but much of the ASM generating code is in the `make_asm` function of each IL command in [`il_cmds/*.py`](shivyc/il_cmds/). 73 | 74 | ## Contributing 75 | Pull requests to ShivyC are very welcome. A good place to start is the [Issues page](https://github.com/ShivamSarodia/ShivyC/issues). All [issues labeled "feature"](https://github.com/ShivamSarodia/ShivyC/issues?q=is%3Aopen+is%3Aissue+label%3Afeature) are TODO tasks. [Issues labeled "bug"](https://github.com/ShivamSarodia/ShivyC/issues?q=is%3Aopen+is%3Aissue+label%3Abug) are individual miscompilations in ShivyC. If you have any questions, please feel free to ask in the comments of the relevant issue or create a new issue labeled "question". Of course, please add test(s) for all new functionality. 76 | 77 | Many thanks to our current and past contributers: 78 | * [ShivamSarodia](https://github.com/ShivamSarodia) 79 | * [cclauss](https://github.com/cclauss) 80 | * [TBladen](https://github.com/tbladen) 81 | * [christian-stephen](https://github.com/christian-stephen) 82 | * [jubnzv](https://github.com/jubnzv) 83 | * [eriols](https://github.com/eriols) 84 | 85 | ## References 86 | - [ShivC](https://github.com/ShivamSarodia/ShivC) - ShivyC is a rewrite from scratch of my old C compiler, ShivC, with much more emphasis on feature completeness and code quality. See the ShivC README for more details. 87 | - C11 Specification - http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1570.pdf 88 | - x86_64 ABI - https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf 89 | - Iterated Register Coalescing (George and Appel) - https://www.cs.purdue.edu/homes/hosking/502/george.pdf 90 | -------------------------------------------------------------------------------- /demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShivamSarodia/ShivyC/6232136be38a29e8c18beae3d23e49ecfb7906fd/demo.gif -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:artful 2 | RUN apt-get update && apt-get install -y \ 3 | python3 \ 4 | python3-pip \ 5 | gcc \ 6 | vim 7 | CMD pip3 install -e . && /bin/bash 8 | -------------------------------------------------------------------------------- /docker/shell: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker run -it -v $PWD:/ShivyC -w /ShivyC shivyc 4 | -------------------------------------------------------------------------------- /feature_tracking.txt: -------------------------------------------------------------------------------- 1 | This is a VERY rough txt file I'm using to keep track of direction of the 2 | project. 3 | 4 | LEGEND 5 | ------ 6 | 7 | Immediate timeline - Changes planned as the next steps. 8 | Main projects - The large-scale changes planned for forseeable future. 9 | Features - Various smaller-scale changes that need to be implemented 10 | 11 | Current target: fully compile nmake.c from CS323 pset 12 | 13 | * - Explicitly required to reach current target 14 | 15 | -------------------------------------------------------------------------------- 16 | 17 | IMMEDIATE TIMELINE 18 | ------------------ 19 | - const 20 | - typedef 21 | - initializers (global, array/struct, string) 22 | - #define 23 | - functions (definition, struct as argument/return value, argc/argv) 24 | - sizeof 25 | - hex sequences in strings 26 | 27 | -------------------------------------------------------------------------------- 28 | 29 | MAIN PROJECTS 30 | ------------- 31 | 32 | -------------------------------------------------------------------------------- 33 | 34 | FEATURES 35 | -------- 36 | 37 | TYPES 38 | - Declaration initializers 39 | * - Global initializers 40 | * - Array/struct initializers 41 | * - String initializers 42 | - Full extern/static/global variable implementation 43 | - static globals, static locals, etc 44 | - Qualified types 45 | * - const 46 | - volatile 47 | - register 48 | * - Update headers to include these 49 | - Similar types 50 | - `long long` (distinct from `long`) 51 | - Proper ranking of integers, distinct from just size 52 | - Implement `double` and `float` 53 | * - Implement typedef 54 | 55 | PREPROCESSOR 56 | - Fix small bugs in current implementation 57 | * - #define 58 | * - static and function-form 59 | 60 | CONTROL STATEMENTS 61 | - do-while 62 | - switch 63 | 64 | FUNCTIONS 65 | * - Implement function definition 66 | * - Improve function declaration 67 | * - Struct as argument 68 | * - Struct as return value 69 | - Over 6 arguments 70 | * - Support argc and argv 71 | 72 | EXPRESSION 73 | - Bitwise (&, |, etc.) 74 | - Ternary 75 | * - sizeof 76 | - Casts 77 | - Hex/octal literal consts 78 | - Literal suffixed consts (`123ul`, etc) 79 | * - Hex sequences in strings (`\0`) 80 | 81 | OPTIMIZAIONS 82 | - Remove unnecessary lea before function calls 83 | - General dataflow analysis 84 | - Conditionals in if-statements 85 | - Simplify output code for chained [] or . or -> 86 | - Simplify repeated Set IL commands 87 | - Live range splitting in register allocator 88 | - Speed up register allocator 89 | 90 | GENERAL 91 | - Support error recovery 92 | - Within parser 93 | - Between stages 94 | (i.e. if lexer/parser are confident in their guess, try IL generation too) 95 | - Improved treatment of command line flags 96 | 97 | -------------------------------------------------------------------------------- 98 | -------------------------------------------------------------------------------- /hooks/pre-commit: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for file in $(git diff --cached --name-only --diff-filter=ACM | grep -e '\.py$') 4 | do 5 | flake8 $file 6 | if [ $? != 0 ]; then 7 | echo "flake8 failed!" 8 | exit 1 9 | fi 10 | done 11 | exit 0 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ShivyC installation script.""" 2 | 3 | from codecs import open 4 | from os import path 5 | 6 | from setuptools import find_packages, setup 7 | 8 | import shivyc 9 | 10 | f"ShivyC only supports Python 3.6 or later" # f-str is Syntax Err before Py3.6 11 | 12 | VERSION = str(shivyc.__version__) 13 | DOWNLOAD_URL = ('https://github.com/ShivamSarodia/ShivyC/archive/' 14 | f'{VERSION}.tar.gz') 15 | 16 | here = path.abspath(path.dirname(__file__)) 17 | 18 | # Get the long description from the README file 19 | with open(path.join(here, 'README.md'), encoding='utf-8') as f: 20 | long_description = f.read() 21 | 22 | setup( 23 | name='shivyc', 24 | version=VERSION, 25 | 26 | description='A C compiler written in Python', 27 | long_description=long_description, 28 | long_description_content_type='text/markdown', 29 | 30 | # The project's main homepage. 31 | url='https://github.com/ShivamSarodia/ShivyC', 32 | 33 | # Author details 34 | author='Shivam Sarodia', 35 | author_email='ssarodia@gmail.com', 36 | 37 | # Choose your license 38 | license='MIT', 39 | 40 | # See https://pypi.python.org/pypi?%3Aaction=list_classifiers 41 | classifiers=[ 42 | 'Development Status :: 3 - Alpha', 43 | 'Environment :: Console', 44 | 'License :: OSI Approved :: MIT License', 45 | 'Programming Language :: Python :: 3', 46 | 'Programming Language :: Python :: 3.5', 47 | 'Programming Language :: Python :: 3.6', 48 | 'Programming Language :: C', 49 | 'Topic :: Software Development', 50 | 'Topic :: Software Development :: Compilers', 51 | 'Topic :: Software Development :: Code Generators', 52 | 'Topic :: Software Development :: Build Tools', 53 | 'Intended Audience :: Developers', 54 | 'Intended Audience :: Education', 55 | ], 56 | 57 | keywords='shivyc compiler c programming parsing', 58 | packages=find_packages(exclude=['tests']), 59 | install_requires=[], 60 | package_data={ 61 | 'shivyc': ['include/*.h'], 62 | }, 63 | 64 | entry_points={ 65 | 'console_scripts': [ 66 | 'shivyc=shivyc.main:main', 67 | ], 68 | }, 69 | 70 | download_url=DOWNLOAD_URL 71 | ) 72 | -------------------------------------------------------------------------------- /shivyc/__init__.py: -------------------------------------------------------------------------------- 1 | """This module implements the core ShivyC functionality.""" 2 | 3 | __version__ = "0.5.0" 4 | -------------------------------------------------------------------------------- /shivyc/asm_cmds.py: -------------------------------------------------------------------------------- 1 | """This module defines and implements classes representing assembly commands. 2 | 3 | The _ASMCommand object is the base class for most ASM commands. Some commands 4 | inherit from _ASMCommandMultiSize or _JumpCommand instead. 5 | 6 | """ 7 | 8 | 9 | class _ASMCommand: 10 | """Base class for a standard ASMCommand, like `add` or `imul`. 11 | 12 | This class is used for ASM commands which take arguments of the same 13 | size. 14 | """ 15 | 16 | name = None 17 | 18 | def __init__(self, dest=None, source=None, size=None): 19 | self.dest = dest.asm_str(size) if dest else None 20 | self.source = source.asm_str(size) if source else None 21 | self.size = size 22 | 23 | def __str__(self): 24 | s = "\t" + self.name 25 | if self.dest: 26 | s += " " + self.dest 27 | if self.source: 28 | s += ", " + self.source 29 | return s 30 | 31 | 32 | class _ASMCommandMultiSize: 33 | """Base class for an ASMCommand which takes arguments of different sizes. 34 | 35 | For example, `movsx` and `movzx`. 36 | """ 37 | 38 | name = None 39 | 40 | def __init__(self, dest, source, source_size, dest_size): 41 | self.dest = dest.asm_str(source_size) 42 | self.source = source.asm_str(dest_size) 43 | self.source_size = source_size 44 | self.dest_size = dest_size 45 | 46 | def __str__(self): 47 | s = "\t" + self.name 48 | if self.dest: 49 | s += " " + self.dest 50 | if self.source: 51 | s += ", " + self.source 52 | return s 53 | 54 | 55 | class _JumpCommand: 56 | """Base class for jump commands.""" 57 | 58 | name = None 59 | 60 | def __init__(self, target): 61 | self.target = target 62 | 63 | def __str__(self): 64 | s = "\t" + self.name + " " + self.target 65 | return s 66 | 67 | 68 | class Comment: 69 | """Class for comments.""" 70 | 71 | def __init__(self, msg): # noqa: D102 72 | self.msg = msg 73 | 74 | def __str__(self): # noqa: D102 75 | return "\t// " + self.msg 76 | 77 | 78 | class Label: 79 | """Class for label.""" 80 | 81 | def __init__(self, label): # noqa: D102 82 | self.label = label 83 | 84 | def __str__(self): # noqa: D102 85 | return self.label + ":" 86 | 87 | 88 | class Lea: 89 | """Class for lea command.""" 90 | 91 | name = "lea" 92 | 93 | def __init__(self, dest, source): # noqa: D102 94 | self.dest = dest 95 | self.source = source 96 | 97 | def __str__(self): # noqa: D102 98 | return ("\t" + self.name + " " + self.dest.asm_str(8) + ", " 99 | "" + self.source.asm_str(0)) 100 | 101 | 102 | class Je(_JumpCommand): name = "je" # noqa: D101 103 | 104 | 105 | class Jne(_JumpCommand): name = "jne" # noqa: D101 106 | 107 | 108 | class Jg(_JumpCommand): name = "jg" # noqa: D101 109 | 110 | 111 | class Jge(_JumpCommand): name = "jge" # noqa: D101 112 | 113 | 114 | class Jl(_JumpCommand): name = "jl" # noqa: D101 115 | 116 | 117 | class Jle(_JumpCommand): name = "jle" # noqa: D101 118 | 119 | 120 | class Ja(_JumpCommand): name = "ja" # noqa: D101 121 | 122 | 123 | class Jae(_JumpCommand): name = "jae" # noqa: D101 124 | 125 | 126 | class Jb(_JumpCommand): name = "jb" # noqa: D101 127 | 128 | 129 | class Jbe(_JumpCommand): name = "jbe" # noqa: D101 130 | 131 | 132 | class Jmp(_JumpCommand): name = "jmp" # noqa: D101 133 | 134 | 135 | class Movsx(_ASMCommandMultiSize): name = "movsx" # noqa: D101 136 | 137 | 138 | class Movzx(_ASMCommandMultiSize): name = "movzx" # noqa: D101 139 | 140 | 141 | class Mov(_ASMCommand): name = "mov" # noqa: D101 142 | 143 | 144 | class Add(_ASMCommand): name = "add" # noqa: D101 145 | 146 | 147 | class Sub(_ASMCommand): name = "sub" # noqa: D101 148 | 149 | 150 | class Neg(_ASMCommand): name = "neg" # noqa: D101 151 | 152 | 153 | class Not(_ASMCommand): name = "not" # noqa: D101 154 | 155 | 156 | class Div(_ASMCommand): name = "div" # noqa: D101 157 | 158 | 159 | class Imul(_ASMCommand): name = "imul" # noqa: D101 160 | 161 | 162 | class Idiv(_ASMCommand): name = "idiv" # noqa: D101 163 | 164 | 165 | class Cdq(_ASMCommand): name = "cdq" # noqa: D101 166 | 167 | 168 | class Cqo(_ASMCommand): name = "cqo" # noqa: D101 169 | 170 | 171 | class Xor(_ASMCommand): name = "xor" # noqa: D101 172 | 173 | 174 | class Cmp(_ASMCommand): name = "cmp" # noqa: D101 175 | 176 | 177 | class Pop(_ASMCommand): name = "pop" # noqa: D101 178 | 179 | 180 | class Push(_ASMCommand): name = "push" # noqa: D101 181 | 182 | 183 | class Call(_ASMCommand): name = "call" # noqa: D101 184 | 185 | 186 | class Ret(_ASMCommand): name = "ret" # noqa: D101 187 | 188 | 189 | class Sar(_ASMCommandMultiSize): name = "sar" # noqa: D101 190 | 191 | 192 | class Sal(_ASMCommandMultiSize): name = "sal" # noqa: D101 193 | -------------------------------------------------------------------------------- /shivyc/errors.py: -------------------------------------------------------------------------------- 1 | """Objects used for error reporting. 2 | 3 | The main executable catches an exception and prints it for the user. 4 | 5 | """ 6 | 7 | 8 | class ErrorCollector: 9 | """Class that accumulates all errors and warnings encountered. 10 | 11 | We create a global instance of this class so all parts of the compiler can 12 | access it and add errors to it. This is kind of janky, but it's much easier 13 | than passing an instance to every function that could potentially fail. 14 | 15 | """ 16 | 17 | def __init__(self): 18 | """Initialize the ErrorCollector with no issues to report.""" 19 | self.issues = [] 20 | 21 | def add(self, issue): 22 | """Add the given error or warning (CompilerError) to list of errors.""" 23 | self.issues.append(issue) 24 | self.issues.sort() 25 | 26 | def ok(self): 27 | """Return True iff there are no errors.""" 28 | return not any(not issue.warning for issue in self.issues) 29 | 30 | def show(self): # pragma: no cover 31 | """Display all warnings and errors.""" 32 | for issue in self.issues: 33 | print(issue) 34 | 35 | def clear(self): 36 | """Clear all warnings and errors. Intended only for testing use.""" 37 | self.issues = [] 38 | 39 | 40 | error_collector = ErrorCollector() 41 | 42 | 43 | class Position: 44 | """Class representing a position in source code. 45 | 46 | file (str) - Name of file in which this position is located. 47 | line (int) - Line number in file at which this position is located. 48 | col (int) - Horizontal column at which this position is located 49 | full_line (str) - Full text of the line containing this position. 50 | Specifically, full_line[col + 1] should be this position. 51 | """ 52 | 53 | def __init__(self, file, line, col, full_line): 54 | """Initialize Position object.""" 55 | self.file = file 56 | self.line = line 57 | self.col = col 58 | self.full_line = full_line 59 | 60 | def __add__(self, other): 61 | """Increment Position column by one.""" 62 | return Position(self.file, self.line, self.col + 1, self.full_line) 63 | 64 | 65 | class Range: 66 | """Class representing a continuous range between two positions. 67 | 68 | start (Position) - start position, inclusive 69 | end (Position) - end position, inclusive 70 | """ 71 | 72 | def __init__(self, start, end=None): 73 | """Initialize Range objects.""" 74 | self.start = start 75 | self.end = end or start 76 | 77 | def __add__(self, other): 78 | """Add Range objects by concatenating their ranges.""" 79 | return Range(self.start, other.end) 80 | 81 | 82 | class CompilerError(Exception): 83 | """Class representing compile-time errors. 84 | 85 | message (str) - User-friendly explanation of the error. Should 86 | begin with a lowercase letter. 87 | file_name (str) - File name in which the error occurred. 88 | line_number (int) - Line number on which the error occurred 89 | 90 | """ 91 | 92 | def __init__(self, descrip, range=None, warning=False): 93 | """Initialize error. 94 | 95 | descrip (str) - Description of the error. 96 | range (Range) - Range at which the error appears. 97 | warning (bool) - True if this is a warning 98 | 99 | """ 100 | self.descrip = descrip 101 | self.range = range 102 | self.warning = warning 103 | 104 | def __str__(self): # pragma: no cover 105 | """Return a pretty-printable statement of the error. 106 | 107 | Also includes the line on which the error occurred. 108 | """ 109 | error_color = "\x1B[31m" 110 | warn_color = "\x1B[33m" 111 | reset_color = "\x1B[0m" 112 | bold_color = "\033[1m" 113 | 114 | color_code = warn_color if self.warning else error_color 115 | issue_type = "warning" if self.warning else "error" 116 | 117 | # A position range is provided, and this is output to terminal. 118 | if self.range: 119 | 120 | # Set "indicator" to display the ^^^s and ---s to indicate the 121 | # error location. 122 | indicator = warn_color 123 | indicator += " " * (self.range.start.col - 1) 124 | 125 | if (self.range.start.line == self.range.end.line and 126 | self.range.start.file == self.range.end.file): 127 | 128 | if self.range.end.col == self.range.start.col: 129 | indicator += "^" 130 | else: 131 | indicator += "-" * (self.range.end.col - 132 | self.range.start.col + 1) 133 | 134 | else: 135 | indicator += "-" * (len(self.range.start.full_line) - 136 | self.range.start.col + 1) 137 | 138 | indicator += reset_color 139 | return (f"{bold_color}{self.range.start.file}:" 140 | f"{self.range.start.line}:{self.range.start.col}: " 141 | f"{color_code}{issue_type}:{reset_color} {self.descrip}\n" 142 | f" {self.range.start.full_line}\n" 143 | f" {indicator}") 144 | # A position range is not provided and this is output to terminal. 145 | else: 146 | return (f"{bold_color}shivyc: {color_code}{issue_type}:" 147 | f"{reset_color} {self.descrip}") 148 | 149 | def __lt__(self, other): # pragma: no cover 150 | """Provides sort order for printing errors.""" 151 | 152 | # everything without a range comes before everything with range 153 | if not self.range: 154 | return bool(other.range) 155 | 156 | # no opinion between errors in different files 157 | if self.range.start.file != other.range.start.file: 158 | return False 159 | 160 | this_tuple = self.range.start.line, self.range.start.col 161 | other_tuple = other.range.start.line, other.range.start.col 162 | return this_tuple < other_tuple 163 | -------------------------------------------------------------------------------- /shivyc/il_cmds/__init__.py: -------------------------------------------------------------------------------- 1 | """Package for IL commands.""" 2 | -------------------------------------------------------------------------------- /shivyc/il_cmds/base.py: -------------------------------------------------------------------------------- 1 | """Base ILCommand interface definition.""" 2 | 3 | import shivyc.ctypes as ctypes 4 | from shivyc.spots import LiteralSpot 5 | 6 | 7 | class ILCommand: 8 | """Base interface for all IL commands.""" 9 | 10 | def inputs(self): 11 | """Return list of ILValues used as input for this command.""" 12 | raise NotImplementedError 13 | 14 | def outputs(self): 15 | """Return list of values output by this command. 16 | 17 | No command executed after this one should rely on the previous value of 18 | any ILValue in the list returned here. ("Previous value" denotes the 19 | value of the ILValue before this command was executed.) 20 | """ 21 | raise NotImplementedError 22 | 23 | def clobber(self): 24 | """Return list of Spots this command may clobber, other than outputs. 25 | 26 | Every Spot this command may change the value at (not including 27 | the Spots of the outputs returned above) must be included in the 28 | return list of this function. For example, signed division clobbers 29 | RAX and RDX. 30 | """ 31 | return [] 32 | 33 | def rel_spot_conf(self): 34 | """Return the relative conflict list of this command. 35 | 36 | This function returns a dictionary mapping an ILValue to a list of 37 | ILValues. If this contains a key value pair k: [t1, t2], then the 38 | register allocator will attempt to place ILValue k in a different spot 39 | than t1 and t2. It is assumed by default that the inputs do 40 | not share the same spot. 41 | """ 42 | return {} 43 | 44 | def abs_spot_conf(self): 45 | """Return the absolute conflict list of this command. 46 | 47 | This function returns a dictionary mapping an ILValue to a list of 48 | spots. If this contains a key value pair k: [s1, s2], then the 49 | register allocator will attempt to place ILValue k in a spot which 50 | is not s1 or s2. 51 | """ 52 | return {} 53 | 54 | def rel_spot_pref(self): 55 | """Return the relative spot preference list (RSPL) for this command. 56 | 57 | A RSPL is a dictionary mapping an ILValue to a list of ILValues. For 58 | each key k in the RSPL, the register allocator will attempt to place k 59 | in the same spot as an ILValue in RSPL[k] is placed. RSPL[k] is 60 | ordered by preference; that is, the register allocator will 61 | first attempt to place k in the same spot as RSPL[k][0], then the 62 | same spot as RSPL[k][1], etc. 63 | """ 64 | return {} 65 | 66 | def abs_spot_pref(self): 67 | """Return the absolute spot preference list (ASPL) for this command. 68 | 69 | An ASPL is a dictionary mapping an ILValue to a list of Spots. For 70 | each key k in the ASPL, the register allocator will attempt to place k 71 | in one of the spots listed in ASPL[k]. ASPL[k] is ordered by 72 | preference; that is, the register allocator will first attempt to 73 | place k in ASPL[k][0], then in ASPL[k][1], etc. 74 | """ 75 | return {} 76 | 77 | def references(self): 78 | """Return the potential reference list (PRL) for this command. 79 | 80 | The PRL is a dictionary mapping an ILValue to a list of ILValues. 81 | If this command may directly set some ILValue k to be a pointer to 82 | other ILValue(s) v1, v2, etc., then PRL[k] must include v1, v2, 83 | etc. That is, suppose the PRL was {t1: [t2]}. This means that 84 | ILValue t1 output from this command may be a pointer to the ILValue t2. 85 | 86 | In addition, the PRL may have a None key. The value of this key is a 87 | list of ILValue which are being internally referenced, but no 88 | pointers to them are being externally returned. 89 | """ 90 | return {} 91 | 92 | def indir_write(self): 93 | """Return list of values that may be dereferenced for indirect write. 94 | 95 | For example, suppose this list is [t1, t2]. Then, this command may 96 | be changing the value of the ILValue pointed to by t1 or the value 97 | of the ILValue pointed to by t2. 98 | """ 99 | return [] 100 | 101 | def indir_read(self): 102 | """Return list of values that may be dereferenced for indirect read. 103 | 104 | For example, suppose this list is [t1, t2]. Then, this command may 105 | be reading the value of the ILValue pointed to by t1 or the value of 106 | the ILValue pointed to by t2. 107 | """ 108 | return [] 109 | 110 | def label_name(self): 111 | """If this command is a label, return its name.""" 112 | return None 113 | 114 | def targets(self): 115 | """Return list of any labels to which this command may jump.""" 116 | return [] 117 | 118 | def make_asm(self, spotmap, home_spots, get_reg, asm_code): 119 | """Generate assembly code for this command. 120 | 121 | spotmap - Dictionary mapping every input and output ILValue to a spot. 122 | 123 | home_spots - Dictionary mapping every ILValue that appears in any of 124 | self.references().values() to a memory spot. This is used for 125 | commands which need the address of an ILValue. 126 | 127 | get_reg - Function to get a usable register. Accepts two arguments, 128 | first is a list of Spot preferences, and second is a list of 129 | unacceptable spots. This function returns a register which is not 130 | in the list of unacceptable spots and can be clobbered. Note this 131 | could be one of the registers the input is stored in, if the input 132 | ILValues are not being used after this command executes. 133 | 134 | asm_code - ASMCode object to add code to 135 | """ 136 | raise NotImplementedError 137 | 138 | def _is_imm(self, spot): 139 | """Return True iff given spot is an immediate operand.""" 140 | return isinstance(spot, LiteralSpot) 141 | 142 | def _is_imm8(self, spot): 143 | """Return True if given spot is a 8-bit immediate operand.""" 144 | return self._is_imm(spot) and int(spot.detail) < ctypes.unsig_char_max 145 | 146 | def _is_imm64(self, spot): 147 | """Return True if given spot is a 64-bit immediate operand.""" 148 | return (isinstance(spot, LiteralSpot) and 149 | (int(spot.detail) > ctypes.int_max or 150 | int(spot.detail) < ctypes.int_min)) 151 | -------------------------------------------------------------------------------- /shivyc/il_cmds/compare.py: -------------------------------------------------------------------------------- 1 | """IL commands for comparisons.""" 2 | 3 | import shivyc.asm_cmds as asm_cmds 4 | from shivyc.il_cmds.base import ILCommand 5 | from shivyc.spots import MemSpot, LiteralSpot 6 | 7 | 8 | class _GeneralCmp(ILCommand): 9 | """_GeneralCmp - base class for the comparison commands. 10 | 11 | IL value output must have int type. arg1, arg2 must have types that can 12 | be compared for equality bit-by-bit. No type conversion or promotion is 13 | done here. 14 | 15 | """ 16 | signed_cmp_cmd = None 17 | unsigned_cmp_cmd = None 18 | 19 | def __init__(self, output, arg1, arg2): # noqa D102 20 | self.output = output 21 | self.arg1 = arg1 22 | self.arg2 = arg2 23 | 24 | def inputs(self): # noqa D102 25 | return [self.arg1, self.arg2] 26 | 27 | def outputs(self): # noqa D102 28 | return [self.output] 29 | 30 | def rel_spot_conf(self): # noqa D102 31 | return {self.output: [self.arg1, self.arg2]} 32 | 33 | def _fix_both_literal_or_mem(self, arg1_spot, arg2_spot, regs, 34 | get_reg, asm_code): 35 | """Fix arguments if both are literal or memory. 36 | 37 | Adds any called registers to given regs list. Returns tuple where 38 | first element is new spot of arg1 and second element is new spot of 39 | arg2. 40 | """ 41 | if ((isinstance(arg1_spot, LiteralSpot) and 42 | isinstance(arg2_spot, LiteralSpot)) or 43 | (isinstance(arg1_spot, MemSpot) and 44 | isinstance(arg2_spot, MemSpot))): 45 | 46 | # No need to worry about r overlapping with arg1 or arg2 because 47 | # in this case both are literal/memory. 48 | r = get_reg([], regs) 49 | regs.append(r) 50 | asm_code.add(asm_cmds.Mov(r, arg1_spot, self.arg1.ctype.size)) 51 | return r, arg2_spot 52 | else: 53 | return arg1_spot, arg2_spot 54 | 55 | def _fix_either_literal64(self, arg1_spot, arg2_spot, regs, 56 | get_reg, asm_code): 57 | """Move any 64-bit immediate operands to register.""" 58 | 59 | if self._is_imm64(arg1_spot): 60 | size = self.arg1.ctype.size 61 | new_arg1_spot = get_reg([], regs + [arg2_spot]) 62 | asm_code.add(asm_cmds.Mov(new_arg1_spot, arg1_spot, size)) 63 | return new_arg1_spot, arg2_spot 64 | 65 | # We cannot have both cases because _fix_both_literal is called 66 | # before this. 67 | elif self._is_imm64(arg2_spot): 68 | size = self.arg2.ctype.size 69 | new_arg2_spot = get_reg([], regs + [arg1_spot]) 70 | asm_code.add(asm_cmds.Mov(new_arg2_spot, arg2_spot, size)) 71 | return arg1_spot, new_arg2_spot 72 | else: 73 | return arg1_spot, arg2_spot 74 | 75 | def _fix_literal_wrong_order(self, arg1_spot, arg2_spot): 76 | """If the first operand is a literal, swap the operands.""" 77 | if self._is_imm(arg1_spot): 78 | return arg2_spot, arg1_spot 79 | else: 80 | return arg1_spot, arg2_spot 81 | 82 | def make_asm(self, spotmap, home_spots, get_reg, asm_code): # noqa D102 83 | regs = [] 84 | 85 | result = get_reg([spotmap[self.output]], 86 | [spotmap[self.arg1], spotmap[self.arg2]]) 87 | regs.append(result) 88 | 89 | out_size = self.output.ctype.size 90 | eq_val_spot = LiteralSpot(1) 91 | asm_code.add(asm_cmds.Mov(result, eq_val_spot, out_size)) 92 | 93 | arg1_spot, arg2_spot = self._fix_both_literal_or_mem( 94 | spotmap[self.arg1], spotmap[self.arg2], regs, get_reg, asm_code) 95 | arg1_spot, arg2_spot = self._fix_either_literal64( 96 | arg1_spot, arg2_spot, regs, get_reg, asm_code) 97 | arg1_spot, arg2_spot = self._fix_literal_wrong_order( 98 | arg1_spot, arg2_spot) 99 | 100 | arg_size = self.arg1.ctype.size 101 | neq_val_spot = LiteralSpot(0) 102 | label = asm_code.get_label() 103 | 104 | asm_code.add(asm_cmds.Cmp(arg1_spot, arg2_spot, arg_size)) 105 | asm_code.add(self.cmp_command()(label)) 106 | asm_code.add(asm_cmds.Mov(result, neq_val_spot, out_size)) 107 | asm_code.add(asm_cmds.Label(label)) 108 | 109 | if result != spotmap[self.output]: 110 | asm_code.add(asm_cmds.Mov(spotmap[self.output], result, out_size)) 111 | 112 | def cmp_command(self): 113 | ctype = self.arg1.ctype 114 | if ctype.is_pointer() or (ctype.is_integral() and not ctype.signed): 115 | return self.unsigned_cmp_cmd 116 | else: 117 | return self.signed_cmp_cmd 118 | 119 | 120 | class NotEqualCmp(_GeneralCmp): 121 | """NotEqualCmp - checks whether arg1 and arg2 are not equal. 122 | 123 | IL value output must have int type. arg1, arg2 must all have the same 124 | type. No type conversion or promotion is done here. 125 | 126 | """ 127 | signed_cmp_cmd = asm_cmds.Jne 128 | unsigned_cmp_cmd = asm_cmds.Jne 129 | 130 | 131 | class EqualCmp(_GeneralCmp): 132 | """EqualCmp - checks whether arg1 and arg2 are equal. 133 | 134 | IL value output must have int type. arg1, arg2 must all have the same 135 | type. No type conversion or promotion is done here. 136 | 137 | """ 138 | signed_cmp_cmd = asm_cmds.Je 139 | unsigned_cmp_cmd = asm_cmds.Je 140 | 141 | 142 | class LessCmp(_GeneralCmp): 143 | signed_cmp_cmd = asm_cmds.Jl 144 | unsigned_cmp_cmd = asm_cmds.Jb 145 | 146 | 147 | class GreaterCmp(_GeneralCmp): 148 | signed_cmp_cmd = asm_cmds.Jg 149 | unsigned_cmp_cmd = asm_cmds.Ja 150 | 151 | 152 | class LessOrEqCmp(_GeneralCmp): 153 | signed_cmp_cmd = asm_cmds.Jle 154 | unsigned_cmp_cmd = asm_cmds.Jbe 155 | 156 | 157 | class GreaterOrEqCmp(_GeneralCmp): 158 | signed_cmp_cmd = asm_cmds.Jge 159 | unsigned_cmp_cmd = asm_cmds.Jae 160 | -------------------------------------------------------------------------------- /shivyc/il_cmds/control.py: -------------------------------------------------------------------------------- 1 | """IL commands for labels, jumps, and function calls.""" 2 | 3 | import shivyc.asm_cmds as asm_cmds 4 | import shivyc.spots as spots 5 | from shivyc.il_cmds.base import ILCommand 6 | from shivyc.spots import LiteralSpot 7 | 8 | 9 | class Label(ILCommand): 10 | """Label - Analogous to an ASM label.""" 11 | 12 | def __init__(self, label): # noqa D102 13 | """The label argument is an string label name unique to this label.""" 14 | self.label = label 15 | 16 | def inputs(self): # noqa D102 17 | return [] 18 | 19 | def outputs(self): # noqa D102 20 | return [] 21 | 22 | def label_name(self): # noqa D102 23 | return self.label 24 | 25 | def make_asm(self, spotmap, home_spots, get_reg, asm_code): # noqa D102 26 | asm_code.add(asm_cmds.Label(self.label)) 27 | 28 | 29 | class Jump(ILCommand): 30 | """Jumps unconditionally to a label.""" 31 | 32 | def __init__(self, label): # noqa D102 33 | self.label = label 34 | 35 | def inputs(self): # noqa D102 36 | return [] 37 | 38 | def outputs(self): # noqa D102 39 | return [] 40 | 41 | def targets(self): # noqa D102 42 | return [self.label] 43 | 44 | def make_asm(self, spotmap, home_spots, get_reg, asm_code): # noqa D102 45 | asm_code.add(asm_cmds.Jmp(self.label)) 46 | 47 | 48 | class _GeneralJump(ILCommand): 49 | """General class for jumping to a label based on condition.""" 50 | 51 | # ASM command to output for this jump IL command. 52 | # (asm_cmds.Je for JumpZero and asm_cmds.Jne for JumpNotZero) 53 | asm_cmd = None 54 | 55 | def __init__(self, cond, label): # noqa D102 56 | self.cond = cond 57 | self.label = label 58 | 59 | def inputs(self): # noqa D102 60 | return [self.cond] 61 | 62 | def outputs(self): # noqa D102 63 | return [] 64 | 65 | def targets(self): # noqa D102 66 | return [self.label] 67 | 68 | def make_asm(self, spotmap, home_spots, get_reg, asm_code): # noqa D102 69 | size = self.cond.ctype.size 70 | 71 | if isinstance(spotmap[self.cond], LiteralSpot): 72 | r = get_reg() 73 | asm_code.add(asm_cmds.Mov(r, spotmap[self.cond], size)) 74 | cond_spot = r 75 | else: 76 | cond_spot = spotmap[self.cond] 77 | 78 | zero_spot = LiteralSpot("0") 79 | asm_code.add(asm_cmds.Cmp(cond_spot, zero_spot, size)) 80 | asm_code.add(self.command(self.label)) 81 | 82 | 83 | class JumpZero(_GeneralJump): 84 | """Jumps to a label if given condition is zero.""" 85 | 86 | command = asm_cmds.Je 87 | 88 | 89 | class JumpNotZero(_GeneralJump): 90 | """Jumps to a label if given condition is zero.""" 91 | 92 | command = asm_cmds.Jne 93 | 94 | 95 | class Return(ILCommand): 96 | """RETURN - returns the given value from function. 97 | 98 | If arg is None, then returns from the function without putting any value 99 | in the return register. Today, only supports values that fit in one 100 | register. 101 | """ 102 | 103 | def __init__(self, arg=None): # noqa D102 104 | # arg must already be cast to return type 105 | self.arg = arg 106 | 107 | def inputs(self): # noqa D102 108 | return [self.arg] 109 | 110 | def outputs(self): # noqa D102 111 | return [] 112 | 113 | def clobber(self): # noqa D102 114 | return [spots.RAX] 115 | 116 | def abs_spot_pref(self): # noqa D102 117 | return {self.arg: [spots.RAX]} 118 | 119 | def make_asm(self, spotmap, home_spots, get_reg, asm_code): # noqa D102 120 | if self.arg and spotmap[self.arg] != spots.RAX: 121 | size = self.arg.ctype.size 122 | asm_code.add(asm_cmds.Mov(spots.RAX, spotmap[self.arg], size)) 123 | 124 | asm_code.add(asm_cmds.Mov(spots.RSP, spots.RBP, 8)) 125 | asm_code.add(asm_cmds.Pop(spots.RBP, None, 8)) 126 | asm_code.add(asm_cmds.Ret()) 127 | 128 | 129 | class Call(ILCommand): 130 | """Call a given function. 131 | 132 | func - Pointer to function 133 | args - Arguments of the function, in left-to-right order. Must match the 134 | parameter types the function expects. 135 | ret - If function has non-void return type, IL value to save the return 136 | value. Its type must match the function return value. 137 | """ 138 | 139 | arg_regs = [spots.RDI, spots.RSI, spots.RDX, spots.RCX, spots.R8, spots.R9] 140 | 141 | def __init__(self, func, args, ret): # noqa D102 142 | self.func = func 143 | self.args = args 144 | self.ret = ret 145 | self.void_return = self.func.ctype.arg.ret.is_void() 146 | 147 | if len(self.args) > len(self.arg_regs): 148 | raise NotImplementedError("too many arguments") 149 | 150 | def inputs(self): # noqa D102 151 | return [self.func] + self.args 152 | 153 | def outputs(self): # noqa D102 154 | return [] if self.void_return else [self.ret] 155 | 156 | def clobber(self): # noqa D102 157 | # All caller-saved registers are clobbered by function call 158 | return [spots.RAX, spots.RCX, spots.RDX, spots.RSI, spots.RDI, 159 | spots.R8, spots.R9, spots.R10, spots.R11] 160 | 161 | def abs_spot_pref(self): # noqa D102 162 | prefs = {} if self.void_return else {self.ret: [spots.RAX]} 163 | for arg, reg in zip(self.args, self.arg_regs): 164 | prefs[arg] = [reg] 165 | 166 | return prefs 167 | 168 | def abs_spot_conf(self): # noqa D102 169 | # We don't want the function pointer to be in the same register as 170 | # an argument will be placed into. 171 | return {self.func: self.arg_regs[0:len(self.args)]} 172 | 173 | def indir_write(self): # noqa D102 174 | return self.args 175 | 176 | def indir_read(self): # noqa D102 177 | return self.args 178 | 179 | def make_asm(self, spotmap, home_spots, get_reg, asm_code): # noqa D102 180 | func_spot = spotmap[self.func] 181 | 182 | func_size = self.func.ctype.size 183 | ret_size = self.func.ctype.arg.ret.size 184 | 185 | # Check if function pointer spot will be clobbered by moving the 186 | # arguments into the correct registers. 187 | if spotmap[self.func] in self.arg_regs[0:len(self.args)]: 188 | # Get a register which isn't one of the unallowed registers. 189 | r = get_reg([], self.arg_regs[0:len(self.args)]) 190 | asm_code.add(asm_cmds.Mov(r, spotmap[self.func], func_size)) 191 | func_spot = r 192 | 193 | for arg, reg in zip(self.args, self.arg_regs): 194 | if spotmap[arg] == reg: 195 | continue 196 | asm_code.add(asm_cmds.Mov(reg, spotmap[arg], arg.ctype.size)) 197 | 198 | asm_code.add(asm_cmds.Call(func_spot, None, self.func.ctype.size)) 199 | 200 | if not self.void_return and spotmap[self.ret] != spots.RAX: 201 | asm_code.add(asm_cmds.Mov(spotmap[self.ret], spots.RAX, ret_size)) 202 | -------------------------------------------------------------------------------- /shivyc/il_cmds/math.py: -------------------------------------------------------------------------------- 1 | """IL commands for mathematical operations.""" 2 | 3 | import shivyc.asm_cmds as asm_cmds 4 | import shivyc.spots as spots 5 | from shivyc.il_cmds.base import ILCommand 6 | 7 | 8 | class _AddMult(ILCommand): 9 | """Base class for ADD, MULT, and SUB.""" 10 | 11 | # Indicates whether this instruction is commutative. If not, 12 | # a "neg" instruction is inserted when the order is flipped. Override 13 | # this value in subclasses. 14 | comm = False 15 | 16 | # The ASM instruction to generate for this command. Override this value 17 | # in subclasses. 18 | Inst = None 19 | 20 | def __init__(self, output, arg1, arg2): # noqa D102 21 | self.output = output 22 | self.arg1 = arg1 23 | self.arg2 = arg2 24 | 25 | def inputs(self): # noqa D102 26 | return [self.arg1, self.arg2] 27 | 28 | def outputs(self): # noqa D102 29 | return [self.output] 30 | 31 | def rel_spot_pref(self): # noqa D102 32 | return {self.output: [self.arg1, self.arg2]} 33 | 34 | def make_asm(self, spotmap, home_spots, get_reg, asm_code): # noqa D102 35 | """Make the ASM for ADD, MULT, and SUB.""" 36 | ctype = self.arg1.ctype 37 | size = ctype.size 38 | 39 | arg1_spot = spotmap[self.arg1] 40 | arg2_spot = spotmap[self.arg2] 41 | 42 | # Get temp register for computation. 43 | temp = get_reg([spotmap[self.output], 44 | arg1_spot, 45 | arg2_spot]) 46 | 47 | if temp == arg1_spot: 48 | if not self._is_imm64(arg2_spot): 49 | asm_code.add(self.Inst(temp, arg2_spot, size)) 50 | else: 51 | temp2 = get_reg([], [temp]) 52 | asm_code.add(asm_cmds.Mov(temp2, arg2_spot, size)) 53 | asm_code.add(self.Inst(temp, temp2, size)) 54 | elif temp == arg2_spot: 55 | if not self._is_imm64(arg1_spot): 56 | asm_code.add(self.Inst(temp, arg1_spot, size)) 57 | else: 58 | temp2 = get_reg([], [temp]) 59 | asm_code.add(asm_cmds.Mov(temp2, arg1_spot, size)) 60 | asm_code.add(self.Inst(temp, temp2, size)) 61 | 62 | if not self.comm: 63 | asm_code.add(asm_cmds.Neg(temp, None, size)) 64 | 65 | else: 66 | if (not self._is_imm64(arg1_spot) and 67 | not self._is_imm64(arg2_spot)): 68 | asm_code.add(asm_cmds.Mov(temp, arg1_spot, size)) 69 | asm_code.add(self.Inst(temp, arg2_spot, size)) 70 | elif (self._is_imm64(arg1_spot) and 71 | not self._is_imm64(arg2_spot)): 72 | asm_code.add(asm_cmds.Mov(temp, arg1_spot, size)) 73 | asm_code.add(self.Inst(temp, arg2_spot, size)) 74 | elif (not self._is_imm64(arg1_spot) and 75 | self._is_imm64(arg2_spot)): 76 | asm_code.add(asm_cmds.Mov(temp, arg2_spot, size)) 77 | asm_code.add(self.Inst(temp, arg1_spot, size)) 78 | if not self.comm: 79 | asm_code.add(asm_cmds.Neg(temp, None, size)) 80 | 81 | else: # both are imm64 82 | raise NotImplementedError( 83 | "never reach because of constant folding") 84 | 85 | if temp != spotmap[self.output]: 86 | asm_code.add(asm_cmds.Mov(spotmap[self.output], temp, size)) 87 | 88 | 89 | class Add(_AddMult): 90 | """Adds arg1 and arg2, then saves to output. 91 | 92 | IL values output, arg1, arg2 must all have the same type. No type 93 | conversion or promotion is done here. 94 | """ 95 | comm = True 96 | Inst = asm_cmds.Add 97 | 98 | 99 | class Subtr(_AddMult): 100 | """Subtracts arg1 and arg2, then saves to output. 101 | 102 | ILValues output, arg1, and arg2 must all have types of the same size. 103 | """ 104 | comm = False 105 | Inst = asm_cmds.Sub 106 | 107 | 108 | class Mult(_AddMult): 109 | """Multiplies arg1 and arg2, then saves to output. 110 | 111 | IL values output, arg1, arg2 must all have the same type. No type 112 | conversion or promotion is done here. 113 | """ 114 | comm = True 115 | Inst = asm_cmds.Imul 116 | 117 | 118 | class _BitShiftCmd(ILCommand): 119 | """Base class for bitwise shift commands.""" 120 | 121 | # The ASM instruction to generate for this command. Override this value 122 | # in subclasses. 123 | Inst = None 124 | 125 | def __init__(self, output, arg1, arg2): # noqa D102 126 | self.output = output 127 | self.arg1 = arg1 128 | self.arg2 = arg2 129 | 130 | def inputs(self): # noqa D102 131 | return [self.arg1, self.arg2] 132 | 133 | def outputs(self): # noqa D102 134 | return [self.output] 135 | 136 | def clobber(self): # noqa D102 137 | return [spots.RCX] 138 | 139 | def abs_spot_pref(self): # noqa D102 140 | return {self.arg2: [spots.RCX]} 141 | 142 | def rel_spot_pref(self): # noqa D102 143 | return {self.output: [self.arg1]} 144 | 145 | def make_asm(self, spotmap, home_spots, get_reg, asm_code): # noqa D102 146 | arg1_spot = spotmap[self.arg1] 147 | arg1_size = self.arg1.ctype.size 148 | arg2_spot = spotmap[self.arg2] 149 | arg2_size = self.arg2.ctype.size 150 | 151 | # According Intel® 64 and IA-32 software developer's manual 152 | # Vol. 2B 4-582 second (count) operand must be represented as 153 | # imm8 or CL register. 154 | if not self._is_imm8(arg2_spot) and arg2_spot != spots.RCX: 155 | if arg1_spot == spots.RCX: 156 | out_spot = spotmap[self.output] 157 | temp_spot = get_reg([out_spot, arg1_spot], 158 | [arg2_spot, spots.RCX]) 159 | asm_code.add(asm_cmds.Mov(temp_spot, arg1_spot, arg1_size)) 160 | arg1_spot = temp_spot 161 | asm_code.add(asm_cmds.Mov(spots.RCX, arg2_spot, arg2_size)) 162 | arg2_spot = spots.RCX 163 | 164 | if spotmap[self.output] == arg1_spot: 165 | asm_code.add(self.Inst(arg1_spot, arg2_spot, arg1_size, 1)) 166 | else: 167 | out_spot = spotmap[self.output] 168 | temp_spot = get_reg([out_spot, arg1_spot], [arg2_spot]) 169 | if arg1_spot != temp_spot: 170 | asm_code.add(asm_cmds.Mov(temp_spot, arg1_spot, arg1_size)) 171 | asm_code.add(self.Inst(temp_spot, arg2_spot, arg1_size, 1)) 172 | if temp_spot != out_spot: 173 | asm_code.add(asm_cmds.Mov(out_spot, temp_spot, arg1_size)) 174 | 175 | 176 | class RBitShift(_BitShiftCmd): 177 | """Right bitwise shift operator for IL value. 178 | Shifts each bit in IL value left operand to the right by position 179 | indicated by right operand.""" 180 | 181 | Inst = asm_cmds.Sar 182 | 183 | 184 | class LBitShift(_BitShiftCmd): 185 | """Left bitwise shift operator for IL value. 186 | Shifts each bit in IL value left operand to the left by position 187 | indicated by right operand.""" 188 | 189 | Inst = asm_cmds.Sal 190 | 191 | 192 | class _DivMod(ILCommand): 193 | """Base class for ILCommand Div and Mod.""" 194 | 195 | # Register which contains the value we want after the x86 div or idiv 196 | # command is executed. For the Div IL command, this is spots.RAX, 197 | # and for the Mod IL command, this is spots.RDX. 198 | return_reg = None 199 | 200 | def __init__(self, output, arg1, arg2): 201 | self.output = output 202 | self.arg1 = arg1 203 | self.arg2 = arg2 204 | 205 | def inputs(self): # noqa D102 206 | return [self.arg1, self.arg2] 207 | 208 | def outputs(self): # noqa D102 209 | return [self.output] 210 | 211 | def clobber(self): # noqa D102 212 | return [spots.RAX, spots.RDX] 213 | 214 | def abs_spot_conf(self): # noqa D102 215 | return {self.arg2: [spots.RDX, spots.RAX]} 216 | 217 | def abs_spot_pref(self): # noqa D102 218 | return {self.output: [self.return_reg], 219 | self.arg1: [spots.RAX]} 220 | 221 | def make_asm(self, spotmap, home_spots, get_reg, asm_code): # noqa D102 222 | ctype = self.arg1.ctype 223 | size = ctype.size 224 | 225 | output_spot = spotmap[self.output] 226 | arg1_spot = spotmap[self.arg1] 227 | arg2_spot = spotmap[self.arg2] 228 | 229 | # Move first operand into RAX if we can do so without clobbering 230 | # other argument 231 | moved_to_rax = False 232 | if spotmap[self.arg1] != spots.RAX and spotmap[self.arg2] != spots.RAX: 233 | moved_to_rax = True 234 | asm_code.add(asm_cmds.Mov(spots.RAX, arg1_spot, size)) 235 | 236 | # If the divisor is a literal or in a bad register, we must move it 237 | # to a register. 238 | if (self._is_imm(spotmap[self.arg2]) or 239 | spotmap[self.arg2] in [spots.RAX, spots.RDX]): 240 | r = get_reg([], [spots.RAX, spots.RDX]) 241 | asm_code.add(asm_cmds.Mov(r, arg2_spot, size)) 242 | arg2_final_spot = r 243 | else: 244 | arg2_final_spot = arg2_spot 245 | 246 | # If we did not move to RAX above, do so here. 247 | if not moved_to_rax and arg1_spot != self.return_reg: 248 | asm_code.add(asm_cmds.Mov(spots.RAX, arg1_spot, size)) 249 | 250 | if ctype.signed: 251 | if ctype.size == 4: 252 | asm_code.add(asm_cmds.Cdq()) 253 | elif ctype.size == 8: 254 | asm_code.add(asm_cmds.Cqo()) 255 | asm_code.add(asm_cmds.Idiv(arg2_final_spot, None, size)) 256 | else: 257 | # zero out RDX register 258 | asm_code.add(asm_cmds.Xor(spots.RDX, spots.RDX, size)) 259 | asm_code.add(asm_cmds.Div(arg2_final_spot, None, size)) 260 | 261 | if spotmap[self.output] != self.return_reg: 262 | asm_code.add(asm_cmds.Mov(output_spot, self.return_reg, size)) 263 | 264 | 265 | class Div(_DivMod): 266 | """Divides given IL values. 267 | 268 | IL values output, arg1, arg2 must all have the same type of size at least 269 | int. No type conversion or promotion is done here. 270 | 271 | """ 272 | 273 | return_reg = spots.RAX 274 | 275 | 276 | class Mod(_DivMod): 277 | """Divides given IL values. 278 | 279 | IL values output, arg1, arg2 must all have the same type of size at least 280 | int. No type conversion or promotion is done here. 281 | 282 | """ 283 | 284 | return_reg = spots.RDX 285 | 286 | 287 | class _NegNot(ILCommand): 288 | """Base class for NEG and NOT.""" 289 | 290 | # The ASM instruction to generate for this command. Override this value 291 | # in subclasses. 292 | Inst = None 293 | 294 | def __init__(self, output, arg): # noqa D102 295 | self.output = output 296 | self.arg = arg 297 | 298 | def inputs(self): # noqa D102 299 | return [self.arg] 300 | 301 | def outputs(self): # noqa D102 302 | return [self.output] 303 | 304 | def rel_spot_pref(self): # noqa D102 305 | return {self.output: [self.arg]} 306 | 307 | def make_asm(self, spotmap, home_spots, get_reg, asm_code): # noqa D102 308 | size = self.arg.ctype.size 309 | 310 | output_spot = spotmap[self.output] 311 | arg_spot = spotmap[self.arg] 312 | 313 | if output_spot != arg_spot: 314 | asm_code.add(asm_cmds.Mov(output_spot, arg_spot, size)) 315 | asm_code.add(self.Inst(output_spot, None, size)) 316 | 317 | 318 | class Neg(_NegNot): 319 | """Negates given IL value (two's complement). 320 | 321 | No type promotion is done here. 322 | 323 | """ 324 | 325 | Inst = asm_cmds.Neg 326 | 327 | 328 | class Not(_NegNot): 329 | """Logically negates each bit of given IL value (one's complement). 330 | 331 | No type promotion is done here. 332 | 333 | """ 334 | 335 | Inst = asm_cmds.Not 336 | -------------------------------------------------------------------------------- /shivyc/include/ctype.h: -------------------------------------------------------------------------------- 1 | int isalnum(int); 2 | int isalpha(int); 3 | int isascii(int); 4 | int iscntrl(int); 5 | int isdigit(int); 6 | int isgraph(int); 7 | int islower(int); 8 | int isprint(int); 9 | int ispunct(int); 10 | int isspace(int); 11 | int isupper(int); 12 | int isxdigit(int); 13 | int toascii(int); 14 | int tolower(int); 15 | int toupper(int); 16 | -------------------------------------------------------------------------------- /shivyc/include/stdbool.h: -------------------------------------------------------------------------------- 1 | // TODO: define bool, true, and false as macros 2 | typedef _Bool bool; 3 | -------------------------------------------------------------------------------- /shivyc/include/stdio.h: -------------------------------------------------------------------------------- 1 | typedef unsigned long size_t; 2 | typedef struct __FILE_STRUCT FILE; 3 | 4 | void clearerr(FILE *); 5 | char *ctermid(char *); 6 | int fclose(FILE *); 7 | FILE *fdopen(int, const char *); 8 | int feof(FILE *); 9 | int ferror(FILE *); 10 | int fflush(FILE *); 11 | int fgetc(FILE*); 12 | char *fgets(char *, int, FILE*); 13 | int fileno(FILE*); 14 | void flockfile(FILE*); 15 | FILE *fopen(const char *, const char *); 16 | int fprintf(); // vargargs not yet implemented 17 | int fputc(int, FILE*); 18 | int fputs(const char *, FILE*); 19 | size_t fread(void *, size_t, size_t, FILE *); 20 | FILE *freopen(const char *, const char *, FILE *); 21 | int fscanf(); // vargargs not yet implemented 22 | int fseek(FILE *, long, int); 23 | long ftell(FILE *); 24 | int ftrylockfile(FILE *); 25 | void funlockfile(FILE *); 26 | size_t fwrite(const void *, size_t, size_t, FILE*); 27 | int getc(FILE*); 28 | int getchar(void); 29 | int getc_unlocked(FILE *); 30 | int getchar_unlocked(void); 31 | // removed in C11 32 | // char *gets(char *); 33 | int getw(void *); 34 | int pclose(void *); 35 | void perror(const char *); 36 | void *popen(const char *, const char *); 37 | int printf(); // vargargs not yet implemented 38 | int putc(int, FILE*); 39 | int putchar(int); 40 | int putc_unlocked(int, FILE*); 41 | int putchar_unlocked(int); 42 | int puts(const char *); 43 | int putw(int, FILE*); 44 | int remove(const char *); 45 | int rename(const char *, const char *); 46 | void rewind(FILE *); 47 | int scanf(); // vargargs not yet implemented 48 | void setbuf(FILE*, char *); 49 | int setvbuf(FILE*, char *, int, size_t); 50 | int snprintf(); // vargargs not yet implemented 51 | int sprintf(); // vargargs not yet implemented 52 | int sscanf(); // vargargs not yet implemented 53 | char *tempnam(const char *, const char *); 54 | FILE *tmpfile(void); 55 | char *tmpnam(char *); 56 | int ungetc(int, FILE*); 57 | 58 | extern void* stdin; 59 | extern void* stdout; 60 | extern void* stderr; 61 | -------------------------------------------------------------------------------- /shivyc/include/stdlib.h: -------------------------------------------------------------------------------- 1 | typedef unsigned long size_t; 2 | 3 | long a64l(const char *); 4 | void abort(void); 5 | int abs(int); 6 | int atexit(void (*)(void)); 7 | int atoi(const char *); 8 | long atol(const char *); 9 | void *bsearch(const void *, const void *, size_t, size_t, int (*)(const void *, const void *)); 10 | void *calloc(size_t, size_t); 11 | void exit(int); 12 | void free(void *); 13 | char *getenv(const char *); 14 | int getsubopt(char **, char *const *, char **); 15 | // int grantpt(int); cannot find in C11 stdlib 16 | char *initstate(unsigned int, char *, size_t); 17 | long jrand48(unsigned short[3]); 18 | char *l64a(long); 19 | long labs(long); 20 | void lcong48(unsigned short[7]); 21 | long lrand48(void); 22 | void *malloc(size_t); 23 | int mblen(const char *, size_t); 24 | char *mktemp(char *); 25 | int mkstemp(char *); 26 | long mrand48(void); 27 | long nrand48(unsigned short [3]); 28 | char *ptsname(int); 29 | int putenv(char *); 30 | void qsort(void *, size_t, size_t, int (*)(const void *, const void *)); 31 | int rand(void); 32 | int rand_r(unsigned int *); 33 | long random(void); 34 | void *realloc(void *, size_t); 35 | char *realpath(const char *, char *); 36 | unsigned short seed48(unsigned short[3]); 37 | // void setkey(char *); cannot find in C11 stdlib 38 | char *setstate(char *); 39 | void srand(unsigned int); 40 | void srand48(long); 41 | void srandom(unsigned); 42 | long strtol(const char *, char **, int); 43 | size_t strtoul(const char *, char **, int); 44 | int system(const char *); 45 | int unlockpt(int); 46 | -------------------------------------------------------------------------------- /shivyc/include/string.h: -------------------------------------------------------------------------------- 1 | typedef unsigned long size_t; 2 | 3 | void *memchr(const void *, int, size_t); 4 | int memcmp(const void *, const void *, size_t); 5 | void *memcpy(void *, const void *, size_t); 6 | void *memmove(void *, const void *, size_t); 7 | void *memset(void *, int, size_t); 8 | char *strcat(char *, const char *); 9 | char *strchr(const char *, int); 10 | int strcmp(const char *, const char *); 11 | int strcoll(const char *, const char *); 12 | char *strcpy(char *, const char *); 13 | size_t strcspn(const char *, const char *); 14 | char *strdup(const char *); 15 | char *strerror(int); 16 | size_t strlen(const char *); 17 | char *strncat(char *, const char *, size_t); 18 | int strncmp(const char *, const char *, size_t); 19 | char *strncpy(char *, const char *, size_t); 20 | char *strpbrk(const char *, const char *); 21 | char *strrchr(const char *, int); 22 | size_t strspn(const char *, const char *); 23 | char *strstr(const char *, const char *); 24 | char *strtok(char *, const char *); 25 | char *strtok_r(char *, const char *, char **); 26 | size_t strxfrm(char *, const char *, size_t); 27 | -------------------------------------------------------------------------------- /shivyc/main.py: -------------------------------------------------------------------------------- 1 | """Main executable for ShivyC compiler.""" 2 | 3 | import argparse 4 | import pathlib 5 | import platform 6 | import subprocess 7 | import sys 8 | 9 | import shivyc.lexer as lexer 10 | import shivyc.preproc as preproc 11 | 12 | from shivyc.errors import error_collector, CompilerError 13 | from shivyc.parser.parser import parse 14 | from shivyc.il_gen import ILCode, SymbolTable, Context 15 | from shivyc.asm_gen import ASMCode, ASMGen 16 | 17 | 18 | def main(): 19 | """Run the main compiler script.""" 20 | 21 | if platform.system() != "Linux": 22 | err = "only x86_64 Linux is supported" 23 | print(CompilerError(err)) 24 | return 1 25 | 26 | arguments = get_arguments() 27 | 28 | objs = [] 29 | for file in arguments.files: 30 | objs.append(process_file(file, arguments)) 31 | 32 | error_collector.show() 33 | if any(not obj for obj in objs): 34 | return 1 35 | else: 36 | if not link("out", objs): 37 | err = "linker returned non-zero status" 38 | print(CompilerError(err)) 39 | return 1 40 | return 0 41 | 42 | 43 | def process_file(file, args): 44 | """Process single file into object file and return the object file name.""" 45 | if file[-2:] == ".c": 46 | return process_c_file(file, args) 47 | elif file[-2:] == ".o": 48 | return file 49 | else: 50 | err = f"unknown file type: '{file}'" 51 | error_collector.add(CompilerError(err)) 52 | return None 53 | 54 | 55 | def process_c_file(file, args): 56 | """Compile a C file into an object file and return the object file name.""" 57 | code = read_file(file) 58 | if not error_collector.ok(): 59 | return None 60 | 61 | token_list = lexer.tokenize(code, file) 62 | if not error_collector.ok(): 63 | return None 64 | 65 | token_list = preproc.process(token_list, file) 66 | if not error_collector.ok(): 67 | return None 68 | 69 | # If parse() can salvage the input into a parse tree, it may emit an 70 | # ast_root even when there are errors saved to the error_collector. In this 71 | # case, we still want to continue the compiler stages. 72 | ast_root = parse(token_list) 73 | if not ast_root: 74 | return None 75 | 76 | il_code = ILCode() 77 | symbol_table = SymbolTable() 78 | ast_root.make_il(il_code, symbol_table, Context()) 79 | if not error_collector.ok(): 80 | return None 81 | 82 | asm_code = ASMCode() 83 | ASMGen(il_code, symbol_table, asm_code, args).make_asm() 84 | asm_source = asm_code.full_code() 85 | if not error_collector.ok(): 86 | return None 87 | 88 | asm_file = file[:-2] + ".s" 89 | obj_file = file[:-2] + ".o" 90 | 91 | write_asm(asm_source, asm_file) 92 | if not error_collector.ok(): 93 | return None 94 | 95 | assemble(asm_file, obj_file) 96 | if not error_collector.ok(): 97 | return None 98 | 99 | return obj_file 100 | 101 | 102 | def get_arguments(): 103 | """Get the command-line arguments. 104 | 105 | This function sets up the argument parser. Returns a tuple containing 106 | an object storing the argument values and a list of the file names 107 | provided on command line. 108 | """ 109 | desc = """Compile, assemble, and link C files. Option flags starting 110 | with `-z` are primarily for debugging or diagnostic purposes.""" 111 | parser = argparse.ArgumentParser( 112 | description=desc, usage="shivyc [-h] [options] files...") 113 | 114 | # Files to compile 115 | parser.add_argument("files", metavar="files", nargs="+") 116 | 117 | # Boolean flag for whether to print register allocator performance info 118 | parser.add_argument("-z-reg-alloc-perf", 119 | help="display register allocator performance info", 120 | dest="show_reg_alloc_perf", action="store_true") 121 | 122 | return parser.parse_args() 123 | 124 | 125 | def read_file(file): 126 | """Return the contents of the given file.""" 127 | try: 128 | with open(file) as c_file: 129 | return c_file.read() 130 | except IOError as e: 131 | descrip = f"could not read file: '{file}'" 132 | error_collector.add(CompilerError(descrip)) 133 | 134 | 135 | def write_asm(asm_source, asm_filename): 136 | """Save the given assembly source to disk at asm_filename. 137 | 138 | asm_source (str) - Full assembly source code. 139 | asm_filename (str) - Filename to which to save the generated assembly. 140 | 141 | """ 142 | try: 143 | with open(asm_filename, "w") as s_file: 144 | s_file.write(asm_source) 145 | except IOError: 146 | descrip = f"could not write output file '{asm_filename}'" 147 | error_collector.add(CompilerError(descrip)) 148 | 149 | 150 | def assemble(asm_name, obj_name): 151 | """Assemble the given assembly file into an object file.""" 152 | try: 153 | subprocess.check_call(["as", "-64", "-o", obj_name, asm_name]) 154 | return True 155 | except subprocess.CalledProcessError: 156 | err = "assembler returned non-zero status" 157 | error_collector.add(CompilerError(err)) 158 | return False 159 | 160 | 161 | def link(binary_name, obj_names): 162 | """Assemble the given object files into a binary.""" 163 | 164 | try: 165 | crtnum = find_crtnum() 166 | if not crtnum: return 167 | 168 | crti = find_library_or_err("crti.o") 169 | if not crti: return 170 | 171 | linux_so = find_library_or_err("ld-linux-x86-64.so.2") 172 | if not linux_so: return 173 | 174 | crtn = find_library_or_err("crtn.o") 175 | if not crtn: return 176 | 177 | # find files to link 178 | subprocess.check_call( 179 | ["ld", "-dynamic-linker", linux_so, crtnum, crti, "-lc"] 180 | + obj_names + [crtn, "-o", binary_name]) 181 | 182 | return True 183 | 184 | except subprocess.CalledProcessError: 185 | return False 186 | 187 | 188 | def find_crtnum(): 189 | """Search for the crt0, crt1, or crt2.o files on the system. 190 | 191 | If one is found, return its path. Else, add an error to the 192 | error_collector and return None. 193 | """ 194 | for file in ["crt2.o", "crt1.o", "crt0.o"]: 195 | crt = find_library(file) 196 | if crt: return crt 197 | 198 | err = "could not find crt0.o, crt1.o, or crt2.o for linking" 199 | error_collector.add(CompilerError(err)) 200 | return None 201 | 202 | 203 | def find_library_or_err(file): 204 | """Search the given library file and return path if found. 205 | 206 | If not found, add an error to the error collector and return None. 207 | """ 208 | path = find_library(file) 209 | if not path: 210 | err = f"could not find {file}" 211 | error_collector.add(CompilerError(err)) 212 | return None 213 | else: 214 | return path 215 | 216 | 217 | def find_library(file): 218 | """Search the given library file by searching in common directories. 219 | 220 | If found, returns the path. Otherwise, returns None. 221 | """ 222 | search_paths = [pathlib.Path("/usr/local/lib/x86_64-linux-gnu"), 223 | pathlib.Path("/lib/x86_64-linux-gnu"), 224 | pathlib.Path("/usr/lib/x86_64-linux-gnu"), 225 | pathlib.Path("/usr/local/lib64"), 226 | pathlib.Path("/lib64"), 227 | pathlib.Path("/usr/lib64"), 228 | pathlib.Path("/usr/local/lib"), 229 | pathlib.Path("/lib"), 230 | pathlib.Path("/usr/lib"), 231 | pathlib.Path("/usr/x86_64-linux-gnu/lib64"), 232 | pathlib.Path("/usr/x86_64-linux-gnu/lib")] 233 | 234 | for path in search_paths: 235 | full = path.joinpath(file) 236 | if full.is_file(): 237 | return str(full) 238 | return None 239 | 240 | 241 | if __name__ == "__main__": 242 | sys.exit(main()) 243 | -------------------------------------------------------------------------------- /shivyc/parser/__init__.py: -------------------------------------------------------------------------------- 1 | """Dummy __init__ for parser.""" 2 | -------------------------------------------------------------------------------- /shivyc/parser/expression.py: -------------------------------------------------------------------------------- 1 | """Parser logic that parses expression nodes.""" 2 | 3 | import shivyc.parser.utils as p 4 | import shivyc.token_kinds as token_kinds 5 | import shivyc.tree.expr_nodes as expr_nodes 6 | import shivyc.tree.decl_nodes as decl_nodes 7 | from shivyc.parser.utils import (add_range, match_token, token_is, ParserError, 8 | raise_error, log_error, token_in) 9 | 10 | 11 | @add_range 12 | def parse_expression(index): 13 | """Parse expression.""" 14 | return parse_series( 15 | index, parse_assignment, 16 | {token_kinds.comma: expr_nodes.MultiExpr}) 17 | 18 | 19 | @add_range 20 | def parse_assignment(index): 21 | """Parse an assignment expression.""" 22 | 23 | # This is a slight departure from the official grammar. The standard 24 | # specifies that a program is syntactically correct only if the 25 | # left-hand side of an assignment expression is a unary expression. But, 26 | # to provide more helpful error messages, we permit the left side to be 27 | # any non-assignment expression. 28 | 29 | left, index = parse_conditional(index) 30 | 31 | if index < len(p.tokens): 32 | op = p.tokens[index] 33 | kind = op.kind 34 | else: 35 | op = None 36 | kind = None 37 | 38 | node_types = {token_kinds.equals: expr_nodes.Equals, 39 | token_kinds.plusequals: expr_nodes.PlusEquals, 40 | token_kinds.minusequals: expr_nodes.MinusEquals, 41 | token_kinds.starequals: expr_nodes.StarEquals, 42 | token_kinds.divequals: expr_nodes.DivEquals, 43 | token_kinds.modequals: expr_nodes.ModEquals} 44 | 45 | if kind in node_types: 46 | right, index = parse_assignment(index + 1) 47 | return node_types[kind](left, right, op), index 48 | else: 49 | return left, index 50 | 51 | 52 | @add_range 53 | def parse_conditional(index): 54 | """Parse a conditional expression.""" 55 | # TODO: Parse ternary operator 56 | return parse_logical_or(index) 57 | 58 | 59 | @add_range 60 | def parse_logical_or(index): 61 | """Parse logical or expression.""" 62 | return parse_series( 63 | index, parse_logical_and, 64 | {token_kinds.bool_or: expr_nodes.BoolOr}) 65 | 66 | 67 | @add_range 68 | def parse_logical_and(index): 69 | """Parse logical and expression.""" 70 | # TODO: Implement bitwise operators here. 71 | return parse_series( 72 | index, parse_equality, 73 | {token_kinds.bool_and: expr_nodes.BoolAnd}) 74 | 75 | 76 | @add_range 77 | def parse_equality(index): 78 | """Parse equality expression.""" 79 | # TODO: Implement relational and shift expressions here. 80 | return parse_series( 81 | index, parse_relational, 82 | {token_kinds.twoequals: expr_nodes.Equality, 83 | token_kinds.notequal: expr_nodes.Inequality}) 84 | 85 | 86 | @add_range 87 | def parse_relational(index): 88 | """Parse relational expression.""" 89 | return parse_series( 90 | index, parse_bitwise, 91 | {token_kinds.lt: expr_nodes.LessThan, 92 | token_kinds.gt: expr_nodes.GreaterThan, 93 | token_kinds.ltoe: expr_nodes.LessThanOrEq, 94 | token_kinds.gtoe: expr_nodes.GreaterThanOrEq}) 95 | 96 | 97 | @add_range 98 | def parse_bitwise(index): 99 | return parse_series( 100 | index, parse_additive, 101 | {token_kinds.lbitshift: expr_nodes.LBitShift, 102 | token_kinds.rbitshift: expr_nodes.RBitShift}) 103 | 104 | 105 | @add_range 106 | def parse_additive(index): 107 | """Parse additive expression.""" 108 | return parse_series( 109 | index, parse_multiplicative, 110 | {token_kinds.plus: expr_nodes.Plus, 111 | token_kinds.minus: expr_nodes.Minus}) 112 | 113 | 114 | @add_range 115 | def parse_multiplicative(index): 116 | """Parse multiplicative expression.""" 117 | return parse_series( 118 | index, parse_cast, 119 | {token_kinds.star: expr_nodes.Mult, 120 | token_kinds.slash: expr_nodes.Div, 121 | token_kinds.mod: expr_nodes.Mod}) 122 | 123 | 124 | @add_range 125 | def parse_cast(index): 126 | """Parse cast expression.""" 127 | 128 | from shivyc.parser.declaration import ( 129 | parse_abstract_declarator, parse_spec_qual_list) 130 | 131 | with log_error(): 132 | match_token(index, token_kinds.open_paren, ParserError.AT) 133 | specs, index = parse_spec_qual_list(index + 1) 134 | node, index = parse_abstract_declarator(index) 135 | match_token(index, token_kinds.close_paren, ParserError.AT) 136 | 137 | decl_node = decl_nodes.Root(specs, [node]) 138 | expr_node, index = parse_cast(index + 1) 139 | return expr_nodes.Cast(decl_node, expr_node), index 140 | 141 | return parse_unary(index) 142 | 143 | 144 | @add_range 145 | def parse_unary(index): 146 | """Parse unary expression.""" 147 | 148 | unary_args = {token_kinds.incr: (parse_unary, expr_nodes.PreIncr), 149 | token_kinds.decr: (parse_unary, expr_nodes.PreDecr), 150 | token_kinds.amp: (parse_cast, expr_nodes.AddrOf), 151 | token_kinds.star: (parse_cast, expr_nodes.Deref), 152 | token_kinds.bool_not: (parse_cast, expr_nodes.BoolNot), 153 | token_kinds.plus: (parse_cast, expr_nodes.UnaryPlus), 154 | token_kinds.minus: (parse_cast, expr_nodes.UnaryMinus), 155 | token_kinds.compl: (parse_cast, expr_nodes.Compl)} 156 | 157 | if token_in(index, unary_args): 158 | parse_func, NodeClass = unary_args[p.tokens[index].kind] 159 | subnode, index = parse_func(index + 1) 160 | return NodeClass(subnode), index 161 | elif token_is(index, token_kinds.sizeof_kw): 162 | with log_error(): 163 | node, index = parse_unary(index + 1) 164 | return expr_nodes.SizeofExpr(node), index 165 | 166 | from shivyc.parser.declaration import ( 167 | parse_abstract_declarator, parse_spec_qual_list) 168 | 169 | match_token(index + 1, token_kinds.open_paren, ParserError.AFTER) 170 | specs, index = parse_spec_qual_list(index + 2) 171 | node, index = parse_abstract_declarator(index) 172 | match_token(index, token_kinds.close_paren, ParserError.AT) 173 | decl_node = decl_nodes.Root(specs, [node]) 174 | 175 | return expr_nodes.SizeofType(decl_node), index + 1 176 | else: 177 | return parse_postfix(index) 178 | 179 | 180 | @add_range 181 | def parse_postfix(index): 182 | """Parse postfix expression.""" 183 | cur, index = parse_primary(index) 184 | 185 | while True: 186 | old_range = cur.r 187 | 188 | if token_is(index, token_kinds.open_sq_brack): 189 | index += 1 190 | arg, index = parse_expression(index) 191 | cur = expr_nodes.ArraySubsc(cur, arg) 192 | match_token(index, token_kinds.close_sq_brack, ParserError.GOT) 193 | index += 1 194 | 195 | elif (token_is(index, token_kinds.dot) or 196 | token_is(index, token_kinds.arrow)): 197 | index += 1 198 | match_token(index, token_kinds.identifier, ParserError.AFTER) 199 | member = p.tokens[index] 200 | 201 | if token_is(index - 1, token_kinds.dot): 202 | cur = expr_nodes.ObjMember(cur, member) 203 | else: 204 | cur = expr_nodes.ObjPtrMember(cur, member) 205 | 206 | index += 1 207 | 208 | elif token_is(index, token_kinds.open_paren): 209 | args = [] 210 | index += 1 211 | 212 | if token_is(index, token_kinds.close_paren): 213 | return expr_nodes.FuncCall(cur, args), index + 1 214 | 215 | while True: 216 | arg, index = parse_assignment(index) 217 | args.append(arg) 218 | 219 | if token_is(index, token_kinds.comma): 220 | index += 1 221 | else: 222 | break 223 | 224 | index = match_token( 225 | index, token_kinds.close_paren, ParserError.GOT) 226 | 227 | return expr_nodes.FuncCall(cur, args), index 228 | 229 | elif token_is(index, token_kinds.incr): 230 | index += 1 231 | cur = expr_nodes.PostIncr(cur) 232 | elif token_is(index, token_kinds.decr): 233 | index += 1 234 | cur = expr_nodes.PostDecr(cur) 235 | else: 236 | return cur, index 237 | 238 | cur.r = old_range + p.tokens[index - 1].r 239 | 240 | 241 | @add_range 242 | def parse_primary(index): 243 | """Parse primary expression.""" 244 | if token_is(index, token_kinds.open_paren): 245 | node, index = parse_expression(index + 1) 246 | index = match_token(index, token_kinds.close_paren, ParserError.GOT) 247 | return expr_nodes.ParenExpr(node), index 248 | elif token_is(index, token_kinds.number): 249 | return expr_nodes.Number(p.tokens[index]), index + 1 250 | elif (token_is(index, token_kinds.identifier) 251 | and not p.symbols.is_typedef(p.tokens[index])): 252 | return expr_nodes.Identifier(p.tokens[index]), index + 1 253 | elif token_is(index, token_kinds.string): 254 | return expr_nodes.String(p.tokens[index].content), index + 1 255 | elif token_is(index, token_kinds.char_string): 256 | chars = p.tokens[index].content 257 | return expr_nodes.Number(chars[0]), index + 1 258 | else: 259 | raise_error("expected expression", index, ParserError.GOT) 260 | 261 | 262 | def parse_series(index, parse_base, separators): 263 | """Parse a series of symbols joined together with given separator(s). 264 | 265 | index (int) - Index at which to start searching. 266 | parse_base (function) - A parse_* function that parses the base symbol. 267 | separators (Dict(TokenKind -> Node)) - The separators that join 268 | instances of the base symbol. Each separator corresponds to a Node, 269 | which is the Node produced to join two expressions connected with that 270 | separator. 271 | """ 272 | cur, index = parse_base(index) 273 | while True: 274 | for s in separators: 275 | if token_is(index, s): 276 | break 277 | else: 278 | return cur, index 279 | 280 | tok = p.tokens[index] 281 | new, index = parse_base(index + 1) 282 | cur = separators[s](cur, new, tok) 283 | -------------------------------------------------------------------------------- /shivyc/parser/parser.py: -------------------------------------------------------------------------------- 1 | """Entry point for the parser logic that converts a token list to an AST. 2 | 3 | Each parse_* function corresponds to a unique non-terminal symbol in the C 4 | grammar. It parses utils.tokens beginning at the given index to try to match 5 | a grammar rule that generates the desired symbol. If a match is found, 6 | it returns a tuple (Node, index) where Node is an AST node for that match 7 | and index is one more than that of the last token consumed in that parse. If no 8 | match is not found, raises an appropriate ParserError. 9 | 10 | Whenever a call to a parse_* function raises a ParserError, the calling 11 | function must either catch the exception and log it (using log_error), 12 | or pass the exception on to the caller. A function takes the first approach 13 | if there are other possible parse paths to consider, and the second approach if 14 | the function cannot parse the entity from the tokens. 15 | 16 | """ 17 | import shivyc.parser.utils as p 18 | import shivyc.tree.nodes as nodes 19 | 20 | from shivyc.errors import error_collector 21 | from shivyc.parser.utils import (add_range, log_error, ParserError, 22 | raise_error) 23 | from shivyc.parser.declaration import parse_declaration, parse_func_definition 24 | 25 | 26 | def parse(tokens_to_parse): 27 | """Parse the given tokens into an AST. 28 | 29 | Also, as the entry point for the parser, responsible for setting the 30 | tokens global variable. 31 | """ 32 | p.best_error = None 33 | p.tokens = tokens_to_parse 34 | 35 | with log_error(): 36 | return parse_root(0)[0] 37 | 38 | error_collector.add(p.best_error) 39 | return None 40 | 41 | 42 | @add_range 43 | def parse_root(index): 44 | """Parse the given tokens into an AST.""" 45 | items = [] 46 | while True: 47 | with log_error(): 48 | item, index = parse_func_definition(index) 49 | items.append(item) 50 | continue 51 | 52 | with log_error(): 53 | item, index = parse_declaration(index) 54 | items.append(item) 55 | continue 56 | 57 | # If neither parse attempt above worked, break 58 | break 59 | 60 | # If there are tokens that remain unparsed, complain 61 | if not p.tokens[index:]: 62 | return nodes.Root(items), index 63 | else: 64 | raise_error("unexpected token", index, ParserError.AT) 65 | -------------------------------------------------------------------------------- /shivyc/parser/statement.py: -------------------------------------------------------------------------------- 1 | """Parser logic that parses statement nodes.""" 2 | 3 | import shivyc.token_kinds as token_kinds 4 | import shivyc.tree.nodes as nodes 5 | import shivyc.parser.utils as p 6 | 7 | from shivyc.parser.declaration import parse_declaration 8 | from shivyc.parser.expression import parse_expression 9 | from shivyc.parser.utils import (add_range, log_error, match_token, token_is, 10 | ParserError) 11 | 12 | 13 | @add_range 14 | def parse_statement(index): 15 | """Parse a statement. 16 | 17 | Try each possible type of statement, catching/logging exceptions upon 18 | parse failures. On the last try, raise the exception on to the caller. 19 | 20 | """ 21 | for func in (parse_compound_statement, parse_return, parse_break, 22 | parse_continue, parse_if_statement, parse_while_statement, 23 | parse_for_statement): 24 | with log_error(): 25 | return func(index) 26 | 27 | return parse_expr_statement(index) 28 | 29 | 30 | @add_range 31 | def parse_compound_statement(index): 32 | """Parse a compound statement. 33 | 34 | A compound statement is a collection of several 35 | statements/declarations, enclosed in braces. 36 | 37 | """ 38 | p.symbols.new_scope() 39 | index = match_token(index, token_kinds.open_brack, ParserError.GOT) 40 | 41 | # Read block items (statements/declarations) until there are no more. 42 | items = [] 43 | while True: 44 | with log_error(): 45 | item, index = parse_statement(index) 46 | items.append(item) 47 | continue 48 | 49 | with log_error(): 50 | item, index = parse_declaration(index) 51 | items.append(item) 52 | continue 53 | 54 | break 55 | 56 | index = match_token(index, token_kinds.close_brack, ParserError.GOT) 57 | p.symbols.end_scope() 58 | 59 | return nodes.Compound(items), index 60 | 61 | 62 | @add_range 63 | def parse_return(index): 64 | """Parse a return statement. 65 | 66 | Ex: return 5; 67 | 68 | """ 69 | index = match_token(index, token_kinds.return_kw, ParserError.GOT) 70 | if token_is(index, token_kinds.semicolon): 71 | return nodes.Return(None), index 72 | 73 | node, index = parse_expression(index) 74 | 75 | index = match_token(index, token_kinds.semicolon, ParserError.AFTER) 76 | return nodes.Return(node), index 77 | 78 | 79 | @add_range 80 | def parse_break(index): 81 | """Parse a break statement.""" 82 | index = match_token(index, token_kinds.break_kw, ParserError.GOT) 83 | index = match_token(index, token_kinds.semicolon, ParserError.AFTER) 84 | return nodes.Break(), index 85 | 86 | 87 | @add_range 88 | def parse_continue(index): 89 | """Parse a continue statement.""" 90 | index = match_token(index, token_kinds.continue_kw, ParserError.GOT) 91 | index = match_token(index, token_kinds.semicolon, ParserError.AFTER) 92 | return nodes.Continue(), index 93 | 94 | 95 | @add_range 96 | def parse_if_statement(index): 97 | """Parse an if statement.""" 98 | 99 | index = match_token(index, token_kinds.if_kw, ParserError.GOT) 100 | index = match_token(index, token_kinds.open_paren, ParserError.AFTER) 101 | conditional, index = parse_expression(index) 102 | index = match_token(index, token_kinds.close_paren, ParserError.AFTER) 103 | statement, index = parse_statement(index) 104 | 105 | # If there is an else that follows, parse that too. 106 | is_else = token_is(index, token_kinds.else_kw) 107 | if not is_else: 108 | else_statement = None 109 | else: 110 | index = match_token(index, token_kinds.else_kw, ParserError.GOT) 111 | else_statement, index = parse_statement(index) 112 | 113 | return nodes.IfStatement(conditional, statement, else_statement), index 114 | 115 | 116 | @add_range 117 | def parse_while_statement(index): 118 | """Parse a while statement.""" 119 | index = match_token(index, token_kinds.while_kw, ParserError.GOT) 120 | index = match_token(index, token_kinds.open_paren, ParserError.AFTER) 121 | conditional, index = parse_expression(index) 122 | index = match_token(index, token_kinds.close_paren, ParserError.AFTER) 123 | statement, index = parse_statement(index) 124 | 125 | return nodes.WhileStatement(conditional, statement), index 126 | 127 | 128 | @add_range 129 | def parse_for_statement(index): 130 | """Parse a for statement.""" 131 | index = match_token(index, token_kinds.for_kw, ParserError.GOT) 132 | index = match_token(index, token_kinds.open_paren, ParserError.AFTER) 133 | 134 | first, second, third, index = _get_for_clauses(index) 135 | stat, index = parse_statement(index) 136 | 137 | return nodes.ForStatement(first, second, third, stat), index 138 | 139 | 140 | def _get_for_clauses(index): 141 | """Get the three clauses of a for-statement. 142 | 143 | index - Index of the beginning of the first clause. 144 | 145 | returns - Tuple (Node, Node, Node, index). Each Node is the corresponding 146 | clause, or None if that clause is empty The index is that of first token 147 | after the close paren terminating the for clauses. 148 | 149 | Raises exception on malformed input. 150 | """ 151 | 152 | first, index = _get_first_for_clause(index) 153 | 154 | if token_is(index, token_kinds.semicolon): 155 | second = None 156 | index += 1 157 | else: 158 | second, index = parse_expression(index) 159 | index = match_token(index, token_kinds.semicolon, ParserError.AFTER) 160 | 161 | if token_is(index, token_kinds.close_paren): 162 | third = None 163 | index += 1 164 | else: 165 | third, index = parse_expression(index) 166 | index = match_token(index, token_kinds.close_paren, ParserError.AFTER) 167 | 168 | return first, second, third, index 169 | 170 | 171 | def _get_first_for_clause(index): 172 | """Get the first clause of a for-statement. 173 | 174 | index - Index of the beginning of the first clause in the for-statement. 175 | returns - Tuple. First element is a node if a clause is found and None if 176 | there is no clause (i.e. semicolon terminating the clause). Second element 177 | is an integer index where the next token begins. 178 | 179 | If malformed, raises exception. 180 | 181 | """ 182 | if token_is(index, token_kinds.semicolon): 183 | return None, index + 1 184 | 185 | with log_error(): 186 | return parse_declaration(index) 187 | 188 | clause, index = parse_expression(index) 189 | index = match_token(index, token_kinds.semicolon, ParserError.AFTER) 190 | return clause, index 191 | 192 | 193 | @add_range 194 | def parse_expr_statement(index): 195 | """Parse a statement that is an expression. 196 | 197 | Ex: a = 3 + 4 198 | 199 | """ 200 | if token_is(index, token_kinds.semicolon): 201 | return nodes.EmptyStatement(), index + 1 202 | 203 | node, index = parse_expression(index) 204 | index = match_token(index, token_kinds.semicolon, ParserError.AFTER) 205 | return nodes.ExprStatement(node), index 206 | -------------------------------------------------------------------------------- /shivyc/parser/utils.py: -------------------------------------------------------------------------------- 1 | """Utilities for the parser.""" 2 | 3 | from contextlib import contextmanager 4 | import copy 5 | 6 | from shivyc.errors import CompilerError, Range 7 | 8 | 9 | # This is a little bit messy, but worth the repetition it saves. In the 10 | # parser.py file, the main parse function sets this global variable to the 11 | # list of tokens. Then, all functions in the parser can reference this 12 | # variable rather than passing around the tokens list everywhere. 13 | tokens = None 14 | 15 | 16 | class SimpleSymbolTable: 17 | """Table to record every declared symbol. 18 | 19 | This is required to parse typedefs in C, because the parser must know 20 | whether a given identifier denotes a type or a value. For every 21 | declared identifier, the table records whether or not it is a type 22 | defnition. 23 | """ 24 | def __init__(self): 25 | self.symbols = [] 26 | self.new_scope() 27 | 28 | def new_scope(self): 29 | self.symbols.append({}) 30 | 31 | def end_scope(self): 32 | self.symbols.pop() 33 | 34 | def add_symbol(self, identifier, is_typedef): 35 | self.symbols[-1][identifier.content] = is_typedef 36 | 37 | def is_typedef(self, identifier): 38 | name = identifier.content 39 | for table in self.symbols[::-1]: 40 | if name in table: 41 | return table[name] 42 | return False 43 | 44 | 45 | symbols = SimpleSymbolTable() 46 | 47 | 48 | class ParserError(CompilerError): 49 | """Class representing parser errors. 50 | 51 | amount_parsed (int) - Number of tokens successfully parsed before this 52 | error was encountered. This value is used by the Parser to determine which 53 | error corresponds to the most successful parse. 54 | """ 55 | 56 | # Options for the message_type constructor field. 57 | # 58 | # AT generates a message like "expected semicolon at '}'", GOT generates a 59 | # message like "expected semicolon, got '}'", and AFTER generates a message 60 | # like "expected semicolon after '15'" (if possible). 61 | # 62 | # As a very general guide, use AT when a token should be removed, use AFTER 63 | # when a token should be to be inserted (esp. because of what came before), 64 | # and GOT when a token should be changed. 65 | AT = 1 66 | GOT = 2 67 | AFTER = 3 68 | 69 | def __init__(self, message, index, tokens, message_type): 70 | """Initialize a ParserError from the given arguments. 71 | 72 | message (str) - Base message to put in the error. 73 | tokens (List[Token]) - List of tokens. 74 | index (int) - Index of the offending token. 75 | message_type (int) - One of self.AT, self.GOT, or self.AFTER. 76 | 77 | Example: 78 | ParserError("unexpected semicolon", 10, [...], self.AT) 79 | -> CompilerError("unexpected semicolon at ';'", ..., ...) 80 | -> "main.c:10: unexpected semicolon at ';'" 81 | """ 82 | self.amount_parsed = index 83 | 84 | if len(tokens) == 0: 85 | super().__init__(f"{message} at beginning of source") 86 | return 87 | 88 | # If the index is too big, we're always using the AFTER form 89 | if index >= len(tokens): 90 | index = len(tokens) 91 | message_type = self.AFTER 92 | # If the index is too small, we should not use the AFTER form 93 | elif index <= 0: 94 | index = 0 95 | if message_type == self.AFTER: 96 | message_type = self.GOT 97 | 98 | if message_type == self.AT: 99 | super().__init__(f"{message} at '{tokens[index]}'", 100 | tokens[index].r) 101 | elif message_type == self.GOT: 102 | super().__init__(f"{message}, got '{tokens[index]}'", 103 | tokens[index].r) 104 | elif message_type == self.AFTER: 105 | if tokens[index - 1].r: 106 | new_range = Range(tokens[index - 1].r.end + 1) 107 | else: 108 | new_range = None 109 | 110 | super().__init__( 111 | f"{message} after '{tokens[index - 1]}'", new_range) 112 | 113 | 114 | def raise_error(err, index, error_type): 115 | """Raise a parser error.""" 116 | global tokens 117 | raise ParserError(err, index, tokens, error_type) 118 | 119 | 120 | # Used to store the best error found in the parsing phase. 121 | best_error = None 122 | 123 | 124 | @contextmanager 125 | def log_error(): 126 | """Wrap this context manager around conditional parsing code. 127 | 128 | For example, 129 | 130 | with log_error(): 131 | [try parsing something] 132 | return 133 | 134 | [try parsing something else] 135 | 136 | will run the code in [try parsing something]. If an error occurs, 137 | it will be saved and then [try parsing something else] will run. 138 | 139 | The value of e.amount_parsed is used to determine the amount 140 | successfully parsed before encountering the error. 141 | """ 142 | global best_error, symbols 143 | 144 | # back up the global symbols table, so if parsing fails we can reset it 145 | symbols_bak = copy.deepcopy(symbols) 146 | try: 147 | yield 148 | except ParserError as e: 149 | if not best_error or e.amount_parsed >= best_error.amount_parsed: 150 | best_error = e 151 | symbols = symbols_bak 152 | 153 | 154 | def token_is(index, kind): 155 | """Return true if the next token is of the given kind.""" 156 | global tokens 157 | return len(tokens) > index and tokens[index].kind == kind 158 | 159 | 160 | def token_in(index, kinds): 161 | """Return true if the next token is in the given list/set of kinds.""" 162 | global tokens 163 | return len(tokens) > index and tokens[index].kind in kinds 164 | 165 | 166 | def match_token(index, kind, message_type, message=None): 167 | """Raise ParserError if tokens[index] is not of the expected kind. 168 | 169 | If tokens[index] is of the expected kind, returns index + 1. 170 | Otherwise, raises a ParserError with the given message and 171 | message_type. 172 | 173 | """ 174 | global tokens 175 | if not message: 176 | message = f"expected '{kind.text_repr}'" 177 | 178 | if token_is(index, kind): 179 | return index + 1 180 | else: 181 | raise ParserError(message, index, tokens, message_type) 182 | 183 | 184 | def token_range(start, end): 185 | """Generate a range that encompasses tokens[start] to tokens[end-1]""" 186 | global tokens 187 | 188 | start_index = min(start, len(tokens) - 1, end - 1) 189 | end_index = min(end - 1, len(tokens) - 1) 190 | return tokens[start_index].r + tokens[end_index].r 191 | 192 | 193 | def add_range(parse_func): 194 | """Return a decorated function that tags the produced node with a range. 195 | 196 | Accepts a parse_* function, and returns a version of the function where 197 | the returned node has its range attribute set 198 | 199 | """ 200 | global tokens 201 | 202 | def parse_with_range(index, *args): 203 | start_index = index 204 | node, end_index = parse_func(index, *args) 205 | node.r = token_range(start_index, end_index) 206 | 207 | return node, end_index 208 | 209 | return parse_with_range 210 | -------------------------------------------------------------------------------- /shivyc/preproc.py: -------------------------------------------------------------------------------- 1 | """Implementation of the ShivyC preprocessor. 2 | 3 | Currently, the preprocessor implementation is very simple and only handles 4 | include directives. Despite this, the implementation is also 5 | technically incorrect in many ways. For example, it expands #include 6 | directives wherever they appear, rather than only expanding them when the 7 | appear at the beginning of a line. 8 | """ 9 | import pathlib 10 | 11 | import shivyc.lexer as lexer 12 | import shivyc.token_kinds as token_kinds 13 | 14 | from shivyc.errors import error_collector, CompilerError 15 | 16 | 17 | def process(tokens, this_file): 18 | """Process the given tokens and return the preprocessed token list.""" 19 | 20 | processed = [] 21 | i = 0 22 | while i < len(tokens) - 2: 23 | if (tokens[i].kind == token_kinds.pound and 24 | tokens[i + 1].kind == token_kinds.identifier and 25 | tokens[i + 1].content == "include" and 26 | tokens[i + 2].kind == token_kinds.include_file): 27 | 28 | # Replace tokens[i] -> tokens[i+2] with preprocessed contents of 29 | # the included file. 30 | try: 31 | file, filename = read_file(tokens[i + 2].content, this_file) 32 | new_tokens = process(lexer.tokenize(file, filename), filename) 33 | processed += new_tokens 34 | 35 | except IOError: 36 | error_collector.add(CompilerError( 37 | "unable to read included file", 38 | tokens[i + 2].r 39 | )) 40 | 41 | i += 3 42 | 43 | else: 44 | processed.append(tokens[i]) 45 | i += 1 46 | 47 | return processed + tokens[i:] 48 | 49 | 50 | def read_file(include_file, this_file): 51 | """Read the text of the given include file. 52 | 53 | include_file - the header name, including opening and closing quotes or 54 | angle brackets. 55 | this_file - location of the current file being preprocessed. used for 56 | locating quoted headers. 57 | """ 58 | 59 | if include_file[0] == '"': 60 | path = pathlib.Path(this_file).parent.joinpath(include_file[1:-1]) 61 | else: # path is an include file 62 | path = pathlib.Path(__file__).parent\ 63 | .joinpath("include").joinpath(include_file[1:-1]) 64 | 65 | with open(str(path)) as file: 66 | return file.read(), str(path) 67 | -------------------------------------------------------------------------------- /shivyc/spots.py: -------------------------------------------------------------------------------- 1 | """The Spot object definition and and some predefined spots, like registers.""" 2 | 3 | 4 | class Spot: 5 | """Spot in the machine where an IL value can be. 6 | 7 | spot_type (enum) - One of the values below describing the general type of 8 | spot this is. 9 | detail - Additional information about this spot. The this attribute's type 10 | and meaning depend on the spot_type; see below for more. 11 | 12 | """ 13 | 14 | def __init__(self, detail): 15 | """Initialize a spot. 16 | 17 | `detail` should uniquely represent this Spot for this specific spot 18 | type, because it will be used for hashing and equality testing. 19 | """ 20 | self.detail = detail 21 | 22 | def asm_str(self, size): 23 | """Make the ASM form of this spot, for the given size in bytes. 24 | 25 | This function raises NotImplementedError for unsupported sizes. 26 | 27 | Examples: 28 | spots.RAX.asm_str(4) -> "eax" 29 | spots.RAX.asm_str(8) -> "rax" 30 | spot(STACK, -16).asm_str(4) -> "DWORD [rbp-16]" 31 | spot(LITERAL, 14).asm_str(4) -> "14" 32 | 33 | size (int) - Size in bytes of the data stored at this spot. 34 | return (str) - ASM form of this spot. 35 | 36 | """ 37 | raise NotImplementedError 38 | 39 | def rbp_offset(self): 40 | """Return this spot's offset from RBP. 41 | 42 | If this is a memory spot which resides at a certain negative offset 43 | away from RBP, then return that offset. This is used by the register 44 | allocator to figure out how much memory to allocate for this spot. 45 | 46 | If this is not a memory spot relative to RBP, just return 0. 47 | """ 48 | return 0 49 | 50 | def shift(self, chunk, count=None): 51 | """Return a new spot shifted relative to this one. 52 | 53 | For non-memory spots, this function returns itself and throws an 54 | error if given chunk != 0 or count != None. 55 | """ 56 | if chunk or count: 57 | raise NotImplementedError("cannot shift this spot type") 58 | return self 59 | 60 | def __repr__(self): # pragma: no cover 61 | return self.detail 62 | 63 | def __eq__(self, other): 64 | """Test equality by comparing Spot type and detail.""" 65 | if self.__class__.__name__ != other.__class__.__name__: 66 | return False 67 | 68 | return self.detail == other.detail 69 | 70 | def __hash__(self): 71 | """Hash based on type and detail.""" 72 | return hash((self.__class__.__name__, self.detail)) 73 | 74 | 75 | class RegSpot(Spot): 76 | """Spot representing a machine register.""" 77 | 78 | # Mapping from the 64-bit register name to the 64-bit, 32-bit, 16-bit, 79 | # and 8-bit register names for each register. 80 | # TODO: Do I need rex prefix on any of the 8-bit? 81 | reg_map = {"rax": ["rax", "eax", "ax", "al"], 82 | "rbx": ["rbx", "ebx", "bx", "bl"], 83 | "rcx": ["rcx", "ecx", "cx", "cl"], 84 | "rdx": ["rdx", "edx", "dx", "dl"], 85 | "rsi": ["rsi", "esi", "si", "sil"], 86 | "rdi": ["rdi", "edi", "di", "dil"], 87 | "r8": ["r8", "r8d", "r8w", "r8b"], 88 | "r9": ["r9", "r9d", "r9w", "r9b"], 89 | "r10": ["r10", "r10d", "r10w", "r10b"], 90 | "r11": ["r11", "r11d", "r11w", "r11b"], 91 | "rbp": ["rbp", "", "", ""], 92 | "rsp": ["rsp", "", "", ""]} 93 | 94 | def __init__(self, name): 95 | """Initialize this spot. 96 | 97 | `name` is the string representation of the 64-bit register (e.g. 98 | "rax"). 99 | """ 100 | super().__init__(name) 101 | self.name = name 102 | 103 | def asm_str(self, size): # noqa D102 104 | if size == 0 or size == 8: 105 | i = 0 106 | elif size == 1: 107 | i = 3 108 | elif size == 2: 109 | i = 2 110 | elif size == 4: 111 | i = 1 112 | else: 113 | raise NotImplementedError("unexpected register size") 114 | 115 | return self.reg_map[self.name][i] 116 | 117 | 118 | class MemSpot(Spot): 119 | """Spot representing a region in memory, like on stack or .data section. 120 | 121 | `base` can be either a string or a Spot. The string form is used when 122 | this spot represents an external variable. The Spot form is used when 123 | this spot represents an offset in memory, like [rbp-5]. 124 | """ 125 | 126 | size_map = {1: "BYTE PTR ", 127 | 2: "WORD PTR ", 128 | 4: "DWORD PTR ", 129 | 8: "QWORD PTR "} 130 | 131 | def __init__(self, base, offset=0, chunk=0, count=None): # noqa D102 132 | super().__init__((base, offset, chunk, count)) 133 | 134 | self.base = base 135 | self.offset = offset 136 | self.chunk = chunk 137 | self.count = count 138 | 139 | def asm_str(self, size): # noqa D102 140 | if isinstance(self.base, Spot): 141 | base_str = self.base.asm_str(0) 142 | else: 143 | base_str = self.base 144 | 145 | total_offset = self.offset 146 | if not self.count: 147 | total_offset = self.offset + self.chunk 148 | 149 | if total_offset == 0: 150 | simple = base_str 151 | elif total_offset > 0: 152 | simple = f"{base_str}+{total_offset}" 153 | else: # total_offset < 0 154 | simple = f"{base_str}-{-total_offset}" 155 | 156 | if self.count and self.chunk > 0: 157 | final = f"{simple}+{self.chunk}*{self.count.asm_str(8)}" 158 | elif self.count and self.chunk < 0: 159 | final = f"{simple}-{-self.chunk}*{self.count.asm_str(8)}" 160 | else: 161 | final = simple 162 | 163 | size_desc = self.size_map.get(size, "") 164 | return f"{size_desc}[{final}]" 165 | 166 | def rbp_offset(self): # noqa D102 167 | if self.base == RBP: 168 | return -self.offset 169 | else: 170 | return 0 171 | 172 | def shift(self, chunk, count=None): # noqa D102 173 | """Return a new memory spot shifted relative to this one. 174 | 175 | chunk - A Python integer representing the size of each chunk of offset 176 | count - If provided, a register spot storing the number of chunks to 177 | be offset. If this value is provided, then `chunk` must be in {1, 2, 178 | 4, 8}. 179 | """ 180 | if count and self.count: 181 | raise NotImplementedError("cannot shift by count") 182 | 183 | if count: 184 | new_offset = self.offset + self.chunk 185 | new_chunk = chunk 186 | new_count = count 187 | else: # no count given 188 | new_offset = self.offset + chunk 189 | new_chunk = self.chunk 190 | new_count = self.count 191 | 192 | return MemSpot(self.base, new_offset, new_chunk, new_count) 193 | 194 | 195 | class LiteralSpot(Spot): 196 | """Spot representing a literal value. 197 | 198 | This is a bit of a hack, since a literal value isn't /really/ a storage 199 | spot. The value attribute is the integer representation of the value of 200 | this literal. 201 | """ 202 | 203 | def __init__(self, value): 204 | super().__init__(value) 205 | self.value = value 206 | 207 | def asm_str(self, size): # noqa D102 208 | return str(self.value) 209 | 210 | 211 | # RBX is callee-saved, which is still unsupported 212 | # RBX = RegSpot("rbx") 213 | 214 | RAX = RegSpot("rax") 215 | RCX = RegSpot("rcx") 216 | RDX = RegSpot("rdx") 217 | RSI = RegSpot("rsi") 218 | RDI = RegSpot("rdi") 219 | R8 = RegSpot("r8") 220 | R9 = RegSpot("r9") 221 | R10 = RegSpot("r10") 222 | R11 = RegSpot("r11") 223 | 224 | registers = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11] 225 | 226 | RBP = RegSpot("rbp") 227 | RSP = RegSpot("rsp") 228 | -------------------------------------------------------------------------------- /shivyc/token_kinds.py: -------------------------------------------------------------------------------- 1 | """The token kinds currently recognized.""" 2 | 3 | from shivyc.tokens import TokenKind 4 | 5 | keyword_kinds = [] 6 | symbol_kinds = [] 7 | 8 | bool_kw = TokenKind("_Bool", keyword_kinds) 9 | char_kw = TokenKind("char", keyword_kinds) 10 | short_kw = TokenKind("short", keyword_kinds) 11 | int_kw = TokenKind("int", keyword_kinds) 12 | long_kw = TokenKind("long", keyword_kinds) 13 | signed_kw = TokenKind("signed", keyword_kinds) 14 | unsigned_kw = TokenKind("unsigned", keyword_kinds) 15 | void_kw = TokenKind("void", keyword_kinds) 16 | 17 | return_kw = TokenKind("return", keyword_kinds) 18 | if_kw = TokenKind("if", keyword_kinds) 19 | else_kw = TokenKind("else", keyword_kinds) 20 | while_kw = TokenKind("while", keyword_kinds) 21 | for_kw = TokenKind("for", keyword_kinds) 22 | break_kw = TokenKind("break", keyword_kinds) 23 | continue_kw = TokenKind("continue", keyword_kinds) 24 | 25 | auto_kw = TokenKind("auto", keyword_kinds) 26 | static_kw = TokenKind("static", keyword_kinds) 27 | extern_kw = TokenKind("extern", keyword_kinds) 28 | struct_kw = TokenKind("struct", keyword_kinds) 29 | union_kw = TokenKind("union", keyword_kinds) 30 | const_kw = TokenKind("const", keyword_kinds) 31 | typedef_kw = TokenKind("typedef", keyword_kinds) 32 | sizeof_kw = TokenKind("sizeof", keyword_kinds) 33 | 34 | plus = TokenKind("+", symbol_kinds) 35 | minus = TokenKind("-", symbol_kinds) 36 | star = TokenKind("*", symbol_kinds) 37 | slash = TokenKind("/", symbol_kinds) 38 | mod = TokenKind("%", symbol_kinds) 39 | incr = TokenKind("++", symbol_kinds) 40 | decr = TokenKind("--", symbol_kinds) 41 | equals = TokenKind("=", symbol_kinds) 42 | plusequals = TokenKind("+=", symbol_kinds) 43 | minusequals = TokenKind("-=", symbol_kinds) 44 | starequals = TokenKind("*=", symbol_kinds) 45 | divequals = TokenKind("/=", symbol_kinds) 46 | modequals = TokenKind("%=", symbol_kinds) 47 | twoequals = TokenKind("==", symbol_kinds) 48 | notequal = TokenKind("!=", symbol_kinds) 49 | bool_and = TokenKind("&&", symbol_kinds) 50 | bool_or = TokenKind("||", symbol_kinds) 51 | bool_not = TokenKind("!", symbol_kinds) 52 | lt = TokenKind("<", symbol_kinds) 53 | gt = TokenKind(">", symbol_kinds) 54 | ltoe = TokenKind("<=", symbol_kinds) 55 | gtoe = TokenKind(">=", symbol_kinds) 56 | amp = TokenKind("&", symbol_kinds) 57 | pound = TokenKind("#", symbol_kinds) 58 | lbitshift = TokenKind("<<", symbol_kinds) 59 | rbitshift = TokenKind(">>", symbol_kinds) 60 | compl = TokenKind("~", symbol_kinds) 61 | 62 | dquote = TokenKind('"', symbol_kinds) 63 | squote = TokenKind("'", symbol_kinds) 64 | 65 | open_paren = TokenKind("(", symbol_kinds) 66 | close_paren = TokenKind(")", symbol_kinds) 67 | open_brack = TokenKind("{", symbol_kinds) 68 | close_brack = TokenKind("}", symbol_kinds) 69 | open_sq_brack = TokenKind("[", symbol_kinds) 70 | close_sq_brack = TokenKind("]", symbol_kinds) 71 | 72 | comma = TokenKind(",", symbol_kinds) 73 | semicolon = TokenKind(";", symbol_kinds) 74 | dot = TokenKind(".", symbol_kinds) 75 | arrow = TokenKind("->", symbol_kinds) 76 | 77 | identifier = TokenKind() 78 | number = TokenKind() 79 | string = TokenKind() 80 | char_string = TokenKind() 81 | include_file = TokenKind() 82 | -------------------------------------------------------------------------------- /shivyc/tokens.py: -------------------------------------------------------------------------------- 1 | """Classes for representing tokens. 2 | 3 | A TokenKind instance represents one of the kinds of tokens recognized (see 4 | token_kinds.py). A Token instance represents a token as produced by the lexer. 5 | 6 | """ 7 | 8 | 9 | class TokenKind: 10 | """Class representing the various known kinds of tokens. 11 | 12 | Ex: +, -, ), return, int 13 | 14 | There are also token kind instances for each of 'identifier' and 15 | 'number'. See token_kinds.py for a list of token_kinds defined. 16 | 17 | text_repr (str) - The token's representation in text, if it has a fixed 18 | representation. 19 | 20 | """ 21 | 22 | def __init__(self, text_repr="", kinds=[]): 23 | """Initialize a new TokenKind and add it to `kinds`. 24 | 25 | kinds (List[TokenKind]) - List of kinds to which this TokenKind is 26 | added. This is convenient when defining token kinds in token_kind.py. 27 | 28 | """ 29 | self.text_repr = text_repr 30 | kinds.append(self) 31 | kinds.sort(key=lambda kind: -len(kind.text_repr)) 32 | 33 | def __str__(self): 34 | """Return the representation of this token kind.""" 35 | return self.text_repr 36 | 37 | 38 | class Token: 39 | """Single unit element of the input as produced by the tokenizer. 40 | 41 | kind (TokenKind) - Kind of this token. 42 | 43 | content - Additional content about some tokens. For number tokens, 44 | this stores the number itself. For identifiers, this stores the identifier 45 | name. For string, stores a list of its characters. 46 | rep (str) - The string representation of this token. If not provided, the 47 | content parameter is used. 48 | r (Range) - Range of positions that this token covers. 49 | 50 | """ 51 | 52 | def __init__(self, kind, content="", rep="", r=None): 53 | """Initialize this token.""" 54 | self.kind = kind 55 | 56 | self.content = content if content else str(self.kind) 57 | self.rep = rep 58 | self.r = r 59 | 60 | def __repr__(self): # pragma: no cover 61 | return self.content 62 | 63 | def __str__(self): 64 | """Return the token content.""" 65 | return self.rep if self.rep else self.content 66 | -------------------------------------------------------------------------------- /shivyc/tree/__init__.py: -------------------------------------------------------------------------------- 1 | """Dummy __init__.py.""" 2 | -------------------------------------------------------------------------------- /shivyc/tree/decl_nodes.py: -------------------------------------------------------------------------------- 1 | """Classes for the nodes that form the declaration and type name tree. 2 | 3 | This tree/node system is pretty distinct from the tree/node system used for 4 | the rest of the AST because parsing declarations is very different from 5 | parsing other parts of the language due to the "backwards"-ness of C 6 | declaration syntax, as described below: 7 | 8 | The declaration trees produces by the parser feel "backwards". For example, 9 | the following: 10 | 11 | int *arr[3]; 12 | 13 | parses to: 14 | 15 | Root([token_kinds.int_kw], [Pointer(Array(3, Identifier(tok)))]) 16 | 17 | while the following: 18 | 19 | int (*arr)[3]; 20 | 21 | parses to: 22 | 23 | Root([token_kinds.int_kw], [Array(3, Pointer(Identifier(tok)))]) 24 | 25 | Declaration trees are to be read inside-out. So, the first example above is 26 | an array of 3 pointers to int, and the second example is a pointer to an 27 | array of 3 integers. The DeclarationNode class in tree.py performs the task 28 | of reversing these trees when forming the ctype. 29 | 30 | """ 31 | 32 | import shivyc.token_kinds as token_kinds 33 | 34 | 35 | class DeclNode: 36 | """Base class for all decl_nodes nodes.""" 37 | 38 | pass 39 | 40 | 41 | class Root(DeclNode): 42 | """Represents a list of declaration specifiers and declarators. 43 | 44 | specs (List(Tokens/Nodes)) - list of the declaration specifiers, as tokens 45 | decls (List(Node)) - list of declarator nodes 46 | """ 47 | 48 | def __init__(self, specs, decls, inits=None): 49 | """Generate root node.""" 50 | self.specs = specs 51 | self.decls = decls 52 | 53 | if inits: 54 | self.inits = inits 55 | else: 56 | self.inits = [None] * len(self.decls) 57 | 58 | super().__init__() 59 | 60 | 61 | class Pointer(DeclNode): 62 | """Represents a pointer to a type.""" 63 | 64 | def __init__(self, child, const): 65 | """Generate pointer node. 66 | 67 | const - boolean indicating whether this pointer is const 68 | """ 69 | self.child = child 70 | self.const = const 71 | super().__init__() 72 | 73 | 74 | class Array(DeclNode): 75 | """Represents an array of a type. 76 | 77 | n (int) - size of the array 78 | 79 | """ 80 | 81 | def __init__(self, n, child): 82 | """Generate array node.""" 83 | self.n = n 84 | self.child = child 85 | super().__init__() 86 | 87 | 88 | class Function(DeclNode): 89 | """Represents an function with given arguments and returning given type. 90 | 91 | args (List(Node)) - arguments of the functions 92 | """ 93 | 94 | def __init__(self, args, child): 95 | """Generate array node.""" 96 | self.args = args 97 | self.child = child 98 | super().__init__() 99 | 100 | 101 | class Identifier(DeclNode): 102 | """Represents an identifier. 103 | 104 | If this is a type name and has no identifier, `identifier` is None. 105 | """ 106 | 107 | def __init__(self, identifier): 108 | """Generate identifier node from an identifier token.""" 109 | self.identifier = identifier 110 | super().__init__() 111 | 112 | 113 | class _StructUnion(DeclNode): 114 | """Base class to represent a struct or a union C type. 115 | 116 | tag (Token) - Token containing the tag of this struct 117 | members (List(Node)) - List of decl_nodes nodes of members, or None 118 | r (Range) - range that the specifier covers 119 | """ 120 | 121 | def __init__(self, tag, members, r): 122 | self.tag = tag 123 | self.members = members 124 | 125 | # These r and kind members are a little hacky. They allow the 126 | # make_specs_ctype function in tree.nodes.Declaration to treat this 127 | # as a Token for the purposes of determining the base type of the 128 | # declaration. 129 | self.r = r 130 | 131 | super().__init__() 132 | 133 | 134 | class Struct(_StructUnion): 135 | """Represents a struct C type.""" 136 | 137 | def __init__(self, tag, members, r): 138 | self.kind = token_kinds.struct_kw 139 | super().__init__(tag, members, r) 140 | 141 | 142 | class Union(_StructUnion): 143 | """Represents a union C type.""" 144 | 145 | def __init__(self, tag, members, r): 146 | self.kind = token_kinds.union_kw 147 | super().__init__(tag, members, r) 148 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Package for tests.""" 2 | -------------------------------------------------------------------------------- /tests/feature_tests/addition.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a = 5, b = 10; 3 | 4 | int c = a + b; 5 | if(c != 15) return 1; 6 | 7 | int d = c + 5; 8 | if(d != 20) return 2; 9 | 10 | int e = 2 + 4; 11 | if(e != 6) return 3; 12 | 13 | int f = e + d; 14 | if(f != 26) return 4; 15 | 16 | int g = f + f + e; 17 | if(g != 58) return 5; 18 | 19 | int i = g + g; 20 | i = i + i; 21 | 22 | if(i != 232) return 6; 23 | 24 | // Test imm64 cases 25 | 26 | // used to modify variable liveliness 27 | int dummy; 28 | dummy = 0; 29 | 30 | // this variable is always live 31 | long never_dead; 32 | never_dead = 1099511627776; 33 | 34 | long j = 1099511627776; 35 | never_dead = j + 1099511627776; 36 | if(never_dead != 1099511627776 + 1099511627776) return 7; 37 | 38 | long k = 1099511627776; 39 | never_dead = 1099511627776 + k; 40 | if(never_dead != 1099511627776 + 1099511627776) return 8; 41 | 42 | long not_dead = 1099511627776; 43 | never_dead = not_dead + 1099511627776; 44 | if(never_dead != 1099511627776 + 1099511627776) return 9; 45 | 46 | never_dead = 1099511627776 + not_dead; 47 | if(never_dead != 1099511627776 + 1099511627776) return 10; 48 | if(1099511627776 + 1099511627776 != never_dead) return 11; 49 | 50 | dummy = dummy + never_dead + not_dead; 51 | 52 | unsigned int l = 4294967295; 53 | unsigned int m = 4294967295; 54 | if(l + m != (unsigned int)4294967295 + (unsigned int)4294967295) return 12; 55 | } 56 | -------------------------------------------------------------------------------- /tests/feature_tests/array.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int array[2+3]; 3 | if(&array != &array) return 1; 4 | if(array != array) return 2; 5 | if(&array[0] != &array[0]) return 13; 6 | 7 | if(&array[0] != (void*)array) return 13; 8 | if(&array[3] != &array[0] + 3) return 14; 9 | if(&array + 1 != (void*)(&array[0] + 5)) return 15; 10 | 11 | int array2[5]; 12 | if(&array2 != &array2) return 3; 13 | if(array2 != array2) return 4; 14 | if(&array == &array2) return 5; 15 | if(array == array2) return 6; 16 | 17 | int array3[6]; 18 | if(array == array3) return 7; 19 | if(&array == (void*)&array3) return 8; 20 | 21 | unsigned int array4[5]; 22 | if(&array == (void*)&array4) return 9; 23 | if(array == (void*)array4) return 10; 24 | 25 | *array = 15; 26 | if(*array != 15) return 11; 27 | 28 | *(array + 2) = 20; 29 | if(*(array + 2) != 20) return 12; 30 | 31 | if(array[0] != 15) return 16; 32 | if(array[2] != 20) return 17; 33 | if((array+4)[-2] != 20) return 21; 34 | 35 | // Test array subscripting 36 | array[1] = 35; 37 | array[3] = 10; 38 | 4[array] = 1[array] + array[3]; 39 | 40 | int sum = 0, i = 0; 41 | while(i != 5) { 42 | sum = sum + array[i]; 43 | i = i + 1; 44 | } 45 | 46 | if(sum != 15 + 35 + 20 + 10 + 35 + 10) return 18; 47 | 48 | // Test multidimentional arrays 49 | int array5[5][6]; 50 | array5[2][3] = 10; 51 | if(array5[2][3] != 10) return 19; 52 | 53 | void *void_p1, *void_p2; 54 | char *p1, *p2; 55 | 56 | void_p1 = (&array5[0] + 1); 57 | void_p2 = &array5[0]; 58 | p1 = void_p1; 59 | p2 = void_p2; 60 | 61 | p2 = p2 + 6 * 4; 62 | if(p1 != p2) { 63 | return 20; 64 | } 65 | 66 | 67 | int power_of_two_arr[10][10]; 68 | power_of_two_arr[3][4] = 10; 69 | if(power_of_two_arr[3][4] != 10) return 21; 70 | 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /tests/feature_tests/assignment.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | signed a; int b; int c; int d; int e; int f; int g; int h; 3 | a = b = 10; 4 | c = a; 5 | d = b; 6 | (e) = c; 7 | ((f)) = d; 8 | g = 20; 9 | 10 | // Force variables to be on stack 11 | int i; int j; char k; 12 | &i; &j; 13 | i = g; 14 | j = i; 15 | j = k; 16 | 17 | 18 | if(a != 10) return 1; 19 | if(b != 10) return 2; 20 | if(c != 10) return 3; 21 | if(d != 10) return 4; 22 | if(e != 10) return 5; 23 | if(f != 10) return 6; 24 | if(g != 20) return 7; 25 | } 26 | -------------------------------------------------------------------------------- /tests/feature_tests/bitwise_shifts.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a = 14; 3 | 4 | if (a>>1 != 7) return 1; 5 | if (a>>2 != 3) return 2; 6 | if (a<<1 != 28) return 3; 7 | if (a<<2 != 56) return 4; 8 | 9 | int b = 3; 10 | if (a>>b != 1) return 5; 11 | if (a<>(b-1); 14 | if (c != 3) return 7; 15 | 16 | if ((1<<16)-1 != 65535) return 8; 17 | if (3<<8 != 768) return 9; 18 | } 19 | -------------------------------------------------------------------------------- /tests/feature_tests/bool.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | if((3 && 4) != 1) return 1; 3 | if((0 && 4) != 0) return 2; 4 | if(0 && 4) return 3; 5 | if((3 && 0) != 0) return 4; 6 | if(3 && 0) return 5; 7 | 8 | int a, *p = &a; 9 | if((p && 0) != 0) return 6; 10 | if(p && 0) return 7; 11 | if((p && p) != 1) return 8; 12 | 13 | if((3 || 4) != 1) return 9; 14 | if((0 || 4) != 1) return 10; 15 | if((2 || 0) != 1) return 11; 16 | if((0 || 0) != 0) return 12; 17 | if(0 || 0) return 13; 18 | 19 | if((0 || 0) != 0) return 12; 20 | if((p || 0) != 1) return 14; 21 | if((p || p) != 1) return 15; 22 | 23 | if(!p != 0) return 16; 24 | if(!p) return 16; 25 | if(!0 != 1) return 17; 26 | 27 | int n = 0; 28 | 0 && (n = 1); 29 | if(n == 1) return 18; 30 | 31 | (n = 1) && 0; 32 | if(n != 1) return 19; 33 | 34 | 1 || (n = 3); 35 | if(n == 3) return 20; 36 | 37 | (n = 3) || 1; 38 | if(n != 3) return 21; 39 | } 40 | -------------------------------------------------------------------------------- /tests/feature_tests/break_continue.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | while(1) { 3 | break; 4 | } 5 | 6 | while(1) break; 7 | 8 | for(;;) { 9 | 1 + 1; 10 | break; 11 | 2 + 2; 12 | } 13 | 14 | int i = 0; 15 | for(i = 0; i != 10; i++) { 16 | if(i == 5) break; 17 | 18 | continue; 19 | i--; 20 | } 21 | 22 | if(i != 5) return 1; 23 | 24 | int count = 0; 25 | for(i = 0; i != 10; i++) { 26 | count++; 27 | while(1) { 28 | break; 29 | } 30 | } 31 | 32 | if(count != 10) return 2; 33 | } 34 | -------------------------------------------------------------------------------- /tests/feature_tests/cast.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() { 4 | struct S1 { 5 | int x; 6 | } a, *p1 = &a; 7 | 8 | struct S2 { 9 | long y; 10 | } b, *p2 = &b; 11 | 12 | p1 = (struct S1*) p2; 13 | p2->y = 75; 14 | if(p1->x != 75) return 1; 15 | 16 | unsigned int c, d; 17 | d = 65536; 18 | c = (unsigned char) d; 19 | if(c != 0) return 2; 20 | 21 | unsigned char e = -10; 22 | int f = (signed char) e; 23 | if(f != -10) return 3; 24 | 25 | (void) 5; 26 | } 27 | -------------------------------------------------------------------------------- /tests/feature_tests/comparison.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a = 5, b = 10; long c; unsigned int d; 3 | 4 | if(a == b) return 1; 5 | 6 | if(5 == a); 7 | else return 36; 8 | 9 | if(&a == &b) return 2; 10 | 11 | if(&a != &a) return 3; 12 | 13 | if(&a == 0) return 4; 14 | 15 | if(&a == (0)) return 5; 16 | 17 | if(0 == &a) return 6; 18 | 19 | void* v = &a; 20 | if(v == 0) return 10; 21 | if(0 == v) return 11; 22 | if(v == &b) return 12; 23 | if(&b == v) return 13; 24 | if(v != &a) return 14; 25 | 26 | // Test imm64 operands 27 | long e = 17179869184; 28 | if(e != 17179869184) return 15; 29 | if(17179869184 != e) return 16; 30 | 31 | //////////////////////////////////// 32 | 33 | a = 5; b = 10; 34 | if(a > b) return 17; 35 | if(a >= b) return 18; 36 | if(b < a) return 19; 37 | if(b <= a) return 20; 38 | if(a < 5) return 21; 39 | if(b < 10) return 22; 40 | 41 | unsigned short f; unsigned int g; 42 | f = 65535; 43 | g = 4294967295; 44 | 45 | if(f > g) return 25; 46 | if(f >= g) return 26; 47 | if(g < f) return 27; 48 | if(g <= f) return 28; 49 | if(f < 5) return 29; 50 | if(g < 5) return 30; 51 | 52 | // Test imm64 operands 53 | e = 17179869184; 54 | if(17179869184 < 17179869184) return 31; 55 | if(17179869184 < 17179869183) return 32; 56 | if(e < 17179869183) return 33; 57 | 58 | int array[5]; 59 | if(&array[1] > &array[3]) return 21; 60 | if(&array[1] >= &array[3]) return 22; 61 | if(&array[3] < &array[1]) return 23; 62 | if(&array[3] <= &array[1]) return 24; 63 | 64 | // Test order of ops between < and == 65 | if(3 < 4 == 9 < 3) return 34; 66 | if(3 < 4 != 5 < 6) return 35; 67 | 68 | return 0; 69 | } 70 | -------------------------------------------------------------------------------- /tests/feature_tests/compound_assign.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a, b; 3 | 4 | int* p = &a; 5 | int* q = p += 5; 6 | if(q != p) return 1; 7 | if(q - &a != 5) return 2; 8 | if(p - &a != 5) return 3; 9 | 10 | p = &a; 11 | q = p -= 5; 12 | if(q != p) return 4; 13 | if(&a - q != 5) return 5; 14 | if(&a - p != 5) return 6; 15 | 16 | a = 10; 17 | b = a += 5; 18 | if(a != b) return 7; 19 | if(b != 15) return 8; 20 | if(a != 15) return 9; 21 | 22 | long l = 1099511627776; // 2^40 23 | a = 10; 24 | a += l; 25 | if(a != 10) return 10; 26 | 27 | a = 10; 28 | a += 1099511627776; 29 | if(a != 10) return 11; 30 | 31 | a = 10; 32 | b = a -= 15; 33 | if(a != b) return 12; 34 | if(a + 5 != 0) return 13; 35 | if(b + 5 != 0) return 14; 36 | 37 | a = 10; 38 | b = a *= 1099511627776; 39 | if(a != 0) return 15; 40 | if(b != 0) return 16; 41 | 42 | a = 10; 43 | b = a /= 2; 44 | if(a != 5) return 17; 45 | if(b != 5) return 18; 46 | 47 | a = 1234; 48 | b = a %= 100; 49 | if(a != 34) return 19; 50 | if(b != 34) return 20; 51 | } 52 | -------------------------------------------------------------------------------- /tests/feature_tests/compound_statement.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a; 3 | { 4 | { 5 | int b; 6 | b = 10; 7 | a = b; 8 | } 9 | 10 | { 11 | 12 | } 13 | } 14 | 15 | return a - 10; 16 | } 17 | -------------------------------------------------------------------------------- /tests/feature_tests/declaration.c: -------------------------------------------------------------------------------- 1 | // Verify we can declare variables before and after main 2 | extern int a; 3 | 4 | int f0(int[5], int()); 5 | 6 | int f1(void); 7 | 8 | int f2(); 9 | 10 | int f4(int a, int b, int c) { return 0; } 11 | 12 | int func(int a, int b) { 13 | a; b; 14 | return a; 15 | } 16 | 17 | // Test declaration of function returning function. 18 | int (*getFunc(int z))(int a, int b) { 19 | z; 20 | return func; 21 | } 22 | 23 | int main() { 24 | int; 25 | 26 | int b = 3 + 4; 27 | 28 | int arr[3], (*c)[3] = &arr, *d[3], e = 2; 29 | (*c)[e]; 30 | 31 | d[e] = &b; 32 | *d[e]; 33 | 34 | int *f(int, unsigned int* b, long *[5], long (*)[5]); 35 | int g(); 36 | int h(void); 37 | int *i(); 38 | int *j(int); 39 | int *k(int(int)); 40 | 41 | // verify pointer to function and decayed function are compatible 42 | int (*f3)(int, int, int); 43 | f3 = f4; 44 | } 45 | 46 | extern int z; 47 | -------------------------------------------------------------------------------- /tests/feature_tests/division.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a = 5, b = 10; 3 | 4 | int c = b / a; 5 | int d = b / c; 6 | int e = b / d; 7 | int f = e / 2; 8 | int g = f / 2; 9 | 10 | if(g != 0) return 1; 11 | 12 | int h = 30, i = 3, j = 5; 13 | 14 | int k = h / i / j; 15 | if(k != 2) return 2; 16 | 17 | int l = k / k; 18 | if(l != 1) return 3; 19 | 20 | int m = 3; 21 | m = m / m; 22 | if(m != 1) return 4; 23 | 24 | unsigned long n = 4294967295; 25 | int o = -4; 26 | if(n / o != (unsigned long)4294967295 / -4) return 5; 27 | } 28 | -------------------------------------------------------------------------------- /tests/feature_tests/else.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a = 0; 3 | 4 | if(0) { 5 | return 1; 6 | } else { 7 | a = 10; 8 | } 9 | 10 | // Verify correct branch was taken 11 | if(a != 10) return 2; else { 12 | 13 | // Verify proper dangling-else parsing 14 | if(0) if(1) return 4; else return 5; 15 | 16 | return 0; 17 | } 18 | 19 | return 6; 20 | } 21 | -------------------------------------------------------------------------------- /tests/feature_tests/error_array.c: -------------------------------------------------------------------------------- 1 | int func(void); 2 | 3 | struct S; 4 | 5 | int main() { 6 | int array[5]; 7 | 8 | // error: expression on left of '=' is not assignable 9 | array = 4; 10 | 11 | // error: invalid operand types for array subscriping 12 | 4[3]; 13 | 14 | // error: invalid operand types for array subscriping 15 | array[array]; 16 | 17 | // error: declared variable is not of assignable type 18 | int array1[5] = 1; 19 | 20 | void* p; 21 | // error: cannot subscript pointer to incomplete type 22 | p[4]; 23 | 24 | // error: array size must be compile-time constant 25 | int array1[func()]; 26 | 27 | // error: array size must have integral type 28 | int array2[(int*)1]; 29 | 30 | // error: array size must be positive 31 | int array3[-2]; 32 | 33 | // error: array elements must have complete type 34 | struct S array4[3]; 35 | 36 | // error: array elements must have complete type 37 | int array5[3][]; 38 | } 39 | -------------------------------------------------------------------------------- /tests/feature_tests/error_bool.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | struct A {} a; 3 | 4 | // error: '&&' operator requires scalar operands 5 | a && a; 6 | 7 | // error: '||' operator requires scalar operands 8 | 1 || a; 9 | 10 | // error: '||' operator requires scalar operands 11 | a || 1; 12 | 13 | // error: '!' operator requires scalar operand 14 | !a; 15 | } 16 | -------------------------------------------------------------------------------- /tests/feature_tests/error_break_continue.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | // error: break statement not in loop 3 | break; 4 | 5 | // error: continue statement not in loop 6 | continue; 7 | } 8 | -------------------------------------------------------------------------------- /tests/feature_tests/error_cast.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | struct S1 { 3 | int x; 4 | }* a; 5 | 6 | struct S2 { 7 | long y; 8 | }* b; 9 | 10 | // error: conversion from incompatible pointer type 11 | a = (struct S2*) b; 12 | 13 | // error: can only cast to scalar or void type 14 | (struct S1) 4; 15 | 16 | // error: can only cast from scalar type 17 | (int) *a; 18 | 19 | // error: expected abstract declarator, but identifier name was provided 20 | (int x) a; 21 | 22 | // error: storage specifier not permitted here 23 | (static int) a; 24 | } 25 | -------------------------------------------------------------------------------- /tests/feature_tests/error_comparison.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a = 5, b = 10; long c; unsigned int d; 3 | 4 | // error: comparison between incomparable types 5 | &a == 1; 6 | 7 | // error: comparison between distinct pointer types 8 | &a == &c; 9 | 10 | // error: comparison between distinct pointer types 11 | &a == &d; 12 | 13 | // error: comparison between distinct pointer types 14 | &a < &d; 15 | 16 | // error: comparison between incomparable types 17 | &a < 1; 18 | } 19 | -------------------------------------------------------------------------------- /tests/feature_tests/error_compound_assign.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a, *p, *q; 3 | 4 | // error: expression on left of '+=' is not assignable 5 | 10 += a; 6 | // error: invalid types for '+=' operator 7 | p += q; 8 | 9 | // error: expression on left of '-=' is not assignable 10 | 10 -= a; 11 | // error: invalid types for '-=' operator 12 | p -= q; 13 | 14 | // error: invalid types for '*=' operator 15 | p *= a; 16 | // error: invalid types for '*=' operator 17 | p *= q; 18 | // error: expression on left of '*=' is not assignable 19 | 10 *= a; 20 | 21 | // error: invalid types for '/=' operator 22 | p /= a; 23 | // error: invalid types for '/=' operator 24 | p /= q; 25 | // error: expression on left of '/=' is not assignable 26 | 10 /= a; 27 | 28 | // error: invalid types for '%=' operator 29 | p %= a; 30 | // error: invalid types for '%=' operator 31 | p %= q; 32 | // error: expression on left of '%=' is not assignable 33 | 10 %= a; 34 | 35 | void* v; 36 | // error: invalid arithmetic on pointer to incomplete type 37 | v += 1; 38 | // error: invalid arithmetic on pointer to incomplete type 39 | v -= 1; 40 | } 41 | -------------------------------------------------------------------------------- /tests/feature_tests/error_control_statements.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | // error: expression on left of '=' is not assignable 3 | if(3 = 5) { 4 | return 1; 5 | } 6 | 7 | // error: use of undeclared identifier 'a' 8 | if(a = 5) { 9 | return 2; 10 | } 11 | 12 | // error: expression on left of '=' is not assignable 13 | while(3 = 5) { 14 | return 3; 15 | } 16 | 17 | // error: use of undeclared identifier 'a' 18 | while(a = 5) { 19 | return 4; 20 | } 21 | 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /tests/feature_tests/error_declaration.c: -------------------------------------------------------------------------------- 1 | // error: storage class specified for function parameter 2 | int func(auto int a); 3 | 4 | // error: 'void' must be the only parameter 5 | int func1(void, void); 6 | 7 | typedef int Function(void); 8 | typedef int Array[10]; 9 | 10 | // error: function cannot return function type 11 | Function f(void); 12 | // error: function cannot return array type 13 | Array f(void); 14 | 15 | extern int var; 16 | // error: redeclared 'var' with different linkage 17 | static int var; 18 | // error: redeclared 'var' with incompatible type 19 | extern long var; 20 | 21 | int var1; 22 | // error: redeclared 'var1' with different linkage 23 | static int var1; 24 | 25 | int main() { 26 | // error: variable of incomplete type declared 27 | void a; 28 | 29 | // error: missing identifier name in declaration 30 | int *; 31 | 32 | // error: unrecognized set of type specifiers 33 | int int a; 34 | 35 | // error: unrecognized set of type specifiers 36 | unsigned signed int a; 37 | 38 | // error: local variable with linkage has initializer 39 | extern int a = 10; 40 | 41 | // error: too many storage classes in declaration specifiers 42 | extern auto int b; 43 | 44 | { 45 | int c; 46 | } 47 | // error: use of undeclared identifier 'c' 48 | c; 49 | 50 | int (*f1)(int), f2(int, int); 51 | // error: conversion from incompatible pointer type 52 | f1 = f2; 53 | 54 | void (*f3)(int); 55 | // error: conversion from incompatible pointer type 56 | f1 = f3; 57 | 58 | void (*f4)(long); 59 | // error: conversion from incompatible pointer type 60 | f3 = f4; 61 | 62 | int redefined; 63 | // error: redefinition of 'redefined' 64 | int redefined; 65 | } 66 | -------------------------------------------------------------------------------- /tests/feature_tests/error_extra_tokens.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | return 0; 3 | // error: unexpected token at 'a' 4 | } a; 5 | -------------------------------------------------------------------------------- /tests/feature_tests/error_function_call.c: -------------------------------------------------------------------------------- 1 | int isalpha(int); 2 | 3 | // This declaration differs from the C standard library, but it allows us to 4 | // verify that a void parameter works. 5 | int isdigit(void); 6 | 7 | struct S incomplete_return(); 8 | 9 | int main() { 10 | int a; 11 | 12 | // error: called object is not a function pointer 13 | a(); 14 | 15 | // error: incorrect number of arguments for function call (expected 1, have 0) 16 | isalpha(); 17 | 18 | // error: incorrect number of arguments for function call (expected 1, have 2) 19 | isalpha(10, 10); 20 | 21 | isdigit(); 22 | 23 | // error: incorrect number of arguments for function call (expected 0, have 1) 24 | isdigit(1); 25 | 26 | // error: incorrect number of arguments for function call (expected 0, have 2) 27 | isdigit(1, 2); 28 | 29 | // error: function returns non-void incomplete type 30 | incomplete_return(); 31 | 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /tests/feature_tests/error_function_def.c: -------------------------------------------------------------------------------- 1 | int add(int a, long b) { 2 | return 0; 3 | } 4 | 5 | void test_no_args() { } 6 | 7 | void redefine_func() { } 8 | // error: redefinition of 'redefine_func' 9 | void redefine_func() { } 10 | 11 | void return_expr() { 12 | // error: function with void return type cannot return value 13 | return 2; 14 | } 15 | 16 | int no_return_expr() { 17 | // error: function with non-void return type must return value 18 | return; 19 | } 20 | 21 | int func_argument(int f(int, int)) { 22 | return f(0,0); 23 | } 24 | 25 | // error: function definition provided for non-function type 26 | int not_func { 27 | return 0; 28 | } 29 | 30 | // error: function definition missing parameter name 31 | void missing_param(int) { } 32 | 33 | void repeat_def(int a) { 34 | // error: redefinition of 'a' 35 | int a; 36 | } 37 | 38 | int* return_pointer() { 39 | char* a; 40 | // error: conversion from incompatible pointer type 41 | return a; 42 | } 43 | 44 | int const_param(const int a) { 45 | // error: expression on left of '=' is not assignable 46 | a = 3; 47 | } 48 | 49 | int const_ptr_param(const int* a) { 50 | int b; 51 | // verify we can assign directly to a 52 | a = &b; 53 | // error: expression on left of '=' is not assignable 54 | *a = 3; 55 | } 56 | 57 | const int func() { 58 | return 4; 59 | } 60 | 61 | // error: redefinition of 'a' 62 | void repeat_param(int a, int a) { } 63 | 64 | // error: storage class specified for function parameter 65 | void storage_on_param(static int a) { } 66 | 67 | int main() { 68 | // error: incorrect number of arguments for function call (expected 2, have 3) 69 | add(1,2,3); 70 | 71 | char* p; 72 | // error: invalid conversion between types 73 | add(1, p); 74 | 75 | // error: incorrect number of arguments for function call (expected 0, have 1) 76 | test_no_args(1); 77 | 78 | // error: conversion from incompatible pointer type 79 | func_argument(add); 80 | } 81 | -------------------------------------------------------------------------------- /tests/feature_tests/error_include.c: -------------------------------------------------------------------------------- 1 | // error: unable to read included file 2 | #include "filethatdoesnotexist.h" 3 | // error: unable to read included file 4 | #include 5 | -------------------------------------------------------------------------------- /tests/feature_tests/error_incomplete_types.c: -------------------------------------------------------------------------------- 1 | extern int a[]; 2 | extern int b[]; 3 | 4 | extern int c[]; 5 | extern int d[10]; 6 | 7 | int main() { 8 | extern int a[10]; 9 | { 10 | // error: redeclared 'a' with incompatible type 11 | extern int a[12]; 12 | } 13 | 14 | extern int b[10]; 15 | 16 | // check compatibility between complete and incomplete 17 | &c == &d; 18 | extern int c[5]; 19 | // error: comparison between distinct pointer types 20 | &c == &d; 21 | } 22 | 23 | // error: redeclared 'b' with incompatible type 24 | extern int b[12]; 25 | -------------------------------------------------------------------------------- /tests/feature_tests/error_incr_decr.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | // error: operand of decrement operator not a modifiable lvalue 3 | 4--; 4 | 5 | int array[5]; 6 | // error: operand of increment operator not a modifiable lvalue 7 | ++array; 8 | 9 | void* p; 10 | // error: operand of increment operator not a modifiable lvalue 11 | (*p)++; 12 | } 13 | -------------------------------------------------------------------------------- /tests/feature_tests/error_initializer.c: -------------------------------------------------------------------------------- 1 | int f() { 2 | return 3; 3 | } 4 | 5 | // error: non-constant initializer for variable with static storage duration 6 | int a = f(); 7 | 8 | int main() { } 9 | -------------------------------------------------------------------------------- /tests/feature_tests/error_large_literal.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | // error: integer literal too large to be represented by any integer type 3 | 1000000000000000000000000000; 4 | } 5 | -------------------------------------------------------------------------------- /tests/feature_tests/error_main1.c: -------------------------------------------------------------------------------- 1 | // error: 'main' function must have integer return type 2 | void main() {} 3 | -------------------------------------------------------------------------------- /tests/feature_tests/error_main2.c: -------------------------------------------------------------------------------- 1 | // error: 'main' function must have 0 or 2 arguments 2 | int main(int a, int b, int c) {} 3 | -------------------------------------------------------------------------------- /tests/feature_tests/error_main3.c: -------------------------------------------------------------------------------- 1 | // error: first parameter of 'main' must be of integer type 2 | int main(long a, int b) {} 3 | -------------------------------------------------------------------------------- /tests/feature_tests/error_main4.c: -------------------------------------------------------------------------------- 1 | // error: second parameter of 'main' must be like char** 2 | int main(int a, char b) {} 3 | -------------------------------------------------------------------------------- /tests/feature_tests/error_pointer.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a; int b; 3 | 4 | // error: operand of unary '&' must be lvalue 5 | &(a + b); 6 | 7 | // error: operand of unary '*' must have pointer type 8 | *a; 9 | 10 | // error: invalid conversion between types 11 | a = &b; 12 | 13 | int* c; 14 | // error: invalid conversion between types 15 | c = 10; 16 | 17 | // error: operand of unary '*' must have pointer type 18 | *a = 1; 19 | 20 | void* p; 21 | // error: expression on left of '=' is not assignable 22 | *p = 1; 23 | 24 | int *f; unsigned int *g; 25 | // error: conversion from incompatible pointer type 26 | f = g; 27 | 28 | int (*h)(); 29 | // error: conversion from incompatible pointer type 30 | h = f; 31 | 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /tests/feature_tests/error_pointer_math.c: -------------------------------------------------------------------------------- 1 | struct S { int a; } s; 2 | 3 | int main() { 4 | int* a; int *b; 5 | 6 | // error: invalid operand types for addition 7 | a + b; 8 | 9 | // error: invalid operand types for multiplication 10 | a * b; 11 | 12 | // error: invalid operand types for division 13 | a / b; 14 | 15 | // error: invalid operand types for modulus 16 | a % b; 17 | 18 | // error: invalid operand types for modulus 19 | 3 % b; 20 | 21 | // error: invalid operand types for subtraction 22 | 3 - a; 23 | 24 | // error: invalid operand types for bitwise shift 25 | int* c; c << 3; 26 | 27 | void *p, *q; 28 | // error: invalid arithmetic on pointer to incomplete type 29 | p + 1; 30 | // error: invalid arithmetic on pointer to incomplete type 31 | 1 + p; 32 | // error: invalid arithmetic on pointers to incomplete types 33 | p - q; 34 | // error: invalid arithmetic on pointer to incomplete type 35 | p - 1; 36 | // error: invalid arithmetic on pointer to incomplete type 37 | p++; 38 | // error: invalid type for increment operator 39 | s++; 40 | } 41 | -------------------------------------------------------------------------------- /tests/feature_tests/error_redefined_identifier.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a; 3 | 4 | // error: redefinition of 'a' 5 | int a; 6 | 7 | { 8 | int a; // OK 9 | } 10 | 11 | // error: redefinition of 'a' 12 | int a; 13 | } 14 | -------------------------------------------------------------------------------- /tests/feature_tests/error_sizeof.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | void* p; 3 | 4 | // error: sizeof argument cannot have incomplete type 5 | sizeof(*p); 6 | 7 | // error: sizeof argument cannot have incomplete type 8 | sizeof(void); 9 | 10 | // error: sizeof argument cannot have incomplete type 11 | sizeof(struct S); 12 | 13 | // error: sizeof argument cannot have function type 14 | sizeof(main); 15 | } 16 | -------------------------------------------------------------------------------- /tests/feature_tests/error_string.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | // error: empty character constant 3 | ''; 4 | 5 | // error: multiple characters in character constant 6 | 'ab'; 7 | 8 | // error: multiple characters in character constant 9 | '/*'; 10 | } 11 | -------------------------------------------------------------------------------- /tests/feature_tests/error_struct.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | struct R { 3 | // error: cannot have storage specifier on struct member 4 | extern int a; 5 | 6 | // error: cannot have storage specifier on struct member 7 | auto int a; 8 | 9 | // error: cannot have incomplete type as struct member 10 | struct R a; 11 | 12 | // error: cannot have function type as struct member 13 | int function(int); 14 | 15 | // error: missing name of struct member 16 | int*; 17 | }; 18 | 19 | struct S { 20 | int apple; 21 | // error: duplicate member 'apple' 22 | int apple; 23 | // error: duplicate member 'apple' 24 | int apple; 25 | // error: duplicate member 'banana' 26 | int banana, banana; 27 | }; 28 | 29 | struct A { 30 | int a; 31 | } *a; 32 | 33 | struct B { 34 | int a; 35 | } *b; 36 | 37 | // error: conversion from incompatible pointer type 38 | a = b; 39 | 40 | struct C *p; 41 | // error: invalid arithmetic on pointer to incomplete type 42 | p + 1; 43 | 44 | struct C { 45 | int a; 46 | }; 47 | p + 1; 48 | 49 | { 50 | struct C* q; 51 | q + 1; 52 | 53 | struct C; 54 | struct C* r; 55 | 56 | // error: invalid arithmetic on pointer to incomplete type 57 | r + 1; 58 | 59 | struct C { 60 | int a; 61 | }; 62 | 63 | r + 1; 64 | } 65 | 66 | struct D { 67 | int a; 68 | }; 69 | 70 | // error: redefinition of 'struct D' 71 | struct D { 72 | int a; 73 | }; 74 | 75 | // error: defined as wrong kind of tag 'union D' 76 | union D { 77 | int a; 78 | }; 79 | 80 | struct E; 81 | // error: defined as wrong kind of tag 'union E' 82 | union E; 83 | 84 | struct Struct { 85 | int a; 86 | long b; 87 | int* c; 88 | } s, *s_p; 89 | 90 | // error: variable of incomplete type declared 91 | struct E e; 92 | 93 | // error: request for member in something not a structure or union 94 | 10.a; 95 | 96 | // error: request for member in something not a structure or union 97 | s_p.a; 98 | 99 | int *int_ptr; 100 | // error: request for member in something not a structure or union 101 | int_ptr->a; 102 | 103 | // error: first argument of '->' must have pointer type 104 | s->a; 105 | 106 | // error: structure or union has no member 'd' 107 | s.d; 108 | } 109 | -------------------------------------------------------------------------------- /tests/feature_tests/error_type_qualifier.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | const int a; 3 | 4 | // error: expression on left of '=' is not assignable 5 | a = 4; 6 | 7 | const const int* p1; 8 | // error: expression on left of '=' is not assignable 9 | *p1 = 0; 10 | p1 = &a; 11 | 12 | int *const const p2; 13 | *p2 = 0; 14 | // error: expression on left of '=' is not assignable 15 | p2 = &a; 16 | 17 | ///////////////////////////////////////////////////// 18 | 19 | const struct A { 20 | int a; 21 | const int b; 22 | } X; 23 | 24 | // error: expression on left of '=' is not assignable 25 | X.a = 3; 26 | // error: expression on left of '=' is not assignable 27 | X.b = 3; 28 | // error: expression on left of '=' is not assignable 29 | *(&X.a) = 3; 30 | // error: expression on left of '=' is not assignable 31 | (&X.a)[3] = 3; 32 | 33 | struct A Y; 34 | Y.a = 3; 35 | // error: expression on left of '=' is not assignable 36 | Y.b = 3; 37 | // error: expression on left of '=' is not assignable 38 | *(&X.b) = 3; 39 | // error: expression on left of '=' is not assignable 40 | (&X.b)[3] = 3; 41 | 42 | // error: conversion from incompatible pointer type 43 | struct A* ptr_X = &X; 44 | struct A* ptr_Y = &Y; 45 | } 46 | -------------------------------------------------------------------------------- /tests/feature_tests/error_typedef.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | typedef int a; 3 | // error: 'a' redeclared as incompatible type in same scope 4 | typedef long a; 5 | // error: redeclared type definition 'a' as variable 6 | int a; 7 | 8 | int variable; 9 | // error: 'variable' redeclared as type definition in same scope 10 | typedef int variable; 11 | 12 | struct { 13 | // error: cannot have storage specifier on struct member 14 | typedef int a; 15 | }; 16 | 17 | typedef struct S { 18 | int a; 19 | int b; 20 | } struct_S; 21 | 22 | typedef struct { 23 | int a; 24 | int b; 25 | // error: 'struct_S' redeclared as incompatible type in same scope 26 | } struct_S; 27 | 28 | const struct_S s; 29 | // error: expression on left of '=' is not assignable 30 | s.a = 3; 31 | 32 | typedef int A; 33 | { 34 | // error: too many storage classes in declaration specifiers 35 | static extern int A; 36 | // error: use of undeclared identifier 'A' 37 | A = 3; 38 | } 39 | 40 | int B; 41 | { 42 | // error: too many storage classes in declaration specifiers 43 | static typedef int B; 44 | // error: use of undeclared type definition 'B' 45 | B b; 46 | } 47 | 48 | // error: typedef cannot have initializer 49 | typedef int init = 3; 50 | } 51 | 52 | typedef int F(void); 53 | // error: function definition missing parameter list 54 | F f { } 55 | 56 | // error: function definition cannot be a typedef 57 | typedef int function(int a) { 58 | return 0; 59 | } 60 | 61 | // error: storage class specified for function parameter 62 | int function(typedef int a) { 63 | return 0; 64 | } 65 | -------------------------------------------------------------------------------- /tests/feature_tests/error_typedef2.c: -------------------------------------------------------------------------------- 1 | typedef int a; 2 | // error: expected declaration specifier at 'a' 3 | int function(int a, a b) { 4 | return a; 5 | } 6 | -------------------------------------------------------------------------------- /tests/feature_tests/error_unary.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | // error: unary minus requires arithmetic type operand 3 | -""; 4 | 5 | // error: unary plus requires arithmetic type operand 6 | +""; 7 | 8 | // error: bit-complement requires integral type operand 9 | ~""; 10 | } 11 | -------------------------------------------------------------------------------- /tests/feature_tests/error_unassignable_lvalue.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | // error: expression on left of '=' is not assignable 3 | 3 = 4; 4 | 5 | int a; 6 | // error: expression on left of '=' is not assignable 7 | 3 = a; 8 | 9 | // error: expression on left of '=' is not assignable 10 | 3 + 4 = a; 11 | 12 | // error: expression on left of '=' is not assignable 13 | a + a = 3; 14 | 15 | // error: expression on left of '=' is not assignable 16 | a = (5 = 6); 17 | 18 | return 0; 19 | } 20 | -------------------------------------------------------------------------------- /tests/feature_tests/error_undeclared_identifier.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | // error: use of undeclared identifier 'a' 3 | a = 0; 4 | // error: use of undeclared identifier 'a' 5 | a; 6 | 7 | int a; int b; int c; 8 | a = 0; 9 | // error: use of undeclared identifier 'd' 10 | d = 0; 11 | // error: use of undeclared identifier 'd' 12 | a = d; 13 | // error: use of undeclared identifier 'd' 14 | a = d + d; 15 | 16 | { 17 | int e; 18 | e; 19 | } 20 | 21 | // error: use of undeclared identifier 'e' 22 | e; 23 | } 24 | -------------------------------------------------------------------------------- /tests/feature_tests/error_union.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | union R { 3 | // error: cannot have storage specifier on union member 4 | extern int a; 5 | 6 | // error: cannot have storage specifier on union member 7 | auto int a; 8 | 9 | // error: cannot have incomplete type as union member 10 | union R a; 11 | 12 | // error: cannot have function type as union member 13 | int function(int); 14 | 15 | // error: missing name of union member 16 | int*; 17 | }; 18 | 19 | union S { 20 | int apple; 21 | // error: duplicate member 'apple' 22 | int apple; 23 | // error: duplicate member 'apple' 24 | int apple; 25 | // error: duplicate member 'banana' 26 | int banana, banana; 27 | }; 28 | 29 | union A { 30 | int a; 31 | } *a; 32 | 33 | union B { 34 | int a; 35 | } *b; 36 | 37 | // error: conversion from incompatible pointer type 38 | a = b; 39 | 40 | union C *p; 41 | // error: invalid arithmetic on pointer to incomplete type 42 | p + 1; 43 | 44 | union C { 45 | int a; 46 | }; 47 | p + 1; 48 | 49 | { 50 | union C* q; 51 | q + 1; 52 | 53 | union C; 54 | union C* r; 55 | 56 | // error: invalid arithmetic on pointer to incomplete type 57 | r + 1; 58 | 59 | union C { 60 | int a; 61 | }; 62 | 63 | r + 1; 64 | } 65 | 66 | union D { 67 | int a; 68 | }; 69 | 70 | // error: redefinition of 'union D' 71 | union D { 72 | int a; 73 | }; 74 | 75 | // error: defined as wrong kind of tag 'struct D' 76 | struct D { 77 | int b; 78 | }; 79 | 80 | union D1; 81 | 82 | // error: defined as wrong kind of tag 'struct D1' 83 | struct D1; 84 | 85 | union D2 { 86 | int a; 87 | }; 88 | 89 | // error: defined as wrong kind of tag 'struct D2' 90 | struct D2 ddd; 91 | 92 | union Union { 93 | int a; 94 | long b; 95 | int* c; 96 | } s, *s_p; 97 | 98 | // error: request for member in something not a structure or union 99 | 10.a; 100 | 101 | // error: request for member in something not a structure or union 102 | s_p.a; 103 | 104 | int *int_ptr; 105 | // error: request for member in something not a structure or union 106 | int_ptr->a; 107 | 108 | // error: first argument of '->' must have pointer type 109 | s->a; 110 | 111 | // error: structure or union has no member 'd' 112 | s.d; 113 | } 114 | -------------------------------------------------------------------------------- /tests/feature_tests/expr_comma.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a = (3, 5); 3 | int *p = (10, &a); 4 | 5 | if(a != 5) return 1; 6 | if(p != &a) return 2; 7 | } 8 | -------------------------------------------------------------------------------- /tests/feature_tests/for.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() { 4 | int sum = 0; 5 | 6 | // This variable `a` is independent from the variable `a` used below 7 | int a = 10; 8 | for(int a = 0; a != 10; a++) { 9 | sum = sum + a; 10 | 11 | // This line does not set `a` above 12 | int a = 0; 13 | } 14 | 15 | if(sum != 45) return 1; 16 | 17 | sum = 0; 18 | for(a = 20; a != 80; a = a * 2) { 19 | sum = sum + a; 20 | } 21 | if(a != 80) return 2; 22 | if(sum != 60) return 3; 23 | 24 | sum = 0; 25 | for(; a != 100; ++a) { 26 | sum = sum + a; 27 | } 28 | if(a != 100) return 4; 29 | if(sum != 1790) return 5; 30 | 31 | sum = 0; 32 | for(; a != 110; ) { 33 | sum = sum + a; 34 | a++; 35 | } 36 | if(a != 110) return 6; 37 | if(sum != 1045) return 7; 38 | 39 | // Exit inside this 40 | int count = 0; 41 | for(;;) { 42 | count++; 43 | if(count == 10) exit(0); 44 | if(count == 0) exit(1); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /tests/feature_tests/function_call.c: -------------------------------------------------------------------------------- 1 | int isalpha(int); 2 | int div(); // test function prototype 3 | 4 | int strcmp(char*, char*); 5 | char* strcpy(char*, char*); 6 | char* strncpy(char*, char*, long); 7 | int atoi(char *str); 8 | 9 | int signal(int, int(int)); 10 | 11 | int main() { 12 | // Try out a few function calls from standard library. 13 | 14 | _Bool b; 15 | 16 | b = isalpha(65); // 'A' 17 | if(b != 1) return 1; 18 | 19 | // Coax the return IL value of atoi to be assigned to a register 20 | // that is not RAX, so we can test the `mov` operation that moves a 21 | // return value from RAX to the spot its IL value is stored in. 22 | if(1/atoi("1") != 1) return 14; 23 | 24 | b = isalpha(52); 25 | if(b != 0) return 2; 26 | 27 | // This is super ratchet, but works for now. The div function 28 | // accepts two integers and returns a struct. It turns out the 29 | // quotient of this struct is returned in the integer return 30 | // register, so this test works. 31 | char c1 = 50, c2 = 5; 32 | if(div(c1, c2) != 10) return 3; 33 | 34 | char str1[6], str2[6]; 35 | strcpy(str1, "hello"); 36 | strcpy(str2, "hello"); 37 | if(strcmp(str1, str2)) return 4; 38 | 39 | // doing (vp=___) is basically a ratchet cast to avoid pointer 40 | // errors 41 | void *vp; 42 | if(strcmp((vp=str1), str2)) return 13; 43 | 44 | str2[3] = 102; 45 | if(strcmp(str1, str2) != 6) return 5; 46 | 47 | strcpy(str2, "hey"); 48 | char* out = strncpy(str1, str2, 3); 49 | if(strcmp(str1, "heylo")) return 6; 50 | 51 | int (*f2)(int) = isalpha; 52 | if(f2(5)) return 12; 53 | 54 | // test passing a function to a function 55 | signal(0, isalpha); 56 | 57 | return 0; 58 | } 59 | -------------------------------------------------------------------------------- /tests/feature_tests/function_def.c: -------------------------------------------------------------------------------- 1 | int add(int a, long b) { 2 | return a + b; 3 | } 4 | 5 | // test static variables with same name 6 | int counter1() { 7 | static int i; 8 | return i++; 9 | } 10 | 11 | int counter2() { 12 | static int i; 13 | return i++; 14 | } 15 | 16 | // defined in function_def_helper.c 17 | int helper_ret_5(void); 18 | int helper_ret_6(); 19 | 20 | int helper; 21 | void void_exit() { 22 | helper = 3; 23 | } 24 | 25 | void void_ret() { 26 | helper = 5; 27 | return; 28 | helper = 6; 29 | } 30 | 31 | int int_ret() { 32 | int a = 3; 33 | // force `a` to conflict with RAX 34 | a = 3 / a; 35 | // must emit `mov` because `a` is not in RAX 36 | return a; 37 | } 38 | 39 | int array_sum(int arr[3]) { 40 | int sum = 0; 41 | for(int i = 0; i < 3; i++) { 42 | sum += arr[i]; 43 | } 44 | return sum; 45 | } 46 | 47 | int call_function(int f(int, long), int arg1, int arg2) { 48 | return f(arg1, arg2); 49 | } 50 | 51 | const int return_const() { 52 | return 4; 53 | } 54 | 55 | int ptr_value(const int* p) { 56 | return *p; 57 | } 58 | 59 | int sum_array(int a[][2], int len) { 60 | int sum = 0; 61 | for(int i = 0; i < len; i++) { 62 | for(int j = 0; j < 2; j++) { 63 | sum += a[i][j]; 64 | } 65 | } 66 | return sum; 67 | } 68 | 69 | int main() { 70 | if(add(3, 4) != 7) return 1; 71 | if(add(helper_ret_5(), 4) != 9) return 2; 72 | if(add(helper_ret_6(), 5) != 11) return 3; 73 | 74 | for(int i = 0; i < 5; i++) { 75 | if(counter1() != i) return 4; 76 | if(counter2() != i) return 5; 77 | } 78 | 79 | void_exit(); 80 | if(helper != 3) return 6; 81 | 82 | void_ret(); 83 | if(helper != 5) return 7; 84 | 85 | int arr[3]; 86 | arr[0] = 1; 87 | arr[1] = 2; 88 | arr[2] = 3; 89 | if(array_sum(arr) != 6) return 8; 90 | 91 | if(call_function(add, 5, 6) != 11) return 9; 92 | 93 | int a = return_const(); 94 | if(a != 4) return 10; 95 | 96 | if(ptr_value(&a) != 4) return 11; 97 | const int* p = &a; 98 | if(ptr_value(&a) != 4) return 12; 99 | 100 | int arr1[2][2]; 101 | arr1[0][0] = 1; 102 | arr1[0][1] = 1; 103 | arr1[1][0] = 1; 104 | arr1[1][1] = 1; 105 | if(sum_array(arr1, 2) != 4) return 13; 106 | } 107 | -------------------------------------------------------------------------------- /tests/feature_tests/function_def_helper.c: -------------------------------------------------------------------------------- 1 | int helper_ret_5() { 2 | return 5; 3 | } 4 | 5 | int helper_ret_6() { 6 | return 6; 7 | } 8 | -------------------------------------------------------------------------------- /tests/feature_tests/if.c: -------------------------------------------------------------------------------- 1 | // Return: 30 2 | 3 | int main() { 4 | // False literal condition 5 | if(0) return 1; 6 | 7 | // False variable condition 8 | int a = 0; 9 | if(a) return 2; 10 | 11 | // False variable equality condition 12 | int b = 10; 13 | int c = 11; 14 | if(b == c) return 3; 15 | 16 | // False variable inequality 17 | if(b != b) return 4; 18 | 19 | // True variable inequality 20 | if(b != c) { 21 | 22 | 23 | // False literal computation condition 24 | if(b * 0) { 25 | return 4; 26 | } 27 | 28 | // False literal equality condition 29 | if(3 == 4) { 30 | return 5; 31 | } 32 | 33 | // False literal inequality condition 34 | if(3 != 3) { 35 | return 6; 36 | } 37 | 38 | // False half-literal inequality condition 39 | b = 3; 40 | if(b != 3) { 41 | return 7; 42 | } 43 | 44 | // Check register allocation of branch 45 | 46 | // Without proper register allocation of branches, the compiler places `e` 47 | // in the same register as `d` because it does not realize `d` is still 48 | // live due to the comparison at line 58. 49 | int d = 0; int e = d + 2; 50 | if(e == 5) { // not taken 51 | d = 1; 52 | } 53 | if(d != 0) return 8; 54 | 55 | // Valid conditions with computations 56 | int ret1; int ret2; int ret3; 57 | if(b == 3) { 58 | if(b != 15) { 59 | ret1 = 10; 60 | if(3 == 3) { 61 | ret2 = 10 + ret1; 62 | if(5) { 63 | ret3 = ret2 + 10; 64 | return ret3; 65 | } 66 | } 67 | } 68 | } 69 | } 70 | 71 | return 9; 72 | } 73 | -------------------------------------------------------------------------------- /tests/feature_tests/implicit_cast.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | _Bool b; 3 | 4 | char c; 5 | unsigned char uc; 6 | 7 | short s; 8 | unsigned short us; 9 | unsigned short us_2; 10 | 11 | int i; 12 | unsigned int ui; 13 | 14 | long l; 15 | unsigned long ul; 16 | 17 | // Until negative literals are supported, this is how we insert -1. 18 | int neg_one = 4294967295; 19 | 20 | c = neg_one; 21 | if(c != neg_one) return 1; 22 | 23 | s = neg_one; 24 | if(s != neg_one) return 2; 25 | if(s == 65535) return 3; 26 | 27 | us = neg_one; 28 | if(us == neg_one) return 4; 29 | if(us != 65535) return 5; 30 | 31 | s = neg_one; 32 | us_2 = s; 33 | if(us != 65535) return 6; 34 | if(us != us_2) return 7; 35 | 36 | ui = neg_one; 37 | if(ui != 4294967295) return 8; 38 | 39 | s = neg_one; 40 | ui = s; 41 | if(ui != 4294967295) return 9; 42 | 43 | us = neg_one; 44 | ui = us; 45 | if(ui != 65535) return 9; 46 | 47 | s = neg_one; 48 | ui = s; 49 | i = ui; // Technically undefined behavior, per the spec. 50 | if(i != neg_one) return 10; 51 | 52 | c = neg_one; 53 | l = c; 54 | if(l + 1 != 0) return 11; 55 | 56 | s = neg_one; 57 | l = s; 58 | if(l + 1 != 0) return 12; 59 | 60 | l = neg_one; 61 | if(l + 1 != 0) return 13; 62 | 63 | // Test integer promotion 64 | char c1 = 30, c2 = 40, c3 = 10, c4; 65 | c4 = (c1 * c2) / c3; 66 | if(c4 != 120) return 14; 67 | 68 | unsigned short us1 = 30, us2 = 40, us3 = 10, us4; 69 | us1 = 30; us2 = 40; us3 = 10; 70 | us4 = (us1 * us2) / us3; 71 | if(us4 != 120) return 15; 72 | 73 | // Test integer conversion 74 | long l1 = 1073741824, i1; 75 | // Because large immediate values are not yet supported, we split up 76 | // l1 in this way. l1 = 2^32. 77 | l1 = l1 + l1 + l1 + l1; 78 | if(l1 * 2 / 8 != 1073741824) return 16; 79 | 80 | // Test unsigned integer conversion 81 | int i2; unsigned int ui2; 82 | i2 = 2*neg_one; 83 | ui2 = 1; 84 | if(i2 + ui2 != 4294967295) return 17; 85 | 86 | // Test int/long conversion. 87 | long l2; int i3; 88 | l2 = 2147483644; // 2^31-4 89 | i3 = 4; 90 | if(i3 * l2 / i3 != 2147483644) return 18; 91 | 92 | // Test signed/unsigned conversion when signed is bigger 93 | long l3 = 100; unsigned int i4 = 100; 94 | if(l3 != i4) { 95 | return 20; 96 | } 97 | if(i4 != l3) { 98 | return 21; 99 | } 100 | 101 | b = 0; 102 | if(b) return 19; 103 | 104 | b = 10; 105 | if(b) { 106 | b = i3; 107 | if(b) { 108 | return 0; 109 | } 110 | } 111 | return 1; 112 | } 113 | -------------------------------------------------------------------------------- /tests/feature_tests/include.c: -------------------------------------------------------------------------------- 1 | #include "include_helper.h" 2 | #include "include_helper_empty.h" 3 | 4 | int main() { 5 | char* a = "test string"; 6 | 7 | // Make sure the includes in include_helper.h were successful. 8 | isalpha(10); 9 | strcpy(a, a); 10 | } 11 | -------------------------------------------------------------------------------- /tests/feature_tests/include_helper.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | -------------------------------------------------------------------------------- /tests/feature_tests/include_helper_empty.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShivamSarodia/ShivyC/6232136be38a29e8c18beae3d23e49ecfb7906fd/tests/feature_tests/include_helper_empty.h -------------------------------------------------------------------------------- /tests/feature_tests/incomplete_types.c: -------------------------------------------------------------------------------- 1 | int array1[]; 2 | int array2[]; 3 | int array3[][5]; 4 | int array4[5][5]; 5 | 6 | // verify typedef doesn't get completed 7 | typedef int T[]; 8 | T e; 9 | int e[10]; 10 | T f; 11 | int f[5]; 12 | 13 | struct S s; 14 | 15 | int check_completed(void); 16 | 17 | int main() { 18 | // verify these are compatible types 19 | // (complete and incomplete arrays are compatible) 20 | &array1 == &array2; 21 | 22 | // complete array1 23 | extern int array1[5]; 24 | &array1 == &array2; 25 | 26 | // (complete and incomplete are compatible) 27 | &array3 == &array4; 28 | 29 | // todo: test sizeof(S) 30 | return check_completed(); 31 | } 32 | 33 | // complete arrays 34 | int array2[5]; 35 | int array3[5][5]; 36 | 37 | struct S { 38 | int a, b; 39 | } s; 40 | 41 | int check_completed() { 42 | s.a = 3; 43 | s.b = 5; 44 | if(s.a != 3) return 1; 45 | if(s.b != 5) return 2; 46 | 47 | &array1 == &array2; 48 | &array3 == &array4; 49 | 50 | return 0; 51 | } 52 | -------------------------------------------------------------------------------- /tests/feature_tests/incr_decr.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a = 5; 3 | 4 | a--; 5 | if(a != 4) return 1; 6 | if(a-- != 4) return 2; 7 | if(--a != 2) return 3; 8 | if(++a != 3) return 4; 9 | if(a++ != 3) return 5; 10 | 11 | a = 5; 12 | int* b = &a; 13 | (*b)++; 14 | if(a != 6) return 6; 15 | if((*b)++ != 6) return 7; 16 | if(--*b != 6) return 8; 17 | 18 | int arr[5]; 19 | arr[0] = 10; 20 | if(++arr[0] != 11) return 9; 21 | if(arr[0] != 11) return 10; 22 | 23 | int* p = &a; 24 | if(p++ != &a) return 11; 25 | if(++p != &a + 2) return 12; 26 | if(p-- != &a + 2) return 13; 27 | if(--p - &a != 0) return 14; 28 | } 29 | -------------------------------------------------------------------------------- /tests/feature_tests/int_arithmetic.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a = 5; int b = 10; int c = 2; int d; int e; 3 | c = b + a * b + 10 * a / c + 10 * 3 / 3; 4 | d = c * b + a / 2; 5 | e = d * c; 6 | 7 | if(e != 90440) return 1; 8 | } 9 | -------------------------------------------------------------------------------- /tests/feature_tests/main_return.c: -------------------------------------------------------------------------------- 1 | // Return: 4 2 | 3 | int main() { 4 | return 4; 5 | } 6 | -------------------------------------------------------------------------------- /tests/feature_tests/modulus.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | if(20 % 3 != 2) return 1; 3 | if(3 % 3 != 0) return 2; 4 | 5 | int a = 5, b = 13; 6 | if(a % b != 5) return 3; 7 | if(b % a != 3) return 4; 8 | 9 | long l = 3; 10 | if(a % l != 2) return 5; 11 | if(a % 1099511627776 != a) return 6; 12 | if(1099511627776 % a != 1) return 7; 13 | if(1099511627776 % 1099511627776 != 0) return 8; 14 | } 15 | -------------------------------------------------------------------------------- /tests/feature_tests/multiplication.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a = 2, b = 3; 3 | int c = a * b; 4 | int d = c * 5; 5 | int e = 2 * 4; 6 | int f = c * d * e; 7 | f = f * f; 8 | 9 | if(f != 2073600) return 1; 10 | 11 | unsigned int g = 5, h = g * 10; 12 | unsigned int i; 13 | h = 10 * g; // 50 14 | i = g * h; // 250 15 | 16 | if(i != 250) return 2; 17 | 18 | if(i / g != h) return 3; 19 | 20 | // Test order of operations 21 | if(3 + 2 * 3 != 9) return 4; 22 | 23 | // Test unsigned int multiplication. 24 | unsigned int j = 4294967295; 25 | if(j * j != 1) return 5; 26 | if((j - 1) * (j - 1) != 4) return 6; 27 | 28 | long k = 2147483648; 29 | if(k * j != 9223372034707292160) return 7; 30 | 31 | unsigned int l = 4294967295; 32 | unsigned int m = 4294967295; 33 | if(l * m != (unsigned int)4294967295 * (unsigned int)4294967295) return 8; 34 | 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /tests/feature_tests/pointer-1.c: -------------------------------------------------------------------------------- 1 | // For testing dereference operator on left-side of assignment, see pointer-2.c 2 | 3 | int main() { 4 | int a = 10; 5 | if(*(&a) != 10) return 1; 6 | 7 | long b = 20; 8 | if(*(&b) + 50 != 70) return 2; 9 | 10 | // Assignment of compatible pointer types 11 | int* c = &a; int *d; 12 | c = &a; 13 | c = d; 14 | 15 | // Verify reference operator reverses dereference pointer 16 | c = &a; 17 | if(&(*c) != &a) return 5; 18 | 19 | // Assignment of non-void to void 20 | void* v = &a; 21 | 22 | // Dereference void pointer 23 | *v; 24 | 25 | // Assignment of non-void double pointer to void 26 | int *q = &a; 27 | v = &q; 28 | 29 | // Assignment of void to non-void 30 | int* e = v; 31 | 32 | // Assignment of null pointer constant 33 | v = 0; 34 | e = 0; 35 | 36 | // Value of below is checked at very end of this main() function 37 | _Bool h = &a; 38 | 39 | // Address-of operator where output is on the stack 40 | int* i_on_stack; int j; 41 | &i_on_stack; 42 | i_on_stack = &j; 43 | if(i_on_stack != &j) return 3; 44 | 45 | // Read-at where address is on stack 46 | j = 10; 47 | i_on_stack = &j; 48 | j = *i_on_stack; 49 | if(j != 10) return 4; 50 | 51 | if(h) return 0; 52 | return 1; 53 | } 54 | -------------------------------------------------------------------------------- /tests/feature_tests/pointer-2.c: -------------------------------------------------------------------------------- 1 | // Tests dereference operator on left-side of assignment. 2 | 3 | int main() { 4 | int a = 1; 5 | int* p1 = &a; 6 | 7 | *p1 = 2; 8 | if(a != 2) return 1; 9 | 10 | int b = 0; 11 | *(&b) = 3; 12 | if(b != 3) return 2; 13 | 14 | int c; 15 | int* p2; int* p3; 16 | p2 = &c + 2; 17 | *p2 = 4; 18 | p3 = &c + 2; 19 | if(*p2 != *p3) return 3; 20 | 21 | char d = 4; 22 | long e = 3; 23 | *(&d) = e; 24 | if(d != 3) return 4; 25 | 26 | d = 4; 27 | *(&e) = d; 28 | if(e != 4) return 5; 29 | 30 | // dereferencing pointer stored on stack 31 | int* p4; int f; 32 | &p2; 33 | p2 = &f; 34 | *p2 = 10; 35 | if(f != 10) return 6; 36 | 37 | return 0; 38 | } 39 | -------------------------------------------------------------------------------- /tests/feature_tests/pointer_math.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | // These tests are pretty finicky, as they reply on these variables being 3 | // declared on the stack with a particular order/alignment. 4 | 5 | int a = 5, b = 10, c = 15; 6 | 7 | // This line forces the register allocator to declare a, b, and c on the stack 8 | // in the desired order. 9 | &a; &b; &c; 10 | 11 | if(*(&c + 1) != 10) return 1; 12 | if(*(1 + &c) != 10) return 2; 13 | if(*(&c + 2) != 5) return 3; 14 | if(*(2 + &c) != 5) return 4; 15 | if(*(&b + 1) != 5) return 5; 16 | if(*(1 + &b) != 5) return 6; 17 | if(&b + 1 != &a) return 7; 18 | if(1 + &b != &a) return 8; 19 | 20 | if(&a - &b != 1) return 9; 21 | if(&a - &c != 2) return 10; 22 | if(&a - 1 != &b) return 11; 23 | if(&a - 2 != &c) return 12; 24 | 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /tests/feature_tests/sizeof.c: -------------------------------------------------------------------------------- 1 | int global; 2 | 3 | int f() { 4 | global = 10; 5 | } 6 | 7 | int main() { 8 | if(sizeof(_Bool) != 1) return 1; 9 | if(sizeof(char) != 1) return 2; 10 | if(sizeof(char signed) != 1) return 3; 11 | if(sizeof(char unsigned) != 1) return 4;; 12 | if(sizeof(short) != 2) return 5; 13 | if(sizeof(short signed) != 2) return 6; 14 | if(sizeof(int short) != 2) return 7; 15 | if(sizeof(int short signed) != 2) return 8; 16 | if(sizeof(short unsigned) != 2) return 9; 17 | if(sizeof(int short unsigned) != 2) return 10; 18 | if(sizeof(int) != 4) return 11; 19 | if(sizeof(signed) != 4) return 12; 20 | if(sizeof(int signed) != 4) return 13; 21 | if(sizeof(unsigned) != 4) return 14; 22 | if(sizeof(int unsigned) != 4) return 15; 23 | if(sizeof(long) != 8) return 16; 24 | if(sizeof(long signed) != 8) return 17; 25 | if(sizeof(int long) != 8) return 18; 26 | if(sizeof(int long signed) != 8) return 19; 27 | if(sizeof(long unsigned) != 8) return 20; 28 | if(sizeof(int long unsigned) != 8) return 21; 29 | if(sizeof(int *) != 8) return 22; 30 | 31 | int a = 1; 32 | if(sizeof a != 4) return 23; 33 | if(sizeof(a) != 4) return 24; 34 | 35 | if(sizeof 32 != 4) return 25; 36 | 37 | int b[3]; 38 | if(sizeof b != 12) return 26; 39 | if(sizeof b/sizeof b[0] != 3) return 34; 40 | 41 | struct C { 42 | int a_int_one; 43 | struct B { 44 | int b_int_one; 45 | long b_long; 46 | int b_int_two; 47 | } b_struct; 48 | int a_int_two, *a_ptr; 49 | } c; 50 | struct C q; 51 | if(sizeof q != 32) return 27; 52 | if(sizeof q.a_int_one != 4) return 28; 53 | if(sizeof q.b_struct != 16) return 29; 54 | 55 | typedef int T; 56 | if(sizeof(T) != 4) return 30; 57 | 58 | T d; 59 | if(sizeof(d) != 4) return 31; 60 | 61 | // verify struct declared in sizeof is visible outside 62 | sizeof(struct S {int a;}); 63 | struct S s; 64 | 65 | // verify argument of sizeof is not evaluated 66 | if(sizeof(f()) != 4) return 32; 67 | if(global != 0) return 33; 68 | 69 | return 0; 70 | } 71 | -------------------------------------------------------------------------------- /tests/feature_tests/storage.c: -------------------------------------------------------------------------------- 1 | extern int extern_var; 2 | extern int extern_var; 3 | 4 | extern void* stdout; 5 | 6 | int redef_func(int, int); 7 | int redef_func(int, int); 8 | 9 | extern int a; 10 | void set_a_to_1(void); 11 | 12 | static int b = 7; 13 | void set_b_to_1(void); 14 | 15 | static int intern_var; 16 | 17 | // should have no effect 18 | extern int intern_var; 19 | 20 | extern int extern_var_2; 21 | 22 | int tent_var; 23 | int tent_var; 24 | int tent_var_1; 25 | extern int tent_var_1; 26 | void set_tent_vars(void); 27 | 28 | int tent_var_2; 29 | int tent_var_2 = 10; 30 | 31 | int func(); 32 | 33 | int main() { 34 | auto char* p; 35 | p = stdout; 36 | if(*p + 124 != 0) return 1; 37 | 38 | if(extern_var != 0) return 2; 39 | extern_var = 18; 40 | if(extern_var != 18) return 3; 41 | 42 | { 43 | int extern_var; 44 | if(extern_var == 18) return 4; 45 | { 46 | extern int extern_var; 47 | if(extern_var != 18) return 5; 48 | } 49 | } 50 | 51 | { 52 | // will have internal linkage 53 | extern int intern_var; 54 | intern_var = 7; 55 | if(intern_var != 7) return 6; 56 | } 57 | { 58 | // has internal linkage also 59 | if(intern_var != 7) return 7; 60 | } 61 | 62 | if(extern_var_2 != 8) return 8; 63 | 64 | if(a != 3) return 9; 65 | set_a_to_1(); 66 | if(a != 1) return 10; 67 | 68 | if(b != 7) return 11; 69 | set_b_to_1(); 70 | if(b != 7) return 12; 71 | 72 | for(int i = 1; i < 10; i++) { 73 | if(func() != i) return 13; 74 | } 75 | 76 | if(tent_var) return 14; 77 | if(tent_var_1) return 15; 78 | set_tent_vars(); 79 | if(tent_var != 3) return 16; 80 | if(tent_var_1 != 3) return 17; 81 | if(tent_var_2 != 10) return 18; 82 | } 83 | 84 | int func() { 85 | static int a = 1; 86 | return a++; 87 | } 88 | -------------------------------------------------------------------------------- /tests/feature_tests/storage_helper.c: -------------------------------------------------------------------------------- 1 | int extern_var = 0; 2 | int extern_var_2 = 4 + 4; 3 | 4 | int tent_var; 5 | int tent_var_1; 6 | 7 | int a = 3; 8 | void set_a_to_1() { 9 | a = 1; 10 | } 11 | 12 | static int b = 4; 13 | void set_b_to_1() { 14 | b = 1; 15 | } 16 | 17 | void set_tent_vars() { 18 | tent_var = 3; 19 | tent_var_1 = 3; 20 | } 21 | -------------------------------------------------------------------------------- /tests/feature_tests/string.c: -------------------------------------------------------------------------------- 1 | int strcmp(char*, char*); 2 | 3 | int main() { 4 | if(strcmp("hello", "hello")) return 1; 5 | 6 | char (*a)[6] = &"hello"; 7 | if(strcmp("hello", *a)) return 2; 8 | 9 | if('a' != 97) return 3; 10 | if('f' - 'a' != 5) return 4; 11 | if('\'' != 39) return 5; 12 | if('"' != 34) return 6; 13 | if('\n' != 10) return 7; 14 | if('\\' != 92) return 8; 15 | if(' ' != 32) return 9; 16 | 17 | if('\0' != 0) return 10; 18 | if('\40' != 32) return 11; 19 | if('\100' != 64) return 12; 20 | if('\x9' != 9) return 13; 21 | if('\x67' != 103) return 14; 22 | if('\x7A' != 122) return 15; 23 | if('\x5a' != 90) return 16; 24 | if('\x00000021' != 33) return 17; 25 | 26 | if(strcmp("\x68\145\x6c\154\x6F", "hello")) return 18; 27 | if(strcmp("12\63", "123")) return 19; 28 | if(strcmp("\06123", "123")) return 20; 29 | if(strcmp("\578", "/8")) return 21; 30 | if(strcmp("\x2fg", "/g")) return 22; 31 | if(strcmp("\x", "\x")) return 23; 32 | 33 | return 0; 34 | } 35 | -------------------------------------------------------------------------------- /tests/feature_tests/struct.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | struct A *q; 3 | 4 | struct A { 5 | int a_int_one; 6 | struct B { 7 | int b_int_one; 8 | long b_long; 9 | int b_int_two; 10 | } b_struct; 11 | int a_int_two, *a_ptr; 12 | int; 13 | } a; 14 | 15 | q = &a; 16 | void* p1 = q + 1; 17 | char* p2 = p1; 18 | 19 | // this is a hacky test to check sizeof(struct A) 20 | void* p3 = p2 - 8*4; 21 | 22 | if(p3 != q) return 1; 23 | 24 | struct {}; 25 | struct I {} b; 26 | if(&b != (&b + 1)) return 2; 27 | 28 | ////////////////////////// 29 | 30 | a.a_int_one = 10; 31 | if(a.a_int_one != 10) return 3; 32 | a.a_ptr = &a.a_int_one; 33 | *a.a_ptr = 20; 34 | if(a.a_int_one != 20) return 4; 35 | 36 | q = &a; 37 | (*q).a_int_two = 15; 38 | if(a.a_int_two != 15) return 5; 39 | if(q->a_int_two != 15) return 11; 40 | 41 | p1 = q; 42 | p3 = &a.a_int_one; 43 | if(p1 != p3) return 6; 44 | 45 | a.b_struct.b_long = 10; 46 | if(a.b_struct.b_long != 10) return 7; 47 | if((*(&a.b_struct)).b_long != 10) return 8; 48 | if((&a.b_struct)->b_long != 10) return 12; 49 | 50 | long* p_val = &a.b_struct.b_long; 51 | if(*p_val != 10) return 9; 52 | 53 | *p_val = 20; 54 | if(a.b_struct.b_long != 20) return 10; 55 | 56 | struct A array[10]; 57 | array[3].b_struct.b_int_one = 3; 58 | if(array[3].b_struct.b_int_one != 3) return 13; 59 | if((&array[0] + 3)->b_struct.b_int_one != 3) return 14; 60 | 61 | 62 | // Check with array members 63 | struct F { 64 | int array[10]; 65 | }; 66 | 67 | struct F array2[10]; 68 | array2[5].array[5] = 3; 69 | if(array2[5].array[5] != 3) return 15; 70 | 71 | // Check anonymous struct 72 | struct { 73 | int a; 74 | } s; 75 | s.a = 3; 76 | if(s.a != 3) return 16; 77 | 78 | 79 | // Check with union members 80 | struct C { 81 | int c_int; 82 | union D { 83 | int d_int; 84 | long d_long; 85 | } nested_union_d; 86 | union E { 87 | int e_int; 88 | } nested_union_e; 89 | }; 90 | } 91 | -------------------------------------------------------------------------------- /tests/feature_tests/subtraction.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a = 10, b = 5; 3 | if(a - b != 5) return 1; 4 | 5 | int c = a - b; 6 | if(c != 5) return 2; 7 | 8 | int d = a - 5; 9 | if(d != 5) return 3; 10 | 11 | // Test associativity of subtraction 12 | if(3 - 4 - 5 == 3 - (4 - 5)) return 4; 13 | if(3 - 4 - 5 != (3 - 4) - 5) return 5; 14 | 15 | // Test imm64 cases 16 | 17 | // used to modify variable liveliness 18 | int dummy; 19 | dummy = 0; 20 | 21 | // this variable is always live 22 | long never_dead; 23 | never_dead = 1099511627776; 24 | 25 | long j = 1099511627776; 26 | never_dead = j - 1099511627776; 27 | if(never_dead != 1099511627776 - 1099511627776) return 7; 28 | 29 | long k = 1099511627776; 30 | never_dead = 1099511627776 - k; 31 | if(never_dead != 1099511627776 - 1099511627776) return 8; 32 | 33 | long not_dead = 1099511627776; 34 | never_dead = not_dead - 1099511627776; 35 | if(never_dead != 1099511627776 - 1099511627776) return 9; 36 | 37 | never_dead = 1099511627776 - not_dead; 38 | if(never_dead != 1099511627776 - 1099511627776) return 10; 39 | if(1099511627776 - 1099511627776 != never_dead) return 11; 40 | 41 | dummy = dummy - never_dead - not_dead; 42 | } 43 | -------------------------------------------------------------------------------- /tests/feature_tests/type_qualifier.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | const int a = 4; 3 | 4 | const struct A { 5 | int a; 6 | int b; 7 | } X; 8 | 9 | struct B { 10 | const int a; 11 | int b; 12 | } Y; 13 | Y.b = 4; 14 | } 15 | -------------------------------------------------------------------------------- /tests/feature_tests/typedef.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | typedef int a; 3 | typedef int a; 4 | 5 | typedef struct S struct_S; 6 | typedef struct S { 7 | int a; 8 | int b; 9 | } struct_S; 10 | 11 | typedef struct S struct_S1; 12 | // verify a struct is compatible with itself 13 | struct_S *s; 14 | struct_S1 *s1; 15 | s = s1; 16 | 17 | typedef int A; 18 | { 19 | A A; 20 | A = 3; 21 | if(A != 3) return 1; 22 | } 23 | 24 | typedef A* B, C; 25 | C c; 26 | B b = &c; 27 | c = 3; 28 | if(*b != 3) return 2; 29 | } 30 | 31 | typedef int a; 32 | int function(a b, int a) { 33 | return a; 34 | } 35 | -------------------------------------------------------------------------------- /tests/feature_tests/unary_arithmetic.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int n = 5; 3 | unsigned char c = ' '; 4 | 5 | // test unary minus 6 | if(-n != 0-5) return 1; 7 | if(0-n != -5) return 2; 8 | if(-(n+2) != -7) return 3; 9 | if(-(-n) != 5) return 4; 10 | 11 | // test unary plus 12 | if(+n != 5) return 5; 13 | if(n != +5) return 6; 14 | if(+(n-2) != 3) return 7; 15 | if(+(-n) != -5) return 8; 16 | 17 | // test bitwise complement 18 | if(~0 != -1) return 9; 19 | if(~n != -6) return 10; 20 | if(-n-1 != ~5) return 11; 21 | if(~(n+2) != ~7) return 12; 22 | if(~(~n) != 5) return 13; 23 | 24 | // test type promotion 25 | if(-c != -32) return 14; 26 | if(~c != -33) return 15; 27 | 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /tests/feature_tests/union.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | union A *q; 3 | 4 | union A { 5 | int a_int_1; 6 | int a_int_2; 7 | int *a_ptr; 8 | union B { 9 | int b_int_one; 10 | long b_long; 11 | } nested_union; 12 | struct C { 13 | int c_int; 14 | long c_long; 15 | } nested_struct; 16 | long a_long_1; 17 | } a; 18 | 19 | q = &a; 20 | void* p1 = q + 1; 21 | char* p2 = p1; 22 | void* p3 = p2 - 4+4+8+8+4+8+8; 23 | 24 | union {}; 25 | union I {} b; 26 | if(&b != (&b + 1)) return 2; 27 | 28 | a.a_int_1 = 10; 29 | if(a.a_int_1 != 10) return 3; 30 | a.a_ptr = &a.a_int_1; 31 | *a.a_ptr = 20; 32 | if(a.a_int_1 != 20) return 4; 33 | 34 | q = &a; 35 | (*q).a_int_2 = 15; 36 | if(a.a_int_2 != 15) return 5; 37 | if(q->a_int_2 != 15) return 11; 38 | 39 | p1 = q; 40 | p3 = &a.a_int_1; 41 | if(p1 != p3) return 6; 42 | 43 | a.nested_union.b_long = 10; 44 | if(a.nested_union.b_long != 10) return 7; 45 | if((*(&a.nested_union)).b_long != 10) return 8; 46 | if((&a.nested_union)->b_long != 10) return 12; 47 | 48 | long* p_val = &a.nested_union.b_long; 49 | if(*p_val != 10) return 9; 50 | 51 | *p_val = 20; 52 | if(a.nested_union.b_long != 20) return 10; 53 | 54 | union A array[10]; 55 | array[3].nested_union.b_int_one = 3; 56 | if(array[3].nested_union.b_int_one != 3) return 13; 57 | if((&array[0] + 3)->nested_union.b_int_one != 3) return 14; 58 | 59 | union D { 60 | unsigned short a; 61 | unsigned int b; 62 | } u; 63 | u.b = 4294967295; 64 | if(u.a != 65535) return 15; 65 | } 66 | -------------------------------------------------------------------------------- /tests/feature_tests/while.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | int a = 0; 3 | while(a != 5) a = a + 1; 4 | 5 | if(a != 5) return 1; 6 | 7 | int b = 0; 8 | a = 0; 9 | while(a != 5) { 10 | b = b + a; 11 | a = a + 1; 12 | } 13 | 14 | if(a != 5) return 2; 15 | if(b != 10) return 3; 16 | 17 | // While statement never runs 18 | while(b == 100) return 4; 19 | 20 | // While statement runs once 21 | int num_times_run = 0; 22 | while(b == 10) { 23 | b = b + 1; 24 | num_times_run = num_times_run + 1; 25 | 26 | if(num_times_run != 1) { 27 | return 5; 28 | } 29 | } 30 | 31 | return 0; 32 | } 33 | -------------------------------------------------------------------------------- /tests/frontend_tests/empty_statement.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | ;;;;; 3 | while(0); 4 | for(;0;); 5 | } 6 | -------------------------------------------------------------------------------- /tests/frontend_tests/error_bad_identifier.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | // error: unrecognized token at '1identifier' 3 | int 1identifier; 4 | } 5 | -------------------------------------------------------------------------------- /tests/frontend_tests/error_declaration.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | // error: mismatched parentheses in declaration at '(' 3 | int (var; 4 | } 5 | -------------------------------------------------------------------------------- /tests/frontend_tests/error_empty_struct.c: -------------------------------------------------------------------------------- 1 | // error: expected identifier or member list after 'struct' 2 | struct; 3 | 4 | int main() { 5 | return 0; 6 | } 7 | -------------------------------------------------------------------------------- /tests/frontend_tests/error_end_binop.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | // error: expected ';' after '5' 3 | return 3 + 5 4 | -------------------------------------------------------------------------------- /tests/frontend_tests/error_if_close_paren.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | // error: expected ')' after 'a' 3 | if(a return 0; 4 | } 5 | -------------------------------------------------------------------------------- /tests/frontend_tests/error_if_no_condition.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | // error: expected expression, got ')' 3 | if() return 3; 4 | } 5 | -------------------------------------------------------------------------------- /tests/frontend_tests/error_if_open_paren.c: -------------------------------------------------------------------------------- 1 | int main() { 2 | // error: expected '(' after 'if' 3 | if 10) return 3; 4 | } 5 | -------------------------------------------------------------------------------- /tests/frontend_tests/error_lexer.c: -------------------------------------------------------------------------------- 1 | // error: expected "FILENAME" or after include directive 2 | #include 3 | // error: expected "FILENAME" or after include directive 4 | #include blah 5 | // error: missing terminating character for include filename 6 | #include 8 | #include 9 | 10 | // No arguments 11 | // Always returns 0 12 | // Takes program as input from stdin, outputs the input program with line nums 13 | // Does not modify external state 14 | 15 | int main() 16 | { 17 | int c = 0; // current character 18 | int line_num = 0; // line number 19 | 20 | int p = 0; // last character 21 | int in_str = 0; // if we are in a string 22 | int in_char = 0; // if we are in a char string 23 | int in_cpp_comment = 0; // if we are in C++ comment 24 | int in_c_comment = 0; // if we are in a C comment 25 | int is_line = 0; // if current line is really a line 26 | int p_is_line = 0; // if it was a line character before 27 | int escaped = 0; // if the next character is to be escaped 28 | 29 | int p_else = 0; // if it was a line character before the else 30 | int else_state = 0; // counter to keep track of else letters 31 | 32 | while( (c = getchar()) != (-1) ) { 33 | // Check for line splices 34 | int next = getchar(); 35 | if( (c == '\\') && (next == '\n') ) { 36 | putchar(c); 37 | putchar(next); 38 | } 39 | else { 40 | ungetc(next,stdin); // return next to stdin 41 | 42 | int ending_comment = 0; // if this character is used to end comment 43 | 44 | // check for C comment start 45 | if((p == '/') && (c == '*') 46 | && !in_str && !in_char && !in_cpp_comment ) { 47 | in_c_comment = 1; 48 | is_line = p_is_line; 49 | } 50 | 51 | // check for C comment end 52 | else if((p == '*') && (c == '/') ) { 53 | in_c_comment = 0; 54 | ending_comment = 1; 55 | } 56 | 57 | // check for C++ comment start 58 | else if((p == '/') && (c == '/') 59 | && !in_str && !in_char && !in_c_comment ) { 60 | in_cpp_comment = 1; 61 | is_line = p_is_line; 62 | } 63 | 64 | // check for string start 65 | else if((c == '"') 66 | && !in_char && !in_c_comment && !in_cpp_comment && !escaped) 67 | in_str = !in_str; 68 | 69 | // check for char string start 70 | else if((c == '\'') 71 | && !in_str && !in_c_comment && !in_cpp_comment && !escaped) 72 | in_char = !in_char; 73 | 74 | // check if the next char is escaped 75 | if(c == '\\') escaped = !escaped; 76 | else escaped = 0; // if not, the next character is not escaped 77 | 78 | p_is_line = is_line; 79 | p = c; 80 | 81 | // update is_line if this is a line of code 82 | if((c != '{') && (c != '}') && !isspace(c) 83 | && !in_c_comment && !in_cpp_comment 84 | && !ending_comment) 85 | is_line = 1; 86 | 87 | // deal with else 88 | if(c == 'e' && else_state == 0 && !in_c_comment && !in_cpp_comment) { 89 | p_else = p_is_line; 90 | else_state++; 91 | } 92 | else if(c == 'l' && else_state == 1) else_state++; 93 | else if(c == 's' && else_state == 2) else_state++; 94 | else if(c == 'e' && else_state == 3) { 95 | is_line = p_else; 96 | else_state = 0; 97 | } 98 | else else_state = 0; 99 | 100 | // if it's a newline that counts, print/reset stuff 101 | if(c == '\n' && is_line && !in_c_comment) { 102 | printf(" //%d", ++line_num); 103 | is_line = 0; 104 | // NO in_c_comment! do not reset in_c_comment 105 | p_is_line = 0; 106 | is_line = 0; 107 | p_else = 0; 108 | } 109 | // if it's a newline at all, reset other stuff too 110 | if(c == '\n') in_cpp_comment = 0; 111 | 112 | putchar(c); 113 | } 114 | } 115 | 116 | return 0; 117 | } 118 | -------------------------------------------------------------------------------- /tests/general_tests/count/input.c: -------------------------------------------------------------------------------- 1 | THIS IS THE TESTING INPUT FILE FOR THE COUNT.C PROGRAM. IT HAPPENS 2 | TO BE COUNT.C ITSELF, FOR CONVENINCE. HOWEVER, SOME SMALL CHANGES 3 | WERE MADE TO MAKE THIS A BETTER TEST. 4 | 5 | TEST 6 | TEST 7 | TEST 8 | 9 | /******************************************************************************* 10 | * Count.c * 11 | * Shivam Sarodia - Yale University * 12 | * Stanley Eisenstat, CS 223 * 13 | *******************************************************************************/ 14 | 15 | #include 16 | #include 17 | 18 | // No arguments 19 | // Always returns 0 20 | // Takes program as input from stdin, outputs the input program with line nums 21 | // Does not modify external state 22 | 23 | int main() 24 | { 25 | int c = 0; // current character 26 | int line_num = 0; // line number 27 | 28 | int p = 0; // last character 29 | int in_str = 0; // if we are in a string 30 | int in_char = 0; // if we are in a char string 31 | int in_cpp_comment = 0; // if we are in C++ comment 32 | int in_c_comment = 0; // if we are in a C comment 33 | int is_line = 0; // if current line is really a line 34 | int p_is_line = 0; // if it was a line character before 35 | int escaped = 0; // if the next character is to be escaped 36 | 37 | int p_else = 0; // if it was a line character before the else 38 | int else_state = 0; // counter to keep track of else letters 39 | 40 | while( (c = getchar()) != (-1) ) { 41 | // Check for line splices 42 | int next = getchar(); 43 | if( (c == '\\') && (next == '\n') ) { 44 | putchar(c); 45 | putchar(next); 46 | } 47 | else { 48 | ungetc(next,stdin); // return next to stdin 49 | 50 | int ending_comment = 0; // if this character is used to end comment 51 | 52 | // check for C comment start 53 | if((p == '/') && (c == '*') 54 | && !in_str && !in_char && !in_cpp\ 55 | _comment ) { 56 | in_c_comment = 1; 57 | is_line = p_is_line; 58 | } 59 | 60 | // check for C comment end 61 | else if((p == '*') && (c == '/') ) { 62 | in_c_comment = 0; 63 | ending_comment = 1; 64 | } 65 | 66 | // check for C++ comment start 67 | else if((p == '/') && (c == '/') 68 | && !in_str && !in_char && !in_c_comment ) { 69 | in_cpp_comment = 1; 70 | is_line = p_is_line; 71 | } 72 | 73 | // check for string start 74 | else if((c == '"') 75 | && !in_char && !in_c_comment && !in_cpp_comment && !escaped) 76 | in_str = !in_str; 77 | 78 | // check for char string start 79 | else if((c == '\'') 80 | && !in_str && !in_c_comment && !in_cpp_comment && !escaped) 81 | in_char = !in_char; 82 | 83 | // check if the next char is escaped 84 | if(c == '\\') escaped = !escaped; 85 | else escaped = 0; // if not, the next character is not escaped 86 | 87 | p_is_line = is_line; 88 | p = c; 89 | 90 | // update is_line if this is a line of code 91 | if((c != '{') && (c != '}') && !isspace(c) 92 | && !in_c_comment && !in_cpp_comment 93 | && !ending_comment) 94 | is_line = 1; 95 | 96 | // deal with else 97 | if(c == 'e' && else_state == 0 && !in_c_comment && !in_cpp_comment) { 98 | p_else = p_is_line; 99 | else_state++; 100 | } 101 | else if(c == 'l' && else_state == 1) else_state++; 102 | else if(c == 's' && else_state == 2) else_state++; 103 | else if(c == 'e' && else_state == 3) { 104 | is_line = p_else; 105 | else_state = 0; 106 | } 107 | else else_state = 0; 108 | 109 | // if it's a newline that counts, print/reset stuff 110 | if(c == '\n' && is_line && !in_c_comment) { 111 | printf(" //%d", ++line_num); 112 | is_line = 0; 113 | // NO in_c_comment! do not reset in_c_comment 114 | p_is_line = 0; 115 | is_line = 0; 116 | p_else = 0; 117 | } else { 118 | 119 | } 120 | // if it's a newline at all, reset other stuff too 121 | if(c == '\n') in_cpp_comment = 0;\ 122 | 123 | putchar(c); 124 | }\ 125 | } 126 | 127 | return 0; 128 | } 129 | -------------------------------------------------------------------------------- /tests/general_tests/pi/pi.c: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | 3 | pi.c 4 | 5 | An obfuscated C program to print the first several digits of pi. 6 | 7 | Source: https://cs.uwaterloo.ca/~alopez-o/math-faq/mathtext/node12.html 8 | 9 | ******************************************************************************/ 10 | 11 | #include 12 | 13 | int main(){ 14 | int a = 10000, b = 0, c=2800, d = 0, e = 0, f[2801], g = 0; 15 | for(; b-c;) f[b++]=a/5; 16 | for(; d=0, g=c*2; c-=14, printf("%.4d",e+d/a), e = d%a) 17 | for(b=c; d+=f[b]*a, f[b]=d%--g, d/=g-- ,--b; d*=b); 18 | printf("\n"); 19 | } 20 | -------------------------------------------------------------------------------- /tests/general_tests/trie/trie.c: -------------------------------------------------------------------------------- 1 | /******************************************************************** 2 | 3 | An implementation of a trie, loosely based off the CS50 pset 5 4 | (https://docs.cs50.net/2017/x/psets/5/pset5.html). 5 | 6 | ********************************************************************/ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | typedef struct node { 15 | struct node* next[27]; 16 | int complete; 17 | } Node; 18 | 19 | // Load words from the given dictionary into a trie. 20 | Node* load(const char* dictionary); 21 | 22 | // Check whether a word is in the given trie. 23 | bool check(Node* root, const char* word); 24 | 25 | int main() { 26 | Node* trie = load("tests/general_tests/trie/words.txt"); 27 | 28 | int NUM_WORDS = 10; 29 | char* words[10]; 30 | words[0] = "doctor"; 31 | words[1] = "they're"; 32 | words[2] = "many"; 33 | words[3] = "market"; 34 | words[4] = "populate"; 35 | words[5] = "proper"; 36 | words[6] = "motion"; 37 | words[7] = "notaword"; 38 | words[8] = "notawordeither"; 39 | words[9] = "notawordeithereither"; 40 | 41 | for(int word_num = 0; word_num < NUM_WORDS; word_num++) { 42 | char* word = words[word_num]; 43 | if(check(trie, word)) { 44 | printf("found word %s\n", word); 45 | } else { 46 | printf("cannot find word %s\n", word); 47 | } 48 | } 49 | } 50 | 51 | Node* load(const char* dictionary) { 52 | Node* root = malloc(sizeof(Node)); 53 | for(int i = 0; i < 27; i++) root->next[i] = 0; 54 | root->complete = 1; 55 | 56 | FILE* f = fopen(dictionary, "r"); 57 | 58 | Node** n = &root; 59 | char c; 60 | 61 | while((c = fgetc(f)) + 1 != 0) { 62 | if(c == '\n') { 63 | (*n)->complete = 1; 64 | n = &root; 65 | } 66 | else { 67 | if(c == '\'') n = &((*n)->next[26]); 68 | else n = &((*n)->next[c - 'a']); 69 | 70 | if(!(*n)) { 71 | *n = malloc(sizeof(Node)); 72 | (*n)->complete = 0; 73 | for(int i = 0; i < 27; i++) (*n)->next[i] = 0; 74 | } 75 | } 76 | } 77 | 78 | // finish processing the current word if needed 79 | if(!(*n)->complete) { 80 | (*n)->complete = 1; 81 | n = &root; 82 | } 83 | 84 | fclose(f); 85 | 86 | return root; 87 | } 88 | 89 | bool check(Node* root, const char* word) { 90 | Node* n = root; 91 | for(int i = 0, len = strlen(word); i < len; i++) { 92 | if(word[i] == '\'') n = n->next[26]; 93 | else n = n->next[tolower(word[i]) - 'a']; 94 | 95 | if(!n) break; 96 | } 97 | 98 | return n && n->complete; 99 | } 100 | -------------------------------------------------------------------------------- /tests/test_all.py: -------------------------------------------------------------------------------- 1 | """Integration test driver for the compiler. 2 | 3 | This module defines metaclasses which generate test cases from files on disk, 4 | and a test class based off that metaclass. For each file that matches 5 | "tests/feature_tests/*.c", a feature test function is generated, and for 6 | each file that matches "tests/frontend_tests/*.c", a frontend test function 7 | is generated. 8 | 9 | If a file name ends in "_helper.c", a test function is not generated for 10 | that file, but that file is linked into another test. For example, 11 | "function_helper.c" is linked into the test for "function.c". 12 | 13 | If the C file contains a line of the form: 14 | 15 | // Return: ### 16 | 17 | Then, the test expects the main() in that test file to return the value 18 | "###". If no such line exists, the default expected return value is 0. 19 | 20 | If the C file contains line(s) of the form: 21 | 22 | // error: ____ 23 | // warning: ____ 24 | 25 | Then, the test expects compilation to raise an error or warning on the 26 | following line whose message is the string "____". 27 | """ 28 | 29 | import glob 30 | import pathlib 31 | import subprocess 32 | import unittest 33 | 34 | import shivyc.main 35 | from shivyc.errors import error_collector 36 | 37 | 38 | def compile_with_shivyc(test_file_names): 39 | """Compile given file with ShivyC. 40 | 41 | Errors are saved in the error collector. 42 | 43 | """ 44 | # Mock out arguments to ShivyC call 45 | class MockArguments: 46 | files = test_file_names 47 | show_reg_alloc_perf = False 48 | variables_on_stack = False 49 | 50 | shivyc.main.get_arguments = lambda: MockArguments() 51 | 52 | # Mock out error collector functions 53 | error_collector.show = lambda: True 54 | 55 | shivyc.main.main() 56 | 57 | 58 | def _read_params(test_file_name): 59 | """Return expected errors, warnings, and return value for test file.""" 60 | 61 | with open(test_file_name) as f: 62 | exp_ret_val = 0 63 | exp_errors = [] 64 | exp_warnings = [] 65 | 66 | for index, line in enumerate(f.readlines()): 67 | ret_mark = "// Return:" 68 | error_mark = "// error:" 69 | warning_mark = "// warning:" 70 | 71 | if line.strip().startswith(ret_mark): 72 | exp_ret_val = int(line.split(ret_mark)[-1]) 73 | elif line.strip().startswith(error_mark): 74 | error_text = line.split(error_mark)[-1].strip() 75 | exp_errors.append((error_text, index + 2)) 76 | elif line.strip().startswith(warning_mark): 77 | warning_text = line.split(warning_mark)[-1].strip() 78 | exp_warnings.append((warning_text, index + 2)) 79 | 80 | return exp_errors, exp_warnings, exp_ret_val 81 | 82 | 83 | def generate_test(test_file_name, helper_name): 84 | """Return a function that tests given file.""" 85 | 86 | def test_function(self): 87 | exp_errors, exp_warnings, exp_ret_val = _read_params(test_file_name) 88 | 89 | if helper_name: 90 | files = [test_file_name, helper_name] 91 | else: 92 | files = [test_file_name] 93 | compile_with_shivyc(files) 94 | 95 | act_errors = [] 96 | act_warnings = [] 97 | 98 | for issue in error_collector.issues: 99 | issue_list = act_warnings if issue.warning else act_errors 100 | issue_list.append((issue.descrip, issue.range.start.line)) 101 | 102 | self.assertListEqual(act_errors, exp_errors) 103 | self.assertListEqual(act_warnings, exp_warnings) 104 | 105 | if not act_errors: 106 | self.assertEqual(subprocess.call(["./out"]), exp_ret_val) 107 | 108 | return test_function 109 | 110 | 111 | def new(glob_str, dct): 112 | """The implementation of __new__ used for generating tests.""" 113 | test_file_names = glob.glob(glob_str) 114 | for test_file_name in test_file_names: 115 | short_name = test_file_name.split("/")[-1][:-2] 116 | test_func_name = "test_" + short_name 117 | 118 | if not short_name.endswith("_helper"): 119 | helper_name = test_file_name.replace(".c", "_helper.c") 120 | if helper_name not in test_file_names: 121 | helper_name = None 122 | 123 | dct[test_func_name] = generate_test(test_file_name, helper_name) 124 | 125 | 126 | class TestUtils(unittest.TestCase): 127 | """Helper base class for all unit tests.""" 128 | 129 | def setUp(self): 130 | """Clear error collector before each test.""" 131 | error_collector.clear() 132 | 133 | 134 | class MetaFrontendTests(type): 135 | """Metaclass for creating frontend tests.""" 136 | 137 | def __new__(meta, name, bases, dct): 138 | """Create FrontendTests class.""" 139 | new("tests/frontend_tests/*.c", dct) 140 | return super().__new__(meta, name, bases, dct) 141 | 142 | 143 | class FrontendTests(TestUtils, metaclass=MetaFrontendTests): 144 | """Frontend tests that test the lexer, preprocessor, and parser.""" 145 | 146 | pass 147 | 148 | 149 | class MetaFeatureTests(type): 150 | """Metaclass for creating feature tests.""" 151 | 152 | def __new__(meta, name, bases, dct): 153 | """Create FeatureTests class.""" 154 | new("tests/feature_tests/*.c", dct) 155 | done_class = super().__new__(meta, name, bases, dct) 156 | return done_class 157 | 158 | 159 | class FeatureTests(TestUtils, metaclass=MetaFeatureTests): 160 | """Frontend tests that test the lexer, preprocessor, and parser.""" 161 | 162 | pass 163 | 164 | 165 | class IntegrationTests(TestUtils): 166 | """Integration tests for the compiler. 167 | 168 | These test the programs found in general_tests/* for proper functionality. 169 | """ 170 | 171 | def io_test(self, rel_dir, cfile, stdin): 172 | """Run a general I/O test. 173 | 174 | Args: 175 | name (str): Name of this test 176 | rel_dir (str): Directory for the test 177 | cfile (str): The .c file to compile and run 178 | stdin (str): The file to pipe into stdin of the executable, or None 179 | """ 180 | dir = str(pathlib.Path(__file__).parent.joinpath(rel_dir)) 181 | 182 | # Remove leftover files from last test 183 | rm = "rm -f {0}/gcc_out {0}/out {0}/shivyc_output {0}/gcc_output" 184 | subprocess.run(rm.format(dir), shell=True, check=True) 185 | 186 | # Compile with ShivyC 187 | compile_with_shivyc([str(pathlib.Path(dir).joinpath(cfile))]) 188 | self.assertEqual(error_collector.issues, []) 189 | 190 | # Compile with gcc 191 | gcc_compile = f"gcc -std=c11 {dir}/{cfile} -o gcc_out" 192 | subprocess.run(gcc_compile, shell=True, check=True) 193 | 194 | # Run ShivyC executable on sample input 195 | if stdin: 196 | shivyc_run = f"./out < {dir}/input.c > {dir}/shivyc_output" 197 | gcc_run = f"./gcc_out < {dir}/input.c > {dir}/gcc_output" 198 | else: 199 | shivyc_run = f"./out > {dir}/shivyc_output" 200 | gcc_run = f"./gcc_out > {dir}/gcc_output" 201 | 202 | subprocess.run(shivyc_run, shell=True, check=True) 203 | subprocess.run(gcc_run, shell=True, check=True) 204 | 205 | # Diff the two output files 206 | diff = f"diff {dir}/gcc_output {dir}/shivyc_output" 207 | subprocess.run(diff, shell=True, check=True) 208 | 209 | def test_count(self): 210 | """Test the Count.c program from the first pset of CPSC 223 at Yale.""" 211 | 212 | self.io_test("general_tests/count", "Count.c", "input.c") 213 | 214 | def test_pi(self): 215 | """Test the pi.c program.""" 216 | 217 | self.io_test("general_tests/pi", "pi.c", None) 218 | 219 | def test_trie(self): 220 | """Test the trie.c program.""" 221 | 222 | self.io_test("general_tests/trie", "trie.c", None) 223 | -------------------------------------------------------------------------------- /tools/compile_asm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Assembles and links the given assembly file into an executable on the provided Docker 4 | # machine. Useful for testing assembly programs. 5 | 6 | # usage: compile_asm asm_file.s 7 | # creates a binary file named `out` 8 | 9 | as -64 -o out.o $1 10 | ld -dynamic-linker /lib64/ld-linux-x86-64.so.2 /usr/lib/x86_64-linux-gnu/crt1.o /usr/lib/x86_64-linux-gnu/crti.o -lc out.o /usr/lib/x86_64-linux-gnu/crtn.o -o out 11 | -------------------------------------------------------------------------------- /tools/make_asm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Compiles the given .c file into a .s file. Useful for examining how 4 | # GCC compiles a given example code. 5 | 6 | # usage: make_asm c_file.c 7 | # creates a .s file with the same name as the given .c file 8 | 9 | gcc --std=c11 -Wall -pedantic -S -masm=intel -fno-asynchronous-unwind-tables -fno-stack-protector $1 10 | -------------------------------------------------------------------------------- /tools/release: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Used by maintainer to release updated versions of ShivyC. 4 | 5 | read -p "Have you updated version in setup.py and created a new release on GitHub? " -n 1 -r 6 | echo # (optional) move to a new line 7 | if [[ $REPLY =~ ^[Yy]$ ]] 8 | then 9 | rm -rf dist 10 | python3 setup.py sdist 11 | python3 setup.py bdist_wheel 12 | twine upload dist/* 13 | fi 14 | --------------------------------------------------------------------------------