├── include
    ├── util.h
    ├── runtime.h
    ├── parser.h
    ├── ast.h
    ├── lexer.h
    ├── table.h
    └── codegen.h
├── .gitignore
├── src
    ├── util
    │   ├── util.c
    │   ├── compiletarget.c
    │   ├── table.c
    │   └── disassembler.c
    ├── main.c
    ├── back
    │   ├── codegen.c
    │   ├── runtime.c
    │   ├── expression.c
    │   └── keyword.c
    └── front
    │   ├── lexer.c
    │   ├── parser.c
    │   └── keyword_parser.c
├── docs
    ├── docs.md
    ├── list.md
    ├── differences.md
    ├── node_info.md
    ├── review.md
    ├── codegen_theory.md
    ├── grammar.ebnf
    └── parsing_theory.md
├── README.md
├── Makefile
├── additional
    ├── mosbasic.vim
    └── test.bas
└── LICENSE


/include/util.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (C) 2022, Wojciech Grzela <grzela.wojciech@gmail.com>
 3 |  * Licensed under GNU General Public License version 3.
 4 |  */
 5 | 
 6 | #ifndef UTIL_H
 7 | #define UTIL_H
 8 | 
/* Convert a NUL-terminated string to upper case, in place */
void string_uppercase(char* str);
/* Record that an error occurred; does not exit by itself */
void raise_error();
/* Exit the whole program if an error was raised, else do nothing */
void check_for_error();		/* This will exit whole program */
/* Read a whole file into a newly allocated, NUL-terminated buffer.
 * On failure an error is raised and NULL is returned. */
char* read_file(const char* filename);
13 | 
14 | #endif
15 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Prerequisites
 2 | *.d
 3 | 
 4 | # Object files
 5 | *.o
 6 | *.ko
 7 | *.obj
 8 | *.elf
 9 | 
10 | # Linker output
11 | *.ilk
12 | *.map
13 | *.exp
14 | 
15 | # Precompiled Headers
16 | *.gch
17 | *.pch
18 | 
19 | # Libraries
20 | *.lib
21 | *.a
22 | *.la
23 | *.lo
24 | 
25 | # Shared objects (inc. Windows DLLs)
26 | *.dll
27 | *.so
28 | *.so.*
29 | *.dylib
30 | 
31 | # Executables
32 | *.exe
33 | *.out
34 | *.app
35 | *.i*86
36 | *.x86_64
37 | *.hex
38 | 
39 | # Debug files
40 | *.dSYM/
41 | *.su
42 | *.idb
43 | *.pdb
44 | 
45 | # Kernel Module Compile Results
46 | *.mod*
47 | *.cmd
48 | .tmp_versions/
49 | modules.order
50 | Module.symvers
51 | Mkfile.old
52 | dkms.conf
53 | 


--------------------------------------------------------------------------------
/src/util/util.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (C) 2022, Wojciech Grzela <grzela.wojciech@gmail.com>
 3 |  * Licensed under GNU General Public License version 3.
 4 |  */
 5 | 
 6 | /* Standard library includes */
 7 | #include <ctype.h>
 8 | #include <stdio.h>
 9 | #include <stdlib.h>
10 | #include <stdbool.h>
11 | 
12 | /* Custom includes */
13 | #include <util.h>
14 | 
/* Error flag: set by raise_error(), inspected by check_for_error() */
bool had_error = false;
16 | 
/*
 * Convert a NUL-terminated string to upper case, in place.
 *
 * str - String to modify; a NULL pointer is accepted and ignored.
 *
 * Each character is passed to toupper() as an unsigned char. Handing it a
 * negative plain char (any byte above 0x7F where char is signed) is
 * undefined behaviour.
 */
void string_uppercase(char* str)
{
	if (str == NULL)
		return;

	for (; *str != '\0'; str++)
		*str = (char) toupper((unsigned char) *str);
}
24 | 
/*
 * Record that an error happened. Nothing is printed and the program keeps
 * running; check_for_error() is what reacts to the flag.
 */
void raise_error()
{
	had_error = true;
}
29 | 
30 | void check_for_error()
31 | {
32 | 	if (had_error) {
33 | 		printf("%c[33mCompilation terminated due to error(s)!%c[0m\n",
34 | 			0x1B, 0x1B);
35 | 		exit(-1);
36 | 	}
37 | }
38 | 


--------------------------------------------------------------------------------
/include/runtime.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (C) 2022, Wojciech Grzela <grzela.wojciech@gmail.com>
 3 |  * Licensed under GNU General Public License version 3.
 4 |  */
 5 | 
 6 | #ifndef RUNTIME_H
 7 | #define RUNTIME_H
 8 | 
 9 | /* Custom includes */
10 | #include <table.h>
11 | #include <codegen.h>
12 | 
13 | /* Setup stack frame */
14 | void make_entry(CompileTarget* code, StringTable* str);
15 | 
16 | /* Exit program */
17 | void make_exit(CompileTarget* code);
18 | 
19 | /* Handle division by zero */
20 | void zero_divide_handler(CompileTarget* code);
21 | 
22 | /* SI = SI + DI (original strings are left unmodified) */
23 | void add_strings(CompileTarget* code);
24 | 
25 | /* Print SI, using WORKPAGE and INK */
26 | void print_string(CompileTarget* code);
27 | 
28 | #endif
29 | 


--------------------------------------------------------------------------------
/docs/docs.md:
--------------------------------------------------------------------------------
 1 | # "Documentation"
 2 | 
 3 | Or rather, a bizarre list of random files roughly describing what was happening
 4 | throughout development. Enjoy!
 5 | 
 6 | ### VERY IMPORTANT THINGS:
 7 | 
 8 | - [Code review](review.md) - Review of code structure.
 9 | - [Differences](differences.md) - List of all differences between MikeOS'
10 | original implementation and this one.
11 | 
12 | ### Compiler:
13 | 
14 | - ["Codegen Theory"](codegen_theory.md) - Some explanation of how code generator
15 | works.
16 | 
17 | ### Parser:
18 | 
19 | - [Grammar](grammar.ebnf) - Something trying to be an EBNF grammar.
20 | - ["Parsing Theory"](parsing_theory.md) - Some explanation of how parser works.
21 | 
22 | ### Least useful:
23 | 
24 | - [Keyword list](list.md) - List of keywords for MikeOS.
25 | - [Node info](node_info.md) - Basic info about how nodes are constructed.
26 | 


--------------------------------------------------------------------------------
/docs/list.md:
--------------------------------------------------------------------------------
 1 | # List of all keywords for MikeOS Basic
 2 | 
 3 | - ALERT
 4 | - AND
 5 | - ASKFILE
 6 | - BREAK
 7 | - CALL
 8 | - CASE
 9 | - CHR
10 | - CLS
11 | - CURSOR
12 | - CURSCHAR
13 | - CURSCOL
14 | - CURSPOS
15 | - DELETE
16 | - DO
17 | - ELSE
18 | - END
19 | - ENDLESS
20 | - FILES
21 | - FOR
22 | - GET
23 | - GOSUB
24 | - GOTO
25 | - GETKEY
26 | - HEX
27 | - IF
28 | - IN
29 | - INCLUDE
30 | - INK
31 | - INPUT
32 | - LEN
33 | - LISTBOX
34 | - LOAD
35 | - LOOP
36 | - LOWER
37 | - MOVE
38 | - NEXT
39 | - NUMBER
40 | - OFF
41 | - ON
42 | - OUT
43 | - PAGE
44 | - PAUSE
45 | - PEEK
46 | - PEEKINT
47 | - POKE
48 | - POKEINT
49 | - PORT
50 | - PRINT
51 | - PROGSTART
52 | - RAMSTART
53 | - RAND
54 | - READ
55 | - REC
56 | - REM
57 | - RENAME
58 | - RETURN
59 | - SAVE
60 | - SEND
61 | - SERIAL
62 | - SET
63 | - SIZE
64 | - SOUND
65 | - STRING
66 | - THEN
67 | - TIMER
68 | - TO
69 | - UNTIL
70 | - UPPER
71 | - VARIABLES
72 | - VERSION
73 | - WAITKEY
74 | - WHILE
75 | 


--------------------------------------------------------------------------------
/include/parser.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (C) 2022, Wojciech Grzela <grzela.wojciech@gmail.com>
 3 |  * Licensed under GNU General Public License version 3.
 4 |  */
 5 | 
 6 | #ifndef PARSER_H
 7 | #define PARSER_H
 8 | 
 9 | /* Standard library includes */
10 | #include <stdbool.h>
11 | 
12 | /* Custom includes */
13 | #include <ast.h>
14 | #include <lexer.h>
15 | #include <table.h>
16 | 
17 | /* Note: Those are internal functions, unused outside of the parser */
18 | void parse_error(const char* msg);
19 | bool match(TokenType t);
20 | Token scan();
21 | Node* init_node(NodeType t, Token token, int v, Node* op1, Node* op2);
22 | void synchronize();
23 | Node* literal();
24 | Node* variable();
25 | Node* string();
26 | Node* numeric();
27 | Node* primary();
28 | Node* expr();
29 | Node* comparison();
30 | Node* boolean_expr();
31 | Node* assign();
32 | Node* if_stmt();
33 | Node* do_stmt();
34 | Node* for_stmt();
35 | Node* parse_keyword();
36 | Node* statement();
37 | 
38 | /* Here are proper functions: one for parsing, two for node management */
39 | Node* parse(SymbolTable* t, StringTable* s);
40 | void print_node(Node* n, int lvl);
41 | void free_node(Node* n);
42 | 
43 | #endif
44 | 


--------------------------------------------------------------------------------
/include/ast.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (C) 2022, Wojciech Grzela <grzela.wojciech@gmail.com>
 3 |  * Licensed under GNU General Public License version 3.
 4 |  */
 5 | 
 6 | #ifndef AST_H
 7 | #define AST_H
 8 | 
 9 | /* Standard library includes */
10 | #include <stdbool.h>
11 | 
12 | /* Custom includes */
13 | #include <lexer.h>
14 | 
/*
 * Kinds of AST node. Values count up from 0 in declaration order, so new
 * kinds must be appended at the end to keep the existing numbers stable.
 */
typedef enum {
	NODE_SEQUENCE = 0,	/* Synthetic: chains two nodes in order */
	NODE_ASSIGN,		/* op1 is the target, op2 the value */
	NODE_EXPR,		/* Any expression */
	NODE_VARIABLE,		/* String or numeric variable */
	NODE_LITERAL,		/* Numeric or string literal */
	NODE_LABEL,		/* Target for GOTO and GOSUB */
	NODE_IF,		/* op1 is the condition, op2 then-else */
	NODE_DO,		/* op1 as for IF, op2 body and modifiers */
	NODE_FOR,		/* op1 initializer, op2 "to" and body */
	NODE_KEYWORD_CALL	/* Synthetic: the keyword is in attribute */
} NodeType;
27 | 
/* One node of the syntax tree: a kind, optional payload and two children */
typedef struct _Node {
	NodeType type;		/* Generic type of Node */
	TokenType attribute;	/* Used to distinguish subtypes, 0 if unused */
	int val;		/* Literal value, else 0 */
	int line;		/* Line on which Node lies */
	struct _Node* op1;	/* Both operands are NULL if unused */
	struct _Node* op2;
} Node;
36 | 
37 | #endif
38 | 


--------------------------------------------------------------------------------
/docs/differences.md:
--------------------------------------------------------------------------------
 1 | # Differences between this implementation and original one
 2 | 
 3 | Everything listed here is something this version does and the original doesn't.
 4 | The list may be incomplete:
 5 | - Accepts tabs as whitespace.
 6 | - Doesn't have limit on source length.
 7 | - Tokens can have no whitespace between them (so `2+2` is okay).
 8 | - String variables are validated at compile-time. In original version due to an
 9 | obscure bug in token detection, things like `$J` or `$~` pass unnoticed. It
10 | isn't mentioned anywhere in documentation, so I assume it is a bug.
11 | - Labels can't be keywords (so no `GOTO LOOP`).
12 | - Address operator (`&`) can be used on both string and numeric variables.
13 | - If the variables specified by the `FOR` initializer and `NEXT` don't match, an
14 | error is raised.
15 | - In almost every built-in function possible sources were expanded. Instead of
16 | using for example only numeric literals, now using variables is also possible.
17 | Note: this doesn't apply to *target* variables, so writing is only allowed to
18 | variables.
19 | - `NEXT` cannot be put inside `FOR` loop, only at the end. It is a bit of a
20 | shame, because in original version it is like `continue` in C.
21 | - A `FOR` loop will stop once the iterator goes past the `TO` value, even if that
22 | value is never hit exactly (so if the loop steps by 2 every iteration, it will
23 | halt even if `TO` is skipped over).
24 | - Numbers cannot be added to strings. It is not that bad actually, as you can
25 | always convert both ways with `NUMBER`.
26 | - `FILES` doesn't print filenames like `DIR` command but more like `LS`, with
27 | every file on different line.
28 | - `READ` doesn't work, and probably won't ever. Sorry, it breaks some key
29 | assumptions the compiler uses.
30 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # MikeOS BASIC Compiler
 2 | 
 3 | A compiler written in C which compiles MikeOS' BASIC dialect to executables for
 4 | this operating system. For documentation on the language itself, please read the
 5 | [article](http://mikeos.sourceforge.net/handbook-appdev-basic.html) on MikeOS'
 6 | [website](http://mikeos.sourceforge.net/).
 7 | 
 8 | ## Building
 9 | 
10 | To setup the project (creates output directories):
11 | ```
12 | make init
13 | ```
14 | 
15 | For release build:
16 | ```
17 | make all 	(default)
18 | make release	(explicit release)
19 | ```
20 | 
21 | For debug build:
22 | ```
23 | make debug
24 | ```
25 | 
26 | For cleanup (deletes all object files):
27 | ```
28 | make clean
29 | ```
30 | 
31 | **Note:** Don't try to initialize an already initialized project, or you will
32 | get some errors (at least on Windows).
33 | 
34 | ## Licensing
35 | 
36 | For full text of the license, see file [LICENSE](LICENSE).
37 | 
38 | This program is free software: you can redistribute it and/or modify it under
39 | the terms of the GNU General Public License as published by the Free Software
40 | Foundation, either version 3 of the License, or (at your option) any later
41 | version.
42 | 
43 | This program is distributed in the hope that it will be useful, but WITHOUT ANY
44 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
45 | PARTICULAR PURPOSE. See the GNU General Public License for more details.
46 | 
47 | You should have received a copy of the GNU General Public License along with
48 | this program. If not, see <https://www.gnu.org/licenses/>.
49 | 
50 | ## Documentation
51 | 
52 | See `docs/`, start by reading [main documentation file](docs/docs.md).
53 | 
54 | ## Contributing
55 | 
56 | Contributions are very much welcome! If you have one, you can either write an
57 | email to me (contact: <grzela.wojciech@gmail.com>) or open a pull request on
58 | GitHub.
59 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Alas, all of my editors went insane trying to highlight this file :(
 2 | CC = gcc
 3 | STRIP = strip
 4 | CFLAGS = -I include -g -Wall -Wextra -Wpedantic
 5 | # Object targets
 6 | OBJ_UTIL = obj/util/compiletarget.o obj/util/table.o obj/util/util.o \
 7 | 	obj/util/disassembler.o
 8 | OBJ_FRONTEND = obj/front/parser.o obj/front/keyword_parser.o obj/front/lexer.o
 9 | OBJ_BACKEND = obj/back/codegen.o obj/back/runtime.o obj/back/keyword.o \
10 | 	obj/back/expression.o
11 | OBJ = obj/main.o $(OBJ_BACKEND) $(OBJ_FRONTEND) $(OBJ_UTIL)
12 | 
13 | # If no target is provided, run release
14 | all: release
15 | 
16 | # Release enables all optimizations
17 | release: CFLAGS = -I include -O2 -Wall -Wextra -Wpedantic
18 | release: bin/mosbc.exe
19 | 	$(info [36mCompiled $^ (Release build)[0m)
20 | 	@$(STRIP) $(^)
21 | 
22 | # Debug compile
23 | debug: CFLAGS = -I include -g -Wall -Wextra -Wpedantic
24 | debug: bin/mosbc.exe
25 | 	$(info [36mCompiled $(^) (Debug build)[0m)
26 | 
27 | # Main target, compile normally
28 | bin/mosbc.exe: $(OBJ)
29 | 	$(info [32mBuilding $@[0m)
30 | 	@$(CC) $(^) -o $(@)
31 | 
32 | # Compile all object files
33 | obj/%.o: src/%.c
34 | 	$(info [35mBuilding $@[0m)
35 | 	@$(CC) $(CFLAGS) $(^) -o $(@) -c
36 | 
37 | # Initialize the project (create directories with autodetect)
38 | init:
39 | 	$(info [31mInitializing project directory[0m)
40 | ifeq ($(OS),Windows_NT)
41 | 	@mkdir bin
42 | 	@mkdir obj
43 | 	@mkdir obj\front
44 | 	@mkdir obj\back
45 | 	@mkdir obj\util
46 | else
47 | 	@mkdir -p bin
48 | 	@mkdir -p obj
49 | 	@mkdir -p obj/front
50 | 	@mkdir -p obj/back
51 | 	@mkdir -p obj/util
52 | endif
53 | 
54 | # Clean rule, with autodetect
55 | clean:
56 | 	$(info [33mCleaning binaries[0m)
57 | ifeq ($(OS),Windows_NT)
58 | 	@del obj\main.o
59 | 	@del obj\back\*.o
60 | 	@del obj\front\*.o
61 | 	@del obj\util\*.o
62 | 	@del bin\mosbc.exe
63 | else
# -f keeps "make clean" from failing when some artefacts are already gone
	@rm -f $(OBJ)
	@rm -f bin/mosbc.exe
66 | endif
67 | 


--------------------------------------------------------------------------------
/additional/mosbasic.vim:
--------------------------------------------------------------------------------
 1 | " Syntax is case insensitive
 2 | syntax case ignore
 3 | 
 4 | " Keywords
 5 | syn keyword basicStmtKeywords ALERT AND ASKFILE BREAK CALL
 6 | syn keyword basicStmtKeywords CASE CHR CLS CURSOR CURSCHAR
 7 | syn keyword basicStmtKeywords CURSCOL CURSPOS DELETE DO ELSE
 8 | syn keyword basicStmtKeywords END ENDLESS FILES FOR GET GOSUB
 9 | syn keyword basicStmtKeywords GOTO GETKEY HEX IF IN INCLUDE
10 | syn keyword basicStmtKeywords INK INPUT LEN LISTBOX LOAD
11 | syn keyword basicStmtKeywords LOOP LOWER MOVE NEXT NUMBER
12 | syn keyword basicStmtKeywords OFF ON OUT PAGE PAUSE PEEK
13 | syn keyword basicStmtKeywords PEEKINT POKE POKEINT PORT PRINT
14 | syn keyword basicStmtKeywords RAND READ REC RENAME RETURN SAVE
15 | syn keyword basicStmtKeywords SEND SERIAL SET SIZE SOUND STRING
16 | syn keyword basicStmtKeywords THEN TO UNTIL UPPER WAITKEY WHILE
17 | 
18 | " Expression keywords
19 | syn keyword basicExprKeywords INK PROGSTART RAMSTART TIMER
20 | syn keyword basicExprKeywords VARIABLES VERSION
21 | 
22 | " Variables and literals (order matters!)
23 | syn match basicNumericVar "[a-zA-Z]"
24 | syn match basicCharLiteral "'[a-zA-Z]'"
25 | syn match basicStringVar "$[1-8]"
26 | syn match basicNumericLiteral '\d\+'
27 | syn region basicStringLiteral start='"' end='"'
28 | 
29 | " Operators
30 | syn match basicOperator '+*-/%'
31 | 
32 | " Comments and TODO
33 | syn keyword basicTodo contained TODO NOTE
34 | syn region basicComment start="[rR][eE][mM]" end="$" contains=basicTodo
35 | 
36 | " Link all of the to specific classes
37 | highlight link basicTodo Todo
38 | highlight link basicComment Comment
39 | highlight link basicStmtKeywords Keyword
40 | highlight link basicExprKeywords Constant
41 | highlight link basicStringLiteral String
42 | highlight link basicCharLiteral Character
43 | highlight link basicNumericVar Identifier
44 | highlight link basicStringVar Identifier
45 | highlight link basicNumericLiteral Number
46 | highlight link basicOperator Operator
47 | 
48 | let b:current_syntax = "mosbasic"
49 | 


--------------------------------------------------------------------------------
/include/lexer.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (C) 2022, Wojciech Grzela <grzela.wojciech@gmail.com>
 3 |  * Licensed under GNU General Public License version 3.
 4 |  */
 5 | 
 6 | #ifndef LEXER_H
 7 | #define LEXER_H
 8 | 
 9 | typedef enum {
10 | 	/* Keywords */
11 | 	TOKEN_ALERT = 0, TOKEN_AND, TOKEN_ASKFILE, TOKEN_BREAK, TOKEN_CALL,
12 | 	TOKEN_CASE, TOKEN_CHR, TOKEN_CLS, TOKEN_CURSOR, TOKEN_CURSCHAR,
13 | 	TOKEN_CURSCOL, TOKEN_CURSPOS, TOKEN_DELETE, TOKEN_DO, TOKEN_ELSE,
14 | 	TOKEN_END, TOKEN_ENDLESS, TOKEN_FILES, TOKEN_FOR, TOKEN_GET,
15 | 	TOKEN_GETKEY, TOKEN_GOSUB, TOKEN_GOTO, TOKEN_HEX, TOKEN_IF, TOKEN_IN,
16 | 	TOKEN_INCLUDE, TOKEN_INK, TOKEN_INPUT, TOKEN_LEN, TOKEN_LISTBOX,
17 | 	TOKEN_LOAD, TOKEN_LOOP, TOKEN_LOWER, TOKEN_MOVE, TOKEN_NEXT,
18 | 	TOKEN_NUMBER, TOKEN_OFF, TOKEN_ON, TOKEN_OUT, TOKEN_PAGE, TOKEN_PAUSE,
19 | 	TOKEN_PEEK, TOKEN_PEEKINT, TOKEN_POKE, TOKEN_POKEINT, TOKEN_PORT,
20 | 	TOKEN_PRINT, TOKEN_PROGSTART, TOKEN_RAMSTART, TOKEN_RAND, TOKEN_READ,
21 | 	TOKEN_REC, TOKEN_REM, TOKEN_RENAME, TOKEN_RETURN, TOKEN_SAVE,
22 | 	TOKEN_SEND, TOKEN_SERIAL, TOKEN_SET, TOKEN_SIZE, TOKEN_SOUND,
23 | 	TOKEN_STRING, TOKEN_THEN, TOKEN_TIMER, TOKEN_TO, TOKEN_UNTIL,
24 | 	TOKEN_UPPER, TOKEN_VARIABLES, TOKEN_VERSION, TOKEN_WAITKEY, TOKEN_WHILE,
25 | 
26 | 	/* Operators */
27 | 	TOKEN_PLUS = 72, TOKEN_MINUS, TOKEN_STAR, TOKEN_SLASH, TOKEN_PERCENT,
28 | 	TOKEN_EQUALS, TOKEN_NOT_EQUALS, TOKEN_GREATER, TOKEN_SMALLER,
29 | 	TOKEN_AMPERSAND, TOKEN_SEMICOLON,
30 | 
31 | 	/* Literals */
32 | 	TOKEN_NUMERIC_VARIABLE = 83, TOKEN_STRING_VARIABLE,
33 | 	TOKEN_NUMERIC_LITERAL, TOKEN_STRING_LITERAL, TOKEN_CHARACTER_LITERAL,
34 | 	TOKEN_LABEL, TOKEN_IDENTIFIER,
35 | 
36 | 	/* Synthetic tokens */
37 | 	TOKEN_ERROR = 90, TOKEN_EOF
38 | } TokenType;
39 | 
/* Scanning state: three cursors into the source text plus a line counter */
typedef struct {
	const char* source;	/* Beginning of whole source code */
	const char* beginning;	/* Beginning of current token */
	const char* current;	/* Current position in source */
	int line;		/* Current line */
} Lexer;
46 | 
/* One token; the lexeme is referenced by pointer and length, not copied */
typedef struct {
	TokenType type;		/* Type of the token */
	const char* text;	/* Pointer to the beginning of lexeme */
	int length;		/* Length of the lexeme */
	int line;		/* Line on which it lies */
} Token;
53 | 
54 | void init_lexer(const char* source);
55 | Token get_token();
56 | Token lookahead();
57 | 
58 | #endif
59 | 


--------------------------------------------------------------------------------
/include/table.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (C) 2022, Wojciech Grzela <grzela.wojciech@gmail.com>
 3 |  * Licensed under GNU General Public License version 3.
 4 |  */
 5 | 
 6 | #ifndef TABLE_H
 7 | #define TABLE_H
 8 | 
 9 | /* Standard library includes */
10 | #include <stdint.h>
11 | #include <stdbool.h>
12 | 
13 | /* Custom includes */
14 | #include <ast.h>
15 | 
16 | /* ============================== SYMBOL TABLE ============================== */
/* One entry of the symbol table */
typedef struct {
	int id;			/* Entry's index into table */
	const char* str;	/* Content */
	int len;		/* Content's length */
	Node* target;		/* Where label points (NULL if unused) */
	uint16_t addr;		/* It will be set in codegen */
	bool isreal;		/* Was symbol really found in source? */
} SymbolTableEntry;
25 | 
/* Growable array of SymbolTableEntry */
typedef struct {
	SymbolTableEntry* table;	/* Array of symbol descriptors */
	int len;			/* Length of said array */
	int capacity;			/* Obviously, dynamic array :) */
} SymbolTable;
31 | 
32 | void init_sym_table(SymbolTable* t);
33 | void free_sym_table(SymbolTable* t);
34 | 
35 | /* Add symbol (without setting isreal or with) */
36 | int add_unreal_symbol(SymbolTable* t, char* str, int len);
37 | int add_real_symbol(SymbolTable* t, char* str, int len, Node* n);
38 | /* Check if given ID is assigned to real symbol */
39 | bool is_symbol_real(SymbolTable* t, int id);
40 | /* Get ID for given node, or the other way around */
41 | int find_symbol(SymbolTable* t, Node* n);
42 | Node* get_symbol(SymbolTable* t, int id);
43 | 
44 | /* ============================== STRING TABLE ============================== */
/* Descriptor of one string: where it sits in the blob and how long it is */
typedef struct {
	int id;				/* Index into table */
	int offset;			/* Offset into string */
	int len;			/* Length of string */
} StringTableEntry;
50 | 
/* Growable array of string descriptors plus the blob their offsets refer to */
typedef struct {
	StringTableEntry* table;	/* Array itself */
	int len;			/* Length of array */
	int capacity;			/* Classic dynamic array */
	char* blob;			/* String blob (here offset points) */
	int blob_len;			/* Length of whole string */
} StringTable;
58 | 
59 | void init_str_table(StringTable* t);
60 | void free_str_table(StringTable* t);
61 | 
62 | /* Add string */
63 | int add_string(StringTable* t, const char* str, int len);
64 | /* Get string (or just offset) of given ID */
65 | int get_offset_string(StringTable* t, int id);
66 | const char* get_string(StringTable* t, int id);
67 | 
68 | #endif
69 | 


--------------------------------------------------------------------------------
/docs/node_info.md:
--------------------------------------------------------------------------------
 1 | # Notes on nodes (pun originally not intended)
 2 | 
 3 | Here are rules for construction and interpretation of nodes.
 4 | This is not meant to be very specific, just to give the idea for what goes
 5 | where.
 6 | 
 7 | ## Special form nodes
 8 | 
 9 | ```
10 | SEQUENCE -> (op1 = First in sequence; op2 = Second in sequence)
11 | ASSIGN -> (op1 = Target; op2 = Value)
12 | EXPR -> (op1 = First value; op2 = Second value)
13 | VARIABLE -> (op1 = NULL; op2 = NULL)
14 | LITERAL -> (op1 = NULL; op2 = NULL)
15 | LABEL -> (op1 = NULL; op2 = NULL)
16 | IF -> (op1 = Condition; op2 = SEQUENCE(Then branch; Else branch))
17 | DO -> (op1 = Condition; op2 = SEQUENCE(Body; Modifier))
18 | FOR -> (op1 = Initializer; op2 = SEQUENCE(To value; Body))
19 | ```
20 | 
21 | ## Keyword nodes
22 | 
23 | ```
24 | ALERT -> (op1 = Target; op2 = NULL)
25 | ASKFILE -> (op1 = Target; op2 = NULL)
26 | BREAK -> (op1 = NULL; op2 = NULL)
27 | CALL -> (op1 = Target; op2 = NULL)
28 | CASE -> (op1 = Modifier; op2 = Target)
29 | CLS -> (op1 = NULL; op2 = NULL)
30 | CURSOR -> (op1 = Modifier; op2 = NULL)
31 | CURSCHAR -> (op1 = Target; op2 = NULL)
32 | CURSCOL -> (op1 = Target; op2 = NULL)
33 | CURSPOS -> (op1 = First target; op2 = Second target)
34 | DELETE -> (op1 = Name; op2 = NULL)
35 | END -> (op1 = NULL; op2 = NULL)
36 | FILES -> (op1 = NULL; op2 = NULL)
37 | GETKEY -> (op1 = Target; op2 = NULL)
38 | GOSUB -> (op1 = Target; op2 = NULL)
39 | GOTO -> (op1 = Target; op2 = NULL)
40 | INCLUDE -> N/A
41 | INK -> (op1 = Value; op2 = NULL)
42 | INPUT -> (op1 = Target; op2 = NULL)
43 | LEN -> (op1 = String value; op2 = Target)
44 | LISTBOX -> (op1 = First string; op2 = SEQUENCE(Second string; SEQUENCE(Third string; Target)))
45 | LOAD -> (op1 = String value; op2 = Value)
46 | MOVE -> (op1 = First value; op2 = Second value)
47 | NUMBER -> (op1 = First value; op2 = Second value)
48 | PAGE -> (op1 = First value; op2 = Second value)
49 | PAUSE -> (op1 = Value; op2 = NULL)
50 | PEEK -> (op1 = Target; op2 = Value)
51 | PEEKINT -> (op1 = Target; op2 = Value)
52 | POKE -> (op1 = First value; op2 = Second value)
53 | POKEINT -> (op1 = First value; op2 = Second value)
54 | PORT -> (op1 = Modifier; op2 = SEQUENCE(First value; Second value))
55 | PRINT -> (op1 = First modifier; op2 = SEQUENCE(Value; Second modifier))
56 | RAND -> (op1 = Target; op2 = SEQUENCE(First value; Second value))
57 | READ -> N/A
58 | RENAME -> (op1 = First string; op2 = Second string)
59 | RETURN -> (op1 = NULL; op2 = NULL)
60 | SAVE -> (op1 = String; op2 = SEQUENCE(First value; Second value))
61 | SERIAL -> (op1 = Modifier; op2 = Value)
62 | SIZE -> (op1 = String; op2 = NULL)
63 | SOUND -> (op1 = First value; op2 = Second value)
64 | STRING -> (op1 = Modifier; op2 = SEQUENCE(String target; SEQUENCE(Value; Numeric target)))
65 | WAITKEY -> (op1 = Target; op2 = NULL)
66 | ```
67 | 


--------------------------------------------------------------------------------
/src/util/compiletarget.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2022, Wojciech Grzela <grzela.wojciech@gmail.com>
  3 |  * Licensed under GNU General Public License version 3.
  4 |  */
  5 | 
  6 | /* Standard library includes */
  7 | #include <stdlib.h>
  8 | #include <stdio.h>
  9 | #include <string.h>
 10 | 
 11 | /* Custom includes */
 12 | #include <codegen.h>
 13 | 
 14 | /* ============================== PATCH TABLES ============================== */
 15 | void init_patch(PatchTable* p)
 16 | {
 17 | 	p->length = 0;
 18 | 	p->capacity = 8;
 19 | 	p->table = malloc(p->capacity * sizeof(PatchTableEntry));
 20 | }
 21 | 
 22 | void free_patch(PatchTable* p)
 23 | {
 24 | 	free(p->table);
 25 | 	p->table = NULL;
 26 | 	p->length = 0;
 27 | 	p->capacity = 0;
 28 | }
 29 | 
 30 | void add_patch(PatchTable* p, int id, uint16_t addr)
 31 | {
 32 | 	if (p->capacity < p->length + 1) {
 33 | 		p->capacity *= 2;
 34 | 		p->table = realloc(p->table, p->capacity *
 35 | 				sizeof(PatchTableEntry));
 36 | 	}
 37 | 
 38 | 	p->table[p->length].id = id;
 39 | 	p->table[p->length].addr = addr;
 40 | 	p->length++;
 41 | }
 42 | 
 43 | /* ============================= INITIALIZATION ============================= */
 44 | void init_code(CompileTarget* c)
 45 | {
 46 | 	c->length = 0;
 47 | 	c->capacity = 8;
 48 | 	c->code = malloc(c->capacity);
 49 | }
 50 | 
 51 | void free_code(CompileTarget* c)
 52 | {
 53 | 	free(c->code);
 54 | 	c->code = NULL;
 55 | 	c->length = 0;
 56 | 	c->capacity = 0;
 57 | }
 58 | 
 59 | void patch_jumps(CompileTarget* c, PatchTable* p, SymbolTable* sym, Node* n)
 60 | {
 61 | 	uint16_t addr = c->length;
 62 | 
 63 | 	/* Search all labels whether Node is in them */
 64 | 	for (int i = 0; i < sym->len; i++)
 65 | 		if (sym->table[i].target == n) {
 66 | 			sym->table[i].addr = addr;
 67 | 
 68 | 			/* Now look if there are any matching entries */
 69 | 			for (int j = 0; j < p->length; j++)
 70 | 				if (p->table[j].id == i) {
 71 | 					uint16_t a = p->table[j].addr;
 72 | 					int16_t rel = addr - (a + 2);
 73 | 					c->code[a] = (uint8_t) rel & 0xFF;
 74 | 					c->code[a + 1] = (uint8_t) (rel >> 8) &
 75 | 								0xFF;
 76 | 				}
 77 | 		}
 78 | }
 79 | 
 80 | /* =========================== EMITTING FUNCTIONS =========================== */
 81 | void emit_byte(CompileTarget* c, uint8_t byte)
 82 | {
 83 | 	/* If there is no room, make some */
 84 | 	if (c->capacity < c->length + 1) {
 85 | 		c->capacity *= 2;
 86 | 		c->code = realloc(c->code, c->capacity);
 87 | 	}
 88 | 
 89 | 	/* Put our byte in place */
 90 | 	c->code[c->length] = byte;
 91 | 	c->length++;
 92 | }
 93 | 
 94 | void emit_word(CompileTarget* c, uint16_t word)
 95 | {
 96 | 	emit_byte(c, word & 0xFF);
 97 | 	emit_byte(c, word >> 8);
 98 | }
 99 | 
100 | void emit_call(CompileTarget* c, uint16_t target)
101 | {
102 | 	uint16_t next = c->length + 3 + LOAD;
103 | 	int16_t rel = target - next;
104 | 	emit_byte(c, 0xE8);	/* CALL */
105 | 	emit_word(c, rel);	/* rel16 */
106 | }
107 | 
108 | void emit_jump(CompileTarget* c, uint16_t target)
109 | {
110 | 	uint16_t next = c->length + 3 + LOAD;
111 | 	int16_t rel = target - next;
112 | 	emit_byte(c, 0xE9);		/* JMP NEAR */
113 | 	emit_word(c, rel);		/* rel16 */
114 | }
115 | 
116 | void emit_string(CompileTarget* c, const char* str)
117 | {
118 | 	for (unsigned int i = 0; i < strlen(str); i++)
119 | 		emit_byte(c, str[i]);
120 | 
121 | 	emit_byte(c, 0x00);	/* NUL terminate */
122 | }
123 | 


--------------------------------------------------------------------------------
/src/main.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2022, Wojciech Grzela <grzela.wojciech@gmail.com>
  3 |  * Licensed under GNU General Public License version 3.
  4 |  */
  5 | 
  6 | /* Standard library includes */
  7 | #include <string.h>
  8 | #include <stdio.h>
  9 | #include <stdlib.h>
 10 | 
 11 | /* Custom includes */
 12 | #include <lexer.h>
 13 | #include <parser.h>
 14 | #include <table.h>
 15 | #include <codegen.h>
 16 | #include <util.h>
 17 | 
 18 | extern Lexer lexer;
 19 | 
 20 | char* read_file(const char* filename)
 21 | {
 22 | 	FILE* f = fopen(filename, "rb");
 23 | 	if (f == NULL) {
 24 | 		printf("\x1B[31mError\x1B[0m: Could not open source file.\n");
 25 | 		raise_error();
 26 | 		return NULL;
 27 | 	}
 28 | 
 29 | 	fseek(f, 0, SEEK_END);
 30 | 	int len = ftell(f);
 31 | 	rewind(f);
 32 | 
 33 | 	char* source = malloc(len + 1);		/* One more for NUL */
 34 | 	if (source == NULL) {
 35 | 		printf("\x1B[31mError\x1B[0m: Could not allocate to read.\n");
 36 | 		raise_error();
 37 | 		return NULL;
 38 | 	}
 39 | 
 40 | 	int read = fread(source, sizeof(char), len, f);
 41 | 	if (read != len) {
 42 | 		printf("\x1B[31mError\x1B[0m: Could not read source file.\n");
 43 | 		raise_error();
 44 | 		return NULL;
 45 | 	}
 46 | 
 47 | 	source[len] = '\0';			/* Zero terminate string */
 48 | 	fclose(f);
 49 | 	return source;
 50 | }
 51 | 
 52 | void write_file(const char* filename, CompileTarget* ct)
 53 | {
 54 | 	FILE* f = fopen(filename, "wb");
 55 | 	if (f == NULL) {
 56 | 		printf("\x1B[31mError\x1B[0m: Could not open output file.\n");
 57 | 		raise_error();
 58 | 		return;
 59 | 	}
 60 | 
 61 | 	int len = fwrite(ct->code, sizeof(char), ct->length, f);
 62 | 
 63 | 	if (len != ct->length) {
 64 | 		printf("\x1B[31mError\x1B[0m: Could not write output file.\n");
 65 | 		raise_error();
 66 | 		return;
 67 | 	}
 68 | 
 69 | 	fclose(f);
 70 | }
 71 | 
 72 | int main(int argc, char** argv)
 73 | {
 74 | 	if (argc < 3) {
 75 | 		printf("----- \x1B[33mMikeOS Basic Compiler\x1B[0m -----\n"
 76 | 			"Usage: mosbc \x1B[35msrc\x1B[0m \x1B[36mout\x1B[0m "
 77 | 			"\x1B[33m[-debug]\x1B[0m\n"
 78 | 			"  \x1B[35msrc\x1B[0m - Name of the source file\n"
 79 | 			"  \x1B[36mout\x1B[0m - Name of output file\n"
 80 | 			"  \x1B[33m-debug\x1B[0m - Print compiler data "
 81 | 			"structures.\n");
 82 | 		return -1;
 83 | 	}
 84 | 
 85 | 	/* Read */
 86 | 	char* src = read_file(argv[1]);
 87 | 	check_for_error();
 88 | 
 89 | 	/* Now we read the source, initialize all data structures */
 90 | 	SymbolTable t;
 91 | 	StringTable s;
 92 | 	CompileTarget ct;
 93 | 	init_sym_table(&t);
 94 | 	init_str_table(&s);
 95 | 	init_code(&ct);
 96 | 
 97 | 	/* Parse */
 98 | 	init_lexer(src);
 99 | 	Node* ast = parse(&t, &s);
100 | 	check_for_error();
101 | 
102 | 	/* Compile */
103 | 	compile(ast, &ct, &s, &t);
104 | 	check_for_error();
105 | 
106 | 	/* Output debug info, if needed */
107 | 	if (argc == 4 && strcmp(argv[3], "-debug") == 0) {
108 | 		printf("\x1B[32mSource:\x1B[0m\n%s\n\n", lexer.source);
109 | 		printf("\x1B[36mAST\x1B[0m:\n");
110 | 		print_node(ast, 0);
111 | 		printf("\n\x1B[34mASM:\x1B[0m\n");
112 | 		disassemble(&ct);
113 | 	}
114 | 
115 | 	/* Finally, write out our compiled code to file */
116 | 	write_file(argv[2], &ct);
117 | 
118 | 	/* Please be reassuring: */
119 | 	printf("\x1B[32mCompilation successful\x1B[0m: written file %s "
120 | 		"(%d bytes long)\n", argv[2], ct.length);
121 | 
122 | 	/* Clean up */
123 | 	free_node(ast);
124 | 	free_code(&ct);
125 | 	free_sym_table(&t);
126 | 	free_str_table(&s);
127 | 
128 | 	return 0;
129 | }
130 | 


--------------------------------------------------------------------------------
/include/codegen.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2022, Wojciech Grzela <grzela.wojciech@gmail.com>
  3 |  * Licensed under GNU General Public License version 3.
  4 |  */
  5 | 
  6 | #ifndef CODEGEN_H
  7 | #define CODEGEN_H
  8 | 
  9 | /* Standard library includes */
 10 | #include <stdint.h>
 11 | 
 12 | /* Custom includes */
 13 | #include <ast.h>
 14 | #include <table.h>
 15 | 
 16 | /* Some constants needed for code generation:
 17 |  * LOAD - Load address of the binary
 18 |  * STRADD, ZERODIV, PRINTSTR - Addresses of the runtime's string addition,
 19 |  *   division by zero and string printing handlers
 20 |  * INKADDR - Address of INK value in runtime
 21 |  * RAMSTART - Address of RAMSTART value in runtime (not the value itself!)
 22 |  * WORKPAGE, ACTIVEPAGE - Addresses of working and active pages' numbers
 23 |  * VARS, STRVARS - Start of numeric / string variables - THESE MAY CHANGE!!!
 24 |  * STRBUF - Temporary buffer for string operations
 25 |  * RUNTIMELEN - Length of the runtime (together with jump at the beginning)
 26 |  * VERSION - API version. Update accordingly
 27 |  */
 28 | #define LOAD 0x8000
 29 | #define STRADD 0x8003
 30 | #define ZERODIV 0x8018
 31 | #define PRINTSTR 0x8048
 32 | #define INKADDR 0x8098
 33 | #define RAMSTART 0x809A
 34 | #define WORKPAGE 0x809C
 35 | #define ACTIVEPAGE 0x809E
 36 | #define VARS 0x4941
 37 | #define STRVARS 0x4B76
 38 | #define STRBUF 0x7C00
 39 | #define RUNTIMELEN 0xA0
 40 | #define VERSION 18
 41 | 
 42 | /* ============================== PATCH TABLE =============================== */
 43 | typedef struct {
 44 | 	int id;			/* ID of the label (NOT ENTRY'S!!!) */
 45 | 	uint16_t addr;		/* Address to patch up */
 46 | } PatchTableEntry;
 47 | 
 48 | typedef struct {
 49 | 	PatchTableEntry* table;	/* Table */
 50 | 	int length;		/* Its length */
 51 | 	int capacity;		/* And its capacity */
 52 | } PatchTable;
 53 | 
 54 | /* Initialize and free */
 55 | void init_patch(PatchTable* p);
 56 | void free_patch(PatchTable* p);
 57 | 
 58 | /* Add one entry */
 59 | void add_patch(PatchTable* p, int id, uint16_t addr);
 60 | 
 61 | /* ======================== COMPILED CODE CONTAINER ========================= */
 62 | typedef struct {
 63 | 	char* code;		/* Bytes of compiled code */
 64 | 	int length;		/* Length of code */
 65 | 	int capacity;		/* Boy I love them dynamic arrays */
 66 | } CompileTarget;
 67 | 
 68 | /* Initialize and free */
 69 | void init_code(CompileTarget* c);
 70 | void free_code(CompileTarget* c);
 71 | 
 72 | /* Emit pieces of machine code (takes care of endianness) */
 73 | void emit_byte(CompileTarget* c, uint8_t byte);
 74 | void emit_word(CompileTarget* c, uint16_t word);
 75 | void emit_call(CompileTarget* c, uint16_t target);
 76 | void emit_jump(CompileTarget* c, uint16_t target);
 77 | void emit_string(CompileTarget* c, const char* str);
 78 | 
 79 | /* Patch all jumps when compiling Node n */
 80 | void patch_jumps(CompileTarget* c, PatchTable* p, SymbolTable* sym, Node* n);
 81 | 
 82 | /* Convenience function to dump compiled code as ASM */
 83 | void disassemble(CompileTarget* c);
 84 | 
 85 | /* Different helpers for the compiler:
 86 |  * compile_error() - Emit error message
 87 |  * init_expr_compiler() - Initialize expression compiler
 88 |  * init_kword_compiler() - Initialize keyword compiler
 89 |  * compile_expression() - Returns true if expr was numeric, false if string
 90 |  * compile_keyword() - Compile keyword statement
 91 |  * compile_ast() - Compile one AST node
 92 |  */
 93 | void compile_error(const char* msg, Node* ast);
 94 | void init_expr_compiler(StringTable* str);
 95 | void init_kword_compiler(StringTable* str, SymbolTable* sym, PatchTable* p);
 96 | bool compile_expression(Node* ast, CompileTarget* code);
 97 | void compile_keyword(Node* ast, CompileTarget* code);
 98 | void compile_ast(Node* ast, CompileTarget* code);
 99 | 
100 | /* Main function of code generation: compiler */
101 | void compile(Node* ast, CompileTarget* code, StringTable* str, SymbolTable* t);
102 | 
103 | #endif
104 | 


--------------------------------------------------------------------------------
/docs/review.md:
--------------------------------------------------------------------------------
 1 | # Table of contents
 2 | 
 3 | - [Directories](#directories)
 4 | - [General structure](#general-structure)
 5 | - [Used techniques](#used-techniques)
 6 |   - [Dispatch tables](#dispatch-tables)
 7 |   - [Data structures](#data-structures)
 8 | 
 9 | ## Directories
10 | 
11 | Directories of this project are:
12 | - `additional`: Here are some additional files (like test files or syntax
13 | highlighting for Vim)
14 | - `bin`: Here lies the compiled executable (**Note:** you need to run
15 | `make init` first to have this directory).
16 | - `docs`: You are here.
17 | - `include`: All of the headers are in here.
18 | - `obj`: Object files produced during compilation (**Note:** you need to run
19 | `make init` first to have this directory).
20 | - `src`: This is the main directory with source files.
21 |   - `back`: Source of the back-end (code generator).
22 |   - `front`: Source of the front-end (lexer and parser).
23 |   - `util`: Source of various helpers.
24 | 
25 | ## General structure
26 | 
27 | Execution starts obviously in [main.c](../src/main.c), which through a rather
28 | primitive interface gets the source filename, and reads it. It then initializes
29 | some compiler structures, and calls the function `parse()` from
30 | [parser.c](../src/front/parser.c).
31 | 
32 | Parser repeatedly calls `get_token()` from [lexer.c](../src/front/lexer.c). It is a
33 | boring function with nothing interesting in it. It outputs next token from the
34 | source code, and changes some internal state to move to the next one. Function
35 | `lookahead()` also grabs next token, but without advancing.
36 | 
37 | In short, `parse()` returns Abstract Syntax Tree (AST), which represents what
38 | source code is doing in more machine-friendly way (so with lots of pointers,
39 | numbers and no text). It is defined in [ast.h](../include/ast.h). For more
40 | information about AST in general, see this pretty good explanation on the
41 | [Wikipedia](https://en.wikipedia.org/wiki/Abstract_syntax_tree). How parser
42 | works is described in greater detail in ["Parsing Theory"](parsing_theory.md).
43 | 
44 | Parsed code is passed then to the `compile()` function. It writes out bytes of
45 | machine code to [dynamic array](#data-structures), which, after successful code
46 | generation, is returned to `main()`. There, it is written to output file, and
47 | compilation terminates.
48 | 
49 | ---
50 | 
51 | ## Used techniques
52 | 
53 | Here are described basic data structures and "algorithms" used (yes I know
54 | dispatch tables aren't "algorithms" but it sounds better).
55 | 
56 | ### Dispatch tables
57 | 
58 | Dispatch tables are really cool (and sometimes efficient) so they are used
59 | *a lot* in this project. If you don't know what they are, it is basically a
60 | table of pointers to the functions. Now you can just use your value as an index
61 | in the table, get the pointer and call it like a normal function.
62 | 
63 | ### Data structures
64 | 
65 | Like any large enough program, this compiler needs to use some data structures.
66 | It isn't rocket science, but we use these two:
67 | - **Dynamic array**
68 | - **Binary tree** (in form of AST)
69 | 
70 | Dynamic arrays are like normal contiguous arrays, but have an added parameter:
71 | their capacity. So if you want to add new element, inserting function checks if
72 | there is enough space, and if not it extends array by a factor of 2. This way
73 | we can reduce number of needed reallocations (that's good). If you want to know
74 | more about dynamic arrays, read yet another article on Wikipedia, right
75 | [here](https://en.wikipedia.org/wiki/Dynamic_array).
76 | 
77 | Binary trees are basically a set of nodes, where all nodes hold references to
78 | two of their *children*. If the reference is `NULL` it means there is no child.
79 | Because every node specifies all of its descendants, we can pass the whole tree
80 | using only the top-most node (called root). Don't ask me why root is on top.
81 | And, the Wikipedia [article](https://en.wikipedia.org/wiki/Binary_tree).
82 | 


--------------------------------------------------------------------------------
/docs/codegen_theory.md:
--------------------------------------------------------------------------------
 1 | # Table of contents
 2 | 
 3 | - [About code generation](#about-code-generation)
 4 | - [Binary layout of the compiled file](#binary-layout-of-the-compiled-file)
 5 | - [Statements](#statements)
 6 | - [Expressions](#expressions)
 7 | - [Labels](#labels)
 8 | 
 9 | ---
10 | 
11 | ## About code generation
12 | 
13 | The way machine-code is generated is very very simple: main generating function
14 | is `compile()`, which is basically a wrapper around `compile_ast()`.
15 | 
16 | It is a recursive function, which upon encountering `NODE_ASSIGN` calls itself
17 | on both children nodes. Else it looks up the rule in dispatch table and calls
18 | it.
19 | 
20 | **Note:** What is very important, is the distinction between *runtime* and
21 | *compile time*. When codegen function "returns" something, it doesn't return it
22 | to the caller in actual compiler, but rather sets registers to some state.
23 | 
24 | ## Binary layout of the compiled file
25 | 
26 | | Range of addresses   |  Contents of the range                    |
27 | |:--------------------:|:-----------------------------------------:|
28 | | `0x8000 - 0x8002`    |  JMP NEAR 0x???? (skip runtime & strings) |
29 | | `0x8003 - 0x8016`    |  String addition handler                  |
30 | | `0x8018 - 0x8047`    |  Division by zero handler                 |
31 | | `0x8048 - 0x8097`    |  Printing string code                     |
32 | | `0x8098 - 0x8099`    |  INK value                                |
33 | | `0x809A - 0x809B`    |  RAMSTART value                           |
34 | | `0x809C - 0x809D`    |  Working page                             |
35 | | `0x809E - 0x809F`    |  Active page                              |
36 | | `0x80A0 - 0x????`    |  String table                             |
37 | | `0x???? - 0xFFFF`    |  Compiled binary and its RAM              |
38 | 
39 | ---
40 | 
41 | ## Statements
42 | 
43 | There are 2 types of statements: special forms and keyword statements. The
44 | former are handled in the main code generation file, [codegen.c](../src/back/codegen.c),
45 | while the latter are compiled in [keyword.c](../src/back/keyword.c).
46 | 
47 | Keyword statements are easy ones: you just look at the keyword and pick function
48 | to call from a dispatch table. Called function does all the generation, usually
49 | emitting calls to the API or BIOS.
50 | 
51 | Special forms are a little bit more tricky, because you need to patch jumps. It
52 | isn't hard, just that we need to remember code addresses to later fix offsets
53 | (because jumps in x86 are relative to current IP).
54 | 
55 | ## Expressions
56 | 
57 | Expressions were difficult but here we have a quite elegant approach (because
58 | nothing is more elegant than recursion everywhere) - function
59 | `compile_expression()` will return true if all subexpressions are numeric and
60 | false if they are not.
61 | 
62 | Based on keyword being compiled we can determine how we should interpret the
63 | expression. For example, if we want to multiply 2 values, we would first call
64 | this function on both of the children and compute the result.
65 | *Note:* Here also type checking is done.
66 | 
67 | **The main rule is:** if expression is numeric, `compile_expression` puts the
68 | result to `AX` register. If it is a string, it puts the address into `SI`.
69 | 
70 | ## Labels
71 | 
72 | First read ["Parsing theory"](parsing_theory.md), chapter about labels to
73 | understand it. Alright, now how it is compiled.
74 | 
75 | In every entry of symbol table, we have an `addr` field. During parsing it is
76 | set to zero, and we don't bother with it. During code generation however, it is
77 | really important.
78 | 
79 | Before compiling any statement, so at the beginning of `compile_ast()` we call
80 | `patch_jumps()`. What it does, is it checks if current node is pointed to by
81 | one of the labels. If it is, it sets `addr` to current place in memory, and
82 | checks in the **patch table** if label was referred to in earlier `GOTO` or
83 | `GOSUB` statements. If it was, then we go back and patch the jump to correct
84 | location.
85 | 


--------------------------------------------------------------------------------
/additional/test.bas:
--------------------------------------------------------------------------------
  1 | rem TODO LIST
  2 | rem [ok] ALERT
  3 | rem [ok] ASKFILE
  4 | rem [ok] BREAK AND CLS
  5 | rem [ok] POKE AND CALL
  6 | rem [ok] CASE
  7 | rem [ok] CURSOR, PAUSE, CURSPOS, CURSCHAR AND CURSCOL
  8 | rem [ok] DELETE
  9 | rem [ok] FILES
 10 | rem [ok] GETKEY
 11 | rem [ok] GOSUB, GOTO, RETURN
 12 | rem [ok] INK
 13 | rem [ok] INPUT
 14 | rem [ok] LEN
 15 | rem [ok] LISTBOX
 16 | rem [ok] LOAD
 17 | rem [ok] MOVE
 18 | rem [ok] NUMBER
 19 | rem [ok] PAGE
 20 | rem [ok] PEEK/INT, POKE/INT
 21 | rem [ok] PRINT
 22 | rem [ok] RAND
 23 | rem [ok] RENAME
 24 | rem [ok] SAVE
 25 | rem [??] SIZE
 26 | rem [??] STRING
 27 | rem [??] WAITKEY
 28 | 
 29 | $1 = "ok"
 30 | x = PROGSTART
 31 | 
 32 | rem ALERT TEST
 33 | alert "start"
 34 | alert $1
 35 | 
 36 | rem ASKFILE TEST
 37 | askfile $1
 38 | cls
 39 | print $1
 40 | 
 41 | rem POKE & CALL TEST
 42 | poke 195 40000
 43 | call 40000
 44 | 
 45 | rem CASE TEST
 46 | case upper $1
 47 | print "upper: " + $1
 48 | case lower $1
 49 | print "lower: " + $1
 50 | 
 51 | rem CURSOR & PAUSE TEST
 52 | cursor off
 53 | pause 40
 54 | cursor on
 55 | 
 56 | rem CURSCHAR TEST
 57 | cls
 58 | print "Hello world"
 59 | move 0 0
 60 | curschar x
 61 | move 0 1
 62 | print chr x
 63 | 
 64 | rem CURSCOL TEST
 65 | move 0 0
 66 | curscol x
 67 | move 0 2
 68 | print hex x
 69 | 
 70 | rem CURSPOS TEST
 71 | curspos x y
 72 | print hex x ;
 73 | print " " ;
 74 | print hex y
 75 | 
 76 | rem DELETE TEST
 77 | print "DELETE TEST: ";
 78 | delete "abababab.txt"
 79 | print r
 80 | 
 81 | rem FILES TEST
 82 | print "FILES TEST: ";
 83 | files
 84 | 
 85 | rem GETKEY TEST
 86 | print "GETKEY TEST: ";
 87 | getkey x
 88 | print hex x
 89 | 
 90 | rem GOSUB & GOTO TEST
 91 | print "GOSUB & GOTO TEST: ";
 92 | gosub func
 93 | goto cont
 94 | print "hi"
 95 | cont:
 96 | print "hi continued"
 97 | 
 98 | rem INK TEST
 99 | print "INK TEST: ";
100 | ink 8
101 | print "test"
102 | ink 7
103 | print "test"
104 | 
105 | rem INPUT TEST
106 | print "INPUT TEST: ";
107 | input x
108 | print hex x
109 | 
110 | rem LEN TEST
111 | print "LEN TEST: ";
112 | $1 = "this string is 33 characters long"
113 | len $1 x
114 | print x
115 | 
116 | rem LISTBOX TEST
117 | print "LISTBOX TEST: ";
118 | listbox "first,second,third,fourth" "b" "c" x
119 | cls
120 | print x
121 | 
122 | rem LOAD TEST
123 | print "LOAD TEST: ";
124 | load "example.bas" RAMSTART
125 | peek x RAMSTART
126 | print chr x
127 | 
128 | rem MOVE TEST
129 | print "MOVE TEST: "
130 | curspos x y
131 | move 0 0
132 | pause 20
133 | move x y
134 | 
135 | rem NUMBER TEST
136 | print "NUMBER TEST: ";
137 | $1 = "123"
138 | number $1 x
139 | print x ;
140 | print " ";
141 | x = 456
142 | number x $1
143 | print $1
144 | 
145 | rem PAGE TEST
146 | print "PAGE TEST, change in 6 seconds"  rem This is on page 0, visible
147 | pause 30
148 | page 1 0
149 | print "Hello world, another 3 seconds"  rem This is written to 1, invisible
150 | pause 30
151 | page 0 1
152 | print "Back to original"  rem This is written to 0, while it is invisible
153 | pause 30
154 | page 0 0
155 | 
156 | rem POKE/INT TEST
157 | print "POKE 40000: ";
158 | poke 127 40001
159 | poke 247 40000
160 | print "ok"
161 | 
162 | rem PEEK/INT TEST
163 | print "PEEK 40000: ";
164 | peek a 40000
165 | print hex a
166 | print "PEEK 40001: ";
167 | peek a 40001
168 | print hex a
169 | print "PEEKINT 40000: ";
170 | peekint a 40000
171 | print hex a
172 | 
173 | rem PRINT TEST
174 | rem (Unnecessary)
175 | 
176 | rem RAND TEST
177 | rand x 100 1000
178 | print x
179 | 
180 | rem SAVE TEST
181 | print "DISK TESTS: ";
182 | pokeint 26984 40000
183 | pokeint 30752 40002
184 | save "test.txt" 40000 4
185 | 
186 | rem RENAME TEST
187 | rename "test.txt" "hello.txt"
188 | 
189 | rem SIZE TEST
190 | size "test.txt"
191 | print s
192 | 
193 | rem STRING TEST
194 | print "STRING TEST: ";
195 | $1 = "Hello"
196 | string get $1 3 b
197 | print hex b ;
198 | b = 121
199 | string set $1 5 b
200 | print " " + $1
201 | 
202 | rem WAITKEY TEST
203 | print "WAITKEY TEST: ";
204 | waitkey x
205 | print x
206 | 
207 | end
208 | 
rem Subroutine called by the GOSUB test: prints a marker, then returns
func:
  print "gosub is here!"
return
212 | 


--------------------------------------------------------------------------------
/src/util/table.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2022, Wojciech Grzela <grzela.wojciech@gmail.com>
  3 |  * Licensed under GNU General Public License version 3.
  4 |  */
  5 | 
  6 | /* Standard library includes */
  7 | #include <stdlib.h>
  8 | #include <string.h>
  9 | 
 10 | /* Custom includes */
 11 | #include <table.h>
 12 | 
 13 | /* ============================== SYMBOL TABLE ============================== */
 14 | void init_sym_table(SymbolTable* t)
 15 | {
 16 | 	t->len = 0;
 17 | 	t->capacity = 8;
 18 | 	t->table = malloc(t->capacity * sizeof(SymbolTableEntry));
 19 | }
 20 | 
 21 | void free_sym_table(SymbolTable* t)
 22 | {
 23 | 	free(t->table);
 24 | 	t->table = NULL;
 25 | 	t->len = 0;
 26 | 	t->capacity = 0;
 27 | }
 28 | 
 29 | int add_unreal_symbol(SymbolTable* t, char* str, int len)
 30 | {
 31 | 	/* If entry is found, just return its index */
 32 | 	for (int i = 0; i < t->len; i++) {
 33 | 		if (!strncmp(t->table[i].str, str, t->table[i].len))
 34 | 			return i;
 35 | 	}
 36 | 
 37 | 	/* If it is not, check if we need to make more space */
 38 | 	if (t->capacity < t->len + 1) {
 39 | 		t->capacity *= 2;
 40 | 
 41 | 		t->table = realloc(t->table, t->capacity *
 42 | 					sizeof(SymbolTableEntry));
 43 | 	}
 44 | 
 45 | 	/* Now we now there is space, fill it with new entry */
 46 | 	t->table[t->len].id = t->len;
 47 | 	t->table[t->len].str = str;
 48 | 	t->table[t->len].len = len;
 49 | 	t->table[t->len].target = NULL;
 50 | 	t->table[t->len].addr = 0;
 51 | 	t->table[t->len].isreal = false;
 52 | 	t->len++;
 53 | 
 54 | 	return t->len - 1;
 55 | }
 56 | 
 57 | int add_real_symbol(SymbolTable* t, char* str, int len, Node* n)
 58 | {
 59 | 	/* Use function above, just set needed fields */
 60 | 	int ret = add_unreal_symbol(t, str, len);
 61 | 	t->table[ret].target = n;
 62 | 	t->table[ret].isreal = true;
 63 | 
 64 | 	return ret;
 65 | }
 66 | 
 67 | bool is_symbol_real(SymbolTable* t, int id)
 68 | {
 69 | 	/* If index is out of bounds, then it surely isn't real */
 70 | 	if (t->len < id)
 71 | 		return false;
 72 | 
 73 | 	return t->table[id].isreal;
 74 | }
 75 | 
 76 | int find_symbol(SymbolTable* t, Node* n)
 77 | {
 78 | 	/* Look through table */
 79 | 	for (int i = 0; i < t->len; i++) {
 80 | 		if (t->table[i].target == n)
 81 | 			return i;
 82 | 	}
 83 | 
 84 | 	/* If we got nothing, return -1 */
 85 | 	return -1;
 86 | }
 87 | 
 88 | Node* get_symbol(SymbolTable* t, int id)
 89 | {
 90 | 	/* If index is out of bounds, then give NULL */
 91 | 	if (t->len < id)
 92 | 		return NULL;
 93 | 
 94 | 	return t->table[id].target;
 95 | }
 96 | 
 97 | /* ============================== STRING TABLE ============================== */
 98 | void init_str_table(StringTable* t)
 99 | {
100 | 	t->len = 0;
101 | 	t->capacity = 8;
102 | 	t->table = malloc(t->capacity * sizeof(StringTableEntry));
103 | 	t->blob = NULL;
104 | 	t->blob_len = 0;
105 | }
106 | 
107 | void free_str_table(StringTable* t)
108 | {
109 | 	free(t->table);
110 | 	free(t->blob);
111 | 	t->table = NULL;
112 | 	t->len = 0;
113 | 	t->capacity = 0;
114 | 	t->blob = NULL;
115 | 	t->blob_len = 0;
116 | }
117 | 
118 | int add_string(StringTable* t, const char* str, int len)
119 | {
120 | 	/* If entry is already present, just return its index */
121 | 	for (int i = 0; i < t->len; i++) {
122 | 		const char* entry_str = t->blob + t->table[i].offset;
123 | 		if (!strcmp(entry_str, str))
124 | 			return i;
125 | 	}
126 | 
127 | 	/* If it is not, check if we need to make more space */
128 | 	if (t->capacity < t->len + 1) {
129 | 		t->capacity *= 2;
130 | 
131 | 		t->table = realloc(t->table, t->capacity *
132 | 					sizeof(StringTableEntry));
133 | 	}
134 | 
135 | 	/* Set entry's values */
136 | 	t->table[t->len].id = t->len;
137 | 	t->table[t->len].offset = t->blob_len;
138 | 	t->table[t->len].len = len;
139 | 	t->len++;
140 | 
141 | 	/* Append string to blob */
142 | 	t->blob = realloc(t->blob, t->blob_len + len + 1);
143 | 	strncpy(t->blob + t->blob_len, str, len);
144 | 	t->blob_len += len + 1;
145 | 	t->blob[t->blob_len - 1] = '\0';
146 | 
147 | 	return t->len - 1;
148 | }
149 | 
150 | int get_offset_string(StringTable* t, int id)
151 | {
152 | 	if (t->len < id)
153 | 		return -1;
154 | 
155 | 	return t->table[id].offset;
156 | }
157 | 
158 | const char* get_string(StringTable* t, int id)
159 | {
160 | 	int off = get_offset_string(t, id);
161 | 	if (off == -1)
162 | 		return NULL;
163 | 
164 | 	return (const char*) t->blob + off;
165 | }
166 | 


--------------------------------------------------------------------------------
/docs/grammar.ebnf:
--------------------------------------------------------------------------------
  1 | (* EBNF GRAMMAR FOR MIKEOS BASIC (sort of) *)
  2 | (* Note: It really diverges from the language implemented originally in some places *)
  3 | (* Although most of these deviations make sense, like allowing expressions to be printed *)
  4 | 
  5 | (* Program has to have at least one statement *)
  6 | program = statement , { statement } , EOF ;
  7 | statement = assign | ifstmt | dostmt | forstmt | LABEL | keywordstmt ;
  8 | 
  9 | (* There are four basic statements: assignment, if, do and for *)
 10 | assign = variable , "=" , expr ;
 11 | ifstmt = "IF" , boolexpr , "THEN" , statement , [ "ELSE" , statement ] ;
 12 | dostmt = dostmtendless | dostmtendful ;
 13 | forstmt = "FOR" , assign , "TO" , expr , { statement } , "NEXT" , NUMERIC_VARIABLE ;
 14 | 
 15 | (* Little helpers for do *)
 16 | dostmtendless = "DO" , { statement } , "LOOP" , "ENDLESS" ;
 17 | dostmtendful = "DO" , { statement } , "LOOP", ( "WHILE" | "UNTIL" ) , boolexpr ;
 18 | 
 19 | (* Here we have a general rule for keyword statements and, by the nature of BASIC, there are lots of them *)
 20 | keywordstmt = alertstmt |
 21 | 		askfilestmt |
 22 | 		breakstmt |
 23 | 		callstmt |
 24 | 		casestmt |
 25 | 		clsstmt |
 26 | 		cursorstmt |
 27 | 		curscharstmt |
 28 | 		curscolstmt |
 29 | 		cursposstmt |
 30 | 		deletestmt |
 31 | 		endstmt |
 32 | 		filesstmt |
 33 | 		getkeystmt |
 34 | 		gosubstmt |
 35 | 		gotostmt |
 36 | 		includestmt |
 37 | 		inkstmt |
 38 | 		inputstmt |
 39 | 		lenstmt |
 40 | 		listboxstmt |
 41 | 		loadstmt |
 42 | 		movestmt |
 43 | 		numberstmt |
 44 | 		pagestmt |
 45 | 		pausestmt |
 46 | 		peekstmt |
 47 | 		peekintstmt |
 48 | 		pokestmt |
 49 | 		pokeintstmt |
 50 | 		portstmt |
 51 | 		printstmt |
 52 | 		randstmt |
 53 | 		readstmt |
 54 | 		renamestmt |
 55 | 		returnstmt |
 56 | 		savestmt |
 57 | 		serialstmt |
 58 | 		sizestmt |
 59 | 		soundstmt |
 60 | 		stringstmt |
 61 | 		waitkeystmt ;
 62 | 
 63 | (* Oh boy... *)
 64 | alertstmt = ALERT , string ;
 65 | askfilestmt = ASKFILE , STRING_VARIABLE ;
 66 | breakstmt = BREAK ;
 67 | callstmt = CALL , expr ;
 68 | casestmt = CASE , ( LOWER | UPPER ) , STRING_VARIABLE ;
 69 | clsstmt = CLS ;
 70 | cursorstmt = CURSOR , ( OFF | ON ) ;
 71 | curscharstmt = CURSCHAR , NUMERIC_VARIABLE ;
 72 | curscolstmt = CURSCOL , NUMERIC_VARIABLE ;
 73 | cursposstmt = CURSPOS , NUMERIC_VARIABLE , NUMERIC_VARIABLE ;
 74 | deletestmt = DELETE , string ;
 75 | endstmt = END ;
 76 | filesstmt = FILES ;
 77 | getkeystmt = GETKEY, NUMERIC_VARIABLE ;
 78 | gosubstmt = GOSUB , LABEL ;
 79 | gotostmt = GOTO , LABEL ;
 80 | includestmt = INCLUDE , STRING_LITERAL ;
 81 | inkstmt = INK , numeric ;
 82 | inputstmt = INPUT , variable ;
 83 | lenstmt = LEN , string , NUMERIC_VARIABLE ;
 84 | listboxstmt = LISTBOX , string , string , string , NUMERIC_VARIABLE ;
 85 | loadstmt = LOAD , string , numeric ;
 86 | movestmt = MOVE , numeric , numeric ;
 87 | numberstmt = numberstrtonum | numbernumtostr ;
 88 | pagestmt = PAGE , numeric , numeric ;
 89 | pausestmt = PAUSE , numeric ;
 90 | peekstmt = PEEK , NUMERIC_VARIABLE , numeric ;
 91 | peekintstmt = PEEKINT , NUMERIC_VARIABLE , numeric ;
 92 | pokestmt = POKE , numeric , numeric ;
 93 | pokeintstmt = POKEINT , numeric , numeric ;
 94 | portstmt = portin | portout ;
 95 | printstmt = PRINT , [ CHR | HEX ] , expr , [ ";" ] ;
 96 | randstmt = RAND , NUMERIC_VARIABLE , numeric , numeric ;
 97 | readstmt = READ , LABEL , numeric , NUMERIC_VARIABLE ;
 98 | renamestmt = RENAME , string , string ;
 99 | returnstmt = RETURN ;
100 | savestmt = SAVE , string , numeric , numeric ;
101 | serialstmt = serialon | serialsend | serialrec ;
102 | sizestmt = SIZE , string ;
103 | soundstmt = SOUND , numeric , numeric ;
104 | stringstmt = STRING , ( GET | SET ) , STRING_VARIABLE , numeric , NUMERIC_VARIABLE ;
105 | waitkeystmt = WAITKEY , NUMERIC_VARIABLE ;
106 | 
107 | (* Helpers for keyword rules *)
108 | numberstrtonum = NUMBER , STRING_VARIABLE , NUMERIC_VARIABLE ;
109 | numbernumtostr = NUMBER , NUMERIC_VARIABLE , STRING_VARIABLE ;
110 | portin = PORT , IN , numeric , NUMERIC_VARIABLE ;
111 | portout = PORT , OUT , numeric , numeric ;
112 | serialon = SERIAL , ON , numeric ; (* "1200" | "9600" *)
113 | serialsend = SERIAL , SEND , numeric ;
114 | serialrec = SERIAL , REC , NUMERIC_VARIABLE ;
115 | 
116 | (* These rules are used everywhere (NOTE: here are most of the differences from MikeOS implementation) *)
117 | boolexpr = cond , { "AND" , cond } ;
118 | cond = variable , ( "=" | "<" | ">" | "!=" ) , expr ;
119 | expr = primary , { ( "+" | "-" | "*" | "/" | "%" ) , primary } ;
120 | primary = ( "&" ) , variable | variable | literal | keywordval ;
121 | 
122 | (* Those are simplifying other rules *)
123 | keywordval = PROGSTART | RAMSTART | VARIABLES | VERSION | TIMER | INK ;
124 | literal = STRING_LITERAL | NUMERIC_LITERAL | CHARACTER_LITERAL ;
125 | string = STRING_VARIABLE | STRING_LITERAL ;
126 | numeric = NUMERIC_VARIABLE | NUMERIC_LITERAL | ( "&" ) , variable | keywordval ;
127 | variable = NUMERIC_VARIABLE | STRING_VARIABLE ;
128 | 


--------------------------------------------------------------------------------
/src/back/codegen.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2022, Wojciech Grzela <grzela.wojciech@gmail.com>
  3 |  * Licensed under GNU General Public License version 3.
  4 |  */
  5 | 
  6 | /* Standard library includes */
  7 | #include <stdio.h>
  8 | #include <stdbool.h>
  9 | 
 10 | /* Custom includes */
 11 | #include <lexer.h>
 12 | #include <parser.h>
 13 | #include <table.h>
 14 | #include <codegen.h>
 15 | #include <runtime.h>
 16 | #include <util.h>
 17 | 
 18 | static PatchTable* patches;
 19 | static SymbolTable* symbols;
 20 | 
 21 | void compile_error(const char* msg, Node* current)
 22 | {
 23 | 	printf("\x1B[31mError (codegen)\x1B[0m: %s at line: %d.\n", msg,
 24 | 		current->line);
 25 | 	raise_error();
 26 | }
 27 | 
 28 | /* =========================== COMPILER FUNCTIONS =========================== */
 29 | void compile_assign(Node* ast, CompileTarget* code)
 30 | {
 31 | 	bool expr = compile_expression(ast->op2, code);
 32 | 	bool type = (ast->op1->attribute == TOKEN_NUMERIC_VARIABLE) ?
 33 | 			true : false;
 34 | 
 35 | 	/* Check typing */
 36 | 	if (expr != type) {
 37 | 		compile_error("Assigning invalid type", ast);
 38 | 		return;
 39 | 	}
 40 | 
 41 | 	/* It is a numeric assignment */
 42 | 	if (expr) {
 43 | 		uint16_t addr = VARS + ast->op1->val * 2;
 44 | 		emit_byte(code, 0x89);	/* MOV */
 45 | 		emit_byte(code, 0x06);	/* [addr], AX */
 46 | 		emit_word(code, addr);
 47 | 	}
 48 | 	else {
 49 | 		uint16_t addr = STRVARS + ast->op1->val * 128;
 50 | 		emit_byte(code, 0xC7);	/* MOV */
 51 | 		emit_byte(code, 0xC7);	/* DI, */
 52 | 		emit_word(code, addr);	/* addr */
 53 | 
 54 | 		/* Copy string into variable */
 55 | 		emit_call(code, 0x0039);
 56 | 	}
 57 | }
 58 | 
 59 | void compile_if(Node* ast, CompileTarget* code)
 60 | {
 61 | 	/* Compile condition */
 62 | 	compile_expression(ast->op1, code);
 63 | 
 64 | 	/* If it is false skip THEN branch */
 65 | 	emit_byte(code, 0x85);		/* TEST */
 66 | 	emit_byte(code, 0xC0);		/* AX, AX */
 67 | 	emit_byte(code, 0x0F);		/* JZ NEAR */
 68 | 	emit_byte(code, 0x84);
 69 | 	emit_word(code, 0x0000);	/* To patch up */
 70 | 
 71 | 	int patch = code->length - 2;
 72 | 
 73 | 	/* Compile THEN branch */
 74 | 	compile_ast(ast->op2->op1, code);
 75 | 
 76 | 	/* Jump over ELSE branch (and patch the jump) */
 77 | 	emit_byte(code, 0xE9);		/* JMP NEAR */
 78 | 	emit_word(code, 0x0000);	/* To patch up */
 79 | 	uint16_t rel = code->length - (patch + 2);
 80 | 	code->code[patch] = (uint8_t) rel & 0xFF;
 81 | 	code->code[patch + 1] = (uint8_t) (rel >> 8) & 0xFF;
 82 | 
 83 | 	/* New patch will be needed */
 84 | 	patch = code->length - 2;
 85 | 
 86 | 	/* Compile ELSE branch */
 87 | 	compile_ast(ast->op2->op2, code);
 88 | 	rel = code->length - (patch + 2);
 89 | 	code->code[patch] = (uint8_t) rel & 0xFF;
 90 | 	code->code[patch + 1] = (uint8_t) (rel >> 8) & 0xFF;
 91 | }
 92 | 
 93 | void compile_do(Node* ast, CompileTarget* code)
 94 | {
 95 | 	/* The body is emitted first; remember where it begins, because every
 96 | 	 * form of the loop ends with a jump back to that spot */
 97 | 	uint16_t body_start = code->length;
 98 | 	compile_ast(ast->op2->op1, code);
 99 | 
100 | 	/* No condition at all: just loop endlessly */
101 | 	if (ast->op1 == NULL) {
102 | 		emit_jump(code, LOAD + body_start);
103 | 		return;
104 | 	}
105 | 
106 | 	/* The modifier token says which outcome of the condition loops again */
107 | 	TokenType modifier = ast->op2->op2->attribute;
108 | 
109 | 	/* Condition; its result is what the TEST below looks at */
110 | 	compile_expression(ast->op1, code);
111 | 
112 | 	/* The displacement is computed for the end of the 6 bytes emitted
113 | 	 * below (2 for TEST, 4 for the near Jcc), hence the + 6 */
114 | 	uint16_t back = body_start - (code->length + 6);
115 | 
116 | 	/* WHILE loops again on a non-zero result (JNE), every other modifier
117 | 	 * on a zero one (JZ); only the second opcode byte differs */
118 | 	uint8_t condition_code = (modifier == TOKEN_WHILE) ? 0x85 : 0x84;
119 | 
120 | 	/* Test the result and jump back to the body if the loop goes on */
121 | 	emit_byte(code, 0x85);		/* TEST */
122 | 	emit_byte(code, 0xC0);		/* AX, AX */
123 | 	emit_byte(code, 0x0F);		/* Jcc NEAR */
124 | 	emit_byte(code, condition_code);
125 | 	emit_word(code, back);
126 | }
127 | 
128 | void compile_for(Node* ast, CompileTarget* code)
129 | {
130 | 	/* Address of the loop counter (2-byte slots counted from VARS) */
131 | 	uint16_t counter = VARS + ast->op1->op1->val * 2;
132 | 
133 | 	/* The initializer is an ordinary assignment */
134 | 	compile_assign(ast->op1, code);
135 | 
136 | 	/* Every iteration starts here */
137 | 	uint16_t loop_top = code->length;
138 | 
139 | 	/* Body, followed by the NEXT step that bumps the counter in memory */
140 | 	compile_ast(ast->op2->op2, code);
141 | 	emit_byte(code, 0xFF);				/* INC */
142 | 	emit_byte(code, 0x06);				/* [imm16] */
143 | 	emit_word(code, counter);
144 | 
145 | 	/* "TO" field; compile_expression() answering false is a type error */
146 | 	if (!compile_expression(ast->op2->op1, code)) {
147 | 		compile_error("Type error in FOR TO field", ast);
148 | 		return;
149 | 	}
150 | 	emit_byte(code, 0x8B);				/* MOV */
151 | 	emit_byte(code, 0xD8);				/* BX, AX */
152 | 
153 | 	/* Bound check: the limit sits in BX, the counter is loaded into AX */
154 | 	emit_byte(code, 0x8B);				/* MOV */
155 | 	emit_byte(code, 0x06);				/* AX, */
156 | 	emit_word(code, counter);			/* [imm16] */
157 | 	emit_byte(code, 0x3B);				/* CMP */
158 | 	emit_byte(code, 0xD8);				/* BX, AX */
159 | 
160 | 	/* Jump back to the top; displacement counts from behind its operand */
161 | 	emit_byte(code, 0x0F);				/* JGE NEAR */
162 | 	emit_byte(code, 0x8D);
163 | 	uint16_t back = loop_top - (code->length + 2);
164 | 	emit_word(code, back);
165 | }
166 | 
167 | /* =========================== MAIN CODE GENERATOR ========================== */
168 | typedef void (*CompileFuncPtr)(Node*, CompileTarget*);	/* Statement compiler */
169 | static CompileFuncPtr node_compiler[] = {	/* Indexed by node type */
170 | 	[NODE_ASSIGN] = compile_assign,
171 | 	[NODE_EXPR] = NULL,	/* Expression is an invalid statement anyways */
172 | 	[NODE_VARIABLE] = NULL,	/* And so is a variable */
173 | 	[NODE_LITERAL] = NULL,	/* Or a literal */
174 | 	[NODE_LABEL] = NULL,	/* And a label too */
175 | 	[NODE_IF] = compile_if,
176 | 	[NODE_DO] = compile_do,
177 | 	[NODE_FOR] = compile_for,
178 | 	[NODE_KEYWORD_CALL] = compile_keyword	/* NODE_SEQUENCE has no entry */
179 | };
180 | 
181 | /* Generate code proper, with no prologue. Node types that have no compiler
182 |  * in node_compiler[] are reported as errors instead of being called. */
183 | void compile_ast(Node* ast, CompileTarget* code)
184 | {
185 | 	if (ast == NULL)		/* Empty node: nothing to emit */
186 | 		return;
187 | 	patch_jumps(code, patches, symbols, ast);
188 | 	if (ast->type == NODE_SEQUENCE) {
189 | 		compile_ast(ast->op1, code);
190 | 		compile_ast(ast->op2, code);
191 | 		return;
192 | 	}
193 | 	/* A NULL or missing table entry would crash when called through */
194 | 	unsigned known = sizeof(node_compiler) / sizeof(node_compiler[0]);
195 | 	if ((unsigned) ast->type < known && node_compiler[ast->type] != NULL)
196 | 		node_compiler[ast->type](ast, code);
197 | 	else
198 | 		compile_error("Invalid statement", ast);
199 | }
200 | 
201 | void compile(Node* ast, CompileTarget* code, StringTable* str, SymbolTable* t)
202 | {
203 | 	/* The patch table lives on the stack for this one compilation */
204 | 	PatchTable patch_table;
205 | 	init_patch(&patch_table);
206 | 	symbols = t;
207 | 	patches = &patch_table;
208 | 	init_expr_compiler(str);
209 | 	init_kword_compiler(str, t, &patch_table);
210 | 	make_entry(code, str);
211 | 	compile_ast(ast, code);
212 | 
213 | 	/* A program whose last byte is not RET (0xC3) gets an exit appended */
214 | 	if ((uint8_t) code->code[code->length - 1] != 0xC3)
215 | 		make_exit(code);
216 | 
217 | 	/* Fix RAMSTART: first address behind the code, low byte first */
218 | 	uint16_t ramstart = LOAD + code->length;
219 | 	code->code[RAMSTART - LOAD] = (uint8_t) (ramstart & 0xFF);
220 | 	code->code[RAMSTART - LOAD + 1] = (uint8_t) (ramstart >> 8);
221 | 
222 | 	free_patch(&patch_table);
223 | 	patches = NULL;
224 | }
225 | 


--------------------------------------------------------------------------------
/src/back/runtime.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2022, Wojciech Grzela <grzela.wojciech@gmail.com>
  3 |  * Licensed under GNU General Public License version 3.
  4 |  */
  5 | 
  6 | /* Custom includes */
  7 | #include <table.h>
  8 | #include <codegen.h>
  9 | 
 10 | void make_exit(CompileTarget* code)
 11 | {
 12 | 	/* Program exit sequence. First: reset active page (AH = 5, page 0) */
 13 | 	emit_byte(code, 0xC7);		/* MOV */
 14 | 	emit_byte(code, 0xC0);		/* AX, */
 15 | 	emit_word(code, 0x0500);	/* AH = 5, AL = 0 */
 16 | 	emit_byte(code, 0xCD);		/* INT */
 17 | 	emit_byte(code, 0x10);		/* 0x10 */
 18 | 
 19 | 	/* Rewind stack to the value make_entry() saved in BP */
 20 | 	emit_byte(code, 0x8B);		/* MOV */
 21 | 	emit_byte(code, 0xE5);		/* SP, BP */
 22 | 
 23 | 	/* Exit the program; compile() looks for this 0xC3 as the last byte */
 24 | 	emit_byte(code, 0xC3);		/* RET */
 25 | }
 26 | 
 27 | /* Runtime handler for division by zero: prints a message and exits.
 28 |  * Length of this handler: 4 + 3 + 9 + 32 = 48 bytes */
 29 | void zero_divide_handler(CompileTarget* code)
 30 | {
 31 | 	/* Point SI at the message emitted at the end of this handler. When the
 32 | 	 * operand is computed, code->length is just behind the two MOV opcode
 33 | 	 * bytes, so the text is imm16 (2) + CALL (3) + exit (9) = 14 ahead */
 34 | 	emit_byte(code, 0xC7);				/* MOV */
 35 | 	emit_byte(code, 0xC6);				/* SI, */
 36 | 	emit_word(code, LOAD + code->length + 14);	/* imm16 */
 37 | 
 38 | 	emit_call(code, 0x0003);	/* Call os_print_string */
 39 | 	make_exit(code);		/* Exit (the 9 bytes counted above) */
 40 | 
 41 | 	emit_string(code, "BASIC Runtime: Division by zero");
 42 | }
 43 | 
 44 | /* Joins two strings via STRBUF. Length: 1 + 4 + 3 + 2 + 1 + 3 * 2 + 3 = 20 */
 45 | void add_strings(CompileTarget* code)
 46 | {
 47 | 	emit_byte(code, 0x57);		/* PUSH DI */
 48 | 	emit_byte(code, 0xC7);		/* MOV */
 49 | 	emit_byte(code, 0xC7);		/* DI, */
 50 | 	emit_word(code, STRBUF);	/* imm16 */
 51 | 
 52 | 	/* CALL os_string_copy: the string at SI goes into the buffer at DI */
 53 | 	emit_call(code, 0x0039);
 54 | 
 55 | 	emit_byte(code, 0x8B);		/* MOV */
 56 | 	emit_byte(code, 0xF7);		/* SI, DI */
 57 | 
 58 | 	emit_byte(code, 0x5F);		/* POP DI */
 59 | 
 60 | 	/* Now SI = buffer with old SI, DI = 2nd string */
 61 | 	emit_byte(code, 0x8B);		/* MOV */
 62 | 	emit_byte(code, 0xC6);		/* AX, SI */
 63 | 	emit_byte(code, 0x8B);		/* MOV */
 64 | 	emit_byte(code, 0xDF);		/* BX, DI */
 65 | 	emit_byte(code, 0x8B);		/* MOV */
 66 | 	emit_byte(code, 0xCE);		/* CX, SI */
 67 | 
 68 | 	/* CALL os_string_join with AX, BX and CX as set up above */
 69 | 	emit_call(code, 0x003F);
 70 | 
 71 | 	/* Return from handler. NOTE(review): this byte is not in the sum above */
 72 | 	emit_byte(code, 0xC3);
 73 | }
 74 | 
 75 | /* Print handler: walks the string at SI (LODSB). Length: 80 bytes */
 76 | void print_string(CompileTarget* code)
 77 | {
 78 | 	/* Prepare to enter print loop (19 bytes) */
 79 | 	emit_byte(code, 0xC7);		/* MOV */
 80 | 	emit_byte(code, 0xC0);		/* AX, */
 81 | 	emit_word(code, 0x0900);	/* 0x900 -> AH = 9 */
 82 | 	emit_byte(code, 0x8B);		/* MOV */
 83 | 	emit_byte(code, 0x1E);		/* BX, */
 84 | 	emit_word(code, WORKPAGE);	/* [WORKPAGE] */
 85 | 	emit_byte(code, 0xC1);		/* SHL */
 86 | 	emit_byte(code, 0xE3);		/* BX, */
 87 | 	emit_byte(code, 0x08);		/* 8 */
 88 | 	emit_byte(code, 0x0B);		/* OR */
 89 | 	emit_byte(code, 0x1E);		/* BX, */
 90 | 	emit_word(code, INKADDR);	/* [INKADDR] */
 91 | 	emit_byte(code, 0xC7);		/* MOV */
 92 | 	emit_byte(code, 0xC1);		/* CX, */
 93 | 	emit_word(code, 0x0001);	/* 1 */
 94 | 
 95 | 	/* Loop itself (11 bytes); AH stays 9, so comparing AX checks just AL */
 96 | 	emit_byte(code, 0xAC);		/* LODSB */
 97 | 	emit_byte(code, 0x3D);		/* CMP AX, */
 98 | 	emit_word(code, 0x0900);	/* 0x900 -> AL = 0 */
 99 | 	emit_byte(code, 0x74);		/* JE */
100 | 	emit_byte(code, 0x35);		/* To the end (+53) */
101 | 	emit_byte(code, 0x3D);		/* CMP AX, */
102 | 	emit_word(code, 0x090A);	/* 0x90A -> AL = 0x0A */
103 | 	emit_byte(code, 0x74);		/* JE */
104 | 	emit_byte(code, 0x16);		/* To newline (+22) */
105 | 
106 | 	/* Print character (2 bytes) */
107 | 	emit_byte(code, 0xCD);		/* INT */
108 | 	emit_byte(code, 0x10);		/* 0x10 */
109 | 
110 | 	/* Get cursor position and adjust (20 bytes) */
111 | 	emit_byte(code, 0x50);		/* PUSH AX */
112 | 	emit_byte(code, 0x51);		/* PUSH CX */
113 | 	emit_byte(code, 0xC7);		/* MOV */
114 | 	emit_byte(code, 0xC0);		/* AX, */
115 | 	emit_word(code, 0x0300);	/* AH = 3 */
116 | 	emit_byte(code, 0xCD);		/* INT */
117 | 	emit_byte(code, 0x10);		/* 0x10 */
118 | 	emit_byte(code, 0xFF);		/* INC */
119 | 	emit_byte(code, 0xC2);		/* DX */
120 | 	emit_byte(code, 0xC7);		/* MOV */
121 | 	emit_byte(code, 0xC0);		/* AX, */
122 | 	emit_word(code, 0x0200);	/* AH = 2 */
123 | 	emit_byte(code, 0xCD);		/* INT */
124 | 	emit_byte(code, 0x10);		/* 0x10 */
125 | 	emit_byte(code, 0x59);		/* POP CX */
126 | 	emit_byte(code, 0x58);		/* POP AX */
127 | 	emit_byte(code, 0xEB);		/* JMP */
128 | 	emit_byte(code, 0xDF);		/* Back to the loop (-33) */
129 | 
130 | 	/* We got 0x0A, move to next line (26 bytes) */
131 | 	emit_byte(code, 0x50);		/* PUSH AX */
132 | 	emit_byte(code, 0x53);		/* PUSH BX */
133 | 	emit_byte(code, 0xC7);		/* MOV */
134 | 	emit_byte(code, 0xC0);		/* AX, */
135 | 	emit_word(code, 0x0500);	/* AH = 5 */
136 | 	emit_byte(code, 0x0B);		/* OR */
137 | 	emit_byte(code, 0x06);		/* AX, */
138 | 	emit_word(code, WORKPAGE);	/* [WORKPAGE] */
139 | 	emit_byte(code, 0xCD);		/* INT */
140 | 	emit_byte(code, 0x10);		/* 0x10 */
141 | 	emit_byte(code, 0x8B);		/* MOV */
142 | 	emit_byte(code, 0x1E);		/* BX, */
143 | 	emit_word(code, WORKPAGE);	/* [WORKPAGE] */
144 | 	emit_byte(code, 0xC1);		/* SHL */
145 | 	emit_byte(code, 0xE3);		/* BX, */
146 | 	emit_byte(code, 0x08);		/* 8 */
147 | 	/* CALL os_print_newline */
148 | 	emit_call(code, 0x000F);
149 | 	emit_byte(code, 0x5B);		/* POP BX */
150 | 	emit_byte(code, 0x58);		/* POP AX */
151 | 	emit_byte(code, 0xEB);		/* JMP */
152 | 	emit_byte(code, 0xC5);		/* Back to the loop (-59) */
153 | 
154 | 	/* Return from the handler (the +53 target) and one NOP of padding */
155 | 	emit_byte(code, 0xC3);		/* RET */
156 | 	emit_byte(code, 0x90);		/* NOP */
157 | }
158 | 
159 | void make_entry(CompileTarget* code, StringTable* strings)
160 | {
161 | 	int len = strings->blob_len;
162 | 	int rel = RUNTIMELEN + len;		/* Runtime + strings */
163 | 
164 | 	/* Program prologue. It starts by jumping over runtime and strings */
165 | 	emit_jump(code, LOAD + rel);
166 | 
167 | 	/* Runtime functions */
168 | 	add_strings(code);
169 | 	zero_divide_handler(code);
170 | 	print_string(code);
171 | 
172 | 	/* Empty places for different values */
173 | 	emit_word(code, 0x0007);	/* INK (default 7) */
174 | 	emit_word(code, 0x0000);	/* RAMSTART (codegen will fill it) */
175 | 	emit_word(code, 0x0000);	/* WORKPAGE */
176 | 	emit_word(code, 0x0000);	/* ACTIVEPAGE */
177 | 
178 | 	/* Write out string table */
179 | 	for (int i = 0; i < len; i++)
180 | 		emit_byte(code, strings->blob[i]);
181 | 
182 | 	/* Clear out numeric variables: all 26 (A-Z) at 2 bytes each */
183 | 	emit_byte(code, 0x33);		/* XOR */
184 | 	emit_byte(code, 0xC0);		/* AX, AX */
185 | 	emit_byte(code, 0xC7);		/* MOV */
186 | 	emit_byte(code, 0xC7);		/* DI, */
187 | 	emit_word(code, VARS);		/* VARS */
188 | 	emit_byte(code, 0xC7);		/* MOV */
189 | 	emit_byte(code, 0xC1);		/* CX, */
190 | 	emit_word(code, 0x0034);	/* 52 = 26 * 2 (46 left X, Y, Z out) */
191 | 	emit_byte(code, 0xF3);		/* REP */
192 | 	emit_byte(code, 0xAA);		/* STOSB */
193 | 
194 | 	/* Clear out string variables: 8 of them, 128 bytes each */
195 | 	emit_byte(code, 0xC7);		/* MOV */
196 | 	emit_byte(code, 0xC7);		/* DI, */
197 | 	emit_word(code, STRVARS);	/* STRVARS */
198 | 	emit_byte(code, 0xC7);		/* MOV */
199 | 	emit_byte(code, 0xC1);		/* CX, */
200 | 	emit_word(code, 0x0400);	/* 1024 */
201 | 	emit_byte(code, 0xF3);		/* REP */
202 | 	emit_byte(code, 0xAA);		/* STOSB */
203 | 
204 | 	/* Setup stack: BP keeps the entry SP for make_exit() to restore */
205 | 	emit_byte(code, 0x8B);		/* MOV */
206 | 	emit_byte(code, 0xEC);		/* BP, SP */
207 | }
208 | 


--------------------------------------------------------------------------------
/docs/parsing_theory.md:
--------------------------------------------------------------------------------
  1 | # Table of contents
  2 | 
  3 | - [About this parser](#about-this-parser)
  4 | - [Main parsing function](#main-parsing-function)
  5 | - [Statements](#statements)
  6 | - [Assignments](#assignments)
  7 | - [IF statements](#if-statements)
  8 | - [DO loops](#do-loops)
  9 | - [FOR loops](#for-loops)
 10 | - [Keyword statements](#keyword-statements)
 11 | - [Labels](#labels)
 12 | 
 13 | ---
 14 | 
 15 | ## About this parser
 16 | 
 17 | It is a [recursive descent parser](https://en.wikipedia.org/wiki/Recursive_descent_parser),
 18 | strictly following specified grammar.
 19 | 
 20 | Every function corresponds to one rule (although helpers are often put in one
 21 | function). They take no arguments (besides `parse()`, see below) and return
 22 | `Node*`. This return value is AST for parsed rule.
 23 | 
 24 | In case of an error, each and every function has the moral duty to clean up
 25 | after itself (so free every node initialized) and return `NULL`. No successful
 26 | parsing will result in the return of `NULL` pointer.
 27 | 
 28 | And one last thing about grammar itself: if you look very closely at the parsing
 29 | code, you will see that it is not, in fact, context free. This is due to the way
 30 | FOR loops are parsed. For more info read [here](#for-loops).
 31 | 
 32 | ---
 33 | 
 34 | ## Main parsing function
 35 | 
 36 | Parsing obviously starts by calling `parse()` with 2 arguments: *symbol table*
 37 | and *string table*. Those will be filled with entries as the parser goes through
 38 | the code. Both arguments are stored in global variables, and are not moved
 39 | (that is, the pointers don't change, only the values they point to).
 40 | 
 41 | The way `parse()` works is pretty simple: while it has anything to parse (isn't
 42 | at the EOF) it calls `statement()` and appends it to its tree of nodes, linked
 43 | via `NODE_SEQUENCE`.
 44 | 
 45 | ## Statements
 46 | 
 47 | Real parsing starts in `statement()` because, in this BASIC, **program is just**
 48 | **a list of statements**. There are only a few types of statements, namely:
 49 | - Assignment [(more info)](#assignments)
 50 | - IF statement [(more info)](#if-statements)
 51 | - DO loops [(more info)](#do-loops)
 52 | - FOR loops [(more info)](#for-loops)
 53 | - Keyword statement [(more info)](#keyword-statements)
 54 | - Label statement [(more info)](#labels)
 55 | 
 56 | Now what happens, is that looking only at the first token of the entire
 57 | statement we can tell which type it is, so we only need one token of lookahead.
 58 | So `statement()`'s only job is to call correct function by looking at next
 59 | token, and sometimes throw out an error if it doesn't match any of these.
 60 | 
 61 | ---
 62 | 
 63 | ## Assignments
 64 | 
 65 | Assignments are handled by an intuitively named function: `assign()`. It works
 66 | like this:
 67 | 1. Consume a variable. This is assignment's *target*.
 68 | 2. Consume `TOKEN_EQUALS`.
 69 | 3. Parse following expression. It becomes the *value*.
 70 | 
 71 | It is really that simple, although as you may have noted, we don't check
 72 | validity of assignment, that is, typing. We will do it during the compilation.
 73 | 
 74 | **Important**: Assignments are statements, not expressions, so you can't chain
 75 | them like in C (for example `a = b = c;`). It is consistent with MikeOS'
 76 | implementation, yet it is still worth a note.
 77 | 
 78 | ## IF statements
 79 | 
 80 | IF statements are parsed in `if_stmt()` function. Its rough description is:
 81 | 1. Consume `TOKEN_IF`.
 82 | 2. Parse boolean expression. This will be our *condition*.
 83 | 3. Consume `TOKEN_THEN`.
 84 | 4. Parse **exactly 1 (one)** statement. It is a *then branch*.
 85 | 5. Check if there is `TOKEN_ELSE`. If there is, proceed. Else parsing is done.
 86 | 6. Consume said token.
 87 | 7. Parse **exactly 1 (one)** statement. Now it becomes an *else branch*.
 88 | 
 89 | The only thing to note is that each branch is only **one** statement.
 90 | There is no C-like construct like block in here, and given that IF is not
 91 | followed by some sort of "FI" keyword, there is no way of knowing where branch
 92 | ends.
 93 | 
 94 | ## DO loops
 95 | 
 96 | Parsed by `do_stmt()`; this is an algorithm for it:
 97 | 1. Consume `TOKEN_DO`.
 98 | 2. Until you hit `TOKEN_EOF` (meaning an error) or `TOKEN_LOOP` (meaning the
 99 | loop's body is finished), parse statements, building tree of them. This tree is
100 | the *body*.
101 | 3. Consume `TOKEN_LOOP`.
102 | 4. Consume loop's *modifier*.
103 | 
104 | Code for this is really straightforward, but you have to be careful to stop
105 | parsing when you hit `TOKEN_EOF`, or else an invalid syntax can cause an
106 | infinite loop in your parser.
107 | 
108 | ## FOR loops
109 | 
110 | Parsed by `for_stmt()`. As always, an algorithm:
111 | 1. Consume `TOKEN_FOR`.
112 | 2. Call `assign()`. Returned node is loop's *initializer*.
113 | 3. Consume `TOKEN_TO`.
114 | 4. Parse expression. This is *to field*.
115 | 5. Until you hit `TOKEN_EOF` (meaning an error) or `TOKEN_NEXT` (meaning the
116 | loop's body is finished), parse statements, building tree of them. This tree is
117 | the *body*.
118 | 6. Consume `TOKEN_NEXT`.
119 | 7. Consume a variable. Compare it to initializer's target, and if they don't
120 | match, raise an error.
121 | 
122 | **Important**: We compare the target of initializer and variable after `NEXT`
123 | *at parse time* rather than during compiling. There is no particular difference
124 | in where it happens, as it will get reported anyways. Also, for this check to
125 | happen we need to ensure that initializer is not `NULL`, or else parser will
126 | seg-fault on it.
127 | **Additional note**: Due to this check, parser becomes not context-free (or at
128 | least I think).
129 | 
130 | ## Keyword statements
131 | 
132 | Keywords are handled in [separate file](../src/front/keyword_parser.c), for
133 | clarity of code. Main function there is, unsurprisingly, `parse_keyword()`,
134 | which looks up next token in a dispatch table, handing parser to found function.
135 | All of those `NULL` entries correspond to tokens that should have been handled
136 | elsewhere, like `TOKEN_IF` or `TOKEN_DO`. If we meet them then something went
137 | very wrong and we report an error.
138 | 
139 | How all of those functions work individually is too much to describe here, but
140 | none of them is really anything more than a direct implementation of its
141 | grammar rule.
142 | 
143 | ## Labels
144 | 
145 | How labels work is actually very educational, because of a quite peculiar
146 | thing about MikeOS' BASIC: you can call *forward* in code.
147 | 
148 | It is even more interesting given that it's a feature not present in most of
149 | older languages (like C or Pascal), so it was to be expected for BASIC to not
150 | support it, but well, it does.
151 | 
152 | So now, how it is implemented: each label is kept in **symbol table**, and has
153 | a flag attached to it: **the "real" flag**. As we parse, we can add *real* and
154 | *unreal* entries to the table. The distinction is:
155 | - Real entries represent labels **actually found** in the code.
156 | - Unreal entries represent labels that are **only targets** for jumps.
157 | 
158 | Now, this table has 2 functions:
159 | - `add_unreal_symbol()` adds the symbol, but doesn't set its real flag. If the
160 | entry is already present, then it does nothing.
161 | - `add_real_symbol()` adds the symbol, and sets its real flag. If the entry is
162 | already present **and is unreal**, then set its real flag. If it is present and
163 | real, does nothing.
164 | 
165 | So now after parsing whole file, during compiling, when we hit `GOTO` or
166 | `GOSUB`, we check if its target is real or not. If it is not, then it means we
167 | found reference to undefined label, and call for an error. Effectively what we
168 | do is **delay checking label's presence**.
169 | 


--------------------------------------------------------------------------------
/src/front/lexer.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2022, Wojciech Grzela <grzela.wojciech@gmail.com>
  3 |  * Licensed under GNU General Public License version 3.
  4 |  */
  5 | 
  6 | /* Standard library includes */
  7 | #include <stdio.h>
  8 | #include <stdlib.h>
  9 | #include <stdbool.h>
 10 | #include <ctype.h>
 11 | #include <string.h>
 12 | 
 13 | /* Custom includes */
 14 | #include <lexer.h>
 15 | #include <util.h>
 16 | 
 17 | /* List of all keywords, indexed by token type (see match_keyword) */
 18 | const char* keywords_names[] = {
 19 | 	[TOKEN_ALERT] = "ALERT",
 20 | 	[TOKEN_AND] = "AND",
 21 | 	[TOKEN_ASKFILE] = "ASKFILE",
 22 | 	[TOKEN_BREAK] = "BREAK",
 23 | 	[TOKEN_CALL] = "CALL",
 24 | 	[TOKEN_CASE] = "CASE",
 25 | 	[TOKEN_CHR] = "CHR",
 26 | 	[TOKEN_CLS] = "CLS",
 27 | 	[TOKEN_CURSOR] = "CURSOR",
 28 | 	[TOKEN_CURSCHAR] = "CURSCHAR",
 29 | 	[TOKEN_CURSCOL] = "CURSCOL",
 30 | 	[TOKEN_CURSPOS] = "CURSPOS",
 31 | 	[TOKEN_DELETE] = "DELETE",
 32 | 	[TOKEN_DO] = "DO",
 33 | 	[TOKEN_ELSE] = "ELSE",
 34 | 	[TOKEN_END] = "END",
 35 | 	[TOKEN_ENDLESS] = "ENDLESS",
 36 | 	[TOKEN_FILES] = "FILES",
 37 | 	[TOKEN_FOR] = "FOR",
 38 | 	[TOKEN_GET] = "GET",
 39 | 	[TOKEN_GOSUB] = "GOSUB",
 40 | 	[TOKEN_GOTO] = "GOTO",
 41 | 	[TOKEN_GETKEY] = "GETKEY",
 42 | 	[TOKEN_HEX] = "HEX",
 43 | 	[TOKEN_IF] = "IF",
 44 | 	[TOKEN_IN] = "IN",
 45 | 	[TOKEN_INCLUDE] = "INCLUDE",
 46 | 	[TOKEN_INK] = "INK",
 47 | 	[TOKEN_INPUT] = "INPUT",
 48 | 	[TOKEN_LEN] = "LEN",
 49 | 	[TOKEN_LISTBOX] = "LISTBOX",
 50 | 	[TOKEN_LOAD] = "LOAD",
 51 | 	[TOKEN_LOOP] = "LOOP",
 52 | 	[TOKEN_LOWER] = "LOWER",
 53 | 	[TOKEN_MOVE] = "MOVE",
 54 | 	[TOKEN_NEXT] = "NEXT",
 55 | 	[TOKEN_NUMBER] = "NUMBER",
 56 | 	[TOKEN_OFF] = "OFF",
 57 | 	[TOKEN_ON] = "ON",
 58 | 	[TOKEN_OUT] = "OUT",
 59 | 	[TOKEN_PAGE] = "PAGE",
 60 | 	[TOKEN_PAUSE] = "PAUSE",
 61 | 	[TOKEN_PEEK] = "PEEK",
 62 | 	[TOKEN_PEEKINT] = "PEEKINT",
 63 | 	[TOKEN_POKE] = "POKE",
 64 | 	[TOKEN_POKEINT] = "POKEINT",
 65 | 	[TOKEN_PORT] = "PORT",
 66 | 	[TOKEN_PRINT] = "PRINT",
 67 | 	[TOKEN_PROGSTART] = "PROGSTART",
 68 | 	[TOKEN_RAMSTART] = "RAMSTART",
 69 | 	[TOKEN_RAND] = "RAND",
 70 | 	[TOKEN_READ] = "READ",
 71 | 	[TOKEN_REC] = "REC",
 72 | 	[TOKEN_REM] = "REM",
 73 | 	[TOKEN_RENAME] = "RENAME",
 74 | 	[TOKEN_RETURN] = "RETURN",
 75 | 	[TOKEN_SAVE] = "SAVE",
 76 | 	[TOKEN_SEND] = "SEND",
 77 | 	[TOKEN_SERIAL] = "SERIAL",
 78 | 	[TOKEN_SET] = "SET",
 79 | 	[TOKEN_SIZE] = "SIZE",
 80 | 	[TOKEN_SOUND] = "SOUND",
 81 | 	[TOKEN_STRING] = "STRING",
 82 | 	[TOKEN_THEN] = "THEN",
 83 | 	[TOKEN_TIMER] = "TIMER",
 84 | 	[TOKEN_TO] = "TO",
 85 | 	[TOKEN_UNTIL] = "UNTIL",
 86 | 	[TOKEN_UPPER] = "UPPER",
 87 | 	[TOKEN_VARIABLES] = "VARIABLES",
 88 | 	[TOKEN_VERSION] = "VERSION",
 89 | 	[TOKEN_WAITKEY] = "WAITKEY",
 90 | 	[TOKEN_WHILE] = "WHILE"
 91 | };
 92 | 
 93 | Lexer lexer;		/* Scan state: source, beginning, current, line */
 94 | bool wait;		/* True while `pending` holds a token from lookahead() */
 95 | Token pending;		/* Token parked by lookahead() for the next get_token() */
 96 | 
 97 | /* =========================== INTERNAL FUNCTIONS =========================== */
 98 | void include(const char* new_fname)
 99 | {
100 | 	/* Append the file to the source text. The scan positions are carried
101 | 	 * over as offsets, because the buffer they point into is freed here. */
102 | 	char* src = read_file(new_fname);
103 | 	size_t begin = lexer.beginning - lexer.source;
104 | 	size_t cur = lexer.current - lexer.source;
105 | 	size_t len = strlen(lexer.source) + (src ? strlen(src) : 0) + 2;
106 | 	char* new_source = src ? malloc(len) : NULL;
107 | 	if (new_source != NULL) {
108 | 		snprintf(new_source, len, "%s\n%s", lexer.source, src);
109 | 		free((char*) lexer.source);
110 | 		lexer.source = new_source;
111 | 		lexer.beginning = new_source + begin;
112 | 		lexer.current = new_source + cur;
113 | 	}
114 | 	free(src);	/* NOTE(review): assumes read_file() mallocs its result */
115 | }
116 | 
117 | /* Consider NUL and operators as whitespace: true for every character that
118 |  * ends a word (keyword, variable or label name) */
119 | bool iswhite(char c)
120 | {
121 | 	if (c == '\0')
122 | 		return true;
123 | 	/* <ctype.h> functions take an unsigned char value; handing them a
124 | 	 * negative char (a byte above 0x7F where char is signed) is undefined */
125 | 	if (isspace((unsigned char) c))
126 | 		return true;
127 | 
128 | 	/* Operators, quotes and ';' end a word as well */
129 | 	return strchr("+-*/='\"!><%&;", c) != NULL;
130 | }
131 | 
132 | void lex_error(const char* str)
133 | {
134 | 	int where = lexer.line;		/* Line being scanned right now */
135 | 	printf("\x1B[31mError (lex)\x1B[0m: %s at line: %d.\n", str, where);
136 | 	raise_error();			/* Flag the error */
137 | }
138 | 
139 | Token init_token(TokenType t)
140 | {
141 | 	/* The token spans [lexer.beginning, lexer.current) on the current line */
142 | 	Token tok;
143 | 	tok.line = lexer.line;
144 | 	tok.text = lexer.beginning;
145 | 	tok.length = (int) (lexer.current - tok.text);
146 | 	tok.type = t;
147 | 	return tok;
148 | }
149 | 
150 | TokenType match_keyword(const char* str)
151 | {
152 | 	/* Case-insensitive keyword lookup; TOKEN_IDENTIFIER if none matches */
153 | 	size_t len = lexer.current - lexer.beginning;
154 | 	TokenType ret = TOKEN_IDENTIFIER;
155 | 	/* len + 1 bytes: string_uppercase() is handed a C string, so the copy
156 | 	 * needs a terminator, which strncpy() into len bytes never writes */
157 | 	char* upper_str = malloc(len + 1);
158 | 	if (upper_str == NULL) {
159 | 		lex_error("Out of memory");
160 | 		return ret;
161 | 	}
162 | 	memcpy(upper_str, str, len);
163 | 	upper_str[len] = '\0';
164 | 	string_uppercase(upper_str);
165 | 	for (int i = TOKEN_ALERT; i <= TOKEN_WHILE; i++)
166 | 		if (strcmp(upper_str, keywords_names[i]) == 0)
167 | 			ret = i;
168 | 	free(upper_str);
169 | 	return ret;
170 | }
171 | 
172 | /* ============================ EXPOSED FUNCTIONS =========================== */
173 | void init_lexer(const char* source)
174 | {
175 | 	lexer.source = source;
176 | 	lexer.beginning = source;
177 | 	lexer.current = source;
178 | 	lexer.line = 1;
179 | 
180 | 	/* First pass is for INCLUDE: every INCLUDE "name" appends that file */
181 | 	while (lookahead().type != TOKEN_EOF) {
182 | 		if (get_token().type != TOKEN_INCLUDE)
183 | 			continue;
184 | 		Token name = get_token();
185 | 		if (name.type != TOKEN_STRING_LITERAL)
186 | 			continue;
187 | 		char* str = malloc(name.length + 1);
188 | 		if (str == NULL)
189 | 			break;		/* Out of memory: give up on includes */
190 | 		memcpy(str, name.text, name.length);
191 | 		str[name.length] = '\0';	/* Token text is not terminated */
192 | 		include(str);
193 | 		free(str);	/* Only needed for the call; it used to leak */
194 | 	}
195 | 	/* Reset lexer, so that real lexing starts at the top of the source */
196 | 	lexer.beginning = lexer.source;
197 | 	lexer.current = lexer.source;
198 | 	lexer.line = 1;
199 | 	wait = false;
200 | }
201 | 
202 | Token get_token()
203 | {
204 | 	/* Next token of the source; TOKEN_ERROR follows a reported lex error */
205 | 	if (wait) {		/* A token parked by lookahead() goes out first */
206 | 		wait = false;
207 | 		return pending;
208 | 	}
209 | 
210 | 	Token ret;
211 | 	next:
212 | 	lexer.beginning = lexer.current;
213 | 	switch (*lexer.current++) {
214 | 		/* If we meet NUL - we ended lexing */
215 | 		case '\0':
216 | 			lexer.current--;	/* Don't move forward! */
217 | 			ret = init_token(TOKEN_EOF);
218 | 			break;
219 | 
220 | 		/* Handle whitespace */
221 | 		case '\n':
222 | 			lexer.line++;	/* fallthrough */
223 | 		case '\r': case '\t': case ' ':
224 | 			goto next;
225 | 
226 | 		/* Handle basic, one character operators - this is easy */
227 | 		case '+':
228 | 			ret = init_token(TOKEN_PLUS);
229 | 			break;
230 | 		case '-':
231 | 			ret = init_token(TOKEN_MINUS);
232 | 			break;
233 | 		case '*':
234 | 			ret = init_token(TOKEN_STAR);
235 | 			break;
236 | 		case '/':
237 | 			ret = init_token(TOKEN_SLASH);
238 | 			break;
239 | 		case '%':
240 | 			ret = init_token(TOKEN_PERCENT);
241 | 			break;
242 | 		case '=':
243 | 			ret = init_token(TOKEN_EQUALS);
244 | 			break;
245 | 		case '>':
246 | 			ret = init_token(TOKEN_GREATER);
247 | 			break;
248 | 		case '<':
249 | 			ret = init_token(TOKEN_SMALLER);
250 | 			break;
251 | 		case '&':
252 | 			ret = init_token(TOKEN_AMPERSAND);
253 | 			break;
254 | 		case ';':
255 | 			ret = init_token(TOKEN_SEMICOLON);
256 | 			break;
257 | 
258 | 		case '!':
259 | 			if (*lexer.current == '=') {
260 | 				lexer.current++;	/* Skip = */
261 | 				ret = init_token(TOKEN_NOT_EQUALS);
262 | 				break;
263 | 			}
264 | 			lex_error("Bang not followed by equal sign");
265 | 			ret = init_token(TOKEN_ERROR);
266 | 			break;
267 | 
268 | 		/* String literals; they must close before the line or source ends */
269 | 		case '"':
270 | 			lexer.beginning++;	/* Skip first " */
271 | 			while (*lexer.current != '"') {
272 | 				char c = *lexer.current;
273 | 				if (c == '\n' || c == '\0') {
274 | 					lex_error(c == '\n' ? "Newline in string constant"
275 | 						: "Unterminated string constant");
276 | 					ret = init_token(TOKEN_ERROR);
277 | 					return ret;
278 | 				}
279 | 				lexer.current++;
280 | 			}
281 | 			ret = init_token(TOKEN_STRING_LITERAL);
282 | 			lexer.current++;	/* Skip ending " */
283 | 			break;
284 | 
285 | 		/* Character literals: exactly one character between the quotes */
286 | 		case '\'':
287 | 			lexer.beginning++;	/* Skip first ' */
288 | 			if (*lexer.current == '\0' || *++lexer.current != '\'') {
289 | 				lex_error(*lexer.current ? "Character constant too long"
290 | 					: "Unterminated character constant");
291 | 				ret = init_token(TOKEN_ERROR);
292 | 				return ret;
293 | 			}
294 | 			ret = init_token(TOKEN_CHARACTER_LITERAL);
295 | 			lexer.current++;	/* Skip ending ' */
296 | 			break;
297 | 
298 | 		/* String variables */
299 | 		case '$': {
300 | 			char c = *lexer.current;
301 | 			if (c < '1' || c > '8') {
302 | 				lex_error("Invalid string variable");
303 | 				if (c != '\0')	/* Never step over the end */
304 | 					lexer.current++;
305 | 				ret = init_token(TOKEN_ERROR);
306 | 				return ret;
307 | 			}
308 | 			lexer.current++;	/* Take in number */
309 | 			ret = init_token(TOKEN_STRING_VARIABLE);
310 | 			break;
311 | 		}
312 | 
313 | 		default:
314 | 			/* Is it a number? */
315 | 			if (isdigit((unsigned char) *(lexer.current - 1))) {
316 | 				while (isdigit((unsigned char) *lexer.current))
317 | 					lexer.current++;
318 | 				ret = init_token(TOKEN_NUMERIC_LITERAL);
319 | 			}
320 | 			/* Is it a numeric variable? */
321 | 			else if (iswhite(*lexer.current)) {
322 | 				if (!isalpha((unsigned char) *(lexer.current - 1))) {
323 | 					lex_error("Invalid numeric variable");
324 | 					ret = init_token(TOKEN_ERROR);
325 | 					return ret;
326 | 				}
327 | 				ret = init_token(TOKEN_NUMERIC_VARIABLE);
328 | 			}
329 | 			/* No! It is a random string! */
330 | 			else {
331 | 				while (!iswhite(*lexer.current))
332 | 					lexer.current++;
333 | 				TokenType t = match_keyword(lexer.beginning);
334 | 				/* Maybe it was a label? */
335 | 				if (*(lexer.current - 1) == ':') {
336 | 					lexer.current--;	/* Ignore : */
337 | 					ret = init_token(TOKEN_LABEL);
338 | 					lexer.current++;	/* Skip it */
339 | 				}
340 | 				/* Or quite possibly comment; it ends with its line */
341 | 				else if (t == TOKEN_REM) {
342 | 					while (*lexer.current != '\n' &&
343 | 					       *lexer.current != '\0')
344 | 						lexer.current++;
345 | 					goto next;
346 | 				}
347 | 				else
348 | 					ret = init_token(t);
349 | 			}
350 | 	}
351 | 	return ret;
352 | }
353 | 
354 | Token lookahead()
355 | {
356 | 	Token peeked = get_token();	/* Fetch the next token now ... */
357 | 	pending = peeked;		/* ... and park it for get_token() */
358 | 	wait = true;
359 | 	return peeked;
360 | }
361 | 


--------------------------------------------------------------------------------
/src/front/parser.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2022, Wojciech Grzela <grzela.wojciech@gmail.com>
  3 |  * Licensed under GNU General Public License version 3.
  4 |  */
  5 | 
  6 | /* Standard library includes */
  7 | #include <stdio.h>
  8 | #include <stdlib.h>
  9 | #include <stdbool.h>
 10 | #include <ctype.h>
 11 | 
 12 | /* Custom includes */
 13 | #include <parser.h>
 14 | #include <table.h>
 15 | #include <util.h>
 16 | 
 17 | SymbolTable* labels;	/* Symbol table; presumably the one given to parse() */
 18 | StringTable* strings;	/* String table; literal() adds string literals to it */
 19 | Token current;		/* Token last returned by match() or scan() */
 20 | bool is_current;	/* Set by scan(), cleared by match() */
 21 | 
 22 | /* ================================= UTILITY ================================ */
 23 | void parse_error(const char* msg)
 24 | {
 25 | 	printf("\x1B[31mError (parse)\x1B[0m: %s at line: %d, got \"%.*s\".\n",
 26 | 		msg, current.line, current.length, current.text);
 27 | 	raise_error();	/* Flags the error; callers go on (e.g. synchronize()) */
 28 | }
 29 | 
 30 | /* Check if next token has type t. The token is only peeked at, and it is
 31 |  * remembered in `current` so that parse_error() can show it */
 32 | bool match(TokenType t)
 33 | {
 34 | 	/* lookahead() keeps the token in the lexer for a later scan() */
 35 | 	current = lookahead();
 36 | 	is_current = false;
 37 | 	if (current.type != t)
 38 | 		return false;
 39 | 	return true;
 40 | }
 41 | 
 42 | /* Consume the next token; it is also stored in `current`, which is what
 43 |  * parse_error() reports */
 44 | Token scan()
 45 | {
 46 | 	is_current = true;
 47 | 	return current = get_token();
 48 | }
 49 | 
 50 | Node* init_node(NodeType t, Token token, int v, Node* op1, Node *op2)
 51 | {
 52 | 	/* Allocate and fill a node; NULL (error reported) if out of memory */
 53 | 	Node* ret = malloc(sizeof *ret);
 54 | 	if (ret == NULL) {
 55 | 		parse_error("Out of memory");
 56 | 		return NULL;
 57 | 	}
 58 | 	*ret = (Node) { .type = t, .attribute = token.type, .line = token.line,
 59 | 			.val = v, .op1 = op1, .op2 = op2 };
 60 | 	return ret;
 61 | }
 62 | 
 63 | void free_node(Node* n)
 64 | {
 65 | 	/* Frees the whole tree under n: recurse left, walk down the right */
 66 | 	while (n != NULL) {
 67 | 		Node* right = n->op2;
 68 | 		free_node(n->op1);
 69 | 		free(n);
 70 | 		n = right;
 71 | 	}
 72 | }
 73 | 
 74 | /* Recursively "pretty" prints node: "(type attribute val", both operands
 75 |  * two columns deeper, then ")"; a missing node prints as "(null)" */
 76 | void print_node(Node* n, int lvl)
 77 | {
 78 | 	/* Indentation of the opening line */
 79 | 	for (int col = 0; col < lvl; col++)
 80 | 		putchar(' ');
 81 | 
 82 | 	if (n == NULL) {
 83 | 		puts("(null)");
 84 | 		return;
 85 | 	}
 86 | 	printf("(%d %d %d\n", n->type, n->attribute, n->val);
 87 | 
 88 | 	/* Left operand first, then the right one */
 89 | 	Node* operands[2] = { n->op1, n->op2 };
 90 | 	for (int i = 0; i < 2; i++)
 91 | 		print_node(operands[i], lvl + 2);
 92 | 
 93 | 	/* Close current node at its own indentation */
 94 | 	for (int col = 0; col < lvl; col++)
 95 | 		putchar(' ');
 96 | 	puts(")");
 97 | }
 98 | 
 99 | /* Skip tokens until we are on keyword (to help parser to get up) */
100 | void synchronize()
101 | {
102 | 	Token t = scan();
103 | 	while (t.type != TOKEN_EOF && t.type > TOKEN_WHILE) {
104 | 		t = scan();
105 | 		t = lookahead();
106 | 	}
107 | }
108 | 
109 | /* ============================ HELPER FUNCTIONS ============================ */
110 | /* Parse literal value (strings and numbers) */
111 | Node* literal()
112 | {
113 | 	Token t;
114 | 	int val;
115 | 	if (match(TOKEN_NUMERIC_LITERAL)) {
116 | 		t = scan();
117 | 		val = atoi(t.text);
118 | 	}
119 | 	else if (match(TOKEN_STRING_LITERAL)) {
120 | 		t = scan();
121 | 		val = add_string(strings, t.text, t.length);
122 | 	}
123 | 	else if (match(TOKEN_CHARACTER_LITERAL)) {
124 | 		t = scan();
125 | 		val = t.text[0];
126 | 	}
127 | 	else if (match(TOKEN_PROGSTART) || match(TOKEN_RAMSTART) ||
128 | 		 match(TOKEN_VARIABLES) || match(TOKEN_VERSION) ||
129 | 	 	 match(TOKEN_TIMER) || match(TOKEN_INK))
130 | 		return init_node(NODE_KEYWORD_CALL, scan(), 0, NULL, NULL);
131 | 	else {
132 | 		parse_error("Expected literal");
133 | 		synchronize();
134 | 		return NULL;
135 | 	}
136 | 
137 | 	return init_node(NODE_LITERAL, t, val, NULL, NULL);
138 | }
139 | 
140 | /* Parse only one variable */
141 | Node* variable()
142 | {
143 | 	Token t;
144 | 	int target_idx;
145 | 	if (match(TOKEN_NUMERIC_VARIABLE)) {
146 | 		t = scan();
147 | 		target_idx = toupper(t.text[0]) - 'A';
148 | 	}
149 | 	else if (match(TOKEN_STRING_VARIABLE)) {
150 | 		t = scan();
151 | 		target_idx = toupper(t.text[1]) - '1';
152 | 	}
153 | 	else {
154 | 		parse_error("Expected variable");
155 | 		synchronize();
156 | 		return NULL;
157 | 	}
158 | 
159 | 	return init_node(NODE_VARIABLE, t, target_idx, NULL, NULL);
160 | }
161 | 
162 | /* Parse primary expression - variables, literals and addresses of strings */
163 | Node* primary()
164 | {
165 | 	if (match(TOKEN_AMPERSAND)) {
166 | 		Token skip = scan();		/* Skip & */
167 | 		Node* next = variable();
168 | 		return init_node(NODE_EXPR, skip, 0, next, NULL);
169 | 	}
170 | 	else if (match(TOKEN_NUMERIC_VARIABLE) || match(TOKEN_STRING_VARIABLE))
171 | 		return variable();
172 | 	else if (match(TOKEN_NUMERIC_LITERAL) || match(TOKEN_STRING_LITERAL) ||
173 | 		 match(TOKEN_CHARACTER_LITERAL))
174 | 		return literal();
175 | 	else if (match(TOKEN_PROGSTART) || match(TOKEN_RAMSTART) ||
176 | 		 match(TOKEN_VARIABLES) || match(TOKEN_VERSION) ||
177 | 	 	 match(TOKEN_TIMER) || match(TOKEN_INK))
178 | 		return init_node(NODE_KEYWORD_CALL, scan(), 0, NULL, NULL);
179 | 
180 | 	parse_error("Expected primary expression");
181 | 	synchronize();
182 | 	return NULL;
183 | }
184 | 
185 | /* Parse expression */
186 | Node* expr()
187 | {
188 | 	Node* ret = primary();
189 | 
190 | 	while (match(TOKEN_PLUS) || match(TOKEN_MINUS) || match(TOKEN_STAR) ||
191 | 	       match(TOKEN_SLASH) || match(TOKEN_PERCENT)) {
192 | 		Token next = scan();		/* Skip operator */
193 | 		Node* op2 = primary();
194 | 		ret = init_node(NODE_EXPR, next, 0, ret, op2);
195 | 	}
196 | 
197 | 	return ret;
198 | }
199 | 
200 | /* Parse only one comparison */
201 | Node* comparison()
202 | {
203 | 	Node* target = expr();
204 | 
205 | 	if (match(TOKEN_EQUALS) || match(TOKEN_SMALLER) ||
206 | 	    match(TOKEN_GREATER) || match(TOKEN_NOT_EQUALS)) {
207 | 		Token op = scan();
208 | 		Node* value = expr();
209 | 		return init_node(NODE_EXPR, op, 0, target, value);
210 | 	}
211 | 
212 | 	parse_error("Expected comparison");
213 | 	synchronize();
214 | 	free_node(target);
215 | 	return NULL;
216 | }
217 | 
218 | /* Parse whole boolean expression (with AND) */
219 | Node* boolean_expr()
220 | {
221 | 	Node* ret = comparison();
222 | 	while (match(TOKEN_AND)) {
223 | 		Token t = scan();	/* Skip AND */
224 | 		Node* op2 = comparison();
225 | 		ret = init_node(NODE_EXPR, t, 0, ret, op2);
226 | 	}
227 | 
228 | 	return ret;
229 | }
230 | 
231 | /* ============================ MAIN PARSING CODE =========================== */
232 | /* Parse an assignment */
233 | Node* assign()
234 | {
235 | 	Node* target = variable();
236 | 
237 | 	if (match(TOKEN_EQUALS)) {
238 | 		Token t = scan();	/* Discard = */
239 | 		Node* val = expr();
240 | 		Node* ret = init_node(NODE_ASSIGN, t, 0, target, val);
241 | 		return ret;
242 | 	}
243 | 
244 | 	parse_error("Expected assignment");
245 | 	synchronize();
246 | 	free_node(target);
247 | 	return NULL;
248 | }
249 | 
250 | /* Parse an IF statement */
251 | Node* if_stmt()
252 | {
253 | 	Token skip = scan();	/* Discard IF */
254 | 	Token t;
255 | 
256 | 	Node* cond = boolean_expr();
257 | 
258 | 	/* Expect THEN */
259 | 	if (!match(TOKEN_THEN)) {
260 | 		parse_error("Expected THEN");
261 | 		synchronize();
262 | 		free_node(cond);
263 | 		return NULL;
264 | 	}
265 | 	t = scan();			/* Discard THEN */
266 | 
267 | 	Node* then_case = statement();
268 | 	Node* else_case = NULL;
269 | 
270 | 	/* We have an else branch */
271 | 	if (match(TOKEN_ELSE)) {
272 | 		t = scan();		/* Discard ELSE */
273 | 		else_case = statement();
274 | 	}
275 | 
276 | 	Node* then_else = init_node(NODE_SEQUENCE, t, 0, then_case, else_case);
277 | 	return init_node(NODE_IF, skip, 0, cond, then_else);
278 | }
279 | 
280 | /* Parse a DO loop */
281 | Node* do_stmt()
282 | {
283 | 	Token skip = scan();	/* Discard DO */
284 | 	Token t = skip;
285 | 
286 | 	Node* ret = NULL;
287 | 	Node* body = NULL;	/* Body of the loop */
288 | 	Node* mod = NULL;	/* Keyword modifier (UNTIL, WHILE or ENDLESS) */
289 | 	Node* cond = NULL;	/* Condition */
290 | 
291 | 	/* Parse the body */
292 | 	while (!match(TOKEN_EOF) && !match(TOKEN_LOOP)) {
293 | 		Node* stmt = statement();
294 | 		body = init_node(NODE_SEQUENCE, t, 0, body, stmt);
295 | 	}
296 | 
297 | 	if (scan().type != TOKEN_LOOP) {	/* Discard LOOP */
298 | 		parse_error("Reached End Of File before LOOP");
299 | 		free_node(body);
300 | 		return NULL;
301 | 	}
302 | 
303 | 	/* Parse modifier and condition */
304 | 	if (match(TOKEN_ENDLESS)) {
305 | 		t = scan();	/* Discard ENDLESS */
306 | 	}
307 | 	else if (match(TOKEN_WHILE) || match(TOKEN_UNTIL)) {
308 | 		t = scan();	/* Discard WHILE or UNTIL */
309 | 		cond = boolean_expr();
310 | 	}
311 | 	else {
312 | 		parse_error("Expected LOOP modifier (UNTIL, WHILE or ENDLESS)");
313 | 		free_node(body);
314 | 		free_node(cond);
315 | 		return NULL;
316 | 	}
317 | 
318 | 	mod = init_node(NODE_KEYWORD_CALL, t, 0, NULL, NULL);
319 | 	ret = init_node(NODE_DO, skip, 0, cond,
320 | 			init_node(NODE_SEQUENCE, t, 0, body, mod));
321 | 	return ret;
322 | }
323 | 
324 | /* Parse a FOR loop */
325 | Node* for_stmt()
326 | {
327 | 	Token t = scan();	/* Discard FOR */
328 | 
329 | 	/* We can't use string variable */
330 | 	if (!match(TOKEN_NUMERIC_VARIABLE)) {
331 | 		parse_error("FOR loops require numeric iterator");
332 | 		synchronize();
333 | 		return NULL;
334 | 	}
335 | 
336 | 	Node* init = assign();		/* Initializer */
337 | 	Node* to = NULL;		/* "To" field */
338 | 	Node* body = NULL;		/* Body of the loop */
339 | 
340 | 	if (init == NULL || init->op1 == NULL) {
341 | 		parse_error("Expected valid initializer in FOR");
342 | 		synchronize();
343 | 		return NULL;
344 | 	}
345 | 
346 | 	if (!match(TOKEN_TO)) {
347 | 		parse_error("Expected TO keyword in FOR loop");
348 | 		synchronize();
349 | 		free_node(init);
350 | 		return NULL;
351 | 	}
352 | 	t = scan();		/* Discard TO */
353 | 
354 | 	/* Parse "To" field */
355 | 	to = expr();
356 | 
357 | 	/* Parse the body */
358 | 	while (!match(TOKEN_EOF) && !match(TOKEN_NEXT)) {
359 | 		Node* stmt = statement();
360 | 		body = init_node(NODE_SEQUENCE, t, 0, body, stmt);
361 | 	}
362 | 
363 | 	if (scan().type != TOKEN_NEXT) {	/* Discard NEXT */
364 | 		parse_error("Reached End Of File before LOOP");
365 | 		free_node(init);
366 | 		free_node(to);
367 | 		free_node(body);
368 | 		return NULL;
369 | 	}
370 | 
371 | 	/* Check if NEXT points to loop's variable */
372 | 	Node* temp = variable();
373 | 	if (temp->attribute != init->op1->attribute ||
374 | 	    temp->val != init->op1->val) {
375 | 		parse_error("Incorrect target for NEXT");
376 | 		/* Don't synchronize, we know what is going on */
377 | 		free_node(init);
378 | 		free_node(to);
379 | 		free_node(body);
380 | 		return NULL;
381 | 	}
382 | 
383 | 	return init_node(NODE_FOR, t, 0, init,
384 | 		init_node(NODE_SEQUENCE, t, 0, to, body));
385 | }
386 | 
387 | /* Parse whole statement (calling appropriate functions) */
388 | Node* statement()
389 | {
390 | 	/* End of file, stop parsing */
391 | 	if (match(TOKEN_EOF))
392 | 		return NULL;
393 | 
394 | 	/* If it is a variable, parse assignment */
395 | 	if (match(TOKEN_NUMERIC_VARIABLE) || match(TOKEN_STRING_VARIABLE))
396 | 		return assign();
397 | 
398 | 	/* If it is an IF, parse conditional */
399 | 	if (match(TOKEN_IF))
400 | 		return if_stmt();
401 | 
402 | 	/* If it is DO, parse loop */
403 | 	if (match(TOKEN_DO))
404 | 		return do_stmt();
405 | 
406 | 	/* Parse FOR */
407 | 	if (match(TOKEN_FOR))
408 | 		return for_stmt();
409 | 
410 | 	/* Labels aren't proper statements, but we need to remember them */
411 | 	if (match(TOKEN_LABEL)) {
412 | 		Token t = scan();		/* Grab the label */
413 | 		Node* ret = statement();
414 | 		add_real_symbol(labels, (char*) t.text, t.length, ret);
415 | 		return ret;
416 | 	}
417 | 
418 | 	/* Is it a normal keyword? */
419 | 	Token t = lookahead();
420 | 	if (t.type >= TOKEN_ALERT && t.type <= TOKEN_WHILE)
421 | 		return parse_keyword(labels);
422 | 
423 | 	/* We found something that isn't proper statement, synchronize */
424 | 	parse_error("Expected statement");
425 | 	synchronize();
426 | 	return NULL;
427 | }
428 | 
429 | Node* parse(SymbolTable* t, StringTable* s)
430 | {
431 | 	Token empty = {0, NULL, 0, 0};
432 | 	labels = t;
433 | 	strings = s;
434 | 	Node* ret = init_node(NODE_SEQUENCE, empty, 0, statement(), NULL);
435 | 	if (!match(TOKEN_EOF))
436 | 		ret->op2 = parse(t, s);
437 | 
438 | 	return ret;
439 | }
440 | 


--------------------------------------------------------------------------------
/src/util/disassembler.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2022, Wojciech Grzela <grzela.wojciech@gmail.com>
  3 |  * Licensed under GNU General Public License version 3.
  4 |  */
  5 | 
  6 | /* Standard library includes */
  7 | #include <stdio.h>
  8 | 
  9 | /* Custom includes */
 10 | #include <codegen.h>
 11 | 
 12 | static const char* regs[8] = {
 13 | 	"AX",	/* 0 */
 14 | 	"CX",	/* 1 */
 15 | 	"DX",	/* 2 */
 16 | 	"BX",	/* 3 */
 17 | 	"SP",	/* 4 */
 18 | 	"BP",	/* 5 */
 19 | 	"SI",	/* 6 */
 20 | 	"DI"	/* 7 */
 21 | };
 22 | 
 23 | static const char* byte_regs[8] = {
 24 | 	"AL",	/* 0 */
 25 | 	"BL",	/* 1 */
 26 | 	"CL",	/* 2 */
 27 | 	"DL",	/* 3 */
 28 | 	"AH",	/* 4 */
 29 | 	"BH",	/* 5 */
 30 | 	"CH",	/* 6 */
 31 | 	"DH"	/* 7 */
 32 | };
 33 | 
 34 | /* Swap endianness */
 35 | uint16_t swap(uint16_t word)
 36 | {
 37 | 	return (word >> 8) | (word << 8);
 38 | }
 39 | 
 40 | uint16_t read_word(CompileTarget* c, int offset)
 41 | {
 42 | 	uint8_t low = c->code[offset + 1] & 0xFF;
 43 | 	uint8_t high = c->code[offset] & 0xFF;
 44 | 	uint16_t swapped = ((high << 8) + low) & 0xFFFF;
 45 | 	return swap(swapped);
 46 | }
 47 | 
 48 | int disassemble_instruction(CompileTarget* c, int offset)
 49 | {
 50 | 	printf("0x%04X  ", offset + LOAD);	/* Print our current offset */
 51 | 
 52 | 	uint8_t byte = c->code[offset];
 53 | 	switch (byte) {
 54 | 		case 0x03: {	/* ADD r16, r16 */
 55 | 			uint8_t modrm = c->code[offset + 1];
 56 | 			printf("%02X%02X          ", byte, modrm);
 57 | 			int dst = (modrm & 0x38) >> 3;
 58 | 			int src = modrm & 7;
 59 | 			printf("ADD %s, %s\n", regs[dst], regs[src]);
 60 | 			offset += 2;
 61 | 			break;
 62 | 		}
 63 | 		case 0x0B: {	/* OR r16, [imm16] */
 64 | 			uint8_t modrm = c->code[offset + 1];
 65 | 			uint16_t addr = read_word(c, offset + 2);
 66 | 			printf("%02X%02X%04X      ", byte, modrm, swap(addr));
 67 | 			int dst = (modrm & 0x38) >> 3;
 68 | 			printf("OR %s, [0x%04X]\n", regs[dst], addr);
 69 | 			offset += 4;
 70 | 			break;
 71 | 		}
 72 | 		case 0x0F: {	/* Various Jcc rel16 */
 73 | 			uint8_t op = c->code[offset + 1];
 74 | 			uint16_t rel = read_word(c, offset + 2);
 75 | 			uint16_t addr = offset + 4 + rel + LOAD;
 76 | 			printf("%02X%02X%04X      ", byte, op, swap(rel));
 77 | 			switch (op) {
 78 | 				case 0x84: {
 79 | 					printf("JZ  near  0x%04X\n", addr);
 80 | 					break;
 81 | 				}
 82 | 				case 0x85: {
 83 | 					printf("JNE near  0x%04X\n", addr);
 84 | 					break;
 85 | 				}
 86 | 				case 0x8D: {
 87 | 					printf("JGE near  0x%04X\n", addr);
 88 | 				}
 89 | 			}
 90 | 			offset += 4;
 91 | 			break;
 92 | 		}
 93 | 		case 0x25: {	/* AND AX, imm16 */
 94 | 			uint16_t imm = read_word(c, offset + 1);
 95 | 			printf("%02X%04X        ", byte, swap(imm));
 96 | 			printf("AND AX, %d (0x%04X)\n", imm, imm);
 97 | 			offset += 3;
 98 | 			break;
 99 | 		}
100 | 		case 0x2B: {	/* SUB r16, r16 */
101 | 			uint8_t modrm = c->code[offset + 1];
102 | 			printf("%02X%02X          ", byte, modrm);
103 | 			int dst = (modrm & 0x38) >> 3;
104 | 			int src = modrm & 7;
105 | 			printf("SUB %s, %s\n", regs[dst], regs[src]);
106 | 			offset += 2;
107 | 			break;
108 | 		}
109 | 		case 0x33: {	/* XOR r16, r16 */
110 | 			uint8_t modrm = c->code[offset + 1];
111 | 			printf("%02X%02X          ", byte, modrm);
112 | 			int dst = (modrm & 0x38) >> 3;
113 | 			int src = modrm & 7;
114 | 			printf("XOR %s, %s\n", regs[dst], regs[src]);
115 | 			offset += 2;
116 | 			break;
117 | 		}
118 | 		case 0x3B: {	/* CMP r16, r16 */
119 | 			uint8_t modrm = c->code[offset + 1];
120 | 			printf("%02X%02X          ", byte, modrm);
121 | 			int dst = (modrm & 0x38) >> 3;
122 | 			int src = modrm & 7;
123 | 			printf("CMP %s, %s\n", regs[dst], regs[src]);
124 | 			offset += 2;
125 | 			break;
126 | 		}
127 | 		case 0x3D: {	/* CMP AX, imm16 */
128 | 			uint16_t imm = read_word(c, offset + 1);
129 | 			printf("%02X%04X        ", byte, swap(imm));
130 | 			printf("CMP AX, %d (0x%04X)\n", imm, imm);
131 | 			offset += 3;
132 | 			break;
133 | 		}
134 | 		/* Man I love C */
135 | 		case 0x50:
136 | 		case 0x51:
137 | 		case 0x52:
138 | 		case 0x53:
139 | 		case 0x54:
140 | 		case 0x55:
141 | 		case 0x56:
142 | 		case 0x57: {	/* PUSH r16 */
143 | 			printf("%02X            ", byte);
144 | 			printf("PUSH %s\n", regs[byte - 0x50]);
145 | 			offset += 1;
146 | 			break;
147 | 		}
148 | 		case 0x58:
149 | 		case 0x59:
150 | 		case 0x5A:
151 | 		case 0x5B:
152 | 		case 0x5C:
153 | 		case 0x5D:
154 | 		case 0x5E:
155 | 		case 0x5F: {	/* POP r16 */
156 | 			printf("%02X            ", byte);
157 | 			printf("POP %s\n", regs[byte - 0x58]);
158 | 			offset += 1;
159 | 			break;
160 | 		}
161 | 		case 0x72: {	/* JC rel8 */
162 | 			int8_t rel = c->code[offset + 1];
163 | 			uint16_t target = offset + 2 + rel + LOAD;
164 | 			printf("%02X%02X          ", byte, (uint8_t)rel);
165 | 			printf("JC  short 0x%04X\n", target);
166 | 			offset += 2;
167 | 			break;
168 | 		}
169 | 		case 0x74: {	/* JZ rel8 */
170 | 			int8_t rel = c->code[offset + 1];
171 | 			uint16_t target = offset + 2 + rel + LOAD;
172 | 			printf("%02X%02X          ", byte, (uint8_t)rel);
173 | 			printf("JZ  short 0x%04X\n", target);
174 | 			offset += 2;
175 | 			break;
176 | 		}
177 | 		case 0x75: {	/* JNE rel8 */
178 | 			int8_t rel = c->code[offset + 1];
179 | 			uint16_t target = offset + 2 + rel + LOAD;
180 | 			printf("%02X%02X          ", byte, (uint8_t)rel);
181 | 			printf("JNE short 0x%04X\n", target);
182 | 			offset += 2;
183 | 			break;
184 | 		}
185 | 		case 0x7F: {	/* JG rel8 */
186 | 			int8_t rel = c->code[offset + 1];
187 | 			uint16_t target = offset + 2 + rel + LOAD;
188 | 			printf("%02X%02X          ", byte, (uint8_t)rel);
189 | 			printf("JG  short 0x%04X\n", target);
190 | 			offset += 2;
191 | 			break;
192 | 		}
193 | 		case 0x85: {	/* TEST r16, r16 */
194 | 			uint8_t modrm = c->code[offset + 1];
195 | 			printf("%02X%02X          ", byte, modrm);
196 | 			int dst = (modrm & 0x38) >> 3;
197 | 			int src = modrm & 7;
198 | 			printf("TEST %s, %s\n", regs[dst], regs[src]);
199 | 			offset += 2;
200 | 			break;
201 | 		}
202 | 		case 0x87: {	/* XCHG r16, r16 */
203 | 			uint8_t modrm = c->code[offset + 1];
204 | 			printf("%02X%02X          ", byte, modrm);
205 | 			int dst = (modrm & 0x38) >> 3;
206 | 			int src = modrm & 7;
207 | 			printf("XCHG %s, %s\n", regs[dst], regs[src]);
208 | 			offset += 2;
209 | 			break;
210 | 		}
211 | 		case 0x89: {	/* MOV [r/m16], r16 */
212 | 			uint8_t modrm = c->code[offset + 1];
213 | 			int src = (modrm & 0x38) >> 3;
214 | 
215 | 			/* Loading into [BX] */
216 | 			if ((modrm & 0xC0) == 0 && (modrm & 7) == 7) {
217 | 				printf("%02X%02X          ", byte, modrm);
218 | 				printf("MOV [BX], %s\n", regs[src]);
219 | 				offset += 2;
220 | 				break;
221 | 			}
222 | 
223 | 			/* We are loading into immediate address */
224 | 			uint16_t addr = read_word(c, offset + 2);
225 | 			printf("%02X%02X%04X      ", byte, modrm, swap(addr));
226 | 			printf("MOV [0x%04X], %s\n", addr, regs[src]);
227 | 			offset += 4;
228 | 			break;
229 | 		}
230 | 		case 0x8B: {	/* MOV r16, r16 or MOV r16, [r/m16] */
231 | 			uint8_t modrm = c->code[offset + 1];
232 | 			int dst = (modrm & 0x38) >> 3;
233 | 
234 | 			/* It is reg to reg */
235 | 			if (modrm & 0xC0) {
236 | 				printf("%02X%02X          ", byte, modrm);
237 | 				int src = modrm & 7;
238 | 				printf("MOV %s, %s\n", regs[dst], regs[src]);
239 | 				offset += 2;
240 | 				break;
241 | 			}
242 | 
243 | 			/* It is loading from [BX] */
244 | 			if ((modrm & 7) == 7) {
245 | 				printf("%02X%02X          ", byte, modrm);
246 | 				printf("MOV %s, [BX]\n", regs[dst]);
247 | 				offset += 2;
248 | 				break;
249 | 			}
250 | 
251 | 			/* Or from immediate value */
252 | 			uint16_t addr = read_word(c, offset + 2);
253 | 			printf("%02X%02X%04X      ", byte, modrm, swap(addr));
254 | 			printf("MOV %s, [0x%04X]\n", regs[dst], addr);
255 | 
256 | 			offset += 4;
257 | 			break;
258 | 		}
259 | 		case 0x90: {	/* NOP */
260 | 			printf("%02X            ", byte);
261 | 			printf("NOP\n");
262 | 			offset += 1;
263 | 			break;
264 | 		}
265 | 		case 0x91:
266 | 		case 0x92:
267 | 		case 0x93:
268 | 		case 0x94:
269 | 		case 0x95:
270 | 		case 0x96:
271 | 		case 0x97: {	/* XCHG AX, r16 */
272 | 			printf("%02X            ", byte);
273 | 			printf("XCHG AX, %s\n", regs[byte - 0x90]);
274 | 			offset += 1;
275 | 			break;
276 | 		}
277 | 		case 0xAA: {	/* STOSB */
278 | 			printf("%02X            ", byte);
279 | 			printf("STOSB\n");
280 | 			offset += 1;
281 | 			break;
282 | 		}
283 | 		case 0xAB: {	/* STOSW */
284 | 			printf("%02X            ", byte);
285 | 			printf("STOSW\n");
286 | 			offset += 1;
287 | 			break;
288 | 		}
289 | 		case 0xB0:
290 | 		case 0xB1:
291 | 		case 0xB2:
292 | 		case 0xB3:
293 | 		case 0xB4:
294 | 		case 0xB5:
295 | 		case 0xB6:
296 | 		case 0xB7: {	/* MOV r/m8, imm8 */
297 | 			int reg = byte - 0xB0;
298 | 			uint8_t imm = c->code[offset + 1];
299 | 			printf("%02X%02X          ", byte, imm);
300 | 			printf("MOV %s, %d (0x%02X)\n", byte_regs[reg],
301 | 				imm, imm);
302 | 			offset += 2;
303 | 			break;
304 | 		}
305 | 		case 0xAC: {	/* LODSB */
306 | 			printf("%02X            ", byte);
307 | 			printf("LODSB\n");
308 | 			offset += 1;
309 | 			break;
310 | 		}
311 | 		case 0xC1: {	/* SHR r16, imm8 or SHL r16, imm8 */
312 | 			uint8_t modrm = c->code[offset + 1];
313 | 			uint8_t imm = c->code[offset + 2];
314 | 			int dst = modrm & 7;
315 | 			printf("%02X%02X%04X      ", byte, modrm, swap(imm));
316 | 			if ((modrm & 0x38) >> 3 == 4)
317 | 				printf("SHL %s, %d\n", regs[dst], imm);
318 | 			else
319 | 				printf("SHR %s, %d\n", regs[dst], imm);
320 | 			offset += 3;
321 | 			break;
322 | 		}
323 | 		case 0xC3: {	/* RET */
324 | 			printf("%02X            ", byte);
325 | 			printf("RET\n");
326 | 			offset += 1;
327 | 			break;
328 | 		}
329 | 		case 0xC7: {	/* MOV r/m16, imm16 */
330 | 			uint8_t modrm = c->code[offset + 1];
331 | 			int dst = modrm & 7;
332 | 			uint16_t imm = read_word(c, offset + 2);
333 | 			if ((modrm & 0xC0) == 0) {
334 | 				uint16_t val = read_word(c, offset + 4);
335 | 				printf("%02X%02X%04X%04X  ", byte, modrm,
336 | 					swap(imm), swap(val));
337 | 				printf("MOV [0x%04X], %d (0x%04X)\n", imm,
338 | 					val, val);
339 | 				offset += 6;
340 | 				break;
341 | 			}
342 | 			printf("%02X%02X%04X      ", byte, modrm, swap(imm));
343 | 			printf("MOV %s, %d (0x%04X)\n", regs[dst], imm, imm);
344 | 			offset += 4;
345 | 			break;
346 | 		}
347 | 		case 0xCD: {	/* INT n */
348 | 			uint8_t imm = c->code[offset + 1];
349 | 			printf("%02X%02X          ", byte, imm);
350 | 			printf("INT 0x%02X\n", imm);
351 | 			offset += 2;
352 | 			break;
353 | 		}
354 | 		case 0xE8: {	/* CALL rel16 */
355 | 			int16_t rel = read_word(c, offset + 1);
356 | 			uint16_t target = offset + 3 + rel + LOAD;
357 | 			printf("%02X%04X        ", byte, swap(rel));
358 | 			printf("CALL 0x%04X\n", target);
359 | 			offset += 3;
360 | 			break;
361 | 		}
362 | 		case 0xE9: {	/* JMP NEAR */
363 | 			int16_t rel = read_word(c, offset + 1);
364 | 			uint16_t target = offset + 3 + rel + LOAD;
365 | 			printf("%02X%04X        ", byte, swap(rel));
366 | 			printf("JMP near  0x%04X\n", target);
367 | 			offset += 3;
368 | 			break;
369 | 		}
370 | 		case 0xEB: {	/* JMP SHORT */
371 | 			int8_t rel = c->code[offset + 1];
372 | 			uint16_t target = offset + 2 + rel + LOAD;
373 | 			printf("%02X%02X          ", byte, (uint8_t)rel);
374 | 			printf("JMP short 0x%04X\n", target);
375 | 			offset += 2;
376 | 			break;
377 | 		}
378 | 		case 0xF3: {	/* REP prefix */
379 | 			printf("%02X            REP Prefix\n", byte);
380 | 			offset += 1;
381 | 			break;
382 | 		}
383 | 		case 0xF7: {	/* MUL r16 or DIV r16 */
384 | 			uint8_t modrm = c->code[offset + 1];
385 | 			printf("%02X%02X          ", byte, modrm);
386 | 			int src = modrm & 7;
387 | 			if ((modrm & 0x38) >> 3 == 4)
388 | 				printf("MUL %s\n", regs[src]);
389 | 			else
390 | 				printf("DIV %s\n", regs[src]);
391 | 			offset += 2;
392 | 			break;
393 | 		}
394 | 		case 0xFE: {	/* INC r/m8 */
395 | 			uint8_t modrm = c->code[offset + 1];
396 | 
397 | 			int src = modrm & 7;
398 | 			printf("%02X%02X          ", byte, modrm);
399 | 			printf("INC %s\n", byte_regs[src]);
400 | 
401 | 			offset += 2;
402 | 			break;
403 | 		}
404 | 		case 0xFF: {	/* INC r/m16 or DEC r/m16 or CALL r16 */
405 | 			uint8_t modrm = c->code[offset + 1];
406 | 
407 | 			/* CALL r16 */
408 | 			if ((modrm & 0x38) >> 3 == 2) {
409 | 				int src = modrm & 7;
410 | 				printf("%02X%02X          ", byte, modrm);
411 | 				printf("CALL %s\n", regs[src]);
412 | 				offset += 2;
413 | 				break;
414 | 			}
415 | 
416 | 			/* DEC r16 */
417 | 			if ((modrm & 0x38) >> 3 == 1) {
418 | 				int src = modrm & 7;
419 | 				printf("%02X%02X          ", byte, modrm);
420 | 				printf("DEC %s\n", regs[src]);
421 | 				offset += 2;
422 | 				break;
423 | 			}
424 | 
425 | 			/* INC r16 */
426 | 			if ((modrm & 0xC0) != 0) {
427 | 				int src = modrm & 7;
428 | 				printf("%02X%02X          ", byte, modrm);
429 | 				printf("INC %s\n", regs[src]);
430 | 				offset += 2;
431 | 				break;
432 | 			}
433 | 
434 | 			/* INC [mem16] */
435 | 			uint16_t addr = read_word(c, offset + 2);
436 | 			printf("%02X%02X%04X      ", byte, modrm, swap(addr));
437 | 			printf("INC [0x%04X]\n", addr);
438 | 			offset += 4;
439 | 			break;
440 | 		}
441 | 		default:
442 | 			printf("%02X            <Unknown instruction>\n", byte);
443 | 			offset++;
444 | 	}
445 | 
446 | 	return offset;
447 | }
448 | 
449 | void disassemble(CompileTarget* c)
450 | {
451 | 	int strings_len = read_word(c, 1) + 3 - RUNTIMELEN;
452 | 	/* Instructions in x86 are variable length so we do it this way */
453 | 	for (int i = 0; i < c->length; /* nothing */) {
454 | 		if (i == 3) {	/* Runtime! Don't disassemble */
455 | 			printf("0x%04X  ", i + LOAD);
456 | 			printf("(* ==== BASIC RUNTIME ==== *)\n");
457 | 			i = RUNTIMELEN;
458 | 			continue;
459 | 		}
460 | 		if (i == RUNTIMELEN && strings_len != 0) { /* String table */
461 | 			printf("0x%04X  ", i + LOAD);
462 | 			printf("(* ==== STRINGS TABLE ==== *)\n");
463 | 			i = RUNTIMELEN + strings_len;	/* Where we jump */
464 | 			continue;
465 | 		}
466 | 		i = disassemble_instruction(c, i);
467 | 	}
468 | }
469 | 


--------------------------------------------------------------------------------
/src/back/expression.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2022, Wojciech Grzela <grzela.wojciech@gmail.com>
  3 |  * Licensed under GNU General Public License version 3.
  4 |  */
  5 | 
  6 | /* Custom includes */
  7 | #include <lexer.h>
  8 | #include <codegen.h>
  9 | 
/*
 * String table that compile_expression() passes to get_offset_string()
 * when it compiles a string literal.
 * NOTE(review): the assignment is not in the visible part of this file -
 * confirm where it gets set.
 */
static StringTable* strings;
 11 | 
 12 | /* Return true if is numeric, false if string */
 13 | bool compile_expression(Node* ast, CompileTarget* code)
 14 | {
 15 | 	switch (ast->attribute) {
 16 | 		/* Keyword values: */
 17 | 		case TOKEN_INK: {
 18 | 			emit_byte(code, 0x8B);			/* MOV */
 19 | 			emit_byte(code, 0x06);			/* AX */
 20 | 			emit_word(code, INKADDR);		/* [imm16] */
 21 | 			return true;
 22 | 		}
 23 | 		case TOKEN_PROGSTART: {
 24 | 			emit_byte(code, 0xC7);			/* MOV */
 25 | 			emit_byte(code, 0xC0);			/* AX, */
 26 | 			emit_word(code, (uint16_t)LOAD);	/* imm16 */
 27 | 			return true;
 28 | 		}
 29 | 		case TOKEN_RAMSTART: {
 30 | 			emit_byte(code, 0x8B);			/* MOV */
 31 | 			emit_byte(code, 0x06);			/* AX */
 32 | 			emit_word(code, RAMSTART);		/* [imm16] */
 33 | 			return true;
 34 | 		}
 35 | 		case TOKEN_TIMER: {
 36 | 			emit_byte(code, 0x33);			/* XOR */
 37 | 			emit_byte(code, 0xC0);			/* AX, AX */
 38 | 			emit_byte(code, 0xCD);			/* INT */
 39 | 			emit_byte(code, 0x1A);			/* 0x1A */
 40 | 			emit_byte(code, 0x8B);			/* MOV */
 41 | 			emit_byte(code, 0xC2);			/* AX, DX */
 42 | 			return true;
 43 | 		}
 44 | 		case TOKEN_VARIABLES: {
 45 | 			emit_byte(code, 0xC7);			/* MOV */
 46 | 			emit_byte(code, 0xC0);			/* AX, */
 47 | 			emit_word(code, (uint16_t)VARS);	/* imm16 */
 48 | 			return true;
 49 | 		}
 50 | 		case TOKEN_VERSION: {
 51 | 			emit_byte(code, 0xC7);			/* MOV */
 52 | 			emit_byte(code, 0xC0);			/* AX, */
 53 | 			emit_word(code, (uint16_t)VERSION);	/* imm16 */
 54 | 			return true;
 55 | 		}
 56 | 
 57 | 		/* Literals / variables: */
 58 | 		case TOKEN_NUMERIC_LITERAL: {
 59 | 			emit_byte(code, 0xC7);			/* MOV */
 60 | 			emit_byte(code, 0xC0);			/* AX, */
 61 | 			emit_word(code, (uint16_t)ast->val);	/* imm16 */
 62 | 			return true;
 63 | 		}
 64 | 		case TOKEN_NUMERIC_VARIABLE: {
 65 | 			uint16_t addr = VARS + ast->val * 2;
 66 | 			emit_byte(code, 0x8B);			/* MOV */
 67 | 			emit_byte(code, 0x06);			/* AX, */
 68 | 			emit_word(code, addr);			/* [imm16] */
 69 | 			return true;
 70 | 		}
 71 | 		case TOKEN_STRING_LITERAL: {
 72 | 			int offset = get_offset_string(strings, ast->val);
 73 | 			uint16_t addr = LOAD + RUNTIMELEN + offset;
 74 | 			emit_byte(code, 0xC7);			/* MOV */
 75 | 			emit_byte(code, 0xC6);			/* SI, */
 76 | 			emit_word(code, addr);			/* imm16 */
 77 | 			return false;
 78 | 		}
 79 | 		case TOKEN_STRING_VARIABLE: {
 80 | 			uint16_t addr = STRVARS + ast->val * 128;
 81 | 			emit_byte(code, 0xC7);			/* MOV */
 82 | 			emit_byte(code, 0xC6);			/* SI, */
 83 | 			emit_word(code, addr);			/* imm16 */
 84 | 			return false;
 85 | 		}
 86 | 		case TOKEN_CHARACTER_LITERAL: {
 87 | 			emit_byte(code, 0xC7);			/* MOV */
 88 | 			emit_byte(code, 0xC0);			/* AX, */
 89 | 			emit_word(code, (uint16_t)ast->val);	/* imm16 */
 90 | 			return true;
 91 | 		}
 92 | 
 93 | 		/* Numeric / string operators: */
 94 | 		case TOKEN_PLUS: {
 95 | 			bool a = compile_expression(ast->op1, code);
 96 | 
 97 | 			/* Save value (numeric to BX, string to DI) */
 98 | 			if (a) {
 99 | 				emit_byte(code, 0x8B);		/* MOV */
100 | 				emit_byte(code, 0xD8);		/* BX, AX */
101 | 			}
102 | 			else {
103 | 				emit_byte(code, 0x8B);		/* MOV */
104 | 				emit_byte(code, 0xFE);		/* DI, SI */
105 | 			}
106 | 
107 | 			bool b = compile_expression(ast->op2, code);
108 | 
109 | 			/* Check typing */
110 | 			if (a != b) {
111 | 				compile_error("Type error in expression", ast);
112 | 				return false;
113 | 			}
114 | 
115 | 			/* Perform addition */
116 | 			if (a) {
117 | 				emit_byte(code, 0x03);		/* ADD */
118 | 				emit_byte(code, 0xC3);		/* AX, BX */
119 | 				return true;
120 | 			}
121 | 			else {
122 | 				emit_byte(code, 0x87);		/* XCHG */
123 | 				emit_byte(code, 0xFE);		/* DI, SI */
124 | 				emit_call(code, STRADD);	/* Call adder */
125 | 				return false;
126 | 			}
127 | 		}
128 | 		case TOKEN_MINUS: {
129 | 			bool a = compile_expression(ast->op1, code);
130 | 			if (!a) {
131 | 				compile_error("Type error in expression", ast);
132 | 				return false;
133 | 			}
134 | 
135 | 			/* Save result */
136 | 			emit_byte(code, 0x8B);			/* MOV */
137 | 			emit_byte(code, 0xD8);			/* BX, AX */
138 | 
139 | 			bool b = compile_expression(ast->op2, code);
140 | 			if (!b) {
141 | 				compile_error("Type error in expression", ast);
142 | 				return false;
143 | 			}
144 | 
145 | 			emit_byte(code, 0x93);			/* XCHG AX,BX */
146 | 
147 | 			emit_byte(code, 0x2B);			/* SUB */
148 | 			emit_byte(code, 0xC3);			/* AX, BX */
149 | 
150 | 			return true;
151 | 		}
152 | 		case TOKEN_STAR: {
153 | 			bool a = compile_expression(ast->op1, code);
154 | 			if (!a) {
155 | 				compile_error("Type error in expression", ast);
156 | 				return false;
157 | 			}
158 | 
159 | 			/* Save result */
160 | 			emit_byte(code, 0x8B);			/* MOV */
161 | 			emit_byte(code, 0xD8);			/* BX, AX */
162 | 
163 | 			bool b = compile_expression(ast->op2, code);
164 | 			if (!b) {
165 | 				compile_error("Type error in expression", ast);
166 | 				return false;
167 | 			}
168 | 
169 | 			emit_byte(code, 0xF7);			/* MUL */
170 | 			emit_byte(code, 0xE3);			/* BX */
171 | 
172 | 			return true;
173 | 		}
174 | 		case TOKEN_SLASH: {
175 | 			bool a = compile_expression(ast->op1, code);
176 | 			if (!a) {
177 | 				compile_error("Type error in expression", ast);
178 | 				return false;
179 | 			}
180 | 
181 | 			/* Save result */
182 | 			emit_byte(code, 0x8B);			/* MOV */
183 | 			emit_byte(code, 0xD8);			/* BX, AX */
184 | 
185 | 			bool b = compile_expression(ast->op2, code);
186 | 			if (!b) {
187 | 				compile_error("Type error in expression", ast);
188 | 				return false;
189 | 			}
190 | 
191 | 			/* Handle division by zero */
192 | 			emit_byte(code, 0x85);			/* TEST */
193 | 			emit_byte(code, 0xC0);			/* AX, AX */
194 | 			emit_byte(code, 0x75);			/* JNE */
195 | 			emit_byte(code, 0x03);			/* rel8 */
196 | 			emit_call(code, ZERODIV);		/* Error! */
197 | 
198 | 			/* Proceed */
199 | 			emit_byte(code, 0x93);			/* XCHG AX,BX */
200 | 			emit_byte(code, 0x33);			/* XOR */
201 | 			emit_byte(code, 0xD2);			/* DX, DX */
202 | 			emit_byte(code, 0xF7);			/* DIV */
203 | 			emit_byte(code, 0xF3);			/* BX */
204 | 
205 | 			return true;
206 | 		}
207 | 		case TOKEN_PERCENT: {
208 | 			bool a = compile_expression(ast->op1, code);
209 | 			if (!a) {
210 | 				compile_error("Type error in expression", ast);
211 | 				return false;
212 | 			}
213 | 
214 | 			/* Save result */
215 | 			emit_byte(code, 0x8B);			/* MOV */
216 | 			emit_byte(code, 0xD8);			/* BX, AX */
217 | 
218 | 			bool b = compile_expression(ast->op2, code);
219 | 			if (!b) {
220 | 				compile_error("Type error in expression", ast);
221 | 				return false;
222 | 			}
223 | 
224 | 			/* Handle division by zero */
225 | 			emit_byte(code, 0x85);			/* TEST */
226 | 			emit_byte(code, 0xC0);			/* AX, AX */
227 | 			emit_byte(code, 0x75);			/* JNE */
228 | 			emit_byte(code, 0x03);			/* rel8 */
229 | 			emit_call(code, ZERODIV);		/* Error! */
230 | 
231 | 			/* Proceed */
232 | 			emit_byte(code, 0x93);			/* XCHG AX,BX */
233 | 			emit_byte(code, 0x33);			/* XOR */
234 | 			emit_byte(code, 0xD2);			/* DX, DX */
235 | 			emit_byte(code, 0xF7);			/* DIV */
236 | 			emit_byte(code, 0xF3);			/* BX */
237 | 			emit_byte(code, 0x8B);			/* MOV */
238 | 			emit_byte(code, 0xC2);			/* AX, DX */
239 | 
240 | 			return true;
241 | 		}
242 | 		case TOKEN_AMPERSAND: {
243 | 			uint16_t addr;
244 | 			if (ast->op1->attribute == TOKEN_NUMERIC_VARIABLE)
245 | 				addr = VARS + ast->op1->val * 2;
246 | 			else
247 | 				addr = STRVARS + ast->op1->val * 128;
248 | 
249 | 			emit_byte(code, 0xC7);			/* MOV */
250 | 			emit_byte(code, 0xC0);			/* AX, */
251 | 			emit_word(code, addr);			/* imm16 */
252 | 			return true;
253 | 		}
254 | 
255 | 		/* Boolean operators: */
256 | 		case TOKEN_AND: {
257 | 			bool a = compile_expression(ast->op1, code);
258 | 			if (!a) {
259 | 				compile_error("Type error in expression", ast);
260 | 				return false;
261 | 			}
262 | 
263 | 			/* Check first boolean */
264 | 			emit_byte(code, 0x85);			/* TEST */
265 | 			emit_byte(code, 0xC0);			/* AX, AX */
266 | 			emit_byte(code, 0x0F);			/* JZ */
267 | 			emit_byte(code, 0x84);			/* NEAR */
268 | 			emit_word(code, 0x0000);		/* False */
269 | 			int patch = code->length - 2;
270 | 
271 | 			bool b = compile_expression(ast->op2, code);
272 | 			if (!b) {
273 | 				compile_error("Type error in expression", ast);
274 | 				return false;
275 | 			}
276 | 
277 | 			/* Check second boolean */
278 | 			emit_byte(code, 0x85);			/* TEST */
279 | 			emit_byte(code, 0xC0);			/* AX, AX */
280 | 			emit_byte(code, 0x74);			/* JZ */
281 | 			emit_byte(code, 0x06);			/* False */
282 | 
283 | 			/* Both were true */
284 | 			emit_byte(code, 0xC7);			/* MOV */
285 | 			emit_byte(code, 0xC0);			/* AX, */
286 | 			emit_word(code, 0x0001);		/* 1 */
287 | 			emit_byte(code, 0xEB);			/* JMP SHORT */
288 | 			emit_byte(code, 0x02);			/* Skip false */
289 | 
290 | 			/* Patch up our previous jump */
291 | 			uint16_t rel = code->length - (patch + 2);
292 | 			code->code[patch] = (uint8_t) rel & 0xFF;
293 | 			code->code[patch + 1] = (uint8_t) (rel >> 8) & 0xFF;
294 | 
295 | 			/* One was false */
296 | 			emit_byte(code, 0x33);			/* XOR */
297 | 			emit_byte(code, 0xC0);			/* AX, AX */
298 | 
299 | 			return true;
300 | 		}
301 | 		case TOKEN_EQUALS: {
302 | 			bool a = compile_expression(ast->op1, code);
303 | 			/* Save value, depending on string/numeric */
304 | 			if (a) {
305 | 				emit_byte(code, 0x8B);	/* MOV */
306 | 				emit_byte(code, 0xD8);	/* BX, AX */
307 | 			}
308 | 			else {
309 | 				emit_byte(code, 0x8B);	/* MOV */
310 | 				emit_byte(code, 0xFE);	/* DI, SI */
311 | 			}
312 | 
313 | 			bool b = compile_expression(ast->op2, code);
314 | 			if (a != b) {
315 | 				compile_error("Type error in expression", ast);
316 | 				return false;
317 | 			}
318 | 
319 | 			/* Perform comparison */
320 | 			if (a) {
321 | 				emit_byte(code, 0x3B);	/* CMP */
322 | 				emit_byte(code, 0xC3);	/* AX, BX */
323 | 				emit_byte(code, 0x75);	/* JNE */
324 | 				emit_byte(code, 0x06);	/* Skip == branch */
325 | 				emit_byte(code, 0xC7);	/* MOV */
326 | 				emit_byte(code, 0xC0);	/* AX, */
327 | 				emit_word(code, 0x0001);/* 1 */
328 | 				emit_byte(code, 0xEB);	/* JMP short */
329 | 				emit_byte(code, 0x02);	/* Skip != branch */
330 | 				emit_byte(code, 0x33);	/* XOR */
331 | 				emit_byte(code, 0xC0);	/* AX, AX */
332 | 			}
333 | 			else {
334 | 				/* CALL os_string_compare */
335 | 				emit_call(code, 0x0045);
336 | 				emit_byte(code, 0x72);	/* JC */
337 | 				emit_byte(code, 0x04);	/* Skip != branch */
338 | 				emit_byte(code, 0x33);	/* XOR */
339 | 				emit_byte(code, 0xC0);	/* AX, AX */
340 | 				emit_byte(code, 0xEB);	/* JMP short */
341 | 				emit_byte(code, 0x04);	/* Skip == branch */
342 | 				emit_byte(code, 0xC7);	/* MOV */
343 | 				emit_byte(code, 0xC0);	/* AX, */
344 | 				emit_word(code, 0x0001);/* 1 */
345 | 			}
346 | 			return true;
347 | 		}
348 | 		case TOKEN_SMALLER: {
349 | 			bool a = compile_expression(ast->op1, code);
350 | 			if (!a) {
351 | 				compile_error("Type error in expression", ast);
352 | 				return false;
353 | 			}
354 | 
355 | 			emit_byte(code, 0x8B);		/* MOV */
356 | 			emit_byte(code, 0xD8);		/* BX, AX */
357 | 
358 | 			bool b = compile_expression(ast->op2, code);
359 | 			if (!b) {
360 | 				compile_error("Type error in expression", ast);
361 | 				return false;
362 | 			}
363 | 
364 | 			emit_byte(code, 0x3B);		/* CMP */
365 | 			emit_byte(code, 0xC3);		/* AX, BX */
366 | 			emit_byte(code, 0x7F);		/* JG */
367 | 			emit_byte(code, 0x04);		/* Skip <= branch */
368 | 			emit_byte(code, 0x33);		/* XOR */
369 | 			emit_byte(code, 0xC0);		/* AX, AX */
370 | 			emit_byte(code, 0xEB);		/* JMP short */
371 | 			emit_byte(code, 0x04);		/* Skip > branch */
372 | 			emit_byte(code, 0xC7);		/* MOV */
373 | 			emit_byte(code, 0xC0);		/* AX, */
374 | 			emit_word(code, 0x0001);	/* 1 */
375 | 			return true;
376 | 		}
377 | 		case TOKEN_GREATER: {
378 | 			bool a = compile_expression(ast->op1, code);
379 | 			if (!a) {
380 | 				compile_error("Type error in expression", ast);
381 | 				return false;
382 | 			}
383 | 
384 | 			emit_byte(code, 0x8B);		/* MOV */
385 | 			emit_byte(code, 0xD8);		/* BX, AX */
386 | 
387 | 			bool b = compile_expression(ast->op2, code);
388 | 			if (!b) {
389 | 				compile_error("Type error in expression", ast);
390 | 				return false;
391 | 			}
392 | 
393 | 			emit_byte(code, 0x3B);		/* CMP */
394 | 			emit_byte(code, 0xD8);		/* BX, AX */
395 | 			emit_byte(code, 0x7F);		/* JG */
396 | 			emit_byte(code, 0x04);		/* skip >= branch */
397 | 			emit_byte(code, 0x33);		/* XOR */
398 | 			emit_byte(code, 0xC0);		/* AX, AX */
399 | 			emit_byte(code, 0xEB);		/* JMP short */
400 | 			emit_byte(code, 0x04);		/* skip < branch */
401 | 			emit_byte(code, 0xC7);		/* MOV */
402 | 			emit_byte(code, 0xC0);		/* AX, */
403 | 			emit_word(code, 0x0001);	/* 1 */
404 | 			return true;
405 | 		}
406 | 		case TOKEN_NOT_EQUALS: {
407 | 			bool a = compile_expression(ast->op1, code);
408 | 			/* Save value, depending on string/numeric */
409 | 			if (a) {
410 | 				emit_byte(code, 0x8B);	/* MOV */
411 | 				emit_byte(code, 0xD8);	/* BX, AX */
412 | 			}
413 | 			else {
414 | 				emit_byte(code, 0x8B);	/* MOV */
415 | 				emit_byte(code, 0xFE);	/* DI, SI */
416 | 			}
417 | 
418 | 			bool b = compile_expression(ast->op2, code);
419 | 			if (a != b) {
420 | 				compile_error("Type error in expression", ast);
421 | 				return false;
422 | 			}
423 | 
424 | 			/* Perform comparison */
425 | 			if (a) {
426 | 				emit_byte(code, 0x3B);	/* CMP */
427 | 				emit_byte(code, 0xC3);	/* AX, BX */
428 | 				emit_byte(code, 0x75);	/* JNE */
429 | 				emit_byte(code, 0x04);	/* Skip == branch */
430 | 				emit_byte(code, 0x33);	/* XOR */
431 | 				emit_byte(code, 0xC0);	/* AX, AX */
432 | 				emit_byte(code, 0xEB);	/* JMP short */
433 | 				emit_byte(code, 0x04);	/* Skip != branch */
434 | 				emit_byte(code, 0xC7);	/* MOV */
435 | 				emit_byte(code, 0xC0);	/* AX, */
436 | 				emit_word(code, 0x0001);/* 1 */
437 | 			}
438 | 			else {
439 | 				/* CALL os_string_compare */
440 | 				emit_call(code, 0x0045);
441 | 				emit_byte(code, 0x72);	/* JC */
442 | 				emit_byte(code, 0x06);	/* Skip != branch */
443 | 				emit_byte(code, 0xC7);	/* MOV */
444 | 				emit_byte(code, 0xC0);	/* AX, */
445 | 				emit_word(code, 0x0001);/* 1 */
446 | 				emit_byte(code, 0xEB);	/* JMP short */
447 | 				emit_byte(code, 0x02);	/* Skip == branch */
448 | 				emit_byte(code, 0x33);	/* XOR */
449 | 				emit_byte(code, 0xC0);	/* AX, AX */
450 | 			}
451 | 			return true;
452 | 		}
453 | 
454 | 		/* No match: */
455 | 		default:
456 | 			compile_error("Cannot compile expression", ast);
457 | 			break;
458 | 	}
459 | 
460 | 	/* Unreached */
461 | 	return false;
462 | }
463 | 
464 | /*
465 |  * Store the given string table pointer in `strings` for later use.
466 |  * Only the pointer is kept; nothing is copied here.
467 |  */
468 | void init_expr_compiler(StringTable* str)
469 | {
470 | 	strings = str;
471 | }
468 | 


--------------------------------------------------------------------------------
/src/front/keyword_parser.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2022, Wojciech Grzela <grzela.wojciech@gmail.com>
  3 |  * Licensed under GNU General Public License version 3.
  4 |  */
  5 | 
  6 | /* Standard library includes */
  7 | #include <stdio.h>
  8 | 
  9 | /* Custom includes */
 10 | #include <parser.h>
 11 | #include <table.h>
 12 | 
 13 | SymbolTable* klabels;	/* Keyword LABELS: label table set by parse_keyword(); label() adds symbols to it */
 14 | 
 15 | /* ============================ HELPER FUNCTIONS ============================ */
 16 | /*
 17 |  * Parse a string operand: either a string variable or a string literal.
 18 |  * For any other token an error is reported, synchronize() is called and
 19 |  * NULL is returned.
 20 |  */
 21 | Node* string()
 22 | {
 23 | 	Node* operand = NULL;
 24 | 
 25 | 	if (match(TOKEN_STRING_VARIABLE)) {
 26 | 		operand = variable();
 27 | 	}
 28 | 	else if (match(TOKEN_STRING_LITERAL)) {
 29 | 		operand = literal();
 30 | 	}
 31 | 	else {
 32 | 		parse_error("Expected string expression");
 33 | 		synchronize();
 34 | 	}
 35 | 
 36 | 	return operand;
 37 | }
 27 | 
 28 | /*
 29 |  * Parse a numeric operand. Accepted forms are a numeric variable, a
 30 |  * numeric literal, one of the value keywords (PROGSTART, RAMSTART,
 31 |  * VARIABLES, VERSION, TIMER, INK) or an '&' expression handled by primary().
 32 |  * Otherwise an error is reported, synchronize() is called and NULL is
 33 |  * returned.
 34 |  */
 35 | Node* numeric()
 36 | {
 37 | 	Node* operand = NULL;
 38 | 
 39 | 	if (match(TOKEN_NUMERIC_VARIABLE)) {
 40 | 		operand = variable();
 41 | 	}
 42 | 	else if (match(TOKEN_NUMERIC_LITERAL) ||
 43 | 	         match(TOKEN_PROGSTART) || match(TOKEN_RAMSTART) ||
 44 | 	         match(TOKEN_VARIABLES) || match(TOKEN_VERSION) ||
 45 | 	         match(TOKEN_TIMER) || match(TOKEN_INK)) {
 46 | 		/* The value keywords go through literal(), same as plain numbers */
 47 | 		operand = literal();
 48 | 	}
 49 | 	else if (match(TOKEN_AMPERSAND)) {
 50 | 		operand = primary();
 51 | 	}
 52 | 	else {
 53 | 		parse_error("Expected numeric expression");
 54 | 		synchronize();
 55 | 	}
 56 | 
 57 | 	return operand;
 58 | }
 45 | 
 46 | /*
 47 |  * Parse a label reference. The next token must be an identifier; its text
 48 |  * is passed to add_unreal_symbol() on klabels and the returned value is
 49 |  * stored in the resulting NODE_LABEL node.
 50 |  * Returns NULL (after reporting and resynchronizing) for any other token.
 51 |  */
 52 | Node* label()
 53 | {
 54 | 	if (!match(TOKEN_IDENTIFIER)) {
 55 | 		parse_error("Expected label (note: labels can't be keywords!)");
 56 | 		synchronize();
 57 | 		return NULL;
 58 | 	}
 59 | 
 60 | 	Token name = scan();
 61 | 	int symbol = add_unreal_symbol(klabels, (char*) name.text, name.length);
 62 | 
 63 | 	return init_node(NODE_LABEL, name, symbol, NULL, NULL);
 64 | }
 58 | 
 59 | /*
 60 |  * Build a NODE_KEYWORD_CALL node for token t with the given operands.
 61 |  * The node's numeric argument passed to init_node() is always 0.
 62 |  */
 63 | Node* init_keyword(Token t, Node* op1, Node* op2)
 64 | {
 65 | 	Node* call = init_node(NODE_KEYWORD_CALL, t, 0, op1, op2);
 66 | 
 67 | 	return call;
 68 | }
 63 | 
 64 | /* =========================== PARSING FUNCTIONS ============================ */
 65 | /* ALERT <string>: one string operand (variable or literal). */
 66 | Node* do_alert()
 67 | {
 68 | 	Token keyword = scan();		/* Consume ALERT */
 69 | 	Node* message = string();
 70 | 
 71 | 	return init_keyword(keyword, message, NULL);
 72 | }
 71 | 
 72 | /* ASKFILE <string variable>: the operand must be a string variable. */
 73 | Node* do_askfile()
 74 | {
 75 | 	Token keyword = scan();		/* Consume ASKFILE */
 76 | 
 77 | 	if (!match(TOKEN_STRING_VARIABLE)) {
 78 | 		parse_error("Expected string variable after ASKFILE");
 79 | 		synchronize();
 80 | 		return NULL;
 81 | 	}
 82 | 
 83 | 	Node* target = variable();
 84 | 	return init_keyword(keyword, target, NULL);
 85 | }
 84 | 
 85 | /* BREAK: no operands, the node carries only the keyword token. */
 86 | Node* do_break()
 87 | {
 88 | 	return init_keyword(scan(), NULL, NULL);
 89 | }
 90 | 
 91 | /* CALL <expr>: the single operand is a full expression parsed by expr(). */
 92 | Node* do_call()
 93 | {
 94 | 	Token keyword = scan();		/* Consume CALL */
 95 | 	Node* target = expr();
 96 | 	return init_keyword(keyword, target, NULL);
 97 | }
 98 | 
 99 | /*
100 |  * CASE (LOWER | UPPER) <string variable>
101 |  * op1 is a keyword node for the LOWER/UPPER modifier, op2 the variable.
102 |  */
103 | Node* do_case()
104 | {
105 | 	Token keyword = scan();		/* Consume CASE */
106 | 
107 | 	if (!match(TOKEN_LOWER) && !match(TOKEN_UPPER)) {
108 | 		parse_error("Expected LOWER or UPPER after CASE");
109 | 		synchronize();
110 | 		return NULL;
111 | 	}
112 | 
113 | 	Node* mode = init_keyword(scan(), NULL, NULL);
114 | 	if (!match(TOKEN_STRING_VARIABLE)) {
115 | 		parse_error("Expected string variable after CASE");
116 | 		synchronize();
117 | 		free_node(mode);	/* The modifier node is not handed to anyone */
118 | 		return NULL;
119 | 	}
120 | 
121 | 	Node* target = variable();
122 | 	return init_keyword(keyword, mode, target);
123 | }
119 | 
120 | /* CLS: no operands. */
121 | Node* do_cls()
122 | {
123 | 	return init_keyword(scan(), NULL, NULL);
124 | }
125 | 
126 | /* CURSOR (ON | OFF): op1 is a keyword node for the ON/OFF modifier. */
127 | Node* do_cursor()
128 | {
129 | 	Token keyword = scan();		/* Consume CURSOR */
130 | 
131 | 	if (!match(TOKEN_ON) && !match(TOKEN_OFF)) {
132 | 		parse_error("Expected ON or OFF after CURSOR");
133 | 		synchronize();
134 | 		return NULL;
135 | 	}
136 | 
137 | 	Node* state = init_keyword(scan(), NULL, NULL);
138 | 	return init_keyword(keyword, state, NULL);
139 | }
139 | 
140 | /* CURSCHAR <numeric variable>: the operand must be a numeric variable. */
141 | Node* do_curschar()
142 | {
143 | 	Token keyword = scan();		/* Consume CURSCHAR */
144 | 
145 | 	if (!match(TOKEN_NUMERIC_VARIABLE)) {
146 | 		parse_error("Expected numeric variable after CURSCHAR");
147 | 		synchronize();
148 | 		return NULL;
149 | 	}
150 | 
151 | 	Node* target = variable();
152 | 	return init_keyword(keyword, target, NULL);
153 | }
152 | 
153 | /* CURSCOL <numeric variable>: the operand must be a numeric variable. */
154 | Node* do_curscol()
155 | {
156 | 	Token keyword = scan();		/* Consume CURSCOL */
157 | 
158 | 	if (!match(TOKEN_NUMERIC_VARIABLE)) {
159 | 		parse_error("Expected numeric variable after CURSCOL");
160 | 		synchronize();
161 | 		return NULL;
162 | 	}
163 | 
164 | 	Node* target = variable();
165 | 	return init_keyword(keyword, target, NULL);
166 | }
165 | 
166 | /*
167 |  * CURSPOS <numeric variable> <numeric variable>
168 |  * Both operands must be numeric variables; the same message is reported
169 |  * whichever of the two is missing.
170 |  */
171 | Node* do_curspos()
172 | {
173 | 	Token keyword = scan();		/* Consume CURSPOS */
174 | 
175 | 	if (!match(TOKEN_NUMERIC_VARIABLE)) {
176 | 		parse_error("Expected two numeric variables after CURSPOS");
177 | 		synchronize();
178 | 		return NULL;
179 | 	}
180 | 
181 | 	Node* first = variable();
182 | 	if (!match(TOKEN_NUMERIC_VARIABLE)) {
183 | 		/* Release the first operand before reporting the error */
184 | 		free_node(first);
185 | 		parse_error("Expected two numeric variables after CURSPOS");
186 | 		synchronize();
187 | 		return NULL;
188 | 	}
189 | 
190 | 	Node* second = variable();
191 | 	return init_keyword(keyword, first, second);
192 | }
183 | 
184 | /* DELETE <string>: one string operand (variable or literal). */
185 | Node* do_delete()
186 | {
187 | 	Token keyword = scan();		/* Consume DELETE */
188 | 	Node* name = string();
189 | 
190 | 	return init_keyword(keyword, name, NULL);
191 | }
190 | 
191 | /* END: no operands. */
192 | Node* do_end()
193 | {
194 | 	return init_keyword(scan(), NULL, NULL);
195 | }
196 | 
197 | /* FILES: no operands. */
198 | Node* do_files()
199 | {
200 | 	return init_keyword(scan(), NULL, NULL);
201 | }
202 | 
203 | /* GETKEY <numeric variable>: the operand must be a numeric variable. */
204 | Node* do_getkey()
205 | {
206 | 	Token keyword = scan();		/* Consume GETKEY */
207 | 
208 | 	if (!match(TOKEN_NUMERIC_VARIABLE)) {
209 | 		parse_error("Expected numeric variable after GETKEY");
210 | 		synchronize();
211 | 		return NULL;
212 | 	}
213 | 
214 | 	Node* target = variable();
215 | 	return init_keyword(keyword, target, NULL);
216 | }
215 | 
216 | /* GOSUB <label>: the operand is parsed by label(). */
217 | Node* do_gosub()
218 | {
219 | 	Token keyword = scan();		/* Consume GOSUB */
220 | 	Node* destination = label();
221 | 
222 | 	return init_keyword(keyword, destination, NULL);
223 | }
222 | 
223 | /* GOTO <label>: the operand is parsed by label(). */
224 | Node* do_goto()
225 | {
226 | 	Token keyword = scan();		/* Consume GOTO */
227 | 	Node* destination = label();
228 | 
229 | 	return init_keyword(keyword, destination, NULL);
230 | }
229 | 
230 | /*
231 |  * INCLUDE: produces no node of its own. The keyword and the single token
232 |  * that follows it are discarded, then the next keyword statement is parsed
233 |  * and returned in its place.
234 |  */
235 | Node* do_include()
236 | {
237 | 	/* Drop exactly two tokens: INCLUDE and its argument */
238 | 	for (int dropped = 0; dropped < 2; dropped++)
239 | 		scan();
240 | 
241 | 	return parse_keyword(klabels);
242 | }
238 | 
239 | /* INK <numeric>: one numeric operand parsed by numeric(). */
240 | Node* do_ink()
241 | {
242 | 	Token keyword = scan();		/* Consume INK */
243 | 	Node* value = numeric();
244 | 
245 | 	return init_keyword(keyword, value, NULL);
246 | }
245 | 
246 | /*
247 |  * INPUT <variable>: the operand is handed straight to variable(); no token
248 |  * type check is made here.
249 |  */
250 | Node* do_input()
251 | {
252 | 	Token keyword = scan();		/* Consume INPUT */
253 | 	Node* target = variable();
254 | 
255 | 	return init_keyword(keyword, target, NULL);
256 | }
252 | 
253 | /*
254 |  * LEN <string> <numeric variable>
255 |  * op1 is the string operand, op2 the numeric variable given as target.
256 |  */
257 | Node* do_len()
258 | {
259 | 	Token keyword = scan();		/* Consume LEN */
260 | 	Node* source = string();
261 | 
262 | 	if (!match(TOKEN_NUMERIC_VARIABLE)) {
263 | 		parse_error("Expected numeric variable as a target for LEN");
264 | 		synchronize();
265 | 		free_node(source);
266 | 		return NULL;
267 | 	}
268 | 
269 | 	Node* target = variable();
270 | 	return init_keyword(keyword, source, target);
271 | }
268 | 
269 | /*
270 |  * LISTBOX <string> <string> <string> <numeric variable>
271 |  * Resulting shape: keyword(op1 = first string,
272 |  *                          op2 = SEQUENCE(second string,
273 |  *                                         SEQUENCE(third string, target))).
274 |  */
275 | Node* do_listbox()
276 | {
277 | 	Token keyword = scan();		/* Consume LISTBOX */
278 | 
279 | 	Node* first = string();
280 | 	Node* second = string();
281 | 	Node* third = string();
282 | 
283 | 	if (!match(TOKEN_NUMERIC_VARIABLE)) {
284 | 		parse_error("Expected numeric variable as a target for LISTBOX");
285 | 		synchronize();
286 | 		free_node(first);
287 | 		free_node(second);
288 | 		free_node(third);
289 | 		return NULL;
290 | 	}
291 | 
292 | 	Node* target = variable();
293 | 	Node* inner = init_node(NODE_SEQUENCE, keyword, 0, third, target);
294 | 	Node* outer = init_node(NODE_SEQUENCE, keyword, 0, second, inner);
295 | 
296 | 	return init_keyword(keyword, first, outer);
297 | }
290 | 
291 | /* LOAD <string> <numeric>: string operand first, numeric operand second. */
292 | Node* do_load()
293 | {
294 | 	Token keyword = scan();		/* Consume LOAD */
295 | 
296 | 	/* Operands are parsed strictly left to right */
297 | 	Node* name = string();
298 | 	Node* where = numeric();
299 | 
300 | 	return init_keyword(keyword, name, where);
301 | }
300 | 
301 | /* MOVE <numeric> <numeric>: two numeric operands. */
302 | Node* do_move()
303 | {
304 | 	Token keyword = scan();		/* Consume MOVE */
305 | 
306 | 	/* Operands are parsed strictly left to right */
307 | 	Node* first = numeric();
308 | 	Node* second = numeric();
309 | 
310 | 	return init_keyword(keyword, first, second);
311 | }
310 | 
311 | /*
312 |  * NUMBER <string variable> <numeric variable>
313 |  * NUMBER <numeric variable> <string variable>
314 |  * The first variable decides which kind the second one must be: the two
315 |  * operands always have opposite types.
316 |  */
317 | Node* do_number()
318 | {
319 | 	Token keyword = scan();		/* Consume NUMBER */
320 | 
321 | 	int string_first = match(TOKEN_STRING_VARIABLE);
322 | 	if (!string_first && !match(TOKEN_NUMERIC_VARIABLE)) {
323 | 		parse_error("Expected variable as a source for NUMBER");
324 | 		synchronize();
325 | 		return NULL;
326 | 	}
327 | 
328 | 	Node* source = variable();
329 | 
330 | 	/* The second operand must be of the other variable kind */
331 | 	int target_ok;
332 | 	if (string_first)
333 | 		target_ok = match(TOKEN_NUMERIC_VARIABLE);
334 | 	else
335 | 		target_ok = match(TOKEN_STRING_VARIABLE);
336 | 
337 | 	if (!target_ok) {
338 | 		if (string_first)
339 | 			parse_error("Expected numeric variable in NUMBER");
340 | 		else
341 | 			parse_error("Expected string variable in NUMBER");
342 | 		synchronize();
343 | 		free_node(source);
344 | 		return NULL;
345 | 	}
346 | 
347 | 	Node* target = variable();
348 | 	return init_keyword(keyword, source, target);
349 | }
344 | 
345 | /* PAGE <numeric> <numeric>: two numeric operands. */
346 | Node* do_page()
347 | {
348 | 	Token keyword = scan();		/* Consume PAGE */
349 | 
350 | 	/* Operands are parsed strictly left to right */
351 | 	Node* first = numeric();
352 | 	Node* second = numeric();
353 | 
354 | 	return init_keyword(keyword, first, second);
355 | }
353 | 
354 | /* PAUSE <numeric>: one numeric operand. */
355 | Node* do_pause()
356 | {
357 | 	Token keyword = scan();		/* Consume PAUSE */
358 | 	Node* amount = numeric();
359 | 
360 | 	return init_keyword(keyword, amount, NULL);
361 | }
361 | 
362 | /*
363 |  * PEEK <numeric variable> <numeric>
364 |  * op1 is the target variable, op2 the numeric operand that follows it.
365 |  */
366 | Node* do_peek()
367 | {
368 | 	Token keyword = scan();		/* Consume PEEK */
369 | 
370 | 	if (match(TOKEN_NUMERIC_VARIABLE)) {
371 | 		Node* target = variable();
372 | 		Node* source = numeric();
373 | 
374 | 		return init_keyword(keyword, target, source);
375 | 	}
376 | 
377 | 	parse_error("Expected numeric target for PEEK");
378 | 	synchronize();
379 | 	return NULL;
380 | }
376 | 
377 | /*
378 |  * PEEKINT <numeric variable> <numeric>
379 |  * op1 is the target variable, op2 the numeric operand that follows it.
380 |  */
381 | Node* do_peekint()
382 | {
383 | 	Token keyword = scan();		/* Consume PEEKINT */
384 | 
385 | 	if (match(TOKEN_NUMERIC_VARIABLE)) {
386 | 		Node* target = variable();
387 | 		Node* source = numeric();
388 | 
389 | 		return init_keyword(keyword, target, source);
390 | 	}
391 | 
392 | 	parse_error("Expected numeric target for PEEKINT");
393 | 	synchronize();
394 | 	return NULL;
395 | }
391 | 
392 | /* POKE <numeric> <numeric>: two numeric operands. */
393 | Node* do_poke()
394 | {
395 | 	Token keyword = scan();		/* Consume POKE */
396 | 
397 | 	/* Operands are parsed strictly left to right */
398 | 	Node* first = numeric();
399 | 	Node* second = numeric();
400 | 
401 | 	return init_keyword(keyword, first, second);
402 | }
400 | 
401 | /* POKEINT <numeric> <numeric>: two numeric operands. */
402 | Node* do_pokeint()
403 | {
404 | 	Token keyword = scan();		/* Consume POKEINT */
405 | 
406 | 	/* Operands are parsed strictly left to right */
407 | 	Node* first = numeric();
408 | 	Node* second = numeric();
409 | 
410 | 	return init_keyword(keyword, first, second);
411 | }
409 | 
410 | /*
411 |  * PORT IN  <numeric> <numeric variable>
412 |  * PORT OUT <numeric> <numeric>
413 |  * Resulting shape: keyword(op1 = IN/OUT modifier node,
414 |  *                          op2 = SEQUENCE(first operand, second operand)).
415 |  */
416 | Node* do_port()
417 | {
418 | 	Token keyword = scan();		/* Consume PORT */
419 | 
420 | 	int is_in = match(TOKEN_IN);
421 | 	if (!is_in && !match(TOKEN_OUT)) {
422 | 		parse_error("Expected modifier for PORT (IN or OUT)");
423 | 		synchronize();
424 | 		return NULL;
425 | 	}
426 | 
427 | 	Node* direction = init_keyword(scan(), NULL, NULL);
428 | 	Node* first = numeric();
429 | 	Node* second;
430 | 
431 | 	if (is_in) {
432 | 		/* PORT IN needs a numeric variable as its second operand */
433 | 		if (!match(TOKEN_NUMERIC_VARIABLE)) {
434 | 			parse_error("Expected numeric target for PORT IN");
435 | 			synchronize();
436 | 			free_node(direction);
437 | 			free_node(first);
438 | 			return NULL;
439 | 		}
440 | 		second = variable();
441 | 	}
442 | 	else {
443 | 		second = numeric();
444 | 	}
445 | 
446 | 	Node* operands = init_node(NODE_SEQUENCE, keyword, 0, first, second);
447 | 	return init_keyword(keyword, direction, operands);
448 | }
440 | 
441 | /*
442 |  * PRINT [CHR | HEX] <expr> [;]
443 |  * Resulting shape: keyword(op1 = CHR/HEX modifier node or NULL,
444 |  *                          op2 = SEQUENCE(expression,
445 |  *                                         ';' modifier node or NULL)).
446 |  */
447 | Node* do_print()
448 | {
449 | 	Token keyword = scan();		/* Consume PRINT */
450 | 
451 | 	/* Optional leading modifier */
452 | 	Node* format = (match(TOKEN_CHR) || match(TOKEN_HEX))
453 | 		? init_keyword(scan(), NULL, NULL)
454 | 		: NULL;
455 | 
456 | 	Node* value = expr();
457 | 
458 | 	/* Optional trailing semicolon */
459 | 	Node* trailer = match(TOKEN_SEMICOLON)
460 | 		? init_keyword(scan(), NULL, NULL)
461 | 		: NULL;
462 | 
463 | 	Node* body = init_node(NODE_SEQUENCE, keyword, 0, value, trailer);
464 | 	return init_keyword(keyword, format, body);
465 | }
456 | 
457 | /*
458 |  * RAND <numeric variable> <numeric> <numeric>
459 |  * Resulting shape: keyword(op1 = target variable,
460 |  *                          op2 = SEQUENCE(low, high)).
461 |  */
462 | Node* do_rand()
463 | {
464 | 	Token keyword = scan();		/* Consume RAND */
465 | 
466 | 	if (match(TOKEN_NUMERIC_VARIABLE)) {
467 | 		Node* target = variable();
468 | 		Node* low = numeric();
469 | 		Node* high = numeric();
470 | 		Node* bounds = init_node(NODE_SEQUENCE, keyword, 0, low, high);
471 | 
472 | 		return init_keyword(keyword, target, bounds);
473 | 	}
474 | 
475 | 	parse_error("Expected numeric target for RAND");
476 | 	synchronize();
477 | 	return NULL;
478 | }
473 | 
474 | /*
475 |  * READ <label> <numeric> <numeric variable>
476 |  * The statement is parsed in full, but READ is not supported: an error is
477 |  * always reported, every node built here is freed and NULL is returned.
478 |  */
479 | Node* do_read()
480 | {
481 | 	Token keyword = scan();		/* Consume READ */
482 | 
483 | 	Node* source = label();
484 | 	Node* offset = numeric();
485 | 
486 | 	if (match(TOKEN_NUMERIC_VARIABLE)) {
487 | 		Node* target = variable();
488 | 		Node* operands = init_node(NODE_SEQUENCE, keyword, 0, offset, target);
489 | 
490 | 		/* NOTE(review): this message looks truncated - confirm intended text */
491 | 		parse_error("READ is not supported (sorry). It is");
492 | 		free_node(source);
493 | 		free_node(operands);
494 | 		return NULL;
495 | 	}
496 | 
497 | 	parse_error("Expected numeric target for READ");
498 | 	synchronize();
499 | 	free_node(source);
500 | 	free_node(offset);
501 | 	return NULL;
502 | }
497 | 
498 | /* RENAME <string> <string>: two string operands. */
499 | Node* do_rename()
500 | {
501 | 	Token keyword = scan();		/* Consume RENAME */
502 | 
503 | 	/* Operands are parsed strictly left to right */
504 | 	Node* first = string();
505 | 	Node* second = string();
506 | 
507 | 	return init_keyword(keyword, first, second);
508 | }
506 | 
507 | /* RETURN: no operands. */
508 | Node* do_return()
509 | {
510 | 	return init_keyword(scan(), NULL, NULL);
511 | }
513 | 
514 | /*
515 |  * SAVE <string> <numeric> <numeric>
516 |  * Resulting shape: keyword(op1 = string operand,
517 |  *                          op2 = SEQUENCE(first numeric, second numeric)).
518 |  */
519 | Node* do_save()
520 | {
521 | 	Token keyword = scan();		/* Consume SAVE */
522 | 
523 | 	/* Operands are parsed strictly left to right */
524 | 	Node* filename = string();
525 | 	Node* start = numeric();
526 | 	Node* count = numeric();
527 | 
528 | 	Node* range = init_node(NODE_SEQUENCE, keyword, 0, start, count);
529 | 	return init_keyword(keyword, filename, range);
530 | }
526 | 
527 | /*
528 |  * SERIAL (ON | SEND) <numeric>
529 |  * SERIAL REC <numeric variable>
530 |  * op1 is a keyword node for the modifier, op2 the operand.
531 |  */
532 | Node* do_serial()
533 | {
534 | 	Token keyword = scan();		/* Consume SERIAL */
535 | 
536 | 	int takes_value = match(TOKEN_ON) || match(TOKEN_SEND);
537 | 	if (!takes_value && !match(TOKEN_REC)) {
538 | 		parse_error("Expected modifier for SERIAL (ON, SEND or REC)");
539 | 		synchronize();
540 | 		return NULL;
541 | 	}
542 | 
543 | 	Node* mode = init_keyword(scan(), NULL, NULL);
544 | 
545 | 	/* ON and SEND accept any numeric operand */
546 | 	if (takes_value) {
547 | 		Node* value = numeric();
548 | 		return init_keyword(keyword, mode, value);
549 | 	}
550 | 
551 | 	/* REC requires a numeric variable */
552 | 	if (!match(TOKEN_NUMERIC_VARIABLE)) {
553 | 		parse_error("Expected numeric target for SERIAL REC");
554 | 		synchronize();
555 | 		free_node(mode);
556 | 		return NULL;
557 | 	}
558 | 
559 | 	Node* target = variable();
560 | 	return init_keyword(keyword, mode, target);
561 | }
554 | 
555 | /* SIZE <string>: one string operand (variable or literal). */
556 | Node* do_size()
557 | {
558 | 	Token keyword = scan();		/* Consume SIZE */
559 | 	Node* name = string();
560 | 
561 | 	return init_keyword(keyword, name, NULL);
562 | }
562 | 
563 | /* SOUND <numeric> <numeric>: two numeric operands. */
564 | Node* do_sound()
565 | {
566 | 	Token keyword = scan();		/* Consume SOUND */
567 | 
568 | 	/* Operands are parsed strictly left to right */
569 | 	Node* first = numeric();
570 | 	Node* second = numeric();
571 | 
572 | 	return init_keyword(keyword, first, second);
573 | }
571 | 
572 | /*
573 |  * STRING (GET | SET) <string variable> <numeric> <numeric variable>
574 |  * Resulting shape: keyword(op1 = GET/SET modifier node,
575 |  *                          op2 = SEQUENCE(string variable,
576 |  *                                         SEQUENCE(offset, numeric variable))).
577 |  */
578 | Node* do_string()
579 | {
580 | 	Token keyword = scan();		/* Consume STRING */
581 | 
582 | 	int has_modifier = match(TOKEN_GET) || match(TOKEN_SET);
583 | 	if (!has_modifier) {
584 | 		parse_error("Expected modifier for STRING (GET or SET)");
585 | 		synchronize();
586 | 		return NULL;
587 | 	}
588 | 	Node* mode = init_keyword(scan(), NULL, NULL);
589 | 
590 | 	if (!match(TOKEN_STRING_VARIABLE)) {
591 | 		parse_error("Expected string variable for STRING");
592 | 		synchronize();
593 | 		free_node(mode);
594 | 		return NULL;
595 | 	}
596 | 	Node* text = variable();
597 | 	Node* offset = numeric();
598 | 
599 | 	if (!match(TOKEN_NUMERIC_VARIABLE)) {
600 | 		parse_error("Expected numeric target for STRING");
601 | 		synchronize();
602 | 		free_node(mode);
603 | 		free_node(text);
604 | 		free_node(offset);
605 | 		return NULL;
606 | 	}
607 | 	Node* number = variable();
608 | 
609 | 	/* Chain the three operands into nested sequences, innermost first */
610 | 	Node* inner = init_node(NODE_SEQUENCE, keyword, 0, offset, number);
611 | 	Node* outer = init_node(NODE_SEQUENCE, keyword, 0, text, inner);
612 | 
613 | 	return init_keyword(keyword, mode, outer);
614 | }
610 | 
611 | /* WAITKEY <numeric variable>: the operand must be a numeric variable. */
612 | Node* do_waitkey()
613 | {
614 | 	Token keyword = scan();		/* Consume WAITKEY */
615 | 
616 | 	if (match(TOKEN_NUMERIC_VARIABLE)) {
617 | 		Node* target = variable();
618 | 		return init_keyword(keyword, target, NULL);
619 | 	}
620 | 
621 | 	parse_error("Expected numeric target for WAITKEY");
622 | 	synchronize();
623 | 	return NULL;
624 | }
625 | 
626 | /* ============================== ENTRY POINT =============================== */
627 | typedef Node* (*FunctionPtr)();	/* A parsing rule: returns the parsed node, or NULL on error */
628 | static FunctionPtr keywords_compilers[] = {	/* Indexed by token type; looked up in parse_keyword() */
629 | 	/* Tokens that start a keyword statement, each mapped to its parsing rule */
630 | 	[TOKEN_ALERT] = do_alert,
631 | 	[TOKEN_ASKFILE] = do_askfile,
632 | 	[TOKEN_BREAK] = do_break,
633 | 	[TOKEN_CALL] = do_call,
634 | 	[TOKEN_CASE] = do_case,
635 | 	[TOKEN_CLS] = do_cls,
636 | 	[TOKEN_CURSOR] = do_cursor,
637 | 	[TOKEN_CURSCHAR] = do_curschar,
638 | 	[TOKEN_CURSCOL] = do_curscol,
639 | 	[TOKEN_CURSPOS] = do_curspos,
640 | 	[TOKEN_DELETE] = do_delete,
641 | 	[TOKEN_END] = do_end,
642 | 	[TOKEN_FILES] = do_files,
643 | 	[TOKEN_GETKEY] = do_getkey,
644 | 	[TOKEN_GOSUB] = do_gosub,
645 | 	[TOKEN_GOTO] = do_goto,
646 | 	[TOKEN_INCLUDE] = do_include,
647 | 	[TOKEN_INK] = do_ink,
648 | 	[TOKEN_INPUT] = do_input,
649 | 	[TOKEN_LEN] = do_len,
650 | 	[TOKEN_LISTBOX] = do_listbox,
651 | 	[TOKEN_LOAD] = do_load,
652 | 	[TOKEN_MOVE] = do_move,
653 | 	[TOKEN_NUMBER] = do_number,
654 | 	[TOKEN_PAGE] = do_page,
655 | 	[TOKEN_PAUSE] = do_pause,
656 | 	[TOKEN_PEEK] = do_peek,
657 | 	[TOKEN_PEEKINT] = do_peekint,
658 | 	[TOKEN_POKE] = do_poke,
659 | 	[TOKEN_POKEINT] = do_pokeint,
660 | 	[TOKEN_PORT] = do_port,
661 | 	[TOKEN_PRINT] = do_print,
662 | 	[TOKEN_RAND] = do_rand,
663 | 	[TOKEN_READ] = do_read,
664 | 	[TOKEN_RENAME] = do_rename,
665 | 	[TOKEN_RETURN] = do_return,
666 | 	[TOKEN_SAVE] = do_save,
667 | 	[TOKEN_SERIAL] = do_serial,
668 | 	[TOKEN_SIZE] = do_size,
669 | 	[TOKEN_SOUND] = do_sound,
670 | 	[TOKEN_STRING] = do_string,
671 | 	[TOKEN_WAITKEY] = do_waitkey,
672 | 
673 | 	/* Keyword tokens with no rule here (modifiers, or handled elsewhere); a NULL entry makes parse_keyword() report "Expected keyword" */
674 | 	[TOKEN_AND] = NULL,
675 | 	[TOKEN_CHR] = NULL,
676 | 	[TOKEN_DO] = NULL,
677 | 	[TOKEN_ELSE] = NULL,
678 | 	[TOKEN_ENDLESS] = NULL,
679 | 	[TOKEN_FOR] = NULL,
680 | 	[TOKEN_GET] = NULL,
681 | 	[TOKEN_HEX] = NULL,
682 | 	[TOKEN_IF] = NULL,
683 | 	[TOKEN_IN] = NULL,
684 | 	[TOKEN_LOOP] = NULL,
685 | 	[TOKEN_LOWER] = NULL,
686 | 	[TOKEN_NEXT] = NULL,
687 | 	[TOKEN_OFF] = NULL,
688 | 	[TOKEN_ON] = NULL,
689 | 	[TOKEN_OUT] = NULL,
690 | 	[TOKEN_PROGSTART] = NULL,
691 | 	[TOKEN_RAMSTART] = NULL,
692 | 	[TOKEN_REC] = NULL,
693 | 	[TOKEN_REM] = NULL,
694 | 	[TOKEN_SEND] = NULL,
695 | 	[TOKEN_SET] = NULL,
696 | 	[TOKEN_THEN] = NULL,
697 | 	[TOKEN_TIMER] = NULL,
698 | 	[TOKEN_TO] = NULL,
699 | 	[TOKEN_UNTIL] = NULL,
700 | 	[TOKEN_UPPER] = NULL,
701 | 	[TOKEN_VARIABLES] = NULL,
702 | 	[TOKEN_VERSION] = NULL,
703 | 	[TOKEN_WHILE] = NULL
704 | };
705 | 
706 | /*
707 |  * Entry point: parse one keyword statement.
708 |  *
709 |  * l is stored in klabels so that the rules can reach the label table.
710 |  * The next token is inspected with lookahead() and its type selects a rule
711 |  * from keywords_compilers[]; each rule consumes the keyword itself.
712 |  *
713 |  * Returns the node built by the rule. If the token has no rule, the error
714 |  * "Expected keyword" is reported, synchronize() is called and NULL is
715 |  * returned.
716 |  */
717 | Node* parse_keyword(SymbolTable* l)
718 | {
719 | 	klabels = l;
720 | 	Token t = lookahead();
721 | 
722 | 	/*
723 | 	 * The table is only as long as its largest designated index, so a
724 | 	 * token type beyond that must not be used as an index. Such tokens
725 | 	 * are treated like a NULL entry.
726 | 	 */
727 | 	size_t rule_count = sizeof(keywords_compilers) / sizeof(keywords_compilers[0]);
728 | 	FunctionPtr rule = NULL;
729 | 	if ((size_t) t.type < rule_count)
730 | 		rule = keywords_compilers[t.type];
731 | 
732 | 	if (rule != NULL)
733 | 		return rule();
734 | 
735 | 	parse_error("Expected keyword");
736 | 	synchronize();
737 | 	return NULL;
738 | }
719 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 |                             Preamble
  9 | 
 10 |   The GNU General Public License is a free, copyleft license for
 11 | software and other kinds of works.
 12 | 
 13 |   The licenses for most software and other practical works are designed
 14 | to take away your freedom to share and change the works.  By contrast,
 15 | the GNU General Public License is intended to guarantee your freedom to
 16 | share and change all versions of a program--to make sure it remains free
 17 | software for all its users.  We, the Free Software Foundation, use the
 18 | GNU General Public License for most of our software; it applies also to
 19 | any other work released this way by its authors.  You can apply it to
 20 | your programs, too.
 21 | 
 22 |   When we speak of free software, we are referring to freedom, not
 23 | price.  Our General Public Licenses are designed to make sure that you
 24 | have the freedom to distribute copies of free software (and charge for
 25 | them if you wish), that you receive source code or can get it if you
 26 | want it, that you can change the software or use pieces of it in new
 27 | free programs, and that you know you can do these things.
 28 | 
 29 |   To protect your rights, we need to prevent others from denying you
 30 | these rights or asking you to surrender the rights.  Therefore, you have
 31 | certain responsibilities if you distribute copies of the software, or if
 32 | you modify it: responsibilities to respect the freedom of others.
 33 | 
 34 |   For example, if you distribute copies of such a program, whether
 35 | gratis or for a fee, you must pass on to the recipients the same
 36 | freedoms that you received.  You must make sure that they, too, receive
 37 | or can get the source code.  And you must show them these terms so they
 38 | know their rights.
 39 | 
 40 |   Developers that use the GNU GPL protect your rights with two steps:
 41 | (1) assert copyright on the software, and (2) offer you this License
 42 | giving you legal permission to copy, distribute and/or modify it.
 43 | 
 44 |   For the developers' and authors' protection, the GPL clearly explains
 45 | that there is no warranty for this free software.  For both users' and
 46 | authors' sake, the GPL requires that modified versions be marked as
 47 | changed, so that their problems will not be attributed erroneously to
 48 | authors of previous versions.
 49 | 
 50 |   Some devices are designed to deny users access to install or run
 51 | modified versions of the software inside them, although the manufacturer
 52 | can do so.  This is fundamentally incompatible with the aim of
 53 | protecting users' freedom to change the software.  The systematic
 54 | pattern of such abuse occurs in the area of products for individuals to
 55 | use, which is precisely where it is most unacceptable.  Therefore, we
 56 | have designed this version of the GPL to prohibit the practice for those
 57 | products.  If such problems arise substantially in other domains, we
 58 | stand ready to extend this provision to those domains in future versions
 59 | of the GPL, as needed to protect the freedom of users.
 60 | 
 61 |   Finally, every program is threatened constantly by software patents.
 62 | States should not allow patents to restrict development and use of
 63 | software on general-purpose computers, but in those that do, we wish to
 64 | avoid the special danger that patents applied to a free program could
 65 | make it effectively proprietary.  To prevent this, the GPL assures that
 66 | patents cannot be used to render the program non-free.
 67 | 
 68 |   The precise terms and conditions for copying, distribution and
 69 | modification follow.
 70 | 
 71 |                        TERMS AND CONDITIONS
 72 | 
 73 |   0. Definitions.
 74 | 
 75 |   "This License" refers to version 3 of the GNU General Public License.
 76 | 
 77 |   "Copyright" also means copyright-like laws that apply to other kinds of
 78 | works, such as semiconductor masks.
 79 | 
 80 |   "The Program" refers to any copyrightable work licensed under this
 81 | License.  Each licensee is addressed as "you".  "Licensees" and
 82 | "recipients" may be individuals or organizations.
 83 | 
 84 |   To "modify" a work means to copy from or adapt all or part of the work
 85 | in a fashion requiring copyright permission, other than the making of an
 86 | exact copy.  The resulting work is called a "modified version" of the
 87 | earlier work or a work "based on" the earlier work.
 88 | 
 89 |   A "covered work" means either the unmodified Program or a work based
 90 | on the Program.
 91 | 
 92 |   To "propagate" a work means to do anything with it that, without
 93 | permission, would make you directly or secondarily liable for
 94 | infringement under applicable copyright law, except executing it on a
 95 | computer or modifying a private copy.  Propagation includes copying,
 96 | distribution (with or without modification), making available to the
 97 | public, and in some countries other activities as well.
 98 | 
 99 |   To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies.  Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 | 
103 |   An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License.  If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 | 
112 |   1. Source Code.
113 | 
114 |   The "source code" for a work means the preferred form of the work
115 | for making modifications to it.  "Object code" means any non-source
116 | form of a work.
117 | 
118 |   A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 | 
123 |   The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form.  A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 | 
134 |   The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities.  However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work.  For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 | 
147 |   The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 | 
151 |   The Corresponding Source for a work in source code form is that
152 | same work.
153 | 
154 |   2. Basic Permissions.
155 | 
156 |   All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met.  This License explicitly affirms your unlimited
159 | permission to run the unmodified Program.  The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work.  This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 | 
164 |   You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force.  You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright.  Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 | 
175 |   Conveying under any other circumstances is permitted solely under
176 | the conditions stated below.  Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 | 
179 |   3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 | 
181 |   No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 | 
187 |   When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 | 
195 |   4. Conveying Verbatim Copies.
196 | 
197 |   You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 | 
205 |   You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 | 
208 |   5. Conveying Modified Source Versions.
209 | 
210 |   You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 | 
214 |     a) The work must carry prominent notices stating that you modified
215 |     it, and giving a relevant date.
216 | 
217 |     b) The work must carry prominent notices stating that it is
218 |     released under this License and any conditions added under section
219 |     7.  This requirement modifies the requirement in section 4 to
220 |     "keep intact all notices".
221 | 
222 |     c) You must license the entire work, as a whole, under this
223 |     License to anyone who comes into possession of a copy.  This
224 |     License will therefore apply, along with any applicable section 7
225 |     additional terms, to the whole of the work, and all its parts,
226 |     regardless of how they are packaged.  This License gives no
227 |     permission to license the work in any other way, but it does not
228 |     invalidate such permission if you have separately received it.
229 | 
230 |     d) If the work has interactive user interfaces, each must display
231 |     Appropriate Legal Notices; however, if the Program has interactive
232 |     interfaces that do not display Appropriate Legal Notices, your
233 |     work need not make them do so.
234 | 
235 |   A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit.  Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 | 
245 |   6. Conveying Non-Source Forms.
246 | 
247 |   You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 | 
252 |     a) Convey the object code in, or embodied in, a physical product
253 |     (including a physical distribution medium), accompanied by the
254 |     Corresponding Source fixed on a durable physical medium
255 |     customarily used for software interchange.
256 | 
257 |     b) Convey the object code in, or embodied in, a physical product
258 |     (including a physical distribution medium), accompanied by a
259 |     written offer, valid for at least three years and valid for as
260 |     long as you offer spare parts or customer support for that product
261 |     model, to give anyone who possesses the object code either (1) a
262 |     copy of the Corresponding Source for all the software in the
263 |     product that is covered by this License, on a durable physical
264 |     medium customarily used for software interchange, for a price no
265 |     more than your reasonable cost of physically performing this
266 |     conveying of source, or (2) access to copy the
267 |     Corresponding Source from a network server at no charge.
268 | 
269 |     c) Convey individual copies of the object code with a copy of the
270 |     written offer to provide the Corresponding Source.  This
271 |     alternative is allowed only occasionally and noncommercially, and
272 |     only if you received the object code with such an offer, in accord
273 |     with subsection 6b.
274 | 
275 |     d) Convey the object code by offering access from a designated
276 |     place (gratis or for a charge), and offer equivalent access to the
277 |     Corresponding Source in the same way through the same place at no
278 |     further charge.  You need not require recipients to copy the
279 |     Corresponding Source along with the object code.  If the place to
280 |     copy the object code is a network server, the Corresponding Source
281 |     may be on a different server (operated by you or a third party)
282 |     that supports equivalent copying facilities, provided you maintain
283 |     clear directions next to the object code saying where to find the
284 |     Corresponding Source.  Regardless of what server hosts the
285 |     Corresponding Source, you remain obligated to ensure that it is
286 |     available for as long as needed to satisfy these requirements.
287 | 
288 |     e) Convey the object code using peer-to-peer transmission, provided
289 |     you inform other peers where the object code and Corresponding
290 |     Source of the work are being offered to the general public at no
291 |     charge under subsection 6d.
292 | 
293 |   A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 | 
297 |   A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling.  In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage.  For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product.  A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 | 
310 |   "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source.  The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 | 
318 |   If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information.  But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 | 
329 |   The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed.  Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 | 
337 |   Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 | 
343 |   7. Additional Terms.
344 | 
345 |   "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law.  If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 | 
354 |   When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it.  (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.)  You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 | 
361 |   Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 | 
365 |     a) Disclaiming warranty or limiting liability differently from the
366 |     terms of sections 15 and 16 of this License; or
367 | 
368 |     b) Requiring preservation of specified reasonable legal notices or
369 |     author attributions in that material or in the Appropriate Legal
370 |     Notices displayed by works containing it; or
371 | 
372 |     c) Prohibiting misrepresentation of the origin of that material, or
373 |     requiring that modified versions of such material be marked in
374 |     reasonable ways as different from the original version; or
375 | 
376 |     d) Limiting the use for publicity purposes of names of licensors or
377 |     authors of the material; or
378 | 
379 |     e) Declining to grant rights under trademark law for use of some
380 |     trade names, trademarks, or service marks; or
381 | 
382 |     f) Requiring indemnification of licensors and authors of that
383 |     material by anyone who conveys the material (or modified versions of
384 |     it) with contractual assumptions of liability to the recipient, for
385 |     any liability that these contractual assumptions directly impose on
386 |     those licensors and authors.
387 | 
388 |   All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10.  If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term.  If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 | 
398 |   If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 | 
403 |   Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 | 
407 |   8. Termination.
408 | 
409 |   You may not propagate or modify a covered work except as expressly
410 | provided under this License.  Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 | 
415 |   However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 | 
422 |   Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 | 
429 |   Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License.  If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 | 
435 |   9. Acceptance Not Required for Having Copies.
436 | 
437 |   You are not required to accept this License in order to receive or
438 | run a copy of the Program.  Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance.  However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work.  These actions infringe copyright if you do
443 | not accept this License.  Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 | 
446 |   10. Automatic Licensing of Downstream Recipients.
447 | 
448 |   Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License.  You are not responsible
451 | for enforcing compliance by third parties with this License.
452 | 
453 |   An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations.  If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 | 
463 |   You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License.  For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 | 
471 |   11. Patents.
472 | 
473 |   A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based.  The
475 | work thus licensed is called the contributor's "contributor version".
476 | 
477 |   A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version.  For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 | 
487 |   Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 | 
492 |   In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement).  To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 | 
499 |   If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients.  "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 | 
513 |   If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 | 
521 |   A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License.  You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 | 
536 |   Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 | 
540 |   12. No Surrender of Others' Freedom.
541 | 
542 |   If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License.  If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all.  For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 | 
552 |   13. Use with the GNU Affero General Public License.
553 | 
554 |   Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work.  The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 | 
563 |   14. Revised Versions of this License.
564 | 
565 |   The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time.  Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 | 
570 |   Each version is given a distinguishing version number.  If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation.  If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 | 
579 |   If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 | 
584 |   Later license versions may give you additional or different
585 | permissions.  However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 | 
589 |   15. Disclaimer of Warranty.
590 | 
591 |   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 | 
600 |   16. Limitation of Liability.
601 | 
602 |   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 | 
612 |   17. Interpretation of Sections 15 and 16.
613 | 
614 |   If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 | 
621 |                      END OF TERMS AND CONDITIONS
622 | 
623 |             How to Apply These Terms to Your New Programs
624 | 
625 |   If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 | 
629 |   To do so, attach the following notices to the program.  It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 | 
634 |     <one line to give the program's name and a brief idea of what it does.>
635 |     Copyright (C) <year>  <name of author>
636 | 
637 |     This program is free software: you can redistribute it and/or modify
638 |     it under the terms of the GNU General Public License as published by
639 |     the Free Software Foundation, either version 3 of the License, or
640 |     (at your option) any later version.
641 | 
642 |     This program is distributed in the hope that it will be useful,
643 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
644 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
645 |     GNU General Public License for more details.
646 | 
647 |     You should have received a copy of the GNU General Public License
648 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
649 | 
650 | Also add information on how to contact you by electronic and paper mail.
651 | 
652 |   If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 | 
655 |     <program>  Copyright (C) <year>  <name of author>
656 |     This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 |     This is free software, and you are welcome to redistribute it
658 |     under certain conditions; type `show c' for details.
659 | 
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License.  Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 | 
664 |   You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 | 
669 |   The GNU General Public License does not permit incorporating your program
670 | into proprietary programs.  If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library.  If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License.  But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 | 


--------------------------------------------------------------------------------
/src/back/keyword.c:
--------------------------------------------------------------------------------
   1 | /*
   2 |  * Copyright (C) 2022, Wojciech Grzela <grzela.wojciech@gmail.com>
   3 |  * Licensed under GNU General Public License version 3.
   4 |  */
   5 | 
   6 | /* Standard library includes */
   7 | #include <stdio.h>
   8 | #include <string.h>
   9 | 
  10 | /* Custom includes */
  11 | #include <table.h>
  12 | #include <codegen.h>
  13 | #include <runtime.h>
  14 | 
/* Label table; GOSUB/GOTO read the isreal and addr fields by label id */
static SymbolTable* symbols;
/* String table (not referenced by the keyword compilers in this part of the file) */
static StringTable* strings;
/* Patch list; GOSUB/GOTO register the operand offset here via add_patch()
   when the target label has no address yet */
static PatchTable* patches;
  18 | 
  19 | /* =========================== COMPILER FUNCTIONS =========================== */
  20 | void compile_alert(Node* ast, CompileTarget* code)
  21 | {
  22 | 	compile_expression(ast->op1, code);
  23 | 
  24 | 	emit_byte(code, 0x8B);			/* MOV */
  25 | 	emit_byte(code, 0xC6);			/* AX, SI */
  26 | 	emit_byte(code, 0x33);			/* XOR */
  27 | 	emit_byte(code, 0xDB);			/* BX, BX */
  28 | 	emit_byte(code, 0x33);			/* XOR */
  29 | 	emit_byte(code, 0xC9);			/* CX, CX */
  30 | 	emit_byte(code, 0x33);			/* XOR */
  31 | 	emit_byte(code, 0xD2);			/* DX, DX */
  32 | 
  33 | 	/* CALL os_dialog_box */
  34 | 	emit_call(code, 0x003C);
  35 | }
  36 | 
  37 | void compile_askfile(Node* ast, CompileTarget* code)
  38 | {
  39 | 	uint16_t var = STRVARS + ast->op1->val * 128;
  40 | 
  41 | 	/* CALL os_file_selector */
  42 | 	emit_call(code, 0x005A);
  43 | 
  44 | 	emit_byte(code, 0x8B);			/* MOV */
  45 | 	emit_byte(code, 0xF0);			/* SI, AX */
  46 | 	emit_byte(code, 0xC7);			/* MOV */
  47 | 	emit_byte(code, 0xC7);			/* DI, */
  48 | 	emit_word(code, var);			/* var */
  49 | 
  50 | 	/* CALL os_string_copy */
  51 | 	emit_call(code, 0x0039);
  52 | }
  53 | 
  54 | void compile_break(Node* ast, CompileTarget* code)
  55 | {
  56 | 	/* Make string */
  57 | 	int line = ast->line;
  58 | 	char msg[28];
  59 | 	sprintf(msg, "BREAK CALLED - line %d\r\n", line);
  60 | 	uint16_t addr = LOAD + code->length + 9;
  61 | 
  62 | 	/* Print message */
  63 | 	emit_byte(code, 0xC7);			/* MOV */
  64 | 	emit_byte(code, 0xC6);			/* SI, */
  65 | 	emit_word(code, addr);			/* imm16 */
  66 | 	emit_call(code, 0x0003);
  67 | 
  68 | 	/* Exit program */
  69 | 	make_exit(code);
  70 | 
  71 | 	emit_string(code, msg);
  72 | }
  73 | 
  74 | void compile_call(Node* ast, CompileTarget* code)
  75 | {
  76 | 	compile_expression(ast->op1, code);
  77 | 	emit_byte(code, 0xFF);			/* CALL */
  78 | 	emit_byte(code, 0xD0);			/* AX */
  79 | }
  80 | 
  81 | void compile_case(Node* ast, CompileTarget* code)
  82 | {
  83 | 	uint16_t var = STRVARS + ast->op2->val * 128;
  84 | 
  85 | 	emit_byte(code, 0xC7);			/* MOV */
  86 | 	emit_byte(code, 0xC0);			/* AX, */
  87 | 	emit_word(code, var);			/* imm16 */
  88 | 
  89 | 	/* CALL os_string_lowercase */
  90 | 	if (ast->op1->attribute == TOKEN_LOWER)
  91 | 		emit_call(code, 0x0033);
  92 | 	/* CALL os_string_uppercase */
  93 | 	else
  94 | 		emit_call(code, 0x0030);
  95 | }
  96 | 
  97 | void compile_cls(Node* ast, CompileTarget* code)
  98 | {
  99 | 	ast->op1 = NULL;			/* Shut up */
 100 | 	/* CALL os_clear_screen */
 101 | 	emit_call(code, 0x0009);
 102 | }
 103 | 
 104 | void compile_cursor(Node* ast, CompileTarget* code)
 105 | {
 106 | 	/* CALL os_show_cursor */
 107 | 	if (ast->op1->attribute == TOKEN_ON)
 108 | 		emit_call(code, 0x008A);
 109 | 	/* CALL os_hide_cursor */
 110 | 	else
 111 | 		emit_call(code, 0x008D);
 112 | }
 113 | 
 114 | void compile_curschar(Node* ast, CompileTarget* code)
 115 | {
 116 | 	uint16_t var = VARS + ast->op1->val * 2;
 117 | 
 118 | 	/* Call BIOS for character */
 119 | 	emit_byte(code, 0xC7);			/* MOV */
 120 | 	emit_byte(code, 0xC0);			/* AX, */
 121 | 	emit_word(code, 0x0800);		/* 0x800 -> AH = 8 */
 122 | 	emit_byte(code, 0x8B);			/* MOV */
 123 | 	emit_byte(code, 0x1E);			/* BX, [imm16] */
 124 | 	emit_word(code, WORKPAGE);		/* Working page */
 125 | 	emit_byte(code, 0xCD);			/* INT */
 126 | 	emit_byte(code, 0x10);			/* 0x10 */
 127 | 
 128 | 	/* Store it (clean upper byte of AX too!) */
 129 | 	emit_byte(code, 0x25);			/* AND AX, */
 130 | 	emit_word(code, 0x00FF);		/* 0x00FF */
 131 | 	emit_byte(code, 0x89);			/* MOV */
 132 | 	emit_byte(code, 0x06);			/* [imm16], AX */
 133 | 	emit_word(code, var);
 134 | }
 135 | 
 136 | void compile_curscol(Node* ast, CompileTarget* code)
 137 | {
 138 | 	uint16_t var = VARS + ast->op1->val * 2;
 139 | 
 140 | 	/* Call BIOS for character */
 141 | 	emit_byte(code, 0xC7);			/* MOV */
 142 | 	emit_byte(code, 0xC0);			/* AX, */
 143 | 	emit_word(code, 0x0800);		/* 0x800 -> AH = 8 */
 144 | 	emit_byte(code, 0x8B);			/* MOV */
 145 | 	emit_byte(code, 0x1E);			/* BX, [imm16] */
 146 | 	emit_word(code, WORKPAGE);		/* Working page */
 147 | 	emit_byte(code, 0xCD);			/* INT */
 148 | 	emit_byte(code, 0x10);			/* 0x10 */
 149 | 
 150 | 	/* Store it (clean upper byte of AX too!) */
 151 | 	emit_byte(code, 0xC1);			/* SHR */
 152 | 	emit_byte(code, 0xE8);			/* AX, */
 153 | 	emit_byte(code, 0x08);			/* 8 */
 154 | 	emit_byte(code, 0x89);			/* MOV */
 155 | 	emit_byte(code, 0x06);			/* [imm16], AX */
 156 | 	emit_word(code, var);
 157 | }
 158 | 
 159 | void compile_curspos(Node* ast, CompileTarget* code)
 160 | {
 161 | 	uint16_t vara = VARS + ast->op1->val * 2;
 162 | 	uint16_t varb = VARS + ast->op2->val * 2;
 163 | 
 164 | 	/* CALL os_get_cursor_pos */
 165 | 	emit_call(code, 0x0069);
 166 | 
 167 | 	/* Store column */
 168 | 	emit_byte(code, 0x8B);			/* MOV */
 169 | 	emit_byte(code, 0xC2);			/* AX, DX */
 170 | 	emit_byte(code, 0x25);			/* AND AX, */
 171 | 	emit_word(code, 0x00FF);		/* 0x00FF */
 172 | 	emit_byte(code, 0x89);			/* MOV */
 173 | 	emit_byte(code, 0x06);			/* [imm16], AX */
 174 | 	emit_word(code, vara);
 175 | 
 176 | 	/* Store row */
 177 | 	emit_byte(code, 0x8B);			/* MOV */
 178 | 	emit_byte(code, 0xC2);			/* AX, DX */
 179 | 	emit_byte(code, 0xC1);			/* SHR */
 180 | 	emit_byte(code, 0xE8);			/* AX, */
 181 | 	emit_byte(code, 0x08);			/* 8 */
 182 | 	emit_byte(code, 0x89);			/* MOV */
 183 | 	emit_byte(code, 0x06);			/* [imm16], AX */
 184 | 	emit_word(code, varb);
 185 | }
 186 | 
 187 | void compile_delete(Node* ast, CompileTarget* code)
 188 | {
 189 | 	uint16_t rvar = VARS + ('r' - 'a') * 2;
 190 | 
 191 | 	compile_expression(ast->op1, code);
 192 | 
 193 | 	/* Check if file exists (CALL os_file_exists) */
 194 | 	emit_byte(code, 0x8B);			/* MOV */
 195 | 	emit_byte(code, 0xC6);			/* AX, SI */
 196 | 	emit_call(code, 0x0099);
 197 | 	emit_byte(code, 0x72);			/* JC */
 198 | 	emit_byte(code, 0x11);			/* Over exists branch */
 199 | 
 200 | 	/* Try deleting file (CALL os_remove_file) */
 201 | 	emit_call(code, 0x009F);
 202 | 	emit_byte(code, 0x72);			/* JC */
 203 | 	emit_byte(code, 0x04);			/* Jump over failure */
 204 | 
 205 | 	/* File deleted, set R to 0 */
 206 | 	emit_byte(code, 0x33);			/* XOR */
 207 | 	emit_byte(code, 0xC0);			/* AX, AX */
 208 | 	emit_byte(code, 0xEB);			/* JMP */
 209 | 	emit_byte(code, 0x0E);			/* Over failures */
 210 | 
 211 | 	/* File couldn't be deleted, set R to 1 */
 212 | 	emit_byte(code, 0xC7);			/* MOV */
 213 | 	emit_byte(code, 0x06);
 214 | 	emit_word(code, rvar);			/* [rvar], */
 215 | 	emit_word(code, 0x0001);		/* 1 */
 216 | 	emit_byte(code, 0xEB);			/* JMP */
 217 | 	emit_byte(code, 0x06);			/* Over failure */
 218 | 
 219 | 	/* File doesn't exist, set R to 2 */
 220 | 	emit_byte(code, 0xC7);			/* MOV */
 221 | 	emit_byte(code, 0x06);
 222 | 	emit_word(code, rvar);			/* [rvar], */
 223 | 	emit_word(code, 0x0002);		/* 2 */
 224 | }
 225 | 
 226 | void compile_end(Node* ast, CompileTarget* code)
 227 | {
 228 | 	ast->op1 = NULL;			/* Shut up */
 229 | 	make_exit(code);
 230 | }
 231 | 
 232 | void compile_files(Node* ast, CompileTarget* code)
 233 | {
 234 | 	ast->op1 = NULL;			/* Shut up */
 235 | 
 236 | 	/* First set AX to our buffer */
 237 | 	emit_byte(code, 0xC7);			/* MOV */
 238 | 	emit_byte(code, 0xC0);			/* AX, */
 239 | 	emit_word(code, STRBUF);		/* STRBUF */
 240 | 
 241 | 	/* CALL os_get_file_list */
 242 | 	emit_call(code, 0x0042);
 243 | 
 244 | 	emit_byte(code, 0x8B);			/* MOV */
 245 | 	emit_byte(code, 0xF0);			/* SI, AX */
 246 | 	emit_byte(code, 0x56);			/* PUSH SI */
 247 | 	emit_byte(code, 0x25);			/* AND AX, */
 248 | 	emit_word(code, 0x00FF);
 249 | 
 250 | 	/* Loop through list, replace all commas for newlines */
 251 | 	emit_byte(code, 0xAC);			/* LODSB */
 252 | 	emit_byte(code, 0x85);			/* TEST */
 253 | 	emit_byte(code, 0xC0);			/* AX, AX */
 254 | 	emit_byte(code, 0x74);			/* JZ */
 255 | 	emit_byte(code, 0x10);			/* To the end */
 256 | 	emit_byte(code, 0x3D);			/* CMP AX, */
 257 | 	emit_word(code, 0x002C);		/* 0x002C = ',' */
 258 | 	emit_byte(code, 0x75);			/* JNE */
 259 | 	emit_byte(code, 0xF6);			/* Back to the loop */
 260 | 
 261 | 	/* Replace byte */
 262 | 	emit_byte(code, 0xC7);			/* MOV */
 263 | 	emit_byte(code, 0xC0);			/* AX, */
 264 | 	emit_word(code, 0x000A);		/* 0xA */
 265 | 	emit_byte(code, 0x8B);			/* MOV */
 266 | 	emit_byte(code, 0xFE);			/* DI, SI */
 267 | 	emit_byte(code, 0xFF);			/* DEC */
 268 | 	emit_byte(code, 0xCF);			/* DI */
 269 | 	emit_byte(code, 0xAA);			/* STOSB */
 270 | 	emit_byte(code, 0xEB);			/* JMP */
 271 | 	emit_byte(code, 0xEB);			/* Back to the loop */
 272 | 
 273 | 	emit_byte(code, 0x5E);			/* POP SI */
 274 | 	/* CALL print_string */
 275 | 	emit_call(code, PRINTSTR);
 276 | 
 277 | 	/* Put NL in STRBUF and print it */
 278 | 	emit_byte(code, 0xC7);			/* MOV */
 279 | 	emit_byte(code, 0xC0);			/* AX, */
 280 | 	emit_word(code, 0x000A);		/* 0xA */
 281 | 	emit_byte(code, 0x89);			/* MOV */
 282 | 	emit_byte(code, 0x06);			/* [imm], AX */
 283 | 	emit_word(code, STRBUF);
 284 | 	emit_byte(code, 0xC7);			/* MOV */
 285 | 	emit_byte(code, 0xC6);			/* SI, */
 286 | 	emit_word(code, STRBUF);
 287 | 
 288 | 	/* CALL print_string */
 289 | 	emit_call(code, PRINTSTR);
 290 | }
 291 | 
 292 | void compile_getkey(Node* ast, CompileTarget* code)
 293 | {
 294 | 	uint16_t var = VARS + ast->op1->val * 2;
 295 | 
 296 | 	/* CALL os_check_for_key */
 297 | 	emit_call(code, 0x0015);
 298 | 
 299 | 	/* Is it special char? */
 300 | 	emit_byte(code, 0x3D);			/* CMP AX, */
 301 | 	emit_word(code, 0x48E0);		/* 0x48E0 */
 302 | 	emit_byte(code, 0x74);			/* JE */
 303 | 	emit_byte(code, 0x18);			/* To UP (24) */
 304 | 	emit_byte(code, 0x3D);			/* CMP AX, */
 305 | 	emit_word(code, 0x50E0);		/* 0x50E0 */
 306 | 	emit_byte(code, 0x74);			/* JE */
 307 | 	emit_byte(code, 0x19);			/* To DOWN (25) */
 308 | 	emit_byte(code, 0x3D);			/* CMP AX, */
 309 | 	emit_word(code, 0x4BE0);		/* 0x4BE0 */
 310 | 	emit_byte(code, 0x74);			/* JE */
 311 | 	emit_byte(code, 0x1A);			/* To LEFT (26) */
 312 | 	emit_byte(code, 0x3D);			/* CMP AX, */
 313 | 	emit_word(code, 0x4DE0);		/* 0x4DE0 */
 314 | 	emit_byte(code, 0x74);			/* JE */
 315 | 	emit_byte(code, 0x1B);			/* To RIGHT (27) */
 316 | 
 317 | 	/* Store the character */
 318 | 	emit_byte(code, 0x25);			/* AND AX, */
 319 | 	emit_word(code, 0x00FF);		/* 0x00FF */
 320 | 	emit_byte(code, 0x89);			/* MOV */
 321 | 	emit_byte(code, 0x06);			/* [imm16], AX */
 322 | 	emit_word(code, var);
 323 | 	emit_byte(code, 0xEB);			/* JMP */
 324 | 	emit_byte(code, 0x18);			/* Over others (24) */
 325 | 
 326 | 	/* It was UP */
 327 | 	emit_byte(code, 0xC7);			/* MOV */
 328 | 	emit_byte(code, 0xC0);			/* AX, */
 329 | 	emit_word(code, 0x0001);		/* 1 */
 330 | 	emit_byte(code, 0xEB);			/* JMP */
 331 | 	emit_byte(code, 0xF1);			/* To store (-15) */
 332 | 
 333 | 	/* It was DOWN */
 334 | 	emit_byte(code, 0xC7);			/* MOV */
 335 | 	emit_byte(code, 0xC0);			/* AX, */
 336 | 	emit_word(code, 0x0002);		/* 2 */
 337 | 	emit_byte(code, 0xEB);			/* JMP */
 338 | 	emit_byte(code, 0xEB);			/* To store (-21) */
 339 | 
 340 | 	/* It was LEFT */
 341 | 	emit_byte(code, 0xC7);			/* MOV */
 342 | 	emit_byte(code, 0xC0);			/* AX, */
 343 | 	emit_word(code, 0x0003);		/* 3 */
 344 | 	emit_byte(code, 0xEB);			/* JMP */
 345 | 	emit_byte(code, 0xE5);			/* To store (-27) */
 346 | 
 347 | 	/* It was RIGHT */
 348 | 	emit_byte(code, 0xC7);			/* MOV */
 349 | 	emit_byte(code, 0xC0);			/* AX, */
 350 | 	emit_word(code, 0x0004);		/* 4 */
 351 | 	emit_byte(code, 0xEB);			/* JMP */
 352 | 	emit_byte(code, 0xDF);			/* To store (-33) */
 353 | 
 354 | }
 355 | 
 356 | void compile_gosub(Node* ast, CompileTarget* code)
 357 | {
 358 | 	int id = ast->op1->val;
 359 | 
 360 | 	/* Is label even real? */
 361 | 	if (!symbols->table[id].isreal) {
 362 | 		compile_error("GOSUB label not present", ast);
 363 | 		return;
 364 | 	}
 365 | 
 366 | 	/* Label was already compiled */
 367 | 	if (symbols->table[id].addr != 0)
 368 | 		emit_call(code, symbols->table[id].addr);
 369 | 	/* It wasn't */
 370 | 	else {
 371 | 		emit_byte(code, 0xE8);
 372 | 		add_patch(patches, id, code->length);
 373 | 		emit_word(code, 0x0000);
 374 | 	}
 375 | }
 376 | 
 377 | void compile_goto(Node* ast, CompileTarget* code)
 378 | {
 379 | 	int id = ast->op1->val;
 380 | 
 381 | 	/* Is label even real? */
 382 | 	if (!symbols->table[id].isreal) {
 383 | 		compile_error("GOTO label not present", ast);
 384 | 		return;
 385 | 	}
 386 | 
 387 | 	/* Label was already compiled */
 388 | 	if (symbols->table[id].addr != 0)
 389 | 		emit_jump(code, symbols->table[id].addr);
 390 | 	/* It wasn't */
 391 | 	else {
 392 | 		emit_byte(code, 0xE9);
 393 | 		add_patch(patches, id, code->length);
 394 | 		emit_word(code, 0x0000);
 395 | 	}
 396 | }
 397 | 
 398 | void compile_ink(Node* ast, CompileTarget* code)
 399 | {
 400 | 	compile_expression(ast->op1, code);
 401 | 
 402 | 	emit_byte(code, 0x89);			/* MOV */
 403 | 	emit_byte(code, 0x06);			/* [imm16], AX */
 404 | 	emit_word(code, INKADDR);
 405 | }
 406 | 
 407 | void compile_input(Node* ast, CompileTarget* code)
 408 | {
 409 | 	/* Do we want string? */
 410 | 	if (ast->op1->attribute == TOKEN_STRING_VARIABLE) {
 411 | 		uint16_t var = STRVARS + ast->op1->val * 128;
 412 | 		emit_byte(code, 0xC7);		/* MOV */
 413 | 		emit_byte(code, 0xC0);		/* AX, */
 414 | 		emit_word(code, var);		/* var */
 415 | 
 416 | 		/* CALL os_input_string */
 417 | 		emit_call(code, 0x0036);
 418 | 		/* CALL os_print_newline */
 419 | 		emit_call(code, 0x000F);
 420 | 
 421 | 		return;
 422 | 	}
 423 | 
 424 | 	/* No, we want numeric, use buffer */
 425 | 	uint16_t var = VARS + ast->op1->val * 2;
 426 | 
 427 | 	emit_byte(code, 0xC7);			/* MOV */
 428 | 	emit_byte(code, 0xC0);			/* AX, */
 429 | 	emit_word(code, STRBUF);		/* STRBUF */
 430 | 
 431 | 	/* CALL os_input_string */
 432 | 	emit_call(code, 0x0036);
 433 | 
 434 | 	/* Check for empty string */
 435 | 	emit_call(code, 0x002D);
 436 | 	emit_byte(code, 0x85);			/* TEST */
 437 | 	emit_byte(code, 0xC0);			/* AX, AX */
 438 | 	emit_byte(code, 0x75);			/* JNZ */
 439 | 	emit_byte(code, 0x08);			/* Over zeroing it */
 440 | 
 441 | 	/* We need to put "0(NUL)" in buffer */
 442 | 	emit_byte(code, 0xC7);			/* MOV */
 443 | 	emit_byte(code, 0xC0);			/* AX, */
 444 | 	emit_word(code, 0x0030);		/* 0x0030 -> "0\0" */
 445 | 	emit_byte(code, 0x89);			/* MOV */
 446 | 	emit_byte(code, 0x06);			/* [imm16], AX */
 447 | 	emit_word(code, STRBUF);		/* STRBUF */
 448 | 
 449 | 	/* Convert string to number */
 450 | 	emit_byte(code, 0xC7);			/* MOV */
 451 | 	emit_byte(code, 0xC6);			/* SI, */
 452 | 	emit_word(code, STRBUF);		/* STRBUF */
 453 | 	/* CALL os_string_to_int */
 454 | 	emit_call(code, 0x00B1);
 455 | 
 456 | 	/* Store it */
 457 | 	emit_byte(code, 0x89);			/* MOV */
 458 | 	emit_byte(code, 0x06);			/* [imm16], AX */
 459 | 	emit_word(code, var);			/* var */
 460 | 
 461 | 	/* CALL os_print_newline */
 462 | 	emit_call(code, 0x000F);
 463 | }
 464 | 
 465 | void compile_len(Node* ast, CompileTarget* code)
 466 | {
 467 | 	uint16_t var = VARS + ast->op2->val * 2;
 468 | 
 469 | 	/* Compile string first */
 470 | 	compile_expression(ast->op1, code);
 471 | 
 472 | 	/* Calculate its length */
 473 | 	emit_call(code, 0x002D);
 474 | 
 475 | 	/* Store it */
 476 | 	emit_byte(code, 0x89);			/* MOV */
 477 | 	emit_byte(code, 0x06);			/* [imm16], AX */
 478 | 	emit_word(code, var);			/* var */
 479 | }
 480 | 
 481 | void compile_listbox(Node* ast, CompileTarget* code)
 482 | {
 483 | 	uint16_t var = VARS + ast->op2->op2->op2->val * 2;
 484 | 
 485 | 	/* First string to AX */
 486 | 	compile_expression(ast->op1, code);
 487 | 	emit_byte(code, 0x8B);			/* MOV */
 488 | 	emit_byte(code, 0xC6);			/* AX, SI */
 489 | 
 490 | 	/* Second to BX */
 491 | 	compile_expression(ast->op2->op1, code);
 492 | 	emit_byte(code, 0x8B);			/* MOV */
 493 | 	emit_byte(code, 0xDE);			/* BX, SI */
 494 | 
 495 | 	/* Third to CX */
 496 | 	compile_expression(ast->op2->op2->op1, code);
 497 | 	emit_byte(code, 0x8B);			/* MOV */
 498 | 	emit_byte(code, 0xCE);			/* CX, SI */
 499 | 
 500 | 	/* CALL os_list_dialog */
 501 | 	emit_call(code, 0x00AB);
 502 | 
 503 | 	/* Maybe it was ESC? */
 504 | 	emit_byte(code, 0x72);			/* JC */
 505 | 	emit_byte(code, 0x06);			/* Over store */
 506 | 
 507 | 	/* Store the value */
 508 | 	emit_byte(code, 0x89);			/* MOV */
 509 | 	emit_byte(code, 0x06);			/* [imm16], AX */
 510 | 	emit_word(code, var);			/* var */
 511 | 	emit_byte(code, 0xEB);			/* JMP */
 512 | 	emit_byte(code, 0x04);			/* Over ESC */
 513 | 
 514 | 	/* ESC pressed, set AX to zero */
 515 | 	emit_byte(code, 0x33);			/* XOR */
 516 | 	emit_byte(code, 0xC0);			/* AX, AX */
 517 | 	emit_byte(code, 0xEB);			/* JMP */
 518 | 	emit_byte(code, 0xF6);			/* Back to store */
 519 | }
 520 | 
 521 | void compile_load(Node* ast, CompileTarget* code)
 522 | {
 523 | 	uint16_t rvar = VARS + ('r' - 'a') * 2;
 524 | 	uint16_t svar = VARS + ('s' - 'a') * 2;
 525 | 
 526 | 	/* Put load position in CX */
 527 | 	compile_expression(ast->op2, code);
 528 | 	emit_byte(code, 0x8B);			/* MOV */
 529 | 	emit_byte(code, 0xC8);			/* CX, AX */
 530 | 
 531 | 	/* Put filename in AX */
 532 | 	compile_expression(ast->op1, code);
 533 | 	emit_byte(code, 0x8B);			/* MOV */
 534 | 	emit_byte(code, 0xC6);			/* AX, SI */
 535 | 
 536 | 	/* CALL os_load_file */
 537 | 	emit_call(code, 0x0021);
 538 | 
 539 | 	/* First check if file was loaded */
 540 | 	emit_byte(code, 0x72);			/* JC */
 541 | 	emit_byte(code, 0x08);			/* To set R to 1 */
 542 | 
 543 | 	/* Okay, set BX to 0 and S to file size */
 544 | 	emit_byte(code, 0x89);			/* MOV */
 545 | 	emit_byte(code, 0x1E);			/* [imm16], BX */
 546 | 	emit_word(code, svar);			/* svar */
 547 | 	emit_byte(code, 0x33);			/* XOR */
 548 | 	emit_byte(code, 0xD8);			/* BX, BX */
 549 | 	emit_byte(code, 0xEB);			/* JMP */
 550 | 	emit_byte(code, 0x04);			/* Over failure */
 551 | 
 552 | 	/* Set BX to 1 */
 553 | 	emit_byte(code, 0xC7);			/* MOV */
 554 | 	emit_byte(code, 0xC3);			/* BX, */
 555 | 	emit_word(code, 0x0001);		/* 1 */
 556 | 
 557 | 	/* Store BX to R */
 558 | 	emit_byte(code, 0x89);			/* MOV */
 559 | 	emit_byte(code, 0x1E);			/* [imm16], BX */
 560 | 	emit_word(code, rvar);			/* rvar */
 561 | }
 562 | 
 563 | void compile_move(Node* ast, CompileTarget* code)
 564 | {
 565 | 	/* Put row in DX */
 566 | 	compile_expression(ast->op2, code);
 567 | 	emit_byte(code, 0x8B);			/* MOV */
 568 | 	emit_byte(code, 0xD0);			/* DX, AX */
 569 | 
 570 | 	/* Shift DX left, to make room for column */
 571 | 	emit_byte(code, 0xC1);			/* SHL */
 572 | 	emit_byte(code, 0xE2);			/* DX, */
 573 | 	emit_byte(code, 0x08);			/* 8 */
 574 | 
 575 | 	/* Add column to DX, putting it in DL */
 576 | 	compile_expression(ast->op1, code);
 577 | 	emit_byte(code, 0x03);			/* ADD */
 578 | 	emit_byte(code, 0xD0);			/* DX, AX */
 579 | 
 580 | 	/* CALL os_move_cursor */
 581 | 	emit_call(code, 0x0006);
 582 | }
 583 | 
 584 | void compile_number(Node* ast, CompileTarget* code)
 585 | {
 586 | 	bool src = compile_expression(ast->op1, code);
 587 | 
 588 | 	/* Is it numeric to string? */
 589 | 	if (src) {
 590 | 		uint16_t dst = STRVARS + ast->op2->val * 128;
 591 | 
 592 | 		/* CALL os_int_to_string */
 593 | 		emit_call(code, 0x0018);
 594 | 
 595 | 		/* Copy internal buffer into destination */
 596 | 		emit_byte(code, 0x8B);		/* MOV */
 597 | 		emit_byte(code, 0xF0);		/* SI, AX */
 598 | 		emit_byte(code, 0xC7);		/* MOV */
 599 | 		emit_byte(code, 0xC7);		/* DI, */
 600 | 		emit_word(code, dst);		/* dst */
 601 | 
 602 | 		/* CALL os_string_copy */
 603 | 		emit_call(code, 0x0039);
 604 | 	}
 605 | 	/* No, string to numeric */
 606 | 	else {
 607 | 		uint16_t dst = VARS + ast->op2->val * 2;
 608 | 
 609 | 		/* CALL os_string_to_int */
 610 | 		emit_call(code, 0x00B1);
 611 | 
 612 | 		/* Store result to variable */
 613 | 		emit_byte(code, 0x89);		/* MOV */
 614 | 		emit_byte(code, 0x06);		/* [imm16], AX */
 615 | 		emit_word(code, dst);		/* dst */
 616 | 	}
 617 | }
 618 | 
 619 | void compile_page(Node* ast, CompileTarget* code)
 620 | {
 621 | 	/* First value is new work page */
 622 | 	compile_expression(ast->op1, code);
 623 | 	emit_byte(code, 0x89);			/* MOV */
 624 | 	emit_byte(code, 0x06);			/* [imm16], AX */
 625 | 	emit_word(code, WORKPAGE);		/* WORKPAGE */
 626 | 
 627 | 	/* Second value is new active page */
 628 | 	compile_expression(ast->op2, code);
 629 | 	emit_byte(code, 0x89);			/* MOV */
 630 | 	emit_byte(code, 0x06);			/* [imm16], AX */
 631 | 	emit_word(code, ACTIVEPAGE);		/* ACTIVEPAGE */
 632 | 
 633 | 	/* Now set AX to 0x500, OR with ACTIVEPAGE */
 634 | 	emit_byte(code, 0xC7);			/* MOV */
 635 | 	emit_byte(code, 0xC0);			/* AX, */
 636 | 	emit_word(code, 0x0500);		/* 0x500 -> AH = 5 */
 637 | 	emit_byte(code, 0x0B);			/* OR */
 638 | 	emit_byte(code, 0x06);			/* AX, [imm16] */
 639 | 	emit_word(code, ACTIVEPAGE);		/* ACTIVEPAGE */
 640 | 
 641 | 	/* Now all is set for BIOS. Call it */
 642 | 	emit_byte(code, 0xCD);			/* INT */
 643 | 	emit_byte(code, 0x10);			/* 0x10 */
 644 | }
 645 | 
 646 | void compile_pause(Node* ast, CompileTarget* code)
 647 | {
 648 | 	compile_expression(ast->op1, code);
 649 | 	emit_call(code, 0x0024);
 650 | }
 651 | 
 652 | void compile_peek(Node* ast, CompileTarget* code)
 653 | {
 654 | 	uint16_t var = VARS + ast->op1->val * 2;
 655 | 
 656 | 	/* Address will be in AX, put it in BX and load */
 657 | 	compile_expression(ast->op2, code);
 658 | 	emit_byte(code, 0x8B);			/* MOV */
 659 | 	emit_byte(code, 0xD8);			/* BX, AX */
 660 | 	emit_byte(code, 0x8B);			/* MOV */
 661 | 	emit_byte(code, 0x07);			/* AX, [BX] */
 662 | 
 663 | 	/* Now mask the upper byte */
 664 | 	emit_byte(code, 0x25);			/* AND AX, */
 665 | 	emit_word(code, 0x00FF);		/* 0x00FF */
 666 | 
 667 | 	/* Store the result */
 668 | 	emit_byte(code, 0x89);			/* MOV */
 669 | 	emit_byte(code, 0x06);			/* [imm16], AX */
 670 | 	emit_word(code, var);			/* var */
 671 | }
 672 | 
 673 | void compile_peekint(Node* ast, CompileTarget* code)
 674 | {
 675 | 	uint16_t var = VARS + ast->op1->val * 2;
 676 | 
 677 | 	/* Address will be in AX, put it in BX and load */
 678 | 	compile_expression(ast->op2, code);
 679 | 	emit_byte(code, 0x8B);			/* MOV */
 680 | 	emit_byte(code, 0xD8);			/* BX, AX */
 681 | 	emit_byte(code, 0x8B);			/* MOV */
 682 | 	emit_byte(code, 0x07);			/* AX, [BX] */
 683 | 
 684 | 	/* Store the result */
 685 | 	emit_byte(code, 0x89);			/* MOV */
 686 | 	emit_byte(code, 0x06);			/* [imm16], AX */
 687 | 	emit_word(code, var);			/* var */
 688 | }
 689 | 
 690 | void compile_poke(Node* ast, CompileTarget* code)
 691 | {
 692 | 	/* Before we can poke, we need to read one byte more */
 693 | 	compile_expression(ast->op2, code);
 694 | 	emit_byte(code, 0x8B);			/* MOV */
 695 | 	emit_byte(code, 0xD8);			/* BX, AX */
 696 | 	emit_byte(code, 0x8B);			/* MOV */
 697 | 	emit_byte(code, 0x07);			/* AX, [BX] */
 698 | 	emit_byte(code, 0x25);			/* AND AX, */
 699 | 	emit_word(code, 0xFF00);		/* 0xFF00 */
 700 | 
 701 | 	/* Save upper byte in CX */
 702 | 	emit_byte(code, 0x8B);			/* MOV */
 703 | 	emit_byte(code, 0xC8);			/* CX, AX */
 704 | 
 705 | 	/* Get value to poke, and mask upper byte */
 706 | 	compile_expression(ast->op1, code);
 707 | 	emit_byte(code, 0x25);			/* AND AX, */
 708 | 	emit_word(code, 0x00FF);		/* 0x00FF */
 709 | 
 710 | 	/* Add it into CX */
 711 | 	emit_byte(code, 0x03);			/* ADD */
 712 | 	emit_byte(code, 0xC8);			/* CX, AX */
 713 | 
 714 | 	/* Store CX under [BX] */
 715 | 	emit_byte(code, 0x89);			/* MOV */
 716 | 	emit_byte(code, 0x0F);			/* [BX], CX */
 717 | }
 718 | 
 719 | void compile_pokeint(Node* ast, CompileTarget* code)
 720 | {
 721 | 	/* Put address in BX */
 722 | 	compile_expression(ast->op2, code);
 723 | 	emit_byte(code, 0x8B);			/* MOV */
 724 | 	emit_byte(code, 0xD8);			/* BX, AX */
 725 | 
 726 | 	/* Get value to poke, and mask upper byte */
 727 | 	compile_expression(ast->op1, code);
 728 | 
 729 | 	/* Store AX under [BX] */
 730 | 	emit_byte(code, 0x89);			/* MOV */
 731 | 	emit_byte(code, 0x07);			/* [BX], AX */
 732 | }
 733 | 
 734 | void compile_port(Node* ast, CompileTarget* code)
 735 | {
 736 | 	/* First set DX to wanted port */
 737 | 	compile_expression(ast->op2->op1, code);
 738 | 	emit_byte(code, 0x8B);			/* MOV */
 739 | 	emit_byte(code, 0xD0);			/* DX, AX */
 740 | 
 741 | 	/* Do we want to write byte out? */
 742 | 	if (ast->op1->attribute == TOKEN_OUT) {
 743 | 		compile_expression(ast->op2->op2, code);
 744 | 
 745 | 		/* CALL os_port_byte_out */
 746 | 		emit_call(code, 0x00C9);
 747 | 	}
 748 | 	/* No, read it in */
 749 | 	else {
 750 | 		uint16_t var = VARS + ast->op2->op2->val * 2;
 751 | 
 752 | 		/* CALL os_port_byte_in */
 753 | 		emit_call(code, 0x00CC);
 754 | 
 755 | 		/* Mask upper byte (just in case) and store */
 756 | 		emit_byte(code, 0x25);		/* AND AX, */
 757 | 		emit_word(code, 0x00FF);	/* 0x00FF */
 758 | 		emit_byte(code, 0x89);		/* MOV */
 759 | 		emit_byte(code, 0x06);		/* [imm16], AX */
 760 | 		emit_word(code, var);		/* var */
 761 | 	}
 762 | }
 763 | 
 764 | void compile_print(Node* ast, CompileTarget* code)
 765 | {
 766 | 	/* First we need to compile what we want to print */
 767 | 	bool expr = compile_expression(ast->op2->op1, code);
 768 | 
 769 | 	/* Check if modifier is valid */
 770 | 	if (!expr && ast->op1 != NULL) {
 771 | 		compile_error("PRINT modifier used with string", ast);
 772 | 		return;
 773 | 	}
 774 | 
 775 | 	/* We want to print SI, but if it is numeric, we need to
 776 | 	   fill it with what we want */
 777 | 	if (expr && ast->op1 == NULL) {
 778 | 		/* CALL os_int_to_string */
 779 | 		emit_call(code, 0x0018);
 780 | 
 781 | 		/* Put it in SI */
 782 | 		emit_byte(code, 0x8B);		/* MOV */
 783 | 		emit_byte(code, 0xF0);		/* SI, AX */
 784 | 	}
 785 | 	else if (expr && ast->op1->attribute == TOKEN_CHR) {
 786 | 		emit_byte(code, 0x25);		/* AND AX, */
 787 | 		emit_word(code, 0x00FF);	/* 0x00FF */
 788 | 		emit_byte(code, 0x89);		/* MOV */
 789 | 		emit_byte(code, 0x06);		/* [imm], AX */
 790 | 		emit_word(code, STRBUF);
 791 | 
 792 | 		/* Set SI to buffer */
 793 | 		emit_byte(code, 0xC7);		/* MOV */
 794 | 		emit_byte(code, 0xC6);		/* SI, imm */
 795 | 		emit_word(code, STRBUF);
 796 | 	}
 797 | 	else if (expr && ast->op1->attribute == TOKEN_HEX) {
 798 | 		/* First set DX to zero and BX to 16 */
 799 | 		emit_byte(code, 0x33);		/* XOR */
 800 | 		emit_byte(code, 0xD2);		/* DX, DX */
 801 | 		emit_byte(code, 0xC7);		/* MOV */
 802 | 		emit_byte(code, 0xC3);		/* BX, imm */
 803 | 		emit_word(code, 0x0010);	/* 0x10 = 16 */
 804 | 
 805 | 		/* Now set DI to STRBUF */
 806 | 		emit_byte(code, 0xC7);		/* MOV */
 807 | 		emit_byte(code, 0xC7);		/* DI, imm */
 808 | 		emit_word(code, STRBUF);
 809 | 
 810 | 		/* CALL os_long_int_to_string */
 811 | 		emit_call(code, 0x007E);
 812 | 
 813 | 		/* Now set SI to DI */
 814 | 		emit_byte(code, 0x8B);		/* MOV */
 815 | 		emit_byte(code, 0xF7);		/* SI, DI */
 816 | 	}
 817 | 
 818 | 	/* CALL print_string */
 819 | 	emit_call(code, PRINTSTR);
 820 | 
 821 | 	/* If there is no semicolon, print NL */
 822 | 	if (ast->op2->op2 == NULL) {
 823 | 		emit_byte(code, 0xC7);		/* MOV */
 824 | 		emit_byte(code, 0xC0);		/* AX, */
 825 | 		emit_word(code, 0x000A);	/* 0xA */
 826 | 		emit_byte(code, 0x89);		/* MOV */
 827 | 		emit_byte(code, 0x06);		/* [imm], AX */
 828 | 		emit_word(code, STRBUF);
 829 | 		emit_byte(code, 0xC7);		/* MOV */
 830 | 		emit_byte(code, 0xC6);		/* SI, */
 831 | 		emit_word(code, STRBUF);
 832 | 
 833 | 		/* CALL print_string */
 834 | 		emit_call(code, PRINTSTR);
 835 | 	}
 836 | }
 837 | 
 838 | void compile_rand(Node* ast, CompileTarget* code)
 839 | {
 840 | 	uint16_t var = VARS + ast->op1->val * 2;
 841 | 
 842 | 	/* Second value to BX */
 843 | 	compile_expression(ast->op2->op2, code);
 844 | 	emit_byte(code, 0x8B);			/* MOV */
 845 | 	emit_byte(code, 0xD8);			/* BX, AX */
 846 | 
 847 | 	/* First to AX */
 848 | 	compile_expression(ast->op2->op1, code);
 849 | 
 850 | 	/* CALL os_get_random */
 851 | 	emit_call(code, 0x00B7);
 852 | 
 853 | 	/* Store result from CX */
 854 | 	emit_byte(code, 0x89);			/* MOV */
 855 | 	emit_byte(code, 0x0E);			/* [imm16], CX */
 856 | 	emit_word(code, var);
 857 | }
 858 | 
 859 | void compile_rename(Node* ast, CompileTarget* code)
 860 | {
 861 | 	uint16_t rvar = VARS + ('r' - 'a') * 2;
 862 | 
 863 | 	/* First we check if destination exists */
 864 | 	compile_expression(ast->op2, code);
 865 | 	emit_byte(code, 0x8B);			/* MOV */
 866 | 	emit_byte(code, 0xC6);			/* AX, SI */
 867 | 	emit_byte(code, 0x8B);			/* MOV */
 868 | 	emit_byte(code, 0xDE);			/* BX, SI */
 869 | 
 870 | 	/* CALL os_file_exists */
 871 | 	emit_call(code, 0x0099);
 872 | 	emit_byte(code, 0x72);			/* JC */
 873 | 	emit_byte(code, 0x06);			/* Over failure */
 874 | 
 875 | 	/* Destination exists, set R to 3 */
 876 | 	emit_byte(code, 0xC7);			/* MOV */
 877 | 	emit_byte(code, 0xC0);			/* AX, */
 878 | 	emit_word(code, 0x0003);		/* 3 */
 879 | 	emit_byte(code, 0xEB);			/* JMP */
 880 | 	emit_byte(code, 0x00);			/* To store */
 881 | 	uint16_t patch = code->length - 1;
 882 | 
 883 | 	/* Now check source */
 884 | 	compile_expression(ast->op1, code);
 885 | 	emit_byte(code, 0x8B);			/* MOV */
 886 | 	emit_byte(code, 0xC6);			/* AX, SI */
 887 | 
 888 | 	/* CALL os_file_exists */
 889 | 	emit_call(code, 0x0099);
 890 | 	emit_byte(code, 0x72);			/* JC */
 891 | 	emit_byte(code, 0x09);			/* To set R to 1 */
 892 | 
 893 | 	/* Rename proper, CALL os_rename_file */
 894 | 	emit_call(code, 0x00A2);
 895 | 	emit_byte(code, 0x72);			/* JC */
 896 | 	emit_byte(code, 0x0A);			/* To set R to 2 */
 897 | 
 898 | 	/* No errors, set R to 0 */
 899 | 	emit_byte(code, 0x33);			/* XOR */
 900 | 	emit_byte(code, 0xC0);			/* AX, AX */
 901 | 	emit_byte(code, 0xEB);			/* JMP */
 902 | 	emit_byte(code, 0x0A);			/* To store (+10) */
 903 | 
 904 | 	/* Source not present, set R to 1 */
 905 | 	emit_byte(code, 0xC7);			/* MOV */
 906 | 	emit_byte(code, 0xC0);			/* AX, */
 907 | 	emit_word(code, 0x0001);		/* 1 */
 908 | 	emit_byte(code, 0xEB);			/* JMP */
 909 | 	emit_byte(code, 0x04);			/* To store (+4) */
 910 | 
 911 | 	/* Rename failed, set R to 2 */
 912 | 	emit_byte(code, 0xC7);			/* MOV */
 913 | 	emit_byte(code, 0xC0);			/* AX, */
 914 | 	emit_word(code, 0x0002);		/* 2 */
 915 | 
 916 | 	/* Store AX to R (and patch up) */
 917 | 	uint8_t rel = code->length - (patch + 1);
 918 | 	code->code[patch] = rel & 0xFF;
 919 | 	emit_byte(code, 0x89);			/* MOV */
 920 | 	emit_byte(code, 0x06);			/* [imm16], AX */
 921 | 	emit_word(code, rvar);
 922 | }
 923 | 
 924 | void compile_return(Node* ast, CompileTarget* code)
 925 | {
 926 | 	ast->op1 = NULL;			/* Shut up */
 927 | 	emit_byte(code, 0xC3);			/* RET */
 928 | }
 929 | 
 930 | void compile_save(Node* ast, CompileTarget* code)
 931 | {
 932 | 	uint16_t rvar = VARS + ('r' - 'a') * 2;
 933 | 
 934 | 	/* Put load address in BX */
 935 | 	compile_expression(ast->op2->op1, code);
 936 | 	emit_byte(code, 0x8B);			/* MOV */
 937 | 	emit_byte(code, 0xD8);			/* BX, AX */
 938 | 
 939 | 	/* Put file size in CX */
 940 | 	compile_expression(ast->op2->op2, code);
 941 | 	emit_byte(code, 0x8B);			/* MOV */
 942 | 	emit_byte(code, 0xC8);			/* CX, AX */
 943 | 
 944 | 	/* Put filename in AX */
 945 | 	compile_expression(ast->op1, code);
 946 | 	emit_byte(code, 0x8B);			/* MOV */
 947 | 	emit_byte(code, 0xC6);			/* AX, SI */
 948 | 
 949 | 	/* CALL os_file_exists */
 950 | 	emit_call(code, 0x0099);
 951 | 	emit_byte(code, 0x72);			/* JC */
 952 | 	emit_byte(code, 0x02);			/* Proceed */
 953 | 	emit_byte(code, 0xEB);			/* JMP */
 954 | 	emit_byte(code, 0x09);			/* File exists */
 955 | 
 956 | 	/* All set, CALL os_write_file */
 957 | 	emit_call(code, 0x0096);
 958 | 	emit_byte(code, 0x72);			/* JC */
 959 | 	emit_byte(code, 0x0A);			/* Couldn't save */
 960 | 
 961 | 	/* All is good, set AX to 0 */
 962 | 	emit_byte(code, 0x33);			/* XOR */
 963 | 	emit_byte(code, 0xC0);			/* AX, AX */
 964 | 	emit_byte(code, 0xEB);			/* JMP */
 965 | 	emit_byte(code, 0x0A);			/* To store */
 966 | 
 967 | 	/* File exists, set AX to 2 */
 968 | 	emit_byte(code, 0xC7);			/* MOV */
 969 | 	emit_byte(code, 0xC0);			/* AX, */
 970 | 	emit_word(code, 0x0002);		/* 2 */
 971 | 	emit_byte(code, 0xEB);			/* JMP */
 972 | 	emit_byte(code, 0x04);			/* To store */
 973 | 
 974 | 	/* Cannot save, set AX to 1 */
 975 | 	emit_byte(code, 0xC7);			/* MOV */
 976 | 	emit_byte(code, 0xC0);			/* AX, */
 977 | 	emit_word(code, 0x0001);		/* 1 */
 978 | 
 979 | 	/* Store AX to R */
 980 | 	emit_byte(code, 0x89);			/* MOV */
 981 | 	emit_byte(code, 0x06);			/* [imm16], AX */
 982 | 	emit_word(code, rvar);			/* rvar */
 983 | }
 984 | 
 985 | void compile_serial(Node* ast, CompileTarget* code)
 986 | {
 987 | 	/* Turn serial on */
 988 | 	if (ast->op1->attribute == TOKEN_ON) {
 989 | 		int mode = ast->op2->val;
 990 | 
 991 | 		/* Slow mode */
 992 | 		if (mode == 1200) {
 993 | 			emit_byte(code, 0x33);	/* XOR */
 994 | 			emit_byte(code, 0xC0);	/* AX, AX */
 995 | 		}
 996 | 		/* Fast mode */
 997 | 		else if (mode == 9600) {
 998 | 			emit_byte(code, 0xC7);	/* MOV */
 999 | 			emit_byte(code, 0xC0);	/* AX, */
1000 | 			emit_word(code, 0x0001);/* 1 */
1001 | 		}
1002 | 		/* Invalid mode, compile error */
1003 | 		else {
1004 | 			compile_error("Invalid mode for SERIAL",
1005 | 				ast);
1006 | 		}
1007 | 
1008 | 		/* CALL os_serial_port_enable */
1009 | 		emit_call(code, 0x00BD);
1010 | 		return;
1011 | 	}
1012 | 
1013 | 	/* Send value through serial */
1014 | 	if (ast->op1->attribute == TOKEN_SEND) {
1015 | 		compile_expression(ast->op2, code);
1016 | 
1017 | 		/* CALL os_send_via_serial */
1018 | 		emit_call(code, 0x0060);
1019 | 
1020 | 		return;
1021 | 	}
1022 | 
1023 | 	/* We want to receive byte */
1024 | 	uint16_t var = VARS + ast->op2->val * 2;
1025 | 
1026 | 	/* CALL os_get_via_serial */
1027 | 	emit_call(code, 0x0063);
1028 | 
1029 | 	/* Mask upper byte and store */
1030 | 	emit_byte(code, 0x25);			/* AND AX, */
1031 | 	emit_word(code, 0x00FF);		/* 0x00FF */
1032 | 	emit_byte(code, 0x89);			/* MOV */
1033 | 	emit_byte(code, 0x06);			/* [imm16], AX */
1034 | 	emit_word(code, var);			/* var */
1035 | }
1036 | 
1037 | void compile_size(Node* ast, CompileTarget* code)
1038 | {
1039 | 	uint16_t rvar = VARS + ('r' - 'a') * 2;
1040 | 	uint16_t svar = VARS + ('s' - 'a') * 2;
1041 | 
1042 | 	/* Store filename to AX */
1043 | 	compile_expression(ast->op1, code);
1044 | 	emit_byte(code, 0x8B);			/* MOV */
1045 | 	emit_byte(code, 0xC6);			/* AX, SI */
1046 | 
1047 | 	/* CALL os_get_file_size */
1048 | 	emit_call(code, 0x00A5);
1049 | 	emit_byte(code, 0x72);			/* JC */
1050 | 	emit_byte(code, 0x08);			/* To failure */
1051 | 
1052 | 	/* Okay, store size (BX) to S and zero it */
1053 | 	emit_byte(code, 0x89);			/* MOV */
1054 | 	emit_byte(code, 0x1E);			/* [imm16], BX */
1055 | 	emit_word(code, svar);			/* svar */
1056 | 	emit_byte(code, 0x33);			/* XOR */
1057 | 	emit_byte(code, 0xDB);			/* BX, BX */
1058 | 	emit_byte(code, 0xEB);			/* JMP */
1059 | 	emit_byte(code, 0x04);			/* Over failure */
1060 | 
1061 | 	/* No such file found, set BX to 1 */
1062 | 	emit_byte(code, 0xC7);			/* MOV */
1063 | 	emit_byte(code, 0xC3);			/* BX, */
1064 | 	emit_word(code, 0x0001);		/* 1 */
1065 | 
1066 | 	/* Store BX to R */
1067 | 	emit_byte(code, 0x89);			/* MOV */
1068 | 	emit_byte(code, 0x1E);			/* [imm16], BX */
1069 | 	emit_word(code, rvar);			/* rvar */
1070 | }
1071 | 
1072 | void compile_sound(Node* ast, CompileTarget* code)
1073 | {
1074 | 	/* Frequency to AX */
1075 | 	compile_expression(ast->op1, code);
1076 | 
1077 | 	/* CALL os_speaker_tone */
1078 | 	emit_call(code, 0x001B);
1079 | 
1080 | 	/* Duration to AX */
1081 | 	compile_expression(ast->op2, code);
1082 | 
1083 | 	/* CALL os_pause and CALL os_speaker_off */
1084 | 	emit_call(code, 0x0024);
1085 | 	emit_call(code, 0x001E);
1086 | }
1087 | 
1088 | void compile_string(Node* ast, CompileTarget* code)
1089 | {
1090 | 	uint16_t var = VARS + ast->op2->op2->op2->val * 2;
1091 | 
1092 | 	/* String in SI */
1093 | 	compile_expression(ast->op2->op1, code);
1094 | 	/* Offset in AX */
1095 | 	compile_expression(ast->op2->op2->op1, code);
1096 | 
1097 | 	/* Offsets start from 1, not zero */
1098 | 	emit_byte(code, 0x03);			/* ADD */
1099 | 	emit_byte(code, 0xF0);			/* SI, AX */
1100 | 	emit_byte(code, 0xFF);			/* DEC */
1101 | 	emit_byte(code, 0xCE);			/* SI */
1102 | 
1103 | 	/* GET it with LODSB */
1104 | 	if (ast->op1->attribute == TOKEN_GET) {
1105 | 		/* Mask upper byte of AX */
1106 | 		emit_byte(code, 0xAC);		/* LODSB */
1107 | 		emit_byte(code, 0x25);		/* AND AX, */
1108 | 		emit_word(code, 0x00FF);
1109 | 
1110 | 		/* And store it back */
1111 | 		emit_byte(code, 0x89);		/* MOV */
1112 | 		emit_byte(code, 0x06);		/* [imm16], AX */
1113 | 		emit_word(code, var);		/* var */
1114 | 	}
1115 | 	/* Set it with STOSB */
1116 | 	else {
1117 | 		/* Put SI to DI first */
1118 | 		emit_byte(code, 0x8B);		/* MOV */
1119 | 		emit_byte(code, 0xFE);		/* DI, SI */
1120 | 
1121 | 		/* Compile variable now */
1122 | 		compile_expression(ast->op2->op2->op2, code);
1123 | 
1124 | 		/* Set byte and all is done */
1125 | 		emit_byte(code, 0xAA);		/* STOSB */
1126 | 	}
1127 | }
1128 | 
1129 | void compile_waitkey(Node* ast, CompileTarget* code)
1130 | {
1131 | 	uint16_t var = VARS + ast->op1->val * 2;
1132 | 
1133 | 	/* CALL os_wait_for_key */
1134 | 	emit_call(code, 0x0012);
1135 | 
1136 | 	/* Is it special char? */
1137 | 	emit_byte(code, 0x3D);			/* CMP AX, */
1138 | 	emit_word(code, 0x48E0);		/* 0x48E0 */
1139 | 	emit_byte(code, 0x74);			/* JE */
1140 | 	emit_byte(code, 0x18);			/* To UP (24) */
1141 | 	emit_byte(code, 0x3D);			/* CMP AX, */
1142 | 	emit_word(code, 0x50E0);		/* 0x50E0 */
1143 | 	emit_byte(code, 0x74);			/* JE */
1144 | 	emit_byte(code, 0x19);			/* To DOWN (25) */
1145 | 	emit_byte(code, 0x3D);			/* CMP AX, */
1146 | 	emit_word(code, 0x4BE0);		/* 0x4BE0 */
1147 | 	emit_byte(code, 0x74);			/* JE */
1148 | 	emit_byte(code, 0x1A);			/* To LEFT (26) */
1149 | 	emit_byte(code, 0x3D);			/* CMP AX, */
1150 | 	emit_word(code, 0x4DE0);		/* 0x4DE0 */
1151 | 	emit_byte(code, 0x74);			/* JE */
1152 | 	emit_byte(code, 0x1B);			/* To RIGHT (27) */
1153 | 
1154 | 	/* Store the character */
1155 | 	emit_byte(code, 0x25);			/* AND AX, */
1156 | 	emit_word(code, 0x00FF);		/* 0x00FF */
1157 | 	emit_byte(code, 0x89);			/* MOV */
1158 | 	emit_byte(code, 0x06);			/* [imm16], AX */
1159 | 	emit_word(code, var);
1160 | 	emit_byte(code, 0xEB);			/* JMP */
1161 | 	emit_byte(code, 0x18);			/* Over others (24) */
1162 | 
1163 | 	/* It was UP */
1164 | 	emit_byte(code, 0xC7);			/* MOV */
1165 | 	emit_byte(code, 0xC0);			/* AX, */
1166 | 	emit_word(code, 0x0001);		/* 1 */
1167 | 	emit_byte(code, 0xEB);			/* JMP */
1168 | 	emit_byte(code, 0xF1);			/* To store (-15) */
1169 | 
1170 | 	/* It was DOWN */
1171 | 	emit_byte(code, 0xC7);			/* MOV */
1172 | 	emit_byte(code, 0xC0);			/* AX, */
1173 | 	emit_word(code, 0x0002);		/* 2 */
1174 | 	emit_byte(code, 0xEB);			/* JMP */
1175 | 	emit_byte(code, 0xEB);			/* To store (-21) */
1176 | 
1177 | 	/* It was LEFT */
1178 | 	emit_byte(code, 0xC7);			/* MOV */
1179 | 	emit_byte(code, 0xC0);			/* AX, */
1180 | 	emit_word(code, 0x0003);		/* 3 */
1181 | 	emit_byte(code, 0xEB);			/* JMP */
1182 | 	emit_byte(code, 0xE5);			/* To store (-27) */
1183 | 
1184 | 	/* It was RIGHT */
1185 | 	emit_byte(code, 0xC7);			/* MOV */
1186 | 	emit_byte(code, 0xC0);			/* AX, */
1187 | 	emit_word(code, 0x0004);		/* 4 */
1188 | 	emit_byte(code, 0xEB);			/* JMP */
1189 | 	emit_byte(code, 0xDF);			/* To store (-33) */
1190 | }
1191 | 
1192 | /* ========================= MAIN COMPILATION CODE ========================== */
1193 | typedef void (*KeywordCompileFuncPtr)(Node*, CompileTarget*);
1194 | static KeywordCompileFuncPtr compiler[] = {
1195 | 	[TOKEN_ALERT] = compile_alert,
1196 | 	[TOKEN_ASKFILE] = compile_askfile,
1197 | 	[TOKEN_BREAK] = compile_break,
1198 | 	[TOKEN_CALL] = compile_call,
1199 | 	[TOKEN_CASE] = compile_case,
1200 | 	[TOKEN_CLS] = compile_cls,
1201 | 	[TOKEN_CURSOR] = compile_cursor,
1202 | 	[TOKEN_CURSCHAR] = compile_curschar,
1203 | 	[TOKEN_CURSCOL] = compile_curscol,
1204 | 	[TOKEN_CURSPOS] = compile_curspos,
1205 | 	[TOKEN_DELETE] = compile_delete,
1206 | 	[TOKEN_END] = compile_end,
1207 | 	[TOKEN_FILES] = compile_files,
1208 | 	[TOKEN_GETKEY] = compile_getkey,
1209 | 	[TOKEN_GOSUB] = compile_gosub,
1210 | 	[TOKEN_GOTO] = compile_goto,
1211 | 	[TOKEN_INCLUDE] = NULL,
1212 | 	[TOKEN_INK] = compile_ink,
1213 | 	[TOKEN_INPUT] = compile_input,
1214 | 	[TOKEN_LEN] = compile_len,
1215 | 	[TOKEN_LISTBOX] = compile_listbox,
1216 | 	[TOKEN_LOAD] = compile_load,
1217 | 	[TOKEN_MOVE] = compile_move,
1218 | 	[TOKEN_NUMBER] = compile_number,
1219 | 	[TOKEN_PAGE] = compile_page,
1220 | 	[TOKEN_PAUSE] = compile_pause,
1221 | 	[TOKEN_PEEK] = compile_peek,
1222 | 	[TOKEN_PEEKINT] = compile_peekint,
1223 | 	[TOKEN_POKE] = compile_poke,
1224 | 	[TOKEN_POKEINT] = compile_pokeint,
1225 | 	[TOKEN_PORT] = compile_port,
1226 | 	[TOKEN_PRINT] = compile_print,
1227 | 	[TOKEN_RAND] = compile_rand,
1228 | 	[TOKEN_READ] = NULL,
1229 | 	[TOKEN_RENAME] = compile_rename,
1230 | 	[TOKEN_RETURN] = compile_return,
1231 | 	[TOKEN_SAVE] = compile_save,
1232 | 	[TOKEN_SERIAL] = compile_serial,
1233 | 	[TOKEN_SIZE] = compile_size,
1234 | 	[TOKEN_SOUND] = compile_sound,
1235 | 	[TOKEN_STRING] = compile_string,
1236 | 	[TOKEN_WAITKEY] = compile_waitkey
1237 | };
1238 | 
1239 | void compile_keyword(Node* ast, CompileTarget* code)
1240 | {
1241 | 	TokenType t = ast->attribute;
1242 | 	KeywordCompileFuncPtr rule = compiler[t];
1243 | 
1244 | 	/* Compile our keyword */
1245 | 	rule(ast, code);
1246 | }
1247 | 
1248 | void init_kword_compiler(StringTable* s, SymbolTable* t, PatchTable* p)
1249 | {
1250 | 	strings = s;
1251 | 	symbols = t;
1252 | 	patches = p;
1253 | }
1254 | 


--------------------------------------------------------------------------------