├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── README.md ├── lib ├── binary │ └── shift.asm └── math │ ├── divide.asm │ └── multiply.asm ├── pi.asm ├── pi.bin └── src ├── assembler ├── codegen.rs ├── mod.rs ├── parser │ ├── ast.rs │ ├── grammar.md │ ├── lexer.rs │ ├── mod.rs │ └── syntax_ext │ │ ├── auto_address.rs │ │ ├── constants.rs │ │ ├── imports.rs │ │ ├── labels.rs │ │ ├── mod.rs │ │ └── subroutines.rs └── util.rs ├── machine.rs ├── main.rs └── vm └── mod.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | [root] 2 | name = "tiny-asm" 3 | version = "0.0.1" 4 | dependencies = [ 5 | "ansi_term 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", 6 | "docopt 0.6.43 (registry+https://github.com/rust-lang/crates.io-index)", 7 | "docopt_macros 0.6.43 (registry+https://github.com/rust-lang/crates.io-index)", 8 | "env_logger 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", 9 | "lazy_static 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", 10 | "log 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", 11 | "rand 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", 12 | "rustc-serialize 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", 13 | ] 14 | 15 | [[package]] 16 | name = "ansi_term" 17 | version = "0.5.0" 18 | source = "registry+https://github.com/rust-lang/crates.io-index" 19 | 20 | [[package]] 21 | name = "docopt" 22 | version = "0.6.43" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | dependencies = [ 25 | "libc 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", 26 | "regex 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)", 27 | "rustc-serialize 0.3.0 
(registry+https://github.com/rust-lang/crates.io-index)", 28 | ] 29 | 30 | [[package]] 31 | name = "docopt_macros" 32 | version = "0.6.43" 33 | source = "registry+https://github.com/rust-lang/crates.io-index" 34 | dependencies = [ 35 | "docopt 0.6.43 (registry+https://github.com/rust-lang/crates.io-index)", 36 | ] 37 | 38 | [[package]] 39 | name = "env_logger" 40 | version = "0.2.2" 41 | source = "registry+https://github.com/rust-lang/crates.io-index" 42 | dependencies = [ 43 | "log 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", 44 | "regex 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)", 45 | ] 46 | 47 | [[package]] 48 | name = "lazy_static" 49 | version = "0.1.7" 50 | source = "registry+https://github.com/rust-lang/crates.io-index" 51 | 52 | [[package]] 53 | name = "libc" 54 | version = "0.1.2" 55 | source = "registry+https://github.com/rust-lang/crates.io-index" 56 | 57 | [[package]] 58 | name = "log" 59 | version = "0.2.5" 60 | source = "registry+https://github.com/rust-lang/crates.io-index" 61 | 62 | [[package]] 63 | name = "rand" 64 | version = "0.1.3" 65 | source = "registry+https://github.com/rust-lang/crates.io-index" 66 | dependencies = [ 67 | "libc 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", 68 | "log 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", 69 | ] 70 | 71 | [[package]] 72 | name = "regex" 73 | version = "0.1.16" 74 | source = "registry+https://github.com/rust-lang/crates.io-index" 75 | 76 | [[package]] 77 | name = "rustc-serialize" 78 | version = "0.3.0" 79 | source = "registry+https://github.com/rust-lang/crates.io-index" 80 | 81 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | 3 | name = "tiny-asm" 4 | version = "0.0.1" 5 | authors = ["Markus Siemens "] 6 | 7 | [[bin]] 8 | name = "tiny" 9 | path = "src/main.rs" 10 | 11 | 12 | 
[profile.release] 13 | opt-level = 3 14 | lto = true 15 | 16 | [dependencies] 17 | ansi_term = "*" 18 | docopt = "*" 19 | docopt_macros = "*" 20 | env_logger = "*" 21 | lazy_static = "*" 22 | log = "*" 23 | rand = "*" 24 | rustc-serialize = "*" -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rust-tinyasm 2 | 3 | A Rust port of my Python submission for http://redd.it/1kqxz9: 4 | 5 | > Tiny, a very simple fictional computer architecture, is programmed by an assembly language that has 16 mnemonics, with 37 unique op-codes. The system is based on Harvard architecture, and is very straight-forward: program memory is different from working memory, the machine only executes one instruction at a time, memory is an array of bytes from index 0 to index 255 (inclusive), and doesn't have any relative addressing modes. 6 | > 7 | Your goal will be to write an assembler for Tiny: though you don't need to simulate the code or machine components, you must take given assembly-language source code and produce a list of hex op-codes. You are essentially writing code that converts the lowest human-readable language to machine-readable language! 8 | 9 | My original Python submission can be found here: 10 | https://github.com/msiemens/TINY.ASM/. This is a Rust port. It features 11 | a much better architecture, including a proper parser and abstract syntax tree. 12 | Like the Python version, this also comes with a small VM. 
13 | 14 | ## Usage 15 | 16 | Run the assembler: 17 | 18 | $ tiny asm <input> 19 | 20 | Create a binary file that the VM can execute: 21 | 22 | $ tiny asm --bin <input> <output> 23 | 24 | Run the VM: 25 | 26 | $ tiny vm <input> 27 | 28 | 29 | ## Syntax (+ Additions) 30 | 31 | v--- operation 32 | MOV [0] 1 33 | ^ ^---- literal 34 | |------- memory address 35 | 36 | 37 | **Comments** 38 | 39 | ; This is a comment 40 | 41 | **Labels** 42 | 43 | label: 44 | JMP :label 45 | 46 | **Constants** 47 | 48 | $mem_addr = [0] 49 | $some_const = 5 50 | 51 | MOV $mem_addr $some_const 52 | 53 | **Imports** 54 | 55 | #import <file_name.asm> 56 | 57 | **Char Constants** 58 | 59 | APRINT '!' ; Prints: ! 60 | APRINT '\n' ; Prints a newline 61 | 62 | **Subroutines** 63 | 64 | ; Define a subroutine 65 | ; name ----v v---- number of arguments 66 | @start(binary_shift_left, 1) 67 | ADD $arg0 $arg0 68 | MOV $return $arg0 69 | @end() 70 | 71 | ; Call a subroutine 72 | @call(binary_shift_left, 5) 73 | @call(binary_shift_left, [5]) 74 | 75 | 76 | ## LICENSE 77 | 78 | The MIT License (MIT) 79 | 80 | Copyright (c) 2014 Markus Siemens 81 | 82 | Permission is hereby granted, free of charge, to any person obtaining a copy of 83 | this software and associated documentation files (the "Software"), to deal in 84 | the Software without restriction, including without limitation the rights to 85 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 86 | the Software, and to permit persons to whom the Software is furnished to do so, 87 | subject to the following conditions: 88 | 89 | The above copyright notice and this permission notice shall be included in all 90 | copies or substantial portions of the Software. 91 | 92 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 93 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 94 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 95 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 96 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 97 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /lib/binary/shift.asm: -------------------------------------------------------------------------------- 1 | ; Define constants 2 | $shift_r_and = [_] 3 | $shift_r_cmp = [_] 4 | $shift_r_bit2 = 2 5 | $shift_r_bit3 = 4 6 | $shift_r_bit4 = 8 7 | $shift_r_bit5 = 16 8 | $shift_r_bit6 = 32 9 | $shift_r_bit7 = 64 10 | $shift_r_bit8 = 128 11 | 12 | 13 | ; SUBROUTINE: Shift left 14 | ; ---------------------- 15 | 16 | ; Input: $arg0 as integer 17 | ; Output: $return's the integer shifted left 18 | @start(binary_shift_left, 1) 19 | ADD $arg0 $arg0 20 | MOV $return $arg0 21 | @end() 22 | 23 | 24 | ; SUBROUTINE: Shift right 25 | ; ----------------------- 26 | 27 | ; Input: $arg0 as integer 28 | ; Output: $return's the integer shifted right 29 | ; Algorithm: We check every bit, and if it is set, we add bit_val/2 30 | ; to the result: 31 | ; Input: 100 ← bit(3) is set, value: 4, add 4/2=2 32 | ; Output: 010 33 | @start(binary_shift_right, 1) 34 | MOV $return 0 ; Initialize memory 35 | 36 | ; shift_r_bit2: 37 | MOV $shift_r_cmp $arg0 38 | AND $shift_r_cmp $shift_r_bit2 39 | JEQ :shift_r_bit3 $shift_r_cmp 0 ; v & 2 == 0 → skip 40 | ADD $return 1 ; Add 2 / 2 = 1 41 | 42 | shift_r_bit3: 43 | MOV $shift_r_cmp $arg0 44 | AND $shift_r_cmp $shift_r_bit3 45 | JEQ :shift_r_bit4 $shift_r_cmp 0 ; v & 4 == 0 → skip 46 | ADD $return 2 ; Add 4 / 2 = 2 47 | 48 | shift_r_bit4: 49 | MOV $shift_r_cmp $arg0 50 | AND $shift_r_cmp $shift_r_bit4 51 | JEQ :shift_r_bit5 $shift_r_cmp 0 ; v & 8 == 0 → skip 52 | ADD $return 4 ; Add 8 / 2 = 4 53 | 54 | shift_r_bit5: 55 | MOV $shift_r_cmp $arg0 56 | AND $shift_r_cmp $shift_r_bit5 57 | JEQ :shift_r_bit6 
$shift_r_cmp 0 ; v & 16 == 0 → skip 58 | ADD $return 8 ; Add 16 / 2 = 8 59 | 60 | shift_r_bit6: 61 | MOV $shift_r_cmp $arg0 62 | AND $shift_r_cmp $shift_r_bit6 63 | JEQ :shift_r_bit7 $shift_r_cmp 0 ; v & 32 == 0 → skip 64 | ADD $return 16 ; Add 32 / 2 = 16 65 | 66 | shift_r_bit7: 67 | MOV $shift_r_cmp $arg0 68 | AND $shift_r_cmp $shift_r_bit7 69 | JEQ :shift_r_bit8 $shift_r_cmp 0 ; v & 64 == 0 → skip 70 | ADD $return 32 ; Add 64 / 2 = 32 71 | 72 | shift_r_bit8: 73 | MOV $shift_r_cmp $arg0 74 | AND $shift_r_cmp $shift_r_bit8 75 | JEQ :shift_r_return $shift_r_cmp 0 ; v & 128 == 0 → skip 76 | ADD $return 64 ; Add 128 / 2 = 64 77 | 78 | shift_r_return: 79 | @end() 80 | -------------------------------------------------------------------------------- /lib/math/divide.asm: -------------------------------------------------------------------------------- 1 | 2 | 3 | ; SUBROUTINE: Divide two integers 4 | ; --------------------------------- 5 | 6 | ; Input: $arg0: dividend as int, $arg1: divisor as int 7 | ; Output: $return's the arg0/arg1 as int division 8 | ; Algorithm: Subtract $arg1 from $arg0 until $arg0 < $arg1, adding 1 to $return per subtraction 9 | 10 | @start(divide, 2) 11 | math_div_loop: 12 | ; arg0 < arg1 → break 13 | JLS :math_div_done $arg0 $arg1 14 | ADD $return 1 15 | SUB $arg0 $arg1 16 | JMP :math_div_loop ; Loop iteration 17 | 18 | math_div_done: 19 | @end() -------------------------------------------------------------------------------- /lib/math/multiply.asm: -------------------------------------------------------------------------------- 1 | ; Define constants 2 | $math_mul_counter = [_] 3 | 4 | 5 | ; SUBROUTINE: Multiply two integers 6 | ; --------------------------------- 7 | 8 | ; Input: $arg0 & $arg1 as two integers 9 | ; Output: $return's the multiplication of the two 10 | ; Algorithm: Add $arg0 to $return $arg1 times 11 | @start(multiply, 2) 12 | math_mul_loop: 13 | ; counter == arg1 → break 14 | JEQ :math_mul_done $arg1 $math_mul_counter 15 | ADD $math_mul_counter 1 16 | ADD $return $arg0 17 | JMP :math_mul_loop ; Loop iteration 18 | 
19 | math_mul_done: 20 | @end() -------------------------------------------------------------------------------- /pi.asm: -------------------------------------------------------------------------------- 1 | ; Approximate PI 2 | ; -------------- 3 | ; 4 | ; by Markus Siemens 5 | 6 | ; Define constants 7 | $max_rand_square = 144 ; (RAND_MAX/2) ** 2 8 | 9 | ; Approximate PI 10 | $pi_iterations = 100 ; Iteration count 11 | $pi_rand_divider = 2 ; Divide the RANDOM numbers by this, so we don't overflow 12 | $pi_counter = [_] ; Loop counter 13 | $pi_rand0 = [_] ; First RANDOM number 14 | $pi_rand1 = [_] ; Second RANDOM number 15 | $pi_rand_sum = [_] 16 | $pi_inside = [_] ; Number of dots inside the circle 17 | 18 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 19 | 20 | main: 21 | MOV $pi_counter 0 ; Initialize memory 22 | 23 | main_loop: ; The main loop 24 | ; Loop break condition: $pi_counter == $pi_iterations 25 | JEQ :print $pi_counter $pi_iterations 26 | APRINT '.' 
27 | MOV $pi_rand_sum 0 ; Reset sum of rand0^2 and rand1^2 28 | 29 | ; Get random numbers 30 | RANDOM $pi_rand0 31 | @call(divide, $pi_rand0, $pi_rand_divider) ; $pi_rand0 ^ 2 would overflow otherwise 32 | MOV $pi_rand0 $return 33 | 34 | RANDOM $pi_rand1 35 | @call(divide, $pi_rand1, $pi_rand_divider) ; $pi_rand1 ^ 2 would overflow otherwise 36 | MOV $pi_rand1 $return 37 | 38 | @call(multiply, $pi_rand0, $pi_rand0) 39 | MOV $pi_rand0 $return 40 | 41 | @call(multiply, $pi_rand1, $pi_rand1) 42 | MOV $pi_rand1 $return 43 | 44 | ADD $pi_rand_sum $pi_rand0 ; Add $pi_rand0^2 and $pi_rand1^2 45 | ADD $pi_rand_sum $pi_rand1 46 | 47 | ; If $pi_rand_sum > $max_rand_square, skip counting the dot as inside 48 | JGT :pi_fi_indot $pi_rand_sum $max_rand_square 49 | ADD $pi_inside 1 50 | 51 | pi_fi_indot: 52 | 53 | ; Increment the loop counter 54 | ADD $pi_counter 1 55 | JMP :main_loop ; Next loop iteration 56 | 57 | print: ; SUBROUTINE 58 | ; Calculate PI using 'inside / total * 4' as float 59 | APRINT '\n' 60 | DPRINT $pi_inside 61 | APRINT '/' 62 | DPRINT $pi_iterations 63 | APRINT '*' 64 | DPRINT 4 65 | 66 | JMP :end 67 | 68 | 69 | end: 70 | ; SUBROUTINE 71 | ; End the program execution 72 | HALT 73 | 74 | #import <lib/math/divide.asm> 75 | #import <lib/math/multiply.asm> -------------------------------------------------------------------------------- /pi.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msiemens/rust-tinyasm/1549d75260b9d523d4ecebd2033b098ef5e5f156/pi.bin -------------------------------------------------------------------------------- /src/assembler/codegen.rs: -------------------------------------------------------------------------------- 1 | use machine::InstructionManager; 2 | use machine::Argument as ArgumentType; 3 | use assembler::parser::ast::{Statement, StatementNode, Argument, Mnemonic}; 4 | 5 | 6 | pub fn generate_binary(ast: Vec) -> Vec> { 7 | let mut binary = vec![]; 8 | let im = InstructionManager::new(); 9 | 10 | for stmt in ast.iter() { 11 | if 
let Statement::Operation(ref mnem, ref args) = stmt.value { 12 | // Get the requested mnemonic 13 | let Mnemonic(instr) = mnem.clone(); 14 | 15 | // Get the argument types we received 16 | let arg_types: Vec = args.iter().map(|ref arg| { 17 | match arg.value { 18 | Argument::Literal(_) | Argument::Char(_) => { 19 | ArgumentType::Literal 20 | }, 21 | Argument::Address(_) => { 22 | ArgumentType::Address 23 | }, 24 | _ => fatal!("unprocessed argument: {}", arg; arg) 25 | } 26 | }).collect(); 27 | 28 | // Find the opcode matching the given argument types 29 | let instr_class = im.lookup_operations(&instr); 30 | let op = instr_class.iter().find(|op| { 31 | op.arg_types == arg_types 32 | }).unwrap_or_else(|| { 33 | // Build allowed arguments string 34 | let allowed_arg_types = instr_class.iter() 35 | .cloned() 36 | .map(|i| format!("{:?}", i.arg_types)) 37 | .collect::>() 38 | .connect(" or "); 39 | 40 | fatal!("invalid arguments for {:?}: found {:?}, allowed: {:?}", 41 | instr, arg_types, allowed_arg_types; stmt) 42 | }); 43 | 44 | // Finally, write the opcode 45 | let mut binary_stmt = vec![op.opcode]; 46 | binary_stmt.extend(args.iter().map(|arg| { 47 | match arg.value { 48 | Argument::Literal(i) => i, 49 | Argument::Char(c) => c, 50 | Argument::Address(a) => a.unwrap(), 51 | // Shouldn't happen as we check this in arg_types 52 | _ => fatal!("unprocessed argument: {}", arg; arg) 53 | } 54 | })); 55 | 56 | binary.push(binary_stmt); 57 | } else { 58 | fatal!("unprocessed operation: {}", stmt; stmt) 59 | } 60 | } 61 | 62 | binary 63 | } 64 | 65 | 66 | #[cfg(test)] 67 | mod test { 68 | use assembler::parser::ast::{Statement, Mnemonic}; 69 | use assembler::parser::dummy_source; 70 | 71 | use super::generate_binary; 72 | 73 | #[test] 74 | fn test_operation() { 75 | assert_eq!( 76 | generate_binary(vec![ 77 | Statement::new( 78 | Statement::Operation( 79 | Mnemonic("HALT".parse().unwrap()), 80 | vec![] 81 | ), 82 | dummy_source() 83 | ) 84 | ]), 85 | vec![vec![0xFF]] 86 | ) 
87 | } 88 | } -------------------------------------------------------------------------------- /src/assembler/mod.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] mod util; 2 | mod codegen; 3 | mod parser; 4 | 5 | use std::fs::File; 6 | use std::io::{Read, Write}; 7 | use std::path::Path; 8 | use super::Args; 9 | use machine::WordSize; 10 | 11 | 12 | pub fn main(args: Args) { 13 | // Read source file 14 | let input_path = Path::new(&args.arg_input); 15 | let source = read_file(&input_path); 16 | 17 | // Parse source file 18 | let filename = input_path.iter().last().unwrap().to_string_lossy(); 19 | let mut source = parser::Parser::new(&source, &filename).parse(); 20 | 21 | if args.flag_v { 22 | println!("Source:"); 23 | for stmt in source.iter() { 24 | println!("{}", stmt); 25 | } 26 | print!("\n"); 27 | } 28 | 29 | // Expand syntax extensions 30 | parser::expand_syntax_extensions(&mut source); 31 | 32 | if args.flag_v { 33 | println!("Expanded source:"); 34 | for stmt in source.iter() { 35 | println!("{}", stmt); 36 | } 37 | print!("\n"); 38 | } 39 | 40 | // Generate binary 41 | let binary = codegen::generate_binary(source); 42 | 43 | if args.flag_bin { 44 | write_binary(binary, &Path::new(&args.arg_output)); 45 | } else { 46 | for stmt in binary.iter() { 47 | for b in stmt.iter() { 48 | print!("{:#04x} ", *b) 49 | } 50 | print!("\n"); 51 | } 52 | } 53 | } 54 | 55 | 56 | fn read_file(input_path: &Path) -> String { 57 | let mut file = match File::open(&input_path) { 58 | Ok(f) => f, 59 | Err(err) => panic!("Can't open {}: {}", input_path.display(), err) 60 | }; 61 | 62 | let mut contents = String::new(); 63 | match file.read_to_string(&mut contents) { 64 | Ok(contents) => contents, 65 | Err(_) => panic!("Can't read {}", input_path.display()) 66 | }; 67 | 68 | contents 69 | } 70 | 71 | fn write_binary(binary: Vec>, output_path: &Path) { 72 | let mut file = match File::create(output_path) { 73 | Ok(f) => f, 74 | 
Err(err) => panic!("Can't write to {}: {}", output_path.display(), err) 75 | }; 76 | 77 | for stmt in binary.iter() { 78 | for b in stmt.iter() { 79 | match file.write_all(&[*b]) { 80 | Ok(_) => {}, 81 | Err(err) => panic!("Can't write to {}: {}", output_path.display(), err) 82 | } 83 | } 84 | } 85 | } -------------------------------------------------------------------------------- /src/assembler/parser/ast.rs: -------------------------------------------------------------------------------- 1 | //! The Tiny Abstract Syntax Tree. 2 | //! Modeled following the grammar (`grammar.md`). Every compound item has an 3 | //! `Item` enum with all options and an `ItemNode` which contains the item 4 | //! and the location in the source file. 5 | 6 | use std::borrow::ToOwned; 7 | use std::fmt; 8 | 9 | use assembler::parser::lexer::SourceLocation; 10 | use machine::Mnemonic as Instruction; // FIXME 11 | use machine::WordSize; 12 | 13 | 14 | pub type Program = Vec; 15 | 16 | 17 | // --- Helper for AST definitions ----------------------------------------------- 18 | 19 | macro_rules! 
define( 20 | ( $name:ident -> $wrapper:ident : $( $variants:ident ( $( $arg:ty ),* ) ),* ) => { 21 | #[derive(PartialEq, Eq, Clone)] 22 | pub struct $wrapper { 23 | pub value: $name, 24 | pub location: SourceLocation 25 | } 26 | 27 | impl_to_string!($wrapper: "{}", value); 28 | 29 | #[derive(PartialEq, Eq, Clone)] 30 | pub enum $name { 31 | $( $variants ( $( $arg ),* ) ),* 32 | } 33 | 34 | impl $name { 35 | pub fn new(stmt: $name, location: SourceLocation) -> $wrapper { 36 | $wrapper { 37 | value: stmt, 38 | location: location 39 | } 40 | } 41 | } 42 | }; 43 | ); 44 | 45 | // --- AST: Compound items ------------------------------------------------------ 46 | 47 | // --- AST: Compound items: Statements ------------------------------------------ 48 | 49 | define!(Statement -> StatementNode: 50 | Include(IPath), // Ex: #import <...> 51 | Label(Ident), // Ex: label: 52 | Const(Ident, ArgumentNode), // Ex: $const = 2 53 | Operation(Mnemonic, Vec), // Ex: @macro(args, ...) 54 | Macro(Ident, Vec) 55 | ); 56 | 57 | impl fmt::Debug for Statement { 58 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 59 | match *self { 60 | Statement::Include(ref path) => write!(f, "#include {}", path), 61 | Statement::Label(ref name) => write!(f, "{}:", name), 62 | Statement::Const(ref name, ref value) => { 63 | write!(f, "${} = {}", name, value) 64 | }, 65 | Statement::Operation(ref mnem, ref args) => { 66 | try!(write!(f, "{}", mnem)); 67 | for arg in args.iter() { 68 | try!(write!(f, " {}", arg)); 69 | } 70 | Ok(()) 71 | }, 72 | Statement::Macro(ref name, ref args) => { 73 | write!(f, "@{}({})", name, 74 | args.iter() 75 | .map(|arg| format!("{}", arg)) 76 | .collect::>() 77 | .connect(" ")) 78 | } 79 | } 80 | } 81 | } 82 | 83 | impl fmt::Display for Statement { 84 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 85 | write!(f, "{:?}", self) 86 | } 87 | } 88 | 89 | 90 | // --- AST: Compound items: Arguments ------------------------------------------- 91 | 92 | 
define!(Argument -> ArgumentNode: 93 | Literal(WordSize), // A simple literal 94 | Address(Option), // An address (`[0]`) or an auto-filled address (`[_]`) 95 | Const(Ident), // A constant (`$const`) 96 | Label(Ident), // A label (`:label`) 97 | Char(WordSize) // A character (`'a'`) 98 | ); 99 | 100 | impl fmt::Debug for Argument { 101 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 102 | match *self { 103 | Argument::Literal(i) => write!(f, "{}", i), 104 | Argument::Address(addr) => { 105 | match addr { 106 | Some(i) => write!(f, "[{}]", i), 107 | None => write!(f, "[_]") 108 | } 109 | }, 110 | Argument::Const(ref name) => write!(f, "${}", name), 111 | Argument::Label(ref name) => write!(f, ":{}", name), 112 | Argument::Char(c) => write!(f, "'{}'", c), 113 | } 114 | } 115 | } 116 | 117 | impl fmt::Display for Argument { 118 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 119 | write!(f, "{:?}", self) 120 | } 121 | } 122 | 123 | 124 | // --- AST: Compound items: Macro Arguments ------------------------------------- 125 | 126 | define!(MacroArgument -> MacroArgumentNode: 127 | Argument(ArgumentNode), 128 | Ident(Ident) 129 | ); 130 | 131 | impl fmt::Debug for MacroArgument { 132 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 133 | match *self { 134 | MacroArgument::Argument(ref arg) => write!(f, "{}", arg), 135 | MacroArgument::Ident(ref name) => write!(f, "{}", name) 136 | } 137 | } 138 | } 139 | 140 | impl fmt::Display for MacroArgument { 141 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 142 | write!(f, "{:?}", self) 143 | } 144 | } 145 | 146 | 147 | // --- AST: Single items -------------------------------------------------------- 148 | 149 | // --- AST: Single items: Identifier -------------------------------------------- 150 | 151 | #[derive(PartialEq, Eq, Hash, Clone)] 152 | pub struct Ident(pub String); 153 | 154 | impl Ident { 155 | pub fn as_str(&self) -> &str { 156 | let Ident(ref s) = *self; 157 | s 158 | } 159 | 160 
| pub fn clone(&self) -> Ident { 161 | Ident(self.as_str().to_owned()) 162 | } 163 | } 164 | 165 | impl fmt::Debug for Ident { 166 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 167 | write!(f, "{}", self.as_str()) 168 | } 169 | } 170 | 171 | impl fmt::Display for Ident { 172 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 173 | write!(f, "{:?}", self) 174 | } 175 | } 176 | 177 | 178 | // --- AST: Single items: Mnemonic ---------------------------------------------- 179 | 180 | #[derive(PartialEq, Eq, Clone)] 181 | pub struct Mnemonic(pub Instruction); 182 | 183 | impl fmt::Debug for Mnemonic { 184 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 185 | let Mnemonic(ref mnem) = *self; 186 | write!(f, "{:?}", mnem) 187 | } 188 | } 189 | 190 | impl fmt::Display for Mnemonic { 191 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 192 | write!(f, "{:?}", self) 193 | } 194 | } 195 | 196 | 197 | // --- AST: Single items: Import Path ------------------------------------------- 198 | 199 | #[derive(PartialEq, Eq, Clone)] 200 | pub struct IPath(pub String); 201 | 202 | impl IPath { 203 | pub fn as_str(&self) -> &str { 204 | let IPath(ref p) = *self; 205 | &**p 206 | } 207 | } 208 | 209 | impl fmt::Debug for IPath { 210 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 211 | let IPath(ref path) = *self; 212 | write!(f, "<{}>", path) 213 | } 214 | } 215 | 216 | impl fmt::Display for IPath { 217 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 218 | write!(f, "{:?}", self) 219 | } 220 | } -------------------------------------------------------------------------------- /src/assembler/parser/grammar.md: -------------------------------------------------------------------------------- 1 | # EBNF-like grammar 2 | 3 | # AST 4 | program: comment | (statement comment?)* 5 | statement: include | label_def | const_def | operation | macro 6 | 7 | include: hash path 8 | label_def: ident colon 9 | const_def: constant eq argument 10 | 
operation: mnemonic argument* 11 | argument: integer 12 | | address 13 | | constant 14 | | label 15 | | char 16 | 17 | address: lbracket ( integer | underscore ) rbracket 18 | label: colon ident 19 | constant: dollar ident 20 | macro: at ident lparen ( macro_arg ( comma macro_arg )* )? rparen 21 | macro_arg: argument | ident 22 | 23 | # Tokens 24 | hash: '#' 25 | colon: ':' 26 | dollar: '$' 27 | at: '@' 28 | comma: ',' 29 | eq: '=' 30 | underscore: '_' 31 | lparen: '(' 32 | rparen: ')' 33 | lbracket: '[' 34 | rbracket: ']' 35 | mnemonic: [A-Z]+ 36 | ident: [a-z] ( '_' | [a-z] | [0-9] )* 37 | integer: [0-9]+ 38 | char: '\'' ( [a-z] | [A-Z] | '\n' ) '\'' 39 | path: '<' ( [a-z] | [A-Z] | '.' | '/' | '_' | '-' )+ '>' 40 | comment: ';' ([a-z] | [A-Z] | [0-9])* -------------------------------------------------------------------------------- /src/assembler/parser/lexer.rs: -------------------------------------------------------------------------------- 1 | //! The Lexer 2 | //! 3 | //! Nothing outstanding, just a normal lexer. 
4 | 5 | use std::borrow::ToOwned; 6 | use std::fmt; 7 | use std::rc::Rc; 8 | 9 | use assembler::util::fatal; 10 | use machine::{Mnemonic, WordSize}; 11 | 12 | 13 | // --- Source Location ---------------------------------------------------------- 14 | 15 | pub type SharedString = Rc; 16 | 17 | #[derive(PartialEq, Eq, Clone)] 18 | pub struct SourceLocation { 19 | pub filename: SharedString, 20 | pub lineno: usize 21 | } 22 | 23 | impl_to_string!(SourceLocation: "{}:{}", filename, lineno); 24 | 25 | 26 | pub fn dummy_source() -> SourceLocation { 27 | SourceLocation { 28 | filename: Rc::new(String::from_str("")), 29 | lineno: 0 30 | } 31 | } 32 | 33 | 34 | // --- List of Tokens ----------------------------------------------------------- 35 | 36 | #[derive(Clone, PartialEq, Eq)] 37 | pub enum Token<'a> { 38 | HASH, 39 | COLON, 40 | DOLLAR, 41 | AT, 42 | COMMA, 43 | EQ, 44 | UNDERSCORE, 45 | 46 | LPAREN, 47 | RPAREN, 48 | LBRACKET, 49 | RBRACKET, 50 | 51 | MNEMONIC(Mnemonic), 52 | IDENT(&'a str), 53 | INTEGER(WordSize), 54 | CHAR(WordSize), 55 | PATH(&'a str), 56 | 57 | EOF, 58 | 59 | PLACEHOLDER 60 | //UNKNOWN(String) 61 | } 62 | 63 | impl<'a> fmt::Debug for Token<'a> { 64 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 65 | match *self { 66 | Token::HASH => write!(f, "#"), 67 | Token::COLON => write!(f, ":"), 68 | Token::DOLLAR => write!(f, "$"), 69 | Token::AT => write!(f, "@"), 70 | Token::COMMA => write!(f, ","), 71 | Token::EQ => write!(f, "="), 72 | Token::UNDERSCORE => write!(f, "_"), 73 | 74 | Token::LPAREN => write!(f, "("), 75 | Token::RPAREN => write!(f, ")"), 76 | Token::LBRACKET => write!(f, "["), 77 | Token::RBRACKET => write!(f, "]"), 78 | 79 | Token::MNEMONIC(ref instr) => write!(f, "{:?}", instr), 80 | Token::IDENT(ref ident) => write!(f, "{:?}", ident), 81 | Token::INTEGER(i) => write!(f, "{}", i), 82 | Token::CHAR(c) => write!(f, "{}", c as char), 83 | Token::PATH(ref path) => write!(f, "{:?}", path), 84 | 85 | Token::EOF => write!(f, "EOF"), 
86 | Token::PLACEHOLDER => write!(f, "PLACEHOLDER") 87 | } 88 | } 89 | } 90 | 91 | impl<'a> fmt::Display for Token<'a> { 92 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 93 | write!(f, "{:?}", self) 94 | } 95 | } 96 | 97 | 98 | // --- The Lexer ---------------------------------------------------------------- 99 | // We use a Lexer trait along with two implementations: FileLexer and Vec. 100 | // The first one is used for processing a file on the hard drive, the second 101 | // is used for testing purposes. 102 | 103 | pub trait Lexer<'a> { 104 | fn get_source(&self) -> SourceLocation; 105 | fn next_token(&mut self) -> Token<'a>; 106 | fn tokenize(&mut self) -> Vec>; 107 | } 108 | 109 | 110 | // --- The Lexer: FileLexer ----------------------------------------------------- 111 | 112 | pub struct FileLexer<'a> { 113 | source: &'a str, 114 | file: SharedString, 115 | len: usize, 116 | 117 | pos: usize, 118 | curr: Option, 119 | 120 | lineno: usize 121 | } 122 | 123 | impl<'a> FileLexer<'a> { 124 | 125 | pub fn new(source: &'a str, file: &str) -> FileLexer<'a> { 126 | FileLexer { 127 | source: source, 128 | file: Rc::new(String::from_str(file)), 129 | len: source.len(), 130 | 131 | pos: 0, 132 | curr: Some(source.char_at(0)), 133 | 134 | lineno: 1 135 | } 136 | } 137 | 138 | 139 | // --- File Lexer: Helpers --------------------------------------------------- 140 | 141 | fn fatal(&self, msg: String) -> ! { 142 | fatal(msg, &self.get_source()) 143 | } 144 | 145 | 146 | fn is_eof(&self) -> bool { 147 | self.curr.is_none() 148 | } 149 | 150 | 151 | // --- File Lexer: Character processing -------------------------------------- 152 | 153 | fn bump(&mut self) { 154 | self.curr = self.nextch(); 155 | self.pos += 1; 156 | 157 | debug!("Moved on to {:?}", self.curr) 158 | } 159 | 160 | fn nextch(&self) -> Option { 161 | let mut new_pos = self.pos + 1; 162 | 163 | // When encountering multi-byte UTF-8, we may stop in the middle 164 | // of it. 
Fast forward till we see the next actual char or EOF 165 | 166 | while !self.source.is_char_boundary(new_pos) 167 | && self.pos < self.len { 168 | new_pos += 1; 169 | } 170 | 171 | if new_pos < self.len { 172 | Some(self.source.char_at(new_pos)) 173 | } else { 174 | None 175 | } 176 | } 177 | 178 | fn curr_repr(&self) -> String { 179 | match self.curr { 180 | Some(c) => c.escape_default().collect(), 181 | None => "EOF".to_owned() 182 | } 183 | } 184 | 185 | fn expect(&mut self, expect: char) { 186 | if self.curr != Some(expect) { 187 | // Build error message 188 | let expect_str = match expect { 189 | '\'' => String::from_str("quote"), 190 | c => format!("'{}'", c) 191 | }; 192 | let found_str = match self.curr { 193 | Some(_) => format!("'{}'", self.curr_repr()), 194 | None => String::from_str("EOF") 195 | }; 196 | 197 | self.fatal(format!("Expected `{}`, found `{}`", 198 | expect_str, found_str)) 199 | } 200 | 201 | self.bump(); 202 | } 203 | 204 | fn collect(&mut self, cond: F) -> &'a str 205 | where F: Fn(&char) -> bool { 206 | let start = self.pos; 207 | 208 | debug!("start colleting"); 209 | 210 | while let Some(c) = self.curr { 211 | if cond(&c) { 212 | self.bump(); 213 | } else { 214 | debug!("colleting finished"); 215 | break; 216 | } 217 | } 218 | 219 | let end = self.pos; 220 | 221 | &self.source[start..end] 222 | } 223 | 224 | fn eat_all(&mut self, cond: F) 225 | where F: Fn(&char) -> bool { 226 | while let Some(c) = self.curr { 227 | if cond(&c) { self.bump(); } 228 | else { break; } 229 | } 230 | } 231 | 232 | // --- File Lexer: Tokenizers ------------------------------------------------ 233 | 234 | fn tokenize_mnemonic(&mut self) -> Token<'a> { 235 | debug!("Tokenizing a mnemonic"); 236 | 237 | let mnemonic_str = self.collect(|c| c.is_alphabetic() && c.is_uppercase()); 238 | let mnemonic = match mnemonic_str.parse() { 239 | Ok(m) => m, 240 | Err(_) => self.fatal(format!("invalid mnemonic: {}", mnemonic_str)) 241 | }; 242 | 243 | 
Token::MNEMONIC(mnemonic) 244 | } 245 | 246 | fn tokenize_ident(&mut self) -> Token<'a> { 247 | debug!("Tokenizing an ident"); 248 | 249 | let ident = self.collect(|c| { 250 | (c.is_alphabetic() && c.is_lowercase()) || c.is_numeric() || *c == '_' 251 | }); 252 | 253 | Token::IDENT(ident) 254 | } 255 | 256 | fn tokenize_digit(&mut self) -> Token<'a> { 257 | debug!("Tokenizing a digit"); 258 | 259 | let integer_str = self.collect(|c| c.is_numeric()); 260 | let integer = match integer_str.parse() { 261 | Ok(i) => i, 262 | Err(_) => self.fatal(format!("invalid integer: {}", integer_str)) 263 | }; 264 | 265 | Token::INTEGER(integer) 266 | } 267 | 268 | fn tokenize_char(&mut self) -> Token<'a> { 269 | debug!("Tokenizing a char"); 270 | 271 | self.bump(); // '\'' matched, move on 272 | 273 | let c = self.curr.unwrap_or_else(|| { 274 | self.fatal(format!("expected a char, found EOF")); 275 | }); 276 | let tok = if c == '\\' { 277 | // Escaped char, let's take a look on one more char 278 | self.bump(); 279 | match self.curr { 280 | Some('n') => Token::CHAR(10), 281 | Some('\'') => Token::CHAR(39), 282 | Some(c) => self.fatal(format!("unsupported or invalid escape sequence: \\{}", c)), 283 | None => self.fatal(format!("expected escaped char, found EOF")) 284 | } 285 | } else { 286 | Token::CHAR(c as WordSize) 287 | }; 288 | self.bump(); 289 | 290 | // Match closing quote 291 | self.expect('\''); 292 | 293 | tok 294 | } 295 | 296 | fn tokenize_path(&mut self) -> Token<'a> { 297 | debug!("Tokenizing a path"); 298 | 299 | self.bump(); // '<' matched, move on 300 | 301 | let path = self.collect(|c| *c != '>'); 302 | 303 | // Match closing '>' 304 | self.expect('>'); 305 | 306 | Token::PATH(path) 307 | } 308 | 309 | /// Read the next token and return it 310 | /// 311 | /// If `None` is returned, the current token is to be ignored and the 312 | /// lexer requests the reader to read the next token instead. 
313 | fn read_token(&mut self) -> Option> { 314 | let c = match self.curr { 315 | Some(c) => c, 316 | None => return Some(Token::EOF) 317 | }; 318 | 319 | let token = match c { 320 | '#' => { self.bump(); Token::HASH }, 321 | ':' => { self.bump(); Token::COLON }, 322 | '$' => { self.bump(); Token::DOLLAR }, 323 | '@' => { self.bump(); Token::AT }, 324 | ',' => { self.bump(); Token::COMMA }, 325 | '=' => { self.bump(); Token::EQ }, 326 | '_' => { self.bump(); Token::UNDERSCORE }, 327 | '(' => { self.bump(); Token::LPAREN }, 328 | ')' => { self.bump(); Token::RPAREN }, 329 | '[' => { self.bump(); Token::LBRACKET }, 330 | ']' => { self.bump(); Token::RBRACKET }, 331 | 332 | c if c.is_alphabetic() && c.is_uppercase() => { 333 | self.tokenize_mnemonic() 334 | }, 335 | c if c.is_alphabetic() && c.is_lowercase() => { 336 | self.tokenize_ident() 337 | }, 338 | c if c.is_numeric() => self.tokenize_digit(), 339 | '\'' => self.tokenize_char(), 340 | '<' => self.tokenize_path(), 341 | 342 | ';' => { 343 | self.eat_all(|c| *c != '\n'); 344 | return None; 345 | }, 346 | c if c.is_whitespace() => { 347 | if c == '\n' { self.lineno += 1; } 348 | 349 | self.bump(); 350 | return None; 351 | }, 352 | c => { 353 | self.fatal(format!("unknown token: {}", c)) 354 | // UNKNOWN(format!("{}", c).into_string()) 355 | } 356 | }; 357 | 358 | Some(token) 359 | } 360 | } 361 | 362 | impl<'a> Lexer<'a> for FileLexer<'a> { 363 | fn get_source(&self) -> SourceLocation { 364 | SourceLocation { 365 | filename: self.file.clone(), 366 | lineno: self.lineno 367 | } 368 | } 369 | 370 | fn next_token(&mut self) -> Token<'a> { 371 | if self.is_eof() { 372 | Token::EOF 373 | } else { 374 | // Read the next token until it's not none 375 | loop { 376 | if let Some(token) = self.read_token() { 377 | return token; 378 | } 379 | } 380 | } 381 | } 382 | 383 | #[allow(dead_code)] // Used for tests 384 | fn tokenize(&mut self) -> Vec> { 385 | let mut tokens = vec![]; 386 | 387 | while !self.is_eof() { 388 | 
debug!("Processing {:?}", self.curr); 389 | 390 | if let Some(t) = self.read_token() { 391 | tokens.push(t); 392 | } 393 | 394 | debug!("So far: {:?}", tokens) 395 | } 396 | 397 | tokens 398 | } 399 | } 400 | 401 | 402 | // --- The Lexer: Vec ---------------------------------------------------- 403 | 404 | impl<'a> Lexer<'a> for Vec> { 405 | fn get_source(&self) -> SourceLocation { 406 | dummy_source() 407 | } 408 | 409 | fn next_token(&mut self) -> Token<'a> { 410 | if self.len() >= 1 { 411 | self.remove(0) 412 | } else { 413 | Token::EOF 414 | } 415 | } 416 | 417 | fn tokenize(&mut self) -> Vec> { 418 | self.iter().cloned().collect() 419 | } 420 | } 421 | 422 | 423 | // --- Tests -------------------------------------------------------------------- 424 | 425 | #[cfg(test)] 426 | mod tests { 427 | use std::borrow::ToOwned; 428 | use std::rc::Rc; 429 | 430 | use super::{Token, Lexer, FileLexer}; 431 | use super::Token::*; 432 | use machine::WordSize; 433 | 434 | fn tokenize(src: &'static str) -> Vec { 435 | FileLexer::new(src, "").tokenize() 436 | } 437 | 438 | #[test] 439 | fn test_mnemonic() { 440 | assert_eq!(tokenize("MOV"), 441 | vec![MNEMONIC("MOV".parse().unwrap())]); 442 | } 443 | 444 | #[test] 445 | fn test_ident() { 446 | assert_eq!(tokenize("abc"), 447 | vec![IDENT("abc")]); 448 | } 449 | 450 | #[test] 451 | fn test_ident_with_underscore() { 452 | assert_eq!(tokenize("abc_efg"), 453 | vec![IDENT("abc_efg")]); 454 | } 455 | 456 | #[test] 457 | fn test_digit() { 458 | assert_eq!(tokenize("128"), 459 | vec![INTEGER(128)]); 460 | } 461 | 462 | #[test] 463 | fn test_char() { 464 | assert_eq!(tokenize("'a'"), 465 | vec![CHAR('a' as WordSize)]); 466 | assert_eq!(tokenize("' '"), 467 | vec![CHAR(' ' as WordSize)]); 468 | assert_eq!(tokenize("'\n'"), 469 | vec![CHAR('\n' as WordSize)]); 470 | assert_eq!(tokenize("'\\\''"), 471 | vec![CHAR('\'' as WordSize)]); 472 | } 473 | 474 | #[test] 475 | fn test_path() { 476 | assert_eq!(tokenize(""), 477 | 
vec![PATH("asd")]); 478 | } 479 | 480 | #[test] 481 | fn test_comment() { 482 | assert_eq!(tokenize("; asd"), 483 | vec![]); 484 | assert_eq!(tokenize("; asd\nMOV ;asd\nMOV"), 485 | vec![MNEMONIC("MOV".parse().unwrap()), 486 | MNEMONIC("MOV".parse().unwrap())]); 487 | } 488 | 489 | #[test] 490 | fn test_whitespace() { 491 | assert_eq!(tokenize("\n\n\n\n \n\t\n"), 492 | vec![]); 493 | assert_eq!(tokenize(" MOV \n\n MOV"), 494 | vec![MNEMONIC("MOV".parse().unwrap()), 495 | MNEMONIC("MOV".parse().unwrap())]); 496 | } 497 | 498 | #[test] 499 | fn test_line_counter() { 500 | let mut lx = FileLexer::new("MOV\nMOV", ""); 501 | lx.tokenize(); 502 | assert_eq!(lx.lineno, 2); 503 | 504 | let mut lx = FileLexer::new("MOV\r\nMOV", ""); 505 | lx.tokenize(); 506 | assert_eq!(lx.lineno, 2); 507 | 508 | let mut lx = FileLexer::new("#include", ""); 509 | lx.tokenize(); 510 | assert_eq!(lx.lineno, 1); 511 | } 512 | } -------------------------------------------------------------------------------- /src/assembler/parser/mod.rs: -------------------------------------------------------------------------------- 1 | //! The Parser 2 | //! 3 | //! A simple recursive descent parser the grammar as described in `grammar.md`. 
4 | 5 | pub mod ast; 6 | mod lexer; 7 | mod syntax_ext; 8 | 9 | use std::borrow::ToOwned; 10 | use std::collections::LinkedList; 11 | use assembler::util::fatal; 12 | use self::ast::*; 13 | use self::lexer::{Lexer, FileLexer, Token}; 14 | 15 | pub use self::lexer::{SourceLocation, dummy_source}; 16 | pub use self::syntax_ext::expand_syntax_extensions; 17 | 18 | 19 | pub struct Parser<'a> { 20 | location: SourceLocation, 21 | token: Token<'a>, 22 | buffer: LinkedList>, 23 | lexer: Box + 'a> 24 | } 25 | 26 | impl<'a> Parser<'a> { 27 | pub fn new(source: &'a str, file: &str) -> Parser<'a> { 28 | Parser::with_lexer(Box::new(FileLexer::new(source, file))) 29 | } 30 | 31 | pub fn with_lexer(mut lx: Box + 'a>) -> Parser { 32 | Parser { 33 | token: lx.next_token(), 34 | location: lx.get_source(), 35 | buffer: LinkedList::new(), 36 | lexer: lx 37 | } 38 | } 39 | 40 | pub fn parse(&mut self) -> Program { 41 | let mut source = vec![]; 42 | 43 | debug!("Starting parsing"); 44 | 45 | while self.token != Token::EOF { 46 | source.push(self.parse_statement()); 47 | } 48 | 49 | debug!("Parsing finished"); 50 | 51 | source 52 | } 53 | 54 | 55 | // --- Error handling ------------------------------------------------------- 56 | 57 | fn fatal(&self, msg: String) -> ! { 58 | fatal(msg, &self.location); 59 | } 60 | 61 | fn unexpected_token(&self, tok: &Token, expected: Option<&'static str>) -> ! 
{ 62 | match expected { 63 | Some(ex) => self.fatal(format!("unexpected token: `{}`, expected {}", tok, ex)), 64 | None => self.fatal(format!("unexpected token: `{}`", tok)) 65 | } 66 | } 67 | 68 | 69 | // --- Token processing ----------------------------------------------------- 70 | 71 | fn update_location(&mut self) -> SourceLocation { 72 | self.location = self.lexer.get_source(); 73 | self.location.clone() 74 | } 75 | 76 | fn bump(&mut self) { 77 | self.token = match self.buffer.pop_front() { 78 | Some(tok) => tok, 79 | None => self.lexer.next_token() 80 | }; 81 | } 82 | 83 | fn eat(&mut self, tok: &Token) -> bool { 84 | if self.token == *tok { 85 | self.bump(); 86 | true 87 | } else { 88 | false 89 | } 90 | } 91 | 92 | fn expect(&mut self, tok: &Token) { 93 | if !self.eat(tok) { 94 | self.fatal(format!("expected `{}`, found `{}`", tok, self.token)) 95 | } 96 | } 97 | 98 | fn look_ahead(&mut self, distance: usize, f: F) -> R where F: Fn(&Token) -> R { 99 | if self.buffer.len() < distance { 100 | for _ in 0 .. 
distance - self.buffer.len() { 101 | self.buffer.push_back(self.lexer.next_token()); 102 | } 103 | } 104 | 105 | f(self.buffer.iter().nth(distance - 1).unwrap()) 106 | } 107 | 108 | // --- Actual parsing ------------------------------------------------------- 109 | 110 | fn token_is_argument(&mut self) -> bool { 111 | match self.token { 112 | Token::INTEGER(_) | Token::CHAR(_) 113 | | Token::LBRACKET | Token::COLON => true, 114 | Token::DOLLAR => self.look_ahead(2, |t| return t != &Token::EQ), 115 | _ => false 116 | } 117 | } 118 | 119 | // --- Parsing: Single tokens ----------------------------------------------- 120 | 121 | fn parse_ident(&mut self) -> Ident { 122 | let ident = match self.token { 123 | Token::IDENT(id) => Ident(id.to_owned()), 124 | _ => self.unexpected_token(&self.token, Some("a identifier")) 125 | }; 126 | self.bump(); 127 | 128 | ident 129 | } 130 | 131 | fn parse_path(&mut self) -> IPath { 132 | let path = match self.token { 133 | Token::PATH(p) => IPath(p.to_owned()), 134 | _ => self.unexpected_token(&self.token, Some("a path")) 135 | }; 136 | self.bump(); 137 | 138 | path 139 | } 140 | 141 | // --- Parsing: Compound expressions ---------------------------------------- 142 | 143 | fn parse_address(&mut self) -> Option { 144 | self.expect(&Token::LBRACKET); 145 | 146 | let value = match self.token { 147 | Token::INTEGER(i) => Some(i), 148 | Token::UNDERSCORE => None, 149 | _ => self.unexpected_token(&self.token, Some("an address")) 150 | }; 151 | self.bump(); 152 | 153 | self.expect(&Token::RBRACKET); 154 | 155 | value 156 | } 157 | 158 | fn parse_label(&mut self) -> Ident { 159 | self.expect(&Token::COLON); 160 | self.parse_ident() 161 | } 162 | 163 | fn parse_constant(&mut self) -> Ident { 164 | self.expect(&Token::DOLLAR); 165 | self.parse_ident() 166 | } 167 | 168 | fn parse_argument(&mut self) -> ArgumentNode { 169 | let location = self.update_location(); 170 | 171 | let arg = match self.token { 172 | Token::INTEGER(i) => { self.bump(); 
Argument::Literal(i) }, 173 | Token::CHAR(c) => { self.bump(); Argument::Char(c) }, 174 | Token::LBRACKET => Argument::Address(self.parse_address()), 175 | Token::DOLLAR => Argument::Const(self.parse_constant()), 176 | Token::COLON => Argument::Label(self.parse_label()), 177 | _ => self.unexpected_token(&self.token, Some("an argument")) 178 | }; 179 | 180 | Argument::new(arg, location) 181 | } 182 | 183 | fn parse_macro_argument(&mut self) -> MacroArgumentNode { 184 | let location = self.update_location(); 185 | 186 | if self.token_is_argument() { 187 | MacroArgument::new(MacroArgument::Argument(self.parse_argument()), 188 | location) 189 | } else { 190 | MacroArgument::new(MacroArgument::Ident(self.parse_ident()), 191 | location) 192 | } 193 | } 194 | 195 | // ---- Parsing: Expressions ------------------------------------------------ 196 | 197 | fn parse_include(&mut self) -> StatementNode { 198 | let location = self.update_location(); 199 | 200 | self.bump(); 201 | self.expect(&Token::IDENT("import")); 202 | let path = self.parse_path(); 203 | 204 | Statement::new(Statement::Include(path), location) 205 | } 206 | 207 | fn parse_label_def(&mut self) -> StatementNode { 208 | let location = self.update_location(); 209 | 210 | let label = self.parse_ident(); 211 | self.expect(&Token::COLON); 212 | 213 | Statement::new(Statement::Label(label), location) 214 | } 215 | 216 | fn parse_constant_def(&mut self) -> StatementNode { 217 | let location = self.update_location(); 218 | 219 | let name = self.parse_constant(); 220 | self.expect(&Token::EQ); 221 | let value = self.parse_argument(); 222 | 223 | Statement::new(Statement::Const(name, value), location) 224 | } 225 | 226 | fn parse_operation(&mut self) -> StatementNode { 227 | let location = self.update_location(); 228 | 229 | let mn = if let Token::MNEMONIC(mn) = self.token { 230 | Mnemonic(mn) 231 | } else { 232 | self.unexpected_token(&self.token, Some("a mnemonic")) 233 | }; 234 | 235 | self.bump(); 236 | 237 | let 
mut args = vec![]; 238 | while self.token_is_argument() { 239 | args.push(self.parse_argument()); 240 | } 241 | 242 | Statement::new(Statement::Operation(mn, args), location) 243 | } 244 | 245 | fn parse_macro(&mut self) -> StatementNode { 246 | let location = self.update_location(); 247 | 248 | self.expect(&Token::AT); 249 | let name = self.parse_ident(); 250 | 251 | self.expect(&Token::LPAREN); 252 | 253 | let mut args = vec![]; 254 | if self.token != Token::RPAREN { 255 | loop { 256 | args.push(self.parse_macro_argument()); 257 | if !self.eat(&Token::COMMA) { 258 | break 259 | } 260 | } 261 | } 262 | self.expect(&Token::RPAREN); 263 | 264 | Statement::new(Statement::Macro(name, args), location) 265 | } 266 | 267 | fn parse_statement(&mut self) -> StatementNode { 268 | let stmt = match self.token { 269 | Token::HASH => self.parse_include(), 270 | Token::DOLLAR => self.parse_constant_def(), 271 | Token::IDENT(_) => self.parse_label_def(), 272 | Token::MNEMONIC(_) => self.parse_operation(), 273 | Token::AT => self.parse_macro(), 274 | 275 | ref tok => self.unexpected_token(tok, Some("a statement")) 276 | }; 277 | 278 | stmt 279 | } 280 | } 281 | 282 | #[cfg(test)] 283 | mod tests { 284 | use std::borrow::ToOwned; 285 | use std::rc::Rc; 286 | 287 | use assembler::parser::ast::*; 288 | use assembler::parser::lexer::{Token, Lexer}; 289 | use assembler::parser::lexer::Token::*; 290 | 291 | use super::*; 292 | 293 | fn parse<'a, F, T>(toks: Vec>, f: F) -> T where F: Fn(&mut Parser<'a>) -> T { 294 | f(&mut Parser::with_lexer(Box::new(toks) as Box)) 295 | } 296 | 297 | fn ident_from_str(s: &str) -> Ident { 298 | Ident(s.to_owned()) 299 | } 300 | 301 | fn path_from_str(s: &str) -> IPath { 302 | IPath(s.to_owned()) 303 | } 304 | 305 | #[test] 306 | fn test_statements() { 307 | assert_eq!( 308 | parse( 309 | vec![HASH, IDENT("import"), PATH("as/d"), 310 | MNEMONIC("HALT".parse().unwrap())], 311 | |p| p.parse() 312 | ), 313 | vec![ 314 | Statement::new( 315 | 
Statement::Include( 316 | path_from_str("as/d") 317 | ), 318 | dummy_source() 319 | ), 320 | Statement::new( 321 | Statement::Operation( 322 | Mnemonic("HALT".parse().unwrap()), 323 | vec![] 324 | ), 325 | dummy_source() 326 | ) 327 | ] 328 | ) 329 | } 330 | 331 | #[test] 332 | fn test_include() { 333 | assert_eq!( 334 | parse(vec![HASH, IDENT("import"), PATH("as/d")], 335 | |p| p.parse_statement()), 336 | Statement::new( 337 | Statement::Include( 338 | path_from_str("as/d") 339 | ), 340 | dummy_source() 341 | ) 342 | ) 343 | } 344 | 345 | #[test] 346 | fn test_label_def() { 347 | assert_eq!( 348 | parse(vec![IDENT("lbl"), COLON], 349 | |p| p.parse_statement()), 350 | Statement::new( 351 | Statement::Label( 352 | ident_from_str("lbl") 353 | ), 354 | dummy_source() 355 | ) 356 | ) 357 | } 358 | 359 | #[test] 360 | fn test_const_def() { 361 | assert_eq!( 362 | parse(vec![DOLLAR, IDENT("c"), EQ, INTEGER(0)], 363 | |p| p.parse_statement()), 364 | Statement::new( 365 | Statement::Const( 366 | ident_from_str("c"), 367 | Argument::new( 368 | Argument::Literal(0), 369 | dummy_source() 370 | ) 371 | ), 372 | dummy_source() 373 | ) 374 | ) 375 | } 376 | 377 | #[test] 378 | fn test_operation() { 379 | assert_eq!( 380 | parse(vec![MNEMONIC("MOV".parse().unwrap()), INTEGER(0)], 381 | |p| p.parse_statement()), 382 | Statement::new( 383 | Statement::Operation( 384 | Mnemonic("MOV".parse().unwrap()), 385 | vec![ 386 | Argument::new( 387 | Argument::Literal(0), 388 | dummy_source() 389 | ) 390 | ] 391 | ), 392 | dummy_source() 393 | ) 394 | ) 395 | } 396 | 397 | #[test] 398 | fn test_macro() { 399 | assert_eq!( 400 | parse(vec![AT, IDENT("macro"), 401 | LPAREN, INTEGER(0), COMMA, INTEGER(0), RPAREN], 402 | |p| p.parse_statement()), 403 | Statement::new( 404 | Statement::Macro( 405 | ident_from_str("macro"), 406 | vec![ 407 | MacroArgument::new( 408 | MacroArgument::Argument( 409 | Argument::new( 410 | Argument::Literal(0), 411 | dummy_source() 412 | ) 413 | ), 414 | dummy_source() 
415 | ), 416 | MacroArgument::new( 417 | MacroArgument::Argument( 418 | Argument::new( 419 | Argument::Literal(0), 420 | dummy_source() 421 | ) 422 | ), 423 | dummy_source() 424 | ) 425 | ] 426 | ), 427 | dummy_source() 428 | ) 429 | ) 430 | } 431 | 432 | #[test] 433 | fn test_literal() { 434 | assert_eq!( 435 | parse(vec![INTEGER(0)], 436 | |p| p.parse_argument()), 437 | Argument::new( 438 | Argument::Literal(0), 439 | dummy_source() 440 | ) 441 | ) 442 | } 443 | 444 | #[test] 445 | fn test_address() { 446 | assert_eq!( 447 | parse(vec![LBRACKET, INTEGER(0), RBRACKET], 448 | |p| p.parse_argument()), 449 | Argument::new( 450 | Argument::Address(Some(0)), 451 | dummy_source() 452 | ) 453 | ) 454 | } 455 | 456 | #[test] 457 | fn test_address_auto() { 458 | assert_eq!( 459 | parse(vec![LBRACKET, UNDERSCORE, RBRACKET], 460 | |p| p.parse_argument()), 461 | Argument::new( 462 | Argument::Address(None), 463 | dummy_source() 464 | ) 465 | ) 466 | } 467 | 468 | #[test] 469 | fn test_const() { 470 | assert_eq!( 471 | parse(vec![DOLLAR, IDENT("asd")], 472 | |p| p.parse_argument()), 473 | Argument::new( 474 | Argument::Const( 475 | ident_from_str("asd") 476 | ), 477 | dummy_source() 478 | ) 479 | ) 480 | } 481 | 482 | #[test] 483 | fn test_label() { 484 | assert_eq!( 485 | parse(vec![COLON, IDENT("asd")], 486 | |p| p.parse_argument()), 487 | Argument::new( 488 | Argument::Label( 489 | ident_from_str("asd") 490 | ), 491 | dummy_source() 492 | ) 493 | ) 494 | } 495 | 496 | #[test] 497 | fn test_char() { 498 | assert_eq!( 499 | parse(vec![CHAR(0)], 500 | |p| p.parse_argument()), 501 | Argument::new( 502 | Argument::Char(0), 503 | dummy_source() 504 | ) 505 | ) 506 | } 507 | 508 | #[test] 509 | fn test_macro_arg_arg() { 510 | assert_eq!( 511 | parse(vec![INTEGER(0)], 512 | |p| p.parse_macro_argument()), 513 | MacroArgument::new( 514 | MacroArgument::Argument( 515 | Argument::new( 516 | Argument::Literal(0), 517 | dummy_source() 518 | ) 519 | ), 520 | dummy_source() 521 | ) 522 | ) 
523 | } 524 | 525 | #[test] 526 | fn test_macro_arg_ident() { 527 | assert_eq!( 528 | parse(vec![IDENT("asd")], 529 | |p| p.parse_macro_argument()), 530 | MacroArgument::new( 531 | MacroArgument::Ident( 532 | ident_from_str("asd") 533 | ), 534 | dummy_source() 535 | ) 536 | ) 537 | } 538 | 539 | #[test] 540 | fn test_op_and_const() { 541 | assert_eq!( 542 | parse(vec![MNEMONIC("HALT".parse().unwrap()), 543 | DOLLAR, IDENT("c"), EQ, INTEGER(0)], 544 | |p| p.parse()), 545 | vec![ 546 | Statement::new( 547 | Statement::Operation( 548 | Mnemonic("HALT".parse().unwrap()), 549 | vec![] 550 | ), 551 | dummy_source() 552 | ), 553 | Statement::new( 554 | Statement::Const( 555 | ident_from_str("c"), 556 | Argument::new( 557 | Argument::Literal(0), 558 | dummy_source() 559 | ) 560 | ), 561 | dummy_source() 562 | ) 563 | ] 564 | ) 565 | } 566 | } -------------------------------------------------------------------------------- /src/assembler/parser/syntax_ext/auto_address.rs: -------------------------------------------------------------------------------- 1 | //! A syntax extension that auto-fills addresses to prevent repeating and 2 | //! having to keeping track of memory addresses. 3 | //! 4 | //! # Example: 5 | //! 6 | //! ``` 7 | //! $const = [_] 8 | //! MOV $const 2 9 | //! ``` 10 | //! 11 | //! Results in: 12 | //! 13 | //! ``` 14 | //! MOV [0] 2 15 | //! ``` 16 | 17 | use assembler::parser::ast::{Program, Statement, Argument, ArgumentNode, MacroArgument}; 18 | 19 | 20 | pub fn expand(source: &mut Program) { 21 | // The address to use next 22 | let mut auto_addr = 0u8; 23 | 24 | // A helper function that replaces the value of the current argument 25 | // with the next free address. 
26 | let mut update_arg = |arg: &mut ArgumentNode| { 27 | if let Argument::Address(addr) = arg.value { 28 | if addr == None { 29 | arg.value = Argument::Address(Some(auto_addr)); 30 | auto_addr += 1; 31 | } 32 | } 33 | }; 34 | 35 | // Process all statements in the current source 36 | for stmt in source.iter_mut() { 37 | match stmt.value { 38 | 39 | // Process operation arguments 40 | Statement::Operation(_, ref mut args) => { 41 | for arg in args.iter_mut() { 42 | update_arg(arg); 43 | } 44 | }, 45 | 46 | // Process constants 47 | Statement::Const(_, ref mut arg) => { 48 | update_arg(arg); 49 | }, 50 | 51 | // Process macro arguments 52 | Statement::Macro(_, ref mut margs) => { 53 | for marg in margs.iter_mut() { 54 | if let MacroArgument::Argument(ref mut arg) = marg.value { 55 | update_arg(arg); 56 | } 57 | } 58 | } 59 | 60 | _ => {} 61 | } 62 | } 63 | } -------------------------------------------------------------------------------- /src/assembler/parser/syntax_ext/constants.rs: -------------------------------------------------------------------------------- 1 | //! A syntax extension for constants 2 | //! 3 | //! # Example: 4 | //! 5 | //! ``` 6 | //! $const = [0] 7 | //! MOV $const 2 8 | //! ``` 9 | //! 10 | //! Results in: 11 | //! 12 | //! ``` 13 | //! MOV [0] 2 14 | //! 
``` 15 | 16 | use std::collections::HashMap; 17 | use assembler::parser::ast::{Program, Statement, Argument, Ident}; 18 | 19 | 20 | pub fn expand(source: &mut Program) { 21 | let mut consts: HashMap = HashMap::new(); 22 | 23 | // Pass 1: Collect constant definitions & remove them from the source 24 | source.retain(|stmt| { 25 | let (name, value) = match stmt.value { 26 | Statement::Const(ref name, ref value) => (name, value), 27 | _ => return true // Not a const assignment, keep it 28 | }; 29 | 30 | // Collect value 31 | match value.value { 32 | Argument::Literal(_) | Argument::Address(_) => { 33 | if consts.insert(name.clone(), value.value.clone()).is_some() { 34 | warn!("redefinition of ${:?}", name; value); 35 | } 36 | }, 37 | _ => fatal!("invalid constant value: {:?}", value; value) 38 | } 39 | 40 | false // Remove the definition from the source 41 | }); 42 | 43 | debug!("Constants: {:?}", consts); 44 | 45 | // Pass 2: Replace usages of constants 46 | for stmt in source.iter_mut() { 47 | let args = match stmt.value { 48 | Statement::Operation(_, ref mut args) => args, 49 | _ => continue 50 | }; 51 | 52 | for arg in args.iter_mut() { 53 | // Get the new value if the argument is a constant 54 | arg.value = if let Argument::Const(ref name) = arg.value { 55 | match consts.get(name) { 56 | Some(value) => value.clone(), 57 | None => fatal!("unknown constant: ${:?}", name; arg) 58 | } 59 | } else { 60 | continue 61 | }; 62 | } 63 | } 64 | } -------------------------------------------------------------------------------- /src/assembler/parser/syntax_ext/imports.rs: -------------------------------------------------------------------------------- 1 | //! A syntax extension for imports 2 | //! 3 | //! # Example: 4 | //! 5 | //! `a.asm`: 6 | //! 7 | //! ``` 8 | //! APRINT '!' 9 | //! ``` 10 | //! 11 | //! `b.asm`: 12 | //! 13 | //! ``` 14 | //! #import 15 | //! HALT 16 | //! ``` 17 | //! 18 | //! Results in: 19 | //! 20 | //! ``` 21 | //! APRINT '!' 22 | //! HALT 23 | //! 
``` 24 | //! 25 | //! # Note: 26 | //! 27 | //! A file will be imported only once. Circular imports are not allowed. 28 | 29 | use std::ffi::AsOsStr; 30 | use std::fs::File; 31 | use std::io::Read; 32 | use std::path::Path; 33 | use assembler::parser::ast::{Program, Statement}; 34 | use assembler::parser::Parser; 35 | 36 | 37 | pub fn expand(source: &mut Program) { 38 | let mut last_file = None; 39 | 40 | // We use a indexed iteration here because we'll modify the source as we iterate 41 | // over it 42 | let mut i = 0; 43 | while i < source.len() { 44 | // Process import statements 45 | let mut included_source = if let Statement::Include(ref include) = source[i].value { 46 | // Get path to include 47 | let path = Path::new(&*source[i].location.filename); 48 | 49 | let dir = Path::new(path.parent().unwrap_or(Path::new("."))); 50 | let to_include = dir.join(&*include.as_str()); 51 | 52 | // Forbid circular imports 53 | if last_file == Some(to_include.clone()) { 54 | fatal!("circular import of {}", to_include.display(); source[i]); 55 | } 56 | last_file = Some(to_include.clone()); 57 | 58 | // Read source file 59 | let mut file = File::open(&to_include).unwrap_or_else(|e| { 60 | fatal!("cannot read {}: {}", to_include.display(), e; source[i]); 61 | }); 62 | 63 | let mut contents = String::new(); 64 | file.read_to_string(&mut contents).unwrap_or_else(|e| { 65 | fatal!("cannot read {}: {}", to_include.display(), e; source[i]); 66 | }); 67 | 68 | // Parse it 69 | let mut parser = Parser::new(&contents, to_include.as_os_str().to_str().unwrap()); 70 | parser.parse() 71 | } else { 72 | i += 1; 73 | continue 74 | }; 75 | 76 | // Remove the `#import <...>` statement 77 | source.remove(i); 78 | 79 | // Insert the new source into the current one 80 | for j in range(0, included_source.len()) { 81 | source.insert(i + j, included_source.remove(0)); 82 | } 83 | } 84 | } -------------------------------------------------------------------------------- 
/src/assembler/parser/syntax_ext/labels.rs:
--------------------------------------------------------------------------------
//! A syntax extension that replaces labels with the offset of the referenced
//! instruction
//!
//! # Example:
//!
//! ```
//! label:
//! GOTO :label
//! ```
//!
//! Results in:
//!
//! ```
//! GOTO 0
//! ```

use std::collections::HashMap;
use assembler::parser::ast::{Program, Statement, Argument, Ident};


/// Resolve labels: remove every `name:` definition from `source` and replace
/// every `:name` operation argument with a literal offset.
///
/// The offset of a label is the number of slots taken by all operations in
/// front of it, where an operation takes one slot for its mnemonic plus one
/// per argument. Statements other than operations do not add to the offset.
///
/// * All definitions are collected before any usage is replaced, so jumping
///   forward to a label defined further down works.
/// * Defining a label twice emits a warning; the later definition wins.
/// * Referencing an unknown label is a fatal error.
/// * The offset is passed through `overflow_check!` before it is stored as
///   a literal.
pub fn expand(source: &mut Program) {
    // Offset of every label, keyed by its name
    let mut labels: HashMap<Ident, u32> = HashMap::new();
    // Slots taken by the operations seen so far
    let mut offset = 0u32;

    // Pass 1: Collect label definitions
    source.retain(|stmt| {
        match stmt.value {
            // Store label name and current offset
            Statement::Label(ref name) => {
                if labels.insert(name.clone(), offset).is_some() {
                    warn!("redefinition of label: {:?}", name; stmt);
                }

                false // Remove label definition from the source
            },

            // Increment the offset (only operation statements will count
            // in the final binary)
            Statement::Operation(_, ref args) => {
                offset += 1 + args.len() as u32;
                true // Not a label definition, keep it
            },

            _ => true // Something else, keep it
        }
    });

    debug!("Labels: {:?}", labels);

    // Pass 2: Replace label usages
    for stmt in source.iter_mut() {

        // Process all operations
        if let Statement::Operation(_, ref mut args) = stmt.value {
            for arg in args.iter_mut() {

                // Get a new location if argument is a label
                arg.value = if let Argument::Label(ref name) = arg.value {

                    if let Some(val) = labels.get(name) {
                        Argument::Literal(overflow_check!(*val, arg))
                    } else {
                        fatal!("unknown label: {:?}", name; arg)
                    }

                } else {
                    continue
                }

            }
        }
    }
}
--------------------------------------------------------------------------------
/src/assembler/parser/syntax_ext/mod.rs:
--------------------------------------------------------------------------------
use assembler::parser::ast::Program;

mod imports;
mod subroutines;
mod auto_address;
mod constants;
mod labels;

/// Run every syntax extension over `source`, rewriting it in place.
///
/// The order of the passes matters:
///
/// 1. `imports` splices imported files into the program, so every later
///    pass works on the complete source.
/// 2. `subroutines` turns the subroutine macros into plain statements. The
///    code it generates contains `$name = [_]` definitions, `name:` labels
///    and `:name` arguments, which the remaining passes resolve.
/// 3. `auto_address` gives every `[_]` a concrete address. It runs before
///    `constants` because that pass copies the value of a constant into each
///    usage: the placeholder has to be resolved once, in the definition.
/// 4. `constants` substitutes `$name` usages.
/// 5. `labels` substitutes `:name` usages.
pub fn expand_syntax_extensions(source: &mut Program) {
    let passes: [fn(&mut Program); 5] = [
        imports::expand,
        subroutines::expand,
        auto_address::expand,
        constants::expand,
        labels::expand
    ];

    for pass in passes.iter() {
        (*pass)(&mut *source);
    }
}

--------------------------------------------------------------------------------
/src/assembler/parser/syntax_ext/subroutines.rs:
--------------------------------------------------------------------------------
//! A syntax extension for custom subroutines
//!
//! # Example:
//!
//! Subroutine call:
//!
//! ```
//! @call(name, arg1, arg2)
//! ```
//!
//! FIXME: Maybe use @name(arg1, arg2) instead?
//!
//! Subroutine definition:
//!
//! ```
//! @start(name, argc)
//!     ...
//! @end()
//! ```

use std::borrow::ToOwned;
use std::collections::HashMap;
use assembler::parser::ast::{Program, Statement, StatementNode, Argument, MacroArgument, MacroArgumentNode,
                             Ident};
use assembler::parser::Parser;

use self::SubroutineState::*;


/// Expand the subroutine macros (`@start`, `@end`, `@call`) in `source`.
///
/// Entry point of this extension: sets up a `SubroutineExpander` with an
/// empty table of known routines and lets it rewrite `source` in place.
pub fn expand(source: &mut Program) {
    let mut expander = SubroutineExpander {
        routines: HashMap::new(),
        source: source
    };

    expander.expand();
}


// --- Subroutine Expansion: Implementation -------------------------------------

// We use a state machine to keep track of where we are and what is allowed.
41 | 42 | #[derive(Debug, Clone, Eq, PartialEq)] 43 | enum SubroutineState { 44 | SubroutineStart(Ident), // Definition of a new subroutine 45 | InSubroutine, // Subroutine body 46 | SubroutineEnd, // End of the body 47 | SubroutineCall(Ident, Vec), // Call of a subroutine 48 | NotInSubroutine // Everything else 49 | } 50 | 51 | struct SubroutineExpander<'a> { 52 | source: &'a mut Program, 53 | routines: HashMap 54 | } 55 | 56 | impl<'a> SubroutineExpander<'a> { 57 | 58 | fn expand(&mut self) { 59 | // Pass 1: Collect definitions and build preamble 60 | self.collect_routines(); 61 | if self.routines.len() == 0 { 62 | return 63 | } 64 | 65 | // Build preamble 66 | self.build_preamble(); 67 | 68 | debug!("Subroutines: {:?}", self.routines); 69 | 70 | // Pass 2: Replace function definitions 71 | self.process_macros(); 72 | 73 | // Pass 3: Remove macro statements 74 | self.source.retain(|stmt| { 75 | match stmt.value { 76 | Statement::Macro(..) => { 77 | false 78 | }, 79 | _ => true 80 | } 81 | }); 82 | } 83 | 84 | /// Collect all subroutine definitions and store them in `self.routines` 85 | fn collect_routines(&mut self) { 86 | for stmt in self.source.iter() { 87 | let (ident, args) = match stmt.value { 88 | Statement::Macro(ref ident, ref args) => (ident.clone(), args), 89 | _ => continue 90 | }; 91 | 92 | if ident.as_str() == "start" { 93 | // Two args expected: name and number of arguments 94 | if args.len() != 2 { 95 | fatal!("invalid number of Argument::s for @start: {}", 96 | args.len(); stmt) 97 | } 98 | 99 | let name = if let MacroArgument::Ident(ref name) = args[0].value { 100 | name.clone() 101 | } else { 102 | fatal!("expected subroutine name, got {}", args[0]; stmt) 103 | }; 104 | 105 | let argc = if let MacroArgument::Argument(ref arg) = args[1].value { 106 | if let Argument::Literal(argc) = arg.value { 107 | argc as usize 108 | } else { 109 | fatal!("expected argument count, got {}", args[1]; stmt) 110 | } 111 | } else { 112 | fatal!("expected argument 
count, got {}", args[1]; stmt) 113 | }; 114 | 115 | // Subroutine definition is valid, store it 116 | if self.routines.insert(name, argc).is_some() { 117 | fatal!("redefinition of subroutine: {}", args[0]; stmt) 118 | }; 119 | } 120 | } 121 | } 122 | 123 | fn parse_and_insert(&mut self, source: &str, pos: usize) { 124 | let ast = Parser::new(source, "").parse(); 125 | 126 | for (i, stmt) in ast.into_iter().enumerate() { 127 | self.source.insert(pos + i, stmt) 128 | } 129 | } 130 | 131 | /// Build the preamble for the subroutine machinery. 132 | /// Will only be inserted once at 133 | /// 134 | /// Will look like this: 135 | /// 136 | /// ``` 137 | /// $return = [_] ; The return value 138 | /// $jump_back = [_] ; The return address 139 | /// $arg0 = [_] ; Arguments any subroutine receives 140 | /// ``` 141 | fn build_preamble(&mut self) { 142 | let mut template = r###" 143 | $return = [_] 144 | $jump_back = [_] 145 | "###.to_owned(); 146 | 147 | for i in 0 .. *self.routines.values().max().unwrap() { 148 | template.push_str(&format!("$arg{} = [_]\n", i)); 149 | } 150 | 151 | self.parse_and_insert(&template, 0); 152 | } 153 | 154 | /// Process subroutine definitions and calls 155 | fn process_macros(&mut self) { 156 | let mut state = NotInSubroutine; 157 | 158 | // We use a indexed iteration here because we'll modify the source as we iterate 159 | // over it 160 | let mut i = 0; 161 | while i < self.source.len() { 162 | let prev_state = state.clone(); 163 | 164 | state = match self.get_state_for(&self.source[i], &state) { 165 | /// State processing & transitions 166 | 167 | SubroutineStart(ident) => { 168 | // Build subroutine preamble 169 | self.source.remove(i); 170 | 171 | let mut template = format!("{}:\n", ident); 172 | template.push_str("MOV $return 0\n"); 173 | 174 | self.parse_and_insert(&template, i); 175 | 176 | InSubroutine 177 | }, 178 | 179 | SubroutineEnd => { 180 | // Build subroutine epilogue 181 | self.source.remove(i); 182 | 183 | 
self.parse_and_insert("JMP $jump_back\n", i); 184 | 185 | NotInSubroutine 186 | }, 187 | 188 | SubroutineCall(name, args) => { 189 | self.source.remove(i); 190 | 191 | let mut template = String::new(); 192 | 193 | // Build arguments 194 | for j in 0 .. args.len() { 195 | let arg = match args[j].value { 196 | MacroArgument::Argument(ref arg) => arg, 197 | MacroArgument::Ident(ref ident) => { 198 | fatal!("expected argument, got `{}`", ident; args[j]) 199 | } 200 | }; 201 | 202 | template.push_str(&format!("MOV $arg{} {}\n", j, arg)); 203 | } 204 | 205 | // Set jumpback 206 | template.push_str(&format!("MOV $jump_back :ret{}\n", i)); 207 | 208 | // Jump to function 209 | template.push_str(&format!("JMP :{}\n", name)); 210 | 211 | // Add label where to continue 212 | template.push_str(&format!("ret{}:\n", i)); 213 | 214 | self.parse_and_insert(&template, i); 215 | 216 | prev_state // Return to previous state 217 | }, 218 | 219 | _ => state // Stay in current state 220 | }; 221 | 222 | i += 1; 223 | } 224 | } 225 | 226 | /// Get the current state based on the statement we're currently processing 227 | fn get_state_for(&self, stmt: &StatementNode, state: &SubroutineState) -> SubroutineState { 228 | match stmt.value { 229 | Statement::Macro(ref ident, ref args) => { 230 | match ident.as_str() { 231 | "start" => { 232 | if *state == InSubroutine { fatal!("can't nest subroutines"; stmt); } 233 | 234 | // Get subroutine name 235 | let ident = if let MacroArgument::Ident(ref ident) = args[0].value { 236 | ident.clone() 237 | } else { 238 | fatal!("expected subroutine name, found `{}`", args[0].value; args[0]); 239 | }; 240 | 241 | SubroutineStart(ident) 242 | }, 243 | "end" => { 244 | if args.len() > 0 { 245 | fatal!("@end takes no args"; args[0]); 246 | } 247 | 248 | SubroutineEnd 249 | }, 250 | "call" => { 251 | if args.len() == 0 { 252 | fatal!("expected (name, args...), found `)`"; stmt); 253 | } 254 | 255 | // Get subroutine name 256 | let ident = if let 
/// Narrow a numeric value to the machine word size (`machine::WordSize`).
///
/// Values above 255 trigger a warning (reported for `$stmt`) and wrap around
/// modulo 2^8, i.e. 256 becomes 0, 257 becomes 1 and so on. All other values
/// are cast unchanged.
///
/// `$val` is evaluated exactly once, so expressions with side effects are
/// safe to pass.
#[macro_export]
macro_rules! overflow_check(
    ($val:expr, $stmt:expr) => ({
        let val = $val;

        if val > 255 {
            warn!("overflow: {} > 255", val; $stmt);

            // `!0` is the largest word value (255); the number of distinct
            // word values - and thus the modulus - is one more than that.
            (val as u32 % (!(0 as $crate::machine::WordSize) as u32 + 1))
                as $crate::machine::WordSize
        } else {
            val as $crate::machine::WordSize
        }
    })
);
/// Print a warning that refers to the source location of `$stmt`.
///
/// Two forms are accepted, mirroring `fatal!`:
///
/// * `warn!("format string", args... ; stmt)` - the message is built with
///   `format!`
/// * `warn!("message" ; stmt)` - the message is used verbatim (no format
///   placeholders are interpreted)
///
/// `$stmt` can be any value with a `location` field.
#[macro_export]
macro_rules! warn(
    ($msg:expr, $($args:expr),* ; $stmt:expr) => {
        $crate::assembler::util::warn(format!($msg, $($args),*), &$stmt.location)
    };

    ($msg:expr ; $stmt:expr) => {
        {
            use std::borrow::ToOwned;
            $crate::assembler::util::warn($msg.to_owned(), &$stmt.location)
        }
    };
);
/// The kinds of arguments an instruction can take.
#[derive(Debug)]
pub enum Argument {
    /// The value of an address
    Value,
    /// An address
    Address,
    /// A literal value
    Literal,
}

impl PartialEq for Argument {
    /// Compare two argument kinds.
    ///
    /// `Value` and `Address` are treated as interchangeable, so the only
    /// distinction that matters is whether an argument is a `Literal` or not:
    /// two kinds are equal when both are literals or neither of them is.
    fn eq(&self, other: &Argument) -> bool {
        let is_literal = |kind: &Argument| {
            match *kind {
                Argument::Literal => true,
                _ => false
            }
        };

        is_literal(self) == is_literal(other)
    }
}
argument 113 | // M[x]: Value of address x 114 | 115 | // --- Memory Access 116 | 117 | // M[a] = M[b], or the Literal-set M[a] = b 118 | make_instruction!(IMov(args[2], memory) { 119 | Memset { address: args[0], value: args[1] } 120 | }); 121 | 122 | 123 | // --- Logic operations 124 | 125 | // M[a] = M[a] & M[b] 126 | make_instruction!(IAnd(args[2], memory) { 127 | Memset { address: args[0], value: memory[args[0] as usize] & args[1] } 128 | }); 129 | 130 | // M[a] = M[a] | M[b] 131 | make_instruction!(IOr(args[2], memory) { 132 | Memset { address: args[0], value: memory[args[0] as usize] | args[1] } 133 | }); 134 | 135 | // M[a] = M[a] ^ M[b] 136 | make_instruction!(IXor(args[2], memory) { 137 | Memset { address: args[0], value: memory[args[0] as usize] ^ args[1] } 138 | }); 139 | 140 | // M[a] = !M[a] 141 | make_instruction!(INot(args[1], memory) { 142 | Memset { address: args[0], value: !memory[args[0] as usize] } 143 | }); 144 | 145 | 146 | // --- Math 147 | 148 | // M[a] = M[a] + b 149 | make_instruction!(IAdd(args[2], memory) { 150 | Memset { address: args[0], value: memory[args[0] as usize] + args[1] } 151 | }); 152 | 153 | 154 | // M[a] = M[a] - b 155 | make_instruction!(ISub(args[2], memory) { 156 | Memset { address: args[0], value: memory[args[0] as usize] - args[1] } 157 | }); 158 | 159 | 160 | // --- Control 161 | 162 | make_instruction!(IHalt -> Halt); 163 | 164 | // Jump to a 165 | make_instruction!(IJmp(args[1], memory) { 166 | Jump { address: args[0] } 167 | }); 168 | 169 | // Jump to a if b == 0 170 | make_instruction!(IJz(args[2], memory) { 171 | if args[0] == 0 { 172 | Jump { address: args[0] } 173 | } else { 174 | Continue 175 | } 176 | }); 177 | 178 | // Jump to a if b == c 179 | make_instruction!(IJeq(args[3], memory) { 180 | if args[1] == args[2] { 181 | Jump { address: args[0] } 182 | } else { 183 | Continue 184 | } 185 | }); 186 | 187 | // Jump to a if b < c 188 | make_instruction!(IJls(args[3], memory) { 189 | if args[1] < args[2] { 190 | 
Jump { address: args[0] } 191 | } else { 192 | Continue 193 | } 194 | }); 195 | 196 | // Jump to a if b > c 197 | make_instruction!(IJgt(args[3], memory) { 198 | if args[1] > args[2] { 199 | Jump { address: args[0] } 200 | } else { 201 | Continue 202 | } 203 | }); 204 | 205 | 206 | // --- I/O 207 | 208 | // Print the contents of M[a] in ASCII 209 | make_instruction!(IAPrint(args[1], memory) -> Continue { 210 | print!("{:}", args[0] as char); 211 | }); 212 | 213 | // Print the contents of M[a] in decimal 214 | make_instruction!(IDPrint(args[1], memory) -> Continue { 215 | print!("{:}", args[0]); 216 | }); 217 | 218 | 219 | // --- Misc 220 | 221 | // M[a] = random value (0 to 25 -> equal probability distribution) 222 | make_instruction!(IRandom(args[1], memory) { 223 | let mut rand_range = RandRange::new(0, RAND_MAX); 224 | let mut rng = rand::thread_rng(); 225 | Memset { address: args[0], value: rand_range.sample(&mut rng) } 226 | }); 227 | 228 | 229 | // --- Opcode -> Instruction mapping -------------------------------------------- 230 | 231 | macro_rules! count_args { 232 | () => { 0 }; 233 | ($x:expr) => { 1 }; 234 | ($head:expr, $($tail:expr),+) => { 1 + count_args!($($tail),+) }; 235 | } 236 | 237 | macro_rules! instruction { 238 | ( $mnem:path : $opcode:expr => $instr:ident ) => ( 239 | Instruction { 240 | mnem: $mnem, 241 | opcode: $opcode, 242 | argc: 0, 243 | arg_types: &[], 244 | implementation: $instr::execute 245 | } 246 | ); 247 | 248 | ( $mnem:path : $opcode:expr => $instr:ident [ $($t:ident),* ] ) => ( 249 | Instruction { 250 | mnem: $mnem, 251 | opcode: $opcode, 252 | arg_types: &[$($t),*], 253 | argc: count_args!($($t),*), 254 | implementation: $instr::execute 255 | } 256 | ); 257 | } 258 | 259 | macro_rules! 
instructions { 260 | ( $($mnem:ident : $( $opcode:expr => $instr:ident ( $($t:ident),* ) ),* ; )* ) => { 261 | 262 | // Remember: HALT is not part of the macro's arguments as its opcode 263 | // doesn't follow the scheme of the other instructions. 264 | 265 | #[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] 266 | pub enum Mnemonic { 267 | $( $mnem, )* HALT 268 | } 269 | 270 | impl FromStr for Mnemonic { 271 | type Err = String; 272 | 273 | fn from_str(s: &str) -> Result { 274 | match &*s.to_ascii_uppercase() { 275 | $( 276 | stringify!($mnem) => Ok(Mnemonic::$mnem), 277 | )* 278 | "HALT" => Ok(Mnemonic::HALT), 279 | _ => Err(format!("Invalid instruction: {}", s)) 280 | } 281 | } 282 | } 283 | 284 | /// An opcode → instruction mapping 285 | static INSTRUCTIONS_TABLE: &'static [Instruction] = &[ 286 | $( 287 | $( 288 | instruction!(Mnemonic::$mnem: $opcode => $instr [ $($t),* ]) 289 | ),* 290 | ),* 291 | ]; 292 | 293 | 294 | /// An mnemonic → instructions mapping + access methods 295 | pub struct InstructionManager { 296 | map: HashMap> 297 | } 298 | 299 | impl InstructionManager { 300 | pub fn new() -> InstructionManager { 301 | let mut map = HashMap::new(); 302 | $( 303 | map.insert(Mnemonic::$mnem, vec![ 304 | $( &INSTRUCTIONS_TABLE[$opcode] ),* 305 | ]); 306 | )* 307 | 308 | map.insert(Mnemonic::HALT, vec![&INSTRUCTION_HALT]); 309 | 310 | InstructionManager { 311 | map: map 312 | } 313 | } 314 | 315 | pub fn lookup_operations(&self, mnem: &Mnemonic) -> &[&'static Instruction] { 316 | &self.map[*mnem] 317 | } 318 | 319 | pub fn decode_opcode(&self, opcode: u8) -> &'static Instruction { 320 | // We're assuming the table is not full 321 | assert!(INSTRUCTIONS_TABLE.len() < 0xFF); 322 | 323 | if opcode != 0xFF && opcode as usize >= INSTRUCTIONS_TABLE.len() { 324 | panic!("Invalid opcode: {}", opcode) 325 | }; 326 | 327 | // Special case: 0xFF is HALT 328 | if opcode == 0xFF { 329 | &INSTRUCTION_HALT 330 | } else { 331 | &INSTRUCTIONS_TABLE[opcode as usize] 332 | } 
333 | } 334 | 335 | pub fn decode_args(&self, args: &[WordSize], arg_types: &[Argument], mem: &[WordSize]) -> Vec { 336 | arg_types.iter() 337 | .zip(args.iter()) 338 | .map(|(ty, val)| { 339 | match *ty { 340 | Argument::Value => mem[*val as usize], 341 | Argument::Address => *val, 342 | Argument::Literal => *val, 343 | } 344 | }) 345 | .collect() 346 | } 347 | 348 | } 349 | }; 350 | } 351 | 352 | instructions! { 353 | AND: 354 | 0x00 => IAnd(Address, Value ), 355 | 0x01 => IAnd(Address, Literal); 356 | 357 | OR: 358 | 0x02 => IOr(Address, Value ), 359 | 0x03 => IOr(Address, Literal); 360 | 361 | XOR: 362 | 0x04 => IXor(Address, Value ), 363 | 0x05 => IXor(Address, Literal); 364 | 365 | NOT: 366 | 0x06 => INot(Address); 367 | 368 | 369 | MOV: 370 | 0x07 => IMov(Address, Value ), 371 | 0x08 => IMov(Address, Literal); 372 | 373 | 374 | RANDOM: 375 | 0x09 => IRandom(Address); 376 | 377 | ADD: 378 | 0x0A => IAdd(Address, Value ), 379 | 0x0B => IAdd(Address, Literal); 380 | 381 | SUB: 382 | 0x0C => ISub(Address, Value ), 383 | 0x0D => ISub(Address, Literal); 384 | 385 | 386 | JMP: 387 | 0x0E => IJmp(Value ), 388 | 0x0F => IJmp(Literal); 389 | 390 | JZ: 391 | 0x10 => IJz(Value, Value ), 392 | 0x11 => IJz(Value, Literal), 393 | 0x12 => IJz(Literal, Value ), 394 | 0x13 => IJz(Literal, Literal); 395 | 396 | JEQ: 397 | 0x14 => IJeq(Value, Value, Value ), 398 | 0x15 => IJeq(Literal, Value, Value ), 399 | 0x16 => IJeq(Value, Value, Literal), 400 | 0x17 => IJeq(Literal, Value, Literal); 401 | 402 | JLS: 403 | 0x18 => IJls(Value, Value, Value ), 404 | 0x19 => IJls(Literal, Value, Value ), 405 | 0x1A => IJls(Value, Value, Literal), 406 | 0x1B => IJls(Literal, Value, Literal); 407 | 408 | JGT: 409 | 0x1C => IJgt(Value, Value, Value ), 410 | 0x1D => IJgt(Literal, Value, Value ), 411 | 0x1E => IJgt(Value, Value, Literal), 412 | 0x1F => IJgt(Literal, Value, Literal); 413 | 414 | 415 | APRINT: 416 | 0x20 => IAPrint(Value ), 417 | 0x21 => IAPrint(Literal); 418 | 419 | DPRINT: 420 | 
0x22 => IDPrint(Value ), 421 | 0x23 => IDPrint(Literal); 422 | } 423 | 424 | // Halt the program 425 | static INSTRUCTION_HALT: Instruction = instruction!(Mnemonic::HALT: 0xFF => IHalt); -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | #![feature(plugin)] 2 | 3 | // Use of unstable libraries 4 | #![feature(collections)] 5 | #![feature(core)] 6 | #![feature(fs)] 7 | #![feature(io)] 8 | #![feature(path)] 9 | #![feature(old_io)] 10 | #![feature(os)] 11 | #![feature(std_misc)] 12 | 13 | #![plugin(docopt_macros)] 14 | 15 | extern crate ansi_term; 16 | extern crate docopt; 17 | extern crate env_logger; 18 | extern crate rand; 19 | extern crate "rustc-serialize" as rustc_serialize; 20 | #[macro_use] extern crate lazy_static; 21 | #[macro_use] extern crate log; 22 | 23 | use docopt::Docopt; 24 | 25 | mod assembler; 26 | mod machine; 27 | mod vm; 28 | 29 | docopt!(Args derive Debug, " 30 | Usage: tiny asm [-v] 31 | tiny asm [-v] --bin 32 | tiny vm 33 | tiny --help 34 | 35 | Options: 36 | --help Show this screen. 
37 | "); 38 | 39 | 40 | #[cfg(not(test))] 41 | fn main() { 42 | env_logger::init().unwrap(); 43 | 44 | let args: Args = Args::docopt().decode().unwrap_or_else(|e| e.exit()); 45 | 46 | if args.cmd_asm { 47 | assembler::main(args) 48 | } else { 49 | vm::main(args) 50 | } 51 | } -------------------------------------------------------------------------------- /src/vm/mod.rs: -------------------------------------------------------------------------------- 1 | use std::fs::File; 2 | use std::io::Read; 3 | use std::path::Path; 4 | 5 | use machine::{InstructionManager, Memset, Jump, Halt, Continue}; 6 | use Args; 7 | 8 | 9 | const MEMORY_SIZE: usize = 256; 10 | 11 | 12 | pub fn main(args: Args) { 13 | // Read binary file 14 | let path = Path::new(&args.arg_input); 15 | let mut file = match File::open(&path) { 16 | Ok(f) => f, 17 | Err(err) => { panic!("Can't open {}: {}", path.display(), err) } 18 | }; 19 | 20 | let mut source = vec![]; 21 | match file.read_to_end(&mut source) { 22 | Ok(v) => v, 23 | Err(err) => { panic!("Can't read {}: {}", path.display(), err) } 24 | }; 25 | 26 | // Run virtual machine 27 | run(&source); 28 | } 29 | 30 | fn run(source: &[u8]) { 31 | let mut memory = [0u8; MEMORY_SIZE]; 32 | let mut ip = 0; 33 | let im = InstructionManager::new(); 34 | 35 | loop { 36 | debug!("--- next instruction (ip: {})", ip); 37 | debug!("memory: {:?}@{}", &memory[..], memory.len()); 38 | 39 | // Step 1: Read instruction 40 | let opcode = source[ip]; 41 | 42 | // Step 2: Decode opcode and read + decode the arguments 43 | let ref instruction = im.decode_opcode(opcode); 44 | 45 | let argc = instruction.argc; 46 | if ip + argc >= source.len() { 47 | panic!("Reached end of input without HALT!") 48 | } 49 | let args = &source[ip + 1 .. 
ip + 1 + argc]; 50 | 51 | let decoded_args = im.decode_args(args, instruction.arg_types, &memory); 52 | 53 | // Step 3 + 4: Execute instruction and process result 54 | debug!("executing {:?} ({:#04X}) with {:?}", instruction.mnem, opcode, decoded_args); 55 | 56 | match instruction.execute(&decoded_args, &memory) { 57 | Continue => {}, 58 | Jump { address } => { 59 | debug!("Jumping to {}", address); 60 | ip = address as usize; 61 | continue; // We've already updated the instruction pointer 62 | }, 63 | Memset { address, value } => { 64 | debug!("Setting m[{}] = {}", address, value); 65 | memory[address as usize] = value; 66 | }, 67 | Halt => break 68 | } 69 | 70 | // Update instruction pointer 71 | ip += 1; // Skip opcode 72 | ip += argc; // Skip args 73 | } 74 | } --------------------------------------------------------------------------------