├── .gitignore
├── Cargo.lock
├── Cargo.toml
├── README.md
├── lib
    ├── binary
    │   └── shift.asm
    └── math
    │   ├── divide.asm
    │   └── multiply.asm
├── pi.asm
├── pi.bin
└── src
    ├── assembler
        ├── codegen.rs
        ├── mod.rs
        ├── parser
        │   ├── ast.rs
        │   ├── grammar.md
        │   ├── lexer.rs
        │   ├── mod.rs
        │   └── syntax_ext
        │   │   ├── auto_address.rs
        │   │   ├── constants.rs
        │   │   ├── imports.rs
        │   │   ├── labels.rs
        │   │   ├── mod.rs
        │   │   └── subroutines.rs
        └── util.rs
    ├── machine.rs
    ├── main.rs
    └── vm
        └── mod.rs


/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | 


--------------------------------------------------------------------------------
/Cargo.lock:
--------------------------------------------------------------------------------
 1 | [root]
 2 | name = "tiny-asm"
 3 | version = "0.0.1"
 4 | dependencies = [
 5 |  "ansi_term 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
 6 |  "docopt 0.6.43 (registry+https://github.com/rust-lang/crates.io-index)",
 7 |  "docopt_macros 0.6.43 (registry+https://github.com/rust-lang/crates.io-index)",
 8 |  "env_logger 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
 9 |  "lazy_static 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
10 |  "log 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)",
11 |  "rand 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
12 |  "rustc-serialize 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
13 | ]
14 | 
15 | [[package]]
16 | name = "ansi_term"
17 | version = "0.5.0"
18 | source = "registry+https://github.com/rust-lang/crates.io-index"
19 | 
20 | [[package]]
21 | name = "docopt"
22 | version = "0.6.43"
23 | source = "registry+https://github.com/rust-lang/crates.io-index"
24 | dependencies = [
25 |  "libc 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
26 |  "regex 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)",
27 |  "rustc-serialize 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
28 | ]
29 | 
30 | [[package]]
31 | name = "docopt_macros"
32 | version = "0.6.43"
33 | source = "registry+https://github.com/rust-lang/crates.io-index"
34 | dependencies = [
35 |  "docopt 0.6.43 (registry+https://github.com/rust-lang/crates.io-index)",
36 | ]
37 | 
38 | [[package]]
39 | name = "env_logger"
40 | version = "0.2.2"
41 | source = "registry+https://github.com/rust-lang/crates.io-index"
42 | dependencies = [
43 |  "log 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)",
44 |  "regex 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)",
45 | ]
46 | 
47 | [[package]]
48 | name = "lazy_static"
49 | version = "0.1.7"
50 | source = "registry+https://github.com/rust-lang/crates.io-index"
51 | 
52 | [[package]]
53 | name = "libc"
54 | version = "0.1.2"
55 | source = "registry+https://github.com/rust-lang/crates.io-index"
56 | 
57 | [[package]]
58 | name = "log"
59 | version = "0.2.5"
60 | source = "registry+https://github.com/rust-lang/crates.io-index"
61 | 
62 | [[package]]
63 | name = "rand"
64 | version = "0.1.3"
65 | source = "registry+https://github.com/rust-lang/crates.io-index"
66 | dependencies = [
67 |  "libc 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
68 |  "log 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)",
69 | ]
70 | 
71 | [[package]]
72 | name = "regex"
73 | version = "0.1.16"
74 | source = "registry+https://github.com/rust-lang/crates.io-index"
75 | 
76 | [[package]]
77 | name = "rustc-serialize"
78 | version = "0.3.0"
79 | source = "registry+https://github.com/rust-lang/crates.io-index"
80 | 
81 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | 
 3 | name = "tiny-asm"
 4 | version = "0.0.1"
 5 | authors = ["Markus Siemens <siemens1993@gmail.com>"]
 6 | 
 7 | [[bin]]
 8 | name = "tiny"
 9 | path = "src/main.rs"
10 | 
11 | 
12 | [profile.release]
13 | opt-level = 3
14 | lto = true
15 | 
16 | [dependencies]
17 | ansi_term = "*"
18 | docopt = "*"
19 | docopt_macros = "*"
20 | env_logger = "*"
21 | lazy_static = "*"
22 | log = "*"
23 | rand = "*"
24 | rustc-serialize = "*"


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # rust-tinyasm
 2 | 
 3 | A Rust port of my Python submission for http://redd.it/1kqxz9:
 4 | 
 5 | > Tiny, a very simple fictional computer architecture, is programmed by an assembly language that has 16 mnemonics, with 37 unique op-codes. The system is based on Harvard architecture, and is very straight-forward: program memory is different from working memory, the machine only executes one instruction at a time, memory is an array of bytes from index 0 to index 255 (inclusive), and doesn't have any relative addressing modes.
 6 | >
 7 | > Your goal will be to write an assembler for Tiny: though you don't need to simulate the code or machine components, you must take given assembly-language source code and produce a list of hex op-codes. You are essentially writing code that converts the lowest human-readable language to machine-readable language!
 8 | 
 9 | My original Python submission can be found here:
10 | https://github.com/msiemens/TINY.ASM/. This is a Rust port. It features
11 | a much better architecture, including a proper parser and abstract syntax tree.
12 | Like the Python version, this also comes with a small VM.
13 | 
14 | ## Usage
15 | 
16 | Run the assembler:
17 | 
18 |     $ tiny asm <input>
19 | 
20 | Create a binary file that the VM can execute:
21 | 
22 |     $ tiny asm --bin <input> <binary>
23 | 
24 | Run the VM:
25 | 
26 |     $ tiny vm <binary>
27 | 
28 | 
29 | ## Syntax (+ Additions)
30 | 
31 |      v--- operation
32 |     MOV [0] 1
33 |          ^  ^---- literal
34 |          |------- memory address
35 | 
36 | 
37 | **Comments**
38 | 
39 |     ; This is a comment
40 | 
41 | **Labels**
42 | 
43 |     label:
44 |     JMP :label
45 | 
46 | **Constants**
47 | 
48 |     $mem_addr = [0]
49 |     $some_const = 5
50 | 
51 |     MOV $mem_addr $some_const
52 | 
53 | **Imports**
54 | 
55 |     #import <file_name.asm>
56 | 
57 | **Char Constants**
58 | 
59 |     APRINT '!'  ; Prints: !
60 |     APRINT '\n' ; Prints a newline
61 | 
62 | **Subroutines**
63 | 
64 |     ; Define a subroutine
65 |     ; name ----v              v---- number of arguments
66 |     @start(binary_shift_left, 1)
67 |         ADD     $arg0       $arg0
68 |         MOV     $return     $arg0
69 |     @end()
70 | 
71 |     ; Call a subroutine
72 |     @call(binary_shift_left, 5)
73 |     @call(binary_shift_left, [5])
74 | 
75 | 
76 | ## LICENSE
77 | 
78 | The MIT License (MIT)
79 | 
80 | Copyright (c) 2014 Markus Siemens
81 | 
82 | Permission is hereby granted, free of charge, to any person obtaining a copy of
83 | this software and associated documentation files (the "Software"), to deal in
84 | the Software without restriction, including without limitation the rights to
85 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
86 | the Software, and to permit persons to whom the Software is furnished to do so,
87 | subject to the following conditions:
88 | 
89 | The above copyright notice and this permission notice shall be included in all
90 | copies or substantial portions of the Software.
91 | 
92 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
93 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
94 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
95 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
96 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
97 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/lib/binary/shift.asm:
--------------------------------------------------------------------------------
 1 | ; Define constants
 2 |     $shift_r_and        = [_]
 3 |     $shift_r_cmp        = [_]
 4 |     $shift_r_bit2       = 2
 5 |     $shift_r_bit3       = 4
 6 |     $shift_r_bit4       = 8
 7 |     $shift_r_bit5       = 16
 8 |     $shift_r_bit6       = 32
 9 |     $shift_r_bit7       = 64
10 |     $shift_r_bit8       = 128
11 | 
12 | 
13 | ; SUBROUTINE: Shift left
14 | ; ----------------------
15 | 
16 | ;  Input: $arg0 as integer
17 | ; Output: $return's the integer shifted left
18 | @start(binary_shift_left, 1)
19 |     ADD     $arg0       $arg0
20 |     MOV     $return     $arg0
21 | @end()
22 | 
23 | 
24 | ; SUBROUTINE: Shift right
25 | ; -----------------------
26 | 
27 | ;     Input: $arg0 as integer
28 | ;    Output: $return's the integer shifted right
29 | ; Algorithm: We check every bit, and if it is set, we add bit_val/2
30 | ;            to the result:
31 | ;            Input:  100 ← bit(3) is set, value: 4, add 4/2=2
32 | ;            Output: 010
33 | @start(binary_shift_right, 1)
34 |     MOV     $return     0                       ; Initialize memory
35 | 
36 |     ; shift_r_bit2:
37 |     MOV     $shift_r_cmp    $arg0
38 |     AND     $shift_r_cmp    $shift_r_bit2
39 |     JEQ     :shift_r_bit3   $shift_r_cmp    0   ; v & 2 == 0 → skip
40 |     ADD     $return         1                   ; Add 2 / 2 = 1
41 | 
42 |     shift_r_bit3:
43 |     MOV     $shift_r_cmp    $arg0
44 |     AND     $shift_r_cmp    $shift_r_bit3
45 |     JEQ     :shift_r_bit4   $shift_r_cmp    0   ; v & 4 == 0 → skip
46 |     ADD     $return         2                   ; Add 4 / 2 = 2
47 | 
48 |     shift_r_bit4:
49 |     MOV     $shift_r_cmp    $arg0
50 |     AND     $shift_r_cmp    $shift_r_bit4
51 |     JEQ     :shift_r_bit5   $shift_r_cmp    0   ; v & 8 == 0 → skip
52 |     ADD     $return         4                   ; Add 8 / 2 = 4
53 | 
54 |     shift_r_bit5:
55 |     MOV     $shift_r_cmp    $arg0
56 |     AND     $shift_r_cmp    $shift_r_bit5
57 |     JEQ     :shift_r_bit6   $shift_r_cmp    0   ; v & 16 == 0 → skip
58 |     ADD     $return         8                   ; Add 16 / 2 = 8
59 | 
60 |     shift_r_bit6:
61 |     MOV     $shift_r_cmp    $arg0
62 |     AND     $shift_r_cmp    $shift_r_bit6
63 |     JEQ     :shift_r_bit7   $shift_r_cmp    0   ; v & 32 == 0 → skip
64 |     ADD     $return         16                  ; Add 32 / 2 = 16
65 | 
66 |     shift_r_bit7:
67 |     MOV     $shift_r_cmp    $arg0
68 |     AND     $shift_r_cmp    $shift_r_bit7
69 |     JEQ     :shift_r_bit8   $shift_r_cmp    0   ; v & 64 == 0 → skip
70 |     ADD     $return         32                  ; Add 64 / 2 = 32
71 | 
72 |     shift_r_bit8:
73 |     MOV     $shift_r_cmp    $arg0
74 |     AND     $shift_r_cmp    $shift_r_bit8
75 |     JEQ     :shift_r_return $shift_r_cmp    0   ; v & 128 == 0 → skip
76 |     ADD     $return         64                  ; Add 128 / 2 = 64
77 | 
78 |     shift_r_return:
79 | @end()
80 | 


--------------------------------------------------------------------------------
/lib/math/divide.asm:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ; SUBROUTINE: Divide two integers
 4 | ; ---------------------------------
 5 | 
 6 | ;     Input: $arg0: dividend as int, $arg1: divisor as int
 7 | ;    Output: $return's the arg0/arg1 as int division
 8 | ; Algorithm: Subtract arg1 from arg0 until arg0 < arg1, counting the subtractions
 9 | 
10 | @start(divide, 2)
11 |     math_div_loop:
12 |                                         ; arg0 < arg1 → break
13 |     JLS     :math_div_done      $arg0   $arg1
14 |     ADD     $return             1
15 |     SUB     $arg0               $arg1
16 |     JMP     :math_div_loop              ; Loop iteration
17 | 
18 |     math_div_done:
19 | @end()


--------------------------------------------------------------------------------
/lib/math/multiply.asm:
--------------------------------------------------------------------------------
 1 | ; Define constants
 2 |     $math_mul_counter       = [_]
 3 | 
 4 | 
 5 | ; SUBROUTINE: Multiply two integers
 6 | ; ---------------------------------
 7 | 
 8 | ;     Input: $arg0 & $arg1 as two integers
 9 | ;    Output: $return's the multiplication of the two
10 | ; Algorithm: Sum arg0 arg1 times
11 | @start(multiply, 2)
12 |     math_mul_loop:
13 |                                         ; counter == arg1 → break
14 |     JEQ     :math_mul_done      $arg1   $math_mul_counter
15 |     ADD     $math_mul_counter   1
16 |     ADD     $return             $arg0
17 |     JMP     :math_mul_loop              ; Loop iteration
18 | 
19 |     math_mul_done:
20 | @end()


--------------------------------------------------------------------------------
/pi.asm:
--------------------------------------------------------------------------------
 1 | ; Approximate PI
 2 | ; --------------
 3 | ;
 4 | ; by Markus Siemens <markus@es-netze.de>
 5 | 
 6 | ; Define constants
 7 |     $max_rand_square   = 144    ; (RAND_MAX/2) ** 2
 8 | 
 9 |     ; Approximate PI
10 |     $pi_iterations   = 100  ; Iteration count
11 |     $pi_rand_divider = 2    ; Divide the RANDOM numbers by this, so we don't overflow
12 |     $pi_counter     = [_]   ; Loop counter
13 |     $pi_rand0       = [_]   ; First RANDOM number
14 |     $pi_rand1       = [_]   ; Second RANDOM number
15 |     $pi_rand_sum    = [_]
16 |     $pi_inside      = [_]   ; Number of dots inside the circle
17 | 
18 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19 | 
20 | main:
21 |     MOV $pi_counter     0               ; Initialize memory
22 | 
23 |     main_loop:                          ; The main loop
24 |                                         ; Loop break condition: $pi_counter == $pi_iterations
25 |     JEQ     :print      $pi_counter     $pi_iterations
26 |     APRINT  '.'
27 |     MOV     $pi_rand_sum 0              ; Reset sum of rand0^2 and rand1^2
28 | 
29 |                                         ; Get random numbers
30 |     RANDOM  $pi_rand0
31 |     @call(divide, $pi_rand0, $pi_rand_divider)  ; $pi_rand0 ^ 2 would overflow otherwise
32 |     MOV     $pi_rand0   $return
33 | 
34 |     RANDOM  $pi_rand1
35 |     @call(divide, $pi_rand1, $pi_rand_divider)  ; $pi_rand1 ^ 2 would overflow otherwise
36 |     MOV     $pi_rand1   $return
37 | 
38 |     @call(multiply, $pi_rand0, $pi_rand0)
39 |     MOV     $pi_rand0   $return
40 | 
41 |     @call(multiply, $pi_rand1, $pi_rand1)
42 |     MOV     $pi_rand1   $return
43 | 
44 |     ADD     $pi_rand_sum    $pi_rand0   ; Add $pi_rand0^2 and $pi_rand1^2
45 |     ADD     $pi_rand_sum    $pi_rand1
46 | 
47 |                                         ; If $pi_rand_sum > $MAX_RAND_SQUARE, GOTO FI
48 |     JGT     :pi_fi_indot    $pi_rand_sum    $max_rand_square
49 |     ADD     $pi_inside      1
50 | 
51 |     pi_fi_indot:
52 | 
53 |                                         ; Increment the loop counter
54 |     ADD     $pi_counter     1
55 |     JMP     :main_loop                  ; Next loop iteration
56 | 
57 | print:                                  ; SUBROUTINE
58 |                                         ; Calculate PI using 'inside / total * 4' as float
59 |     APRINT '\n'
60 |     DPRINT  $pi_inside
61 |     APRINT  '/'
62 |     DPRINT  $pi_iterations
63 |     APRINT  '*'
64 |     DPRINT  4
65 | 
66 |     JMP     :end
67 | 
68 | 
69 | end:
70 |                                         ; SUBROUTINE
71 |                                         ; End the program execution
72 |     HALT
73 | 
74 | #import <lib/math/multiply.asm>
75 | #import <lib/math/divide.asm>


--------------------------------------------------------------------------------
/pi.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msiemens/rust-tinyasm/1549d75260b9d523d4ecebd2033b098ef5e5f156/pi.bin


--------------------------------------------------------------------------------
/src/assembler/codegen.rs:
--------------------------------------------------------------------------------
 1 | use machine::InstructionManager;
 2 | use machine::Argument as ArgumentType;
 3 | use assembler::parser::ast::{Statement, StatementNode, Argument, Mnemonic};
 4 | 
 5 | 
 6 | pub fn generate_binary(ast: Vec<StatementNode>) -> Vec<Vec<u8>> {
 7 |     let mut binary = vec![];
 8 |     let im = InstructionManager::new();
 9 | 
10 |     for stmt in ast.iter() {
11 |         if let Statement::Operation(ref mnem, ref args) = stmt.value {
12 |             // Get the requested mnemonic
13 |             let Mnemonic(instr) = mnem.clone();
14 | 
15 |             // Get the argument types we received
16 |             let arg_types: Vec<ArgumentType> = args.iter().map(|ref arg| {
17 |                 match arg.value {
18 |                     Argument::Literal(_) | Argument::Char(_) => {
19 |                         ArgumentType::Literal
20 |                     },
21 |                     Argument::Address(_) => {
22 |                         ArgumentType::Address
23 |                     },
24 |                     _ => fatal!("unprocessed argument: {}", arg; arg)
25 |                 }
26 |             }).collect();
27 | 
28 |             // Find the opcode matching the given argument types
29 |             let instr_class = im.lookup_operations(&instr);
30 |             let op = instr_class.iter().find(|op| {
31 |                 op.arg_types == arg_types
32 |             }).unwrap_or_else(|| {
33 |                 // Build allowed arguments string
34 |                 let allowed_arg_types = instr_class.iter()
35 |                     .cloned()
36 |                     .map(|i| format!("{:?}", i.arg_types))
37 |                     .collect::<Vec<_>>()
38 |                     .connect(" or ");
39 | 
40 |                 fatal!("invalid arguments for {:?}: found {:?}, allowed: {:?}",
41 |                        instr, arg_types, allowed_arg_types; stmt)
42 |             });
43 | 
44 |             // Finally, write the opcode
45 |             let mut binary_stmt = vec![op.opcode];
46 |             binary_stmt.extend(args.iter().map(|arg| {
47 |                 match arg.value {
48 |                     Argument::Literal(i) => i,
49 |                     Argument::Char(c) => c,
50 |                     Argument::Address(a) => a.unwrap(),
51 |                     // Shouldn't happen as we check this in arg_types
52 |                     _ => fatal!("unprocessed argument: {}", arg; arg)
53 |                 }
54 |             }));
55 | 
56 |             binary.push(binary_stmt);
57 |         } else {
58 |             fatal!("unprocessed operation: {}", stmt; stmt)
59 |         }
60 |     }
61 | 
62 |     binary
63 | }
64 | 
65 | 
66 | #[cfg(test)]
67 | mod test {
68 |     use assembler::parser::ast::{Statement, Mnemonic};
69 |     use assembler::parser::dummy_source;
70 | 
71 |     use super::generate_binary;
72 | 
73 |     #[test]
74 |     fn test_operation() {
75 |         assert_eq!(
76 |             generate_binary(vec![
77 |                 Statement::new(
78 |                     Statement::Operation(
79 |                         Mnemonic("HALT".parse().unwrap()),
80 |                         vec![]
81 |                     ),
82 |                     dummy_source()
83 |                 )
84 |             ]),
85 |             vec![vec![0xFF]]
86 |         )
87 |     }
88 | }


--------------------------------------------------------------------------------
/src/assembler/mod.rs:
--------------------------------------------------------------------------------
 1 | #[macro_use] mod util;
 2 | mod codegen;
 3 | mod parser;
 4 | 
 5 | use std::fs::File;
 6 | use std::io::{Read, Write};
 7 | use std::path::Path;
 8 | use super::Args;
 9 | use machine::WordSize;
10 | 
11 | 
12 | pub fn main(args: Args) {
13 |     // Read source file
14 |     let input_path = Path::new(&args.arg_input);
15 |     let source = read_file(&input_path);
16 | 
17 |     // Parse source file
18 |     let filename = input_path.iter().last().unwrap().to_string_lossy();
19 |     let mut source = parser::Parser::new(&source, &filename).parse();
20 | 
21 |     if args.flag_v {
22 |         println!("Source:");
23 |         for stmt in source.iter() {
24 |             println!("{}", stmt);
25 |         }
26 |         print!("\n");
27 |     }
28 | 
29 |     // Expand syntax extensions
30 |     parser::expand_syntax_extensions(&mut source);
31 | 
32 |     if args.flag_v {
33 |         println!("Expanded source:");
34 |         for stmt in source.iter() {
35 |             println!("{}", stmt);
36 |         }
37 |         print!("\n");
38 |     }
39 | 
40 |     // Generate binary
41 |     let binary = codegen::generate_binary(source);
42 | 
43 |     if args.flag_bin {
44 |         write_binary(binary, &Path::new(&args.arg_output));
45 |     } else {
46 |         for stmt in binary.iter() {
47 |             for b in stmt.iter() {
48 |                 print!("{:#04x} ", *b)
49 |             }
50 |             print!("\n");
51 |         }
52 |     }
53 | }
54 | 
55 | 
56 | fn read_file(input_path: &Path) -> String {
57 |     let mut file = match File::open(&input_path) {
58 |         Ok(f) => f,
59 |         Err(err) => panic!("Can't open {}: {}", input_path.display(), err)
60 |     };
61 | 
62 |     let mut contents = String::new();
63 |     match file.read_to_string(&mut contents) {
64 |         Ok(contents) => contents,
65 |         Err(_) => panic!("Can't read {}", input_path.display())
66 |     };
67 | 
68 |     contents
69 | }
70 | 
71 | fn write_binary(binary: Vec<Vec<WordSize>>, output_path: &Path) {  // Writes every statement's bytes, in order, to output_path; panics on I/O errors
72 |     let mut file = match File::create(output_path) {
73 |         Ok(f) => f,
74 |         Err(err) => panic!("Can't write to {}: {}", output_path.display(), err)
75 |     };
76 | 
77 |     for stmt in binary.iter() {
78 |         // One write per statement instead of one per byte; the bytes written are the same.
79 |         if let Err(err) = file.write_all(&stmt[..]) {
80 |             panic!("Can't write to {}: {}", output_path.display(), err);
81 |         }
82 |     }
83 | }


--------------------------------------------------------------------------------
/src/assembler/parser/ast.rs:
--------------------------------------------------------------------------------
  1 | //! The Tiny Abstract Syntax Tree.
  2 | //! Modeled following the grammar (`grammar.md`). Every compound item has an
  3 | //! `Item` enum with all options and an `ItemNode` which contains the item
  4 | //! and the location in the source file.
  5 | 
  6 | use std::borrow::ToOwned;
  7 | use std::fmt;
  8 | 
  9 | use assembler::parser::lexer::SourceLocation;
 10 | use machine::Mnemonic as Instruction;  // FIXME
 11 | use machine::WordSize;
 12 | 
 13 | 
 14 | pub type Program = Vec<StatementNode>;
 15 | 
 16 | 
 17 | // --- Helper for AST definitions -----------------------------------------------
 18 | 
 19 | macro_rules! define(
 20 |     ( $name:ident -> $wrapper:ident : $( $variants:ident ( $( $arg:ty ),* ) ),* ) => {
 21 |         #[derive(PartialEq, Eq, Clone)]
 22 |         pub struct $wrapper {
 23 |             pub value: $name,
 24 |             pub location: SourceLocation
 25 |         }
 26 | 
 27 |         impl_to_string!($wrapper: "{}", value);
 28 | 
 29 |         #[derive(PartialEq, Eq, Clone)]
 30 |         pub enum $name {
 31 |             $( $variants ( $( $arg ),* ) ),*
 32 |         }
 33 | 
 34 |         impl $name {
 35 |             pub fn new(stmt: $name, location: SourceLocation) -> $wrapper {
 36 |                 $wrapper {
 37 |                     value: stmt,
 38 |                     location: location
 39 |                 }
 40 |             }
 41 |         }
 42 |     };
 43 | );
 44 | 
 45 | // --- AST: Compound items ------------------------------------------------------
 46 | 
 47 | // --- AST: Compound items: Statements ------------------------------------------
 48 | 
 49 | define!(Statement -> StatementNode:         // Generates the `Statement` enum and its `StatementNode` wrapper
 50 |     Include(IPath),                         // Ex: #import <...>
 51 |     Label(Ident),                           // Ex: label:
 52 |     Const(Ident, ArgumentNode),             // Ex: $const = 2
 53 |     Operation(Mnemonic, Vec<ArgumentNode>), // Ex: MOV [0] 1
 54 |     Macro(Ident, Vec<MacroArgumentNode>)    // Ex: @macro(args, ...)
 55 | );
 56 | 
 57 | impl fmt::Debug for Statement {  // Renders a statement in the form it is written in assembly source
 58 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 59 |         match *self {
 60 |             Statement::Include(ref path) => write!(f, "#import {}", path),  // the source keyword is `#import`
 61 |             Statement::Label(ref name)   => write!(f, "{}:", name),
 62 |             Statement::Const(ref name, ref value) => {
 63 |                 write!(f, "${} = {}", name, value)
 64 |             },
 65 |             Statement::Operation(ref mnem, ref args) => {
 66 |                 try!(write!(f, "{}", mnem));  // mnemonic first, then space-separated arguments
 67 |                 for arg in args.iter() {
 68 |                     try!(write!(f, " {}", arg));
 69 |                 }
 70 |                 Ok(())
 71 |             },
 72 |             Statement::Macro(ref name, ref args) => {
 73 |                 write!(f, "@{}({})", name,
 74 |                        args.iter()
 75 |                            .map(|arg| format!("{}", arg))
 76 |                            .collect::<Vec<_>>()
 77 |                            .connect(", "))  // macro arguments are comma-separated in source
 78 |             }
 79 |         }
 80 |     }
 81 | }
 82 | 
 83 | impl fmt::Display for Statement {
 84 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 85 |         write!(f, "{:?}", self)
 86 |     }
 87 | }
 88 | 
 89 | 
 90 | // --- AST: Compound items: Arguments -------------------------------------------
 91 | 
 92 | define!(Argument -> ArgumentNode:
 93 |     Literal(WordSize),            // A simple literal
 94 |     Address(Option<WordSize>),    // An address (`[0]`) or an auto-filled address (`[_]`)
 95 |     Const(Ident),           // A constant (`$const`)
 96 |     Label(Ident),           // A label (`:label`)
 97 |     Char(WordSize)                // A character (`'a'`)
 98 | );
 99 | 
100 | impl fmt::Debug for Argument {
101 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
102 |         match *self {
103 |             Argument::Literal(i) => write!(f, "{}", i),
104 |             Argument::Address(addr) => {
105 |                 match addr {
106 |                     Some(i) => write!(f, "[{}]", i),
107 |                     None => write!(f, "[_]")
108 |                 }
109 |             },
110 |             Argument::Const(ref name) => write!(f, "${}", name),
111 |             Argument::Label(ref name) => write!(f, ":{}", name),
112 |             Argument::Char(c) => write!(f, "'{}'", c),
113 |         }
114 |     }
115 | }
116 | 
117 | impl fmt::Display for Argument {
118 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
119 |         write!(f, "{:?}", self)
120 |     }
121 | }
122 | 
123 | 
124 | // --- AST: Compound items: Macro Arguments -------------------------------------
125 | 
126 | define!(MacroArgument -> MacroArgumentNode:
127 |     Argument(ArgumentNode),
128 |     Ident(Ident)
129 | );
130 | 
131 | impl fmt::Debug for MacroArgument {
132 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
133 |         match *self {
134 |             MacroArgument::Argument(ref arg) => write!(f, "{}", arg),
135 |             MacroArgument::Ident(ref name) => write!(f, "{}", name)
136 |         }
137 |     }
138 | }
139 | 
140 | impl fmt::Display for MacroArgument {
141 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
142 |         write!(f, "{:?}", self)
143 |     }
144 | }
145 | 
146 | 
147 | // --- AST: Single items --------------------------------------------------------
148 | 
149 | // --- AST: Single items: Identifier --------------------------------------------
150 | 
151 | #[derive(PartialEq, Eq, Hash, Clone)]
152 | pub struct Ident(pub String);
153 | 
154 | impl Ident {
155 |     pub fn as_str(&self) -> &str {
156 |         let Ident(ref s) = *self;
157 |         s
158 |     }
159 | 
160 |     pub fn clone(&self) -> Ident {
161 |         Ident(self.as_str().to_owned())
162 |     }
163 | }
164 | 
165 | impl fmt::Debug for Ident {
166 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
167 |         write!(f, "{}", self.as_str())
168 |     }
169 | }
170 | 
171 | impl fmt::Display for Ident {
172 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
173 |         write!(f, "{:?}", self)
174 |     }
175 | }
176 | 
177 | 
178 | // --- AST: Single items: Mnemonic ----------------------------------------------
179 | 
180 | #[derive(PartialEq, Eq, Clone)]
181 | pub struct Mnemonic(pub Instruction);
182 | 
183 | impl fmt::Debug for Mnemonic {
184 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
185 |         let Mnemonic(ref mnem) = *self;
186 |         write!(f, "{:?}", mnem)
187 |     }
188 | }
189 | 
190 | impl fmt::Display for Mnemonic {
191 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
192 |         write!(f, "{:?}", self)
193 |     }
194 | }
195 | 
196 | 
197 | // --- AST: Single items: Import Path -------------------------------------------
198 | 
199 | #[derive(PartialEq, Eq, Clone)]
200 | pub struct IPath(pub String);
201 | 
202 | impl IPath {
203 |     pub fn as_str(&self) -> &str {
204 |         let IPath(ref p) = *self;
205 |         &**p
206 |     }
207 | }
208 | 
209 | impl fmt::Debug for IPath {
210 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
211 |         let IPath(ref path) = *self;
212 |         write!(f, "<{}>", path)
213 |     }
214 | }
215 | 
216 | impl fmt::Display for IPath {
217 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
218 |         write!(f, "{:?}", self)
219 |     }
220 | }


--------------------------------------------------------------------------------
/src/assembler/parser/grammar.md:
--------------------------------------------------------------------------------
 1 | # EBNF-like grammar
 2 | 
 3 |     # AST
 4 |     program:    comment | (statement comment?)*
 5 |     statement:  include | label_def | const_def | operation | macro
 6 | 
 7 |     include:    hash path
 8 |     label_def:  ident colon
 9 |     const_def:  constant eq argument
10 |     operation:  mnemonic argument*
11 |     argument:   integer
12 |                 | address
13 |                 | constant
14 |                 | label
15 |                 | char
16 | 
17 |     address:    lbracket ( integer | underscore ) rbracket
18 |     label:      colon ident
19 |     constant:   dollar ident
20 |     macro:      at ident lparen ( macro_arg ( comma macro_arg )* )? rparen
21 |     macro_arg:  argument | ident
22 | 
23 |     # Tokens
24 |     hash:       '#'
25 |     colon:      ':'
26 |     dollar:     '$'
27 |     at:         '@'
28 |     comma:      ','
29 |     eq:         '='
30 |     underscore: '_'
31 |     lparen:     '('
32 |     rparen:     ')'
33 |     lbracket:   '['
34 |     rbracket:   ']'
35 |     mnemonic:   [A-Z]+
36 |     ident:      [a-z] ( '_' | [a-z] | [0-9] )*
37 |     integer:    [0-9]+
38 |     char:       '\'' ( [a-z] | [A-Z] | '\n' ) '\''
39 |     path:       '<' ( [a-z] | [A-Z] | '.' | '/' | '_' | '-' )+ '>'
40 |     comment:    ';' ([a-z] | [A-Z] | [0-9])*


--------------------------------------------------------------------------------
/src/assembler/parser/lexer.rs:
--------------------------------------------------------------------------------
  1 | //! The Lexer
  2 | //!
  3 | //! Nothing outstanding, just a normal lexer.
  4 | 
  5 | use std::borrow::ToOwned;
  6 | use std::fmt;
  7 | use std::rc::Rc;
  8 | 
  9 | use assembler::util::fatal;
 10 | use machine::{Mnemonic, WordSize};
 11 | 
 12 | 
 13 | // --- Source Location ----------------------------------------------------------
 14 | 
 15 | pub type SharedString = Rc<String>;  // reference-counted string, shared instead of copied
 16 | 
 17 | #[derive(PartialEq, Eq, Clone)]
 18 | pub struct SourceLocation {
 19 |     pub filename: SharedString,  // name of the file (or pseudo-file such as `<input>`)
 20 |     pub lineno: usize            // line number; `FileLexer` starts counting at 1
 21 | }
 22 | 
 23 | impl_to_string!(SourceLocation: "{}:{}", filename, lineno);
 24 | 
 25 | 
 26 | /// Placeholder location (`<input>`, line 0) for tokens with no backing file.
 27 | pub fn dummy_source() -> SourceLocation {
 28 |     let filename = Rc::new(String::from_str("<input>"));
 29 | 
 30 |     SourceLocation { filename: filename, lineno: 0 }
 31 | }
 32 | 
 33 | 
 34 | // --- List of Tokens -----------------------------------------------------------
 35 | 
 36 | #[derive(Clone, PartialEq, Eq)]
 37 | pub enum Token<'a> {
 38 |     HASH,        // '#'
 39 |     COLON,       // ':'
 40 |     DOLLAR,      // '$'
 41 |     AT,          // '@'
 42 |     COMMA,       // ','
 43 |     EQ,          // '='
 44 |     UNDERSCORE,  // '_'
 45 | 
 46 |     LPAREN,      // '('
 47 |     RPAREN,      // ')'
 48 |     LBRACKET,    // '['
 49 |     RBRACKET,    // ']'
 50 | 
 51 |     MNEMONIC(Mnemonic),  // run of uppercase letters parsed into a `Mnemonic`
 52 |     IDENT(&'a str),      // identifier, borrowed from the source text
 53 |     INTEGER(WordSize),   // integer literal
 54 |     CHAR(WordSize),      // char literal, stored as its numeric value
 55 |     PATH(&'a str),       // text found between `<` and `>`
 56 | 
 57 |     EOF,  // end of input
 58 | 
 59 |     PLACEHOLDER
 60 |     //UNKNOWN(String)
 61 | }
 62 | 
 63 | impl<'a> fmt::Debug for Token<'a> {
 64 |     /// Punctuation prints as its source character; payload tokens print their payload.
 65 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 66 |         let fixed = match *self {
 67 |             // Tokens carrying data are formatted right away
 68 |             Token::MNEMONIC(ref instr) => return write!(f, "{:?}", instr),
 69 |             Token::IDENT(ref ident)    => return write!(f, "{:?}", ident),
 70 |             Token::INTEGER(i)          => return write!(f, "{}", i),
 71 |             Token::CHAR(c)             => return write!(f, "{}", c as char),
 72 |             Token::PATH(ref path)      => return write!(f, "{:?}", path),
 73 |             Token::HASH        => "#",
 74 |             Token::COLON       => ":",
 75 |             Token::DOLLAR      => "$",
 76 |             Token::AT          => "@",
 77 |             Token::COMMA       => ",",
 78 |             Token::EQ          => "=",
 79 |             Token::UNDERSCORE  => "_",
 80 |             Token::LPAREN      => "(",
 81 |             Token::RPAREN      => ")",
 82 |             Token::LBRACKET    => "[",
 83 |             Token::RBRACKET    => "]",
 84 |             Token::EOF         => "EOF",
 85 |             Token::PLACEHOLDER => "PLACEHOLDER"
 86 |         };
 87 |         f.write_str(fixed)
 88 |     }
 89 | }
 90 | 
 91 | impl<'a> fmt::Display for Token<'a> {
 92 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 93 |         fmt::Debug::fmt(self, f)  // Display is deliberately the same text as Debug
 94 |     }
 95 | }
 96 | 
 97 | 
 98 | // --- The Lexer ----------------------------------------------------------------
 99 | // We use a Lexer trait along with two implementations: FileLexer and Vec<Token>.
100 | // The first one is used for processing a file on the hard drive, the second
101 | // is used for testing purposes.
102 | 
103 | pub trait Lexer<'a> {
104 |     fn get_source(&self) -> SourceLocation;    // location the lexer is currently at
105 |     fn next_token(&mut self) -> Token<'a>;     // next token; `EOF` once input is exhausted
106 |     fn tokenize(&mut self) -> Vec<Token<'a>>;  // the remaining tokens as one list
107 | }
108 | 
109 | 
110 | // --- The Lexer: FileLexer -----------------------------------------------------
111 | 
112 | pub struct FileLexer<'a> {
113 |     source: &'a str,     // the text being lexed; tokens borrow slices of it
114 |     file: SharedString,  // file name reported in source locations
115 |     len: usize,          // `source.len()`, i.e. the length in bytes
116 | 
117 |     pos: usize,          // offset into `source` used for slicing in `collect`
118 |     curr: Option<char>,  // current character; `None` means end of input
119 | 
120 |     lineno: usize        // current line, starting at 1
121 | }
122 | 
123 | impl<'a> FileLexer<'a> {
124 | 
125 |     /// Create a lexer over `source`; `file` is only used in source locations.
126 |     pub fn new(source: &'a str, file: &str) -> FileLexer<'a> {
127 |         FileLexer {
128 |             source: source,
129 |             file: Rc::new(String::from_str(file)),
130 |             len: source.len(),
131 |             pos: 0,
132 |             // `chars().next()` is `None` for empty input, where `char_at(0)` panicked
133 |             curr: source.chars().next(),
134 |             lineno: 1
135 |         }
136 |     }
137 | 
138 | 
139 |     // --- File Lexer: Helpers ---------------------------------------------------
140 | 
141 |     fn fatal(&self, msg: String) -> ! {  // report `msg` at the current location; never returns
142 |         fatal(msg, &self.get_source())
143 |     }
144 | 
145 | 
146 |     fn is_eof(&self) -> bool {  // true once there is no current character left
147 |         self.curr == None
148 |     }
149 | 
150 | 
151 |     // --- File Lexer: Character processing --------------------------------------
152 | 
153 |     /// Make the following character current, keeping `pos` on its first byte.
154 |     fn bump(&mut self) {
155 |         // Skip every byte of the char we leave: a fixed `+= 1` strands `pos`
156 |         // inside a multi-byte UTF-8 sequence and breaks slicing in `collect`.
157 |         let width = self.curr.map_or(0, |c| c.len_utf8());
158 |         self.curr = self.nextch();
159 |         self.pos += width;
160 | 
161 |         debug!("Moved on to {:?}", self.curr)
162 |     }
163 | 
164 |     /// Peek at the character after the current one; `None` at end of input.
165 |     fn nextch(&self) -> Option<char> {
166 |         let width = match self.curr {
167 |             Some(c) => c.len_utf8(),
168 |             None    => return None  // already at EOF, nothing can follow
169 |         };
170 |         let new_pos = self.pos + width;  // start of the next char, or `len`
171 |         if new_pos < self.len {
172 |             self.source[new_pos..].chars().next()
173 |         } else {
174 |             None
175 |         }
176 |     }
177 | 
178 |     /// Printable (escaped) form of the current char, or `EOF` at end of input.
179 |     fn curr_repr(&self) -> String {
180 |         self.curr
181 |             .map(|c| c.escape_default().collect::<String>())
182 |             .unwrap_or_else(|| "EOF".to_owned())
183 |     }
184 | 
185 |     /// Consume `expect` as the current char; any other char (or EOF) is fatal.
186 |     fn expect(&mut self, expect: char) {
187 |         if self.curr == Some(expect) {
188 |             self.bump();
189 |             return;
190 |         }
191 |         // Mismatch: describe what we wanted and what we actually got
192 |         let wanted = if expect == '\'' {
193 |             String::from_str("quote")
194 |         } else {
195 |             format!("'{}'", expect)
196 |         };
197 |         let found = match self.curr {
198 |             None    => String::from_str("EOF"),
199 |             Some(_) => format!("'{}'", self.curr_repr())
200 |         };
201 |         self.fatal(format!("Expected `{}`, found `{}`", wanted, found))
202 |     }
203 | 
204 |     /// Consume chars while `cond` holds and return the consumed source slice.
205 |     fn collect<F>(&mut self, cond: F) -> &'a str
206 |             where F: Fn(&char) -> bool {
207 |         let begin = self.pos;
208 |         debug!("start colleting");
209 | 
210 |         loop {
211 |             match self.curr {
212 |                 Some(c) if cond(&c) => {}
213 |                 Some(_) => {
214 |                     debug!("colleting finished");
215 |                     break;
216 |                 }
217 |                 None => break
218 |             }
219 |             self.bump();
220 |         }
221 |         &self.source[begin..self.pos]
222 |     }
223 | 
224 |     /// Skip characters for as long as `cond` accepts them (stops at EOF too).
225 |     fn eat_all<F>(&mut self, cond: F)
226 |             where F: Fn(&char) -> bool {
227 |         while self.curr.map_or(false, |c| cond(&c)) {
228 |             self.bump();
229 |         }
230 |     }
231 | 
232 |     // --- File Lexer: Tokenizers ------------------------------------------------
233 | 
234 |     /// Lex a run of uppercase letters; fatal if it does not parse as a mnemonic.
235 |     fn tokenize_mnemonic(&mut self) -> Token<'a> {
236 |         debug!("Tokenizing a mnemonic");
237 | 
238 |         let text = self.collect(|c| c.is_alphabetic() && c.is_uppercase());
239 |         if let Ok(mnemonic) = text.parse() {
240 |             return Token::MNEMONIC(mnemonic);
241 |         }
242 | 
243 |         self.fatal(format!("invalid mnemonic: {}", text))
244 |     }
245 | 
246 |     /// Lex an identifier: lowercase letters, numeric chars and underscores.
247 |     fn tokenize_ident(&mut self) -> Token<'a> {
248 |         debug!("Tokenizing an ident");
249 | 
250 |         Token::IDENT(self.collect(|c| match *c {
251 |             '_' => true,
252 |             ch  => ch.is_numeric() || (ch.is_alphabetic() && ch.is_lowercase())
253 |         }))
254 |     }
255 | 
256 |     /// Lex a run of numeric chars into an `INTEGER`; fatal if it fails to parse.
257 |     fn tokenize_digit(&mut self) -> Token<'a> {
258 |         debug!("Tokenizing a digit");
259 | 
260 |         let digits = self.collect(|c| c.is_numeric());
261 | 
262 |         match digits.parse::<WordSize>() {
263 |             Ok(value) => Token::INTEGER(value),
264 |             Err(_)    => self.fatal(format!("invalid integer: {}", digits))
265 |         }
266 |     }
267 | 
268 |     /// Lex a quoted char such as `'a'`, `'\n'`, `'\''` or `'\\'`; bad input is fatal.
269 |     fn tokenize_char(&mut self) -> Token<'a> {
270 |         debug!("Tokenizing a char");
271 |         self.bump();  // '\'' matched, move on
272 |         let c = match self.curr {
273 |             Some(c) => c,
274 |             None    => self.fatal("expected a char, found EOF".to_owned())
275 |         };
276 |         let tok = if c == '\\' {
277 |             self.bump();  // escaped char, let's take a look on one more char
278 |             match self.curr {
279 |                 Some('n')  => Token::CHAR(10),
280 |                 Some('\'') => Token::CHAR(39),
281 |                 Some('\\') => Token::CHAR(92),  // only way to spell a backslash
282 |                 Some(c) => self.fatal(format!("unsupported or invalid escape sequence: \\{}", c)),
283 |                 None => self.fatal("expected escaped char, found EOF".to_owned())
284 |             }
285 |         } else if (c as WordSize) as char != c {
286 |             // The cast below would silently truncate chars wider than a word
287 |             self.fatal(format!("char does not fit into a machine word: {}", c))
288 |         } else {
289 |             Token::CHAR(c as WordSize)
290 |         };
291 |         self.bump();
292 |         self.expect('\'');  // match closing quote
293 |         tok
294 |     }
295 | 
296 |     /// Lex `<...>`: everything between the angle brackets becomes the path.
297 |     fn tokenize_path(&mut self) -> Token<'a> {
298 |         debug!("Tokenizing a path");
299 | 
300 |         // The caller dispatched on '<', so step over it first
301 |         self.bump();
302 |         let inner = self.collect(|c| '>' != *c);
303 | 
304 |         // An unterminated path (EOF before '>') is reported by `expect`
305 |         self.expect('>');
306 |         Token::PATH(inner)
307 |     }
308 | 
309 |     /// Read the next token and return it
310 |     ///
311 |     /// `None` means nothing was produced (whitespace or a comment was
312 |     /// skipped) and the caller should simply ask again.
313 |     fn read_token(&mut self) -> Option<Token<'a>> {
314 |         let c = match self.curr {
315 |             None     => return Some(Token::EOF),
316 |             Some(ch) => ch
317 |         };
318 |         // Single-character tokens share one code path
319 |         let punct = match c {
320 |             '#' => Some(Token::HASH),
321 |             ':' => Some(Token::COLON),
322 |             '$' => Some(Token::DOLLAR),
323 |             '@' => Some(Token::AT),
324 |             ',' => Some(Token::COMMA),
325 |             '=' => Some(Token::EQ),
326 |             '_' => Some(Token::UNDERSCORE),
327 |             '(' => Some(Token::LPAREN),
328 |             ')' => Some(Token::RPAREN),
329 |             '[' => Some(Token::LBRACKET),
330 |             ']' => Some(Token::RBRACKET),
331 |             _   => None
332 |         };
333 |         if punct.is_some() {
334 |             self.bump();
335 |             return punct;
336 |         }
337 |         if c == ';' {
338 |             // Comment: skip up to (not past) the end of the line
339 |             self.eat_all(|c| *c != '\n');
340 |             return None;
341 |         }
342 |         if c.is_alphabetic() && c.is_uppercase() {
343 |             Some(self.tokenize_mnemonic())
344 |         } else if c.is_alphabetic() && c.is_lowercase() {
345 |             Some(self.tokenize_ident())
346 |         } else if c.is_numeric() {
347 |             Some(self.tokenize_digit())
348 |         } else if c == '\'' {
349 |             Some(self.tokenize_char())
350 |         } else if c == '<' {
351 |             Some(self.tokenize_path())
352 |         } else if c.is_whitespace() {
353 |             if c == '\n' { self.lineno += 1; }
354 |             self.bump();
355 |             None
356 |         } else {
357 |             self.fatal(format!("unknown token: {}", c))
358 |         }
359 |     }
360 | }
361 | 
362 | impl<'a> Lexer<'a> for FileLexer<'a> {
363 |     /// Snapshot of where the lexer currently is: file name plus line number.
364 |     fn get_source(&self) -> SourceLocation {
365 |         let filename = self.file.clone();
366 | 
367 |         SourceLocation { filename: filename, lineno: self.lineno }
368 |     }
369 | 
370 |     /// Return the next real token, skipping whitespace and comments.
371 |     fn next_token(&mut self) -> Token<'a> {
372 |         if self.is_eof() {
373 |             return Token::EOF;
374 |         }
375 | 
376 |         // `read_token` yields `None` for skipped input, so retry until a
377 |         // token (possibly `EOF`) comes back
378 |         let mut token = self.read_token();
379 |         while token.is_none() { token = self.read_token(); }
380 |         token.unwrap()
381 |     }
382 | 
383 |     #[allow(dead_code)]  // Used for tests
384 |     fn tokenize(&mut self) -> Vec<Token<'a>> {
385 |         // Drain the whole input; unlike `next_token` no trailing `EOF` is added
386 |         let mut collected = Vec::new();
387 |         loop {
388 |             if self.is_eof() { break; }
389 |             debug!("Processing {:?}", self.curr);
390 | 
391 |             match self.read_token() {
392 |                 Some(t) => collected.push(t),
393 |                 None    => {}
394 |             }
395 |             debug!("So far: {:?}", collected);
396 |         }
397 |         collected
398 |     }
399 | }
400 | 
401 | 
402 | // --- The Lexer: Vec<Token> ----------------------------------------------------
403 | 
404 | impl<'a> Lexer<'a> for Vec<Token<'a>> {
405 |     /// Pre-built token lists have no real origin, so report the dummy one.
406 |     fn get_source(&self) -> SourceLocation { dummy_source() }
407 | 
408 |     /// Pop the front token; an exhausted list keeps answering `EOF`.
409 |     fn next_token(&mut self) -> Token<'a> {
410 |         match self.is_empty() {
411 |             true  => Token::EOF,
412 |             false => self.remove(0)
413 |         }
414 |     }
415 | 
416 |     /// Copy of all remaining tokens; the list itself is left untouched.
417 |     fn tokenize(&mut self) -> Vec<Token<'a>> {
418 |         self.clone()
419 |     }
420 | }
421 | 
422 | 
423 | // --- Tests --------------------------------------------------------------------
424 | 
425 | #[cfg(test)]
426 | mod tests {
427 |     use std::borrow::ToOwned;
428 |     use std::rc::Rc;
429 | 
430 |     use super::{Token, Lexer, FileLexer};
431 |     use super::Token::*;
432 |     use machine::WordSize;
433 | 
434 |     fn tokenize(src: &'static str) -> Vec<Token> {  // lex `src` with a fresh `FileLexer`
435 |         FileLexer::new(src, "<test>").tokenize()
436 |     }
437 | 
438 |     #[test]
439 |     fn test_mnemonic() {
440 |         assert_eq!(tokenize("MOV"),
441 |                    vec![MNEMONIC("MOV".parse().unwrap())]);
442 |     }
443 | 
444 |     #[test]
445 |     fn test_ident() {
446 |         assert_eq!(tokenize("abc"),
447 |                    vec![IDENT("abc")]);
448 |     }
449 | 
450 |     #[test]
451 |     fn test_ident_with_underscore() {
452 |         assert_eq!(tokenize("abc_efg"),
453 |                    vec![IDENT("abc_efg")]);
454 |     }
455 | 
456 |     #[test]
457 |     fn test_digit() {
458 |         assert_eq!(tokenize("128"),
459 |                    vec![INTEGER(128)]);
460 |     }
461 | 
462 |     #[test]
463 |     fn test_char() {
464 |         assert_eq!(tokenize("'a'"),
465 |                    vec![CHAR('a' as WordSize)]);
466 |         assert_eq!(tokenize("' '"),
467 |                    vec![CHAR(' ' as WordSize)]);
468 |         assert_eq!(tokenize("'\n'"),  // a raw newline between the quotes, not the `\n` escape
469 |                    vec![CHAR('\n' as WordSize)]);
470 |         assert_eq!(tokenize("'\\\''"),  // the lexer sees the escaped quote `'\''`
471 |                    vec![CHAR('\'' as WordSize)]);
472 |     }
473 | 
474 |     #[test]
475 |     fn test_path() {
476 |         assert_eq!(tokenize("<asd>"),
477 |                    vec![PATH("asd")]);
478 |     }
479 | 
480 |     #[test]
481 |     fn test_comment() {  // comments produce no tokens and end at the newline
482 |         assert_eq!(tokenize("; asd"),
483 |                    vec![]);
484 |         assert_eq!(tokenize("; asd\nMOV ;asd\nMOV"),
485 |                    vec![MNEMONIC("MOV".parse().unwrap()),
486 |                         MNEMONIC("MOV".parse().unwrap())]);
487 |     }
488 | 
489 |     #[test]
490 |     fn test_whitespace() {  // whitespace only separates tokens
491 |         assert_eq!(tokenize("\n\n\n\n     \n\t\n"),
492 |                    vec![]);
493 |         assert_eq!(tokenize("      MOV        \n\n MOV"),
494 |                    vec![MNEMONIC("MOV".parse().unwrap()),
495 |                         MNEMONIC("MOV".parse().unwrap())]);
496 |     }
497 | 
498 |     #[test]
499 |     fn test_line_counter() {
500 |         let mut lx = FileLexer::new("MOV\nMOV", "<test>");
501 |         lx.tokenize();
502 |         assert_eq!(lx.lineno, 2);
503 | 
504 |         let mut lx = FileLexer::new("MOV\r\nMOV", "<test>");  // `\r\n` counts as one line break
505 |         lx.tokenize();
506 |         assert_eq!(lx.lineno, 2);
507 | 
508 |         let mut lx = FileLexer::new("#include<lib\\something>", "<test>");  // no newline at all
509 |         lx.tokenize();
510 |         assert_eq!(lx.lineno, 1);
511 |     }
512 | }


--------------------------------------------------------------------------------
/src/assembler/parser/mod.rs:
--------------------------------------------------------------------------------
  1 | //! The Parser
  2 | //!
  3 | //! A simple recursive descent parser for the grammar described in `grammar.md`.
  4 | 
  5 | pub mod ast;
  6 | mod lexer;
  7 | mod syntax_ext;
  8 | 
  9 | use std::borrow::ToOwned;
 10 | use std::collections::LinkedList;
 11 | use assembler::util::fatal;
 12 | use self::ast::*;
 13 | use self::lexer::{Lexer, FileLexer, Token};
 14 | 
 15 | pub use self::lexer::{SourceLocation, dummy_source};
 16 | pub use self::syntax_ext::expand_syntax_extensions;
 17 | 
 18 | 
 19 | pub struct Parser<'a> {
 20 |     location: SourceLocation,       // most recently recorded lexer location
 21 |     token: Token<'a>,               // the current token
 22 |     buffer: LinkedList<Token<'a>>,  // tokens already pulled from the lexer by `look_ahead`
 23 |     lexer: Box<Lexer<'a> + 'a>      // where new tokens come from
 24 | }
 25 | 
 26 | impl<'a> Parser<'a> {
 27 |     pub fn new(source: &'a str, file: &str) -> Parser<'a> {  // parser over `source` via a `FileLexer`
 28 |         Parser::with_lexer(Box::new(FileLexer::new(source, file)))
 29 |     }
 30 | 
 31 |     /// Build a parser on any lexer and prime it with the first token.
 32 |     pub fn with_lexer(mut lx: Box<Lexer<'a> + 'a>) -> Parser {
 33 |         // Fetch the token first; the location is read from the lexer afterwards
 34 |         let first = lx.next_token();
 35 |         let location = lx.get_source();
 36 | 
 37 |         Parser { token: first, location: location, buffer: LinkedList::new(), lexer: lx }
 38 |     }
 39 | 
 40 |     /// Parse statements until `EOF` and return them in source order.
 41 |     pub fn parse(&mut self) -> Program {
 42 |         debug!("Starting parsing");
 43 | 
 44 |         let mut statements = Vec::new();
 45 |         loop {
 46 |             if self.token == Token::EOF { break; }
 47 |             let stmt = self.parse_statement();
 48 |             statements.push(stmt);
 49 |         }
 50 |         debug!("Parsing finished");
 51 |         statements
 52 |     }
 53 | 
 54 | 
 55 |     // --- Error handling -------------------------------------------------------
 56 | 
 57 |     fn fatal(&self, msg: String) -> ! {  // report `msg` at the recorded location; never returns
 58 |         fatal(msg, &self.location);
 59 |     }
 60 | 
 61 |     fn unexpected_token(&self, tok: &Token, expected: Option<&'static str>) -> ! {
 62 |         let mut msg = format!("unexpected token: `{}`", tok);
 63 |         // Name the expected construct when the caller knows it
 64 |         if let Some(ex) = expected { msg.push_str(&format!(", expected {}", ex)); }
 65 |         self.fatal(msg)
 66 |     }
 67 | 
 68 | 
 69 |     // --- Token processing -----------------------------------------------------
 70 | 
 71 |     fn update_location(&mut self) -> SourceLocation {  // record the lexer's location, return a copy
 72 |         self.location = self.lexer.get_source();
 73 |         self.location.clone()
 74 |     }
 75 | 
 76 |     /// Advance: take a buffered look-ahead token if any, else ask the lexer.
 77 |     fn bump(&mut self) {
 78 |         let buffered = self.buffer.pop_front();
 79 |         let lexer = &mut self.lexer;
 80 |         self.token = buffered.unwrap_or_else(|| lexer.next_token());
 81 |     }
 82 | 
 83 |     /// Consume the current token if it equals `tok`; report whether it did.
 84 |     fn eat(&mut self, tok: &Token) -> bool {
 85 |         let matched = self.token == *tok;
 86 |         if matched {
 87 |             self.bump();
 88 |         }
 89 |         matched
 90 |     }
 91 | 
 92 |     fn expect(&mut self, tok: &Token) {  // like `eat`, but a mismatch is fatal
 93 |         if self.eat(tok) { return; }
 94 | 
 95 |         self.fatal(format!("expected `{}`, found `{}`", tok, self.token))
 96 |     }
 97 | 
 98 |     /// Run `f` on the token `distance` (>= 1) positions after the current one.
 99 |     fn look_ahead<F, R>(&mut self, distance: usize, f: F) -> R where F: Fn(&Token) -> R {
100 |         // Top up the buffer from the lexer until it reaches that far
101 |         while self.buffer.len() < distance {
102 |             let tok = self.lexer.next_token();
103 |             self.buffer.push_back(tok);
104 |         }
105 |         f(self.buffer.iter().nth(distance - 1).unwrap())
106 |     }
107 | 
108 |     // --- Actual parsing -------------------------------------------------------
109 | 
110 |     fn token_is_argument(&mut self) -> bool {  // can the current token start an argument?
111 |         match self.token {
112 |             Token::DOLLAR => self.look_ahead(2, |t| *t != Token::EQ),  // `$x =` is a definition
113 |             Token::INTEGER(_) | Token::CHAR(_) => true,
114 |             Token::LBRACKET | Token::COLON => true,
115 |             _ => false
116 |         }
117 |     }
118 | 
119 |     // --- Parsing: Single tokens -----------------------------------------------
120 | 
121 |     fn parse_ident(&mut self) -> Ident {  // consume an `IDENT` token as an owned `Ident`
122 |         let name = if let Token::IDENT(id) = self.token {
123 |             id.to_owned()
124 |         } else {
125 |             self.unexpected_token(&self.token, Some("a identifier"))
126 |         };
127 |         self.bump();
128 |         Ident(name)
129 |     }
130 | 
131 |     fn parse_path(&mut self) -> IPath {  // consume a `PATH` token as an owned `IPath`
132 |         let text = if let Token::PATH(p) = self.token {
133 |             p.to_owned()
134 |         } else {
135 |             self.unexpected_token(&self.token, Some("a path"))
136 |         };
137 |         self.bump();
138 |         IPath(text)
139 |     }
140 | 
141 |     // --- Parsing: Compound expressions ----------------------------------------
142 | 
143 |     /// Parse `[N]` into `Some(N)` or `[_]` into `None`.
144 |     fn parse_address(&mut self) -> Option<u8> {
145 |         self.expect(&Token::LBRACKET);
146 | 
147 |         let inner = match self.token {
148 |             Token::UNDERSCORE    => None,
149 |             Token::INTEGER(addr) => Some(addr),
150 |             _ => self.unexpected_token(&self.token, Some("an address"))
151 |         };
152 |         self.bump();  // step over the integer / underscore
153 |         self.expect(&Token::RBRACKET);
154 | 
155 |         inner
156 |     }
157 | 
158 |     fn parse_label(&mut self) -> Ident {  // label reference: `:` followed by an identifier
159 |         self.expect(&Token::COLON);
160 |         self.parse_ident()
161 |     }
162 | 
163 |     fn parse_constant(&mut self) -> Ident {  // constant name: `$` followed by an identifier
164 |         self.expect(&Token::DOLLAR);
165 |         self.parse_ident()
166 |     }
167 | 
168 |     /// Parse one argument: literal, char, `[address]`, `$constant` or `:label`.
169 |     fn parse_argument(&mut self) -> ArgumentNode {
170 |         let location = self.update_location();
171 |         let kind = match self.token {
172 |             Token::LBRACKET   => Argument::Address(self.parse_address()),
173 |             Token::COLON      => Argument::Label(self.parse_label()),
174 |             Token::DOLLAR     => Argument::Const(self.parse_constant()),
175 |             // Value-carrying tokens are complete on their own: just step over
176 |             Token::CHAR(c)    => { self.bump(); Argument::Char(c) },
177 |             Token::INTEGER(i) => { self.bump(); Argument::Literal(i) },
178 |             _ => self.unexpected_token(&self.token, Some("an argument"))
179 |         };
180 |         Argument::new(kind, location)
181 |     }
182 | 
183 |     /// Parse a macro argument: a regular argument or else a bare identifier.
184 |     fn parse_macro_argument(&mut self) -> MacroArgumentNode {
185 |         let location = self.update_location();
186 | 
187 |         let arg = if self.token_is_argument() {
188 |             MacroArgument::Argument(self.parse_argument())
189 |         } else {
190 |             MacroArgument::Ident(self.parse_ident())
191 |         };
192 |         MacroArgument::new(arg, location)
193 |     }
194 | 
195 |     // ---- Parsing: Expressions ------------------------------------------------
196 | 
197 |     /// Parse `# import <path>` into an `Include` statement.
198 |     fn parse_include(&mut self) -> StatementNode {
199 |         let location = self.update_location();
200 | 
201 |         self.bump();  // step over the current token without checking it
202 |         self.expect(&Token::IDENT("import"));
203 |         let include = Statement::Include(self.parse_path());
204 |         Statement::new(include, location)
205 |     }
206 | 
207 |     /// Parse a label definition `name:`.
208 |     fn parse_label_def(&mut self) -> StatementNode {
209 |         let location = self.update_location();
210 |         let name = self.parse_ident();
211 |         self.expect(&Token::COLON);  // the colon only terminates the name
212 | 
213 |         Statement::new(Statement::Label(name), location)
214 |     }
215 | 
216 |     /// Parse a constant definition `$name = argument`.
217 |     fn parse_constant_def(&mut self) -> StatementNode {
218 |         let location = self.update_location();
219 | 
220 |         let constant = self.parse_constant();
221 |         self.expect(&Token::EQ);
222 |         let definition = Statement::Const(constant, self.parse_argument());
223 |         Statement::new(definition, location)
224 |     }
225 | 
226 |     /// Parse a mnemonic followed by as many arguments as are present.
227 |     fn parse_operation(&mut self) -> StatementNode {
228 |         let location = self.update_location();
229 | 
230 |         let mnemonic = match self.token {
231 |             Token::MNEMONIC(m) => Mnemonic(m),
232 |             _ => self.unexpected_token(&self.token, Some("a mnemonic"))
233 |         };
234 |         self.bump();
235 | 
236 |         // Arguments are not delimited: stop at the first token that cannot start one
237 |         let mut operands = Vec::new();
238 |         loop {
239 |             if !self.token_is_argument() { break; }
240 |             operands.push(self.parse_argument());
241 |         }
242 |         Statement::new(Statement::Operation(mnemonic, operands), location)
243 |     }
244 | 
245 |     /// Parse a macro invocation `@name(arg, ...)`.
246 |     ///
247 |     /// The argument list may be empty: `@name()`.
248 |     fn parse_macro(&mut self) -> StatementNode {
249 |         let location = self.update_location();
250 | 
251 |         self.expect(&Token::AT);
252 |         let name = self.parse_ident();
253 |         self.expect(&Token::LPAREN);
254 | 
255 |         let mut args = Vec::new();
256 |         if self.token != Token::RPAREN {
257 |             // At least one argument; each comma promises another one
258 |             args.push(self.parse_macro_argument());
259 |             while self.eat(&Token::COMMA) {
260 |                 args.push(self.parse_macro_argument());
261 |             }
262 |         }
263 |         self.expect(&Token::RPAREN);
264 |         Statement::new(Statement::Macro(name, args), location)
265 |     }
266 | 
267 |     /// Parse one statement, dispatching on the current token.
268 |     fn parse_statement(&mut self) -> StatementNode {
269 |         match self.token {
270 |             Token::MNEMONIC(_) => self.parse_operation(),
271 |             Token::IDENT(_)    => self.parse_label_def(),
272 |             Token::DOLLAR      => self.parse_constant_def(),
273 |             Token::HASH        => self.parse_include(),
274 |             Token::AT          => self.parse_macro(),
275 | 
276 |             // Nothing else can start a statement
277 |             _ => self.unexpected_token(&self.token, Some("a statement"))
278 |         }
279 |     }
280 | }
281 | 
282 | #[cfg(test)]
283 | mod tests {
284 |     use std::borrow::ToOwned;
285 |     use std::rc::Rc;
286 | 
287 |     use assembler::parser::ast::*;
288 |     use assembler::parser::lexer::{Token, Lexer};
289 |     use assembler::parser::lexer::Token::*;
290 | 
291 |     use super::*;
292 | 
293 |     fn parse<'a, F, T>(toks: Vec<Token<'a>>, f: F) -> T where F: Fn(&mut Parser<'a>) -> T {
294 |         f(&mut Parser::with_lexer(Box::new(toks) as Box<Lexer>))  // the token list acts as the lexer
295 |     }
296 | 
297 |     fn ident_from_str(s: &str) -> Ident {  // shorthand for building an expected `Ident`
298 |         Ident(s.to_owned())
299 |     }
300 | 
301 |     fn path_from_str(s: &str) -> IPath {  // shorthand for building an expected `IPath`
302 |         IPath(s.to_owned())
303 |     }
304 | 
305 |     #[test]
306 |     fn test_statements() {  // `parse` returns every statement, in order
307 |         assert_eq!(
308 |             parse(
309 |                 vec![HASH, IDENT("import"), PATH("as/d"),
310 |                      MNEMONIC("HALT".parse().unwrap())],
311 |                 |p| p.parse()
312 |             ),
313 |             vec![
314 |                 Statement::new(
315 |                     Statement::Include(
316 |                         path_from_str("as/d")
317 |                     ),
318 |                     dummy_source()
319 |                 ),
320 |                 Statement::new(
321 |                     Statement::Operation(
322 |                         Mnemonic("HALT".parse().unwrap()),
323 |                         vec![]
324 |                     ),
325 |                     dummy_source()
326 |                 )
327 |             ]
328 |         )
329 |     }
330 | 
331 |     #[test]
332 |     fn test_include() {  // `# import <as/d>` becomes an `Include` statement
333 |         assert_eq!(
334 |             parse(vec![HASH, IDENT("import"), PATH("as/d")],
335 |                   |p| p.parse_statement()),
336 |             Statement::new(
337 |                 Statement::Include(
338 |                     path_from_str("as/d")
339 |                 ),
340 |                 dummy_source()
341 |             )
342 |         )
343 |     }
344 | 
345 |     #[test]
346 |     fn test_label_def() {  // `lbl:` becomes a `Label` statement
347 |         assert_eq!(
348 |             parse(vec![IDENT("lbl"), COLON],
349 |                   |p| p.parse_statement()),
350 |             Statement::new(
351 |                 Statement::Label(
352 |                     ident_from_str("lbl")
353 |                 ),
354 |                 dummy_source()
355 |             )
356 |         )
357 |     }
358 | 
359 |     #[test]
360 |     fn test_const_def() {  // `$c = 0` becomes a `Const` statement
361 |         assert_eq!(
362 |             parse(vec![DOLLAR, IDENT("c"), EQ, INTEGER(0)],
363 |                   |p| p.parse_statement()),
364 |             Statement::new(
365 |                 Statement::Const(
366 |                     ident_from_str("c"),
367 |                     Argument::new(
368 |                         Argument::Literal(0),
369 |                         dummy_source()
370 |                     )
371 |                 ),
372 |                 dummy_source()
373 |             )
374 |         )
375 |     }
376 | 
377 |     #[test]
378 |     fn test_operation() {  // `MOV 0` becomes an `Operation` with one literal argument
379 |         assert_eq!(
380 |             parse(vec![MNEMONIC("MOV".parse().unwrap()), INTEGER(0)],
381 |                   |p| p.parse_statement()),
382 |             Statement::new(
383 |                 Statement::Operation(
384 |                     Mnemonic("MOV".parse().unwrap()),
385 |                     vec![
386 |                         Argument::new(
387 |                             Argument::Literal(0),
388 |                             dummy_source()
389 |                         )
390 |                     ]
391 |                 ),
392 |                 dummy_source()
393 |             )
394 |         )
395 |     }
396 | 
397 |     #[test]
398 |     fn test_macro() {  // `@macro(0, 0)` becomes a `Macro` with two literal arguments
399 |         assert_eq!(
400 |             parse(vec![AT, IDENT("macro"),
401 |                        LPAREN, INTEGER(0), COMMA, INTEGER(0), RPAREN],
402 |                   |p| p.parse_statement()),
403 |             Statement::new(
404 |                 Statement::Macro(
405 |                     ident_from_str("macro"),
406 |                     vec![
407 |                         MacroArgument::new(
408 |                             MacroArgument::Argument(
409 |                                 Argument::new(
410 |                                     Argument::Literal(0),
411 |                                     dummy_source()
412 |                                 )
413 |                             ),
414 |                             dummy_source()
415 |                         ),
416 |                         MacroArgument::new(
417 |                             MacroArgument::Argument(
418 |                                 Argument::new(
419 |                                     Argument::Literal(0),
420 |                                     dummy_source()
421 |                                 )
422 |                             ),
423 |                             dummy_source()
424 |                         )
425 |                     ]
426 |                 ),
427 |                 dummy_source()
428 |             )
429 |         )
430 |     }
431 | 
432 |     #[test]
433 |     fn test_literal() {
434 |         assert_eq!(
435 |             parse(vec![INTEGER(0)],
436 |                   |p| p.parse_argument()),
437 |             Argument::new(
438 |                 Argument::Literal(0),
439 |                 dummy_source()
440 |             )
441 |         )
442 |     }
443 | 
444 |     #[test]
445 |     fn test_address() {
446 |         assert_eq!(
447 |             parse(vec![LBRACKET, INTEGER(0), RBRACKET],
448 |                   |p| p.parse_argument()),
449 |             Argument::new(
450 |                 Argument::Address(Some(0)),
451 |                 dummy_source()
452 |             )
453 |         )
454 |     }
455 | 
456 |     #[test]
457 |     fn test_address_auto() {
458 |         assert_eq!(
459 |             parse(vec![LBRACKET, UNDERSCORE, RBRACKET],
460 |                   |p| p.parse_argument()),
461 |             Argument::new(
462 |                 Argument::Address(None),
463 |                 dummy_source()
464 |             )
465 |         )
466 |     }
467 | 
468 |     #[test]
469 |     fn test_const() {
470 |         assert_eq!(
471 |             parse(vec![DOLLAR, IDENT("asd")],
472 |                   |p| p.parse_argument()),
473 |             Argument::new(
474 |                 Argument::Const(
475 |                     ident_from_str("asd")
476 |                 ),
477 |                 dummy_source()
478 |             )
479 |         )
480 |     }
481 | 
482 |     #[test]
483 |     fn test_label() {
484 |         assert_eq!(
485 |             parse(vec![COLON, IDENT("asd")],
486 |                   |p| p.parse_argument()),
487 |             Argument::new(
488 |                 Argument::Label(
489 |                     ident_from_str("asd")
490 |                 ),
491 |                 dummy_source()
492 |             )
493 |         )
494 |     }
495 | 
496 |     #[test]
497 |     fn test_char() {
498 |         assert_eq!(
499 |             parse(vec![CHAR(0)],
500 |                   |p| p.parse_argument()),
501 |             Argument::new(
502 |                 Argument::Char(0),
503 |                 dummy_source()
504 |             )
505 |         )
506 |     }
507 | 
508 |     #[test]
509 |     fn test_macro_arg_arg() {
510 |         assert_eq!(
511 |             parse(vec![INTEGER(0)],
512 |                   |p| p.parse_macro_argument()),
513 |             MacroArgument::new(
514 |                 MacroArgument::Argument(
515 |                     Argument::new(
516 |                         Argument::Literal(0),
517 |                         dummy_source()
518 |                     )
519 |                 ),
520 |                 dummy_source()
521 |             )
522 |         )
523 |     }
524 | 
525 |     #[test]
526 |     fn test_macro_arg_ident() {
527 |         assert_eq!(
528 |             parse(vec![IDENT("asd")],
529 |                   |p| p.parse_macro_argument()),
530 |             MacroArgument::new(
531 |                 MacroArgument::Ident(
532 |                     ident_from_str("asd")
533 |                 ),
534 |                 dummy_source()
535 |             )
536 |         )
537 |     }
538 | 
539 |     #[test]
540 |     fn test_op_and_const() {
541 |         assert_eq!(
542 |             parse(vec![MNEMONIC("HALT".parse().unwrap()),
543 |                        DOLLAR, IDENT("c"), EQ, INTEGER(0)],
544 |                   |p| p.parse()),
545 |             vec![
546 |                 Statement::new(
547 |                     Statement::Operation(
548 |                         Mnemonic("HALT".parse().unwrap()),
549 |                         vec![]
550 |                     ),
551 |                     dummy_source()
552 |                 ),
553 |                 Statement::new(
554 |                     Statement::Const(
555 |                         ident_from_str("c"),
556 |                         Argument::new(
557 |                             Argument::Literal(0),
558 |                             dummy_source()
559 |                         )
560 |                     ),
561 |                     dummy_source()
562 |                 )
563 |             ]
564 |         )
565 |     }
566 | }


--------------------------------------------------------------------------------
/src/assembler/parser/syntax_ext/auto_address.rs:
--------------------------------------------------------------------------------
//! A syntax extension that auto-fills addresses to prevent repeating and
//! having to keep track of memory addresses.
 3 | //!
 4 | //! # Example:
 5 | //!
 6 | //! ```
 7 | //! $const = [_]
 8 | //! MOV $const 2
 9 | //! ```
10 | //!
11 | //! Results in:
12 | //!
13 | //! ```
14 | //! MOV [0] 2
15 | //! ```
16 | 
17 | use assembler::parser::ast::{Program, Statement, Argument, ArgumentNode, MacroArgument};
18 | 
19 | 
20 | pub fn expand(source: &mut Program) {
21 |     // The address to use next
22 |     let mut auto_addr = 0u8;
23 | 
24 |     // A helper function that replaces the value of the current argument
25 |     // with the next free address.
26 |     let mut update_arg = |arg: &mut ArgumentNode| {
27 |         if let Argument::Address(addr) = arg.value {
28 |             if addr == None {
29 |                 arg.value = Argument::Address(Some(auto_addr));
30 |                 auto_addr += 1;
31 |             }
32 |         }
33 |     };
34 | 
35 |     // Process all statements in the current source
36 |     for stmt in source.iter_mut() {
37 |         match stmt.value {
38 | 
39 |             // Process operation arguments
40 |             Statement::Operation(_, ref mut args) => {
41 |                 for arg in args.iter_mut() {
42 |                     update_arg(arg);
43 |                 }
44 |             },
45 | 
46 |             // Process constants
47 |             Statement::Const(_, ref mut arg) => {
48 |                 update_arg(arg);
49 |             },
50 | 
51 |             // Process macro arguments
52 |             Statement::Macro(_, ref mut margs) => {
53 |                 for marg in margs.iter_mut() {
54 |                     if let MacroArgument::Argument(ref mut arg) = marg.value {
55 |                         update_arg(arg);
56 |                     }
57 |                 }
58 |             }
59 | 
60 |             _ => {}
61 |         }
62 |     }
63 | }


--------------------------------------------------------------------------------
/src/assembler/parser/syntax_ext/constants.rs:
--------------------------------------------------------------------------------
 1 | //! A syntax extension for constants
 2 | //!
 3 | //! # Example:
 4 | //!
 5 | //! ```
 6 | //! $const = [0]
 7 | //! MOV $const 2
 8 | //! ```
 9 | //!
10 | //! Results in:
11 | //!
12 | //! ```
13 | //! MOV [0] 2
14 | //! ```
15 | 
16 | use std::collections::HashMap;
17 | use assembler::parser::ast::{Program, Statement, Argument, Ident};
18 | 
19 | 
20 | pub fn expand(source: &mut Program) {
21 |     let mut consts: HashMap<Ident, Argument> = HashMap::new();
22 | 
23 |     // Pass 1: Collect constant definitions & remove them from the source
24 |     source.retain(|stmt| {
25 |         let (name, value) = match stmt.value {
26 |             Statement::Const(ref name, ref value) => (name, value),
27 |             _ => return true  // Not a const assignment, keep it
28 |         };
29 | 
30 |         // Collect value
31 |         match value.value {
32 |             Argument::Literal(_) | Argument::Address(_) => {
33 |                 if consts.insert(name.clone(), value.value.clone()).is_some() {
34 |                     warn!("redefinition of ${:?}", name; value);
35 |                 }
36 |             },
37 |             _ => fatal!("invalid constant value: {:?}", value; value)
38 |         }
39 | 
40 |         false  // Remove the definition from the source
41 |     });
42 | 
43 |     debug!("Constants: {:?}", consts);
44 | 
45 |     // Pass 2: Replace usages of constants
46 |     for stmt in source.iter_mut() {
47 |         let args = match stmt.value {
48 |             Statement::Operation(_, ref mut args) => args,
49 |             _ => continue
50 |         };
51 | 
52 |         for arg in args.iter_mut() {
53 |             // Get the new value if the argument is a constant
54 |             arg.value = if let Argument::Const(ref name) = arg.value {
55 |                 match consts.get(name) {
56 |                     Some(value) => value.clone(),
57 |                     None => fatal!("unknown constant: ${:?}", name; arg)
58 |                 }
59 |             } else {
60 |                 continue
61 |             };
62 |         }
63 |     }
64 | }


--------------------------------------------------------------------------------
/src/assembler/parser/syntax_ext/imports.rs:
--------------------------------------------------------------------------------
 1 | //! A syntax extension for imports
 2 | //!
 3 | //! # Example:
 4 | //!
 5 | //! `a.asm`:
 6 | //!
 7 | //! ```
 8 | //! APRINT '!'
 9 | //! ```
10 | //!
11 | //! `b.asm`:
12 | //!
13 | //! ```
14 | //! #import <a.asm>
15 | //! HALT
16 | //! ```
17 | //!
18 | //! Results in:
19 | //!
20 | //! ```
21 | //! APRINT '!'
22 | //! HALT
23 | //! ```
24 | //!
25 | //! # Note:
26 | //!
27 | //! A file will be imported only once. Circular imports are not allowed.
28 | 
29 | use std::ffi::AsOsStr;
30 | use std::fs::File;
31 | use std::io::Read;
32 | use std::path::Path;
33 | use assembler::parser::ast::{Program, Statement};
34 | use assembler::parser::Parser;
35 | 
36 | 
37 | pub fn expand(source: &mut Program) {
38 |     let mut last_file = None;
39 | 
40 |     // We use a indexed iteration here because we'll modify the source as we iterate
41 |     // over it
42 |     let mut i = 0;
43 |     while i < source.len() {
44 |         // Process import statements
45 |         let mut included_source = if let Statement::Include(ref include) = source[i].value {
46 |             // Get path to include
47 |             let path = Path::new(&*source[i].location.filename);
48 | 
49 |             let dir = Path::new(path.parent().unwrap_or(Path::new(".")));
50 |             let to_include = dir.join(&*include.as_str());
51 | 
52 |             // Forbid circular imports
53 |             if last_file == Some(to_include.clone()) {
54 |                 fatal!("circular import of {}", to_include.display(); source[i]);
55 |             }
56 |             last_file = Some(to_include.clone());
57 | 
58 |             // Read source file
59 |             let mut file = File::open(&to_include).unwrap_or_else(|e| {
60 |                 fatal!("cannot read {}: {}", to_include.display(), e; source[i]);
61 |             });
62 | 
63 |             let mut contents = String::new();
64 |             file.read_to_string(&mut contents).unwrap_or_else(|e| {
65 |                 fatal!("cannot read {}: {}", to_include.display(), e; source[i]);
66 |             });
67 | 
68 |             // Parse it
69 |             let mut parser = Parser::new(&contents, to_include.as_os_str().to_str().unwrap());
70 |             parser.parse()
71 |         } else {
72 |             i += 1;
73 |             continue
74 |         };
75 | 
76 |         // Remove the `#import <...>` statement
77 |         source.remove(i);
78 | 
79 |         // Insert the new source into the current one
80 |         for j in range(0, included_source.len()) {
81 |             source.insert(i + j, included_source.remove(0));
82 |         }
83 |     }
84 | }


--------------------------------------------------------------------------------
/src/assembler/parser/syntax_ext/labels.rs:
--------------------------------------------------------------------------------
 1 | //! A syntax extension that replaces labels with the referenced instruction number
 2 | //!
 3 | //! # Example:
 4 | //!
 5 | //! ```
 6 | //! label:
 7 | //! GOTO :label
 8 | //! ```
 9 | //!
10 | //! Results in:
11 | //!
12 | //! ```
13 | //! GOTO 0
14 | //! ```
15 | 
16 | use std::collections::HashMap;
17 | use assembler::parser::ast::{Program, Statement, Argument, Ident};
18 | 
19 | 
20 | pub fn expand(source: &mut Program) {
21 |     let mut labels: HashMap<Ident, u32> = HashMap::new();
22 |     let mut offset = 0;
23 | 
24 |     // Pass 1: Collect label definitions
25 |     source.retain(|stmt| {
26 |         match stmt.value {
27 |             // Store label name and current offset
28 |             Statement::Label(ref name) => {
29 |                 if labels.insert(name.clone(), offset).is_some() {
30 |                     warn!("redefinition of label: {:?}", name; stmt);
31 |                 }
32 | 
33 |                 false  // Remove label definition from the source
34 |             },
35 | 
36 |             // Increment the offset (only operation statements will count
37 |             // in the final binary)
38 |             Statement::Operation(_, ref args) => {
39 |                 offset += 1 + args.len() as u32;
40 |                 true  // Not a label definition, keep it
41 |             },
42 | 
43 |             _ => true  // Something else, keep it
44 |         }
45 |     });
46 | 
47 |     debug!("Labels: {:?}", labels);
48 | 
49 |     // Pass 2: Replace label usages
50 |     for stmt in source.iter_mut() {
51 | 
52 |         // Process all operations
53 |         if let Statement::Operation(_, ref mut args) = stmt.value {
54 |             for arg in args.iter_mut() {
55 | 
56 |                 // Get a new location if argument is a label
57 |                 arg.value = if let Argument::Label(ref name) = arg.value {
58 | 
59 |                     if let Some(val) = labels.get(name) {
60 |                         Argument::Literal(overflow_check!(*val, arg))
61 |                     } else {
62 |                         fatal!("unknown label: {:?}", name; arg)
63 |                     }
64 | 
65 |                 } else {
66 |                     continue
67 |                 }
68 | 
69 |             }
70 |         }
71 |     }
72 | }


--------------------------------------------------------------------------------
/src/assembler/parser/syntax_ext/mod.rs:
--------------------------------------------------------------------------------
 1 | use assembler::parser::ast::Program;
 2 | 
 3 | mod imports;
 4 | mod subroutines;
 5 | mod auto_address;
 6 | mod constants;
 7 | mod labels;
 8 | 
/// Run all syntax extensions on `source`, one after another.
///
/// NOTE(review): the order looks significant and should not be changed
/// lightly: imports pull in further statements for all later passes, the
/// subroutine expansion emits constants, automatic addresses (`[_]`) and
/// labels that the following passes have to resolve, and automatic addresses
/// presumably must be fixed before constant values are copied to their usages.
pub fn expand_syntax_extensions(source: &mut Program) {
    imports::expand(source);
    subroutines::expand(source);
    auto_address::expand(source);
    constants::expand(source);
    labels::expand(source);
}
16 | 


--------------------------------------------------------------------------------
/src/assembler/parser/syntax_ext/subroutines.rs:
--------------------------------------------------------------------------------
  1 | //! A syntax extension for custom subroutines
  2 | //!
  3 | //! # Example:
  4 | //!
  5 | //! Subroutine call:
  6 | //!
  7 | //! ```
  8 | //! @call(name, arg1, arg2)
  9 | //! ```
 10 | //!
 11 | //! FIXME: Maybe use @name(arg1, arg2) instead?
 12 | //!
 13 | //! Subroutine definition:
 14 | //!
 15 | //! ```
 16 | //! @start(name, argc)
 17 | //!    ...
 18 | //! @end()
 19 | //! ```
 20 | 
 21 | use std::borrow::ToOwned;
 22 | use std::collections::HashMap;
 23 | use assembler::parser::ast::{Program, Statement, StatementNode, Argument, MacroArgument, MacroArgumentNode,
 24 |                              Ident};
 25 | use assembler::parser::Parser;
 26 | 
 27 | use self::SubroutineState::*;
 28 | 
 29 | 
 30 | pub fn expand(source: &mut Program) {
 31 |     SubroutineExpander {
 32 |         source: source,
 33 |         routines: HashMap::new()
 34 |     }.expand();
 35 | }
 36 | 
 37 | 
 38 | // --- Subroutine Expansion: Implementation -------------------------------------
 39 | 
 40 | // We use a state machine to keep track of where we are and what is allowed.
 41 | 
/// The states of the state machine that drives the macro processing.
///
/// `SubroutineStart`, `SubroutineEnd` and `SubroutineCall` describe the macro
/// statement that is currently processed and carry the data needed to expand
/// it; `InSubroutine` and `NotInSubroutine` are the states kept between
/// statements.
#[derive(Debug, Clone, Eq, PartialEq)]
enum SubroutineState {
    /// Definition of a new subroutine (with its name)
    SubroutineStart(Ident), // Definition of a new subroutine
    /// Inside of a subroutine body
    InSubroutine,               // Subroutine body
    /// End of a subroutine body
    SubroutineEnd,              // End of the body
    /// Call of a subroutine (name and the arguments passed to it)
    SubroutineCall(Ident, Vec<MacroArgumentNode>),  // Call of a subroutine
    /// Outside of any subroutine
    NotInSubroutine            // Everything else
}
 50 | 
/// Carries the state shared by the passes of the subroutine expansion.
struct SubroutineExpander<'a> {
    /// The program that is expanded in place
    source: &'a mut Program,
    /// All defined subroutines: name -> number of arguments
    routines: HashMap<Ident, usize>
}
 55 | 
 56 | impl<'a> SubroutineExpander<'a> {
 57 | 
 58 |     fn expand(&mut self) {
 59 |         // Pass 1: Collect definitions and build preamble
 60 |         self.collect_routines();
 61 |         if self.routines.len() == 0 {
 62 |             return
 63 |         }
 64 | 
 65 |         // Build preamble
 66 |         self.build_preamble();
 67 | 
 68 |         debug!("Subroutines: {:?}", self.routines);
 69 | 
 70 |         // Pass 2: Replace function definitions
 71 |         self.process_macros();
 72 | 
 73 |         // Pass 3: Remove macro statements
 74 |         self.source.retain(|stmt| {
 75 |             match stmt.value {
 76 |                 Statement::Macro(..) => {
 77 |                     false
 78 |                 },
 79 |                 _ => true
 80 |             }
 81 |         });
 82 |     }
 83 | 
 84 |     /// Collect all subroutine definitions and store them in `self.routines`
 85 |     fn collect_routines(&mut self) {
 86 |         for stmt in self.source.iter() {
 87 |             let (ident, args) = match stmt.value {
 88 |                 Statement::Macro(ref ident, ref args) => (ident.clone(), args),
 89 |                 _ => continue
 90 |             };
 91 | 
 92 |             if ident.as_str() == "start" {
 93 |                 // Two args expected: name and number of arguments
 94 |                 if args.len() != 2 {
 95 |                     fatal!("invalid number of Argument::s for @start: {}",
 96 |                            args.len(); stmt)
 97 |                 }
 98 | 
 99 |                 let name = if let MacroArgument::Ident(ref name) = args[0].value {
100 |                     name.clone()
101 |                 } else {
102 |                     fatal!("expected subroutine name, got {}", args[0]; stmt)
103 |                 };
104 | 
105 |                 let argc = if let MacroArgument::Argument(ref arg) = args[1].value {
106 |                     if let Argument::Literal(argc) = arg.value {
107 |                         argc as usize
108 |                     } else {
109 |                         fatal!("expected argument count, got {}", args[1]; stmt)
110 |                     }
111 |                 } else {
112 |                     fatal!("expected argument count, got {}", args[1]; stmt)
113 |                 };
114 | 
115 |                 // Subroutine definition is valid, store it
116 |                 if self.routines.insert(name, argc).is_some() {
117 |                     fatal!("redefinition of subroutine: {}", args[0]; stmt)
118 |                 };
119 |             }
120 |         }
121 |     }
122 | 
123 |     fn parse_and_insert(&mut self, source: &str, pos: usize) {
124 |         let ast = Parser::new(source, "<internal>").parse();
125 | 
126 |         for (i, stmt) in ast.into_iter().enumerate() {
127 |             self.source.insert(pos + i, stmt)
128 |         }
129 |     }
130 | 
131 |     /// Build the preamble for the subroutine machinery.
132 |     /// Will only be inserted once at
133 |     ///
134 |     /// Will look like this:
135 |     ///
136 |     /// ```
137 |     /// $return = [_]     ; The return value
138 |     /// $jump_back = [_]  ; The return address
139 |     /// $arg0 = [_]       ; Arguments any subroutine receives
140 |     /// ```
141 |     fn build_preamble(&mut self) {
142 |         let mut template = r###"
143 |             $return = [_]
144 |             $jump_back = [_]
145 |         "###.to_owned();
146 | 
147 |         for i in 0 .. *self.routines.values().max().unwrap() {
148 |             template.push_str(&format!("$arg{} = [_]\n", i));
149 |         }
150 | 
151 |         self.parse_and_insert(&template, 0);
152 |     }
153 | 
154 |     /// Process subroutine definitions and calls
155 |     fn process_macros(&mut self) {
156 |         let mut state = NotInSubroutine;
157 | 
158 |         // We use a indexed iteration here because we'll modify the source as we iterate
159 |         // over it
160 |         let mut i = 0;
161 |         while i < self.source.len() {
162 |             let prev_state = state.clone();
163 | 
164 |             state = match self.get_state_for(&self.source[i], &state) {
165 |                 /// State processing & transitions
166 | 
167 |                 SubroutineStart(ident) => {
168 |                     // Build subroutine preamble
169 |                     self.source.remove(i);
170 | 
171 |                     let mut template = format!("{}:\n", ident);
172 |                     template.push_str("MOV $return 0\n");
173 | 
174 |                     self.parse_and_insert(&template, i);
175 | 
176 |                     InSubroutine
177 |                 },
178 | 
179 |                 SubroutineEnd => {
180 |                     // Build subroutine epilogue
181 |                     self.source.remove(i);
182 | 
183 |                     self.parse_and_insert("JMP $jump_back\n", i);
184 | 
185 |                     NotInSubroutine
186 |                 },
187 | 
188 |                 SubroutineCall(name, args) => {
189 |                     self.source.remove(i);
190 | 
191 |                     let mut template = String::new();
192 | 
193 |                     // Build arguments
194 |                     for j in 0 .. args.len() {
195 |                         let arg = match args[j].value {
196 |                             MacroArgument::Argument(ref arg) => arg,
197 |                             MacroArgument::Ident(ref ident) => {
198 |                                 fatal!("expected argument, got `{}`", ident; args[j])
199 |                             }
200 |                         };
201 | 
202 |                         template.push_str(&format!("MOV $arg{} {}\n", j, arg));
203 |                     }
204 | 
205 |                     // Set jumpback
206 |                     template.push_str(&format!("MOV $jump_back :ret{}\n", i));
207 | 
208 |                     // Jump to function
209 |                     template.push_str(&format!("JMP :{}\n", name));
210 | 
211 |                     // Add label where to continue
212 |                     template.push_str(&format!("ret{}:\n", i));
213 | 
214 |                     self.parse_and_insert(&template, i);
215 | 
216 |                     prev_state  // Return to previous state
217 |                 },
218 | 
219 |                 _ => state // Stay in current state
220 |             };
221 | 
222 |             i += 1;
223 |         }
224 |     }
225 | 
226 |     /// Get the current state based on the statement we're currently processing
227 |     fn get_state_for(&self, stmt: &StatementNode, state: &SubroutineState) -> SubroutineState {
228 |         match stmt.value {
229 |             Statement::Macro(ref ident, ref args) => {
230 |                 match ident.as_str() {
231 |                     "start" => {
232 |                         if *state == InSubroutine { fatal!("can't nest subroutines"; stmt); }
233 | 
234 |                         // Get subroutine name
235 |                         let ident = if let MacroArgument::Ident(ref ident) = args[0].value {
236 |                             ident.clone()
237 |                         } else {
238 |                             fatal!("expected subroutine name, found `{}`", args[0].value; args[0]);
239 |                         };
240 | 
241 |                         SubroutineStart(ident)
242 |                     },
243 |                     "end" => {
244 |                         if args.len() > 0 {
245 |                             fatal!("@end takes no args"; args[0]);
246 |                         }
247 | 
248 |                         SubroutineEnd
249 |                     },
250 |                     "call" => {
251 |                         if args.len() == 0 {
252 |                             fatal!("expected (name, args...), found `)`"; stmt);
253 |                         }
254 | 
255 |                         // Get subroutine name
256 |                         let ident = if let MacroArgument::Ident(ref ident) = args[0].value {
257 |                             ident.clone()
258 |                         } else {
259 |                             fatal!("expected subroutine name, found `{}`", args[0]; args[0]);
260 |                         };
261 | 
262 |                         // Verify argument count
263 |                         let routine_argc = *self.routines.get(&ident).unwrap_or_else(|| {
264 |                             fatal!("unknown subroutine: {}", ident; stmt);
265 |                         });
266 | 
267 |                         if args.len() - 1 != routine_argc {
268 |                             fatal!("wrong argument count: found {} args, expected {}",
269 |                                    args.len() - 1, routine_argc; args[0]);
270 |                         }
271 | 
272 |                         // Get args (cloned)
273 |                         let args: Vec<_> = args[1..].iter()
274 |                             .cloned()
275 |                             .collect();
276 | 
277 |                         SubroutineCall(ident, args)
278 |                     }
279 |                     _ => state.clone()
280 |                 }
281 |             },
282 |             _ => state.clone()
283 |         }
284 |     }
285 | }


--------------------------------------------------------------------------------
/src/assembler/util.rs:
--------------------------------------------------------------------------------
 1 | use std::old_io;
 2 | use ansi_term::Colour::{Red, Yellow};
 3 | use assembler::parser::SourceLocation;
 4 | 
 5 | 
/// Implement `fmt::Debug` and `fmt::Display` for the type `$cls`.
///
/// `Debug` writes the format string `$fmt` filled with the listed fields of
/// `self` (in the given order); `Display` just forwards to `Debug`.
///
/// The expansion refers to `fmt` without a path, so `fmt` has to be in scope
/// where the macro is used.
#[macro_export]
macro_rules! impl_to_string(
    ($cls:ident: $fmt:expr, $( $args:ident ),*) => (
        impl fmt::Debug for $cls {
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                write!(f, $fmt, $( self.$args ),*)
            }
        }

        impl fmt::Display for $cls {
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                write!(f, "{:?}", self)
            }
        }
    )
);
22 | 
23 | 
/// Convert `$val` to a machine word (`::machine::WordSize`).
///
/// A value greater than 255 doesn't fit: a warning located at `$stmt` is
/// emitted and the value wraps around, i.e. it is reduced modulo the number
/// of distinct word values (the maximum word value plus one; 256 for 8-bit
/// words).
///
/// `$val` is evaluated more than once, so it should be free of side effects.
/// The modulus is computed as `u32`, which requires the word size to be
/// narrower than 32 bits.
#[macro_export]
macro_rules! overflow_check(
    ($val:expr, $stmt:expr) => (
        if $val > 255 {
            warn!("overflow: {} > 255", $val; $stmt);
            // `!0` is the largest word, so `!0 + 1` is the number of words
            ($val as u32 % (!(0 as ::machine::WordSize) as u32 + 1)) as ::machine::WordSize
        }
        else { $val as ::machine::WordSize }
    )
);
34 | 
35 | 
/// Report a fatal error located at `$stmt` and abort.
///
/// `$stmt` can be anything with a `location` field. Two forms are accepted:
///
/// * `fatal!("format string", args...; stmt)`: the message is built with
///   `format!` (at least one argument is expected to fill the placeholders),
/// * `fatal!("message"; stmt)`: the message is used as it is.
///
/// Both forms evaluate to a call of `assembler::util::fatal`, which never
/// returns.
#[macro_export]
macro_rules! fatal(
    ($msg:expr, $($args:expr),* ; $stmt:expr) => {
        {
            use assembler::util::fatal;
            fatal(format!($msg, $($args),*), &$stmt.location)
        }
    };

    ($msg:expr ; $stmt:expr) => {
        {
            use std::borrow::ToOwned;
            ::assembler::util::fatal($msg.to_owned(), &$stmt.location)
        }
    };
);
52 | 
53 | pub fn fatal(msg: String, source: &SourceLocation) -> ! {
54 |     println!("{} in {}: {}", Red.paint("Error"), source, msg);
55 | 
56 |     old_io::stdio::set_stderr(Box::new(old_io::util::NullWriter));
57 |     panic!();
58 | }
59 | 
60 | 
/// Emit a warning located at `$stmt` (anything with a `location` field).
///
/// Usage: `warn!("format string", args...; stmt)`. The message is built with
/// `format!` and handed to `assembler::util::warn`. Unlike `fatal!`, there
/// is no separate form for a plain message without format arguments.
#[macro_export]
macro_rules! warn(
    ($msg:expr, $($args:expr),* ; $stmt:expr ) => {
        ::assembler::util::warn(format!($msg, $($args),*), &$stmt.location)
    }
);
67 | 
68 | pub fn warn(msg: String, source: &SourceLocation) {
69 |     println!("{} in {}: {}", Yellow.paint("Warning"), source, msg);
70 | }


--------------------------------------------------------------------------------
/src/machine.rs:
--------------------------------------------------------------------------------
  1 | use std::ascii::AsciiExt;
  2 | use std::collections::HashMap;
  3 | use std::str::FromStr;
  4 | use rand::distributions::Sample;
  5 | use rand::distributions::Range as RandRange;
  6 | use rand;
  7 | 
  8 | use self::Argument::*;
  9 | 
 10 | pub use self::StateChange::*;
 11 | 
 12 | 
/// The type of a machine word: instruction arguments and memory cells are
/// slices of it.
pub type WordSize = u8;
// NOTE(review): presumably the upper bound for generated random numbers;
// confirm against the instruction using it.
const RAND_MAX: u8 = 25;
 15 | 
 16 | 
 17 | // --- Instruction + helpers ---------------------------------------------
 18 | 
/// Representation of an instruction (opcode + args + implementation)
pub struct Instruction {
    /// The mnemonic of the instruction
    pub mnem: Mnemonic,
    /// The opcode of the instruction
    pub opcode: u8,
    /// The number of arguments; presumably equals `arg_types.len()`
    pub argc: usize,
    /// The types of the arguments
    pub arg_types: &'static [Argument],
    /// The function called by `execute` with the arguments and the memory
    implementation: fn(&[WordSize], &[WordSize]) -> StateChange
}
 27 | 
 28 | impl Instruction {
 29 |     pub fn execute(&self, args: &[WordSize], mem: &[WordSize]) -> StateChange {
 30 |         (self.implementation)(args, mem)
 31 |     }
 32 | }
 33 | 
 34 | 
/// Argument types
///
/// Note that the `PartialEq` implementation treats `Value` and `Address` as
/// equal to each other.
#[derive(Debug)]
pub enum Argument {
    /// The value of an address
    Value,      // The value of an address
    /// An address
    Address,    // An address
    /// A literal value
    Literal,    // A literal value
}
 42 | 
 43 | impl PartialEq<Argument> for Argument {
 44 |     // It's a little tricky here as Value and Address are somewhat equal depending
 45 |     // on the context ...
 46 |     fn eq(&self, other: &Argument) -> bool {
 47 |         match *self {
 48 |             Value | Address => match *other {
 49 |                 Value | Address => true,
 50 |                 _ => false
 51 |             },
 52 |             Literal => match *other {
 53 |                 Literal => true,
 54 |                 _ => false
 55 |             }
 56 |         }
 57 |     }
 58 | }
 59 | 
 60 | 
/// Possible results of instruction execution
pub enum StateChange {
    /// Store `value` at the memory address `address`
    Memset { address: WordSize, value: WordSize },
    /// Continue the execution at `address`
    Jump { address: WordSize },
    /// Stop the execution
    Halt,
    /// Nothing to do, go on with the next instruction
    Continue
}
 68 | 
 69 | 
 70 | // --- Instruction helpers ------------------------------------------------------
 71 | 
 72 | /// A helper to define an instruction: expands to a unit struct `$name` with an associated `execute` fn
 73 | macro_rules! make_instruction {
 74 |     // Static return: the instruction ignores its inputs and always yields `$ret_type`
 75 |     ($name:ident -> $ret_type:ident) => {
 76 |         pub struct $name;
 77 |         impl $name {
 78 |             #[allow(unused_variables)]
 79 |             fn execute(args: &[WordSize], mem: &[WordSize]) -> StateChange {
 80 |                 $ret_type
 81 |             }
 82 |         }
 83 |     };
 84 | 
 85 |     // Arguments and static return type: run `$body` for its side effects, then yield `$ret_type`
 86 |     ( $name:ident ($args:ident [ $argc:expr ] , $mem:ident) -> $ret_type:ident $body:block ) => {
 87 |         pub struct $name;
 88 |         impl $name {
 89 |             #[allow(unused_variables)]
 90 |             fn execute($args: &[WordSize], $mem: &[WordSize]) -> StateChange {
 91 |                 $body;
 92 |                 $ret_type
 93 |             }
 94 |         }
 95 |     };
 96 | 
 97 |     // Normal arguments: `$body` itself evaluates to the StateChange (`$argc` is matched but not used in any expansion)
 98 |     ( $name:ident ($args:ident [ $argc:expr ] , $mem:ident) $body:block ) => {
 99 |         pub struct $name;
100 |         impl $name {
101 |             #[allow(unused_variables)]
102 |             fn execute($args: &[WordSize], $mem: &[WordSize]) -> StateChange {
103 |                 $body
104 |             }
105 |         }
106 |     };
107 | }
108 | 
109 | 
110 | // --- Instruction implementations ----------------------------------------------
111 | // Syntax of the comments:
112 | //    a, b, c: first/second/third argument
113 | //    M[x]: Value of address x
114 | 
115 | // --- Memory Access
116 | 
117 | // M[a] = b, where b arrives already decoded (M[b] for the Value form, b itself for the Literal form)
118 | make_instruction!(IMov(args[2], memory) {
119 |     Memset { address: args[0], value: args[1] }
120 | });
121 | 
122 | 
123 | // --- Logic operations
124 | 
125 | // M[a] = M[a] & b (b is already decoded: M[b] for the Value form, the literal otherwise)
126 | make_instruction!(IAnd(args[2], memory) {
127 |     Memset { address: args[0], value: memory[args[0] as usize] & args[1] }
128 | });
129 | 
130 | // M[a] = M[a] | b (b is already decoded: M[b] for the Value form, the literal otherwise)
131 | make_instruction!(IOr(args[2], memory) {
132 |     Memset { address: args[0], value: memory[args[0] as usize] | args[1] }
133 | });
134 | 
135 | // M[a] = M[a] ^ b (b is already decoded: M[b] for the Value form, the literal otherwise)
136 | make_instruction!(IXor(args[2], memory) {
137 |     Memset { address: args[0], value: memory[args[0] as usize] ^ args[1] }
138 | });
139 | 
140 | // M[a] = !M[a] (bitwise complement of the cell at address a)
141 | make_instruction!(INot(args[1], memory) {
142 |     Memset { address: args[0], value: !memory[args[0] as usize] }
143 | });
144 | 
145 | 
146 | // --- Math
147 | 
148 | // M[a] = M[a] + b, wrapping around on overflow (a plain `+` on the 8-bit WordSize would panic in checked builds)
149 | make_instruction!(IAdd(args[2], memory) {
150 |     Memset { address: args[0], value: memory[args[0] as usize].wrapping_add(args[1]) }
151 | });
152 | 
153 | 
154 | // M[a] = M[a] - b, wrapping around on underflow (a plain `-` on the 8-bit WordSize would panic in checked builds)
155 | make_instruction!(ISub(args[2], memory) {
156 |     Memset { address: args[0], value: memory[args[0] as usize].wrapping_sub(args[1]) }
157 | });
158 | 
159 | 
160 | // --- Control
161 | 
162 | make_instruction!(IHalt -> Halt);  // Takes no arguments and always yields Halt
163 | 
164 | // Jump to a (unconditionally)
165 | make_instruction!(IJmp(args[1], memory) {
166 |     Jump { address: args[0] }
167 | });
168 | 
169 | // Jump to a if b == 0, otherwise fall through to the next instruction
170 | make_instruction!(IJz(args[2], memory) {
171 |     if args[1] == 0 {  // Test the condition operand b (args[1]), not the jump target a
172 |         Jump { address: args[0] }
173 |     } else {
174 |         Continue
175 |     }
176 | });
177 | 
178 | // Jump to a if b == c, otherwise fall through to the next instruction
179 | make_instruction!(IJeq(args[3], memory) {
180 |     if args[1] == args[2] {
181 |         Jump { address: args[0] }
182 |     } else {
183 |         Continue
184 |     }
185 | });
186 | 
187 | // Jump to a if b < c (unsigned comparison of WordSize values), otherwise fall through
188 | make_instruction!(IJls(args[3], memory) {
189 |     if args[1] < args[2] {
190 |         Jump { address: args[0] }
191 |     } else {
192 |         Continue
193 |     }
194 | });
195 | 
196 | // Jump to a if b > c (unsigned comparison of WordSize values), otherwise fall through
197 | make_instruction!(IJgt(args[3], memory) {
198 |     if args[1] > args[2] {
199 |         Jump { address: args[0] }
200 |     } else {
201 |         Continue
202 |     }
203 | });
204 | 
205 | 
206 | // --- I/O
207 | 
208 | // Print a as an ASCII character (a is already decoded: M[a] for the Value form, the literal otherwise)
209 | make_instruction!(IAPrint(args[1], memory) -> Continue {
210 |     print!("{:}", args[0] as char);
211 | });
212 | 
213 | // Print a as a decimal number (a is already decoded: M[a] for the Value form, the literal otherwise)
214 | make_instruction!(IDPrint(args[1], memory) -> Continue {
215 |     print!("{:}", args[0]);
216 | });
217 | 
218 | 
219 | // --- Misc
220 | 
221 | // M[a] = random value sampled from RandRange::new(0, RAND_MAX) using the thread-local RNG
222 | make_instruction!(IRandom(args[1], memory) {
223 |     let mut rand_range = RandRange::new(0, RAND_MAX);  // NOTE(review): rand's Range is presumably half-open, so 25 itself would never be drawn - confirm
224 |     let mut rng = rand::thread_rng();
225 |     Memset { address: args[0], value: rand_range.sample(&mut rng) }
226 | });
227 | 
228 | 
229 | // --- Opcode -> Instruction mapping --------------------------------------------
230 | 
231 | macro_rules! count_args {  // Counts its comma-separated arguments by expanding to a nested `1 + ...` expression
232 |     () => { 0 };
233 |     ($x:expr) => { 1 };
234 |     ($head:expr, $($tail:expr),+) => { 1 + count_args!($($tail),+) };  // Peel off one argument and recurse on the rest
235 | }
236 | 
237 | macro_rules! instruction {  // Builds one `Instruction` value from `mnemonic: opcode => Impl [arg types]`
238 |     ( $mnem:path : $opcode:expr => $instr:ident ) => (  // Form without an argument list: zero arguments
239 |         Instruction {
240 |             mnem: $mnem,
241 |             opcode: $opcode,
242 |             argc: 0,
243 |             arg_types: &[],
244 |             implementation: $instr::execute
245 |         }
246 |     );
247 | 
248 |     ( $mnem:path : $opcode:expr => $instr:ident [ $($t:ident),* ] ) => (  // Form with a bracketed list of Argument variants
249 |         Instruction {
250 |             mnem: $mnem,
251 |             opcode: $opcode,
252 |             arg_types: &[$($t),*],
253 |             argc: count_args!($($t),*),  // Derived from the list so it cannot drift from arg_types
254 |             implementation: $instr::execute
255 |         }
256 |     );
257 | }
258 | 
259 | macro_rules! instructions {  // Generates Mnemonic, its FromStr impl, INSTRUCTIONS_TABLE and InstructionManager from one listing
260 |     ( $($mnem:ident : $( $opcode:expr => $instr:ident ( $($t:ident),* ) ),* ; )* ) => {
261 | 
262 |         // Remember: HALT is not part of the macro's arguments as its opcode
263 |         // doesn't follow the scheme of the other instructions.
264 | 
265 |         #[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
266 |         pub enum Mnemonic {
267 |             $( $mnem, )* HALT
268 |         }
269 | 
270 |         impl FromStr for Mnemonic {
271 |             type Err = String;
272 | 
273 |             fn from_str(s: &str) -> Result<Mnemonic, String> {
274 |                 match &*s.to_ascii_uppercase() {  // Upper-cased first, so matching is case-insensitive
275 |                     $(
276 |                         stringify!($mnem) => Ok(Mnemonic::$mnem),
277 |                     )*
278 |                     "HALT" => Ok(Mnemonic::HALT),
279 |                     _ => Err(format!("Invalid instruction: {}", s))
280 |                 }
281 |             }
282 |         }
283 | 
284 |         /// An opcode → instruction mapping; entries are indexed by opcode, so position must equal opcode
285 |         static INSTRUCTIONS_TABLE: &'static [Instruction] = &[
286 |             $(
287 |                 $(
288 |                     instruction!(Mnemonic::$mnem: $opcode => $instr [ $($t),* ])
289 |                 ),*
290 |             ),*
291 |         ];
292 | 
293 | 
294 |         /// A mnemonic → instructions mapping + access methods
295 |         pub struct InstructionManager {
296 |             map: HashMap<Mnemonic, Vec<&'static Instruction>>
297 |         }
298 | 
299 |         impl InstructionManager {
300 |             pub fn new() -> InstructionManager {
301 |                 let mut map = HashMap::new();
302 |                 $(
303 |                     map.insert(Mnemonic::$mnem, vec![
304 |                         $( &INSTRUCTIONS_TABLE[$opcode] ),*  // Indexes the table by opcode
305 |                     ]);
306 |                 )*
307 | 
308 |                 map.insert(Mnemonic::HALT, vec![&INSTRUCTION_HALT]);  // HALT lives outside the table
309 | 
310 |                 InstructionManager {
311 |                     map: map
312 |                 }
313 |             }
314 | 
315 |             pub fn lookup_operations(&self, mnem: &Mnemonic) -> &[&'static Instruction] {  // All opcode variants registered for a mnemonic
316 |                 &self.map[*mnem]
317 |             }
318 | 
319 |             pub fn decode_opcode(&self, opcode: u8) -> &'static Instruction {  // Panics on an opcode that is neither in the table nor 0xFF
320 |                 // 0xFF is reserved for HALT, so the table must never grow that far
321 |                 assert!(INSTRUCTIONS_TABLE.len() < 0xFF);
322 | 
323 |                 if opcode != 0xFF && opcode as usize >= INSTRUCTIONS_TABLE.len() {
324 |                     panic!("Invalid opcode: {}", opcode)
325 |                 };
326 | 
327 |                 // Special case: 0xFF is HALT
328 |                 if opcode == 0xFF {
329 |                     &INSTRUCTION_HALT
330 |                 } else {
331 |                     &INSTRUCTIONS_TABLE[opcode as usize]
332 |                 }
333 |             }
334 | 
335 |             pub fn decode_args(&self, args: &[WordSize], arg_types: &[Argument], mem: &[WordSize]) -> Vec<u8> {  // Resolve raw argument words per their declared type
336 |                 arg_types.iter()
337 |                     .zip(args.iter())
338 |                     .map(|(ty, val)| {
339 |                         match *ty {
340 |                             Argument::Value => mem[*val as usize],  // Dereference: pass the content of the addressed cell
341 |                             Argument::Address => *val,  // Pass the address itself
342 |                             Argument::Literal => *val,  // Pass the literal itself
343 |                         }
344 |                     })
345 |                     .collect()
346 |             }
347 | 
348 |         }
349 |     };
350 | }
351 | 
352 | instructions! {  // Opcodes are listed in ascending order from 0x00 without gaps, matching their position in the generated table
353 |     AND:
354 |     0x00 => IAnd(Address, Value  ),
355 |     0x01 => IAnd(Address, Literal);
356 | 
357 |     OR:
358 |     0x02 => IOr(Address, Value  ),
359 |     0x03 => IOr(Address, Literal);
360 | 
361 |     XOR:
362 |     0x04 => IXor(Address, Value  ),
363 |     0x05 => IXor(Address, Literal);
364 | 
365 |     NOT:
366 |     0x06 => INot(Address);
367 | 
368 | 
369 |     MOV:
370 |     0x07 => IMov(Address, Value  ),
371 |     0x08 => IMov(Address, Literal);
372 | 
373 | 
374 |     RANDOM:
375 |     0x09 => IRandom(Address);
376 | 
377 |     ADD:
378 |     0x0A => IAdd(Address, Value  ),
379 |     0x0B => IAdd(Address, Literal);
380 | 
381 |     SUB:
382 |     0x0C => ISub(Address, Value  ),
383 |     0x0D => ISub(Address, Literal);
384 | 
385 | 
386 |     JMP:
387 |     0x0E => IJmp(Value  ),
388 |     0x0F => IJmp(Literal);
389 | 
390 |     JZ:  // (jump target, tested operand)
391 |     0x10 => IJz(Value,   Value  ),
392 |     0x11 => IJz(Value,   Literal),
393 |     0x12 => IJz(Literal, Value  ),
394 |     0x13 => IJz(Literal, Literal);
395 | 
396 |     JEQ:  // (jump target, left operand, right operand)
397 |     0x14 => IJeq(Value,   Value, Value  ),
398 |     0x15 => IJeq(Literal, Value, Value  ),
399 |     0x16 => IJeq(Value,   Value, Literal),
400 |     0x17 => IJeq(Literal, Value, Literal);
401 | 
402 |     JLS:  // (jump target, left operand, right operand)
403 |     0x18 => IJls(Value,   Value, Value  ),
404 |     0x19 => IJls(Literal, Value, Value  ),
405 |     0x1A => IJls(Value,   Value, Literal),
406 |     0x1B => IJls(Literal, Value, Literal);
407 | 
408 |     JGT:  // (jump target, left operand, right operand)
409 |     0x1C => IJgt(Value,   Value, Value  ),
410 |     0x1D => IJgt(Literal, Value, Value  ),
411 |     0x1E => IJgt(Value,   Value, Literal),
412 |     0x1F => IJgt(Literal, Value, Literal);
413 | 
414 | 
415 |     APRINT:
416 |     0x20 => IAPrint(Value  ),
417 |     0x21 => IAPrint(Literal);
418 | 
419 |     DPRINT:
420 |     0x22 => IDPrint(Value  ),
421 |     0x23 => IDPrint(Literal);
422 | }
423 | 
424 | // Halt the program. Kept outside INSTRUCTIONS_TABLE because its opcode (0xFF) is not a table index.
425 | static INSTRUCTION_HALT: Instruction = instruction!(Mnemonic::HALT: 0xFF => IHalt);


--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
 1 | #![feature(plugin)]
 2 | 
 3 | // Use of unstable libraries
 4 | #![feature(collections)]
 5 | #![feature(core)]
 6 | #![feature(fs)]
 7 | #![feature(io)]
 8 | #![feature(path)]
 9 | #![feature(old_io)]
10 | #![feature(os)]
11 | #![feature(std_misc)]
12 | 
13 | #![plugin(docopt_macros)]
14 | 
15 | extern crate ansi_term;
16 | extern crate docopt;
17 | extern crate env_logger;
18 | extern crate rand;
19 | extern crate "rustc-serialize" as rustc_serialize;
20 | #[macro_use] extern crate lazy_static;
21 | #[macro_use] extern crate log;
22 | 
23 | use docopt::Docopt;
24 | 
25 | mod assembler;
26 | mod machine;
27 | mod vm;
28 | 
29 | docopt!(Args derive Debug, "
30 | Usage: tiny asm [-v] <input>
31 |        tiny asm [-v] --bin <input> <output>
32 |        tiny vm <input>
33 |        tiny --help
34 | 
35 | Options:
36 |     --help  Show this screen.
37 | ");  // The docopt! plugin macro defines the `Args` type from this usage text (fields such as `cmd_asm` and `arg_input` are read elsewhere)
38 | 
39 | 
40 | #[cfg(not(test))]
41 | fn main() {  // Entry point: parse the command line, then dispatch to the assembler or the VM
42 |     env_logger::init().unwrap();
43 | 
44 |     let args: Args = Args::docopt().decode().unwrap_or_else(|e| e.exit());  // On a parse failure, hand the error to docopt's `exit()`
45 | 
46 |     if args.cmd_asm {
47 |         assembler::main(args)
48 |     } else {  // Anything that is not `asm` goes to the VM
49 |         vm::main(args)
50 |     }
51 | }


--------------------------------------------------------------------------------
/src/vm/mod.rs:
--------------------------------------------------------------------------------
 1 | use std::fs::File;
 2 | use std::io::Read;
 3 | use std::path::Path;
 4 | 
 5 | use machine::{InstructionManager, Memset, Jump, Halt, Continue};
 6 | use Args;
 7 | 
 8 | 
 9 | const MEMORY_SIZE: usize = 256;
10 | 
11 | 
12 | pub fn main(args: Args) {
13 |     // Load the program image; any I/O failure is fatal
14 |     let image_path = Path::new(&args.arg_input);
15 | 
16 |     let mut image = match File::open(&image_path) {
17 |         Err(err) => panic!("Can't open {}: {}", image_path.display(), err),
18 |         Ok(handle) => handle
19 |     };
20 | 
21 |     let mut program = vec![];
22 |     if let Err(err) = image.read_to_end(&mut program) {
23 |         panic!("Can't read {}: {}", image_path.display(), err)
24 |     }
25 | 
26 |     // Interpret the loaded bytes until the program halts
27 |     run(&program);
28 | }
29 | 
30 | fn run(source: &[u8]) {  // Interprets `source` from offset 0 until HALT; panics if execution runs off the end of the input
31 |     let mut memory = [0u8; MEMORY_SIZE];
32 |     let mut ip = 0;
33 |     let im = InstructionManager::new();
34 | 
35 |     loop {
36 |         debug!("--- next instruction (ip: {})", ip);
37 |         debug!("memory: {:?}@{}", &memory[..], memory.len());
38 | 
39 |         // Step 1: Read instruction (checked: ip can point past the end after a jump or when HALT is missing)
40 |         let opcode = match source.get(ip) { Some(&op) => op, None => panic!("Reached end of input without HALT!") };
41 | 
42 |         // Step 2: Decode opcode and read + decode the arguments
43 |         let ref instruction = im.decode_opcode(opcode);
44 | 
45 |         let argc = instruction.argc;
46 |         if ip + argc >= source.len() {  // The argument words must lie inside the input as well
47 |             panic!("Reached end of input without HALT!")
48 |         }
49 |         let args = &source[ip + 1 .. ip + 1 + argc];
50 | 
51 |         let decoded_args = im.decode_args(args, instruction.arg_types, &memory);
52 | 
53 |         // Step 3 + 4: Execute instruction and process result
54 |         debug!("executing {:?} ({:#04X}) with {:?}", instruction.mnem, opcode, decoded_args);
55 | 
56 |         match instruction.execute(&decoded_args, &memory) {
57 |             Continue => {},
58 |             Jump { address } => {
59 |                 debug!("Jumping to {}", address);
60 |                 ip = address as usize;
61 |                 continue;  // We've already updated the instruction pointer
62 |             },
63 |             Memset { address, value } => {
64 |                 debug!("Setting m[{}] = {}", address, value);
65 |                 memory[address as usize] = value;  // address is a u8, so it always fits the 256-cell memory
66 |             },
67 |             Halt => break
68 |         }
69 | 
70 |         // Update instruction pointer
71 |         ip += 1;  // Skip opcode
72 |         ip += argc;  // Skip args
73 |     }
74 | }


--------------------------------------------------------------------------------