├── .gitignore ├── LICENSE ├── README.md ├── array.asm ├── ascii.asm ├── benchmark.asm ├── benchmark2.asm ├── fact.asm ├── greet.asm ├── hello.asm ├── in.txt ├── int_sum.asm ├── rev.asm ├── stack.asm └── vm.awk /.gitignore: -------------------------------------------------------------------------------- 1 | asm.temp 2 | *~ 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019 Siraphob (Ben) Phipathananunth 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included 12 | in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 17 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 18 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 19 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 20 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # awk-vm: A virtual machine and assembler written in AWK 2 | 3 | ## What? 
4 | _The AWK Programming Language_ (TAPL) seems at first glance to be a 5 | book on using AWK to fulfill your text processing needs. The first 6 | 100 or so pages do a great job of that, but then you reach Chapter 6, 7 | the first 5 pages of which describe a virtual machine and assembler 8 | whose AWK source fits just under a page, and you realize that AWK 9 | is so much more. 10 | 11 | This repository takes the assembler/interpreter on page 134 of TAPL 12 | and adds additional instructions and directives, without breaking 13 | backwards compatibility. It's still relatively small, though, under 14 | 200 lines. 15 | 16 | ## Usage 17 | The lines between `Running program...` and `Stopping after ...` are the program output; 18 | in the first example below, it's `15`. 19 | ```text 20 | $ awk -f vm.awk int_sum.asm in.txt 21 | Pass 1...done 22 | Pass 2...done. 12 bytes total 23 | Running program... 24 | 15 25 | 26 | Stopping after 32 instructions. Program counter at 10. 27 | 28 | $ awk -f vm.awk greet.asm 29 | Pass 1...done 30 | Pass 2...done. 85 bytes total 31 | Running program... 32 | What is your name? Brian Kernighan 33 | Hello, Brian Kernighan. 34 | Stopping after 206 instructions. Program counter at 23. 35 | ``` 36 | 37 | ## Hello World example 38 | ```asm 39 | start 40 | lda msg 41 | puts 42 | halt 43 | 44 | msg string "hello, world!" 45 | ``` 46 | ## VM Instructions (backwards compatible with TAPL) 47 | **N.B. 
Instructions with opcode > 10 were written by me.** 48 | 49 | **Boldface on opcodes <= 10 indicates changed behavior from the book** 50 | 51 | | Opcode | Instruction | Meaning | 52 | | :-: | :-: | :-: | 53 | | 01 | get | read a number from the input into the accumulator | 54 | | 02 | put | write the contents of the accumulator to the output | 55 | | 03 | ld M | load accumulator with contents of memory location M | 56 | | 04 | st M | store contents of accumulator in location M | 57 | | 05 | add M | add contents of location M to accumulator | 58 | | 06 | sub M | subtract contents of location M from accumulator | 59 | | 07 | jpos M | jump to location M if accumulator is positive, **set J = PC + 1** | 60 | | 08 | jz M | jump to location M if accumulator is zero, **set J = PC + 1** | 61 | | 09 | j M | jump to location M, **set J = PC + 1** | 62 | | 10 | halt | stop execution | 63 | | 11 | mul M | multiply accumulator by contents of location M | 64 | | 12 | div M | divide accumulator by contents of location M (integer division) | 65 | | 13 | lda M | load accumulator with M | 66 | | 14 | inc | increment accumulator | 67 | | 15 | dec | decrement accumulator | 68 | | 16 | sti M | store accumulator in location pointed to by M | 69 | | 17 | ldi M | load accumulator with contents of location pointed to by M | 70 | | 18 | putc | write the contents of the accumulator as an ASCII character | 71 | | 19 | getc | read a character from the input into the accumulator as its ASCII code | 72 | | 20 | puts | print the string starting at the address in the accumulator until a NUL is read | 73 | | 21 | putn | like `puts` but with a trailing newline | 74 | | 22 | lj | set the accumulator to the value of the j register (A = J) | 75 | | 23 | ret | set the program counter to the value of the j register (PC = J) | 76 | 77 | ## Assembler directives ("pseudo-operations") 78 | | Instruction | Meaning | 79 | | :-: | :-: | 80 | | const C | define a variable with default value C (0 if C is omitted) | 81 | | blk C | fill C cells with 0 | 82 | | 
string S | a null-terminated ASCII-encoded string | 83 | 84 | Not really operations, but make inline constants easy. 85 | ```text 86 | # Declare array "foo" of size 10 87 | foo blk 10 88 | # Declare variable "bar" with default value 5 89 | bar const 5 90 | # Declare string "msg" with contents "hello, world!" 91 | msg string "hello, world!" 92 | ``` 93 | ## Simple benchmarks 94 | Mostly qualitative, not rigorous benchmarks. `benchmark.asm` and 95 | `benchmark2.asm` perform the same computation (decrement a number 96 | from 99999 to 0, 100 times), but `benchmark.asm` uses the accumulator 97 | to hold the value, whereas `benchmark2.asm` uses a memory cell. 98 | `benchmark.asm` runs in 60% of the instructions and takes 70% of the time 99 | compared to `benchmark2.asm`. 100 | 101 | ```text 102 | $ time awk -f vm benchmark.asm 103 | ... 104 | Stopping after 30000499 instructions. Program counter at 12. 105 | awk -f vm benchmark.asm 26.91s user 0.05s system 99% cpu 26.972 total 106 | 107 | $ time awk -f vm benchmark2.asm 108 | ... 109 | Stopping after 50000597 instructions. Program counter at 13. 110 | awk -f vm benchmark2.asm 36.26s user 0.00s system 99% cpu 36.258 total 111 | ``` 112 | - `benchmark.asm` (26.91/30000499) ≈ (0.897 µs/cycle) 113 | - `benchmark2.asm` (36.26/50000597) ≈ (0.725 µs/cycle) 114 | 115 | Using 0.725 µs/cycle, we find that the VM runs at a speed of 1.38 116 | MHz. Not bad for 150 lines of Awk! 117 | 118 | ### With mawk 119 | `vm.awk` doesn't depend on implementation specific features (such as 120 | the traversal order of associative arrays), so we can test out 121 | different Awk implementations, take, for instance, 122 | [mawk](https://invisible-island.net/mawk/mawk.html): 123 | 124 | ```text 125 | $ time mawk -f vm.awk benchmark.asm 126 | ... 127 | Stopping after 30000499 instructions. Program counter at 12. 128 | mawk -f vm.awk benchmark.asm 15.72s user 0.00s system 99% cpu 15.731 total 129 | 130 | $ time mawk -f vm.awk benchmark2.asm 131 | ... 
132 | Stopping after 50000597 instructions. Program counter at 13. 133 | mawk -f vm.awk benchmark2.asm 21.00s user 0.01s system 99% cpu 21.007 total 134 | ``` 135 | - `benchmark.asm` (15.72/30000499) ≈ (0.524 µs/cycle) 136 | - `benchmark2.asm` (21.00/50000597) ≈ (0.420 µs/cycle) 137 | 138 | Now the VM runs at 2.38 MHz! 139 | 140 | ## Future plans 141 | ### Enhancing current ISA 142 | - Multiplication, division 143 | - Memory indirection operations 144 | - Multiple registers 145 | - Reading/writing ASCII characters 146 | - Stacks (push, pop) 147 | - Subroutine calls 148 | 149 | 150 | ### Potential targets 151 | - MIX computer (from _The Art of Computer Programming_ by Donald Knuth) 152 | - Z80 (without interrupts) 153 | - [R216](https://lbphacker.pw/powdertoy/R216/manual.md) 154 | -------------------------------------------------------------------------------- /array.asm: -------------------------------------------------------------------------------- 1 | ;;; reads a list of numbers (max 10 numbers), stores it in memory and 2 | ;;; prints it out 3 | 4 | 5 | loop 6 | ld count 7 | jz done_read 8 | dec 9 | st count 10 | get 11 | jz done_read 12 | sti arrayptr 13 | ld arrayptr 14 | inc 15 | st arrayptr 16 | j loop 17 | 18 | 19 | done_read 20 | lda array 21 | st arrayptr 22 | 23 | print_loop 24 | ldi arrayptr 25 | jz done 26 | put 27 | ld arrayptr 28 | inc 29 | st arrayptr 30 | j print_loop 31 | 32 | done 33 | halt 34 | 35 | count const 10 36 | arrayptr const array 37 | array blk 10 38 | 39 | -------------------------------------------------------------------------------- /ascii.asm: -------------------------------------------------------------------------------- 1 | # Reads two numbers: i j 2 | # Print the ASCII table from i to j. 
3 | 4 | get 5 | st i 6 | get 7 | loop 8 | st j 9 | jz done 10 | ld i 11 | putc 12 | inc 13 | st i 14 | ld j 15 | dec 16 | j loop 17 | done 18 | halt 19 | 20 | i const 21 | j const 22 | -------------------------------------------------------------------------------- /benchmark.asm: -------------------------------------------------------------------------------- 1 | ;; Decrementing from the accumulator 2 | 3 | lda 0 4 | start 5 | dec 6 | jz done 7 | j start 8 | done 9 | halt 10 | 11 | done 12 | ld count 13 | dec 14 | jz bye 15 | st count 16 | lda 0 17 | j start 18 | bye 19 | halt 20 | count const 100 21 | -------------------------------------------------------------------------------- /benchmark2.asm: -------------------------------------------------------------------------------- 1 | ;; Decrementing from memory 2 | 3 | start 4 | ld foo 5 | dec 6 | st foo 7 | jz done 8 | j start 9 | done 10 | ld count 11 | dec 12 | jz bye 13 | st count 14 | lda 0 15 | st foo 16 | j start 17 | bye 18 | halt 19 | foo const 0 20 | count const 100 21 | -------------------------------------------------------------------------------- /fact.asm: -------------------------------------------------------------------------------- 1 | ;; Factorial 2 | 3 | get 4 | fact_loop 5 | jz done 6 | st c 7 | 8 | mul acc 9 | st acc 10 | ld c 11 | dec 12 | j fact_loop 13 | 14 | done 15 | ld acc 16 | put 17 | halt 18 | 19 | acc const 1 20 | c const 0 21 | -------------------------------------------------------------------------------- /greet.asm: -------------------------------------------------------------------------------- 1 | ;; Ask for a user's name 2 | lda msg1 3 | puts 4 | 5 | get_name 6 | lda 5 7 | getc 8 | jz say_hi ;; Reached end 9 | sti name_ptr 10 | ld name_ptr 11 | inc 12 | st name_ptr 13 | ld chars 14 | dec 15 | st chars 16 | jz say_hi ;; Out of characters. 
17 | j get_name 18 | 19 | say_hi 20 | lda 0 ;; null-terminate 21 | sti name_ptr 22 | lda msg2 23 | puts 24 | lda name 25 | puts 26 | lda 46 27 | putc 28 | halt 29 | 30 | chars const 32 31 | name_ptr const name 32 | name blk 32 33 | msg1 string "What is your name? " 34 | msg2 string "Hello, " 35 | 36 | -------------------------------------------------------------------------------- /hello.asm: -------------------------------------------------------------------------------- 1 | start 2 | lda msg 3 | puts 4 | halt 5 | 6 | msg string "hello, world!" 7 | -------------------------------------------------------------------------------- /in.txt: -------------------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 6 | 0 7 | -------------------------------------------------------------------------------- /int_sum.asm: -------------------------------------------------------------------------------- 1 | # from The AWK Programming Language book 2 | 3 | # print sum of input numbers (terminated by zero) 4 | 5 | ld zero # initialize sum to zero 6 | st sum 7 | loop get # read a number 8 | jz done # no more input if number is zero 9 | add sum # add in accumulated sum 10 | st sum # store new value back in sum 11 | j loop # go back and read another number 12 | 13 | done ld sum # print sum 14 | put 15 | halt 16 | 17 | zero const 0 18 | sum const 19 | 20 | -------------------------------------------------------------------------------- /rev.asm: -------------------------------------------------------------------------------- 1 | ;; Reverse a user's string. 2 | lda msg1 3 | puts 4 | get_s 5 | getc 6 | jz print_rev ;; Reached end 7 | sti s_ptr 8 | ld s_ptr 9 | inc 10 | st s_ptr 11 | ld c 12 | dec ;; Decrement c 13 | jz print_rev ;; Out of characters. 14 | st c 15 | j get_s 16 | 17 | print_rev 18 | lda s 19 | sub s_ptr 20 | inc 21 | ;; c contains number of characters to print. 
22 | print_loop 23 | st c 24 | ld s_ptr 25 | dec 26 | st s_ptr 27 | ldi s_ptr 28 | putc 29 | ld c 30 | inc 31 | jz done 32 | j print_loop 33 | done 34 | halt 35 | 36 | s_ptr const s 37 | s blk 32 38 | msg1 string "Enter a string to reverse: " 39 | c const 0 40 | -------------------------------------------------------------------------------- /stack.asm: -------------------------------------------------------------------------------- 1 | ;; Simulating a stack and subroutine calls 2 | 3 | j say_msg1 4 | j say_msg2 5 | lda 1 6 | j push 7 | lda 2 8 | j push 9 | lda 3 10 | j push 11 | j pop 12 | put 13 | j pop 14 | put 15 | j pop 16 | put 17 | halt 18 | 19 | say_msg1 20 | lda msg1 21 | putn 22 | ret 23 | 24 | say_msg2 25 | lda msg2 26 | putn 27 | ret 28 | 29 | ;; Push a value into the stack from the accumulator 30 | push 31 | sti sp 32 | ld sp 33 | inc 34 | st sp 35 | ret 36 | 37 | ;; Pop a value into the accumulator 38 | pop 39 | ld sp 40 | dec 41 | st sp 42 | ldi sp 43 | ret 44 | 45 | sp const stack_start 46 | stack_start blk 10 47 | msg1 string "message 1" 48 | msg2 string "message 2" 49 | -------------------------------------------------------------------------------- /vm.awk: -------------------------------------------------------------------------------- 1 | BEGIN { 2 | # Initialize ASCII table: ord[c] is the numeric code (0-255) of character c. 
3 | for(n = 0; n < 256; n++) { 4 | ord[sprintf("%c",n)] = n 5 | } 6 | 7 | # Check if the file exists 8 | srcfile = ARGV[1] 9 | if (system("test -f " srcfile)) { 10 | printf("File %s doesn't exist!\n", srcfile) 11 | exit(1) 12 | } 13 | 14 | tempfile = "asm.temp" 15 | ARGV[1] = "" # blank the source-file argument so the plain getline calls of get/getc do not read the program text as input 16 | 17 | # Backwards compatible ISA with the VM in the `AWK programming language' 18 | n = split("const get put ld st add sub jpos jz j halt\ 19 | mul div lda inc dec sti ldi putc getc puts putn lj ret", x) 20 | 21 | for (i = 1; i <= n; i++) { 22 | op[x[i]] = i - 1 # opcode = position in the list above: const is 0, get is 1, ..., ret is 23 23 | } 24 | 25 | printf("Pass 1...") 26 | FS = "[ \t]+" 27 | 28 | while ((stat = getline < srcfile) > 0) { 29 | sub(/#.*/, "") 30 | # alternate comment syntax 31 | sub(/;.*/, "") 32 | # blk directive 33 | symtab[$1] = nextmem # record the label's address; a later duplicate label overwrites an earlier one 34 | if ($2 == "blk") { 35 | for(i = 0; i < $3; i++) { 36 | print sprintf("r%d 0", extraRegs++) > tempfile 37 | } 38 | nextmem += i 39 | blkcount++ 40 | # string directive 41 | } else if ($2 == "string") { found = 0 # reset per directive so a malformed string is reported even after a valid one 42 | while (match($0,/"[^"]*"/)){ j = 0 # cells emitted for THIS string; without the reset nextmem advanced by a running total 43 | s = substr($0, RSTART + 1, RLENGTH - 2) 44 | sub(/"[^"]*"/,"") 45 | found = 1 46 | split(s, x, "") 47 | # Can't write for (i in s) because of implementation 48 | # differences in associative array traversal order. 
49 | for (i = 1; i <= length(s); i++) { 50 | print sprintf("const_%.2d_%.2d %.3d", blkcount, j++, ord[x[i]]) > tempfile 51 | } 52 | # NUL at end of string 53 | print sprintf("const_%.2d_%.2d %.3d", blkcount++, j++, 0) > tempfile 54 | nextmem += j 55 | } 56 | 57 | if (!found) { 58 | printf("ERROR: Could not read string at address %d\n", nextmem) 59 | exit(1) 60 | } 61 | } else if ($2 != "") { 62 | print $2 " " $3 > tempfile 63 | nextmem++ 64 | } 65 | } 66 | close(tempfile) 67 | 68 | printf("done\nPass 2...") 69 | nextmem = 0 70 | if (DEBUG) print "" 71 | while ((getline < tempfile) > 0) { 72 | if ($2 !~ /^[0-9]*$/) { 73 | $2 = symtab[$2] 74 | } 75 | mem[nextmem] = 1000 * op[$1] + $2 # word = opcode * 1000 + operand; names missing from op[] (data cells) contribute opcode 0 76 | if (DEBUG && (b = mem[nextmem])) { 77 | printf("%3d: %.5d %s %s\n", nextmem, b, $0, is_ascii(b) ? sprintf(" %c", b): "") 78 | } 79 | nextmem++ 80 | } 81 | printf("done. %d bytes total\nRunning program...\n", nextmem) 82 | 83 | # bytecode interpreter 84 | acc = 0 85 | # address after last jump instruction 86 | jaddr = 0 87 | stdin = "" 88 | firstread = 1 89 | for (pc = 0; pc >= 0;) { 90 | cycles++ 91 | addr = mem[pc] % 1000 92 | code = int(mem[pc++] / 1000) 93 | if (code == op["get"]) { if ((getline acc) <= 0 || !(acc ~ /[+-]?[0-9]+/)) { printf("Failed to read number.\n"); acc = 0}} 94 | else if (code == op["put"]) { print (acc + 0) } 95 | else if (code == op["st"]) { mem[addr] = acc } 96 | else if (code == op["ld"]) { acc = mem[addr] % 100000 } 97 | else if (code == op["add"]) { acc = (acc + mem[addr]) % 100000 } 98 | else if (code == op["sub"]) { acc = (100000 + acc - mem[addr]) % 100000 } 99 | else if (code == op["jpos"]) { if (acc > 0) { jaddr = pc; pc = addr } } 100 | else if (code == op["jz"]) { if (acc == 0) { jaddr = pc; pc = addr } } 101 | else if (code == op["j"]) { jaddr = pc; pc = addr } 102 | else if (code == op["halt"]) { halt(pc, mem) } 103 | # Additional instructions 104 | else if (code == op["mul"]) { acc = (acc * mem[addr]) % 100000 } 105 | else if (code == op["div"]) { acc = 
int(acc / mem[addr]) % 100000 } 106 | else if (code == op["lda"]) { acc = addr } 107 | else if (code == op["inc"]) { acc = (acc + 1) % 100000 } 108 | else if (code == op["dec"]) { acc = (acc + 99999) % 100000 } 109 | else if (code == op["sti"]) { mem[mem[addr]] = acc } 110 | else if (code == op["ldi"]) { acc = mem[mem[addr]] } 111 | else if (code == op["putc"]) { printf("%c", acc) } 112 | else if (code == op["getc"]) { getc() } 113 | else if (code == op["puts"]) { while(mem[acc]) { printf("%c", mem[acc++]) } } 114 | else if (code == op["putn"]) { while(mem[acc]) { printf("%c", mem[acc++]) } print "" } 115 | else if (code == op["lj"]) { acc = jaddr } 116 | else if (code == op["ret"]) { pc = jaddr } 117 | else { printf("INVALID OPCODE: %d\n", code); exit(1) } 118 | } 119 | } 120 | 121 | function getc() { # Character input into the global acc. With an empty line buffer, one call reads the next input line and leaves acc unchanged, and the next such call sets acc to 0 (end of line); otherwise acc gets the code of the next buffered character. NOTE(review): the bundled examples appear to be written around the unchanged-acc call - confirm before altering it. 122 | if (!stdin) { 123 | if (firstread) { 124 | getline stdin 125 | } else { 126 | acc = 0 127 | } 128 | firstread = !firstread 129 | } else { 130 | acc = ord[substr(stdin, 1, 1)] 131 | stdin = substr(stdin, 2) 132 | } 133 | } 134 | 135 | function halt(pc, mem) { # Print the instruction count and pc, dump non-zero memory cells when DEBUG is set, then exit; never returns. 136 | printf("\nStopping after %d instructions. Program counter at %d.\n", cycles, pc) 137 | if (!DEBUG) exit(0) 138 | print "Memory:\n-------------" 139 | for (i = 0; i < nextmem; i++) { # memory is 0-based: cells 0 .. nextmem-1 were filled in pass 2 140 | # = is not a typo, assignment to a variable returns the value 141 | if (b = mem[i]) { printf("%3d: %.5d%s\n", i, b, is_ascii(b) ? sprintf(" %c", b): "") } 142 | } 143 | printf("-------------\nMemory ends at address: %d\n", nextmem - 1) 144 | exit(0) 145 | } 146 | 147 | function is_ascii(x) { return (x >= 32 && x <= 126)} # printable ASCII is 32..126; the previous bound of 177 looks like octal 0177 written as decimal 148 | --------------------------------------------------------------------------------