├── bin
    └── .gitignore
├── .gitignore
├── examples
    ├── basic.asm
    ├── fibonacci_loop.asm
    ├── fibonacci_recursive.asm
    └── memoryadd.asm
├── LICENSE
├── src
    ├── yarn.h
    └── yarn.c
├── README.md
└── tools
    └── assemble.py


/bin/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | 
3 | !.gitignore
4 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | ### C ###
 2 | # Object files
 3 | *.o
 4 | *.ko
 5 | *.obj
 6 | *.elf
 7 | 
 8 | # Precompiled Headers
 9 | *.gch
10 | *.pch
11 | 
12 | # Libraries
13 | *.lib
14 | *.a
15 | *.la
16 | *.lo
17 | 
18 | # Shared objects (inc. Windows DLLs)
19 | *.dll
20 | *.so
21 | *.so.*
22 | *.dylib
23 | 
24 | # Executables
25 | *.exe
26 | *.out
27 | *.app
28 | *.i*86
29 | *.x86_64
30 | *.hex
31 | 
32 | # Debug files
33 | *.dSYM/
34 | 
35 | 
36 | ### Yarn ###
37 | # Debug memory dumps:
38 | *.mem
39 | 


--------------------------------------------------------------------------------
/examples/basic.asm:
--------------------------------------------------------------------------------
 1 | ; The code will start executing at the first instruction. Technically you do not
 2 | ;   need to label the "Init", but it is a nice convention to think of it as a
 3 | ;   procedure.
 4 | Init:                    ; Entry point: calls Multiply_by_five(3)
 5 |   mov $3, %c1            ; c1 = 3, the value to multiply
 6 | 
 7 |   push %c1               ; pass c1 as the single stack argument
 8 |   call :Multiply_by_five ; the procedure leaves its result in %ret
 9 |   add $4, %stk ; remove c1 from stack
10 | 
11 |   halt ; Needed for graceful shutdown
12 | 
13 | 
14 | Multiply_by_five:         ; Multiply_by_five(int x): leaves x*5 in %ret
15 |   push %bse       ; Stack set up
16 |   mov %stk, %bse  ; bse = stack pointer after saving the caller's bse
17 | 
18 |   mov *(%bse+$8), %ret    ; ret = x, the argument the caller pushed
19 | 
20 |   mul $5,%ret             ; ret = ret * 5
21 | 
22 |   pop %bse   ; Stack tear down
23 |   ret
24 | 


--------------------------------------------------------------------------------
/examples/fibonacci_loop.asm:
--------------------------------------------------------------------------------
 1 | Init:               ; Entry point: calls Fibonacci(20)
 2 |   mov $20, %c1      ; c1 = 20, the index to look up
 3 | 
 4 |   push %c1          ; pass the index as the single stack argument
 5 |   call :Fibonacci   ; the procedure leaves its result in %ret
 6 |   pop %c1           ; take the argument back off the stack
 7 | 
 8 |   halt
 9 | 
10 | ; Fibonacci(int i) -> %ret
11 | ;   Iteratively finds the i'th fibonacci number (f(0)=0, f(1)=1, f(2)=1, ...).
12 | Fibonacci:
13 |   push %bse         ; Stack set up
14 |   mov %stk, %bse
15 | 
16 |   mov *(%bse+$8), %ret  ; ret = i
17 |   mov $1, %s5           ; s5 = constant 1 used by every comparison below
18 | 
19 |   lt %ret, %s5      ; if i < 1 return 0
20 |   jif :Fibonacci_EndZero
21 | 
22 |   eq %ret, %s5         ; if i == 1 return 1
23 |   jif :Fibonacci_End
24 | 
25 |   mov $1, %s1   ; f(n-1), starts as f(1)
26 |   mov $0, %s2   ; f(n-2), starts as f(0)
27 |   mov %ret, %s3 ; counter = i (>= 2 here); the loop body runs i-1 times
28 | 
29 | Fibonacci_Loop:
30 |   mov %s1, %ret
31 |   add %s2, %ret   ; ret = f(n-1) + f(n-2)
32 |   mov %s1, %s2    ; slide the window forward one step
33 |   mov %ret, %s1
34 | 
35 |   sub $1, %s3
36 |   lte %s3, %s5
37 |   jif :Fibonacci_End  ; if counter <= 1 break (was < 1, which returned f(i+1))
38 |   jmp :Fibonacci_Loop ; else continue
39 | 
40 | Fibonacci_EndZero:    ; zero out %ret and return
41 |   mov $0, %ret
42 | 
43 | Fibonacci_End:
44 |   pop %bse          ; Stack tear down
45 |   ret
46 | 


--------------------------------------------------------------------------------
/examples/fibonacci_recursive.asm:
--------------------------------------------------------------------------------
 1 | Init:               ; Entry point: calls Fibonacci(20)
 2 |   mov $20, %c1      ; c1 = 20, the index to look up
 3 | 
 4 |   push %c1          ; pass the index as the single stack argument
 5 |   call :Fibonacci   ; the procedure leaves its result in %ret
 6 |   pop %c1           ; take the argument back off the stack
 7 | 
 8 |   halt
 9 | 
10 | ; Fibonacci(int i) -> %ret
11 | ;   Recursively finds the i'th number (starts counting at 0) of the fibonacci
12 | ;   sequence.
13 | Fibonacci:
14 |   push %bse         ; Stack set up
15 |   mov %stk, %bse
16 |   push %c1          ; c1/c2 are overwritten below, so save the caller's values
17 |   push %c2
18 | 
19 |   mov *(%bse+$8), %ret  ; ret = i
20 |   mov $1, %s1           ; s1 = constant 1 for the two comparisons
21 | 
22 |   lt %ret, %s1      ; if i < 1 return 0
23 |   jif :Fibonacci_EndZero
24 | 
25 |   eq %ret, %s1         ; if i == 1 return 1
26 |   jif :Fibonacci_End
27 | 
28 |   mov %ret, %c1 ; n-1
29 |   mov %ret, %c2 ; n-2
30 |   sub $1, %c1
31 |   sub $2, %c2
32 | 
33 |   ; call Fibonacci(n-1)
34 |   push %c1
35 |   call :Fibonacci
36 |   add $4, %stk  ; remove the argument from the stack
37 | 
38 |   mov %ret, %c1 ; keep f(n-1) in c1 while the second call runs
39 | 
40 |   ; call Fibonacci(n-2)
41 |   push %c2
42 |   call :Fibonacci
43 |   add $4, %stk  ; remove the argument from the stack
44 | 
45 |   add %c1, %ret ; n-1 + n-2
46 |   jmp :Fibonacci_End
47 | 
48 | Fibonacci_EndZero:    ; zero out %ret and return
49 |   mov $0, %ret
50 | 
51 | Fibonacci_End:        ; restore the saved registers in reverse push order
52 |   pop %c2
53 |   pop %c1
54 |   pop %bse
55 |   ret
56 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2016 WetDesertRock
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/examples/memoryadd.asm:
--------------------------------------------------------------------------------
 1 | Init:
 2 |   mov 0x0, %c1 ; Start address
 3 |   mov 0xAF, %c2 ; Size
 4 | ; NOTE(review): the README places program memory at 0x0 - confirm the fill cannot overwrite this code
 5 |   push %c2        ; arguments go on the stack last-to-first: size ...
 6 |   push %c1        ; ... then start
 7 |   call :FillMemory
 8 |   call :SumMemory ; reuses the two arguments still on the stack
 9 |   add $8, %stk    ; remove both arguments from the stack
10 | 
11 |   halt ; Needed for graceful shutdown
12 | 
13 | 
14 | ; FillMemory(int *start, int size)
15 | ;   Fills the memory starting at *start with `size` decrementing numbers
16 | FillMemory:
17 |   push %bse       ; Stack set up
18 |   mov %stk, %bse
19 | 
20 |   mov $0, %s5            ; s5 = constant 0 for the loop-exit comparisons
21 |   mov *(%bse+$8), %s1  ; *start
22 |   mov *(%bse+$12), %s2   ; size
23 | 
24 |   lte %s2, %s5           ; if size <= 0 there is nothing to write
25 |   jif :FillMemory_End
26 | FillMemory_Loop:
27 |   mov %s2, *(%s1) ; *start = counter
28 | 
29 |   add $4, %s1            ; advance the write address by 4 bytes
30 |   sub $1, %s2            ; counter counts down from size
31 |   lte %s2, %s5
32 |   jif :FillMemory_End   ; if size <= 0 then break
33 |   jmp :FillMemory_Loop  ; else loop
34 | 
35 | FillMemory_End:
36 |   pop %bse   ; Stack tear down
37 |   ret
38 | ; End FillMemory
39 | 
40 | ; SumMemory(int *start, int size) -> %ret
41 | SumMemory:
42 |   push %bse       ; Stack set up
43 |   mov %stk, %bse
44 | 
45 |   mov $0, %s5           ; s5 = constant 0 for the loop-exit comparisons
46 |   mov *(%bse+$8), %s1  ; *start
47 |   mov *(%bse+$12), %s2   ; size
48 |   mov $0, %ret          ; sum
49 | 
50 |   lte %s2, %s5          ; if size <= 0 return the empty sum, 0
51 |   jif :Sum_End
52 | 
53 | Sum_Loop:
54 |   mov *(%s1), %s3       ; s3 = value at the current address
55 |   add %s3, %ret         ; sum += s3
56 | 
57 |   add $4, %s1           ; advance the read address by 4 bytes
58 |   sub $1, %s2           ; one fewer value left to add
59 |   lte %s2, %s5
60 |   jif :Sum_End   ; if size <= 0 then break
61 |   jmp :Sum_Loop  ; else loop
62 | 
63 | Sum_End:
64 |   pop %bse   ; Stack tear down
65 |   ret
66 | ; End SumMemory
67 | 


--------------------------------------------------------------------------------
/src/yarn.h:
--------------------------------------------------------------------------------
  1 | // Usage:
  2 | //   These .c and .h files only deal with assembled code. To create code to be
  3 | //   read, you either need to assemble it by hand (with a hex editor and lots of
  4 | //   patience), or you can use the included assembler.
  5 | //   Most typical usage will just require setting up the state, loading in the
  6 | //   instructions, executing, and destroying after you are finished.
  7 | //   This will usually look like:
  8 | //     Y = yarn_init(256*sizeof(yarn_int));
  9 | //     yarn_loadCode(Y,buffer,bufsize);
 10 | //     yarn_execute(Y, -1);
 11 | //     yarn_destroy(Y);
 12 | //
 13 | //   Most of this is self-explanatory, the only surprising thing may be the
 14 | //   `yarn_execute` call's second argument. This argument specifies how many
 15 | //   instructions it should execute. -1 will execute forever. This is useful if
 16 | //   you want to embed yarn elsewhere and don't want it to dominate your program.
 17 | //
 18 | // Extending:
 19 | //   System calls are the way to extend it: register a C function under an ID
 20 | //   with `yarn_registerSysCall` (declared below).
 21 | 
 22 | #include <stdint.h>
 23 | #include <stdlib.h>
 24 | 
 25 | #ifndef YARN_H_
 26 | #define YARN_H_
 27 | 
 28 | #define YARN_VERSION "0.0.1"
 29 | 
 30 | typedef struct yarn_state yarn_state;
 31 | typedef int32_t yarn_int;
 32 | typedef uint32_t yarn_uint;
 33 | typedef void (*yarn_CFunc)(yarn_state *Y);
 34 | 
 35 | // Create the yarn state, returns NULL on failure
 36 | yarn_state *yarn_init(size_t memsize);
 37 | // Destroys the yarn state.
 38 | void yarn_destroy(yarn_state *Y);
 39 | // Loads the object code, returns 0 on success, -1 on failure.
 40 | int yarn_loadCode(yarn_state *Y, char *code, size_t codesize);
 41 | // Executes icount instructions (-1 for the whole program). Returns the status.
 42 | int yarn_execute(yarn_state *Y, int icount);
 43 | 
 44 | void *yarn_getMemoryPtr(yarn_state *Y);
 45 | size_t yarn_getMemorySize(yarn_state *Y);
 46 | size_t yarn_getInstructionCount(yarn_state *Y);
 47 | 
 48 | const char *yarn_registerToString(unsigned char reg);
 49 | const char *yarn_statusToString(int status);
 50 | 
 51 | void yarn_getRegister(yarn_state *Y, unsigned char reg, void *val) ;
 52 | void yarn_setRegister(yarn_state *Y, unsigned char reg, void *val);
 53 | void yarn_incRegister(yarn_state *Y, unsigned char reg, yarn_int val);
 54 | 
 55 | void yarn_getMemory(yarn_state *Y, yarn_uint pos, void *val, size_t bsize);
 56 | void yarn_setMemory(yarn_state *Y, yarn_uint pos, void *val, size_t bsize);
 57 | 
 58 | void yarn_push(yarn_state *Y, yarn_int val);
 59 | yarn_int yarn_pop(yarn_state *Y);
 60 | 
 61 | int yarn_getStatus(yarn_state *Y);
 62 | void yarn_setStatus(yarn_state *Y, unsigned char val);
 63 | 
 64 | int yarn_getFlag(yarn_state *Y, int flag);
 65 | void yarn_setFlag(yarn_state *Y, int flag);
 66 | void yarn_clearFlag(yarn_state *Y, int flag);
 67 | 
 68 | void yarn_registerSysCall(yarn_state *Y, yarn_uint key, yarn_CFunc fun);
 69 | yarn_CFunc yarn_getSysCall(yarn_state *Y, yarn_uint key);
 70 | 
 71 | enum { // VM status codes; yarn_statusToString() takes one of these.
 72 |   YARN_STATUS_OK,
 73 |   YARN_STATUS_PAUSE,
 74 |   YARN_STATUS_HALT,
 75 |   YARN_STATUS_INVALIDMEMORY,
 76 |   YARN_STATUS_INVALIDINSTRUCTION,
 77 |   YARN_STATUS_DIVBYZERO,
 78 |   YARN_STATUS_NUM, // Count of the status codes above.
 79 | };
 80 | enum { // Flag identifiers passed to yarn_getFlag / yarn_setFlag / yarn_clearFlag.
 81 |   YARN_FLAG_CONDITIONAL, // 0x0 // Stores result of last COND statement.
 82 |   YARN_FLAG_NUM, // Count of the flags above.
 83 | };
 84 | 
 85 | enum { // Register numbers; each value fits in 4 bits (0x0-0xF).
 86 |   YARN_REG_INSTRUCTION,   //0x0 // Special Registers:
 87 |   YARN_REG_STACK,         //0x1
 88 |   YARN_REG_BASE,          //0x2
 89 |   YARN_REG_RETURN,        //0x3
 90 |   YARN_REG_C1,            //0x4 // Callee Save Registers:
 91 |   YARN_REG_C2,            //0x5
 92 |   YARN_REG_C3,            //0x6
 93 |   YARN_REG_C4,            //0x7
 94 |   YARN_REG_C5,            //0x8
 95 |   YARN_REG_C6,            //0x9
 96 |   YARN_REG_S1,            //0xA // Caller (sub or scratch) Save Registers:
 97 |   YARN_REG_S2,            //0xB
 98 |   YARN_REG_S3,            //0xC
 99 |   YARN_REG_S4,            //0xD
100 |   YARN_REG_S5,            //0xE
101 |   YARN_REG_NULL,          //0xF // Used to indicate 'no register' for arith and move operations.
102 |   YARN_REG_NUM,           // Count of the registers above (16).
103 | };
104 | 
105 | enum { // Instruction classes; each occupies the high nibble of an opcode byte.
106 |   YARN_ICODE_CONTROL = 0x00,       //0x00
107 |   YARN_ICODE_ARITH = 0x10,         //0x10
108 |   YARN_ICODE_MOVE = 0x20,          //0x20
109 |   YARN_ICODE_STACK = 0x30,         //0x30
110 |   YARN_ICODE_BRANCH = 0x40,        //0x40
111 |   YARN_ICODE_CONDITIONAL = 0x50,   //0x50
112 |   YARN_ICODE_NUM = 0x60,           //0x60 // First value past the last class.
113 | };
114 | 
115 | enum { // Opcode bytes: a YARN_ICODE_* class plus a function index in the low nibble.
116 |   YARN_INST_HALT = YARN_ICODE_CONTROL,  //0x00
117 |   YARN_INST_PAUSE,                      //0x01
118 |   YARN_INST_NOP,                        //0x02
119 | 
120 |   /* ARITH: */
121 |   YARN_INST_ADD = YARN_ICODE_ARITH,   //0x10
122 |   YARN_INST_SUB,                      //0x11
123 |   YARN_INST_MUL,                      //0x12
124 |   YARN_INST_DIV,                      //0x13
125 |   YARN_INST_DIVS,                     //0x14 signed
126 |   YARN_INST_LSH,                      //0x15
127 |   YARN_INST_RSH,                      //0x16
128 |   YARN_INST_RSHS,                     //0x17 signed
129 |   YARN_INST_AND,                      //0x18
130 |   YARN_INST_OR,                       //0x19
131 |   YARN_INST_XOR,                      //0x1A
132 |   YARN_INST_NOT,                      //0x1B
133 | 
134 |   /* MOVE */
135 |   YARN_INST_IR = YARN_ICODE_MOVE,    //0x20
136 |   YARN_INST_MR,                      //0x21
137 |   YARN_INST_RR,                      //0x22
138 |   YARN_INST_RM,                      //0x23
139 | 
140 |   /* STACK */
141 |   YARN_INST_PUSH = YARN_ICODE_STACK,  //0x30
142 |   YARN_INST_POP,                      //0x31
143 | 
144 |   /* BRANCH */
145 |   YARN_INST_CALL = YARN_ICODE_BRANCH,  //0x40
146 |   YARN_INST_RET,                       //0x41
147 |   YARN_INST_JUMP,                      //0x42
148 |   YARN_INST_CONDJUMP,                  //0x43
149 |   YARN_INST_SYSCALL,                   //0x44
150 | 
151 |   /* CONDITIONAL */
152 |   YARN_INST_LT = YARN_ICODE_CONDITIONAL,    //0x50
153 |   YARN_INST_LTS,                            //0x51 signed
154 |   YARN_INST_LTE,                            //0x52
155 |   YARN_INST_LTES,                           //0x53 signed
156 |   YARN_INST_EQ,                             //0x54
157 |   YARN_INST_NEQ,                            //0x55
158 | 
159 |   YARN_INST_NUM, // One past the last opcode above (0x56).
160 | };
161 | 
162 | #endif
163 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Yarn
  2 | A small embeddable VM with a custom instruction set and statically allocated
  3 | heap.
  4 | 
  5 | ## Overview
  6 |   * Simple instruction set.
  7 |   * Sequential (non-pipelined) execution.
  8 |   * Single space memory, stack and heap occupy the same memory space.
  9 |   * 16 registers total, 11 multi-purpose.
 10 |   * ~25 instructions total.
 11 |   * Name inspired by cats.
 12 | 
 13 | ## Usage:
 14 | Compile yarn:
 15 | ```
 16 | ./build.sh
 17 | ```
 18 | 
 19 | Assemble your code:
 20 | ```
 21 | ./tools/assemble.py code.asm code.o
 22 | ```
 23 | 
 24 | Run it:
 25 | ```
 26 | ./bin/yarn code.o
 27 | ```
 28 | 
 29 | If you want to enable some debug features to help you debug your code, compile
 30 | yarn with -DYARN_DEBUG. You can pass this argument directly to ./build.sh:
 31 | ```
 32 | ./build.sh -DYARN_DEBUG
 33 | ```
 34 | 
 35 | ## Embedding and Extending
 36 | Embedding is designed to be simple. Here is a simple example of embedding it:
 37 | ```c
 38 | Y = yarn_init(256*sizeof(yarn_int));
 39 | yarn_loadCode(Y,buffer,bufsize);
 40 | yarn_execute(Y, -1);
 41 | yarn_destroy(Y);
 42 | ```
 43 | 
 44 | `yarn_init` takes one argument, which is the number of bytes to allocate.
 45 | Typically you will want to multiply this by the size of the basic int type that
 46 | yarn uses. `yarn_loadCode` will copy the object code into memory so it can be
 47 | executed. `yarn_execute` executes the specified number of instructions, or the
 48 | whole program if -1 is specified.
 49 | 
 50 | System calls are the main way of extending Yarn. After creating the yarn_state,
 51 | you can register system calls to be used with it with the `yarn_registerSysCall`
 52 | function. Here is an example of a system call.
 53 | ```c
 54 | static void vyarn_getheight(yarn_state *Y) {
 55 |   yarn_setRegister(Y, YARN_REG_RETURN, &screenHeight);
 56 | }
 57 | 
 58 | 
 59 | Y = yarn_init(256*sizeof(yarn_int));
 60 | yarn_registerSysCall(Y, 0xA0, vyarn_getheight);
 61 | ```
 62 | Note you explicitly set the ID for the system call. If there is a preexisting
 63 | system call with the same ID, it will overwrite that system call and replace it.
 64 | 
 65 | ## Memory Layout
 66 | All of the program memory is in one chunk. While the amount of possible memory
 67 | is set by the environment, if you had 0x400 bytes of memory allocated, it could
 68 | be visualized like this:
 69 | 
 70 | ```
 71 | Offset  |              Use              |
 72 | 0x0     |         Program memory        |
 73 | 0x4     |              ...              |
 74 | ...           
 75 | 0x3B8   |         Base of stack         |
 76 | 0x3BC   |      Register 0xF (%null)     |
 77 | 0x3C0   |      Register 0xE (%s5)       |
 78 | ...     |              ...              |
 79 | 0x3F4   |      Register 0x1 (%stk)      |
 80 | 0x3F8   |      Register 0x0 (%ins)      |
 81 | 0x3FC   |    Program status and flags   |
 82 | 0x400   |         Not allocated         |
 83 | ```
 84 | 
 85 | ## Registers
 86 | Here is a list of registers
 87 |   * *%ins*       -  Instruction pointer
 88 |   * *%stk*       -  Stack pointer
 89 |   * *%bse*       -  Base pointer
 90 |   * *%ret*       -  Return register
 91 |   * *%c1* - *%c6*  -  Callee save registers
 92 |   * *%s1* - *%s5*  -  Caller save registers (also known as scratch registers)
 93 |   * *%null*      -  Null register, used to indicate no register used
 94 | 
 95 | 
 96 | ## Instructions
 97 | Each instruction type has a specific format it uses for encoding. Given here is
 98 | the encoding format with the byte offset of each field. The final offset, where
 99 | the next instruction begins, is given as context. If two values are 4 bits each,
100 | they are separated by a `:` and together occupy one byte.
101 | 
102 | ### Control instructions
103 | ```
104 | | 0x0         | 0x1
105 | | icode:ifun  |
106 | ```
107 | 
108 | These one byte instructions control the program as a whole.
109 |   * *halt*  ( stops the program execution )
110 |   * *pause* ( pauses the execution, however depending on the environment it may
111 |     not get restarted)
112 |   * *nop*   ( does nothing for one instruction )
113 | 
114 | ### Arithmetic instructions
115 | ```
116 | | 0x0         | 0x1    |  0x2        | 0x6
117 | | icode:ifun  | rA:rB  | d           |
118 | ```
119 | 
120 | If rA is null then d will be used instead of the value in rA.
121 | 
122 | Here are the available instructions:
123 |   * *add*
124 |   * *sub*
125 |   * *mul*
126 |   * *div*
127 |   * *divs* (signed)
128 |   * *lsh* (left shift)
129 |   * *rsh* (right shift)
130 |   * *rshs* (right shift, signed)
131 |   * *and*
132 |   * *or*
133 |   * *xor*
134 |   * *not*
135 | 
136 | ### Move instructions
137 | ```
138 | | 0x0         | 0x1    | 0x2         | 0x6
139 | | icode:ifun  | rA:rB  | d           |
140 | ```
141 | 
142 | If rA is null then d will be used instead of the value in rA.
143 | Although the assembler can figure out which move type is used and only have one
144 | move instruction, the VM cannot implicitly figure it out.
145 |   * *irmov* ( immediate to register )
146 |   * *mrmov* ( memory to register )
147 |   * *rrmov* ( register to register )
148 |   * *rmmov* ( register to memory )
149 | 
150 | ### Stack instructions
151 | ```
152 | | 0x0         | 0x1    | 0x2
153 | | icode:ifun  | rA     |
154 | ```
155 | 
156 | These instructions help manipulate the stack pointer.
157 |   * *push*
158 |   * *pop*
159 | 
160 | ### Branch instructions
161 | ```
162 | | 0x0         | 0x1          | 0x5
163 | | icode:ifun  | d            |
164 | ```
165 | 
166 | These instructions will jump to a new instruction location.
167 |   * *call*
168 |   * *ret*
169 |   * *jmp*
170 |   * *jif* (jump if, conditional jump will only jump if conditional flag is true)
171 |   * *syscall* (acts the same as call, d will specify which call to make)
172 | 
173 | 
174 | ### Conditional instructions
175 | ```
176 | | 0x0         | 0x1    | 0x2
177 | | icode:ifun  | rA:rB  |
178 | ```
179 | 
180 | These instructions will compare the two registers and set the conditional flag
181 | based on the result. Signed comparisons will interpret the numbers as 2's
182 | complement signed integers.
183 |   * *lt*  ( < )
184 |   * *lts*  ( < signed comparison)
185 |   * *lte* ( <= )
186 |   * *ltes* ( <= signed comparison)
187 |   * *eq*  ( == )
188 |   * *neq* ( != )
189 | 
190 | ## Assembler Syntax Primer
191 | The assembler as it stands is a very crude tool that gets the job done. This
192 | syntax outlined here is subject to change when I get around to improving the
193 | assembler. This is a primer for the syntax the assembler uses. You can use `;`
194 | for a comment.
195 | 
196 | The basic format is `instruction arg1,arg2` where instruction is a mnemonic
197 | given above, and arg1,arg2 are expressions that usually consist of registers.
198 | 
199 | There are several basic types of symbols you can use:
200 | 
201 | **Registers** are prefixed with % and one of 16 types (but you typically only use
202 | 15), and generally have specific purposes associated with them, however this is
203 | just by convention. Example: `%ret`
204 | 
205 | **Literals** are represented in base 10 or base 16 and are prefixed with `$` or
206 | `0x` respectively to indicate as such. If trying to indicate a negative number,
207 | the negative sign goes in front of the `0x` and after the `$`. Examples: `$255`
208 | `0xFF` `$-255` `-0xFF`
209 | 
210 | **Locations** are represented by a symbol in this format: `:Name` and get replaced
211 | with the location specified elsewhere in the assembly. They get specified in the
212 | code by `Name:` and get set to whatever address the next instruction is at.
213 | Example:
214 | ```x86
215 | Callee:
216 |   ret
217 | 
218 | Caller:
219 |   call :Callee
220 | ```
221 | 
222 | **Memory addresses** are given by this format: `*(%reg+$offset)` where `%reg` is
223 | the specified register, and `$offset` is a literal specifying the offset in
224 | memory. Either the register or offset can be omitted. Examples: `*(%bse)`
225 | `*(%bse+$8)`.
226 | 
227 | 
228 | ## System Calls
229 | System calls are used just the same as "call" instructions, except instead of
230 | calling a memory address it will give an ID to specify which function to call.
231 | An example of getting the available memory and storing it in memory address 0x0
232 | is:
233 | ```x86
234 | syscall 0x02  ; stores available memory in %ret
235 | mov %ret, *(0x0) ; moves memory from %ret into 0x0
236 | ```
237 | 
238 | |  ID   | C equivalent declaration |                Description               |
239 | | ----: | ------------------------ | ---------------------------------------- |
240 | |  0x00 | uint time()              | Returns the current time of the computer |
241 | |  0x01 | uint cycles()            | Returns the current amount of cycles executed by the VM |
242 | |  0x02 | uint availablememory()   | Returns the number of bytes available to the VM |
243 | 
244 | ## Roadmap
245 |   * Create a proper assembler (with proper errors)  
246 |   * Allow for some sort of linking/combining asm files
247 |   * Debugging tools
248 | 


--------------------------------------------------------------------------------
/tools/assemble.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: ascii -*-
  3 | 
  4 | import sys
  5 | import os
  6 | import struct
  7 | import re
  8 | 
  9 | """
 10 | We want to have syntax like this:
 11 | 
 12 | Number:
 13 |     $1512
 14 |     or
 15 |     0x1242
 16 | 
 17 | Register:
 18 |     %ret
 19 | 
 20 | Memory at...
 21 |     Register:
 22 |         *(ret)
 23 |     Offset:
 24 |         *(0x1252)
 25 |     Register+Offset:
 26 |         *(ret+0x1252)
 27 | 
 28 | Addition of:
 29 |     Register and Register:
 30 |         %c1, %c2
 31 |     Register+Literal and Register:
 32 |         %c1+0x1252, %c2
 33 |     Literal and Register:
 34 |         0x1252, %c2
 35 | 
 36 | """
 37 | 
 38 | registers = {
 39 |     "ins": 0x0,
 40 |     "stk": 0x1,
 41 |     "bse": 0x2,
 42 |     "ret": 0x3,
 43 |     "c1": 0x4,
 44 |     "c2": 0x5,
 45 |     "c3": 0x6,
 46 |     "c4": 0x7,
 47 |     "c5": 0x8,
 48 |     "c6": 0x9,
 49 |     "s1": 0xA,
 50 |     "s2": 0xB,
 51 |     "s3": 0xC,
 52 |     "s4": 0xD,
 53 |     "s5": 0xE,
 54 |     "null": 0xF, # TODO: Throw error on use?
 55 | }
 56 | 
 57 | class AssembleError(Exception):
 58 |     def __init__(self, linenum, message):
 59 |         self.linenum = linenum
 60 |         self.message = message
 61 | 
 62 |     def __str__(self):
 63 |         return "Error(line %d): %s"%(self.linenum,self.message)
 64 | 
 65 | icodes = {
 66 |     "control": 0x0,
 67 |     "arith": 0x1,
 68 |     "move": 0x2,
 69 |     "stack": 0x3,
 70 |     "branch": 0x4,
 71 |     "conditional": 0x5
 72 | }
 73 | 
 74 | # Order in these sub lists specify the ifun code.
 75 | ifuns = {
 76 |     "control": [
 77 |         "halt",
 78 |         "pause",
 79 |         "nop"
 80 |     ],
 81 |     "arith": [
 82 |         "add",
 83 |         "sub",
 84 |         "mul",
 85 |         "div",
 86 |         "divs",
 87 |         "lsh",
 88 |         "rsh",
 89 |         "rshs",
 90 |         "and",
 91 |         "or",
 92 |         "xor",
 93 |         "not",
 94 |     ],
 95 |     "move": [
 96 |         "irmov",
 97 |         "mrmov",
 98 |         "rrmov",
 99 |         "rmmov"
100 |     ],
101 |     "stack": [
102 |         "push",
103 |         "pop"
104 |     ],
105 |     "branch": [
106 |         "call",
107 |         "ret",
108 |         "jmp",
109 |         "jif",
110 |         "syscall"
111 |     ],
112 |     "conditional": [
113 |         "lt",
114 |         "lts",
115 |         "lte",
116 |         "ltes",
117 |         "eq",
118 |         "neq"
119 |     ]
120 | }
121 | 
122 | instruction_types = {}  # mnemonic -> instruction type name (a key of `ifuns`)
123 | for ins_type in ifuns.keys():
124 |     for ins in ifuns[ins_type]:
125 |         instruction_types[ins] = ins_type
126 | 
127 | instruction_types["mov"] = "move"  # generic `mov`; not listed in `ifuns`, so it is added by hand
128 | 
129 | #Note for negative d's, it has to look like: $-100 or -0x231A. The + always has to be there.
130 | memcase = re.compile(r"^\*\((.+?)\)") #Matches memory expressions
131 | 
132 | regre = r"%(?P<reg>\w+)"
133 | litre = r"(?P<dtype>-?0x|\$-?)(?P<d>[0-9a-fA-F]+)"
134 | locre = r":(?P<loc>\w+)"
135 | 
136 | symcase1 = re.compile("^"+regre+"$") # Matches %ret and similar
137 | symcase2 = re.compile("^"+regre+r"\+"+litre+"$") # Matches %ret+$123A2 and similar
138 | symcase3 = re.compile("^"+litre+"$") # Matches $123A2 and similar
139 | symcase4 = re.compile("^"+locre+"$") # Matches :Init and similar
140 | 
141 | # Parse a symbol (argument after an instruction) and return a dictonary of its
142 | # its parsed results.
143 | def parseSymbol(sym):
144 |     info = {
145 |         "reg": registers["null"],
146 |         "d": 0,
147 |         "dstruct": "I",
148 |         "loc": "",
149 |         "type": ""
150 |     }
151 |     sym = sym.strip()
152 |     reg = "null"
153 |     dtype = ""
154 |     d = ""
155 |     loc = ""
156 |     m = memcase.match(sym)
157 |     if m:
158 |         sym = m.groups()[0]
159 |         info["type"] = "mem"
160 | 
161 |     m = symcase1.match(sym)
162 |     if m: # If it looks like %ret and similar
163 |         reg = m.group("reg")
164 | 
165 |     m = symcase2.match(sym)
166 |     if m: # If it looks like %ret+$123A2 and similar
167 |         reg = m.group("reg")
168 |         dtype = m.group("dtype")
169 |         d = m.group("d")
170 | 
171 |     m = symcase3.match(sym)
172 |     if m: # If it looks like $123A2 and similar
173 |         dtype = m.group("dtype")
174 |         d = m.group("d")
175 | 
176 |     m = symcase4.match(sym)
177 |     if m: # If it looks like :Init and similar
178 |         info["loc"] = m.group("loc")
179 | 
180 |     info["reg"] = registers[reg]
181 |     negative = False
182 |     if d != "":
183 |         if "-" in dtype:
184 |             negative = True
185 |             info["dstruct"] = "i"
186 |             dtype = dtype.replace("-","")
187 | 
188 |         if dtype == "$":
189 |             info["d"] = int(d)
190 |         else:
191 |             info["d"] = int(d, 16)
192 | 
193 |     return info
194 | 
195 | def expectRegister(sym):
196 |     r = 0
197 |     if sym[0] == "%":
198 |         r = registers[sym[1:]]
199 |     elif sym[0] == "$":
200 |         r = int(syms[1])
201 |     elif sym.strip() == "":
202 |         r = 0 # null register
203 |     else:
204 |         print("Unknown symbol: %s"%sym)
205 |         sys.exit()
206 | 
207 |     return r
208 | 
209 | if __name__ == "__main__":
210 |     outpath = ""
211 |     if len(sys.argv) < 2:
212 |         print("Must provide an input path.")
213 |         sys.exit()
214 | 
215 |     if len(sys.argv) >= 3:
216 |         outpath = sys.argv[2]
217 |     else:
218 |         outpath = os.path.splitext(sys.argv[1])[0]+".o"
219 | 
220 |     objectcode = bytes()
221 |     locations = {}
222 |     lookbacks = {}
223 |     with open(sys.argv[1],'r') as f:
224 |         nextlocation = None
225 |         for linenum, line in enumerate(f):
226 |             #Basic commenting
227 |             if line.find(";") != -1:
228 |                 line = line[:line.find(";")]
229 | 
230 |             line = line.strip()
231 |             if line == "":
232 |                 continue
233 | 
234 |             syms = line.split(" ",1)
235 |             args = []
236 |             if len(syms) > 1:
237 |                 args = syms[1].split(",")
238 | 
239 |             if syms[0][-1] == ":":
240 |                 nextlocation = syms[0][:-1]
241 |                 continue
242 |             else:
243 |                 if nextlocation != None:
244 |                     locations[nextlocation] = len(objectcode)
245 |                     nextlocation = None
246 | 
247 |                 ins = syms[0]
248 |                 ins_type = instruction_types[ins]
249 |                 ins_id = 0
250 |                 ins_id = icodes[ins_type] << 4
251 |                 if ins_type != "move":
252 |                     ins_id |= ifuns[ins_type].index(ins)
253 | 
254 |                 if ins_type == "control":
255 |                     objectcode += struct.pack("=B", ins_id)
256 |                 elif ins_type == "stack":
257 |                     rA = parseSymbol(args[0])['reg']
258 |                     objectcode += struct.pack("=BB", ins_id, rA<<4)
259 |                 elif ins_type == "move":  #TODO: Better arg parsing so we can do rm and mr moves
260 |                     infoA = parseSymbol(args[0])
261 |                     infoB = parseSymbol(args[1])
262 |                     d = 0
263 |                     dstruct = "I"
264 |                     rA = infoA["reg"]
265 |                     rB = infoB["reg"]
266 |                     movtype = "rrmov"
267 |                     if infoA["type"] == "mem" and infoB["type"] != "mem":
268 |                         movtype = "mrmov"
269 |                         d = infoA["d"]
270 |                         dstruct = infoA["dstruct"]
271 |                     elif infoA["type"] != "mem" and infoB["type"] == "mem":
272 |                         movtype = "rmmov"
273 |                         d = infoB["d"]
274 |                         dstruct = infoB["dstruct"]
275 |                     elif rA == registers["null"] and infoB["type"] != "mem":
276 |                         movtype = "irmov"
277 |                         d = infoA["d"]
278 |                         dstruct = infoA["dstruct"]
279 |                     elif rA != registers["null"] and rB != registers["null"] and infoB["type"] != "mem":
280 |                         movtype = "rrmov"
281 |                         d = infoA["d"]
282 |                         dstruct = infoA["dstruct"]
283 |                     else:
284 |                         raise AssembleError(linenum,"Invalid mov statement.")
285 | 
286 |                     # print("Move: %s %X %X %d"%(movtype,rA,rB,d))
287 | 
288 |                     ins_id |= ifuns[ins_type].index(movtype)
289 |                     objectcode += struct.pack("=BB%s"%dstruct, ins_id, rA<<4|rB,d)
290 | 
291 |                 elif ins_type == "arith":
292 |                     infoA = parseSymbol(args[0])
293 |                     infoB = parseSymbol(args[1])
294 |                     d = infoA['d']
295 |                     dstruct = infoA['dstruct']
296 |                     rA = infoA['reg']
297 |                     rB = infoB['reg']
298 |                     objectcode += struct.pack("=BB%s"%dstruct, ins_id, rA<<4|rB,d)
299 | 
300 |                 elif ins_type == "branch":
301 |                     d = 0
302 |                     dstruct = "I"
303 |                     if len(args) > 0:
304 |                         info = parseSymbol(args[0])
305 |                         d = info["d"]
306 |                         dstruct = info["dstruct"]
307 |                         if info["loc"]:
308 |                             if info["loc"] in locations:
309 |                                 d = locations[info["loc"]]
310 |                             else:
311 |                                 # Add this location to our lookbacks
312 |                                 lookbacks[len(objectcode)] = info["loc"]
313 | 
314 |                     objectcode += struct.pack("=B%s"%dstruct, ins_id, d)
315 | 
316 |                 elif ins_type == "conditional":
317 |                     rA = parseSymbol(args[0])['reg']
318 |                     rB = parseSymbol(args[1])['reg']
319 |                     objectcode += struct.pack("=BB", ins_id, (rA<<4)|rB)
320 | 
321 |     for lb in lookbacks:
322 |         #Extract our object code on either side of our lookback.
323 |         o1 = objectcode[:lb+1]
324 |         o2 = objectcode[lb+5:]
325 |         loc = lookbacks[lb]
326 |         if not loc in locations:
327 |             raise(AssembleError(linenum,"Invalid location: %s"%lookbacks[lb]))
328 | 
329 |         objectcode = o1 + struct.pack("=I", locations[loc]) + o2
330 | 
331 |     with open(outpath,'wb') as f:
332 |         f.write(objectcode)
333 | 


--------------------------------------------------------------------------------
/src/yarn.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <string.h>
  4 | #include <time.h>
  5 | 
  6 | #include "yarn.h"
  7 | 
  8 | #ifndef YARN_MAP_COUNT
  9 | #define YARN_MAP_COUNT 256 // Has to be a power-of-two
 10 | #endif
 11 | #define YARN_MAP_MASK  (YARN_MAP_COUNT - 1)
 12 | 
// Complete state of one Yarn virtual machine instance.
struct yarn_state {
  char *code;               // Program bytes to execute (private copy made by yarn_loadCode)
  size_t codesize;          // Number of bytes in `code`
  void *memory;             // Memory for the program. Contains registers, flags, everything
  size_t memsize;           // The total size of `memory` in bytes
  size_t instructioncount;  // Total count of instructions executed so far
  // Syscall table: open-addressing hash map from syscall id (key) to handler
  // (val). A slot whose val is NULL is treated as empty by the lookup code.
  struct { unsigned key; yarn_CFunc val; } syscalls[YARN_MAP_COUNT];
};
 22 | 
 23 | // Syscalls:
 24 | static void yarn_sys_gettime(yarn_state *Y) {
 25 |   yarn_int t = time(NULL);
 26 |   yarn_setRegister(Y, YARN_REG_RETURN, &t);
 27 | }
 28 | static void yarn_sys_getinstructioncount(yarn_state *Y) {
 29 |   yarn_setRegister(Y, YARN_REG_RETURN, &Y->instructioncount);
 30 | }
 31 | static void yarn_sys_getvmmemory(yarn_state *Y) {
 32 |   yarn_setRegister(Y, YARN_REG_RETURN, &Y->memsize);
 33 | }
 34 | 
 35 | // yarn_functions
 36 | yarn_state *yarn_init(size_t memsize) {
 37 |   yarn_uint stackaddr;
 38 |   yarn_uint instaddr;
 39 |   yarn_state *Y = malloc(sizeof(yarn_state));
 40 |   if (Y == NULL) {
 41 |     return NULL;
 42 |   }
 43 |   Y->code = NULL;
 44 |   Y->memsize = memsize;
 45 |   Y->memory = calloc(memsize,1);
 46 |   if (Y->memory == NULL) {
 47 |     free(Y);
 48 |     return NULL;
 49 |   }
 50 | 
 51 |   //Init our instruction pointer to 0
 52 |   instaddr = 0;
 53 |   yarn_setRegister(Y, YARN_REG_INSTRUCTION, &instaddr);
 54 | 
 55 |   // Init our stack&base pointer to the top of the memory, stack will grow down.
 56 |   //   Leave space for the registers, flags, and other stuff at the top.
 57 |   stackaddr =  memsize-(YARN_REG_NUM+2)*sizeof(yarn_uint);
 58 |   yarn_setRegister(Y, YARN_REG_STACK, &stackaddr);
 59 |   yarn_setRegister(Y, YARN_REG_BASE, &stackaddr);
 60 | 
 61 |   struct { yarn_uint id; yarn_CFunc fn; } syscalls[] = {
 62 |     { 0x00,  yarn_sys_getvmmemory               },
 63 |     { 0x01,  yarn_sys_getinstructioncount       },
 64 |     { 0x02,  yarn_sys_gettime                   },
 65 |     { 0x00,  NULL                               }
 66 |   };
 67 |   for (int i = 0; syscalls[i].fn; i++) {
 68 |     yarn_registerSysCall(Y, syscalls[i].id, syscalls[i].fn);
 69 |   }
 70 | 
 71 |   return Y;
 72 | }
 73 | 
 74 | void yarn_destroy(yarn_state *Y) {
 75 |   free(Y->code);
 76 |   free(Y->memory);
 77 |   free(Y);
 78 | }
 79 | 
 80 | // Will copy given code to an internal buffer.
 81 | int yarn_loadCode(yarn_state *Y, char *code, size_t codesize) {
 82 |   free(Y->code);
 83 | 
 84 |   Y->code = malloc(codesize);
 85 |   if (Y->code == NULL) {
 86 |     return -1;
 87 |   }
 88 |   memcpy(Y->code, code, codesize);
 89 |   Y->codesize = codesize;
 90 |   return 0;
 91 | }
 92 | 
 93 | // Returns the pointer to its memory.
 94 | void *yarn_getMemoryPtr(yarn_state *Y) {
 95 |   return Y->memory;
 96 | }
 97 | size_t yarn_getMemorySize(yarn_state *Y) {
 98 |   return Y->memsize;
 99 | }
100 | size_t yarn_getInstructionCount(yarn_state *Y) {
101 |   return Y->instructioncount;
102 | }
103 | 
104 | const char *yarn_registerToString(unsigned char reg) {
105 |   const char *result;
106 |   switch(reg) {
107 |     case YARN_REG_INSTRUCTION: result = "%ins"; break;
108 |     case YARN_REG_STACK: result = "%stk"; break;
109 |     case YARN_REG_BASE: result = "%bse"; break;
110 |     case YARN_REG_RETURN: result = "%ret"; break;
111 |     case YARN_REG_C1: result = "%C1"; break;
112 |     case YARN_REG_C2: result = "%C2"; break;
113 |     case YARN_REG_C3: result = "%C3"; break;
114 |     case YARN_REG_C4: result = "%C4"; break;
115 |     case YARN_REG_C5: result = "%C5"; break;
116 |     case YARN_REG_C6: result = "%C6"; break;
117 |     case YARN_REG_S1: result = "%S1"; break;
118 |     case YARN_REG_S2: result = "%S2"; break;
119 |     case YARN_REG_S3: result = "%S3"; break;
120 |     case YARN_REG_S4: result = "%S4"; break;
121 |     case YARN_REG_S5: result = "%S5"; break;
122 |     case YARN_REG_NULL: result = "%null"; break;
123 |     default:
124 |       result = "invalid";
125 |   }
126 |   return result;
127 | }
128 | 
129 | const char *yarn_statusToString(int status) {
130 |   const char *result;
131 |   switch(status) {
132 |     case YARN_STATUS_OK: result = "ok"; break;
133 |     case YARN_STATUS_HALT: result = "halt"; break;
134 |     case YARN_STATUS_PAUSE: result = "paused"; break;
135 |     case YARN_STATUS_INVALIDMEMORY: result = "invalid memory access error"; break;
136 |     case YARN_STATUS_INVALIDINSTRUCTION: result = "invalid instruction error"; break;
137 |     case YARN_STATUS_DIVBYZERO: result = "divide by zero  error"; break;
138 |     default:
139 |       result = "invalid";
140 |   }
141 |   return result;
142 | }
143 | 
144 | // Shortcuts for register manipulation
145 | #define registerLocation(r) Y->memsize-(yarn_uint)(r+2)*sizeof(yarn_uint)
146 | void yarn_getRegister(yarn_state *Y, unsigned char reg, void *val) {
147 |   yarn_getMemory(Y, registerLocation(reg), val, sizeof(yarn_uint));
148 | }
149 | void yarn_setRegister(yarn_state *Y, unsigned char reg, void *val) {
150 |   yarn_setMemory(Y, registerLocation(reg), val, sizeof(yarn_uint));
151 | }
152 | void yarn_incRegister(yarn_state *Y, unsigned char reg, yarn_int val) {
153 |   yarn_int rval;
154 |   yarn_getRegister(Y, reg, &rval);
155 |   rval += val;
156 |   yarn_setRegister(Y, reg, &rval);
157 | }
158 | #undef registerLocation
159 | 
160 | // Memory manipulations.
161 | void yarn_getMemory(yarn_state *Y, yarn_uint pos, void *val, size_t bsize) {
162 |   if ((pos+bsize) > Y->memsize) { // Check for out-of-bounds
163 |     yarn_setStatus(Y, YARN_STATUS_INVALIDMEMORY);
164 |     return;
165 |   }
166 |   memcpy(val, ((char*)Y->memory)+pos, bsize);
167 | }
168 | void yarn_setMemory(yarn_state *Y, yarn_uint pos, void *val, size_t bsize) {
169 |   if ((pos+bsize) > Y->memsize) { // Check for out-of-bounds
170 |     yarn_setStatus(Y, YARN_STATUS_INVALIDMEMORY);
171 |     return;
172 |   }
173 |   memcpy(((char*)Y->memory)+pos, val,  bsize);
174 | }
175 | 
176 | // Pushs the stack
177 | void yarn_push(yarn_state *Y, yarn_int val) {
178 |   yarn_uint stk;
179 |   yarn_incRegister(Y, YARN_REG_STACK, -(int)sizeof(yarn_int));
180 |   yarn_getRegister(Y, YARN_REG_STACK, &stk);
181 |   yarn_setMemory(Y, stk, &val, sizeof(val));
182 | }
183 | // Pops the stack
184 | yarn_int yarn_pop(yarn_state *Y) {
185 |   yarn_uint stk;
186 |   yarn_int val;
187 |   yarn_getRegister(Y, YARN_REG_STACK, &stk);
188 |   yarn_getMemory(Y, stk, &val, sizeof(val));
189 |   yarn_incRegister(Y, YARN_REG_STACK, (int)sizeof(yarn_int));
190 |   return val;
191 | }
192 | 
193 | // Gets the status of the execution. Status codes are given by YARN_STATUS_
194 | int yarn_getStatus(yarn_state *Y) {
195 |   unsigned char val;
196 |   yarn_getMemory(Y, Y->memsize-sizeof(yarn_int), &val, sizeof(val));
197 |   return (int)val;
198 | }
199 | void yarn_setStatus(yarn_state *Y, unsigned char val) {
200 |   yarn_setMemory(Y, Y->memsize-sizeof(yarn_int), &val, sizeof(val));
201 | }
202 | 
203 | // Gets the specified flag. Currently only used for the conditional flag.
204 | int yarn_getFlag(yarn_state *Y, int flag) {
205 |   unsigned char val;
206 |   yarn_getMemory(Y, Y->memsize-3, &val, sizeof(val));
207 |   return (val>>flag)&1;
208 | }
209 | void yarn_setFlag(yarn_state *Y, int flag) {
210 |   unsigned char val;
211 |   yarn_getMemory(Y, Y->memsize-3, &val, sizeof(val));
212 |   val |= 1 << flag;
213 |   yarn_setMemory(Y, Y->memsize-3, &val, sizeof(val));
214 | }
215 | void yarn_clearFlag(yarn_state *Y, int flag) {
216 |   unsigned char val;
217 |   yarn_getMemory(Y, Y->memsize-3, &val, sizeof(val));
218 |   val &= ~(1 << flag);
219 |   yarn_setMemory(Y, Y->memsize-3, &val, sizeof(val));
220 | }
221 | 
222 | /*
223 |   System call interface, includes helper functions to manipulate the hashmap
224 | */
225 | 
// Multiplicative hash of `n`; the product wraps modulo UINT_MAX+1.
static inline unsigned int hash_uint(unsigned int n) {
  // The `u` suffix matters: an unsuffixed 2654435761 does not fit in int, so
  // it takes a wider *signed* type where one exists. The multiplication would
  // then be carried out in that signed type and can overflow, which is
  // undefined behaviour. With an unsigned constant it wraps as intended.
  return n * 2654435761u;
}
229 | 
230 | void yarn_registerSysCall(yarn_state *Y, yarn_uint key, yarn_CFunc fun) {
231 |   unsigned int n = hash_uint(key);
232 |   for (;;) {
233 |     unsigned idx = n & YARN_MAP_MASK;
234 |     if (Y->syscalls[idx].key == key || Y->syscalls[idx].val == NULL) {
235 |       Y->syscalls[idx].key = key;
236 |       Y->syscalls[idx].val = fun;
237 |       break;
238 |     }
239 |     n++;
240 |   }
241 | }
242 | 
243 | yarn_CFunc yarn_getSysCall(yarn_state *Y, yarn_uint key) {
244 |   unsigned n = hash_uint(key);
245 |   for (;;) {
246 |     unsigned idx = n & YARN_MAP_MASK;
247 |     if (Y->syscalls[idx].val == NULL) {
248 |       return NULL;
249 |     }
250 |     if (Y->syscalls[idx].key == key) {
251 |       return Y->syscalls[idx].val;
252 |     }
253 |     n++;
254 |   }
255 | }
256 | 
257 | /*
258 |  *  External function to execute the program. icount is the maximum number of
259 |  *    instructions to execute. Use -1 to indicate indefinite execution. Will
260 |  *    execute until program sets status to anything but YARN_STATUS_OK,
261 |  */
262 | #define arithinst_s_setup() \
263 |   yarn_validInstruction(5);\
264 |   rA = (Y->code[ip+1] & 0xF0)>>4; \
265 |   rB = Y->code[ip+1] & 0x0F; \
266 |   d_s = *((yarn_int *) &Y->code[ip+2]); \
267 |   yarn_getRegister(Y, rB, &valB_s); \
268 |   if (rA == YARN_REG_NULL) { \
269 |     valA_s = d_s; \
270 |   } else { \
271 |     yarn_getRegister(Y, rA, &valA_s); \
272 |   } \
273 | 
274 | #define arithinst_setup() \
275 |   yarn_validInstruction(5);\
276 |   rA = (Y->code[ip+1] & 0xF0)>>4; \
277 |   rB = Y->code[ip+1] & 0x0F; \
278 |   d = *((yarn_uint *) &Y->code[ip+2]); \
279 |   yarn_getRegister(Y, rB, &valB); \
280 |   if (rA == YARN_REG_NULL) { \
281 |     valA = d; \
282 |   } else { \
283 |     yarn_getRegister(Y, rA, &valA); \
284 |   } \
285 | 
286 | #define moveinst_setup() \
287 |   yarn_validInstruction(5);\
288 |   rA = (Y->code[ip+1] & 0xF0)>>4; \
289 |   rB = Y->code[ip+1] & 0x0F; \
290 |   d = *((yarn_int *) &Y->code[ip+2]); \
291 |   if (rA == YARN_REG_NULL) { \
292 |     valA = 0; \
293 |   } else { \
294 |     yarn_getRegister(Y, rA, &valA); \
295 |   } \
296 | 
297 | #define stackinst_setup() \
298 |   yarn_validInstruction(1);\
299 |   rA = (Y->code[ip+1] & 0xF0)>>4; \
300 | 
301 | #define branchinst_setup() \
302 |   yarn_validInstruction(4); \
303 |   d = *((yarn_int *) &Y->code[ip+1]); \
304 | 
305 | #define conditionalinst_setup() \
306 |   yarn_validInstruction(1);\
307 |   rA = (Y->code[ip+1] & 0xF0)>>4; \
308 |   rB = Y->code[ip+1] & 0x0F; \
309 |   yarn_getRegister(Y, rA, &valA); \
310 |   yarn_getRegister(Y, rB, &valB); \
311 |   yarn_clearFlag(Y, YARN_FLAG_CONDITIONAL); \
312 | 
313 | #define conditionalinst_s_setup() \
314 |   yarn_validInstruction(1);\
315 |   rA = (Y->code[ip+1] & 0xF0)>>4; \
316 |   rB = Y->code[ip+1] & 0x0F; \
317 |   yarn_getRegister(Y, rA, &valA_s); \
318 |   yarn_getRegister(Y, rB, &valB_s); \
319 |   yarn_clearFlag(Y, YARN_FLAG_CONDITIONAL); \
320 | 
321 | #ifdef YARN_DEBUG
322 | #define yarn_validInstruction(o) if (ip+o >= Y->codesize) { \
323 |   yarn_setStatus(Y,YARN_STATUS_INVALIDINSTRUCTION); \
324 |   printf("INVALID: %d %%ins: 0x%X\n",__LINE__,ip); \
325 |   break; \
326 | }
327 | #else
328 | #define yarn_validInstruction(o) if (ip+o >= Y->codesize) { \
329 |   yarn_setStatus(Y,YARN_STATUS_INVALIDINSTRUCTION); \
330 |   break; \
331 | }
332 | #endif
333 | 
334 | int yarn_execute(yarn_state *Y, int icount) {
335 |   yarn_uint ip;
336 |   int instruction;
337 | 
338 |   while (yarn_getStatus(Y) == YARN_STATUS_OK && (icount > 0 || icount == -1)) {
339 |     yarn_getRegister(Y, YARN_REG_INSTRUCTION, &ip);
340 |     yarn_validInstruction(0);
341 | 
342 |     unsigned char rA, rB;
343 |     yarn_uint valA, valB, valM, d;
344 |     yarn_int valA_s, valB_s, d_s;
345 | 
346 |     instruction = Y->code[ip];
347 |     #if YARN_DEBUG
348 |     printf("instruction: 0x%02X icode: 0x%02X\n",instruction,instruction & 0xF0);
349 |     #endif
350 | 
351 |     // Here we execute the specified function for the icode and increment the
352 |     // instruction register.
353 |     switch(instruction) {
354 |       //   Control
355 |       case YARN_INST_HALT:
356 |         yarn_setStatus(Y,YARN_STATUS_HALT);
357 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 1);
358 |         break;
359 |       case YARN_INST_PAUSE:
360 |         yarn_setStatus(Y,YARN_STATUS_PAUSE);
361 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 1);
362 |         break;
363 |       case YARN_INST_NOP:
364 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 1);
365 |         break;
366 | 
367 |       //   Arith:
368 |       case YARN_INST_ADD:
369 |         arithinst_setup();
370 |         valB += valA;
371 |         yarn_setRegister(Y, rB, &valB);
372 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 6);
373 |         break;
374 |       case YARN_INST_SUB:
375 |         arithinst_setup();
376 |         valB -= valA;
377 |         yarn_setRegister(Y, rB, &valB);
378 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 6);
379 |         break;
380 |       case YARN_INST_MUL:
381 |         arithinst_setup();
382 |         valB *= valA;
383 |         yarn_setRegister(Y, rB, &valB);
384 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 6);
385 |         break;
386 |       case YARN_INST_DIV:
387 |         arithinst_setup();
388 |         if (valA == 0) {
389 |           yarn_setStatus(Y, YARN_STATUS_DIVBYZERO);
390 |         } else {
391 |           valB /= valA;
392 |           yarn_setRegister(Y, rB, &valB);
393 |         }
394 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 6);
395 |         break;
396 |       case YARN_INST_DIVS:
397 |         arithinst_s_setup();
398 |         if (valA_s == 0) {
399 |           yarn_setStatus(Y, YARN_STATUS_DIVBYZERO);
400 |         } else {
401 |           valB_s /= valA_s;
402 |           yarn_setRegister(Y, rB, &valB_s);
403 |         }
404 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 6);
405 |         break;
406 |       case YARN_INST_LSH:
407 |         arithinst_setup();
408 |         valB <<= valA;
409 |         yarn_setRegister(Y, rB, &valB);
410 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 6);
411 |         break;
412 |       case YARN_INST_RSH:
413 |         arithinst_setup();
414 |         valB >>= valA;
415 |         yarn_setRegister(Y, rB, &valB);
416 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 6);
417 |         break;
418 |       case YARN_INST_RSHS:
419 |         arithinst_s_setup();
420 |         valB_s >>= valA_s;
421 |         yarn_setRegister(Y, rB, &valB_s);
422 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 6);
423 |         break;
424 |       case YARN_INST_AND:
425 |         arithinst_setup();
426 |         valB &= valA;
427 |         yarn_setRegister(Y, rB, &valB);
428 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 6);
429 |         break;
430 |       case YARN_INST_OR:
431 |         arithinst_setup();
432 |         valB |= valA;
433 |         yarn_setRegister(Y, rB, &valB);
434 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 6);
435 |         break;
436 |       case YARN_INST_XOR:
437 |         arithinst_setup();
438 |         valB ^= valA;
439 |         yarn_setRegister(Y, rB, &valB);
440 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 6);
441 |         break;
442 |       case YARN_INST_NOT:
443 |         arithinst_setup();
444 |         valB = ~valA;
445 |         yarn_setRegister(Y, rB, &valB);
446 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 6);
447 |         break;
448 | 
449 |       //   Move:
450 |       case YARN_INST_IR:
451 |         moveinst_setup();
452 |         valA += d;
453 |         yarn_setRegister(Y, rB, &valA);
454 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 6);
455 |         break;
456 |       case YARN_INST_MR:
457 |         moveinst_setup();
458 |         yarn_getMemory(Y,d+valA, &valM, sizeof(valM));
459 |         yarn_setRegister(Y,rB,&valM);
460 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 6);
461 |         break;
462 |       case YARN_INST_RR:
463 |         moveinst_setup();
464 |         yarn_setRegister(Y,rB,&valA); // Do we want to use d?
465 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 6);
466 |         break;
467 |       case YARN_INST_RM:
468 |         moveinst_setup();
469 |         yarn_getRegister(Y, rB, &valB);
470 |         yarn_setMemory(Y, valB+d, &valA, sizeof(valA));
471 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 6);
472 |         break;
473 | 
474 |       //   Stack:
475 |       case YARN_INST_PUSH:
476 |         stackinst_setup();
477 |         yarn_getRegister(Y, rA, &valA);
478 |         yarn_push(Y, valA);
479 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 2);
480 |         break;
481 |       case YARN_INST_POP:
482 |         stackinst_setup();
483 |         valA = yarn_pop(Y);
484 |         yarn_setRegister(Y, rA, &valA);
485 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 2);
486 |         break;
487 | 
488 |       //   Branches:
489 |       case YARN_INST_CALL:
490 |         branchinst_setup();
491 |         yarn_push(Y, ip+5);
492 |         yarn_setRegister(Y, YARN_REG_INSTRUCTION, &d);
493 |         break;
494 |       case YARN_INST_RET:
495 |         branchinst_setup();
496 |         for(yarn_uint i=0; i<d; i++) {
497 |           yarn_pop(Y);
498 |         }
499 |         valA = yarn_pop(Y);
500 |         yarn_setRegister(Y, YARN_REG_INSTRUCTION, &valA);
501 |         break;
502 |       case YARN_INST_JUMP:
503 |         branchinst_setup();
504 |         yarn_setRegister(Y, YARN_REG_INSTRUCTION, &d);
505 |         break;
506 |       case YARN_INST_CONDJUMP:
507 |         branchinst_setup();
508 |         if (yarn_getFlag(Y, YARN_FLAG_CONDITIONAL)) {
509 |           yarn_setRegister(Y, YARN_REG_INSTRUCTION, &d);
510 |         } else {
511 |           yarn_incRegister(Y, YARN_REG_INSTRUCTION, 5);
512 |         }
513 |         break;
514 |       case YARN_INST_SYSCALL:
515 |         branchinst_setup();
516 |         yarn_CFunc fun = yarn_getSysCall(Y, (yarn_uint)d);
517 |         if (fun == NULL) {
518 |           yarn_setStatus(Y, YARN_STATUS_INVALIDINSTRUCTION);
519 |         } else {
520 |           (*fun)(Y);
521 |         }
522 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 5);
523 |         break;
524 | 
525 |       //   Conditionals:
526 |       case YARN_INST_LT:
527 |         conditionalinst_setup();
528 |         if (valA < valB) yarn_setFlag(Y, YARN_FLAG_CONDITIONAL);
529 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 2);
530 |         break;
531 |       case YARN_INST_LTS:
532 |         conditionalinst_s_setup();
533 |         if (valA_s < valB_s) yarn_setFlag(Y, YARN_FLAG_CONDITIONAL);
534 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 2);
535 |         break;
536 |       case YARN_INST_LTE:
537 |         conditionalinst_setup();
538 |         if (valA <= valB) yarn_setFlag(Y, YARN_FLAG_CONDITIONAL);
539 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 2);
540 |         break;
541 |       case YARN_INST_LTES:
542 |         conditionalinst_s_setup();
543 |         if (valA_s <= valB_s) yarn_setFlag(Y, YARN_FLAG_CONDITIONAL);
544 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 2);
545 |         break;
546 |       case YARN_INST_EQ:
547 |         conditionalinst_setup();
548 |         if (valA == valB) yarn_setFlag(Y, YARN_FLAG_CONDITIONAL);
549 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 2);
550 |         break;
551 |       case YARN_INST_NEQ:
552 |         conditionalinst_setup();
553 |         if (valA != valB) yarn_setFlag(Y, YARN_FLAG_CONDITIONAL);
554 |         yarn_incRegister(Y, YARN_REG_INSTRUCTION, 2);
555 |         break;
556 |       default:
557 |         yarn_setStatus(Y,YARN_STATUS_INVALIDINSTRUCTION);
558 |         break;
559 |     }
560 | 
561 |     // Check and make sure we haven't run out of instructions to use.
562 |     Y->instructioncount += 1;
563 |     if (icount != -1) {
564 |       icount -= 1;
565 |     }
566 |   }
567 |   return yarn_getStatus(Y);
568 | }
569 | 
570 | #undef arithinst_setup
571 | #undef arithinst_s_setup
572 | #undef moveinst_setup
573 | #undef stackinst_setup
574 | #undef branchinst_setup
575 | #undef conditionalinst_setup
576 | #undef conditionalinst_s_setup
577 | #undef yarn_validInstruction
578 | 
579 | #ifdef YARN_STANDALONE
580 | /*
581 |  * Command line program.
582 |  *   Usage: ./bin/yarn code.o
583 |  *   Flags:
584 |  *     -m<file> - Dumps the memory state to a file. Ex: -mmemdump.mem
585 |  *     -c<icount> - Limits execution to icount instructions. Ex: -c20
586 |  */
587 | inline static void printProgramStatus(yarn_state *Y) {
588 |   printf("Register contents:\n");
589 |   yarn_uint rval;
590 |   for (int r=0; r < 16; r++) {
591 |     yarn_getRegister(Y, r, &rval);
592 |     printf("\tReg: %-5s = 0x%08X   %d\n",yarn_registerToString(r), rval, rval);
593 |   }
594 | 
595 |   printf("Status: %s\n",yarn_statusToString(yarn_getStatus(Y)));
596 |   printf("Instructions executed: %zu\n",yarn_getInstructionCount(Y));
597 | 
598 | }
599 | int main(int argc, char **argv) {
600 |   FILE *fp;
601 |   size_t fsize;
602 |   yarn_state *Y;
603 |   char *buffer;
604 |   char *memoryfile = NULL;
605 |   int icount = -1;
606 |   int status = YARN_STATUS_OK;
607 | 
608 |   if (argc <= 1) {
609 |     printf("Must specify an object file to load.\n");
610 |     return 0;
611 |   }
612 |   for (int i=2; i<argc; i++) {
613 |     if (strncmp("-m", argv[i], strlen("-m")) == 0) {
614 |       memoryfile = argv[i]+2;
615 |     } else if (strncmp("-c", argv[i], strlen("-c")) == 0) {
616 |       icount = atoi(argv[i]+2);
617 |     }
618 |   }
619 | 
620 |   fp = fopen(argv[1], "r");
621 |   if (!fp) {
622 |     printf("Invalid object file.\n");
623 |     return EXIT_FAILURE;
624 |   }
625 |   fseek(fp, 0L, SEEK_END);
626 |   fsize = ftell(fp);
627 |   fseek(fp, 0L, SEEK_SET);
628 | 
629 |   buffer = malloc(sizeof(char)*fsize);
630 |   if (buffer == NULL) {
631 |     printf("Unable to load object file.\n");
632 |     return EXIT_FAILURE;
633 |   }
634 |   fread(buffer,1,fsize,fp);
635 |   fclose(fp);
636 | 
637 |   Y = yarn_init(256*sizeof(yarn_int));
638 |   if (Y == NULL) {
639 |     printf("Unable to create Yarn state.\n");
640 |     return EXIT_FAILURE;
641 |   }
642 |   if (yarn_loadCode(Y,buffer,fsize) != 0) {
643 |     printf("Unable to load Yarn object code.\n");
644 |     return EXIT_FAILURE;
645 |   }
646 | 
647 |   while (status == YARN_STATUS_OK) {
648 |     status = yarn_execute(Y, icount);
649 |     printProgramStatus(Y);
650 |     if (status == YARN_STATUS_PAUSE) {
651 |       printf("Program paused, hit enter to continue.");
652 |       getchar();
653 |       // Reset our status
654 |       yarn_setStatus(Y, YARN_STATUS_OK);
655 |       status = YARN_STATUS_OK;
656 |     }
657 |   }
658 | 
659 |   if (memoryfile != NULL) {
660 |     fp = fopen(memoryfile, "w");
661 |     if (!fp) {
662 |       printf("Invalid memory dump name.\n");
663 |       return EXIT_FAILURE;
664 |     }
665 |     fwrite(yarn_getMemoryPtr(Y), 1, yarn_getMemorySize(Y), fp);
666 |     fclose(fp);
667 |     printf("Wrote memory dump: %s\n",memoryfile);
668 |   }
669 | 
670 |   yarn_destroy(Y);
671 |   free(buffer);
672 |   return 0;
673 | }
674 | #endif
675 | 


--------------------------------------------------------------------------------