├── Makefile ├── helpers.asm ├── qpu-dis.cpp └── qpu-asm.cpp /Makefile: -------------------------------------------------------------------------------- 1 | all: qpu-asm qpu-dis 2 | 3 | qpu-asm: qpu-asm.cpp 4 | g++ -g -o qpu-asm qpu-asm.cpp 5 | 6 | qpu-dis: qpu-dis.cpp 7 | g++ -g -o qpu-dis qpu-dis.cpp 8 | -------------------------------------------------------------------------------- /helpers.asm: -------------------------------------------------------------------------------- 1 | define(`MUTEX_ACQUIRE', `or ra39, ra51, rb39; nop') 2 | define(`MUTEX_RELEASE', `or ra51, ra39, ra39; nop') 3 | 4 | # Hardwired IO registers 5 | define(`rVpmWriteFifo', `rb48') 6 | define(`rVpmReadFifo', `ra48') 7 | define(`raReadUniform', `ra32') 8 | define(`rbReadUniform', `rb32') 9 | define(`raZero', `ra39') 10 | define(`rbZero', `rb39') 11 | 12 | # Macro argument constants 13 | define(`MODEW_32_BIT', 0) 14 | define(`MODEW_16_BIT_OFFSET_0', 2) 15 | define(`MODEW_16_BIT_OFFSET_1', 3) 16 | define(`MODEW_8_BIT_OFFSET_0', 4) 17 | define(`MODEW_8_BIT_OFFSET_1', 5) 18 | define(`MODEW_8_BIT_OFFSET_2', 6) 19 | define(`MODEW_8_BIT_OFFSET_3', 7) 20 | define(`SIZE_8_BIT', 0) 21 | define(`SIZE_16_BIT', 1) 22 | define(`SIZE_32_BIT', 2) 23 | define(`IS_HORIZ', 1) 24 | define(`NOT_HORIZ', 0) 25 | define(`IS_VERT', 1) 26 | define(`NOT_VERT', 0) 27 | define(`IS_LANED', 1) 28 | define(`NOT_LANED', 0) 29 | 30 | # VPM_BLOCK_WRITE_SETUP 31 | # ~~~~~~~~~~~~~~~~~~~~~ 32 | # Sets up things so writes go into the small VPM data cache. 33 | # Once the data's been written (by outputting repeatedly to the VPM_WRITE_FIFO 34 | # register rb48), you then call VPM_DMA_WRITE_SETUP to configure the main 35 | # memory destination and writing pattern. 36 | # Arguments: 37 | # STRIDE: 0-64 - How much to increment the ADDR after each write. 38 | # HORIZ: 0 or 1 - Whether the layout is horizontal (1) or vertical (0). 39 | # LANED: 0 or 1 - Whether the layout is laned (1) or packed (0). 
40 | # SIZE: 0, 1, 2 - The data unit size, 8-bit (0), 16-bit(1), or 32-bit (2). 41 | # ADDR: 0-255 - Packed address, meaning depends on exact unit size and mode. 42 | # See http://www.broadcom.com/docs/support/videocore/VideoCoreIV-AG100-R.pdf page 57 43 | define(`VPM_BLOCK_WRITE_SETUP_ID_SHIFT', 30) 44 | define(`VPM_BLOCK_WRITE_SETUP_STRIDE_SHIFT', 12) 45 | define(`VPM_BLOCK_WRITE_SETUP_HORIZ_SHIFT', 11) 46 | define(`VPM_BLOCK_WRITE_SETUP_LANED_SHIFT', 10) 47 | define(`VPM_BLOCK_WRITE_SETUP_SIZE_SHIFT', 8) 48 | define(`VPM_BLOCK_WRITE_SETUP_ADDR_SHIFT', 0) 49 | define(`VPM_BLOCK_WRITE_SETUP_VALUE', `eval( 50 | (0< 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | void show_qpu_inst(uint32_t *inst); 8 | void show_qpu_fragment(uint32_t *inst, int length); 9 | 10 | 11 | int base; 12 | int showfields = 1; 13 | 14 | const char *acc_names[] = { 15 | "r0", "r1", "r2", "r3", "r4", "r5" 16 | }; 17 | 18 | const char *banka_r[64] = { 19 | "ra0", "ra1", "ra2", "ra3", "ra4", "ra5", "ra6", "ra7", 20 | "ra8", "ra9", "ra10", "ra11", "ra12", "ra13", "ra14", "ra15", //ra15 is w in shaders 21 | "ra16", "ra17", "ra18", "ra19", "ra20", "ra21", "ra22", "ra23", 22 | "ra24", "ra25", "ra26", "ra27", "ra28", "ra29", "ra30", "ra31", 23 | "unif", "ra33?", "ra34?", "vary", "ra36?", "ra37?", "elem_num", "nop", 24 | "ra40", "x_coord", "ms_mask", "ra43?", "ra44?", "ra45?", "ra46?", "ra47?", 25 | "vpm", "vr_busy", "vr_wait", "mutex", "ra52?", "ra53?", "ra54?", "ra55?", 26 | "ra56?", "ra57?", "ra58?", "ra59?", "ra60?", "ra61?", "ra62?", "ra63?", 27 | }; 28 | 29 | const char *bankb_r[64] = { 30 | "rb0", "rb1", "rb2", "rb3", "rb4", "rb5", "rb6", "rb7", 31 | "rb8", "rb9", "rb10", "rb11", "rb12", "rb13", "rb14", "rb15", //rb15 is z in shaders 32 | "rb16", "rb17", "rb18", "rb19", "rb20", "rb21", "rb22", "rb23", 33 | "rb24", "rb25", "rb26", "rb27", "rb28", "rb29", "rb30", "rb31", 34 | "unif", "rb33?", "rb34?", "vary", "rb36?", "rb37?", "qpu_num", "nop", 35 | "rb40?", "y_coord", "rev_flag", 
"rb43?", "rb44?", "rb45?", "rb46?", "rb47?", 36 | "vpm", "vw_busy", "vw_wait", "mutex", "rb52?", "rb53?", "rb54?", "rb55?", 37 | "rb56?", "rb57?", "rb58?", "rb59?", "rb60?", "rb61?", "rb62?", "rb63?", 38 | }; 39 | 40 | const char *banka_w[64] = { 41 | "ra0", "ra1", "ra2", "ra3", "ra4", "ra5", "ra6", "ra7", 42 | "ra8", "ra9", "ra10", "ra11", "ra12", "ra13", "ra14", "ra15", //ra15 is w in shaders 43 | "ra16", "ra17", "ra18", "ra19", "ra20", "ra21", "ra22", "ra23", 44 | "ra24", "ra25", "ra26", "ra27", "ra28", "ra29", "ra30", "ra31", 45 | "r0", "r1", "r2", "r3", "tmurs", "r5quad", "irq", "-", 46 | "unif_addr", "x_coord", "ms_mask", "stencil", "tlbz", "tlbm", "tlbc", "tlbam", 47 | "vpm", "vr_setup", "vr_addr", "mutex", "recip", "recipsqrt", "exp", "log", 48 | "t0s", "t0t", "t0r", "t0b", "t1s", "t1t", "t1r", "t1b", 49 | }; 50 | 51 | const char *bankb_w[64] = { 52 | "rb0", "rb1", "rb2", "rb3", "rb4", "rb5", "rb6", "rb7", 53 | "rb8", "rb9", "rb10", "rb11", "rb12", "rb13", "rb14", "rb15", //rb15 is z in shaders 54 | "rb16", "rb17", "rb18", "rb19", "rb20", "rb21", "rb22", "rb23", 55 | "rb24", "rb25", "rb26", "rb27", "rb28", "rb29", "rb30", "rb31", 56 | "r0", "r1", "r2", "r3", "tmurs", "r5rep", "irq", "-", 57 | "unif_addr_rel", "y_coord", "rev_flag", "stencil", "tlbz", "tlbm", "tlbc", "tlbam", 58 | "vpm", "vw_setup", "vw_addr", "mutex", "recip", "recipsqrt", "exp", "log", 59 | "t0s", "t0t", "t0r", "t0b", "t1s", "t1t", "t1r", "t1b", 60 | }; 61 | 62 | const char *ops[] = { 63 | "bkpt", "nop", "thrsw", "thrend", "sbwait", "sbdone", "lthrsw", "loadcv", 64 | "loadc", "ldcend", "ldtmu0", "ldtmu1", "loadam", "nop", "ldi", "bra", 65 | }; 66 | 67 | const char *addops[] = { 68 | "nop", "fadd", "fsub", "fmin", "fmax", "fminabs", "fmaxabs", "ftoi", 69 | "itof", "addop9", "addop10", "addop11", "add", "sub", "shr", "asr", 70 | "ror", "shl", "min", "max", "and", "or", "xor", "not", 71 | "clz", "addop25", "addop26", "addop27", "addop28", "addop29", "v8adds", "v8subs", 72 | 73 | "mov" 74 | }; 
/*
 * Mnemonic and suffix lookup tables for the disassembler.
 *
 * Each table is indexed directly by a bit-field extracted from the
 * 64-bit instruction word; the index range is noted beside every row.
 * An empty string means "print nothing" for that encoding.
 */

/* Multiply-pipe opcodes, indexed by the 3-bit mulop field.  Entry 8 is
 * not an encodable opcode: show_qpu_add_mul rewrites mulop 4 to 8 when
 * both mul operands are the same, so the instruction prints as "mov". */
const char *mulops[] = {
	/* 0 */ "nop",
	/* 1 */ "fmul",
	/* 2 */ "mul24",
	/* 3 */ "v8muld",
	/* 4 */ "v8min",
	/* 5 */ "v8max",
	/* 6 */ "v8adds",
	/* 7 */ "v8subs",

	/* 8 */ "mov"
};

/* Condition-code suffixes, indexed by the 3-bit addcc / mulcc fields.
 * Code 1 prints with no suffix. */
const char *cc[] = {
	/* 0 */ ".never",
	/* 1 */ "",
	/* 2 */ ".zs",
	/* 3 */ ".zc",
	/* 4 */ ".ns",
	/* 5 */ ".nc",
	/* 6 */ ".cs",
	/* 7 */ ".cc"
};

/* Destination pack suffixes returned by qpu_pack_add, indexed by the
 * 4-bit packing field. */
const char *dstpackadd[] = {
	/*  0.. 3 */ "",   ".16a",  ".16b",  ".8abcd",
	/*  4.. 7 */ ".8a", ".8b",  ".8c",   ".8d",
	/*  8..11 */ ".s", ".16as", ".16bs", ".8abcds",
	/* 12..15 */ ".8as", ".8bs", ".8cs", ".8ds"
};

/* Destination pack suffixes returned by qpu_pack_mul, indexed by the
 * 4-bit packing field.  The ".packmNN" names look like placeholders for
 * encodings that have not been given a mnemonic - TODO confirm. */
const char *dstpackmul[] = {
	/*  0.. 3 */ "",         ".packm01", ".packm02", ".8abcd",
	/*  4.. 7 */ ".8a",      ".8b",      ".8c",      ".8d",
	/*  8..11 */ ".packm08", ".packm09", ".packm10", ".packm11",
	/* 12..15 */ ".packm12", ".packm13", ".packm14", ".packm15"
};

/* Source unpack suffixes for add-pipe operands, indexed by the 3-bit
 * unpacking field. */
const char *srcunpackadd[] = {
	/* 0..3 */ "",    ".16a", ".16b", ".8dr",
	/* 4..7 */ ".8a", ".8b",  ".8c",  ".8d"
};

/* Source unpack suffixes for mul-pipe operands, indexed by the 3-bit
 * unpacking field (same strings as srcunpackadd). */
const char *srcunpackmul[] = {
	/* 0..3 */ "",    ".16a", ".16b", ".8dr",
	/* 4..7 */ ".8a", ".8b",  ".8c",  ".8d"
};

/* Branch condition suffixes, indexed by the 4-bit cond field of a
 * branch instruction.  Code 15 prints with no suffix; ".cc12"-".cc14"
 * appear to be unnamed encodings - TODO confirm. */
const char *bcc[] = {
	/*  0.. 3 */ ".allz", ".allnz", ".anyz", ".anynz",
	/*  4.. 7 */ ".alln", ".allnn", ".anyn", ".anynn",
	/*  8..11 */ ".allc", ".allnc", ".anyc", ".anync",
	/* 12..15 */ ".cc12", ".cc13",  ".cc14", ""
};

/* Small-immediate spellings, indexed by the 6-bit rb field; qpu_r
 * consults this table only when the signal field (op) is 13.
 * Entries 48..63 are suffix strings that qpu_r appends to a source
 * register name when its rotator argument is set. */
const char *imm[] = {
	/*  0.. 3 */ "0", "1", "2", "3",
	/*  4.. 7 */ "4", "5", "6", "7",
	/*  8..11 */ "8", "9", "10", "11",
	/* 12..15 */ "12", "13", "14", "15",
	/* 16..19 */ "-16", "-15", "-14", "-13",
	/* 20..23 */ "-12", "-11", "-10", "-9",
	/* 24..27 */ "-8", "-7", "-6", "-5",
	/* 28..31 */ "-4", "-3", "-2", "-1",
	/* 32..35 */ "1.0", "2.0", "4.0", "8.0",
	/* 36..39 */ "16.0", "32.0", "64.0", "128.0",
	/* 40..43 */ "1/256", "1/128", "1/64", "1/32",
	/* 44..47 */ "1/16", "1/8", "1/4", "1/2",
	/* 48..51 */ " >> r5", " >> 1", " >> 2", " >> 3",
	/* 52..55 */ " >> 4", " >> 5", " >> 6", " >> 7",
	/* 56..59 */ " >> 8", " >> 9", " >> 10", " >> 11",
	/* 60..63 */ " >> 12", " >> 13", " >> 14", " >> 15"
};

/* Set-flags suffix, indexed by a 0/1 flag. */
const char *setf[] = {
	/* 0 */ "",
	/* 1 */ ".setf"
};

// QPU Instruction unpacking
//
// Add/Mul Operations:
//   mulop:3 addop:5 ra:6 rb:6 adda:3 addb:3 mula:3 mulb:3, op:4 packbits:8 addcc:3 mulcc:3 F:1 X:1 wa:6 wb:6
//
// Branches:
//   addr:32, 1111 0000 cond:4 relative:1 register:1 ra:5 X:1 wa:6 wb:6
//
// 32 Bit Immediates:
// data:32, 1110 unknown:8 addcc:3 mulcc:3 F:1 X:1 wa:6 wb:6 132 | 133 | unsigned tmpthis=0; 134 | unsigned tmpnext=0; 135 | char tmpbuff[256]; 136 | #define tmpalloc(sizebytes) ( tmpthis = tmpnext+sizebytes > sizeof(tmpbuff) ? 0 : tmpnext, tmpnext = (tmpthis+sizebytes), &tmpbuff[tmpthis]) 137 | 138 | const char *qpu_r(uint32_t ra, uint32_t rb, uint32_t adda, uint32_t op, int rotator) { 139 | 140 | if (op == 13) { 141 | if (rb<48) { 142 | if (adda==6) return banka_r[ra]; 143 | if (adda==7) return imm[rb]; 144 | } 145 | else { 146 | if ((adda<6) && rotator) { 147 | char *tmp = tmpalloc(32); 148 | sprintf(tmp, "%s%s", acc_names[adda], imm[rb]); 149 | return tmp; 150 | } 151 | if ((adda==6) && rotator) { 152 | char *tmp = tmpalloc(32); 153 | sprintf(tmp, "%s%s", banka_r[ra], imm[rb]); 154 | return tmp; 155 | } 156 | if ((adda==7) && rotator) { 157 | return "err?"; 158 | } 159 | } 160 | } 161 | 162 | if (adda==6) return banka_r[ra]; 163 | if (adda==7) return bankb_r[rb]; 164 | return acc_names[adda]; 165 | } 166 | 167 | const char *qpu_w_add(uint32_t wa, uint32_t X) { 168 | return X ? bankb_w[wa] : banka_w[wa]; 169 | } 170 | 171 | const char *qpu_w_mul(uint32_t wb, uint32_t X) { 172 | return X ? banka_w[wb] : bankb_w[wb]; 173 | } 174 | 175 | const char *qpu_unpack_add(uint32_t packmul, uint32_t unpack, uint32_t adda) { 176 | if ((packmul == 0) && (adda == 6)) 177 | return srcunpackadd[unpack]; 178 | if ((packmul == 1) && (adda == 4)) 179 | return srcunpackmul[unpack]; 180 | return ""; 181 | } 182 | 183 | const char *qpu_unpack_mul(uint32_t packmul, uint32_t unpack, uint32_t adda) { 184 | if ((packmul == 0) && (adda == 6)) 185 | return srcunpackmul[unpack]; 186 | if ((packmul == 1) && (adda == 4)) 187 | return srcunpackmul[unpack]; 188 | return ""; 189 | } 190 | 191 | const char *qpu_pack_add(uint32_t packmul, uint32_t pack, uint32_t wa, uint32_t X) { 192 | if ((packmul == 0) && (X==0) && (wa<=32)) //todo: what is the real limit on ra range? 
193 | return dstpackadd[pack]; 194 | return ""; 195 | } 196 | 197 | const char *qpu_pack_mul(uint32_t packmul, uint32_t pack, uint32_t wa, uint32_t X) { 198 | if ((packmul == 0) && (X==1) && (wa<=32)) //todo: what is the real limit on ra range? 199 | return dstpackmul[pack]; 200 | if (packmul == 1) 201 | return dstpackmul[pack]; 202 | return ""; 203 | } 204 | 205 | void show_qpu_add_mul(uint32_t i0, uint32_t i1) 206 | { 207 | uint32_t mulop = (i0 >> 29) & 0x7; 208 | uint32_t addop = (i0 >> 24) & 0x1f; 209 | uint32_t ra = (i0 >> 18) & 0x3f; 210 | uint32_t rb = (i0 >> 12) & 0x3f; 211 | uint32_t adda = (i0 >> 9) & 0x07; 212 | uint32_t addb = (i0 >> 6) & 0x07; 213 | uint32_t mula = (i0 >> 3) & 0x07; 214 | uint32_t mulb = (i0 >> 0) & 0x07; 215 | uint32_t op = (i1 >> 28) & 0x0f; 216 | uint32_t packbits = (i1 >> 20) & 0xff; 217 | uint32_t unpacking = (packbits >> 5) & 0x7; 218 | uint32_t packmul = (packbits >> 4) & 0x1; 219 | uint32_t packing = (packbits >> 0) & 0xf; 220 | uint32_t addcc = (i1 >> 17) & 0x07; 221 | uint32_t mulcc = (i1 >> 14) & 0x07; 222 | uint32_t F = (i1 >> 13) & 0x01; 223 | uint32_t X = (i1 >> 12) & 0x01; 224 | uint32_t wa = (i1 >> 6) & 0x3f; 225 | uint32_t wb = (i1 >> 0) & 0x3f; 226 | 227 | if (showfields) { 228 | printf("mulop=%d, addop=%d, ra=%d, rb=%d, adda=%d, addb=%d, mula=%d, mulb=%d, op=%d, unpacking=%d, packmul=%d, packing=%d, addcc=%d, mulcc=%d, F=%d, X=%d, wa=%d, wb=%d \n", 229 | mulop, addop, ra, rb, adda, addb, mula, mulb, op, unpacking, packmul, packing, addcc, mulcc, F, X, wa, wb); 230 | } 231 | 232 | uint32_t addF = (F==1) && (addop != 0) && (addcc != 0); 233 | uint32_t mulF = (F==1) && !addF; 234 | 235 | // Instruction formats: 236 | // op[cc][setf] 237 | // op[cc][setf] rd[.pack] 238 | // op[cc][setf] rd[.pack], ra[.unpack] 239 | // op[cc][setf] rd[.pack], ra[.unpack], rb[.unpack] 240 | const char *args[] = { 241 | "", " %s%s", " %s%s, %s%s", " %s%s, %s%s, %s%s" 242 | }; 243 | 244 | uint32_t arity = 3; 245 | if (addop == 0) { 246 | 
arity = 0; 247 | addcc = 1; 248 | } 249 | else if ((adda == addb) && ((addop == 7) || (addop == 8) || (addop == 21) || (addop == 23) || (addop == 24))) { 250 | arity = 2; 251 | if (addop == 21) addop = 32; 252 | } 253 | 254 | // add op always 255 | printf("%s%s%s", addops[addop], cc[addcc], setf[addF]); 256 | printf(args[arity], qpu_w_add(wa, X), qpu_pack_add(packmul, packing, wa, X), qpu_r(ra, rb, adda, op, 0), qpu_unpack_add(packmul, unpacking, adda), qpu_r(ra, rb, addb, op, 0), qpu_unpack_add(packmul, unpacking, addb)); 257 | 258 | // show mul op if non nop or control op is non nop 259 | if (mulop || (op != 1)) { 260 | 261 | uint32_t arity = 3; 262 | if (mulop == 0) { 263 | arity = 0; 264 | mulcc = 1; 265 | } 266 | else if ((mula == mulb) && (mulop == 4)) { 267 | arity = 2; 268 | if (mulop == 4) mulop = 8; 269 | } 270 | 271 | printf("; %s%s%s", mulops[mulop], cc[mulcc], setf[mulF]); 272 | ///* 000003a0: 36020037 18025841 */ xor r1, r0, r0; fmul ra1, ra0, unif 273 | printf(args[arity], qpu_w_mul(wb, X), qpu_pack_mul(packmul, packing, wb, X), qpu_r(ra, rb, mula, op, 1), qpu_unpack_mul(packmul, unpacking, mula), qpu_r(ra, rb, mulb, op, 1), qpu_unpack_mul(packmul, unpacking, mulb)); 274 | } 275 | 276 | // show control op if non nop 277 | if ((op != 1) && (op != 13)) { 278 | printf("; %s", ops[op]); 279 | } 280 | printf("\n"); 281 | 282 | } 283 | 284 | void show_qpu_branch(uint32_t i0, uint32_t i1) 285 | { 286 | uint32_t addr = i0; 287 | uint32_t unknown = (i1 >> 24) & 0x0f; 288 | uint32_t cond = (i1 >> 20) & 0x0f; 289 | uint32_t pcrel = (i1 >> 19) & 0x01; 290 | uint32_t addreg = (i1 >> 18) & 0x01; 291 | uint32_t ra = (i1 >> 13) & 0x1f; 292 | uint32_t X = (i1 >> 12) & 0x01; 293 | uint32_t wa = (i1 >> 6) & 0x3f; 294 | uint32_t wb = (i1 >> 0) & 0x3f; 295 | 296 | if (showfields) { 297 | printf("branch addr=0x%08x, unknown=%x, cond=%02d, pcrel=%x, addreg=%x, ra=%02d, X=%x, wa=%02d, wb=%02x\n", 298 | addr, unknown, cond, pcrel, addreg, ra, X, wa, wb); 299 | } 300 | // 
branch: b[link][cc] [linkreg,] [basedreg,] 301 | if (wa==39) 302 | printf("%s%s %s, %s%+d", pcrel ? "brr" : "bra", bcc[cond], qpu_w_mul(wb, X), addreg ? qpu_r(ra, ra, 6, (i1 >> 28)&0xf, 0) : "", addr); 303 | else if (wb==39) 304 | printf("%s%s %s, %s%+d", pcrel ? "brr" : "bra", bcc[cond], qpu_w_add(wa, X), addreg ? qpu_r(ra, ra, 6, (i1 >> 28)&0xf, 0) : "", addr); 305 | else 306 | printf("%s%s %s, %s, %s%+d", pcrel ? "brr" : "bra", bcc[cond], qpu_w_add(wa, X), qpu_w_mul(wb, X), addreg ? qpu_r(ra, ra, 6, (i1 >> 28)&0xf, 0) : "", addr); 307 | 308 | if (!addreg) printf(" // 0x%08x", base+addr+8*4); 309 | printf("\n"); 310 | 311 | } 312 | 313 | const char *qpu_ldi_unpack(uint32_t unpack, uint32_t data) 314 | { 315 | char *tmp = tmpalloc(128); 316 | // unpack = 1 (2 bit signed vectors), 3 = (2 bit unsigned vectors); 317 | if ((unpack==1) || (unpack==3)) { 318 | int d[16]; 319 | for (int i=0; i<16; i++) { 320 | d[i] = ((data >> (16+i-1))&0x2) | ((data >> i) & 0x1); 321 | if ((unpack == 1) && d[i] &0x2) 322 | d[i] |= 0xfffffffc; 323 | } 324 | sprintf(tmp, "[%d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d]", 325 | d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], 326 | d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15]); 327 | } 328 | else { 329 | sprintf(tmp, "0x%08x", data); 330 | } 331 | return tmp; 332 | } 333 | 334 | void show_qpu_imm32(uint32_t i0, uint32_t i1) 335 | { 336 | uint32_t data = i0; 337 | uint32_t packbits = (i1 >> 20) & 0xff; 338 | uint32_t unpacking = (packbits >> 5) & 0x7; 339 | uint32_t packmul = (packbits >> 4) & 0x1; 340 | uint32_t packing = (packbits >> 0) & 0xf; 341 | uint32_t addcc = (i1 >> 17) & 0x07; 342 | uint32_t mulcc = (i1 >> 14) & 0x07; 343 | uint32_t F = (i1 >> 13) & 0x01; 344 | uint32_t X = (i1 >> 12) & 0x01; 345 | uint32_t wa = (i1 >> 6) & 0x3f; 346 | uint32_t wb = (i1 >> 0) & 0x3f; 347 | 348 | if (showfields) { 349 | printf("imm32 data=0x%08x, unpacking=0x%d, packmul=%d, packing=%d, addcc=%x, mulcc=%x, F=%x, X=%x, 
wa=%02d, wb=%02d\n", 350 | data, unpacking, packmul, packing, addcc, mulcc, F, X, wa, wb); 351 | } 352 | 353 | const char *inst = ops[(i1 >> 28) & 0xf]; 354 | 355 | if (unpacking & 0x4) { 356 | inst = (data & 0x10) ? "sacq" : "srel"; 357 | if (data <= 0x1f) 358 | data = data & 0xffffffef; 359 | } 360 | 361 | // addop: op[cc][setf] rd[.pack?], immediate 362 | if (packbits==0 && addcc==0 && wa==39) 363 | printf("nop"); 364 | else 365 | printf("%s%s%s %s%s, %s", inst, cc[addcc], setf[F], qpu_w_add(wa, X), qpu_pack_add(packmul, packing, wa, X), qpu_ldi_unpack(unpacking, data)); 366 | 367 | // mulop: [op[cc][setf] rd[.pack?], immediate 368 | if (mulcc) { 369 | printf("; %s%s%s %s%s, %s", inst, cc[mulcc], setf[F], qpu_w_mul(wb, X), qpu_pack_mul(packmul, packing, wa, X), qpu_ldi_unpack(unpacking, data)); 370 | } 371 | 372 | printf("\n"); 373 | } 374 | 375 | void show_qpu_inst(uint32_t *inst) { 376 | uint32_t i0 = inst[0]; 377 | uint32_t i1 = inst[1]; 378 | 379 | int op = (i1 >> 28) & 0xf; 380 | if (op<14) show_qpu_add_mul(i0, i1); 381 | if (op==14) show_qpu_imm32(i0, i1); 382 | if (op==15) show_qpu_branch(i0, i1); 383 | } 384 | 385 | void show_qpu_fragment(uint32_t *inst, int length) { 386 | uint32_t i = 0; 387 | for(;i 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include // for getopt() 12 | 13 | using namespace std; 14 | 15 | enum token_t { 16 | END=-1, 17 | WORD, 18 | DOT, 19 | COMMA, 20 | SEMI, 21 | COLON, 22 | }; 23 | 24 | struct QPUreg { 25 | enum { A, B, ACCUM, SMALL } file; 26 | int num; 27 | }; 28 | 29 | struct relocation { 30 | string label; 31 | int pc; 32 | }; 33 | 34 | struct context { 35 | const char *stream; 36 | map labels; 37 | int pc; 38 | vector relocations; 39 | }; 40 | 41 | 42 | static string addOps[] = { 43 | "nop", "fadd", "fsub", "fmin", "fmax", "fminabs", "fmaxabs", 44 | "ftoi", "itof", "XXX", "XXX", "XXX", "add", "sub", "shr", 45 | "asr", "ror", "shl", "min", "max", 
"and", "or", "xor", "not", 46 | "clz", "XXX", "XXX", "XXX", "XXX", "XXX", "v8adds", "v8subs" }; 47 | 48 | static string mulOps[] = { 49 | "nop", "fmul", "mul24", "v8muld", "v8min", "v8max", "v8adds", 50 | "v8subs" }; 51 | 52 | static uint8_t addOpCode(const string& word) 53 | { 54 | for (int i=0; i < 32; i++) { 55 | if (word == addOps[i]) 56 | return i; 57 | } 58 | 59 | return 0xFF; 60 | } 61 | 62 | string printAddOpCode(uint8_t opcode) { 63 | assert((opcode >= 0) && (opcode < 32)); 64 | return addOps[opcode]; 65 | } 66 | 67 | static uint8_t mulOpCode(const string& word) 68 | { 69 | for (int i=0; i < 8; i++) { 70 | if (word == mulOps[i]) 71 | return i; 72 | } 73 | 74 | return 0xFF; 75 | } 76 | 77 | string printMulOpCode(uint8_t opcode) { 78 | assert((opcode >= 0) && (opcode < 8)); 79 | return mulOps[opcode]; 80 | } 81 | 82 | bool isRegisterWord(const string& word) { return word[0] == 'r'; } 83 | 84 | string printRegister(const QPUreg& reg) 85 | { 86 | char buffer[32]; 87 | if (reg.file == QPUreg::A || reg.file == QPUreg::B) { 88 | snprintf(buffer, 32, "r%c%d", (reg.file == QPUreg::A) ? 
'a' : 'b', 89 | reg.num); 90 | } 91 | else if (reg.file == QPUreg::ACCUM) { 92 | snprintf(buffer, 32, "r%d", reg.num); 93 | } 94 | else { 95 | snprintf(buffer, 32, ".0x%x.", reg.num); 96 | } 97 | 98 | return buffer; 99 | } 100 | 101 | void parsePossibleNumber(const char* possibleNumber, int base, int* outNumber, bool* outIsNumber) { 102 | char *endOfNumber; 103 | *outNumber = strtol(possibleNumber, &endOfNumber, base); 104 | *outIsNumber = (!(endOfNumber == possibleNumber || *endOfNumber != '\0' || errno == ERANGE)); 105 | } 106 | 107 | bool parseRegister(const string& word, QPUreg& reg) 108 | { 109 | if (word[0] != 'r') 110 | return false; 111 | 112 | int offset = 0; 113 | switch (word[1]) { 114 | case 'a': reg.file = QPUreg::A; offset = 2; break; 115 | case 'b': reg.file = QPUreg::B; offset = 2; break; 116 | default: 117 | reg.file = QPUreg::ACCUM; 118 | offset = 1; 119 | } 120 | 121 | const char* possibleNumber = (word.c_str() + offset); 122 | bool isNumber; 123 | int number; 124 | parsePossibleNumber(possibleNumber, 10, &number, &isNumber); 125 | if (!isNumber) { 126 | cerr << "Warning - couldn't interpret '" << word << "' as a register" << endl; 127 | return false; 128 | } 129 | reg.num = number; 130 | 131 | if ((reg.file == QPUreg::ACCUM) && (reg.num >= 6)) { 132 | fprintf(stderr, "Warning - accumulator out of range\n"); 133 | return false; 134 | } 135 | 136 | return true; 137 | } 138 | 139 | bool parseFullImmediate(const string& str, uint32_t* outResult, uint32_t* outType) 140 | { 141 | bool isNumber; 142 | if (str[0] == '[') { 143 | bool areAnyNegative = false; 144 | std:string cleanedString(str); 145 | cleanedString.erase(std::remove(cleanedString.begin(), cleanedString.end(), '['), cleanedString.end()); 146 | cleanedString.erase(std::remove(cleanedString.begin(), cleanedString.end(), ']'), cleanedString.end()); 147 | std::stringstream ss(cleanedString); 148 | std::string item; 149 | int itemCount = 0; 150 | int itemValues[16]; 151 | while 
(std::getline(ss, item, ',')) { 152 | if (itemCount >= 16) { 153 | break; 154 | } 155 | bool isItemNumber; 156 | int itemValue; 157 | parsePossibleNumber(item.c_str(), 10, &itemValues[itemCount], &isItemNumber); 158 | if (!isItemNumber) { 159 | cerr << "Couldn't understand '" << item << "' as an entry in an immediate list" << endl; 160 | return false; 161 | } 162 | if (itemValues[itemCount] < 0) { 163 | areAnyNegative = true; 164 | } 165 | itemCount += 1; 166 | } 167 | 168 | if (itemCount < 16) { 169 | cerr << "Found too few items in the immediate array - expected 16 but had " << itemCount << endl; 170 | return false; 171 | } 172 | 173 | if (areAnyNegative) { 174 | *outType = 0x02; 175 | } else { 176 | *outType = 0x06; 177 | } 178 | 179 | uint32_t result = 0; 180 | for (int index = 0; index < 16; index += 1) { 181 | int value = itemValues[index]; 182 | if (areAnyNegative) { 183 | if ((value < -1) || (value > 1)) { 184 | cerr << "Found an out-of-range signed value in the immediate array - expected -1, 0, or 1 but found " << value << endl; 185 | return false; 186 | } 187 | } else { 188 | if (value > 3) { 189 | cerr << "Found an out-of-range unsigned value in the immediate array - expected 0, 1, 2, or 3 but found " << value << endl; 190 | return false; 191 | } 192 | } 193 | uint32_t msb; 194 | uint32_t lsb; 195 | if (areAnyNegative) { 196 | msb = ((value & 0x80000000) >> 31); 197 | lsb = (value & 0x1); 198 | } else { 199 | msb = ((value & 0x2) >> 1); 200 | lsb = (value & 0x1); 201 | } 202 | result = (result | (lsb << (index + 0))); 203 | result = (result | (msb << (index + 16))); 204 | } 205 | 206 | *outResult = result; 207 | isNumber = true; 208 | } else { 209 | *outType = 0x00; // A full 32-bit immediate 210 | // if there is an 'x' we assume it's hex. 
211 | if (str.find_first_of("x") != string::npos) { 212 | int signedResult; 213 | parsePossibleNumber(str.c_str(), 16, &signedResult, &isNumber); 214 | *outResult = signedResult; 215 | } else if (str.find_first_of(".f") != string::npos) { 216 | float f = strtof(str.c_str(), NULL); 217 | *outResult = *(uint32_t*)&f; 218 | isNumber = true; 219 | } else { 220 | int signedResult; 221 | parsePossibleNumber(str.c_str(), 10, &signedResult, &isNumber); 222 | *outResult = signedResult; 223 | } 224 | } 225 | return isNumber; 226 | } 227 | 228 | int32_t parseSmallImmediate(const string& str) 229 | { 230 | int32_t result; 231 | if (str.find_first_of("x") != string::npos) { 232 | result = strtoul(str.c_str(), NULL, 16); 233 | if (result >= 16) { 234 | cerr << "Immediate out of range: " << str << endl; 235 | result = -1; 236 | } 237 | } else if (str.find_first_of("<<") != string::npos) { 238 | uint32_t shift = strtoul(str.c_str() + 2, NULL, 10); 239 | result = (48 + shift); 240 | } else if (str.find_first_of("-") != string::npos) { 241 | uint32_t value = strtoul(str.c_str() + 1, NULL, 10); 242 | if ((value < 1) || (value > 16)) { 243 | cerr << "Negative immediate out of range: " << str << endl; 244 | result = -1; 245 | } else { 246 | result = (32 + value); 247 | } 248 | } else { 249 | result = strtoul(str.c_str(), NULL, 10); 250 | if (result >= 16) { 251 | cerr << "Immediate out of range: " << str << endl; 252 | result = -1; 253 | } 254 | } 255 | return result; 256 | } 257 | 258 | uint8_t parseBranchCond(const string& str) 259 | { 260 | if (str == "zf") // all z flags set ("z full") 261 | return 0x0; 262 | if (str == "ze") // all z flags clear ("z empty") 263 | return 0x1; 264 | if (str == "zs") // any z flags set ("z set") 265 | return 0x2; 266 | if (str == "zc") // any z flags clear ("z clear") 267 | return 0x3; 268 | if (str == "nf") // all N flags set ("N full") 269 | return 0x4; 270 | if (str == "ne") // all N flags clear ("N empty") 271 | return 0x5; 272 | if (str == "ns") 
// any N flags set ("N set") 273 | return 0x6; 274 | if (str == "nc") // any N flags clear ("N clear") 275 | return 0x7; 276 | if (str == "cf") // all C flags set ("C full") 277 | return 0x8; 278 | if (str == "ce") // all C flags clear ("C empty") 279 | return 0x9; 280 | if (str == "cs") // any C flags set ("C set") 281 | return 0xa; 282 | if (str == "cc") // any C flags clear ("C clear") 283 | return 0xb; 284 | if (str == "*") // always 285 | return 0xf; 286 | 287 | // throw some exceptions 288 | cerr << "Invalid branch condition: " << str << endl; 289 | exit(0); 290 | } 291 | 292 | bool parsePacking(const string& str, uint32_t* outUnpack, uint32_t* outPM, uint32_t* outPack) 293 | { 294 | *outUnpack = 0; 295 | *outPM = 0; 296 | *outPack = 0; 297 | if (str == "unpack32") { 298 | *outUnpack = 0; 299 | } else if (str == "unpack16a") { 300 | *outUnpack = 1; 301 | } else if (str == "unpack16b") { 302 | *outUnpack = 2; 303 | } else if (str == "unpack8ddupe") { 304 | *outUnpack = 3; 305 | } else if (str == "unpack8a") { 306 | *outUnpack = 4; 307 | } else if (str == "unpack8b") { 308 | *outUnpack = 5; 309 | } else if (str == "unpack8c") { 310 | *outUnpack = 6; 311 | } else if (str == "unpack8d") { 312 | *outUnpack = 7; 313 | } else if (str == "pack32") { 314 | *outPack = 0; 315 | } else if (str == "pack16a") { 316 | *outPack = 1; 317 | } else if (str == "pack16b") { 318 | *outPack = 2; 319 | } else if (str == "pack8ddupe") { 320 | *outPack = 3; 321 | } else if (str == "pack8a") { 322 | *outPack = 4; 323 | } else if (str == "pack8b") { 324 | *outPack = 5; 325 | } else if (str == "pack8c") { 326 | *outPack = 6; 327 | } else if (str == "pack8d") { 328 | *outPack = 7; 329 | } else if (str == "pack32clamp") { 330 | *outPack = 8; 331 | } else if (str == "pack16aclamp") { 332 | *outPack = 9; 333 | } else if (str == "pack16bclamp") { 334 | *outPack = 10; 335 | } else if (str == "pack8ddupeclamp") { 336 | *outPack = 11; 337 | } else if (str == "pack8aclamp") { 338 | *outPack = 12; 
339 | } else if (str == "pack8bclamp") { 340 | *outPack = 13; 341 | } else if (str == "pack8cclamp") { 342 | *outPack = 14; 343 | } else if (str == "pack8dclamp") { 344 | *outPack = 15; 345 | } else { 346 | cerr << "Unknown pack condition: " << str << endl; 347 | return false; 348 | } 349 | 350 | return true; 351 | } 352 | 353 | uint8_t setALUMux(const QPUreg& reg) 354 | { 355 | switch (reg.file) { 356 | case QPUreg::A: return 0x6; 357 | case QPUreg::B: return 0x7; 358 | case QPUreg::ACCUM: 359 | if (reg.num > 6 || reg.num < 0) { 360 | cerr << "Invalid accumulator register; out of range" << endl; 361 | exit(0); 362 | } 363 | return reg.num; 364 | case QPUreg::SMALL: return 0x7; 365 | } 366 | } 367 | 368 | 369 | token_t nextToken(const char *stream, string& out, const char **ptr) 370 | { 371 | char buffer[128]; 372 | int i = 0; 373 | 374 | *ptr = stream; 375 | if (!stream || !*stream) 376 | return END; 377 | 378 | while (*stream == ' ' || *stream == '\t') 379 | stream++; 380 | 381 | if (isdigit(*stream)) 382 | { 383 | // read until we don't find a hex digit, x (for hex) or . 384 | while (isxdigit(*stream) || isdigit(*stream) || *stream == '.' || *stream == 'x') { 385 | buffer[i++] = *stream++; 386 | if (*stream == 0 || i > sizeof(buffer) - 1) 387 | break; 388 | } 389 | buffer[i++] = '\0'; 390 | out = buffer; 391 | *ptr = stream; 392 | 393 | return WORD; 394 | } 395 | 396 | 397 | if (*stream == '.') { *ptr = stream+1; return DOT; } 398 | if (*stream == ',') { *ptr = stream+1; return COMMA; } 399 | if (*stream == ';') { *ptr = stream+1; return SEMI; } 400 | if (*stream == '#') { *ptr = stream+1; return END; } 401 | if (*stream == ':') { *ptr = stream+1; return COLON; } 402 | 403 | while (*stream != '.' 
&& *stream != ',' && *stream != ';' 404 | && *stream != ' ' && *stream != '\t' 405 | && *stream != ':') 406 | { 407 | buffer[i++] = *stream++; 408 | if (*stream == 0 || i > sizeof(buffer)-1) 409 | break; 410 | } 411 | 412 | buffer[i++] = '\0'; 413 | out = buffer; 414 | *ptr = stream; 415 | return WORD; 416 | } 417 | 418 | 419 | bool aluHelper(const char *stream, QPUreg& dest, QPUreg& r1, QPUreg& r2, uint8_t& sig, uint32_t& unpack, uint32_t& pm, uint32_t& pack, const char **ptr) 420 | { 421 | string token_str; 422 | token_t tok = nextToken(stream, token_str, &stream); 423 | 424 | if (tok == DOT) { 425 | // conditional 426 | nextToken(stream, token_str, &stream); 427 | cout << "flag/conditional = " << token_str << endl; 428 | if (token_str == "ldtmu0") { 429 | sig = 10; 430 | } else if (token_str == "ldtmu1") { 431 | sig = 11; 432 | } else if (token_str == "tend") { 433 | sig = 3; 434 | } else if (parsePacking(token_str, &unpack, &pm, &pack)) { 435 | // Do nothing, the parse function has filled in the values 436 | } else { 437 | cout << "Conditional couldn't be understood: " << token_str << endl; 438 | return false; 439 | } 440 | tok = nextToken(stream, token_str, &stream); 441 | } 442 | 443 | // this is supposed to be the destination register 444 | if (tok != WORD) { 445 | cout << "Expecting word. 
Got: " << token_str << endl; 446 | return false; 447 | } 448 | 449 | if (!parseRegister(token_str, dest)) { 450 | return false; 451 | } 452 | tok = nextToken(stream, token_str, &stream); 453 | if (tok != COMMA) return false; 454 | tok = nextToken(stream, token_str, &stream); 455 | if (!parseRegister(token_str, r1)) { 456 | return false; 457 | } 458 | 459 | tok = nextToken(stream, token_str, &stream); 460 | if (tok != COMMA) return false; 461 | tok = nextToken(stream, token_str, &stream); 462 | if (!parseRegister(token_str, r2)) { 463 | r2.file = QPUreg::SMALL; 464 | int32_t imm = parseSmallImmediate(token_str); 465 | if (imm < 0) { 466 | return false; 467 | } 468 | r2.num = imm; 469 | } 470 | 471 | /* 472 | cout << "dest: " << printRegister(dest) << ", r1: " 473 | << printRegister(r1) << ", r2: " 474 | << printRegister(r2) << endl; 475 | */ 476 | 477 | *ptr = stream; 478 | return true; 479 | } 480 | 481 | 482 | uint64_t assembleALU(context& ctx, string word) 483 | { 484 | string token_str; 485 | uint8_t add_op = addOpCode(word); 486 | if (add_op == 0xFF) { 487 | cout << "FATAL (assert). Bad ADD opcode: " << word << endl; 488 | return -1; 489 | } 490 | 491 | uint32_t unpack = 0; 492 | uint32_t pm = 0; 493 | uint32_t pack = 0; 494 | 495 | QPUreg addDest, addR1, addR2; 496 | QPUreg mulDest, mulR1, mulR2; 497 | 498 | uint8_t sig = 0x1; // no-signal (TODO: plumb signals through) 499 | if (!aluHelper(ctx.stream, addDest, addR1, addR2, sig, unpack, pm, pack, &ctx.stream)) 500 | return -1; 501 | 502 | token_t tok = nextToken(ctx.stream, token_str, &ctx.stream); 503 | // this should be a semi-colon 504 | tok = nextToken(ctx.stream, token_str, &ctx.stream); 505 | uint8_t mul_op = mulOpCode(token_str); 506 | if (mul_op == 0xFF) { 507 | cout << "FATAL (assert). Bad MUL opcode: " << token_str << endl; 508 | return -1; 509 | } 510 | 511 | bool skipParseMul(false); 512 | if (mul_op == 0) { 513 | // nop. 
If the next token is a semi or END, we'll generate 514 | // the registers for them 515 | const char *discard; 516 | tok = nextToken(ctx.stream, token_str, &discard); 517 | if (tok == END || tok == SEMI) { 518 | mulDest.num = 39; 519 | mulDest.file = (addDest.file == QPUreg::A) ? QPUreg::B : QPUreg::A; 520 | mulR1 = addR1; 521 | mulR2 = addR2; 522 | skipParseMul = true; 523 | } 524 | } 525 | 526 | if (!skipParseMul) { 527 | uint8_t junk; 528 | uint32_t junk32; 529 | if (!aluHelper(ctx.stream, mulDest, mulR1, mulR2, junk, junk32, junk32, junk32, &ctx.stream)) 530 | return -1; 531 | } 532 | 533 | uint64_t ins = 0x0; 534 | uint8_t cond_add = 0x1; 535 | uint8_t cond_mul = 0x1; 536 | uint8_t sf = 0x1; 537 | if (add_op == 0) 538 | sf = 0x0; // no set flags on nop 539 | 540 | // TODO: constraints. We can only read from file A and file B once (dual-port) 541 | 542 | uint8_t ws = 0x0; 543 | // If the add pipe specifies file b for output, ws = 1 544 | if ((addDest.file == QPUreg::B) || 545 | ((addDest.file == QPUreg::ACCUM) && (mulDest.file == QPUreg::A))) { 546 | ws = 0x1; 547 | } 548 | // if ws == 1, mul pipe must specify file a or accumulator for output 549 | if (ws == 0x1 && (mulDest.file != QPUreg::A) && (mulDest.file != QPUreg::ACCUM)) { 550 | cout << "constraint check failed. mul pipe must specify register file A when write-swap set, but found " << printRegister(mulDest) << endl; 551 | return -1; 552 | } 553 | // if ws == 0, mul pipe must specify file b or accumulator for output 554 | if (ws == 0x0 && (mulDest.file != QPUreg::B) && (mulDest.file != QPUreg::ACCUM)) { 555 | cout << "constraint check failed. 
mul pipe must specify register file B when write-swap clear, but found " << printRegister(mulDest) << endl; 556 | return -1; 557 | } 558 | 559 | // TODO: handle the accumulators and the small immediate 560 | uint8_t read_a = 0x0; 561 | uint8_t read_b = 0x0; 562 | bool isReadASet = false; 563 | bool isReadBSet = false; 564 | QPUreg candidates[] = {addR1, addR2, mulR1, mulR2}; 565 | for (int index = 0; index < (sizeof(candidates)/sizeof(candidates[0])); index += 1) { 566 | QPUreg reg = candidates[index]; 567 | if (reg.file == QPUreg::A) { 568 | if (isReadASet && (read_a != reg.num)) { 569 | fprintf(stderr, "Error: Can't set multiple different general registers as sources in a single ALU instruction\n"); 570 | return -1; 571 | } 572 | isReadASet = true; 573 | read_a = reg.num; 574 | } 575 | if (reg.file == QPUreg::B) { 576 | if (isReadBSet && (read_b != reg.num)) { 577 | fprintf(stderr, "Error: Can't set multiple different general registers as sources in a single ALU instruction\n"); 578 | return -1; 579 | } 580 | isReadBSet = true; 581 | read_b = reg.num; 582 | } 583 | } 584 | 585 | // checks: 586 | // read_a not set and one of the muxes specifies file A ... 
587 | // same for read_b 588 | // read_b set and there is a small immediate value 589 | 590 | // we could have immediates in the first register slot but not sure it makes sense 591 | // As above, we should check that read_b is not already set 592 | if (addR2.file == QPUreg::SMALL) { 593 | if (isReadBSet && (read_b != addR2.num)) { 594 | fprintf(stderr, "Error: Can't set an immediate and general registers as sources in a single ALU instruction\n"); 595 | return -1; 596 | } 597 | isReadBSet = true; 598 | read_b = addR2.num; 599 | sig = 13; 600 | } 601 | if (mulR2.file == QPUreg::SMALL) { 602 | if (isReadBSet && (read_b != mulR2.num)) { 603 | fprintf(stderr, "Error: Can't set an immediate and general registers as sources in a single ALU instruction\n"); 604 | return -1; 605 | } 606 | isReadBSet = true; 607 | read_b = mulR2.num; 608 | sig = 13; 609 | } 610 | 611 | // The accumulators are mapped to r32-35 when writing to them as destinations 612 | if (addDest.file == QPUreg::ACCUM) { 613 | addDest.num += 32; 614 | } 615 | if (mulDest.file == QPUreg::ACCUM) { 616 | mulDest.num += 32; 617 | } 618 | 619 | uint8_t add_a = setALUMux(addR1) & 0x7; 620 | uint8_t add_b = setALUMux(addR2) & 0x7; 621 | uint8_t mul_a = setALUMux(mulR1) & 0x7; 622 | uint8_t mul_b = setALUMux(mulR2) & 0x7; 623 | read_a &= 0x3f; 624 | read_b &= 0x3f; 625 | mul_op &= 0x7; 626 | add_op &= 0x1f; 627 | addDest.num &= 0x3f; 628 | mulDest.num &= 0x3f; 629 | cond_add &= 0x7; 630 | cond_mul &= 0x7; 631 | sf &= 0x1; 632 | ws &= 0x1; 633 | 634 | // printf("Assembling ALU instruction: %s, %d, %d\n", printRegister(addDest).c_str(), ws, sig); 635 | 636 | printf("ALU: %s %s, %s, %s; %s %s, %s, %s\n", 637 | printAddOpCode(add_op).c_str(), 638 | printRegister(addDest).c_str(), 639 | printRegister(addR1).c_str(), 640 | printRegister(addR2).c_str(), 641 | printMulOpCode(mul_op).c_str(), 642 | printRegister(mulDest).c_str(), 643 | printRegister(mulR1).c_str(), 644 | printRegister(mulR2).c_str() 645 | ); 646 | 647 | ins 
= ((uint64_t)sig << 60) | 648 | ((uint64_t)unpack << 57) | 649 | ((uint64_t)pm << 56) | 650 | ((uint64_t)pack << 52) | 651 | ((uint64_t)cond_add << 49) | 652 | ((uint64_t)cond_mul << 46) | 653 | ((uint64_t)sf << 45) | 654 | ((uint64_t)ws << 44); 655 | ins |= ((uint64_t)addDest.num << 38) | ((uint64_t)mulDest.num << 32) | ((uint64_t)mul_op << 29) | ((uint64_t)add_op << 24); 656 | ins |= ((uint64_t)read_a << 18) | ((uint64_t)read_b << 12) | ((uint64_t)add_a << 9) | ((uint64_t)add_b << 6) | ((uint64_t)mul_a << 3) | mul_b; 657 | 658 | return ins; 659 | } 660 | 661 | uint64_t assembleLDI(context& ctx, string word) 662 | { 663 | cout << "Assembling LDI instruction ... " << endl; 664 | 665 | string token_str; 666 | token_t tok = nextToken(ctx.stream, token_str, &ctx.stream); 667 | 668 | if (tok == DOT) { 669 | // conditional ... conditionals should be on each register ? 670 | cout << "conditional ... "; 671 | // chew the conditional 672 | nextToken(ctx.stream, token_str, &ctx.stream); 673 | 674 | tok = nextToken(ctx.stream, token_str, &ctx.stream); 675 | } 676 | 677 | // this is supposed to be the register 678 | if (tok != WORD) return -1; 679 | 680 | QPUreg register1, register2; 681 | // check errors here 682 | if (!parseRegister(token_str, register1)) { 683 | return false; 684 | } 685 | tok = nextToken(ctx.stream, token_str, &ctx.stream); 686 | if (tok != COMMA) return -1; 687 | tok = nextToken(ctx.stream, token_str, &ctx.stream); 688 | 689 | // this can either be another register 690 | // (in which case we'll use both ALUs to set) 691 | // or an immediate value (in which case we'll use rX39) 692 | register2.num = 39; 693 | register2.file = (register1.file == QPUreg::A) ? QPUreg::B : QPUreg::A; 694 | if (isRegisterWord(token_str)) { 695 | if (!parseRegister(token_str, register2)) { 696 | return -1; 697 | } 698 | tok = nextToken(ctx.stream, token_str, &ctx.stream); 699 | // check that this is a comma ... 
700 | } 701 | 702 | uint32_t immediateType = 0x00; // A full 32-bit immediate 703 | unsigned int immediate; 704 | string restOfLine(ctx.stream); 705 | restOfLine = (token_str + restOfLine); 706 | if (!parseFullImmediate(restOfLine, &immediate, &immediateType)) { 707 | cerr << "Immediate couldn't be parsed: " << restOfLine << endl; 708 | return -1; 709 | } 710 | 711 | cout << "r1: " << printRegister(register1) << ", r2: " 712 | << printRegister(register2) << ", immed: 0x" 713 | << hex << immediate << dec << endl; 714 | 715 | // The accumulators are mapped to r32-35 in this context 716 | if (register1.file == QPUreg::ACCUM) { 717 | register1.num += 32; 718 | } 719 | if (register2.file == QPUreg::ACCUM) { 720 | register2.num += 32; 721 | } 722 | 723 | uint32_t high = (uint32_t)0xE << 28; 724 | high |= immediateType << 24; 725 | high |= (uint32_t)0x1 << 17; // cond_add 726 | high |= (uint32_t)0x1 << 14; // cond_mul 727 | high |= (uint32_t)0x0 << 13; // sf 728 | high |= (uint32_t)0x0 << 12; // ws 729 | uint8_t addreg = (register1.file != QPUreg::B) ? register1.num : register2.num; 730 | uint8_t mulreg = (register1.file == QPUreg::B) ? register1.num : register2.num; 731 | high |= (uint32_t)addreg << 6; 732 | high |= mulreg; 733 | uint64_t ins = ((uint64_t)high << 32) | immediate; 734 | 735 | return ins; 736 | } 737 | 738 | uint64_t assembleBRANCH(context& ctx, string word) 739 | { 740 | cout << "Assembing BRANCH instruction" << endl; 741 | 742 | QPUreg dest; 743 | string token_str; 744 | token_t tok = nextToken(ctx.stream, token_str, &ctx.stream); 745 | 746 | // relative or absolute branch? 
747 | uint8_t relative = 1; 748 | if (word == "bra") 749 | relative = 0; 750 | 751 | uint8_t branchCondition = 0xf; // by default: always (unconditional branch) 752 | if (tok == DOT) { 753 | // conditional 754 | nextToken(ctx.stream, token_str, &ctx.stream); 755 | branchCondition = parseBranchCond(token_str); 756 | tok = nextToken(ctx.stream, token_str, &ctx.stream); 757 | } 758 | 759 | // this is the destination register 760 | if (tok != WORD) { 761 | cerr << "branch expecting destination register." << endl; 762 | return -1; 763 | } 764 | if (!parseRegister(token_str, dest)) { 765 | return false; 766 | } 767 | tok = nextToken(ctx.stream, token_str, &ctx.stream); 768 | if (tok != COMMA) return false; 769 | tok = nextToken(ctx.stream, token_str, &ctx.stream); 770 | if (tok != WORD) { 771 | cerr << "branch expecting label/target" << endl; 772 | return -1; 773 | } 774 | 775 | // look it up in the labels map 776 | int target = 0xFFFFFFFF; 777 | if (ctx.labels.count(token_str) < 1) { 778 | relocation r; 779 | r.label = token_str; 780 | r.pc = ctx.pc; 781 | ctx.relocations.push_back(r); 782 | } else 783 | target = ctx.labels[token_str]; 784 | int offset = target - (ctx.pc+4*8); 785 | 786 | uint8_t raddr_a = 0; // raddr_a is only 5-bits? 
787 | uint8_t use_reg = 0; 788 | // if there's a third argument, it is a register offset 789 | const char *discard; 790 | tok = nextToken(ctx.stream, token_str, &discard); 791 | if (tok == COMMA) { 792 | QPUreg offsetReg; 793 | // chew the comma we just read 794 | ctx.stream = discard; 795 | tok = nextToken(ctx.stream, token_str, &ctx.stream); 796 | if (!parseRegister(token_str, offsetReg)) { 797 | return -1; 798 | } 799 | if (offsetReg.file != QPUreg::A) { 800 | cerr << "branch target offset register must be file A" << endl; 801 | return -1; 802 | } 803 | if (offsetReg.num > 31) { 804 | cerr << "branch target offset register must be < 32" << endl; 805 | return -1; 806 | } 807 | raddr_a = offsetReg.num; 808 | use_reg = 1; 809 | } 810 | 811 | uint8_t waddr_add = 39; // link address appears at ALU outputs 812 | uint8_t waddr_mul = 39; 813 | if (dest.file == QPUreg::A) waddr_add = dest.num; 814 | if (dest.file == QPUreg::B) waddr_mul = dest.num; 815 | 816 | // TODO: generate absolute branches too 817 | 818 | uint64_t ins = (uint64_t)0xF << 60; 819 | ins |= (uint64_t)branchCondition << 52; 820 | ins |= (uint64_t)relative << 51; 821 | ins |= (uint64_t)use_reg << 50; 822 | ins |= (uint64_t)raddr_a << 45; 823 | ins |= (uint64_t)0x0 << 44; // write-swap 824 | ins |= (uint64_t)waddr_add << 38; 825 | ins |= (uint64_t)waddr_mul << 32; 826 | ins |= (uint32_t)offset; 827 | 828 | return ins; 829 | } 830 | 831 | uint64_t assembleSEMA(context& ctx, string word) 832 | { 833 | 834 | uint64_t ins = (uint64_t)0x74 << 57; 835 | 836 | string token_str; 837 | token_t tok = nextToken(ctx.stream, token_str, &ctx.stream); 838 | if (tok != WORD) { 839 | cerr << "semaphore instruction expecting down/up or acquire/release" << endl; 840 | return -1; 841 | } 842 | 843 | uint8_t sa = 0; // up 844 | if (token_str == "down" || token_str == "acquire") 845 | sa = 1; 846 | 847 | tok = nextToken(ctx.stream, token_str, &ctx.stream); 848 | if (tok != COMMA) return -1; 849 | tok = nextToken(ctx.stream, 
token_str, &ctx.stream); 850 | uint32_t imm = parseSmallImmediate(token_str); 851 | if (imm < 0) { 852 | cerr << "semaphore out of range" << endl; 853 | return -1; 854 | } 855 | // cond_add, cond_mul = NEVER, ws, sf = false 856 | ins |= (uint64_t)39 << 38; // waddr_add 857 | ins |= (uint64_t)39 << 32; // waddr_mul 858 | ins |= sa << 4; 859 | ins |= (uint8_t)imm; 860 | 861 | cout << "Assembling SEMAPHORE instruction (" << imm << "), " << (int)sa << endl; 862 | 863 | return ins; 864 | } 865 | 866 | 867 | int main(int argc, char **argv) 868 | { 869 | char *outfname = 0; 870 | int c; 871 | 872 | char* writeCPP = NULL; 873 | while ((c = getopt(argc, argv, "o:c:")) != -1) { 874 | switch (c) { 875 | case 'o': 876 | outfname = optarg; 877 | break; 878 | case 'c': 879 | writeCPP = optarg; 880 | break; 881 | } 882 | } 883 | 884 | if (!outfname) { 885 | cerr << "Usage: " << argv[0] << " -o " << endl; 886 | return -1; 887 | } 888 | 889 | char line[128]; 890 | string token_string; 891 | 892 | struct context ctx; 893 | ctx.pc = 0; 894 | 895 | vector instructions; 896 | 897 | while (cin.getline(line, 128)) 898 | { 899 | const char *p = line; 900 | ctx.stream = p; 901 | token_t tok = nextToken(ctx.stream, token_string, &ctx.stream); 902 | 903 | if (tok == END) 904 | continue; 905 | 906 | if (tok == WORD) 907 | { 908 | // read-ahead to see if the next token is a colon in which case 909 | // this is a label. 
910 | const char *discard = NULL; 911 | string nextTokenStr; 912 | if (nextToken(ctx.stream, nextTokenStr, &discard) == COLON) { 913 | ctx.labels[token_string] = ctx.pc; 914 | continue; 915 | } 916 | 917 | enum { INVALID, ALU, BRANCH, LDI, SEMA } opType = INVALID; 918 | if (addOpCode(token_string) != 0xFF || mulOpCode(token_string) != 0xFF) 919 | opType = ALU; 920 | if (token_string == "ldi") opType = LDI; 921 | if (token_string == "bra" || token_string == "brr") opType = BRANCH; 922 | if (token_string == "sema") opType = SEMA; 923 | 924 | if (opType == INVALID) { 925 | cout << "Unable to assemble line; invalid opcode: " << line << endl; 926 | return -1; 927 | } 928 | 929 | uint64_t ins = 0; 930 | switch (opType) { 931 | case ALU: ins = assembleALU(ctx, token_string); break; 932 | case BRANCH: ins = assembleBRANCH(ctx, token_string); break; 933 | case LDI: ins = assembleLDI(ctx, token_string); break; 934 | case SEMA: ins = assembleSEMA(ctx, token_string); break; 935 | } 936 | 937 | if (ins == (uint64_t)-1) { 938 | cerr << "Error on line: " << line << endl; 939 | return -1; 940 | } 941 | 942 | instructions.push_back(ins); 943 | ctx.pc += 8; // bytes; 944 | } 945 | } 946 | 947 | // Process relocations 948 | ctx.labels["ZERO"] = 0x0; 949 | for (int i=0; i < ctx.relocations.size(); i++) 950 | { 951 | relocation& r = ctx.relocations[i]; 952 | if (ctx.labels.count(r.label) < 1) 953 | { 954 | cerr << "undefined label: " << r.label << endl; 955 | return -1; 956 | } 957 | int offset = ctx.labels[r.label] - (r.pc + 4*8); 958 | if (r.label == "ZERO") 959 | offset = 0x0; 960 | cout << "Processing relocation at " << r.pc << " : " << r.label 961 | << " : " << offset << endl; 962 | uint64_t ins = instructions[r.pc / 8]; 963 | ins &= (uint64_t)0xFFFFFFFF << 32; // zero bottom 32-bits for new value 964 | ins |= (uint32_t)offset; 965 | instructions[r.pc / 8] = ins; 966 | } 967 | 968 | FILE *outfile = fopen(outfname, "w"); 969 | if (!outfile) 970 | { 971 | cerr << "Unable to open 
output file " << string(outfname) << endl; 972 | return -1; 973 | } 974 | 975 | if (writeCPP) { 976 | fprintf(outfile, "#include \n"); 977 | fprintf(outfile, "#include \n\n"); 978 | fprintf(outfile, "uint32_t %s[%d] = {\n", writeCPP, (instructions.size() * 2)); 979 | uint32_t* instructionsData = (uint32_t*)(&instructions[0]); 980 | for (int i=0; i < instructions.size(); i++) { 981 | fprintf(outfile, " 0x%08x, 0x%08x,\n", instructionsData[(i * 2) + 0], instructionsData[(i * 2) + 1]); 982 | } 983 | fprintf(outfile, "};\n\n"); 984 | fprintf(outfile, "size_t %sByteCount = %d;\n", writeCPP, (instructions.size() * 8)); 985 | } else { 986 | for (int i=0; i < instructions.size(); i++) 987 | fwrite(&instructions[i], sizeof(uint64_t), 1, outfile); 988 | } 989 | 990 | fclose(outfile); 991 | cout << "Done. Num instructions: " << instructions.size() << ", " 992 | << instructions.size() * 8 << " bytes." << endl; 993 | } 994 | --------------------------------------------------------------------------------