├── LICENSE ├── README ├── arm.csv ├── armasm ├── Makefile ├── decode.go ├── decode_test.go ├── ext_test.go ├── gnu.go ├── inst.go ├── objdump_test.go ├── objdumpext_test.go ├── plan9x.go ├── tables.go └── testdata │ ├── Makefile │ └── decode.txt ├── armmap └── map.go ├── armspec ├── code.go ├── pseudo.go ├── pseudo.y ├── spec.go ├── specmap.go └── y.go └── codereview.cfg /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009 The Go Authors. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Google Inc. nor the names of its 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | go get rsc.io/arm 2 | 3 | http://godoc.org/rsc.io/arm 4 | -------------------------------------------------------------------------------- /armasm/Makefile: -------------------------------------------------------------------------------- 1 | tables.go: ../armmap/map.go ../arm.csv 2 | go run ../armmap/map.go -fmt=decoder ../arm.csv >_tables.go && gofmt _tables.go >tables.go && rm _tables.go 3 | -------------------------------------------------------------------------------- /armasm/decode.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package armasm 6 | 7 | import ( 8 | "encoding/binary" 9 | "fmt" 10 | ) 11 | 12 | // An instFormat describes the format of an instruction encoding. 13 | // An instruction with 32-bit value x matches the format if x&mask == value 14 | // and the condition matches. 15 | // The condition matches if x>>28 == 0xF && value>>28==0xF 16 | // or if x>>28 != 0xF and value>>28 == 0. 17 | // If x matches the format, then the rest of the fields describe how to interpret x. 
18 | // The opBits describe bits that should be extracted from x and added to the opcode. 19 | // For example opBits = 0x1234 means that the value 20 | // (2 bits at offset 1) followed by (4 bits at offset 3) 21 | // should be added to op. 22 | // Finally the args describe how to decode the instruction arguments. 23 | // args is stored as a fixed-size array; if there are fewer than len(args) arguments, 24 | // args[i] == 0 marks the end of the argument list. 25 | type instFormat struct { 26 | mask uint32 27 | value uint32 28 | priority int8 29 | op Op 30 | opBits uint64 31 | args instArgs 32 | } 33 | 34 | type instArgs [4]instArg 35 | 36 | var ( 37 | errMode = fmt.Errorf("unsupported execution mode") 38 | errShort = fmt.Errorf("truncated instruction") 39 | errUnknown = fmt.Errorf("unknown instruction") 40 | ) 41 | 42 | var decoderCover []bool 43 | 44 | // Decode decodes the leading bytes in src as a single instruction. 45 | func Decode(src []byte, mode Mode) (inst Inst, err error) { 46 | if mode != ModeARM { 47 | return Inst{}, errMode 48 | } 49 | if len(src) < 4 { 50 | return Inst{}, errShort 51 | } 52 | 53 | if decoderCover == nil { 54 | decoderCover = make([]bool, len(instFormats)) 55 | } 56 | 57 | x := binary.LittleEndian.Uint32(src) 58 | 59 | // The instFormat table contains both conditional and unconditional instructions. 60 | // Considering only the top 4 bits, the conditional instructions use mask=0, value=0, 61 | // while the unconditional instructions use mask=f, value=f. 62 | // Prepare a version of x with the condition cleared to 0 in conditional instructions 63 | // and then assume mask=f during matching. 
64 | const condMask = 0xf0000000 65 | xNoCond := x 66 | if x&condMask != condMask { 67 | xNoCond &^= condMask 68 | } 69 | var priority int8 70 | Search: 71 | for i := range instFormats { 72 | f := &instFormats[i] 73 | if xNoCond&(f.mask|condMask) != f.value || f.priority <= priority { 74 | continue 75 | } 76 | delta := uint32(0) 77 | deltaShift := uint(0) 78 | for opBits := f.opBits; opBits != 0; opBits >>= 16 { 79 | n := uint(opBits & 0xFF) 80 | off := uint((opBits >> 8) & 0xFF) 81 | delta |= (x >> off) & (1<> 8) & (1<<4 - 1)) 212 | case arg_R_12: 213 | return Reg((x >> 12) & (1<<4 - 1)) 214 | case arg_R_16: 215 | return Reg((x >> 16) & (1<<4 - 1)) 216 | 217 | case arg_R_12_nzcv: 218 | r := Reg((x >> 12) & (1<<4 - 1)) 219 | if r == R15 { 220 | return APSR_nzcv 221 | } 222 | return r 223 | 224 | case arg_R_16_WB: 225 | mode := AddrLDM 226 | if (x>>21)&1 != 0 { 227 | mode = AddrLDM_WB 228 | } 229 | return Mem{Base: Reg((x >> 16) & (1<<4 - 1)), Mode: mode} 230 | 231 | case arg_R_rotate: 232 | Rm := Reg(x & (1<<4 - 1)) 233 | typ, count := decodeShift(x) 234 | // ROR #0 here means ROR #0, but decodeShift rewrites to RRX #1. 
235 | if typ == RotateRightExt { 236 | return Reg(Rm) 237 | } 238 | return RegShift{Rm, typ, uint8(count)} 239 | 240 | case arg_R_shift_R: 241 | Rm := Reg(x & (1<<4 - 1)) 242 | Rs := Reg((x >> 8) & (1<<4 - 1)) 243 | typ := Shift((x >> 5) & (1<<2 - 1)) 244 | return RegShiftReg{Rm, typ, Rs} 245 | 246 | case arg_R_shift_imm: 247 | Rm := Reg(x & (1<<4 - 1)) 248 | typ, count := decodeShift(x) 249 | if typ == ShiftLeft && count == 0 { 250 | return Reg(Rm) 251 | } 252 | return RegShift{Rm, typ, uint8(count)} 253 | 254 | case arg_R1_0: 255 | return Reg((x & (1<<4 - 1))) 256 | case arg_R1_12: 257 | return Reg(((x >> 12) & (1<<4 - 1))) 258 | case arg_R2_0: 259 | return Reg((x & (1<<4 - 1)) | 1) 260 | case arg_R2_12: 261 | return Reg(((x >> 12) & (1<<4 - 1)) | 1) 262 | 263 | case arg_SP: 264 | return SP 265 | 266 | case arg_Sd_Dd: 267 | v := (x >> 12) & (1<<4 - 1) 268 | vx := (x >> 22) & 1 269 | sz := (x >> 8) & 1 270 | if sz != 0 { 271 | return D0 + Reg(vx<<4+v) 272 | } else { 273 | return S0 + Reg(v<<1+vx) 274 | } 275 | 276 | case arg_Dd_Sd: 277 | return decodeArg(arg_Sd_Dd, x^(1<<8)) 278 | 279 | case arg_Sd: 280 | v := (x >> 12) & (1<<4 - 1) 281 | vx := (x >> 22) & 1 282 | return S0 + Reg(v<<1+vx) 283 | 284 | case arg_Sm_Dm: 285 | v := (x >> 0) & (1<<4 - 1) 286 | vx := (x >> 5) & 1 287 | sz := (x >> 8) & 1 288 | if sz != 0 { 289 | return D0 + Reg(vx<<4+v) 290 | } else { 291 | return S0 + Reg(v<<1+vx) 292 | } 293 | 294 | case arg_Sm: 295 | v := (x >> 0) & (1<<4 - 1) 296 | vx := (x >> 5) & 1 297 | return S0 + Reg(v<<1+vx) 298 | 299 | case arg_Dn_half: 300 | v := (x >> 16) & (1<<4 - 1) 301 | vx := (x >> 7) & 1 302 | return RegX{D0 + Reg(vx<<4+v), int((x >> 21) & 1)} 303 | 304 | case arg_Sn_Dn: 305 | v := (x >> 16) & (1<<4 - 1) 306 | vx := (x >> 7) & 1 307 | sz := (x >> 8) & 1 308 | if sz != 0 { 309 | return D0 + Reg(vx<<4+v) 310 | } else { 311 | return S0 + Reg(v<<1+vx) 312 | } 313 | 314 | case arg_Sn: 315 | v := (x >> 16) & (1<<4 - 1) 316 | vx := (x >> 7) & 1 317 | return S0 
+ Reg(v<<1+vx) 318 | 319 | case arg_const: 320 | v := x & (1<<8 - 1) 321 | rot := (x >> 8) & (1<<4 - 1) * 2 322 | if rot > 0 && v&3 == 0 { 323 | // could rotate less 324 | return ImmAlt{uint8(v), uint8(rot)} 325 | } 326 | if rot >= 24 && ((v<<(32-rot))&0xFF)>>(32-rot) == v { 327 | // could wrap around to rot==0. 328 | return ImmAlt{uint8(v), uint8(rot)} 329 | } 330 | return Imm(v>>rot | v<<(32-rot)) 331 | 332 | case arg_endian: 333 | return Endian((x >> 9) & 1) 334 | 335 | case arg_fbits: 336 | return Imm((16 << ((x >> 7) & 1)) - ((x&(1<<4-1))<<1 | (x>>5)&1)) 337 | 338 | case arg_fp_0: 339 | return Imm(0) 340 | 341 | case arg_imm24: 342 | return Imm(x & (1<<24 - 1)) 343 | 344 | case arg_imm5: 345 | return Imm((x >> 7) & (1<<5 - 1)) 346 | 347 | case arg_imm5_32: 348 | x = (x >> 7) & (1<<5 - 1) 349 | if x == 0 { 350 | x = 32 351 | } 352 | return Imm(x) 353 | 354 | case arg_imm5_nz: 355 | x = (x >> 7) & (1<<5 - 1) 356 | if x == 0 { 357 | return nil 358 | } 359 | return Imm(x) 360 | 361 | case arg_imm_4at16_12at0: 362 | return Imm((x>>16)&(1<<4-1)<<12 | x&(1<<12-1)) 363 | 364 | case arg_imm_12at8_4at0: 365 | return Imm((x>>8)&(1<<12-1)<<4 | x&(1<<4-1)) 366 | 367 | case arg_imm_vfp: 368 | x = (x>>16)&(1<<4-1)<<4 | x&(1<<4-1) 369 | return Imm(x) 370 | 371 | case arg_label24: 372 | imm := (x & (1<<24 - 1)) << 2 373 | return PCRel(int32(imm<<6) >> 6) 374 | 375 | case arg_label24H: 376 | h := (x >> 24) & 1 377 | imm := (x&(1<<24-1))<<2 | h<<1 378 | return PCRel(int32(imm<<6) >> 6) 379 | 380 | case arg_label_m_12: 381 | d := int32(x & (1<<12 - 1)) 382 | return Mem{Base: PC, Mode: AddrOffset, Offset: int16(-d)} 383 | 384 | case arg_label_p_12: 385 | d := int32(x & (1<<12 - 1)) 386 | return Mem{Base: PC, Mode: AddrOffset, Offset: int16(d)} 387 | 388 | case arg_label_pm_12: 389 | d := int32(x & (1<<12 - 1)) 390 | u := (x >> 23) & 1 391 | if u == 0 { 392 | d = -d 393 | } 394 | return Mem{Base: PC, Mode: AddrOffset, Offset: int16(d)} 395 | 396 | case arg_label_pm_4_4: 397 | d := 
int32((x>>8)&(1<<4-1)<<4 | x&(1<<4-1)) 398 | u := (x >> 23) & 1 399 | if u == 0 { 400 | d = -d 401 | } 402 | return PCRel(d) 403 | 404 | case arg_lsb_width: 405 | lsb := (x >> 7) & (1<<5 - 1) 406 | msb := (x >> 16) & (1<<5 - 1) 407 | if msb < lsb || msb >= 32 { 408 | return nil 409 | } 410 | return Imm(msb + 1 - lsb) 411 | 412 | case arg_mem_R: 413 | Rn := Reg((x >> 16) & (1<<4 - 1)) 414 | return Mem{Base: Rn, Mode: AddrOffset} 415 | 416 | case arg_mem_R_pm_R_postindex: 417 | // Treat [],+/- like [,+/-{,}]{!} 418 | // by forcing shift bits to <<0 and P=0, W=0 (postindex=true). 419 | return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^((1<<7-1)<<5|1<<24|1<<21)) 420 | 421 | case arg_mem_R_pm_R_W: 422 | // Treat [,+/-]{!} like [,+/-{,}]{!} 423 | // by forcing shift bits to <<0. 424 | return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^((1<<7-1)<<5)) 425 | 426 | case arg_mem_R_pm_R_shift_imm_offset: 427 | // Treat [],+/-{,} like [,+/-{,}]{!} 428 | // by forcing P=1, W=0 (index=false, wback=false). 429 | return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^(1<<21)|1<<24) 430 | 431 | case arg_mem_R_pm_R_shift_imm_postindex: 432 | // Treat [],+/-{,} like [,+/-{,}]{!} 433 | // by forcing P=0, W=0 (postindex=true). 434 | return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^(1<<24|1<<21)) 435 | 436 | case arg_mem_R_pm_R_shift_imm_W: 437 | Rn := Reg((x >> 16) & (1<<4 - 1)) 438 | Rm := Reg(x & (1<<4 - 1)) 439 | typ, count := decodeShift(x) 440 | u := (x >> 23) & 1 441 | w := (x >> 21) & 1 442 | p := (x >> 24) & 1 443 | if p == 0 && w == 1 { 444 | return nil 445 | } 446 | sign := int8(+1) 447 | if u == 0 { 448 | sign = -1 449 | } 450 | mode := AddrMode(uint8(p<<1) | uint8(w^1)) 451 | return Mem{Base: Rn, Mode: mode, Sign: sign, Index: Rm, Shift: typ, Count: count} 452 | 453 | case arg_mem_R_pm_imm12_offset: 454 | // Treat [,#+/-] like [{,#+/-}]{!} 455 | // by forcing P=1, W=0 (index=false, wback=false). 
456 | return decodeArg(arg_mem_R_pm_imm12_W, x&^(1<<21)|1<<24) 457 | 458 | case arg_mem_R_pm_imm12_postindex: 459 | // Treat [],#+/- like [{,#+/-}]{!} 460 | // by forcing P=0, W=0 (postindex=true). 461 | return decodeArg(arg_mem_R_pm_imm12_W, x&^(1<<24|1<<21)) 462 | 463 | case arg_mem_R_pm_imm12_W: 464 | Rn := Reg((x >> 16) & (1<<4 - 1)) 465 | u := (x >> 23) & 1 466 | w := (x >> 21) & 1 467 | p := (x >> 24) & 1 468 | if p == 0 && w == 1 { 469 | return nil 470 | } 471 | sign := int8(+1) 472 | if u == 0 { 473 | sign = -1 474 | } 475 | imm := int16(x & (1<<12 - 1)) 476 | mode := AddrMode(uint8(p<<1) | uint8(w^1)) 477 | return Mem{Base: Rn, Mode: mode, Offset: int16(sign) * imm} 478 | 479 | case arg_mem_R_pm_imm8_postindex: 480 | // Treat [],#+/- like [{,#+/-}]{!} 481 | // by forcing P=0, W=0 (postindex=true). 482 | return decodeArg(arg_mem_R_pm_imm8_W, x&^(1<<24|1<<21)) 483 | 484 | case arg_mem_R_pm_imm8_W: 485 | Rn := Reg((x >> 16) & (1<<4 - 1)) 486 | u := (x >> 23) & 1 487 | w := (x >> 21) & 1 488 | p := (x >> 24) & 1 489 | if p == 0 && w == 1 { 490 | return nil 491 | } 492 | sign := int8(+1) 493 | if u == 0 { 494 | sign = -1 495 | } 496 | imm := int16((x>>8)&(1<<4-1)<<4 | x&(1<<4-1)) 497 | mode := AddrMode(uint8(p<<1) | uint8(w^1)) 498 | return Mem{Base: Rn, Mode: mode, Offset: int16(sign) * imm} 499 | 500 | case arg_mem_R_pm_imm8at0_offset: 501 | Rn := Reg((x >> 16) & (1<<4 - 1)) 502 | u := (x >> 23) & 1 503 | sign := int8(+1) 504 | if u == 0 { 505 | sign = -1 506 | } 507 | imm := int16(x&(1<<8-1)) << 2 508 | return Mem{Base: Rn, Mode: AddrOffset, Offset: int16(sign) * imm} 509 | 510 | case arg_option: 511 | return Imm(x & (1<<4 - 1)) 512 | 513 | case arg_registers: 514 | return RegList(x & (1<<16 - 1)) 515 | 516 | case arg_registers2: 517 | x &= 1<<16 - 1 518 | n := 0 519 | for i := 0; i < 16; i++ { 520 | if x>>uint(i)&1 != 0 { 521 | n++ 522 | } 523 | } 524 | if n < 2 { 525 | return nil 526 | } 527 | return RegList(x) 528 | 529 | case arg_registers1: 530 | Rt := 
(x >> 12) & (1<<4 - 1) 531 | return RegList(1 << Rt) 532 | 533 | case arg_satimm4: 534 | return Imm((x >> 16) & (1<<4 - 1)) 535 | 536 | case arg_satimm5: 537 | return Imm((x >> 16) & (1<<5 - 1)) 538 | 539 | case arg_satimm4m1: 540 | return Imm((x>>16)&(1<<4-1) + 1) 541 | 542 | case arg_satimm5m1: 543 | return Imm((x>>16)&(1<<5-1) + 1) 544 | 545 | case arg_widthm1: 546 | return Imm((x>>16)&(1<<5-1) + 1) 547 | 548 | } 549 | } 550 | 551 | // decodeShift decodes the shift-by-immediate encoded in x. 552 | func decodeShift(x uint32) (Shift, uint8) { 553 | count := (x >> 7) & (1<<5 - 1) 554 | typ := Shift((x >> 5) & (1<<2 - 1)) 555 | switch typ { 556 | case ShiftRight, ShiftRightSigned: 557 | if count == 0 { 558 | count = 32 559 | } 560 | case RotateRight: 561 | if count == 0 { 562 | typ = RotateRightExt 563 | count = 1 564 | } 565 | } 566 | return typ, uint8(count) 567 | } 568 | -------------------------------------------------------------------------------- /armasm/decode_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package armasm 6 | 7 | import ( 8 | "encoding/hex" 9 | "io/ioutil" 10 | "strconv" 11 | "strings" 12 | "testing" 13 | ) 14 | 15 | func TestDecode(t *testing.T) { 16 | data, err := ioutil.ReadFile("testdata/decode.txt") 17 | if err != nil { 18 | t.Fatal(err) 19 | } 20 | all := string(data) 21 | for strings.Contains(all, "\t\t") { 22 | all = strings.Replace(all, "\t\t", "\t", -1) 23 | } 24 | for _, line := range strings.Split(all, "\n") { 25 | line = strings.TrimSpace(line) 26 | if line == "" || strings.HasPrefix(line, "#") { 27 | continue 28 | } 29 | f := strings.SplitN(line, "\t", 4) 30 | i := strings.Index(f[0], "|") 31 | if i < 0 { 32 | t.Errorf("parsing %q: missing | separator", f[0]) 33 | continue 34 | } 35 | if i%2 != 0 { 36 | t.Errorf("parsing %q: misaligned | separator", f[0]) 37 | } 38 | size := i / 2 39 | code, err := hex.DecodeString(f[0][:i] + f[0][i+1:]) 40 | if err != nil { 41 | t.Errorf("parsing %q: %v", f[0], err) 42 | continue 43 | } 44 | mode, err := strconv.Atoi(f[1]) 45 | if err != nil { 46 | t.Errorf("invalid mode %q in: %s", f[1], line) 47 | continue 48 | } 49 | syntax, asm := f[2], f[3] 50 | inst, err := Decode(code, Mode(mode)) 51 | var out string 52 | if err != nil { 53 | out = "error: " + err.Error() 54 | } else { 55 | switch syntax { 56 | case "gnu": 57 | out = GNUSyntax(inst) 58 | case "plan9": 59 | out = plan9Syntax(inst, 0, nil, nil) 60 | default: 61 | t.Errorf("unknown syntax %q", syntax) 62 | continue 63 | } 64 | } 65 | if out != asm || inst.Len != size { 66 | t.Errorf("Decode(%s) [%s] = %s, %d, want %s, %d", f[0], syntax, out, inst.Len, asm, size) 67 | } 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /armasm/ext_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | // Support for testing against external disassembler program. 6 | // Copied and simplified from rsc.io/x86/x86asm/ext_test.go. 7 | 8 | package armasm 9 | 10 | import ( 11 | "bufio" 12 | "bytes" 13 | "encoding/hex" 14 | "flag" 15 | "fmt" 16 | "io/ioutil" 17 | "log" 18 | "math/rand" 19 | "os" 20 | "os/exec" 21 | "regexp" 22 | "runtime" 23 | "strings" 24 | "testing" 25 | "time" 26 | ) 27 | 28 | var ( 29 | printTests = flag.Bool("printtests", false, "print test cases that exercise new code paths") 30 | dumpTest = flag.Bool("dump", false, "dump all encodings") 31 | mismatch = flag.Bool("mismatch", false, "log allowed mismatches") 32 | longTest = flag.Bool("long", false, "long test") 33 | keep = flag.Bool("keep", false, "keep object files around") 34 | debug = false 35 | ) 36 | 37 | // A ExtInst represents a single decoded instruction parsed 38 | // from an external disassembler's output. 39 | type ExtInst struct { 40 | addr uint32 41 | enc [4]byte 42 | nenc int 43 | text string 44 | } 45 | 46 | func (r ExtInst) String() string { 47 | return fmt.Sprintf("%#x: % x: %s", r.addr, r.enc, r.text) 48 | } 49 | 50 | // An ExtDis is a connection between an external disassembler and a test. 51 | type ExtDis struct { 52 | Arch Mode 53 | Dec chan ExtInst 54 | File *os.File 55 | Size int 56 | KeepFile bool 57 | Cmd *exec.Cmd 58 | } 59 | 60 | // Run runs the given command - the external disassembler - and returns 61 | // a buffered reader of its standard output. 62 | func (ext *ExtDis) Run(cmd ...string) (*bufio.Reader, error) { 63 | if *keep { 64 | log.Printf("%s\n", strings.Join(cmd, " ")) 65 | } 66 | ext.Cmd = exec.Command(cmd[0], cmd[1:]...) 
67 | out, err := ext.Cmd.StdoutPipe() 68 | if err != nil { 69 | return nil, fmt.Errorf("stdoutpipe: %v", err) 70 | } 71 | if err := ext.Cmd.Start(); err != nil { 72 | return nil, fmt.Errorf("exec: %v", err) 73 | } 74 | 75 | b := bufio.NewReaderSize(out, 1<<20) 76 | return b, nil 77 | } 78 | 79 | // Wait waits for the command started with Run to exit. 80 | func (ext *ExtDis) Wait() error { 81 | return ext.Cmd.Wait() 82 | } 83 | 84 | // testExtDis tests a set of byte sequences against an external disassembler. 85 | // The disassembler is expected to produce the given syntax and be run 86 | // in the given architecture mode (16, 32, or 64-bit). 87 | // The extdis function must start the external disassembler 88 | // and then parse its output, sending the parsed instructions on ext.Dec. 89 | // The generate function calls its argument f once for each byte sequence 90 | // to be tested. The generate function itself will be called twice, and it must 91 | // make the same sequence of calls to f each time. 92 | // When a disassembly does not match the internal decoding, 93 | // allowedMismatch determines whether this mismatch should be 94 | // allowed, or else considered an error. 95 | func testExtDis( 96 | t *testing.T, 97 | syntax string, 98 | arch Mode, 99 | extdis func(ext *ExtDis) error, 100 | generate func(f func([]byte)), 101 | allowedMismatch func(text string, size int, inst *Inst, dec ExtInst) bool, 102 | ) { 103 | start := time.Now() 104 | ext := &ExtDis{ 105 | Dec: make(chan ExtInst), 106 | Arch: arch, 107 | } 108 | errc := make(chan error) 109 | 110 | // First pass: write instructions to input file for external disassembler. 111 | file, f, size, err := writeInst(generate) 112 | if err != nil { 113 | t.Fatal(err) 114 | } 115 | ext.Size = size 116 | ext.File = f 117 | defer func() { 118 | f.Close() 119 | if !*keep { 120 | os.Remove(file) 121 | } 122 | }() 123 | 124 | // Second pass: compare disassembly against our decodings. 
125 | var ( 126 | totalTests = 0 127 | totalSkips = 0 128 | totalErrors = 0 129 | 130 | errors = make([]string, 0, 100) // sampled errors, at most cap 131 | ) 132 | go func() { 133 | errc <- extdis(ext) 134 | }() 135 | generate(func(enc []byte) { 136 | dec, ok := <-ext.Dec 137 | if !ok { 138 | t.Errorf("decoding stream ended early") 139 | return 140 | } 141 | inst, text := disasm(syntax, arch, pad(enc)) 142 | totalTests++ 143 | if *dumpTest { 144 | fmt.Printf("%x -> %s [%d]\n", enc[:len(enc)], dec.text, dec.nenc) 145 | } 146 | if text != dec.text || inst.Len != dec.nenc { 147 | suffix := "" 148 | if allowedMismatch(text, size, &inst, dec) { 149 | totalSkips++ 150 | if !*mismatch { 151 | return 152 | } 153 | suffix += " (allowed mismatch)" 154 | } 155 | totalErrors++ 156 | if len(errors) >= cap(errors) { 157 | j := rand.Intn(totalErrors) 158 | if j >= cap(errors) { 159 | return 160 | } 161 | errors = append(errors[:j], errors[j+1:]...) 162 | } 163 | errors = append(errors, fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s", enc, text, inst.Len, dec.text, dec.nenc, suffix)) 164 | } 165 | }) 166 | 167 | if *mismatch { 168 | totalErrors -= totalSkips 169 | } 170 | 171 | for _, b := range errors { 172 | t.Log(b) 173 | } 174 | 175 | if totalErrors > 0 { 176 | t.Fail() 177 | } 178 | t.Logf("%d test cases, %d expected mismatches, %d failures; %.0f cases/second", totalTests, totalSkips, totalErrors, float64(totalTests)/time.Since(start).Seconds()) 179 | 180 | if err := <-errc; err != nil { 181 | t.Fatal("external disassembler: %v", err) 182 | } 183 | 184 | } 185 | 186 | const start = 0x8000 // start address of text 187 | 188 | // writeInst writes the generated byte sequences to a new file 189 | // starting at offset start. That file is intended to be the input to 190 | // the external disassembler. 
191 | func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, err error) { 192 | f, err = ioutil.TempFile("", "armasm") 193 | if err != nil { 194 | return 195 | } 196 | 197 | file = f.Name() 198 | 199 | f.Seek(start, 0) 200 | w := bufio.NewWriter(f) 201 | defer w.Flush() 202 | size = 0 203 | generate(func(x []byte) { 204 | if len(x) > 4 { 205 | x = x[:4] 206 | } 207 | if debug { 208 | fmt.Printf("%#x: %x%x\n", start+size, x, zeros[len(x):]) 209 | } 210 | w.Write(x) 211 | w.Write(zeros[len(x):]) 212 | size += len(zeros) 213 | }) 214 | return file, f, size, nil 215 | } 216 | 217 | var zeros = []byte{0, 0, 0, 0} 218 | 219 | // pad pads the code sequenc with pops. 220 | func pad(enc []byte) []byte { 221 | if len(enc) < 4 { 222 | enc = append(enc[:len(enc):len(enc)], zeros[:4-len(enc)]...) 223 | } 224 | return enc 225 | } 226 | 227 | // disasm returns the decoded instruction and text 228 | // for the given source bytes, using the given syntax and mode. 229 | func disasm(syntax string, mode Mode, src []byte) (inst Inst, text string) { 230 | // If printTests is set, we record the coverage value 231 | // before and after, and we write out the inputs for which 232 | // coverage went up, in the format expected in testdata/decode.text. 233 | // This produces a fairly small set of test cases that exercise nearly 234 | // all the code. 
235 | var cover float64 236 | if *printTests { 237 | cover -= coverage() 238 | } 239 | 240 | inst, err := Decode(src, mode) 241 | if err != nil { 242 | text = "error: " + err.Error() 243 | } else { 244 | text = inst.String() 245 | switch syntax { 246 | //case "arm": 247 | // text = ARMSyntax(inst) 248 | case "gnu": 249 | text = GNUSyntax(inst) 250 | //case "plan9": 251 | // text = plan9Syntax(inst, 0, nil) 252 | default: 253 | text = "error: unknown syntax " + syntax 254 | } 255 | } 256 | 257 | if *printTests { 258 | cover += coverage() 259 | if cover > 0 { 260 | max := len(src) 261 | if max > 4 && inst.Len <= 4 { 262 | max = 4 263 | } 264 | fmt.Printf("%x|%x\t%d\t%s\t%s\n", src[:inst.Len], src[inst.Len:max], mode, syntax, text) 265 | } 266 | } 267 | 268 | return 269 | } 270 | 271 | // coverage returns a floating point number denoting the 272 | // test coverage until now. The number increases when new code paths are exercised, 273 | // both in the Go program and in the decoder byte code. 274 | func coverage() float64 { 275 | /* 276 | testing.Coverage is not in the main distribution. 277 | The implementation, which must go in package testing, is: 278 | 279 | // Coverage reports the current code coverage as a fraction in the range [0, 1]. 280 | func Coverage() float64 { 281 | var n, d int64 282 | for _, counters := range cover.Counters { 283 | for _, c := range counters { 284 | if c > 0 { 285 | n++ 286 | } 287 | d++ 288 | } 289 | } 290 | if d == 0 { 291 | return 0 292 | } 293 | return float64(n) / float64(d) 294 | } 295 | */ 296 | 297 | var f float64 298 | f += testing.Coverage() 299 | f += decodeCoverage() 300 | return f 301 | } 302 | 303 | func decodeCoverage() float64 { 304 | n := 0 305 | for _, t := range decoderCover { 306 | if t { 307 | n++ 308 | } 309 | } 310 | return float64(1+n) / float64(1+len(decoderCover)) 311 | } 312 | 313 | // Helpers for writing disassembler output parsers. 

// hasPrefix reports whether some word of s starts with one of the prefixes.
// Words are delimited by single space characters; the scan restarts
// immediately after each space, so the text remaining after any space is
// tested as well. An empty s never matches, not even an empty prefix.
func hasPrefix(s string, prefixes ...string) bool {
	for _, prefix := range prefixes {
		rest := s
		for rest != "" {
			if strings.HasPrefix(rest, prefix) {
				return true
			}
			sp := strings.IndexByte(rest, ' ')
			if sp < 0 {
				break
			}
			rest = rest[sp+1:]
		}
	}
	return false
}

// contains reports whether at least one of the substrings occurs in s.
func contains(s string, substrings ...string) bool {
	for i := 0; i < len(substrings); i++ {
		if strings.Contains(s, substrings[i]) {
			return true
		}
	}
	return false
}

// isHex reports whether b is a hexadecimal digit (0-9, A-F or a-f).
func isHex(b byte) bool {
	switch {
	case '0' <= b && b <= '9',
		'a' <= b && b <= 'f',
		'A' <= b && b <= 'F':
		return true
	}
	return false
}

// parseHex parses the hexadecimal byte dump in hex,
// appending the parsed bytes to raw and returning the updated slice.
// The returned bool is true if the whole dump was valid and false
// if any invalid hex was found.
// Spaces and tabs between bytes are okay but any other non-hex is not.
350 | func parseHex(hex []byte, raw []byte) ([]byte, bool) { 351 | hex = trimSpace(hex) 352 | for j := 0; j < len(hex); { 353 | for hex[j] == ' ' || hex[j] == '\t' { 354 | j++ 355 | } 356 | if j >= len(hex) { 357 | break 358 | } 359 | if j+2 > len(hex) || !isHex(hex[j]) || !isHex(hex[j+1]) { 360 | return nil, false 361 | } 362 | raw = append(raw, unhex[hex[j]]<<4|unhex[hex[j+1]]) 363 | j += 2 364 | } 365 | return raw, true 366 | } 367 | 368 | var unhex = [256]byte{ 369 | '0': 0, 370 | '1': 1, 371 | '2': 2, 372 | '3': 3, 373 | '4': 4, 374 | '5': 5, 375 | '6': 6, 376 | '7': 7, 377 | '8': 8, 378 | '9': 9, 379 | 'A': 10, 380 | 'B': 11, 381 | 'C': 12, 382 | 'D': 13, 383 | 'E': 14, 384 | 'F': 15, 385 | 'a': 10, 386 | 'b': 11, 387 | 'c': 12, 388 | 'd': 13, 389 | 'e': 14, 390 | 'f': 15, 391 | } 392 | 393 | // index is like bytes.Index(s, []byte(t)) but avoids the allocation. 394 | func index(s []byte, t string) int { 395 | i := 0 396 | for { 397 | j := bytes.IndexByte(s[i:], t[0]) 398 | if j < 0 { 399 | return -1 400 | } 401 | i = i + j 402 | if i+len(t) > len(s) { 403 | return -1 404 | } 405 | for k := 1; k < len(t); k++ { 406 | if s[i+k] != t[k] { 407 | goto nomatch 408 | } 409 | } 410 | return i 411 | nomatch: 412 | i++ 413 | } 414 | } 415 | 416 | // fixSpace rewrites runs of spaces, tabs, and newline characters into single spaces in s. 417 | // If s must be rewritten, it is rewritten in place. 
418 | func fixSpace(s []byte) []byte { 419 | s = trimSpace(s) 420 | for i := 0; i < len(s); i++ { 421 | if s[i] == '\t' || s[i] == '\n' || i > 0 && s[i] == ' ' && s[i-1] == ' ' { 422 | goto Fix 423 | } 424 | } 425 | return s 426 | 427 | Fix: 428 | b := s 429 | w := 0 430 | for i := 0; i < len(s); i++ { 431 | c := s[i] 432 | if c == '\t' || c == '\n' { 433 | c = ' ' 434 | } 435 | if c == ' ' && w > 0 && b[w-1] == ' ' { 436 | continue 437 | } 438 | b[w] = c 439 | w++ 440 | } 441 | if w > 0 && b[w-1] == ' ' { 442 | w-- 443 | } 444 | return b[:w] 445 | } 446 | 447 | // trimSpace trims leading and trailing space from s, returning a subslice of s. 448 | func trimSpace(s []byte) []byte { 449 | j := len(s) 450 | for j > 0 && (s[j-1] == ' ' || s[j-1] == '\t' || s[j-1] == '\n') { 451 | j-- 452 | } 453 | i := 0 454 | for i < j && (s[i] == ' ' || s[i] == '\t') { 455 | i++ 456 | } 457 | return s[i:j] 458 | } 459 | 460 | // pcrel matches instructions using relative addressing mode. 461 | var ( 462 | pcrel = regexp.MustCompile(`^((?:.* )?(?:b|bl)x?(?:eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le)?) 0x([0-9a-f]+)$`) 463 | ) 464 | 465 | // Generators. 466 | // 467 | // The test cases are described as functions that invoke a callback repeatedly, 468 | // with a new input sequence each time. These helpers make writing those 469 | // a little easier. 470 | 471 | // condCases generates conditional instructions. 472 | func condCases(t *testing.T) func(func([]byte)) { 473 | return func(try func([]byte)) { 474 | // All the strides are relatively prime to 2 and therefore to 2²⁸, 475 | // so we will not repeat any instructions until we have tried all 2²⁸. 476 | // Using a stride other than 1 is meant to visit the instructions in a 477 | // pseudorandom order, which gives better variety in the set of 478 | // test cases chosen by -printtests. 
479 | stride := uint32(10007) 480 | n := 1 << 28 / 7 481 | if testing.Short() { 482 | stride = 100003 483 | n = 1 << 28 / 1001 484 | } else if *longTest { 485 | stride = 200000033 486 | n = 1 << 28 487 | } 488 | x := uint32(0) 489 | for i := 0; i < n; i++ { 490 | enc := (x%15)<<28 | x&(1<<28-1) 491 | try([]byte{byte(enc), byte(enc >> 8), byte(enc >> 16), byte(enc >> 24)}) 492 | x += stride 493 | } 494 | } 495 | } 496 | 497 | // uncondCases generates unconditional instructions. 498 | func uncondCases(t *testing.T) func(func([]byte)) { 499 | return func(try func([]byte)) { 500 | condCases(t)(func(enc []byte) { 501 | enc[3] |= 0xF0 502 | try(enc) 503 | }) 504 | } 505 | } 506 | 507 | func countBits(x uint32) int { 508 | n := 0 509 | for ; x != 0; x >>= 1 { 510 | n += int(x & 1) 511 | } 512 | return n 513 | } 514 | 515 | func expandBits(x, m uint32) uint32 { 516 | var out uint32 517 | for i := uint(0); i < 32; i++ { 518 | out >>= 1 519 | if m&1 != 0 { 520 | out |= (x & 1) << 31 521 | x >>= 1 522 | } 523 | m >>= 1 524 | } 525 | return out 526 | } 527 | 528 | func tryCondMask(mask, val uint32, try func([]byte)) { 529 | n := countBits(^mask) 530 | bits := uint32(0) 531 | for i := 0; i < 1<> 8), byte(x >> 16), byte(x >> 24)}) 535 | } 536 | } 537 | 538 | // vfpCases generates VFP instructions. 539 | func vfpCases(t *testing.T) func(func([]byte)) { 540 | const ( 541 | vfpmask uint32 = 0xFF00FE10 542 | vfp uint32 = 0x0E009A00 543 | ) 544 | return func(try func([]byte)) { 545 | tryCondMask(0xff00fe10, 0x0e009a00, try) // standard VFP instruction space 546 | tryCondMask(0xffc00f7f, 0x0e000b10, try) // VFP MOV core reg to/from float64 half 547 | tryCondMask(0xffe00f7f, 0x0e000a10, try) // VFP MOV core reg to/from float32 548 | tryCondMask(0xffef0fff, 0x0ee10a10, try) // VFP MOV core reg to/from cond codes 549 | } 550 | } 551 | 552 | // hexCases generates the cases written in hexadecimal in the encoded string. 
553 | // Spaces in 'encoded' separate entire test cases, not individual bytes. 554 | func hexCases(t *testing.T, encoded string) func(func([]byte)) { 555 | return func(try func([]byte)) { 556 | for _, x := range strings.Fields(encoded) { 557 | src, err := hex.DecodeString(x) 558 | if err != nil { 559 | t.Errorf("parsing %q: %v", x, err) 560 | } 561 | try(src) 562 | } 563 | } 564 | } 565 | 566 | // testdataCases generates the test cases recorded in testdata/decode.txt. 567 | // It only uses the inputs; it ignores the answers recorded in that file. 568 | func testdataCases(t *testing.T) func(func([]byte)) { 569 | var codes [][]byte 570 | data, err := ioutil.ReadFile("testdata/decode.txt") 571 | if err != nil { 572 | t.Fatal(err) 573 | } 574 | for _, line := range strings.Split(string(data), "\n") { 575 | line = strings.TrimSpace(line) 576 | if line == "" || strings.HasPrefix(line, "#") { 577 | continue 578 | } 579 | f := strings.Fields(line)[0] 580 | i := strings.Index(f, "|") 581 | if i < 0 { 582 | t.Errorf("parsing %q: missing | separator", f) 583 | continue 584 | } 585 | if i%2 != 0 { 586 | t.Errorf("parsing %q: misaligned | separator", f) 587 | } 588 | code, err := hex.DecodeString(f[:i] + f[i+1:]) 589 | if err != nil { 590 | t.Errorf("parsing %q: %v", f, err) 591 | continue 592 | } 593 | codes = append(codes, code) 594 | } 595 | 596 | return func(try func([]byte)) { 597 | for _, code := range codes { 598 | try(code) 599 | } 600 | } 601 | } 602 | 603 | func caller(skip int) string { 604 | pc, _, _, _ := runtime.Caller(skip) 605 | f := runtime.FuncForPC(pc) 606 | name := "?" 607 | if f != nil { 608 | name = f.Name() 609 | if i := strings.LastIndex(name, "."); i >= 0 { 610 | name = name[i+1:] 611 | } 612 | } 613 | return name 614 | } 615 | -------------------------------------------------------------------------------- /armasm/gnu.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Go Authors. 
All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package armasm 6 | 7 | import ( 8 | "bytes" 9 | "fmt" 10 | "strings" 11 | ) 12 | 13 | var saveDot = strings.NewReplacer( 14 | ".F16", "_dot_F16", 15 | ".F32", "_dot_F32", 16 | ".F64", "_dot_F64", 17 | ".S32", "_dot_S32", 18 | ".U32", "_dot_U32", 19 | ".FXS", "_dot_S", 20 | ".FXU", "_dot_U", 21 | ".32", "_dot_32", 22 | ) 23 | 24 | // GNUSyntax returns the GNU assembler syntax for the instruction, as defined by GNU binutils. 25 | // This form typically matches the syntax defined in the ARM Reference Manual. 26 | func GNUSyntax(inst Inst) string { 27 | var buf bytes.Buffer 28 | op := inst.Op.String() 29 | op = saveDot.Replace(op) 30 | op = strings.Replace(op, ".", "", -1) 31 | op = strings.Replace(op, "_dot_", ".", -1) 32 | op = strings.ToLower(op) 33 | buf.WriteString(op) 34 | sep := " " 35 | for i, arg := range inst.Args { 36 | if arg == nil { 37 | break 38 | } 39 | text := gnuArg(&inst, i, arg) 40 | if text == "" { 41 | continue 42 | } 43 | buf.WriteString(sep) 44 | sep = ", " 45 | buf.WriteString(text) 46 | } 47 | return buf.String() 48 | } 49 | 50 | func gnuArg(inst *Inst, argIndex int, arg Arg) string { 51 | switch inst.Op &^ 15 { 52 | case LDRD_EQ, LDREXD_EQ, STRD_EQ: 53 | if argIndex == 1 { 54 | // second argument in consecutive pair not printed 55 | return "" 56 | } 57 | case STREXD_EQ: 58 | if argIndex == 2 { 59 | // second argument in consecutive pair not printed 60 | return "" 61 | } 62 | } 63 | 64 | switch arg := arg.(type) { 65 | case Imm: 66 | switch inst.Op &^ 15 { 67 | case BKPT_EQ: 68 | return fmt.Sprintf("%#04x", uint32(arg)) 69 | case SVC_EQ: 70 | return fmt.Sprintf("%#08x", uint32(arg)) 71 | } 72 | return fmt.Sprintf("#%d", int32(arg)) 73 | 74 | case ImmAlt: 75 | return fmt.Sprintf("#%d, %d", arg.Val, arg.Rot) 76 | 77 | case Mem: 78 | R := gnuArg(inst, -1, arg.Base) 79 | X := "" 80 | if arg.Sign != 0 { 81 | X 
= "" 82 | if arg.Sign < 0 { 83 | X = "-" 84 | } 85 | X += gnuArg(inst, -1, arg.Index) 86 | if arg.Shift == ShiftLeft && arg.Count == 0 { 87 | // nothing 88 | } else if arg.Shift == RotateRightExt { 89 | X += ", rrx" 90 | } else { 91 | X += fmt.Sprintf(", %s #%d", strings.ToLower(arg.Shift.String()), arg.Count) 92 | } 93 | } else { 94 | X = fmt.Sprintf("#%d", arg.Offset) 95 | } 96 | 97 | switch arg.Mode { 98 | case AddrOffset: 99 | if X == "#0" { 100 | return fmt.Sprintf("[%s]", R) 101 | } 102 | return fmt.Sprintf("[%s, %s]", R, X) 103 | case AddrPreIndex: 104 | return fmt.Sprintf("[%s, %s]!", R, X) 105 | case AddrPostIndex: 106 | return fmt.Sprintf("[%s], %s", R, X) 107 | case AddrLDM: 108 | if X == "#0" { 109 | return R 110 | } 111 | case AddrLDM_WB: 112 | if X == "#0" { 113 | return R + "!" 114 | } 115 | } 116 | return fmt.Sprintf("[%s Mode(%d) %s]", R, int(arg.Mode), X) 117 | 118 | case PCRel: 119 | return fmt.Sprintf(".%+#x", int32(arg)+4) 120 | 121 | case Reg: 122 | switch inst.Op &^ 15 { 123 | case LDREX_EQ: 124 | if argIndex == 0 { 125 | return fmt.Sprintf("r%d", int32(arg)) 126 | } 127 | } 128 | switch arg { 129 | case R10: 130 | return "sl" 131 | case R11: 132 | return "fp" 133 | case R12: 134 | return "ip" 135 | } 136 | 137 | case RegList: 138 | var buf bytes.Buffer 139 | fmt.Fprintf(&buf, "{") 140 | sep := "" 141 | for i := 0; i < 16; i++ { 142 | if arg&(1<= Op(len(opstr)) || opstr[op] == "" { 41 | return fmt.Sprintf("Op(%d)", int(op)) 42 | } 43 | return opstr[op] 44 | } 45 | 46 | // An Inst is a single instruction. 47 | type Inst struct { 48 | Op Op // Opcode mnemonic 49 | Enc uint32 // Raw encoding bits. 50 | Len int // Length of encoding in bytes. 51 | Args Args // Instruction arguments, in ARM manual order. 
52 | } 53 | 54 | func (i Inst) String() string { 55 | var buf bytes.Buffer 56 | buf.WriteString(i.Op.String()) 57 | for j, arg := range i.Args { 58 | if arg == nil { 59 | break 60 | } 61 | if j == 0 { 62 | buf.WriteString(" ") 63 | } else { 64 | buf.WriteString(", ") 65 | } 66 | buf.WriteString(arg.String()) 67 | } 68 | return buf.String() 69 | } 70 | 71 | // An Args holds the instruction arguments. 72 | // If an instruction has fewer than 4 arguments, 73 | // the final elements in the array are nil. 74 | type Args [4]Arg 75 | 76 | // An Arg is a single instruction argument, one of these types: 77 | // Endian, Imm, Mem, PCRel, Reg, RegList, RegShift, RegShiftReg. 78 | type Arg interface { 79 | IsArg() 80 | String() string 81 | } 82 | 83 | type Float32Imm float32 84 | 85 | func (Float32Imm) IsArg() {} 86 | 87 | func (f Float32Imm) String() string { 88 | return fmt.Sprintf("#%v", float32(f)) 89 | } 90 | 91 | type Float64Imm float32 92 | 93 | func (Float64Imm) IsArg() {} 94 | 95 | func (f Float64Imm) String() string { 96 | return fmt.Sprintf("#%v", float64(f)) 97 | } 98 | 99 | // An Imm is an integer constant. 100 | type Imm uint32 101 | 102 | func (Imm) IsArg() {} 103 | 104 | func (i Imm) String() string { 105 | return fmt.Sprintf("#%#x", uint32(i)) 106 | } 107 | 108 | // A ImmAlt is an alternate encoding of an integer constant. 109 | type ImmAlt struct { 110 | Val uint8 111 | Rot uint8 112 | } 113 | 114 | func (ImmAlt) IsArg() {} 115 | 116 | func (i ImmAlt) Imm() Imm { 117 | v := uint32(i.Val) 118 | r := uint(i.Rot) 119 | return Imm(v>>r | v<<(32-r)) 120 | } 121 | 122 | func (i ImmAlt) String() string { 123 | return fmt.Sprintf("#%#x, %d", i.Val, i.Rot) 124 | } 125 | 126 | // A Label is a text (code) address. 127 | type Label uint32 128 | 129 | func (Label) IsArg() {} 130 | 131 | func (i Label) String() string { 132 | return fmt.Sprintf("%#x", uint32(i)) 133 | } 134 | 135 | // A Reg is a single register. 
// The zero value denotes R0, not the absence of a register.
type Reg uint8

// Register numbering: the sixteen core registers come first, followed by
// the single-precision S registers, the double-precision D registers and
// the status registers. SP, LR and PC are aliases for R13, R14 and R15.
const (
	R0 Reg = iota
	R1
	R2
	R3
	R4
	R5
	R6
	R7
	R8
	R9
	R10
	R11
	R12
	R13
	R14
	R15

	S0
	S1
	S2
	S3
	S4
	S5
	S6
	S7
	S8
	S9
	S10
	S11
	S12
	S13
	S14
	S15
	S16
	S17
	S18
	S19
	S20
	S21
	S22
	S23
	S24
	S25
	S26
	S27
	S28
	S29
	S30
	S31

	D0
	D1
	D2
	D3
	D4
	D5
	D6
	D7
	D8
	D9
	D10
	D11
	D12
	D13
	D14
	D15
	D16
	D17
	D18
	D19
	D20
	D21
	D22
	D23
	D24
	D25
	D26
	D27
	D28
	D29
	D30
	D31

	APSR
	APSR_nzcv
	FPSCR

	SP = R13
	LR = R14
	PC = R15
)

// IsArg is a no-op method; having it lets Reg be used as an Arg.
func (Reg) IsArg() {}

// String returns the register name: "SP", "LR" and "PC" for R13-R15,
// "R<n>", "S<n>" or "D<n>" for the numbered registers, the literal names
// of the status registers, and "Reg(<n>)" for any other value.
func (r Reg) String() string {
	// Registers with a fixed spelling.
	switch r {
	case SP:
		return "SP"
	case LR:
		return "LR"
	case PC:
		return "PC"
	case APSR:
		return "APSR"
	case APSR_nzcv:
		return "APSR_nzcv"
	case FPSCR:
		return "FPSCR"
	}

	// Numbered banks, tested in ascending order so each case only needs
	// an upper bound (R0 is the zero value of the unsigned type).
	switch {
	case r <= R15:
		return fmt.Sprintf("R%d", int(r-R0))
	case r <= S31:
		return fmt.Sprintf("S%d", int(r-S0))
	case r <= D31:
		return fmt.Sprintf("D%d", int(r-D0))
	}
	return fmt.Sprintf("Reg(%d)", int(r))
}

// A RegX represents a fraction of a multi-value register.
// The Index field specifies the index number,
// but the size of the fraction is not specified.
// It must be inferred from the instruction and the register type.
// For example, in a VMOV instruction, RegX{D5, 1} represents
// the top 32 bits of the 64-bit D5 register.
267 | type RegX struct { 268 | Reg Reg 269 | Index int 270 | } 271 | 272 | func (RegX) IsArg() {} 273 | 274 | func (r RegX) String() string { 275 | return fmt.Sprintf("%s[%d]", r.Reg, r.Index) 276 | } 277 | 278 | // A RegList is a register list. 279 | // Bits at indexes x = 0 through 15 indicate whether the corresponding Rx register is in the list. 280 | type RegList uint16 281 | 282 | func (RegList) IsArg() {} 283 | 284 | func (r RegList) String() string { 285 | var buf bytes.Buffer 286 | fmt.Fprintf(&buf, "{") 287 | sep := "" 288 | for i := 0; i < 16; i++ { 289 | if r&(1<= 4 { 110 | raw := binary.LittleEndian.Uint32(dec.enc[:4]) 111 | 112 | // word 21FFF0B5. 113 | // the manual is clear that this is pre-indexed mode (with !) but libopcodes generates post-index (without !). 114 | if raw&0x01200000 == 0x01200000 && strings.Replace(text, "!", "", -1) == dec.text { 115 | return true 116 | } 117 | 118 | // word C100543E: libopcodes says tst, but no evidence for that. 119 | if strings.HasPrefix(dec.text, "tst") && raw&0x0ff00000 != 0x03100000 && raw&0x0ff00000 != 0x01100000 { 120 | return true 121 | } 122 | 123 | // word C3203CE8: libopcodes says teq, but no evidence for that. 124 | if strings.HasPrefix(dec.text, "teq") && raw&0x0ff00000 != 0x03300000 && raw&0x0ff00000 != 0x01300000 { 125 | return true 126 | } 127 | 128 | // word D14C552E: libopcodes says cmp but no evidence for that. 129 | if strings.HasPrefix(dec.text, "cmp") && raw&0x0ff00000 != 0x03500000 && raw&0x0ff00000 != 0x01500000 { 130 | return true 131 | } 132 | 133 | // word 2166AA4A: libopcodes says cmn but no evidence for that. 134 | if strings.HasPrefix(dec.text, "cmn") && raw&0x0ff00000 != 0x03700000 && raw&0x0ff00000 != 0x01700000 { 135 | return true 136 | } 137 | 138 | // word E70AEEEF: libopcodes says str but no evidence for that. 
139 | if strings.HasPrefix(dec.text, "str") && len(dec.text) >= 5 && (dec.text[3] == ' ' || dec.text[5] == ' ') && raw&0x0e500018 != 0x06000000 && raw&0x0e500000 != 0x0400000 { 140 | return true 141 | } 142 | 143 | // word B0AF48F4: libopcodes says strd but P=0,W=1 which is unpredictable. 144 | if hasPrefix(dec.text, "ldr", "str") && raw&0x01200000 == 0x00200000 { 145 | return true 146 | } 147 | 148 | // word B6CC1C76: libopcodes inexplicably says 'uxtab16lt r1, ip, r6, ROR #24' instead of 'uxtab16lt r1, ip, r6, ror #24' 149 | if strings.ToLower(dec.text) == text { 150 | return true 151 | } 152 | 153 | // word F410FDA1: libopcodes says PLDW but the manual is clear that PLDW is F5/F7, not F4. 154 | // word F7D0FB17: libopcodes says PLDW but the manual is clear that PLDW has 0x10 clear 155 | if hasPrefix(dec.text, "pld") && raw&0xfd000010 != 0xf5000000 { 156 | return true 157 | } 158 | 159 | // word F650FE14: libopcodes says PLI but the manual is clear that PLI has 0x10 clear 160 | if hasPrefix(dec.text, "pli") && raw&0xff000010 != 0xf6000000 { 161 | return true 162 | } 163 | } 164 | 165 | return false 166 | } 167 | 168 | // Instructions known to libopcodes (or xed) but not to us. 169 | // Most of these are floating point coprocessor instructions. 
170 | var unsupported = strings.Fields(` 171 | abs 172 | acs 173 | adf 174 | aes 175 | asn 176 | atn 177 | cdp 178 | cf 179 | cmf 180 | cnf 181 | cos 182 | cps 183 | crc32 184 | dvf 185 | eret 186 | exp 187 | fadd 188 | fcmp 189 | fcpy 190 | fcvt 191 | fdiv 192 | fdv 193 | fix 194 | fld 195 | flt 196 | fmac 197 | fmd 198 | fml 199 | fmr 200 | fms 201 | fmul 202 | fmx 203 | fneg 204 | fnm 205 | frd 206 | fsit 207 | fsq 208 | fst 209 | fsu 210 | fto 211 | fui 212 | hlt 213 | hvc 214 | lda 215 | ldc 216 | ldf 217 | lfm 218 | lgn 219 | log 220 | mar 221 | mcr 222 | mcrr 223 | mia 224 | mnf 225 | mra 226 | mrc 227 | mrrc 228 | mrs 229 | msr 230 | msr 231 | muf 232 | mvf 233 | nrm 234 | pol 235 | pow 236 | rdf 237 | rfc 238 | rfe 239 | rfs 240 | rmf 241 | rnd 242 | rpw 243 | rsf 244 | sdiv 245 | sev 246 | sfm 247 | sha1 248 | sha256 249 | sin 250 | smc 251 | sqt 252 | srs 253 | stc 254 | stf 255 | stl 256 | suf 257 | tan 258 | udf 259 | udiv 260 | urd 261 | vfma 262 | vfms 263 | vfnma 264 | vfnms 265 | vrint 266 | wfc 267 | wfs 268 | `) 269 | -------------------------------------------------------------------------------- /armasm/objdumpext_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Copied and simplified from rsc.io/x86/x86asm/objdumpext_test.go. 
6 | 7 | package armasm 8 | 9 | import ( 10 | "bytes" 11 | "debug/elf" 12 | "encoding/binary" 13 | "fmt" 14 | "io" 15 | "log" 16 | "os" 17 | "strconv" 18 | "strings" 19 | "testing" 20 | ) 21 | 22 | const objdumpPath = "/usr/local/bin/arm-linux-elf-objdump" 23 | 24 | func testObjdumpARM(t *testing.T, generate func(func([]byte))) { 25 | testObjdumpArch(t, generate, ModeARM) 26 | } 27 | 28 | func testObjdumpArch(t *testing.T, generate func(func([]byte)), arch Mode) { 29 | if _, err := os.Stat(objdumpPath); err != nil { 30 | if !testing.Short() { 31 | t.Fatal(err) 32 | } 33 | t.Skip(err) 34 | } 35 | 36 | testExtDis(t, "gnu", arch, objdump, generate, allowedMismatchObjdump) 37 | } 38 | 39 | func objdump(ext *ExtDis) error { 40 | // File already written with instructions; add ELF header. 41 | if ext.Arch == ModeARM { 42 | if err := writeELF32(ext.File, ext.Size); err != nil { 43 | return err 44 | } 45 | } else { 46 | panic("unknown arch") 47 | } 48 | 49 | b, err := ext.Run(objdumpPath, "-d", "-z", ext.File.Name()) 50 | if err != nil { 51 | return err 52 | } 53 | 54 | var ( 55 | nmatch int 56 | reading bool 57 | next uint32 = start 58 | addr uint32 59 | encbuf [4]byte 60 | enc []byte 61 | text string 62 | ) 63 | flush := func() { 64 | if addr == next { 65 | if m := pcrel.FindStringSubmatch(text); m != nil { 66 | targ, _ := strconv.ParseUint(m[2], 16, 64) 67 | text = fmt.Sprintf("%s .%+#x", m[1], int32(uint32(targ)-addr-uint32(len(enc)))) 68 | } 69 | if strings.HasPrefix(text, "stmia") { 70 | text = "stm" + text[5:] 71 | } 72 | if strings.HasPrefix(text, "stmfd") { 73 | text = "stmdb" + text[5:] 74 | } 75 | if strings.HasPrefix(text, "ldmfd") { 76 | text = "ldm" + text[5:] 77 | } 78 | text = strings.Replace(text, "#0.0", "#0", -1) 79 | if text == "undefined" && len(enc) == 4 { 80 | text = "error: unknown instruction" 81 | enc = nil 82 | } 83 | if len(enc) == 4 { 84 | // prints as word but we want to record bytes 85 | enc[0], enc[3] = enc[3], enc[0] 86 | enc[1], enc[2] = 
enc[2], enc[1] 87 | } 88 | ext.Dec <- ExtInst{addr, encbuf, len(enc), text} 89 | encbuf = [4]byte{} 90 | enc = nil 91 | next += 4 92 | } 93 | } 94 | var textangle = []byte("<.text>:") 95 | for { 96 | line, err := b.ReadSlice('\n') 97 | if err != nil { 98 | if err == io.EOF { 99 | break 100 | } 101 | return fmt.Errorf("reading objdump output: %v", err) 102 | } 103 | if bytes.Contains(line, textangle) { 104 | reading = true 105 | continue 106 | } 107 | if !reading { 108 | continue 109 | } 110 | if debug { 111 | os.Stdout.Write(line) 112 | } 113 | if enc1 := parseContinuation(line, encbuf[:len(enc)]); enc1 != nil { 114 | enc = enc1 115 | continue 116 | } 117 | flush() 118 | nmatch++ 119 | addr, enc, text = parseLine(line, encbuf[:0]) 120 | if addr > next { 121 | return fmt.Errorf("address out of sync expected <= %#x at %q in:\n%s", next, line, line) 122 | } 123 | } 124 | flush() 125 | if next != start+uint32(ext.Size) { 126 | return fmt.Errorf("not enough results found [%d %d]", next, start+ext.Size) 127 | } 128 | if err := ext.Wait(); err != nil { 129 | return fmt.Errorf("exec: %v", err) 130 | } 131 | 132 | return nil 133 | } 134 | 135 | var ( 136 | undefined = []byte("") 137 | unpredictable = []byte("") 138 | illegalShifter = []byte("") 139 | ) 140 | 141 | func parseLine(line []byte, encstart []byte) (addr uint32, enc []byte, text string) { 142 | oline := line 143 | i := index(line, ":\t") 144 | if i < 0 { 145 | log.Fatalf("cannot parse disassembly: %q", oline) 146 | } 147 | x, err := strconv.ParseUint(string(trimSpace(line[:i])), 16, 32) 148 | if err != nil { 149 | log.Fatalf("cannot parse disassembly: %q", oline) 150 | } 151 | addr = uint32(x) 152 | line = line[i+2:] 153 | i = bytes.IndexByte(line, '\t') 154 | if i < 0 { 155 | log.Fatalf("cannot parse disassembly: %q", oline) 156 | } 157 | enc, ok := parseHex(line[:i], encstart) 158 | if !ok { 159 | log.Fatalf("cannot parse disassembly: %q", oline) 160 | } 161 | line = trimSpace(line[i:]) 162 | if 
bytes.Contains(line, undefined) { 163 | text = "undefined" 164 | return 165 | } 166 | if bytes.Contains(line, illegalShifter) { 167 | text = "undefined" 168 | return 169 | } 170 | if false && bytes.Contains(line, unpredictable) { 171 | text = "unpredictable" 172 | return 173 | } 174 | if i := bytes.IndexByte(line, ';'); i >= 0 { 175 | line = trimSpace(line[:i]) 176 | } 177 | text = string(fixSpace(line)) 178 | return 179 | } 180 | 181 | func parseContinuation(line []byte, enc []byte) []byte { 182 | i := index(line, ":\t") 183 | if i < 0 { 184 | return nil 185 | } 186 | line = line[i+1:] 187 | enc, _ = parseHex(line, enc) 188 | return enc 189 | } 190 | 191 | // writeELF32 writes an ELF32 header to the file, 192 | // describing a text segment that starts at start 193 | // and extends for size bytes. 194 | func writeELF32(f *os.File, size int) error { 195 | f.Seek(0, 0) 196 | var hdr elf.Header32 197 | var prog elf.Prog32 198 | var sect elf.Section32 199 | var buf bytes.Buffer 200 | binary.Write(&buf, binary.LittleEndian, &hdr) 201 | off1 := buf.Len() 202 | binary.Write(&buf, binary.LittleEndian, &prog) 203 | off2 := buf.Len() 204 | binary.Write(&buf, binary.LittleEndian, §) 205 | off3 := buf.Len() 206 | buf.Reset() 207 | data := byte(elf.ELFDATA2LSB) 208 | hdr = elf.Header32{ 209 | Ident: [16]byte{0x7F, 'E', 'L', 'F', 1, data, 1}, 210 | Type: 2, 211 | Machine: uint16(elf.EM_ARM), 212 | Version: 1, 213 | Entry: start, 214 | Phoff: uint32(off1), 215 | Shoff: uint32(off2), 216 | Flags: 0x05000002, 217 | Ehsize: uint16(off1), 218 | Phentsize: uint16(off2 - off1), 219 | Phnum: 1, 220 | Shentsize: uint16(off3 - off2), 221 | Shnum: 3, 222 | Shstrndx: 2, 223 | } 224 | binary.Write(&buf, binary.LittleEndian, &hdr) 225 | prog = elf.Prog32{ 226 | Type: 1, 227 | Off: start, 228 | Vaddr: start, 229 | Paddr: start, 230 | Filesz: uint32(size), 231 | Memsz: uint32(size), 232 | Flags: 5, 233 | Align: start, 234 | } 235 | binary.Write(&buf, binary.LittleEndian, &prog) 236 | 
binary.Write(&buf, binary.LittleEndian, §) // NULL section 237 | sect = elf.Section32{ 238 | Name: 1, 239 | Type: uint32(elf.SHT_PROGBITS), 240 | Addr: start, 241 | Off: start, 242 | Size: uint32(size), 243 | Flags: uint32(elf.SHF_ALLOC | elf.SHF_EXECINSTR), 244 | Addralign: 4, 245 | } 246 | binary.Write(&buf, binary.LittleEndian, §) // .text 247 | sect = elf.Section32{ 248 | Name: uint32(len("\x00.text\x00")), 249 | Type: uint32(elf.SHT_STRTAB), 250 | Addr: 0, 251 | Off: uint32(off2 + (off3-off2)*3), 252 | Size: uint32(len("\x00.text\x00.shstrtab\x00")), 253 | Addralign: 1, 254 | } 255 | binary.Write(&buf, binary.LittleEndian, §) 256 | buf.WriteString("\x00.text\x00.shstrtab\x00") 257 | f.Write(buf.Bytes()) 258 | return nil 259 | } 260 | -------------------------------------------------------------------------------- /armasm/plan9x.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package armasm 6 | 7 | import ( 8 | "bytes" 9 | "encoding/binary" 10 | "fmt" 11 | "io" 12 | "strings" 13 | ) 14 | 15 | // plan9Syntax returns the Go assembler syntax for the instruction. 16 | // The syntax was originally defined by Plan 9. 17 | // The pc is the program counter of the instruction, used for expanding 18 | // PC-relative addresses into absolute ones. 19 | // The symname function queries the symbol table for the program 20 | // being disassembled. Given a target address it returns the name and base 21 | // address of the symbol containing the target, if any; otherwise it returns "", 0. 22 | // The reader r should read from the text segment using text addresses 23 | // as offsets; it is used to display pc-relative loads as constant loads. 
24 | func plan9Syntax(inst Inst, pc uint64, symname func(uint64) (string, uint64), text io.ReaderAt) string { 25 | if symname == nil { 26 | symname = func(uint64) (string, uint64) { return "", 0 } 27 | } 28 | 29 | var args []string 30 | for _, a := range inst.Args { 31 | if a == nil { 32 | break 33 | } 34 | args = append(args, plan9Arg(&inst, pc, symname, a)) 35 | } 36 | 37 | op := inst.Op.String() 38 | 39 | switch inst.Op &^ 15 { 40 | case LDR_EQ, LDRB_EQ, LDRH_EQ: 41 | // Check for RET 42 | reg, _ := inst.Args[0].(Reg) 43 | mem, _ := inst.Args[1].(Mem) 44 | if inst.Op&^15 == LDR_EQ && reg == R15 && mem.Base == SP && mem.Sign == 0 && mem.Mode == AddrPostIndex { 45 | return fmt.Sprintf("RET%s #%d", op[3:], mem.Offset) 46 | } 47 | 48 | // Check for PC-relative load. 49 | if mem.Base == PC && mem.Sign == 0 && mem.Mode == AddrOffset && text != nil { 50 | addr := uint32(pc) + 8 + uint32(mem.Offset) 51 | buf := make([]byte, 4) 52 | switch inst.Op &^ 15 { 53 | case LDRB_EQ: 54 | if _, err := text.ReadAt(buf[:1], int64(addr)); err != nil { 55 | break 56 | } 57 | args[1] = fmt.Sprintf("$%#x", buf[0]) 58 | 59 | case LDRH_EQ: 60 | if _, err := text.ReadAt(buf[:2], int64(addr)); err != nil { 61 | break 62 | } 63 | args[1] = fmt.Sprintf("$%#x", binary.LittleEndian.Uint16(buf)) 64 | 65 | case LDR_EQ: 66 | if _, err := text.ReadAt(buf, int64(addr)); err != nil { 67 | break 68 | } 69 | x := binary.LittleEndian.Uint32(buf) 70 | if s, base := symname(uint64(x)); s != "" && uint64(x) == base { 71 | args[1] = fmt.Sprintf("$%s(SB)", s) 72 | } else { 73 | args[1] = fmt.Sprintf("$%#x", x) 74 | } 75 | } 76 | } 77 | } 78 | 79 | // Move addressing mode into opcode suffix. 
80 | suffix := "" 81 | switch inst.Op &^ 15 { 82 | case LDR_EQ, LDRB_EQ, LDRH_EQ, STR_EQ, STRB_EQ, STRH_EQ: 83 | mem, _ := inst.Args[1].(Mem) 84 | switch mem.Mode { 85 | case AddrOffset, AddrLDM: 86 | // no suffix 87 | case AddrPreIndex, AddrLDM_WB: 88 | suffix = ".W" 89 | case AddrPostIndex: 90 | suffix = ".P" 91 | } 92 | off := "" 93 | if mem.Offset != 0 { 94 | off = fmt.Sprintf("%#x", mem.Offset) 95 | } 96 | base := fmt.Sprintf("(R%d)", int(mem.Base)) 97 | index := "" 98 | if mem.Sign != 0 { 99 | sign := "" 100 | if mem.Sign < 0 { 101 | sign = "" 102 | } 103 | shift := "" 104 | if mem.Count != 0 { 105 | shift = fmt.Sprintf("%s%d", plan9Shift[mem.Shift], mem.Count) 106 | } 107 | index = fmt.Sprintf("(%sR%d%s)", sign, int(mem.Index), shift) 108 | } 109 | args[1] = off + base + index 110 | } 111 | 112 | // Reverse args, placing dest last. 113 | for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 { 114 | args[i], args[j] = args[j], args[i] 115 | } 116 | 117 | switch inst.Op &^ 15 { 118 | case MOV_EQ: 119 | op = "MOVW" + op[3:] 120 | 121 | case LDR_EQ: 122 | op = "MOVW" + op[3:] + suffix 123 | case LDRB_EQ: 124 | op = "MOVB" + op[4:] + suffix 125 | case LDRH_EQ: 126 | op = "MOVH" + op[4:] + suffix 127 | 128 | case STR_EQ: 129 | op = "MOVW" + op[3:] + suffix 130 | args[0], args[1] = args[1], args[0] 131 | case STRB_EQ: 132 | op = "MOVB" + op[4:] + suffix 133 | args[0], args[1] = args[1], args[0] 134 | case STRH_EQ: 135 | op = "MOVH" + op[4:] + suffix 136 | args[0], args[1] = args[1], args[0] 137 | } 138 | 139 | if args != nil { 140 | op += " " + strings.Join(args, ", ") 141 | } 142 | 143 | return op 144 | } 145 | 146 | // assembler syntax for the various shifts. 147 | // @x> is a lie; the assembler uses @> 0 148 | // instead of @x> 1, but i wanted to be clear that it 149 | // was a different operation (rotate right extended, not rotate right). 
150 | var plan9Shift = []string{"<<", ">>", "->", "@>", "@x>"} 151 | 152 | func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg Arg) string { 153 | switch a := arg.(type) { 154 | case Endian: 155 | 156 | case Imm: 157 | return fmt.Sprintf("$%d", int(a)) 158 | 159 | case Mem: 160 | 161 | case PCRel: 162 | addr := uint32(pc) + 8 + uint32(a) 163 | if s, base := symname(uint64(addr)); s != "" && uint64(addr) == base { 164 | return fmt.Sprintf("%s(SB)", s) 165 | } 166 | return fmt.Sprintf("%#x", addr) 167 | 168 | case Reg: 169 | if a < 16 { 170 | return fmt.Sprintf("R%d", int(a)) 171 | } 172 | 173 | case RegList: 174 | var buf bytes.Buffer 175 | start := -2 176 | end := -2 177 | fmt.Fprintf(&buf, "[") 178 | flush := func() { 179 | if start >= 0 { 180 | if buf.Len() > 1 { 181 | fmt.Fprintf(&buf, ",") 182 | } 183 | if start == end { 184 | fmt.Fprintf(&buf, "R%d", start) 185 | } else { 186 | fmt.Fprintf(&buf, "R%d-R%d", start, end) 187 | } 188 | } 189 | } 190 | for i := 0; i < 16; i++ { 191 | if a&(1<&1 | tee log 3 | cd ..; go test -cover -run 'ObjdumpARMUncond' -v -timeout 10h -printtests -long 2>&1 | tee -a log 4 | egrep ' (gnu|plan9) ' ../log |sort >newdecode.txt 5 | 6 | -------------------------------------------------------------------------------- /armasm/testdata/decode.txt: -------------------------------------------------------------------------------- 1 | 000001f1| 1 gnu setend le 2 | 00100f61| 1 gnu mrsvs r1, apsr 3 | 00f02053| 1 gnu noppl 4 | 00f0d4f4| 1 gnu pli [r4] 5 | 01f020d3| 1 gnu yieldle 6 | 02002d59| 1 gnu stmdbpl sp!, {r1} 7 | 021da9d8| 1 gnu stmle r9!, {r1, r8, sl, fp, ip} 8 | 02c0b071| 1 gnu movsvc ip, r2 9 | 02f02073| 1 gnu wfevc 10 | 03f02013| 1 gnu wfine 11 | 03f05df7| 1 gnu pld [sp, -r3] 12 | 04009d34| 1 gnu popcc {r0} 13 | 043a52b1| 1 gnu cmplt r2, r4, lsl #20 14 | 04402de5| 1 gnu push {r4} 15 | 045b148d| 1 gnu vldrhi d5, [r4, #-16] 16 | 04f02093| 1 gnu sevls 17 | 0793eab0| 1 gnu rsclt r9, sl, r7, lsl #6 18 | 079bfb9e| 
1 gnu vmovls.f64 d25, #183 19 | 0a4fc9d3| 1 gnu bicle r4, r9, #10, 30 20 | 0bac7ab6| 1 gnu ldrbtlt sl, [sl], -fp, lsl #24 21 | 0c2aee44| 1 gnu strbtmi r2, [lr], #2572 22 | 0c4bb000| 1 gnu adcseq r4, r0, ip, lsl #22 23 | 0e26d561| 1 gnu bicsvs r2, r5, lr, lsl #12 24 | 0f0fa011| 1 gnu lslne r0, pc, #30 25 | 0fa448e0| 1 gnu sub sl, r8, pc, lsl #8 26 | 101af1de| 1 gnu vmrsle r1, fpscr 27 | 108a0cee| 1 gnu vmov s24, r8 28 | 108a1dae| 1 gnu vmovge r8, s26 29 | 108ae14e| 1 gnu vmsrmi fpscr, r8 30 | 10faf1ae| 1 gnu vmrsge apsr_nzcv, fpscr 31 | 10fb052e| 1 gnu vmovcs.32 d5[0], pc 32 | 11c902b7| 1 gnu smladlt r2, r1, r9, ip 33 | 11ef5b16| 1 gnu uadd16ne lr, fp, r1 34 | 12fa87a7| 1 gnu usad8ge r7, r2, sl 35 | 135f2956| 1 gnu qadd16pl r5, r9, r3 36 | 13de9aa1| 1 gnu orrsge sp, sl, r3, lsl lr 37 | 145c0e40| 1 gnu andmi r5, lr, r4, lsl ip 38 | 150f7fd6| 1 gnu uhadd16le r0, pc, r5 39 | 15b9bf12| 1 gnu adcsne fp, pc, #344064 40 | 16373391| 1 gnu teqls r3, r6, lsl r7 41 | 19ef1966| 1 gnu sadd16vs lr, r9, r9 42 | 1ab0b091| 1 gnu lslsls fp, sl, r0 43 | 1b9f6fe6| 1 gnu uqadd16 r9, pc, fp 44 | 1bb58557| 1 gnu usada8pl r5, fp, r5, fp 45 | 1beff8e0| 1 gnu rscs lr, r8, fp, lsl pc 46 | 1caff0e6| 1 gnu usat sl, #16, ip, lsl #30 47 | 1d0f3d36| 1 gnu shadd16cc r0, sp, sp 48 | 1dca1d52| 1 gnu andspl ip, sp, #118784 49 | 1e4891d0| 1 gnu addsle r4, r1, lr, lsl r8 50 | 1f0889e6| 1 gnu pkhbt r0, r9, pc, lsl #16 51 | 1f1f6fe1| 1 gnu clz r1, pc 52 | 1f26d157| 1 gnu bfcpl r2, #12, #6 53 | 1ff07ff5| 1 gnu clrex 54 | 1fff2fd1| 1 gnu bxle pc 55 | 20f153f6| 1 gnu pli [r3, -r0, lsr #2] 56 | 21047013| 1 gnu cmnne r0, #553648128 57 | 21c2eb8b| 1 gnu blhi .-0x50f778 58 | 21c2ebfb| 1 gnu blx .-0x50f776 59 | 21fa62ee| 1 gnu vmul.f32 s31, s4, s3 60 | 23005720| 1 gnu subscs r0, r7, r3, lsr #32 61 | 236a303e| 1 gnu vaddcc.f32 s12, s0, s7 62 | 23f055f6| 1 gnu pli [r5, -r3, lsr #32] 63 | 2430a031| 1 gnu lsrcc r3, r4, #32 64 | 245d0803| 1 gnu movweq r5, #36132 65 | 251a86be| 1 gnu vdivlt.f32 s2, s12, s11 66 | 
25db7b81| 1 gnu cmnhi fp, r5, lsr #22 67 | 26bc3553| 1 gnu teqpl r5, #9728 68 | 277c2d69| 1 gnu pushvs {r0, r1, r2, r5, sl, fp, ip, sp, lr} 69 | 29fc1cf5| 1 gnu pldw [ip, #-3113] 70 | 29ff2fc1| 1 gnu bxjgt r9 71 | 2decd9c0| 1 gnu sbcsgt lr, r9, sp, lsr #24 72 | 30fa5e47| 1 gnu smmulrmi lr, r0, sl 73 | 316f64d6| 1 gnu uqasxle r6, r4, r1 74 | 323f5da6| 1 gnu uasxge r3, sp, r2 75 | 327fe5e6| 1 gnu usat16 r7, #5, r2 76 | 330151e3| 1 gnu cmp r1, #-1073741812 77 | 34af2ae6| 1 gnu qasx sl, sl, r4 78 | 35fd3710| 1 gnu eorsne pc, r7, r5, lsr sp 79 | 36def1c1| 1 gnu mvnsgt sp, r6, lsr lr 80 | 3801b061| 1 gnu lsrsvs r0, r8, r1 81 | 38985477| 1 gnu smmlarvc r4, r8, r8, r9 82 | 3a2fbfa6| 1 gnu revge r2, sl 83 | 3a3f1b06| 1 gnu sasxeq r3, fp, sl 84 | 3a7fa346| 1 gnu ssat16mi r7, #4, sl 85 | 3a943b94| 1 gnu ldrtls r9, [fp], #-1082 86 | 3bf505e7| 1 gnu smuadx r5, fp, r5 87 | 3cef7086| 1 gnu uhasxhi lr, r0, ip 88 | 3e5f3ec6| 1 gnu shasxgt r5, lr, lr 89 | 3f4fff86| 1 gnu rbithi r4, pc 90 | 3faf4717| 1 gnu smlaldxne sl, r7, pc, pc 91 | 3fff2fc1| 1 gnu blxgt pc 92 | 402bbf7e| 1 gnu vcvtvc.u16.f64 d2, d2, #16 93 | 403ab5de| 1 gnu vcmple.f32 s6, #0 94 | 40eb363e| 1 gnu vsubcc.f64 d14, d6, d0 95 | 420f73d1| 1 gnu cmnle r3, r2, asr #30 96 | 424a648e| 1 gnu vnmulhi.f32 s9, s8, s4 97 | 4284d717| 1 gnu ldrbne r8, [r7, r2, asr #8] 98 | 42a599c3| 1 gnu orrsgt sl, r9, #276824064 99 | 42abf0be| 1 gnu vmovlt.f64 d26, d2 100 | 446ea031| 1 gnu asrcc r6, r4, #28 101 | 4a953557| 1 gnu ldrpl r9, [r5, -sl, asr #10]! 
102 | 4ab6f712| 1 gnu rscsne fp, r7, #77594624 103 | 4af07ff5| 1 gnu dsb #10 104 | 4df6def4| 1 gnu pli [lr, #1613] 105 | 4efbf52e| 1 gnu vcmpcs.f64 d31, #0 106 | 50aaac79| 1 gnu stmibvc ip!, {r4, r6, r9, fp, sp, pc} 107 | 50caf011| 1 gnu mvnsne ip, r0, asr sl 108 | 50f04961| 1 gnu qdaddvs pc, r0, r9 109 | 51282008| 1 gnu stmdaeq r0!, {r0, r4, r6, fp, sp} 110 | 52bf6576| 1 gnu uqsaxvc fp, r5, r2 111 | 5345c9d0| 1 gnu sbcle r4, r9, r3, asr r5 112 | 538f5e46| 1 gnu usaxmi r8, lr, r3 113 | 54106d31| 1 gnu qdsubcc r1, r4, sp 114 | 56e0e557| 1 gnu ubfxpl lr, r6, #0, #6 115 | 57073d11| 1 gnu teqne sp, r7, asr r7 116 | 58bb0aa9| 1 gnu stmdbge sl, {r3, r4, r6, r8, r9, fp, ip, sp, pc} 117 | 58f007b1| 1 gnu qaddlt pc, r8, r7 118 | 59fd0e77| 1 gnu smusdvc lr, r9, sp 119 | 5ab7f1c5| 1 gnu ldrbgt fp, [r1, #1882]! 120 | 5abf23c6| 1 gnu qsaxgt fp, r3, sl 121 | 5b8f1c96| 1 gnu ssaxls r8, ip, fp 122 | 5b98ab97| 1 gnu sbfxls r9, fp, #16, #12 123 | 5bc9b041| 1 gnu asrsmi ip, fp, r9 124 | 5bf07ff5| 1 gnu dmb #11 125 | 5c102b81| 1 gnu qsubhi r1, ip, fp 126 | 5caa49e1| 1 gnu qdadd sl, ip, r9 127 | 5d3f7226| 1 gnu uhsaxcs r3, r2, sp 128 | 5db55470| 1 gnu subsvc fp, r4, sp, asr r5 129 | 5ef14387| 1 gnu smlsldhi pc, r3, lr, r1 130 | 5f540a11| 1 gnu qaddne r5, pc, sl 131 | 5f9079d1| 1 gnu cmnle r9, pc, asr r0 132 | 5faf3f66| 1 gnu shsaxvs sl, pc, pc 133 | 605071d7| 1 gnu ldrble r5, [r1, -r0, rrx]! 
134 | 614adc76| 1 gnu ldrbvc r4, [ip], r1, ror #20 135 | 616b9e42| 1 gnu addsmi r6, lr, #99328 136 | 62c84f15| 1 gnu strbne ip, [pc, #-2146] 137 | 62f051f7| 1 gnu pld [r1, -r2, rrx] 138 | 6346c393| 1 gnu bicls r4, r3, #103809024 139 | 654abbae| 1 gnu vcvtge.f32.u16 s8, s8, #5 140 | 65a5f0e3| 1 gnu mvns sl, #423624704 141 | 65f796f7| 1 gnu pldw [r6, r5, ror #14] 142 | 670bb12e| 1 gnu vnegcs.f64 d0, d23 143 | 67903731| 1 gnu teqcc r7, r7, rrx 144 | 68ddc637| 1 gnu strbcc sp, [r6, r8, ror #26] 145 | 695b3ab6| 1 gnu ldrtlt r5, [sl], -r9, ror #22 146 | 697cfc71| 1 gnu mvnsvc r7, r9, ror #24 147 | 6a0ab3ee| 1 gnu vcvtb.f16.f32 s0, s21 148 | 6ad9ad54| 1 gnu strtpl sp, [sp], #2410 149 | 6af07ff5| 1 gnu isb #10 150 | 6afa6f10| 1 gnu rsbne pc, pc, sl, ror #20 151 | 6d5b19ee| 1 gnu vnmla.f64 d5, d9, d29 152 | 6d60b071| 1 gnu rrxsvc r6, sp 153 | 6df754f7| 1 gnu pld [r4, -sp, ror #14] 154 | 70065821| 1 gnu cmpcs r8, r0, ror r6 155 | 7050ed86| 1 gnu uxtabhi r5, sp, r0 156 | 715f1186| 1 gnu ssub16hi r5, r1, r1 157 | 716c9805| 1 gnu ldreq r6, [r8, #3185] 158 | 718d5ab1| 1 gnu cmplt sl, r1, ror sp 159 | 71c8cfb6| 1 gnu uxtb16lt ip, r1, ror #16 160 | 7294af06| 1 gnu sxtbeq r9, r2, ror #8 161 | 72c0bac6| 1 gnu sxtahgt ip, sl, r2 162 | 730f6716| 1 gnu uqsub16ne r0, r7, r3 163 | 73608f46| 1 gnu sxtb16mi r6, r3 164 | 73687f22| 1 gnu rsbscs r6, pc, #7536640 165 | 74308816| 1 gnu sxtab16ne r3, r8, r4 166 | 757f3456| 1 gnu shsub16pl r7, r4, r5 167 | 77788016| 1 gnu sxtab16ne r7, r0, r7, ror #16 168 | 78061671| 1 gnu tstvc r6, r8, ror r6 169 | 780a2fe1| 1 gnu bkpt 0xf0a8 170 | 7850abd6| 1 gnu sxtable r5, fp, r8 171 | 792cef26| 1 gnu uxtbcs r2, r9, ror #24 172 | 799eb8e0| 1 gnu adcs r9, r8, r9, ror lr 173 | 799f5726| 1 gnu usub16cs r9, r7, r9 174 | 79d0bf16| 1 gnu sxthne sp, r9 175 | 7a037ba1| 1 gnu cmnge fp, sl, ror r3 176 | 7b0f2566| 1 gnu qsub16vs r0, r5, fp 177 | 7b79dd51| 1 gnu bicspl r7, sp, fp, ror r9 178 | 7b9a9f1d| 1 gnu vldrne s18, [pc, #492] 179 | 7c70cea6| 1 gnu uxtab16ge r7, lr, 
ip 180 | 7d48f966| 1 gnu uxtahvs r4, r9, sp, ror #16 181 | 7d5c13a1| 1 gnu tstge r3, sp, ror ip 182 | 7e0001f1| 1 gnu setend le 183 | 7e1c0ba7| 1 gnu smlsdxge fp, lr, ip, r1 184 | 7e567e40| 1 gnu rsbsmi r5, lr, lr, ror r6 185 | 7e8f73b6| 1 gnu uhsub16lt r8, r3, lr 186 | 7ef0ffd6| 1 gnu uxthle pc, lr 187 | 7faaa011| 1 gnu rorne sl, pc, sl 188 | 81f19af7| 1 gnu pldw [sl, r1, lsl #3] 189 | 82033901| 1 gnu teqeq r9, r2, lsl #7 190 | 82f316f5| 1 gnu pldw [r6, #-898] 191 | 830201f1| 1 gnu setend be 192 | 838a3b91| 1 gnu teqls fp, r3, lsl #21 193 | 8408af2f| 1 gnu svccs 0x00af0884 194 | 884201d1| 1 gnu smlabble r1, r8, r2, r4 195 | 8aa12e31| 1 gnu smlawbcc lr, sl, r1, sl 196 | 8b9b99c0| 1 gnu addsgt r9, r9, fp, lsl #23 197 | 8c005c81| 1 gnu cmphi ip, ip, lsl #1 198 | 8fb429c6| 1 gnu strtgt fp, [r9], -pc, lsl #9 199 | 907b1f9e| 1 gnu vmovls.32 r7, d31[0] 200 | 91975f25| 1 gnu ldrbcs r9, [pc, #-1937] 201 | 91b010e3| 1 gnu tst r0, #145 202 | 927facb1| 1 gnu strexdlt r7, r2, [ip] 203 | 92904c91| 1 gnu swpbls r9, r2, [ip] 204 | 92af1226| 1 gnu sadd8cs sl, r2, r2 205 | 92b28c70| 1 gnu umullvc fp, ip, r2, r2 206 | 945f68a6| 1 gnu uqadd8ge r5, r8, r4 207 | 950b2560| 1 gnu mlavs r5, r5, fp, r0 208 | 969fcf71| 1 gnu strexbvc r9, r6, [pc] 209 | 96cf35e6| 1 gnu shadd8 ip, r5, r6 210 | 98060eb0| 1 gnu mullt lr, r8, r6 211 | 9843fb93| 1 gnu mvnsls r4, #152, 6 212 | 9a3fe2b0| 1 gnu smlallt r3, r2, sl, pc 213 | 9aef58b6| 1 gnu uadd8lt lr, r8, sl 214 | 9afcdff5| 1 gnu pld [pc, #3226] 215 | 9c221810| 1 gnu mulsne r8, ip, r2 216 | 9c3bc9dd| 1 gnu vstrle d19, [r9, #624] 217 | 9c5f2606| 1 gnu qadd8eq r5, r6, ip 218 | 9d87dac0| 1 gnu smullsgt r8, sl, sp, r7 219 | 9e0f7c86| 1 gnu uhadd8hi r0, ip, lr 220 | 9e814560| 1 gnu umaalvs r8, r5, lr, r1 221 | 9e9f8dc1| 1 gnu strexgt r9, lr, [sp] 222 | 9ec3c9d7| 1 gnu bfile ip, lr, #7, #3 223 | 9ed26d90| 1 gnu mlsls sp, lr, r2, sp 224 | 9f7fd9c1| 1 gnu ldrexbgt r7, [r9] 225 | 9f7fea91| 1 gnu strexhls r7, pc, [sl] 226 | 9f9f9921| 1 gnu ldrexcs r9, [r9] 227 
| 9faffd21| 1 gnu ldrexhcs sl, [sp] 228 | 9fcfbd61| 1 gnu ldrexdvs ip, [sp] 229 | 9ff7a710| 1 gnu umlalne pc, r7, pc, r7 230 | a05459d3| 1 gnu cmple r9, #160, 8 231 | a3062be1| 1 gnu smulwb fp, r3, r6 232 | a68a92b1| 1 gnu orrslt r8, r2, r6, lsr #21 233 | abff55f6| 1 gnu pli [r5, -fp, lsr #31] 234 | addbf8ea| 1 gnu b .-0x1c9148 235 | ae79b021| 1 gnu lsrscs r7, lr, #19 236 | b590a3b1| 1 gnu strhlt r9, [r3, r5]! 237 | b5b2e390| 1 gnu strhtls fp, [r3], #37 238 | b6ac4e30| 1 gnu strhcc sl, [lr], #-198 239 | b73fff86| 1 gnu revshhi r3, r7 240 | b75fbfc6| 1 gnu rev16gt r5, r7 241 | b80b7c80| 1 gnu ldrhthi r0, [ip], #-184 242 | b82035e0| 1 gnu ldrht r2, [r5], -r8 243 | b8877391| 1 gnu ldrhls r8, [r3, #-120]! 244 | b9703e41| 1 gnu ldrhmi r7, [lr, -r9]! 245 | b9cf8c16| 1 gnu selne ip, ip, r9 246 | bd81bd58| 1 gnu poppl {r0, r2, r3, r4, r5, r7, r8, pc} 247 | bdfdb469| 1 gnu ldmibvs r4!, {r0, r2, r3, r4, r5, r7, r8, sl, fp, ip, sp, lr, pc} 248 | beb02500| 1 gnu strhteq fp, [r5], -lr 249 | bf1a5e42| 1 gnu subsmi r1, lr, #782336 250 | c19a4d5e| 1 gnu vmlspl.f32 s19, s27, s2 251 | c1aab15e| 1 gnu vsqrtpl.f32 s20, s2 252 | c354b003| 1 gnu movseq r5, #-1023410176 253 | c4091dc1| 1 gnu tstgt sp, r4, asr #19 254 | c50e13a9| 1 gnu ldmdbge r3, {r0, r2, r6, r7, r9, sl, fp} 255 | c68c8637| 1 gnu strcc r8, [r6, r6, asr #25] 256 | c6ad48e3| 1 gnu movt sl, #36294 257 | c6f65ff5| 1 gnu pld [pc, #-1734] 258 | c8a92f10| 1 gnu eorne sl, pc, r8, asr #19 259 | c9016b61| 1 gnu smulbtvs fp, r9, r1 260 | cadbf49e| 1 gnu vcmpels.f64 d29, d10 261 | ce9de476| 1 gnu strbtvc r9, [r4], lr, asr #27 262 | cf3c1ab1| 1 gnu tstlt sl, pc, asr #25 263 | d355aab6| 1 gnu ssatlt r5, #11, r3, asr #11 264 | d4f4df10| 1 gnu ldrsbne pc, [pc], #68 265 | d6530d61| 1 gnu ldrdvs r5, [sp, -r6] 266 | d74d7800| 1 gnu ldrsbteq r4, [r8], #-215 267 | d9703680| 1 gnu ldrsbthi r7, [r6], -r9 268 | dbe003c0| 1 gnu ldrdgt lr, [r3], -fp 269 | dc709561| 1 gnu ldrsbvs r7, [r5, ip] 270 | dcc3b9c8| 1 gnu ldmgt r9!, {r2, r3, r4, r6, r7, 
r8, r9, lr, pc} 271 | debfa0e5| 1 gnu str fp, [r0, #4062]! 272 | dee062a1| 1 gnu ldrdge lr, [r2, #-14]! 273 | dfa05ab7| 1 gnu smmlslt sl, pc, r0, sl 274 | e02ef011| 1 gnu mvnsne r2, r0, ror #29 275 | e4d41718| 1 gnu ldmdane r7, {r2, r5, r6, r7, sl, ip, lr, pc} 276 | e6d0fe34| 1 gnu ldrbtcc sp, [lr], #230 277 | e73bf7be| 1 gnu vcvtlt.f32.f64 s7, d23 278 | e74e72b3| 1 gnu cmnlt r2, #3696 279 | e80bf07e| 1 gnu vabsvc.f64 d16, d24 280 | e9b5b001| 1 gnu rorseq fp, r9, #11 281 | ea7bbdbe| 1 gnu vcvtlt.s32.f64 s14, d26 282 | ec063813| 1 gnu teqne r8, #236, 12 283 | ec0e49e1| 1 gnu smlaltt r0, r9, ip, lr 284 | ee4ab85e| 1 gnu vcvtpl.f32.s32 s8, s29 285 | ef461f25| 1 gnu ldrcs r4, [pc, #-1775] 286 | ef5fd002| 1 gnu sbcseq r5, r0, #956 287 | f4cf1d36| 1 gnu ssub8cc ip, sp, r4 288 | f67f73b6| 1 gnu uhsub8lt r7, r3, r6 289 | f6e09ca0| 1 gnu ldrshge lr, [ip], r6 290 | f7702e32| 1 gnu eorcc r7, lr, #247 291 | fa4dcf20| 1 gnu strdcs r4, [pc], #218 292 | fac03720| 1 gnu ldrshtcs ip, [r7], -sl 293 | fc0f64c6| 1 gnu uqsub8gt r0, r4, ip 294 | fc28f481| 1 gnu ldrshhi r2, [r4, #140]! 295 | fc300560| 1 gnu strdvs r3, [r5], -ip 296 | fcacfc70| 1 gnu ldrshtvc sl, [ip], #204 297 | fdbcfaf7| 1 gnu undef 298 | fddf5c86| 1 gnu usub8hi sp, ip, sp 299 | fdf02013| 1 gnu dbgne #13 300 | fe0319e3| 1 gnu tst r9, #-134217725 301 | fe7f3116| 1 gnu shsub8ne r7, r1, lr 302 | ff4f2ac6| 1 gnu qsub8gt r4, sl, pc 303 | ff818c71| 1 gnu strdvc r8, [ip, pc] 304 | |6b5721d3 1 gnu error: unknown instruction 305 | |76452001 1 gnu error: unknown instruction 306 | |97acd647 1 gnu error: unknown instruction 307 | -------------------------------------------------------------------------------- /armmap/map.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | // Armmap constructs the ARM opcode map from the instruction set CSV file. 6 | // 7 | // Usage: 8 | // armmap [-fmt=format] arm.csv 9 | // 10 | // The known output formats are: 11 | // 12 | // text (default) - print decoding tree in text form 13 | // decoder - print decoding tables for the armasm package 14 | package main 15 | 16 | import ( 17 | "bufio" 18 | "encoding/csv" 19 | "flag" 20 | "fmt" 21 | "log" 22 | "os" 23 | "sort" 24 | "strconv" 25 | "strings" 26 | ) 27 | 28 | var format = flag.String("fmt", "text", "output format: text, decoder") 29 | 30 | var inputFile string 31 | 32 | func usage() { 33 | fmt.Fprintf(os.Stderr, "usage: armmap [-fmt=format] x86.csv\n") 34 | os.Exit(2) 35 | } 36 | 37 | func main() { 38 | log.SetFlags(0) 39 | log.SetPrefix("armmap: ") 40 | 41 | flag.Usage = usage 42 | flag.Parse() 43 | if flag.NArg() != 1 { 44 | usage() 45 | } 46 | 47 | inputFile = flag.Arg(0) 48 | 49 | var print func(*Prog) 50 | switch *format { 51 | default: 52 | log.Fatalf("unknown output format %q", *format) 53 | case "text": 54 | print = printText 55 | case "decoder": 56 | print = printDecoder 57 | } 58 | 59 | p, err := readCSV(flag.Arg(0)) 60 | if err != nil { 61 | log.Fatal(err) 62 | } 63 | 64 | print(p) 65 | } 66 | 67 | // readCSV reads the CSV file and returns the corresponding Prog. 68 | // It may print details about problems to standard error using the log package. 69 | func readCSV(file string) (*Prog, error) { 70 | // Read input. 71 | // Skip leading blank and # comment lines. 
72 | f, err := os.Open(file) 73 | if err != nil { 74 | return nil, err 75 | } 76 | b := bufio.NewReader(f) 77 | for { 78 | c, err := b.ReadByte() 79 | if err != nil { 80 | break 81 | } 82 | if c == '\n' { 83 | continue 84 | } 85 | if c == '#' { 86 | b.ReadBytes('\n') 87 | continue 88 | } 89 | b.UnreadByte() 90 | break 91 | } 92 | table, err := csv.NewReader(b).ReadAll() 93 | if err != nil { 94 | return nil, fmt.Errorf("parsing %s: %v", file, err) 95 | } 96 | if len(table) == 0 { 97 | return nil, fmt.Errorf("empty csv input") 98 | } 99 | if len(table[0]) < 5 { 100 | return nil, fmt.Errorf("csv too narrow: need at least five columns") 101 | } 102 | 103 | p := &Prog{} 104 | for _, row := range table { 105 | add(p, row[0], row[1], row[2], row[3], row[4]) 106 | } 107 | return p, nil 108 | } 109 | 110 | type Prog struct { 111 | Inst []Inst 112 | OpRanges map[string]string 113 | } 114 | 115 | type Inst struct { 116 | Text string 117 | Encoding string 118 | Mask uint32 119 | Value uint32 120 | Priority int 121 | OpBase string 122 | OpBits uint64 123 | Args []string 124 | } 125 | 126 | type Arg struct { 127 | Name string 128 | Bits uint64 129 | } 130 | 131 | // add adds the entry from the CSV described by maskstr, valuestr, text, encoding, tags 132 | // to the program p. 133 | func add(p *Prog, maskstr, valuestr, text, encoding, tags string) { 134 | if strings.Contains(tags, "pseudo") { 135 | return 136 | } 137 | 138 | // For now, ignore the VFP floating point instructions. 139 | if strings.HasPrefix(text, "V") && !strings.Contains(tags, "vfp") { 140 | // TODO 141 | return 142 | } 143 | 144 | mask, err := strconv.ParseUint(maskstr, 0, 32) 145 | if err != nil { 146 | log.Printf("invalid mask %q", maskstr) 147 | return 148 | } 149 | value, err := strconv.ParseUint(valuestr, 0, 32) 150 | if err != nil { 151 | log.Printf("invalid value %q", valuestr) 152 | return 153 | } 154 | 155 | // Parse encoding, building size and offset of each field. 
156 | // The first field in the encoding is the largest offset. 157 | fuzzy := uint32(0) // mask of 'should be' bits 158 | fieldOffset := map[string]int{} 159 | fieldWidth := map[string]int{} 160 | off := 32 161 | for _, f := range strings.Split(encoding, "|") { 162 | n := 1 163 | if i := strings.Index(f, ":"); i >= 0 { 164 | n, _ = strconv.Atoi(f[i+1:]) 165 | } 166 | off -= n 167 | fieldOffset[f] = off 168 | fieldWidth[f] = n 169 | if f == "(0)" || f == "(1)" { 170 | fuzzy |= 1 << uint(off) 171 | } 172 | } 173 | if off != 0 { 174 | fmt.Fprintf(os.Stderr, "%s: counted %d bits in %s\n", text, 32-off, encoding) 175 | } 176 | 177 | // Track which encoding fields we found uses for. 178 | // If we do not find a use for a field, that's an error in the input tables. 179 | fieldUsed := map[string]bool{} 180 | 181 | // Split text into opcode and arguments. 182 | var op, argstr string 183 | if i := strings.Index(text, " "); i >= 0 { 184 | op = text[:i] 185 | argstr = text[i:] 186 | } else { 187 | op = text 188 | } 189 | op = strings.TrimSpace(op) 190 | argstr = strings.TrimSpace(argstr) 191 | 192 | // Parse opcode suffixes. 193 | i := strings.Index(op, "<") 194 | if i < 0 { 195 | i = len(op) 196 | } 197 | if j := strings.Index(op, "{"); j >= 0 && j < i { 198 | i = j 199 | } 200 | op, suffix := op[:i], op[i:] 201 | if suffix != "" && opSuffix[suffix] == "" { 202 | fmt.Fprintf(os.Stderr, "%s: invalid op suffix %q in %s\n", text, suffix, op+suffix) 203 | } 204 | 205 | // Make sure fields needed by opcode suffix are available. 206 | for _, f := range strings.Split(opSuffix[suffix], ",") { 207 | if f != "" && fieldWidth[f] == 0 { 208 | fmt.Fprintf(os.Stderr, "%s: opsuffix %s missing %s in encoding %s\n", text, suffix, f, encoding) 209 | } 210 | fieldUsed[f] = true 211 | } 212 | 213 | // Build list of opcodes that can be generated by this suffix. 214 | // For example, the opcodes generated by ADD are ADD.EQ, ADD.NE, etc. 
215 | // To simplify the decoding of instruction opcodes, we arrange that this 216 | // sequence aligns with the encoding, so that decoding amounts to extracting 217 | // the right bits, concatenating them, and adding them to the first opcode in 218 | // the sequence. If the condition code is present, we always place it in the 219 | // low order bits, so that x&^15 == FOO_EQ tests whether x is any of the 220 | // conditional FOO instructions. 221 | ops := []string{op} 222 | opBits := uint64(0) // record of bits to extract and add to opcode base 223 | opFields := strings.Split(opSuffix[suffix], ",") 224 | // First the optional elements, like {S} meaning "" or ".S". 225 | for strings.HasPrefix(suffix, "{") { 226 | i := strings.Index(suffix, "}") 227 | var f, option string 228 | option, suffix = suffix[1:i], suffix[i+1:] 229 | f, opFields = opFields[0], opFields[1:] 230 | if option == "W" { 231 | // The {W} option on PLD{W} uses the R bit which is !W. 232 | ops = cross(ops, "."+option, "") 233 | } else { 234 | ops = cross(ops, "", "."+option) 235 | } 236 | if fieldWidth[f] != 1 { 237 | fmt.Fprintf(os.Stderr, "%s: have %d bits for {%s}\n", text, fieldWidth[f], option) 238 | } 239 | // opBits is a sequence of 16-bit chunks describing contiguous bit sections. 240 | // Each chunk is 8-bit offset followed by 8-bit size. 241 | opBits = opBits<<16 | uint64(fieldOffset[f])<<8 | 1 242 | } 243 | // Then the true field substitutions. 244 | haveCond := false 245 | for strings.Contains(suffix, "<") { 246 | var f, literal, x string 247 | if len(opFields) == 0 { 248 | fmt.Fprintf(os.Stderr, "%s: ran out of suffix fields for <%s>\n", text, x) 249 | break 250 | } 251 | f, opFields = opFields[0], opFields[1:] 252 | i := strings.Index(suffix, "<") 253 | j := strings.Index(suffix, ">") 254 | literal, x, suffix = suffix[:i], suffix[i+1:j], suffix[j+1:] 255 | 256 | // Add leading literal text to all opcodes. 
257 | ops = cross(ops, literal) 258 | 259 | // The condition can happen anywhere in the opcode text 260 | // but we want to generate the actual variation in the low bits 261 | // of the list index. Remember when and where we've seen and apply 262 | // it after the loop has finished. 263 | if x == "c" && f == "cond:4" { 264 | haveCond = true 265 | ops = cross(ops, "_COND_") 266 | continue 267 | } 268 | 269 | // Otherwise, choices[x] lists the possible expansions of . 270 | // If is of the form the choices are A, B, and C. 271 | expand := choices[x] 272 | if expand == nil && strings.Contains(x, ",") { 273 | expand = strings.Split(x, ",") 274 | } 275 | if expand == nil { 276 | fmt.Fprintf(os.Stderr, "%s: unknown choices for <%s>\n", text, x) 277 | expand = []string{x} 278 | } else if len(expand) != 1< but %d bits\n", text, len(expand), x, fieldWidth[f]) 280 | } 281 | opBits = opBits<<16 | uint64(fieldOffset[f])<<8 | uint64(fieldWidth[f]) 282 | ops = cross(ops, expand...) 283 | } 284 | if haveCond { 285 | // Apply condtional suffix last. 286 | opBits = opBits<<16 | 28<<8 | 4 287 | ops = crossCond(ops) 288 | } 289 | ops = cross(ops, suffix) 290 | 291 | // Now ops is a list of opcodes generated by this opcode pattern. 292 | // We want to make sure that we can arrange for those opcodes to 293 | // happen consecutively in the final opcode numbering. 294 | // Record in p.OpRanges[op] the required consecutive sequence of 295 | // opcode that includes op. To make searches easier, we record 296 | // the sequence as a comma-separated list of strings with commas 297 | // on both ends: [A, B] encodes as ",A,B,". 
298 | if p.OpRanges == nil { 299 | p.OpRanges = make(map[string]string) 300 | } 301 | opstr := "," + strings.Join(ops, ",") + "," 302 | for _, op := range ops { 303 | if old := p.OpRanges[op]; old != "" && old != opstr { 304 | if strings.Contains(old, opstr) { 305 | opstr = old 306 | } else if strings.Contains(opstr, old) { 307 | // great, do nothing 308 | } else { 309 | // It would also be okay if there is some subsequence s such that 310 | // old = x+s and opstr = s+y (or vice versa), in which case we should 311 | // record opstr = x+s+y. However, this has not come up in practice. 312 | // Failing that, we can't satisfy the sequencing requirements. 313 | fmt.Fprintf(os.Stderr, "%s: %s appears in both %s and %s\n", text, op, old, opstr) 314 | } 315 | } 316 | } 317 | for _, op := range strings.Split(opstr, ",") { 318 | if op != "" { 319 | p.OpRanges[op] = opstr 320 | } 321 | } 322 | 323 | // Process the arguments, building a list of argument descriptions. 324 | // Each argument description has the form |field@off|field@off... 325 | // where the |field@off suffixes give the name and location of the fields 326 | // needed by the argument. Each such string maps to a different decoding 327 | // type in the generated table, according to the argOps map. 328 | var args []string 329 | for argstr != "" { 330 | // Find longest match among argSuffixes pieces. 
331 | best := 0 332 | for a := range argSuffixes { 333 | if argstr == a || strings.HasPrefix(argstr, a+",") { 334 | if best < len(a) { 335 | best = len(a) 336 | } 337 | } 338 | } 339 | if best == 0 { 340 | fmt.Fprintf(os.Stderr, "%s: unknown arg %s\n", text, argstr) 341 | break 342 | } 343 | 344 | var arg, desc string 345 | arg, argstr = argstr[:best], strings.TrimSpace(strings.TrimLeft(argstr[best:], ",")) 346 | desc = arg 347 | for _, f := range strings.Split(argSuffixes[desc], ",") { 348 | if f == "" { 349 | continue 350 | } 351 | if fieldWidth[f] == 0 { 352 | fmt.Fprintf(os.Stderr, "%s: arg %s missing %s in encoding %s\n", text, arg, f, encoding) 353 | } 354 | fieldUsed[f] = true 355 | desc += fmt.Sprintf("|%s@%d", f, fieldOffset[f]) 356 | } 357 | args = append(args, desc) 358 | } 359 | 360 | // Check that all encoding fields were used by suffix or argument decoding. 361 | for f := range fieldWidth { 362 | switch f { 363 | case "0", "1", "(0)", "(1)": 364 | // ok 365 | default: 366 | if !fieldUsed[f] { 367 | fmt.Fprintf(os.Stderr, "%s: encoding field %s not used in %s\n", text, f, encoding) 368 | } 369 | } 370 | } 371 | 372 | // Determine decoding priority. Instructions that say 'SEE X' in the tag 373 | // are considered lower priority than ones that don't. In theory the 374 | // structure described by the SEE tags might be richer than that, but 375 | // in practice it only has those two levels. 376 | // We leave space for two more priorities according to whether the 377 | // fuzzy bits are set correctly. The full set of priorities then is: 378 | // 379 | // 4 - no SEE tag, fuzzy bits all match 380 | // 3 - no SEE tag, some fuzzy bits don't match 381 | // 2 - SEE tag, fuzzy bits all match 382 | // 1 - SEE tag, some fuzzy bits don't match 383 | // 384 | // You could argue for swapping the middle two levels but so far 385 | // it has not been an issue. 
386 | pri := 4 387 | if strings.Contains(tags, "SEE") { 388 | pri = 2 389 | } 390 | 391 | inst := Inst{ 392 | Text: text, 393 | Encoding: encoding, 394 | Mask: uint32(mask), 395 | Value: uint32(value), 396 | Priority: pri, 397 | OpBase: ops[0], 398 | OpBits: opBits, 399 | Args: args, 400 | } 401 | p.Inst = append(p.Inst, inst) 402 | 403 | if fuzzy != 0 { 404 | inst.Mask &^= fuzzy 405 | inst.Priority-- 406 | p.Inst = append(p.Inst, inst) 407 | } 408 | } 409 | 410 | // opSuffix describes the encoding fields used to resolve a given opcode suffix. 411 | var opSuffix = map[string]string{ 412 | "": "op", 413 | "": "op:2", 414 | ".F<32,64>": "op,cond:4,sz", 415 | ".F<32,64>": "op,cond:4,sz", 416 | "": "tb,cond:4", 417 | ".8": "op", 418 | "": "cond:4", 419 | ".32": "cond:4", 420 | ".F<32,64>": "cond:4,sz", 421 | "": "N,M,cond:4", 422 | "": "M,cond:4", 423 | "{B}": "B,cond:4", 424 | "{E}.F<32,64>": "E,cond:4,sz", 425 | "{R}": "R,cond:4", 426 | ".F<32,64>.32": "cond:4,sz,op", 427 | ".32.F<32,64>": "op,cond:4,signed,sz", 428 | "{S}": "S,cond:4", 429 | "{W}": "R", 430 | "{X}": "M,cond:4", 431 | ".": "T,cond:4,op", 432 | ".": "cond:4,sz", 433 | ".FX<16,32>.F<32,64>": "cond:4,U,sx,sz", 434 | ".F<32,64>.FX<16,32>": "cond:4,sz,U,sx", 435 | } 436 | 437 | // choices[x] describes the choices for filling in "<"+x+">" in an opcode suffix. 438 | // Opcodes that end up containing ZZ take up a numeric sequence value but are 439 | // not exported in the package API. 440 | var choices = map[string][]string{ 441 | "c": {".EQ", ".NE", ".CS", ".CC", ".MI", ".PL", ".VS", ".VC", ".HI", ".LS", ".GE", ".LT", ".GT", ".LE", "", ".ZZ"}, 442 | "x": {"B", "T"}, 443 | "y": {"B", "T"}, 444 | } 445 | 446 | // argOps maps from argument descriptions to internal decoder name. 
447 | var argOps = map[string]string{ 448 | // 4-bit register encodings 449 | "|Rm:4@0": "arg_R_0", 450 | "|Rn:4@0": "arg_R_0", 451 | "|Rt:4@0": "arg_R_0", 452 | "|Rm:4@8": "arg_R_8", 453 | "|Ra:4@12": "arg_R_12", 454 | "|Rd:4@12": "arg_R_12", 455 | "|RdLo:4@12": "arg_R_12", 456 | "|Rt:4@12": "arg_R_12", 457 | "|Rt:4@12": "arg_R_12_nzcv", 458 | "|Rd:4@16": "arg_R_16", 459 | "|RdHi:4@16": "arg_R_16", 460 | "|Rn:4@16": "arg_R_16", 461 | 462 | // first and second of consecutive register pair 463 | "|Rt:4@0": "arg_R1_0", 464 | "|Rt:4@12": "arg_R1_12", 465 | "|Rt:4@0": "arg_R2_0", 466 | "|Rt:4@12": "arg_R2_12", 467 | 468 | // register arithmetic 469 | ", |Rm:4@0|Rs:4@8|type:2@5": "arg_R_shift_R", 470 | "{,}|Rm:4@0|imm5:5@7|type:2@5": "arg_R_shift_imm", 471 | "{,}|Rn:4@0|imm5:5@7|sh@6": "arg_R_shift_imm", 472 | "{,LSL #}|Rm:4@0|imm5:5@7": "arg_R_shift_imm", 473 | "{,}|Rm:4@0|rotate:2@10": "arg_R_rotate", 474 | 475 | // memory references 476 | "{!}|Rn:4@16|W@21": "arg_R_16_WB", 477 | "[]|Rn:4@16": "arg_mem_R", 478 | "[,+/-{, }]{!}|Rn:4@16|U@23|Rm:4@0|type:2@5|imm5:5@7|P@24|W@21": "arg_mem_R_pm_R_shift_imm_W", 479 | "[{,#+/-}]{!}|Rn:4@16|P@24|U@23|W@21|imm4H:4@8|imm4L:4@0": "arg_mem_R_pm_imm8_W", 480 | "[] {,#+/-}|Rn:4@16|U@23|imm4H:4@8|imm4L:4@0": "arg_mem_R_pm_imm8_postindex", 481 | "[{,#+/-}]{!}|Rn:4@16|P@24|U@23|W@21|imm12:12@0": "arg_mem_R_pm_imm12_W", 482 | "[],#+/-|Rn:4@16|imm12:12@0|U@23": "arg_mem_R_pm_imm12_postindex", 483 | "[,#+/-]|Rn:4@16|U@23|imm12:12@0": "arg_mem_R_pm_imm12_offset", 484 | "[] {,#+/-}|Rn:4@16|U@23|imm12:12@0": "arg_mem_R_pm_imm12_postindex", 485 | "[], +/-|Rn:4@16|U@23|Rm:4@0": "arg_mem_R_pm_R_postindex", 486 | "[,+/-]{!}|Rn:4@16|U@23|Rm:4@0|P@24|W@21": "arg_mem_R_pm_R_W", 487 | "[],+/-{, }|Rn:4@16|Rm:4@0|imm5:5@7|type:2@5|U@23": "arg_mem_R_pm_R_shift_imm_postindex", 488 | "[,+/-{, }]|Rn:4@16|U@23|Rm:4@0|type:2@5|imm5:5@7": "arg_mem_R_pm_R_shift_imm_offset", 489 | "[{,#+/-}]|Rn:4@16|U@23|imm8:8@0": "arg_mem_R_pm_imm8at0_offset", 490 | 491 | 
// pc-relative constants 492 | "|imm12:12@0": "arg_label_p_12", 493 | "|imm12:12@0": "arg_label_m_12", 494 | "|imm12:12@0|U@23": "arg_label_pm_12", 495 | "|imm4H:4@8|imm4L:4@0|U@23": "arg_label_pm_4_4", 496 | 497 | // constants 498 | "#|imm12:12@0": "arg_const", 499 | "#|imm5:5@7": "arg_imm5", 500 | "#|imm5:5@7": "arg_imm5_nz", 501 | "#|imm5:5@7": "arg_imm5_32", 502 | "|imm24:24@0": "arg_label24", 503 | "#|lsb:5@7": "arg_imm5", 504 | "#|lsb:5@7|msb:5@16": "arg_lsb_width", 505 | "#|imm12:12@8|imm4:4@0": "arg_imm_12at8_4at0", 506 | "#|imm12:12@0|imm4:4@16": "arg_imm_4at16_12at0", 507 | "|imm24:24@0|H@24": "arg_label24H", 508 | "#