├── README ├── codereview.cfg ├── armasm ├── Makefile ├── testdata │ ├── Makefile │ └── decode.txt ├── decode_test.go ├── gnu.go ├── plan9x.go ├── objdumpext_test.go ├── objdump_test.go ├── inst.go ├── decode.go └── ext_test.go ├── LICENSE ├── armspec ├── specmap.go ├── pseudo.go ├── pseudo.y ├── spec.go ├── code.go └── y.go └── armmap └── map.go /README: -------------------------------------------------------------------------------- 1 | go get rsc.io/arm 2 | 3 | http://godoc.org/rsc.io/arm 4 | -------------------------------------------------------------------------------- /codereview.cfg: -------------------------------------------------------------------------------- 1 | contributors: http://go.googlecode.com/hg/CONTRIBUTORS 2 | -------------------------------------------------------------------------------- /armasm/Makefile: -------------------------------------------------------------------------------- 1 | tables.go: ../armmap/map.go ../arm.csv 2 | go run ../armmap/map.go -fmt=decoder ../arm.csv >_tables.go && gofmt _tables.go >tables.go && rm _tables.go 3 | -------------------------------------------------------------------------------- /armasm/testdata/Makefile: -------------------------------------------------------------------------------- 1 | newdecode.txt: 2 | cd ..; go test -cover -run 'ObjdumpARMCond' -v -timeout 10h -printtests -long 2>&1 | tee log 3 | cd ..; go test -cover -run 'ObjdumpARMUncond' -v -timeout 10h -printtests -long 2>&1 | tee -a log 4 | egrep ' (gnu|plan9) ' ../log |sort >newdecode.txt 5 | 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009 The Go Authors. All rights reserved. 
2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Google Inc. nor the names of its 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /armasm/decode_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package armasm 6 | 7 | import ( 8 | "encoding/hex" 9 | "io/ioutil" 10 | "strconv" 11 | "strings" 12 | "testing" 13 | ) 14 | 15 | func TestDecode(t *testing.T) { 16 | data, err := ioutil.ReadFile("testdata/decode.txt") 17 | if err != nil { 18 | t.Fatal(err) 19 | } 20 | all := string(data) 21 | for strings.Contains(all, "\t\t") { 22 | all = strings.Replace(all, "\t\t", "\t", -1) 23 | } 24 | for _, line := range strings.Split(all, "\n") { 25 | line = strings.TrimSpace(line) 26 | if line == "" || strings.HasPrefix(line, "#") { 27 | continue 28 | } 29 | f := strings.SplitN(line, "\t", 4) 30 | i := strings.Index(f[0], "|") 31 | if i < 0 { 32 | t.Errorf("parsing %q: missing | separator", f[0]) 33 | continue 34 | } 35 | if i%2 != 0 { 36 | t.Errorf("parsing %q: misaligned | separator", f[0]) 37 | } 38 | size := i / 2 39 | code, err := hex.DecodeString(f[0][:i] + f[0][i+1:]) 40 | if err != nil { 41 | t.Errorf("parsing %q: %v", f[0], err) 42 | continue 43 | } 44 | mode, err := strconv.Atoi(f[1]) 45 | if err != nil { 46 | t.Errorf("invalid mode %q in: %s", f[1], line) 47 | continue 48 | } 49 | syntax, asm := f[2], f[3] 50 | inst, err := Decode(code, Mode(mode)) 51 | var out string 52 | if err != nil { 53 | out = "error: " + err.Error() 54 | } else { 55 | switch syntax { 56 | case "gnu": 57 | out = GNUSyntax(inst) 58 | case "plan9": 59 | out = plan9Syntax(inst, 0, nil, nil) 60 | default: 61 | t.Errorf("unknown syntax %q", syntax) 62 | continue 63 | } 64 | } 65 | if out != asm || inst.Len != size { 66 | t.Errorf("Decode(%s) [%s] = %s, %d, want %s, %d", f[0], syntax, out, inst.Len, asm, size) 67 | } 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /armasm/gnu.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package armasm 6 | 7 | import ( 8 | "bytes" 9 | "fmt" 10 | "strings" 11 | ) 12 | 13 | var saveDot = strings.NewReplacer( 14 | ".F16", "_dot_F16", 15 | ".F32", "_dot_F32", 16 | ".F64", "_dot_F64", 17 | ".S32", "_dot_S32", 18 | ".U32", "_dot_U32", 19 | ".FXS", "_dot_S", 20 | ".FXU", "_dot_U", 21 | ".32", "_dot_32", 22 | ) 23 | 24 | // GNUSyntax returns the GNU assembler syntax for the instruction, as defined by GNU binutils. 25 | // This form typically matches the syntax defined in the ARM Reference Manual. 26 | func GNUSyntax(inst Inst) string { 27 | var buf bytes.Buffer 28 | op := inst.Op.String() 29 | op = saveDot.Replace(op) 30 | op = strings.Replace(op, ".", "", -1) 31 | op = strings.Replace(op, "_dot_", ".", -1) 32 | op = strings.ToLower(op) 33 | buf.WriteString(op) 34 | sep := " " 35 | for i, arg := range inst.Args { 36 | if arg == nil { 37 | break 38 | } 39 | text := gnuArg(&inst, i, arg) 40 | if text == "" { 41 | continue 42 | } 43 | buf.WriteString(sep) 44 | sep = ", " 45 | buf.WriteString(text) 46 | } 47 | return buf.String() 48 | } 49 | 50 | func gnuArg(inst *Inst, argIndex int, arg Arg) string { 51 | switch inst.Op &^ 15 { 52 | case LDRD_EQ, LDREXD_EQ, STRD_EQ: 53 | if argIndex == 1 { 54 | // second argument in consecutive pair not printed 55 | return "" 56 | } 57 | case STREXD_EQ: 58 | if argIndex == 2 { 59 | // second argument in consecutive pair not printed 60 | return "" 61 | } 62 | } 63 | 64 | switch arg := arg.(type) { 65 | case Imm: 66 | switch inst.Op &^ 15 { 67 | case BKPT_EQ: 68 | return fmt.Sprintf("%#04x", uint32(arg)) 69 | case SVC_EQ: 70 | return fmt.Sprintf("%#08x", uint32(arg)) 71 | } 72 | return fmt.Sprintf("#%d", int32(arg)) 73 | 74 | case ImmAlt: 75 | return fmt.Sprintf("#%d, %d", arg.Val, arg.Rot) 76 | 77 | case Mem: 78 | R := gnuArg(inst, -1, arg.Base) 79 | X := "" 80 | if arg.Sign != 0 { 81 | X = "" 82 | if arg.Sign < 0 { 83 | X = "-" 84 | } 85 | X += gnuArg(inst, -1, arg.Index) 86 | if arg.Shift == ShiftLeft && arg.Count == 
0 { 87 | // nothing 88 | } else if arg.Shift == RotateRightExt { 89 | X += ", rrx" 90 | } else { 91 | X += fmt.Sprintf(", %s #%d", strings.ToLower(arg.Shift.String()), arg.Count) 92 | } 93 | } else { 94 | X = fmt.Sprintf("#%d", arg.Offset) 95 | } 96 | 97 | switch arg.Mode { 98 | case AddrOffset: 99 | if X == "#0" { 100 | return fmt.Sprintf("[%s]", R) 101 | } 102 | return fmt.Sprintf("[%s, %s]", R, X) 103 | case AddrPreIndex: 104 | return fmt.Sprintf("[%s, %s]!", R, X) 105 | case AddrPostIndex: 106 | return fmt.Sprintf("[%s], %s", R, X) 107 | case AddrLDM: 108 | if X == "#0" { 109 | return R 110 | } 111 | case AddrLDM_WB: 112 | if X == "#0" { 113 | return R + "!" 114 | } 115 | } 116 | return fmt.Sprintf("[%s Mode(%d) %s]", R, int(arg.Mode), X) 117 | 118 | case PCRel: 119 | return fmt.Sprintf(".%+#x", int32(arg)+4) 120 | 121 | case Reg: 122 | switch inst.Op &^ 15 { 123 | case LDREX_EQ: 124 | if argIndex == 0 { 125 | return fmt.Sprintf("r%d", int32(arg)) 126 | } 127 | } 128 | switch arg { 129 | case R10: 130 | return "sl" 131 | case R11: 132 | return "fp" 133 | case R12: 134 | return "ip" 135 | } 136 | 137 | case RegList: 138 | var buf bytes.Buffer 139 | fmt.Fprintf(&buf, "{") 140 | sep := "" 141 | for i := 0; i < 16; i++ { 142 | if arg&(1<") && !strings.Contains(inst.Syntax[0], "") && !strings.Contains(inst.Syntax[0], "VLDM") && !strings.Contains(inst.Syntax[0], "VSTM") { 49 | out = append(out, inst) 50 | } 51 | } 52 | insts = out 53 | 54 | for i := range insts { 55 | dosize(&insts[i]) 56 | } 57 | 58 | var cond, special []Inst 59 | for _, inst := range insts { 60 | if inst.Base>>28 == 0xF { 61 | special = append(special, inst) 62 | } else { 63 | cond = append(cond, inst) 64 | } 65 | } 66 | 67 | fmt.Printf("special:\n") 68 | split(special, 0xF0000000, 1) 69 | fmt.Printf("cond:\n") 70 | split(cond, 0xF0000000, 1) 71 | } 72 | 73 | func dosize(inst *Inst) { 74 | var base, mask uint32 75 | off := 0 76 | for _, f := range strings.Split(inst.Bits, "|") { 77 | if i := 
strings.Index(f, ":"); i >= 0 { 78 | n, _ := strconv.Atoi(f[i+1:]) 79 | off += n 80 | continue 81 | } 82 | for _, bit := range strings.Fields(f) { 83 | switch bit { 84 | case "0", "(0)": 85 | mask |= 1 << uint(31-off) 86 | case "1", "(1)": 87 | base |= 1 << uint(31-off) 88 | } 89 | off++ 90 | } 91 | } 92 | if off != 16 && off != 32 { 93 | log.Printf("incorrect bit count for %s %s: have %d", inst.Name, inst.Bits, off) 94 | } 95 | if off == 16 { 96 | mask >>= 16 97 | base >>= 16 98 | } 99 | mask |= base 100 | inst.Mask = mask 101 | inst.Base = base 102 | } 103 | 104 | func split(insts []Inst, used uint32, depth int) { 105 | Again: 106 | if len(insts) <= 1 { 107 | for _, inst := range insts { 108 | fmt.Printf("%*s%#08x %#08x %s %s %v\n", depth*2+2, "", inst.Mask, inst.Base, inst.Syntax[0], inst.Bits, seeRE.FindAllString(inst.Code, -1)) 109 | } 110 | return 111 | } 112 | 113 | m := ^used 114 | for _, inst := range insts { 115 | m &= inst.Mask 116 | } 117 | if m == 0 { 118 | fmt.Printf("«%*s%#08x masked out (%d)\n", depth*2, "", used, len(insts)) 119 | for _, inst := range insts { 120 | fmt.Printf("%*s%#08x %#08x %s %s %v\n", depth*2+2, "", inst.Mask, inst.Base, inst.Syntax[0], inst.Bits, seeRE.FindAllString(inst.Code, -1)) 121 | } 122 | updated := false 123 | for i := range insts { 124 | if updateMask(&insts[i]) { 125 | updated = true 126 | } 127 | } 128 | fmt.Printf("»\n") 129 | if updated { 130 | goto Again 131 | } 132 | fmt.Printf("%*s%#08x masked out (%d)\n", depth*2, "", used, len(insts)) 133 | for _, inst := range insts { 134 | fmt.Printf("%*s%#08x %#08x %s %s %v\n", depth*2+2, "", inst.Mask, inst.Base, inst.Syntax[0], inst.Bits, seeRE.FindAllString(inst.Code, -1)) 135 | } 136 | //checkOverlap(used, insts) 137 | return 138 | } 139 | for i := 31; i >= 0; i-- { 140 | if m&(1< 0 { 153 | suffix := "" 154 | if len(bit[1-b]) == 0 { 155 | suffix = " (only)" 156 | } 157 | fmt.Printf("%*sbit %#08x = %d%s\n", depth*2, "", m, b, suffix) 158 | split(list, used|m, depth+1) 
159 | } 160 | } 161 | } 162 | 163 | var seeRE = regexp.MustCompile(`SEE ([^;\n]+)`) 164 | 165 | func updateMask(inst *Inst) bool { 166 | defer func() { 167 | if err := recover(); err != nil { 168 | fmt.Println("PANIC:", err) 169 | return 170 | } 171 | }() 172 | 173 | print(".") 174 | println(inst.Name, inst.ID, inst.Bits) 175 | println(inst.Code) 176 | wiggle := ^inst.Mask &^ 0xF0000000 177 | n := countbits(wiggle) 178 | m1 := ^uint32(0) 179 | m2 := ^uint32(0) 180 | for i := uint32(0); i < 1<>= 1 { 207 | n += int(x & 1) 208 | } 209 | return n 210 | } 211 | 212 | func expand(x, m uint32) uint32 { 213 | var out uint32 214 | for i := uint(0); i < 32; i++ { 215 | out >>= 1 216 | if m&1 != 0 { 217 | out |= (x & 1) << 31 218 | x >>= 1 219 | } 220 | m >>= 1 221 | } 222 | return out 223 | } 224 | -------------------------------------------------------------------------------- /armasm/plan9x.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package armasm 6 | 7 | import ( 8 | "bytes" 9 | "encoding/binary" 10 | "fmt" 11 | "io" 12 | "strings" 13 | ) 14 | 15 | // plan9Syntax returns the Go assembler syntax for the instruction. 16 | // The syntax was originally defined by Plan 9. 17 | // The pc is the program counter of the instruction, used for expanding 18 | // PC-relative addresses into absolute ones. 19 | // The symname function queries the symbol table for the program 20 | // being disassembled. Given a target address it returns the name and base 21 | // address of the symbol containing the target, if any; otherwise it returns "", 0. 22 | // The reader r should read from the text segment using text addresses 23 | // as offsets; it is used to display pc-relative loads as constant loads. 
24 | func plan9Syntax(inst Inst, pc uint64, symname func(uint64) (string, uint64), text io.ReaderAt) string { 25 | if symname == nil { 26 | symname = func(uint64) (string, uint64) { return "", 0 } 27 | } 28 | 29 | var args []string 30 | for _, a := range inst.Args { 31 | if a == nil { 32 | break 33 | } 34 | args = append(args, plan9Arg(&inst, pc, symname, a)) 35 | } 36 | 37 | op := inst.Op.String() 38 | 39 | switch inst.Op &^ 15 { 40 | case LDR_EQ, LDRB_EQ, LDRH_EQ: 41 | // Check for RET 42 | reg, _ := inst.Args[0].(Reg) 43 | mem, _ := inst.Args[1].(Mem) 44 | if inst.Op&^15 == LDR_EQ && reg == R15 && mem.Base == SP && mem.Sign == 0 && mem.Mode == AddrPostIndex { 45 | return fmt.Sprintf("RET%s #%d", op[3:], mem.Offset) 46 | } 47 | 48 | // Check for PC-relative load. 49 | if mem.Base == PC && mem.Sign == 0 && mem.Mode == AddrOffset && text != nil { 50 | addr := uint32(pc) + 8 + uint32(mem.Offset) 51 | buf := make([]byte, 4) 52 | switch inst.Op &^ 15 { 53 | case LDRB_EQ: 54 | if _, err := text.ReadAt(buf[:1], int64(addr)); err != nil { 55 | break 56 | } 57 | args[1] = fmt.Sprintf("$%#x", buf[0]) 58 | 59 | case LDRH_EQ: 60 | if _, err := text.ReadAt(buf[:2], int64(addr)); err != nil { 61 | break 62 | } 63 | args[1] = fmt.Sprintf("$%#x", binary.LittleEndian.Uint16(buf)) 64 | 65 | case LDR_EQ: 66 | if _, err := text.ReadAt(buf, int64(addr)); err != nil { 67 | break 68 | } 69 | x := binary.LittleEndian.Uint32(buf) 70 | if s, base := symname(uint64(x)); s != "" && uint64(x) == base { 71 | args[1] = fmt.Sprintf("$%s(SB)", s) 72 | } else { 73 | args[1] = fmt.Sprintf("$%#x", x) 74 | } 75 | } 76 | } 77 | } 78 | 79 | // Move addressing mode into opcode suffix. 
80 | suffix := "" 81 | switch inst.Op &^ 15 { 82 | case LDR_EQ, LDRB_EQ, LDRH_EQ, STR_EQ, STRB_EQ, STRH_EQ: 83 | mem, _ := inst.Args[1].(Mem) 84 | switch mem.Mode { 85 | case AddrOffset, AddrLDM: 86 | // no suffix 87 | case AddrPreIndex, AddrLDM_WB: 88 | suffix = ".W" 89 | case AddrPostIndex: 90 | suffix = ".P" 91 | } 92 | off := "" 93 | if mem.Offset != 0 { 94 | off = fmt.Sprintf("%#x", mem.Offset) 95 | } 96 | base := fmt.Sprintf("(R%d)", int(mem.Base)) 97 | index := "" 98 | if mem.Sign != 0 { 99 | sign := "" 100 | if mem.Sign < 0 { 101 | sign = "" 102 | } 103 | shift := "" 104 | if mem.Count != 0 { 105 | shift = fmt.Sprintf("%s%d", plan9Shift[mem.Shift], mem.Count) 106 | } 107 | index = fmt.Sprintf("(%sR%d%s)", sign, int(mem.Index), shift) 108 | } 109 | args[1] = off + base + index 110 | } 111 | 112 | // Reverse args, placing dest last. 113 | for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 { 114 | args[i], args[j] = args[j], args[i] 115 | } 116 | 117 | switch inst.Op &^ 15 { 118 | case MOV_EQ: 119 | op = "MOVW" + op[3:] 120 | 121 | case LDR_EQ: 122 | op = "MOVW" + op[3:] + suffix 123 | case LDRB_EQ: 124 | op = "MOVB" + op[4:] + suffix 125 | case LDRH_EQ: 126 | op = "MOVH" + op[4:] + suffix 127 | 128 | case STR_EQ: 129 | op = "MOVW" + op[3:] + suffix 130 | args[0], args[1] = args[1], args[0] 131 | case STRB_EQ: 132 | op = "MOVB" + op[4:] + suffix 133 | args[0], args[1] = args[1], args[0] 134 | case STRH_EQ: 135 | op = "MOVH" + op[4:] + suffix 136 | args[0], args[1] = args[1], args[0] 137 | } 138 | 139 | if args != nil { 140 | op += " " + strings.Join(args, ", ") 141 | } 142 | 143 | return op 144 | } 145 | 146 | // assembler syntax for the various shifts. 147 | // @x> is a lie; the assembler uses @> 0 148 | // instead of @x> 1, but i wanted to be clear that it 149 | // was a different operation (rotate right extended, not rotate right). 
150 | var plan9Shift = []string{"<<", ">>", "->", "@>", "@x>"} 151 | 152 | func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg Arg) string { 153 | switch a := arg.(type) { 154 | case Endian: 155 | 156 | case Imm: 157 | return fmt.Sprintf("$%d", int(a)) 158 | 159 | case Mem: 160 | 161 | case PCRel: 162 | addr := uint32(pc) + 8 + uint32(a) 163 | if s, base := symname(uint64(addr)); s != "" && uint64(addr) == base { 164 | return fmt.Sprintf("%s(SB)", s) 165 | } 166 | return fmt.Sprintf("%#x", addr) 167 | 168 | case Reg: 169 | if a < 16 { 170 | return fmt.Sprintf("R%d", int(a)) 171 | } 172 | 173 | case RegList: 174 | var buf bytes.Buffer 175 | start := -2 176 | end := -2 177 | fmt.Fprintf(&buf, "[") 178 | flush := func() { 179 | if start >= 0 { 180 | if buf.Len() > 1 { 181 | fmt.Fprintf(&buf, ",") 182 | } 183 | if start == end { 184 | fmt.Fprintf(&buf, "R%d", start) 185 | } else { 186 | fmt.Fprintf(&buf, "R%d-R%d", start, end) 187 | } 188 | } 189 | } 190 | for i := 0; i < 16; i++ { 191 | if a&(1< 20 { 36 | fmt.Printf("too many errors\n") 37 | os.Exit(1) 38 | } 39 | } 40 | 41 | func re(s string) *regexp.Regexp { 42 | return regexp.MustCompile(`\A(?:` + s + `)`) 43 | } 44 | 45 | var tokens = []struct { 46 | re *regexp.Regexp 47 | val int 48 | fn func(*Lexer, string, *yySymType) 49 | }{ 50 | {re(`//[^\n]*`), -1, nil}, 51 | {re(`/\*(.|\n)*?\*/`), -1, nil}, 52 | {re(`[ \t\n]+`), -1, nil}, 53 | {re(`»`), _INDENT, nil}, 54 | {re(`«`), _UNINDENT, str}, 55 | {re(`return`), _RETURN, str}, 56 | {re(`UNDEFINED`), _UNDEFINED, str}, 57 | {re(`UNPREDICTABLE`), _UNPREDICTABLE, str}, 58 | {re(`SEE [^;]+`), _SEE, str}, 59 | {re(`IMPLEMENTATION_DEFINED( [^;]+)?`), _IMPLEMENTATION_DEFINED, str}, 60 | {re(`SUBARCHITECTURE_DEFINED( [^;]+)?`), _SUBARCHITECTURE_DEFINED, str}, 61 | {re(`if`), _IF, nil}, 62 | {re(`then`), _THEN, nil}, 63 | {re(`repeat`), _REPEAT, nil}, 64 | {re(`until`), _UNTIL, nil}, 65 | {re(`while`), _WHILE, nil}, 66 | {re(`case`), _CASE, nil}, 67 
| {re(`for`), _FOR, nil}, 68 | {re(`to`), _TO, nil}, 69 | {re(`do`), _DO, nil}, 70 | {re(`of`), _OF, nil}, 71 | {re(`elsif`), _ELSIF, nil}, 72 | {re(`else`), _ELSE, nil}, 73 | {re(`otherwise`), _OTHERWISE, nil}, 74 | {re(`enumeration`), _ENUMERATION, nil}, 75 | {re(`when`), _WHEN, nil}, 76 | {re(`UNKNOWN`), _UNKNOWN, nil}, 77 | {re(`DIV`), _DIV, nil}, 78 | {re(`MOD`), _MOD, nil}, 79 | {re(`AND`), _AND, nil}, 80 | {re(`OR`), _OR, nil}, 81 | {re(`EOR`), _EOR, nil}, 82 | {re(`&&`), _ANDAND, nil}, 83 | {re(`\|\|`), _OROR, nil}, 84 | {re(`==`), _EQ, nil}, 85 | {re(`!=`), _NE, nil}, 86 | {re(` <`), _LT, nil}, 87 | {re(` ?<=`), _LE, nil}, 88 | {re(` ?>=`), _GE, nil}, 89 | {re(` >`), _GT, nil}, 90 | {re(`{`), '{', nil}, 91 | {re(`}`), '}', nil}, 92 | {re(`<`), '<', nil}, 93 | {re(`>`), '>', nil}, 94 | {re(`2^`), _TWOPOW, nil}, 95 | {re(` ?<<`), _LSH, nil}, 96 | {re(` ?>>`), _RSH, nil}, 97 | {re(`,`), ',', nil}, 98 | {re(`:`), ':', nil}, 99 | {re(`\+`), '+', nil}, 100 | {re(`\.`), '.', nil}, 101 | {re(`-`), '-', nil}, 102 | {re(`|`), '|', nil}, 103 | {re(`\^`), '^', nil}, 104 | {re(`\*`), '*', nil}, 105 | {re(`/`), '/', nil}, 106 | {re(`%`), '%', nil}, 107 | {re(`&`), '&', nil}, 108 | {re(`!`), '!', nil}, 109 | {re(`;`), ';', nil}, 110 | {re(`=`), '=', nil}, 111 | {re(`\(`), '(', nil}, 112 | {re(`\)`), ')', nil}, 113 | {re(`\[`), '[', nil}, 114 | {re(`\]`), ']', nil}, 115 | {re(`!`), '!', nil}, 116 | {re(`[0-9]+`), _CONST, str}, 117 | {re(`[0-9]+\.[0-9]+`), _CONST, str}, 118 | {re(`0x[0-9A-Fa-f]+`), _CONST, str}, 119 | {re("[‘’][ 0-9x]+’"), _CONST, strNoSpaces}, 120 | {re(`bit`), _BIT, str}, 121 | {re(`bits\(`), _BITS, str1x}, 122 | {re(`assert`), _ASSERT, str}, 123 | {re(`integer`), _INTEGER, nil}, 124 | {re(`boolean`), _BOOLEAN, nil}, 125 | 126 | {re(`[A-Za-z_][A-Za-z0-9_]*`), _NAME, str}, 127 | {re(`[A-Za-z_][A-Za-z0-9_]*\(`), _NAME_PAREN, str1x}, 128 | } 129 | 130 | func (lx *Lexer) Lex(yy *yySymType) int { 131 | if len(lx.input) == 0 { 132 | return _EOF 133 | } 134 | 
var ( 135 | longest string 136 | longestVal int 137 | longestFn func(*Lexer, string, *yySymType) 138 | ) 139 | for _, tok := range tokens { 140 | s := tok.re.FindString(lx.input) 141 | if len(s) > len(longest) { 142 | longest = s 143 | longestVal = tok.val 144 | longestFn = tok.fn 145 | } 146 | } 147 | if longest == "" { 148 | lx.Error(fmt.Sprintf("lexer stuck at %.10q", lx.input)) 149 | return -1 150 | } 151 | //println(longest) 152 | yy.line = lx.line() 153 | if longestFn != nil { 154 | lx.sym = longest 155 | longestFn(lx, longest, yy) 156 | } 157 | lx.input = lx.input[len(longest):] 158 | lx.lineno += strings.Count(longest, "\n") 159 | if longestVal < 0 { 160 | // skip 161 | return lx.Lex(yy) 162 | } 163 | return longestVal 164 | } 165 | 166 | func (lx *Lexer) Error(s string) { 167 | lx.line().Errorf("%s near %s", s, lx.sym) 168 | } 169 | 170 | func (lx *Lexer) line() Line { 171 | return Line{lx.file, lx.lineno} 172 | } 173 | 174 | func nop(*Lexer, string, *yySymType) { 175 | // having a function in the table 176 | // will make the lexer save the string 177 | // for use in error messages. 178 | // nothing more to do. 179 | } 180 | 181 | func str(lx *Lexer, s string, yy *yySymType) { 182 | yy.str = s 183 | } 184 | 185 | func str1(lx *Lexer, s string, yy *yySymType) { 186 | yy.str = s[1:] 187 | } 188 | 189 | func str1x(lx *Lexer, s string, yy *yySymType) { 190 | yy.str = s[:len(s)-1] 191 | } 192 | 193 | func strNoSpaces(lx *Lexer, s string, yy *yySymType) { 194 | yy.str = strings.Replace(s, " ", "", -1) 195 | } 196 | 197 | func parse(name, text string) []*Stmt { 198 | text = markup(text) 199 | lx := &Lexer{ 200 | input: text, 201 | file: name, 202 | lineno: 1, 203 | } 204 | nerror = 0 205 | yyParse(lx) 206 | return lx.prog 207 | } 208 | 209 | func markup(text string) string { 210 | prefix := "" 211 | 212 | // Fix typos. 
213 | text = strings.Replace(text, "R[i}", "R[i]", -1) 214 | text = strings.Replace(text, "R[n}", "R[n]", -1) 215 | text = strings.Replace(text, "(1 << (3-UInt(op)-UInt(size));", "(1 << (3-UInt(op)-UInt(size)));", -1) 216 | text = strings.Replace(text, "(D[n+r] AND NOT(D[m+r]);", "(D[n+r] AND NOT(D[m+r]));", -1) 217 | text = strings.Replace(text, "(D[d+r] AND NOT(D[m+r]);", "(D[d+r] AND NOT(D[m+r]));", -1) 218 | text = strings.Replace(text, "(D[n+r] AND D[d+r]) OR (D[m+r] AND NOT(D[d+r]);", "(D[n+r] AND D[d+r]) OR (D[m+r] AND NOT(D[d+r]));", -1) 219 | 220 | // Add indent, unindent tags. 221 | lines := strings.Split(text, "\n") 222 | var indent []int 223 | for j, line := range lines { 224 | if i := strings.Index(line, "//"); i >= 0 { 225 | line = line[:i] 226 | } 227 | if strings.TrimSpace(line) == "" { 228 | continue 229 | } 230 | n := 0 231 | for n < len(line) && line[n] == '\t' { 232 | n++ 233 | } 234 | if len(indent) == 0 { 235 | indent = append(indent, n) 236 | } 237 | i := indent[len(indent)-1] 238 | if i > n { 239 | for i > n { 240 | line = "«" + line 241 | indent = indent[:len(indent)-1] 242 | if len(indent) == 0 { 243 | i = -1 244 | } else { 245 | i = indent[len(indent)-1] 246 | } 247 | } 248 | if i == -1 { 249 | indent = append(indent, n) 250 | i = n 251 | } 252 | } 253 | if i < n { 254 | line = "»" + line 255 | indent = append(indent, n) 256 | } 257 | lines[j] = line 258 | } 259 | n := len(indent) - 1 260 | if n < 0 { 261 | n = 0 262 | } 263 | return prefix + strings.Join(lines, "\n") + strings.Repeat("«", n) 264 | } 265 | -------------------------------------------------------------------------------- /armasm/objdumpext_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Copied and simplified from rsc.io/x86/x86asm/objdumpext_test.go. 
6 | 7 | package armasm 8 | 9 | import ( 10 | "bytes" 11 | "debug/elf" 12 | "encoding/binary" 13 | "fmt" 14 | "io" 15 | "log" 16 | "os" 17 | "strconv" 18 | "strings" 19 | "testing" 20 | ) 21 | 22 | const objdumpPath = "/usr/local/bin/arm-linux-elf-objdump" 23 | 24 | func testObjdumpARM(t *testing.T, generate func(func([]byte))) { 25 | testObjdumpArch(t, generate, ModeARM) 26 | } 27 | 28 | func testObjdumpArch(t *testing.T, generate func(func([]byte)), arch Mode) { 29 | if _, err := os.Stat(objdumpPath); err != nil { 30 | if !testing.Short() { 31 | t.Fatal(err) 32 | } 33 | t.Skip(err) 34 | } 35 | 36 | testExtDis(t, "gnu", arch, objdump, generate, allowedMismatchObjdump) 37 | } 38 | 39 | func objdump(ext *ExtDis) error { 40 | // File already written with instructions; add ELF header. 41 | if ext.Arch == ModeARM { 42 | if err := writeELF32(ext.File, ext.Size); err != nil { 43 | return err 44 | } 45 | } else { 46 | panic("unknown arch") 47 | } 48 | 49 | b, err := ext.Run(objdumpPath, "-d", "-z", ext.File.Name()) 50 | if err != nil { 51 | return err 52 | } 53 | 54 | var ( 55 | nmatch int 56 | reading bool 57 | next uint32 = start 58 | addr uint32 59 | encbuf [4]byte 60 | enc []byte 61 | text string 62 | ) 63 | flush := func() { 64 | if addr == next { 65 | if m := pcrel.FindStringSubmatch(text); m != nil { 66 | targ, _ := strconv.ParseUint(m[2], 16, 64) 67 | text = fmt.Sprintf("%s .%+#x", m[1], int32(uint32(targ)-addr-uint32(len(enc)))) 68 | } 69 | if strings.HasPrefix(text, "stmia") { 70 | text = "stm" + text[5:] 71 | } 72 | if strings.HasPrefix(text, "stmfd") { 73 | text = "stmdb" + text[5:] 74 | } 75 | if strings.HasPrefix(text, "ldmfd") { 76 | text = "ldm" + text[5:] 77 | } 78 | text = strings.Replace(text, "#0.0", "#0", -1) 79 | if text == "undefined" && len(enc) == 4 { 80 | text = "error: unknown instruction" 81 | enc = nil 82 | } 83 | if len(enc) == 4 { 84 | // prints as word but we want to record bytes 85 | enc[0], enc[3] = enc[3], enc[0] 86 | enc[1], enc[2] = 
enc[2], enc[1] 87 | } 88 | ext.Dec <- ExtInst{addr, encbuf, len(enc), text} 89 | encbuf = [4]byte{} 90 | enc = nil 91 | next += 4 92 | } 93 | } 94 | var textangle = []byte("<.text>:") 95 | for { 96 | line, err := b.ReadSlice('\n') 97 | if err != nil { 98 | if err == io.EOF { 99 | break 100 | } 101 | return fmt.Errorf("reading objdump output: %v", err) 102 | } 103 | if bytes.Contains(line, textangle) { 104 | reading = true 105 | continue 106 | } 107 | if !reading { 108 | continue 109 | } 110 | if debug { 111 | os.Stdout.Write(line) 112 | } 113 | if enc1 := parseContinuation(line, encbuf[:len(enc)]); enc1 != nil { 114 | enc = enc1 115 | continue 116 | } 117 | flush() 118 | nmatch++ 119 | addr, enc, text = parseLine(line, encbuf[:0]) 120 | if addr > next { 121 | return fmt.Errorf("address out of sync expected <= %#x at %q in:\n%s", next, line, line) 122 | } 123 | } 124 | flush() 125 | if next != start+uint32(ext.Size) { 126 | return fmt.Errorf("not enough results found [%d %d]", next, start+ext.Size) 127 | } 128 | if err := ext.Wait(); err != nil { 129 | return fmt.Errorf("exec: %v", err) 130 | } 131 | 132 | return nil 133 | } 134 | 135 | var ( 136 | undefined = []byte("") 137 | unpredictable = []byte("") 138 | illegalShifter = []byte("") 139 | ) 140 | 141 | func parseLine(line []byte, encstart []byte) (addr uint32, enc []byte, text string) { 142 | oline := line 143 | i := index(line, ":\t") 144 | if i < 0 { 145 | log.Fatalf("cannot parse disassembly: %q", oline) 146 | } 147 | x, err := strconv.ParseUint(string(trimSpace(line[:i])), 16, 32) 148 | if err != nil { 149 | log.Fatalf("cannot parse disassembly: %q", oline) 150 | } 151 | addr = uint32(x) 152 | line = line[i+2:] 153 | i = bytes.IndexByte(line, '\t') 154 | if i < 0 { 155 | log.Fatalf("cannot parse disassembly: %q", oline) 156 | } 157 | enc, ok := parseHex(line[:i], encstart) 158 | if !ok { 159 | log.Fatalf("cannot parse disassembly: %q", oline) 160 | } 161 | line = trimSpace(line[i:]) 162 | if 
bytes.Contains(line, undefined) { 163 | text = "undefined" 164 | return 165 | } 166 | if bytes.Contains(line, illegalShifter) { 167 | text = "undefined" 168 | return 169 | } 170 | if false && bytes.Contains(line, unpredictable) { 171 | text = "unpredictable" 172 | return 173 | } 174 | if i := bytes.IndexByte(line, ';'); i >= 0 { 175 | line = trimSpace(line[:i]) 176 | } 177 | text = string(fixSpace(line)) 178 | return 179 | } 180 | 181 | func parseContinuation(line []byte, enc []byte) []byte { 182 | i := index(line, ":\t") 183 | if i < 0 { 184 | return nil 185 | } 186 | line = line[i+1:] 187 | enc, _ = parseHex(line, enc) 188 | return enc 189 | } 190 | 191 | // writeELF32 writes an ELF32 header to the file, 192 | // describing a text segment that starts at start 193 | // and extends for size bytes. 194 | func writeELF32(f *os.File, size int) error { 195 | f.Seek(0, 0) 196 | var hdr elf.Header32 197 | var prog elf.Prog32 198 | var sect elf.Section32 199 | var buf bytes.Buffer 200 | binary.Write(&buf, binary.LittleEndian, &hdr) 201 | off1 := buf.Len() 202 | binary.Write(&buf, binary.LittleEndian, &prog) 203 | off2 := buf.Len() 204 | binary.Write(&buf, binary.LittleEndian, §) 205 | off3 := buf.Len() 206 | buf.Reset() 207 | data := byte(elf.ELFDATA2LSB) 208 | hdr = elf.Header32{ 209 | Ident: [16]byte{0x7F, 'E', 'L', 'F', 1, data, 1}, 210 | Type: 2, 211 | Machine: uint16(elf.EM_ARM), 212 | Version: 1, 213 | Entry: start, 214 | Phoff: uint32(off1), 215 | Shoff: uint32(off2), 216 | Flags: 0x05000002, 217 | Ehsize: uint16(off1), 218 | Phentsize: uint16(off2 - off1), 219 | Phnum: 1, 220 | Shentsize: uint16(off3 - off2), 221 | Shnum: 3, 222 | Shstrndx: 2, 223 | } 224 | binary.Write(&buf, binary.LittleEndian, &hdr) 225 | prog = elf.Prog32{ 226 | Type: 1, 227 | Off: start, 228 | Vaddr: start, 229 | Paddr: start, 230 | Filesz: uint32(size), 231 | Memsz: uint32(size), 232 | Flags: 5, 233 | Align: start, 234 | } 235 | binary.Write(&buf, binary.LittleEndian, &prog) 236 | 
binary.Write(&buf, binary.LittleEndian, §) // NULL section 237 | sect = elf.Section32{ 238 | Name: 1, 239 | Type: uint32(elf.SHT_PROGBITS), 240 | Addr: start, 241 | Off: start, 242 | Size: uint32(size), 243 | Flags: uint32(elf.SHF_ALLOC | elf.SHF_EXECINSTR), 244 | Addralign: 4, 245 | } 246 | binary.Write(&buf, binary.LittleEndian, §) // .text 247 | sect = elf.Section32{ 248 | Name: uint32(len("\x00.text\x00")), 249 | Type: uint32(elf.SHT_STRTAB), 250 | Addr: 0, 251 | Off: uint32(off2 + (off3-off2)*3), 252 | Size: uint32(len("\x00.text\x00.shstrtab\x00")), 253 | Addralign: 1, 254 | } 255 | binary.Write(&buf, binary.LittleEndian, §) 256 | buf.WriteString("\x00.text\x00.shstrtab\x00") 257 | f.Write(buf.Bytes()) 258 | return nil 259 | } 260 | -------------------------------------------------------------------------------- /armasm/objdump_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package armasm 6 | 7 | import ( 8 | "encoding/binary" 9 | "strings" 10 | "testing" 11 | ) 12 | 13 | func TestObjdumpARMTestdata(t *testing.T) { testObjdumpARM(t, testdataCases(t)) } 14 | func TestObjdumpARMManual(t *testing.T) { testObjdumpARM(t, hexCases(t, objdumpManualTests)) } 15 | func TestObjdumpARMCond(t *testing.T) { testObjdumpARM(t, condCases(t)) } 16 | func TestObjdumpARMUncond(t *testing.T) { testObjdumpARM(t, uncondCases(t)) } 17 | func TestObjdumpARMVFP(t *testing.T) { testObjdumpARM(t, vfpCases(t)) } 18 | 19 | // objdumpManualTests holds test cases that will be run by TestObjdumpARMManual. 20 | // If you are debugging a few cases that turned up in a longer run, it can be useful 21 | // to list them here and then use -run=Manual, particularly with tracing enabled. 
22 | // Note that these are byte sequences, so they must be reversed from the usual 23 | // word presentation. 24 | var objdumpManualTests = ` 25 | 002a9b1d 26 | 001b9bed 27 | 020b8ded 28 | 003a9b1d 29 | 060b8ded 30 | fcde1100 31 | b4de1100 32 | bc480000 33 | 0b008de7 34 | 0b00ade7 35 | fdbcfaf7 36 | ` 37 | 38 | // allowedMismatchObjdump reports whether the mismatch between text and dec 39 | // should be allowed by the test. 40 | func allowedMismatchObjdump(text string, size int, inst *Inst, dec ExtInst) bool { 41 | if hasPrefix(text, "error:") { 42 | if hasPrefix(dec.text, unsupported...) || strings.Contains(dec.text, "invalid:") || strings.HasSuffix(dec.text, "^") || strings.Contains(dec.text, "f16.f64") || strings.Contains(dec.text, "f64.f16") { 43 | return true 44 | } 45 | // word 4320F02C: libopcodes says 'nopmi {44}'. 46 | if hasPrefix(dec.text, "nop") && strings.Contains(dec.text, "{") { 47 | return true 48 | } 49 | } 50 | 51 | if hasPrefix(dec.text, "error:") && text == "undef" && inst.Enc == 0xf7fabcfd { 52 | return true 53 | } 54 | 55 | // word 00f02053: libopcodes says 'noppl {0}'. 56 | if hasPrefix(dec.text, "nop") && hasPrefix(text, "nop") && dec.text == text+" {0}" { 57 | return true 58 | } 59 | 60 | // word F57FF04F. we say 'dsb #15', libopcodes says 'dsb sy'. 61 | if hasPrefix(text, "dsb") && hasPrefix(dec.text, "dsb") { 62 | return true 63 | } 64 | // word F57FF06F. we say 'isb #15', libopcodes says 'isb sy'. 65 | if hasPrefix(text, "isb") && hasPrefix(dec.text, "isb") { 66 | return true 67 | } 68 | // word F57FF053. we say 'dmb #3', libopcodes says 'dmb osh'. 69 | if hasPrefix(text, "dmb") && hasPrefix(dec.text, "dmb") { 70 | return true 71 | } 72 | 73 | // word 992D0000. push/stmdb with no registers (undefined). 74 | // we say 'stmdbls sp!, {}', libopcodes says 'pushls {}'. 
75 | if hasPrefix(text, "stmdb") && hasPrefix(dec.text, "push") && strings.Contains(text, "{}") && strings.Contains(dec.text, "{}") { 76 | return true 77 | } 78 | 79 | // word 28BD0000. pop/ldm with no registers (undefined). 80 | // we say 'ldmcs sp!, {}', libopcodes says 'popcs {}'. 81 | if hasPrefix(text, "ldm") && hasPrefix(dec.text, "pop") && strings.Contains(text, "{}") && strings.Contains(dec.text, "{}") { 82 | return true 83 | } 84 | 85 | // word 014640F0. 86 | // libopcodes emits #-0 for negative zero; we don't. 87 | if strings.Replace(dec.text, "#-0", "#0", -1) == text || strings.Replace(dec.text, ", #-0", "", -1) == text { 88 | return true 89 | } 90 | 91 | // word 91EF90F0. we say 'strdls r9, [pc, #0]!' but libopcodes says 'strdls r9, [pc]'. 92 | // word D16F60F0. we say 'strdle r6, [pc, #0]!' but libopcodes says 'strdle r6, [pc, #-0]'. 93 | if strings.Replace(text, ", #0]!", "]", -1) == strings.Replace(dec.text, ", #-0]", "]", -1) { 94 | return true 95 | } 96 | 97 | // word 510F4000. we say apsr, libopcodes says CPSR. 98 | if strings.Replace(dec.text, "CPSR", "apsr", -1) == text { 99 | return true 100 | } 101 | 102 | // word 06A4B059. 103 | // for ssat and usat, libopcodes decodes asr #0 as asr #0 but the manual seems to say it should be asr #32. 104 | // There is never an asr #0. 105 | if strings.Replace(dec.text, ", asr #0", ", asr #32", -1) == text { 106 | return true 107 | } 108 | 109 | if len(dec.enc) >= 4 { 110 | raw := binary.LittleEndian.Uint32(dec.enc[:4]) 111 | 112 | // word 21FFF0B5. 113 | // the manual is clear that this is pre-indexed mode (with !) but libopcodes generates post-index (without !). 114 | if raw&0x01200000 == 0x01200000 && strings.Replace(text, "!", "", -1) == dec.text { 115 | return true 116 | } 117 | 118 | // word C100543E: libopcodes says tst, but no evidence for that. 
119 | if strings.HasPrefix(dec.text, "tst") && raw&0x0ff00000 != 0x03100000 && raw&0x0ff00000 != 0x01100000 { 120 | return true 121 | } 122 | 123 | // word C3203CE8: libopcodes says teq, but no evidence for that. 124 | if strings.HasPrefix(dec.text, "teq") && raw&0x0ff00000 != 0x03300000 && raw&0x0ff00000 != 0x01300000 { 125 | return true 126 | } 127 | 128 | // word D14C552E: libopcodes says cmp but no evidence for that. 129 | if strings.HasPrefix(dec.text, "cmp") && raw&0x0ff00000 != 0x03500000 && raw&0x0ff00000 != 0x01500000 { 130 | return true 131 | } 132 | 133 | // word 2166AA4A: libopcodes says cmn but no evidence for that. 134 | if strings.HasPrefix(dec.text, "cmn") && raw&0x0ff00000 != 0x03700000 && raw&0x0ff00000 != 0x01700000 { 135 | return true 136 | } 137 | 138 | // word E70AEEEF: libopcodes says str but no evidence for that. 139 | if strings.HasPrefix(dec.text, "str") && len(dec.text) >= 5 && (dec.text[3] == ' ' || dec.text[5] == ' ') && raw&0x0e500018 != 0x06000000 && raw&0x0e500000 != 0x0400000 { 140 | return true 141 | } 142 | 143 | // word B0AF48F4: libopcodes says strd but P=0,W=1 which is unpredictable. 144 | if hasPrefix(dec.text, "ldr", "str") && raw&0x01200000 == 0x00200000 { 145 | return true 146 | } 147 | 148 | // word B6CC1C76: libopcodes inexplicably says 'uxtab16lt r1, ip, r6, ROR #24' instead of 'uxtab16lt r1, ip, r6, ror #24' 149 | if strings.ToLower(dec.text) == text { 150 | return true 151 | } 152 | 153 | // word F410FDA1: libopcodes says PLDW but the manual is clear that PLDW is F5/F7, not F4. 
154 | // word F7D0FB17: libopcodes says PLDW but the manual is clear that PLDW has 0x10 clear 155 | if hasPrefix(dec.text, "pld") && raw&0xfd000010 != 0xf5000000 { 156 | return true 157 | } 158 | 159 | // word F650FE14: libopcodes says PLI but the manual is clear that PLI has 0x10 clear 160 | if hasPrefix(dec.text, "pli") && raw&0xff000010 != 0xf6000000 { 161 | return true 162 | } 163 | } 164 | 165 | return false 166 | } 167 | 168 | // Instructions known to libopcodes (or xed) but not to us. 169 | // Most of these are floating point coprocessor instructions. 170 | var unsupported = strings.Fields(` 171 | abs 172 | acs 173 | adf 174 | aes 175 | asn 176 | atn 177 | cdp 178 | cf 179 | cmf 180 | cnf 181 | cos 182 | cps 183 | crc32 184 | dvf 185 | eret 186 | exp 187 | fadd 188 | fcmp 189 | fcpy 190 | fcvt 191 | fdiv 192 | fdv 193 | fix 194 | fld 195 | flt 196 | fmac 197 | fmd 198 | fml 199 | fmr 200 | fms 201 | fmul 202 | fmx 203 | fneg 204 | fnm 205 | frd 206 | fsit 207 | fsq 208 | fst 209 | fsu 210 | fto 211 | fui 212 | hlt 213 | hvc 214 | lda 215 | ldc 216 | ldf 217 | lfm 218 | lgn 219 | log 220 | mar 221 | mcr 222 | mcrr 223 | mia 224 | mnf 225 | mra 226 | mrc 227 | mrrc 228 | mrs 229 | msr 230 | msr 231 | muf 232 | mvf 233 | nrm 234 | pol 235 | pow 236 | rdf 237 | rfc 238 | rfe 239 | rfs 240 | rmf 241 | rnd 242 | rpw 243 | rsf 244 | sdiv 245 | sev 246 | sfm 247 | sha1 248 | sha256 249 | sin 250 | smc 251 | sqt 252 | srs 253 | stc 254 | stf 255 | stl 256 | suf 257 | tan 258 | udf 259 | udiv 260 | urd 261 | vfma 262 | vfms 263 | vfnma 264 | vfnms 265 | vrint 266 | wfc 267 | wfs 268 | `) 269 | -------------------------------------------------------------------------------- /armasm/inst.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package armasm 6 | 7 | import ( 8 | "bytes" 9 | "fmt" 10 | ) 11 | 12 | // A Mode is an instruction execution mode. 13 | type Mode int 14 | 15 | const ( 16 | _ Mode = iota 17 | ModeARM 18 | ModeThumb 19 | ) 20 | 21 | func (m Mode) String() string { 22 | switch m { 23 | case ModeARM: 24 | return "ARM" 25 | case ModeThumb: 26 | return "Thumb" 27 | } 28 | return fmt.Sprintf("Mode(%d)", int(m)) 29 | } 30 | 31 | // An Op is an ARM opcode. 32 | type Op uint16 33 | 34 | // NOTE: The actual Op values are defined in tables.go. 35 | // They are chosen to simplify instruction decoding and 36 | // are not a dense packing from 0 to N, although the 37 | // density is high, probably at least 90%. 38 | 39 | func (op Op) String() string { 40 | if op >= Op(len(opstr)) || opstr[op] == "" { 41 | return fmt.Sprintf("Op(%d)", int(op)) 42 | } 43 | return opstr[op] 44 | } 45 | 46 | // An Inst is a single instruction. 47 | type Inst struct { 48 | Op Op // Opcode mnemonic 49 | Enc uint32 // Raw encoding bits. 50 | Len int // Length of encoding in bytes. 51 | Args Args // Instruction arguments, in ARM manual order. 52 | } 53 | 54 | func (i Inst) String() string { 55 | var buf bytes.Buffer 56 | buf.WriteString(i.Op.String()) 57 | for j, arg := range i.Args { 58 | if arg == nil { 59 | break 60 | } 61 | if j == 0 { 62 | buf.WriteString(" ") 63 | } else { 64 | buf.WriteString(", ") 65 | } 66 | buf.WriteString(arg.String()) 67 | } 68 | return buf.String() 69 | } 70 | 71 | // An Args holds the instruction arguments. 72 | // If an instruction has fewer than 4 arguments, 73 | // the final elements in the array are nil. 74 | type Args [4]Arg 75 | 76 | // An Arg is a single instruction argument, one of these types: 77 | // Endian, Imm, Mem, PCRel, Reg, RegList, RegShift, RegShiftReg. 
type Arg interface {
	// IsArg is a marker method; the implementations below give it an empty body.
	IsArg()
	// String returns the textual form of the argument.
	String() string
}

// A Float32Imm is a single-precision floating-point constant.
type Float32Imm float32

func (Float32Imm) IsArg() {}

// String formats the constant as "#" followed by its value.
func (f Float32Imm) String() string {
	return fmt.Sprintf("#%v", float32(f))
}

// A Float64Imm is a double-precision floating-point constant.
//
// The underlying type is float64 so that a value is stored and printed
// without first being rounded to single precision.
type Float64Imm float64

func (Float64Imm) IsArg() {}

// String formats the constant as "#" followed by its value.
func (f Float64Imm) String() string {
	return fmt.Sprintf("#%v", float64(f))
}

// An Imm is an integer constant.
type Imm uint32

func (Imm) IsArg() {}

// String formats the constant as "#" followed by its value in 0x-prefixed
// hexadecimal.
func (i Imm) String() string {
	return fmt.Sprintf("#%#x", uint32(i))
}

// An ImmAlt is an alternate encoding of an integer constant.
type ImmAlt struct {
	Val uint8
	Rot uint8
}

func (ImmAlt) IsArg() {}

// Imm returns the constant as a plain Imm: Val rotated right by Rot bit
// positions within 32 bits. A Rot of 0 returns Val unchanged, because the
// left shift by 32 contributes zero.
func (i ImmAlt) Imm() Imm {
	v := uint32(i.Val)
	r := uint(i.Rot)
	return Imm(v>>r | v<<(32-r))
}

// String formats the encoding as "#" and Val in 0x-prefixed hexadecimal,
// then ", " and Rot in decimal.
func (i ImmAlt) String() string {
	return fmt.Sprintf("#%#x, %d", i.Val, i.Rot)
}

// A Label is a text (code) address.
type Label uint32

func (Label) IsArg() {}

// String formats the address in 0x-prefixed hexadecimal.
func (i Label) String() string {
	return fmt.Sprintf("%#x", uint32(i))
}

// A Reg is a single register.
// The zero value denotes R0, not the absence of a register.
type Reg uint8

// The register numbering is contiguous: the sixteen core registers first,
// then the S registers, then the D registers, then the status registers.
// SP, LR and PC are alternate names for R13, R14 and R15.
const (
	R0 Reg = iota
	R1
	R2
	R3
	R4
	R5
	R6
	R7
	R8
	R9
	R10
	R11
	R12
	R13
	R14
	R15

	S0
	S1
	S2
	S3
	S4
	S5
	S6
	S7
	S8
	S9
	S10
	S11
	S12
	S13
	S14
	S15
	S16
	S17
	S18
	S19
	S20
	S21
	S22
	S23
	S24
	S25
	S26
	S27
	S28
	S29
	S30
	S31

	D0
	D1
	D2
	D3
	D4
	D5
	D6
	D7
	D8
	D9
	D10
	D11
	D12
	D13
	D14
	D15
	D16
	D17
	D18
	D19
	D20
	D21
	D22
	D23
	D24
	D25
	D26
	D27
	D28
	D29
	D30
	D31

	APSR
	APSR_nzcv
	FPSCR

	SP = R13
	LR = R14
	PC = R15
)

func (Reg) IsArg() {}

// String returns the register's name. R13, R14 and R15 print as "SP", "LR"
// and "PC"; other registers print as their bank letter and index ("R3",
// "S17", "D31") or their status-register name. Values outside the declared
// set print as "Reg(n)".
func (r Reg) String() string {
	// Cases are tried in order: the aliases must come before the generic
	// R-bank case, and each bank case relies on the smaller banks having
	// already been ruled out.
	switch {
	case r == SP:
		return "SP"
	case r == LR:
		return "LR"
	case r == PC:
		return "PC"
	case r <= R15:
		return fmt.Sprintf("R%d", int(r-R0))
	case r <= S31:
		return fmt.Sprintf("S%d", int(r-S0))
	case r <= D31:
		return fmt.Sprintf("D%d", int(r-D0))
	case r == APSR:
		return "APSR"
	case r == APSR_nzcv:
		return "APSR_nzcv"
	case r == FPSCR:
		return "FPSCR"
	}
	return fmt.Sprintf("Reg(%d)", int(r))
}

// A RegX represents a fraction of a multi-value register.
// The Index field specifies the index number,
// but the size of the fraction is not specified.
// It must be inferred from the instruction and the register type.
// For example, in a VMOV instruction, RegX{D5, 1} represents
// the top 32 bits of the 64-bit D5 register.
267 | type RegX struct { 268 | Reg Reg 269 | Index int 270 | } 271 | 272 | func (RegX) IsArg() {} 273 | 274 | func (r RegX) String() string { 275 | return fmt.Sprintf("%s[%d]", r.Reg, r.Index) 276 | } 277 | 278 | // A RegList is a register list. 279 | // Bits at indexes x = 0 through 15 indicate whether the corresponding Rx register is in the list. 280 | type RegList uint16 281 | 282 | func (RegList) IsArg() {} 283 | 284 | func (r RegList) String() string { 285 | var buf bytes.Buffer 286 | fmt.Fprintf(&buf, "{") 287 | sep := "" 288 | for i := 0; i < 16; i++ { 289 | if r&(1< _NAME _NAME_PAREN _CONST 75 | %token _SEE 76 | 77 | %left last_resort 78 | %left '=' 79 | %left ',' 80 | %left _IF 81 | %left _ANDAND _OROR 82 | %left _LT _LE '>' _GE _GT _EQ _NE 83 | %left ':' 84 | %left '+' '-' '|' '^' _OR _EOR 85 | %left '*' '/' '%' '&' _LSH _RSH _DIV _MOD _AND 86 | %left _TWOPOW 87 | %left '.' '<' '[' 88 | %left unary 89 | 90 | %type stmt_list simple_stmt_list stmt_list_opt 91 | %type stmt simple_stmt block simple_block otherwise enumeration 92 | %type else_opt simple_else_opt else_end simple_else_end 93 | %type elsif_list simple_elsif_list 94 | %type when 95 | %type when_list 96 | %type expr_list_opt expr_list expr_minus_list const_list 97 | %type expr call_expr const 98 | %type unnamed_type 99 | 100 | %% 101 | 102 | top: 103 | stmt_list_opt _EOF 104 | { 105 | yylex.(*Lexer).prog = $1 106 | return 0 107 | } 108 | 109 | simple_stmt: 110 | expr '=' expr ';' 111 | { 112 | $$ = &Stmt{Op: Assign, X: $1, Y: $3} 113 | } 114 | | expr _EQ expr ';' 115 | { 116 | $$ = &Stmt{Op: Assign, X: $1, Y: $3} 117 | } 118 | | call_expr ';' 119 | { 120 | $$ = &Stmt{Op: StmtExpr, X: $1} 121 | } 122 | | _RETURN expr_list_opt ';' 123 | { 124 | $$ = &Stmt{Op: Return, List: $2} 125 | } 126 | | _UNDEFINED ';' 127 | { 128 | $$ = &Stmt{Op: Undefined} 129 | } 130 | | _UNPREDICTABLE ';' 131 | { 132 | $$ = &Stmt{Op: Unpredictable} 133 | } 134 | | _SEE ';' 135 | { 136 | $$ = &Stmt{Op: See, Text: $1} 137 | } 
138 | | _IMPLEMENTATION_DEFINED ';' 139 | { 140 | $$ = &Stmt{Op: ImplDefined} 141 | } 142 | | _SUBARCHITECTURE_DEFINED ';' 143 | { 144 | $$ = &Stmt{Op: SubarchDefined} 145 | } 146 | 147 | stmt: 148 | simple_stmt 149 | | unnamed_type _NAME ';' 150 | { 151 | $$ = &Stmt{Op: StmtExpr, X: &Expr{Op: Decl, Type: $1, Text: $2}} 152 | } 153 | | _IF expr _THEN block else_opt 154 | { 155 | $$ = &Stmt{Op: If, X: $2, Body: $4, ElseIf: $5, Else: $5} 156 | } 157 | | _IF expr _THEN simple_stmt simple_else_opt 158 | { 159 | $$ = &Stmt{Op: If, X: $2, Body: $4, ElseIf: $5, Else: $5} 160 | } 161 | | _REPEAT block _UNTIL expr ';' 162 | { 163 | $$ = &Stmt{Op: Repeat, Body: $2, X: $4} 164 | } 165 | | _WHILE expr do block 166 | { 167 | $$ = &Stmt{Op: While, X: $2, Body: $4} 168 | } 169 | | _FOR expr '=' expr _TO expr do block 170 | { 171 | $$ = &Stmt{Op: For, X: $2, Y: $4, Z: $6, Body: $8} 172 | } 173 | | _CASE expr _OF _INDENT when_list otherwise _UNINDENT 174 | { 175 | $$ = &Stmt{Op: Case, X: $2, When: $5, Else: $6} 176 | } 177 | | _ASSERT expr ';' 178 | { 179 | $$ = &Stmt{Op: Assert, X: $2} 180 | } 181 | | block 182 | { 183 | $$ = $1 184 | } 185 | | enumeration 186 | 187 | enumeration: 188 | _ENUMERATION _NAME '{' expr_list '}' ';' 189 | { 190 | $$ = &Stmt{Op: Enum, Text: $2, List: $4} 191 | } 192 | 193 | do: 194 | | _DO 195 | 196 | block: 197 | _INDENT stmt_list _UNINDENT 198 | { 199 | $$ = &Stmt{Op: Block, Block: $2} 200 | } 201 | 202 | simple_block: 203 | simple_stmt_list 204 | { 205 | $$ = &Stmt{Op: Block, Block: $1} 206 | } 207 | 208 | simple_stmt_list: 209 | simple_stmt 210 | { 211 | $$ = []*Stmt{$1} 212 | } 213 | | simple_stmt_list simple_stmt 214 | { 215 | $$ = append($1, $2) 216 | } 217 | 218 | stmt_list: 219 | stmt 220 | { 221 | $$ = []*Stmt{$1} 222 | } 223 | | stmt_list stmt 224 | { 225 | $$ = append($1, $2) 226 | } 227 | 228 | stmt_list_opt: 229 | { 230 | $$ = nil 231 | } 232 | | stmt_list 233 | 234 | else_opt: 235 | elsif_list else_end 236 | { 237 | $$ = $1 238 | $$ = $2 
239 | } 240 | 241 | simple_else_opt: 242 | simple_elsif_list simple_else_end 243 | { 244 | $$ = $1 245 | $$ = $2 246 | } 247 | 248 | elsif_list: 249 | { 250 | $$ = nil 251 | } 252 | | elsif_list _ELSIF expr _THEN block 253 | { 254 | $$ = append($1, &ElseIf{Cond: $3, Body: $5}) 255 | } 256 | 257 | simple_elsif_list: 258 | { 259 | $$ = nil 260 | } 261 | | simple_elsif_list _ELSIF expr _THEN simple_stmt 262 | { 263 | $$ = append($1, &ElseIf{Cond: $3, Body: $5}) 264 | } 265 | 266 | else_end: 267 | { 268 | $$ = nil 269 | } 270 | | _ELSE block 271 | { 272 | $$ = $2 273 | } 274 | | _ELSE simple_stmt 275 | { 276 | $$ = $2 277 | } 278 | 279 | simple_else_end: 280 | { 281 | $$ = nil 282 | } 283 | | _ELSE simple_stmt 284 | { 285 | $$ = $2 286 | } 287 | 288 | when_list: 289 | { 290 | $$ = nil 291 | } 292 | | when_list when 293 | { 294 | $$ = append($1, $2) 295 | } 296 | 297 | when: 298 | _WHEN const_list then block 299 | { 300 | $$ = &When{Cond: $2, Body: $4} 301 | } 302 | | _WHEN const_list then simple_block 303 | { 304 | $$ = &When{Cond: $2, Body: $4} 305 | } 306 | 307 | then: 308 | | _THEN 309 | 310 | otherwise: 311 | { 312 | $$ = nil 313 | } 314 | | _OTHERWISE block 315 | { 316 | $$ = $2 317 | } 318 | | _OTHERWISE simple_block 319 | { 320 | $$ = $2 321 | } 322 | 323 | expr_list_opt: 324 | { 325 | $$ = nil 326 | } 327 | | expr_list 328 | 329 | expr_list: 330 | expr 331 | { 332 | $$ = []*Expr{$1} 333 | } 334 | | expr_list ',' expr 335 | { 336 | $$ = append($1, $3) 337 | } 338 | 339 | const_list: 340 | const 341 | { 342 | $$ = []*Expr{$1} 343 | } 344 | | const_list ',' const 345 | { 346 | $$ = append($1, $3) 347 | } 348 | 349 | const: 350 | _CONST 351 | { 352 | $$ = &Expr{Op: Const, Text: $1} 353 | } 354 | | _NAME 355 | { 356 | $$ = &Expr{Op: Name, Text: $1} 357 | } 358 | 359 | expr_minus_list: 360 | expr 361 | { 362 | $$ = []*Expr{$1} 363 | } 364 | | '-' 365 | { 366 | $$ = []*Expr{&Expr{Op: Blank}} 367 | } 368 | | expr_minus_list ',' expr 369 | { 370 | $$ = append($1, $3) 
371 | } 372 | | expr_minus_list ',' '-' 373 | { 374 | $$ = append($1, &Expr{Op: Blank}) 375 | } 376 | 377 | expr: 378 | _CONST 379 | { 380 | $$ = &Expr{Op: Const, Text: $1} 381 | } 382 | | _NAME 383 | { 384 | $$ = &Expr{Op: Name, Text: $1} 385 | } 386 | | unnamed_type _NAME 387 | { 388 | $$ = &Expr{Op: Decl, Type: $1, Text: $2} 389 | } 390 | | _UNKNOWN 391 | { 392 | $$ = &Expr{Op: Unknown} 393 | } 394 | | unnamed_type _UNKNOWN 395 | { 396 | $$ = &Expr{Op: Unknown, Type: $1} 397 | } 398 | | call_expr 399 | | '(' expr_minus_list ')' 400 | { 401 | $$ = &Expr{Op: ExprTuple, List: $2} 402 | } 403 | | expr _EQ expr 404 | { 405 | $$ = &Expr{Op: Eq, X: $1, Y: $3} 406 | } 407 | | expr _NE expr 408 | { 409 | $$ = &Expr{Op: NotEq, X: $1, Y: $3} 410 | } 411 | | expr _LE expr 412 | { 413 | $$ = &Expr{Op: LtEq, X: $1, Y: $3} 414 | } 415 | | expr '<' expr_list '>' 416 | { 417 | $$ = &Expr{Op: BitIndex, X: $1, List: $3} 418 | } 419 | | expr _LT expr 420 | { 421 | $$ = &Expr{Op: Lt, X: $1, Y: $3} 422 | } 423 | | expr _GE expr 424 | { 425 | $$ = &Expr{Op: GtEq, X: $1, Y: $3} 426 | } 427 | | expr _GT expr 428 | { 429 | $$ = &Expr{Op: Gt, X: $1, Y: $3} 430 | } 431 | | _IF expr _THEN expr _ELSE expr %prec _IF 432 | { 433 | $$ = &Expr{Op: IfElse, X: $2, Y: $4, Z: $6} 434 | } 435 | | '!' 
expr %prec unary 436 | { 437 | $$ = &Expr{Op: Not, X: $2} 438 | } 439 | | expr _ANDAND expr 440 | { 441 | $$ = &Expr{Op: AndAnd, X: $1, Y: $3} 442 | } 443 | | expr _OROR expr 444 | { 445 | $$ = &Expr{Op: OrOr, X: $1, Y: $3} 446 | } 447 | | expr '^' expr 448 | { 449 | $$ = &Expr{Op: Eor, X: $1, Y: $3} 450 | } 451 | | expr ':' expr 452 | { 453 | $$ = &Expr{Op: Colon, X: $1, Y: $3} 454 | } 455 | | expr _AND expr 456 | { 457 | $$ = &Expr{Op: BigAND, X: $1, Y: $3} 458 | } 459 | | expr _OR expr 460 | { 461 | $$ = &Expr{Op: BigOR, X: $1, Y: $3} 462 | } 463 | | expr _EOR expr 464 | { 465 | $$ = &Expr{Op: BigEOR, X: $1, Y: $3} 466 | } 467 | | '+' expr %prec unary 468 | { 469 | $$ = &Expr{Op: Plus, X: $2} 470 | } 471 | | '-' expr %prec unary 472 | { 473 | $$ = &Expr{Op: Minus, X: $2} 474 | } 475 | | expr '+' expr 476 | { 477 | $$ = &Expr{Op: Add, X: $1, Y: $3} 478 | } 479 | | expr '-' expr 480 | { 481 | $$ = &Expr{Op: Sub, X: $1, Y: $3} 482 | } 483 | | expr '*' expr 484 | { 485 | $$ = &Expr{Op: Mul, X: $1, Y: $3} 486 | } 487 | | expr '/' expr 488 | { 489 | $$ = &Expr{Op: Div, X: $1, Y: $3} 490 | } 491 | | expr _DIV expr 492 | { 493 | $$ = &Expr{Op: BigDIV, X: $1, Y: $3} 494 | } 495 | | expr _MOD expr 496 | { 497 | $$ = &Expr{Op: BigMOD, X: $1, Y: $3} 498 | } 499 | | _TWOPOW expr 500 | { 501 | $$ = &Expr{Op: TwoPow, X: $2} 502 | } 503 | | expr _LSH expr 504 | { 505 | $$ = &Expr{Op: Lsh, X: $1, Y: $3} 506 | } 507 | | expr _RSH expr 508 | { 509 | $$ = &Expr{Op: Rsh, X: $1, Y: $3} 510 | } 511 | | expr '[' expr_list ']' 512 | { 513 | $$ = &Expr{Op: Index, X: $1, List: $3} 514 | } 515 | | expr '.' 
_NAME 516 | { 517 | $$ = &Expr{Op: Dot, X: $1, Text: $3} 518 | } 519 | | expr '=' expr %prec last_resort 520 | { 521 | $$ = &Expr{Op: Eq, X: $1, Y: $3} 522 | } 523 | 524 | call_expr: 525 | _NAME_PAREN expr_list_opt ')' 526 | { 527 | $$ = &Expr{Op: Call, Text: $1, List: $2} 528 | } 529 | 530 | unnamed_type: 531 | _BITS expr ')' 532 | { 533 | $$ = &Type{Op: BitType, NX: $2} 534 | } 535 | | _BIT 536 | { 537 | $$ = &Type{Op: BitType, N: 1} 538 | } 539 | | _INTEGER 540 | { 541 | $$ = &Type{Op: IntegerType} 542 | } 543 | | _BOOLEAN 544 | { 545 | $$ = &Type{Op: BoolType} 546 | } 547 | 548 | %% 549 | 550 | func parseIntConst(s string) int { 551 | n, _ := strconv.Atoi(s) 552 | return n 553 | } 554 | -------------------------------------------------------------------------------- /armasm/testdata/decode.txt: -------------------------------------------------------------------------------- 1 | 000001f1| 1 gnu setend le 2 | 00100f61| 1 gnu mrsvs r1, apsr 3 | 00f02053| 1 gnu noppl 4 | 00f0d4f4| 1 gnu pli [r4] 5 | 01f020d3| 1 gnu yieldle 6 | 02002d59| 1 gnu stmdbpl sp!, {r1} 7 | 021da9d8| 1 gnu stmle r9!, {r1, r8, sl, fp, ip} 8 | 02c0b071| 1 gnu movsvc ip, r2 9 | 02f02073| 1 gnu wfevc 10 | 03f02013| 1 gnu wfine 11 | 03f05df7| 1 gnu pld [sp, -r3] 12 | 04009d34| 1 gnu popcc {r0} 13 | 043a52b1| 1 gnu cmplt r2, r4, lsl #20 14 | 04402de5| 1 gnu push {r4} 15 | 045b148d| 1 gnu vldrhi d5, [r4, #-16] 16 | 04f02093| 1 gnu sevls 17 | 0793eab0| 1 gnu rsclt r9, sl, r7, lsl #6 18 | 079bfb9e| 1 gnu vmovls.f64 d25, #183 19 | 0a4fc9d3| 1 gnu bicle r4, r9, #10, 30 20 | 0bac7ab6| 1 gnu ldrbtlt sl, [sl], -fp, lsl #24 21 | 0c2aee44| 1 gnu strbtmi r2, [lr], #2572 22 | 0c4bb000| 1 gnu adcseq r4, r0, ip, lsl #22 23 | 0e26d561| 1 gnu bicsvs r2, r5, lr, lsl #12 24 | 0f0fa011| 1 gnu lslne r0, pc, #30 25 | 0fa448e0| 1 gnu sub sl, r8, pc, lsl #8 26 | 101af1de| 1 gnu vmrsle r1, fpscr 27 | 108a0cee| 1 gnu vmov s24, r8 28 | 108a1dae| 1 gnu vmovge r8, s26 29 | 108ae14e| 1 gnu vmsrmi fpscr, r8 30 | 10faf1ae| 1 
gnu vmrsge apsr_nzcv, fpscr 31 | 10fb052e| 1 gnu vmovcs.32 d5[0], pc 32 | 11c902b7| 1 gnu smladlt r2, r1, r9, ip 33 | 11ef5b16| 1 gnu uadd16ne lr, fp, r1 34 | 12fa87a7| 1 gnu usad8ge r7, r2, sl 35 | 135f2956| 1 gnu qadd16pl r5, r9, r3 36 | 13de9aa1| 1 gnu orrsge sp, sl, r3, lsl lr 37 | 145c0e40| 1 gnu andmi r5, lr, r4, lsl ip 38 | 150f7fd6| 1 gnu uhadd16le r0, pc, r5 39 | 15b9bf12| 1 gnu adcsne fp, pc, #344064 40 | 16373391| 1 gnu teqls r3, r6, lsl r7 41 | 19ef1966| 1 gnu sadd16vs lr, r9, r9 42 | 1ab0b091| 1 gnu lslsls fp, sl, r0 43 | 1b9f6fe6| 1 gnu uqadd16 r9, pc, fp 44 | 1bb58557| 1 gnu usada8pl r5, fp, r5, fp 45 | 1beff8e0| 1 gnu rscs lr, r8, fp, lsl pc 46 | 1caff0e6| 1 gnu usat sl, #16, ip, lsl #30 47 | 1d0f3d36| 1 gnu shadd16cc r0, sp, sp 48 | 1dca1d52| 1 gnu andspl ip, sp, #118784 49 | 1e4891d0| 1 gnu addsle r4, r1, lr, lsl r8 50 | 1f0889e6| 1 gnu pkhbt r0, r9, pc, lsl #16 51 | 1f1f6fe1| 1 gnu clz r1, pc 52 | 1f26d157| 1 gnu bfcpl r2, #12, #6 53 | 1ff07ff5| 1 gnu clrex 54 | 1fff2fd1| 1 gnu bxle pc 55 | 20f153f6| 1 gnu pli [r3, -r0, lsr #2] 56 | 21047013| 1 gnu cmnne r0, #553648128 57 | 21c2eb8b| 1 gnu blhi .-0x50f778 58 | 21c2ebfb| 1 gnu blx .-0x50f776 59 | 21fa62ee| 1 gnu vmul.f32 s31, s4, s3 60 | 23005720| 1 gnu subscs r0, r7, r3, lsr #32 61 | 236a303e| 1 gnu vaddcc.f32 s12, s0, s7 62 | 23f055f6| 1 gnu pli [r5, -r3, lsr #32] 63 | 2430a031| 1 gnu lsrcc r3, r4, #32 64 | 245d0803| 1 gnu movweq r5, #36132 65 | 251a86be| 1 gnu vdivlt.f32 s2, s12, s11 66 | 25db7b81| 1 gnu cmnhi fp, r5, lsr #22 67 | 26bc3553| 1 gnu teqpl r5, #9728 68 | 277c2d69| 1 gnu pushvs {r0, r1, r2, r5, sl, fp, ip, sp, lr} 69 | 29fc1cf5| 1 gnu pldw [ip, #-3113] 70 | 29ff2fc1| 1 gnu bxjgt r9 71 | 2decd9c0| 1 gnu sbcsgt lr, r9, sp, lsr #24 72 | 30fa5e47| 1 gnu smmulrmi lr, r0, sl 73 | 316f64d6| 1 gnu uqasxle r6, r4, r1 74 | 323f5da6| 1 gnu uasxge r3, sp, r2 75 | 327fe5e6| 1 gnu usat16 r7, #5, r2 76 | 330151e3| 1 gnu cmp r1, #-1073741812 77 | 34af2ae6| 1 gnu qasx sl, sl, r4 78 | 35fd3710| 1 gnu 
eorsne pc, r7, r5, lsr sp 79 | 36def1c1| 1 gnu mvnsgt sp, r6, lsr lr 80 | 3801b061| 1 gnu lsrsvs r0, r8, r1 81 | 38985477| 1 gnu smmlarvc r4, r8, r8, r9 82 | 3a2fbfa6| 1 gnu revge r2, sl 83 | 3a3f1b06| 1 gnu sasxeq r3, fp, sl 84 | 3a7fa346| 1 gnu ssat16mi r7, #4, sl 85 | 3a943b94| 1 gnu ldrtls r9, [fp], #-1082 86 | 3bf505e7| 1 gnu smuadx r5, fp, r5 87 | 3cef7086| 1 gnu uhasxhi lr, r0, ip 88 | 3e5f3ec6| 1 gnu shasxgt r5, lr, lr 89 | 3f4fff86| 1 gnu rbithi r4, pc 90 | 3faf4717| 1 gnu smlaldxne sl, r7, pc, pc 91 | 3fff2fc1| 1 gnu blxgt pc 92 | 402bbf7e| 1 gnu vcvtvc.u16.f64 d2, d2, #16 93 | 403ab5de| 1 gnu vcmple.f32 s6, #0 94 | 40eb363e| 1 gnu vsubcc.f64 d14, d6, d0 95 | 420f73d1| 1 gnu cmnle r3, r2, asr #30 96 | 424a648e| 1 gnu vnmulhi.f32 s9, s8, s4 97 | 4284d717| 1 gnu ldrbne r8, [r7, r2, asr #8] 98 | 42a599c3| 1 gnu orrsgt sl, r9, #276824064 99 | 42abf0be| 1 gnu vmovlt.f64 d26, d2 100 | 446ea031| 1 gnu asrcc r6, r4, #28 101 | 4a953557| 1 gnu ldrpl r9, [r5, -sl, asr #10]! 102 | 4ab6f712| 1 gnu rscsne fp, r7, #77594624 103 | 4af07ff5| 1 gnu dsb #10 104 | 4df6def4| 1 gnu pli [lr, #1613] 105 | 4efbf52e| 1 gnu vcmpcs.f64 d31, #0 106 | 50aaac79| 1 gnu stmibvc ip!, {r4, r6, r9, fp, sp, pc} 107 | 50caf011| 1 gnu mvnsne ip, r0, asr sl 108 | 50f04961| 1 gnu qdaddvs pc, r0, r9 109 | 51282008| 1 gnu stmdaeq r0!, {r0, r4, r6, fp, sp} 110 | 52bf6576| 1 gnu uqsaxvc fp, r5, r2 111 | 5345c9d0| 1 gnu sbcle r4, r9, r3, asr r5 112 | 538f5e46| 1 gnu usaxmi r8, lr, r3 113 | 54106d31| 1 gnu qdsubcc r1, r4, sp 114 | 56e0e557| 1 gnu ubfxpl lr, r6, #0, #6 115 | 57073d11| 1 gnu teqne sp, r7, asr r7 116 | 58bb0aa9| 1 gnu stmdbge sl, {r3, r4, r6, r8, r9, fp, ip, sp, pc} 117 | 58f007b1| 1 gnu qaddlt pc, r8, r7 118 | 59fd0e77| 1 gnu smusdvc lr, r9, sp 119 | 5ab7f1c5| 1 gnu ldrbgt fp, [r1, #1882]! 
120 | 5abf23c6| 1 gnu qsaxgt fp, r3, sl 121 | 5b8f1c96| 1 gnu ssaxls r8, ip, fp 122 | 5b98ab97| 1 gnu sbfxls r9, fp, #16, #12 123 | 5bc9b041| 1 gnu asrsmi ip, fp, r9 124 | 5bf07ff5| 1 gnu dmb #11 125 | 5c102b81| 1 gnu qsubhi r1, ip, fp 126 | 5caa49e1| 1 gnu qdadd sl, ip, r9 127 | 5d3f7226| 1 gnu uhsaxcs r3, r2, sp 128 | 5db55470| 1 gnu subsvc fp, r4, sp, asr r5 129 | 5ef14387| 1 gnu smlsldhi pc, r3, lr, r1 130 | 5f540a11| 1 gnu qaddne r5, pc, sl 131 | 5f9079d1| 1 gnu cmnle r9, pc, asr r0 132 | 5faf3f66| 1 gnu shsaxvs sl, pc, pc 133 | 605071d7| 1 gnu ldrble r5, [r1, -r0, rrx]! 134 | 614adc76| 1 gnu ldrbvc r4, [ip], r1, ror #20 135 | 616b9e42| 1 gnu addsmi r6, lr, #99328 136 | 62c84f15| 1 gnu strbne ip, [pc, #-2146] 137 | 62f051f7| 1 gnu pld [r1, -r2, rrx] 138 | 6346c393| 1 gnu bicls r4, r3, #103809024 139 | 654abbae| 1 gnu vcvtge.f32.u16 s8, s8, #5 140 | 65a5f0e3| 1 gnu mvns sl, #423624704 141 | 65f796f7| 1 gnu pldw [r6, r5, ror #14] 142 | 670bb12e| 1 gnu vnegcs.f64 d0, d23 143 | 67903731| 1 gnu teqcc r7, r7, rrx 144 | 68ddc637| 1 gnu strbcc sp, [r6, r8, ror #26] 145 | 695b3ab6| 1 gnu ldrtlt r5, [sl], -r9, ror #22 146 | 697cfc71| 1 gnu mvnsvc r7, r9, ror #24 147 | 6a0ab3ee| 1 gnu vcvtb.f16.f32 s0, s21 148 | 6ad9ad54| 1 gnu strtpl sp, [sp], #2410 149 | 6af07ff5| 1 gnu isb #10 150 | 6afa6f10| 1 gnu rsbne pc, pc, sl, ror #20 151 | 6d5b19ee| 1 gnu vnmla.f64 d5, d9, d29 152 | 6d60b071| 1 gnu rrxsvc r6, sp 153 | 6df754f7| 1 gnu pld [r4, -sp, ror #14] 154 | 70065821| 1 gnu cmpcs r8, r0, ror r6 155 | 7050ed86| 1 gnu uxtabhi r5, sp, r0 156 | 715f1186| 1 gnu ssub16hi r5, r1, r1 157 | 716c9805| 1 gnu ldreq r6, [r8, #3185] 158 | 718d5ab1| 1 gnu cmplt sl, r1, ror sp 159 | 71c8cfb6| 1 gnu uxtb16lt ip, r1, ror #16 160 | 7294af06| 1 gnu sxtbeq r9, r2, ror #8 161 | 72c0bac6| 1 gnu sxtahgt ip, sl, r2 162 | 730f6716| 1 gnu uqsub16ne r0, r7, r3 163 | 73608f46| 1 gnu sxtb16mi r6, r3 164 | 73687f22| 1 gnu rsbscs r6, pc, #7536640 165 | 74308816| 1 gnu sxtab16ne r3, r8, r4 166 | 757f3456| 
1 gnu shsub16pl r7, r4, r5 167 | 77788016| 1 gnu sxtab16ne r7, r0, r7, ror #16 168 | 78061671| 1 gnu tstvc r6, r8, ror r6 169 | 780a2fe1| 1 gnu bkpt 0xf0a8 170 | 7850abd6| 1 gnu sxtable r5, fp, r8 171 | 792cef26| 1 gnu uxtbcs r2, r9, ror #24 172 | 799eb8e0| 1 gnu adcs r9, r8, r9, ror lr 173 | 799f5726| 1 gnu usub16cs r9, r7, r9 174 | 79d0bf16| 1 gnu sxthne sp, r9 175 | 7a037ba1| 1 gnu cmnge fp, sl, ror r3 176 | 7b0f2566| 1 gnu qsub16vs r0, r5, fp 177 | 7b79dd51| 1 gnu bicspl r7, sp, fp, ror r9 178 | 7b9a9f1d| 1 gnu vldrne s18, [pc, #492] 179 | 7c70cea6| 1 gnu uxtab16ge r7, lr, ip 180 | 7d48f966| 1 gnu uxtahvs r4, r9, sp, ror #16 181 | 7d5c13a1| 1 gnu tstge r3, sp, ror ip 182 | 7e0001f1| 1 gnu setend le 183 | 7e1c0ba7| 1 gnu smlsdxge fp, lr, ip, r1 184 | 7e567e40| 1 gnu rsbsmi r5, lr, lr, ror r6 185 | 7e8f73b6| 1 gnu uhsub16lt r8, r3, lr 186 | 7ef0ffd6| 1 gnu uxthle pc, lr 187 | 7faaa011| 1 gnu rorne sl, pc, sl 188 | 81f19af7| 1 gnu pldw [sl, r1, lsl #3] 189 | 82033901| 1 gnu teqeq r9, r2, lsl #7 190 | 82f316f5| 1 gnu pldw [r6, #-898] 191 | 830201f1| 1 gnu setend be 192 | 838a3b91| 1 gnu teqls fp, r3, lsl #21 193 | 8408af2f| 1 gnu svccs 0x00af0884 194 | 884201d1| 1 gnu smlabble r1, r8, r2, r4 195 | 8aa12e31| 1 gnu smlawbcc lr, sl, r1, sl 196 | 8b9b99c0| 1 gnu addsgt r9, r9, fp, lsl #23 197 | 8c005c81| 1 gnu cmphi ip, ip, lsl #1 198 | 8fb429c6| 1 gnu strtgt fp, [r9], -pc, lsl #9 199 | 907b1f9e| 1 gnu vmovls.32 r7, d31[0] 200 | 91975f25| 1 gnu ldrbcs r9, [pc, #-1937] 201 | 91b010e3| 1 gnu tst r0, #145 202 | 927facb1| 1 gnu strexdlt r7, r2, [ip] 203 | 92904c91| 1 gnu swpbls r9, r2, [ip] 204 | 92af1226| 1 gnu sadd8cs sl, r2, r2 205 | 92b28c70| 1 gnu umullvc fp, ip, r2, r2 206 | 945f68a6| 1 gnu uqadd8ge r5, r8, r4 207 | 950b2560| 1 gnu mlavs r5, r5, fp, r0 208 | 969fcf71| 1 gnu strexbvc r9, r6, [pc] 209 | 96cf35e6| 1 gnu shadd8 ip, r5, r6 210 | 98060eb0| 1 gnu mullt lr, r8, r6 211 | 9843fb93| 1 gnu mvnsls r4, #152, 6 212 | 9a3fe2b0| 1 gnu smlallt r3, r2, sl, pc 213 | 
9aef58b6| 1 gnu uadd8lt lr, r8, sl 214 | 9afcdff5| 1 gnu pld [pc, #3226] 215 | 9c221810| 1 gnu mulsne r8, ip, r2 216 | 9c3bc9dd| 1 gnu vstrle d19, [r9, #624] 217 | 9c5f2606| 1 gnu qadd8eq r5, r6, ip 218 | 9d87dac0| 1 gnu smullsgt r8, sl, sp, r7 219 | 9e0f7c86| 1 gnu uhadd8hi r0, ip, lr 220 | 9e814560| 1 gnu umaalvs r8, r5, lr, r1 221 | 9e9f8dc1| 1 gnu strexgt r9, lr, [sp] 222 | 9ec3c9d7| 1 gnu bfile ip, lr, #7, #3 223 | 9ed26d90| 1 gnu mlsls sp, lr, r2, sp 224 | 9f7fd9c1| 1 gnu ldrexbgt r7, [r9] 225 | 9f7fea91| 1 gnu strexhls r7, pc, [sl] 226 | 9f9f9921| 1 gnu ldrexcs r9, [r9] 227 | 9faffd21| 1 gnu ldrexhcs sl, [sp] 228 | 9fcfbd61| 1 gnu ldrexdvs ip, [sp] 229 | 9ff7a710| 1 gnu umlalne pc, r7, pc, r7 230 | a05459d3| 1 gnu cmple r9, #160, 8 231 | a3062be1| 1 gnu smulwb fp, r3, r6 232 | a68a92b1| 1 gnu orrslt r8, r2, r6, lsr #21 233 | abff55f6| 1 gnu pli [r5, -fp, lsr #31] 234 | addbf8ea| 1 gnu b .-0x1c9148 235 | ae79b021| 1 gnu lsrscs r7, lr, #19 236 | b590a3b1| 1 gnu strhlt r9, [r3, r5]! 237 | b5b2e390| 1 gnu strhtls fp, [r3], #37 238 | b6ac4e30| 1 gnu strhcc sl, [lr], #-198 239 | b73fff86| 1 gnu revshhi r3, r7 240 | b75fbfc6| 1 gnu rev16gt r5, r7 241 | b80b7c80| 1 gnu ldrhthi r0, [ip], #-184 242 | b82035e0| 1 gnu ldrht r2, [r5], -r8 243 | b8877391| 1 gnu ldrhls r8, [r3, #-120]! 244 | b9703e41| 1 gnu ldrhmi r7, [lr, -r9]! 
245 | b9cf8c16| 1 gnu selne ip, ip, r9 246 | bd81bd58| 1 gnu poppl {r0, r2, r3, r4, r5, r7, r8, pc} 247 | bdfdb469| 1 gnu ldmibvs r4!, {r0, r2, r3, r4, r5, r7, r8, sl, fp, ip, sp, lr, pc} 248 | beb02500| 1 gnu strhteq fp, [r5], -lr 249 | bf1a5e42| 1 gnu subsmi r1, lr, #782336 250 | c19a4d5e| 1 gnu vmlspl.f32 s19, s27, s2 251 | c1aab15e| 1 gnu vsqrtpl.f32 s20, s2 252 | c354b003| 1 gnu movseq r5, #-1023410176 253 | c4091dc1| 1 gnu tstgt sp, r4, asr #19 254 | c50e13a9| 1 gnu ldmdbge r3, {r0, r2, r6, r7, r9, sl, fp} 255 | c68c8637| 1 gnu strcc r8, [r6, r6, asr #25] 256 | c6ad48e3| 1 gnu movt sl, #36294 257 | c6f65ff5| 1 gnu pld [pc, #-1734] 258 | c8a92f10| 1 gnu eorne sl, pc, r8, asr #19 259 | c9016b61| 1 gnu smulbtvs fp, r9, r1 260 | cadbf49e| 1 gnu vcmpels.f64 d29, d10 261 | ce9de476| 1 gnu strbtvc r9, [r4], lr, asr #27 262 | cf3c1ab1| 1 gnu tstlt sl, pc, asr #25 263 | d355aab6| 1 gnu ssatlt r5, #11, r3, asr #11 264 | d4f4df10| 1 gnu ldrsbne pc, [pc], #68 265 | d6530d61| 1 gnu ldrdvs r5, [sp, -r6] 266 | d74d7800| 1 gnu ldrsbteq r4, [r8], #-215 267 | d9703680| 1 gnu ldrsbthi r7, [r6], -r9 268 | dbe003c0| 1 gnu ldrdgt lr, [r3], -fp 269 | dc709561| 1 gnu ldrsbvs r7, [r5, ip] 270 | dcc3b9c8| 1 gnu ldmgt r9!, {r2, r3, r4, r6, r7, r8, r9, lr, pc} 271 | debfa0e5| 1 gnu str fp, [r0, #4062]! 272 | dee062a1| 1 gnu ldrdge lr, [r2, #-14]! 
273 | dfa05ab7| 1 gnu smmlslt sl, pc, r0, sl 274 | e02ef011| 1 gnu mvnsne r2, r0, ror #29 275 | e4d41718| 1 gnu ldmdane r7, {r2, r5, r6, r7, sl, ip, lr, pc} 276 | e6d0fe34| 1 gnu ldrbtcc sp, [lr], #230 277 | e73bf7be| 1 gnu vcvtlt.f32.f64 s7, d23 278 | e74e72b3| 1 gnu cmnlt r2, #3696 279 | e80bf07e| 1 gnu vabsvc.f64 d16, d24 280 | e9b5b001| 1 gnu rorseq fp, r9, #11 281 | ea7bbdbe| 1 gnu vcvtlt.s32.f64 s14, d26 282 | ec063813| 1 gnu teqne r8, #236, 12 283 | ec0e49e1| 1 gnu smlaltt r0, r9, ip, lr 284 | ee4ab85e| 1 gnu vcvtpl.f32.s32 s8, s29 285 | ef461f25| 1 gnu ldrcs r4, [pc, #-1775] 286 | ef5fd002| 1 gnu sbcseq r5, r0, #956 287 | f4cf1d36| 1 gnu ssub8cc ip, sp, r4 288 | f67f73b6| 1 gnu uhsub8lt r7, r3, r6 289 | f6e09ca0| 1 gnu ldrshge lr, [ip], r6 290 | f7702e32| 1 gnu eorcc r7, lr, #247 291 | fa4dcf20| 1 gnu strdcs r4, [pc], #218 292 | fac03720| 1 gnu ldrshtcs ip, [r7], -sl 293 | fc0f64c6| 1 gnu uqsub8gt r0, r4, ip 294 | fc28f481| 1 gnu ldrshhi r2, [r4, #140]! 295 | fc300560| 1 gnu strdvs r3, [r5], -ip 296 | fcacfc70| 1 gnu ldrshtvc sl, [ip], #204 297 | fdbcfaf7| 1 gnu undef 298 | fddf5c86| 1 gnu usub8hi sp, ip, sp 299 | fdf02013| 1 gnu dbgne #13 300 | fe0319e3| 1 gnu tst r9, #-134217725 301 | fe7f3116| 1 gnu shsub8ne r7, r1, lr 302 | ff4f2ac6| 1 gnu qsub8gt r4, sl, pc 303 | ff818c71| 1 gnu strdvc r8, [ip, pc] 304 | |6b5721d3 1 gnu error: unknown instruction 305 | |76452001 1 gnu error: unknown instruction 306 | |97acd647 1 gnu error: unknown instruction 307 | -------------------------------------------------------------------------------- /armasm/decode.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package armasm 6 | 7 | import ( 8 | "encoding/binary" 9 | "fmt" 10 | ) 11 | 12 | // An instFormat describes the format of an instruction encoding. 
13 | // An instruction with 32-bit value x matches the format if x&mask == value 14 | // and the condition matches. 15 | // The condition matches if x>>28 == 0xF && value>>28==0xF 16 | // or if x>>28 != 0xF and value>>28 == 0. 17 | // If x matches the format, then the rest of the fields describe how to interpret x. 18 | // The opBits describe bits that should be extracted from x and added to the opcode. 19 | // For example opBits = 0x1234 means that the value 20 | // (2 bits at offset 1) followed by (4 bits at offset 3) 21 | // should be added to op. 22 | // Finally the args describe how to decode the instruction arguments. 23 | // args is stored as a fixed-size array; if there are fewer than len(args) arguments, 24 | // args[i] == 0 marks the end of the argument list. 25 | type instFormat struct { 26 | mask uint32 27 | value uint32 28 | priority int8 29 | op Op 30 | opBits uint64 31 | args instArgs 32 | } 33 | 34 | type instArgs [4]instArg 35 | 36 | var ( 37 | errMode = fmt.Errorf("unsupported execution mode") 38 | errShort = fmt.Errorf("truncated instruction") 39 | errUnknown = fmt.Errorf("unknown instruction") 40 | ) 41 | 42 | var decoderCover []bool 43 | 44 | // Decode decodes the leading bytes in src as a single instruction. 45 | func Decode(src []byte, mode Mode) (inst Inst, err error) { 46 | if mode != ModeARM { 47 | return Inst{}, errMode 48 | } 49 | if len(src) < 4 { 50 | return Inst{}, errShort 51 | } 52 | 53 | if decoderCover == nil { 54 | decoderCover = make([]bool, len(instFormats)) 55 | } 56 | 57 | x := binary.LittleEndian.Uint32(src) 58 | 59 | // The instFormat table contains both conditional and unconditional instructions. 60 | // Considering only the top 4 bits, the conditional instructions use mask=0, value=0, 61 | // while the unconditional instructions use mask=f, value=f. 62 | // Prepare a version of x with the condition cleared to 0 in conditional instructions 63 | // and then assume mask=f during matching. 
64 | const condMask = 0xf0000000 65 | xNoCond := x 66 | if x&condMask != condMask { 67 | xNoCond &^= condMask 68 | } 69 | var priority int8 70 | Search: 71 | for i := range instFormats { 72 | f := &instFormats[i] 73 | if xNoCond&(f.mask|condMask) != f.value || f.priority <= priority { 74 | continue 75 | } 76 | delta := uint32(0) 77 | deltaShift := uint(0) 78 | for opBits := f.opBits; opBits != 0; opBits >>= 16 { 79 | n := uint(opBits & 0xFF) 80 | off := uint((opBits >> 8) & 0xFF) 81 | delta |= (x >> off) & (1<> 8) & (1<<4 - 1)) 212 | case arg_R_12: 213 | return Reg((x >> 12) & (1<<4 - 1)) 214 | case arg_R_16: 215 | return Reg((x >> 16) & (1<<4 - 1)) 216 | 217 | case arg_R_12_nzcv: 218 | r := Reg((x >> 12) & (1<<4 - 1)) 219 | if r == R15 { 220 | return APSR_nzcv 221 | } 222 | return r 223 | 224 | case arg_R_16_WB: 225 | mode := AddrLDM 226 | if (x>>21)&1 != 0 { 227 | mode = AddrLDM_WB 228 | } 229 | return Mem{Base: Reg((x >> 16) & (1<<4 - 1)), Mode: mode} 230 | 231 | case arg_R_rotate: 232 | Rm := Reg(x & (1<<4 - 1)) 233 | typ, count := decodeShift(x) 234 | // ROR #0 here means ROR #0, but decodeShift rewrites to RRX #1. 
235 | if typ == RotateRightExt { 236 | return Reg(Rm) 237 | } 238 | return RegShift{Rm, typ, uint8(count)} 239 | 240 | case arg_R_shift_R: 241 | Rm := Reg(x & (1<<4 - 1)) 242 | Rs := Reg((x >> 8) & (1<<4 - 1)) 243 | typ := Shift((x >> 5) & (1<<2 - 1)) 244 | return RegShiftReg{Rm, typ, Rs} 245 | 246 | case arg_R_shift_imm: 247 | Rm := Reg(x & (1<<4 - 1)) 248 | typ, count := decodeShift(x) 249 | if typ == ShiftLeft && count == 0 { 250 | return Reg(Rm) 251 | } 252 | return RegShift{Rm, typ, uint8(count)} 253 | 254 | case arg_R1_0: 255 | return Reg((x & (1<<4 - 1))) 256 | case arg_R1_12: 257 | return Reg(((x >> 12) & (1<<4 - 1))) 258 | case arg_R2_0: 259 | return Reg((x & (1<<4 - 1)) | 1) 260 | case arg_R2_12: 261 | return Reg(((x >> 12) & (1<<4 - 1)) | 1) 262 | 263 | case arg_SP: 264 | return SP 265 | 266 | case arg_Sd_Dd: 267 | v := (x >> 12) & (1<<4 - 1) 268 | vx := (x >> 22) & 1 269 | sz := (x >> 8) & 1 270 | if sz != 0 { 271 | return D0 + Reg(vx<<4+v) 272 | } else { 273 | return S0 + Reg(v<<1+vx) 274 | } 275 | 276 | case arg_Dd_Sd: 277 | return decodeArg(arg_Sd_Dd, x^(1<<8)) 278 | 279 | case arg_Sd: 280 | v := (x >> 12) & (1<<4 - 1) 281 | vx := (x >> 22) & 1 282 | return S0 + Reg(v<<1+vx) 283 | 284 | case arg_Sm_Dm: 285 | v := (x >> 0) & (1<<4 - 1) 286 | vx := (x >> 5) & 1 287 | sz := (x >> 8) & 1 288 | if sz != 0 { 289 | return D0 + Reg(vx<<4+v) 290 | } else { 291 | return S0 + Reg(v<<1+vx) 292 | } 293 | 294 | case arg_Sm: 295 | v := (x >> 0) & (1<<4 - 1) 296 | vx := (x >> 5) & 1 297 | return S0 + Reg(v<<1+vx) 298 | 299 | case arg_Dn_half: 300 | v := (x >> 16) & (1<<4 - 1) 301 | vx := (x >> 7) & 1 302 | return RegX{D0 + Reg(vx<<4+v), int((x >> 21) & 1)} 303 | 304 | case arg_Sn_Dn: 305 | v := (x >> 16) & (1<<4 - 1) 306 | vx := (x >> 7) & 1 307 | sz := (x >> 8) & 1 308 | if sz != 0 { 309 | return D0 + Reg(vx<<4+v) 310 | } else { 311 | return S0 + Reg(v<<1+vx) 312 | } 313 | 314 | case arg_Sn: 315 | v := (x >> 16) & (1<<4 - 1) 316 | vx := (x >> 7) & 1 317 | return S0 
+ Reg(v<<1+vx) 318 | 319 | case arg_const: 320 | v := x & (1<<8 - 1) 321 | rot := (x >> 8) & (1<<4 - 1) * 2 322 | if rot > 0 && v&3 == 0 { 323 | // could rotate less 324 | return ImmAlt{uint8(v), uint8(rot)} 325 | } 326 | if rot >= 24 && ((v<<(32-rot))&0xFF)>>(32-rot) == v { 327 | // could wrap around to rot==0. 328 | return ImmAlt{uint8(v), uint8(rot)} 329 | } 330 | return Imm(v>>rot | v<<(32-rot)) 331 | 332 | case arg_endian: 333 | return Endian((x >> 9) & 1) 334 | 335 | case arg_fbits: 336 | return Imm((16 << ((x >> 7) & 1)) - ((x&(1<<4-1))<<1 | (x>>5)&1)) 337 | 338 | case arg_fp_0: 339 | return Imm(0) 340 | 341 | case arg_imm24: 342 | return Imm(x & (1<<24 - 1)) 343 | 344 | case arg_imm5: 345 | return Imm((x >> 7) & (1<<5 - 1)) 346 | 347 | case arg_imm5_32: 348 | x = (x >> 7) & (1<<5 - 1) 349 | if x == 0 { 350 | x = 32 351 | } 352 | return Imm(x) 353 | 354 | case arg_imm5_nz: 355 | x = (x >> 7) & (1<<5 - 1) 356 | if x == 0 { 357 | return nil 358 | } 359 | return Imm(x) 360 | 361 | case arg_imm_4at16_12at0: 362 | return Imm((x>>16)&(1<<4-1)<<12 | x&(1<<12-1)) 363 | 364 | case arg_imm_12at8_4at0: 365 | return Imm((x>>8)&(1<<12-1)<<4 | x&(1<<4-1)) 366 | 367 | case arg_imm_vfp: 368 | x = (x>>16)&(1<<4-1)<<4 | x&(1<<4-1) 369 | return Imm(x) 370 | 371 | case arg_label24: 372 | imm := (x & (1<<24 - 1)) << 2 373 | return PCRel(int32(imm<<6) >> 6) 374 | 375 | case arg_label24H: 376 | h := (x >> 24) & 1 377 | imm := (x&(1<<24-1))<<2 | h<<1 378 | return PCRel(int32(imm<<6) >> 6) 379 | 380 | case arg_label_m_12: 381 | d := int32(x & (1<<12 - 1)) 382 | return Mem{Base: PC, Mode: AddrOffset, Offset: int16(-d)} 383 | 384 | case arg_label_p_12: 385 | d := int32(x & (1<<12 - 1)) 386 | return Mem{Base: PC, Mode: AddrOffset, Offset: int16(d)} 387 | 388 | case arg_label_pm_12: 389 | d := int32(x & (1<<12 - 1)) 390 | u := (x >> 23) & 1 391 | if u == 0 { 392 | d = -d 393 | } 394 | return Mem{Base: PC, Mode: AddrOffset, Offset: int16(d)} 395 | 396 | case arg_label_pm_4_4: 397 | d := 
int32((x>>8)&(1<<4-1)<<4 | x&(1<<4-1)) 398 | u := (x >> 23) & 1 399 | if u == 0 { 400 | d = -d 401 | } 402 | return PCRel(d) 403 | 404 | case arg_lsb_width: 405 | lsb := (x >> 7) & (1<<5 - 1) 406 | msb := (x >> 16) & (1<<5 - 1) 407 | if msb < lsb || msb >= 32 { 408 | return nil 409 | } 410 | return Imm(msb + 1 - lsb) 411 | 412 | case arg_mem_R: 413 | Rn := Reg((x >> 16) & (1<<4 - 1)) 414 | return Mem{Base: Rn, Mode: AddrOffset} 415 | 416 | case arg_mem_R_pm_R_postindex: 417 | // Treat [],+/- like [,+/-{,}]{!} 418 | // by forcing shift bits to <<0 and P=0, W=0 (postindex=true). 419 | return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^((1<<7-1)<<5|1<<24|1<<21)) 420 | 421 | case arg_mem_R_pm_R_W: 422 | // Treat [,+/-]{!} like [,+/-{,}]{!} 423 | // by forcing shift bits to <<0. 424 | return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^((1<<7-1)<<5)) 425 | 426 | case arg_mem_R_pm_R_shift_imm_offset: 427 | // Treat [],+/-{,} like [,+/-{,}]{!} 428 | // by forcing P=1, W=0 (index=false, wback=false). 429 | return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^(1<<21)|1<<24) 430 | 431 | case arg_mem_R_pm_R_shift_imm_postindex: 432 | // Treat [],+/-{,} like [,+/-{,}]{!} 433 | // by forcing P=0, W=0 (postindex=true). 434 | return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^(1<<24|1<<21)) 435 | 436 | case arg_mem_R_pm_R_shift_imm_W: 437 | Rn := Reg((x >> 16) & (1<<4 - 1)) 438 | Rm := Reg(x & (1<<4 - 1)) 439 | typ, count := decodeShift(x) 440 | u := (x >> 23) & 1 441 | w := (x >> 21) & 1 442 | p := (x >> 24) & 1 443 | if p == 0 && w == 1 { 444 | return nil 445 | } 446 | sign := int8(+1) 447 | if u == 0 { 448 | sign = -1 449 | } 450 | mode := AddrMode(uint8(p<<1) | uint8(w^1)) 451 | return Mem{Base: Rn, Mode: mode, Sign: sign, Index: Rm, Shift: typ, Count: count} 452 | 453 | case arg_mem_R_pm_imm12_offset: 454 | // Treat [,#+/-] like [{,#+/-}]{!} 455 | // by forcing P=1, W=0 (index=false, wback=false). 
456 | return decodeArg(arg_mem_R_pm_imm12_W, x&^(1<<21)|1<<24) 457 | 458 | case arg_mem_R_pm_imm12_postindex: 459 | // Treat [],#+/- like [{,#+/-}]{!} 460 | // by forcing P=0, W=0 (postindex=true). 461 | return decodeArg(arg_mem_R_pm_imm12_W, x&^(1<<24|1<<21)) 462 | 463 | case arg_mem_R_pm_imm12_W: 464 | Rn := Reg((x >> 16) & (1<<4 - 1)) 465 | u := (x >> 23) & 1 466 | w := (x >> 21) & 1 467 | p := (x >> 24) & 1 468 | if p == 0 && w == 1 { 469 | return nil 470 | } 471 | sign := int8(+1) 472 | if u == 0 { 473 | sign = -1 474 | } 475 | imm := int16(x & (1<<12 - 1)) 476 | mode := AddrMode(uint8(p<<1) | uint8(w^1)) 477 | return Mem{Base: Rn, Mode: mode, Offset: int16(sign) * imm} 478 | 479 | case arg_mem_R_pm_imm8_postindex: 480 | // Treat [],#+/- like [{,#+/-}]{!} 481 | // by forcing P=0, W=0 (postindex=true). 482 | return decodeArg(arg_mem_R_pm_imm8_W, x&^(1<<24|1<<21)) 483 | 484 | case arg_mem_R_pm_imm8_W: 485 | Rn := Reg((x >> 16) & (1<<4 - 1)) 486 | u := (x >> 23) & 1 487 | w := (x >> 21) & 1 488 | p := (x >> 24) & 1 489 | if p == 0 && w == 1 { 490 | return nil 491 | } 492 | sign := int8(+1) 493 | if u == 0 { 494 | sign = -1 495 | } 496 | imm := int16((x>>8)&(1<<4-1)<<4 | x&(1<<4-1)) 497 | mode := AddrMode(uint8(p<<1) | uint8(w^1)) 498 | return Mem{Base: Rn, Mode: mode, Offset: int16(sign) * imm} 499 | 500 | case arg_mem_R_pm_imm8at0_offset: 501 | Rn := Reg((x >> 16) & (1<<4 - 1)) 502 | u := (x >> 23) & 1 503 | sign := int8(+1) 504 | if u == 0 { 505 | sign = -1 506 | } 507 | imm := int16(x&(1<<8-1)) << 2 508 | return Mem{Base: Rn, Mode: AddrOffset, Offset: int16(sign) * imm} 509 | 510 | case arg_option: 511 | return Imm(x & (1<<4 - 1)) 512 | 513 | case arg_registers: 514 | return RegList(x & (1<<16 - 1)) 515 | 516 | case arg_registers2: 517 | x &= 1<<16 - 1 518 | n := 0 519 | for i := 0; i < 16; i++ { 520 | if x>>uint(i)&1 != 0 { 521 | n++ 522 | } 523 | } 524 | if n < 2 { 525 | return nil 526 | } 527 | return RegList(x) 528 | 529 | case arg_registers1: 530 | Rt := 
(x >> 12) & (1<<4 - 1) 531 | return RegList(1 << Rt) 532 | 533 | case arg_satimm4: 534 | return Imm((x >> 16) & (1<<4 - 1)) 535 | 536 | case arg_satimm5: 537 | return Imm((x >> 16) & (1<<5 - 1)) 538 | 539 | case arg_satimm4m1: 540 | return Imm((x>>16)&(1<<4-1) + 1) 541 | 542 | case arg_satimm5m1: 543 | return Imm((x>>16)&(1<<5-1) + 1) 544 | 545 | case arg_widthm1: 546 | return Imm((x>>16)&(1<<5-1) + 1) 547 | 548 | } 549 | } 550 | 551 | // decodeShift decodes the shift-by-immediate encoded in x. 552 | func decodeShift(x uint32) (Shift, uint8) { 553 | count := (x >> 7) & (1<<5 - 1) 554 | typ := Shift((x >> 5) & (1<<2 - 1)) 555 | switch typ { 556 | case ShiftRight, ShiftRightSigned: 557 | if count == 0 { 558 | count = 32 559 | } 560 | case RotateRight: 561 | if count == 0 { 562 | typ = RotateRightExt 563 | count = 1 564 | } 565 | } 566 | return typ, uint8(count) 567 | } 568 | -------------------------------------------------------------------------------- /armasm/ext_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Support for testing against external disassembler program. 6 | // Copied and simplified from rsc.io/x86/x86asm/ext_test.go. 
7 | 8 | package armasm 9 | 10 | import ( 11 | "bufio" 12 | "bytes" 13 | "encoding/hex" 14 | "flag" 15 | "fmt" 16 | "io/ioutil" 17 | "log" 18 | "math/rand" 19 | "os" 20 | "os/exec" 21 | "regexp" 22 | "runtime" 23 | "strings" 24 | "testing" 25 | "time" 26 | ) 27 | 28 | var ( 29 | printTests = flag.Bool("printtests", false, "print test cases that exercise new code paths") 30 | dumpTest = flag.Bool("dump", false, "dump all encodings") 31 | mismatch = flag.Bool("mismatch", false, "log allowed mismatches") 32 | longTest = flag.Bool("long", false, "long test") 33 | keep = flag.Bool("keep", false, "keep object files around") 34 | debug = false 35 | ) 36 | 37 | // A ExtInst represents a single decoded instruction parsed 38 | // from an external disassembler's output. 39 | type ExtInst struct { 40 | addr uint32 41 | enc [4]byte 42 | nenc int 43 | text string 44 | } 45 | 46 | func (r ExtInst) String() string { 47 | return fmt.Sprintf("%#x: % x: %s", r.addr, r.enc, r.text) 48 | } 49 | 50 | // An ExtDis is a connection between an external disassembler and a test. 51 | type ExtDis struct { 52 | Arch Mode 53 | Dec chan ExtInst 54 | File *os.File 55 | Size int 56 | KeepFile bool 57 | Cmd *exec.Cmd 58 | } 59 | 60 | // Run runs the given command - the external disassembler - and returns 61 | // a buffered reader of its standard output. 62 | func (ext *ExtDis) Run(cmd ...string) (*bufio.Reader, error) { 63 | if *keep { 64 | log.Printf("%s\n", strings.Join(cmd, " ")) 65 | } 66 | ext.Cmd = exec.Command(cmd[0], cmd[1:]...) 67 | out, err := ext.Cmd.StdoutPipe() 68 | if err != nil { 69 | return nil, fmt.Errorf("stdoutpipe: %v", err) 70 | } 71 | if err := ext.Cmd.Start(); err != nil { 72 | return nil, fmt.Errorf("exec: %v", err) 73 | } 74 | 75 | b := bufio.NewReaderSize(out, 1<<20) 76 | return b, nil 77 | } 78 | 79 | // Wait waits for the command started with Run to exit. 
80 | func (ext *ExtDis) Wait() error { 81 | return ext.Cmd.Wait() 82 | } 83 | 84 | // testExtDis tests a set of byte sequences against an external disassembler. 85 | // The disassembler is expected to produce the given syntax and be run 86 | // in the given architecture mode (16, 32, or 64-bit). 87 | // The extdis function must start the external disassembler 88 | // and then parse its output, sending the parsed instructions on ext.Dec. 89 | // The generate function calls its argument f once for each byte sequence 90 | // to be tested. The generate function itself will be called twice, and it must 91 | // make the same sequence of calls to f each time. 92 | // When a disassembly does not match the internal decoding, 93 | // allowedMismatch determines whether this mismatch should be 94 | // allowed, or else considered an error. 95 | func testExtDis( 96 | t *testing.T, 97 | syntax string, 98 | arch Mode, 99 | extdis func(ext *ExtDis) error, 100 | generate func(f func([]byte)), 101 | allowedMismatch func(text string, size int, inst *Inst, dec ExtInst) bool, 102 | ) { 103 | start := time.Now() 104 | ext := &ExtDis{ 105 | Dec: make(chan ExtInst), 106 | Arch: arch, 107 | } 108 | errc := make(chan error) 109 | 110 | // First pass: write instructions to input file for external disassembler. 111 | file, f, size, err := writeInst(generate) 112 | if err != nil { 113 | t.Fatal(err) 114 | } 115 | ext.Size = size 116 | ext.File = f 117 | defer func() { 118 | f.Close() 119 | if !*keep { 120 | os.Remove(file) 121 | } 122 | }() 123 | 124 | // Second pass: compare disassembly against our decodings. 
125 | var ( 126 | totalTests = 0 127 | totalSkips = 0 128 | totalErrors = 0 129 | 130 | errors = make([]string, 0, 100) // sampled errors, at most cap 131 | ) 132 | go func() { 133 | errc <- extdis(ext) 134 | }() 135 | generate(func(enc []byte) { 136 | dec, ok := <-ext.Dec 137 | if !ok { 138 | t.Errorf("decoding stream ended early") 139 | return 140 | } 141 | inst, text := disasm(syntax, arch, pad(enc)) 142 | totalTests++ 143 | if *dumpTest { 144 | fmt.Printf("%x -> %s [%d]\n", enc[:len(enc)], dec.text, dec.nenc) 145 | } 146 | if text != dec.text || inst.Len != dec.nenc { 147 | suffix := "" 148 | if allowedMismatch(text, size, &inst, dec) { 149 | totalSkips++ 150 | if !*mismatch { 151 | return 152 | } 153 | suffix += " (allowed mismatch)" 154 | } 155 | totalErrors++ 156 | if len(errors) >= cap(errors) { 157 | j := rand.Intn(totalErrors) 158 | if j >= cap(errors) { 159 | return 160 | } 161 | errors = append(errors[:j], errors[j+1:]...) 162 | } 163 | errors = append(errors, fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s", enc, text, inst.Len, dec.text, dec.nenc, suffix)) 164 | } 165 | }) 166 | 167 | if *mismatch { 168 | totalErrors -= totalSkips 169 | } 170 | 171 | for _, b := range errors { 172 | t.Log(b) 173 | } 174 | 175 | if totalErrors > 0 { 176 | t.Fail() 177 | } 178 | t.Logf("%d test cases, %d expected mismatches, %d failures; %.0f cases/second", totalTests, totalSkips, totalErrors, float64(totalTests)/time.Since(start).Seconds()) 179 | 180 | if err := <-errc; err != nil { 181 | t.Fatal("external disassembler: %v", err) 182 | } 183 | 184 | } 185 | 186 | const start = 0x8000 // start address of text 187 | 188 | // writeInst writes the generated byte sequences to a new file 189 | // starting at offset start. That file is intended to be the input to 190 | // the external disassembler. 
191 | func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, err error) { 192 | f, err = ioutil.TempFile("", "armasm") 193 | if err != nil { 194 | return 195 | } 196 | 197 | file = f.Name() 198 | 199 | f.Seek(start, 0) 200 | w := bufio.NewWriter(f) 201 | defer w.Flush() 202 | size = 0 203 | generate(func(x []byte) { 204 | if len(x) > 4 { 205 | x = x[:4] 206 | } 207 | if debug { 208 | fmt.Printf("%#x: %x%x\n", start+size, x, zeros[len(x):]) 209 | } 210 | w.Write(x) 211 | w.Write(zeros[len(x):]) 212 | size += len(zeros) 213 | }) 214 | return file, f, size, nil 215 | } 216 | 217 | var zeros = []byte{0, 0, 0, 0} 218 | 219 | // pad pads the code sequenc with pops. 220 | func pad(enc []byte) []byte { 221 | if len(enc) < 4 { 222 | enc = append(enc[:len(enc):len(enc)], zeros[:4-len(enc)]...) 223 | } 224 | return enc 225 | } 226 | 227 | // disasm returns the decoded instruction and text 228 | // for the given source bytes, using the given syntax and mode. 229 | func disasm(syntax string, mode Mode, src []byte) (inst Inst, text string) { 230 | // If printTests is set, we record the coverage value 231 | // before and after, and we write out the inputs for which 232 | // coverage went up, in the format expected in testdata/decode.text. 233 | // This produces a fairly small set of test cases that exercise nearly 234 | // all the code. 
235 | var cover float64 236 | if *printTests { 237 | cover -= coverage() 238 | } 239 | 240 | inst, err := Decode(src, mode) 241 | if err != nil { 242 | text = "error: " + err.Error() 243 | } else { 244 | text = inst.String() 245 | switch syntax { 246 | //case "arm": 247 | // text = ARMSyntax(inst) 248 | case "gnu": 249 | text = GNUSyntax(inst) 250 | //case "plan9": 251 | // text = plan9Syntax(inst, 0, nil) 252 | default: 253 | text = "error: unknown syntax " + syntax 254 | } 255 | } 256 | 257 | if *printTests { 258 | cover += coverage() 259 | if cover > 0 { 260 | max := len(src) 261 | if max > 4 && inst.Len <= 4 { 262 | max = 4 263 | } 264 | fmt.Printf("%x|%x\t%d\t%s\t%s\n", src[:inst.Len], src[inst.Len:max], mode, syntax, text) 265 | } 266 | } 267 | 268 | return 269 | } 270 | 271 | // coverage returns a floating point number denoting the 272 | // test coverage until now. The number increases when new code paths are exercised, 273 | // both in the Go program and in the decoder byte code. 274 | func coverage() float64 { 275 | /* 276 | testing.Coverage is not in the main distribution. 277 | The implementation, which must go in package testing, is: 278 | 279 | // Coverage reports the current code coverage as a fraction in the range [0, 1]. 280 | func Coverage() float64 { 281 | var n, d int64 282 | for _, counters := range cover.Counters { 283 | for _, c := range counters { 284 | if c > 0 { 285 | n++ 286 | } 287 | d++ 288 | } 289 | } 290 | if d == 0 { 291 | return 0 292 | } 293 | return float64(n) / float64(d) 294 | } 295 | */ 296 | 297 | var f float64 298 | f += testing.Coverage() 299 | f += decodeCoverage() 300 | return f 301 | } 302 | 303 | func decodeCoverage() float64 { 304 | n := 0 305 | for _, t := range decoderCover { 306 | if t { 307 | n++ 308 | } 309 | } 310 | return float64(1+n) / float64(1+len(decoderCover)) 311 | } 312 | 313 | // Helpers for writing disassembler output parsers. 

// hasPrefix reports whether any of the space-separated words in the text s
// begins with any of the given prefixes.
func hasPrefix(s string, prefixes ...string) bool {
	// Walk the start of every word in turn; rest always begins at a word start.
	for rest := s; rest != ""; {
		for _, p := range prefixes {
			if strings.HasPrefix(rest, p) {
				return true
			}
		}
		sp := strings.IndexByte(rest, ' ')
		if sp < 0 {
			break
		}
		rest = rest[sp+1:]
	}
	return false
}

// contains reports whether the text s contains any of the given substrings.
func contains(s string, substrings ...string) bool {
	for i := range substrings {
		if strings.Contains(s, substrings[i]) {
			return true
		}
	}
	return false
}

// isHex reports whether b is a hexadecimal character (0-9A-Fa-f).
func isHex(b byte) bool {
	switch {
	case '0' <= b && b <= '9', 'A' <= b && b <= 'F', 'a' <= b && b <= 'f':
		return true
	}
	return false
}

// parseHex parses the hexadecimal byte dump in hex,
// appending the parsed bytes to raw and returning the updated slice.
// The returned bool is false if any invalid hex was found,
// in which case the returned slice is nil.
// Spaces and tabs between bytes are okay but any other non-hex is not.
350 | func parseHex(hex []byte, raw []byte) ([]byte, bool) { 351 | hex = trimSpace(hex) 352 | for j := 0; j < len(hex); { 353 | for hex[j] == ' ' || hex[j] == '\t' { 354 | j++ 355 | } 356 | if j >= len(hex) { 357 | break 358 | } 359 | if j+2 > len(hex) || !isHex(hex[j]) || !isHex(hex[j+1]) { 360 | return nil, false 361 | } 362 | raw = append(raw, unhex[hex[j]]<<4|unhex[hex[j+1]]) 363 | j += 2 364 | } 365 | return raw, true 366 | } 367 | 368 | var unhex = [256]byte{ 369 | '0': 0, 370 | '1': 1, 371 | '2': 2, 372 | '3': 3, 373 | '4': 4, 374 | '5': 5, 375 | '6': 6, 376 | '7': 7, 377 | '8': 8, 378 | '9': 9, 379 | 'A': 10, 380 | 'B': 11, 381 | 'C': 12, 382 | 'D': 13, 383 | 'E': 14, 384 | 'F': 15, 385 | 'a': 10, 386 | 'b': 11, 387 | 'c': 12, 388 | 'd': 13, 389 | 'e': 14, 390 | 'f': 15, 391 | } 392 | 393 | // index is like bytes.Index(s, []byte(t)) but avoids the allocation. 394 | func index(s []byte, t string) int { 395 | i := 0 396 | for { 397 | j := bytes.IndexByte(s[i:], t[0]) 398 | if j < 0 { 399 | return -1 400 | } 401 | i = i + j 402 | if i+len(t) > len(s) { 403 | return -1 404 | } 405 | for k := 1; k < len(t); k++ { 406 | if s[i+k] != t[k] { 407 | goto nomatch 408 | } 409 | } 410 | return i 411 | nomatch: 412 | i++ 413 | } 414 | } 415 | 416 | // fixSpace rewrites runs of spaces, tabs, and newline characters into single spaces in s. 417 | // If s must be rewritten, it is rewritten in place. 
418 | func fixSpace(s []byte) []byte { 419 | s = trimSpace(s) 420 | for i := 0; i < len(s); i++ { 421 | if s[i] == '\t' || s[i] == '\n' || i > 0 && s[i] == ' ' && s[i-1] == ' ' { 422 | goto Fix 423 | } 424 | } 425 | return s 426 | 427 | Fix: 428 | b := s 429 | w := 0 430 | for i := 0; i < len(s); i++ { 431 | c := s[i] 432 | if c == '\t' || c == '\n' { 433 | c = ' ' 434 | } 435 | if c == ' ' && w > 0 && b[w-1] == ' ' { 436 | continue 437 | } 438 | b[w] = c 439 | w++ 440 | } 441 | if w > 0 && b[w-1] == ' ' { 442 | w-- 443 | } 444 | return b[:w] 445 | } 446 | 447 | // trimSpace trims leading and trailing space from s, returning a subslice of s. 448 | func trimSpace(s []byte) []byte { 449 | j := len(s) 450 | for j > 0 && (s[j-1] == ' ' || s[j-1] == '\t' || s[j-1] == '\n') { 451 | j-- 452 | } 453 | i := 0 454 | for i < j && (s[i] == ' ' || s[i] == '\t') { 455 | i++ 456 | } 457 | return s[i:j] 458 | } 459 | 460 | // pcrel matches instructions using relative addressing mode. 461 | var ( 462 | pcrel = regexp.MustCompile(`^((?:.* )?(?:b|bl)x?(?:eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le)?) 0x([0-9a-f]+)$`) 463 | ) 464 | 465 | // Generators. 466 | // 467 | // The test cases are described as functions that invoke a callback repeatedly, 468 | // with a new input sequence each time. These helpers make writing those 469 | // a little easier. 470 | 471 | // condCases generates conditional instructions. 472 | func condCases(t *testing.T) func(func([]byte)) { 473 | return func(try func([]byte)) { 474 | // All the strides are relatively prime to 2 and therefore to 2²⁸, 475 | // so we will not repeat any instructions until we have tried all 2²⁸. 476 | // Using a stride other than 1 is meant to visit the instructions in a 477 | // pseudorandom order, which gives better variety in the set of 478 | // test cases chosen by -printtests. 
479 | stride := uint32(10007) 480 | n := 1 << 28 / 7 481 | if testing.Short() { 482 | stride = 100003 483 | n = 1 << 28 / 1001 484 | } else if *longTest { 485 | stride = 200000033 486 | n = 1 << 28 487 | } 488 | x := uint32(0) 489 | for i := 0; i < n; i++ { 490 | enc := (x%15)<<28 | x&(1<<28-1) 491 | try([]byte{byte(enc), byte(enc >> 8), byte(enc >> 16), byte(enc >> 24)}) 492 | x += stride 493 | } 494 | } 495 | } 496 | 497 | // uncondCases generates unconditional instructions. 498 | func uncondCases(t *testing.T) func(func([]byte)) { 499 | return func(try func([]byte)) { 500 | condCases(t)(func(enc []byte) { 501 | enc[3] |= 0xF0 502 | try(enc) 503 | }) 504 | } 505 | } 506 | 507 | func countBits(x uint32) int { 508 | n := 0 509 | for ; x != 0; x >>= 1 { 510 | n += int(x & 1) 511 | } 512 | return n 513 | } 514 | 515 | func expandBits(x, m uint32) uint32 { 516 | var out uint32 517 | for i := uint(0); i < 32; i++ { 518 | out >>= 1 519 | if m&1 != 0 { 520 | out |= (x & 1) << 31 521 | x >>= 1 522 | } 523 | m >>= 1 524 | } 525 | return out 526 | } 527 | 528 | func tryCondMask(mask, val uint32, try func([]byte)) { 529 | n := countBits(^mask) 530 | bits := uint32(0) 531 | for i := 0; i < 1<> 8), byte(x >> 16), byte(x >> 24)}) 535 | } 536 | } 537 | 538 | // vfpCases generates VFP instructions. 539 | func vfpCases(t *testing.T) func(func([]byte)) { 540 | const ( 541 | vfpmask uint32 = 0xFF00FE10 542 | vfp uint32 = 0x0E009A00 543 | ) 544 | return func(try func([]byte)) { 545 | tryCondMask(0xff00fe10, 0x0e009a00, try) // standard VFP instruction space 546 | tryCondMask(0xffc00f7f, 0x0e000b10, try) // VFP MOV core reg to/from float64 half 547 | tryCondMask(0xffe00f7f, 0x0e000a10, try) // VFP MOV core reg to/from float32 548 | tryCondMask(0xffef0fff, 0x0ee10a10, try) // VFP MOV core reg to/from cond codes 549 | } 550 | } 551 | 552 | // hexCases generates the cases written in hexadecimal in the encoded string. 
553 | // Spaces in 'encoded' separate entire test cases, not individual bytes. 554 | func hexCases(t *testing.T, encoded string) func(func([]byte)) { 555 | return func(try func([]byte)) { 556 | for _, x := range strings.Fields(encoded) { 557 | src, err := hex.DecodeString(x) 558 | if err != nil { 559 | t.Errorf("parsing %q: %v", x, err) 560 | } 561 | try(src) 562 | } 563 | } 564 | } 565 | 566 | // testdataCases generates the test cases recorded in testdata/decode.txt. 567 | // It only uses the inputs; it ignores the answers recorded in that file. 568 | func testdataCases(t *testing.T) func(func([]byte)) { 569 | var codes [][]byte 570 | data, err := ioutil.ReadFile("testdata/decode.txt") 571 | if err != nil { 572 | t.Fatal(err) 573 | } 574 | for _, line := range strings.Split(string(data), "\n") { 575 | line = strings.TrimSpace(line) 576 | if line == "" || strings.HasPrefix(line, "#") { 577 | continue 578 | } 579 | f := strings.Fields(line)[0] 580 | i := strings.Index(f, "|") 581 | if i < 0 { 582 | t.Errorf("parsing %q: missing | separator", f) 583 | continue 584 | } 585 | if i%2 != 0 { 586 | t.Errorf("parsing %q: misaligned | separator", f) 587 | } 588 | code, err := hex.DecodeString(f[:i] + f[i+1:]) 589 | if err != nil { 590 | t.Errorf("parsing %q: %v", f, err) 591 | continue 592 | } 593 | codes = append(codes, code) 594 | } 595 | 596 | return func(try func([]byte)) { 597 | for _, code := range codes { 598 | try(code) 599 | } 600 | } 601 | } 602 | 603 | func caller(skip int) string { 604 | pc, _, _, _ := runtime.Caller(skip) 605 | f := runtime.FuncForPC(pc) 606 | name := "?" 607 | if f != nil { 608 | name = f.Name() 609 | if i := strings.LastIndex(name, "."); i >= 0 { 610 | name = name[i+1:] 611 | } 612 | } 613 | return name 614 | } 615 | -------------------------------------------------------------------------------- /armspec/spec.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Go Authors. 
All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Armspec reads the ``ARM Architecture Reference Manual'' 6 | // to collect instruction encoding details and writes those details to standard output 7 | // in JSON format. 8 | // 9 | // Warning Warning Warning 10 | // 11 | // This program is unfinished. It is being published in this incomplete form 12 | // for interested readers, but do not expect it to be runnable or useful. 13 | // 14 | package main 15 | 16 | import ( 17 | "bufio" 18 | "bytes" 19 | "encoding/json" 20 | "fmt" 21 | "log" 22 | "math" 23 | "os" 24 | "regexp" 25 | "sort" 26 | "strconv" 27 | "strings" 28 | 29 | "rsc.io/pdf" 30 | ) 31 | 32 | type Inst struct { 33 | Name string 34 | ID string 35 | Bits string 36 | Arch string 37 | Syntax []string 38 | Code string 39 | } 40 | 41 | const debugPage = 0 42 | 43 | var stdout *bufio.Writer 44 | 45 | func main() { 46 | log.SetFlags(0) 47 | log.SetPrefix("armspec: ") 48 | 49 | if len(os.Args) != 2 { 50 | fmt.Fprintf(os.Stderr, "usage: armspec file.pdf\n") 51 | os.Exit(2) 52 | } 53 | 54 | f, err := pdf.Open(os.Args[1]) 55 | if err != nil { 56 | log.Fatal(err) 57 | } 58 | 59 | // Find instruction set reference in outline, to build instruction list. 60 | instList := instHeadings(f.Outline()) 61 | if len(instList) < 200 { 62 | log.Fatalf("only found %d instructions in table of contents", len(instList)) 63 | } 64 | 65 | stdout = bufio.NewWriter(os.Stdout) 66 | fmt.Fprintf(stdout, "[") 67 | numTable := 0 68 | defer stdout.Flush() 69 | 70 | // Scan document looking for instructions. 71 | // Must find exactly the ones in the outline. 
72 | n := f.NumPage() 73 | PageLoop: 74 | for pageNum := 1; pageNum <= n; pageNum++ { 75 | if debugPage > 0 && pageNum != debugPage { 76 | continue 77 | } 78 | if pageNum > 1127 { 79 | break 80 | } 81 | p := f.Page(pageNum) 82 | name, table := parsePage(pageNum, p) 83 | if name == "" { 84 | continue 85 | } 86 | if len(table) < 1 { 87 | if false { 88 | fmt.Fprintf(os.Stderr, "no encodings for instruction %q (page %d)\n", name, pageNum) 89 | } 90 | continue 91 | } 92 | for _, inst := range table { 93 | if numTable > 0 { 94 | fmt.Fprintf(stdout, ",") 95 | } 96 | numTable++ 97 | js, _ := json.Marshal(inst) 98 | fmt.Fprintf(stdout, "\n%s", jsFix.Replace(string(js))) 99 | } 100 | for j, headline := range instList { 101 | if name == headline { 102 | instList[j] = "" 103 | continue PageLoop 104 | } 105 | } 106 | fmt.Fprintf(os.Stderr, "unexpected instruction %q (page %d)\n", name, pageNum) 107 | } 108 | 109 | fmt.Fprintf(stdout, "\n]\n") 110 | stdout.Flush() 111 | 112 | if debugPage == 0 { 113 | for _, headline := range instList { 114 | if headline != "" { 115 | switch headline { 116 | default: 117 | fmt.Fprintf(os.Stderr, "missing instruction %q\n", headline) 118 | case "CHKA": // ThumbEE 119 | case "CPS": // system instruction 120 | case "CPY": // synonym for MOV 121 | case "ENTERX": // ThumbEE 122 | case "F* (former VFP instruction mnemonics)": // synonyms 123 | case "HB, HBL, HBLP, HBP": // ThumbEE 124 | case "LEAVEX": // ThumbEE 125 | case "MOV (shifted register)": // pseudo instruction for ASR, LSL, LSR, ROR, and RRX 126 | case "NEG": // synonym for RSB 127 | case "RFE": // system instruction 128 | case "SMC (previously SMI)": // system instruction 129 | case "SRS": // system instruction 130 | case "SUBS PC, LR and related instructions": // system instruction 131 | case "VAND (immediate)": // pseudo instruction 132 | case "VCLE (register)": // pseudo instruction 133 | case "VCLT (register)": // pseudo instruction 134 | case "VORN (immediate)": // pseudo instruction 
135 | } 136 | } 137 | } 138 | } 139 | } 140 | 141 | func instHeadings(outline pdf.Outline) []string { 142 | return appendInstHeadings(outline, nil) 143 | } 144 | 145 | var instRE = regexp.MustCompile(`A[\d.]+ Alphabetical list of instructions`) 146 | var childRE = regexp.MustCompile(`A[\d.]+ (.+)`) 147 | var sectionRE = regexp.MustCompile(`^A[\d.]+$`) 148 | var bitRE = regexp.MustCompile(`^( |[01]|\([01]\))*$`) 149 | 150 | func appendInstHeadings(outline pdf.Outline, list []string) []string { 151 | if instRE.MatchString(outline.Title) { 152 | for _, child := range outline.Child { 153 | m := childRE.FindStringSubmatch(child.Title) 154 | if m == nil { 155 | fmt.Fprintf(os.Stderr, "cannot parse section title: %s\n", child.Title) 156 | continue 157 | } 158 | list = append(list, m[1]) 159 | } 160 | } 161 | for _, child := range outline.Child { 162 | list = appendInstHeadings(child, list) 163 | } 164 | return list 165 | } 166 | 167 | const inch = 72.0 168 | 169 | func parsePage(num int, p pdf.Page) (name string, table []Inst) { 170 | content := p.Content() 171 | 172 | var text []pdf.Text 173 | for _, t := range content.Text { 174 | if match(t, "Times-Roman", 7.2, "") { 175 | t.FontSize = 9 176 | } 177 | if match(t, "Times-Roman", 6.72, "") && '0' <= t.S[0] && t.S[0] <= '9' { 178 | t.S = string([]rune("⁰¹²³⁴⁵⁶⁷⁸⁹")[t.S[0]-'0']) 179 | t.FontSize = 9 180 | t.Y -= 2.28 181 | } 182 | if t.Font == "Gen_Arial" { 183 | continue 184 | } 185 | text = append(text, t) 186 | } 187 | 188 | text = findWords(text) 189 | 190 | for i, t := range text { 191 | if t.Font == "Times" { 192 | t.Font = "Times-Roman" 193 | text[i] = t 194 | } 195 | } 196 | 197 | if debugPage > 0 { 198 | for _, t := range text { 199 | fmt.Println(t) 200 | } 201 | for _, r := range content.Rect { 202 | fmt.Println(r) 203 | } 204 | } 205 | 206 | // Remove text we should ignore. 
207 | out := text[:0] 208 | skip := false 209 | for _, t := range text { 210 | // skip page footer 211 | if match(t, "Helvetica", 8, "A") || match(t, "Helvetica", 8, "ARM DDI") || match(t, "Helvetica-Oblique", 8, "Copyright") { 212 | continue 213 | } 214 | // skip section header and body text 215 | if match(t, "Helvetica-Bold", 12, "") && (sectionRE.MatchString(t.S) || t.S == "Alphabetical list of instructions") { 216 | skip = true 217 | continue 218 | } 219 | if skip && match(t, "Times-Roman", 9, "") { 220 | continue 221 | } 222 | skip = false 223 | out = append(out, t) 224 | } 225 | text = out 226 | 227 | // Page header must say Instruction Details. 228 | if len(text) == 0 || !match(text[0], "Helvetica-Oblique", 8, "Instruction Details") && !match(text[0], "Times-Roman", 9, "Instruction Details") { 229 | return "", nil 230 | } 231 | text = text[1:] 232 | 233 | isSection := func(text []pdf.Text, i int) int { 234 | if i+2 <= len(text) && match(text[i], "Helvetica-Bold", 10, "") && sectionRE.MatchString(text[i].S) && match(text[i+1], "Helvetica-Bold", 10, "") { 235 | return 2 236 | } 237 | if i+1 <= len(text) && match(text[i], "Helvetica-Bold", 10, "") && childRE.MatchString(text[i].S) { 238 | return 1 239 | } 240 | return 0 241 | } 242 | 243 | // Skip dummy headlines and sections. 244 | for d := isSection(text, 0); d != 0; d = isSection(text, 0) { 245 | i := d 246 | for i < len(text) && !match(text[i], "Helvetica-Bold", 9, "Encoding") && !match(text[i], "Helvetica-Bold", 10, "") { 247 | i++ 248 | } 249 | if isSection(text, i) == 0 { 250 | break 251 | } 252 | text = text[i:] 253 | } 254 | 255 | // Next line is headline. Can wrap to multiple lines. 
256 | d := isSection(text, 0) 257 | if d == 0 { 258 | if debugPage > 0 { 259 | fmt.Printf("non-inst-headline: %v\n", text[0]) 260 | } 261 | checkNoEncodings(num, text) 262 | return "", nil 263 | } 264 | if d == 2 { 265 | name = text[1].S 266 | text = text[2:] 267 | } else if d == 1 { 268 | m := childRE.FindStringSubmatch(text[0].S) 269 | name = m[1] 270 | text = text[1:] 271 | } 272 | for len(text) > 0 && match(text[0], "Helvetica-Bold", 10, "") { 273 | name += " " + text[0].S 274 | text = text[1:] 275 | } 276 | 277 | // Skip description. 278 | for len(text) > 0 && (match(text[0], "Times-Roman", 9, "") || match(text[0], "LucidaSansTypewriteX", 6.48, "") || match(text[0], "Times-Bold", 10, "Note")) { 279 | text = text[1:] 280 | } 281 | 282 | // Encodings follow. 283 | warned := false 284 | for i := 0; i < len(text); { 285 | if match(text[i], "Helvetica-Bold", 10, "Assembler syntax") || 286 | match(text[i], "Helvetica-Bold", 9, "Modified operation in ThumbEE") || 287 | match(text[i], "Helvetica-Bold", 9, "Unallocated memory hints") || 288 | match(text[i], "Helvetica-Bold", 9, "Related encodings") || 289 | match(text[i], "Times-Roman", 9, "Figure A") || 290 | match(text[i], "Helvetica-Bold", 9, "Table A") || 291 | match(text[i], "Helvetica-Bold", 9, "VFP Instructions") || 292 | match(text[i], "Helvetica-Bold", 9, "VFP instructions") || 293 | match(text[i], "Helvetica-Bold", 9, "VFP vectors") || 294 | match(text[i], "Helvetica-Bold", 9, "FLDMX") || 295 | match(text[i], "Helvetica-Bold", 9, "FSTMX") || 296 | match(text[i], "Helvetica-Bold", 9, "Advanced SIMD and VFP") { 297 | checkNoEncodings(num, text[i:]) 298 | break 299 | } 300 | if match(text[i], "Helvetica-Bold", 9, "Figure A") { 301 | y := text[i].Y 302 | i++ 303 | for i < len(text) && math.Abs(text[i].Y-y) < 2 { 304 | i++ 305 | } 306 | continue 307 | } 308 | if !match(text[i], "Helvetica-Bold", 9, "Encoding") { 309 | if !warned { 310 | warned = true 311 | fmt.Fprintln(os.Stderr, "page", num, ": unexpected:", 
text[i]) 312 | } 313 | i++ 314 | continue 315 | } 316 | inst := Inst{ 317 | Name: name, 318 | } 319 | enc := text[i].S 320 | x := text[i].X 321 | i++ 322 | // Possible subarchitecture notes. 323 | for i < len(text) && text[i].X > x+36 { 324 | if inst.Arch != "" { 325 | inst.Arch += " " 326 | } 327 | inst.Arch += text[i].S 328 | i++ 329 | } 330 | // Encoding syntaxes. 331 | for i < len(text) && (match(text[i], "LucidaSansTypewriteX", 6.48, "") || text[i].X > x+36) { 332 | if text[i].X < x+0.25*inch { 333 | inst.Syntax = append(inst.Syntax, text[i].S) 334 | } else { 335 | s := inst.Syntax[len(inst.Syntax)-1] 336 | if !strings.Contains(s, "\t") { 337 | s += "\t" 338 | } else { 339 | s += " " 340 | } 341 | s += text[i].S 342 | inst.Syntax[len(inst.Syntax)-1] = s 343 | } 344 | i++ 345 | } 346 | 347 | var bits, abits, aenc string 348 | bits, i = readBitBox(inst.Name, inst.Syntax, content, text, i) 349 | if strings.Contains(enc, " / ") { 350 | if i < len(text) && match(text[i], "Times-Roman", 8, "") { 351 | abits, i = readBitBox(inst.Name, inst.Syntax, content, text, i) 352 | } else { 353 | abits = bits 354 | } 355 | slash := strings.Index(enc, " / ") 356 | aenc = "Encoding " + enc[slash+len(" / "):] 357 | enc = enc[:slash] 358 | } 359 | 360 | // pseudocode 361 | y0 := -1 * inch 362 | tab := 0.0 363 | for i < len(text) && match(text[i], "LucidaSansTypewriteX", 6.48, "") { 364 | t := text[i] 365 | i++ 366 | if math.Abs(t.Y-y0) < 3 { 367 | // same line as last fragment, probably just two spaces 368 | inst.Code += " " + t.S 369 | continue 370 | } 371 | if inst.Code != "" { 372 | inst.Code += "\n" 373 | } 374 | if t.X > x+0.1*inch { 375 | if tab == 0 { 376 | tab = t.X - x 377 | } 378 | inst.Code += strings.Repeat("\t", int((t.X-x)/tab+0.5)) 379 | } else { 380 | tab = 0 381 | } 382 | inst.Code += t.S 383 | y0 = t.Y 384 | } 385 | 386 | inst.ID = strings.TrimPrefix(enc, "Encoding ") 387 | inst.Bits = bits 388 | table = append(table, inst) 389 | if abits != "" { 390 | inst.ID = 
strings.TrimPrefix(aenc, "Encoding ") 391 | inst.Bits = abits 392 | table = append(table, inst) 393 | } 394 | 395 | } 396 | return name, table 397 | } 398 | 399 | func readBitBox(name string, syntax []string, content pdf.Content, text []pdf.Text, i int) (string, int) { 400 | // bit headings 401 | y2 := 0.0 402 | x1 := 0.0 403 | x2 := 0.0 404 | for i < len(text) && match(text[i], "Times-Roman", 8, "") { 405 | if y2 == 0 { 406 | y2 = text[i].Y 407 | } 408 | if x1 == 0 { 409 | x1 = text[i].X 410 | } 411 | i++ 412 | } 413 | // bit fields in box 414 | y1 := 0.0 415 | dy1 := 0.0 416 | for i < len(text) && match(text[i], "Times-Roman", 9, "") { 417 | if x2 < text[i].X+text[i].W { 418 | x2 = text[i].X + text[i].W 419 | } 420 | y1 = text[i].Y 421 | dy1 = text[i].FontSize 422 | i++ 423 | } 424 | 425 | if debugPage > 0 { 426 | fmt.Println("encoding box", x1, y1, x2, y2) 427 | } 428 | 429 | // Find lines (thin rectangles) separating bit fields. 430 | var bottom, top pdf.Rect 431 | const ( 432 | yMargin = 0.25 * 72 433 | xMargin = 2 * 72 434 | ) 435 | for _, r := range content.Rect { 436 | if r.Max.Y-r.Min.Y < 2 && x1-xMargin < r.Min.X && r.Min.X < x1 && x2 < r.Max.X && r.Max.X < x2+xMargin { 437 | if y1-yMargin < r.Min.Y && r.Min.Y < y1 { 438 | bottom = r 439 | } 440 | if y1+dy1 < r.Min.Y && r.Min.Y < y2 { 441 | top = r 442 | } 443 | } 444 | } 445 | 446 | if debugPage > 0 { 447 | fmt.Println("top", top, "bottom", bottom) 448 | } 449 | 450 | const ε = 0.1 * 72 451 | var bars []pdf.Rect 452 | for _, r := range content.Rect { 453 | if r.Max.X-r.Min.X < 2 && math.Abs(r.Min.Y-bottom.Min.Y) < ε && math.Abs(r.Max.Y-top.Min.Y) < ε { 454 | bars = append(bars, r) 455 | } 456 | } 457 | sort.Sort(RectHorizontal(bars)) 458 | 459 | // There are 16-bit and 32-bit encodings. 460 | // In practice, they are about 2.65 and 5.3 inches wide, respectively. 461 | // Use 4 inches as a cutoff. 
462 | nbit := 32 463 | dx := top.Max.X - top.Min.X 464 | if top.Max.X-top.Min.X < 4*72 { 465 | nbit = 16 466 | } 467 | 468 | total := 0 469 | var buf bytes.Buffer 470 | for i := 0; i < len(bars)-1; i++ { 471 | if i > 0 { 472 | fmt.Fprintf(&buf, "|") 473 | } 474 | var sub []pdf.Text 475 | x1, x2 := bars[i].Min.X, bars[i+1].Min.X 476 | for _, t := range content.Text { 477 | tx := t.X + t.W/2 478 | ty := t.Y + t.FontSize/2 479 | if x1 < tx && tx < x2 && y1 < ty && ty < y2 { 480 | sub = append(sub, t) 481 | } 482 | } 483 | var str []string 484 | for _, t := range findWords(sub) { 485 | str = append(str, t.S) 486 | } 487 | s := strings.Join(str, " ") 488 | s = strings.Replace(s, ")(", ") (", -1) 489 | n := len(strings.Fields(s)) 490 | b := int(float64(nbit)*(x2-x1)/dx + 0.5) 491 | if n == b { 492 | for j, f := range strings.Fields(s) { 493 | if j > 0 { 494 | fmt.Fprintf(&buf, "|") 495 | } 496 | fmt.Fprintf(&buf, "%s", f) 497 | } 498 | } else { 499 | if n != 1 { 500 | fmt.Fprintf(os.Stderr, "%s - %s - multi-field %d-bit encoding: %s\n", name, syntax, n, s) 501 | } 502 | fmt.Fprintf(&buf, "%s:%d", s, b) 503 | } 504 | total += b 505 | } 506 | 507 | if total != nbit || total == 0 { 508 | fmt.Fprintf(os.Stderr, "%s - %s - %d-bit encoding\n", name, syntax, total) 509 | } 510 | return buf.String(), i 511 | } 512 | 513 | type RectHorizontal []pdf.Rect 514 | 515 | func (x RectHorizontal) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 516 | func (x RectHorizontal) Less(i, j int) bool { return x[i].Min.X < x[j].Min.X } 517 | func (x RectHorizontal) Len() int { return len(x) } 518 | 519 | func checkNoEncodings(num int, text []pdf.Text) { 520 | for _, t := range text { 521 | if match(t, "Helvetica-Bold", 9, "Encoding") { 522 | fmt.Fprintf(os.Stderr, "page %d: unexpected encoding: %s\n", num, t.S) 523 | } 524 | } 525 | } 526 | 527 | func match(t pdf.Text, font string, size float64, substr string) bool { 528 | return t.Font == font && math.Abs(t.FontSize-size) < 0.1 && 
strings.Contains(t.S, substr) 529 | } 530 | 531 | func findWords(chars []pdf.Text) (words []pdf.Text) { 532 | // Sort by Y coordinate and normalize. 533 | const nudge = 1 534 | sort.Sort(pdf.TextVertical(chars)) 535 | old := -100000.0 536 | for i, c := range chars { 537 | if c.Y != old && math.Abs(old-c.Y) < nudge { 538 | chars[i].Y = old 539 | } else { 540 | old = c.Y 541 | } 542 | } 543 | 544 | // Sort by Y coordinate, breaking ties with X. 545 | // This will bring letters in a single word together. 546 | sort.Sort(pdf.TextVertical(chars)) 547 | 548 | // Loop over chars. 549 | for i := 0; i < len(chars); { 550 | // Find all chars on line. 551 | j := i + 1 552 | for j < len(chars) && chars[j].Y == chars[i].Y { 553 | j++ 554 | } 555 | var end float64 556 | // Split line into words (really, phrases). 557 | for k := i; k < j; { 558 | ck := &chars[k] 559 | s := ck.S 560 | end = ck.X + ck.W 561 | charSpace := ck.FontSize / 6 562 | wordSpace := ck.FontSize * 2 / 3 563 | l := k + 1 564 | for l < j { 565 | // Grow word. 566 | cl := &chars[l] 567 | if sameFont(cl.Font, ck.Font) && math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+charSpace { 568 | s += cl.S 569 | end = cl.X + cl.W 570 | l++ 571 | continue 572 | } 573 | // Add space to phrase before next word. 
574 | if sameFont(cl.Font, ck.Font) && math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+wordSpace { 575 | s += " " + cl.S 576 | end = cl.X + cl.W 577 | l++ 578 | continue 579 | } 580 | break 581 | } 582 | f := ck.Font 583 | f = strings.TrimSuffix(f, ",Italic") 584 | f = strings.TrimSuffix(f, "-Italic") 585 | words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end - ck.X, s}) 586 | k = l 587 | } 588 | i = j 589 | } 590 | 591 | return words 592 | } 593 | 594 | func sameFont(f1, f2 string) bool { 595 | f1 = strings.TrimSuffix(f1, ",Italic") 596 | f1 = strings.TrimSuffix(f1, "-Italic") 597 | f2 = strings.TrimSuffix(f1, ",Italic") 598 | f2 = strings.TrimSuffix(f1, "-Italic") 599 | return strings.TrimSuffix(f1, ",Italic") == strings.TrimSuffix(f2, ",Italic") || f1 == "Symbol" || f2 == "Symbol" || f1 == "TimesNewRoman" || f2 == "TimesNewRoman" 600 | } 601 | 602 | var jsFix = strings.NewReplacer( 603 | // `\u003c`, `<`, 604 | // `\u003e`, `>`, 605 | // `\u0026`, `&`, 606 | // `\u0009`, `\t`, 607 | ) 608 | 609 | func printTable(name string, table []Inst) { 610 | _ = strconv.Atoi 611 | } 612 | -------------------------------------------------------------------------------- /armspec/code.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
package main

import (
	"bytes"
	"fmt"
	"reflect"
	"strconv"
	"strings"
)

// Errors returned by (*Exec).stmt to report that evaluation stopped early:
// errSeeOther for a See statement, errUndefined for an Undefined statement,
// and errStop for Unpredictable, ImplDefined and SubarchDefined statements.
var errSeeOther = fmt.Errorf("see other")
var errStop = fmt.Errorf("stop")
var errUndefined = fmt.Errorf("undefined")

// A StmtOp identifies the kind of a Stmt.
type StmtOp int

// Statement kinds. The zero value is not a valid StmtOp.
// The trailing numbers are the iota values of the marked constants.
const (
	_      StmtOp = iota
	Assign        // 1
	Return
	Undefined
	Unpredictable
	See
	ImplDefined
	SubarchDefined
	If
	Repeat
	While // 10
	For
	Case
	Enum
	Block
	StmtExpr
	Fndef
	Assert
)

// A Stmt is one statement; Op selects which of the other fields are used.
// As read by (*Exec).stmt:
//
//	If:       X is the condition, Body the taken branch, ElseIf the
//	          further conditions tried in order, Else the optional fallback.
//	Case:     X is the value being switched on, When the arms, Else the
//	          optional fallback.
//	Block:    Block holds the statements, run in order.
//	StmtExpr: X is evaluated and its value discarded.
//	Assign:   Y is the value, X the target (a Name, or an ExprTuple of
//	          Name and Blank elements).
//
// The remaining fields and statement kinds are not used by (*Exec).stmt.
type Stmt struct {
	Op      StmtOp
	X, Y, Z *Expr
	List    []*Expr
	Text    string
	Body    *Stmt
	Else    *Stmt
	When    []*When
	ElseIf  []*ElseIf
	Block   []*Stmt
	Type    *Type
}

// A When is one arm of a Case statement: Body runs when the switched-on
// value equals any of the expressions in Cond.
type When struct {
	Cond []*Expr
	Body *Stmt
}

// An ElseIf is one "else if" arm of an If statement: Body runs when Cond
// evaluates to true.
type ElseIf struct {
	Cond *Expr
	Body *Stmt
}

// An ExprOp identifies the kind of an Expr.
type ExprOp int

// Expression kinds. The zero value is not a valid ExprOp.
// The trailing numbers are the iota values of the marked constants.
const (
	_     ExprOp = iota
	Blank        // 1
	Const
	Name
	Decl
	Unknown
	Call
	ExprTuple
	Eq
	NotEq
	LtEq // 10
	Lt
	GtEq
	Gt
	BitIndex
	IfElse
	Not
	AndAnd
	OrOr
	Eor
	Colon // 20
	And
	Or
	Plus
	Minus
	Add
	Sub
	Mul
	Div
	BigDIV
	BigMOD // 30
	BigAND
	BigOR
	BigEOR
	TwoPow
	Lsh
	Rsh
	Index
	Dot
)

// An Expr is one expression; Op selects which of the other fields are used.
// As read by (*Exec).expr: Name and Const use Text; Call uses Text as the
// function name and List as the arguments; ExprTuple uses List; unary and
// binary operators use X (and Y); IfElse uses X as the condition with Y and
// Z as the two results; BitIndex uses X as the bit string and List as the
// indexes.
type Expr struct {
	Op      ExprOp
	Text    string
	X, Y, Z *Expr
	List    []*Expr
	Type    *Type
}

// A TypeOp identifies the kind of a Type.
type TypeOp int

// Type kinds. The zero value is not a valid TypeOp.
const (
	_ TypeOp = iota
	BoolType
	BitType
	IntegerType
	NamedType
	TupleType
)

// A Type describes a type; Op selects the kind.
// NOTE(review): the meaning of List, N, NX and Text per kind is not visible
// in this part of the file; confirm against the code that builds Type values.
type Type struct {
	Op   TypeOp
	List []*Type
	N    int
	NX   *Expr
	Text string
}

// An Exec holds the state of one evaluation. Vars maps variable names to
// their current values; name lookup consults it before the global table.
type Exec struct {
	Vars map[string]Value
}

// Inconsistent is the value that the Consistent built-in tests for:
// Consistent returns FALSE exactly when its argument is an Inconsistent.
type Inconsistent struct {
}

// String returns the fixed text "INCONSISTENT".
func (Inconsistent) String() string {
	return "INCONSISTENT"
}
146 | 147 | type Bits struct { 148 | N int 149 | Val uint32 150 | DontCare uint32 151 | } 152 | 153 | func (b Bits) String() string { 154 | var buf bytes.Buffer 155 | fmt.Fprintf(&buf, "`") 156 | for i := b.N - 1; i >= 0; i-- { 157 | if (b.DontCare>>uint(i))&1 != 0 { 158 | fmt.Fprintf(&buf, "x") 159 | } else if (b.Val>>uint(i))&1 != 0 { 160 | fmt.Fprintf(&buf, "1") 161 | } else { 162 | fmt.Fprintf(&buf, "0") 163 | } 164 | } 165 | fmt.Fprintf(&buf, "'") 166 | return buf.String() 167 | } 168 | 169 | func (b Bits) Eq(v Value) bool { 170 | c := v.(Bits) 171 | if b.N != c.N { 172 | panic(fmt.Errorf("compare of mismatched bit lengths %v and %v", b, c)) 173 | } 174 | return b.Val&^c.DontCare == c.Val 175 | } 176 | 177 | func (b Bits) Lt(v Value) bool { 178 | panic("less than on bits") 179 | } 180 | 181 | func (b Bits) Gt(v Value) bool { 182 | panic("greater than on bits") 183 | } 184 | 185 | func isValid(inst *Inst, w uint32) bool { 186 | var ctxt Exec 187 | ctxt.Vars = make(map[string]Value) 188 | off := 0 189 | for _, b := range strings.Split(inst.Bits, "|") { 190 | wid := 1 191 | if i := strings.Index(b, ":"); i >= 0 { 192 | wid, _ = strconv.Atoi(b[i+1:]) 193 | b = b[:i] 194 | } 195 | switch b { 196 | case "1", "(1)": 197 | if (w>>uint(31-off))&1 != 1 { 198 | return false 199 | } 200 | case "0", "(0)": 201 | if (w>>uint(31-off))&1 != 0 { 202 | return false 203 | } 204 | default: 205 | bits := Bits{N: wid, Val: (w >> uint(32-off-wid)) & (1< y.(Int) } 279 | 280 | func (x Int) Add(y Value) Value { return x + y.(Int) } 281 | func (x Int) Sub(y Value) Value { return x - y.(Int) } 282 | func (x Int) Mul(y Value) Value { return x * y.(Int) } 283 | 284 | func (x Int) Lsh(y Value) Value { return x << uint(y.(Int)) } 285 | func (x Int) Rsh(y Value) Value { return x >> uint(y.(Int)) } 286 | func (x Int) DIV(y Value) Value { return x / y.(Int) } 287 | 288 | func _UInt(_ *Exec, args []Value) (Value, error) { 289 | if len(args) != 1 { 290 | return nil, fmt.Errorf("UInt takes a 
single argument") 291 | } 292 | b, ok := args[0].(Bits) 293 | if !ok { 294 | return nil, fmt.Errorf("UInt takes a Bits, not %T", args[0]) 295 | } 296 | if b.N > 63 { 297 | return nil, fmt.Errorf("UInt cannot handle %d-bit Bits", b.N) 298 | } 299 | return Int(b.Val), nil 300 | } 301 | 302 | func _SInt(_ *Exec, args []Value) (Value, error) { 303 | if len(args) != 1 { 304 | return nil, fmt.Errorf("SInt takes a single argument") 305 | } 306 | b, ok := args[0].(Bits) 307 | if !ok { 308 | return nil, fmt.Errorf("SInt takes a Bits, not %T", args[0]) 309 | } 310 | if b.N > 64 { 311 | return nil, fmt.Errorf("SInt cannot handle %d-bit Bits", b.N) 312 | } 313 | return Int(int64(b.Val) << uint(64-b.N) >> uint(64-b.N)), nil 314 | } 315 | 316 | /* 317 | 318 | func (ctxt *Exec) Run(prog []*Stmt) error { 319 | for _, stmt := range prog { 320 | if err := ctxt.stmt(stmt); err != nil { 321 | return err 322 | } 323 | } 324 | return nil 325 | } 326 | 327 | */ 328 | 329 | func (ctxt *Exec) stmt(stmt *Stmt) error { 330 | switch stmt.Op { 331 | case If: 332 | v, err := toBool(ctxt.expr(stmt.X)) 333 | if err != nil { 334 | return err 335 | } 336 | if v { 337 | return ctxt.stmt(stmt.Body) 338 | } 339 | for _, elseif := range stmt.ElseIf { 340 | v, err := toBool(ctxt.expr(elseif.Cond)) 341 | if err != nil { 342 | return err 343 | } 344 | if v { 345 | return ctxt.stmt(elseif.Body) 346 | } 347 | } 348 | if stmt.Else == nil { 349 | return nil 350 | } 351 | return ctxt.stmt(stmt.Else) 352 | 353 | case Case: 354 | v, err := ctxt.expr(stmt.X) 355 | if err != nil { 356 | return err 357 | } 358 | vv, ok := v.(interface { 359 | Eq(Value) bool 360 | }) 361 | if !ok { 362 | return fmt.Errorf("use of uncomparable value %T(%v) in case statement", v, v) 363 | } 364 | for _, when := range stmt.When { 365 | for _, cond := range when.Cond { 366 | w, err := ctxt.expr(cond) 367 | if err != nil { 368 | return err 369 | } 370 | if reflect.TypeOf(v) != reflect.TypeOf(w) { 371 | return fmt.Errorf("mistyped 
comparison of %T(%v) and %T(%v) in case statement", v, v, w, w) 372 | } 373 | if vv.Eq(w) { 374 | return ctxt.stmt(when.Body) 375 | } 376 | } 377 | } 378 | if stmt.Else == nil { 379 | return nil 380 | } 381 | return ctxt.stmt(stmt.Else) 382 | 383 | case Block: 384 | for _, x := range stmt.Block { 385 | if err := ctxt.stmt(x); err != nil { 386 | return err 387 | } 388 | } 389 | return nil 390 | 391 | case See: 392 | return errSeeOther 393 | 394 | case Undefined: 395 | return errUndefined 396 | 397 | case Unpredictable, ImplDefined, SubarchDefined: 398 | return errStop 399 | 400 | case StmtExpr: 401 | _, err := ctxt.expr(stmt.X) 402 | return err 403 | 404 | case Assign: 405 | v, err := ctxt.expr(stmt.Y) 406 | if err != nil { 407 | return err 408 | } 409 | if stmt.X.Op == ExprTuple { 410 | vv, ok := v.(Tuple) 411 | if !ok { 412 | return fmt.Errorf("assignment of non-tuple %T to tuple", v) 413 | } 414 | if len(stmt.X.List) != len(vv) { 415 | return fmt.Errorf("%d = %d in tuple assignment", len(stmt.X.List), len(vv)) 416 | } 417 | for i, x := range stmt.X.List { 418 | if x.Op == Blank { 419 | continue 420 | } 421 | if x.Op != Name { 422 | return fmt.Errorf("cannot assign to expr op %d", x.Op) 423 | } 424 | if err := ctxt.Assign(x.Text, vv[i]); err != nil { 425 | return err 426 | } 427 | } 428 | return nil 429 | } 430 | x := stmt.X 431 | if x.Op != Name { 432 | return fmt.Errorf("cannot assign to expr op %d", x.Op) 433 | } 434 | return ctxt.Assign(x.Text, v) 435 | } 436 | return fmt.Errorf("unknown stmt op %d", stmt.Op) 437 | } 438 | 439 | func toBool(v Value, err error) (b Bool, xerr error) { 440 | if err != nil { 441 | return false, err 442 | } 443 | switch v := v.(type) { 444 | case Bool: 445 | return v, nil 446 | default: 447 | return false, fmt.Errorf("value of type %T used as bool", v) 448 | } 449 | } 450 | 451 | type Value interface { 452 | String() string 453 | } 454 | 455 | type Bool bool 456 | 457 | func (b Bool) Eq(v Value) bool { return b == v } 458 | 459 | 
func (b Bool) Not() Value { return !b } 460 | 461 | func (b Bool) String() string { 462 | if b { 463 | return "TRUE" 464 | } 465 | return "FALSE" 466 | } 467 | 468 | type Tuple []Value 469 | 470 | func (t Tuple) String() string { 471 | var buf bytes.Buffer 472 | fmt.Fprintf(&buf, "(") 473 | for i, v := range t { 474 | if i > 0 { 475 | fmt.Fprintf(&buf, ", ") 476 | } 477 | fmt.Fprintf(&buf, v.String()) 478 | } 479 | fmt.Fprintf(&buf, ")") 480 | return buf.String() 481 | } 482 | 483 | func (ctxt *Exec) expr(x *Expr) (v Value, err error) { 484 | switch x.Op { 485 | case Call: 486 | fn, err := ctxt.name(x.Text) 487 | if err != nil { 488 | return nil, err 489 | } 490 | var list []Value 491 | for _, y := range x.List { 492 | v, err := ctxt.expr(y) 493 | if err != nil { 494 | return nil, err 495 | } 496 | list = append(list, v) 497 | } 498 | return ctxt.call(x.Text, fn, list) 499 | 500 | case ExprTuple: 501 | var list []Value 502 | for _, y := range x.List { 503 | v, err := ctxt.expr(y) 504 | if err != nil { 505 | return nil, err 506 | } 507 | list = append(list, v) 508 | } 509 | if len(list) == 1 { 510 | return list[0], nil 511 | } 512 | return Tuple(list), nil 513 | 514 | case AndAnd: 515 | v, err := toBool(ctxt.expr(x.X)) 516 | if err != nil { 517 | return nil, err 518 | } 519 | if !v { 520 | return v, nil 521 | } 522 | return ctxt.expr(x.Y) 523 | 524 | case OrOr: 525 | v, err := toBool(ctxt.expr(x.X)) 526 | if err != nil { 527 | return nil, err 528 | } 529 | if v { 530 | return v, nil 531 | } 532 | return ctxt.expr(x.Y) 533 | 534 | case Colon: 535 | v, err := ctxt.expr(x.X) 536 | if err != nil { 537 | return nil, err 538 | } 539 | y, err := ctxt.expr(x.Y) 540 | if err != nil { 541 | return nil, err 542 | } 543 | xb, ok := v.(Bits) 544 | yb, ok2 := y.(Bits) 545 | if !ok || !ok2 { 546 | return nil, fmt.Errorf("colon operator requires bit strings") 547 | } 548 | b := xb 549 | b.N += yb.N 550 | b.Val <<= uint(yb.N) 551 | b.DontCare <<= yb.DontCare 552 | return b, nil 553 
| 554 | case Name: 555 | return ctxt.name(x.Text) 556 | 557 | case Const: 558 | if (strings.HasPrefix(x.Text, "‘") || strings.HasPrefix(x.Text, "’")) && strings.HasSuffix(x.Text, "’") { 559 | text := x.Text[len("‘") : len(x.Text)-len("’")] 560 | var b Bits 561 | b.N = len(text) 562 | for _, c := range text { 563 | b.Val <<= 1 564 | b.DontCare <<= 1 565 | if c == '1' { 566 | b.Val |= 1 567 | } 568 | if c == 'x' { 569 | b.DontCare |= 1 570 | } 571 | } 572 | return b, nil 573 | } 574 | n, err := strconv.Atoi(x.Text) 575 | if err == nil { 576 | return Int(n), nil 577 | } 578 | println("const", x.Text) 579 | 580 | case Not: 581 | l, err := ctxt.expr(x.X) 582 | if err != nil { 583 | return nil, err 584 | } 585 | switch x.Op { 586 | case Not: 587 | ll, ok := l.(interface { 588 | Not() Value 589 | }) 590 | if !ok { 591 | return nil, fmt.Errorf("type %T does not support !", l) 592 | } 593 | return ll.Not(), nil 594 | } 595 | 596 | case Eq, NotEq, Lt, LtEq, Gt, GtEq, Add, Sub, Mul, Lsh, Rsh, BigDIV: 597 | l, err := ctxt.expr(x.X) 598 | if err != nil { 599 | return nil, err 600 | } 601 | r, err := ctxt.expr(x.Y) 602 | if err != nil { 603 | return nil, err 604 | } 605 | tl := reflect.TypeOf(l) 606 | tr := reflect.TypeOf(r) 607 | if tl != tr { 608 | return nil, fmt.Errorf("arithmetic (expr op %d) of %T(%v) with %T(%v)", x.Op, l, l, r, r) 609 | } 610 | switch x.Op { 611 | case Eq: 612 | ll, ok := l.(interface { 613 | Eq(Value) bool 614 | }) 615 | if !ok { 616 | return nil, fmt.Errorf("type %T does not support ==", l) 617 | } 618 | return Bool(ll.Eq(r)), nil 619 | case NotEq: 620 | ll, ok := l.(interface { 621 | Eq(Value) bool 622 | }) 623 | if !ok { 624 | return nil, fmt.Errorf("type %T does not support !=", l) 625 | } 626 | return Bool(!ll.Eq(r)), nil 627 | case Lt: 628 | ll, ok := l.(interface { 629 | Lt(Value) bool 630 | }) 631 | if !ok { 632 | return nil, fmt.Errorf("type %T does not support <", l) 633 | } 634 | return Bool(ll.Lt(r)), nil 635 | case GtEq: 636 | ll, ok := 
l.(interface { 637 | Lt(Value) bool 638 | }) 639 | if !ok { 640 | return nil, fmt.Errorf("type %T does not support >=", l) 641 | } 642 | return Bool(!ll.Lt(r)), nil 643 | case Gt: 644 | ll, ok := l.(interface { 645 | Gt(Value) bool 646 | }) 647 | if !ok { 648 | return nil, fmt.Errorf("type %T does not support >", l) 649 | } 650 | return Bool(ll.Gt(r)), nil 651 | case LtEq: 652 | ll, ok := l.(interface { 653 | Gt(Value) bool 654 | }) 655 | if !ok { 656 | return nil, fmt.Errorf("type %T does not support <=", l) 657 | } 658 | return Bool(!ll.Gt(r)), nil 659 | case Add: 660 | ll, ok := l.(interface { 661 | Add(Value) Value 662 | }) 663 | if !ok { 664 | return nil, fmt.Errorf("type %T does not support +", l) 665 | } 666 | return ll.Add(r), nil 667 | case Sub: 668 | ll, ok := l.(interface { 669 | Sub(Value) Value 670 | }) 671 | if !ok { 672 | return nil, fmt.Errorf("type %T does not support -", l) 673 | } 674 | return ll.Sub(r), nil 675 | case Mul: 676 | ll, ok := l.(interface { 677 | Mul(Value) Value 678 | }) 679 | if !ok { 680 | return nil, fmt.Errorf("type %T does not support *", l) 681 | } 682 | return ll.Mul(r), nil 683 | case Lsh: 684 | ll, ok := l.(interface { 685 | Lsh(Value) Value 686 | }) 687 | if !ok { 688 | return nil, fmt.Errorf("type %T does not support <<", l) 689 | } 690 | return ll.Lsh(r), nil 691 | case Rsh: 692 | ll, ok := l.(interface { 693 | Rsh(Value) Value 694 | }) 695 | if !ok { 696 | return nil, fmt.Errorf("type %T does not support >>", l) 697 | } 698 | return ll.Rsh(r), nil 699 | case BigDIV: 700 | ll, ok := l.(interface { 701 | DIV(Value) Value 702 | }) 703 | if !ok { 704 | return nil, fmt.Errorf("type %T does not support DIV", l) 705 | } 706 | return ll.DIV(r), nil 707 | } 708 | 709 | case BitIndex: 710 | l, err := ctxt.expr(x.X) 711 | if err != nil { 712 | return nil, err 713 | } 714 | b, ok := l.(Bits) 715 | if !ok { 716 | return nil, fmt.Errorf("bit index operator requires bitstring, not %T(%v)", l, l) 717 | } 718 | out := Bits{} 719 | for 
_, ix := range x.List { 720 | if ix.Op == Colon { 721 | r1, err := ctxt.expr(ix.X) 722 | if err != nil { 723 | return nil, err 724 | } 725 | r2, err := ctxt.expr(ix.Y) 726 | if err != nil { 727 | return nil, err 728 | } 729 | i1, ok := r1.(Int) 730 | i2, ok2 := r2.(Int) 731 | if !ok || !ok2 { 732 | return nil, fmt.Errorf("bit indexes must be int") 733 | } 734 | if i1 <= i2 { 735 | return nil, fmt.Errorf("inverted bit indexes %d:%d", i1, i2) 736 | } 737 | w := int(i1 + 1 - i2) 738 | out.N += w 739 | out.Val <<= uint(w) 740 | out.DontCare <<= uint(w) 741 | out.Val |= (b.Val >> uint(i2)) & (1<> uint(i2)) & (1<> uint(i)) & 1 756 | } 757 | } 758 | return out, nil 759 | 760 | case IfElse: 761 | v, err := toBool(ctxt.expr(x.X)) 762 | if err != nil { 763 | return nil, err 764 | } 765 | if v { 766 | return ctxt.expr(x.Y) 767 | } 768 | return ctxt.expr(x.Z) 769 | } 770 | return nil, fmt.Errorf("unknown expr op %d", x.Op) 771 | } 772 | 773 | type Func struct { 774 | Name string 775 | F func(*Exec, []Value) (Value, error) 776 | } 777 | 778 | func (f Func) String() string { 779 | return f.Name 780 | } 781 | 782 | func (ctxt *Exec) call(name string, fn Value, args []Value) (Value, error) { 783 | switch fn := fn.(type) { 784 | case Func: 785 | return fn.F(ctxt, args) 786 | } 787 | return nil, fmt.Errorf("cannot call %s of type %T", name, fn) 788 | } 789 | 790 | var global = map[string]Value{ 791 | "UInt": Func{"UInt", _UInt}, 792 | "DecodeImmShift": Func{"DecodeImmShift", _DecodeImmShift}, 793 | "ArchVersion": Func{"ArchVersion", _ArchVersion}, 794 | "ZeroExtend": Func{"ZeroExtend", _ZeroExtend}, 795 | "ARMExpandImm": Func{"ARMExpandImm", _ARMExpandImm}, 796 | "Zeros": Func{"Zeros", _Zeros}, 797 | "TRUE": Bool(true), 798 | "FALSE": Bool(false), 799 | "BitCount": Func{"BitCount", _BitCount}, 800 | "Consistent": Func{"Consistent", _Consistent}, 801 | } 802 | 803 | func _Consistent(ctxt *Exec, args []Value) (Value, error) { 804 | if len(args) != 1 { 805 | return nil, 
fmt.Errorf("BitCount requires one argument") 806 | } 807 | _, inconsistent := args[0].(Inconsistent) 808 | return Bool(!inconsistent), nil 809 | } 810 | 811 | func _BitCount(ctxt *Exec, args []Value) (Value, error) { 812 | if len(args) != 1 { 813 | return nil, fmt.Errorf("BitCount requires one argument") 814 | } 815 | b, ok1 := args[0].(Bits) 816 | if !ok1 { 817 | return nil, fmt.Errorf("BitCount requires bitstring argument") 818 | } 819 | 820 | n := 0 821 | for i := 0; i < b.N; i++ { 822 | if b.Val&(1<> 8) & 0xF)) 871 | v = v>>rot | v<<(32-rot) 872 | return Bits{N: 32, Val: v}, nil 873 | } 874 | 875 | func _Zeros(ctxt *Exec, args []Value) (Value, error) { 876 | if len(args) != 1 { 877 | return nil, fmt.Errorf("Zeros requires one argument") 878 | } 879 | n, ok := args[0].(Int) 880 | if !ok { 881 | return nil, fmt.Errorf("Zeros requires int argument") 882 | } 883 | return Bits{N: int(n)}, nil 884 | } 885 | 886 | type Symbol string 887 | 888 | func (s Symbol) String() string { return string(s) } 889 | 890 | func (ctxt *Exec) name(name string) (v Value, err error) { 891 | v, ok := ctxt.Vars[name] 892 | if ok { 893 | return v, nil 894 | } 895 | v, ok = global[name] 896 | if ok { 897 | return v, nil 898 | } 899 | return Symbol(name), nil 900 | return nil, fmt.Errorf("unknown name %s", name) 901 | } 902 | 903 | /* 904 | func pseudoExec(base uint32, enc *Enc) { 905 | var ctxt Exec 906 | ctxt.Define("EncodingSpecificOperations", func(ctxt *Exec, args []Value) (Value, error) { 907 | return nil, ctxt.Run(enc.Prog) 908 | }) 909 | 910 | var n uint 911 | for _, f := range enc.Fields { 912 | switch f { 913 | case "0", "1", "(0)", "(1)": 914 | n++ 915 | default: 916 | wid := size[f] 917 | if wid == 0 { 918 | panic("missing width for " + f) 919 | } 920 | ctxt.Define(f, Bits{N: wid, Val: (base>>(31-n))&(1<= 0 { 164 | n, _ = strconv.Atoi(f[i+1:]) 165 | } 166 | off -= n 167 | fieldOffset[f] = off 168 | fieldWidth[f] = n 169 | if f == "(0)" || f == "(1)" { 170 | fuzzy |= 1 << 
uint(off) 171 | } 172 | } 173 | if off != 0 { 174 | fmt.Fprintf(os.Stderr, "%s: counted %d bits in %s\n", text, 32-off, encoding) 175 | } 176 | 177 | // Track which encoding fields we found uses for. 178 | // If we do not find a use for a field, that's an error in the input tables. 179 | fieldUsed := map[string]bool{} 180 | 181 | // Split text into opcode and arguments. 182 | var op, argstr string 183 | if i := strings.Index(text, " "); i >= 0 { 184 | op = text[:i] 185 | argstr = text[i:] 186 | } else { 187 | op = text 188 | } 189 | op = strings.TrimSpace(op) 190 | argstr = strings.TrimSpace(argstr) 191 | 192 | // Parse opcode suffixes. 193 | i := strings.Index(op, "<") 194 | if i < 0 { 195 | i = len(op) 196 | } 197 | if j := strings.Index(op, "{"); j >= 0 && j < i { 198 | i = j 199 | } 200 | op, suffix := op[:i], op[i:] 201 | if suffix != "" && opSuffix[suffix] == "" { 202 | fmt.Fprintf(os.Stderr, "%s: invalid op suffix %q in %s\n", text, suffix, op+suffix) 203 | } 204 | 205 | // Make sure fields needed by opcode suffix are available. 206 | for _, f := range strings.Split(opSuffix[suffix], ",") { 207 | if f != "" && fieldWidth[f] == 0 { 208 | fmt.Fprintf(os.Stderr, "%s: opsuffix %s missing %s in encoding %s\n", text, suffix, f, encoding) 209 | } 210 | fieldUsed[f] = true 211 | } 212 | 213 | // Build list of opcodes that can be generated by this suffix. 214 | // For example, the opcodes generated by ADD are ADD.EQ, ADD.NE, etc. 215 | // To simplify the decoding of instruction opcodes, we arrange that this 216 | // sequence aligns with the encoding, so that decoding amounts to extracting 217 | // the right bits, concatenating them, and adding them to the first opcode in 218 | // the sequence. If the condition code is present, we always place it in the 219 | // low order bits, so that x&^15 == FOO_EQ tests whether x is any of the 220 | // conditional FOO instructions. 
221 | ops := []string{op} 222 | opBits := uint64(0) // record of bits to extract and add to opcode base 223 | opFields := strings.Split(opSuffix[suffix], ",") 224 | // First the optional elements, like {S} meaning "" or ".S". 225 | for strings.HasPrefix(suffix, "{") { 226 | i := strings.Index(suffix, "}") 227 | var f, option string 228 | option, suffix = suffix[1:i], suffix[i+1:] 229 | f, opFields = opFields[0], opFields[1:] 230 | if option == "W" { 231 | // The {W} option on PLD{W} uses the R bit which is !W. 232 | ops = cross(ops, "."+option, "") 233 | } else { 234 | ops = cross(ops, "", "."+option) 235 | } 236 | if fieldWidth[f] != 1 { 237 | fmt.Fprintf(os.Stderr, "%s: have %d bits for {%s}\n", text, fieldWidth[f], option) 238 | } 239 | // opBits is a sequence of 16-bit chunks describing contiguous bit sections. 240 | // Each chunk is 8-bit offset followed by 8-bit size. 241 | opBits = opBits<<16 | uint64(fieldOffset[f])<<8 | 1 242 | } 243 | // Then the true field substitutions. 244 | haveCond := false 245 | for strings.Contains(suffix, "<") { 246 | var f, literal, x string 247 | if len(opFields) == 0 { 248 | fmt.Fprintf(os.Stderr, "%s: ran out of suffix fields for <%s>\n", text, x) 249 | break 250 | } 251 | f, opFields = opFields[0], opFields[1:] 252 | i := strings.Index(suffix, "<") 253 | j := strings.Index(suffix, ">") 254 | literal, x, suffix = suffix[:i], suffix[i+1:j], suffix[j+1:] 255 | 256 | // Add leading literal text to all opcodes. 257 | ops = cross(ops, literal) 258 | 259 | // The condition can happen anywhere in the opcode text 260 | // but we want to generate the actual variation in the low bits 261 | // of the list index. Remember when and where we've seen and apply 262 | // it after the loop has finished. 263 | if x == "c" && f == "cond:4" { 264 | haveCond = true 265 | ops = cross(ops, "_COND_") 266 | continue 267 | } 268 | 269 | // Otherwise, choices[x] lists the possible expansions of . 270 | // If is of the form the choices are A, B, and C. 
271 | expand := choices[x] 272 | if expand == nil && strings.Contains(x, ",") { 273 | expand = strings.Split(x, ",") 274 | } 275 | if expand == nil { 276 | fmt.Fprintf(os.Stderr, "%s: unknown choices for <%s>\n", text, x) 277 | expand = []string{x} 278 | } else if len(expand) != 1< but %d bits\n", text, len(expand), x, fieldWidth[f]) 280 | } 281 | opBits = opBits<<16 | uint64(fieldOffset[f])<<8 | uint64(fieldWidth[f]) 282 | ops = cross(ops, expand...) 283 | } 284 | if haveCond { 285 | // Apply condtional suffix last. 286 | opBits = opBits<<16 | 28<<8 | 4 287 | ops = crossCond(ops) 288 | } 289 | ops = cross(ops, suffix) 290 | 291 | // Now ops is a list of opcodes generated by this opcode pattern. 292 | // We want to make sure that we can arrange for those opcodes to 293 | // happen consecutively in the final opcode numbering. 294 | // Record in p.OpRanges[op] the required consecutive sequence of 295 | // opcode that includes op. To make searches easier, we record 296 | // the sequence as a comma-separated list of strings with commas 297 | // on both ends: [A, B] encodes as ",A,B,". 298 | if p.OpRanges == nil { 299 | p.OpRanges = make(map[string]string) 300 | } 301 | opstr := "," + strings.Join(ops, ",") + "," 302 | for _, op := range ops { 303 | if old := p.OpRanges[op]; old != "" && old != opstr { 304 | if strings.Contains(old, opstr) { 305 | opstr = old 306 | } else if strings.Contains(opstr, old) { 307 | // great, do nothing 308 | } else { 309 | // It would also be okay if there is some subsequence s such that 310 | // old = x+s and opstr = s+y (or vice versa), in which case we should 311 | // record opstr = x+s+y. However, this has not come up in practice. 312 | // Failing that, we can't satisfy the sequencing requirements. 
313 | fmt.Fprintf(os.Stderr, "%s: %s appears in both %s and %s\n", text, op, old, opstr) 314 | } 315 | } 316 | } 317 | for _, op := range strings.Split(opstr, ",") { 318 | if op != "" { 319 | p.OpRanges[op] = opstr 320 | } 321 | } 322 | 323 | // Process the arguments, building a list of argument descriptions. 324 | // Each argument description has the form |field@off|field@off... 325 | // where the |field@off suffixes give the name and location of the fields 326 | // needed by the argument. Each such string maps to a different decoding 327 | // type in the generated table, according to the argOps map. 328 | var args []string 329 | for argstr != "" { 330 | // Find longest match among argSuffixes pieces. 331 | best := 0 332 | for a := range argSuffixes { 333 | if argstr == a || strings.HasPrefix(argstr, a+",") { 334 | if best < len(a) { 335 | best = len(a) 336 | } 337 | } 338 | } 339 | if best == 0 { 340 | fmt.Fprintf(os.Stderr, "%s: unknown arg %s\n", text, argstr) 341 | break 342 | } 343 | 344 | var arg, desc string 345 | arg, argstr = argstr[:best], strings.TrimSpace(strings.TrimLeft(argstr[best:], ",")) 346 | desc = arg 347 | for _, f := range strings.Split(argSuffixes[desc], ",") { 348 | if f == "" { 349 | continue 350 | } 351 | if fieldWidth[f] == 0 { 352 | fmt.Fprintf(os.Stderr, "%s: arg %s missing %s in encoding %s\n", text, arg, f, encoding) 353 | } 354 | fieldUsed[f] = true 355 | desc += fmt.Sprintf("|%s@%d", f, fieldOffset[f]) 356 | } 357 | args = append(args, desc) 358 | } 359 | 360 | // Check that all encoding fields were used by suffix or argument decoding. 361 | for f := range fieldWidth { 362 | switch f { 363 | case "0", "1", "(0)", "(1)": 364 | // ok 365 | default: 366 | if !fieldUsed[f] { 367 | fmt.Fprintf(os.Stderr, "%s: encoding field %s not used in %s\n", text, f, encoding) 368 | } 369 | } 370 | } 371 | 372 | // Determine decoding priority. Instructions that say 'SEE X' in the tag 373 | // are considered lower priority than ones that don't. 
In theory the 374 | // structure described by the SEE tags might be richer than that, but 375 | // in practice it only has those two levels. 376 | // We leave space for two more priorities according to whether the 377 | // fuzzy bits are set correctly. The full set of priorities then is: 378 | // 379 | // 4 - no SEE tag, fuzzy bits all match 380 | // 3 - no SEE tag, some fuzzy bits don't match 381 | // 2 - SEE tag, fuzzy bits all match 382 | // 1 - SEE tag, some fuzzy bits don't match 383 | // 384 | // You could argue for swapping the middle two levels but so far 385 | // it has not been an issue. 386 | pri := 4 387 | if strings.Contains(tags, "SEE") { 388 | pri = 2 389 | } 390 | 391 | inst := Inst{ 392 | Text: text, 393 | Encoding: encoding, 394 | Mask: uint32(mask), 395 | Value: uint32(value), 396 | Priority: pri, 397 | OpBase: ops[0], 398 | OpBits: opBits, 399 | Args: args, 400 | } 401 | p.Inst = append(p.Inst, inst) 402 | 403 | if fuzzy != 0 { 404 | inst.Mask &^= fuzzy 405 | inst.Priority-- 406 | p.Inst = append(p.Inst, inst) 407 | } 408 | } 409 | 410 | // opSuffix describes the encoding fields used to resolve a given opcode suffix. 411 | var opSuffix = map[string]string{ 412 | "": "op", 413 | "": "op:2", 414 | ".F<32,64>": "op,cond:4,sz", 415 | ".F<32,64>": "op,cond:4,sz", 416 | "": "tb,cond:4", 417 | ".8": "op", 418 | "": "cond:4", 419 | ".32": "cond:4", 420 | ".F<32,64>": "cond:4,sz", 421 | "": "N,M,cond:4", 422 | "": "M,cond:4", 423 | "{B}": "B,cond:4", 424 | "{E}.F<32,64>": "E,cond:4,sz", 425 | "{R}": "R,cond:4", 426 | ".F<32,64>.32": "cond:4,sz,op", 427 | ".32.F<32,64>": "op,cond:4,signed,sz", 428 | "{S}": "S,cond:4", 429 | "{W}": "R", 430 | "{X}": "M,cond:4", 431 | ".": "T,cond:4,op", 432 | ".": "cond:4,sz", 433 | ".FX<16,32>.F<32,64>": "cond:4,U,sx,sz", 434 | ".F<32,64>.FX<16,32>": "cond:4,sz,U,sx", 435 | } 436 | 437 | // choices[x] describes the choices for filling in "<"+x+">" in an opcode suffix. 
438 | // Opcodes that end up containing ZZ take up a numeric sequence value but are 439 | // not exported in the package API. 440 | var choices = map[string][]string{ 441 | "c": {".EQ", ".NE", ".CS", ".CC", ".MI", ".PL", ".VS", ".VC", ".HI", ".LS", ".GE", ".LT", ".GT", ".LE", "", ".ZZ"}, 442 | "x": {"B", "T"}, 443 | "y": {"B", "T"}, 444 | } 445 | 446 | // argOps maps from argument descriptions to internal decoder name. 447 | var argOps = map[string]string{ 448 | // 4-bit register encodings 449 | "|Rm:4@0": "arg_R_0", 450 | "|Rn:4@0": "arg_R_0", 451 | "|Rt:4@0": "arg_R_0", 452 | "|Rm:4@8": "arg_R_8", 453 | "|Ra:4@12": "arg_R_12", 454 | "|Rd:4@12": "arg_R_12", 455 | "|RdLo:4@12": "arg_R_12", 456 | "|Rt:4@12": "arg_R_12", 457 | "|Rt:4@12": "arg_R_12_nzcv", 458 | "|Rd:4@16": "arg_R_16", 459 | "|RdHi:4@16": "arg_R_16", 460 | "|Rn:4@16": "arg_R_16", 461 | 462 | // first and second of consecutive register pair 463 | "|Rt:4@0": "arg_R1_0", 464 | "|Rt:4@12": "arg_R1_12", 465 | "|Rt:4@0": "arg_R2_0", 466 | "|Rt:4@12": "arg_R2_12", 467 | 468 | // register arithmetic 469 | ", |Rm:4@0|Rs:4@8|type:2@5": "arg_R_shift_R", 470 | "{,}|Rm:4@0|imm5:5@7|type:2@5": "arg_R_shift_imm", 471 | "{,}|Rn:4@0|imm5:5@7|sh@6": "arg_R_shift_imm", 472 | "{,LSL #}|Rm:4@0|imm5:5@7": "arg_R_shift_imm", 473 | "{,}|Rm:4@0|rotate:2@10": "arg_R_rotate", 474 | 475 | // memory references 476 | "{!}|Rn:4@16|W@21": "arg_R_16_WB", 477 | "[]|Rn:4@16": "arg_mem_R", 478 | "[,+/-{, }]{!}|Rn:4@16|U@23|Rm:4@0|type:2@5|imm5:5@7|P@24|W@21": "arg_mem_R_pm_R_shift_imm_W", 479 | "[{,#+/-}]{!}|Rn:4@16|P@24|U@23|W@21|imm4H:4@8|imm4L:4@0": "arg_mem_R_pm_imm8_W", 480 | "[] {,#+/-}|Rn:4@16|U@23|imm4H:4@8|imm4L:4@0": "arg_mem_R_pm_imm8_postindex", 481 | "[{,#+/-}]{!}|Rn:4@16|P@24|U@23|W@21|imm12:12@0": "arg_mem_R_pm_imm12_W", 482 | "[],#+/-|Rn:4@16|imm12:12@0|U@23": "arg_mem_R_pm_imm12_postindex", 483 | "[,#+/-]|Rn:4@16|U@23|imm12:12@0": "arg_mem_R_pm_imm12_offset", 484 | "[] {,#+/-}|Rn:4@16|U@23|imm12:12@0": 
"arg_mem_R_pm_imm12_postindex", 485 | "[], +/-|Rn:4@16|U@23|Rm:4@0": "arg_mem_R_pm_R_postindex", 486 | "[,+/-]{!}|Rn:4@16|U@23|Rm:4@0|P@24|W@21": "arg_mem_R_pm_R_W", 487 | "[],+/-{, }|Rn:4@16|Rm:4@0|imm5:5@7|type:2@5|U@23": "arg_mem_R_pm_R_shift_imm_postindex", 488 | "[,+/-{, }]|Rn:4@16|U@23|Rm:4@0|type:2@5|imm5:5@7": "arg_mem_R_pm_R_shift_imm_offset", 489 | "[{,#+/-}]|Rn:4@16|U@23|imm8:8@0": "arg_mem_R_pm_imm8at0_offset", 490 | 491 | // pc-relative constants 492 | "|imm12:12@0": "arg_label_p_12", 493 | "|imm12:12@0": "arg_label_m_12", 494 | "|imm12:12@0|U@23": "arg_label_pm_12", 495 | "|imm4H:4@8|imm4L:4@0|U@23": "arg_label_pm_4_4", 496 | 497 | // constants 498 | "#|imm12:12@0": "arg_const", 499 | "#|imm5:5@7": "arg_imm5", 500 | "#|imm5:5@7": "arg_imm5_nz", 501 | "#|imm5:5@7": "arg_imm5_32", 502 | "|imm24:24@0": "arg_label24", 503 | "#|lsb:5@7": "arg_imm5", 504 | "#|lsb:5@7|msb:5@16": "arg_lsb_width", 505 | "#|imm12:12@8|imm4:4@0": "arg_imm_12at8_4at0", 506 | "#|imm12:12@0|imm4:4@16": "arg_imm_4at16_12at0", 507 | "|imm24:24@0|H@24": "arg_label24H", 508 | "#