├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── arch_armv7.cpp ├── arch_armv7.h ├── armv7_disasm ├── armv7.c ├── armv7.h ├── test.c └── test.py ├── il.cpp ├── il.h ├── test.py ├── test_lift.py └── thumb2_disasm ├── .gitignore ├── Makefile ├── Makefile-linux ├── README.md ├── arch_thumb2.cpp ├── arch_thumb2.h ├── arm_pcode_parser ├── Makefile ├── README.md ├── codegencpp.py ├── filter.py ├── parse.py └── pcode.ebnf ├── disassembler.cpp ├── disassembler.h ├── generator.py ├── il_thumb2.cpp ├── spec.cpp ├── spec.h ├── spec.txt ├── test.py └── update.sh /.gitignore: -------------------------------------------------------------------------------- 1 | CMakeFiles 2 | CMakeCache.txt 3 | cmake_install.cmake 4 | Makefile 5 | api 6 | build 7 | libarch_* 8 | arch_*.dll 9 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.9 FATAL_ERROR) 2 | 3 | project(arch_armv7) 4 | 5 | if((NOT BN_API_PATH) AND (NOT BN_INTERNAL_BUILD)) 6 | set(BN_API_PATH $ENV{BN_API_PATH}) 7 | if(NOT BN_API_PATH) 8 | message(FATAL_ERROR "Provide path to Binary Ninja API source in BN_API_PATH") 9 | endif() 10 | endif() 11 | if(NOT BN_INTERNAL_BUILD) 12 | add_subdirectory(${BN_API_PATH} ${PROJECT_BINARY_DIR}/api) 13 | endif() 14 | 15 | file(GLOB SOURCES 16 | *.cpp 17 | *.h 18 | armv7_disasm/*.c 19 | armv7_disasm/*.h 20 | thumb2_disasm/*.cpp 21 | thumb2_disasm/*.h) 22 | 23 | if(DEMO) 24 | add_library(arch_armv7 STATIC ${SOURCES}) 25 | else() 26 | add_library(arch_armv7 SHARED ${SOURCES}) 27 | endif() 28 | 29 | target_include_directories(arch_armv7 30 | PRIVATE ${PROJECT_SOURCE_DIR} 31 | PRIVATE ${PROJECT_SOURCE_DIR}/armv7_disasm 32 | PRIVATE ${PROJECT_SOURCE_DIR}/thumb2_disasm) 33 | 34 | target_link_libraries(arch_armv7 binaryninjaapi) 35 | 36 | set_target_properties(arch_armv7 PROPERTIES 37 | CXX_STANDARD 17 38 | 
CXX_VISIBILITY_PRESET hidden 39 | CXX_STANDARD_REQUIRED ON 40 | C_STANDARD 99 41 | C_STANDARD_REQUIRED ON 42 | C_VISIBILITY_PRESET hidden 43 | VISIBILITY_INLINES_HIDDEN ON 44 | POSITION_INDEPENDENT_CODE ON) 45 | 46 | if(BN_INTERNAL_BUILD) 47 | plugin_rpath(arch_armv7) 48 | set_target_properties(arch_armv7 PROPERTIES 49 | LIBRARY_OUTPUT_DIRECTORY ${BN_CORE_PLUGIN_DIR} 50 | RUNTIME_OUTPUT_DIRECTORY ${BN_CORE_PLUGIN_DIR}) 51 | endif() 52 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2020-2024 Vector 35 Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # This module has moved 2 | 3 | This module has been moved to the [Binary Ninja API](https://github.com/Vector35/binaryninja-api/tree/dev/arch/armv7) repository. Please file issues and pull requests there. 
4 | -------------------------------------------------------------------------------- /arch_armv7.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "binaryninjaapi.h" 4 | #include "armv7.h" 5 | 6 | #define BINARYNINJA_MANUAL_RELOCATION ((uint64_t)-2) 7 | 8 | class ArmCommonArchitecture: public BinaryNinja::Architecture 9 | { 10 | protected: 11 | BNEndianness m_endian; 12 | BinaryNinja::Ref m_armArch, m_thumbArch; 13 | 14 | virtual std::string GetAssemblerTriple() = 0; 15 | 16 | public: 17 | ArmCommonArchitecture(const char* name, BNEndianness endian); 18 | void SetArmAndThumbArchitectures(Architecture* arm, Architecture* thumb); 19 | 20 | virtual size_t GetAddressSize() const override; 21 | virtual BNEndianness GetEndianness() const override; 22 | virtual BinaryNinja::Ref GetAssociatedArchitectureByAddress(uint64_t& addr) override; 23 | virtual std::string GetFlagName(uint32_t flag) override; 24 | virtual std::string GetFlagWriteTypeName(uint32_t flags) override; 25 | virtual BNFlagRole GetFlagRole(uint32_t flag, uint32_t semClass = 0) override; 26 | virtual std::vector GetFlagsWrittenByFlagWriteType(uint32_t flags) override; 27 | virtual std::vector GetFlagsRequiredForFlagCondition(BNLowLevelILFlagCondition cond, uint32_t semClass) override; 28 | virtual size_t GetFlagWriteLowLevelIL(BNLowLevelILOperation op, size_t size, uint32_t flagWriteType, uint32_t flag, 29 | BNRegisterOrConstant* operands, size_t operandCount, BinaryNinja::LowLevelILFunction& il) override; 30 | virtual std::string GetRegisterName(uint32_t reg) override; 31 | virtual std::vector GetFullWidthRegisters() override; 32 | virtual std::vector GetAllRegisters() override; 33 | virtual std::vector GetAllFlags() override; 34 | virtual std::vector GetAllFlagWriteTypes() override; 35 | virtual BNRegisterInfo GetRegisterInfo(uint32_t reg) override; 36 | virtual uint32_t GetStackPointerRegister() override; 37 | virtual uint32_t 
GetLinkRegister() override; 38 | virtual bool CanAssemble() override; 39 | virtual bool Assemble(const std::string& code, uint64_t addr, BinaryNinja::DataBuffer& result, 40 | std::string& errors) override; 41 | }; 42 | 43 | ArmCommonArchitecture* InitThumb2Architecture(const char* name, BNEndianness endian); 44 | -------------------------------------------------------------------------------- /armv7_disasm/armv7.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #if defined(_MSC_VER) 8 | #define snprintf _snprintf 9 | #define restrict __restrict 10 | #define inline __inline 11 | #else 12 | #include 13 | #ifdef __cplusplus 14 | #define restrict __restrict 15 | #endif 16 | #endif 17 | 18 | #define MAX_OPERANDS 6 19 | 20 | #define UNCONDITIONAL(c) (((c) == COND_NONE) || ((c) == COND_NONE2)) 21 | #define CONDITIONAL(c) (((c) != COND_NONE) && ((c) != COND_NONE2)) 22 | 23 | #ifdef __cplusplus 24 | #define restrict __restrict 25 | 26 | namespace armv7 { 27 | #endif 28 | 29 | enum Operation { 30 | ARMV7_UNDEFINED, 31 | ARMV7_UNPREDICTABLE, 32 | ARMV7_ADC, 33 | ARMV7_ADCS, 34 | ARMV7_ADD, 35 | ARMV7_ADDS, 36 | ARMV7_ADDW, 37 | ARMV7_ADR, 38 | ARMV7_AND, 39 | ARMV7_ANDS, 40 | ARMV7_ASR, 41 | ARMV7_ASRS, 42 | ARMV7_B, 43 | ARMV7_BFC, 44 | ARMV7_BFI, 45 | ARMV7_BIC, 46 | ARMV7_BICS, 47 | ARMV7_BKPT, 48 | ARMV7_BL, 49 | ARMV7_BLX, 50 | ARMV7_BX, 51 | ARMV7_BXJ, 52 | ARMV7_CBNZ, 53 | ARMV7_CBZ, 54 | ARMV7_CDP, 55 | ARMV7_CDP2, 56 | ARMV7_CLREX, 57 | ARMV7_CLZ, 58 | ARMV7_CMN, 59 | ARMV7_CMP, 60 | ARMV7_CPS, 61 | ARMV7_CPSID, 62 | ARMV7_CPSIE, 63 | ARMV7_DBG, 64 | ARMV7_DMB, 65 | ARMV7_DSB, 66 | ARMV7_ENTERX, 67 | ARMV7_EOR, 68 | ARMV7_EORS, 69 | ARMV7_ERET, 70 | ARMV7_FLDMDBX, 71 | ARMV7_FLDMIAX, 72 | ARMV7_FSTMDBX, 73 | ARMV7_FSTMIAX, 74 | ARMV7_FSTMX, 75 | ARMV7_HINT, 76 | ARMV7_HVC, 77 | ARMV7_ISB, 78 | ARMV7_IT, 79 | ARMV7_LDA, 80 | ARMV7_LDAB, 81 | ARMV7_LDAH, 82 | 
ARMV7_LDAEX, // A32 83 | ARMV7_LDAEXB, // A32 84 | ARMV7_LDAEXH, // A32 85 | ARMV7_LDAEXD, // A32 86 | ARMV7_LDC, 87 | ARMV7_LDC2, 88 | ARMV7_LDC2L, 89 | ARMV7_LDCL, 90 | ARMV7_LDM, 91 | ARMV7_LDMDA, 92 | ARMV7_LDMDB, 93 | ARMV7_LDMIA, 94 | ARMV7_LDMIB, 95 | ARMV7_LDR, 96 | ARMV7_LDRB, 97 | ARMV7_LDRBT, 98 | ARMV7_LDRD, 99 | ARMV7_LDREX, 100 | ARMV7_LDREXB, 101 | ARMV7_LDREXD, 102 | ARMV7_LDREXH, 103 | ARMV7_LDRH, 104 | ARMV7_LDRHT, 105 | ARMV7_LDRSB, 106 | ARMV7_LDRSBT, 107 | ARMV7_LDRSH, 108 | ARMV7_LDRSHT, 109 | ARMV7_LDRT, 110 | ARMV7_LEAVEX, 111 | ARMV7_LSL, 112 | ARMV7_LSLS, 113 | ARMV7_LSR, 114 | ARMV7_LSRS, 115 | ARMV7_MCR, 116 | ARMV7_MCR2, 117 | ARMV7_MCRR, 118 | ARMV7_MCRR2, 119 | ARMV7_MLA, 120 | ARMV7_MLS, 121 | ARMV7_MOV, 122 | ARMV7_MOVS, 123 | ARMV7_MOVT, 124 | ARMV7_MOVW, 125 | ARMV7_MRC, 126 | ARMV7_MRC2, 127 | ARMV7_MRRC, 128 | ARMV7_MRRC2, 129 | ARMV7_MRS, 130 | ARMV7_MSR, 131 | ARMV7_MUL, 132 | ARMV7_MULS, 133 | ARMV7_MVN, 134 | ARMV7_MVNS, 135 | ARMV7_NOP, 136 | ARMV7_ORN, 137 | ARMV7_ORR, 138 | ARMV7_ORRS, 139 | ARMV7_PKHBT, 140 | ARMV7_PKHTB, 141 | ARMV7_PLD, 142 | ARMV7_PLDW, 143 | ARMV7_PLI, 144 | ARMV7_POP, 145 | ARMV7_PUSH, 146 | ARMV7_QADD, 147 | ARMV7_QADD16, 148 | ARMV7_QADD8, 149 | ARMV7_QASX, 150 | ARMV7_QDADD, 151 | ARMV7_QDSUB, 152 | ARMV7_QSAX, 153 | ARMV7_QSUB, 154 | ARMV7_QSUB16, 155 | ARMV7_QSUB8, 156 | ARMV7_RBIT, 157 | ARMV7_REV, 158 | ARMV7_REV16, 159 | ARMV7_REVSH, 160 | ARMV7_RFE, 161 | ARMV7_RFEDA, 162 | ARMV7_RFEDB, 163 | ARMV7_RFEIA, 164 | ARMV7_RFEIB, 165 | ARMV7_ROR, 166 | ARMV7_RORS, 167 | ARMV7_RRX, 168 | ARMV7_RSB, 169 | ARMV7_RSBS, 170 | ARMV7_RSC, 171 | ARMV7_SADD16, 172 | ARMV7_SADD8, 173 | ARMV7_SASX, 174 | ARMV7_SBC, 175 | ARMV7_SBCS, 176 | ARMV7_SBFX, 177 | ARMV7_SDIV, 178 | ARMV7_SEL, 179 | ARMV7_SETEND, 180 | ARMV7_SEV, 181 | ARMV7_SHADD16, 182 | ARMV7_SHADD8, 183 | ARMV7_SHASX, 184 | ARMV7_SHSAX, 185 | ARMV7_SHSUB16, 186 | ARMV7_SHSUB8, 187 | ARMV7_SMC, 188 | ARMV7_SMLABB, 189 | ARMV7_SMLABT, 190 | 
ARMV7_SMLAD, 191 | ARMV7_SMLADX, 192 | ARMV7_SMLAL, 193 | ARMV7_SMLALBB, 194 | ARMV7_SMLALBT, 195 | ARMV7_SMLALD, 196 | ARMV7_SMLALDX, 197 | ARMV7_SMLALTB, 198 | ARMV7_SMLALTT, 199 | ARMV7_SMLATB, 200 | ARMV7_SMLATT, 201 | ARMV7_SMLAWB, 202 | ARMV7_SMLAWT, 203 | ARMV7_SMLSD, 204 | ARMV7_SMLSDX, 205 | ARMV7_SMLSLD, 206 | ARMV7_SMLSLDX, 207 | ARMV7_SMMLA, 208 | ARMV7_SMMLAR, 209 | ARMV7_SMMLS, 210 | ARMV7_SMMLSR, 211 | ARMV7_SMMUL, 212 | ARMV7_SMMULR, 213 | ARMV7_SMUAD, 214 | ARMV7_SMUADX, 215 | ARMV7_SMULBB, 216 | ARMV7_SMULBT, 217 | ARMV7_SMULL, 218 | ARMV7_SMULTB, 219 | ARMV7_SMULTT, 220 | ARMV7_SMULWB, 221 | ARMV7_SMULWT, 222 | ARMV7_SMUSD, 223 | ARMV7_SMUSDT, 224 | ARMV7_SMUSDX, 225 | ARMV7_SRS, 226 | ARMV7_SRSDA, 227 | ARMV7_SRSDB, 228 | ARMV7_SRSIA, 229 | ARMV7_SRSIB, 230 | ARMV7_SSAT, 231 | ARMV7_SSAT16, 232 | ARMV7_SSAX, 233 | ARMV7_SSUB16, 234 | ARMV7_SSUB8, 235 | ARMV7_STC, 236 | ARMV7_STC2, 237 | ARMV7_STC2L, 238 | ARMV7_STCL, 239 | ARMV7_STL, // A32 240 | ARMV7_STLB, 241 | ARMV7_STLH, 242 | ARMV7_STLEX, // A32 243 | ARMV7_STLEXB, // A32 244 | ARMV7_STLEXH, // A32 245 | ARMV7_STLEXD, // A32 246 | ARMV7_STM, 247 | ARMV7_STMBD, 248 | ARMV7_STMDA, 249 | ARMV7_STMDB, 250 | ARMV7_STMIA, 251 | ARMV7_STMIB, 252 | ARMV7_STR, 253 | ARMV7_STRB, 254 | ARMV7_STRBT, 255 | ARMV7_STRD, 256 | ARMV7_STREX, 257 | ARMV7_STREXB, 258 | ARMV7_STREXD, 259 | ARMV7_STREXH, 260 | ARMV7_STRH, 261 | ARMV7_STRHT, 262 | ARMV7_STRT, 263 | ARMV7_SUB, 264 | ARMV7_SUBS, 265 | ARMV7_SUBW, 266 | ARMV7_SVC, 267 | ARMV7_SWP, 268 | ARMV7_SWPB, 269 | ARMV7_SXTAB, 270 | ARMV7_SXTAB16, 271 | ARMV7_SXTAH, 272 | ARMV7_SXTB, 273 | ARMV7_SXTB16, 274 | ARMV7_SXTH, 275 | ARMV7_TBB, 276 | ARMV7_TBH, 277 | ARMV7_TEQ, 278 | ARMV7_TRAP, 279 | ARMV7_TRT, 280 | ARMV7_TST, 281 | ARMV7_UADD16, 282 | ARMV7_UADD8, 283 | ARMV7_UASX, 284 | ARMV7_UBFX, 285 | ARMV7_UDF, 286 | ARMV7_UDIV, 287 | ARMV7_UHADD16, 288 | ARMV7_UHADD8, 289 | ARMV7_UHASX, 290 | ARMV7_UHSAX, 291 | ARMV7_UHSUB16, 292 | ARMV7_UHSUB8, 293 | 
ARMV7_UMAAL, 294 | ARMV7_UMLAL, 295 | ARMV7_UMULL, 296 | ARMV7_UQADD16, 297 | ARMV7_UQADD8, 298 | ARMV7_UQASX, 299 | ARMV7_UQSAX, 300 | ARMV7_UQSUB16, 301 | ARMV7_UQSUB8, 302 | ARMV7_USAD8, 303 | ARMV7_USADA8, 304 | ARMV7_USAT, 305 | ARMV7_USAT16, 306 | ARMV7_USAX, 307 | ARMV7_USUB16, 308 | ARMV7_USUB8, 309 | ARMV7_UXTAB, 310 | ARMV7_UXTAB16, 311 | ARMV7_UXTAH, 312 | ARMV7_UXTB, 313 | ARMV7_UXTB16, 314 | ARMV7_UXTH, 315 | ARMV7_VABA, 316 | ARMV7_VABAL, 317 | ARMV7_VABD, 318 | ARMV7_VABDL, 319 | ARMV7_VABS, 320 | ARMV7_VACGE, 321 | ARMV7_VACGT, 322 | ARMV7_VADD, 323 | ARMV7_VADDHN, 324 | ARMV7_VADDL, 325 | ARMV7_VADDW, 326 | ARMV7_VAND, 327 | ARMV7_VBIC, 328 | ARMV7_VBIF, 329 | ARMV7_VBIT, 330 | ARMV7_VBSL, 331 | ARMV7_VCEQ, 332 | ARMV7_VCGE, 333 | ARMV7_VCGT, 334 | ARMV7_VCLE, 335 | ARMV7_VCLS, 336 | ARMV7_VCLT, 337 | ARMV7_VCLZ, 338 | ARMV7_VCMP, 339 | ARMV7_VCMPE, 340 | ARMV7_VCNT, 341 | ARMV7_VCVT, 342 | ARMV7_VCVTA, 343 | ARMV7_VCVTB, 344 | ARMV7_VCVTM, 345 | ARMV7_VCVTN, 346 | ARMV7_VCVTP, 347 | ARMV7_VCVTR, 348 | ARMV7_VCVTT, 349 | ARMV7_VDIV, 350 | ARMV7_VDUP, 351 | ARMV7_VEOR, 352 | ARMV7_VEXT, 353 | ARMV7_VFMA, 354 | ARMV7_VFMS, 355 | ARMV7_VFNMA, 356 | ARMV7_VFNMS, 357 | ARMV7_VHADD, 358 | ARMV7_VHSUB, 359 | ARMV7_VLD1, 360 | ARMV7_VLD2, 361 | ARMV7_VLD3, 362 | ARMV7_VLD4, 363 | ARMV7_VLDM, 364 | ARMV7_VLDMDB, 365 | ARMV7_VLDMIA, 366 | ARMV7_VLDR, 367 | ARMV7_VMAX, 368 | ARMV7_VMAXNM, 369 | ARMV7_VMIN, 370 | ARMV7_VMINM, 371 | ARMV7_VMLA, 372 | ARMV7_VMLAL, 373 | ARMV7_VMLS, 374 | ARMV7_VMLSL, 375 | ARMV7_VMOV, 376 | ARMV7_VMOVL, 377 | ARMV7_VMOVN, 378 | ARMV7_VMRS, 379 | ARMV7_VMSR, 380 | ARMV7_VMUL, 381 | ARMV7_VMULL, 382 | ARMV7_VMVN, 383 | ARMV7_VNEG, 384 | ARMV7_VNMLA, 385 | ARMV7_VNMLS, 386 | ARMV7_VNMUL, 387 | ARMV7_VORN, 388 | ARMV7_VORR, 389 | ARMV7_VPADAL, 390 | ARMV7_VPADD, 391 | ARMV7_VPADDL, 392 | ARMV7_VPMAX, 393 | ARMV7_VPMIN, 394 | ARMV7_VPOP, 395 | ARMV7_VPUSH, 396 | ARMV7_VQABS, 397 | ARMV7_VQADD, 398 | ARMV7_VQDMLAL, 399 | 
ARMV7_VQDMLSL, 400 | ARMV7_VQDMULH, 401 | ARMV7_VQDMULL, 402 | ARMV7_VQMOVN, 403 | ARMV7_VQMOVUN, 404 | ARMV7_VQNEG, 405 | ARMV7_VQRDMULH, 406 | ARMV7_VQRSHL, 407 | ARMV7_VQRSHRN, 408 | ARMV7_VQRSHRUN, 409 | ARMV7_VQSHL, 410 | ARMV7_VQSHLU, 411 | ARMV7_VQSHRN, 412 | ARMV7_VQSHRUN, 413 | ARMV7_VQSUB, 414 | ARMV7_VRADDHN, 415 | ARMV7_VRECPE, 416 | ARMV7_VRECPS, 417 | ARMV7_VREV16, 418 | ARMV7_VREV32, 419 | ARMV7_VREV64, 420 | ARMV7_VRHADD, 421 | ARMV7_VRHSUB, 422 | ARMV7_VRINTA, 423 | ARMV7_VRINTM, 424 | ARMV7_VRINTN, 425 | ARMV7_VRINTP, 426 | ARMV7_VRINTR, 427 | ARMV7_VRINTX, 428 | ARMV7_VRINTZ, 429 | ARMV7_VRSHL, 430 | ARMV7_VRSHR, 431 | ARMV7_VRSHRN, 432 | ARMV7_VRSQRTE, 433 | ARMV7_VRSQRTS, 434 | ARMV7_VRSRA, 435 | ARMV7_VRSUBHN, 436 | ARMV7_VSEL, 437 | ARMV7_VSHL, 438 | ARMV7_VSHLL, 439 | ARMV7_VSHR, 440 | ARMV7_VSHRN, 441 | ARMV7_VSLI, 442 | ARMV7_VSQRT, 443 | ARMV7_VSRA, 444 | ARMV7_VSRI, 445 | ARMV7_VST1, 446 | ARMV7_VST2, 447 | ARMV7_VST3, 448 | ARMV7_VST4, 449 | ARMV7_VSTM, 450 | ARMV7_VSTMDB, 451 | ARMV7_VSTMIA, 452 | ARMV7_VSTR, 453 | ARMV7_VSUB, 454 | ARMV7_VSUBHN, 455 | ARMV7_VSUBL, 456 | ARMV7_VSUBW, 457 | ARMV7_VSWP, 458 | ARMV7_VTBL, 459 | ARMV7_VTBX, 460 | ARMV7_VTRN, 461 | ARMV7_VTST, 462 | ARMV7_VUZP, 463 | ARMV7_VZIP, 464 | ARMV7_WFE, 465 | ARMV7_WFI, 466 | ARMV7_YIELD, 467 | ARMV7_END_INSTRUCTION 468 | }; 469 | 470 | enum Shift { 471 | SHIFT_NONE, 472 | SHIFT_LSL, 473 | SHIFT_LSR, 474 | SHIFT_ASR, 475 | SHIFT_ROR, 476 | SHIFT_RRX, 477 | SHIFT_END 478 | }; 479 | 480 | enum Condition { 481 | COND_EQ, 482 | COND_NE, 483 | COND_CS, 484 | COND_CC, 485 | COND_MI, 486 | COND_PL, 487 | COND_VS, 488 | COND_VC, 489 | COND_HI, 490 | COND_LS, 491 | COND_GE, 492 | COND_LT, 493 | COND_GT, 494 | COND_LE, 495 | COND_NONE, 496 | COND_NONE2, 497 | COND_END 498 | }; 499 | 500 | 501 | enum RegisterList { 502 | REG_LIST_R0 = 0x0001, 503 | REG_LIST_R1 = 0x0002, 504 | REG_LIST_R2 = 0x0004, 505 | REG_LIST_R3 = 0x0008, 506 | REG_LIST_R4 = 0x0010, 507 | REG_LIST_R5 = 
0x0020, 508 | REG_LIST_R6 = 0x0040, 509 | REG_LIST_R7 = 0x0080, 510 | REG_LIST_R8 = 0x0100, 511 | REG_LIST_SB = 0x0200, 512 | REG_LIST_SL = 0x0400, 513 | REG_LIST_FP = 0x0800, 514 | REG_LIST_IP = 0x1000, 515 | REG_LIST_SP = 0x2000, 516 | REG_LIST_LR = 0x4000, 517 | REG_LIST_PC = 0x8000, 518 | }; 519 | 520 | enum Register 521 | { 522 | REG_R0 = 0, 523 | REG_R1, 524 | REG_R2, 525 | REG_R3, 526 | REG_R4, 527 | REG_R5, 528 | REG_R6, 529 | REG_R7, 530 | REG_R8, 531 | REG_R9, 532 | REG_R10, 533 | REG_R11, 534 | REG_R12, 535 | REG_SP, REG_R13 = 13, 536 | REG_LR, REG_R14 = 14, 537 | REG_PC, REG_R15 = 15, 538 | REG_S0, 539 | REG_S1, 540 | REG_S2, 541 | REG_S3, 542 | REG_S4, 543 | REG_S5, 544 | REG_S6, 545 | REG_S7, 546 | REG_S8, 547 | REG_S9, 548 | REG_S10, 549 | REG_S11, 550 | REG_S12, 551 | REG_S13, 552 | REG_S14, 553 | REG_S15, 554 | REG_S16, 555 | REG_S17, 556 | REG_S18, 557 | REG_S19, 558 | REG_S20, 559 | REG_S21, 560 | REG_S22, 561 | REG_S23, 562 | REG_S24, 563 | REG_S25, 564 | REG_S26, 565 | REG_S27, 566 | REG_S28, 567 | REG_S29, 568 | REG_S30, 569 | REG_S31, 570 | REG_D0, 571 | REG_D1, 572 | REG_D2, 573 | REG_D3, 574 | REG_D4, 575 | REG_D5, 576 | REG_D6, 577 | REG_D7, 578 | REG_D8, 579 | REG_D9, 580 | REG_D10, 581 | REG_D11, 582 | REG_D12, 583 | REG_D13, 584 | REG_D14, 585 | REG_D15, 586 | REG_D16, 587 | REG_D17, 588 | REG_D18, 589 | REG_D19, 590 | REG_D20, 591 | REG_D21, 592 | REG_D22, 593 | REG_D23, 594 | REG_D24, 595 | REG_D25, 596 | REG_D26, 597 | REG_D27, 598 | REG_D28, 599 | REG_D29, 600 | REG_D30, 601 | REG_D31, 602 | REG_Q0, 603 | REG_Q1, 604 | REG_Q2, 605 | REG_Q3, 606 | REG_Q4, 607 | REG_Q5, 608 | REG_Q6, 609 | REG_Q7, 610 | REG_Q8, 611 | REG_Q9, 612 | REG_Q10, 613 | REG_Q11, 614 | REG_Q12, 615 | REG_Q13, 616 | REG_Q14, 617 | REG_Q15, 618 | 619 | /* banked registers */ 620 | REGB_ELR_HYP, 621 | REGB_LR_ABT, 622 | REGB_LR_FIQ, 623 | REGB_LR_IRQ, 624 | REGB_LR_MON, 625 | REGB_LR_SVC, 626 | REGB_LR_UND, 627 | REGB_LR_USR, 628 | REGB_R10_FIQ, 629 | 
REGB_R10_USR, 630 | REGB_R11_FIQ, 631 | REGB_R11_USR, 632 | REGB_R12_FIQ, 633 | REGB_R12_USR, 634 | REGB_R8_FIQ, 635 | REGB_R8_USR, 636 | REGB_R9_FIQ, 637 | REGB_R9_USR, 638 | REGB_SPSR_ABT, 639 | REGB_SPSR_FIQ, 640 | REGB_SPSR_HYP, 641 | REGB_SPSR_IRQ, 642 | REGB_SPSR_MON, 643 | REGB_SPSR_SVC, 644 | REGB_SPSR_UND, 645 | REGB_SP_ABT, 646 | REGB_SP_FIQ, 647 | REGB_SP_HYP, 648 | REGB_SP_IRQ, 649 | REGB_SP_MON, 650 | REGB_SP_SVC, 651 | REGB_SP_UND, 652 | REGB_SP_USR, 653 | 654 | /* special registers */ 655 | REGS_APSR, 656 | REGS_APSR_G, 657 | REGS_APSR_NZCVQ, 658 | REGS_APSR_NZCVQG, 659 | REGS_CPSR, 660 | REGS_CPSR_C, 661 | REGS_CPSR_X, 662 | REGS_CPSR_XC, 663 | REGS_CPSR_S, 664 | REGS_CPSR_SC, 665 | REGS_CPSR_SX, 666 | REGS_CPSR_SXC, 667 | REGS_CPSR_F, 668 | REGS_CPSR_FC, 669 | REGS_CPSR_FX, 670 | REGS_CPSR_FXC, 671 | REGS_CPSR_FS, 672 | REGS_CPSR_FSC, 673 | REGS_CPSR_FSX, 674 | REGS_CPSR_FSXC, 675 | REGS_SPSR, 676 | REGS_SPSR_C, 677 | REGS_SPSR_X, 678 | REGS_SPSR_XC, 679 | REGS_SPSR_S, 680 | REGS_SPSR_SC, 681 | REGS_SPSR_SX, 682 | REGS_SPSR_SXC, 683 | REGS_SPSR_F, 684 | REGS_SPSR_FC, 685 | REGS_SPSR_FX, 686 | REGS_SPSR_FXC, 687 | REGS_SPSR_FS, 688 | REGS_SPSR_FSC, 689 | REGS_SPSR_FSX, 690 | REGS_SPSR_FSXC, 691 | REGS_APSR_NZCV, 692 | REGS_FPSID, // 0 693 | REGS_FPSCR, // 1 694 | REGS_MVFR2, // 5 695 | REGS_MVFR1, // 6 696 | REGS_MVFR0, // 7 697 | REGS_FPEXC, // 8 698 | REGS_FPINST, // 9 699 | REGS_FPINST2, //10 700 | REGS_MSP, 701 | REGS_PSP, 702 | 703 | // these are M-profile only (special) 704 | // but are here in ARM common (general) 705 | // TODO: implement "microarchitecture support" 706 | REGS_PRIMASK, 707 | REGS_BASEPRI, 708 | REGS_FAULTMASK, 709 | REGS_CONTROL, 710 | 711 | REG_INVALID, 712 | }; 713 | 714 | enum CoprocRegisterC { 715 | REG_C0, 716 | REG_C1, 717 | REG_C2, 718 | REG_C3, 719 | REG_C4, 720 | REG_C5, 721 | REG_C6, 722 | REG_C7, 723 | REG_C8, 724 | REG_C9, 725 | REG_C10, 726 | REG_C11, 727 | REG_C12, 728 | REG_C13, 729 | REG_C14, 730 | REG_C15, 
731 | REG_CEND 732 | }; 733 | 734 | enum CoprocRegisterP { 735 | REG_P0, 736 | REG_P1, 737 | REG_P2, 738 | REG_P3, 739 | REG_P4, 740 | REG_P5, 741 | REG_P6, 742 | REG_P7, 743 | REG_P8, 744 | REG_P9, 745 | REG_P10, 746 | REG_P11, 747 | REG_P12, 748 | REG_P13, 749 | REG_P14, 750 | REG_P15, 751 | REG_PEND 752 | }; 753 | 754 | enum Iflags { 755 | IFL_NONE, // 000 756 | IFL_A, // 001 757 | IFL_I, // 010 758 | IFL_IA, // 011 759 | IFL_F, // 100 760 | IFL_FA, // 101 761 | IFL_FI, // 110 762 | IFL_FIA, // 111 763 | IFL_END // 764 | }; 765 | 766 | enum EndianSpec { 767 | ES_LE, 768 | ES_BE 769 | }; 770 | 771 | enum DsbOption { 772 | DSB_NONE0, // 0 773 | DSB_NONE1, // 1 774 | DSB_OSHST, // 2 775 | DSB_OSH, // 3 776 | DSB_NONE4, // 4 777 | DSB_NONE5, // 5 778 | DSB_NSHST, // 6 779 | DSB_NSH, //7 780 | DSB_NONE8, // 8 781 | DSB_NONE9, // 9 782 | DSB_ISHST, // 10 783 | DSB_ISH, // 11 784 | DSB_NONE12, // 12 785 | DSB_NONE13, // 13 786 | DSB_ST, // 14 787 | DSB_SY, // 15 788 | DSB_END 789 | }; 790 | 791 | enum OperandClass { 792 | NONE, 793 | IMM, 794 | IMM64, 795 | LABEL, 796 | REG, 797 | REG_LIST, 798 | REG_LIST_SINGLE, 799 | REG_LIST_DOUBLE, 800 | REG_SPEC, 801 | REG_BANKED, 802 | REG_COPROCC, 803 | REG_COPROCP, 804 | IFLAGS, 805 | ENDIAN_SPEC, 806 | DSB_OPTION, 807 | MEM_ALIGNED, 808 | MEM_PRE_IDX, 809 | MEM_POST_IDX, 810 | MEM_IMM, 811 | MEM_OPTION, 812 | FIMM16, 813 | FIMM32, 814 | FIMM64 815 | }; 816 | 817 | enum DataType { 818 | DT_NONE = 0, 819 | DT_S8 = 1, 820 | DT_S16, 821 | DT_S32, 822 | DT_S64, 823 | DT_U8, 824 | DT_U16, 825 | DT_U32, 826 | DT_U64, 827 | DT_I8, 828 | DT_I16, 829 | DT_I32, 830 | DT_I64, 831 | DT_F16, 832 | DT_F32, 833 | DT_F64, 834 | DT_P8, 835 | DT_P16, 836 | DT_P32, 837 | DT_P64, 838 | DT_8, 839 | DT_16, 840 | DT_32, 841 | DT_64, 842 | DT_END 843 | }; 844 | 845 | struct InstructionOperand { 846 | enum OperandClass cls; 847 | struct { 848 | uint32_t wb:1; //write back? 
849 | uint32_t add:1; //Tells whether offset should be added or subtracted 850 | uint32_t hasElements:1; //does the register have an array index 851 | uint32_t emptyElement:1; 852 | uint32_t offsetRegUsed:1; //Is the offset register being used 853 | } flags; 854 | union { 855 | enum Register reg; 856 | enum Register regb; /* banked reg */ 857 | enum Register regs; /* special reg */ 858 | enum CoprocRegisterP regp; 859 | enum CoprocRegisterC regc; 860 | enum DsbOption dsbOpt; 861 | enum Iflags iflag; 862 | enum EndianSpec endian; 863 | enum Condition cond; 864 | }; 865 | enum Register offset; 866 | enum Shift shift; 867 | union { 868 | uint32_t imm; 869 | double immd; 870 | float immf; 871 | uint64_t imm64; 872 | }; 873 | }; 874 | 875 | struct Instruction{ 876 | enum Operation operation; 877 | enum Condition cond; 878 | enum DataType dataType; 879 | enum DataType dataType2; 880 | uint32_t setsFlags; 881 | uint32_t unpredictable; 882 | struct InstructionOperand operands[MAX_OPERANDS]; 883 | }; 884 | 885 | typedef union _ieee754 { 886 | uint32_t value; 887 | struct { 888 | uint32_t fraction:23; 889 | uint32_t exponent:8; 890 | uint32_t sign:1; 891 | }; 892 | float fvalue; 893 | }ieee754; 894 | 895 | typedef union _ieee754_double { 896 | uint64_t value; 897 | struct { 898 | uint64_t fraction:52; 899 | uint64_t exponent:11; 900 | uint64_t sign:1; 901 | }; 902 | double fvalue; 903 | }ieee754_double; 904 | 905 | #ifndef __cplusplus 906 | typedef enum OperandClass OperandClass; 907 | typedef enum Operation Operation; 908 | typedef enum Shift Shift; 909 | typedef enum Condition Condition; 910 | typedef enum Register Register; 911 | typedef enum BankedRegister BankedRegister; 912 | typedef enum SpecRegister SpecRegister; 913 | typedef enum CoprocRegisterP CoprocRegisterP; 914 | typedef enum CoprocRegisterC CoprocRegisterC; 915 | typedef enum DataType DataType; 916 | typedef enum Iflags Iflags; 917 | typedef enum EndianSpec EndianSpec; 918 | typedef enum DsbOption DsbOption; 
919 | typedef struct InstructionOperand InstructionOperand; 920 | typedef struct Instruction Instruction; 921 | #endif 922 | 923 | #ifdef __cplusplus 924 | extern "C" { 925 | #endif 926 | uint32_t armv7_decompose( 927 | uint32_t instructionValue, 928 | Instruction* restrict instruction, 929 | uint32_t address, 930 | uint32_t littleEndian); 931 | 932 | uint32_t armv7_disassemble( 933 | Instruction* restrict instruction, 934 | char* outBuffer, 935 | uint32_t outBufferSize); 936 | 937 | //Helpers for disassembling the instruction operands to strings 938 | const char* get_operation(Operation operation); 939 | char* get_full_operation(char* outBuffer, size_t outBufferSize, Instruction* restrict instruction); 940 | const char* get_vector_data_type(DataType dataType); 941 | const char* get_register_name(Register reg); 942 | const char* get_banked_register_name(Register regb); 943 | const char* get_spec_register_name(Register regs); 944 | const char* get_coproc_register_c_name(CoprocRegisterC regc); 945 | const char* get_coproc_register_p_name(CoprocRegisterP regp); 946 | const char* get_iflag(Iflags iflag); 947 | const char* get_endian(EndianSpec spec); 948 | const char* get_dsb_option(DsbOption opt); 949 | const char* get_shift(Shift shift); 950 | const char* get_condition(Condition cond); 951 | uint32_t get_register_size(Register reg); 952 | uint32_t get_register_names(Register reg, const char** regNames, OperandClass type); 953 | #ifdef __cplusplus 954 | } //end extern "C" 955 | #endif 956 | 957 | #ifdef __cplusplus 958 | } //end namespace 959 | #endif 960 | -------------------------------------------------------------------------------- /armv7_disasm/test.c: -------------------------------------------------------------------------------- 1 | // gcc -g test.c armv7.c -o test 2 | // lldb ./test -- e28f007b 3 | // b armv7_decompose 4 | // b armv7_disassemble 5 | // 6 | 7 | #include 8 | #include 9 | #include 10 | #include "armv7.h" 11 | 12 | int main(int ac, char **av) 13 
| { 14 | uint32_t insword = strtoul(av[1], NULL, 16); 15 | uint32_t address = 0; 16 | uint32_t endian = 0; 17 | uint32_t rc; 18 | 19 | Instruction instr; 20 | memset(&instr, 0, sizeof(instr)); 21 | 22 | rc = armv7_decompose(insword, &instr, address, endian); 23 | if(rc) { 24 | printf("ERROR: armv7_decompose() returned %d\n", rc); 25 | return rc; 26 | } 27 | 28 | char instxt[4096]; 29 | memset(instxt, 0, sizeof(instxt)); 30 | rc = armv7_disassemble(&instr, instxt, sizeof(instxt)); 31 | if(rc) { 32 | printf("ERROR: armv7_disassemble() returned %d\n", rc); 33 | return rc; 34 | } 35 | 36 | printf("%08X: %s\n", address, instxt); 37 | } 38 | 39 | -------------------------------------------------------------------------------- /il.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "binaryninjaapi.h" 4 | #include "armv7.h" 5 | 6 | #define IL_FLAG_N 0 7 | #define IL_FLAG_Z 2 8 | #define IL_FLAG_C 4 9 | #define IL_FLAG_V 6 10 | #define IL_FLAG_Q 8 11 | 12 | #define IL_FLAGWRITE_NONE 0 13 | #define IL_FLAGWRITE_ALL 1 14 | #define IL_FLAGWRITE_NZ 2 15 | #define IL_FLAGWRITE_CNZ 3 16 | 17 | struct decomp_result; 18 | 19 | enum Armv7Intrinsic : uint32_t 20 | { 21 | ARMV7_INTRIN_DBG, 22 | ARMV7_INTRIN_DMB_SY, 23 | ARMV7_INTRIN_DMB_ST, 24 | ARMV7_INTRIN_DMB_ISH, 25 | ARMV7_INTRIN_DMB_ISHST, 26 | ARMV7_INTRIN_DMB_NSH, 27 | ARMV7_INTRIN_DMB_NSHST, 28 | ARMV7_INTRIN_DMB_OSH, 29 | ARMV7_INTRIN_DMB_OSHST, 30 | ARMV7_INTRIN_DSB_SY, 31 | ARMV7_INTRIN_DSB_ST, 32 | ARMV7_INTRIN_DSB_ISH, 33 | ARMV7_INTRIN_DSB_ISHST, 34 | ARMV7_INTRIN_DSB_NSH, 35 | ARMV7_INTRIN_DSB_NSHST, 36 | ARMV7_INTRIN_DSB_OSH, 37 | ARMV7_INTRIN_DSB_OSHST, 38 | ARMV7_INTRIN_ISB, 39 | ARMV7_INTRIN_MRS, 40 | ARMV7_INTRIN_MSR, 41 | ARMV7_INTRIN_SEV, 42 | ARMV7_INTRIN_WFE, 43 | ARMV7_INTRIN_WFI, 44 | ARM_M_INTRIN_SET_BASEPRI, 45 | // Following names are from Table D17-2 of ARM DDI 0406C.d, changed from 46 | // CamelCase to UPPERCASE with underscores preserved 
and ARMV7_INTRIN_ prefixed. 47 | ARMV7_INTRIN_COPROC_GETONEWORD, // MRC, MRC2 48 | ARMV7_INTRIN_COPROC_GETTWOWORDS, // MRRC, MRRC2 49 | ARMV7_INTRIN_COPROC_SENDONEWORD, // MCR, MCR2 50 | ARMV7_INTRIN_COPROC_SENDTWOWORDS, // MCRR, MCRR2 51 | 52 | ARMV7_INTRIN_EXCLUSIVE_MONITORS_PASS, 53 | ARMV7_INTRIN_SET_EXCLUSIVE_MONITORS, 54 | }; 55 | 56 | enum ArmFakeRegister: uint32_t 57 | { 58 | FAKEREG_SYSCALL_INFO = armv7::REG_INVALID+1 59 | }; 60 | 61 | bool GetLowLevelILForArmInstruction(BinaryNinja::Architecture* arch, uint64_t addr, 62 | BinaryNinja::LowLevelILFunction& il, armv7::Instruction& instr, size_t addrSize); 63 | bool GetLowLevelILForThumbInstruction(BinaryNinja::Architecture* arch, 64 | BinaryNinja::LowLevelILFunction& il, decomp_result *instr, bool ifThenBlock = false); 65 | void SetupThumbConditionalInstructionIL(BinaryNinja::LowLevelILFunction& il, BinaryNinja::LowLevelILLabel& trueLabel, 66 | BinaryNinja::LowLevelILLabel& falseLabel, uint32_t cond); 67 | BinaryNinja::ExprId GetCondition(BinaryNinja::LowLevelILFunction& il, uint32_t cond); 68 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | test_cases_arm = [ 4 | # s14 = load(0x1000 + 8 + 912) 5 | (b'\xe4\x7a\x9f\xed', 'LLIL_SET_REG(s14,LLIL_LOAD(LLIL_ADD(LLIL_CONST_PTR(4104),LLIL_CONST(912))))'), # vldr s14, [pc, #0x390] 6 | (b'\x00\x7a\xcd\xed', 'LLIL_STORE(LLIL_REG(sp),LLIL_REG(s15))'), # vstr s15, [sp] 7 | (b'\x90\x2a\x17\xee', 'LLIL_SET_REG(r2,LLIL_REG(s15))'), # vmov r2, s15 8 | # encoding A1 of BFI 9 | # r0 = (r1 & 0b11111111111111111111111111100011) | ((r1 & 0b111) << 2) 10 | (b'\x11\x01\xc4\xe7', 'LLIL_SET_REG(r0,LLIL_OR(LLIL_AND(LLIL_REG(r0),LLIL_CONST(4294967267)),LLIL_LSL(LLIL_AND(LLIL_REG(r1),LLIL_CONST(7)),LLIL_CONST(2))))'), # bfi r0, r1, #2, #3 11 | # temp0 = r2*r3; r0=tmp0&0xFFFFFFFF; r1=tmp0>>32 ... 
LOGICAL shift since mul is unsigned 12 | (b'\x92\x03\x81\xe0', 'LLIL_SET_REG(temp0,LLIL_MUL(LLIL_REG(r2),LLIL_REG(r3))); LLIL_SET_REG(r0,LLIL_LOW_PART(LLIL_REG(temp0))); LLIL_SET_REG(r1,LLIL_LSR(LLIL_REG(temp0),LLIL_CONST(32)))'), # umull r0, r1, r2, r3 13 | # same, but ARITHMETIC shift since mul is signed 14 | (b'\x92\x03\xc1\xe0', 'LLIL_SET_REG(temp0,LLIL_MUL(LLIL_REG(r2),LLIL_REG(r3))); LLIL_SET_REG(r0,LLIL_LOW_PART(LLIL_REG(temp0))); LLIL_SET_REG(r1,LLIL_ASR(LLIL_REG(temp0),LLIL_CONST(32)))'), # smull r0, r1, r2, r3 15 | # multiply and accumulate: mla r0, r1, r2, r3 lift to r0 = r3 + (r1 * r2) 16 | (b'\x91\x32\x20\xe0', 'LLIL_SET_REG(r0,LLIL_ADD(LLIL_REG(r3),LLIL_MUL(LLIL_REG(r1),LLIL_REG(r2))))'), # mla r0, r1, r2, r3 17 | # multiply and subtract: mls r0, r1, r2, r3 lift to r0 = r3 - (r1 * r2) 18 | (b'\x91\x32\x60\xe0', 'LLIL_SET_REG(r0,LLIL_SUB(LLIL_REG(r3),LLIL_MUL(LLIL_REG(r1),LLIL_REG(r2))))'), # mls r0, r1, r2, r3 19 | # sdiv r1, r2, r3 lift to r1=r2/r3 (signed) 20 | (b'\x12\xf3\x11\xe7', 'LLIL_SET_REG(r1,LLIL_DIVS(LLIL_REG(r2),LLIL_REG(r3)))'), # 'sdiv r1, r2, r3' 21 | # udiv r1, r2, r3 lift to r1=r2/r3 (unsigned) 22 | (b'\x12\xf3\x31\xe7', 'LLIL_SET_REG(r1,LLIL_DIVU(LLIL_REG(r2),LLIL_REG(r3)))'), # 'udiv r1, r2, r3' 23 | # ubfx 24 | # ubfx r1, r2, #4, #4 should extract b7..b4, lift to r1=(r2>>4)&0b1111 25 | (b'\x52\x12\xe3\xe7', 'LLIL_SET_REG(r1,LLIL_AND(LLIL_LSR(LLIL_REG(r2),LLIL_CONST(4)),LLIL_CONST(15)))'), # 'ubfx r1, r2, #4, #4' 26 | # ubfx r2, r3, #4, #5 should extract b8..b4, lift to r2=(r3>>4)&0b11111 27 | (b'\x53\x22\xe4\xe7', 'LLIL_SET_REG(r2,LLIL_AND(LLIL_LSR(LLIL_REG(r3),LLIL_CONST(4)),LLIL_CONST(31)))'), # 'ubfx r2, r3, #4, #5' 28 | # ubfx r3, r4, #0, #16 should extract b15..b0, lift to r3=(r4>>0)&0b1111111111111111 29 | # though no shift is needed, no reason to complicate the lifter as the core should see x>>0 == x 30 | (b'\x54\x30\xef\xe7', 'LLIL_SET_REG(r3,LLIL_AND(LLIL_LSR(LLIL_REG(r4),LLIL_CONST(0)),LLIL_CONST(65535)))'), # 'ubfx r3, 
r4, #0, #16' 31 | (b'\x00\xf0\x20\xe3', ''), # nop, gets optimized from function 32 | ] 33 | 34 | test_cases_thumb2 = [ 35 | # itttt eq; add r0, r0, r0; add r0, r0, r0; add r0, r0, r0; add r0, r0, r0; 36 | # generate 1 if/goto that encloses 4 37 | (b'\x01\xbf\x00\x44\x00\x44\x00\x44\x00\x44', 'LLIL_IF(LLIL_FLAG(z),1,6); LLIL_SET_REG(r0,LLIL_ADD(LLIL_REG(r0),LLIL_REG(r0))); LLIL_SET_REG(r0,LLIL_ADD(LLIL_REG(r0),LLIL_REG(r0))); LLIL_SET_REG(r0,LLIL_ADD(LLIL_REG(r0),LLIL_REG(r0))); LLIL_SET_REG(r0,LLIL_ADD(LLIL_REG(r0),LLIL_REG(r0))); LLIL_GOTO(6)'), 38 | # ittt eq; add r0, r0, r0; add r0, r0, r0; add r0, r0, r0; add r0, r0, r0; 39 | # generate 1 if/goto that encloses 3 40 | (b'\x02\xbf\x00\x44\x00\x44\x00\x44\x00\x44', 'LLIL_IF(LLIL_FLAG(z),1,5); LLIL_SET_REG(r0,LLIL_ADD(LLIL_REG(r0),LLIL_REG(r0))); LLIL_SET_REG(r0,LLIL_ADD(LLIL_REG(r0),LLIL_REG(r0))); LLIL_SET_REG(r0,LLIL_ADD(LLIL_REG(r0),LLIL_REG(r0))); LLIL_GOTO(5); LLIL_SET_REG(r0,LLIL_ADD(LLIL_REG(r0),LLIL_REG(r0)))'), 41 | # itt eq; add r0, r0, r0; add r0, r0, r0; add r0, r0, r0; add r0, r0, r0; 42 | # generate 1 if/goto that encloses 2 43 | (b'\x04\xbf\x00\x44\x00\x44\x00\x44\x00\x44', 'LLIL_IF(LLIL_FLAG(z),1,4); LLIL_SET_REG(r0,LLIL_ADD(LLIL_REG(r0),LLIL_REG(r0))); LLIL_SET_REG(r0,LLIL_ADD(LLIL_REG(r0),LLIL_REG(r0))); LLIL_GOTO(4); LLIL_SET_REG(r0,LLIL_ADD(LLIL_REG(r0),LLIL_REG(r0))); LLIL_SET_REG(r0,LLIL_ADD(LLIL_REG(r0),LLIL_REG(r0)))'), 44 | # it eq; add r0, r0, r0; add r0, r0, r0; add r0, r0, r0; add r0, r0, r0; 45 | # generate 1 if/goto that encloses 1 46 | (b'\x08\xbf\x00\x44\x00\x44\x00\x44\x00\x44', 'LLIL_IF(LLIL_FLAG(z),1,3); LLIL_SET_REG(r0,LLIL_ADD(LLIL_REG(r0),LLIL_REG(r0))); LLIL_GOTO(3); LLIL_SET_REG(r0,LLIL_ADD(LLIL_REG(r0),LLIL_REG(r0))); LLIL_SET_REG(r0,LLIL_ADD(LLIL_REG(r0),LLIL_REG(r0))); LLIL_SET_REG(r0,LLIL_ADD(LLIL_REG(r0),LLIL_REG(r0)))'), 47 | # unsigned extend and add byte - LLIL_LOW_PART() has hidden parameter size=1 48 | (b'\x52\xfa\x83\xf1', 
'LLIL_SET_REG(r1,LLIL_ADD(LLIL_REG(r2),LLIL_ZX(LLIL_LOW_PART(LLIL_REG(r3)))))'), # uxtab r1, r2, r3 49 | # unsigned extend and add halfword - LLIL_LOW_PART() has hidden parameter size=2 50 | (b'\x12\xfa\x83\xf1', 'LLIL_SET_REG(r1,LLIL_ADD(LLIL_REG(r2),LLIL_ZX(LLIL_LOW_PART(LLIL_REG(r3)))))'), # ustah r1, r2, r3 51 | # clear b4...b2 so lift to r1 = r1 & 0b11111111111111111111111111100011 52 | (b'\x6f\xf3\x84\x01', 'LLIL_SET_REG(r1,LLIL_AND(LLIL_REG(r1),LLIL_CONST(4294967267)))'), # bfc r1, #2, #3 53 | # these differ only when the ifThenBlock varies 54 | # TODO: vary the ifThenBlock state 55 | (b'\x62\xeb\x03\x01', 'LLIL_SET_REG(r1,LLIL_SBB(LLIL_REG(r2),LLIL_REG(r3),LLIL_NOT(LLIL_FLAG(c))))'), # sbc r1, r2, r3 56 | (b'\x72\xeb\x03\x01', 'LLIL_SET_REG(r1,LLIL_SBB(LLIL_REG(r2),LLIL_REG(r3),LLIL_NOT(LLIL_FLAG(c))))'), # sbcs r1, r2, r3 57 | # same as arm 58 | (b'\x92\xfb\xf3\xf1', 'LLIL_SET_REG(r1,LLIL_DIVS(LLIL_REG(r2),LLIL_REG(r3)))'), # sdiv r1, r2, r3 59 | # same as arm 60 | (b'\xb2\xfb\xf3\xf1', 'LLIL_SET_REG(r1,LLIL_DIVU(LLIL_REG(r2),LLIL_REG(r3)))'), # udiv r1, r2, r3 61 | # encoding T1 of BFI should lift the same as encoding A1 62 | (b'\x61\xf3\x84\x00', 'LLIL_SET_REG(r0,LLIL_OR(LLIL_AND(LLIL_REG(r0),LLIL_CONST(4294967267)),LLIL_LSL(LLIL_AND(LLIL_REG(r1),LLIL_CONST(7)),LLIL_CONST(2))))'), # bfi r0, r1, #2, #3 63 | (b'\xb1\xfa\x81\xf0', 'LLIL_SET_REG(temp0,LLIL_CONST(0)); LLIL_SET_REG(temp1,LLIL_REG(r1)); LLIL_GOTO(3); LLIL_IF(LLIL_CMP_NE(LLIL_REG(temp1),LLIL_CONST(0)),4,7); LLIL_SET_REG(temp1,LLIL_LSR(LLIL_REG(temp1),LLIL_CONST(1))); LLIL_SET_REG(temp0,LLIL_ADD(LLIL_REG(temp0),LLIL_CONST(1))); LLIL_GOTO(3); LLIL_SET_REG(r0,LLIL_SUB(LLIL_CONST(32),LLIL_REG(temp0)))'), # 'clz r0, r1' 64 | (b'\x00\xbf', ''), # nop, gets optmized from function 65 | ] 66 | 67 | import sys 68 | import binaryninja 69 | from binaryninja import core 70 | from binaryninja import binaryview 71 | from binaryninja import lowlevelil 72 | 73 | def il2str(il): 74 | if isinstance(il, 
def check(test_i, data, actual, expected):
    """Compare one lifted result against its expectation.

    test_i   -- index of the test case, used only in the report
    data     -- raw instruction bytes; printed as hex in the report
    actual   -- IL string produced for `data`
    expected -- IL string the test table says should be produced

    Returns None when the strings match. On a mismatch a short report is
    printed, followed by 'MISMATCH!', and the process exits with -1.
    """
    # flip to True to dump every case, not just the failing ones
    print_always = False
    differs = actual != expected

    if differs or print_always:
        print('\t test: %d' % test_i)
        # built after the first print so the output order matches on any error
        remaining = (
            ('\t input: %s', data.hex()),
            ('\texpected: %s', expected),
            ('\t actual: %s', actual),
        )
        for template, value in remaining:
            print(template % value)

    if differs:
        print('MISMATCH!')
        sys.exit(-1)
2 | 3 | test_cases = \ 4 | [ 5 | # umaal r0, r1, r2, r3 6 | ('A', b'\x92\x03\x41\xe0', 'LLIL_SET_REG_SPLIT.d(r1,r0,LLIL_ADD.q(LLIL_MULU_DP.d(LLIL_REG.d(r3),LLIL_REG.d(r2)),LLIL_ADD.q(LLIL_REG.d(r1),LLIL_REG.d(r0))))'), 7 | # umlal r0, r1, r2, r3 8 | ('A', b'\x92\x03\xa1\xe0', 'LLIL_SET_REG_SPLIT.d(r1,r0,LLIL_ADD.q(LLIL_MULU_DP.d(LLIL_REG.d(r3),LLIL_REG.d(r2)),LLIL_REG_SPLIT.d(r1,r0)))'), 9 | # umlals r0, r1, r2, r3 10 | ('A', b'\x92\x03\xb1\xe0', 'LLIL_SET_REG_SPLIT.d{nz}(r1,r0,LLIL_ADD.q(LLIL_MULU_DP.d(LLIL_REG.d(r3),LLIL_REG.d(r2)),LLIL_REG_SPLIT.d(r1,r0)))'), 11 | # umulls r0, r1, r2, r3 12 | ('A', b'\x92\x03\x81\xe0', 'LLIL_SET_REG_SPLIT.d(r1,r0,LLIL_MULU_DP.d(LLIL_REG.d(r2),LLIL_REG.d(r3)))'), 13 | # smull r0, r1, r2, r3 14 | ('A', b'\x92\x03\xc1\xe0', 'LLIL_SET_REG_SPLIT.d(r1,r0,LLIL_MULS_DP.d(LLIL_REG.d(r2),LLIL_REG.d(r3)))'), 15 | # teq r0, #0 16 | ('A', b'\x00\x00\x30\xe3', 'LLIL_XOR.d{cnz}(LLIL_REG.d(r0),LLIL_CONST.d(0x0))'), 17 | # teq r1, #1 18 | ('A', b'\x01\x00\x31\xe3', 'LLIL_XOR.d{cnz}(LLIL_REG.d(r1),LLIL_CONST.d(0x1))'), 19 | # teq r2, #2 20 | ('A', b'\x02\x00\x32\xe3', 'LLIL_XOR.d{cnz}(LLIL_REG.d(r2),LLIL_CONST.d(0x2))'), 21 | # teq r3, #3 22 | ('A', b'\x03\x00\x33\xe3', 'LLIL_XOR.d{cnz}(LLIL_REG.d(r3),LLIL_CONST.d(0x3))'), 23 | 24 | # sxth r0, r1, ror #0 25 | ('A', b'\x71\x00\xbf\xe6', 'LLIL_SET_REG.d(r0,LLIL_SX.d(LLIL_LOW_PART.w(LLIL_REG.d(r1))))'), 26 | # sxth r0, r1, ror #0x8 27 | ('A', b'\x71\x04\xbf\xe6', 'LLIL_SET_REG.d(r0,LLIL_SX.d(LLIL_LOW_PART.w(LLIL_ROR.d(LLIL_REG.d(r1),LLIL_CONST.b(0x8)))))'), 28 | # sxth r0, r1, ror #0x10 29 | ('A', b'\x71\x08\xbf\xe6', 'LLIL_SET_REG.d(r0,LLIL_SX.d(LLIL_LOW_PART.w(LLIL_ROR.d(LLIL_REG.d(r1),LLIL_CONST.b(0x10)))))'), 30 | # sxth r0, r1, ror #0x18 31 | ('A', b'\x71\x0c\xbf\xe6', 'LLIL_SET_REG.d(r0,LLIL_SX.d(LLIL_LOW_PART.w(LLIL_ROR.d(LLIL_REG.d(r1),LLIL_CONST.b(0x18)))))'), 32 | 33 | # ror r0, r1 34 | ('A', b'\x70\x01\xa0\xe1', 
'LLIL_SET_REG.d(r0,LLIL_ROR.d(LLIL_REG.d(r0),LLIL_AND.b(LLIL_REG.d(r1),LLIL_CONST.b(0xFF))))'), 35 | # ror r0, 7 36 | ('A', b'\xe0\x03\xa0\xe1', 'LLIL_SET_REG.d(r0,LLIL_ROR.d(LLIL_REG.d(r0),LLIL_AND.b(LLIL_CONST.d(0x7),LLIL_CONST.b(0xFF))))'), 37 | # rors r0, r1 38 | ('A', b'\x70\x01\xb0\xe1', 'LLIL_SET_REG.d(r0,LLIL_ROR.d{*}(LLIL_REG.d(r0),LLIL_AND.b(LLIL_REG.d(r1),LLIL_CONST.b(0xFF))))'), 39 | # rors r0, 7 40 | ('A', b'\xe0\x03\xb0\xe1', 'LLIL_SET_REG.d(r0,LLIL_ROR.d{*}(LLIL_REG.d(r0),LLIL_AND.b(LLIL_CONST.d(0x7),LLIL_CONST.b(0xFF))))'), 41 | # vadd.f32 s0, s1, s2 42 | ('A', b'\x81\x0a\x30\xee', 'LLIL_SET_REG.d(s0,LLIL_FADD.d(LLIL_REG.d(s1),LLIL_REG.d(s2)))'), 43 | # vsub.f32 s0, s1, s2 44 | ('A', b'\xc1\x0a\x30\xee', 'LLIL_SET_REG.d(s0,LLIL_FSUB.d(LLIL_REG.d(s1),LLIL_REG.d(s2)))'), 45 | # vmul.f32 s0, s1, s2 46 | ('A', b'\x81\x0a\x20\xee', 'LLIL_SET_REG.d(s0,LLIL_FMUL.d(LLIL_REG.d(s1),LLIL_REG.d(s2)))'), 47 | # vdiv.f32 s0, s1, s2 48 | ('A', b'\x81\x0a\x80\xee', 'LLIL_SET_REG.d(s0,LLIL_FDIV.d(LLIL_REG.d(s1),LLIL_REG.d(s2)))'), 49 | # svc #0; svc #1; svc #2; svc #3 50 | ('A', b'\x00\x00\x00\xef', 'LLIL_SET_REG.d(syscall_info,LLIL_CONST.d(0x0)); LLIL_SYSCALL()'), 51 | ('A', b'\x01\x00\x00\xef', 'LLIL_SET_REG.d(syscall_info,LLIL_CONST.d(0x1)); LLIL_SYSCALL()'), 52 | ('A', b'\x02\x00\x00\xef', 'LLIL_SET_REG.d(syscall_info,LLIL_CONST.d(0x2)); LLIL_SYSCALL()'), 53 | ('A', b'\x03\x00\x00\xef', 'LLIL_SET_REG.d(syscall_info,LLIL_CONST.d(0x3)); LLIL_SYSCALL()'), 54 | # svcle #0xDEAD 55 | ('A', b'\xAD\xDE\x00\xdf', 'LLIL_IF(LLIL_FLAG_COND(LowLevelILFlagCondition.LLFC_SLE,None),1,4); LLIL_SET_REG.d(syscall_info,LLIL_CONST.d(0xDEAD)); LLIL_SYSCALL(); LLIL_GOTO(4)'), 56 | # svcgt #0xdead 57 | ('A', b'\xad\xde\x00\xcf', 'LLIL_IF(LLIL_FLAG_COND(LowLevelILFlagCondition.LLFC_SGT,None),1,4); LLIL_SET_REG.d(syscall_info,LLIL_CONST.d(0xDEAD)); LLIL_SYSCALL(); LLIL_GOTO(4)'), 58 | # mov r0, r1 59 | ('A', b'\x01\x00\xa0\xe1', 'LLIL_SET_REG.d(r0,LLIL_REG.d(r1))'), 60 | # nop 61 | ('A', 
b'\x00\xf0\x20\xe3', 'LLIL_NOP()'), 62 | # vmov.i32 d16, #0 63 | ('A', b'\x10\x00\xc0\xf2', 'LLIL_SET_REG.q(d16,LLIL_CONST.q(0x0))'), 64 | # vmov.i32 q8, #0 65 | ('A', b'\x50\x00\xc0\xf2', 'LLIL_SET_REG.o(q8,LLIL_OR.o(LLIL_CONST.q(0x0),LLIL_LSL.o(LLIL_CONST.q(0x0),LLIL_CONST.q(0x40))))'), 66 | # vmov.i32 d16, #1 67 | ('A', b'\x11\x00\xc0\xf2', 'LLIL_SET_REG.q(d16,LLIL_CONST.q(0x100000001))'), 68 | # vmov.i32 q8, #1 69 | ('A', b'\x51\x00\xc0\xf2', 'LLIL_SET_REG.o(q8,LLIL_OR.o(LLIL_CONST.q(0x100000001),LLIL_LSL.o(LLIL_CONST.q(0x100000001),LLIL_CONST.q(0x40))))'), 70 | # vmov.i16 d16, #0 71 | ('A', b'\x10\x08\xc0\xf2', 'LLIL_SET_REG.q(d16,LLIL_CONST.q(0x0))'), 72 | # vmov.i16 d16, #1 73 | ('A', b'\x11\x08\xc0\xf2', 'LLIL_SET_REG.q(d16,LLIL_CONST.q(0x1000100010001))'), 74 | # vmov.i8 d16, #1 75 | ('A', b'\x11\x0e\xc0\xf2', 'LLIL_SET_REG.q(d16,LLIL_CONST.q(0x101010101010101))'), 76 | # vmov.i8 q8, #1 77 | ('A', b'\x51\x0e\xc0\xf2', 'LLIL_SET_REG.o(q8,LLIL_OR.o(LLIL_CONST.q(0x101010101010101),LLIL_LSL.o(LLIL_CONST.q(0x101010101010101),LLIL_CONST.q(0x40))))'), 78 | # vstr s0, [r3, #0x8] 79 | ('A', b'\x02\x0a\x83\xed', 'LLIL_STORE.d(LLIL_ADD.d(LLIL_REG.d(r3),LLIL_CONST.d(0x8)),LLIL_REG.d(s0))'), 80 | # vstr d16, [r3, #0x8] 81 | ('A', b'\x02\x0b\xc3\xed', 'LLIL_STORE.q(LLIL_ADD.d(LLIL_REG.d(r3),LLIL_CONST.d(0x8)),LLIL_REG.q(d16))'), 82 | # mov r2, r0 83 | ('T', b'\x02\x46', 'LLIL_SET_REG.d(r2,LLIL_REG.d(r0))'), 84 | # cmp r1, r2 85 | ('T', b'\x91\x42', 'LLIL_SUB.d{*}(LLIL_REG.d(r1),LLIL_REG.d(r2))'), 86 | # cmp r1, r2, lsl #7 87 | ('T', b'\xb1\xeb\xc2\x1f', 'LLIL_SUB.d{*}(LLIL_REG.d(r1),LLIL_LSL.d(LLIL_REG.d(r2),LLIL_CONST.d(0x7)))'), 88 | # uadd8 r5, r2, r12 89 | ('T', b'\x82\xfa\x4c\xf5', 'LLIL_SET_REG.d(temp0,LLIL_ADD.b(LLIL_LOW_PART.b(LLIL_REG.d(r2)),LLIL_LOW_PART.b(LLIL_REG.d(r12)))); LLIL_SET_REG.d(temp1,LLIL_ADD.b(LLIL_LOW_PART.b(LLIL_LSR.d(LLIL_REG.d(r2),LLIL_CONST.b(0x8))),LLIL_LOW_PART.b(LLIL_LSR.d(LLIL_REG.d(r12),LLIL_CONST.b(0x8))))); 
LLIL_SET_REG.d(temp2,LLIL_ADD.b(LLIL_LOW_PART.b(LLIL_LSR.d(LLIL_REG.d(r2),LLIL_CONST.b(0x10))),LLIL_LOW_PART.b(LLIL_LSR.d(LLIL_REG.d(r12),LLIL_CONST.b(0x10))))); LLIL_SET_REG.d(temp3,LLIL_ADD.b(LLIL_LOW_PART.b(LLIL_LSR.d(LLIL_REG.d(r2),LLIL_CONST.b(0x18))),LLIL_LOW_PART.b(LLIL_LSR.d(LLIL_REG.d(r12),LLIL_CONST.b(0x18))))); LLIL_SET_REG.d(r5,LLIL_OR.d(LLIL_OR.d(LLIL_LSL.d(LLIL_REG.b(temp3),LLIL_CONST.b(0x18)),LLIL_LSL.d(LLIL_REG.b(temp2),LLIL_CONST.b(0x10))),LLIL_OR.d(LLIL_LSL.d(LLIL_REG.b(temp1),LLIL_CONST.b(0x8)),LLIL_REG.b(temp0))))'), 90 | # ldrex r0, [r1, #4] 91 | ('T', b'\x51\xe8\x01\x0f', 'LLIL_SET_REG.d(r0,LLIL_LOAD.d(LLIL_ADD.d(LLIL_REG.d(r1),LLIL_CONST.d(0x4))))'), 92 | # ldrexb r0, [r1] 93 | ('T', b'\xd1\xe8\x4f\x0f', 'LLIL_SET_REG.d(r0,LLIL_ZX.d(LLIL_LOAD.b(LLIL_REG.d(r1))))'), 94 | # ldrexh r0, [r1] 95 | ('T', b'\xd1\xe8\x5f\x0f', 'LLIL_SET_REG.d(r0,LLIL_ZX.d(LLIL_LOAD.w(LLIL_REG.d(r1))))'), 96 | # umlal r0, r1, r2, r3 97 | ('T', b'\xe2\xfb\x03\x01', 'LLIL_SET_REG_SPLIT.d(r1,r0,LLIL_ADD.q(LLIL_MULU_DP.d(LLIL_REG.d(r3),LLIL_REG.d(r2)),LLIL_REG_SPLIT.d(r1,r0)))'), 98 | # sbfx r0, r1, 0, 1 (starting at b0, width 1, so extract b0) 99 | ('T', b'\x41\xf3\x00\x00', 'LLIL_SET_REG.d(r0,LLIL_ASR.d(LLIL_LSL.d(LLIL_REG.d(r1),LLIL_CONST.b(0x1F)),LLIL_CONST.b(0x1F)))'), 100 | # sbfx r0, r1, 1, 2 (starting at b1, width 2, so extract b2b1) 101 | ('T', b'\x41\xf3\x41\x00', 'LLIL_SET_REG.d(r0,LLIL_ASR.d(LLIL_LSL.d(LLIL_REG.d(r1),LLIL_CONST.b(0x1D)),LLIL_CONST.b(0x1E)))'), 102 | # sbfx r0, r1, 20, 30 (starting at b20, width 30... 
def il2str(il):
    """Render a lifted IL expression as a compact, comparable string.

    Instructions are printed as NAME<size><flags>(operand,operand,...),
    where <size> is a suffix chosen from the instruction's size
    (1:'.b', 2:'.w', 4:'.d', 8:'.q', 16:'.o', anything else '?', and
    nothing at all when the size is zero/None) and <flags> is '{...}'
    when the instruction has a non-empty `flags` attribute.

    Sized LLIL_CONST and LLIL_CONST_PTR are both printed as
    'LLIL_CONST<size>(0x...)' in upper-case hex; negative values are
    wrapped to their unsigned representation at the instruction's width.
    No flags suffix is emitted for them.

    Lists are rendered as '[a,b,...]'; anything else falls back to str().
    """
    sz_lookup = {1:'.b', 2:'.w', 4:'.d', 8:'.q', 16:'.o'}

    if isinstance(il, list):
        return '[' + ','.join([il2str(x) for x in il]) + ']'

    if not isinstance(il, lowlevelil.LowLevelILInstruction):
        return str(il)

    size_code = sz_lookup.get(il.size, '?') if il.size else ''
    flags_code = '' if not hasattr(il, 'flags') or not il.flags else '{%s}'%il.flags

    # print size-specified IL constants in hex
    const_ops = [LowLevelILOperation.LLIL_CONST, LowLevelILOperation.LLIL_CONST_PTR]
    if il.operation in const_ops and il.size:
        value = il.operands[0]
        if value < 0:
            # two's complement wrap at il.size bytes
            value = (1<<(il.size*8))+value
        # il.size is known to be truthy here, so the value is always hex;
        # the old "else '%d' % il.size" arm could never run
        return 'LLIL_CONST%s(0x%X)' % (size_code, value)

    return '%s%s%s(%s)' % (il.operation.name, size_code, flags_code, ','.join([il2str(o) for o in il.operands]))
def il_str_to_tree(ilstr):
    """Reformat a flat IL string into an indented, one-node-per-line tree.

    '(' starts a new line one level deeper, ',' starts a new line at the
    current level, ')' only pops a level, ';' starts a new line and resets
    the level to zero, spaces are dropped, and every other character is
    copied through unchanged.
    """
    pieces = []
    level = 0
    for ch in ilstr:
        if ch == ' ':
            continue
        if ch == '(':
            level += 1
            pieces.append('\n' + ' '*level)
        elif ch == ')':
            level -= 1
        elif ch == ',':
            pieces.append('\n' + ' '*level)
        elif ch == ';':
            pieces.append('\n')
            level = 0
        else:
            pieces.append(ch)
    return ''.join(pieces)
want to attach debugger) 8 | # DEBUG_DECOMP - code included to print decomposition/decode debugging info 9 | # DEBUG_DISASM - code included to print disassembling debugging info 10 | DEBUG_ALL ?= 11 | ifneq ($(DEBUG_ALL),) 12 | DEBUG_BUILD = 1 13 | DEBUG_DECOMP = 1 14 | DEBUG_DISASM = 1 15 | endif 16 | 17 | DEBUG_BUILD ?= 18 | ifneq ($(DEBUG_BUILD),) 19 | $(info DEBUG_BUILD is on!) 20 | CPPFLAGS += -g -O0 -DDEBUG_BUILD 21 | else 22 | CPPFLAGS += -O3 23 | endif 24 | 25 | DEBUG_DECOMP ?= 26 | ifneq ($(DEBUG_DECOMP),) 27 | $(info DEBUG_DECOMP is on!) 28 | CPPFLAGS += -DDEBUG_DECOMP 29 | endif 30 | 31 | DEBUG_DISASM ?= 32 | ifneq ($(DEBUG_DISASM),) 33 | $(info DEBUG_DISASM is on!) 34 | CPPFLAGS += -DDEBUG_DISASM 35 | endif 36 | 37 | CPPFLAGS += -I/usr/local/include -I../armv7 -std=c++11 38 | 39 | all: spec.cpp 40 | 41 | spec.cpp: generator.py ./arm_pcode_parser/codegencpp.py spec.txt 42 | ./generator.py spec.txt 43 | 44 | forcegen: 45 | echo '' > spec.cpp 46 | ./generator.py spec.txt 47 | 48 | # 49 | # generated stuff 50 | # 51 | clean: 52 | rm *.o *.dylib 53 | rm -rf test.dSYM 54 | rm test 55 | 56 | -------------------------------------------------------------------------------- /thumb2_disasm/Makefile-linux: -------------------------------------------------------------------------------- 1 | # export DYLD_LIBRARY_PATH=/usr/local/opt/capstone/lib/:$DYLD_LIBRARY_PATH 2 | 3 | CPPFLAGS = 4 | 5 | # all paths reference this development environment (vs. installed path) 6 | PATH_DEV = $(abspath $(shell pwd)/../..) 
7 | PATH_API = $(PATH_DEV)/api 8 | PATH_CORE = $(PATH_DEV)/core 9 | PATH_PLUGINS = $(PATH_DEV)/ui/plugins 10 | 11 | # 12 | # debug stuff 13 | # 14 | 15 | # DEBUG_BUILD - files are compiled with debugging info, no optimizations (if you want to attach debugger) 16 | # DEBUG_DECOMP - code included to print decomposition/decode debugging info 17 | # DEBUG_DISASM - code included to print disassembling debugging info 18 | DEBUG_ALL ?= 19 | ifneq ($(DEBUG_ALL),) 20 | DEBUG_BUILD = 1 21 | DEBUG_DECOMP = 1 22 | DEBUG_DISASM = 1 23 | endif 24 | 25 | DEBUG_BUILD ?= 26 | ifneq ($(DEBUG_BUILD),) 27 | $(info DEBUG_BUILD is on!) 28 | CPPFLAGS += -g -O0 -DDEBUG_BUILD 29 | else 30 | CPPFLAGS += -O3 31 | endif 32 | 33 | DEBUG_DECOMP ?= 34 | ifneq ($(DEBUG_DECOMP),) 35 | $(info DEBUG_DECOMP is on!) 36 | CPPFLAGS += -DDEBUG_DECOMP 37 | endif 38 | 39 | DEBUG_DISASM ?= 40 | ifneq ($(DEBUG_DISASM),) 41 | $(info DEBUG_DISASM is on!) 42 | CPPFLAGS += -DDEBUG_DISASM 43 | endif 44 | 45 | 46 | CPPFLAGS += -I/usr/local/include -I../armv7 -std=c++11 47 | 48 | all: libthumb.so disassembler.o testadapt.so test 49 | 50 | # 51 | # shared objects (for direct use and testing) 52 | # 53 | libthumb.so: disassembler.o spec.o 54 | g++ -shared -fPIC $(CPPFLAGS) -o libthumb.so disassembler.o spec.o 55 | 56 | disassembler.o: disassembler.cpp disassembler.h spec.o 57 | g++ -fPIC $(CPPFLAGS) disassembler.cpp -c 58 | 59 | testadapt.so: testadapt.cpp disassembler.o 60 | g++ $(CPPFLAGS) \ 61 | -I$(PATH_API) \ 62 | -L$(PATH_PLUGINS) -larch_armv7 \ 63 | -L$(PATH_CORE) -lbinaryninjacore \ 64 | -L. 
-lthumb \ 65 | -lcapstone \ 66 | -shared -fPIC -o testadapt.so testadapt.cpp 67 | 68 | patchelf --add-needed libthumb.so testadapt.so 69 | patchelf --add-needed libcapstone.so testadapt.so 70 | patchelf --add-needed libarch_armv7.so testadapt.so 71 | patchelf --set-rpath /home/negasora/binaryninja/arch/thumb2:/home/negasora/binaryninja/arch/armv7 testadapt.so 72 | 73 | test: test.cpp disassembler.o 74 | g++ $(CPPFLAGS) -L../../core -lbinaryninjacore libthumb.so -lcapstone -o test test.cpp testadapt.so 75 | 76 | spec.o: spec.cpp 77 | g++ -fPIC $(CPPFLAGS) spec.cpp -c 78 | 79 | spec.cpp: generator.py ./arm_pcode_parser/codegencpp.py spec.txt 80 | ./generator.py spec_misc.txt 81 | 82 | forcegen: 83 | echo '' > spec.cpp 84 | ./generator.py spec_misc.txt 85 | 86 | # 87 | # generated stuff 88 | # 89 | clean: 90 | rm *.o *.so 91 | rm -rf test.dSYM 92 | rm test 93 | 94 | -------------------------------------------------------------------------------- /thumb2_disasm/README.md: -------------------------------------------------------------------------------- 1 | # ARM Thumb Decomposer/Disassembler 2 | This is a disassembler for ARM Thumb (mixed 16-bit and 32-bit). Currently, its scope does not contain Thumb2 or ThumbEE. 3 | 4 | # Terms 5 | I'm using "Decomposer" to mean that instruction data is analyzed and a useful description of that instruction data is produced. 6 | More specifically, an "instruction info" struct is created, capturing information about the instruction like its source registers and such. 7 | I'm using "Disassembler" to mean that this "instruction info" struct from the decompose stage can be parsed to generate a human-readable string that we commonly associate with disassembly. 8 | This contains the instruction mnemonic and operands and any annotations (like the S suffix or condition flag). 9 | 10 | # High Level Strategy 11 | Capture as much as possible from the specification (ARM Architecture Reference Manual, ARMv7-A and ARMv7-R edition). 
12 | Currently that's being done in spec.graph. 13 | Then, parse that information (see process.py) into C source (see generated.c). 14 | Finally, add another source file that calls into generated.c to interface with the rest of Binary Ninja (haven't looked at this yet). 15 | 16 | # Lower Level Strategy 17 | The tables of instructions become nodes in a graph. 18 | When one table refers to another, that's an edge to another table. 19 | And when a table holds only references to instruction encodings, it's a terminal node. 20 | So decomposing/disassembling is traversing the graph from root to tip. 21 | The intermediate language used to capture this table/node info kind of gets into a tradeoff game. 22 | On one hand, I want to be able to copy/paste as much as possible from the spec. 23 | On the other, I want the language to be simple enough that I don't need to recall anything from CompSci to write a simple parser for it. 24 | The parser can be written in a nice easy language too; here, Python. 25 | 26 | # How To Actually Generate? 27 | Just run generator.py. It will read spec.txt and write spec.cpp. 
28 | 29 | # Notes 30 | - '.n' and '.w' qualifier/specifier select the narrow and wide encodings 31 | - 's' suffix on instructions means it updates the flags 32 | - cmp,cmn,tst,teq are result-less forms of subs,adds,ands,eors, but only update flags (but don't require the extra 's' suffix) 33 | - there are 4 flags N,Z,C,V for negative,zero,carry,overflow 34 | - the 'c' on b is a conditional execution code (14 total) that test the flags 35 | - code can be 'eq', 'ne', 'cs'/'hs', 'cc'/'lo', 'mi', 'pl', 'vs', 'vc', 'hi', 'ls', 'ge', 'lt', 'gt', 'le', 'al'/'' 36 | 37 | -------------------------------------------------------------------------------- /thumb2_disasm/arch_thumb2.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vector35/arch-armv7/00cf57aec5e12169dc2ee3916a5fbaf4188a63e0/thumb2_disasm/arch_thumb2.h -------------------------------------------------------------------------------- /thumb2_disasm/arm_pcode_parser/Makefile: -------------------------------------------------------------------------------- 1 | all: parse.py 2 | 3 | parse.py: pcode.ebnf 4 | grako --name pcode pcode.ebnf -o parse.py 5 | 6 | clean: 7 | rm parse.py *.pyc 8 | -------------------------------------------------------------------------------- /thumb2_disasm/arm_pcode_parser/README.md: -------------------------------------------------------------------------------- 1 | # goal 2 | translate the pseudocode for arm instructions (given in the docs) to target languages 3 | 4 | once the pcode is extracted, automatic generation of ultra-accurate disassemblers should become possible 5 | 6 | # how 7 | use Grako parser generator, describe the language (pcode.ebnf) and write code generator (codegen.py) 8 | 9 | # example 10 | input statement: 11 | ``` 12 | if n == 15 || BitCount(registers) < 2 || (P == '1' && M == '1') then UNPREDICTABLE 13 | ``` 14 | 15 | output parse tree: 16 | ``` 17 | [ 18 | "if", 19 | [ 20 | "n", 21 | [], 22 | [ 23 | 
"==", 24 | [ 25 | "15", 26 | [] 27 | ], 28 | "||", 29 | [ 30 | "BitCount(", 31 | [ 32 | "registers", 33 | [] 34 | ], 35 | ")" 36 | ], 37 | "<", 38 | [ 39 | "2", 40 | [] 41 | ], 42 | "||", 43 | [ 44 | "(", 45 | "P", 46 | [], 47 | [ 48 | "==", 49 | [ 50 | "'1'", 51 | [] 52 | ], 53 | "&&", 54 | [ 55 | "M", 56 | [] 57 | ], 58 | "==", 59 | [ 60 | "'1'", 61 | [] 62 | ] 63 | ], 64 | [], 65 | ")" 66 | ] 67 | ] 68 | ], 69 | "then", 70 | "UNPREDICTABLE" 71 | ] 72 | ``` 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /thumb2_disasm/arm_pcode_parser/codegencpp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import re 4 | import os 5 | import sys 6 | 7 | from parse import pcodeParser, pcodeSemantics 8 | 9 | DEBUG = 0 10 | 11 | ############################################################################### 12 | # misc utils 13 | ############################################################################### 14 | 15 | # convert "MOV (register)" text to the function that handles it 16 | # ->"mov_register" 17 | def convertHandlerName(name): 18 | # non-word chars to underscore 19 | name = re.sub(r'[^\w]', '_', name) 20 | # no leading or trailing underscore 21 | name = re.sub(r'^_*(.*?)_*$', r'\1', name) 22 | # no multiple underscore runs 23 | name = re.sub(r'_+', '_', name) 24 | # lowercase 25 | name = name.lower() 26 | return name 27 | 28 | def applyIndent(text, level=0): 29 | savedTrailingWhitespace = '' 30 | while len(text)>0 and text[-1].isspace(): 31 | savedTrailingWhitespace = text[-1] + savedTrailingWhitespace 32 | text = text[0:-1] 33 | text = text.rstrip() 34 | spacer = '\t' * level 35 | lines = text.split('\n') 36 | lines = map(lambda s: '%s%s' % (spacer, s), lines) 37 | return '\n'.join(lines) + savedTrailingWhitespace 38 | 39 | ############################################################################### 40 | # "better" nodes ... 
cleaned up AST nodes that can eval() themselves 41 | ############################################################################### 42 | 43 | class BetterNode(object): 44 | def __init__(self, name, children=[], semicolon=False): 45 | self.name = name 46 | self.children = children 47 | self.semicolon = semicolon 48 | 49 | def gen(self, extra=''): 50 | # leaf nodes (no possible descent) 51 | if self.name == 'ident': 52 | tmp = (self.children[0] + extra).replace('.', '_') 53 | if not tmp.startswith('FIELD_'): 54 | tmp = 'FIELD_' + tmp 55 | code = 'res->fields[%s]' % tmp 56 | elif self.name == 'rawtext': 57 | code = self.children[0] 58 | elif self.name == 'number': 59 | code = self.children[0] 60 | elif self.name == 'bits': 61 | code = '0x%X' % int(self.children[0], 2) 62 | elif self.name == 'see': 63 | code = '\nmemset(res, 0, sizeof(*res));' 64 | code = '\nreturn %s(req, res);' % self.children[0] 65 | self.semicolon = 0 66 | else: 67 | subCode = map(lambda x: x.gen(), self.children) 68 | subCode = tuple(subCode) 69 | 70 | # binary operations translate directly to C 71 | if self.name == 'xor': 72 | assert len(self.children) == 2 73 | code = '(%s) ^ (%s)' % subCode 74 | elif self.name == 'add': 75 | assert len(self.children) == 2 76 | code = '(%s) + (%s)' % subCode 77 | elif self.name == 'sub': 78 | assert len(self.children) == 2 79 | code = '(%s) - (%s)' % subCode 80 | elif self.name == 'less_than': 81 | assert len(self.children) == 2 82 | code = '(%s) < (%s)' % subCode 83 | elif self.name == 'greater_than': 84 | assert len(self.children) == 2 85 | code = '(%s) > (%s)' % subCode 86 | elif self.name == 'log_and': 87 | assert len(self.children) == 2 88 | code = '(%s) && (%s)' % subCode 89 | elif self.name == 'log_or': 90 | assert len(self.children) == 2 91 | code = '(%s) || (%s)' % subCode 92 | elif self.name == 'log_not': 93 | assert len(self.children) == 1 94 | code = '!(%s)' % subCode 95 | elif self.name == 'equals': 96 | assert len(self.children) == 2 97 | code = 
'(%s) == (%s)' % subCode 98 | elif self.name == 'not_equals': 99 | assert len(self.children) == 2 100 | code = '(%s) != (%s)' % subCode 101 | elif self.name == 'less_than_or_equals': 102 | assert len(self.children) == 2 103 | code = '(%s) <= (%s)' % subCode 104 | elif self.name == 'greater_than_or_equals': 105 | assert len(self.children) == 2 106 | code = '(%s) >= (%s)' % subCode 107 | elif self.name == 'mul': 108 | assert len(self.children) == 2 109 | code = '(%s) * (%s)' % subCode 110 | elif self.name == 'div': 111 | assert len(self.children) == 2 112 | code = '((%s) ? ((%s) / (%s)) : 0)' % (subCode[1], subCode[0], subCode[1]) 113 | elif self.name == 'xor': 114 | assert len(self.children) == 2 115 | code = '(%s) ^ (%s)' % subCode 116 | elif self.name == 'shl': 117 | assert len(self.children) == 2 118 | code = '(%s) << (%s)' % subCode 119 | elif self.name == 'rshl': 120 | assert len(self.children) == 2 121 | code = '(%s) >> (%s)' % subCode 122 | 123 | # function calls to helpers 124 | elif self.name == 'BitCount': 125 | assert len(self.children) == 1 126 | code = 'BitCount(%s)' % subCode 127 | elif self.name == 'BadReg': 128 | code = 'BadReg(%s)' % subCode 129 | elif self.name == 'Consistent': 130 | assert self.children[0].name == 'ident' 131 | var = self.children[0].gen() 132 | varCheck = self.children[0].gen('_check') 133 | code = '(%s == %s)' % (var, varCheck) 134 | elif self.name == 'DecodeImmShift': 135 | codeA = 'DecodeImmShift_shift_t(%s, %s)' % subCode 136 | codeB = 'DecodeImmShift_shift_n(%s, %s)' % subCode 137 | code = codeA + ';\n' + codeB 138 | elif self.name == 'ThumbExpandImm': 139 | codeA = 'ThumbExpandImm_C_imm32(%s, req->carry_in)' % subCode 140 | # see A6.3.2 ThumbExpandImm_C() for explanation 141 | #codeB = ' if(((%s & 0xC00)==0) && ((%s & 0x300)==1||(%s & 0x300)==2) && (%s & 0xFF)==0) { res->flags |= FLAG_UNPREDICTABLE; }' % tuple([subCode]*4) 142 | codeB = '/* TODO: handle ThumbExpandImm_C\'s possible setting of UNPREDICTABLE */ while(0)' 143 
| code = codeA + ';\n' + codeB 144 | elif self.name == 'ThumbExpandImm_C': 145 | codeA = 'ThumbExpandImm_C_imm32(%s, %s)' % subCode 146 | codeB = 'ThumbExpandImm_C_cout(%s, %s)' % subCode 147 | # codeC = ' if(((%s & 0xC00)==0) && ((%s & 0x300)==1||(%s & 0x300)==2) && (%s & 0xFF)==0) { res->flags |= FLAG_UNPREDICTABLE; }' % tuple([subCode]*4) 148 | codeC = '/* TODO: handle ThumbExpandImm_C\'s possible setting of UNPREDICTABLE */ while(0)' 149 | code = codeA + ';\n' + codeB + ';\n' + codeC 150 | elif self.name == 'AdvSIMDExpandImm': 151 | code = "AdvSIMDExpandImm(%s, %s, %s, %s)" % subCode 152 | elif self.name == 'VFPExpandImm': 153 | code = "VFPExpandImm(%s, %s, %s)" % subCode 154 | elif self.name == 'UInt': 155 | code = '(%s)' % subCode[0] 156 | elif self.name == 'ZeroExtend': 157 | assert subCode[1] == '32' 158 | # zero extend is default when assigned to uint32_t 159 | # (which is type of fields[] array) 160 | code = '%s' % subCode[0] 161 | elif self.name == 'Zeros': 162 | code = '/* %s-bit */ 0' % subCode 163 | elif self.name == 'InITBlock': 164 | code = 'req->inIfThen == IFTHEN_YES' 165 | elif self.name == 'LastInITBlock': 166 | code = 'req->inIfThenLast == IFTHENLAST_YES' 167 | elif self.name == 'ArchVersion': 168 | code = 'req->arch' 169 | elif self.name == 'CurrentInstrSet': 170 | code = 'req->instrSet' 171 | elif self.name == 'SignExtend': 172 | code = 'SignExtend(%s,%s)' % (subCode[0], self.children[0].getWidth()) 173 | elif self.name == 'NOT': 174 | code = '(~(%s) & 1)' % subCode 175 | elif self.name == 'IsSecure': 176 | code = "req->arch & ARCH_SECURITY_EXTENSIONS /* || SCR.NS=='0' || CPSR.M=='10110' */" 177 | elif self.name == 'bitslice': 178 | if len(subCode) == 2: 179 | # then there is a single bit to extract 180 | shamt = int(subCode[1]) 181 | if shamt: 182 | code = '((%s >> %d) & 1)' % (subCode[0], shamt) 183 | else: 184 | code = '(%s & 1)' % subCode[0] 185 | else: 186 | # there is a bit range to extract, [hi,lo] 187 | hi = int(subCode[1]) 188 | lo = 
int(subCode[2]) 189 | assert hi > lo 190 | width = hi-lo+1 # spec's convention is to include the endpoints 191 | if lo: 192 | code = '((%s >> %d) & 0x%X)' % (subCode[0], lo, 2**width-1) 193 | else: 194 | code = '(%s & 0x%X)' % (subCode[0], 2**width-1) 195 | 196 | # if else 197 | elif self.name == 'if': 198 | if len(subCode) == 2: 199 | code = 'if(%s) {\n' % subCode[0] 200 | code += '\t%s\n' % '\n\t'.join(subCode[1].split('\n')) 201 | code += '}' 202 | elif len(subCode) == 3: 203 | code = 'if(%s) {\n' % subCode[0] 204 | code += '\t%s\n' % '\n\t'.join(subCode[1].split('\n')) 205 | code += '}\n' 206 | code += 'else {\n' 207 | code += '\t%s\n' % '\n\t'.join(subCode[2].split('\n')) 208 | code += '}' 209 | # tuples 210 | elif self.name == 'tuple': 211 | code = '\n'.join(subCode) 212 | 213 | # registers eg "registers" 214 | # this is tough 'cause two different types of code are generated 215 | # depending on whether this is being read or written 216 | # we generate read code here and let assignment override it 217 | elif self.name == 'registers': 218 | bitIdxer = self.children[0].gen() 219 | code = '(res->fields[FIELD_registers] & (1<<%s)) >> %s' % (bitIdxer, bitIdxer) 220 | 221 | elif self.name == 'cond': 222 | assert self.children[0].name == 'number' 223 | assert self.children[1].name == 'number' 224 | bitHi = int(self.children[0].gen()) 225 | bitLo = int(self.children[1].gen()) 226 | mask = (2**(bitHi+1)-1) - (2**bitLo-1) 227 | code = '(res->fields[FIELD_cond] & 0x%X) >> %d' % (mask, bitLo) 228 | 229 | # other 230 | elif self.name == 'dummy': 231 | code = '' 232 | elif self.name == 'nop': 233 | code = 'while(0)' 234 | elif self.name == 'Unpredictable': 235 | code = 'res->flags |= FLAG_UNPREDICTABLE' 236 | elif self.name == 'Undefined': 237 | code = 'res->status |= STATUS_UNDEFINED' 238 | elif self.name == 'not_permitted': 239 | code = 'res->flags |= FLAG_NOTPERMITTED' 240 | elif self.name == 'assign': 241 | codeLines = [] 242 | 243 | (lhs, rhs) = self.children 244 | 
245 | # special case: tuple 246 | if lhs.name == 'tuple': 247 | rhsCode = rhs.gen() 248 | #codeLines.append("// RHS before split: %s" % (repr(rhsCode))) 249 | rhsCode = re.split(r'[\n;]+', rhsCode) 250 | lhsCode = lhs.gen() 251 | lhsCode = re.split(r'[\n;]+', lhsCode) 252 | #codeLines.append("// LHS: %s RHS: %s" % (repr(lhsCode), repr(rhsCode))) 253 | for (i, dest) in enumerate(lhsCode): 254 | if not dest: # dummy generates '' 255 | continue 256 | codeLines.append('%s = %s' % (dest, rhsCode[i])) 257 | if dest.startswith('res->fields'): 258 | fieldName = dest[dest.index('[') + 1 : dest.index(']')] 259 | codeLines.append('res->fields_mask[%s >> 6] |= 1LL << (%s & 63)' % (fieldName, fieldName)) 260 | # any other statements not assigned to variables continue on 261 | for codeLine in rhsCode[len(lhsCode):]: 262 | codeLines.append(codeLine) 263 | 264 | # special case: a bit (eg: "registers = 1") 265 | elif lhs.name == 'registers': 266 | bitIdxer = lhs.children[0].gen() 267 | rhsBits = rhs.gen() 268 | codeLines.append('res->fields[FIELD_registers] |= (%s << %s)' % (rhsBits, bitIdxer)) 269 | codeLines.append('res->fields_mask[FIELD_registers >> 6] |= 1LL << (FIELD_registers & 63)') 270 | 271 | else: 272 | codeLines.append('%s = %s' % subCode) 273 | if subCode[0].startswith('res->fields'): 274 | fieldName = subCode[0][subCode[0].index('[') + 1 : subCode[0].index(']')] 275 | codeLines.append('res->fields_mask[%s >> 6] |= 1LL << (%s & 63)' % (fieldName, fieldName)) 276 | 277 | code = ';\n'.join(codeLines) 278 | 279 | elif self.name == 'group': 280 | code = '(%s)' % subCode 281 | elif self.name == 'concat': 282 | bitsPushing = 0 283 | varsPushing = [] 284 | pieces = [] 285 | 286 | for child in reversed(self.children): 287 | # calculate shift amount for this piece 288 | shContributers = [] 289 | if bitsPushing: 290 | shContributers += [str(bitsPushing)] 291 | if varsPushing: 292 | shContributers += varsPushing 293 | 294 | # join them into an expression 295 | shAmt = '' 296 | if 
len(shContributers) == 1: 297 | shAmt = shContributers[0] 298 | elif len(shContributers) > 1: 299 | shAmt = '(%s)' % '+'.join(shContributers) 300 | 301 | # generate code 302 | if shAmt: 303 | pieces.append('(%s<<%s)' % (child.gen(), shAmt)) 304 | else: 305 | pieces.append('(%s)' % child.gen()) 306 | 307 | # adjust shift amounts for next pieces 308 | if child.name == 'ident': 309 | # if ident is of special form, we know the width (eg: "imm12" has width 12) 310 | m = re.match(r'^[a-zA-Z]+(\d+)$', child.children[0]) 311 | if m: 312 | varsPushing.append(str(m.group(1))) 313 | # else, we rely on a _width variable being present 314 | else: 315 | varsPushing.append(child.children[0]+'_width') 316 | elif child.name == 'bits': 317 | bitsPushing += len(child.children[0]) 318 | else: 319 | raise Exception('concat cannot handle child type %s' % child.name) 320 | 321 | pieces.reverse() 322 | code = '|'.join(pieces) 323 | 324 | # failure 325 | else: 326 | raise Exception("dunno what to do with op %s" % self.name) 327 | 328 | 329 | if self.semicolon: 330 | code += ';' 331 | 332 | return code 333 | 334 | def getWidth(self): 335 | if self.name == 'concat': 336 | bitsPushing = 0 337 | varsPushing = [] 338 | pieces = [] 339 | 340 | contributors = [] 341 | 342 | for child in reversed(self.children): 343 | # adjust shift amounts for next pieces 344 | if child.name == 'ident': 345 | # if ident is of special form, we know the width (eg: "imm12" has width 12) 346 | m = re.match(r'^[a-zA-Z]+(\d+)$', child.children[0]) 347 | if m: 348 | contributors.append(str(m.group(1))) 349 | # else, we rely on a _width variable being present 350 | else: 351 | contributors.append(child.children[0]+'_width') 352 | elif child.name == 'bits': 353 | contributors += '%s' % len(child.children[0]) 354 | else: 355 | raise Exception('cannot get length of concat child %s' % child.name) 356 | 357 | return '+'.join(contributors) 358 | 359 | else: 360 | raise Exception("trying to get width for %s" % str(self)) 361 | 
362 | def __str__(self): 363 | buf = '%s(' % self.name 364 | buf += ','.join(map(str, self.children)) 365 | buf += ')' 366 | return buf 367 | 368 | 369 | ############################################################################### 370 | # delegate class that the parser calls after each rule is done 371 | # (replaces PcodeSemantics in parse.py) 372 | # note that arguments to the production rules end up arriving here 373 | ############################################################################### 374 | 375 | class PcodeSemantics(object): 376 | 377 | def start(self, ast): 378 | return ast 379 | 380 | def statement(self, ast): 381 | rv = None 382 | 383 | if ast == 'UNPREDICTABLE': 384 | rv = BetterNode('Unpredictable', [], True) 385 | elif ast == 'UNDEFINED': 386 | rv = BetterNode('Undefined', [], True) 387 | elif ast == 'NOT_PERMITTED': 388 | rv = BetterNode('not_permitted', [], True) 389 | elif ast[0] == 'SEE': 390 | assert len(ast)==2 391 | handler = convertHandlerName(ast[1]) 392 | rv = BetterNode('see', [handler], True) 393 | elif ast in [u'NOP', u'nop']: 394 | rv = BetterNode('nop', [], True) 395 | elif ast[0] == 'if': 396 | children = None 397 | 398 | if len(ast) == 5: 399 | antecedent = ast[1] 400 | assert ast[2] == 'then' 401 | consequent = ast[3] 402 | consequent.semicolon = True 403 | otherwise = None 404 | if ast[4] != []: 405 | assert ast[4][0][0] == 'else' 406 | otherwise = ast[4][0][1] 407 | if otherwise: 408 | children = [antecedent, consequent, otherwise] 409 | else: 410 | children = [antecedent, consequent] 411 | else: 412 | raise Exception('malformed ast for if: ', str(ast)) 413 | 414 | rv = BetterNode('if', children) 415 | 416 | elif ast[1] == '=': 417 | # simple assignments like 'foo = 5' 418 | if len(ast) == 3: 419 | rv = BetterNode('assign', [ast[0], ast[2]], True) 420 | # long assignments like 'foo = if bar == 3 then 1 else 2' 421 | elif len(ast) == 8: 422 | lval = ast[0] 423 | assert ast[2] == 'if' 424 | cond = ast[3] 425 | assert 
ast[4] == 'then' 426 | trueVal = ast[5] 427 | assert ast[6] == 'else' 428 | falseVal = ast[7] 429 | 430 | trueBlock = BetterNode('assign', [lval, trueVal], True) 431 | falseBlock = BetterNode('assign', [lval, falseVal], True) 432 | 433 | rv = BetterNode('if', [cond, trueBlock, falseBlock]) 434 | else: 435 | raise Exception('dunno what to do in statement semantics, ast is:', ast) 436 | 437 | global DEBUG 438 | if DEBUG: 439 | print("statement: returning", str(rv)) 440 | 441 | return rv 442 | 443 | def tuple(self, ast): 444 | rv = None 445 | 446 | # ast[0] is the '(' 447 | # ast[1] is the initial tuple token 448 | initChild = ast[1] 449 | if initChild == '-': 450 | initChild = BetterNode('dummy') 451 | 452 | rv = BetterNode('tuple', [initChild]) 453 | closure = ast[2] 454 | for i in closure: 455 | assert i[0]==',' 456 | if i[1] == '-': 457 | rv.children.append(BetterNode('dummy')) 458 | else: 459 | rv.children.append(i[1]) 460 | 461 | global DEBUG 462 | if DEBUG: 463 | print("tuple: returning", str(rv)) 464 | 465 | return rv 466 | 467 | def expr0(self, ast): 468 | rv = None 469 | 470 | if type(ast) == type([]): 471 | lookup = {'EOR':'xor', '+':'add', '-':'sub', 472 | '&&':'log_and', '||':'log_or' } 473 | 474 | cur = ast[0] 475 | closure = ast[1] 476 | 477 | for i in closure: 478 | op = i[0] 479 | nodeName = lookup[op] 480 | 481 | cur = BetterNode(nodeName, [cur, i[1]]) 482 | 483 | rv = cur 484 | else: 485 | rv = ast 486 | 487 | global DEBUG 488 | if DEBUG: 489 | print("expr0: returning", str(rv)) 490 | 491 | return rv 492 | 493 | def expr1(self, ast): 494 | rv = ast 495 | 496 | if type(ast) == type([]): 497 | lookup = {'*':'mul', '/':'div', 'XOR':'xor', 'DIV':'div', '==':'equals', '!=':'not_equals', 498 | '<':'less_than', '>':'greater_than', '<<':'shl', '>>':'rshl', 499 | '>=':'greater_than_or_equals', '<=':'less_than_or_equals'} 500 | 501 | cur = ast[0] 502 | closure = ast[1] 503 | 504 | for i in closure: 505 | op = i[0] 506 | nodeName = lookup[op] 507 | 508 | cur = 
BetterNode(nodeName, [cur, i[1]]) 509 | 510 | rv = cur 511 | else: 512 | rv = ast 513 | 514 | global DEBUG 515 | if DEBUG: 516 | print("expr1: returning", str(rv)) 517 | 518 | return rv 519 | 520 | def expr2(self, ast): 521 | rv = ast 522 | 523 | global DEBUG 524 | if DEBUG: 525 | print("expr2: returning", rv) 526 | 527 | return rv 528 | 529 | def expr3(self, ast): 530 | rv = 'BLUNDER' 531 | 532 | if type(ast) == type([]): 533 | #print('ast is: ', ast) 534 | 535 | # empty closure, return original 536 | if len(ast)==2 and ast[1]==[]: 537 | rv = ast[0] 538 | elif len(ast)>1: 539 | if ast[0] == '(': 540 | rv = BetterNode('group', [ast[1]]) 541 | elif ast[0] == '!': 542 | rv = BetterNode('log_not', [ast[1]]) 543 | elif type(ast[1]==[]): 544 | closure = ast[1] 545 | assert closure[0][0] == ':' 546 | bn = BetterNode('concat', [ast[0], closure[0][1]]) 547 | closure = closure[1:] 548 | for i in closure: 549 | assert i[0] == ':' 550 | bn.children.append(i[1]) 551 | rv = bn 552 | else: 553 | raise Exception("expr3(): unexpected ast: " + str(ast)) 554 | 555 | else: 556 | rv = ast 557 | 558 | global DEBUG 559 | if DEBUG: 560 | print("expr3: returning", str(rv)) 561 | 562 | return rv 563 | 564 | # 565 | def number(self, ast): 566 | # ast is just X where X is the number itself 567 | rv = BetterNode('number', [str(ast)]) 568 | 569 | global DEBUG 570 | if DEBUG: 571 | print("number: returning", str(rv)) 572 | 573 | return rv 574 | 575 | def bits(self, ast): 576 | rv = BetterNode('bits', [str(ast[1:-1])]) 577 | 578 | global DEBUG 579 | if DEBUG: 580 | print("bits: returning", str(rv)) 581 | 582 | return rv 583 | 584 | def ident(self, ast): 585 | #print('input ast is: ', str(ast)) 586 | 587 | # "foo" has ast ['foo', []] 588 | # "foo<3>" has ast ['foo', [['<', BetterNode(3), '>']]] 589 | # "foo<3,5>" has ast 590 | 591 | rv = BetterNode('ident', [str(ast)]) 592 | 593 | global DEBUG 594 | if DEBUG: 595 | print("ident: returning", rv) 596 | 597 | return rv 598 | 599 | def 
sliceable(self, ast): 600 | #print(ast) 601 | 602 | m = re.match(r'^(.*)<$', ast[0]) 603 | if not m: 604 | raise Exception('malformed sliceable statement') 605 | ident = BetterNode('ident', [m.group(1)]) 606 | 607 | if len(ast)==3: 608 | #print(str([m.group(1), ast[1]])) 609 | return BetterNode('bitslice', [ident, ast[1]]) 610 | elif len(ast)==5: 611 | return BetterNode('bitslice', [ident, ast[1], ast[3]]) 612 | else: 613 | raise Exception("sliceable confused by: %s" % str(ast)) 614 | 615 | def builtin_value(self, ast): 616 | lookup = {'FALSE':'0', 'TRUE':'1', 'SRType_LSL':'0', 'SRType_LSR':'1', 617 | 'SRType_ASR':'2', 'SRType_ROR':'3', 'SRType_RRX':'4', 618 | 'ARM_GRP_INVALID':0, 'ARM_GRP_JUMP':1, 'ARM_GRP_CRYPT':128, 619 | 'ARM_GRP_DATABARRIER':129, 'ARM_GRP_DIVIDE':130, 'ARM_GRP_FPARMV8':131, 620 | 'ARM_GRP_MULTPRO':132, 'ARM_GRP_NEON':133, 'ARM_GRP_T2EXTRACTPACK':134, 621 | 'ARM_GRP_THUMB2DSP':135, 'ARM_GRP_TRUSTZONE':136, 'ARM_GRP_V4T':137, 622 | 'ARM_GRP_V5T':138, 'ARM_GRP_V5TE':139, 'ARM_GRP_V6':140, 623 | 'ARM_GRP_V6T2':141, 'ARM_GRP_V7':142, 'ARM_GRP_V8':143, 624 | 'ARM_GRP_VFP2':144, 'ARM_GRP_VFP3':145, 'ARM_GRP_VFP4':146, 625 | 'ARM_GRP_ARM':147, 'ARM_GRP_MCLASS':148, 'ARM_GRP_NOTMCLASS':149, 626 | 'ARM_GRP_THUMB':150, 'ARM_GRP_THUMB1ONLY':151, 'ARM_GRP_THUMB2':152, 627 | 'ARM_GRP_PREV8':153, 'ARM_GRP_FPVMLX':154, 'ARM_GRP_MULOPS':155, 628 | 'ARM_GRP_CRC':156, 'ARM_GRP_DPVFP':157, 'ARM_GRP_V6M':158} 629 | 630 | # directly to numbers 631 | if ast[0] == 'registers<': 632 | assert ast[2] == '>' 633 | rv = BetterNode('registers', [ast[1]]) 634 | elif ast[0] == 'cond<': 635 | assert ast[2] == ':' 636 | assert ast[4] == '>' 637 | rv = BetterNode('cond', [ast[1], ast[3]]) 638 | elif ast == 'InstrSet_ThumbEE': 639 | rv = BetterNode('rawtext', ['INSTRSET_THUMBEE']) 640 | elif type(ast) == type(u'foo'): 641 | rv = BetterNode('number', [lookup[ast]]) 642 | else: 643 | raise Exception("builtin_value doesn't know how to handle ", ast) 644 | 645 | global DEBUG 646 | 
if DEBUG: 647 | print("builtin_value: returning", rv) 648 | 649 | return rv 650 | 651 | def func_call(self, ast): 652 | funcName = 'BLUNDER' 653 | args = [] 654 | rv = None 655 | 656 | # function without arguments 657 | if type(ast) == type(u'x'): 658 | funcName = ast[:-2] 659 | # function with arguments 660 | elif type(ast) == type([]): 661 | funcName = str(ast[0][:-1]) 662 | args = filter(lambda x: x!=',', ast[1:-1]) 663 | 664 | rv = BetterNode(funcName, args) 665 | 666 | global DEBUG 667 | if DEBUG: 668 | print("func_call: returning", rv) 669 | 670 | return rv 671 | 672 | ############################################################################### 673 | # function for library consumers 674 | ############################################################################### 675 | 676 | # take as input a single pcode statement 677 | def gen(pcode, rule='start', comments=True): 678 | 679 | # strip trailing whitespace or semicolons 680 | while pcode[-1] in [' ', '\t', ';']: 681 | pcode = pcode[0:-1] 682 | 683 | code = '' 684 | parser = pcodeParser(parseInfo=False) 685 | if comments: 686 | code = '/* pcode: %s */\n' % pcode 687 | tree = parser.parse(pcode, rule_name=rule, semantics=PcodeSemantics()) 688 | code += tree.gen() 689 | return code 690 | 691 | # take as input multiple pcode statements (separated by ";\n") 692 | def genBlock(pcode, comments=True): 693 | # 694 | result = [] 695 | 696 | # split on newlines 697 | lines = pcode.split('\n') 698 | 699 | # if there are multiple statements on a line, split them into multiple 700 | # lines, preserving the leading whitespace 701 | tmp = [] 702 | for l in lines: 703 | if not l or l.isspace(): 704 | continue 705 | 706 | if l.count(';') <= 1: 707 | tmp.append(l.replace(';', '')) 708 | continue 709 | 710 | m = re.match(r'^(\s*)(.*)$', l) 711 | leadSpace = m.group(1) 712 | for statement in m.group(2).split(';'): 713 | if not statement or statement.isspace(): 714 | continue 715 | m2 = re.match(r'^(\s*)(.*)$', statement) 716 
| tmp.append(leadSpace + m2.group(2)) 717 | 718 | lines = tmp 719 | 720 | if 0: 721 | print('after mass-lining:') 722 | print('\n'.join(lines)) 723 | 724 | # generate for each line, picking out case/when statements 725 | (caseVar, indent) = (None, 0) 726 | 727 | for l in lines: 728 | #print('line is: -%s-' % l) 729 | if l[0:5] == 'case ': 730 | m = re.match(r'^case (.*) of', l) 731 | result.append('/* pcode: %s */' % l.lstrip()) 732 | (caseVar, indent) = (m.group(1), 1) 733 | 734 | elif l[0:6] == '\twhen ' or l[0:9] == ' when ': 735 | keywords = 'else\nif' 736 | 737 | if indent == 1: 738 | # then we just started the "case ..." 739 | keywords = 'if' 740 | elif indent == 2: 741 | result.append('}') 742 | indent = 1 743 | else: 744 | raise Exception('expect "when" with 1 or 2 tab') 745 | 746 | m = re.match(r'^\s+when (.*)', l) 747 | clause = gen(m.group(1), 'expr0', False) 748 | result.append('/* pcode: %s */' % l.lstrip()) 749 | result.append('%s(res->fields[FIELD_%s] == %s) {' % (keywords, caseVar, clause)) 750 | 751 | indent = 2 752 | 753 | elif l[0:2] == '\t\t' or l[0:8] == ' ': 754 | if indent != 2: 755 | raise Exception('unexpected indent, is it under a "when" ?') 756 | m = re.match(r'^\s+(.*)', l) 757 | code = gen(m.group(1)) 758 | code = applyIndent(code, 1) 759 | result.append(code) 760 | 761 | else: 762 | if indent > 0: 763 | result.append('}') 764 | (caseVar, indent) = (None, 0) 765 | code = gen(l) 766 | result.append(code) 767 | 768 | return '\n'.join(result) 769 | 770 | ############################################################################### 771 | # main 772 | ############################################################################### 773 | 774 | testTarget = None 775 | 776 | if __name__ == '__main__': 777 | if len(sys.argv) > 1 and os.path.isfile(sys.argv[1]): 778 | fp = open(sys.argv[1], 'r') 779 | stuff = fp.read() 780 | fp.close() 781 | 782 | print(genBlock(stuff)) 783 | sys.exit(0) 784 | else: 785 | DEBUG = 1 786 | statement = sys.argv[1] 
787 | 788 | parser = pcodeParser(parseInfo=False) 789 | ast = parser.parse(statement, rule_name='start', semantics=PcodeSemantics()) 790 | print('true abstract syntax tree:') 791 | print(ast) 792 | print('generated code:') 793 | print(ast.gen()) 794 | -------------------------------------------------------------------------------- /thumb2_disasm/arm_pcode_parser/filter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # the docs will use a strange angled single quote, and when copy pasted 4 | # it shows up at by sequence \xe2\x80\x98 or \xe2\x80\x99 and a similar 5 | # case for double quotes 6 | 7 | # this remedies that problem on a given file 8 | 9 | 10 | import os 11 | import sys 12 | 13 | print "filtering %s" % sys.argv[1] 14 | fp = open(sys.argv[1],'rb') 15 | buf = fp.read() 16 | fp.close() 17 | 18 | len0 = len(buf) 19 | print "file size before: %d\n" % len0 20 | buf = buf.replace("\xe2\x80\x98", "'") 21 | buf = buf.replace("\xe2\x80\x99", "'") 22 | buf = buf.replace("\xe2\x80\x9C", '"') 23 | buf = buf.replace("\xe2\x80\x9D", '"') 24 | len1 = len(buf) 25 | print "file size after: %d\n" % len1 26 | print "(%d stupid quotes replaced)" % ((len0-len1)/3) 27 | 28 | fp = open(sys.argv[1],'wb') 29 | fp.write(buf) 30 | fp.close() 31 | 32 | -------------------------------------------------------------------------------- /thumb2_disasm/arm_pcode_parser/parse.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # CAVEAT UTILITOR 5 | # 6 | # This file was automatically generated by Grako. 7 | # 8 | # https://pypi.python.org/pypi/grako/ 9 | # 10 | # Any changes you make to it will be overwritten the next time 11 | # the file is generated. 
12 | 13 | 14 | from __future__ import print_function, division, absolute_import, unicode_literals 15 | 16 | from grako.buffering import Buffer 17 | from grako.parsing import graken, Parser 18 | from grako.util import re, RE_FLAGS, generic_main # noqa 19 | 20 | 21 | KEYWORDS = {} 22 | 23 | 24 | class pcodeBuffer(Buffer): 25 | def __init__( 26 | self, 27 | text, 28 | whitespace=None, 29 | nameguard=None, 30 | comments_re=None, 31 | eol_comments_re=None, 32 | ignorecase=None, 33 | namechars='', 34 | **kwargs 35 | ): 36 | super(pcodeBuffer, self).__init__( 37 | text, 38 | whitespace=whitespace, 39 | nameguard=nameguard, 40 | comments_re=comments_re, 41 | eol_comments_re=eol_comments_re, 42 | ignorecase=ignorecase, 43 | namechars=namechars, 44 | **kwargs 45 | ) 46 | 47 | 48 | class pcodeParser(Parser): 49 | def __init__( 50 | self, 51 | whitespace=None, 52 | nameguard=None, 53 | comments_re=None, 54 | eol_comments_re=None, 55 | ignorecase=None, 56 | left_recursion=False, 57 | parseinfo=True, 58 | keywords=None, 59 | namechars='', 60 | buffer_class=pcodeBuffer, 61 | **kwargs 62 | ): 63 | if keywords is None: 64 | keywords = KEYWORDS 65 | super(pcodeParser, self).__init__( 66 | whitespace=whitespace, 67 | nameguard=nameguard, 68 | comments_re=comments_re, 69 | eol_comments_re=eol_comments_re, 70 | ignorecase=ignorecase, 71 | left_recursion=left_recursion, 72 | parseinfo=parseinfo, 73 | keywords=keywords, 74 | namechars=namechars, 75 | buffer_class=buffer_class, 76 | **kwargs 77 | ) 78 | 79 | @graken() 80 | def _start_(self): 81 | self._statement_() 82 | with self._optional(): 83 | self._token(';') 84 | self._check_eof() 85 | 86 | @graken() 87 | def _statement_(self): 88 | with self._choice(): 89 | with self._option(): 90 | self._token('if') 91 | self._expr0_() 92 | self._token('then') 93 | self._statement_() 94 | 95 | def block0(): 96 | self._token('else') 97 | self._statement_() 98 | self._closure(block0) 99 | with self._option(): 100 | self._token('UNPREDICTABLE') 101 | 
with self._option(): 102 | self._token('UNDEFINED') 103 | with self._option(): 104 | self._token('NOT_PERMITTED') 105 | with self._option(): 106 | self._token('NOP') 107 | with self._option(): 108 | self._token('nop') 109 | with self._option(): 110 | self._token('SEE') 111 | self._whatever_() 112 | with self._option(): 113 | self._tuple_() 114 | self._token('=') 115 | self._expr0_() 116 | with self._option(): 117 | self._ident_() 118 | self._token('=') 119 | self._token('if') 120 | self._expr0_() 121 | self._token('then') 122 | self._expr0_() 123 | self._token('else') 124 | self._expr0_() 125 | with self._option(): 126 | self._expr0_() 127 | self._token('=') 128 | self._expr0_() 129 | self._error('expecting one of: NOP NOT_PERMITTED UNDEFINED UNPREDICTABLE nop') 130 | 131 | @graken() 132 | def _tuple_(self): 133 | self._token('(') 134 | with self._group(): 135 | with self._choice(): 136 | with self._option(): 137 | self._token('-') 138 | with self._option(): 139 | self._expr0_() 140 | self._error('expecting one of: -') 141 | 142 | def block1(): 143 | self._token(',') 144 | with self._group(): 145 | with self._choice(): 146 | with self._option(): 147 | self._token('-') 148 | with self._option(): 149 | self._expr0_() 150 | self._error('expecting one of: -') 151 | self._positive_closure(block1) 152 | self._token(')') 153 | 154 | @graken() 155 | def _expr0_(self): 156 | with self._choice(): 157 | with self._option(): 158 | self._expr1_() 159 | 160 | def block0(): 161 | with self._group(): 162 | with self._choice(): 163 | with self._option(): 164 | self._token('EOR') 165 | with self._option(): 166 | self._token('+') 167 | with self._option(): 168 | self._token('-') 169 | with self._option(): 170 | self._token('&&') 171 | with self._option(): 172 | self._token('||') 173 | self._error('expecting one of: && + - EOR ||') 174 | self._expr1_() 175 | self._positive_closure(block0) 176 | with self._option(): 177 | self._expr1_() 178 | self._error('no available options') 179 | 
180 | @graken() 181 | def _expr1_(self): 182 | with self._choice(): 183 | with self._option(): 184 | self._expr2_() 185 | 186 | def block0(): 187 | with self._group(): 188 | with self._choice(): 189 | with self._option(): 190 | self._token('*') 191 | with self._option(): 192 | self._token('/') 193 | with self._option(): 194 | self._token('<<') 195 | with self._option(): 196 | self._token('>>') 197 | with self._option(): 198 | self._token('DIV') 199 | with self._option(): 200 | self._token('XOR') 201 | self._error('expecting one of: * / << >> DIV XOR') 202 | self._expr2_() 203 | self._positive_closure(block0) 204 | with self._option(): 205 | self._expr2_() 206 | 207 | def block2(): 208 | with self._group(): 209 | with self._choice(): 210 | with self._option(): 211 | self._token('==') 212 | with self._option(): 213 | self._token('!=') 214 | with self._option(): 215 | self._token('<=') 216 | with self._option(): 217 | self._token('>=') 218 | with self._option(): 219 | self._token('<') 220 | with self._option(): 221 | self._token('>') 222 | self._error('expecting one of: != < <= == > >=') 223 | self._expr2_() 224 | self._positive_closure(block2) 225 | with self._option(): 226 | self._expr2_() 227 | self._error('no available options') 228 | 229 | @graken() 230 | def _expr2_(self): 231 | with self._choice(): 232 | with self._option(): 233 | self._func_call_() 234 | with self._option(): 235 | self._expr3_() 236 | self._error('no available options') 237 | 238 | @graken() 239 | def _expr3_(self): 240 | with self._choice(): 241 | with self._option(): 242 | self._builtin_value_() 243 | with self._option(): 244 | self._sliceable_() 245 | with self._option(): 246 | with self._group(): 247 | with self._choice(): 248 | with self._option(): 249 | self._ident_() 250 | with self._option(): 251 | self._number_() 252 | with self._option(): 253 | self._bits_() 254 | self._error('no available options') 255 | 256 | def block1(): 257 | self._token(':') 258 | with self._group(): 259 | with 
self._choice(): 260 | with self._option(): 261 | self._ident_() 262 | with self._option(): 263 | self._number_() 264 | with self._option(): 265 | self._bits_() 266 | self._error('no available options') 267 | self._closure(block1) 268 | with self._option(): 269 | self._tuple_() 270 | with self._option(): 271 | self._token('(') 272 | self._expr0_() 273 | self._token(')') 274 | with self._option(): 275 | self._token('!') 276 | self._expr0_() 277 | self._error('no available options') 278 | 279 | @graken() 280 | def _number_(self): 281 | self._pattern(r'\d+') 282 | 283 | @graken() 284 | def _bits_(self): 285 | self._pattern(r"'[01]+'") 286 | 287 | @graken() 288 | def _ident_(self): 289 | self._pattern(r'[a-zA-Z][\.\w]*') 290 | 291 | @graken() 292 | def _whatever_(self): 293 | self._pattern(r'.*') 294 | 295 | @graken() 296 | def _sliceable_(self): 297 | with self._choice(): 298 | with self._option(): 299 | self._token('index_align<') 300 | self._number_() 301 | self._token(':') 302 | self._number_() 303 | self._token('>') 304 | with self._option(): 305 | self._token('index_align<') 306 | self._number_() 307 | self._token('>') 308 | with self._option(): 309 | self._token('align<') 310 | self._number_() 311 | self._token('>') 312 | with self._option(): 313 | self._token('mask<') 314 | self._number_() 315 | self._token('>') 316 | with self._option(): 317 | self._token('imod<') 318 | self._number_() 319 | self._token('>') 320 | with self._option(): 321 | self._token('imm6<') 322 | self._number_() 323 | self._token('>') 324 | with self._option(): 325 | self._token('imm6<') 326 | self._number_() 327 | self._token(':') 328 | self._number_() 329 | self._token('>') 330 | with self._option(): 331 | self._token('imm8<') 332 | self._number_() 333 | self._token('>') 334 | with self._option(): 335 | self._token('Vd<') 336 | self._number_() 337 | self._token('>') 338 | with self._option(): 339 | self._token('Vn<') 340 | self._number_() 341 | self._token('>') 342 | with self._option(): 
343 | self._token('Vm<') 344 | self._number_() 345 | self._token('>') 346 | with self._option(): 347 | self._token('Vm<') 348 | self._number_() 349 | self._token(':') 350 | self._number_() 351 | self._token('>') 352 | with self._option(): 353 | self._token('cc<') 354 | self._number_() 355 | self._token('>') 356 | with self._option(): 357 | self._token('cmode<') 358 | self._number_() 359 | self._token('>') 360 | with self._option(): 361 | self._token('cmode<') 362 | self._number_() 363 | self._token(':') 364 | self._number_() 365 | self._token('>') 366 | self._error('no available options') 367 | 368 | @graken() 369 | def _builtin_value_(self): 370 | with self._choice(): 371 | with self._option(): 372 | self._token('TRUE') 373 | with self._option(): 374 | self._token('FALSE') 375 | with self._option(): 376 | self._token('registers<') 377 | with self._group(): 378 | with self._choice(): 379 | with self._option(): 380 | self._number_() 381 | with self._option(): 382 | self._ident_() 383 | self._error('no available options') 384 | self._token('>') 385 | with self._option(): 386 | self._token('list<') 387 | with self._group(): 388 | with self._choice(): 389 | with self._option(): 390 | self._number_() 391 | with self._option(): 392 | self._ident_() 393 | self._error('no available options') 394 | self._token('>') 395 | with self._option(): 396 | self._token('cond<') 397 | self._number_() 398 | self._token(':') 399 | self._number_() 400 | self._token('>') 401 | with self._option(): 402 | self._token('cond<') 403 | self._expr0_() 404 | self._token('>') 405 | with self._option(): 406 | self._token('SRType_LSL') 407 | with self._option(): 408 | self._token('SRType_LSR') 409 | with self._option(): 410 | self._token('SRType_ASR') 411 | with self._option(): 412 | self._token('SRType_ROR') 413 | with self._option(): 414 | self._token('SRType_RRX') 415 | with self._option(): 416 | self._token('InstrSet_ThumbEE') 417 | with self._option(): 418 | self._token('ARM_GRP_INVALID') 419 | 
with self._option(): 420 | self._token('ARM_GRP_JUMP') 421 | with self._option(): 422 | self._token('ARM_GRP_CRYPT') 423 | with self._option(): 424 | self._token('ARM_GRP_DATABARRIER') 425 | with self._option(): 426 | self._token('ARM_GRP_DIVIDE') 427 | with self._option(): 428 | self._token('ARM_GRP_FPARMV8') 429 | with self._option(): 430 | self._token('ARM_GRP_MULTPRO') 431 | with self._option(): 432 | self._token('ARM_GRP_NEON') 433 | with self._option(): 434 | self._token('ARM_GRP_T2EXTRACTPACK') 435 | with self._option(): 436 | self._token('ARM_GRP_THUMB2DSP') 437 | with self._option(): 438 | self._token('ARM_GRP_TRUSTZONE') 439 | with self._option(): 440 | self._token('ARM_GRP_V4T') 441 | with self._option(): 442 | self._token('ARM_GRP_V5T') 443 | with self._option(): 444 | self._token('ARM_GRP_V5TE') 445 | with self._option(): 446 | self._token('ARM_GRP_V6') 447 | with self._option(): 448 | self._token('ARM_GRP_V6T2') 449 | with self._option(): 450 | self._token('ARM_GRP_V7') 451 | with self._option(): 452 | self._token('ARM_GRP_V8') 453 | with self._option(): 454 | self._token('ARM_GRP_VFP2') 455 | with self._option(): 456 | self._token('ARM_GRP_VFP3') 457 | with self._option(): 458 | self._token('ARM_GRP_VFP4') 459 | with self._option(): 460 | self._token('ARM_GRP_ARM') 461 | with self._option(): 462 | self._token('ARM_GRP_MCLASS') 463 | with self._option(): 464 | self._token('ARM_GRP_NOTMCLASS') 465 | with self._option(): 466 | self._token('ARM_GRP_THUMB') 467 | with self._option(): 468 | self._token('ARM_GRP_THUMB1ONLY') 469 | with self._option(): 470 | self._token('ARM_GRP_THUMB2') 471 | with self._option(): 472 | self._token('ARM_GRP_PREV8') 473 | with self._option(): 474 | self._token('ARM_GRP_FPVMLX') 475 | with self._option(): 476 | self._token('ARM_GRP_MULOPS') 477 | with self._option(): 478 | self._token('ARM_GRP_CRC') 479 | with self._option(): 480 | self._token('ARM_GRP_DPVFP') 481 | with self._option(): 482 | self._token('ARM_GRP_V6M') 483 | 
self._error('expecting one of: ARM_GRP_ARM ARM_GRP_CRC ARM_GRP_CRYPT ARM_GRP_DATABARRIER ARM_GRP_DIVIDE ARM_GRP_DPVFP ARM_GRP_FPARMV8 ARM_GRP_FPVMLX ARM_GRP_INVALID ARM_GRP_JUMP ARM_GRP_MCLASS ARM_GRP_MULOPS ARM_GRP_MULTPRO ARM_GRP_NEON ARM_GRP_NOTMCLASS ARM_GRP_PREV8 ARM_GRP_T2EXTRACTPACK ARM_GRP_THUMB ARM_GRP_THUMB1ONLY ARM_GRP_THUMB2 ARM_GRP_THUMB2DSP ARM_GRP_TRUSTZONE ARM_GRP_V4T ARM_GRP_V5T ARM_GRP_V5TE ARM_GRP_V6 ARM_GRP_V6M ARM_GRP_V6T2 ARM_GRP_V7 ARM_GRP_V8 ARM_GRP_VFP2 ARM_GRP_VFP3 ARM_GRP_VFP4 FALSE InstrSet_ThumbEE SRType_ASR SRType_LSL SRType_LSR SRType_ROR SRType_RRX TRUE') 484 | 485 | @graken() 486 | def _func_call_(self): 487 | with self._choice(): 488 | with self._option(): 489 | self._bitcount_() 490 | with self._option(): 491 | self._badreg_() 492 | with self._option(): 493 | self._consistent_() 494 | with self._option(): 495 | self._decodeimmshift_() 496 | with self._option(): 497 | self._thumbexpandimm_() 498 | with self._option(): 499 | self._thumbexpandimm_c_() 500 | with self._option(): 501 | self._advsimdexpandimm_() 502 | with self._option(): 503 | self._vfpexpandimm_() 504 | with self._option(): 505 | self._uint_() 506 | with self._option(): 507 | self._zeroextend_() 508 | with self._option(): 509 | self._zeros_() 510 | with self._option(): 511 | self._initblock_() 512 | with self._option(): 513 | self._lastinitblock_() 514 | with self._option(): 515 | self._archversion_() 516 | with self._option(): 517 | self._currentinstrset_() 518 | with self._option(): 519 | self._signextend_() 520 | with self._option(): 521 | self._not_() 522 | with self._option(): 523 | self._issecure_() 524 | self._error('no available options') 525 | 526 | @graken() 527 | def _bitcount_(self): 528 | self._token('BitCount(') 529 | self._expr0_() 530 | self._token(')') 531 | 532 | @graken() 533 | def _badreg_(self): 534 | self._token('BadReg(') 535 | self._expr0_() 536 | self._token(')') 537 | 538 | @graken() 539 | def _consistent_(self): 540 | 
self._token('Consistent(') 541 | self._expr0_() 542 | self._token(')') 543 | 544 | @graken() 545 | def _decodeimmshift_(self): 546 | self._token('DecodeImmShift(') 547 | self._expr0_() 548 | self._token(',') 549 | self._expr0_() 550 | self._token(')') 551 | 552 | @graken() 553 | def _thumbexpandimm_(self): 554 | self._token('ThumbExpandImm(') 555 | self._expr0_() 556 | self._token(')') 557 | 558 | @graken() 559 | def _thumbexpandimm_c_(self): 560 | self._token('ThumbExpandImm_C(') 561 | self._expr0_() 562 | self._token(',') 563 | self._expr0_() 564 | self._token(')') 565 | 566 | @graken() 567 | def _advsimdexpandimm_(self): 568 | self._token('AdvSIMDExpandImm(') 569 | self._expr0_() 570 | self._token(',') 571 | self._expr0_() 572 | self._token(',') 573 | self._expr0_() 574 | self._token(',') 575 | self._expr0_() 576 | self._token(')') 577 | 578 | @graken() 579 | def _vfpexpandimm_(self): 580 | self._token('VFPExpandImm(') 581 | self._expr0_() 582 | self._token(',') 583 | self._expr0_() 584 | self._token(',') 585 | self._expr0_() 586 | self._token(')') 587 | 588 | @graken() 589 | def _uint_(self): 590 | self._token('UInt(') 591 | self._expr0_() 592 | self._token(')') 593 | 594 | @graken() 595 | def _zeroextend_(self): 596 | self._token('ZeroExtend(') 597 | self._expr0_() 598 | self._token(',') 599 | self._expr0_() 600 | self._token(')') 601 | 602 | @graken() 603 | def _zeros_(self): 604 | self._token('Zeros(') 605 | self._expr0_() 606 | self._token(')') 607 | 608 | @graken() 609 | def _initblock_(self): 610 | self._token('InITBlock()') 611 | 612 | @graken() 613 | def _lastinitblock_(self): 614 | self._token('LastInITBlock()') 615 | 616 | @graken() 617 | def _archversion_(self): 618 | self._token('ArchVersion()') 619 | 620 | @graken() 621 | def _currentinstrset_(self): 622 | self._token('CurrentInstrSet()') 623 | 624 | @graken() 625 | def _signextend_(self): 626 | self._token('SignExtend(') 627 | self._expr3_() 628 | self._token(', 32)') 629 | 630 | @graken() 631 | 
class pcodeSemantics(object):
    """Default semantic actions for the pcode grammar.

    There is one action per grammar rule, and every action hands the AST
    node back unchanged.  Subclass and override individual rule names to
    attach real behaviour.
    """

    def _identity(self, ast):
        """Return the node exactly as received."""
        return ast

    # statements and expressions
    start = statement = tuple = _identity
    expr0 = expr1 = expr2 = expr3 = _identity

    # terminals and simple values
    number = bits = ident = whatever = _identity
    sliceable = builtin_value = _identity

    # built-in function calls
    func_call = bitcount = badreg = consistent = _identity
    decodeimmshift = thumbexpandimm = thumbexpandimm_c = _identity
    advsimdexpandimm = vfpexpandimm = _identity
    uint = zeroextend = zeros = _identity
    initblock = lastinitblock = _identity
    archversion = currentinstrset = _identity
    signextend = not_ = issecure = _identity
def main(filename, startrule, **kwargs):
    """Parse the contents of `filename` with pcodeParser, starting at `startrule`.

    Extra keyword arguments are forwarded to the parser's parse() call,
    whose result is returned.
    """
    with open(filename) as source:
        contents = source.read()
    return pcodeParser().parse(contents, startrule, filename=filename, **kwargs)


if __name__ == '__main__':
    import json
    from grako.util import asjson

    # generic_main drives main() from the command line; print the resulting
    # AST both raw and as indented JSON
    ast = generic_main(main, pcodeParser, name='pcode')
    print('AST:')
    print(ast)
    print()
    print('JSON:')
    print(json.dumps(asjson(ast), indent=2))
    print()
# Grammar for the pseudocode fragments handled by the ARM pcode parser.
# A parse covers exactly one statement, optionally terminated by ';'.
start = statement [';'] $;

statement = 'if' expr0 'then' statement {'else' statement} |
            "UNPREDICTABLE" |
            "UNDEFINED" |
            "NOT_PERMITTED" |
            "NOP" | "nop" |
            "SEE" whatever |
            tuple '=' expr0 |
            ident '=' 'if' expr0 'then' expr0 'else' expr0 |
            expr0 '=' expr0;

# tuples ('-' stands for an ignored position)
tuple = '(' ('-'|expr0) { ',' ('-'|expr0) }+ ')';

# could use the Kleene star here instead of an alternative rule, but I don't
# want to get back empty closures
expr0 = expr1 {('EOR' | '+' | '-' | '&&' | '||') expr1}+ |
        expr1;

expr1 = expr2 {('*'|'/'|'<<'|'>>'|'DIV'|'XOR') expr2}+ |
        expr2 {('==' | '!=' | '<=' | '>=' | '<' | '>') expr2}+ |
        expr2;

expr2 = func_call |
        expr3;

expr3 = builtin_value |
        sliceable |
        (ident|number|bits) {':'(ident|number|bits)}* |
        tuple |
        '(' expr0 ')' |
        '!' expr0;

number = /\d+/;

bits = /'[01]+'/;

ident = /[a-zA-Z][\.\w]*/;

whatever = /.*/;

# the variables that can have bit slices ... these are made separate
# because they intersect with the greater-than, less-than comparisons
sliceable = 'index_align<' number ':' number '>' |
            'index_align<' number '>' |
            'align<' number '>' |
            'mask<' number '>' |
            'imod<' number '>' |
            'imm6<' number '>' |
            'imm6<' number ':' number '>' |
            'imm8<' number '>' |
            'Vd<' number '>' |
            'Vn<' number '>' |
            'Vm<' number '>' |
            'Vm<' number ':' number '>' |
            'cc<' number '>' |
            'cmode<' number '>' |
            'cmode<' number ':' number '>';

# constants and angle-bracket forms that are recognised as single values
builtin_value = 'TRUE' |
                'FALSE' |
                'registers<' (number|ident) '>' |
                'list<' (number|ident) '>' |
                'cond<' number ':' number '>' |
                'cond<' expr0 '>' |
                'SRType_LSL' | 'SRType_LSR' | 'SRType_ASR' | 'SRType_ROR' | 'SRType_RRX' |
                'InstrSet_ThumbEE' |
                'ARM_GRP_INVALID' | 'ARM_GRP_JUMP' | 'ARM_GRP_CRYPT' | 'ARM_GRP_DATABARRIER' | 'ARM_GRP_DIVIDE' | 'ARM_GRP_FPARMV8' | 'ARM_GRP_MULTPRO' | 'ARM_GRP_NEON' | 'ARM_GRP_T2EXTRACTPACK' | 'ARM_GRP_THUMB2DSP' | 'ARM_GRP_TRUSTZONE' | 'ARM_GRP_V4T' | 'ARM_GRP_V5T' | 'ARM_GRP_V5TE' | 'ARM_GRP_V6' | 'ARM_GRP_V6T2' | 'ARM_GRP_V7' | 'ARM_GRP_V8' | 'ARM_GRP_VFP2' | 'ARM_GRP_VFP3' | 'ARM_GRP_VFP4' | 'ARM_GRP_ARM' | 'ARM_GRP_MCLASS' | 'ARM_GRP_NOTMCLASS' | 'ARM_GRP_THUMB' | 'ARM_GRP_THUMB1ONLY' | 'ARM_GRP_THUMB2' | 'ARM_GRP_PREV8' | 'ARM_GRP_FPVMLX' | 'ARM_GRP_MULOPS' | 'ARM_GRP_CRC' | 'ARM_GRP_DPVFP' | 'ARM_GRP_V6M';

# function calls
func_call = bitcount | badreg | consistent | decodeimmshift | thumbexpandimm |
            thumbexpandimm_c | advsimdexpandimm | vfpexpandimm | uint | zeroextend | zeros | initblock | lastinitblock |
            archversion | currentinstrset | signextend | not | issecure;

bitcount = 'BitCount(' expr0 ')';
badreg = 'BadReg(' expr0 ')';
consistent = 'Consistent(' expr0 ')';
decodeimmshift = 'DecodeImmShift(' expr0 ',' expr0 ')';
thumbexpandimm = 'ThumbExpandImm(' expr0 ')';
thumbexpandimm_c = 'ThumbExpandImm_C(' expr0 ',' expr0 ')';
advsimdexpandimm = 'AdvSIMDExpandImm(' expr0 ',' expr0 ',' expr0 ',' expr0 ')';
vfpexpandimm = 'VFPExpandImm(' expr0 ',' expr0 ',' expr0 ')';
uint = 'UInt(' expr0 ')';
zeroextend = 'ZeroExtend(' expr0 ',' expr0 ')';
zeros = 'Zeros(' expr0 ')';
initblock = 'InITBlock()';
lastinitblock = 'LastInITBlock()';
archversion = 'ArchVersion()';
currentinstrset = 'CurrentInstrSet()';
# only the 32-bit form is accepted: the width is part of the literal token
signextend = 'SignExtend(' expr3 ', 32)';
not = 'NOT(' expr0 ')';
issecure = 'IsSecure()';
thumbexpandimm_c = 'ThumbExpandImm_C(' expr0 ',' expr0 ')'; 82 | advsimdexpandimm = 'AdvSIMDExpandImm(' expr0 ',' expr0 ',' expr0 ',' expr0 ')'; 83 | vfpexpandimm = 'VFPExpandImm(' expr0 ',' expr0 ',' expr0 ')'; 84 | uint = 'UInt(' expr0 ')'; 85 | zeroextend = 'ZeroExtend(' expr0 ',' expr0 ')'; 86 | zeros = 'Zeros(' expr0 ')'; 87 | initblock = 'InITBlock()'; 88 | lastinitblock = 'LastInITBlock()'; 89 | archversion = 'ArchVersion()'; 90 | currentinstrset = 'CurrentInstrSet()'; 91 | signextend = 'SignExtend(' expr3 ', 32)'; 92 | not = 'NOT(' expr0 ')'; 93 | issecure = 'IsSecure()'; 94 | 95 | -------------------------------------------------------------------------------- /thumb2_disasm/disassembler.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "spec.h" 7 | #include "disassembler.h" 8 | 9 | /* from ../armv7/armv7.h */ 10 | #include "armv7.h" 11 | using namespace armv7; 12 | 13 | using namespace std; 14 | 15 | /* helper prototypes */ 16 | int get_reg_name(int reg_idx, char *reg_name); 17 | 18 | /* decompose an instruction stream into a decomposition result */ 19 | int thumb_decompose(struct decomp_request *info, struct decomp_result *result) 20 | { 21 | int rc; 22 | 23 | /* initialize result */ 24 | result->flags = STATUS_OK; 25 | result->status = FLAG_NONE; 26 | result->addrMode = ADDRMODE_UNSPECIFIED; 27 | memset(result->fields_mask, 0, sizeof(result->fields_mask)); 28 | result->format = nullptr; 29 | result->formats = nullptr; 30 | result->formatCount = 0; 31 | result->pc = info->addr + 4; 32 | 33 | /* jump into generated code */ 34 | rc = thumb_root(info, result); 35 | 36 | /* easy case #1: only one format string */ 37 | if(result->formatCount == 1) { 38 | result->format = &result->formats[0]; 39 | } 40 | /* easy case #2: pcode specified which format to use */ 41 | else if(IS_FIELD_PRESENT(result, FIELD_fmt_idx)) { 42 | result->format = 
&result->formats[result->fields[FIELD_fmt_idx]]; 43 | } 44 | /* determine address mode for neon instructions (reference: A7.7.1 Advanced SIMD addressing mode) */ 45 | else if(result->group == INSN_GROUP_NEON) { 46 | uint32_t Rm = result->fields[FIELD_Rm]; 47 | 48 | if(Rm == 0xF) { 49 | /* [{@}] */ 50 | result->addrMode = ADDRMODE_ADVSIMD_0; 51 | } 52 | else if(Rm == 0xD) { 53 | /* [{@}]! */ 54 | result->addrMode = ADDRMODE_ADVSIMD_1; 55 | } 56 | else { 57 | /* [{@}], */ 58 | result->addrMode = ADDRMODE_ADVSIMD_2; 59 | } 60 | } 61 | /* determine address mode for 3-format instructions */ 62 | else if(result->formatCount == 3 && 63 | IS_FIELD_PRESENT(result, FIELD_index) && 64 | IS_FIELD_PRESENT(result, FIELD_wback)) 65 | { 66 | uint32_t index = result->fields[FIELD_index]; 67 | uint32_t wback = result->fields[FIELD_wback]; 68 | 69 | if(index && !wback) 70 | result->addrMode = ADDRMODE_OFFSET; 71 | else if(index && wback) 72 | result->addrMode = ADDRMODE_PREINDEX; 73 | else if(!index && wback) 74 | result->addrMode = ADDRMODE_POSTINDEX; 75 | else 76 | result->flags |= FLAG_ADDRMODE_AMBIGUITY; 77 | } 78 | 79 | /* determine the address mode for 2 and 4-format instructions */ 80 | else if((result->formatCount==2 || result->formatCount==4) && 81 | IS_FIELD_PRESENT(result, FIELD_P) && 82 | IS_FIELD_PRESENT(result, FIELD_W) && 83 | IS_FIELD_PRESENT(result, FIELD_U)) 84 | { 85 | uint32_t P = result->fields[FIELD_P]; 86 | uint32_t W = result->fields[FIELD_W]; 87 | uint32_t U = result->fields[FIELD_U]; 88 | 89 | if(result->formatCount == 4) { 90 | if(P && !W) result->addrMode = ADDRMODE_OFFSET; 91 | else if(P && W) result->addrMode = ADDRMODE_PREINDEX; 92 | else if(!P && W) result->addrMode = ADDRMODE_POSTINDEX; 93 | else if(!P && !W && U) result->addrMode = ADDRMODE_UNINDEXED; 94 | else result->flags |= FLAG_ADDRMODE_AMBIGUITY; 95 | } 96 | else if(result->formatCount == 2) { 97 | if(P && !W) result->addrMode = ADDRMODE_OFFSET; 98 | else if(!P && !W && U) result->addrMode = 
ADDRMODE_UNINDEXED; 99 | else result->flags |= FLAG_ADDRMODE_AMBIGUITY; 100 | } 101 | } 102 | 103 | /* choose from the n decompose formats 104 | result->formats[ addressing mode ] */ 105 | if((rc == STATUS_OK) && !(result->status & STATUS_UNDEFINED) && !(result->format)) { 106 | 107 | /* if we resolved an address mode, select the format */ 108 | if(result->addrMode != ADDRMODE_UNSPECIFIED) { 109 | result->format = &result->formats[result->addrMode]; 110 | } 111 | 112 | /* otherwise, just choose the first one in the list */ 113 | else if(result->formatCount >= 1) { 114 | result->flags |= FLAG_ADDRMODE_AMBIGUITY; 115 | result->format = &result->formats[0]; 116 | } 117 | else { 118 | printf("major error! no format in decomposition result\n"); 119 | rc = STATUS_UNDEFINED; 120 | } 121 | } 122 | 123 | #ifdef DEBUG_DISASM 124 | if(getenv("DEBUG_DISASM")) { 125 | printf("decomp_result status:\n"); 126 | if(result->status & STATUS_OK) printf(" OK\n"); 127 | if(result->status & STATUS_NO_BIT_MATCH) printf(" NO_BIT_MATCH\n"); 128 | if(result->status & STATUS_ARCH_UNSUPPORTED) printf(" ARCH_UNSUPPORTED\n"); 129 | if(result->status & STATUS_UNDEFINED) printf(" UNDEFINED\n"); 130 | 131 | printf("decomp_result flags:\n"); 132 | if(result->flags & FLAG_UNPREDICTABLE) printf(" UNPREDICTABLE\n"); 133 | if(result->flags & FLAG_NOTPERMITTED) printf(" NOTPERMITTED\n"); 134 | 135 | printf("address mode: %d\n", result->addrMode); 136 | if(result->addrMode == ADDRMODE_OFFSET) printf(" OFFSET\n"); 137 | if(result->addrMode == ADDRMODE_PREINDEX) printf(" PREINDEX\n"); 138 | if(result->addrMode == ADDRMODE_POSTINDEX) printf(" POSTINDEX\n"); 139 | if(result->addrMode == ADDRMODE_UNSPECIFIED) printf(" UNSPECIFIED\n"); 140 | } 141 | #endif 142 | 143 | return rc; 144 | } 145 | 146 | const char* get_thumb_condition_name(uint32_t cond) 147 | { 148 | static const char *COND_lookup_str[] = { 149 | "eq", /* equal, Z==1 */ 150 | "ne", /* not equal, Z==0 */ 151 | "cs", /* greater than, equal, or 
unordered C==1, AKA HS */ 152 | "cc", /* AKA "LO" */ 153 | "mi", 154 | "pl", 155 | "vs", 156 | "vc", 157 | "hi", 158 | "ls", 159 | "ge", 160 | "lt", 161 | "gt", 162 | "le", 163 | "al", 164 | "" 165 | }; 166 | 167 | if (cond >= 0x10) { 168 | #ifdef DEBUG_DISASM 169 | if(getenv("DEBUG_DISASM")) { 170 | cout << "ERROR: invalid condition code " << cond << endl; 171 | } 172 | #endif 173 | return ""; 174 | } 175 | return COND_lookup_str[cond]; 176 | } 177 | 178 | bool thumb_has_writeback(struct decomp_result* result) 179 | { 180 | /* for 16-bit LDM, {!} is removed if the base register is in the register list */ 181 | if(result->mnem == armv7::ARMV7_LDM && result->instrSize==16) { 182 | int list = result->fields[FIELD_register_list]; 183 | int Rn = result->fields[FIELD_Rn]; 184 | //printf("Rn: 0x%x\n", Rn); 185 | /* if the base register is in the register list, we discard the bang */ 186 | if(list & (1 << Rn)) 187 | return false; 188 | else 189 | return true; 190 | } 191 | else 192 | /* for others, {!} field determined by "W" element */ 193 | if(IS_FIELD_PRESENT(result, FIELD_W)) { 194 | if(result->fields[FIELD_W]) 195 | return true; 196 | } 197 | else { 198 | #ifdef DEBUG_DISASM 199 | if(getenv("DEBUG_DISASM")) { 200 | printf("ERROR: don't know how to deal with {!} field\n"); 201 | } 202 | #endif 203 | return false; 204 | } 205 | return false; 206 | } 207 | 208 | /* inspect the decomposition result to return the operation name 209 | 210 | NOTE: this is more complicated than OP_ID -> string mapping 211 | because of standard assembler syntax fields, eg "VABS.
" 212 | 213 | main functioning is by: 214 | 1) seeking struct instruction_format from the decomposition result 215 | 2) inspecting .operation 216 | 3) inspect .operationFlags 217 | */ 218 | std::string get_thumb_operation_name(struct decomp_result* result) 219 | { 220 | if ((result->status & STATUS_UNDEFINED) || (!result->format)) { 221 | return "undefined"; 222 | } 223 | 224 | const instruction_format* format = result->format; 225 | std::string contents = format->operation; 226 | 227 | /* the standard "{S}" setflag field */ 228 | if (format->operationFlags & INSTR_FORMAT_FLAG_OPTIONAL_STATUS) { 229 | if(IS_FIELD_PRESENT(result, FIELD_S)) { 230 | if(result->fields[FIELD_S]) { 231 | contents += "s"; 232 | } 233 | } 234 | } 235 | 236 | if(format->operationFlags & INSTR_FORMAT_FLAG_MASK) { 237 | const char *lookup_fc0[16] = { 238 | "undef", "ttt", "tt", "tte", "t", "tet", "te", "tee", 239 | "", "ett", "et", "ete", "e", "eet", "ee", "eee" 240 | }; 241 | 242 | const char *lookup_fc1[16] = { 243 | "undef", "eee", "ee", "eet", "e", "ete", "et", "ett", 244 | "", "tee", "te", "tet", "t", "tte", "tt", "ttt" 245 | }; 246 | 247 | const char **lookup = lookup_fc0; 248 | 249 | if(result->fields[FIELD_firstcond] & 1) { 250 | lookup = lookup_fc1; 251 | } 252 | 253 | contents += lookup[result->fields[FIELD_mask]]; 254 | } 255 | 256 | if (format->operationFlags & INSTR_FORMAT_FLAG_EFFECT) { 257 | /* see B6.1.1 CPS */ 258 | /* Encoding T1 (16 bit) */ 259 | if(IS_FIELD_PRESENT(result, FIELD_im)) { 260 | const char *lookup[2] = {"ie", "id"}; 261 | contents += lookup[result->fields[FIELD_im]]; 262 | } 263 | /* Encoding T2 (32-bit) */ 264 | else if(IS_FIELD_PRESENT(result, FIELD_imod)) { 265 | const char *lookup[4] = {"", "", "ie", "id"}; 266 | contents += lookup[result->fields[FIELD_imod]]; 267 | } 268 | else { 269 | #ifdef DEBUG_DISASM 270 | if(getenv("DEBUG_DISASM")) { 271 | cout << "ERROR: can't populate field" << endl; 272 | } 273 | #endif 274 | while(0); 275 | } 276 | } 277 | 278 | 
/* is conditional execution code? "" */ 279 | if (format->operationFlags & INSTR_FORMAT_FLAG_CONDITIONAL) { 280 | uint32_t value = COND_AL; 281 | if (IS_FIELD_PRESENT(result, FIELD_cond)) 282 | value = result->fields[FIELD_cond]; 283 | 284 | if(value < 15) { 285 | if(value != COND_AL) 286 | contents += get_thumb_condition_name(value); 287 | } 288 | #ifdef DEBUG_DISASM 289 | else 290 | cout << "ERROR: invalid condition code index" << value << endl; 291 | #endif 292 | } 293 | 294 | if(format->operationFlags & INSTR_FORMAT_FLAG_NEON_SIZE) { 295 | const char *lookup[4] = {".8", ".16", ".32", ".64"}; 296 | int index = 0; 297 | if(IS_FIELD_PRESENT(result, FIELD_size)) 298 | index = result->fields[FIELD_size]; 299 | contents += lookup[index]; 300 | } 301 | if(format->operationFlags & INSTR_FORMAT_FLAG_NEON_SINGLE_SIZE) { 302 | const char *lookup[4] = {"8", "16", "32", "64"}; 303 | int index = 0; 304 | if(IS_FIELD_PRESENT(result, FIELD_size)) 305 | index = result->fields[FIELD_size]; 306 | contents += lookup[index]; 307 | } 308 | if(format->operationFlags & INSTR_FORMAT_FLAG_NEON_TYPE_SIZE) { 309 | const char *tlookup[4] = {".error", ".S", ".S", ".U"}; 310 | const char *slookup[4] = {"16", "32", "64", "8"}; 311 | int tindex = 0; 312 | int sindex = 0; 313 | if(IS_FIELD_PRESENT(result, FIELD_size) && IS_FIELD_PRESENT(result, FIELD_type)) { 314 | tindex = result->fields[FIELD_type]; 315 | sindex = result->fields[FIELD_size]; 316 | } 317 | contents += tlookup[tindex]; 318 | contents += slookup[sindex]; 319 | } 320 | if (format->operationFlags & INSTR_FORMAT_FLAG_F16) { 321 | contents += ".F16"; 322 | } 323 | if (format->operationFlags & INSTR_FORMAT_FLAG_F32) { 324 | contents += ".F32"; 325 | } 326 | if (format->operationFlags & INSTR_FORMAT_FLAG_F64) { 327 | contents += ".F64"; 328 | } 329 | 330 | if (format->operationFlags & INSTR_FORMAT_FLAG_WIDE) { 331 | contents += ".w"; 332 | } 333 | 334 | if (format->operationFlags & INSTR_FORMAT_FLAG_INCREMENT_AFTER) { 335 | contents 
+= "ia"; 336 | } 337 | 338 | if (format->operationFlags & INSTR_FORMAT_FLAG_VFP_DATA_SIZE) { 339 | if(IS_FIELD_PRESENT(result, FIELD_dt)) { 340 | if(IS_FIELD_PRESENT(result, FIELD_td)) { 341 | switch(result->fields[FIELD_dt]) { 342 | case VFP_DATA_SIZE_S32F32: contents += ".S32.F32"; break; 343 | case VFP_DATA_SIZE_U32F32: contents += ".U32.F32"; break; 344 | case VFP_DATA_SIZE_F32S32: contents += ".F32.S32"; break; 345 | case VFP_DATA_SIZE_F32U32: contents += ".F32.U32"; break; 346 | default: contents += ".error"; break; 347 | } 348 | } 349 | else if(IS_FIELD_PRESENT(result, FIELD_unsigned)) { 350 | switch(result->fields[FIELD_dt]) { 351 | case VFP_DATA_SIZE_S8: contents += ".S8"; break; 352 | case VFP_DATA_SIZE_S16: contents += ".S16"; break; 353 | case VFP_DATA_SIZE_S32: contents += ".S32"; break; 354 | case VFP_DATA_SIZE_S64: contents += ".S64"; break; 355 | case VFP_DATA_SIZE_U8: contents += ".U8"; break; 356 | case VFP_DATA_SIZE_U16: contents += ".U16"; break; 357 | case VFP_DATA_SIZE_U32: contents += ".U32"; break; 358 | case VFP_DATA_SIZE_U64: contents += ".U64"; break; 359 | case VFP_DATA_SIZE_32: contents += ".32"; break; 360 | default: contents += ".error"; break; 361 | } 362 | } 363 | else if (IS_FIELD_PRESENT(result, FIELD_cmode)) { 364 | uint8_t cmode = result->fields[FIELD_cmode]; 365 | uint8_t op = result->fields[FIELD_op]; 366 | if (cmode >> 1 <= 3) contents += ".I32"; 367 | else if (cmode >> 1 <= 5) contents += ".I16"; 368 | else if (cmode == 12 || cmode == 13) contents += ".I32"; 369 | else if (cmode == 12 || cmode == 13) contents += ".I32"; 370 | else if (op == 0 && cmode == 14) contents += ".I8"; 371 | else if (op == 0 && cmode == 15) contents += ".F32"; 372 | else if (op == 1 && cmode == 14) contents += ".I64"; 373 | else if (op == 1 && cmode == 15) contents += ".undefined"; 374 | else contents += ".error"; 375 | } 376 | else if(IS_FIELD_PRESENT(result, FIELD_iword)) { 377 | switch(result->fields[FIELD_dt]) { 378 | case VFP_DATA_SIZE_I8: 
contents += ".I8"; break; 379 | case VFP_DATA_SIZE_I16: contents += ".I16"; break; 380 | case VFP_DATA_SIZE_I32: contents += ".I32"; break; 381 | case VFP_DATA_SIZE_I64: contents += ".I64"; break; 382 | case VFP_DATA_SIZE_I_F32: contents += ".F32"; break; 383 | default: contents += ".error"; break; 384 | } 385 | } 386 | else { 387 | switch(result->fields[FIELD_dt]) { 388 | case VFP_DATA_SIZE_S8: contents += ".S8"; break; 389 | case VFP_DATA_SIZE_S16: contents += ".S16"; break; 390 | case VFP_DATA_SIZE_S32: contents += ".S32"; break; 391 | case VFP_DATA_SIZE_F32: contents += ".F32"; break; 392 | case VFP_DATA_SIZE_F64: contents += ".F64"; break; 393 | default: contents += ".error"; break; 394 | } 395 | } 396 | } 397 | if(IS_FIELD_PRESENT(result, FIELD_dt_suffix)) { 398 | switch(result->fields[FIELD_dt_suffix]) { 399 | case 0: contents += ".F32"; break; 400 | case 1: contents += ".F64"; break; 401 | default: contents += ".error"; break; 402 | } 403 | } 404 | } 405 | 406 | return contents; 407 | } 408 | 409 | int 410 | get_reg_name(int reg_idx, char *reg_name) 411 | { 412 | int rc = -1; 413 | 414 | reg_name[0] = '\0'; 415 | 416 | switch(reg_idx) { 417 | case REG_R0: strcpy(reg_name, "r0"); break; 418 | case REG_R1: strcpy(reg_name, "r1"); break; 419 | case REG_R2: strcpy(reg_name, "r2"); break; 420 | case REG_R3: strcpy(reg_name, "r3"); break; 421 | case REG_R4: strcpy(reg_name, "r4"); break; 422 | case REG_R5: strcpy(reg_name, "r5"); break; 423 | case REG_R6: strcpy(reg_name, "r6"); break; 424 | case REG_R7: strcpy(reg_name, "r7"); break; 425 | case REG_R8: strcpy(reg_name, "r8"); break; 426 | case REG_R9: strcpy(reg_name, "r9"); break; 427 | case REG_R10: strcpy(reg_name, "r10"); break; 428 | case REG_R11: strcpy(reg_name, "r11"); break; 429 | case REG_R12: strcpy(reg_name, "r12"); break; 430 | case REG_SP: strcpy(reg_name, "sp"); break; // 13 431 | case REG_LR: strcpy(reg_name, "lr"); break; // 14 432 | case REG_PC: strcpy(reg_name, "pc"); break; // 15 433 | case 
REG_S0: strcpy(reg_name, "s0"); break; 434 | case REG_S1: strcpy(reg_name, "s1"); break; 435 | case REG_S2: strcpy(reg_name, "s2"); break; 436 | case REG_S3: strcpy(reg_name, "s3"); break; 437 | case REG_S4: strcpy(reg_name, "s4"); break; 438 | case REG_S5: strcpy(reg_name, "s5"); break; 439 | case REG_S6: strcpy(reg_name, "s6"); break; 440 | case REG_S7: strcpy(reg_name, "s7"); break; 441 | case REG_S8: strcpy(reg_name, "s8"); break; 442 | case REG_S9: strcpy(reg_name, "s9"); break; 443 | case REG_S10: strcpy(reg_name, "s10"); break; 444 | case REG_S11: strcpy(reg_name, "s11"); break; 445 | case REG_S12: strcpy(reg_name, "s12"); break; 446 | case REG_S13: strcpy(reg_name, "s13"); break; 447 | case REG_S14: strcpy(reg_name, "s14"); break; 448 | case REG_S15: strcpy(reg_name, "s15"); break; 449 | case REG_S16: strcpy(reg_name, "s16"); break; 450 | case REG_S17: strcpy(reg_name, "s17"); break; 451 | case REG_S18: strcpy(reg_name, "s18"); break; 452 | case REG_S19: strcpy(reg_name, "s19"); break; 453 | case REG_S20: strcpy(reg_name, "s20"); break; 454 | case REG_S21: strcpy(reg_name, "s21"); break; 455 | case REG_S22: strcpy(reg_name, "s22"); break; 456 | case REG_S23: strcpy(reg_name, "s23"); break; 457 | case REG_S24: strcpy(reg_name, "s24"); break; 458 | case REG_S25: strcpy(reg_name, "s25"); break; 459 | case REG_S26: strcpy(reg_name, "s26"); break; 460 | case REG_S27: strcpy(reg_name, "s27"); break; 461 | case REG_S28: strcpy(reg_name, "s28"); break; 462 | case REG_S29: strcpy(reg_name, "s29"); break; 463 | case REG_S30: strcpy(reg_name, "s30"); break; 464 | case REG_S31: strcpy(reg_name, "s31"); break; 465 | case REG_D0: strcpy(reg_name, "d0"); break; 466 | case REG_D1: strcpy(reg_name, "d1"); break; 467 | case REG_D2: strcpy(reg_name, "d2"); break; 468 | case REG_D3: strcpy(reg_name, "d3"); break; 469 | case REG_D4: strcpy(reg_name, "d4"); break; 470 | case REG_D5: strcpy(reg_name, "d5"); break; 471 | case REG_D6: strcpy(reg_name, "d6"); break; 472 | case 
REG_D7: strcpy(reg_name, "d7"); break; 473 | case REG_D8: strcpy(reg_name, "d8"); break; 474 | case REG_D9: strcpy(reg_name, "d9"); break; 475 | case REG_D10: strcpy(reg_name, "d10"); break; 476 | case REG_D11: strcpy(reg_name, "d11"); break; 477 | case REG_D12: strcpy(reg_name, "d12"); break; 478 | case REG_D13: strcpy(reg_name, "d13"); break; 479 | case REG_D14: strcpy(reg_name, "d14"); break; 480 | case REG_D15: strcpy(reg_name, "d15"); break; 481 | case REG_D16: strcpy(reg_name, "d16"); break; 482 | case REG_D17: strcpy(reg_name, "d17"); break; 483 | case REG_D18: strcpy(reg_name, "d18"); break; 484 | case REG_D19: strcpy(reg_name, "d19"); break; 485 | case REG_D20: strcpy(reg_name, "d20"); break; 486 | case REG_D21: strcpy(reg_name, "d21"); break; 487 | case REG_D22: strcpy(reg_name, "d22"); break; 488 | case REG_D23: strcpy(reg_name, "d23"); break; 489 | case REG_D24: strcpy(reg_name, "d24"); break; 490 | case REG_D25: strcpy(reg_name, "d25"); break; 491 | case REG_D26: strcpy(reg_name, "d26"); break; 492 | case REG_D27: strcpy(reg_name, "d27"); break; 493 | case REG_D28: strcpy(reg_name, "d28"); break; 494 | case REG_D29: strcpy(reg_name, "d29"); break; 495 | case REG_D30: strcpy(reg_name, "d30"); break; 496 | case REG_D31: strcpy(reg_name, "d31"); break; 497 | case REG_Q0: strcpy(reg_name, "q0"); break; 498 | case REG_Q1: strcpy(reg_name, "q1"); break; 499 | case REG_Q2: strcpy(reg_name, "q2"); break; 500 | case REG_Q3: strcpy(reg_name, "q3"); break; 501 | case REG_Q4: strcpy(reg_name, "q4"); break; 502 | case REG_Q5: strcpy(reg_name, "q5"); break; 503 | case REG_Q6: strcpy(reg_name, "q6"); break; 504 | case REG_Q7: strcpy(reg_name, "q7"); break; 505 | case REG_Q8: strcpy(reg_name, "q8"); break; 506 | case REG_Q9: strcpy(reg_name, "q9"); break; 507 | case REG_Q10: strcpy(reg_name, "q10"); break; 508 | case REG_Q11: strcpy(reg_name, "q11"); break; 509 | case REG_Q12: strcpy(reg_name, "q12"); break; 510 | case REG_Q13: strcpy(reg_name, "q13"); break; 511 | 
case REG_Q14: strcpy(reg_name, "q14"); break; 512 | case REG_Q15: strcpy(reg_name, "q15"); break; 513 | default: 514 | strcpy(reg_name, "ERROR"); 515 | goto cleanup; 516 | } 517 | 518 | rc = 0; 519 | cleanup: 520 | //printf("in response to %d, returned %s and rc=%d\n", reg_idx, reg_name, rc); 521 | return rc; 522 | } 523 | 524 | -------------------------------------------------------------------------------- /thumb2_disasm/disassembler.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "armv7.h" 4 | 5 | //***************************************************************************** 6 | // defines, values 7 | //***************************************************************************** 8 | 9 | /* architectures */ 10 | #define ARCH_ARMv4T 0 11 | #define ARCH_ARMv5T 1 12 | #define ARCH_ARMv6 2 13 | #define ARCH_ARMv7 4 14 | #define ARCH_ARMv7_R 8 15 | #define ARCH_ARMv6T2 16 16 | #define ARCH_ThumbEE 32 17 | #define ARCH_SECURITY_EXTENSIONS 0x40 18 | #define ARCH_ARMv7_WITH_MP 0x80 /* eg: PLDW */ 19 | #define ARCH_ADVSIMD 0x100 /* vst, vld, etc. 
*/ 20 | #define ARCH_VFPv2 0x200 21 | #define ARCH_VFPv3 0x400 22 | 23 | /* decompose statuses */ 24 | #define STATUS_OK 0 25 | #define STATUS_NO_BIT_MATCH 1 /* travelled along the graph and hit a 26 | contradiction */ 27 | #define STATUS_ARCH_UNSUPPORTED 2 /* an encoding match was found, but is not 28 | supported on the requested architecture */ 29 | #define STATUS_UNDEFINED 4 /* */ 30 | #define STATUS_BUFFER_TOO_SMALL 8 31 | 32 | /* instruction flags */ 33 | #define FLAG_NONE 0 34 | #define FLAG_UNPREDICTABLE 1 35 | #define FLAG_NOTPERMITTED 2 /* eg: instruction decodes ok, but not allowed in if-then block */ 36 | #define FLAG_ADDRMODE_AMBIGUITY 3 37 | 38 | enum SRType { SRType_ERROR=-1, SRType_LSL=0, SRType_LSR, SRType_ASR, SRType_ROR, SRType_RRX }; 39 | enum COND { COND_EQ=0, COND_NE, COND_CS, COND_CC, COND_MI, COND_PL, COND_VS, 40 | COND_VC, COND_HI, COND_LS, COND_GE, COND_LT, COND_GT, COND_LE, COND_AL }; 41 | 42 | /* is in "if then" block? */ 43 | #define IFTHEN_UNKNOWN 0 44 | #define IFTHEN_YES 1 45 | #define IFTHEN_NO 2 46 | #define IFTHENLAST_UNKNOWN 0 47 | #define IFTHENLAST_YES 1 48 | #define IFTHENLAST_NO 2 49 | 50 | /* instruction sets */ 51 | #define INSTRSET_THUMB 0 /* 16-bit thumb instructions only (introduced in ARMv4T) */ 52 | #define INSTRSET_THUMB2 1 /* 16-bit and 32-bit instructions (introduced in ARMv6T2) */ 53 | #define INSTRSET_THUMBEE 2 /* defined in ARMv7 */ 54 | 55 | /* addressing modes */ 56 | /* these index the instruction format strings */ 57 | #define ADDRMODE_OFFSET 0 /* eaddr = base reg + offset, base reg unchanged, like [,] */ 58 | #define ADDRMODE_PREINDEX 1 /* eaddr = base reg + offset, base reg CHANGED, like [,]! 
*/ 59 | #define ADDRMODE_POSTINDEX 2 /* eaddr = base reg, base reg CHANGED, like [], */ 60 | #define ADDRMODE_UNINDEXED 3 /* eaddr = base reg */ 61 | #define ADDRMODE_ADVSIMD_0 0 62 | #define ADDRMODE_ADVSIMD_1 1 63 | #define ADDRMODE_ADVSIMD_2 2 64 | #define ADDRMODE_UNSPECIFIED 255 65 | 66 | #define IS_FIELD_PRESENT(s, elem) (!!((s)->fields_mask[(elem) >> 6] & (1LL << ((elem) & 63)))) 67 | 68 | #define MAX_FORMAT_OPERANDS 8 69 | 70 | /* these append some specifier text on the opcode */ 71 | #define INSTR_FORMAT_FLAG_CONDITIONAL 1 72 | #define INSTR_FORMAT_FLAG_OPTIONAL_STATUS 2 73 | #define INSTR_FORMAT_FLAG_EFFECT 4 74 | #define INSTR_FORMAT_FLAG_MASK 8 75 | #define INSTR_FORMAT_FLAG_WIDE 0x10 76 | #define INSTR_FORMAT_FLAG_INCREMENT_AFTER 0x20 77 | #define INSTR_FORMAT_FLAG_AMODE 0x40 78 | #define INSTR_FORMAT_FLAG_NEON_SIZE 0x80 79 | #define INSTR_FORMAT_FLAG_VFP_DATA_SIZE 0x100 80 | #define INSTR_FORMAT_FLAG_NEON_TYPE_SIZE 0x200 81 | #define INSTR_FORMAT_FLAG_NEON_SINGLE_SIZE 0x400 82 | #define INSTR_FORMAT_FLAG_F16 0x800 83 | #define INSTR_FORMAT_FLAG_F32 0x1000 84 | #define INSTR_FORMAT_FLAG_F64 0x2000 85 | 86 | #define VFP_DATA_SIZE_S8 0 87 | #define VFP_DATA_SIZE_S16 1 88 | #define VFP_DATA_SIZE_S32 2 89 | #define VFP_DATA_SIZE_S64 3 90 | 91 | #define VFP_DATA_SIZE_F32 3 92 | #define VFP_DATA_SIZE_F64 4 93 | 94 | #define VFP_DATA_SIZE_U8 4 95 | #define VFP_DATA_SIZE_U16 5 96 | #define VFP_DATA_SIZE_U32 6 97 | #define VFP_DATA_SIZE_U64 7 98 | 99 | #define VFP_DATA_SIZE_I8 0 100 | #define VFP_DATA_SIZE_I16 1 101 | #define VFP_DATA_SIZE_I32 2 102 | #define VFP_DATA_SIZE_I64 3 103 | #define VFP_DATA_SIZE_I_F32 4 104 | 105 | #define VFP_DATA_SIZE_F32S32 0 106 | #define VFP_DATA_SIZE_F32U32 1 107 | #define VFP_DATA_SIZE_S32F32 2 108 | #define VFP_DATA_SIZE_U32F32 3 109 | 110 | #define VFP_DATA_SIZE_32 8 111 | 112 | //***************************************************************************** 113 | // structs and types 114 | 
//***************************************************************************** 115 | 116 | enum instruction_operand_format_type 117 | { 118 | OPERAND_FORMAT_END, 119 | OPERAND_FORMAT_MEMORY_ONE_REG, 120 | OPERAND_FORMAT_MEMORY_ONE_REG_IMM, 121 | OPERAND_FORMAT_MEMORY_ONE_REG_NEG_IMM, 122 | OPERAND_FORMAT_MEMORY_ONE_REG_ADD_IMM, 123 | OPERAND_FORMAT_MEMORY_ONE_REG_OPTIONAL_IMM, 124 | OPERAND_FORMAT_MEMORY_ONE_REG_OPTIONAL_ADD_IMM, 125 | OPERAND_FORMAT_MEMORY_ONE_REG_ALIGNED, 126 | OPERAND_FORMAT_MEMORY_TWO_REG, 127 | OPERAND_FORMAT_MEMORY_TWO_REG_SHIFT, 128 | OPERAND_FORMAT_MEMORY_TWO_REG_LSL_ONE, 129 | OPERAND_FORMAT_MEMORY_SP_IMM, 130 | OPERAND_FORMAT_MEMORY_SP_OPTIONAL_IMM, 131 | OPERAND_FORMAT_MEMORY_PC, 132 | OPERAND_FORMAT_FPSCR, 133 | OPERAND_FORMAT_IMM, 134 | OPERAND_FORMAT_IMM64, 135 | OPERAND_FORMAT_OPTIONAL_IMM, 136 | OPERAND_FORMAT_ADD_IMM, 137 | OPERAND_FORMAT_OPTIONAL_ADD_IMM, 138 | OPERAND_FORMAT_ZERO, 139 | OPERAND_FORMAT_REG, 140 | OPERAND_FORMAT_REG_FP, /* s0..s31, d0..d31, q0..q15 */ 141 | OPERAND_FORMAT_REG_INDEX, 142 | OPERAND_FORMAT_SP, 143 | OPERAND_FORMAT_PC, 144 | OPERAND_FORMAT_LR, 145 | OPERAND_FORMAT_COPROC, 146 | OPERAND_FORMAT_COPROC_REG, 147 | OPERAND_FORMAT_SINGLE_REGISTERS, 148 | OPERAND_FORMAT_REGISTERS, 149 | OPERAND_FORMAT_REGISTERS_INDEXED, 150 | OPERAND_FORMAT_LIST, 151 | OPERAND_FORMAT_ENDIAN, 152 | OPERAND_FORMAT_SHIFT, 153 | OPERAND_FORMAT_IFLAGS, 154 | OPERAND_FORMAT_FIRSTCOND, 155 | OPERAND_FORMAT_LABEL, 156 | OPERAND_FORMAT_SPEC_REG, 157 | OPERAND_FORMAT_NEON_SIZE, 158 | OPERAND_FORMAT_BARRIER_OPTION, 159 | OPERAND_FORMAT_RT_MRC, 160 | OPERAND_FORMAT_ROTATION 161 | }; 162 | 163 | enum instruction_operand_writeback 164 | { 165 | WRITEBACK_NO, 166 | WRITEBACK_YES, 167 | WRITEBACK_OPTIONAL 168 | }; 169 | 170 | /* "inspired" from capstone */ 171 | enum instruction_group 172 | { 173 | INSN_GROUP_UNKNOWN = 0, /* so memset() will initialize to this default */ 174 | INSN_GROUP_JUMP, 175 | INSN_GROUP_CRYPTO, 176 | 
INSN_GROUP_DATABARRIER, 177 | INSN_GROUP_DIVIDE, 178 | INSN_GROUP_FPARMV8, 179 | INSN_GROUP_MULTPRO, 180 | INSN_GROUP_NEON, 181 | INSN_GROUP_T2EXTRACTPACK, 182 | INSN_GROUP_THUMB2DSP, 183 | INSN_GROUP_TRUSTZONE, 184 | INSN_GROUP_V4T, 185 | INSN_GROUP_V5T, 186 | INSN_GROUP_V5TE, 187 | INSN_GROUP_V6, 188 | INSN_GROUP_V6T2, 189 | INSN_GROUP_V7, 190 | INSN_GROUP_V8, 191 | INSN_GROUP_VFP2, 192 | INSN_GROUP_VFP3, 193 | INSN_GROUP_VFP4, 194 | INSN_GROUP_ARM, 195 | INSN_GROUP_MCLASS, 196 | INSN_GROUP_NOTMCLASS, 197 | INSN_GROUP_THUMB, 198 | INSN_GROUP_THUMB1ONLY, 199 | INSN_GROUP_THUMB2, 200 | INSN_GROUP_PREV8, 201 | INSN_GROUP_FPVMLX, 202 | INSN_GROUP_MULOPS, 203 | INSN_GROUP_CRC, 204 | INSN_GROUP_DPVFP, 205 | INSN_GROUP_V6M, 206 | }; 207 | 208 | /* the decomp->text function GetInstructionText will process these and in 209 | general emit: 210 | 211 | 1) TEXT token for prefix (if it exists) 212 | 2) ???? token(s) (depending on type) 213 | 3) TEXT token for suffix (if it exists) 214 | */ 215 | struct instruction_operand_format 216 | { 217 | /* what type of operand format is this? 218 | eg: OPERAND_FORMAT_REG, OPERAND_FORMAT_REGISTERS, etc. */ 219 | instruction_operand_format_type type; 220 | 221 | /* each operand can refer to up to 2 fields */ 222 | enum decomp_field field0, field1; 223 | 224 | /* text that's prepended and appended, eg "#" or "{","}" */ 225 | const char *prefix, *suffix; 226 | 227 | /* whether or not there's writeback (usually indicated by '!' 
in format) */ 228 | instruction_operand_writeback writeback; 229 | }; 230 | 231 | struct instruction_format 232 | { 233 | const char* operation; 234 | uint32_t operationFlags; 235 | instruction_operand_format operands[MAX_FORMAT_OPERANDS]; 236 | size_t operandCount; 237 | }; 238 | 239 | struct decomp_request 240 | { 241 | uint16_t instr_word16; 242 | uint32_t instr_word32; 243 | 244 | /* architecture, like ARCH_ARMv4T */ 245 | uint8_t arch; 246 | /* instruction set */ 247 | uint8_t instrSet; 248 | 249 | /* in if-then block? is last? */ 250 | uint8_t inIfThen; 251 | uint8_t inIfThenLast; 252 | 253 | /* disassembly of some instructions affected by APSR.C */ 254 | uint8_t carry_in; 255 | 256 | uint32_t addr; 257 | }; 258 | 259 | struct decomp_result 260 | { 261 | /* the result of the decomposition eg: STATUS_OK */ 262 | uint8_t status; 263 | 264 | /* extra flags to decorate instruction eg: FLAG_UNPREDICTABLE */ 265 | uint32_t flags; 266 | 267 | /* addressing mode of mem access instructions */ 268 | uint8_t addrMode; 269 | 270 | /* instruction group */ 271 | uint8_t group; 272 | 273 | /* instruction size in bits: 16 or 32 */ 274 | uint8_t instrSize; 275 | 276 | /* values of the fields */ 277 | uint32_t fields[FIELD_MAX]; 278 | /* bit set if field present */ 279 | uint64_t fields_mask[(FIELD_MAX + 63) / 64]; 280 | 281 | const instruction_format* formats; 282 | size_t formatCount; 283 | 284 | const instruction_format* format; 285 | armv7::Operation mnem; 286 | 287 | uint32_t pc; 288 | }; 289 | 290 | //***************************************************************************** 291 | // function prototypes 292 | //***************************************************************************** 293 | 294 | extern int thumb_decompose(struct decomp_request *, struct decomp_result *result); 295 | extern const char* get_thumb_condition_name(uint32_t cond); 296 | extern bool thumb_has_writeback(struct decomp_result* result); 297 | extern std::string 
get_thumb_operation_name(struct decomp_result* result); 298 | extern int get_reg_name(int reg_idx, char *reg_name); 299 | 300 | -------------------------------------------------------------------------------- /thumb2_disasm/generator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # see the Makefile for how to invoke me 4 | 5 | import os 6 | import re 7 | import sys 8 | import string 9 | import binascii 10 | 11 | sys.path.append('./arm_pcode_parser') 12 | import codegencpp 13 | 14 | # globals 15 | g_code = '' 16 | g_lineNum = 0 17 | g_lines = [] 18 | g_indentLevel = 0 19 | g_DEBUG_GEN = 0 20 | g_DEBUG_DECOMP = 0 21 | 22 | header = ''' 23 | #include 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | #include "spec.h" /* FIELD_imm8, FIELD_MAX, etc. */ 30 | #include "disassembler.h" /* decomp_request, decomp_result */ 31 | 32 | #if defined(__clang__) 33 | #pragma clang diagnostic push 34 | #pragma clang diagnostic ignored "-Wunused-variable" 35 | #pragma clang diagnostic ignored "-Wunused-parameter" 36 | #pragma clang diagnostic ignored "-Wunused-function" 37 | #elif defined(__GNUC__) 38 | #pragma GCC diagnostic push 39 | #pragma GCC diagnostic ignored "-Wunused-variable" 40 | #pragma GCC diagnostic ignored "-Wunused-parameter" 41 | #pragma GCC diagnostic ignored "-Wunused-function" 42 | #endif 43 | ''' 44 | 45 | support = ''' 46 | // see A8.4.3 47 | int DecodeImmShift_shift_t(uint8_t enc_bits, uint8_t imm5) 48 | { 49 | if(enc_bits == 0) 50 | return SRType_LSL; 51 | else if(enc_bits == 1) 52 | return SRType_LSR; 53 | else if(enc_bits == 2) 54 | return SRType_ASR; 55 | else if(enc_bits == 3) { 56 | if(imm5 == 0) 57 | return SRType_RRX; 58 | else 59 | return SRType_ROR; 60 | } 61 | return SRType_ERROR; 62 | } 63 | 64 | int DecodeImmShift_shift_n(uint8_t enc_bits, uint8_t imm5) 65 | { 66 | if(enc_bits == 0) 67 | return imm5; 68 | else if(enc_bits == 1) 69 | return imm5 ? 
imm5 : 32; 70 | else if(enc_bits == 2) 71 | return imm5 ? imm5 : 32; 72 | else if(enc_bits == 3) { 73 | if(imm5 == 0) 74 | return 1; 75 | else 76 | return imm5; 77 | } 78 | return -1; 79 | } 80 | 81 | int BadReg(uint8_t reg) 82 | { 83 | return (reg==13) || (reg==15); 84 | } 85 | 86 | uint64_t Replicate(uint32_t rep, uint32_t before, char before_char, uint32_t after, char after_char, uint8_t times) { 87 | uint64_t imm64 = 0; 88 | uint32_t i, time; 89 | for (time = 0; time < times; time++) { 90 | if (time > 0) { 91 | for (i = 0; i < before+8; i++) { 92 | imm64 <<= 1; 93 | imm64 |= before_char; 94 | } 95 | } 96 | imm64 |= rep; 97 | for (i = 0; i < after; i++) { 98 | imm64 <<= 1; 99 | imm64 |= after_char; 100 | } 101 | } 102 | return imm64; 103 | } 104 | 105 | uint32_t VFPExpandImm(uint32_t imm, uint32_t N, uint32_t lowbits) { 106 | 107 | uint32_t E = 0; 108 | if (N == 32) { 109 | E = 8; 110 | } 111 | else { 112 | E = 11; 113 | } 114 | uint32_t F = (N - E) - 1; 115 | uint32_t sign = (imm >> 7) & 1; 116 | uint32_t exp = ((imm >> 6) & 1) ^ 1; 117 | for (uint32_t i = 0; i < E-3; i++) { 118 | exp <<= 1; 119 | exp |= (imm >> 6) & 1; 120 | } 121 | exp <<= 2; 122 | exp |= (imm >> 4) & 3; 123 | uint32_t frac = (imm & 15); 124 | frac <<= F-4; 125 | uint32_t out = (sign << 31) | (exp << 23) | (frac); 126 | 127 | return out; 128 | } 129 | 130 | uint32_t AdvSIMDExpandImm(uint32_t op, uint32_t cmode, uint32_t imm8, uint32_t lowbits) { 131 | 132 | uint32_t testimm8; 133 | uint64_t imm64 = 0; 134 | uint32_t imm32 = 0; 135 | uint32_t i = 0; 136 | imm8 = imm8 & 0xff; 137 | switch(cmode >> 1) { 138 | case 0: 139 | testimm8 = 0; 140 | imm64 = Replicate(imm8, 24, 0, 0, 0, 2); 141 | if (lowbits) return imm64 & 0xffffffff; 142 | return 0; 143 | break; 144 | case 1: 145 | testimm8 = 1; 146 | imm64 = Replicate(imm8, 16, 0, 8, 0, 2); 147 | if (lowbits) return imm64 & 0xffffffff; 148 | return 0; 149 | break; 150 | case 2: 151 | testimm8 = 1; 152 | imm64 = Replicate(imm8, 8, 0, 16, 0, 2); 153 | 
if (lowbits) return imm64 & 0xffffffff; 154 | return 0; 155 | break; 156 | case 3: 157 | testimm8 = 1; 158 | imm64 = Replicate(imm8, 0, 0, 24, 0, 2); 159 | if (lowbits) return imm64 & 0xffffffff; 160 | return 0; 161 | break; 162 | case 4: 163 | testimm8 = 0; 164 | imm64 = Replicate(imm8, 8, 0, 0, 0, 4); 165 | if (lowbits) return imm64 & 0xff; 166 | return 0; 167 | break; 168 | case 5: 169 | testimm8 = 1; 170 | imm64 = Replicate(imm8, 0, 0, 8, 0, 4); 171 | if (lowbits) return imm64 & 0xffff; 172 | return 0; 173 | break; 174 | case 6: 175 | testimm8 = 1; 176 | if ((cmode & 1) == 0) { 177 | imm64 = Replicate(imm8, 16, 0, 8, 1, 2); 178 | } 179 | else { 180 | imm64 = Replicate(imm8, 8, 0, 16, 1, 2); 181 | } 182 | if (lowbits) return imm64 & 0xffffffff; 183 | return 0; 184 | break; 185 | case 7: 186 | testimm8 = 0; 187 | if ((cmode & 1) == 0 && (op & 1) == 0) { 188 | imm64 = Replicate(imm8, 0, 0, 0, 0, 8); 189 | if (lowbits) return imm8; 190 | return 0; 191 | } 192 | 193 | else if ((cmode & 1) == 0 && (op & 1) == 1) { 194 | int i, j; 195 | for (i = 0; i < 8; i++) { 196 | for (j = 0; j < 8; j++) { 197 | imm64 |= ((imm8 >> (7-i)) & 1); 198 | if (i != 7 || j != 7) imm64 <<= 1; 199 | } 200 | } 201 | } 202 | else if ((cmode & 1) == 1 && (op & 1) == 0) { 203 | imm32 = ((imm8 >> 7) & 1); 204 | imm32 <<= 1; 205 | imm32 |= ((imm8 >> 6) & 1) ? 
0 : 1; 206 | for (i = 0; i < 5; i++) { 207 | imm32 <<= 1; 208 | imm32 |= (imm8 >> 6) & 1; 209 | } 210 | imm32 <<= 6; 211 | imm32 |= (imm8 & 63); 212 | imm32 <<= 19; 213 | imm64 = imm32; 214 | } 215 | else if ((cmode & 1) == 1 && (op & 1) == 1) { 216 | //return undefined() 217 | } 218 | break; 219 | } 220 | 221 | if (testimm8 && imm8 == 0) { 222 | //return undefined() 223 | } 224 | 225 | if (lowbits) return imm64 & 0xffffffff; 226 | return imm64 >> 32; 227 | } 228 | 229 | uint32_t ROR_C(uint32_t input, int shamt) 230 | { 231 | shamt %= 32; 232 | uint32_t left = input << (32-shamt); 233 | uint32_t right = input >> shamt; 234 | return left | right; 235 | } 236 | 237 | uint32_t ROR_C_cout(uint32_t input, int shamt) 238 | { 239 | return ROR_C(input, shamt) >> 31; 240 | } 241 | 242 | int ThumbExpandImm_C_imm32(uint32_t imm12, uint32_t carry_in) 243 | { 244 | (void)carry_in; 245 | 246 | if(0 == (imm12 & 0xC00)) { 247 | uint32_t idx = (imm12 & 0x300)>>8; 248 | uint32_t tmp = imm12 & 0xFF; 249 | if(idx==0) { 250 | return tmp; 251 | } 252 | else if(idx==1) { 253 | return (tmp << 16) | tmp; 254 | } 255 | else if(idx==2) { 256 | return (tmp << 24) | (tmp << 8); 257 | } 258 | else { 259 | return (tmp << 24) | (tmp << 16) | (tmp << 8) | tmp; 260 | } 261 | } 262 | else { 263 | uint32_t value = 0x80 | (imm12 & 0x7F); 264 | uint32_t rotamt = (imm12 & 0xF80) >> 7; 265 | return ROR_C(value, rotamt); 266 | } 267 | } 268 | 269 | int ThumbExpandImm_C_cout(uint32_t imm12, uint32_t carry_in) 270 | { 271 | if(0 == (imm12 & 0xC00)) { 272 | return carry_in; 273 | } 274 | else { 275 | uint32_t unrot_value = 0x80 | (imm12 & 0x7F); 276 | return ROR_C_cout(unrot_value, (imm12 & 0xF80) >> 7); 277 | } 278 | } 279 | 280 | // TODO: replace with optimized implementation 281 | int BitCount(int x) 282 | { 283 | int answer = 0; 284 | while(x) { 285 | if(x&1) answer += 1; 286 | x>>=1; 287 | } 288 | return answer; 289 | } 290 | 291 | uint32_t SignExtend(uint32_t val, int inWidth) 292 | { 293 | int 
doExtend = val & (1 << (inWidth-1)); 294 | 295 | if(doExtend) { 296 | uint32_t mask = (uint32_t)-1 ^ ((1< bit_width) ? str_width - bit_width : 0; 306 | for(int i=0; i 32) bit_width = 32; 309 | for(int i=bit_width-1; i>=0; --i) 310 | printf("%c", (val & (1<>27'] 500 | # ['i', 1, '(instr & 0x04000000)>>26'], 501 | # ['', 1, '(instr & 0x2000000)>>25'] 502 | # ['', 1, '(instr & 0x100000)>>20'] 503 | # ['Rn', 4, '(instr & 0x70000)>>16'], 504 | # ['', 1, '(instr & 0x8000)>>15'] 505 | # ['imm3', 3, '(instr & 0x7000)>>12'], 506 | # ['Rd', 4, '(instr & 0xF00)>>8'], 507 | # ['imm8', 8, '(instr & 0xFF)'] 508 | def genExtractGeneral(self, varName='instr'): 509 | result = [] 510 | leftMarg = 0 511 | seen = {} 512 | 513 | for field in self.text.split(','): 514 | # pattern fields -> [varName, #bits] 515 | # eg: 'imm3.3' -> ['imm3', 3] 516 | varName = '' 517 | nBits = 0 518 | 519 | if re.match(r'^[01x]+$', field): 520 | (varName, nBits) = ('', len(field)) 521 | else: 522 | # named variables 523 | m = re.match(r'^([\w]+)\.(\d+)$', field) 524 | if m: 525 | (varName, nBits) = (m.group(1), int(m.group(2))) 526 | 527 | # In a few cases, the encoding diagram contains more than one bit or field with same name. In these cases, the values of all of those bits or fields must be identical. The encoding-specific pseudocode contains a special case using the Consistent() function to specify what happens if they are not identical. Consistent() returns TRUE if all instruction bits or fields with the same name as its argument have the same value, and FALSE otherwise. 
528 | if varName in seen: 529 | varName = varName + "_check" 530 | seen[varName] = 1 531 | 532 | else: 533 | m = re.match(r'^[\(\)01]+$', field) 534 | if m: 535 | nBits = len(field)/3 536 | else: 537 | parseError('genExtractGeneral(): unknown bit extract field %s' % field) 538 | 539 | # generate the extraction code (mask, shift) 540 | shiftAmt = self.width - (leftMarg + nBits) 541 | if shiftAmt < 0: 542 | parseError('negative shift amount') 543 | 544 | extract = 'instr & 0x%X' % ((2**nBits - 1) << shiftAmt) 545 | if shiftAmt: 546 | extract = '(%s)>>%d' % (extract, shiftAmt) 547 | 548 | # append the result 549 | result.append([varName, nBits, extract, field]) 550 | 551 | # next 552 | leftMarg += nBits 553 | 554 | return result 555 | 556 | def genExtractToNewVars(self, mgr, varName='instr'): 557 | for (varName, length, bitAction, field) in self.genExtractGeneral(): 558 | if not varName: 559 | continue 560 | mgr.add("uint%d_t %s = %s;" % (self.width, varName, bitAction)) 561 | 562 | def genExtractToElemAssigns(self, varName='instr'): 563 | result = '' 564 | for (varName, length, bitAction, field) in self.genExtractGeneral(): 565 | if not varName: 566 | continue 567 | fieldName = 'FIELD_' + varName 568 | result += "res->fields[%s] = %s;\n" % (fieldName, bitAction) 569 | result += "res->fields_mask[%s >> 6] |= 1LL << (%s & 63);\n" % (fieldName, fieldName) 570 | result += "char %s_width = %s;\n" % (varName, length) 571 | return result 572 | 573 | # generate a pretty diagram of the bit dissection 574 | def genExtractToDrawing(self, varName='instr'): 575 | extractions = self.genExtractGeneral() 576 | 577 | # pad field names to length of printed bits, if needed 578 | fields = [] 579 | for [fieldName, bitLen, code, fieldText] in extractions: 580 | if len(fieldText) < bitLen: 581 | fieldText = ' '*(bitLen - len(fieldText)) + fieldText 582 | fields.append(fieldText) 583 | 584 | dashes = map(lambda x: '-'*len(x), fields) 585 | # eg: printBits((instr & 0xF800)>>11, 5, 5); 586 | 
values = map(lambda x: 'printBits(%s, %d, %d); printf("|");' % \ 587 | (x[2], x[1], len(x[3])), extractions) 588 | dashLine = 'printf("+%s+\\n");' % '+'.join(dashes) 589 | 590 | result = [] 591 | result.append(dashLine) 592 | result.append('printf("|' + '|'.join(fields) + '|\\n");') 593 | result.append(dashLine) 594 | result.append('printf("|");') 595 | result += values 596 | result.append('printf("\\n");') 597 | result.append(dashLine) 598 | return result 599 | 600 | 601 | # get the width of a variable from within the pattern 602 | def getVarWidth(self, varName): 603 | regex = varName + '\.(\\d)' 604 | 605 | #print("trying to get var: %s" % varName) 606 | #print("using regex: %s" % regex) 607 | 608 | m = re.search(regex, self.text) 609 | if not m: parseError('variable %s not found in pattern %s using regex %s' % \ 610 | (varName, self.text, regex)) 611 | return int(m.group(1)) 612 | 613 | # pretty string representation 614 | def __str__(self): 615 | result = 'pattern="%s" width=%d stringency=%d' % \ 616 | (self.text, self.width, self.stringency) 617 | return result 618 | 619 | #------------------------------------------------------------------------------ 620 | # code generation helpers 621 | #------------------------------------------------------------------------------ 622 | 623 | def genEncodingBlock(mgr, encName, arches, fmts, pattern, pcode): 624 | #print("genEncodingBlock on %s with pattern: %s" % (encName, pattern)) 625 | #status() 626 | 627 | if not encName: parseError("can't generate encoding block without encoding name!") 628 | if not arches: parseError("can't generate encoding block without architecture!") 629 | if not fmts: parseError("can't generate encoding block without format!") 630 | if not pattern: parseError("can't generate encoding block without pattern!") 631 | if not pcode: parseError("can't generate encoding block without pseudocode!") 632 | 633 | mgr.add("/* Encoding %s */" % encName) 634 | mgr.add("/* %s */" % pattern) 635 | 636 | 
mgr.add('{') 637 | mgr.tab() 638 | if pattern.width == 16: 639 | mgr.add('uint16_t instr = req->instr_word16;') 640 | elif pattern.width == 32: 641 | # arm pipelines fetches 2 bytes "halfword" at a time 642 | # that's how it knows whether to stay at a single halfword (16-bit thumb) or fetch another (32-bit thumb) 643 | mgr.add('uint32_t instr = req->instr_word32;') 644 | else: 645 | raise Exception("invalid pattern width: %d\n", pattern.width) 646 | 647 | check = pattern.genCheckMatch() 648 | mgr.add("if(%s) {" % check) 649 | mgr.tab() 650 | 651 | if(g_DEBUG_DECOMP): 652 | mgr.add('') 653 | mgr.add('if(getenv("DEBUG_DECOMP")) {') 654 | mgr.tab() 655 | mgr.add('printf("using encoding %s\\n\");' % encName) 656 | mgr.add('\n'.join(pattern.genExtractToDrawing()) + '') 657 | mgr.untab() 658 | mgr.add('}') 659 | mgr.add('') 660 | 661 | # save instruction size 662 | mgr.add('res->instrSize = %d;' % pattern.width) 663 | 664 | # generate unpredictable bits check (like "..,(0)(0)(0)(0),...") 665 | check = pattern.genCheckUnpredictable() 666 | if not check in ['0','1','!0','!1']: 667 | mgr.add('if(%s) {' % check) 668 | mgr.tab() 669 | mgr.add('res->flags |= FLAG_UNPREDICTABLE;') 670 | mgr.untab() 671 | mgr.add('}') 672 | 673 | # generate architecture check 674 | checks = [] 675 | for arch in arches.split(', '): 676 | checks.append('!(req->arch & ARCH_%s)' % string.replace(arch, '*', '')) 677 | mgr.add("if(%s) {" % ' && '.join(checks)) 678 | mgr.tab() 679 | mgr.add('res->status |= STATUS_ARCH_UNSUPPORTED;') 680 | mgr.untab() 681 | mgr.add('}') 682 | 683 | # save the named fields within the bits 684 | temp = pattern.genExtractToElemAssigns() 685 | 686 | # if 'c' or 'cond' wasn't in the pattern, mark the condition code as always 687 | if not re.search(r'c\.\d+', pattern.text) and \ 688 | not re.search(r'cond\.\d+', pattern.text): 689 | fieldName = 'FIELD_cond' 690 | mgr.add('res->fields[%s] = COND_AL;' % fieldName) 691 | mgr.add("res->fields_mask[%s >> 6] |= 1LL << (%s & 63);" % 
(fieldName, fieldName)) 692 | #print("at line " + str(g_lineNum) + " trying to indent: %s" % temp) 693 | 694 | if temp: 695 | mgr.add(temp) 696 | 697 | # save formats in the result 698 | mgr.add("static const instruction_format instr_formats[] = ") 699 | mgr.add('{') 700 | mgr.tab() 701 | for fmt in fmts: 702 | if ' ' not in fmt: 703 | operation = fmt.lower() 704 | operandsStr = "" 705 | else: 706 | operation = fmt[:fmt.index(' ')].lower() 707 | operandsStr = fmt[fmt.index(' ') + 1:].strip() 708 | 709 | flags = "0" 710 | if "." in operation: 711 | flags += "|INSTR_FORMAT_FLAG_NEON_TYPE_SIZE" 712 | operation = operation.replace(".", "") 713 | if "." in operation: 714 | flags += "|INSTR_FORMAT_FLAG_CONDITIONAL" 715 | flags += "|INSTR_FORMAT_FLAG_NEON_SIZE" 716 | operation = operation.replace(".", "") 717 | if "" in operation: 718 | flags += "|INSTR_FORMAT_FLAG_NEON_SINGLE_SIZE" 719 | operation = operation.replace("", "") 720 | if ".
" in operation: 721 | flags += "|INSTR_FORMAT_FLAG_CONDITIONAL" 722 | flags += "|INSTR_FORMAT_FLAG_VFP_DATA_SIZE" 723 | operation = operation.replace(".
", "") 724 | if ".
" in operation: 725 | flags += "|INSTR_FORMAT_FLAG_VFP_DATA_SIZE" 726 | operation = operation.replace(".
", "") 727 | if "" in operation: 728 | flags += "|INSTR_FORMAT_FLAG_CONDITIONAL" 729 | if "." in operation: 730 | size = operation.split("")[1].split()[0][1:].upper() 731 | if size in ["F16", "F32", "F64"]: 732 | flags += "|INSTR_FORMAT_FLAG_" + size 733 | operation = operation.replace("", "") 734 | if "{s}" in operation: 735 | flags += "|INSTR_FORMAT_FLAG_OPTIONAL_STATUS" 736 | operation = operation.replace("{s}", "") 737 | if "" in operation: 738 | flags += "|INSTR_FORMAT_FLAG_EFFECT" 739 | operation = operation.replace("", "") 740 | if "" in operation: 741 | flags += "|INSTR_FORMAT_FLAG_MASK" 742 | operation = operation.replace("", "") 743 | if ".w" in operation: 744 | flags += "|INSTR_FORMAT_FLAG_WIDE" 745 | operation = operation.replace(".w", "") 746 | if "{ia}" in operation: 747 | flags += "|INSTR_FORMAT_FLAG_INCREMENT_AFTER" 748 | operation = operation.replace("{ia}", "") 749 | if "{}" in operation: 750 | flags += "|INSTR_FORMAT_FLAG_AMODE" 751 | operation = operation.replace("{}", "") 752 | 753 | mgr.add('{ /* %s */' % fmt) 754 | mgr.tab() 755 | mgr.add('"%s", /* .operation (const char *) */' % operation) 756 | mgr.add('%s, /* .operationFlags (uint32_t) */' % flags) 757 | mgr.add('{/* .operands (instruction_operand_format) */') 758 | mgr.tab() 759 | 760 | i = 0 761 | operandCount = 0 762 | 763 | # operands is the half of the format string following the first whitespace 764 | # eg: MOV ,,# 765 | # 766 | # then operation = 'MOV' 767 | # then format = ',,# 768 | #print(' operation: %s' % operation) 769 | #print('operandsStr: %s' % operandsStr) 770 | 771 | # split the string into operands 772 | operands = [] 773 | tok_regexs = [r'^<.*?>(!|{!})?', r'^#<\+/-><.*?>', r'^#<.*?>', \ 774 | r'^{.*?}', r'^\[.*?\]!?', r'^\w+({!})?', r'^#\d+'] 775 | while operandsStr: 776 | did_split = False 777 | 778 | if operandsStr[0] == ',': 779 | operandsStr = operandsStr[1:] 780 | continue 781 | 782 | for regex in tok_regexs: 783 | m = re.match(regex, operandsStr) 784 | if m: 785 | 
operands.append(m.group(0)) 786 | operandsStr = operandsStr[len(m.group(0)):] 787 | did_split = True 788 | break 789 | 790 | if not did_split: 791 | raise Exception('don\'t know how to split next operand on: %s' % operandsStr) 792 | 793 | # loop over operands 794 | for operand in operands: 795 | wb_id = ['WRITEBACK_NO', 'WRITEBACK_YES'][operand[-1]=='!'] 796 | 797 | #print(' operand: %s' % operand) 798 | 799 | # 800 | # Rn 801 | # 802 | if operand == "[]": 803 | mgr.add('{OPERAND_FORMAT_MEMORY_ONE_REG,FIELD_Rn,FIELD_UNINIT,"","",%s},' % wb_id) 804 | continue 805 | m = re.match(r'^\[,#<\+/-><(.*)>]!?$', operand) 806 | if m: 807 | name = m.group(1) 808 | mgr.add('{OPERAND_FORMAT_MEMORY_ONE_REG_ADD_IMM,FIELD_Rn,FIELD_%s,"","",%s},' % (name, wb_id)) 809 | continue 810 | if operand.startswith('[,#]'): 811 | mgr.add('{OPERAND_FORMAT_MEMORY_ONE_REG_ALIGNED,FIELD_Rn,FIELD_UNINIT,"","",%s},' % wb_id) 812 | continue 813 | m = re.match(r'^\[,#<(.*)>\]!?$', operand) 814 | if m: 815 | name = m.group(1) 816 | mgr.add('{OPERAND_FORMAT_MEMORY_ONE_REG_IMM,FIELD_Rn,FIELD_%s,"","",%s},' % (name, wb_id)) 817 | continue 818 | m = re.match(r'^\[,#-<(.*)>\]!?$', operand) 819 | if m: 820 | name = m.group(1) 821 | mgr.add('{OPERAND_FORMAT_MEMORY_ONE_REG_NEG_IMM,FIELD_Rn,FIELD_%s,"","",%s},' % (name, wb_id)) 822 | continue 823 | m = re.match(r'^\[{,#<\+/-><(.*)>}\]!?', operand) 824 | if m: 825 | name = m.group(1) 826 | mgr.add('{OPERAND_FORMAT_MEMORY_ONE_REG_OPTIONAL_ADD_IMM,FIELD_Rn,FIELD_%s,"","",%s},' % (name, wb_id)) 827 | continue 828 | m = re.match(r'^\[{,#<(.*)>}\]$', operand) 829 | if m: 830 | name = m.group(1) 831 | mgr.add('{OPERAND_FORMAT_MEMORY_ONE_REG_OPTIONAL_IMM,FIELD_Rn,FIELD_%s,"","",%s},' % (name, wb_id)) 832 | continue 833 | if operand.startswith("[,]"): 834 | if i < len(operand) and operand[i] == "!": 835 | mgr.add('{OPERAND_FORMAT_MEMORY_TWO_REG,FIELD_Rn,FIELD_Rm,"","",WRITEBACK_YES},') 836 | else: 837 | 
mgr.add('{OPERAND_FORMAT_MEMORY_TWO_REG,FIELD_Rn,FIELD_Rm,"","",WRITEBACK_NO},') 838 | continue 839 | if operand.startswith("[,{,}]"): 840 | if i < len(operand) and operand[i] == "!": 841 | mgr.add('{OPERAND_FORMAT_MEMORY_TWO_REG_SHIFT,FIELD_Rn,FIELD_Rm,"","",WRITEBACK_YES},') 842 | else: 843 | mgr.add('{OPERAND_FORMAT_MEMORY_TWO_REG_SHIFT,FIELD_Rn,FIELD_Rm,"","",WRITEBACK_NO},') 844 | continue 845 | if operand.startswith("[,,LSL #1]"): 846 | if i < len(operand) and operand[i] == "!": 847 | mgr.add('{OPERAND_FORMAT_MEMORY_TWO_REG_LSL_ONE,FIELD_Rn,FIELD_Rm,"","",WRITEBACK_YES},') 848 | else: 849 | mgr.add('{OPERAND_FORMAT_MEMORY_TWO_REG_LSL_ONE,FIELD_Rn,FIELD_Rm,"","",WRITEBACK_NO},') 850 | continue 851 | if operand.startswith("!"): 852 | mgr.add('{OPERAND_FORMAT_REG,FIELD_Rn,FIELD_UNINIT,"","",WRITEBACK_YES},') 853 | continue 854 | if operand.startswith("{!}"): 855 | mgr.add('{OPERAND_FORMAT_REG,FIELD_Rn,FIELD_UNINIT,"","",WRITEBACK_OPTIONAL},') 856 | continue 857 | # 858 | # SP 859 | # 860 | m = re.match(r'^\[SP,#<(.*)>\]$', operand) 861 | if m: 862 | name = m.group(1) 863 | mgr.add('{OPERAND_FORMAT_MEMORY_SP_IMM,FIELD_%s,FIELD_UNINIT,"","",%s},' % (name, wb_id)) 864 | continue 865 | m = re.match(r'^\[SP{,#<(.*)>}\]', operand) 866 | if m: 867 | name = m.group(1) 868 | mgr.add('{OPERAND_FORMAT_MEMORY_SP_OPTIONAL_IMM,FIELD_%s,FIELD_UNINIT,"","",%s},' % (name, wb_id)) 869 | continue 870 | if operand.startswith("SP{!}"): 871 | mgr.add('{OPERAND_FORMAT_SP,FIELD_UNINIT,FIELD_UNINIT,"sp","",WRITEBACK_OPTIONAL},') 872 | continue 873 | if operand.startswith("SP"): 874 | mgr.add('{OPERAND_FORMAT_SP,FIELD_UNINIT,FIELD_UNINIT,"sp","",WRITEBACK_NO},') 875 | continue 876 | # 877 | # PC 878 | # 879 | if operand.startswith("[PC]"): 880 | mgr.add('{OPERAND_FORMAT_MEMORY_PC,FIELD_UNINIT,FIELD_UNINIT,"","",WRITEBACK_NO},') 881 | continue 882 | if operand.startswith("PC"): 883 | mgr.add('{OPERAND_FORMAT_PC,FIELD_UNINIT,FIELD_UNINIT,"pc","",WRITEBACK_NO},') 884 | continue 885 | if 
operand.startswith("LSL #1"): 886 | mgr.add('{OPERAND_FORMAT_LSL_ONE,FIELD_UNINIT,FIELD_UNINIT,"lsl #1","",WRITEBACK_NO},') 887 | continue 888 | if operand.startswith("#0"): 889 | mgr.add('{OPERAND_FORMAT_ZERO,FIELD_UNINIT,FIELD_UNINIT,"#0","",WRITEBACK_NO},') 890 | continue 891 | if operand.startswith(""): 892 | mgr.add('{OPERAND_FORMAT_BARRIER_OPTION,FIELD_UNINIT,FIELD_UNINIT,"","",WRITEBACK_NO},') 893 | continue 894 | if operand.startswith("LR"): 895 | mgr.add('{OPERAND_FORMAT_LR,FIELD_UNINIT,FIELD_UNINIT,"lr","",WRITEBACK_NO},') 896 | continue 897 | if operand.startswith("#"): 898 | mgr.add('{OPERAND_FORMAT_IMM64,FIELD_UNINIT,FIELD_UNINIT,"#","",WRITEBACK_NO},') 899 | continue 900 | m = re.match(r'^<(imm.*)>$', operand) 901 | if m: 902 | name = m.group(1) 903 | mgr.add('{OPERAND_FORMAT_IMM,FIELD_%s,FIELD_UNINIT,"#","",WRITEBACK_NO},' % (name)) 904 | continue 905 | if operand[i:4] in ['', '', '', '
', '', '', '', '', '']: 906 | name = operand[2] 907 | mgr.add('{OPERAND_FORMAT_REG_FP,FIELD_%s,FIELD_UNINIT,"%s","",WRITEBACK_OPTIONAL},' % (name, operand[1].lower())) 908 | continue 909 | if operand[i:7] in ['', '', '', '', '', '', '', '', '']: 910 | name = operand[2] 911 | mgr.add('{OPERAND_FORMAT_REG_INDEX,FIELD_%s,FIELD_x,"%s","",WRITEBACK_OPTIONAL},' % (name, operand[1].lower())) 912 | continue 913 | if operand.startswith(''): 914 | mgr.add('{OPERAND_FORMAT_FPSCR,FIELD_FPSCR,FIELD_UNINIT,"","",WRITEBACK_OPTIONAL},') 915 | continue 916 | if operand.startswith(''): 917 | mgr.add('{OPERAND_FORMAT_RT_MRC,FIELD_Rt_mrc,FIELD_UNINIT,"","",WRITEBACK_NO},') 918 | continue 919 | if operand.startswith('apsr'): 920 | mgr.add('{OPERAND_FORMAT_SPEC_REG,FIELD_UNINIT,FIELD_UNINIT,"apsr","",WRITEBACK_NO},') 921 | continue 922 | # here is generic register catcher 923 | # make sure any special register cases (eg: "" you have come before this) 924 | m = re.match(r'^<(R.*)>$', operand) 925 | if m: 926 | name = m.group(1) 927 | mgr.add('{OPERAND_FORMAT_REG,FIELD_%s,FIELD_UNINIT,"","",WRITEBACK_NO},' % name) 928 | continue 929 | if operand.startswith(""): 930 | mgr.add('{OPERAND_FORMAT_COPROC,FIELD_coproc,FIELD_UNINIT,"","",WRITEBACK_NO},') 931 | continue 932 | m = re.match(r'^<(CR.*)>$', operand) 933 | if m: 934 | name = m.group(1) 935 | mgr.add('{OPERAND_FORMAT_COPROC_REG,FIELD_%s,FIELD_UNINIT,"","",WRITEBACK_NO},' % name) 936 | continue 937 | if operand.startswith(""): 938 | mgr.add('{OPERAND_FORMAT_REGISTERS,FIELD_registers,FIELD_UNINIT,"","",WRITEBACK_NO},') 939 | continue 940 | if operand.startswith(""): 941 | mgr.add('{OPERAND_FORMAT_REGISTERS,FIELD_registers,FIELD_UNINIT,"","[]",WRITEBACK_NO},') 942 | continue 943 | if operand.startswith(""): 944 | mgr.add('{OPERAND_FORMAT_REGISTERS_INDEXED,FIELD_registers_indexed,FIELD_UNINIT,"","",WRITEBACK_NO},') 945 | continue 946 | if operand.startswith(""): 947 | 
mgr.add('{OPERAND_FORMAT_LIST,FIELD_list,FIELD_UNINIT,"","",WRITEBACK_NO},') 948 | continue 949 | if operand.startswith(""): 950 | mgr.add('{OPERAND_FORMAT_ENDIAN,FIELD_E,FIELD_UNINIT,"","",WRITEBACK_NO},') 951 | continue 952 | if operand.startswith("{,}"): 953 | mgr.add('{OPERAND_FORMAT_SHIFT,FIELD_UNINIT,FIELD_UNINIT,"","",WRITEBACK_NO},') 954 | continue 955 | if operand.startswith('{,}'): 956 | mgr.add('{OPERAND_FORMAT_ROTATION,FIELD_UNINIT,FIELD_UNINIT,"","",WRITEBACK_NO},') 957 | continue 958 | if operand.startswith(""): 959 | mgr.add('{OPERAND_FORMAT_EFFECT,FIELD_UNINIT,FIELD_UNINIT,"","",WRITEBACK_NO},') 960 | continue 961 | if operand.startswith(""): 962 | mgr.add('{OPERAND_FORMAT_IFLAGS,FIELD_UNINIT,FIELD_UNINIT,"","",WRITEBACK_NO},') 963 | continue 964 | if operand.startswith(""): 965 | mgr.add('{OPERAND_FORMAT_FIRSTCOND,FIELD_firstcond,FIELD_UNINIT,"","",WRITEBACK_NO},') 966 | continue 967 | if operand.startswith("