├── .gitignore
├── clemency-as
    ├── .gitignore
    ├── example
    │   ├── test.bin
    │   └── test.s
    ├── Readme.md
    ├── assembler.py
    ├── parser.py
    └── instrs.py
├── clemency-exploit-utils
    ├── clemency
    │   ├── __init__.py
    │   ├── shellcode.py
    │   └── struct.py
    ├── README.md
    ├── shellcode
    │   ├── xor.bin
    │   ├── tiny.bin
    │   ├── tiny.s
    │   └── xor.s
    ├── nc
    ├── strings
    └── xxd
├── snowball
    ├── riscv
    │   ├── test.bin
    │   ├── test.elf
    │   ├── test.py
    │   ├── Makefile
    │   ├── riscv.h
    │   ├── helpers.h
    │   ├── opcodes.h
    │   ├── README.md
    │   ├── generate_py.py
    │   ├── riscv.c
    │   └── ida-plugin
    │   │   └── riscv.py
    ├── clemency
    │   ├── hello.u16
    │   ├── hello.u9
    │   ├── test.py
    │   ├── Makefile
    │   ├── ida-plugin
    │   │   ├── clemency-libc.py
    │   │   ├── clemency-dump.py
    │   │   ├── clemency-symbols.py
    │   │   └── clemency.py
    │   ├── clemency.h
    │   ├── helpers.h
    │   ├── clemency_ldr.py
    │   ├── clemency.c
    │   ├── opcodes.h
    │   └── generate_py.py
    └── .gitignore
├── README.md
└── AUTHORS.md


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | 


--------------------------------------------------------------------------------
/clemency-as/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | 


--------------------------------------------------------------------------------
/clemency-exploit-utils/clemency/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/snowball/riscv/test.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pwning/defcon25-public/HEAD/snowball/riscv/test.bin


--------------------------------------------------------------------------------
/snowball/riscv/test.elf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pwning/defcon25-public/HEAD/snowball/riscv/test.elf


--------------------------------------------------------------------------------
/snowball/clemency/hello.u16:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pwning/defcon25-public/HEAD/snowball/clemency/hello.u16


--------------------------------------------------------------------------------
/snowball/clemency/hello.u9:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pwning/defcon25-public/HEAD/snowball/clemency/hello.u9


--------------------------------------------------------------------------------
/clemency-as/example/test.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pwning/defcon25-public/HEAD/clemency-as/example/test.bin


--------------------------------------------------------------------------------
/clemency-exploit-utils/README.md:
--------------------------------------------------------------------------------
1 | Some library tooling to help make exploitation in a Clemency universe easier.
2 | 


--------------------------------------------------------------------------------
/clemency-exploit-utils/shellcode/xor.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pwning/defcon25-public/HEAD/clemency-exploit-utils/shellcode/xor.bin


--------------------------------------------------------------------------------
/clemency-exploit-utils/shellcode/tiny.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pwning/defcon25-public/HEAD/clemency-exploit-utils/shellcode/tiny.bin


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # defcon25-public
2 | 
3 | * snowball: Disassembler + IDA Plugins
4 | * clemency-as: Assembler
5 | * clemency-exploit-utils: Exploit utility/helper lib
6 | 


--------------------------------------------------------------------------------
/snowball/riscv/test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import pyriscv
3 | 
4 | code = open('test.bin', 'rb').read()
5 | for pc in xrange(0, len(code), 4):
6 |     inst = pyriscv.disassemble(pc, code[pc:pc+4])
7 |     print '%08X\t%s' % (pc, inst.str)
8 | 


--------------------------------------------------------------------------------
/clemency-exploit-utils/shellcode/tiny.s:
--------------------------------------------------------------------------------
 1 | #define MOVI(r,imm) ml r, {(imm) & 0x3ff} ;\
 2 |     mh r, {(imm) >> 10}
 3 | _start:
 4 |     MOVI(R9, 0x4010000)
 5 |     ml R2, 128
 6 |     MOVI(R0, 0x5010000)
 7 |     MOVI(R1, 0x5012000)
 8 |     dmt R0, R9, R2
 9 |     stt R2, [R1]
10 |     HT
11 | 


--------------------------------------------------------------------------------
/clemency-exploit-utils/clemency/shellcode.py:
--------------------------------------------------------------------------------
 1 | import clemency.struct as struct
 2 | from assembler import assemble_string
 3 | 
 4 | def make_shellcode_buffer(assembly_string):
 5 |   bitstr = assemble_string(assembly_string)
 6 |   l = (bitstr.size / 9)
 7 |   buf = struct.pack('%dS' % l, *struct.unpack('%dS' % l, bitstr.force_str()))
 8 |   return buf
 9 | 
10 | 


--------------------------------------------------------------------------------
/clemency-exploit-utils/shellcode/xor.s:
--------------------------------------------------------------------------------
 1 | // cLEMENCy XOR shellcode
 2 | #define MOVI(r,imm) ml r, {(imm) & 0x3ff} ;\
 3 |     mh r, {(imm) >> 10}
 4 | _start:
 5 |     MOVI(R9, 0x4010000)
 6 |     adi R10, R9, 60
 7 |     MOVI(R0, 0x5010000)
 8 | repeat:
 9 |     ldti R3, [R9+0,1]
10 |     xr R3, R3, ST
11 |     stti R3, [R0+0,1]
12 |     cm R9, R10
13 |     bn $repeat
14 | 
15 | ml R2, 60
16 | ml R4, {0x2000 - 60}
17 | ad R0, R0, R4
18 | stt R2, [R0]
19 | ht
20 | 


--------------------------------------------------------------------------------
/clemency-as/Readme.md:
--------------------------------------------------------------------------------
1 | To use, you probably just want to run `python assembler.py your_file.s`
2 | 
3 | See the example in the `example` folder for an example assembly file.
4 | 
Features include: full(?) support for the cLEMENCy instruction set; labels (declared as `label:` and used as `$label`); label arithmetic for instructions that support offsets (e.g. `{$label - $pc + 10}`); data-store pseudo-ops (`.ds`, `.dw`, `.dt`, `.dm`); and macros via the C pre-processor.
6 | 
7 | 


--------------------------------------------------------------------------------
/AUTHORS.md:
--------------------------------------------------------------------------------
 1 | This project was created and developed for DEFCON 25 CTF Finals for PPP
 2 | 
 3 | * Snowball: Disassembler + IDA modules
 4 |   - Andrew Wesie (awesie@gmail.com)
 5 |   - Brian Pak (brianairb@gmail.com)
 6 |   - Ned Williamson (nedwilliamson@gmail.com)
 7 |   - Ricky Zhou (ricky@rzhou.org)
 8 | 
 9 | * clemency-as & clemency-exploit-utils: Assembler and exploit utilities
10 |   - Maxime Serrano (@mserrano)
11 |   - Robert Xiao (@nneonneo)
12 |   - Tyler Nighswander (@tylerni7)
13 | 
14 | 


--------------------------------------------------------------------------------
/snowball/clemency/test.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import ctypes
 3 | import pyclemency
 4 | import struct
 5 | 
 6 | buf = open('hello.u16', 'rb').read()
 7 | pc = 0x90d
 8 | while True:
 9 |     size = 6
10 |     if pc + size > len(buf) / 2:
11 |         size = len(buf) / 2 - pc
12 |     if size == 0:
13 |         break
14 |     code = (ctypes.c_uint16 * size)()
15 |     for x in xrange(size):
16 |         code[x] = struct.unpack('<H', buf[(pc+x)*2:(pc+x+1)*2])[0]
17 |     inst = pyclemency.disassemble(pc, code)
18 |     print '%08X\t%s' % (pc, inst.str)
19 |     if inst.id == 0:
20 |         break
21 |     pc += inst.size
22 | 


--------------------------------------------------------------------------------
/snowball/riscv/Makefile:
--------------------------------------------------------------------------------
 1 | CC=gcc
 2 | CC_32=i686-w64-mingw32-gcc
 3 | CC_64=x86_64-w64-mingw32-gcc
 4 | # TODO Change as appropriate
 5 | NAME=riscv
 6 | SOURCES=$(NAME).c
 7 | INCLUDES=$(NAME).h opcodes.h
 8 | 
 9 | UNAME=$(shell uname)
10 | ifeq ($(UNAME),Darwin)
11 | 	DYLIB=lib$(NAME).dylib
12 | else
13 | 	DYLIB=lib$(NAME).so
14 | endif
15 | 
16 | all: $(NAME)_32.dll $(NAME)_64.dll $(DYLIB) py$(NAME).py
17 | 
18 | clean:
19 | 	rm -f $(NAME)_32.dll $(NAME)_64.dll $(DYLIB) py$(NAME).py py$(NAME).pyc
20 | 
21 | $(NAME)_32.dll: $(SOURCES) $(INCLUDES)
22 | 	$(CC_32) -shared -o $@ $(SOURCES)
23 | 
24 | $(NAME)_64.dll: $(SOURCES) $(INCLUDES)
25 | 	$(CC_64) -shared -o $@ $(SOURCES)
26 | 
27 | ifeq ($(UNAME),Darwin)
28 | $(DYLIB): $(SOURCES) $(INCLUDES)
29 | 	$(CC) -dynamiclib -o $@ $(SOURCES)
30 | else
31 | $(DYLIB): $(SOURCES) $(INCLUDES)
32 | 	$(CC) -fPIC -shared -Wl,-soname,$@ -o $@ $(SOURCES)
33 | endif
34 | 
35 | py$(NAME).py: $(INCLUDES)
36 | 	python generate_py.py $(NAME)
37 | 


--------------------------------------------------------------------------------
/snowball/clemency/Makefile:
--------------------------------------------------------------------------------
 1 | CC=gcc
 2 | CC_32=i686-w64-mingw32-gcc
 3 | CC_64=x86_64-w64-mingw32-gcc
 4 | # TODO Change as appropriate
 5 | NAME=clemency
 6 | SOURCES=$(NAME).c
 7 | INCLUDES=$(NAME).h opcodes.h helpers.h
 8 | 
 9 | UNAME=$(shell uname)
10 | ifeq ($(UNAME),Darwin)
11 | 	DYLIB=lib$(NAME).dylib
12 | else
13 | 	DYLIB=lib$(NAME).so
14 | endif
15 | 
16 | all: $(NAME)_32.dll $(NAME)_64.dll $(DYLIB) py$(NAME).py
17 | 
18 | clean:
19 | 	rm -f $(NAME)_32.dll $(NAME)_64.dll $(DYLIB) py$(NAME).py py$(NAME).pyc
20 | 
21 | $(NAME)_32.dll: $(SOURCES) $(INCLUDES)
22 | 	$(CC_32) -g -shared -o $@ $(SOURCES)
23 | 
24 | $(NAME)_64.dll: $(SOURCES) $(INCLUDES)
25 | 	$(CC_64) -g -shared -o $@ $(SOURCES)
26 | 
27 | ifeq ($(UNAME),Darwin)
28 | $(DYLIB): $(SOURCES) $(INCLUDES)
29 | 	$(CC) -g -dynamiclib -o $@ $(SOURCES)
30 | else
31 | $(DYLIB): $(SOURCES) $(INCLUDES)
32 | 	$(CC) -g -fPIC -shared -Wl,-soname,$@ -o $@ $(SOURCES)
33 | endif
34 | 
35 | py$(NAME).py: $(INCLUDES)
36 | 	python generate_py.py $(NAME)
37 | 


--------------------------------------------------------------------------------
/clemency-exploit-utils/nc:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | from clemency.struct import ClemencyFile
 3 | import threading
 4 | 
 5 | def parse_args(argv):
 6 |     import argparse
 7 |     parser = argparse.ArgumentParser(description="Netcat with nytes")
 8 |     parser.add_argument('host', help='Host')
 9 |     parser.add_argument('port', help='Port', type=int)
10 |     return parser.parse_args(argv)
11 | 
def do_read(nf):
    """Copy incoming data from *nf* to stdout forever.

    Each round reads up to value 10 (newline) or 1024 items via
    nf.readuntil, converts the values with chr() and writes them out.
    Never returns; intended to run in a daemon thread.
    """
    # This file only imports sys under the __main__ guard, so import it here
    # to keep the function usable regardless of how the module was loaded.
    import sys

    while 1:
        chunk = nf.readuntil(10, maxsize=1024)
        sys.stdout.write(''.join(map(chr, chunk)))
        sys.stdout.flush()
16 | 
def main(argv):
    """Connect to host:port and relay stdin lines to the remote side.

    A daemon thread (do_read) prints whatever the remote sends. Each line
    typed on stdin is sent with a trailing newline, one value per character.

    Returns 0 when stdin reaches end-of-file (previously this surfaced as
    an unhandled EOFError traceback).
    """
    args = parse_args(argv)

    import socket
    s = socket.create_connection((args.host, args.port))
    f = s.makefile('rw', 0)  # unbuffered file object over the socket
    nf = ClemencyFile(f)

    # Reader runs in the background; daemon so it never blocks interpreter exit.
    th = threading.Thread(target=do_read, args=(nf,))
    th.daemon = True
    th.start()

    while 1:
        try:
            res = raw_input() + '\n'
        except EOFError:
            # stdin closed (e.g. Ctrl-D or end of piped input): stop cleanly.
            return 0
        nf.write([ord(i) for i in res])
32 | 
if __name__ == '__main__':
    # Keep this import at module level: do_read's original form relies on it.
    import sys
    # sys.exit rather than the `exit` helper, which the site module only
    # provides for interactive use and is absent under `python -S`.
    sys.exit(main(sys.argv[1:]))
36 | 


--------------------------------------------------------------------------------
/clemency-exploit-utils/strings:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | import sys
 3 | import string
 4 | 
 5 | import clemency.struct as struct
 6 | 
 7 | THRESHOLD = 3
 8 | 
 9 | if '-n' in sys.argv:
10 |   idx = sys.argv.index('-n')
11 |   THRESHOLD = int(sys.argv[idx+1])
12 |   del sys.argv[idx+1]
13 |   del sys.argv[idx]
14 | 
15 | NULL_TERMINATED_ONLY = False
16 | if '--nto' in sys.argv:
17 |   NULL_TERMINATED_ONLY = True
18 |   idx = sys.argv.index('--nto')
19 |   del sys.argv[idx]
20 | 
21 | if len(sys.argv) > 1:
22 |   f = struct.ClemencyFile(open(sys.argv[1], 'r'))
23 | else:
24 |   f = struct.ClemencyFile(sys.stdin)
25 | 
26 | ps = map(ord, string.printable)
27 | 
28 | buf = f.read_all()
29 | pos = 0
30 | while pos < len(buf):
31 |   printable_len = 0
32 |   if buf[pos] == 0:
33 |     pos = pos+1
34 |     continue
35 |   if buf[pos] in ps:
36 |     printable_len = 1
37 |     while ((pos+printable_len) < len(buf)) and buf[pos+printable_len] in ps:
38 |       printable_len += 1
39 |   else:
40 |     pos += 1
41 |     continue
42 |   if (printable_len-1) > THRESHOLD and ((not NULL_TERMINATED_ONLY) or (pos+printable_len >= len(buf) or buf[pos+printable_len] == 0)):
43 |     print buf[pos:min(pos+printable_len, len(buf))].to_printable_string().strip('\n')
44 |   pos += printable_len
45 | 


--------------------------------------------------------------------------------
/clemency-as/assembler.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | import parser
 3 | import instrs
 4 | import os
 5 | 
 6 | import tempfile
 7 | 
 8 | def parse_args(argv):
 9 |     import argparse
10 |     parser = argparse.ArgumentParser(description="Assemble a thing")
11 |     parser.add_argument('infile', help='Input file')
12 |     parser.add_argument('outfile', nargs='?', help='Output file')
13 |     return parser.parse_args(argv)
14 | 
15 | def main(argv):
16 |     args = parse_args(argv)
17 | 
18 |     if args.outfile is None:
19 |         args.outfile = os.path.splitext(args.infile)[0] + '.bin'
20 | 
21 |     print "Assembling %s to %s..." % (args.infile, args.outfile)
22 | 
23 |     asm = subprocess.check_output(['cpp', '-xc++', args.infile])
24 | 
25 |     out = parser.Assembler().assemble(asm)
26 |     print "Output: %d nytes" % (len(out)/9)
27 |     with open(args.outfile, 'wb') as outf:
28 |         outf.write(out.force_str())
29 | 
def assemble_string(s):
    """Assemble source text *s* and return the assembler's output object.

    The text is written to a temporary file, preprocessed with
    `cpp -xc++`, and the result is passed to parser.Assembler().assemble.

    Raises subprocess.CalledProcessError if cpp exits non-zero. The
    temporary file is removed in every case (it used to be left behind
    when cpp failed).
    """
    # delete=False: cpp must be able to open the file by name after we close it.
    t = tempfile.NamedTemporaryFile(delete=False)
    try:
        t.write(s)
        t.close()
        asm = subprocess.check_output(['cpp', '-xc++', t.name])
    finally:
        t.close()  # no-op if already closed; required before unlink on some platforms
        os.unlink(t.name)
    return parser.Assembler().assemble(asm)
38 | 
if __name__ == '__main__':
    import sys
    # sys.exit rather than the `exit` helper, which the site module only
    # provides for interactive use and is absent under `python -S`.
    sys.exit(main(sys.argv[1:]))
42 | 


--------------------------------------------------------------------------------
/snowball/clemency/ida-plugin/clemency-libc.py:
--------------------------------------------------------------------------------
 1 | import idaapi
 2 | from idaapi import *
 3 | 
 4 | LIBC_FUNCS = {
 5 |     'malloc': '0000 0000 00C0 0000 004B 0006 0051 000B 0010 0051',
 6 |     'free': '0004 0159 0000 0000 0000 0000 0042 0159',
 7 |     'send': '0040 00C4 0158 0000 0080',
 8 |     'recv': '0120 0000 0000 0140 0108 0070 0080 0148 0158',
 9 |     'memset': '00C0 0060 0000 0102 0100 0048 0048',
10 |     'strlen': '0040 0060 0000 0084 0070 0040 0082',
11 |     'memcpy': '0000 0181 0015 0102 0158 0008 0000 0000',
12 |     'strncmp': '0017 0080 0172 0000 0100 0180 0021 0084 0010 000A 0064 0170 01FF',
13 |     'atoi': '0000 0151 0000 0000 0000 0000 01D0 0140 0100 0100 0172 002D 0100 0180 0018',
14 |     'fflush': '0000 0120 0172 0000 0100 0186 0009 0000 0120 0000 0100 0187 0036 0010 0060 0101',
15 |     'strncpy': '010A 0170 0100 0185 0006 0014 0061 0141 0090 0060 0101 0056 0060 0161 0012 0060',
16 |     'strcmp': '0100 0180 001E 0064 0170 01FF 0180 01E6 0000 0181 0011 0000',
17 |     'isspace': '0000 0172 0020 0100 0180 0009 0100 0120 0000 0100 0187 0009 0000 0124 0001 0050 0060 0101 0002 0060 0021 003A 0159 0008 0000 0000 0000 0138 015B 0040 0000 0000',
18 |     'isalpha': '0140 013A 016B 0050 0000 0000 0000 013B 0063 01A1 017A 0013 0033 003A 0169 0050 0000 0000 0000 0001 0011 000B 0100 0172 001A 0100 0124',
19 |     'printf': '0168 00F0 0000 0000 0000 013A 016B 0050 0000 0000 0000 013B 0063 01A1 017A 0013 007B 003A 0169 0070 0000 0000 0000 0038 0001 0063 0100 0124 013D 0100 0114 0025',
20 |     'strcpy': '0000 000A 0000 0172 0003 0000 0181 0015 0048 0158 0008 0000 0000 0000',
21 | }
22 | 
# Search the segment containing address 0 for each signature and name the
# function that contains the hit.
for x in LIBC_FUNCS:
    ea = find_binary(0, SegEnd(0), LIBC_FUNCS[x], 16, SEARCH_DOWN)
    # Compare against BADADDR instead of a literal 0xFFFFFFFF so the
    # "not found" check also holds where addresses are 64 bits wide.
    if ea == idaapi.BADADDR:
        continue
    # NOTE(review): the reason for probing at ea + 4 is not evident here;
    # kept as in the original.
    func_ea = FirstFuncFchunk(ea + 4)
    if func_ea == idaapi.BADADDR:
        # The match is not inside any known function; nothing to name.
        continue
    MakeName(func_ea, x)
29 | 


--------------------------------------------------------------------------------
/snowball/clemency/ida-plugin/clemency-dump.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Dumps to original_binary.bin.patched.
 3 | """
 4 | 
 5 | import struct
 6 | import os
 7 | from idaapi import *
 8 | 
 9 | class ClemencyFile(object):
10 |     def __init__(self, f):
11 |         self.file = f
12 |         self.rpn = 0
13 |         self.rp = 0
14 |         self.wpn = 0
15 |         self.wp = 0
16 |         self.towrite = []
17 | 
18 |     def _writenyte(self, n):
19 |         self.wp = (self.wp << 9) | n
20 |         self.wpn += 9
21 |         while self.wpn >= 8:
22 |             self.towrite.append((self.wp >> (self.wpn - 8)) & 0xff)
23 |             self.wpn -= 8
24 |         self.wp = self.wp & ((1 << self.wpn) - 1)
25 | 
26 |     def _writeflush(self):
27 |         self.wp <<= (8 - self.wpn)
28 |         self.towrite.append(self.wp)
29 |         self.file.write(''.join(map(chr, self.towrite)))
30 |         self.towrite = []
31 |         self.wpn = 0
32 |         self.wp = 0
33 | 
34 |     def write(self, nytearr):
35 |         for n in nytearr:
36 |             self._writenyte(n)
37 |         self._writeflush()
38 | 
39 |     def close(self):
40 |         self.file.close()
41 | 
42 | 
def get_first_segment():
    """
    Return the contents of the segment containing address 0 as a tuple of
    integers, one per 16-bit unit read from the database (each holding a
    9-bit byte).
    """
    start = SegStart(0)
    raw = get_many_bytes(start, SegEnd(0))
    unit_count = len(raw) / 2
    return struct.unpack('H' * unit_count, raw)
49 | 
50 | def write_patch():
51 |     input_file_path = idaapi.get_input_file_path()
52 | 
53 |     if not os.path.exists(input_file_path):
54 |         print "ClemDump: warning: {} does not exist.".format(input_file_path)
55 | 
56 |     output_path = input_file_path + '.patched'
57 | 
58 |     print "ClemDump: patched binary to", output_path
59 |     with open(output_path, 'wb') as output_fd:
60 |         ClemencyFile(output_fd).write(get_first_segment())
61 | 
62 | write_patch()
63 | 


--------------------------------------------------------------------------------
/clemency-as/example/test.s:
--------------------------------------------------------------------------------
 1 | ldt    R01, [R00 + 0x57, 3]
 2 | smp    R00, R01, E
 3 | ad     R00, R00, R01
 4 | ml     R04, 0x400
 5 | mu     R05, R00, R04
 6 | smp    R05, R02, RW
 7 | ad     R00, R00, R02
 8 | mu     R05, R00, R04
 9 | smp    R05, R03, RW
10 | ad     R00, R00, R03
11 | adi    R00, R00, 0x1
12 | ml     R02, 0xffde
13 | sb     R02, R02, R00
14 | mu     R05, R00, R04
15 | smp    R05, R02, RW
16 | ml     R00, 0x0
17 | mh     R00, 0xffdf
18 | ml     R01, 0x20
19 | smp    R00, R01, RW
20 | mu     R01, R01, R04
21 | ad     ST, R00, R01
22 | ml     R00, 0x1ff
23 | ei     R00
24 | or     R00, R05, R05
25 | mu     R01, R02, R04
26 | car    $sub_901
27 | ht
28 | 
29 | sub_901:
30 |   sttd   R00, [ST, 2]
31 |   sttd   R28, [ST, 3]
32 |   or.    R28, ST, ST
33 |   sbi.   ST, ST, 0xf
34 |   sttd   R08, [ST, 4]
35 |   ldt    R08, [R28 + 0x9]
36 |   ldt    R09, [R28 + 0xc]
37 |   ml     R02, 0x21
38 |   ml     R01, 0x0
39 |   ml     R00, 0x4c
40 |   mh     R00, 0x1b
41 |   car    $sub_5c5f
42 |   ml     R02, 0xf
43 |   ml     R01, 0x0
44 |   or.    R00, R08, R08
45 |   car    $sub_5c5f
46 |   stt    R09, [R08]
47 |   ml     R10, 0x6a
48 |   mh     R10, 0x1b
49 |   stt    R08, [R10]
50 |   ml     R00, 0x0
51 |   ldti   R08, [ST, 4]
52 |   ldt    R28, [R28, 3]
53 |   adi.   ST, ST, 0x6
54 |   re
55 | 
56 | sub_5c5f:
57 |   or     R03, R00, R00
58 |   rli    R04, R01, 0x09
59 |   or     R01, R04, R01
60 |   rli    R04, R01, 0x09
61 |   or     R01, R04, R01
62 | lbl_5c6e:
63 |   cmi    R02, 0x3
64 |   bsl    $lbl_5c80
65 |   stti   R01, [R03]
66 |   sbi    R02, R02, 0x3
67 |   b      $lbl_5c6e
68 | lbl_5c80:
69 |   cmi    R02, 0x1
70 |   bl     $lbl_5c98
71 |   bg     $lbl_5c92
72 |   sts    R01, [R03]
73 |   b      $lbl_5c98
74 | lbl_5c92:
75 |   stw    R01, [R03]
76 | lbl_5c98:
77 |   re
78 | 
79 | data:
80 |   .ds 31
81 |   .ds 100
82 |   .ds 511
83 |   .dw 131072
84 |   .dw 262141
85 |   .dt 123456789
86 |   .dm 12987912378913823791
87 | 


--------------------------------------------------------------------------------
/snowball/riscv/riscv.h:
--------------------------------------------------------------------------------
 1 | // XXX This file is processed by generate_py.py. Avoid changing the format
 2 | //     unless you want to generate the ctypes struct yourself. You have been
 3 | //     warned!
 4 | //
//     In general, adding C++ comments and members in the inst_t should work
 6 | //     as expected.
 7 | //
 8 | #ifdef __cplusplus
 9 | extern "C" {
10 | #endif
11 | #include <stdint.h>
12 | 
13 | #ifdef _WIN32
14 | #define EXPORT __declspec(dllexport) extern
15 | #else
16 | #define EXPORT extern
17 | #endif
18 | 
19 | // Enum of instructions
20 | enum {
21 |     Iinvalid = 0,
22 | #define INS(ins, opcode) I##ins,
23 | #include "opcodes.h"
24 |     I__count
25 | };
26 | 
27 | // Define a field and its used flag
28 | #define DEFINE_FIELD(type, name) type name; uint8_t used_##name;
29 | // Nameless union wrapper around fields
30 | #define BEGIN_FIELDS() union { struct {
31 | #define END_FIELDS() }; char _fields[1]; };
32 | 
33 | // Decoded instruction
34 | typedef struct {
35 |     // Structure size (used as sanity check)
36 |     unsigned int _st_size;
37 | 
38 |     // TODO Instruction
39 |     uint32_t insn;
40 |     // TODO EIP/PC/EA
41 |     uint32_t pc;
42 |     // Input bytes
43 |     const uint8_t *bytes;
44 | 
45 |     // Instruction size
46 |     unsigned int size;
47 |     // Internal instruction
48 |     unsigned int id;
49 |     // Mnemonic
50 |     const char *mnemonic;
51 |     // Assembly string
52 |     char str[64];
53 | 
54 |     BEGIN_FIELDS()
55 | 
56 |     // TODO Decoded fields (must come at the end of struct)
57 |     DEFINE_FIELD(uint8_t, opcode)
58 |     DEFINE_FIELD(uint8_t, funct3)
59 |     DEFINE_FIELD(uint8_t, funct7)
60 |     DEFINE_FIELD(uint8_t, rd)
61 |     DEFINE_FIELD(uint8_t, rs1)
62 |     DEFINE_FIELD(uint8_t, rs2)
63 |     DEFINE_FIELD(int32_t, imm) // sign-extended
64 | 
65 |     END_FIELDS()
66 | } inst_t;
67 | 
68 | EXPORT void disassemble(inst_t *inst, uint32_t pc, const uint8_t *buf);
69 | EXPORT const char *mnemonics[];
70 | EXPORT const char *registers[];
71 | EXPORT const unsigned int num_registers;
72 | #ifdef __cplusplus
73 | }
74 | #endif
75 | 


--------------------------------------------------------------------------------
/snowball/clemency/ida-plugin/clemency-symbols.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Saves all function names in idb to symbol map for org's Clemency debugger.
 3 | """
 4 | 
 5 | import os
 6 | import idaapi
 7 | from idaapi import *
 8 | 
 9 | def get_symbol_map():
10 |     """
11 |     Return symbols in current .idb as .map format.
12 |     """
13 |     functions = {}
14 |     for ea in Segments():
15 |         for funcea in Functions(SegStart(ea), SegEnd(ea)):
16 |             size = FindFuncEnd(funcea) - funcea
17 |             functions[funcea] = (GetFunctionName(funcea), size)
18 |     # It may not be necessary to sort by ea, but be safe...
19 |     output_lines = []
20 |     for i, (ea, (name, size)) in enumerate(sorted(functions.items())):
21 |         if len(name) > 255:
22 |             print "ClemSym: truncating name", name
23 |         name = name[:255]
24 |         line = "%d: %s @ %07x %d" % (i, name, ea, size)
25 |         output_lines.append(line)
26 |     return '\n'.join(output_lines)
27 | 
28 | def save_symbols():
29 |     """
30 |     Gather symbols and write to .map using expected naming convention.
31 |     """
32 |     input_file_path = idaapi.get_input_file_path()
33 | 
34 |     if not os.path.exists(input_file_path):
35 |         print "ClemSym: warning: {} does not exist.".format(input_file_path)
36 | 
37 |     output_path = input_file_path + '.map'
38 | 
39 |     new_data = get_symbol_map()
40 | 
41 |     if os.path.exists(output_path):
42 |         with open(output_path, 'rb') as orig_fd:
43 |             orig_data = orig_fd.read()
44 |         if orig_data == new_data:
45 |             print "ClemSym: symbol map on disk is already up to date"
46 |             return
47 | 
48 |         # Always backup as we *really* don't want to kill someone's
49 |         # hand-made symbol map!
50 |         bak_ctr = 0
51 |         while os.path.exists(output_path + '.bak' + str(bak_ctr)):
52 |             bak_ctr += 1
53 |         os.rename(output_path, output_path + '.bak' + str(bak_ctr))
54 | 
55 |     print "ClemSym: writing symbols to", output_path
56 |     with open(output_path, 'wb') as output_fd:
57 |         output_fd.write(new_data)
58 | 
59 | save_symbols()
60 | 


--------------------------------------------------------------------------------
/snowball/clemency/clemency.h:
--------------------------------------------------------------------------------
 1 | // XXX This file is processed by generate_py.py. Avoid changing the format
 2 | //     unless you want to generate the ctypes struct yourself. You have been
 3 | //     warned!
 4 | //
//     In general, adding C++ comments and members in the inst_t should work
 6 | //     as expected.
 7 | //
 8 | #ifdef __cplusplus
 9 | extern "C" {
10 | #endif
11 | #include <stdint.h>
12 | 
13 | #ifdef _WIN32
14 | #define EXPORT __declspec(dllexport) extern
15 | #else
16 | #define EXPORT extern
17 | #endif
18 | 
19 | // Enum of instructions
20 | enum {
21 |     Iinvalid = 0,
22 | #define INS(ins, opcode) I##ins,
23 | #include "opcodes.h"
24 |     I__count
25 | };
26 | 
27 | // Define a field and its used flag
28 | #define DEFINE_FIELD(type, name) type name; uint8_t used_##name;
29 | // Nameless union wrapper around fields
30 | #define BEGIN_FIELDS() union { struct {
31 | #define END_FIELDS() }; char _fields[1]; };
32 | 
33 | // Decoded instruction
34 | typedef struct {
35 |     // Structure size (used as sanity check)
36 |     unsigned int _st_size;
37 | 
38 |     // TODO Instruction
39 |     uint64_t insn;
40 |     // TODO EIP/PC/EA
41 |     uint32_t pc;
42 |     // Input bytes
43 |     const uint16_t *bytes;
44 | 
45 |     // Instruction size
46 |     unsigned int size;
47 |     // Internal instruction
48 |     unsigned int id;
49 |     // Mnemonic
50 |     const char *mnemonic;
51 |     // Assembly string
52 |     char str[64];
53 | 
54 |     BEGIN_FIELDS()
55 | 
56 |     // TODO Decoded fields (must come at the end of struct)
57 |     DEFINE_FIELD(uint32_t, opcode)
58 |     DEFINE_FIELD(uint8_t, rA)
59 |     DEFINE_FIELD(uint8_t, rB)
60 |     DEFINE_FIELD(uint8_t, rC)
61 |     DEFINE_FIELD(uint8_t, arith_signed)
62 |     DEFINE_FIELD(uint8_t, is_imm)
63 |     DEFINE_FIELD(uint32_t, funct)
64 |     DEFINE_FIELD(uint8_t, uf)
65 |     DEFINE_FIELD(uint16_t, reg_count)
66 |     DEFINE_FIELD(uint16_t, cc)
67 |     DEFINE_FIELD(int32_t, imm) // sign-extended
68 |     DEFINE_FIELD(uint8_t, adj_rb)
69 |     DEFINE_FIELD(uint8_t, rw)
70 |     DEFINE_FIELD(uint8_t, mem_flags)
71 | 
72 |     END_FIELDS()
73 | } inst_t;
74 | 
75 | EXPORT void disassemble(inst_t *inst, uint32_t pc, const uint16_t *buf);
76 | EXPORT const char *mnemonics[];
77 | EXPORT const char *registers[];
78 | EXPORT const unsigned int num_registers;
79 | #ifdef __cplusplus
80 | }
81 | #endif
82 | 


--------------------------------------------------------------------------------
/clemency-exploit-utils/xxd:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import sys
 4 | import string
 5 | import clemency.struct as struct
 6 | 
 7 | COLS = 9
 8 | 
 9 | NO_RAW = False
10 | if '--noraw' in sys.argv:
11 |   NO_RAW = True
12 |   idx = sys.argv.index('--noraw')
13 |   del sys.argv[idx]
14 | 
15 | OCTAL = False
16 | if '--oc' in sys.argv:
17 |   OCTAL = True
18 |   idx = sys.argv.index('--oc')
19 |   del sys.argv[idx]
20 | 
21 | DUMP_BUFFER = False
22 | if '--dumpbuffer' in sys.argv:
23 |   DUMP_BUFFER = True
24 |   idx = sys.argv.index('--dumpbuffer')
25 |   del sys.argv[idx]
26 | 
27 | if '--wide' in sys.argv:
28 |   COLS = 18
29 |   idx = sys.argv.index('--wide')
30 |   del sys.argv[idx]
31 | if '--ultrawide' in sys.argv:
32 |   COLS = 27
33 |   idx = sys.argv.index('--ultrawide')
34 |   del sys.argv[idx]
35 | 
36 | TRIPLES = False
37 | if '--triples' in sys.argv:
38 |   TRIPLES = True
39 |   idx = sys.argv.index('--triples')
40 |   del sys.argv[idx]
41 | 
42 | if len(sys.argv) > 1:
43 |   f = struct.ClemencyFile(open(sys.argv[1], 'r'))
44 | else:
45 |   f = struct.ClemencyFile(sys.stdin)
46 | 
47 | data = f.read_all()
48 | f.close()
49 | 
50 | if DUMP_BUFFER:
51 |   print `data`
52 |   exit(0)
53 | 
54 | last_len = 0
55 | lines = [data[i:i+COLS] for i in xrange(0, len(data), COLS)]
56 | pos = 0
57 | fmt_triple = "%09o" if OCTAL else "%07x"
58 | fmt_single = "%03o" if OCTAL else "%03x"
59 | for line in lines:
60 |   output = "%07x: " % pos
61 |   pos += len(line)
62 |   real_data = None
63 |   if TRIPLES:
64 |     real_data = struct.unpack('T' * (len(line) / 3) + 'S' * (len(line) % 3), line)
65 |     for triple in real_data[:len(line) / 3]:
66 |       output += (fmt_triple % triple) + ' '
67 |     accum = 0
68 |     for single in real_data[len(line)/3:]:
69 |       accum <<= 9
70 |       accum |= single
71 |     output += (fmt_triple % accum)
72 |   else:
73 |     real_data = struct.unpack('S' * (len(line)), line)
74 |     for single in real_data:
75 |       output += (fmt_single % single) + ' '
76 |   len_so_far = len(output)
77 |   if last_len != 0 and len_so_far < last_len:
78 |     output += ' ' * (last_len - len_so_far)
79 |   last_len = len_so_far
80 |   output += ' '
81 |   if not NO_RAW:
82 |     for c in line:
83 |       cb = struct.CB([c])
84 |       if cb.is_printable() and (chr(c) in (string.ascii_letters + string.digits)):
85 |         output += cb.to_printable_string()
86 |       else:
87 |         output += '.'
88 |   print output
89 | 


--------------------------------------------------------------------------------
/snowball/.gitignore:
--------------------------------------------------------------------------------
  1 | # Swap
  2 | [._]*.s[a-v][a-z]
  3 | [._]*.sw[a-p]
  4 | [._]s[a-v][a-z]
  5 | [._]sw[a-p]
  6 | 
  7 | # Session
  8 | Session.vim
  9 | 
 10 | # Temporary
 11 | .netrwhist
 12 | *~
 13 | # Auto-generated tag files
 14 | tags
 15 | 
 16 | # General
 17 | *.DS_Store
 18 | .AppleDouble
 19 | .LSOverride
 20 | 
 21 | # Icon must end with two \r
 22 | Icon
 23 | 
 24 | 
 25 | # Thumbnails
 26 | ._*
 27 | 
 28 | # Files that might appear in the root of a volume
 29 | .DocumentRevisions-V100
 30 | .fseventsd
 31 | .Spotlight-V100
 32 | .TemporaryItems
 33 | .Trashes
 34 | .VolumeIcon.icns
 35 | .com.apple.timemachine.donotpresent
 36 | 
 37 | # Directories potentially created on remote AFP share
 38 | .AppleDB
 39 | .AppleDesktop
 40 | Network Trash Folder
 41 | Temporary Items
 42 | .apdisk
 43 | 
 44 | # Byte-compiled / optimized / DLL files
 45 | __pycache__/
 46 | *.py[cod]
 47 | *$py.class
 48 | 
 49 | # C extensions
 50 | *.so
 51 | 
 52 | # Distribution / packaging
 53 | .Python
 54 | build/
 55 | develop-eggs/
 56 | dist/
 57 | downloads/
 58 | eggs/
 59 | .eggs/
 60 | lib/
 61 | lib64/
 62 | parts/
 63 | sdist/
 64 | var/
 65 | wheels/
 66 | *.egg-info/
 67 | .installed.cfg
 68 | *.egg
 69 | 
 70 | # PyInstaller
 71 | #  Usually these files are written by a python script from a template
 72 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 73 | *.manifest
 74 | *.spec
 75 | 
 76 | # Installer logs
 77 | pip-log.txt
 78 | pip-delete-this-directory.txt
 79 | 
 80 | # Unit test / coverage reports
 81 | htmlcov/
 82 | .tox/
 83 | .coverage
 84 | .coverage.*
 85 | .cache
 86 | nosetests.xml
 87 | coverage.xml
 88 | *.cover
 89 | .hypothesis/
 90 | 
 91 | # Translations
 92 | *.mo
 93 | *.pot
 94 | 
 95 | # Django stuff:
 96 | *.log
 97 | local_settings.py
 98 | 
 99 | # Flask stuff:
100 | instance/
101 | .webassets-cache
102 | 
103 | # Scrapy stuff:
104 | .scrapy
105 | 
106 | # Sphinx documentation
107 | docs/_build/
108 | 
109 | # PyBuilder
110 | target/
111 | 
112 | # Jupyter Notebook
113 | .ipynb_checkpoints
114 | 
115 | # pyenv
116 | .python-version
117 | 
118 | # celery beat schedule file
119 | celerybeat-schedule
120 | 
121 | # SageMath parsed files
122 | *.sage.py
123 | 
124 | # Environments
125 | .env
126 | .venv
127 | env/
128 | venv/
129 | ENV/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | 


--------------------------------------------------------------------------------
/snowball/riscv/helpers.h:
--------------------------------------------------------------------------------
 1 | // Macros and inline functions
 2 | static uint32_t read_32(inst_t *inst)
 3 | {
 4 |     uint32_t result;
 5 |     const uint8_t *ptr = inst->bytes + inst->size;
 6 | #ifdef BIG_ENDIAN
 7 |     result = (ptr[0] << 24) | (ptr[1] << 16) | (ptr[2] << 8) | (ptr[3] << 0);
 8 | #else
 9 |     result = (ptr[0] << 0) | (ptr[1] << 8) | (ptr[2] << 16) | (ptr[3] << 24);
10 | #endif
11 |     inst->size += 4;
12 |     return result;
13 | }
14 | 
15 | static uint32_t read_24(inst_t *inst)
16 | {
17 |     uint32_t result;
18 |     const uint8_t *ptr = inst->bytes + inst->size;
19 | #ifdef BIG_ENDIAN
20 |     result = (ptr[0] << 16) | (ptr[1] << 8) | (ptr[2] << 0);
21 | #else
22 |     result = (ptr[0] << 0) | (ptr[1] << 8) | (ptr[2] << 16);
23 | #endif
24 |     inst->size += 3;
25 |     return result;
26 | }
27 | 
28 | static uint16_t read_16(inst_t *inst)
29 | {
30 |     uint16_t result;
31 |     const uint8_t *ptr = inst->bytes + inst->size;
32 | #ifdef BIG_ENDIAN
33 |     result = (ptr[0] << 8) | (ptr[1] << 0);
34 | #else
35 |     result = (ptr[0] << 0) | (ptr[1] << 8);
36 | #endif
37 |     inst->size += 2;
38 |     return result;
39 | }
40 | 
41 | static uint8_t read_8(inst_t *inst)
42 | {
43 |     uint8_t result;
44 |     const uint8_t *ptr = inst->bytes + inst->size;
45 |     result = ptr[0];
46 |     inst->size += 1;
47 |     return result;
48 | }
49 | 
// Extract bits [offset, offset+count) of src, counting from the least
// significant bit. The mask is built as an unsigned value so that count == 31
// does not shift into the sign bit of an int; count must be less than 32.
#define EXTRACT(src, offset, count) (((src) >> (offset)) & ((1u << (count)) - 1u))

// Extract bits from inst->insn to a field (marks field as used)
#define FIELD(name, offset, count) do { inst->name = EXTRACT(inst->insn, offset, count); inst->used_##name = 1; } while (0)

// Concatenate extracted bits on to a field (appends them to the end)
#define CONCAT(name, offset, count) do { inst->name = (inst->name << (count)) | EXTRACT(inst->insn, offset, count); } while (0)

// Sign-extend an integer with fewer than 32 bits.
// The left shift is done on an unsigned copy (shifting a signed value into or
// past the sign bit is undefined); the arithmetic right shift then replicates
// the top bit. count is parenthesised so an expression argument expands correctly.
#define SIGN_EXTEND(name, count) do { inst->name = (int32_t)((uint32_t)inst->name << (32 - (count))) >> (32 - (count)); } while (0)
61 | 
62 | // Helper to reverse bits in an integer
// Reverse the order of the 32 bits in x by swapping neighbouring groups of
// 1, 2, 4, 8 and finally 16 bits.
static uint32_t reverse_bits(uint32_t x)
{
    static const uint32_t masks[5] = {
        0x55555555u, 0x33333333u, 0x0f0f0f0fu, 0x00ff00ffu, 0x0000ffffu
    };
    unsigned int stage;
    for (stage = 0; stage < 5; stage++) {
        const unsigned int width = 1u << stage;
        x = ((x >> width) & masks[stage]) | ((x & masks[stage]) << width);
    }
    return x;
}
72 | 
// Zero the tail of *inst: everything from the _fields member up to the end of
// the structure. The length is computed as (end of struct) - (address of _fields).
// NOTE(review): the pointer subtraction presumes _fields is a char-typed marker
// placed before the decoded fields and their used_* flags -- confirm in the header.
static void clear_used(inst_t *inst)
{
    // Clear fields.
    memset(inst->_fields, 0, (char *)inst + sizeof(inst_t) - inst->_fields);
}
78 | 
// Append a comma, or a space if this is the first operand.
// Expects a local int named `first` at the expansion site.
#define PRINT_SEP() do { strcat(inst->str, first ? " " : ", "); first = 0; } while (0)

// Append a register field (if it is marked as used)
#define PRINT_REGISTER(name) do { if (inst->used_##name) { PRINT_SEP(); strcat(inst->str, registers[inst->name]); } } while (0)

// Append an immediate field as hex (if it is marked as used).
// The text is written at the current end of inst->str; passing inst->str as
// both destination and "%s" argument of sprintf is undefined (overlapping objects).
#define PRINT_IMMEDIATE(name) do { if (inst->used_##name) { PRINT_SEP(); sprintf(inst->str + strlen(inst->str), "#%X", inst->name); } } while (0)
87 | 
88 | 


--------------------------------------------------------------------------------
/snowball/riscv/opcodes.h:
--------------------------------------------------------------------------------
  1 | // XXX This file is processed by generate_py.py.
  2 | 
  3 | // Default macros. You can safely ignore these.
  4 | #ifndef FORMAT
  5 | #define FORMAT(x)
  6 | #endif
  7 | #ifndef INS
  8 | #define INS(x,y)
  9 | #endif
 10 | #ifndef INS_1
 11 | #define INS_1(w,x,y,z) INS(w,x)
 12 | #endif
 13 | #ifndef INS_2
 14 | #define INS_2(w,x,y1,z1,y2,z2) INS_1(w,x,y1,z1)
 15 | #endif
 16 | #ifndef INS_3
 17 | #define INS_3(w,x,y1,z1,y2,z2,y3,z3) INS_2(w,x,y1,z1,y2,z2)
 18 | #endif
 19 | #ifndef INS_4
 20 | #define INS_4(w,x,y1,z1,y2,z2,y3,z3,y4,z4) INS_3(w,x,y1,z1,y2,z2,y3,z3)
 21 | #endif
 22 | 
 23 | // TODO Instruction definitions
 24 | FORMAT( U )
 25 | INS( lui, 0b0110111 )
 26 | INS( auipc, 0b0010111 )
 27 | 
 28 | FORMAT( Jj )
 29 | INS_1( j, 0b1101111, rd, 0 )
 30 | 
 31 | FORMAT( J )
 32 | INS( jal, 0b1101111 )
 33 | 
 34 | FORMAT( Ijr )
 35 | INS_2( jr, 0b1100111, funct3, 0b000, rd, 0 )
 36 | 
 37 | FORMAT( I )
 38 | INS_1( jalr, 0b1100111, funct3, 0b000 )
 39 | INS_1( lb, 0b0000011, funct3, 0b000 )
 40 | INS_1( lh, 0b0000011, funct3, 0b001 )
 41 | INS_1( lw, 0b0000011, funct3, 0b010 )
 42 | INS_1( lbu, 0b0000011, funct3, 0b100 )
 43 | INS_1( lhu, 0b0000011, funct3, 0b101 )
 44 | INS_1( addi, 0b0010011, funct3, 0b000 )
 45 | INS_1( slti, 0b0010011, funct3, 0b010 )
 46 | INS_1( sltiu, 0b0010011, funct3, 0b011 )
 47 | INS_1( xori, 0b0010011, funct3, 0b100 )
 48 | INS_1( ori, 0b0010011, funct3, 0b110 )
 49 | INS_1( andi, 0b0010011, funct3, 0b111 )
 50 | 
 51 | FORMAT( E )
 52 | INS_4( ecall, 0b1110011, funct3, 0b000, rd, 0, rs1, 0, imm, 0 )
 53 | INS_4( ebreak, 0b1110011, funct3, 0b000, rd, 0, rs1, 0, imm, 1 )
 54 | 
 55 | FORMAT( S )
 56 | INS_1( sb, 0b0100011, funct3, 0b000 )
 57 | INS_1( sh, 0b0100011, funct3, 0b001 )
 58 | INS_1( sw, 0b0100011, funct3, 0b010 )
 59 | 
 60 | FORMAT( Rshift )
 61 | INS_2( slli, 0b0010011, funct3, 0b001, funct7, 0b0000000 )
 62 | INS_2( srli, 0b0010011, funct3, 0b101, funct7, 0b0000000 )
 63 | INS_2( srai, 0b0010011, funct3, 0b101, funct7, 0b0100000 )
 64 | 
 65 | FORMAT( R )
 66 | INS_2( add, 0b0110011, funct3, 0b000, funct7, 0b0000000 )
 67 | INS_2( sub, 0b0110011, funct3, 0b000, funct7, 0b0100000 )
 68 | INS_2( sll, 0b0110011, funct3, 0b001, funct7, 0b0000000 )
 69 | INS_2( slt, 0b0110011, funct3, 0b010, funct7, 0b0000000 )
 70 | INS_2( sltu, 0b0110011, funct3, 0b011, funct7, 0b0000000 )
 71 | INS_2( xor, 0b0110011, funct3, 0b100, funct7, 0b0000000 )
 72 | INS_2( srl, 0b0110011, funct3, 0b101, funct7, 0b0000000 )
 73 | INS_2( sra, 0b0110011, funct3, 0b101, funct7, 0b0100000 )
 74 | INS_2( or, 0b0110011, funct3, 0b110, funct7, 0b0000000 )
 75 | INS_2( and, 0b0110011, funct3, 0b111, funct7, 0b0000000 )
 76 | INS_2( mul, 0b0110011, funct3, 0b000, funct7, 0b0000001 )
 77 | INS_2( mulh, 0b0110011, funct3, 0b001, funct7, 0b0000001 )
 78 | INS_2( mulhsu, 0b0110011, funct3, 0b010, funct7, 0b0000001 )
 79 | INS_2( mulhu, 0b0110011, funct3, 0b011, funct7, 0b0000001 )
 80 | INS_2( div, 0b0110011, funct3, 0b100, funct7, 0b0000001 )
 81 | INS_2( divu, 0b0110011, funct3, 0b101, funct7, 0b0000001 )
 82 | INS_2( rem, 0b0110011, funct3, 0b110, funct7, 0b0000001 )
 83 | INS_2( remu, 0b0110011, funct3, 0b111, funct7, 0b0000001 )
 84 | 
 85 | FORMAT( B )
 86 | INS_1( beq, 0b1100011, funct3, 0b000 )
 87 | INS_1( bne, 0b1100011, funct3, 0b001 )
 88 | INS_1( blt, 0b1100011, funct3, 0b100 )
 89 | INS_1( bge, 0b1100011, funct3, 0b101 )
 90 | INS_1( bltu, 0b1100011, funct3, 0b110 )
 91 | INS_1( bgeu, 0b1100011, funct3, 0b111 )
 92 | 
 93 | // Unset the macros. You can safely ignore these.
 94 | #undef FORMAT
 95 | #undef INS
 96 | #undef INS_1
 97 | #undef INS_2
 98 | #undef INS_3
 99 | #undef INS_4
100 | 


--------------------------------------------------------------------------------
/snowball/clemency/helpers.h:
--------------------------------------------------------------------------------
 1 | // Macros and inline functions
 2 | static uint64_t read_54(inst_t *inst)
 3 | {
 4 |     uint64_t result;
 5 |     const uint16_t *ptr = inst->bytes + inst->size;
 6 |     result = ((uint64_t)ptr[1] << 45) | ((uint64_t)ptr[0] << 36) | ((uint64_t)ptr[2] << 27) | ((uint64_t)ptr[4] << 18) | (ptr[3] << 9) | (ptr[5] << 0);
 7 |     inst->size += 6;
 8 |     return result;
 9 | }
10 | 
11 | static uint64_t read_45(inst_t *inst)
12 | {
13 |     uint64_t result;
14 |     const uint16_t *ptr = inst->bytes + inst->size;
15 |     result = ((uint64_t)ptr[1] << 36) | ((uint64_t)ptr[0] << 27) | ((uint64_t)ptr[2] << 18) | (ptr[4] << 9) | (ptr[3] << 0);
16 |     inst->size += 5;
17 |     return result;
18 | }
19 | 
20 | static uint64_t read_36(inst_t *inst)
21 | {
22 |     uint64_t result;
23 |     const uint16_t *ptr = inst->bytes + inst->size;
24 |     result = ((uint64_t)ptr[1] << 27) | ((uint64_t)ptr[0] << 18) | (ptr[2] << 9) | (ptr[3] << 0);
25 |     inst->size += 4;
26 |     return result;
27 | }
28 | 
29 | static uint32_t read_27(inst_t *inst)
30 | {
31 |     uint32_t result;
32 |     const uint16_t *ptr = inst->bytes + inst->size;
33 |     result = ((uint64_t)ptr[1] << 18) | (ptr[0] << 9) | (ptr[2] << 0);
34 |     inst->size += 3;
35 |     return result;
36 | }
37 | 
38 | static uint32_t read_18(inst_t *inst)
39 | {
40 |     uint32_t result;
41 |     const uint16_t *ptr = inst->bytes + inst->size;
42 |     result = (ptr[1] << 9) | (ptr[0] << 0);
43 |     inst->size += 2;
44 |     return result;
45 | }
46 | 
47 | static uint16_t read_9(inst_t *inst)
48 | {
49 |     uint16_t result;
50 |     const uint16_t *ptr = inst->bytes + inst->size;
51 |     result = ptr[0];
52 |     inst->size += 1;
53 |     return result;
54 | }
55 | 
// Extract bits [offset, offset+count) of src, where offset is counted from the
// most significant end of a bit_size-bit word. A variable named `bit_size`
// must be in scope wherever this macro is expanded.
// Arguments are parenthesised so expressions such as `a + b` expand correctly,
// and the mask is built in 64 bits so wide fields do not overflow an int.
#define EXTRACT(src, offset, count) (((src) >> (bit_size - (count) - (offset))) & (((uint64_t)1 << (count)) - 1))

// Extract bits from inst->insn to a field (marks field as used)
#define FIELD(name, offset, count) do { inst->name = EXTRACT(inst->insn, offset, count); inst->used_##name = 1; } while (0)

// Concatenate extracted bits on to a field (appends them to the end)
#define CONCAT(name, offset, count) do { inst->name = (inst->name << (count)) | EXTRACT(inst->insn, offset, count); } while (0)

// Sign-extend an integer with fewer than 32 bits.
// The left shift is done on an unsigned copy (shifting a signed value into or
// past the sign bit is undefined); the arithmetic right shift then replicates
// the top bit.
#define SIGN_EXTEND(name, count) do { inst->name = (int32_t)((uint32_t)inst->name << (32 - (count))) >> (32 - (count)); } while (0)
67 | 
68 | // Helper to reverse bits in an integer
// Reverse the order of the 32 bits in x by swapping neighbouring groups of
// 1, 2, 4, 8 and finally 16 bits.
static uint32_t reverse_bits(uint32_t x)
{
    static const uint32_t masks[5] = {
        0x55555555u, 0x33333333u, 0x0f0f0f0fu, 0x00ff00ffu, 0x0000ffffu
    };
    unsigned int stage;
    for (stage = 0; stage < 5; stage++) {
        const unsigned int width = 1u << stage;
        x = ((x >> width) & masks[stage]) | ((x & masks[stage]) << width);
    }
    return x;
}
78 | 
// Zero the tail of *inst: everything from the _fields member up to the end of
// the structure. The length is computed as (end of struct) - (address of _fields).
// NOTE(review): the pointer subtraction presumes _fields is a char-typed marker
// placed before the decoded fields and their used_* flags -- confirm in the header.
static void clear_used(inst_t *inst)
{
    // Clear fields.
    memset(inst->_fields, 0, (char *)inst + sizeof(inst_t) - inst->_fields);
}
84 | 
// Append a comma, or a space if this is the first operand.
// Expects a local int named `first` at the expansion site.
#define PRINT_SEP() do { strcat(inst->str, first ? " " : ", "); first = 0; } while (0)

// Append a register field (if it is marked as used)
#define PRINT_REGISTER(name) do { if (inst->used_##name) { PRINT_SEP(); strcat(inst->str, registers[inst->name]); } } while (0)

// Append an immediate field as hex (if it is marked as used).
// The text is written at the current end of inst->str; passing inst->str as
// both destination and "%s" argument of sprintf is undefined (overlapping objects).
#define PRINT_IMMEDIATE(name) do { if (inst->used_##name) { PRINT_SEP(); sprintf(inst->str + strlen(inst->str), "#%X", inst->name); } } while (0)
93 | 
94 | 


--------------------------------------------------------------------------------
/snowball/riscv/README.md:
--------------------------------------------------------------------------------
 1 | ## Usage
 2 | 
 3 | Run `make` to build the dynamic library, Windows DLLs, and wrapper for Python. The _Makefile_ uses MinGW to cross compile the Windows DLLs, so you will get errors if you don't have it installed. If you are on macOS, it is sufficient to `brew install mingw-w64`.
 4 | 
 5 | You can do a basic test of the disassembler by running _test.py_. It will attempt to disassemble every 4-byte sequence in _test.bin_.
 6 | 
 7 | If you want to use the IDA plugin, you will need to copy the files to the appropriate directories. Assuming that *IDA_PATH* is your IDA directory, copy the following files:
 8 | 
 9 |   - riscv_32.dll, riscv_64.dll -> *IDA_PATH/*
10 |   - pyriscv.py -> *IDA_PATH/python/*
11 |   - ida-plugin/riscv.py -> *IDA_PATH/procs/*
12 | 
13 | The C library exposes a simple interface:
14 | 
15 | ```
16 | EXPORT void disassemble(inst_t *inst, uint32_t pc, const uint8_t *buf);
17 | EXPORT const char *mnemonics[];
18 | EXPORT const char *registers[];
19 | EXPORT const unsigned int num_registers;
20 | ```
21 | 
22 | The Python wrapper exposes a similar interface:
23 | 
24 | ```
25 | disassemble(pc, buf) # returns inst_t structure
26 | registers            # array of register names
27 | mnemonics            # array of instruction mnemonics
28 | ```
29 | 
30 | The *inst_t* structure contains the results of the disassembly. The fields should be customized for the target architecture and are filled in by the _decode_ functions in _riscv.c_. Every field also defines *used\_field\_name* which is set in _decode_ if that field is initialized.
31 | 
32 | Every architecture has the following members in *inst_t*:
33 | 
34 | ```
35 |     uint32_t pc;
36 |     const uint8_t *bytes;
37 |     unsigned int size;
38 |     unsigned int id;
39 |     const char *mnemonic;
40 |     char str[64];
41 | ```
42 | 
43 | The riscv32 architecture defines these additional members (e.g. fields):
44 | 
45 | ```
46 |     uint8_t opcode;
47 |     uint8_t funct3;
48 |     uint8_t funct7;
49 |     uint8_t rd;
50 |     uint8_t rs1;
51 |     uint8_t rs2;
52 |     int32_t imm;
53 | 
54 |     uint8_t used_opcode;
55 |     uint8_t used_funct3;
56 |     uint8_t used_funct7;
57 |     uint8_t used_rd;
58 |     uint8_t used_rs1;
59 |     uint8_t used_rs2;
60 |     uint8_t used_imm;
61 | ```
62 | 
63 | The header exposes all of the instructions as _Imnemonic_, with _Iinvalid_ (0) as a special instruction that indicates disassembly failure. Specifically, `mnemonics[Iadd] = "add"`.
64 | 
65 | ## Opcodes
66 | 
67 | The _opcodes.h_ file defines all of the instructions for the architecture. It is included in several places to setup the decoding and enumerations. It uses two macros: _FORMAT_ and _INS_. _FORMAT_ defines the decode function that will be used for the subsequent instructions. _INS_ defines the actual instruction. Every instruction should belong to a format.
68 | 
69 | An instruction is defined by its mnemonic and opcode, which is tested against _inst->opcode_. Additional qualifiers can be added by using the *INS_1*, *INS_2*, ... macros. These macros take an additional field name and value to test for equality.
70 | 
71 | For example:
72 | 
73 | ```
74 | FORMAT( R )
75 | INS_2( xor, 0b0110011, funct3, 0b100, funct7, 0b0000000 )
76 | INS_2( srl, 0b0110011, funct3, 0b101, funct7, 0b0000000 )
77 | ```
78 | 
79 | This example defines two instructions which are both decoded by `decode_R`. If `inst->opcode == 0b0110011` and `inst->funct3 == 0b100` and `inst->funct7 == 0b0000000`, then it will be disassembled as _xor_.
80 | 
81 | ## Helpers
82 | 
83 | The library defines and uses several macros and helper functions to reduce code repetition.
84 | 
85 | The *read_8*, *read_16*, *read_24*, and *read_32* helpers each read an integer of that many bits and increment `inst->size` by the corresponding number of bytes. The *read_insn* helper reads the 32-bit instruction into `inst->insn` for convenience.
86 | 
87 | `EXTRACT(src, offset, count)` extracts a *count*-bit integer from *src* starting at bit *offset*.
88 | 
89 | `FIELD(name, offset, count)` extracts an integer from `inst->insn` and puts it in `inst->name`. It also sets the flag `inst->used_name`. *CONCAT* extracts an integer from `inst->insn` and appends the bits to the end of `inst->name`. *SIGN_EXTEND* sign-extends a *count*-bit integer in `inst->name` to 32 bits.
90 | 


--------------------------------------------------------------------------------
/clemency-as/parser.py:
--------------------------------------------------------------------------------
  1 | from pyparsing import *
  2 | import instrs
  3 | import sys
  4 | 
  5 | sw = Suppress(Optional(White()))
  6 | 
  7 | class obj_LineComment(object):
  8 |   def __init__(self, lineno, filename, flags=0):
  9 |     self.lineno = lineno
 10 |     self.filename = filename
 11 | 
 12 | def pretty_error(filename, lineno, err, detail, line):
 13 |   print >>sys.stderr, "\x1b[1m%s:%d: \x1b[1;31m%s:\x1b[0;1m %s\x1b[0m" % (filename, lineno, err, detail)
 14 |   print >>sys.stderr, line
 15 |   print >>sys.stderr
 16 | 
def make_parser(labelstore=None):
  """Build and return the pyparsing grammar for one assembly statement.

  `labelstore` is handed to every instrs.Label and instrs.Expr created by the
  parse actions. The returned element matches exactly one of: an instruction
  with up to three comma-separated arguments, a constant directive, a
  `label:` definition, or a `# <num> "<file>"` marker line, followed by the
  end of the line.
  """
  # Basic tokens. Parse actions turn the matched text into instrs objects
  # (or an int, for Num, using base auto-detection via int(..., 0)).
  Mnem  = Word( alphas, alphanums+"." ).setParseAction(lambda x:instrs.Instr(x[0])).setName("opcode")
  Label = Word( alphas+"_", alphanums+"_" ).setParseAction(lambda x:instrs.Label(x[0], labelstore)).setName("label")
  Num    = (Optional('-') + Or( ["0x"+Word(hexnums), Word(nums)] )).setParseAction(lambda x:int(''.join(x),0)).setName("number")

  # Text between braces is passed unparsed to instrs.Expr.
  Expr = (Suppress("{") + Word( alphanums+"()+-*$&^%~<> \t" ) + Suppress("}")).setParseAction(lambda x:instrs.Expr(x[0], labelstore))

  # Operand kinds.
  Reg    = Or( [Word("rR", "0123456789"), oneOf(list(instrs.special_regs))] ).setParseAction(instrs.Reg)
  Mem    = Or( [Num, Suppress("$")+Label] )
  Imm    = Or( [Num] ).setName("immediate")
  Flags  = Word("RWEN").setName("memory flags")
  Const  = (oneOf(".ds .dw .dt .dm") + Num).setParseAction(lambda x:instrs.Const(x[0], x[1])).setName("constant")

  # Bracketed index expressions. The B/C/D forms are normalised by their
  # parse actions to three items [reg, offset, third], with offset defaulting
  # to 0 and the third item defaulting to 1; DispA supplies all three itself.
  # NOTE(review): the meaning of the third item is defined by the instruction
  # classes in instrs -- confirm there.
  DispA  = (Suppress("[") + Reg + sw + Suppress("+") + sw + Imm + Suppress(",") + sw + Imm + Suppress("]"))
  DispB  = (Suppress("[") + Reg + sw + Suppress(",") + sw + Imm + Suppress("]")).setParseAction(lambda x:[x[0],0,x[1]])
  DispC  = (Suppress("[") + Reg + sw + Suppress("+") + sw + Imm + Suppress("]")).setParseAction(lambda x:[x[0],x[1],1])
  DispD  = (Suppress("[") + Reg + sw + Suppress("]")).setParseAction(lambda x:[x[0],0,1])
  Disp = Or( [DispA, DispB, DispC, DispD] ).setName("index expression")

  # An instruction is a mnemonic followed by zero to three arguments.
  Arg       = Or( [Reg, Mem, Imm, Disp, Flags, Suppress("$")+Label, Expr] )
  TwoArgs   = Arg + Suppress(",") + sw + Arg
  ThreeArgs = Arg + Suppress(",") + sw + Arg + Suppress(",") + sw + Arg
  Instr = Mnem + Optional( Or( [Arg, TwoArgs, ThreeArgs] ) ) + sw
  #Instr.setParseAction(lambda x:x[0])

  # Marker line: `# <num> "<quoted file name>"` with any trailing numbers
  # discarded; yields an obj_LineComment.
  LineComment = (Suppress("# ") + Num + sw + QuotedString('"', escQuote='\\"') + Suppress(ZeroOrMore(sw + Num))).setParseAction(lambda x: obj_LineComment(*x))
  Line = (Or( [Instr, Const, Label+Suppress(":"), LineComment] ) + LineEnd()).setName("instruction, directive or label")
  return Line
 46 | 
 47 | 
 48 | def parse_asm(s, labelstore=None):
 49 |   asms = []
 50 |   myline = make_parser(labelstore)
 51 |   filename = '<input>'
 52 |   lineno = 0
 53 |   for real_line in s.split("\n"):
 54 |     real_line = real_line.strip()
 55 |     lineno += 1
 56 |     for line in real_line.split(";"):
 57 |       line = line.strip()
 58 |       try:
 59 |         if not line:
 60 |           continue
 61 |         info = myline.parseString(line)
 62 |         if isinstance(info[0], instrs.Const):
 63 |           asms.append((filename, lineno, line, info[0]))
 64 |         elif isinstance(info[0], instrs.Instr):
 65 |           ins = info[0].iclass(info[1:])
 66 |           asms.append((filename, lineno, line, ins))
 67 |         elif isinstance(info[0], instrs.Label):
 68 |           asms.append((filename, lineno, line, info[0]))
 69 |         elif isinstance(info[0], obj_LineComment):
 70 |           filename = info[0].filename
 71 |           lineno = info[0].lineno - 1
 72 |         else:
 73 |           raise Exception("Unknown line type")
 74 |       
 75 |       except Exception as e:
 76 |         pretty_error(filename, lineno, "parse error", e, real_line)
 77 |         raise
 78 | 
 79 |   return asms
 80 | 
 81 | def swap_endian(bs):
 82 |   out = instrs.BitString(0,0)
 83 |   for i in xrange(0, len(bs), 27):
 84 |     out += bs[i+9:i+18] + bs[i+0:i+9] + bs[i+18:i+27]
 85 |   return out
 86 | 
class Assembler(object):
  """Two-pass assembler that turns assembly text into one instrs.BitString."""

  def __init__(self):
    # Label table shared with the parser; passed as `labelstore` to parse_asm.
    self.labels = {}

  def assemble(self, asm_str):
    """Assemble `asm_str` and return the encoded output.

    Pass 1 walks the parsed items to compute addresses: items with a `raw`
    method advance the address by the size of their encoding divided by 9,
    all other items get `update(ip)` called with the current address.
    Pass 2 re-encodes every item at its final address, runs it through
    swap_endian and appends it to the result. Errors in either pass are
    reported with pretty_error and re-raised.
    """
    asms = parse_asm(asm_str, self.labels)
    #first pass, resolve labels
    ip = 0
    for filename, lineno, line, asm in asms:
      try:
        if hasattr(asm, 'raw'):
          ip += asm.raw(ip).size/9 #how many bytes is the thingy
        else:
          asm.update(ip)
      except Exception as e:
        pretty_error(filename, lineno, "error (pass 1)", e, line)
        raise

    #second pass, write real things
    # Note: this sets a class-level flag on instrs.BitString and never resets it.
    instrs.BitString.CHECK = True
    ip = 0
    out = instrs.BitString(0,0)
    for filename, lineno, line, asm in asms:
      try:
        if hasattr(asm, 'raw'):
          out += swap_endian(asm.raw(ip))
          # raw() is evaluated a second time here just to obtain the size.
          ip += asm.raw(ip).size/9
      except Exception as e:
        pretty_error(filename, lineno, "error (pass 2)", e, line)
        raise

    return out
119 | 


--------------------------------------------------------------------------------
/snowball/riscv/generate_py.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # Generate python wrapper from headers.
  3 | import re
  4 | import sys
  5 | 
  6 | def extract_insn(header):
  7 |     insns = ['invalid']
  8 |     infp = open(header, 'r')
  9 |     for line in infp:
 10 |         comment = line.find('//')
 11 |         if comment >= 0:
 12 |             # ignore comments
 13 |             line = line[:comment]
 14 |         line = line.strip()
 15 |         if len(line) == 0 or line.startswith('#'):
 16 |             # ignore empty lines and preprocessor defines
 17 |             continue
 18 |         elif line.startswith('FORMAT'):
 19 |             # ignore
 20 |             continue
 21 |         else:
 22 |             _, insn, _ = re.match('^INS(_\d)?\s*\(\s*([\w\d]+)\s*(,.*)?\s*\)$', line).groups()
 23 |             insns += [insn]
 24 |     insns += ['__count']
 25 |     s = ''
 26 |     for i in xrange(len(insns)):
 27 |         s += 'I%s = %d\n' % (insns[i], i)
 28 |     return s
 29 | 
 30 | def extract_struct(header):
 31 |     def c_to_ctype(typ, const=None, unsigned=None, signed=None, ptr=None, arr=None):
 32 |         if typ == 'char':
 33 |             if unsigned:
 34 |                 result = 'c_ubyte'
 35 |             else:
 36 |                 result = 'c_char'
 37 |         elif typ == 'short':
 38 |             if unsigned:
 39 |                 result = 'c_ushort'
 40 |             else:
 41 |                 result = 'c_short'
 42 |         elif typ == 'int':
 43 |             if unsigned:
 44 |                 result = 'c_uint'
 45 |             else:
 46 |                 result = 'c_int'
 47 |         elif typ.endswith('_t'):
 48 |             # uint8_t -> c_uint8
 49 |             result = 'c_' + typ[:-2]
 50 |         else:
 51 |             raise Exception('Unhandled type %s' % typ)
 52 | 
 53 |         if result == 'c_char' and ptr:
 54 |             result = 'c_char_p'
 55 |         elif ptr:
 56 |             result = 'POINTER(%s)' % result
 57 | 
 58 |         if arr:
 59 |             size = int(arr[1:-1].strip(), 0)
 60 |             result = '%s*%d' % (result, size)
 61 | 
 62 |         return result
 63 | 
 64 |     fields = []
 65 |     in_struct = False
 66 | 
 67 |     infp = open(header, 'r')
 68 |     for line in infp:
 69 |         comment = line.find('//')
 70 |         if comment >= 0:
 71 |             # ignore comments
 72 |             line = line[:comment]
 73 |         line = line.strip()
 74 |         if len(line) == 0 or line.startswith('#'):
 75 |             # ignore empty lines and preprocessor defines
 76 |             pass
 77 |         elif line.startswith('EXPORT'):
 78 |             # ignore exported functions
 79 |             pass
 80 |         elif line.startswith('typedef struct'):
 81 |             in_struct = True
 82 |         elif line.startswith('} inst_t'):
 83 |             in_struct = False
 84 |         elif in_struct:
 85 |             try:
 86 |                 if line.startswith('DEFINE_FIELD'):
 87 |                     typ, name = re.match(r'^DEFINE_FIELD\s*\(\s*([^,\s]*)\s*,\s*([^,\s]*)\s*\)$', line).groups()
 88 |                     fields += [
 89 |                         (name, c_to_ctype(typ)),
 90 |                         ('used_%s' % name, 'c_uint8')
 91 |                     ]
 92 |                 elif line.startswith('BEGIN_FIELDS') or line.startswith('END_FIELDS'):
 93 |                     # ignore 
 94 |                     pass
 95 |                 else:
 96 |                     const, unsigned, signed, typ, ptr, name, arr = re.match(r'^(const)?\s*(unsigned)?(signed)?\s*([\d\w]+)\s*(\*)?\s*([\d\w]+)\s*(\[\s*\d+\s*\])?\s*;$', line).groups()
 97 |                     fields += [(name, c_to_ctype(typ, const=const, unsigned=unsigned, ptr=ptr, arr=arr))]
 98 |             except AttributeError:
 99 |                 raise Exception('Bad line: %s' % line)
100 | 
101 |     s = ''
102 |     s += 'class Inst(Structure):\n'
103 |     s += '    _fields_ = [\n'
104 |     for f in fields:
105 |         s += '        ("%s", %s),\n' % f
106 |     s += '    ]\n'
107 |     return s
108 | 
109 | TEMPLATE = '''# Autogenerated
110 | from ctypes import *
111 | import platform
112 | 
113 | if platform.system() == 'Windows':
114 |     if platform.architecture()[0] == '32bit':
115 |         dll = cdll.@@NAME@@_32
116 |     else:
117 |         dll = cdll.@@NAME@@_64
118 | elif platform.system() == 'Darwin':
119 |     dll = cdll.LoadLibrary('lib@@NAME@@.dylib')
120 | else:
121 |     dll = cdll.LoadLibrary('lib@@NAME@@.so')
122 | 
123 | @@STRUCT@@
124 | 
125 | @@INSN@@
126 | 
127 | dll.disassemble.argtypes = [POINTER(Inst), c_uint32, POINTER(c_uint8)]
128 | 
129 | def disassemble(pc, input):
130 |     inst = Inst()
131 |     if isinstance(input, str):
132 |         input = cast(input, POINTER(c_uint8))
133 |     dll.disassemble(byref(inst), pc, input)
134 |     assert inst._st_size == sizeof(inst)
135 |     return inst
136 | 
137 | mnemonics = (c_char_p * I__count).in_dll(dll, 'mnemonics')
138 | num_registers = c_uint.in_dll(dll, 'num_registers').value
139 | registers = (c_char_p * num_registers).in_dll(dll, 'registers')
140 | '''
141 | 
# Script body: fill in TEMPLATE for the library named on the command line and
# write the result to py<name>.py in the current directory. The instruction
# list is read from opcodes.h and the structure layout from <name>.h.
name = sys.argv[1]
output = TEMPLATE
output = output.replace('@@NAME@@', name)
output = output.replace('@@INSN@@', extract_insn('opcodes.h'))
output = output.replace('@@STRUCT@@', extract_struct('%s.h' % name))
# Context manager so the generated module is flushed and closed on exit.
with open('py%s.py' % name, 'w') as outfp:
    outfp.write(output)
149 | 


--------------------------------------------------------------------------------
/snowball/riscv/riscv.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <string.h>
  4 | 
  5 | // TODO Uncomment if target instructions are big endian
  6 | #undef BIG_ENDIAN
  7 | // #define BIG_ENDIAN
  8 | 
  9 | #include "riscv.h"
 10 | #include "helpers.h"
 11 | 
 12 | // Array of mnemonics
 13 | const char *mnemonics[] = {
 14 |     "invalid",
 15 | #define INS(ins, opcode) #ins,
 16 | #include "opcodes.h"
 17 | };
 18 | 
 19 | // TODO Array of register names
 20 | const char *registers[] = {
 21 |    "zero",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
 22 |      "x8",  "x9", "x10", "x11", "x12", "x13", "x14", "x15",
 23 |     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
 24 |     "x24", "x25", "x26", "x27", "x28", "x29", "x30", "x31"
 25 | };
 26 | const unsigned int num_registers = sizeof(registers) / sizeof(registers[0]);
 27 | 
// TODO Read instruction from a buffer.
// Loads the next 32-bit word into inst->insn; read_32 also advances
// inst->size by 4.
static void read_insn(inst_t *inst)
{
    inst->insn = read_32(inst);
}
 33 | 
 34 | // TODO Decoding for specific instruction formats
// Decode the R format: opcode [0,7), rd [7,12), funct3 [12,15), rs1 [15,20),
// rs2 [20,25), funct7 [25,32). All six fields are marked as used.
static void decode_R(inst_t *inst)
{
    read_insn(inst);
    FIELD(opcode, 0, 7);
    FIELD(rd, 7, 5);
    FIELD(funct3, 12, 3);
    FIELD(rs1, 15, 5);
    FIELD(rs2, 20, 5);
    FIELD(funct7, 25, 7);
}
// Decode the shift-by-immediate format: same layout as decode_R, except that
// bits [20,25) are stored in imm (the shift amount) instead of rs2.
static void decode_Rshift(inst_t *inst)
{
    read_insn(inst);
    FIELD(opcode, 0, 7);
    FIELD(rd, 7, 5);
    FIELD(funct3, 12, 3);
    FIELD(rs1, 15, 5);
    FIELD(imm, 20, 5);
    FIELD(funct7, 25, 7);
}
// Decode the I format: opcode [0,7), rd [7,12), funct3 [12,15), rs1 [15,20)
// and a 12-bit immediate in bits [20,32) that is sign-extended to 32 bits.
static void decode_I(inst_t *inst)
{
    read_insn(inst);
    FIELD(opcode, 0, 7);
    FIELD(rd, 7, 5);
    FIELD(funct3, 12, 3);
    FIELD(rs1, 15, 5);
    FIELD(imm, 20, 12);
    SIGN_EXTEND(imm, 12);
}
// Decode as the I format, then mark rd as unused. The rd value itself stays
// set so the opcode table can still compare it (e.g. jr requires rd == 0).
static void decode_Ijr(inst_t *inst)
{
    decode_I(inst);
    inst->used_rd = 0;
}
// Decode as the I format, then mark rd, rs1 and imm as unused. Their values
// stay set so the opcode table can still compare them (ecall / ebreak).
static void decode_E(inst_t *inst)
{
    decode_I(inst);
    inst->used_rd = 0;
    inst->used_rs1 = 0;
    inst->used_imm = 0;
}
 77 | static void decode_Ishift(inst_t *inst)
 78 | {
 79 |     read_insn(inst);
 80 |     FIELD(opcode, 0, 7);
 81 |     FIELD(rd, 7, 5);
 82 |     FIELD(funct3, 12, 3);
 83 |     FIELD(rs1, 15, 5);
 84 |     FIELD(imm, 20, 5);
 85 |     FIELD(funct7, 25, 7);
 86 | }
// Decode the S format: opcode [0,7), funct3 [12,15), rs1 [15,20), rs2 [20,25).
// The 12-bit immediate is split: bits [25,32) form its upper 7 bits and bits
// [7,12) its lower 5 bits; the joined value is sign-extended.
static void decode_S(inst_t *inst)
{
    read_insn(inst);
    FIELD(opcode, 0, 7);
    FIELD(funct3, 12, 3);
    FIELD(rs1, 15, 5);
    FIELD(rs2, 20, 5);
    FIELD(imm, 25, 7);
    CONCAT(imm, 7, 5);
    SIGN_EXTEND(imm, 12);
}
// Decode the B (branch) format: opcode [0,7), funct3 [12,15), rs1 [15,20),
// rs2 [20,25). The immediate is assembled, most significant part first, from
// bit 31, bit 7, bits [25,31) and bits [8,12), giving 12 bits that are
// sign-extended, doubled, and made absolute by adding inst->pc.
static void decode_B(inst_t *inst)
{
    read_insn(inst);
    FIELD(opcode, 0, 7);
    FIELD(funct3, 12, 3);
    FIELD(rs1, 15, 5);
    FIELD(rs2, 20, 5);
    FIELD(imm, 31, 1);
    CONCAT(imm, 7, 1);
    CONCAT(imm, 25, 6);
    CONCAT(imm, 8, 4);
    SIGN_EXTEND(imm, 12);
    inst->imm <<= 1;
    inst->imm += inst->pc; // PC-relative
}
// U format: opcode, rd and a 20-bit immediate starting at bit 12. The
// immediate is stored as extracted; no shift or sign extension is applied
// in this function.
static void decode_U(inst_t *inst)
{
    read_insn(inst);
    FIELD(opcode, 0, 7);
    FIELD(rd, 7, 5);
    FIELD(imm, 12, 20);
}
// J format: opcode, rd and a jump immediate assembled from four pieces
// (1 bit at 31, 8 bits at 12, 1 bit at 20, 10 bits at 21). The 20-bit
// result is sign-extended, doubled, and made absolute by adding inst->pc.
// NOTE(review): CONCAT is defined in helpers.h; presumably it appends the
// given bits below the bits already in the field -- confirm.
static void decode_J(inst_t *inst)
{
    read_insn(inst);
    FIELD(opcode, 0, 7);
    FIELD(rd, 7, 5);
    FIELD(imm, 31, 1);
    CONCAT(imm, 12, 8);
    CONCAT(imm, 20, 1);
    CONCAT(imm, 21, 10);
    SIGN_EXTEND(imm, 20);
    inst->imm <<= 1; // the encoded offset omits its lowest bit
    inst->imm += inst->pc; // PC-relative
}
// Same decoding as decode_J, but rd is flagged as unused afterwards.
static void decode_Jj(inst_t *inst)
{
    decode_J(inst);
    inst->used_rd = 0;
}
138 | 
139 | // Decode instruction
// Identify the instruction in inst and store its id in inst->id.
// The macros below give meaning to the rows of opcodes.h, which is then
// included as a sequence of statements inside this function body.
static void decode(inst_t *inst)
{
// FORMAT(fmt): reset the used flags and inst->size, then run decode_<fmt>.
#define FORMAT(fmt) do { clear_used(inst); inst->size = 0; decode_##fmt(inst); } while (0);
// INS / INS_1..INS_4: when the opcode (and every listed field) equals the
// given value, record the instruction id I<x> and return from decode().
#define INS(x,opc) do { if (inst->opcode == opc) { inst->id = I##x; return; } } while (0);
#define INS_1(x,opc,f1,v1) do { if (inst->opcode == opc && inst->f1 == v1) { inst->id = I##x; return; } } while (0);
#define INS_2(x,opc,f1,v1,f2,v2) do { if (inst->opcode == opc && inst->f1 == v1 && inst->f2 == v2) { inst->id = I##x; return; } } while (0);
#define INS_3(x,opc,f1,v1,f2,v2,f3,v3) do { if (inst->opcode == opc && inst->f1 == v1 && inst->f2 == v2 && inst->f3 == v3) { inst->id = I##x; return; } } while (0);
#define INS_4(x,opc,f1,v1,f2,v2,f3,v3,f4,v4) do { if (inst->opcode == opc && inst->f1 == v1 && inst->f2 == v2 && inst->f3 == v3 && inst->f4 == v4) { inst->id = I##x; return; } } while (0);
#include "opcodes.h"
    // default to invalid
    // Reached only when no row of the table returned early.
    clear_used(inst);
    inst->size = 0;
    inst->id = Iinvalid;
}
154 | 
155 | // Print instruction
// Build the textual form of inst: look up the mnemonic by inst->id, copy it
// into inst->str, then hand rd, rs1, rs2 and imm to the PRINT_* macros.
// NOTE(review): PRINT_REGISTER / PRINT_IMMEDIATE are defined in helpers.h;
// presumably they append to inst->str only when the field is marked used,
// and use `first` to decide on a separator -- confirm.
static void tostring(inst_t *inst)
{
    int first = 1; // not referenced directly below; see note above
    inst->mnemonic = mnemonics[inst->id];
    strcpy(inst->str, inst->mnemonic);

    // TODO Print operands
    PRINT_REGISTER(rd);
    PRINT_REGISTER(rs1);
    PRINT_REGISTER(rs2);
    PRINT_IMMEDIATE(imm);
}
168 | 
169 | // Disassemble one instruction from buf.
// Exported entry point. Fills *inst for the instruction found at buf:
// records sizeof(inst_t) in _st_size, stores pc and the buffer pointer,
// then runs decode() followed by tostring().
// buf is stored, not copied, so it must stay valid while inst is in use.
EXPORT void disassemble(inst_t *inst, uint32_t pc, const uint8_t *buf)
{
    inst->_st_size = sizeof(inst_t); // lets a caller compare struct layouts
    inst->pc = pc;
    inst->bytes = buf;
    decode(inst);
    tostring(inst);
}
178 | 


--------------------------------------------------------------------------------
/snowball/clemency/clemency_ldr.py:
--------------------------------------------------------------------------------
  1 | """
  2 | IDA Clemency Loader
  3 | 
  4 | pyclemency processor support must be installed for this to work
  5 | """
  6 | 
  7 | import struct
  8 | import sys
  9 | import idaapi
 10 | from idc import *
 11 | 
 12 | 
 13 | class BitReader(object):
 14 |     """
 15 |     Takes 9-bit BE file descriptor and provides 16-bit LE output.
 16 |     """
 17 | 
 18 |     def __init__(self, f):
 19 |         self.f = f
 20 |         self.bits = 0
 21 |         self.val = 0
 22 | 
 23 |     def get_bit(self):
 24 |         if not self.bits:
 25 |             b = self.f.read(1)
 26 |             if len(b) == 0:
 27 |                 return None
 28 |             self.val = ord(b)
 29 |             self.bits = 8
 30 |         x = self.val & (1 << (self.bits - 1))
 31 |         self.bits -= 1
 32 |         return 1 if x else 0
 33 | 
 34 |     def get_byte(self):
 35 |         x = 0
 36 |         for _ in xrange(9):
 37 |             bit = self.get_bit()
 38 |             if bit is None:
 39 |                 return None
 40 |             x = (x << 1) | bit
 41 |         return x
 42 | 
 43 |     def get_all_bytes(self):
 44 |         while True:
 45 |             byte = self.get_byte()
 46 |             if byte is None:
 47 |                 return
 48 |             yield byte
 49 | 
 50 |     def read_as_u16(self):
 51 |         return b''.join(struct.pack('<H', byte)
 52 |                         for byte in self.get_all_bytes())
 53 | 
 54 | 
 55 | def test_bit_reader():
 56 |     with open('hello.u16', 'rb') as o:
 57 |         orig_data = o.read()
 58 | 
 59 |     with open('hello.u9', 'rb') as f:
 60 |         new_data = BitReader(f).read_as_u16()
 61 | 
 62 |     assert new_data == orig_data
 63 | 
 64 | 
 65 | def accept_file(li, n):
 66 |     """
 67 |     Is this a clemency file?
 68 |     """
 69 |     if n > 0:
 70 |         return 0
 71 | 
 72 |     # There is no binary format, everything is raw bytes mapped to 0.
 73 |     # Therefore we always accept. (We could try heuristics.)
 74 |     return "Clemency (raw binary)"
 75 | 
 76 | 
 77 | def add_segment(start, end, name, type_):
 78 |     segment = idaapi.segment_t()
 79 |     segment.startEA = start
 80 |     segment.endEA = end
 81 |     segment.bitness = 1 # 32-bit
 82 | 
 83 |     idaapi.add_segm_ex(segment, name, type_, idaapi.ADDSEG_SPARSE | idaapi.ADDSEG_OR_DIE)
 84 | 
 85 | def load_file(li, _, __):
 86 |     idaapi.set_processor_type("clemency", idaapi.SETPROC_ALL
 87 |                               | idaapi.SETPROC_FATAL)
 88 | 
 89 |     # Get bytes as u16.
 90 |     li.seek(0)
 91 |     data_u16 = BitReader(li).read_as_u16()
 92 |     program_size = len(data_u16) / 2
 93 | 
 94 |     # Load the firmware image.
 95 |     add_segment(0, program_size, "MAIN_PROGRAM", "CODE")
 96 |     idaapi.put_many_bytes(0, data_u16)
 97 |     MakeName(0, '_start')
 98 |     AutoMark(0, AU_CODE)
 99 |     MakeFunction(0)
100 | 
101 |     # Fake .BSS in case it's accessed.
102 |     add_segment(program_size, program_size + 0x1000, "BSS", "BSS")
103 | 
104 |     def make_array(addr, name, size=3):
105 |         MakeName(addr, name)
106 |         MakeArray(addr, size)
107 |         return size
108 | 
109 |     '''
110 |     0x4000000 3 Timer 1 Delay
111 |     0x4000003 3 Number of milliseconds left for Timer 1
112 |     0x4000006 3 Timer 2 Delay
113 |     0x4000009 3 Number of milliseconds left for Timer 2
114 |     0x400000C 3 Timer 3 Delay
115 |     0x400000F 3 Number of milliseconds left for Timer 3
116 |     0x4000012 3 Timer 4 Delay
117 |     0x4000015 3 Number of milliseconds left for Timer 4
118 |     0x4000018 6 Number of seconds since Aug. 02, 2013 09:00 PST
119 |     0x400001E 3 Number of processing ticks since processor start
120 |     '''
121 |     add_segment(0x4000000, 0x4000021, "CLOCK_IO", "DATA")
122 |     addr = 0x4000000
123 |     for i in xrange(4):
124 |         timer = 'timer%d' % (i+1)
125 |         addr += make_array(addr, 'g_%s_delay' % timer)
126 |         addr += make_array(addr, 'g_%s_ms_left' % timer)
127 |     addr += make_array(addr, 'g_secs_since_epoch', 6)
128 |     addr += make_array(addr, 'g_ticks_since_start')
129 | 
130 |     add_segment(0x4010000, 0x4011000, "FLAG_IO", "DATA")
131 |     make_array(0x4010000, 'g_flag', 0x1000)
132 | 
133 |     add_segment(0x5000000, 0x5002003, "DATA_RECEIVED", "DATA")
134 |     make_array(0x5000000, 'g_data_received', 0x2000)
135 |     make_array(0x5002000, 'g_data_received_size')
136 | 
137 |     add_segment(0x5010000, 0x5012003, "DATA_SENT", "DATA")
138 |     make_array(0x5010000, 'g_data_sent', 0x2000)
139 |     make_array(0x5012000, 'g_data_sent_size')
140 | 
141 |     add_segment(0x6000000, 0x6800000, "SHARED_MEMORY", "DATA")
142 |     make_array(0x6000000, 'g_shm', 0x800000)
143 | 
144 |     add_segment(0x6800000, 0x7000000, "NVRAM_MEMORY", "DATA")
145 |     make_array(0x6800000, 'g_nvram', 0x800000)
146 | 
147 |     '''
148 |     0x7FFFF00 Timer 1
149 |     0x7FFFF03 Timer 2
150 |     0x7FFFF06 Timer 3
151 |     0x7FFFF09 Timer 4
152 |     0x7FFFF0C Invalid Instruction
153 |     0x7FFFF0F Divide by 0
154 |     0x7FFFF12 Memory Exception
155 |     0x7FFFF15 Data Received
156 |     0x7FFFF18 Data Sent
157 |     '''
158 |     add_segment(0x7FFFF00, 0x7FFFF1B, "INTERRUPT_POINTERS", "DATA")
159 |     addr = 0x7FFFF00
160 |     for i in xrange(4):
161 |         timer = 'timer%d' % (i+1)
162 |         addr += make_array(addr, 'g_%s_interrupt_handler' % timer)
163 |     addr += make_array(addr, 'g_invalid_instruction_handler')
164 |     addr += make_array(addr, 'g_div_by_zero_handler')
165 |     addr += make_array(addr, 'g_memory_exn_handler')
166 |     addr += make_array(addr, 'g_data_received_handler')
167 |     addr += make_array(addr, 'g_data_sent_handler')
168 | 
169 |     '''
170 |     0x7FFFF80 20 Processor name
171 |     0x7FFFFA0 3 Processor version
172 |     0x7FFFFA3 3 Processor functionality flags
173 |     0x7FFFFA6 4A For future use
174 |     0x7FFFFF0 1 Interrupt stack direction flag
175 |     0x7FFFFF1 F For future use
176 |     '''
177 |     add_segment(0x7FFFF80, 0x8000000, "PROC_ID_FEATURES", "DATA")
178 |     addr = 0x7FFFF80
179 |     addr += make_array(addr, 'g_processor_name', 0x20)
180 |     addr += make_array(addr, 'g_processor_version')
181 |     addr += make_array(addr, 'g_processor_flags')
182 |     addr += make_array(addr, 'g_processor_reserved_1', 0x4a)
183 |     addr += make_array(addr, 'g_interrupt_stack_direction', 1)
184 |     addr += make_array(addr, 'g_processor_reserved_2', 0xf)
185 | 
186 |     return 1
187 | 


--------------------------------------------------------------------------------
/snowball/clemency/clemency.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <string.h>
  4 | 
  5 | #include "clemency.h"
  6 | #include "helpers.h"
  7 | 
  8 | // Array of mnemonics
// Index 0 is "invalid"; every instruction row in opcodes.h then contributes
// its name as a string literal, in table order. Only INS is defined here:
// the INS_1..INS_4 rows reach it through the default macros in opcodes.h.
const char *mnemonics[] = {
    "invalid",
#define INS(ins, opcode) #ins,
#include "opcodes.h"
};
 14 | 
 15 | // TODO Array of register names
// Register names indexed by register number: R0..R28 followed by ST, RA
// and PC at indices 29, 30 and 31.
const char *registers[] = {
    "R0", "R1", "R2", "R3",
    "R4", "R5", "R6", "R7",
    "R8", "R9", "R10", "R11",
    "R12", "R13", "R14", "R15",
    "R16", "R17", "R18", "R19",
    "R20", "R21", "R22", "R23",
    "R24", "R25", "R26", "R27",
    "R28", "ST", "RA", "PC"
};
// Number of entries in registers[], exported alongside the table.
const unsigned int num_registers = sizeof(registers) / sizeof(registers[0]);
 27 | 
 28 | // TODO Decoding for specific instruction formats
// R format (27 bits): opcode, rA, rB, rC, a 2-bit funct, and the
// arith_signed, is_imm and uf single-bit flags.
// NOTE(review): FIELD is defined in helpers.h (not visible here).
static void decode_R(inst_t *inst)
{
    int bit_size = 27; // presumably read by the FIELD macro -- confirm
    inst->insn = read_27(inst);
    FIELD(opcode, 0, 7);
    FIELD(rA, 7, 5);
    FIELD(rB, 12, 5);
    FIELD(rC, 17, 5);
    FIELD(funct, 22, 2);
    FIELD(arith_signed, 24, 1);
    FIELD(is_imm, 25, 1);
    FIELD(uf, 26, 1);
}
 42 | 
// R-immediate format (27 bits): opcode, rA, rB, a 7-bit imm starting at
// bit 17, and the arith_signed, is_imm and uf single-bit flags.
// Note that funct is not decoded by this format.
static void decode_R_IMM(inst_t *inst)
{
    int bit_size = 27; // presumably read by the FIELD macro -- confirm
    inst->insn = read_27(inst);
    FIELD(opcode, 0, 7);
    FIELD(rA, 7, 5);
    FIELD(rB, 12, 5);
    FIELD(imm, 17, 7);
    FIELD(arith_signed, 24, 1);
    FIELD(is_imm, 25, 1);
    FIELD(uf, 26, 1);
}
 55 | 
// U format (27 bits): 9-bit opcode, rA, rB, a 7-bit funct and the uf flag.
static void decode_U(inst_t *inst)
{
    int bit_size = 27; // presumably read by the FIELD macro -- confirm
    inst->insn = read_27(inst);
    FIELD(opcode, 0, 9);
    FIELD(rA, 9, 5);
    FIELD(rB, 14, 5);
    FIELD(funct, 19, 7);
    FIELD(uf, 26, 1);
}
 66 | 
// Two-register format (18 bits): 8-bit opcode, rA and rB.
static void decode_BIN_R(inst_t *inst)
{
    int bit_size = 18; // presumably read by the FIELD macro -- confirm
    inst->insn = read_18(inst);
    FIELD(opcode, 0, 8);
    FIELD(rA, 8, 5);
    FIELD(rB, 13, 5);
}
 75 | 
// Register/immediate format (27 bits): 8-bit opcode, rA and a 14-bit imm.
// The immediate is not sign-extended here.
static void decode_BIN_R_IMM(inst_t *inst)
{
    int bit_size = 27; // presumably read by the FIELD macro -- confirm
    inst->insn = read_27(inst);
    FIELD(opcode, 0, 8);
    FIELD(rA, 8, 5);
    FIELD(imm, 13, 14);
}
 84 | 
// Move format (27 bits): 5-bit opcode, rA and a 17-bit imm left unsigned.
static void decode_MOV_LOW_HI(inst_t *inst)
{
    int bit_size = 27; // presumably read by the FIELD macro -- confirm
    inst->insn = read_27(inst);
    FIELD(opcode, 0, 5);
    FIELD(rA, 5, 5);
    FIELD(imm, 10, 17);
}
 93 | 
// Same layout as decode_MOV_LOW_HI, but the 17-bit imm is additionally
// passed through SIGN_EXTEND with width 17.
static void decode_MOV_LOW_SIGNED(inst_t *inst)
{
    int bit_size = 27; // presumably read by the FIELD macro -- confirm
    inst->insn = read_27(inst);
    FIELD(opcode, 0, 5);
    FIELD(rA, 5, 5);
    FIELD(imm, 10, 17);
    SIGN_EXTEND(imm, 17);
}
103 | 
// Conditional branch with offset (27 bits): 6-bit opcode, 4-bit condition
// code and a 17-bit imm that is sign-extended. The program counter is not
// added to the offset in this function.
static void decode_B_CC_OFF(inst_t *inst)
{
    int bit_size = 27; // presumably read by the FIELD macro -- confirm
    inst->insn = read_27(inst);
    FIELD(opcode, 0, 6);
    FIELD(cc, 6, 4);
    FIELD(imm, 10, 17);
    SIGN_EXTEND(imm, 17);
}
113 | 
// Same layout as decode_B_CC_OFF (6-bit opcode, 4-bit cc, 17-bit imm) but
// the immediate is left unsigned.
static void decode_B_CC_LOC(inst_t *inst)
{
    int bit_size = 27; // presumably read by the FIELD macro -- confirm
    inst->insn = read_27(inst);
    FIELD(opcode, 0, 6);
    FIELD(cc, 6, 4);
    FIELD(imm, 10, 17);
}
122 | 
// Conditional branch via register (18 bits): 6-bit opcode, 4-bit condition
// code, rA and a 3-bit funct.
static void decode_B_CC_R(inst_t *inst)
{
    int bit_size = 18; // presumably read by the FIELD macro -- confirm
    inst->insn = read_18(inst);
    FIELD(opcode, 0, 6);
    FIELD(cc, 6, 4);
    FIELD(rA, 10, 5);
    FIELD(funct, 15, 3);
}
132 | 
// Unconditional branch with offset (36 bits): 9-bit opcode and a 27-bit imm
// that is sign-extended.
static void decode_B_OFF(inst_t *inst)
{
    int bit_size = 36; // presumably read by the FIELD macro -- confirm
    inst->insn = read_36(inst);
    FIELD(opcode, 0, 9);
    FIELD(imm, 9, 27);
    SIGN_EXTEND(imm, 27);
}
141 | 
// Same layout as decode_B_OFF (9-bit opcode, 27-bit imm) but the immediate
// is left unsigned.
static void decode_B_LOC(inst_t *inst)
{
    int bit_size = 36; // presumably read by the FIELD macro -- confirm
    inst->insn = read_36(inst);
    FIELD(opcode, 0, 9);
    FIELD(imm, 9, 27);
}
149 | 
// No-operand format (18 bits): the whole word is treated as the opcode.
static void decode_N(inst_t *inst)
{
    int bit_size = 18; // presumably read by the FIELD macro -- confirm
    inst->insn = read_18(inst);
    FIELD(opcode, 0, 18);
}
156 | 
// Single-register format (18 bits): 12-bit opcode, rA and a 1-bit funct.
static void decode_FLAGS_INTS(inst_t *inst)
{
    int bit_size = 18; // presumably read by the FIELD macro -- confirm
    inst->insn = read_18(inst);
    FIELD(opcode, 0, 12);
    FIELD(rA, 12, 5);
    FIELD(funct, 17, 1);
}
165 | 
// Extend format (27 bits): 12-bit opcode, rA, rB and a 5-bit funct.
static void decode_U_EXTEND(inst_t *inst)
{
    int bit_size = 27; // presumably read by the FIELD macro -- confirm
    inst->insn = read_27(inst);
    FIELD(opcode, 0, 12);
    FIELD(rA, 12, 5);
    FIELD(rB, 17, 5);
    FIELD(funct, 22, 5);
}
175 | 
// Random format (27 bits): 9-bit opcode, rA, a 12-bit funct and the uf
// flag. The funct field covers the bits that decode_U splits into rB and
// its own 7-bit funct.
static void decode_RANDOM(inst_t *inst)
{
    int bit_size = 27; // presumably read by the FIELD macro -- confirm
    inst->insn = read_27(inst);
    FIELD(opcode, 0, 9);
    FIELD(rA, 9, 5);
    FIELD(funct, 14, 12);
    FIELD(uf, 26, 1);
}
185 | 
// Memory format (54 bits): opcode, rA, rB, a 5-bit reg_count, a 2-bit
// adj_rb, a 27-bit sign-extended imm and a 3-bit funct.
static void decode_M(inst_t *inst)
{
    int bit_size = 54; // presumably read by the FIELD macro -- confirm
    inst->insn = read_54(inst);
    FIELD(opcode, 0, 7);
    FIELD(rA, 7, 5);
    FIELD(rB, 12, 5);
    FIELD(reg_count, 17, 5);
    FIELD(adj_rb, 22, 2);
    FIELD(imm, 24, 27);
    SIGN_EXTEND(imm, 27);
    FIELD(funct, 51, 3);

    // The stored count is the decoded 5-bit value plus one.
    inst->reg_count++;
}
201 | 
// Memory-protection format (27 bits): opcode, rA, rB, a 1-bit rw, a 2-bit
// mem_flags and a 7-bit funct.
static void decode_MP(inst_t *inst)
{
    int bit_size = 27; // presumably read by the FIELD macro -- confirm
    inst->insn = read_27(inst);
    FIELD(opcode, 0, 7);
    FIELD(rA, 7, 5);
    FIELD(rB, 12, 5);
    FIELD(rw, 17, 1);
    FIELD(mem_flags, 18, 2);
    FIELD(funct, 20, 7);
}
213 | 
214 | // Decode instruction
// Identify the instruction in inst and store its id in inst->id.
// The macros below give meaning to the rows of opcodes.h, which is then
// included as a sequence of statements inside this function body. Rows are
// tried in table order and the first match returns, so an earlier row
// shadows any later row with the same conditions.
static void decode(inst_t *inst)
{
// FORMAT(fmt): reset the used flags and inst->size, then run decode_<fmt>.
#define FORMAT(fmt) do { clear_used(inst); inst->size = 0; decode_##fmt(inst); } while (0);
// INS / INS_1..INS_4: when the opcode (and every listed field) equals the
// given value, record the instruction id I<x> and return from decode().
#define INS(x,opc) do { if (inst->opcode == opc) { inst->id = I##x; return; } } while (0);
#define INS_1(x,opc,f1,v1) do { if (inst->opcode == opc && inst->f1 == v1) { inst->id = I##x; return; } } while (0);
#define INS_2(x,opc,f1,v1,f2,v2) do { if (inst->opcode == opc && inst->f1 == v1 && inst->f2 == v2) { inst->id = I##x; return; } } while (0);
#define INS_3(x,opc,f1,v1,f2,v2,f3,v3) do { if (inst->opcode == opc && inst->f1 == v1 && inst->f2 == v2 && inst->f3 == v3) { inst->id = I##x; return; } } while (0);
#define INS_4(x,opc,f1,v1,f2,v2,f3,v3,f4,v4) do { if (inst->opcode == opc && inst->f1 == v1 && inst->f2 == v2 && inst->f3 == v3 && inst->f4 == v4) { inst->id = I##x; return; } } while (0);
#include "opcodes.h"
    // default to invalid
    // Reached only when no row of the table returned early.
    clear_used(inst);
    inst->size = 0;
    inst->id = Iinvalid;
}
229 | 
230 | // Print instruction
// Build the textual form of inst in inst->str: the mnemonic for inst->id,
// optional suffixes, then the operands via the PRINT_* macros.
// NOTE(review): PRINT_REGISTER / PRINT_IMMEDIATE are defined in helpers.h;
// presumably they append to inst->str only when the field is marked used,
// and use `first` to decide on a separator -- confirm.
static void tostring(inst_t *inst)
{
    int first = 1; // not referenced directly below; see note above
    inst->mnemonic = mnemonics[inst->id];
    strcpy(inst->str, inst->mnemonic);

    // "." suffix when the format has a uf bit and it is set.
    if (inst->used_uf && inst->uf)
        strcat(inst->str, ".");
    // "i" / "d" suffix for adj_rb values 1 / 2; other values add nothing.
    if (inst->used_adj_rb)
    {
        if (inst->adj_rb == 1)
            strcat(inst->str, "i");
        if (inst->adj_rb == 2)
            strcat(inst->str, "d");
    }

    // TODO Print operands
    PRINT_REGISTER(rA);
    PRINT_REGISTER(rB);
    PRINT_REGISTER(rC);
    PRINT_IMMEDIATE(imm);
    PRINT_IMMEDIATE(reg_count);
}
254 | 
255 | // Disassemble one instruction from buf.
// Exported entry point. Fills *inst for the instruction found at buf:
// records sizeof(inst_t) in _st_size, stores pc and the buffer pointer,
// then runs decode() followed by tostring().
// buf points at 16-bit units and is stored, not copied, so it must stay
// valid while inst is in use.
EXPORT void disassemble(inst_t *inst, uint32_t pc, const uint16_t *buf)
{
    inst->_st_size = sizeof(inst_t); // lets a caller compare struct layouts
    inst->pc = pc;
    inst->bytes = buf;
    decode(inst);
    tostring(inst);
}
264 | 


--------------------------------------------------------------------------------
/snowball/clemency/opcodes.h:
--------------------------------------------------------------------------------
  1 | // XXX This file is processed by generate_py.py.
  2 | 
  3 | // Default macros. You can safely ignore these.
// Any macro the including file did not define gets a fallback here.
// FORMAT and INS default to expanding to nothing.
#ifndef FORMAT
#define FORMAT(x)
#endif
#ifndef INS
#define INS(x,y)
#endif
// Each INS_n falls back to INS_(n-1) by dropping its last (field, value)
// pair, so an includer that defines only INS still sees every row as
// INS(name, opcode).
#ifndef INS_1
#define INS_1(w,x,y,z) INS(w,x)
#endif
#ifndef INS_2
#define INS_2(w,x,y1,z1,y2,z2) INS_1(w,x,y1,z1)
#endif
#ifndef INS_3
#define INS_3(w,x,y1,z1,y2,z2,y3,z3) INS_2(w,x,y1,z1,y2,z2)
#endif
#ifndef INS_4
#define INS_4(w,x,y1,z1,y2,z2,y3,z3,y4,z4) INS_3(w,x,y1,z1,y2,z2,y3,z3)
#endif
 22 | 
 23 | // TODO Instruction definitions
 24 | FORMAT( R )
 25 | INS_3( ad, 0b0000000, funct, 0, arith_signed, 0, is_imm, 0 )
 26 | INS_3( adc, 0b0100000, funct, 0, arith_signed, 0, is_imm, 0 )
 27 | INS_3( adcm, 0b0100010, funct, 0, arith_signed, 0, is_imm, 0 )
 28 | INS_3( adf, 0b0000001, funct, 0, arith_signed, 0, is_imm, 0 )
 29 | INS_3( adfm, 0b0000011, funct, 0, arith_signed, 0, is_imm, 0 )
 30 | INS_3( adm, 0b0000010, funct, 0, arith_signed, 0, is_imm, 0 )
 31 | INS_3( an, 0b0010100, funct, 0, arith_signed, 0, is_imm, 0 )
 32 | INS_3( anm, 0b0010110, funct, 0, arith_signed, 0, is_imm, 0 )
 33 | INS_4( dmt, 0b0110100, funct, 0, arith_signed, 0, is_imm, 0, uf, 0 )
 34 | INS_3( dv, 0b0001100, funct, 0, arith_signed, 0, is_imm, 0 )
 35 | INS_3( dvs, 0b0001100, funct, 0, arith_signed, 1, is_imm, 0 )
 36 | INS_3( dvf, 0b0001100, funct, 0, arith_signed, 0, is_imm, 0 )
 37 | INS_3( dvfm, 0b0001111, funct, 0, arith_signed, 0, is_imm, 0 )
 38 | INS_3( dvm, 0b0001110, funct, 0, arith_signed, 0, is_imm, 0 )
 39 | INS_3( dvsm, 0b0001110, funct, 0, arith_signed, 1, is_imm, 0 )
 40 | INS_3( md, 0b0010000, funct, 0, arith_signed, 0, is_imm, 0 )
 41 | INS_3( mds, 0b0010000, funct, 0, arith_signed, 1, is_imm, 0 )
 42 | INS_3( mdf, 0b0010001, funct, 0, arith_signed, 0, is_imm, 0 )
 43 | INS_3( mdfm, 0b0010011, funct, 0, arith_signed, 0, is_imm, 0 )
 44 | INS_3( mdm, 0b0010010, funct, 0, arith_signed, 0, is_imm, 0 )
 45 | INS_3( mdsm, 0b0010010, funct, 0, arith_signed, 1, is_imm, 0 )
 46 | 
 47 | FORMAT( R_IMM )
 48 | INS_2( adci, 0b0100000, arith_signed, 0, is_imm, 1 )
 49 | INS_2( adcim, 0b0100010, arith_signed, 0, is_imm, 1 )
 50 | INS_2( adi, 0b0000000, arith_signed, 0, is_imm, 1 )
 51 | INS_2( adim, 0b0000010, arith_signed, 0, is_imm, 1 )
 52 | INS_2( ani, 0b0010100, arith_signed, 0, is_imm, 1 )
 53 | INS_2( dvi, 0b0001100, arith_signed, 0, is_imm, 1 )
 54 | INS_2( dvis, 0b0001100, arith_signed, 1, is_imm, 1 )
 55 | INS_2( dvim, 0b0001110, arith_signed, 0, is_imm, 1 )
 56 | INS_2( dvism, 0b0001110, arith_signed, 1, is_imm, 1 )
 57 | INS_3( mdi, 0b0010000, funct, 0, arith_signed, 0, is_imm, 1 )
 58 | INS_3( mdis, 0b0010000, funct, 0, arith_signed, 1, is_imm, 1 )
 59 | INS_3( mdim, 0b0010010, funct, 0, arith_signed, 0, is_imm, 1 )
 60 | INS_3( mdism, 0b0010010, funct, 0, arith_signed, 1, is_imm, 1 )
 61 | 
 62 | FORMAT( B_CC_OFF )
 63 | INS( b, 0b110000 )
 64 | INS( c, 0b110101 )
 65 | 
 66 | FORMAT( B_CC_R )
 67 | INS_1( br, 0b110010, funct, 0 )
 68 | INS_1( cr, 0b110111, funct, 0 )
 69 | 
 70 | FORMAT( B_OFF )
 71 | INS( brr, 0b111000000 )
 72 | INS( car, 0b111001000 )
 73 | 
 74 | FORMAT( B_LOC )
 75 | INS( bra, 0b111000100 )
 76 | INS( caa, 0b111001100 )
 77 | 
 78 | FORMAT( BIN_R )
 79 | INS( cm, 0b10111000 )
 80 | INS( cmf, 0b10111010 )
 81 | INS( cmfm, 0b10111110 )
 82 | INS( cmm, 0b10111100 )
 83 | 
 84 | FORMAT( BIN_R_IMM )
 85 | INS( cmi, 0b10111001 )
 86 | INS( cmim, 0b10111101 )
 87 | 
 88 | FORMAT( MOV_LOW_HI )
 89 | INS( mh, 0b10001 )
 90 | INS( ml, 0b10010 )
 91 | 
 92 | FORMAT( MOV_LOW_SIGNED )
 93 | INS( ms, 0b10011 )
 94 | 
 95 | FORMAT( U )
 96 | INS_2( fti, 0b101000101, funct, 0, uf, 0)
 97 | INS_2( ftim, 0b101000111, funct, 0, uf, 0)
 98 | INS_2( itf, 0b101000100, funct, 0, uf, 0)
 99 | INS_2( itfm, 0b101000110, funct, 0, uf, 0)
100 | 
101 | FORMAT( R )
102 | INS_3( mu, 0b0001000, funct, 0, arith_signed, 0, is_imm, 0 )
103 | INS_3( muf, 0b0001001, funct, 0, arith_signed, 0, is_imm, 0 )
104 | INS_3( mufm, 0b0001011, funct, 0, arith_signed, 0, is_imm, 0 )
105 | INS_3( mum, 0b0001010, funct, 0, arith_signed, 0, is_imm, 0 )
106 | INS_3( mus, 0b0001000, funct, 0, arith_signed, 1, is_imm, 0 )
107 | INS_3( musm, 0b0001010, funct, 0, arith_signed, 1, is_imm, 0 )
108 | INS_3( or, 0b0011000, funct, 0, arith_signed, 0, is_imm, 0 )
109 | INS_3( orm, 0b0011010, funct, 0, arith_signed, 0, is_imm, 0 )
110 | INS_3( rl, 0b0110000, funct, 0, arith_signed, 0, is_imm, 0 )
111 | INS_3( rlm, 0b0110010, funct, 0, arith_signed, 0, is_imm, 0 )
112 | INS_3( rr, 0b0110001, funct, 0, arith_signed, 0, is_imm, 0 )
113 | INS_3( rrm, 0b0110011, funct, 0, arith_signed, 0, is_imm, 0 )
114 | INS_3( sa, 0b0101101, funct, 0, arith_signed, 0, is_imm, 0 )
115 | INS_3( sam, 0b0101111, funct, 0, arith_signed, 0, is_imm, 0 )
116 | INS_3( sb, 0b0000100, funct, 0, arith_signed, 0, is_imm, 0 )
117 | INS_3( sbc, 0b0100100, funct, 0, arith_signed, 0, is_imm, 0 )
118 | INS_3( sbcm, 0b0100110, funct, 0, arith_signed, 0, is_imm, 0 )
119 | INS_3( sbf, 0b0000101, funct, 0, arith_signed, 0, is_imm, 0 )
120 | INS_3( sbfm, 0b0000111, funct, 0, arith_signed, 0, is_imm, 0 )
121 | INS_3( sbm, 0b0000110, funct, 0, arith_signed, 0, is_imm, 0 )
122 | INS_3( sl, 0b0101000, funct, 0, arith_signed, 0, is_imm, 0 )
123 | INS_3( slm, 0b0101010, funct, 0, arith_signed, 0, is_imm, 0 )
124 | INS_3( sr, 0b0101001, funct, 0, arith_signed, 0, is_imm, 0 )
125 | INS_3( srm, 0b0101011, funct, 0, arith_signed, 0, is_imm, 0 )
126 | INS_3( xr, 0b0011100, funct, 0, arith_signed, 0, is_imm, 0 )
127 | INS_3( xrm, 0b0011110, funct, 0, arith_signed, 0, is_imm, 0 )
128 | 
129 | FORMAT( R_IMM )
130 | INS_2( mui, 0b0001000, arith_signed, 0, is_imm, 1 )
131 | INS_2( muim, 0b0001010, arith_signed, 0, is_imm, 1 )
132 | INS_2( muis, 0b0001000, arith_signed, 1, is_imm, 1 )
133 | INS_2( muism, 0b0001010, arith_signed, 1, is_imm, 1 )
134 | INS_2( ori, 0b0011000, arith_signed, 0, is_imm, 1 )
135 | INS_2( rli, 0b1000000, arith_signed, 0, is_imm, 0 )
136 | INS_2( rlim, 0b1000010, arith_signed, 0, is_imm, 0 )
137 | INS_2( rri, 0b1000001, arith_signed, 0, is_imm, 0 )
138 | INS_2( rrim, 0b1000011, arith_signed, 0, is_imm, 0 )
139 | INS_2( sai, 0b0111101, arith_signed, 0, is_imm, 0 )
140 | INS_2( saim, 0b0111111, arith_signed, 0, is_imm, 0 )
141 | INS_2( sbi, 0b0000100, arith_signed, 0, is_imm, 1 )
142 | INS_2( sbci, 0b0100100, arith_signed, 0, is_imm, 1 )
143 | INS_2( sbcim, 0b0100110, arith_signed, 0, is_imm, 1 )
144 | INS_2( sbim, 0b0000110, arith_signed, 0, is_imm, 1 )
145 | INS_2( sli, 0b0111000, arith_signed, 0, is_imm, 0 )
146 | INS_2( slim, 0b0111010, arith_signed, 0, is_imm, 0 )
147 | INS_2( sri, 0b0111001, arith_signed, 0, is_imm, 0 )
148 | INS_2( srim, 0b0111011, arith_signed, 0, is_imm, 0 )
149 | INS_2( xri, 0b0011100, arith_signed, 0, is_imm, 1 )
150 | 
151 | FORMAT( U )
152 | INS_1( bf, 0b101001100, funct, 0b1000000 )
153 | INS_1( bfm, 0b101001110, funct, 0b1000000 )
154 | INS_1( ng, 0b101001100, funct, 0b0000000 )
155 | INS_1( ngf, 0b101001101, funct, 0b0000000 )
156 | INS_1( ngfm, 0b101001111, funct, 0b0000000 )
157 | INS_1( ngm, 0b101001110, funct, 0b0000000 )
158 | INS_1( nt, 0b101001100, funct, 0b0100000 )
159 | INS_1( ntm, 0b101001110, funct, 0b0100000 )
160 | 
161 | FORMAT( U_EXTEND )
162 | INS_1( ses, 0b101000000111, funct, 0 )
163 | INS_1( sew, 0b101000001000, funct, 0 )
164 | INS_1( zes, 0b101000001001, funct, 0 )
165 | INS_1( zew, 0b101000001010, funct, 0 )
166 | 
167 | FORMAT( N )
168 | INS( re, 0b101000000000000000 )
169 | INS( dbrk, 0b111111111111111111 )
170 | INS( ht, 0b101000000011000000 )
171 | INS( ir, 0b101000000001000000 )
172 | INS( wt, 0b101000000010000000 )
173 | 
174 | FORMAT( FLAGS_INTS )
175 | INS_1( rf, 0b101000001100, funct, 0 )
176 | INS_1( sf, 0b101000001011, funct, 0 )
177 | INS_1( ei, 0b101000000100, funct, 0 )
178 | INS_1( di, 0b101000000101, funct, 0 )
179 | 
180 | FORMAT( M )
181 | INS_1( ldt, 0b1010110, funct, 0)
182 | INS_1( lds, 0b1010100, funct, 0)
183 | INS_1( ldw, 0b1010101, funct, 0)
184 | INS_1( stt, 0b1011010, funct, 0)
185 | INS_1( sts, 0b1011000, funct, 0)
186 | INS_1( stw, 0b1011001, funct, 0)
187 | 
188 | FORMAT( RANDOM )
189 | INS_1( rnd, 0b101001100, funct, 0b000001100000 )
190 | INS_1( rndm, 0b101001110, funct, 0b000001100000 )
191 | 
192 | FORMAT( MP )
193 | INS_3( rmp, 0b1010010, rw, 0, mem_flags, 0, funct, 0)
194 | INS_2( smp, 0b1010010, rw, 1, funct, 0)
195 | 
196 | // Unset the macros. You can safely ignore these.
197 | #undef FORMAT
198 | #undef INS
199 | #undef INS_1
200 | #undef INS_2
201 | #undef INS_3
202 | #undef INS_4
203 | 


--------------------------------------------------------------------------------
/snowball/clemency/generate_py.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # Generate python wrapper from headers.
  3 | import re
  4 | import sys
  5 | 
  6 | def extract_insn(header):
  7 |     insns = ['invalid']
  8 |     infp = open(header, 'r')
  9 |     for line in infp:
 10 |         comment = line.find('//')
 11 |         if comment >= 0:
 12 |             # ignore comments
 13 |             line = line[:comment]
 14 |         line = line.strip()
 15 |         if len(line) == 0 or line.startswith('#'):
 16 |             # ignore empty lines and preprocessor defines
 17 |             continue
 18 |         elif line.startswith('FORMAT'):
 19 |             # ignore
 20 |             continue
 21 |         else:
 22 |             _, insn, _ = re.match('^INS(_\d)?\s*\(\s*([\w\d]+)\s*(,.*)?\s*\)$', line).groups()
 23 |             insns += [insn]
 24 |     insns += ['__count']
 25 |     s = ''
 26 |     for i in xrange(len(insns)):
 27 |         s += 'I%s = %d\n' % (insns[i], i)
 28 |     return s
 29 | 
 30 | def extract_struct(header):
 31 |     def c_to_ctype(typ, const=None, unsigned=None, signed=None, ptr=None, arr=None):
 32 |         if typ == 'char':
 33 |             if unsigned:
 34 |                 result = 'c_ubyte'
 35 |             else:
 36 |                 result = 'c_char'
 37 |         elif typ == 'short':
 38 |             if unsigned:
 39 |                 result = 'c_ushort'
 40 |             else:
 41 |                 result = 'c_short'
 42 |         elif typ == 'int':
 43 |             if unsigned:
 44 |                 result = 'c_uint'
 45 |             else:
 46 |                 result = 'c_int'
 47 |         elif typ.endswith('_t'):
 48 |             # uint8_t -> c_uint8
 49 |             result = 'c_' + typ[:-2]
 50 |         else:
 51 |             raise Exception('Unhandled type %s' % typ)
 52 | 
 53 |         if result == 'c_char' and ptr:
 54 |             result = 'c_char_p'
 55 |         elif ptr:
 56 |             result = 'POINTER(%s)' % result
 57 | 
 58 |         if arr:
 59 |             size = int(arr[1:-1].strip(), 0)
 60 |             result = '%s*%d' % (result, size)
 61 | 
 62 |         return result
 63 | 
 64 |     fields = []
 65 |     in_struct = False
 66 | 
 67 |     infp = open(header, 'r')
 68 |     for line in infp:
 69 |         comment = line.find('//')
 70 |         if comment >= 0:
 71 |             # ignore comments
 72 |             line = line[:comment]
 73 |         line = line.strip()
 74 |         if len(line) == 0 or line.startswith('#'):
 75 |             # ignore empty lines and preprocessor defines
 76 |             pass
 77 |         elif line.startswith('EXPORT'):
 78 |             # ignore exported functions
 79 |             pass
 80 |         elif line.startswith('typedef struct'):
 81 |             in_struct = True
 82 |         elif line.startswith('} inst_t'):
 83 |             in_struct = False
 84 |         elif in_struct:
 85 |             try:
 86 |                 if line.startswith('DEFINE_FIELD'):
 87 |                     typ, name = re.match(r'^DEFINE_FIELD\s*\(\s*([^,\s]*)\s*,\s*([^,\s]*)\s*\)$', line).groups()
 88 |                     fields += [
 89 |                         (name, c_to_ctype(typ)),
 90 |                         ('used_%s' % name, 'c_uint8')
 91 |                     ]
 92 |                 elif line.startswith('BEGIN_FIELDS') or line.startswith('END_FIELDS'):
 93 |                     # ignore 
 94 |                     pass
 95 |                 else:
 96 |                     const, unsigned, signed, typ, ptr, name, arr = re.match(r'^(const)?\s*(unsigned)?(signed)?\s*([\d\w]+)\s*(\*)?\s*([\d\w]+)\s*(\[\s*\d+\s*\])?\s*;$', line).groups()
 97 |                     fields += [(name, c_to_ctype(typ, const=const, unsigned=unsigned, ptr=ptr, arr=arr))]
 98 |             except AttributeError:
 99 |                 raise Exception('Bad line: %s' % line)
100 | 
101 |     s = ''
102 |     s += 'class Inst(Structure):\n'
103 |     s += '    _fields_ = [\n'
104 |     for f in fields:
105 |         s += '        ("%s", %s),\n' % f
106 |     s += '    ]\n'
107 |     return s
108 | 
# Source text of the generated ctypes wrapper module.
# The @@NAME@@, @@STRUCT@@ and @@INSN@@ markers are placeholders; the script
# statements after this literal substitute them with str.replace() before the
# result is written to py<NAME>.py.
TEMPLATE = '''# Autogenerated
from ctypes import *
import platform

if platform.system() == 'Windows':
    if platform.architecture()[0] == '32bit':
        dll = cdll.@@NAME@@_32
    else:
        dll = cdll.@@NAME@@_64
elif platform.system() == 'Darwin':
    dll = cdll.LoadLibrary('lib@@NAME@@.dylib')
else:
    dll = cdll.LoadLibrary('lib@@NAME@@.so')

@@STRUCT@@

@@INSN@@

dll.disassemble.argtypes = [POINTER(Inst), c_uint32, POINTER(c_uint16)]

def disassemble(pc, input):
    inst = Inst()
    dll.disassemble(byref(inst), pc, input)
    assert inst._st_size == sizeof(inst)
    return inst

mnemonics = (c_char_p * I__count).in_dll(dll, 'mnemonics')
num_registers = c_uint.in_dll(dll, 'num_registers').value
registers = (c_char_p * num_registers).in_dll(dll, 'registers')

comments = {
    'ad': 'Add',
    'adc': 'Add With Carry',
    'adci': 'Add Immediate With Carry',
    'adcm': 'Add Multi Reg With Carry',
    'adf': 'Add Floating Point',
    'adfm': 'Add Floating Point Multi Reg',
    'adi': 'Add Immediate',
    'adim': 'Add Immediate Multi Reg',
    'adm': 'Add Multi Reg',
    'an': 'And',
    'ani': 'And Immediate',
    'anm': 'And Multi Reg',
    'b': 'Branch Conditional',
    'bf': 'Bit Flip',
    'bfm': 'Bit Flip Multi Reg',
    'br': 'Branch Register Conditional',
    'bra': 'Branch Absolute',
    'brr': 'Branch Relative',
    'c': 'Call Conditional',
    'caa': 'Call Absolute',
    'car': 'Call Relative',
    'cm': 'Compare',
    'cmf': 'Compare Floating Point',
    'cmfm': 'Compare Floating Point Multi Reg',
    'cmi': 'Compare Immediate',
    'cmim': 'Compare Immediate Multi Reg',
    'cmm': 'Compare Multi Reg',
    'cr': 'Call Register Conditional',
    'dbrk': 'Debug Break',
    'di': 'Disable Interrupts',
    'dmt': 'Direct Memory Transfer',
    'dv': 'Divide',
    'dvf': 'Divide Floating Point',
    'dvfm': 'Divide Floating Point Multi Reg',
    'dvi': 'Divide Immediate',
    'dvim': 'Divide Immediate Multi Reg',
    'dvis': 'Divide Immediate Signed',
    'dvm': 'Divide Multi Reg',
    'dvs': 'Divide Signed',
    'dvsm': 'Divide Signed Multi Reg',
    'ei': 'Enable Interrupts',
    'fti': 'Float to Integer',
    'ftim': 'Float to Integer Multi Reg',
    'ht': 'Halt',
    'ir': 'Interrupt Return',
    'itf': 'Integer to Float',
    'itfm': 'Integer to Float Multi Reg',
    'lds': 'Load Single',
    'ldt': 'Load Tri',
    'ldw': 'Load Word',
    'md': 'Modulus',
    'mdf': 'Modulus Floating Point',
    'mdfm': 'Modulus Floating Point Multi Reg',
    'mdi': 'Modulus Immediate',
    'mdim': 'Modulus Immediate Multi Reg',
    'mdis': 'Modulus Immediate Signed',
    'mdm': 'Modulus Multi Reg',
    'mds': 'Modulus Signed',
    'mdsm': 'Modulus Signed Multi Reg',
    'mh': 'Move High',
    'ml': 'Move Low',
    'ms': 'Move Low Signed',
    'mu': 'Multiply',
    'muf': 'Multiply Floating Point',
    'mufm': 'Multiply Floating Point Multi Reg',
    'mui': 'Multiply Immediate',
    'muim': 'Multiply Immediate Multi Reg',
    'muis': 'Multiply Immediate Signed',
    'mum': 'Multiply Multi Reg',
    'mus': 'Multiply Signed',
    'musm': 'Multiply Signed Multi Reg',
    'ng': 'Negate',
    'ngf': 'Negate Floating Point',
    'ngfm': 'Negate Floating Point Multi Reg',
    'ngm': 'Negate Multi Reg',
    'nt': 'Not',
    'ntm': 'Not Multi Reg',
    'or': 'Or',
    'ori': 'Or Immediate',
    'orm': 'Or Multi Reg',
    're': 'Return',
    'rf': 'Read Flags',
    'rl': 'Rotate Left',
    'rli': 'Rotate Left Immediate',
    'rlim': 'Rotate Left Immediate Multi Reg',
    'rlm': 'Rotate Left Multi Reg',
    'rmp': 'Read Memory Protection',
    'rnd': 'Random',
    'rndm': 'Random Multi Reg',
    'rr': 'Rotate Right',
    'rri': 'Rotate Right Immediate',
    'rrim': 'Rotate Right Immediate Multi Reg',
    'rrm': 'Rotate Right Multi Reg',
    'sa': 'Shift Arithemetic Right',
    'sai': 'Shift Arithemetic Right Immediate',
    'sam': 'Shift Arithemetic Right Multi Reg',
    'sb': 'Subtract',
    'sbc': 'Subtract With Carry',
    'sbci': 'Subtract Immediate With Carry',
    'sbcm': 'Subtract Multi Reg With Carry',
    'sbf': 'Subtract Floating Point',
    'sbfm': 'Subtract Floating Point Multi Reg',
    'sbi': 'Subtract Immediate',
    'sbim': 'Subtract Immediate Multi Reg',
    'sbm': 'Subtract Multi Reg',
    'ses': 'Sign Extend Single',
    'sew': 'Sign Extend Word',
    'sf': 'Set Flags',
    'sl': 'Shift Left',
    'sli': 'Shift Left Immediate',
    'slim': 'Shift Left Immediate Multi Reg',
    'slm': 'Shift Left Multi Reg',
    'smp': 'Set Memory Protection',
    'sr': 'Shift Right',
    'sri': 'Shift Right Immediate',
    'srim': 'Shift Right Immediate Multi Reg',
    'srm': 'Shift Right Multi Reg',
    'sts': 'Store Single',
    'stt': 'Store Tri',
    'stw': 'Store Word',
    'wt': 'Wait',
    'xr': 'Xor',
    'xri': 'Xor Immediate',
    'xrm': 'Xor Multi Reg',
    'zes': 'Zero Extend Single',
    'zew': 'Zero Extend Word',
}
'''
268 | 
# Script entry: the first command-line argument is the architecture name.  It
# selects the header to parse ('<name>.h'), the library names baked into the
# template and the output file ('py<name>.py').
name = sys.argv[1]
output = TEMPLATE
output = output.replace('@@NAME@@', name)
output = output.replace('@@INSN@@', extract_insn('opcodes.h'))
output = output.replace('@@STRUCT@@', extract_struct('%s.h' % name))
# Write through a context manager so the generated module is flushed and the
# handle closed even if the write fails part-way.
with open('py%s.py' % name, 'w') as outfp:
    outfp.write(output)
276 | 


--------------------------------------------------------------------------------
/clemency-exploit-utils/clemency/struct.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import StringIO
  3 | from string import printable
  4 | 
  5 | class DoneReadingException(Exception):
  6 |   pass
  7 | 
  8 | class error(Exception):
  9 |   pass
 10 | 
 11 | class ClemencyFile(object):
 12 |   def __init__(self, f):
 13 |     self.file = f
 14 |     self.rpn = 0
 15 |     self.rp = 0
 16 |     self.wpn = 0
 17 |     self.wp = 0
 18 |     self.towrite = []
 19 | 
 20 |   def _readbyte(self):
 21 |     res = self.file.read(1)
 22 |     if not res:
 23 |       raise DoneReadingException()
 24 |     return ord(res)
 25 | 
 26 |   def _readnyte(self):
 27 |     if self.rpn == 0:
 28 |       self.rp = self._readbyte()
 29 |       self.rpn = 8
 30 | 
 31 |     self.rp = (self.rp << 8) | self._readbyte()
 32 |     self.rpn += 8
 33 | 
 34 |     out = self.rp >> (self.rpn - 9)
 35 |     self.rp &= (1 << (self.rpn - 9)) - 1
 36 |     self.rpn -= 9
 37 | 
 38 |     return out
 39 | 
 40 |   def _readflush(self):
 41 |     if self.rp != 0:
 42 |       print >> sys.stderr, "WARNING: _readflush flushed %d nonzero bits - your read lengths are wrong" % self.rpn
 43 |     self.rpn = 0
 44 |     self.rp = 0
 45 | 
 46 |   def _writenyte(self, n):
 47 |     self.wp = (self.wp << 9) | n
 48 |     self.wpn += 9
 49 |     while self.wpn >= 8:
 50 |       self.towrite.append((self.wp >> (self.wpn - 8)) & 0xff)
 51 |       self.wpn -= 8
 52 |     self.wp = self.wp & ((1 << self.wpn) - 1)
 53 | 
 54 |   def _writeflush(self):
 55 |     self.wp <<= (8 - self.wpn)
 56 |     if self.wpn:
 57 |         self.towrite.append(self.wp)
 58 |     self.file.write(''.join(map(chr, self.towrite)))
 59 |     self.file.flush()
 60 |     self.towrite = []
 61 |     self.wpn = 0
 62 |     self.wp = 0
 63 | 
 64 |   def read(self, n):
 65 |     out = []
 66 |     try:
 67 |       for _ in xrange(n):
 68 |         out.append(self._readnyte())
 69 |       self._readflush()
 70 |     except BaseException as e:
 71 |       print "Read interrupted (%s); partial output:" % e, out
 72 |       raise
 73 |     return ClemencyBuffer(out)
 74 | 
 75 |   def _normalize_readuntil(self, nval):
 76 |     if isinstance(nval, str):
 77 |       nval = [ord(c) for c in nval]
 78 |     if isinstance(nval, (int, long)):
 79 |       nval = [nval]
 80 |     return nval
 81 | 
 82 |   def readuntil(self, *nvals, **kwargs):
 83 |     nvals = [self._normalize_readuntil(nval) for nval in nvals]
 84 |     maxsize = kwargs.pop('maxsize', -1)
 85 | 
 86 |     out = []
 87 |     try:
 88 |       while 1:
 89 |         n = self._readnyte()
 90 |         out.append(n)
 91 |         if len(out) == maxsize:
 92 |           break
 93 |         for nval in nvals:
 94 |           if out[-len(nval):] == nval:
 95 |             break
 96 |         else:
 97 |           continue
 98 |         break
 99 |     except BaseException as e:
100 |       print "Read interrupted (%s); partial output:" % e, out
101 |       raise
102 |     self._readflush()
103 |     return ClemencyBuffer(out)
104 | 
105 |   def write(self, nytearr):
106 |     if isinstance(nytearr, str):
107 |       nytearr = [ord(c) for c in nytearr]
108 |     for n in nytearr:
109 |       self._writenyte(n)
110 |     self._writeflush()
111 | 
112 |   def read_all(self):
113 |     output = []
114 |     while True:
115 |       try:
116 |         output.append(self._readnyte())
117 |       except DoneReadingException, e:
118 |         break
119 |     return ClemencyBuffer(output)
120 | 
121 |   def close(self):
122 |     self.file.close()
123 | 
class ClemencyBuffer(object):
  '''
  ClemencyBuffer class - meant to represent a range of memory in the Clemency architecture.
  Essentially a wrapper around a list of 9-bit unsigned integers.

  Should provide the standard list access interface (though not `del`), as well as
  concatenation, multiplication, and conversion to raw bytes that can be sent across the wire.

  There are also some helper functions to deal with printable stuff.
  '''
  def __init__(self, initial_data=None):
    # Copy the input so the buffer never aliases (and later mutates through push()) the
    # caller's list; this also lets any iterable of values be passed in.
    data = [] if initial_data is None else list(initial_data)
    assert all(0 <= b <= 0x1ff for b in data)
    self._buf = data
  def push(self, b):
    '''Append one 9-bit value.'''
    assert 0 <= b <= 0x1ff
    self._buf.append(b)
  def add(self, b):
    '''Alias for push().'''
    self.push(b)
  def __getitem__(self, key):
    '''Return a single value for an index, or a new ClemencyBuffer for a slice.'''
    if isinstance(key, slice):
      return ClemencyBuffer(self._buf[key])
    # Any other key goes straight to the list, so an unsupported key type raises
    # TypeError instead of silently yielding None.
    return self._buf[key]
  def __setitem__(self, key, value):
    '''
    Store one value at `key`. `value` is an integer in 0..0x1ff, a 1-character string
    (its ordinal is stored), or a 2-character string whose first character (0 or 1) is
    the ninth bit and whose second character supplies the low 8 bits.
    '''
    if isinstance(value, str):
      assert 1 <= len(value) <= 2
      if len(value) == 1:
        value = ord(value)
      else:
        assert ord(value[0]) in [0, 1]
        # The high character is bit 8 of the nyte; shifting by 9 would create a 10-bit value.
        value = (ord(value[0]) << 8) | ord(value[1])
    else:
      assert isinstance(value, int)
    assert 0 <= value <= 0x1ff
    self._buf[key] = value
  def __add__(self, other):
    '''Concatenate with a list of values, a printable string or another ClemencyBuffer.'''
    if type(other) == list:
      return ClemencyBuffer(self._buf + other)
    elif type(other) == str:
      assert all(c in printable for c in other)
      return self + ClemencyBuffer.from_string(other)
    elif type(other) == type(self):
      return ClemencyBuffer(self._buf + other._buf)
    else:
      assert False
  def __mul__(self, other):
    '''Repeat the buffer `other` times.'''
    assert type(other) == int
    return ClemencyBuffer(self._buf * other)
  def __repr__(self):
    return "ClemencyBuffer(%r)" % self._buf
  def __str__(self):
    return self.to_printable_string()
  def raw_string(self):
    '''Return the buffer packed into raw bytes, exactly as ClemencyFile.write() would emit it.'''
    io = StringIO.StringIO()
    w = ClemencyFile(io)
    w.write(self._buf)
    v = io.getvalue()
    io.close()
    return v
  def is_printable(self):
    '''
    Returns True if every value in the buffer fits in a single 8-bit character (0..255),
    i.e. if to_printable_string() can convert it. This is a range check only; it does not
    test membership in string.printable.
    '''
    return all((0 <= c <= 255) for c in self._buf)
  def to_printable_string(self):
    '''
    If this buffer is printable (see is_printable), returns a string version of it. Otherwise fails.
    '''
    assert self.is_printable(), "Buffer %s is not printable" % self._buf
    return ''.join(chr(c) for c in self._buf)
  def __unicode__(self):
    assert False
  @staticmethod
  def from_string(s):
    '''
    To convert an ASCII string into what it would appear like in Clemency memory.
    Note that this does not convert from a standard byte buffer! For that, use
    struct.unpack/struct.pack, or read the data using ClemencyFile.
    '''
    return ClemencyBuffer(map(ord, s))
  def __len__(self):
    return len(self._buf)
  def __iter__(self):
    return iter(self._buf)
  def __eq__(self, other):
    if type(other) != type(self):
      return False
    return other._buf == self._buf
  def __ne__(self, other):
    return not (self == other)
  def ljust(self, nBytes, filler=0):
    '''Return a copy padded on the right with `filler` up to `nBytes` values.'''
    assert 0 <= filler <= 0x1ff
    return ClemencyBuffer(self._buf + ([filler] * (nBytes - len(self))))
  def rjust(self, nBytes, filler=0):
    '''Return a copy padded on the left with `filler` up to `nBytes` values.'''
    assert 0 <= filler <= 0x1ff
    return ClemencyBuffer(([filler] * (nBytes - len(self))) + self._buf)
  def index(self, buf):
    '''
    Locate `buf` in this buffer. An integer is looked up with list.index (ValueError if
    absent). A list, string or ClemencyBuffer is searched for as a contiguous run and the
    start offset is returned, or -1 if it does not occur.
    '''
    if type(buf) == int:
      return self._buf.index(buf)
    if type(buf) == list:
      buf = ClemencyBuffer(buf)
    elif type(buf) == str:
      buf = ClemencyBuffer.from_string(buf)
    elif type(buf) != type(self):
      assert False
    haystack, needle = self._buf, buf._buf
    width = len(needle)
    for start in range(len(haystack)):
      if haystack[start:start + width] == needle:
        return start
    return -1

CB = ClemencyBuffer
242 | 
def _write_single(d):
  '''
  Encode an integer as one 9-bit value and return it in a one-element list.

  Accepts -0x100..0x1ff; negative numbers are stored in two's complement. The lower bound
  is -0x100 (not -0xff) because that is the most negative 9-bit value and is what
  _read_single_signed yields for 0x100.
  Raises error if the number is out of range.
  '''
  if d < -0x100 or d > 0x1ff:
    raise error("single format requires -0x100 <= number <= 0x1ff")
  # Masking is a no-op for in-range non-negative values and wraps negative ones.
  return [d & 0x1ff]
249 | 
def _write_word(d):
  '''
  Encode an integer as an 18-bit word: two 9-bit values, low half first.

  Accepts -0x20000..0x3ffff; negative numbers are stored in two's complement. The lower
  bound includes -0x20000, the most negative 18-bit value.
  Raises error if the number is out of range.
  '''
  if d < -0x20000 or d > 0x3ffff:
    raise error("word format requires -0x20000 <= number <= 0x3ffff")
  d &= (1 << 18) - 1
  return [d & 0x1ff, (d >> 9) & 0x1ff]
257 | 
def _write_triple(d):
  '''
  Encode an integer as a 27-bit triple: three 9-bit values in middle-endian order
  (bits 9-17, then bits 18-26, then bits 0-8).

  Accepts -0x4000000..0x7ffffff; negative numbers are stored in two's complement. The
  lower bound includes -0x4000000, the most negative 27-bit value.
  Raises error if the number is out of range.
  '''
  if d < -0x4000000 or d > 0x7ffffff:
    raise error("triple format requires -0x4000000 <= number <= 0x7ffffff")
  d &= (1 << 27) - 1
  return [(d >> 9) & 0x1ff, (d >> 18) & 0x1ff, d & 0x1ff]
265 | 
def _write_multi(d):
  '''
  Encode an integer as a 54-bit multi: the high 27 bits followed by the low 27 bits, each
  encoded by _write_triple (six 9-bit values in total).

  Accepts -0x20000000000000..0x3fffffffffffff; negative numbers are stored in two's
  complement. The lower bound includes the most negative 54-bit value.
  Raises error if the number is out of range.
  '''
  if d < -0x20000000000000 or d > 0x3fffffffffffff:
    raise error("multi format requires -0x20000000000000 <= number <= 0x3fffffffffffff")
  d &= (1 << 54) - 1
  high = (d >> 27) & 0x7ffffff
  low = d & 0x7ffffff
  return _write_triple(high) + _write_triple(low)
275 | 
def _read_single(pos, data):
  '''Return (value, next position) for the single 9-bit value stored at data[pos].'''
  value = data[pos]
  return value, pos + 1
278 | 
def _read_word(pos, data):
  '''Return (value, next position) for the 18-bit word at data[pos:pos+2], low half first.'''
  high = data[pos + 1]
  low = data[pos]
  return (high << 9) | low, pos + 2
281 | 
def _read_triple(pos, data):
  '''
  Return (value, next position) for the 27-bit triple at data[pos:pos+3], stored in
  middle-endian order (bits 9-17, bits 18-26, bits 0-8).
  '''
  high = data[pos + 1]
  middle = data[pos]
  low = data[pos + 2]
  return (high << 18) | (middle << 9) | low, pos + 3
284 | 
def _read_multi(pos, data):
  '''Return (value, next position) for a 54-bit multi: two consecutive triples, high one first.'''
  high, after_high = _read_triple(pos, data)
  low, after_low = _read_triple(after_high, data)
  value = (high << 27) | low
  return value, after_low
289 | 
def _read_single_signed(pos, data):
  '''Like _read_single, but interpret the 9-bit value as two's complement.'''
  value, pos = _read_single(pos, data)
  if value & 0x100:
    # Sign bit set: subtract one, invert the 9 bits and negate.
    value = -((value - 1) ^ 0x1ff)
  return value, pos
297 | 
def _read_word_signed(pos, data):
  '''Like _read_word, but interpret the 18-bit value as two's complement.'''
  value, pos = _read_word(pos, data)
  if value & 0x20000:
    # Sign bit set: subtract one, invert the 18 bits and negate.
    value = -((value - 1) ^ 0x3ffff)
  return value, pos
305 | 
def _read_triple_signed(pos, data):
  '''Like _read_triple, but interpret the 27-bit value as two's complement.'''
  value, pos = _read_triple(pos, data)
  if value & 0x4000000:
    # Sign bit set: subtract one, invert the 27 bits and negate.
    value = -((value - 1) ^ 0x7ffffff)
  return value, pos
313 | 
def _read_multi_signed(pos, data):
  '''Like _read_multi, but interpret the 54-bit value as two's complement.'''
  value, pos = _read_multi(pos, data)
  if value & 0x20000000000000:
    # Sign bit set: subtract one, invert the 54 bits and negate.
    value = -((value - 1) ^ 0x3fffffffffffff)
  return value, pos
321 | 
def _parse_format(fmt):
  '''
  Parse a pack/unpack format string into a list of (format char, repeat count) pairs.

  Each item is one of the characters in 'swtmSWTM', optionally preceded by a decimal
  repeat count (default 1), e.g. 'M3S' -> [('M', 1), ('S', 3)].

  Raises error for any other character, including a character that follows a repeat
  count, and for a repeat count that is not followed by a format character.
  '''
  formatChars = 'swtmSWTM'
  digits = '0123456789'
  output = []
  fmtPos = 0
  end = len(fmt)
  while fmtPos < end:
    # Consume the optional run of digits that forms the repeat count.
    start = fmtPos
    while fmtPos < end and fmt[fmtPos] in digits:
      fmtPos += 1
    # The count (if any) must be followed by a real format character; a trailing count
    # or an unknown character is a malformed format.
    if fmtPos == end or fmt[fmtPos] not in formatChars:
      raise error("bad char in struct format")
    count = int(fmt[start:fmtPos]) if fmtPos > start else 1
    output.append((fmt[fmtPos], count))
    fmtPos += 1
  return output
345 | 
def pack(fmt, *args):
  '''
  Counterpart of Python 2 struct.pack for Clemency data. Supported format types:
    S - Clemency byte (9 bits)
    W - Clemency word (18 bits)
    T - Clemency triple (27 bits)
    M - Clemency multi-triple (54 bits)
  Every type takes signed or unsigned operands; a negative operand is converted to its
  2's-complement form and then encoded like an unsigned one.

  Lowercase format characters are accepted and treated as their uppercase versions.

  Returns a ClemencyBuffer. Raises error if the number of arguments does not match the
  format.

  Example usage:
    >>> struct.pack('M3S', 12107830635004600, 3, 328, 384)
    ClemencyBuffer([64, 344, 64, 1, 0, 184, 3, 328, 384])
  '''
  spec = _parse_format(fmt)
  expected = sum(count for (_, count) in spec)
  if len(args) != expected:
    raise error("pack expected %d items for packing (got %d)" % (expected, len(args)))
  writers = {
    'S': _write_single,
    'W': _write_word,
    'T': _write_triple,
    'M': _write_multi
  }
  # Expand repeat counts so there is exactly one format char per argument.
  codes = []
  for (code, count) in spec:
    codes.extend([code] * count)
  packed = ClemencyBuffer([])
  for (code, value) in zip(codes, args):
    packed = packed + writers[code.upper()](value)
  return packed
381 | 
def unpack(fmt, s):
  '''
  Counterpart of Python 2 struct.unpack for Clemency data. Supported format types:
    S - unsigned Clemency byte (9 bits)
    W - unsigned Clemency word (18 bits)
    T - unsigned Clemency triple (27 bits)
    M - unsigned Clemency multi-triple (54 bits)
    s - signed Clemency byte (9 bits)
    w - signed Clemency word (18 bits)
    t - signed Clemency triple (27 bits)
    m - signed Clemency multi-triple (54 bits)
  The 2nd arg is either a raw string (eg, obtained directly from the network), which is
  first decoded into 9-bit values, or an already decoded sequence such as a ClemencyBuffer.

  Returns a tuple of integers. Raises error if the data length does not match the format.

  Example usage:
    >>> struct.unpack('M3S', ' V\x08\x00\x10\x02\xe0\x07H\xc0\x00')
    (12107830635004600, 3, 328, 384)
    >>> struct.unpack('Mt', ClemencyFile(open('hello.bin', 'r')).read(11))
    (12107830635004600, -48232576)
  '''
  total_length = calcsize(fmt)
  if type(s) == str:
    stream = StringIO.StringIO(s)
    stream.seek(0)
    data = ClemencyFile(stream).read_all()
  else:
    data = s
  if len(data) != total_length:
    raise error("unpack requires a string argument of %d bits, not %d" % (total_length * 9, len(data) * 9))
  readers = {
    'S': _read_single,
    'W': _read_word,
    'T': _read_triple,
    'M': _read_multi,
    's': _read_single_signed,
    'w': _read_word_signed,
    't': _read_triple_signed,
    'm': _read_multi_signed
  }
  values = []
  cursor = 0
  for (code, count) in _parse_format(fmt):
    for _ in xrange(count):
      # Each reader returns the decoded value and the position just past it.
      value, cursor = readers[code](cursor, data)
      values.append(value)
  return tuple(values)
428 | 
def calcsize(fmt):
  '''
  Counterpart of Python 2 struct.calcsize for the format types accepted by pack and unpack.
  Returns the size in 9-bit values, not in 8-bit bytes.
  '''
  lengths = {'S': 1, 'W': 2, 'T': 3, 'M': 6}
  total_length = 0
  for (code, count) in _parse_format(fmt):
    total_length += count * lengths[code.upper()]
  return total_length
437 | 
def p(n):
  '''
  Shorthand for pack('T', n): encode one integer as a 27-bit triple.
  '''
  packed = pack('T', n)
  return packed
443 | 
def u(buf):
  '''
  Shorthand for unpack('T', buf)[0]: decode one unsigned 27-bit triple.
  '''
  fields = unpack('T', buf)
  return fields[0]
449 | 
def patch_bytes(filename, offset, buf):
  '''
  Overwrite part of a packed Clemency file in place.

  The file is decoded into 9-bit values, the values starting at index `offset` are
  replaced with the items of `buf`, and the result is re-encoded over the same file.
  `offset` counts 9-bit values, not 8-bit bytes.

  The patch is applied in memory first, so an out-of-range offset raises before the file
  is reopened for writing. Both handles are opened in binary mode and closed
  deterministically.
  '''
  with open(filename, 'rb') as infile:
    data = ClemencyFile(infile).read_all()
  for i, value in enumerate(buf):
    data[offset + i] = value
  with open(filename, 'wb') as outfile:
    ClemencyFile(outfile).write(data)
455 | 


--------------------------------------------------------------------------------
/snowball/riscv/ida-plugin/riscv.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import pyriscv
  3 | import idaapi
  4 | from idaapi import *
  5 | 
  6 | # Registers from the C disassembler.
  7 | GREGS = list(map(str, pyriscv.registers))
  8 | 
  9 | # Instructions from the C disassembler.
 10 | INS = list(map(lambda x: {'name': x, 'feature': 0}, pyriscv.mnemonics))
 11 | 
 12 | # Pseudo-instructions for simplification
 13 | INS += [
 14 |     {'name': 'mov', 'feature': 0},
 15 |     {'name': 'movi', 'feature': 0},
 16 | ]
 17 | 
 18 | # Add all instructions to our module's scope for convenience.
 19 | for i in xrange(len(INS)):
 20 |     globals()['I%s' % INS[i]['name']] = i
 21 | 
 22 | # XXX These are mostly optional. Don't worry about them until things are working.
 23 | # If the graph view ends the basic block after a call, then you are missing the STOP/CALL/JUMP flags.
 24 | FEATURES = {
 25 |     # Control flow instructions
 26 |     Ijr: CF_STOP | CF_JUMP | CF_USE1,
 27 |     Ijalr: CF_CALL | CF_JUMP | CF_USE1 | CF_USE2,
 28 |     Ij: CF_STOP | CF_USE1,
 29 |     Ijal: CF_CALL | CF_USE1 | CF_USE2,
 30 |     # Conditional jumps should not have CF_STOP
 31 |     Ibeq: CF_USE1 | CF_USE2 | CF_USE3,
 32 |     Ibne: CF_USE1 | CF_USE2 | CF_USE3,
 33 |     Ibge: CF_USE1 | CF_USE2 | CF_USE3,
 34 |     Iblt: CF_USE1 | CF_USE2 | CF_USE3,
 35 |     Ibgeu: CF_USE1 | CF_USE2 | CF_USE3,
 36 |     Ibltu: CF_USE1 | CF_USE2 | CF_USE3,
 37 |     # Shift instructions
 38 |     Isll: CF_CHG1 | CF_USE2 | CF_USE3 | CF_SHFT,
 39 |     Isrl: CF_CHG1 | CF_USE2 | CF_USE3 | CF_SHFT,
 40 |     Isra: CF_CHG1 | CF_USE2 | CF_USE3 | CF_SHFT,
 41 |     Islli: CF_CHG1 | CF_USE2 | CF_USE3 | CF_SHFT,
 42 |     Isrli: CF_CHG1 | CF_USE2 | CF_USE3 | CF_SHFT,
 43 |     Israi: CF_CHG1 | CF_USE2 | CF_USE3 | CF_SHFT,
 44 |     # Arithmetic instructions
 45 |     Iaddi: CF_CHG1 | CF_USE2 | CF_USE3,
 46 |     Islti: CF_CHG1 | CF_USE2 | CF_USE3,
 47 |     Isltiu: CF_CHG1 | CF_USE2 | CF_USE3,
 48 |     Ixori: CF_CHG1 | CF_USE2 | CF_USE3,
 49 |     Iori: CF_CHG1 | CF_USE2 | CF_USE3,
 50 |     Iandi: CF_CHG1 | CF_USE2 | CF_USE3,
 51 |     Iadd: CF_CHG1 | CF_USE2 | CF_USE3,
 52 |     Isub: CF_CHG1 | CF_USE2 | CF_USE3,
 53 |     Islt: CF_CHG1 | CF_USE2 | CF_USE3,
 54 |     Isltu: CF_CHG1 | CF_USE2 | CF_USE3,
 55 |     Ixor: CF_CHG1 | CF_USE2 | CF_USE3,
 56 |     Ior: CF_CHG1 | CF_USE2 | CF_USE3,
 57 |     Iand: CF_CHG1 | CF_USE2 | CF_USE3,
 58 |     Imul: CF_CHG1 | CF_USE2 | CF_USE3,
 59 |     Imulh: CF_CHG1 | CF_USE2 | CF_USE3,
 60 |     Imulhsu: CF_CHG1 | CF_USE2 | CF_USE3,
 61 |     Imulhu: CF_CHG1 | CF_USE2 | CF_USE3,
 62 |     Idiv: CF_CHG1 | CF_USE2 | CF_USE3,
 63 |     Idivu: CF_CHG1 | CF_USE2 | CF_USE3,
 64 |     Irem: CF_CHG1 | CF_USE2 | CF_USE3,
 65 |     Iremu: CF_CHG1 | CF_USE2 | CF_USE3,
 66 |     # Load instructions
 67 |     Ilb: CF_CHG1 | CF_USE2 | CF_USE3,
 68 |     Ilh: CF_CHG1 | CF_USE2 | CF_USE3,
 69 |     Ilw: CF_CHG1 | CF_USE2 | CF_USE3,
 70 |     Ilbu: CF_CHG1 | CF_USE2 | CF_USE3,
 71 |     Ilhu: CF_CHG1 | CF_USE2 | CF_USE3,
 72 |     # Store instructions
 73 |     Isb: CF_USE1 | CF_USE2 | CF_USE3,
 74 |     Ish: CF_USE1 | CF_USE2 | CF_USE3,
 75 |     Isw: CF_USE1 | CF_USE2 | CF_USE3,
 76 |     # Load constant instructions
 77 |     Ilui: CF_CHG1 | CF_USE2,
 78 |     Iauipc: CF_CHG1 | CF_USE2,
 79 |     # Pseudo-instructions
 80 |     Imovi: CF_CHG1 | CF_USE2,
 81 |     Imov: CF_CHG1 | CF_USE2,
 82 | }
 83 | for insn, features in FEATURES.items():
 84 |     INS[insn]['feature'] = features
 85 | 
 86 | class riscv_processor_t(idaapi.processor_t):
 87 |     # IDP id ( Numbers above 0x8000 are reserved for the third-party modules)
 88 |     id = 0x8000 + 1
 89 | 
 90 |     # Processor features
 91 |     flag = PR_ASSEMBLE | PR_SEGS | PR_DEFSEG32 | PR_USE32 | PRN_HEX | PR_RNAMESOK | PR_NO_SEGMOVE
 92 | 
 93 |     # Number of bits in a byte for code segments (usually 8)
 94 |     # IDA supports values up to 32 bits
 95 |     cnbits = 8
 96 | 
 97 |     # Number of bits in a byte for non-code segments (usually 8)
 98 |     # IDA supports values up to 32 bits
 99 |     dnbits = 8
100 | 
101 |     # short processor names
102 |     # Each name should be shorter than 9 characters
103 |     psnames = ['riscv32']
104 | 
105 |     # long processor names
106 |     # No restriction on name lengthes.
107 |     plnames = ['RISC-V 32-bit']
108 | 
109 |     # register names
110 |     regNames = GREGS + [
111 |         # Fake segment registers
112 |         "CS",
113 |         "DS"
114 |     ]
115 | 
116 |     # number of registers (optional: deduced from the len(regNames))
117 |     regsNum = len(regNames)
118 | 
119 |     # Segment register information (use virtual CS and DS registers if your
120 |     # processor doesn't have segment registers):
121 |     regFirstSreg = 16 # index of CS
122 |     regLastSreg  = 17 # index of DS
123 | 
124 |     # size of a segment register in bytes
125 |     segreg_size = 0
126 | 
127 |     # You should define 2 virtual segment registers for CS and DS.
128 | 
129 |     # number of CS/DS registers
130 |     regCodeSreg = 16
131 |     regDataSreg = 17
132 | 
133 |     # Array of typical code start sequences (optional)
134 |     # codestart = ['\x55\x8B', '\x50\x51']
135 | 
136 |     # Array of 'return' instruction opcodes (optional)
137 |     # retcodes = ['\xC3', '\xC2']
138 | 
139 |     # Array of instructions
140 |     instruc = INS
141 | 
142 |     # icode of the first instruction
143 |     instruc_start = 0
144 | 
145 |     # icode of the last instruction + 1
146 |     instruc_end = len(instruc) + 1
147 | 
148 |     #
149 |     # Number of digits in floating numbers after the decimal point.
150 |     # If an element of this array equals 0, then the corresponding
151 |     # floating point data is not used for the processor.
152 |     # This array is used to align numbers in the output.
153 |     #      real_width[0] - number of digits for short floats (only PDP-11 has them)
154 |     #      real_width[1] - number of digits for "float"
155 |     #      real_width[2] - number of digits for "double"
156 |     #      real_width[3] - number of digits for "long double"
157 |     # Example: IBM PC module has { 0,7,15,19 }
158 |     #
159 |     # (optional)
160 |     real_width = (0, 7, 15, 0)
161 | 
162 |     # icode (or instruction number) of return instruction. It is ok to give any of possible return
163 |     # instructions
164 |     icode_return = Ijr
165 | 
166 |     # only one assembler is supported
167 |     assembler = {
168 |         'flag' : ASH_HEXF3 | AS_UNEQU | AS_COLON | ASB_BINF4 | AS_N2CHR,
169 |         'name': "My processor module bytecode assembler",
170 |         'origin': "org",
171 |         'end': "end",
172 |         'cmnt': ";",
173 |         'ascsep': "\"",
174 |         'accsep': "'",
175 |         'esccodes': "\"'",
176 |         'a_ascii': "db",
177 |         'a_byte': "db",
178 |         'a_word': "dw",
179 |         'a_dword': "dd",
180 |         'a_qword': "dq",
181 |         'a_oword': "xmmword",
182 |         'a_yword': "ymmword",
183 |         'a_float': "dd",
184 |         'a_double': "dq",
185 |         'a_tbyte': "dt",
186 |         'a_packreal': "",
187 |         'a_dups': "#d dup(#v)",
188 |         'a_bss': "%s dup ?",
189 |         'a_equ': ".equ",
190 |         'a_seg': "seg",
191 |         'a_curip': "$",
192 |         'a_public': "public",
193 |         'a_weak': "weak",
194 |         'a_extrn': "extrn",
195 |         'a_comdef': "",
196 |         'a_align': "align",
197 |         'lbrace': "(",
198 |         'rbrace': ")",
199 |         'a_mod': "%",
200 |         'a_band': "&",
201 |         'a_bor': "|",
202 |         'a_xor': "^",
203 |         'a_bnot': "~",
204 |         'a_shl': "<<",
205 |         'a_shr': ">>",
206 |         'a_sizeof_fmt': "size %s",
207 |         'flag2': 0,
208 |         'cmnt2': "",
209 |         'low8': "",
210 |         'high8': "",
211 |         'low16': "",
212 |         'high16': "",
213 |         'a_include_fmt': "include %s",
214 |         'a_vstruc_fmt': "",
215 |         'a_3byte': "",
216 |         'a_rva': "rva"
217 |     } # Assembler
218 | 
219 |     def notify_is_sane_insn(self, no_crefs):
220 |         """
221 |         is the instruction sane for the current file type?
222 |         args: no_crefs
223 |         1: the instruction has no code refs to it.
224 |            ida just tries to convert unexplored bytes
225 |            to an instruction (but there is no other
226 |            reason to convert them into an instruction)
227 |         0: the instruction is created because
228 |            of some coderef, user request or another
229 |            weighty reason.
230 |         The instruction is in 'cmd'
231 |         returns: 1-ok, <=0-no, the instruction isn't
232 |         likely to appear in the program
233 |         """
234 |         if get_32bit(self.cmd.ea) == 0:
235 |             # All zeros is an invalid instruction.
236 |             return 0
237 |         return 1
238 | 
239 |     # Instructions that jump and then come back.
240 |     def is_call(self):
241 |         return self.cmd.itype in [
242 |             Ijal,
243 |             Ijalr,
244 |         ]
245 | 
246 |     # Instructions that jump somewhere and don't come back.
247 |     def is_jump(self):
248 |         return self.cmd.itype in [
249 |             Ij,
250 |             Ijr,
251 |             Ibeq,
252 |             Ibne,
253 |             Iblt,
254 |             Ibge,
255 |             Ibltu,
256 |             Ibgeu,
257 |         ]
258 | 
259 |     # Add cross-references for an operand.
260 |     def emu_operand(self, op):
261 |         itype = self.cmd.itype
262 |         optype = op.type
263 | 
264 |         if optype == o_imm:
265 |             if self.is_call():
266 |                 ua_add_cref(0, op.value, fl_CN)
267 |             elif self.is_jump():
268 |                 ua_add_cref(0, op.value, fl_JN)
269 | 
270 |     def emu(self):
271 |         """
272 |         Emulate instruction, create cross-references, plan to analyze
273 |         subsequent instructions, modify flags etc. Upon entrance to this function
274 |         all information about the instruction is in 'cmd' structure.
275 |         If zero is returned, the kernel will delete the instruction.
276 |         """
277 |         Feature = self.cmd.get_canon_feature()
278 | 
279 |         if self.cmd.Op1.type != o_void:
280 |             self.emu_operand(self.cmd.Op1)
281 |         if self.cmd.Op2.type != o_void:
282 |             self.emu_operand(self.cmd.Op2)
283 |         if self.cmd.Op3.type != o_void:
284 |             self.emu_operand(self.cmd.Op3)
285 | 
286 |         uncond_jmp = self.cmd.itype in [Ij, Ijr]
287 | 
288 |         flow = (Feature & CF_STOP == 0) and not uncond_jmp
289 |         if flow:
290 |             ua_add_cref(0, self.cmd.ea + self.cmd.size, fl_F)
291 | 
292 |         return 1
293 | 
294 |     def outop(self, op):
295 |         """
296 |         Generate text representation of an instructon operand.
297 |         This function shouldn't change the database, flags or anything else.
298 |         All these actions should be performed only by u_emu() function.
299 |         The output text is placed in the output buffer initialized with init_output_buffer()
300 |         This function uses out_...() functions from ua.hpp to generate the operand text
301 |         Returns: 1-ok, 0-operand is hidden.
302 |         """
303 |         optype = op.type
304 | 
305 |         # We only have two types of operands: registers and immediates.
306 |         if optype == o_reg:
307 |             out_register(self.regNames[op.reg])
308 |         elif optype == o_imm:
309 |             out_symbol('#')
310 |             OutValue(op, OOFW_IMM | OOF_SIGNED)
311 |         else:
312 |             return False
313 |         return True
314 | 
315 |     def out(self):
316 |         """
317 |         Generate text representation of an instruction in 'cmd' structure.
318 |         This function shouldn't change the database, flags or anything else.
319 |         All these actions should be performed only by u_emu() function.
320 |         Returns: nothing
321 |         """
322 |         # Init output buffer
323 |         buf = idaapi.init_output_buffer(1024)
324 |         OutMnem()
325 | 
326 |         # output first operand
327 |         # kernel will call outop()
328 |         if self.cmd.Op1.type != o_void:
329 |             out_one_operand(0)
330 | 
331 |         # output the rest of operands separated by commas
332 |         for i in xrange(1, 3):
333 |             if self.cmd[i].type == o_void:
334 |                 break
335 |             out_symbol(',')
336 |             OutChar(' ')
337 |             out_one_operand(i)
338 | 
339 |         term_output_buffer()
340 |         cvar.gl_comm = 1 # generate comment at the next call to MakeLine()
341 |         MakeLine(buf)
342 | 
343 |     def fill_op_reg(self, op, r):
344 |         op.type = o_reg
345 |         op.dtyp = dt_dword
346 |         op.reg = r
347 | 
348 |     def fill_op_imm(self, op, imm):
349 |         op.type = o_imm
350 |         op.dtyp = dt_dword
351 |         op.value = imm
352 | 
353 |     # Simplify some instructions for brevity.
354 |     def simplify(self, inst):
355 |         itype = inst.id
356 | 
357 |         if itype == Iaddi:
358 |             # addi rd, rs1, #0 -> mov rd, rs1
359 |             if inst.imm == 0:
360 |                 inst.id = Imov
361 |                 inst.used_imm = 0
362 |             # addi rd, zero, #0 -> movi rd, #0
363 |             elif inst.rs1 == 0:
364 |                 inst.id = Imovi
365 |                 inst.used_rs1 = 0
366 | 
367 |     def ana(self):
368 |         """
369 |         Decodes an instruction into self.cmd.
370 |         Returns: self.cmd.size (=the size of the decoded instruction) or zero
371 |         """
372 |         if (self.cmd.ea & 3) != 0:
373 |             # Unaligned addresses cannot have instructions
374 |             return 0
375 | 
376 |         inst = pyriscv.disassemble(self.cmd.ea, str(get_many_bytes(self.cmd.ea, 4)))
377 |         if inst.insn == Iinvalid:
378 |             # Ignore invalid instructions
379 |             return 0
380 | 
381 |         self.simplify(inst)
382 | 
383 |         self.cmd.size = inst.size
384 |         self.cmd.itype = inst.id
385 | 
386 |         op = 0
387 |         ops = (self.cmd.Op1, self.cmd.Op2, self.cmd.Op3)
388 |         if inst.used_rd:
389 |             self.fill_op_reg(ops[op], inst.rd)
390 |             op += 1
391 |         if inst.used_rs1:
392 |             self.fill_op_reg(ops[op], inst.rs1)
393 |             op += 1
394 |         if inst.used_rs2:
395 |             self.fill_op_reg(ops[op], inst.rs2)
396 |             op += 1
397 |         if inst.used_imm:
398 |             self.fill_op_imm(ops[op], inst.imm)
399 |             op += 1
400 | 
401 |         # Return decoded instruction size or zero
402 |         return self.cmd.size
403 | 
def PROCESSOR_ENTRY():
    """Factory hook: return a new riscv_processor_t instance."""
    processor = riscv_processor_t()
    return processor
406 | 


--------------------------------------------------------------------------------
/clemency-as/instrs.py:
--------------------------------------------------------------------------------
  1 | import struct
  2 | import pyparsing
  3 | import re
  4 | 
  5 | special_regs = {"ST": 29, "RA": 30, "PC": 31}
  6 | 
  7 | conditions   = "n e l le g ge no o ns s sl sle sg sge".split()
  8 | orderedconds = "sge sg sle sl ns no ge le n e l g o s".split()
  9 | specials     = [".", "I", "D"] + orderedconds
 10 | 
 11 | def u(x, ip=None):
 12 |   if type(x) is pyparsing.ParseResults:
 13 |     return x.asList()[0]
 14 |   if isinstance(x, Symbolic):
 15 |     return x.value(ip)
 16 |   return x
 17 | 
 18 | class BitString(object):
 19 |   CHECK = False
 20 |   def __init__(self, v, size, ip=0, signed=False):
 21 |     v = u(v, ip)
 22 |     if self.CHECK:
 23 |       if signed:
 24 |         if v < -(1 << (size-1)) or v >= (1 << (size-1)):
 25 |           raise ValueError("Value %d out of range for signed bitfield of size %d" % (v, size))
 26 |       else:
 27 |         if v < 0 or v >= (1 << size):
 28 |           raise ValueError("Value %d out of range for unsigned bitfield of size %d" % (v, size))
 29 |     self.v = v & ((1 << size) - 1)
 30 |     self.size = size
 31 | 
 32 |   def __add__(self, other):
 33 |     if not isinstance(other, BitString):
 34 |       other = BitString(int(other.encode("hex"),16), len(other)*8)
 35 |     return BitString( (self.v << other.size) | other.v, self.size+other.size)
 36 | 
 37 |   def __str__(self):
 38 |     assert (self.size%8 == 0), "Cannot stringify bitstring of partial bytes"
 39 |     return ((hex(self.v)[2:].replace("L","")).rjust((2*len(self)+7)/8, "0")).decode("hex")
 40 | 
 41 |   def __len__(self):
 42 |     return self.size
 43 | 
 44 |   def __repr__(self):
 45 |     return "{ %s }"%( bin(self.v)[2:].rjust(self.size, "0") )
 46 | 
 47 |   def force_str(self):
 48 |     """kinda like str, but will 0 pad at the end"""
 49 |     extra = 8-(self.size%8)
 50 |     return str(self + BitString(0, extra))
 51 | 
 52 |   def __getitem__(self, item):
 53 |     if isinstance(item, slice):
 54 |       assert not item.step, "No step size supported for slicing"
 55 |       if item.start > self.size:
 56 |         return BitString(0,0)
 57 | 
 58 |       newsize = min(item.stop, self.size) - item.start
 59 |       newval  = self.v >> (self.size - newsize - item.start)
 60 |       newval &= (1<<newsize)-1
 61 |       return BitString(newval, newsize)
 62 | 
 63 | class Arg(object):
 64 |   def __init__(self, v):
 65 |     self.v = u(v)
 66 |   def __len__(self):
 67 |     """Returns the length in bits"""
 68 |     return 8*len(self.raw())
 69 |   def __str__(self):
 70 |     return "{%s:%s}"%(str(self.__class__.__name__.split("_")[-1]), str(self.v))
 71 |   def __repr__(self):
 72 |     return self.__str__()
 73 |   def arg_size(self):
 74 |     return None
 75 | 
 76 | class Reg(Arg):
 77 |   def raw(self, ip=None):
 78 |     if self.v in special_regs:
 79 |       return BitString(special_regs[self.v], 5)
 80 |     else:
 81 |       return BitString(int(self.v[1:]), 5)
 82 |   def arg_size(self):
 83 |     return 27
 84 | 
 85 | class Imm(Arg):
 86 |   def raw(self, ip=None):
 87 |     return BitString(self.v, 27)
 88 |   def arg_size(self):
 89 |     return 27
 90 | 
 91 | class Const(object):
 92 |   def __init__(self, psm, num):
 93 |     sizes = {'.ds':1, '.dw':2, '.dt':3, '.dm':6}
 94 |     bits = sizes[psm]*9
 95 |     self.val = BitString(num, bits)
 96 |   def raw(self, ip=None):
 97 |     return self.val
 98 | 
 99 | class Symbolic(object):
100 |   pass
101 | 
class Expr(Symbolic):
  """Arithmetic expression over ``$label`` references and ``$ip``.

  The expression text is kept as written and only evaluated by ``value()``,
  so labels may be defined after the expression has been parsed.  Unknown
  labels evaluate to 0.
  """
  # "$" followed by an identifier; "$ip" is the current instruction address.
  _REF = re.compile(r"\$([a-zA-Z_][a-zA-Z0-9_]*)")

  def __init__(self, vstr, labelstore, offset=0):
    self.vstr = vstr
    self.labelstore = labelstore
    self.offset = offset
  def __add__(self, value):
    # Accumulate instead of overwriting, so (expr + a) + b keeps both terms.
    return Expr(self.vstr, self.labelstore, offset=self.offset+value)
  def __sub__(self, value):
    return Expr(self.vstr, self.labelstore, offset=self.offset-value)
  def value(self, ip):
    """Evaluate the expression for an instruction located at ``ip``."""
    def substitute(match):
      name = match.group(1)
      # Whole-name comparison, so labels that merely start with "ip" are
      # still looked up as labels.
      if name == "ip":
        return "(ip)"
      return "labels.get('%s',0)" % name
    fixed = self._REF.sub(substitute, self.vstr)
    # NOTE(security): eval() runs the expression text as Python code; only
    # assemble sources you trust.
    return eval(fixed, {'labels':self.labelstore, 'ip':ip})+self.offset
116 | 
class Label(Symbolic):
  """Reference to a named label, plus a constant offset.

  The label's address lives in the shared ``labelstore`` mapping and is
  looked up when ``value()`` is called; a label that is not (yet) defined
  counts as address 0.
  """
  def __init__(self, name, labelstore, offset=0):
    self.name = name
    self.labelstore = labelstore
    self.offset = offset
  def update(self, value):
    """Record ``value`` as this label's address in the label store."""
    self.labelstore[self.name] = value
  def __add__(self, value):
    # Accumulate instead of overwriting: an existing offset (e.g. label+4)
    # must survive a later "- ip" applied by a relative encoding.
    return Label(self.name, self.labelstore, offset=self.offset+value)
  def __sub__(self, value):
    return Label(self.name, self.labelstore, offset=self.offset-value)
  def value(self, ip):
    """Return the label's address plus the offset (``ip`` is not used)."""
    return self.labelstore.get(self.name, 0)+self.offset
130 | 
class Ins_conditional_offset(object):
  """Conditional instruction with a target: 6-bit opcode, 4-bit condition
  code and a 17-bit signed offset taken relative to the instruction address.
  """
  def __init__(self, args, condition=0):
    target = args[0]
    if not condition:
      code = 15 #unconditional
    elif condition not in conditions:
      raise Exception("Unknown condition code for instruction: %s"%(condition))
    else:
      # The condition code is the suffix's position in ``conditions``.
      code = conditions.index(condition)
    self.condition_code = code
    self.offset = target
  def raw(self, ip):
    """Encode the instruction as located at address ``ip``."""
    head = BitString(self.opcodestr, 6) + BitString(self.condition_code, 4)
    displacement = BitString(self.offset - ip, 17, ip, signed=True)
    return head + displacement
146 | 
class Ins_conditional_reg(object):
  """Conditional instruction with a register target: 6-bit opcode, 4-bit
  condition code, the register encoding and 3 zero bits.
  """
  def __init__(self, args, condition=0):
    (reg,) = args
    if not condition:
      code = 15 #unconditional
    elif condition not in conditions:
      raise Exception("Unknown condition code for instruction: %s"%(condition))
    else:
      # The condition code is the suffix's position in ``conditions``.
      code = conditions.index(condition)
    self.condition_code = code
    self.reg = reg
  def raw(self, ip):
    """Encode the instruction (``ip`` is not used)."""
    head = BitString(self.opcodestr, 6) + BitString(self.condition_code, 4)
    with_reg = head + self.reg.raw()
    return with_reg + BitString(0, 3)
162 | 
class Ins_offset(object):
  """Instruction made of a 9-bit opcode and a 27-bit location.

  Subclasses that set ``relative`` encode the location as a signed distance
  from the instruction address; otherwise it is encoded unsigned, as given.
  """
  relative = False
  def __init__(self, args):
    self.offset = args[0]
  def raw(self, ip):
    """Encode the instruction as located at address ``ip``."""
    op = BitString(self.opcodestr, 9)
    if self.relative:
      target, is_signed = self.offset - ip, True
    else:
      target, is_signed = self.offset, False
    return op + BitString(target, 27, ip, signed=is_signed)
173 | 
class Ins_three_ref_uf(object):
  """Three-operand instruction with an update-flags bit: 7-bit opcode, the
  three operand encodings, the 4-bit ``special`` field and the UF bit.
  """
  special = 0
  def __init__(self, args, uf=False):
    assert len(args) == 3, "Need 3 args for that op"
    self.a, self.b, self.c = args[0], args[1], args[2]
    self.uf = uf
  def raw(self, ip):
    """Encode the instruction (``ip`` is not used)."""
    encoded = BitString(self.opcodestr, 7)
    for operand in (self.a, self.b, self.c):
      encoded = encoded + operand.raw()
    encoded = encoded + BitString(self.special, 4)
    # Any truthy ``uf`` sets the trailing UF bit.
    return encoded + BitString(1 if self.uf else 0, 1)
189 | 
class Ins_three(object):
  """Three-operand instruction without a UF bit: 7-bit opcode, the three
  operand encodings and 5 zero bits.  The ``uf`` argument is accepted but
  ignored.
  """
  def __init__(self, args, uf=False):
    assert len(args) == 3, "Need 3 args for that op"
    self.a, self.b, self.c = args[0], args[1], args[2]
  def raw(self, ip):
    """Encode the instruction (``ip`` is not used)."""
    encoded = BitString(self.opcodestr, 7)
    for operand in (self.a, self.b, self.c):
      encoded = encoded + operand.raw()
    return encoded + BitString(0, 5)
202 | 
class Ins_two_imm_uf(object):
  """Two operands plus a small immediate: 7-bit opcode, two operand
  encodings, a 7-bit immediate, the 2-bit ``special`` field and the UF bit.
  """
  special = 0
  def __init__(self, args, uf=False):
    assert len(args) == 3, "Need 3 args for that op"
    self.a, self.b, self.imm = args[0], args[1], args[2]
    self.uf = uf
  def raw(self, ip):
    """Encode the instruction; ``ip`` is used to resolve a symbolic immediate."""
    encoded = BitString(self.opcodestr, 7) + self.a.raw() + self.b.raw()
    encoded = encoded + BitString(self.imm, 7, ip)
    encoded = encoded + BitString(self.special, 2)
    # Any truthy ``uf`` sets the trailing UF bit.
    return encoded + BitString(1 if self.uf else 0, 1)
217 | 
class Imm_three(object):
  """Three-operand format: 7-bit opcode, the three operand encodings and
  5 zero bits.
  """
  def __init__(self, args):
    assert len(args) == 3, "Need 3 args for that op"
    self.a, self.b, self.c = args[0], args[1], args[2]
  def raw(self, ip):
    """Encode the instruction (``ip`` is not used)."""
    encoded = BitString(self.opcodestr, 7)
    for operand in (self.a, self.b, self.c):
      encoded = encoded + operand.raw()
    return encoded + BitString(0, 5)
230 | 
231 | 
class Ins_two_uf(object):
  """Two-operand instruction with an update-flags bit: 9-bit opcode, two
  operand encodings, the 7-bit ``special`` field and the UF bit.
  """
  special = 0
  def __init__(self, args, uf=False):
    assert len(args) == 2, "Need 2 args for that op"
    self.a, self.b = args[0], args[1]
    self.uf = uf
  def raw(self, ip):
    """Encode the instruction (``ip`` is not used)."""
    encoded = BitString(self.opcodestr, 9) + self.a.raw() + self.b.raw()
    encoded = encoded + BitString(self.special, 7)
    # Any truthy ``uf`` sets the trailing UF bit.
    return encoded + BitString(1 if self.uf else 0, 1)
245 | 
class Ins_two(object):
  """Two-operand instruction without a UF bit: opcode, operand a, operand b,
  then zero padding up to the instruction length.

  The opcode is emitted at its own width rather than a fixed 8 bits, so
  subclasses whose opcode literal is 7, 9 or 12 bits wide are not truncated.
  Subclasses may set ``opcode_bits`` / ``total_bits`` to state the widths
  explicitly; when left as None they are derived in ``raw()``.
  """
  opcode_bits = None
  total_bits = None

  def __init__(self, args):
    assert len(args) == 2, "Need 2 args for that op"
    self.a = args[0]
    self.b = args[1]

  def raw(self, ip):
    """Encode the instruction (``ip`` is not used)."""
    opbits = self.opcode_bits
    if opbits is None:
      # The two-operand opcodes in this file all start with a 1 bit, so
      # the literal's bit length is the opcode width.
      opbits = self.opcodestr.bit_length()
    encoded = BitString(self.opcodestr, opbits) + self.a.raw() + self.b.raw()

    total = self.total_bits
    if total is None:
      # assumes the cLEMENCy layout: the 8-bit compare opcodes form 18-bit
      # instructions, every other two-operand opcode is padded to 27 bits
      # - TODO confirm against the ISA reference.
      total = 18 if opbits == 8 else 27
    padding = total - len(encoded)
    if padding > 0:
      encoded = encoded + BitString(0, padding)
    return encoded
254 | 
class Ins_cmp_imm(object):
  """Register/immediate instruction: 8-bit opcode, one operand encoding and
  a 14-bit immediate whose range check follows the ``signed`` attribute.
  """
  signed = False
  def __init__(self, args):
    assert len(args) == 2, "Need 2 args for that op"
    self.a, self.imm = args[0], args[1]
  def raw(self, ip):
    """Encode the instruction; ``ip`` is used to resolve a symbolic immediate."""
    head = BitString(self.opcodestr, 8) + self.a.raw()
    immediate = BitString(self.imm, 14, ip, signed=self.signed)
    return head + immediate
264 | 
class Ins_one_imm(object):
  """Register/immediate instruction: 5-bit opcode, one operand encoding and
  a 17-bit immediate whose range check follows the ``signed`` attribute.
  """
  signed = False
  def __init__(self, args):
    assert len(args) == 2, "Need 2 args for that op"
    self.a, self.imm = args[0], args[1]
  def raw(self, ip):
    """Encode the instruction; ``ip`` is used to resolve a symbolic immediate."""
    head = BitString(self.opcodestr, 5) + self.a.raw()
    immediate = BitString(self.imm, 17, ip, signed=self.signed)
    return head + immediate
274 | 
class Ins_one_uf(object):
  """One-operand instruction with an update-flags bit: 9-bit opcode, the
  operand encoding, the 12-bit ``special`` field and the UF bit.
  """
  special = 0
  def __init__(self, args, uf=False):
    assert len(args) == 1, "Need 1 args for that op"
    self.a = args[0]
    self.uf = uf
  def raw(self, ip):
    """Encode the instruction."""
    encoded = BitString(self.opcodestr, 9, ip) + self.a.raw()
    encoded = encoded + BitString(self.special, 12)
    # Any truthy ``uf`` sets the trailing UF bit.
    flag = BitString(1 if self.uf else 0, 1)
    return encoded + flag
285 | 
class Ins_one(object):
  """One-operand instruction: 12-bit opcode, the operand encoding and a
  single zero bit.
  """
  def __init__(self, args):
    assert len(args) == 1, "Need 1 args for that op"
    self.a = args[0]
  def raw(self, ip):
    """Encode the instruction."""
    head = BitString(self.opcodestr, 12, ip) + self.a.raw()
    return head + BitString(0, 1)
293 | 
class Ins_mem_prot(object):
  """Memory-protection instruction: 7-bit opcode, two operand encodings, a
  1 bit, the 2-bit protection flags and 7 zero bits.

  The third argument names the protection; the first of "W", "E", "R", "N"
  found in it selects the flags value 2, 3, 1 or 0 (0 when none is found).
  """
  def __init__(self, args):
    assert len(args) == 3, "Need 3 args for that op"
    self.a, self.b = args[0], args[1]
    self.flags = 0
    # Order matters: "W" wins over "E", which wins over "R".
    for letter, value in (("W", 2), ("E", 3), ("R", 1), ("N", 0)):
      if letter in args[2]:
        self.flags = value
        break
  def raw(self, ip):
    """Encode the instruction (``ip`` is not used)."""
    encoded = BitString(self.opcodestr, 7) + self.a.raw() + self.b.raw()
    encoded = encoded + BitString(1, 1)
    encoded = encoded + BitString(self.flags, 2)
    return encoded + BitString(0, 7)
314 | 
class Ins_mem(object):
  """Load/store instruction: 7-bit opcode, two operand encodings, a 5-bit
  register count (stored as count-1), a 2-bit adjust mode, a 27-bit offset
  and 3 zero bits.

  ``args`` is (a, b, offset, register_count).  ``adjust`` is None/empty for
  no adjustment, "I" or "D"; any other value raises Exception.
  """
  def __init__(self, args, adjust=None):
    assert len(args) == 4, "Need 4 args for that op"
    self.a = args[0]
    self.b = args[1]
    # The field holds count-1 in 5 bits, so 1..32 registers are encodable.
    # A count of 0 would wrap to -1 and be emitted as 32 registers.
    assert 1 <= args[3] <= 32, "Register count outside of normal range"
    self.count = args[3]-1
    if not adjust:
      self.adjust = 0
    elif adjust == "I":
      self.adjust = 1
    elif adjust == "D":
      self.adjust = 2
    else:
      raise Exception("Invalid mem mode: %s"%(adjust))
    self.offset = args[2]
  def raw(self, ip):
    """Encode the instruction; ``ip`` is used to resolve a symbolic offset."""
    op = BitString(self.opcodestr, 7)
    partial = op + self.a.raw() + self.b.raw() + BitString(self.count, 5)
    partial += BitString(self.adjust, 2) + BitString(self.offset, 27, ip)
    return partial + BitString(0b000, 3)
336 | 
class Ins_zero(object):
  """Instruction without operands: the 18-bit encoding is the opcode itself."""
  def __init__(self, bleh=None):
    # The optional argument is accepted and ignored.
    pass
  def raw(self, ip):
    """Encode the instruction (``ip`` is not used)."""
    encoding = BitString(self.opcodestr, 18)
    return encoding
342 |   
343 | 
# Concrete instructions.
#
# Each class selects its encoding layout through its base class and supplies
# the opcode bits in ``opcodestr``.  Depending on the layout it may also
# override ``special`` (extra fixed bits after the operands), ``relative``
# (Ins_offset) or ``signed`` (immediate range check).
# Instr finds these classes by name: "Ins_" + mnemonic, compared
# case-insensitively.
class Ins_Ad(Ins_three_ref_uf):
  opcodestr = 0b0000000
class Ins_Adc(Ins_three_ref_uf):
  opcodestr = 0b0100000
class Ins_Adci(Ins_two_imm_uf):
  opcodestr = 0b0100000
  special = 0b01
class Ins_Adcim(Ins_two_imm_uf):
  opcodestr = 0b0100010
  special = 0b01
class Ins_Adcm(Ins_three_ref_uf):
  opcodestr = 0b0100010
class Ins_Adf(Ins_three_ref_uf):
  opcodestr = 0b0000001
class Ins_Adfm(Ins_three_ref_uf):
  opcodestr = 0b0000011
class Ins_Adi(Ins_two_imm_uf):
  opcodestr = 0b0000000
  special = 0b01
class Ins_Adim(Ins_two_imm_uf):
  opcodestr = 0b0000010
  special = 0b01
class Ins_Adm(Ins_three_ref_uf):
  opcodestr = 0b0000010
class Ins_An(Ins_three_ref_uf):
  opcodestr = 0b0010100
class Ins_Ani(Ins_two_imm_uf):
  opcodestr = 0b0010100
  special = 0b01
class Ins_Anm(Ins_three_ref_uf):
  opcodestr = 0b0010110
class Ins_B(Ins_conditional_offset):
  opcodestr = 0b110000
class Ins_Bf(Ins_two_uf):
  opcodestr = 0b101001100
  special = 0b1000000
class Ins_Bfm(Ins_two_uf):
  opcodestr = 0b101001110
  special = 0b1000000
class Ins_Br(Ins_conditional_reg):
  opcodestr = 0b110010
class Ins_Bra(Ins_offset):
  opcodestr = 0b111000100
class Ins_Brr(Ins_offset):
  relative = True
  opcodestr = 0b111000000
class Ins_C(Ins_conditional_offset):
  opcodestr = 0b110101
class Ins_Caa(Ins_offset):
  opcodestr = 0b111001100
class Ins_Car(Ins_offset):
  relative = True
  opcodestr = 0b111001000
class Ins_Cm(Ins_two):
  opcodestr = 0b10111000
class Ins_Cmf(Ins_two):
  opcodestr = 0b10111010
class Ins_Cmfm(Ins_two):
  opcodestr = 0b10111110
class Ins_Cmi(Ins_cmp_imm):
  signed = True
  opcodestr = 0b10111001
class Ins_Cmim(Ins_cmp_imm):
  signed = True
  opcodestr = 0b10111101
class Ins_Cmm(Ins_two):
  opcodestr = 0b10111100
class Ins_Cr(Ins_conditional_reg):
  opcodestr = 0b110111
class Ins_Dbrk(Ins_zero):
  opcodestr = 0b111111111111111111
class Ins_Di(Ins_one):
  opcodestr = 0b101000000101
class Ins_Dmt(Ins_three):
  opcodestr = 0b0110100
class Ins_Dv(Ins_three_ref_uf):
  opcodestr = 0b0001100
class Ins_Dvf(Ins_three_ref_uf):
  opcodestr = 0b0001101
class Ins_Dvfm(Ins_three_ref_uf):
  opcodestr = 0b0001111
class Ins_Dvi(Ins_two_imm_uf):
  opcodestr = 0b0001100
  special = 0b01
class Ins_Dvim(Ins_two_imm_uf):
  opcodestr = 0b0001110
  special = 0b01
class Ins_Dvis(Ins_two_imm_uf):
  opcodestr = 0b0001100
  special = 0b11
class Ins_Dvism(Ins_two_imm_uf):
  opcodestr = 0b0001110
  special = 0b11
class Ins_Dvm(Ins_three_ref_uf):
  opcodestr = 0b0001110
class Ins_Dvs(Ins_three_ref_uf):
  opcodestr = 0b0001100
  special   = 0b10
class Ins_Dvsm(Ins_three_ref_uf):
  opcodestr = 0b0001110
  special   = 0b10
class Ins_Ei(Ins_one):
  opcodestr = 0b101000000100
class Ins_Fti(Ins_two):
  opcodestr = 0b101000101
class Ins_Ftim(Ins_two):
  opcodestr = 0b101000111
class Ins_Ht(Ins_zero):
  opcodestr = 0b101000000011000000
class Ins_Ir(Ins_zero):
  opcodestr = 0b101000000001000000
class Ins_Itf(Ins_two):
  opcodestr = 0b101000100
class Ins_Itfm(Ins_two):
  opcodestr = 0b101000110
class Ins_Lds(Ins_mem):
  opcodestr = 0b1010100
class Ins_Ldt(Ins_mem):
  opcodestr = 0b1010110
class Ins_Ldw(Ins_mem):
  opcodestr = 0b1010101
class Ins_Md(Ins_three_ref_uf):
  opcodestr = 0b0010000
class Ins_Mdf(Ins_three_ref_uf):
  opcodestr = 0b0010001
class Ins_Mdfm(Ins_three_ref_uf):
  opcodestr = 0b0010011
class Ins_Mdi(Ins_two_imm_uf):
  opcodestr = 0b0010000
  special = 0b01
class Ins_Mdim(Ins_two_imm_uf):
  opcodestr = 0b0010010
  special = 0b01
class Ins_Mdis(Ins_two_imm_uf):
  opcodestr = 0b0010000
  special = 0b11
class Ins_Mdism(Ins_two_imm_uf):
  opcodestr = 0b0010010
  special = 0b11
class Ins_Mdm(Ins_three_ref_uf):
  opcodestr = 0b0010010
class Ins_Mds(Ins_three_ref_uf):
  opcodestr = 0b0010000
  special = 0b10
class Ins_Mdsm(Ins_three_ref_uf):
  opcodestr = 0b0010010
  special = 0b10
class Ins_Mh(Ins_one_imm):
  opcodestr = 0b10001
class Ins_Ml(Ins_one_imm):
  opcodestr = 0b10010
class Ins_Ms(Ins_one_imm):
  signed = True
  opcodestr = 0b10011
class Ins_Mu(Ins_three_ref_uf):
  opcodestr = 0b0001000
class Ins_Muf(Ins_three_ref_uf):
  opcodestr = 0b0001001
class Ins_Mufm(Ins_three_ref_uf):
  opcodestr = 0b0001011
class Ins_Mui(Ins_two_imm_uf):
  opcodestr = 0b0001000
  special = 0b01
class Ins_Muim(Ins_two_imm_uf):
  opcodestr = 0b0001010
  special = 0b01
class Ins_Muis(Ins_two_imm_uf):
  opcodestr = 0b0001000
  special = 0b11
class Ins_Muism(Ins_two_imm_uf):
  opcodestr = 0b0001010
  special = 0b11
class Ins_Mum(Ins_three_ref_uf):
  opcodestr = 0b0001010
class Ins_Mus(Ins_three_ref_uf):
  opcodestr = 0b0001000
  special = 0b10
class Ins_Musm(Ins_three_ref_uf):
  opcodestr = 0b0001010
  special = 0b10
class Ins_Ng(Ins_two_uf):
  opcodestr = 0b101001100
class Ins_Ngf(Ins_two_uf):
  opcodestr = 0b101001101
class Ins_Ngfm(Ins_two_uf):
  opcodestr = 0b101001111
class Ins_Ngm(Ins_two_uf):
  opcodestr = 0b101001110
class Ins_Nt(Ins_two_uf):
  opcodestr = 0b101001100
  special = 0b0100000
class Ins_Ntm(Ins_two_uf):
  opcodestr = 0b101001110
  special = 0b0100000
class Ins_Or(Ins_three_ref_uf):
  opcodestr = 0b0011000
class Ins_Ori(Ins_two_imm_uf):
  opcodestr = 0b0011000
  special = 0b01
class Ins_Orm(Ins_three_ref_uf):
  opcodestr = 0b0011010
class Ins_Re(Ins_zero):
  opcodestr = 0b101000000000000000
class Ins_Rf(Ins_one):
  opcodestr = 0b101000001100
class Ins_Rl(Ins_three_ref_uf):
  opcodestr = 0b0110000
class Ins_Rli(Ins_two_imm_uf):
  opcodestr = 0b1000000
  special = 0b00
class Ins_Rlim(Ins_two_imm_uf):
  opcodestr = 0b1000010
  special = 0b00
class Ins_Rlm(Ins_three_ref_uf):
  opcodestr = 0b0110010
class Ins_Rmp(Ins_two):
  opcodestr = 0b1010010
class Ins_Rnd(Ins_one_uf):
  opcodestr = 0b101001100
  special = 0b000001100000
class Ins_Rndm(Ins_one_uf):
  opcodestr = 0b101001110
  special = 0b000001100000
class Ins_Rr(Ins_three_ref_uf):
  opcodestr = 0b0110001
class Ins_Rri(Ins_two_imm_uf):
  opcodestr = 0b1000001
  special = 0b00
class Ins_Rrim(Ins_two_imm_uf):
  opcodestr = 0b1000011
  special = 0b00
class Ins_Rrm(Ins_three_ref_uf):
  opcodestr = 0b0110011
class Ins_Sa(Ins_three_ref_uf):
  opcodestr = 0b0101101
class Ins_Sai(Ins_two_imm_uf):
  opcodestr = 0b0111101
  special = 0b00
class Ins_Saim(Ins_two_imm_uf):
  opcodestr = 0b0111111
  special = 0b00
class Ins_Sam(Ins_three_ref_uf):
  opcodestr = 0b0101111
class Ins_Sb(Ins_three_ref_uf):
  opcodestr = 0b0000100
class Ins_Sbc(Ins_three_ref_uf):
  opcodestr = 0b0100100
class Ins_Sbci(Ins_two_imm_uf):
  opcodestr = 0b0100100
  special = 0b01
class Ins_Sbcim(Ins_two_imm_uf):
  opcodestr = 0b0100110
  special = 0b01
class Ins_Sbcm(Ins_three_ref_uf):
  opcodestr = 0b0100110
class Ins_Sbf(Ins_three_ref_uf):
  opcodestr = 0b0000101
class Ins_Sbfm(Ins_three_ref_uf):
  opcodestr = 0b0000111
class Ins_Sbi(Ins_two_imm_uf):
  opcodestr = 0b0000100
  special = 0b01
class Ins_Sbim(Ins_two_imm_uf):
  opcodestr = 0b0000110
  special = 0b01
class Ins_Sbm(Ins_three_ref_uf):
  opcodestr = 0b0000110
class Ins_Ses(Ins_two):
  opcodestr = 0b101000000111
class Ins_Sew(Ins_two):
  opcodestr = 0b101000001000
class Ins_Sf(Ins_one):
  opcodestr = 0b101000001011
class Ins_Sl(Ins_three_ref_uf):
  opcodestr = 0b0101000
class Ins_Sli(Ins_two_imm_uf):
  opcodestr = 0b0111000
class Ins_Slim(Ins_two_imm_uf):
  opcodestr = 0b0111010
class Ins_Slm(Ins_three_ref_uf):
  opcodestr = 0b0101010
class Ins_Smp(Ins_mem_prot):
  opcodestr = 0b1010010
class Ins_Sr(Ins_three_ref_uf):
  opcodestr = 0b0101001
class Ins_Sri(Ins_two_imm_uf):
  opcodestr = 0b0111001
class Ins_Srim(Ins_two_imm_uf):
  opcodestr = 0b0111011
class Ins_Srm(Ins_three_ref_uf):
  opcodestr = 0b0101011
class Ins_Sts(Ins_mem):
  opcodestr = 0b1011000
class Ins_Stt(Ins_mem):
  opcodestr = 0b1011010
class Ins_Stw(Ins_mem):
  opcodestr = 0b1011001
class Ins_Wt(Ins_zero):
  opcodestr = 0b101000000010000000
class Ins_Xr(Ins_three_ref_uf):
  opcodestr = 0b0011100
class Ins_Xri(Ins_two_imm_uf):
  opcodestr = 0b0011100
  special = 0b01
class Ins_Xrm(Ins_three_ref_uf):
  opcodestr = 0b0011110
class Ins_Zes(Ins_two):
  opcodestr = 0b101000001001
class Ins_Zew(Ins_two):
  opcodestr = 0b101000001010
654 | 
class Instr(object):
  """Resolve an assembly mnemonic to the ``Ins_*`` class that encodes it.

  ``iclass`` is either the matching class itself or, for a mnemonic that
  carries one of the ``specials`` suffixes, a callable that builds the base
  instruction with that suffix as its second argument.  ``mnem`` keeps the
  mnemonic as given.  Unknown mnemonics raise Exception.
  """
  def __init__(self, mnem):
    lowered = mnem.lower()
    wanted = "ins_" + lowered

    # First try the whole mnemonic against every module-level name,
    # ignoring case.
    for name, candidate in globals().iteritems():
      if name.lower() == wanted:
        self.iclass = candidate
        break
    else: #special mnemonic with a suffix
      # The first entry of ``specials`` that the mnemonic ends with wins.
      suffixes = [s for s in specials if lowered.endswith(s.lower())]
      if not suffixes:
        raise Exception("Unknown mnemonic: %s"%(mnem))
      end = suffixes[0]

      base = "ins_" + lowered[:-len(end)]
      for name, candidate in globals().iteritems():
        if name.lower() == base:
          # Bind the class now; the suffix is handed over on every call.
          self.iclass = lambda x,f=candidate:f(x, end)
          break
      else:
        raise Exception("Unknown mnemonic: %s"%(mnem))

    self.mnem = mnem
677 | 
678 | 


--------------------------------------------------------------------------------
/snowball/clemency/ida-plugin/clemency.py:
--------------------------------------------------------------------------------
  1 | import idaapi
  2 | import pyclemency
  3 | import string
  4 | import sys
  5 | from idaapi import *
  6 | 
# Registers from the C disassembler.
# The names are converted to plain Python strings.
GREGS = list(map(str, pyclemency.registers))

# Register numbers of the registers this module refers to by role.
REG_FP = 28
REG_ST = 29
REG_RA = 30
REG_PC = 31

# Instructions from the C disassembler.
# Every entry is a dict with 'name' and 'feature' (initially 0); a 'cmt'
# entry is added when pyclemency.comments has one for that mnemonic.
INS = list(map(lambda x: {'name': x, 'feature': 0}, pyclemency.mnemonics))
for instr in INS:
    name = instr['name']
    if name in pyclemency.comments:
        instr['cmt'] = pyclemency.comments[name]

# Pseudo-instructions for simplification
# They are appended after the real instructions; PSEUDO_* is the index of
# the corresponding entry in INS.
PSEUDO_MOV = len(INS)
INS.append({'name': 'MOV', 'feature': 0, 'cmt': 'Move register (or dest, src, src)'})

PSEUDO_MOVI = len(INS)
INS.append({'name': 'MOVI', 'feature': 0, 'cmt': 'Move imm to register (mh + ml)'})

# Values of the adj_rb field of the instruction flags.
ADJ_RB_NO_ADJUST = 0
ADJ_RB_INCREMENT = 1
ADJ_RB_DECREMENT = 2

# sttd {R28,ST,RA}, [ST+#0]
CODESTARTS = ['3a016b015000000000000000'.decode('hex')]

# Longer start patterns: a fixed prefix, one byte that varies with i, seven
# zero bytes and the pattern above. Each variant is inserted at the front of
# the list, so the longer patterns come before the short one.
# NOTE(review): presumably these match an argument-saving store placed in
# front of the prologue - confirm against the instruction encoding.
push_args_start = '3a006801'.decode('hex')
push_args_end = '\x00' * 7 + CODESTARTS[0]
for i in xrange(6):
    nargs_d = chr(i * 0x20 + 0x10)
    nargs_i = chr(i * 0x20 + 0x8)
    CODESTARTS.insert(0, push_args_start + nargs_d + push_args_end)
    CODESTARTS.insert(0, push_args_start + nargs_i + push_args_end)
 43 | 
 44 | def to_uint16(s):
 45 |     arr = (ctypes.c_uint16 * (len(s)/2))()
 46 |     for x in xrange(len(s) / 2):
 47 |         arr[x] = struct.unpack('<H', s[x*2:(x+1)*2])[0]
 48 |     return arr
 49 | 
 50 | # instruction flags (stored in self.cmd.insnpref)
 51 | # bit 0: UF
 52 | UF = 1 << 0
 53 | # bits 1-2: adj_rb
 54 | ADJ_RB_SHIFT = 1
 55 | def get_adj_rb(flags):
 56 |     return (flags >> ADJ_RB_SHIFT) & 0b11
 57 | 
 58 | CC_SHIFT = 3
 59 | def get_cc(flags):
 60 |     return (flags >> CC_SHIFT) & 0b1111
 61 | 
 62 | CC_NAMES = {
 63 |     0b0000: 'n',
 64 |     0b0001: 'e',
 65 |     0b0010: 'l',
 66 |     0b0011: 'le',
 67 |     0b0100: 'g',
 68 |     0b0101: 'ge',
 69 |     0b0110: 'no',
 70 |     0b0111: 'o',
 71 |     0b1000: 'ns',
 72 |     0b1001: 's',
 73 |     0b1010: 'sl',
 74 |     0b1011: 'sle',
 75 |     0b1100: 'sg',
 76 |     0b1101: 'sge',
 77 |     0b1111: '',
 78 | }
 79 | 
# Add all instructions to our module's scope for convenience.
# Each entry of INS becomes a global named 'I' + its name, holding the
# entry's index in INS (the Ibr, Icr, ... names used below come from here).
for i in xrange(len(INS)):
    globals()['I%s' % INS[i]['name']] = i

# XXX These are mostly optional. Don't worry about them until things are working.
# If the graph view ends the basic block after a call, then you are missing the STOP/CALL/JUMP flags.
# Maps an instruction index to the CF_* feature bits stored in its INS entry.
FEATURES = {
    Ibr: CF_JUMP,
    Icr: CF_JUMP | CF_CALL,
    Iht: CF_STOP,
    Ire: CF_STOP,
    Ic: CF_CALL,
    Icar: CF_CALL,
}
# Copy the feature bits into the instruction table.
for insn, features in FEATURES.items():
    INS[insn]['feature'] = features
 96 | 
 97 | # is sp delta fixed by the user?
 98 | def is_fixed_spd(ea):
 99 |     return (get_aflags(ea) & AFL_FIXEDSPD) != 0
100 | 
def to_signed(offset):
    """Reinterpret an unsigned int (struct format 'I') as a signed int ('i')."""
    packed = struct.pack('I', offset)
    (value,) = struct.unpack('i', packed)
    return value
103 | 
def lookahead_instruction(addr, size=12):
    """
    Read the data at 'addr' for decoding, as returned by to_uint16().

    At most 'size' addresses are read, and never past SegEnd(addr).
    """
    stop = addr + size
    limit = SegEnd(addr)
    if limit < stop:
        stop = limit
    raw = get_many_bytes(addr, stop - addr)
    return to_uint16(raw)
107 | 
class clemency_hooks_t(idaapi.IDP_Hooks):
    """IDP hooks that customise how a 3-unit data item is printed."""

    def out_3byte(self, dataea, value, analyze_only):
        """
        Print the three units at 'dataea' as one hexadecimal number.

        The unit at dataea+1 supplies the top 9 bits, the one at dataea+0
        the middle 9 bits and the one at dataea+2 the low bits.
        Returns 1 without printing when 'analyze_only' is set, otherwise 2.
        """
        if analyze_only:
            return 1

        high = get_full_byte(dataea+1) << 18
        middle = get_full_byte(dataea+0) << 9
        low = get_full_byte(dataea+2)
        out_long(high | middle | low, 16)
        return 2
114 | 
class clemency_processor_t(idaapi.processor_t):
    # IDP id ( Numbers above 0x8000 are reserved for the third-party modules)
    id = 0x8000 + 2

    # Processor features
    flag = PR_ASSEMBLE | PR_SEGS | PR_DEFSEG32 | PR_USE32 | PRN_HEX | PR_RNAMESOK | PR_NO_SEGMOVE

    # Number of bits in a byte for code segments (usually 8)
    # IDA supports values up to 32 bits
    cnbits = 16

    # Number of bits in a byte for non-code segments (usually 8)
    # IDA supports values up to 32 bits
    dnbits = 16

    # short processor names
    # Each name should be shorter than 9 characters
    psnames = ['clemency']

    # long processor names
    # No restriction on name lengths.
    plnames = ['Clemency']

    # register names
    regNames = GREGS + [
        # Fake segment registers
        "CS",
        "DS"
    ]

    # number of registers (optional: deduced from the len(regNames))
    regsNum = len(regNames)

    # Segment register information (use virtual CS and DS registers if your
    # processor doesn't have segment registers).
    # The indices are derived from len(GREGS) so they always point at the
    # "CS"/"DS" entries appended to regNames above, whatever the number of
    # general-purpose registers is.
    regFirstSreg = len(GREGS)     # index of CS
    regLastSreg  = len(GREGS) + 1 # index of DS

    # size of a segment register in bytes
    segreg_size = 0

    # You should define 2 virtual segment registers for CS and DS.

    # number of CS/DS registers
    regCodeSreg = regFirstSreg
    regDataSreg = regLastSreg

    # Array of typical code start sequences (optional)
    codestart = CODESTARTS

    # Array of 'return' instruction opcodes (optional)
    retcodes = ['00004001'.decode('hex')]

    # Array of instructions
    instruc = INS

    # icode of the first instruction
    instruc_start = 0

    # icode of the last instruction + 1
    instruc_end = len(instruc) + 1

    #
    # Number of digits in floating numbers after the decimal point.
    # If an element of this array equals 0, then the corresponding
    # floating point data is not used for the processor.
    # This array is used to align numbers in the output.
    #      real_width[0] - number of digits for short floats (only PDP-11 has them)
    #      real_width[1] - number of digits for "float"
    #      real_width[2] - number of digits for "double"
    #      real_width[3] - number of digits for "long double"
    # Example: IBM PC module has { 0,7,15,19 }
    #
    # (optional)
    real_width = (0, 7, 15, 0)

    # icode (or instruction number) of return instruction. It is ok to give any of possible return
    # instructions
    icode_return = Ire

    # only one assembler is supported
    assembler = {
        'flag' : ASH_HEXF3 | AS_UNEQU | AS_COLON | ASB_BINF4 | AS_N2CHR,
        'name': "My processor module bytecode assembler",
        'origin': "org",
        'end': "end",
        'cmnt': ";",
        'ascsep': "\"",
        'accsep': "'",
        'esccodes': "\"'",
        'a_ascii': "db",
        'a_byte': "db",
        'a_word': "dw",
        'a_dword': "dd",
        'a_qword': "dq",
        'a_oword': "xmmword",
        'a_yword': "ymmword",
        'a_float': "dd",
        'a_double': "dq",
        'a_tbyte': "",
        'a_packreal': "",
        'a_dups': "#d dup(#v)",
        'a_bss': "%s dup ?",
        'a_equ': ".equ",
        'a_seg': "seg",
        'a_curip': "$",
        'a_public': "public",
        'a_weak': "weak",
        'a_extrn': "extrn",
        'a_comdef': "",
        'a_align': "align",
        'lbrace': "(",
        'rbrace': ")",
        'a_mod': "%",
        'a_band': "&",
        'a_bor': "|",
        'a_xor': "^",
        'a_bnot': "~",
        'a_shl': "<<",
        'a_shr': ">>",
        'a_sizeof_fmt': "size %s",
        'flag2': 0,
        'cmnt2': "",
        'low8': "",
        'high8': "",
        'low16': "",
        'high16': "",
        'a_include_fmt': "include %s",
        'a_vstruc_fmt': "",
        'a_3byte': "dt",
        'a_rva': "rva"
    } # Assembler
247 | 
248 |     def notify_is_sane_insn(self, no_crefs):
249 |         """
250 |         is the instruction sane for the current file type?
251 |         args: no_crefs
252 |         1: the instruction has no code refs to it.
253 |            ida just tries to convert unexplored bytes
254 |            to an instruction (but there is no other
255 |            reason to convert them into an instruction)
256 |         0: the instruction is created because
257 |            of some coderef, user request or another
258 |            weighty reason.
259 |         The instruction is in 'cmd'
260 |         returns: 1-ok, <=0-no, the instruction isn't
261 |         likely to appear in the program
262 |         """
263 |         if get_32bit(self.cmd.ea) == 0:
264 |             # All zeros is an invalid instruction.
265 |             return 0
266 |         return 1
267 | 
268 |     def notify_get_autocmt(self):
269 |         """
270 |         Get instruction comment. 'cmd' describes the instruction in question
271 |         @return: None or the comment string
272 |         """
273 |         if 'cmt' in self.instruc[self.cmd.itype]:
274 |             return self.instruc[self.cmd.itype]['cmt']
275 | 
276 |     # Instructions that jump and then come back.
277 |     def is_call(self):
278 |         return self.cmd.itype in [ Ic, Icaa, Icar, Icr ]
279 | 
280 |     # Instructions that jump somewhere and don't come back.
281 |     def is_jump(self):
282 |         return self.cmd.itype in [ Ib, Ibr, Ibra, Ibrr ]
283 | 
284 |     def is_load_store(self):
285 |         return self.cmd.itype in [Ilds, Ildt, Ildw, Ists, Istt, Istw]
286 | 
287 |     def is_conditional_branch(self):
288 |         return self.cmd.itype in [Ib, Ibr, Ic, Icr]
289 | 
290 |     visited_addrs = set()
291 |     # Attempt to detect whether addr is a string.
292 |     def emu_offset(self, addr):
293 |         if addr in self.visited_addrs:
294 |             return
295 |         self.visited_addrs.add(addr)
296 | 
297 |         start = addr
298 |         end = None
299 |         while end is None:
300 |             try:
301 |                 size = min((SegEnd(addr) - addr) * 2, 32)
302 |                 data = GetManyBytes(addr, size)
303 |             except:
304 |                 return
305 |             if data is None:
306 |                 return
307 |             for i in xrange(0, len(data), 2):
308 |                 if data[i+1] != '\0':
309 |                     return
310 |                 c = data[i]
311 |                 if c == '\0':
312 |                     end = addr + i / 2 + 1
313 |                     break
314 |                 if c not in string.printable:
315 |                     return
316 |             addr += len(data) / 2
317 |         # heuristic: assume 2 printable chars followed by a null byte is
318 |         # a string
319 |         if end - start > 2:
320 |             make_ascii_string(start, end - start, ASCSTR_UNICODE)
321 | 
322 |     # Add cross-references for an operand.
323 |     def emu_operand(self, op):
324 |         itype = self.cmd.itype
325 |         optype = op.type
326 | 
327 |         def is_addr(addr):
328 |             if addr < 0x1000:
329 |                 return False
330 |             return SegStart(addr) != BADADDR
331 | 
332 |         if optype == o_imm:
333 |             if self.is_call():
334 |                 ua_add_cref(0, op.value, fl_CN)
335 |                 op_offset(self.cmd.ea, op.n, REF_OFF32, op.value)
336 |             elif self.is_jump():
337 |                 ua_add_cref(0, op.value, fl_JN)
338 |                 op_offset(self.cmd.ea, op.n, REF_OFF32, op.value)
339 |             elif is_addr(op.value):
340 |                 # heuristic: treat values > 0x1000 as offsets.
341 |                 self.emu_offset(op.value)
342 |                 op_offset(self.cmd.ea, op.n, REF_OFF32, op.value)
343 |                 ua_dodata2(self.cmd.ea, op.value, op.dtyp)
344 |                 ua_add_dref(self.cmd.ea, op.value, dr_O)
345 | 
346 | 
347 |     def add_stkpnt(self, pfn, v):
348 |         if pfn:
349 |             end = self.cmd.ea + self.cmd.size
350 |             if not is_fixed_spd(end):
351 |                 add_auto_stkpnt2(pfn, end, v)
352 | 
353 |     def trace_sp(self):
354 |         pfn = get_func(self.cmd.ea)
355 |         if not pfn:
356 |             return
357 | 
358 |         spd = get_spd(pfn, self.cmd.ea)
359 | 
360 |         if self.cmd.itype == Ior and self.cmd.Op1.reg == REG_FP and self.cmd.Op2.reg == self.cmd.Op3.reg == REG_ST:
361 |             add_frame(pfn, abs(spd), 0, 0)
362 |             return
363 | 
364 |         offset = 0
365 | 
366 |         addis = [Iadci, Iadi]
367 |         subis = [Isbci, Isbi]
368 |         if self.cmd.itype in (addis + subis) and self.cmd.Op1.reg == REG_ST:
369 |             offset = to_signed(self.cmd.Op3.value)
370 |             if self.cmd.itype in subis:
371 |                 offset = -offset
372 |         elif self.is_load_store():
373 |             adj_rb = get_adj_rb(self.cmd.insnpref)
374 | 
375 |             scale = 1
376 |             if self.cmd.itype in [Ildw, Istw]:
377 |                 scale = 2
378 |             elif self.cmd.itype in [Ildt, Istt]:
379 |                 scale = 3
380 | 
381 |             regcount = self.cmd.Op3.value
382 | 
383 |             if self.cmd.Op2.reg == REG_ST:
384 |                 # push or pop
385 |                 if adj_rb == ADJ_RB_NO_ADJUST:
386 |                     return
387 |                 offset = regcount * scale
388 |                 if adj_rb == ADJ_RB_DECREMENT:
389 |                     offset = -offset
390 |             elif self.cmd.Op1.reg == REG_FP and regcount > (REG_ST - REG_FP):
391 |                 # mov sp, [bp]
392 |                 offset = -(spd - pfn.frregs)
393 | 
394 |         if offset != 0:
395 |             self.add_stkpnt(pfn, offset)
396 | 
397 |     def emu(self):
398 |         """
399 |         Emulate instruction, create cross-references, plan to analyze
400 |         subsequent instructions, modify flags etc. Upon entrance to this function
401 |         all information about the instruction is in 'cmd' structure.
402 |         If zero is returned, the kernel will delete the instruction.
403 |         """
404 |         Feature = self.cmd.get_canon_feature()
405 | 
406 |         if self.cmd.Op1.type != o_void:
407 |             self.emu_operand(self.cmd.Op1)
408 |         if self.cmd.Op2.type != o_void:
409 |             self.emu_operand(self.cmd.Op2)
410 |         if self.cmd.Op3.type != o_void:
411 |             self.emu_operand(self.cmd.Op3)
412 |         if self.cmd.Op4.type != o_void:
413 |             self.emu_operand(self.cmd.Op4)
414 |         if self.cmd.Op5.type != o_void:
415 |             self.emu_operand(self.cmd.Op5)
416 | 
417 |         itype = self.cmd.itype
418 |         uncond_jmp = itype in [Ibra, Ibrr] or (itype in [Ib, Ibr] and get_cc(self.cmd.insnpref) == 0xf)
419 | 
420 |         flow = (Feature & CF_STOP == 0) and not uncond_jmp
421 |         if flow:
422 |             ua_add_cref(0, self.cmd.ea + self.cmd.size, fl_F)
423 | 
424 |         if may_trace_sp():
425 |             if flow:
426 |                 self.trace_sp() # trace modification of SP register
427 |             else:
428 |                 recalc_spd(self.cmd.ea) # recalculate SP register for the next insn
429 | 
430 |         ''' # stack doesn't work :-(
431 |         if may_create_stkvars() and self.is_load_store() and self.cmd.Op2.reg == REG_FP:
432 |             pfn = get_func(self.cmd.ea)
433 |             op = self.cmd.Op4
434 |             if pfn and ua_stkvar2(op, op.value, STKVAR_VALID_SIZE):
435 |                 op_stkvar(self.cmd.ea, op.n)
436 |         '''
437 | 
438 |         return 1
439 | 
440 |     def outop(self, op):
441 |         """
442 |         Generate text representation of an instructon operand.
443 |         This function shouldn't change the database, flags or anything else.
444 |         All these actions should be performed only by u_emu() function.
445 |         The output text is placed in the output buffer initialized with init_output_buffer()
446 |         This function uses out_...() functions from ua.hpp to generate the operand text
447 |         Returns: 1-ok, 0-operand is hidden.
448 |         """
449 |         optype = op.type
450 | 
451 |         # We only have two types of operands: registers and immediates.
452 |         if optype == o_reg:
453 |             out_register(self.regNames[op.reg])
454 |         elif optype == o_imm:
455 |             out_symbol('#')
456 |             OutValue(op, OOFW_IMM | OOF_SIGNED)
457 |         else:
458 |             return False
459 |         return True
460 | 
461 |     def out(self):
462 |         """
463 |         Generate text representation of an instruction in 'cmd' structure.
464 |         This function shouldn't change the database, flags or anything else.
465 |         All these actions should be performed only by u_emu() function.
466 |         Returns: nothing
467 |         """
468 |         # Init output buffer
469 |         buf = idaapi.init_output_buffer(1024)
470 |         suffix = ''
471 | 
472 |         adj_rb = get_adj_rb(self.cmd.insnpref)
473 |         if adj_rb > 0:
474 |             suffix += ['', 'i', 'd'][adj_rb]
475 | 
476 |         if self.is_conditional_branch():
477 |             suffix += CC_NAMES[get_cc(self.cmd.insnpref)]
478 | 
479 |         if self.cmd.insnpref & UF != 0:
480 |             suffix += '.'
481 | 
482 |         OutMnem(8, suffix)
483 | 
484 |         # pretty-print [rA + Offset, RegCount]
485 |         if self.is_load_store():
486 |             regcount = self.cmd.Op3.value
487 |             offset = self.cmd.Op4.value
488 | 
489 |             if regcount > 1:
490 |                 start_reg = self.cmd.Op1.reg
491 |                 regs = [(start_reg + i) % len(GREGS) for i in xrange(regcount)]
492 |                 out_symbol('{')
493 |                 for i, reg in enumerate(regs):
494 |                     out_register(GREGS[reg])
495 |                     if i != len(regs) - 1:
496 |                         out_symbol(',')
497 |                 out_symbol('}')
498 |             else:
499 |                 out_one_operand(0)
500 | 
501 |             out_symbol(',')
502 |             OutChar(' ')
503 |             out_symbol('[')
504 |             out_one_operand(1) # register
505 |             if offset != 0:
506 |                 out_symbol('+')
507 |                 out_one_operand(3) # offset
508 |             out_symbol(']')
509 | 
510 |         elif self.cmd.itype == Ismp:
511 |             out_one_operand(0)
512 |             out_symbol(',')
513 |             OutChar(' ')
514 |             out_one_operand(1)
515 |             out_symbol(',')
516 |             OutChar(' ')
517 |             s = ['MEM_NO_ACCESS', 'MEM_RO', 'MEM_RW', 'MEM_RE']
518 |             for c in s[self.cmd.Op3.value]:
519 |                 OutChar(c)
520 | 
521 |         else:
522 |             # output first operand
523 |             # kernel will call outop()
524 |             if self.cmd.Op1.type != o_void:
525 |                 out_one_operand(0)
526 | 
527 |             # output the rest of operands separated by commas
528 |             for i in xrange(1, 5):
529 |                 if self.cmd[i].type == o_void:
530 |                     break
531 |                 out_symbol(',')
532 |                 OutChar(' ')
533 |                 out_one_operand(i)
534 | 
535 |         term_output_buffer()
536 |         cvar.gl_comm = 1 # generate comment at the next call to MakeLine()
537 |         MakeLine(buf)
538 | 
539 |     def fill_op_reg(self, op, r):
540 |         op.type = o_reg
541 |         op.dtyp = dt_dword
542 |         op.reg = r
543 | 
544 |     def fill_op_imm(self, op, imm, inst=None):
545 |         op.type = o_imm
546 |         op.dtyp = dt_qword
547 | 
548 |         if imm > 0:
549 |             imm = to_signed(imm)
550 | 
551 |         if self.is_imm_relative(inst):
552 |             op.value = imm + inst.pc
553 |         else:
554 |             op.value = imm
555 | 
    # Simplify some instructions for brevity. Returns True if the
    # instruction was rewritten (in which case this function is
    # responsible for populating the necessary itype, operands,
    # and instruction size).
560 |     def simplify(self, inst):
561 |         itype = inst.id
562 | 
563 |         if itype == Ior and inst.rB == inst.rC:
564 |             self.cmd.itype = PSEUDO_MOV
565 |             self.fill_op_reg(self.cmd.Op1, inst.rA)
566 |             self.fill_op_reg(self.cmd.Op2, inst.rB)
567 |             self.cmd.size = inst.size
568 |             return True
569 | 
570 |         elif itype == Iml:
571 |             next_addr = self.cmd.ea + inst.size
572 | 	    next_inst = pyclemency.disassemble(next_addr, lookahead_instruction(next_addr))
573 | 	    if next_inst.insn == Iinvalid or next_inst.id != Imh or inst.rA != next_inst.rA:
574 | 		return False
575 |             lo = inst.imm
576 |             hi = next_inst.imm
577 |             value = (hi << 10) | lo
578 |             self.cmd.itype = PSEUDO_MOVI
579 |             self.fill_op_reg(self.cmd.Op1, inst.rA)
580 |             self.fill_op_imm(self.cmd.Op2, value)
581 |             self.cmd.size = inst.size + next_inst.size
582 |             return True
583 | 
584 |         return False
585 | 
586 |     def is_imm_relative(self, inst):
587 |         if inst is None:
588 |             return False
589 |         return inst.id in [Ib, Ic, Ibrr, Icar]
590 | 
591 |     def ana(self):
592 |         """
593 |         Decodes an instruction into self.cmd.
594 |         Returns: self.cmd.size (=the size of the decoded instruction) or zero
595 |         """
596 |         inst = pyclemency.disassemble(self.cmd.ea, lookahead_instruction(self.cmd.ea))
597 |         if inst.insn == Iinvalid:
598 |             # Ignore invalid instructions
599 |             return 0
600 | 
601 |         self.cmd.size = inst.size
602 |         self.cmd.itype = inst.id
603 |         self.cmd.insnpref = 0
604 | 
605 |         if inst.used_uf and inst.uf:
606 |             self.cmd.insnpref |= UF
607 |         if inst.used_adj_rb and inst.adj_rb:
608 |             self.cmd.insnpref |= inst.adj_rb << ADJ_RB_SHIFT
609 |         if inst.used_cc:
610 |             self.cmd.insnpref |= inst.cc << CC_SHIFT
611 | 
612 |         if self.simplify(inst):
613 |             return self.cmd.size
614 | 
615 |         op = 0
616 |         ops = (self.cmd.Op1, self.cmd.Op2, self.cmd.Op3, self.cmd.Op4, self.cmd.Op5)
617 |         if inst.used_rA:
618 |             self.fill_op_reg(ops[op], inst.rA)
619 |             op += 1
620 |         if inst.used_rB:
621 |             self.fill_op_reg(ops[op], inst.rB)
622 |             op += 1
623 |         if inst.used_rC:
624 |             self.fill_op_reg(ops[op], inst.rC)
625 |             op += 1
626 |         if inst.used_reg_count:
627 |             self.fill_op_imm(ops[op], inst.reg_count)
628 |             op += 1
629 |         if inst.used_imm:
630 |             self.fill_op_imm(ops[op], inst.imm, inst)
631 |             op += 1
632 |         if inst.used_mem_flags:
633 |             self.fill_op_imm(ops[op], inst.mem_flags)
634 |             op += 1
635 | 
636 |         # Return decoded instruction size or zero
637 |         return self.cmd.size
638 | 
# Hook object installed by PROCESSOR_ENTRY(); kept in a module global
# (presumably so it stays alive after the function returns).
hooks_idp = None
def PROCESSOR_ENTRY():
    """Install the IDP hooks and return a new clemency_processor_t instance."""
    global hooks_idp
    installed = clemency_hooks_t()
    hooks_idp = installed
    installed.hook()
    processor = clemency_processor_t()
    return processor
645 | 


--------------------------------------------------------------------------------