├── .cvsignore ├── README.md ├── __init__.py ├── coff.py ├── coffConst.py ├── coffSymbolEntries.py ├── doc ├── AnatomyOfHelloWorldDebug.txt ├── AnatomyOfHelloWorldRelease.txt ├── bugs-features.txt ├── processText.py ├── usersGuide.txt └── x86asm.txt ├── examples ├── __init__.py └── helloWorld.py ├── excmem ├── buildexcmem ├── excmem.c ├── excmem.sln └── excmem.vcproj ├── license.txt ├── logMonitor.py ├── loggers.py ├── makeStructs.py ├── makedist ├── README ├── makedist.bat └── setup.py ├── pythonConstants.py ├── structs ├── structs.c ├── structs.sln └── structs.vcproj ├── test ├── .cvsignore ├── __init__.py ├── disasmHelloWorld.py ├── linkCmd.py ├── output │ └── readme.txt ├── rawHelloWorld.py ├── test_bugs.py ├── test_directives.py ├── test_linker.py ├── test_object_creation.py ├── test_python_funcs.py ├── test_structs.py ├── test_time.py ├── test_variables.py ├── test_winmem.py ├── test_x86asm.py ├── test_x86inst.py ├── test_x86tokenizer.py └── testall.py ├── x86PackUnpack.py ├── x86asm.py ├── x86cpToCoff.py ├── x86cpToMemory.py ├── x86disasm.py ├── x86inst.py └── x86tokenizer.py /.cvsignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.exp 3 | *.lib 4 | *.pyd -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Pyasm 2 | ===== 3 | 4 | Pyasm is a full-featured dynamic assembler written entirely in 5 | Python. By dynamic, I mean that it can be used to generate and execute 6 | machine code in python at runtime without requiring the generation of 7 | object files and linkage. It essentially allows 'inline' assembly in 8 | python modules on x86 platforms. 9 | 10 | Pyasm can also generate object files (for Windows) like a traditional 11 | standalone assembler, although you're probably better off using one of 12 | the many freely available assemblers if this is your primary goal. 
def pyasm(scope, s):
    """Build the code in the text *s* and bind its functions into *scope*.

    The text is wrapped in an in-memory file and handed to
    ``codePackageFromFile`` together with ``PythonConstants``; the resulting
    code package is loaded through ``CpToMemory`` and its Python functions
    are bound with ``BindPythonFunctions(scope)``.  Nothing is returned.
    """
    source_file = cStringIO.StringIO(s)
    code_package = codePackageFromFile(source_file, PythonConstants)
    loader = CpToMemory(code_package)
    loader.MakeMemory()
    loader.BindPythonFunctions(scope)
class coffSymbolList(list):
    """
    List of symbol entries that also tracks symbol-table row positions.

    Every entry occupies ``entry.Rows()`` rows.  ``append`` stamps each entry
    with the row it starts at (``entry.Location``) and keeps the running
    total in ``currentLocation``.
    """
    def __init__(self):
        list.__init__(self)
        # Row index that the next appended entry will receive.
        self.currentLocation = 0

    def append(self,item):
        """Append *item* after assigning it the next free row."""
        item.Location = self.currentLocation
        self.currentLocation = self.currentLocation + item.Rows()
        list.append(self,item)

    def DumpInfo(self):
        """Print a table header and every entry; prints nothing when empty."""
        if not self:
            return
        columns = ("Name", 'Value', 'SectionNumber', 'Type', 'StorageClass')
        print("Symbol Entry Table")
        print("==================")
        print("%20s\t%10s\t%10s\t%10s\t%10s\t" % columns)
        for entry in self:
            entry.DumpInfo()

    def InitFromFile(self, f, count):
        """Read entries from *f* until *count* table rows have been consumed."""
        rowsRead = 0
        while rowsRead < count:
            symbol = coffSymbolEntry()
            symbol.InitFromFile(f)
            # An entry consumes its own row plus one row per auxiliary record.
            rowsRead += symbol.NumberAuxiliary + 1
            self.append(symbol)

    def WriteToFile(self, f):
        """Write every entry to *f* in list order."""
        for entry in self:
            entry.WriteToFile(f)

    def SetLocations(self):
        """
        Recompute ``Location`` for every entry from scratch.

        ``append`` already assigns locations incrementally; this pass is only
        needed when entries changed size or order afterwards.  It does not
        touch ``currentLocation``.
        """
        row = 0
        for entry in self:
            entry.Location = row
            row += entry.Rows()

    def GetLocation(self,symbolName):
        """Return the row of the first entry whose Fullname is *symbolName*.

        Raises coffError when no entry matches.
        """
        return self.GetSymbol(symbolName).Location

    def GetSymbol(self,symbolName):
        """Return the first entry whose Fullname is *symbolName*.

        Raises coffError when no entry matches.
        """
        for entry in self:
            if entry.Fullname == symbolName:
                return entry
        raise coffError("Couldn't find symbol '%s'" % symbolName)
class coffSection:
    """
    One COFF section: the 40-byte header record plus the raw data,
    relocation entries and line-number entries the header points at.

    Reading and writing happen in two passes, matching how coffFile drives
    them: pass 1 handles the header record of every section, pass 2 the
    data blocks in file order.
    """
    def __init__(self):
        self.Name = ""
        self.PhysicalAddress = 0x0
        self.VirtualAddress = 0x0
        self.RawDataSize = 0x0
        self.RawDataLoc = 0x0
        self.RelocationLoc = 0x0
        self.LineNumberLoc = 0x0
        self.RelocationCount = 0x0
        self.LineNumberCount = 0x0
        self.Flags = 0x0
        self.RawData = ""
        self.LineNumberData = coffLineNumberList()
        self.RelocationData = coffRelocationList()

    def InitFromFilePass1(self, f):
        """Read the header record (see Sizeof) from the current position of f."""
        self.Name = stringFromFile(8,f)
        self.PhysicalAddress = ulongFromFile(f)
        self.VirtualAddress = ulongFromFile(f)
        self.RawDataSize = ulongFromFile(f)
        self.RawDataLoc = ulongFromFile(f)
        self.RelocationLoc = ulongFromFile(f)
        self.LineNumberLoc = ulongFromFile(f)
        self.RelocationCount = ushortFromFile(f)
        self.LineNumberCount = ushortFromFile(f)
        self.Flags = longFromFile(f)

    def WriteToFilePass1(self, f):
        """Write the header record in the same field order as it is read."""
        stringToFile(f,8,self.Name)
        ulongToFile(f,self.PhysicalAddress)
        ulongToFile(f,self.VirtualAddress)
        ulongToFile(f,self.RawDataSize)
        ulongToFile(f,self.RawDataLoc)
        ulongToFile(f,self.RelocationLoc)
        ulongToFile(f,self.LineNumberLoc)
        ushortToFile(f,self.RelocationCount)
        ushortToFile(f,self.LineNumberCount)
        longToFile(f,self.Flags)

    def InitFromFilePass2(self, f):
        """
        Read raw data, relocations and line numbers, in that order.

        The reader is strictly sequential: before each non-empty block the
        current file position must equal the location stored in the header,
        otherwise an AssertionError("Out of Sync") is raised.
        """
        if self.RawDataSize:
            assert self.RawDataLoc == f.tell(), "Out of Sync"
            self.RawData = stringFromFile(self.RawDataSize, f)
        if self.RelocationCount:
            assert self.RelocationLoc == f.tell(), "Out of Sync"
            for i in range(self.RelocationCount):
                relEnt = coffRelocationEntry()
                relEnt.InitFromFile(f)
                self.RelocationData.append(relEnt)
        if self.LineNumberCount:
            assert self.LineNumberLoc == f.tell(), "Out of Sync"
            for i in range(self.LineNumberCount):
                ln = coffLineNumberEntry()
                ln.InitFromFile(f)
                self.LineNumberData.append(ln)

    def WriteToFilePass2(self,f):
        """Write raw data, relocations and line numbers, in that order.

        Relies on the sizes/counts being current (see SetSizes).
        """
        if self.RawDataSize:
            stringToFile(f, self.RawDataSize,self.RawData)
        if self.RelocationCount:
            for rec in self.RelocationData:
                rec.WriteToFile(f)
        if self.LineNumberCount:
            for ln in self.LineNumberData:
                ln.WriteToFile(f)

    def Sizeof(self):
        "excluding data"
        # 8 (name) + 6 * 4 (ulongs) + 2 * 2 (ushorts) + 4 (flags)
        return 40

    def SetSizes(self):
        """Refresh the size/count header fields from the data actually held."""
        self.RawDataSize = len(self.RawData)
        self.RelocationCount = len(self.RelocationData)
        self.LineNumberCount = len(self.LineNumberData)

    def SetOffsets(self, currentOffset):
        """
        Lay this section's data out starting at *currentOffset*.

        Sets RawDataLoc, RelocationLoc and LineNumberLoc (the latter two are
        0 when the corresponding table is empty) and returns the first
        offset after this section's data.  Call SetSizes() first.
        """
        self.RawDataLoc = currentOffset
        currentOffset += self.RawDataSize
        if self.RelocationCount:
            self.RelocationLoc = currentOffset
            # 10 == coffRelocationEntry.Sizeof()
            currentOffset += self.RelocationCount * 10
        else:
            self.RelocationLoc = 0x0

        if self.LineNumberCount:
            self.LineNumberLoc = currentOffset
            # 6 == coffLineNumberEntry.Sizeof()
            currentOffset += self.LineNumberCount * 6
        else:
            self.LineNumberLoc = 0x0
        return currentOffset

    def DumpInfo(self, showData=True):
        """
        Print the header fields, the decoded flags and, when *showData* is
        true, a hex dump of the raw data followed by the relocation and
        line-number tables.
        """
        print("")
        print("SECTION")
        print("=======")
        print("")
        print("Name %s\n" % self.Name)
        print("PhysicalAddress %s" % self.PhysicalAddress)
        print("VirtualAddress %s" % self.VirtualAddress)
        print("RawDataSize %s" % self.RawDataSize)
        print("RawDataLoc %s" % self.RawDataLoc)
        print("RelocationTable %s" % self.RelocationLoc)
        print("LineNumberTable %s" % self.LineNumberLoc)
        print("RelocationCount %s" % self.RelocationCount)
        print("LineNumberCount %s" % self.LineNumberCount)
        print("Flags %s" % self.Flags)
        # Sorted so the flag names come out in a stable order.
        for key in sorted(SectionFlags.NAME):
            if self.Flags & key:
                print("\t%s" % SectionFlags.NAME[key])
        # The alignment is a multi-bit field, not a set of independent bits.
        # A section may carry no alignment at all (field == 0) or a value
        # missing from ALIGN_NAME; neither case may abort the dump.
        align = self.Flags & SectionFlags.ALIGN_MASK
        if align:
            print("\t%s" % attemptNameLookup(SectionFlags.ALIGN_NAME, align))
        print("")

        # NOTE(review): the nesting below was reconstructed from a flattened
        # copy of this file -- confirm that the relocation/line-number dumps
        # are meant to be suppressed together with the hex dump.
        if showData:
            charsAsBinDump(self.RawData)
            self.RelocationData.DumpInfo()
            self.LineNumberData.DumpInfo()
            print("")
ushortToFile(f,self.NumberOfSections) 337 | ulongToFile(f,self.Timestamp) 338 | ulongToFile(f,self.SymbolTableLoc) 339 | ulongToFile(f,self.SymbolCount) 340 | ushortToFile(f,self.OptionalHeaderSize) 341 | ushortToFile(f,self.Characteristics) 342 | for sec in self.Sections: 343 | sec.WriteToFilePass1(f) 344 | for sec in self.Sections: 345 | sec.WriteToFilePass2(f) 346 | self.Symbols.WriteToFile(f) 347 | 348 | ulongToFile(f, len(self.StringTable)+4) 349 | stringToFile(f, len(self.StringTable), self.StringTable) 350 | 351 | def Sizeof(self): 352 | "header only" 353 | return 20 354 | 355 | def SetSizes(self): 356 | for sec in self.Sections: 357 | sec.SetSizes() 358 | self.NumberOfSections = len(self.Sections) 359 | self.SymbolCount = 0 360 | for sym in self.Symbols: 361 | sym.SetSizes() 362 | self.SymbolCount += sym.Rows() 363 | 364 | def SetOffsets(self): 365 | offset = self.Sizeof() 366 | offset += len(self.Sections) * 40 367 | i = 1 368 | for sec in self.Sections: 369 | tmpOffset = offset 370 | offset = sec.SetOffsets(offset) 371 | assert tmpOffset == sec.RawDataLoc, "section %s data out of sync" % i 372 | i += 1 373 | self.SymbolTableLoc = offset 374 | 375 | def AddSymbol(self,name="",value=0x0,sec=0x0,typ=0x0,cls=0x0,aux=''): 376 | ## fullname = name 377 | ## if len(name) > 8: #add name to symbol table and reference 378 | ## if name[-1] != '\x00': 379 | ## name += '\x00' 380 | ## pos = len(self.StringTable) + 4 381 | ## self.StringTable += name 382 | ## name = '\x00\x00\x00\x00' + ulongToString(pos) 383 | self.AddExistingSymbol(coffSymbolEntry(name,value,sec,typ,cls,aux))#,fullname)) 384 | 385 | def AddExistingSymbol(self, sym): 386 | name = sym.Name 387 | if len(name) > 8: #add name to symbol table and reference 388 | if name[-1] != '\x00': 389 | name += '\x00' 390 | pos = len(self.StringTable) + 4 391 | self.StringTable += name 392 | name = '\x00\x00\x00\x00' + ulongToString(pos) 393 | sym.Fullname = sym.Name 394 | sym.Name = name 395 | self.Symbols.append(sym) 
396 | 397 | def DumpInfo(self): 398 | print "Machine Type: %s" % self.MachineType 399 | print "Number of sections: %s" % self.NumberOfSections 400 | print "DateTime %s" % self.Timestamp 401 | print "Pointer to symbol table %s" % self.SymbolTableLoc 402 | print "Number of symbols %s" % self.SymbolCount 403 | print "Optional Header size %s" % self.OptionalHeaderSize 404 | print "Characteristics: %s" % self.Characteristics 405 | if self.Characteristics & coffFile.NO_RELOC:print "\tNO RELCATION INTO" 406 | if self.Characteristics & coffFile.EXECUTABLE: print"\tEXECUTABLE" 407 | if self.Characteristics & coffFile.NO_LINENO: print "\tNO LINE NOs" 408 | if self.Characteristics & coffFile.NO_SYMBOLS: print "\tNO SYMBOLS" 409 | if self.Characteristics & coffFile.LITTLEENDIAN: print "\tLITTLEENDIAN" 410 | 411 | for sec in self.Sections: 412 | sec.DumpInfo() 413 | self.Symbols.DumpInfo() 414 | print "String Table %s" % repr(self.StringTable) 415 | 416 | if __name__ == "__main__": 417 | f = file("C:/objtest/objtest/Release/objtest.obj","rb") 418 | 419 | coff = coffFile() 420 | coff.InitFromFile(f) 421 | f.close() 422 | 423 | coff.DumpInfo() 424 | 425 | f2 = file("C:/objtest/objtest/Release/objtest.obj2","wb") 426 | coff.WriteToFile(f2) 427 | f2.close() 428 | -------------------------------------------------------------------------------- /coffConst.py: -------------------------------------------------------------------------------- 1 | # Copyright 2004-2010 Grant T. Olson. 2 | # See license.txt for terms. 3 | 4 | """ 5 | Windows specific constants for coff files grabbed from from winnt.h 6 | appox. 
line # 6064 7 | """ 8 | 9 | class SymbolValues: 10 | SYM_UNDEFINED = 0 11 | SYM_ABSOLUTE = -1 12 | SYM_DEBUG = -2 13 | SYM_SECTION_MAX = 0xFEFF 14 | 15 | NAME = {SYM_UNDEFINED:"SYM_UNDEFINED", 16 | SYM_ABSOLUTE:"SYM_ABSOLUTE", 17 | SYM_DEBUG:"SYM_DEBUG", 18 | SYM_SECTION_MAX:"SYM_SECTION_MAX"} 19 | 20 | class SymbolTypes: 21 | NULL = 0x0 22 | VOID = 0x1 23 | CHAR = 0x2 24 | SHORT = 0x3 25 | INT = 0x4 26 | LONG = 0x5 27 | FLOAT = 0x6 28 | DOUBLE = 0x7 29 | STRUCT = 0x8 30 | UNION = 0x9 31 | ENUM = 0xA 32 | MOE = 0xB # Member of enum 33 | BYTE = 0xC 34 | WORD = 0xD 35 | UINT = 0xE 36 | DWORD = 0xF 37 | PCODE = 0x8000 38 | 39 | NAME = {NULL:"NULL", 40 | VOID:"VOID", 41 | CHAR:"CHAR", 42 | SHORT:"SHORT", 43 | INT:"INT", 44 | LONG:"LONG", 45 | FLOAT:"FLOAT", 46 | DOUBLE:"DOUBLE", 47 | STRUCT:"STRUCT", 48 | UNION:"UNION", 49 | ENUM:"ENUM", 50 | MOE:"MOE", 51 | BYTE:"BYTE", 52 | WORD:"WORD", 53 | UINT:"UINT", 54 | DWORD:"DWORD", 55 | PCODE:"PCODE"} 56 | 57 | class SymbolDerivedType: 58 | NULL = 0x0 59 | POINTER = 0x1 60 | FUNCTION = 0x2 61 | ARRAY = 0x3 62 | 63 | NAME = {NULL:"NULL", 64 | POINTER:"POINTER", 65 | FUNCTION:"FUNCTION", 66 | ARRAY:"ARRAY"} 67 | 68 | class SymbolClass: 69 | END_OF_FUNCTION = -1 70 | NULL = 0x0 71 | AUTOMATIC = 0x1 72 | EXTERNAL = 0x2 73 | STATIC = 0x3 74 | REGISTER = 0x4 75 | EXTERNAL_DEF = 0x5 76 | LABEL = 0x6 77 | UNDEFINED_LABEL = 0x7 78 | MEMBER_OF_STRUCT = 0x8 79 | ARGUMENT = 0x9 80 | STRUCT_TAB = 0xA 81 | MEMBER_OF_UNION = 0xB 82 | UNION_TAG = 0xC 83 | TYPE_DEFINITION = 0xD 84 | UNDEFINED_STATIC = 0xE 85 | ENUM_TAG = 0xF 86 | MEMBER_OF_ENUM = 0x10 87 | REGISTER_PARAM = 0x11 88 | BIT_FIELD = 0x12 89 | 90 | FAR_EXTERNAL =0x44 91 | 92 | CLASS_BLOCK = 0x64 93 | CLASS_FUNCTION = 0x65 94 | CLASS_END_OF_STRUCT = 0x66 95 | CLASS_FILE = 0x67 96 | CLASS_SECTION = 0x68 97 | CLASS_WEAK_EXTERNAL = 0x69 98 | CLASS_CLR_TOKEN = 0x6B 99 | 100 | NAME = {END_OF_FUNCTION:"END_OF_FUNCTION", 101 | NULL:"NULL", 102 | AUTOMATIC:"AUTOMATIC", 103 | 
class SectionFlags:
    """
    Values of the section header ``Flags`` field.

    Two kinds of values live here:

    * single-bit flags, listed in ``NAME`` and tested with ``flags & FLAG``;
    * the alignment, a 4-bit enumerated field selected with ``ALIGN_MASK``
      and decoded through ``ALIGN_NAME`` (``flags & ALIGN_MASK`` must be
      compared or looked up as a whole, never tested bit by bit).
    """
    TYPE_NO_PAD = 0x8
    CNT_CODE = 0x20
    CNT_INITIALIZED_DATA = 0x40
    CNT_UNINITIALIZED_DATA = 0x80
    LNK_OTHER = 0x100
    LNK_INFO = 0x200
    LNK_REMOVE = 0x800
    LNK_COMDAT = 0x1000
    NO_DEFER_SPEC_EXC = 0x4000
    MEM_FARDATA = 0x8000
    MEM_PURGEABLE = 0x20000
    MEM_LOCKED = 0x40000
    MEM_PRELOAD = 0x80000

    # Alignment field: consecutive values 0x1..0xE in bits 20-23.  These are
    # NOT boolean flags (e.g. ALIGN_4BYTES == ALIGN_1BYTES | ALIGN_2BYTES),
    # which is why they are kept out of NAME.
    ALIGN_1BYTES = 0x100000
    ALIGN_2BYTES = 0x200000
    ALIGN_4BYTES = 0x300000
    ALIGN_8BYTES = 0x400000
    ALIGN_16BYTES = 0x500000
    ALIGN_32BYTES = 0x600000
    ALIGN_64BYTES = 0x700000
    ALIGN_128BYTES = 0x800000
    ALIGN_256BYTES = 0x900000
    ALIGN_512BYTES = 0xA00000
    ALIGN_1024BYTES = 0xB00000
    ALIGN_2048BYTES = 0xC00000
    ALIGN_4096BYTES = 0xD00000
    ALIGN_8192BYTES = 0xE00000
    ALIGN_MASK = 0xF00000   # selects the alignment field

    LNK_NRELOC_OVFL = 0x1000000
    MEM_DISCARDABLE = 0x2000000
    NOT_CACHED = 0x4000000
    NOT_PAGED = 0x8000000
    MEM_SHARED = 0x10000000
    MEM_EXECUTE = 0x20000000
    MEM_READ = 0x40000000
    MEM_WRITE = 0x80000000

    # Display names of the single-bit flags.
    NAME = {
        TYPE_NO_PAD:'TYPE_NO_PAD',
        CNT_CODE:'CNT_CODE',
        CNT_INITIALIZED_DATA:'CNT_INITIALIZED_DATA',
        CNT_UNINITIALIZED_DATA:'CNT_UNINITIALIZED_DATA',
        LNK_OTHER:'LNK_OTHER',
        LNK_INFO:'LNK_INFO',
        LNK_REMOVE:'LNK_REMOVE',
        LNK_COMDAT:'LNK_COMDAT',
        NO_DEFER_SPEC_EXC:'NO_DEFER_SPEC_EXC',
        MEM_FARDATA:'FARDATA',
        MEM_PURGEABLE:'PURGEABLE',
        MEM_LOCKED:'LOCKED',
        MEM_PRELOAD:'PRELOAD',

        LNK_NRELOC_OVFL:'LNK_NRELOC_OVFL',
        MEM_DISCARDABLE:'MEM_DISCARDABLE',
        NOT_CACHED:'NOT_CACHED',
        NOT_PAGED:'NOT_PAGED',
        MEM_SHARED:'MEM_SHARED',
        MEM_EXECUTE:'MEM_EXECUTE',
        MEM_READ:'MEM_READ',
        MEM_WRITE:'MEM_WRITE',
        }

    # Display names of the values of the alignment field.
    ALIGN_NAME = {
        ALIGN_1BYTES:'ALIGN_1BYTES',
        ALIGN_2BYTES:'ALIGN_2BYTES',
        ALIGN_4BYTES:'ALIGN_4BYTES',
        ALIGN_8BYTES:'ALIGN_8BYTES',
        ALIGN_16BYTES:'ALIGN_16BYTES',
        ALIGN_32BYTES:'ALIGN_32BYTES',
        ALIGN_64BYTES:'ALIGN_64BYTES',
        ALIGN_128BYTES:'ALIGN_128BYTES',
        ALIGN_256BYTES:'ALIGN_256BYTES',
        ALIGN_512BYTES:'ALIGN_512BYTES',
        ALIGN_1024BYTES:'ALIGN_1024BYTES',
        ALIGN_2048BYTES:'ALIGN_2048BYTES',
        ALIGN_4096BYTES:'ALIGN_4096BYTES',
        ALIGN_8192BYTES:'ALIGN_8192BYTES',
        }
class coffSymbolEntry:
    """
    One symbol-table record followed by its auxiliary records.

    ``Auxiliaries`` holds the raw bytes of all auxiliary records, AUX_SIZE
    bytes each.  The number of records is published under two spellings,
    ``NumberAuxiliary`` and ``NumberAuxiliaries``, because both are used in
    this code base; they are always assigned together so they cannot drift
    apart.
    """

    AUX_SIZE = 18   # bytes per auxiliary record, as read/written below

    def __init__(self,name="",value=0x0,sec=0x0,typ=0x0,cls=0x0,aux='',fullname=None):
        self.Name = name
        self.Value = value
        self.SectionNumber = sec
        self.Type = typ
        self.StorageClass = cls
        self.Auxiliaries = aux
        self._setAuxCount(len(aux) // self.AUX_SIZE)
        self.Location = 0
        # Fullname keeps the readable name; coffFile.AddExistingSymbol may
        # later replace Name by a string-table reference.
        if fullname:
            self.Fullname = fullname
        else:
            self.Fullname = name

    def _setAuxCount(self, count):
        """Store the auxiliary record count under both attribute spellings."""
        self.NumberAuxiliary = count
        self.NumberAuxiliaries = count

    def InitFromFile(self,f):
        """Read the record and all of its auxiliary records from *f*."""
        self.Name = stringFromFile(8,f)
        self.Value = ulongFromFile(f)
        self.SectionNumber = shortFromFile(f)
        self.Type = ushortFromFile(f)
        self.StorageClass = ucharFromFile(f)
        self._setAuxCount(ucharFromFile(f))
        self.Auxiliaries = ''

        for i in range(self.NumberAuxiliaries):
            aux = stringFromFile(self.AUX_SIZE,f)
            self.Auxiliaries += aux

    def WriteToFile(self,f):
        """
        Write the record followed by its auxiliary bytes to *f*.

        The count is recomputed first so that the count byte always matches
        the auxiliary bytes written after it, even when Auxiliaries was
        changed after construction.  Raises AssertionError (from SetSizes)
        when Auxiliaries is not a whole number of records.
        """
        self.SetSizes()
        stringToFile(f, 8, self.Name)
        ulongToFile(f, self.Value)
        shortToFile(f, self.SectionNumber)
        ushortToFile(f, self.Type)
        ucharToFile(f, self.StorageClass)
        ucharToFile(f, self.NumberAuxiliaries)
        stringToFile(f, len(self.Auxiliaries), self.Auxiliaries)

    def SetSizes(self):
        """Recompute the auxiliary record count from len(Auxiliaries)."""
        assert not len(self.Auxiliaries) % self.AUX_SIZE, "Invalid Aux length"
        self._setAuxCount(len(self.Auxiliaries) // self.AUX_SIZE)

    def Rows(self):
        """Return the number of symbol-table rows used: 1 + auxiliary records."""
        self.SetSizes()
        return self.NumberAuxiliaries + 1

    def DumpInfo(self):
        """Print one table line for the record, then the repr of the
        auxiliary bytes wrapped at 70 characters."""
        print("%20s\t%10s\t%10s\t%10s\t%10s\t" % (repr(self.Name),
            attemptNameLookup(SymbolValues.NAME,self.Value),
            repr(self.SectionNumber),
            attemptNameLookup(SymbolTypes.NAME,self.Type),
            attemptNameLookup(SymbolClass.NAME,self.StorageClass)))
        tail = repr(self.Auxiliaries)
        head,tail = tail[:70],tail[70:]
        while head:
            print("\t%s" % head)
            head,tail = tail[:70],tail[70:]
class coffBf(coffSymbolEntry):
    """
    The ``.bf`` symbol record with its single 18-byte auxiliary record.

    sec    -- section number stored in the record
    line   -- line number packed into the auxiliary record
    nextBf -- value packed into the auxiliary record's "pointer to next
              .bf" field
    """
    def __init__(self,sec,line=1,nextBf=0):
        # The placeholder aux string is replaced by BuildAuxiliaries() below.
        coffSymbolEntry.__init__(self,".bf\x00\x00\x00\x00\x00",0,sec,0x20,
                                 SymbolClass.CLASS_FUNCTION, aux="\x00" * 18)
        self.LineNumber = line
        # Honour the caller's value; it used to be discarded in favour of 0.
        self.PointerToNextBf = nextBf
        self.BuildAuxiliaries()

    def BuildAuxiliaries(self):
        """Pack LineNumber and PointerToNextBf into the auxiliary record."""
        # layout: 4 unused, 2 line number, 6 unused, 4 next .bf, 2 unused = 18
        aux = '\x00\x00\x00\x00'
        aux += ushortToString(self.LineNumber)
        aux += '\x00\x00\x00\x00\x00\x00'
        aux += ulongToString(self.PointerToNextBf)
        aux += '\x00\x00'
        self.Auxiliaries = aux

    def RebuildAuxiliaries(self,line=1,nextBf=0):
        """Replace both fields and rebuild the auxiliary record."""
        self.LineNumber = line
        self.PointerToNextBf = nextBf
        self.BuildAuxiliaries()
= aux 193 | 194 | def RebuildAuxiliaries(self,line=1): 195 | self.LineNumber = line 196 | self.BuildAuxiliaries() 197 | 198 | 199 | -------------------------------------------------------------------------------- /doc/AnatomyOfHelloWorldDebug.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grant-olson/pyasm/87ac92bffba380cbb4358af2e4134a1d6a5db653/doc/AnatomyOfHelloWorldDebug.txt -------------------------------------------------------------------------------- /doc/AnatomyOfHelloWorldRelease.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grant-olson/pyasm/87ac92bffba380cbb4358af2e4134a1d6a5db653/doc/AnatomyOfHelloWorldRelease.txt -------------------------------------------------------------------------------- /doc/bugs-features.txt: -------------------------------------------------------------------------------- 1 | Need to be able to PUSH 0x1 as a 32bit value. Right now it's always eight bit. 2 | Macro to automatically check for errors and abort when calling pyobjects. 3 | 4 | Why couldn't I add a !LABEL? 5 | 6 | WHy can't I ADD [ESP], 0x1? 7 | 8 | Figure out how to do name mangling. Doesn't happen at runtime, but does happen if you build a .obj file. 9 | -------------------------------------------------------------------------------- /doc/processText.py: -------------------------------------------------------------------------------- 1 | # Copyright 2004-2005 Grant T. Olson. 2 | # See license.txt for terms. 
"""
Make both html and pdf output for reStructuredText docs
"""

from glob import glob
import os
import sys


def runCmd(cmd):
    """Run *cmd* through the shell and echo its standard output.

    The exit status is not checked, so a failing command does not raise.
    """
    print("")
    print("RUNNING CMD '%s'" % cmd)
    print("")
    pipe = os.popen(cmd)
    try:
        for line in pipe:
            sys.stdout.write(line)
    finally:
        pipe.close()
    print("")


def patchFont(texName):
    """Insert \\usepackage{times} in front of the Docutils generator line."""
    with open(texName) as texFile:
        texSource = texFile.read()
    texSource = texSource.replace("%% generator Docutils: ",
                                  "\\usepackage{times}\n%% generator Docutils: ")
    with open(texName, "w") as texFile:
        texFile.write(texSource)


def main():
    """Convert every *.txt file in the current directory to html and pdf."""
    for srcName in glob("*.txt"):
        try:
            partOfName = srcName[:-4]
            runCmd("rst2html.py %s html\\%s.html" % (srcName, partOfName))
            runCmd("rst2latex.py %s pdf\\%s.tex" % (srcName, partOfName))

            # don't use ugly Computer Modern font.  The patch has to happen
            # before latex runs, otherwise the .dvi (and so the .pdf) is
            # still built from the unpatched source.
            patchFont("pdf/%s.tex" % partOfName)

            runCmd("latex -output-directory pdf pdf/%s.tex" % partOfName)
            runCmd("dvipdfm -o pdf/%s.pdf pdf/%s.dvi" % (partOfName,partOfName))
        except Exception as err:
            # Keep going with the remaining documents, but say which source
            # file failed and why.
            print("Processing %s failed: %s" % (srcName, err))

    # Remove latex by-products; the patched .tex files are kept on purpose.
    #runCmd("del /S pdf\\*.tex")
    runCmd("del /S pdf\\*.dvi")
    runCmd("del /S pdf\\*.log")
    runCmd("del /S pdf\\*.aux")


if __name__ == "__main__":
    main()
19 | 20 | Pyasm can also generate object files (for windows) like a traditional 21 | standalone assembler, although you're probably better off using one of the many 22 | freely available assemblers if this is your primary goal. 23 | 24 | ------------ 25 | Installation 26 | ------------ 27 | 28 | Pyasm currently requires python 2.6. 29 | 30 | Linux Install: 31 | 32 | + Download and extract pyasm-0.3.tar.gz_ 33 | + python setup.py install 34 | 35 | Windows Install: 36 | 37 | + Download and run pyasm-0.3.win32-py2.6.exe_ 38 | + A source distribution pyasm-0.3.zip_ is available, but you'll need VS7.0 to 39 | compile the excmem module. 40 | 41 | .. _pyasm-0.3.tar.gz: http://www.grant-olson.net/python/pyasm/pyasm-0.3.tar.gz?attredirects=0&d=1 42 | .. _pyasm-0.3.win32-py2.6.exe: http://www.grant-olson.net/python/pyasm/pyasm-0.3.win32-py2.6.exe.zip?attredirects=0&d=1 43 | .. _pyasm-0.3.zip: http://www.grant-olson.net/python/pyasm/pyasm-0.3.zip?attredirects=0&d=1 44 | 45 | ------------ 46 | Hello World! 47 | ------------ 48 | 49 | A simple Windows version of a hello_world.py program is as follows:: 50 | 51 | # 52 | # Hello World in assembly: pyasm/examples/hello_World.py 53 | # 54 | # 55 | 56 | from pyasm import pyasm 57 | 58 | pyasm(globals(),r""" 59 | !PROC hello_world PYTHON 60 | !ARG self 61 | !ARG args 62 | !CALL PySys_WriteStdout "Hello, World!\n\0" 63 | ADD ESP, 0x4 64 | MOV EAX,PyNone 65 | ADD [EAX],1 66 | !ENDPROC 67 | """) 68 | 69 | hello_world() 70 | 71 | A brief description of what is happening during the pyasm call: 72 | 73 | 1. The globals() statement tells pyasm where to bind newly created python 74 | functions 75 | 2. The !CHARS directive creates a string constant. 76 | 3. The !PROC and !ARG directives create a procedure that matches the 77 | standard CPythonFunction signature [PyObject* hello_world(PyObject* self, 78 | PyObject* args)] and create procedure initialization code. 79 | 4. The procedure calls python's PySys_WriteStdout function.
Since python functions 80 | use CDECL calling conventions, we: 81 | 82 | a) PUSH the parameters onto the stack from right to left 83 | b) CALL the function 84 | c) Clean up the stack ourselves 85 | 86 | 5. PyCFunctions must return some sort of python object, so we: 87 | 88 | a) Load PyNone into the EAX register, which will become the return value. 89 | b) Add one to the reference count 90 | 91 | 6. The !ENDPROC directive ends the procedure and creates function cleanup 92 | code. This creates a procedure called hello_world that would have the C 93 | signature of `PyObject* hello_world(PyObject* self, PyObject* args)`. The 94 | procedure loads hello_str onto the stack and calls the python interpreter's 95 | PySys_WriteStdout function. 96 | 97 | 7. Calling hello_world() executes the newly created function. 98 | 99 | .. WARNING:: 100 | The rest of this document assumes that you know x86 assembly language. A 101 | tutorial is beyond the scope of this document. If you don't know assembly 102 | language, you'll want to read an introductory text (such as *The Art of 103 | Assembly Language*) as well as downloading Volumes 2 and 3 of the *IA-32 104 | Intel Architecture Software Developer's Manual* for reference. 105 | 106 | 107 | ============== 108 | Everyday usage 109 | ============== 110 | 111 | ---------------- 112 | Assembler Syntax 113 | ---------------- 114 | 115 | Like most assemblers, the command-line assembler contains a very simple parser. 116 | There are two basic statements that can be used. An *instruction statement* and an 117 | *assembler directive*. *Assembler directives* contain information that makes 118 | your assembly a little easier to read than raw assembly code, such as the 119 | beginning and ending of a function; declaration of parameters, variables, 120 | constants and data; and other stuff.
*Instruction Statements* consist of real 121 | assembly instructions such as `MOV [EAX+4],value` 122 | 123 | Additional notes specific to this assembler are as follows: 124 | 125 | + Numbers use python's formatting scheme, so hex is represented as 0xFF and not FFh. 126 | + Instructions and Registers must be in all caps. mov eax,0x0 is invalid. 127 | 128 | Instruction Statements 129 | ---------------------- 130 | 131 | Instruction statements are reasonably straightforward if you know x86 assembly 132 | language. 133 | 134 | Assembler Directives 135 | -------------------- 136 | 137 | Assembler directives begin with an exclamation mark, followed by the directive 138 | itself, and followed by any applicable parameters. Keep in mind that these 139 | directives are provided for the programmer's convenience. Anything that is 140 | done via a directive could be translated into raw assembly; it's just not as 141 | readable. 142 | 143 | ========================= ==================== ======================================== 144 | Text Directive API Call Brief Description 145 | ========================= ==================== ======================================== 146 | **!CALL proc [arg arg]** *n/a* Procedure call framework 147 | **!CHARS name value** **.AStr(n,v)** Create a character array (aka a string) 148 | **!COMMENT text** *n/a* Comment line. 149 | **!CONST name value** **.AC(n,v)** Create a constant value. 150 | **!LABEL name** **.AIL(name)** Provide a symbolic label for later ref. 151 | **!PROC name [type]** **.AP(name,type)** Begin a procedure. 152 | **!ARG argname [size]** **.AA(name,size)** Add an argument to a procedure def. 153 | **!LOCAL varname [size]** **.ALoc(name,size)** Add a local var to a procedure def. 154 | **!ENDPROC** **.EP()** End a procedure 155 | ========================= ==================== ======================================== 156 | 157 | !CALL proc [arg arg arg] 158 | A convenience function for procedure calling.
PUSHes arguments onto the 159 | stack from right to left and calls the appropriate procedure. Stack cleanup 160 | (if any) is still the programmer's responsibility. 161 | 162 | !CHARS name value 163 | Create a character array (aka a string) 164 | 165 | !COMMENT text 166 | Ignore this line. 167 | 168 | !CONST name value 169 | Just declares a constant that is replaced in subsequent occurrences. Keep in 170 | mind that this is resolved at compile time, so the values should really only 171 | be numbers. !CONST hello_world "hello world\\n\\0" is invalid. 172 | 173 | !LABEL name 174 | Provide a symbolic label to the current memory address. Primarily used for 175 | loops, if-then logic, etc. You can use a label and hand-roll a procedure, 176 | but you probably want to use the !PROC directive instead. 177 | 178 | !PROC name [type] 179 | Begin a procedure. This will emit the boilerplate code to start a procedure. 180 | Arguments and Local variables can be declared with !ARG and !LOCAL directives 181 | listed below. These declarations must occur before any instruction 182 | statements or an error will occur. This will generate the boilerplate 183 | function startup code, which consists of PUSHing the EBP register, copying 184 | the current location of ESP, and translating arguments and local variables 185 | into references via the offset of the EBP pointer. If the previous sentence 186 | didn't make any sense to you, just remember that the EBP register shouldn't 187 | be manipulated in your code here or things will get screwed up. 188 | 189 | !ARG argname [size] 190 | An argument passed to a procedure via the stack. By default, we assume the 191 | size is 4 bytes although you can specify if you need to. 192 | 193 | !LOCAL varname [size] 194 | A local variable maintained on the procedure's stack frame. 195 | 196 | !ENDPROC 197 | End a procedure. Emit the cleanup code as the caller's responsibility.
198 | 199 | ------------- 200 | Typical Usage 201 | ------------- 202 | 203 | Typically, usage is as simple as the hello world example listed above. Import 204 | the pyasm function from the pyasm package and call it. Pass globals() as the first argument so that the newly created functions are bound in your module. 205 | 206 | -------------------- 207 | Assembly via the API 208 | -------------------- 209 | 210 | Calling pyasm is fine if you're just trying to inline some assembly functions, 211 | but if you're trying to dynamically generate assembly (such as writing a python 212 | compiler) you're better off accessing the api directly. This involves a few 213 | steps: 214 | 215 | 1) Import the assembler class from x86asm and instantiate. 216 | 2) Add instructions either as strings that need to be preprocessed or via the api. 217 | 3) Generate an intermediate 'codepackage' by calling the .Compile() method 218 | 4) Transform the codePackage to runtime memory via CpToMemory. 219 | 220 | ---------------------- 221 | Command-line assembler 222 | ---------------------- 223 | 224 | **NOT IMPLEMENTED YET** 225 | 226 | If you really want to, a command-line assembler is available for usage. Usage 227 | is straightforward:: 228 | 229 | python pyassemble.py asmfile.asm 230 | 231 | This will generate an object file asmfile.o that can be used by your linker. 232 | 233 | ============== 234 | Debugging Tips 235 | ============== 236 | 237 | If you write assembly, chances are that you are going to crash your app at one 238 | point or another. 239 | 240 | --------------- 241 | Linux Debuggers 242 | --------------- 243 | 244 | On Linux, you obviously have gdb. 245 | 246 | ----------------- 247 | Windows debuggers 248 | ----------------- 249 | 250 | Contrary to popular belief, there is a built-in command-line debugger on Windows 251 | NT/2000/XP called ntsd.exe that can be used in a bind. If you're doing any 252 | serious work though, do yourself a favor and download the 18MB "Debugging Tools 253 | for Windows."
It includes an updated version of ntsd.exe and a version with a 254 | simple Windows interface called WinDBG. You'll really want to download this if 255 | you're getting serious about assembly debugging. Actual usage is beyond the 256 | scope of this document, but read up on setting up a symbol server. 257 | 258 | .. TIP:: 259 | After installing, you may want to register WinDBG as the default 260 | debugger by cd'ing to the program directory and issuing 'windbg -I'. This will 261 | cause WinDBG to spawn automatically when any program crashes or executes an INT 262 | 3 instruction. It also has the added benefit of making friends and co-workers 263 | think that you're a much more hardcore programmer than you really are. The 264 | jury is still out as to whether this impresses the ladies or not. 265 | 266 | And yes, there is the Visual Studio .NET debugger. This is a great debugger 267 | when you're debugging C or VB code in an existing project. But it is 268 | designed to work as part of an IDE. It gets a little weird when debugging 269 | raw assembly or compiled code without the source floating around. As ugly as 270 | WinDBG's gui looks by today's standards, it is a lot more convenient in 271 | these cases. 272 | 273 | ------------------------------------ 274 | Source output - *not implemented yet 275 | ------------------------------------ 276 | 277 | I plan to provide a hook via the logging module so you can obtain disassembly 278 | of the source at runtime. 279 | 280 | ======================= 281 | Feel like contributing? 282 | ======================= 283 | 284 | Any and all patches will be considered. If you're planning on implementing 285 | anything serious you may want to run it by me so you don't end up wasting your 286 | time. There is some low-hanging fruit out there though. 287 | 288 | ----------------------- 289 | Adding x86 instructions 290 | ----------------------- 291 | 292 | I haven't added all of the x86 instructions yet.
Most of it involves cutting 293 | and pasting from the IA32 Intel Software Architecture Manual Volumes 2 and 3. 294 | For standard instructions, you should just be able to add the appropriate text 295 | to x86inst.py and create a test in test_instructions.py. SIMD and FPU 296 | operations will probably require some additional hacking. 297 | 298 | --------------------------------- 299 | ELF serialization/deserialization 300 | --------------------------------- 301 | 302 | There is currently code that converts windows COFF objects to a python-based 303 | object model and vice versa. This allows you to create standard object files 304 | for traditional linking. An equivalent for ELF files would allow you to do the 305 | same thing in Linux. Refer to the coff*.py files to see how this format was 306 | implemented. 307 | 308 | 309 | -------------------------------------------------------------------------------- /doc/x86asm.txt: -------------------------------------------------------------------------------- 1 | x86asm.py 2 | ========= 3 | 4 | This module contains the necessary code to assemble code into machine language. 5 | 6 | Symbols 7 | ------- 8 | 9 | There are a variety of symbols that can be declared in user's assembly code. 10 | This is an attempt to list them out so that I can figure out how to implement 11 | them. 12 | 13 | Symbols ultimately get converted to immediate values, relative values, or r/m 14 | values for indirect reference. 15 | 16 | .. warning: 17 | I need to get a better idea of how other assemblers (MASM, TASM) implement 18 | the look-n-feel for these labels in code. 19 | 20 | symbol types 21 | ~~~~~~~~~~~~ 22 | 23 | global_label 24 | A standard label just marks a location so that it can be referenced 25 | symbolically. We may not need these if proc_labels cover everything. 26 | 27 | proc_label 28 | This is a label, but also indicates that we are at the start of a procedure 29 | that will be called via call.
It will trigger the generation of additional 30 | code to deal with parameters and local variables. 31 | 32 | local_label 33 | A label that is only valid for the scope of a procedure. Used for branches 34 | and what not. It's probably too much to insist that users use 35 | global labels for this, although they're not strictly necessary. 36 | 37 | param_variable 38 | A variable that is passed to a function via the stack at call time. Created 39 | by the calling code. 40 | 41 | .. warning: 42 | How are these handled via standard calling conventions in Windows? 43 | 44 | local_variable 45 | A local variable for a procedure. Unlike param_variables, it is created 46 | by the callee. It is maintained on the call stack relative to the EBP 47 | pointer. 48 | 49 | heap_variable 50 | A variable that gets created on the heap at compile time. Scope is global. 51 | May need to add additional typing information. 52 | 53 | constant 54 | Resolved immediately to a real value and thrown away while processing an instruction 55 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grant-olson/pyasm/87ac92bffba380cbb4358af2e4134a1d6a5db653/examples/__init__.py -------------------------------------------------------------------------------- /examples/helloWorld.py: -------------------------------------------------------------------------------- 1 | from pyasm import pyasm 2 | 3 | pyasm(globals(),r""" 4 | !PROC hello_world PYTHON 5 | !ARG self 6 | !ARG args 7 | !CALL PyString_FromString 'Hello world!\n\0' 8 | ADD ESP, 0x4 9 | 10 | ADD EAX,PyStringObject_ob_sval 11 | PUSH EAX 12 | PUSH EDI 13 | MOV EDI,EAX 14 | MOV AL, 0x42 15 | STOSB 16 | MOV AL, 0x43 17 | STOSB 18 | POP EDI 19 | CALL PySys_WriteStdout 20 | ADD ESP, 0x4 21 | MOV EAX,PyNone 22 | ADD [EAX+PyObject_ob_refcnt],1 23 | !ENDPROC 24 | """) 25 | 26 |
hello_world() 27 | -------------------------------------------------------------------------------- /excmem/buildexcmem: -------------------------------------------------------------------------------- 1 | #/bin/sh 2 | 3 | # 4 | # this is just temporary until I learn how to do a decent cross-platform 5 | # makefile. (Yep, I'm a Windows guy.) 6 | 7 | gcc -shared -I/usr/local/include/python2.4 -L/usr/local/lib/python2.4 excmem.c -o ../excmem.so 8 | -------------------------------------------------------------------------------- /excmem/excmem.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2004-2005 Grant T. Olson. See license.txt for terms.*/ 2 | 3 | /* 4 | This provides a basic interface to allocate executable memory and 5 | create PyCFunction objects. 6 | 7 | I'm currently using PyMem_Malloc until I see a real working example 8 | of W^X heap protection on x86 platforms. 9 | 10 | Also need to decide how much memory we will want to free and 11 | how that should be implemented. 12 | 13 | And I need to decide if it's important to make sure that we 14 | don't use WRITE and EXECUTE permissions at the same time. 
15 | */ 16 | 17 | #include 18 | 19 | #ifdef MS_WINDOWS 20 | 21 | #include 22 | 23 | #else 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | #endif 32 | 33 | 34 | static PyObject *ExcmemError; 35 | 36 | 37 | static PyObject * 38 | excmem_LoadExecutableMemoryString(PyObject *self, PyObject *args) 39 | { 40 | int len,dest; 41 | const char *memString; 42 | 43 | if (!PyArg_ParseTuple(args, "is#", &dest, &memString,&len)) 44 | return NULL; 45 | 46 | memcpy((void*)dest,memString,len); 47 | 48 | Py_INCREF(Py_None); 49 | return Py_None; 50 | } 51 | 52 | static PyObject * 53 | excmem_AllocateExecutableMemory(PyObject *self, PyObject *args) 54 | { 55 | int requestedSize, pointerToMemory; 56 | 57 | if (!PyArg_ParseTuple(args, "i", &requestedSize)) 58 | return NULL; 59 | 60 | pointerToMemory = (int)PyMem_Malloc(requestedSize); 61 | 62 | return Py_BuildValue("i",pointerToMemory); 63 | } 64 | 65 | static PyObject * 66 | excmem_GetCurrentExecutablePosition(PyObject *self, PyObject *args) 67 | { 68 | PyErr_SetString(ExcmemError,"Depreciated function!"); 69 | return 0; 70 | } 71 | 72 | /* Wrap the raw code address passed as an int in a METH_VARARGS PyCFunction and return it. */ static PyObject * 73 | excmem_BindFunctionAddress(PyObject *self, PyObject *args) 74 | { 75 | PyMethodDef *md; 76 | PyObject *func; 77 | int pointer; 78 | 79 | if (!PyArg_ParseTuple(args, "i", &pointer)) 80 | return NULL; 81 | 82 | 83 | 84 | md = PyMem_New(PyMethodDef,1); if (md == NULL) return PyErr_NoMemory(); /* md is dereferenced just below */ 85 | md->ml_doc = "foo"; 86 | md->ml_flags = METH_VARARGS; 87 | md->ml_meth = (void*)pointer; 88 | md->ml_name = "foo"; 89 | 90 | func = PyCFunction_New(md,NULL); 91 | if (func == NULL) PyMem_Free(md); /* on failure nothing else refers to md; on success it is intentionally kept, the function object points at it */ 92 | return func; /* already a new reference (or NULL with an exception set); Py_BuildValue("O",func) added a second reference that was never released */ 93 | } 94 | 95 | 96 | #ifndef MS_WINDOWS 97 | 98 | static PyObject * 99 | excmem_GetSymbolAddress(PyObject *self, PyObject *args) 100 | { 101 | char *symname; 102 | void *sym_addr; 103 | 104 | if (!PyArg_ParseTuple(args, "s", &symname)) 105 | return NULL; 106 | 107 | sym_addr = dlsym(0,symname); 108 | if(!sym_addr) { 109 | PyErr_SetString(ExcmemError,"Couldn't resolve symbol"); 110 | return NULL;
111 | } 112 | 113 | return Py_BuildValue("i",(int)sym_addr); 114 | } 115 | 116 | #else 117 | 118 | static PyObject * 119 | excmem_GetSymbolAddress(PyObject *self, PyObject *args) 120 | { 121 | PyErr_SetString(ExcmemError,"Please use win32api calls to get " 122 | "symbol addresses on windows."); 123 | return NULL; 124 | } 125 | 126 | #endif 127 | 128 | static PyMethodDef ExcmemMethods[] = { 129 | 130 | {"AllocateExecutableMemory",excmem_AllocateExecutableMemory, METH_VARARGS, 131 | "Allocate a chunk of memory flagged with execute privleges and return a pointer."}, 132 | 133 | {"LoadExecutableMemoryString",excmem_LoadExecutableMemoryString, METH_VARARGS, 134 | "Load a string into preallocated memory with execute permissions"}, 135 | 136 | {"GetCurrentExecutablePosition",excmem_GetCurrentExecutablePosition, METH_VARARGS, 137 | "Get the current memory location so we can determine patchins."}, 138 | 139 | {"BindFunctionAddress",excmem_BindFunctionAddress, METH_VARARGS, 140 | "Binds a function address to a python PyCFUnction object so we can call it."}, 141 | 142 | {"GetSymbolAddress",excmem_GetSymbolAddress, METH_VARARGS, 143 | "Get an address of a symbol from a shared library or executable"}, 144 | 145 | 146 | {NULL, NULL, 0, NULL} /* Sentinel */ 147 | }; 148 | 149 | PyMODINIT_FUNC 150 | initexcmem(void) 151 | { 152 | 153 | PyObject *m; 154 | 155 | m = Py_InitModule("excmem", ExcmemMethods); 156 | 157 | ExcmemError = PyErr_NewException("excmem.ExcmemError", NULL, NULL); 158 | Py_INCREF(ExcmemError); 159 | PyModule_AddObject(m, "ExcmemError", ExcmemError); 160 | 161 | } 162 | -------------------------------------------------------------------------------- /excmem/excmem.sln: -------------------------------------------------------------------------------- 1 | Microsoft Visual Studio Solution File, Format Version 10.00 2 | # Visual C++ Express 2008 3 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "excmem", "excmem.vcproj", "{EC502D37-88DF-4D14-AAB9-7CF8228DC8D9}" 4 
| EndProject 5 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "structs", "..\structs\structs.vcproj", "{53BBB28A-F748-4842-9E2C-33F3BB4609D6}" 6 | EndProject 7 | Global 8 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 9 | Debug|Win32 = Debug|Win32 10 | Release|Win32 = Release|Win32 11 | EndGlobalSection 12 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 13 | {EC502D37-88DF-4D14-AAB9-7CF8228DC8D9}.Debug|Win32.ActiveCfg = Debug|Win32 14 | {EC502D37-88DF-4D14-AAB9-7CF8228DC8D9}.Debug|Win32.Build.0 = Debug|Win32 15 | {EC502D37-88DF-4D14-AAB9-7CF8228DC8D9}.Release|Win32.ActiveCfg = Release|Win32 16 | {EC502D37-88DF-4D14-AAB9-7CF8228DC8D9}.Release|Win32.Build.0 = Release|Win32 17 | {53BBB28A-F748-4842-9E2C-33F3BB4609D6}.Debug|Win32.ActiveCfg = Debug|Win32 18 | {53BBB28A-F748-4842-9E2C-33F3BB4609D6}.Debug|Win32.Build.0 = Debug|Win32 19 | {53BBB28A-F748-4842-9E2C-33F3BB4609D6}.Release|Win32.ActiveCfg = Release|Win32 20 | {53BBB28A-F748-4842-9E2C-33F3BB4609D6}.Release|Win32.Build.0 = Release|Win32 21 | EndGlobalSection 22 | GlobalSection(SolutionProperties) = preSolution 23 | HideSolutionNode = FALSE 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /excmem/excmem.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 15 | 16 | 17 | 18 | 19 | 28 | 31 | 34 | 37 | 40 | 43 | 53 | 56 | 59 | 62 | 71 | 74 | 77 | 80 | 83 | 86 | 89 | 92 | 93 | 102 | 105 | 108 | 111 | 114 | 117 | 127 | 130 | 133 | 136 | 147 | 150 | 153 | 156 | 159 | 162 | 165 | 168 | 169 | 170 | 171 | 172 | 173 | 178 | 181 | 182 | 183 | 188 | 189 | 194 | 195 | 196 | 197 | 198 | 199 | -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | pyasm 2 | Copyright (c) 2004-2010, Grant Olson (olsongt@verizon.net) 3 | All rights reserved. 
4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | Redistributions in binary form must reproduce the above copyright notice, this 12 | list of conditions and the following disclaimer in the documentation and/or 13 | other materials provided with the distribution. 14 | 15 | Neither the name of Grant Olson nor the names of other contributors may be used 16 | to endorse or promote products derived from this software without specific 17 | prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 23 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 26 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /logMonitor.py: -------------------------------------------------------------------------------- 1 | # Copyright 2004-2010 Grant T. Olson. 2 | # See license.txt for terms. 
3 | 4 | # 5 | # Some of this code was ripped from the logging example 6 | # Presumably licensed under the python license 7 | # 8 | 9 | import cPickle 10 | import logging 11 | import logging.handlers 12 | import SocketServer 13 | import struct 14 | 15 | class LogRecordStreamHandler(SocketServer.StreamRequestHandler): 16 | """Handler for a streaming logging request. 17 | 18 | This basically logs the record using whatever logging policy is 19 | configured locally. 20 | """ 21 | 22 | def handle(self): 23 | """ 24 | Handle multiple requests - each expected to be a 4-byte length, 25 | followed by the LogRecord in pickle format. Logs the record 26 | according to whatever policy is configured locally. 27 | """ 28 | while 1: # one iteration per length-prefixed record 29 | chunk = self.connection.recv(4) 30 | if len(chunk) < 4: # fewer than 4 bytes: no complete length prefix, stop serving this connection 31 | break 32 | slen = struct.unpack(">L", chunk)[0] # payload length, big-endian unsigned 32-bit 33 | chunk = self.connection.recv(slen) 34 | while len(chunk) < slen: # recv may return less than requested; NOTE(review): if the peer closes mid-record recv presumably keeps returning '' and this loop would not end - confirm 35 | chunk = chunk + self.connection.recv(slen - len(chunk)) 36 | obj = self.unPickle(chunk) 37 | record = logging.makeLogRecord(obj) 38 | self.handleLogRecord(record) 39 | 40 | def unPickle(self, data): 41 | return cPickle.loads(data) # NOTE(review): unpickles bytes received from the socket; unpickling can run arbitrary code, so only trusted senders should be able to connect 42 | 43 | def handleLogRecord(self, record): 44 | # if a name is specified, we use the named logger rather than the one 45 | # implied by the record. 46 | if self.server.logname is not None: 47 | name = self.server.logname 48 | else: 49 | name = record.name 50 | logger = logging.getLogger(name) 51 | # N.B. EVERY record gets logged. This is because Logger.handle 52 | # is normally called AFTER logger-level filtering. If you want 53 | # to do filtering, do it at the client end to save wasting 54 | # cycles and network bandwidth! 55 | logger.handle(record) 56 | 57 | class LogRecordSocketReceiver(SocketServer.ThreadingTCPServer): 58 | """simple TCP socket-based logging receiver suitable for testing.
59 | """ 60 | 61 | allow_reuse_address = 1 62 | 63 | def __init__(self, host='localhost', 64 | port=logging.handlers.DEFAULT_TCP_LOGGING_PORT, 65 | handler=LogRecordStreamHandler): 66 | SocketServer.ThreadingTCPServer.__init__(self, (host, port), handler) 67 | self.abort = 0 68 | self.timeout = 1 69 | self.logname = None 70 | 71 | def serve_until_stopped(self): 72 | import select 73 | abort = 0 74 | while not abort: 75 | rd, wr, ex = select.select([self.socket.fileno()], 76 | [], [], 77 | self.timeout) 78 | if rd: 79 | self.handle_request() 80 | abort = self.abort 81 | 82 | # My stuff 83 | 84 | import thread, Tkinter 85 | 86 | class textboxWithScrollbars(Tkinter.Frame): 87 | def __init__(self,master): 88 | Tkinter.Frame.__init__(self,master) 89 | 90 | self.textbox = Tkinter.Text(self,font='courier') 91 | self.textbox.pack(side=Tkinter.LEFT) 92 | self.insert = self.textbox.insert 93 | 94 | self.scrollbar = Tkinter.Scrollbar(self) 95 | self.scrollbar.pack(side=Tkinter.RIGHT,fill=Tkinter.Y) 96 | self.scrollbar.config(command=self.textbox.yview) 97 | 98 | 99 | 100 | class pyasmDebuggerWindow: 101 | def __init__(self): 102 | self.root = Tkinter.Tk() 103 | self.l = Tkinter.Label(self.root,text="pyasm debugging console") 104 | self.l.pack() 105 | 106 | self.buttons = Tkinter.Frame(self.root) 107 | self.buttons.pack() 108 | self.output = Tkinter.Frame(self.root) 109 | self.output.pack() 110 | 111 | self.x86asmTextbox = self.loggerTextbox('pyasm.x86.asm') 112 | self.x86apiTextbox = self.loggerTextbox('pyasm.x86.api') 113 | self.x86srcTextbox = self.loggerTextbox('pyasm.x86.source') 114 | self.debugTextbox = self.loggerTextbox('pyasm.debug') 115 | 116 | self.activeBox = self.x86asmTextbox 117 | self.x86asmTextbox.pack() 118 | 119 | def changePane(self,textbox): 120 | self.activeBox.pack_forget() 121 | textbox.pack() 122 | self.activeBox = textbox 123 | 124 | def loggerTextbox(self,loggername): 125 | logTextbox = textboxWithScrollbars(self.output) 126 | 
logTextbox.insert(Tkinter.INSERT, "%s CONSOLE\n" % loggername) 127 | logTextbox.insert(Tkinter.INSERT, "==============\n\n") 128 | logTextbox.pack_forget() 129 | 130 | ts = TkTextLogStream(logTextbox) 131 | logging.getLogger(loggername).addHandler(logging.StreamHandler(ts)) 132 | 133 | button = Tkinter.Button(self.buttons,text=loggername, 134 | command=lambda:self.changePane(logTextbox)) 135 | button.pack(side=Tkinter.LEFT) 136 | 137 | return logTextbox 138 | 139 | def mainloop(self): 140 | Tkinter.mainloop() 141 | 142 | class TkTextLogStream: 143 | def __init__(self,textbox): 144 | self.textbox = textbox 145 | 146 | def write(self,text): 147 | self.textbox.insert(Tkinter.INSERT, text) 148 | 149 | def flush(self):pass 150 | 151 | def main(): 152 | logging.basicConfig( 153 | format="%(message)s") 154 | tcpserver = LogRecordSocketReceiver() 155 | print "About to start TCP server..." 156 | thread.start_new_thread(tcpserver.serve_until_stopped,()) 157 | 158 | pdw = pyasmDebuggerWindow() 159 | pdw.mainloop() 160 | 161 | 162 | if __name__ == "__main__": 163 | main() 164 | 165 | -------------------------------------------------------------------------------- /loggers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2004-2010 Grant T. Olson. 2 | # See license.txt for terms. 
3 | 4 | import logging, logging.handlers 5 | 6 | #so root logger doesn't log children's info messages 7 | rootLogger = logging.getLogger('') 8 | rootHandler = logging.StreamHandler() 9 | rootHandler.setLevel(logging.ERROR) 10 | rootLogger.addHandler(rootHandler) 11 | 12 | #quick debug messages 13 | debugLogger = logging.getLogger("pyasm.debug") 14 | debugHandler = logging.StreamHandler() 15 | debugHandler.setLevel(logging.DEBUG) 16 | 17 | #various loggers 18 | x86sourceLogger = logging.getLogger("pyasm.x86.source") 19 | x86asmLogger = logging.getLogger("pyasm.x86.asm") 20 | x86apiLogger = logging.getLogger("pyasm.x86.api") 21 | 22 | x86sourceLogger.setLevel(logging.INFO) 23 | x86asmLogger.setLevel(logging.INFO) 24 | x86apiLogger.setLevel(logging.INFO) 25 | debugLogger.setLevel(logging.INFO) 26 | 27 | console = logging.StreamHandler() 28 | console.setLevel(logging.INFO) 29 | formatter = logging.Formatter("%(message)s") 30 | console.setFormatter(formatter) 31 | 32 | #x86apiLogger.addHandler(console) 33 | #x86sourceLogger.addHandler(console) 34 | #x86asmLogger.addHandler(console) 35 | 36 | socketHandler = logging.handlers.SocketHandler('localhost', 37 | logging.handlers.DEFAULT_TCP_LOGGING_PORT) 38 | 39 | x86sourceLogger.addHandler(socketHandler) 40 | x86asmLogger.addHandler(socketHandler) 41 | x86apiLogger.addHandler(socketHandler) 42 | debugLogger.addHandler(socketHandler) 43 | -------------------------------------------------------------------------------- /makeStructs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2004-2010 Grant T. Olson. 2 | # See license.txt for terms. 3 | 4 | import re, sys, glob, os, time 5 | 6 | #These files are in the 'include' directory, but are modules. 7 | #They won't get built properly 8 | skipFiles = ('datetime.h','py_curses.h','structseq.h','symtable.h') 9 | 10 | 11 | def emitFileHeader(): 12 | print """/* Copyright 2004-2006 Grant T. Olson. 
See license.txt for terms.*/ 13 | #include 14 | 15 | /* file autogenerated by pyasm's makeStructs script on """ + time.ctime() + """ %/ 16 | 17 | /* Preprocessor abuse at it's finest. 18 | 19 | We could probably do all of this in a straight python file, but then 20 | it wouldn't be in sync with a particular build. This insures we have 21 | the right offsets for our structs in a way we can't in pure python 22 | */ 23 | 24 | #define OFFSET_STRING(f) #f 25 | #define OFFSET(m,s,f) \ 26 | offset = PyInt_FromLong((long)&(((s*)0)->f)); \ 27 | Py_INCREF(offset); \ 28 | PyModule_AddObject(m, OFFSET_STRING(f), offset); 29 | 30 | /* Py_DEBUG implies Py_TRACE_REFS. */ 31 | #if defined(Py_DEBUG) && !defined(Py_TRACE_REFS) 32 | #define Py_TRACE_REFS 33 | #endif 34 | 35 | /* Py_TRACE_REFS implies Py_REF_DEBUG. */ 36 | #if defined(Py_TRACE_REFS) && !defined(Py_REF_DEBUG) 37 | #define Py_REF_DEBUG 38 | #endif 39 | 40 | 41 | static PyObject *StructsError; 42 | static PyObject *offset; 43 | 44 | static PyMethodDef StructsMethods[] = { 45 | 46 | {NULL, NULL, 0, NULL} /* Sentinel */ 47 | }; 48 | 49 | """ 50 | 51 | def emitFileBody(): 52 | print """ 53 | static void 54 | load_PyObject(PyObject* module) 55 | { 56 | #ifdef Py_TRACE_REFS 57 | OFFSET(module,PyObject,_ob_next); 58 | OFFSET(module,PyObject,_ob_prev); 59 | #endif 60 | OFFSET(module,PyObject,ob_refcnt); 61 | OFFSET(module,PyObject,ob_type); 62 | } 63 | 64 | static void 65 | load_PyVarObject(PyObject* module) 66 | { 67 | load_PyObject(module); 68 | OFFSET(module,PyVarObject,ob_size); 69 | } 70 | 71 | 72 | """ 73 | 74 | def emitFileFooter(modules): 75 | print """ 76 | 77 | PyMODINIT_FUNC 78 | initstructs(void) 79 | { 80 | 81 | PyObject *m, *n, *o; 82 | /*PyObject *offset;*/ 83 | 84 | m = Py_InitModule("structs", StructsMethods); 85 | n = Py_InitModule("PyObject", StructsMethods); 86 | o = Py_InitModule("PyVarObject", StructsMethods); 87 | 88 | StructsError = PyErr_NewException("structs.StructsError", NULL, NULL); 89 | 
Py_INCREF(StructsError); 90 | PyModule_AddObject(m, "StructsError", StructsError); 91 | 92 | load_PyObject(n); 93 | Py_INCREF(n); 94 | PyModule_AddObject(m, "PyObject", n); 95 | 96 | load_PyVarObject(o); 97 | Py_INCREF(o); 98 | PyModule_AddObject(m, "PyVarObject", o); 99 | 100 | %s 101 | }""" % ''.join([" load_%s(m);\n" % x for x in modules]) 102 | 103 | 104 | def emitModuleHeader(moduleName): 105 | print """static void 106 | load_%(funcname)s(PyObject *structs) 107 | { 108 | PyObject *sm = Py_InitModule("%(funcname)s",StructsMethods); 109 | 110 | """ % {'funcname':moduleName} 111 | 112 | def emitModuleFooter(moduleName): 113 | print """ 114 | 115 | Py_INCREF(sm); 116 | PyModule_AddObject(structs,"%(funcname)s",sm); 117 | }""" % {'funcname':moduleName} 118 | 119 | structsRe = re.compile("typedef\s+struct\s*\w*\s*{(.*?)}\s*(\w+)",re.DOTALL) 120 | typeofRe = re.compile(r"(?P\w+)\s*(?P[^;]+);") 121 | variablesRe = re.compile(r"(\(|\)|\*\*|\*|\[|\]|\w+)[,\s]*") 122 | names = [] 123 | 124 | def emitComment(commentText): 125 | print "/* %s */" % commentText 126 | 127 | def emitRaw(rawText): 128 | print rawText 129 | 130 | def emitOffset(name,val): 131 | print " OFFSET(sm,%s,%s);" % (name,val) 132 | 133 | def parse_filetext(filetext): 134 | global names 135 | for struct in structsRe.findall(filetext): 136 | body,name = struct 137 | if name in ('PyObject','PyVarObject', 'PyFrameObject'): 138 | emitComment("Skipping object %s" % name) 139 | continue 140 | print >> sys.stderr, "NAME", name 141 | 142 | startComment = body.find("/*") 143 | while startComment >= 0: #strip multiline comments 144 | endComment = body.find("*/",startComment) + 2 145 | body = body[:startComment] + body[endComment:] 146 | startComment = body.find("/*") 147 | 148 | lines = body.split("\n") 149 | isPyObject = False 150 | for line in lines: 151 | 152 | line = line.strip() 153 | if not line: 154 | continue 155 | print >> sys.stderr, "LINE:" , line 156 | if line.startswith("#"): 157 | print >> sys.stderr, 
"PREPROCESSOR DIRECTIVE" 158 | emitRaw(line) 159 | elif line == 'PyObject_HEAD': 160 | print >> sys.stderr, "HEADER" , line 161 | 162 | isPyObject = True 163 | emitModuleHeader(name) 164 | names.append(name) 165 | emitRaw(" load_PyObject(sm);") 166 | elif line == 'PyObject_VAR_HEAD': 167 | print >> sys.stderr, "HEADER" , line 168 | 169 | isPyObject = True 170 | emitModuleHeader(name) 171 | names.append(name) 172 | emitRaw(" load_PyVarObject(sm);") 173 | elif line: 174 | if isPyObject == False: 175 | print >> sys.stderr, "NOT A PyObject: SKIPPING" , name 176 | emitComment("Skipping struct %s, not a PyObject based struct" % name) 177 | break 178 | typeof,rest = typeofRe.match(line).groups() 179 | print >> sys.stderr, "TYPE", typeof 180 | vars = variablesRe.findall(rest) 181 | vars.reverse() 182 | 183 | if typeof == "struct": # skip struct def 184 | print >> sys.stderr, "STRUCT", vars 185 | vars.pop() 186 | 187 | while vars: 188 | var = vars.pop() 189 | if var in ('*', '**'): 190 | var = vars.pop() 191 | if var == "(": 192 | #function pointer 193 | print >> sys.stderr, "FUNCTION POINTER", vars 194 | var = vars.pop() 195 | if var != "*": 196 | print >> sys.stderr, var, vars 197 | raise RuntimeError("Invalid Function Pointer " 198 | "format: %s. Expected '*' got %s from %s" % (line,var,vars)) 199 | var = vars.pop() 200 | emitOffset(name, var) 201 | vars = None 202 | else: 203 | print >> sys.stderr, "POINTER", var 204 | emitOffset(name, var) 205 | elif var == '(': 206 | print >> sys.stderr, "FUNCTION POINTER", vars 207 | var = vars.pop() 208 | print >> sys.stderr, "NAME VAR" , name, var 209 | if var != "*": 210 | print >> sys.stderr, var, vars 211 | raise RuntimeError("Invalid Function Pointer " 212 | "format: %s. 
Expected '*' got %s from %s" % (line,var,vars)) 213 | var = vars.pop() 214 | 215 | emitOffset(name, var) 216 | vars = None 217 | elif var == "[": 218 | 219 | print >> sys.stderr, "SKIPPING ARRAY STUB" , vars 220 | var = vars.pop() 221 | var = vars.pop() 222 | else: 223 | print >> sys.stderr, "normal", var 224 | emitOffset(name,var) 225 | 226 | if isPyObject == True: 227 | emitModuleFooter(name) 228 | 229 | def parse_headers(): 230 | headerDir = os.path.join(sys.exec_prefix, "include") 231 | headerFiles = glob.glob(os.path.join(headerDir,"*.h")) 232 | headerFiles = [x for x in headerFiles if os.path.split(x)[1] not in skipFiles] 233 | for filename in headerFiles: 234 | 235 | print >> sys.stderr, "PROCESSING FILE", filename 236 | print "\n\n/* Generated from file %s */\n\n" % filename 237 | f = file(filename) 238 | filetext = f.read() 239 | f.close() 240 | parse_filetext(filetext) 241 | 242 | def make_struct_c(): 243 | emitFileHeader() 244 | emitFileBody() 245 | parse_headers() 246 | emitFileFooter(names) 247 | 248 | make_struct_c() 249 | -------------------------------------------------------------------------------- /makedist/README: -------------------------------------------------------------------------------- 1 | PyASM by Grant Olson 2 | ============================================= 3 | 4 | PyASM is a dynamic x86 assembler for python. By "dynamic", I mean that it can 5 | be used to generate inline assembly functions in python at runtime without 6 | requiring object file generation or linkage. 7 | 8 | New in version 0.3 9 | ------------------ 10 | 11 | + You can now run the test cases via mingw as well as msvc. Set the command 12 | in test/linkCmd.py appropriately. Thanks to Markus Lall for figuring out 13 | how to do this. 14 | 15 | + Updated to python 2.6. 16 | 17 | + Updated MSVC project files to VC 2008. 18 | 19 | + Python structure values are loaded automatically if desired.
For example, 20 | assuming EAX is a pointer to a string, MOV [EAX+PyString_ob_sval],0x42424242 21 | will change the first four letters of the string to B's. 22 | 23 | + Preliminary debugging console to view generation of assembly at various 24 | stages in the compilation pipeline. 25 | 26 | + Implicit string variable creation is now possible. e.g. "PUSH 'foo\n\0'" 27 | now works instead of requiring "!CHARS foo 'foo\n\0'" and "PUSH foo" 28 | 29 | + New !CALL assembler directive handles throwing arguments onto the stack. 30 | e.g. "!CALL foo bar baz bot" instead of "PUSH bot" "PUSH baz" "PUSH bar" 31 | "CALL foo" 32 | 33 | + Fixed tokenizer for instruction definitions with numbers in them such as 34 | INT 3 35 | 36 | + Now includes an 'examples' directory that should be easier for users to 37 | read than the test directory. 38 | 39 | + Show symbol name in disassembly if it exists. 40 | 41 | New in version 0.2 42 | ------------------ 43 | 44 | + Linux Support. Will work in Linux environments as well as Windows. 45 | 46 | + Simplified Interface. You only need to use one function to generate code. 47 | 48 | + Preliminary Documentation.
49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /makedist/makedist.bat: -------------------------------------------------------------------------------- 1 | del MANIFEST 2 | rmdir /Q /S pyasm 3 | svn export svn+ssh://grant@johnwhorfin/var/local/svn/pyasm/trunk pyasm 4 | setup.py sdist --formats=gztar,zip 5 | setup.py bdist_wininst 6 | rmdir /Q /S pyasm 7 | 8 | cd dist 9 | gpg --detach-sign pyasm-0.3.tar.gz 10 | gpg --detach-sign pyasm-0.3.win32-py2.6.exe 11 | gpg --detach-sign pyasm-0.3.zip 12 | -------------------------------------------------------------------------------- /makedist/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from distutils.core import setup, Extension 4 | 5 | excmem = Extension('pyasm.excmem',['pyasm/excmem/excmem.c']) 6 | structs = Extension('pyasm.structs',['pyasm/structs/structs.c']) 7 | 8 | setup(name='pyasm', 9 | version='0.3', 10 | description='dynamic x86 assembler for python', 11 | author='Grant Olson', 12 | author_email='kgo@grant-olson.net', 13 | packages=['pyasm','pyasm.test','pyasm.examples'], 14 | ext_modules=[excmem, structs] 15 | ) 16 | -------------------------------------------------------------------------------- /pythonConstants.py: -------------------------------------------------------------------------------- 1 | # Copyright 2004-2010 Grant T. Olson. 2 | # See license.txt for terms. 3 | 4 | import pyasm.structs 5 | 6 | def PythonConstants(a): 7 | """ 8 | takes an assembler object and loads appropriate constants for 9 | python 10 | """ 11 | a.AC("PyNone",repr(id(None))) 12 | 13 | # add precalculated values from structs modules. 
14 | for obj in dir(pyasm.structs): 15 | if obj.startswith("_"): 16 | continue 17 | 18 | for offset in dir(getattr(pyasm.structs,obj)): 19 | if offset.startswith("_"): 20 | continue 21 | 22 | mangledName = "%s_%s" % (obj,offset) 23 | val = repr(getattr(getattr(pyasm.structs,obj),offset)) 24 | a.AC(mangledName, val) 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /structs/structs.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2004-2006 Grant T. Olson. See license.txt for terms.*/ 2 | #include 3 | 4 | /* file autogenerated by pyasm's makeStructs script on Mon May 29 15:38:07 2006 %/ 5 | 6 | /* Preprocessor abuse at it's finest. 7 | 8 | We could probably do all of this in a straight python file, but then 9 | it wouldn't be in sync with a particular build. This insures we have 10 | the right offsets for our structs in a way we can't in pure python 11 | */ 12 | 13 | #define OFFSET_STRING(f) #f 14 | #define OFFSET(m,s,f) offset = PyInt_FromLong((long)&(((s*)0)->f)); Py_INCREF(offset); PyModule_AddObject(m, OFFSET_STRING(f), offset); 15 | 16 | /* Py_DEBUG implies Py_TRACE_REFS. */ 17 | #if defined(Py_DEBUG) && !defined(Py_TRACE_REFS) 18 | #define Py_TRACE_REFS 19 | #endif 20 | 21 | /* Py_TRACE_REFS implies Py_REF_DEBUG. 
*/ 22 | #if defined(Py_TRACE_REFS) && !defined(Py_REF_DEBUG) 23 | #define Py_REF_DEBUG 24 | #endif 25 | 26 | 27 | static PyObject *StructsError; 28 | static PyObject *offset; 29 | 30 | static PyMethodDef StructsMethods[] = { 31 | 32 | {NULL, NULL, 0, NULL} /* Sentinel */ 33 | }; 34 | 35 | 36 | 37 | static void 38 | load_PyObject(PyObject* module) 39 | { 40 | #ifdef Py_TRACE_REFS 41 | OFFSET(module,PyObject,_ob_next); 42 | OFFSET(module,PyObject,_ob_prev); 43 | #endif 44 | OFFSET(module,PyObject,ob_refcnt); 45 | OFFSET(module,PyObject,ob_type); 46 | } 47 | 48 | static void 49 | load_PyVarObject(PyObject* module) 50 | { 51 | load_PyObject(module); 52 | OFFSET(module,PyVarObject,ob_size); 53 | } 54 | 55 | 56 | 57 | 58 | 59 | /* Generated from file c:\Python24\include\abstract.h */ 60 | 61 | 62 | 63 | 64 | /* Generated from file c:\Python24\include\bitset.h */ 65 | 66 | 67 | 68 | 69 | /* Generated from file c:\Python24\include\boolobject.h */ 70 | 71 | 72 | 73 | 74 | /* Generated from file c:\Python24\include\bufferobject.h */ 75 | 76 | 77 | 78 | 79 | /* Generated from file c:\Python24\include\cellobject.h */ 80 | 81 | 82 | static void 83 | load_PyCellObject(PyObject *structs) 84 | { 85 | PyObject *sm = Py_InitModule("PyCellObject",StructsMethods); 86 | 87 | 88 | load_PyObject(sm); 89 | OFFSET(sm,PyCellObject,ob_ref); 90 | 91 | 92 | Py_INCREF(sm); 93 | PyModule_AddObject(structs,"PyCellObject",sm); 94 | } 95 | 96 | 97 | /* Generated from file c:\Python24\include\ceval.h */ 98 | 99 | 100 | 101 | 102 | /* Generated from file c:\Python24\include\classobject.h */ 103 | 104 | 105 | static void 106 | load_PyClassObject(PyObject *structs) 107 | { 108 | PyObject *sm = Py_InitModule("PyClassObject",StructsMethods); 109 | 110 | 111 | load_PyObject(sm); 112 | OFFSET(sm,PyClassObject,cl_bases); 113 | OFFSET(sm,PyClassObject,cl_dict); 114 | OFFSET(sm,PyClassObject,cl_name); 115 | OFFSET(sm,PyClassObject,cl_getattr); 116 | OFFSET(sm,PyClassObject,cl_setattr); 117 | 
OFFSET(sm,PyClassObject,cl_delattr); 118 | 119 | 120 | Py_INCREF(sm); 121 | PyModule_AddObject(structs,"PyClassObject",sm); 122 | } 123 | static void 124 | load_PyInstanceObject(PyObject *structs) 125 | { 126 | PyObject *sm = Py_InitModule("PyInstanceObject",StructsMethods); 127 | 128 | 129 | load_PyObject(sm); 130 | OFFSET(sm,PyInstanceObject,in_class); 131 | OFFSET(sm,PyInstanceObject,in_dict); 132 | OFFSET(sm,PyInstanceObject,in_weakreflist); 133 | 134 | 135 | Py_INCREF(sm); 136 | PyModule_AddObject(structs,"PyInstanceObject",sm); 137 | } 138 | static void 139 | load_PyMethodObject(PyObject *structs) 140 | { 141 | PyObject *sm = Py_InitModule("PyMethodObject",StructsMethods); 142 | 143 | 144 | load_PyObject(sm); 145 | OFFSET(sm,PyMethodObject,im_func); 146 | OFFSET(sm,PyMethodObject,im_self); 147 | OFFSET(sm,PyMethodObject,im_class); 148 | OFFSET(sm,PyMethodObject,im_weakreflist); 149 | 150 | 151 | Py_INCREF(sm); 152 | PyModule_AddObject(structs,"PyMethodObject",sm); 153 | } 154 | 155 | 156 | /* Generated from file c:\Python24\include\cobject.h */ 157 | 158 | 159 | 160 | 161 | /* Generated from file c:\Python24\include\codecs.h */ 162 | 163 | 164 | 165 | 166 | /* Generated from file c:\Python24\include\compile.h */ 167 | 168 | 169 | static void 170 | load_PyCodeObject(PyObject *structs) 171 | { 172 | PyObject *sm = Py_InitModule("PyCodeObject",StructsMethods); 173 | 174 | 175 | load_PyObject(sm); 176 | OFFSET(sm,PyCodeObject,co_argcount); 177 | OFFSET(sm,PyCodeObject,co_nlocals); 178 | OFFSET(sm,PyCodeObject,co_stacksize); 179 | OFFSET(sm,PyCodeObject,co_flags); 180 | OFFSET(sm,PyCodeObject,co_code); 181 | OFFSET(sm,PyCodeObject,co_consts); 182 | OFFSET(sm,PyCodeObject,co_names); 183 | OFFSET(sm,PyCodeObject,co_varnames); 184 | OFFSET(sm,PyCodeObject,co_freevars); 185 | OFFSET(sm,PyCodeObject,co_cellvars); 186 | OFFSET(sm,PyCodeObject,co_filename); 187 | OFFSET(sm,PyCodeObject,co_name); 188 | OFFSET(sm,PyCodeObject,co_firstlineno); 189 | 
OFFSET(sm,PyCodeObject,co_lnotab); 190 | 191 | 192 | Py_INCREF(sm); 193 | PyModule_AddObject(structs,"PyCodeObject",sm); 194 | } 195 | /* Skipping struct PyFutureFeatures, not a PyObject based struct */ 196 | 197 | 198 | /* Generated from file c:\Python24\include\complexobject.h */ 199 | 200 | 201 | /* Skipping struct Py_complex, not a PyObject based struct */ 202 | static void 203 | load_PyComplexObject(PyObject *structs) 204 | { 205 | PyObject *sm = Py_InitModule("PyComplexObject",StructsMethods); 206 | 207 | 208 | load_PyObject(sm); 209 | OFFSET(sm,PyComplexObject,cval); 210 | 211 | 212 | Py_INCREF(sm); 213 | PyModule_AddObject(structs,"PyComplexObject",sm); 214 | } 215 | 216 | 217 | /* Generated from file c:\Python24\include\cStringIO.h */ 218 | 219 | 220 | 221 | 222 | /* Generated from file c:\Python24\include\descrobject.h */ 223 | 224 | 225 | /* Skipping struct PyGetSetDef, not a PyObject based struct */ 226 | /* Skipping struct PyDescrObject, not a PyObject based struct */ 227 | /* Skipping struct PyMethodDescrObject, not a PyObject based struct */ 228 | /* Skipping struct PyMemberDescrObject, not a PyObject based struct */ 229 | /* Skipping struct PyGetSetDescrObject, not a PyObject based struct */ 230 | /* Skipping struct PyWrapperDescrObject, not a PyObject based struct */ 231 | 232 | 233 | /* Generated from file c:\Python24\include\dictobject.h */ 234 | 235 | 236 | /* Skipping struct PyDictEntry, not a PyObject based struct */ 237 | 238 | 239 | /* Generated from file c:\Python24\include\enumobject.h */ 240 | 241 | 242 | 243 | 244 | /* Generated from file c:\Python24\include\errcode.h */ 245 | 246 | 247 | 248 | 249 | /* Generated from file c:\Python24\include\eval.h */ 250 | 251 | 252 | 253 | 254 | /* Generated from file c:\Python24\include\fileobject.h */ 255 | 256 | 257 | static void 258 | load_PyFileObject(PyObject *structs) 259 | { 260 | PyObject *sm = Py_InitModule("PyFileObject",StructsMethods); 261 | 262 | 263 | load_PyObject(sm); 264 | 
OFFSET(sm,PyFileObject,f_fp); 265 | OFFSET(sm,PyFileObject,f_name); 266 | OFFSET(sm,PyFileObject,f_mode); 267 | OFFSET(sm,PyFileObject,f_close); 268 | OFFSET(sm,PyFileObject,f_softspace); 269 | OFFSET(sm,PyFileObject,f_binary); 270 | OFFSET(sm,PyFileObject,f_buf); 271 | OFFSET(sm,PyFileObject,f_bufend); 272 | OFFSET(sm,PyFileObject,f_bufptr); 273 | OFFSET(sm,PyFileObject,f_setbuf); 274 | OFFSET(sm,PyFileObject,f_univ_newline); 275 | OFFSET(sm,PyFileObject,f_newlinetypes); 276 | OFFSET(sm,PyFileObject,f_skipnextlf); 277 | OFFSET(sm,PyFileObject,f_encoding); 278 | OFFSET(sm,PyFileObject,weakreflist); 279 | 280 | 281 | Py_INCREF(sm); 282 | PyModule_AddObject(structs,"PyFileObject",sm); 283 | } 284 | 285 | 286 | /* Generated from file c:\Python24\include\floatobject.h */ 287 | 288 | 289 | static void 290 | load_PyFloatObject(PyObject *structs) 291 | { 292 | PyObject *sm = Py_InitModule("PyFloatObject",StructsMethods); 293 | 294 | 295 | load_PyObject(sm); 296 | OFFSET(sm,PyFloatObject,ob_fval); 297 | 298 | 299 | Py_INCREF(sm); 300 | PyModule_AddObject(structs,"PyFloatObject",sm); 301 | } 302 | 303 | 304 | /* Generated from file c:\Python24\include\frameobject.h */ 305 | 306 | 307 | /* Skipping struct PyTryBlock, not a PyObject based struct */ 308 | /* Skipping object PyFrameObject */ 309 | 310 | 311 | /* Generated from file c:\Python24\include\funcobject.h */ 312 | 313 | 314 | static void 315 | load_PyFunctionObject(PyObject *structs) 316 | { 317 | PyObject *sm = Py_InitModule("PyFunctionObject",StructsMethods); 318 | 319 | 320 | load_PyObject(sm); 321 | OFFSET(sm,PyFunctionObject,func_code); 322 | OFFSET(sm,PyFunctionObject,func_globals); 323 | OFFSET(sm,PyFunctionObject,func_defaults); 324 | OFFSET(sm,PyFunctionObject,func_closure); 325 | OFFSET(sm,PyFunctionObject,func_doc); 326 | OFFSET(sm,PyFunctionObject,func_name); 327 | OFFSET(sm,PyFunctionObject,func_dict); 328 | OFFSET(sm,PyFunctionObject,func_weakreflist); 329 | OFFSET(sm,PyFunctionObject,func_module); 330 | 
331 | 332 | Py_INCREF(sm); 333 | PyModule_AddObject(structs,"PyFunctionObject",sm); 334 | } 335 | 336 | 337 | /* Generated from file c:\Python24\include\genobject.h */ 338 | 339 | 340 | static void 341 | load_PyGenObject(PyObject *structs) 342 | { 343 | PyObject *sm = Py_InitModule("PyGenObject",StructsMethods); 344 | 345 | 346 | load_PyObject(sm); 347 | OFFSET(sm,PyGenObject,gi_frame); 348 | OFFSET(sm,PyGenObject,gi_running); 349 | OFFSET(sm,PyGenObject,gi_weakreflist); 350 | 351 | 352 | Py_INCREF(sm); 353 | PyModule_AddObject(structs,"PyGenObject",sm); 354 | } 355 | 356 | 357 | /* Generated from file c:\Python24\include\graminit.h */ 358 | 359 | 360 | 361 | 362 | /* Generated from file c:\Python24\include\grammar.h */ 363 | 364 | 365 | /* Skipping struct label, not a PyObject based struct */ 366 | /* Skipping struct labellist, not a PyObject based struct */ 367 | /* Skipping struct arc, not a PyObject based struct */ 368 | /* Skipping struct state, not a PyObject based struct */ 369 | /* Skipping struct dfa, not a PyObject based struct */ 370 | /* Skipping struct grammar, not a PyObject based struct */ 371 | 372 | 373 | /* Generated from file c:\Python24\include\import.h */ 374 | 375 | 376 | 377 | 378 | /* Generated from file c:\Python24\include\intobject.h */ 379 | 380 | 381 | static void 382 | load_PyIntObject(PyObject *structs) 383 | { 384 | PyObject *sm = Py_InitModule("PyIntObject",StructsMethods); 385 | 386 | 387 | load_PyObject(sm); 388 | OFFSET(sm,PyIntObject,ob_ival); 389 | 390 | 391 | Py_INCREF(sm); 392 | PyModule_AddObject(structs,"PyIntObject",sm); 393 | } 394 | 395 | 396 | /* Generated from file c:\Python24\include\intrcheck.h */ 397 | 398 | 399 | 400 | 401 | /* Generated from file c:\Python24\include\iterobject.h */ 402 | 403 | 404 | 405 | 406 | /* Generated from file c:\Python24\include\listobject.h */ 407 | 408 | 409 | static void 410 | load_PyListObject(PyObject *structs) 411 | { 412 | PyObject *sm = Py_InitModule("PyListObject",StructsMethods); 
413 | 414 | 415 | load_PyVarObject(sm); 416 | OFFSET(sm,PyListObject,ob_item); 417 | OFFSET(sm,PyListObject,allocated); 418 | 419 | 420 | Py_INCREF(sm); 421 | PyModule_AddObject(structs,"PyListObject",sm); 422 | } 423 | 424 | 425 | /* Generated from file c:\Python24\include\longintrepr.h */ 426 | 427 | 428 | 429 | 430 | /* Generated from file c:\Python24\include\longobject.h */ 431 | 432 | 433 | 434 | 435 | /* Generated from file c:\Python24\include\marshal.h */ 436 | 437 | 438 | 439 | 440 | /* Generated from file c:\Python24\include\metagrammar.h */ 441 | 442 | 443 | 444 | 445 | /* Generated from file c:\Python24\include\methodobject.h */ 446 | 447 | 448 | /* Skipping struct PyMethodChain, not a PyObject based struct */ 449 | static void 450 | load_PyCFunctionObject(PyObject *structs) 451 | { 452 | PyObject *sm = Py_InitModule("PyCFunctionObject",StructsMethods); 453 | 454 | 455 | load_PyObject(sm); 456 | OFFSET(sm,PyCFunctionObject,m_ml); 457 | OFFSET(sm,PyCFunctionObject,m_self); 458 | OFFSET(sm,PyCFunctionObject,m_module); 459 | 460 | 461 | Py_INCREF(sm); 462 | PyModule_AddObject(structs,"PyCFunctionObject",sm); 463 | } 464 | 465 | 466 | /* Generated from file c:\Python24\include\modsupport.h */ 467 | 468 | 469 | 470 | 471 | /* Generated from file c:\Python24\include\moduleobject.h */ 472 | 473 | 474 | 475 | 476 | /* Generated from file c:\Python24\include\node.h */ 477 | 478 | 479 | /* Skipping struct node, not a PyObject based struct */ 480 | 481 | 482 | /* Generated from file c:\Python24\include\object.h */ 483 | 484 | 485 | /* Skipping object PyObject */ 486 | /* Skipping object PyVarObject */ 487 | /* Skipping struct PyNumberMethods, not a PyObject based struct */ 488 | /* Skipping struct PySequenceMethods, not a PyObject based struct */ 489 | /* Skipping struct PyMappingMethods, not a PyObject based struct */ 490 | /* Skipping struct PyBufferProcs, not a PyObject based struct */ 491 | static void 492 | load_PyTypeObject(PyObject *structs) 493 | { 494 | 
PyObject *sm = Py_InitModule("PyTypeObject",StructsMethods); 495 | 496 | 497 | load_PyVarObject(sm); 498 | OFFSET(sm,PyTypeObject,tp_name); 499 | OFFSET(sm,PyTypeObject,tp_basicsize); 500 | OFFSET(sm,PyTypeObject,tp_itemsize); 501 | OFFSET(sm,PyTypeObject,tp_dealloc); 502 | OFFSET(sm,PyTypeObject,tp_print); 503 | OFFSET(sm,PyTypeObject,tp_getattr); 504 | OFFSET(sm,PyTypeObject,tp_setattr); 505 | OFFSET(sm,PyTypeObject,tp_compare); 506 | OFFSET(sm,PyTypeObject,tp_repr); 507 | OFFSET(sm,PyTypeObject,tp_as_number); 508 | OFFSET(sm,PyTypeObject,tp_as_sequence); 509 | OFFSET(sm,PyTypeObject,tp_as_mapping); 510 | OFFSET(sm,PyTypeObject,tp_hash); 511 | OFFSET(sm,PyTypeObject,tp_call); 512 | OFFSET(sm,PyTypeObject,tp_str); 513 | OFFSET(sm,PyTypeObject,tp_getattro); 514 | OFFSET(sm,PyTypeObject,tp_setattro); 515 | OFFSET(sm,PyTypeObject,tp_as_buffer); 516 | OFFSET(sm,PyTypeObject,tp_flags); 517 | OFFSET(sm,PyTypeObject,tp_doc); 518 | OFFSET(sm,PyTypeObject,tp_traverse); 519 | OFFSET(sm,PyTypeObject,tp_clear); 520 | OFFSET(sm,PyTypeObject,tp_richcompare); 521 | OFFSET(sm,PyTypeObject,tp_weaklistoffset); 522 | OFFSET(sm,PyTypeObject,tp_iter); 523 | OFFSET(sm,PyTypeObject,tp_iternext); 524 | OFFSET(sm,PyTypeObject,tp_methods); 525 | OFFSET(sm,PyTypeObject,tp_members); 526 | OFFSET(sm,PyTypeObject,tp_getset); 527 | OFFSET(sm,PyTypeObject,tp_base); 528 | OFFSET(sm,PyTypeObject,tp_dict); 529 | OFFSET(sm,PyTypeObject,tp_descr_get); 530 | OFFSET(sm,PyTypeObject,tp_descr_set); 531 | OFFSET(sm,PyTypeObject,tp_dictoffset); 532 | OFFSET(sm,PyTypeObject,tp_init); 533 | OFFSET(sm,PyTypeObject,tp_alloc); 534 | OFFSET(sm,PyTypeObject,tp_new); 535 | OFFSET(sm,PyTypeObject,tp_free); 536 | OFFSET(sm,PyTypeObject,tp_is_gc); 537 | OFFSET(sm,PyTypeObject,tp_bases); 538 | OFFSET(sm,PyTypeObject,tp_mro); 539 | OFFSET(sm,PyTypeObject,tp_cache); 540 | OFFSET(sm,PyTypeObject,tp_subclasses); 541 | OFFSET(sm,PyTypeObject,tp_weaklist); 542 | OFFSET(sm,PyTypeObject,tp_del); 543 | #ifdef COUNT_ALLOCS 544 
| OFFSET(sm,PyTypeObject,tp_allocs); 545 | OFFSET(sm,PyTypeObject,tp_frees); 546 | OFFSET(sm,PyTypeObject,tp_maxalloc); 547 | OFFSET(sm,PyTypeObject,tp_next); 548 | #endif 549 | 550 | 551 | Py_INCREF(sm); 552 | PyModule_AddObject(structs,"PyTypeObject",sm); 553 | } 554 | /* Skipping struct PyHeapTypeObject, not a PyObject based struct */ 555 | 556 | 557 | /* Generated from file c:\Python24\include\objimpl.h */ 558 | 559 | 560 | 561 | 562 | /* Generated from file c:\Python24\include\opcode.h */ 563 | 564 | 565 | 566 | 567 | /* Generated from file c:\Python24\include\osdefs.h */ 568 | 569 | 570 | 571 | 572 | /* Generated from file c:\Python24\include\parsetok.h */ 573 | 574 | 575 | /* Skipping struct perrdetail, not a PyObject based struct */ 576 | 577 | 578 | /* Generated from file c:\Python24\include\patchlevel.h */ 579 | 580 | 581 | 582 | 583 | /* Generated from file c:\Python24\include\pgen.h */ 584 | 585 | 586 | 587 | 588 | /* Generated from file c:\Python24\include\pgenheaders.h */ 589 | 590 | 591 | 592 | 593 | /* Generated from file c:\Python24\include\pyconfig.h */ 594 | 595 | 596 | 597 | 598 | /* Generated from file c:\Python24\include\pydebug.h */ 599 | 600 | 601 | 602 | 603 | /* Generated from file c:\Python24\include\pyerrors.h */ 604 | 605 | 606 | 607 | 608 | /* Generated from file c:\Python24\include\pyfpe.h */ 609 | 610 | 611 | 612 | 613 | /* Generated from file c:\Python24\include\pygetopt.h */ 614 | 615 | 616 | 617 | 618 | /* Generated from file c:\Python24\include\pymactoolbox.h */ 619 | 620 | 621 | 622 | 623 | /* Generated from file c:\Python24\include\pymem.h */ 624 | 625 | 626 | 627 | 628 | /* Generated from file c:\Python24\include\pyport.h */ 629 | 630 | 631 | /* Skipping struct fd_set, not a PyObject based struct */ 632 | 633 | 634 | /* Generated from file c:\Python24\include\pystate.h */ 635 | 636 | 637 | /* Skipping struct PyInterpreterState, not a PyObject based struct */ 638 | /* Skipping struct PyThreadState, not a PyObject based struct 
*/ 639 | 640 | 641 | /* Generated from file c:\Python24\include\pystrtod.h */ 642 | 643 | 644 | 645 | 646 | /* Generated from file c:\Python24\include\Python.h */ 647 | 648 | 649 | 650 | 651 | /* Generated from file c:\Python24\include\pythonrun.h */ 652 | 653 | 654 | /* Skipping struct PyCompilerFlags, not a PyObject based struct */ 655 | 656 | 657 | /* Generated from file c:\Python24\include\pythread.h */ 658 | 659 | 660 | 661 | 662 | /* Generated from file c:\Python24\include\rangeobject.h */ 663 | 664 | 665 | 666 | 667 | /* Generated from file c:\Python24\include\setobject.h */ 668 | 669 | 670 | static void 671 | load_PySetObject(PyObject *structs) 672 | { 673 | PyObject *sm = Py_InitModule("PySetObject",StructsMethods); 674 | 675 | 676 | load_PyObject(sm); 677 | OFFSET(sm,PySetObject,table); 678 | OFFSET(sm,PySetObject,hash); 679 | OFFSET(sm,PySetObject,weakreflist); 680 | 681 | 682 | Py_INCREF(sm); 683 | PyModule_AddObject(structs,"PySetObject",sm); 684 | } 685 | 686 | 687 | /* Generated from file c:\Python24\include\sliceobject.h */ 688 | 689 | 690 | static void 691 | load_PySliceObject(PyObject *structs) 692 | { 693 | PyObject *sm = Py_InitModule("PySliceObject",StructsMethods); 694 | 695 | 696 | load_PyObject(sm); 697 | OFFSET(sm,PySliceObject,start); 698 | OFFSET(sm,PySliceObject,stop); 699 | OFFSET(sm,PySliceObject,step); 700 | 701 | 702 | Py_INCREF(sm); 703 | PyModule_AddObject(structs,"PySliceObject",sm); 704 | } 705 | 706 | 707 | /* Generated from file c:\Python24\include\stringobject.h */ 708 | 709 | 710 | static void 711 | load_PyStringObject(PyObject *structs) 712 | { 713 | PyObject *sm = Py_InitModule("PyStringObject",StructsMethods); 714 | 715 | 716 | load_PyVarObject(sm); 717 | OFFSET(sm,PyStringObject,ob_shash); 718 | OFFSET(sm,PyStringObject,ob_sstate); 719 | OFFSET(sm,PyStringObject,ob_sval); 720 | 721 | 722 | Py_INCREF(sm); 723 | PyModule_AddObject(structs,"PyStringObject",sm); 724 | } 725 | 726 | 727 | /* Generated from file 
c:\Python24\include\structmember.h */ 728 | 729 | 730 | /* Skipping struct PyMemberDef, not a PyObject based struct */ 731 | 732 | 733 | /* Generated from file c:\Python24\include\sysmodule.h */ 734 | 735 | 736 | 737 | 738 | /* Generated from file c:\Python24\include\timefuncs.h */ 739 | 740 | 741 | 742 | 743 | /* Generated from file c:\Python24\include\token.h */ 744 | 745 | 746 | 747 | 748 | /* Generated from file c:\Python24\include\traceback.h */ 749 | 750 | 751 | static void 752 | load_PyTracebackObject(PyObject *structs) 753 | { 754 | PyObject *sm = Py_InitModule("PyTracebackObject",StructsMethods); 755 | 756 | 757 | load_PyObject(sm); 758 | OFFSET(sm,PyTracebackObject,tb_next); 759 | OFFSET(sm,PyTracebackObject,tb_frame); 760 | OFFSET(sm,PyTracebackObject,tb_lasti); 761 | OFFSET(sm,PyTracebackObject,tb_lineno); 762 | 763 | 764 | Py_INCREF(sm); 765 | PyModule_AddObject(structs,"PyTracebackObject",sm); 766 | } 767 | 768 | 769 | /* Generated from file c:\Python24\include\tupleobject.h */ 770 | 771 | 772 | static void 773 | load_PyTupleObject(PyObject *structs) 774 | { 775 | PyObject *sm = Py_InitModule("PyTupleObject",StructsMethods); 776 | 777 | 778 | load_PyVarObject(sm); 779 | OFFSET(sm,PyTupleObject,ob_item); 780 | 781 | 782 | Py_INCREF(sm); 783 | PyModule_AddObject(structs,"PyTupleObject",sm); 784 | } 785 | 786 | 787 | /* Generated from file c:\Python24\include\ucnhash.h */ 788 | 789 | 790 | /* Skipping struct _PyUnicode_Name_CAPI, not a PyObject based struct */ 791 | 792 | 793 | /* Generated from file c:\Python24\include\unicodeobject.h */ 794 | 795 | 796 | static void 797 | load_PyUnicodeObject(PyObject *structs) 798 | { 799 | PyObject *sm = Py_InitModule("PyUnicodeObject",StructsMethods); 800 | 801 | 802 | load_PyObject(sm); 803 | OFFSET(sm,PyUnicodeObject,length); 804 | OFFSET(sm,PyUnicodeObject,str); 805 | OFFSET(sm,PyUnicodeObject,hash); 806 | OFFSET(sm,PyUnicodeObject,defenc); 807 | 808 | 809 | Py_INCREF(sm); 810 | 
PyModule_AddObject(structs,"PyUnicodeObject",sm); 811 | } 812 | 813 | 814 | /* Generated from file c:\Python24\include\weakrefobject.h */ 815 | 816 | 817 | 818 | 819 | PyMODINIT_FUNC 820 | initstructs(void) 821 | { 822 | 823 | PyObject *m, *n, *o; 824 | /*PyObject *offset;*/ 825 | 826 | m = Py_InitModule("structs", StructsMethods); 827 | n = Py_InitModule("PyObject", StructsMethods); 828 | o = Py_InitModule("PyVarObject", StructsMethods); 829 | 830 | StructsError = PyErr_NewException("structs.StructsError", NULL, NULL); 831 | Py_INCREF(StructsError); 832 | PyModule_AddObject(m, "StructsError", StructsError); 833 | 834 | load_PyObject(n); 835 | Py_INCREF(n); 836 | PyModule_AddObject(m, "PyObject", n); 837 | 838 | load_PyVarObject(o); 839 | Py_INCREF(o); 840 | PyModule_AddObject(m, "PyVarObject", o); 841 | 842 | load_PyCellObject(m); 843 | load_PyClassObject(m); 844 | load_PyInstanceObject(m); 845 | load_PyMethodObject(m); 846 | load_PyCodeObject(m); 847 | load_PyComplexObject(m); 848 | load_PyFileObject(m); 849 | load_PyFloatObject(m); 850 | load_PyFunctionObject(m); 851 | load_PyGenObject(m); 852 | load_PyIntObject(m); 853 | load_PyListObject(m); 854 | load_PyCFunctionObject(m); 855 | load_PyTypeObject(m); 856 | load_PySetObject(m); 857 | load_PySliceObject(m); 858 | load_PyStringObject(m); 859 | load_PyTracebackObject(m); 860 | load_PyTupleObject(m); 861 | load_PyUnicodeObject(m); 862 | 863 | } 864 | -------------------------------------------------------------------------------- /structs/structs.sln: -------------------------------------------------------------------------------- 1 | Microsoft Visual Studio Solution File, Format Version 8.00 2 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "structs", "structs.vcproj", "{53BBB28A-F748-4842-9E2C-33F3BB4609D6}" 3 | ProjectSection(ProjectDependencies) = postProject 4 | EndProjectSection 5 | EndProject 6 | Global 7 | GlobalSection(SolutionConfiguration) = preSolution 8 | Debug = Debug 9 | Release = Release 10 | 
EndGlobalSection 11 | GlobalSection(ProjectConfiguration) = postSolution 12 | {53BBB28A-F748-4842-9E2C-33F3BB4609D6}.Debug.ActiveCfg = Debug|Win32 13 | {53BBB28A-F748-4842-9E2C-33F3BB4609D6}.Debug.Build.0 = Debug|Win32 14 | {53BBB28A-F748-4842-9E2C-33F3BB4609D6}.Release.ActiveCfg = Release|Win32 15 | {53BBB28A-F748-4842-9E2C-33F3BB4609D6}.Release.Build.0 = Release|Win32 16 | EndGlobalSection 17 | GlobalSection(ExtensibilityGlobals) = postSolution 18 | EndGlobalSection 19 | GlobalSection(ExtensibilityAddIns) = postSolution 20 | EndGlobalSection 21 | EndGlobal 22 | -------------------------------------------------------------------------------- /structs/structs.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 14 | 15 | 16 | 17 | 18 | 26 | 29 | 32 | 35 | 38 | 41 | 53 | 56 | 59 | 62 | 73 | 76 | 79 | 82 | 85 | 88 | 91 | 94 | 95 | 103 | 106 | 109 | 112 | 115 | 118 | 127 | 130 | 133 | 136 | 148 | 151 | 154 | 157 | 160 | 163 | 166 | 169 | 170 | 171 | 172 | 173 | 174 | 179 | 182 | 183 | 184 | 189 | 190 | 195 | 196 | 197 | 198 | 199 | 200 | -------------------------------------------------------------------------------- /test/.cvsignore: -------------------------------------------------------------------------------- 1 | *.pyc -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grant-olson/pyasm/87ac92bffba380cbb4358af2e4134a1d6a5db653/test/__init__.py -------------------------------------------------------------------------------- /test/disasmHelloWorld.py: -------------------------------------------------------------------------------- 1 | # Copyright 2004-2005 Grant T. Olson. 2 | # See license.txt for terms. 
3 | 4 | import unittest 5 | from pyasm.x86disasm import x86Block,x86Disassembler 6 | 7 | """ 8 | We could probably write a more proper unittest, but at this point the output 9 | isn't perfect. I'm happy if it runs without crashing. 10 | 11 | Here is the output for a release build of hello world from dumpbin: 12 | 13 | _main: 14 | 00000000: 55 push ebp 15 | 00000001: 8B EC mov ebp,esp 16 | 00000003: 83 EC 40 sub esp,40h 17 | 00000006: 53 push ebx 18 | 00000007: 56 push esi 19 | 00000008: 57 push edi 20 | 00000009: 8D 7D C0 lea edi,[ebp-40h] 21 | 0000000C: B9 10 00 00 00 mov ecx,10h 22 | 00000011: B8 CC CC CC CC mov eax,0CCCCCCCCh 23 | 00000016: F3 AB rep stos dword ptr [edi] 24 | 00000018: 68 00 00 00 00 push offset _main 25 | 0000001D: E8 00 00 00 00 call 00000022 26 | 00000022: 83 C4 04 add esp,4 27 | 00000025: 33 C0 xor eax,eax 28 | 00000027: 5F pop edi 29 | 00000028: 5E pop esi 30 | 00000029: 5B pop ebx 31 | 0000002A: 83 C4 40 add esp,40h 32 | 0000002D: 3B EC cmp ebp,esp 33 | 0000002F: E8 00 00 00 00 call 00000034 34 | 00000034: 8B E5 mov esp,ebp 35 | 00000036: 5D pop ebp 36 | 00000037: C3 ret 37 | 38 | """ 39 | 40 | class test_x86disasm(unittest.TestCase): 41 | def test_release_hello_world(self): 42 | code = x86Block('h\x00\x00\x00\x00\xe8\x00\x00\x00\x00\x83\xc4\x043\xc0\xc3') 43 | dis = x86Disassembler(code) 44 | #dis.disasm() 45 | 46 | def test_debug_hello_world(self): 47 | code = x86Block('U\x8b\xec\x83\xec@SVW\x8d}\xc0\xb9\x10\x00\x00\x00\xb8\xcc\xcc\xcc\xcc\xf3\xabh\x00\x00\x00\x00\xe8\x00\x00\x00\x00\x83\xc4\x043\xc0_^[\x83\xc4@;\xec\xe8\x00\x00\x00\x00\x8b\xe5]\xc3') 48 | dis = x86Disassembler(code) 49 | #dis.disasm() 50 | 51 | if __name__ == '__main__': 52 | unittest.main() -------------------------------------------------------------------------------- /test/linkCmd.py: -------------------------------------------------------------------------------- 1 | # Copyright 2004-2010 Grant T. Olson. 2 | # See license.txt for terms. 
3 | 4 | import sys 5 | 6 | """ 7 | By default we link with the MSVC toolchain. Change 'True' to 'False' 8 | if you want to use the mingw toolchain. 9 | 10 | Thanks to Markus Lall for figuring out that (bizarrely) the mingw ld 11 | will not link coff format files, but running the object file through 12 | gcc will cause the files to link correctly. 13 | 14 | """ 15 | 16 | if True: 17 | def linkCmd(s): 18 | return "cd output && link /DEBUG /OPT:REF /OPT:ICF %s.obj" % s 19 | else: 20 | def linkCmd(s): 21 | return "cd output && gcc %s.obj -o %s.exe" % (s,s) 22 | 23 | -------------------------------------------------------------------------------- /test/output/readme.txt: -------------------------------------------------------------------------------- 1 | TortoiseCVS doesn't seem to want to add empty directories -------------------------------------------------------------------------------- /test/rawHelloWorld.py: -------------------------------------------------------------------------------- 1 | # Copyright 2004-2010 Grant T. Olson. 2 | # See license.txt for terms. 
3 | 4 | from pyasm.coff import coffFile, coffSection, coffRelocationEntry, coffSymbolEntry 5 | from pyasm.coffConst import * 6 | import time,os,sys 7 | 8 | """Creates a simple .obj file that should be good enough to 9 | link as a hello world program""" 10 | 11 | c = coffFile() 12 | 13 | c.MachineType = coffFile.I386MAGIC 14 | 15 | s1 = coffSection() 16 | s1.Name = ".drectve" 17 | s1.Flags = (SectionFlags.LNK_REMOVE | 18 | SectionFlags.LNK_INFO | 19 | SectionFlags.ALIGN_1BYTES) 20 | s1.RawData = '-defaultlib:LIBCMT -defaultlib:OLDNAMES ' 21 | c.Sections.append(s1) 22 | 23 | s2 = coffSection() 24 | s2.Name = ".text\x00\x00\x00" 25 | s2.Flags = (SectionFlags.CNT_CODE | 26 | SectionFlags.LNK_COMDAT | 27 | SectionFlags.MEM_EXECUTE | 28 | SectionFlags.MEM_READ | 29 | SectionFlags.ALIGN_16BYTES) 30 | s2.RawData = "\x68\x00\x00\x00\x00\xE8\x00\x00\x00\x00\x83\xC4\x04\x33\xC0\xC3" 31 | 32 | r = coffRelocationEntry(addr=0x1,sym=0xC,typ=RelocationTypes.I386_DIR32) 33 | s2.RelocationData.append(r) 34 | 35 | r= coffRelocationEntry(addr=0x6,sym=0x9,typ=RelocationTypes.I386_REL32) 36 | s2.RelocationData.append(r) 37 | 38 | c.Sections.append(s2) 39 | 40 | 41 | s3 = coffSection() 42 | s3.Name = '.data\x00\x00\x00' 43 | s3.Flags = (SectionFlags.LNK_COMDAT | 44 | SectionFlags.CNT_INITIALIZED_DATA | 45 | SectionFlags.MEM_WRITE | 46 | SectionFlags.MEM_READ | 47 | SectionFlags.ALIGN_4BYTES) 48 | s3.RawData = '\x48\x65\x6c\x6c\x6f\x20\x57\x6f\x72\x6c\x64\x21\x0a\x00' 49 | c.Sections.append(s3) 50 | 51 | s4=coffSection() 52 | s4.Name = '.debug$F' 53 | s4.Flags = (SectionFlags.LNK_COMDAT | 54 | SectionFlags.TYPE_NO_PAD | 55 | SectionFlags.CNT_INITIALIZED_DATA | 56 | SectionFlags.MEM_DISCARDABLE | 57 | SectionFlags.MEM_READ | 58 | SectionFlags.ALIGN_1BYTES) 59 | s4.RawData = '\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00' 60 | 61 | r = coffRelocationEntry(addr=0x0,sym=0x8,typ=RelocationTypes.I386_DIR32NB) 62 | s4.RelocationData.append(r) 63 | 64 | c.Sections.append(s4) 65
| 66 | s = coffSymbolEntry('.file\x00\x00\x00',SymbolValues.SYM_UNDEFINED,-2, 67 | SymbolTypes.NULL, SymbolClass.CLASS_FILE) 68 | s.Auxiliaries = 'C:\\objtest\\objtest\\objtest.cpp\x00\x00\x00\x00\x00\x00' 69 | c.Symbols.append(s) 70 | 71 | s = coffSymbolEntry('@comp.id',0xB2306, -1, SymbolTypes.NULL, SymbolClass.STATIC) 72 | c.Symbols.append(s) 73 | 74 | s = coffSymbolEntry('.drectve', SymbolValues.SYM_UNDEFINED, 1, SymbolTypes.NULL, 75 | SymbolClass.STATIC) 76 | s.Auxiliaries = '&\x00\x00\x00\x00\x00\x00\x00O\xe0\xad\x98\x00\x00\x00\x00\x00\x00' 77 | c.Symbols.append(s) 78 | 79 | s = coffSymbolEntry('.text\x00\x00\x00', SymbolValues.SYM_UNDEFINED, 2, 80 | SymbolTypes.NULL, SymbolClass.STATIC) 81 | s.Auxiliaries = "\x10\x00\x00\x00\x02\x00\x00\x00\x9d\xf0\xcd3\x00\x00\x01\x00\x00\x00" 82 | c.Symbols.append(s) 83 | 84 | s = coffSymbolEntry('_main\x00\x00\x00', SymbolValues.SYM_UNDEFINED, 2, 0x20, 85 | SymbolClass.EXTERNAL) 86 | c.Symbols.append(s) 87 | 88 | s = coffSymbolEntry('_printf\x00', SymbolValues.SYM_UNDEFINED, 0, 0x20, 89 | SymbolClass.EXTERNAL) 90 | c.Symbols.append(s) 91 | 92 | s = coffSymbolEntry('.data\x00\x00\x00', SymbolValues.SYM_UNDEFINED, 3, 93 | SymbolTypes.NULL, SymbolClass.STATIC) 94 | s.Auxiliaries = '\x0e\x00\x00\x00\x00\x00\x00\x00\xfe,\xa6\xfb\x00\x00\x02\x00\x00\x00' 95 | c.Symbols.append(s) 96 | 97 | s = coffSymbolEntry('\x00\x00\x00\x00\x04\x00\x00\x00', SymbolValues.SYM_UNDEFINED, 3, 98 | SymbolTypes.NULL, SymbolClass.EXTERNAL) 99 | c.Symbols.append(s) 100 | 101 | s = coffSymbolEntry('.debug$F', SymbolValues.SYM_UNDEFINED, 4, SymbolTypes.NULL, 102 | SymbolClass.STATIC) 103 | s.Auxiliaries = '\x10\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x05\x00\x00\x00' 104 | c.Symbols.append(s) 105 | 106 | c.StringTable = '??_C@_0O@FEEI@Hello?5World?$CB?6?$AA@\x00' 107 | 108 | c.SetSizes() 109 | c.SetOffsets() 110 | 111 | #c.DumpInfo() 112 | 113 | f = file("output/rawHelloWorld.obj","wb") 114 | c.WriteToFile(f) 115 | f.close() 116 | 117 | if 
sys.platform == 'win32': 118 | os.system("cd output && link rawHelloWorld.obj") 119 | os.system("cd output && rawHelloWorld.exe") 120 | else: 121 | print "Skipping linker test, coff files are only valid on win32 platforms" 122 | 123 | -------------------------------------------------------------------------------- /test/test_bugs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2004-2010 Grant T. Olson. 2 | # See license.txt for terms. 3 | 4 | import unittest 5 | from pyasm.x86inst import longToString 6 | from pyasm.x86asm import assembler, x86asmError 7 | 8 | class test_long_to_string(unittest.TestCase): 9 | def test_positive(self): 10 | self.assertEquals(longToString(0x40),'\x40\x00\x00\x00') 11 | self.assertEquals(longToString(0x40,1),'\x40') 12 | 13 | def test_negative(self): 14 | self.assertEquals(longToString(-0x40),'\xc0\xff\xff\xff') 15 | self.assertEquals(longToString(-0x40,1),'\xc0') 16 | 17 | def test_no_endproc(self): 18 | a = assembler() 19 | a("!PROC foo") 20 | a("NOP") 21 | self.failUnlessRaises(x86asmError, a.Compile) 22 | 23 | if __name__ == "__main__": 24 | unittest.main() 25 | 26 | -------------------------------------------------------------------------------- /test/test_directives.py: -------------------------------------------------------------------------------- 1 | # Copyright 2004-2010 Grant T. Olson. 2 | # See license.txt for terms. 
3 | 4 | from pyasm.x86asm import assembler 5 | from pyasm.x86cpToMemory import CpToMemory 6 | 7 | a = assembler() 8 | 9 | 10 | a("!COMMENT This is a samle hello world program") 11 | a("!COMMENT by Grant") 12 | 13 | a("!CHARS hello_str 'hello world\n\0'") 14 | 15 | a("!PROC hello_world PYTHON") 16 | a("!ARG self") 17 | a("!ARG args") 18 | #a(" INT 3") 19 | a(" PUSH hello_str") 20 | a(" CALL PySys_WriteStdout") 21 | a(" ADD ESP,0x4") #CDECL 22 | a(" MOV EAX,%s" % id(None)) 23 | a(" ADD [EAX], 0x1") 24 | a("!ENDPROC") 25 | 26 | cp = a.Compile() 27 | mem = CpToMemory(cp) 28 | 29 | mem.MakeMemory() 30 | mem.BindPythonFunctions(globals()) 31 | 32 | hello_world() 33 | 34 | -------------------------------------------------------------------------------- /test/test_linker.py: -------------------------------------------------------------------------------- 1 | # Copyright 2004-2010 Grant T. Olson. 2 | # See license.txt for terms. 3 | 4 | """ 5 | Test various variables, parameters and constants in procedures 6 | """ 7 | 8 | from pyasm.x86asm import assembler, CDECL 9 | from pyasm.x86cpToCoff import CpToCoff 10 | import unittest 11 | import os,sys 12 | from linkCmd import linkCmd 13 | 14 | """ 15 | Hopefully this will fix itself when I get all the proper coff entries in place 16 | """ 17 | 18 | class test_linker(unittest.TestCase): 19 | def test_linker(self): 20 | """ 21 | Make sure params get referenced correctly 22 | """ 23 | a = assembler() 24 | 25 | a.ADStr('hello_planets','3h + 12h + 12h = %xh\n\0') 26 | 27 | a.AP("_main") 28 | a.AI("PUSH EBX") 29 | a.AI("MOV EBX,0x12") 30 | a.AI("PUSH EBX") 31 | a.AI("MOV EBX,0x3") 32 | a.AI("PUSH EBX") 33 | a.AI("CALL get_x_plus_two_y") 34 | a.AI("PUSH EAX") 35 | a.AI("PUSH hello_planets") 36 | a.AI("CALL _printf") 37 | a.AI("ADD ESP,0x8") #printf is _cdecl 38 | #a.AI("XOR EAX,EAX") 39 | a.AI("POP EBX") 40 | a.EP() 41 | 42 | #get_x_plus_two_y proc 43 | a.AP("get_x_plus_two_y") 44 | a.AA("x") 45 | a.AA("y") 46 | a.AI("XOR EAX,EAX") 47 |
a.AI("MOV EAX,x") 48 | a.AI("ADD EAX,y") 49 | a.AI("ADD EAX,y") 50 | a.EP() 51 | 52 | cp = a.Compile() 53 | 54 | coff = CpToCoff(cp,"-defaultlib:LIBMT -defaultlib:OLDNAMES ").makeReleaseCoff() 55 | f = file("output/testLinker.obj","wb") 56 | coff.WriteToFile(f) 57 | f.close() 58 | 59 | if sys.platform == "win32": 60 | self.assertEquals(os.system(linkCmd("testLinker")), 0) 61 | self.assertEquals(os.popen("cd output && testLinker.exe").read(), 62 | "3h + 12h + 12h = 27h\n") 63 | else: 64 | print "Skipping linker test, coff files are only valid on win32 platforms" 65 | 66 | 67 | if __name__ == '__main__': 68 | unittest.main() 69 | 70 | -------------------------------------------------------------------------------- /test/test_object_creation.py: -------------------------------------------------------------------------------- 1 | # Copyright 2004-2010 Grant T. Olson. 2 | # See license.txt for terms. 3 | 4 | """ 5 | Test the whole chain to a .obj file for a simple 'hello world' 6 | assembly app. 
7 | 8 | Selected output from dumpbin against the app built in Visual Studio: 9 | 10 | _main: 11 | 00000000: 55 push ebp 12 | 00000001: 8B EC mov ebp,esp 13 | 00000003: 83 EC 40 sub esp,40h 14 | 00000006: 53 push ebx 15 | 00000007: 56 push esi 16 | 00000008: 57 push edi 17 | 00000009: 8D 7D C0 lea edi,[ebp-40h] 18 | 0000000C: B9 10 00 00 00 mov ecx,10h 19 | 00000011: B8 CC CC CC CC mov eax,0CCCCCCCCh 20 | 00000016: F3 AB rep stos dword ptr [edi] 21 | 00000018: 68 00 00 00 00 push offset _main 22 | 0000001D: E8 00 00 00 00 call 00000022 23 | 00000022: 83 C4 04 add esp,4 24 | 00000025: 33 C0 xor eax,eax 25 | 00000027: 5F pop edi 26 | 00000028: 5E pop esi 27 | 00000029: 5B pop ebx 28 | 0000002A: 83 C4 40 add esp,40h 29 | 0000002D: 3B EC cmp ebp,esp 30 | 0000002F: E8 00 00 00 00 call 00000034 31 | 00000034: 8B E5 mov esp,ebp 32 | 00000036: 5D pop ebp 33 | 00000037: C3 ret 34 | 35 | """ 36 | 37 | from pyasm.x86asm import assembler 38 | from pyasm.x86cpToCoff import CpToCoff 39 | import unittest 40 | import os,sys 41 | from linkCmd import linkCmd 42 | 43 | class test_object_creation(unittest.TestCase): 44 | 45 | def test_hello_world(self): 46 | a = assembler() 47 | 48 | a.ADStr('hello_world','Hello, World\n\0') 49 | a.AIL("_main") 50 | a.AI("PUSH EBP") 51 | a.AI("MOV EBP,ESP") 52 | a.AI("SUB ESP,0x40") 53 | a.AI("PUSH EBX") 54 | a.AI("PUSH ESI") 55 | a.AI("PUSH EDI") 56 | a.AI("LEA EDI,[EBP-0x40]") 57 | a.AI("MOV ECX,0x10") 58 | a.AI("MOV EAX,0x0CCCCCCCC") 59 | a.AI("REP STOS [EDI]") 60 | a.AI("PUSH hello_world") 61 | a.AI("CALL _printf") 62 | a.AI("ADD ESP,4") 63 | a.AI("XOR EAX,EAX") 64 | a.AI("POP EDI") 65 | a.AI("POP ESI") 66 | a.AI("POP EBX") 67 | a.AI("ADD ESP,0x40") 68 | a.AI("CMP EBP,ESP") 69 | a.AI("CALL __chkesp") 70 | a.AI("MOV ESP,EBP") 71 | a.AI("POP EBP") 72 | a.AI("RET") 73 | a.ADStr("goodbye_world", "GOODBYE WORLD!\n\0") 74 | 75 | cp = a.Compile() 76 | 77 |
self.assertEquals(cp.Code,'U\x8b\xec\x81\xec@\x00\x00\x00SVW\x8d}\xc0\xb9\x10\x00\x00\x00\xb8\xcc\xcc\xcc\xcc\xf3\xabh\x00\x00\x00\x00\xe8\x00\x00\x00\x00\x83\xc4\x043\xc0_^[\x81\xc4@\x00\x00\x00;\xec\xe8\x00\x00\x00\x00\x8b\xe5]\xc3') 78 | self.assertEquals(cp.CodePatchins,[('hello_world', 28, 2), ('_printf', 33, 1), ('__chkesp', 54, 1)]) 79 | self.assertEquals(cp.CodeSymbols,[('_main', 0, 0)]) 80 | self.assertEquals(cp.Data,'Hello, World\n\x00GOODBYE WORLD!\n\x00') 81 | self.assertEquals(cp.DataSymbols,[('hello_world', 0), ('goodbye_world', 14)]) 82 | 83 | coff = CpToCoff(cp,"-defaultlib:LIBCPMTD -defaultlib:LIBCMTD -defaultlib:OLDNAMES ").makeReleaseCoff() 84 | 85 | f = file("output/testHelloWorld.obj","wb") 86 | coff.WriteToFile(f) 87 | f.close() 88 | 89 | if sys.platform == 'win32': 90 | self.assertEquals(os.system(linkCmd("testHelloWorld")),0) 91 | self.assertEquals(os.popen("cd output && testHelloWorld.exe").read(),"Hello, World\n") 92 | else: 93 | print "Skipping linker test, coff files are only valid on win32 platforms" 94 | 95 | def test_proc(self): 96 | a = assembler() 97 | 98 | a.ADStr('hello_world','Hello, World\n\0') 99 | a.AP("_main") 100 | a.AI("PUSH hello_world") 101 | a.AI("CALL _printf") 102 | a.AI("ADD ESP,0x4") # _cdecl cleanup 103 | a.AI("XOR EAX,EAX") 104 | a.EP() 105 | 106 | cp = a.Compile() 107 | coff = CpToCoff(cp,"-defaultlib:LIBCPMTD -defaultlib:LIBCMTD -defaultlib:OLDNAMES ").makeReleaseCoff() 108 | f = file("output/testProc.obj","wb") 109 | coff.WriteToFile(f) 110 | f.close() 111 | 112 | if sys.platform == 'win32': 113 | self.assertEquals(os.system(linkCmd("testProc")), 0) 114 | self.assertEquals(os.popen("cd output && testProc.exe").read(), "Hello, World\n") 115 | else: 116 | print "Skipping linker test, coff files are only valid on win32 platforms" 117 | 118 | def test_goodbye_world(self): 119 | """ 120 | Make sure we see the second param instead of defaulting to the first 121 | """ 122 | a = assembler() 123 | 124 | 
a.ADStr('hello_world','Hello, World\n\0') 125 | a.ADStr('Goodbye_World','Goodbye, World\n\0') 126 | a.AP("_main") 127 | a.AI("PUSH Goodbye_World") 128 | a.AI("CALL _printf") 129 | a.AI("ADD ESP,0x4") # _cdecl 130 | a.AI("XOR EAX,EAX") 131 | a.EP() 132 | 133 | cp = a.Compile() 134 | coff = CpToCoff(cp,"-defaultlib:LIBCPMTD -defaultlib:LIBCMTD -defaultlib:OLDNAMES ").makeReleaseCoff() 135 | f = file("output/testGoodbyeWorld.obj","wb") 136 | coff.WriteToFile(f) 137 | f.close() 138 | 139 | if sys.platform == 'win32': 140 | self.assertEquals(os.system(linkCmd("testGoodbyeWorld")), 0) 141 | self.assertEquals(os.popen("cd output &&testGoodbyeWorld.exe").read(), "Goodbye, World\n") 142 | else: 143 | print "Skipping linker test, coff files are only valid on win32 platforms" 144 | 145 | def test_two_procs(self): 146 | """ 147 | Make sure second proc gets called correctly 148 | """ 149 | a = assembler() 150 | 151 | a.ADStr('hello_planets','Hello, all %i planets!\n\0') 152 | 153 | a.AP("_main") 154 | a.AI("CALL get_planets") 155 | a.AI("PUSH EAX") 156 | a.AI("PUSH hello_planets") 157 | a.AI("CALL _printf") 158 | a.AI("ADD ESP,0x8") #printf is _cdecl 159 | a.AI("XOR EAX,EAX") 160 | a.EP() 161 | 162 | #get_planets proc 163 | a.AP("get_planets") 164 | a.AI("MOV EAX,0x12") 165 | a.EP() 166 | 167 | cp = a.Compile() 168 | 169 | coff = CpToCoff(cp,"-defaultlib:LIBCPMTD -defaultlib:LIBCMTD -defaultlib:OLDNAMES ").makeReleaseCoff() 170 | f = file("output/testTwoProcs.obj","wb") 171 | coff.WriteToFile(f) 172 | f.close() 173 | 174 | if sys.platform == 'win32': 175 | self.assertEquals(os.system(linkCmd("testTwoProcs")), 0) 176 | self.assertEquals(os.popen("cd output && testTwoProcs.exe").read(), 177 | "Hello, all 18 planets!\n") 178 | else: 179 | print "Skipping linker test, coff files are only valid on win32 platforms" 180 | 181 | if __name__ == '__main__': 182 | unittest.main() 183 | 184 | -------------------------------------------------------------------------------- 
/test/test_python_funcs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2004-2010 Grant T. Olson. 2 | # See license.txt for terms. 3 | 4 | import unittest 5 | from pyasm.x86asm import assembler, CDECL, STDCALL, PYTHON 6 | from pyasm.x86cpToMemory import CpToMemory 7 | 8 | class test_python_funcs(unittest.TestCase): 9 | 10 | 11 | def test_simple_function(self): 12 | a = assembler() 13 | a.ADStr("hello_world", "Hello world!\n\0") 14 | a.AP("test_print", PYTHON) 15 | a.AddLocal("self") 16 | a.AddLocal("args") 17 | #a.AI("INT 3") 18 | a.AI("PUSH hello_world") 19 | a.AI("CALL PySys_WriteStdout") 20 | a.AI("MOV EAX,%s" % id(None)) 21 | a.AI("ADD [EAX],0x1") #refcount 22 | a.EP() 23 | 24 | a.AP("test_print2", PYTHON) 25 | a.AddLocal("self") 26 | a.AddLocal("args") 27 | #a.AI("INT 3") 28 | a.AI("PUSH hello_world") 29 | a.AI("CALL PySys_WriteStdout") 30 | a.AI("MOV EAX,%s" % id(None)) 31 | a.AI("ADD [EAX],0x1") #refcount 32 | a.EP() 33 | 34 | mem = CpToMemory(a.Compile()) 35 | mem.MakeMemory() 36 | mem.BindPythonFunctions(globals()) 37 | 38 | test_print("Foo") 39 | test_print2('bar') 40 | 41 | if __name__ == "__main__": 42 | unittest.main() 43 | 44 | -------------------------------------------------------------------------------- /test/test_structs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2004-2010 Grant T. Olson. 2 | # See license.txt for terms. 
3 | 4 | import unittest 5 | from pyasm.structs import PyObject, PyUnicodeObject 6 | 7 | debugOffset = 0 8 | from sys import executable 9 | if executable.endswith("_d.exe"): 10 | debugOffset = 8 11 | 12 | class test_long_to_string(unittest.TestCase): 13 | def testPyObject(self): 14 | self.assertEquals(PyObject.ob_refcnt, debugOffset+0) 15 | self.assertEquals(PyObject.ob_type, debugOffset+4) 16 | 17 | def testPyUnicodeObject(self): 18 | """ This is the last object, make sure this got generated""" 19 | 20 | self.assertEquals(PyUnicodeObject.ob_refcnt,debugOffset+0) 21 | self.assertEquals(PyUnicodeObject.ob_type, debugOffset+4) 22 | self.assertEquals(PyUnicodeObject.str, debugOffset+ 12) 23 | 24 | if __name__ == "__main__": 25 | unittest.main() 26 | -------------------------------------------------------------------------------- /test/test_time.py: -------------------------------------------------------------------------------- 1 | # Copyright 2004-2010 Grant T. Olson. 2 | # See license.txt for terms. 3 | 4 | from pyasm.x86asm import assembler, CDECL 5 | from pyasm.x86cpToMemory import CpToMemory 6 | 7 | nonePointer = id(None) 8 | noneRefcount = nonePointer 9 | 10 | a = assembler() 11 | a.ADStr("hello_world", "Hello world!\n\0") 12 | a.AP("test_print", CDECL) 13 | a.AddLocal("self") 14 | a.AddLocal("args") 15 | #a.AI("INT 3") 16 | a.AI("PUSH hello_world") 17 | a.AI("CALL PySys_WriteStdout") 18 | #a.AI("INT 3") 19 | a.AI("MOV EAX,%s" % id(None)) 20 | a.AI("ADD [EAX],0x1") 21 | a.EP() 22 | 23 | 24 | mem = CpToMemory(a.Compile()) 25 | mem.MakeMemory() 26 | mem.BindPythonFunctions(globals()) 27 | 28 | def normalHelloWorld(): 29 | print "Hello World!" 30 | return None 31 | 32 | -------------------------------------------------------------------------------- /test/test_variables.py: -------------------------------------------------------------------------------- 1 | # Copyright 2004-2010 Grant T. Olson. 2 | # See license.txt for terms. 
3 | 4 | """ 5 | Test various variables, parameters and constants in procedures 6 | """ 7 | 8 | from pyasm.x86asm import assembler, CDECL 9 | from pyasm.x86cpToCoff import CpToCoff 10 | import unittest 11 | import os,sys 12 | from linkCmd import linkCmd 13 | 14 | class test_variables(unittest.TestCase): 15 | def test_params(self): 16 | """ 17 | Make sure params get referenced correctly 18 | """ 19 | a = assembler() 20 | 21 | a.ADStr('hello_planets','3h + 12h + 12h = %xh\n\0') 22 | 23 | a.AP("_main") 24 | a.AI("PUSH EBX") 25 | a.AI("MOV EBX,0x12") 26 | a.AI("PUSH EBX") 27 | a.AI("MOV EBX,0x3") 28 | a.AI("PUSH EBX") 29 | a.AI("CALL get_x_plus_two_y") 30 | a.AI("PUSH EAX") 31 | a.AI("PUSH hello_planets") 32 | a.AI("CALL _printf") 33 | a.AI("ADD ESP,0x8") #printf is _cdecl 34 | #a.AI("XOR EAX,EAX") 35 | a.AI("POP EBX") 36 | a.EP() 37 | 38 | #get_x_plus_two_y proc 39 | a.AP("get_x_plus_two_y") 40 | a.AA("x") 41 | a.AA("y") 42 | a.AI("XOR EAX,EAX") 43 | a.AI("MOV EAX,x") 44 | a.AI("ADD EAX,y") 45 | a.AI("ADD EAX,y") 46 | a.EP() 47 | 48 | cp = a.Compile() 49 | 50 | coff = CpToCoff(cp,"-defaultlib:LIBCMT -defaultlib:OLDNAMES ").makeReleaseCoff() 51 | f = file("output/testParams.obj","wb") 52 | coff.WriteToFile(f) 53 | f.close() 54 | 55 | if sys.platform == 'win32': 56 | self.assertEquals(os.system(linkCmd("testParams")), 0) 57 | self.assertEquals(os.popen("cd output && testParams.exe").read(), 58 | "3h + 12h + 12h = 27h\n") 59 | else: 60 | print "Skipping linker test, coff files are only valid on win32 platforms" 61 | 62 | def test_locals(self): 63 | """ 64 | Make sure locals get referenced correctly 65 | """ 66 | a = assembler() 67 | 68 | a.ADStr('hello_planets','3h + 12h + 12h = %xh\n\0') 69 | 70 | a.AP("_main") 71 | a.AI("CALL _get_x_plus_two_y") 72 | a.AI("PUSH EAX") 73 | a.AI("PUSH hello_planets") 74 | a.AI("CALL _printf") 75 | a.AI("XOR EAX,EAX") 76 | a.AI("ADD ESP,0x8") #printf is _cdecl 77 | a.EP() 78 | 79 | #_get_x_plus_two_y proc 80 | a.AP("_get_x_plus_two_y") 81 |
a.AddLocal("x") 82 | a.AddLocal("y") 83 | a.AI("MOV x,0x3") 84 | a.AI("MOV y,0x12") 85 | a.AI("XOR EAX,EAX") 86 | a.AI("MOV EAX,x") 87 | a.AI("ADD EAX,y") 88 | a.AI("ADD EAX,y") 89 | a.EP() 90 | 91 | cp = a.Compile() 92 | 93 | coff = CpToCoff(cp,"-defaultlib:LIBCMT -defaultlib:OLDNAMES ").makeReleaseCoff() 94 | f = file("output/testLocals.obj","wb") 95 | coff.WriteToFile(f) 96 | f.close() 97 | 98 | if sys.platform == 'win32': 99 | self.assertEquals(os.system(linkCmd("testLocals")), 0) 100 | self.assertEquals(os.popen("cd output && testlocals.exe").read(), 101 | "3h + 12h + 12h = 27h\n") 102 | else: 103 | print "Skipping linker test, coff files are only valid on win32 platforms" 104 | 105 | 106 | if __name__ == '__main__': 107 | unittest.main() 108 | 109 | -------------------------------------------------------------------------------- /test/test_winmem.py: -------------------------------------------------------------------------------- 1 | # Copyright 2004-2010 Grant T. Olson. 2 | # See license.txt for terms. 
3 | 4 | import unittest 5 | import pyasm.excmem 6 | from pyasm.x86asm import assembler, CDECL, STDCALL, PYTHON 7 | from pyasm.x86cpToMemory import CpToMemory 8 | 9 | class test_excmem(unittest.TestCase): 10 | def test_simple_function(self): 11 | a = assembler() 12 | a.ADStr("hello_world", "Hello world!\n\0") 13 | a.AP("test_print", PYTHON) 14 | a.AddLocal("self") 15 | a.AddLocal("args") 16 | #a.AI("INT 3") 17 | a.AI("PUSH hello_world") 18 | a.AI("CALL PySys_WriteStdout") 19 | a.AI("ADD ESP,0x4") #CDECL CLEANUP 20 | a.AI("MOV EAX,%s" % id(None)) 21 | a.AI("ADD [EAX],0x1") #refcount 22 | a.EP() 23 | 24 | a.AP("test_print2", PYTHON) 25 | a.AddLocal("self") 26 | a.AddLocal("args") 27 | #a.AI("INT 3") 28 | a.AI("PUSH hello_world") 29 | a.AI("CALL PySys_WriteStdout") 30 | a.AI("ADD ESP,0x4") #cdecl cleanup 31 | a.AI("MOV EAX,%s" % id(None)) 32 | a.AI("ADD [EAX],0x1") #refcount 33 | a.EP() 34 | 35 | mem = CpToMemory(a.Compile()) 36 | mem.MakeMemory() 37 | mem.BindPythonFunctions(globals()) 38 | 39 | test_print("Foo") 40 | test_print2('bar') 41 | 42 | if __name__ == "__main__": 43 | unittest.main() 44 | 45 | -------------------------------------------------------------------------------- /test/test_x86asm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2004-2010 Grant T. Olson. 2 | # See license.txt for terms. 
3 | 4 | import unittest 5 | from pyasm.x86asm import * 6 | 7 | class test_instruction_lookups(unittest.TestCase): 8 | 9 | def test_simple_matches(self): 10 | self.assertEquals(findBestMatch("MOV EAX, 12").InstructionString,'MOV EAX,imm32') 11 | self.assertEquals(findBestMatch("MOV EAX,EBX").InstructionString,'MOV r32,r/m32') 12 | self.assertEquals(findBestMatch("MOV [EAX],12").InstructionString,'MOV r/m32,imm32') 13 | self.assertEquals(findBestMatch("MOV [EAX-4],12").InstructionString,'MOV r/m32,imm32') 14 | 15 | def test_byte_word_matches(self): 16 | self.assertEquals(findBestMatch("MOV AL,3").InstructionString,'MOV AL,imm8') 17 | self.assertEquals(findBestMatch("MOV AX,BX").InstructionString,'MOV r16,r/m16') 18 | 19 | def test_register_memory(self): 20 | self.assertEquals(findBestMatch("MOV [EAX],EBX").InstructionString,'MOV r/m32,r32') 21 | self.assertEquals(findBestMatch("MOV EAX,[EBX]").InstructionString,'MOV r32,r/m32') 22 | self.assertEquals(findBestMatch("MOV EAX, EBX").InstructionString,'MOV r32,r/m32') 23 | self.failUnlessRaises(x86asmError,printBestMatch,"MOV [EAX],[EBX]") 24 | self.assertEquals(findBestMatch("MOV [0x1234],EAX").InstructionString,'MOV r/m32,r32') 25 | self.assertEquals(findBestMatch("MOV [foo],EAX").InstructionString,'MOV r/m32,r32') 26 | 27 | 28 | def test_invalid_combos(self): 29 | "Can't move different sized registers back and forth" 30 | self.failUnlessRaises(x86asmError,printBestMatch,"MOV AX,AL") 31 | self.failUnlessRaises(x86asmError,printBestMatch,"MOV AL,EAX") 32 | self.failUnlessRaises(x86asmError,printBestMatch,"MOV [EAX],AX") 33 | self.failUnlessRaises(x86asmError,printBestMatch,"MOV EAX,[AX]") 34 | 35 | def test_symbol_resolution(self): 36 | self.assertEquals(findBestMatch('PUSH hw_string').InstructionString,'PUSH imm32') 37 | self.assertEquals(findBestMatch('CALL _printf').InstructionString,'CALL rel32') 38 | 39 | def test_int3(self): 40 | self.assertEquals(findBestMatch('INT 3').InstructionString,'INT 3') 41 | 42 | class 
assemblerTests(unittest.TestCase): 43 | def test_basic_assembler(self): 44 | a = assembler() 45 | a.ADStr('hw_string','Hello, World!\n\0') 46 | a.AIL('_main') 47 | a.AI('PUSH hw_string') 48 | a.AI('CALL _printf') 49 | a.AI('ADD ESP,4') 50 | a.AI('XOR EAX,EAX') 51 | a.AI('RET') 52 | 53 | a.AIL('_main2') 54 | a.AI('PUSH hw_string') 55 | a.AI('CALL _printf') 56 | a.AI('ADD ESP,4') 57 | a.AI('XOR EAX,EAX') 58 | a.AI('RET') 59 | 60 | def test_proc_locals(self): 61 | a = assembler() 62 | a.AP("foo") 63 | a.AA("bar") 64 | a.AA("baz") 65 | a.AddLocal("x") 66 | a.AddLocal("y") 67 | a.AI("MOV EAX,bar") 68 | a.AI("MOV EAX,baz") 69 | a.AI("MOV x,EAX") 70 | a.AI("MOV y,12") 71 | a.EP() 72 | 73 | def test_proc_end(self): 74 | a = assembler() 75 | a.AP('foo') 76 | a.AI('XOR EAX,EAX') 77 | self.failUnlessRaises(x86asmError, a.AP, 'bar') 78 | 79 | def test_no_args_after_code(self): 80 | a = assembler() 81 | a.AP("foo") 82 | a.AA("bar") 83 | a.AI("MOV bar, 4") 84 | self.failUnlessRaises(x86asmError,a.AA,"baz") 85 | a.EP() 86 | 87 | def test_no_locals_after_code(self): 88 | a = assembler() 89 | a.AP("foo") 90 | a.AddLocal("bar") 91 | a.AI("MOV bar, 4") 92 | self.failUnlessRaises(x86asmError,a.AddLocal,"baz") 93 | a.EP() 94 | 95 | def test_constants(self): 96 | a = assembler() 97 | a.AC("foo","0x4") 98 | a.AI("MOV EBX,[EAX+foo]") 99 | self.assertEquals(a.Instructions,[((2, 'MOV'), (1, 'EBX'), (3, ','), 100 | (5, '['), (1, 'EAX'), (7, '0x4'), 101 | (6, ']'))]) 102 | 103 | if __name__ == '__main__': 104 | unittest.main() 105 | -------------------------------------------------------------------------------- /test/test_x86inst.py: -------------------------------------------------------------------------------- 1 | # Copyright 2004-2010 Grant T. Olson. 2 | # See license.txt for terms. 
3 | 4 | import unittest 5 | from pyasm.x86inst import * 6 | from pyasm.x86asm import * 7 | 8 | class test_concrete_value_resolution(unittest.TestCase): 9 | def test_ModRMs(self): 10 | a = assembler() 11 | a.AP("foo") 12 | a.AA("bar") 13 | a.AA("baz") 14 | a.AddLocal("x") 15 | a.AddLocal("y") 16 | a.AI("MOV EAX,bar") 17 | a.AI("MOV EAX,baz") 18 | a.AI("MOV x,EAX") 19 | a.AI("MOV y,12") 20 | a.EP() 21 | 22 | a.Compile() 23 | 24 | class test_digit_flag(unittest.TestCase): 25 | def test_sub_flag(self): 26 | """ 27 | Make sure we set the appropriate 'digit' flag for instructions that have it. 28 | I'm cheating a little in this test because we'll eventually optimize to find the 29 | imm8 version of the instruction, but I'm not doing that yet. 30 | """ 31 | i = findBestMatch("SUB ESP,0x40") 32 | ii = i.GetInstance() 33 | ii.LoadConcreteValues(tokenizeInst("SUB ESP,0x40")) 34 | s = ii.OpDataAsString() 35 | self.assertEqual(s,"\x81\xec@\x00\x00\x00") 36 | 37 | class test_text_generation(unittest.TestCase): 38 | def test_ModRM_calculation(self): 39 | "wasn't calculating this properly. Added an extra 00 to some text output" 40 | m = findBestMatch("CALL foo") 41 | i = m.GetInstance() 42 | i.LoadConcreteValues("CALL foo") 43 | self.assertEqual(i.OpText(), 44 | ' 00000000: E8 00 00 00 00 CALL foo ') 45 | def test_other_bad_string(self): 46 | m = findBestMatch("MOV EAX,0xCCCCCCCC") 47 | i = m.GetInstance() 48 | i.LoadConcreteValues("MOV EAX,0xCCCCCCCC") 49 | self.assertEqual(i.OpText(), 50 | ' 00000000: B8 CC CC CC CC MOV EAX,0xCCCCCCCC') 51 | 52 | if __name__ == "__main__": 53 | unittest.main() 54 | 55 | -------------------------------------------------------------------------------- /test/test_x86tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2004-2010 Grant T. Olson. 2 | # See license.txt for terms. 
import unittest
from pyasm.x86tokenizer import *


class x86tokenizer_test(unittest.TestCase):
    """Tests for tokenizeInst / tokenizeInstDef from pyasm.x86tokenizer.

    ``re``, the ``*Re`` patterns, the token-type constants and
    ``tokenizeError`` are all expected to arrive through the star import
    above.
    """

    def test_res(self):
        """
        Just make sure that our re's don't throw an error while being parsed
        by the engine
        """
        re.compile(opcodeRe)
        re.compile(operandRe)
        re.compile(commaRe)
        re.compile(defRegRe)
        re.compile(basicRegisterRe)
        re.compile(lbracketRe)
        re.compile(rbracketRe)
        re.compile(numberRe)
        re.compile(symbolRe)
        re.compile(instRegRe)

    def test_tokenizeInstDef(self):
        self.assertRaises(tokenizeError, tokenizeInstDef, "MOV EAX,foo")

    def test_InstDefVerification(self):
        self.assertRaises(tokenizeError, tokenizeInstDef, "MOV EAX,,,r/m8")
        self.assertRaises(tokenizeError, tokenizeInstDef, "m8 MOV EAX")
        self.assertRaises(tokenizeError, tokenizeInstDef, "MOV EAX EAX EAX")
        self.assertRaises(tokenizeError, tokenizeInstDef, "MOV ,EAX,r/m8")

    def test_tokenizeInst(self):
        """
        Test various inputs to this function
        """
        self.assertEqual(tokenizeInst('PUSH hw_string'),
                         ((OPCODE, 'PUSH'), (SYMBOL, 'hw_string')))
        self.assertEqual(tokenizeInst('CALL _printf'),
                         ((OPCODE, 'CALL'), (SYMBOL, '_printf')))
        self.assertEqual(tokenizeInst('ADD ESP,4'),
                         ((OPCODE, 'ADD'), (REGISTER, 'ESP'),
                          (COMMA, ','), (NUMBER, '4')))
        self.assertEqual(tokenizeInst('XOR EAX,EAX'),
                         ((OPCODE, 'XOR'), (REGISTER, 'EAX'),
                          (COMMA, ','), (REGISTER, 'EAX')))
        self.assertEqual(tokenizeInst('MOV [EAX],12'),
                         ((OPCODE, 'MOV'), (LBRACKET, '['), (REGISTER, 'EAX'),
                          (RBRACKET, ']'), (COMMA, ','), (NUMBER, '12')))
        self.assertEqual(tokenizeInst('MOV [EAX+0xCC],12'),
                         ((OPCODE, 'MOV'), (LBRACKET, '['), (REGISTER, 'EAX'),
                          (NUMBER, '0xCC'), (RBRACKET, ']'), (COMMA, ','),
                          (NUMBER, '12')))
        self.assertEqual(tokenizeInst('MOV [EAX+12],12'),
                         ((OPCODE, 'MOV'), (LBRACKET, '['), (REGISTER, 'EAX'),
                          (NUMBER, '12'), (RBRACKET, ']'), (COMMA, ','),
                          (NUMBER, '12')))
        self.assertEqual(tokenizeInst('RET'), ((OPCODE, 'RET'),))
        self.assertRaises(tokenizeError, tokenizeInst, 'MOV EAX,r/m8')

    def test_mixed_case_symbols(self):
        # A symbol that merely starts with an opcode name must stay a SYMBOL.
        self.assertEqual(tokenizeInst('RET'), ((OPCODE, 'RET'),))
        self.assertEqual(tokenizeInst('RET RETurn'),
                         ((OPCODE, 'RET'), (SYMBOL, 'RETurn'),))
        self.assertEqual(tokenizeInst('RET _RET'),
                         ((OPCODE, 'RET'), (SYMBOL, '_RET'),))
        self.assertEqual(tokenizeInst('RET RET_URN'),
                         ((OPCODE, 'RET'), (SYMBOL, 'RET_URN'),))

    def test_InstVerification(self):
        self.assertRaises(tokenizeError, tokenizeInst, "MOV [EAX,12],12")
        self.assertRaises(tokenizeError, tokenizeInst, "MOV ,EAX,12")
        self.assertRaises(tokenizeError, tokenizeInst, "MOV [EAX+12+13],12")
        self.assertRaises(tokenizeError, tokenizeInst, "MOV [[EAX,12],12")
        self.assertRaises(tokenizeError, tokenizeInst, "MOV [EAX,12,12")

    def test_constant(self):
        # The expected tuples deliberately use the raw numeric token types.
        self.assertEqual(tokenizeInst("MOV [EAX+foo],0x3"),
                         ((2, 'MOV'), (5, '['),
                          (1, 'EAX'),
                          (8, 'foo'), (6, ']'), (3, ','),
                          (7, '0x3')))

    def test_string(self):
        self.assertEqual(tokenizeInst("PUSH 'hello world\\n\\0'"),
                         ((2, 'PUSH'),
                          (9, 'hello world\\n\\0')))


if __name__ == '__main__':
    unittest.main()


# --------------------------------------------------------------------------
# /test/testall.py
# --------------------------------------------------------------------------
# Copyright 2004-2010 Grant T. Olson.
# See license.txt for terms.

import unittest

# Quick and dirty way to run all tests.  Should probably use introspection
# in the future.
# (continuation of /test/testall.py: pull every test module into one
# namespace so a single unittest.main() runs them all)
import unittest  # testall.py imports this at its top; repeated so this part is self-contained

from rawHelloWorld import *
from disasmHelloWorld import *
from test_x86asm import *
from test_bugs import *
from test_directives import *
#from test_linker import *
from test_object_creation import *
from test_python_funcs import *
from test_time import *
from test_variables import *
from test_winmem import *
from test_x86inst import *
from test_x86tokenizer import *
from test_structs import *

if __name__ == "__main__":
    unittest.main()


# --------------------------------------------------------------------------
# /x86PackUnpack.py
# --------------------------------------------------------------------------
# Copyright 2004-2010 Grant T. Olson.
# See license.txt for terms.
#
# x86PackUnpack.py
# ----------------
#
# Contains all the functions to go between little-endian data streams and
# actual binary data.

import struct
import sys


def pylongToSignedInt(x):
    """Reinterpret an unsigned 32-bit value as a signed 32-bit int.

    Values up to 0x7FFFFFFF are returned unchanged; values from 0x80000000
    to 0xFFFFFFFF wrap to the corresponding negative number.

    Raises:
        TypeError: if x does not fit in 32 bits.
    """
    if x > 0xFFFFFFFF:
        raise TypeError("This is too big to convert to a signed int")
    if x >= 0x80000000:
        # Subtract 2**32 explicitly.  The old "x - sys.maxint - sys.maxint - 2"
        # only equals this when sys.maxint is 2**31 - 1 (32-bit builds), and
        # sys.maxint does not exist on Python 3.
        x = x - 0x100000000
    return int(x)


# NOTE: extraction damage.  The text from here to line 57 of x86asm.py was
# lost: the rest of x86PackUnpack.py, and the head of x86asm.py including
# its imports, possibleDefault() and the start of
# possibleImmediateOrRelative().  The surviving fragment is kept verbatim
# (re-indented) so nothing visible is dropped:
#
#   def ucharFromFile(f):return struct.unpack("= -127 and num <= 128:
#               immVals.insert(0,(OPERAND,'imm8'))
#               relVals.insert(0,(OPERAND,'rel8'))
#
#       #lookup constant value like INT 3
#       if first[0] == NUMBER:
#           vals.append(first)
#
#       vals.extend(immVals)
#       vals.extend(relVals)
#
#       if not rest:
#           for val in vals:
#               yield [val]
#       else:
#           possibleLookup = getProperLookup(*rest)
#           for val in vals:
#               for restMatches in possibleLookup(*rest):
#                   yldVal = [val]
#                   yldVal.extend(restMatches)
#                   yield yldVal


def _expandWithRest(vals, rest):
    """Yield each candidate in vals followed by every expansion of rest.

    vals is a list of (tokenType, text) candidates for the current operand;
    rest is the tuple of tokens still to be expanded (may be empty).
    """
    if not rest:
        for val in vals:
            yield [val]
    else:
        possibleLookup = getProperLookup(*rest)
        for val in vals:
            for restMatches in possibleLookup(*rest):
                yldVal = [val]
                yldVal.extend(restMatches)
                yield yldVal


def possibleRegister(*toks):
    """
    Registers may be hardcoded for superfast lookups, or an r or r/m value.
    We could probably optimize better with a better understanding of the
    environment.  i.e. it doesn't make sense to move an r/m8 into an r32

    Yields candidate token lists for toks, trying the literal register first
    and then the generic r<size> and r/m<size> operand forms.
    Raises x86asmError for a register name that is in none of rb/rw/rd.
    """
    regName = toks[0][1]
    registerVals = [(REGISTER, '%s' % regName)]
    if regName in rb:
        registerVals.append((OPERAND, 'r8'))
        registerVals.append((OPERAND, 'r/m8'))
    elif regName in rw:
        registerVals.append((OPERAND, 'r16'))
        registerVals.append((OPERAND, 'r/m16'))
    elif regName in rd:
        registerVals.append((OPERAND, 'r32'))
        registerVals.append((OPERAND, 'r/m32'))
    else:
        raise x86asmError("Invalid Register name '%s'" % regName)

    for match in _expandWithRest(registerVals, toks[1:]):
        yield match


def possibleIndirect(*toks):
    """
    This is pretty much an r/m value
    i.e. it doesn't make sense to move an r/m8 into an r32

    toks starts with the LBRACKET token.  Everything up to and including the
    matching RBRACKET is consumed and replaced by the candidate operand
    forms; the tokens after the bracket are expanded recursively.
    """
    possibleVals = []
    lbracket, operand, rest = toks[0], toks[1], toks[2:]

    if operand[0] in (NUMBER, SYMBOL):
        # TODO: CAN WE OPTIMIZE THIS?
        possibleVals.append((OPERAND, 'r/m32'))
        possibleVals.append((OPERAND, 'r/m16'))
        possibleVals.append((OPERAND, 'r/m8'))
    elif operand[0] == REGISTER:
        regName = operand[1]
        if rest[0][0] == RBRACKET:
            # Special case: a bare [REG] may match a hardcoded definition.
            possibleVals.append((REGISTER, '[%s]' % regName))

        if regName in rb:
            possibleVals.append((OPERAND, 'r/m8'))
        elif regName in rw:
            possibleVals.append((OPERAND, 'r/m16'))
        elif regName in rd:
            possibleVals.append((OPERAND, 'r/m32'))
        else:
            raise x86asmError("Invalid Register name '%s'" % regName)

    # Skip any displacement tokens, then the closing bracket itself.
    while rest[0] != (RBRACKET, ']'):
        rest = rest[1:]
    rest = rest[1:]

    for match in _expandWithRest(possibleVals, rest):
        yield match


# Token type -> generator that expands that token into candidate operands.
possibleLookups = {
    REGISTER: possibleRegister,
    OPCODE: possibleDefault,
    COMMA: possibleDefault,
    LBRACKET: possibleIndirect,
    NUMBER: possibleImmediateOrRelative,
    SYMBOL: possibleImmediateOrRelative,
}


def getProperLookup(*toks):
    """Return the expansion generator for the type of the first token."""
    return possibleLookups[toks[0][0]]


def findBestMatchTokens(toks):
    """Return the first mnemonicDict entry matching a candidate expansion.

    Candidates are tried in the order the possible* generators produce them.
    Raises x86asmError when no candidate is a key of mnemonicDict.
    """
    retVal = None
    for x in possibleDefault(*toks):
        y = tuple(x)
        if y in mnemonicDict:
            retVal = mnemonicDict[y]
            break
    if retVal:
        return retVal
    else:
        raise x86asmError("Unable to find match for " + repr(toks))


def findBestMatch(s):
    """Tokenize the instruction string s and look up its definition.

    Raises x86asmError quoting the original string when nothing matches.
    """
    toks = tokenizeInst(s)
    try:
        return findBestMatchTokens(toks)
    except x86asmError:
        raise x86asmError("Unable to find match for '%s'" % s)


def printBestMatch(s):
    """Print the InstructionString of the definition chosen for s."""
    print("Best match for '%s' => '%s'" % (s, findBestMatch(s).InstructionString))
##################################################################
## END OF Find right instruction def based on concrete instruction
##################################################################

# NOTE: re, the tokenizer names (tokenizeInst, token-type constants,
# symbolRe, stringRe, Number), x86asmError, findBestMatchTokens and the two
# loggers are used below but defined outside this part of x86asm.py.


class labelRef:
    """A reference to a label by name."""
    def __init__(self, name):
        self.Name = name


class label:
    """A named position in the instruction stream; Address is set in pass1."""
    def __init__(self, name, typ=0):
        self.Name = name
        self.Address = 0x0
        self.Type = typ


class labelDict(dict):
    """dict that refuses to overwrite an existing label."""
    def __setitem__(self, key, val):
        if key in self:
            raise x86asmError("Duplicate Label Declaration '%s'" % key)
        else:
            dict.__setitem__(self, key, val)


class constDict(dict):
    """dict of constants; each value is stored as a (NUMBER, val) token."""
    def __setitem__(self, key, val):
        if key in self:
            raise x86asmError("Duplicate Constant Declaration '%s'" % key)
        else:
            dict.__setitem__(self, key, (NUMBER, val))


class data:
    """A named blob for the data section; Address is set in pass1."""
    def __init__(self, name, dat, size=0):
        self.Name = name
        self.Data = dat
        self.Size = size
        self.Address = 0x0


class codePackage:
    """Result of assembly: raw code/data plus symbol and patch-in tables."""
    def __init__(self):
        self.Code = ''
        self.CodeSymbols = []   # (name, address, type) per label
        self.CodePatchins = []  # collected from each instruction's GetSymbolPatchins()
        self.Data = ''
        self.DataSymbols = []   # (name, address) per data item


# Calling conventions / procedure kinds.
STDCALL, CDECL, PYTHON = range(1, 4)


class procedure:
    """Tracks the arguments and locals of the procedure being assembled.

    Arguments are addressed at positive offsets from EBP starting at 8;
    locals at negative offsets.  Neither may be added once the first
    instruction has been emitted (Frozen is set).
    """
    def __init__(self, name, typ=CDECL):
        self.Name = name
        self.Address = 0x0

        self.Type = typ

        self.Args = []        # (name, offset, bytes)
        self.ArgOffset = 8
        self.Locals = []      # (name, offset, bytes)
        self.LocalOffset = 0
        self.Frozen = 0

    def AddArg(self, name, bytes=4):
        """Declare an argument; raises x86asmError if the procedure is frozen."""
        if self.Frozen:
            # Fixed: the format arguments were swapped and the pieces were
            # concatenated without separating spaces.
            raise x86asmError("Cannot add arg %s to procedure %s. "
                              "This must happen before instructions are "
                              "added." % (name, self.Name))
        self.Args.append((name, self.ArgOffset, bytes))
        self.ArgOffset += bytes

    def AddLocal(self, name, bytes=4):
        """Declare a local; raises x86asmError if the procedure is frozen."""
        if self.Frozen:
            raise x86asmError("Cannot add local %s to procedure %s. "
                              "This must happen before instructions are "
                              "added." % (name, self.Name))
        self.Locals.append((name, self.LocalOffset, bytes))
        self.LocalOffset += bytes

    def LookupArg(self, name):
        """Return the tokens for [EBP+offset] of argument name, or None."""
        for x in self.Args:
            if x[0] == name:
                return ((LBRACKET, '['), (REGISTER, 'EBP'), (NUMBER, str(x[1])),
                        (RBRACKET, ']'))
        return None

    def LookupLocal(self, name):
        """Return the tokens for [EBP-(offset+4)] of local name, or None."""
        for x in self.Locals:
            if x[0] == name:
                # NOTE(review): the +4 is fixed regardless of the declared
                # size of the local -- confirm for non-4-byte locals.
                return ((LBRACKET, '['), (REGISTER, 'EBP'), (NUMBER, str(-(x[1]+4))),
                        (RBRACKET, ']'))
        return None

    def LookupVar(self, name):
        """Look name up among the arguments first, then the locals."""
        retVal = self.LookupArg(name)
        if retVal is None:
            retVal = self.LookupLocal(name)
        return retVal

    def EmitProcStartCode(self, a):
        """
        Save EBP
        Copy ESP so we can use it to reference params and locals
        Subtract the total size of the locals from ESP
        """
        a.AI("PUSH EBP")
        a.AI("MOV EBP, ESP")
        if self.LocalOffset:
            a.AI("SUB ESP, %s" % self.LocalOffset)

    def EmitProcEndCode(self, a):
        """
        Restore settings and RETurn
        TODO: Do we need to handle a Return value here?
        """
        if self.LocalOffset:
            a.AI("ADD ESP, %s" % self.LocalOffset)

        #check for malformed stack
        #a.AI("CMP EBP,ESP")
        #a.AI("CALL __chkesp")

        a.AI("MOV ESP, EBP")
        a.AI("POP EBP")

        if self.Type == STDCALL and self.ArgOffset - 8:
            #HAD ARGS AND IS A STDCALL, CLEANUP
            a.AI("RET %s" % (self.ArgOffset - 8))
        else:
            a.AI("RET")


#
# assembler directive re's
#
# NOTE: the dump this was recovered from had every "<...>" span stripped, so
# the named groups read "(?P..." with no name.  TYPE, NUM and REST are
# restored from the groupdict() keys the directive handlers read below.
strRe = re.compile(r"\s*" + symbolRe + r"\s*" + stringRe + "?$", re.DOTALL)
procRe = re.compile(r"\s*" + symbolRe + r"\s*(?P<TYPE>STDCALL|CDECL|PYTHON)?$")
varRe = re.compile(r"\s*" + symbolRe + r"\s*(?P<NUM>" + Number[1:] + "?$")
callRe = re.compile(r"\s*(%s|%s)\s*(?P<REST>.*)" % (symbolRe, stringRe))


class assembler:
    """Collects instructions, labels and data, then assembles them.

    Source is fed either through the Add*/short-name methods (AI, AP, ...)
    or one line at a time through __call__; lines starting with '!' are
    dispatched to the directive methods.  Compile() returns a codePackage.
    """
    def __init__(self):
        self.Instructions = []   # token tuples and label objects, in order
        self.Data = []
        self.Labels = {}
        self.Constants = constDict()
        self.CurrentProcedure = None
        self.StartAddress = 0x0
        self.DataStartAddress = 0x0
        self.inlineStringNo = 1000   # counter for generated inline string names

    def registerLabel(self, lbl):
        """Record lbl by name; raises x86asmError on a duplicate name."""
        if lbl.Name in self.Labels:
            raise x86asmError("Duplicate Label Registration [%s]" % lbl.Name)
        self.Labels[lbl.Name] = lbl

    #
    # Write assembly code
    #

    def freezeProc(self):
        """Emit the prologue of the current procedure the first time through."""
        if self.CurrentProcedure and not self.CurrentProcedure.Frozen:
            #initialize proc
            self.CurrentProcedure.Frozen = 1
            self.CurrentProcedure.EmitProcStartCode(self)

    def AddInstruction(self, inst):
        """Tokenize inst, substitute strings/constants/locals, and queue it.

        * STRING tokens become a generated data item and a SYMBOL naming it.
        * SYMBOLs naming a constant are replaced by the constant's token.
        * Inside a procedure, SYMBOLs naming an arg/local become [EBP+n].
        * Any other SYMBOL is left for later resolution.
        """
        self.freezeProc()
        instToks = tokenizeInst(inst)
        instToksMinusLocals = ()

        for tok in instToks:
            if tok[0] == STRING:
                # Create an inlined string
                inlineName = "inline_pyasm_string%i" % self.inlineStringNo
                escapedString = tok[1].decode("string_escape")
                self.ADStr(inlineName, escapedString)
                instToksMinusLocals += ((SYMBOL, inlineName),)
                self.inlineStringNo += 1
            elif tok[0] != SYMBOL:  # do nothing
                instToksMinusLocals += (tok,)
            elif tok[1] in self.Constants:  # replace constant
                instToksMinusLocals += (self.Constants[tok[1]],)
            elif self.CurrentProcedure:
                #look for local match
                local = self.CurrentProcedure.LookupVar(tok[1])
                if local:  # found match
                    instToksMinusLocals += local
                else:  # defer resolution to second pass
                    instToksMinusLocals += (tok,)
            else:
                # Not in a procedure: keep the symbol as-is.  This used to
                # assign the whole of instToks, which duplicated every later
                # token and undid earlier substitutions.
                instToksMinusLocals += (tok,)

        self.Instructions.append(instToksMinusLocals)

    def AI(self, inst):
        """Short name for AddInstruction."""
        self.AddInstruction(inst)

    def AddInstructionLabel(self, name, typ=0):
        """Register a label at the current position in the instruction list."""
        lbl = label(name, typ)
        self.registerLabel(lbl)
        self.Instructions.append(lbl)

    def AIL(self, name):
        """Short name for AddInstructionLabel."""
        self.AddInstructionLabel(name)

    def AddData(self, name, dat):
        """Register a data label and queue dat for the data section."""
        lbl = label(name)
        self.registerLabel(lbl)
        self.Data.append(data(name, dat, len(dat)))

    def ADStr(self, name, dat):
        """Short name for AddData, used for string data."""
        self.AddData(name, dat)

    def AddProcedure(self, name, typ=STDCALL):
        """Start a procedure; raises x86asmError if one is still open."""
        if self.CurrentProcedure:  # didn't emit procedure cleanup code
            raise x86asmError("Must end procedure '%s' before starting proc "
                              " '%s'" % (self.CurrentProcedure.Name, name))
        self.AddInstructionLabel(name, typ)
        proc = procedure(name, typ)
        self.CurrentProcedure = proc

    def AP(self, name, typ=STDCALL):
        """Short name for AddProcedure."""
        self.AddProcedure(name, typ)

    def AddArgument(self, name, size=4):
        """Declare an argument of the current procedure."""
        self.CurrentProcedure.AddArg(name, size)

    def AA(self, name, size=4):
        """Short name for AddArgument."""
        self.AddArgument(name, size)

    def AddLocal(self, name, size=4):
        """Declare a local of the current procedure."""
        self.CurrentProcedure.AddLocal(name, size)

    def EndProc(self):
        """Emit the epilogue of the current procedure, if any, and close it."""
        if self.CurrentProcedure:
            self.CurrentProcedure.EmitProcEndCode(self)
            self.CurrentProcedure = None

    def EP(self):
        """Short name for EndProc."""
        self.EndProc()

    def AddConstant(self, name, val):
        """Define a constant; raises x86asmError on redefinition."""
        self.Constants[name] = val

    def AC(self, name, val):
        """Short name for AddConstant."""
        self.AddConstant(name, val)

    #
    # end write assembly code
    #

    #
    # handle assembler directives
    #
    def getVarNameAndSize(self, s, directive="variable"):
        """Parse "name [size]" from a directive; size defaults to 4 (DWORD).

        The first parameter used to be named t, so the instance itself was
        interpolated into the error message.  directive is only used to
        label that message.
        """
        matches = varRe.match(s)
        if not matches:
            raise x86asmError("Couldn't parse %s assembler directive %s"
                              % (directive, repr(s)))
        matches = matches.groupdict()
        name = matches['SYMBOL']
        if matches['NUM']:
            # NOTE(review): eval() on assembler source text.  varRe is meant
            # to restrict this to a numeric literal -- confirm before
            # feeding untrusted source through here.
            size = eval(matches['NUM'])
        else:
            size = 4  # default to DWORD
        return name, size

    def PROC(self, params):
        """!PROC name [STDCALL|CDECL|PYTHON] -- start a procedure (CDECL default)."""
        matches = procRe.match(params)
        if not matches:
            # Fixed: the exception was constructed but never raised.
            raise x86asmError("Couldn't parse PROC assembler directive %s" % repr(params))
        matches = matches.groupdict()

        name = matches['SYMBOL']

        if matches['TYPE']:
            t = matches['TYPE']
            if t == 'CDECL':
                c = CDECL
            elif t == 'STDCALL':
                c = STDCALL
            elif t == 'PYTHON':
                c = PYTHON
            else:
                raise x86asmError("Couldn't parse PROC assembler directive %s" % repr(params))
        else:
            c = CDECL

        self.AddProcedure(name, c)

    def ARG(self, params):
        """!ARG name [size] -- declare an argument of the current procedure."""
        name, size = self.getVarNameAndSize(params, "ARG")
        self.AddArgument(name, size)

    def LOCAL(self, params):
        """!LOCAL name [size] -- declare a local of the current procedure."""
        name, size = self.getVarNameAndSize(params, "LOCAL")
        self.AddLocal(name, size)

    def ENDPROC(self, params):
        """!ENDPROC -- close the current procedure; takes no parameters."""
        if params:
            raise x86asmError("Couldn't parse assembler directive %s" % repr(params))
        self.EndProc()

    def CALL(self, params):
        """!CALL proc arg... -- push the args right to left, then CALL proc.

        Each argument is either a symbol or a quoted string.
        """
        source = params  # kept for error messages; params is rebound below
        matches = callRe.match(params)
        if not matches:
            raise x86asmError("Couldn't parse assembler directive %s" % repr(source))
        matches = matches.groupdict()
        proc, rest = matches['SYMBOL'], matches['REST']
        if not proc:
            # The first item matched as a quoted string, not a procedure name.
            raise x86asmError("Couldn't parse assembler directive %s" % repr(source))
        params = []

        while rest:
            # Fixed: .groupdict() used to be called before the None check.
            matches = callRe.match(rest)
            if not matches:
                raise x86asmError("Couldn't parse assembler directive %s" % repr(source))
            matches = matches.groupdict()
            rest = matches['REST']
            if matches['SYMBOL']:
                first = matches['SYMBOL']
            elif matches['STRING']:
                first = matches['q'] + matches['STRING'] + matches['q']
            else:
                raise x86asmError("Couldn't parse assembler directive %s" % repr(source))
            params.append(first)

        params.reverse()  # push from right to left
        for param in params:
            self.AI("PUSH %s" % param)
        self.AI("CALL %s" % proc)

    def CHARS(self, params):
        """!CHARS name 'string' -- add an escaped string to the data section."""
        matches = strRe.match(params)
        if not matches:
            raise x86asmError("Couldn't parse assembler directive %s" % repr(params))
        matches = matches.groupdict()
        name, s = matches['SYMBOL'], matches['STRING']
        if not (name and s):
            raise x86asmError("Couldn't parse assembler directive %s" % repr(params))
        self.ADStr(name, s.decode("string_escape"))

    def COMMENT(self, params):
        """!COMMENT ... -- ignored."""
        pass

    def dispatchDirective(self, s):
        """Split "!NAME params" and call the method called NAME with params."""
        firstSpace = s.find(' ')
        if firstSpace < 0:
            directive, params = s[1:], ''
        else:
            directive, params = s[1:firstSpace], s[firstSpace+1:]
        getattr(self, directive)(params)

    def dispatchStatement(self, s):
        """Treat s as a plain instruction."""
        self.AddInstruction(s)

    def DispatchString(self, s):
        """Log one source line, then process it as a directive or instruction."""
        x86sourceLogger.info(s.rstrip())  # don't want extra newline
        s = s.strip()
        if not s:
            pass  # blank line
        elif s[0] == "!":
            self.dispatchDirective(s)
        else:
            self.dispatchStatement(s)

    def __call__(self, s):
        self.DispatchString(s)

    #
    # start actual compilation code
    #
    def pass1(self):
        """Choose an encoding per instruction, assign addresses, build the package."""
        cp = codePackage()
        newInsts = []
        newData = []

        currentAddress = self.StartAddress
        for i in self.Instructions:
            if isinstance(i, tuple):  # an instruction to lookup
                inst = findBestMatchTokens(i).GetInstance()
                inst.LoadConcreteValues(i)
                inst.Address = currentAddress
                currentAddress += inst.GetInstructionSize()
                newInsts.append(inst)
                x86asmLogger.info(inst.OpText())
                cp.CodePatchins.extend(inst.GetSymbolPatchins())
            else:  # a label
                i.Address = currentAddress
                logMsg = " %08X: %s" % (i.Address, i.Name)
                x86asmLogger.info(logMsg)
                cp.CodeSymbols.append((i.Name, i.Address, i.Type))

        currentAddress = self.DataStartAddress
        for d in self.Data:
            d.Address = currentAddress
            newData.append(d.Data)
            cp.DataSymbols.append((d.Name, d.Address))
            currentAddress += d.Size
        cp.Code = ''.join([i.OpDataAsString() for i in newInsts])
        cp.Data = ''.join(newData)

        return cp

    def Compile(self):
        """Assemble everything queued so far and return the codePackage.

        Raises x86asmError if a procedure was started but never ended.
        """
        if self.CurrentProcedure:
            raise x86asmError("Never ended procedure '%s'" % self.CurrentProcedure.Name)
        return self.pass1()


def _log_header(text):
    """Log text framed by '=' rules on the source logger."""
    line = "=" * len(text)
    x86sourceLogger.info(line)
    x86sourceLogger.info(text)
    x86sourceLogger.info(line)
    x86sourceLogger.info('')


def codePackageFromFile(fil, constCallback=None):
    """Assemble every line of the file-like object fil into a codePackage.

    constCallback, if given, is called with the fresh assembler before any
    line is read, e.g. to predefine constants.
    """
    try:
        filename = fil.name
    except AttributeError:  # stringIO objects don't have a name property
        filename = repr(fil)

    _log_header("COMPILING FILE %s" % filename)
    a = assembler()

    if constCallback:
        constCallback(a)

    for line in fil.readlines():
        a(line)

    _log_header("END COMPILE OF %s" % filename)
    return a.Compile()


# The next definition in x86asm.py starts on these lines and continues on
# the following ones; its visible head is kept here as text only:
#
#   def manglePythonNames(cp):
#       """
#       Python names need to start with a _ for STDCALL designation in
#       static compilation, but names are resolved without this.  This adds
#       The name mangling where appropriate.
def manglePythonNames(cp):
    """
    Python names need to start with a _ for STDCALL designation in
    static compilation, but names are resolved without this.  This adds
    the name mangling where appropriate.

    Every patch-in whose symbol starts with "Py" or "_Py" gets one extra
    leading underscore.  cp.CodePatchins is replaced and cp is returned.
    """
    newPatchins = []
    for patch in cp.CodePatchins:
        if patch[0].startswith(("Py", "_Py")):
            patch = ("_" + patch[0], patch[1], patch[2])
        newPatchins.append(patch)
    cp.CodePatchins = newPatchins

    return cp  # even though it did it in place


if __name__ == '__main__':
    # Smoke test: one procedure with two args and two locals.
    a = assembler()
    a.AP("foo")
    a.AA("bar")
    a.AA("baz")
    a.AddLocal("x")
    a.AddLocal("y")
    a.AI("MOV EAX,bar")
    a.AI("MOV EAX,baz")
    a.AI("MOV x,EAX")
    a.AI("MOV y,12")
    a.EP()

    a.Compile()


# --------------------------------------------------------------------------
# /x86cpToCoff.py
# --------------------------------------------------------------------------
# Copyright 2004-2010 Grant T. Olson.
# See license.txt for terms.

from pyasm.coff import (coffError, coffFile, coffSection, coffRelocationEntry,
                        coffSymbolEntry, coffLineNumberEntry)
from pyasm.coffConst import *
from pyasm.coffSymbolEntries import (coffSymbolFile, coffSectionDef, coffFunctionDef,
                                     coffBf, coffLf, coffEf)
import logging, time
from binascii import crc32
from x86inst import RELATIVE, DIRECT


class CpToCoff:
    """Builds a coffFile (I386) from a codePackage.

    Typical use is CpToCoff(cp).makeReleaseCoff().
    """
    def __init__(self, cp, directives="-defaultlib:LIBCMT -defaultlib:OLDNAMES "):
        self.cp = cp
        self.directives = directives

        # Previous function / .bf / .ef symbols, so each new function can
        # back-fill the entry before it.
        self.lastFunction = None
        self.lastBf = None
        self.lastEf = None
        self.lastEfPos = 0

        c = coffFile()
        c.MachineType = coffFile.I386MAGIC

        self.coff = c

    def linkDirectiveSection(self):
        """Return the .drectve section carrying the linker directives string."""
        sect = coffSection()
        sect.Name = '.drectve'
        sect.Flags = (SectionFlags.LNK_REMOVE |
                      SectionFlags.LNK_INFO |
                      SectionFlags.ALIGN_1BYTES)
        sect.RawData = self.directives

        sym = self.coff.Symbols.GetSymbol('.drectve')
        sym.RebuildAuxiliaries(len(sect.RawData), 0, 0, crc32(sect.RawData), 0, 0)

        return sect

    def textSection(self):
        """Return the .text section: code, one relocation per patch-in, line numbers."""
        sect = coffSection()
        sect.Name = '.text\x00\x00\x00'
        sect.Flags = (SectionFlags.CNT_CODE |
                      SectionFlags.LNK_COMDAT |
                      SectionFlags.MEM_EXECUTE |
                      SectionFlags.MEM_READ |
                      SectionFlags.ALIGN_16BYTES)
        sect.RawData = self.cp.Code

        for patchin in self.cp.CodePatchins:
            # patchin is (symbolName, address, DIRECT|RELATIVE)
            addr = patchin[1]
            if patchin[2] == DIRECT:
                patchinType = RelocationTypes.I386_DIR32
            elif patchin[2] == RELATIVE:
                patchinType = RelocationTypes.I386_REL32
            else:
                raise RuntimeError("Invalid patchin type")

            try:
                loc = self.coff.Symbols.GetLocation(patchin[0])
                r = coffRelocationEntry(addr, loc, typ=patchinType)
            except coffError:
                # Unknown symbol: fall back to symbol-table index 0.
                r = coffRelocationEntry(addr, 0x0, typ=patchinType)

            sect.RelocationData.append(r)

        sym = self.coff.Symbols.GetSymbol('.text\x00\x00\x00')
        sym.RebuildAuxiliaries(len(sect.RawData), len(self.cp.CodePatchins), 0,
                               crc32(sect.RawData), 0, 0)

        # attempt to add line numbers
        for sym in self.cp.CodeSymbols:
            symLoc = self.coff.Symbols.GetLocation(sym[0])
            sect.LineNumberData.append(coffLineNumberEntry(symLoc, 0x0))

        return sect

    def dataSection(self):
        """Return the read/write .data section holding cp.Data."""
        sect = coffSection()
        sect.Name = '.data\x00\x00\x00'
        sect.Flags = (SectionFlags.LNK_COMDAT |
                      SectionFlags.CNT_INITIALIZED_DATA |
                      SectionFlags.MEM_WRITE |
                      SectionFlags.MEM_READ |
                      SectionFlags.ALIGN_4BYTES)
        sect.RawData = self.cp.Data

        sym = self.coff.Symbols.GetSymbol('.data\x00\x00\x00')
        sym.RebuildAuxiliaries(len(sect.RawData), 0, 0, crc32(sect.RawData), 0, 0)

        return sect

    def rdataSection(self):
        """Return a read-only .rdata section holding cp.Data.

        NOTE(review): addSymbols() never defines an '.rdata' section symbol
        and makeReleaseCoff() does not call this method -- confirm whether
        it is still needed.
        """
        sect = coffSection()
        sect.Name = '.rdata\x00\x00'
        sect.Flags = (SectionFlags.LNK_COMDAT |
                      SectionFlags.CNT_INITIALIZED_DATA |
                      SectionFlags.MEM_READ |
                      SectionFlags.ALIGN_4BYTES)
        sect.RawData = self.cp.Data

        sym = self.coff.Symbols.GetSymbol('.rdata\x00\x00')
        sym.RebuildAuxiliaries(len(sect.RawData), 0, 0, crc32(sect.RawData), 0, 0)

        return sect

    def debugF_Section(self):
        "What the hell is this?"
        pass
##        s4=coffSection()
##        s4.Name = '.debug$F'
##        s4.Flags = (SectionFlags.LNK_COMDAT |
##                    SectionFlags.TYPE_NO_PAD |
##                    SectionFlags.CNT_INITIALIZED_DATA |
##                    SectionFlags.MEM_DISCARDABLE |
##                    SectionFlags.MEM_READ |
##                    SectionFlags.ALIGN_1BYTES)
##        s4.RawData = '\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00'
##
##        r = coffRelocationEntry(addr=0x0,sym=0x8,typ=RelocationTypes.I386_DIR32NB)
##        s4.RelocationData.append(r)
##
##        c.Sections.append(s4)

    def addSymbol(self, name, val, sec, typ, cls, aux=''):
        """Forward to coffFile.AddSymbol."""
        self.coff.AddSymbol(name, val, sec, typ, cls, aux)

    def addFunctionSymbols(self, sym, section):
        """
        A function actually has 4 symbol entries:
            + the function entry
            + the BeginFunction(.bf) entry
            + the LinesInFunction(.lf) entry
            + the EndFunction(.ef) entry

        sym is a (name, address, type) entry from cp.CodeSymbols.  The
        section argument is currently unused; section 2 is hard-coded.
        """

        fun = coffFunctionDef(sym[0], sym[1], 2)
        self.coff.AddExistingSymbol(fun)
        if self.lastFunction:
            # Now that the next function's start is known, finish the last one.
            self.lastFunction.PointerToNextFunction = fun.Location
            self.lastFunction.TotalSize = fun.Value - self.lastFunction.Value
            self.lastFunction.BuildAuxiliaries()
        self.lastFunction = fun

        bf = coffBf(2)
        self.coff.AddExistingSymbol(bf)
        if self.lastBf:
            # NOTE(review): the function chain above stores fun.Location,
            # this stores the bf object itself -- confirm which is intended.
            self.lastBf.PointerToNextFunction = bf
            self.lastBf.BuildAuxiliaries()
        self.lastBf = bf

        fun.TagIndex = bf.Location
        fun.BuildAuxiliaries()

        lf = coffLf(2)
        self.coff.AddExistingSymbol(lf)

        ef = coffEf(2)
        self.coff.AddExistingSymbol(ef)
        if self.lastEf:
            self.lastEf.Value = sym[1] - self.lastEfPos
        self.lastEf = ef
        self.lastEfPos = sym[1]

    def addSymbols(self):
        """Populate the symbol table: file, sections, functions, data, externals."""
        self.coff.AddExistingSymbol(coffSymbolFile('C:\\objtest\\objtest\\objtest.cpp'))

        self.addSymbol('@comp.id', 0xB2306, -1, SymbolTypes.NULL, SymbolClass.STATIC)

        self.coff.AddExistingSymbol(coffSectionDef('.drectve', 1))
        self.coff.AddExistingSymbol(coffSectionDef('.text\x00\x00\x00', 2))
        self.coff.AddExistingSymbol(coffSectionDef('.data\x00\x00\x00', 3))

        for sym in self.cp.CodeSymbols:
            self.addFunctionSymbols(sym, 2)

        #sizes for last function
        # Guarded: with no code symbols lastFunction/lastEf are still None
        # and the unconditional attribute access used to raise AttributeError.
        totalSize = len(self.cp.Code)
        if self.lastFunction is not None:
            self.lastFunction.TotalSize = totalSize - self.lastFunction.Value
            self.lastFunction.BuildAuxiliaries()
        if self.lastEf is not None:
            self.lastEf.Value = totalSize - self.lastEfPos

        for sym in self.cp.DataSymbols:
            self.addSymbol(sym[0], sym[1], 3, 0x20, SymbolClass.EXTERNAL)

        #resolve external label references here
        for patchin in self.cp.CodePatchins:
            try:
                self.coff.Symbols.GetLocation(patchin[0])
            except coffError:
                # no symbol entry, add ref
                self.addSymbol(patchin[0], SymbolValues.SYM_UNDEFINED, 0, 0x20,
                               SymbolClass.EXTERNAL)

        self.coff.Symbols.SetLocations()

    def makeReleaseCoff(self):
        """
        converts a generic codePackage to a coff object
        """
        self.addSymbols()

        self.coff.Sections.append(self.linkDirectiveSection())
        self.coff.Sections.append(self.textSection())
        self.coff.Sections.append(self.dataSection())
        #c.Sections.append(self.DebugF_Section())

        self.coff.SetSizes()
        self.coff.SetOffsets()

        return self.coff


# --------------------------------------------------------------------------
# /x86cpToMemory.py
# --------------------------------------------------------------------------
# Copyright 2004-2010 Grant T. Olson.
# See license.txt for terms.
from x86inst import RELATIVE, DIRECT
from x86PackUnpack import ulongToString
from x86asm import PYTHON
import pyasm.excmem as excmem

import logging, sys

if sys.platform == 'win32':
    import win32api, pywintypes
    from sys import dllhandle

    def runtimeResolve(funcName):
        """Resolve funcName via GetProcAddress on sys.dllhandle.

        Raises RuntimeError if the symbol cannot be found.
        """
        try:
            addr = win32api.GetProcAddress(dllhandle, funcName)
        except pywintypes.error:
            raise RuntimeError("Unable to resolve external symbol '%s'" % funcName)
        return addr
elif sys.platform.startswith('linux'):
    # Was "sys.platform in ('linux2')": the parentheses do not make a tuple,
    # so that was a substring test against the string 'linux2'.
    # startswith() accepts 'linux2' (Python 2) and 'linux' (Python 3).
    def runtimeResolve(funcName):
        """Resolve funcName through excmem.GetSymbolAddress."""
        return excmem.GetSymbolAddress(funcName)
else:
    raise RuntimeError("Don't know how to resolve external symbols for platform '%s'" % sys.platform)


class CpToMemory:
    """Loads a codePackage into executable memory and resolves its patch-ins."""
    def __init__(self, cp):
        self.cp = cp
        self.symbols = {}        # symbol name -> absolute address
        self.resolvedCode = ''   # cp.Code with every patch-in filled in

    def LookupAddress(self, sym):
        """Return the address of sym, resolving and caching externals on demand."""
        if sym in self.symbols:
            return self.symbols[sym]
        else:  # try runtime resolution via the platform's runtimeResolve()
            funcaddress = runtimeResolve(sym)
            self.symbols[sym] = funcaddress
            return funcaddress

    def BindPythonFunctions(self, glb=None, bindFunction=excmem.BindFunctionAddress):
        """Bind every PYTHON-type code symbol into glb as a callable.

        glb defaults to this module's globals().  MakeMemory() must have
        run first, because self.codeAddr is set there.
        """
        if glb is None:
            glb = globals()
        for proc in self.cp.CodeSymbols:
            if proc[2] == PYTHON:
                glb[proc[0]] = bindFunction(proc[1] + self.codeAddr)

    def MakeMemory(self, glb=None):
        """Allocate memory, resolve all patch-ins and load code and data.

        glb is accepted but not otherwise used by this method.
        """
        if not glb:
            glb = globals()

        self.codeAddr = excmem.AllocateExecutableMemory(len(self.cp.Code))
        self.dataAddr = excmem.AllocateExecutableMemory(len(self.cp.Data))

        # Turn package-relative symbol offsets into absolute addresses.
        self.symbols = {}
        for sym in self.cp.CodeSymbols:
            self.symbols[sym[0]] = sym[1] + self.codeAddr
        for sym in self.cp.DataSymbols:
            self.symbols[sym[0]] = sym[1] + self.dataAddr

        self.resolvedCode = self.cp.Code  # nondestructive on cp

        for patch in self.cp.CodePatchins:
            # patch is (symbolName, offsetInCode, DIRECT|RELATIVE)
            if patch[2] == DIRECT:
                resolvedAddr = self.LookupAddress(patch[0])
            elif patch[2] == RELATIVE:
                #XXX
                # I'm just assuming that the patchin is at the end of a function
                # and the next instruction address is that +4
                # Is this valid or do I need to calculate?
                resolvedAddr = self.LookupAddress(patch[0]) - (self.codeAddr + patch[1] + 4)
            else:
                raise RuntimeError("Invalid patchin information")
            # Overwrite the 4 bytes at the patch offset.
            self.resolvedCode = self.resolvedCode[:patch[1]] + ulongToString(resolvedAddr) \
                                + self.resolvedCode[patch[1]+4:]

        assert len(self.resolvedCode) == len(self.cp.Code)

        excmem.LoadExecutableMemoryString(self.codeAddr, self.resolvedCode)
        excmem.LoadExecutableMemoryString(self.dataAddr, self.cp.Data)


# --------------------------------------------------------------------------
# /x86disasm.py
# --------------------------------------------------------------------------
# Copyright 2004-2010 Grant T. Olson.
# See license.txt for terms.
#
# x86disasm.py
# ------------
#
# Create a disassembly dump of x86 code.
#
# TODO: Extract x86Block as the assembler probably needs the same stuff.
# (continuation of /x86disasm.py)
from x86inst import *
from x86PackUnpack import *


class x86Block:
    """
    Arbitrary block of x86 data
    """
    def __init__(self, data='', location=0x0):
        self.Data = data
        self.Location = location   # read cursor into Data
        self.Labels = []
        self.LabelRefs = []

    def unpackFromString(self, f, s):
        """Unpack one value of struct format f (s bytes) and advance the cursor."""
        ret = struct.unpack(f, self.Data[self.Location:self.Location+s])[0]
        self.Location += s
        return ret

    # NOTE: extraction damage.  The rest of this class, the remainder of
    # x86disasm.py, and the head of x86tokenizer.py up to the end of its
    # stringRe definition were lost.  That head defined the token-type
    # constants, tokenizeError and the other *Re patterns used below.
    # Surviving fragment, verbatim:
    #
    #   def GetUnsignedByte(self):
    #       return self.unpackFromString(".*)(?P=q))"


# --------------------------------------------------------------------------
# (continuation of /x86tokenizer.py, from its line 87)
# --------------------------------------------------------------------------
# NOTE: the dump this was recovered from had every "<...>" span stripped, so
# the named groups read "(?P..." with no name.  The names are restored from
# the groupdict() keys tokenizeString() reads below.
lbracketRe = r'(?P<LBRACKET>\[)'
rbracketRe = r'(?P<RBRACKET>\])'
numberRe = r'(?P<NUMBER>[\+\-]?(0x[0-9A-Fa-f]+|[0-9]+))'
symbolRe = r'(?P<SYMBOL>[A-Za-z_@][A-Za-z_@0-9]*)'

#define final re's
instructionDefRe = re.compile("(?:%s*(?:%s|%s|%s|%s|%s|%s|%s)(?P<rest>.*))" % \
                              (whitespaceRe, defRegRe, operandRe, symbolRe, commaRe,
                               numberRe, lbracketRe, rbracketRe))

instructionRe = re.compile("(?:%s*(?:%s|%s|%s|%s|%s|%s|%s)(?P<rest>.*))" % \
                           (whitespaceRe, lbracketRe, rbracketRe, instRegRe,
                            commaRe, numberRe, symbolRe, stringRe))

# Hack for 'm' codes.  These are stored in the RM field,
# But register values are technically invalid.  Should this be
# verified while compiling?
_M_OPERAND_ALIASES = {'m32': 'r/m32', 'm16': 'r/m16', 'm8': 'r/m8'}


def tokenizeString(s, reToProcess):
    """Split s into a tuple of (tokenType, text) pairs.

    reToProcess is instructionRe or instructionDefRe.  One token is matched
    at the front of the remaining text per iteration, and the 'rest' group
    becomes the new remainder.

    Raises:
        tokenizeError: if the remaining text matches no token.
    """
    lst = []
    rest = s
    while rest:
        instMatch = reToProcess.match(rest)
        if not instMatch:
            raise tokenizeError("Couldn't find match for string '%s' from '%s'" % (rest, s))

        instDict = instMatch.groupdict()
        if instDict['REGISTER']:
            lst.append((REGISTER, instDict['REGISTER']))
        elif instDict['SYMBOL']:
            # An opcode is lexically a symbol; opcodeRe tells them apart.
            if opcodeRe.match(instDict['SYMBOL']):
                lst.append((OPCODE, instDict['SYMBOL']))
            else:
                lst.append((SYMBOL, instDict['SYMBOL']))
        elif instDict['COMMA']:
            lst.append((COMMA, instDict['COMMA']))
        elif instDict.get('OPERAND'):
            # only defs have operands.
            opText = instDict['OPERAND']
            lst.append((OPERAND, _M_OPERAND_ALIASES.get(opText, opText)))
        elif instDict['LBRACKET']:
            lst.append((LBRACKET, instDict['LBRACKET']))
        elif instDict['RBRACKET']:
            lst.append((RBRACKET, instDict['RBRACKET']))
        elif instDict['NUMBER']:
            lst.append((NUMBER, instDict['NUMBER']))
        elif instDict.get('STRING'):
            # only instructions have strings; .get() keeps a definition
            # regex (no STRING group) from raising KeyError here.
            lst.append((STRING, instDict['STRING']))
        else:
            raise tokenizeError("Tokenization failed on string %s, match %s" \
                                % (s, rest))
        rest = instDict['rest']
    return tuple(lst)


def tokenizeInstDef(s):
    """Tokenize and validate an instruction definition.

    A definition is one or more OPCODE tokens followed by comma-separated
    REGISTER / OPERAND / NUMBER entries.

    Raises:
        tokenizeError: if s is blank or breaks that shape.
    """
    toks = tokenizeString(s, instructionDefRe)
    index, length = 0, len(toks)
    if length == 0:
        raise tokenizeError("Invalid Instruction.  Cannot be blank")
    if toks[index][0] != OPCODE:
        raise tokenizeError("Invalid Instruction: '%s' " \
                            "Must start with an OPCODE" % s)

    while index < length and toks[index][0] == OPCODE:
        index += 1

    while index < length:
        if toks[index][0] not in (REGISTER, OPERAND, NUMBER):
            raise tokenizeError("Invalid Instruction Definition: '%s' " \
                                "Expected a REGISTER OR OPERAND ENTRY" % s)
        index += 1
        if index < length:
            if toks[index][0] != COMMA:
                raise tokenizeError("Invalid Instruction Def: '%s' Expected " \
                                    "a COMMA" % s)
            index += 1
    return toks


def tokenizeInst(s):
    """Tokenize and validate a concrete instruction.

    An instruction is one or more OPCODE tokens followed by comma-separated
    operands.  An operand is a REGISTER, NUMBER, SYMBOL or STRING, or a
    bracketed form: [NUMBER|SYMBOL], [REGISTER] or [REGISTER NUMBER|SYMBOL].

    Raises:
        tokenizeError: if s is blank or malformed.  This now includes an
            instruction that ends inside an open bracket, which previously
            escaped as an IndexError.
    """
    toks = tokenizeString(s, instructionRe)
    index, length = 0, len(toks)
    if length == 0:
        raise tokenizeError("Invalid Instruction.  Cannot be blank")
    if toks[index][0] != OPCODE:
        raise tokenizeError("Invalid Instruction: '%s' " \
                            "Must start with an OPCODE" % s)

    def kindAt(i):
        # Token type at position i, or None once past the last token.
        if i < length:
            return toks[i][0]
        return None

    while index < length and toks[index][0] == OPCODE:
        index += 1

    while index < length:
        if toks[index][0] in (REGISTER, NUMBER, SYMBOL, STRING):
            index += 1
        elif toks[index][0] == LBRACKET:
            index += 1
            if kindAt(index) in (NUMBER, SYMBOL):
                index += 1
            elif kindAt(index) == REGISTER:
                index += 1
                # optional displacement after the register
                if kindAt(index) in (NUMBER, SYMBOL):
                    index += 1
            else:
                raise tokenizeError("Invalid Instruction: '%s' Expected a " \
                                    "REGISTER inside the [brackets]" % s)
            if kindAt(index) != RBRACKET:
                raise tokenizeError("Invalid Instruction: '%s' Expected an " \
                                    "ending BRACKET here." % s)
            else:
                index += 1
        else:
            raise tokenizeError("Invalid Instruction: '%s' " \
                                "Expected a REGISTER,LBRACKET,NUMBER,SYMBOL, or STRING" % s)

        if index < length:
            if toks[index][0] != COMMA:
                raise tokenizeError("Invalid Instruction: '%s' Expected " \
                                    "a COMMA" % s)
            index += 1
    return toks