├── .gitignore ├── LICENSE ├── README.md ├── alphablend.asm ├── demo.asm ├── images ├── intel2gas_0.png ├── intel2gas_1.png └── intel2gas_2.png ├── intel2gas.py ├── intel2gui.pyw └── long-mode.asm /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Linwei 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Intel2GAS 2 | Convert MASM style inline assembly to AT&T style inline assembly, and output in pure AT&T assembly code or gcc inline assembly code. support x86, x86_64 instructions. It is a brand new replacement to old [intel2gas](http://freecode.com/projects/intel2gas "Old Intel2GAS") project. 3 | 4 | Install 5 | ------- 6 | 7 | > $ git clone https://github.com/skywind3000/Intel2GAS.git Intel2GAS 8 | 9 | Convert Assembly in GUI 10 | ----------------------- 11 | 12 | Run intel2gui.pyw directly, to get into GUI front-end. and convert masm source into AT&T Style (with or without inline mode). 13 | 14 | > $ cd Intel2GAS 15 | > 16 | > $ python intel2gui.pyw 17 | 18 | **Convert Without GCC Inline mode** 19 | 20 | ![](https://raw.githubusercontent.com/skywind3000/Intel2GAS/master/images/intel2gas_0.png) 21 | 22 | **Convert With GCC Inline mode** 23 | 24 | ![](https://raw.githubusercontent.com/skywind3000/Intel2GAS/master/images/intel2gas_1.png) 25 | 26 | **MMX Alpha Blend Demo** 27 | 28 | ![](https://raw.githubusercontent.com/skywind3000/Intel2GAS/master/images/intel2gas_2.png) 29 | 30 | 31 | Convert Assembly in Console 32 | --------------------------- 33 | 34 | > $ cd Intel2GAS 35 | > 36 | > $ cat demo.asm 37 | 38 | ```asm 39 | cld 40 | mov esi, src 41 | mov edi, dst 42 | mov ecx, size 43 | label1: 44 | mov al, [esi] 45 | inc al ; calculate 46 | mov [edi], al 47 | inc esi 48 | inc edi 49 | dec ecx 50 | jnz label1 ; loop to la 51 | ret 52 | ``` 53 | 54 | **Convert Without GCC Inline** 55 | 56 | > $ python intel2gas.py -m < demo.asm 57 | 58 | ```asm 59 | cld 60 | mov %0, %esi 61 | mov %1, %edi 62 | mov %2, %ecx 63 | label1: 64 | mov (%esi), %al 65 | inc %al //calculate 66 | mov %al, (%edi) 67 | inc %esi 68 | inc %edi 69 | dec %ecx 70 | jnz label1 //loop to la 71 | ret 72 | ``` 73 | 74 | **Convert With GCC Inline** 75 | 76 | > $ python intel2gas.py -i -m < demo.asm 77 | 78 | ```asm 79 | __asm__ __volatile__ ( 80 | " cld\n" 81 | " mov %0, %%esi\n" 82 | " mov %1, %%edi\n" 83 | " mov %2, %%ecx\n" 84 | "label1:\n" 85 | " mov (%%esi), %%al\n" 86 | " inc %%al\n" //calculate 87 | " mov %%al, (%%edi)\n" 88 | " inc %%esi\n" 89 | " inc %%edi\n" 90 | " dec %%ecx\n" 91 | " jnz label1\n" //loop to la 92 | " ret\n" 93 | : 94 | :"m"(src), "m"(dst), "m"(size) 95 | :"memory", "esi", "edi", "eax", "ebx", "ecx", "edx" 96 | ); 97 | ``` 98 | 99 | -------------------------------------------------------------------------------- /alphablend.asm: -------------------------------------------------------------------------------- 1 | mov edi, ptr1; 2 | mov esi, ptr2; 3 | pxor mm7, mm7; // mm7 = 0000....00 4 | pcmpeqb mm6, mm6; // mm6 = ffff....ff 5 | loop_line: 6 | mov ecx, w; 7 | shr ecx, 1; 8 | ALIGN 8 9 | loop_pixel_x2: 10 | prefetchnta [esi + 128]; 11 | prefetchnta [edi + 128]; 12 | movd mm0, [esi]; 13 | movd mm1, [esi + 4]; 14 | mov eax, [edi]; 15 | mov ebx, [edi + 4]; //ddddddd 16 | movq mm2, mm0; 17 | movq mm3, mm1; 18 | psrlq mm2, 24; 19 | psrlq mm3, 24; 20 | punpcklwd mm2, mm2; 21 | punpcklwd mm3, mm3; 22 | punpckldq mm2, mm2; // mm2 = 0 a1 0 a1 0 a1 0 a1 (word) 23 | punpckldq mm3, mm3; // mm3 = 0 a2 0 a2 0 a2 0 a2 (word) 24 | pcmpeqb mm4, mm4; // mm4 = 0xffff...ff 25 | pcmpeqb mm5, mm5; // mm5 = 0xffff...ff 26 | punpcklbw mm0, mm7; // mm0 = src1 27 | punpcklbw mm1, mm7; // mm1 = src2 28 | punpcklbw mm4, mm7; 29 | punpcklbw mm5, mm7; 30 | psubb mm4, mm2; // mm4 = (0xff - a1)... 31 | psubb mm5, mm3; // mm5 = (0xff - a2)... 32 | 33 | pmullw mm0, mm2; // mm0 = src1 * alpha1 34 | pmullw mm1, mm3; // mm1 = src2 * alpha2 35 | movd mm2, eax; // mm2 = dst1 36 | movd mm3, ebx; // mm3 = dst2 37 | punpcklbw mm2, mm7; 38 | punpcklbw mm3, mm7; 39 | pmullw mm2, mm4; // mm2 = dst1 * (255 - a1) 40 | pmullw mm3, mm5; // mm3 = dst2 * (255 - a2) 41 | 42 | pcmpeqw mm5, mm5; 43 | punpcklbw mm5, mm7; 44 | 45 | paddw mm0, mm2; 46 | paddw mm1, mm3; 47 | psrlw mm0, 8; 48 | psrlw mm1, 8; 49 | pand mm0, mm5; 50 | pand mm1, mm5; 51 | packuswb mm0, mm0; 52 | packuswb mm1, mm1; 53 | 54 | movd [edi], mm0; 55 | movd [edi + 4], mm1; 56 | 57 | add edi, 8; 58 | add esi, 8; 59 | dec ecx; 60 | jnz loop_pixel_x2; 61 | 62 | mov ecx, w; 63 | and ecx, 1; 64 | cmp ecx, 0; 65 | jz end_line; 66 | 67 | // last single pixel 68 | movd mm0, [esi]; 69 | mov eax, [edi]; 70 | movq mm2, mm0; 71 | psrlq mm2, 24; 72 | punpcklwd mm2, mm2; 73 | punpckldq mm2, mm2; // mm2 = 0 a1 0 a1 0 a1 0 a1 (word) 74 | pcmpeqb mm4, mm4; // mm4 = 0xffff...ff 75 | punpcklbw mm0, mm7; // mm0 = src1 76 | punpcklbw mm4, mm7; 77 | psubb mm4, mm2; // mm4 = (0xff - a1)... 78 | pmullw mm0, mm2; // mm0 = src1 * alpha1 79 | movd mm2, eax; // mm2 = dst1 80 | punpcklbw mm2, mm7; 81 | pmullw mm2, mm4; // mm2 = dst1 * (255 - a1) 82 | pcmpeqw mm5, mm5; 83 | punpcklbw mm5, mm7; 84 | paddw mm0, mm2; 85 | psrlw mm0, 8; 86 | packuswb mm0, mm0; 87 | movd [edi], mm0; 88 | add esi, 4; 89 | add edi, 4; 90 | jmp end_line 91 | jmp end_line 92 | end_line: 93 | add edi, diff1; 94 | add esi, diff2; 95 | dec dword ptr h; 96 | push word ptr w; 97 | pop word ptr w; 98 | jnz loop_line; 99 | nop: nop 100 | db 1,2 101 | mov byte ptr[edi], 0 102 | mov word ptr[edi], 0 103 | mov dword ptr[edi], 0 104 | mov eax, word ptr[edi] 105 | push ax 106 | emms; 107 | -------------------------------------------------------------------------------- /demo.asm: -------------------------------------------------------------------------------- 1 | cld 2 | mov esi, src 3 | mov edi, dst 4 | mov ecx, size 5 | label1: 6 | mov al, [esi] 7 | inc al ; calculate 8 | mov [edi], al 9 | inc esi 10 | inc edi 11 | dec ecx 12 | jnz label1 ; loop to la 13 | ret -------------------------------------------------------------------------------- /images/intel2gas_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skywind3000/Intel2GAS/7791666fcdf8e95b33bfb2498281450ff056bad7/images/intel2gas_0.png -------------------------------------------------------------------------------- /images/intel2gas_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skywind3000/Intel2GAS/7791666fcdf8e95b33bfb2498281450ff056bad7/images/intel2gas_1.png -------------------------------------------------------------------------------- /images/intel2gas_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skywind3000/Intel2GAS/7791666fcdf8e95b33bfb2498281450ff056bad7/images/intel2gas_2.png -------------------------------------------------------------------------------- /intel2gas.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | #====================================================================== 4 | # 5 | # intel2gas.py - intel assembly to at&t format 6 | # 7 | # NOTE: 8 | # for more information, please see the readme file 9 | # 10 | #====================================================================== 11 | import sys, time 12 | import os 13 | from io import StringIO 14 | 15 | #---------------------------------------------------------------------- 16 | # TOKEN TYPE 17 | #---------------------------------------------------------------------- 18 | CTOKEN_ENDL = 0 19 | CTOKEN_ENDF = 1 20 | CTOKEN_IDENT = 2 21 | CTOKEN_KEYWORD = 3 22 | CTOKEN_STR = 4 23 | CTOKEN_OPERATOR = 5 24 | CTOKEN_INT = 6 25 | CTOKEN_FLOAT = 7 26 | CTOKEN_ERROR = 8 27 | 28 | CTOKEN_NAME = { 0:'endl', 1:'endf', 2:'ident', 3:'keyword', 4:'str', 29 | 5:'op', 6:'int', 7:'float', 8:'error' } 30 | 31 | #---------------------------------------------------------------------- 32 | # CTOKEN Declare 33 | #---------------------------------------------------------------------- 34 | class ctoken (object): 35 | def __init__ (self, mode = 0, value = 0, text = '', row = -1, col = -1): 36 | self.mode = mode 37 | self.value = value 38 | self.text = text 39 | self.row = row 40 | self.col = col 41 | self.fd = '' 42 | self.source = '' 43 | def copy (self): 44 | token = ctoken(self.mode, self.value, self.text, self.line, self.fd) 45 | token.source = self.source 46 | return token 47 | def is_endl (self): 48 | return self.mode == CTOKEN_ENDL 49 | def is_endf (self): 50 | return self.mode == CTOKEN_ENDF 51 | def is_ident (self): 52 | return self.mode == CTOKEN_IDENT 53 | def is_keyword (self): 54 | return self.mode == CTOKEN_KEYWORD 55 | def is_str (self): 56 | return self.mode == CTOKEN_STR 57 | def is_operator (self): 58 | return self.mode == CTOKEN_OPERATOR 59 | def is_int (self): 60 | return self.mode == CTOKEN_INT 61 | def is_float (self): 62 | return self.mode == CTOKEN_FLOAT 63 | def is_error (self): 64 | return self.mode == CTOKEN_ERROR 65 | def __repr__ (self): 66 | x = '(%s, %s)'%(CTOKEN_NAME[self.mode], repr(self.value)) 67 | return x 68 | 69 | 70 | 71 | #---------------------------------------------------------------------- 72 | # CTOKENIZE Declare 73 | #---------------------------------------------------------------------- 74 | class ctokenize (object): 75 | def __init__ (self, fp = ''): 76 | if type(fp) == type(''): 77 | fp = StringIO(fp) 78 | self.fp = fp 79 | self.reset() 80 | def reset (self): 81 | self.ch = '' 82 | self.un = '' 83 | self.col = 0 84 | self.row = 1 85 | self.eof = 0 86 | self.state = 0 87 | self.text = '' 88 | self.init = 0 89 | self.error = '' 90 | self.code = 0 91 | self.tokens = [] 92 | def getch (self): 93 | if self.un != '': 94 | self.ch = self.un 95 | self.un = '' 96 | return self.ch 97 | try: ch = self.fp.read(1) 98 | except: ch = '' 99 | self.ch = ch 100 | if ch == '\n': 101 | self.col = 1 102 | self.row += 1 103 | else: 104 | self.col += 1 105 | return self.ch 106 | def ungetch (self, ch): 107 | self.un = ch 108 | def isspace (self, ch): 109 | return ch in (' ', '\r', '\n', '\t') 110 | def isalpha (self, ch): 111 | return ch.isalpha() 112 | def isalnum (self, ch): 113 | return ch.isalnum() 114 | def skipspace (self): 115 | skip = 0 116 | while 1: 117 | if self.ch == '': 118 | return -1 119 | if not self.ch in (' ', '\r', '\n', '\t'): 120 | break 121 | if self.ch == '\n': 122 | break 123 | self.getch() 124 | skip += 1 125 | return skip 126 | def read (self): 127 | return None 128 | def next (self): 129 | if not self.init: 130 | self.init = 1 131 | self.getch() 132 | token = self.read() 133 | if token != None: 134 | self.tokens.append(token) 135 | return token 136 | def gettokens (self): 137 | result = [] 138 | while 1: 139 | token = self.next() 140 | if token == None: 141 | if self.code: 142 | text = '%d: %s'%(self.row, self.error) 143 | raise SyntaxError(text) 144 | break 145 | result.append(token) 146 | if token.mode == CTOKEN_ENDF: 147 | break 148 | return result 149 | def __iter__ (self): 150 | return self.gettokens().__iter__() 151 | 152 | 153 | #---------------------------------------------------------------------- 154 | # C/ASM Style Tokenizer 155 | #---------------------------------------------------------------------- 156 | class cscanner (ctokenize): 157 | 158 | def __init__ (self, fp = '', keywords = [], casesensitive = False): 159 | super(cscanner, self).__init__ (fp) 160 | self.keywords = keywords 161 | self.casesensitive = casesensitive 162 | self.ch = ' ' 163 | self.memo = {} 164 | 165 | def skipmemo (self): 166 | memo = '' 167 | while 1: 168 | skip = 0 169 | self.skipspace() 170 | if self.ch == '': 171 | break 172 | if self.ch in (';', '#'): 173 | skip += 1 174 | while (self.ch != '\n') and (self.ch != ''): 175 | memo += self.ch 176 | self.getch() 177 | skip += 1 178 | elif self.ch == '/': 179 | self.getch() 180 | if self.ch == '/': 181 | memo += self.ch 182 | skip += 1 183 | while (self.ch != '\n') and (self.ch != ''): 184 | memo += self.ch 185 | self.getch() 186 | skip += 1 187 | else: 188 | self.ungetch(self.ch) 189 | self.ch = '/' 190 | if skip == 0: 191 | break 192 | return memo 193 | 194 | def read_string (self): 195 | token = None 196 | self.error = '' 197 | if not self.ch in ('\'', '\"'): 198 | return None 199 | mode = (self.ch == '\'') and 1 or 0 200 | text = '' 201 | done = -1 202 | while 1: 203 | ch = self.getch() 204 | if ch == '\\': 205 | self.getch() 206 | text += '\\' + self.ch 207 | elif (mode == 0) and (ch == '\''): 208 | text += '\'' 209 | elif (mode == 1) and (ch == '\"'): 210 | text == '\"' 211 | elif (mode == 0) and (ch == '\"'): 212 | ch = self.getch() 213 | if ch == '\"': 214 | text += '\"\"' 215 | else: 216 | done = 1 217 | token = ctoken(CTOKEN_STR, text, text) 218 | self.text = text 219 | break 220 | elif (mode == 1) and (ch == '\''): 221 | ch = self.getch() 222 | if ch == '\'': 223 | text += '\'\'' 224 | else: 225 | done = 1 226 | token = ctoken(CTOKEN_STR, text, text) 227 | self.text = text 228 | break 229 | elif ch == '\n': 230 | self.error = 'EOL while scanning string literal' 231 | self.code = 1 232 | break 233 | elif ch != '': 234 | text += ch 235 | else: 236 | self.error = 'EOF while scanning string literal' 237 | self.code = 2 238 | break 239 | if not token: 240 | return None 241 | token.row = self.row 242 | token.col = self.col 243 | return token 244 | 245 | def read_number (self): 246 | token = None 247 | done = -1 248 | if ((self.ch < '0') or (self.ch > '9')): 249 | return None 250 | text = '' 251 | while self.ch.isalnum() or self.ch == '.': 252 | text += self.ch 253 | self.getch() 254 | 255 | pos = len(text) 256 | while pos > 0: 257 | ch = text[pos - 1] 258 | if ch.isdigit() or ch == '.': 259 | break 260 | if ch >= 'A' and ch <= 'F': 261 | break 262 | if ch >= 'a' and ch <= 'f': 263 | break 264 | pos -= 1 265 | if len(text) - pos > 2: 266 | self.error = 'number format error' 267 | self.code = 1 268 | return None 269 | 270 | if len(text) - pos == 2: ec1, ec2 = text[pos - 2], text[pos - 1] 271 | elif len(text) - pos == 1: ec1, ec2 = text[pos - 1], 0 272 | else: ec1, ec2 = 0, 0 273 | text = text[:pos] 274 | 275 | if text[:2] in ('0x', '0X'): 276 | try: value = int(text, 16) 277 | #Debug 278 | except: 279 | self.error = 'bad hex number ' + text 280 | self.code = 2 281 | return None 282 | if value >= -0x80000000 and value <= 0x7fffffff: 283 | value = int(value) 284 | token = ctoken(CTOKEN_INT, value, text) 285 | elif ec1 == 'h' and ec2 == 0: 286 | try: value = int(text, 16) 287 | except: 288 | self.error = 'bad hex number ' + text 289 | self.code = 3 290 | return None 291 | if value >= -0x80000000 and value <= 0x7fffffff: 292 | value = int(value) 293 | token = ctoken(CTOKEN_INT, value, text) 294 | elif ec1 == 'b' and ec2 == 0: 295 | try: value = int(text, 2) 296 | except: 297 | self.error = 'bad binary number ' + text 298 | self.code = 4 299 | return None 300 | if value >= -0x80000000 and value <= 0x7fffffff: 301 | value = int(value) 302 | token = ctoken(CTOKEN_INT, value, text) 303 | elif ec1 == 'q' and ec2 == 0: 304 | try: value = int(text, 8) 305 | except: 306 | self.error = 'bad octal number ' + text 307 | self.code = 5 308 | return None 309 | if value >= -0x80000000 and value <= 0x7fffffff: 310 | value = int(value) 311 | token = ctoken(CTOKEN_INT, value, text) 312 | else: 313 | decimal = (not '.' in text) and 1 or 0 314 | if decimal: 315 | try: value = int(text, 10) 316 | except: 317 | self.error = 'bad decimal number ' + text 318 | self.code = 6 319 | return None 320 | if value >= -0x80000000 and value <= 0x7fffffff: 321 | value = int(value) 322 | token = ctoken(CTOKEN_INT, value, text) 323 | else: 324 | try: value = float(text) 325 | except: 326 | self.error = 'bad float number ' + text 327 | self.code = 7 328 | return None 329 | token = ctoken(CTOKEN_FLOAT, value, text) 330 | 331 | token.row = self.row 332 | token.col = self.col 333 | return token 334 | 335 | def read (self): 336 | 337 | memo = self.skipmemo() 338 | 339 | if self.ch == '\n': 340 | lineno = self.row - 1 341 | token = ctoken(CTOKEN_ENDL) 342 | token.row = lineno 343 | self.memo[lineno] = memo 344 | memo = '' 345 | self.getch() 346 | return token 347 | 348 | if self.ch == '': 349 | self.eof += 1 350 | if self.eof > 1: 351 | return None 352 | token = ctoken(CTOKEN_ENDF, 0) 353 | token.row = self.row 354 | if memo: 355 | self.memo[self.row] = memo 356 | return token 357 | 358 | # this is a string 359 | if self.ch in ('\"', '\''): 360 | row, col = self.row, self.col 361 | self.code = 0 362 | self.error = '' 363 | token = self.read_string() 364 | if self.code: 365 | return None 366 | token.row, token.col = row, col 367 | return token 368 | 369 | issym2f = lambda x: x.isalpha() or (x in ('_', '$', '@')) 370 | issym2x = lambda x: x.isalnum() or (x in ('_', '$', '@')) 371 | 372 | # identity or keyword 373 | if issym2f(self.ch): 374 | row, col = self.row, self.col 375 | text = '' 376 | while issym2x(self.ch): 377 | text += self.ch 378 | self.getch() 379 | if self.keywords: 380 | for i in range(len(self.keywords)): 381 | same = 0 382 | if self.casesensitive: 383 | same = (text == self.keywords[i]) 384 | else: 385 | same = (text.lower() == self.keywords[i].lower()) 386 | if same: 387 | token = ctoken(CTOKEN_KEYWORD, i, text) 388 | token.row, token.col = row, col 389 | return token 390 | token = ctoken(CTOKEN_IDENT, text, text) 391 | token.row, token.col = row, col 392 | return token 393 | 394 | # this is a number 395 | if self.ch >= '0' and self.ch <= '9': 396 | row, col = self.row, self.col 397 | self.code = 0 398 | self.error = '' 399 | token = self.read_number() 400 | if self.code: 401 | return None 402 | token.row, token.col = row, col 403 | return token 404 | 405 | # this is an operator 406 | token = ctoken(CTOKEN_OPERATOR, self.ch, self.ch) 407 | token.row, token.col = self.row, self.col 408 | self.getch() 409 | 410 | return token 411 | 412 | 413 | 414 | #---------------------------------------------------------------------- 415 | # tokenize in single function 416 | #---------------------------------------------------------------------- 417 | def tokenize(script): 418 | scanner = cscanner(script) 419 | result = [ n for n in scanner ] 420 | scanner.reset() 421 | return result 422 | 423 | 424 | 425 | #---------------------------------------------------------------------- 426 | # X86 - ASSEMBLY 427 | #---------------------------------------------------------------------- 428 | REGNAME = [ 'AH', 'AL', 'BH', 'BL', 'CH', 'CL', 'DH', 'DL', 'AX', 429 | 'BX', 'CX', 'DX', 'EAX', 'EBX', 'ECX', 'EDX', 'RAX', 'RBX', 'RCX', 430 | 'RDX', 'CR0', 'CR1', 'CR2', 'CR3', 'DR0', 'DR1', 'DR2', 'DR3', 431 | 'DR4', 'DR5', 'DR6', 'DR7', 'SI', 'DI', 'SP', 'BP', 'ESI', 432 | 'EDI', 'ESP', 'EBP', 'RSI', 'RDI', 'RSP', 'RBP', 'TR6', 'TR7', 433 | 'ST0', 'ST1', 'ST2', 'ST3', 'ST4', 'ST5', 'ST6', 'ST7', 'MM0', 434 | 'MM1', 'MM2', 'MM3', 'MM4', 'MM5', 'MM6', 'MM7', 'MM8', 'MM9', 435 | 'MM10', 'MM11', 'MM12', 'MM13', 'MM14', 'MM15', 'XMM0', 'XMM1', 436 | 'XMM2', 'XMM3', 'XMM4', 'XMM5', 'XMM6', 'XMM7', 'XMM8', 'XMM9', 437 | 'XMM10', 'XMM11', 'XMM12', 'XMM13', 'XMM14', 'XMM15', 'R0', 'R1', 438 | 'R2', 'R3', 'R4', 'R5', 'R6', 'R7', 'R8', 'R9', 'R10', 'R11', 439 | 'R12', 'R13', 'R14', 'R15' ] 440 | 441 | def reginfo(name): 442 | name = name.lower() 443 | if name[:2] == 'mm': 444 | return 8 445 | if name[:3] == 'xmm': 446 | return 16 447 | if name[:1] == 'r' and name[1:].isdigit(): 448 | return 8 449 | if name[:2] in ('cr', 'dr'): 450 | return 4 451 | if name[:2] in ('st', 'tr'): 452 | return 8 453 | if len(name) == 2: 454 | if name[0] == 'r': 455 | return 8 456 | if name[1] in ('h', 'l'): 457 | return 1 458 | if name[1] in ('x', 'i', 'p'): 459 | return 2 460 | raise SyntaxError('unknow register ' + name) 461 | if len(name) == 3: 462 | if name[2] in ('x', 'p', 'i'): 463 | if name[0] == 'e': 464 | return 4 465 | if name[0] == 'r': 466 | return 8 467 | raise SyntaxError('unknow register ' + name) 468 | return 0 469 | 470 | REGSIZE = { } 471 | 472 | for reg in REGNAME: 473 | REGSIZE[reg] = reginfo(reg) 474 | 475 | regsize = lambda reg: REGSIZE[reg.upper()] 476 | isreg = lambda reg: (reg.upper() in REGSIZE) 477 | 478 | 479 | instreplace = { 480 | "cbw":"cbtw", 481 | "cdq":"cltd", 482 | "cmpsd":"cmpsl", 483 | "codeseg":".text", 484 | "cwd":"cwtd", 485 | "cwde":"cwtl", 486 | "dataseg":".data", 487 | "db":".byte", 488 | "dd":".int", 489 | "dw":".short", 490 | "emit":".byte", 491 | "_emit":".byte", 492 | "insd":"insl", 493 | "lodsd":"lodsl", 494 | "movsd":"movsl", 495 | "movsx":"movs", 496 | "movzx":"movz", 497 | "outsd":"outsl", 498 | "public":".globl", 499 | "scasd":"scasl", 500 | "stosd":"stosl", 501 | } 502 | 503 | prefix = [ 'lock', 'rep', 'repne', 'repnz', 'repe', 'repz' ] 504 | 505 | 506 | #---------------------------------------------------------------------- 507 | # coperand 508 | #---------------------------------------------------------------------- 509 | O_REG = 0 # 寄存器 510 | O_IMM = 1 # 立即数字 511 | O_MEM = 2 # 内存 512 | O_LABEL = 3 # 标识,可能是变量也可能是跳转地址 513 | 514 | class coperand (object): 515 | def __init__ (self, tokens = None): 516 | self.mode = -1 517 | self.reg = '' # 默认寄存器 518 | self.base = '' # 寻址:基址寄存器 519 | self.index = '' # 寻址:索引寄存器 520 | self.scale = 0 # 寻址:放大倍数 521 | self.offset = 0 # 寻址:偏移量 522 | self.segment = '' # 段地址 523 | self.immediate = 0 # 立即数字 524 | self.label = '' # 变量或者跳转地址 525 | self.size = 0 # 数据大小 526 | if tokens != None: 527 | self.parse(tokens) 528 | self.name = 'operand' 529 | def reset (self): 530 | self.reg = '' 531 | self.base = '' 532 | self.index = '' 533 | self.scale = 0 534 | self.offset = 0 535 | self.immediate = 0 536 | self.segment = '' 537 | self.label = '' 538 | self.size = 0 539 | def parse (self, tokens): 540 | if type(tokens) == type(''): 541 | tokens = tokenize(tokens) 542 | tokens = [ n for n in tokens ] 543 | while len(tokens) > 0: 544 | if not tokens[-1].mode in (CTOKEN_ENDF, CTOKEN_ENDL): 545 | break 546 | tokens.pop() 547 | self.reset() 548 | if len(tokens) >= 2: 549 | t1 = tokens[0] 550 | t2 = tokens[1] 551 | if t2.mode == CTOKEN_IDENT and t2.value.lower() == 'ptr': 552 | if t1.mode == CTOKEN_IDENT: 553 | size = t1.value.lower() 554 | if size == 'byte': self.size = 1 555 | elif size == 'word': self.size = 2 556 | elif size == 'dword': self.size = 4 557 | elif size == 'qword': self.size = 8 558 | if self.size != 0: 559 | tokens = tokens[2:] 560 | if len(tokens) == 0: 561 | raise SyntaxError('expected operand token') 562 | head = tokens[0] 563 | tail = tokens[-1] 564 | if head.mode == CTOKEN_INT: # 如果是立即数 565 | self.mode = O_IMM 566 | self.immediate = head.value 567 | elif head.mode == CTOKEN_IDENT: # 寄存器或标识 568 | if isreg(head.value): # 如果是寄存器 569 | self.mode = O_REG 570 | self.reg = head.value 571 | self.size = regsize(self.reg) 572 | else: 573 | self.mode = O_LABEL # 如果是标识 574 | self.label = head.value 575 | elif head.mode == CTOKEN_OPERATOR: # 如果是符号 576 | if head.value == '[': # 如果是内存 577 | self.mode = O_MEM 578 | if tail.mode != CTOKEN_OPERATOR or tail.value != ']': 579 | raise SyntaxError('bad memory operand') 580 | self.__parse_memory(tokens) 581 | else: 582 | raise SyntaxError('bad operand descript ' + repr(head.value)) 583 | else: 584 | raise SyntaxError('bad operand desc') 585 | return 0 586 | def __parse_memory (self, tokens): 587 | tokens = tokens[1:-1] 588 | if len(tokens) == 0: 589 | raise SyntaxError('memory operand error') 590 | self.scale = 1 591 | self.index = '' 592 | self.offset = 0 593 | self.base = '' 594 | segments = [ 'cs', 'ss', 'ds', 'es', 'fs', 'gs' ] 595 | pos = -1 596 | for i in range(len(tokens)): 597 | token = tokens[i] 598 | if token.mode == CTOKEN_OPERATOR and token.value == ':': 599 | pos = i 600 | break 601 | if pos >= 0 and pos < len(tokens): # 如果覆盖段地址 602 | if pos == 0 or pos == len(tokens) - 1: 603 | raise SyntaxError('memory operand segment error') 604 | t1 = tokens[pos - 1] 605 | tokens = tokens[:pos - 1] + tokens[pos + 1:] 606 | if t1.mode != CTOKEN_IDENT: 607 | raise SyntaxError('memory operand segment bad') 608 | seg = t1.value.lower() 609 | if not seg in segments: 610 | raise SyntaxError('memory operand segment unknow') 611 | self.segment = seg 612 | pos = -1 613 | for i in range(len(tokens)): 614 | token = tokens[i] 615 | if token.mode == CTOKEN_OPERATOR and token.value == '*': 616 | pos = i 617 | break 618 | if pos >= 0 and pos < len(tokens): # 如果有乘号 619 | if pos == 0 or pos == len(tokens) - 1: 620 | raise SyntaxError('memory operand error (bad scale)') 621 | t1 = tokens[pos - 1] 622 | t2 = tokens[pos + 1] 623 | tokens = tokens[:pos - 1] + tokens[pos + 2:] 624 | if t1.mode == CTOKEN_IDENT and t2.mode == CTOKEN_INT: 625 | pass 626 | elif t1.mode == CTOKEN_INT and t2.mode == CTOKEN_IDENT: 627 | t1, t2 = t2, t1 628 | else: 629 | raise SyntaxError('memory operand error (scale error)') 630 | if not isreg(t1.value): 631 | raise SyntaxError('memory operand error (no index register)') 632 | self.index = t1.value 633 | self.scale = (t2.value) 634 | if not self.scale in (1, 2, 4, 8): 635 | raise SyntaxError('memory operand error (bad scale number)') 636 | #for token in tokens: print token, 637 | #print '' 638 | for token in tokens: 639 | if token.mode == CTOKEN_IDENT and isreg(token.value): 640 | if self.base == '': 641 | self.base = token.value 642 | elif self.index == '': 643 | self.index = token.value 644 | else: 645 | print(token) 646 | raise SyntaxError('memory operand error (too many regs)') 647 | elif token.mode == CTOKEN_INT: 648 | if self.offset == 0: 649 | self.offset = token.value 650 | else: 651 | raise SyntaxError('memory operand error (too many offs)') 652 | elif token.mode == CTOKEN_OPERATOR and token.value == '+': 653 | pass 654 | else: 655 | raise SyntaxError('operand token error ' + repr(token)) 656 | return 0 657 | def info (self): 658 | if self.mode == O_REG: 659 | return 'reg:%s'%self.reg 660 | elif self.mode == O_IMM: 661 | return 'imm:%d'%self.immediate 662 | elif self.mode == O_LABEL: 663 | return 'label:%s'%self.label 664 | data = [] 665 | if self.base: 666 | data.append(self.base) 667 | if self.index: 668 | if self.scale == 1: 669 | data.append('%s'%self.index) 670 | else: 671 | data.append('%s * %d'%(self.index, self.scale)) 672 | if self.offset != 0: 673 | data.append('0x%x'%(self.offset)) 674 | size = '' 675 | if self.size == 1: size = '8' 676 | elif self.size == 2: size = '16' 677 | elif self.size == 4: size = '32' 678 | elif self.size == 8: size = '64' 679 | return 'mem%s:[%s]'%(size, ' + '.join(data)) 680 | def translate (self, inline = 0): 681 | prefix = r'%' 682 | if inline: prefix = r'%%' 683 | if self.mode == O_REG: 684 | return prefix + self.reg 685 | if self.mode == O_IMM: 686 | return '$' + hex(self.immediate) 687 | if self.mode == O_LABEL: 688 | return self.label 689 | text = '' 690 | base = self.base and (prefix + self.base) or '' 691 | index = self.index and (prefix + self.index) or '' 692 | if not self.index: 693 | text = '(%s)'%base 694 | else: 695 | text = '(%s,%s,%d)'%(base, index, self.scale) 696 | if self.offset: 697 | text = '0x%x%s'%(self.offset, text) 698 | if self.segment: 699 | text = '%s:%s'%(self.segment, text) 700 | return text 701 | def __repr__ (self): 702 | return self.info() + ' -> ' + self.translate() 703 | 704 | 705 | 706 | #---------------------------------------------------------------------- 707 | # cencoding 708 | #---------------------------------------------------------------------- 709 | class cencoding (object): 710 | 711 | def __init__ (self, tokens = None): 712 | self.reset() 713 | if tokens != None: 714 | self.parse(tokens) 715 | self.name = 'cencoding' 716 | 717 | def reset (self): 718 | self.label = '' 719 | self.prefix = '' 720 | self.instruction = '' 721 | self.operands = [] 722 | self.tokens = None 723 | self.empty = False 724 | return 0 725 | 726 | def parse (self, tokens = None): 727 | if type(tokens) == type(''): 728 | tokens = tokenize(tokens) 729 | tokens = [ n for n in tokens ] 730 | while len(tokens) > 0: 731 | if not tokens[-1].mode in (CTOKEN_ENDF, CTOKEN_ENDL): 732 | break 733 | tokens.pop() 734 | if len(tokens) == 0: 735 | self.empty = True 736 | return 0 737 | self.reset() 738 | self.tokens = tokens 739 | self.__parse_label() 740 | self.__parse_prefix() 741 | self.__parse_instruction() 742 | self.__parse_operands() 743 | self.__update() 744 | self.tokens = None 745 | return 0 746 | 747 | def __parse_label (self): 748 | if len(self.tokens) < 2: 749 | return 0 750 | t1, t2 = self.tokens[:2] 751 | if t2.mode == CTOKEN_OPERATOR and t2.value == ':': 752 | if t1.mode != CTOKEN_IDENT: 753 | raise SyntaxError('error label type') 754 | self.label = t1.value 755 | self.tokens = self.tokens[2:] 756 | return 0 757 | 758 | def __parse_prefix (self): 759 | prefix = [ 'lock', 'rep', 'repne', 'repnz', 'repe', 'repz' ] 760 | segments = [ 'cs', 'ss', 'ds', 'es', 'fs', 'gs' ] 761 | while len(self.tokens) >= 1: 762 | t1 = self.tokens[0] 763 | if t1.mode != CTOKEN_IDENT: 764 | break 765 | text = t1.value.lower() 766 | if (not text in prefix) and (not text in segments): 767 | break 768 | self.prefix += ' ' + text 769 | self.prefix = self.prefix.strip(' ') 770 | self.tokens = self.tokens[1:] 771 | return 0 772 | 773 | def __parse_instruction (self): 774 | if len(self.tokens) < 1: 775 | return 0 776 | t1 = self.tokens[0] 777 | self.tokens = self.tokens[1:] 778 | if t1.mode != CTOKEN_IDENT: 779 | raise SyntaxError('instruction type error') 780 | self.instruction = t1.value 781 | return 0 782 | 783 | def __parse_operands (self): 784 | operands = [] 785 | while len(self.tokens) > 0: 786 | size = len(self.tokens) 787 | pos = size 788 | for i in range(size): 789 | if self.tokens[i].mode == CTOKEN_OPERATOR: 790 | if self.tokens[i].value == ',': 791 | pos = i 792 | break 793 | operands.append(self.tokens[:pos]) 794 | self.tokens = self.tokens[pos + 1:] 795 | for tokens in operands: 796 | n = coperand(tokens) 797 | self.operands.append(n) 798 | operands = None 799 | return 0 800 | 801 | def __update (self): 802 | self.size = 0 803 | for operand in self.operands: 804 | if operand.size > self.size: 805 | self.size = operand.size 806 | if self.prefix == '' and self.instruction == '': 807 | if len(self.operands) == 0: 808 | self.empty = True 809 | return 0 810 | 811 | def translate_instruction (self): 812 | lower = self.instruction.islower() 813 | instruction = self.instruction.lower() 814 | if instruction == 'align': 815 | return '.align' 816 | if instruction in instreplace: 817 | instruction = instreplace[instruction] 818 | postfix = False 819 | if len(self.operands) == 1: 820 | o = self.operands[0] 821 | if o.mode == O_MEM: 822 | postfix = True 823 | elif o.mode == O_LABEL: 824 | postfix = True 825 | elif len(self.operands) == 2: 826 | o1, o2 = self.operands[:2] 827 | if o1.mode == O_IMM and o2.mode == O_MEM: 828 | postfix = True 829 | if o1.mode == O_MEM and o2.mode == O_IMM: 830 | postfix = True 831 | if o1.mode == O_IMM and o2.mode == O_LABEL: 832 | postfix = True 833 | if o1.mode == O_LABEL and o2.mode == O_IMM: 834 | postfix = True 835 | if postfix: 836 | if self.size == 1: 837 | instruction += 'b' 838 | elif self.size == 2: 839 | instruction += 'w' 840 | elif self.size == 4: 841 | instruction += 'l' 842 | elif self.size == 8: 843 | instruction += 'q' 844 | if not lower: 845 | instruction = instruction.upper() 846 | return instruction 847 | 848 | def translate_operand (self, id, inline = 0): 849 | desc = [] 850 | if self.instruction.lower() == 'align': 851 | size = 4 852 | if len(self.operands) > 0: 853 | op = self.operands[0] 854 | if op.mode == O_IMM: 855 | size = op.immediate 856 | if id == 0: 857 | return '%d, 0x90'%size 858 | return '' 859 | if id < 0 or id >= len(self.operands): 860 | raise KeyError('operand id out of range') 861 | text = self.operands[id].translate(inline) 862 | return text 863 | 864 | def __repr__ (self): 865 | text = '' 866 | if self.label: 867 | text += '%s: '%self.label 868 | if self.prefix: 869 | text += '%s '%self.prefix 870 | text += self.translate_instruction() 871 | text += ' ' 872 | text += self.translate_operands() 873 | return text 874 | 875 | 876 | #---------------------------------------------------------------------- 877 | # csynth 878 | #---------------------------------------------------------------------- 879 | class csynthesis (object): 880 | 881 | def __init__ (self, source = None): 882 | self.reset() 883 | if source != None: 884 | self.parse(source) 885 | self.name = 'csynthesis' 886 | 887 | def reset (self): 888 | self.source = '' 889 | self.tokens = [] 890 | self.encoding = [] 891 | self.labels = {} 892 | self.references = {} 893 | self.lines = [] 894 | self.memos = {} 895 | self.table = [] 896 | self.vars = {} 897 | self.maps = {} 898 | self.variables = [] 899 | self.registers = {} 900 | self.amd64 = False 901 | self.size = 0 902 | self.error = '' 903 | 904 | def parse (self, source = None): 905 | self.reset() 906 | if self.__tokenizer(source) != 0: 907 | return -1 908 | if self.__encoding() != 0: 909 | return -2 910 | if self.__analyse() != 0: 911 | return -3 912 | return 0 913 | 914 | def __tokenizer (self, source = None): 915 | scanner = cscanner(source) 916 | tokens = [] 917 | while 1: 918 | token = scanner.next() 919 | if token == None: 920 | text = '%d: %s'%(scanner.row, scanner.error) 921 | self.error = text 922 | return -1 923 | tokens.append(token) 924 | if token.mode == CTOKEN_ENDF: 925 | break 926 | self.tokens = [ n for n in tokens ] 927 | while len(tokens) > 0: 928 | size = len(tokens) 929 | pos = size - 1 930 | for i in range(size): 931 | if tokens[i].mode == CTOKEN_ENDL: 932 | pos = i 933 | break 934 | self.lines.append(tokens[:pos + 1]) 935 | tokens = tokens[pos + 1:] 936 | for i in range(len(self.lines)): 937 | lineno = i + 1 938 | if lineno in scanner.memo: 939 | self.memos[i] = scanner.memo[lineno].strip('\r\n\t ') 940 | else: 941 | self.memos[i] = '' 942 | scanner = None 943 | return 0 944 | 945 | def __encoding (self): 946 | lineno = 1 947 | for tokens in self.lines: 948 | try: 949 | encoding = cencoding(tokens) 950 | except SyntaxError as e: 951 | text = '%d: %s'%(lineno, e) 952 | self.error = text 953 | return -1 954 | self.encoding.append(encoding) 955 | lineno += 1 956 | if len(self.lines) != len(self.encoding): 957 | raise Exception('core fault') 958 | return 0 959 | 960 | def __analyse (self): 961 | self.size = len(self.lines) 962 | index = 0 963 | amd64 = ('rax', 'rbx', 'rcx', 'rdx', 'rdi', 'rsi', 'rbp', 'rsp') 964 | for i in range(self.size): 965 | encoding = self.encoding[i] 966 | if encoding.label: 967 | index += 1 968 | self.labels[encoding.label] = (i, index) 969 | varlist = [] 970 | for i in range(self.size): 971 | encoding = self.encoding[i] 972 | for j in range(len(encoding.operands)): 973 | operand = encoding.operands[j] 974 | if operand.mode == O_LABEL: 975 | if not operand.label in self.labels: 976 | varlist.append((operand.label, i, j)) 977 | else: 978 | desc = self.references.get(operand.label, []) 979 | desc.append((i, j)) 980 | self.references[operand.label] = desc 981 | elif operand.mode == O_REG: 982 | reg = operand.reg.lower() 983 | self.registers[reg] = 1 984 | if reg in amd64: 985 | self.amd64 = True 986 | vartable = [] 987 | for var, line, pos in varlist: 988 | if pos == 0: vartable.append((var, line, pos)) 989 | for var, line, pos in varlist: 990 | if pos != 0: vartable.append((var, line, pos)) 991 | names = {} 992 | for i in range(len(vartable)): 993 | var, line, pos = vartable[i] 994 | desc = self.vars.get(var, []) 995 | if len(desc) == 0: 996 | index = len(names) 997 | names[var] = index 998 | self.table.append((var, line, pos, index)) 999 | writable = pos == 0 and 1 or 0 1000 | self.maps[var] = (index, writable) 1001 | self.variables.append((index, var, writable)) 1002 | else: 1003 | index = names[var] 1004 | desc.append((var, line, pos, index)) 1005 | self.vars[var] = desc 1006 | indent1 = 0 1007 | indent2 = 0 1008 | for i in range(self.size): 1009 | encoding = self.encoding[i] 1010 | encoding.inst = encoding.translate_instruction() 1011 | if encoding.label and (not encoding.empty): 1012 | if len(encoding.label) > indent1: 1013 | indent1 = len(encoding.label) 1014 | if len(encoding.inst) > indent2: 1015 | indent2 = len(encoding.inst) 1016 | self.indent1 = indent1 + 2 1017 | self.indent2 = indent2 1018 | if self.indent1 < 4: self.indent1 = 4 1019 | if self.indent2 < 4: self.indent2 = 4 1020 | return 0 1021 | 1022 | def get_label (self, lineno, clabel = 0): 1023 | if lineno < 0 or lineno >= self.size: 1024 | raise KeyError('line number out of range') 1025 | encoding = self.encoding[lineno] 1026 | if encoding.label == '': 1027 | return '' 1028 | if clabel == 0: 1029 | return encoding.label + ':' 1030 | line, index = self.labels[encoding.label] 1031 | return '%d:'%index 1032 | 1033 | def get_instruction (self, lineno): 1034 | if lineno < 0 or lineno >= self.size: 1035 | raise KeyError('line number out of range') 1036 | encoding = self.encoding[lineno] 1037 | if encoding.empty: 1038 | return '' 1039 | source = encoding.prefix + ' ' + encoding.inst 1040 | source = source.strip(' ') 1041 | return source 1042 | 1043 | def get_operand (self, lineno, id, clabel = 0, inline = 0): 1044 | if lineno < 0 or lineno >= self.size: 1045 | raise KeyError('line number out of range') 1046 | encoding = self.encoding[lineno] 1047 | if id < 0 or id >= len(encoding.operands): 1048 | raise KeyError('operand id out of range') 1049 | operand = encoding.operands[id] 1050 | if operand.mode in (O_IMM, O_REG, O_MEM): 1051 | return operand.translate(inline) 1052 | label = operand.label 1053 | if label in self.labels: # this is a jmp label 1054 | if not clabel: 1055 | return label 1056 | line, index = self.labels[label] 1057 | if line <= lineno: 1058 | return '%db'%index 1059 | return '%df'%index 1060 | if label in self.vars: # this is a variable 1061 | id, writable = self.maps[label] 1062 | return '%%%d'%id 1063 | return '$' + label 1064 | 1065 | def synthesis (self, lineno, clabel = 0, inline = 0, align = 0): 1066 | if lineno < 0 or lineno >= self.size: 1067 | raise KeyError('line number out of range') 1068 | encoding = self.encoding[lineno] 1069 | source = self.get_label(lineno, clabel) 1070 | # 内容缩进 1071 | indent = self.indent1 1072 | if clabel: 1073 | indent = len(self.labels) + 2 1074 | indent = int(((indent + 1) / 2) * 2) 1075 | source = source.ljust(indent) 1076 | # 没有指令 1077 | if encoding.empty: 1078 | return source 1079 | instruction = self.get_instruction(lineno) 1080 | if align: 1081 | indent = int(((self.indent2 + 3) / 4 ) * 4) 1082 | instruction = instruction.ljust(indent) 1083 | source += instruction + ' ' 1084 | if encoding.instruction.lower() == 'align': 1085 | size = 4 1086 | if len(encoding.operands) > 0: 1087 | operand = encoding.operands[0] 1088 | if operand.mode == O_IMM: 1089 | size = operand.immediate 1090 | source += '%d, 0x90'%size 1091 | elif encoding.inst.lower() in ('.byte', '.int', '.short'): 1092 | operands = [] 1093 | for i in range(len(encoding.operands)): 1094 | op = encoding.operands[i] 1095 | text = hex(op.immediate) 1096 | operands.append(text) 1097 | source += ', '.join(operands) 1098 | else: 1099 | operands = [] 1100 | for i in range(len(encoding.operands)): 1101 | op = self.get_operand(lineno, i, clabel, inline) 1102 | operands.append(op) 1103 | operands.reverse() 1104 | source += ', '.join(operands) 1105 | source = source.rstrip(' ') 1106 | return source 1107 | 1108 | def getvars (self, mode = 0): 1109 | vars = [] 1110 | for id, name, writable in self.variables: 1111 | if mode == 0 and writable != 0: 1112 | vars.append('"=m"(%s)'%name) 1113 | elif mode == 1 and writable == 0: 1114 | vars.append('"m"(%s)'%name) 1115 | text = ', '.join(vars) 1116 | return text 1117 | 1118 | def getregs (self): 1119 | if self.amd64: 1120 | return '"rsi", "rdi", "rax", "rbx", "rcx", "rdx"' 1121 | return '"esi", "edi", "eax", "ebx", "ecx", "edx"' 1122 | 1123 | 1124 | #---------------------------------------------------------------------- 1125 | # intel2gas 1126 | #---------------------------------------------------------------------- 1127 | class CIntel2GAS (object): 1128 | 1129 | def __init__ (self): 1130 | self.synthesis = csynthesis() 1131 | self.config = {} 1132 | self.config['align'] = 0 1133 | self.config['inline'] = 0 1134 | self.config['clabel'] = 0 1135 | self.config['memo'] = 0 1136 | self.error = '' 1137 | self.lines = [] 1138 | self.output = [] 1139 | self.option() 1140 | 1141 | def option (self, align = 1, inline = 1, clabel = 1, memo = 1): 1142 | self.config['align'] = align 1143 | self.config['clabel'] = clabel 1144 | self.config['inline'] = inline 1145 | self.config['memo'] = memo 1146 | return 0 1147 | 1148 | def __parse (self, source, clabel, inline, align): 1149 | self.lines = [] 1150 | self.output = [] 1151 | self.memos = {} 1152 | retval = self.synthesis.parse(source) 1153 | self.error = self.synthesis.error 1154 | if retval != 0: 1155 | return retval 1156 | for i in range(self.synthesis.size): 1157 | text = self.synthesis.synthesis(i, clabel, inline, align) 1158 | self.lines.append(text) 1159 | memo = self.synthesis.memos[i].strip('\r\n\t ') 1160 | if memo[:1] == ';': memo = memo[1:] 1161 | memo = memo.strip('\r\n\t ') 1162 | if memo[:2] != '//' and memo != '': memo = '//' + memo 1163 | self.memos[i] = memo 1164 | self.maxsize = 0 1165 | for text in self.lines: 1166 | if len(text) > self.maxsize: 1167 | self.maxsize = len(text) 1168 | self.maxsize = ((self.maxsize + 6) / 2) * 2 1169 | return 0 1170 | 1171 | def intel2gas (self, source): 1172 | self.lines = [] 1173 | self.output = [] 1174 | clabel = self.config['clabel'] 1175 | inline = self.config['inline'] 1176 | align = self.config['align'] 1177 | memo = self.config['memo'] 1178 | retval = self.__parse(source, clabel, inline, align) 1179 | prefix = '' 1180 | if retval != 0: 1181 | return retval 1182 | if inline: 1183 | self.output.append('__asm__ __volatile__ (') 1184 | prefix = ' ' 1185 | for i in range(self.synthesis.size): 1186 | line = self.lines[i] 1187 | if line.strip('\r\n\t ') == '': 1188 | if self.memos[i] == '' or memo == 0: 1189 | if inline: 1190 | self.output.append(prefix + '') 1191 | else: 1192 | self.output.append('') 1193 | else: 1194 | self.output.append(prefix + self.memos[i]) 1195 | else: 1196 | if inline: 1197 | line = '"' + line + '\\n"' 1198 | if self.memos[i] and memo: 1199 | line = line.ljust(int(self.maxsize)) + self.memos[i] 1200 | self.output.append(prefix + line) 1201 | if inline: 1202 | self.output.append(' :' + self.synthesis.getvars(0)) 1203 | self.output.append(' :' + self.synthesis.getvars(1)) 1204 | self.output.append(' :"memory", ' + self.synthesis.getregs()) 1205 | self.output.append(');') 1206 | return 0 1207 | 1208 | 1209 | #---------------------------------------------------------------------- 1210 | # main 1211 | #---------------------------------------------------------------------- 1212 | def main (): 1213 | align = 0 1214 | memo = 0 1215 | clabel = 0 1216 | inline = 0 1217 | for argv in sys.argv[1:]: 1218 | argv = argv.lower() 1219 | if argv == '-i': inline = 1 1220 | if argv == '-a': align = 1 1221 | if argv == '-l': clabel = 1 1222 | if argv == '-m': memo = 1 1223 | source = sys.stdin.read() 1224 | intel2gas = CIntel2GAS() 1225 | intel2gas.option(align, inline, clabel, memo) 1226 | if intel2gas.intel2gas(source) != 0: 1227 | sys.stderr.write('error: ' + intel2gas.error + '\n') 1228 | return -1 1229 | for line in intel2gas.output: 1230 | print (line) 1231 | return 0 1232 | 1233 | 1234 | #---------------------------------------------------------------------- 1235 | # testing case 1236 | #---------------------------------------------------------------------- 1237 | if __name__ == '__main__': 1238 | def test1(): 1239 | scanner = cscanner(open('intel2gas.asm')) 1240 | for token in scanner: 1241 | print (token) 1242 | print (REGSIZE) 1243 | def test2(): 1244 | print (coperand('12')) 1245 | print (coperand('loop_pixel')) 1246 | print (coperand('eax')) 1247 | print (coperand('ebx')) 1248 | print (coperand('ax')) 1249 | print (coperand('al')) 1250 | print (coperand('[eax]')) 1251 | print (coperand('[eax + ebx]')) 1252 | print (coperand('[eax + 2*ebx]')) 1253 | print (coperand('[eax + 2*ebx + 1]')) 1254 | print (coperand('[eax + ebx + 3]')) 1255 | print (coperand('[eax + 1]')) 1256 | print (coperand('[eax*2]')) 1257 | print (coperand('[eax*2 + 1]')) 1258 | print (coperand('dword ptr [eax]')) 1259 | print (coperand('word ptr [eax+ebx+3]')) 1260 | print (coperand('byte ptr [es:eax+ebx*4+3]')) 1261 | print (coperand('byte ptr abc')) 1262 | return 0 1263 | def test3(): 1264 | synth = csynth(open('intel2gas.asm')) 1265 | for i in range(len(synth.encoding)): 1266 | print ('%d: '%(i + 1), synth.encoding[i]) 1267 | print (synth.labels) 1268 | print (synth.references) 1269 | print (synth.vars) 1270 | print (synth.variables) 1271 | return 0 1272 | def test4(): 1273 | synth = csynthesis() 1274 | if synth.parse(open('intel2gas.asm')) != 0: 1275 | print ('error', synth.error) 1276 | return 0 1277 | for i in range(len(synth.encoding)): 1278 | print ('%3d: '%(i + 1), synth.synthesis(i, 1, 1, 1)) 1279 | print (synth.getvars(0)) 1280 | print (synth.getvars(1)) 1281 | print (synth.indent1, synth.indent2) 1282 | def test5(): 1283 | intel2gas = CIntel2GAS() 1284 | if intel2gas.intel2gas(open('intel2gas.asm')): 1285 | return -1 1286 | for line in intel2gas.output: 1287 | print (line) 1288 | return 0 1289 | #test5() 1290 | main() 1291 | 1292 | 1293 | -------------------------------------------------------------------------------- /intel2gui.pyw: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | #====================================================================== 4 | # 5 | # intel2gas.py - intel assembly to at&t format 6 | # 7 | # NOTE: 8 | # for more information, please see the readme file 9 | # 10 | #====================================================================== 11 | import sys, time 12 | import intel2gas 13 | 14 | from Tkinter import * 15 | 16 | 17 | class Intel2GUI (Frame): 18 | 19 | def __init__ (self, parent=None, text='Intel2GAS', file=None): 20 | Frame.__init__(self, parent) 21 | self.pack(expand=YES, fill=BOTH) # make me expandable 22 | self.makewidgets() 23 | self.intel2gas = intel2gas.CIntel2GAS() 24 | 25 | def settext (self, widget, text): 26 | widget.delete('1.0', END) 27 | widget.insert('1.0', text) 28 | widget.mark_set(INSERT, '1.0') 29 | 30 | def gettext(self, widget): 31 | return widget.get('1.0', END+'-1c') 32 | 33 | def makewidgets (self): 34 | form = Frame(self, width=800, height=600) 35 | botm = Frame(self) 36 | left = Frame(form) 37 | rite = Frame(form) 38 | botm.pack(side=BOTTOM, expand=YES, fill=X) 39 | form.pack(side=TOP, expand=YES, fill=BOTH) 40 | left.pack(side=LEFT, expand=YES, fill=BOTH, padx=2, pady=2) 41 | rite.pack(side=RIGHT, expand=YES, fill=BOTH, padx=2, pady=2) 42 | text1 = Text(left, relief=SUNKEN, width=40, height=25, wrap='none') 43 | text2 = Text(rite, relief=SUNKEN, width=40, height=25, wrap='none') 44 | sbar1 = Scrollbar(left) 45 | sbar2 = Scrollbar(rite) 46 | sbar3 = Scrollbar(left, orient='horizontal') 47 | sbar4 = Scrollbar(rite, orient='horizontal') 48 | text1.config(yscrollcommand=sbar1.set) 49 | text2.config(yscrollcommand=sbar2.set) 50 | text1.config(xscrollcommand=sbar3.set) 51 | text2.config(xscrollcommand=sbar4.set) 52 | sbar1.config(command=text1.yview) 53 | sbar2.config(command=text2.yview) 54 | sbar3.config(command=text1.xview) 55 | sbar4.config(command=text2.xview) 56 | sbar1.pack(side=RIGHT, fill=Y) 57 | sbar2.pack(side=RIGHT, fill=Y) 58 | sbar3.pack(side=BOTTOM, fill=X) 59 | sbar4.pack(side=BOTTOM, fill=X) 60 | text1.pack(side=LEFT, expand=YES, fill=BOTH) 61 | text2.pack(side=LEFT, expand=YES, fill=BOTH) 62 | self.text1 = text1 63 | self.text2 = text2 64 | text3 = Text(botm, width=40, height=4) 65 | text3.pack(side=TOP, expand=YES, fill=X, padx=2, pady=2) 66 | self.text3 = text3 67 | self.int1 = IntVar() 68 | self.int2 = IntVar() 69 | self.int3 = IntVar() 70 | self.int4 = IntVar() 71 | cb1 = Checkbutton(botm, text = 'inline mode', variable=self.int1) 72 | cb2 = Checkbutton(botm, text = 'operands align', variable=self.int2) 73 | cb3 = Checkbutton(botm, text = 'convert label', variable=self.int3) 74 | cb4 = Checkbutton(botm, text = 'including memo', variable=self.int4) 75 | cb1.pack(side=LEFT, padx=2, pady=2) 76 | cb2.pack(side=LEFT, padx=2, pady=2) 77 | cb3.pack(side=LEFT, padx=2, pady=2) 78 | cb4.pack(side=LEFT, padx=2, pady=2) 79 | self.int1.set(1) 80 | self.int2.set(1) 81 | self.int3.set(1) 82 | self.int4.set(0) 83 | font1 = ('Courier New', 10, 'bold') 84 | font2 = ('Courier New', 10, '') 85 | btn = Button(botm, text='Intel2GAS', command=self.convert, padx=2, pady=2) 86 | btn.pack(side=RIGHT, padx=2, pady=2) 87 | btn.config(font=font1) 88 | text = 'HELP: type intel format assembly in the left edit box' 89 | self.settext(self.text3, text) 90 | btn = Button(botm, text='Clear Code', command=self.clear, padx=2, pady=2) 91 | btn.pack(side=RIGHT, padx=2, pady=2) 92 | btn.config(font=font1) 93 | self.text1.focus() 94 | 95 | def convert (self): 96 | self.settext(self.text2, '') 97 | self.settext(self.text3, '') 98 | inline = self.int1.get() 99 | align = self.int2.get() 100 | clabel = self.int3.get() 101 | memo = self.int4.get() 102 | self.intel2gas.option(align, inline, clabel, memo) 103 | source = self.gettext(self.text1) 104 | if type(source) == type(u''): 105 | source = source.encode('gbk') 106 | import cStringIO 107 | sio = cStringIO.StringIO(source) 108 | if self.intel2gas.intel2gas(sio) != 0: 109 | self.settext(self.text3, 'error: ' + self.intel2gas.error) 110 | lineno = int(self.intel2gas.error.split(':')[0]) 111 | self.text1.tag_add(SEL, '%d.0'%lineno, '%d.0'%(lineno + 1)) 112 | self.text1.mark_set(INSERT, '%d.0'%lineno) 113 | return -1 114 | text = '\r\n'.join(self.intel2gas.output) 115 | self.settext(self.text2, text) 116 | return 0 117 | 118 | def clear (self): 119 | self.settext(self.text1, '') 120 | self.settext(self.text2, '') 121 | self.settext(self.text3, '') 122 | self.text1.focus() 123 | 124 | 125 | def demo4(): 126 | fields = 'Name', 'Job', 'Pay' 127 | 128 | def fetch(variables): 129 | for variable in variables: 130 | print 'Input => "%s"' % variable.get() # get from var 131 | 132 | def makeform(root, fields): 133 | form = Frame(root) # make outer frame 134 | left = Frame(form) # make two columns 135 | rite = Frame(form) 136 | form.pack(fill=X) 137 | left.pack(side=LEFT) 138 | rite.pack(side=RIGHT, expand=YES, fill=X) # grow horizontal 139 | 140 | variables = [] 141 | for field in fields: 142 | lab = Label(left, width=5, text=field) # add to columns 143 | ent = Entry(rite) 144 | lab.pack(side=TOP) 145 | ent.pack(side=TOP, fill=X) # grow horizontal 146 | var = StringVar() 147 | ent.config(textvariable=var) # link field to var 148 | var.set('enter here') 149 | variables.append(var) 150 | return variables 151 | 152 | root = Tk() 153 | vars = makeform(root, fields) 154 | Button(root, text='Fetch', 155 | command=(lambda v=vars: fetch(v))).pack(side=LEFT) 156 | #Quitter(root).pack(side=RIGHT) 157 | root.bind('', (lambda event, v=vars: fetch(v))) 158 | root.mainloop() 159 | return 0 160 | 161 | if __name__ == '__main__': 162 | def demo1(): 163 | root = Tk() 164 | root.title('LINWEI') 165 | labelfont = ('times', 20, 'bold') 166 | widget = Label(root, text = 'Hello config world') 167 | widget.config(bg='black', fg='yellow') 168 | widget.config(font = labelfont) 169 | widget.config(height = 3, width = 20) 170 | widget.pack(expand = YES, fill = BOTH) 171 | root.mainloop() 172 | def demo2(): 173 | widget = Button(text='Spam', padx=10, pady=10) 174 | widget.pack(padx=20, pady=20) 175 | widget.config(cursor='gumby') 176 | widget.config(bd=8, relief=RAISED) 177 | widget.config(bg='dark green', fg='white') 178 | widget.config(font=('helvetica', 20, 'underline italic')) 179 | mainloop() 180 | def demo3(): 181 | def fetch(): 182 | print 'Input => "%s"' % ent.get() # get text 183 | root = Tk() 184 | ent = Entry(root) 185 | ent.insert(0, 'Type words here') # set text 186 | ent.pack(side=TOP, fill=X) # grow horiz 187 | ent.focus() # save a click 188 | ent.bind('', (lambda event: fetch())) # on enter key 189 | btn = Button(root, text='Fetch', command=fetch) # and on button 190 | btn.pack(side=LEFT) 191 | #Quitter(root).pack(side=RIGHT) 192 | root.mainloop() 193 | def demo5(): 194 | root = Tk() 195 | root.title('Intel2GAS (Assembly format convertion from Intel to GNU AS)') 196 | Intel2GUI(root) 197 | root.mainloop() 198 | demo5() 199 | 200 | 201 | -------------------------------------------------------------------------------- /long-mode.asm: -------------------------------------------------------------------------------- 1 | ; Check if CPUID is supported by attempting to flip the ID bit (bit 21) in 2 | ; the FLAGS register. If we can flip it, CPUID is available. 3 | 4 | ; Copy FLAGS in to EAX via stack 5 | pushfd 6 | pop eax 7 | 8 | ; Copy to ECX as well for comparing later on 9 | mov ecx, eax 10 | 11 | ; Flip the ID bit 12 | xor eax, 1 << 21 13 | 14 | ; Copy EAX to FLAGS via the stack 15 | push eax 16 | popfd 17 | 18 | ; Copy FLAGS back to EAX (with the flipped bit if CPUID is supported) 19 | pushfd 20 | pop eax 21 | 22 | ; Restore FLAGS from the old version stored in ECX (i.e. flipping the ID bit 23 | ; back if it was ever flipped). 24 | push ecx 25 | popfd 26 | 27 | ; Compare EAX and ECX. If they are equal then that means the bit wasn't 28 | ; flipped, and CPUID isn't supported. 29 | xor eax, ecx 30 | jz .NoCPUID 31 | ret 32 | 33 | mov eax, 0x80000000 ; Set the A-register to 0x80000000. 34 | cpuid ; CPU identification. 35 | cmp eax, 0x80000001 ; Compare the A-register with 0x80000001. 36 | jb .NoLongMode ; It is less, there is no long mode. 37 | 38 | mov eax, 0x80000001 ; Set the A-register to 0x80000001. 39 | cpuid ; CPU identification. 40 | test edx, 1 << 29 ; Test if the LM-bit, which is bit 29, is set in the D-register. 41 | jz .NoLongMode ; They aren't, there is no long mode. 42 | 43 | mov eax, cr0 ; Set the A-register to control register 0. 44 | and eax, 01111111111111111111111111111111b ; Clear the PG-bit, which is bit 31. 45 | mov cr0, eax ; Set control register 0 to the A-register. 46 | 47 | mov edi, 0x1000 ; Set the destination index to 0x1000. 48 | mov cr3, edi ; Set control register 3 to the destination index. 49 | xor eax, eax ; Nullify the A-register. 50 | mov ecx, 4096 ; Set the C-register to 4096. 51 | rep stosd ; Clear the memory. 52 | mov edi, cr3 ; Set the destination index to control register 3. 53 | 54 | mov DWORD [edi], 0x2003 ; Set the uint32_t at the destination index to 0x2003. 55 | add edi, 0x1000 ; Add 0x1000 to the destination index. 56 | mov DWORD [edi], 0x3003 ; Set the uint32_t at the destination index to 0x3003. 57 | add edi, 0x1000 ; Add 0x1000 to the destination index. 58 | mov DWORD [edi], 0x4003 ; Set the uint32_t at the destination index to 0x4003. 59 | add edi, 0x1000 ; Add 0x1000 to the destination index. 60 | 61 | mov ebx, 0x00000003 ; Set the B-register to 0x00000003. 62 | mov ecx, 512 ; Set the C-register to 512. 63 | 64 | .SetEntry: 65 | mov DWORD [edi], ebx ; Set the uint32_t at the destination index to the B-register. 66 | add ebx, 0x1000 ; Add 0x1000 to the B-register. 67 | add edi, 8 ; Add eight to the destination index. 68 | loop .SetEntry ; Set the next entry. 69 | 70 | mov eax, cr4 ; Set the A-register to control register 4. 71 | or eax, 1 << 5 ; Set the PAE-bit, which is the 6th bit (bit 5). 72 | mov cr4, eax ; Set control register 4 to the A-register. 73 | 74 | BITS 32 75 | mov eax, cr4 76 | or eax, (1<<12) ;CR4.LA57 77 | mov cr4, eax 78 | 79 | mov ecx, 0xC0000080 ; Set the C-register to 0xC0000080, which is the EFER MSR. 80 | rdmsr ; Read from the model-specific register. 81 | or eax, 1 << 8 ; Set the LM-bit which is the 9th bit (bit 8). 82 | wrmsr ; Write to the model-specific register. 83 | 84 | GDT64: ; Global Descriptor Table (64-bit). 85 | .Null: equ $ - GDT64 ; The null descriptor. 86 | dw 0xFFFF ; Limit (low). 87 | dw 0 ; Base (low). 88 | db 0 ; Base (middle) 89 | db 0 ; Access. 90 | db 1 ; Granularity. 91 | db 0 ; Base (high). 92 | .Code: equ $ - GDT64 ; The code descriptor. 93 | dw 0 ; Limit (low). 94 | dw 0 ; Base (low). 95 | db 0 ; Base (middle) 96 | db 10011010b ; Access (exec/read). 97 | db 10101111b ; Granularity, 64 bits flag, limit19:16. 98 | db 0 ; Base (high). 99 | .Data: equ $ - GDT64 ; The data descriptor. 100 | dw 0 ; Limit (low). 101 | dw 0 ; Base (low). 102 | db 0 ; Base (middle) 103 | db 10010010b ; Access (read/write). 104 | db 00000000b ; Granularity. 105 | db 0 ; Base (high). 106 | .Pointer: ; The GDT-pointer. 107 | dw $ - GDT64 - 1 ; Limit. 108 | dq GDT64 ; Base. 109 | 110 | lgdt [GDT64.Pointer] ; Load the 64-bit global descriptor table. 111 | jmp GDT64.Code:Realm64 ; Set the code segment and enter 64-bit long mode. 112 | 113 | ; Use 64-bit. 114 | [BITS 64] 115 | 116 | Realm64: 117 | cli ; Clear the interrupt flag. 118 | mov ax, GDT64.Data ; Set the A-register to the data descriptor. 119 | mov ds, ax ; Set the data segment to the A-register. 120 | mov es, ax ; Set the extra segment to the A-register. 121 | mov fs, ax ; Set the F-segment to the A-register. 122 | mov gs, ax ; Set the G-segment to the A-register. 123 | mov ss, ax ; Set the stack segment to the A-register. 124 | mov edi, 0xB8000 ; Set the destination index to 0xB8000. 125 | mov rax, 0x1F201F201F201F20 ; Set the A-register to 0x1F201F201F201F20. 126 | mov ecx, 500 ; Set the C-register to 500. 127 | rep stosq ; Clear the screen. 128 | hlt ; Halt the processor. 129 | 130 | .NoCPUID: 131 | hlt ;If the CPU is incapable of identifying itself, you're in trouble. 132 | 133 | .NoLongMode: 134 | [BITS 32] ;Fallback 135 | --------------------------------------------------------------------------------