├── TODO.md ├── tests ├── __init__.py └── test_hackercodecs.py ├── LICENSE.Expat ├── setup.py ├── README.md └── hackercodecs └── __init__.py /TODO.md: -------------------------------------------------------------------------------- 1 | add Track 2 ABA encoding for binary data http://blog.tehinterweb.com/?p=60 2 | Port to python 3 and add hex encoding 3 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | from unittest import TestLoader 3 | import doctest 4 | import test_hackercodecs 5 | 6 | from sys import path 7 | path.append('../') 8 | import hackercodecs 9 | 10 | 11 | def HackerCodecsSuite(): 12 | loader = TestLoader() 13 | suite = loader.loadTestsFromModule(test_hackercodecs) 14 | suite.addTests(doctest.DocTestSuite(hackercodecs)) 15 | return suite 16 | -------------------------------------------------------------------------------- /LICENSE.Expat: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any person obtaining 2 | a copy of this software and associated documentation files (the 3 | "Software"), to deal in the Software without restriction, including 4 | without limitation the rights to use, copy, modify, merge, publish, 5 | distribute, sublicense, and/or sell copies of the Software, and to 6 | permit persons to whom the Software is furnished to do so, subject to 7 | the following conditions: 8 | 9 | The above copyright notice and this permission notice shall be included 10 | in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 13 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 14 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
15 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 16 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 17 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 18 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | from setuptools import setup 4 | 5 | import os 6 | import sys 7 | sys.path.insert(0,os.path.join(os.path.dirname(__file__),'hackercodecs')) 8 | import hackercodecs 9 | 10 | setup(name="hackercodecs", 11 | version="0.3", 12 | description="A set of codecs for hackers", 13 | url="https://github.com/jdukes/hackercodecs", 14 | author="Josh Dukes", 15 | author_email="hex@neg9.org", 16 | license="MIT", 17 | test_suite="tests.HackerCodecsSuite", 18 | tests_require=['hypothesis'], 19 | keywords = "hacker, codecs, CTF", 20 | long_description=hackercodecs.__doc__, 21 | packages=["hackercodecs"]) 22 | 23 | 24 | # Copyright © 2012–2015 Josh Dukes and contributors. 25 | # 26 | # This is free software: you may copy, modify, and/or distribute this work 27 | # under the terms of the Expat License. 28 | # No warranty expressed or implied. See the file ‘LICENSE.Expat’ for details. 29 | 30 | # Local variables: 31 | # coding: utf-8 32 | # End: 33 | # vim: fileencoding=utf-8 filetype=python: 34 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Hacker Codecs 2 | ============= 3 | 4 | This is a set of codecs for decoding and encoding things related to 5 | hacking and hacking CTFs. 
Specifically this was designed originally 6 | around the fact that decode('bin') doesn't exist in the standard 7 | library and there are times (especially in a CTF) where this is 8 | extremely convenient. 9 | 10 | Later 'morse' was added to easily encode and decode morse code without 11 | needing to do it (as) manually. 12 | 13 | The 'ascii85' codec was added specifically for PDF parsing in 14 | forensics challenges. This could be used, for example, with 15 | python-magic to check if a string inside of a PDF is actually a file 16 | of a specific type. 17 | 18 | The 'url' and 'entity' codecs were added as a quick way to encode and 19 | decode data for web hacking. 20 | 21 | 'yenc' is a stripped-down yEnc as used for NNTP. Headers and footers 22 | are not included; they will need to be handled elsewhere. 23 | 24 | Other encodings may have been added; a full list is available by reviewing the code. 25 | 26 | As I run across, or am told about, other obscure encoding methods I 27 | will continue to add to this library. 28 | 29 | 30 | Copying 31 | ------- 32 | 33 | Copyright © 2012–2015 Josh Dukes and contributors. 34 | 35 | This is free software: you may copy, modify, and/or distribute this work 36 | under the terms of the Expat License. 37 | No warranty expressed or implied. See the file ‘LICENSE.Expat’ for details. 
38 | 39 | 40 | -------------------------------------------------------------------------------- /tests/test_hackercodecs.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from hypothesis import given, assume, strategies as st 3 | from sys import path 4 | path.append('../') 5 | from hackercodecs import * 6 | 7 | class TestHelperFunctions(unittest.TestCase): 8 | 9 | @given(st.tuples(st.text(), st.integers(max_value=2**32))) 10 | def test_blocks(self, s): 11 | data, size = s 12 | assume(size > 0) # we don't need to check divide by zero 13 | if not ((len(data) % size) == 0): 14 | # make sure we assert here 15 | try: 16 | blocks(data, size) 17 | except AssertionError, e: 18 | assert e.message == ( 19 | "Cannot divide into blocks of size %s" % size) 20 | else: 21 | results = blocks(data, size) 22 | try: 23 | first = next(results) 24 | assert all(len(r) == len(first) 25 | for r in results) 26 | except StopIteration: 27 | pass 28 | 29 | @given(st.lists(st.booleans())) 30 | def test_parity(self, s): 31 | if sum(s) % 2 == 0: 32 | assert parity(s) == 0 33 | assert parity(s, odd=True) == 1 34 | else: 35 | assert parity(s) == 1 36 | assert parity(s, odd=True) == 0 37 | 38 | @given(st.tuples(st.text(), 39 | st.integers(min_value=0, max_value=26))) 40 | def test_rotx(self, s): 41 | data, rot = s 42 | encoded = rotx(data, rot) 43 | decoded = rotx(encoded, -rot) 44 | assert data == decoded 45 | 46 | def test_rotx_codec_generator(self): 47 | # we proved rotx above 48 | codec = rotx_codec_generator(10) 49 | self.assertEqual(codec.name, 'rot10') 50 | 51 | 52 | class TestCodecs(unittest.TestCase): 53 | @given(st.text(alphabet=''.join(i[0] for i in MORSE))) 54 | def test_morse(self, s): 55 | encoded,encoded_len = morse_encode(s) 56 | decoded, decoded_len = morse_decode(encoded) 57 | assert s.upper() == decoded 58 | 59 | @given(st.text()) 60 | def test_bin(self, s): 61 | assume(all(ord(c) <= 255 for c in s)) 62 | encoded, 
encoded_len = bin_encode(s) 63 | decoded, decoded_len = bin_decode(encoded) 64 | assert s.encode('bin') == decoded.encode('bin') 65 | 66 | @given(st.text()) 67 | def test_url(self, s): 68 | assume(all(ord(c) <= 255 for c in s)) 69 | encoded, encoded_len = bin_encode(s) 70 | decoded, decoded_len = bin_decode(encoded) 71 | assert s.encode('bin') == decoded.encode('bin') 72 | 73 | @given(st.text()) 74 | def test_entity(self, s): 75 | assume(all(ord(c) <= 255 for c in s)) 76 | encoded, encoded_len = entity_encode(s) 77 | decoded, decoded_len = entity_decode(encoded) 78 | assert s.encode('bin') == decoded.encode('bin') 79 | 80 | @given(st.text()) 81 | def test_entity_hex(self, s): 82 | assume(all(ord(c) <= 255 for c in s)) 83 | encoded, encoded_len = entity_encode_hex(s) 84 | decoded, decoded_len = entity_decode_hex(encoded) 85 | assert s.encode('bin') == decoded.encode('bin'), ( 86 | "{} != {}".format(s, decoded)) 87 | 88 | @given(st.text()) 89 | def test_ascii85(self, s): 90 | assume(all(ord(c) <= 255 for c in s)) 91 | assume(not s.endswith('\0')) # we know we can't encode this 92 | encoded, encoded_len = ascii85_encode(s) 93 | decoded, decoded_len = ascii85_decode(encoded) 94 | assert s.encode('bin') == decoded.encode('bin'), ( 95 | "{} != {}".format(repr(s), repr(decoded))) 96 | 97 | @given(st.text()) 98 | def test_y(self, s): 99 | assume(all(ord(c) <= 255 for c in s)) 100 | encoded, encoded_len = y_encode(s) 101 | decoded, decoded_len = y_decode(encoded) 102 | assert s.encode('bin') == decoded.encode('bin'), ( 103 | "{} != {}".format(repr(s), repr(decoded))) 104 | 105 | # these need a lot of fixing 106 | @given(st.text()) 107 | def test_aba_track_2(self, s): 108 | encoded, encoded_len = aba_track_2_encode(s) 109 | decoded, decoded_len = aba_track_2_decode(encoded) 110 | assert s.encode('bin') == decoded.encode('bin'), ( 111 | "{} != {}".format(repr(s), repr(decoded))) 112 | 113 | 114 | if __name__ == '__main__': 115 | unittest.main() 116 | 
-------------------------------------------------------------------------------- /hackercodecs/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | 4 | # use binascii for this stuff 5 | """ This package provides codecs useful for hacking and hacking-related CTFs. 6 | 7 | There are several codecs available once you import this module. To get 8 | a full list you can use the CODECS_IN_FILE dictionary which is used to 9 | populate the codec entries:: 10 | 11 | >>> import pprint 12 | >>> pprint.pprint(sorted(CODECS_IN_FILE.keys())) 13 | ['ascii85', 14 | 'bin', 15 | 'entity', 16 | 'entityhex', 17 | 'morse', 18 | 'rot1', 19 | 'rot10', 20 | 'rot11', 21 | 'rot12', 22 | 'rot13', 23 | 'rot14', 24 | 'rot15', 25 | 'rot16', 26 | 'rot17', 27 | 'rot18', 28 | 'rot19', 29 | 'rot2', 30 | 'rot20', 31 | 'rot21', 32 | 'rot22', 33 | 'rot23', 34 | 'rot24', 35 | 'rot25', 36 | 'rot3', 37 | 'rot4', 38 | 'rot5', 39 | 'rot6', 40 | 'rot7', 41 | 'rot8', 42 | 'rot9', 43 | 'url', 44 | 'yenc'] 45 | 46 | 47 | You should first notice all the "rot" entries. The `rot-13` codec is 48 | provided by default. 
The rest of these provide similar functionality 49 | for rapid checks of shift ciphers:: 50 | 51 | >>> pprint.pprint(['ymj vznhp gwtbs ktc ozruji tajw ymj qfed itl'.decode('rot%d' % i) for i in xrange(1,26)]) 52 | [u'xli uymgo fvsar jsb nyqtih sziv xli pedc hsk', 53 | u'wkh txlfn eurzq ira mxpshg ryhu wkh odcb grj', 54 | u'vjg swkem dtqyp hqz lworgf qxgt vjg ncba fqi', 55 | u'uif rvjdl cspxo gpy kvnqfe pwfs uif mbaz eph', 56 | u'the quick brown fox jumped over the lazy dog', 57 | u'sgd pthbj aqnvm enw itlodc nudq sgd kzyx cnf', 58 | u'rfc osgai zpmul dmv hskncb mtcp rfc jyxw bme', 59 | u'qeb nrfzh yoltk clu grjmba lsbo qeb ixwv ald', 60 | u'pda mqeyg xnksj bkt fqilaz kran pda hwvu zkc', 61 | u'ocz lpdxf wmjri ajs ephkzy jqzm ocz gvut yjb', 62 | u'nby kocwe vliqh zir dogjyx ipyl nby futs xia', 63 | u'max jnbvd ukhpg yhq cnfixw hoxk max etsr whz', 64 | u'lzw imauc tjgof xgp bmehwv gnwj lzw dsrq vgy', 65 | u'kyv hlztb sifne wfo aldgvu fmvi kyv crqp ufx', 66 | u'jxu gkysa rhemd ven zkcfut eluh jxu bqpo tew', 67 | u'iwt fjxrz qgdlc udm yjbets dktg iwt apon sdv', 68 | u'hvs eiwqy pfckb tcl xiadsr cjsf hvs zonm rcu', 69 | u'gur dhvpx oebja sbk whzcrq bire gur ynml qbt', 70 | u'ftq cguow ndaiz raj vgybqp ahqd ftq xmlk pas', 71 | u'esp bftnv mczhy qzi ufxapo zgpc esp wlkj ozr', 72 | u'dro aesmu lbygx pyh tewzon yfob dro vkji nyq', 73 | u'cqn zdrlt kaxfw oxg sdvynm xena cqn ujih mxp', 74 | u'bpm ycqks jzwev nwf rcuxml wdmz bpm tihg lwo', 75 | u'aol xbpjr iyvdu mve qbtwlk vcly aol shgf kvn', 76 | u'znk waoiq hxuct lud pasvkj ubkx znk rgfe jum'] 77 | 78 | "the quick brown fox jumped over the lazy dog" <- bingo 79 | 80 | My favorite codec, and reason I started the project, is of course 81 | morse encoding:: 82 | 83 | >>> 'SOS'.encode('morse') 84 | '... --- ...' 85 | 86 | >>> '... 
--- ...'.decode('morse') 87 | 'SOS' 88 | 89 | Morse code doesn't support the full ascii character set, nor does 90 | it support casing, so keep that in mind:: 91 | 92 | >>> 'asdf'.encode('morse').decode('morse') 93 | 'ASDF' 94 | 95 | >>> "#THIS IS MORSE CODE#".encode('morse') 96 | Traceback (most recent call last): 97 | ... 98 | AssertionError: Unencodable character '#' found. Failing 99 | 100 | Another favorite of mine is `bin`. It's only a few lines, but there's 101 | no reason to write those same lines over and over again each CTF. Just 102 | let hackercodecs handle that for you:: 103 | 104 | >>> 'asdf'.encode('bin') 105 | '01100001011100110110010001100110' 106 | 107 | >>> '01100001011100110110010001100110'.decode('bin') 108 | 'asdf' 109 | 110 | It also counts bits to make sure you're not doing something stupid:: 111 | 112 | '0110000101110011011001000110011'.decode('bin') 113 | Traceback (most recent call last): 114 | ... 115 | AssertionError: Wrong number of bits, 31 is not divisible by 8 116 | 117 | If you ever hack on web challenges you know how nice it is to have 118 | urllib handle url encoding. Since we already have this library for 119 | hacker codecs, I figured it would be worthwhile to just add 120 | that. Everything in one place is nice sometimes:: 121 | 122 | >>> "' or ''='".encode('url') 123 | '%27%20or%20%27%27%3D%27' 124 | >>> '%27%20or%20%27%27%3D%27'.decode('url') 125 | "' or ''='" 126 | 127 | Likewise entity encoding can be nice when attacking some XML based 128 | challenge:: 129 | 130 | >>> "]]>&xxe;".encode('entity') 131 | ']]&gt;&amp;xxe;' 132 | 133 | >>> ']]&gt;&amp;xxe;'.decode('entity') 134 | ']]>&xxe;' 135 | 136 | Or the hex equivalent:: 137 | 138 | >>> '<script>alert("1")</script>'.encode('entityhex') 139 | '&#x3c;script&#x3e;alert("1")&#x3c;/script&#x3e;' 140 | 141 | Then we get a little less common. 
If you're from the internet you 142 | Might know that usenet uses yEnc:: 143 | 144 | >>> print repr('asdf'.encode('yenc')) 145 | '\\x8b\\x9d\\x8e\\x90' 146 | 147 | >>> '\\x8b\\x9d\\x8e\\x90'.decode('yenc') 148 | 'asdf' 149 | 150 | And if you work on PDFs at all, you may have seen ascii85. It's kind 151 | of like base64, but not really at all:: 152 | 153 | >>> 'asdf'.encode('ascii85') 154 | '@<5sk' 155 | 156 | >>> '@<5sk'.decode('ascii85') 157 | 'asdf' 158 | 159 | In the future I'll probably add more things 160 | _ after 161 | porting everything to python 3. If there are any special requests for 162 | encodings you'd like me to add feel free to write them yourself and 163 | submit a patch. It should be pretty damn easy to add a codec based on 164 | the code below. 165 | 166 | """ 167 | # http://en.wikipedia.org/wiki/Sixbit_code_pages 168 | # http://en.wikipedia.org/wiki/Six-bit_BCD 169 | import re 170 | 171 | from urllib2 import quote as urlquote 172 | from urllib2 import unquote as urlunquote 173 | from urllib import _is_unicode 174 | from urllib import _asciire 175 | from urllib import _hextochr 176 | from xml.sax.saxutils import escape as entityquote 177 | from xml.sax.saxutils import unescape as entityunquote 178 | from codecs import register, CodecInfo 179 | 180 | from struct import pack, unpack 181 | 182 | ############################################################################### 183 | # Morse Codec Defs (International Morse Code) 184 | # Reference: 185 | # https://en.wikipedia.org/wiki/Morse_code#Letters.2C_numbers.2C_punctuation.2C_prosigns_and_non-English_variants 186 | ############################################################################### 187 | MORSE = ( 188 | ('A', ".-"), # A, a 189 | ('B', "-..."), # B, b 190 | ('C', "-.-."), # C, c 191 | ('D', "-.."), # D, d 192 | ('E', "."), # E, e 193 | ('F', "..-."), # F, f 194 | ('G', "--."), # G, g 195 | ('H', "...."), # H, h 196 | ('I', ".."), # I, i 197 | ('J', ".---"), # J, j 198 | ('K', "-.-"), # 
K, k; 199 | # also used to indicate "Invitation to Transmit" 200 | ('L', ".-.."), # L, l 201 | ('M', "--"), # M, m 202 | ('N', "-."), # N, n 203 | ('O', "---"), # O, o 204 | ('P', ".--."), # P, p 205 | ('Q', "--.-"), # Q, q 206 | ('R', ".-."), # R, r 207 | ('S', "..."), # S, s 208 | ('T', "-"), # T, t 209 | ('U', "..-"), # U, u 210 | ('V', "...-"), # V, v 211 | ('W', ".--"), # W, w 212 | ('X', "-..-"), # X, x 213 | ('Y', "-.--"), # Y, y 214 | ('Z', "--.."), # Z, z 215 | ('0', "-----"), # 0 216 | ('1', ".----"), # 1 217 | ('2', "..---"), # 2 218 | ('3', "...--"), # 3 219 | ('4', "....-"), # 4 220 | ('5', "....."), # 5 221 | ('6', "-...."), # 6 222 | ('7', "--..."), # 7 223 | ('8', "---.."), # 8 224 | ('9', "----."), # 9 225 | (' ', "/"), # Currently used to indicate character boundaries 226 | ('.', ".-.-.-"), # Period 227 | (',', "--..--"), # Comma 228 | ('?', "..--.."), # Question Mark 229 | ('\'', ".----."), # Apostrophe 230 | ('!', "-.-.--"), # Exclamation Point, Digraph: KW (Not standardized, ---. 
also used) 231 | ('/', "-..-."), # Slash or Fraction Bar 232 | ('(', "-.--."), # Open Parenthesis 233 | (')', "-.--.-"), # Close Parenthesis 234 | ('&', ".-..."), # Ampersand, Digraph: AS, Prosign: Wait (Not in ITU-R recommendation) 235 | (':', "---..."), # Colon 236 | (';', "-.-.-."), # Semicolon 237 | ('=', "-...-"), # Double Dash (Equal Sign) 238 | ('+', ".-.-."), # Plus Sign 239 | ('-', "-....-"), # Hyphen or Minus Sign 240 | ('_', "..--.-"), # Underscore (Not in ITU-R recommendation) 241 | ('"', ".-..-."), # Quotation Mark 242 | ('$', "...-..-"), # Dollar Sign, Digraph: SX (Not in ITU-R recommendation) 243 | ('@', ".--.-."), # At Sign, Digraph: AC (Formally added to ITU-R recommendation in 2004) 244 | ('', '') 245 | ) 246 | 247 | 248 | ############################################################################### 249 | # ascii85 defs 250 | ############################################################################### 251 | 252 | 253 | ascii85_charset = re.compile('([!-u]*)') 254 | 255 | 256 | ############################################################################### 257 | # yenc defs 258 | ############################################################################### 259 | 260 | 261 | yenc_escape = [0x00, 0x0a, 0x0d, ord('='), ord('.')] 262 | 263 | 264 | ############################################################################### 265 | # BCD 266 | ############################################################################### 267 | 268 | # soon.... 


###############################################################################
# helper functions
###############################################################################


def blocks(data, size):
    """Yield consecutive ``size``-length slices of ``data``.

    This is a generator, so the checks below only run once iteration
    starts, not when ``blocks()`` is called.

    Raises AssertionError when ``data`` is non-empty but shorter than
    ``size``, or when its length is not a multiple of ``size``.
    """
    total = len(data)
    assert (total == 0 or total >= size), (
        "Cannot create blocks of size %d"
        " from data of len %d") % (size, total)
    assert (total % size) == 0, \
        "Cannot divide into blocks of size %s" % size
    for start in xrange(0, total, size):
        yield data[start:start + size]


def parity(bit_array, odd=False):
    """Return the parity bit (0 or 1) for an iterable of 0/1 values.

    By default the result is ``sum(bit_array) % 2``; with ``odd=True``
    that bit is inverted.
    """
    bit = sum(bit_array) % 2
    if odd:
        bit ^= 1
    return bit


def rotx(data, rotval):
    """Rotate the ASCII letters of ``data`` by ``rotval`` places.

    Case is preserved and every other character is passed through
    untouched.  Empty input is returned as-is; otherwise the result is
    a ``unicode`` string.
    """
    if len(data) == 0:
        return data
    output = []
    for d in data:
        # Only plain ASCII letters are rotated.  isalpha() alone is not
        # enough: u'\xaa'.isalpha() is True.
        if 'a' <= d <= 'z':
            off = ord('a')
        elif 'A' <= d <= 'Z':
            off = ord('A')
        else:
            output.append(d)
            continue
        output.append(chr((((ord(d) - off) + rotval) % 26) + off))
    return unicode(''.join(output))


def rotx_codec_generator(rotval):
    """Build the ``CodecInfo`` for the ``rot<rotval>`` shift cipher.

    Encoding rotates forward by ``rotval`` and decoding rotates back.
    Both callables accept the optional ``errors`` argument of the codec
    API (it is ignored) so that ``s.encode('rot13', 'strict')`` works.
    """
    name = "rot%d" % rotval

    def rx_enc(data, errors='strict'):
        return (rotx(data, rotval), len(data))

    def rx_dec(data, errors='strict'):
        return (rotx(data, -rotval), len(data))

    return CodecInfo(name=name, encode=rx_enc, decode=rx_dec)


def get_codecs_list():
    """In case you're wondering what's in this package, you can find out.

    Prints the name of every codec in CODECS_IN_FILE, one per line.
    """
    for codec in CODECS_IN_FILE:
        print(codec)


###############################################################################
# actual encoders and encoding wrappers
#
# Every function below follows the stateless codec signature: it takes
# ``(input, errors='strict')`` and returns ``(output, length_consumed)``.
# ``errors`` is accepted for API compatibility only and is never consulted;
# invalid input raises AssertionError instead.
###############################################################################


def morse_encode(input, errors='strict'):
    """Encode text as Morse code, one space between symbols.

    Input is upper-cased first; a space is encoded as ``/``.
    Raises AssertionError for characters missing from MORSE.
    """
    morse_map = dict(MORSE)
    input = input.upper()
    for c in input:
        assert c in morse_map, "Unencodable character '%s' found. Failing" % c
    output = ' '.join(morse_map[c] for c in input)
    return (output, len(input))


def morse_decode(input, errors='strict'):
    """Decode space-separated Morse symbols back to upper-case text.

    Words may be separated by ``/`` or by a double space.
    Raises AssertionError for symbols missing from MORSE.
    """
    consumed = len(input)
    morse_map = dict((c, m) for m, c in MORSE)
    # A double space is a word gap: normalise it to '/', then pad every
    # '/' so that split() sees it as a token of its own.  (A single-space
    # replace here would turn every symbol gap into a word gap.)
    input = input.replace('  ', '/').replace('/', ' / ')
    splinput = input.split()
    for c in splinput:
        assert c in morse_map, "Could not decode '%s' to ascii. Failing" % c
    output = ''.join(morse_map[c] for c in splinput)
    return (output, consumed)


def bin_encode(input, errors='strict'):
    """Encode each character as its code point in binary.

    Every character is zero-padded to a whole number of octets, so code
    points above 255 take 16 or more bits.
    """
    bs = 8
    pieces = []
    for c in input:
        bits = '{0:0>8b}'.format(ord(c))
        # -len % bs is the number of zeros needed to reach the next
        # multiple of bs (0 when already aligned).
        pieces.append('0' * (-len(bits) % bs) + bits)
    return (''.join(pieces), len(input))


def bin_decode(input, errors='strict'):
    """Decode a string of '0'/'1' characters, eight bits per output byte.

    Raises AssertionError when the bit count is not a multiple of 8.
    """
    assert (len(input) % 8) == 0, \
        "Wrong number of bits, %s is not divisible by 8" % len(input)
    output = ''.join(chr(int(c, 2)) for c in blocks(input, 8))
    return (output, len(input))


def url_decode(input, errors='strict'):
    """Percent-decode ``input`` with urllib2's ``unquote``."""
    output = urlunquote(input)
    return (output, len(input))


def url_encode(input, errors='strict'):
    """Percent-encode ``input`` with urllib2's ``quote``."""
    output = urlquote(input)
    return (output, len(input))


def entity_decode(input, errors='strict'):
    """Unescape XML entities with ``xml.sax.saxutils.unescape``."""
    output = entityunquote(input)
    return (output, len(input))


def entity_encode(input, errors='strict'):
    """Escape XML special characters with ``xml.sax.saxutils.escape``."""
    output = entityquote(input)
    return (output, len(input))


def entity_encode_hex(input, errors='strict'):
    """
    Encode &, <, and > in a string of data
    as their hex HTML entity representation.
    """
    output = ''.join(
        "&#x%s;" % character.encode('hex')
        if character in ('&', '<', '>') else character
        for character in input)
    return (output, len(input))


# One- or two-digit hexadecimal character references such as "&#x3c;"
_HEX_ENTITY_RE = re.compile(r"&#x([0-9a-f]{1,2});", flags=re.I)


def entity_decode_hex(input, errors='strict'):
    """
    Decode hex HTML entity data in a string.

    Only well-formed one- or two-digit references (``&#xNN;``) are
    replaced; anything else, including longer or unterminated
    references, is left exactly as it was.
    """
    # Build the replacement with the same string type as the input so a
    # unicode input never gets mixed with non-ASCII byte strings.
    to_char = unichr if isinstance(input, unicode) else chr
    output = _HEX_ENTITY_RE.sub(
        lambda match: to_char(int(match.group(1), 16)), input)
    return (output, len(input))


def ascii85_encode(input, errors='strict'):
    """Encode bytes as Ascii85 (Adobe flavour, without ``<~ ~>`` framing).

    An all-zero group of four bytes is written as ``z``.
    Raises AssertionError when the data ends in a NUL byte.
    """
    consumed = len(input)
    if isinstance(input, unicode):
        # Convert code points to a byte string by round-tripping through
        # the bin codec (code points above 255 become several bytes).
        o, l = bin_encode(input)
        input, l = bin_decode(o)
    assert not input.endswith('\0'), "Trailing nulls unsupported"
    # encoding is adobe not btoa
    bs = 4
    padding = -len(input) % bs
    input += '\0' * padding
    groups = []
    for block in blocks(input, bs):
        value = unpack(">I", block)[0]
        if not value:
            groups.append("z")
            continue
        digits = []
        for _ in xrange(5):  # five base-85 digits, least significant first
            value, rem = divmod(value, 85)
            digits.append(chr(rem + 33))
        groups.append(''.join(reversed(digits)))
    output = ''.join(groups)
    if padding:
        # n padding bytes translate to n surplus output characters
        output = output[:-padding]
    return output, consumed


def ascii85_decode(input, errors='strict'):
    """Decode Ascii85 text; supports the Adobe and btoa 4.2 shortcuts.

    ``z`` stands for four NUL bytes and ``y`` (btoa 4.2) for four
    spaces.  Raises AssertionError when a shortcut character appears
    inside a five-character group.

    NOTE(review): the group-decoding loop was reconstructed from the
    standard Ascii85 algorithm because the original text of this
    function was damaged; confirm against upstream.
    """
    consumed = len(input)
    bs = 5
    for i in ('y', 'z'):
        for block in input.split(i)[:-1]:
            assert not len(block) % bs, "'%s' found within a block" % i
    # supports decoding as adobe or btoa 4.2
    input = input.replace('z', '!!!!!')  # adobe & btoa 4.2
    input = input.replace('y', '+<VdL')  # btoa 4.2: four spaces
    padding = -len(input) % bs
    # pad with the highest digit so truncating the output is lossless
    input += 'u' * padding
    groups = []
    for block in blocks(input, bs):
        data = 0
        for char in block:
            data = data * 85 + (ord(char) - 33)
        groups.append(pack(">I", data))
    output = ''.join(groups)
    if padding:
        output = output[:-padding]
    return output, consumed


def y_encode(input, errors='strict'):
    """yEnc-encode ``input`` (body only, no headers, no line wrapping).

    Each byte is shifted by 42; values listed in ``yenc_escape`` are
    written as ``=`` followed by the value shifted by a further 64.
    """
    pieces = []
    for c in input:
        o = (ord(c) + 42) % 256
        if o in yenc_escape:
            pieces.append('=')
            o = (o + 64) % 256
        pieces.append(chr(o))
    return ''.join(pieces), len(input)


def y_decode(input, errors='strict'):
    """Decode a yEnc body produced by ``y_encode``.

    Raises AssertionError when the input ends with a bare ``=`` escape.
    """
    pieces = []
    len_in = len(input)
    i = 0
    while i < len_in:
        c = ord(input[i])
        if input[i] == '=':
            assert len_in > (i + 1), "last character cannot be an escape"
            i += 1
            c = (ord(input[i]) - 64) % 256
        c = (c - 42) % 256
        i += 1
        pieces.append(chr(c))
    return ''.join(pieces), len_in


def aba_track_2_encode(input, errors='strict'):
    """Encode up to 37 Track 2 symbols as a string of '0'/'1' bits.

    The data is framed with the ``;`` start and ``?`` end sentinels and
    followed by an LRC character.  Every character becomes five bits:
    four data bits, least significant first, plus an odd parity bit.

    NOTE(review): odd character parity and the even column LRC follow
    the usual description of ABA Track 2; confirm against the standard.

    Raises AssertionError for characters outside 0x30 - 0x3f or for
    more than 37 characters.
    """
    #this is in progress
    assert all(0x3f >= ord(c) >= 0x30 for c in input), \
        "Characters found out of range 0x30 - 0x3f"
    len_in = len(input)
    assert len_in <= 37, ("No room for sentinel and LRC. "
                          "Input must be 37 characters or under")
    rows = []
    for c in ";" + input + "?":
        value = ord(c) - 48
        bits = [(value >> shift) & 1 for shift in xrange(4)]
        bits.append(parity(bits, odd=True))
        rows.append(bits)
    # LRC: even parity over each data-bit column, then its own odd parity
    lrc = [parity(row[i] for row in rows) for i in xrange(4)]
    lrc.append(parity(lrc, odd=True))
    rows.append(lrc)
    output = ''.join(str(bit) for row in rows for bit in row)
    return output, len_in


def aba_track_2_decode(input, errors='strict'):
    """Decode a Track 2 bit string back to its data characters.

    The start sentinel, end sentinel and LRC are stripped from the
    result.  Parity bits and the LRC value are not verified yet.

    Raises AssertionError when the bit count is not a multiple of 5, is
    longer than 40 characters' worth, or the sentinels are missing.
    """
    #this is in progress
    len_in = len(input)
    assert not len_in % 5, "Input must be divisible by 5"
    assert not len_in > (5 * 40), "String too long: cannot be ABA Track 2"
    #we're going to ignore parity for now
    # c[3::-1] takes the four data bits (stored LSB first) in MSB-first
    # order and skips the trailing parity bit.
    chars = ''.join(chr(int(c[3::-1], 2) + 48) for c in blocks(input, 5))
    if not chars:
        return chars, len_in
    assert len(chars) >= 3 and chars[0] == ';' and chars[-2] == '?', \
        "Missing start or end sentinel"
    return chars[1:-2], len_in


###############################################################################
# Codec Registration
###############################################################################

CODECS_IN_FILE = {"morse": CodecInfo(name='morse',
                                     encode=morse_encode,
                                     decode=morse_decode),
                  "bin": CodecInfo(name='bin',
                                   encode=bin_encode,
                                   decode=bin_decode),
                  "url": CodecInfo(name='url',
                                   encode=url_encode,
                                   decode=url_decode),
                  "entity": CodecInfo(name='entity',
                                      encode=entity_encode,
                                      decode=entity_decode),
                  "entityhex": CodecInfo(name='entityhex',
                                         encode=entity_encode_hex,
                                         decode=entity_decode_hex),
                  "ascii85": CodecInfo(name='ascii85',
                                       encode=ascii85_encode,
                                       decode=ascii85_decode),
                  "yenc": CodecInfo(name='yenc',
                                    encode=y_encode,
                                    decode=y_decode),
                  }


# rot1 .. rot25; a rotation of 0 or 26 would be the identity
for r in xrange(1, 26):
    CODECS_IN_FILE["rot%d" % r] = rotx_codec_generator(r)


#this is bad, I need to do something different
# The search function returns None for unknown names so that the codec
# machinery can fall through to other registered search functions.
register(lambda name: CODECS_IN_FILE.get(name, None))


if __name__ == "__main__":
    import doctest
    doctest.testmod()

# Local variables:
# eval: (add-hook 'after-save-hook '(lambda ()
#  (shell-command "pep8 __init__.py > lint")) nil t)
# end:

# Copyright © 2012–2015 Josh Dukes and contributors.
#
# This is free software: you may copy, modify, and/or distribute this work
# under the terms of the Expat License.
# No warranty expressed or implied. See the file ‘LICENSE.Expat’ for details.

# Local variables:
# coding: utf-8
# mode: text
# mode: markdown
# End:
# vim: fileencoding=utf-8 filetype=python: