├── TODO.md
├── tests
    ├── __init__.py
    └── test_hackercodecs.py
├── LICENSE.Expat
├── setup.py
├── README.md
└── hackercodecs
    └── __init__.py


/TODO.md:
--------------------------------------------------------------------------------
1 | add Track 2 ABA encoding for binary data http://blog.tehinterweb.com/?p=60
2 | Port to python 3 and add hex encoding
3 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python2
 2 | from unittest import TestLoader
 3 | import doctest
 4 | import test_hackercodecs
 5 | 
 6 | from sys import path
 7 | path.append('../')
 8 | import hackercodecs
 9 | 
10 | 
def HackerCodecsSuite():
    """Build the combined test suite for the package.

    Returns a suite holding every test found in ``test_hackercodecs``
    plus the doctests embedded in the ``hackercodecs`` module.
    """
    combined = TestLoader().loadTestsFromModule(test_hackercodecs)
    combined.addTests(doctest.DocTestSuite(hackercodecs))
    return combined
16 | 


--------------------------------------------------------------------------------
/LICENSE.Expat:
--------------------------------------------------------------------------------
 1 | Permission is hereby granted, free of charge, to any person obtaining
 2 | a copy of this software and associated documentation files (the
 3 | "Software"), to deal in the Software without restriction, including
 4 | without limitation the rights to use, copy, modify, merge, publish,
 5 | distribute, sublicense, and/or sell copies of the Software, and to
 6 | permit persons to whom the Software is furnished to do so, subject to
 7 | the following conditions:
 8 | 
 9 | The above copyright notice and this permission notice shall be included
10 | in all copies or substantial portions of the Software.
11 | 
12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
14 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
15 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
16 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
17 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
18 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python2
 2 | # -*- coding: utf-8 -*-
 3 | from setuptools import setup
 4 | 
 5 | import os
 6 | import sys
 7 | sys.path.insert(0,os.path.join(os.path.dirname(__file__),'hackercodecs'))
 8 | import hackercodecs
 9 | 
# Package metadata. The long description is taken from the hackercodecs
# module docstring, which is why the package is imported before this call.
setup(name="hackercodecs",
      version="0.3",
      description="A set of codecs for hackers",
      url="https://github.com/jdukes/hackercodecs",
      author="Josh Dukes",
      author_email="hex@neg9.org",
      license="MIT",
      # Dotted path to the suite factory defined in tests/__init__.py.
      test_suite="tests.HackerCodecsSuite",
      # The property-based tests in tests/test_hackercodecs.py use hypothesis.
      tests_require=['hypothesis'],
      keywords = "hacker, codecs, CTF",
      long_description=hackercodecs.__doc__,
      packages=["hackercodecs"])
22 | 
23 | 
24 | # Copyright © 2012–2015 Josh Dukes <hex@neg9.org> and contributors.
25 | #
26 | # This is free software: you may copy, modify, and/or distribute this work
27 | # under the terms of the Expat License.
28 | # No warranty expressed or implied. See the file ‘LICENSE.Expat’ for details.
29 | 
30 | # Local variables:
31 | # coding: utf-8
32 | # End:
33 | # vim: fileencoding=utf-8 filetype=python:
34 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Hacker Codecs
 2 | =============
 3 | 
 4 | This is a set of codecs for decoding and encoding things related to
 5 | hacking and hacking CTFs. Specifically this was designed originally
 6 | around the fact that decode('bin') doesn't exist in the standard
 7 | library and there are times (especially in a CTF) where this is
 8 | extremely convenient.
 9 | 
10 | Later 'morse' was added to easily encode and decode morse code without
11 | needing to do it (as) manually. 
12 | 
13 | The 'ascii85' codec was added specifically for PDF parsing in
14 | forensics challenges. This could be used, for example, with
15 | python-magic to check if a string inside of a PDF is actually a file
16 | of a specific type. 
17 | 
18 | The 'url' and 'entity' codecs were added as a quick way to encode and
19 | decode data for web hacking. 
20 | 
21 | 'yenc' is a stripped down yEnc as used for NNTP. Headers and footers
22 | are not included, they will need to be handled elsewhere. 
23 | 
24 | Other encodings may have been added; a full list is available by reviewing the code. 
25 | 
26 | As I run across, or am told about other obscure encoding methods I
27 | will continue to add to this library. 
28 | 
29 | 
30 | Copying
31 | -------
32 | 
33 | Copyright © 2012–2015 Josh Dukes <hex@neg9.org> and contributors.
34 | 
35 | This is free software: you may copy, modify, and/or distribute this work
36 | under the terms of the Expat License.
37 | No warranty expressed or implied. See the file ‘LICENSE.Expat’ for details.
38 | 
39 | 
40 | 


--------------------------------------------------------------------------------
/tests/test_hackercodecs.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | from hypothesis import given, assume, strategies as st
  3 | from sys import path
  4 | path.append('../')
  5 | from hackercodecs import *
  6 | 
  7 | class TestHelperFunctions(unittest.TestCase):
  8 | 
  9 |     @given(st.tuples(st.text(), st.integers(max_value=2**32)))
 10 |     def test_blocks(self, s):
 11 |         data, size = s
 12 |         assume(size > 0) # we don't need to check divide by zero
 13 |         if not ((len(data) % size) == 0):
 14 |             # make sure we assert here
 15 |             try:
 16 |                 blocks(data, size)
 17 |             except AssertionError, e:
 18 |                 assert e.message == (
 19 |                     "Cannot divide into blocks of size %s" % size)
 20 |         else:
 21 |             results = blocks(data, size)
 22 |             try:
 23 |                 first = next(results)
 24 |                 assert all(len(r) == len(first)
 25 |                            for r in results)
 26 |             except StopIteration:
 27 |                 pass
 28 | 
 29 |     @given(st.lists(st.booleans()))
 30 |     def test_parity(self, s):
 31 |         if sum(s) % 2 == 0:
 32 |            assert parity(s) == 0
 33 |            assert parity(s, odd=True) == 1
 34 |         else:
 35 |            assert parity(s) == 1
 36 |            assert parity(s, odd=True) == 0
 37 | 
 38 |     @given(st.tuples(st.text(),
 39 |                      st.integers(min_value=0, max_value=26)))
 40 |     def test_rotx(self, s):
 41 |         data, rot = s
 42 |         encoded = rotx(data, rot)
 43 |         decoded = rotx(encoded, -rot)
 44 |         assert data == decoded
 45 | 
 46 |     def test_rotx_codec_generator(self):
 47 |         # we proved rotx above
 48 |         codec = rotx_codec_generator(10)
 49 |         self.assertEqual(codec.name, 'rot10')
 50 | 
 51 | 
 52 | class TestCodecs(unittest.TestCase):
 53 |     @given(st.text(alphabet=''.join(i[0] for i in MORSE)))
 54 |     def test_morse(self, s):
 55 |         encoded,encoded_len = morse_encode(s)
 56 |         decoded, decoded_len = morse_decode(encoded)
 57 |         assert s.upper() == decoded
 58 | 
 59 |     @given(st.text())
 60 |     def test_bin(self, s):
 61 |         assume(all(ord(c) <= 255 for c in s))
 62 |         encoded, encoded_len = bin_encode(s)
 63 |         decoded, decoded_len = bin_decode(encoded)
 64 |         assert s.encode('bin') == decoded.encode('bin')
 65 | 
 66 |     @given(st.text())
 67 |     def test_url(self, s):
 68 |         assume(all(ord(c) <= 255 for c in s))
 69 |         encoded, encoded_len = bin_encode(s)
 70 |         decoded, decoded_len = bin_decode(encoded)
 71 |         assert s.encode('bin') == decoded.encode('bin')
 72 | 
 73 |     @given(st.text())
 74 |     def test_entity(self, s):
 75 |         assume(all(ord(c) <= 255 for c in s))
 76 |         encoded, encoded_len = entity_encode(s)
 77 |         decoded, decoded_len = entity_decode(encoded)
 78 |         assert s.encode('bin') == decoded.encode('bin')
 79 | 
 80 |     @given(st.text())
 81 |     def test_entity_hex(self, s):
 82 |         assume(all(ord(c) <= 255 for c in s))
 83 |         encoded, encoded_len = entity_encode_hex(s)
 84 |         decoded, decoded_len = entity_decode_hex(encoded)
 85 |         assert s.encode('bin') == decoded.encode('bin'), (
 86 |             "{} != {}".format(s, decoded))
 87 | 
 88 |     @given(st.text())
 89 |     def test_ascii85(self, s):
 90 |         assume(all(ord(c) <= 255 for c in s))
 91 |         assume(not s.endswith('\0')) # we know we can't encode this
 92 |         encoded, encoded_len = ascii85_encode(s)
 93 |         decoded, decoded_len = ascii85_decode(encoded)
 94 |         assert s.encode('bin') == decoded.encode('bin'), (
 95 |             "{} != {}".format(repr(s), repr(decoded)))
 96 | 
 97 |     @given(st.text())
 98 |     def test_y(self, s):
 99 |         assume(all(ord(c) <= 255 for c in s))
100 |         encoded, encoded_len = y_encode(s)
101 |         decoded, decoded_len = y_decode(encoded)
102 |         assert s.encode('bin') == decoded.encode('bin'), (
103 |             "{} != {}".format(repr(s), repr(decoded)))
104 | 
105 |     # these need a lot of fixing
106 |     @given(st.text())
107 |     def test_aba_track_2(self, s):
108 |         encoded, encoded_len = aba_track_2_encode(s)
109 |         decoded, decoded_len = aba_track_2_decode(encoded)
110 |         assert s.encode('bin') == decoded.encode('bin'), (
111 |             "{} != {}".format(repr(s), repr(decoded)))
112 | 
113 | 
114 | if __name__ == '__main__':
115 |     unittest.main()
116 | 


--------------------------------------------------------------------------------
/hackercodecs/__init__.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # use binascii for this stuff
  5 | """ This package provides codecs useful for hacking and hacking related CTFs.
  6 | 
  7 | There are several codecs available once you import this module. To get
  8 | a full list you can use the CODECS_IN_FILE dictionary which is used to
  9 | populate the codec entries::
 10 | 
 11 |     >>> import pprint
 12 |     >>> pprint.pprint(sorted(CODECS_IN_FILE.keys()))
 13 |     ['ascii85',
 14 |      'bin',
 15 |      'entity',
 16 |      'entityhex',
 17 |      'morse',
 18 |      'rot1',
 19 |      'rot10',
 20 |      'rot11',
 21 |      'rot12',
 22 |      'rot13',
 23 |      'rot14',
 24 |      'rot15',
 25 |      'rot16',
 26 |      'rot17',
 27 |      'rot18',
 28 |      'rot19',
 29 |      'rot2',
 30 |      'rot20',
 31 |      'rot21',
 32 |      'rot22',
 33 |      'rot23',
 34 |      'rot24',
 35 |      'rot25',
 36 |      'rot3',
 37 |      'rot4',
 38 |      'rot5',
 39 |      'rot6',
 40 |      'rot7',
 41 |      'rot8',
 42 |      'rot9',
 43 |      'url',
 44 |      'yenc']
 45 | 
 46 | 
 47 | You should first notice all the "rot" entries. The `rot-13` codec is
 48 | provided by default. The rest of these provide similar functionality
 49 | for rapid checks of shift ciphers::
 50 | 
 51 |     >>> pprint.pprint(['ymj vznhp gwtbs ktc ozruji tajw ymj qfed itl'.decode('rot%d' % i) for i in xrange(1,26)])
 52 |     [u'xli uymgo fvsar jsb nyqtih sziv xli pedc hsk',
 53 |      u'wkh txlfn eurzq ira mxpshg ryhu wkh odcb grj',
 54 |      u'vjg swkem dtqyp hqz lworgf qxgt vjg ncba fqi',
 55 |      u'uif rvjdl cspxo gpy kvnqfe pwfs uif mbaz eph',
 56 |      u'the quick brown fox jumped over the lazy dog',
 57 |      u'sgd pthbj aqnvm enw itlodc nudq sgd kzyx cnf',
 58 |      u'rfc osgai zpmul dmv hskncb mtcp rfc jyxw bme',
 59 |      u'qeb nrfzh yoltk clu grjmba lsbo qeb ixwv ald',
 60 |      u'pda mqeyg xnksj bkt fqilaz kran pda hwvu zkc',
 61 |      u'ocz lpdxf wmjri ajs ephkzy jqzm ocz gvut yjb',
 62 |      u'nby kocwe vliqh zir dogjyx ipyl nby futs xia',
 63 |      u'max jnbvd ukhpg yhq cnfixw hoxk max etsr whz',
 64 |      u'lzw imauc tjgof xgp bmehwv gnwj lzw dsrq vgy',
 65 |      u'kyv hlztb sifne wfo aldgvu fmvi kyv crqp ufx',
 66 |      u'jxu gkysa rhemd ven zkcfut eluh jxu bqpo tew',
 67 |      u'iwt fjxrz qgdlc udm yjbets dktg iwt apon sdv',
 68 |      u'hvs eiwqy pfckb tcl xiadsr cjsf hvs zonm rcu',
 69 |      u'gur dhvpx oebja sbk whzcrq bire gur ynml qbt',
 70 |      u'ftq cguow ndaiz raj vgybqp ahqd ftq xmlk pas',
 71 |      u'esp bftnv mczhy qzi ufxapo zgpc esp wlkj ozr',
 72 |      u'dro aesmu lbygx pyh tewzon yfob dro vkji nyq',
 73 |      u'cqn zdrlt kaxfw oxg sdvynm xena cqn ujih mxp',
 74 |      u'bpm ycqks jzwev nwf rcuxml wdmz bpm tihg lwo',
 75 |      u'aol xbpjr iyvdu mve qbtwlk vcly aol shgf kvn',
 76 |      u'znk waoiq hxuct lud pasvkj ubkx znk rgfe jum']
 77 | 
 78 | "the quick brown fox jumped over the lazy dog" <- bingo
 79 | 
 80 | My favorite codec, and reason I started the project, is of course
 81 | morse encoding::
 82 | 
 83 |     >>> 'SOS'.encode('morse')
 84 |     '... --- ...'
 85 | 
 86 |     >>> '... --- ...'.decode('morse')
 87 |     'SOS'
 88 | 
 89 | Morse code doesn't support the full ascii character set, nor does
 90 | it support casing so keep that in mind::
 91 | 
 92 |     >>> 'asdf'.encode('morse').decode('morse')
 93 |     'ASDF'
 94 | 
 95 |     >>> "#THIS IS MORSE CODE#".encode('morse')
 96 |     Traceback (most recent call last):
 97 |     ...
 98 |     AssertionError: Unencodable character '#' found. Failing
 99 | 
100 | Another favorite of mine is `bin`. It's only a few lines, but there's
101 | no reason to write those same lines over and over again each CTF. Just
102 | let hackercodecs handle that for you::
103 | 
104 |     >>> 'asdf'.encode('bin')
105 |     '01100001011100110110010001100110'
106 | 
107 |     >>> '01100001011100110110010001100110'.decode('bin')
108 |     'asdf'
109 | 
110 | It also counts bits to make sure you're not doing something stupid::
111 | 
112 |     >>> '0110000101110011011001000110011'.decode('bin')
113 |     Traceback (most recent call last):
114 |     ...
115 |     AssertionError: Wrong number of bits, 31 is not divisible by 8
116 | 
117 | If you ever hack on web challenges you know how nice it is to have
118 | urllib handle url encoding. Since we already have this library for
119 | hacker codecs, I figured it would be worth while to just add
120 | that. Everything in one place is nice sometimes::
121 | 
122 |     >>> "' or ''='".encode('url')
123 |     '%27%20or%20%27%27%3D%27'
124 |     >>> '%27%20or%20%27%27%3D%27'.decode('url')
125 |     "' or ''='"
126 | 
127 | Likewise entity encoding can be nice when attacking some XML based
128 | challenge::
129 | 
130 |     >>> "]]>&xxe;".encode('entity')
131 |     ']]&gt;&amp;xxe;'
132 | 
133 |     >>> ']]&gt;&amp;xxe;'.decode('entity')
134 |     ']]>&xxe;'
135 | 
136 | Or the hex equivalent::
137 | 
138 |     >>> '<script>alert("1")</script>'.encode('entityhex')
139 |     '&#x3c;script&#x3e;alert("1")&#x3c;/script&#x3e;'
140 | 
141 | Then we get a little less common. If you're from the internet you
142 | might know that usenet uses yEnc::
143 | 
144 |     >>> print repr('asdf'.encode('yenc'))
145 |     '\\x8b\\x9d\\x8e\\x90'
146 | 
147 |     >>> '\\x8b\\x9d\\x8e\\x90'.decode('yenc')
148 |     'asdf'
149 | 
150 | And if you work on PDFs at all, you may have seen ascii85. It's kind
151 | of like base64, but not really at all::
152 | 
153 |     >>> 'asdf'.encode('ascii85')
154 |     '@<5sk'
155 | 
156 |     >>> '@<5sk'.decode('ascii85')
157 |     'asdf'
158 | 
159 | In the future I'll probably add more things
160 | <http://en.wikipedia.org/wiki/Binary-to-text_encoding>_ after
161 | porting everything to python 3. If there are any special requests for
162 | encodings you'd like me to add feel free to write them yourself and
163 | submit a patch. It should be pretty damn easy to add a codec based on
164 | the code below.
165 | 
166 | """
167 | # http://en.wikipedia.org/wiki/Sixbit_code_pages
168 | # http://en.wikipedia.org/wiki/Six-bit_BCD
169 | import re
170 | 
171 | from urllib2 import quote as urlquote
172 | from urllib2 import unquote as urlunquote
173 | from urllib import _is_unicode
174 | from urllib import _asciire
175 | from urllib import _hextochr
176 | from xml.sax.saxutils import escape as entityquote
177 | from xml.sax.saxutils import unescape as entityunquote
178 | from codecs import register, CodecInfo
179 | 
180 | from struct import pack, unpack
181 | 
182 | ###############################################################################
183 | # Morse Codec Defs (International Morse Code)
184 | # Reference:
185 | # https://en.wikipedia.org/wiki/Morse_code#Letters.2C_numbers.2C_punctuation.2C_prosigns_and_non-English_variants
186 | ###############################################################################
# Pairs of (character, morse sequence). morse_encode builds a
# character -> sequence dict from this table and morse_decode builds the
# reverse sequence -> character dict.
MORSE = (
    ('A', ".-"),            # A, a
    ('B', "-..."),          # B, b
    ('C', "-.-."),          # C, c
    ('D', "-.."),           # D, d
    ('E', "."),             # E, e
    ('F', "..-."),          # F, f
    ('G', "--."),           # G, g
    ('H', "...."),          # H, h
    ('I', ".."),            # I, i
    ('J', ".---"),          # J, j
    ('K', "-.-"),           # K, k;
                            # also used to indicate "Invitation to Transmit"
    ('L', ".-.."),          # L, l
    ('M', "--"),            # M, m
    ('N', "-."),            # N, n
    ('O', "---"),           # O, o
    ('P', ".--."),          # P, p
    ('Q', "--.-"),          # Q, q
    ('R', ".-."),           # R, r
    ('S', "..."),           # S, s
    ('T', "-"),             # T, t
    ('U', "..-"),           # U, u
    ('V', "...-"),          # V, v
    ('W', ".--"),           # W, w
    ('X', "-..-"),          # X, x
    ('Y', "-.--"),          # Y, y
    ('Z', "--.."),          # Z, z
    ('0', "-----"),         # 0
    ('1', ".----"),         # 1
    ('2', "..---"),         # 2
    ('3', "...--"),         # 3
    ('4', "....-"),         # 4
    ('5', "....."),         # 5
    ('6', "-...."),         # 6
    ('7', "--..."),         # 7
    ('8', "---.."),         # 8
    ('9', "----."),         # 9
    (' ', "/"),             # A space in the text is written as a lone "/"
    ('.', ".-.-.-"),        # Period
    (',', "--..--"),        # Comma
    ('?', "..--.."),        # Question Mark
    ('\'', ".----."),       # Apostrophe
    ('!', "-.-.--"),        # Exclamation Point, Digraph: KW (Not standardized, ---. also used)
    ('/', "-..-."),         # Slash or Fraction Bar
    ('(', "-.--."),         # Open Parenthesis
    (')', "-.--.-"),        # Close Parenthesis
    ('&', ".-..."),         # Ampersand, Digraph: AS, Prosign: Wait (Not in ITU-R recommendation)
    (':', "---..."),        # Colon
    (';', "-.-.-."),        # Semicolon
    ('=', "-...-"),         # Double Dash (Equal Sign)
    ('+', ".-.-."),         # Plus Sign
    ('-', "-....-"),        # Hyphen or Minus Sign
    ('_', "..--.-"),        # Underscore (Not in ITU-R recommendation)
    ('"', ".-..-."),        # Quotation Mark
    ('$', "...-..-"),       # Dollar Sign, Digraph: SX (Not in ITU-R recommendation)
    ('@', ".--.-."),        # At Sign, Digraph: AC (Formally added to ITU-R recommendation in 2004)
    # NOTE(review): empty pair; its purpose is not evident from the code
    # in this file - confirm before removing.
    ('', '')
    )
246 | 
247 | 
248 | ###############################################################################
249 | # ascii85 defs
250 | ###############################################################################
251 | 
252 | 
# Matches runs of characters in the range '!' through 'u', i.e. the
# characters Ascii85 uses as digits.
ascii85_charset = re.compile('([!-u]*)')
254 | 
255 | 
256 | ###############################################################################
257 | # yenc defs
258 | ###############################################################################
259 | 
260 | 
# Encoded byte values that y_encode must escape: NUL, LF, CR, '=' and '.'.
yenc_escape = [0x00, 0x0a, 0x0d, ord('='), ord('.')]
262 | 
263 | 
264 | ###############################################################################
265 | # BCD
266 | ###############################################################################
267 | 
268 | # soon....
269 | 
270 | ###############################################################################
271 | # helper functions
272 | ###############################################################################
273 | 
274 | 
def blocks(data, size):
    """Split *data* into consecutive slices of *size* items.

    The arguments are validated when the function is called rather than
    when the result is first iterated, so a bad call fails at the call
    site. The slices themselves are still produced lazily.

    :param data: any sliceable sequence (string, list, ...)
    :param size: length of every slice
    :returns: an iterator over ``data[0:size]``, ``data[size:2*size]``, ...
    :raises AssertionError: if *data* is non-empty but shorter than *size*,
        or if its length is not a multiple of *size*
    """
    length = len(data)
    # Raised explicitly (not via ``assert``) so the checks survive ``-O``.
    if length and length < size:
        raise AssertionError(
            "Cannot create blocks of size %d from data of len %d"
            % (size, length))
    if length % size:
        raise AssertionError("Cannot divide into blocks of size %s" % size)
    try:
        starts = xrange(0, length, size)
    except NameError:  # Python 3: range is already lazy
        starts = range(0, length, size)
    return (data[start:start + size] for start in starts)
283 | 
284 | 
def parity(bit_array, odd=False):
    """Return the parity bit for an iterable of bits.

    With the default even parity the result is 1 when the number of set
    bits is odd and 0 otherwise; ``odd=True`` flips that.
    """
    ones = sum(bit_array)
    if odd:
        return (ones + 1) % 2
    return ones % 2
290 | 
291 | 
def rotx(data, rotval):
    """Rotate the ASCII letters of *data* by *rotval* places.

    Upper and lower case letters rotate within their own alphabet; every
    other character is copied through unchanged. A negative *rotval*
    undoes the matching positive rotation.

    :param data: text to rotate
    :param rotval: number of alphabet positions to shift by
    :returns: the rotated text (``unicode`` on Python 2, ``str`` on
        Python 3); empty input is returned as-is
    """
    if len(data) == 0:
        return data
    output = []
    for d in data:
        # Only plain ASCII letters rotate; str.isalpha() is also true for
        # characters such as u'\xaa', so compare against the ranges.
        if 'A' <= d <= 'Z':
            off = 65
        elif 'a' <= d <= 'z':
            off = 97
        else:
            output.append(d)
            continue
        output.append(chr((((ord(d) - off) + rotval) % 26) + off))
    # u''.join gives the same result as unicode(''.join(...)) on Python 2
    # and does not need the unicode builtin, which Python 3 lacks.
    return u''.join(output)
310 | 
311 | 
def rotx_codec_generator(rotval):
    """Create the ``rot<rotval>`` codec.

    Encoding rotates by *rotval* and decoding rotates back by the same
    amount; both report the input length as the amount consumed.
    """
    def encode(data):
        return rotx(data, rotval), len(data)

    def decode(data):
        return rotx(data, -rotval), len(data)

    return CodecInfo(name="rot%d" % rotval, encode=encode, decode=decode)
317 | 
318 | 
def get_codecs_list():
    """Print the name of every codec in CODECS_IN_FILE, one per line."""
    for name in CODECS_IN_FILE:
        print(name)
324 | 
325 | 
326 | ###############################################################################
327 | # actual encoders and encoding wrappers
328 | ###############################################################################
329 | 
330 | 
def morse_encode(input, errors='strict'):
    """Encode text as morse code.

    The text is upper-cased first, each character is looked up in MORSE,
    and the resulting sequences are joined with single spaces.

    :param input: text to encode
    :param errors: accepted for codec API compatibility; not used
    :returns: ``(morse_text, number_of_input_characters)``
    :raises AssertionError: if a character has no entry in MORSE
    """
    morse_map = dict(MORSE)
    consumed = len(input)
    input = input.upper()
    for c in input:
        # Raised explicitly so the check is not stripped by ``python -O``.
        if c not in morse_map:
            raise AssertionError(
                "Unencodable character '%s' found. Failing" % c)
    output = ' '.join(morse_map[c] for c in input)
    return (output, consumed)
338 | 
339 | 
def morse_decode(input, errors='strict'):
    """Decode morse code back to (upper-case) text.

    Sequences are separated by whitespace. A ``/`` or a double space is
    read as the separator that stands for a space in the decoded text.

    :param input: morse text to decode
    :param errors: accepted for codec API compatibility; not used
    :returns: ``(text, number_of_input_characters)``
    :raises AssertionError: if a sequence has no entry in MORSE
    """
    morse_map = dict((c, m) for m, c in MORSE)
    # Remember the real input length: the normalisation below changes it,
    # and the codec contract is to report how much input was consumed.
    consumed = len(input)
    normalised = input.replace('  ', '/').replace('/', ' / ')
    splinput = normalised.split()
    for c in splinput:
        # Raised explicitly so the check is not stripped by ``python -O``.
        if c not in morse_map:
            raise AssertionError(
                "Could not decode '%s' to ascii. Failing" % c)
    output = ''.join(morse_map[c] for c in splinput)
    return (output, consumed)
348 | 
349 | 
def bin_encode(input, errors='strict'):
    """Encode each character as its code point written in binary.

    Every character is zero-padded on the left to a whole number of
    8-bit groups, so code points up to 255 take exactly eight digits.
    Returns ``(bit_string, number_of_input_characters)``.
    """
    group = 8
    pieces = []
    for char in input:
        digits = '{0:b}'.format(ord(char))
        # Round the width up to the next multiple of the group size.
        width = -(-len(digits) // group) * group
        pieces.append(digits.rjust(width, '0'))
    return (''.join(pieces), len(input))
359 | 
360 | 
def bin_decode(input, errors='strict'):
    """Decode a string of '0'/'1' digits, eight per character.

    :param input: bit string whose length is a multiple of 8
    :param errors: accepted for codec API compatibility; not used
    :returns: ``(text, number_of_input_characters)``
    :raises AssertionError: if the length is not a multiple of 8
    :raises ValueError: if a group contains something other than 0 or 1
    """
    # Raised explicitly so the check is not stripped by ``python -O``.
    if len(input) % 8:
        raise AssertionError(
            "Wrong number of bits, %s is not divisible by 8" % len(input))
    output = ''.join(chr(int(input[i:i + 8], 2))
                     for i in range(0, len(input), 8))
    return (output, len(input))
368 | 
369 | 
def url_decode(input, errors='strict'):
    """Undo percent-encoding; returns ``(text, len(input))``."""
    return (urlunquote(input), len(input))
373 | 
374 | 
def url_encode(input, errors='strict'):
    """Percent-encode *input*; returns ``(encoded, len(input))``."""
    return (urlquote(input), len(input))
378 | 
379 | 
def entity_decode(input, errors='strict'):
    """Unescape XML entities; returns ``(text, len(input))``."""
    return (entityunquote(input), len(input))
383 | 
384 | 
def entity_encode(input, errors='strict'):
    """Escape XML special characters; returns ``(escaped, len(input))``."""
    return (entityquote(input), len(input))
388 | 
def entity_encode_hex(input, errors='strict'):
    """Encode ``&``, ``<`` and ``>`` as hexadecimal HTML entities.

    All other characters are copied through unchanged.

    :param input: text to encode
    :param errors: accepted for codec API compatibility; not used
    :returns: ``(encoded_text, number_of_input_characters)``
    """
    # '%02x' % ord(c) produces the same two lowercase hex digits as the
    # Python 2 only 'hex' string codec, and also works on Python 3.
    output = ''.join(
        "&#x%02x;" % ord(character) if character in ('&', '<', '>')
        else character
        for character in input)
    return (output, len(input))
402 | 
def entity_decode_hex(input, errors='strict'):
    """Decode two-digit hexadecimal HTML entities (``&#xHH;``).

    Only well-formed entities are replaced: ``&#x`` (case-insensitive),
    exactly two hex digits, then ``;``. Anything else, including a
    truncated or non-hex entity, is left exactly as it was.

    :param input: text that may contain hex entities
    :param errors: accepted for codec API compatibility; not used
    :returns: ``(decoded_text, number_of_input_characters)``
    """
    entity = re.compile(r"&#x([0-9a-f]{2});", flags=re.I)
    output = entity.sub(lambda match: chr(int(match.group(1), 16)), input)
    return (output, len(input))
434 | 
435 | 
def ascii85_encode(input, errors='strict'):
    """Encode *input* as Ascii85 (Adobe variant, without ``<~ ~>``).

    Each character contributes its code point as one octet; a code point
    above 255 contributes its big-endian octets. The octets are padded
    with zeros to a multiple of four, each group of four becomes five
    base-85 digits (or ``z`` for an all-zero group), and as many trailing
    digits as padding octets are dropped again.

    :param input: text to encode; must not end in a NUL character
    :param errors: accepted for codec API compatibility; not used
    :returns: ``(encoded_text, number_of_input_characters)``
    :raises AssertionError: if *input* ends with a NUL character
    """
    # Raised explicitly so the check is not stripped by ``python -O``.
    if input.endswith('\0'):
        raise AssertionError("Trailing nulls unsupported")
    consumed = len(input)

    # Flatten the text to octet values without going through struct, so
    # byte strings and unicode text are handled by the same code path.
    octets = []
    for char in input:
        code = ord(char)
        char_octets = [code & 0xff]
        code >>= 8
        while code:
            char_octets.append(code & 0xff)
            code >>= 8
        octets.extend(reversed(char_octets))

    #encoding is adobe not btoa
    bs = 4
    padding = bs - ((len(octets) % bs) or bs)
    octets.extend([0] * padding)

    groups = []
    for start in range(0, len(octets), bs):
        value = 0
        for octet in octets[start:start + bs]:
            value = (value << 8) | octet
        if not value:
            groups.append("z")
            continue
        digits = []
        for _ in range(5):
            value, rem = divmod(value, 85)
            digits.append(chr(rem + 33))
        # Digits were produced least significant first.
        groups.append(''.join(reversed(digits)))

    output = ''.join(groups)
    if padding:
        output = output[:-padding]
    return output, consumed
462 | 
463 | 
def ascii85_decode(input, errors='strict'):
    """Decode Ascii85 text (Adobe or btoa 4.2 style).

    ``z`` expands to four NUL octets and ``y`` to four spaces. Characters
    outside ``!``..``u`` are silently dropped. A short final group is
    padded with ``u`` and the matching number of octets is removed from
    the end of the result.

    :param input: Ascii85 text
    :param errors: accepted for codec API compatibility; not used
    :returns: ``(decoded_text, number_of_input_characters)``
    :raises AssertionError: if ``y``/``z`` appears inside a group, or a
        group decodes to a value that does not fit in 32 bits
    """
    # Remember the real input length: the rewriting below changes it, and
    # the codec contract is to report how much input was consumed.
    consumed = len(input)
    bs = 5
    for marker in ('y', 'z'):
        for chunk in input.split(marker)[:-1]:
            #this will handle the error but it will not give a good
            #error message
            if len(chunk) % bs:
                raise AssertionError("'%s' found within a block" % marker)
    # supports decoding as adobe or btoa 4.2
    text = input.replace('z', '!!!!!')  # adobe & btoa 4.2
    text = text.replace('y', '+<VdL')  # btoa replace block of ' '
    # silently drop all non-ascii85 chars....
    text = ''.join(re.findall('[!-u]+', text))
    padding = bs - ((len(text) % bs) or bs)
    text += 'u' * padding

    decoded = []
    for start in range(0, len(text), bs):
        block = text[start:start + bs]
        data = 0
        for digit in block:
            data = data * 85 + (ord(digit) - 33)
        if not 0 <= data <= 4294967295:
            raise AssertionError("invalid block '%s'" % block)
        # Emit the four octets most significant first; chr() keeps the
        # result a plain string on both Python 2 and Python 3.
        decoded.extend(chr((data >> shift) & 0xff)
                       for shift in (24, 16, 8, 0))
    output = ''.join(decoded)
    if padding:
        output = output[:-padding]
    return output, consumed
492 | 
493 | 
def y_encode(input, errors='strict'):
    """yEnc-encode *input* (no header, footer or line wrapping).

    Each character value is shifted by 42 modulo 256. A result listed in
    yenc_escape is written as ``=`` followed by the value shifted by a
    further 64. Returns ``(encoded, number_of_input_characters)``.
    """
    pieces = []
    for char in input:
        shifted = (ord(char) + 42) % 256
        if shifted in yenc_escape:
            pieces.append('=')
            shifted = (shifted + 64) % 256
        pieces.append(chr(shifted))
    return ''.join(pieces), len(input)
503 | 
504 | 
def y_decode(input, errors='strict'):
    """Decode yEnc data (no header, footer or line handling).

    ``=`` marks an escape: the character after it is shifted back by 64
    before the usual shift of 42 is undone. Returns
    ``(decoded, number_of_input_characters)``.
    """
    decoded = []
    total = len(input)
    pos = 0
    while pos != total:
        value = ord(input[pos])
        if input[pos] == '=':
            assert total > (pos + 1), "last character cannot be an escape"
            pos += 1
            value = (ord(input[pos]) - 64) % 256
        decoded.append(chr((value - 42) % 256))
        pos += 1
    return ''.join(decoded), len(input)
522 | 
523 | 
def aba_track_2_encode(input, errors='strict'):
    """Encode *input* as an ABA track 2 bit string.

    The data is framed with the start sentinel ``;`` and end sentinel
    ``?``. Every character becomes five bits: the low four bits of
    ``ord(c) - 48`` least significant bit first, then an odd-parity bit.
    A final LRC character holds the even parity of each data-bit column,
    followed by its own odd-parity bit.

    :param input: up to 37 characters in the range 0x30 - 0x3f
    :param errors: accepted for codec API compatibility; not used
    :returns: ``(bit_string, number_of_input_characters)``
    :raises AssertionError: if a character is out of range or the input
        is longer than 37 characters
    """
    #this is in progress
    # Raised explicitly so the checks are not stripped by ``python -O``.
    if not all(0x30 <= ord(c) <= 0x3f for c in input):
        raise AssertionError("Characters found out of range 0x30 - 0x3f")
    len_in = len(input)
    if len_in > 37:
        raise AssertionError("No room for sentinel and LRC. "
                             "Input must be 37 characters or under")
    rows = []
    for c in ";" + input + "?":
        value = ord(c) - 48
        bits = [(value >> shift) & 1 for shift in range(4)]  # LSB first
        # Odd parity: the five bits of a character hold an odd number of
        # ones, which gives the usual 11010 start sentinel pattern.
        bits.append((sum(bits) + 1) % 2)
        rows.append(bits)
    lrc = [sum(row[i] for row in rows) % 2 for i in range(4)]
    lrc.append((sum(lrc) + 1) % 2)
    rows.append(lrc)
    output = ''.join(str(bit) for row in rows for bit in row)
    # Report the caller's input length, not the length with sentinels.
    return output, len_in
547 | 
548 | 
def aba_track_2_decode(input, errors='strict'):
    """Decode an ABA track 2 bit string back to its data characters.

    The input is read five bits at a time: four data bits, least
    significant first, then a parity bit. The leading start sentinel
    ``;``, the end sentinel ``?`` and the trailing LRC character are
    removed from the result. Parity and LRC values are not verified.

    :param input: string of '0'/'1' digits, length a multiple of 5
    :param errors: accepted for codec API compatibility; not used
    :returns: ``(data_characters, number_of_input_characters)``; empty
        input decodes to ``('', 0)``
    :raises AssertionError: if the length is not a multiple of 5, is over
        200 bits, or the sentinels/LRC framing is missing
    :raises ValueError: if a group contains something other than 0 or 1
    """
    #this is in progress
    len_in = len(input)
    # Raised explicitly so the checks are not stripped by ``python -O``.
    if len_in % 5:
        raise AssertionError("Input must be divisible by 5")
    if len_in > (5 * 40):
        raise AssertionError("String too long: cannot be ABA Track 2")
    if not len_in:
        return '', 0
    #we're going to ignore parity for now
    chars = []
    for start in range(0, len_in, 5):
        block = input[start:start + 5]
        # block[3::-1] is the four data bits put back most significant
        # first; the fifth bit (parity) is left out.
        chars.append(chr(int(block[3::-1], 2) + 48))
    if len(chars) < 3 or chars[0] != ';' or chars[-2] != '?':
        raise AssertionError("Missing start sentinel, end sentinel or LRC")
    return ''.join(chars[1:-2]), len_in
559 | 
560 | 
561 | 
562 | 
563 | 
564 | ###############################################################################
565 | # Codec Registration
566 | ###############################################################################
567 | 
# Maps each codec name to its CodecInfo. The lookup function registered
# below answers codec searches from this dict.
CODECS_IN_FILE = {"morse": CodecInfo(name='morse',
                                     encode=morse_encode,
                                     decode=morse_decode),
                  "bin": CodecInfo(name='bin',
                                   encode=bin_encode,
                                   decode=bin_decode),
                  "url": CodecInfo(name='url',
                                   encode=url_encode,
                                   decode=url_decode),
                  "entity": CodecInfo(name='entity',
                                   encode=entity_encode,
                                   decode=entity_decode),
                  "entityhex": CodecInfo(name='entityhex',
                                   encode=entity_encode_hex,
                                   decode=entity_decode_hex),
                  "ascii85": CodecInfo(name='ascii85',
                                       encode=ascii85_encode,
                                       decode=ascii85_decode),
                  "yenc": CodecInfo(name='yenc',
                                       encode=y_encode,
                                       decode=y_decode),
                }


# Add rot1 .. rot25, one generated codec per shift value.
for r in xrange(1, 26):
    CODECS_IN_FILE["rot%d" % r] = rotx_codec_generator(r)


#this is bad, I need to do something different
# Importing this module registers a search function with the codecs
# machinery; names that are not in CODECS_IN_FILE get None back.
register(lambda name: CODECS_IN_FILE.get(name, None))


# Running the module directly executes the doctests in its docstrings.
if __name__ == "__main__":
    import doctest
    doctest.testmod()
603 | 
604 | # Local variables:
605 | # eval: (add-hook 'after-save-hook '(lambda ()
606 | #           (shell-command "pep8 __init__.py > lint")) nil t)
607 | # end:
608 | 
609 | # Copyright © 2012–2015 Josh Dukes <hex@neg9.org> and contributors.
610 | #
611 | # This is free software: you may copy, modify, and/or distribute this work
612 | # under the terms of the Expat License.
613 | # No warranty expressed or implied. See the file ‘LICENSE.Expat’ for details.
614 | 
615 | # Local variables:
616 | # coding: utf-8
617 | # mode: text
618 | # mode: markdown
619 | # End:
620 | # vim: fileencoding=utf-8 filetype=python:
621 | 


--------------------------------------------------------------------------------