├── .gitignore ├── universal_decode.py ├── oxford_comma_encode.py ├── line_endings_encode.py ├── EncoderBoilerplate.py ├── tab_encode.py ├── Explanation.txt ├── README.md ├── Readme.txt └── DissidentXEncoding.py /.gitignore: -------------------------------------------------------------------------------- 1 | /__pycache__/ 2 | -------------------------------------------------------------------------------- /universal_decode.py: -------------------------------------------------------------------------------- 1 | from DissidentXEncoding import decode_and_decrypt_message 2 | from sys import argv 3 | 4 | f = open(argv[1], 'br') 5 | p = f.read() 6 | f.close() 7 | m = decode_and_decrypt_message(argv[2].encode('utf-8'), p) 8 | if m is not None: 9 | print(m.decode('utf-8')) 10 | -------------------------------------------------------------------------------- /oxford_comma_encode.py: -------------------------------------------------------------------------------- 1 | def oxford_encode(p): 2 | r = [] 3 | for s in p.split(b', and'): 4 | if r: 5 | r.append([b', and', b' and']) 6 | r.append(s) 7 | return r 8 | 9 | if __name__ == '__main__': 10 | from EncoderBoilerplate import encode 11 | encode(oxford_encode) 12 | -------------------------------------------------------------------------------- /line_endings_encode.py: -------------------------------------------------------------------------------- 1 | def endings_encode(p): 2 | r = [] 3 | for s in p.split(b'\n'): 4 | if r: 5 | r.append([b'\n', b' \n']) 6 | r.append(s.rstrip()) 7 | return r 8 | 9 | if __name__ == '__main__': 10 | from EncoderBoilerplate import encode 11 | encode(endings_encode) 12 | -------------------------------------------------------------------------------- /EncoderBoilerplate.py: -------------------------------------------------------------------------------- 1 | from DissidentXEncoding import prepare_message, pack_and_encode_messages 2 | from sys import argv 3 | 4 | def encode(preparefunc): 5 | f = open(argv[1], 'br') 6 | p = f.read() 7 | f.close() 8 | messages = [prepare_message(argv[i].encode('utf-8'), argv[i+1].encode('utf-8')) for i in range(2, len(argv), 2)] 9 | m = pack_and_encode_messages(messages, preparefunc(p)) 10 | if m is None: 11 | print('Error') 12 | else: 13 | f = open(argv[1], 'bw') 14 | f.write(m) 15 | f.close() 16 | -------------------------------------------------------------------------------- /tab_encode.py: -------------------------------------------------------------------------------- 1 | def tab_cover(p): 2 | covertext = None 3 | for s in p.split(b'\n'): 4 | if covertext: 5 | covertext[-1] += b'\n' 6 | else: 7 | covertext = [''] 8 | if s[0:1] == b'\t': 9 | p = 1 10 | while s[p:p+1] == b'\t': 11 | p += 1 12 | covertext.append([b'\t' * p, b' ' * p]) 13 | covertext.append(s[p:]) 14 | elif s[0:8] == b' ': 15 | p = 1 16 | while s[p*8:(p+1)*8] == b' ': 17 | p += 1 18 | covertext.append([b' ' * p, b'\t' * p]) 19 | covertext.append(s[p*8:]) 20 | else: 21 | covertext[-1] += s 22 | return covertext 23 | 24 | if __name__ == '__main__': 25 | from EncoderBoilerplate import encode 26 | encode(tab_cover) 27 | -------------------------------------------------------------------------------- /Explanation.txt: -------------------------------------------------------------------------------- 1 | The general API is explained in the readme. This file assumes you already know 2 | what the high-level API looks like. 3 | 4 | This scheme is divided into three layers, which go together like this: 5 | 6 | encode(pack(encrypt())) 7 | 8 | The three layers have few dependencies, and there are unit tests for each of 9 | them. 10 | 11 | The most novel part is the encode layer. It takes a key, plaintext with 12 | possible alternates, and value to encode. Intuitively, if the key and plaintext 13 | are used as the keys to a stream cipher, then it will probably be possible to 14 | make the output of that stream cipher begin with a desired value if the number 15 | of alternates is more than the number of bits in the value. Unfortunately that 16 | would require time exponential on the number of bits to find the encoding. This 17 | scheme uses a very specifically designed stream cipher which makes it possible 18 | to compute which alternates to use to get the desired value in a polynomial 19 | amount of time. Specifically, it uses each contiguous section of sixteen bytes 20 | for a stream cipher (AES in OFB mode) and xors the outputs together, and makes 21 | sure that alternates have at least fifteen fixed bytes between them. The result 22 | is that flipping an alternate always xors the output by a specific value, 23 | independently from flipping other alternates, so it's possible to calculate 24 | which alternates are needed by row reduction. 25 | 26 | The way that row reduction is done is currently very crude. What it really 27 | should do is assume that the first value of each alternate is the less 28 | suspicious one, and attempt to use as few of those as possible, by going over 29 | the possible alternates in random order, row reducing each one, and throwing 30 | out ones which don't add possibilities until it has exactly as many rows as 31 | there are bits which need to be encoded. 32 | 33 | Packing is an unkeyed step which adds a length prefix and unencrypted checksum 34 | to the encrypted payload. In order to avoid obvious patterns in the plaintext 35 | values it xors them with the hash of the first four bytes of the ciphertext 36 | (it's actually a little bit more complex, but that's the basic idea). It's 37 | assumed that the ciphertext is at least four bytes long and that the first four 38 | bytes look fairly random, which is a reasonable assumption because the 39 | ciphertext is salted. 40 | 41 | Encryption is done with a threat model assuming that encrypted messages will be 42 | left in plaintext on public web servers. Obviously encoding will provide an 43 | additional layer of obfuscation, but it's easier to analyze assuming that 44 | obfuscation is absent. It's done with parameters which are reasonable for that 45 | use case, but far less than necessary for others, a tradeoff done because bits 46 | are assumed to be very precious due to the limits of the encoding step. To 47 | encrypt, the first four bytes of the sha3 hash of the plaintext are taken, 48 | those are included at the beginning of the ciphertext, then the plaintext 49 | encrypted in OFB mode using the first four bytes padded with zeros as the salt. 50 | The obvious attack is that if an attacker finds two different messages with the 51 | same first four bytes of hash and knows what the plaintext is of one they can 52 | find the plaintext of the other. Also if the same plaintext is encrypted twice 53 | it will result in the exact some ciphertext, so an attacker can trivially 54 | compare two ciphertexts to see if that's the case. 55 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## DissidentX 2 | 3 | Bram's steganographic framework 4 | 5 | __DissidentX is a censorship resistance tool.__ 6 | 7 | It has the capability of steganographically encoding messages in files. Special features include: 8 | 9 | * Messages cannot be decoded without the key 10 | 11 | * A single decoder for all file types and encoding techniques, including all future ones 12 | 13 | * Format-specific encoders can be easily written without having to worry about information theoretic encoding or cryptography 14 | 15 | * Support for multiple messages to multiple keys in a single file 16 | 17 | ### Primary Use 18 | 19 | The primary use case for DissidentX is encoding messages in files on the web. 20 | 21 | There should be a utility which scans all objects the user's web browser downloads (html files, images, css files, etc.) for messages using all of the keys the user has entered. Someone sending messages to that person provides a web service where users who have widely viewed web sites can upload their files and get back slightly modified version with messages steganographically added. 22 | 23 | The web users should not be able to read what the messages are, and it should be possible for the service doing the encoding to not have to keep messages in plaintext. 24 | 25 | Because encoding rates are so low, a number of the parameters to the encoding and decoding libraries have been lowered to not be appropriate for all use cases. They should be evaluated in the context of this one. The same technology should alse be used for easter egg hunts, because that's fun and provides cover traffic. 26 | 27 | 28 | ### Usage guide: 29 | 30 | Uses Python3, [PyCrypto](http://pypi.python.org/pypi/pycrypto), and [sha3](http://pypi.python.org/pypi/pysha3/) 31 | 32 | 33 | As examples, the command line tools `line_ending_encoder` and `universal_decoder` are included. `line_ending_encoder` is based on adding trailing spaces to the end of lines in a text file. 34 | 35 | Use `line_ending_encoder` like this: 36 | 37 | python3 line_ending_encoder.py myfile.txt key1 payload1 key2 payload2 38 | 39 | That will modify `myfile.txt`, hiding _payload1_ to the key `key1` and `payload2` to the key `key2`. 40 | 41 | Any number of key/payload pairs are allowed, although any given file can only support a certain total length of payloads. 42 | 43 | The keys are assumed to be in unicode, which is correct. The payloads are also assumed to be in unicode, which is a hack to make the output pretty, and not completely general. 44 | 45 | After you encode data with `line_ending_encoder` you can get it back out like this: 46 | 47 | python3 universal_decoder.py myfile.txt key1 48 | 49 | which will print out `payload1`. Likewise for `payload2` and `key2`. 50 | 51 | Note that `line_ending_encoder` only gets one bit per line, with overhead of seven bytes, and that encoding the same section of text repeatedly in a text file doesn't get extra bits. 52 | 53 | 54 | ### Encoder writing guide: 55 | 56 | The `prepare_message()` function takes a key and plaintext, both byte strings, and returns another key and ciphertext to be used later. This is done as a separate step to enable the use case where messages to be encoded are stored on a server already encrypted. 57 | 58 | The `pack_and_encode_messages()` function takes an array of results from `prepare_message()` and a processed file for the messages to be stored in. The processed file is an array consisting alternately of fixed binary strings and arrays of length two giving alternate possible values for that position. 59 | 60 | Alternates can be anything semantically valid for the file format being used. For example in human readable text files eliminating unnecessary commas in text, or alternate spellings for words, or alternative word orders can all be used. Multiple methods of generating alternates can be used in the same file. 61 | 62 | Simple implementations are in `line_endings_encoder.py` and `tab_encoder.py`, both designed to work on common computer language files. 63 | 64 | More detail on the math involved is in Explanation.txt 65 | 66 | 67 | ### FAQ: 68 | 69 | __Q. Can someone modify the message stored in a file?__ 70 | 71 | A. No. Changing even a single byte of the file will completely obliterate any message which was stored. 72 | 73 | __Q. Why did you use Python3 as a reference language?__ 74 | 75 | A. Because not having distinct binary and unicode string types is barbaric. 76 | 77 | __Q. Can I get a copy of this for another language?__ 78 | 79 | A. If somebody writes it. This code is being released as a reference in the hopes that other people will pick it up and run with it. 80 | 81 | __Q. Why are you doing row reduction manually in Python instead of using `numpy`?__ 82 | 83 | A. Because I don't know how. Feel free to implement improvements. 84 | 85 | __Q. Can someone detect that a file has messages encoded in it?__ 86 | 87 | A. That depends on the encoding used and the properties of the file the data is 88 | being encoded in. There's a whole field of academic literature 89 | on steganography, none of which is invalidated by this code. What this code 90 | does is vastly simplify the implementation of new steganographic techniques, 91 | and allow a universal decoder and encoding of multiple messages to different 92 | keys in the same file. 93 | 94 | __Q. How much data can be encoded in a file?__ 95 | 96 | A. That's entirely dependant on the file type and specific encoding, but if 97 | you insist on a made up number, let's say a ratio of around 500:1, and the 98 | encoded message has overhead of about 7 bytes. 99 | 100 | __Q. Why can't it be given more than two alternates for one position to encode more information?__ 101 | 102 | A. Because of math. See `Explanation.txt` for a bit more detail. 103 | 104 | __Q. Your code is horribly inefficient and can be optimized in all kinds of ways.__ 105 | 106 | A. That's why it's called 'reference' code. 107 | 108 | __Q. It would be possible to pack in data more densely if alternates are required to always be the same length, or variable bytes are allowed to be set to arbitrary values.__ 109 | 110 | A. Yes, but those put severe restrictions on what can be done in an encoder, and hence are less likely to be useful in practice. 111 | 112 | __Q. Why don't you use public key encryption?__ 113 | 114 | A. Because bits are precious enough for that to be unwieldy, and it would disallow use of arbitrary human readable strings as keys. The symmetry is best viewed as a feature: because the value of a key is severely diminished if it's widely known, there's a reason to hoard them, which is the desired behavior. 115 | -------------------------------------------------------------------------------- /Readme.txt: -------------------------------------------------------------------------------- 1 | DissidentX is a censorship resistance tool. 2 | 3 | It has the capability of steganographically encoding messages in 4 | files. Special features include: 5 | 6 | * Messages cannot be decoded without the key 7 | 8 | * A single decoder for all file types and encoding techniques, 9 | including all future ones 10 | 11 | * Format-specific encoders can be easily written without having to 12 | worry about information theoretic encoding or cryptography 13 | 14 | * Support for multiple messages to multiple keys in a single file 15 | 16 | The primary use case for DissidentX is encoding messages in files on 17 | the web. There should be a utility which scans all objects the user's 18 | web browser downloads (html files, images, css files, etc.) for messages 19 | using all of the keys the user has entered. Someone sending messages 20 | to that person provides a web service where users who have widely 21 | viewed web sites can upload their files and get back slightly modified 22 | version with messages steganographically added. The web users should 23 | not be able to read what the messages are, and it should be possible for 24 | the service doing the encoding to not have to keep messages in plaintext. 25 | Because encoding rates are so low, a number of the parameters to the 26 | encoding and decoding libraries have been lowered to not be appropriate 27 | for all use cases. They should be evaluated in the context of this one. 28 | 29 | The same technology should alse be used for easter egg hunts, because 30 | that's fun and provides cover traffic. 31 | 32 | 33 | Usage guide: 34 | 35 | Uses Python3, PyCrypto, and sha3 36 | http://pypi.python.org/pypi/pycrypto 37 | http://pypi.python.org/pypi/pysha3/ 38 | 39 | As examples, the command line tools line_ending_encoder and 40 | universal_decoder are included. line_ending_encoder is based on adding 41 | trailing spaces to the end of lines in a text file. 42 | 43 | Use line_ending_encoder like this: 44 | 45 | python3 line_ending_encoder.py myfile.txt key1 payload1 key2 payload2 46 | 47 | That will modify myfile.txt, hiding payload1 to the key key1 and 48 | payload2 to the key key2. Any number of key/payload pairs are allowed, 49 | although any given file can only support a certain total length of 50 | payloads. 51 | 52 | The keys are assumed to be in unicode, which is correct. The payloads 53 | are also assumed to be in unicode, which is a hack to make the output 54 | pretty, and not completely general. 55 | 56 | After you encode data with line_ending_encoder you can get it back out 57 | like this: 58 | 59 | python3 universal_decoder.py myfile.txt key1 60 | 61 | which will print out payload1. Likewise for payload2 and key2. 62 | 63 | Note that line_ending_encoder only gets one bit per line, with overhead 64 | of seven bytes, and that repeating the same section of text repeatedly in 65 | a text file doesn't get extra bits. 66 | 67 | 68 | Encoder writing guide: 69 | 70 | The prepare_message() function takes a key and plaintext, both byte strings, 71 | and returns another key and ciphertext to be used later. This is done as a 72 | separate step to enable the use case where messages to be encoded are stored 73 | on a server already encrypted. 74 | 75 | The pack_and_encode_messages() function takes an array of results from 76 | prepare_message() and a processed file for the messages to be stored in. The 77 | processed file is an array consisting alternately of fixed binary strings and 78 | arrays of length two giving alternate possible values for that position. 79 | Alternates can be anything semantically valid for the file format being used. 80 | For example in human readable text files eliminating unnecessary commas in 81 | text, or alternate spellings for words, or alternative word orders can all 82 | be used. Multiple methods of generating alternates can be used in the same 83 | file. 84 | 85 | Simple implementations are in line_endings_encoder.py and tab_encoder.py, 86 | both designed to work on common computer language files. 87 | 88 | More detail on the math involved is in Explanation.txt 89 | 90 | 91 | FAQ: 92 | 93 | Q. Can someone modify the message stored in a file? 94 | 95 | A. No. Changing even a single byte of the file will completely 96 | obliterate any message which was stored. 97 | 98 | Q. Why did you use Python3 as a reference language? 99 | 100 | A. Because not having distinct binary and unicode string types is barbaric. 101 | 102 | Q. Can I get a copy of this for another language? 103 | 104 | A. If somebody writes it. This code is being released as a reference 105 | in the hopes that other people will pick it up and run with it. 106 | 107 | Q. Why are you doing row reduction manually in Python instead of using numpy? 108 | 109 | A. Because I don't know how. Feel free to implement improvements. 110 | 111 | Q. Can someone detect that a file has messages encoded in it? 112 | 113 | A. That depends on the encoding used and the properties of the file the data is 114 | being encoded in. There's a whole field of academic literature 115 | on steganography, none of which is invalidated by this code. What this code 116 | does is vastly simplify the implementation of new steganographic techniques, 117 | and allow a universal decoder and encoding of multiple messages to different 118 | keys in the same file. 119 | 120 | Q. How much data can be encoded in a file? 121 | 122 | A. That's entirely dependant on the file type and specific encoding, but if 123 | you insist on a made up number, let's say a ratio of around 500:1, and the 124 | encoded message has overhead of about 7 bytes. 125 | 126 | Q. Why can't it be given more than two alternates for one position to encode 127 | more information? 128 | 129 | A. Because of math. See Explanation.txt for a bit more detail. 130 | 131 | Q. Your code is horribly inefficient and can be optimized in all kinds of ways. 132 | 133 | A. That's why it's called 'reference' code. 134 | 135 | Q. It would be possible to pack in data more densely if alternates are 136 | required to always be the same length, or variable bytes are allowed to be set 137 | to arbitrary values. 138 | 139 | A. Yes, but those put severe restrictions on what can be done in an 140 | encoder, and hence are less likely to be useful in practice. 141 | 142 | Q. Why don't you use public key encryption? 143 | 144 | A. Because bits are precious enough for that to be unweildy, and it would 145 | disallow use of arbitrary human readable strings as keys. The symmetry is 146 | best viewed as a feature: because the value of a key is severely diminished 147 | if it's widely known, there's a reason to hoard them, which is the desired 148 | behavior. 149 | -------------------------------------------------------------------------------- /DissidentXEncoding.py: -------------------------------------------------------------------------------- 1 | # http://pypi.python.org/pypi/pycrypto 2 | # http://pypi.python.org/pypi/pysha3/ 3 | 4 | import hashlib 5 | import sha3 6 | from Crypto.Cipher import AES 7 | 8 | def h(message): 9 | return hashlib.sha3_256(message).digest() 10 | 11 | def x(m1, m2): 12 | assert type(m1) is bytes 13 | assert type(m2) is bytes 14 | return (int.from_bytes(m1, 'big') ^ int.from_bytes(m2, 'big')).to_bytes(len(m1), 'big') 15 | 16 | assert x(x(b'abc', b'def'), b'def') == b'abc' 17 | 18 | def encrypt_ofb(key, iv, plaintext): 19 | assert len(key) == 16, key 20 | assert len(iv) == 16, iv 21 | return AES.new(key, AES.MODE_OFB, iv).encrypt(plaintext + b'a' * (-len(plaintext) % 16))[:len(plaintext)] 22 | 23 | assert encrypt_ofb(b'abcd' * 4, b'iv' * 8, encrypt_ofb(b'abcd' * 4, b'iv' * 8, b'plaintext')) == b'plaintext' 24 | 25 | def encrypt_message(key, plaintext): 26 | mac = h(key + plaintext)[:4] 27 | return mac + encrypt_ofb(key, mac + bytes([0] * 12), plaintext) 28 | 29 | def prepare_message(key, plaintext): 30 | key = h(key)[:16] 31 | return h(key)[:16], encrypt_message(key, plaintext) 32 | 33 | def decrypt_message(key, ciphertext): 34 | mac = ciphertext[:4] 35 | r = encrypt_ofb(key, mac + bytes([0] * 12), ciphertext[4:]) 36 | return (r if mac == h(key + r)[:4] else None) 37 | 38 | def test_encrypt(): 39 | key = b'abcd' * 4 40 | fullstr = bytes(list(range(256))) 41 | for i in range(256): 42 | mystr = fullstr[:i] 43 | assert decrypt_message(key, encrypt_message(key, mystr)) == mystr 44 | 45 | test_encrypt() 46 | 47 | def pack_message(message): 48 | assert len(message) >= 4, message 49 | r = message[:4] 50 | v = len(message) - 4 51 | lb = bytes([v] if v < 128 else [128 | v >> 8, v & 0xFF]) 52 | r += x(lb, h(r)[:len(lb)]) 53 | r += h(r)[:2] 54 | return r + message[4:] 55 | 56 | def begin_unpack_message(message): 57 | prefix = x(h(message[:4])[:2], message[4:6]) 58 | if prefix[0] < 128: 59 | mlen = prefix[0] + 4 60 | mbegin = 5 61 | else: 62 | mlen = (((prefix[0] - 128) << 8) | prefix[1]) + 4 63 | mbegin = 6 64 | if message[mbegin:mbegin + 2] != h(message[:mbegin])[:2]: 65 | return None 66 | return mlen + mbegin - 2 67 | 68 | def unpack_message(message): 69 | prefix = x(h(message[:4])[:2], message[4:6]) 70 | if prefix[0] < 128: 71 | mlen = prefix[0] + 4 72 | mbegin = 5 73 | else: 74 | mlen = (((prefix[0] - 128) << 8) | prefix[1]) + 4 75 | mbegin = 6 76 | assert len(message) == mlen + mbegin - 2 77 | return message[:4] + message[mbegin + 2:] 78 | 79 | def test_pack(): 80 | fullstr = bytes(list(range(256))) 81 | for i in range(4, 256): 82 | mystr = fullstr[:i] 83 | packed = pack_message(mystr) 84 | assert begin_unpack_message(packed) == len(packed) 85 | assert unpack_message(packed) == mystr 86 | 87 | test_pack() 88 | 89 | def remove_too_short(plaintext): 90 | p2 = [b''] 91 | for i in range(0, len(plaintext)-1, 2): 92 | p2[-1] += plaintext[i] 93 | if len(p2) > 1 and len(p2[-1]) < 15: 94 | p2[-1] += plaintext[i+1][0] 95 | else: 96 | a, b = plaintext[i+1] 97 | j = 0 98 | while j < len(a) and j < len(b) and a[j] == b[j]: 99 | j += 1 100 | if j: 101 | p2[-1] += a[:j] 102 | a = a[j:] 103 | b = b[j:] 104 | j = 0 105 | while j < len(a) and j < len(b) and a[-j-1] == b[-j-1]: 106 | j += 1 107 | if j: 108 | excess = a[-j:] 109 | a = a[:-j] 110 | b = b[:-j] 111 | else: 112 | excess = b'' 113 | p2.append([a, b]) 114 | p2.append(excess) 115 | p2[-1] += plaintext[-1] 116 | return p2 117 | 118 | assert remove_too_short([b'', [b'abc', b'aqc'], b'y']) == [b'a', [b'b', b'q'], b'cy'] 119 | assert remove_too_short([b'x', [b'abc', b'abcd'], b'y']) == [b'xabc', [b'', b'd'], b'y'] 120 | assert remove_too_short([b'x', [b'abc', b'dabc'], b'y']) == [b'x', [b'', b'd'], b'abcy'] 121 | assert remove_too_short([b'x', [b'ac', b'aqc'], b'y']) == [b'xa', [b'', b'q'], b'cy'] 122 | 123 | def to_bitfield(m): 124 | r = [] 125 | for v in m: 126 | for i in range(8): 127 | r.append((v >> i) & 1) 128 | return r 129 | 130 | def encode_messages(messages, plaintext): 131 | plaintext = remove_too_short(plaintext) 132 | base = [plaintext[0]] 133 | for i in range(1, len(plaintext), 2): 134 | base.append(plaintext[i][0]) 135 | base.append(plaintext[i+1]) 136 | goal = to_bitfield(x(b''.join([message for key, message in messages]), pdms(messages, b''.join(base)))) 137 | vectors = [] 138 | for i in range(1, len(plaintext), 2): 139 | vectors.append(to_bitfield(x(pdms(messages, plaintext[i-1][-15:] + plaintext[i][0] + plaintext[i+1][:15]), 140 | pdms(messages, plaintext[i-1][-15:] + plaintext[i][1] + plaintext[i+1][:15])))) 141 | toflips = solve(vectors, goal) 142 | if toflips is None: 143 | return None 144 | r = [plaintext[0]] 145 | for p, i in enumerate(range(1, len(plaintext), 2)): 146 | r.append(plaintext[i][toflips[p]]) 147 | r.append(plaintext[i+1]) 148 | return b''.join(r) 149 | 150 | def pack_and_encode_messages(messages, plaintext): 151 | return encode_messages([(key, pack_message(message)) for key, message in messages], plaintext) 152 | 153 | def pdms(messages, text): 154 | return b''.join([partial_decode_message(key, text, len(message)) for (key, message) in messages]) 155 | 156 | def partial_decode_message(key, message, mylen): 157 | assert type(key) is bytes 158 | assert type(message) is bytes 159 | r = bytes([0] * mylen) 160 | for i in range(len(message) - 15): 161 | r = x(r, encrypt_ofb(key, message[i:i+16], bytes([0] * mylen))) 162 | return r 163 | 164 | def decode_and_decrypt_message(key, message): 165 | key = h(key)[:16] 166 | key2 = h(key)[:16] 167 | mystr = partial_decode_message(key2, message, 16) 168 | mylen = begin_unpack_message(mystr) 169 | if mylen is None: 170 | return None 171 | mystr = partial_decode_message(key2, message, mylen) 172 | if mystr is None: 173 | return None 174 | mystr = unpack_message(mystr) 175 | if mystr is None: 176 | return None 177 | mystr = decrypt_message(key, mystr) 178 | if mystr is None: 179 | return None 180 | return mystr 181 | 182 | def xor(a, b): 183 | assert type(a) is list 184 | assert type(b) is list 185 | return [x^y for x, y in zip(a, b)] 186 | 187 | assert xor([0, 0, 1, 1], [0, 1, 0, 1]) == [0, 1, 1, 0] 188 | 189 | def solve(vectors, goal): 190 | active = [x + [0] * len(vectors) for x in vectors] 191 | for i in range(len(active)): 192 | active[i][len(goal) + i] = 1 193 | for i in range(len(goal)): 194 | p = i 195 | while p < len(active) and active[p][i] == 0: 196 | p += 1 197 | if p == len(vectors): 198 | return None 199 | active[p], active[i] = active[i], active[p] 200 | for j in range(len(active)): 201 | if j != i and active[j][i]: 202 | active[j] = xor(active[j], active[i]) 203 | r = [0] * len(active) 204 | for i in range(len(goal)): 205 | if goal[i]: 206 | r = xor(r, active[i][len(goal):]) 207 | return r 208 | 209 | from random import randrange 210 | 211 | def test_solve(): 212 | vectors = [[randrange(2) for j in range(5)] for i in range(10)] 213 | goal = [randrange(2) for i in range(5)] 214 | solution = solve(vectors, goal) 215 | t = [0] * 5 216 | for i in range(len(solution)): 217 | if solution[i]: 218 | t = xor(t, vectors[i]) 219 | assert t == goal 220 | 221 | test_solve() 222 | 223 | def test_encode(): 224 | key = bytes([7] * 16) 225 | plaintext = [b'abc', [b'', b'pqr']] 226 | for i in range(50): 227 | plaintext.append(bytes([randrange(256) for j in range(15)])) 228 | plaintext.append([b'ab', b'cde']) 229 | plaintext.append(b'stuv') 230 | message = b'hey' 231 | assert partial_decode_message(key, encode_messages([(key, message)], plaintext), len(message)) == message 232 | 233 | test_encode() 234 | 235 | def test_crypt(): 236 | key = b'key' 237 | message = b'abc' 238 | key2, message2 = prepare_message(key, message) 239 | plaintext = [b'abc', [b'', b'pqr']] 240 | for i in range(100): 241 | plaintext.append(bytes([randrange(256) for j in range(15)])) 242 | plaintext.append([b'ab', b'cde']) 243 | plaintext.append(b'stuv') 244 | assert decode_and_decrypt_message(key, pack_and_encode_messages([(key2, message2)], plaintext)) == message 245 | 246 | test_crypt() 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | --------------------------------------------------------------------------------