├── .gitignore
├── universal_decode.py
├── oxford_comma_encode.py
├── line_endings_encode.py
├── EncoderBoilerplate.py
├── tab_encode.py
├── Explanation.txt
├── README.md
├── Readme.txt
└── DissidentXEncoding.py


/.gitignore:
--------------------------------------------------------------------------------
1 | /__pycache__/
2 | 


--------------------------------------------------------------------------------
/universal_decode.py:
--------------------------------------------------------------------------------
 1 | from DissidentXEncoding import decode_and_decrypt_message
 2 | from sys import argv
 3 | 
 4 | f = open(argv[1], 'br')
 5 | p = f.read()
 6 | f.close()
 7 | m = decode_and_decrypt_message(argv[2].encode('utf-8'), p)
 8 | if m is not None:
 9 | 	print(m.decode('utf-8'))
10 | 


--------------------------------------------------------------------------------
/oxford_comma_encode.py:
--------------------------------------------------------------------------------
 1 | def oxford_encode(p):
 2 |     r = []
 3 |     for s in p.split(b', and'):
 4 |         if r:
 5 |             r.append([b', and', b' and'])
 6 |         r.append(s)
 7 |     return r
 8 | 
# Script entry point: hand the Oxford-comma splitter to the shared
# command-line driver, which reads its arguments from sys.argv.
if __name__ == '__main__':
    from EncoderBoilerplate import encode
    encode(oxford_encode)
12 | 


--------------------------------------------------------------------------------
/line_endings_encode.py:
--------------------------------------------------------------------------------
 1 | def endings_encode(p):
 2 |     r = []
 3 |     for s in p.split(b'\n'):
 4 |         if r:
 5 |             r.append([b'\n', b' \n'])
 6 |         r.append(s.rstrip())
 7 |     return r
 8 | 
# Script entry point: hand the line-ending splitter to the shared
# command-line driver, which reads its arguments from sys.argv.
if __name__ == '__main__':
    from EncoderBoilerplate import encode
    encode(endings_encode)
12 | 


--------------------------------------------------------------------------------
/EncoderBoilerplate.py:
--------------------------------------------------------------------------------
 1 | from DissidentXEncoding import prepare_message, pack_and_encode_messages
 2 | from sys import argv
 3 | 
 4 | def encode(preparefunc):
 5 |     f = open(argv[1], 'br')
 6 |     p = f.read()
 7 |     f.close()
 8 |     messages = [prepare_message(argv[i].encode('utf-8'), argv[i+1].encode('utf-8')) for i in range(2, len(argv), 2)]
 9 |     m = pack_and_encode_messages(messages, preparefunc(p))
10 |     if m is None:
11 |         print('Error')
12 |     else:
13 |         f = open(argv[1], 'bw')
14 |         f.write(m)
15 |         f.close()
16 | 


--------------------------------------------------------------------------------
/tab_encode.py:
--------------------------------------------------------------------------------
 1 | def tab_cover(p):
 2 | 	covertext = None
 3 | 	for s in p.split(b'\n'):
 4 | 		if covertext:
 5 | 			covertext[-1] += b'\n'
 6 | 		else:
 7 | 			covertext = ['']
 8 | 		if s[0:1] == b'\t':
 9 | 			p = 1
10 | 			while s[p:p+1] == b'\t':
11 | 				p += 1
12 | 			covertext.append([b'\t' * p, b'        ' * p])
13 | 			covertext.append(s[p:])
14 | 		elif s[0:8] == b'        ':
15 | 			p = 1
16 | 			while s[p*8:(p+1)*8] == b'        ':
17 | 				p += 1
18 | 			covertext.append([b'        ' * p, b'\t' * p])
19 | 			covertext.append(s[p*8:])
20 | 		else:
21 | 			covertext[-1] += s
22 | 	return covertext
23 | 
# Script entry point: hand the tab/space splitter to the shared
# command-line driver, which reads its arguments from sys.argv.
if __name__ == '__main__':
	from EncoderBoilerplate import encode
	encode(tab_cover)
27 | 


--------------------------------------------------------------------------------
/Explanation.txt:
--------------------------------------------------------------------------------
 1 | The general API is explained in the readme. This file assumes you already know 
 2 | what the high-level API looks like.
 3 | 
 4 | This scheme is divided into three layers, which go together like this:
 5 | 
 6 | encode(pack(encrypt()))
 7 | 
 8 | The three layers have few dependencies, and there are unit tests for each of 
 9 | them.
10 | 
11 | The most novel part is the encode layer. It takes a key, plaintext with 
12 | possible alternates, and value to encode. Intuitively, if the key and plaintext
13 | are used as the keys to a stream cipher, then it will probably be possible to 
14 | make the output of that stream cipher begin with a desired value if the number 
15 | of alternates is more than the number of bits in the value. Unfortunately that 
16 | would require time exponential in the number of bits to find the encoding. This 
17 | scheme uses a very specifically designed stream cipher which makes it possible 
18 | to compute which alternates to use to get the desired value in a polynomial 
19 | amount of time. Specifically, it uses each contiguous section of sixteen bytes
20 | for a stream cipher (AES in OFB mode) and xors the outputs together, and makes 
21 | sure that alternates have at least fifteen fixed bytes between them. The result 
22 | is that flipping an alternate always xors the output by a specific value, 
23 | independently from flipping other alternates, so it's possible to calculate 
24 | which alternates are needed by row reduction.
25 | 
26 | The way that row reduction is done is currently very crude. What it really 
27 | should do is assume that the first value of each alternate is the less 
28 | suspicious one, and attempt to use as few of those as possible, by going over 
29 | the possible alternates in random order, row reducing each one, and throwing 
30 | out ones which don't add possibilities until it has exactly as many rows as 
31 | there are bits which need to be encoded.
32 | 
33 | Packing is an unkeyed step which adds a length prefix and unencrypted checksum 
34 | to the encrypted payload. In order to avoid obvious patterns in the plaintext 
35 | values it xors them with the hash of the first four bytes of the ciphertext 
36 | (it's actually a little bit more complex, but that's the basic idea). It's 
37 | assumed that the ciphertext is at least four bytes long and that the first four 
38 | bytes look fairly random, which is a reasonable assumption because the 
39 | ciphertext is salted.
40 | 
41 | Encryption is done with a threat model assuming that encrypted messages will be 
42 | left in plaintext on public web servers. Obviously encoding will provide an 
43 | additional layer of obfuscation, but it's easier to analyze assuming that 
44 | obfuscation is absent. It's done with parameters which are reasonable for that 
45 | use case, but far less than necessary for others, a tradeoff done because bits 
46 | are assumed to be very precious due to the limits of the encoding step. To 
47 | encrypt, the first four bytes of the sha3 hash of the key and plaintext are taken, 
48 | those are included at the beginning of the ciphertext, then the plaintext 
49 | encrypted in OFB mode using the first four bytes padded with zeros as the salt. 
50 | The obvious attack is that if an attacker finds two different messages with the 
51 | same first four bytes of hash and knows what the plaintext is of one they can 
52 | find the plaintext of the other. Also if the same plaintext is encrypted twice 
53 | it will result in the exact same ciphertext, so an attacker can trivially 
54 | compare two ciphertexts to see if that's the case.
55 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## DissidentX
  2 | 
  3 | Bram's steganographic framework
  4 | 
  5 | __DissidentX is a censorship resistance tool.__
  6 | 
  7 | It has the capability of steganographically encoding messages in files. Special features include:
  8 | 
  9 | * Messages cannot be decoded without the key
 10 | 
 11 | * A single decoder for all file types and encoding techniques, including all future ones
 12 | 
 13 | * Format-specific encoders can be easily written without having to worry about information theoretic encoding or cryptography
 14 | 
 15 | * Support for multiple messages to multiple keys in a single file 
 16 | 
 17 | ### Primary Use
 18 | 
 19 | The primary use case for DissidentX is encoding messages in files on the web. 
 20 | 
 21 | There should be a utility which scans all objects the user's web browser downloads (html files, images, css files, etc.) for messages using all of the keys the user has entered. Someone sending messages to that person provides a web service where users who have widely viewed web sites can upload their files and get back a slightly modified version with messages steganographically added. 
 22 | 
 23 | The web users should not be able to read what the messages are, and it should be possible for  the service doing the encoding to not have to keep messages in plaintext.  
 24 | 
 25 | Because encoding rates are so low, a number of the parameters to the encoding and decoding libraries have been lowered to not be appropriate for all use cases. They should be evaluated in the context of this one. The same technology should also be used for easter egg hunts, because that's fun and provides cover traffic.
 26 | 
 27 | 
 28 | ### Usage guide:
 29 | 
 30 | Uses Python3, [PyCrypto](http://pypi.python.org/pypi/pycrypto), and [sha3](http://pypi.python.org/pypi/pysha3/)
 31 | 
 32 | 
 33 | As examples, the command line tools `line_endings_encode.py` and `universal_decode.py` are included. `line_endings_encode.py` is based on adding trailing spaces to the end of lines in a text file.
 34 | 
 35 | Use `line_endings_encode.py` like this:
 36 | 
 37 |     python3 line_endings_encode.py myfile.txt key1 payload1 key2 payload2
 38 | 
 39 | That will modify `myfile.txt`, hiding `payload1` to the key `key1` and `payload2` to the key `key2`.
 40 | 
 41 | Any number of key/payload pairs are allowed, although any given file can only support a certain total length of payloads.
 42 | 
 43 | The keys are assumed to be in unicode, which is correct. The payloads are also assumed to be in unicode, which is a hack to make the output pretty, and not completely general.
 44 | 
 45 | After you encode data with `line_endings_encode.py` you can get it back out like this:
 46 | 
 47 |     python3 universal_decode.py myfile.txt key1
 48 | 
 49 | which will print out `payload1`. Likewise for `payload2` and `key2`.
 50 | 
 51 | Note that `line_endings_encode.py` only gets one bit per line, with overhead of seven bytes, and that encoding the same section of text repeatedly in a text file doesn't get extra bits.
 52 | 
 53 | 
 54 | ### Encoder writing guide:
 55 | 
 56 | The `prepare_message()` function takes a key and plaintext, both byte strings, and returns another key and ciphertext to be used later. This is done as a separate step to enable the use case where messages to be encoded are stored on a server already encrypted.
 57 | 
 58 | The `pack_and_encode_messages()` function takes an array of results from `prepare_message()` and a processed file for the messages to be stored in. The processed file is an array consisting alternately of fixed binary strings and arrays of length two giving alternate possible values for that position. 
 59 | 
 60 | Alternates can be anything semantically valid for the file format being used. For example in human readable text files eliminating unnecessary commas in text, or alternate spellings for words, or alternative word orders can all be used. Multiple methods of generating alternates can be used in the same file.
 61 | 
 62 | Simple implementations are in `line_endings_encode.py` and `tab_encode.py`, both designed to work on common computer language files.
 63 | 
 64 | More detail on the math involved is in Explanation.txt
 65 | 
 66 | 
 67 | ### FAQ:
 68 | 
 69 | __Q. Can someone modify the message stored in a file?__
 70 | 
 71 | A. No. Changing even a single byte of the file will completely obliterate any message which was stored.
 72 | 
 73 | __Q. Why did you use Python3 as a reference language?__
 74 | 
 75 | A. Because not having distinct binary and unicode string types is barbaric.
 76 | 
 77 | __Q. Can I get a copy of this for another language?__
 78 | 
 79 | A. If somebody writes it. This code is being released as a reference in the hopes that other people will pick it up and run with it.
 80 | 
 81 | __Q. Why are you doing row reduction manually in Python instead of using `numpy`?__
 82 | 
 83 | A. Because I don't know how. Feel free to implement improvements.
 84 | 
 85 | __Q. Can someone detect that a file has messages encoded in it?__
 86 | 
 87 | A. That depends on the encoding used and the properties of the file the data is 
 88 | being encoded in. There's a whole field of academic literature 
 89 | on steganography, none of which is invalidated by this code. What this code 
 90 | does is vastly simplify the implementation of new steganographic techniques, 
 91 | and allow a universal decoder and encoding of multiple messages to different 
 92 | keys in the same file.
 93 | 
 94 | __Q. How much data can be encoded in a file?__
 95 | 
 96 | A. That's entirely dependent on the file type and specific encoding, but if 
 97 | you insist on a made up number, let's say a ratio of around 500:1, and the 
 98 | encoded message has overhead of about 7 bytes.
 99 | 
100 | __Q. Why can't it be given more than two alternates for one position to encode more information?__
101 | 
102 | A. Because of math. See `Explanation.txt` for a bit more detail.
103 | 
104 | __Q. Your code is horribly inefficient and can be optimized in all kinds of ways.__
105 | 
106 | A. That's why it's called 'reference' code.
107 | 
108 | __Q. It would be possible to pack in data more densely if alternates are required to always be the same length, or variable bytes are allowed to be set to arbitrary values.__
109 | 
110 | A. Yes, but those put severe restrictions on what can be done in an encoder, and hence are less likely to be useful in practice.
111 | 
112 | __Q. Why don't you use public key encryption?__
113 | 
114 | A. Because bits are precious enough for that to be unwieldy, and it would disallow use of arbitrary human readable strings as keys. The symmetry is best viewed as a feature: because the value of a key is severely diminished if it's widely known, there's a reason to hoard them, which is the desired behavior.
115 | 


--------------------------------------------------------------------------------
/Readme.txt:
--------------------------------------------------------------------------------
  1 | DissidentX is a censorship resistance tool.
  2 | 
  3 | It has the capability of steganographically encoding messages in
  4 | files. Special features include:
  5 | 
  6 | * Messages cannot be decoded without the key
  7 | 
  8 | * A single decoder for all file types and encoding techniques,
  9 | including all future ones
 10 | 
 11 | * Format-specific encoders can be easily written without having to
 12 | worry about information theoretic encoding or cryptography
 13 | 
 14 | * Support for multiple messages to multiple keys in a single file
 15 | 
 16 | The primary use case for DissidentX is encoding messages in files on 
 17 | the web. There should be a utility which scans all objects the user's 
 18 | web browser downloads (html files, images, css files, etc.) for messages
 19 | using all of the keys the user has entered. Someone sending messages 
 20 | to that person provides a web service where users who have widely 
 21 | viewed web sites can upload their files and get back a slightly modified
 22 | version with messages steganographically added. The web users should 
 23 | not be able to read what the messages are, and it should be possible for 
 24 | the service doing the encoding to not have to keep messages in plaintext. 
 25 | Because encoding rates are so low, a number of the parameters to the 
 26 | encoding and decoding libraries have been lowered to not be appropriate 
 27 | for all use cases. They should be evaluated in the context of this one.
 28 | 
 29 | The same technology should also be used for easter egg hunts, because 
 30 | that's fun and provides cover traffic.
 31 | 
 32 | 
 33 | Usage guide:
 34 | 
 35 | Uses Python3, PyCrypto, and sha3
 36 | http://pypi.python.org/pypi/pycrypto
 37 | http://pypi.python.org/pypi/pysha3/
 38 | 
 39 | As examples, the command line tools line_endings_encode.py and 
 40 | universal_decode.py are included. line_endings_encode.py is based on adding 
 41 | trailing spaces to the end of lines in a text file.
 42 | 
 43 | Use line_endings_encode.py like this:
 44 | 
 45 | python3 line_endings_encode.py myfile.txt key1 payload1 key2 payload2
 46 | 
 47 | That will modify myfile.txt, hiding payload1 to the key key1 and 
 48 | payload2 to the key key2. Any number of key/payload pairs are allowed,
 49 | although any given file can only support a certain total length of 
 50 | payloads.
 51 | 
 52 | The keys are assumed to be in unicode, which is correct. The payloads 
 53 | are also assumed to be in unicode, which is a hack to make the output 
 54 | pretty, and not completely general.
 55 | 
 56 | After you encode data with line_endings_encode.py you can get it back out 
 57 | like this:
 58 | 
 59 | python3 universal_decode.py myfile.txt key1
 60 | 
 61 | which will print out payload1. Likewise for payload2 and key2.
 62 | 
 63 | Note that line_endings_encode.py only gets one bit per line, with overhead 
 64 | of seven bytes, and that encoding the same section of text repeatedly in 
 65 | a text file doesn't get extra bits.
 66 | 
 67 | 
 68 | Encoder writing guide:
 69 | 
 70 | The prepare_message() function takes a key and plaintext, both byte strings, 
 71 | and returns another key and ciphertext to be used later. This is done as a 
 72 | separate step to enable the use case where messages to be encoded are stored 
 73 | on a server already encrypted.
 74 | 
 75 | The pack_and_encode_messages() function takes an array of results from 
 76 | prepare_message() and a processed file for the messages to be stored in. The 
 77 | processed file is an array consisting alternately of fixed binary strings and 
 78 | arrays of length two giving alternate possible values for that position. 
 79 | Alternates can be anything semantically valid for the file format being used. 
 80 | For example in human readable text files eliminating unnecessary commas in 
 81 | text, or alternate spellings for words, or alternative word orders can all 
 82 | be used. Multiple methods of generating alternates can be used in the same 
 83 | file.
 84 | 
 85 | Simple implementations are in line_endings_encode.py and tab_encode.py, 
 86 | both designed to work on common computer language files.
 87 | 
 88 | More detail on the math involved is in Explanation.txt
 89 | 
 90 | 
 91 | FAQ:
 92 | 
 93 | Q. Can someone modify the message stored in a file?
 94 | 
 95 | A. No. Changing even a single byte of the file will completely
 96 | obliterate any message which was stored.
 97 | 
 98 | Q. Why did you use Python3 as a reference language?
 99 | 
100 | A. Because not having distinct binary and unicode string types is barbaric.
101 | 
102 | Q. Can I get a copy of this for another language?
103 | 
104 | A. If somebody writes it. This code is being released as a reference
105 | in the hopes that other people will pick it up and run with it.
106 | 
107 | Q. Why are you doing row reduction manually in Python instead of using numpy?
108 | 
109 | A. Because I don't know how. Feel free to implement improvements.
110 | 
111 | Q. Can someone detect that a file has messages encoded in it?
112 | 
113 | A. That depends on the encoding used and the properties of the file the data is 
114 | being encoded in. There's a whole field of academic literature 
115 | on steganography, none of which is invalidated by this code. What this code 
116 | does is vastly simplify the implementation of new steganographic techniques, 
117 | and allow a universal decoder and encoding of multiple messages to different 
118 | keys in the same file.
119 | 
120 | Q. How much data can be encoded in a file?
121 | 
122 | A. That's entirely dependent on the file type and specific encoding, but if 
123 | you insist on a made up number, let's say a ratio of around 500:1, and the 
124 | encoded message has overhead of about 7 bytes.
125 | 
126 | Q. Why can't it be given more than two alternates for one position to encode 
127 | more information?
128 | 
129 | A. Because of math. See Explanation.txt for a bit more detail.
130 | 
131 | Q. Your code is horribly inefficient and can be optimized in all kinds of ways.
132 | 
133 | A. That's why it's called 'reference' code.
134 | 
135 | Q. It would be possible to pack in data more densely if alternates are
136 | required to always be the same length, or variable bytes are allowed to be set 
137 | to arbitrary values.
138 | 
139 | A. Yes, but those put severe restrictions on what can be done in an
140 | encoder, and hence are less likely to be useful in practice.
141 | 
142 | Q. Why don't you use public key encryption?
143 | 
144 | A. Because bits are precious enough for that to be unwieldy, and it would 
145 | disallow use of arbitrary human readable strings as keys. The symmetry is 
146 | best viewed as a feature: because the value of a key is severely diminished 
147 | if it's widely known, there's a reason to hoard them, which is the desired 
148 | behavior.
149 | 


--------------------------------------------------------------------------------
/DissidentXEncoding.py:
--------------------------------------------------------------------------------
  1 | # http://pypi.python.org/pypi/pycrypto
  2 | # http://pypi.python.org/pypi/pysha3/
  3 | 
  4 | import hashlib
  5 | import sha3
  6 | from Crypto.Cipher import AES
  7 | 
  8 | def h(message):
  9 | 	return hashlib.sha3_256(message).digest()
 10 | 
 11 | def x(m1, m2):
 12 | 	assert type(m1) is bytes
 13 | 	assert type(m2) is bytes
 14 | 	return (int.from_bytes(m1, 'big') ^ int.from_bytes(m2, 'big')).to_bytes(len(m1), 'big')
 15 | 
 16 | assert x(x(b'abc', b'def'), b'def') == b'abc'
 17 | 
 18 | def encrypt_ofb(key, iv, plaintext):
 19 | 	assert len(key) == 16, key
 20 | 	assert len(iv) == 16, iv
 21 | 	return AES.new(key, AES.MODE_OFB, iv).encrypt(plaintext + b'a' * (-len(plaintext) % 16))[:len(plaintext)]
 22 | 
 23 | assert encrypt_ofb(b'abcd' * 4, b'iv' * 8, encrypt_ofb(b'abcd' * 4, b'iv' * 8, b'plaintext')) == b'plaintext'
 24 | 
 25 | def encrypt_message(key, plaintext):
 26 | 	mac = h(key + plaintext)[:4]
 27 | 	return mac + encrypt_ofb(key, mac + bytes([0] * 12), plaintext)
 28 | 
 29 | def prepare_message(key, plaintext):
 30 | 	key = h(key)[:16]
 31 | 	return h(key)[:16], encrypt_message(key, plaintext)
 32 | 
 33 | def decrypt_message(key, ciphertext):
 34 | 	mac = ciphertext[:4]
 35 | 	r = encrypt_ofb(key, mac + bytes([0] * 12), ciphertext[4:])
 36 | 	return (r if mac == h(key + r)[:4] else None)
 37 | 
 38 | def test_encrypt():
 39 | 	key = b'abcd' * 4
 40 | 	fullstr = bytes(list(range(256)))
 41 | 	for i in range(256):
 42 | 		mystr = fullstr[:i]
 43 | 		assert decrypt_message(key, encrypt_message(key, mystr)) == mystr
 44 | 
 45 | test_encrypt()
 46 | 
 47 | def pack_message(message):
 48 | 	assert len(message) >= 4, message
 49 | 	r = message[:4]
 50 | 	v = len(message) - 4
 51 | 	lb = bytes([v] if v < 128 else [128 | v >> 8, v & 0xFF])
 52 | 	r += x(lb, h(r)[:len(lb)])
 53 | 	r += h(r)[:2]
 54 | 	return r + message[4:]
 55 | 
 56 | def begin_unpack_message(message):
 57 | 	prefix = x(h(message[:4])[:2], message[4:6])
 58 | 	if prefix[0] < 128:
 59 | 		mlen = prefix[0] + 4
 60 | 		mbegin = 5
 61 | 	else:
 62 | 		mlen = (((prefix[0] - 128) << 8) | prefix[1]) + 4
 63 | 		mbegin = 6
 64 | 	if message[mbegin:mbegin + 2] != h(message[:mbegin])[:2]:
 65 | 		return None
 66 | 	return mlen + mbegin - 2
 67 | 
 68 | def unpack_message(message):
 69 | 	prefix = x(h(message[:4])[:2], message[4:6])
 70 | 	if prefix[0] < 128:
 71 | 		mlen = prefix[0] + 4
 72 | 		mbegin = 5
 73 | 	else:
 74 | 		mlen = (((prefix[0] - 128) << 8) | prefix[1]) + 4
 75 | 		mbegin = 6
 76 | 	assert len(message) == mlen + mbegin - 2
 77 | 	return message[:4] + message[mbegin + 2:]
 78 | 
 79 | def test_pack():
 80 | 	fullstr = bytes(list(range(256)))
 81 | 	for i in range(4, 256):
 82 | 		mystr = fullstr[:i]
 83 | 		packed = pack_message(mystr)
 84 | 		assert begin_unpack_message(packed) == len(packed)
 85 | 		assert unpack_message(packed) == mystr
 86 | 
 87 | test_pack()
 88 | 
 89 | def remove_too_short(plaintext):
 90 | 	p2 = [b'']
 91 | 	for i in range(0, len(plaintext)-1, 2):
 92 | 		p2[-1] += plaintext[i]
 93 | 		if len(p2) > 1 and len(p2[-1]) < 15:
 94 | 			p2[-1] += plaintext[i+1][0]
 95 | 		else:
 96 | 			a, b = plaintext[i+1]
 97 | 			j = 0
 98 | 			while j < len(a) and j < len(b) and a[j] == b[j]:
 99 | 				j += 1
100 | 			if j:
101 | 				p2[-1] += a[:j]
102 | 				a = a[j:]
103 | 				b = b[j:]
104 | 			j = 0
105 | 			while j < len(a) and j < len(b) and a[-j-1] == b[-j-1]:
106 | 				j += 1
107 | 			if j:
108 | 				excess = a[-j:]
109 | 				a = a[:-j]
110 | 				b = b[:-j]
111 | 			else:
112 | 				excess = b''
113 | 			p2.append([a, b])
114 | 			p2.append(excess)
115 | 	p2[-1] += plaintext[-1]
116 | 	return p2
117 | 
118 | assert remove_too_short([b'', [b'abc', b'aqc'], b'y']) == [b'a', [b'b', b'q'], b'cy']
119 | assert remove_too_short([b'x', [b'abc', b'abcd'], b'y']) == [b'xabc', [b'', b'd'], b'y']
120 | assert remove_too_short([b'x', [b'abc', b'dabc'], b'y']) == [b'x', [b'', b'd'], b'abcy']
121 | assert remove_too_short([b'x', [b'ac', b'aqc'], b'y']) == [b'xa', [b'', b'q'], b'cy']
122 | 
def to_bitfield(m):
	"""Expand an iterable of byte values into a flat list of bits.

	Each value contributes eight bits, least significant bit first.
	"""
	return [(value >> shift) & 1 for value in m for shift in range(8)]
129 | 
def encode_messages(messages, plaintext):
	"""Choose alternates so that the text decodes to every given message.

	messages  -- list of (key, message) pairs.
	plaintext -- list alternating fixed byte strings with two-element
	             alternates.

	Returns the final text as bytes, or None when solve() finds no
	combination of alternates that produces the messages.
	"""
	plaintext = remove_too_short(plaintext)
	alt_positions = range(1, len(plaintext), 2)
	# Baseline text: the first choice of every alternate.
	base = [plaintext[0]]
	for i in alt_positions:
		base += [plaintext[i][0], plaintext[i+1]]
	wanted = b''.join([message for key, message in messages])
	# Bits that still differ between the baseline decoding and the goal.
	goal = to_bitfield(x(wanted, pdms(messages, b''.join(base))))
	vectors = []
	for i in alt_positions:
		before = plaintext[i-1][-15:]
		after = plaintext[i+1][:15]
		with_first = pdms(messages, before + plaintext[i][0] + after)
		with_second = pdms(messages, before + plaintext[i][1] + after)
		# How the decoded output changes when this alternate is flipped.
		vectors.append(to_bitfield(x(with_first, with_second)))
	toflips = solve(vectors, goal)
	if toflips is None:
		return None
	pieces = [plaintext[0]]
	for flip, i in zip(toflips, alt_positions):
		pieces.append(plaintext[i][flip])
		pieces.append(plaintext[i+1])
	return b''.join(pieces)
149 | 
def pack_and_encode_messages(messages, plaintext):
	"""Pack every message with pack_message, then encode them into plaintext.

	messages is a list of (key, message) pairs. Returns whatever
	encode_messages returns: the encoded bytes, or None on failure.
	"""
	packed = []
	for key, message in messages:
		packed.append((key, pack_message(message)))
	return encode_messages(packed, plaintext)
152 | 
def pdms(messages, text):
	"""Partially decode text under every key and concatenate the results.

	For each (key, message) pair, len(message) bytes are decoded from text.
	"""
	parts = []
	for key, message in messages:
		parts.append(partial_decode_message(key, text, len(message)))
	return b''.join(parts)
155 | 
def partial_decode_message(key, message, mylen):
	"""Return the first mylen bytes that message decodes to under key.

	Every 16-byte window of message is used as the IV of encrypt_ofb over
	mylen zero bytes, and all of those outputs are xored together. A
	message shorter than 16 bytes has no window and decodes to zero bytes.
	"""
	assert type(key) is bytes
	assert type(message) is bytes
	zeros = bytes([0] * mylen)
	decoded = zeros
	for start in range(len(message) - 15):
		window = message[start:start + 16]
		decoded = x(decoded, encrypt_ofb(key, window, zeros))
	return decoded
163 | 
def decode_and_decrypt_message(key, message):
	"""Recover the plaintext hidden in message for key, or return None.

	key is the user-supplied key as bytes and message is the full cover
	file as bytes. None is returned when the header checksum or the
	decryption tag does not match.
	"""
	cipher_key = h(key)[:16]
	encode_key = h(cipher_key)[:16]
	# The first 16 decoded bytes are enough to read the packed length.
	header = partial_decode_message(encode_key, message, 16)
	packed_len = begin_unpack_message(header)
	if packed_len is None:
		return None
	packed = partial_decode_message(encode_key, message, packed_len)
	if packed is None:
		return None
	ciphertext = unpack_message(packed)
	if ciphertext is None:
		return None
	return decrypt_message(cipher_key, ciphertext)
181 | 
def xor(a, b):
	"""Return the element-wise XOR of two lists.

	The result is as long as the shorter of the two inputs.
	"""
	assert type(a) is list
	assert type(b) is list
	result = []
	for left, right in zip(a, b):
		result.append(left ^ right)
	return result

assert xor([0, 0, 1, 1], [0, 1, 0, 1]) == [0, 1, 1, 0]
188 | 
def solve(vectors, goal):
	"""Pick a subset of vectors whose XOR equals goal (arithmetic over GF(2)).

	vectors -- list of bit lists, each as long as goal.
	goal    -- list of bits to produce.

	Returns a list with one 0/1 flag per vector (1 = use that vector), or
	None when no subset of the vectors produces goal. The inputs are not
	modified.

	Columns for which no vector can act as a pivot are skipped rather than
	treated as failure, so a goal that lies in the span of the vectors is
	found even when the vectors do not have full rank. When they do have
	full rank, every column gets the same pivot row as a plain
	column-by-column elimination would choose.
	"""
	width = len(goal)
	count = len(vectors)
	# Append an identity row to every vector so each reduced row remembers
	# which original vectors were xored together to form it.
	rows = [list(vector) + [int(k == n) for k in range(count)] for n, vector in enumerate(vectors)]
	pivots = []
	rank = 0
	for col in range(width):
		p = rank
		while p < count and rows[p][col] == 0:
			p += 1
		if p == count:
			# No remaining row has this bit; solvability is checked below.
			continue
		rows[p], rows[rank] = rows[rank], rows[p]
		pivot = rows[rank]
		for j in range(count):
			if j != rank and rows[j][col]:
				rows[j] = [a ^ b for a, b in zip(rows[j], pivot)]
		pivots.append((col, rank))
		rank += 1
	# Each pivot row is the only row with a 1 in its pivot column, so the
	# goal's bit in that column decides whether the row is needed.
	remainder = list(goal)
	flips = [0] * count
	for col, row in pivots:
		if remainder[col]:
			remainder = [a ^ b for a, b in zip(remainder, rows[row][:width])]
			flips = [a ^ b for a, b in zip(flips, rows[row][width:])]
	if any(remainder):
		# Some bit of the goal cannot be produced by any combination.
		return None
	return flips
208 | 
209 | from random import randrange
210 | 
def test_solve():
	"""Check solve() on a random system of ten 5-bit vectors.

	solve() can return None for an unlucky random draw. Such draws are
	skipped and a new system is generated, so that importing the module
	does not fail at random.
	"""
	for attempt in range(100):
		vectors = [[randrange(2) for j in range(5)] for i in range(10)]
		goal = [randrange(2) for i in range(5)]
		solution = solve(vectors, goal)
		if solution is None:
			continue
		t = [0] * 5
		for flag, vector in zip(solution, vectors):
			if flag:
				t = xor(t, vector)
		assert t == goal
		return
	assert False, 'solve() found no solution in 100 random attempts'

test_solve()
222 | 
def test_encode():
	"""Encode a short message into random cover text and decode it again."""
	key = bytes([7] * 16)
	cover = [b'abc', [b'', b'pqr']]
	for _ in range(50):
		cover.append(bytes(randrange(256) for _ in range(15)))
		cover.append([b'ab', b'cde'])
	cover.append(b'stuv')
	message = b'hey'
	encoded = encode_messages([(key, message)], cover)
	assert partial_decode_message(key, encoded, len(message)) == message

test_encode()
234 | 
def test_crypt():
	"""Run the full prepare/pack/encode/decode/decrypt round trip."""
	key = b'key'
	message = b'abc'
	prepared = prepare_message(key, message)
	cover = [b'abc', [b'', b'pqr']]
	for _ in range(100):
		cover.append(bytes(randrange(256) for _ in range(15)))
		cover.append([b'ab', b'cde'])
	cover.append(b'stuv')
	encoded = pack_and_encode_messages([prepared], cover)
	assert decode_and_decrypt_message(key, encoded) == message

test_crypt()
247 | 
248 | 
249 | 
250 | 
251 | 
252 | 
253 | 
254 | 
255 | 
256 | 
257 | 


--------------------------------------------------------------------------------