├── .gitignore
├── LICENSE
├── example_input.json
├── example_output.json
├── generate_input.py
├── grade.py
├── images
    ├── book_message_schedule.png
    ├── i_have_made_fire.jpg
    ├── matrix.jpg
    ├── merkle-damgard.png
    ├── standard_message_schedule.png
    └── standard_round_function.png
├── readme.md
├── solution_py
    ├── sha256.py
    └── solution.py
└── solution_rs
    ├── Cargo.toml
    └── src
        └── main.rs


/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | target
3 | Cargo.lock
4 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Attribution 4.0 International
  2 | 
  3 | =======================================================================
  4 | 
  5 | Creative Commons Corporation ("Creative Commons") is not a law firm and
  6 | does not provide legal services or legal advice. Distribution of
  7 | Creative Commons public licenses does not create a lawyer-client or
  8 | other relationship. Creative Commons makes its licenses and related
  9 | information available on an "as-is" basis. Creative Commons gives no
 10 | warranties regarding its licenses, any material licensed under their
 11 | terms and conditions, or any related information. Creative Commons
 12 | disclaims all liability for damages resulting from their use to the
 13 | fullest extent possible.
 14 | 
 15 | Using Creative Commons Public Licenses
 16 | 
 17 | Creative Commons public licenses provide a standard set of terms and
 18 | conditions that creators and other rights holders may use to share
 19 | original works of authorship and other material subject to copyright
 20 | and certain other rights specified in the public license below. The
 21 | following considerations are for informational purposes only, are not
 22 | exhaustive, and do not form part of our licenses.
 23 | 
 24 |      Considerations for licensors: Our public licenses are
 25 |      intended for use by those authorized to give the public
 26 |      permission to use material in ways otherwise restricted by
 27 |      copyright and certain other rights. Our licenses are
 28 |      irrevocable. Licensors should read and understand the terms
 29 |      and conditions of the license they choose before applying it.
 30 |      Licensors should also secure all rights necessary before
 31 |      applying our licenses so that the public can reuse the
 32 |      material as expected. Licensors should clearly mark any
 33 |      material not subject to the license. This includes other CC-
 34 |      licensed material, or material used under an exception or
 35 |      limitation to copyright. More considerations for licensors:
 36 |      wiki.creativecommons.org/Considerations_for_licensors
 37 | 
 38 |      Considerations for the public: By using one of our public
 39 |      licenses, a licensor grants the public permission to use the
 40 |      licensed material under specified terms and conditions. If
 41 |      the licensor's permission is not necessary for any reason--for
 42 |      example, because of any applicable exception or limitation to
 43 |      copyright--then that use is not regulated by the license. Our
 44 |      licenses grant only permissions under copyright and certain
 45 |      other rights that a licensor has authority to grant. Use of
 46 |      the licensed material may still be restricted for other
 47 |      reasons, including because others have copyright or other
 48 |      rights in the material. A licensor may make special requests,
 49 |      such as asking that all changes be marked or described.
 50 |      Although not required by our licenses, you are encouraged to
 51 |      respect those requests where reasonable. More considerations
 52 |      for the public:
 53 |      wiki.creativecommons.org/Considerations_for_licensees
 54 | 
 55 | =======================================================================
 56 | 
 57 | Creative Commons Attribution 4.0 International Public License
 58 | 
 59 | By exercising the Licensed Rights (defined below), You accept and agree
 60 | to be bound by the terms and conditions of this Creative Commons
 61 | Attribution 4.0 International Public License ("Public License"). To the
 62 | extent this Public License may be interpreted as a contract, You are
 63 | granted the Licensed Rights in consideration of Your acceptance of
 64 | these terms and conditions, and the Licensor grants You such rights in
 65 | consideration of benefits the Licensor receives from making the
 66 | Licensed Material available under these terms and conditions.
 67 | 
 68 | 
 69 | Section 1 -- Definitions.
 70 | 
 71 |   a. Adapted Material means material subject to Copyright and Similar
 72 |      Rights that is derived from or based upon the Licensed Material
 73 |      and in which the Licensed Material is translated, altered,
 74 |      arranged, transformed, or otherwise modified in a manner requiring
 75 |      permission under the Copyright and Similar Rights held by the
 76 |      Licensor. For purposes of this Public License, where the Licensed
 77 |      Material is a musical work, performance, or sound recording,
 78 |      Adapted Material is always produced where the Licensed Material is
 79 |      synched in timed relation with a moving image.
 80 | 
 81 |   b. Adapter's License means the license You apply to Your Copyright
 82 |      and Similar Rights in Your contributions to Adapted Material in
 83 |      accordance with the terms and conditions of this Public License.
 84 | 
 85 |   c. Copyright and Similar Rights means copyright and/or similar rights
 86 |      closely related to copyright including, without limitation,
 87 |      performance, broadcast, sound recording, and Sui Generis Database
 88 |      Rights, without regard to how the rights are labeled or
 89 |      categorized. For purposes of this Public License, the rights
 90 |      specified in Section 2(b)(1)-(2) are not Copyright and Similar
 91 |      Rights.
 92 | 
 93 |   d. Effective Technological Measures means those measures that, in the
 94 |      absence of proper authority, may not be circumvented under laws
 95 |      fulfilling obligations under Article 11 of the WIPO Copyright
 96 |      Treaty adopted on December 20, 1996, and/or similar international
 97 |      agreements.
 98 | 
 99 |   e. Exceptions and Limitations means fair use, fair dealing, and/or
100 |      any other exception or limitation to Copyright and Similar Rights
101 |      that applies to Your use of the Licensed Material.
102 | 
103 |   f. Licensed Material means the artistic or literary work, database,
104 |      or other material to which the Licensor applied this Public
105 |      License.
106 | 
107 |   g. Licensed Rights means the rights granted to You subject to the
108 |      terms and conditions of this Public License, which are limited to
109 |      all Copyright and Similar Rights that apply to Your use of the
110 |      Licensed Material and that the Licensor has authority to license.
111 | 
112 |   h. Licensor means the individual(s) or entity(ies) granting rights
113 |      under this Public License.
114 | 
115 |   i. Share means to provide material to the public by any means or
116 |      process that requires permission under the Licensed Rights, such
117 |      as reproduction, public display, public performance, distribution,
118 |      dissemination, communication, or importation, and to make material
119 |      available to the public including in ways that members of the
120 |      public may access the material from a place and at a time
121 |      individually chosen by them.
122 | 
123 |   j. Sui Generis Database Rights means rights other than copyright
124 |      resulting from Directive 96/9/EC of the European Parliament and of
125 |      the Council of 11 March 1996 on the legal protection of databases,
126 |      as amended and/or succeeded, as well as other essentially
127 |      equivalent rights anywhere in the world.
128 | 
129 |   k. You means the individual or entity exercising the Licensed Rights
130 |      under this Public License. Your has a corresponding meaning.
131 | 
132 | 
133 | Section 2 -- Scope.
134 | 
135 |   a. License grant.
136 | 
137 |        1. Subject to the terms and conditions of this Public License,
138 |           the Licensor hereby grants You a worldwide, royalty-free,
139 |           non-sublicensable, non-exclusive, irrevocable license to
140 |           exercise the Licensed Rights in the Licensed Material to:
141 | 
142 |             a. reproduce and Share the Licensed Material, in whole or
143 |                in part; and
144 | 
145 |             b. produce, reproduce, and Share Adapted Material.
146 | 
147 |        2. Exceptions and Limitations. For the avoidance of doubt, where
148 |           Exceptions and Limitations apply to Your use, this Public
149 |           License does not apply, and You do not need to comply with
150 |           its terms and conditions.
151 | 
152 |        3. Term. The term of this Public License is specified in Section
153 |           6(a).
154 | 
155 |        4. Media and formats; technical modifications allowed. The
156 |           Licensor authorizes You to exercise the Licensed Rights in
157 |           all media and formats whether now known or hereafter created,
158 |           and to make technical modifications necessary to do so. The
159 |           Licensor waives and/or agrees not to assert any right or
160 |           authority to forbid You from making technical modifications
161 |           necessary to exercise the Licensed Rights, including
162 |           technical modifications necessary to circumvent Effective
163 |           Technological Measures. For purposes of this Public License,
164 |           simply making modifications authorized by this Section 2(a)
165 |           (4) never produces Adapted Material.
166 | 
167 |        5. Downstream recipients.
168 | 
169 |             a. Offer from the Licensor -- Licensed Material. Every
170 |                recipient of the Licensed Material automatically
171 |                receives an offer from the Licensor to exercise the
172 |                Licensed Rights under the terms and conditions of this
173 |                Public License.
174 | 
175 |             b. No downstream restrictions. You may not offer or impose
176 |                any additional or different terms or conditions on, or
177 |                apply any Effective Technological Measures to, the
178 |                Licensed Material if doing so restricts exercise of the
179 |                Licensed Rights by any recipient of the Licensed
180 |                Material.
181 | 
182 |        6. No endorsement. Nothing in this Public License constitutes or
183 |           may be construed as permission to assert or imply that You
184 |           are, or that Your use of the Licensed Material is, connected
185 |           with, or sponsored, endorsed, or granted official status by,
186 |           the Licensor or others designated to receive attribution as
187 |           provided in Section 3(a)(1)(A)(i).
188 | 
189 |   b. Other rights.
190 | 
191 |        1. Moral rights, such as the right of integrity, are not
192 |           licensed under this Public License, nor are publicity,
193 |           privacy, and/or other similar personality rights; however, to
194 |           the extent possible, the Licensor waives and/or agrees not to
195 |           assert any such rights held by the Licensor to the limited
196 |           extent necessary to allow You to exercise the Licensed
197 |           Rights, but not otherwise.
198 | 
199 |        2. Patent and trademark rights are not licensed under this
200 |           Public License.
201 | 
202 |        3. To the extent possible, the Licensor waives any right to
203 |           collect royalties from You for the exercise of the Licensed
204 |           Rights, whether directly or through a collecting society
205 |           under any voluntary or waivable statutory or compulsory
206 |           licensing scheme. In all other cases the Licensor expressly
207 |           reserves any right to collect such royalties.
208 | 
209 | 
210 | Section 3 -- License Conditions.
211 | 
212 | Your exercise of the Licensed Rights is expressly made subject to the
213 | following conditions.
214 | 
215 |   a. Attribution.
216 | 
217 |        1. If You Share the Licensed Material (including in modified
218 |           form), You must:
219 | 
220 |             a. retain the following if it is supplied by the Licensor
221 |                with the Licensed Material:
222 | 
223 |                  i. identification of the creator(s) of the Licensed
224 |                     Material and any others designated to receive
225 |                     attribution, in any reasonable manner requested by
226 |                     the Licensor (including by pseudonym if
227 |                     designated);
228 | 
229 |                 ii. a copyright notice;
230 | 
231 |                iii. a notice that refers to this Public License;
232 | 
233 |                 iv. a notice that refers to the disclaimer of
234 |                     warranties;
235 | 
236 |                  v. a URI or hyperlink to the Licensed Material to the
237 |                     extent reasonably practicable;
238 | 
239 |             b. indicate if You modified the Licensed Material and
240 |                retain an indication of any previous modifications; and
241 | 
242 |             c. indicate the Licensed Material is licensed under this
243 |                Public License, and include the text of, or the URI or
244 |                hyperlink to, this Public License.
245 | 
246 |        2. You may satisfy the conditions in Section 3(a)(1) in any
247 |           reasonable manner based on the medium, means, and context in
248 |           which You Share the Licensed Material. For example, it may be
249 |           reasonable to satisfy the conditions by providing a URI or
250 |           hyperlink to a resource that includes the required
251 |           information.
252 | 
253 |        3. If requested by the Licensor, You must remove any of the
254 |           information required by Section 3(a)(1)(A) to the extent
255 |           reasonably practicable.
256 | 
257 |        4. If You Share Adapted Material You produce, the Adapter's
258 |           License You apply must not prevent recipients of the Adapted
259 |           Material from complying with this Public License.
260 | 
261 | 
262 | Section 4 -- Sui Generis Database Rights.
263 | 
264 | Where the Licensed Rights include Sui Generis Database Rights that
265 | apply to Your use of the Licensed Material:
266 | 
267 |   a. for the avoidance of doubt, Section 2(a)(1) grants You the right
268 |      to extract, reuse, reproduce, and Share all or a substantial
269 |      portion of the contents of the database;
270 | 
271 |   b. if You include all or a substantial portion of the database
272 |      contents in a database in which You have Sui Generis Database
273 |      Rights, then the database in which You have Sui Generis Database
274 |      Rights (but not its individual contents) is Adapted Material; and
275 | 
276 |   c. You must comply with the conditions in Section 3(a) if You Share
277 |      all or a substantial portion of the contents of the database.
278 | 
279 | For the avoidance of doubt, this Section 4 supplements and does not
280 | replace Your obligations under this Public License where the Licensed
281 | Rights include other Copyright and Similar Rights.
282 | 
283 | 
284 | Section 5 -- Disclaimer of Warranties and Limitation of Liability.
285 | 
286 |   a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
287 |      EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
288 |      AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
289 |      ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
290 |      IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
291 |      WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
292 |      PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
293 |      ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
294 |      KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
295 |      ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
296 | 
297 |   b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
298 |      TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
299 |      NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
300 |      INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
301 |      COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
302 |      USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
303 |      ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
304 |      DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
305 |      IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
306 | 
307 |   c. The disclaimer of warranties and limitation of liability provided
308 |      above shall be interpreted in a manner that, to the extent
309 |      possible, most closely approximates an absolute disclaimer and
310 |      waiver of all liability.
311 | 
312 | 
313 | Section 6 -- Term and Termination.
314 | 
315 |   a. This Public License applies for the term of the Copyright and
316 |      Similar Rights licensed here. However, if You fail to comply with
317 |      this Public License, then Your rights under this Public License
318 |      terminate automatically.
319 | 
320 |   b. Where Your right to use the Licensed Material has terminated under
321 |      Section 6(a), it reinstates:
322 | 
323 |        1. automatically as of the date the violation is cured, provided
324 |           it is cured within 30 days of Your discovery of the
325 |           violation; or
326 | 
327 |        2. upon express reinstatement by the Licensor.
328 | 
329 |      For the avoidance of doubt, this Section 6(b) does not affect any
330 |      right the Licensor may have to seek remedies for Your violations
331 |      of this Public License.
332 | 
333 |   c. For the avoidance of doubt, the Licensor may also offer the
334 |      Licensed Material under separate terms or conditions or stop
335 |      distributing the Licensed Material at any time; however, doing so
336 |      will not terminate this Public License.
337 | 
338 |   d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
339 |      License.
340 | 
341 | 
342 | Section 7 -- Other Terms and Conditions.
343 | 
344 |   a. The Licensor shall not be bound by any additional or different
345 |      terms or conditions communicated by You unless expressly agreed.
346 | 
347 |   b. Any arrangements, understandings, or agreements regarding the
348 |      Licensed Material not stated herein are separate from and
349 |      independent of the terms and conditions of this Public License.
350 | 
351 | 
352 | Section 8 -- Interpretation.
353 | 
354 |   a. For the avoidance of doubt, this Public License does not, and
355 |      shall not be interpreted to, reduce, limit, restrict, or impose
356 |      conditions on any use of the Licensed Material that could lawfully
357 |      be made without permission under this Public License.
358 | 
359 |   b. To the extent possible, if any provision of this Public License is
360 |      deemed unenforceable, it shall be automatically reformed to the
361 |      minimum extent necessary to make it enforceable. If the provision
362 |      cannot be reformed, it shall be severed from this Public License
363 |      without affecting the enforceability of the remaining terms and
364 |      conditions.
365 | 
366 |   c. No term or condition of this Public License will be waived and no
367 |      failure to comply consented to unless expressly agreed to by the
368 |      Licensor.
369 | 
370 |   d. Nothing in this Public License constitutes or may be interpreted
371 |      as a limitation upon, or waiver of, any privileges and immunities
372 |      that apply to the Licensor or You, including from the legal
373 |      processes of any jurisdiction or authority.
374 | 
375 | 
376 | =======================================================================
377 | 
378 | Creative Commons is not a party to its public licenses.
379 | Notwithstanding, Creative Commons may elect to apply one of its public
380 | licenses to material it publishes and in those instances will be
381 | considered the “Licensor.” The text of the Creative Commons public
382 | licenses is dedicated to the public domain under the CC0 Public Domain
383 | Dedication. Except for the limited purpose of indicating that material
384 | is shared under a Creative Commons public license or as otherwise
385 | permitted by the Creative Commons policies published at
386 | creativecommons.org/policies, Creative Commons does not authorize the
387 | use of the trademark "Creative Commons" or any other trademark or logo
388 | of Creative Commons without its prior written consent including,
389 | without limitation, in connection with any unauthorized modifications
390 | to any of its public licenses or any other arrangements,
391 | understandings, or agreements concerning use of licensed material. For
392 | the avoidance of doubt, this paragraph does not form part of the public
393 | licenses.
394 | 
395 | Creative Commons may be contacted at creativecommons.org.
396 | 


--------------------------------------------------------------------------------
/example_input.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "problem1": [
 3 |     [1, 2],
 4 |     [4294967295, 1],
 5 |     [3050487260, 3710144918]
 6 |   ],
 7 |   "problem2": [
 8 |     [2, 1],
 9 |     [1, 1],
10 |     [2919882184, 31]
11 |   ],
12 |   "problem3": 1114723206,
13 |   "problem4": 1232674167,
14 |   "problem5": "iguana wombat dog kangaroo llama turkey yak unicorn sheep xenoce",
15 |   "problem6": 3536071395,
16 |   "problem7": 651015076,
17 |   "problem8": [2749825547, 776049372, 1213590135],
18 |   "problem9": [3758166654, 2821345890, 1850678816],
19 |   "problem10": {
20 |     "state": [
21 |         2739944672, 3126690193, 4191866847, 1163785745,
22 |         3714074692, 1172792371, 283469062,   826169706
23 |     ],
24 |     "round_constant": 961987163,
25 |     "schedule_word": 3221900128
26 |   },
27 |   "problem11": {
28 |     "state": [
29 |       2918946378, 1679978889, 1678006433,  650957219,
30 |        379281712, 2112907926, 1775216060, 2152648190
31 |     ],
32 |     "block": "manatee fox unicorn octopus dog fox fox llama vulture jaguar xen"
33 |   },
34 |   "problem12": [0, 1, 55, 56, 64, 492022654431536432],
35 |   "problem13": [
36 |     "",
37 |     "hello world",
38 |     "aardvark zebra yak pig jaguar aardvark rhinoceros butte",
39 |     "narwhal dog llama llama giraffe narwhal octopus dog xeno",
40 |     "John Jacob Jingleheimer Schmidt! His name is my name too. Whenever we go out the people always shout there goes John Jacob Jingleheimer Schmidt! Nanananananana..."
41 |   ],
42 |   "problem14": {
43 |     "original_input": "fox elephant dog",
44 |     "chosen_suffix": "pig jaguar iguana"
45 |   },
46 |   "problem15": "bacb15aef84802baa0f530845013a98ee1eede664b914f8ebc2a520e69049a09",
47 |   "problem16": {
48 |     "original_hash": "27b82abe296f3ecd5174b6e6168ea683cd8ef94306d9abd9f81807f2fa587d2a",
49 |     "original_len": 41,
50 |     "chosen_suffix": "manatee jaguar zebra zebra dog"
51 |   }
52 | }
53 | 


--------------------------------------------------------------------------------
/example_output.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "problem1": [3, 0, 2465664882],
 3 |   "problem2": [1, 2147483648, 1544797073],
 4 |   "problem3": 1345017931,
 5 |   "problem4": 2902922196,
 6 |   "problem5": [
 7 |     1768387937, 1851859063, 1869439585, 1948279919, 1730177889, 1852268914, 1869553772, 1818324321,
 8 |      544503154, 1801812256, 2036427552, 1970170211, 1869770272, 1936221541, 1881176165, 1852793701,
 9 |     3002878561, 3711121932, 1520676164, 3002441970, 2935068969, 1610329529, 1904580351, 3219988740,
10 |     2337695268,  263015313, 2120931855,  131203777, 3818546915,   19163115, 3479924161, 2154860703,
11 |     1790169326,  516580487, 2414737634,  909025701, 2241053595, 1237268359, 3797503938, 1773623028,
12 |     2840671725, 2299292186, 1933596460, 2279513616,  514132674, 3245155609, 1753922983, 2241450350,
13 |     2449659630,  262239956,  773552098, 3253131632, 3863807927,  879696536, 3143654396, 3973063648,
14 |      509015903,  270850193, 1893431553,  719566283, 2310657204,  365781698, 3761063438, 1007484868
15 |   ],
16 |   "problem6": 3003388882,
17 |   "problem7": 2194029931,
18 |   "problem8": 1783753340,
19 |   "problem9": 3893039714,
20 |   "problem10": [
21 |     1724514418, 2739944672, 3126690193, 4191866847,
22 |     1638715774, 3714074692, 1172792371, 283469062
23 |   ],
24 |   "problem11": [
25 |     1251501988, 1663226031, 2877128394, 4050467288,
26 |     2375501075, 1434687977, 2625842981, 650253644
27 |   ],
28 |   "problem12": [
29 |     "80000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
30 |     "800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008",
31 |     "8000000000000001b8",
32 |     "8000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001c0",
33 |     "80000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000200",
34 |     "800000000000000036a01ffa96b12980"
35 |   ],
36 |   "problem13": [
37 |     "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
38 |     "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9",
39 |     "4b45e1bec21185865d1628a8a502eed789193a3c253a529983e4bc17fa65f32b",
40 |     "99069f1eba4c874aba649c17136a253e1dd504cda936ab77cf189c2cf9eb88ff",
41 |     "68b74d91364475247c10bfee2621eaa13bcabb033ed1dee58b74c05e7944489a"
42 |   ],
43 |   "problem14": "666f7820656c657068616e7420646f67800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000080706967206a616775617220696775616e61",
44 |   "problem15": [
45 |     3133871534, 4165468858, 2700423300, 1343465870,
46 |     3790528102, 1267814286, 3156890126, 1761909257
47 |   ],
48 |   "problem16": "50417b93404facb1b481990a7bf6ac963b1e1ee0ccced8b2a5938caa28b52b41"
49 | }
50 | 


--------------------------------------------------------------------------------
/generate_input.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python3
  2 | 
  3 | import json
  4 | import secrets
  5 | from secrets import randbits, randbelow
  6 | import sys
  7 | 
  8 | # CAUTION: `random` isn't cryptographically secure. In general, prefer to use
  9 | # `secrets`. But `secrets` currently lacks a shuffle() function, so we just
 10 | # import that specific function from `random`.
 11 | from random import shuffle
 12 | 
 13 | import hashlib
 14 | 
 15 | animals = [  # 26 names, one per starting letter a-z; every name has at least 3 letters
 16 |     "aardvark",
 17 |     "butterfly",
 18 |     "cat",
 19 |     "dog",
 20 |     "elephant",
 21 |     "fox",
 22 |     "giraffe",
 23 |     "hippopotamus",
 24 |     "iguana",
 25 |     "jaguar",
 26 |     "kangaroo",
 27 |     "llama",
 28 |     "manatee",
 29 |     "narwhal",
 30 |     "octopus",
 31 |     "pig",
 32 |     "quail",
 33 |     "rhinoceros",
 34 |     "sheep",
 35 |     "turkey",
 36 |     "unicorn",
 37 |     "vulture",
 38 |     "wombat",
 39 |     "xenoceratops",
 40 |     "yak",
 41 |     "zebra",
 42 | ]
 43 | 
 44 | 
 45 | def random_animals(n):  # n names drawn independently via secrets.choice, joined by single spaces
 46 |     return " ".join(secrets.choice(animals) for _ in range(n))
 47 | 
 48 | 
 49 | def random_string(n):  # exactly n characters: n animals always span >= n chars, so the slice only truncates
 50 |     return random_animals(n)[:n]
 51 | 
 52 | 
 53 | inputs = {}  # maps "problemN" -> generated input; dumped to stdout as JSON at the end
 54 | 
 55 | 
 56 | ### Building Blocks
 57 | 
 58 | # Problem 1: pairs of 32-bit words. The last two pairs sum to 2**32 or more.
 59 | inputs["problem1"] = [
 60 |     [1, 2],
 61 |     [(2**32) - 1, 1],
 62 |     # Two random words with the top bit set, so their sum always overflows 32 bits.
 63 |     [(2**31) + randbits(31), (2**31) + randbits(31)],
 64 | ]
 65 | 
 66 | # Problem 2: [32-bit word, rotation amount] pairs.
 67 | inputs["problem2"] = [
 68 |     [2, 1],
 69 |     [1, 1],
 70 |     # Avoid generating pathological rotations (0 or 32): randbelow(31) + 1 is
 71 |     # always in the range 1 to 31. Bitshifting by 0 or 32 is weird in C/C++/Rust.
 72 |     [randbits(32), randbelow(31) + 1],
 73 | ]
 74 | 
 75 | 
 76 | ### The Message Schedule
 77 | 
 78 | # Problem 3: one random 32-bit word.
 79 | inputs["problem3"] = randbits(32)
 80 | 
 81 | # Problem 4: one random 32-bit word.
 82 | inputs["problem4"] = randbits(32)
 83 | 
 84 | # Problem 5: a 64-character string of animal names (the last name may be cut short).
 85 | inputs["problem5"] = random_string(64)
 86 | 
 87 | 
 88 | ### The Round Function
 89 | 
 90 | # Problem 6: one random 32-bit word.
 91 | inputs["problem6"] = randbits(32)
 92 | 
 93 | # Problem 7: one random 32-bit word.
 94 | inputs["problem7"] = randbits(32)
 95 | 
 96 | # Problem 8: three random 32-bit words.
 97 | inputs["problem8"] = [randbits(32), randbits(32), randbits(32)]
 98 | 
 99 | # Problem 9: three random 32-bit words.
100 | inputs["problem9"] = [randbits(32), randbits(32), randbits(32)]
101 | 
102 | # Problem 10
103 | state = [randbits(32) for _ in range(8)]
104 | inputs["problem10"] = {
105 |     "state": [randbits(32) for _ in range(8)],
106 |     "round_constant": 0x428A2F98,
107 |     "schedule_word": randbits(32),
108 | }
109 | 
110 | 
111 | ### The Compression Function
112 | 
113 | # Problem 11: eight random 32-bit state words plus a 64-character block.
114 | inputs["problem11"] = {
115 |     "state": [randbits(32) for _ in range(8)],
116 |     "block": random_string(64),
117 | }
118 | 
119 | 
120 | ### Padding
121 | 
122 | # Problem 12: message lengths to compute padding for.
123 | inputs["problem12"] = [
124 |     0,
125 |     1,
126 |     55,
127 |     56,
128 |     64,
129 |     secrets.randbits(61),  # 61 bits, not 64: after the x8 step the value is still below 2**64
130 | ]
131 | 
132 | 
133 | ### The Hash Function
134 | 
135 | # Problem 13: strings to hash -- empty, short, 55 and 56 characters, and a longer lyric.
136 | lyrics = "John Jacob Jingleheimer Schmidt! His name is my name too. Whenever we go out the people always shout there goes John Jacob Jingleheimer Schmidt! Nanananananana..."
137 | inputs["problem13"] = [
138 |     "",
139 |     "hello world",
140 |     random_string(55),
141 |     random_string(56),
142 |     lyrics,
143 | ]
144 | 
145 | 
146 | ### The Length Extension Attack
147 | 
148 | # Problem 14: a three-animal original input and a three-animal suffix.
149 | inputs["problem14"] = {
150 |     "original_input": random_animals(3),
151 |     "chosen_suffix": random_animals(3),
152 | }
153 | 
154 | # Problem 15: 32 random bytes, hex-encoded (64 hex characters).
155 | inputs["problem15"] = secrets.token_bytes(32).hex()
156 | 
157 | # Problem 16: only the SHA-256 hash and length of hidden_input are published, not the text itself.
158 | hidden_input = random_animals(10)
159 | inputs["problem16"] = {
160 |     "original_hash": hashlib.sha256(hidden_input.encode()).hexdigest(),
161 |     "original_len": len(hidden_input),
162 |     "chosen_suffix": random_animals(5),
163 | }
164 | 
165 | 
166 | json.dump(inputs, sys.stdout, indent="  ")  # all generated inputs as one JSON object on stdout
167 | print()  # json.dump() writes no trailing newline, so add one
168 | 


--------------------------------------------------------------------------------
/grade.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python3
 2 | 
 3 | import json
 4 | from os import path
 5 | import pprint
 6 | import sys
 7 | import subprocess
 8 | import textwrap
 9 | 
10 | HERE = path.dirname(path.realpath(__file__))  # this script's directory (symlinks resolved); used to find generate_input.py and solution_py/
11 | 
12 | 
13 | def main():
14 |     if len(sys.argv) < 2 or sys.argv[1] == "--help":
15 |         print("Usage example:")
16 |         print("    ./grade.py python my_solution.py")
17 |         return
18 | 
19 |     # Generate test input.
20 |     input_json = subprocess.run(
21 |         [sys.executable, path.join(HERE, "generate_input.py")],
22 |         stdout=subprocess.PIPE,
23 |         check=True,
24 |     ).stdout
25 |     input_obj = json.loads(input_json)
26 | 
27 |     # Run the provided Python solution with the test input from above to
28 |     # generate expected output.
29 |     expected_output_json = subprocess.run(
30 |         [sys.executable, path.join(HERE, "solution_py", "solution.py")],
31 |         input=input_json,
32 |         stdout=subprocess.PIPE,
33 |         check=True,
34 |     ).stdout
35 |     expected_output_obj = json.loads(expected_output_json)
36 | 
37 |     # Run the solution we're grading with the same input.
38 |     your_command = sys.argv[1:]
39 |     your_output_json = subprocess.run(
40 |         sys.argv[1:],
41 |         input=input_json,
42 |         stdout=subprocess.PIPE,
43 |         check=True,
44 |     ).stdout
45 |     if len(your_output_json.strip()) == 0:
46 |         print("Your output is empty. Did you forget to call json.dump() or similar?")
47 |         return 1
48 |     try:
49 |         your_output_obj = json.loads(your_output_json)
50 |     except json.decoder.JSONDecodeError:
51 |         print("Your output isn't valid JSON. Do you have any extra print statements?")
52 |         return 1
53 | 
54 |     # Compare the answers
55 |     any_incorrect = False
56 |     for problem in input_obj:
57 |         if problem not in your_output_obj:
58 |             print(f"{problem} missing")
59 |             any_incorrect = True
60 |         elif your_output_obj[problem] == expected_output_obj[problem]:
61 |             print(f"{problem} correct")
62 |         else:
63 | 
64 |             def pretty_print(obj):
65 |                 pp = pprint.PrettyPrinter(indent=4)
66 |                 print(textwrap.indent(pp.pformat(obj), " " * 4))
67 | 
68 |             print(f"{problem} incorrect")
69 |             print("randomized input:")
70 |             pretty_print(input_obj[problem])
71 |             print("expected output:")
72 |             pretty_print(expected_output_obj[problem])
73 |             print("your output:")
74 |             pretty_print(your_output_obj[problem])
75 |             any_incorrect = True
76 |     if not any_incorrect:
77 |         print("Well done!")
78 |     else:
79 |         return 1
80 | 
81 | 
82 | if __name__ == "__main__":
83 |     sys.exit(main())
84 | 


--------------------------------------------------------------------------------
/images/book_message_schedule.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oconnor663/sha256_project/bcc9e7713c64364bd57edcb597057d96a121a68b/images/book_message_schedule.png


--------------------------------------------------------------------------------
/images/i_have_made_fire.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oconnor663/sha256_project/bcc9e7713c64364bd57edcb597057d96a121a68b/images/i_have_made_fire.jpg


--------------------------------------------------------------------------------
/images/matrix.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oconnor663/sha256_project/bcc9e7713c64364bd57edcb597057d96a121a68b/images/matrix.jpg


--------------------------------------------------------------------------------
/images/merkle-damgard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oconnor663/sha256_project/bcc9e7713c64364bd57edcb597057d96a121a68b/images/merkle-damgard.png


--------------------------------------------------------------------------------
/images/standard_message_schedule.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oconnor663/sha256_project/bcc9e7713c64364bd57edcb597057d96a121a68b/images/standard_message_schedule.png


--------------------------------------------------------------------------------
/images/standard_round_function.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oconnor663/sha256_project/bcc9e7713c64364bd57edcb597057d96a121a68b/images/standard_round_function.png


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
   1 | # The SHA-256 Project
   2 | 
   3 | > This project was originally assigned in NYU Tandon's CS-GY 6903 Applied
   4 | Cryptography course, Fall 2021. Here's the [original course
   5 | repo](https://github.com/oconnor663/applied_crypto_2021_fall) with all the
   6 | other problem sets.
   7 | 
   8 | In this project we're going to implement SHA-256 ourselves, and then we'll use
   9 | our implementation to demonstrate a "length extension attack". To get a sense
  10 | of scale, take a look at the [SHA-256 pseudocode on
  11 | Wikipedia](https://en.wikipedia.org/wiki/SHA-2#Pseudocode). That pseudocode
  12 | will be one of our references, and there will be several direct quotes from it
  13 | below. The [animations in this video](https://youtu.be/f9EbD6iY9zI) might also
  14 | help you get a big-picture sense of what the algorithm is doing.
  15 | 
  16 | Implementing that pseudocode takes less than a hundred lines of Python, which
  17 | might not seem like a lot. But there are lots of little details in those lines,
  18 | and the nature of the "avalanche effect" is such that a tiny mistake will
  19 | totally mess up your output, usually without giving you any useful feedback
  20 | about what you did wrong. So we'll move slowly, piece by piece, making sure to
  21 | test each piece before we move on to the next. Read and reread each problem
  22 | carefully, *two or three times through,* and then follow the instructions
  23 | *exactly* as you write your code. If the instructions are unclear, ask for help
  24 | and avoid the temptation to guess. Mistakes will be difficult to debug, which
  25 | makes this project challenging.
  26 | 
  27 | So...what's the point of such a challenging project? If we almost never
  28 | implement hash functions ourselves in the real world, why are we going to spend
  29 | our precious time on it now? Two reasons:
  30 | 
  31 | Concretely, as long as SHA-2 remains widely used, length extension attacks will
  32 | remain a common pitfall. You need to know about them to use SHA-2 safely, and
  33 | to help others use it safely. As with most attacks, the best way to understand
  34 | the length extension attack is to do it yourself, which means we need to get
  35 | our hands on the inner workings of SHA-2.
  36 | 
  37 | More broadly, there are just so many black boxes in cryptography that we almost
  38 | never look inside, especially our block ciphers, stream ciphers, and hash
  39 | functions. No one has enough time to learn the details of all of them, not even
  40 | professional cryptographers. But these algorithms are not magic, and this class
  41 | would be doing you a disservice if we never opened up any black boxes. Our goal
  42 | isn't to memorize all the details, but to build up the sort of practical
  43 | intuition that can only come from having seen the details before. And I want
  44 | you to come away from this class with the confidence that you can handle this
  45 | level of detail for any algorithm, if and when you need to.
  46 | 
  47 | So this is it. This is where we're going to open one of the black boxes and get
  48 | all the way to the bottom of it. This is SHA-256.
  49 | 
  50 | ## Contents
  51 | 
  52 | * [Workflow](#workflow)
  53 | * [Example input](#example-input)
  54 | * [Example output](#example-output)
  55 | * [Building blocks](#building-blocks)
  56 |    * [Problem 1: addition modulo 2<sup>32</sup>](#problem-1-addition-modulo-232)
  57 |    * [Problem 2: bitwise right rotation](#problem-2-bitwise-right-rotation)
  58 | * [The Message Schedule](#the-message-schedule)
  59 |    * [Problem 3: `little_sigma0()`](#problem-3-little_sigma0)
  60 |    * [Problem 4: `little_sigma1()`](#problem-4-little_sigma1)
  61 |    * [Problem 5: the message schedule](#problem-5-the-message-schedule)
  62 | * [The Round Function](#the-round-function)
  63 |    * [Problem 6: `big_sigma0()`](#problem-6-big_sigma0)
  64 |    * [Problem 7: `big_sigma1()`](#problem-7-big_sigma1)
  65 |    * [Problem 8: `choice()`](#problem-8-choice)
  66 |    * [Problem 9: `majority()`](#problem-9-majority)
  67 |    * [Problem 10: the round function](#problem-10-the-round-function)
  68 | * [The Compression Function](#the-compression-function)
  69 |    * [Problem 11: the compression function](#problem-11-the-compression-function)
  70 | * [Padding](#padding)
  71 |    * [Problem 12: padding](#problem-12-padding)
  72 | * [The Hash Function](#the-hash-function)
  73 |    * [Problem 13: the hash function](#problem-13-the-hash-function)
  74 | * [The Length Extension Attack](#the-length-extension-attack)
  75 |    * [Problem 14: modeling the extended input](#problem-14-modeling-the-extended-input)
  76 |    * [Problem 15: recovering the state](#problem-15-recovering-the-state)
  77 |    * [Problem 16: the length extension attack](#problem-16-the-length-extension-attack)
  78 | * [Conclusion](#conclusion)
  79 | 
  80 | ## Workflow
  81 | 
  82 | This project was originally assigned in NYU Tandon's CS-GY 6903 Applied
  83 | Cryptography course. It's intended to be JSON-in-JSON-out and autograded. A
  84 | simplified [`grade.py`](grade.py) script is provided in this repo, but if you
  85 | prefer you can also just visually compare the output of your solution to the
  86 | example output provided. The original class was taught in Python, and some of
  87 | the problems below include example Python code, but feel free to code in
  88 | whatever language you like. Example solutions are provided in both
  89 | [Python](solution_py) and [Rust](solution_rs).
  90 | 
  91 | Here's a bare minimum example of parsing JSON input and producing JSON output
  92 | using Python:
  93 | 
  94 | ```python
  95 | import json
  96 | import sys
  97 | 
  98 | inputs = json.load(sys.stdin)
  99 | outputs = {}
 100 | 
 101 | outputs["problem1"] = ["your", "answer", "here"]
 102 | 
 103 | json.dump(outputs, sys.stdout)
 104 | ```
 105 | 
 106 | To run that directly with [`example_input.json`](example_input.json), you'd
 107 | save it to a file like `my_solution.py` and then run this in the terminal:
 108 | 
 109 | ```
 110 | $ python3 my_solution.py < example_input.json
 111 | {"problem1": ["your", "answer", "here"]}
 112 | ```
 113 | 
 114 | To grade it, you'd run this in the terminal:
 115 | 
 116 | ```
 117 | $ ./grade.py python3 my_solution.py
 118 | problem1 incorrect
 119 | randomized input:
 120 |     [[1, 2], [4294967295, 1], [3148047433, 2995627551]]
 121 | expected output:
 122 |     [3, 0, 1848707688]
 123 | your output:
 124 |     ['your', 'answer', 'here']
 125 | problem2 missing
 126 | problem3 missing
 127 | problem4 missing
 128 | problem5 missing
 129 | problem6 missing
 130 | problem7 missing
 131 | problem8 missing
 132 | problem9 missing
 133 | problem10 missing
 134 | problem11 missing
 135 | problem12 missing
 136 | problem13 missing
 137 | problem14 missing
 138 | problem15 missing
 139 | problem16 missing
 140 | ```
 141 | 
 142 | As you can see there, the grading script generates random inputs every time you
 143 | run it. So a complete solution should read input values from the JSON input
 144 | every time, rather than just hardcoding the example inputs.
 145 | 
 146 | Here's a common pitfall for folks who haven't worked with JSON and stdin/stdout
 147 | before: If you print anything extra to stdout (like with the regular Python
 148 | `print()` function) that will mess up your JSON output, and the grading script
 149 | will give you an error message like "Your output isn't valid JSON." If you
 150 | see that error, make sure to comment out your print statements.
 151 | 
 152 | ## Example input
 153 | 
 154 | ```json
 155 | {
 156 |   "problem1": [
 157 |     [1, 2],
 158 |     [4294967295, 1],
 159 |     [3050487260, 3710144918]
 160 |   ],
 161 |   "problem2": [
 162 |     [2, 1],
 163 |     [1, 1],
 164 |     [2919882184, 31]
 165 |   ],
 166 |   "problem3": 1114723206,
 167 |   "problem4": 1232674167,
 168 |   "problem5": "iguana wombat dog kangaroo llama turkey yak unicorn sheep xenoce",
 169 |   "problem6": 3536071395,
 170 |   "problem7": 651015076,
 171 |   "problem8": [2749825547, 776049372, 1213590135],
 172 |   "problem9": [3758166654, 2821345890, 1850678816],
 173 |   "problem10": {
 174 |     "state": [
 175 |         2739944672, 3126690193, 4191866847, 1163785745,
 176 |         3714074692, 1172792371, 283469062,   826169706
 177 |     ],
 178 |     "round_constant": 961987163,
 179 |     "schedule_word": 3221900128
 180 |   },
 181 |   "problem11": {
 182 |     "state": [
 183 |       2918946378, 1679978889, 1678006433,  650957219,
 184 |        379281712, 2112907926, 1775216060, 2152648190
 185 |     ],
 186 |     "block": "manatee fox unicorn octopus dog fox fox llama vulture jaguar xen"
 187 |   },
 188 |   "problem12": [0, 1, 55, 56, 64, 492022654431536432],
 189 |   "problem13": [
 190 |     "",
 191 |     "hello world",
 192 |     "aardvark zebra yak pig jaguar aardvark rhinoceros butte",
 193 |     "narwhal dog llama llama giraffe narwhal octopus dog xeno",
 194 |     "John Jacob Jingleheimer Schmidt! His name is my name too. Whenever we go out the people always shout there goes John Jacob Jingleheimer Schmidt! Nanananananana..."
 195 |   ],
 196 |   "problem14": {
 197 |     "original_input": "fox elephant dog",
 198 |     "chosen_suffix": "pig jaguar iguana"
 199 |   },
 200 |   "problem15": "bacb15aef84802baa0f530845013a98ee1eede664b914f8ebc2a520e69049a09",
 201 |   "problem16": {
 202 |     "original_hash": "27b82abe296f3ecd5174b6e6168ea683cd8ef94306d9abd9f81807f2fa587d2a",
 203 |     "original_len": 41,
 204 |     "chosen_suffix": "manatee jaguar zebra zebra dog"
 205 |   }
 206 | }
 207 | ```
 208 | 
 209 | ## Example output
 210 | 
 211 | ```json
 212 | {
 213 |   "problem1": [3, 0, 2465664882],
 214 |   "problem2": [1, 2147483648, 1544797073],
 215 |   "problem3": 1345017931,
 216 |   "problem4": 2902922196,
 217 |   "problem5": [
 218 |     1768387937, 1851859063, 1869439585, 1948279919, 1730177889, 1852268914, 1869553772, 1818324321,
 219 |      544503154, 1801812256, 2036427552, 1970170211, 1869770272, 1936221541, 1881176165, 1852793701,
 220 |     3002878561, 3711121932, 1520676164, 3002441970, 2935068969, 1610329529, 1904580351, 3219988740,
 221 |     2337695268,  263015313, 2120931855,  131203777, 3818546915,   19163115, 3479924161, 2154860703,
 222 |     1790169326,  516580487, 2414737634,  909025701, 2241053595, 1237268359, 3797503938, 1773623028,
 223 |     2840671725, 2299292186, 1933596460, 2279513616,  514132674, 3245155609, 1753922983, 2241450350,
 224 |     2449659630,  262239956,  773552098, 3253131632, 3863807927,  879696536, 3143654396, 3973063648,
 225 |      509015903,  270850193, 1893431553,  719566283, 2310657204,  365781698, 3761063438, 1007484868
 226 |   ],
 227 |   "problem6": 3003388882,
 228 |   "problem7": 2194029931,
 229 |   "problem8": 1783753340,
 230 |   "problem9": 3893039714,
 231 |   "problem10": [
 232 |     1724514418, 2739944672, 3126690193, 4191866847,
 233 |     1638715774, 3714074692, 1172792371, 283469062
 234 |   ],
 235 |   "problem11": [
 236 |     1251501988, 1663226031, 2877128394, 4050467288,
 237 |     2375501075, 1434687977, 2625842981, 650253644
 238 |   ],
 239 |   "problem12": [
 240 |     "80000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
 241 |     "800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008",
 242 |     "8000000000000001b8",
 243 |     "8000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001c0",
 244 |     "80000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000200",
 245 |     "800000000000000036a01ffa96b12980"
 246 |   ],
 247 |   "problem13": [
 248 |     "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
 249 |     "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9",
 250 |     "4b45e1bec21185865d1628a8a502eed789193a3c253a529983e4bc17fa65f32b",
 251 |     "99069f1eba4c874aba649c17136a253e1dd504cda936ab77cf189c2cf9eb88ff",
 252 |     "68b74d91364475247c10bfee2621eaa13bcabb033ed1dee58b74c05e7944489a"
 253 |   ],
 254 |   "problem14": "666f7820656c657068616e7420646f67800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000080706967206a616775617220696775616e61",
 255 |   "problem15": [
 256 |     3133871534, 4165468858, 2700423300, 1343465870,
 257 |     3790528102, 1267814286, 3156890126, 1761909257
 258 |   ],
 259 |   "problem16": "50417b93404facb1b481990a7bf6ac963b1e1ee0ccced8b2a5938caa28b52b41"
 260 | }
 261 | ```
 262 | 
 263 | ## Building blocks
 264 | 
 265 | We'll start with the smallest details at the very bottom of the box. As a first
 266 | step, we need to build a couple of math operations that Python doesn't give us
 267 | directly: modular addition and bitwise right-rotation.
 268 | 
 269 | ### Problem 1: addition modulo 2<sup>32</sup>
 270 | 
 271 | If you've learned a language like C or Java before, you might know that modular
 272 | addition is what many languages do with integers by default. In these
 273 | languages, integers have some fixed size, like 32 bits, and any math operation
 274 | that would normally give a result ≥2<sup>32</sup> instead "overflows" and
 275 | starts counting up from 0 again. These fixed-size integer operations are very
 276 | efficient in hardware, so they're common in CPU instruction sets and in
 277 | algorithms like SHA-256. However, integers in Python have no fixed size, and
 278 | math operations in Python never overflow. If you want to see this in action,
 279 | ask Python for the value of 2<sup>1,000,000</sup>. This property is lovely for
 280 | our intuition as programmers, because it means Python integers work like the
 281 | regular math we're used to. But alas, it's not how addition is done in SHA-256,
 282 | so we'll need to give ourselves a helper function for this.
 283 | 
 284 | Define a function like `add32(x, y)`. (I'll suggest names for your functions
 285 | throughout this project, but you can name them whatever you like.) It should
 286 | add its two arguments and then take the result modulo 2<sup>32</sup>, i.e. the
 287 | remainder when the result is divided by 2<sup>32</sup>. Remember that `%` is
 288 | the "modulo" or "remainder" operator in Python, and `**` is the exponentiation
 289 | operator.
 290 | 
 291 | **Input:** a list of `(x, y)` pairs
 292 | 
 293 | **Output:** a list of results from calling `add32` on each pair
 294 | 
 295 | ### Problem 2: bitwise right rotation
 296 | 
 297 | The other building block we need is bitwise rotation. Most programming
 298 | languages including Python provide a very similar operation called bit
 299 | _shifting_, usually written `<<` (left shift) or `>>` (right shift). A bit
 300 | rotation is like a bit shift, but instead of "falling off the end" of the number,
 301 | the bits rotate around to the other end. This is nice for cryptographic
 302 | functions that need to do a lot of mixing, because it moves bits around without
 303 | losing any information. For example, consider this 32-bit number:
 304 | 
 305 | ```
 306 | 00000000000000000000000000001111
 307 | ```
 308 | 
 309 | If we right-*shift* that number by two places, we get:
 310 | 
 311 | ```
 312 | 00000000000000000000000000000011
 313 | ```
 314 | 
 315 | But if we right-*rotate* that number by two places, we get:
 316 | 
 317 | ```
 318 | 11000000000000000000000000000011
 319 | ```
 320 | 
 321 | Python doesn't have a built-in bit rotation operator, but we can accomplish the
 322 | same thing by combining the results of two shifts. If you enjoy bit twiddling
 323 | puzzles, figure out how to do this before reading further. If not, it's ok to
 324 | just copy the following function, but make sure you take a few moments to walk
 325 | through the example above and see how it does the right thing.
 326 | 
 327 | ```python
 328 | def rightrotate32(x, n):
 329 |     assert x < 2 ** 32, "x is too large. Did you use + instead of add32 somewhere?"
 330 |     right_part = x >> n
 331 |     left_part = x << (32 - n)
 332 |     return add32(left_part, right_part)
 333 | ```
 334 | 
 335 | **Input:** a list of `(x, n)` pairs
 336 | 
 337 | **Output:** a list of results from calling `rightrotate32` on each pair
 338 | 
 339 | Using these helper functions and Python's other built-in operations, we're
 340 | going to do a lot of math using 32-bit integers. As a shorthand, we'll refer to
 341 | these integers as "words". A "word" is just another way of saying "an integer
 342 | of the size that we prefer to / are able to work with". The size of a word
 343 | depends on context, but **in the context of SHA-256, a "word" means a 32-bit
 344 | unsigned integer.**
 345 | 
 346 | ## The Message Schedule
 347 | 
 348 | With those two building blocks in place, we're ready to implement the first
 349 | major moving part of our hash function, the "message schedule". Here the
 350 | "message" means the hash function's input. In each round of its compression
 351 | function, SHA-256 mixes in one word from the message. (Make sure you read the
 352 | definition of a "word" above.) The "message schedule" defines exactly what
 353 | those words are and the order in which they're used.
 354 | 
 355 | A SHA-256 message block is 64 bytes long, and a word is 4 bytes long, so one
 356 | block contains exactly 16 words. SHA-256 has 64 rounds, and the first 16 of
 357 | those rounds use those 16 message words directly. The subsequent 48 rounds mix
 358 | different message words together using a formula. We're about to implement that
 359 | formula. First we need a couple more small helpers, which we'll call
 360 | `little_sigma0` and `little_sigma1`.
 361 | 
 362 | ### Problem 3: `little_sigma0()`
 363 | 
 364 | Given a word `x`, we define `little_sigma0(x)` to be the value:
 365 | 
 366 | ```python
 367 | rightrotate32(x, 7) ^ rightrotate32(x, 18) ^ (x >> 3)
 368 | ```
 369 | 
 370 | Implement this function in Python. You can copy the line above if you like.
 371 | 
 372 | **Inputs:** an integer `x`
 373 | 
 374 | **Outputs:** the value `little_sigma0(x)`
 375 | 
 376 | Based on [this paper](https://arxiv.org/pdf/1402.1314.pdf), I'm pretty sure the
 377 | name "sigma" (Greek lowercase σ and uppercase Σ) refers to the "S-boxes" or
 378 | "substitution boxes" that we're familiar with from block ciphers. See p. 57 of
 379 | *Serious Cryptography*.
 380 | 
 381 | ### Problem 4: `little_sigma1()`
 382 | 
 383 | Similarly, given a word `x`, we define `little_sigma1(x)` to be the value:
 384 | 
 385 | ```python
 386 | rightrotate32(x, 17) ^ rightrotate32(x, 19) ^ (x >> 10)
 387 | ```
 388 | 
 389 | Implement this function in Python too. Again, you can copy the line above if
 390 | you like.
 391 | 
 392 | **Inputs:** an integer `x`
 393 | 
 394 | **Outputs:** the value `little_sigma1(x)`
 395 | 
 396 | ### Problem 5: the message schedule
 397 | 
 398 | Now we're ready to compute the full 64-**word** message schedule array, which
 399 | is usually called `W` (for "words"). As we said above, the block size of
 400 | SHA-256 is 64 **bytes**, so for this process you start off with a 64-byte block
 401 | of input. Convert these 64 bytes into 16 words, by converting each 4-byte group
 402 | into an integer using a **big-endian** conversion like
 403 | [`int.from_bytes(..., "big")`](https://docs.python.org/3/library/stdtypes.html#int.from_bytes).
 404 | (Using the wrong endianness here will be a *common mistake*.) This gives you
 405 | the first 16 elements of `W`. For each of the remaining 48 elements — that is,
 406 | for each index from 16 to 63 — use the following formula:
 407 | 
 408 | ```
 409 | W[i] := W[i-16] + little_sigma0(W[i-15]) + W[i-7] + little_sigma1(W[i-2])
 410 | ```
 411 | 
 412 | Note that in this case the formula is pseudocode, not Python. The `:=` symbol
 413 | means "is defined to be", similar to `=` in Python. Importantly, the `+` symbol
 414 | in SHA-256 pseudocode does *not* mean Python's `+`, but rather the `add32()`
 415 | function that we defined back in Problem&nbsp;1. (Implementing pseudocode using
 416 | regular Python addition rather than `add32` will be a *common mistake*
 417 | throughout this project.) Depending on how you structure your Python code, you
 418 | might also want to use the
 419 | [`.append()`](https://docs.python.org/3/tutorial/datastructures.html) method on
 420 | lists.
 421 | 
 422 | Define a function like `message_schedule(block)` which takes a 64-byte block
 423 | and returns a 64-word list, according to the formula described above. Your
 424 | input for this problem is an ASCII string of length 64. Convert it to bytes,
 425 | and use your `message_schedule()` function to construct the message schedule for
 426 | that block. Your output should be the resulting list.
 427 | 
 428 | **Input:** an ASCII string of length 64, which represents a block of input for the compression function
 429 | 
 430 | **Output:** the resulting message schedule, a list of 64 words (integers)
 431 | 
 432 | As you work on this part of the algorithm, it might be helpful or interesting
 433 | to compare notes with how different sources describe it. Here's how *Serious
 434 | Cryptography* describes it, on p. 119:
 435 | 
 436 | <kbd><img alt="message schedule code from the book" src="images/book_message_schedule.png" width="700px"></kbd>
 437 | 
 438 | And here's how [the pseudocode on
 439 | Wikipedia](https://en.wikipedia.org/wiki/SHA-2#Pseudocode) describes it:
 440 | 
 441 | ```
 442 | create a 64-entry message schedule array w[0..63] of 32-bit words
 443 | (The initial values in w[0..63] don't matter, so many implementations zero them here)
 444 | copy chunk into first 16 words w[0..15] of the message schedule array
 445 | 
 446 | Extend the first 16 words into the remaining 48 words w[16..63] of the message schedule array:
 447 | for i from 16 to 63
 448 |     s0 := (w[i-15] rightrotate  7) xor (w[i-15] rightrotate 18) xor (w[i-15] rightshift  3)
 449 |     s1 := (w[i- 2] rightrotate 17) xor (w[i- 2] rightrotate 19) xor (w[i- 2] rightshift 10)
 450 |     w[i] := w[i-16] + s0 + w[i-7] + s1
 451 | ```
 452 | 
 453 | And finally, here's how it's described in the official standard that defines
 454 | SHA-256, p. 22 of [FIPS
 455 | 180-4](https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.180-4.pdf):
 456 | 
 457 | <kbd><img alt="message schedule formula from the standard" src="images/standard_message_schedule.png" width="640px"></kbd>
 458 | 
 459 | These are all different ways of describing the same message schedule.
 460 | 
 461 | Once you've got the message schedule implemented correctly, you've reached the
 462 | first major milestone of the project. Well done! We'll need to set it aside for
 463 | a moment to focus on another big moving part, but don't worry: we'll come back
 464 | and make use of it before long.
 465 | 
 466 | ## The Round Function
 467 | 
 468 | As we said above, the SHA-256 compression function does 64 rounds of mixing.
 469 | We're about to implement the operation that's done for each round. To get
 470 | started, we're going to need four more small helper functions:
 471 | 
 472 | ### Problem 6: `big_sigma0()`
 473 | 
 474 | Given a word `x`, we define `big_sigma0(x)` to be the value:
 475 | 
 476 | ```python
 477 | rightrotate32(x, 2) ^ rightrotate32(x, 13) ^ rightrotate32(x, 22)
 478 | ```
 479 | 
 480 | Implement this function in Python. You can copy the line above if you like.
 481 | 
 482 | **Inputs:** an integer `x`
 483 | 
 484 | **Outputs:** the value `big_sigma0(x)`
 485 | 
 486 | ### Problem 7: `big_sigma1()`
 487 | 
 488 | Given a word `x`, we define `big_sigma1(x)` to be the value:
 489 | 
 490 | ```python
 491 | rightrotate32(x, 6) ^ rightrotate32(x, 11) ^ rightrotate32(x, 25)
 492 | ```
 493 | 
 494 | Implement this function in Python too. Again, you can copy the line above if
 495 | you like.
 496 | 
 497 | **Inputs:** an integer `x`
 498 | 
 499 | **Outputs:** the value `big_sigma1(x)`
 500 | 
 501 | ### Problem 8: `choice()`
 502 | 
 503 | Given three words, `x`, `y`, and `z`, we define `choice(x, y, z)` to be the value:
 504 | 
 505 | ```python
 506 | (x & y) ^ (~x & z)
 507 | ```
 508 | 
 509 | Implement this function in Python too. Again, you can copy the line above if
 510 | you like.
 511 | 
 512 | Note that the `~` symbol in Python means "bitwise-not", i.e. turn all the
 513 | 0-bits into 1's and all the 1-bits into 0's. This isn't an operation we need
 514 | very often, but it's nice that it's built-in. The fact that Python integers are
 515 | both signed and also variably-sized means that the behavior of `~` is subtler
 516 | than it might seem at first glance. Because of the rules of ["two's complement"
 517 | signed arithmetic](https://en.wikipedia.org/wiki/Two%27s_complement), it tends
 518 | to give us negative numbers. Luckily, all the little details work out in the
 519 | end, and we can use `~` here without worrying about it. You can just trust me
 520 | on that and copy the line of code above, or you can explore how `~` works
 521 | in Python as an exercise.
 522 | 
 523 | **Inputs:** a list of three integers, `[x, y, z]`
 524 | 
 525 | **Outputs:** the value `choice(x, y, z)`
 526 | 
 527 | Before you move on from this function, take a moment to stare at it. Can you
 528 | tell why it's called "choice"?
 529 | 
 530 | ### Problem 9: `majority()`
 531 | 
 532 | The last helper for the round function. Given three words, `x`, `y`, and `z`,
 533 | we define `majority(x, y, z)` to be the value:
 534 | 
 535 | ```python
 536 | (x & y) ^ (x & z) ^ (y & z)
 537 | ```
 538 | 
 539 | Implement this function in Python too. Again, you can copy the line above if
 540 | you like.
 541 | 
 542 | **Inputs:** a list of three integers, `[x, y, z]`
 543 | 
 544 | **Outputs:** the value `majority(x, y, z)`
 545 | 
 546 | Same follow-up question as above: Can you tell why this function is called
 547 | "majority"? This one's a little trickier. Three bits put together have
 548 | 2<sup>3</sup> = 8 possible values, and the easiest way to see this one is to
 549 | just make a table and calculate what happens in each case.
 550 | 
 551 | ### Problem 10: the round function
 552 | 
 553 | Alright, we're ready to implement the next big moving part of SHA-256, the
 554 | round function. The round function takes three arguments. The most important of
 555 | these is the **state**, a list of 8 words. Recall the diagram of the
 556 | Merkle–Damgård construction from p. 112 of *Serious Cryptography*:
 557 | 
 558 | <kbd><img alt="Merkle–Damgård diagram" src="images/merkle-damgard.png"></kbd>
 559 | 
 560 | The values H<sub>0</sub>, H<sub>1</sub>, and H<sub>2</sub> represent this
 561 | 8-word state as it's transformed by each call to the compression function. At
 562 | this point we're working on the round function, which is _inside_ the
 563 | compression function (i.e. inside the trapezoids in that diagram), but it's the
 564 | same state that we're talking about.
 565 | 
 566 | The other two inputs to the round function are the **round constant** and the
 567 | **schedule word**, each of which is one word (an integer). As you might guess,
 568 | the schedule word is ultimately going to come from the message schedule, which
 569 | we implemented in Problem&nbsp;5, but for now we'll just take it as an
 570 | argument.
 571 | 
 572 | Define a function like `round(state, round_constant, schedule_word)`. This
 573 | function starts by computing several values, using the helper functions defined
 574 | above:
 575 | 
 576 | ```
 577 | ch    := choice(state[4], state[5], state[6])
 578 | temp1 := state[7] + big_sigma1(state[4]) + ch + round_constant + schedule_word
 579 | maj   := majority(state[0], state[1], state[2])
 580 | temp2 := big_sigma0(state[0]) + maj
 581 | ```
 582 | 
 583 | As in Problem&nbsp;5, these formulas are pseudocode, and the `+` symbol means
 584 | `add32()`. Finally, the round function assembles a new state:
 585 | 
 586 | ```
 587 | new_state := [
 588 |     temp1 + temp2,
 589 |     state[0],
 590 |     state[1],
 591 |     state[2],
 592 |     state[3] + temp1,
 593 |     state[4],
 594 |     state[5],
 595 |     state[6],
 596 | ]
 597 | ```
 598 | 
 599 | This `new_state` is the return value of `round()`.
 600 | 
 601 | Your input for this problem is an object with three fields, `"state"`
 602 | containing a list of 8 integers, `"round_constant"` containing one integer, and
 603 | `"schedule_word"` containing one integer. Call your `round()` function with
 604 | these three arguments. Your output should be the resulting new state.
 605 | 
 606 | **Input:** an object with three fields, `"state"`, `"round_constant"`, and `"schedule_word"`
 607 | 
 608 | **Output:** a list of 8 words (integers), the new state returned by `round()`
 609 | 
 610 | As we did in Problem&nbsp;5, we can compare how different sources describe the
 611 | same part of the algorithm. *Serious Cryptography* doesn't include the SHA-256
 612 | round function in detail, describing it only as "more complex than that of
 613 | SHA-1" on p. 119.
 614 | 
 615 | [The pseudocode on Wikipedia](https://en.wikipedia.org/wiki/SHA-2#Pseudocode)
 616 | uses the variables `a`, `b`, `c`, `d`, `e`, `f`, `g`, and `h` to refer to the 8
 617 | elements of the state array. Here's how it describes the round function:
 618 | 
 619 | ```
 620 | S1 := (e rightrotate 6) xor (e rightrotate 11) xor (e rightrotate 25)
 621 | ch := (e and f) xor ((not e) and g)
 622 | temp1 := h + S1 + ch + k[i] + w[i]
 623 | S0 := (a rightrotate 2) xor (a rightrotate 13) xor (a rightrotate 22)
 624 | maj := (a and b) xor (a and c) xor (b and c)
 625 | temp2 := S0 + maj
 626 | 
 627 | h := g
 628 | g := f
 629 | f := e
 630 | e := d + temp1
 631 | d := c
 632 | c := b
 633 | b := a
 634 | a := temp1 + temp2
 635 | ```
 636 | 
 637 | P. 23 of the [FIPS
 638 | 180-4](https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.180-4.pdf) standard
 639 | describes the round function using the same 8 variables:
 640 | 
 641 | <kbd><img alt="the round function formulas from the standard" src="images/standard_round_function.png" width="480"></kbd>
 642 | 
 643 | Once you've got the round function working, you've reached the second major
 644 | milestone of the project. Very well done! Most of the little details are behind
 645 | us now, and the pieces we've built are about to start fitting together.
 646 | 
 647 | ## The Compression Function
 648 | 
 649 | ### Problem 11: the compression function
 650 | 
 651 | Finally, we've arrived at a piece big enough that we've actually heard of it
 652 | before. The compression function is the trapezoid from the Merkle–Damgård
 653 | diagram above. This is where we're going to write the "round loop" that
 654 | executes the round function 64 times, once for each of the 64 rounds of
 655 | SHA-256.
 656 | 
 657 | We saw the `round_constant` argument above. We need to start by copying the
 658 | array of values that we'll use for this argument. Paste the following into your
 659 | Python code as a global variable:
 660 | 
 661 | ```python
 662 | ROUND_CONSTANTS = [
 663 |     0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
 664 |     0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
 665 |     0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
 666 |     0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
 667 |     0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
 668 |     0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
 669 |     0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
 670 |     0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
 671 | ]
 672 | ```
 673 | 
 674 | You'll see the same array near the top of the [Wikipedia
 675 | pseudocode](https://en.wikipedia.org/wiki/SHA-2#Pseudocode). In effect, these
 676 | are just some hardcoded, random-looking numbers that we add to the mix. In
 677 | fact, they do actually come from a formula, something to do with the cube roots
 678 | of the first 64 prime numbers. But the details of the formula don't matter to
 679 | us. These are just ["nothing-up-my-sleeve
 680 | numbers"](https://en.wikipedia.org/wiki/Nothing-up-my-sleeve_number).
 681 | 
 682 | Now, define a function like `compress(input_state, block)`, where `input_state`
 683 | is an 8-word list, and `block` is a 64-byte block of the hash function's input.
 684 | This function combines the message schedule from Problem&nbsp;5 with the round
 685 | function from Problem&nbsp;10, like this:
 686 | 
 687 | ```
 688 | W := message_schedule(block)
 689 | 
 690 | state := input_state
 691 | for i in 0, 1, ..., 63
 692 |     state = round(state, ROUND_CONSTANTS[i], W[i])
 693 | 
 694 | state = [
 695 |     input_state[0] + state[0],
 696 |     input_state[1] + state[1],
 697 |     input_state[2] + state[2],
 698 |     input_state[3] + state[3],
 699 |     input_state[4] + state[4],
 700 |     input_state[5] + state[5],
 701 |     input_state[6] + state[6],
 702 |     input_state[7] + state[7],
 703 | ]
 704 | ```
 705 | 
 706 | As in Problem&nbsp;5, these formulas are pseudocode, and the `+` symbol means
 707 | `add32()`. The final value of `state` is the return value of `compress()`. Note
 708 | that the value of `input_state` gets used again at the end, so `input_state`
 709 | and `state` do need to be two different variables.
 710 | 
 711 | Your input for this problem is an object with two fields, `"state"` containing
 712 | a list of 8 integers and `"block"` containing an ASCII string of length 64.
 713 | Convert the block to bytes and then call your `compress()` function with those
 714 | arguments. Your output should be the resulting new state.
 715 | 
 716 | **Input:** an object with two fields, `"state"` and `"block"`
 717 | 
 718 | **Output:** a list of 8 words (integers), the new state returned by `compress()`
 719 | 
 720 | Before you move on, think about the loop you just wrote. It's probably just two
 721 | or three lines of code. But 64 rounds is actually quite a lot of work for the
 722 | computer. This little loop, plus all the code inside of `round()`, is where the
 723 | magic happens. This is the mixing loop. When cryptographers study SHA-256 and
 724 | try to develop attacks, this little loop is what they're attacking. That makes
 725 | the number 64 a very careful tradeoff between speed and security. Is 64 rounds
 726 | enough mixing to guarantee collision resistance and all the other security
 727 | properties? It seems to be enough today, but what about ten or twenty years
 728 | from now? Will SHA-256 be able to withstand another generation of clever
 729 | attacks and faster computers? Maybe some of you will have a hand in that
 730 | research...
 731 | 
 732 | In any case, for now we have our secure compression function. With this
 733 | working, we've turned onto the home stretch. The full hash function is in
 734 | sight.
 735 | 
 736 | ## Padding
 737 | 
 738 | ### Problem 12: padding
 739 | 
 740 | SHA-256 takes a "message" of any length as input, but the compression function
 741 | works with 64-byte blocks at a time, so we need to pad the message to be an
 742 | exact multiple of the block size. This is very similar to what we did with
 743 | block ciphers in Chapter&nbsp;4 and Problem&nbsp;Set&nbsp;3. As with block
 744 | ciphers, a naive padding scheme like "just fill the remainder of the last block
 745 | with zeros" isn't going to work. This time it's because of collision
 746 | resistance: If two different messages looked the same after padding, then their
 747 | hashes would be the same too, which is never supposed to happen. That means we
 748 | need a proper, unambiguous padding scheme.
 749 | 
 750 | It would be nice if we could reuse our PKCS#7 code from Problem Set 3, but alas
 751 | SHA-256 does something different. On the bright side, because this is hashing
 752 | and not encryption, at least we don't need to write any code for unpadding.
 753 | 
 754 | The SHA-256 padding scheme is originally defined in terms of bits, not bytes. I
 755 | think it's a little clearer in those terms, so let's start there. Remember that
 756 | there are 8 bits in a byte, so a block size of 64 bytes is the same as 512
 757 | bits. Here's the padding scheme as it's originally defined:
 758 | 
 759 | 1. Start the padding bitstring with a single 1-bit.
 760 | 2. Then append some 0-bits after that. We'll define how many in step 4 below.
 761 | 3. Finally, append the bit-length of the message, encoded as a 64-bit unsigned
 762 |    big-endian number with
 763 |    [`.to_bytes(8, "big")`](https://docs.python.org/3/library/stdtypes.html#int.to_bytes).
 764 | 4. Choose the number of 0-bits for step 2 to be the smallest number such that
 765 |    the total bit-length of the message plus the padding is an exact multiple of
 766 |    512.
 767 | 
 768 | A side note: You might notice that step 3 there isn't actually necessary for
 769 | making the padding unambiguous. Steps 1 and 2 are sufficient for that. The goal
 770 | of step 3 is to make it harder to find collisions, by including the message
 771 | length in the mix.
 772 | 
 773 | Defining the padding scheme in terms of bits like this is pretty
 774 | straightforward, but in practice our programming languages and our computer
 775 | hardware don't usually talk about individual bits directly. We need to
 776 | translate that definition into bytes. So here's the exact same padding scheme,
 777 | redescribed in terms of bytes, the way we'll actually implement it:
 778 | 
 779 | 1. Start the padding bytestring with a single 0x80 byte (decimal 128, binary
 780 |    0b10000000). As you can see in the binary representation, this is a single
 781 |    1-bit followed by seven 0-bits.
 782 | 2. Then append some 0x00 bytes after that. We'll define how many in step 4
 783 |    below.
 784 | 3. Finally, append **8 times** the byte-length of the message, encoded as an
 785 |    8-byte unsigned big-endian number with
 786 |    [`.to_bytes(8, "big")`](https://docs.python.org/3/library/stdtypes.html#int.to_bytes).
 787 |    (Forgetting to multiply the `len()` by 8 here is a *common mistake*.)
 788 | 4. Choose the number of 0x00 bytes for step 2 to be the smallest number such
 789 |    that the total byte-length of the message plus the padding is an exact
 790 |    multiple of 64.
 791 | 
 792 | That translation made things a little less elegant. The first byte is less
 793 | obvious, and the multiply-by-8 step is easy to forget. But we'll manage.
 794 | 
 795 | How do we determine the number of 0x00 bytes in step 4? If you like little
 796 | arithmetic puzzles, this is another good one to think about on your own before
 797 | reading further. Otherwise, feel free to copy the following three lines of
 798 | Python:
 799 | 
 800 | ```python
 801 | remainder_bytes = (message_length + 8) % 64  # number of bytes in the final block, including the appended length
 802 | filler_bytes = 64 - remainder_bytes          # number of bytes we need to add, including the initial 0x80 byte
 803 | zero_bytes = filler_bytes - 1                # number of 0x00 bytes we need to add
 804 | ```
 805 | 
 806 | Take a minute or two to review that logic and convince yourself it's correct.
 807 | Then write a function like `padding(message_length)`, which takes the original
 808 | **byte-length** of a message and returns the padding **bytestring** for that
 809 | message. Your input for this problem is a list of message byte-lengths. For
 810 | each of these, call your `padding()` function with that length as an argument
 811 | and hex-encode the resulting padding bytes. (There are no message bytes to
 812 | concatenate in this problem, just the padding bytes themselves.) Your output
 813 | for this problem should be the resulting list of hex-encoded padding strings.
 814 | 
 815 | I recommend that you have your `padding()` function return raw bytes, and that
 816 | you call it like `padding(...).hex()` for this problem. If you prefer to have
 817 | your `padding()` function do hex-encoding internally, that's ok too, but then
 818 | you'll need to remember to hex-decode its output in the following problems.
 819 | 
 820 | **Input:** a list of message lengths, counted in bytes
 821 | 
 822 | **Output:** a list of SHA-256 padding bytestrings, each hex-encoded
 823 | 
 824 | This padding function was our last big moving part. All we have to do now is
 825 | put the padding function and the compression function together.
 826 | 
 827 | ## The Hash Function
 828 | 
 829 | ### Problem 13: the hash function
 830 | 
 831 | Now we're ready to assemble the complete hash function. The genuine article.
 832 | Once you finish this problem, you can test your code against Python's `hashlib`
 833 | or against any other SHA-256 implementation in the world, and your output will
 834 | be exactly the same. Knock on wood.
 835 | 
 836 | As we did with block ciphers, we're going to pad the message and split it up
 837 | into blocks. Let's look at that Merkle–Damgård diagram again:
 838 | 
 839 | <kbd><img alt="Merkle–Damgård diagram" src="images/merkle-damgard.png"></kbd>
 840 | 
 841 | M<sub>1</sub>, M<sub>2</sub>, and so on represent 64-byte blocks of the padded
 842 | message. There are as many M blocks as needed, depending on the padded message
 843 | length. The output state ("chaining value") returned by each call to the
 844 | compression function (H<sub>1</sub>, H<sub>2</sub>, and so on) becomes the
 845 | input state for the following call. And the final chaining value returned by
 846 | the last call to the compression function is the SHA-256 hash of the message.
 847 | 
 848 | You might've noticed one last missing detail: Where do we get H<sub>0</sub>,
 849 | the input state for the first call to the compression function? We'll use a
 850 | constant for this. As in CBC mode, we'll call this constant the "initialization
 851 | vector", or IV for short. Unlike CBC mode, where the IV needs to be uniformly
 852 | random every time, the SHA-256 IV never changes. It's baked into the standard.
 853 | This is the other set of constants at the top of the [Wikipedia
 854 | pseudocode](https://en.wikipedia.org/wiki/SHA-2#Pseudocode). Paste the
 855 | following into your Python code as another global variable:
 856 | 
 857 | ```python
 858 | IV = [
 859 |     0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
 860 |     0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
 861 | ]
 862 | ```
 863 | 
 864 | Now, write a function like
 865 | [`sha256(message)`](https://youtu.be/6v5VahaEL7s?t=438). Inside it, use your
 866 | `padding()` function to generate padding bytes, and then append them to the
 867 | message bytes. Note that nothing should be hex-encoded at this point. (Using
 868 | hex-encoded padding here is a _common mistake_.) Create a `state` variable,
 869 | whose starting value is `IV`. Then split the padded message up into 64-byte
 870 | blocks and loop over the blocks, calling your `compress()` function on each
 871 | one. For each call to `compress()`, use the current `state` value as input and
 872 | assign the return value back to `state`. Double check that your argument types
 873 | for `compress()` are the same as they were in Problem&nbsp;11. (Calling
 874 | `compress()` with block bytes here but block words there is another _common
 875 | mistake_.) Once the block loop is finished, convert the final value of `state`
 876 | into 32 bytes by encoding each of the 8 state words as a 4-byte **big endian**
 877 | integer and concatenating them. Those 32 bytes are the return value of
 878 | `sha256()`.
 879 | 
 880 | > Debugging tips: Even if you've passed tests for all the previous problems,
 881 | > and your `sha256()` function looks good, sometimes you can still get the
 882 | > wrong answer here. Look carefully for the common mistakes described above.
 883 | > Also look for accidental global variables in your functions, which might
 884 | > refer to input from a previous problem. If you get stuck, put print
 885 | > statements everywhere, and compare what you see to these [known-good debug
 886 | > printouts for
 887 | > `sha256(b"")`](https://gist.github.com/oconnor663/27804bb33542bbf398aab16e102d8594).
 888 | 
 889 | Your input for this problem is a list of ASCII strings. Convert each string to
 890 | bytes and hash it with your `sha256()` function. Your output should be a list
 891 | of the resulting SHA-256 hashes, each encoded as hex.
 892 | 
 893 | **Input:** a list of ASCII strings
 894 | 
 895 | **Output:** a list of the hex-encoded SHA-256 hashes of those strings
 896 | 
 897 | <a href="https://youtu.be/LUDEjulbqzk?t=123">
 898 |   <img alt="I have made fire!" src="images/i_have_made_fire.jpg" width="400px">
 899 | </a>
 900 | 
 901 | ## The Length Extension Attack
 902 | 
 903 | If we were to stop here, all our blood, sweat, and tears would not have been
 904 | wasted. Implementing SHA-256 is an accomplishment in itself, and the intuition
 905 | you've gained along the way will hopefully be useful to you whenever you see a
 906 | hash function from now on. But besides that broad intuition, you've also
 907 | learned some very specific tricks: Now you know how to invoke the SHA-256
 908 | compression and padding functions directly, which isn't something that most
 909 | library implementations will let you do. It turns out that you can use these
 910 | tricks to pull off an important attack, and the best time to learn this attack
 911 | is while the tricks are still fresh in your mind. Strike while the iron is hot,
 912 | as they say.
 913 | 
 914 | SHA-256 has a flaw. Although its collision resistance and other security
 915 | properties remain unbroken so far, it does *not* behave like a true ["random
 916 | oracle"](https://en.wikipedia.org/wiki/Random_oracle). Some SHA-256 outputs are
 917 | _related_ to each other, in a way that you can detect or exploit even when you
 918 | don't know the input. This exploit is called a "length extension attack".
 919 | 
 920 | Remember how the "chaining values" worked in Problem&nbsp;13. The output from
 921 | each call to the compression function became the input for the next call. But
 922 | the final output, well, it just became the hash. We didn't do anything special
 923 | to it; we just returned it. That means that if you look at a SHA-256 hash,
 924 | you're looking at the same state that _would have been used_ to call the
 925 | compression function again _if there had been more input._
 926 | 
 927 | This was a design mistake. (The designers actually knew about this issue at the
 928 | time but didn't consider it important.) Here's the problem: Suppose you're an
 929 | attacker, and you're looking at a hash that I've published. Let's say you don't
 930 | know what input I used, maybe because I included a secret key or something like
 931 | that. Because of this mistake, even though you don't know my input, you can
 932 | construct a _new_ hash, which matches a _different_ input, one which starts
 933 | with the _same bytes as mine_ but then has some extra bytes of your choosing
 934 | added to the end. If SHA-256 hashes were truly independent of each other, this
 935 | wouldn't be possible, but they aren't, and it is possible.
 936 | 
 937 | There's one thing standing between you and this attack: the padding. I didn't
 938 | do anything special to the last chaining value, but I did pad my input. Those
 939 | padding bytes went into the state that you're looking at, and there's no way
 940 | for you to unmix them. But you can live with that, by making a clever
 941 | compromise:
 942 | 
 943 | *Pretend that my padding bytes are part of your chosen suffix.*
 944 | 
 945 | That is to say, you can't extend my input with a totally arbitrary suffix, but
 946 | you can choose any suffix that starts with my padding bytes. That's an
 947 | important limitation, but it still allows for quite a lot of mischief.
 948 | 
 949 | If you're reading through this project before we've covered Chapter 7 of
 950 | *Serious Cryptography*, it might not yet be clear why this attack is important.
 951 | The short answer is, this attack is why we need an algorithm called
 952 | [HMAC](https://en.wikipedia.org/wiki/HMAC) for keyed hashing, and programmers
 953 | who don't know about HMAC often misuse hash functions in ways that are
 954 | vulnerable to this attack. We'll get to HMAC in class shortly, if we haven't
 955 | already. For now, let's see the length extension attack in action.
 956 | 
 957 | ### Problem 14: modeling the extended input
 958 | 
 959 | Let's say my original input is 55 bytes long. I've chosen that length because
 960 | it's the most that still fits in one 64-byte block after padding is added.
 961 | What's the padding in this case? Let's use our `padding()` function to see it:
 962 | 
 963 | ```
 964 | >>> padding(55)
 965 | b'\x80\x00\x00\x00\x00\x00\x00\x01\xb8'
 966 | >>> padding(55).hex()
 967 | '8000000000000001b8'
 968 | ```
 969 | 
 970 | We can recognize the pieces there. One 0x80 byte at the front, no extra 0x00
 971 | bytes in this case, and an 8-byte big-endian integer encoding the value
 972 | 0x01b8&nbsp;=&nbsp;440&nbsp;=&nbsp;8&nbsp;*&nbsp;55, which is my input length
 973 | in bits. My original 55 bytes and these 9 bytes of padding are 64 bytes put
 974 | together, exactly one block. Clear so far?
 975 | 
 976 | Now put your attacker hat back on. You're going to pretend that those padding
 977 | bytes are actually the start of your chosen suffix. Then you're going to add
 978 | any number of additional suffix bytes of your choosing. The resulting
 979 | "synthetic" input, which you're ultimately going to compute the hash of, will
 980 | be equivalent to my original, plus my padding, plus the rest of your chosen
 981 | suffix. Let's say my original input was fifty-five `0xaa` bytes, and you chose
 982 | three `0xff` bytes for your suffix. In that case the synthetic message,
 983 | represented here as a hex-encoded string that I've split over a few lines,
 984 | would be:
 985 | 
 986 | ```
 987 | aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa    <-- the first 32-byte half of the first block
 988 | aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa8000000000000001b8    <-- the second 32-byte half of the first block
 989 | ffffff                                                              <-- the second block, 3 bytes *before* padding
 990 | ```
 991 | 
 992 | To be clear, we won't construct this complete synthetic string ourselves when
 993 | we perform the length extension attack. In fact, we can't. All those `0xaa`
 994 | bytes in my original input are hidden from the attacker. But this synthetic
 995 | string is what our final length-extended hash will *represent*, and we want to
 996 | model it in this problem.
 997 | 
 998 | Your input for this problem is an object with two fields, `"original_input"`
 999 | containing an ASCII string that we want to extend, and `"chosen_suffix"`
1000 | containing the ASCII string that we want to extend it with. Convert these
1001 | strings to bytes, and construct the synthetic message with padding in the
1002 | middle that a length extension attack would compute the hash of. Your output
1003 | should be this synthetic string, encoded as hex.
1004 | 
1005 | **Input:** an object with two fields, `"original_input"` and `"chosen_suffix"`
1006 | 
1007 | **Output:** the synthetic message, encoded as hex
1008 | 
1009 | ### Problem 15: recovering the state
1010 | 
1011 | The length extension attack will reuse a hash as a chaining value, feeding it
1012 | into additional calls to the compression function. However, you might remember
1013 | that there was a conversion step we did when we returned the hash. We converted
1014 | it from 8 words to 32 bytes. We need to undo that and recover the words.
1015 | 
1016 | Your input for this problem is a 32-byte hash, encoded as hex. Hex-decode it
1017 | into bytes. Then convert it back into a list of 8 words, by breaking it into
1018 | groups of 4 bytes and parsing each 4-byte group as a **big-endian** integer.
1019 | Your output should be that list.
1020 | 
1021 | **Input:** a 32-byte hash, encoded as hex
1022 | 
1023 | **Output:** the list of 8 state words recovered from the hash
1024 | 
1025 | ### Problem 16: the length extension attack
1026 | 
1027 | We're ready to perform the attack. Your input for this problem will be an
1028 | object with three fields, `"original_hash"`, `"original_len"`, and
1029 | `"chosen_suffix"`. Hex-decode the original hash and convert the chosen suffix
1030 | to ASCII bytes. Recover the list of 8 state words from the original hash, as
1031 | you did in Problem&nbsp;15 above.
1032 | 
1033 | Now, to begin the attack, _re-pad_ the chosen suffix, like you padded the
1034 | regular message in Problem&nbsp;13. However, instead of calling your
1035 | `padding()` function with the length of the suffix itself, call it with the
1036 | *total length of the synthetic message*. That is, the original input length,
1037 | plus the length of the original input's padding, plus the length of the suffix.
1038 | (This makes your padding bytes different, but it doesn't change _how many_
1039 | padding bytes you get. Can you see why?)
1040 | 
1041 | Next, hash the padded suffix by looping over its blocks and calling
1042 | `compress()` on each of them, again as you did in Problem&nbsp;13. However,
1043 | instead of using `IV` for your initial state, use the state words that you
1044 | recovered from the original hash.
1045 | 
1046 | Once you've compressed all the resulting blocks, the attack is finished.
1047 | Convert your list of 8 state words back into 32 bytes, using the same method as
1048 | in Problem&nbsp;13. Your output for this problem should be the resulting hash,
1049 | encoded as hex.
1050 | 
1051 | The input for the `"original_hash"` given in `example_input.json` was `elephant
1052 | jaguar vulture octopus butterfly`. You don't need to know that to extend it,
1053 | but if you like, you can check that the output is indeed a valid extension of
1054 | that original string as an exercise.
1055 | 
1056 | **Input:** an object with three fields, `"original_hash"`, `"original_len"`, and `"chosen_suffix"`
1057 | 
1058 | **Output:** the length-extended hash, encoded as hex
1059 | 
1060 | <a href="https://youtu.be/Vy7RaQUmOzE?t=201">
1061 |   <img alt="he is the one" src="images/matrix.jpg" width="400px">
1062 | </a>
1063 | 
1064 | ## Conclusion
1065 | 
1066 | The project is finished, and there are no more questions. If you've made it
1067 | this far, then you know more about the insides of a hash function than many
1068 | cryptographers do. That's something to be proud of, and I hope you'll find that
1069 | it was worth the trouble.
1070 | 
1071 | If you're tired of hashing and ready for a break, no need to read any further.
1072 | But if you found all this very interesting and you're eager to learn more,
1073 | there are many different avenues to explore. Here are a few:
1074 | 
1075 | - In Problem&nbsp;13, we implemented "all-at-once" hashing. That is, the entire
1076 |   input string was provided as an argument. In practice however, most hash
1077 |   functions are designed to work incrementally, piece-by-piece. When the input
1078 |   is very large, they read smaller chunks of it in a loop, so that the
1079 |   application doesn't need to allocate lots of memory for a large string.
1080 |   Python's `hashlib` module provides the
1081 |   [`.update()`](https://docs.python.org/3/library/hashlib.html#hashlib.hash.update)
1082 |   method for this. You can try refactoring your own SHA-256 code to support
1083 |   some sort of "update" function, which can be called multiple times. You'll
1084 |   need to think about how to "buffer" input when what you're given isn't an
1085 |   exact multiple of 64 bytes.
1086 | 
1087 | - More recent designs like SHA-3, BLAKE2, and BLAKE3 prevent length extension
1088 |   attacks by making sure that their chaining values and their published hashes
1089 |   are different from each other in some way. This prevents an attacker from
1090 |   looking at a hash and recovering the chaining value that would have been used
1091 |   to compress more input, like we did in Problems 15 and 16. Think about ways
1092 |   you might modify SHA-256 to prevent this. What if the compression function
1093 |   was implemented in hardware, and you weren't allowed to change it?
1094 | 
1095 | - The Merkle–Damgård construction is very common, but there are other ways to
1096 |   organize things. SHA-3 uses a "sponge construction" (p. 115), and BLAKE3 uses
1097 |   a "Merkle tree" (named after the same Ralph Merkle). These different
1098 |   structures can have a variety of different benefits. You might compare and
1099 |   contrast your SHA-256 code with [this Python implementation of
1100 |   SHA-3](https://github.com/coruus/py-keccak/blob/master/fips202/keccak.py),
1101 |   especially the part where they use `permute()` instead of `compress()`.
1102 | 
1103 | - Some use cases, particularly hash tables (dictionaries in Python), can
1104 |   tolerate collisions. For these cases, it's common to use a faster hash
1105 |   function with a smaller state and a shorter output. See for example
1106 |   [SipHash](https://en.wikipedia.org/wiki/SipHash), also designed by J.P.
1107 |   Aumasson, the author of [our textbook](https://nostarch.com/seriouscrypto).
1108 |   SipHash is used by default in the Rust
1109 |   [`HashMap`](https://doc.rust-lang.org/std/collections/struct.HashMap.html),
1110 |   for example. But note that even though hash tables/maps don't need collision
1111 |   resistance per se, they often do need some related security properties,
1112 |   because they can be [vulnerable to DoS
1113 |   attacks](https://www.anchor.com.au/blog/2012/12/how-to-explain-hash-dos-to-your-parents-by-using-cats/)
1114 |   if an attacker is able to produce too many collisions.
1115 | 
1116 | - Some applications need a hash function with more exotic properties. For
1117 |   example, you might be familiar with the `rsync` command for copying files
1118 |   over a network. Rsync uses a ["rolling
1119 |   hash"](https://en.wikipedia.org/wiki/Rolling_hash) to efficiently detect
1120 |   blocks that are the same between two different versions of a file. Rolling
1121 |   hashes look quite different from cryptographic hash functions, and they
1122 |   usually don't make strong security guarantees. If you have access to a remote
1123 |   server, you can play with making a tiny change to a large file, and see how
1124 |   long it takes Rsync to pick up the change.
1125 | 
1126 | Happy hashing.
1127 | 


--------------------------------------------------------------------------------
/solution_py/sha256.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python3
  2 | 
  3 | import sys
  4 | 
# fmt: off
# Starting value of the eight 32-bit state words; sha256() below begins from
# this list before compressing the first block.
IV = [
    0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19,
]

# One 32-bit constant per round: compress_block() hands ROUND_CONSTANTS[i] to
# round number i, for i in 0..63.
ROUND_CONSTANTS = [
    0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
    0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
    0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
    0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
    0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
    0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
    0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
    0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2,
]
# fmt: on
 21 | 
 22 | 
 23 | ### Building Blocks
 24 | 
 25 | 
 26 | def add32(*args):
 27 |     return sum(args) % (2**32)
 28 | 
 29 | 
 30 | def rightrotate32(x, n):
 31 |     assert x < 2**32, "x is too large. Did you use + instead of add32 somewhere?"
 32 |     right_part = x >> n
 33 |     left_part = x << (32 - n)
 34 |     return add32(left_part, right_part)
 35 | 
 36 | 
 37 | ### The Message Schedule
 38 | 
 39 | 
 40 | def little_sigma0(word):
 41 |     return rightrotate32(word, 7) ^ rightrotate32(word, 18) ^ (word >> 3)
 42 | 
 43 | 
 44 | def little_sigma1(word):
 45 |     return rightrotate32(word, 17) ^ rightrotate32(word, 19) ^ (word >> 10)
 46 | 
 47 | 
 48 | def message_schedule_array(block):
 49 |     assert len(block) == 64
 50 |     w = []
 51 |     for i in range(16):
 52 |         assert i == len(w)
 53 |         w.append(int.from_bytes(block[4 * i : 4 * i + 4], "big"))
 54 |     for i in range(16, 64):
 55 |         s0 = little_sigma0(w[i - 15])
 56 |         s1 = little_sigma1(w[i - 2])
 57 |         w.append(add32(w[i - 16], s0, w[i - 7], s1))
 58 |     return w
 59 | 
 60 | 
 61 | ### The Round Function
 62 | 
 63 | 
 64 | def big_sigma0(word):
 65 |     return rightrotate32(word, 2) ^ rightrotate32(word, 13) ^ rightrotate32(word, 22)
 66 | 
 67 | 
 68 | def big_sigma1(word):
 69 |     return rightrotate32(word, 6) ^ rightrotate32(word, 11) ^ rightrotate32(word, 25)
 70 | 
 71 | 
 72 | def choice(x, y, z):
 73 |     return (x & y) ^ (~x & z)
 74 | 
 75 | 
 76 | def majority(x, y, z):
 77 |     return (x & y) ^ (x & z) ^ (y & z)
 78 | 
 79 | 
 80 | def round(state, round_constant, schedule_word):
 81 |     S1 = big_sigma1(state[4])
 82 |     ch = choice(state[4], state[5], state[6])
 83 |     temp1 = add32(state[7], S1, ch, round_constant, schedule_word)
 84 |     S0 = big_sigma0(state[0])
 85 |     maj = majority(state[0], state[1], state[2])
 86 |     temp2 = add32(S0, maj)
 87 |     return [
 88 |         add32(temp1, temp2),
 89 |         state[0],
 90 |         state[1],
 91 |         state[2],
 92 |         add32(state[3], temp1),
 93 |         state[4],
 94 |         state[5],
 95 |         state[6],
 96 |     ]
 97 | 
 98 | 
 99 | ### The Compression Function
100 | 
101 | 
def compress_block(input_state_words, block):
    """Run all 64 rounds over one block and return the resulting state.

    Round i uses ROUND_CONSTANTS[i] together with word i of the block's
    message schedule. After the last round, each word of the incoming state
    is added (mod 2**32) to the matching word of the round output.
    """
    schedule = message_schedule_array(block)
    working = input_state_words
    for constant, word in zip(ROUND_CONSTANTS, schedule):
        working = round(working, constant, word)
    return [add32(before, after) for before, after in zip(input_state_words, working)]
110 | 
111 | 
112 | ### Padding
113 | 
114 | 
def padding_bytes(input_len):
    """Return the padding for a message of ``input_len`` bytes.

    The result is a 0x80 byte, then zero bytes, then the message length in
    bits as an 8-byte big-endian integer. The number of zero bytes is chosen
    so that ``input_len`` plus the padding length is a multiple of 64.
    """
    # The marker byte and the 8-byte length are always present; the zeros
    # fill whatever is left of the final 64-byte block.
    zero_count = 63 - (input_len + 8) % 64
    bit_length_field = (8 * input_len).to_bytes(8, "big")
    return b"\x80" + bytes(zero_count) + bit_length_field
121 | 
122 | 
123 | ### The Hash Function
124 | 
125 | 
def sha256(message):
    """Hash the bytes ``message`` and return the 32-byte digest.

    The message is padded to a multiple of 64 bytes, each 64-byte block is
    compressed into the state in order (starting from IV), and the final
    eight state words are written out big-endian.
    """
    padded = message + padding_bytes(len(message))
    assert len(padded) % 64 == 0
    state_words = IV
    for start in range(0, len(padded), 64):
        state_words = compress_block(state_words, padded[start : start + 64])
    return b"".join(word.to_bytes(4, "big") for word in state_words)
136 | 
137 | 
138 | # This is a test function. If you want to run it (and the other test below),
139 | # install pytest with `pip install pytest` and then run `pytest sha256.py`.
140 | # This test is the only place in this solution where I use hashlib.
def test_sha256():
    """Compare sha256() with hashlib for every input length from 0 to 199."""
    from hashlib import sha256 as reference_sha256

    for test_len in range(200):
        print(f"test_len = {test_len}")
        # Deterministic filler: byte values cycle through 0..250.
        message = bytes(value % 251 for value in range(test_len))
        expected = reference_sha256(message).digest()
        assert sha256(message) == expected
148 | 
149 | 
150 | ### The Length Extension Attack
151 | 
152 | 
def extended_len(original_len, suffix_len):
    """Length of original message + its padding + suffix, in bytes."""
    glue_padding_len = len(padding_bytes(original_len))
    return original_len + glue_padding_len + suffix_len
155 | 
156 | 
def reconstitute_state(original_hash):
    """Split a digest into eight integers, reading each 4-byte group big-endian."""
    state_words = []
    for offset in range(0, 32, 4):
        state_words.append(int.from_bytes(original_hash[offset : offset + 4], "big"))
    return state_words
159 | 
160 | 
def length_extend(original_hash, original_len, suffix):
    """Compute the hash of (original message + its padding + ``suffix``).

    Only the original message's hash and length are needed: the hash is
    turned back into state words, and compression continues over ``suffix``
    followed by the padding for the total extended length.
    """
    total_len = extended_len(original_len, len(suffix))
    remaining = suffix + padding_bytes(total_len)
    state_words = reconstitute_state(original_hash)
    for start in range(0, len(remaining), 64):
        state_words = compress_block(state_words, remaining[start : start + 64])
    return b"".join(word.to_bytes(4, "big") for word in state_words)
171 | 
172 | 
173 | # This is a test function. If you want to run it (and the other test above),
174 | # install pytest with `pip install pytest` and then run `pytest sha256.py`.
def test_length_extend():
    """Check length_extend() against hashing the extended message directly."""
    suffix = b"hello world"
    for test_len in range(200):
        print(f"test_len = {test_len}")
        message = bytes(value % 251 for value in range(test_len))
        # Direct route: hash message + its padding + suffix from scratch.
        extended_message = message + padding_bytes(len(message)) + suffix
        expected_hash = sha256(extended_message)
        # Extension route: start from only the message's hash and length.
        extended_hash = length_extend(sha256(message), len(message), suffix)
        assert expected_hash == extended_hash
185 | 
186 | 
if __name__ == "__main__":
    # Hash everything on standard input and print the digest in hex.
    digest = sha256(sys.stdin.buffer.read())
    print(digest.hex())
191 | 


--------------------------------------------------------------------------------
/solution_py/solution.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python3
 2 | 
 3 | import json
 4 | import sys
 5 | 
 6 | # sha256.py in this directory
 7 | import sha256
 8 | 
 9 | inputs = json.load(sys.stdin)
10 | outputs = {}
11 | 
12 | 
13 | ### Building Blocks
14 | 
15 | # Problem 1
16 | outputs["problem1"] = [sha256.add32(a, b) for a, b in inputs["problem1"]]
17 | 
18 | # Problem 2
19 | outputs["problem2"] = [sha256.rightrotate32(a, b) for a, b in inputs["problem2"]]
20 | 
21 | 
22 | ### The Message Schedule
23 | 
24 | # Problem 3
25 | outputs["problem3"] = sha256.little_sigma0(inputs["problem3"])
26 | 
27 | # Problem 4
28 | outputs["problem4"] = sha256.little_sigma1(inputs["problem4"])
29 | 
30 | # Problem 5
31 | outputs["problem5"] = sha256.message_schedule_array(inputs["problem5"].encode())
32 | 
33 | 
34 | ### The Round Function
35 | 
36 | # Problem 6
37 | outputs["problem6"] = sha256.big_sigma0(inputs["problem6"])
38 | 
39 | # Problem 7
40 | outputs["problem7"] = sha256.big_sigma1(inputs["problem7"])
41 | 
42 | # Problem 8
43 | outputs["problem8"] = sha256.choice(*inputs["problem8"])
44 | 
45 | # Problem 9
46 | outputs["problem9"] = sha256.majority(*inputs["problem9"])
47 | 
48 | # Problem 10
49 | obj = inputs["problem10"]
50 | outputs["problem10"] = sha256.round(
51 |     obj["state"], obj["round_constant"], obj["schedule_word"]
52 | )
53 | 
54 | ### The Compression Function
55 | 
56 | # Problem 11
57 | obj = inputs["problem11"]
58 | outputs["problem11"] = sha256.compress_block(obj["state"], obj["block"].encode())
59 | 
60 | 
61 | ### Padding
62 | 
63 | # Problem 12
64 | outputs["problem12"] = [sha256.padding_bytes(n).hex() for n in inputs["problem12"]]
65 | 
66 | 
67 | ### The Hash Function
68 | 
69 | # Problem 13
70 | outputs["problem13"] = [sha256.sha256(s.encode()).hex() for s in inputs["problem13"]]
71 | 
72 | 
73 | ### The Length Extension Attack
74 | 
75 | # Problem 14
76 | original_input = inputs["problem14"]["original_input"].encode()
77 | chosen_suffix = inputs["problem14"]["chosen_suffix"].encode()
78 | outputs["problem14"] = (
79 |     original_input + sha256.padding_bytes(len(original_input)) + chosen_suffix
80 | ).hex()
81 | 
82 | # Problem 15
83 | outputs["problem15"] = sha256.reconstitute_state(bytes.fromhex(inputs["problem15"]))
84 | 
85 | # Problem 16
86 | obj = inputs["problem16"]
87 | original_hash = bytes.fromhex(obj["original_hash"])
88 | original_len = obj["original_len"]
89 | chosen_suffix = obj["chosen_suffix"].encode()
90 | outputs["problem16"] = sha256.length_extend(
91 |     original_hash, original_len, chosen_suffix
92 | ).hex()
93 | 
94 | json.dump(outputs, sys.stdout, indent="  ")
95 | print()
96 | 


--------------------------------------------------------------------------------
/solution_rs/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "solution_rs"
 3 | version = "0.0.0"
 4 | edition = "2018"
 5 | 
 6 | [dependencies]
 7 | hex = "0.4.3"
 8 | serde = { version = "1.0", features = ["derive"] }
 9 | serde_json = "1.0"
10 | sha2 = "0.9.8"
11 | 


--------------------------------------------------------------------------------
/solution_rs/src/main.rs:
--------------------------------------------------------------------------------
  1 | use serde::{Deserialize, Serialize};
  2 | use std::convert::TryInto;
  3 | 
/// The eight 32-bit words that `compress` transforms block by block.
type State = [u32; 8];
/// One 64-byte input block, the unit that `compress` consumes.
type Block = [u8; 64];
/// A 32-byte digest, as produced by `hash_from_state`.
type Hash = [u8; 32];

/// Starting state used by `sha256` before the first block is compressed.
const IV: [u32; 8] = [
    0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
];

/// One constant per round: `compress` passes `ROUND_CONSTANTS[i]` to round `i`.
const ROUND_CONSTANTS: [u32; 64] = [
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
];
 22 | 
 23 | fn add32(a: u32, b: u32) -> u32 {
 24 |     a.wrapping_add(b)
 25 | }
 26 | 
 27 | fn rightrotate32(x: u32, n: u32) -> u32 {
 28 |     // The right operand of >> or << must always be within 0..=31.
 29 |     (x >> (n % 32)) | (x << ((32 - n) % 32))
 30 | }
 31 | 
 32 | fn little_sigma0(x: u32) -> u32 {
 33 |     rightrotate32(x, 7) ^ rightrotate32(x, 18) ^ (x >> 3)
 34 | }
 35 | 
 36 | fn little_sigma1(x: u32) -> u32 {
 37 |     rightrotate32(x, 17) ^ rightrotate32(x, 19) ^ (x >> 10)
 38 | }
 39 | 
 40 | fn message_schedule(block: &Block) -> [u32; 64] {
 41 |     let mut w = [0; 64];
 42 |     for i in 0..16 {
 43 |         w[i] = u32::from_be_bytes(block[4 * i..][..4].try_into().unwrap());
 44 |     }
 45 |     for i in 16..64 {
 46 |         w[i] = w[i - 16].wrapping_add(
 47 |             little_sigma0(w[i - 15]).wrapping_add(w[i - 7].wrapping_add(little_sigma1(w[i - 2]))),
 48 |         )
 49 |     }
 50 |     w
 51 | }
 52 | 
 53 | fn big_sigma0(x: u32) -> u32 {
 54 |     rightrotate32(x, 2) ^ rightrotate32(x, 13) ^ rightrotate32(x, 22)
 55 | }
 56 | 
 57 | fn big_sigma1(x: u32) -> u32 {
 58 |     rightrotate32(x, 6) ^ rightrotate32(x, 11) ^ rightrotate32(x, 25)
 59 | }
 60 | 
 61 | fn choice(x: u32, y: u32, z: u32) -> u32 {
 62 |     (x & y) ^ (!x & z)
 63 | }
 64 | 
 65 | fn majority(x: u32, y: u32, z: u32) -> u32 {
 66 |     (x & y) ^ (x & z) ^ (y & z)
 67 | }
 68 | 
 69 | fn round(state: &State, round_constant: u32, schedule_word: u32) -> State {
 70 |     let ch = choice(state[4], state[5], state[6]);
 71 |     let temp1 = add32(
 72 |         add32(
 73 |             add32(add32(state[7], big_sigma1(state[4])), ch),
 74 |             round_constant,
 75 |         ),
 76 |         schedule_word,
 77 |     );
 78 |     let maj = majority(state[0], state[1], state[2]);
 79 |     let temp2 = add32(big_sigma0(state[0]), maj);
 80 |     [
 81 |         add32(temp1, temp2),
 82 |         state[0],
 83 |         state[1],
 84 |         state[2],
 85 |         add32(state[3], temp1),
 86 |         state[4],
 87 |         state[5],
 88 |         state[6],
 89 |     ]
 90 | }
 91 | 
 92 | fn compress(input_state: &State, block: &Block) -> State {
 93 |     let w = message_schedule(block);
 94 |     let mut state = *input_state;
 95 |     for i in 0..64 {
 96 |         state = round(&state, ROUND_CONSTANTS[i], w[i]);
 97 |     }
 98 |     [
 99 |         add32(input_state[0], state[0]),
100 |         add32(input_state[1], state[1]),
101 |         add32(input_state[2], state[2]),
102 |         add32(input_state[3], state[3]),
103 |         add32(input_state[4], state[4]),
104 |         add32(input_state[5], state[5]),
105 |         add32(input_state[6], state[6]),
106 |         add32(input_state[7], state[7]),
107 |     ]
108 | }
109 | 
/// Builds the padding for a message of `input_length` bytes.
///
/// The result is a `0x80` byte, then zero bytes, then the message length in
/// bits as an 8-byte big-endian integer. The number of zero bytes is chosen so
/// that `input_length` plus the padding length is a multiple of 64.
///
/// Both length computations wrap instead of overflowing, so very large
/// `input_length` values no longer panic in builds with overflow checks.
/// Wrapping is exact here: 2^64 is a multiple of 64, so the block remainder is
/// unaffected, and the encoded bit length is simply taken modulo 2^64.
fn padding(input_length: u64) -> Vec<u8> {
    let remainder_bytes = input_length.wrapping_add(8) % 64;
    // `remainder_bytes` is below 64, so this is in 0..=63 and the cast is lossless.
    let zero_bytes = (63 - remainder_bytes) as usize;
    let mut padding_bytes = Vec::with_capacity(1 + zero_bytes + 8);
    padding_bytes.push(0x80);
    padding_bytes.resize(1 + zero_bytes, 0);
    padding_bytes.extend_from_slice(&input_length.wrapping_mul(8).to_be_bytes());
    padding_bytes
}
121 | 
122 | fn hash_from_state(state: &State) -> Hash {
123 |     let mut hash = [0; 32];
124 |     for i in 0..8 {
125 |         hash[4 * i..][..4].copy_from_slice(&state[i].to_be_bytes());
126 |     }
127 |     hash
128 | }
129 | 
130 | fn sha256(message: &[u8]) -> Hash {
131 |     let mut padded_message = message.to_vec();
132 |     padded_message.extend_from_slice(&padding(message.len() as u64));
133 |     assert_eq!(0, padded_message.len() % 64);
134 |     let mut state = IV;
135 |     for block in padded_message.chunks(64) {
136 |         state = compress(&state, block.try_into().unwrap());
137 |     }
138 |     hash_from_state(&state)
139 | }
140 | 
/// Compares `sha256` with the `sha2` crate for every input length below 1000.
#[test]
fn test_sha256() {
    // The sha2 dependency is only used right here, for testing.
    use sha2::{Digest, Sha256};
    for i in 0..1000 {
        dbg!(i);
        // `i` bytes, each holding the low 8 bits of `i`.
        let input = vec![i as u8; i];
        let expected = Sha256::digest(&input);
        assert_eq!(sha256(&input)[..], expected[..]);
    }
}
155 | 
156 | fn recover_state(hash: &Hash) -> State {
157 |     let mut state = [0; 8];
158 |     for i in 0..8 {
159 |         state[i] = u32::from_be_bytes(hash[4 * i..][..4].try_into().unwrap());
160 |     }
161 |     state
162 | }
163 | 
164 | fn length_extend(original_hash: &Hash, original_len: u64, chosen_suffix: &[u8]) -> Hash {
165 |     let mut state = recover_state(original_hash);
166 |     let mut padded_suffix = chosen_suffix.to_vec();
167 |     let synthetic_len =
168 |         original_len + padding(original_len).len() as u64 + chosen_suffix.len() as u64;
169 |     padded_suffix.extend_from_slice(&padding(synthetic_len));
170 |     for block in padded_suffix.chunks(64) {
171 |         state = compress(&state, block.try_into().unwrap());
172 |     }
173 |     hash_from_state(&state)
174 | }
175 | 
/// The JSON document that `main` reads from stdin, one field per problem.
#[derive(Deserialize)]
struct Input {
    // Pairs of words to add with `add32`.
    problem1: Vec<(u32, u32)>,
    // Pairs of (word, rotation distance) for `rightrotate32`.
    problem2: Vec<(u32, u32)>,
    // Argument to `little_sigma0`.
    problem3: u32,
    // Argument to `little_sigma1`.
    problem4: u32,
    // Text whose bytes are used as one block; `main` unwraps the conversion
    // to `Block`, so any length other than 64 bytes panics.
    problem5: String,
    // Argument to `big_sigma0`.
    problem6: u32,
    // Argument to `big_sigma1`.
    problem7: u32,
    // The three arguments to `choice`.
    problem8: (u32, u32, u32),
    // The three arguments to `majority`.
    problem9: (u32, u32, u32),
    problem10: Problem10Input,
    problem11: Problem11Input,
    // Message lengths passed to `padding`.
    problem12: Vec<u64>,
    // Messages to hash with `sha256`.
    problem13: Vec<String>,
    problem14: Problem14Input,
    // Hex-encoded hash handed to `recover_state`.
    problem15: String,
    problem16: Problem16Input,
}
195 | 
/// Arguments for a single call to `round`.
#[derive(Deserialize)]
struct Problem10Input {
    state: State,
    round_constant: u32,
    schedule_word: u32,
}
202 | 
/// Arguments for a single call to `compress`.
#[derive(Deserialize)]
struct Problem11Input {
    state: State,
    // Text whose bytes are used as the block; `main` unwraps the conversion
    // to `Block`, so any length other than 64 bytes panics.
    block: String,
}
208 | 
/// Pieces from which `main` builds original input + its padding + suffix.
#[derive(Deserialize)]
struct Problem14Input {
    original_input: String,
    chosen_suffix: String,
}
214 | 
/// Arguments for `length_extend`.
#[derive(Deserialize)]
struct Problem16Input {
    // Hex-encoded hash; `main` decodes it and unwraps the conversion to `Hash`.
    original_hash: String,
    // Length of the original message, passed through as `original_len`.
    original_len: u64,
    chosen_suffix: String,
}
221 | 
/// The answers that `main` serializes to stdout as pretty-printed JSON, one
/// field per problem. Byte-string answers (problems 12, 13, 14 and 16) are
/// hex-encoded by `main` before being stored here.
#[derive(Default, Serialize)]
struct Output {
    problem1: Vec<u32>,
    problem2: Vec<u32>,
    problem3: u32,
    problem4: u32,
    problem5: Vec<u32>,
    problem6: u32,
    problem7: u32,
    problem8: u32,
    problem9: u32,
    problem10: State,
    problem11: State,
    problem12: Vec<String>,
    problem13: Vec<String>,
    problem14: String,
    problem15: State,
    problem16: String,
}
241 | 
242 | fn main() {
243 |     let input: Input = serde_json::from_reader(std::io::stdin()).expect("parsing JSON failed");
244 |     let mut output = Output::default();
245 | 
246 |     // Problem 1
247 |     output.problem1 = input.problem1.iter().map(|&(a, b)| add32(a, b)).collect();
248 | 
249 |     // Problem 2
250 |     output.problem2 = input
251 |         .problem2
252 |         .iter()
253 |         .map(|&(x, n)| rightrotate32(x, n))
254 |         .collect();
255 | 
256 |     output.problem3 = little_sigma0(input.problem3);
257 | 
258 |     output.problem4 = little_sigma1(input.problem4);
259 | 
260 |     output.problem5 = message_schedule(input.problem5.as_bytes().try_into().unwrap()).to_vec();
261 | 
262 |     output.problem6 = big_sigma0(input.problem6);
263 | 
264 |     output.problem7 = big_sigma1(input.problem7);
265 | 
266 |     let (a, b, c) = input.problem8;
267 |     output.problem8 = choice(a, b, c);
268 | 
269 |     let (a, b, c) = input.problem9;
270 |     output.problem9 = majority(a, b, c);
271 | 
272 |     output.problem10 = round(
273 |         &input.problem10.state,
274 |         input.problem10.round_constant,
275 |         input.problem10.schedule_word,
276 |     );
277 | 
278 |     output.problem11 = compress(
279 |         &input.problem11.state,
280 |         input.problem11.block.as_bytes().try_into().unwrap(),
281 |     );
282 | 
283 |     output.problem12 = input
284 |         .problem12
285 |         .iter()
286 |         .map(|&len| hex::encode(padding(len)))
287 |         .collect();
288 | 
289 |     output.problem13 = input
290 |         .problem13
291 |         .iter()
292 |         .map(|s| hex::encode(sha256(s.as_bytes())))
293 |         .collect();
294 | 
295 |     let mut synthetic = Vec::new();
296 |     synthetic.extend_from_slice(input.problem14.original_input.as_bytes());
297 |     synthetic.extend_from_slice(&padding(input.problem14.original_input.len() as u64));
298 |     synthetic.extend_from_slice(input.problem14.chosen_suffix.as_bytes());
299 |     output.problem14 = hex::encode(&synthetic);
300 | 
301 |     output.problem15 = recover_state(
302 |         hex::decode(input.problem15).unwrap()[..]
303 |             .try_into()
304 |             .unwrap(),
305 |     );
306 | 
307 |     let original_hash: Hash = hex::decode(input.problem16.original_hash)
308 |         .unwrap()
309 |         .try_into()
310 |         .unwrap();
311 |     output.problem16 = hex::encode(length_extend(
312 |         &original_hash,
313 |         input.problem16.original_len,
314 |         input.problem16.chosen_suffix.as_bytes(),
315 |     ));
316 | 
317 |     serde_json::to_writer_pretty(std::io::stdout(), &output).expect("output failed");
318 |     println!()
319 | }
320 | 


--------------------------------------------------------------------------------