├── .gitignore ├── README.md ├── bigrams.txt ├── break_affine.py ├── break_autokey.py ├── break_caesar.py ├── break_fracmorse.py ├── break_hill3.py ├── break_simplesub.py ├── break_vigenere.py ├── break_xhill3.py ├── fmorse_quadgrams.txt ├── licence_MIT.txt ├── lorenz.c ├── mat_rnn.py ├── mat_rnn_run.py ├── monograms.txt ├── ngram_score.py ├── quadgrams.txt ├── rnn_generate.m ├── savednn800small9B.mat └── trigrams.txt /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.rar 3 | *.bz2 4 | *.zip 5 | *.gz 6 | 7 | .DS_Store 8 | *.sublime-* 9 | 10 | # Byte-compiled / optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | env/ 20 | bin/ 21 | build/ 22 | develop-eggs/ 23 | dist/ 24 | eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | .tox/ 40 | .coverage 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | 45 | # Translations 46 | *.mo 47 | 48 | # Mr Developer 49 | .mr.developer.cfg 50 | .project 51 | .pydevproject 52 | 53 | # Rope 54 | .ropeproject 55 | 56 | # Django stuff: 57 | *.log 58 | *.pot 59 | 60 | # Sphinx documentation 61 | docs/_build/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | python_cryptanalysis 2 | ==================== 3 | 4 | All code and data MIT licenced. 5 | 6 | Cryptanalysis scripts for classical ciphers e.g. substitution, Vigenere etc. These are for practicalcryptography.com/cryptanalysis 7 | 8 | These cryptanalysis scripts depend on having pycipher installed. 
9 | 10 | currently not working too great: 11 | - break_coltrans.py 12 | - break_playfair.py 13 | -------------------------------------------------------------------------------- /bigrams.txt: -------------------------------------------------------------------------------- 1 | TH 116997844 2 | HE 100689263 3 | IN 87674002 4 | ER 77134382 5 | AN 69775179 6 | RE 60923600 7 | ES 57070453 8 | ON 56915252 9 | ST 54018399 10 | NT 50701084 11 | EN 48991276 12 | AT 48274564 13 | ED 46647960 14 | ND 46194306 15 | TO 46115188 16 | OR 45725191 17 | EA 43329810 18 | TI 42888666 19 | AR 42353262 20 | TE 42295813 21 | NG 38567365 22 | AL 38211584 23 | IT 37938534 24 | AS 37773878 25 | IS 37349981 26 | HA 35971841 27 | ET 32872552 28 | SE 31532272 29 | OU 31112284 30 | OF 30540904 31 | LE 30383262 32 | SA 30080131 33 | VE 29320973 34 | RO 29230770 35 | RA 28645577 36 | RI 27634643 37 | HI 27495342 38 | NE 27331675 39 | ME 27237733 40 | DE 27029835 41 | CO 26737101 42 | TA 26147593 43 | EC 25775798 44 | SI 25758841 45 | LL 24636875 46 | SO 23903631 47 | NA 23547524 48 | LI 23291169 49 | LA 23178317 50 | EL 23092248 51 | MA 21828378 52 | DI 21673998 53 | IC 21468412 54 | RT 21456059 55 | NS 21306421 56 | RS 21237259 57 | IO 21210160 58 | OM 21066156 59 | CH 20132750 60 | OT 20088048 61 | CA 19930754 62 | CE 19803619 63 | HO 19729026 64 | BE 19468489 65 | TT 19367472 66 | FO 18923772 67 | TS 18922522 68 | SS 18915696 69 | NO 18894111 70 | EE 18497942 71 | EM 18145294 72 | AC 17904683 73 | IL 17877600 74 | DA 17584055 75 | NI 17452104 76 | UR 17341717 77 | WA 16838794 78 | SH 16773127 79 | EI 16026915 80 | AM 15975981 81 | TR 15821226 82 | DT 15759673 83 | US 15699353 84 | LO 15596310 85 | PE 15573318 86 | UN 15237699 87 | NC 15214623 88 | WI 15213018 89 | UT 15137169 90 | AD 14877234 91 | EW 14776406 92 | OW 14610429 93 | GE 14425023 94 | EP 14024377 95 | AI 13974919 96 | LY 13742031 97 | OL 13726491 98 | FT 13696078 99 | OS 13596265 100 | EO 13524186 101 | EF 13252227 102 | PR 
13191182 103 | WE 13185116 104 | DO 13120322 105 | MO 12950768 106 | ID 12896787 107 | IE 12505546 108 | MI 12168944 109 | PA 12068709 110 | FI 11993833 111 | PO 11917535 112 | CT 11888752 113 | WH 11852909 114 | IR 11681353 115 | AY 11523416 116 | GA 11239788 117 | SC 10800636 118 | KE 10650670 119 | EV 10574011 120 | SP 10570626 121 | IM 10544422 122 | OP 10459455 123 | DS 10429887 124 | LD 10245579 125 | UL 10173468 126 | OO 10168856 127 | SU 10031005 128 | IA 10002012 129 | GH 9880399 130 | PL 9812226 131 | EB 9738798 132 | IG 9530574 133 | VI 9380037 134 | IV 9129232 135 | WO 9106647 136 | YO 9088497 137 | RD 9025637 138 | TW 8910254 139 | BA 8867461 140 | AG 8809266 141 | RY 8788539 142 | AB 8775582 143 | LS 8675452 144 | SW 8673234 145 | AP 8553911 146 | FE 8529289 147 | TU 8477495 148 | CI 8446084 149 | FA 8357929 150 | HT 8351551 151 | FR 8339376 152 | AV 8288885 153 | EG 8286463 154 | GO 8188708 155 | BO 8172395 156 | BU 8113271 157 | TY 8008918 158 | MP 7835172 159 | OC 7646952 160 | OD 7610214 161 | EH 7559141 162 | YS 7539621 163 | EY 7528342 164 | RM 7377989 165 | OV 7350014 166 | GT 7347990 167 | YA 7239548 168 | CK 7205091 169 | GI 7103140 170 | RN 7064635 171 | GR 6989963 172 | RC 6974063 173 | BL 6941044 174 | LT 6817273 175 | YT 6714151 176 | OA 6554221 177 | YE 6499305 178 | OB 6212512 179 | DB 6106719 180 | FF 6085519 181 | SF 6073995 182 | RR 5896212 183 | DU 5861311 184 | KI 5814357 185 | UC 5742385 186 | IF 5740414 187 | AF 5702567 188 | DR 5701879 189 | CL 5683204 190 | EX 5649363 191 | SM 5580755 192 | PI 5559210 193 | SB 5553684 194 | CR 5514347 195 | TL 5403137 196 | OI 5336616 197 | RU 5330557 198 | UP 5306948 199 | BY 5232074 200 | TC 5196817 201 | NN 5180899 202 | AK 5137311 203 | SL 4965012 204 | NF 4950333 205 | UE 4927837 206 | DW 4906814 207 | AU 4884168 208 | PP 4873393 209 | UG 4832325 210 | RL 4803246 211 | RG 4645938 212 | BR 4621080 213 | CU 4604045 214 | UA 4589997 215 | DH 4585765 216 | RK 4491400 217 | YI 4461214 218 | LU 
4402940 219 | UM 4389720 220 | BI 4356462 221 | NY 4343290 222 | NW 4215967 223 | QU 4169424 224 | OG 4163126 225 | SN 4157990 226 | MB 4121764 227 | VA 4111375 228 | DF 4033878 229 | DD 4001275 230 | MS 3922855 231 | GS 3920675 232 | AW 3918960 233 | NH 3915410 234 | PU 3858148 235 | HR 3843001 236 | SD 3842250 237 | TB 3815459 238 | PT 3812475 239 | NM 3796928 240 | DC 3782481 241 | GU 3768430 242 | TM 3759861 243 | MU 3755834 244 | NU 3732602 245 | MM 3730508 246 | NL 3692985 247 | EU 3674130 248 | WN 3649615 249 | NB 3602692 250 | RP 3588188 251 | DM 3544905 252 | SR 3513808 253 | UD 3499535 254 | UI 3481482 255 | RF 3436232 256 | OK 3397570 257 | YW 3379064 258 | TF 3368452 259 | IP 3348621 260 | RW 3348005 261 | RB 3346212 262 | OH 3254659 263 | KS 3227333 264 | DP 3145043 265 | FU 3138900 266 | YC 3128053 267 | TP 3070427 268 | MT 3055946 269 | DL 3050945 270 | NK 3043200 271 | CC 3026492 272 | UB 2990868 273 | RH 2968706 274 | NP 2968126 275 | JU 2924815 276 | FL 2890839 277 | DN 2840522 278 | KA 2833038 279 | PH 2825344 280 | HU 2771830 281 | JO 2721345 282 | LF 2702522 283 | YB 2696786 284 | RV 2692445 285 | OE 2616308 286 | IB 2598444 287 | IK 2585124 288 | YP 2581863 289 | GL 2576787 290 | LP 2543957 291 | YM 2516273 292 | LB 2463693 293 | HS 2462026 294 | DG 2442139 295 | GN 2426429 296 | EK 2411639 297 | NR 2393580 298 | PS 2377036 299 | TD 2346516 300 | LC 2328063 301 | SK 2321888 302 | YF 2305244 303 | YH 2291273 304 | VO 2253292 305 | AH 2225270 306 | DY 2218040 307 | LM 2216514 308 | SY 2214270 309 | NV 2194534 310 | YD 2122337 311 | FS 2047416 312 | SG 2043770 313 | YR 2021939 314 | YL 2013939 315 | WS 1988727 316 | MY 1949129 317 | OY 1932892 318 | KN 1903836 319 | IZ 1865802 320 | XP 1840696 321 | LW 1836811 322 | TN 1782119 323 | KO 1758001 324 | AA 1721143 325 | JA 1712763 326 | ZE 1709871 327 | FC 1570791 328 | GW 1567991 329 | TG 1530045 330 | XT 1509969 331 | FH 1507604 332 | LR 1505092 333 | JE 1487348 334 | YN 1485655 335 | GG 1468286 
336 | GF 1465290 337 | EQ 1461436 338 | HY 1446451 339 | KT 1443985 340 | HC 1441057 341 | BS 1409672 342 | HW 1403223 343 | HN 1383958 344 | CS 1381608 345 | HM 1353001 346 | NJ 1342735 347 | HH 1329998 348 | WT 1301293 349 | GC 1299541 350 | LH 1274048 351 | EJ 1256993 352 | FM 1251312 353 | DV 1238565 354 | LV 1238287 355 | WR 1226755 356 | GP 1215204 357 | FP 1199845 358 | GB 1184377 359 | GM 1178511 360 | HL 1169468 361 | LK 1164186 362 | CY 1145316 363 | MC 1101727 364 | YG 1049082 365 | XI 1024736 366 | HB 1014004 367 | FW 1005903 368 | GY 979804 369 | HP 978649 370 | MW 937621 371 | PM 931225 372 | ZA 929119 373 | LG 926472 374 | IW 922059 375 | XA 904148 376 | FB 888155 377 | SV 882083 378 | GD 879792 379 | IX 879360 380 | AJ 870262 381 | KL 846309 382 | HF 834284 383 | HD 828755 384 | AE 815963 385 | SQ 800346 386 | DJ 799366 387 | FY 789961 388 | AZ 768359 389 | LN 752316 390 | AO 749566 391 | FD 748027 392 | KW 719633 393 | MF 715087 394 | MH 710864 395 | SJ 704442 396 | UF 701892 397 | TV 698150 398 | XC 697995 399 | YU 695512 400 | BB 689158 401 | WW 674610 402 | OJ 661082 403 | AX 660826 404 | MR 660619 405 | WL 657782 406 | XE 653947 407 | KH 650095 408 | OX 650078 409 | UO 649906 410 | ZI 644035 411 | FG 637758 412 | IH 610683 413 | TK 610333 414 | II 607124 415 | IU 576683 416 | TJ 559473 417 | MN 558397 418 | WY 553647 419 | KY 553296 420 | KF 537342 421 | FN 534362 422 | UY 531960 423 | PW 530411 424 | DK 525744 425 | RJ 518157 426 | UK 514873 427 | KR 507020 428 | KU 506618 429 | WM 505687 430 | KM 485617 431 | MD 481126 432 | ML 478528 433 | EZ 465466 434 | KB 457860 435 | WC 448394 436 | WD 432646 437 | HG 429607 438 | BT 428276 439 | ZO 424016 440 | KC 420017 441 | PF 418168 442 | YV 411487 443 | PC 400308 444 | PY 396147 445 | WB 394820 446 | YK 391953 447 | CP 382923 448 | YJ 378679 449 | KP 375653 450 | PB 369336 451 | CD 358435 452 | JI 357577 453 | UW 352732 454 | UH 339341 455 | WF 336213 456 | YY 332973 457 | WP 321746 458 | BC 320380 
459 | AQ 315068 460 | CB 298053 461 | IQ 291635 462 | CM 285942 463 | MG 285133 464 | DQ 283314 465 | BJ 282608 466 | TZ 280007 467 | KD 277982 468 | PD 273162 469 | FJ 269865 470 | CF 267630 471 | NZ 266461 472 | CW 257253 473 | FV 244685 474 | VY 233082 475 | FK 228905 476 | OZ 228556 477 | ZZ 221275 478 | IJ 219128 479 | LJ 218362 480 | NQ 217422 481 | UV 212051 482 | XO 211173 483 | PG 211133 484 | HK 210385 485 | KG 209266 486 | VS 204093 487 | HV 197539 488 | BM 191807 489 | HJ 189906 490 | CN 188046 491 | GV 186777 492 | CG 181590 493 | WU 180884 494 | GJ 176947 495 | XH 166599 496 | GK 163830 497 | TQ 159111 498 | CQ 157546 499 | RQ 156933 500 | BH 154489 501 | XS 154347 502 | UZ 153736 503 | WK 148964 504 | XU 147533 505 | UX 144814 506 | BD 141752 507 | BW 140189 508 | WG 139890 509 | MV 136314 510 | MJ 134263 511 | PN 131645 512 | XM 127492 513 | OQ 122677 514 | BV 120081 515 | XW 119322 516 | KK 118811 517 | BP 115161 518 | ZU 113538 519 | RZ 113432 520 | XF 113031 521 | MK 111041 522 | ZH 107639 523 | BN 106125 524 | ZY 105871 525 | HQ 101241 526 | WJ 99435 527 | IY 98361 528 | DZ 98038 529 | VR 96416 530 | ZS 94993 531 | XY 94329 532 | CV 94224 533 | XB 94041 534 | XR 90046 535 | UJ 88168 536 | YQ 87953 537 | VD 85611 538 | PK 83017 539 | VU 82830 540 | JR 80471 541 | ZL 80039 542 | SZ 79840 543 | YZ 78281 544 | LQ 77148 545 | KJ 76816 546 | BF 75352 547 | NX 74844 548 | QA 73527 549 | QI 73387 550 | KV 73184 551 | ZW 68865 552 | WV 63930 553 | UU 63043 554 | VT 62912 555 | VP 62577 556 | XD 60101 557 | GQ 59750 558 | XL 59585 559 | VC 59024 560 | CZ 57914 561 | LZ 57314 562 | ZT 56955 563 | WZ 52836 564 | SX 50975 565 | ZB 50652 566 | VL 49032 567 | PV 48105 568 | FQ 47504 569 | PJ 47043 570 | ZM 46034 571 | VW 45608 572 | CJ 41526 573 | ZC 41037 574 | BG 40516 575 | JS 39326 576 | XG 39289 577 | RX 38654 578 | HZ 37066 579 | XX 35052 580 | VM 35024 581 | XN 34734 582 | QW 34669 583 | JP 34520 584 | VN 33082 585 | ZD 32906 586 | ZR 32685 587 | FZ 
def break_affine(ctext):
    """Brute-force an affine cipher and return the best-scoring key.

    Every valid key pair ``(a, b)`` is tried: ``a`` ranges over the twelve
    values coprime with 26 and ``b`` over all 26 shifts.  Each candidate
    decryption is scored with the module-level ``fitness`` model and the
    highest-scoring candidate wins.

    ctext   -- ciphertext; anything other than A-Z is stripped after
               upper-casing.
    returns -- a tuple ``(score, (a, b))`` for the best candidate.
    """
    # make sure ciphertext has all spacing/punc removed and is uppercase
    ctext = re.sub('[^A-Z]', '', ctext.upper())
    # try all possible keys, return the one with the highest fitness
    scores = []
    for a in [1, 3, 5, 7, 9, 11, 15, 17, 19, 21, 23, 25]:
        # b must cover all 26 residues (0..25); stopping at 24 would make
        # any message enciphered with b=25 impossible to recover.
        scores.extend([(fitness.score(Affine(a, b).decipher(ctext)), (a, b))
                       for b in range(26)])
    return max(scores)
def break_caesar(ctext):
    """Try all 26 Caesar shifts and return the best one.

    The ciphertext is upper-cased and stripped of everything except A-Z,
    each shift's decryption is scored with the module-level ``fitness``
    model, and the ``(score, shift)`` pair with the highest score is
    returned.
    """
    cleaned = re.sub('[^A-Z]', '', ctext.upper())
    # tuples compare on score first, so max() picks the fittest decryption
    return max(
        (fitness.score(Caesar(shift).decipher(cleaned)), shift)
        for shift in range(26)
    )
# helper function, converts an integer 0-25 into a character
def i2a(i): return 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'[i%26]

# decipher a piece of text using the substitution cipher and a certain key
def sub_decipher(text,key):
    """Decipher ``text`` with the simple-substitution key ``key``.

    text    -- string to decipher; letters A-Z (either case) are mapped and
               come out upper-case, every other character is passed through
               unchanged.
    key     -- sequence (list or string) containing each of the 26 capital
               letters; a ciphertext letter decodes to the alphabet letter
               at that letter's position in the key.
    returns -- the deciphered string.
    raises  -- ValueError if ``key`` is missing one of the 26 letters.
    """
    # Build the inverse mapping once: ciphertext letter -> plaintext letter.
    inverse = dict((c, i2a(key.index(c))) for c in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    # Look up by upper-cased character and fall back to the character itself.
    # This keeps non-ASCII letters (for which isalpha() is true) from
    # indexing outside the 26-entry table, and join avoids quadratic +=.
    return ''.join(inverse.get(c.upper(), c) for c in text)
''' 30 | # make sure ciphertext has all spacing/punc removed and is uppercase 31 | ctext = re.sub('[^A-Z]','',ctext.upper()) 32 | parentkey,parentscore = startkey or list('ABCDEFGHIJKLMNOPQRSTUVWXYZ'),-99e99 33 | if not startkey: random.shuffle(parentkey) 34 | parentscore = fitness.score(sub_decipher(ctext,parentkey)) 35 | count = 0 36 | while count < 1000: 37 | a = random.randint(0,25) 38 | b = random.randint(0,25) 39 | child = parentkey[:] 40 | # swap two characters in the child 41 | child[a],child[b] = child[b],child[a] 42 | score = fitness.score(sub_decipher(ctext,child)) 43 | # if the child was better, replace the parent with it 44 | if score > parentscore: 45 | parentscore, parentkey = score, child[:] 46 | count = 0 # reset the counter 47 | count += 1 48 | return parentscore, parentkey 49 | 50 | ctext = sys.argv[1] 51 | ctext = re.sub(r'[^A-Z ]','',ctext.upper()) 52 | maxscore, maxkey = break_simplesub(ctext,list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')) 53 | print str(maxscore),'FractionatedMorse key:',''.join(maxkey), 'decrypt: ',FracMorse(maxkey).decipher(ctext) 54 | for i in range(1000): 55 | score, key = break_simplesub(ctext) 56 | if score > maxscore: 57 | maxscore,maxkey = score,key[:] 58 | print str(maxscore),'FractionatedMorse key:',''.join(maxkey), 'decrypt: ',FracMorse(maxkey).decipher(ctext) 59 | 60 | -------------------------------------------------------------------------------- /break_hill3.py: -------------------------------------------------------------------------------- 1 | # this code breaks 3by3 hill ciphers fairly efficiently. 
# keep a list of the N best things we have seen, discard anything else
# the list may be greater than N, and unsorted. Call finalise() before accessing
# to guarantee correct length and sorted order.
class nbest(object):
    """Bounded collection that retains the N largest items added to it.

    Items are ordered with their normal comparison, largest first, so
    ``(score, key)`` tuples rank by score.  Between calls to ``finalise()``
    the internal list may hold up to ``2*N`` items in arbitrary order.
    """
    def __init__(self,N=1000):
        self.store = []   # candidate items; sorted/truncated by finalise()
        self.N = N        # number of items to keep

    def add(self,item):
        """Record ``item``; trim the buffer once it exceeds ``2*N`` entries."""
        self.store.append(item)
        # Only sort when the buffer has doubled.  Sorting on every add (as a
        # bare ``if len(self.store):`` would) defeats the lazy contract
        # described above.  Trimming to the top N is safe at any time because
        # a discarded item can never re-enter the top N.
        if len(self.store) > 2*self.N: self.finalise()

    def finalise(self):
        """Sort largest-first and drop everything beyond the best N."""
        self.store.sort(reverse=True)
        self.store = self.store[:self.N]

    def __getitem__(self,k):
        return self.store[k]

    def __len__(self):
        return len(self.store)
seq[0]%2 == 0 and seq[1]%2 == 0 and seq[2]%2 == 0: 59 | continue 60 | if seq[0]%13 == 0 and seq[1]%13 == 0 and seq[2]%13 == 0: 61 | continue 62 | seq2 = (seq[0],seq[1],seq[2],1,1,1,1,1,1) 63 | txt = hill3decipher(ctext,seq2) 64 | score = 0 65 | for i in range(0,len(txt),3): 66 | score += mono.score(txt[i]) 67 | rec.add((score,seq2)) 68 | 69 | rec.finalise() 70 | rec2 = nbest(N) 71 | for j in range(N): 72 | for seq in product(range(26),repeat=3): 73 | if seq[0]%2 == 0 and seq[1]%2 == 0 and seq[2]%2 == 0: 74 | continue 75 | if seq[0]%13 == 0 and seq[1]%13 == 0 and seq[2]%13 == 0: 76 | continue 77 | seq2 = (rec[j][1][0],rec[j][1][1],rec[j][1][2],seq[0],seq[1],seq[2],1,1,1) 78 | txt = hill3decipher(ctext,seq2) 79 | score = 0 80 | for i in range(0,len(txt),3): 81 | score += bi.score(txt[i:i+2]) 82 | rec2.add((score,seq2)) 83 | 84 | rec2.finalise() 85 | rec3 = nbest(N) 86 | for j in range(N): 87 | for seq in product(range(26),repeat=3): 88 | seq2 = (rec2[j][1][0],rec2[j][1][1],rec2[j][1][2],rec2[j][1][3],rec2[j][1][4],rec2[j][1][5],seq[0],seq[1],seq[2]) 89 | da = (seq2[0]*seq2[4]*seq2[8] + seq2[1]*seq2[5]*seq2[6] + seq2[2]*seq2[3]*seq2[7]) - (seq2[2]*seq2[4]*seq2[6] + seq2[1]*seq2[3]*seq2[8] + seq2[0]*seq2[5]*seq2[7]) 90 | if da % 2 == 0 or da % 13 ==0: 91 | continue 92 | txt = hill3decipher(ctext,seq2) 93 | score = quad.score(txt) 94 | rec3.add((score,seq2)) 95 | 96 | rec3.finalise() 97 | for j in range(10): 98 | print rec3[j],preamble, hill3decipher(ctext,rec3[j][1]) 99 | sys.stdout.flush() 100 | 101 | -------------------------------------------------------------------------------- /break_simplesub.py: -------------------------------------------------------------------------------- 1 | from pycipher import SimpleSubstitution as SimpleSub 2 | import random 3 | import re 4 | from ngram_score import ngram_score 5 | fitness = ngram_score('quadgrams.txt') # load our quadgram statistics 6 | 7 | 
ctext='pmpafxaikkitprdsikcplifhwceigixkirradfeirdgkipgigudkcekiigpwrpucikceiginasikwduearrxiiqepcceindgmieinpwdfprduppcedoikiqiasafmfddfipfgmdafmfdteiki' 8 | ctext = re.sub('[^A-Z]','',ctext.upper()) 9 | 10 | maxkey = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ') 11 | maxscore = -99e9 12 | parentscore,parentkey = maxscore,maxkey[:] 13 | print "Substitution Cipher solver, you may have to wait several iterations" 14 | print "for the correct result. Press ctrl+c to exit program." 15 | # keep going until we are killed by the user 16 | i = 0 17 | while 1: 18 | i = i+1 19 | random.shuffle(parentkey) 20 | deciphered = SimpleSub(parentkey).decipher(ctext) 21 | parentscore = fitness.score(deciphered) 22 | count = 0 23 | while count < 1000: 24 | a = random.randint(0,25) 25 | b = random.randint(0,25) 26 | child = parentkey[:] 27 | # swap two characters in the child 28 | child[a],child[b] = child[b],child[a] 29 | deciphered = SimpleSub(child).decipher(ctext) 30 | score = fitness.score(deciphered) 31 | # if the child was better, replace the parent with it 32 | if score > parentscore: 33 | parentscore = score 34 | parentkey = child[:] 35 | count = 0 36 | count = count+1 37 | # keep track of best score seen so far 38 | if parentscore>maxscore: 39 | maxscore,maxkey = parentscore,parentkey[:] 40 | print '\nbest score so far:',maxscore,'on iteration',i 41 | ss = SimpleSub(maxkey) 42 | print ' best key: '+''.join(maxkey) 43 | print ' plaintext: '+ss.decipher(ctext) 44 | 45 | 46 | -------------------------------------------------------------------------------- /break_vigenere.py: -------------------------------------------------------------------------------- 1 | from ngram_score import ngram_score 2 | from pycipher import Vigenere 3 | import re 4 | from itertools import permutations 5 | 6 | qgram = ngram_score('quadgrams.txt') 7 | trigram = ngram_score('trigrams.txt') 8 | ctext = 'kiqpbkxspshwehospzqhoinlgapp' 9 | ctext = re.sub(r'[^A-Z]','',ctext.upper()) 10 | 11 | # keep a list of the N best 
# keep a list of the N best things we have seen, discard anything else
class nbest(object):
    """Always-sorted list of the N largest items added so far.

    Items are ranked with their normal comparison, largest first, so
    ``(score, key, ...)`` tuples rank by score.
    """
    def __init__(self, N=1000):
        self.N = N
        self.store = []

    def add(self, item):
        """Insert ``item`` and keep only the best N, largest first."""
        ranked = sorted(self.store + [item], reverse=True)
        self.store = ranked[:self.N]

    def __getitem__(self, k):
        return self.store[k]

    def __len__(self):
        return len(self.store)
# keep a list of the N best things we have seen, discard anything else
# the list may be greater than N, and unsorted. Call finalise() before accessing
# to guarantee correct length and sorted order.
class nbest(object):
    """Bounded collection that retains the N largest items added to it.

    Items are ordered with their normal comparison, largest first, so
    ``(score, key, ...)`` tuples rank by score.  Between calls to
    ``finalise()`` the internal list may hold up to ``2*N`` items in
    arbitrary order.
    """
    def __init__(self,N=1000):
        self.store = []   # candidate items; sorted/truncated by finalise()
        self.N = N        # number of items to keep

    def add(self,item):
        """Record ``item``; trim the buffer once it exceeds ``2*N`` entries."""
        self.store.append(item)
        # Use this instance's own limit.  A bare ``N`` would resolve to
        # whatever module-level N exists (or raise NameError if none does)
        # rather than the size this object was constructed with.
        if len(self.store) > 2*self.N: self.finalise()

    def finalise(self):
        """Sort largest-first and drop everything beyond the best N."""
        self.store.sort(reverse=True)
        self.store = self.store[:self.N]

    def __getitem__(self,k):
        return self.store[k]

    def __len__(self):
        return len(self.store)
(seq[0],seq[1],seq[2],1,1,1,1,1,1) 59 | txt = hill3decipher(ctext,seq2,(seq[3],0,0)) 60 | score = 0 61 | for i in range(0,len(txt),3): 62 | score += mono.score(txt[i]) 63 | rec.add((score,seq2,(seq[3],0,0))) 64 | rec.finalise() 65 | print 'stage 1 complete...' 66 | rec2 = nbest(N) 67 | for j in range(N): 68 | print j, 69 | sys.stdout.flush() 70 | for seq in product(range(26),repeat=4): 71 | if seq[0]%2 == 0 and seq[1]%2 == 0 and seq[2]%2 == 0: 72 | continue 73 | if seq[0]%13 == 0 and seq[1]%13 == 0 and seq[2]%13 == 0: 74 | continue 75 | seq2 = (rec[j][1][0],rec[j][1][1],rec[j][1][2],seq[0],seq[1],seq[2],1,1,1) 76 | txt = hill3decipher(ctext,seq2,(rec[j][2][0],seq[3],0)) 77 | score = 0 78 | for i in range(0,len(txt),3): 79 | score += bi.score(txt[i:i+2]) 80 | rec2.add((score,seq2,(rec[j][2][0],seq[3],0))) 81 | print 'stage 2 complete.' 82 | rec2.finalise() 83 | rec3 = nbest(N) 84 | for j in range(N): 85 | print j, 86 | sys.stdout.flush() 87 | for seq in product(range(26),repeat=4): 88 | seq2 = (rec2[j][1][0],rec2[j][1][1],rec2[j][1][2],rec2[j][1][3],rec2[j][1][4],rec2[j][1][5],seq[0],seq[1],seq[2]) 89 | da = (seq2[0]*seq2[4]*seq2[8] + seq2[1]*seq2[5]*seq2[6] + seq2[2]*seq2[3]*seq2[7]) - (seq2[2]*seq2[4]*seq2[6] + seq2[1]*seq2[3]*seq2[8] + seq2[0]*seq2[5]*seq2[7]) 90 | if da % 2 != 0 and da % 13 !=0: 91 | txt = hill3decipher(ctext,seq2,(rec2[j][2][0],rec2[j][2][1],seq[3])) 92 | score = quad.score(txt) 93 | rec3.add((score,seq2,(rec2[j][2][0],rec2[j][2][1],seq[3]))) 94 | # also try other permutation 95 | seq2 = (seq[0],seq[1],seq[2],rec2[j][1][0],rec2[j][1][1],rec2[j][1][2],rec2[j][1][3],rec2[j][1][4],rec2[j][1][5]) 96 | da = (seq2[0]*seq2[4]*seq2[8] + seq2[1]*seq2[5]*seq2[6] + seq2[2]*seq2[3]*seq2[7]) - (seq2[2]*seq2[4]*seq2[6] + seq2[1]*seq2[3]*seq2[8] + seq2[0]*seq2[5]*seq2[7]) 97 | if da % 2 != 0 and da % 13 !=0: 98 | txt = hill3decipher(ctext,seq2,(seq[3],rec2[j][2][0],rec2[j][2][1])) 99 | score = quad.score(txt) 100 | 
rec3.add((score,seq2,(seq[3],rec2[j][2][0],rec2[j][2][1]))) 101 | 102 | rec3.finalise() 103 | print 'stage 3 complete.' 104 | 105 | for j in range(10): 106 | print rec3[j], hill3decipher(ctext,rec3[j][1],rec3[j][2]) 107 | 108 | 109 | -------------------------------------------------------------------------------- /licence_MIT.txt: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2015 James Lyons - python_cryptanalysis 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and 5 | associated documentation files (the "Software"), to deal in the Software without restriction, 6 | including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial 11 | portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 14 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE 15 | AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 16 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
18 | -------------------------------------------------------------------------------- /lorenz.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define BAUDOT_ERROR 0xFF; 5 | 6 | #define LIMITATION_CHI_2_BACK 1 7 | #define LIMITATION_NONE 0 8 | 9 | char ascii2baudot(char a); 10 | char baudot2ascii(char b, int LTRS); 11 | char get_bit(char b, int pos); 12 | char set_bit(char b, int pos, int bit); 13 | void advance_kpos(int kpos[5]); 14 | void advance_spos(int spos[5]); 15 | void advance_m1pos(int *m1pos); 16 | void advance_m2pos(int *m2pos); 17 | 18 | // BREAM SETTINGS for LORENZ CIPHER 19 | // . . . x x x x . . . . x x . . . . x . x x . . x . . x x . x . x x . x . x x x x . 20 | char bk1[41] = {0,0,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0}; 21 | // x x . . x x x . x x . . . x . x . x x . . . x . . . . x x x . 22 | char bk2[31] = {1,1,0,0,1,1,1,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0}; 23 | // . . x x x . x x . . x . . . . x x x . . x x . x x . . x x 24 | char bk3[29] = {0,0,1,1,1,0,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,1,0,1,1,0,0,1,1}; 25 | // . . x x . . x . x x . . x . . x x . . x . . x x x x 26 | char bk4[26] = {0,0,1,1,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,1,0,0,1,1,1,1}; 27 | // . x . . . x . x x . . x . . . x x x . x x x . 28 | char bk5[23] = {0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,1,1,1,0,1,1,1,0}; 29 | // x x x . x . x x . . x x . . x x . . . x x x x . x . x x . x x . . . x x . . . . x x x x . x x . . x x . . . x x . . . . x 30 | char bm1[61] = {1,1,1,0,1,0,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,1,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1}; 31 | // x . x x x . x . x . x . x . . x . x . x x x . x . x . x . x . x . x . x . 32 | char bm2[37] = {1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,1,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0}; 33 | // . . x . x . x . x . x . . x . . x . x x . x x . x . x . . x x . x x x . . x x x . . . 
34 | char bs1[43] = {0,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,1,0,1,1,0,1,1,0,1,0,1,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,0}; 35 | // . . x . x x . x . x . x . x . x . x x . . x x . x . . x . x x x x . . . . . x x x . . x . x x 36 | char bs2[47] = {0,0,1,0,1,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,1,1,0,0,1,0,1,1}; 37 | // x . x . x . x . x . x . x . . x . . x x . x . x . x x x x . . . . x x x . . . x x x . x x . . x . . x 38 | char bs3[51] = {1,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,0,1,0,0,1}; 39 | // x . x . . x x . x . x . x . x . x . x x . x . . . . x x . . x x . . x x . x x x x x . x . . x . . . . x . 40 | char bs4[53] = {1,0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,0,1,1,0,1,0,0,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,0,1,0}; 41 | // . x . x . x . x . x x . . . x . x . . x x x . x x x x . x x . x . . . . x . . . x . . x x . x x . . x x . . x . x . x 42 | char bs5[59] = {0,1,0,1,0,1,0,1,0,1,1,0,0,0,1,0,1,0,0,1,1,1,0,1,1,1,1,0,1,1,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1}; 43 | 44 | 45 | int main(int argc, char *argv[]) 46 | { 47 | // the following are the start positions for each of the wheels 48 | int kpos[5]={0,6,7,3,4}; 49 | int m1pos=20; 50 | int m2pos=10; 51 | int spos[5]={8,26,16,21,24}; 52 | 53 | int TOTAL_MOTOR, BASIC_MOTOR; 54 | int LIMITATION; 55 | 56 | char message[] = "qzahlen<>"; 57 | char ch,bp,bc; // bp = plaintext baudot code, bc = ciphertext 58 | char bit; 59 | 60 | int i,j; 61 | // for each letter in message 62 | for(i=0; i>= pos; 108 | b &= 0x01; 109 | return b; 110 | } 111 | 112 | // sets position 'pos' of byte 'b' to value 'bit' 113 | char set_bit(char b, int pos, int bit){ 114 | int mask = ~(1 << pos); 115 | int ret = 0; 116 | bit = bit ? 
/* advance each of the five positions in spos by one step, wrapping each at its
   own wheel length (43, 47, 51, 53, 59 -- the sizes of the bs1..bs5 arrays) */
void advance_spos(int spos[5]){
    static const int wheel_len[5] = {43, 47, 51, 53, 59};
    int w;
    for(w = 0; w < 5; w++){
        spos[w] = (spos[w] + 1) % wheel_len[w];
    }
}
/* map an encoded ascii character to its 5-bit baudot code; FIGS-shifted
   characters are not handled. Besides 'A'..'Z' the accepted symbols are
   'n' (\n), 'r' (\r), '_', '>' (LTRS), '<' (FIGS) and 'i' (NULL).
   Anything else maps to 0x04, the same code as '_', as a placeholder. */
char ascii2baudot(char a){
    static const char symbols[32] = {
        'A','B','C','D','E','F','G','H','I','J','K','L','M',
        'N','O','P','Q','R','S','T','U','V','W','X','Y','Z',
        'n','r','_','>','<','i'
    };
    static const char codes[32] = {
        0x03,0x19,0x0E,0x09,0x01,0x0D,0x1A,0x14,0x06,0x0B,0x0F,0x12,0x1C,
        0x0C,0x18,0x16,0x17,0x0A,0x05,0x10,0x07,0x1E,0x13,0x1D,0x15,0x11,
        0x08,0x02,0x04,0x1F,0x1B,0x00
    };
    int k;
    for(k = 0; k < 32; k++){
        if(symbols[k] == a){
            return codes[k];
        }
    }
    return 0x04; // just a placeholder
}
# letter -> alphabet index (A/a -> 0 ... Z/z -> 25), built once at import time
# instead of on every call; a2i is used inside per-character loops
_A2I = {}
for _index, _letter in enumerate('ABCDEFGHIJKLMNOPQRSTUVWXYZ'):
    _A2I[_letter] = _index
    _A2I[_letter.lower()] = _index
del _index, _letter

def a2i(ch):
    ''' Return the 0-based alphabet index of the letter ch (case-insensitive).

    Raises KeyError for anything that is not a single A-Z / a-z letter.
    '''
    return _A2I[ch]
def printstore(store):
    ''' helper function, print just the relevant parts of store

    For every entry, prints its first two fields separated by a space, one
    entry per line. store only needs to support len() and integer indexing.
    '''
    for i in range(len(store)):
        # one pre-formatted string prints identically under the Python 2
        # print statement and the Python 3 print function
        print('%s %s' % (store[i][0], store[i][1]))
def sampleletter(self,distribution):
    ''' given a vector of probabilities, pull a sample from the distribution

    Returns the index of the first entry whose cumulative probability exceeds
    a uniform random draw from [0, 1). If the draw is not below the final
    cumulative value (the probabilities sum to slightly less than 1 after
    rounding), the last index is returned instead of silently falling through
    and returning None.

    Raises ValueError if distribution is empty.
    '''
    dist = np.cumsum(distribution)
    if len(dist) == 0:
        raise ValueError('cannot sample from an empty distribution')
    point = rand()
    for i in range(len(dist)):
        if point < dist[i]:
            return i
    # rounding left the total mass at or below `point`: fall back to the last index
    return len(dist) - 1
def str2in(self,str):
    ''' build the feature vector for a string

    Returns an array with one row per character and 35 columns: the 26
    one-hot letter columns followed by 9 look-ahead columns. For each of three
    per-letter statistics (letter frequency within the string, and the two
    entropy vectors returned by neighbours()), the value belonging to the
    letter 1, 2 and 3 positions ahead is used, with zeros where the look-ahead
    runs past the end of the string.

    Strings shorter than 3 characters are supported: every look-ahead column
    is padded back to exactly len(str) entries (the previous fixed-size
    padding produced columns of mismatched length and failed in vstack).
    '''
    def lookahead(values, k):
        # drop the first k entries and zero-pad back to the original length
        flat = np.ravel(values)
        return np.concatenate((flat[k:], np.zeros(min(k, len(flat)))))

    onehot = letter2onehot(str)
    freq = np.mean(onehot,0)
    eln,ern = neighbours(str)

    columns = []
    # column order matters: frequency x3, then eln x3, then ern x3
    for table in (freq, eln.T, ern.T):
        per_position = np.dot(onehot,table)
        for k in (1, 2, 3):
            columns.append(lookahead(per_position, k))

    temp = np.vstack(columns)
    feat = np.concatenate((onehot,temp.T),1)
    return feat
def rev(st):
    ''' return the items of st joined into one string, in reverse order '''
    return ''.join(reversed(list(st)))
class ngram_score(object):
    ''' Score text by summing the log10 probabilities of its n-grams. '''
    def __init__(self,ngramfile,sep=' '):
        ''' load a file containing ngrams and counts, calculate log probabilities

        ngramfile: path of a text file with one "<ngram><sep><count>" record per line
        sep: separator between the ngram and its count
        Raises ValueError if the file contains no records.
        '''
        self.ngrams = {}
        key = None
        # open() + with works on Python 2 and 3 and closes the handle;
        # the Python-2-only file() call left it open
        with open(ngramfile) as handle:
            for line in handle:
                if not line.strip():
                    continue  # tolerate blank lines, e.g. at the end of the file
                key,count = line.split(sep)
                self.ngrams[key] = int(count)
        if key is None:
            raise ValueError('no ngrams found in %r' % (ngramfile,))
        # the n-gram length is taken from the last record read
        self.L = len(key)
        self.N = sum(self.ngrams.values())
        #calculate log probabilities
        self.ngrams = dict((gram, log10(float(count)/self.N))
                           for gram, count in self.ngrams.items())
        # score given to n-grams that never occurred in the file
        self.floor = log10(0.01/self.N)

    def score(self,text):
        ''' compute the score of text

        Sums the log10 probability of every length-L window of text, using the
        floor value for windows missing from the table. Returns 0 when text is
        shorter than L.
        '''
        lookup = self.ngrams.get
        floor = self.floor
        width = self.L
        return sum(lookup(text[i:i+width], floor)
                   for i in range(len(text)-width+1))
instr(1:min(length(instr),34)); 34 | input = id2oneofk2(instr(1:end-1),'ABCDEFGHIJKLMNOPQRSTUVWXYZ'); 35 | label = id2oneofk2(instr(2:end),'ABCDEFGHIJKLMNOPQRSTUVWXYZ'); 36 | freqs = mean(id2oneofk2(instr,'ABCDEFGHIJKLMNOPQRSTUVWXYZ')); 37 | teln = eln(count,:); 38 | tern = ern(count,:); 39 | input = [input, label*freqs', [label(2:end,:)*freqs';0], [label(3:end,:)*freqs';0;0],... 40 | label*teln', [label(2:end,:)*teln';0], [label(3:end,:)*teln';0;0],... 41 | label*tern', [label(2:end,:)*tern';0], [label(3:end,:)*tern';0;0]]; 42 | 43 | L = size(input,1); 44 | a2 = zeros(L,H); 45 | 46 | a2prev = zeros(1,H); 47 | a1 = sigm(bsxfun(@plus,input*(W1+momentum*W1v),(b1+momentum*b1v))); 48 | for k = 1:L 49 | a2(k,:) = sigm(a1(k,:)*(W2+momentum*W2v) + a2prev*(WF+momentum*WFv) + (b2+momentum*b2v)); 50 | a2prev = a2(k,:); 51 | end 52 | %out = sigm(bsxfun(@plus,a2*(W3+momentum*W3v),(b3+momentum*b3v))); 53 | out = exp(bsxfun(@plus,a2*(W3+momentum*W3v),(b3+momentum*b3v))); 54 | out = bsxfun(@rdivide,out,sum(out,2)+eps); 55 | 56 | %perror = mean(sum((label-out).^2,2)); 57 | 58 | % now do back prop over the protein 59 | err = sign(label - out).*abs(label - out); 60 | %err(label(:,4)==1,:) = 0; 61 | delta3 = err;%.*deriv(out); 62 | W3v = momentum*W3v + 1/L * LR*a2'*delta3; 63 | b3v = momentum*b3v + LR*mean(delta3,1); 64 | W3 = W3 + W3v; 65 | b3 = b3 + b3v; 66 | 67 | delta2 = zeros(L,H); 68 | deltaFnext = zeros(1,H); 69 | for k = L:-1:1 70 | delta2(k,:) = (delta3(k,:)*W3' + deltaFnext*WF').*deriv(a2(k,:)); 71 | deltaFnext = delta2(k,:); 72 | end 73 | 74 | % update the weights 75 | 76 | WFv = momentum*WFv + 1/L * LR*a2(1:end-1,:)'*delta2(2:end,:); 77 | WF = WF + WFv; 78 | 79 | W2v = momentum*W2v + 1/L * LR*a1'*delta2; 80 | b2v = momentum*b2v + LR*mean(delta2,1); 81 | W2 = W2 + W2v; 82 | b2 = b2 + b2v; 83 | 84 | delta1 = (W2*delta2')'.*deriv(a1); 85 | W1v = momentum*W1v + 1/L * LR*input'*delta1; 86 | b1v = momentum*b1v + LR*mean(delta1,1); 87 | W1 = W1 + W1v; 88 | b1 = b1 + b1v; 89 | 
instr = fgetl(f); 90 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 91 | if mod(count,100)==0 92 | 93 | correct = 0; 94 | total = 0; 95 | tf = fopen('J:\\lab_pc\\char_rnn\\eng10k.txt'); 96 | perp = 0; 97 | for w = 1:1000 98 | tinstr = fgetl(tf); 99 | tinstr = tinstr(1:min(length(tinstr),34)); 100 | input = id2oneofk2(tinstr(1:end-1),'ABCDEFGHIJKLMNOPQRSTUVWXYZ'); 101 | label = id2oneofk2(tinstr(2:end),'ABCDEFGHIJKLMNOPQRSTUVWXYZ'); 102 | freqs = mean(id2oneofk2(tinstr,'ABCDEFGHIJKLMNOPQRSTUVWXYZ')); 103 | teln = eln(w,:); 104 | tern = ern(w,:); 105 | input = [input, label*freqs', [label(2:end,:)*freqs';0], [label(3:end,:)*freqs';0;0],... 106 | label*teln', [label(2:end,:)*teln';0], [label(3:end,:)*teln';0;0],... 107 | label*tern', [label(2:end,:)*tern';0], [label(3:end,:)*tern';0;0]]; 108 | 109 | LL = size(input,1); 110 | a22 = zeros(LL,H); 111 | a2prev = zeros(1,H); 112 | a11 = sigm(bsxfun(@plus,input*W1,b1)); 113 | for k = 1:LL 114 | a22(k,:) = sigm(a11(k,:)*W2 + a2prev*WF + b2); 115 | a2prev = a22(k,:); 116 | end 117 | %out1 = sigm(bsxfun(@plus,a22*W3,b3)); 118 | out1 = exp(bsxfun(@plus,a22*W3,b3)); 119 | out1 = bsxfun(@rdivide,out1,sum(out1,2)+eps); 120 | 121 | %out1 = out1(:,1:3); 122 | [~,ind1] = max(out1,[],2); 123 | [~,ind2] = max(label,[],2); 124 | perp = perp + sum(log(diag(out1(:,ind2)))); 125 | total = total + LL; 126 | correct = correct + sum(ind1==ind2); 127 | 128 | end 129 | acc = correct/total; 130 | fprintf('%d: %f (%f,%f)',count,LR,exp(-perp/total),acc); 131 | 132 | fclose(tf); 133 | if exp(-perp/total) < bestperp 134 | bestperp = exp(-perp/total); 135 | save(writeto,'W1','W2','W3','b1','b2','b3','WF'); 136 | fprintf(' saved'); 137 | else 138 | %LR = LR * 0.995; 139 | end 140 | fprintf('\n'); 141 | fprintf('%s\n',char(ind2+'A'-1)); 142 | fprintf('%s\n',char(ind1+'A'-1)); 143 | 144 | end 145 | count = count + 1; 146 | %fprintf('\naccuracy = %f\n',acc); 147 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 148 | %LR = LR * 0.9999; 149 | end 150 | fclose(f); 151 | 
fprintf('\naccuracy = %f\n',acc); 152 | 153 | 154 | -------------------------------------------------------------------------------- /savednn800small9B.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jameslyons/python_cryptanalysis/81ad09e519777603c1fd276381849d18246382b4/savednn800small9B.mat --------------------------------------------------------------------------------