├── sha1.h ├── pow ├── pow-fast.c └── cacheit.c ├── README.md ├── powserver.py ├── sha256.h ├── blake.c ├── rc4.c ├── perl-rand-sha1.c ├── aes256_ecb.h ├── aes256_ecb.cpp ├── aes128.c ├── sha256.cpp ├── sha1.c └── sha256.c /sha1.h: -------------------------------------------------------------------------------- 1 | #include 2 | typedef struct { 3 | #define MBYTES 64 4 | unsigned char msgbuf[MBYTES]; 5 | size_t msgbuf_count; 6 | uint64_t total_count; 7 | __m128i h0123; 8 | __m128i h4; 9 | } sha1_ctx; 10 | void SHA1Init(sha1_ctx* ctx); 11 | void SHA1Update(sha1_ctx* ctx, const void* buf, size_t length); 12 | void SHA1Final(sha1_ctx* ctx, void* digest); 13 | -------------------------------------------------------------------------------- /pow/pow-fast.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | // Retrieve cached sha256 starting with XXXXXX 7 | int main(int argc, char** argv){ 8 | uint32_t key; 9 | sscanf(argv[1], "%x", &key); 10 | key = ((uint32_t)htonl(key))>>8; 11 | // printf("%x\n", key); 12 | 13 | uint64_t value; 14 | FILE* cache = fopen("powcache.bin", "rb"); 15 | fseek(cache, sizeof(uint64_t)*key, SEEK_SET); 16 | fread(&value, sizeof(uint64_t),1, cache); 17 | fclose(cache); 18 | 19 | printf("%lu", value); 20 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Fast parallel hardware-accelerated brute-force for popular crypto algorithms 2 | 3 | I'm bored of seeing the same problems over and over again in CTFs. 4 | It's always AES or RC4, SHA or MD5. It's always a 32 bits brute. 5 | 6 | And I was also annoyed by how difficult it is to find a simple, hackable example code 7 | for using the fancy cpu instructions. 8 | 9 | I just collected all the code I had lying around for parallelizing the brute force. 10 | Next time I can just copy paste the code and quickly work off the template. 11 | Run it on some beefy server with a nice cpu with many cores and AES-NI and SHA-NI. 12 | -------------------------------------------------------------------------------- /powserver.py: -------------------------------------------------------------------------------- 1 | import socket 2 | from subprocess import Popen, PIPE 3 | import os 4 | 5 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 6 | s.bind(('0.0.0.0', 16969)) 7 | s.listen(1) 8 | 9 | while True: 10 | conn, addr = s.accept() 11 | print 'Connection address:', addr 12 | data = conn.recv(1024) 13 | prefix = data.split('\n')[0] 14 | print 'prefix: %s' % (prefix,) 15 | Popen(['./fastpow', prefix], stdout=PIPE).communicate() 16 | output = open('sice.txt', 'r').read() 17 | print 'PoW: %s' % (output,) 18 | conn.send(output) 19 | conn.close() 20 | try: 21 | os.remove('sice.txt') 22 | except OSError: 23 | continue 24 | 25 | -------------------------------------------------------------------------------- /sha256.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | class SHA256H 6 | { 7 | protected: 8 | // Message block 9 | static const size_t MBYTES = 64; 10 | unsigned char msgbuf[MBYTES]; 11 | size_t msgbuf_count; // length (in byte) of the data currently in the message block 12 | unsigned __int64 total_count; // total length (in byte) of the message 13 | 14 | // Intermediate hash 15 | __m128i h0145; // h0:h1:h4:h5 16 | __m128i h2367; // h2:h3:h6:h7 17 | 18 | public: 19 | SHA256H() { Initialize(); } 20 | ~SHA256H() {} 21 | 22 | void Update(const void* buf, size_t length); 23 | void Final(void* digest); 24 | 25 | protected: 26 | void Initialize(); 27 | void ProcessMsgBlock(const unsigned char* msg); 28 | }; -------------------------------------------------------------------------------- /blake.c: -------------------------------------------------------------------------------- 1 | // compile me with -O3 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #define PARALLEL 7 14 | 15 | int main(){ 16 | uint8_t buf[64]; 17 | uint8_t hash[16]; 18 | memset(buf, 0, 64); 19 | read(0, buf, 16); 20 | 21 | // parallelism shit 22 | setpgid(0,0); 23 | unsigned int brute = 0; 24 | uint64_t upto = (0x100000000L / (uint64_t)PARALLEL); 25 | int worker = 0; 26 | for (;worker < PARALLEL; worker++) { 27 | if (!fork()) { 28 | fprintf(stderr, "worker %d, %x to %lx\n", worker,brute,upto); 29 | break; // child 30 | } 31 | brute = upto; 32 | upto += (0x100000000L / (uint64_t)PARALLEL); 33 | } 34 | uint32_t start = brute; 35 | 36 | do { 37 | if ((brute & 0xffffff) == 0) fprintf(stderr, "%08x, %.02f%% done\n", brute, 100.f*(brute-start)/(float)(upto-start)); 38 | *(uint32_t*)(&buf[16]) = brute; 39 | 40 | // check 41 | crypto_generichash(hash, 16, buf, 64, 0, 0); 42 | if ((*(uint32_t*)hash & 0x00ffffff) != 0) 43 | goto fail; 44 | 45 | //gucci 46 | write(1, buf, 64); 47 | // int fd = open("sice.txt", O_APPEND | O_RDWR | O_CREAT,0); 48 | // dprintf(fd, "FOUND IT!!!!!!! %08x\n", brute); 49 | // close(fd); 50 | kill(0, SIGQUIT); // kill process group 51 | break; 52 | 53 | fail: 54 | brute++; 55 | } while(brute < upto); 56 | fprintf(stderr, "exhausted...\n"); 57 | } 58 | 59 | -------------------------------------------------------------------------------- /rc4.c: -------------------------------------------------------------------------------- 1 | // from fbctf 2019 2 | // compile me with -O3 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #define PARALLEL 8 14 | 15 | int main(){ 16 | unsigned char desired[4] = {231, 112, 90, 192}; // desired keystream 17 | unsigned char key[8]; 18 | *(uint32_t*)&key[4] = 0x36395477; 19 | 20 | uint64_t rolling_val = 0x0706050403020100; 21 | unsigned char initial[256]; 22 | for (int i = 0; i < 256; i++) 23 | initial[i] = i; 24 | unsigned char S[256]; 25 | 26 | // parallelism shit 27 | unsigned int brute = 0; 28 | uint64_t upto = (0x100000000L / (uint64_t)PARALLEL); 29 | int worker = 0; 30 | for (;worker < PARALLEL; worker++) { 31 | if (!fork()) { 32 | printf("worker %d, %x to %lx\n", worker,brute,upto); 33 | break; // child 34 | } 35 | brute = upto; 36 | upto += (0x100000000L / (uint64_t)PARALLEL); 37 | } 38 | uint32_t start = brute; 39 | 40 | do { 41 | if ((brute & 0xffffff) == 0) printf("%08x, %.02f%% done\n", brute, 100.f*(brute-start)/(float)(upto-start)); 42 | memcpy(S, initial, 256); 43 | *(uint32_t*)key = brute; 44 | 45 | unsigned char j = 0; 46 | for (int i = 0; i < 256; i++){ 47 | j = (j + S[i] + key[i % 8]); 48 | unsigned char tmp = S[i]; 49 | S[i] = S[j]; 50 | S[j] = tmp; 51 | } 52 | unsigned char i =0; 53 | j = 0; 54 | for (int n = 0; n < 4; n++) { 55 | i = (i + 1); 56 | j = (j + S[i]); 57 | unsigned char tmp = S[i]; 58 | S[i] = S[j]; 59 | S[j] = tmp; 60 | if (S[(S[i] + S[j]) & 0xff] != desired[n]) goto fail; 61 | } 62 | 63 | //gucci 64 | unsigned char cipher[8] = {0xF6,0x2C,0x72,0x1A,0x03,0x99,0x0E,0x78}; 65 | printf("FOUND IT!!!!!!! %08x\n", brute); 66 | for (int n = 0; n < 4; n++) { 67 | i = (i + 1); 68 | j = (j + S[i]); 69 | unsigned char tmp = S[i]; 70 | S[i] = S[j]; 71 | S[j] = tmp; 72 | printf("%02x ", (uint8_t)S[(S[i] + S[j])&0xff]^cipher[n+4]); 73 | }printf("\n"); 74 | int fd = open("sice.txt", O_APPEND | O_RDWR | O_CREAT,0); 75 | dprintf(fd, "FOUND IT!!!!!!! %08x\n", brute); 76 | close(fd); 77 | kill(0,SIGQUIT); 78 | break; 79 | 80 | fail: 81 | brute++; 82 | // super ghetto ascii-only brute 83 | if ((brute & 0xff) < 0x20) brute += 0x20-(brute&0xff); 84 | if ((brute & 0xffff) < 0x2000) brute += 0x2000-(brute&0xffff); 85 | if ((brute & 0xffffff) < 0x200000) brute += 0x200000-(brute&0xffffff); 86 | if ((brute & 0xffffffff) < 0x20000000) brute += 0x20000000-(brute&0xffffffff); 87 | if ((brute & 0xff) >= 0x7f) brute += 0x100-(brute&0xff); 88 | if ((brute & 0xffff) >= 0x7f00) brute += 0x10000-(brute&0xffff); 89 | if ((brute & 0xffffff) >= 0x7f0000) brute += 0x1000000-(brute&0xffffff); 90 | if ((brute & 0xffffffff) >= 0x7f000000) break; 91 | } while(brute < upto); 92 | } 93 | -------------------------------------------------------------------------------- /perl-rand-sha1.c: -------------------------------------------------------------------------------- 1 | // clang -Wall -O3 -mssse3 -msha cpu-brute.c sha.c -o brute 2 | 3 | // Failed attempt to brute ida 7.2 installation key :( 4 | 5 | // Bullshit ripped from perl source code 6 | #include 7 | #include 8 | #define FREEBSD_DRAND48_SEED_0 (0x330e) 9 | 10 | 11 | #define FREEBSD_DRAND48_SEED_1 (0xabcd) 12 | #define FREEBSD_DRAND48_SEED_2 (0x1234) 13 | #define FREEBSD_DRAND48_MULT_0 (0xe66d) 14 | #define FREEBSD_DRAND48_MULT_1 (0xdeec) 15 | #define FREEBSD_DRAND48_MULT_2 (0x0005) 16 | #define FREEBSD_DRAND48_ADD (0x000b) 17 | 18 | const unsigned short _rand48_mult[3] = { 19 | FREEBSD_DRAND48_MULT_0, 20 | FREEBSD_DRAND48_MULT_1, 21 | FREEBSD_DRAND48_MULT_2 22 | }; 23 | 24 | const unsigned short _rand48_add = FREEBSD_DRAND48_ADD; 25 | 26 | #define U16 uint16_t 27 | #define U32 uint32_t 28 | typedef struct { 29 | U16 seed[3]; 30 | } perl_drand48_t; 31 | 32 | void Perl_drand48_init_r(perl_drand48_t *random_state, U32 seed) 33 | { 34 | random_state->seed[0] = FREEBSD_DRAND48_SEED_0; 35 | random_state->seed[1] = (U16) seed; 36 | random_state->seed[2] = (U16) (seed >> 16); 37 | } 38 | 39 | 40 | double Perl_drand48_r(perl_drand48_t *random_state) 41 | { 42 | U32 accu; 43 | U16 temp[2]; 44 | 45 | accu = (U32) _rand48_mult[0] * (U32) random_state->seed[0] 46 | + (U32) _rand48_add; 47 | temp[0] = (U16) accu; /* lower 16 bits */ 48 | accu >>= sizeof(U16) * 8; 49 | accu += (U32) _rand48_mult[0] * (U32) random_state->seed[1] 50 | + (U32) _rand48_mult[1] * (U32) random_state->seed[0]; 51 | temp[1] = (U16) accu; /* middle 16 bits */ 52 | accu >>= sizeof(U16) * 8; 53 | accu += _rand48_mult[0] * random_state->seed[2] 54 | + _rand48_mult[1] * random_state->seed[1] 55 | + _rand48_mult[2] * random_state->seed[0]; 56 | random_state->seed[0] = temp[0]; 57 | random_state->seed[1] = temp[1]; 58 | random_state->seed[2] = (U16) accu; 59 | 60 | return ldexp((double) random_state->seed[0], -48) + 61 | ldexp((double) random_state->seed[1], -32) + 62 | ldexp((double) random_state->seed[2], -16); 63 | } 64 | 65 | #include "sha1.h" 66 | 67 | #include 68 | #include 69 | #include 70 | #include 71 | #include 72 | #include 73 | #include 74 | #define PARALLEL 48 75 | int main() 76 | { 77 | char* charset = "abcdefghijkmpqrstuvwxyzABCDEFGHJKLMPQRSTUVWXYZ23456789"; 78 | uint8_t hash[20] = { 79 | 0xF2, 0x9F, 0x55, 0xF0, 0x7C, 0x04, 0x3A, 0xD3, 0x4B, 0x3D, 0xE1, 0x50, 80 | 0x50, 0x15, 0x35, 0xF4, 0x44, 0x24, 0xED, 0xAD 81 | }; 82 | 83 | uint8_t pw[37] = { 84 | 0x50, 0x61, 0x73, 0x73, 0x77, 0x6F, 0x72, 0x64, 0x43, 0x68, 0x65, 0x63, 85 | 0x6B, 0x48, 0x61, 0x73, 0x68, 0xC4, 0x16, 0x39, 0x79, 0x28, 0x46, 0xE4, 86 | 0x56, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 87 | 0x00 88 | }; 89 | perl_drand48_t rand_state; 90 | sha1_ctx ctx; 91 | uint8_t digest[20]; 92 | 93 | uint32_t seed = 0; 94 | uint32_t upto = (uint32_t)(0x100000000L / (uint64_t)PARALLEL); 95 | int worker = 0; 96 | for (; worker < PARALLEL; worker++) { 97 | if (!fork()) { 98 | printf("Worker %d, bruting %x to %x\n", worker,seed,upto); 99 | break; // child 100 | } 101 | seed = upto; 102 | upto += (uint64_t)(0x100000000L / (uint64_t)PARALLEL); 103 | } 104 | 105 | do 106 | { 107 | if (!(seed & 0xffffff)) printf("%x\n", seed); 108 | Perl_drand48_init_r(&rand_state, seed); 109 | for (int i = 0; i < 12; i++) 110 | { 111 | int key = (int)(Perl_drand48_r(&rand_state) * 54.0); 112 | pw[i+0x19] = charset[key]; 113 | } 114 | SHA1Init(&ctx); 115 | SHA1Update(&ctx, pw, sizeof(pw)); 116 | SHA1Final(&ctx, digest); 117 | 118 | if (!memcmp(digest, hash, 20)) 119 | { 120 | printf("CRACKED!!!! %u %s\n", seed, &pw[0x19]); 121 | int fd = open("sice.txt", O_APPEND | O_RDWR | O_CREAT,0); 122 | dprintf(fd, "CRACKED!!!! %u %s\n", seed, &pw[0x19]); 123 | close(fd); 124 | kill(0, SIGQUIT); 125 | break; 126 | } 127 | } while(++seed != upto); 128 | printf("Worker %d done\n", worker); 129 | } 130 | -------------------------------------------------------------------------------- /aes256_ecb.h: -------------------------------------------------------------------------------- 1 | /* crypto/aes/aes.h -*- mode:C; c-file-style: "eay" -*- */ 2 | /* ==================================================================== 3 | * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions 7 | * are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright 13 | * notice, this list of conditions and the following disclaimer in 14 | * the documentation and/or other materials provided with the 15 | * distribution. 16 | * 17 | * 3. All advertising materials mentioning features or use of this 18 | * software must display the following acknowledgment: 19 | * "This product includes software developed by the OpenSSL Project 20 | * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 21 | * 22 | * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 23 | * endorse or promote products derived from this software without 24 | * prior written permission. For written permission, please contact 25 | * openssl-core@openssl.org. 26 | * 27 | * 5. Products derived from this software may not be called "OpenSSL" 28 | * nor may "OpenSSL" appear in their names without prior written 29 | * permission of the OpenSSL Project. 30 | * 31 | * 6. Redistributions of any form whatsoever must retain the following 32 | * acknowledgment: 33 | * "This product includes software developed by the OpenSSL Project 34 | * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 35 | * 36 | * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 37 | * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 38 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 39 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 40 | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 41 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 42 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 43 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 44 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 45 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 46 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 47 | * OF THE POSSIBILITY OF SUCH DAMAGE. 48 | * ==================================================================== 49 | * 50 | */ 51 | 52 | #ifndef _AES_ 53 | #define _AES_ 54 | 55 | #include /* SSE instructions and _mm_malloc */ 56 | #include /* SSE2 instructions */ 57 | 58 | 59 | typedef __m128i block; 60 | 61 | typedef struct { __m128i rd_key[15]; int rounds; } AES_KEY; 62 | #define ROUNDS(ctx) ((ctx)->rounds) 63 | 64 | #define EXPAND_ASSIST(v1,v2,v3,v4,shuff_const,aes_const) \ 65 | v2 = _mm_aeskeygenassist_si128(v4,aes_const); \ 66 | v3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v3), \ 67 | _mm_castsi128_ps(v1), 16)); \ 68 | v1 = _mm_xor_si128(v1,v3); \ 69 | v3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v3), \ 70 | _mm_castsi128_ps(v1), 140)); \ 71 | v1 = _mm_xor_si128(v1,v3); \ 72 | v2 = _mm_shuffle_epi32(v2,shuff_const); \ 73 | v1 = _mm_xor_si128(v1,v2) 74 | 75 | #define EXPAND192_STEP(idx,aes_const) \ 76 | EXPAND_ASSIST(x0,x1,x2,x3,85,aes_const); \ 77 | x3 = _mm_xor_si128(x3,_mm_slli_si128 (x3, 4)); \ 78 | x3 = _mm_xor_si128(x3,_mm_shuffle_epi32(x0, 255)); \ 79 | kp[idx] = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(tmp), \ 80 | _mm_castsi128_ps(x0), 68)); \ 81 | kp[idx+1] = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(x0), \ 82 | _mm_castsi128_ps(x3), 78)); \ 83 | EXPAND_ASSIST(x0,x1,x2,x3,85,(aes_const*2)); \ 84 | x3 = _mm_xor_si128(x3,_mm_slli_si128 (x3, 4)); \ 85 | x3 = _mm_xor_si128(x3,_mm_shuffle_epi32(x0, 255)); \ 86 | kp[idx+2] = x0; tmp = x3 87 | 88 | 89 | int AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); 90 | void AES_set_decrypt_key_fast(AES_KEY *dkey, const AES_KEY *ekey); 91 | int AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); 92 | 93 | void AES_encrypt(const unsigned char *in, unsigned char *out, const AES_KEY *key); 94 | void AES_decrypt(const unsigned char *in, unsigned char *out, const AES_KEY *key); 95 | void AES_ecb_encrypt_blks(block *blks, unsigned nblks, AES_KEY *key); 96 | void AES_ecb_decrypt_blks(block *blks, unsigned nblks, AES_KEY *key); 97 | 98 | #endif 99 | -------------------------------------------------------------------------------- /aes256_ecb.cpp: -------------------------------------------------------------------------------- 1 | // Another honorable mention: 2 | // https://github.com/BrianGladman/aes/blob/master/aes_ni.c 3 | 4 | /* 5 | This file is part of JustGarble. 6 | 7 | JustGarble is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | JustGarble is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with JustGarble. If not, see . 19 | 20 | */ 21 | 22 | 23 | #include 24 | #include "aes256_ecb.h" 25 | 26 | void AES_128_Key_Expansion(const unsigned char *userkey, void *key) { 27 | __m128i x0, x1, x2; 28 | __m128i *kp = (__m128i *) key; 29 | kp[0] = x0 = _mm_loadu_si128((__m128i *) userkey); 30 | x2 = _mm_setzero_si128(); 31 | EXPAND_ASSIST(x0, x1, x2, x0, 255, 1); 32 | kp[1] = x0; 33 | EXPAND_ASSIST(x0, x1, x2, x0, 255, 2); 34 | kp[2] = x0; 35 | EXPAND_ASSIST(x0, x1, x2, x0, 255, 4); 36 | kp[3] = x0; 37 | EXPAND_ASSIST(x0, x1, x2, x0, 255, 8); 38 | kp[4] = x0; 39 | EXPAND_ASSIST(x0, x1, x2, x0, 255, 16); 40 | kp[5] = x0; 41 | EXPAND_ASSIST(x0, x1, x2, x0, 255, 32); 42 | kp[6] = x0; 43 | EXPAND_ASSIST(x0, x1, x2, x0, 255, 64); 44 | kp[7] = x0; 45 | EXPAND_ASSIST(x0, x1, x2, x0, 255, 128); 46 | kp[8] = x0; 47 | EXPAND_ASSIST(x0, x1, x2, x0, 255, 27); 48 | kp[9] = x0; 49 | EXPAND_ASSIST(x0, x1, x2, x0, 255, 54); 50 | kp[10] = x0; 51 | } 52 | 53 | void AES_192_Key_Expansion(const unsigned char *userkey, void *key) { 54 | __m128i x0, x1, x2, x3, tmp, *kp = (__m128i *) key; 55 | kp[0] = x0 = _mm_loadu_si128((__m128i *) userkey); 56 | tmp = x3 = _mm_loadu_si128((__m128i *) (userkey + 16)); 57 | x2 = _mm_setzero_si128(); 58 | EXPAND192_STEP(1, 1); 59 | EXPAND192_STEP(4, 4); 60 | EXPAND192_STEP(7, 16); 61 | EXPAND192_STEP(10, 64); 62 | } 63 | 64 | void AES_256_Key_Expansion(const unsigned char *userkey, void *key) { 65 | __m128i x0, x1, x2, x3, *kp = (__m128i *) key; 66 | kp[0] = x0 = _mm_loadu_si128((__m128i *) userkey); 67 | kp[1] = x3 = _mm_loadu_si128((__m128i *) (userkey + 16)); 68 | x2 = _mm_setzero_si128(); 69 | EXPAND_ASSIST(x0, x1, x2, x3, 255, 1); 70 | kp[2] = x0; 71 | EXPAND_ASSIST(x3, x1, x2, x0, 170, 1); 72 | kp[3] = x3; 73 | EXPAND_ASSIST(x0, x1, x2, x3, 255, 2); 74 | kp[4] = x0; 75 | EXPAND_ASSIST(x3, x1, x2, x0, 170, 2); 76 | kp[5] = x3; 77 | EXPAND_ASSIST(x0, x1, x2, x3, 255, 4); 78 | kp[6] = x0; 79 | EXPAND_ASSIST(x3, x1, x2, x0, 170, 4); 80 | kp[7] = x3; 81 | EXPAND_ASSIST(x0, x1, x2, x3, 255, 8); 82 | kp[8] = x0; 83 | EXPAND_ASSIST(x3, x1, x2, x0, 170, 8); 84 | kp[9] = x3; 85 | EXPAND_ASSIST(x0, x1, x2, x3, 255, 16); 86 | kp[10] = x0; 87 | EXPAND_ASSIST(x3, x1, x2, x0, 170, 16); 88 | kp[11] = x3; 89 | EXPAND_ASSIST(x0, x1, x2, x3, 255, 32); 90 | kp[12] = x0; 91 | EXPAND_ASSIST(x3, x1, x2, x0, 170, 32); 92 | kp[13] = x3; 93 | EXPAND_ASSIST(x0, x1, x2, x3, 255, 64); 94 | kp[14] = x0; 95 | } 96 | 97 | int AES_set_encrypt_key(const unsigned char *userKey, const int bits, 98 | AES_KEY *key) { 99 | if (bits == 128) { 100 | AES_128_Key_Expansion(userKey, key); 101 | } 102 | else if (bits == 192) { 103 | AES_192_Key_Expansion(userKey, key); 104 | } 105 | else if (bits == 256) { 106 | AES_256_Key_Expansion(userKey, key); 107 | } 108 | #if (OCB_KEY_LEN == 0) 109 | key->rounds = 6 + bits / 32; 110 | #endif 111 | return 0; 112 | } 113 | 114 | void AES_set_decrypt_key_fast(AES_KEY *dkey, const AES_KEY *ekey) { 115 | int j = 0; 116 | int i = ROUNDS(ekey); 117 | #if (OCB_KEY_LEN == 0) 118 | dkey->rounds = i; 119 | #endif 120 | dkey->rd_key[i--] = ekey->rd_key[j++]; 121 | while (i) 122 | dkey->rd_key[i--] = _mm_aesimc_si128(ekey->rd_key[j++]); 123 | dkey->rd_key[i] = ekey->rd_key[j]; 124 | } 125 | 126 | int AES_set_decrypt_key(const unsigned char *userKey, const int bits, 127 | AES_KEY *key) { 128 | AES_KEY temp_key; 129 | AES_set_encrypt_key(userKey, bits, &temp_key); 130 | AES_set_decrypt_key_fast(key, &temp_key); 131 | return 0; 132 | } 133 | 134 | void AES_encrypt(const unsigned char *in, unsigned char *out, 135 | const AES_KEY *key) { 136 | int j, rnds = ROUNDS(key); 137 | const __m128i *sched = ((__m128i *) (key->rd_key)); 138 | __m128i tmp = _mm_load_si128((__m128i *) in); 139 | tmp = _mm_xor_si128(tmp, sched[0]); 140 | for (j = 1; j < rnds; j++) 141 | tmp = _mm_aesenc_si128(tmp, sched[j]); 142 | tmp = _mm_aesenclast_si128(tmp, sched[j]); 143 | _mm_store_si128((__m128i *) out, tmp); 144 | } 145 | 146 | void AES_decrypt(const unsigned char *in, unsigned char *out, 147 | const AES_KEY *key) { 148 | int j, rnds = ROUNDS(key); 149 | const __m128i *sched = ((__m128i *) (key->rd_key)); 150 | __m128i tmp = _mm_load_si128((__m128i *) in); 151 | tmp = _mm_xor_si128(tmp, sched[0]); 152 | for (j = 1; j < rnds; j++) 153 | tmp = _mm_aesdec_si128(tmp, sched[j]); 154 | tmp = _mm_aesdeclast_si128(tmp, sched[j]); 155 | _mm_store_si128((__m128i *) out, tmp); 156 | } 157 | 158 | void AES_ecb_encrypt_blks(block *blks, unsigned nblks, AES_KEY *key) { 159 | unsigned i, j, rnds = ROUNDS(key); 160 | const __m128i *sched = ((__m128i *) (key->rd_key)); 161 | for (i = 0; i < nblks; ++i) 162 | blks[i] = _mm_xor_si128(blks[i], sched[0]); 163 | for (j = 1; j < rnds; ++j) 164 | for (i = 0; i < nblks; ++i) 165 | blks[i] = _mm_aesenc_si128(blks[i], sched[j]); 166 | for (i = 0; i < nblks; ++i) 167 | blks[i] = _mm_aesenclast_si128(blks[i], sched[j]); 168 | } 169 | 170 | void AES_ecb_encrypt_blks_4(block *blks, AES_KEY *key) { 171 | unsigned i, j, rnds = ROUNDS(key); 172 | const __m128i *sched = ((__m128i *) (key->rd_key)); 173 | blks[0] = _mm_xor_si128(blks[0], sched[0]); 174 | blks[1] = _mm_xor_si128(blks[1], sched[0]); 175 | blks[2] = _mm_xor_si128(blks[2], sched[0]); 176 | blks[3] = _mm_xor_si128(blks[3], sched[0]); 177 | 178 | for (j = 1; j < rnds; ++j) { 179 | blks[0] = _mm_aesenc_si128(blks[0], sched[j]); 180 | blks[1] = _mm_aesenc_si128(blks[1], sched[j]); 181 | blks[2] = _mm_aesenc_si128(blks[2], sched[j]); 182 | blks[3] = _mm_aesenc_si128(blks[3], sched[j]); 183 | } 184 | blks[0] = _mm_aesenclast_si128(blks[0], sched[j]); 185 | blks[1] = _mm_aesenclast_si128(blks[1], sched[j]); 186 | blks[2] = _mm_aesenclast_si128(blks[2], sched[j]); 187 | blks[3] = _mm_aesenclast_si128(blks[3], sched[j]); 188 | } 189 | 190 | 191 | void AES_ecb_decrypt_blks(block *blks, unsigned nblks, AES_KEY *key) { 192 | unsigned i, j, rnds = ROUNDS(key); 193 | const __m128i *sched = ((__m128i *) (key->rd_key)); 194 | for (i = 0; i < nblks; ++i) 195 | blks[i] = _mm_xor_si128(blks[i], sched[0]); 196 | for (j = 1; j < rnds; ++j) 197 | for (i = 0; i < nblks; ++i) 198 | blks[i] = _mm_aesdec_si128(blks[i], sched[j]); 199 | for (i = 0; i < nblks; ++i) 200 | blks[i] = _mm_aesdeclast_si128(blks[i], sched[j]); 201 | } 202 | 203 | -------------------------------------------------------------------------------- /aes128.c: -------------------------------------------------------------------------------- 1 | // from defcon quals 2019 2 | // gcc -march=native -O3 test.c -lcrypto 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | //compile using gcc and following arguments: -g;-O0;-Wall;-msse2;-msse;-march=native;-maes 15 | #define DO_ENC_BLOCK(m,k) \ 16 | do{\ 17 | m = _mm_xor_si128 (m, k[ 0]); \ 18 | m = _mm_aesenc_si128 (m, k[ 1]); \ 19 | m = _mm_aesenc_si128 (m, k[ 2]); \ 20 | m = _mm_aesenc_si128 (m, k[ 3]); \ 21 | m = _mm_aesenc_si128 (m, k[ 4]); \ 22 | m = _mm_aesenc_si128 (m, k[ 5]); \ 23 | m = _mm_aesenc_si128 (m, k[ 6]); \ 24 | m = _mm_aesenc_si128 (m, k[ 7]); \ 25 | m = _mm_aesenc_si128 (m, k[ 8]); \ 26 | m = _mm_aesenc_si128 (m, k[ 9]); \ 27 | m = _mm_aesenclast_si128(m, k[10]);\ 28 | }while(0) 29 | 30 | #define DO_DEC_BLOCK(m,k) \ 31 | do{\ 32 | m = _mm_xor_si128 (m, k[10+0]); \ 33 | m = _mm_aesdec_si128 (m, k[10+1]); \ 34 | m = _mm_aesdec_si128 (m, k[10+2]); \ 35 | m = _mm_aesdec_si128 (m, k[10+3]); \ 36 | m = _mm_aesdec_si128 (m, k[10+4]); \ 37 | m = _mm_aesdec_si128 (m, k[10+5]); \ 38 | m = _mm_aesdec_si128 (m, k[10+6]); \ 39 | m = _mm_aesdec_si128 (m, k[10+7]); \ 40 | m = _mm_aesdec_si128 (m, k[10+8]); \ 41 | m = _mm_aesdec_si128 (m, k[10+9]); \ 42 | m = _mm_aesdeclast_si128(m, k[0]);\ 43 | }while(0) 44 | 45 | #define AES_128_key_exp(k, rcon) aes_128_key_expansion(k, _mm_aeskeygenassist_si128(k, rcon)) 46 | 47 | static __m128i key_schedule[20];//the expanded key 48 | 49 | static __m128i aes_128_key_expansion(__m128i key, __m128i keygened){ 50 | keygened = _mm_shuffle_epi32(keygened, _MM_SHUFFLE(3,3,3,3)); 51 | key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); 52 | key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); 53 | key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); 54 | return _mm_xor_si128(key, keygened); 55 | } 56 | 57 | //public API 58 | void aes128_load_key(int8_t *enc_key){ 59 | key_schedule[0] = _mm_load_si128((const __m128i*) enc_key); 60 | key_schedule[1] = AES_128_key_exp(key_schedule[0], 0x01); 61 | key_schedule[2] = AES_128_key_exp(key_schedule[1], 0x02); 62 | key_schedule[3] = AES_128_key_exp(key_schedule[2], 0x04); 63 | key_schedule[4] = AES_128_key_exp(key_schedule[3], 0x08); 64 | key_schedule[5] = AES_128_key_exp(key_schedule[4], 0x10); 65 | key_schedule[6] = AES_128_key_exp(key_schedule[5], 0x20); 66 | key_schedule[7] = AES_128_key_exp(key_schedule[6], 0x40); 67 | key_schedule[8] = AES_128_key_exp(key_schedule[7], 0x80); 68 | key_schedule[9] = AES_128_key_exp(key_schedule[8], 0x1B); 69 | key_schedule[10] = AES_128_key_exp(key_schedule[9], 0x36); 70 | 71 | // generate decryption keys in reverse order. 72 | // k[10] is shared by last encryption and first decryption rounds 73 | // k[0] is shared by first encryption round and last decryption round (and is the original user key) 74 | // For some implementation reasons, decryption key schedule is NOT the encryption key schedule in reverse order 75 | key_schedule[11] = _mm_aesimc_si128(key_schedule[9]); 76 | key_schedule[12] = _mm_aesimc_si128(key_schedule[8]); 77 | key_schedule[13] = _mm_aesimc_si128(key_schedule[7]); 78 | key_schedule[14] = _mm_aesimc_si128(key_schedule[6]); 79 | key_schedule[15] = _mm_aesimc_si128(key_schedule[5]); 80 | key_schedule[16] = _mm_aesimc_si128(key_schedule[4]); 81 | key_schedule[17] = _mm_aesimc_si128(key_schedule[3]); 82 | key_schedule[18] = _mm_aesimc_si128(key_schedule[2]); 83 | key_schedule[19] = _mm_aesimc_si128(key_schedule[1]); 84 | } 85 | 86 | void aes128_enc(int8_t *plainText,int8_t *cipherText){ 87 | __m128i m = _mm_load_si128((__m128i *) plainText); 88 | 89 | DO_ENC_BLOCK(m,key_schedule); 90 | 91 | _mm_storeu_si128((__m128i *) cipherText, m); 92 | } 93 | 94 | __m128i aes128_dec(__m128i m){ 95 | DO_DEC_BLOCK(m,key_schedule); 96 | return m; 97 | } 98 | 99 | #include 100 | 101 | 102 | // 2nd to last block 103 | __attribute__ ((aligned(16))) unsigned char lastiv[16] = { 104 | 0x7C,0xD5,0xE0,0xDD,0x5D,0x69,0x47,0xEF,0xA3,0xBE,0x28,0xD2,0xD8,0x42,0xF2,0x07, 105 | }; 106 | 107 | // last block 108 | __attribute__ ((aligned(16))) unsigned char lastblock[16] = { 109 | 0xA0,0x62,0x04,0x26,0x4E,0xA0,0x21,0xA9,0x18,0x4F,0xFF,0xAD,0x24,0x81,0x84,0x73, 110 | }; 111 | 112 | __attribute__ ((aligned(16))) unsigned char firstiv[16] = { 113 | 19, 55, 19, 55, 19, 55, 19, 55, 19, 55, 19, 55, 19, 55, 19, 55 114 | }; 115 | 116 | __attribute__ ((aligned(16))) unsigned char firstblock[16] = { 117 | 0x95, 0x40, 0x92, 0xE1, 0x67, 0x89, 0xAC, 0xB2, 0xEA, 0x6E, 0x53, 0xCA, 118 | 0x14, 0x6C, 0x4B, 0xFB 119 | }; 120 | 121 | 122 | 123 | __attribute__ ((aligned(16))) unsigned char key[16]; 124 | __attribute__ ((aligned(16))) unsigned char plain[16]; 125 | 126 | #define PARALLEL 48 127 | 128 | int main(void) 129 | { 130 | __m128i _lastiv = _mm_load_si128(lastiv); 131 | __m128i _lastblock = _mm_load_si128(lastblock); 132 | __m128i _firstiv = _mm_load_si128(firstiv); 133 | __m128i _firstblock = _mm_load_si128(firstblock); 134 | 135 | 136 | unsigned int brute = 0; 137 | unsigned int upto = (int)(0x100000000L / (uint64_t)PARALLEL); 138 | 139 | int worker = 0; 140 | for (;worker < PARALLEL; worker++) { 141 | if (!fork()) { 142 | printf("worker %d, %x to %x\n", worker,brute,upto); 143 | break; // child 144 | } 145 | brute = upto; 146 | upto += (int)(0x100000000L / (uint64_t)PARALLEL); 147 | } 148 | 149 | do { 150 | // if (!(brute & 0xfffff)) printf("%x\n",brute); 151 | MD5((unsigned char*)&brute, 4, key); 152 | aes128_load_key(key); 153 | _mm_store_si128(plain, _mm_xor_si128(aes128_dec(_lastblock), _lastiv)); 154 | 155 | // check padding 156 | unsigned char last = plain[15]; 157 | if (last < 1 || last > 16) goto bad; 158 | for (int i = 0; i < last; i++) { 159 | if (plain[15-i] != last) { 160 | goto bad; 161 | } 162 | } 163 | 164 | // check dex header key already loaded 165 | _mm_store_si128(plain, _mm_xor_si128(aes128_dec(_firstblock), _firstiv)); 166 | //if (*(int*)plain != 0xA786564 && *(int*)plain != 0x04034b50 && *(int*)plain != 0x464C457F) goto bad; // 'dex\n' and 'pk\x03\x04' 167 | if (*(int*)plain != 0x464C457F) goto bad; // 7FELF 168 | 169 | // gucci 170 | printf("FOUND IT!!!!!!! %08x\n", brute); 171 | int fd = open("sice.txt", O_APPEND | O_RDWR | O_CREAT,0); 172 | dprintf(fd, "FOUND IT!!!!!!! %08x\n", brute); 173 | for (int i = 0; i < 16; i++) { 174 | printf("%02x ", plain[i]); 175 | dprintf(fd, "%02x ", plain[i]); 176 | } 177 | printf("\n"); 178 | dprintf(fd, "\n"); 179 | close(fd); 180 | kill(0,SIGQUIT); 181 | break; 182 | 183 | bad:continue; 184 | } while(++brute != upto); 185 | printf("worker %d done\n", worker); 186 | } 187 | -------------------------------------------------------------------------------- /pow/cacheit.c: -------------------------------------------------------------------------------- 1 | /********************************************************************* 2 | * Filename: sha256.c 3 | * Author: Brad Conte (brad AT bradconte.com) 4 | * Copyright: 5 | * Disclaimer: This code is presented "as is" without any guarantees. 6 | * Details: Implementation of the SHA-256 hashing algorithm. 7 | SHA-256 is one of the three algorithms in the SHA2 8 | specification. The others, SHA-384 and SHA-512, are not 9 | offered in this implementation. 10 | Algorithm specification can be found here: 11 | * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2withchangenotice.pdf 12 | This implementation uses little endian byte order. 13 | *********************************************************************/ 14 | 15 | /*************************** HEADER FILES ***************************/ 16 | #include 17 | #include 18 | /********************************************************************* 19 | * Filename: sha256.h 20 | * Author: Brad Conte (brad AT bradconte.com) 21 | * Copyright: 22 | * Disclaimer: This code is presented "as is" without any guarantees. 23 | * Details: Defines the API for the corresponding SHA1 implementation. 24 | *********************************************************************/ 25 | 26 | #ifndef SHA256_H 27 | #define SHA256_H 28 | 29 | /*************************** HEADER FILES ***************************/ 30 | #include 31 | 32 | /****************************** MACROS ******************************/ 33 | #define SHA256_BLOCK_SIZE 32 // SHA256 outputs a 32 byte digest 34 | 35 | /**************************** DATA TYPES ****************************/ 36 | typedef unsigned char BYTE; // 8-bit byte 37 | typedef unsigned int WORD; // 32-bit word, change to "long" for 16-bit machines 38 | 39 | typedef struct { 40 | BYTE data[64]; 41 | WORD datalen; 42 | unsigned long long bitlen; 43 | WORD state[8]; 44 | } SHA256_CTX; 45 | 46 | /*********************** FUNCTION DECLARATIONS **********************/ 47 | void sha256_init(SHA256_CTX *ctx); 48 | void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len); 49 | void sha256_final(SHA256_CTX *ctx, BYTE hash[]); 50 | 51 | #endif // SHA256_H 52 | /****************************** MACROS ******************************/ 53 | #define ROTLEFT(a,b) (((a) << (b)) | ((a) >> (32-(b)))) 54 | #define ROTRIGHT(a,b) (((a) >> (b)) | ((a) << (32-(b)))) 55 | 56 | #define CH(x,y,z) (((x) & (y)) ^ (~(x) & (z))) 57 | #define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) 58 | #define EP0(x) (ROTRIGHT(x,2) ^ ROTRIGHT(x,13) ^ ROTRIGHT(x,22)) 59 | #define EP1(x) (ROTRIGHT(x,6) ^ ROTRIGHT(x,11) ^ ROTRIGHT(x,25)) 60 | #define SIG0(x) (ROTRIGHT(x,7) ^ ROTRIGHT(x,18) ^ ((x) >> 3)) 61 | #define SIG1(x) (ROTRIGHT(x,17) ^ ROTRIGHT(x,19) ^ ((x) >> 10)) 62 | 63 | /**************************** VARIABLES *****************************/ 64 | static const WORD k[64] = { 65 | 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5, 66 | 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174, 67 | 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da, 68 | 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967, 69 | 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85, 70 | 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070, 71 | 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3, 72 | 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 73 | }; 74 | 75 | /*********************** FUNCTION DEFINITIONS ***********************/ 76 | void sha256_transform(SHA256_CTX *ctx, const BYTE data[]) 77 | { 78 | WORD a, b, c, d, e, f, g, h, i, j, t1, t2, m[64]; 79 | 80 | for (i = 0, j = 0; i < 16; ++i, j += 4) 81 | m[i] = (data[j] << 24) | (data[j + 1] << 16) | (data[j + 2] << 8) | (data[j + 3]); 82 | for ( ; i < 64; ++i) 83 | m[i] = SIG1(m[i - 2]) + m[i - 7] + SIG0(m[i - 15]) + m[i - 16]; 84 | 85 | a = ctx->state[0]; 86 | b = ctx->state[1]; 87 | c = ctx->state[2]; 88 | d = ctx->state[3]; 89 | e = ctx->state[4]; 90 | f = ctx->state[5]; 91 | g = ctx->state[6]; 92 | h = ctx->state[7]; 93 | 94 | for (i = 0; i < 64; ++i) { 95 | t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i]; 96 | t2 = EP0(a) + MAJ(a,b,c); 97 | h = g; 98 | g = f; 99 | f = e; 100 | e = d + t1; 101 | d = c; 102 | c = b; 103 | b = a; 104 | a = t1 + t2; 105 | } 106 | 107 | ctx->state[0] += a; 108 | ctx->state[1] += b; 109 | ctx->state[2] += c; 110 | ctx->state[3] += d; 111 | ctx->state[4] += e; 112 | ctx->state[5] += f; 113 | ctx->state[6] += g; 114 | ctx->state[7] += h; 115 | } 116 | 117 | void sha256_init(SHA256_CTX *ctx) 118 | { 119 | ctx->datalen = 0; 120 | ctx->bitlen = 0; 121 | ctx->state[0] = 0x6a09e667; 122 | ctx->state[1] = 0xbb67ae85; 123 | ctx->state[2] = 0x3c6ef372; 124 | ctx->state[3] = 0xa54ff53a; 125 | ctx->state[4] = 0x510e527f; 126 | ctx->state[5] = 0x9b05688c; 127 | ctx->state[6] = 0x1f83d9ab; 128 | ctx->state[7] = 0x5be0cd19; 129 | } 130 | 131 | void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len) 132 | { 133 | WORD i; 134 | 135 | for (i = 0; i < len; ++i) { 136 | ctx->data[ctx->datalen] = data[i]; 137 | ctx->datalen++; 138 | if (ctx->datalen == 64) { 139 | sha256_transform(ctx, ctx->data); 140 | ctx->bitlen += 512; 141 | ctx->datalen = 0; 142 | } 143 | } 144 | } 145 | 146 | void sha256_final(SHA256_CTX *ctx, BYTE hash[]) 147 | { 148 | WORD i; 149 | 150 | i = ctx->datalen; 151 | 152 | // Pad whatever data is left in the buffer. 153 | if (ctx->datalen < 56) { 154 | ctx->data[i++] = 0x80; 155 | while (i < 56) 156 | ctx->data[i++] = 0x00; 157 | } 158 | else { 159 | ctx->data[i++] = 0x80; 160 | while (i < 64) 161 | ctx->data[i++] = 0x00; 162 | sha256_transform(ctx, ctx->data); 163 | memset(ctx->data, 0, 56); 164 | } 165 | 166 | // Append to the padding the total message's length in bits and transform. 167 | ctx->bitlen += ctx->datalen * 8; 168 | ctx->data[63] = ctx->bitlen; 169 | ctx->data[62] = ctx->bitlen >> 8; 170 | ctx->data[61] = ctx->bitlen >> 16; 171 | ctx->data[60] = ctx->bitlen >> 24; 172 | ctx->data[59] = ctx->bitlen >> 32; 173 | ctx->data[58] = ctx->bitlen >> 40; 174 | ctx->data[57] = ctx->bitlen >> 48; 175 | ctx->data[56] = ctx->bitlen >> 56; 176 | sha256_transform(ctx, ctx->data); 177 | 178 | // Since this implementation uses little endian byte ordering and SHA uses big endian, 179 | // reverse all the bytes when copying the final state to the output hash. 180 | for (i = 0; i < 4; ++i) { 181 | hash[i] = (ctx->state[0] >> (24 - i * 8)) & 0x000000ff; 182 | hash[i + 4] = (ctx->state[1] >> (24 - i * 8)) & 0x000000ff; 183 | hash[i + 8] = (ctx->state[2] >> (24 - i * 8)) & 0x000000ff; 184 | hash[i + 12] = (ctx->state[3] >> (24 - i * 8)) & 0x000000ff; 185 | hash[i + 16] = (ctx->state[4] >> (24 - i * 8)) & 0x000000ff; 186 | hash[i + 20] = (ctx->state[5] >> (24 - i * 8)) & 0x000000ff; 187 | hash[i + 24] = (ctx->state[6] >> (24 - i * 8)) & 0x000000ff; 188 | hash[i + 28] = (ctx->state[7] >> (24 - i * 8)) & 0x000000ff; 189 | } 190 | } 191 | 192 | #include 193 | #include 194 | #include 195 | 196 | uint64_t fucks[0x1000000]; 197 | uint64_t fuck_saturation = 0; 198 | 199 | // Cache sha256 starting with XXXXXX 200 | int main(int argc, char** argv){ 201 | memset(fucks,0,sizeof(fucks)); 202 | 203 | uint64_t nonce =0; 204 | char buf[64]; 205 | 206 | unsigned char digest[32]; 207 | SHA256_CTX ctx; 208 | 209 | while(fuck_saturation < 0x1000000){ 210 | sprintf(buf, "%lu", nonce); 211 | sha256_init(&ctx); 212 | sha256_update(&ctx, &buf, strlen(buf)); 213 | sha256_final(&ctx, digest); 214 | 215 | uint32_t key = (*(uint32_t*)(digest+0)&0xffffff); 216 | if (!fucks[key]) { 217 | fucks[key] = nonce; 218 | fuck_saturation++; 219 | if ((fuck_saturation & 0xffff) == 0) 220 | printf("%lx\n", fuck_saturation); 221 | } 222 | nonce++; 223 | } 224 | 225 | FILE* out = fopen("powcache.bin", "wb"); 226 | fwrite(fucks, 1, sizeof(fucks), out); 227 | fclose(out); 228 | } -------------------------------------------------------------------------------- /sha256.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "sha256.h" 3 | 4 | // K Array 5 | static const union { 6 | unsigned __int32 dw[64]; 7 | __m128i x[16]; 8 | } K = 9 | { 10 | 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 11 | 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 12 | 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 13 | 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, 14 | 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 15 | 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, 16 | 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, 17 | 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, 18 | }; 19 | 20 | #define H0 0x6a09e667 21 | #define H1 0xbb67ae85 22 | #define H2 0x3c6ef372 23 | #define H3 0xa54ff53a 24 | #define H4 0x510e527f 25 | #define H5 0x9b05688c 26 | #define H6 0x1f83d9ab 27 | #define H7 0x5be0cd19 28 | 29 | void SHA256H::Initialize() 30 | { 31 | h0145 = _mm_set_epi32(H0, H1, H4, H5); 32 | h2367 = _mm_set_epi32(H2, H3, H6, H7); 33 | msgbuf_count = 0; 34 | total_count = 0; 35 | } 36 | 37 | void SHA256H::Update(const void* buf, size_t length) 38 | { 39 | const unsigned char* p = (const unsigned char*)buf; 40 | total_count += length; 41 | 42 | // If any bytes are left in the message buffer, 43 | // fullfill the block first 44 | if (msgbuf_count) { 45 | size_t c = MBYTES - msgbuf_count; 46 | if (length < c) { 47 | memcpy(msgbuf + msgbuf_count, p, length); 48 | msgbuf_count += length; 49 | return; 50 | } 51 | else { 52 | memcpy(msgbuf + msgbuf_count, p, c); 53 | p += c; 54 | length -= c; 55 | ProcessMsgBlock(msgbuf); 56 | msgbuf_count = 0; 57 | } 58 | } 59 | 60 | // When we reach here, we have no data left in the message buffer 61 | while (length >= MBYTES) { 62 | // No need to copy into the internal message block 63 | ProcessMsgBlock(p); 64 | p += MBYTES; 65 | length -= MBYTES; 66 | } 67 | 68 | // Leave the remaining bytes in the message buffer 69 | if (length) { 70 | memcpy(msgbuf, p, length); 71 | msgbuf_count = length; 72 | } 73 | } 74 | 75 | void SHA256H::Final(void* digest) 76 | { 77 | // When we reach here, the block is supposed to be unfullfilled. 78 | // Add the terminating bit 79 | msgbuf[msgbuf_count++] = 0x80; 80 | 81 | // Need to set total length in the last 8-byte of the block. 82 | // If there is no room for the length, process this block first 83 | if (msgbuf_count + 8 > MBYTES) { 84 | // Fill zeros and process 85 | memset(msgbuf + msgbuf_count, 0, MBYTES - msgbuf_count); 86 | ProcessMsgBlock(msgbuf); 87 | msgbuf_count = 0; 88 | } 89 | 90 | // Fill zeros before the last 8-byte of the block 91 | memset(msgbuf + msgbuf_count, 0, MBYTES - 8 - msgbuf_count); 92 | 93 | // Set the length of the message in big-endian 94 | __m128i tmp = _mm_loadl_epi64((__m128i*)&total_count); 95 | tmp = _mm_slli_epi64(tmp, 3); // convert # of bytes to # of bits 96 | const __m128i total_count_byteswapindex = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7); 97 | tmp = _mm_shuffle_epi8(tmp, total_count_byteswapindex); // convert to big endian 98 | _mm_storel_epi64((__m128i*)(msgbuf + MBYTES - 8), tmp); 99 | 100 | // Process the last block 101 | ProcessMsgBlock(msgbuf); 102 | 103 | // Get the resulting hash value. 104 | // h0:h1:h4:h5 105 | // h2:h3:h6:h7 106 | // | 107 | // V 108 | // h0:h1:h2:h3 109 | // h4:h5:h6:h7 110 | __m128i h0123 = _mm_unpackhi_epi64(h2367, h0145); 111 | __m128i h4567 = _mm_unpacklo_epi64(h2367, h0145); 112 | 113 | // Swap the byte order 114 | const __m128i byteswapindex = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 115 | 116 | h0123 = _mm_shuffle_epi8(h0123, byteswapindex); 117 | h4567 = _mm_shuffle_epi8(h4567, byteswapindex); 118 | 119 | __m128i* digestX = (__m128i*)digest; 120 | _mm_storeu_si128(digestX, h0123); 121 | _mm_storeu_si128(digestX + 1, h4567); 122 | } 123 | 124 | void SHA256H::ProcessMsgBlock(const unsigned char* msg) 125 | { 126 | // Cyclic W array 127 | // We keep the W array content cyclically in 4 variables 128 | // Initially: 129 | // cw0 = w3 : w2 : w1 : w0 130 | // cw1 = w7 : w6 : w5 : w4 131 | // cw2 = w11 : w10 : w9 : w8 132 | // cw3 = w15 : w14 : w13 : w12 133 | const __m128i byteswapindex = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3); 134 | const __m128i* msgx = (const __m128i*)msg; 135 | __m128i cw0 = _mm_shuffle_epi8(_mm_loadu_si128(msgx), byteswapindex); 136 | __m128i cw1 = _mm_shuffle_epi8(_mm_loadu_si128(msgx + 1), byteswapindex); 137 | __m128i cw2 = _mm_shuffle_epi8(_mm_loadu_si128(msgx + 2), byteswapindex); 138 | __m128i cw3 = _mm_shuffle_epi8(_mm_loadu_si128(msgx + 3), byteswapindex); 139 | 140 | // Advance W array cycle 141 | // Inputs: 142 | // CW0 = w[t-13] : w[t-14] : w[t-15] : w[t-16] 143 | // CW1 = w[t-9] : w[t-10] : w[t-11] : w[t-12] 144 | // CW2 = w[t-5] : w[t-6] : w[t-7] : w[t-8] 145 | // CW3 = w[t-1] : w[t-2] : w[t-3] : w[t-4] 146 | // Outputs: 147 | // CW1 = w[t-9] : w[t-10] : w[t-11] : w[t-12] 148 | // CW2 = w[t-5] : w[t-6] : w[t-7] : w[t-8] 149 | // CW3 = w[t-1] : w[t-2] : w[t-3] : w[t-4] 150 | // CW0 = w[t+3] : w[t+2] : w[t+1] : w[t] 151 | #define CYCLE_W(CW0, CW1, CW2, CW3) \ 152 | CW0 = _mm_sha256msg1_epu32(CW0, CW1); \ 153 | CW0 = _mm_add_epi32(CW0, _mm_alignr_epi8(CW3, CW2, 4)); /* add w[t-4]:w[t-5]:w[t-6]:w[t-7]*/\ 154 | CW0 = _mm_sha256msg2_epu32(CW0, CW3); 155 | 156 | __m128i state1 = h0145; // a:b:e:f 157 | __m128i state2 = h2367; // c:d:g:h 158 | __m128i tmp; 159 | 160 | /* w0 - w3 */ 161 | #define SHA256_ROUNDS_4(cwN, n) \ 162 | tmp = _mm_add_epi32(cwN, K.x[n]); /* w3+K3 : w2+K2 : w1+K1 : w0+K0 */ \ 163 | state2 = _mm_sha256rnds2_epu32(state2, state1, tmp);/* state2 = a':b':e':f' / state1 = c':d':g':h' */ \ 164 | tmp = _mm_unpackhi_epi64(tmp, tmp); /* - : - : w3+K3 : w2+K2 */ \ 165 | state1 = _mm_sha256rnds2_epu32(state1, state2, tmp);/* state1 = a':b':e':f' / state2 = c':d':g':h' */ 166 | 167 | /* w0 - w3 */ 168 | SHA256_ROUNDS_4(cw0, 0); 169 | /* w4 - w7 */ 170 | SHA256_ROUNDS_4(cw1, 1); 171 | /* w8 - w11 */ 172 | SHA256_ROUNDS_4(cw2, 2); 173 | /* w12 - w15 */ 174 | SHA256_ROUNDS_4(cw3, 3); 175 | /* w16 - w19 */ 176 | CYCLE_W(cw0, cw1, cw2, cw3); /* cw0 = w19 : w18 : w17 : w16 */ 177 | SHA256_ROUNDS_4(cw0, 4); 178 | /* w20 - w23 */ 179 | CYCLE_W(cw1, cw2, cw3, cw0); /* cw1 = w23 : w22 : w21 : w20 */ 180 | SHA256_ROUNDS_4(cw1, 5); 181 | /* w24 - w27 */ 182 | CYCLE_W(cw2, cw3, cw0, cw1); /* cw2 = w27 : w26 : w25 : w24 */ 183 | SHA256_ROUNDS_4(cw2, 6); 184 | /* w28 - w31 */ 185 | CYCLE_W(cw3, cw0, cw1, cw2); /* cw3 = w31 : w30 : w29 : w28 */ 186 | SHA256_ROUNDS_4(cw3, 7); 187 | /* w32 - w35 */ 188 | CYCLE_W(cw0, cw1, cw2, cw3); /* cw0 = w35 : w34 : w33 : w32 */ 189 | SHA256_ROUNDS_4(cw0, 8); 190 | /* w36 - w39 */ 191 | CYCLE_W(cw1, cw2, cw3, cw0); /* cw1 = w39 : w38 : w37 : w36 */ 192 | SHA256_ROUNDS_4(cw1, 9); 193 | /* w40 - w43 */ 194 | CYCLE_W(cw2, cw3, cw0, cw1); /* cw2 = w43 : w42 : w41 : w40 */ 195 | SHA256_ROUNDS_4(cw2, 10); 196 | /* w44 - w47 */ 197 | CYCLE_W(cw3, cw0, cw1, cw2); /* cw3 = w47 : w46 : w45 : w44 */ 198 | SHA256_ROUNDS_4(cw3, 11); 199 | /* w48 - w51 */ 200 | CYCLE_W(cw0, cw1, cw2, cw3); /* cw0 = w51 : w50 : w49 : w48 */ 201 | SHA256_ROUNDS_4(cw0, 12); 202 | /* w52 - w55 */ 203 | CYCLE_W(cw1, cw2, cw3, cw0); /* cw1 = w55 : w54 : w53 : w52 */ 204 | SHA256_ROUNDS_4(cw1, 13); 205 | /* w56 - w59 */ 206 | CYCLE_W(cw2, cw3, cw0, cw1); /* cw2 = w59 : w58 : w57 : w56 */ 207 | SHA256_ROUNDS_4(cw2, 14); 208 | /* w60 - w63 */ 209 | CYCLE_W(cw3, cw0, cw1, cw2); /* cw3 = w63 : w62 : w61 : w60 */ 210 | SHA256_ROUNDS_4(cw3, 15); 211 | 212 | // Add to the intermediate hash 213 | h0145 = _mm_add_epi32(state1, h0145); 214 | h2367 = _mm_add_epi32(state2, h2367); 215 | } -------------------------------------------------------------------------------- /sha1.c: -------------------------------------------------------------------------------- 1 | // clang -Wall -O3 -mssse3 -msha sha.c -o sha 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #define MBYTES 64 8 | 9 | typedef struct { 10 | unsigned char msgbuf[MBYTES]; 11 | size_t msgbuf_count; 12 | uint64_t total_count; 13 | 14 | // Intermediate hash 15 | __m128i h0123; // h0 : h1 : h2 : h3 16 | __m128i h4; // h4 : 0 : 0 : 0 17 | } sha1_ctx; 18 | 19 | 20 | #define H0 0x67452301 21 | #define H1 0xefcdab89 22 | #define H2 0x98badcfe 23 | #define H3 0x10325476 24 | #define H4 0xc3d2e1f0 25 | 26 | void SHA1Init(sha1_ctx* ctx) 27 | { 28 | ctx->h0123 = _mm_set_epi32(H0, H1, H2, H3); 29 | ctx->h4 = _mm_set_epi32(H4, 0, 0, 0); 30 | ctx->msgbuf_count = 0; 31 | ctx->total_count = 0; 32 | } 33 | 34 | void SHA1ProcessMsgBlock(sha1_ctx* ctx, const unsigned char* msg) 35 | { 36 | // Cyclic W array 37 | // We keep the W array content cyclically in 4 variables 38 | // Initially: 39 | // cw0 = w0 : w1 : w2 : w3 40 | // cw1 = w4 : w5 : w6 : w7 41 | // cw2 = w8 : w9 : w10 : w11 42 | // cw3 = w12 : w13 : w14 : w15 43 | const __m128i byteswapindex = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 44 | const __m128i* msgx = (const __m128i*)msg; 45 | __m128i cw0 = _mm_shuffle_epi8(_mm_loadu_si128(msgx), byteswapindex); 46 | __m128i cw1 = _mm_shuffle_epi8(_mm_loadu_si128(msgx + 1), byteswapindex); 47 | __m128i cw2 = _mm_shuffle_epi8(_mm_loadu_si128(msgx + 2), byteswapindex); 48 | __m128i cw3 = _mm_shuffle_epi8(_mm_loadu_si128(msgx + 3), byteswapindex); 49 | 50 | // Advance W array cycle 51 | // Inputs: 52 | // CW0 = w[t-16] : w[t-15] : w[t-14] : w[t-13] 53 | // CW1 = w[t-12] : w[t-11] : w[t-10] : w[t-9] 54 | // CW2 = w[t-8] : w[t-7] : w[t-6] : w[t-5] 55 | // CW3 = w[t-4] : w[t-3] : w[t-2] : w[t-1] 56 | // Outputs: 57 | // CW1 = w[t-12] : w[t-11] : w[t-10] : w[t-9] 58 | // CW2 = w[t-8] : w[t-7] : w[t-6] : w[t-5] 59 | // CW3 = w[t-4] : w[t-3] : w[t-2] : w[t-1] 60 | // CW0 = w[t] : w[t+1] : w[t+2] : w[t+3] 61 | #define CYCLE_W(CW0, CW1, CW2, CW3) \ 62 | CW0 = _mm_sha1msg1_epu32(CW0, CW1); \ 63 | CW0 = _mm_xor_si128(CW0, CW2); \ 64 | CW0 = _mm_sha1msg2_epu32(CW0, CW3); 65 | 66 | __m128i state1 = ctx->h0123; // state1 = a : b : c : d 67 | __m128i w_next = _mm_add_epi32(cw0, ctx->h4); // w_next = w0+e : w1 : w2 : w3 68 | __m128i state2; 69 | 70 | // w0 - w3 71 | state2 = _mm_sha1rnds4_epu32(state1, w_next, 0);// state2 = a' : b' : c' : d' 72 | w_next = _mm_sha1nexte_epu32(state1, cw1); // w_next = w4+e' : w5 : w6 : w7 73 | // w4 - w7 74 | state1 = _mm_sha1rnds4_epu32(state2, w_next, 0); 75 | w_next = _mm_sha1nexte_epu32(state2, cw2); 76 | // w8 - w11 77 | state2 = _mm_sha1rnds4_epu32(state1, w_next, 0); 78 | w_next = _mm_sha1nexte_epu32(state1, cw3); 79 | // w12 - w15 80 | CYCLE_W(cw0, cw1, cw2, cw3); // cw0 = w16 : w17 : w18 : w19 81 | state1 = _mm_sha1rnds4_epu32(state2, w_next, 0); 82 | w_next = _mm_sha1nexte_epu32(state2, cw0); 83 | // w16 - w19 84 | CYCLE_W(cw1, cw2, cw3, cw0); // cw1 = w20 : w21 : w22 : w23 85 | state2 = _mm_sha1rnds4_epu32(state1, w_next, 0); 86 | w_next = _mm_sha1nexte_epu32(state1, cw1); 87 | // w20 - w23 88 | CYCLE_W(cw2, cw3, cw0, cw1); // cw2 = w24 : w25 : w26 : w27 89 | state1 = _mm_sha1rnds4_epu32(state2, w_next, 1); 90 | w_next = _mm_sha1nexte_epu32(state2, cw2); 91 | // w24 - w27 92 | CYCLE_W(cw3, cw0, cw1, cw2); // cw3 = w28 : w29 : w30 : w31 93 | state2 = _mm_sha1rnds4_epu32(state1, w_next, 1); 94 | w_next = _mm_sha1nexte_epu32(state1, cw3); 95 | // w28 - w31 96 | CYCLE_W(cw0, cw1, cw2, cw3); // cw0 = w32 : w33 : w34 : w35 97 | state1 = _mm_sha1rnds4_epu32(state2, w_next, 1); 98 | w_next = _mm_sha1nexte_epu32(state2, cw0); 99 | // w32 - w35 100 | CYCLE_W(cw1, cw2, cw3, cw0); // cw1 = w36 : w37 : w38 : w39 101 | state2 = _mm_sha1rnds4_epu32(state1, w_next, 1); 102 | w_next = _mm_sha1nexte_epu32(state1, cw1); 103 | // w36 - w39 104 | CYCLE_W(cw2, cw3, cw0, cw1); // cw2 = w40 : w41 : w42 : w43 105 | state1 = _mm_sha1rnds4_epu32(state2, w_next, 1); 106 | w_next = _mm_sha1nexte_epu32(state2, cw2); 107 | // w40 - w43 108 | CYCLE_W(cw3, cw0, cw1, cw2); // cw3 = w44 : w45 : w46 : w47 109 | state2 = _mm_sha1rnds4_epu32(state1, w_next, 2); 110 | w_next = _mm_sha1nexte_epu32(state1, cw3); 111 | // w44 - w47 112 | CYCLE_W(cw0, cw1, cw2, cw3); // cw0 = w48 : w49 : w50 : w51 113 | state1 = _mm_sha1rnds4_epu32(state2, w_next, 2); 114 | w_next = _mm_sha1nexte_epu32(state2, cw0); 115 | // w48 - w51 116 | CYCLE_W(cw1, cw2, cw3, cw0); // cw1 = w52 : w53 : w54 : w55 117 | state2 = _mm_sha1rnds4_epu32(state1, w_next, 2); 118 | w_next = _mm_sha1nexte_epu32(state1, cw1); 119 | // w52 - w55 120 | CYCLE_W(cw2, cw3, cw0, cw1); // cw2 = w56 : w57 : w58 : w59 121 | state1 = _mm_sha1rnds4_epu32(state2, w_next, 2); 122 | w_next = _mm_sha1nexte_epu32(state2, cw2); 123 | // w56 - w59 124 | CYCLE_W(cw3, cw0, cw1, cw2); // cw3 = w60 : w61 : w62 : w63 125 | state2 = _mm_sha1rnds4_epu32(state1, w_next, 2); 126 | w_next = _mm_sha1nexte_epu32(state1, cw3); 127 | // w60 - w63 128 | CYCLE_W(cw0, cw1, cw2, cw3); // cw0 = w64 : w65 : w66 : w67 129 | state1 = _mm_sha1rnds4_epu32(state2, w_next, 3); 130 | w_next = _mm_sha1nexte_epu32(state2, cw0); 131 | // w64 - w67 132 | CYCLE_W(cw1, cw2, cw3, cw0); // cw1 = w68 : w69 : w70 : w71 133 | state2 = _mm_sha1rnds4_epu32(state1, w_next, 3); 134 | w_next = _mm_sha1nexte_epu32(state1, cw1); 135 | // w68 - w71 136 | CYCLE_W(cw2, cw3, cw0, cw1); // cw2 = w72 : w73 : w74 : w75 137 | state1 = _mm_sha1rnds4_epu32(state2, w_next, 3); 138 | w_next = _mm_sha1nexte_epu32(state2, cw2); 139 | // w72 - w75 140 | CYCLE_W(cw3, cw0, cw1, cw2); // cw3 = w76 : w77 : w78 : w79 141 | state2 = _mm_sha1rnds4_epu32(state1, w_next, 3); 142 | w_next = _mm_sha1nexte_epu32(state1, cw3); 143 | 144 | // w76 - w79 145 | state1 = _mm_sha1rnds4_epu32(state2, w_next, 3); // state1 = final a : b : c : d 146 | ctx->h4 = _mm_sha1nexte_epu32(state2, ctx->h4); // Add final e to h4 147 | ctx->h0123 = _mm_add_epi32(state1, ctx->h0123); // Add final a:b:c:d to h0:h1:h2:h3 148 | } 149 | 150 | void SHA1Update(sha1_ctx* ctx, const void* buf, size_t length) 151 | { 152 | const unsigned char* p = (const unsigned char*)buf; 153 | ctx->total_count += length; 154 | 155 | // If any bytes are left in the message buffer, 156 | // fullfill the block first 157 | if (ctx->msgbuf_count) { 158 | size_t c = MBYTES - ctx->msgbuf_count; 159 | if (length < c) { 160 | memcpy(ctx->msgbuf + ctx->msgbuf_count, p, length); 161 | ctx->msgbuf_count += length; 162 | return; 163 | } 164 | else { 165 | memcpy(ctx->msgbuf + ctx->msgbuf_count, p, c); 166 | p += c; 167 | length -= c; 168 | SHA1ProcessMsgBlock(ctx, ctx->msgbuf); 169 | ctx->msgbuf_count = 0; 170 | } 171 | } 172 | 173 | // When we reach here, we have no data left in the message buffer 174 | while (length >= MBYTES) { 175 | // No need to copy into the internal message block 176 | SHA1ProcessMsgBlock(ctx, p); 177 | p += MBYTES; 178 | length -= MBYTES; 179 | } 180 | 181 | // Leave the remaining bytes in the message buffer 182 | if (length) { 183 | memcpy(ctx->msgbuf, p, length); 184 | ctx->msgbuf_count = length; 185 | } 186 | } 187 | 188 | void SHA1Final(sha1_ctx* ctx, void* digest) 189 | { 190 | // When we reach here, the block is supposed to be unfullfilled. 191 | // Add the terminating bit 192 | ctx->msgbuf[ctx->msgbuf_count++] = 0x80; 193 | 194 | // Need to set total length in the last 8-byte of the block. 195 | // If there is no room for the length, process this block first 196 | if (ctx->msgbuf_count + 8 > MBYTES) { 197 | // Fill zeros and process 198 | memset(ctx->msgbuf + ctx->msgbuf_count, 0, MBYTES - ctx->msgbuf_count); 199 | SHA1ProcessMsgBlock(ctx, ctx->msgbuf); 200 | ctx->msgbuf_count = 0; 201 | } 202 | 203 | // Fill zeros before the last 8-byte of the block 204 | memset(ctx->msgbuf + ctx->msgbuf_count, 0, MBYTES - 8 - ctx->msgbuf_count); 205 | 206 | // Set the length of the message in big-endian 207 | __m128i tmp = _mm_loadl_epi64((__m128i*)&ctx->total_count); 208 | tmp = _mm_slli_epi64(tmp, 3); // convert # of bytes to # of bits 209 | const __m128i total_count_byteswapindex = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7); 210 | tmp = _mm_shuffle_epi8(tmp, total_count_byteswapindex); // convert to big endian 211 | _mm_storel_epi64((__m128i*)(ctx->msgbuf + MBYTES - 8), tmp); 212 | 213 | // Process the last block 214 | SHA1ProcessMsgBlock(ctx, ctx->msgbuf); 215 | 216 | // Set the resulting hash value, upside down 217 | const __m128i byteswapindex = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 218 | __m128i r0123 = _mm_shuffle_epi8(ctx->h0123, byteswapindex); 219 | __m128i r4 = _mm_shuffle_epi8(ctx->h4, byteswapindex); 220 | 221 | uint32_t* digestdw = (uint32_t*)digest; 222 | _mm_storeu_si128((__m128i*)digestdw, r0123); 223 | digestdw[4] = _mm_cvtsi128_si32(r4); 224 | } 225 | 226 | #if 0 227 | #include 228 | int main() 229 | { 230 | sha1_ctx ctx; 231 | SHA1Init(&ctx); 232 | SHA1Update(&ctx, "a", 1); 233 | uint8_t digest[20]; 234 | SHA1Final(&ctx, digest); 235 | for (int i = 0; i < 20; i++) 236 | { 237 | printf("%02x", digest[i]); 238 | } 239 | printf("\n"); 240 | } 241 | #endif -------------------------------------------------------------------------------- /sha256.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | /********************************************************************* 15 | * Filename: sha256.c 16 | * Author: Brad Conte (brad AT bradconte.com) 17 | * Copyright: 18 | * Disclaimer: This code is presented "as is" without any guarantees. 19 | * Details: Implementation of the SHA-256 hashing algorithm. 20 | SHA-256 is one of the three algorithms in the SHA2 21 | specification. The others, SHA-384 and SHA-512, are not 22 | offered in this implementation. 23 | Algorithm specification can be found here: 24 | * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2withchangenotice.pdf 25 | This implementation uses little endian byte order. 26 | *********************************************************************/ 27 | 28 | /*************************** HEADER FILES ***************************/ 29 | /********************************************************************* 30 | * Filename: sha256.h 31 | * Author: Brad Conte (brad AT bradconte.com) 32 | * Copyright: 33 | * Disclaimer: This code is presented "as is" without any guarantees. 34 | * Details: Defines the API for the corresponding SHA1 implementation. 35 | *********************************************************************/ 36 | 37 | #ifndef SHA256_H 38 | #define SHA256_H 39 | 40 | /*************************** HEADER FILES ***************************/ 41 | 42 | /****************************** MACROS ******************************/ 43 | #define SHA256_BLOCK_SIZE 32 // SHA256 outputs a 32 byte digest 44 | 45 | /**************************** DATA TYPES ****************************/ 46 | typedef unsigned char BYTE; // 8-bit byte 47 | typedef unsigned int WORD; // 32-bit word, change to "long" for 16-bit machines 48 | 49 | typedef struct { 50 | BYTE data[64]; 51 | WORD datalen; 52 | unsigned long long bitlen; 53 | WORD state[8]; 54 | } SHA256_CTX; 55 | 56 | /*********************** FUNCTION DECLARATIONS **********************/ 57 | void sha256_init(SHA256_CTX *ctx); 58 | void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len); 59 | void sha256_final(SHA256_CTX *ctx, BYTE hash[]); 60 | 61 | #endif // SHA256_H 62 | /****************************** MACROS ******************************/ 63 | #define ROTLEFT(a,b) (((a) << (b)) | ((a) >> (32-(b)))) 64 | #define ROTRIGHT(a,b) (((a) >> (b)) | ((a) << (32-(b)))) 65 | 66 | #define CH(x,y,z) (((x) & (y)) ^ (~(x) & (z))) 67 | #define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) 68 | #define EP0(x) (ROTRIGHT(x,2) ^ ROTRIGHT(x,13) ^ ROTRIGHT(x,22)) 69 | #define EP1(x) (ROTRIGHT(x,6) ^ ROTRIGHT(x,11) ^ ROTRIGHT(x,25)) 70 | #define SIG0(x) (ROTRIGHT(x,7) ^ ROTRIGHT(x,18) ^ ((x) >> 3)) 71 | #define SIG1(x) (ROTRIGHT(x,17) ^ ROTRIGHT(x,19) ^ ((x) >> 10)) 72 | 73 | /**************************** VARIABLES *****************************/ 74 | static const WORD k[64] = { 75 | 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5, 76 | 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174, 77 | 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da, 78 | 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967, 79 | 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85, 80 | 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070, 81 | 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3, 82 | 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 83 | }; 84 | 85 | /*********************** FUNCTION DEFINITIONS ***********************/ 86 | void sha256_transform(SHA256_CTX *ctx, const BYTE data[]) 87 | { 88 | WORD a, b, c, d, e, f, g, h, i, j, t1, t2, m[64]; 89 | 90 | for (i = 0, j = 0; i < 16; ++i, j += 4) 91 | m[i] = (data[j] << 24) | (data[j + 1] << 16) | (data[j + 2] << 8) | (data[j + 3]); 92 | for ( ; i < 64; ++i) 93 | m[i] = SIG1(m[i - 2]) + m[i - 7] + SIG0(m[i - 15]) + m[i - 16]; 94 | 95 | a = ctx->state[0]; 96 | b = ctx->state[1]; 97 | c = ctx->state[2]; 98 | d = ctx->state[3]; 99 | e = ctx->state[4]; 100 | f = ctx->state[5]; 101 | g = ctx->state[6]; 102 | h = ctx->state[7]; 103 | 104 | for (i = 0; i < 64; ++i) { 105 | t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i]; 106 | t2 = EP0(a) + MAJ(a,b,c); 107 | h = g; 108 | g = f; 109 | f = e; 110 | e = d + t1; 111 | d = c; 112 | c = b; 113 | b = a; 114 | a = t1 + t2; 115 | } 116 | 117 | ctx->state[0] += a; 118 | ctx->state[1] += b; 119 | ctx->state[2] += c; 120 | ctx->state[3] += d; 121 | ctx->state[4] += e; 122 | ctx->state[5] += f; 123 | ctx->state[6] += g; 124 | ctx->state[7] += h; 125 | } 126 | 127 | void sha256_init(SHA256_CTX *ctx) 128 | { 129 | ctx->datalen = 0; 130 | ctx->bitlen = 0; 131 | ctx->state[0] = 0x6a09e667; 132 | ctx->state[1] = 0xbb67ae85; 133 | ctx->state[2] = 0x3c6ef372; 134 | ctx->state[3] = 0xa54ff53a; 135 | ctx->state[4] = 0x510e527f; 136 | ctx->state[5] = 0x9b05688c; 137 | ctx->state[6] = 0x1f83d9ab; 138 | ctx->state[7] = 0x5be0cd19; 139 | } 140 | 141 | void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len) 142 | { 143 | WORD i; 144 | 145 | for (i = 0; i < len; ++i) { 146 | ctx->data[ctx->datalen] = data[i]; 147 | ctx->datalen++; 148 | if (ctx->datalen == 64) { 149 | sha256_transform(ctx, ctx->data); 150 | ctx->bitlen += 512; 151 | ctx->datalen = 0; 152 | } 153 | } 154 | } 155 | 156 | void sha256_final(SHA256_CTX *ctx, BYTE hash[]) 157 | { 158 | WORD i; 159 | 160 | i = ctx->datalen; 161 | 162 | // Pad whatever data is left in the buffer. 163 | if (ctx->datalen < 56) { 164 | ctx->data[i++] = 0x80; 165 | while (i < 56) 166 | ctx->data[i++] = 0x00; 167 | } 168 | else { 169 | ctx->data[i++] = 0x80; 170 | while (i < 64) 171 | ctx->data[i++] = 0x00; 172 | sha256_transform(ctx, ctx->data); 173 | memset(ctx->data, 0, 56); 174 | } 175 | 176 | // Append to the padding the total message's length in bits and transform. 177 | ctx->bitlen += ctx->datalen * 8; 178 | ctx->data[63] = ctx->bitlen; 179 | ctx->data[62] = ctx->bitlen >> 8; 180 | ctx->data[61] = ctx->bitlen >> 16; 181 | ctx->data[60] = ctx->bitlen >> 24; 182 | ctx->data[59] = ctx->bitlen >> 32; 183 | ctx->data[58] = ctx->bitlen >> 40; 184 | ctx->data[57] = ctx->bitlen >> 48; 185 | ctx->data[56] = ctx->bitlen >> 56; 186 | sha256_transform(ctx, ctx->data); 187 | 188 | // Since this implementation uses little endian byte ordering and SHA uses big endian, 189 | // reverse all the bytes when copying the final state to the output hash. 190 | for (i = 0; i < 4; ++i) { 191 | hash[i] = (ctx->state[0] >> (24 - i * 8)) & 0x000000ff; 192 | hash[i + 4] = (ctx->state[1] >> (24 - i * 8)) & 0x000000ff; 193 | hash[i + 8] = (ctx->state[2] >> (24 - i * 8)) & 0x000000ff; 194 | hash[i + 12] = (ctx->state[3] >> (24 - i * 8)) & 0x000000ff; 195 | hash[i + 16] = (ctx->state[4] >> (24 - i * 8)) & 0x000000ff; 196 | hash[i + 20] = (ctx->state[5] >> (24 - i * 8)) & 0x000000ff; 197 | hash[i + 24] = (ctx->state[6] >> (24 - i * 8)) & 0x000000ff; 198 | hash[i + 28] = (ctx->state[7] >> (24 - i * 8)) & 0x000000ff; 199 | } 200 | } 201 | 202 | char* buf; 203 | int prefix_len; 204 | 205 | void do_worker(uint32_t nonce_start, uint32_t nonce_end) { 206 | unsigned char digest[32]; 207 | for (uint32_t nonce = nonce_start; nonce != nonce_end; nonce++) { 208 | // if (!(nonce & 0xfffff)) 209 | // printf("%x\n", nonce); 210 | SHA256_CTX ctx; 211 | sprintf(buf+prefix_len, "%08x", nonce); 212 | sha256_init(&ctx); 213 | sha256_update(&ctx, buf, prefix_len+8); 214 | sha256_final(&ctx, digest); 215 | if ((*(uint32_t*)&digest[28] & 0xffffff0f) == 0xffffff0f) { 216 | // printf("FOUND\n"); 217 | printf("%s\n", buf); 218 | int fd = open("sice.txt", O_RDWR | O_CREAT,0777); 219 | dprintf(fd, "%s\n", buf); 220 | close(fd); 221 | // printf("digest: "); 222 | // for (int i = 0; i < sizeof(digest); i++) 223 | // printf("%02x", (uint8_t)digest[i]); 224 | // printf("\nhexdump: "); 225 | // for (int i = 0; i < sizeof(buf); i++) 226 | // printf("%02x ", (uint8_t)buf[i]); 227 | // printf("\n"); 228 | kill(0,SIGQUIT); // kill process group 229 | break; 230 | } 231 | } 232 | printf("exhausted...\n"); 233 | } 234 | 235 | // replace me with nproc 236 | #define PARALLEL 40 237 | 238 | int main(int argc, char** argv){ 239 | if (argc != 2) { 240 | printf("usage: ./pow "); 241 | return 1; 242 | } 243 | char* prefix = argv[1]; 244 | prefix_len = strlen(prefix); 245 | // printf("prefix: %s\n", prefix); 246 | 247 | buf = malloc(prefix_len+8+1); 248 | strcpy(buf, prefix); 249 | 250 | // parallelism shit 251 | setpgid(0,0); 252 | unsigned int brute = 0; 253 | uint64_t upto = (0x100000000L / (uint64_t)PARALLEL); 254 | for (int worker = 0; worker < PARALLEL; worker++) { 255 | if (!fork()) { 256 | // child 257 | // printf("worker %d, %x to %lx\n", worker,brute,upto); 258 | do_worker(brute, upto); 259 | break; 260 | } 261 | brute = upto; 262 | upto += (0x100000000L / (uint64_t)PARALLEL); 263 | } 264 | 265 | // wait for worker to kill me on completion 266 | while (1) { sleep(1); } 267 | } 268 | 269 | --------------------------------------------------------------------------------