├── .erlang ├── clean.sh ├── perftest.sh ├── stats.txt ├── makefile ├── utils.h ├── utils.c ├── README.md ├── sha256.h ├── miner_gpu.erl ├── sha256.c ├── amoveo_pow_gpu.c └── sha256_gpu.cu /.erlang: -------------------------------------------------------------------------------- 1 | application:start(inets). -------------------------------------------------------------------------------- /clean.sh: -------------------------------------------------------------------------------- 1 | killall amoveo_gpu_miner 2 | -------------------------------------------------------------------------------- /perftest.sh: -------------------------------------------------------------------------------- 1 | make all 2 | rm *.o 3 | ./amoveo_gpu_miner perftest 4 | -------------------------------------------------------------------------------- /stats.txt: -------------------------------------------------------------------------------- 1 | - V1 2 | GTX1080 - 146 MH/s default, 1010 MH/s upgraded 3 | GTX1080Ti - 233 MH/s default, 945 MH/s upgraded 4 | GTX1050 - 61 MH/s default, 353 MH/s upgraded 5 | K80 - 83 MH/s default, 256.11 MH/s upgraded 6 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | OBJECTS=amoveo_gpu_miner 2 | 3 | all: $(OBJECTS) 4 | 5 | clean: 6 | rm $(OBJECTS) sha256.o utils.o gpu_miner.o 7 | 8 | amoveo_gpu_miner: amoveo_pow_gpu.c gpu_miner.o utils.o sha256.o 9 | nvcc -O1 -v -o $@ $^ 10 | 11 | gpu_miner.o: sha256_gpu.cu 12 | nvcc -O1 -v -lrt -lm -arch=sm_30 -D_FORCE_INLINES -c -o $@ $^ 13 | 14 | sha256.o: sha256.c 15 | gcc -O1 -v -c -o $@ $^ 16 | 17 | utils.o: utils.c 18 | gcc -O1 -v -c -o $@ $^ -lrt 19 | -------------------------------------------------------------------------------- /utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H 2 | #define UTILS_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #define GIG 1000000000 9 | 10 | extern struct timespec time1, time2; 11 | 12 | #define tick() clock_gettime(CLOCK_MONOTONIC, &time1) 13 | #define tock() clock_gettime(CLOCK_MONOTONIC, &time2) 14 | 15 | #define ENDIAN_SWAP_32(x) (\ 16 | ((x & 0xff000000) >> 24) | \ 17 | ((x & 0x00ff0000) >> 8 ) | \ 18 | ((x & 0x0000ff00) << 8 ) | \ 19 | ((x & 0x000000ff) << 24)) 20 | 21 | typedef struct { 22 | bool nonce_found; 23 | uint64_t nonce; 24 | } Nonce_result; 25 | 26 | long int get_execution_time(); 27 | void initialize_nonce_result(Nonce_result *nr); 28 | void set_difficulty(unsigned char *difficulty, unsigned int nBits); 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /utils.c: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | 3 | struct timespec diff(struct timespec start, struct timespec end); 4 | 5 | struct timespec time1, time2; 6 | 7 | struct timespec diff(struct timespec start, struct timespec end) 8 | { 9 | struct timespec temp; 10 | if ((end.tv_nsec-start.tv_nsec)<0) { 11 | temp.tv_sec = end.tv_sec-start.tv_sec-1; 12 | temp.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec; 13 | } else { 14 | temp.tv_sec = end.tv_sec-start.tv_sec; 15 | temp.tv_nsec = end.tv_nsec-start.tv_nsec; 16 | } 17 | return temp; 18 | } 19 | 20 | long int get_execution_time() { 21 | struct timespec delta = diff(time1,time2); 22 | return (long int) (GIG * delta.tv_sec + delta.tv_nsec); 23 | } 24 | 25 | void initialize_nonce_result(Nonce_result *nr) { 26 | nr->nonce_found = false; 27 | nr->nonce = 0; 28 | } 29 | 30 | //difficulty MUST be 32 bytes 31 | void set_difficulty(unsigned char *difficulty, unsigned int nBits) { 32 | int i; 33 | for(i=0; i<32; i++) { 34 | difficulty[i] = 0; 35 | } 36 | int msb = 32 - ((nBits & 0xff000000) >> 24); 37 | difficulty[msb++] = (nBits & 0xff0000) >> 16; 38 | difficulty[msb++] = (nBits & 0xff00) >> 8; 39 | difficulty[msb] = nBits & 0xff; 40 | } 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Measured GPU Speeds: 2 | - GTX1080 - 146 MH/s default, 1010 MH/s upgraded 3 | - GTX1080Ti - 233 MH/s default, 945 MH/s upgraded 4 | - GTX1050 - 61 MH/s default, 353 MH/s upgraded 5 | - K80 - 83 MH/s default, 256.11 MH/s upgraded 6 | 7 | Dependencies : 8 | - Ubuntu 16.04 9 | - [CUDA 8.0 or later](https://askubuntu.com/a/799185) 10 | ``` 11 | sudo apt-get install erlang libncurses5-dev libssl-dev unixodbc-dev g++ git 12 | sudo apt-get install build-essential 13 | sudo apt-get install curl 14 | ``` 15 | 16 | Steps to mine: 17 | 1. [Only needed for the first time] Set Pubkey in miner_gpu.erl 18 | 2. sh build_ubuntu.sh 19 | 3. miner_gpu:start(). 20 | 4. To see debug info, open debug.txt ("tail -f debug.txt" in a separate terminal to stream debug info) 21 | 5. sh clean.sh when finished mining 22 | 23 | Steps to perform perf test 24 | 1. sh perftest.sh 25 | 26 | By default, the miner will mine to [a mining pool](https://github.com/zack-bitcoin/amoveo-mining-pool) that takes a 1% fee. 27 | 28 | The CUDA code here is a basic and unoptimized version for Amoveo GPU mining. An upgrade available to provide the most optimized CUDA code for Amoveo GPU mining, and typically gives a 3-5x performance improvement, depending on your GPU. For upgrade inquiries, please contact decryptoed@gmail.com or @Iridescence in the Amoveo telegram. [Performance stats here](https://github.com/decryptoed/amoveo-cuda-miner/blob/master/stats.txt). 29 | 30 | Donations to encourage improvements and optimizations: 31 | 32 | Amoveo - BIGGeST9w6M//7Bo8iLnqFSrLLnkDXHj9WFFc+kwxeWm2FBBi0NDS0ERROgBiNQqv47wkh0iABPN1/2ECooCTOM= 33 | 34 | Bitcoin - 39RMFMprjdzCRvedLhFdz5uNEzTP4uMbkV 35 | 36 | Ethereum - 0x74ed96b787def62e9b183ff5fc0e93753ebc4c76 -------------------------------------------------------------------------------- /sha256.h: -------------------------------------------------------------------------------- 1 | /********************************************************************* 2 | * Filename: sha256.h 3 | * Author: Brad Conte (brad AT bradconte.com) 4 | * Copyright: 5 | * Disclaimer: This code is presented "as is" without any guarantees. 6 | * Details: Defines the API for the corresponding SHA1 implementation. 7 | *********************************************************************/ 8 | 9 | #ifndef SHA256_H 10 | #define SHA256_H 11 | 12 | /*************************** HEADER FILES ***************************/ 13 | #include 14 | 15 | /****************************** MACROS ******************************/ 16 | #define SHA256_BLOCK_SIZE 32 // SHA256 outputs a 32 byte digest 17 | 18 | #define ROTLEFT(a,b) (((a) << (b)) | ((a) >> (32-(b)))) 19 | #define ROTRIGHT(a,b) (((a) >> (b)) | ((a) << (32-(b)))) 20 | 21 | #define CH(x,y,z) (((x) & (y)) ^ (~(x) & (z))) 22 | #define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) 23 | #define EP0(x) (ROTRIGHT(x,2) ^ ROTRIGHT(x,13) ^ ROTRIGHT(x,22)) 24 | #define EP1(x) (ROTRIGHT(x,6) ^ ROTRIGHT(x,11) ^ ROTRIGHT(x,25)) 25 | #define SIG0(x) (ROTRIGHT(x,7) ^ ROTRIGHT(x,18) ^ ((x) >> 3)) 26 | #define SIG1(x) (ROTRIGHT(x,17) ^ ROTRIGHT(x,19) ^ ((x) >> 10)) 27 | 28 | /**************************** DATA TYPES ****************************/ 29 | typedef unsigned char BYTE; // 8-bit byte 30 | typedef unsigned int WORD; // 32-bit word, change to "long" for 16-bit machines 31 | 32 | typedef struct { 33 | BYTE data[64]; 34 | WORD datalen; 35 | unsigned long long bitlen; 36 | WORD state[8]; 37 | } SHA256_CTX; 38 | 39 | /*********************** FUNCTION DECLARATIONS **********************/ 40 | void sha256_init(SHA256_CTX *ctx); 41 | void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len); 42 | void sha256_final(SHA256_CTX *ctx, BYTE hash[]); 43 | 44 | #endif // SHA256_H 45 | -------------------------------------------------------------------------------- /miner_gpu.erl: -------------------------------------------------------------------------------- 1 | -module(miner_gpu) 2 | . 3 | -export([start/0, unpack_mining_data/1]). 4 | %-define(Peer, "http://localhost:8081/").%for a full node on same computer. 5 | %-define(Peer, "http://localhost:8085/").%for a mining pool on the same computer. 6 | -define(Peer, "http://24.5.185.238:8085/").%for a mining pool on an external server 7 | -define(CORES, 1). 8 | -define(Pubkey, <<"BIGGeST9w6M//7Bo8iLnqFSrLLnkDXHj9WFFc+kwxeWm2FBBi0NDS0ERROgBiNQqv47wkh0iABPN1/2ECooCTOM=">>). 9 | -define(timeout, 600).%how long to wait in seconds before checking if new mining data is available. 10 | -define(pool_sleep_period, 1000).%How long to wait in miliseconds if we cannot connect to the mining pool. 11 | %This should probably be around 1/20th of the blocktime. 12 | 13 | unpack_mining_data(R) -> 14 | <<_:(8*11), R2/binary>> = list_to_binary(R), 15 | {First, R3} = slice(R2, hd("\"")), 16 | <<_:(8*2), R4/binary>> = R3, 17 | {Second, R5} = slice(R4, hd("\"")), 18 | <<_:8, R6/binary>> = R5, 19 | {Third, _} = slice(R6, hd("]")), 20 | F = base64:decode(First), 21 | S = base64:decode(Second), 22 | {F, S, Third}. 23 | start() -> 24 | io:fwrite("Started mining.\n"), 25 | start2(). 26 | start2() -> 27 | flush(), 28 | Data = <<"[\"mining_data\"]">>, 29 | R = talk_helper(Data, ?Peer, 1000), 30 | if 31 | is_list(R) -> 32 | start_gpu_miner(R); 33 | is_atom(R) -> 34 | timer:sleep(1000), 35 | start() 36 | end. 37 | read_nonce(0) -> 0; 38 | read_nonce(N) -> 39 | case file:read_file("nonce.txt") of 40 | {ok, <>} -> Nonce; 41 | {ok, <<>>} -> 42 | io:fwrite("nonce failed "), 43 | io:fwrite(integer_to_list(N)), 44 | io:fwrite("\n"), 45 | timer:sleep(100), 46 | read_nonce(N-1) 47 | end. 48 | 49 | start_gpu_miner(R) -> 50 | {F, _, Third} = unpack_mining_data(R), %S is the nonce 51 | RS = crypto:strong_rand_bytes(32), 52 | ok = file:write_file("nonce.txt", <<"">>), 53 | file:write_file("mining_input", <>), 54 | Port = open_port({spawn, "./amoveo_gpu_miner"},[exit_status]), 55 | receive 56 | {Port, {exit_status,1}}-> 57 | io:fwrite("Found a block. 1\n"), 58 | Nonce = read_nonce(1), 59 | BinNonce = base64:encode(<>), 60 | Data = << <<"[\"work\",\"">>/binary, BinNonce/binary, <<"\",\"">>/binary, ?Pubkey/binary, <<"\"]">>/binary>>, 61 | talk_helper(Data, ?Peer, 5), 62 | io:fwrite("Found a block. 2\n"), 63 | timer:sleep(100); 64 | {Port, {exit_status,0}}-> 65 | io:fwrite("did not find a block in that period \n"), 66 | ok 67 | end, 68 | start2(). 69 | 70 | talk_helper2(Data, Peer) -> 71 | httpc:request(post, {Peer, [], "application/octet-stream", iolist_to_binary(Data)}, [{timeout, 3000}], []). 72 | talk_helper(_Data, _Peer, 0) -> throw("talk helper failed"); 73 | talk_helper(Data, Peer, N) -> 74 | case talk_helper2(Data, Peer) of 75 | {ok, {_Status, _Headers, []}} -> 76 | io:fwrite("server gave confusing response\n"), 77 | timer:sleep(?pool_sleep_period), 78 | talk_helper(Data, Peer, N-1); 79 | {ok, {_, _, R}} -> R; 80 | %{error, _} -> 81 | E -> 82 | io:fwrite("\nIf you are running a solo-mining node, then this error may have happened because you need to turn on and sync your Amoveo node before you can mine. You can get it here: https://github.com/zack-bitcoin/amoveo \n If this error happens while connected to the public mining node, then it can probably be safely ignored."), 83 | timer:sleep(?pool_sleep_period), 84 | talk_helper(Data, Peer, N-1) 85 | end. 86 | slice(Bin, Char) -> 87 | slice(Bin, Char, 0). 88 | slice(Bin, Char, N) -> 89 | NN = N*8, 90 | <> = Bin, 91 | if 92 | N > size(Bin) -> 1=2; 93 | (Char == Char2) -> 94 | {<>, Second}; 95 | true -> 96 | slice(Bin, Char, N+1) 97 | end. 98 | flush() -> 99 | receive 100 | _ -> 101 | flush() 102 | after 103 | 0 -> 104 | ok 105 | end. 106 | -------------------------------------------------------------------------------- /sha256.c: -------------------------------------------------------------------------------- 1 | /********************************************************************* 2 | * Filename: sha256.c 3 | * Author: Brad Conte (brad AT bradconte.com) 4 | * Copyright: 5 | * Disclaimer: This code is presented "as is" without any guarantees. 6 | * Details: Implementation of the SHA-256 hashing algorithm. 7 | SHA-256 is one of the three algorithms in the SHA2 8 | specification. The others, SHA-384 and SHA-512, are not 9 | offered in this implementation. 10 | Algorithm specification can be found here: 11 | * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2withchangenotice.pdf 12 | This implementation uses little endian byte order. 13 | *********************************************************************/ 14 | 15 | /*************************** HEADER FILES ***************************/ 16 | #include 17 | #include 18 | #include "sha256.h" 19 | 20 | /**************************** VARIABLES *****************************/ 21 | static const WORD k[64] = { 22 | 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5, 23 | 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174, 24 | 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da, 25 | 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967, 26 | 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85, 27 | 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070, 28 | 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3, 29 | 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 30 | }; 31 | 32 | /*********************** FUNCTION DEFINITIONS ***********************/ 33 | void sha256_transform(SHA256_CTX *ctx, const BYTE data[]) 34 | { 35 | WORD a, b, c, d, e, f, g, h, i, j, t1, t2, m[64]; 36 | 37 | for (i = 0, j = 0; i < 16; ++i, j += 4) 38 | m[i] = (data[j] << 24) | (data[j + 1] << 16) | (data[j + 2] << 8) | (data[j + 3]); 39 | for ( ; i < 64; ++i) 40 | m[i] = SIG1(m[i - 2]) + m[i - 7] + SIG0(m[i - 15]) + m[i - 16]; 41 | 42 | a = ctx->state[0]; 43 | b = ctx->state[1]; 44 | c = ctx->state[2]; 45 | d = ctx->state[3]; 46 | e = ctx->state[4]; 47 | f = ctx->state[5]; 48 | g = ctx->state[6]; 49 | h = ctx->state[7]; 50 | 51 | for (i = 0; i < 64; ++i) { 52 | t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i]; 53 | t2 = EP0(a) + MAJ(a,b,c); 54 | h = g; 55 | g = f; 56 | f = e; 57 | e = d + t1; 58 | d = c; 59 | c = b; 60 | b = a; 61 | a = t1 + t2; 62 | } 63 | 64 | ctx->state[0] += a; 65 | ctx->state[1] += b; 66 | ctx->state[2] += c; 67 | ctx->state[3] += d; 68 | ctx->state[4] += e; 69 | ctx->state[5] += f; 70 | ctx->state[6] += g; 71 | ctx->state[7] += h; 72 | } 73 | 74 | void sha256_init(SHA256_CTX *ctx) 75 | { 76 | ctx->datalen = 0; 77 | ctx->bitlen = 0; 78 | ctx->state[0] = 0x6a09e667; 79 | ctx->state[1] = 0xbb67ae85; 80 | ctx->state[2] = 0x3c6ef372; 81 | ctx->state[3] = 0xa54ff53a; 82 | ctx->state[4] = 0x510e527f; 83 | ctx->state[5] = 0x9b05688c; 84 | ctx->state[6] = 0x1f83d9ab; 85 | ctx->state[7] = 0x5be0cd19; 86 | } 87 | 88 | void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len) 89 | { 90 | WORD i; 91 | 92 | for (i = 0; i < len; ++i) { 93 | ctx->data[ctx->datalen] = data[i]; 94 | ctx->datalen++; 95 | if (ctx->datalen == 64) { 96 | sha256_transform(ctx, ctx->data); 97 | ctx->bitlen += 512; 98 | ctx->datalen = 0; 99 | } 100 | } 101 | } 102 | 103 | void sha256_final(SHA256_CTX *ctx, BYTE hash[]) 104 | { 105 | WORD i; 106 | 107 | i = ctx->datalen; 108 | 109 | // Pad whatever data is left in the buffer. 110 | if (ctx->datalen < 56) { 111 | ctx->data[i++] = 0x80; 112 | while (i < 56) 113 | ctx->data[i++] = 0x00; 114 | } 115 | else { 116 | ctx->data[i++] = 0x80; 117 | while (i < 64) 118 | ctx->data[i++] = 0x00; 119 | sha256_transform(ctx, ctx->data); 120 | memset(ctx->data, 0, 56); 121 | } 122 | 123 | // Append to the padding the total message's length in bits and transform. 124 | ctx->bitlen += ctx->datalen * 8; 125 | ctx->data[63] = ctx->bitlen; 126 | ctx->data[62] = ctx->bitlen >> 8; 127 | ctx->data[61] = ctx->bitlen >> 16; 128 | ctx->data[60] = ctx->bitlen >> 24; 129 | ctx->data[59] = ctx->bitlen >> 32; 130 | ctx->data[58] = ctx->bitlen >> 40; 131 | ctx->data[57] = ctx->bitlen >> 48; 132 | ctx->data[56] = ctx->bitlen >> 56; 133 | sha256_transform(ctx, ctx->data); 134 | 135 | // Since this implementation uses little endian byte ordering and SHA uses big endian, 136 | // reverse all the bytes when copying the final state to the output hash. 137 | for (i = 0; i < 4; ++i) { 138 | hash[i] = (ctx->state[0] >> (24 - i * 8)) & 0x000000ff; 139 | hash[i + 4] = (ctx->state[1] >> (24 - i * 8)) & 0x000000ff; 140 | hash[i + 8] = (ctx->state[2] >> (24 - i * 8)) & 0x000000ff; 141 | hash[i + 12] = (ctx->state[3] >> (24 - i * 8)) & 0x000000ff; 142 | hash[i + 16] = (ctx->state[4] >> (24 - i * 8)) & 0x000000ff; 143 | hash[i + 20] = (ctx->state[5] >> (24 - i * 8)) & 0x000000ff; 144 | hash[i + 24] = (ctx->state[6] >> (24 - i * 8)) & 0x000000ff; 145 | hash[i + 28] = (ctx->state[7] >> (24 - i * 8)) & 0x000000ff; 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /amoveo_pow_gpu.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "sha256.h" 6 | #include "utils.h" 7 | 8 | int amoveo_mine_gpu(BYTE nonce[32], unsigned int difficulty, BYTE data[32], unsigned int,unsigned int,unsigned int); 9 | 10 | WORD hash2integer(BYTE h[32]); 11 | static WORD pair2sci(WORD l[2]); 12 | int check_pow(BYTE nonce[32], int, BYTE data[32]); 13 | 14 | WORD hash2integer(BYTE h[32]) { 15 | WORD x = 0; 16 | WORD y[2]; 17 | for (int i = 0; i < 31; i++) { 18 | if (h[i] == 0) { 19 | x += 8; 20 | y[1] = h[i+1]; 21 | continue; 22 | } else if (h[i] < 2) { 23 | x += 7; 24 | y[1] = (h[i] * 128) + (h[i+1] / 2); 25 | } else if (h[i] < 4) { 26 | x += 6; 27 | y[1] = (h[i] * 64) + (h[i+1] / 4); 28 | } else if (h[i] < 8) { 29 | x += 5; 30 | y[1] = (h[i] * 32) + (h[i+1] / 8); 31 | } else if (h[i] < 16) { 32 | x += 4; 33 | y[1] = (h[i] * 16) + (h[i+1] / 16); 34 | } else if (h[i] < 32) { 35 | x += 3; 36 | y[1] = (h[i] * 8) + (h[i+1] / 32); 37 | } else if (h[i] < 64) { 38 | x += 2; 39 | y[1] = (h[i] * 4) + (h[i+1] / 64); 40 | } else if (h[i] < 128) { 41 | x += 1; 42 | y[1] = (h[i] * 2) + (h[i+1] / 128); 43 | } else { 44 | y[1] = h[i]; 45 | } 46 | break; 47 | } 48 | y[0] = x; 49 | return(pair2sci(y)); 50 | } 51 | static WORD pair2sci(WORD l[2]) { 52 | return((256*l[0]) + l[1]); 53 | } 54 | int check_pow(BYTE nonce[32], int difficulty, BYTE data[32]) { 55 | BYTE text[66];//32+2+32 56 | for (int i = 0; i < 32; i++) 57 | text[i] = data[i]; 58 | text[32] = difficulty / 256; 59 | text[33] = difficulty % 256; 60 | for (int i = 0; i < 32; i++) 61 | text[i+34] = nonce[i]; 62 | 63 | SHA256_CTX ctx; 64 | sha256_init(&ctx); 65 | sha256_update(&ctx, text, 66); 66 | BYTE buf[32]; 67 | sha256_final(&ctx, buf); 68 | 69 | int i = hash2integer(buf); 70 | return(i > difficulty); 71 | } 72 | void write_nonce(BYTE x[32]) { 73 | FILE *f = fopen("nonce.txt", "w"); 74 | if (f == NULL) { 75 | printf("Error opening file!\n"); 76 | //exit(1); 77 | } 78 | rewind(f);//unnecessary line? 79 | fwrite(x, 1, 32, f); 80 | fclose(f); 81 | return; 82 | } 83 | int get_height(){ 84 | char buf[256]; 85 | FILE* fp; 86 | fp = popen("curl -s -i -d \'[\"height\"]\' http://24.5.185.238:8080","r"); 87 | if(fp == NULL) 88 | { 89 | printf("Couldn't get height from node\n"); 90 | return 0; 91 | } 92 | 93 | while(fgets(buf,sizeof(buf),fp) != NULL) 94 | continue; 95 | 96 | int start; 97 | int end; 98 | for(int i = 0; i < sizeof(buf) ; i++) 99 | { 100 | if(buf[i] == ',') 101 | start = i; 102 | if(buf[i] == ']') 103 | { 104 | end = i; 105 | break; 106 | } 107 | } 108 | buf[end]=0; 109 | return atoi(buf+start+1); 110 | } 111 | int read_input(BYTE B[32], BYTE N[32], WORD id) { 112 | FILE *fileptr; 113 | fileptr = fopen("mining_input", "rb"); 114 | fseek(fileptr, 0, SEEK_END); // Jump to the end of the file 115 | int filelen = ftell(fileptr); // Get the current byte offset in the file 116 | //ftell returns a long, maybe we shouldn't truncate it. 117 | rewind(fileptr); 118 | fread(B, 32, 1, fileptr); 119 | fread(N, 32, 1, fileptr); 120 | N[28] = id % 256; 121 | N[29] = (id / 256) % 256; 122 | N[30] = ((id / 256) / 256) % 256; 123 | N[31] = (((id / 256) / 256) / 256) % 256; 124 | BYTE buffer[10] = { 0 }; 125 | fread(buffer, filelen-64, 1, fileptr); 126 | int diff = 0; 127 | BYTE c = 1; 128 | for (int i = 0; i < 10; i++) { 129 | c = buffer[i]; 130 | if (c == 0) { 131 | break; 132 | } 133 | diff *= 10; 134 | diff += (c - '0'); 135 | } 136 | fclose(fileptr); // Close the file 137 | return diff; 138 | } 139 | 140 | 141 | int correctness_CUDA(){ 142 | printf("Starting correctness test for 10 seconds.\n"); 143 | double timeout = 10.0; 144 | srand(time(NULL)); 145 | 146 | BYTE nonce[32]; 147 | BYTE data[66]; 148 | 149 | unsigned int d = 3000; //some low difficulty 150 | unsigned int gdim = 1<<8; 151 | unsigned int bdim = 1<<10; 152 | 153 | double elapsed = 0; 154 | int success = 1; 155 | unsigned int m = 0; 156 | 157 | clock_t t_start; 158 | clock_t t_end; 159 | 160 | t_start = clock(); 161 | do{ 162 | for(int i = 0; i < 32; i++) 163 | { 164 | data[i] = rand()%255; 165 | data[34+i] = 0; 166 | } 167 | data[32] = d/256; 168 | data[33] = d%256; 169 | int r; 170 | for (int i = 0; i < 30; i++) 171 | { 172 | r = rand()%255; 173 | data[i+34] = r; 174 | nonce[i] = r; 175 | } 176 | data[64] = 0; 177 | data[65] = 0; 178 | nonce[30] = 0; 179 | nonce[31] = 0; 180 | 181 | success = amoveo_mine_gpu(nonce,d,data,gdim,bdim,0); 182 | 183 | if(success){ 184 | m++; 185 | 186 | if(!check_pow(nonce,d,data)) 187 | return 0; 188 | }else{ 189 | printf("Your GPU couldn't solve an easy problem!\n"); 190 | return 0; 191 | } 192 | t_end = clock(); 193 | 194 | elapsed = ((double)(t_end-t_start))/CLOCKS_PER_SEC; 195 | 196 | }while(elapsed < timeout); 197 | 198 | printf("Correctness test passed - %d checks\n", m); 199 | return 1; 200 | } 201 | 202 | //Tests hash rate 203 | void perf_CUDA(){ 204 | srand(time(NULL)); 205 | 206 | BYTE nonce[32]; 207 | BYTE data[66]; 208 | 209 | unsigned int d = 1000000; //some super-high difficulty 210 | unsigned int gdim = 1<<10; 211 | unsigned int bdim = 1<<10; 212 | double trials = 10; 213 | 214 | printf("Starting hash rate test for 10 trials.\n"); 215 | 216 | for(int i = 0; i < 32; i++) 217 | { 218 | data[i] = rand()%255; 219 | data[34+i] = 0; 220 | } 221 | data[32] = d/256; 222 | data[33] = d%256; 223 | int r; 224 | for (int i = 0; i < 30; i++) 225 | { 226 | r = rand()%255; 227 | data[i+34] = r; 228 | nonce[i] = r; 229 | } 230 | data[64] = 0; 231 | data[65] = 0; 232 | nonce[30] = 0; 233 | nonce[31] = 0; 234 | 235 | unsigned int m = 0; 236 | double cuda_elapsed = 0; 237 | double elapsed = 0; 238 | int success = 0; 239 | 240 | clock_t t_start; 241 | clock_t t_cudastart; 242 | clock_t t_end; 243 | 244 | double numHashesPerRound = ((double)gdim)*((double)gdim)*((double)bdim); 245 | t_start = clock(); 246 | do{ 247 | t_cudastart = clock(); 248 | success = amoveo_mine_gpu(nonce,d,data,gdim,bdim,m); 249 | 250 | m++; 251 | t_end = clock(); 252 | 253 | cuda_elapsed = ((double)(t_end-t_cudastart))/CLOCKS_PER_SEC; 254 | elapsed = ((double)(t_end-t_start))/CLOCKS_PER_SEC; 255 | printf("CUDA kernel took %f s, Hashrate : %0.2f MH/s, %f total elapsed \n",cuda_elapsed,numHashesPerRound/(1000000.0*cuda_elapsed),elapsed); 256 | }while(!success && m < trials); 257 | 258 | double averageRate = m*numHashesPerRound/(1000000.0*elapsed); 259 | 260 | printf("Hash rate test finished - Average %0.2f MH/s\n",averageRate); 261 | } 262 | 263 | int main(int argc, char *argv[]) 264 | { 265 | FILE *fdebug = fopen("debug.txt","w"); 266 | 267 | int init_height = get_height(); 268 | if(init_height == 0) 269 | return(0); 270 | 271 | BYTE bhash[32]; 272 | BYTE nonce[32]; 273 | if (argc > 1) { 274 | if(strcmp(argv[1],"perftest")==0) 275 | { 276 | if(!correctness_CUDA()) 277 | { 278 | printf("Wrong correctness! Something is wrong.\n"); 279 | return(0); 280 | } 281 | perf_CUDA(); 282 | return(0); 283 | } 284 | } 285 | 286 | int diff = read_input(bhash, nonce, 0); 287 | fprintf(fdebug,"Height : %d, Difficulty : %d\n",init_height, diff); 288 | fflush(fdebug); 289 | 290 | BYTE bdata[66];//32+2+32 291 | for (int i = 0; i < 32; i++) 292 | bdata[i] = bhash[i]; 293 | bdata[32] = diff / 256; 294 | bdata[33] = diff % 256; 295 | for (int i = 0; i < 30; i++) 296 | bdata[i+34] = nonce[i]; 297 | bdata[64] = 0; 298 | bdata[65] = 0; 299 | nonce[30] = 0; 300 | nonce[31] = 0; 301 | 302 | unsigned int bdim = 1<<10; 303 | unsigned int gdim = 1<<11; 304 | 305 | int success = 0; 306 | unsigned int m = 0; 307 | clock_t t_start; 308 | clock_t t_round; 309 | clock_t t_end; 310 | double total_elapsed; 311 | double round_elapsed; 312 | 313 | double HashesPerRound = ((double)gdim)*((double)gdim)*((double)bdim); 314 | 315 | t_start = clock(); 316 | t_round = clock(); 317 | do{ 318 | success = amoveo_mine_gpu(nonce,diff,bdata,gdim,bdim,0); 319 | 320 | t_end = clock(); 321 | round_elapsed = ((double)(t_end-t_round))/CLOCKS_PER_SEC; 322 | total_elapsed = ((double)(t_end-t_start))/CLOCKS_PER_SEC; 323 | fprintf(fdebug,"Round %d Hash Rate : %0.2f MH/s took %0.1f s\n",m,HashesPerRound/(1000000.0*round_elapsed),round_elapsed); 324 | fflush(fdebug); 325 | m++; 326 | t_round = clock(); 327 | 328 | if(get_height() != init_height) 329 | break; 330 | }while(!success); 331 | 332 | if(success){ 333 | fprintf(fdebug,"Nonce found after %f seconds\n",total_elapsed); 334 | write_nonce(nonce); 335 | }else{ 336 | fprintf(fdebug,"Somebody else found nonce within %f seconds\n",total_elapsed); 337 | } 338 | fprintf(fdebug,"\n"); 339 | fflush(fdebug); 340 | 341 | return(success); 342 | } 343 | -------------------------------------------------------------------------------- /sha256_gpu.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | extern "C" { 8 | #include "sha256.h" 9 | #include "utils.h" 10 | } 11 | 12 | #define DATASIZE 66 13 | 14 | __global__ void kernel_sha256(BYTE *data, unsigned int* difficulty, Nonce_result *nr,unsigned int *multiplier); 15 | __device__ WORD hash2int(BYTE h[32]); 16 | 17 | inline void gpuAssert(cudaError_t code, char *file, int line, bool abort) 18 | { 19 | if (code != cudaSuccess) 20 | { 21 | fprintf(stderr,"CUDA_SAFE_CALL: %s %s %d\n", cudaGetErrorString(code), file, line); 22 | if (abort) exit(code); 23 | } 24 | } 25 | 26 | #define CUDA_SAFE_CALL(ans) { gpuAssert((ans), __FILE__, __LINE__, true); } 27 | 28 | extern "C" bool amoveo_mine_gpu(BYTE nonce[32],unsigned int difficulty,BYTE data[66],unsigned int GDIM, unsigned int BDIM, unsigned int multiplier) 29 | { 30 | //Initialize Cuda Grid variables 31 | dim3 DimGrid(GDIM,GDIM); 32 | dim3 DimBlock(BDIM,1); 33 | 34 | //Used to store a nonce if a block is mined 35 | Nonce_result h_nr; 36 | initialize_nonce_result(&h_nr); 37 | 38 | //Allocate space on Global Memory 39 | BYTE *d_data; 40 | unsigned int *d_difficulty; 41 | Nonce_result *d_nr; 42 | unsigned int *d_multiplier; 43 | 44 | CUDA_SAFE_CALL(cudaMalloc((void **)&d_data, DATASIZE*sizeof(BYTE))); 45 | CUDA_SAFE_CALL(cudaMalloc((void **)&d_difficulty, sizeof(unsigned int))); 46 | CUDA_SAFE_CALL(cudaMalloc((void **)&d_nr, sizeof(Nonce_result))); 47 | CUDA_SAFE_CALL(cudaMalloc((void **)&d_multiplier, sizeof(unsigned int))); 48 | 49 | //Copy data to device 50 | CUDA_SAFE_CALL(cudaMemcpy(d_data, (void *) data, DATASIZE*sizeof(BYTE), cudaMemcpyHostToDevice)); 51 | CUDA_SAFE_CALL(cudaMemcpy(d_difficulty, (void *) &difficulty, sizeof(unsigned int), cudaMemcpyHostToDevice)); 52 | CUDA_SAFE_CALL(cudaMemcpy(d_nr, (void *) &h_nr, sizeof(Nonce_result), cudaMemcpyHostToDevice)); 53 | CUDA_SAFE_CALL(cudaMemcpy(d_multiplier, (void *) &multiplier, sizeof(unsigned int), cudaMemcpyHostToDevice)); 54 | 55 | kernel_sha256<<>>(d_data,d_difficulty,d_nr,d_multiplier); 56 | 57 | //Copy nonce result back to host 58 | CUDA_SAFE_CALL(cudaMemcpy((void *) &h_nr, d_nr, sizeof(Nonce_result), cudaMemcpyDeviceToHost)); 59 | 60 | cudaDeviceSynchronize(); 61 | 62 | //Free memory on device 63 | CUDA_SAFE_CALL(cudaFree(d_data)); 64 | CUDA_SAFE_CALL(cudaFree(d_difficulty)); 65 | CUDA_SAFE_CALL(cudaFree(d_nr)); 66 | CUDA_SAFE_CALL(cudaFree(d_multiplier)); 67 | 68 | //Copy nonce if found 69 | if(h_nr.nonce_found){ 70 | for(int i=34; i<66;i++) 71 | nonce[i-34]=data[i]; 72 | for(int i=0; istate[0]; 141 | b = ctx->state[1]; 142 | c = ctx->state[2]; 143 | d = ctx->state[3]; 144 | e = ctx->state[4]; 145 | f = ctx->state[5]; 146 | g = ctx->state[6]; 147 | h = ctx->state[7]; 148 | 149 | for (i = 0; i < 64; ++i) { 150 | t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i]; 151 | t2 = EP0(a) + MAJ(a,b,c); 152 | h = g; 153 | g = f; 154 | f = e; 155 | e = d + t1; 156 | d = c; 157 | c = b; 158 | b = a; 159 | a = t1 + t2; 160 | } 161 | 162 | ctx->state[0] += a; 163 | ctx->state[1] += b; 164 | ctx->state[2] += c; 165 | ctx->state[3] += d; 166 | ctx->state[4] += e; 167 | ctx->state[5] += f; 168 | ctx->state[6] += g; 169 | ctx->state[7] += h; 170 | } 171 | 172 | __device__ void d_sha256_init(SHA256_CTX *ctx) 173 | { 174 | ctx->datalen = 0; 175 | ctx->bitlen = 0; 176 | ctx->state[0] = 0x6a09e667; 177 | ctx->state[1] = 0xbb67ae85; 178 | ctx->state[2] = 0x3c6ef372; 179 | ctx->state[3] = 0xa54ff53a; 180 | ctx->state[4] = 0x510e527f; 181 | ctx->state[5] = 0x9b05688c; 182 | ctx->state[6] = 0x1f83d9ab; 183 | ctx->state[7] = 0x5be0cd19; 184 | } 185 | 186 | __device__ void d_sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len) 187 | { 188 | WORD i; 189 | 190 | for (i = 0; i < len; ++i) { 191 | ctx->data[ctx->datalen] = data[i]; 192 | ctx->datalen++; 193 | if (ctx->datalen == 64) { 194 | d_sha256_transform(ctx, ctx->data); 195 | ctx->bitlen += 512; 196 | ctx->datalen = 0; 197 | } 198 | } 199 | } 200 | 201 | __device__ void d_sha256_final(SHA256_CTX *ctx, BYTE hash[]) 202 | { 203 | WORD i; 204 | 205 | i = ctx->datalen; 206 | 207 | // Pad whatever data is left in the buffer. 208 | ctx->data[i++] = 0x80; 209 | while (i < 56) 210 | ctx->data[i++] = 0x00; 211 | 212 | // Append to the padding the total message's length in bits and transform. 213 | ctx->bitlen += ctx->datalen * 8; 214 | ctx->data[63] = ctx->bitlen; 215 | ctx->data[62] = ctx->bitlen >> 8; 216 | ctx->data[61] = ctx->bitlen >> 16; 217 | ctx->data[60] = ctx->bitlen >> 24; 218 | ctx->data[59] = ctx->bitlen >> 32; 219 | ctx->data[58] = ctx->bitlen >> 40; 220 | ctx->data[57] = ctx->bitlen >> 48; 221 | ctx->data[56] = ctx->bitlen >> 56; 222 | 223 | d_sha256_transform(ctx, ctx->data); 224 | 225 | // Since this implementation uses little endian byte ordering and SHA uses big endian, 226 | // reverse all the bytes when copying the final state to the output hash. 227 | for (i = 0; i < 4; ++i) { 228 | hash[i] = (ctx->state[0] >> (24 - i * 8)) & 0x000000ff; 229 | hash[i + 4] = (ctx->state[1] >> (24 - i * 8)) & 0x000000ff; 230 | hash[i + 8] = (ctx->state[2] >> (24 - i * 8)) & 0x000000ff; 231 | hash[i + 12] = (ctx->state[3] >> (24 - i * 8)) & 0x000000ff; 232 | hash[i + 16] = (ctx->state[4] >> (24 - i * 8)) & 0x000000ff; 233 | hash[i + 20] = (ctx->state[5] >> (24 - i * 8)) & 0x000000ff; 234 | hash[i + 24] = (ctx->state[6] >> (24 - i * 8)) & 0x000000ff; 235 | hash[i + 28] = (ctx->state[7] >> (24 - i * 8)) & 0x000000ff; 236 | } 237 | } 238 | 239 | #define NONCE_VAL (gridDim.x*blockDim.x*blockIdx.y + blockDim.x*blockIdx.x + threadIdx.x) 240 | 241 | __global__ void kernel_sha256(BYTE *data, unsigned int *difficulty, Nonce_result *nr, unsigned int* multiplier) { 242 | 243 | if(nr->nonce_found) return; 244 | int i; 245 | int64_t nonce = gridDim.x*gridDim.x; 246 | nonce *= blockDim.x; 247 | nonce *= *multiplier; 248 | nonce += NONCE_VAL; 249 | 250 | BYTE* byte_nonce = (BYTE *)&nonce; 251 | 252 | BYTE l_data[66]; 253 | for(i=0;i<66;i++) 254 | l_data[i] = data[i]; 255 | for(i=0;i *difficulty) 267 | { 268 | nr->nonce_found = true; 269 | nr->nonce = nonce; 270 | } 271 | } 272 | 273 | --------------------------------------------------------------------------------