├── .travis.yml ├── GPU.md ├── GPU_tuning.md ├── LICENSE.txt ├── README.md ├── doc ├── IEEEtran.bst ├── Makefile ├── Nano_PoW ├── SipHash ├── SipHash12 ├── SipHash13 ├── bitcoin2015.tex ├── blog ├── cuckoo.bib ├── cuckoo.pdf ├── cuckoo.tex ├── leancore ├── mathspec ├── pbkdf ├── simplesolve └── spec ├── img ├── cuckatoo_cycle.jpg ├── cuckoo.png ├── emissions.gif ├── grinlogo.pdf ├── grinlogo.ps ├── inflation.gif └── logo.png ├── private ├── bfs ├── comp.pl ├── cycle_freq.eps ├── cyclefreq ├── cycles.eps ├── equihash ├── found.pl ├── found25 ├── fracs ├── minbfs ├── real ├── siphash-2-4 ├── speedup30 ├── speedup30na0 ├── speedup30na1 ├── speedup30na2 ├── speedup32 ├── speedup32na0 └── speedup32na1 └── src ├── Makefile ├── crypto ├── blake2-impl.h ├── blake2.h ├── blake2b-ref.c ├── portable_endian.h ├── siphash.cuh ├── siphash.hpp └── siphashxN.h ├── cuckaroo ├── Makefile ├── bitmap.hpp ├── compress.hpp ├── cuckaroo.c ├── cuckaroo.hpp ├── graph.hpp ├── mean.cpp ├── mean.cu ├── mean.hpp └── simple.cpp ├── cuckarood ├── Makefile ├── bitmap.hpp ├── compress.hpp ├── cuckarood.c ├── cuckarood.hpp ├── graph.hpp ├── kernel.cuh ├── mean.cpp ├── mean.cu ├── mean.hpp ├── photon.cu └── simple.cpp ├── cuckaroom ├── Makefile ├── bitmap.hpp ├── compress.hpp ├── cuckaroom.c ├── cuckaroom.hpp ├── graph.hpp ├── kernel.cuh ├── mean.cpp ├── mean.cu ├── mean.hpp ├── meaner.cu └── simple.cpp ├── cuckarooz ├── Makefile ├── bitmap.hpp ├── compress.hpp ├── cuckarooz.c ├── cuckarooz.hpp ├── graph.hpp ├── kernel.cuh ├── mean.cpp ├── mean.cu ├── mean.hpp └── simple.cpp ├── cuckatoo ├── Makefile ├── bitmap.hpp ├── compress.hpp ├── cuckatoo.c ├── cuckatoo.h ├── cumal.cu ├── graph.hpp ├── lean.cpp ├── lean.cu ├── lean.hpp ├── mean.cpp ├── mean.cu ├── mean.hpp └── simple.cpp ├── cuckoo ├── Makefile ├── cuckoo.c ├── cuckoo.h ├── cyclebase.hpp ├── lean.cpp ├── lean.cu ├── lean.hpp ├── mean.cpp ├── mean.cu ├── mean.hpp └── simple.cpp ├── java ├── Cuckoo.java ├── Makefile └── SimpleMiner.java ├── perl 
└── cycles.pl ├── threads └── barrier.hpp └── tmto ├── momentomatum.cpp ├── momentomatum.h ├── tomato_miner.cpp └── tomato_miner.h /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: trusty 2 | sudo: false 3 | language: cpp 4 | env: 5 | - JOB=cuckoo 6 | - JOB=cuckatoo 7 | - JOB=java 8 | - JOB=doc 9 | matrix: 10 | include: 11 | - os: osx 12 | osx_image: xcode8.3 # [`xcode8.3` is Xcode 8.3.3 on OS X 10.12](https://docs.travis-ci.com/user/reference/osx#OS-X-Version) 13 | language: generic 14 | env: JOB=java 15 | addons: 16 | apt: 17 | packages: 18 | - texlive-latex-base 19 | - texlive-pictures 20 | - pgf 21 | before_script: 22 | - case "${TRAVIS_OS_NAME:?}" in linux) LIBV=LD_LIBRARY_PATH;; osx) LIBV=DYLD_LIBRARY_PATH;; esac 23 | - echo "The library path variable name is ${LIBV:?}" 24 | - LIBP="$(pwd)/src" 25 | - echo "The library path variable value is ${LIBP:?}" 26 | script: 27 | - if test cuckoo = "${JOB:?}"; then ( cd src/cuckoo && make; ); fi 28 | - if test cuckatoo = "${JOB:?}"; then ( cd src/cuckatoo && make; ); fi 29 | - if test java = "${JOB:?}"; then ( cd src/java && make; ); fi 30 | - if test doc = "${JOB:?}"; then ( cd doc && make cuckoo.pdf; ); fi 31 | after_success: 32 | - | 33 | if test doc = "${JOB:?}"; then 34 | echo "Below is the generated PDF encoded as base64. You can decode it using 'openssl base64 -d'." 
35 | cat doc/cuckoo.pdf | openssl base64 36 | fi 37 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The FAIR MINING License 2 | 3 | Copyright (c) 2013-2019 John Tromp 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | FAIR MINING 13 | Any derived miner that charges a developer fee for mining a fair coin 14 | ---one with no premine or other form of developer compensation--- 15 | shall offer to share half the fee revenue with the coin developers. 16 | 17 | The above copyright notice, FAIR MINING condition, and this permission notice 18 | shall be included in all copies or substantial portions of the Software. 19 | 20 | 21 | ALTERNATIVELY, this software may be distributed under the terms of the 22 | GNU General Public License ("GPL") version 2 or later, as published by 23 | the Free Software Foundation. 24 | 25 | 26 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 27 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 28 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 29 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 30 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 31 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 | SOFTWARE. 
33 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | cuckoo.pdf: cuckoo.tex cuckoo.bbl 2 | pdflatex cuckoo.tex 3 | # The following two `pdflatex` runs are needed to resolve references of citations from bibliography. (See [Wikibooks](https://en.wikibooks.org/w/index.php?title=LaTeX/Bibliography_Management&oldid=3442012#Why_won't_LaTeX_generate_any_output?).) 4 | pdflatex cuckoo.tex 5 | pdflatex cuckoo.tex 6 | 7 | cuckoo.bbl: cuckoo.aux cuckoo.bib 8 | bibtex cuckoo 9 | 10 | cuckoo.aux: cuckoo.tex 11 | latex cuckoo.tex 12 | 13 | -------------------------------------------------------------------------------- /doc/Nano_PoW: -------------------------------------------------------------------------------- 1 | Nanocoin author Colin Lemahieu recently made available [1] a newly developed proof of work scheme "Nano PoW" 2 | whose solutions are pairs (x,y) satisfying 3 | 4 | H0(x) + H1(y) = 0 mod D 5 | 6 | where H0,H1 are two different hash functions and D = 2^d is a difficulty parameter. 7 | 8 | It is claimed to be memory-hard, requiring on the order of sqrt(D) = 2^(d/2) memory for efficient solving. 9 | 10 | However, a simple variation of rho search [2] can efficiently find solutions without any use of memory. 11 | First, note that by defining H2(y) = -H1(y), the above equation simplifies to 12 | 13 | H0(x) = H2(y) mod D 14 | 15 | Next, define a chain of values 16 | 17 | x_{2k} = H0( x_{2k-1} ) mod D 18 | x_{2k+1} = H2( x_{2k } ) mod D 19 | 20 | starting at some arbitrary x_0. 21 | 22 | This chain is expected to have repeating values x_i = x_j, i < j, within O(sqrt(D)) steps, 23 | which can be found efficiently using the before mentioned rho search in time O(sqrt(D)) and zero memory. 
24 | 25 | EDIT: PlasmaPower pointed out [3] that the chain almost certainly diverges again at x_{i+1} and x_{j+1}, 26 | preventing rho search from cycling around the loop more than once. 27 | 28 | So instead let's consider a chain of values 29 | 30 | x_{i+1} = min( H0(x_i), H2(x_i) ) mod D 31 | 32 | starting at some arbitrary x_0. 33 | 34 | This chain is expected to have repeating values x_i = x_j, i < j, within O(sqrt(D)) steps, 35 | which this time can be found efficiently with rho search. 36 | 37 | With some probability, x_i and x_j will result from different hash functions of x_{i-1} and x_{j-1} respectively, e.g. 38 | 39 | H0(x_{i-1}) = x_i = x_j = H2(x_{j-1}) mod D 40 | 41 | yielding a solution (x,y) = (x_{i-1}, x_{j-1}) to the PoW. 42 | 43 | Btw, another chain of values that should work is 44 | 45 | x_{i+1} = if H3(x_i) < P * 2^64 then H0(x_i) else H2(x_i) 46 | 47 | where H3 is another arbitrary (64-bit output) hash function, and P is the probability of taking an H0 step. 48 | 49 | [1] https://github.com/nanocurrency/nano-pow 50 | [2] Parallel Collision Search with Cryptanalytic Applications: https://people.scs.carleton.ca/~paulv/papers/JoC97.pdf 51 | [3] https://www.reddit.com/r/nanocurrency/comments/dces6e/nanopow_the_details/f2aw6bx/ 52 | -------------------------------------------------------------------------------- /doc/blog: -------------------------------------------------------------------------------- 1 | http://cryptorials.io/beyond-hashcash-proof-work-theres-mining-hashing/ 2 | 3 | Beyond the Hashcash Proof-of-Work 4 | (there's more to mining than hashing) 5 | ------------------------------------- 6 | 7 | Many people equate Proof of Work (PoW) with one particular instance of it. 8 | It's not hard to understand why. The Hashcash PoW is used not only in Bitcoin 9 | but in the vast majority of altcoins as well. 
10 | 11 | In Hashcash, miners all compete to look for a so called `nonce' which, 12 | if provided as input (together with other parts of a block header) to a hash function, 13 | yields an output that's numerically small enough to claim the next block reward. 14 | 15 | Where most crypto currencies differ is in the choice of hash function; the Hashcash flavor as it were. 16 | Besides Bitcoin's `vanilla' flavor of SHA256, there is Litecoin's scrypt, Cryptonote's CryptoNight, 17 | Darkcoin's X11, and many more. Most alternative flavors have the explicitly stated goal of reducing the 18 | performance gap between custom and commodity hardware, either by use of memory, or by sheer complexity. 19 | 20 | But miners are only part of the picture. Proofs of work must not only be found, but verified as well, 21 | by every single client, including smartphones and other devices with limited resources. In Hashcash, 22 | verification amounts to evaluating the hash function on the given nonce and comparing the output with 23 | the difficulty threshold. Which is exactly the same effort as a single proof attempt. 24 | 25 | Thus, in order to keep verification cheap, hash functions in Hashcash must restrict their resource usage as well. 26 | That's why scrypt is configured to use only 128KB of memory. 27 | 28 | Non-Hashcash PoWs do not suffer this limitation; they are asymmetric, with verification much cheaper 29 | than proof attempt. The first such PoW is Primecoin, which finds chains of nearly doubled prime numbers. 30 | The most recent example is my Cuckoo Cycle PoW, which was presented at the BITCOIN'2015 31 | workshop in January. The whitepaper can be found at https://github.com/tromp/cuckoo, 32 | which also hosts various implementations, as well as bounties for improving on them. 33 | 34 | In Cuckoo Cycle, proofs take the form of a length 42 cycle (loop) in a large random graph defined by some nonce. 
35 | Imagine two countries, each with a billion cities, and imagine picking a billion border crossing roads that 36 | connect a random city in one country to a random city in the other country (the PoW actually uses a cheaply 37 | computed hash function to map the nonce, road number, and country to a city). 38 | We are asked if there is a cycle of 42 roads visiting 42 different cities. 39 | If someone hands you a nonce and 42 road numbers, it is indeed easy to verify, requiring negligible time and memory. 40 | 41 | But finding such a cycle is no easy task. Note however, that a city that connects to one road only cannot be part 42 | of the solution, nor can that road. David Andersen pointed out that such dead-end roads can be repeatedly eliminated, 43 | using one bit of memory per road to remember if that road is useful, 44 | and two bits per city to count if there are zero, one, or multiple useful roads to that city. 45 | This process of computing counts for cities, and marking roads that lead to a city with count one as not useful, 46 | is the essence of Cuckoo Cycle mining and accounts for about 98% of the effort. 47 | It results in billions of random global memory accesses for reading and writing the counters. 48 | Consequently, about 2/3 of the runtime is memory latency, making this a low-power algorithm that keeps 49 | computers running cool. 50 | 51 | After a sufficient number of counting and marking rounds, so few useful roads remain that another algorithm, 52 | inspired by Cuckoo Hashing, can quickly identify cycles (re-using the memory for the no longer needed counters). 53 | 54 | Cuckoo Cycle has some downsides as well. First of all, proofs are large and will roughly triple the size of block headers. 55 | Secondly, it is very slow, taking for instance the better part of a minute on a high end CPU (or GPU, which offers roughly the same speed) to look for a cycle among a billion roads. 
56 | 57 | In order to give slower CPUs a (somewhat) fair chance to win, the block interval should be much longer than 58 | a single proof attempt, so the amount of memory Cuckoo Cycle can use is constrained by the choice of block interval length. 59 | 60 | These seem like reasonable compromises for an instantly verifiable memory bound PoW that is unique in being dominated 61 | by latency rather than computation. In that sense, mining Cuckoo Cycle is a form of ASIC mining where DRAM chips serve 62 | the application of randomly reading and writing billions of bits. 63 | 64 | When even phones charging overnight can mine without orders of magnitude loss in efficiency, not with a mindset 65 | of profitability but of playing the lottery, the mining hardware landscape will see vast expansion, benefiting 66 | adoption as well as decentralization. 67 | 68 | 69 | 1 Comment 70 | 71 | The article is no longer accurate regarding Cuckoo Cycle being latency bound. 72 | Someone going by the handle of “xenoncat” demonstrated a bandwidth bound method 73 | of edge trimming that’s 4x faster, although it uses about 12x more memory. On 74 | the plus side, this allows a GPU to process a billion node graph in under a 75 | second, which makes Cuckoo Cycle suitable for smaller block intervals. On the 76 | down side, this makes the use of phones look less attractive. 
77 | 78 | -------------------------------------------------------------------------------- /doc/cuckoo.bib: -------------------------------------------------------------------------------- 1 | @inproceedings{Birmele2013, 2 | author = {Birmel{\'e}, Etienne and Ferreira, Rui and Grossi, Roberto and Marino, Andrea and Pisanti, Nadia and Rizzi, Romeo and Sacomoto, Gustavo}, 3 | title = {Optimal Listing of Cycles and St-paths in Undirected Graphs}, 4 | booktitle = {Proceedings of the Twenty-fourth Annual ACM-SIAM Symposium on Discrete Algorithms}, 5 | series = {SODA '13}, 6 | year = {2013}, 7 | isbn = {978-1-611972-51-1}, 8 | location = {New Orleans, Louisiana}, 9 | pages = {1884--1896}, 10 | numpages = {13}, 11 | url = {http://dl.acm.org/citation.cfm?id=2627817.2627951}, 12 | acmid = {2627951}, 13 | publisher = {Society for Industrial and Applied Mathematics}, 14 | address = {Philadelphia, PA, USA}, 15 | } 16 | 17 | @article{1980-brent-cycles, 18 | author={Richard P. Brent}, 19 | ISSN={0006--3835}, 20 | journal={BIT}, 21 | MR={82a:10017}, 22 | pages={176--184}, 23 | title={{An improved Monte Carlo factorization algorithm}}, 24 | volume={20}, 25 | year={1980} 26 | } 27 | 28 | @techreport{nakamoto2009bitcoin, 29 | author = {Nakamoto, Satoshi}, 30 | keywords = {bitcoin cash electronic peer privacy}, 31 | month = may, 32 | title = {Bitcoin: A Peer-to-Peer Electronic Cash System}, 33 | url = {http://www.bitcoin.org/bitcoin.pdf}, 34 | year = 2009 35 | } 36 | 37 | @article{Pagh04cuckoohashing, 38 | author = {Pagh, Rasmus and Rodler, Flemming Friche}, 39 | title = {Cuckoo Hashing}, 40 | journal = {J. 
Algorithms}, 41 | issue_date = {May 2004}, 42 | volume = {51}, 43 | number = {2}, 44 | month = may, 45 | year = {2004}, 46 | issn = {0196-6774}, 47 | pages = {122--144}, 48 | numpages = {23}, 49 | url = {http://dx.doi.org/10.1016/j.jalgor.2003.12.002}, 50 | doi = {10.1016/j.jalgor.2003.12.002}, 51 | acmid = {1006426}, 52 | publisher = {Academic Press, Inc.}, 53 | address = {Duluth, MN, USA}, 54 | keywords = {data structures, dictionaries, experiments, hashing, information retrieval, searching}, 55 | } 56 | 57 | @misc{wikidsds2014, 58 | author = "Wikipedia", 59 | title = "Disjoint-set data structure --- Wikipedia{,} The Free Encyclopedia", 60 | year = "2014", 61 | url = "http://en.wikipedia.org/w/index.php?title=Disjoint-set_data_structure", 62 | note = "[Online; accessed 23-March-2014]" 63 | } 64 | 65 | @techreport{king2013, 66 | author = {King, Sunny}, 67 | keywords = {primecoin cash electronic peer}, 68 | month = jul, 69 | title = {Primecoin: Cryptocurrency with Prime Number Proof-of-Work}, 70 | url = {http://primecoin.org/static/primecoin-paper.pdf}, 71 | year = 2013 72 | } 73 | 74 | @misc{scrypt2009, 75 | author = {Percival, Colin}, 76 | title = {Stronger Key Derivation via Sequential Memory-Hard Functions}, 77 | month = may, 78 | year = 2009, 79 | url = {http://www.tarsnap.com/scrypt/scrypt.pdf}, 80 | note = "presented at BSDCan 2009" 81 | } 82 | 83 | @techreport{larimer2013, 84 | author = {Larimer, Daniel}, 85 | month = oct, 86 | title = {Momentum - A Memory-Hard Proof-of-Work via finding Birthday Collisions}, 87 | url = {www.hashcash.org/papers/momentum.pdf}, 88 | year = 2013 89 | } 90 | 91 | @misc{back2014, 92 | author = {Back, Adam}, 93 | title = {Hashcash.org}, 94 | month = feb, 95 | year = 2014, 96 | url = {http://www.hashcash.org/papers/} 97 | } 98 | 99 | @misc{preshing2013, 100 | author = {Preshing, Jeff}, 101 | title = {The World's Simplest Lock-Free Hash Table}, 102 | month = jun, 103 | year = 2013, 104 | url = 
{http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table/} 105 | } 106 | 107 | @misc{tenebrix2011, 108 | author = {Lolcust}, 109 | title = {[ANNOUNCE] Tenebrix, a CPU-friendly, GPU-hostile cryptocurrency}, 110 | month = sep, 111 | year = 2011, 112 | url = {https://bitcointalk.org/index.php?topic=45667.0} 113 | } 114 | 115 | @misc{litecoin2011, 116 | author = {coblee}, 117 | title = {[ANN] Litecoin - a lite version of Bitcoin. Launched!}, 118 | month = oct, 119 | year = 2011, 120 | url = {https://bitcointalk.org/index.php?topic=47417.0} 121 | } 122 | 123 | @misc{poelstra2014, 124 | author = {Poelstra, Andrew}, 125 | title = {ASICs and Decentralization FAQ}, 126 | year = 2014, 127 | url = {https://download.wpsoftware.net/bitcoin/asic-faq.pdf} 128 | } 129 | 130 | @misc{dga2014, 131 | author = {Andersen, David}, 132 | title = {A Public Review of Cuckoo Cycle}, 133 | month = apr, 134 | year = 2014, 135 | url = {http://da-data.blogspot.com/2014/03/a-public-review-of-cuckoo-cycle.html} 136 | } 137 | 138 | @article{parallel99, 139 | author = {van Oorschot, Paul C. and Wiener, Michael J.}, 140 | title = {Parallel collision search with cryptanalytic applications}, 141 | journal = {J. 
Cryptology}, 142 | issue_date = {Jan 1999}, 143 | volume = {12}, 144 | number = {1}, 145 | month = jan, 146 | year = {1999}, 147 | pages = {1--28}, 148 | numpages = {28}, 149 | } 150 | 151 | @misc{equihash16, 152 | author = {Alex Biryukov and Dmitry Khovratovich}, 153 | title = {Equihash: Asymmetric Proof-of-Work Based on the Generalized Birthday Problem}, 154 | howpublished = {Cryptology ePrint Archive, Report 2015/946}, 155 | year = {2015}, 156 | note = {\url{https://eprint.iacr.org/2015/946}}, 157 | } 158 | 159 | @techreport{back2002, 160 | author = {Back, Adam}, 161 | month = aug, 162 | year = 2002, 163 | title = {Hashcash - A Denial of Service Counter-Measure}, 164 | note = {(implementation released in mar 1997)} 165 | } 166 | -------------------------------------------------------------------------------- /doc/cuckoo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tromp/cuckoo/60af1f78c408b5ef9333e295f8c2dc9055d59d50/doc/cuckoo.pdf -------------------------------------------------------------------------------- /doc/leancore: -------------------------------------------------------------------------------- 1 | struct bitmap { 2 | u32 *bits; 3 | __device__ void reset() { 4 | memset(bits, 0, BITMAP_WORDS * sizeof(u32)); 5 | } 6 | __device__ void set(edge_t n) { 7 | bits[n/32] |= 1 << (n%32); 8 | } 9 | __device__ bool test(node_t n) const { 10 | return (bits[n/32] >> (n%32)) & 1; 11 | } 12 | __device__ u32 block(node_t n) const { 13 | return bits[n/32]; 14 | } 15 | }; 16 | 17 | __global__ void count_node_deg(cuckoo_ctx *ctx, u32 uorv, u32 part) { 18 | int id = blockIdx.x * blockDim.x + threadIdx.x; 19 | for (edge_t block = id*32; block < NEDGES; block += ctx->nthreads*32) { 20 | u32 alive32 = ctx->alive.block(block); 21 | for (edge_t nonce = block-1; alive32; ) { 22 | u32 ffs = __ffs(alive32); 23 | nonce += ffs; alive32 >>= ffs; 24 | node_t u = dipnode(ctx->sip_keys, nonce, uorv); 25 | if ((u 
& PART_MASK) == part) 26 | ctx->nonleaf.set(u >> PART_BITS); 27 | } 28 | } 29 | } 30 | 31 | __global__ void kill_leaf_edges(cuckoo_ctx *ctx, u32 uorv, u32 part) { 32 | int id = blockIdx.x * blockDim.x + threadIdx.x; 33 | for (edge_t block = id*32; block < NEDGES; block += ctx->nthreads*32) { 34 | u32 alive32 = ctx->alive.block(block); 35 | for (edge_t nonce = block-1; alive32; ) { 36 | u32 ffs = __ffs(alive32); 37 | nonce += ffs; alive32 >>= ffs; 38 | node_t u = dipnode(ctx->sip_keys, nonce, uorv) ^ 1; 39 | if ((u & PART_MASK) == part && !ctx->nonleaf.test(u >> PART_BITS)) 40 | ctx->alive.reset(nonce); 41 | } 42 | } 43 | } 44 | 45 | for (u32 round=0; round < trims; round++) { 46 | for (u32 uorv = 0; uorv < 2; uorv++) { 47 | for (u32 part = 0; part <= PART_MASK; part++) { 48 | checkCudaErrors(cudaMemset(ctx.nonleaf.bits, 0, nodeBytes)); 49 | count_node_deg<<>>(device_ctx, uorv, part); 50 | kill_leaf_edges<<>>(device_ctx, uorv, part); 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /doc/mathspec: -------------------------------------------------------------------------------- 1 | Let siphash24 be the standard siphash-2-4 function [0] 2 | with a 256 bit key K = (k_0, k_1, k_2, k_3) instead of the usual 128-bit one, 3 | and a modified Initialization phase that sets v_i to k_i for 0 <= i < 4. 4 | 5 | Set N = 2^32 6 | Define a bipartite graph [1] G_K=(V,E) with N edges on N + N nodes as follows: 7 | for 0 <= i < N, E_i = (V_i_0, V_i_1) = (siphash24(K,2*i) % N, siphash24(K,2*i+1) % N) 8 | 9 | From G_K we obtain the graph G'_K by identifying nodes that differ only in the last bit: 10 | for 0 <= i < N, E'_i = (V_i_0 >> 1, V_i_1 >> 1) 11 | 12 | A Cuckatoo32 solution for key K is a 42-cycle [2] in G'_K that is a matching [3] in G_K. 13 | In other words, it's a cycle on node-pairs with edges incident on both nodes in a pair. 14 | 15 | For verification purposes, the solution is given as the sequence of 42 edge indices in increasing order. 
16 | 17 | [0] https://cr.yp.to/siphash/siphash-20120918.pdf 18 | [1] https://en.wikipedia.org/wiki/Bipartite_graph 19 | [2] https://en.wikipedia.org/wiki/Cycle_(graph_theory) 20 | [3] https://en.wikipedia.org/wiki/Matching_(graph_theory) 21 | -------------------------------------------------------------------------------- /doc/pbkdf: -------------------------------------------------------------------------------- 1 | A Cuckoo inspired Password Based Key Derivation Function 2 | 3 | 4 | Fix memory parameter N=2^n. Fix path length d. Given password p. 5 | Let siphash key = BLAKE2(p). 6 | Define a bipartite graph G=(U,V) on N+N nodes with N edges, where for 0<=i d. 10 | 11 | Let PBKDF_{N,d}(p) = BLAKE2(w) 12 | 13 | Efficiently computable in 2N bits of memory and O(N) time. 14 | Hard to compute in less than N bits of memory. 15 | -------------------------------------------------------------------------------- /doc/simplesolve: -------------------------------------------------------------------------------- 1 | void count_node_deg(u32 uorv) { 2 | for (edge_t nonce = 0; nonce < NEDGES; nonce++) { 3 | if (alive(nonce)) { 4 | nonleaf.set(sipnode(sip_keys, nonce, uorv)); 5 | } 6 | } 7 | } 8 | 9 | void kill_leaf_edges(u32 uorv) { 10 | for (edge_t nonce = 0; nonce < NEDGES; nonce++) { 11 | if (alive(nonce)) { 12 | if (!nonleaf.test(sipnode(sip_keys, nonce, uorv) ^ 1)) 13 | alive.reset(nonce); 14 | } 15 | } 16 | } 17 | 18 | for (u32 round=0; round < trims; round++) { 19 | for (u32 uorv = 0; uorv < 2; uorv++) { 20 | memset(nonleaf.bits, 0, nodeBytes)); 21 | count_node_deg(uorv); 22 | kill_leaf_edges(uorv); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /doc/spec: -------------------------------------------------------------------------------- 1 | #define ROTL(x,b) (u64)( ((x) << (b)) | ( (x) >> (64 - (b))) ) 2 | #define SIPROUND \ 3 | do { \ 4 | v0 += v1; v2 += v3; v1 = ROTL(v1,13); \ 5 | v3 = ROTL(v3,16); v1 ^= v0; v3 ^= v2; \ 
_\( == __\ 6 | v0 = ROTL(v0,32); v2 += v1; v0 += v3; \ / '> __.-"\---|__ 7 | v1 = ROTL(v1,17); v3 = ROTL(v3,21); \ ( (\ / \\_@\-'/ \ 8 | v1 ^= v2; v3 ^= v0; v2 = ROTL(v2,32); \ ""-'\ \__/ \__/ 9 | } while(0) 10 | 11 | u64 siphash24(const siphash_keys *keys, const u64 nonce) { 12 | u64 v0 = keys->k0, v1 = keys->k1, v2 = keys->k2, v3 = keys->k3 ^ nonce; 13 | SIPROUND; SIPROUND; 14 | v0 ^= nonce; v2 ^= 0xff; 15 | SIPROUND; SIPROUND; SIPROUND; SIPROUND; 16 | return (v0 ^ v1) ^ (v2 ^ v3); 17 | } 18 | 19 | int verify(edge_t edges[PROOFSIZE], siphash_keys *keys) { 20 | node_t uvs[2*PROOFSIZE]; 21 | for (u32 n = 0; n < PROOFSIZE; n++) { 22 | if (edges[n] > EDGEMASK) 23 | return POW_TOO_BIG; 24 | if (n && edges[n] <= edges[n-1]) 25 | return POW_TOO_SMALL; 26 | uvs[2*n ] = siphash24(keys, 2*edges[n] ) & EDGEMASK; 27 | uvs[2*n+1] = siphash24(keys, 2*edges[n]+1) & EDGEMASK; 28 | } 29 | u32 n = 0, i = 0, j; 30 | do { 31 | for (u32 k = j = i; (k = (k+2) % (2*PROOFSIZE)) != i; ) { 32 | if (uvs[k] >> 1 == uvs[i] >> 1) { 33 | if (j != i) return POW_BRANCH; 34 | j = k; 35 | } 36 | } 37 | if (j == i || uvs[j] == uvs[i]) return POW_DEAD_END; 38 | i = j^1; 39 | n++; 40 | } while (i != 0); 41 | return n == PROOFSIZE ? 
POW_OK : POW_SHORT_CYCLE; 42 | } 43 | 44 | Ascii Art Credits 45 | AsH from http://ascii.co.uk/art/cockatoo 46 | jgs from https://web.archive.org/web/20091028022932/\ 47 | http://www.geocities.com/SoHo/7373/transp.htm#BIKE 48 | -------------------------------------------------------------------------------- /img/cuckatoo_cycle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tromp/cuckoo/60af1f78c408b5ef9333e295f8c2dc9055d59d50/img/cuckatoo_cycle.jpg -------------------------------------------------------------------------------- /img/cuckoo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tromp/cuckoo/60af1f78c408b5ef9333e295f8c2dc9055d59d50/img/cuckoo.png -------------------------------------------------------------------------------- /img/emissions.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tromp/cuckoo/60af1f78c408b5ef9333e295f8c2dc9055d59d50/img/emissions.gif -------------------------------------------------------------------------------- /img/grinlogo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tromp/cuckoo/60af1f78c408b5ef9333e295f8c2dc9055d59d50/img/grinlogo.pdf -------------------------------------------------------------------------------- /img/grinlogo.ps: -------------------------------------------------------------------------------- 1 | %!PS 2 | 42 42 scale 3 15 translate 2 setlinecap .1 setlinewidth .8 setgray 3 | 1 1 6 { 0 moveto 0 2 rlineto } for 4 | 1 0 moveto 6 1 1 270 90 arc 2 2 lineto 5 | 0 1 moveto 7 1 lineto stroke 0 setgray 6 | 2 2 moveto 1 1 1 90 270 arc 2 0 lineto 2 1 lineto 1 1 lineto 7 | 3 0 moveto 3 2 lineto 4 2 lineto 4 0 moveto 4 1 lineto 8 | 5 0 moveto 5 2 lineto 6 2 lineto 6 0 lineto 9 | stroke showpage 10 | 
-------------------------------------------------------------------------------- /img/inflation.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tromp/cuckoo/60af1f78c408b5ef9333e295f8c2dc9055d59d50/img/inflation.gif -------------------------------------------------------------------------------- /img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tromp/cuckoo/60af1f78c408b5ef9333e295f8c2dc9055d59d50/img/logo.png -------------------------------------------------------------------------------- /private/bfs: -------------------------------------------------------------------------------- 1 | 4 20.7 2 | 6 16.7 3 | 8 29.8 4 | 10 30.8 5 | 12 20.0 6 | 14 26.1 7 | 16 38.6 8 | 20 31.5 9 | 24 27.1 10 | 28 33.6 11 | 32 45.8 12 | 40 61.3 13 | 48 51.2 14 | 56 31.9 15 | 64 59.1 16 | -------------------------------------------------------------------------------- /private/comp.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | use POSIX; 3 | use strict; 4 | 5 | sub ent1 { 6 | my ($p) = @_; 7 | return -$p*log($p)/log(2.0); 8 | } 9 | 10 | sub ent { 11 | my ($p) = @_; 12 | return ent1($p) + ent1(1-$p); 13 | } 14 | 15 | my $f = 0; 16 | my @comp; 17 | my @len; 18 | while (<>) { 19 | next unless /8 part V0 load (\d+)/; 20 | my $load = $1; 21 | my $nc = 3200 * ++$f/100.0; 22 | my $nunc = 3200-$nc; 23 | my $p = ($load-$nunc)/$nc; 24 | my $e = ent($p); 25 | $comp[$f] = $e; 26 | # printf("%2d %4d %3.4lf %3.4lf\n",$f,$load,$p,$e); 27 | printf("(%2d,%0.4lf)%s",$f, $e, $f % 5 ? 
" " : "\n"); 28 | } 29 | for my $mp (19..99) { # memory percentage 30 | printf("\nmp = %2d ",$mp); 31 | my $i; 32 | my $np = $mp; # nonce percentage 33 | for ($i=0; $np < 100; $i++) { 34 | my $c = $comp[$mp]; 35 | $np += (1-$c)*$mp; 36 | $np = int($np); 37 | } 38 | $len[$mp] = $i; 39 | # printf("%2d %3d",$mp, $i); 40 | } 41 | for my $tot (25..100) { # memory percentage 42 | my $best=999999; 43 | my $bmp; 44 | for my $mp (19..$tot-1) { # memory percentage 45 | my $cost = $len[$mp] * 16*int(ceil(200/($tot-$mp))); 46 | next unless $cost < $best; 47 | $best = $cost; 48 | $bmp = $mp; 49 | } 50 | printf("(%2d,%3d)%s",$tot, $best, $tot % 5 ? " " : "\n"); 51 | # printf("(%2d,%3d)%c",$tot, $bmp, $tot%5?" ":"\n"); 52 | } 53 | print "\n"; 54 | -------------------------------------------------------------------------------- /private/cyclefreq: -------------------------------------------------------------------------------- 1 | 2 50083 2 | 4 25166 3 | 6 16803 4 | 8 12391 5 | 10 9939 6 | 12 8363 7 | 14 7139 8 | 16 6273 9 | 18 5488 10 | 20 4928 11 | 22 4507 12 | 24 4182 13 | 26 3818 14 | 28 3595 15 | 30 3511 16 | 32 3161 17 | 34 2889 18 | 36 2866 19 | 38 2552 20 | 40 2506 21 | 42 2434 22 | 44 2247 23 | 46 2238 24 | 48 2159 25 | 50 1935 26 | 52 1894 27 | 54 1828 28 | 56 1811 29 | 58 1757 30 | 60 1637 31 | 62 1586 32 | 64 1484 33 | 66 1492 34 | 68 1513 35 | 70 1396 36 | 72 1358 37 | 74 1390 38 | 76 1348 39 | 78 1278 40 | 80 1232 41 | 82 1195 42 | 84 1191 43 | 86 1090 44 | 88 1199 45 | 90 1056 46 | 92 1139 47 | 94 1069 48 | 96 1061 49 | 98 999 50 | 100 1001 51 | 102 961 52 | 104 963 53 | 106 964 54 | 108 923 55 | 110 920 56 | 112 932 57 | 114 845 58 | 116 858 59 | 118 838 60 | 120 878 61 | 122 846 62 | 124 771 63 | 126 865 64 | 128 781 65 | 130 781 66 | 132 712 67 | 134 722 68 | 136 724 69 | 138 713 70 | 140 796 71 | 142 726 72 | 144 702 73 | 146 641 74 | 148 661 75 | 150 642 76 | 152 659 77 | 154 628 78 | 156 617 79 | 158 623 80 | 160 606 81 | 162 583 82 | 164 613 83 | 166 558 84 | 
168 575 85 | 170 601 86 | 172 558 87 | 174 554 88 | 176 499 89 | 178 567 90 | 180 497 91 | 182 522 92 | 184 518 93 | 186 484 94 | 188 518 95 | 190 494 96 | 192 469 97 | 194 441 98 | 196 472 99 | 198 476 100 | 200 473 101 | 202 453 102 | 204 440 103 | 206 448 104 | 208 478 105 | 210 445 106 | 212 444 107 | 214 429 108 | 216 444 109 | 218 436 110 | 220 412 111 | 222 424 112 | 224 398 113 | 226 413 114 | 228 405 115 | 230 412 116 | 232 419 117 | 234 401 118 | 236 403 119 | 238 405 120 | 240 377 121 | 242 369 122 | 244 387 123 | 246 351 124 | 248 322 125 | 250 379 126 | 252 341 127 | 254 379 128 | 256 340 129 | 258 346 130 | 260 359 131 | 262 362 132 | 264 344 133 | 266 343 134 | 268 327 135 | 270 367 136 | 272 327 137 | 274 319 138 | 276 341 139 | 278 351 140 | 280 323 141 | 282 325 142 | 284 308 143 | 286 306 144 | 288 326 145 | 290 298 146 | 292 339 147 | 294 295 148 | 296 302 149 | 298 304 150 | 300 324 151 | 302 310 152 | 304 303 153 | 306 277 154 | 308 282 155 | 310 283 156 | 312 276 157 | 314 251 158 | 316 270 159 | 318 234 160 | 320 260 161 | 322 248 162 | 324 276 163 | 326 259 164 | 328 260 165 | 330 269 166 | 332 246 167 | 334 240 168 | 336 236 169 | 338 252 170 | 340 229 171 | 342 253 172 | 344 223 173 | 346 229 174 | 348 244 175 | 350 227 176 | 352 266 177 | 354 228 178 | 356 227 179 | 358 212 180 | 360 221 181 | 362 262 182 | 364 217 183 | 366 204 184 | 368 214 185 | 370 206 186 | 372 194 187 | 374 195 188 | 376 197 189 | 378 192 190 | 380 202 191 | 382 206 192 | 384 220 193 | 386 212 194 | 388 214 195 | 390 214 196 | 392 184 197 | 394 212 198 | 396 203 199 | 398 190 200 | 400 215 201 | 402 153 202 | 404 185 203 | 406 194 204 | 408 184 205 | 410 186 206 | 412 157 207 | 414 177 208 | 416 202 209 | 418 179 210 | 420 179 211 | 422 176 212 | 424 177 213 | 426 172 214 | 428 174 215 | 430 162 216 | 432 154 217 | 434 149 218 | 436 149 219 | 438 138 220 | 440 145 221 | 442 199 222 | 444 160 223 | 446 145 224 | 448 168 225 | 450 161 226 | 452 120 227 | 454 139 228 
| 456 157 229 | 458 151 230 | 460 132 231 | 462 188 232 | 464 153 233 | 466 155 234 | 468 155 235 | 470 153 236 | 472 150 237 | 474 146 238 | 476 139 239 | 478 145 240 | 480 150 241 | 482 137 242 | 484 118 243 | 486 133 244 | 488 142 245 | 490 151 246 | 492 128 247 | 494 140 248 | 496 123 249 | 498 110 250 | 500 105 251 | 502 136 252 | 504 132 253 | 506 126 254 | 508 124 255 | 510 120 256 | 512 144 257 | 514 122 258 | 516 113 259 | 518 123 260 | 520 120 261 | 522 110 262 | 524 109 263 | 526 115 264 | 528 138 265 | 530 117 266 | 532 106 267 | 534 130 268 | 536 129 269 | 538 113 270 | 540 134 271 | 542 123 272 | 544 103 273 | 546 103 274 | 548 116 275 | 550 91 276 | 552 102 277 | 554 91 278 | 556 84 279 | 558 114 280 | 560 98 281 | 562 92 282 | 564 98 283 | 566 84 284 | 568 95 285 | 570 72 286 | 572 93 287 | 574 97 288 | 576 93 289 | 578 82 290 | 580 86 291 | 582 91 292 | 584 69 293 | 586 101 294 | 588 63 295 | 590 103 296 | 592 96 297 | 594 102 298 | 596 85 299 | 598 79 300 | 600 84 301 | 602 92 302 | 604 77 303 | 606 71 304 | 608 73 305 | 610 86 306 | 612 68 307 | 614 77 308 | 616 73 309 | 618 77 310 | 620 72 311 | 622 76 312 | 624 74 313 | 626 68 314 | 628 72 315 | 630 74 316 | 632 67 317 | 634 67 318 | 636 78 319 | 638 68 320 | 640 69 321 | 642 76 322 | 644 59 323 | 646 69 324 | 648 66 325 | 650 65 326 | 652 55 327 | 654 62 328 | 656 66 329 | 658 63 330 | 660 48 331 | 662 67 332 | 664 52 333 | 666 62 334 | 668 57 335 | 670 53 336 | 672 73 337 | 674 48 338 | 676 43 339 | 678 57 340 | 680 62 341 | 682 59 342 | 684 59 343 | 686 58 344 | 688 47 345 | 690 57 346 | 692 51 347 | 694 65 348 | 696 58 349 | 698 56 350 | 700 63 351 | 702 60 352 | 704 52 353 | 706 63 354 | 708 48 355 | 710 62 356 | 712 53 357 | 714 41 358 | 716 37 359 | 718 47 360 | 720 44 361 | 722 44 362 | 724 47 363 | 726 53 364 | 728 46 365 | 730 44 366 | 732 47 367 | 734 43 368 | 736 48 369 | 738 37 370 | 740 37 371 | 742 50 372 | 744 46 373 | 746 34 374 | 748 27 375 | 750 46 376 | 752 41 377 | 754 53 
378 | 756 36 379 | 758 46 380 | 760 36 381 | 762 46 382 | 764 39 383 | 766 33 384 | 768 36 385 | 770 35 386 | 772 29 387 | 774 42 388 | 776 40 389 | 778 27 390 | 780 28 391 | 782 36 392 | 784 40 393 | 786 33 394 | 788 34 395 | 790 24 396 | 792 31 397 | 794 31 398 | 796 34 399 | 798 30 400 | 800 24 401 | 802 28 402 | 804 35 403 | 806 24 404 | 808 29 405 | 810 46 406 | 812 32 407 | 814 30 408 | 816 34 409 | 818 32 410 | 820 36 411 | 822 36 412 | 824 26 413 | 826 18 414 | 828 36 415 | 830 21 416 | 832 26 417 | 834 35 418 | 836 27 419 | 838 28 420 | 840 19 421 | 842 29 422 | 844 29 423 | 846 31 424 | 848 28 425 | 850 28 426 | 852 29 427 | 854 25 428 | 856 30 429 | 858 30 430 | 860 23 431 | 862 25 432 | 864 21 433 | 866 20 434 | 868 25 435 | 870 22 436 | 872 20 437 | 874 29 438 | 876 20 439 | 878 23 440 | 880 24 441 | 882 12 442 | 884 25 443 | 886 28 444 | 888 23 445 | 890 14 446 | 892 14 447 | 894 15 448 | 896 27 449 | 898 26 450 | 900 15 451 | 902 19 452 | 904 22 453 | 906 25 454 | 908 21 455 | 910 21 456 | 912 18 457 | 914 18 458 | 916 18 459 | 918 21 460 | 920 26 461 | 922 14 462 | 924 23 463 | 926 18 464 | 928 29 465 | 930 18 466 | 932 14 467 | 934 17 468 | 936 15 469 | 938 23 470 | 940 18 471 | 942 24 472 | 944 14 473 | 946 16 474 | 948 12 475 | 950 12 476 | 952 14 477 | 954 7 478 | 956 20 479 | 958 16 480 | 960 13 481 | 962 18 482 | 964 18 483 | 966 18 484 | 968 16 485 | 970 13 486 | 972 11 487 | 974 15 488 | 976 25 489 | 978 12 490 | 980 15 491 | 982 23 492 | 984 17 493 | 986 4 494 | 988 12 495 | 990 11 496 | 992 11 497 | 994 19 498 | 996 11 499 | 998 13 500 | -------------------------------------------------------------------------------- /private/equihash: -------------------------------------------------------------------------------- 1 | Cuckoo Cycle Equihash 2 | 3 | Problem Size Huge Medium 4 | Solution Size Medium Medium to High 5 | Progress Free Y Y 6 | Amortization-free High Medium 7 | TMTO steepness linear (~20) exponential 8 | Time+Memory Tunable 
Smoothly Roughly 9 | Time*Memory Tunable Roughly 10 | Optimization Free High Medium to High 11 | Bandwidth Limited Y 12 | Latency Limited Y 13 | Speed Low Medium 14 | Awesome Logo Y 15 | 16 | 17 | http://discussions.password-hashing.narkive.com/BQbAY4yu/phc-asymmetric-proof-of-work-based-on-the-generalized-birthday-problem 18 | 19 | (individual messages in above thread also available at 20 | http://lists.openwall.net/phc-discussions/2015/09/30/11 21 | http://lists.openwall.net/phc-discussions/2015/09/30/12 22 | http://lists.openwall.net/phc-discussions/2015/09/30/13 23 | http://lists.openwall.net/phc-discussions/2015/09/30/14 24 | ) 25 | 26 | https://www.reddit.com/r/Bitcoin/comments/3n5nws/research_paper_asymmetric_proofofwork_based_on/ 27 | 28 | https://arxiv.org/abs/1606.03588 29 | Egalitarian computing 30 | Alex Biryukov, Dmitry Khovratovich 31 | (Submitted on 11 Jun 2016) 32 | 33 | Finally, we mention schemes Momentum [21] and Cuckoo 34 | cycle [32], which provide fast verification due to their combi- 35 | natorial nature. They rely on the memory requirements for the 36 | collision search (Momentum) or graph cycle finding (Cuckoo). 37 | However, Momentum is vulnerable to a sublinear time-space 38 | tradeoff [11], whereas the first version of the Cuckoo scheme 39 | was recently broken in [6]. 40 | 41 | 42 | https://github.com/zcash/zcash/issues/27 43 | "Select a mining proof-of-work algorithm. 
#!/usr/local/bin/perl
# Summarises solver logs read from the files named on the command line (or
# stdin).  For every run ("Looking for N-cycle ... /P parts") it records the
# part in which the first N-cycle was found, accumulates user CPU time, and
# finally prints a per-part histogram plus average and first-quartile figures.
use strict;

my $len;              # cycle length the current run is looking for

my @foundat = ();     # $foundat[p] = number of runs whose first hit was in part p
my $n = 0;            # number of runs seen
my $part;             # index of the part currently being processed
my $nparts;           # total number of parts per run (taken from the last run header)
my $found;            # true once the current run has produced a matching cycle
my $time = 0;         # accumulated user time in seconds

while (<>) {
  if (/Looking for (\d+)-cycle/) {
    # Start of a new run.
    $len = $1;
    $n++;
    die "run header without part count: $_" unless /\/(\d+) parts/;
    $nparts = $1;
    $found = $part = 0;
    next;
  }
  # Two timing formats are accepted: "12.3user ..." and "user  1m2.3s".
  if (/^(\S+)user / ) {
    $time += $1;
  } elsif (/^user\s+(\d+)m([\.\d]+)/ ) {
    $time += 60*$1 + $2;
  }
  if (!$found && /(\d+)-cycle found/) {
    my $l = $1;
    if ($l == $len) {
      # Only the first cycle of the requested length counts for this run.
      $foundat[$part]++;
      $found = 1;
    }
  }
  if (/OVERLOAD/) {
    print $_;
    next;
  }
  if (/[uv]part (\d+)/) {
    die "parts out of sequence: $_" unless $1 == $part;
    $part++;
  }
}

my $quartsum = 0;     # cumulative hits at the point the first quartile is reached
my $quartparts = 0;   # number of parts needed to reach the first quartile
my $sum = 0;          # total number of runs that found a cycle
my $sumat = 0;        # sum over hits of (1-based) part number
for my $i (0..$#foundat) {
  # Parts in which no run ever succeeded are undefined; report them as 0.
  my $count = defined $foundat[$i] ? $foundat[$i] : 0;
  print "$i\t $count\n";
  $sum += $count;
  if (!$quartparts && $sum >= $n/4) {
    $quartparts = $i+1;
    $quartsum = $sum;
  }
  $sumat += ($i+1) * $count;
}
print "Total\t $sum/$n\n";

# The remaining figures are ratios; bail out cleanly instead of dying with
# "Illegal division by zero" when the input held no runs or no solutions.
if ($n == 0) {
  print "No runs found in input\n";
  exit 0;
}
my $avgtime = $time/$n;
if ($sum == 0) {
  printf("Avg time\t %.1lf\n", $avgtime);
  print "No cycles of the requested length found\n";
  exit 0;
}
my $avgparts = $sumat/$sum;
printf("Avg parts\t %.1lf/%d\n", $avgparts, $nparts);
printf("Avg time\t %.1lf\n", $avgtime);
printf("Quartile parts\t (%d/%d) at %d\n", $quartsum, $n, $quartparts);
printf("Quartile time\t %.1lf\n", $avgtime*($quartparts/$avgparts));
#!/usr/bin/perl
# Reads `time` output and, for every "real XmY.Zs" line, prints the pair
# (run index, first run's elapsed time / this run's elapsed time),
# five pairs per output line.
use strict;

my $runs = 0;     # number of usable timings seen so far
my $baseline;     # elapsed seconds of the first usable timing
while (<>) {
  next unless /^real\s+(.+)m(.+)s$/;
  my $secs = 60*$1 + $2;
  if ($secs == 0) {
    # A zero elapsed time cannot be used as a divisor; skip it rather than die.
    warn "skipping zero elapsed time at input line $.\n";
    next;
  }
  ++$runs;
  # Only the first timing is ever needed again, so keep just that one.
  $baseline = $secs unless defined $baseline;
  printf("(%d,%.3f) ", $runs, $baseline/$secs);
  print "\n" unless $runs % 5;
}
#ifndef BLAKE2_IMPL_H
#define BLAKE2_IMPL_H

#include <stdint.h>
#include <string.h>

/* BLAKE2_INLINE expands to whatever spelling of "inline" the compiler
   accepts: pre-C99 C compilers get a vendor keyword (or nothing), everything
   else gets plain `inline`. */
#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L)
  #if   defined(_MSC_VER)
    #define BLAKE2_INLINE __inline
  #elif defined(__GNUC__)
    #define BLAKE2_INLINE __inline__
  #else
    #define BLAKE2_INLINE
  #endif
#else
  #define BLAKE2_INLINE inline
#endif

/* Reads a 32-bit little-endian value from src (no alignment requirement). */
static BLAKE2_INLINE uint32_t load32( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  uint32_t w;
  memcpy(&w, src, sizeof w);
  return w;
#else
  const uint8_t *p = ( const uint8_t * )src;
  return (( uint32_t )( p[0] ) <<  0) |
         (( uint32_t )( p[1] ) <<  8) |
         (( uint32_t )( p[2] ) << 16) |
         (( uint32_t )( p[3] ) << 24) ;
#endif
}

/* Reads a 64-bit little-endian value from src (no alignment requirement). */
static BLAKE2_INLINE uint64_t load64( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  uint64_t w;
  memcpy(&w, src, sizeof w);
  return w;
#else
  const uint8_t *p = ( const uint8_t * )src;
  return (( uint64_t )( p[0] ) <<  0) |
         (( uint64_t )( p[1] ) <<  8) |
         (( uint64_t )( p[2] ) << 16) |
         (( uint64_t )( p[3] ) << 24) |
         (( uint64_t )( p[4] ) << 32) |
         (( uint64_t )( p[5] ) << 40) |
         (( uint64_t )( p[6] ) << 48) |
         (( uint64_t )( p[7] ) << 56) ;
#endif
}

/* Reads a 16-bit little-endian value from src (no alignment requirement). */
static BLAKE2_INLINE uint16_t load16( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  uint16_t w;
  memcpy(&w, src, sizeof w);
  return w;
#else
  const uint8_t *p = ( const uint8_t * )src;
  /* Combine in a wider unsigned type and narrow explicitly, so the result
     does not depend on an implicit int -> uint16_t conversion. */
  return ( uint16_t )((( uint32_t )( p[0] ) << 0) |
                      (( uint32_t )( p[1] ) << 8));
#endif
}

/* Writes w to dst as 2 little-endian bytes. */
static BLAKE2_INLINE void store16( void *dst, uint16_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  memcpy(dst, &w, sizeof w);
#else
  uint8_t *p = ( uint8_t * )dst;
  p[0] = (uint8_t)(w >> 0);
  p[1] = (uint8_t)(w >> 8);
#endif
}

/* Writes w to dst as 4 little-endian bytes. */
static BLAKE2_INLINE void store32( void *dst, uint32_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  memcpy(dst, &w, sizeof w);
#else
  uint8_t *p = ( uint8_t * )dst;
  p[0] = (uint8_t)(w >>  0);
  p[1] = (uint8_t)(w >>  8);
  p[2] = (uint8_t)(w >> 16);
  p[3] = (uint8_t)(w >> 24);
#endif
}

/* Writes w to dst as 8 little-endian bytes. */
static BLAKE2_INLINE void store64( void *dst, uint64_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  memcpy(dst, &w, sizeof w);
#else
  uint8_t *p = ( uint8_t * )dst;
  p[0] = (uint8_t)(w >>  0);
  p[1] = (uint8_t)(w >>  8);
  p[2] = (uint8_t)(w >> 16);
  p[3] = (uint8_t)(w >> 24);
  p[4] = (uint8_t)(w >> 32);
  p[5] = (uint8_t)(w >> 40);
  p[6] = (uint8_t)(w >> 48);
  p[7] = (uint8_t)(w >> 56);
#endif
}

/* Reads a 48-bit little-endian value (6 bytes) from src into the low bits
   of a uint64_t; the top 16 bits of the result are zero. */
static BLAKE2_INLINE uint64_t load48( const void *src )
{
  const uint8_t *p = ( const uint8_t * )src;
  return (( uint64_t )( p[0] ) <<  0) |
         (( uint64_t )( p[1] ) <<  8) |
         (( uint64_t )( p[2] ) << 16) |
         (( uint64_t )( p[3] ) << 24) |
         (( uint64_t )( p[4] ) << 32) |
         (( uint64_t )( p[5] ) << 40) ;
}

/* Writes the low 48 bits of w to dst as 6 little-endian bytes. */
static BLAKE2_INLINE void store48( void *dst, uint64_t w )
{
  uint8_t *p = ( uint8_t * )dst;
  p[0] = (uint8_t)(w >>  0);
  p[1] = (uint8_t)(w >>  8);
  p[2] = (uint8_t)(w >> 16);
  p[3] = (uint8_t)(w >> 24);
  p[4] = (uint8_t)(w >> 32);
  p[5] = (uint8_t)(w >> 40);
}

/* Rotates w right by c bits.  Both shift counts are reduced modulo 32 so
   that c == 0 (or any multiple of 32) returns w unchanged instead of
   shifting by the full type width, which is undefined behaviour. */
static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c )
{
  return ( w >> ( c & 31 ) ) | ( w << ( ( 32 - c ) & 31 ) );
}

/* Rotates w right by c bits; counts are reduced modulo 64 (see rotr32). */
static BLAKE2_INLINE uint64_t rotr64( const uint64_t w, const unsigned c )
{
  return ( w >> ( c & 63 ) ) | ( w << ( ( 64 - c ) & 63 ) );
}

/* Zeroes n bytes at v.  The call goes through a volatile function pointer
   so the compiler cannot prove it is memset and optimise the store away. */
static BLAKE2_INLINE void secure_zero_memory(void *v, size_t n)
{
  static void *(*const volatile memset_v)(void *, int, size_t) = &memset;
  memset_v(v, 0, n);
}

#endif
defined(__FreeBSD__) || defined(__DragonFly__) 49 | 50 | # include 51 | 52 | # define be16toh(x) betoh16(x) 53 | # define le16toh(x) letoh16(x) 54 | 55 | # define be32toh(x) betoh32(x) 56 | # define le32toh(x) letoh32(x) 57 | 58 | # define be64toh(x) betoh64(x) 59 | # define le64toh(x) letoh64(x) 60 | 61 | #elif defined(__WINDOWS__) 62 | 63 | # include 64 | # include 65 | 66 | # if BYTE_ORDER == LITTLE_ENDIAN 67 | 68 | # define htobe16(x) htons(x) 69 | # define htole16(x) (x) 70 | # define be16toh(x) ntohs(x) 71 | # define le16toh(x) (x) 72 | 73 | # define htobe32(x) htonl(x) 74 | # define htole32(x) (x) 75 | # define be32toh(x) ntohl(x) 76 | # define le32toh(x) (x) 77 | 78 | # define htobe64(x) htonll(x) 79 | # define htole64(x) (x) 80 | # define be64toh(x) ntohll(x) 81 | # define le64toh(x) (x) 82 | 83 | # elif BYTE_ORDER == BIG_ENDIAN 84 | 85 | /* that would be xbox 360 */ 86 | # define htobe16(x) (x) 87 | # define htole16(x) __builtin_bswap16(x) 88 | # define be16toh(x) (x) 89 | # define le16toh(x) __builtin_bswap16(x) 90 | 91 | # define htobe32(x) (x) 92 | # define htole32(x) __builtin_bswap32(x) 93 | # define be32toh(x) (x) 94 | # define le32toh(x) __builtin_bswap32(x) 95 | 96 | # define htobe64(x) (x) 97 | # define htole64(x) __builtin_bswap64(x) 98 | # define be64toh(x) (x) 99 | # define le64toh(x) __builtin_bswap64(x) 100 | 101 | # else 102 | 103 | # error byte order not supported 104 | 105 | # endif 106 | 107 | # define __BYTE_ORDER BYTE_ORDER 108 | # define __BIG_ENDIAN BIG_ENDIAN 109 | # define __LITTLE_ENDIAN LITTLE_ENDIAN 110 | # define __PDP_ENDIAN PDP_ENDIAN 111 | 112 | #else 113 | 114 | # error platform not supported 115 | 116 | #endif 117 | 118 | #endif 119 | -------------------------------------------------------------------------------- /src/crypto/siphash.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #if (__CUDA_ARCH__ >= 320) // make rotate-left use funnel shifter, 3% speed gain 
4 | typedef uint2 sip64; 5 | 6 | static __device__ __forceinline__ sip64 operator^ (uint2 a, uint2 b) { 7 | return make_uint2(a.x ^ b.x, a.y ^ b.y); 8 | } 9 | static __device__ __forceinline__ void operator^= (uint2 &a, uint2 b) { 10 | a.x ^= b.x, a.y ^= b.y; 11 | } 12 | static __device__ __forceinline__ void operator+= (uint2 &a, uint2 b) { 13 | asm("{\n\tadd.cc.u32 %0,%2,%4;\n\taddc.u32 %1,%3,%5;\n\t}\n\t" 14 | : "=r"(a.x), "=r"(a.y) : "r"(a.x), "r"(a.y), "r"(b.x), "r"(b.y)); 15 | } 16 | 17 | __inline__ __device__ sip64 rotl(const sip64 a, const int offset) { 18 | sip64 result; 19 | if (offset >= 32) { 20 | asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.x), "r"(a.y), "r"(offset)); 21 | asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.y), "r"(a.x), "r"(offset)); 22 | } else { 23 | asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.y), "r"(a.x), "r"(offset)); 24 | asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(offset)); 25 | } 26 | return result; 27 | } 28 | __device__ __forceinline__ sip64 vectorize(const uint64_t x) { 29 | uint2 result; 30 | asm("mov.b64 {%0,%1},%2; \n\t" : "=r"(result.x), "=r"(result.y) : "l"(x)); 31 | return result; 32 | } 33 | __device__ __forceinline__ uint64_t devectorize(sip64 x) { 34 | uint64_t result; 35 | asm("mov.b64 %0,{%1,%2}; \n\t" : "=l"(result) : "r"(x.x), "r"(x.y)); 36 | return result; 37 | } 38 | 39 | #else 40 | 41 | typedef uint64_t sip64; 42 | 43 | __inline__ __device__ sip64 rotl(const sip64 a, const int offset) { 44 | return (a << offset) | (a >> (64 - offset)); 45 | } 46 | __device__ __forceinline__ sip64 vectorize(const uint64_t x) { 47 | return x; 48 | } 49 | __device__ __forceinline__ uint64_t devectorize(sip64 x) { 50 | return x; 51 | } 52 | 53 | #endif 54 | 55 | template 56 | class diphash_state { 57 | public: 58 | sip64 v0; 59 | sip64 v1; 60 | sip64 v2; 61 | sip64 v3; 62 | 63 | __device__ diphash_state(const siphash_keys &sk) { 64 | v0 = 
// generalize siphash by using a quadruple of 64-bit keys,
// which are used directly as the initial state v0..v3
class siphash_keys {
public:
  uint64_t k0;
  uint64_t k1;
  uint64_t k2;
  uint64_t k3;

  // load k0..k3 from 32 bytes at keybuf, each as a little-endian 64-bit word
  void setkeys(const char *keybuf);

  // hash a single 64-bit nonce with these keys (default rotation constant)
  uint64_t siphash24(const uint64_t nonce) const;
};

// SipHash state of four 64-bit lanes.
// rotE is the rotation applied to v3 in the second half of each round.
template <int rotE = 21>
class siphash_state {
public:
  uint64_t v0;
  uint64_t v1;
  uint64_t v2;
  uint64_t v3;

  // start from the four keys as the initial lanes
  siphash_state(const siphash_keys &sk) {
    v0 = sk.k0; v1 = sk.k1; v2 = sk.k2; v3 = sk.k3;
  }
  // fold the four lanes into the 64-bit hash output
  uint64_t xor_lanes() {
    return (v0 ^ v1) ^ (v2 ^ v3);
  }
  // lane-wise xor of another state into this one
  void xor_with(const siphash_state &x) {
    v0 ^= x.v0;
    v1 ^= x.v1;
    v2 ^= x.v2;
    v3 ^= x.v3;
  }
  // rotate x left by b bits; counts are reduced modulo 64 so that b == 0
  // does not shift by the full word width (undefined behaviour)
  static uint64_t rotl(uint64_t x, uint64_t b) {
    return (x << (b & 63)) | (x >> ((64 - b) & 63));
  }
  // one SipRound over the four lanes
  void sip_round() {
    v0 += v1; v2 += v3; v1 = rotl(v1,13);
    v3 = rotl(v3,16); v1 ^= v0; v3 ^= v2;
    v0 = rotl(v0,32); v2 += v1; v0 += v3;
    v1 = rotl(v1,17);   v3 = rotl(v3,rotE);
    v1 ^= v2; v3 ^= v0; v2 = rotl(v2,32);
  }
  // absorb nonce with 2 rounds, then finalize with 4 rounds;
  // the state is updated in place, so repeated calls chain
  void hash24(const uint64_t nonce) {
    v3 ^= nonce;
    sip_round(); sip_round();
    v0 ^= nonce;
    v2 ^= 0xff;
    sip_round(); sip_round(); sip_round(); sip_round();
  }
};

// assemble a 64-bit word from 8 little-endian bytes; works for any
// alignment and any host byte order
static inline uint64_t siphash_load_le64(const unsigned char *p) {
  uint64_t w = 0;
  for (int i = 7; i >= 0; i--)
    w = (w << 8) | p[i];
  return w;
}

// set siphash keys from 32 byte char array
// (defined inline: this header may be included from several translation units)
inline void siphash_keys::setkeys(const char *keybuf) {
  const unsigned char *p = reinterpret_cast<const unsigned char *>(keybuf);
  k0 = siphash_load_le64(p);
  k1 = siphash_load_le64(p + 8);
  k2 = siphash_load_le64(p + 16);
  k3 = siphash_load_le64(p + 24);
}

inline uint64_t siphash_keys::siphash24(const uint64_t nonce) const {
  siphash_state<> v(*this);
  v.hash24(nonce);
  return v.xor_lanes();
}
$(CPPFLAGS) -pthread 11 | GPP ?= g++ $(GPP_ARCH_FLAGS) -std=c++11 $(FLAGS) 12 | CFLAGS ?= -Wall -Wno-format -fomit-frame-pointer $(OPT) 13 | GCC ?= gcc $(GCC_ARCH_FLAGS) -std=gnu11 $(CFLAGS) 14 | BLAKE_2B_SRC ?= ../crypto/blake2b-ref.c 15 | NVCC ?= nvcc -std=c++11 16 | 17 | all : simpletest meantest 18 | 19 | simpletest: simple19 20 | ./simple19 -n 71 21 | 22 | meantest: mean29x4 23 | ./mean29x4 -n 671 -t 4 -s 24 | 25 | simple19: ../crypto/siphash.hpp cuckaroo.hpp bitmap.hpp graph.hpp simple.cpp Makefile 26 | $(GPP) -o $@ -DPROOFSIZE=42 -DEDGEBITS=19 simple.cpp $(BLAKE_2B_SRC) 27 | 28 | simple29: ../crypto/siphash.hpp cuckaroo.hpp bitmap.hpp graph.hpp simple.cpp Makefile 29 | $(GPP) -o $@ -DPROOFSIZE=42 -DEDGEBITS=29 simple.cpp $(BLAKE_2B_SRC) 30 | 31 | mean19x1: cuckaroo.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 32 | $(GPP) -o $@ -DXBITS=2 -DNSIPHASH=1 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC) 33 | 34 | mean19x8: cuckaroo.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 35 | $(GPP) -o $@ -mavx2 -DXBITS=2 -DNSIPHASH=8 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC) 36 | 37 | mean29x4: cuckaroo.hpp bitmap.hpp graph.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 38 | $(GPP) -o $@ -mno-avx2 -DNSIPHASH=4 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC) 39 | 40 | mean29x8: cuckaroo.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 41 | $(GPP) -o $@ -mavx2 -DNSIPHASH=8 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC) 42 | 43 | mean29x8s: cuckaroo.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 44 | $(GPP) -o $@ -mavx2 -DSAVEEDGES -DNSIPHASH=8 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC) 45 | 46 | mean29x1: cuckaroo.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 47 | $(GPP) -o $@ -DNSIPHASH=1 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC) 48 | 49 | mean30x1: 
cuckaroo.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 50 | $(GPP) -o $@ -DNSIPHASH=1 -DEXPANDROUND=10 -DCOMPRESSROUND=22 -DEDGEBITS=30 mean.cpp $(BLAKE_2B_SRC) 51 | 52 | mean30x8: cuckaroo.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 53 | $(GPP) -o $@ -mavx2 -DNSIPHASH=8 -DEXPANDROUND=10 -DCOMPRESSROUND=22 -DEDGEBITS=30 mean.cpp $(BLAKE_2B_SRC) 54 | 55 | cuda19: ../crypto/siphash.cuh compress.hpp graph.hpp mean.cu Makefile 56 | $(NVCC) -o $@ -DEPS_A=4 -DEPS_B=3 -DIDXSHIFT=2 -DEDGEBITS=19 -arch sm_35 mean.cu $(BLAKE_2B_SRC) 57 | 58 | cuda29: ../crypto/siphash.cuh compress.hpp graph.hpp mean.cu Makefile 59 | $(NVCC) -o $@ -DEDGEBITS=29 -arch sm_35 mean.cu $(BLAKE_2B_SRC) 60 | -------------------------------------------------------------------------------- /src/cuckaroo/bitmap.hpp: -------------------------------------------------------------------------------- 1 | template 2 | class bitmap { 3 | public: 4 | word_t SIZE; 5 | word_t BITMAP_WORDS; 6 | #ifdef ATOMIC 7 | typedef std::atomic aword_t; 8 | #else 9 | typedef word_t aword_t; 10 | #endif 11 | aword_t *bits; 12 | const u32 BITS_PER_WORD = sizeof(word_t) * 8; 13 | 14 | bitmap(word_t size) { 15 | SIZE = size; 16 | BITMAP_WORDS = SIZE / BITS_PER_WORD; 17 | bits = new aword_t[BITMAP_WORDS]; 18 | assert(bits != 0); 19 | } 20 | ~bitmap() { 21 | freebits(); 22 | } 23 | void freebits() { 24 | delete[] bits; 25 | bits = 0; 26 | } 27 | void clear() { 28 | assert(bits); 29 | memset((word_t *)bits, 0, BITMAP_WORDS*sizeof(word_t)); 30 | } 31 | void prefetch(u32 u) const { 32 | #ifdef PREFETCH 33 | __builtin_prefetch((const void *)(&bits[u/BITS_PER_WORD]), /*READ=*/0, /*TEMPORAL=*/0); 34 | #endif 35 | } 36 | void set(u32 u) { 37 | u32 idx = u / BITS_PER_WORD; 38 | word_t bit = (word_t)1 << (u % BITS_PER_WORD); 39 | #ifdef ATOMIC 40 | std::atomic_fetch_or_explicit(&bits[idx], bit, std::memory_order_relaxed); 41 | #else 42 | 
bits[idx] |= bit; 43 | #endif 44 | } 45 | void reset(u32 u) { 46 | u32 idx = u / BITS_PER_WORD; 47 | word_t bit = (word_t)1 << (u % BITS_PER_WORD); 48 | #ifdef ATOMIC 49 | std::atomic_fetch_and_explicit(&bits[idx], ~bit, std::memory_order_relaxed); 50 | #else 51 | bits[idx] &= ~bit; 52 | #endif 53 | } 54 | bool test(u32 u) const { 55 | u32 idx = u / BITS_PER_WORD; 56 | u32 bit = u % BITS_PER_WORD; 57 | #ifdef ATOMIC 58 | return (bits[idx].load(std::memory_order_relaxed) >> bit) & 1; 59 | #else 60 | return (bits[idx] >> bit) & 1; 61 | #endif 62 | } 63 | word_t block(u32 n) const { 64 | u32 idx = n / BITS_PER_WORD; 65 | return bits[idx]; 66 | } 67 | }; 68 | -------------------------------------------------------------------------------- /src/cuckaroo/compress.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // compressor for cuckaroo nodes where edgetrimming 4 | // has left at most a fraction 2^-compressbits nodes in each partition 5 | template 6 | class compressor { 7 | public: 8 | u32 NODEBITS; 9 | u32 SHIFTBITS; 10 | u32 SIZEBITS; 11 | word_t SIZE; 12 | word_t SIZE2; 13 | word_t MASK; 14 | word_t MASK2; 15 | word_t nnodes; 16 | const word_t NIL = ~(word_t)0; 17 | word_t *nodes; 18 | bool sharedmem; 19 | 20 | compressor(u32 nodebits, u32 compressbits, char *bytes) { 21 | NODEBITS = nodebits; 22 | SHIFTBITS = compressbits; 23 | SIZEBITS = NODEBITS-compressbits; 24 | SIZE = (word_t)1 << SIZEBITS; 25 | SIZE2 = (word_t)2 << SIZEBITS; 26 | nodes = new (bytes) word_t[SIZE2]; 27 | sharedmem = true; 28 | MASK = SIZE-1; 29 | MASK2 = SIZE2-1; 30 | } 31 | 32 | compressor(u32 nodebits, u32 compressbits) { 33 | NODEBITS = nodebits; 34 | SHIFTBITS = compressbits; 35 | SIZEBITS = NODEBITS-compressbits; 36 | SIZE = (word_t)1 << SIZEBITS; 37 | SIZE2 = (word_t)2 << SIZEBITS; 38 | nodes = new word_t[SIZE2]; 39 | sharedmem = false; 40 | MASK = SIZE-1; 41 | MASK2 = SIZE2-1; 42 | } 43 | 44 | ~compressor() { 45 | if (!sharedmem) 
46 | delete[] nodes; 47 | } 48 | 49 | uint64_t bytes() { 50 | return sizeof(word_t[SIZE2]); 51 | } 52 | 53 | void reset() { 54 | memset(nodes, (char)NIL, sizeof(word_t[SIZE2])); 55 | nnodes = 0; 56 | } 57 | 58 | word_t compress(word_t u) { 59 | word_t ui = u >> SHIFTBITS; 60 | for (; ; ui = (ui+1) & MASK2) { 61 | word_t cu = nodes[ui]; 62 | if (cu == NIL) { 63 | if (nnodes >= SIZE) { 64 | print_log("NODE OVERFLOW at %x\n", u); 65 | return 0; 66 | } 67 | nodes[ui] = u << SIZEBITS | nnodes; 68 | return nnodes++; 69 | } 70 | if ((cu & ~MASK) == u << SIZEBITS) { 71 | return cu & MASK; 72 | } 73 | } 74 | } 75 | }; 76 | -------------------------------------------------------------------------------- /src/cuckaroo/cuckaroo.c: -------------------------------------------------------------------------------- 1 | // Cuckoo Cycle, a memory-hard proof-of-work 2 | // Copyright (c) 2013-2020 John Tromp 3 | 4 | #include "cuckaroo.h" 5 | #include // for SCNx64 macro 6 | #include // printf/scanf 7 | #include // exit 8 | #include // getopt 9 | #include // d'uh 10 | 11 | // arbitrary length of header hashed into siphash key 12 | #define HEADERLEN 80 13 | 14 | int main(int argc, char **argv) { 15 | const char *header = ""; 16 | int nonce = 0; 17 | int c; 18 | while ((c = getopt (argc, argv, "h:n:")) != -1) { 19 | switch (c) { 20 | case 'h': 21 | header = optarg; 22 | break; 23 | case 'n': 24 | nonce = atoi(optarg); 25 | break; 26 | } 27 | } 28 | char headernonce[HEADERLEN]; 29 | u32 hdrlen = strlen(header); 30 | memcpy(headernonce, header, hdrlen); 31 | memset(headernonce+hdrlen, 0, sizeof(headernonce)-hdrlen); 32 | ((u32 *)headernonce)[HEADERLEN/sizeof(u32)-1] = htole32(nonce); 33 | siphash_keys keys; 34 | setheader(headernonce, sizeof(headernonce), &keys); 35 | printf("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce, keys.k0, keys.k1, keys.k2, keys.k3); 36 | printf("Verifying size %d proof for cuckaroo%d(\"%s\",%d)\n", 37 | PROOFSIZE, EDGEBITS, header, nonce); 38 | for (int 
nsols=0; scanf(" Solution") == 0; nsols++) { 39 | word_t nonces[PROOFSIZE]; 40 | for (int n = 0; n < PROOFSIZE; n++) { 41 | uint64_t nonce; 42 | int nscan = scanf(" %" SCNx64, &nonce); 43 | assert(nscan == 1); 44 | nonces[n] = nonce; 45 | } 46 | int pow_rc = verify(nonces, &keys); 47 | if (pow_rc == POW_OK) { 48 | printf("Verified with cyclehash "); 49 | unsigned char cyclehash[32]; 50 | blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)nonces, sizeof(nonces), 0, 0); 51 | for (int i=0; i<32; i++) 52 | printf("%02x", cyclehash[i]); 53 | printf("\n"); 54 | } else { 55 | printf("FAILED due to %s\n", errstr[pow_rc]); 56 | } 57 | } 58 | return 0; 59 | } 60 | -------------------------------------------------------------------------------- /src/cuckaroo/cuckaroo.hpp: -------------------------------------------------------------------------------- 1 | // Cuck(at)oo Cycle, a memory-hard proof-of-work 2 | // Copyright (c) 2013-2020 John Tromp 3 | 4 | #include // for types uint32_t,uint64_t 5 | #include // for functions strlen, memset 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "../crypto/blake2.h" 11 | #include "../crypto/siphash.hpp" 12 | 13 | // save some keystrokes since i'm a lazy typer 14 | typedef uint32_t u32; 15 | typedef uint64_t u64; 16 | 17 | #ifndef MAX_SOLS 18 | #define MAX_SOLS 4 19 | #endif 20 | 21 | #ifndef EDGE_BLOCK_BITS 22 | #define EDGE_BLOCK_BITS 6 23 | #endif 24 | #define EDGE_BLOCK_SIZE (1 << EDGE_BLOCK_BITS) 25 | #define EDGE_BLOCK_MASK (EDGE_BLOCK_SIZE - 1) 26 | 27 | // proof-of-work parameters 28 | #ifndef EDGEBITS 29 | // the main parameter is the number of bits in an edge index, 30 | // i.e. the 2-log of the number of edges 31 | #define EDGEBITS 29 32 | #endif 33 | #ifndef PROOFSIZE 34 | // the next most important parameter is the (even) length 35 | // of the cycle to be found. 
a minimum of 12 is recommended 36 | #define PROOFSIZE 42 37 | #endif 38 | 39 | #if EDGEBITS > 30 40 | typedef uint64_t word_t; 41 | #elif EDGEBITS > 14 42 | typedef u32 word_t; 43 | #else // if EDGEBITS <= 14 44 | typedef uint16_t word_t; 45 | #endif 46 | 47 | // number of edges 48 | #define NEDGES ((word_t)1 << EDGEBITS) 49 | // used to mask siphash output 50 | #define EDGEMASK ((word_t)NEDGES - 1) 51 | #define NODEMASK EDGEMASK 52 | #define NODE1MASK NODEMASK 53 | 54 | // Common Solver parameters, to return to caller 55 | struct SolverParams { 56 | u32 nthreads = 0; 57 | u32 ntrims = 0; 58 | bool showcycle; 59 | bool allrounds; 60 | bool mutate_nonce = 1; 61 | bool cpuload = 1; 62 | 63 | // Common cuda params 64 | u32 device = 0; 65 | 66 | // Cuda-lean specific params 67 | u32 blocks = 0; 68 | u32 tpb = 0; 69 | 70 | // Cuda-mean specific params 71 | u32 expand = 0; 72 | u32 genablocks = 0; 73 | u32 genatpb = 0; 74 | u32 genbtpb = 0; 75 | u32 trimtpb = 0; 76 | u32 tailtpb = 0; 77 | u32 recoverblocks = 0; 78 | u32 recovertpb = 0; 79 | }; 80 | 81 | // Solutions result structs to be instantiated by caller, 82 | // and filled by solver if desired 83 | struct Solution { 84 | u64 id = 0; 85 | u64 nonce = 0; 86 | u64 proof[PROOFSIZE]; 87 | }; 88 | 89 | struct SolverSolutions { 90 | u32 edge_bits = 0; 91 | u32 num_sols = 0; 92 | Solution sols[MAX_SOLS]; 93 | }; 94 | 95 | #define MAX_NAME_LEN 256 96 | 97 | // last error reason, to be picked up by stats 98 | // to be returned to caller 99 | char LAST_ERROR_REASON[MAX_NAME_LEN]; 100 | 101 | // Solver statistics, to be instantiated by caller 102 | // and filled by solver if desired 103 | struct SolverStats { 104 | u32 device_id = 0; 105 | u32 edge_bits = 0; 106 | char plugin_name[MAX_NAME_LEN]; // will be filled in caller-side 107 | char device_name[MAX_NAME_LEN]; 108 | bool has_errored = false; 109 | char error_reason[MAX_NAME_LEN]; 110 | u32 iterations = 0; 111 | u64 last_start_time = 0; 112 | u64 last_end_time = 0; 113 | 
// verify that edges are ascending and form a cycle in header-generated graph
//
// edges: PROOFSIZE edge indices, expected in strictly ascending order
// keys:  siphash keys used by sipblock to derive each edge's endpoints
// Returns POW_OK on success, otherwise the verify_code naming the first
// failed check.
int verify(word_t edges[PROOFSIZE], siphash_keys &keys) {
  word_t xor0 = 0, xor1 = 0;
  u64 sips[EDGE_BLOCK_SIZE];
  // endpoints of edge n live at uvs[2*n] (low hash bits) and uvs[2*n+1] (bits 32 and up)
  word_t uvs[2*PROOFSIZE];

  for (u32 n = 0; n < PROOFSIZE; n++) {
    if (edges[n] > EDGEMASK)
      return POW_TOO_BIG;
    if (n && edges[n] <= edges[n-1])
      return POW_TOO_SMALL;
    u64 edge = sipblock(keys, edges[n], sips);
    // accumulate the xor of all endpoints per side while recording them
    xor0 ^= uvs[2*n  ] = edge & EDGEMASK;
    xor1 ^= uvs[2*n+1] = (edge >> 32) & EDGEMASK;
  }
  if (xor0 | xor1)              // optional check for obviously bad proofs
    return POW_NON_MATCHING;
  u32 n = 0, i = 0, j;
  do {                        // follow cycle
    // k advances in steps of 2, so it only visits slots on the same side as i
    for (u32 k = j = i; (k = (k+2) % (2*PROOFSIZE)) != i; ) {
      if (uvs[k] == uvs[i]) { // find other edge endpoint identical to one at i
        if (j != i)           // already found one before
          return POW_BRANCH;
        j = k;
      }
    }
    if (j == i) return POW_DEAD_END;  // no matching endpoint
    i = j^1;                  // continue from the opposite endpoint of the matching edge
    n++;
  } while (i != 0);           // must cycle back to start or we would have found branch
  return n == PROOFSIZE ? POW_OK : POW_SHORT_CYCLE;
}
{ 203 | if (SQUASH_OUTPUT) return; 204 | va_list args; 205 | va_start(args, fmt); 206 | vprintf(fmt, args); 207 | va_end(args); 208 | } 209 | ////////////////////////////////////////////////////////////////// 210 | // END caller QOL 211 | ////////////////////////////////////////////////////////////////// 212 | 213 | -------------------------------------------------------------------------------- /src/cuckaroo/graph.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "bitmap.hpp" 5 | #include "compress.hpp" 6 | #include 7 | 8 | typedef word_t proof[PROOFSIZE]; 9 | 10 | // cuck(ar)oo graph with given limit on number of edges (and on single partition nodes) 11 | template 12 | class graph { 13 | public: 14 | // terminates adjacency lists 15 | static const word_t NIL = ~(word_t)0; 16 | 17 | struct link { // element of adjacency list 18 | word_t next; 19 | word_t to; 20 | }; 21 | 22 | word_t MAXEDGES; 23 | word_t MAXNODES; 24 | word_t nlinks; // aka halfedges, twice number of edges 25 | word_t *adjlist; // index into links array 26 | link *links; 27 | bool sharedmem; 28 | compressor *compressu; 29 | compressor *compressv; 30 | bitmap visited; 31 | u32 MAXSOLS; 32 | proof *sols; 33 | u32 nsols; 34 | 35 | graph(word_t maxedges, word_t maxnodes, u32 maxsols) : visited(2*maxnodes) { 36 | MAXEDGES = maxedges; 37 | MAXNODES = maxnodes; 38 | MAXSOLS = maxsols; 39 | adjlist = new word_t[2*MAXNODES]; // index into links array 40 | links = new link[2*MAXEDGES]; 41 | compressu = compressv = 0; 42 | sharedmem = false; 43 | sols = new proof[MAXSOLS+1]; // extra one for current path 44 | visited.clear(); 45 | } 46 | 47 | ~graph() { 48 | if (!sharedmem) { 49 | delete[] adjlist; 50 | delete[] links; 51 | } 52 | delete[] sols; 53 | } 54 | 55 | graph(word_t maxedges, word_t maxnodes, u32 maxsols, u32 compressbits) : visited(2*maxnodes) { 56 | MAXEDGES = maxedges; 57 | MAXNODES = maxnodes; 58 | MAXSOLS = 
maxsols; 59 | adjlist = new word_t[2*MAXNODES]; // index into links array 60 | links = new link[2*MAXEDGES]; 61 | compressu = new compressor(EDGEBITS, compressbits); 62 | compressv = new compressor(EDGEBITS, compressbits); 63 | sharedmem = false; 64 | sols = new proof[MAXSOLS]; 65 | visited.clear(); 66 | } 67 | 68 | graph(word_t maxedges, word_t maxnodes, u32 maxsols, char *bytes) : visited(2*maxnodes) { 69 | MAXEDGES = maxedges; 70 | MAXNODES = maxnodes; 71 | MAXSOLS = maxsols; 72 | adjlist = new (bytes) word_t[2*MAXNODES]; // index into links array 73 | links = new (bytes += sizeof(word_t[2*MAXNODES])) link[2*MAXEDGES]; 74 | compressu = compressv = 0; 75 | sharedmem = true; 76 | sols = new proof[MAXSOLS]; 77 | visited.clear(); 78 | } 79 | 80 | graph(word_t maxedges, word_t maxnodes, u32 maxsols, u32 compressbits, char *bytes) : visited(2*maxnodes) { 81 | MAXEDGES = maxedges; 82 | MAXNODES = maxnodes; 83 | MAXSOLS = maxsols; 84 | adjlist = new (bytes) word_t[2*MAXNODES]; // index into links array 85 | links = new (bytes += sizeof(word_t[2*MAXNODES])) link[2*MAXEDGES]; 86 | compressu = new compressor(EDGEBITS, compressbits, bytes += sizeof(link[2*MAXEDGES])); 87 | compressv = new compressor(EDGEBITS, compressbits, bytes + compressu->bytes()); 88 | sharedmem = true; 89 | sols = new proof[MAXSOLS]; 90 | visited.clear(); 91 | } 92 | 93 | // total size of new-operated data, excludes sols and visited bitmap of MAXEDGES bits 94 | uint64_t bytes() { 95 | return sizeof(word_t[2*MAXNODES]) + sizeof(link[2*MAXEDGES]) + (compressu ? 
2 * compressu->bytes() : 0); 96 | } 97 | 98 | void reset() { 99 | memset(adjlist, (char)NIL, sizeof(word_t[2*MAXNODES])); 100 | if (compressu) { 101 | compressu->reset(); 102 | compressv->reset(); 103 | } 104 | resetcounts(); 105 | } 106 | 107 | void resetcounts() { 108 | nlinks = nsols = 0; 109 | // visited has entries set only during cycles() call 110 | } 111 | 112 | static int nonce_cmp(const void *a, const void *b) { 113 | return *(word_t *)a - *(word_t *)b; 114 | } 115 | 116 | void cycles_with_link(u32 len, word_t u, word_t dest) { 117 | // printf("cycles_with_link(%d, %x, %x)\n", len, u, dest); 118 | if (visited.test(u)) 119 | return; 120 | if (u == dest) { 121 | print_log(" %d-cycle found\n", len); 122 | if (len == PROOFSIZE && nsols < MAXSOLS) { 123 | qsort(sols[nsols++], PROOFSIZE, sizeof(word_t), nonce_cmp); 124 | memcpy(sols[nsols], sols[nsols-1], sizeof(sols[0])); 125 | } 126 | return; 127 | } 128 | if (len == PROOFSIZE) 129 | return; 130 | word_t au1 = adjlist[u]; 131 | if (au1 != NIL) { 132 | visited.set(u); 133 | for (; au1 != NIL; au1 = links[au1].next) { 134 | sols[nsols][len] = au1/2; 135 | cycles_with_link(len+1, links[au1 ^ 1].to, dest); 136 | } 137 | visited.reset(u); 138 | } 139 | } 140 | 141 | void add_edge(word_t u, word_t v) { 142 | assert(u < MAXNODES); 143 | assert(v < MAXNODES); 144 | v += MAXNODES; // distinguish partitions 145 | if (adjlist[u] != NIL && adjlist[v] != NIL) { // possibly part of a cycle 146 | sols[nsols][0] = nlinks/2; 147 | assert(!visited.test(u)); 148 | cycles_with_link(1, u, v); 149 | } 150 | word_t ulink = nlinks++; 151 | word_t vlink = nlinks++; // the two halfedges of an edge differ only in last bit 152 | assert(vlink != NIL); // avoid confusing links with NIL; guaranteed if bits in word_t > EDGEBITS + 1 153 | links[ulink].next = adjlist[u]; 154 | links[vlink].next = adjlist[v]; 155 | links[adjlist[u] = ulink].to = u; 156 | links[adjlist[v] = vlink].to = v; 157 | } 158 | 159 | void add_compress_edge(word_t u, 
word_t v) { 160 | add_edge(compressu->compress(u), compressv->compress(v)); 161 | } 162 | }; 163 | -------------------------------------------------------------------------------- /src/cuckaroo/mean.cpp: -------------------------------------------------------------------------------- 1 | // Cuckaroo Cycle, a memory-hard proof-of-work 2 | // Copyright (c) 2013-2020 John Tromp 3 | 4 | #include "mean.hpp" 5 | #include 6 | #include 7 | 8 | #ifndef HEADERLEN 9 | // arbitrary length of header hashed into siphash key 10 | #define HEADERLEN 80 11 | #endif 12 | 13 | typedef solver_ctx SolverCtx; 14 | 15 | CALL_CONVENTION int run_solver(SolverCtx* ctx, 16 | char* header, 17 | int header_length, 18 | u32 nonce, 19 | u32 range, 20 | SolverSolutions *solutions, 21 | SolverStats *stats 22 | ) 23 | { 24 | u64 time0, time1; 25 | u32 timems; 26 | u32 sumnsols = 0; 27 | 28 | for (u32 r = 0; r < range; r++) { 29 | time0 = timestamp(); 30 | ctx->setheadernonce(header, header_length, nonce + r); 31 | print_log("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx->trimmer.sip_keys.k0, ctx->trimmer.sip_keys.k1, ctx->trimmer.sip_keys.k2, ctx->trimmer.sip_keys.k3); 32 | u32 nsols = ctx->solve(); 33 | time1 = timestamp(); 34 | timems = (time1 - time0) / 1000000; 35 | print_log("Time: %d ms\n", timems); 36 | 37 | for (unsigned s = 0; s < nsols; s++) { 38 | print_log("Solution"); 39 | word_t *prf = &ctx->sols[s * PROOFSIZE]; 40 | for (u32 i = 0; i < PROOFSIZE; i++) 41 | print_log(" %jx", (uintmax_t)prf[i]); 42 | print_log("\n"); 43 | if (solutions != NULL){ 44 | solutions->edge_bits = EDGEBITS; 45 | solutions->num_sols++; 46 | solutions->sols[sumnsols+s].nonce = nonce + r; 47 | for (u32 i = 0; i < PROOFSIZE; i++) 48 | solutions->sols[sumnsols+s].proof[i] = (u64) prf[i]; 49 | } 50 | int pow_rc = verify(prf, ctx->trimmer.sip_keys); 51 | if (pow_rc == POW_OK) { 52 | print_log("Verified with cyclehash "); 53 | unsigned char cyclehash[32]; 54 | blake2b((void *)cyclehash, sizeof(cyclehash), 
(const void *)prf, sizeof(proof), 0, 0); 55 | for (int i=0; i<32; i++) 56 | print_log("%02x", cyclehash[i]); 57 | print_log("\n"); 58 | } else { 59 | print_log("FAILED due to %s\n", errstr[pow_rc]); 60 | } 61 | } 62 | sumnsols += nsols; 63 | if (stats != NULL) { 64 | stats->device_id = 0; 65 | stats->edge_bits = EDGEBITS; 66 | strncpy(stats->device_name, "CPU\0", MAX_NAME_LEN); 67 | stats->last_start_time = time0; 68 | stats->last_end_time = time1; 69 | stats->last_solution_time = time1 - time0; 70 | } 71 | } 72 | print_log("%d total solutions\n", sumnsols); 73 | return sumnsols > 0; 74 | } 75 | 76 | CALL_CONVENTION SolverCtx* create_solver_ctx(SolverParams* params) { 77 | if (params->nthreads == 0) params->nthreads = 1; 78 | if (params->ntrims == 0) params->ntrims = EDGEBITS >= 30 ? 96 : 68; 79 | 80 | SolverCtx* ctx = new SolverCtx(params->nthreads, 81 | params->ntrims, 82 | params->allrounds, 83 | params->showcycle, 84 | params->mutate_nonce); 85 | return ctx; 86 | } 87 | 88 | CALL_CONVENTION void destroy_solver_ctx(SolverCtx* ctx) { 89 | delete ctx; 90 | } 91 | 92 | CALL_CONVENTION void stop_solver(SolverCtx* ctx) { 93 | ctx->abort(); 94 | } 95 | 96 | CALL_CONVENTION void fill_default_params(SolverParams* params) { 97 | // not required in this solver 98 | } 99 | 100 | int main(int argc, char **argv) { 101 | u32 nthreads = 0; 102 | u32 ntrims = 0; 103 | u32 nonce = 0; 104 | u32 range = 1; 105 | #ifdef SAVEEDGES 106 | bool showcycle = 1; 107 | #else 108 | bool showcycle = 0; 109 | #endif 110 | char header[HEADERLEN]; 111 | u32 len; 112 | bool allrounds = false; 113 | int c; 114 | 115 | memset(header, 0, sizeof(header)); 116 | while ((c = getopt (argc, argv, "ah:m:n:r:st:x:")) != -1) { 117 | switch (c) { 118 | case 'a': 119 | allrounds = true; 120 | break; 121 | case 'h': 122 | len = strlen(optarg); 123 | assert(len <= sizeof(header)); 124 | memcpy(header, optarg, len); 125 | break; 126 | case 'x': 127 | len = strlen(optarg)/2; 128 | assert(len == sizeof(header)); 
129 | for (u32 i=0; i 1) 160 | print_log("-%d", nonce+range-1); 161 | print_log(") with 50%% edges\n"); 162 | 163 | u64 sbytes = ctx->sharedbytes(); 164 | u32 tbytes = ctx->threadbytes(); 165 | int sunit,tunit; 166 | for (sunit=0; sbytes >= 10240; sbytes>>=10,sunit++) ; 167 | for (tunit=0; tbytes >= 10240; tbytes>>=10,tunit++) ; 168 | print_log("Using %d%cB bucket memory at %lx,\n", sbytes, " KMGT"[sunit], (u64)ctx->trimmer.buckets); 169 | print_log("%dx%d%cB thread memory at %lx,\n", params.nthreads, tbytes, " KMGT"[tunit], (u64)ctx->trimmer.tbuckets); 170 | print_log("%d-way siphash, and %d buckets.\n", NSIPHASH, NX); 171 | 172 | run_solver(ctx, header, sizeof(header), nonce, range, NULL, NULL); 173 | 174 | destroy_solver_ctx(ctx); 175 | } 176 | -------------------------------------------------------------------------------- /src/cuckaroo/simple.cpp: -------------------------------------------------------------------------------- 1 | // Cuck(at)oo Cycle, a memory-hard proof-of-work 2 | // Copyright (c) 2013-2020 John Tromp 3 | 4 | #include "cuckaroo.hpp" 5 | #include "graph.hpp" 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #define NNODES (2*NEDGES) 13 | #ifndef MAXSOLS 14 | #define MAXSOLS 4 15 | #endif 16 | 17 | typedef unsigned char u8; 18 | 19 | class cuckoo_ctx { 20 | public: 21 | siphash_keys sip_keys; 22 | word_t easiness; 23 | graph cg; 24 | 25 | cuckoo_ctx(const char* header, const u32 headerlen, const u32 nonce, word_t easy_ness) : cg(NEDGES, NEDGES, MAXSOLS) { 26 | easiness = easy_ness; 27 | } 28 | 29 | ~cuckoo_ctx() { } 30 | 31 | u64 bytes() { 32 | return cg.bytes(); 33 | } 34 | 35 | void setheadernonce(char* const headernonce, const u32 len, const u32 nonce) { 36 | ((u32 *)headernonce)[len/sizeof(u32)-1] = htole32(nonce); // place nonce at end 37 | setheader(headernonce, len, &sip_keys); 38 | cg.reset(); 39 | } 40 | 41 | void find_cycles() { 42 | u64 sips[EDGE_BLOCK_SIZE]; 43 | for (word_t block = 0; block < easiness; 
block += EDGE_BLOCK_SIZE) { 44 | sipblock(sip_keys, block, sips); 45 | for (u32 i = 0; i < EDGE_BLOCK_SIZE; i++) { 46 | u64 edge = sips[i]; 47 | word_t u = edge & EDGEMASK; 48 | word_t v = (edge >> 32) & EDGEMASK; 49 | cg.add_edge(u, v); 50 | #ifdef SHOW 51 | word_t nonce = block + i; 52 | printf("%d add (%d,%d)\n", nonce,u,v+NEDGES); 53 | for (unsigned j=0; j::NIL; a=cg.links[a].next) printf(":%d", cg.links[a^1].to); 56 | if ((j+1)%NEDGES == 0) 57 | printf("\n"); 58 | } 59 | #endif 60 | } 61 | } 62 | for (u32 s=0; s < cg.nsols; s++) { 63 | printf("Solution"); 64 | // qsort(&cg.sols[s], PROOFSIZE, sizeof(word_t), cg.nonce_cmp); 65 | for (u32 j=0; j < PROOFSIZE; j++) { 66 | word_t nonce = cg.sols[s][j]; 67 | // u64 edge = sipblock(sip_keys, nonce, sips); 68 | // printf(" (%x,%x)", edge & EDGEMASK, (edge >> 32) & EDGEMASK); 69 | printf(" %x", nonce); 70 | } 71 | printf("\n"); 72 | int pow_rc = verify(cg.sols[s], sip_keys); 73 | if (pow_rc == POW_OK) { 74 | printf("Verified with cyclehash "); 75 | unsigned char cyclehash[32]; 76 | blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)cg.sols[s], sizeof(cg.sols[0]), 0, 0); 77 | for (int i=0; i<32; i++) 78 | printf("%02x", cyclehash[i]); 79 | printf("\n"); 80 | } else { 81 | printf("FAILED due to %s\n", errstr[pow_rc]); 82 | } 83 | 84 | } 85 | } 86 | }; 87 | 88 | // arbitrary length of header hashed into siphash key 89 | #define HEADERLEN 80 90 | 91 | int main(int argc, char **argv) { 92 | char header[HEADERLEN]; 93 | memset(header, 0, HEADERLEN); 94 | int c, easipct = 50; 95 | u32 nonce = 0; 96 | u32 range = 1; 97 | u64 time0, time1; 98 | u32 timems; 99 | 100 | while ((c = getopt (argc, argv, "e:h:n:r:")) != -1) { 101 | switch (c) { 102 | case 'e': 103 | easipct = atoi(optarg); 104 | break; 105 | case 'h': 106 | memcpy(header, optarg, strlen(optarg)); 107 | break; 108 | case 'n': 109 | nonce = atoi(optarg); 110 | break; 111 | case 'r': 112 | range = atoi(optarg); 113 | break; 114 | } 115 | } 116 | assert(easipct 
>= 0 && easipct <= 100); 117 | printf("Looking for %d-cycle on cuckaroo%d(\"%s\",%d", PROOFSIZE, EDGEBITS, header, nonce); 118 | if (range > 1) 119 | printf("-%d", nonce+range-1); 120 | printf(") with %d%% edges, ", easipct); 121 | word_t easiness = easipct * (uint64_t)NNODES / 100; 122 | cuckoo_ctx ctx(header, sizeof(header), nonce, easiness); 123 | u64 bytes = ctx.bytes(); 124 | int unit; 125 | for (unit=0; bytes >= 10240; bytes>>=10,unit++) ; 126 | printf("using %d%cB memory\n", (u32)bytes, " KMGT"[unit]); 127 | 128 | for (u32 r = 0; r < range; r++) { 129 | time0 = timestamp(); 130 | ctx.setheadernonce(header, sizeof(header), nonce + r); 131 | printf("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx.sip_keys.k0, ctx.sip_keys.k1, ctx.sip_keys.k2, ctx.sip_keys.k3); 132 | ctx.find_cycles(); 133 | time1 = timestamp(); timems = (time1 - time0) / 1000000; 134 | printf("Time: %d ms\n", timems); 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /src/cuckarood/Makefile: -------------------------------------------------------------------------------- 1 | .POSIX: 2 | .SUFFIXES: 3 | 4 | OPT ?= -O3 5 | 6 | GCC_ARCH_FLAGS ?= -march=native 7 | GPP_ARCH_FLAGS ?= -march=native 8 | 9 | # -Wno-deprecated-declarations shuts up Apple OSX clang 10 | FLAGS ?= -Wall -Wno-format -Wno-deprecated-declarations -D_POSIX_C_SOURCE=200112L $(OPT) -DPREFETCH -I. 
$(CPPFLAGS) -pthread 11 | GPP ?= g++ $(GPP_ARCH_FLAGS) -std=c++11 $(FLAGS) 12 | CFLAGS ?= -Wall -Wno-format -fomit-frame-pointer $(OPT) 13 | GCC ?= gcc $(GCC_ARCH_FLAGS) -std=gnu11 $(CFLAGS) 14 | BLAKE_2B_SRC ?= ../crypto/blake2b-ref.c 15 | NVCC ?= nvcc -std=c++11 16 | 17 | all : simpletest meantest 18 | 19 | simpletest: simple19 20 | ./simple19 -n 64 21 | 22 | meantest: mean29x4 23 | ./mean29x4 -n 23 -t 4 -s 24 | 25 | simple19: ../crypto/siphash.hpp cuckarood.hpp bitmap.hpp graph.hpp simple.cpp Makefile 26 | $(GPP) -o $@ -DPROOFSIZE=42 -DEDGEBITS=19 simple.cpp $(BLAKE_2B_SRC) 27 | 28 | simple29: ../crypto/siphash.hpp cuckarood.hpp bitmap.hpp graph.hpp simple.cpp Makefile 29 | $(GPP) -o $@ -DPROOFSIZE=42 -DEDGEBITS=29 simple.cpp $(BLAKE_2B_SRC) 30 | 31 | mean19x1: cuckarood.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 32 | $(GPP) -o $@ -DXBITS=2 -DNSIPHASH=1 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC) 33 | 34 | mean19x4: cuckarood.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 35 | $(GPP) -o $@ -mno-avx2 -DXBITS=2 -DNSIPHASH=4 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC) 36 | 37 | mean19x8: cuckarood.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 38 | $(GPP) -o $@ -mavx2 -DXBITS=2 -DNSIPHASH=8 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC) 39 | 40 | mean29x4: cuckarood.hpp bitmap.hpp graph.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 41 | $(GPP) -o $@ -mno-avx2 -DNSIPHASH=4 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC) 42 | 43 | mean29x8: cuckarood.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 44 | $(GPP) -o $@ -mavx2 -DNSIPHASH=8 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC) 45 | 46 | mean29x8s: cuckarood.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 47 | $(GPP) -o $@ -mavx2 -DSAVEEDGES -DNSIPHASH=8 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC) 
48 | 49 | mean29x1: cuckarood.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 50 | $(GPP) -o $@ -DNSIPHASH=1 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC) 51 | 52 | mean30x1: cuckarood.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 53 | $(GPP) -o $@ -DNSIPHASH=1 -DEXPANDROUND=10 -DCOMPRESSROUND=22 -DEDGEBITS=30 mean.cpp $(BLAKE_2B_SRC) 54 | 55 | mean30x8: cuckarood.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 56 | $(GPP) -o $@ -mavx2 -DNSIPHASH=8 -DEXPANDROUND=10 -DCOMPRESSROUND=22 -DEDGEBITS=30 mean.cpp $(BLAKE_2B_SRC) 57 | 58 | cuda19: ../crypto/siphash.cuh compress.hpp graph.hpp mean.cu Makefile 59 | $(NVCC) -o $@ -DEPS_A=4 -DEPS_B=3 -DIDXSHIFT=2 -DEDGEBITS=19 -arch sm_35 mean.cu $(BLAKE_2B_SRC) 60 | 61 | cuda29: ../crypto/siphash.cuh compress.hpp graph.hpp mean.cu Makefile 62 | $(NVCC) -o $@ -DEDGEBITS=29 -arch sm_35 mean.cu $(BLAKE_2B_SRC) 63 | 64 | photon29: ../crypto/siphash.cuh compress.hpp graph.hpp kernel.cuh photon.cu Makefile 65 | $(NVCC) -o $@ -DEDGEBITS=29 -arch sm_35 photon.cu $(BLAKE_2B_SRC) 66 | -------------------------------------------------------------------------------- /src/cuckarood/bitmap.hpp: -------------------------------------------------------------------------------- 1 | template 2 | class bitmap { 3 | public: 4 | word_t SIZE; 5 | word_t BITMAP_WORDS; 6 | #ifdef ATOMIC 7 | typedef std::atomic aword_t; 8 | #else 9 | typedef word_t aword_t; 10 | #endif 11 | aword_t *bits; 12 | const u32 BITS_PER_WORD = sizeof(word_t) * 8; 13 | 14 | bitmap(word_t size) { 15 | SIZE = size; 16 | BITMAP_WORDS = SIZE / BITS_PER_WORD; 17 | bits = new aword_t[BITMAP_WORDS]; 18 | assert(bits != 0); 19 | } 20 | ~bitmap() { 21 | freebits(); 22 | } 23 | void freebits() { 24 | delete[] bits; 25 | bits = 0; 26 | } 27 | void clear() { 28 | assert(bits); 29 | memset((word_t *)bits, 0, BITMAP_WORDS*sizeof(word_t)); 30 | } 31 | 
void prefetch(u32 u) const { 32 | #ifdef PREFETCH 33 | __builtin_prefetch((const void *)(&bits[u/BITS_PER_WORD]), /*READ=*/0, /*TEMPORAL=*/0); 34 | #endif 35 | } 36 | void set(u32 u) { 37 | u32 idx = u / BITS_PER_WORD; 38 | word_t bit = (word_t)1 << (u % BITS_PER_WORD); 39 | #ifdef ATOMIC 40 | std::atomic_fetch_or_explicit(&bits[idx], bit, std::memory_order_relaxed); 41 | #else 42 | bits[idx] |= bit; 43 | #endif 44 | } 45 | void reset(u32 u) { 46 | u32 idx = u / BITS_PER_WORD; 47 | word_t bit = (word_t)1 << (u % BITS_PER_WORD); 48 | #ifdef ATOMIC 49 | std::atomic_fetch_and_explicit(&bits[idx], ~bit, std::memory_order_relaxed); 50 | #else 51 | bits[idx] &= ~bit; 52 | #endif 53 | } 54 | bool test(u32 u) const { 55 | u32 idx = u / BITS_PER_WORD; 56 | u32 bit = u % BITS_PER_WORD; 57 | #ifdef ATOMIC 58 | return (bits[idx].load(std::memory_order_relaxed) >> bit) & 1; 59 | #else 60 | return (bits[idx] >> bit) & 1; 61 | #endif 62 | } 63 | word_t block(u32 n) const { 64 | u32 idx = n / BITS_PER_WORD; 65 | return bits[idx]; 66 | } 67 | }; 68 | -------------------------------------------------------------------------------- /src/cuckarood/compress.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // compressor for cuckatoo nodes where edgetrimming 4 | // has left at most 2^-compressbits nodes in each partition 5 | template 6 | class compressor { 7 | public: 8 | u32 NODEBITS; 9 | u32 COMPRESSBITS; 10 | u32 SIZEBITS; 11 | u32 SIZEBITS1; 12 | word_t SIZE; 13 | word_t MASK; 14 | word_t MASK1; 15 | word_t npairs; 16 | const word_t NIL = ~(word_t)0; 17 | word_t *nodes; 18 | bool sharedmem; 19 | 20 | compressor(u32 nodebits, u32 compressbits, char *bytes) { 21 | NODEBITS = nodebits; 22 | COMPRESSBITS = compressbits; 23 | SIZEBITS = NODEBITS-COMPRESSBITS; 24 | SIZEBITS1 = SIZEBITS-1; 25 | SIZE = (word_t)1 << SIZEBITS; 26 | assert(SIZE); 27 | MASK = SIZE-1; 28 | MASK1 = MASK >> 1; 29 | nodes = new (bytes) word_t[SIZE]; 30 | 
// Maps node u to a small index that preserves u's lowest bit.
// The table is probed linearly starting at u >> COMPRESSBITS; each occupied
// slot stores (u >> 1) << SIZEBITS1 combined with the pair index assigned to
// it, so the two nodes that differ only in their lowest bit share one slot.
// Returns (pair index << 1) | parity. When SIZE/2 pair indices are already
// in use, logs "NODE OVERFLOW" and returns just the parity bit.
word_t compress(word_t u) {
  u32 parity = u & 1;
  word_t ui = u >> COMPRESSBITS;
  u >>= 1; // from here on u identifies the pair, not the individual node
  for (; ; ui = (ui+1) & MASK) {
    word_t cu = nodes[ui];
    if (cu == NIL) { // empty slot: u has not been seen before
      if (npairs >= SIZE/2) {
        print_log("NODE OVERFLOW at %x\n", u << 1 | parity);
        return parity;
      }
      nodes[ui] = u << SIZEBITS1 | npairs;
      return (npairs++ << 1) | parity;
    }
    if ((cu & ~MASK1) == u << SIZEBITS1) { // slot already belongs to this pair
      return ((cu & MASK1) << 1) | parity;
    }
  }
}
u32 hdrlen = strlen(header); 30 | memcpy(headernonce, header, hdrlen); 31 | memset(headernonce+hdrlen, 0, sizeof(headernonce)-hdrlen); 32 | ((u32 *)headernonce)[HEADERLEN/sizeof(u32)-1] = htole32(nonce); 33 | siphash_keys keys; 34 | setheader(headernonce, sizeof(headernonce), &keys); 35 | printf("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce, keys.k0, keys.k1, keys.k2, keys.k3); 36 | printf("Verifying size %d proof for cuckarood%d(\"%s\",%d)\n", 37 | PROOFSIZE, EDGEBITS, header, nonce); 38 | for (int nsols=0; scanf(" Solution") == 0; nsols++) { 39 | word_t nonces[PROOFSIZE]; 40 | for (int n = 0; n < PROOFSIZE; n++) { 41 | uint64_t nonce; 42 | int nscan = scanf(" %" SCNx64, &nonce); 43 | assert(nscan == 1); 44 | nonces[n] = nonce; 45 | } 46 | int pow_rc = verify(nonces, &keys); 47 | if (pow_rc == POW_OK) { 48 | printf("Verified with cyclehash "); 49 | unsigned char cyclehash[32]; 50 | blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)nonces, sizeof(nonces), 0, 0); 51 | for (int i=0; i<32; i++) 52 | printf("%02x", cyclehash[i]); 53 | printf("\n"); 54 | } else { 55 | printf("FAILED due to %s\n", errstr[pow_rc]); 56 | } 57 | } 58 | return 0; 59 | } 60 | -------------------------------------------------------------------------------- /src/cuckarood/graph.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "bitmap.hpp" 5 | #include "compress.hpp" 6 | #include 7 | 8 | typedef word_t proof[PROOFSIZE]; 9 |

// cuck(ar)oo graph with given limit on number of edges (and on single partition nodes)
//
// Edges live in singly linked adjacency lists: adjlist[node] is the index of
// the most recently added link leaving that node (NIL when there is none) and
// links[i].next chains on to the older ones. Nodes of the second partition are
// stored at offset MAXNODES, which is why every per-node table has 2*MAXNODES
// entries. reset() must be called before the first add_edge().
//
// NOTE(review): the template argument lists were missing from this copy of the
// file; <typename word_t> / <word_t> are inferred from how the class uses
// word_t, compressor and bitmap - confirm against the upstream header.
template <typename word_t>
class graph {
public:
  // terminates adjacency lists
  const word_t NIL = ~(word_t)0;

  struct link { // element of adjacency list
    word_t next;
    word_t to;
  };

  word_t MAXEDGES;
  word_t MAXNODES;
  word_t nlinks; // aka halfedges, twice number of edges
  word_t *adjlist; // index into links array
  link *links;
  bool sharedmem; // true when adjlist/links live in caller-provided memory
  compressor<word_t> *compressu;
  compressor<word_t> *compressv;
  bitmap<word_t> visited;
  u32 MAXSOLS;
  proof *sols; // sols[0..nsols-1] are finished proofs, sols[nsols] is the working path
  u32 nsols;

  // Builds a graph that owns its adjacency storage.
  // compressbits == 0 disables the two node compressors.
  graph(word_t maxedges, word_t maxnodes, u32 maxsols, u32 compressbits) : visited(2*maxnodes) {
    MAXEDGES = maxedges;
    MAXNODES = maxnodes;
    MAXSOLS = maxsols;
    nlinks = 0;
    nsols = 0;
    adjlist = new word_t[2*MAXNODES]; // index into links array
    links = new link[MAXEDGES];
    compressu = compressbits ? new compressor<word_t>(EDGEBITS, compressbits) : 0;
    compressv = compressbits ? new compressor<word_t>(EDGEBITS, compressbits) : 0;
    sharedmem = false;
    sols = new proof[MAXSOLS+1]; // extra one for current path
    visited.clear();
  }

  // Builds a graph inside caller-provided memory: adjlist, links and the two
  // compressor tables are laid out back to back starting at bytes (bytes()
  // reports the total). sols and the visited bitmap are still heap allocated.
  graph(word_t maxedges, word_t maxnodes, u32 maxsols, u32 compressbits, char *bytes) : visited(2*maxnodes) {
    MAXEDGES = maxedges;
    MAXNODES = maxnodes;
    MAXSOLS = maxsols;
    nlinks = 0;
    nsols = 0;
    adjlist = new (bytes) word_t[2*MAXNODES]; // index into links array
    links = new (bytes += sizeof(word_t) * 2 * (size_t)MAXNODES) link[MAXEDGES];
    compressu = compressbits ? new compressor<word_t>(EDGEBITS, compressbits, bytes += sizeof(link) * (size_t)MAXEDGES) : 0;
    compressv = compressbits ? new compressor<word_t>(EDGEBITS, compressbits, bytes + compressu->bytes()) : 0;
    sharedmem = true;
    sols = new proof[MAXSOLS+1];
    visited.clear();
  }

  ~graph() {
    if (!sharedmem) {
      delete[] adjlist;
      delete[] links;
    }
    // The compressor objects themselves are always heap allocated by the
    // constructors above; each one decides on its own whether its table is shared.
    delete compressu;
    delete compressv;
    delete[] sols;
  }

  // total size of new-operated data, excludes sols and visited bitmap of MAXEDGES bits
  uint64_t bytes() {
    uint64_t total = (uint64_t)sizeof(word_t) * 2 * MAXNODES + (uint64_t)sizeof(link) * MAXEDGES;
    if (compressu)
      total += 2 * compressu->bytes();
    return total;
  }

  // Empties the graph: every adjacency list becomes NIL (all-ones bytes),
  // the compressors forget their nodes and the link/solution counters restart.
  void reset() {
    memset(adjlist, (char)NIL, sizeof(word_t) * 2 * (size_t)MAXNODES);
    if (compressu) {
      compressu->reset();
      compressv->reset();
    }
    resetcounts();
  }

  void resetcounts() {
    nlinks = nsols = 0;
    // visited has entries set only during cycles() call
  }

  // qsort comparator ordering link indices ascending. Compares instead of
  // subtracting, because an unsigned difference squeezed into int can report
  // the wrong sign.
  static int nonce_cmp(const void *a, const void *b) {
    const word_t x = *(const word_t *)a;
    const word_t y = *(const word_t *)b;
    return (x > y) - (x < y);
  }

  // Depth-first search for paths from u to dest, where len links (including
  // the edge about to be added) are already on the working path sols[nsols].
  // Every cycle found is logged; cycles of exactly PROOFSIZE links are stored
  // as sorted proofs while there is room.
  void cycles_with_link(u32 len, word_t u, word_t dest) {
    if (visited.test(u))
      return;
    if (u == dest) {
      print_log("  %d-cycle found\n", len);
      if (len == PROOFSIZE && nsols < MAXSOLS) {
        // Hand the still unsorted working path to the next slot first, then
        // sort the finished proof, so the enclosing search keeps its own prefix.
        memcpy(sols[nsols+1], sols[nsols], sizeof(sols[0]));
        qsort(sols[nsols++], PROOFSIZE, sizeof(word_t), nonce_cmp);
      }
      return;
    }
    if (len == PROOFSIZE)
      return;
    word_t au1 = adjlist[u];
    if (au1 != NIL) {
      visited.set(u);
      for (; au1 != NIL; au1 = links[au1].next) {
        sols[nsols][len] = au1;
        cycles_with_link(len+1, links[au1].to, dest);
      }
      visited.reset(u);
    }
  }

  // Adds edge (u,v) as link number nlinks, directed u -> v, or v -> u when
  // dir is non-zero, after reporting the cycles it closes with links added earlier.
  void add_edge(word_t u, word_t v, u32 dir) {
    assert(u < MAXNODES);
    assert(v < MAXNODES);
    v += MAXNODES; // distinguish partitions
    if (dir) {
      word_t tmp = v; // full width, so 64-bit node indices survive the swap
      v = u;
      u = tmp;
    }
    if (adjlist[v] != NIL) { // possibly part of a cycle
      sols[nsols][0] = nlinks;
      assert(!visited.test(u));
      cycles_with_link(1, v, u);
    }
    word_t ulink = nlinks++;
    assert(ulink != NIL); // avoid confusing links with NIL; guaranteed if bits in word_t > EDGEBITS + 1
    assert(ulink < MAXEDGES);
    links[ulink].next = adjlist[u];
    links[adjlist[u] = ulink].to = v;
  }

  // Like add_edge, but first maps both endpoints through the compressors;
  // the shared low bit of u and v selects the direction.
  void add_compress_edge(word_t u, word_t v) {
    assert( (u&1) == (v&1));
    add_edge(compressu->compress(u) >> 1, compressv->compress(v) >> 1, u&1);
  }
};
-------------------------------------------------------------------------------- /src/cuckarood/mean.cpp: -------------------------------------------------------------------------------- 1 | // Cuckarood Cycle, a memory-hard proof-of-work 2 | // Copyright (c) 2013-2020 John Tromp 3 | 4 | #include "mean.hpp" 5 | #include 6 | #include 7 | 8 | #ifndef HEADERLEN 9 | // arbitrary length of header hashed into siphash key 10 | #define HEADERLEN 80 11 | #endif 12 | 13 | typedef solver_ctx SolverCtx; 14 | 15 | CALL_CONVENTION int run_solver(SolverCtx* ctx, 16 | char* header, 17 | int header_length, 18 | u32 nonce, 19 | u32 range, 20 | SolverSolutions *solutions, 21 | SolverStats *stats 22 | ) 23 | { 24 | u64 time0, time1; 25 | u32 timems; 26 | u32 sumnsols = 0; 27 | 28 | for (u32 r = 0; r < range; r++) { 29 | time0 = timestamp(); 30 | ctx->setheadernonce(header, header_length, nonce + r); 31 | print_log("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx->trimmer.sip_keys.k0, ctx->trimmer.sip_keys.k1, ctx->trimmer.sip_keys.k2, ctx->trimmer.sip_keys.k3); 32 | u32 nsols = ctx->solve(); 33 | time1 = timestamp(); 34 | timems = (time1 - time0) / 1000000; 35 | print_log("Time: %d ms\n", timems); 36 | 37 | for (unsigned s = 0; s < nsols; s++) { 38 | print_log("Solution"); 39 | word_t *prf = &ctx->sols[s * PROOFSIZE]; 40 | for (u32 i = 0; i < PROOFSIZE; i++) 41 | print_log(" %jx", (uintmax_t)prf[i]); 42 | print_log("\n"); 43 | if (solutions != NULL){ 44 | solutions->edge_bits = EDGEBITS; 45 | solutions->num_sols++; 46 | solutions->sols[sumnsols+s].nonce = nonce + r; 47 | for (u32 i = 0; i < PROOFSIZE; i++) 48 | solutions->sols[sumnsols+s].proof[i] = (u64) prf[i]; 49 | } 50 | int pow_rc = verify(prf, ctx->trimmer.sip_keys); 51 | if (pow_rc == POW_OK) { 52 | print_log("Verified with cyclehash "); 53 | unsigned char cyclehash[32]; 54 | blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)prf, sizeof(proof), 0, 0); 55 | for (int i=0; i<32; i++) 56 | print_log("%02x", 
cyclehash[i]); 57 | print_log("\n"); 58 | } else { 59 | print_log("FAILED due to %s\n", errstr[pow_rc]); 60 | } 61 | } 62 | sumnsols += nsols; 63 | if (stats != NULL) { 64 | stats->device_id = 0; 65 | stats->edge_bits = EDGEBITS; 66 | strncpy(stats->device_name, "CPU\0", MAX_NAME_LEN); 67 | stats->last_start_time = time0; 68 | stats->last_end_time = time1; 69 | stats->last_solution_time = time1 - time0; 70 | } 71 | } 72 | print_log("%d total solutions\n", sumnsols); 73 | return sumnsols > 0; 74 | } 75 | 76 | CALL_CONVENTION SolverCtx* create_solver_ctx(SolverParams* params) { 77 | if (params->nthreads == 0) params->nthreads = 1; 78 | if (params->ntrims == 0) params->ntrims = EDGEBITS >= 30 ? 96 : 68; 79 | 80 | SolverCtx* ctx = new SolverCtx(params->nthreads, 81 | params->ntrims, 82 | params->allrounds, 83 | params->showcycle, 84 | params->mutate_nonce); 85 | return ctx; 86 | } 87 | 88 | CALL_CONVENTION void destroy_solver_ctx(SolverCtx* ctx) { 89 | delete ctx; 90 | } 91 | 92 | CALL_CONVENTION void stop_solver(SolverCtx* ctx) { 93 | ctx->abort(); 94 | } 95 | 96 | CALL_CONVENTION void fill_default_params(SolverParams* params) { 97 | // not required in this solver 98 | } 99 | 100 | int main(int argc, char **argv) { 101 | u32 nthreads = 0; 102 | u32 ntrims = 0; 103 | u32 nonce = 0; 104 | u32 range = 1; 105 | #ifdef SAVEEDGES 106 | bool showcycle = 1; 107 | #else 108 | bool showcycle = 0; 109 | #endif 110 | char header[HEADERLEN]; 111 | u32 len; 112 | bool allrounds = false; 113 | int c; 114 | 115 | memset(header, 0, sizeof(header)); 116 | while ((c = getopt (argc, argv, "ah:m:n:r:st:x:")) != -1) { 117 | switch (c) { 118 | case 'a': 119 | allrounds = true; 120 | break; 121 | case 'h': 122 | len = strlen(optarg); 123 | assert(len <= sizeof(header)); 124 | memcpy(header, optarg, len); 125 | break; 126 | case 'x': 127 | len = strlen(optarg)/2; 128 | assert(len == sizeof(header)); 129 | for (u32 i=0; i 1) 160 | print_log("-%d", nonce+range-1); 161 | print_log(") with 50%% 
edges\n"); 162 | 163 | u64 sbytes = ctx->sharedbytes(); 164 | u32 tbytes = ctx->threadbytes(); 165 | int sunit,tunit; 166 | for (sunit=0; sbytes >= 10240; sbytes>>=10,sunit++) ; 167 | for (tunit=0; tbytes >= 10240; tbytes>>=10,tunit++) ; 168 | print_log("Using %d%cB bucket memory at %lx,\n", sbytes, " KMGT"[sunit], (u64)ctx->trimmer.buckets); 169 | print_log("%dx%d%cB thread memory at %lx,\n", params.nthreads, tbytes, " KMGT"[tunit], (u64)ctx->trimmer.tbuckets); 170 | print_log("%d-way siphash, and %d buckets.\n", NSIPHASH, NX); 171 | 172 | run_solver(ctx, header, sizeof(header), nonce, range, NULL, NULL); 173 | 174 | destroy_solver_ctx(ctx); 175 | } 176 | -------------------------------------------------------------------------------- /src/cuckarood/simple.cpp: -------------------------------------------------------------------------------- 1 | // Cuck(at)oo Cycle, a memory-hard proof-of-work 2 | // Copyright (c) 2013-2020 John Tromp 3 | 4 | #include "cuckarood.hpp" 5 | #include "graph.hpp" 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | typedef unsigned char u8; 13 |

// Straightforward single-threaded solver: feeds every edge of the
// header-generated graph into a `graph` and reports the cycles it finds.
// NOTE(review): the template argument of `graph` was missing from this copy;
// <word_t> is inferred from the graph header - confirm against upstream.
class cuckoo_ctx {
public:
  siphash_keys sip_keys;
  graph<word_t> cg;

  // The arguments are accepted but unused here; keys are derived later in setheadernonce().
  cuckoo_ctx(const char* header, const u32 headerlen, const u32 nonce) : cg(NEDGES2, NNODES1, MAX_SOLS, 0) {
  }

  ~cuckoo_ctx() { }

  u64 bytes() {
    return cg.bytes();
  }

  // Writes the little-endian nonce into the last full 32-bit word of the
  // header buffer, derives the siphash keys from it and empties the graph.
  void setheadernonce(char* const headernonce, const u32 len, const u32 nonce) {
    const u32 le_nonce = htole32(nonce);
    // memcpy instead of a u32* cast: the char buffer carries no alignment guarantee
    memcpy(headernonce + (len/sizeof(u32)-1) * sizeof(u32), &le_nonce, sizeof(le_nonce)); // place nonce at end
    setheader(headernonce, len, &sip_keys);
    cg.reset();
  }

  // Generates all NEDGES2 edges block by block, adds them to the graph
  // (odd positions within a block are added in the reverse direction), then
  // prints and re-verifies every solution the graph collected.
  void find_cycles() {
    u64 sips[EDGE_BLOCK_SIZE];
    for (word_t block = 0; block < NEDGES2; block += EDGE_BLOCK_SIZE) {
      sipblock(sip_keys, block, sips);
      for (u32 i = 0; i < EDGE_BLOCK_SIZE; i++) {
        u64 edge = sips[i];
        word_t u = edge & NODE1MASK;
        word_t v = (edge >> 32) & NODE1MASK;
        cg.add_edge(u, v, i&1);
#ifdef SHOW
        // NOTE(review): the loop header below arrived truncated in this copy and is
        // kept token for token; restore it from upstream before building with -DSHOW.
        word_t nonce = block + i;
        printf("%d add (%d,%d)\n", nonce,u,v+NNODES1);
        for (unsigned j=0; j::NIL; a=cg.links[a].next) printf(":%d", cg.links[a^1].to);
          if ((j+1) % NNODES1 == 0)
            printf("\n");
        }
#endif
      }
    }
    for (u32 s=0; s < cg.nsols; s++) {
      printf("Solution");
      // qsort(&cg.sols[s], PROOFSIZE, sizeof(word_t), cg.nonce_cmp);
      for (u32 j=0; j < PROOFSIZE; j++) {
        word_t nonce = cg.sols[s][j];
        // u64 edge = sipblock(sip_keys, nonce, sips);
        // printf(" (%x,%x)", edge & NODE1MASK, (edge >> 32) & NODE1MASK);
        printf(" %x", nonce);
      }
      printf("\n");
      int pow_rc = verify(cg.sols[s], sip_keys);
      if (pow_rc == POW_OK) {
        printf("Verified with cyclehash ");
        unsigned char cyclehash[32];
        blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)cg.sols[s], sizeof(cg.sols[0]), 0, 0);
        for (int i=0; i<32; i++)
          printf("%02x", cyclehash[i]);
        printf("\n");
      } else {
        printf("FAILED due to %s\n", errstr[pow_rc]);
      }

    }
  }
};

// arbitrary length of header hashed into siphash key
#define HEADERLEN 80

// Command line driver: -h <header text>, -n <first nonce>, -r <number of nonces>.
int main(int argc, char **argv) {
  char header[HEADERLEN];
  memset(header, 0, HEADERLEN);
  int c;
  u32 nonce = 0;
  u32 range = 1;
  u64 time0, time1;
  u32 timems;

  while ((c = getopt (argc, argv, "h:n:r:")) != -1) {
    switch (c) {
      case 'h': {
        // refuse headers that do not fit rather than writing past the buffer
        const size_t hlen = strlen(optarg);
        if (hlen > sizeof(header)) {
          fprintf(stderr, "header too long: %zu bytes, maximum is %d\n", hlen, HEADERLEN);
          return 1;
        }
        memcpy(header, optarg, hlen);
        break;
      }
      case 'n':
        nonce = atoi(optarg);
        break;
      case 'r':
        range = atoi(optarg);
        break;
    }
  }
  // a header that fills the buffer has no terminator, so bound the print
  printf("Looking for %d-cycle on cuckarood%d(\"%.*s\",%d", PROOFSIZE, EDGEBITS, (int)sizeof(header), header, nonce);
  if (range > 1)
    printf("-%d", nonce+range-1);
  printf("), ");
  cuckoo_ctx ctx(header, sizeof(header), nonce);
  u64 bytes = ctx.bytes();
  int unit;
  for (unit=0; bytes >= 10240; bytes>>=10,unit++) ;
  printf("using %d%cB memory\n", (u32)bytes, " KMGT"[unit]);

  for (u32 r = 0; r < range; r++) {
    time0 = timestamp();
    ctx.setheadernonce(header, sizeof(header), nonce + r);
    printf("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx.sip_keys.k0, ctx.sip_keys.k1, ctx.sip_keys.k2, ctx.sip_keys.k3);
    ctx.find_cycles();
    time1 = timestamp(); timems = (time1 - time0) / 1000000;
    printf("Time: %d ms\n", timems);
  }
}

-------------------------------------------------------------------------------- /src/cuckaroom/Makefile: -------------------------------------------------------------------------------- 1 | .POSIX: 2 | .SUFFIXES: 3 | 4 | OPT ?= -O3 5 | 6 | GCC_ARCH_FLAGS ?= -march=native 7 | GPP_ARCH_FLAGS ?= -march=native 8 | 9 | # -Wno-deprecated-declarations shuts up Apple OSX clang 10 | FLAGS ?= -Wall -Wno-format -Wno-deprecated-declarations -D_POSIX_C_SOURCE=200112L $(OPT) -DPREFETCH -I. $(CPPFLAGS) -pthread 11 | GPP ?= g++ $(GPP_ARCH_FLAGS) -std=c++11 $(FLAGS) 12 | CFLAGS ?= -Wall -Wno-format -fomit-frame-pointer $(OPT) 13 | GCC ?= gcc $(GCC_ARCH_FLAGS) -std=gnu11 $(CFLAGS) 14 | BLAKE_2B_SRC ?= ../crypto/blake2b-ref.c 15 | NVCC ?= nvcc -std=c++11 16 | 17 | all : simpletest # cpu miner not working yet # meantest 18 | 19 | simpletest: simple19 20 | ./simple19 -n 64 21 | 22 | meantest: mean29x4 23 | ./mean29x4 -n 23 -t 4 -s 24 | 25 | simple19: ../crypto/siphash.hpp cuckaroom.hpp bitmap.hpp graph.hpp simple.cpp Makefile 26 | $(GPP) -o $@ -DPROOFSIZE=42 -DEDGEBITS=19 simple.cpp $(BLAKE_2B_SRC) 27 | 28 | simple29: ../crypto/siphash.hpp cuckaroom.hpp bitmap.hpp graph.hpp simple.cpp Makefile 29 | $(GPP) -o $@ -DPROOFSIZE=42 -DEDGEBITS=29 simple.cpp $(BLAKE_2B_SRC) 30 | 31 | mean19x1: cuckaroom.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 32 | $(GPP) -o $@ -DXBITS=2 -DNSIPHASH=1 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC) 33 | 34 | mean19x4: cuckaroom.hpp bitmap.hpp
graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 35 | $(GPP) -o $@ -mno-avx2 -DXBITS=2 -DNSIPHASH=4 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC) 36 | 37 | mean19x8: cuckaroom.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 38 | $(GPP) -o $@ -mavx2 -DXBITS=2 -DNSIPHASH=8 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC) 39 | 40 | mean29x4: cuckaroom.hpp bitmap.hpp graph.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 41 | $(GPP) -o $@ -mno-avx2 -DNSIPHASH=4 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC) 42 | 43 | mean29x8: cuckaroom.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 44 | $(GPP) -o $@ -mavx2 -DNSIPHASH=8 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC) 45 | 46 | mean29x8s: cuckaroom.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 47 | $(GPP) -o $@ -mavx2 -DSAVEEDGES -DNSIPHASH=8 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC) 48 | 49 | mean29x1: cuckaroom.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 50 | $(GPP) -o $@ -DNSIPHASH=1 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC) 51 | 52 | mean30x1: cuckaroom.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 53 | $(GPP) -o $@ -DNSIPHASH=1 -DEXPANDROUND=10 -DCOMPRESSROUND=22 -DEDGEBITS=30 mean.cpp $(BLAKE_2B_SRC) 54 | 55 | mean30x8: cuckaroom.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 56 | $(GPP) -o $@ -mavx2 -DNSIPHASH=8 -DEXPANDROUND=10 -DCOMPRESSROUND=22 -DEDGEBITS=30 mean.cpp $(BLAKE_2B_SRC) 57 | 58 | cuda19: ../crypto/siphash.cuh compress.hpp graph.hpp mean.cu Makefile 59 | $(NVCC) -o $@ -DEPS_A=4 -DEPS_B=3 -DIDXSHIFT=2 -DEDGEBITS=19 -arch sm_35 mean.cu $(BLAKE_2B_SRC) 60 | 61 | oldcuda29: ../crypto/siphash.cuh compress.hpp graph.hpp mean.cu kernel.cuh Makefile 62 | $(NVCC) -o $@ -DEDGEBITS=29 -arch sm_35 mean.cu 
$(BLAKE_2B_SRC) 63 | 64 | cuda29: ../crypto/siphash.cuh compress.hpp graph.hpp meaner.cu Makefile 65 | $(NVCC) -o $@ -DEDGEBITS=29 -arch sm_35 meaner.cu $(BLAKE_2B_SRC) 66 | 67 | cuda66: ../crypto/siphash.cuh compress.hpp graph.hpp meaner66.cu Makefile 68 | $(NVCC) -o $@ -DEDGEBITS=29 -arch sm_35 meaner66.cu $(BLAKE_2B_SRC) 69 | 70 | cuda29v: ../crypto/siphash.cuh compress.hpp graph.hpp meaner.cu Makefile 71 | $(NVCC) -o $@ -DEDGEBITS=29 -DVERBOSE -arch sm_35 meaner.cu $(BLAKE_2B_SRC) 72 | 73 | old66v: ../crypto/siphash.cuh compress.hpp graph.hpp old66.cu Makefile 74 | $(NVCC) -o $@ -DEDGEBITS=29 -DVERBOSE -arch sm_35 old66.cu $(BLAKE_2B_SRC) 75 | 76 | cuda66v: ../crypto/siphash.cuh compress.hpp graph.hpp meaner66.cu Makefile 77 | $(NVCC) -o $@ -DEDGEBITS=29 -DVERBOSE -arch sm_35 meaner66.cu $(BLAKE_2B_SRC) 78 | -------------------------------------------------------------------------------- /src/cuckaroom/bitmap.hpp: -------------------------------------------------------------------------------- 1 | template 2 | class bitmap { 3 | public: 4 | word_t SIZE; 5 | word_t BITMAP_WORDS; 6 | #ifdef ATOMIC 7 | typedef std::atomic aword_t; 8 | #else 9 | typedef word_t aword_t; 10 | #endif 11 | aword_t *bits; 12 | const u32 BITS_PER_WORD = sizeof(word_t) * 8; 13 | 14 | bitmap(word_t size) { 15 | SIZE = size; 16 | BITMAP_WORDS = SIZE / BITS_PER_WORD; 17 | bits = new aword_t[BITMAP_WORDS]; 18 | assert(bits != 0); 19 | } 20 | ~bitmap() { 21 | freebits(); 22 | } 23 | void freebits() { 24 | delete[] bits; 25 | bits = 0; 26 | } 27 | void clear() { 28 | assert(bits); 29 | memset((word_t *)bits, 0, BITMAP_WORDS*sizeof(word_t)); 30 | } 31 | void prefetch(u32 u) const { 32 | #ifdef PREFETCH 33 | __builtin_prefetch((const void *)(&bits[u/BITS_PER_WORD]), /*READ=*/0, /*TEMPORAL=*/0); 34 | #endif 35 | } 36 | void set(u32 u) { 37 | u32 idx = u / BITS_PER_WORD; 38 | word_t bit = (word_t)1 << (u % BITS_PER_WORD); 39 | #ifdef ATOMIC 40 | std::atomic_fetch_or_explicit(&bits[idx], bit, 
std::memory_order_relaxed); 41 | #else 42 | bits[idx] |= bit; 43 | #endif 44 | } 45 | void reset(u32 u) { 46 | u32 idx = u / BITS_PER_WORD; 47 | word_t bit = (word_t)1 << (u % BITS_PER_WORD); 48 | #ifdef ATOMIC 49 | std::atomic_fetch_and_explicit(&bits[idx], ~bit, std::memory_order_relaxed); 50 | #else 51 | bits[idx] &= ~bit; 52 | #endif 53 | } 54 | bool test(u32 u) const { 55 | u32 idx = u / BITS_PER_WORD; 56 | u32 bit = u % BITS_PER_WORD; 57 | #ifdef ATOMIC 58 | return (bits[idx].load(std::memory_order_relaxed) >> bit) & 1; 59 | #else 60 | return (bits[idx] >> bit) & 1; 61 | #endif 62 | } 63 | word_t block(u32 n) const { 64 | u32 idx = n / BITS_PER_WORD; 65 | return bits[idx]; 66 | } 67 | }; 68 | -------------------------------------------------------------------------------- /src/cuckaroom/compress.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // compressor for cuckaroom nodes where edgetrimming 4 | // has left at most a fraction 2^-compressbits nodes in each partition 5 | template 6 | class compressor { 7 | public: 8 | u32 NODEBITS; 9 | u32 SHIFTBITS; 10 | u32 SIZEBITS; 11 | word_t SIZE; 12 | word_t SIZE2; 13 | word_t MASK; 14 | word_t MASK2; 15 | word_t nnodes; 16 | const word_t NIL = ~(word_t)0; 17 | word_t *nodes; 18 | bool sharedmem; 19 | 20 | compressor(u32 nodebits, u32 compressbits, char *bytes) { 21 | NODEBITS = nodebits; 22 | SHIFTBITS = compressbits; 23 | SIZEBITS = NODEBITS-compressbits; 24 | SIZE = (word_t)1 << SIZEBITS; 25 | SIZE2 = (word_t)2 << SIZEBITS; 26 | nodes = new (bytes) word_t[SIZE2]; 27 | sharedmem = true; 28 | MASK = SIZE-1; 29 | MASK2 = SIZE2-1; 30 | } 31 | 32 | compressor(u32 nodebits, u32 compressbits) { 33 | NODEBITS = nodebits; 34 | SHIFTBITS = compressbits; 35 | SIZEBITS = NODEBITS-compressbits; 36 | SIZE = (word_t)1 << SIZEBITS; 37 | SIZE2 = (word_t)2 << SIZEBITS; 38 | nodes = new word_t[SIZE2]; 39 | sharedmem = false; 40 | MASK = SIZE-1; 41 | MASK2 = SIZE2-1; 42 | } 43 
| 44 | ~compressor() { 45 | if (!sharedmem) 46 | delete[] nodes; 47 | } 48 | 49 | uint64_t bytes() { 50 | return sizeof(word_t[SIZE2]); 51 | } 52 | 53 | void reset() { 54 | memset(nodes, (char)NIL, sizeof(word_t[SIZE2])); 55 | nnodes = 0; 56 | } 57 | 58 | word_t compress(word_t u) { 59 | word_t ui = u >> SHIFTBITS; 60 | for (; ; ui = (ui+1) & MASK2) { 61 | word_t cu = nodes[ui]; 62 | if (cu == NIL) { 63 | if (nnodes >= SIZE) { 64 | print_log("NODE OVERFLOW at %x\n", u); 65 | return 0; 66 | } 67 | nodes[ui] = u << SIZEBITS | nnodes; 68 | return nnodes++; 69 | } 70 | if ((cu & ~MASK) == u << SIZEBITS) { 71 | return cu & MASK; 72 | } 73 | } 74 | } 75 | }; 76 | -------------------------------------------------------------------------------- /src/cuckaroom/cuckaroom.c: -------------------------------------------------------------------------------- 1 | // Cuckaroom Cycle, a memory-hard proof-of-work 2 | // Copyright (c) 2013-2020 John Tromp 3 | 4 | #include "cuckaroom.h" 5 | #include // for SCNx64 macro 6 | #include // printf/scanf 7 | #include // exit 8 | #include // getopt 9 | #include // d'uh 10 | 11 | // arbitrary length of header hashed into siphash key 12 | #define HEADERLEN 80 13 | 14 | int main(int argc, char **argv) { 15 | const char *header = ""; 16 | int nonce = 0; 17 | int c; 18 | while ((c = getopt (argc, argv, "h:n:")) != -1) { 19 | switch (c) { 20 | case 'h': 21 | header = optarg; 22 | break; 23 | case 'n': 24 | nonce = atoi(optarg); 25 | break; 26 | } 27 | } 28 | char headernonce[HEADERLEN]; 29 | u32 hdrlen = strlen(header); 30 | memcpy(headernonce, header, hdrlen); 31 | memset(headernonce+hdrlen, 0, sizeof(headernonce)-hdrlen); 32 | ((u32 *)headernonce)[HEADERLEN/sizeof(u32)-1] = htole32(nonce); 33 | siphash_keys keys; 34 | setheader(headernonce, sizeof(headernonce), &keys); 35 | printf("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce, keys.k0, keys.k1, keys.k2, keys.k3); 36 | printf("Verifying size %d proof for cuckaroom%d(\"%s\",%d)\n", 37 | 
PROOFSIZE, EDGEBITS, header, nonce); 38 | for (int nsols=0; scanf(" Solution") == 0; nsols++) { 39 | word_t nonces[PROOFSIZE]; 40 | for (int n = 0; n < PROOFSIZE; n++) { 41 | uint64_t nonce; 42 | int nscan = scanf(" %" SCNx64, &nonce); 43 | assert(nscan == 1); 44 | nonces[n] = nonce; 45 | } 46 | int pow_rc = verify(nonces, &keys); 47 | if (pow_rc == POW_OK) { 48 | printf("Verified with cyclehash "); 49 | unsigned char cyclehash[32]; 50 | blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)nonces, sizeof(nonces), 0, 0); 51 | for (int i=0; i<32; i++) 52 | printf("%02x", cyclehash[i]); 53 | printf("\n"); 54 | } else { 55 | printf("FAILED due to %s\n", errstr[pow_rc]); 56 | } 57 | } 58 | return 0; 59 | } 60 | -------------------------------------------------------------------------------- /src/cuckaroom/cuckaroom.hpp: -------------------------------------------------------------------------------- 1 | // Cuckaroom Cycle, a memory-hard proof-of-work 2 | // Copyright (c) 2013-2020 John Tromp 3 | 4 | #include // for types uint32_t,uint64_t 5 | #include // for functions strlen, memset 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "../crypto/blake2.h" 11 | #include "../crypto/siphash.hpp" 12 | 13 | // save some keystrokes since i'm a lazy typer 14 | typedef uint32_t u32; 15 | typedef uint64_t u64; 16 | 17 | #ifndef MAX_SOLS 18 | #define MAX_SOLS 4 19 | #endif 20 | 21 | #ifndef EDGE_BLOCK_BITS 22 | #define EDGE_BLOCK_BITS 6 23 | #endif 24 | #define EDGE_BLOCK_SIZE (1 << EDGE_BLOCK_BITS) 25 | #define EDGE_BLOCK_MASK (EDGE_BLOCK_SIZE - 1) 26 | 27 | // proof-of-work parameters 28 | #ifndef EDGEBITS 29 | // the main parameter is the number of bits in an edge index, 30 | // i.e. the 2-log of the number of edges 31 | #define EDGEBITS 29 32 | #endif 33 | #ifndef PROOFSIZE 34 | // the next most important parameter is the (even) length 35 | // of the cycle to be found. 
a minimum of 12 is recommended 36 | #define PROOFSIZE 42 37 | #endif 38 | 39 | #if EDGEBITS > 30 40 | typedef uint64_t word_t; 41 | #elif EDGEBITS > 14 42 | typedef u32 word_t; 43 | #else // if EDGEBITS <= 14 44 | typedef uint16_t word_t; 45 | #endif 46 | 47 | // number of edges 48 | #define NEDGES ((word_t)1 << EDGEBITS) 49 | #define EDGEMASK ((word_t)NEDGES - 1) 50 | #define NNODES NEDGES 51 | // used to mask siphash output 52 | #define NODEMASK ((word_t)NNODES - 1) 53 | 54 | // Common Solver parameters, to return to caller 55 | struct SolverParams { 56 | u32 nthreads = 0; 57 | u32 ntrims = 0; 58 | bool showcycle; 59 | bool allrounds; 60 | bool mutate_nonce = 1; 61 | bool cpuload = 1; 62 | 63 | // Common cuda params 64 | u32 device = 0; 65 | 66 | // Cuda-lean specific params 67 | u32 blocks = 0; 68 | u32 tpb = 0; 69 | 70 | // Cuda-mean specific params 71 | u32 expand = 0; 72 | u32 genablocks = 0; 73 | u32 genatpb = 0; 74 | u32 genbtpb = 0; 75 | u32 trimtpb = 0; 76 | u32 tailtpb = 0; 77 | u32 recoverblocks = 0; 78 | u32 recovertpb = 0; 79 | }; 80 | 81 | // Solutions result structs to be instantiated by caller, 82 | // and filled by solver if desired 83 | struct Solution { 84 | u64 id = 0; 85 | u64 nonce = 0; 86 | u64 proof[PROOFSIZE]; 87 | }; 88 | 89 | struct SolverSolutions { 90 | u32 edge_bits = 0; 91 | u32 num_sols = 0; 92 | Solution sols[MAX_SOLS]; 93 | }; 94 | 95 | #define MAX_NAME_LEN 256 96 | 97 | // last error reason, to be picked up by stats 98 | // to be returned to caller 99 | char LAST_ERROR_REASON[MAX_NAME_LEN]; 100 | 101 | // Solver statistics, to be instantiated by caller 102 | // and filled by solver if desired 103 | struct SolverStats { 104 | u32 device_id = 0; 105 | u32 edge_bits = 0; 106 | char plugin_name[MAX_NAME_LEN]; // will be filled in caller-side 107 | char device_name[MAX_NAME_LEN]; 108 | bool has_errored = false; 109 | char error_reason[MAX_NAME_LEN]; 110 | u32 iterations = 0; 111 | u64 last_start_time = 0; 112 | u64 last_end_time = 0; 
113 | u64 last_solution_time = 0; 114 | }; 115 | 116 | enum verify_code { POW_OK, POW_HEADER_LENGTH, POW_TOO_BIG, POW_TOO_SMALL, POW_NON_MATCHING, POW_BRANCH, POW_DEAD_END, POW_SHORT_CYCLE, POW_UNBALANCED}; 117 | const char *errstr[] = { "OK", "wrong header length", "edge too big", "edges not ascending", "endpoints don't match up", "branch in cycle", "cycle dead ends", "cycle too short", "edges not balanced"}; 118 | 119 | // fills buffer with EDGE_BLOCK_SIZE siphash outputs for block containing edge in cuckaroo graph 120 | // return siphash output for given edge 121 | u64 sipblock(siphash_keys &keys, const word_t edge, u64 *buf) { 122 | siphash_state<> shs(keys); 123 | word_t edge0 = edge & ~EDGE_BLOCK_MASK; 124 | for (u32 i=0; i < EDGE_BLOCK_SIZE; i++) { 125 | shs.hash24(edge0 + i); 126 | buf[i] = shs.xor_lanes(); 127 | } 128 | for (u32 i=EDGE_BLOCK_MASK; i; i--) 129 | buf[i-1] ^= buf[i]; 130 | return buf[edge & EDGE_BLOCK_MASK]; 131 | } 132 | 133 | // verify that edges are ascending and form a cycle in header-generated graph 134 | int verify(word_t edges[PROOFSIZE], siphash_keys &keys) { 135 | word_t xorfrom = 0, xorto = 0; 136 | u64 sips[EDGE_BLOCK_SIZE]; 137 | word_t from[PROOFSIZE], to[PROOFSIZE],visited[PROOFSIZE]; 138 | 139 | for (u32 n = 0; n < PROOFSIZE; n++) { 140 | if (edges[n] > EDGEMASK) 141 | return POW_TOO_BIG; 142 | if (n && edges[n] <= edges[n-1]) 143 | return POW_TOO_SMALL; 144 | u64 edge = sipblock(keys, edges[n], sips); 145 | xorfrom ^= from[n] = edge & EDGEMASK; 146 | xorto ^= to [n] = (edge >> 32) & EDGEMASK; 147 | visited[n] = false; 148 | } 149 | if (xorfrom != xorto) // optional check for obviously bad proofs 150 | return POW_NON_MATCHING; 151 | u32 n = 0, i = 0; 152 | do { // follow cycle 153 | if (visited[i]) 154 | return POW_BRANCH; 155 | visited[i] = true; 156 | u32 nexti; 157 | for (nexti = 0; from[nexti] != to[i]; ) // find outgoing edge meeting incoming edge i 158 | if (++nexti == PROOFSIZE) 159 | return POW_DEAD_END; 160 | i = 
nexti; 161 | n++; 162 | } while (i != 0); // must cycle back to start or we would have found branch 163 | return n == PROOFSIZE ? POW_OK : POW_SHORT_CYCLE; 164 | } 165 | 166 | // convenience function for extracting siphash keys from header 167 | void setheader(const char *header, const u32 headerlen, siphash_keys *keys) { 168 | char hdrkey[32]; 169 | // SHA256((unsigned char *)header, headerlen, (unsigned char *)hdrkey); 170 | blake2b((void *)hdrkey, sizeof(hdrkey), (const void *)header, headerlen, 0, 0); 171 | keys->setkeys(hdrkey); 172 | } 173 | 174 | u64 timestamp() { 175 | using namespace std::chrono; 176 | high_resolution_clock::time_point now = high_resolution_clock::now(); 177 | auto dn = now.time_since_epoch(); 178 | return dn.count(); 179 | } 180 | 181 | ///////////////////////////////////////////////////////////////// 182 | // Declarations to make it easier for callers to link as required 183 | ///////////////////////////////////////////////////////////////// 184 | 185 | #ifndef C_CALL_CONVENTION 186 | #define C_CALL_CONVENTION 0 187 | #endif 188 | 189 | // convention to prepend to called functions 190 | #if C_CALL_CONVENTION 191 | #define CALL_CONVENTION extern "C" 192 | #else 193 | #define CALL_CONVENTION 194 | #endif 195 | 196 | // Ability to squash printf output at compile time, if desired 197 | #ifndef SQUASH_OUTPUT 198 | #define SQUASH_OUTPUT 0 199 | #endif 200 | 201 | void print_log(const char *fmt, ...) 
{ 202 | if (SQUASH_OUTPUT) return; 203 | va_list args; 204 | va_start(args, fmt); 205 | vprintf(fmt, args); 206 | va_end(args); 207 | } 208 | ////////////////////////////////////////////////////////////////// 209 | // END caller QOL 210 | ////////////////////////////////////////////////////////////////// 211 | -------------------------------------------------------------------------------- /src/cuckaroom/graph.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "bitmap.hpp" 5 | #include "compress.hpp" 6 | #include 7 | 8 | typedef word_t proof[PROOFSIZE]; 9 | 10 | // cuck(ar)oom graph with given limit on number of edges (and on single partition nodes) 11 | template 12 | class graph { 13 | public: 14 | // terminates adjacency lists 15 | const word_t NIL = ~(word_t)0; 16 | 17 | struct link { // element of adjacency list 18 | word_t next; 19 | word_t to; 20 | }; 21 | 22 | word_t MAXEDGES; 23 | word_t MAXNODES; 24 | word_t nlinks; // aka halfedges, twice number of edges 25 | word_t *adjlist; // index into links array 26 | link *links; 27 | bool sharedmem; 28 | compressor *compress; 29 | bitmap visited; 30 | u32 MAXSOLS; 31 | proof *sols; 32 | u32 nsols; 33 | 34 | graph(word_t maxedges, word_t maxnodes, u32 maxsols, u32 compressbits) : visited(maxnodes) { 35 | MAXEDGES = maxedges; 36 | MAXNODES = maxnodes; 37 | MAXSOLS = maxsols; 38 | adjlist = new word_t[MAXNODES]; // index into links array 39 | links = new link[MAXEDGES]; 40 | compress = compressbits ? 
new compressor(EDGEBITS, compressbits) : 0; 41 | sharedmem = false; 42 | sols = new proof[MAXSOLS+1]; // extra one for current path 43 | visited.clear(); 44 | } 45 | 46 | ~graph() { 47 | if (!sharedmem) { 48 | delete[] adjlist; 49 | delete[] links; 50 | } 51 | delete[] sols; 52 | } 53 | 54 | graph(word_t maxedges, word_t maxnodes, u32 maxsols, u32 compressbits, char *bytes) : visited(maxnodes) { 55 | MAXEDGES = maxedges; 56 | MAXNODES = maxnodes; 57 | MAXSOLS = maxsols; 58 | adjlist = new (bytes) word_t[MAXNODES]; // index into links array 59 | links = new (bytes += sizeof(word_t[MAXNODES])) link[MAXEDGES]; 60 | compress = compressbits ? new compressor(EDGEBITS, compressbits, bytes += sizeof(link[MAXEDGES])) : 0; 61 | sharedmem = true; 62 | sols = new proof[MAXSOLS+1]; 63 | visited.clear(); 64 | } 65 | 66 | // total size of new-operated data, excludes sols and visited bitmap of MAXEDGES bits 67 | uint64_t bytes() { 68 | return sizeof(word_t[MAXNODES]) + sizeof(link[MAXEDGES]) + (compress ? 
compress->bytes() : 0); 69 | } 70 | 71 | void reset() { 72 | memset(adjlist, (char)NIL, sizeof(word_t[MAXNODES])); 73 | if (compress) 74 | compress->reset(); 75 | resetcounts(); 76 | } 77 | 78 | void resetcounts() { 79 | nlinks = nsols = 0; 80 | // visited has entries set only during cycles() call 81 | } 82 | 83 | static int nonce_cmp(const void *a, const void *b) { 84 | return *(word_t *)a - *(word_t *)b; 85 | } 86 | 87 | void cycles_with_link(u32 len, word_t u, word_t dest) { 88 | if (visited.test(u)) 89 | return; 90 | if (u == dest) { 91 | print_log(" %d-cycle found\n", len); 92 | if (len == PROOFSIZE && nsols < MAXSOLS) { 93 | qsort(sols[nsols++], PROOFSIZE, sizeof(word_t), nonce_cmp); 94 | memcpy(sols[nsols], sols[nsols-1], sizeof(sols[0])); 95 | } 96 | return; 97 | } 98 | if (len == PROOFSIZE) 99 | return; 100 | word_t au1 = adjlist[u]; 101 | if (au1 != NIL) { 102 | visited.set(u); 103 | for (; au1 != NIL; au1 = links[au1].next) { 104 | sols[nsols][len] = au1; 105 | cycles_with_link(len+1, links[au1].to, dest); 106 | } 107 | visited.reset(u); 108 | } 109 | } 110 | 111 | bool add_edge(word_t from, word_t to) { 112 | assert(from < MAXNODES); 113 | assert(to < MAXNODES); 114 | if (from == to || adjlist[to] != NIL) { // possibly part of a cycle 115 | sols[nsols][0] = nlinks; 116 | assert(!visited.test(from)); 117 | cycles_with_link(1, to, from); 118 | } 119 | word_t link = nlinks++; 120 | assert(link != NIL); // avoid confusing links with NIL; guaranteed if bits in word_t > EDGEBITS + 1 121 | assert(link < MAXEDGES); 122 | #ifndef ALLOWDUPES 123 | for (word_t au = adjlist[from]; au != NIL; au = links[au].next) 124 | if (links[au].to == to) return false; // drop duplicate edge 125 | #endif 126 | links[link].next = adjlist[from]; 127 | links[adjlist[from] = link].to = to; 128 | return true; 129 | } 130 | 131 | bool add_compress_edge(word_t from, word_t to) { 132 | return add_edge(compress->compress(from), compress->compress(to)); 133 | } 134 | }; 135 | 
-------------------------------------------------------------------------------- /src/cuckaroom/mean.cpp: -------------------------------------------------------------------------------- 1 | // Cuckaroom Cycle, a memory-hard proof-of-work 2 | // Copyright (c) 2013-2020 John Tromp 3 | 4 | #include "mean.hpp" 5 | #include 6 | #include 7 | 8 | #ifndef HEADERLEN 9 | // arbitrary length of header hashed into siphash key 10 | #define HEADERLEN 80 11 | #endif 12 | 13 | typedef solver_ctx SolverCtx; 14 | 15 | CALL_CONVENTION int run_solver(SolverCtx* ctx, 16 | char* header, 17 | int header_length, 18 | u32 nonce, 19 | u32 range, 20 | SolverSolutions *solutions, 21 | SolverStats *stats 22 | ) 23 | { 24 | u64 time0, time1; 25 | u32 timems; 26 | u32 sumnsols = 0; 27 | 28 | for (u32 r = 0; r < range; r++) { 29 | time0 = timestamp(); 30 | ctx->setheadernonce(header, header_length, nonce + r); 31 | print_log("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx->trimmer.sip_keys.k0, ctx->trimmer.sip_keys.k1, ctx->trimmer.sip_keys.k2, ctx->trimmer.sip_keys.k3); 32 | u32 nsols = ctx->solve(); 33 | time1 = timestamp(); 34 | timems = (time1 - time0) / 1000000; 35 | print_log("Time: %d ms\n", timems); 36 | 37 | for (unsigned s = 0; s < nsols; s++) { 38 | print_log("Solution"); 39 | word_t *prf = &ctx->sols[s * PROOFSIZE]; 40 | for (u32 i = 0; i < PROOFSIZE; i++) 41 | print_log(" %jx", (uintmax_t)prf[i]); 42 | print_log("\n"); 43 | if (solutions != NULL){ 44 | solutions->edge_bits = EDGEBITS; 45 | solutions->num_sols++; 46 | solutions->sols[sumnsols+s].nonce = nonce + r; 47 | for (u32 i = 0; i < PROOFSIZE; i++) 48 | solutions->sols[sumnsols+s].proof[i] = (u64) prf[i]; 49 | } 50 | int pow_rc = verify(prf, ctx->trimmer.sip_keys); 51 | if (pow_rc == POW_OK) { 52 | print_log("Verified with cyclehash "); 53 | unsigned char cyclehash[32]; 54 | blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)prf, sizeof(proof), 0, 0); 55 | for (int i=0; i<32; i++) 56 | print_log("%02x", 
cyclehash[i]); 57 | print_log("\n"); 58 | } else { 59 | print_log("FAILED due to %s\n", errstr[pow_rc]); 60 | } 61 | } 62 | sumnsols += nsols; 63 | if (stats != NULL) { 64 | stats->device_id = 0; 65 | stats->edge_bits = EDGEBITS; 66 | strncpy(stats->device_name, "CPU\0", MAX_NAME_LEN); 67 | stats->last_start_time = time0; 68 | stats->last_end_time = time1; 69 | stats->last_solution_time = time1 - time0; 70 | } 71 | } 72 | print_log("%d total solutions\n", sumnsols); 73 | return sumnsols > 0; 74 | } 75 | 76 | CALL_CONVENTION SolverCtx* create_solver_ctx(SolverParams* params) { 77 | if (params->nthreads == 0) params->nthreads = 1; 78 | if (params->ntrims == 0) params->ntrims = EDGEBITS >= 30 ? 96 : 68; 79 | 80 | SolverCtx* ctx = new SolverCtx(params->nthreads, 81 | params->ntrims, 82 | params->allrounds, 83 | params->showcycle, 84 | params->mutate_nonce); 85 | return ctx; 86 | } 87 | 88 | CALL_CONVENTION void destroy_solver_ctx(SolverCtx* ctx) { 89 | delete ctx; 90 | } 91 | 92 | CALL_CONVENTION void stop_solver(SolverCtx* ctx) { 93 | ctx->abort(); 94 | } 95 | 96 | CALL_CONVENTION void fill_default_params(SolverParams* params) { 97 | // not required in this solver 98 | } 99 | 100 | int main(int argc, char **argv) { 101 | u32 nthreads = 0; 102 | u32 ntrims = 0; 103 | u32 nonce = 0; 104 | u32 range = 1; 105 | #ifdef SAVEEDGES 106 | bool showcycle = 1; 107 | #else 108 | bool showcycle = 0; 109 | #endif 110 | char header[HEADERLEN]; 111 | u32 len; 112 | bool allrounds = false; 113 | int c; 114 | 115 | memset(header, 0, sizeof(header)); 116 | while ((c = getopt (argc, argv, "ah:m:n:r:st:x:")) != -1) { 117 | switch (c) { 118 | case 'a': 119 | allrounds = true; 120 | break; 121 | case 'h': 122 | len = strlen(optarg); 123 | assert(len <= sizeof(header)); 124 | memcpy(header, optarg, len); 125 | break; 126 | case 'x': 127 | len = strlen(optarg)/2; 128 | assert(len == sizeof(header)); 129 | for (u32 i=0; i 1) 160 | print_log("-%d", nonce+range-1); 161 | print_log(") with 50%% 
edges\n"); 162 | 163 | u64 sbytes = ctx->sharedbytes(); 164 | u32 tbytes = ctx->threadbytes(); 165 | int sunit,tunit; 166 | for (sunit=0; sbytes >= 10240; sbytes>>=10,sunit++) ; 167 | for (tunit=0; tbytes >= 10240; tbytes>>=10,tunit++) ; 168 | print_log("Using %d%cB bucket memory at %lx,\n", sbytes, " KMGT"[sunit], (u64)ctx->trimmer.buckets); 169 | print_log("%dx%d%cB thread memory at %lx,\n", params.nthreads, tbytes, " KMGT"[tunit], (u64)ctx->trimmer.tbuckets); 170 | print_log("%d-way siphash, and %d buckets.\n", NSIPHASH, NX); 171 | 172 | run_solver(ctx, header, sizeof(header), nonce, range, NULL, NULL); 173 | 174 | destroy_solver_ctx(ctx); 175 | } 176 | -------------------------------------------------------------------------------- /src/cuckaroom/simple.cpp: -------------------------------------------------------------------------------- 1 | // Cuckaroom Cycle, a memory-hard proof-of-work 2 | // Copyright (c) 2013-2020 John Tromp 3 | 4 | #include "cuckaroom.hpp" 5 | #include "graph.hpp" 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | typedef unsigned char u8; 13 | 14 | class cuckoo_ctx { 15 | public: 16 | siphash_keys sip_keys; 17 | graph cg; 18 | 19 | cuckoo_ctx(const char* header, const u32 headerlen, const u32 nonce) : cg(NEDGES, NNODES, MAX_SOLS, 0) { 20 | } 21 | 22 | ~cuckoo_ctx() { } 23 | 24 | u64 bytes() { 25 | return cg.bytes(); 26 | } 27 | 28 | void setheadernonce(char* const headernonce, const u32 len, const u32 nonce) { 29 | ((u32 *)headernonce)[len/sizeof(u32)-1] = htole32(nonce); // place nonce at end 30 | setheader(headernonce, len, &sip_keys); 31 | cg.reset(); 32 | } 33 | 34 | void find_cycles() { 35 | u64 sips[EDGE_BLOCK_SIZE]; 36 | for (word_t block = 0; block < NEDGES; block += EDGE_BLOCK_SIZE) { 37 | sipblock(sip_keys, block, sips); 38 | for (u32 i = 0; i < EDGE_BLOCK_SIZE; i++) { 39 | u64 edge = sips[i]; 40 | word_t u = edge & NODEMASK; 41 | word_t v = (edge >> 32) & NODEMASK; 42 | cg.add_edge(u, v); 43 | #ifdef 
SHOW 44 | word_t nonce = block + i; 45 | printf("%d add (%d,%d)\n", nonce,u,v+NNODES); 46 | for (unsigned j=0; j::NIL; a=cg.links[a].next) printf(":%d", cg.links[a^1].to); 49 | if ((j+1) % NNODES == 0) 50 | printf("\n"); 51 | } 52 | #endif 53 | } 54 | } 55 | for (u32 s=0; s < cg.nsols; s++) { 56 | printf("Solution"); 57 | // qsort(&cg.sols[s], PROOFSIZE, sizeof(word_t), cg.nonce_cmp); 58 | for (u32 j=0; j < PROOFSIZE; j++) { 59 | word_t nonce = cg.sols[s][j]; 60 | u64 edge = sipblock(sip_keys, nonce, sips); 61 | printf(" (%x,%x)", edge & NODEMASK, (edge >> 32) & NODEMASK); 62 | printf(" %x", nonce); 63 | } 64 | printf("\n"); 65 | int pow_rc = verify(cg.sols[s], sip_keys); 66 | if (pow_rc == POW_OK) { 67 | printf("Verified with cyclehash "); 68 | unsigned char cyclehash[32]; 69 | blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)cg.sols[s], sizeof(cg.sols[0]), 0, 0); 70 | for (int i=0; i<32; i++) 71 | printf("%02x", cyclehash[i]); 72 | printf("\n"); 73 | } else { 74 | printf("FAILED due to %s\n", errstr[pow_rc]); 75 | } 76 | 77 | } 78 | } 79 | }; 80 | 81 | // arbitrary length of header hashed into siphash key 82 | #define HEADERLEN 80 83 | 84 | int main(int argc, char **argv) { 85 | char header[HEADERLEN]; 86 | memset(header, 0, HEADERLEN); 87 | int c; 88 | u32 nonce = 0; 89 | u32 range = 1; 90 | u64 time0, time1; 91 | u32 timems; 92 | 93 | while ((c = getopt (argc, argv, "h:n:r:")) != -1) { 94 | switch (c) { 95 | case 'h': 96 | memcpy(header, optarg, strlen(optarg)); 97 | break; 98 | case 'n': 99 | nonce = atoi(optarg); 100 | break; 101 | case 'r': 102 | range = atoi(optarg); 103 | break; 104 | } 105 | } 106 | printf("Looking for %d-cycle on cuckaroom%d(\"%s\",%d", PROOFSIZE, EDGEBITS, header, nonce); 107 | if (range > 1) 108 | printf("-%d", nonce+range-1); 109 | printf("), "); 110 | cuckoo_ctx ctx(header, sizeof(header), nonce); 111 | u64 bytes = ctx.bytes(); 112 | int unit; 113 | for (unit=0; bytes >= 10240; bytes>>=10,unit++) ; 114 | printf("using 
%d%cB memory\n", (u32)bytes, " KMGT"[unit]); 115 | 116 | for (u32 r = 0; r < range; r++) { 117 | time0 = timestamp(); 118 | ctx.setheadernonce(header, sizeof(header), nonce + r); 119 | printf("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx.sip_keys.k0, ctx.sip_keys.k1, ctx.sip_keys.k2, ctx.sip_keys.k3); 120 | ctx.find_cycles(); 121 | time1 = timestamp(); timems = (time1 - time0) / 1000000; 122 | printf("Time: %d ms\n", timems); 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /src/cuckarooz/Makefile: -------------------------------------------------------------------------------- 1 | .POSIX: 2 | .SUFFIXES: 3 | 4 | OPT ?= -O3 5 | 6 | GCC_ARCH_FLAGS ?= -march=native 7 | GPP_ARCH_FLAGS ?= -march=native 8 | 9 | # -Wno-deprecated-declarations shuts up Apple OSX clang 10 | FLAGS ?= -Wall -Wno-format -Wno-deprecated-declarations -D_POSIX_C_SOURCE=200112L $(OPT) -DPREFETCH -I. $(CPPFLAGS) -pthread 11 | GPP ?= g++ $(GPP_ARCH_FLAGS) -std=c++11 $(FLAGS) 12 | CFLAGS ?= -Wall -Wno-format -fomit-frame-pointer $(OPT) 13 | GCC ?= gcc $(GCC_ARCH_FLAGS) -std=gnu11 $(CFLAGS) 14 | BLAKE_2B_SRC ?= ../crypto/blake2b-ref.c 15 | NVCC ?= nvcc -std=c++11 16 | 17 | all : simpletest # cpu miner not working yet # meantest 18 | 19 | simpletest: simple19 20 | ./simple19 -n 64 21 | 22 | meantest: mean29x4 23 | ./mean29x4 -n 23 -t 4 -s 24 | 25 | simple19: ../crypto/siphash.hpp cuckarooz.hpp bitmap.hpp graph.hpp simple.cpp Makefile 26 | $(GPP) -o $@ -DPROOFSIZE=42 -DEDGEBITS=19 simple.cpp $(BLAKE_2B_SRC) 27 | 28 | simple29: ../crypto/siphash.hpp cuckarooz.hpp bitmap.hpp graph.hpp simple.cpp Makefile 29 | $(GPP) -o $@ -DPROOFSIZE=42 -DEDGEBITS=29 simple.cpp $(BLAKE_2B_SRC) 30 | 31 | mean19x1: cuckarooz.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 32 | $(GPP) -o $@ -DXBITS=2 -DNSIPHASH=1 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC) 33 | 34 | mean19x4: cuckarooz.hpp bitmap.hpp graph.hpp 
../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 35 | $(GPP) -o $@ -mno-avx2 -DXBITS=2 -DNSIPHASH=4 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC) 36 | 37 | mean19x8: cuckarooz.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 38 | $(GPP) -o $@ -mavx2 -DXBITS=2 -DNSIPHASH=8 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC) 39 | 40 | mean29x4: cuckarooz.hpp bitmap.hpp graph.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 41 | $(GPP) -o $@ -mno-avx2 -DNSIPHASH=4 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC) 42 | 43 | mean29x8: cuckarooz.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 44 | $(GPP) -o $@ -mavx2 -DNSIPHASH=8 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC) 45 | 46 | mean29x8s: cuckarooz.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 47 | $(GPP) -o $@ -mavx2 -DSAVEEDGES -DNSIPHASH=8 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC) 48 | 49 | mean29x1: cuckarooz.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 50 | $(GPP) -o $@ -DNSIPHASH=1 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC) 51 | 52 | mean30x1: cuckarooz.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 53 | $(GPP) -o $@ -DNSIPHASH=1 -DEXPANDROUND=10 -DCOMPRESSROUND=22 -DEDGEBITS=30 mean.cpp $(BLAKE_2B_SRC) 54 | 55 | mean30x8: cuckarooz.hpp bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 56 | $(GPP) -o $@ -mavx2 -DNSIPHASH=8 -DEXPANDROUND=10 -DCOMPRESSROUND=22 -DEDGEBITS=30 mean.cpp $(BLAKE_2B_SRC) 57 | 58 | cuda19: ../crypto/siphash.cuh cuckarooz.hpp compress.hpp graph.hpp mean.cu Makefile 59 | $(NVCC) -o $@ -DEPS_A=4 -DEPS_B=3 -DIDXSHIFT=2 -DEDGEBITS=19 -arch sm_35 mean.cu $(BLAKE_2B_SRC) 60 | 61 | cuda29: ../crypto/siphash.cuh cuckarooz.hpp compress.hpp graph.hpp mean.cu Makefile 62 | $(NVCC) -o $@ -DEDGEBITS=29 -arch sm_35 mean.cu 
$(BLAKE_2B_SRC) 63 | -------------------------------------------------------------------------------- /src/cuckarooz/bitmap.hpp: -------------------------------------------------------------------------------- 1 | template 2 | class bitmap { 3 | public: 4 | word_t SIZE; 5 | word_t BITMAP_WORDS; 6 | #ifdef ATOMIC 7 | typedef std::atomic aword_t; 8 | #else 9 | typedef word_t aword_t; 10 | #endif 11 | aword_t *bits; 12 | const u32 BITS_PER_WORD = sizeof(word_t) * 8; 13 | 14 | bitmap(word_t size) { 15 | SIZE = size; 16 | BITMAP_WORDS = SIZE / BITS_PER_WORD; 17 | bits = new aword_t[BITMAP_WORDS]; 18 | assert(bits != 0); 19 | } 20 | ~bitmap() { 21 | freebits(); 22 | } 23 | void freebits() { 24 | delete[] bits; 25 | bits = 0; 26 | } 27 | void clear() { 28 | assert(bits); 29 | memset((word_t *)bits, 0, BITMAP_WORDS*sizeof(word_t)); 30 | } 31 | void prefetch(u32 u) const { 32 | #ifdef PREFETCH 33 | __builtin_prefetch((const void *)(&bits[u/BITS_PER_WORD]), /*READ=*/0, /*TEMPORAL=*/0); 34 | #endif 35 | } 36 | void set(u32 u) { 37 | u32 idx = u / BITS_PER_WORD; 38 | word_t bit = (word_t)1 << (u % BITS_PER_WORD); 39 | #ifdef ATOMIC 40 | std::atomic_fetch_or_explicit(&bits[idx], bit, std::memory_order_relaxed); 41 | #else 42 | bits[idx] |= bit; 43 | #endif 44 | } 45 | void reset(u32 u) { 46 | u32 idx = u / BITS_PER_WORD; 47 | word_t bit = (word_t)1 << (u % BITS_PER_WORD); 48 | #ifdef ATOMIC 49 | std::atomic_fetch_and_explicit(&bits[idx], ~bit, std::memory_order_relaxed); 50 | #else 51 | bits[idx] &= ~bit; 52 | #endif 53 | } 54 | bool test(u32 u) const { 55 | u32 idx = u / BITS_PER_WORD; 56 | u32 bit = u % BITS_PER_WORD; 57 | #ifdef ATOMIC 58 | return (bits[idx].load(std::memory_order_relaxed) >> bit) & 1; 59 | #else 60 | return (bits[idx] >> bit) & 1; 61 | #endif 62 | } 63 | word_t block(u32 n) const { 64 | u32 idx = n / BITS_PER_WORD; 65 | return bits[idx]; 66 | } 67 | }; 68 | -------------------------------------------------------------------------------- 
/src/cuckarooz/compress.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // compressor for cuckarooz nodes where edgetrimming 4 | // has left at most a fraction 2^-compressbits nodes in each partition 5 | template 6 | class compressor { 7 | public: 8 | u32 NODEBITS; 9 | u32 SHIFTBITS; 10 | u32 SIZEBITS; 11 | word_t SIZE; 12 | word_t SIZE2; 13 | word_t MASK; 14 | word_t MASK2; 15 | word_t nnodes; 16 | const word_t NIL = ~(word_t)0; 17 | word_t *nodes; 18 | bool sharedmem; 19 | 20 | compressor(u32 nodebits, u32 compressbits, char *bytes) { 21 | NODEBITS = nodebits; 22 | SHIFTBITS = compressbits; 23 | SIZEBITS = NODEBITS-compressbits; 24 | SIZE = (word_t)1 << SIZEBITS; 25 | SIZE2 = (word_t)2 << SIZEBITS; 26 | nodes = new (bytes) word_t[SIZE2]; 27 | sharedmem = true; 28 | MASK = SIZE-1; 29 | MASK2 = SIZE2-1; 30 | } 31 | 32 | compressor(u32 nodebits, u32 compressbits) { 33 | NODEBITS = nodebits; 34 | SHIFTBITS = compressbits; 35 | SIZEBITS = NODEBITS-compressbits; 36 | SIZE = (word_t)1 << SIZEBITS; 37 | SIZE2 = (word_t)2 << SIZEBITS; 38 | nodes = new word_t[SIZE2]; 39 | sharedmem = false; 40 | MASK = SIZE-1; 41 | MASK2 = SIZE2-1; 42 | } 43 | 44 | ~compressor() { 45 | if (!sharedmem) 46 | delete[] nodes; 47 | } 48 | 49 | uint64_t bytes() { 50 | return sizeof(word_t[SIZE2]); 51 | } 52 | 53 | void reset() { 54 | memset(nodes, (char)NIL, sizeof(word_t[SIZE2])); 55 | nnodes = 0; 56 | } 57 | 58 | word_t compress(word_t u) { 59 | word_t ui = u >> SHIFTBITS; 60 | for (; ; ui = (ui+1) & MASK2) { 61 | word_t cu = nodes[ui]; 62 | if (cu == NIL) { 63 | if (nnodes >= SIZE) { 64 | print_log("NODE OVERFLOW at %x\n", u); 65 | return 0; 66 | } 67 | nodes[ui] = u << SIZEBITS | nnodes; 68 | return nnodes++; 69 | } 70 | if ((cu & ~MASK) == u << SIZEBITS) { 71 | return cu & MASK; 72 | } 73 | } 74 | } 75 | }; 76 | -------------------------------------------------------------------------------- /src/cuckarooz/cuckarooz.c: 
-------------------------------------------------------------------------------- 1 | // Cuckarooz Cycle, a memory-hard proof-of-work 2 | // Copyright (c) 2013-2020 John Tromp 3 | 4 | #include "cuckarooz.h" 5 | #include // for SCNx64 macro 6 | #include // printf/scanf 7 | #include // exit 8 | #include // getopt 9 | #include // d'uh 10 | 11 | // arbitrary length of header hashed into siphash key 12 | #define HEADERLEN 80 13 | 14 | int main(int argc, char **argv) { 15 | const char *header = ""; 16 | int nonce = 0; 17 | int c; 18 | while ((c = getopt (argc, argv, "h:n:")) != -1) { 19 | switch (c) { 20 | case 'h': 21 | header = optarg; 22 | break; 23 | case 'n': 24 | nonce = atoi(optarg); 25 | break; 26 | } 27 | } 28 | char headernonce[HEADERLEN]; 29 | u32 hdrlen = strlen(header); 30 | memcpy(headernonce, header, hdrlen); 31 | memset(headernonce+hdrlen, 0, sizeof(headernonce)-hdrlen); 32 | ((u32 *)headernonce)[HEADERLEN/sizeof(u32)-1] = htole32(nonce); 33 | siphash_keys keys; 34 | setheader(headernonce, sizeof(headernonce), &keys); 35 | printf("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce, keys.k0, keys.k1, keys.k2, keys.k3); 36 | printf("Verifying size %d proof for cuckarooz%d(\"%s\",%d)\n", 37 | PROOFSIZE, EDGEBITS, header, nonce); 38 | for (int nsols=0; scanf(" Solution") == 0; nsols++) { 39 | word_t nonces[PROOFSIZE]; 40 | for (int n = 0; n < PROOFSIZE; n++) { 41 | uint64_t nonce; 42 | int nscan = scanf(" %" SCNx64, &nonce); 43 | assert(nscan == 1); 44 | nonces[n] = nonce; 45 | } 46 | int pow_rc = verify(nonces, &keys); 47 | if (pow_rc == POW_OK) { 48 | printf("Verified with cyclehash "); 49 | unsigned char cyclehash[32]; 50 | blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)nonces, sizeof(nonces), 0, 0); 51 | for (int i=0; i<32; i++) 52 | printf("%02x", cyclehash[i]); 53 | printf("\n"); 54 | } else { 55 | printf("FAILED due to %s\n", errstr[pow_rc]); 56 | } 57 | } 58 | return 0; 59 | } 60 | 
-------------------------------------------------------------------------------- /src/cuckarooz/graph.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "bitmap.hpp" 5 | #include "compress.hpp" 6 | #include 7 | 8 | typedef word_t proof[PROOFSIZE]; 9 | 10 | // cuck(ar)ooz graph with given limit on number of edges (and on single partition nodes) 11 | template 12 | class graph { 13 | public: 14 | // terminates adjacency lists 15 | const word_t NIL = ~(word_t)0; 16 | 17 | struct link { // element of adjacency list 18 | word_t next; 19 | word_t to; 20 | }; 21 | 22 | word_t MAXEDGES; 23 | word_t MAXNODES; 24 | word_t nlinks; // aka halfedges, twice number of edges 25 | word_t *adjlist; // index into links array 26 | link *links; 27 | bool sharedmem; 28 | compressor *compress; 29 | bitmap visited; 30 | u32 MAXSOLS; 31 | proof *sols; 32 | u32 nsols; 33 | 34 | graph(word_t maxedges, word_t maxnodes, u32 maxsols, u32 compressbits) : visited(maxnodes) { 35 | MAXEDGES = maxedges; 36 | MAXNODES = maxnodes; 37 | MAXSOLS = maxsols; 38 | adjlist = new word_t[MAXNODES]; // index into links array 39 | links = new link[2*MAXEDGES]; 40 | compress = compressbits ? new compressor(EDGEBITS, compressbits) : 0; 41 | sharedmem = false; 42 | sols = new proof[MAXSOLS+1]; // extra one for current path 43 | visited.clear(); 44 | } 45 | 46 | ~graph() { 47 | if (!sharedmem) { 48 | delete[] adjlist; 49 | delete[] links; 50 | } 51 | delete[] sols; 52 | } 53 | 54 | graph(word_t maxedges, word_t maxnodes, u32 maxsols, u32 compressbits, char *bytes) : visited(maxnodes) { 55 | MAXEDGES = maxedges; 56 | MAXNODES = maxnodes; 57 | MAXSOLS = maxsols; 58 | adjlist = new (bytes) word_t[MAXNODES]; // index into links array 59 | links = new (bytes += sizeof(word_t[MAXNODES])) link[2*MAXEDGES]; 60 | compress = compressbits ? 
new compressor(EDGEBITS, compressbits, bytes += sizeof(link[2*MAXEDGES])) : 0; 61 | sharedmem = true; 62 | sols = new proof[MAXSOLS+1]; 63 | visited.clear(); 64 | } 65 | 66 | // total size of new-operated data, excludes sols and visited bitmap of MAXEDGES bits 67 | uint64_t bytes() { 68 | return sizeof(word_t[MAXNODES]) + sizeof(link[2*MAXEDGES]) + (compress ? compress->bytes() : 0); 69 | } 70 | 71 | void reset() { 72 | memset(adjlist, (char)NIL, sizeof(word_t[MAXNODES])); 73 | if (compress) 74 | compress->reset(); 75 | resetcounts(); 76 | } 77 | 78 | void resetcounts() { 79 | nlinks = nsols = 0; 80 | // visited has entries set only during cycles() call 81 | } 82 | 83 | static int nonce_cmp(const void *a, const void *b) { 84 | return *(word_t *)a - *(word_t *)b; 85 | } 86 | 87 | void cycles_with_link(u32 len, word_t u, word_t dest) { 88 | if (visited.test(u)) 89 | return; 90 | if (u == dest) { 91 | print_log(" %d-cycle found\n", len); 92 | if (len == PROOFSIZE && nsols < MAXSOLS) { 93 | qsort(sols[nsols++], PROOFSIZE, sizeof(word_t), nonce_cmp); 94 | memcpy(sols[nsols], sols[nsols-1], sizeof(sols[0])); 95 | } 96 | return; 97 | } 98 | if (len == PROOFSIZE) 99 | return; 100 | word_t au1 = adjlist[u]; 101 | if (au1 != NIL) { 102 | visited.set(u); 103 | for (; au1 != NIL; au1 = links[au1].next) { 104 | sols[nsols][len] = au1/2; 105 | cycles_with_link(len+1, links[au1 ^ 1].to, dest); 106 | } 107 | visited.reset(u); 108 | } 109 | } 110 | 111 | bool add_edge(word_t u, word_t v) { 112 | #ifndef ALLOWDUPES 113 | for (word_t au = adjlist[u]; au != NIL; au = links[au].next) 114 | if (links[au^1].to == v) { nlinks += 2; return false; } // drop duplicate edge 115 | #endif 116 | assert(u < MAXNODES); 117 | assert(v < MAXNODES); 118 | if (u != v && adjlist[u] != NIL && adjlist[v] != NIL) { // possibly part of a cycle 119 | sols[nsols][0] = nlinks/2; 120 | assert(!visited.test(u)); 121 | cycles_with_link(1, v, u); 122 | } 123 | word_t ulink = nlinks++; 124 | word_t vlink = 
nlinks++; // the two halfedges of an edge differ only in last bit 125 | assert(vlink < 2*MAXEDGES); // assume MAXEDGES fits in word_t 126 | links[ulink].next = adjlist[u]; 127 | links[vlink].next = adjlist[v]; 128 | links[adjlist[u] = ulink].to = u; 129 | links[adjlist[v] = vlink].to = v; 130 | return true; 131 | } 132 | 133 | bool add_compress_edge(word_t u, word_t v) { 134 | return add_edge(compress->compress(u), compress->compress(v)); 135 | } 136 | }; 137 | -------------------------------------------------------------------------------- /src/cuckarooz/mean.cpp: -------------------------------------------------------------------------------- 1 | // Cuckarooz Cycle, a memory-hard proof-of-work 2 | // Copyright (c) 2013-2020 John Tromp 3 | 4 | #include "mean.hpp" 5 | #include 6 | #include 7 | 8 | #ifndef HEADERLEN 9 | // arbitrary length of header hashed into siphash key 10 | #define HEADERLEN 80 11 | #endif 12 | 13 | typedef solver_ctx SolverCtx; 14 | 15 | CALL_CONVENTION int run_solver(SolverCtx* ctx, 16 | char* header, 17 | int header_length, 18 | u32 nonce, 19 | u32 range, 20 | SolverSolutions *solutions, 21 | SolverStats *stats 22 | ) 23 | { 24 | u64 time0, time1; 25 | u32 timems; 26 | u32 sumnsols = 0; 27 | 28 | for (u32 r = 0; r < range; r++) { 29 | time0 = timestamp(); 30 | ctx->setheadernonce(header, header_length, nonce + r); 31 | print_log("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx->trimmer.sip_keys.k0, ctx->trimmer.sip_keys.k1, ctx->trimmer.sip_keys.k2, ctx->trimmer.sip_keys.k3); 32 | u32 nsols = ctx->solve(); 33 | time1 = timestamp(); 34 | timems = (time1 - time0) / 1000000; 35 | print_log("Time: %d ms\n", timems); 36 | 37 | for (unsigned s = 0; s < nsols; s++) { 38 | print_log("Solution"); 39 | word_t *prf = &ctx->sols[s * PROOFSIZE]; 40 | for (u32 i = 0; i < PROOFSIZE; i++) 41 | print_log(" %jx", (uintmax_t)prf[i]); 42 | print_log("\n"); 43 | if (solutions != NULL){ 44 | solutions->edge_bits = EDGEBITS; 45 | solutions->num_sols++; 46 
| solutions->sols[sumnsols+s].nonce = nonce + r; 47 | for (u32 i = 0; i < PROOFSIZE; i++) 48 | solutions->sols[sumnsols+s].proof[i] = (u64) prf[i]; 49 | } 50 | int pow_rc = verify(prf, ctx->trimmer.sip_keys); 51 | if (pow_rc == POW_OK) { 52 | print_log("Verified with cyclehash "); 53 | unsigned char cyclehash[32]; 54 | blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)prf, sizeof(proof), 0, 0); 55 | for (int i=0; i<32; i++) 56 | print_log("%02x", cyclehash[i]); 57 | print_log("\n"); 58 | } else { 59 | print_log("FAILED due to %s\n", errstr[pow_rc]); 60 | } 61 | } 62 | sumnsols += nsols; 63 | if (stats != NULL) { 64 | stats->device_id = 0; 65 | stats->edge_bits = EDGEBITS; 66 | strncpy(stats->device_name, "CPU\0", MAX_NAME_LEN); 67 | stats->last_start_time = time0; 68 | stats->last_end_time = time1; 69 | stats->last_solution_time = time1 - time0; 70 | } 71 | } 72 | print_log("%d total solutions\n", sumnsols); 73 | return sumnsols > 0; 74 | } 75 | 76 | CALL_CONVENTION SolverCtx* create_solver_ctx(SolverParams* params) { 77 | if (params->nthreads == 0) params->nthreads = 1; 78 | if (params->ntrims == 0) params->ntrims = EDGEBITS >= 30 ? 
96 : 68; 79 | 80 | SolverCtx* ctx = new SolverCtx(params->nthreads, 81 | params->ntrims, 82 | params->allrounds, 83 | params->showcycle, 84 | params->mutate_nonce); 85 | return ctx; 86 | } 87 | 88 | CALL_CONVENTION void destroy_solver_ctx(SolverCtx* ctx) { 89 | delete ctx; 90 | } 91 | 92 | CALL_CONVENTION void stop_solver(SolverCtx* ctx) { 93 | ctx->abort(); 94 | } 95 | 96 | CALL_CONVENTION void fill_default_params(SolverParams* params) { 97 | // not required in this solver 98 | } 99 | 100 | int main(int argc, char **argv) { 101 | u32 nthreads = 0; 102 | u32 ntrims = 0; 103 | u32 nonce = 0; 104 | u32 range = 1; 105 | #ifdef SAVEEDGES 106 | bool showcycle = 1; 107 | #else 108 | bool showcycle = 0; 109 | #endif 110 | char header[HEADERLEN]; 111 | u32 len; 112 | bool allrounds = false; 113 | int c; 114 | 115 | memset(header, 0, sizeof(header)); 116 | while ((c = getopt (argc, argv, "ah:m:n:r:st:x:")) != -1) { 117 | switch (c) { 118 | case 'a': 119 | allrounds = true; 120 | break; 121 | case 'h': 122 | len = strlen(optarg); 123 | assert(len <= sizeof(header)); 124 | memcpy(header, optarg, len); 125 | break; 126 | case 'x': 127 | len = strlen(optarg)/2; 128 | assert(len == sizeof(header)); 129 | for (u32 i=0; i 1) 160 | print_log("-%d", nonce+range-1); 161 | print_log(") with 50%% edges\n"); 162 | 163 | u64 sbytes = ctx->sharedbytes(); 164 | u32 tbytes = ctx->threadbytes(); 165 | int sunit,tunit; 166 | for (sunit=0; sbytes >= 10240; sbytes>>=10,sunit++) ; 167 | for (tunit=0; tbytes >= 10240; tbytes>>=10,tunit++) ; 168 | print_log("Using %d%cB bucket memory at %lx,\n", sbytes, " KMGT"[sunit], (u64)ctx->trimmer.buckets); 169 | print_log("%dx%d%cB thread memory at %lx,\n", params.nthreads, tbytes, " KMGT"[tunit], (u64)ctx->trimmer.tbuckets); 170 | print_log("%d-way siphash, and %d buckets.\n", NSIPHASH, NX); 171 | 172 | run_solver(ctx, header, sizeof(header), nonce, range, NULL, NULL); 173 | 174 | destroy_solver_ctx(ctx); 175 | } 176 | 
-------------------------------------------------------------------------------- /src/cuckarooz/simple.cpp: -------------------------------------------------------------------------------- 1 | // Cuckarooz Cycle, a memory-hard proof-of-work 2 | // Copyright (c) 2013-2020 John Tromp 3 | 4 | #include "cuckarooz.hpp" 5 | #include "graph.hpp" 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | typedef unsigned char u8; 13 | 14 | class cuckoo_ctx { 15 | public: 16 | siphash_keys sip_keys; 17 | graph cg; 18 | 19 | cuckoo_ctx(const char* header, const u32 headerlen, const u32 nonce) : cg(NEDGES, NNODES, MAX_SOLS, 0) { 20 | } 21 | 22 | ~cuckoo_ctx() { } 23 | 24 | u64 bytes() { 25 | return cg.bytes(); 26 | } 27 | 28 | void setheadernonce(char* const headernonce, const u32 len, const u32 nonce) { 29 | ((u32 *)headernonce)[len/sizeof(u32)-1] = htole32(nonce); // place nonce at end 30 | setheader(headernonce, len, &sip_keys); 31 | cg.reset(); 32 | } 33 | 34 | void find_cycles() { 35 | u64 sips[EDGE_BLOCK_SIZE]; 36 | for (word_t block = 0; block < NEDGES; block += EDGE_BLOCK_SIZE) { 37 | sipblock(sip_keys, block, sips); 38 | for (u32 i = 0; i < EDGE_BLOCK_SIZE; i++) { 39 | u64 edge = sips[i]; 40 | word_t u = edge & NODEMASK; 41 | word_t v = (edge >> 32) & NODEMASK; 42 | cg.add_edge(u, v); 43 | #ifdef SHOW 44 | word_t nonce = block + i; 45 | printf("%d add (%d,%d)\n", nonce,u,v+NNODES); 46 | for (unsigned j=0; j::NIL; a=cg.links[a].next) printf(":%d", cg.links[a^1].to); 49 | if ((j+1) % NNODES == 0) 50 | printf("\n"); 51 | } 52 | #endif 53 | } 54 | } 55 | for (u32 s=0; s < cg.nsols; s++) { 56 | printf("Solution"); 57 | // qsort(&cg.sols[s], PROOFSIZE, sizeof(word_t), cg.nonce_cmp); 58 | for (u32 j=0; j < PROOFSIZE; j++) { 59 | word_t nonce = cg.sols[s][j]; 60 | u64 edge = sipblock(sip_keys, nonce, sips); 61 | printf(" (%x,%x)", edge & NODEMASK, (edge >> 32) & NODEMASK); 62 | printf(" %x", nonce); 63 | } 64 | printf("\n"); 65 | int pow_rc = 
verify(cg.sols[s], sip_keys); 66 | if (pow_rc == POW_OK) { 67 | printf("Verified with cyclehash "); 68 | unsigned char cyclehash[32]; 69 | blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)cg.sols[s], sizeof(cg.sols[0]), 0, 0); 70 | for (int i=0; i<32; i++) 71 | printf("%02x", cyclehash[i]); 72 | printf("\n"); 73 | } else { 74 | printf("FAILED due to %s\n", errstr[pow_rc]); 75 | } 76 | 77 | } 78 | } 79 | }; 80 | 81 | // arbitrary length of header hashed into siphash key 82 | #define HEADERLEN 80 83 | 84 | int main(int argc, char **argv) { 85 | char header[HEADERLEN]; 86 | memset(header, 0, HEADERLEN); 87 | int c; 88 | u32 nonce = 0; 89 | u32 range = 1; 90 | u64 time0, time1; 91 | u32 timems; 92 | u32 len; 93 | 94 | while ((c = getopt (argc, argv, "h:n:r:x:")) != -1) { 95 | switch (c) { 96 | case 'h': 97 | memcpy(header, optarg, strlen(optarg)); 98 | break; 99 | case 'x': 100 | len = strlen(optarg)/2; 101 | assert(len == sizeof(header)); 102 | for (u32 i=0; i 1) 115 | printf("-%d", nonce+range-1); 116 | printf("), "); 117 | cuckoo_ctx ctx(header, sizeof(header), nonce); 118 | u64 bytes = ctx.bytes(); 119 | int unit; 120 | for (unit=0; bytes >= 10240; bytes>>=10,unit++) ; 121 | printf("using %d%cB memory\n", (u32)bytes, " KMGT"[unit]); 122 | 123 | for (u32 r = 0; r < range; r++) { 124 | time0 = timestamp(); 125 | ctx.setheadernonce(header, sizeof(header), nonce + r); 126 | printf("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx.sip_keys.k0, ctx.sip_keys.k1, ctx.sip_keys.k2, ctx.sip_keys.k3); 127 | ctx.find_cycles(); 128 | time1 = timestamp(); timems = (time1 - time0) / 1000000; 129 | printf("Time: %d ms\n", timems); 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /src/cuckatoo/bitmap.hpp: -------------------------------------------------------------------------------- 1 | template 2 | class bitmap { 3 | public: 4 | u64 SIZE; 5 | word_t BITMAP_WORDS; 6 | #ifdef ATOMIC 7 | typedef std::atomic 
aword_t; 8 | #else 9 | typedef word_t aword_t; 10 | #endif 11 | aword_t *bits; 12 | const u32 BITS_PER_WORD = sizeof(word_t) * 8; 13 | 14 | bitmap(u64 size) { 15 | SIZE = size; 16 | BITMAP_WORDS = SIZE / BITS_PER_WORD; 17 | bits = new aword_t[BITMAP_WORDS]; 18 | assert(bits != 0); 19 | } 20 | ~bitmap() { 21 | freebits(); 22 | } 23 | void freebits() { 24 | delete[] bits; 25 | bits = 0; 26 | } 27 | void clear() { 28 | assert(bits); 29 | memset((word_t *)bits, 0, BITMAP_WORDS*sizeof(word_t)); 30 | } 31 | void prefetch(word_t u) const { 32 | #ifdef PREFETCH 33 | __builtin_prefetch((const void *)(&bits[u/BITS_PER_WORD]), /*READ=*/0, /*TEMPORAL=*/0); 34 | #endif 35 | } 36 | void set(word_t u) { 37 | word_t idx = u / BITS_PER_WORD; 38 | word_t bit = (word_t)1 << (u % BITS_PER_WORD); 39 | #ifdef ATOMIC 40 | std::atomic_fetch_or_explicit(&bits[idx], bit, std::memory_order_relaxed); 41 | #else 42 | bits[idx] |= bit; 43 | #endif 44 | } 45 | void reset(word_t u) { 46 | word_t idx = u / BITS_PER_WORD; 47 | word_t bit = (word_t)1 << (u % BITS_PER_WORD); 48 | #ifdef ATOMIC 49 | std::atomic_fetch_and_explicit(&bits[idx], ~bit, std::memory_order_relaxed); 50 | #else 51 | bits[idx] &= ~bit; 52 | #endif 53 | } 54 | bool test(word_t u) const { 55 | word_t idx = u / BITS_PER_WORD; 56 | u32 bit = u % BITS_PER_WORD; 57 | #ifdef ATOMIC 58 | return (bits[idx].load(std::memory_order_relaxed) >> bit) & 1; 59 | #else 60 | return (bits[idx] >> bit) & 1; 61 | #endif 62 | } 63 | word_t block(word_t n) const { 64 | word_t idx = n / BITS_PER_WORD; 65 | return bits[idx]; 66 | } 67 | }; 68 | -------------------------------------------------------------------------------- /src/cuckatoo/compress.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // compressor for cuckatoo nodes where edgetrimming 4 | // has left at most 2^-compressbits nodes in each partition 5 | template 6 | class compressor { 7 | public: 8 | u32 NODEBITS; 9 | u32 COMPRESSBITS; 
10 | u32 SIZEBITS; 11 | u32 SIZEBITS1; 12 | word_t SIZE; 13 | word_t MASK; 14 | word_t MASK1; 15 | word_t npairs; 16 | const word_t NIL = ~(word_t)0; 17 | word_t *nodes; 18 | bool sharedmem; 19 | 20 | compressor(u32 nodebits, u32 compressbits, char *bytes) { 21 | NODEBITS = nodebits; 22 | COMPRESSBITS = compressbits; 23 | SIZEBITS = NODEBITS-COMPRESSBITS; 24 | SIZEBITS1 = SIZEBITS-1; 25 | SIZE = (word_t)1 << SIZEBITS; 26 | assert(SIZE); 27 | MASK = SIZE-1; 28 | MASK1 = MASK >> 1; 29 | nodes = new (bytes) word_t[SIZE]; 30 | sharedmem = true; 31 | } 32 | 33 | compressor(u32 nodebits, u32 compressbits) { 34 | NODEBITS = nodebits; 35 | COMPRESSBITS = compressbits; 36 | SIZEBITS = NODEBITS-COMPRESSBITS; 37 | SIZEBITS1 = SIZEBITS-1; 38 | SIZE = (word_t)1 << SIZEBITS; 39 | assert(SIZE); 40 | MASK = SIZE-1; 41 | MASK1 = MASK >> 1; 42 | nodes = new word_t[SIZE]; 43 | sharedmem = false; 44 | } 45 | 46 | ~compressor() { 47 | if (!sharedmem) 48 | delete[] nodes; 49 | } 50 | 51 | uint64_t bytes() { 52 | return sizeof(word_t[SIZE]); 53 | } 54 | 55 | void reset() { 56 | memset(nodes, (char)NIL, sizeof(word_t[SIZE])); 57 | npairs = 0; 58 | } 59 | 60 | word_t compress(word_t u) { 61 | u32 parity = u & 1; 62 | word_t ui = u >> COMPRESSBITS; 63 | u >>= 1; 64 | for (; ; ui = (ui+1) & MASK) { 65 | word_t cu = nodes[ui]; 66 | if (cu == NIL) { 67 | if (npairs >= SIZE/2) { 68 | print_log("NODE OVERFLOW at %x\n", u << 1 | parity); 69 | return parity; 70 | } 71 | nodes[ui] = u << SIZEBITS1 | npairs; 72 | return (npairs++ << 1) | parity; 73 | } 74 | if ((cu & ~MASK1) == u << SIZEBITS1) { 75 | return ((cu & MASK1) << 1) | parity; 76 | } 77 | } 78 | } 79 | }; 80 | -------------------------------------------------------------------------------- /src/cuckatoo/cuckatoo.c: -------------------------------------------------------------------------------- 1 | // Cuck(at)oo Cycle, a memory-hard proof-of-work 2 | // Copyright (c) 2013-2020 John Tromp 3 | 4 | #include "cuckatoo.h" 5 | #include // for 
SCNx64 macro 6 | #include // printf/scanf 7 | #include // exit 8 | #include // getopt 9 | #include // d'uh 10 | 11 | // arbitrary length of header hashed into siphash key 12 | #define HEADERLEN 246 13 | 14 | int main(int argc, char **argv) { 15 | char headernonce[HEADERLEN]; 16 | memset(headernonce, 0, HEADERLEN); 17 | int nonce = 0; 18 | int len, c; 19 | while ((c = getopt (argc, argv, "h:n:x:")) != -1) { 20 | switch (c) { 21 | case 'h': 22 | len = strlen(optarg); 23 | assert(len <= sizeof(headernonce)); 24 | memcpy(headernonce, optarg, len); 25 | break; 26 | case 'x': 27 | len = strlen(optarg)/2; 28 | assert(len == sizeof(headernonce)-sizeof(u64) || len == sizeof(headernonce)); 29 | for (u32 i=0; i 2 | #include 3 | #include 4 | 5 | int main(int argc, char **argv) { 6 | size_t bufferMB; 7 | void *buffer; 8 | int device = argc > 1 ? atoi(argv[argc-1]) : 1; 9 | int nDevices; 10 | cudaGetDeviceCount(&nDevices); 11 | assert(device < nDevices); 12 | cudaDeviceProp prop; 13 | cudaGetDeviceProperties(&prop, device); 14 | uint64_t dbytes = prop.totalGlobalMem; 15 | int availMB = dbytes >> 20; 16 | printf("%s with %d MB @ %d bits x %dMHz\n", prop.name, availMB, prop.memoryBusWidth, prop.memoryClockRate/1000); 17 | 18 | cudaSetDevice(device); 19 | for (bufferMB = availMB; ; bufferMB -= 1) { 20 | int ret = cudaMalloc((void**)&buffer, bufferMB << 20); 21 | if (ret) printf("cudaMalloc(%d MB) returned %d\n", bufferMB, ret); 22 | else break; 23 | } 24 | printf("cudaMalloc(%d MB) succeeded %d\n", bufferMB); 25 | cudaFree(buffer); 26 | 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /src/cuckatoo/graph.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "bitmap.hpp" 5 | #include "compress.hpp" 6 | #include 7 | 8 | typedef word_t proof[PROOFSIZE]; 9 | 10 | // cuck(at)oo graph with given limit on number of edges (and on single partition 
nodes) 11 | template 12 | class graph { 13 | public: 14 | // terminates adjacency lists 15 | const word_t NIL = ~(word_t)0; // NOTE: matches last edge when EDGEBITS==32 16 | 17 | struct link { // element of adjacency list 18 | word_t next; 19 | word_t to; 20 | }; 21 | 22 | word_t MAXEDGES; 23 | word_t MAXNODES; 24 | word_t nlinks; // aka halfedges, twice number of edges 25 | word_t *adjlist; // index into links array 26 | link *links; 27 | bool sharedmem; 28 | compressor *compressu; 29 | compressor *compressv; 30 | bitmap visited; 31 | u32 MAXSOLS; 32 | proof *sols; 33 | u32 nsols; 34 | 35 | graph(word_t maxedges, word_t maxnodes, u32 maxsols, u32 compressbits) : visited(maxedges) { 36 | MAXEDGES = maxedges; 37 | MAXNODES = maxnodes; 38 | MAXSOLS = maxsols; 39 | adjlist = new word_t[2*MAXNODES]; // index into links array 40 | links = new link[2*MAXEDGES]; 41 | compressu = compressbits ? new compressor(EDGEBITS, compressbits) : 0; 42 | compressv = compressbits ? new compressor(EDGEBITS, compressbits) : 0; 43 | sharedmem = false; 44 | sols = new proof[MAXSOLS+1]; // extra one for current path 45 | visited.clear(); 46 | } 47 | 48 | ~graph() { 49 | if (!sharedmem) { 50 | delete[] adjlist; 51 | delete[] links; 52 | } 53 | delete[] sols; 54 | } 55 | 56 | graph(word_t maxedges, word_t maxnodes, u32 maxsols, u32 compressbits, char *bytes) : visited(maxedges) { 57 | MAXEDGES = maxedges; 58 | MAXNODES = maxnodes; 59 | MAXSOLS = maxsols; 60 | adjlist = new (bytes) word_t[2*MAXNODES]; // index into links array 61 | links = new (bytes += sizeof(word_t[2*MAXNODES])) link[2*MAXEDGES]; 62 | compressu = compressbits ? new compressor(EDGEBITS, compressbits, bytes += sizeof(link[2*MAXEDGES])) : 0; 63 | compressv = compressbits ? 
new compressor(EDGEBITS, compressbits, bytes + compressu->bytes()) : 0; 64 | sharedmem = true; 65 | sols = new proof[MAXSOLS+1]; 66 | visited.clear(); 67 | } 68 | 69 | // total size of new-operated data, excludes sols and visited bitmap of MAXEDGES bits 70 | uint64_t bytes() { 71 | assert(2*MAXNODES != 0 && 2*MAXEDGES != 0); // allocation fails for uncompressed EDGEBITS=31 72 | return sizeof(word_t[2*MAXNODES]) + sizeof(link[2*MAXEDGES]) + (compressu ? 2 * compressu->bytes() : 0); 73 | } 74 | 75 | void reset() { 76 | memset(adjlist, (char)NIL, sizeof(word_t[2*MAXNODES])); 77 | if (compressu) { 78 | compressu->reset(); 79 | compressv->reset(); 80 | } 81 | resetcounts(); 82 | } 83 | 84 | void resetcounts() { 85 | nlinks = nsols = 0; 86 | // visited has entries set only during cycles() call 87 | } 88 | 89 | static int nonce_cmp(const void *a, const void *b) { 90 | u32 x = *(u32 *)a, y = *(u32 *)b; 91 | // printf("nonce_cmp %x %x\n", x, y); 92 | return x < y ? -1 : x > y; 93 | } 94 | 95 | void cycles_with_link(u32 len, word_t u, word_t dest) { 96 | // assert((u>>1) < MAXEDGES); 97 | if (visited.test(u >> 1)) 98 | return; 99 | if ((u ^ 1) == dest) { 100 | print_log(" %d-cycle found\n", len); 101 | if (len == PROOFSIZE && nsols < MAXSOLS) { 102 | memcpy(sols[nsols+1], sols[nsols], sizeof(sols[0])); 103 | qsort(sols[nsols++], PROOFSIZE, sizeof(word_t), nonce_cmp); 104 | } 105 | return; 106 | } 107 | if (len == PROOFSIZE) 108 | return; 109 | word_t au1 = adjlist[u ^ 1]; 110 | if (au1 != NIL) { 111 | visited.set(u >> 1); 112 | for (; au1 != NIL; au1 = links[au1].next) { 113 | sols[nsols][len] = au1/2; 114 | cycles_with_link(len+1, links[au1 ^ 1].to, dest); 115 | } 116 | visited.reset(u >> 1); 117 | } 118 | } 119 | 120 | bool add_edge(word_t u, word_t v) { 121 | assert(u < MAXNODES); 122 | assert(v < MAXNODES); 123 | v += MAXNODES; // distinguish partitions 124 | if (adjlist[u ^ 1] != NIL && adjlist[v ^ 1] != NIL) { // possibly part of a cycle 125 | sols[nsols][0] = 
nlinks/2; 126 | assert(!visited.test(u >> 1)); 127 | cycles_with_link(1, u, v); 128 | } 129 | word_t ulink = nlinks++; 130 | word_t vlink = nlinks++; // the two halfedges of an edge differ only in last bit 131 | assert(vlink != NIL); // avoid confusing links with NIL (possible if word_t is u32 and EDGEBITS is 31 or 32) 132 | #ifndef ALLOWDUPES 133 | for (word_t au = adjlist[u]; au != NIL; au = links[au].next) 134 | if (links[au ^ 1].to == v) return false; // drop duplicate edge 135 | #endif 136 | links[ulink].next = adjlist[u]; 137 | links[vlink].next = adjlist[v]; 138 | links[adjlist[u] = ulink].to = u; 139 | links[adjlist[v] = vlink].to = v; 140 | return true; 141 | } 142 | 143 | bool add_compress_edge(word_t u, word_t v) { 144 | return add_edge(compressu->compress(u), compressv->compress(v)); 145 | } 146 | }; 147 | -------------------------------------------------------------------------------- /src/cuckatoo/lean.cpp: -------------------------------------------------------------------------------- 1 | // Cuckatoo Cycle, a memory-hard proof-of-work 2 | // Copyright (c) 2013-2023 John Tromp 3 | 4 | #include "lean.hpp" 5 | #include 6 | 7 | #ifndef HEADERLEN 8 | #define HEADERLEN 246 9 | #endif 10 | 11 | typedef cuckoo_ctx SolverCtx; 12 | 13 | CALL_CONVENTION int run_solver(SolverCtx* ctx, 14 | char* header, 15 | int header_length, 16 | u32 nonce, 17 | u32 range, 18 | SolverSolutions *solutions, 19 | SolverStats *stats 20 | ) 21 | { 22 | u64 time0, time1; 23 | u32 timems; 24 | u32 sumnsols = 0; 25 | thread_ctx *threads = new thread_ctx[ctx->nthreads]; 26 | assert(threads); 27 | for (u32 r = 0; r < range; r++) { 28 | time0 = timestamp(); 29 | ctx->setheadernonce(header, header_length, nonce + r); 30 | print_log("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx->sip_keys.k0, ctx->sip_keys.k1, ctx->sip_keys.k2, ctx->sip_keys.k3); 31 | ctx->barry.clear(); 32 | for (u32 t = 0; t < ctx->nthreads; t++) { 33 | threads[t].id = t; 34 | threads[t].ctx = ctx; 35 | int 
err = pthread_create(&threads[t].thread, NULL, worker, (void *)&threads[t]); 36 | assert(err == 0); 37 | } 38 | for (u32 t = 0; t < ctx->nthreads; t++) { 39 | int err = pthread_join(threads[t].thread, NULL); 40 | assert(err == 0); 41 | } 42 | time1 = timestamp(); 43 | timems = (time1 - time0) / 1000000; 44 | print_log("Time: %d ms\n", timems); 45 | for (unsigned s = 0; s < ctx->nsols; s++) { 46 | print_log("Solution"); 47 | for (int j = 0; j < PROOFSIZE; j++) 48 | print_log(" %jx", (uintmax_t)ctx->sols[s][j]); 49 | print_log("\n"); 50 | if (solutions != NULL){ 51 | solutions->edge_bits = EDGEBITS; 52 | solutions->num_sols++; 53 | solutions->sols[sumnsols+s].nonce = nonce + r; 54 | for (u32 i = 0; i < PROOFSIZE; i++) 55 | solutions->sols[sumnsols+s].proof[i] = (u64) ctx->sols[s][i]; 56 | } 57 | int pow_rc = verify(ctx->sols[s], &ctx->sip_keys); 58 | if (pow_rc == POW_OK) { 59 | print_log("Verified with cyclehash "); 60 | unsigned char cyclehash[32]; 61 | blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)ctx->sols[s], sizeof(ctx->sols[0]), 0, 0); 62 | for (int i=0; i<32; i++) 63 | print_log("%02x", cyclehash[i]); 64 | print_log("\n"); 65 | } else { 66 | print_log("FAILED due to %s\n", errstr[pow_rc]); 67 | } 68 | sumnsols += ctx->nsols; 69 | } 70 | if (stats != NULL) { 71 | stats->device_id = 0; 72 | stats->edge_bits = EDGEBITS; 73 | strncpy(stats->device_name, "CPU\0", MAX_NAME_LEN); 74 | stats->last_start_time = time0; 75 | stats->last_end_time = time1; 76 | stats->last_solution_time = time1 - time0; 77 | } 78 | } 79 | delete[] threads; 80 | print_log("%d total solutions\n", sumnsols); 81 | return 0; 82 | } 83 | 84 | CALL_CONVENTION SolverCtx* create_solver_ctx(SolverParams* params) { 85 | if (params->nthreads == 0) params->nthreads = 1; 86 | if (params->ntrims == 0) params->ntrims = EDGEBITS > 30 ? 
96 : 68; 87 | 88 | SolverCtx* ctx = new SolverCtx(params->nthreads, 89 | params->ntrims, 90 | MAXSOLS, 91 | params->mutate_nonce); 92 | return ctx; 93 | } 94 | 95 | CALL_CONVENTION void destroy_solver_ctx(SolverCtx* ctx) { 96 | delete ctx; 97 | } 98 | 99 | CALL_CONVENTION void stop_solver(SolverCtx* ctx) { 100 | ctx->abort(); 101 | } 102 | 103 | CALL_CONVENTION void fill_default_params(SolverParams* params) { 104 | params->nthreads = 1; 105 | params->ntrims = 8 * (PART_BITS+3) * (PART_BITS+4); 106 | params->mutate_nonce = 0; 107 | } 108 | 109 | int main(int argc, char **argv) { 110 | int nthreads = 1; 111 | int ntrims = 8 * (PART_BITS+3) * (PART_BITS+4); 112 | int nonce = 0; 113 | int range = 1; 114 | char header[HEADERLEN]; 115 | unsigned len; 116 | int c; 117 | SolverParams params; 118 | 119 | fill_default_params(¶ms); 120 | memset(header, 0, len = sizeof(header)); 121 | while ((c = getopt (argc, argv, "h:m:n:r:t:x:")) != -1) { 122 | switch (c) { 123 | case 'h': 124 | len = strlen(optarg); 125 | assert(len <= sizeof(header)); 126 | memcpy(header, optarg, len); 127 | break; 128 | case 'n': 129 | nonce = atoi(optarg); 130 | params.mutate_nonce = 1; 131 | break; 132 | case 'r': 133 | range = atoi(optarg); 134 | params.mutate_nonce = 1; 135 | break; 136 | case 'm': 137 | ntrims = atoi(optarg); 138 | break; 139 | case 't': 140 | nthreads = atoi(optarg); 141 | break; 142 | case 'x': 143 | len = strlen(optarg)/2; 144 | assert(len == sizeof(header)-sizeof(u64) || len == sizeof(header)); 145 | for (u32 i=0; i 1) 158 | print_log(",%d-%d", nonce, nonce+range-1); 159 | else if (nonce) print_log(",%d", nonce); 160 | print_log(") with trimming to %d bits, %d trimming rounds, %d threads\n", EDGEBITS-IDXSHIFT, ntrims, nthreads); 161 | 162 | u64 EdgeBytes = NEDGES/8; 163 | int EdgeUnit; 164 | for (EdgeUnit=0; EdgeBytes >= 1024; EdgeBytes>>=10,EdgeUnit++) ; 165 | u64 NodeBytes = (NEDGES >> PART_BITS)/8; 166 | int NodeUnit; 167 | for (NodeUnit=0; NodeBytes >= 1024; 
NodeBytes>>=10,NodeUnit++) ; 168 | print_log("Using %d%cB edge and %d%cB node memory, and %d-way siphash\n", 169 | (int)EdgeBytes, " KMGT"[EdgeUnit], (int)NodeBytes, " KMGT"[NodeUnit], NSIPHASH); 170 | 171 | SolverCtx* ctx = create_solver_ctx(¶ms); 172 | run_solver(ctx, header, len, nonce, range, NULL, NULL); 173 | destroy_solver_ctx(ctx); 174 | 175 | return 0; 176 | } 177 | -------------------------------------------------------------------------------- /src/cuckatoo/mean.cpp: -------------------------------------------------------------------------------- 1 | // Cuckatoo Cycle, a memory-hard proof-of-work 2 | // Copyright (c) 2013-2020 John Tromp 3 | 4 | #include "mean.hpp" 5 | #include 6 | 7 | // arbitrary length of header hashed into siphash key 8 | #define HEADERLEN 80 9 | 10 | typedef solver_ctx SolverCtx; 11 | 12 | CALL_CONVENTION int run_solver(SolverCtx* ctx, 13 | char* header, 14 | int header_length, 15 | u32 nonce, 16 | u32 range, 17 | SolverSolutions *solutions, 18 | SolverStats *stats 19 | ) 20 | { 21 | u64 time0, time1; 22 | u32 timems; 23 | u32 sumnsols = 0; 24 | 25 | for (u32 r = 0; r < range; r++) { 26 | time0 = timestamp(); 27 | ctx->setheadernonce(header, header_length, nonce + r); 28 | print_log("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx->trimmer.sip_keys.k0, ctx->trimmer.sip_keys.k1, ctx->trimmer.sip_keys.k2, ctx->trimmer.sip_keys.k3); 29 | u32 nsols = ctx->solve(); 30 | time1 = timestamp(); 31 | timems = (time1 - time0) / 1000000; 32 | print_log("Time: %d ms\n", timems); 33 | 34 | for (unsigned s = 0; s < nsols; s++) { 35 | print_log("Solution"); 36 | word_t *prf = &ctx->sols[s * PROOFSIZE]; 37 | for (u32 i = 0; i < PROOFSIZE; i++) 38 | print_log(" %jx", (uintmax_t)prf[i]); 39 | print_log("\n"); 40 | if (solutions != NULL){ 41 | solutions->edge_bits = EDGEBITS; 42 | solutions->num_sols++; 43 | solutions->sols[sumnsols+s].nonce = nonce + r; 44 | for (u32 i = 0; i < PROOFSIZE; i++) 45 | solutions->sols[sumnsols+s].proof[i] = 
(u64) prf[i]; 46 | } 47 | int pow_rc = verify(prf, &ctx->trimmer.sip_keys); 48 | if (pow_rc == POW_OK) { 49 | print_log("Verified with cyclehash "); 50 | unsigned char cyclehash[32]; 51 | blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)prf, sizeof(proof), 0, 0); 52 | for (int i=0; i<32; i++) 53 | print_log("%02x", cyclehash[i]); 54 | print_log("\n"); 55 | } else { 56 | print_log("FAILED due to %s\n", errstr[pow_rc]); 57 | } 58 | } 59 | sumnsols += nsols; 60 | if (stats != NULL) { 61 | stats->device_id = 0; 62 | stats->edge_bits = EDGEBITS; 63 | strncpy(stats->device_name, "CPU\0", MAX_NAME_LEN); 64 | stats->last_start_time = time0; 65 | stats->last_end_time = time1; 66 | stats->last_solution_time = time1 - time0; 67 | } 68 | } 69 | print_log("%d total solutions\n", sumnsols); 70 | return sumnsols > 0; 71 | } 72 | 73 | CALL_CONVENTION SolverCtx* create_solver_ctx(SolverParams* params) { 74 | if (params->nthreads == 0) params->nthreads = 1; 75 | if (params->ntrims == 0) params->ntrims = EDGEBITS >= 30 ? 
96 : 68; 76 | 77 | SolverCtx* ctx = new SolverCtx(params->nthreads, 78 | params->ntrims, 79 | params->allrounds, 80 | params->showcycle, 81 | params->mutate_nonce); 82 | return ctx; 83 | } 84 | 85 | CALL_CONVENTION void destroy_solver_ctx(SolverCtx* ctx) { 86 | delete ctx; 87 | } 88 | 89 | CALL_CONVENTION void stop_solver(SolverCtx* ctx) { 90 | ctx->abort(); 91 | } 92 | 93 | CALL_CONVENTION void fill_default_params(SolverParams* params) { 94 | // not required in this solver 95 | } 96 | 97 | int main(int argc, char **argv) { 98 | u32 nthreads = 0; 99 | u32 ntrims = 0; 100 | u32 nonce = 0; 101 | u32 range = 1; 102 | #ifdef SAVEEDGES 103 | bool showcycle = 1; 104 | #else 105 | bool showcycle = 0; 106 | #endif 107 | char header[HEADERLEN]; 108 | u32 len; 109 | bool allrounds = false; 110 | int c; 111 | 112 | memset(header, 0, sizeof(header)); 113 | while ((c = getopt (argc, argv, "ah:m:n:r:st:x:")) != -1) { 114 | switch (c) { 115 | case 'a': 116 | allrounds = true; 117 | break; 118 | case 'h': 119 | len = strlen(optarg); 120 | assert(len <= sizeof(header)); 121 | memcpy(header, optarg, len); 122 | break; 123 | case 'x': 124 | len = strlen(optarg)/2; 125 | assert(len == sizeof(header)); 126 | for (u32 i=0; i 1) 157 | print_log("-%d", nonce+range-1); 158 | print_log(") with 50%% edges\n"); 159 | 160 | u64 sbytes = ctx->sharedbytes(); 161 | u32 tbytes = ctx->threadbytes(); 162 | int sunit,tunit; 163 | for (sunit=0; sbytes >= 10240; sbytes>>=10,sunit++) ; 164 | for (tunit=0; tbytes >= 10240; tbytes>>=10,tunit++) ; 165 | print_log("Using %d%cB bucket memory at %lx,\n", sbytes, " KMGT"[sunit], (u64)ctx->trimmer.buckets); 166 | print_log("%dx%d%cB thread memory at %lx,\n", params.nthreads, tbytes, " KMGT"[tunit], (u64)ctx->trimmer.tbuckets); 167 | print_log("%d-way siphash, and %d buckets.\n", NSIPHASH, NX); 168 | 169 | run_solver(ctx, header, sizeof(header), nonce, range, NULL, NULL); 170 | 171 | destroy_solver_ctx(ctx); 172 | } 173 | 
-------------------------------------------------------------------------------- /src/cuckatoo/simple.cpp: -------------------------------------------------------------------------------- 1 | // Cuck(at)oo Cycle, a memory-hard proof-of-work 2 | // Copyright (c) 2013-2020 John Tromp 3 | 4 | #include "cuckatoo.h" 5 | #include "graph.hpp" 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #define NNODES (2*NEDGES) 13 | #ifndef MAXSOLS 14 | #define MAXSOLS 4 15 | #endif 16 | 17 | typedef unsigned char u8; 18 | 19 | class cuckoo_ctx { 20 | public: 21 | siphash_keys sip_keys; 22 | word_t easiness; 23 | graph cg; 24 | 25 | cuckoo_ctx(const char* header, const u32 headerlen, const u32 nonce, word_t easy_ness) : cg(NEDGES, NEDGES, MAXSOLS, 0) { 26 | easiness = easy_ness; 27 | } 28 | 29 | ~cuckoo_ctx() { } 30 | 31 | u64 bytes() { 32 | return cg.bytes(); 33 | } 34 | 35 | void setheadernonce(char* const headernonce, const u32 len, const u32 nonce) { 36 | ((u32 *)headernonce)[len/sizeof(u32)-1] = htole32(nonce); // place nonce at end 37 | setheader(headernonce, len, &sip_keys); 38 | cg.reset(); 39 | } 40 | 41 | void find_cycles() { 42 | for (word_t nonce = 0; nonce < easiness; nonce++) { 43 | word_t u = sipnode(&sip_keys, nonce, 0); 44 | word_t v = sipnode(&sip_keys, nonce, 1); 45 | cg.add_edge(u, v); 46 | #ifdef SHOW 47 | printf("%d add (%d,%d)\n", nonce,u,v+NEDGES); 48 | for (unsigned j=0; j::NIL; a=cg.links[a].next) printf(":%d", cg.links[a^1].to); 51 | if ((j+1)%NEDGES == 0) 52 | printf("\n"); 53 | } 54 | #endif 55 | } 56 | for (u32 s=0; s < cg.nsols; s++) { 57 | printf("Solution"); 58 | // qsort(&cg.sols[s], PROOFSIZE, sizeof(word_t), cg.nonce_cmp); 59 | for (u32 j=0; j < PROOFSIZE; j++) { 60 | word_t nonce = cg.sols[s][j]; 61 | // printf(" (%x,%x)", sipnode(&sip_keys, nonce, 0), sipnode(&sip_keys, nonce, 1)); 62 | printf(" %x", nonce); 63 | } 64 | printf("\n"); 65 | int pow_rc = verify(cg.sols[s], &sip_keys); 66 | if (pow_rc == POW_OK) { 67 | 
printf("Verified with cyclehash "); 68 | unsigned char cyclehash[32]; 69 | blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)cg.sols[s], sizeof(cg.sols[0]), 0, 0); 70 | for (int i=0; i<32; i++) 71 | printf("%02x", cyclehash[i]); 72 | printf("\n"); 73 | } else { 74 | printf("FAILED due to %s\n", errstr[pow_rc]); 75 | } 76 | 77 | } 78 | } 79 | }; 80 | 81 | // arbitrary length of header hashed into siphash key 82 | #define HEADERLEN 80 83 | 84 | int main(int argc, char **argv) { 85 | char header[HEADERLEN]; 86 | memset(header, 0, HEADERLEN); 87 | int c, easipct = 50; 88 | u32 nonce = 0; 89 | u32 range = 1; 90 | u64 time0, time1; 91 | u32 timems; 92 | 93 | while ((c = getopt (argc, argv, "e:h:n:r:")) != -1) { 94 | switch (c) { 95 | case 'e': 96 | easipct = atoi(optarg); 97 | break; 98 | case 'h': 99 | memcpy(header, optarg, strlen(optarg)); 100 | break; 101 | case 'n': 102 | nonce = atoi(optarg); 103 | break; 104 | case 'r': 105 | range = atoi(optarg); 106 | break; 107 | } 108 | } 109 | assert(easipct >= 0 && easipct <= 100); 110 | printf("Looking for %d-cycle on cuckatoo%d(\"%s\",%d", PROOFSIZE, EDGEBITS, header, nonce); 111 | if (range > 1) 112 | printf("-%d", nonce+range-1); 113 | printf(") with %d%% edges, ", easipct); 114 | word_t easiness = easipct * (uint64_t)NNODES / 100; 115 | cuckoo_ctx ctx(header, sizeof(header), nonce, easiness); 116 | u64 bytes = ctx.bytes(); 117 | int unit; 118 | for (unit=0; bytes >= 10240; bytes>>=10,unit++) ; 119 | printf("using %lld%cB memory\n", (u32)bytes, " KMGT"[unit]); 120 | 121 | for (u32 r = 0; r < range; r++) { 122 | time0 = timestamp(); 123 | ctx.setheadernonce(header, sizeof(header), nonce + r); 124 | printf("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx.sip_keys.k0, ctx.sip_keys.k1, ctx.sip_keys.k2, ctx.sip_keys.k3); 125 | ctx.find_cycles(); 126 | time1 = timestamp(); timems = (time1 - time0) / 1000000; 127 | printf("Time: %d ms\n", timems); 128 | } 129 | } 130 | 
-------------------------------------------------------------------------------- /src/cuckoo/Makefile: -------------------------------------------------------------------------------- 1 | .POSIX: 2 | .SUFFIXES: 3 | 4 | OPT ?= -O3 5 | 6 | GCC_ARCH_FLAGS ?= -march=native 7 | GPP_ARCH_FLAGS ?= -march=native 8 | 9 | # -Wno-deprecated-declarations shuts up Apple OSX clang 10 | FLAGS ?= -Wall -Wno-format -Wno-deprecated-declarations -D_POSIX_C_SOURCE=200112L $(OPT) -DPREFETCH -I. $(CPPFLAGS) -pthread 11 | GPP ?= g++ $(GPP_ARCH_FLAGS) -std=c++11 $(FLAGS) 12 | CFLAGS ?= -Wall -Wno-format -fomit-frame-pointer $(OPT) 13 | GCC ?= gcc $(GCC_ARCH_FLAGS) -std=gnu11 $(CFLAGS) 14 | BLAKE_2B_SRC ?= ../crypto/blake2b-ref.c 15 | NVCC ?= nvcc -std=c++11 16 | 17 | all : simpletest leantest meantest 18 | 19 | simpletest: simple19 20 | ./simple19 -n 38 21 | 22 | leantest: lean19x1 23 | ./lean19x1 -n 38 24 | 25 | meantest: mean29x4 26 | ./mean29x4 -n 63 -t 4 -s 27 | 28 | verifytest: lean19x1 verify19 29 | ./lean19x1 -n 38 | grep ^Sol | ./verify19 -n 38 30 | 31 | simple19: ../crypto/siphash.hpp cuckoo.h cyclebase.hpp simple.cpp Makefile 32 | $(GPP) -o $@ -DIDXSHIFT=0 -DPROOFSIZE=42 -DEDGEBITS=19 simple.cpp $(BLAKE_2B_SRC) 33 | 34 | verify19: ../crypto/siphash.hpp cuckoo.h cuckoo.c simple.cpp Makefile 35 | $(GPP) -o $@ -DPROOFSIZE=42 -DEDGEBITS=19 cuckoo.c $(BLAKE_2B_SRC) 36 | 37 | simple29: ../crypto/siphash.hpp cuckoo.h cyclebase.hpp simple.cpp Makefile 38 | $(GPP) -o $@ -DIDXSHIFT=0 -DPROOFSIZE=42 -DEDGEBITS=29 simple.cpp $(BLAKE_2B_SRC) 39 | 40 | lean19x1: ../crypto/siphash.hpp cuckoo.h lean.hpp lean.cpp Makefile 41 | $(GPP) -o $@ -DATOMIC -DNSIPHASH=1 -DEDGEBITS=19 lean.cpp $(BLAKE_2B_SRC) 42 | 43 | lean19x8: ../crypto/siphash.hpp cuckoo.h lean.hpp lean.cpp Makefile 44 | $(GPP) -o $@ -DATOMIC -DNSIPHASH=8 -DEDGEBITS=19 lean.cpp $(BLAKE_2B_SRC) 45 | 46 | lean27x1: ../crypto/siphash.hpp cuckoo.h lean.hpp lean.cpp Makefile 47 | $(GPP) -o $@ -DATOMIC -DNSIPHASH=1 -DEDGEBITS=27 lean.cpp 
$(BLAKE_2B_SRC) 48 | 49 | lean29x1: ../crypto/siphash.hpp cuckoo.h lean.hpp lean.cpp Makefile 50 | $(GPP) -o $@ -DATOMIC -DNSIPHASH=1 -DEDGEBITS=29 lean.cpp $(BLAKE_2B_SRC) 51 | 52 | lean31x1: ../crypto/siphash.hpp cuckoo.h lean.hpp lean.cpp Makefile 53 | $(GPP) -o $@ -DATOMIC -DNSIPHASH=1 -DEDGEBITS=31 lean.cpp $(BLAKE_2B_SRC) 54 | 55 | lean29x8: ../crypto/siphash.hpp cuckoo.h lean.hpp lean.cpp Makefile 56 | $(GPP) -o $@ -mavx2 -DNSIPHASH=8 -DATOMIC -DEDGEBITS=29 lean.cpp $(BLAKE_2B_SRC) 57 | 58 | mean19x8: cuckoo.h ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 59 | $(GPP) -o $@ -mavx2 -DXBITS=2 -DNSIPHASH=8 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC) 60 | 61 | mean29x4: cuckoo.h ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 62 | $(GPP) -o $@ -mno-avx2 -DNSIPHASH=4 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC) 63 | 64 | mean29x8: cuckoo.h ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 65 | $(GPP) -o $@ -mavx2 -DNSIPHASH=8 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC) 66 | 67 | mean30x8: cuckoo.h ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 68 | $(GPP) -o $@ -mavx2 -DNSIPHASH=8 -DEXPANDROUND=10 -DCOMPRESSROUND=22 -DEDGEBITS=30 mean.cpp $(BLAKE_2B_SRC) 69 | 70 | mean29x1: cuckoo.h ../crypto/siphash.hpp mean.hpp mean.cpp Makefile 71 | $(GPP) -o $@ -DNSIPHASH=1 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC) 72 | 73 | lcuda29: ../crypto/siphash.cuh lean.cu Makefile 74 | $(NVCC) -o $@ -DEDGEBITS=29 -arch sm_35 lean.cu $(BLAKE_2B_SRC) 75 | 76 | cuda29: ../crypto/siphash.cuh mean.cu Makefile 77 | $(NVCC) -o $@ -DEDGEBITS=29 -arch sm_35 mean.cu $(BLAKE_2B_SRC) 78 | 79 | cuda29_1: ../crypto/siphash.cuh mean.cu Makefile 80 | $(NVCC) -o $@ -DNB=1 -DEDGEBITS=29 -arch sm_35 mean.cu $(BLAKE_2B_SRC) 81 | -------------------------------------------------------------------------------- /src/cuckoo/cuckoo.c: -------------------------------------------------------------------------------- 1 | // Cuckoo Cycle, a memory-hard proof-of-work 2 | // Copyright (c) 2013-2020 John Tromp 3 | 4 | #include 
"cuckoo.h" 5 | #include // for SCNx64 macro 6 | #include // printf/scanf 7 | #include // exit 8 | #include // getopt 9 | #include // d'uh 10 | 11 | // arbitrary length of header hashed into siphash key 12 | #define HEADERLEN 80 13 | 14 | int main(int argc, char **argv) { 15 | const char *header = ""; 16 | int nonce = 0; 17 | int c; 18 | while ((c = getopt (argc, argv, "h:n:")) != -1) { 19 | switch (c) { 20 | case 'h': 21 | header = optarg; 22 | break; 23 | case 'n': 24 | nonce = atoi(optarg); 25 | break; 26 | } 27 | } 28 | char headernonce[HEADERLEN]; 29 | u32 hdrlen = strlen(header); 30 | memcpy(headernonce, header, hdrlen); 31 | memset(headernonce+hdrlen, 0, sizeof(headernonce)-hdrlen); 32 | ((u32 *)headernonce)[HEADERLEN/sizeof(u32)-1] = htole32(nonce); 33 | siphash_keys keys; 34 | setheader(headernonce, sizeof(headernonce), &keys); 35 | printf("Verifying size %d proof for cuckoo%d(\"%s\",%d)\n", 36 | PROOFSIZE, EDGEBITS+1, header, nonce); 37 | for (int nsols=0; scanf(" Solution") == 0; nsols++) { 38 | word_t nonces[PROOFSIZE]; 39 | for (int n = 0; n < PROOFSIZE; n++) { 40 | uint64_t nonce; 41 | int nscan = scanf(" %" SCNx64, &nonce); 42 | assert(nscan == 1); 43 | nonces[n] = nonce; 44 | } 45 | int pow_rc = verify(nonces, &keys); 46 | if (pow_rc == POW_OK) { 47 | printf("Verified with cyclehash "); 48 | unsigned char cyclehash[32]; 49 | blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)nonces, sizeof(nonces), 0, 0); 50 | for (int i=0; i<32; i++) 51 | printf("%02x", cyclehash[i]); 52 | printf("\n"); 53 | } else { 54 | printf("FAILED due to %s\n", errstr[pow_rc]); 55 | } 56 | } 57 | return 0; 58 | } 59 | -------------------------------------------------------------------------------- /src/cuckoo/cyclebase.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #ifndef MAXCYCLES 7 | #define MAXCYCLES 64 // single byte 8 | #endif 9 | 10 | struct edge { 11 | u32 u; 12 
| u32 v; 13 | edge() : u(0), v(0) { } 14 | edge(u32 x, u32 y) : u(x), v(y) { } 15 | }; 16 | 17 | struct cyclebase { 18 | // should avoid different values of MAXPATHLEN in different threads of one process 19 | static const u32 MAXPATHLEN = 16 << (EDGEBITS/3); 20 | 21 | int ncycles; 22 | word_t *cuckoo; 23 | edge cycleedges[MAXCYCLES]; 24 | u32 cyclelengths[MAXCYCLES]; 25 | u32 prevcycle[MAXCYCLES]; 26 | u32 us[MAXPATHLEN]; 27 | u32 vs[MAXPATHLEN]; 28 | 29 | void alloc() { 30 | cuckoo = (word_t *)calloc(NCUCKOO, sizeof(word_t)); 31 | } 32 | 33 | void freemem() { // not a destructor, as memory may have been allocated elsewhere, bypassing alloc() 34 | free(cuckoo); 35 | } 36 | 37 | void reset() { 38 | resetcounts(); 39 | } 40 | 41 | void resetcounts() { 42 | memset(cuckoo, -1, NCUCKOO * sizeof(word_t)); // for prevcycle nil 43 | ncycles = 0; 44 | } 45 | 46 | int path(u32 u0, u32 *us) const { 47 | int nu; 48 | for (u32 u = us[nu = 0] = u0; cuckoo[u] < 0x80000000; ) { 49 | u = cuckoo[u]; 50 | if (++nu >= (int)MAXPATHLEN) { 51 | while (nu-- && us[nu] != u) ; 52 | if (nu < 0) 53 | printf("maximum path length exceeded\n"); 54 | else printf("illegal % 4d-cycle from node %d\n", MAXPATHLEN-nu, u0); 55 | exit(0); 56 | } 57 | us[nu] = u; 58 | } 59 | return nu; 60 | } 61 | 62 | int pathjoin(u32 *us, int *pnu, u32 *vs, int *pnv) { 63 | int nu = *pnu, nv = *pnv; 64 | int min = nu < nv ? 
nu : nv; 65 | for (nu -= min, nv -= min; us[nu] != vs[nv]; nu++, nv++) min--; 66 | *pnu = nu; *pnv = nv; 67 | return min; 68 | } 69 | 70 | void addedge(u32 u0, u32 v0) { 71 | u32 u = u0 << 1, v = (v0 << 1) | 1; 72 | int nu = path(u, us), nv = path(v, vs); 73 | if (us[nu] == vs[nv]) { 74 | u32 ccsize = -cuckoo[us[nu]]; 75 | pathjoin(us, &nu, vs, &nv); 76 | int len = nu + nv + 1; 77 | printf("% 4d-cycle found in ccsize %d\n", len, ccsize); 78 | cycleedges[ncycles].u = u; 79 | cycleedges[ncycles].v = v; 80 | cyclelengths[ncycles++] = len; 81 | if (len == PROOFSIZE) 82 | solution(us, nu, vs, nv); 83 | assert(ncycles < MAXCYCLES); 84 | } else if (nu < nv) { 85 | cuckoo[vs[nv]] += cuckoo[us[nu]]; 86 | while (nu--) 87 | cuckoo[us[nu+1]] = us[nu]; 88 | cuckoo[u] = v; 89 | } else { 90 | cuckoo[us[nu]] += cuckoo[vs[nv]]; 91 | while (nv--) 92 | cuckoo[vs[nv+1]] = vs[nv]; 93 | cuckoo[v] = u; 94 | } 95 | } 96 | 97 | void recordedge(const u32 i, const u32 u, const u32 v) { 98 | printf(" (%x,%x)", u, v); 99 | } 100 | 101 | void solution(u32 *us, int nu, u32 *vs, int nv) { 102 | printf("Nodes"); 103 | u32 ni = 0; 104 | recordedge(ni++, *us, *vs); 105 | while (nu--) 106 | recordedge(ni++, us[(nu+1)&~1], us[nu|1]); // u's in even position; v's in odd 107 | while (nv--) 108 | recordedge(ni++, vs[nv|1], vs[(nv+1)&~1]); // u's in odd position; v's in even 109 | printf("\n"); 110 | #if 0 111 | for (u32 nonce = n = 0; nonce < NEDGES; nonce++) { 112 | edge e(2*sipnode(&sip_keys, nonce, 0), 2*sipnode(&sip_keys, nonce, 1)+1); 113 | if (cycle.find(e) != cycle.end()) { 114 | printf(" %x", nonce); 115 | cycle.erase(e); 116 | } 117 | } 118 | printf("\n"); 119 | #endif 120 | } 121 | 122 | int sharedlen(u32 *us, int nu, u32 *vs, int nv) { 123 | int len = 0; 124 | for (; nu-- && nv-- && us[nu] == vs[nv]; len++) ; 125 | return len; 126 | } 127 | }; 128 | -------------------------------------------------------------------------------- /src/cuckoo/lean.cpp: 
-------------------------------------------------------------------------------- 1 | // Cuckoo Cycle, a memory-hard proof-of-work 2 | // Copyright (c) 2013-2020 John Tromp 3 | 4 | #include "lean.hpp" 5 | #include 6 | 7 | #define MAXSOLS 8 8 | // arbitrary length of header hashed into siphash key 9 | #ifndef HEADERLEN 10 | #define HEADERLEN 80 11 | #endif 12 | 13 | 14 | int main(int argc, char **argv) { 15 | int nthreads = 1; 16 | int ntrims = 2 + (PART_BITS+3)*(PART_BITS+4); 17 | int nonce = 0; 18 | int range = 1; 19 | char header[HEADERLEN]; 20 | unsigned len; 21 | u64 time0, time1; 22 | u32 timems; 23 | int c; 24 | 25 | memset(header, 0, sizeof(header)); 26 | while ((c = getopt (argc, argv, "h:m:n:r:t:x:")) != -1) { 27 | switch (c) { 28 | case 'h': 29 | len = strlen(optarg); 30 | assert(len <= sizeof(header)); 31 | memcpy(header, optarg, len); 32 | break; 33 | case 'x': 34 | len = strlen(optarg)/2; 35 | assert(len <= sizeof(header)); 36 | for (u32 i=0; i 1) 55 | printf("-%d", nonce+range-1); 56 | printf(") with 50%% edges, %d trims, %d threads\n", ntrims, nthreads); 57 | 58 | u64 edgeBytes = NEDGES/8, nodeBytes = TWICE_ATOMS*sizeof(atwice); 59 | int edgeUnit, nodeUnit; 60 | for (edgeUnit=0; edgeBytes >= 1024; edgeBytes>>=10,edgeUnit++) ; 61 | for (nodeUnit=0; nodeBytes >= 1024; nodeBytes>>=10,nodeUnit++) ; 62 | printf("Using %d%cB edge and %d%cB node memory, %d-way siphash, and %d-byte counters\n", 63 | (int)edgeBytes, " KMGT"[edgeUnit], (int)nodeBytes, " KMGT"[nodeUnit], NSIPHASH, SIZEOF_TWICE_ATOM); 64 | 65 | thread_ctx *threads = (thread_ctx *)calloc(nthreads, sizeof(thread_ctx)); 66 | assert(threads); 67 | cuckoo_ctx ctx(nthreads, ntrims, MAXSOLS); 68 | 69 | u32 sumnsols = 0; 70 | for (int r = 0; r < range; r++) { 71 | time0 = timestamp(); 72 | ctx.setheadernonce(header, sizeof(header), nonce + r); 73 | ctx.barry.clear(); 74 | for (int t = 0; t < nthreads; t++) { 75 | threads[t].id = t; 76 | threads[t].ctx = &ctx; 77 | int err = 
pthread_create(&threads[t].thread, NULL, worker, (void *)&threads[t]); 78 | assert(err == 0); 79 | } 80 | // sleep(33); ctx.abort(); 81 | for (int t = 0; t < nthreads; t++) { 82 | int err = pthread_join(threads[t].thread, NULL); 83 | assert(err == 0); 84 | } 85 | time1 = timestamp(); timems = (time1 - time0) / 1000000; 86 | printf("Time: %d ms\n", timems); 87 | for (unsigned s = 0; s < ctx.nsols; s++) { 88 | printf("Solution"); 89 | for (int i = 0; i < PROOFSIZE; i++) 90 | printf(" %jx", (uintmax_t)ctx.sols[s][i]); 91 | printf("\n"); 92 | } 93 | sumnsols += ctx.nsols; 94 | } 95 | free(threads); 96 | printf("%d total solutions\n", sumnsols); 97 | return 0; 98 | } 99 | -------------------------------------------------------------------------------- /src/cuckoo/mean.cpp: -------------------------------------------------------------------------------- 1 | // Cuckoo Cycle, a memory-hard proof-of-work 2 | // Copyright (c) 2013-2020 John Tromp 3 | 4 | #include "mean.hpp" 5 | #include 6 | 7 | // arbitrary length of header hashed into siphash key 8 | #define HEADERLEN 80 9 | 10 | int main(int argc, char **argv) { 11 | u32 nthreads = 1; 12 | u32 ntrims = EDGEBITS >= 30 ? 
96 : 68; 13 | u32 nonce = 0; 14 | u32 range = 1; 15 | #ifdef SAVEEDGES 16 | bool showcycle = 1; 17 | #else 18 | bool showcycle = 0; 19 | #endif 20 | u64 time0, time1; 21 | u32 timems; 22 | char header[HEADERLEN]; 23 | u32 len; 24 | bool allrounds = false; 25 | int c; 26 | 27 | memset(header, 0, sizeof(header)); 28 | while ((c = getopt (argc, argv, "ah:m:n:r:st:x:")) != -1) { 29 | switch (c) { 30 | case 'a': 31 | allrounds = true; 32 | break; 33 | case 'h': 34 | len = strlen(optarg); 35 | assert(len <= sizeof(header)); 36 | memcpy(header, optarg, len); 37 | break; 38 | case 'x': 39 | len = strlen(optarg)/2; 40 | assert(len == sizeof(header)); 41 | for (u32 i=0; i 1) 63 | printf("-%d", nonce+range-1); 64 | printf(") with 50%% edges\n"); 65 | 66 | solver_ctx ctx(nthreads, ntrims, allrounds, showcycle); 67 | 68 | u64 sbytes = ctx.sharedbytes(); 69 | u32 tbytes = ctx.threadbytes(); 70 | int sunit,tunit; 71 | for (sunit=0; sbytes >= 10240; sbytes>>=10,sunit++) ; 72 | for (tunit=0; tbytes >= 10240; tbytes>>=10,tunit++) ; 73 | printf("Using %d%cB bucket memory at %lx,\n", sbytes, " KMGT"[sunit], (u64)ctx.trimmer->buckets); 74 | printf("%dx%d%cB thread memory at %lx,\n", nthreads, tbytes, " KMGT"[tunit], (u64)ctx.trimmer->tbuckets); 75 | printf("%d-way siphash, and %d buckets.\n", NSIPHASH, NX); 76 | 77 | u32 sumnsols = 0; 78 | for (u32 r = 0; r < range; r++) { 79 | time0 = timestamp(); 80 | ctx.setheadernonce(header, sizeof(header), nonce + r); 81 | printf("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx.trimmer->sip_keys.k0, ctx.trimmer->sip_keys.k1, ctx.trimmer->sip_keys.k2, ctx.trimmer->sip_keys.k3); 82 | u32 nsols = ctx.solve(); 83 | time1 = timestamp(); timems = (time1 - time0) / 1000000; 84 | printf("Time: %d ms\n", timems); 85 | 86 | for (unsigned s = 0; s < nsols; s++) { 87 | printf("Solution"); 88 | word_t *prf = &ctx.sols[s * PROOFSIZE]; 89 | for (u32 i = 0; i < PROOFSIZE; i++) 90 | printf(" %jx", (uintmax_t)prf[i]); 91 | printf("\n"); 92 | int pow_rc = 
verify(prf, &ctx.trimmer->sip_keys); 93 | if (pow_rc == POW_OK) { 94 | printf("Verified with cyclehash "); 95 | unsigned char cyclehash[32]; 96 | blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)prf, sizeof(proof), 0, 0); 97 | for (int i=0; i<32; i++) 98 | printf("%02x", cyclehash[i]); 99 | printf("\n"); 100 | } else { 101 | printf("FAILED due to %s\n", errstr[pow_rc]); 102 | } 103 | } 104 | sumnsols += nsols; 105 | } 106 | printf("%d total solutions\n", sumnsols); 107 | return 0; 108 | } 109 | -------------------------------------------------------------------------------- /src/cuckoo/simple.cpp: -------------------------------------------------------------------------------- 1 | // Cuckoo Cycle, a memory-hard proof-of-work 2 | // Copyright (c) 2013-2020 John Tromp 3 | 4 | #include "cuckoo.h" 5 | 6 | // assume EDGEBITS < 31 7 | #define NNODES (2 * NEDGES) 8 | #define NCUCKOO NNODES 9 | 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include "cyclebase.hpp" 16 | #include 17 | 18 | typedef unsigned char u8; 19 | 20 | class cuckoo_ctx { 21 | public: 22 | siphash_keys sip_keys; 23 | word_t easiness; 24 | cyclebase cb; 25 | 26 | cuckoo_ctx(const char* header, const u32 headerlen, const u32 nonce, word_t easy_ness) { 27 | easiness = easy_ness; 28 | cb.alloc(); 29 | assert(cb.cuckoo != 0); 30 | } 31 | 32 | ~cuckoo_ctx() { 33 | cb.freemem(); 34 | } 35 | 36 | u64 bytes() { 37 | return (word_t)(1+NNODES) * sizeof(word_t); 38 | } 39 | 40 | void setheadernonce(char* const headernonce, const u32 len, const u32 nonce) { 41 | ((u32 *)headernonce)[len/sizeof(u32)-1] = htole32(nonce); // place nonce at end 42 | setheader(headernonce, len, &sip_keys); 43 | cb.reset(); 44 | } 45 | 46 | void cycle_base() { 47 | for (word_t nonce = 0; nonce < easiness; nonce++) { 48 | word_t u = sipnode(&sip_keys, nonce, 0); 49 | word_t v = sipnode(&sip_keys, nonce, 1); 50 | #ifdef SHOW 51 | for (unsigned j=1; j= 1000) 63 | nlarge += size; 64 | } 65 | printf("%u nodes in 
ccsize >= 1000\n", nlarge); 66 | #endif 67 | } 68 | }; 69 | 70 | // arbitrary length of header hashed into siphash key 71 | #define HEADERLEN 80 72 | 73 | int main(int argc, char **argv) { 74 | char header[HEADERLEN]; 75 | memset(header, 0, HEADERLEN); 76 | int c, easipct = 50; 77 | u32 nonce = 0; 78 | u32 range = 1; 79 | u64 time0, time1; 80 | u32 timems; 81 | 82 | while ((c = getopt (argc, argv, "e:h:n:r:")) != -1) { 83 | switch (c) { 84 | case 'e': 85 | easipct = atoi(optarg); 86 | break; 87 | case 'h': 88 | memcpy(header, optarg, strlen(optarg)); 89 | break; 90 | case 'n': 91 | nonce = atoi(optarg); 92 | break; 93 | case 'r': 94 | range = atoi(optarg); 95 | break; 96 | } 97 | } 98 | assert(easipct >= 0 && easipct <= 100); 99 | printf("Looking for %d-cycle on cuckoo%d(\"%s\",%d", PROOFSIZE, EDGEBITS+1, header, nonce); 100 | if (range > 1) 101 | printf("-%d", nonce+range-1); 102 | printf(") with %d%% edges, ", easipct); 103 | word_t easiness = easipct * (word_t)NNODES / 100; 104 | cuckoo_ctx ctx(header, sizeof(header), nonce, easiness); 105 | u64 bytes = ctx.bytes(); 106 | int unit; 107 | for (unit=0; bytes >= 10240; bytes>>=10,unit++) ; 108 | printf("using %d%cB memory at %llx.\n", (u32)bytes, " KMGT"[unit], (uint64_t)ctx.cb.cuckoo); 109 | 110 | for (u32 r = 0; r < range; r++) { 111 | time0 = timestamp(); 112 | ctx.setheadernonce(header, sizeof(header), nonce + r); 113 | printf("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx.sip_keys.k0, ctx.sip_keys.k1, ctx.sip_keys.k2, ctx.sip_keys.k3); 114 | ctx.cycle_base(); 115 | time1 = timestamp(); timems = (time1 - time0) / 1000000; 116 | printf("Time: %d ms\n", timems); 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/java/Makefile: -------------------------------------------------------------------------------- 1 | .POSIX: 2 | .SUFFIXES: 3 | 4 | OPT ?= -O3 5 | 6 | GCC_ARCH_FLAGS ?= -march=native 7 | GPP_ARCH_FLAGS ?= -march=native 8 | 9 | # 
-Wno-deprecated-declarations shuts up Apple OSX clang 10 | FLAGS ?= -Wall -Wno-format -Wno-deprecated-declarations -D_POSIX_C_SOURCE=200112L $(OPT) -DPREFETCH -I. $(CPPFLAGS) -pthread 11 | GPP ?= g++ $(GPP_ARCH_FLAGS) -std=c++11 $(FLAGS) 12 | CFLAGS ?= -Wall -Wno-format -fomit-frame-pointer $(OPT) 13 | GCC ?= gcc $(GCC_ARCH_FLAGS) -std=gnu11 $(CFLAGS) 14 | LIBS ?= -L. -lblake2b 15 | 16 | all : java 17 | 18 | Cuckoo.class: Cuckoo.java Makefile 19 | javac -O Cuckoo.java 20 | 21 | SimpleMiner.class: Cuckoo.java SimpleMiner.java Makefile 22 | javac -O Cuckoo.java SimpleMiner.java 23 | 24 | java: Cuckoo.class SimpleMiner.class Makefile 25 | java SimpleMiner -h 261 | tail -1 | java Cuckoo -h 261 26 | -------------------------------------------------------------------------------- /src/java/SimpleMiner.java: -------------------------------------------------------------------------------- 1 | // Cuckoo Cycle, a memory-hard proof-of-work 2 | // Copyright (c) 2013-2016 John Tromp 3 | 4 | import java.util.Set; 5 | import java.util.HashSet; 6 | 7 | class CuckooSolve { 8 | static final int MAXPATHLEN = 4096; 9 | Cuckoo graph; 10 | int easiness; 11 | int[] cuckoo; 12 | int[][] sols; 13 | int nsols; 14 | int nthreads; 15 | 16 | public CuckooSolve(byte[] hdr, int en, int ms, int nt) { 17 | graph = new Cuckoo(hdr); 18 | easiness = en; 19 | sols = new int[ms][Cuckoo.PROOFSIZE]; 20 | cuckoo = new int[1+(int)Cuckoo.NNODES]; 21 | assert cuckoo != null; 22 | nsols = 0; 23 | nthreads = nt; 24 | } 25 | 26 | public int path(int u, int[] us) { 27 | int nu; 28 | for (nu = 0; u != 0; u = cuckoo[u]) { 29 | if (++nu >= MAXPATHLEN) { 30 | while (nu-- != 0 && us[nu] != u) ; 31 | if (nu < 0) 32 | System.out.println("maximum path length exceeded"); 33 | else System.out.println("illegal " + (MAXPATHLEN-nu) + "-cycle"); 34 | Thread.currentThread().interrupt(); 35 | } 36 | us[nu] = u; 37 | } 38 | return nu; 39 | } 40 | 41 | public synchronized void solution(int[] us, int nu, int[] vs, int nv) { 42 | 
Set cycle = new HashSet(); 43 | int n; 44 | cycle.add(new Edge(us[0],vs[0]-Cuckoo.NEDGES)); 45 | while (nu-- != 0) // u's in even position; v's in odd 46 | cycle.add(new Edge(us[(nu+1)&~1],us[nu|1]-Cuckoo.NEDGES)); 47 | while (nv-- != 0) // u's in odd position; v's in even 48 | cycle.add(new Edge(vs[nv|1],vs[(nv+1)&~1]-Cuckoo.NEDGES)); 49 | for (int nonce = n = 0; nonce < easiness; nonce++) { 50 | Edge e = graph.sipedge(nonce); 51 | if (cycle.contains(e)) { 52 | sols[nsols][n++] = nonce; 53 | cycle.remove(e); 54 | } 55 | } 56 | if (n == Cuckoo.PROOFSIZE) 57 | nsols++; 58 | else System.out.println("Only recovered " + n + " nonces"); 59 | } 60 | } 61 | 62 | public class SimpleMiner implements Runnable { 63 | int id; 64 | CuckooSolve solve; 65 | 66 | public SimpleMiner(int i, CuckooSolve cs) { 67 | id = i; 68 | solve = cs; 69 | } 70 | 71 | public void run() { 72 | int[] cuckoo = solve.cuckoo; 73 | int[] us = new int[CuckooSolve.MAXPATHLEN], vs = new int[CuckooSolve.MAXPATHLEN]; 74 | for (int nonce = id; nonce < solve.easiness; nonce += solve.nthreads) { 75 | int u = cuckoo[us[0] = (int)solve.graph.sipnode(nonce,0)]; 76 | int v = cuckoo[vs[0] = (int)(Cuckoo.NEDGES + solve.graph.sipnode(nonce,1))]; 77 | if (u == vs[0] || v == us[0]) 78 | continue; // ignore duplicate edges 79 | int nu = solve.path(u, us), nv = solve.path(v, vs); 80 | if (us[nu] == vs[nv]) { 81 | int min = nu < nv ? 
nu : nv; 82 | for (nu -= min, nv -= min; us[nu] != vs[nv]; nu++, nv++) ; 83 | int len = nu + nv + 1; 84 | System.out.println(" " + len + "-cycle found at " + id + ":" + (int)(nonce*100L/solve.easiness) + "%"); 85 | if (len == Cuckoo.PROOFSIZE && solve.nsols < solve.sols.length) 86 | solve.solution(us, nu, vs, nv); 87 | continue; 88 | } 89 | if (nu < nv) { 90 | while (nu-- != 0) 91 | cuckoo[us[nu+1]] = us[nu]; 92 | cuckoo[us[0]] = vs[0]; 93 | } else { 94 | while (nv-- != 0) 95 | cuckoo[vs[nv+1]] = vs[nv]; 96 | cuckoo[vs[0]] = us[0]; 97 | } 98 | } 99 | Thread.currentThread().interrupt(); 100 | } 101 | 102 | public static void main(String argv[]) { 103 | assert Cuckoo.NNODES > 0; 104 | int nthreads = 1; 105 | int maxsols = 8; 106 | String header = ""; 107 | int easipct = 50; 108 | for (int i = 0; i < argv.length; i++) { 109 | if (argv[i].equals("-e")) { 110 | easipct = Integer.parseInt(argv[++i]); 111 | } else if (argv[i].equals("-h")) { 112 | header = argv[++i]; 113 | } else if (argv[i].equals("-m")) { 114 | maxsols = Integer.parseInt(argv[++i]); 115 | } else if (argv[i].equals("-t")) { 116 | nthreads = Integer.parseInt(argv[++i]); 117 | } 118 | } 119 | assert easipct >= 0 && easipct <= 100; 120 | System.out.println("Looking for " + Cuckoo.PROOFSIZE + "-cycle on cuckoo" + Cuckoo.NODEBITS + "(\"" + header + "\") with " + easipct + "% edges and " + nthreads + " threads"); 121 | CuckooSolve solve = new CuckooSolve(header.getBytes(), (int)(easipct * (long)Cuckoo.NNODES / 100), maxsols, nthreads); 122 | 123 | Thread[] threads = new Thread[nthreads]; 124 | for (int t = 0; t < nthreads; t++) { 125 | threads[t] = new Thread(new SimpleMiner(t, solve)); 126 | threads[t].start(); 127 | } 128 | for (int t = 0; t < nthreads; t++) { 129 | try { 130 | threads[t].join(); 131 | } catch (InterruptedException e) { 132 | System.out.println(e); 133 | System.exit(1); 134 | } 135 | } 136 | for (int s = 0; s < solve.nsols; s++) { 137 | System.out.print("Solution"); 138 | for (int i = 0; i < 
Cuckoo.PROOFSIZE; i++) 139 | System.out.print(String.format(" %x", solve.sols[s][i])); 140 | System.out.println(""); 141 | } 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /src/perl/cycles.pl: -------------------------------------------------------------------------------- 1 | my $maxcycles = 0; 2 | my $nonce = 0; 3 | my $maxnonce = 0; 4 | my $nnonces = 0; 5 | my $maxcycles = 0; 6 | my $ncycles = 0; 7 | my @count; 8 | while (<>) { 9 | if (/^nonce (\d+)/) { 10 | $nonce = $1; 11 | $nnonces += 1; 12 | } elsif (/^Time/ || /^findcycles/) { 13 | if ($ncycles > $maxcycles) { 14 | $maxnonce = $nonce; 15 | $maxcycles = $ncycles; 16 | } 17 | $ncycles = 0; 18 | } elsif (/(\d+)-cycle found/) { 19 | $ncycles += 1; 20 | $count[$1]++; 21 | } 22 | } 23 | for $i (1..$#count) { 24 | my $c = $count[$i]; 25 | my $f = $c * $i / $nnonces; 26 | print "$i $c $f\n" if $c; 27 | } 28 | printf "$nnonces nonces $maxcycles cycles at nonce $maxnonce\n"; 29 | -------------------------------------------------------------------------------- /src/threads/barrier.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | #ifdef __APPLE__ 6 | typedef int pthread_barrierattr_t; 7 | #endif 8 | 9 | class trim_barrier { 10 | pthread_mutex_t mutex; 11 | pthread_cond_t cond; 12 | unsigned limit; 13 | unsigned count; 14 | int phase; 15 | 16 | public: 17 | trim_barrier(unsigned int count) { 18 | pthread_mutex_init(&mutex, 0); 19 | pthread_cond_init(&cond, 0); 20 | limit = count; 21 | } 22 | 23 | ~trim_barrier() { 24 | pthread_mutex_destroy(&mutex); 25 | pthread_cond_destroy(&cond); 26 | } 27 | 28 | void clear() { 29 | count = phase = 0; 30 | } 31 | 32 | void abort() { 33 | pthread_mutex_lock(&mutex); 34 | phase = -1; 35 | pthread_mutex_unlock(&mutex); 36 | } 37 | 38 | bool aborted() { 39 | return phase < 0; 40 | } 41 | 42 | void wait() { 43 | pthread_mutex_lock(&mutex); 44 | 
int wait_phase = phase; 45 | if (++count >= limit) { 46 | if (wait_phase >= 0) { 47 | phase = wait_phase + 1; 48 | count = 0; 49 | } 50 | pthread_cond_broadcast(&cond); 51 | } else if (wait_phase >= 0) { 52 | do 53 | pthread_cond_wait(&cond, &mutex); 54 | while (phase == wait_phase); 55 | } 56 | pthread_mutex_unlock(&mutex); 57 | if (wait_phase < 0) 58 | pthread_exit(NULL); 59 | } 60 | }; 61 | -------------------------------------------------------------------------------- /src/tmto/momentomatum.cpp: -------------------------------------------------------------------------------- 1 | // Bounty Cycle, an attempt to disprove John's claims 2 | 3 | #include "momentomatum.h" 4 | #include 5 | 6 | int main(int argc, char **argv) { 7 | int nthreads = 1; 8 | bool minimalbfs = true; 9 | int nparts = NUPARTS; 10 | const char *header = ""; 11 | int c; 12 | while ((c = getopt (argc, argv, "h:mn:t:")) != -1) { 13 | switch (c) { 14 | case 'h': 15 | header = optarg; 16 | break; 17 | case 'm': 18 | minimalbfs = true; 19 | break; 20 | case 'n': 21 | nparts = atoi(optarg); 22 | break; 23 | case 't': 24 | nthreads = atoi(optarg); 25 | break; 26 | } 27 | } 28 | printf("Looking for %d-cycle on cuckoo%d(\"%s\") with 50%% edges, 1/%d memory, %d/%d parts, %d threads %d minimalbfs\n", PROOFSIZE, NODEBITS, header, 1<= 1024; nodeBytes>>=10,nodeUnit++) ; 32 | printf("Using %d%cB node memory.\n", (int)nodeBytes, " KMGT"[nodeUnit]); 33 | cuckoo_ctx ctx(header, nthreads, nparts, minimalbfs); 34 | thread_ctx *threads = (thread_ctx *)calloc(nthreads, sizeof(thread_ctx)); 35 | assert(threads); 36 | for (int t = 0; t < nthreads; t++) { 37 | threads[t].id = t; 38 | threads[t].ctx = &ctx; 39 | int err = pthread_create(&threads[t].thread, NULL, worker, (void *)&threads[t]); 40 | assert(err == 0); 41 | } 42 | for (int t = 0; t < nthreads; t++) { 43 | int err = pthread_join(threads[t].thread, NULL); 44 | assert(err == 0); 45 | } 46 | free(threads); 47 | return 0; 48 | } 49 | 
-------------------------------------------------------------------------------- /src/tmto/tomato_miner.cpp: -------------------------------------------------------------------------------- 1 | // Time Memory Trade Off (TMTO, or tomato) solver 2 | 3 | #include "tomato_miner.h" 4 | #include 5 | 6 | // arbitrary length of header hashed into siphash key 7 | #define HEADERLEN 80 8 | 9 | int main(int argc, char **argv) { 10 | int nthreads = 1; 11 | bool minimalbfs = false; 12 | int nparts = NUPARTS; 13 | int range = 1; 14 | int nonce = 0; 15 | int c; 16 | char header[HEADERLEN]; 17 | unsigned len; 18 | 19 | memset(header, 0, sizeof(header)); 20 | while ((c = getopt (argc, argv, "h:n:p:t:r:m")) != -1) { 21 | switch (c) { 22 | case 'h': 23 | len = strlen(optarg); 24 | assert(len <= sizeof(header)); 25 | memcpy(header, optarg, len); 26 | break; 27 | case 'm': 28 | minimalbfs = true; 29 | break; 30 | case 'n': 31 | nonce = atoi(optarg); 32 | break; 33 | case 'p': 34 | nparts = atoi(optarg); 35 | break; 36 | case 'r': 37 | range = atoi(optarg); 38 | break; 39 | case 't': 40 | nthreads = atoi(optarg); 41 | break; 42 | } 43 | } 44 | printf("Looking for %d-cycle on cuckoo%d(\"%s\",%d", PROOFSIZE, NODEBITS, header, nonce); 45 | if (range > 1) 46 | printf("-%d", nonce+range-1); 47 | printf(") with 50%% edges, 1/%d memory, %d/%d parts, %d threads %d minimalbfs\n", 48 | 1<= 1024; nodeBytes>>=10,nodeUnit++) ; 52 | printf("Using %d%cB node memory.\n", (int)nodeBytes, " KMGT"[nodeUnit]); 53 | thread_ctx *threads = (thread_ctx *)calloc(nthreads, sizeof(thread_ctx)); 54 | assert(threads); 55 | cuckoo_ctx ctx(nthreads, nparts, minimalbfs); 56 | 57 | for (int r = 0; r < range; r++) { 58 | ctx.setheadernonce(header, sizeof(header), nonce + r); 59 | 60 | for (int t = 0; t < nthreads; t++) { 61 | threads[t].id = t; 62 | threads[t].ctx = &ctx; 63 | int err = pthread_create(&threads[t].thread, NULL, worker, (void *)&threads[t]); 64 | assert(err == 0); 65 | } 66 | for (int t = 0; t < nthreads; 
t++) { 67 | int err = pthread_join(threads[t].thread, NULL); 68 | assert(err == 0); 69 | } 70 | } 71 | free(threads); 72 | return 0; 73 | } 74 | --------------------------------------------------------------------------------