├── .travis.yml
├── GPU.md
├── GPU_tuning.md
├── LICENSE.txt
├── README.md
├── doc
    ├── IEEEtran.bst
    ├── Makefile
    ├── Nano_PoW
    ├── SipHash
    ├── SipHash12
    ├── SipHash13
    ├── bitcoin2015.tex
    ├── blog
    ├── cuckoo.bib
    ├── cuckoo.pdf
    ├── cuckoo.tex
    ├── leancore
    ├── mathspec
    ├── pbkdf
    ├── simplesolve
    └── spec
├── img
    ├── cuckatoo_cycle.jpg
    ├── cuckoo.png
    ├── emissions.gif
    ├── grinlogo.pdf
    ├── grinlogo.ps
    ├── inflation.gif
    └── logo.png
├── private
    ├── bfs
    ├── comp.pl
    ├── cycle_freq.eps
    ├── cyclefreq
    ├── cycles.eps
    ├── equihash
    ├── found.pl
    ├── found25
    ├── fracs
    ├── minbfs
    ├── real
    ├── siphash-2-4
    ├── speedup30
    ├── speedup30na0
    ├── speedup30na1
    ├── speedup30na2
    ├── speedup32
    ├── speedup32na0
    └── speedup32na1
└── src
    ├── Makefile
    ├── crypto
        ├── blake2-impl.h
        ├── blake2.h
        ├── blake2b-ref.c
        ├── portable_endian.h
        ├── siphash.cuh
        ├── siphash.hpp
        └── siphashxN.h
    ├── cuckaroo
        ├── Makefile
        ├── bitmap.hpp
        ├── compress.hpp
        ├── cuckaroo.c
        ├── cuckaroo.hpp
        ├── graph.hpp
        ├── mean.cpp
        ├── mean.cu
        ├── mean.hpp
        └── simple.cpp
    ├── cuckarood
        ├── Makefile
        ├── bitmap.hpp
        ├── compress.hpp
        ├── cuckarood.c
        ├── cuckarood.hpp
        ├── graph.hpp
        ├── kernel.cuh
        ├── mean.cpp
        ├── mean.cu
        ├── mean.hpp
        ├── photon.cu
        └── simple.cpp
    ├── cuckaroom
        ├── Makefile
        ├── bitmap.hpp
        ├── compress.hpp
        ├── cuckaroom.c
        ├── cuckaroom.hpp
        ├── graph.hpp
        ├── kernel.cuh
        ├── mean.cpp
        ├── mean.cu
        ├── mean.hpp
        ├── meaner.cu
        └── simple.cpp
    ├── cuckarooz
        ├── Makefile
        ├── bitmap.hpp
        ├── compress.hpp
        ├── cuckarooz.c
        ├── cuckarooz.hpp
        ├── graph.hpp
        ├── kernel.cuh
        ├── mean.cpp
        ├── mean.cu
        ├── mean.hpp
        └── simple.cpp
    ├── cuckatoo
        ├── Makefile
        ├── bitmap.hpp
        ├── compress.hpp
        ├── cuckatoo.c
        ├── cuckatoo.h
        ├── cumal.cu
        ├── graph.hpp
        ├── lean.cpp
        ├── lean.cu
        ├── lean.hpp
        ├── mean.cpp
        ├── mean.cu
        ├── mean.hpp
        └── simple.cpp
    ├── cuckoo
        ├── Makefile
        ├── cuckoo.c
        ├── cuckoo.h
        ├── cyclebase.hpp
        ├── lean.cpp
        ├── lean.cu
        ├── lean.hpp
        ├── mean.cpp
        ├── mean.cu
        ├── mean.hpp
        └── simple.cpp
    ├── java
        ├── Cuckoo.java
        ├── Makefile
        └── SimpleMiner.java
    ├── perl
        └── cycles.pl
    ├── threads
        └── barrier.hpp
    └── tmto
        ├── momentomatum.cpp
        ├── momentomatum.h
        ├── tomato_miner.cpp
        └── tomato_miner.h


/.travis.yml:
--------------------------------------------------------------------------------
 1 | dist: trusty
 2 | sudo: false
 3 | language: cpp
 4 | env:
 5 |   - JOB=cuckoo
 6 |   - JOB=cuckatoo
 7 |   - JOB=java
 8 |   - JOB=doc
 9 | matrix:
10 |   include:
11 |     - os: osx
12 |       osx_image: xcode8.3 # [`xcode8.3` is Xcode 8.3.3 on OS X 10.12](https://docs.travis-ci.com/user/reference/osx#OS-X-Version)
13 |       language: generic
14 |       env: JOB=java
15 | addons:
16 |   apt:
17 |     packages:
18 |       - texlive-latex-base
19 |       - texlive-pictures
20 |       - pgf
21 | before_script:
22 |   - case "${TRAVIS_OS_NAME:?}" in linux) LIBV=LD_LIBRARY_PATH;; osx) LIBV=DYLD_LIBRARY_PATH;; esac
23 |   - echo "The library path variable name is ${LIBV:?}"
24 |   - LIBP="$(pwd)/src"
25 |   - echo "The library path variable value is ${LIBP:?}"
26 | script:
27 |   - if test cuckoo = "${JOB:?}"; then ( cd src/cuckoo && make; ); fi
28 |   - if test cuckatoo = "${JOB:?}"; then ( cd src/cuckatoo && make; ); fi
29 |   - if test java = "${JOB:?}"; then ( cd src/java && make; ); fi
30 |   - if test doc = "${JOB:?}"; then ( cd doc && make cuckoo.pdf; ); fi
31 | after_success:  # for the doc job only: print the built PDF into the job log as base64
32 |   - |
33 |     if test doc = "${JOB:?}"; then
34 |       echo "Below is the generated PDF encoded as base64. You can decode it using 'openssl base64 -d'."
35 |       openssl base64 -in doc/cuckoo.pdf  # read the file directly instead of piping it through cat
36 |     fi
37 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | The FAIR MINING License
 2 | 
 3 | Copyright (c) 2013-2019 John Tromp
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | FAIR MINING
13 | Any derived miner that charges a developer fee for mining a fair coin
14 | ---one with no premine or other form of developer compensation---
15 | shall offer to share half the fee revenue with the coin developers.
16 | 
17 | The above copyright notice, FAIR MINING condition, and this permission notice
18 | shall be included in all copies or substantial portions of the Software.
19 | 
20 | 
21 | ALTERNATIVELY, this software may be distributed under the terms of the
22 | GNU General Public License ("GPL") version 2 or later, as published by
23 | the Free Software Foundation.
24 | 
25 | 
26 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
31 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 | SOFTWARE.
33 | 


--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
 1 | cuckoo.pdf:	cuckoo.tex cuckoo.bbl  # the paper; depends on the bibliography produced by the bibtex rule below
 2 | 	pdflatex cuckoo.tex
 3 | 	# The following two `pdflatex` runs are needed to resolve references of citations from bibliography. (See [Wikibooks](https://en.wikibooks.org/w/index.php?title=LaTeX/Bibliography_Management&oldid=3442012#Why_won't_LaTeX_generate_any_output?).)
 4 | 	pdflatex cuckoo.tex
 5 | 	pdflatex cuckoo.tex
 6 | 
 7 | cuckoo.bbl:	cuckoo.aux cuckoo.bib  # bibliography; rebuilt when the .aux file or the .bib database changes
 8 | 	bibtex cuckoo
 9 | 
10 | cuckoo.aux:	cuckoo.tex  # initial LaTeX pass whose .aux output is the input to bibtex
11 | 	latex cuckoo.tex
12 | 
13 | 


--------------------------------------------------------------------------------
/doc/Nano_PoW:
--------------------------------------------------------------------------------
 1 | Nanocoin author Colin Lemahieu recently made available [1] a newly developed proof of work scheme "Nano PoW"
 2 | whose solutions are pairs (x,y) satisfying
 3 | 
 4 |  H0(x) + H1(y) = 0 mod D
 5 | 
 6 | where H0,H1 are two different hash functions and D = 2^d is a difficulty parameter.
 7 | 
 8 | It is claimed to be memory-hard, requiring on the order of sqrt(D) = 2^(d/2) memory for efficient solving.
 9 | 
10 | However, a simple variation of rho search [2] can efficiently find solutions without any use of memory.
11 | First, note that by defining H2(y) = -H1(y), the above equation simplifies to
12 | 
13 |  H0(x) = H2(y) mod D
14 | 
15 | Next, define a chain of values
16 | 
17 | x_{2k}   = H0( x_{2k-1} ) mod D
18 | x_{2k+1} = H2( x_{2k  } ) mod D
19 | 
20 | starting at some arbitrary x_0.
21 | 
22 | This chain is expected to have repeating values x_i = x_j, i < j, within O(sqrt(D)) steps,
23 | which can be found efficiently using the aforementioned rho search in time O(sqrt(D)) and zero memory.
24 | 
25 | EDIT: PlasmaPower pointed out [3] that the chain almost certainly diverges again at x_{i+1} and x_{j+1},
26 | preventing rho search from cycling around the loop more than once.
27 | 
28 | So instead let's consider a chain of values
29 | 
30 | x_{i+1} = min( H0(x_i), H2(x_i) ) mod D
31 | 
32 | starting at some arbitrary x_0.
33 | 
34 | This chain is expected to have repeating values x_i = x_j, i < j, within O(sqrt(D)) steps,
35 | which this time can be found efficiently with rho search.
36 | 
37 | With some probability, x_i and x_j will result from different hash functions of x_{i-1} and x_{j-1} respectively, e.g.
38 | 
39 |  H0(x_{i-1}) = x_i = x_j = H2(x_{j-1}) mod D
40 | 
41 | yielding a solution (x,y) = (x_{i-1}, x_{j-1}) to the PoW.
42 | 
43 | Btw, another chain of values that should work is
44 | 
45 | x_{i+1} = if H3(x_i) < P * 2^64 then H0(x_i) else H2(x_i)
46 | 
47 | where H3 is another arbitrary (64-bit output) hash function, and P is the probability of taking an H0 step.
48 | 
49 | [1] https://github.com/nanocurrency/nano-pow
50 | [2] Parallel Collision Search with Cryptanalytic Applications: https://people.scs.carleton.ca/~paulv/papers/JoC97.pdf
51 | [3] https://www.reddit.com/r/nanocurrency/comments/dces6e/nanopow_the_details/f2aw6bx/
52 | 


--------------------------------------------------------------------------------
/doc/blog:
--------------------------------------------------------------------------------
 1 | http://cryptorials.io/beyond-hashcash-proof-work-theres-mining-hashing/
 2 | 
 3 | Beyond the Hashcash Proof-of-Work
 4 | (there's more to mining than hashing)
 5 | -------------------------------------
 6 | 
 7 | Many people equate Proof of Work (PoW) with one particular instance of it.
 8 | It's not hard to understand why. The Hashcash PoW is used not only in Bitcoin
 9 | but in the vast majority of altcoins as well.
10 | 
11 | In Hashcash, miners all compete to look for a so called `nonce' which,
12 | if provided as input (together with other parts of a block header) to a hash function, 
13 | yields an output that's numerically small enough to claim the next block reward.
14 | 
15 | Where most crypto currencies differ is in the choice of hash function; the Hashcash flavor as it were.
16 | Besides Bitcoin's `vanilla' flavor of SHA256, there is Litecoin's scrypt, Cryptonote's CryptoNight,
17 | Darkcoin's X11, and many more. Most alternative flavors have the explicitly stated goal of reducing the
18 | performance gap between custom and commodity hardware, either by use of memory, or by sheer complexity.
19 | 
20 | But miners are only part of the picture. Proofs of work must not only be found, but verified as well,
21 | by every single client, including smartphones and other devices with limited resources. In Hashcash,
22 | verification amounts to evaluating the hash function on the given nonce and comparing the output with
23 | the difficulty threshold. Which is exactly the same effort as a single proof attempt.
24 | 
25 | Thus, in order to keep verification cheap, hash functions in Hashcash must restrict their resource usage as well.
26 | That's why scrypt is configured to use only 128KB of memory.
27 | 
28 | Non-Hashcash PoWs do not suffer this limitation; they are asymmetric, with verification much cheaper
29 | than proof attempt. The first such PoW is Primecoin, which finds chains of nearly doubled prime numbers.
30 | The most recent example is my Cuckoo Cycle PoW, which was presented at the BITCOIN'2015
31 | workshop in January. The whitepaper can be found at https://github.com/tromp/cuckoo,
32 | which also hosts various implementations, as well as bounties for improving on them.
33 | 
34 | In Cuckoo Cycle, proofs take the form of a length 42 cycle (loop) in a large random graph defined by some nonce.
35 | Imagine two countries, each with a billion cities, and imagine picking a billion border crossing roads that
36 | connect a random city in one country to a random city in the other country (the PoW actually uses a cheaply
37 | computed hash function to map the nonce, road number, and country to a city).
38 | We are asked if there is a cycle of 42 roads visiting 42 different cities.
39 | If someone hands you a nonce and 42 road numbers, it is indeed easy to verify, requiring negligible time and memory.
40 | 
41 | But finding such a cycle is no easy task. Note however, that a city that connects to one road only cannot be part
42 | of the solution, nor can that road. David Andersen pointed out that such dead-end roads can be repeatedly eliminated,
43 | using one bit of memory per road to remember if that road is useful,
44 | and two bits per city to count if there are zero, one, or multiple useful roads to that city.
45 | This process of computing counts for cities, and marking roads that lead to a city with count one as not useful,
46 | is the essence of Cuckoo Cycle mining and accounts for about 98% of the effort.
47 | It results in billions of random global memory accesses for reading and writing the counters.
48 | Consequently, about 2/3 of the runtime is memory latency, making this a low-power algorithm that keeps
49 | computers running cool.
50 | 
51 | After a sufficient number of counting and marking rounds, so few useful roads remain that another algorithm,
52 | inspired by Cuckoo Hashing, can quickly identify cycles (re-using the memory for the no longer needed counters).
53 | 
54 | Cuckoo Cycle has some downsides as well. First of all, proofs are large and will roughly triple the size of block headers.
55 | Secondly, it is very slow, taking for instance the better part of a minute on a high-end CPU (or GPU, which offers roughly the same speed) to look for a cycle among a billion roads.
56 | 
57 | In order to give slower CPUs a (somewhat) fair chance to win, the block interval should be much longer than
58 | a single proof attempt, so the amount of memory Cuckoo Cycle can use is constrained by the choice of block interval length.
59 | 
60 | These seem like reasonable compromises for an instantly verifiable memory bound PoW that is unique in being dominated
61 | by latency rather than computation. In that sense, mining Cuckoo Cycle is a form of ASIC mining where DRAM chips serve
62 | the application of randomly reading and writing billions of bits. 
63 | 
64 | When even phones charging overnight can mine without orders of magnitude loss in efficiency, not with a mindset
65 | of profitability but of playing the lottery, the mining hardware landscape will see vast expansion, benefiting
66 | adoption as well as decentralization.
67 | 
68 | 
69 | 1 Comment
70 | 
71 | The article is no longer accurate regarding Cuckoo Cycle being latency bound.
72 | Someone going by the handle of “xenoncat” demonstrated a bandwidth bound method
73 | of edge trimming that’s 4x faster, although it uses about 12x more memory. On
74 | the plus side, this allows a GPU to process a billion node graph in under a
75 | second, which makes Cuckoo Cycle suitable for smaller block intervals. On the
76 | down side, this makes the use of phones look less attractive.
77 | 
78 | 


--------------------------------------------------------------------------------
/doc/cuckoo.bib:
--------------------------------------------------------------------------------
  1 | @inproceedings{Birmele2013,
  2 |  author = {Birmel{\'e}, Etienne and Ferreira, Rui and Grossi, Roberto and Marino, Andrea and Pisanti, Nadia and Rizzi, Romeo and Sacomoto, Gustavo},
  3 |  title = {Optimal Listing of Cycles and St-paths in Undirected Graphs},
  4 |  booktitle = {Proceedings of the Twenty-fourth Annual ACM-SIAM Symposium on Discrete Algorithms},
  5 |  series = {SODA '13},
  6 |  year = {2013},
  7 |  isbn = {978-1-611972-51-1},
  8 |  location = {New Orleans, Louisiana},
  9 |  pages = {1884--1896},
 10 |  numpages = {13},
 11 |  url = {http://dl.acm.org/citation.cfm?id=2627817.2627951},
 12 |  acmid = {2627951},
 13 |  publisher = {Society for Industrial and Applied Mathematics},
 14 |  address = {Philadelphia, PA, USA},
 15 | }
 16 | 
 17 | @article{1980-brent-cycles,
 18 |   author={Richard P. Brent},
 19 |   ISSN={0006-3835},
 20 |   journal={BIT},
 21 |   MR={82a:10017},
 22 |   pages={176--184},
 23 |   title={{An improved Monte Carlo factorization algorithm}},
 24 |   volume={20},
 25 |   year={1980}
 26 | }
 27 | 
 28 | @techreport{nakamoto2009bitcoin,
 29 |   author = {Nakamoto, Satoshi},
 30 |   keywords = {bitcoin cash electronic peer privacy},
 31 |   month = may,
 32 |   title = {Bitcoin: A Peer-to-Peer Electronic Cash System},
 33 |   url = {http://www.bitcoin.org/bitcoin.pdf},
 34 |   year = 2009
 35 | }
 36 | 
 37 | @article{Pagh04cuckoohashing,
 38 |  author = {Pagh, Rasmus and Rodler, Flemming Friche},
 39 |  title = {Cuckoo Hashing},
 40 |  journal = {J. Algorithms},
 41 |  issue_date = {May 2004},
 42 |  volume = {51},
 43 |  number = {2},
 44 |  month = may,
 45 |  year = {2004},
 46 |  issn = {0196-6774},
 47 |  pages = {122--144},
 48 |  numpages = {23},
 49 |  url = {http://dx.doi.org/10.1016/j.jalgor.2003.12.002},
 50 |  doi = {10.1016/j.jalgor.2003.12.002},
 51 |  acmid = {1006426},
 52 |  publisher = {Academic Press, Inc.},
 53 |  address = {Duluth, MN, USA},
 54 |  keywords = {data structures, dictionaries, experiments, hashing, information retrieval, searching},
 55 | } 
 56 | 
 57 | @misc{wikidsds2014,
 58 |   author = "Wikipedia",
 59 |   title = "Disjoint-set data structure --- Wikipedia{,} The Free Encyclopedia",
 60 |   year = "2014",
 61 |   url = "http://en.wikipedia.org/w/index.php?title=Disjoint-set_data_structure",
 62 |   note = "[Online; accessed 23-March-2014]"
 63 | }
 64 | 
 65 | @techreport{king2013,
 66 |   author = {King, Sunny},
 67 |   keywords = {primecoin cash electronic peer},
 68 |   month = jul,
 69 |   title = {Primecoin: Cryptocurrency with Prime Number Proof-of-Work},
 70 |   url = {http://primecoin.org/static/primecoin-paper.pdf},
 71 |   year = 2013
 72 | }
 73 | 
 74 | @misc{scrypt2009,
 75 |   author = {Percival, Colin},
 76 |   title = {Stronger Key Derivation via Sequential Memory-Hard Functions},
 77 |   month = may,
 78 |   year = 2009,
 79 |   url = {http://www.tarsnap.com/scrypt/scrypt.pdf},
 80 |   note = "presented at BSDCan 2009"
 81 | }
 82 | 
 83 | @techreport{larimer2013,
 84 |   author = {Larimer, Daniel},
 85 |   month = oct,
 86 |   title = {Momentum - A Memory-Hard Proof-of-Work via finding Birthday Collisions},
 87 |   url = {http://www.hashcash.org/papers/momentum.pdf},
 88 |   year = 2013
 89 | }
 90 | 
 91 | @misc{back2014,
 92 |   author = {Back, Adam},
 93 |   title = {Hashcash.org},
 94 |   month = feb,
 95 |   year = 2014,
 96 |   url = {http://www.hashcash.org/papers/}
 97 | }
 98 | 
 99 | @misc{preshing2013,
100 |   author = {Preshing, Jeff},
101 |   title = {The World's Simplest Lock-Free Hash Table},
102 |   month = jun,
103 |   year = 2013,
104 |   url = {http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table/}
105 | }
106 | 
107 | @misc{tenebrix2011,
108 |   author = {Lolcust},
109 |   title = {[ANNOUNCE] Tenebrix, a CPU-friendly, GPU-hostile cryptocurrency},
110 |   month = sep,
111 |   year = 2011,
112 |   url = {https://bitcointalk.org/index.php?topic=45667.0}
113 | }
114 | 
115 | @misc{litecoin2011,
116 |   author = {coblee},
117 |   title = {[ANN] Litecoin - a lite version of Bitcoin. Launched!},
118 |   month = oct,
119 |   year = 2011,
120 |   url = {https://bitcointalk.org/index.php?topic=47417.0}
121 | }
122 | 
123 | @misc{poelstra2014,
124 |   author = {Poelstra, Andrew},
125 |   title = {ASICs and Decentralization FAQ},
126 |   year = 2014,
127 |   url = {https://download.wpsoftware.net/bitcoin/asic-faq.pdf}
128 | }
129 | 
130 | @misc{dga2014,
131 |   author = {Andersen, David},
132 |   title = {A Public Review of Cuckoo Cycle},
133 |   month = apr,
134 |   year = 2014,
135 |   url = {http://da-data.blogspot.com/2014/03/a-public-review-of-cuckoo-cycle.html}
136 | }
137 | 
138 | @article{parallel99,
139 |  author = {van Oorschot, Paul C. and Wiener, Michael J.},
140 |  title = {Parallel collision search with cryptanalytic applications},
141 |  journal = {J. Cryptology},
142 |  issue_date = {Jan 1999},
143 |  volume = {12},
144 |  number = {1},
145 |  month = jan,
146 |  year = {1999},
147 |  pages = {1--28},
148 |  numpages = {28},
149 | } 
150 | 
151 | @misc{equihash16,
152 |     author = {Alex Biryukov and Dmitry Khovratovich},
153 |     title = {Equihash: Asymmetric Proof-of-Work Based on the Generalized Birthday Problem},
154 |     howpublished = {Cryptology ePrint Archive, Report 2015/946},
155 |     year = {2015},
156 |     note = {\url{https://eprint.iacr.org/2015/946}},
157 | }
158 | 
159 | @techreport{back2002,
160 |   author = {Back, Adam},
161 |   month = aug,
162 |   year = 2002,
163 |   title = {Hashcash - A Denial of Service Counter-Measure},
164 |   note = {(implementation released in mar 1997)}
165 | }
166 | 


--------------------------------------------------------------------------------
/doc/cuckoo.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tromp/cuckoo/60af1f78c408b5ef9333e295f8c2dc9055d59d50/doc/cuckoo.pdf


--------------------------------------------------------------------------------
/doc/leancore:
--------------------------------------------------------------------------------
 1 | struct bitmap {  // bit vector addressed by index, stored as an array of u32 words
 2 |   u32 *bits;  // backing storage; reset() treats it as BITMAP_WORDS words long
 3 |   __device__ void reset() {  // clear every bit
 4 |     memset(bits, 0, BITMAP_WORDS * sizeof(u32));
 5 |   }
 6 |   __device__ void set(edge_t n) {  // set bit n; NOTE(review): plain read-modify-write, concurrent callers hitting the same word may need atomicOr - confirm
 7 |     bits[n/32] |= 1U << (n%32);  // unsigned literal so that bit 31 is not shifted into the sign bit of an int
 8 |   }
 9 |   __device__ bool test(node_t n) const {  // true iff bit n is set
10 |     return (bits[n/32] >> (n%32)) & 1;
11 |   }
12 |   __device__ u32 block(node_t n) const {  // the whole word that contains bit n
13 |     return bits[n/32];
14 |   }
15 | };
16 | 
17 | __global__ void count_node_deg(cuckoo_ctx *ctx, u32 uorv, u32 part) {  // for every edge whose bit is set in ctx->alive, set a bit in ctx->nonleaf for its node when that node falls in partition `part`
18 |   int id = blockIdx.x * blockDim.x + threadIdx.x;  // flat index of this thread within the launch
19 |   for (edge_t block = id*32; block < NEDGES; block += ctx->nthreads*32) {  // each thread handles one 32-edge word, then jumps ahead by nthreads words
20 |     u32 alive32 = ctx->alive.block(block);  // the word of ctx->alive holding the bits for edges block..block+31
21 |     for (edge_t nonce = block-1; alive32; ) {  // start one below the word base because __ffs counts bit positions from 1
22 |       u32 ffs = __ffs(alive32);  // 1-based position of the lowest set bit
23 |       nonce += ffs; alive32 >>= ffs;  // move to that edge and discard the bits up to and including it; NOTE(review): if only bit 31 is set, ffs is 32 and this shifts by the full width (assuming u32 is 32 bits) - confirm that yields 0 on the target
24 |       node_t u = dipnode(ctx->sip_keys, nonce, uorv);  // node computed for (nonce, uorv); dipnode is defined elsewhere
25 |       if ((u & PART_MASK) == part)  // skip nodes outside the partition being processed
26 |         ctx->nonleaf.set(u >> PART_BITS);  // the partition bits are dropped from the index
27 |     }
28 |   }
29 | }
30 | 
31 | __global__ void kill_leaf_edges(cuckoo_ctx *ctx, u32 uorv, u32 part) {  // for every edge whose bit is set in ctx->alive, clear that bit when the node's partner (in partition `part`) has no bit set in ctx->nonleaf
32 |   int id = blockIdx.x * blockDim.x + threadIdx.x;  // flat index of this thread within the launch
33 |   for (edge_t block = id*32; block < NEDGES; block += ctx->nthreads*32) {  // each thread handles one 32-edge word, then jumps ahead by nthreads words
34 |     u32 alive32 = ctx->alive.block(block);  // the word of ctx->alive holding the bits for edges block..block+31
35 |     for (edge_t nonce = block-1; alive32; ) {  // start one below the word base because __ffs counts bit positions from 1
36 |       u32 ffs = __ffs(alive32);  // 1-based position of the lowest set bit
37 |       nonce += ffs; alive32 >>= ffs;  // move to that edge and discard the bits up to and including it
38 |       node_t u = dipnode(ctx->sip_keys, nonce, uorv) ^ 1;  // flip the lowest bit: look up the other node of the pair, not the edge's own node
39 |       if ((u & PART_MASK) == part && !ctx->nonleaf.test(u >> PART_BITS))  // partner is in this partition and its nonleaf bit is clear
40 |         ctx->alive.reset(nonce);  // NOTE(review): the bitmap struct shown in this file only declares reset() with no argument; a per-bit clear is presumably intended - confirm
41 |     }
42 |   }
43 | }
44 | 
45 |     for (u32 round=0; round < trims; round++) {  // run `trims` trimming rounds
46 |       for (u32 uorv = 0; uorv < 2; uorv++) {  // once for each of the two uorv values
47 |         for (u32 part = 0; part <= PART_MASK; part++) {  // one node partition at a time, 0..PART_MASK inclusive
48 |           checkCudaErrors(cudaMemset(ctx.nonleaf.bits, 0, nodeBytes));  // clear the nonleaf bits before each marking pass
49 |           count_node_deg<<<nthreads/tpb,tpb >>>(device_ctx, uorv, part);  // marking pass: nthreads/tpb blocks of tpb threads
50 |           kill_leaf_edges<<<nthreads/tpb,tpb >>>(device_ctx, uorv, part);  // clearing pass over the same launch shape
51 |         }
52 |       }
53 |     }
54 | 


--------------------------------------------------------------------------------
/doc/mathspec:
--------------------------------------------------------------------------------
 1 | Let siphash24 be the standard siphash-2-4 function [0]
 2 | with a 256-bit key K=<k0,k1,k2,k3> instead of the usual 128-bit one,
 3 | and a modified Initialization phase that sets v_i to k_i for 0 <= i < 4.
 4 | 
 5 | Set N = 2^32
 6 | Define a bipartite graph [1] G_K=(V,E) with N edges on N + N nodes as follows:
 7 |   for 0 <= i < N, E_i = (V_i_0, V_i_1) = (siphash24(K,2*i) % N, siphash24(K,2*i+1) % N)
 8 | 
 9 | From G_K we obtain the graph G'_K by identifying nodes that differ only in the last bit:
10 |   for 0 <= i < N, E'_i = (V_i_0 >> 1, V_i_1 >> 1)
11 | 
12 | A Cuckatoo32 solution for key K is a 42-cycle [2] in G'_K that is a matching [3] in G_K.
13 | In other words, it's a cycle on node-pairs with edges incident on both nodes in a pair.
14 | 
15 | For verification purposes, the solution is given as the sequence of 42 edge indices in increasing order.
16 | 
17 | [0] https://cr.yp.to/siphash/siphash-20120918.pdf
18 | [1] https://en.wikipedia.org/wiki/Bipartite_graph
19 | [2] https://en.wikipedia.org/wiki/Cycle_(graph_theory)
20 | [3] https://en.wikipedia.org/wiki/Matching_(graph_theory)
21 | 


--------------------------------------------------------------------------------
/doc/pbkdf:
--------------------------------------------------------------------------------
 1 | A Cuckoo inspired Password Based Key Derivation Function
 2 | 
 3 | 
 4 | Fix memory parameter N=2^n. Fix path length d.  Given password p.
 5 | Let siphash key k = BLAKE2(p).
 6 | Define bipartite graph G=(U,V) on N+N nodes with N edges, where for 0<=i<N,
 7 | edge i has U endpoint siphash(k,2i) % N and V endpoint siphash(k,2i+1) % N.
 8 | Define w as size N bitvector of whether edge i
 9 | is adjacent to a U node with leaf distance > d.
10 | 
11 | Let PBKDF_{N,d}(p) = BLAKE2(w)
12 | 
13 | Efficiently computable in 2N bits of memory and O(N) time.
14 | Hard to compute in less than N bits of memory.
15 | 


--------------------------------------------------------------------------------
/doc/simplesolve:
--------------------------------------------------------------------------------
 1 | void count_node_deg(u32 uorv) {  // set a nonleaf bit for the node sipnode() yields for (nonce, uorv), for every nonce that alive() accepts
 2 |   for (edge_t nonce = 0; nonce < NEDGES; nonce++) {  // visit all NEDGES edge indices
 3 |     if (alive(nonce)) {  // skip edges that alive() rejects
 4 |       nonleaf.set(sipnode(sip_keys, nonce, uorv));
 5 |     }
 6 |   }
 7 | }
 8 | 
 9 | void kill_leaf_edges(u32 uorv) {  // call alive.reset(nonce) for every accepted edge whose partner node has no nonleaf bit set
10 |   for (edge_t nonce = 0; nonce < NEDGES; nonce++) {  // visit all NEDGES edge indices
11 |     if (alive(nonce)) {  // skip edges that alive() rejects
12 |       if (!nonleaf.test(sipnode(sip_keys, nonce, uorv) ^ 1))  // ^ 1 flips the lowest bit: test the other node of the pair
13 |         alive.reset(nonce);
14 |     }
15 |   }
16 | }
17 | 
18 |   for (u32 round=0; round < trims; round++) {  // run `trims` trimming rounds
19 |     for (u32 uorv = 0; uorv < 2; uorv++) {  // once for each of the two uorv values
20 |       memset(nonleaf.bits, 0, nodeBytes);  // clear the nonleaf bits before each marking pass
21 |       count_node_deg(uorv);
22 |       kill_leaf_edges(uorv);
23 |     }
24 |   }
25 | 


--------------------------------------------------------------------------------
/doc/spec:
--------------------------------------------------------------------------------
 1 | #define ROTL(x,b) (u64)( ((x) << (b)) | ( (x) >> (64 - (b))) )
 2 | #define SIPROUND \
 3 |   do { \
 4 |     v0 += v1; v2 += v3; v1 = ROTL(v1,13); \
 5 |     v3 = ROTL(v3,16); v1 ^= v0; v3 ^= v2; \         _\(        == __\
 6 |     v0 = ROTL(v0,32); v2 += v1; v0 += v3; \         / '>    __.-"\---|__
 7 |     v1 = ROTL(v1,17);   v3 = ROTL(v3,21); \        (  (\   /  \\_@\-'/  \
 8 |     v1 ^= v2; v3 ^= v0; v2 = ROTL(v2,32); \        ""-'\   \__/      \__/
 9 |   } while(0)
10 |  
11 | u64 siphash24(const siphash_keys *keys, const u64 nonce) {  // hash `nonce` under the four key words k0..k3 of *keys
12 |   u64 v0 = keys->k0, v1 = keys->k1, v2 = keys->k2, v3 = keys->k3 ^ nonce;  // state starts as the key words, with the nonce xored into v3
13 |   SIPROUND; SIPROUND;  // two rounds after absorbing the nonce
14 |   v0 ^= nonce; v2 ^= 0xff;  // xor the nonce into v0 and the constant 0xff into v2
15 |   SIPROUND; SIPROUND; SIPROUND; SIPROUND;  // four more rounds
16 |   return (v0 ^ v1) ^ (v2  ^ v3);  // output is the xor of all four state words
17 | }
18 | 
19 | int verify(edge_t edges[PROOFSIZE], siphash_keys *keys) {  // check that the PROOFSIZE edge indices in edges[] form a single cycle; returns POW_OK or the POW_* code of the first failure
20 |   node_t uvs[2*PROOFSIZE];  // endpoints: uvs[2n] and uvs[2n+1] belong to edges[n]
21 |   for (u32 n = 0; n < PROOFSIZE; n++) {
22 |     if (edges[n] > EDGEMASK)  // edge index out of range
23 |       return POW_TOO_BIG;
24 |     if (n && edges[n] <= edges[n-1])  // indices must be strictly increasing
25 |       return POW_TOO_SMALL;
26 |     uvs[2*n  ] = siphash24(keys, 2*edges[n]  ) & EDGEMASK;  // first endpoint: hash of the even value 2*edge
27 |     uvs[2*n+1] = siphash24(keys, 2*edges[n]+1) & EDGEMASK;  // second endpoint: hash of the odd value 2*edge+1
28 |   }
29 |   u32 n = 0, i = 0, j;  // n counts edges walked; i is the endpoint slot the walk is currently at
30 |   do {
31 |     for (u32 k = j = i; (k = (k+2) % (2*PROOFSIZE)) != i; ) {  // visit every other slot with the same index parity as i
32 |       if (uvs[k] >> 1 == uvs[i] >> 1) {  // same node pair: the two nodes agree in all but the lowest bit
33 |         if (j != i) return POW_BRANCH;  // a second match means the walk would fork
34 |         j = k;
35 |       }
36 |     }
37 |     if (j == i || uvs[j] == uvs[i]) return POW_DEAD_END;  // no match, or the match is the identical node rather than its partner
38 |     i = j^1;  // cross the matched edge to its other endpoint slot
39 |     n++;
40 |   } while (i != 0);  // stop once the walk is back at slot 0
41 |   return n == PROOFSIZE ? POW_OK : POW_SHORT_CYCLE;  // closing before PROOFSIZE steps means a shorter cycle
42 | }
43 |                                                                  
44 | Ascii Art Credits
45 | AsH from http://ascii.co.uk/art/cockatoo
46 | jgs from https://web.archive.org/web/20091028022932/\
47 |           http://www.geocities.com/SoHo/7373/transp.htm#BIKE
48 | 


--------------------------------------------------------------------------------
/img/cuckatoo_cycle.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tromp/cuckoo/60af1f78c408b5ef9333e295f8c2dc9055d59d50/img/cuckatoo_cycle.jpg


--------------------------------------------------------------------------------
/img/cuckoo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tromp/cuckoo/60af1f78c408b5ef9333e295f8c2dc9055d59d50/img/cuckoo.png


--------------------------------------------------------------------------------
/img/emissions.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tromp/cuckoo/60af1f78c408b5ef9333e295f8c2dc9055d59d50/img/emissions.gif


--------------------------------------------------------------------------------
/img/grinlogo.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tromp/cuckoo/60af1f78c408b5ef9333e295f8c2dc9055d59d50/img/grinlogo.pdf


--------------------------------------------------------------------------------
/img/grinlogo.ps:
--------------------------------------------------------------------------------
 1 | %!PS
 2 | 42 42 scale 3 15 translate 2 setlinecap .1 setlinewidth .8 setgray  % 42x units, origin moved to (3,15), projecting square caps, 0.1-unit strokes, light gray
 3 | 1 1 6 { 0 moveto 0 2 rlineto } for  % six vertical strokes at x = 1..6, each from y = 0 up to y = 2
 4 | 1 0 moveto 6 1 1 270 90 arc 2 2 lineto  % bottom edge to (6,0), right half-circle around (6,1), then top edge back to (2,2)
 5 | 0 1 moveto 7 1 lineto stroke 0 setgray  % horizontal midline from (0,1) to (7,1); paint the gray path, then switch to black
 6 | 2 2 moveto 1 1 1 90 270 arc 2 0 lineto 2 1 lineto 1 1 lineto  % left half-circle around (1,1) with a hook to (2,1) and (1,1) - presumably the first letter of the logo
 7 | 3 0 moveto 3 2 lineto 4 2 lineto 4 0 moveto 4 1 lineto  % stem (3,0)-(3,2) with a top bar to (4,2), plus a separate short stem (4,0)-(4,1)
 8 | 5 0 moveto 5 2 lineto 6 2 lineto 6 0 lineto  % open rectangle (5,0)-(5,2)-(6,2)-(6,0)
 9 | stroke showpage  % paint the black path and emit the page
10 | 


--------------------------------------------------------------------------------
/img/inflation.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tromp/cuckoo/60af1f78c408b5ef9333e295f8c2dc9055d59d50/img/inflation.gif


--------------------------------------------------------------------------------
/img/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tromp/cuckoo/60af1f78c408b5ef9333e295f8c2dc9055d59d50/img/logo.png


--------------------------------------------------------------------------------
/private/bfs:
--------------------------------------------------------------------------------
 1 | 4 	 20.7
 2 | 6 	 16.7
 3 | 8 	 29.8
 4 | 10 	 30.8
 5 | 12 	 20.0
 6 | 14 	 26.1
 7 | 16 	 38.6
 8 | 20 	 31.5
 9 | 24 	 27.1
10 | 28 	 33.6
11 | 32 	 45.8
12 | 40 	 61.3
13 | 48 	 51.2
14 | 56 	 31.9
15 | 64 	 59.1
16 | 


--------------------------------------------------------------------------------
/private/comp.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use POSIX;
 3 | use strict;
 4 | 
 5 | sub ent1 {  # one term of the binary entropy, -p*log2(p), with 0*log2(0) taken as 0
 6 |   my ($p) = @_;
 7 |   return $p == 0 ? 0 : -$p*log($p)/log(2.0);  # Perl's log() dies on 0, which ent() would hit for p = 0 or p = 1
 8 | }
 9 | 
10 | sub ent {  # binary entropy in bits of probability p: ent1(p) + ent1(1-p)
11 |   my ($p) = @_;
12 |   return ent1($p) + ent1(1-$p);
13 | }
14 | 
15 | my $f = 0;  # number of matching input lines seen so far; also the index into @comp
16 | my @comp;  # $comp[$f] = entropy computed from the $f-th matching line
17 | my @len;  # $len[$mp] = step count produced by the second loop below
18 | while (<>) {
19 |   next unless /8 part V0 load (\d+)/;  # ignore lines that do not match this load-report pattern
20 |   my $load = $1;
21 |   my $nc = 3200 * ++$f/100.0;  # $f percent of 3200
22 |   my $nunc = 3200-$nc;  # the remainder of the 3200
23 |   my $p = ($load-$nunc)/$nc;
24 |   my $e = ent($p);
25 |   $comp[$f] = $e;
26 |   # printf("%2d %4d %3.4lf %3.4lf\n",$f,$load,$p,$e);
27 |   printf("(%2d,%0.4lf)%s",$f, $e, $f % 5 ? " " : "\n");  # five (index,entropy) pairs per output line
28 | }
29 | for my $mp (19..99) { # memory percentage
30 |   printf("\nmp = %2d ",$mp);
31 |   my $i;
32 |   my $np = $mp;   # nonce percentage
33 |   for ($i=0; $np < 100;  $i++) {  # count steps until $np reaches 100
34 |     my $c = $comp[$mp];
35 |     $np += (1-$c)*$mp;  # NOTE(review): if (1-$c)*$mp < 1 the int() below undoes the increment and this loop never ends - confirm the inputs rule that out
36 |     $np = int($np);  # truncate to a whole percentage after every step
37 |   }
38 |   $len[$mp] = $i;  # steps needed when starting from $mp
39 |   # printf("%2d %3d",$mp, $i);
40 | }
41 | for my $tot (25..100) { # memory percentage
42 |   my $best=999999;  # running minimum of $cost
43 |   my $bmp;  # the $mp that achieved $best
44 |   for my $mp (19..$tot-1) { # memory percentage
45 |     my $cost = $len[$mp] * 16*int(ceil(200/($tot-$mp)));
46 |     next unless $cost < $best;  # keep only strict improvements
47 |     $best = $cost;
48 |     $bmp = $mp;
49 |   }
50 |   printf("(%2d,%3d)%s",$tot, $best, $tot % 5 ? " " : "\n");  # five (total,best cost) pairs per output line
51 |   # printf("(%2d,%3d)%c",$tot, $bmp, $tot%5?" ":"\n");
52 | }
53 | print "\n";
54 | 


--------------------------------------------------------------------------------
/private/cyclefreq:
--------------------------------------------------------------------------------
  1 | 2 50083
  2 | 4 25166
  3 | 6 16803
  4 | 8 12391
  5 | 10 9939
  6 | 12 8363
  7 | 14 7139
  8 | 16 6273
  9 | 18 5488
 10 | 20 4928
 11 | 22 4507
 12 | 24 4182
 13 | 26 3818
 14 | 28 3595
 15 | 30 3511
 16 | 32 3161
 17 | 34 2889
 18 | 36 2866
 19 | 38 2552
 20 | 40 2506
 21 | 42 2434
 22 | 44 2247
 23 | 46 2238
 24 | 48 2159
 25 | 50 1935
 26 | 52 1894
 27 | 54 1828
 28 | 56 1811
 29 | 58 1757
 30 | 60 1637
 31 | 62 1586
 32 | 64 1484
 33 | 66 1492
 34 | 68 1513
 35 | 70 1396
 36 | 72 1358
 37 | 74 1390
 38 | 76 1348
 39 | 78 1278
 40 | 80 1232
 41 | 82 1195
 42 | 84 1191
 43 | 86 1090
 44 | 88 1199
 45 | 90 1056
 46 | 92 1139
 47 | 94 1069
 48 | 96 1061
 49 | 98 999
 50 | 100 1001
 51 | 102 961
 52 | 104 963
 53 | 106 964
 54 | 108 923
 55 | 110 920
 56 | 112 932
 57 | 114 845
 58 | 116 858
 59 | 118 838
 60 | 120 878
 61 | 122 846
 62 | 124 771
 63 | 126 865
 64 | 128 781
 65 | 130 781
 66 | 132 712
 67 | 134 722
 68 | 136 724
 69 | 138 713
 70 | 140 796
 71 | 142 726
 72 | 144 702
 73 | 146 641
 74 | 148 661
 75 | 150 642
 76 | 152 659
 77 | 154 628
 78 | 156 617
 79 | 158 623
 80 | 160 606
 81 | 162 583
 82 | 164 613
 83 | 166 558
 84 | 168 575
 85 | 170 601
 86 | 172 558
 87 | 174 554
 88 | 176 499
 89 | 178 567
 90 | 180 497
 91 | 182 522
 92 | 184 518
 93 | 186 484
 94 | 188 518
 95 | 190 494
 96 | 192 469
 97 | 194 441
 98 | 196 472
 99 | 198 476
100 | 200 473
101 | 202 453
102 | 204 440
103 | 206 448
104 | 208 478
105 | 210 445
106 | 212 444
107 | 214 429
108 | 216 444
109 | 218 436
110 | 220 412
111 | 222 424
112 | 224 398
113 | 226 413
114 | 228 405
115 | 230 412
116 | 232 419
117 | 234 401
118 | 236 403
119 | 238 405
120 | 240 377
121 | 242 369
122 | 244 387
123 | 246 351
124 | 248 322
125 | 250 379
126 | 252 341
127 | 254 379
128 | 256 340
129 | 258 346
130 | 260 359
131 | 262 362
132 | 264 344
133 | 266 343
134 | 268 327
135 | 270 367
136 | 272 327
137 | 274 319
138 | 276 341
139 | 278 351
140 | 280 323
141 | 282 325
142 | 284 308
143 | 286 306
144 | 288 326
145 | 290 298
146 | 292 339
147 | 294 295
148 | 296 302
149 | 298 304
150 | 300 324
151 | 302 310
152 | 304 303
153 | 306 277
154 | 308 282
155 | 310 283
156 | 312 276
157 | 314 251
158 | 316 270
159 | 318 234
160 | 320 260
161 | 322 248
162 | 324 276
163 | 326 259
164 | 328 260
165 | 330 269
166 | 332 246
167 | 334 240
168 | 336 236
169 | 338 252
170 | 340 229
171 | 342 253
172 | 344 223
173 | 346 229
174 | 348 244
175 | 350 227
176 | 352 266
177 | 354 228
178 | 356 227
179 | 358 212
180 | 360 221
181 | 362 262
182 | 364 217
183 | 366 204
184 | 368 214
185 | 370 206
186 | 372 194
187 | 374 195
188 | 376 197
189 | 378 192
190 | 380 202
191 | 382 206
192 | 384 220
193 | 386 212
194 | 388 214
195 | 390 214
196 | 392 184
197 | 394 212
198 | 396 203
199 | 398 190
200 | 400 215
201 | 402 153
202 | 404 185
203 | 406 194
204 | 408 184
205 | 410 186
206 | 412 157
207 | 414 177
208 | 416 202
209 | 418 179
210 | 420 179
211 | 422 176
212 | 424 177
213 | 426 172
214 | 428 174
215 | 430 162
216 | 432 154
217 | 434 149
218 | 436 149
219 | 438 138
220 | 440 145
221 | 442 199
222 | 444 160
223 | 446 145
224 | 448 168
225 | 450 161
226 | 452 120
227 | 454 139
228 | 456 157
229 | 458 151
230 | 460 132
231 | 462 188
232 | 464 153
233 | 466 155
234 | 468 155
235 | 470 153
236 | 472 150
237 | 474 146
238 | 476 139
239 | 478 145
240 | 480 150
241 | 482 137
242 | 484 118
243 | 486 133
244 | 488 142
245 | 490 151
246 | 492 128
247 | 494 140
248 | 496 123
249 | 498 110
250 | 500 105
251 | 502 136
252 | 504 132
253 | 506 126
254 | 508 124
255 | 510 120
256 | 512 144
257 | 514 122
258 | 516 113
259 | 518 123
260 | 520 120
261 | 522 110
262 | 524 109
263 | 526 115
264 | 528 138
265 | 530 117
266 | 532 106
267 | 534 130
268 | 536 129
269 | 538 113
270 | 540 134
271 | 542 123
272 | 544 103
273 | 546 103
274 | 548 116
275 | 550 91
276 | 552 102
277 | 554 91
278 | 556 84
279 | 558 114
280 | 560 98
281 | 562 92
282 | 564 98
283 | 566 84
284 | 568 95
285 | 570 72
286 | 572 93
287 | 574 97
288 | 576 93
289 | 578 82
290 | 580 86
291 | 582 91
292 | 584 69
293 | 586 101
294 | 588 63
295 | 590 103
296 | 592 96
297 | 594 102
298 | 596 85
299 | 598 79
300 | 600 84
301 | 602 92
302 | 604 77
303 | 606 71
304 | 608 73
305 | 610 86
306 | 612 68
307 | 614 77
308 | 616 73
309 | 618 77
310 | 620 72
311 | 622 76
312 | 624 74
313 | 626 68
314 | 628 72
315 | 630 74
316 | 632 67
317 | 634 67
318 | 636 78
319 | 638 68
320 | 640 69
321 | 642 76
322 | 644 59
323 | 646 69
324 | 648 66
325 | 650 65
326 | 652 55
327 | 654 62
328 | 656 66
329 | 658 63
330 | 660 48
331 | 662 67
332 | 664 52
333 | 666 62
334 | 668 57
335 | 670 53
336 | 672 73
337 | 674 48
338 | 676 43
339 | 678 57
340 | 680 62
341 | 682 59
342 | 684 59
343 | 686 58
344 | 688 47
345 | 690 57
346 | 692 51
347 | 694 65
348 | 696 58
349 | 698 56
350 | 700 63
351 | 702 60
352 | 704 52
353 | 706 63
354 | 708 48
355 | 710 62
356 | 712 53
357 | 714 41
358 | 716 37
359 | 718 47
360 | 720 44
361 | 722 44
362 | 724 47
363 | 726 53
364 | 728 46
365 | 730 44
366 | 732 47
367 | 734 43
368 | 736 48
369 | 738 37
370 | 740 37
371 | 742 50
372 | 744 46
373 | 746 34
374 | 748 27
375 | 750 46
376 | 752 41
377 | 754 53
378 | 756 36
379 | 758 46
380 | 760 36
381 | 762 46
382 | 764 39
383 | 766 33
384 | 768 36
385 | 770 35
386 | 772 29
387 | 774 42
388 | 776 40
389 | 778 27
390 | 780 28
391 | 782 36
392 | 784 40
393 | 786 33
394 | 788 34
395 | 790 24
396 | 792 31
397 | 794 31
398 | 796 34
399 | 798 30
400 | 800 24
401 | 802 28
402 | 804 35
403 | 806 24
404 | 808 29
405 | 810 46
406 | 812 32
407 | 814 30
408 | 816 34
409 | 818 32
410 | 820 36
411 | 822 36
412 | 824 26
413 | 826 18
414 | 828 36
415 | 830 21
416 | 832 26
417 | 834 35
418 | 836 27
419 | 838 28
420 | 840 19
421 | 842 29
422 | 844 29
423 | 846 31
424 | 848 28
425 | 850 28
426 | 852 29
427 | 854 25
428 | 856 30
429 | 858 30
430 | 860 23
431 | 862 25
432 | 864 21
433 | 866 20
434 | 868 25
435 | 870 22
436 | 872 20
437 | 874 29
438 | 876 20
439 | 878 23
440 | 880 24
441 | 882 12
442 | 884 25
443 | 886 28
444 | 888 23
445 | 890 14
446 | 892 14
447 | 894 15
448 | 896 27
449 | 898 26
450 | 900 15
451 | 902 19
452 | 904 22
453 | 906 25
454 | 908 21
455 | 910 21
456 | 912 18
457 | 914 18
458 | 916 18
459 | 918 21
460 | 920 26
461 | 922 14
462 | 924 23
463 | 926 18
464 | 928 29
465 | 930 18
466 | 932 14
467 | 934 17
468 | 936 15
469 | 938 23
470 | 940 18
471 | 942 24
472 | 944 14
473 | 946 16
474 | 948 12
475 | 950 12
476 | 952 14
477 | 954 7
478 | 956 20
479 | 958 16
480 | 960 13
481 | 962 18
482 | 964 18
483 | 966 18
484 | 968 16
485 | 970 13
486 | 972 11
487 | 974 15
488 | 976 25
489 | 978 12
490 | 980 15
491 | 982 23
492 | 984 17
493 | 986 4
494 | 988 12
495 | 990 11
496 | 992 11
497 | 994 19
498 | 996 11
499 | 998 13
500 | 


--------------------------------------------------------------------------------
/private/equihash:
--------------------------------------------------------------------------------
 1 | 			Cuckoo Cycle	Equihash
 2 | 
 3 | Problem Size		Huge		Medium
 4 | Solution Size		Medium		Medium to High
 5 | Progress Free		Y		Y
 6 | Amortization-free	High		Medium
 7 | TMTO steepness		linear (~20)	exponential
 8 | Time+Memory Tunable	Smoothly	Roughly
 9 | Time*Memory Tunable			Roughly
10 | Optimization Free	High		Medium to High
11 | Bandwidth Limited			Y
12 | Latency Limited		Y
13 | Speed			Low		Medium
14 | Awesome Logo		Y		
15 | 
16 | 
17 | http://discussions.password-hashing.narkive.com/BQbAY4yu/phc-asymmetric-proof-of-work-based-on-the-generalized-birthday-problem
18 | 
19 | (individual messages in above thread also available at
20 | http://lists.openwall.net/phc-discussions/2015/09/30/11
21 | http://lists.openwall.net/phc-discussions/2015/09/30/12
22 | http://lists.openwall.net/phc-discussions/2015/09/30/13
23 | http://lists.openwall.net/phc-discussions/2015/09/30/14
24 | )
25 | 
26 | https://www.reddit.com/r/Bitcoin/comments/3n5nws/research_paper_asymmetric_proofofwork_based_on/
27 | 
28 | https://arxiv.org/abs/1606.03588
29 | Egalitarian computing
30 | Alex Biryukov, Dmitry Khovratovich
31 | (Submitted on 11 Jun 2016)
32 | 
33 | Finally, we mention schemes Momentum [21] and Cuckoo
34 | cycle [32], which provide fast verification due to their
35 | combinatorial nature. They rely on the memory requirements for the
36 | collision search (Momentum) or graph cycle finding (Cuckoo).
37 | However, Momentum is vulnerable to a sublinear time-space
38 | tradeoff [11], whereas the first version of the Cuckoo scheme
39 | was recently broken in [6].
40 | 
41 | 
42 | https://github.com/zcash/zcash/issues/27
43 | "Select a mining proof-of-work algorithm. on Dec 11, 2014"
44 | 
45 | 


--------------------------------------------------------------------------------
/private/found.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/local/bin/perl
 2 | use strict;
 3 | 
 4 | my $len;            # cycle length announced by the most recent "Looking for N-cycle" line
 5 | 
 6 | my @foundat = ();   # $foundat[$part]: runs whose first cycle of length $len was logged after $part part lines
 7 | my $n = 0;          # number of runs, i.e. "Looking for" header lines seen
 8 | my $part;           # part lines seen so far in the current run
 9 | my $nparts;         # parts per run, taken from the "/N parts" text on the header line
10 | my $found;          # true once the current run has logged a cycle of length $len
11 | my $time = 0;       # accumulated user time in seconds
12 | while (<>) {
13 |   if (/Looking for (\d+)-cycle/) {   # header line: start a new run
14 |     $len = $1;
15 |     $n++;
16 |     die unless /\/(\d+) parts/;      # the header line must also contain "/N parts"
17 |     $nparts = $1;
18 |     $found = $part = 0;
19 |     next;
20 |   }
21 |   if (/^(\S+)user / ) {              # line starting with "<seconds>user "
22 |     $time += $1;
23 |   } elsif (/^user\s+(\d+)m([\.\d]+)/ ) {   # line of the form "user <min>m<sec>"
24 |     $time += 60*$1 + $2;
25 |   }
26 |   if (!$found && /(\d+)-cycle found/) {   # only the first cycle of the wanted length counts per run
27 |     my $l = $1;
28 |     if ($l == $len) {
29 |       $foundat[$part]++;
30 |       $found = 1;
31 |     }
32 |   }
33 |   if (/OVERLOAD/) {                  # echo overload reports and skip the part check below
34 |     print $_;
35 |     next;
36 |   }
37 |   if (/[uv]part (\d+)/) {
38 |     die unless $1 == $part;          # part numbers must appear in order 0,1,2,...
39 |     $part++;
40 |   }
41 | }
42 | my $quartsum = 0;     # runs solved by the time the quartile threshold is first reached
43 | my $quartparts = 0;   # number of parts at which at least $n/4 runs had found their cycle
44 | my $sum = 0;          # total number of runs that found a cycle
45 | my $sumat = 0;        # sum over solved runs of ($part index + 1)
46 | for my $i (0..$#foundat) {
47 |   printf("%d\t %d\n", $i, $foundat[$i] || 0);   # slots that were never hit are undef; report them as 0
48 |   $sum += $foundat[$i] || 0;
49 |   if (!$quartparts && $sum >= $n/4) {
50 |     $quartparts = $i+1;
51 |     $quartsum = $sum;
52 |   }
53 |   $sumat += ($i+1) * ($foundat[$i] || 0);
54 | }
55 | print "Total\t $sum/$n\n";
56 | printf("Avg parts\t %.1lf/%d\n", $sum ? $sumat/$sum : 0, $nparts);   # no solved run: print 0 instead of dividing by zero
57 | printf("Avg time\t %.1lf\n", $n ? $time/$n : 0);                     # no run at all: print 0 instead of dividing by zero
58 | printf("Quartile parts\t (%d/%d) at %d\n", $quartsum, $n, $quartparts);
59 | printf("Quartile time\t %.1lf\n", ($n && $sum) ? ($time/$n)*($quartparts/($sumat/$sum)) : 0);
60 | 


--------------------------------------------------------------------------------
/private/minbfs:
--------------------------------------------------------------------------------
 1 | 2 	 4.0
 2 | 4 	 9.2
 3 | 6 	 14.0
 4 | 8 	 20.9
 5 | 10 	 18.7
 6 | 12 	 36.2
 7 | 14 	 41.8
 8 | 16 	 41.0
 9 | 20 	 43.8
10 | 24 	 66.3
11 | 28 	 51.9
12 | 32 	 62.3
13 | 40 	 73.4
14 | 48 	 118.4
15 | 56 	 116.6
16 | 64 	 104.7
17 | 


--------------------------------------------------------------------------------
/private/real:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | my $seen = 0;                               # number of "real" timing lines processed so far
 3 | while (my $line = <>) {
 4 |   $line =~ /^real\s+(.+)m(.+)s$/ or next;   # only "real <min>m<sec>s" lines are used
 5 |   $seen++;
 6 |   my $secs = $1*60 + $2;                    # elapsed time of this measurement in seconds
 7 |   push @t, $secs;
 8 |   my $ratio = $t[0] / $secs;                # first measurement divided by this one
 9 |   printf("(%d,%.3f) ", $seen, $ratio);
10 |   print "\n" if $seen % 5 == 0;             # five pairs per output line
11 | }
12 | 


--------------------------------------------------------------------------------
/src/Makefile:
--------------------------------------------------------------------------------
1 | SUBDIRS := cuckoo cuckatoo cuckaroo cuckarood
2 | # Default target: run make in every directory listed in SUBDIRS. NOTE(review): src/ also contains cuckaroom and cuckarooz with their own Makefiles; confirm whether they should be listed.
3 | all: $(SUBDIRS)
4 | $(SUBDIRS):
5 | 	$(MAKE) -C $@
6 | # The sub-directory targets name existing directories, so they are declared phony to make the recursive make run every time.
7 | .PHONY: all $(SUBDIRS)
8 | 


--------------------------------------------------------------------------------
/src/crypto/blake2-impl.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |    BLAKE2 reference source code package - reference C implementations
  3 | 
  4 |    Copyright 2012, Samuel Neves <sneves@dei.uc.pt>.  You may use this under the
  5 |    terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
  6 |    your option.  The terms of these licenses can be found at:
  7 | 
  8 |    - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
  9 |    - OpenSSL license   : https://www.openssl.org/source/license.html
 10 |    - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
 11 | 
 12 |    More information about the BLAKE2 hash function can be found at
 13 |    https://blake2.net.
 14 | */
 15 | #ifndef BLAKE2_IMPL_H
 16 | #define BLAKE2_IMPL_H
 17 | 
 18 | #include <stdint.h>
 19 | #include <string.h>
 20 | 
 21 | #if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L)
 22 |   #if   defined(_MSC_VER)
 23 |     #define BLAKE2_INLINE __inline
 24 |   #elif defined(__GNUC__)
 25 |     #define BLAKE2_INLINE __inline__
 26 |   #else
 27 |     #define BLAKE2_INLINE
 28 |   #endif
 29 | #else
 30 |   #define BLAKE2_INLINE inline
 31 | #endif
 32 | 
 33 | static BLAKE2_INLINE uint32_t load32( const void *src )
 34 | { /* Read a 32-bit little-endian word from src. */
 35 | #if defined(NATIVE_LITTLE_ENDIAN)
 36 |   uint32_t word;
 37 |   memcpy(&word, src, sizeof word); /* bytes are copied as-is */
 38 |   return word;
 39 | #else
 40 |   const uint8_t *bytes = ( const uint8_t * )src;
 41 |   uint32_t word = 0;
 42 |   int i;
 43 |   for( i = 3; i >= 0; --i ) word = ( word << 8 ) | bytes[i]; /* most significant byte first */
 44 |   return word;
 45 | #endif
 46 | }
 47 | 
 48 | static BLAKE2_INLINE uint64_t load64( const void *src )
 49 | {
 50 |   /* Read a 64-bit little-endian word from src. */
 51 | #if defined(NATIVE_LITTLE_ENDIAN)
 52 |   uint64_t word;
 53 |   memcpy(&word, src, sizeof word); /* bytes are copied as-is */
 54 |   return word;
 55 | #else
 56 |   /* Portable path: build the value from individual bytes, starting */
 57 |   /* with the most significant one (offset 7) and shifting it up.   */
 58 |   const uint8_t *bytes = ( const uint8_t * )src;
 59 |   uint64_t word = 0;
 60 |   int i;
 61 |   for( i = 7; i >= 0; --i )
 62 |     word = ( word << 8 ) | bytes[i];
 63 |   return word;
 64 | #endif
 65 | }
 66 | 
 67 | static BLAKE2_INLINE uint16_t load16( const void *src )
 68 | { /* Read a 16-bit little-endian word from src. */
 69 | #if defined(NATIVE_LITTLE_ENDIAN)
 70 |   uint16_t word;
 71 |   memcpy(&word, src, sizeof word); /* bytes are copied as-is */
 72 |   return word;
 73 | #else
 74 |   const uint8_t *bytes = ( const uint8_t * )src;
 75 |   const unsigned lo = bytes[0], hi = bytes[1]; /* byte 0 is the least significant */
 76 |   return lo | ( hi << 8 );
 77 | #endif
 78 | }
 79 | 
 80 | static BLAKE2_INLINE void store16( void *dst, uint16_t w )
 81 | { /* Write w to dst as two little-endian bytes. */
 82 | #if defined(NATIVE_LITTLE_ENDIAN)
 83 |   memcpy(dst, &w, sizeof w);
 84 | #else
 85 |   uint8_t *out = ( uint8_t * )dst;
 86 |   out[0] = ( uint8_t )( w >> 0 );
 87 |   out[1] = ( uint8_t )( w >> 8 );
 88 | #endif
 89 | }
 90 | 
 91 | static BLAKE2_INLINE void store32( void *dst, uint32_t w )
 92 | { /* Write w to dst as four little-endian bytes. */
 93 | #if defined(NATIVE_LITTLE_ENDIAN)
 94 |   memcpy(dst, &w, sizeof w);
 95 | #else
 96 |   uint8_t *out = ( uint8_t * )dst;
 97 |   int i;
 98 |   for( i = 0; i < 4; ++i ) {
 99 |     out[i] = ( uint8_t )w; w >>= 8; /* emit the low byte, then expose the next one */
100 |   }
101 | #endif
102 | }
103 | 
104 | static BLAKE2_INLINE void store64( void *dst, uint64_t w )
105 | {
106 |   /* Write w to dst as eight little-endian bytes. */
107 | #if defined(NATIVE_LITTLE_ENDIAN)
108 |   memcpy(dst, &w, sizeof w);
109 | #else
110 |   /* Portable path: emit the low byte, then shift the next one down. */
111 |   uint8_t *out = ( uint8_t * )dst;
112 |   int i;
113 |   for( i = 0; i < 8; ++i )
114 |   {
115 |     out[i] = ( uint8_t )w;
116 |     w >>= 8;
117 |   }
118 | #endif
119 | }
120 | 
121 | static BLAKE2_INLINE uint64_t load48( const void *src )
122 | { /* Read six bytes from src as a little-endian value in the low 48 bits of the result. */
123 |   const uint8_t *bytes = ( const uint8_t * )src;
124 |   uint64_t word = 0;
125 |   int i;
126 |   /* Start with the most significant byte (offset 5) and shift it up. */
127 |   for( i = 5; i >= 0; --i )
128 |     word = ( word << 8 ) | bytes[i];
129 |   return word;
130 | }
131 | 
132 | static BLAKE2_INLINE void store48( void *dst, uint64_t w )
133 | { /* Write the low 48 bits of w to dst as six little-endian bytes. */
134 |   uint8_t *out = ( uint8_t * )dst;
135 |   int i;
136 |   for( i = 0; i < 6; ++i )
137 |   {
138 |     out[i] = ( uint8_t )w;
139 |     w >>= 8;
140 |   }
141 | }
142 | 
143 | static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c )
144 | { /* Rotate w right by c bits; c is taken modulo 32. */
145 |   return ( w >> ( c & 31 ) ) | ( w << ( ( 32 - c ) & 31 ) ); /* masking keeps both shift counts below 32, so c == 0 no longer shifts by the full width (undefined in C) */
146 | }
147 | 
148 | static BLAKE2_INLINE uint64_t rotr64( const uint64_t w, const unsigned c )
149 | { /* Rotate w right by c bits; c is taken modulo 64. */
150 |   return ( w >> ( c & 63 ) ) | ( w << ( ( 64 - c ) & 63 ) ); /* masking keeps both shift counts below 64, so c == 0 no longer shifts by the full width (undefined in C) */
151 | }
152 | 
153 | /* Zero n bytes at v. The call goes through a volatile function pointer, which prevents the compiler optimizing out memset(). */
154 | static BLAKE2_INLINE void secure_zero_memory(void *v, size_t n)
155 | {
156 |   static void *(*const volatile memset_v)(void *, int, size_t) = &memset; /* volatile: the pointer must be re-read at each call */
157 |   memset_v(v, 0, n);
158 | }
159 | 
160 | #endif
161 | 


--------------------------------------------------------------------------------
/src/crypto/portable_endian.h:
--------------------------------------------------------------------------------
  1 | // "License": Public Domain
  2 | // I, Mathias Panzenböck, place this file hereby into the public domain. Use it at your own risk for whatever you like.
  3 | // In case there are jurisdictions that don't support putting things in the public domain you can also consider it to
  4 | // be "dual licensed" under the BSD, MIT and Apache licenses, if you want to. This code is trivial anyway. Consider it
  5 | // an example on how to get the endian conversion functions on different platforms.
  6 | 
  7 | #ifndef PORTABLE_ENDIAN_H__
  8 | #define PORTABLE_ENDIAN_H__
  9 | 
 10 | #if (defined(_WIN16) || defined(_WIN32) || defined(_WIN64)) && !defined(__WINDOWS__)
 11 | 
 12 | #	define __WINDOWS__
 13 | 
 14 | #endif
 15 | 
 16 | #if defined(__linux__) || defined(__CYGWIN__)
 17 | 
 18 | #	include <endian.h>
 19 | 
 20 | #elif defined(__APPLE__)
 21 | 
 22 | #	include <libkern/OSByteOrder.h>
 23 | 
 24 | #	define htobe16(x) OSSwapHostToBigInt16(x)
 25 | #	define htole16(x) OSSwapHostToLittleInt16(x)
 26 | #	define be16toh(x) OSSwapBigToHostInt16(x)
 27 | #	define le16toh(x) OSSwapLittleToHostInt16(x)
 28 |  
 29 | #	define htobe32(x) OSSwapHostToBigInt32(x)
 30 | #	define htole32(x) OSSwapHostToLittleInt32(x)
 31 | #	define be32toh(x) OSSwapBigToHostInt32(x)
 32 | #	define le32toh(x) OSSwapLittleToHostInt32(x)
 33 |  
 34 | #	define htobe64(x) OSSwapHostToBigInt64(x)
 35 | #	define htole64(x) OSSwapHostToLittleInt64(x)
 36 | #	define be64toh(x) OSSwapBigToHostInt64(x)
 37 | #	define le64toh(x) OSSwapLittleToHostInt64(x)
 38 | 
 39 | #	define __BYTE_ORDER    BYTE_ORDER
 40 | #	define __BIG_ENDIAN    BIG_ENDIAN
 41 | #	define __LITTLE_ENDIAN LITTLE_ENDIAN
 42 | #	define __PDP_ENDIAN    PDP_ENDIAN
 43 | 
 44 | #elif defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
 45 | 
 46 | #	include <sys/endian.h>	/* these systems already spell the macros be16toh/le16toh/... */
 47 | 
 48 | #elif defined(__OpenBSD__)
 49 | 
 50 | #	include <sys/endian.h>
 51 | 
 52 | #	ifndef be16toh	/* only alias when the header offers just the betohNN/letohNN spellings */
 53 | #		define be16toh(x) betoh16(x)
 54 | #		define le16toh(x) letoh16(x)
 55 | #		define be32toh(x) betoh32(x)
 56 | #		define le32toh(x) letoh32(x)
 57 | #		define be64toh(x) betoh64(x)
 58 | #		define le64toh(x) letoh64(x)
 59 | #	endif
 60 | 
 61 | #elif defined(__WINDOWS__)
 62 | 
 63 | #	include <winsock2.h>
 64 | #	include <sys/param.h>
 65 | 
 66 | #	if BYTE_ORDER == LITTLE_ENDIAN
 67 | 
 68 | #		define htobe16(x) htons(x)
 69 | #		define htole16(x) (x)
 70 | #		define be16toh(x) ntohs(x)
 71 | #		define le16toh(x) (x)
 72 |  
 73 | #		define htobe32(x) htonl(x)
 74 | #		define htole32(x) (x)
 75 | #		define be32toh(x) ntohl(x)
 76 | #		define le32toh(x) (x)
 77 |  
 78 | #		define htobe64(x) htonll(x)
 79 | #		define htole64(x) (x)
 80 | #		define be64toh(x) ntohll(x)
 81 | #		define le64toh(x) (x)
 82 | 
 83 | #	elif BYTE_ORDER == BIG_ENDIAN
 84 | 
 85 | 		/* that would be xbox 360 */
 86 | #		define htobe16(x) (x)
 87 | #		define htole16(x) __builtin_bswap16(x)
 88 | #		define be16toh(x) (x)
 89 | #		define le16toh(x) __builtin_bswap16(x)
 90 |  
 91 | #		define htobe32(x) (x)
 92 | #		define htole32(x) __builtin_bswap32(x)
 93 | #		define be32toh(x) (x)
 94 | #		define le32toh(x) __builtin_bswap32(x)
 95 |  
 96 | #		define htobe64(x) (x)
 97 | #		define htole64(x) __builtin_bswap64(x)
 98 | #		define be64toh(x) (x)
 99 | #		define le64toh(x) __builtin_bswap64(x)
100 | 
101 | #	else
102 | 
103 | #		error byte order not supported
104 | 
105 | #	endif
106 | 
107 | #	define __BYTE_ORDER    BYTE_ORDER
108 | #	define __BIG_ENDIAN    BIG_ENDIAN
109 | #	define __LITTLE_ENDIAN LITTLE_ENDIAN
110 | #	define __PDP_ENDIAN    PDP_ENDIAN
111 | 
112 | #else
113 | 
114 | #	error platform not supported
115 | 
116 | #endif
117 | 
118 | #endif
119 | 


--------------------------------------------------------------------------------
/src/crypto/siphash.cuh:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #if (__CUDA_ARCH__  >= 320) // make rotate-left use funnel shifter, 3% speed gain
 4 | typedef uint2 sip64;
 5 | 
 6 | static __device__ __forceinline__ sip64 operator^ (uint2 a, uint2 b) {
 7 |   a.x ^= b.x; a.y ^= b.y; return a;  // a is a by-value copy, so the caller's operand is left untouched
 8 | }
 9 | static __device__ __forceinline__ void operator^= (uint2 &a, uint2 b) {
10 |   a = make_uint2(a.x ^ b.x, a.y ^ b.y);  // word-wise XOR written back into a
11 | }
12 | static __device__ __forceinline__ void operator+= (uint2 &a, uint2 b) {  // 64-bit add on (x = low word, y = high word) pairs
13 |   asm("{\n\tadd.cc.u32 %0,%2,%4;\n\taddc.u32 %1,%3,%5;\n\t}\n\t"  // add.cc adds the x words and records the carry; addc adds the y words plus that carry
14 |     : "=r"(a.x), "=r"(a.y) : "r"(a.x), "r"(a.y), "r"(b.x), "r"(b.y));
15 | }
16 | 
17 | __inline__ __device__ sip64 rotl(const sip64 a, const int offset) {  // rotate the 64-bit value held as (a.x = low, a.y = high) left by offset bits
18 |   sip64 result;
19 |   if (offset >= 32) {  // shf.l.wrap takes the shift count modulo 32, so the words are fed swapped to account for the extra 32 bits
20 |     asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.x), "r"(a.y), "r"(offset));
21 |     asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.y), "r"(a.x), "r"(offset));
22 |   } else {
23 |     asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.y), "r"(a.x), "r"(offset));  // low word: a.x shifted up, filled from the top bits of a.y
24 |     asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(offset));  // high word: a.y shifted up, filled from the top bits of a.x
25 |   }
26 |   return result;
27 | }
28 | __device__ __forceinline__ sip64 vectorize(const uint64_t x) {  // split a 64-bit value into a uint2 (x = low 32 bits, y = high 32 bits)
29 |   uint2 result;
30 |   asm("mov.b64 {%0,%1},%2; \n\t" : "=r"(result.x), "=r"(result.y) : "l"(x));  // unpack one 64-bit register into two 32-bit registers
31 |   return result;
32 | }
33 | __device__ __forceinline__ uint64_t devectorize(sip64 x) {  // inverse of vectorize: pack (x.x = low, x.y = high) back into one 64-bit value
34 |   uint64_t result;
35 |   asm("mov.b64 %0,{%1,%2}; \n\t" : "=l"(result) : "r"(x.x), "r"(x.y));  // pack two 32-bit registers into one 64-bit register
36 |   return result;
37 | }
38 | 
39 | #else
40 | 
41 | typedef uint64_t sip64;
42 | 
43 | __inline__ __device__ sip64 rotl(const sip64 a, const int offset) {  // 64-bit rotate left; offset is taken modulo 64
44 |   return (a << (offset & 63)) | (a >> ((64 - offset) & 63));  // masking keeps both shift counts in 0..63, so offset == 0 no longer shifts by the full width (undefined behaviour)
45 | }
46 | __device__ __forceinline__ sip64 vectorize(const uint64_t x) {
47 |   return x;
48 | }
49 | __device__ __forceinline__ uint64_t devectorize(sip64 x) {
50 |   return x;
51 | }
52 | 
53 | #endif
54 | 
55 | template <int rotE = 21>  // rotE: rotation applied to v3 in the second half of each round (default 21)
56 | class diphash_state {     // device-side hashing state of four sip64 lanes, seeded from a siphash_keys
57 | public:
58 |   sip64 v0;
59 |   sip64 v1;
60 |   sip64 v2;
61 |   sip64 v3;
62 | 
63 |   __device__ diphash_state(const siphash_keys &sk) {  // load the four key words directly into the four lanes
64 |     v0 = vectorize(sk.k0); v1 = vectorize(sk.k1); v2 = vectorize(sk.k2); v3 = vectorize(sk.k3);
65 |   }
66 |   __device__ uint64_t xor_lanes() {  // collapse the state to one 64-bit value: v0 ^ v1 ^ v2 ^ v3
67 |     return devectorize((v0 ^ v1) ^ (v2  ^ v3));
68 |   }
69 |   __device__ void xor_with(const diphash_state &x) {  // XOR another state into this one, lane by lane
70 |     v0 ^= x.v0;
71 |     v1 ^= x.v1;
72 |     v2 ^= x.v2;
73 |     v3 ^= x.v3;
74 |   }
75 |   __device__ void dip_round() {  // one add/rotate/xor mixing round; the statement order is significant
76 |     v0 += v1; v2 += v3; v1 = rotl(v1,13);
77 |     v3 = rotl(v3,16); v1 ^= v0; v3 ^= v2;
78 |     v0 = rotl(v0,32); v2 += v1; v0 += v3;
79 |     v1 = rotl(v1,17);   v3 = rotl(v3,rotE);
80 |     v1 ^= v2; v3 ^= v0; v2 = rotl(v2,32);
81 |   }
82 |   __device__ void hash24(const uint64_t nonce) {  // absorb nonce with 2 rounds, then finish with 4 rounds
83 |     v3 ^= vectorize(nonce);
84 |     dip_round(); dip_round();
85 |     v0 ^= vectorize(nonce);
86 |     v2 ^= vectorize(0xff);  // marks the switch from the absorbing rounds to the final rounds
87 |     dip_round(); dip_round(); dip_round(); dip_round();
88 |   }
89 | };
90 |  
91 | __device__ uint64_t dipnode(const siphash_keys &sip_keys, const uint64_t nonce, const int uorv) {  // hash of 2*nonce|uorv, masked with NODE1MASK
92 |   diphash_state<> state(sip_keys);
93 |   state.hash24(2 * nonce | uorv);  // uorv supplies the lowest bit of the hashed value
94 |   return NODE1MASK & state.xor_lanes();
95 | }
96 | 


--------------------------------------------------------------------------------
/src/crypto/siphash.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <stdint.h>    // for types uint32_t,uint64_t
 4 | #include "portable_endian.h"    // for htole32/64
 5 | 
 6 | // Generalized siphash key: a quadruple of 64-bit words that siphash_state copies straight into its four lanes.
 7 | class siphash_keys {
 8 | public:
 9 |   uint64_t k0;
10 |   uint64_t k1;
11 |   uint64_t k2;
12 |   uint64_t k3;
13 | 
14 |   void setkeys(const char *keybuf);   // fill k0..k3 from a 32-byte buffer
15 | 
16 |   uint64_t siphash24(const uint64_t nonce) const;   // hash nonce under these keys (2 + 4 rounds)
17 | };
18 | 
19 | template <int rotE = 21>  // rotE: rotation applied to v3 in the second half of each round (default 21)
20 | class siphash_state {     // four 64-bit lanes of hashing state, seeded from a siphash_keys
21 | public:
22 |   uint64_t v0;
23 |   uint64_t v1;
24 |   uint64_t v2;
25 |   uint64_t v3;
26 | 
27 |   siphash_state(const siphash_keys &sk) {  // copy the four key words into the four lanes
28 |     v0 = sk.k0; v1 = sk.k1; v2 = sk.k2; v3 = sk.k3;
29 |   }
30 |   uint64_t xor_lanes() {  // collapse the state to one 64-bit value
31 |     return (v0 ^ v1) ^ (v2  ^ v3);
32 |   }
33 |   void xor_with(const siphash_state &x) {  // XOR another state into this one, lane by lane
34 |     v0 ^= x.v0;
35 |     v1 ^= x.v1;
36 |     v2 ^= x.v2;
37 |     v3 ^= x.v3;
38 |   }
39 |   static uint64_t rotl(uint64_t x, uint64_t b) {  // rotate x left by b bits; b must be in 1..63 (b == 0 would shift by 64)
40 |     return (x << b) | (x >> (64 - b));
41 |   }
42 |   void sip_round() {  // one add/rotate/xor mixing round; the statement order is significant
43 |     v0 += v1; v2 += v3; v1 = rotl(v1,13);
44 |     v3 = rotl(v3,16); v1 ^= v0; v3 ^= v2;
45 |     v0 = rotl(v0,32); v2 += v1; v0 += v3;
46 |     v1 = rotl(v1,17);   v3 = rotl(v3,rotE);
47 |     v1 ^= v2; v3 ^= v0; v2 = rotl(v2,32);
48 |   }
49 |   void hash24(const uint64_t nonce) {  // absorb nonce with 2 rounds, then finish with 4 rounds
50 |     v3 ^= nonce;
51 |     sip_round(); sip_round();
52 |     v0 ^= nonce;
53 |     v2 ^= 0xff;  // marks the switch from the absorbing rounds to the final rounds
54 |     sip_round(); sip_round(); sip_round(); sip_round();
55 |   }
56 | };
57 |  
58 | // set siphash keys from 32 byte char array, read as four consecutive 64-bit words
59 | inline void siphash_keys::setkeys(const char *keybuf) {  // inline: a header definition must not yield one external definition per including translation unit
60 |   k0 = htole64(((uint64_t *)keybuf)[0]);  // NOTE(review): the cast assumes keybuf is suitably aligned for uint64_t -- confirm at call sites
61 |   k1 = htole64(((uint64_t *)keybuf)[1]);
62 |   k2 = htole64(((uint64_t *)keybuf)[2]);
63 |   k3 = htole64(((uint64_t *)keybuf)[3]);
64 | }
65 | 
66 | inline uint64_t siphash_keys::siphash24(const uint64_t nonce) const {  // inline: defined in a header, so it must be safe to include from several translation units
67 |   siphash_state<> v(*this);  // fresh state seeded from these keys, default rotE
68 |   v.hash24(nonce);
69 |   return v.xor_lanes();      // the hash is the XOR of the four lanes
70 | }
71 | 


--------------------------------------------------------------------------------
/src/cuckaroo/Makefile:
--------------------------------------------------------------------------------
 1 | .POSIX:
 2 | .SUFFIXES:
 3 | 
 4 | OPT ?= -O3
 5 | 
 6 | GCC_ARCH_FLAGS ?= -march=native
 7 | GPP_ARCH_FLAGS ?= -march=native
 8 | 
 9 | # -Wno-deprecated-declarations shuts up Apple OSX clang
10 | FLAGS ?= -Wall -Wno-format -Wno-deprecated-declarations -D_POSIX_C_SOURCE=200112L $(OPT) -DPREFETCH -I. $(CPPFLAGS) -pthread
11 | GPP ?= g++ $(GPP_ARCH_FLAGS) -std=c++11 $(FLAGS)
12 | CFLAGS ?= -Wall -Wno-format -fomit-frame-pointer $(OPT)
13 | GCC ?= gcc $(GCC_ARCH_FLAGS) -std=gnu11 $(CFLAGS)
14 | BLAKE_2B_SRC ?= ../crypto/blake2b-ref.c
15 | NVCC ?= nvcc -std=c++11 
16 | 
17 | all : simpletest meantest
18 | .PHONY: all simpletest meantest	# these name actions, not files, so a stray file of the same name must not suppress them
19 | simpletest:     simple19
20 | 	./simple19 -n 71
21 | 
22 | meantest:	mean29x4
23 | 	./mean29x4 -n 671 -t 4 -s
24 | 
25 | simple19:	../crypto/siphash.hpp cuckaroo.hpp  bitmap.hpp graph.hpp simple.cpp Makefile
26 | 	$(GPP) -o $@ -DPROOFSIZE=42 -DEDGEBITS=19 simple.cpp $(BLAKE_2B_SRC)
27 | 
28 | simple29:	../crypto/siphash.hpp cuckaroo.hpp  bitmap.hpp graph.hpp simple.cpp Makefile
29 | 	$(GPP) -o $@ -DPROOFSIZE=42 -DEDGEBITS=29 simple.cpp $(BLAKE_2B_SRC)
30 | 
31 | mean19x1:	cuckaroo.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
32 | 	$(GPP) -o $@ -DXBITS=2 -DNSIPHASH=1 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC)
33 | 
34 | mean19x8:	cuckaroo.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
35 | 	$(GPP) -o $@ -mavx2 -DXBITS=2 -DNSIPHASH=8 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC)
36 | 
37 | mean29x4:	cuckaroo.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile	# barrier.hpp listed as in every other mean* rule, so a change to it triggers a rebuild
38 | 	$(GPP) -o $@ -mno-avx2 -DNSIPHASH=4 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC)
39 | 
40 | mean29x8:	cuckaroo.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
41 | 	$(GPP) -o $@ -mavx2 -DNSIPHASH=8 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC)
42 | 
43 | mean29x8s:	cuckaroo.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
44 | 	$(GPP) -o $@ -mavx2 -DSAVEEDGES -DNSIPHASH=8 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC)
45 | 
46 | mean29x1:	cuckaroo.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
47 | 	$(GPP) -o $@ -DNSIPHASH=1 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC)
48 | 
49 | mean30x1:	cuckaroo.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
50 | 	$(GPP) -o $@ -DNSIPHASH=1 -DEXPANDROUND=10 -DCOMPRESSROUND=22 -DEDGEBITS=30 mean.cpp $(BLAKE_2B_SRC)
51 | 
52 | mean30x8:	cuckaroo.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
53 | 	$(GPP) -o $@ -mavx2 -DNSIPHASH=8 -DEXPANDROUND=10 -DCOMPRESSROUND=22 -DEDGEBITS=30 mean.cpp $(BLAKE_2B_SRC)
54 | 
55 | cuda19:		../crypto/siphash.cuh compress.hpp graph.hpp mean.cu Makefile
56 | 	$(NVCC) -o $@ -DEPS_A=4 -DEPS_B=3 -DIDXSHIFT=2 -DEDGEBITS=19 -arch sm_35 mean.cu $(BLAKE_2B_SRC)
57 | 
58 | cuda29:		../crypto/siphash.cuh compress.hpp graph.hpp mean.cu Makefile
59 | 	$(NVCC) -o $@ -DEDGEBITS=29 -arch sm_35 mean.cu $(BLAKE_2B_SRC)
60 | 


--------------------------------------------------------------------------------
/src/cuckaroo/bitmap.hpp:
--------------------------------------------------------------------------------
 1 | template <typename word_t>  // word_t: integer type used as the storage word (presumably unsigned -- confirm at instantiation sites)
 2 | class bitmap {              // heap-allocated bit array; compiled with ATOMIC, set/reset/test use relaxed atomic operations
 3 | public:
 4 |   word_t SIZE;              // number of addressable bits requested by the caller
 5 |   word_t BITMAP_WORDS;      // number of storage words allocated
 6 | #ifdef ATOMIC
 7 |   typedef std::atomic<word_t> aword_t;
 8 | #else
 9 |   typedef word_t aword_t;
10 | #endif
11 |   aword_t *bits;            // storage; not zeroed by the constructor, call clear() before use
12 |   const u32 BITS_PER_WORD = sizeof(word_t) * 8;
13 | 
14 |   bitmap(word_t size) {
15 |     SIZE = size;
16 |     BITMAP_WORDS = SIZE / BITS_PER_WORD + (SIZE % BITS_PER_WORD != 0);  // round up so a size that is not a multiple of the word width still owns its last partial word
17 |     bits = new aword_t[BITMAP_WORDS];
18 |     assert(bits != 0);
19 |   }
20 |   ~bitmap() {
21 |     freebits();
22 |   }
23 |   void freebits() {         // release the storage early; safe to call more than once because bits is reset to 0
24 |     delete[] bits;
25 |     bits = 0;
26 |   }
27 |   void clear() {            // zero every bit
28 |     assert(bits);
29 |     memset((word_t *)bits, 0, BITMAP_WORDS*sizeof(word_t));
30 |   }
31 |   void prefetch(u32 u) const {  // hint that the word holding bit u will be accessed soon; no-op unless PREFETCH is defined
32 | #ifdef PREFETCH
33 |     __builtin_prefetch((const void *)(&bits[u/BITS_PER_WORD]), /*READ=*/0, /*TEMPORAL=*/0);
34 | #endif
35 |   }
36 |   void set(u32 u) {         // set bit u to 1
37 |     u32 idx = u / BITS_PER_WORD;
38 |     word_t bit = (word_t)1 << (u % BITS_PER_WORD);
39 | #ifdef ATOMIC
40 |     std::atomic_fetch_or_explicit(&bits[idx], bit, std::memory_order_relaxed);
41 | #else
42 |     bits[idx] |= bit;
43 | #endif
44 |   }
45 |   void reset(u32 u) {       // clear bit u to 0
46 |     u32 idx = u / BITS_PER_WORD;
47 |     word_t bit = (word_t)1 << (u % BITS_PER_WORD);
48 | #ifdef ATOMIC
49 |     std::atomic_fetch_and_explicit(&bits[idx], ~bit, std::memory_order_relaxed);
50 | #else
51 |     bits[idx] &= ~bit;
52 | #endif
53 |   }
54 |   bool test(u32 u) const {  // return whether bit u is set
55 |     u32 idx = u / BITS_PER_WORD;
56 |     u32 bit = u % BITS_PER_WORD;
57 | #ifdef ATOMIC
58 |     return (bits[idx].load(std::memory_order_relaxed) >> bit) & 1;
59 | #else
60 |     return (bits[idx] >> bit) & 1;
61 | #endif
62 |   }
63 |   word_t block(u32 n) const {  // return the whole storage word that contains bit n
64 |     u32 idx = n / BITS_PER_WORD;
65 |     return bits[idx];
66 |   }
67 | };
68 | 


--------------------------------------------------------------------------------
/src/cuckaroo/compress.hpp:
--------------------------------------------------------------------------------
 1 | #include <new>
 2 | 
 3 | // compressor for cuckaroo nodes where edgetrimming
 4 | // has left at most a fraction 2^-compressbits nodes in each partition
 5 | template <typename word_t>
 6 | class compressor {
 7 | public:
 8 |   u32 NODEBITS;
 9 |   u32 SHIFTBITS;
10 |   u32 SIZEBITS;
11 |   word_t SIZE;
12 |   word_t SIZE2;
13 |   word_t MASK;
14 |   word_t MASK2;
15 |   word_t nnodes;
16 |   const word_t NIL = ~(word_t)0;
17 |   word_t *nodes;
18 |   bool sharedmem;
19 | 
20 |   compressor(u32 nodebits, u32 compressbits, char *bytes) {
21 |     NODEBITS = nodebits;
22 |     SHIFTBITS = compressbits;
23 |     SIZEBITS = NODEBITS-compressbits;
24 |     SIZE = (word_t)1 << SIZEBITS;
25 |     SIZE2 = (word_t)2 << SIZEBITS;
26 |     nodes = new (bytes) word_t[SIZE2];
27 |     sharedmem = true;
28 |     MASK = SIZE-1;
29 |     MASK2 = SIZE2-1;
30 |   }
31 | 
32 |   compressor(u32 nodebits, u32 compressbits) {
33 |     NODEBITS = nodebits;
34 |     SHIFTBITS = compressbits;
35 |     SIZEBITS = NODEBITS-compressbits;
36 |     SIZE = (word_t)1 << SIZEBITS;
37 |     SIZE2 = (word_t)2 << SIZEBITS;
38 |     nodes = new word_t[SIZE2];
39 |     sharedmem = false;
40 |     MASK = SIZE-1;
41 |     MASK2 = SIZE2-1;
42 |   }
43 | 
44 |   ~compressor() {
45 |     if (!sharedmem)
46 |       delete[] nodes;
47 |   }
48 | 
49 |   uint64_t bytes() {
50 |     return sizeof(word_t[SIZE2]);
51 |   }
52 | 
53 |   void reset() {
54 |     memset(nodes, (char)NIL, sizeof(word_t[SIZE2]));
55 |     nnodes = 0;
56 |   }
57 | 
58 |   word_t compress(word_t u) {
59 |     word_t ui = u >> SHIFTBITS;
60 |     for (; ; ui = (ui+1) & MASK2) {
61 |       word_t cu = nodes[ui];
62 |       if (cu == NIL) {
63 |         if (nnodes >= SIZE) {
64 |           print_log("NODE OVERFLOW at %x\n", u);
65 |           return 0;
66 |         }
67 |         nodes[ui] = u << SIZEBITS | nnodes;
68 |         return nnodes++;
69 |       }
70 |       if ((cu & ~MASK) == u << SIZEBITS) {
71 |         return cu & MASK;
72 |       }
73 |     }
74 |   }
75 | };
76 | 


--------------------------------------------------------------------------------
/src/cuckaroo/cuckaroo.c:
--------------------------------------------------------------------------------
 1 | // Cuckoo Cycle, a memory-hard proof-of-work
 2 | // Copyright (c) 2013-2020 John Tromp
 3 | 
 4 | #include "cuckaroo.h"
 5 | #include <inttypes.h> // for SCNx64 macro
 6 | #include <stdio.h>    // printf/scanf
 7 | #include <stdlib.h>   // exit
 8 | #include <unistd.h>   // getopt
 9 | #include <assert.h>   // d'uh
10 | 
11 | // arbitrary length of header hashed into siphash key
12 | #define HEADERLEN 80
13 | 
14 | int main(int argc, char **argv) {
15 |   const char *header = "";
16 |   int nonce = 0;
17 |   int c;
18 |   while ((c = getopt (argc, argv, "h:n:")) != -1) {
19 |     switch (c) {
20 |       case 'h':
21 |         header = optarg;
22 |         break;
23 |       case 'n':
24 |         nonce = atoi(optarg);
25 |         break;
26 |     }
27 |   }
28 |   char headernonce[HEADERLEN];
29 |   u32 hdrlen = strlen(header);
30 |   memcpy(headernonce, header, hdrlen);
31 |   memset(headernonce+hdrlen, 0, sizeof(headernonce)-hdrlen);
32 |   ((u32 *)headernonce)[HEADERLEN/sizeof(u32)-1] = htole32(nonce);
33 |   siphash_keys keys;
34 |   setheader(headernonce, sizeof(headernonce), &keys);
35 |   printf("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce, keys.k0, keys.k1, keys.k2, keys.k3);
36 |   printf("Verifying size %d proof for cuckaroo%d(\"%s\",%d)\n",
37 |                PROOFSIZE, EDGEBITS, header, nonce);
38 |   for (int nsols=0; scanf(" Solution") == 0; nsols++) {
39 |     word_t nonces[PROOFSIZE];
40 |     for (int n = 0; n < PROOFSIZE; n++) {
41 |       uint64_t nonce;
42 |       int nscan = scanf(" %" SCNx64, &nonce);
43 |       assert(nscan == 1);
44 |       nonces[n] = nonce;
45 |     }
46 |     int pow_rc = verify(nonces, &keys);
47 |     if (pow_rc == POW_OK) {
48 |       printf("Verified with cyclehash ");
49 |       unsigned char cyclehash[32];
50 |       blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)nonces, sizeof(nonces), 0, 0);
51 |       for (int i=0; i<32; i++)
52 |         printf("%02x", cyclehash[i]);
53 |       printf("\n");
54 |     } else {
55 |       printf("FAILED due to %s\n", errstr[pow_rc]);
56 |     }
57 |   }
58 |   return 0;
59 | }
60 | 


--------------------------------------------------------------------------------
/src/cuckaroo/cuckaroo.hpp:
--------------------------------------------------------------------------------
  1 | // Cuck(at)oo Cycle, a memory-hard proof-of-work
  2 | // Copyright (c) 2013-2020 John Tromp
  3 | 
  4 | #include <stdint.h> // for types uint32_t,uint64_t
  5 | #include <string.h> // for functions strlen, memset
  6 | #include <stdarg.h>
  7 | #include <stdio.h> 
  8 | #include <chrono>
  9 | #include <ctime>
 10 | #include "../crypto/blake2.h"
 11 | #include "../crypto/siphash.hpp"
 12 | 
 13 | // save some keystrokes since i'm a lazy typer
typedef uint32_t u32;
typedef uint64_t u64;

// capacity of the sols array in SolverSolutions
#ifndef MAX_SOLS
#define MAX_SOLS 4
#endif

// edges are hashed in blocks of EDGE_BLOCK_SIZE = 2^EDGE_BLOCK_BITS
// consecutive edge indices (see sipblock)
#ifndef EDGE_BLOCK_BITS
#define EDGE_BLOCK_BITS 6
#endif
#define EDGE_BLOCK_SIZE (1 << EDGE_BLOCK_BITS)
// low bits of an edge index that select its position within a block
#define EDGE_BLOCK_MASK (EDGE_BLOCK_SIZE - 1)

// proof-of-work parameters
#ifndef EDGEBITS
// the main parameter is the number of bits in an edge index,
// i.e. the 2-log of the number of edges
#define EDGEBITS 29
#endif
#ifndef PROOFSIZE
// the next most important parameter is the (even) length
// of the cycle to be found. a minimum of 12 is recommended
#define PROOFSIZE 42
#endif

// word_t is the integer type used for edge indices and node values;
// its width is chosen from EDGEBITS
#if EDGEBITS > 30
typedef uint64_t word_t;
#elif EDGEBITS > 14
typedef u32 word_t;
#else // if EDGEBITS <= 14
typedef uint16_t word_t;
#endif

// number of edges
#define NEDGES ((word_t)1 << EDGEBITS)
// used to mask siphash output
#define EDGEMASK ((word_t)NEDGES - 1)
// node values use the same mask as edge indices
#define NODEMASK EDGEMASK
#define NODE1MASK NODEMASK
 53 | 
 54 | // Common Solver parameters, to return to caller
 55 | struct SolverParams {
 56 | 	u32 nthreads = 0;
 57 | 	u32 ntrims = 0;
 58 | 	bool showcycle;
 59 | 	bool allrounds;
 60 | 	bool mutate_nonce = 1;
 61 | 	bool cpuload = 1;
 62 | 
 63 | 	// Common cuda params
 64 | 	u32 device = 0;
 65 | 
 66 | 	// Cuda-lean specific params
 67 | 	u32 blocks = 0;
 68 | 	u32 tpb = 0;
 69 | 
 70 | 	// Cuda-mean specific params
 71 | 	u32 expand = 0;
 72 | 	u32 genablocks = 0;
 73 | 	u32 genatpb = 0;
 74 | 	u32 genbtpb = 0;
 75 | 	u32 trimtpb = 0;
 76 | 	u32 tailtpb = 0;
 77 | 	u32 recoverblocks = 0;
 78 | 	u32 recovertpb = 0;
 79 | };
 80 | 
// Solutions result structs to be instantiated by caller,
// and filled by solver if desired

// One found cycle: the nonce it was found for and its PROOFSIZE edge indices.
// proof has no default initializer, so its contents are indeterminate until filled.
struct Solution {
 u64 id = 0;
 u64 nonce = 0;
 u64 proof[PROOFSIZE];
};

// Fixed-capacity list of solutions; only the first num_sols entries of sols are meaningful.
struct SolverSolutions {
 u32 edge_bits = 0;
 u32 num_sols = 0;
 Solution sols[MAX_SOLS];
};
 94 | 
// size of every fixed-length name/message buffer below
#define MAX_NAME_LEN 256

// last error reason, to be picked up by stats
// to be returned to caller
// NOTE(review): this is a definition, not an extern declaration, so including
// this header from more than one translation unit of one program would define
// it twice - confirm each binary includes it from a single source file
char LAST_ERROR_REASON[MAX_NAME_LEN];

// Solver statistics, to be instantiated by caller
// and filled by solver if desired
// The char arrays have no default initializer and are indeterminate until written.
struct SolverStats {
	u32 device_id = 0;
	u32 edge_bits = 0;
	char plugin_name[MAX_NAME_LEN]; // will be filled in caller-side
	char device_name[MAX_NAME_LEN];
	bool has_errored = false;
	char error_reason[MAX_NAME_LEN];
	u32 iterations = 0;
	u64 last_start_time = 0;
	u64 last_end_time = 0;
	u64 last_solution_time = 0;
};
115 | 
// result codes returned by verify(); POW_OK (0) means the proof is valid
enum verify_code { POW_OK, POW_HEADER_LENGTH, POW_TOO_BIG, POW_TOO_SMALL, POW_NON_MATCHING, POW_BRANCH, POW_DEAD_END, POW_SHORT_CYCLE};
// human-readable message for each verify_code, indexed by the code's value; keep in the same order as the enum
const char *errstr[] = { "OK", "wrong header length", "edge too big", "edges not ascending", "endpoints don't match up", "branch in cycle", "cycle dead ends", "cycle too short"};
118 | 
119 | // fills buffer with EDGE_BLOCK_SIZE siphash outputs for block containing edge in cuckaroo graph
120 | // return siphash output for given edge
121 | u64 sipblock(siphash_keys &keys, const word_t edge, u64 *buf) {
122 |   siphash_state<> shs(keys);
123 |   word_t edge0 = edge & ~EDGE_BLOCK_MASK;
124 |   for (u32 i=0; i < EDGE_BLOCK_SIZE; i++) {
125 |     shs.hash24(edge0 + i);
126 |     buf[i] = shs.xor_lanes();
127 |   }
128 |   const u64 last = buf[EDGE_BLOCK_MASK];
129 |   for (u32 i=0; i < EDGE_BLOCK_MASK; i++)
130 |     buf[i] ^= last;
131 |   return buf[edge & EDGE_BLOCK_MASK];
132 | }
133 | 
// verify that edges are ascending and form a cycle in header-generated graph
// edges: PROOFSIZE edge indices; each must be <= EDGEMASK and strictly greater than the previous one
// keys:  siphash keys that generate the graph (see setheader)
// returns POW_OK if the edges form a single cycle of length PROOFSIZE,
// otherwise the verify_code of the first check that failed
int verify(word_t edges[PROOFSIZE], siphash_keys &keys) {
  // running XOR of all first endpoints and of all second endpoints
  word_t xor0 = 0, xor1 = 0;
  u64 sips[EDGE_BLOCK_SIZE];
  // uvs[2*n] and uvs[2*n+1] are the two endpoints of the n-th edge
  word_t uvs[2*PROOFSIZE];

  for (u32 n = 0; n < PROOFSIZE; n++) {
    if (edges[n] > EDGEMASK)
      return POW_TOO_BIG;
    if (n && edges[n] <= edges[n-1])
      return POW_TOO_SMALL;
    // low 32 bits give one endpoint, high 32 bits the other, both masked to EDGEBITS bits
    u64 edge = sipblock(keys, edges[n], sips);
    xor0 ^= uvs[2*n  ] = edge & EDGEMASK;
    xor1 ^= uvs[2*n+1] = (edge >> 32) & EDGEMASK;
  }
  // in a cycle every node is an endpoint of exactly two edges, so both XORs cancel to 0
  if (xor0 | xor1)              // optional check for obviously bad proofs
    return POW_NON_MATCHING;
  // n counts edges walked, i is the current endpoint index into uvs, j the matching endpoint found
  u32 n = 0, i = 0, j;
  do {                        // follow cycle
    // stepping k by 2 keeps its parity, so only endpoints on the same side as i are compared
    for (u32 k = j = i; (k = (k+2) % (2*PROOFSIZE)) != i; ) {
      if (uvs[k] == uvs[i]) { // find other edge endpoint identical to one at i
        if (j != i)           // already found one before
          return POW_BRANCH;
        j = k;
      }
    }
    if (j == i) return POW_DEAD_END;  // no matching endpoint
    i = j^1;                  // cross the matched edge to its other endpoint
    n++;
  } while (i != 0);           // must cycle back to start or we would have found branch
  return n == PROOFSIZE ? POW_OK : POW_SHORT_CYCLE;
}
166 | 
167 | // convenience function for extracting siphash keys from header
168 | void setheader(const char *header, const u32 headerlen, siphash_keys *keys) {
169 |   char hdrkey[32];
170 |   // SHA256((unsigned char *)header, headerlen, (unsigned char *)hdrkey);
171 |   blake2b((void *)hdrkey, sizeof(hdrkey), (const void *)header, headerlen, 0, 0);
172 |   keys->setkeys(hdrkey);
173 | }
174 | 
175 | u64 timestamp() {
176 | 	using namespace std::chrono;
177 | 	high_resolution_clock::time_point now = high_resolution_clock::now();
178 | 	auto dn = now.time_since_epoch();
179 | 	return dn.count();
180 | }
181 | 
182 | /////////////////////////////////////////////////////////////////
183 | // Declarations to make it easier for callers to link as required
184 | /////////////////////////////////////////////////////////////////
185 | 
// set C_CALL_CONVENTION to a non-zero value at compile time to give the
// functions marked CALL_CONVENTION C linkage; the default leaves them as plain C++
#ifndef C_CALL_CONVENTION
#define C_CALL_CONVENTION 0
#endif

// convention to prepend to called functions
#if C_CALL_CONVENTION
#define CALL_CONVENTION extern "C"
#else
#define CALL_CONVENTION
#endif
196 | 
// Compile with -DSQUASH_OUTPUT=1 to silence all print_log output
#ifndef SQUASH_OUTPUT
#define SQUASH_OUTPUT 0
#endif

// printf-style logging to stdout; prints nothing when SQUASH_OUTPUT is non-zero
void print_log(const char *fmt, ...) {
	if (!SQUASH_OUTPUT) {
		va_list ap;
		va_start(ap, fmt);
		vprintf(fmt, ap);
		va_end(ap);
	}
}
209 | //////////////////////////////////////////////////////////////////
210 | // END caller QOL
211 | //////////////////////////////////////////////////////////////////
212 | 
213 | 


--------------------------------------------------------------------------------
/src/cuckaroo/graph.hpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <assert.h>
  4 | #include "bitmap.hpp"
  5 | #include "compress.hpp"
  6 | #include <new>
  7 | 
  8 | typedef word_t proof[PROOFSIZE];
  9 | 
 10 | // cuck(ar)oo graph with given limit on number of edges (and on single partition nodes)
 11 | template <typename word_t>
 12 | class graph {
 13 | public:
 14 |   // terminates adjacency lists
 15 |   static const word_t NIL = ~(word_t)0;
 16 | 
 17 |   struct link { // element of adjacency list
 18 |     word_t next;
 19 |     word_t to;
 20 |   };
 21 | 
 22 |   word_t MAXEDGES;
 23 |   word_t MAXNODES;
 24 |   word_t nlinks; // aka halfedges, twice number of edges
 25 |   word_t *adjlist; // index into links array
 26 |   link *links;
 27 |   bool sharedmem;
 28 |   compressor<word_t> *compressu;
 29 |   compressor<word_t> *compressv;
 30 |   bitmap<u32> visited;
 31 |   u32 MAXSOLS;
 32 |   proof *sols;
 33 |   u32 nsols;
 34 | 
 35 |   graph(word_t maxedges, word_t maxnodes, u32 maxsols) : visited(2*maxnodes) {
 36 |     MAXEDGES = maxedges;
 37 |     MAXNODES = maxnodes;
 38 |     MAXSOLS = maxsols;
 39 |     adjlist = new word_t[2*MAXNODES]; // index into links array
 40 |     links   = new link[2*MAXEDGES];
 41 |     compressu = compressv = 0;
 42 |     sharedmem = false;
 43 |     sols    = new proof[MAXSOLS+1]; // extra one for current path
 44 |     visited.clear();
 45 |   }
 46 | 
 47 |   ~graph() {
 48 |     if (!sharedmem) {
 49 |       delete[] adjlist;
 50 |       delete[] links;
 51 |     }
 52 |     delete[] sols;
 53 |   }
 54 | 
 55 |   graph(word_t maxedges, word_t maxnodes, u32 maxsols, u32 compressbits) : visited(2*maxnodes) {
 56 |     MAXEDGES = maxedges;
 57 |     MAXNODES = maxnodes;
 58 |     MAXSOLS = maxsols;
 59 |     adjlist = new word_t[2*MAXNODES]; // index into links array
 60 |     links   = new link[2*MAXEDGES];
 61 |     compressu = new compressor<word_t>(EDGEBITS, compressbits);
 62 |     compressv = new compressor<word_t>(EDGEBITS, compressbits);
 63 |     sharedmem = false;
 64 |     sols    = new  proof[MAXSOLS];
 65 |     visited.clear();
 66 |   }
 67 | 
 68 |   graph(word_t maxedges, word_t maxnodes, u32 maxsols, char *bytes) : visited(2*maxnodes) {
 69 |     MAXEDGES = maxedges;
 70 |     MAXNODES = maxnodes;
 71 |     MAXSOLS = maxsols;
 72 |     adjlist = new (bytes) word_t[2*MAXNODES]; // index into links array
 73 |     links   = new (bytes += sizeof(word_t[2*MAXNODES])) link[2*MAXEDGES];
 74 |     compressu = compressv = 0;
 75 |     sharedmem = true;
 76 |     sols    = new  proof[MAXSOLS];
 77 |     visited.clear();
 78 |   }
 79 | 
 80 |   graph(word_t maxedges, word_t maxnodes, u32 maxsols, u32 compressbits, char *bytes) : visited(2*maxnodes) {
 81 |     MAXEDGES = maxedges;
 82 |     MAXNODES = maxnodes;
 83 |     MAXSOLS = maxsols;
 84 |     adjlist = new (bytes) word_t[2*MAXNODES]; // index into links array
 85 |     links   = new (bytes += sizeof(word_t[2*MAXNODES])) link[2*MAXEDGES];
 86 |     compressu = new compressor<word_t>(EDGEBITS, compressbits, bytes += sizeof(link[2*MAXEDGES]));
 87 |     compressv = new compressor<word_t>(EDGEBITS, compressbits, bytes + compressu->bytes());
 88 |     sharedmem = true;
 89 |     sols    = new  proof[MAXSOLS];
 90 |     visited.clear();
 91 |   }
 92 | 
 93 |   // total size of new-operated data, excludes sols and visited bitmap of MAXEDGES bits
 94 |   uint64_t bytes() {
 95 |     return sizeof(word_t[2*MAXNODES]) + sizeof(link[2*MAXEDGES]) + (compressu ? 2 * compressu->bytes() : 0);
 96 |   }
 97 | 
 98 |   void reset() {
 99 |     memset(adjlist, (char)NIL, sizeof(word_t[2*MAXNODES]));
100 |     if (compressu) {
101 |       compressu->reset();
102 |       compressv->reset();
103 |     }
104 |     resetcounts();
105 |   }
106 | 
107 |   void resetcounts() {
108 |     nlinks = nsols = 0;
109 |     // visited has entries set only during cycles() call
110 |   }
111 | 
112 |   static int nonce_cmp(const void *a, const void *b) {
113 |     return *(word_t *)a - *(word_t *)b;
114 |   }
115 | 
116 |   void cycles_with_link(u32 len, word_t u, word_t dest) {
117 |     // printf("cycles_with_link(%d, %x, %x)\n", len, u, dest);
118 |     if (visited.test(u))
119 |       return;
120 |     if (u == dest) {
121 |       print_log("  %d-cycle found\n", len);
122 |       if (len == PROOFSIZE && nsols < MAXSOLS) {
123 |         qsort(sols[nsols++], PROOFSIZE, sizeof(word_t), nonce_cmp);
124 |         memcpy(sols[nsols], sols[nsols-1], sizeof(sols[0]));
125 |       }
126 |       return;
127 |     }
128 |     if (len == PROOFSIZE)
129 |       return;
130 |     word_t au1 = adjlist[u];
131 |     if (au1 != NIL) {
132 |       visited.set(u);
133 |       for (; au1 != NIL; au1 = links[au1].next) {
134 |         sols[nsols][len] = au1/2;
135 |         cycles_with_link(len+1, links[au1 ^ 1].to, dest);
136 |       }
137 |       visited.reset(u);
138 |     }
139 |   }
140 | 
141 |   void add_edge(word_t u, word_t v) {
142 |     assert(u < MAXNODES);
143 |     assert(v < MAXNODES);
144 |     v += MAXNODES; // distinguish partitions
145 |     if (adjlist[u] != NIL && adjlist[v] != NIL) { // possibly part of a cycle
146 |       sols[nsols][0] = nlinks/2;
147 |       assert(!visited.test(u));
148 |       cycles_with_link(1, u, v);
149 |     }
150 |     word_t ulink = nlinks++;
151 |     word_t vlink = nlinks++; // the two halfedges of an edge differ only in last bit
152 |     assert(vlink != NIL);    // avoid confusing links with NIL; guaranteed if bits in word_t > EDGEBITS + 1
153 |     links[ulink].next = adjlist[u];
154 |     links[vlink].next = adjlist[v];
155 |     links[adjlist[u] = ulink].to = u;
156 |     links[adjlist[v] = vlink].to = v;
157 |   }
158 | 
159 |   void add_compress_edge(word_t u, word_t v) {
160 |     add_edge(compressu->compress(u), compressv->compress(v));
161 |   }
162 | };
163 | 


--------------------------------------------------------------------------------
/src/cuckaroo/mean.cpp:
--------------------------------------------------------------------------------
  1 | // Cuckaroo Cycle, a memory-hard proof-of-work
  2 | // Copyright (c) 2013-2020 John Tromp
  3 | 
  4 | #include "mean.hpp"
  5 | #include <unistd.h>
  6 | #include <chrono>
  7 | 
  8 | #ifndef HEADERLEN
  9 | // arbitrary length of header hashed into siphash key
 10 | #define HEADERLEN 80
 11 | #endif
 12 | 
 13 | typedef solver_ctx SolverCtx;
 14 | 
 15 | CALL_CONVENTION int run_solver(SolverCtx* ctx,
 16 |                                char* header,
 17 |                                int header_length,
 18 |                                u32 nonce,
 19 |                                u32 range,
 20 |                                SolverSolutions *solutions,
 21 |                                SolverStats *stats
 22 |                                )
 23 | {
 24 |   u64 time0, time1;
 25 |   u32 timems;
 26 |   u32 sumnsols = 0;
 27 | 
 28 |   for (u32 r = 0; r < range; r++) {
 29 |     time0 = timestamp();
 30 |     ctx->setheadernonce(header, header_length, nonce + r);
 31 |     print_log("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx->trimmer.sip_keys.k0, ctx->trimmer.sip_keys.k1, ctx->trimmer.sip_keys.k2, ctx->trimmer.sip_keys.k3);
 32 |     u32 nsols = ctx->solve();
 33 |     time1 = timestamp();
 34 |     timems = (time1 - time0) / 1000000;
 35 |     print_log("Time: %d ms\n", timems);
 36 | 
 37 |     for (unsigned s = 0; s < nsols; s++) {
 38 |       print_log("Solution");
 39 |       word_t *prf = &ctx->sols[s * PROOFSIZE];
 40 |       for (u32 i = 0; i < PROOFSIZE; i++)
 41 |         print_log(" %jx", (uintmax_t)prf[i]);
 42 |       print_log("\n");
 43 |       if (solutions != NULL){
 44 |         solutions->edge_bits = EDGEBITS;
 45 |         solutions->num_sols++;
 46 |         solutions->sols[sumnsols+s].nonce = nonce + r;
 47 |         for (u32 i = 0; i < PROOFSIZE; i++) 
 48 |           solutions->sols[sumnsols+s].proof[i] = (u64) prf[i];
 49 |       }
 50 |       int pow_rc = verify(prf, ctx->trimmer.sip_keys);
 51 |       if (pow_rc == POW_OK) {
 52 |         print_log("Verified with cyclehash ");
 53 |         unsigned char cyclehash[32];
 54 |         blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)prf, sizeof(proof), 0, 0);
 55 |         for (int i=0; i<32; i++)
 56 |           print_log("%02x", cyclehash[i]);
 57 |         print_log("\n");
 58 |       } else {
 59 |         print_log("FAILED due to %s\n", errstr[pow_rc]);
 60 |       }
 61 |     }
 62 |     sumnsols += nsols;
 63 |     if (stats != NULL) {
 64 |         stats->device_id = 0;
 65 |         stats->edge_bits = EDGEBITS;
 66 |         strncpy(stats->device_name, "CPU\0", MAX_NAME_LEN);
 67 |         stats->last_start_time = time0;
 68 |         stats->last_end_time = time1;
 69 |         stats->last_solution_time = time1 - time0;
 70 |     }
 71 |   }
 72 |   print_log("%d total solutions\n", sumnsols);
 73 |   return sumnsols > 0;
 74 | }
 75 | 
 76 | CALL_CONVENTION SolverCtx* create_solver_ctx(SolverParams* params) {
 77 |   if (params->nthreads == 0) params->nthreads = 1;
 78 |   if (params->ntrims == 0) params->ntrims = EDGEBITS >= 30 ? 96 : 68;
 79 | 
 80 |   SolverCtx* ctx = new SolverCtx(params->nthreads,
 81 |                                  params->ntrims,
 82 |                                  params->allrounds,
 83 |                                  params->showcycle,
 84 |                                  params->mutate_nonce);
 85 |   return ctx;
 86 | }
 87 | 
// Frees a context obtained from create_solver_ctx.
CALL_CONVENTION void destroy_solver_ctx(SolverCtx* ctx) {
  delete ctx;
}

// Forwards to ctx->abort(); ctx must not be null.
CALL_CONVENTION void stop_solver(SolverCtx* ctx) {
  ctx->abort();
}

// Present for interface parity with other solvers; leaves params untouched.
CALL_CONVENTION void fill_default_params(SolverParams* params) {
	// not required in this solver
}
 99 | 
100 | int main(int argc, char **argv) {
101 |   u32 nthreads = 0;
102 |   u32 ntrims = 0;
103 |   u32 nonce = 0;
104 |   u32 range = 1;
105 | #ifdef SAVEEDGES
106 |   bool showcycle = 1;
107 | #else
108 |   bool showcycle = 0;
109 | #endif
110 |   char header[HEADERLEN];
111 |   u32 len;
112 |   bool allrounds = false;
113 |   int c;
114 | 
115 |   memset(header, 0, sizeof(header));
116 |   while ((c = getopt (argc, argv, "ah:m:n:r:st:x:")) != -1) {
117 |     switch (c) {
118 |       case 'a':
119 |         allrounds = true;
120 |         break;
121 |       case 'h':
122 |         len = strlen(optarg);
123 |         assert(len <= sizeof(header));
124 |         memcpy(header, optarg, len);
125 |         break;
126 |       case 'x':
127 |         len = strlen(optarg)/2;
128 |         assert(len == sizeof(header));
129 |         for (u32 i=0; i<len; i++)
130 |           sscanf(optarg+2*i, "%2hhx", header+i);
131 |         break;
132 |       case 'n':
133 |         nonce = atoi(optarg);
134 |         break;
135 |       case 'r':
136 |         range = atoi(optarg);
137 |         break;
138 |       case 'm':
139 |         ntrims = atoi(optarg) & -2; // make even as required by solve()
140 |         break;
141 |       case 's':
142 |         showcycle = true;
143 |         break;
144 |       case 't':
145 |         nthreads = atoi(optarg);
146 |         break;
147 |     }
148 |   }
149 | 
150 |   SolverParams params;
151 |   params.nthreads = nthreads;
152 |   params.ntrims = ntrims;
153 |   params.showcycle = showcycle;
154 |   params.allrounds = allrounds;
155 | 
156 |   SolverCtx* ctx = create_solver_ctx(&params);
157 | 
158 |   print_log("Looking for %d-cycle on cuckaroo%d(\"%s\",%d", PROOFSIZE, EDGEBITS, header, nonce);
159 |   if (range > 1)
160 |     print_log("-%d", nonce+range-1);
161 |   print_log(") with 50%% edges\n");
162 | 
163 |   u64 sbytes = ctx->sharedbytes();
164 |   u32 tbytes = ctx->threadbytes();
165 |   int sunit,tunit;
166 |   for (sunit=0; sbytes >= 10240; sbytes>>=10,sunit++) ;
167 |   for (tunit=0; tbytes >= 10240; tbytes>>=10,tunit++) ;
168 |   print_log("Using %d%cB bucket memory at %lx,\n", sbytes, " KMGT"[sunit], (u64)ctx->trimmer.buckets);
169 |   print_log("%dx%d%cB thread memory at %lx,\n", params.nthreads, tbytes, " KMGT"[tunit], (u64)ctx->trimmer.tbuckets);
170 |   print_log("%d-way siphash, and %d buckets.\n", NSIPHASH, NX);
171 | 
172 | 	run_solver(ctx, header, sizeof(header), nonce, range, NULL, NULL);
173 | 
174 | 	destroy_solver_ctx(ctx);
175 | }
176 | 


--------------------------------------------------------------------------------
/src/cuckaroo/simple.cpp:
--------------------------------------------------------------------------------
  1 | // Cuck(at)oo Cycle, a memory-hard proof-of-work
  2 | // Copyright (c) 2013-2020 John Tromp
  3 | 
  4 | #include "cuckaroo.hpp"
  5 | #include "graph.hpp"
  6 | #include <stdio.h>
  7 | #include <stdlib.h>
  8 | #include <assert.h>
  9 | #include <unistd.h>
 10 | #include <set>
 11 | 
// twice the number of edges; used to scale the -e percentage in main
#define NNODES (2*NEDGES)
// maximum number of solutions the graph is asked to record
#ifndef MAXSOLS
#define MAXSOLS 4
#endif

typedef unsigned char u8;
 18 | 
 19 | class cuckoo_ctx {
 20 | public:
 21 |   siphash_keys sip_keys;
 22 |   word_t easiness;
 23 |   graph<word_t> cg;
 24 | 
 25 |   cuckoo_ctx(const char* header, const u32 headerlen, const u32 nonce, word_t easy_ness) : cg(NEDGES, NEDGES, MAXSOLS) {
 26 |     easiness = easy_ness;
 27 |   }
 28 | 
 29 |   ~cuckoo_ctx() { }
 30 | 
 31 |   u64 bytes() {
 32 |     return cg.bytes();
 33 |   }
 34 | 
 35 |   void setheadernonce(char* const headernonce, const u32 len, const u32 nonce) {
 36 |     ((u32 *)headernonce)[len/sizeof(u32)-1] = htole32(nonce); // place nonce at end
 37 |     setheader(headernonce, len, &sip_keys);
 38 |     cg.reset();
 39 |   }
 40 | 
 41 |   void find_cycles() {
 42 |     u64 sips[EDGE_BLOCK_SIZE];
 43 |     for (word_t block = 0; block < easiness; block += EDGE_BLOCK_SIZE) {
 44 |       sipblock(sip_keys, block, sips);
 45 |       for (u32 i = 0; i < EDGE_BLOCK_SIZE; i++) {
 46 |         u64 edge = sips[i];
 47 |         word_t u = edge & EDGEMASK;
 48 |         word_t v = (edge >> 32) & EDGEMASK;
 49 |         cg.add_edge(u, v);
 50 | #ifdef SHOW
 51 |         word_t nonce = block + i;
 52 |         printf("%d add (%d,%d)\n", nonce,u,v+NEDGES);
 53 |         for (unsigned j=0; j<NNODES; j++) {
 54 |           printf("\t%d",j);
 55 |           for (int a=cg.adjlist[j]; a!=graph<word_t>::NIL; a=cg.links[a].next) printf(":%d", cg.links[a^1].to);
 56 |           if ((j+1)%NEDGES == 0)
 57 |           printf("\n");
 58 |         }
 59 | #endif
 60 |       }
 61 |     }
 62 |     for (u32 s=0; s < cg.nsols; s++) {
 63 |       printf("Solution");
 64 |       // qsort(&cg.sols[s], PROOFSIZE, sizeof(word_t), cg.nonce_cmp);
 65 |       for (u32 j=0; j < PROOFSIZE; j++) {
 66 |         word_t nonce = cg.sols[s][j];
 67 |         // u64 edge = sipblock(sip_keys, nonce, sips);
 68 |         // printf(" (%x,%x)", edge & EDGEMASK, (edge >> 32) & EDGEMASK);
 69 |         printf(" %x", nonce);
 70 |       }
 71 |       printf("\n");
 72 |       int pow_rc = verify(cg.sols[s], sip_keys);
 73 |       if (pow_rc == POW_OK) {
 74 |         printf("Verified with cyclehash ");
 75 |         unsigned char cyclehash[32];
 76 |         blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)cg.sols[s], sizeof(cg.sols[0]), 0, 0);
 77 |         for (int i=0; i<32; i++)
 78 |           printf("%02x", cyclehash[i]);
 79 |         printf("\n");
 80 |       } else {
 81 |         printf("FAILED due to %s\n", errstr[pow_rc]);
 82 |       }
 83 | 
 84 |     }
 85 |   }
 86 | };
 87 | 
 88 | // arbitrary length of header hashed into siphash key
 89 | #define HEADERLEN 80
 90 | 
 91 | int main(int argc, char **argv) {
 92 |   char header[HEADERLEN];
 93 |   memset(header, 0, HEADERLEN);
 94 |   int c, easipct = 50;
 95 |   u32 nonce = 0;
 96 |   u32 range = 1;
 97 |   u64 time0, time1;
 98 |   u32 timems;
 99 | 
100 |   while ((c = getopt (argc, argv, "e:h:n:r:")) != -1) {
101 |     switch (c) {
102 |       case 'e':
103 |         easipct = atoi(optarg);
104 |         break;
105 |       case 'h':
106 |         memcpy(header, optarg, strlen(optarg));
107 |         break;
108 |       case 'n':
109 |         nonce = atoi(optarg);
110 |         break;
111 |       case 'r':
112 |         range = atoi(optarg);
113 |         break;
114 |     }
115 |   }
116 |   assert(easipct >= 0 && easipct <= 100);
117 |   printf("Looking for %d-cycle on cuckaroo%d(\"%s\",%d", PROOFSIZE, EDGEBITS, header, nonce);
118 |   if (range > 1)
119 |     printf("-%d", nonce+range-1);
120 |   printf(") with %d%% edges, ", easipct);
121 |   word_t easiness = easipct * (uint64_t)NNODES / 100;
122 |   cuckoo_ctx ctx(header, sizeof(header), nonce, easiness);
123 |   u64 bytes = ctx.bytes();
124 |   int unit;
125 |   for (unit=0; bytes >= 10240; bytes>>=10,unit++) ;
126 |   printf("using %d%cB memory\n", (u32)bytes, " KMGT"[unit]);
127 | 
128 |   for (u32 r = 0; r < range; r++) {
129 |     time0 = timestamp();
130 |     ctx.setheadernonce(header, sizeof(header), nonce + r);
131 |     printf("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx.sip_keys.k0, ctx.sip_keys.k1, ctx.sip_keys.k2, ctx.sip_keys.k3);
132 |     ctx.find_cycles();
133 |     time1 = timestamp(); timems = (time1 - time0) / 1000000;
134 |     printf("Time: %d ms\n", timems);
135 |   }
136 | }
137 | 


--------------------------------------------------------------------------------
/src/cuckarood/Makefile:
--------------------------------------------------------------------------------
# Build rules and smoke tests for the cuckarood solvers.
.POSIX:
# clear the built-in suffix rules; every target below has an explicit recipe
.SUFFIXES:

# optimization level, overridable from the command line or environment
OPT ?= -O3

GCC_ARCH_FLAGS ?= -march=native
GPP_ARCH_FLAGS ?= -march=native

# -Wno-deprecated-declarations shuts up Apple OSX clang
FLAGS ?= -Wall -Wno-format -Wno-deprecated-declarations -D_POSIX_C_SOURCE=200112L $(OPT) -DPREFETCH -I. $(CPPFLAGS) -pthread
# full C++ and C compiler command lines used by the recipes
GPP ?= g++ $(GPP_ARCH_FLAGS) -std=c++11 $(FLAGS)
CFLAGS ?= -Wall -Wno-format -fomit-frame-pointer $(OPT)
GCC ?= gcc $(GCC_ARCH_FLAGS) -std=gnu11 $(CFLAGS)
# blake2b implementation compiled into every solver binary
BLAKE_2B_SRC ?= ../crypto/blake2b-ref.c
NVCC ?= nvcc -std=c++11 

# default target: build and run both smoke tests
all : simpletest meantest

# runs the simple solver on the EDGEBITS=19 graph with nonce 64
simpletest:     simple19
	./simple19 -n 64

# runs the 4-way mean solver on the EDGEBITS=29 graph with nonce 23, 4 threads, showing the cycle
meantest:	mean29x4
	./mean29x4 -n 23 -t 4 -s
# simple solver builds; the number in the target name is EDGEBITS
simple19:	../crypto/siphash.hpp cuckarood.hpp  bitmap.hpp graph.hpp simple.cpp Makefile
	$(GPP) -o $@ -DPROOFSIZE=42 -DEDGEBITS=19 simple.cpp $(BLAKE_2B_SRC)

simple29:	../crypto/siphash.hpp cuckarood.hpp  bitmap.hpp graph.hpp simple.cpp Makefile
	$(GPP) -o $@ -DPROOFSIZE=42 -DEDGEBITS=29 simple.cpp $(BLAKE_2B_SRC)

# mean solver builds are named mean<EDGEBITS>x<NSIPHASH>;
# the EDGEBITS=19 variants also set XBITS=2
mean19x1:	cuckarood.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
	$(GPP) -o $@ -DXBITS=2 -DNSIPHASH=1 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC)

# 4-way siphash builds pass -mno-avx2
mean19x4:	cuckarood.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
	$(GPP) -o $@ -mno-avx2 -DXBITS=2 -DNSIPHASH=4 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC)

# 8-way siphash builds pass -mavx2
mean19x8:	cuckarood.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
	$(GPP) -o $@ -mavx2 -DXBITS=2 -DNSIPHASH=8 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC)
39 | 
40 | mean29x4:	cuckarood.hpp  bitmap.hpp graph.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
41 | 	$(GPP) -o $@ -mno-avx2 -DNSIPHASH=4 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC)
42 | 
43 | mean29x8:	cuckarood.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
44 | 	$(GPP) -o $@ -mavx2 -DNSIPHASH=8 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC)
45 | 
46 | mean29x8s:	cuckarood.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
47 | 	$(GPP) -o $@ -mavx2 -DSAVEEDGES -DNSIPHASH=8 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC)
48 | 
49 | mean29x1:	cuckarood.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
50 | 	$(GPP) -o $@ -DNSIPHASH=1 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC)
51 | 
52 | mean30x1:	cuckarood.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
53 | 	$(GPP) -o $@ -DNSIPHASH=1 -DEXPANDROUND=10 -DCOMPRESSROUND=22 -DEDGEBITS=30 mean.cpp $(BLAKE_2B_SRC)
54 | 
55 | mean30x8:	cuckarood.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
56 | 	$(GPP) -o $@ -mavx2 -DNSIPHASH=8 -DEXPANDROUND=10 -DCOMPRESSROUND=22 -DEDGEBITS=30 mean.cpp $(BLAKE_2B_SRC)
57 | 
58 | cuda19:		../crypto/siphash.cuh compress.hpp graph.hpp mean.cu Makefile
59 | 	$(NVCC) -o $@ -DEPS_A=4 -DEPS_B=3 -DIDXSHIFT=2 -DEDGEBITS=19 -arch sm_35 mean.cu $(BLAKE_2B_SRC)
60 | 
61 | cuda29:		../crypto/siphash.cuh compress.hpp graph.hpp mean.cu Makefile
62 | 	$(NVCC) -o $@ -DEDGEBITS=29 -arch sm_35 mean.cu $(BLAKE_2B_SRC)
63 | 
64 | photon29:	../crypto/siphash.cuh compress.hpp graph.hpp kernel.cuh photon.cu Makefile
65 | 	$(NVCC) -o $@ -DEDGEBITS=29 -arch sm_35 photon.cu $(BLAKE_2B_SRC)
66 | 


--------------------------------------------------------------------------------
/src/cuckarood/bitmap.hpp:
--------------------------------------------------------------------------------
 1 | // Fixed-size bit set stored in a heap array of word_t words.
 2 | // When ATOMIC is defined the words are std::atomic<word_t> and set/reset/test
 3 | // use relaxed atomic operations; otherwise they are plain word_t accesses.
 4 | // NOTE(review): the class owns `bits` but declares no copy constructor or
 5 | // assignment operator, so copying an instance would free the array twice.
 6 | template <typename word_t>
 7 | class bitmap {
 8 | public:
 9 |   word_t SIZE;         // capacity in bits, as requested by the caller
10 |   word_t BITMAP_WORDS; // number of words allocated in `bits`
11 | #ifdef ATOMIC
12 |   typedef std::atomic<word_t> aword_t;
13 | #else
14 |   typedef word_t aword_t;
15 | #endif
16 |   aword_t *bits;
17 |   const u32 BITS_PER_WORD = sizeof(word_t) * 8;
18 | 
19 |   // Allocates storage for `size` bits. The contents are not zeroed here;
20 |   // call clear() before the first test().
21 |   bitmap(word_t size) {
22 |     SIZE = size;
23 |     // Round up so that a size which is not a multiple of BITS_PER_WORD still
24 |     // gets a word for its last bits (plain division left them out of bounds).
25 |     BITMAP_WORDS = SIZE / BITS_PER_WORD + (SIZE % BITS_PER_WORD != 0);
26 |     bits = new aword_t[BITMAP_WORDS];
27 |     assert(bits != 0);
28 |   }
29 |   ~bitmap() {
30 |     freebits();
31 |   }
32 |   // Releases the bit array; repeated calls are harmless because bits is nulled.
33 |   void freebits() {
34 |     delete[] bits;
35 |     bits = 0;
36 |   }
37 |   // Zeroes every bit.
38 |   void clear() {
39 |     assert(bits);
40 |     memset((word_t *)bits, 0, BITMAP_WORDS*sizeof(word_t));
41 |   }
42 |   // Prefetches the word holding bit u; does nothing unless PREFETCH is defined.
43 |   void prefetch(u32 u) const {
44 | #ifdef PREFETCH
45 |     __builtin_prefetch((const void *)(&bits[u/BITS_PER_WORD]), /*READ=*/0, /*TEMPORAL=*/0);
46 | #endif
47 |   }
48 |   // Sets bit u.
49 |   void set(u32 u) {
50 |     u32 idx = u / BITS_PER_WORD;
51 |     word_t bit = (word_t)1 << (u % BITS_PER_WORD);
52 | #ifdef ATOMIC
53 |     std::atomic_fetch_or_explicit(&bits[idx], bit, std::memory_order_relaxed);
54 | #else
55 |     bits[idx] |= bit;
56 | #endif
57 |   }
58 |   // Clears bit u.
59 |   void reset(u32 u) {
60 |     u32 idx = u / BITS_PER_WORD;
61 |     word_t bit = (word_t)1 << (u % BITS_PER_WORD);
62 | #ifdef ATOMIC
63 |     std::atomic_fetch_and_explicit(&bits[idx], ~bit, std::memory_order_relaxed);
64 | #else
65 |     bits[idx] &= ~bit;
66 | #endif
67 |   }
68 |   // Returns whether bit u is set.
69 |   bool test(u32 u) const {
70 |     u32 idx = u / BITS_PER_WORD;
71 |     u32 bit = u % BITS_PER_WORD;
72 | #ifdef ATOMIC
73 |     return (bits[idx].load(std::memory_order_relaxed) >> bit) & 1;
74 | #else
75 |     return (bits[idx] >> bit) & 1;
76 | #endif
77 |   }
78 |   // Returns the whole word that contains bit n.
79 |   word_t block(u32 n) const {
80 |     u32 idx = n / BITS_PER_WORD;
81 |     return bits[idx];
82 |   }
83 | };
84 | 


--------------------------------------------------------------------------------
/src/cuckarood/compress.hpp:
--------------------------------------------------------------------------------
 1 | #include <new>
 2 | 
 3 | // compressor for cuckatoo nodes where edgetrimming
 4 | // has left at most a fraction 2^-compressbits of the nodes in each partition.
 5 | // Maps sparse node ids to dense ones through an open-addressing hash table;
 6 | // nodes u and u^1 form a pair that shares one table slot and one dense pair index.
 7 | template <typename word_t>
 8 | class compressor {
 9 | public:
10 |   u32 NODEBITS;
11 |   u32 COMPRESSBITS;
12 |   u32 SIZEBITS;   // log2 of the number of table slots
13 |   u32 SIZEBITS1;  // SIZEBITS-1: bits available for a dense pair index
14 |   word_t SIZE;    // number of table slots
15 |   word_t MASK;    // SIZE-1, wraps slot indices
16 |   word_t MASK1;   // low SIZEBITS1 bits, extracts the dense pair index from a slot
17 |   word_t npairs;  // dense pair indices handed out since the last reset()
18 |   const word_t NIL = ~(word_t)0; // marks an empty slot
19 |   word_t *nodes;  // the table
20 |   bool sharedmem; // true when nodes lives in caller-provided memory
21 | 
22 |   // Builds the table in caller-provided storage at `bytes` (bytes() bytes are
23 |   // used); the destructor does not free it. Call reset() before compress().
24 |   compressor(u32 nodebits, u32 compressbits, char *bytes) {
25 |     init(nodebits, compressbits);
26 |     nodes = new (bytes) word_t[SIZE];
27 |     sharedmem = true;
28 |   }
29 | 
30 |   // Builds the table on the heap. Call reset() before compress().
31 |   compressor(u32 nodebits, u32 compressbits) {
32 |     init(nodebits, compressbits);
33 |     nodes = new word_t[SIZE];
34 |     sharedmem = false;
35 |   }
36 | 
37 |   ~compressor() {
38 |     if (!sharedmem)
39 |       delete[] nodes;
40 |   }
41 | 
42 |   // Size of the table in bytes.
43 |   uint64_t bytes() {
44 |     // plain arithmetic instead of sizeof(word_t[SIZE]), which relies on a
45 |     // variable-length-array compiler extension
46 |     return (uint64_t)SIZE * sizeof(word_t);
47 |   }
48 | 
49 |   // Empties the table (every slot becomes NIL) and restarts dense numbering.
50 |   void reset() {
51 |     memset(nodes, (char)NIL, bytes());
52 |     npairs = 0;
53 |   }
54 | 
55 |   // Returns the dense id of node u: (dense pair index << 1) | (u & 1).
56 |   // A pair seen for the first time gets the next free index. Once SIZE/2 pairs
57 |   // exist, further new pairs are logged as overflow and mapped to index 0.
58 |   word_t compress(word_t u) {
59 |     u32 parity = u & 1;
60 |     word_t ui = u >> COMPRESSBITS; // first slot to probe
61 |     u >>= 1;                       // pair id shared by u and u^1
62 |     for (; ; ui = (ui+1) & MASK) { // linear probing with wrap-around
63 |       word_t cu = nodes[ui];
64 |       if (cu == NIL) {             // pair not seen before: claim this slot
65 |         if (npairs >= SIZE/2) {
66 |           print_log("NODE OVERFLOW at %x\n", u << 1 | parity);
67 |           return parity;
68 |         }
69 |         nodes[ui] = u << SIZEBITS1 | npairs; // pair id above, dense index in the low bits
70 |         return (npairs++ << 1) | parity;
71 |       }
72 |       if ((cu & ~MASK1) == u << SIZEBITS1) { // slot already holds this pair
73 |         return ((cu & MASK1) << 1) | parity;
74 |       }
75 |     }
76 |   }
77 | 
78 | private:
79 |   // Derives all size fields from the two bit counts; shared by both constructors.
80 |   void init(u32 nodebits, u32 compressbits) {
81 |     NODEBITS = nodebits;
82 |     COMPRESSBITS = compressbits;
83 |     SIZEBITS = NODEBITS-COMPRESSBITS;
84 |     SIZEBITS1 = SIZEBITS-1;
85 |     SIZE = (word_t)1 << SIZEBITS;
86 |     assert(SIZE);
87 |     MASK = SIZE-1;
88 |     MASK1 = MASK >> 1;
89 |     npairs = 0; // defined even before the first reset()
90 |   }
91 | };
92 | 


--------------------------------------------------------------------------------
/src/cuckarood/cuckarood.c:
--------------------------------------------------------------------------------
 1 | // Cuckoo Cycle, a memory-hard proof-of-work
 2 | // Copyright (c) 2013-2020 John Tromp
 3 | 
 4 | #include "cuckarood.h"
 5 | #include <inttypes.h> // for SCNx64 macro
 6 | #include <stdio.h>    // printf/scanf
 7 | #include <stdlib.h>   // exit
 8 | #include <unistd.h>   // getopt
 9 | #include <assert.h>   // d'uh
10 | 
11 | // arbitrary length of header hashed into siphash key
12 | #define HEADERLEN 80
13 | 
14 | // Proof verifier: derives the siphash keys from the header (-h) and nonce (-n),
15 | // then reads "Solution" records of PROOFSIZE hex numbers from stdin and verifies each.
16 | int main(int argc, char **argv) {
17 |   const char *header = "";
18 |   int nonce = 0;
19 |   int c;
20 |   while ((c = getopt (argc, argv, "h:n:")) != -1) {
21 |     switch (c) {
22 |       case 'h':
23 |         header = optarg;
24 |         break;
25 |       case 'n':
26 |         nonce = atoi(optarg);
27 |         break;
28 |     }
29 |   }
30 |   char headernonce[HEADERLEN];
31 |   size_t hdrlen = strlen(header);
32 |   // A longer header used to overflow headernonce in the memcpy below and make
33 |   // the memset length wrap around; refuse it instead.
34 |   if (hdrlen > sizeof(headernonce)) {
35 |     fprintf(stderr, "header is %zu bytes; at most %d are supported\n", hdrlen, HEADERLEN);
36 |     return 1;
37 |   }
38 |   memcpy(headernonce, header, hdrlen);
39 |   memset(headernonce+hdrlen, 0, sizeof(headernonce)-hdrlen);
40 |   // the nonce occupies the last 4 bytes of the buffer, little-endian
41 |   ((u32 *)headernonce)[HEADERLEN/sizeof(u32)-1] = htole32(nonce);
42 |   siphash_keys keys;
43 |   setheader(headernonce, sizeof(headernonce), &keys);
44 |   printf("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce, keys.k0, keys.k1, keys.k2, keys.k3);
45 |   printf("Verifying size %d proof for cuckarood%d(\"%s\",%d)\n",
46 |                PROOFSIZE, EDGEBITS, header, nonce);
47 |   // scanf has no conversions here, so it returns 0 until end of input (EOF)
48 |   for (int nsols=0; scanf(" Solution") == 0; nsols++) {
49 |     word_t nonces[PROOFSIZE];
50 |     for (int n = 0; n < PROOFSIZE; n++) {
51 |       uint64_t edge; // renamed from `nonce`, which shadowed the header nonce
52 |       int nscan = scanf(" %" SCNx64, &edge);
53 |       assert(nscan == 1);
54 |       nonces[n] = edge;
55 |     }
56 |     int pow_rc = verify(nonces, &keys);
57 |     if (pow_rc == POW_OK) {
58 |       printf("Verified with cyclehash ");
59 |       unsigned char cyclehash[32];
60 |       blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)nonces, sizeof(nonces), 0, 0);
61 |       for (int i=0; i<32; i++)
62 |         printf("%02x", cyclehash[i]);
63 |       printf("\n");
64 |     } else {
65 |       printf("FAILED due to %s\n", errstr[pow_rc]);
66 |     }
67 |   }
68 |   return 0;
69 | }
70 | 


--------------------------------------------------------------------------------
/src/cuckarood/graph.hpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <assert.h>
  4 | #include "bitmap.hpp"
  5 | #include "compress.hpp"
  6 | #include <new>
  7 | 
  8 | // A proof is PROOFSIZE link indices; graph numbers links in add_edge() call order.
  9 | typedef word_t proof[PROOFSIZE];
 10 | 
 11 | // cuck(ar)oo graph with given limit on number of edges (and on single partition nodes)
 12 | // Edges are directed links kept in per-node adjacency lists; every add_edge()
 13 | // call first searches for cycles that the new link would close.
 14 | template <typename word_t>
 15 | class graph {
 16 | public:
 17 |   // terminates adjacency lists
 18 |   const word_t NIL = ~(word_t)0;
 19 | 
 20 |   struct link { // element of adjacency list
 21 |     word_t next; // following link in the same list, or NIL
 22 |     word_t to;   // node this link points at
 23 |   };
 24 | 
 25 |   word_t MAXEDGES;
 26 |   word_t MAXNODES;
 27 |   word_t nlinks; // links added so far; add_edge() adds exactly one per call
 28 |   word_t *adjlist; // index into links array
 29 |   link *links;
 30 |   bool sharedmem; // true when adjlist/links live in caller-provided memory
 31 |   compressor<word_t> *compressu; // null when constructed with compressbits == 0
 32 |   compressor<word_t> *compressv;
 33 |   bitmap<u32> visited; // nodes on the current search path
 34 |   u32 MAXSOLS;
 35 |   proof *sols; // sols[0..nsols-1] are solutions, sols[nsols] is the current path
 36 |   u32 nsols;
 37 | 
 38 |   // Allocates all storage on the heap. Call reset() before adding edges.
 39 |   graph(word_t maxedges, word_t maxnodes, u32 maxsols, u32 compressbits) : visited(2*maxnodes) {
 40 |     MAXEDGES = maxedges;
 41 |     MAXNODES = maxnodes;
 42 |     MAXSOLS = maxsols;
 43 |     adjlist = new word_t[2*MAXNODES]; // index into links array
 44 |     links   = new link[MAXEDGES];
 45 |     compressu = compressbits ? new compressor<word_t>(EDGEBITS, compressbits) : 0;
 46 |     compressv = compressbits ? new compressor<word_t>(EDGEBITS, compressbits) : 0;
 47 |     sharedmem = false;
 48 |     sols    = new proof[MAXSOLS+1]; // extra one for current path
 49 |     visited.clear();
 50 |   }
 51 | 
 52 |   ~graph() {
 53 |     if (!sharedmem) {
 54 |       delete[] adjlist;
 55 |       delete[] links;
 56 |     }
 57 |     // Both constructors heap-allocate the compressor objects, which used to be
 58 |     // leaked; ~compressor itself leaves shared node storage alone.
 59 |     delete compressu;
 60 |     delete compressv;
 61 |     delete[] sols;
 62 |   }
 63 | 
 64 |   // Places adjlist, links and both compressor tables back to back in the
 65 |   // caller-provided storage at `bytes`; bytes() gives the total they occupy.
 66 |   graph(word_t maxedges, word_t maxnodes, u32 maxsols, u32 compressbits, char *bytes) : visited(2*maxnodes) {
 67 |     MAXEDGES = maxedges;
 68 |     MAXNODES = maxnodes;
 69 |     MAXSOLS = maxsols;
 70 |     adjlist = new (bytes) word_t[2*MAXNODES]; // index into links array
 71 |     links   = new (bytes += (uint64_t)2 * MAXNODES * sizeof(word_t)) link[MAXEDGES];
 72 |     compressu = compressbits ? new compressor<word_t>(EDGEBITS, compressbits, bytes += (uint64_t)MAXEDGES * sizeof(link)) : 0;
 73 |     compressv = compressbits ? new compressor<word_t>(EDGEBITS, compressbits, bytes + compressu->bytes()) : 0;
 74 |     sharedmem = true;
 75 |     sols    = new  proof[MAXSOLS+1];
 76 |     visited.clear();
 77 |   }
 78 | 
 79 |   // total size of new-operated data, excludes sols and the visited bitmap
 80 |   uint64_t bytes() {
 81 |     return (uint64_t)2 * MAXNODES * sizeof(word_t) + (uint64_t)MAXEDGES * sizeof(link) + (compressu ? 2 * compressu->bytes() : 0);
 82 |   }
 83 | 
 84 |   // Empties every adjacency list and both compressors, and zeroes the counters.
 85 |   void reset() {
 86 |     memset(adjlist, (char)NIL, (uint64_t)2 * MAXNODES * sizeof(word_t));
 87 |     if (compressu) {
 88 |       compressu->reset();
 89 |       compressv->reset();
 90 |     }
 91 |     resetcounts();
 92 |   }
 93 | 
 94 |   void resetcounts() {
 95 |     nlinks = nsols = 0;
 96 |     // visited has entries set only during cycles() call
 97 |   }
 98 | 
 99 |   // qsort comparator ordering word_t values ascending. Compares instead of
100 |   // subtracting: an unsigned difference converted to int can have the wrong sign.
101 |   static int nonce_cmp(const void *a, const void *b) {
102 |     const word_t x = *(const word_t *)a, y = *(const word_t *)b;
103 |     return (x > y) - (x < y);
104 |   }
105 | 
106 |   // Depth-first search from node u for node dest, where sols[nsols][0..len-1]
107 |   // holds the links used so far. A path of exactly PROOFSIZE links is stored as a solution.
108 |   void cycles_with_link(u32 len, word_t u, word_t dest) {
109 |     if (visited.test(u))
110 |       return;
111 |     if (u == dest) {
112 |       print_log("  %d-cycle found\n", len);
113 |       if (len == PROOFSIZE && nsols < MAXSOLS) {
114 |         // Carry the path forward in traversal order BEFORE sorting it, so the
115 |         // search that continues after unwinding still sees its real prefix.
116 |         memcpy(sols[nsols+1], sols[nsols], sizeof(sols[0]));
117 |         qsort(sols[nsols++], PROOFSIZE, sizeof(word_t), nonce_cmp);
118 |       }
119 |       return;
120 |     }
121 |     if (len == PROOFSIZE)
122 |       return;
123 |     word_t au1 = adjlist[u];
124 |     if (au1 != NIL) {
125 |       visited.set(u);
126 |       for (; au1 != NIL; au1 = links[au1].next) {
127 |         sols[nsols][len] = au1;
128 |         cycles_with_link(len+1, links[au1].to, dest);
129 |       }
130 |       visited.reset(u);
131 |     }
132 |   }
133 | 
134 |   // Adds a link between u (first partition) and v (second partition, stored at
135 |   // v+MAXNODES). The link runs from u to v, or from v to u when dir is nonzero.
136 |   void add_edge(word_t u, word_t v, u32 dir) {
137 |     assert(u < MAXNODES);
138 |     assert(v < MAXNODES);
139 |     v += MAXNODES; // distinguish partitions
140 |     if (dir) {
141 |       // swap through a word_t (was u32) so wider word types are not truncated
142 |       word_t tmp = v;
143 |       v = u;
144 |       u = tmp;
145 |     }
146 |     if (adjlist[v] != NIL) { // possibly part of a cycle
147 |       sols[nsols][0] = nlinks;
148 |       assert(!visited.test(u));
149 |       cycles_with_link(1, v, u);
150 |     }
151 |     word_t ulink = nlinks++;
152 |     assert(ulink != NIL);    // avoid confusing links with NIL; guaranteed if bits in word_t > EDGEBITS + 1
153 |     assert(ulink < MAXEDGES);
154 |     links[ulink].next = adjlist[u];
155 |     links[adjlist[u] = ulink].to = v;
156 |   }
157 | 
158 |   // Like add_edge(), but first maps both endpoints through the compressors; the direction is u's low bit.
159 |   void add_compress_edge(word_t u, word_t v) {
160 |     assert( (u&1) == (v&1));
161 |     add_edge(compressu->compress(u) >> 1, compressv->compress(v) >> 1, u&1);
162 |   }
163 | };
164 | 


--------------------------------------------------------------------------------
/src/cuckarood/mean.cpp:
--------------------------------------------------------------------------------
  1 | // Cuckarood Cycle, a memory-hard proof-of-work
  2 | // Copyright (c) 2013-2020 John Tromp
  3 | 
  4 | #include "mean.hpp"
  5 | #include <unistd.h>
  6 | #include <chrono>
  7 | 
  8 | #ifndef HEADERLEN
  9 | // arbitrary length of header hashed into siphash key
 10 | #define HEADERLEN 80
 11 | #endif
 12 | 
 13 | typedef solver_ctx SolverCtx;
 14 | 
 15 | // Solves `range` consecutive nonces starting at `nonce`, logging every proof
 16 | // found together with its verification result. Proofs are also copied into
 17 | // `solutions`, and the latest run's timing into `stats`, when those are non-NULL.
 18 | // Returns 1 if at least one solution was found over the whole range, else 0.
 19 | CALL_CONVENTION int run_solver(SolverCtx* ctx, char* header, int header_length,
 20 |                                u32 nonce, u32 range,
 21 |                                SolverSolutions *solutions, SolverStats *stats)
 22 | {
 23 |   u32 total = 0; // solutions found so far, over all nonces
 24 | 
 25 |   for (u32 r = 0; r < range; r++) {
 26 |     const u32 cur = nonce + r;
 27 |     const u64 began = timestamp();
 28 |     ctx->setheadernonce(header, header_length, cur);
 29 |     print_log("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", cur, ctx->trimmer.sip_keys.k0, ctx->trimmer.sip_keys.k1, ctx->trimmer.sip_keys.k2, ctx->trimmer.sip_keys.k3);
 30 |     const u32 nsols = ctx->solve();
 31 |     const u64 ended = timestamp();
 32 |     const u32 timems = (ended - began) / 1000000;
 33 |     print_log("Time: %d ms\n", timems);
 34 | 
 35 |     for (u32 s = 0; s < nsols; s++) {
 36 |       word_t *prf = &ctx->sols[s * PROOFSIZE];
 37 |       print_log("Solution");
 38 |       for (u32 i = 0; i < PROOFSIZE; i++)
 39 |         print_log(" %jx", (uintmax_t)prf[i]);
 40 |       print_log("\n");
 41 |       if (solutions) { // hand the proof to the caller
 42 |         solutions->edge_bits = EDGEBITS;
 43 |         solutions->num_sols++;
 44 |         solutions->sols[total + s].nonce = cur;
 45 |         for (u32 i = 0; i < PROOFSIZE; i++)
 46 |           solutions->sols[total + s].proof[i] = (u64) prf[i];
 47 |       }
 48 |       const int pow_rc = verify(prf, ctx->trimmer.sip_keys);
 49 |       if (pow_rc != POW_OK) {
 50 |         print_log("FAILED due to %s\n", errstr[pow_rc]);
 51 |         continue;
 52 |       }
 53 |       print_log("Verified with cyclehash ");
 54 |       unsigned char cyclehash[32];
 55 |       blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)prf, sizeof(proof), 0, 0);
 56 |       for (u32 b = 0; b < sizeof(cyclehash); b++)
 57 |         print_log("%02x", cyclehash[b]);
 58 |       print_log("\n");
 59 |     }
 60 |     total += nsols;
 61 | 
 62 |     if (stats) { // record this nonce's run
 63 |       stats->device_id = 0;
 64 |       stats->edge_bits = EDGEBITS;
 65 |       strncpy(stats->device_name, "CPU\0", MAX_NAME_LEN);
 66 |       stats->last_start_time = began;
 67 |       stats->last_end_time = ended;
 68 |       stats->last_solution_time = ended - began;
 69 |     }
 70 |   }
 71 | 
 72 |   print_log("%d total solutions\n", total);
 73 |   return total > 0;
 74 | }
 75 | 
 76 | // Allocates a solver context configured from *params. Zero fields are first
 77 | // replaced in place by defaults: nthreads becomes 1, and ntrims becomes 96
 78 | // when EDGEBITS >= 30 and 68 otherwise.
 79 | CALL_CONVENTION SolverCtx* create_solver_ctx(SolverParams* params) {
 80 |   if (!params->nthreads)
 81 |     params->nthreads = 1;
 82 |   if (!params->ntrims)
 83 |     params->ntrims = (EDGEBITS < 30) ? 68 : 96;
 84 |   return new SolverCtx(params->nthreads, params->ntrims, params->allrounds,
 85 |                        params->showcycle, params->mutate_nonce);
 86 | }
 87 | // Deletes a context that was allocated by create_solver_ctx().
 88 | CALL_CONVENTION void destroy_solver_ctx(SolverCtx* ctx) {
 89 |   delete ctx;
 90 | }
 91 | // Forwards to ctx->abort(); presumably makes a running solve() stop early - TODO confirm in mean.hpp.
 92 | CALL_CONVENTION void stop_solver(SolverCtx* ctx) {
 93 |   ctx->abort();
 94 | }
 95 | // Intentionally empty: *params is left untouched.
 96 | CALL_CONVENTION void fill_default_params(SolverParams* params) {
 97 | 	// not required in this solver
 98 | }
 99 | 
100 | // Command-line driver: parses the options, creates a solver context, logs its
101 | // memory use and runs the solver over `range` nonces starting at `nonce`.
102 | //   -a  set allrounds                 -s  set showcycle
103 | //   -h  header given as text          -x  header given as 2*HEADERLEN hex digits
104 | //   -n  first nonce                   -r  number of nonces to try
105 | //   -m  ntrims (rounded down to even) -t  nthreads
106 | int main(int argc, char **argv) {
107 |   u32 nthreads = 0;
108 |   u32 ntrims = 0;
109 |   u32 nonce = 0;
110 |   u32 range = 1;
111 | #ifdef SAVEEDGES
112 |   bool showcycle = 1;
113 | #else
114 |   bool showcycle = 0;
115 | #endif
116 |   char header[HEADERLEN];
117 |   u32 len;
118 |   bool allrounds = false;
119 |   int c;
120 | 
121 |   memset(header, 0, sizeof(header));
122 |   while ((c = getopt (argc, argv, "ah:m:n:r:st:x:")) != -1) {
123 |     switch (c) {
124 |       case 'a':
125 |         allrounds = true;
126 |         break;
127 |       case 'h':
128 |         len = strlen(optarg);
129 |         assert(len <= sizeof(header));
130 |         memcpy(header, optarg, len);
131 |         break;
132 |       case 'x':
133 |         len = strlen(optarg)/2;
134 |         assert(len == sizeof(header));
135 |         for (u32 i=0; i<len; i++)
136 |           sscanf(optarg+2*i, "%2hhx", header+i);
137 |         break;
138 |       case 'n':
139 |         nonce = atoi(optarg);
140 |         break;
141 |       case 'r':
142 |         range = atoi(optarg);
143 |         break;
144 |       case 'm':
145 |         ntrims = atoi(optarg) & -2; // make even as required by solve()
146 |         break;
147 |       case 's':
148 |         showcycle = true;
149 |         break;
150 |       case 't':
151 |         nthreads = atoi(optarg);
152 |         break;
153 |     }
154 |   }
155 | 
156 |   SolverParams params;
157 |   params.nthreads = nthreads;
158 |   params.ntrims = ntrims;
159 |   params.showcycle = showcycle;
160 |   params.allrounds = allrounds;
161 | 
162 |   SolverCtx* ctx = create_solver_ctx(&params);
163 | 
164 |   // header can fill its buffer completely (a HEADERLEN-byte -h value, or any -x
165 |   // value) and then has no terminating NUL, so cap the %s at the buffer size.
166 |   print_log("Looking for %d-cycle on cuckarood%d(\"%.*s\",%d", PROOFSIZE, EDGEBITS, (int)sizeof(header), header, nonce);
167 |   if (range > 1)
168 |     print_log("-%d", nonce+range-1);
169 |   print_log(") with 50%% edges\n");
170 | 
171 |   // scale each byte count below 10240 and remember how many times it was divided by 1024
172 |   u64 sbytes = ctx->sharedbytes();
173 |   u32 tbytes = ctx->threadbytes();
174 |   int sunit,tunit;
175 |   for (sunit=0; sbytes >= 10240; sbytes>>=10,sunit++) ;
176 |   for (tunit=0; tbytes >= 10240; tbytes>>=10,tunit++) ;
177 |   // sbytes is a u64 but is printed with %d: narrow it explicitly (it is < 10240 here)
178 |   print_log("Using %d%cB bucket memory at %lx,\n", (u32)sbytes, " KMGT"[sunit], (u64)ctx->trimmer.buckets);
179 |   print_log("%dx%d%cB thread memory at %lx,\n", params.nthreads, tbytes, " KMGT"[tunit], (u64)ctx->trimmer.tbuckets);
180 |   print_log("%d-way siphash, and %d buckets.\n", NSIPHASH, NX);
181 | 
182 |   run_solver(ctx, header, sizeof(header), nonce, range, NULL, NULL);
183 | 
184 |   destroy_solver_ctx(ctx);
185 | }
186 | 


--------------------------------------------------------------------------------
/src/cuckarood/simple.cpp:
--------------------------------------------------------------------------------
  1 | // Cuck(at)oo Cycle, a memory-hard proof-of-work
  2 | // Copyright (c) 2013-2020 John Tromp
  3 | 
  4 | #include "cuckarood.hpp"
  5 | #include "graph.hpp"
  6 | #include <stdio.h>
  7 | #include <stdlib.h>
  8 | #include <assert.h>
  9 | #include <unistd.h>
 10 | #include <set>
 11 | 
 12 | typedef unsigned char u8;
 13 | 
 14 | // Reference solver: adds every edge to a graph and reports the cycles it records.
 15 | class cuckoo_ctx {
 16 | public:
 17 |   siphash_keys sip_keys;
 18 |   graph<word_t> cg;
 19 | 
 20 |   // header, headerlen and nonce are unused here; the keys come from setheadernonce().
 21 |   cuckoo_ctx(const char* header, const u32 headerlen, const u32 nonce) : cg(NEDGES2, NNODES1, MAX_SOLS, 0) { }
 22 | 
 23 |   ~cuckoo_ctx() { }
 24 |   u64 bytes() { return cg.bytes(); }
 25 | 
 26 |   // Stores nonce in the last 4 bytes of headernonce, derives sip_keys from it and resets the graph.
 27 |   void setheadernonce(char* const headernonce, const u32 len, const u32 nonce) {
 28 |     ((u32 *)headernonce)[len/sizeof(u32)-1] = htole32(nonce); // place nonce at end
 29 |     setheader(headernonce, len, &sip_keys);
 30 |     cg.reset();
 31 |   }
 32 | 
 33 |   // Adds all NEDGES2 edges block by block, then prints and verifies every solution recorded.
 34 |   void find_cycles() {
 35 |     u64 sips[EDGE_BLOCK_SIZE];
 36 |     for (word_t block = 0; block < NEDGES2; block += EDGE_BLOCK_SIZE) {
 37 |       sipblock(sip_keys, block, sips);
 38 |       for (u32 i = 0; i < EDGE_BLOCK_SIZE; i++) {
 39 |         u64 edge = sips[i];
 40 |         word_t u = edge & NODE1MASK;         // endpoint taken from the low bits
 41 |         word_t v = (edge >> 32) & NODE1MASK; // endpoint taken from bit 32 upwards
 42 |         cg.add_edge(u, v, i&1);              // odd-numbered edges are added reversed
 43 | #ifdef SHOW
 44 |         word_t nonce = block + i;
 45 |         printf("%d add (%d,%d)\n", nonce,u,v+NNODES1);
 46 |         for (unsigned j=0; j<NNODES2; j++) {
 47 |           printf("\t%d",j);
 48 |           // NIL is a per-object member of graph, and each link stores its own target
 49 |           for (word_t a=cg.adjlist[j]; a!=cg.NIL; a=cg.links[a].next) printf(":%d", cg.links[a].to);
 50 |           if ((j+1) % NNODES1 == 0)
 51 |           printf("\n");
 52 |         }
 53 | #endif
 54 |       }
 55 |     }
 56 |     for (u32 s=0; s < cg.nsols; s++) {
 57 |       printf("Solution");
 58 |       // qsort(&cg.sols[s], PROOFSIZE, sizeof(word_t), cg.nonce_cmp);
 59 |       for (u32 j=0; j < PROOFSIZE; j++) {
 60 |         word_t nonce = cg.sols[s][j];
 61 |         // u64 edge = sipblock(sip_keys, nonce, sips);
 62 |         // printf(" (%x,%x)", edge & NODE1MASK, (edge >> 32) & NODE1MASK);
 63 |         printf(" %x", nonce);
 64 |       }
 65 |       printf("\n");
 66 |       int pow_rc = verify(cg.sols[s], sip_keys);
 67 |       if (pow_rc == POW_OK) {
 68 |         printf("Verified with cyclehash ");
 69 |         unsigned char cyclehash[32];
 70 |         blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)cg.sols[s], sizeof(cg.sols[0]), 0, 0);
 71 |         for (int i=0; i<32; i++)
 72 |           printf("%02x", cyclehash[i]);
 73 |         printf("\n");
 74 |       } else {
 75 |         printf("FAILED due to %s\n", errstr[pow_rc]);
 76 |       }
 77 |     }
 78 |   }
 79 | };
 80 | 
 81 | // arbitrary length of header hashed into siphash key
 82 | #define HEADERLEN 80
 83 | 
 84 | // Command-line driver: -h header text, -n first nonce, -r number of nonces.
 85 | // Runs find_cycles() once per nonce and prints how long each run took.
 86 | int main(int argc, char **argv) {
 87 |   char header[HEADERLEN];
 88 |   memset(header, 0, HEADERLEN);
 89 |   int c;
 90 |   u32 nonce = 0;
 91 |   u32 range = 1;
 92 |   u64 time0, time1;
 93 |   u32 timems;
 94 | 
 95 |   while ((c = getopt (argc, argv, "h:n:r:")) != -1) {
 96 |     switch (c) {
 97 |       case 'h': {
 98 |         // an unchecked memcpy here used to overflow header for long arguments
 99 |         size_t len = strlen(optarg);
100 |         if (len > sizeof(header)) {
101 |           fprintf(stderr, "header is %zu bytes; at most %d are supported\n", len, HEADERLEN);
102 |           return 1;
103 |         }
104 |         memcpy(header, optarg, len);
105 |         break;
106 |       }
107 |       case 'n':
108 |         nonce = atoi(optarg);
109 |         break;
110 |       case 'r':
111 |         range = atoi(optarg);
112 |         break;
113 |     }
114 |   }
115 |   // header has no terminating NUL when it fills the whole buffer, so cap the %s
116 |   printf("Looking for %d-cycle on cuckarood%d(\"%.*s\",%d", PROOFSIZE, EDGEBITS, (int)sizeof(header), header, nonce);
117 |   if (range > 1)
118 |     printf("-%d", nonce+range-1);
119 |   printf("), ");
120 |   cuckoo_ctx ctx(header, sizeof(header), nonce);
121 |   u64 bytes = ctx.bytes();
122 |   int unit;
123 |   for (unit=0; bytes >= 10240; bytes>>=10,unit++) ;
124 |   printf("using %d%cB memory\n", (u32)bytes, " KMGT"[unit]);
125 | 
126 |   for (u32 r = 0; r < range; r++) {
127 |     time0 = timestamp();
128 |     ctx.setheadernonce(header, sizeof(header), nonce + r);
129 |     printf("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx.sip_keys.k0, ctx.sip_keys.k1, ctx.sip_keys.k2, ctx.sip_keys.k3);
130 |     ctx.find_cycles();
131 |     time1 = timestamp(); timems = (time1 - time0) / 1000000;
132 |     printf("Time: %d ms\n", timems);
133 |   }
134 | }
135 | 


--------------------------------------------------------------------------------
/src/cuckaroom/Makefile:
--------------------------------------------------------------------------------
 1 | .POSIX:
 2 | .SUFFIXES:
 3 | # These targets run tests instead of creating files; without .PHONY a stray
 4 | # file named "all", "simpletest" or "meantest" would silently disable them.
 5 | .PHONY: all simpletest meantest
 6 | 
 7 | OPT ?= -O3
 8 | 
 9 | GCC_ARCH_FLAGS ?= -march=native
10 | GPP_ARCH_FLAGS ?= -march=native
11 | 
12 | # -Wno-deprecated-declarations shuts up Apple OSX clang
13 | FLAGS ?= -Wall -Wno-format -Wno-deprecated-declarations -D_POSIX_C_SOURCE=200112L $(OPT) -DPREFETCH -I. $(CPPFLAGS) -pthread
14 | GPP ?= g++ $(GPP_ARCH_FLAGS) -std=c++11 $(FLAGS)
15 | CFLAGS ?= -Wall -Wno-format -fomit-frame-pointer $(OPT)
16 | GCC ?= gcc $(GCC_ARCH_FLAGS) -std=gnu11 $(CFLAGS)
17 | BLAKE_2B_SRC ?= ../crypto/blake2b-ref.c
18 | NVCC ?= nvcc -std=c++11
19 | 
20 | all : simpletest # cpu miner not working yet # meantest
21 | 
22 | simpletest:     simple19
23 | 	./simple19 -n 64
24 | 
25 | meantest:	mean29x4
26 | 	./mean29x4 -n 23 -t 4 -s
27 | 
28 | simple19:	../crypto/siphash.hpp cuckaroom.hpp  bitmap.hpp graph.hpp simple.cpp Makefile
29 | 	$(GPP) -o $@ -DPROOFSIZE=42 -DEDGEBITS=19 simple.cpp $(BLAKE_2B_SRC)
30 | 
31 | simple29:	../crypto/siphash.hpp cuckaroom.hpp  bitmap.hpp graph.hpp simple.cpp Makefile
32 | 	$(GPP) -o $@ -DPROOFSIZE=42 -DEDGEBITS=29 simple.cpp $(BLAKE_2B_SRC)
33 | 
34 | mean19x1:	cuckaroom.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
35 | 	$(GPP) -o $@ -DXBITS=2 -DNSIPHASH=1 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC)
36 | 
37 | mean19x4:	cuckaroom.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
38 | 	$(GPP) -o $@ -mno-avx2 -DXBITS=2 -DNSIPHASH=4 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC)
39 | 
40 | mean19x8:	cuckaroom.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
41 | 	$(GPP) -o $@ -mavx2 -DXBITS=2 -DNSIPHASH=8 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC)
42 | 
43 | # Lists ../threads/barrier.hpp like every other mean target, so that editing
44 | # that header also rebuilds the binary used by meantest.
45 | mean29x4:	cuckaroom.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
46 | 	$(GPP) -o $@ -mno-avx2 -DNSIPHASH=4 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC)
47 | 
48 | mean29x8:	cuckaroom.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
49 | 	$(GPP) -o $@ -mavx2 -DNSIPHASH=8 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC)
50 | 
51 | mean29x8s:	cuckaroom.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
52 | 	$(GPP) -o $@ -mavx2 -DSAVEEDGES -DNSIPHASH=8 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC)
53 | 
54 | mean29x1:	cuckaroom.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
55 | 	$(GPP) -o $@ -DNSIPHASH=1 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC)
56 | 
57 | mean30x1:	cuckaroom.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
58 | 	$(GPP) -o $@ -DNSIPHASH=1 -DEXPANDROUND=10 -DCOMPRESSROUND=22 -DEDGEBITS=30 mean.cpp $(BLAKE_2B_SRC)
59 | 
60 | mean30x8:	cuckaroom.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
61 | 	$(GPP) -o $@ -mavx2 -DNSIPHASH=8 -DEXPANDROUND=10 -DCOMPRESSROUND=22 -DEDGEBITS=30 mean.cpp $(BLAKE_2B_SRC)
62 | 
63 | cuda19:		../crypto/siphash.cuh compress.hpp graph.hpp mean.cu Makefile
64 | 	$(NVCC) -o $@ -DEPS_A=4 -DEPS_B=3 -DIDXSHIFT=2 -DEDGEBITS=19 -arch sm_35 mean.cu $(BLAKE_2B_SRC)
65 | 
66 | oldcuda29:	../crypto/siphash.cuh compress.hpp graph.hpp mean.cu kernel.cuh Makefile
67 | 	$(NVCC) -o $@ -DEDGEBITS=29 -arch sm_35 mean.cu $(BLAKE_2B_SRC)
68 | 
69 | cuda29:		../crypto/siphash.cuh compress.hpp graph.hpp meaner.cu Makefile
70 | 	$(NVCC) -o $@ -DEDGEBITS=29 -arch sm_35 meaner.cu $(BLAKE_2B_SRC)
71 | 
72 | cuda66:		../crypto/siphash.cuh compress.hpp graph.hpp meaner66.cu Makefile
73 | 	$(NVCC) -o $@ -DEDGEBITS=29 -arch sm_35 meaner66.cu $(BLAKE_2B_SRC)
74 | 
75 | cuda29v:		../crypto/siphash.cuh compress.hpp graph.hpp meaner.cu Makefile
76 | 	$(NVCC) -o $@ -DEDGEBITS=29 -DVERBOSE -arch sm_35 meaner.cu $(BLAKE_2B_SRC)
77 | 
78 | old66v:		../crypto/siphash.cuh compress.hpp graph.hpp old66.cu Makefile
79 | 	$(NVCC) -o $@ -DEDGEBITS=29 -DVERBOSE -arch sm_35 old66.cu $(BLAKE_2B_SRC)
80 | 
81 | cuda66v:		../crypto/siphash.cuh compress.hpp graph.hpp meaner66.cu Makefile
82 | 	$(NVCC) -o $@ -DEDGEBITS=29 -DVERBOSE -arch sm_35 meaner66.cu $(BLAKE_2B_SRC)
83 | 

--------------------------------------------------------------------------------
/src/cuckaroom/bitmap.hpp:
--------------------------------------------------------------------------------
 1 | // Fixed-size bit set stored in a heap array of word_t words.
 2 | // When ATOMIC is defined the words are std::atomic<word_t> and set/reset/test
 3 | // use relaxed atomic operations; otherwise they are plain word_t accesses.
 4 | // NOTE(review): the class owns `bits` but declares no copy constructor or
 5 | // assignment operator, so copying an instance would free the array twice.
 6 | template <typename word_t>
 7 | class bitmap {
 8 | public:
 9 |   word_t SIZE;         // capacity in bits, as requested by the caller
10 |   word_t BITMAP_WORDS; // number of words allocated in `bits`
11 | #ifdef ATOMIC
12 |   typedef std::atomic<word_t> aword_t;
13 | #else
14 |   typedef word_t aword_t;
15 | #endif
16 |   aword_t *bits;
17 |   const u32 BITS_PER_WORD = sizeof(word_t) * 8;
18 | 
19 |   // Allocates storage for `size` bits. The contents are not zeroed here;
20 |   // call clear() before the first test().
21 |   bitmap(word_t size) {
22 |     SIZE = size;
23 |     // Round up so that a size which is not a multiple of BITS_PER_WORD still
24 |     // gets a word for its last bits (plain division left them out of bounds).
25 |     BITMAP_WORDS = SIZE / BITS_PER_WORD + (SIZE % BITS_PER_WORD != 0);
26 |     bits = new aword_t[BITMAP_WORDS];
27 |     assert(bits != 0);
28 |   }
29 |   ~bitmap() {
30 |     freebits();
31 |   }
32 |   // Releases the bit array; repeated calls are harmless because bits is nulled.
33 |   void freebits() {
34 |     delete[] bits;
35 |     bits = 0;
36 |   }
37 |   // Zeroes every bit.
38 |   void clear() {
39 |     assert(bits);
40 |     memset((word_t *)bits, 0, BITMAP_WORDS*sizeof(word_t));
41 |   }
42 |   // Prefetches the word holding bit u; does nothing unless PREFETCH is defined.
43 |   void prefetch(u32 u) const {
44 | #ifdef PREFETCH
45 |     __builtin_prefetch((const void *)(&bits[u/BITS_PER_WORD]), /*READ=*/0, /*TEMPORAL=*/0);
46 | #endif
47 |   }
48 |   // Sets bit u.
49 |   void set(u32 u) {
50 |     u32 idx = u / BITS_PER_WORD;
51 |     word_t bit = (word_t)1 << (u % BITS_PER_WORD);
52 | #ifdef ATOMIC
53 |     std::atomic_fetch_or_explicit(&bits[idx], bit, std::memory_order_relaxed);
54 | #else
55 |     bits[idx] |= bit;
56 | #endif
57 |   }
58 |   // Clears bit u.
59 |   void reset(u32 u) {
60 |     u32 idx = u / BITS_PER_WORD;
61 |     word_t bit = (word_t)1 << (u % BITS_PER_WORD);
62 | #ifdef ATOMIC
63 |     std::atomic_fetch_and_explicit(&bits[idx], ~bit, std::memory_order_relaxed);
64 | #else
65 |     bits[idx] &= ~bit;
66 | #endif
67 |   }
68 |   // Returns whether bit u is set.
69 |   bool test(u32 u) const {
70 |     u32 idx = u / BITS_PER_WORD;
71 |     u32 bit = u % BITS_PER_WORD;
72 | #ifdef ATOMIC
73 |     return (bits[idx].load(std::memory_order_relaxed) >> bit) & 1;
74 | #else
75 |     return (bits[idx] >> bit) & 1;
76 | #endif
77 |   }
78 |   // Returns the whole word that contains bit n.
79 |   word_t block(u32 n) const {
80 |     u32 idx = n / BITS_PER_WORD;
81 |     return bits[idx];
82 |   }
83 | };
84 | 


--------------------------------------------------------------------------------
/src/cuckaroom/compress.hpp:
--------------------------------------------------------------------------------
 1 | #include <new>
 2 | 
 3 | // compressor for cuckaroom nodes where edgetrimming
 4 | // has left at most a fraction 2^-compressbits nodes in each partition
 5 | template <typename word_t>
 6 | class compressor {
 7 | public:
 8 |   u32 NODEBITS;   // nodebits as passed to the constructor
 9 |   u32 SHIFTBITS;  // compressbits: low bits dropped to pick the first probe slot
10 |   u32 SIZEBITS;   // NODEBITS - compressbits: width of a compressed index
11 |   word_t SIZE;    // 2^SIZEBITS, the maximum number of distinct nodes
12 |   word_t SIZE2;   // 2*SIZE, the number of table slots
13 |   word_t MASK;    // SIZE-1, selects the compressed index stored in a slot
14 |   word_t MASK2;   // SIZE2-1, wraps the probe position around the table
15 |   word_t nnodes;  // compressed indices handed out since the last reset()
16 |   const word_t NIL = ~(word_t)0; // value of an empty slot
17 |   word_t *nodes;  // probe table of SIZE2 slots; contents are undefined until reset()
18 |   bool sharedmem; // true when nodes lives in caller-provided memory
19 | 
20 |   compressor(u32 nodebits, u32 compressbits, char *bytes) { // table is placed in caller-provided storage
21 |     NODEBITS = nodebits;
22 |     SHIFTBITS = compressbits;
23 |     SIZEBITS = NODEBITS-compressbits;
24 |     SIZE = (word_t)1 << SIZEBITS;
25 |     SIZE2 = (word_t)2 << SIZEBITS;
26 |     nodes = new (bytes) word_t[SIZE2];
27 |     sharedmem = true;
28 |     MASK = SIZE-1;
29 |     MASK2 = SIZE2-1;
30 |   }
31 | 
32 |   compressor(u32 nodebits, u32 compressbits) { // table is allocated and owned by this object
33 |     NODEBITS = nodebits;
34 |     SHIFTBITS = compressbits;
35 |     SIZEBITS = NODEBITS-compressbits;
36 |     SIZE = (word_t)1 << SIZEBITS;
37 |     SIZE2 = (word_t)2 << SIZEBITS;
38 |     nodes = new word_t[SIZE2];
39 |     sharedmem = false;
40 |     MASK = SIZE-1;
41 |     MASK2 = SIZE2-1;
42 |   }
43 | 
44 |   ~compressor() {
45 |     if (!sharedmem) // caller-provided storage is not ours to free
46 |       delete[] nodes;
47 |   }
48 | 
49 |   uint64_t bytes() { // size of the probe table in bytes
50 |     return (uint64_t)SIZE2 * sizeof(word_t); // plain multiplication instead of a runtime-sized array type
51 |   }
52 | 
53 |   void reset() { // empties the table; must run before the first compress()
54 |     memset(nodes, (char)NIL, bytes()); // every byte 0xff, so each slot reads as NIL
55 |     nnodes = 0;
56 |   }
57 | 
58 |   word_t compress(word_t u) { // returns the index assigned to u, assigning the next free one on first sight
59 |     word_t ui = u >> SHIFTBITS;
60 |     for (; ; ui = (ui+1) & MASK2) { // probe consecutive slots, wrapping at SIZE2
61 |       word_t cu = nodes[ui];
62 |       if (cu == NIL) { // empty slot reached: u has no index yet
63 |         if (nnodes >= SIZE) {
64 |           print_log("NODE OVERFLOW at %x\n", u);
65 |           return 0;
66 |         }
67 |         nodes[ui] = u << SIZEBITS | nnodes; // shifted node above, its index in the low SIZEBITS bits
68 |         return nnodes++;
69 |       }
70 |       if ((cu & ~MASK) == u << SIZEBITS) { // slot already belongs to u
71 |         return cu & MASK;
72 |       }
73 |     }
74 |   }
75 | };
76 | 


--------------------------------------------------------------------------------
/src/cuckaroom/cuckaroom.c:
--------------------------------------------------------------------------------
 1 | // Cuckaroom Cycle, a memory-hard proof-of-work
 2 | // Copyright (c) 2013-2020 John Tromp
 3 | 
 4 | #include "cuckaroom.h"
 5 | #include <inttypes.h> // for SCNx64 macro
 6 | #include <stdio.h>    // printf/scanf
 7 | #include <stdlib.h>   // exit
 8 | #include <unistd.h>   // getopt
 9 | #include <assert.h>   // d'uh
10 | 
11 | // arbitrary length of header hashed into siphash key
12 | #define HEADERLEN 80
13 | 
14 | int main(int argc, char **argv) {
15 |   const char *header = "";
16 |   int nonce = 0, c;
17 |   while ((c = getopt (argc, argv, "h:n:")) != -1) {
18 |     switch (c) {
19 |       case 'h':
20 |         header = optarg;
21 |         break;
22 |       case 'n':
23 |         nonce = atoi(optarg);
24 |         break;
25 |     }
26 |   }
27 |   char headernonce[HEADERLEN];
28 |   u32 hdrlen = strlen(header);
29 |   assert(hdrlen <= sizeof(headernonce)); // a longer header would overflow headernonce and underflow the memset size
30 |   memcpy(headernonce, header, hdrlen);
31 |   memset(headernonce+hdrlen, 0, sizeof(headernonce)-hdrlen);
32 |   ((u32 *)headernonce)[HEADERLEN/sizeof(u32)-1] = htole32(nonce); // nonce overwrites the last 4 bytes
33 |   siphash_keys keys;
34 |   setheader(headernonce, sizeof(headernonce), &keys);
35 |   printf("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce, keys.k0, keys.k1, keys.k2, keys.k3);
36 |   printf("Verifying size %d proof for cuckaroom%d(\"%s\",%d)\n",
37 |                PROOFSIZE, EDGEBITS, header, nonce);
38 |   for (int nsols=0; scanf(" Solution") == 0; nsols++) { // one iteration per "Solution" line on stdin
39 |     word_t nonces[PROOFSIZE];
40 |     for (int n = 0; n < PROOFSIZE; n++) {
41 |       uint64_t edge; // hex edge index read from the solution line
42 |       int nscan = scanf(" %" SCNx64, &edge);
43 |       assert(nscan == 1);
44 |       nonces[n] = edge;
45 |     }
46 |     int pow_rc = verify(nonces, &keys);
47 |     if (pow_rc == POW_OK) {
48 |       printf("Verified with cyclehash ");
49 |       unsigned char cyclehash[32];
50 |       blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)nonces, sizeof(nonces), 0, 0);
51 |       for (int i=0; i<32; i++)
52 |         printf("%02x", cyclehash[i]);
53 |       printf("\n");
54 |     } else {
55 |       printf("FAILED due to %s\n", errstr[pow_rc]);
56 |     }
57 |   }
58 |   return 0;
59 | }
60 | 


--------------------------------------------------------------------------------
/src/cuckaroom/cuckaroom.hpp:
--------------------------------------------------------------------------------
  1 | // Cuckaroom Cycle, a memory-hard proof-of-work
  2 | // Copyright (c) 2013-2020 John Tromp
  3 | 
  4 | #include <stdint.h> // for types uint32_t,uint64_t
  5 | #include <string.h> // for functions strlen, memset
  6 | #include <stdarg.h>
  7 | #include <stdio.h> 
  8 | #include <chrono>
  9 | #include <ctime>
 10 | #include "../crypto/blake2.h"
 11 | #include "../crypto/siphash.hpp"
 12 | 
 13 | // save some keystrokes since i'm a lazy typer
 14 | typedef uint32_t u32;
 15 | typedef uint64_t u64;
 16 | 
 17 | #ifndef MAX_SOLS
 18 | #define MAX_SOLS 4
 19 | #endif
 20 | 
 21 | #ifndef EDGE_BLOCK_BITS
 22 | #define EDGE_BLOCK_BITS 6
 23 | #endif
 24 | #define EDGE_BLOCK_SIZE (1 << EDGE_BLOCK_BITS)
 25 | #define EDGE_BLOCK_MASK (EDGE_BLOCK_SIZE - 1)
 26 | 
 27 | // proof-of-work parameters
 28 | #ifndef EDGEBITS
 29 | // the main parameter is the number of bits in an edge index,
 30 | // i.e. the 2-log of the number of edges
 31 | #define EDGEBITS 29
 32 | #endif
 33 | #ifndef PROOFSIZE
 34 | // the next most important parameter is the (even) length
 35 | // of the cycle to be found. a minimum of 12 is recommended
 36 | #define PROOFSIZE 42
 37 | #endif
 38 | 
 39 | #if EDGEBITS > 30
 40 | typedef uint64_t word_t;
 41 | #elif EDGEBITS > 14
 42 | typedef u32 word_t;
 43 | #else // if EDGEBITS <= 14
 44 | typedef uint16_t word_t;
 45 | #endif
 46 | 
 47 | // number of edges
 48 | #define NEDGES ((word_t)1 << EDGEBITS)
 49 | #define EDGEMASK ((word_t)NEDGES - 1)
 50 | #define NNODES NEDGES
 51 | // used to mask siphash output
 52 | #define NODEMASK ((word_t)NNODES - 1)
 53 | 
 54 | // Common Solver parameters, to return to caller
 55 | struct SolverParams {
 56 | 	u32 nthreads = 0;
 57 | 	u32 ntrims = 0;
 58 | 	bool showcycle = false; // defaulted so a caller that skips it never reads an indeterminate value
 59 | 	bool allrounds = false; // defaulted for the same reason
 60 | 	bool mutate_nonce = 1;
 61 | 	bool cpuload = 1;
 62 | 
 63 | 	// Common cuda params
 64 | 	u32 device = 0;
 65 | 
 66 | 	// Cuda-lean specific params
 67 | 	u32 blocks = 0;
 68 | 	u32 tpb = 0;
 69 | 
 70 | 	// Cuda-mean specific params
 71 | 	u32 expand = 0;
 72 | 	u32 genablocks = 0;
 73 | 	u32 genatpb = 0;
 74 | 	u32 genbtpb = 0;
 75 | 	u32 trimtpb = 0;
 76 | 	u32 tailtpb = 0;
 77 | 	u32 recoverblocks = 0;
 78 | 	u32 recovertpb = 0;
 79 | };
 80 | 
 81 | // Solutions result structs to be instantiated by caller,
 82 | // and filled by solver if desired
 83 | struct Solution {
 84 |   u64 id{0};
 85 |   u64 nonce{0};          // nonce the proof belongs to
 86 |   u64 proof[PROOFSIZE];  // the PROOFSIZE edge indices of the cycle; not default-initialized
 87 | };
 88 | 
 89 | struct SolverSolutions {
 90 |   u32 edge_bits{0};
 91 |   u32 num_sols{0};          // count of entries written into sols
 92 |   Solution sols[MAX_SOLS];  // room for at most MAX_SOLS solutions
 93 | };
 94 | 
 95 | #define MAX_NAME_LEN 256
 96 | 
 97 | // last error reason, to be picked up by stats
 98 | // to be returned to caller
 99 | char LAST_ERROR_REASON[MAX_NAME_LEN];
100 | 
101 | // Solver statistics, to be instantiated by caller
102 | // and filled by solver if desired
103 | struct SolverStats {
104 | 	u32 device_id{0};
105 | 	u32 edge_bits{0};
106 | 	char plugin_name[MAX_NAME_LEN]; // will be filled in caller-side
107 | 	char device_name[MAX_NAME_LEN]; // not default-initialized
108 | 	bool has_errored{false};
109 | 	char error_reason[MAX_NAME_LEN]; // not default-initialized
110 | 	u32 iterations{0};
111 | 	u64 last_start_time{0};
112 | 	u64 last_end_time{0};
113 | 	u64 last_solution_time{0};
114 | };
115 | 
116 | enum verify_code { POW_OK, POW_HEADER_LENGTH, POW_TOO_BIG, POW_TOO_SMALL, POW_NON_MATCHING, POW_BRANCH, POW_DEAD_END, POW_SHORT_CYCLE, POW_UNBALANCED};
117 | const char *errstr[] = { "OK", "wrong header length", "edge too big", "edges not ascending", "endpoints don't match up", "branch in cycle", "cycle dead ends", "cycle too short", "edges not balanced"};
118 | 
119 | // fills buf with EDGE_BLOCK_SIZE siphash outputs for the block containing edge in the cuckaroom graph,
120 | // each xor-ed with all later outputs of that block; returns the entry for the given edge
121 | u64 sipblock(siphash_keys &keys, const word_t edge, u64 *buf) {
122 |   const word_t first = edge & ~EDGE_BLOCK_MASK; // first edge index of the block
123 |   siphash_state<> state(keys);
124 |   for (u32 k = 0; k != EDGE_BLOCK_SIZE; ++k) {
125 |     state.hash24(first + k);
126 |     buf[k] = state.xor_lanes();
127 |   }
128 |   for (u32 k = EDGE_BLOCK_SIZE - 1; k > 0; --k) // back to front, so buf[k-1] absorbs every later entry
129 |     buf[k-1] ^= buf[k];
130 |   return buf[edge & EDGE_BLOCK_MASK];
131 | }
132 | 
133 | // verify that the PROOFSIZE edges are in range, strictly ascending, and chain into one cycle of that length
134 | int verify(word_t edges[PROOFSIZE], siphash_keys &keys) {
135 |   u64 sips[EDGE_BLOCK_SIZE];
136 |   word_t from[PROOFSIZE], to[PROOFSIZE], visited[PROOFSIZE];
137 |   word_t xorfrom = 0, xorto = 0;
138 | 
139 |   for (u32 k = 0; k < PROOFSIZE; k++) {
140 |     if (edges[k] > EDGEMASK) return POW_TOO_BIG;
141 |     if (k > 0 && edges[k-1] >= edges[k]) return POW_TOO_SMALL;
142 |     const u64 hash = sipblock(keys, edges[k], sips);
143 |     from[k] = hash & EDGEMASK;         // low 32 bits give the source endpoint
144 |     to[k]   = (hash >> 32) & EDGEMASK; // high 32 bits give the destination endpoint
145 |     xorfrom ^= from[k];
146 |     xorto   ^= to[k];
147 |     visited[k] = false;
148 |   }
149 |   if (xorfrom != xorto)       // cheap early rejection of obviously bad proofs
150 |     return POW_NON_MATCHING;
151 |   u32 steps = 0, cur = 0;
152 |   do {                        // walk from edge 0, one edge per iteration
153 |     if (visited[cur])
154 |       return POW_BRANCH;
155 |     visited[cur] = true;
156 |     u32 next = 0;
157 |     while (from[next] != to[cur]) // first edge starting where edge cur ends
158 |       if (++next == PROOFSIZE)
159 |         return POW_DEAD_END;
160 |     cur = next;
161 |     steps++;
162 |   } while (cur != 0);         // stop once the walk is back at edge 0
163 |   return steps == PROOFSIZE ? POW_OK : POW_SHORT_CYCLE;
164 | }
165 | 
166 | // derives the siphash keys from a 32-byte blake2b digest of the header
167 | void setheader(const char *header, const u32 headerlen, siphash_keys *keys) {
168 |   char digest[32];
169 |   // unkeyed hash: key pointer and key length are both zero
170 |   blake2b((void *)digest, sizeof(digest), (const void *)header, headerlen, 0, 0);
171 |   keys->setkeys(digest);
172 | }
173 | 
174 | u64 timestamp() {
175 | 	using namespace std::chrono;
176 | 	// callers divide differences by 1000000 to get milliseconds, so return nanoseconds
177 | 	// explicitly instead of the clock's implementation-defined tick unit
178 | 	return duration_cast<nanoseconds>(high_resolution_clock::now().time_since_epoch()).count();
179 | }
180 | 
181 | /////////////////////////////////////////////////////////////////
182 | // Declarations to make it easier for callers to link as required
183 | /////////////////////////////////////////////////////////////////
184 | 
185 | #ifndef C_CALL_CONVENTION
186 | #define C_CALL_CONVENTION 0
187 | #endif
188 | 
189 | // convention to prepend to called functions
190 | #if C_CALL_CONVENTION
191 | #define CALL_CONVENTION extern "C"
192 | #else
193 | #define CALL_CONVENTION
194 | #endif
195 | 
196 | // Ability to squash printf output at compile time, if desired
197 | #ifndef SQUASH_OUTPUT
198 | #define SQUASH_OUTPUT 0
199 | #endif
200 | 
201 | void print_log(const char *fmt, ...) {
202 | 	if (!SQUASH_OUTPUT) { // printf-style output to stdout, skipped entirely when SQUASH_OUTPUT is nonzero
203 | 		va_list ap; va_start(ap, fmt);
204 | 		vprintf(fmt, ap);
205 | 		va_end(ap);
206 | 	}
207 | }
208 | //////////////////////////////////////////////////////////////////
209 | // END caller QOL
210 | //////////////////////////////////////////////////////////////////
211 | 


--------------------------------------------------------------------------------
/src/cuckaroom/graph.hpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <assert.h>
  4 | #include "bitmap.hpp"
  5 | #include "compress.hpp"
  6 | #include <new>
  7 | 
  8 | typedef word_t proof[PROOFSIZE];
  9 | 
 10 | // cuck(ar)oom graph with given limit on number of edges (and on single partition nodes)
 11 | template <typename word_t>
 12 | class graph {
 13 | public:
 14 |   // terminates adjacency lists
 15 |   const word_t NIL = ~(word_t)0;
 16 | 
 17 |   struct link { // element of adjacency list
 18 |     word_t next;
 19 |     word_t to;
 20 |   };
 21 | 
 22 |   word_t MAXEDGES;
 23 |   word_t MAXNODES;
 24 |   word_t nlinks; // add_edge() calls since the last reset; also the index of the next link
 25 |   word_t *adjlist; // index into links array
 26 |   link *links;
 27 |   bool sharedmem; // true when adjlist and links live in caller-provided memory
 28 |   compressor<word_t> *compress; // null when constructed with compressbits == 0
 29 |   bitmap<u32> visited;
 30 |   u32 MAXSOLS;
 31 |   proof *sols;
 32 |   u32 nsols;
 33 | 
 34 |   graph(word_t maxedges, word_t maxnodes, u32 maxsols, u32 compressbits) : visited(maxnodes) {
 35 |     MAXEDGES = maxedges;
 36 |     MAXNODES = maxnodes;
 37 |     MAXSOLS = maxsols;
 38 |     adjlist = new word_t[MAXNODES]; // index into links array
 39 |     links   = new link[MAXEDGES];
 40 |     compress = compressbits ? new compressor<word_t>(EDGEBITS, compressbits) : 0;
 41 |     sharedmem = false;
 42 |     sols    = new proof[MAXSOLS+1]; // extra one for current path
 43 |     visited.clear();
 44 |   }
 45 | 
 46 |   ~graph() {
 47 |     if (!sharedmem) {
 48 |       delete[] adjlist;
 49 |       delete[] links;
 50 |     }
 51 |     delete compress; // the object itself is always heap-allocated; its destructor leaves shared storage alone
 52 |     delete[] sols;
 53 |   }
 54 | 
 55 |   graph(word_t maxedges, word_t maxnodes, u32 maxsols, u32 compressbits, char *bytes) : visited(maxnodes) {
 56 |     MAXEDGES = maxedges;
 57 |     MAXNODES = maxnodes;
 58 |     MAXSOLS = maxsols;
 59 |     adjlist = new (bytes) word_t[MAXNODES]; // index into links array
 60 |     links   = new (bytes += (uint64_t)MAXNODES * sizeof(word_t)) link[MAXEDGES];
 61 |     compress = compressbits ? new compressor<word_t>(EDGEBITS, compressbits, bytes += (uint64_t)MAXEDGES * sizeof(link)) : 0;
 62 |     sharedmem = true;
 63 |     sols    = new  proof[MAXSOLS+1];
 64 |     visited.clear();
 65 |   }
 66 | 
 67 |   // total size of new-operated data, excludes sols and visited bitmap of MAXNODES bits
 68 |   uint64_t bytes() {
 69 |     return (uint64_t)MAXNODES * sizeof(word_t) + (uint64_t)MAXEDGES * sizeof(link) + (compress ? compress->bytes() : 0);
 70 |   }
 71 | 
 72 |   void reset() {
 73 |     memset(adjlist, (char)NIL, (uint64_t)MAXNODES * sizeof(word_t)); // every byte 0xff, so each list head reads as NIL
 74 |     if (compress) compress->reset();
 75 |     resetcounts();
 76 |   }
 77 | 
 78 |   void resetcounts() {
 79 |     nlinks = nsols = 0; // visited needs no reset: its bits are set only inside cycles_with_link()
 80 |   }
 81 | 
 82 |   static int nonce_cmp(const void *a, const void *b) {
 83 |     const word_t x = *(const word_t *)a, y = *(const word_t *)b;
 84 |     return (x > y) - (x < y); // an unsigned difference narrowed to int can report the wrong sign
 85 |   }
 86 | 
 87 |   void cycles_with_link(u32 len, word_t u, word_t dest) {
 88 |     if (visited.test(u))
 89 |       return;
 90 |     if (u == dest) {
 91 |       print_log("  %d-cycle found\n", len);
 92 |       if (len == PROOFSIZE && nsols < MAXSOLS) {
 93 |         qsort(sols[nsols++], PROOFSIZE, sizeof(word_t), nonce_cmp);
 94 |         memcpy(sols[nsols], sols[nsols-1], sizeof(sols[0]));
 95 |       }
 96 |       return;
 97 |     }
 98 |     if (len == PROOFSIZE)
 99 |       return;
100 |     word_t au1 = adjlist[u];
101 |     if (au1 != NIL) {
102 |       visited.set(u);
103 |       for (; au1 != NIL; au1 = links[au1].next) {
104 |         sols[nsols][len] = au1; // record the link taken at this depth of the current path
105 |         cycles_with_link(len+1, links[au1].to, dest);
106 |       }
107 |       visited.reset(u);
108 |     }
109 |   }
110 | 
111 |   bool add_edge(word_t from, word_t to) {
112 |     assert(from < MAXNODES);
113 |     assert(to   < MAXNODES);
114 |     if (from == to || adjlist[to] != NIL) { // possibly part of a cycle
115 |       sols[nsols][0] = nlinks;
116 |       assert(!visited.test(from));
117 |       cycles_with_link(1, to, from);
118 |     }
119 |     word_t link = nlinks++; // consumed even when the edge is dropped as a duplicate below
120 |     assert(link != NIL);    // avoid confusing links with NIL; guaranteed if bits in word_t > EDGEBITS + 1
121 |     assert(link < MAXEDGES);
122 | #ifndef ALLOWDUPES
123 |     for (word_t au = adjlist[from]; au != NIL; au = links[au].next)
124 |       if (links[au].to == to) return false; // drop duplicate edge
125 | #endif
126 |     links[link].next = adjlist[from];
127 |     links[adjlist[from] = link].to = to;
128 |     return true;
129 |   }
130 | 
131 |   bool add_compress_edge(word_t from, word_t to) {
132 |     return add_edge(compress->compress(from), compress->compress(to));
133 |   }
134 | };
135 | 


--------------------------------------------------------------------------------
/src/cuckaroom/mean.cpp:
--------------------------------------------------------------------------------
  1 | // Cuckaroom Cycle, a memory-hard proof-of-work
  2 | // Copyright (c) 2013-2020 John Tromp
  3 | 
  4 | #include "mean.hpp"
  5 | #include <unistd.h>
  6 | #include <chrono>
  7 | 
  8 | #ifndef HEADERLEN
  9 | // arbitrary length of header hashed into siphash key
 10 | #define HEADERLEN 80
 11 | #endif
 12 | 
 13 | typedef solver_ctx SolverCtx;
 14 | 
 15 | CALL_CONVENTION int run_solver(SolverCtx* ctx,
 16 |                                char* header,
 17 |                                int header_length,
 18 |                                u32 nonce,
 19 |                                u32 range,
 20 |                                SolverSolutions *solutions,
 21 |                                SolverStats *stats
 22 |                                )
 23 | {
 24 |   u64 time0, time1;
 25 |   u32 timems;
 26 |   u32 sumnsols = 0; // solutions found over all nonces so far
 27 | 
 28 |   for (u32 r = 0; r < range; r++) { // one solve per nonce in [nonce, nonce+range)
 29 |     time0 = timestamp();
 30 |     ctx->setheadernonce(header, header_length, nonce + r);
 31 |     print_log("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx->trimmer.sip_keys.k0, ctx->trimmer.sip_keys.k1, ctx->trimmer.sip_keys.k2, ctx->trimmer.sip_keys.k3);
 32 |     u32 nsols = ctx->solve();
 33 |     time1 = timestamp();
 34 |     timems = (time1 - time0) / 1000000;
 35 |     print_log("Time: %d ms\n", timems);
 36 | 
 37 |     for (unsigned s = 0; s < nsols; s++) {
 38 |       print_log("Solution");
 39 |       word_t *prf = &ctx->sols[s * PROOFSIZE];
 40 |       for (u32 i = 0; i < PROOFSIZE; i++)
 41 |         print_log(" %jx", (uintmax_t)prf[i]);
 42 |       print_log("\n");
 43 |       if (solutions != NULL && sumnsols + s < MAX_SOLS) { // sols[] holds only MAX_SOLS entries across the whole range
 44 |         solutions->edge_bits = EDGEBITS;
 45 |         solutions->num_sols++;
 46 |         solutions->sols[sumnsols+s].nonce = nonce + r;
 47 |         for (u32 i = 0; i < PROOFSIZE; i++) 
 48 |           solutions->sols[sumnsols+s].proof[i] = (u64) prf[i];
 49 |       }
 50 |       int pow_rc = verify(prf, ctx->trimmer.sip_keys);
 51 |       if (pow_rc == POW_OK) {
 52 |         print_log("Verified with cyclehash ");
 53 |         unsigned char cyclehash[32];
 54 |         blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)prf, sizeof(proof), 0, 0);
 55 |         for (int i=0; i<32; i++)
 56 |           print_log("%02x", cyclehash[i]);
 57 |         print_log("\n");
 58 |       } else {
 59 |         print_log("FAILED due to %s\n", errstr[pow_rc]);
 60 |       }
 61 |     }
 62 |     sumnsols += nsols;
 63 |     if (stats != NULL) { // stats describe the most recent nonce only
 64 |         stats->device_id = 0;
 65 |         stats->edge_bits = EDGEBITS;
 66 |         strncpy(stats->device_name, "CPU\0", MAX_NAME_LEN);
 67 |         stats->last_start_time = time0;
 68 |         stats->last_end_time = time1;
 69 |         stats->last_solution_time = time1 - time0;
 70 |     }
 71 |   }
 72 |   print_log("%d total solutions\n", sumnsols);
 73 |   return sumnsols > 0; // 1 when at least one cycle was found
 74 | }
 75 | 
 76 | CALL_CONVENTION SolverCtx* create_solver_ctx(SolverParams* params) {
 77 |   // zero means unset: the chosen default is written back into params
 78 |   if (!params->nthreads) params->nthreads = 1;
 79 |   if (!params->ntrims) params->ntrims = EDGEBITS < 30 ? 68 : 96;
 80 | 
 81 |   return new SolverCtx(params->nthreads,
 82 |                        params->ntrims,
 83 |                        params->allrounds,
 84 |                        params->showcycle,
 85 |                        params->mutate_nonce);
 86 | }
 87 | 
 88 | CALL_CONVENTION void destroy_solver_ctx(SolverCtx* ctx) {
 89 |   delete ctx; // counterpart of create_solver_ctx(), which allocates the context with new
 90 | }
 91 | 
 92 | CALL_CONVENTION void stop_solver(SolverCtx* ctx) {
 93 |   ctx->abort(); // forwards to the context's abort(); presumably interrupts a running solve (not visible here)
 94 | }
 95 | 
 96 | CALL_CONVENTION void fill_default_params(SolverParams* params) {
 97 | 	// not required in this solver: params is left untouched
 98 | }
 99 | 
100 | int main(int argc, char **argv) {
101 |   u32 nthreads = 0;
102 |   u32 ntrims = 0;
103 |   u32 nonce = 0;
104 |   u32 range = 1;
105 | #ifdef SAVEEDGES
106 |   bool showcycle = 1;
107 | #else
108 |   bool showcycle = 0;
109 | #endif
110 |   char header[HEADERLEN];
111 |   u32 len;
112 |   bool allrounds = false;
113 |   int c;
114 | 
115 |   memset(header, 0, sizeof(header));
116 |   while ((c = getopt (argc, argv, "ah:m:n:r:st:x:")) != -1) {
117 |     switch (c) {
118 |       case 'a':
119 |         allrounds = true;
120 |         break;
121 |       case 'h': // header given as text
122 |         len = strlen(optarg);
123 |         assert(len <= sizeof(header));
124 |         memcpy(header, optarg, len);
125 |         break;
126 |       case 'x': // header given as exactly 2*HEADERLEN hex digits
127 |         len = strlen(optarg)/2;
128 |         assert(len == sizeof(header));
129 |         for (u32 i=0; i<len; i++)
130 |           sscanf(optarg+2*i, "%2hhx", header+i);
131 |         break;
132 |       case 'n':
133 |         nonce = atoi(optarg);
134 |         break;
135 |       case 'r':
136 |         range = atoi(optarg);
137 |         break;
138 |       case 'm':
139 |         ntrims = atoi(optarg) & -2; // make even as required by solve()
140 |         break;
141 |       case 's':
142 |         showcycle = true;
143 |         break;
144 |       case 't':
145 |         nthreads = atoi(optarg);
146 |         break;
147 |     }
148 |   }
149 | 
150 |   SolverParams params;
151 |   params.nthreads = nthreads;
152 |   params.ntrims = ntrims;
153 |   params.showcycle = showcycle;
154 |   params.allrounds = allrounds;
155 | 
156 |   SolverCtx* ctx = create_solver_ctx(&params);
157 | 
158 |   print_log("Looking for %d-cycle on cuckaroom%d(\"%.*s\",%d", PROOFSIZE, EDGEBITS, (int)sizeof(header), header, nonce); // bounded: a full header has no terminator
159 |   if (range > 1)
160 |     print_log("-%d", nonce+range-1);
161 |   print_log(") with 50%% edges\n");
162 | 
163 |   u64 sbytes = ctx->sharedbytes();
164 |   u32 tbytes = ctx->threadbytes();
165 |   int sunit,tunit;
166 |   for (sunit=0; sbytes >= 10240; sbytes>>=10,sunit++) ; // scale down until below 10240, counting the unit steps
167 |   for (tunit=0; tbytes >= 10240; tbytes>>=10,tunit++) ;
168 |   print_log("Using %d%cB bucket memory at %lx,\n", (u32)sbytes, " KMGT"[sunit], (u64)ctx->trimmer.buckets); // sbytes is below 10240 here, so the cast to match %d is lossless
169 |   print_log("%dx%d%cB thread memory at %lx,\n", params.nthreads, tbytes, " KMGT"[tunit], (u64)ctx->trimmer.tbuckets);
170 |   print_log("%d-way siphash, and %d buckets.\n", NSIPHASH, NX);
171 | 
172 |   run_solver(ctx, header, sizeof(header), nonce, range, NULL, NULL);
173 | 
174 |   destroy_solver_ctx(ctx);
175 | }
176 | 


--------------------------------------------------------------------------------
/src/cuckaroom/simple.cpp:
--------------------------------------------------------------------------------
  1 | // Cuckaroom Cycle, a memory-hard proof-of-work
  2 | // Copyright (c) 2013-2020 John Tromp
  3 | 
  4 | #include "cuckaroom.hpp"
  5 | #include "graph.hpp"
  6 | #include <stdio.h>
  7 | #include <stdlib.h>
  8 | #include <assert.h>
  9 | #include <unistd.h>
 10 | #include <set>
 11 | 
 12 | typedef unsigned char u8;
 13 | 
 14 | class cuckoo_ctx {
 15 | public:
 16 |   siphash_keys sip_keys;
 17 |   graph<word_t> cg;
 18 | 
 19 |   cuckoo_ctx(const char* header, const u32 headerlen, const u32 nonce) : cg(NEDGES, NNODES, MAX_SOLS, 0) {
 20 |   } // the arguments are unused here; setheadernonce() applies header and nonce
 21 | 
 22 |   ~cuckoo_ctx() { }
 23 | 
 24 |   u64 bytes() {
 25 |     return cg.bytes();
 26 |   }
 27 | 
 28 |   void setheadernonce(char* const headernonce, const u32 len, const u32 nonce) {
 29 |     ((u32 *)headernonce)[len/sizeof(u32)-1] = htole32(nonce); // place nonce at end
 30 |     setheader(headernonce, len, &sip_keys);
 31 |     cg.reset();
 32 |   }
 33 | 
 34 |   void find_cycles() { // adds every edge to the graph, then prints and verifies the solutions collected
 35 |     u64 sips[EDGE_BLOCK_SIZE];
 36 |     for (word_t block = 0; block < NEDGES; block += EDGE_BLOCK_SIZE) {
 37 |       sipblock(sip_keys, block, sips);
 38 |       for (u32 i = 0; i < EDGE_BLOCK_SIZE; i++) {
 39 |         u64 edge = sips[i];
 40 |         word_t u = edge & NODEMASK;
 41 |         word_t v = (edge >> 32) & NODEMASK;
 42 |         cg.add_edge(u, v);
 43 | #ifdef SHOW
 44 |         word_t nonce = block + i;
 45 |         printf("%d add (%d,%d)\n", nonce,u,v+NNODES);
 46 |         for (unsigned j=0; j<NNODES; j++) {
 47 |           printf("\t%d",j);
 48 |           for (word_t a=cg.adjlist[j]; a!=cg.NIL; a=cg.links[a].next) printf(":%d", cg.links[a].to); // NIL is a per-object member; each link stores its own target
 49 |           if ((j+1) % NNODES == 0)
 50 |           printf("\n");
 51 |         }
 52 | #endif
 53 |       }
 54 |     }
 55 |     for (u32 s=0; s < cg.nsols; s++) {
 56 |       printf("Solution");
 57 |       // qsort(&cg.sols[s], PROOFSIZE, sizeof(word_t), cg.nonce_cmp);
 58 |       for (u32 j=0; j < PROOFSIZE; j++) {
 59 |         word_t nonce = cg.sols[s][j];
 60 |         u64 edge = sipblock(sip_keys, nonce, sips);
 61 |         printf(" (%x,%x)", (u32)(edge & NODEMASK), (u32)((edge >> 32) & NODEMASK)); // %x takes unsigned int, not u64
 62 |         printf(" %x", nonce);
 63 |       }
 64 |       printf("\n");
 65 |       int pow_rc = verify(cg.sols[s], sip_keys);
 66 |       if (pow_rc == POW_OK) {
 67 |         printf("Verified with cyclehash ");
 68 |         unsigned char cyclehash[32];
 69 |         blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)cg.sols[s], sizeof(cg.sols[0]), 0, 0);
 70 |         for (int i=0; i<32; i++)
 71 |           printf("%02x", cyclehash[i]);
 72 |         printf("\n");
 73 |       } else {
 74 |         printf("FAILED due to %s\n", errstr[pow_rc]);
 75 |       }
 76 | 
 77 |     }
 78 |   }
 79 | };
 80 | 
 81 | // arbitrary length of header hashed into siphash key
 82 | #define HEADERLEN 80
 83 | 
 84 | int main(int argc, char **argv) {
 85 |   char header[HEADERLEN];
 86 |   memset(header, 0, HEADERLEN);
 87 |   int c;
 88 |   u32 nonce = 0, range = 1;
 89 |   u64 time0, time1;
 90 |   u32 timems;
 91 | 
 92 |   while ((c = getopt (argc, argv, "h:n:r:")) != -1) {
 93 |     switch (c) {
 94 |       case 'h':
 95 |         assert(strlen(optarg) <= sizeof(header)); // a longer header would overflow the buffer
 96 |         memcpy(header, optarg, strlen(optarg));
 97 |         break;
 98 |       case 'n':
 99 |         nonce = atoi(optarg);
100 |         break;
101 |       case 'r':
102 |         range = atoi(optarg);
103 |         break;
104 |     }
105 |   }
106 |   printf("Looking for %d-cycle on cuckaroom%d(\"%.*s\",%d", PROOFSIZE, EDGEBITS, (int)sizeof(header), header, nonce); // bounded: a full header has no terminator
107 |   if (range > 1)
108 |     printf("-%d", nonce+range-1);
109 |   printf("), ");
110 |   cuckoo_ctx ctx(header, sizeof(header), nonce);
111 |   u64 bytes = ctx.bytes();
112 |   int unit;
113 |   for (unit=0; bytes >= 10240; bytes>>=10,unit++) ; // scale down until below 10240, counting the unit steps
114 |   printf("using %d%cB memory\n", (u32)bytes, " KMGT"[unit]);
115 | 
116 |   for (u32 r = 0; r < range; r++) { // one full search per nonce in [nonce, nonce+range)
117 |     time0 = timestamp();
118 |     ctx.setheadernonce(header, sizeof(header), nonce + r);
119 |     printf("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx.sip_keys.k0, ctx.sip_keys.k1, ctx.sip_keys.k2, ctx.sip_keys.k3);
120 |     ctx.find_cycles();
121 |     time1 = timestamp(); timems = (time1 - time0) / 1000000;
122 |     printf("Time: %d ms\n", timems);
123 |   }
124 | }
125 | 


--------------------------------------------------------------------------------
/src/cuckarooz/Makefile:
--------------------------------------------------------------------------------
 1 | .POSIX:
 2 | .SUFFIXES:
 3 | 
 4 | OPT ?= -O3
 5 | 
 6 | GCC_ARCH_FLAGS ?= -march=native
 7 | GPP_ARCH_FLAGS ?= -march=native
 8 | 
 9 | # -Wno-deprecated-declarations shuts up Apple OSX clang
10 | FLAGS ?= -Wall -Wno-format -Wno-deprecated-declarations -D_POSIX_C_SOURCE=200112L $(OPT) -DPREFETCH -I. $(CPPFLAGS) -pthread
11 | GPP ?= g++ $(GPP_ARCH_FLAGS) -std=c++11 $(FLAGS)
12 | CFLAGS ?= -Wall -Wno-format -fomit-frame-pointer $(OPT)
13 | GCC ?= gcc $(GCC_ARCH_FLAGS) -std=gnu11 $(CFLAGS)
14 | BLAKE_2B_SRC ?= ../crypto/blake2b-ref.c
15 | NVCC ?= nvcc -std=c++11 
16 | 
17 | all : simpletest # cpu miner not working yet # meantest
18 | 
19 | simpletest:     simple19
20 | 	./simple19 -n 64
21 | 
22 | meantest:	mean29x4
23 | 	./mean29x4 -n 23 -t 4 -s
24 | 
25 | simple19:	../crypto/siphash.hpp cuckarooz.hpp  bitmap.hpp graph.hpp simple.cpp Makefile
26 | 	$(GPP) -o $@ -DPROOFSIZE=42 -DEDGEBITS=19 simple.cpp $(BLAKE_2B_SRC)
27 | 
28 | simple29:	../crypto/siphash.hpp cuckarooz.hpp  bitmap.hpp graph.hpp simple.cpp Makefile
29 | 	$(GPP) -o $@ -DPROOFSIZE=42 -DEDGEBITS=29 simple.cpp $(BLAKE_2B_SRC)
30 | 
31 | mean19x1:	cuckarooz.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
32 | 	$(GPP) -o $@ -DXBITS=2 -DNSIPHASH=1 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC)
33 | 
34 | mean19x4:	cuckarooz.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
35 | 	$(GPP) -o $@ -mno-avx2 -DXBITS=2 -DNSIPHASH=4 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC)
36 | 
37 | mean19x8:	cuckarooz.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
38 | 	$(GPP) -o $@ -mavx2 -DXBITS=2 -DNSIPHASH=8 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC)
39 | 
40 | mean29x4:	cuckarooz.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
41 | 	$(GPP) -o $@ -mno-avx2 -DNSIPHASH=4 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC)
42 | 
43 | mean29x8:	cuckarooz.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
44 | 	$(GPP) -o $@ -mavx2 -DNSIPHASH=8 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC)
45 | 
46 | mean29x8s:	cuckarooz.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
47 | 	$(GPP) -o $@ -mavx2 -DSAVEEDGES -DNSIPHASH=8 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC)
48 | 
49 | mean29x1:	cuckarooz.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
50 | 	$(GPP) -o $@ -DNSIPHASH=1 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC)
51 | 
52 | mean30x1:	cuckarooz.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
53 | 	$(GPP) -o $@ -DNSIPHASH=1 -DEXPANDROUND=10 -DCOMPRESSROUND=22 -DEDGEBITS=30 mean.cpp $(BLAKE_2B_SRC)
54 | 
55 | mean30x8:	cuckarooz.hpp  bitmap.hpp graph.hpp ../threads/barrier.hpp ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
56 | 	$(GPP) -o $@ -mavx2 -DNSIPHASH=8 -DEXPANDROUND=10 -DCOMPRESSROUND=22 -DEDGEBITS=30 mean.cpp $(BLAKE_2B_SRC)
57 | 
58 | cuda19:		../crypto/siphash.cuh cuckarooz.hpp compress.hpp graph.hpp mean.cu Makefile
59 | 	$(NVCC) -o $@ -DEPS_A=4 -DEPS_B=3 -DIDXSHIFT=2 -DEDGEBITS=19 -arch sm_35 mean.cu $(BLAKE_2B_SRC)
60 | 
61 | cuda29:		../crypto/siphash.cuh cuckarooz.hpp compress.hpp graph.hpp mean.cu Makefile
62 | 	$(NVCC) -o $@ -DEDGEBITS=29 -arch sm_35 mean.cu $(BLAKE_2B_SRC)
63 | 


--------------------------------------------------------------------------------
/src/cuckarooz/bitmap.hpp:
--------------------------------------------------------------------------------
 1 | template <typename word_t>
 2 | class bitmap {
 3 | public:
 4 |   word_t SIZE;         // number of bits requested
 5 |   word_t BITMAP_WORDS; // number of words backing those bits
 6 | #ifdef ATOMIC
 7 |   typedef std::atomic<word_t> aword_t;
 8 | #else
 9 |   typedef word_t aword_t;
10 | #endif
11 |   aword_t *bits;       // contents are undefined until clear() is called
12 |   const u32 BITS_PER_WORD = sizeof(word_t) * 8;
13 | 
14 |   bitmap(word_t size) {
15 |     SIZE = size;
16 |     BITMAP_WORDS = SIZE / BITS_PER_WORD + (SIZE % BITS_PER_WORD != 0); // round up so a partial last word is allocated
17 |     bits = new aword_t[BITMAP_WORDS];
18 |     assert(bits != 0);
19 |   }
20 |   ~bitmap() {
21 |     freebits();
22 |   }
23 |   void freebits() { // safe to call repeatedly: bits is nulled after the delete
24 |     delete[] bits;
25 |     bits = 0;
26 |   }
27 |   void clear() { // zeroes every bit
28 |     assert(bits);
29 |     memset((word_t *)bits, 0, BITMAP_WORDS*sizeof(word_t));
30 |   }
31 |   void prefetch(u32 u) const { // prefetch hint for the word holding bit u; a no-op unless PREFETCH is defined
32 | #ifdef PREFETCH
33 |     __builtin_prefetch((const void *)(&bits[u/BITS_PER_WORD]), /*READ=*/0, /*TEMPORAL=*/0);
34 | #endif
35 |   }
36 |   void set(u32 u) { // turns bit u on
37 |     u32 idx = u / BITS_PER_WORD;
38 |     word_t bit = (word_t)1 << (u % BITS_PER_WORD);
39 | #ifdef ATOMIC
40 |     std::atomic_fetch_or_explicit(&bits[idx], bit, std::memory_order_relaxed);
41 | #else
42 |     bits[idx] |= bit;
43 | #endif
44 |   }
45 |   void reset(u32 u) { // turns bit u off
46 |     u32 idx = u / BITS_PER_WORD;
47 |     word_t bit = (word_t)1 << (u % BITS_PER_WORD);
48 | #ifdef ATOMIC
49 |     std::atomic_fetch_and_explicit(&bits[idx], ~bit, std::memory_order_relaxed);
50 | #else
51 |     bits[idx] &= ~bit;
52 | #endif
53 |   }
54 |   bool test(u32 u) const { // true when bit u is on
55 |     u32 idx = u / BITS_PER_WORD;
56 |     u32 bit = u % BITS_PER_WORD;
57 | #ifdef ATOMIC
58 |     return (bits[idx].load(std::memory_order_relaxed) >> bit) & 1;
59 | #else
60 |     return (bits[idx] >> bit) & 1;
61 | #endif
62 |   }
63 |   word_t block(u32 n) const { // the whole word that contains bit n
64 |     u32 idx = n / BITS_PER_WORD;
65 |     return bits[idx];
66 |   }
67 | };
68 | 


--------------------------------------------------------------------------------
/src/cuckarooz/compress.hpp:
--------------------------------------------------------------------------------
 1 | #include <new>
 2 | 
 3 | // compressor for cuckarooz nodes where edgetrimming
 4 | // has left at most a fraction 2^-compressbits nodes in each partition
 5 | template <typename word_t>
 6 | class compressor {
 7 | public:
 8 |   u32 NODEBITS;
 9 |   u32 SHIFTBITS;
10 |   u32 SIZEBITS;
11 |   word_t SIZE;
12 |   word_t SIZE2;
13 |   word_t MASK;
14 |   word_t MASK2;
15 |   word_t nnodes;
16 |   const word_t NIL = ~(word_t)0;
17 |   word_t *nodes;
18 |   bool sharedmem;
19 | 
20 |   compressor(u32 nodebits, u32 compressbits, char *bytes) {
21 |     NODEBITS = nodebits;
22 |     SHIFTBITS = compressbits;
23 |     SIZEBITS = NODEBITS-compressbits;
24 |     SIZE = (word_t)1 << SIZEBITS;
25 |     SIZE2 = (word_t)2 << SIZEBITS;
26 |     nodes = new (bytes) word_t[SIZE2];
27 |     sharedmem = true;
28 |     MASK = SIZE-1;
29 |     MASK2 = SIZE2-1;
30 |   }
31 | 
32 |   compressor(u32 nodebits, u32 compressbits) {
33 |     NODEBITS = nodebits;
34 |     SHIFTBITS = compressbits;
35 |     SIZEBITS = NODEBITS-compressbits;
36 |     SIZE = (word_t)1 << SIZEBITS;
37 |     SIZE2 = (word_t)2 << SIZEBITS;
38 |     nodes = new word_t[SIZE2];
39 |     sharedmem = false;
40 |     MASK = SIZE-1;
41 |     MASK2 = SIZE2-1;
42 |   }
43 | 
44 |   ~compressor() {
45 |     if (!sharedmem)
46 |       delete[] nodes;
47 |   }
48 | 
49 |   uint64_t bytes() {
50 |     return sizeof(word_t[SIZE2]);
51 |   }
52 | 
53 |   void reset() {
54 |     memset(nodes, (char)NIL, sizeof(word_t[SIZE2]));
55 |     nnodes = 0;
56 |   }
57 | 
58 |   word_t compress(word_t u) {
59 |     word_t ui = u >> SHIFTBITS;
60 |     for (; ; ui = (ui+1) & MASK2) {
61 |       word_t cu = nodes[ui];
62 |       if (cu == NIL) {
63 |         if (nnodes >= SIZE) {
64 |           print_log("NODE OVERFLOW at %x\n", u);
65 |           return 0;
66 |         }
67 |         nodes[ui] = u << SIZEBITS | nnodes;
68 |         return nnodes++;
69 |       }
70 |       if ((cu & ~MASK) == u << SIZEBITS) {
71 |         return cu & MASK;
72 |       }
73 |     }
74 |   }
75 | };
76 | 


--------------------------------------------------------------------------------
/src/cuckarooz/cuckarooz.c:
--------------------------------------------------------------------------------
 1 | // Cuckarooz Cycle, a memory-hard proof-of-work
 2 | // Copyright (c) 2013-2020 John Tromp
 3 | 
 4 | #include "cuckarooz.h"
 5 | #include <inttypes.h> // for SCNx64 macro
 6 | #include <stdio.h>    // printf/scanf
 7 | #include <stdlib.h>   // exit
 8 | #include <unistd.h>   // getopt
 9 | #include <assert.h>   // d'uh
10 | 
11 | // arbitrary length of header hashed into siphash key
12 | #define HEADERLEN 80
13 | 
14 | int main(int argc, char **argv) {
15 |   const char *header = "";
16 |   int nonce = 0;
17 |   int c;
18 |   while ((c = getopt (argc, argv, "h:n:")) != -1) {
19 |     switch (c) {
20 |       case 'h':
21 |         header = optarg;
22 |         break;
23 |       case 'n':
24 |         nonce = atoi(optarg);
25 |         break;
26 |     }
27 |   }
28 |   char headernonce[HEADERLEN];
29 |   u32 hdrlen = strlen(header);
30 |   memcpy(headernonce, header, hdrlen);
31 |   memset(headernonce+hdrlen, 0, sizeof(headernonce)-hdrlen);
32 |   ((u32 *)headernonce)[HEADERLEN/sizeof(u32)-1] = htole32(nonce);
33 |   siphash_keys keys;
34 |   setheader(headernonce, sizeof(headernonce), &keys);
35 |   printf("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce, keys.k0, keys.k1, keys.k2, keys.k3);
36 |   printf("Verifying size %d proof for cuckarooz%d(\"%s\",%d)\n",
37 |                PROOFSIZE, EDGEBITS, header, nonce);
38 |   for (int nsols=0; scanf(" Solution") == 0; nsols++) {
39 |     word_t nonces[PROOFSIZE];
40 |     for (int n = 0; n < PROOFSIZE; n++) {
41 |       uint64_t nonce;
42 |       int nscan = scanf(" %" SCNx64, &nonce);
43 |       assert(nscan == 1);
44 |       nonces[n] = nonce;
45 |     }
46 |     int pow_rc = verify(nonces, &keys);
47 |     if (pow_rc == POW_OK) {
48 |       printf("Verified with cyclehash ");
49 |       unsigned char cyclehash[32];
50 |       blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)nonces, sizeof(nonces), 0, 0);
51 |       for (int i=0; i<32; i++)
52 |         printf("%02x", cyclehash[i]);
53 |       printf("\n");
54 |     } else {
55 |       printf("FAILED due to %s\n", errstr[pow_rc]);
56 |     }
57 |   }
58 |   return 0;
59 | }
60 | 


--------------------------------------------------------------------------------
/src/cuckarooz/graph.hpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <assert.h>
  4 | #include "bitmap.hpp"
  5 | #include "compress.hpp"
  6 | #include <new>
  7 | 
  8 | typedef word_t proof[PROOFSIZE];
  9 | 
 10 | // cuck(ar)ooz graph with given limit on number of edges (and on single partition nodes)
 11 | template <typename word_t>
 12 | class graph {
 13 | public:
 14 |   // terminates adjacency lists
 15 |   const word_t NIL = ~(word_t)0;
 16 | 
 17 |   struct link { // element of adjacency list
 18 |     word_t next;
 19 |     word_t to;
 20 |   };
 21 | 
 22 |   word_t MAXEDGES;
 23 |   word_t MAXNODES;
 24 |   word_t nlinks; // aka halfedges, twice number of edges
 25 |   word_t *adjlist; // index into links array
 26 |   link *links;
 27 |   bool sharedmem;
 28 |   compressor<word_t> *compress;
 29 |   bitmap<u32> visited;
 30 |   u32 MAXSOLS;
 31 |   proof *sols;
 32 |   u32 nsols;
 33 | 
 34 |   graph(word_t maxedges, word_t maxnodes, u32 maxsols, u32 compressbits) : visited(maxnodes) {
 35 |     MAXEDGES = maxedges;
 36 |     MAXNODES = maxnodes;
 37 |     MAXSOLS = maxsols;
 38 |     adjlist = new word_t[MAXNODES]; // index into links array
 39 |     links   = new link[2*MAXEDGES];
 40 |     compress = compressbits ? new compressor<word_t>(EDGEBITS, compressbits) : 0;
 41 |     sharedmem = false;
 42 |     sols    = new proof[MAXSOLS+1]; // extra one for current path
 43 |     visited.clear();
 44 |   }
 45 | 
 46 |   ~graph() {
 47 |     if (!sharedmem) {
 48 |       delete[] adjlist;
 49 |       delete[] links;
 50 |     }
 51 |     delete[] sols;
 52 |   }
 53 | 
 54 |   graph(word_t maxedges, word_t maxnodes, u32 maxsols, u32 compressbits, char *bytes) : visited(maxnodes) {
 55 |     MAXEDGES = maxedges;
 56 |     MAXNODES = maxnodes;
 57 |     MAXSOLS = maxsols;
 58 |     adjlist = new (bytes) word_t[MAXNODES]; // index into links array
 59 |     links   = new (bytes += sizeof(word_t[MAXNODES])) link[2*MAXEDGES];
 60 |     compress = compressbits ? new compressor<word_t>(EDGEBITS, compressbits, bytes += sizeof(link[2*MAXEDGES])) : 0;
 61 |     sharedmem = true;
 62 |     sols    = new  proof[MAXSOLS+1];
 63 |     visited.clear();
 64 |   }
 65 | 
 66 |   // total size of new-operated data, excludes sols and visited bitmap of MAXEDGES bits
 67 |   uint64_t bytes() {
 68 |     return sizeof(word_t[MAXNODES]) + sizeof(link[2*MAXEDGES]) + (compress ? compress->bytes() : 0);
 69 |   }
 70 | 
 71 |   void reset() {
 72 |     memset(adjlist, (char)NIL, sizeof(word_t[MAXNODES]));
 73 |     if (compress)
 74 |       compress->reset();
 75 |     resetcounts();
 76 |   }
 77 | 
 78 |   void resetcounts() {
 79 |     nlinks = nsols = 0;
 80 |     // visited has entries set only during cycles() call
 81 |   }
 82 | 
 83 |   static int nonce_cmp(const void *a, const void *b) {
 84 |     return *(word_t *)a - *(word_t *)b;
 85 |   }
 86 | 
 87 |   void cycles_with_link(u32 len, word_t u, word_t dest) {
 88 |     if (visited.test(u))
 89 |       return;
 90 |     if (u == dest) {
 91 |       print_log("  %d-cycle found\n", len);
 92 |       if (len == PROOFSIZE && nsols < MAXSOLS) {
 93 |         qsort(sols[nsols++], PROOFSIZE, sizeof(word_t), nonce_cmp);
 94 |         memcpy(sols[nsols], sols[nsols-1], sizeof(sols[0]));
 95 |       }
 96 |       return;
 97 |     }
 98 |     if (len == PROOFSIZE)
 99 |       return;
100 |     word_t au1 = adjlist[u];
101 |     if (au1 != NIL) {
102 |       visited.set(u);
103 |       for (; au1 != NIL; au1 = links[au1].next) {
104 |         sols[nsols][len] = au1/2;
105 |         cycles_with_link(len+1, links[au1 ^ 1].to, dest);
106 |       }
107 |       visited.reset(u);
108 |     }
109 |   }
110 | 
111 |   bool add_edge(word_t u, word_t v) {
112 | #ifndef ALLOWDUPES
113 |     for (word_t au = adjlist[u]; au != NIL; au = links[au].next)
114 |       if (links[au^1].to == v) { nlinks += 2; return false; } // drop duplicate edge
115 | #endif
116 |     assert(u < MAXNODES);
117 |     assert(v < MAXNODES);
118 |     if (u != v && adjlist[u] != NIL && adjlist[v] != NIL) { // possibly part of a cycle
119 |       sols[nsols][0] = nlinks/2;
120 |       assert(!visited.test(u));
121 |       cycles_with_link(1, v, u);
122 |     }
123 |     word_t ulink = nlinks++;
124 |     word_t vlink = nlinks++; // the two halfedges of an edge differ only in last bit
125 |     assert(vlink < 2*MAXEDGES); // assume MAXEDGES fits in word_t
126 |     links[ulink].next = adjlist[u];
127 |     links[vlink].next = adjlist[v];
128 |     links[adjlist[u] = ulink].to = u;
129 |     links[adjlist[v] = vlink].to = v;
130 |     return true;
131 |   }
132 | 
133 |   bool add_compress_edge(word_t u, word_t v) {
134 |     return add_edge(compress->compress(u), compress->compress(v));
135 |   }
136 | };
137 | 


--------------------------------------------------------------------------------
/src/cuckarooz/mean.cpp:
--------------------------------------------------------------------------------
  1 | // Cuckarooz Cycle, a memory-hard proof-of-work
  2 | // Copyright (c) 2013-2020 John Tromp
  3 | 
  4 | #include "mean.hpp"
  5 | #include <unistd.h>
  6 | #include <chrono>
  7 | 
  8 | #ifndef HEADERLEN
  9 | // arbitrary length of header hashed into siphash key
 10 | #define HEADERLEN 80
 11 | #endif
 12 | 
 13 | typedef solver_ctx SolverCtx;
 14 | 
 15 | CALL_CONVENTION int run_solver(SolverCtx* ctx,
 16 |                                char* header,
 17 |                                int header_length,
 18 |                                u32 nonce,
 19 |                                u32 range,
 20 |                                SolverSolutions *solutions,
 21 |                                SolverStats *stats
 22 |                                )
 23 | {
 24 |   u64 time0, time1;
 25 |   u32 timems;
 26 |   u32 sumnsols = 0;
 27 | 
 28 |   for (u32 r = 0; r < range; r++) {
 29 |     time0 = timestamp();
 30 |     ctx->setheadernonce(header, header_length, nonce + r);
 31 |     print_log("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx->trimmer.sip_keys.k0, ctx->trimmer.sip_keys.k1, ctx->trimmer.sip_keys.k2, ctx->trimmer.sip_keys.k3);
 32 |     u32 nsols = ctx->solve();
 33 |     time1 = timestamp();
 34 |     timems = (time1 - time0) / 1000000;
 35 |     print_log("Time: %d ms\n", timems);
 36 | 
 37 |     for (unsigned s = 0; s < nsols; s++) {
 38 |       print_log("Solution");
 39 |       word_t *prf = &ctx->sols[s * PROOFSIZE];
 40 |       for (u32 i = 0; i < PROOFSIZE; i++)
 41 |         print_log(" %jx", (uintmax_t)prf[i]);
 42 |       print_log("\n");
 43 |       if (solutions != NULL){
 44 |         solutions->edge_bits = EDGEBITS;
 45 |         solutions->num_sols++;
 46 |         solutions->sols[sumnsols+s].nonce = nonce + r;
 47 |         for (u32 i = 0; i < PROOFSIZE; i++) 
 48 |           solutions->sols[sumnsols+s].proof[i] = (u64) prf[i];
 49 |       }
 50 |       int pow_rc = verify(prf, ctx->trimmer.sip_keys);
 51 |       if (pow_rc == POW_OK) {
 52 |         print_log("Verified with cyclehash ");
 53 |         unsigned char cyclehash[32];
 54 |         blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)prf, sizeof(proof), 0, 0);
 55 |         for (int i=0; i<32; i++)
 56 |           print_log("%02x", cyclehash[i]);
 57 |         print_log("\n");
 58 |       } else {
 59 |         print_log("FAILED due to %s\n", errstr[pow_rc]);
 60 |       }
 61 |     }
 62 |     sumnsols += nsols;
 63 |     if (stats != NULL) {
 64 |         stats->device_id = 0;
 65 |         stats->edge_bits = EDGEBITS;
 66 |         strncpy(stats->device_name, "CPU\0", MAX_NAME_LEN);
 67 |         stats->last_start_time = time0;
 68 |         stats->last_end_time = time1;
 69 |         stats->last_solution_time = time1 - time0;
 70 |     }
 71 |   }
 72 |   print_log("%d total solutions\n", sumnsols);
 73 |   return sumnsols > 0;
 74 | }
 75 | 
 76 | CALL_CONVENTION SolverCtx* create_solver_ctx(SolverParams* params) {
 77 |   if (params->nthreads == 0) params->nthreads = 1;
 78 |   if (params->ntrims == 0) params->ntrims = EDGEBITS >= 30 ? 96 : 68;
 79 | 
 80 |   SolverCtx* ctx = new SolverCtx(params->nthreads,
 81 |                                  params->ntrims,
 82 |                                  params->allrounds,
 83 |                                  params->showcycle,
 84 |                                  params->mutate_nonce);
 85 |   return ctx;
 86 | }
 87 | 
// Destroys a context obtained from create_solver_ctx(); deleting NULL is a no-op.
CALL_CONVENTION void destroy_solver_ctx(SolverCtx* ctx) {
  delete ctx;
}
 91 | 
// Forwards a stop request to the context by calling its abort() method.
// ctx must be non-NULL.
CALL_CONVENTION void stop_solver(SolverCtx* ctx) {
  ctx->abort();
}
 95 | 
// Part of the solver entry-point set alongside create_solver_ctx();
// intentionally leaves *params untouched for this solver.
CALL_CONVENTION void fill_default_params(SolverParams* params) {
	// not required in this solver
}
 99 | 
100 | int main(int argc, char **argv) {
101 |   u32 nthreads = 0;
102 |   u32 ntrims = 0;
103 |   u32 nonce = 0;
104 |   u32 range = 1;
105 | #ifdef SAVEEDGES
106 |   bool showcycle = 1;
107 | #else
108 |   bool showcycle = 0;
109 | #endif
110 |   char header[HEADERLEN];
111 |   u32 len;
112 |   bool allrounds = false;
113 |   int c;
114 | 
115 |   memset(header, 0, sizeof(header));
116 |   while ((c = getopt (argc, argv, "ah:m:n:r:st:x:")) != -1) {
117 |     switch (c) {
118 |       case 'a':
119 |         allrounds = true;
120 |         break;
121 |       case 'h':
122 |         len = strlen(optarg);
123 |         assert(len <= sizeof(header));
124 |         memcpy(header, optarg, len);
125 |         break;
126 |       case 'x':
127 |         len = strlen(optarg)/2;
128 |         assert(len == sizeof(header));
129 |         for (u32 i=0; i<len; i++)
130 |           sscanf(optarg+2*i, "%2hhx", header+i);
131 |         break;
132 |       case 'n':
133 |         nonce = atoi(optarg);
134 |         break;
135 |       case 'r':
136 |         range = atoi(optarg);
137 |         break;
138 |       case 'm':
139 |         ntrims = atoi(optarg) & -2; // make even as required by solve()
140 |         break;
141 |       case 's':
142 |         showcycle = true;
143 |         break;
144 |       case 't':
145 |         nthreads = atoi(optarg);
146 |         break;
147 |     }
148 |   }
149 | 
150 |   SolverParams params;
151 |   params.nthreads = nthreads;
152 |   params.ntrims = ntrims;
153 |   params.showcycle = showcycle;
154 |   params.allrounds = allrounds;
155 | 
156 |   SolverCtx* ctx = create_solver_ctx(&params);
157 | 
158 |   print_log("Looking for %d-cycle on cuckarooz%d(\"%s\",%d", PROOFSIZE, EDGEBITS, header, nonce);
159 |   if (range > 1)
160 |     print_log("-%d", nonce+range-1);
161 |   print_log(") with 50%% edges\n");
162 | 
163 |   u64 sbytes = ctx->sharedbytes();
164 |   u32 tbytes = ctx->threadbytes();
165 |   int sunit,tunit;
166 |   for (sunit=0; sbytes >= 10240; sbytes>>=10,sunit++) ;
167 |   for (tunit=0; tbytes >= 10240; tbytes>>=10,tunit++) ;
168 |   print_log("Using %d%cB bucket memory at %lx,\n", sbytes, " KMGT"[sunit], (u64)ctx->trimmer.buckets);
169 |   print_log("%dx%d%cB thread memory at %lx,\n", params.nthreads, tbytes, " KMGT"[tunit], (u64)ctx->trimmer.tbuckets);
170 |   print_log("%d-way siphash, and %d buckets.\n", NSIPHASH, NX);
171 | 
172 | 	run_solver(ctx, header, sizeof(header), nonce, range, NULL, NULL);
173 | 
174 | 	destroy_solver_ctx(ctx);
175 | }
176 | 


--------------------------------------------------------------------------------
/src/cuckarooz/simple.cpp:
--------------------------------------------------------------------------------
  1 | // Cuckarooz Cycle, a memory-hard proof-of-work
  2 | // Copyright (c) 2013-2020 John Tromp
  3 | 
  4 | #include "cuckarooz.hpp"
  5 | #include "graph.hpp"
  6 | #include <stdio.h>
  7 | #include <stdlib.h>
  8 | #include <assert.h>
  9 | #include <unistd.h>
 10 | #include <set>
 11 | 
 12 | typedef unsigned char u8;
 13 | 
 14 | class cuckoo_ctx {
 15 | public:
 16 |   siphash_keys sip_keys;
 17 |   graph<word_t> cg;
 18 | 
 19 |   cuckoo_ctx(const char* header, const u32 headerlen, const u32 nonce) : cg(NEDGES, NNODES, MAX_SOLS, 0) {
 20 |   }
 21 | 
 22 |   ~cuckoo_ctx() { }
 23 | 
 24 |   u64 bytes() {
 25 |     return cg.bytes();
 26 |   }
 27 | 
 28 |   void setheadernonce(char* const headernonce, const u32 len, const u32 nonce) {
 29 |     ((u32 *)headernonce)[len/sizeof(u32)-1] = htole32(nonce); // place nonce at end
 30 |     setheader(headernonce, len, &sip_keys);
 31 |     cg.reset();
 32 |   }
 33 | 
 34 |   void find_cycles() {
 35 |     u64 sips[EDGE_BLOCK_SIZE];
 36 |     for (word_t block = 0; block < NEDGES; block += EDGE_BLOCK_SIZE) {
 37 |       sipblock(sip_keys, block, sips);
 38 |       for (u32 i = 0; i < EDGE_BLOCK_SIZE; i++) {
 39 |         u64 edge = sips[i];
 40 |         word_t u = edge & NODEMASK;
 41 |         word_t v = (edge >> 32) & NODEMASK;
 42 |         cg.add_edge(u, v);
 43 | #ifdef SHOW
 44 |         word_t nonce = block + i;
 45 |         printf("%d add (%d,%d)\n", nonce,u,v+NNODES);
 46 |         for (unsigned j=0; j<NNODES; j++) {
 47 |           printf("\t%d",j);
 48 |           for (int a=cg.adjlist[j]; a!=graph<word_t>::NIL; a=cg.links[a].next) printf(":%d", cg.links[a^1].to);
 49 |           if ((j+1) % NNODES == 0)
 50 |           printf("\n");
 51 |         }
 52 | #endif
 53 |       }
 54 |     }
 55 |     for (u32 s=0; s < cg.nsols; s++) {
 56 |       printf("Solution");
 57 |       // qsort(&cg.sols[s], PROOFSIZE, sizeof(word_t), cg.nonce_cmp);
 58 |       for (u32 j=0; j < PROOFSIZE; j++) {
 59 |         word_t nonce = cg.sols[s][j];
 60 |         u64 edge = sipblock(sip_keys, nonce, sips);
 61 |         printf(" (%x,%x)", edge & NODEMASK, (edge >> 32) & NODEMASK);
 62 |         printf(" %x", nonce);
 63 |       }
 64 |       printf("\n");
 65 |       int pow_rc = verify(cg.sols[s], sip_keys);
 66 |       if (pow_rc == POW_OK) {
 67 |         printf("Verified with cyclehash ");
 68 |         unsigned char cyclehash[32];
 69 |         blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)cg.sols[s], sizeof(cg.sols[0]), 0, 0);
 70 |         for (int i=0; i<32; i++)
 71 |           printf("%02x", cyclehash[i]);
 72 |         printf("\n");
 73 |       } else {
 74 |         printf("FAILED due to %s\n", errstr[pow_rc]);
 75 |       }
 76 | 
 77 |     }
 78 |   }
 79 | };
 80 | 
 81 | // arbitrary length of header hashed into siphash key
 82 | #define HEADERLEN 80
 83 | 
 84 | int main(int argc, char **argv) {
 85 |   char header[HEADERLEN];
 86 |   memset(header, 0, HEADERLEN);
 87 |   int c;
 88 |   u32 nonce = 0;
 89 |   u32 range = 1;
 90 |   u64 time0, time1;
 91 |   u32 timems;
 92 |   u32 len;
 93 | 
 94 |   while ((c = getopt (argc, argv, "h:n:r:x:")) != -1) {
 95 |     switch (c) {
 96 |       case 'h':
 97 |         memcpy(header, optarg, strlen(optarg));
 98 |         break;
 99 |       case 'x':
100 |         len = strlen(optarg)/2;
101 |         assert(len == sizeof(header));
102 |         for (u32 i=0; i<len; i++)
103 |           sscanf(optarg+2*i, "%2hhx", header+i);
104 |         break;
105 |       case 'n':
106 |         nonce = atoi(optarg);
107 |         break;
108 |       case 'r':
109 |         range = atoi(optarg);
110 |         break;
111 |     }
112 |   }
113 |   printf("Looking for %d-cycle on cuckarooz%d(\"%s\",%d", PROOFSIZE, EDGEBITS, header, nonce);
114 |   if (range > 1)
115 |     printf("-%d", nonce+range-1);
116 |   printf("), ");
117 |   cuckoo_ctx ctx(header, sizeof(header), nonce);
118 |   u64 bytes = ctx.bytes();
119 |   int unit;
120 |   for (unit=0; bytes >= 10240; bytes>>=10,unit++) ;
121 |   printf("using %d%cB memory\n", (u32)bytes, " KMGT"[unit]);
122 | 
123 |   for (u32 r = 0; r < range; r++) {
124 |     time0 = timestamp();
125 |     ctx.setheadernonce(header, sizeof(header), nonce + r);
126 |     printf("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx.sip_keys.k0, ctx.sip_keys.k1, ctx.sip_keys.k2, ctx.sip_keys.k3);
127 |     ctx.find_cycles();
128 |     time1 = timestamp(); timems = (time1 - time0) / 1000000;
129 |     printf("Time: %d ms\n", timems);
130 |   }
131 | }
132 | 


--------------------------------------------------------------------------------
/src/cuckatoo/bitmap.hpp:
--------------------------------------------------------------------------------
 1 | template <typename word_t>
 2 | class bitmap {
 3 | public:
 4 |   u64 SIZE;
 5 |   word_t BITMAP_WORDS;
 6 | #ifdef ATOMIC
 7 |   typedef std::atomic<word_t> aword_t;
 8 | #else
 9 |   typedef word_t aword_t;
10 | #endif
11 |   aword_t *bits;
12 |   const u32 BITS_PER_WORD = sizeof(word_t) * 8;
13 | 
14 |   bitmap(u64 size) {
15 |     SIZE = size;
16 |     BITMAP_WORDS = SIZE / BITS_PER_WORD;
17 |     bits = new aword_t[BITMAP_WORDS];
18 |     assert(bits != 0);
19 |   }
20 |   ~bitmap() {
21 |     freebits();
22 |   }
23 |   void freebits() {
24 |     delete[] bits;
25 |     bits = 0;
26 |   }
27 |   void clear() {
28 |     assert(bits);
29 |     memset((word_t *)bits, 0, BITMAP_WORDS*sizeof(word_t));
30 |   }
31 |  void prefetch(word_t u) const {
32 | #ifdef PREFETCH
33 |     __builtin_prefetch((const void *)(&bits[u/BITS_PER_WORD]), /*READ=*/0, /*TEMPORAL=*/0);
34 | #endif
35 |   }
36 |   void set(word_t u) {
37 |     word_t idx = u / BITS_PER_WORD;
38 |     word_t bit = (word_t)1 << (u % BITS_PER_WORD);
39 | #ifdef ATOMIC
40 |     std::atomic_fetch_or_explicit(&bits[idx], bit, std::memory_order_relaxed);
41 | #else
42 |     bits[idx] |= bit;
43 | #endif
44 |   }
45 |   void reset(word_t u) {
46 |     word_t idx = u / BITS_PER_WORD;
47 |     word_t bit = (word_t)1 << (u % BITS_PER_WORD);
48 | #ifdef ATOMIC
49 |     std::atomic_fetch_and_explicit(&bits[idx], ~bit, std::memory_order_relaxed);
50 | #else
51 |     bits[idx] &= ~bit;
52 | #endif
53 |   }
54 |   bool test(word_t u) const {
55 |     word_t idx = u / BITS_PER_WORD;
56 |     u32 bit = u % BITS_PER_WORD;
57 | #ifdef ATOMIC
58 |     return (bits[idx].load(std::memory_order_relaxed) >> bit) & 1;
59 | #else
60 |     return (bits[idx] >> bit) & 1;
61 | #endif
62 |   }
63 |   word_t block(word_t n) const {
64 |     word_t idx = n / BITS_PER_WORD;
65 |     return bits[idx];
66 |   }
67 | };
68 | 


--------------------------------------------------------------------------------
/src/cuckatoo/compress.hpp:
--------------------------------------------------------------------------------
 1 | #include <new>
 2 | 
 3 | // compressor for cuckatoo nodes where edgetrimming
 4 | // has left at most 2^-compressbits nodes in each partition
 5 | template <typename word_t>
 6 | class compressor {
 7 | public:
 8 |   u32 NODEBITS;
 9 |   u32 COMPRESSBITS;
10 |   u32 SIZEBITS;
11 |   u32 SIZEBITS1;
12 |   word_t SIZE;
13 |   word_t MASK;
14 |   word_t MASK1;
15 |   word_t npairs;
16 |   const word_t NIL = ~(word_t)0;
17 |   word_t *nodes;
18 |   bool sharedmem;
19 | 
20 |   compressor(u32 nodebits, u32 compressbits, char *bytes) {
21 |     NODEBITS = nodebits;
22 |     COMPRESSBITS = compressbits;
23 |     SIZEBITS = NODEBITS-COMPRESSBITS;
24 |     SIZEBITS1 = SIZEBITS-1;
25 |     SIZE = (word_t)1 << SIZEBITS;
26 |     assert(SIZE);
27 |     MASK = SIZE-1;
28 |     MASK1 = MASK >> 1;
29 |     nodes = new (bytes) word_t[SIZE];
30 |     sharedmem = true;
31 |   }
32 | 
33 |   compressor(u32 nodebits, u32 compressbits) {
34 |     NODEBITS = nodebits;
35 |     COMPRESSBITS = compressbits;
36 |     SIZEBITS = NODEBITS-COMPRESSBITS;
37 |     SIZEBITS1 = SIZEBITS-1;
38 |     SIZE = (word_t)1 << SIZEBITS;
39 |     assert(SIZE);
40 |     MASK = SIZE-1;
41 |     MASK1 = MASK >> 1;
42 |     nodes = new word_t[SIZE];
43 |     sharedmem = false;
44 |   }
45 | 
46 |   ~compressor() {
47 |     if (!sharedmem)
48 |       delete[] nodes;
49 |   }
50 | 
51 |   uint64_t bytes() {
52 |     return sizeof(word_t[SIZE]);
53 |   }
54 | 
55 |   void reset() {
56 |     memset(nodes, (char)NIL, sizeof(word_t[SIZE]));
57 |     npairs = 0;
58 |   }
59 | 
60 |   word_t compress(word_t u) {
61 |     u32 parity = u & 1;
62 |     word_t ui = u >> COMPRESSBITS;
63 |     u >>= 1;
64 |     for (; ; ui = (ui+1) & MASK) {
65 |       word_t cu = nodes[ui];
66 |       if (cu == NIL) {
67 |         if (npairs >= SIZE/2) {
68 |           print_log("NODE OVERFLOW at %x\n", u << 1 | parity);
69 |           return parity;
70 |         }
71 |         nodes[ui] = u << SIZEBITS1 | npairs;
72 |         return (npairs++ << 1) | parity;
73 |       }
74 |       if ((cu & ~MASK1) == u << SIZEBITS1) {
75 |         return ((cu & MASK1) << 1) | parity;
76 |       }
77 |     }
78 |   }
79 | };
80 | 


--------------------------------------------------------------------------------
/src/cuckatoo/cuckatoo.c:
--------------------------------------------------------------------------------
 1 | // Cuck(at)oo Cycle, a memory-hard proof-of-work
 2 | // Copyright (c) 2013-2020 John Tromp
 3 | 
 4 | #include "cuckatoo.h"
 5 | #include <inttypes.h> // for SCNx64 macro
 6 | #include <stdio.h>    // printf/scanf
 7 | #include <stdlib.h>   // exit
 8 | #include <unistd.h>   // getopt
 9 | #include <assert.h>   // d'uh
10 | 
11 | // arbitrary length of header hashed into siphash key
12 | #define HEADERLEN 246
13 | 
14 | int main(int argc, char **argv) {
15 |   char headernonce[HEADERLEN];
16 |   memset(headernonce, 0, HEADERLEN);
17 |   int nonce = 0;
18 |   int len, c;
19 |   while ((c = getopt (argc, argv, "h:n:x:")) != -1) {
20 |     switch (c) {
21 |       case 'h':
22 |         len = strlen(optarg);
23 |         assert(len <= sizeof(headernonce));
24 |         memcpy(headernonce, optarg, len);
25 |         break;
26 |       case 'x':
27 |         len = strlen(optarg)/2;
28 |         assert(len == sizeof(headernonce)-sizeof(u64) || len == sizeof(headernonce));
29 |         for (u32 i=0; i<len; i++) {
30 |           sscanf(optarg+2*i, "%2hhx", headernonce+i);
31 |         }
32 |         break;
33 |       case 'n':
34 |         nonce = atoi(optarg);
35 |         ((u32 *)headernonce)[HEADERLEN/sizeof(u32)-1] = htole32(nonce); // place nonce near end aligned at u32
36 |         break;
37 |     }
38 |   }
39 |   siphash_keys keys;
40 |   setheader(headernonce, sizeof(headernonce), &keys);
41 |   printf("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce, keys.k0, keys.k1, keys.k2, keys.k3);
42 |   printf("Verifying size %d proof for cuckatoo%d(\"", PROOFSIZE, EDGEBITS);
43 |   for (int i=0; i < HEADERLEN; i++)
44 |     print_log("%02x", (unsigned char)headernonce[i]);
45 |   if (nonce) print_log(",%d", nonce);
46 |   print_log(")\n");
47 | 
48 |   word_t nonces[PROOFSIZE];
49 |   uint64_t index;
50 | #ifdef cuckoo_solution
51 |   for (int nsols=0; scanf(" \"cuckoo_solution\": [") == 0; nsols++) {
52 |     for (int n = 0; n < PROOFSIZE; n++) {
53 |       int nscan = scanf(" %" SCNu64 ",", &index);
54 | #else
55 |   for (int nsols=0; scanf(" Solution") == 0; nsols++) {
56 |     for (int n = 0; n < PROOFSIZE; n++) {
57 |       int nscan = scanf(" %" SCNx64, &index);
58 | #endif
59 |       assert(nscan == 1);
60 |       nonces[n] = index;
61 |     }
62 |     int pow_rc = verify(nonces, &keys);
63 |     if (pow_rc == POW_OK) {
64 |       printf("Verified with cyclehash ");
65 |       unsigned char cyclehash[32];
66 |       blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)nonces, sizeof(nonces), 0, 0);
67 |       for (int i=0; i<32; i++)
68 |         printf("%02x", cyclehash[i]);
69 |       printf("\n");
70 |     } else {
71 |       printf("FAILED due to %s\n", errstr[pow_rc]);
72 |     }
73 |   }
74 |   return 0;
75 | }
76 | 


--------------------------------------------------------------------------------
/src/cuckatoo/cumal.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <inttypes.h>
 3 | #include <assert.h>
 4 | 
 5 | int main(int argc, char **argv) {
 6 |   size_t bufferMB;
 7 |   void *buffer;
 8 |   int device = argc > 1 ? atoi(argv[argc-1]) : 1;
 9 |   int nDevices;
10 |   cudaGetDeviceCount(&nDevices);
11 |   assert(device < nDevices);
12 |   cudaDeviceProp prop;
13 |   cudaGetDeviceProperties(&prop, device);
14 |   uint64_t dbytes = prop.totalGlobalMem;
15 |   int availMB = dbytes >> 20;
16 |   printf("%s with %d MB @ %d bits x %dMHz\n", prop.name, availMB, prop.memoryBusWidth, prop.memoryClockRate/1000);
17 | 
18 |   cudaSetDevice(device);
19 |   for (bufferMB = availMB; ; bufferMB -= 1) {
20 |     int ret = cudaMalloc((void**)&buffer, bufferMB << 20);
21 |     if (ret) printf("cudaMalloc(%d MB) returned %d\n", bufferMB, ret);
22 |     else break;
23 |   }
24 |   printf("cudaMalloc(%d MB) succeeded %d\n", bufferMB);
25 |   cudaFree(buffer);
26 | 
27 |   return 0;
28 | }
29 | 


--------------------------------------------------------------------------------
/src/cuckatoo/graph.hpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <assert.h>
  4 | #include "bitmap.hpp"
  5 | #include "compress.hpp"
  6 | #include <new>
  7 | 
  8 | typedef word_t proof[PROOFSIZE]; // one proof: PROOFSIZE edge indices
  9 | 
 10 | // cuck(at)oo graph with given limit on number of edges (and on single partition nodes)
 11 | template <typename word_t>
 12 | class graph {
 13 | public:
 14 |   // terminates adjacency lists
 15 |   const word_t NIL = ~(word_t)0;	// NOTE: matches last edge when EDGEBITS==32
 16 | 
 17 |   struct link { // element of adjacency list
 18 |     word_t next; // index of the next halfedge in the same list, or NIL
 19 |     word_t to;   // node owning this halfedge; the far endpoint is links[i ^ 1].to
 20 |   };
 21 | 
 22 |   word_t MAXEDGES;
 23 |   word_t MAXNODES;
 24 |   word_t nlinks; // aka halfedges, twice number of edges
 25 |   word_t *adjlist; // index into links array
 26 |   link *links;
 27 |   bool sharedmem; // true when adjlist/links were placed in caller-provided memory
 28 |   compressor<word_t> *compressu;
 29 |   compressor<word_t> *compressv;
 30 |   bitmap<u32> visited; // marks edges on the path currently explored by cycles_with_link
 31 |   u32 MAXSOLS;
 32 |   proof *sols; // MAXSOLS+1 entries; sols[nsols] is scratch for the path being built
 33 |   u32 nsols;
 34 | 
 35 |   graph(word_t maxedges, word_t maxnodes, u32 maxsols, u32 compressbits) : visited(maxedges) { // self-allocating variant
 36 |     MAXEDGES = maxedges;
 37 |     MAXNODES = maxnodes;
 38 |     MAXSOLS = maxsols;
 39 |     adjlist = new word_t[2*MAXNODES]; // index into links array
 40 |     links   = new link[2*MAXEDGES];
 41 |     compressu = compressbits ? new compressor<word_t>(EDGEBITS, compressbits) : 0; // compressors only when compressbits != 0
 42 |     compressv = compressbits ? new compressor<word_t>(EDGEBITS, compressbits) : 0;
 43 |     sharedmem = false;
 44 |     sols    = new proof[MAXSOLS+1]; // extra one for current path
 45 |     visited.clear();
 46 |   }
 47 | 
 48 |   ~graph() {
 49 |     if (!sharedmem) { // caller-provided memory is not ours to free
 50 |       delete[] adjlist;
 51 |       delete[] links;
 52 |     }
 53 |     delete[] sols;
 54 |   }
 55 | 
 56 |   graph(word_t maxedges, word_t maxnodes, u32 maxsols, u32 compressbits, char *bytes) : visited(maxedges) { // variant laid out in caller memory
 57 |     MAXEDGES = maxedges;
 58 |     MAXNODES = maxnodes;
 59 |     MAXSOLS = maxsols;
 60 |     adjlist = new (bytes) word_t[2*MAXNODES]; // index into links array
 61 |     links   = new (bytes += sizeof(word_t[2*MAXNODES])) link[2*MAXEDGES]; // placed right after adjlist
 62 |     compressu = compressbits ? new compressor<word_t>(EDGEBITS, compressbits, bytes += sizeof(link[2*MAXEDGES])) : 0;
 63 |     compressv = compressbits ? new compressor<word_t>(EDGEBITS, compressbits, bytes + compressu->bytes()) : 0;
 64 |     sharedmem = true;
 65 |     sols    = new  proof[MAXSOLS+1]; // sols are always heap-allocated, even in this variant
 66 |     visited.clear();
 67 |   }
 68 | 
 69 |   // total size of new-operated data, excludes sols and visited bitmap of MAXEDGES bits
 70 |   uint64_t bytes() {
 71 |     assert(2*MAXNODES != 0 && 2*MAXEDGES != 0); // allocation fails for uncompressed EDGEBITS=31
 72 |     return sizeof(word_t[2*MAXNODES]) + sizeof(link[2*MAXEDGES]) + (compressu ? 2 * compressu->bytes() : 0);
 73 |   }
 74 | 
 75 |   void reset() { // empty every adjacency list, reset compressors and counters
 76 |     memset(adjlist, (char)NIL, sizeof(word_t[2*MAXNODES])); // NIL is all-ones, so a byte fill yields NIL entries
 77 |     if (compressu) {
 78 |       compressu->reset();
 79 |       compressv->reset();
 80 |     }
 81 |     resetcounts();
 82 |   }
 83 | 
 84 |   void resetcounts() {
 85 |     nlinks = nsols = 0;
 86 |     // visited has entries set only during cycles() call
 87 |   }
 88 | 
 89 |   static int nonce_cmp(const void *a, const void *b) { // qsort comparator giving ascending order
 90 |     word_t x = *(const word_t *)a, y = *(const word_t *)b; // full-width read: qsort is given sizeof(word_t) elements
 91 |     // printf("nonce_cmp %x %x\n", x, y);
 92 |     return x < y ? -1 : x > y;
 93 |   }
 94 | 
 95 |   void cycles_with_link(u32 len, word_t u, word_t dest) { // depth-first search for a path of at most PROOFSIZE edges closing at dest
 96 |     // assert((u>>1) < MAXEDGES);
 97 |     if (visited.test(u >> 1)) // already on the current path
 98 |       return;
 99 |     if ((u ^ 1) == dest) { // path closes into a cycle
100 |       print_log("  %d-cycle found\n", len);
101 |       if (len == PROOFSIZE && nsols < MAXSOLS) {
102 |         memcpy(sols[nsols+1], sols[nsols], sizeof(sols[0])); // keep the unsorted path as scratch for the continuing search
103 |         qsort(sols[nsols++], PROOFSIZE, sizeof(word_t), nonce_cmp); // store the solution sorted by edge index
104 |       }
105 |       return;
106 |     }
107 |     if (len == PROOFSIZE) // path is already full length without closing
108 |       return;
109 |     word_t au1 = adjlist[u ^ 1];
110 |     if (au1 != NIL) {
111 |       visited.set(u >> 1);
112 |       for (; au1 != NIL; au1 = links[au1].next) {
113 |         sols[nsols][len] = au1/2; // a halfedge index halved is its edge index
114 |         cycles_with_link(len+1, links[au1 ^ 1].to, dest);
115 |       }
116 |       visited.reset(u >> 1); // backtrack
117 |     }
118 |   }
119 | 
120 |   bool add_edge(word_t u, word_t v) { // returns false when the edge is dropped as a duplicate
121 |     assert(u < MAXNODES);
122 |     assert(v < MAXNODES);
123 |     v += MAXNODES; // distinguish partitions
124 |     if (adjlist[u ^ 1] != NIL && adjlist[v ^ 1] != NIL) { // possibly part of a cycle
125 |       sols[nsols][0] = nlinks/2; // index the new edge will receive
126 |       assert(!visited.test(u >> 1));
127 |       cycles_with_link(1, u, v);
128 |     }
129 |     word_t ulink = nlinks++;
130 |     word_t vlink = nlinks++; // the two halfedges of an edge differ only in last bit
131 |     assert(vlink != NIL);    // avoid confusing links with NIL (possible if word_t is u32 and EDGEBITS is 31 or 32)
132 | #ifndef ALLOWDUPES
133 |     for (word_t au = adjlist[u]; au != NIL; au = links[au].next)
134 |       if (links[au ^ 1].to == v) return false; // drop duplicate edge
135 | #endif
136 |     links[ulink].next = adjlist[u]; // push both halfedges onto the front of their lists
137 |     links[vlink].next = adjlist[v];
138 |     links[adjlist[u] = ulink].to = u;
139 |     links[adjlist[v] = vlink].to = v;
140 |     return true;
141 |   }
142 | 
143 |   bool add_compress_edge(word_t u, word_t v) { // requires compressors, i.e. compressbits != 0 at construction
144 |     return add_edge(compressu->compress(u), compressv->compress(v));
145 |   }
146 | };
147 | 


--------------------------------------------------------------------------------
/src/cuckatoo/lean.cpp:
--------------------------------------------------------------------------------
  1 | // Cuckatoo Cycle, a memory-hard proof-of-work
  2 | // Copyright (c) 2013-2023 John Tromp
  3 | 
  4 | #include "lean.hpp"
  5 | #include <unistd.h>
  6 | 
  7 | #ifndef HEADERLEN
  8 | #define HEADERLEN 246
  9 | #endif
 10 | 
 11 | typedef cuckoo_ctx SolverCtx;
 12 | 
 13 | CALL_CONVENTION int run_solver(SolverCtx* ctx,              // solver state: threads, keys, sols, nsols
 14 |                                char* header,                // header buffer handed to ctx->setheadernonce
 15 |                                int header_length,
 16 |                                u32 nonce,                   // first nonce to try
 17 |                                u32 range,                   // number of consecutive nonces to try
 18 |                                SolverSolutions *solutions,  // optional output table, may be NULL
 19 |                                SolverStats *stats           // optional timing output, may be NULL
 20 |                                )
 21 | {
 22 |   u64 time0, time1;
 23 |   u32 timems;
 24 |   u32 sumnsols = 0; // solutions found for all nonces completed so far
 25 |   thread_ctx *threads = new thread_ctx[ctx->nthreads];
 26 |   assert(threads);
 27 |   for (u32 r = 0; r < range; r++) {
 28 |     time0 = timestamp();
 29 |     ctx->setheadernonce(header, header_length, nonce + r);
 30 |     print_log("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx->sip_keys.k0, ctx->sip_keys.k1, ctx->sip_keys.k2, ctx->sip_keys.k3);
 31 |     ctx->barry.clear();
 32 |     for (u32 t = 0; t < ctx->nthreads; t++) { // launch one worker per thread slot
 33 |       threads[t].id = t;
 34 |       threads[t].ctx = ctx;
 35 |       int err = pthread_create(&threads[t].thread, NULL, worker, (void *)&threads[t]);
 36 |       assert(err == 0);
 37 |     }
 38 |     for (u32 t = 0; t < ctx->nthreads; t++) { // wait for all workers to finish
 39 |       int err = pthread_join(threads[t].thread, NULL);
 40 |       assert(err == 0);
 41 |     }
 42 |     time1 = timestamp();
 43 |     timems = (time1 - time0) / 1000000;
 44 |     print_log("Time: %d ms\n", timems);
 45 |     for (unsigned s = 0; s < ctx->nsols; s++) { // log, export and verify each solution of this nonce
 46 |       print_log("Solution");
 47 |       for (int j = 0; j < PROOFSIZE; j++)
 48 |         print_log(" %jx", (uintmax_t)ctx->sols[s][j]);
 49 |       print_log("\n");
 50 |       if (solutions != NULL){
 51 |         solutions->edge_bits = EDGEBITS;
 52 |         solutions->num_sols++;
 53 |         solutions->sols[sumnsols+s].nonce = nonce + r;
 54 |         for (u32 i = 0; i < PROOFSIZE; i++) 
 55 |           solutions->sols[sumnsols+s].proof[i] = (u64) ctx->sols[s][i];
 56 |       }
 57 |       int pow_rc = verify(ctx->sols[s], &ctx->sip_keys);
 58 |       if (pow_rc == POW_OK) {
 59 |         print_log("Verified with cyclehash ");
 60 |         unsigned char cyclehash[32];
 61 |         blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)ctx->sols[s], sizeof(ctx->sols[0]), 0, 0);
 62 |         for (int i=0; i<32; i++)
 63 |           print_log("%02x", cyclehash[i]);
 64 |         print_log("\n");
 65 |       } else {
 66 |         print_log("FAILED due to %s\n", errstr[pow_rc]);
 67 |       }
 68 |     }
 69 |     sumnsols += ctx->nsols; // add once per nonce, after the loop, so sumnsols+s indexes consecutive output slots
 70 |     if (stats != NULL) {
 71 |       stats->device_id = 0;
 72 |       stats->edge_bits = EDGEBITS;
 73 |       strncpy(stats->device_name, "CPU\0", MAX_NAME_LEN);
 74 |       stats->last_start_time = time0;
 75 |       stats->last_end_time = time1;
 76 |       stats->last_solution_time = time1 - time0;
 77 |     }
 78 |   }
 79 |   delete[] threads;
 80 |   print_log("%d total solutions\n", sumnsols);
 81 |   return 0; // always 0 in this solver, regardless of how many solutions were found
 82 | }
 83 | 
 84 | CALL_CONVENTION SolverCtx* create_solver_ctx(SolverParams* params) {
 85 |   // a zero field means "not specified": substitute the built-in default
 86 |   if (!params->nthreads)
 87 |     params->nthreads = 1;
 88 |   if (!params->ntrims)
 89 |     params->ntrims = (EDGEBITS > 30) ? 96 : 68;
 90 | 
 91 |   // heap-allocated; the matching release is destroy_solver_ctx()
 92 |   return new SolverCtx(params->nthreads, params->ntrims, MAXSOLS, params->mutate_nonce);
 93 | }
 94 | 
 95 | CALL_CONVENTION void destroy_solver_ctx(SolverCtx* ctx) { // releases a context obtained from create_solver_ctx
 96 |   delete ctx;
 97 | }
 98 | 
 99 | CALL_CONVENTION void stop_solver(SolverCtx* ctx) { // forwards the stop request to the context
100 |   ctx->abort();
101 | }
102 | 
103 | CALL_CONVENTION void fill_default_params(SolverParams* params) { // writes this solver's defaults into *params
104 |   params->nthreads = 1;
105 |   params->ntrims   = 8 * (PART_BITS+3) * (PART_BITS+4); // same formula as the ntrims default in main()
106 |   params->mutate_nonce = 0; // main() switches this on when -n or -r is given
107 | }
108 | 
109 | int main(int argc, char **argv) { // command-line driver: parse options, report the setup, run the solver once
110 |   int nthreads = 1;
111 |   int ntrims   = 8 * (PART_BITS+3) * (PART_BITS+4);
112 |   int nonce = 0;
113 |   int range = 1;
114 |   char header[HEADERLEN];
115 |   unsigned len; // number of header bytes passed on to the solver
116 |   int c;
117 |   SolverParams params;
118 | 
119 |   fill_default_params(&params);
120 |   memset(header, 0, len = sizeof(header)); // default: all-zero header of full length
121 |   while ((c = getopt (argc, argv, "h:m:n:r:t:x:")) != -1) {
122 |     switch (c) {
123 |       case 'h': // header given as literal text
124 |         len = strlen(optarg);
125 |         assert(len <= sizeof(header));
126 |         memcpy(header, optarg, len);
127 |         break;
128 |       case 'n': // starting nonce
129 |         nonce = atoi(optarg);
130 |         params.mutate_nonce = 1;
131 |         break;
132 |       case 'r': // number of nonces to try
133 |         range = atoi(optarg);
134 |         params.mutate_nonce = 1;
135 |         break;
136 |       case 'm': // number of trimming rounds
137 |         ntrims = atoi(optarg);
138 |         break;
139 |       case 't': // number of threads
140 |         nthreads = atoi(optarg);
141 |         break;
142 |       case 'x': // header given as hex digits, two per byte
143 |         len = strlen(optarg)/2;
144 |         assert(len == sizeof(header)-sizeof(u64) || len == sizeof(header));
145 |         for (u32 i=0; i<len; i++)
146 |           sscanf(optarg+2*i, "%2hhx", header+i);
147 |         break;
148 |     }
149 |   }
150 |   params.nthreads = nthreads;
151 |   params.ntrims = ntrims;
152 | 
153 |   print_log("Looking for %d-cycle on cuckatoo%d(\"", PROOFSIZE, EDGEBITS);
154 |   for (unsigned i=0; i < len; i++) // unsigned index matches len and avoids a signed/unsigned comparison
155 |     print_log("%02x", (unsigned char)header[i]);
156 |   print_log("\"");
157 |   if (range > 1)
158 |     print_log(",%d-%d", nonce, nonce+range-1);
159 |   else if (nonce)  print_log(",%d", nonce);
160 |   print_log(") with trimming to %d bits, %d trimming rounds, %d threads\n", EDGEBITS-IDXSHIFT, ntrims, nthreads);
161 | 
162 |   u64 EdgeBytes = NEDGES/8; // one bit per edge
163 |   int EdgeUnit;
164 |   for (EdgeUnit=0; EdgeBytes >= 1024; EdgeBytes>>=10,EdgeUnit++) ; // scale down to a value below 1024 and count the unit steps
165 |   u64 NodeBytes = (NEDGES >> PART_BITS)/8;
166 |   int NodeUnit;
167 |   for (NodeUnit=0; NodeBytes >= 1024; NodeBytes>>=10,NodeUnit++) ;
168 |   print_log("Using %d%cB edge and %d%cB node memory, and %d-way siphash\n",
169 |      (int)EdgeBytes, " KMGT"[EdgeUnit], (int)NodeBytes, " KMGT"[NodeUnit], NSIPHASH);
170 | 
171 |   SolverCtx* ctx = create_solver_ctx(&params);
172 |   run_solver(ctx, header, len, nonce, range, NULL, NULL); // no solution or stats export from the CLI
173 |   destroy_solver_ctx(ctx);
174 | 
175 |   return 0;
176 | }
177 | 


--------------------------------------------------------------------------------
/src/cuckatoo/mean.cpp:
--------------------------------------------------------------------------------
  1 | // Cuckatoo Cycle, a memory-hard proof-of-work
  2 | // Copyright (c) 2013-2020 John Tromp
  3 | 
  4 | #include "mean.hpp"
  5 | #include <unistd.h>
  6 | 
  7 | // arbitrary length of header hashed into siphash key
  8 | #define HEADERLEN 80
  9 | 
 10 | typedef solver_ctx SolverCtx;
 11 | 
 12 | // Solves `range` consecutive nonces starting at `nonce`; logs, optionally exports
 13 | // and verifies every solution. Returns 1 if any solution was found, 0 otherwise.
 14 | CALL_CONVENTION int run_solver(SolverCtx* ctx,
 15 |                                char* header, int header_length,
 16 |                                u32 nonce, u32 range,
 17 |                                SolverSolutions *solutions,
 18 |                                SolverStats *stats)
 19 | {
 20 |   u32 total = 0; // solutions found for all nonces completed so far
 21 | 
 22 |   for (u32 r = 0; r < range; r++) {
 23 |     const u32 cur = nonce + r;
 24 |     const u64 begun = timestamp();
 25 |     ctx->setheadernonce(header, header_length, cur);
 26 |     print_log("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", cur, ctx->trimmer.sip_keys.k0, ctx->trimmer.sip_keys.k1, ctx->trimmer.sip_keys.k2, ctx->trimmer.sip_keys.k3);
 27 |     const u32 found = ctx->solve();
 28 |     const u64 ended = timestamp();
 29 |     const u32 elapsed_ms = (ended - begun) / 1000000;
 30 |     print_log("Time: %d ms\n", elapsed_ms);
 31 | 
 32 |     for (u32 s = 0; s < found; s++) {
 33 |       word_t *prf = &ctx->sols[s * PROOFSIZE]; // solutions are stored back to back, PROOFSIZE words each
 34 |       print_log("Solution");
 35 |       for (u32 i = 0; i < PROOFSIZE; i++)
 36 |         print_log(" %jx", (uintmax_t)prf[i]);
 37 |       print_log("\n");
 38 | 
 39 |       if (solutions) { // export into the caller's table
 40 |         solutions->edge_bits = EDGEBITS;
 41 |         solutions->num_sols++;
 42 |         solutions->sols[total + s].nonce = cur;
 43 |         for (u32 i = 0; i < PROOFSIZE; i++)
 44 |           solutions->sols[total + s].proof[i] = (u64) prf[i];
 45 |       }
 46 | 
 47 |       const int pow_rc = verify(prf, &ctx->trimmer.sip_keys);
 48 |       if (pow_rc != POW_OK) {
 49 |         print_log("FAILED due to %s\n", errstr[pow_rc]);
 50 |         continue;
 51 |       }
 52 |       print_log("Verified with cyclehash ");
 53 |       unsigned char cyclehash[32];
 54 |       blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)prf, sizeof(proof), 0, 0);
 55 |       for (u32 i = 0; i < sizeof(cyclehash); i++)
 56 |         print_log("%02x", cyclehash[i]);
 57 |       print_log("\n");
 58 |     }
 59 |     total += found;
 60 |     if (stats) { // timing of the nonce just processed
 61 |       stats->device_id = 0;
 62 |       stats->edge_bits = EDGEBITS;
 63 |       strncpy(stats->device_name, "CPU\0", MAX_NAME_LEN);
 64 |       stats->last_start_time = begun;
 65 |       stats->last_end_time = ended;
 66 |       stats->last_solution_time = ended - begun;
 67 |     }
 68 |   }
 69 |   print_log("%d total solutions\n", total);
 70 |   return total > 0;
 71 | }
 72 | 
 73 | CALL_CONVENTION SolverCtx* create_solver_ctx(SolverParams* params) {
 74 |   // a zero field means "not specified": substitute the built-in default
 75 |   if (!params->nthreads)
 76 |     params->nthreads = 1;
 77 |   if (!params->ntrims)
 78 |     params->ntrims = (EDGEBITS >= 30) ? 96 : 68;
 79 | 
 80 |   // heap-allocated; the matching release is destroy_solver_ctx()
 81 |   return new SolverCtx(params->nthreads, params->ntrims,
 82 |                        params->allrounds, params->showcycle, params->mutate_nonce);
 83 | }
 84 | 
 85 | CALL_CONVENTION void destroy_solver_ctx(SolverCtx* ctx) { // releases a context obtained from create_solver_ctx
 86 |   delete ctx;
 87 | }
 88 | 
 89 | CALL_CONVENTION void stop_solver(SolverCtx* ctx) { // forwards the stop request to the context
 90 |   ctx->abort();
 91 | }
 92 | 
 93 | CALL_CONVENTION void fill_default_params(SolverParams* params) {
 94 |   // intentionally empty: this solver leaves *params untouched
 95 | }
 96 | 
 97 | int main(int argc, char **argv) { // command-line driver: parse options, report memory use, run the solver once
 98 |   u32 nthreads = 0; // 0 lets create_solver_ctx pick its default
 99 |   u32 ntrims = 0;   // 0 lets create_solver_ctx pick its default
100 |   u32 nonce = 0;
101 |   u32 range = 1;
102 | #ifdef SAVEEDGES
103 |   bool showcycle = 1;
104 | #else
105 |   bool showcycle = 0;
106 | #endif
107 |   char header[HEADERLEN];
108 |   u32 len; // only used to validate the -h / -x argument length
109 |   bool allrounds = false;
110 |   int c;
111 | 
112 |   memset(header, 0, sizeof(header));
113 |   while ((c = getopt (argc, argv, "ah:m:n:r:st:x:")) != -1) {
114 |     switch (c) {
115 |       case 'a':
116 |         allrounds = true;
117 |         break;
118 |       case 'h': // header given as literal text
119 |         len = strlen(optarg);
120 |         assert(len <= sizeof(header));
121 |         memcpy(header, optarg, len);
122 |         break;
123 |       case 'x': // header given as hex digits, two per byte; must fill the whole header
124 |         len = strlen(optarg)/2;
125 |         assert(len == sizeof(header));
126 |         for (u32 i=0; i<len; i++)
127 |           sscanf(optarg+2*i, "%2hhx", header+i);
128 |         break;
129 |       case 'n': // starting nonce
130 |         nonce = atoi(optarg);
131 |         break;
132 |       case 'r': // number of nonces to try
133 |         range = atoi(optarg);
134 |         break;
135 |       case 'm':
136 |         ntrims = atoi(optarg) & -2; // make even as required by solve()
137 |         break;
138 |       case 's':
139 |         showcycle = true;
140 |         break;
141 |       case 't': // number of threads
142 |         nthreads = atoi(optarg);
143 |         break;
144 |     }
145 |   }
146 | 
147 |   SolverParams params;
148 |   params.nthreads = nthreads;
149 |   params.ntrims = ntrims;
150 |   params.showcycle = showcycle;
151 |   params.allrounds = allrounds;
152 | 
153 |   SolverCtx* ctx = create_solver_ctx(&params);
154 | 
155 |   print_log("Looking for %d-cycle on cuckatoo%d(\"%.*s\",%d", PROOFSIZE, EDGEBITS, (int)sizeof(header), header, nonce); // bounded: a full-length header has no terminating NUL
156 |   if (range > 1)
157 |     print_log("-%d", nonce+range-1);
158 |   print_log(") with 50%% edges\n");
159 | 
160 |   u64 sbytes = ctx->sharedbytes();
161 |   u32 tbytes = ctx->threadbytes();
162 |   int sunit,tunit;
163 |   for (sunit=0; sbytes >= 10240; sbytes>>=10,sunit++) ; // scale down to a value below 10240 and count the unit steps
164 |   for (tunit=0; tbytes >= 10240; tbytes>>=10,tunit++) ;
165 |   print_log("Using %d%cB bucket memory at %lx,\n", (u32)sbytes, " KMGT"[sunit], (u64)ctx->trimmer.buckets); // cast: %d must not receive a 64-bit argument
166 |   print_log("%dx%d%cB thread memory at %lx,\n", params.nthreads, tbytes, " KMGT"[tunit], (u64)ctx->trimmer.tbuckets);
167 |   print_log("%d-way siphash, and %d buckets.\n", NSIPHASH, NX);
168 | 
169 |   run_solver(ctx, header, sizeof(header), nonce, range, NULL, NULL); // always hashes the full header buffer
170 | 
171 |   destroy_solver_ctx(ctx);
172 | }
173 | 


--------------------------------------------------------------------------------
/src/cuckatoo/simple.cpp:
--------------------------------------------------------------------------------
  1 | // Cuck(at)oo Cycle, a memory-hard proof-of-work
  2 | // Copyright (c) 2013-2020 John Tromp
  3 | 
  4 | #include "cuckatoo.h"
  5 | #include "graph.hpp"
  6 | #include <stdio.h>
  7 | #include <stdlib.h>
  8 | #include <assert.h>
  9 | #include <unistd.h>
 10 | #include <set>
 11 | 
 12 | #define NNODES (2*NEDGES)
 13 | #ifndef MAXSOLS
 14 | #define MAXSOLS 4
 15 | #endif
 16 | 
 17 | typedef unsigned char u8;
 18 | 
 19 | // Solver context for the simple solver: siphash keys plus the graph edges are fed into.
 20 | class cuckoo_ctx {
 21 | public:
 22 |   siphash_keys sip_keys;
 23 |   word_t easiness; // number of edge nonces that find_cycles() enumerates
 24 |   graph<word_t> cg;
 25 | 
 26 |   // header, headerlen and nonce are accepted but not used by this constructor
 27 |   cuckoo_ctx(const char* header, const u32 headerlen, const u32 nonce, word_t easy_ness)
 28 |     : easiness(easy_ness), cg(NEDGES, NEDGES, MAXSOLS, 0) { }
 29 | 
 30 |   ~cuckoo_ctx() { }
 31 | 
 32 |   u64 bytes() { return cg.bytes(); }
 33 | 
 34 |   // Writes nonce into the last u32 slot of the header, rekeys siphash, clears the graph.
 35 |   void setheadernonce(char* const headernonce, const u32 len, const u32 nonce) {
 36 |     u32 *words = (u32 *)headernonce;
 37 |     words[len/sizeof(u32)-1] = htole32(nonce); // place nonce at end
 38 |     setheader(headernonce, len, &sip_keys);
 39 |     cg.reset();
 40 |   }
 41 | 
 42 |   // Feeds edges 0..easiness-1 into the graph, then prints and verifies each solution.
 43 |   void find_cycles() {
 44 |     for (word_t nonce = 0; nonce < easiness; nonce++) {
 45 |       const word_t u = sipnode(&sip_keys, nonce, 0);
 46 |       const word_t v = sipnode(&sip_keys, nonce, 1);
 47 |       cg.add_edge(u, v);
 48 |   #ifdef SHOW
 49 |       printf("%d add (%d,%d)\n", nonce,u,v+NEDGES);
 50 |       for (unsigned j=0; j<NNODES; j++) {
 51 |         printf("\t%d",j);
 52 |         for (int a=cg.adjlist[j]; a!=graph<word_t>::NIL; a=cg.links[a].next) printf(":%d", cg.links[a^1].to);
 53 |         if ((j+1)%NEDGES == 0)
 54 |         printf("\n");
 55 |       }
 56 |   #endif
 57 |     }
 58 |     // report every solution the graph recorded while edges were added
 59 |     for (u32 s = 0; s < cg.nsols; s++) {
 60 |       word_t *sol = cg.sols[s];
 61 |       printf("Solution");
 62 |       for (u32 j = 0; j < PROOFSIZE; j++)
 63 |         printf(" %x", sol[j]);
 64 |       printf("\n");
 65 |       // check the proof independently before printing its hash
 66 |       const int pow_rc = verify(sol, &sip_keys);
 67 |       if (pow_rc != POW_OK) {
 68 |         printf("FAILED due to %s\n", errstr[pow_rc]);
 69 |         continue;
 70 |       }
 71 |       printf("Verified with cyclehash ");
 72 |       unsigned char cyclehash[32];
 73 |       blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)sol, sizeof(cg.sols[0]), 0, 0);
 74 |       for (u32 i = 0; i < sizeof(cyclehash); i++)
 75 |         printf("%02x", cyclehash[i]);
 76 |       printf("\n");
 77 |     }
 78 |   }
 79 | };
 80 | 
 81 | // arbitrary length of header hashed into siphash key
 82 | #define HEADERLEN 80
 83 | 
 84 | int main(int argc, char **argv) { // command-line driver for the simple solver
 85 |   char header[HEADERLEN];
 86 |   memset(header, 0, HEADERLEN);
 87 |   int c, easipct = 50; // easipct: percentage applied to NNODES to get the edge count
 88 |   u32 nonce = 0, range = 1;
 89 |   u64 time0, time1;
 90 |   u32 timems;
 91 | 
 92 |   while ((c = getopt (argc, argv, "e:h:n:r:")) != -1) {
 93 |     switch (c) {
 94 |       case 'e':
 95 |         easipct = atoi(optarg);
 96 |         break;
 97 |       case 'h':
 98 |         assert(strlen(optarg) <= sizeof(header)); // a longer header would overflow the buffer
 99 |         memcpy(header, optarg, strlen(optarg));
100 |         break;
101 |       case 'n': // starting nonce
102 |         nonce = atoi(optarg);
103 |         break;
104 |       case 'r': // number of nonces to try
105 |         range = atoi(optarg);
106 |         break;
107 |     }
108 |   }
109 |   assert(easipct >= 0 && easipct <= 100);
110 |   printf("Looking for %d-cycle on cuckatoo%d(\"%.*s\",%d", PROOFSIZE, EDGEBITS, (int)sizeof(header), header, nonce); // bounded: a full-length header has no terminating NUL
111 |   if (range > 1)
112 |     printf("-%d", nonce+range-1);
113 |   printf(") with %d%% edges, ", easipct);
114 |   word_t easiness = easipct * (uint64_t)NNODES / 100; // 64-bit product so the multiplication cannot wrap
115 |   cuckoo_ctx ctx(header, sizeof(header), nonce, easiness);
116 |   u64 bytes = ctx.bytes();
117 |   int unit;
118 |   for (unit=0; bytes >= 10240; bytes>>=10,unit++) ; // scale down to a value below 10240 and count the unit steps
119 |   printf("using %lld%cB memory\n", (long long)bytes, " KMGT"[unit]); // argument width now matches %lld
120 | 
121 |   for (u32 r = 0; r < range; r++) {
122 |     time0 = timestamp();
123 |     ctx.setheadernonce(header, sizeof(header), nonce + r);
124 |     printf("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx.sip_keys.k0, ctx.sip_keys.k1, ctx.sip_keys.k2, ctx.sip_keys.k3);
125 |     ctx.find_cycles();
126 |     time1 = timestamp(); timems = (time1 - time0) / 1000000;
127 |     printf("Time: %d ms\n", timems);
128 |   }
129 | }
130 | 


--------------------------------------------------------------------------------
/src/cuckoo/Makefile:
--------------------------------------------------------------------------------
 1 | .POSIX:
 2 | .SUFFIXES:
 3 | 
 4 | OPT ?= -O3
 5 | 
 6 | GCC_ARCH_FLAGS ?= -march=native
 7 | GPP_ARCH_FLAGS ?= -march=native
 8 | 
 9 | # -Wno-deprecated-declarations shuts up Apple OSX clang
10 | FLAGS ?= -Wall -Wno-format -Wno-deprecated-declarations -D_POSIX_C_SOURCE=200112L $(OPT) -DPREFETCH -I. $(CPPFLAGS) -pthread
11 | GPP ?= g++ $(GPP_ARCH_FLAGS) -std=c++11 $(FLAGS)
12 | CFLAGS ?= -Wall -Wno-format -fomit-frame-pointer $(OPT)
13 | GCC ?= gcc $(GCC_ARCH_FLAGS) -std=gnu11 $(CFLAGS)
14 | BLAKE_2B_SRC ?= ../crypto/blake2b-ref.c
15 | NVCC ?= nvcc -std=c++11 
16 | 
17 | all : simpletest leantest meantest
18 | 
19 | simpletest:     simple19
20 | 	./simple19 -n 38
21 | 
22 | leantest:       lean19x1
23 | 	./lean19x1 -n 38
24 | 
25 | meantest:	mean29x4
26 | 	./mean29x4 -n 63 -t 4 -s
27 | 
28 | verifytest:     lean19x1 verify19
29 | 	./lean19x1 -n 38 | grep ^Sol | ./verify19 -n 38
30 | 
31 | simple19:	../crypto/siphash.hpp cuckoo.h cyclebase.hpp simple.cpp Makefile
32 | 	$(GPP) -o $@ -DIDXSHIFT=0 -DPROOFSIZE=42 -DEDGEBITS=19 simple.cpp $(BLAKE_2B_SRC)
33 | 
34 | verify19:	../crypto/siphash.hpp cuckoo.h cuckoo.c simple.cpp Makefile
35 | 	$(GPP) -o $@ -DPROOFSIZE=42 -DEDGEBITS=19 cuckoo.c $(BLAKE_2B_SRC)
36 | 
37 | simple29:	../crypto/siphash.hpp cuckoo.h cyclebase.hpp simple.cpp Makefile
38 | 	$(GPP) -o $@ -DIDXSHIFT=0 -DPROOFSIZE=42 -DEDGEBITS=29 simple.cpp $(BLAKE_2B_SRC)
39 | 
40 | lean19x1:	../crypto/siphash.hpp cuckoo.h  lean.hpp lean.cpp Makefile
41 | 	$(GPP) -o $@ -DATOMIC -DNSIPHASH=1 -DEDGEBITS=19 lean.cpp $(BLAKE_2B_SRC)
42 | 
43 | lean19x8:	../crypto/siphash.hpp cuckoo.h  lean.hpp lean.cpp Makefile
44 | 	$(GPP) -o $@ -DATOMIC -DNSIPHASH=8 -DEDGEBITS=19 lean.cpp $(BLAKE_2B_SRC)
45 | 
46 | lean27x1:	../crypto/siphash.hpp cuckoo.h  lean.hpp lean.cpp Makefile
47 | 	$(GPP) -o $@ -DATOMIC -DNSIPHASH=1 -DEDGEBITS=27 lean.cpp $(BLAKE_2B_SRC)
48 | 
49 | lean29x1:	../crypto/siphash.hpp cuckoo.h  lean.hpp lean.cpp Makefile
50 | 	$(GPP) -o $@ -DATOMIC -DNSIPHASH=1 -DEDGEBITS=29 lean.cpp $(BLAKE_2B_SRC)
51 | 
52 | lean31x1:	../crypto/siphash.hpp cuckoo.h  lean.hpp lean.cpp Makefile
53 | 	$(GPP) -o $@ -DATOMIC -DNSIPHASH=1 -DEDGEBITS=31 lean.cpp $(BLAKE_2B_SRC)
54 | 
55 | lean29x8:	../crypto/siphash.hpp cuckoo.h  lean.hpp lean.cpp Makefile
56 | 	$(GPP) -o $@ -mavx2 -DNSIPHASH=8 -DATOMIC -DEDGEBITS=29 lean.cpp $(BLAKE_2B_SRC)
57 | 
58 | mean19x8:	cuckoo.h ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
59 | 	$(GPP) -o $@ -mavx2 -DXBITS=2 -DNSIPHASH=8 -DEDGEBITS=19 mean.cpp $(BLAKE_2B_SRC)
60 | 
61 | mean29x4:	cuckoo.h ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
62 | 	$(GPP) -o $@ -mno-avx2 -DNSIPHASH=4 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC)
63 | 
64 | mean29x8:	cuckoo.h ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
65 | 	$(GPP) -o $@ -mavx2 -DNSIPHASH=8 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC)
66 | 
67 | mean30x8:	cuckoo.h ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
68 | 	$(GPP) -o $@ -mavx2 -DNSIPHASH=8 -DEXPANDROUND=10 -DCOMPRESSROUND=22 -DEDGEBITS=30 mean.cpp $(BLAKE_2B_SRC)
69 | 
70 | mean29x1:	cuckoo.h ../crypto/siphash.hpp mean.hpp mean.cpp Makefile
71 | 	$(GPP) -o $@ -DNSIPHASH=1 -DEDGEBITS=29 mean.cpp $(BLAKE_2B_SRC)
72 | 
73 | lcuda29:	../crypto/siphash.cuh lean.cu Makefile
74 | 	$(NVCC) -o $@ -DEDGEBITS=29 -arch sm_35 lean.cu $(BLAKE_2B_SRC)
75 | 
76 | cuda29:		../crypto/siphash.cuh mean.cu Makefile
77 | 	$(NVCC) -o $@ -DEDGEBITS=29 -arch sm_35 mean.cu $(BLAKE_2B_SRC)
78 | 
79 | cuda29_1:		../crypto/siphash.cuh mean.cu Makefile
80 | 	$(NVCC) -o $@ -DNB=1 -DEDGEBITS=29 -arch sm_35 mean.cu $(BLAKE_2B_SRC)
81 | 


--------------------------------------------------------------------------------
/src/cuckoo/cuckoo.c:
--------------------------------------------------------------------------------
 1 | // Cuckoo Cycle, a memory-hard proof-of-work
 2 | // Copyright (c) 2013-2020 John Tromp
 3 | 
 4 | #include "cuckoo.h"
 5 | #include <inttypes.h> // for SCNx64 macro
 6 | #include <stdio.h>    // printf/scanf
 7 | #include <stdlib.h>   // exit
 8 | #include <unistd.h>   // getopt
 9 | #include <assert.h>   // d'uh
10 | 
11 | // arbitrary length of header hashed into siphash key
12 | #define HEADERLEN 80
13 | 
14 | int main(int argc, char **argv) { // verifier: reads "Solution ..." lines from stdin and checks each proof
15 |   const char *header = "";
16 |   int nonce = 0, c;
17 |   while ((c = getopt (argc, argv, "h:n:")) != -1) {
18 |     switch (c) {
19 |       case 'h':
20 |         header = optarg;
21 |         break;
22 |       case 'n':
23 |         nonce = atoi(optarg);
24 |         break;
25 |     }
26 |   }
27 |   char headernonce[HEADERLEN];
28 |   u32 hdrlen = strlen(header);
29 |   assert(hdrlen <= sizeof(headernonce)); // longer headers would overflow the buffer and wrap the memset length
30 |   memcpy(headernonce, header, hdrlen);
31 |   memset(headernonce+hdrlen, 0, sizeof(headernonce)-hdrlen); // zero-pad up to HEADERLEN
32 |   ((u32 *)headernonce)[HEADERLEN/sizeof(u32)-1] = htole32(nonce); // nonce occupies the last u32 slot
33 |   siphash_keys keys;
34 |   setheader(headernonce, sizeof(headernonce), &keys);
35 |   printf("Verifying size %d proof for cuckoo%d(\"%s\",%d)\n",
36 |                PROOFSIZE, EDGEBITS+1, header, nonce);
37 |   for (int nsols=0; scanf(" Solution") == 0; nsols++) { // scanf returns EOF once input is exhausted
38 |     word_t nonces[PROOFSIZE];
39 |     for (int n = 0; n < PROOFSIZE; n++) {
40 |       uint64_t val; // parsed as 64-bit hex, then narrowed to word_t
41 |       int nscan = scanf(" %" SCNx64, &val);
42 |       assert(nscan == 1);
43 |       nonces[n] = val;
44 |     }
45 |     int pow_rc = verify(nonces, &keys);
46 |     if (pow_rc == POW_OK) {
47 |       printf("Verified with cyclehash ");
48 |       unsigned char cyclehash[32];
49 |       blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)nonces, sizeof(nonces), 0, 0);
50 |       for (int i=0; i<32; i++)
51 |         printf("%02x", cyclehash[i]);
52 |       printf("\n");
53 |     } else {
54 |       printf("FAILED due to %s\n", errstr[pow_rc]);
55 |     }
56 |   }
57 |   return 0;
58 | }
59 | 


--------------------------------------------------------------------------------
/src/cuckoo/cyclebase.hpp:
--------------------------------------------------------------------------------
  1 | #include <utility>
  2 | #include <stdio.h>
  3 | #include <assert.h>
  4 | #include <set>
  5 | 
  6 | #ifndef MAXCYCLES
  7 | #define MAXCYCLES 64 // single byte
  8 | #endif
  9 | 
 10 | struct edge { // a (u, v) pair of u32 values
 11 |   u32 u = 0;
 12 |   u32 v = 0;
 13 |   edge() = default; // both fields start at zero
 14 |   edge(u32 x, u32 y) : u(x), v(y) { }
 15 | };
 16 | 
 17 | struct cyclebase {
 18 |   // should avoid different values of MAXPATHLEN in different threads of one process
 19 |   static const u32 MAXPATHLEN = 16 << (EDGEBITS/3);
 20 | 
 21 |   int ncycles;
 22 |   word_t *cuckoo;
 23 |   edge cycleedges[MAXCYCLES];
 24 |   u32 cyclelengths[MAXCYCLES];
 25 |   u32 prevcycle[MAXCYCLES];
 26 |   u32 us[MAXPATHLEN];
 27 |   u32 vs[MAXPATHLEN];
 28 | 
 29 |   void alloc() {
 30 |     cuckoo = (word_t *)calloc(NCUCKOO, sizeof(word_t));
 31 |   }
 32 | 
 33 |   void freemem() { // not a destructor, as memory may have been allocated elsewhere, bypassing alloc()
 34 |     free(cuckoo);
 35 |   }
 36 | 
 37 |   void reset() {
 38 |     resetcounts();
 39 |   }
 40 | 
 41 |   void resetcounts() {
 42 |     memset(cuckoo, -1, NCUCKOO * sizeof(word_t)); // for prevcycle nil
 43 |     ncycles = 0;
 44 |   }
 45 | 
 46 |   int path(u32 u0, u32 *us) const {
 47 |     int nu;
 48 |     for (u32 u = us[nu = 0] = u0; cuckoo[u] < 0x80000000; ) {
 49 |       u = cuckoo[u];
 50 |       if (++nu >= (int)MAXPATHLEN) {
 51 |         while (nu-- && us[nu] != u) ;
 52 |         if (nu < 0)
 53 |           printf("maximum path length exceeded\n");
 54 |         else printf("illegal % 4d-cycle from node %d\n", MAXPATHLEN-nu, u0);
 55 |         exit(0);
 56 |       }
 57 |       us[nu] = u;
 58 |     }
 59 |     return nu;
 60 |   }
 61 | 
 62 |   int pathjoin(u32 *us, int *pnu, u32 *vs, int *pnv) {
 63 |     int nu = *pnu, nv = *pnv;
 64 |     int min = nu < nv ? nu : nv;
 65 |     for (nu -= min, nv -= min; us[nu] != vs[nv]; nu++, nv++) min--;
 66 |     *pnu = nu; *pnv = nv;
 67 |     return min;
 68 |   }
 69 | 
 70 |   void addedge(u32 u0, u32 v0) {
 71 |     u32 u = u0 << 1, v = (v0 << 1) | 1;
 72 |     int nu = path(u, us), nv = path(v, vs);
 73 |     if (us[nu] == vs[nv]) {
 74 |      u32 ccsize = -cuckoo[us[nu]];
 75 |       pathjoin(us, &nu, vs, &nv);
 76 |       int len = nu + nv + 1;
 77 |       printf("% 4d-cycle found in ccsize %d\n", len, ccsize);
 78 |       cycleedges[ncycles].u = u;
 79 |       cycleedges[ncycles].v = v;
 80 |       cyclelengths[ncycles++] = len;
 81 |       if (len == PROOFSIZE)
 82 |         solution(us, nu, vs, nv);
 83 |       assert(ncycles < MAXCYCLES);
 84 |     } else if (nu < nv) {
 85 |       cuckoo[vs[nv]] += cuckoo[us[nu]];
 86 |       while (nu--)
 87 |         cuckoo[us[nu+1]] = us[nu];
 88 |       cuckoo[u] = v;
 89 |     } else {
 90 |       cuckoo[us[nu]] += cuckoo[vs[nv]];
 91 |       while (nv--)
 92 |         cuckoo[vs[nv+1]] = vs[nv];
 93 |       cuckoo[v] = u;
 94 |     }
 95 |   }
 96 | 
 97 |   void recordedge(const u32 i, const u32 u, const u32 v) {
 98 |     printf(" (%x,%x)", u, v);
 99 |   }
100 | 
101 |   void solution(u32 *us, int nu, u32 *vs, int nv) { // print the node pairs of the cycle described by the two trimmed paths
102 |     printf("Nodes");
103 |     u32 ni = 0; // running edge index handed to recordedge
104 |     recordedge(ni++, *us, *vs); // pair formed by the first entry of each path
105 |     while (nu--)
106 |       recordedge(ni++, us[(nu+1)&~1], us[nu|1]); // u's in even position; v's in odd
107 |     while (nv--)
108 |       recordedge(ni++, vs[nv|1], vs[(nv+1)&~1]); // u's in odd position; v's in even
109 |     printf("\n");
110 | #if 0 // disabled nonce recovery; it refers to names (n, cycle, edge) that this function does not declare
111 |     for (u32 nonce = n = 0; nonce < NEDGES; nonce++) {
112 |       edge e(2*sipnode(&sip_keys, nonce, 0), 2*sipnode(&sip_keys, nonce, 1)+1);
113 |       if (cycle.find(e) != cycle.end()) {
114 |         printf(" %x", nonce);
115 |         cycle.erase(e);
116 |       }
117 |     }
118 |     printf("\n");
119 | #endif
120 |   }
121 | 
122 |   int sharedlen(u32 *us, int nu, u32 *vs, int nv) { // count how many trailing entries the two paths have in common
123 |     int matched = 0;
124 |     while (nu != 0 && nv != 0 && us[nu-1] == vs[nv-1]) { nu--; nv--; matched++; } // walk back from both ends together
125 |     return matched;
126 |   }
127 | };
128 | 


--------------------------------------------------------------------------------
/src/cuckoo/lean.cpp:
--------------------------------------------------------------------------------
 1 | // Cuckoo Cycle, a memory-hard proof-of-work
 2 | // Copyright (c) 2013-2020 John Tromp
 3 | 
 4 | #include "lean.hpp"
 5 | #include <unistd.h>
 6 | 
 7 | #define MAXSOLS 8
 8 | // arbitrary length of header hashed into siphash key
 9 | #ifndef HEADERLEN
10 | #define HEADERLEN 80
11 | #endif
12 | 
13 | 
14 | int main(int argc, char **argv) { // lean miner driver: parse options, then solve every nonce in the range
15 |   int nthreads = 1;
16 |   int ntrims   = 2 + (PART_BITS+3)*(PART_BITS+4);
17 |   int nonce = 0;
18 |   int range = 1;
19 |   char header[HEADERLEN];
20 |   unsigned len;
21 |   u64 time0, time1;
22 |   u32 timems;
23 |   int c;
24 | 
25 |   memset(header, 0, sizeof(header));
26 |   while ((c = getopt (argc, argv, "h:m:n:r:t:x:")) != -1) {
27 |     switch (c) {
28 |       case 'h': // header given as literal text
29 |         len = strlen(optarg);
30 |         assert(len <= sizeof(header));
31 |         memcpy(header, optarg, len);
32 |         break;
33 |       case 'x': // header given as hex digits, two per byte
34 |         len = strlen(optarg)/2;
35 |         assert(len <= sizeof(header));
36 |         for (u32 i=0; i<len; i++)
37 |           sscanf(optarg+2*i, "%2hhx", header+i);
38 |         break;
39 |       case 'n': // first nonce
40 |         nonce = atoi(optarg);
41 |         break;
42 |       case 'r': // number of consecutive nonces to try
43 |         range = atoi(optarg);
44 |         break;
45 |       case 'm': // number of trimming rounds
46 |         ntrims = atoi(optarg);
47 |         break;
48 |       case 't': // number of worker threads
49 |         nthreads = atoi(optarg);
50 |         break;
51 |     }
52 |   }
53 |   printf("Looking for %d-cycle on cuckoo%d(\"%.*s\",%d", PROOFSIZE, EDGEBITS+1, (int)sizeof(header), header, nonce); // %.*s: header may fill the buffer with no NUL
54 |   if (range > 1)
55 |     printf("-%d", nonce+range-1);
56 |   printf(") with 50%% edges, %d trims, %d threads\n", ntrims, nthreads);
57 | 
58 |   u64 edgeBytes = NEDGES/8, nodeBytes = TWICE_ATOMS*sizeof(atwice);
59 |   int edgeUnit, nodeUnit;
60 |   for (edgeUnit=0; edgeBytes >= 1024; edgeBytes>>=10,edgeUnit++) ; // reduce to a value below 1024 plus a unit index
61 |   for (nodeUnit=0; nodeBytes >= 1024; nodeBytes>>=10,nodeUnit++) ;
62 |   printf("Using %d%cB edge and %d%cB node memory, %d-way siphash, and %d-byte counters\n",
63 |      (int)edgeBytes, " KMGT"[edgeUnit], (int)nodeBytes, " KMGT"[nodeUnit], NSIPHASH, SIZEOF_TWICE_ATOM);
64 | 
65 |   thread_ctx *threads = (thread_ctx *)calloc(nthreads, sizeof(thread_ctx));
66 |   assert(threads);
67 |   cuckoo_ctx ctx(nthreads, ntrims, MAXSOLS);
68 | 
69 |   u32 sumnsols = 0;
70 |   for (int r = 0; r < range; r++) { // one full solver run per nonce
71 |     time0 = timestamp();
72 |     ctx.setheadernonce(header, sizeof(header), nonce + r);
73 |     ctx.barry.clear();
74 |     for (int t = 0; t < nthreads; t++) { // start the workers
75 |       threads[t].id = t;
76 |       threads[t].ctx = &ctx;
77 |       int err = pthread_create(&threads[t].thread, NULL, worker, (void *)&threads[t]);
78 |       assert(err == 0);
79 |     }
80 |     // sleep(33); ctx.abort();
81 |     for (int t = 0; t < nthreads; t++) { // wait for all of them to finish
82 |       int err = pthread_join(threads[t].thread, NULL);
83 |       assert(err == 0);
84 |     }
85 |     time1 = timestamp(); timems = (time1 - time0) / 1000000;
86 |     printf("Time: %d ms\n", timems);
87 |     for (unsigned s = 0; s < ctx.nsols; s++) { // print each solution as PROOFSIZE hex values
88 |       printf("Solution");
89 |       for (int i = 0; i < PROOFSIZE; i++)
90 |         printf(" %jx", (uintmax_t)ctx.sols[s][i]);
91 |       printf("\n");
92 |     }
93 |     sumnsols += ctx.nsols;
94 |   }
95 |   free(threads);
96 |   printf("%d total solutions\n", sumnsols);
97 |   return 0;
98 | }
99 | 


--------------------------------------------------------------------------------
/src/cuckoo/mean.cpp:
--------------------------------------------------------------------------------
  1 | // Cuckoo Cycle, a memory-hard proof-of-work
  2 | // Copyright (c) 2013-2020 John Tromp
  3 | 
  4 | #include "mean.hpp"
  5 | #include <unistd.h>
  6 | 
  7 | // arbitrary length of header hashed into siphash key
  8 | #define HEADERLEN 80
  9 | 
 10 | int main(int argc, char **argv) { // mean miner driver: parse options, solve each nonce, verify and print solutions
 11 |   u32 nthreads = 1;
 12 |   u32 ntrims = EDGEBITS >= 30 ? 96 : 68;
 13 |   u32 nonce = 0;
 14 |   u32 range = 1;
 15 | #ifdef SAVEEDGES
 16 |   bool showcycle = 1;
 17 | #else
 18 |   bool showcycle = 0;
 19 | #endif
 20 |   u64 time0, time1;
 21 |   u32 timems;
 22 |   char header[HEADERLEN];
 23 |   u32 len;
 24 |   bool allrounds = false;
 25 |   int c;
 26 | 
 27 |   memset(header, 0, sizeof(header));
 28 |   while ((c = getopt (argc, argv, "ah:m:n:r:st:x:")) != -1) {
 29 |     switch (c) {
 30 |       case 'a':
 31 |         allrounds = true;
 32 |         break;
 33 |       case 'h': // header given as literal text
 34 |         len = strlen(optarg);
 35 |         assert(len <= sizeof(header));
 36 |         memcpy(header, optarg, len);
 37 |         break;
 38 |       case 'x': // header given as hex digits; must cover the whole buffer
 39 |         len = strlen(optarg)/2;
 40 |         assert(len == sizeof(header));
 41 |         for (u32 i=0; i<len; i++)
 42 |           sscanf(optarg+2*i, "%2hhx", header+i);
 43 |         break;
 44 |       case 'n': // first nonce
 45 |         nonce = atoi(optarg);
 46 |         break;
 47 |       case 'r': // number of consecutive nonces to try
 48 |         range = atoi(optarg);
 49 |         break;
 50 |       case 'm':
 51 |         ntrims = atoi(optarg) & -2; // make even as required by solve()
 52 |         break;
 53 |       case 's':
 54 |         showcycle = true;
 55 |         break;
 56 |       case 't': // number of threads
 57 |         nthreads = atoi(optarg);
 58 |         break;
 59 |     }
 60 |   }
 61 |   printf("Looking for %d-cycle on cuckoo%d(\"%.*s\",%d", PROOFSIZE, NODEBITS, (int)sizeof(header), header, nonce); // %.*s: header may fill the buffer with no NUL
 62 |   if (range > 1)
 63 |     printf("-%d", nonce+range-1);
 64 |   printf(") with 50%% edges\n");
 65 | 
 66 |   solver_ctx ctx(nthreads, ntrims, allrounds, showcycle);
 67 | 
 68 |   u64 sbytes = ctx.sharedbytes();
 69 |   u32 tbytes = ctx.threadbytes();
 70 |   int sunit,tunit;
 71 |   for (sunit=0; sbytes >= 10240; sbytes>>=10,sunit++) ; // reduce to a value below 10240 plus a unit index
 72 |   for (tunit=0; tbytes >= 10240; tbytes>>=10,tunit++) ;
 73 |   printf("Using %d%cB bucket memory at %lx,\n", (u32)sbytes, " KMGT"[sunit], (u64)ctx.trimmer->buckets); // cast: sbytes is 64-bit but %d takes an int
 74 |   printf("%dx%d%cB thread memory at %lx,\n", nthreads, tbytes, " KMGT"[tunit], (u64)ctx.trimmer->tbuckets);
 75 |   printf("%d-way siphash, and %d buckets.\n", NSIPHASH, NX);
 76 | 
 77 |   u32 sumnsols = 0;
 78 |   for (u32 r = 0; r < range; r++) { // one solver run per nonce
 79 |     time0 = timestamp();
 80 |     ctx.setheadernonce(header, sizeof(header), nonce + r);
 81 |     printf("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx.trimmer->sip_keys.k0, ctx.trimmer->sip_keys.k1, ctx.trimmer->sip_keys.k2, ctx.trimmer->sip_keys.k3);
 82 |     u32 nsols = ctx.solve();
 83 |     time1 = timestamp(); timems = (time1 - time0) / 1000000;
 84 |     printf("Time: %d ms\n", timems);
 85 | 
 86 |     for (unsigned s = 0; s < nsols; s++) { // print, then verify, each solution
 87 |       printf("Solution");
 88 |       word_t *prf = &ctx.sols[s * PROOFSIZE]; // solutions are stored back to back, PROOFSIZE words each
 89 |       for (u32 i = 0; i < PROOFSIZE; i++)
 90 |         printf(" %jx", (uintmax_t)prf[i]);
 91 |       printf("\n");
 92 |       int pow_rc = verify(prf, &ctx.trimmer->sip_keys);
 93 |       if (pow_rc == POW_OK) {
 94 |         printf("Verified with cyclehash ");
 95 |         unsigned char cyclehash[32];
 96 |         blake2b((void *)cyclehash, sizeof(cyclehash), (const void *)prf, sizeof(proof), 0, 0);
 97 |         for (int i=0; i<32; i++)
 98 |           printf("%02x", cyclehash[i]);
 99 |         printf("\n");
100 |       } else {
101 |         printf("FAILED due to %s\n", errstr[pow_rc]);
102 |       }
103 |     }
104 |     sumnsols += nsols;
105 |   }
106 |   printf("%d total solutions\n", sumnsols);
107 |   return 0;
108 | }
109 | 


--------------------------------------------------------------------------------
/src/cuckoo/simple.cpp:
--------------------------------------------------------------------------------
  1 | // Cuckoo Cycle, a memory-hard proof-of-work
  2 | // Copyright (c) 2013-2020 John Tromp
  3 | 
  4 | #include "cuckoo.h"
  5 | 
  6 | // assume EDGEBITS < 31
  7 | #define NNODES (2 * NEDGES)
  8 | #define NCUCKOO NNODES
  9 | 
 10 | 
 11 | #include <stdio.h>
 12 | #include <stdlib.h>
 13 | #include <assert.h>
 14 | #include <unistd.h>
 15 | #include "cyclebase.hpp"
 16 | #include <set>
 17 | 
 18 | typedef unsigned char u8;
 19 | 
 20 | class cuckoo_ctx { // state of the simple solver: siphash keys, edge budget and the cycle base
 21 | public:
 22 |   siphash_keys sip_keys;
 23 |   word_t easiness;
 24 |   cyclebase cb;
 25 | 
 26 |   // Stores the edge budget and allocates the cycle base; header, headerlen and nonce are not used here.
 27 |   cuckoo_ctx(const char* header, const u32 headerlen, const u32 nonce, word_t easy_ness) : easiness(easy_ness) {
 28 |     cb.alloc();
 29 |     assert(cb.cuckoo != 0);
 30 |   }
 31 | 
 32 |   ~cuckoo_ctx() { cb.freemem(); }
 33 | 
 34 |   // Byte count reported by main as memory usage: 1+NNODES words.
 35 |   u64 bytes() {
 36 |     return sizeof(word_t) * (word_t)(1+NNODES);
 37 |   }
 38 | 
 39 |   void setheadernonce(char* const headernonce, const u32 len, const u32 nonce) { // derive keys for this nonce, reset the base
 40 |     u32 *const words = (u32 *)headernonce;
 41 |     words[len/sizeof(u32)-1] = htole32(nonce); // place nonce at end
 42 |     setheader(headernonce, len, &sip_keys);
 43 |     cb.reset();
 44 |   }
 45 | 
 46 |   void cycle_base() { // feed edges 0..easiness-1 into the cycle base, one addedge call per nonce
 47 |     for (word_t nonce = 0; nonce < easiness; nonce++) {
 48 |       const word_t unode = sipnode(&sip_keys, nonce, 0);
 49 |       const word_t vnode = sipnode(&sip_keys, nonce, 1);
 50 | #ifdef SHOW
 51 |       for (unsigned slot=1; slot<NNODES; slot++) { // dump the whole cuckoo[] table before each insertion
 52 |         if (cb.cuckoo[slot]) printf("%2d:%02d ",slot,cb.cuckoo[slot]);
 53 |         else                 printf("%2d:   ",slot);
 54 |       }
 55 |       printf(" %x (%d,%d)\n", nonce,2*unode,2*vnode+1);
 56 | #endif
 57 |       cb.addedge(unode, vnode);
 58 |     }
 59 | #ifdef CCSIZE1000
 60 |     u32 nlarge = 0;
 61 |     for (u32 node=0; node<NNODES; node++) { // sum the negated entries that reach 1000
 62 |       const int size = -cb.cuckoo[node];
 63 |       if (size >= 1000) nlarge += size;
 64 |     }
 65 |     printf("%u nodes in ccsize >= 1000\n", nlarge);
 66 | #endif
 67 |   }
 68 | };
 69 | 
 70 | // arbitrary length of header hashed into siphash key
 71 | #define HEADERLEN 80
 72 | 
 73 | int main(int argc, char **argv) { // simple solver driver: parse options, then build the cycle base for each nonce
 74 |   char header[HEADERLEN] = {0}; // zero-filled up front, so short -h values stay NUL-padded
 75 |   int c, easipct = 50;
 76 |   u32 nonce = 0;
 77 |   u32 range = 1;
 78 |   u64 time0, time1;
 79 |   u32 timems;
 80 | 
 81 |   while ((c = getopt (argc, argv, "e:h:n:r:")) != -1) {
 82 |     switch (c) {
 83 |       case 'e': // percentage of NNODES to use as the number of edges
 84 |         easipct = atoi(optarg);
 85 |         break;
 86 |       case 'h':
 87 |         assert(strlen(optarg) <= sizeof(header)); // the copy below was unbounded and could overrun header
 88 |         memcpy(header, optarg, strlen(optarg));
 89 |         break;
 90 |       case 'n': // first nonce
 91 |         nonce = atoi(optarg);
 92 |         break;
 93 |       case 'r': // number of consecutive nonces to try
 94 |         range = atoi(optarg);
 95 |         break;
 96 |     }
 97 |   }
 98 |   assert(easipct >= 0 && easipct <= 100);
 99 |   printf("Looking for %d-cycle on cuckoo%d(\"%.*s\",%d", PROOFSIZE, EDGEBITS+1, (int)sizeof(header), header, nonce); // %.*s: header may fill the buffer with no NUL
100 |   if (range > 1)
101 |     printf("-%d", nonce+range-1);
102 |   printf(") with %d%% edges, ", easipct);
103 |   word_t easiness = (word_t)(easipct * (u64)NNODES / 100); // multiply in 64 bits so the product cannot wrap a narrower word_t
104 |   cuckoo_ctx ctx(header, sizeof(header), nonce, easiness);
105 |   u64 bytes = ctx.bytes();
106 |   int unit;
107 |   for (unit=0; bytes >= 10240; bytes>>=10,unit++) ; // reduce to a value below 10240 plus a unit index
108 |   printf("using %d%cB memory at %llx.\n", (u32)bytes, " KMGT"[unit], (uint64_t)ctx.cb.cuckoo);
109 | 
110 |   for (u32 r = 0; r < range; r++) { // one run per nonce
111 |     time0 = timestamp();
112 |     ctx.setheadernonce(header, sizeof(header), nonce + r);
113 |     printf("nonce %d k0 k1 k2 k3 %llx %llx %llx %llx\n", nonce+r, ctx.sip_keys.k0, ctx.sip_keys.k1, ctx.sip_keys.k2, ctx.sip_keys.k3);
114 |     ctx.cycle_base();
115 |     time1 = timestamp(); timems = (time1 - time0) / 1000000;
116 |     printf("Time: %d ms\n", timems);
117 |   }
118 | }
119 | 


--------------------------------------------------------------------------------
/src/java/Makefile:
--------------------------------------------------------------------------------
 1 | .POSIX:
 2 | .SUFFIXES:
 3 | # Builds the Java miner and verifier and runs them back to back; no rule below uses the C/C++ compiler variables.
 4 | OPT ?= -O3
 5 | 
 6 | GCC_ARCH_FLAGS ?= -march=native
 7 | GPP_ARCH_FLAGS ?= -march=native
 8 | 
 9 | # -Wno-deprecated-declarations shuts up Apple OSX clang
10 | FLAGS ?= -Wall -Wno-format -Wno-deprecated-declarations -D_POSIX_C_SOURCE=200112L $(OPT) -DPREFETCH -I. $(CPPFLAGS) -pthread
11 | GPP ?= g++ $(GPP_ARCH_FLAGS) -std=c++11 $(FLAGS)
12 | CFLAGS ?= -Wall -Wno-format -fomit-frame-pointer $(OPT)
13 | GCC ?= gcc $(GCC_ARCH_FLAGS) -std=gnu11 $(CFLAGS)
14 | LIBS ?= -L. -lblake2b
15 | .PHONY: all java
16 | all : java
17 | 
18 | Cuckoo.class:	Cuckoo.java Makefile
19 | 	javac -O Cuckoo.java
20 | 
21 | SimpleMiner.class:	Cuckoo.java SimpleMiner.java Makefile
22 | 	javac -O Cuckoo.java SimpleMiner.java
23 | 
24 | java:	Cuckoo.class SimpleMiner.class Makefile
25 | 	java SimpleMiner -h 261 | tail -1 | java Cuckoo -h 261
26 | 


--------------------------------------------------------------------------------
/src/java/SimpleMiner.java:
--------------------------------------------------------------------------------
  1 | // Cuckoo Cycle, a memory-hard proof-of-work
  2 | // Copyright (c) 2013-2016 John Tromp
  3 | 
  4 | import java.util.Set;
  5 | import java.util.HashSet;
  6 | 
  7 | class CuckooSolve {
  8 |   static final int MAXPATHLEN = 4096;
  9 |   Cuckoo graph;
 10 |   int easiness;
 11 |   int[] cuckoo;
 12 |   int[][] sols;
 13 |   int nsols;
 14 |   int nthreads;
 15 | 
 16 |   public CuckooSolve(byte[] hdr, int en, int ms, int nt) {
 17 |     graph = new Cuckoo(hdr);
 18 |     easiness = en;
 19 |     sols = new int[ms][Cuckoo.PROOFSIZE];
 20 |     cuckoo = new int[1+(int)Cuckoo.NNODES];
 21 |     assert cuckoo != null;
 22 |     nsols = 0;
 23 |     nthreads = nt;
 24 |   }
 25 | 
 26 |   public int path(int u, int[] us) {
 27 |     int nu;
 28 |     for (nu = 0; u != 0; u = cuckoo[u]) {
 29 |       if (++nu >= MAXPATHLEN) {
 30 |         while (nu-- != 0 && us[nu] != u) ;
 31 |         if (nu < 0)
 32 |           System.out.println("maximum path length exceeded");
 33 |         else System.out.println("illegal " + (MAXPATHLEN-nu) + "-cycle");
 34 |         Thread.currentThread().interrupt();
 35 |       }
 36 |       us[nu] = u;
 37 |     }
 38 |     return nu;
 39 |   }
 40 |   
 41 |   public synchronized void solution(int[] us, int nu, int[] vs, int nv) {
 42 |     Set<Edge> cycle = new HashSet<Edge>();
 43 |     int n;
 44 |     cycle.add(new Edge(us[0],vs[0]-Cuckoo.NEDGES));
 45 |     while (nu-- != 0) // u's in even position; v's in odd
 46 |       cycle.add(new Edge(us[(nu+1)&~1],us[nu|1]-Cuckoo.NEDGES));
 47 |     while (nv-- != 0) // u's in odd position; v's in even
 48 |       cycle.add(new Edge(vs[nv|1],vs[(nv+1)&~1]-Cuckoo.NEDGES));
 49 |     for (int nonce = n = 0; nonce < easiness; nonce++) {
 50 |       Edge e = graph.sipedge(nonce);
 51 |       if (cycle.contains(e)) {
 52 |         sols[nsols][n++] = nonce;
 53 |         cycle.remove(e);
 54 |       }
 55 |     }
 56 |     if (n == Cuckoo.PROOFSIZE)
 57 |       nsols++;
 58 |     else System.out.println("Only recovered " + n + " nonces");
 59 |   }
 60 | }
 61 | 
 62 | public class SimpleMiner implements Runnable { // one worker thread of the simple miner, plus the command-line entry point
 63 |   int id;
 64 |   CuckooSolve solve;
 65 | 
 66 |   public SimpleMiner(int i, CuckooSolve cs) { // i: this worker's index, cs: solver state shared by all workers
 67 |     id = i;
 68 |     solve = cs;
 69 |   }
 70 | 
 71 |   public void run() { // processes nonces id, id+nthreads, id+2*nthreads, ... below easiness
 72 |     int[] cuckoo = solve.cuckoo;
 73 |     int[] us = new int[CuckooSolve.MAXPATHLEN], vs = new int[CuckooSolve.MAXPATHLEN];
 74 |     for (int nonce = id; nonce < solve.easiness; nonce += solve.nthreads) {
 75 |       int u = cuckoo[us[0] = (int)solve.graph.sipnode(nonce,0)]; // us[0] is the edge's first endpoint, u what it points to
 76 |       int v = cuckoo[vs[0] = (int)(Cuckoo.NEDGES + solve.graph.sipnode(nonce,1))]; // second endpoint is offset by NEDGES
 77 |       if (u == vs[0] || v == us[0])
 78 |         continue; // ignore duplicate edges
 79 |       int nu = solve.path(u, us), nv = solve.path(v, vs);
 80 |       if (us[nu] == vs[nv]) { // both paths end at the same node, so this edge closes a cycle
 81 |         int min = nu < nv ? nu : nv;
 82 |         for (nu -= min, nv -= min; us[nu] != vs[nv]; nu++, nv++) ; // advance in lockstep to the first shared node
 83 |         int len = nu + nv + 1;
 84 |         System.out.println(" " + len + "-cycle found at " + id + ":" + (int)(nonce*100L/solve.easiness) + "%");
 85 |         if (len == Cuckoo.PROOFSIZE && solve.nsols < solve.sols.length)
 86 |           solve.solution(us, nu, vs, nv);
 87 |         continue;
 88 |       }
 89 |       if (nu < nv) { // no cycle: reverse the shorter path and link its start to the other endpoint
 90 |         while (nu-- != 0)
 91 |           cuckoo[us[nu+1]] = us[nu];
 92 |         cuckoo[us[0]] = vs[0];
 93 |       } else {
 94 |         while (nv-- != 0)
 95 |           cuckoo[vs[nv+1]] = vs[nv];
 96 |         cuckoo[vs[0]] = us[0];
 97 |       }
 98 |     }
 99 |     Thread.currentThread().interrupt(); // sets this thread's interrupt flag right before run() returns
100 |   }
101 | 
102 |   public static void main(String argv[]) { // options: -e percent, -h header, -m max solutions, -t threads
103 |     assert Cuckoo.NNODES > 0;
104 |     int nthreads = 1;
105 |     int maxsols = 8;
106 |     String header = "";
107 |     int easipct = 50;
108 |     for (int i = 0; i < argv.length; i++) { // every recognised flag consumes the argument that follows it
109 |       if (argv[i].equals("-e")) {
110 |         easipct = Integer.parseInt(argv[++i]);
111 |       } else if (argv[i].equals("-h")) {
112 |         header = argv[++i];
113 |       } else if (argv[i].equals("-m")) {
114 |         maxsols = Integer.parseInt(argv[++i]);
115 |       } else if (argv[i].equals("-t")) {
116 |         nthreads = Integer.parseInt(argv[++i]);
117 |       }
118 |     }
119 |     assert easipct >= 0 && easipct <= 100;
120 |     System.out.println("Looking for " + Cuckoo.PROOFSIZE + "-cycle on cuckoo" + Cuckoo.NODEBITS + "(\"" + header + "\") with " + easipct + "% edges and " + nthreads + " threads");
121 |     CuckooSolve solve = new CuckooSolve(header.getBytes(), (int)(easipct * (long)Cuckoo.NNODES / 100), maxsols, nthreads); // edge count is easipct percent of NNODES
122 |   
123 |     Thread[] threads = new Thread[nthreads];
124 |     for (int t = 0; t < nthreads; t++) { // start one miner per thread, all sharing the same CuckooSolve
125 |       threads[t] = new Thread(new SimpleMiner(t, solve));
126 |       threads[t].start();
127 |     }
128 |     for (int t = 0; t < nthreads; t++) { // wait for every miner to finish
129 |       try {
130 |         threads[t].join();
131 |       } catch (InterruptedException e) {
132 |         System.out.println(e);
133 |         System.exit(1);
134 |       }
135 |     }
136 |     for (int s = 0; s < solve.nsols; s++) { // print each solution as PROOFSIZE hex nonces
137 |       System.out.print("Solution");
138 |       for (int i = 0; i < Cuckoo.PROOFSIZE; i++)
139 |         System.out.print(String.format(" %x", solve.sols[s][i]));
140 |       System.out.println("");
141 |     }
142 |   }
143 | }
144 | 


--------------------------------------------------------------------------------
/src/perl/cycles.pl:
--------------------------------------------------------------------------------
 1 | # Tallies "N-cycle found" lines per cycle length from solver output given on stdin or as file arguments.
 2 | my $nonce = 0;
 3 | my $maxnonce = 0;
 4 | my $nnonces = 0;
 5 | my $maxcycles = 0;
 6 | my $ncycles = 0;
 7 | my @count;
 8 | while (<>) {
 9 |   if (/^nonce (\d+)/) {
10 |     $nonce = $1;
11 |     $nnonces += 1;
12 |   } elsif (/^Time/ || /^findcycles/) {  # end of one run: remember the nonce with the most cycles so far
13 |     if ($ncycles > $maxcycles) {
14 |       $maxnonce = $nonce;
15 |       $maxcycles = $ncycles;
16 |     }
17 |     $ncycles = 0;
18 |   } elsif (/(\d+)-cycle found/) {
19 |     $ncycles += 1;
20 |     $count[$1]++;
21 |   }
22 | }
23 | for my $i (1..$#count) {
24 |   my $c = $count[$i] or next;  # skip lengths that never occurred
25 |   my $f = $nnonces ? $c * $i / $nnonces : 0;  # no "nonce" lines seen: report 0 instead of dividing by zero
26 |   print "$i $c $f\n";
27 | }
28 | print "$nnonces nonces $maxcycles cycles at nonce $maxnonce\n";
29 | 


--------------------------------------------------------------------------------
/src/threads/barrier.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <pthread.h>
 3 | #include <errno.h>
 4 | 
 5 | #ifdef __APPLE__
 6 | typedef int pthread_barrierattr_t;
 7 | #endif
 8 | 
 9 | class trim_barrier { // barrier for `limit` threads built on a mutex and condition variable, with an abort switch
10 |   pthread_mutex_t mutex;
11 |   pthread_cond_t cond;
12 |   unsigned limit;
13 |   unsigned count;
14 |   int phase;
15 | 
16 | public:
17 |   trim_barrier(unsigned int count) : limit(count), count(0), phase(0) { // the parameter shadows the member; limit takes the parameter
18 |     pthread_mutex_init(&mutex, 0);
19 |     pthread_cond_init(&cond, 0);
20 |     // count and phase now start at zero, so wait() and aborted() are well defined even before clear()
21 |   }
22 |   
23 |   ~trim_barrier() {
24 |     pthread_mutex_destroy(&mutex);
25 |     pthread_cond_destroy(&cond);
26 |   }
27 |   
28 |   void clear() { // reset the arrival count and phase; takes no lock
29 |     count = phase = 0;
30 |   }
31 | 
32 |   void abort() { // mark the barrier aborted; threads see it the next time they reach or leave wait()
33 |     pthread_mutex_lock(&mutex);
34 |     phase = -1;
35 |     pthread_mutex_unlock(&mutex);
36 |   }
37 |   
38 |   bool aborted() { // reads phase without taking the mutex
39 |     return phase < 0;
40 |   }
41 | 
42 |   void wait() { // block until `limit` threads have arrived; exits the calling thread if the barrier was aborted
43 |     pthread_mutex_lock(&mutex);
44 |     int wait_phase = phase;
45 |     if (++count >= limit) { // last arrival: open the barrier for everyone
46 |       if (wait_phase >= 0) {
47 |         phase = wait_phase + 1;
48 |         count = 0;
49 |       }
50 |       pthread_cond_broadcast(&cond);
51 |     } else if (wait_phase >= 0) {
52 |       do
53 |         pthread_cond_wait(&cond, &mutex);
54 |       while (phase == wait_phase); // loop guards against wakeups that did not change the phase
55 |     }
56 |     pthread_mutex_unlock(&mutex);
57 |     if (wait_phase < 0)
58 |       pthread_exit(NULL);
59 |   }
60 | };
61 | 


--------------------------------------------------------------------------------
/src/tmto/momentomatum.cpp:
--------------------------------------------------------------------------------
 1 | // Bounty Cycle, an attempt to disprove John's claims
 2 | 
 3 | #include "momentomatum.h"
 4 | #include <unistd.h>
 5 | 
 6 | int main(int argc, char **argv) { // momentomatum driver: parse options, start the workers, wait for them
 7 |   int nthreads = 1;
 8 |   int nparts = NUPARTS;
 9 |   bool minimalbfs = true;
10 |   const char *header = "";
11 | 
12 |   // option letters: -h header, -m minimal bfs, -n parts, -t threads
13 |   for (int opt; (opt = getopt (argc, argv, "h:mn:t:")) != -1; ) {
14 |     if (opt == 'h')
15 |       header = optarg;
16 |     else if (opt == 'm')
17 |       minimalbfs = true;
18 |     else if (opt == 'n')
19 |       nparts = atoi(optarg);
20 |     else if (opt == 't')
21 |       nthreads = atoi(optarg);
22 |   }
23 |   printf("Looking for %d-cycle on cuckoo%d(\"%s\") with 50%% edges, 1/%d memory, %d/%d parts, %d threads %d minimalbfs\n", PROOFSIZE, NODEBITS, header, 1<<SAVEMEM_BITS, nparts, NUPARTS, nthreads, minimalbfs);
24 |   // shrink the byte count below 1024 and remember how many times it was divided
25 |   u64 nodeBytes = CUCKOO_SIZE*sizeof(u64);
26 |   int nodeUnit = 0;
27 |   while (nodeBytes >= 1024) {
28 |     nodeBytes >>= 10;
29 |     nodeUnit++;
30 |   }
31 |   printf("Using %d%cB node memory.\n", (int)nodeBytes, " KMGT"[nodeUnit]);
32 |   cuckoo_ctx ctx(header, nthreads, nparts, minimalbfs);
33 |   thread_ctx *threads = (thread_ctx *)calloc(nthreads, sizeof(thread_ctx));
34 |   assert(threads);
35 |   for (int t = 0; t < nthreads; t++) { // launch one worker per slot
36 |     thread_ctx *slot = &threads[t];
37 |     slot->id = t;
38 |     slot->ctx = &ctx;
39 |     int err = pthread_create(&slot->thread, NULL, worker, (void *)slot);
40 |     assert(err == 0);
41 |   }
42 |   for (int t = 0; t < nthreads; t++) { // block until every worker has finished
43 |     int err = pthread_join(threads[t].thread, NULL);
44 |     assert(err == 0);
45 |   }
46 |   free(threads);
47 |   return 0;
48 | }
49 | 


--------------------------------------------------------------------------------
/src/tmto/tomato_miner.cpp:
--------------------------------------------------------------------------------
 1 | // Time Memory Trade Off (TMTO, or tomato) solver
 2 | 
 3 | #include "tomato_miner.h"
 4 | #include <unistd.h>
 5 | 
 6 | // arbitrary length of header hashed into siphash key
 7 | #define HEADERLEN 80
 8 | 
 9 | int main(int argc, char **argv) { // tomato miner driver: parse options, then run the workers once per nonce
10 |   int nthreads = 1;
11 |   bool minimalbfs = false;
12 |   int nparts = NUPARTS;
13 |   int range = 1;
14 |   int nonce = 0;
15 |   int c;
16 |   char header[HEADERLEN];
17 |   unsigned len;
18 | 
19 |   memset(header, 0, sizeof(header));
20 |   while ((c = getopt (argc, argv, "h:n:p:t:r:m")) != -1) {
21 |     switch (c) {
22 |       case 'h': // header given as literal text
23 |         len = strlen(optarg);
24 |         assert(len <= sizeof(header));
25 |         memcpy(header, optarg, len);
26 |         break;
27 |       case 'm':
28 |         minimalbfs = true;
29 |         break;
30 |       case 'n': // first nonce
31 |         nonce = atoi(optarg);
32 |         break;
33 |       case 'p': // number of parts
34 |         nparts = atoi(optarg);
35 |         break;
36 |       case 'r': // number of consecutive nonces to try
37 |         range = atoi(optarg);
38 |         break;
39 |       case 't': // number of worker threads
40 |         nthreads = atoi(optarg);
41 |         break;
42 |     }
43 |   }
44 |   printf("Looking for %d-cycle on cuckoo%d(\"%.*s\",%d", PROOFSIZE, NODEBITS, (int)sizeof(header), header, nonce); // %.*s: header may fill the buffer with no NUL
45 |   if (range > 1)
46 |     printf("-%d", nonce+range-1);
47 |   printf(") with 50%% edges, 1/%d memory, %d/%d parts, %d threads %d minimalbfs\n",
48 |     1<<SAVEMEM_BITS, nparts, NUPARTS, nthreads, minimalbfs);
49 |   u64 nodeBytes = CUCKOO_SIZE*sizeof(u64);
50 |   int nodeUnit;
51 |   for (nodeUnit=0; nodeBytes >= 1024; nodeBytes>>=10,nodeUnit++) ; // reduce to a value below 1024 plus a unit index
52 |   printf("Using %d%cB node memory.\n", (int)nodeBytes, " KMGT"[nodeUnit]);
53 |   thread_ctx *threads = (thread_ctx *)calloc(nthreads, sizeof(thread_ctx));
54 |   assert(threads);
55 |   cuckoo_ctx ctx(nthreads, nparts, minimalbfs);
56 | 
57 |   for (int r = 0; r < range; r++) { // one full run per nonce
58 |     ctx.setheadernonce(header, sizeof(header), nonce + r);
59 | 
60 |     for (int t = 0; t < nthreads; t++) { // start the workers
61 |       threads[t].id = t;
62 |       threads[t].ctx = &ctx;
63 |       int err = pthread_create(&threads[t].thread, NULL, worker, (void *)&threads[t]);
64 |       assert(err == 0);
65 |     }
66 |     for (int t = 0; t < nthreads; t++) { // wait for all of them to finish
67 |       int err = pthread_join(threads[t].thread, NULL);
68 |       assert(err == 0);
69 |     }
70 |   }
71 |   free(threads);
72 |   return 0;
73 | }
74 | 


--------------------------------------------------------------------------------