├── .gitignore
├── plugins
    ├── opencl
    │   ├── resources
    │   │   ├── bin
    │   │   │   ├── gfx906_kaspa-opencl.bin
    │   │   │   ├── gfx1010_kaspa-opencl.bin
    │   │   │   ├── gfx1011_kaspa-opencl.bin
    │   │   │   ├── gfx1012_kaspa-opencl.bin
    │   │   │   ├── gfx1030_kaspa-opencl.bin
    │   │   │   ├── gfx1031_kaspa-opencl.bin
    │   │   │   ├── gfx1032_kaspa-opencl.bin
    │   │   │   ├── gfx1034_kaspa-opencl.bin
    │   │   │   └── ellesmere_kaspa-opencl.bin
    │   │   └── kaspa-opencl.cl
    │   ├── Cargo.lock
    │   ├── Cargo.toml
    │   ├── README.md
    │   └── src
    │   │   ├── cli.rs
    │   │   ├── lib.rs
    │   │   └── worker.rs
    ├── cuda
    │   ├── kaspa-cuda-native
    │   │   └── src
    │   │   │   ├── keccak-tiny.h
    │   │   │   ├── xoshiro256starstar.c
    │   │   │   ├── keccak-tiny.c
    │   │   │   ├── keccak-tiny-unrolled.c
    │   │   │   └── kaspa-cuda.cu
    │   ├── Cargo.toml
    │   ├── README.md
    │   └── src
    │   │   ├── cli.rs
    │   │   ├── lib.rs
    │   │   └── worker.rs
    └── README.md
├── integrations
    ├── hiveos
    │   ├── build.sh
    │   ├── h-config.sh
    │   ├── h-run.sh
    │   ├── createmanifest.sh
    │   └── h-stats.sh
    └── windows
    │   └── create_bat.sh
├── .rustfmt.toml
├── src
    ├── pow
    │   ├── keccak.rs
    │   ├── xoshiro.rs
    │   └── hasher.rs
    ├── client.rs
    ├── kaspad_messages.rs
    ├── xoshiro256starstar.rs
    ├── target.rs
    ├── cli.rs
    ├── lib.rs
    ├── main.rs
    ├── client
    │   ├── stratum
    │   │   └── statum_codec.rs
    │   └── grpc.rs
    ├── keccakf1600_x86-64-osx.s
    └── keccakf1600_x86-64.s
├── Cargo.toml
├── README.md
├── proto
    ├── p2p.proto
    └── messages.proto
├── .github
    └── workflows
    │   ├── deploy.yaml
    │   └── ci.yaml
├── LICENSE-MIT
└── LICENSE-APACHE


/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | 


--------------------------------------------------------------------------------
/plugins/opencl/resources/bin/gfx906_kaspa-opencl.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tmrlvi/kaspa-miner/HEAD/plugins/opencl/resources/bin/gfx906_kaspa-opencl.bin


--------------------------------------------------------------------------------
/plugins/opencl/resources/bin/gfx1010_kaspa-opencl.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tmrlvi/kaspa-miner/HEAD/plugins/opencl/resources/bin/gfx1010_kaspa-opencl.bin


--------------------------------------------------------------------------------
/plugins/opencl/resources/bin/gfx1011_kaspa-opencl.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tmrlvi/kaspa-miner/HEAD/plugins/opencl/resources/bin/gfx1011_kaspa-opencl.bin


--------------------------------------------------------------------------------
/plugins/opencl/resources/bin/gfx1012_kaspa-opencl.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tmrlvi/kaspa-miner/HEAD/plugins/opencl/resources/bin/gfx1012_kaspa-opencl.bin


--------------------------------------------------------------------------------
/plugins/opencl/resources/bin/gfx1030_kaspa-opencl.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tmrlvi/kaspa-miner/HEAD/plugins/opencl/resources/bin/gfx1030_kaspa-opencl.bin


--------------------------------------------------------------------------------
/plugins/opencl/resources/bin/gfx1031_kaspa-opencl.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tmrlvi/kaspa-miner/HEAD/plugins/opencl/resources/bin/gfx1031_kaspa-opencl.bin


--------------------------------------------------------------------------------
/plugins/opencl/resources/bin/gfx1032_kaspa-opencl.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tmrlvi/kaspa-miner/HEAD/plugins/opencl/resources/bin/gfx1032_kaspa-opencl.bin


--------------------------------------------------------------------------------
/plugins/opencl/resources/bin/gfx1034_kaspa-opencl.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tmrlvi/kaspa-miner/HEAD/plugins/opencl/resources/bin/gfx1034_kaspa-opencl.bin


--------------------------------------------------------------------------------
/plugins/opencl/resources/bin/ellesmere_kaspa-opencl.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tmrlvi/kaspa-miner/HEAD/plugins/opencl/resources/bin/ellesmere_kaspa-opencl.bin


--------------------------------------------------------------------------------
/integrations/hiveos/build.sh:
--------------------------------------------------------------------------------
# Packages the miner as a HiveOS custom-miner archive.
#
# Usage: build.sh <VERSION_NUMBER> <MINER BINARY NAME> <PACKAGE DIR>
#   $1 - version written into h-manifest.conf
#   $2 - miner binary name; also the directory whose contents are packaged
#   $3 - staging directory; the archive is written to <PACKAGE DIR>-hiveos.tgz
#
# Must be run from the repository root (paths below are relative to it).

# Stop at the first failing step instead of archiving a half-populated directory.
set -eu

if [ "$#" -lt 3 ]; then
  echo "Usage: $0 <VERSION_NUMBER> <MINER BINARY NAME> <PACKAGE DIR>" >&2
  exit 1
fi

integrations/hiveos/createmanifest.sh "$1" "$2"
# -p: re-running the build over an existing staging directory is not an error.
mkdir -p "$3"
cp h-manifest.conf integrations/hiveos/*.sh "$2"/* "$3"
tar czvf "$3-hiveos.tgz" "$3"


--------------------------------------------------------------------------------
/.rustfmt.toml:
--------------------------------------------------------------------------------
1 | max_width = 120
2 | use_field_init_shorthand = true
3 | use_try_shorthand = true
4 | use_small_heuristics = "Max"
5 | newline_style = "unix"
6 | edition = "2021"


--------------------------------------------------------------------------------
/plugins/opencl/Cargo.lock:
--------------------------------------------------------------------------------
1 | # This file is automatically @generated by Cargo.
2 | # It is not intended for manual editing.
3 | version = 3
4 | 
5 | [[package]]
6 | name = "opencl"
7 | version = "0.1.0"
8 | 


--------------------------------------------------------------------------------
/src/pow/keccak.rs:
--------------------------------------------------------------------------------
 1 | #[cfg(any(not(target_arch = "x86_64"), feature = "no-asm", target_os = "windows"))]
 2 | pub(super) fn f1600(state: &mut [u64; 25]) {
 3 |     keccak::f1600(state);
 4 | }
 5 | 
 6 | #[cfg(all(target_arch = "x86_64", not(feature = "no-asm"), not(target_os = "windows")))]
 7 | pub(super) fn f1600(state: &mut [u64; 25]) {
 8 |     extern "C" {
 9 |         fn KeccakF1600(state: &mut [u64; 25]);
10 |     }
11 |     unsafe { KeccakF1600(state) }
12 | }
13 | 


--------------------------------------------------------------------------------
/src/client.rs:
--------------------------------------------------------------------------------
 1 | use async_trait::async_trait;
 2 | use tokio::sync::mpsc::Sender;
 3 | 
 4 | pub mod grpc;
 5 | pub mod stratum;
 6 | 
 7 | use crate::pow::BlockSeed;
 8 | use crate::{Error, MinerManager};
 9 | 
/// Common interface for a connection that feeds work to the miner.
///
/// NOTE(review): presumably implemented by the clients in the `grpc` and `stratum`
/// submodules declared in this file; the implementations are not visible here.
///
/// `?Send` means the futures returned by the async methods are not required to be `Send`.
#[async_trait(?Send)]
pub trait Client {
    /// Configures a dev-fund `address` together with a `percent` value.
    /// NOTE(review): the unit/scale of `percent` is not visible here; confirm with implementors.
    fn add_devfund(&mut self, address: String, percent: u16);
    /// Performs the client's registration step, reporting failure as [`Error`].
    async fn register(&mut self) -> Result<(), Error>;
    /// Runs the client's listening loop with mutable access to `miner`.
    /// NOTE(review): presumably returns only on disconnect or error; confirm with implementors.
    async fn listen(&mut self, miner: &mut MinerManager) -> Result<(), Error>;
    /// Returns a sender through which [`BlockSeed`] values can be passed to this client.
    fn get_block_channel(&self) -> Sender<BlockSeed>;
}
17 | 


--------------------------------------------------------------------------------
/plugins/cuda/kaspa-cuda-native/src/keccak-tiny.h:
--------------------------------------------------------------------------------
/* Declarations of the device-side SHAKE and SHA-3 functions. */
#ifndef KECCAK_FIPS202_H
#define KECCAK_FIPS202_H
/* Opt in to the C11 Annex K (bounds-checked) library extensions, where the
 * headers included below provide them. */
#define __STDC_WANT_LIB_EXT1__ 1
#include <stdint.h>
#include <stdlib.h>

/* Declares `__device__ int shake<bits>(uint8_t*, size_t, const uint8_t*, size_t)`.
 * NOTE(review): arguments are presumably (out, outlen, in, inlen); confirm
 * against the definitions in keccak-tiny.c. */
#define decshake(bits) \
  __device__ int shake##bits(uint8_t*, size_t, const uint8_t*, size_t);

/* Declares `__device__ int sha3_<bits>(uint8_t*, size_t, const uint8_t*, size_t)`,
 * with the same parameter list as the SHAKE declarations above. */
#define decsha3(bits) \
  __device__ int sha3_##bits(uint8_t*, size_t, const uint8_t*, size_t);

/* The macros already end in ';', so the invocations below need none. */
decshake(128)
decshake(256)
decsha3(224)
decsha3(256)
decsha3(384)
decsha3(512)
#endif
20 | 


--------------------------------------------------------------------------------
/integrations/windows/create_bat.sh:
--------------------------------------------------------------------------------
# Generates a default Windows launcher, <NAME>/mine.bat, that restarts the miner in a loop.
#
# Usage: create_bat.sh <NAME>
#   <NAME> is both the directory that receives mine.bat and the base name of the
#   executable (<NAME>.exe) the batch file starts.

if [ "$#" -lt 1 ]; then
  echo "Usage: $0 <NAME>" >&2
  exit 1
fi

# A single here-document writes the file in one go; the quoted target keeps
# paths containing spaces intact.
cat > "${1}/mine.bat" << EOF
REM When mining to a local node, you can drop the -s option.
echo ============================================================
echo = Running Kaspa Miner with default .bat. Edit to configure =
echo ============================================================
:start
${1}.exe -a kaspa:qz4jdyu04hv4hpyy00pl6trzw4gllnhnwy62xattejv2vaj5r0p5quvns058f -s n.seeder1.kaspad.net
goto start
EOF


--------------------------------------------------------------------------------
/plugins/opencl/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "kaspaopencl"
 3 | version = "0.1.0"
 4 | edition = "2021"
 5 | 
 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 7 | [dependencies]
 8 | kaspa_miner = {path = "../../", package="kaspa-miner"}
 9 | clap = { version = "3.0", features = ["color", "derive"]}
10 | env_logger = "0.9"
11 | opencl3 = {version = "0.6", features = ["CL_VERSION_2_1", "CL_VERSION_2_2", "CL_VERSION_3_0"]}
12 | log = "0.4"
13 | rand = "0.8"
14 | include_dir = "0.7"
15 | 
16 | [lib]
17 | crate-type = ["cdylib"]
18 | 
19 | 


--------------------------------------------------------------------------------
/plugins/cuda/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "kaspacuda"
 3 | version = "0.1.0"
 4 | edition = "2021"
 5 | 
 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 7 | 
 8 | [dependencies]
 9 | kaspa-miner = {path = "../../"}
10 | cust ="0.3"
11 | log = "0.4"
12 | rand = "0.8"
13 | clap = { version = "3.0", features = ["color", "derive"]}
14 | env_logger = "0.9"
15 | nvml-wrapper = { git = "https://github.com/benrod3k/nvml-wrapper", branch = "495.29.05", optional = true }
16 | 
17 | [lib]
18 | crate-type = ["cdylib", "rlib"]
19 | 
20 | [features]
21 | overclock = [ "nvml-wrapper" ]
22 | 


--------------------------------------------------------------------------------
/integrations/hiveos/h-config.sh:
--------------------------------------------------------------------------------
 1 | ####################################################################################
 2 | ###
 3 | ### kaspa-miner
 4 | ### https://github.com/tmrlvi/kaspa-miner/releases
 5 | ###
 6 | ### Hive integration: Merlin
 7 | ###
 8 | ####################################################################################
 9 | 
10 | #!/usr/bin/env bash
11 | [[ -e /hive/custom ]] && . /hive/custom/kaspa-miner/h-manifest.conf
12 | [[ -e /hive/miners/custom ]] && . /hive/miners/custom/kaspa-miner/h-manifest.conf
13 | conf=""
14 | conf+=" --kaspad-address=$CUSTOM_URL --mining-address $CUSTOM_TEMPLATE"
15 | 
16 | 
17 | [[ ! -z $CUSTOM_USER_CONFIG ]] && conf+=" $CUSTOM_USER_CONFIG"
18 | 
19 | echo "$conf"
20 | echo "$conf" > $CUSTOM_CONFIG_FILENAME
21 | 
22 | 


--------------------------------------------------------------------------------
/src/pow/xoshiro.rs:
--------------------------------------------------------------------------------
 1 | use crate::Hash;
 2 | use std::num::Wrapping;
 3 | 
 4 | pub(super) struct XoShiRo256PlusPlus {
 5 |     s0: Wrapping<u64>,
 6 |     s1: Wrapping<u64>,
 7 |     s2: Wrapping<u64>,
 8 |     s3: Wrapping<u64>,
 9 | }
10 | 
11 | impl XoShiRo256PlusPlus {
12 |     #[inline]
13 |     pub(super) fn new(hash: Hash) -> Self {
14 |         Self { s0: Wrapping(hash.0[0]), s1: Wrapping(hash.0[1]), s2: Wrapping(hash.0[2]), s3: Wrapping(hash.0[3]) }
15 |     }
16 | 
17 |     #[inline]
18 |     pub(super) fn u64(&mut self) -> u64 {
19 |         let res = self.s0 + Wrapping((self.s0 + self.s3).0.rotate_left(23));
20 |         let t = self.s1 << 17;
21 |         self.s2 ^= self.s0;
22 |         self.s3 ^= self.s1;
23 |         self.s1 ^= self.s2;
24 |         self.s0 ^= self.s3;
25 | 
26 |         self.s2 ^= t;
27 |         self.s3 = Wrapping(self.s3.0.rotate_left(45));
28 | 
29 |         res.0
30 |     }
31 | }
32 | 


--------------------------------------------------------------------------------
/integrations/hiveos/h-run.sh:
--------------------------------------------------------------------------------
 1 | ####################################################################################
 2 | ###
 3 | ### kaspa-miner
 4 | ### https://github.com/tmrlvi/kaspa-miner/releases
 5 | ###
 6 | ### Hive integration: Merlin
 7 | ###
 8 | ####################################################################################
 9 | 
10 | #!/usr/bin/env bash
11 | 
12 | cd `dirname $0`
13 | 
14 | [ -t 1 ] && . colors
15 | 
16 | . h-manifest.conf
17 | 
18 | echo $CUSTOM_NAME
19 | echo $CUSTOM_LOG_BASENAME
20 | echo $CUSTOM_CONFIG_FILENAME
21 | 
22 | [[ -z $CUSTOM_LOG_BASENAME ]] && echo -e "${RED}No CUSTOM_LOG_BASENAME is set${NOCOLOR}" && exit 1
23 | [[ -z $CUSTOM_CONFIG_FILENAME ]] && echo -e "${RED}No CUSTOM_CONFIG_FILENAME is set${NOCOLOR}" && exit 1
24 | [[ ! -f $CUSTOM_CONFIG_FILENAME ]] && echo -e "${RED}Custom config ${YELLOW}$CUSTOM_CONFIG_FILENAME${RED} is not found${NOCOLOR}" && exit 1
25 | 
26 | 
27 | ./$CUSTOM_MINERBIN $(< $CUSTOM_CONFIG_FILENAME) $@ 2>&1 | tee $CUSTOM_LOG_BASENAME.log
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/plugins/README.md:
--------------------------------------------------------------------------------
 1 | # Miner Plugins
 2 | 
 3 | **CAUTION! The plugin api is brand new and might change without prior notice.** 
 4 | 
 5 | **CAUTION! Old plugins might not be compatible with new plugins: check the plugin version.** 
 6 | 
 7 | **CAUTION! Plugins can run arbitrary code: if you use precompiled plugins, make sure they come from 
 8 | a reputable source.**
 9 | 
10 | The plugin system relies on three interfaces defined in `lib.rs` on `kaspa-miner`. 
11 | Each interface refers to an object which has a different job:
12 |   * **Plugin** - the environment and configuration of a type of workers.
13 |   * **WorkerSpec** - Light weight struct containing the initialization arguments for a worker.
14 |   Can be (and is) sent between threads.
15 |   * **Worker** - The worker object, which contains references to device memory and functions. Usually not thread safe.
16 | 
17 | To implement your own plugin, create a crate and implement the required methods. Build it as a `cdylib`
18 | and place it in the plugins directory. Add the plugin name to the whitelist in `main.rs`.


--------------------------------------------------------------------------------
/plugins/opencl/README.md:
--------------------------------------------------------------------------------
 1 | # OpenCL support for Kaspa-Miner
 2 | 
 3 | This is an experimental plugin that adds OpenCL support.
 4 | 
 5 | # Compiling for AMD
 6 | Download and install Radeon GPU Analyzer (RGA), which allows you to compile OpenCL kernels for AMD GPUs.
 7 | 
 8 | ```shell
 9 | for arch in gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx906
10 | do 
11 |   rga --O3 -s opencl -c "$arch" --OpenCLoption "-cl-finite-math-only -cl-mad-enable " -b plugins/opencl/resources/bin/kaspa-opencl.bin plugins/opencl/resources/kaspa-opencl.cl -D __FORCE_AMD_V_DOT8_U32_U4__=1 -D OPENCL_PLATFORM_AMD -D OFFLINE
12 | done 
13 | 
14 | for arch in gfx1010
15 | do 
16 |   rga --O3 -s opencl -c "$arch" --OpenCLoption "-cl-finite-math-only -cl-mad-enable " -b plugins/opencl/resources/bin/kaspa-opencl.bin plugins/opencl/resources/kaspa-opencl.cl -D OPENCL_PLATFORM_AMD
17 | done 
18 | 
19 | for arch in Ellesmere
20 | do 
21 |   rga --O3 -s opencl -c "$arch" --OpenCLoption "-cl-finite-math-only -cl-mad-enable -target amdgcn-amd-amdpal" -b plugins/opencl/resources/bin/kaspa-opencl.bin plugins/opencl/resources/kaspa-opencl.cl -D OPENCL_PLATFORM_AMD -D PAL
22 | done 
23 | ```


--------------------------------------------------------------------------------
/integrations/hiveos/createmanifest.sh:
--------------------------------------------------------------------------------
 1 | ####################################################################################
 2 | ###
 3 | ### kaspa-miner
 4 | ### https://github.com/tmrlvi/kaspa-miner/releases
 5 | ###
 6 | ### Hive integration: Merlin
 7 | ###
 8 | ####################################################################################
 9 | 
10 | if [ "$#" -ne "2" ]
11 |   then
12 |     echo "No arguments supplied. Call using createmanifest.sh <VERSION_NUMBER> <MINER BINARY NAME>"
13 |     exit
14 | fi
15 | cat > h-manifest.conf << EOF
16 | ####################################################################################
17 | ###
18 | ### kaspa-miner
19 | ### https://github.com/tmrlvi/kaspa-miner/releases
20 | ###
21 | ### Hive integration: Merlin
22 | ###
23 | ####################################################################################
24 | 
25 | # The name of the miner
26 | CUSTOM_NAME=kaspa-miner
27 | 
28 | # Optional version of your custom miner package
29 | CUSTOM_VERSION=$1
30 | CUSTOM_BUILD=0
31 | CUSTOM_MINERBIN=$2
32 | 
33 | # Full path to miner config file
34 | CUSTOM_CONFIG_FILENAME=/hive/miners/custom/\$CUSTOM_NAME/config.ini
35 | 
36 | # Full path to log file basename. WITHOUT EXTENSION (don't include .log at the end)
37 | # Used to truncate logs and rotate,
38 | # E.g. /var/log/miner/mysuperminer/somelogname (filename without .log at the end)
39 | CUSTOM_LOG_BASENAME=/var/log/miner/\$CUSTOM_NAME
40 | 
41 | WEB_PORT=3338
42 | EOF


--------------------------------------------------------------------------------
/plugins/cuda/README.md:
--------------------------------------------------------------------------------
 1 | # Cuda Support For Kaspa-Miner
 2 | 
 3 | ## Building
 4 | 
 5 | The plugin is a shared library file that resides in the same directory as the miner. 
 6 | You can build the library by running
 7 | ```sh
 8 | cargo build -p kaspacuda
 9 | ```
10 | 
11 | This version includes a precompiled PTX, which should work with most modern GPUs. To compile the PTX yourself,
12 | you have to clone the project:
13 | 
14 | ```sh
15 | git clone https://github.com/tmrlvi/kaspa-miner.git
16 | cd kaspa-miner
17 | # Compute version 8.6
18 | /usr/local/cuda-11.5/bin/nvcc plugins/cuda/kaspa-cuda-native/src/kaspa-cuda.cu -std=c++11 -O3 --restrict --ptx --gpu-architecture=compute_86 --gpu-code=sm_86 -o plugins/cuda/resources/kaspa-cuda-sm86.ptx -Xptxas -O3 -Xcompiler -O3
19 | # Compute version 7.5
20 | /usr/local/cuda-11.5/bin/nvcc plugins/cuda/kaspa-cuda-native/src/kaspa-cuda.cu -std=c++11 -O3 --restrict --ptx --gpu-architecture=compute_75 --gpu-code=sm_75 -o plugins/cuda/resources/kaspa-cuda-sm75.ptx -Xptxas -O3 -Xcompiler -O3
21 | # Compute version 6.1
22 | /usr/local/cuda-11.2/bin/nvcc plugins/cuda/kaspa-cuda-native/src/kaspa-cuda.cu -std=c++11 -O3 --restrict --ptx --gpu-architecture=compute_61 --gpu-code=sm_61 -o plugins/cuda/resources/kaspa-cuda-sm61.ptx -Xptxas -O3 -Xcompiler -O3
23 | # Compute version 3.0
24 | /usr/local/cuda-9.2/bin/nvcc plugins/cuda/kaspa-cuda-native/src/kaspa-cuda.cu -ccbin=gcc-7 -std=c++11 -O3 --restrict --ptx --gpu-architecture=compute_30 --gpu-code=sm_30 -o plugins/cuda/resources/kaspa-cuda-sm30.ptx
25 | # Compute version 2.0
26 | /usr/local/cuda-8.0/bin/nvcc plugins/cuda/kaspa-cuda-native/src/kaspa-cuda.cu -ccbin=gcc-5 -std=c++11 -O3 --restrict --ptx --gpu-architecture=compute_20 --gpu-code=sm_20 -o plugins/cuda/resources/kaspa-cuda-sm20.ptx
27 |  
28 | cargo build --release
29 | ```
30 | 


--------------------------------------------------------------------------------
/src/kaspad_messages.rs:
--------------------------------------------------------------------------------
 1 | use crate::proto::{
 2 |     kaspad_message::Payload, GetBlockTemplateRequestMessage, GetInfoRequestMessage, KaspadMessage,
 3 |     NotifyBlockAddedRequestMessage, NotifyNewBlockTemplateRequestMessage, RpcBlock, SubmitBlockRequestMessage,
 4 | };
 5 | use crate::{
 6 |     pow::{self, HeaderHasher},
 7 |     Hash,
 8 | };
 9 | 
10 | impl KaspadMessage {
11 |     #[inline(always)]
12 |     pub fn get_info_request() -> Self {
13 |         KaspadMessage { payload: Some(Payload::GetInfoRequest(GetInfoRequestMessage {})) }
14 |     }
15 |     #[inline(always)]
16 |     pub fn notify_block_added() -> Self {
17 |         KaspadMessage { payload: Some(Payload::NotifyBlockAddedRequest(NotifyBlockAddedRequestMessage {})) }
18 |     }
19 | 
20 |     #[inline(always)]
21 |     pub fn submit_block(block: RpcBlock) -> Self {
22 |         KaspadMessage {
23 |             payload: Some(Payload::SubmitBlockRequest(SubmitBlockRequestMessage {
24 |                 block: Some(block),
25 |                 allow_non_daa_blocks: false,
26 |             })),
27 |         }
28 |     }
29 | }
30 | 
31 | impl From<GetInfoRequestMessage> for KaspadMessage {
32 |     fn from(a: GetInfoRequestMessage) -> Self {
33 |         KaspadMessage { payload: Some(Payload::GetInfoRequest(a)) }
34 |     }
35 | }
36 | impl From<NotifyBlockAddedRequestMessage> for KaspadMessage {
37 |     fn from(a: NotifyBlockAddedRequestMessage) -> Self {
38 |         KaspadMessage { payload: Some(Payload::NotifyBlockAddedRequest(a)) }
39 |     }
40 | }
41 | 
42 | impl From<GetBlockTemplateRequestMessage> for KaspadMessage {
43 |     fn from(a: GetBlockTemplateRequestMessage) -> Self {
44 |         KaspadMessage { payload: Some(Payload::GetBlockTemplateRequest(a)) }
45 |     }
46 | }
47 | 
48 | impl From<NotifyNewBlockTemplateRequestMessage> for KaspadMessage {
49 |     fn from(a: NotifyNewBlockTemplateRequestMessage) -> Self {
50 |         KaspadMessage { payload: Some(Payload::NotifyNewBlockTemplateRequest(a)) }
51 |     }
52 | }
53 | 
54 | impl RpcBlock {
55 |     #[inline(always)]
56 |     pub fn block_hash(&self) -> Option<Hash> {
57 |         let mut hasher = HeaderHasher::new();
58 |         pow::serialize_header(&mut hasher, self.header.as_ref()?, false);
59 |         Some(hasher.finalize())
60 |     }
61 | }
62 | 


--------------------------------------------------------------------------------
/src/xoshiro256starstar.rs:
--------------------------------------------------------------------------------
 1 | const LONG_JUMP: [u64; 4] = [0x76e15d3efefdcbbf, 0xc5004e441c522fb3, 0x77710069854ee241, 0x39109bb02acbe635];
 2 | 
 3 | #[derive(Clone, Copy)]
 4 | pub struct Xoshiro256StarStar {
 5 |     pub(crate) state: [u64; 4],
 6 | }
 7 | 
 8 | pub struct Xoshiro256StarStarStateIter {
 9 |     current: Xoshiro256StarStar,
10 | }
11 | 
12 | fn rotl(x: u64, k: i32) -> u64 {
13 |     (x << k) | (x >> (64 - k))
14 | }
15 | 
16 | impl Xoshiro256StarStar {
17 |     pub fn new(seed: &[u64; 4]) -> Self {
18 |         let mut state = [0u64; 4];
19 |         state.copy_from_slice(seed);
20 |         Self { state }
21 |     }
22 | 
23 |     pub fn next_u64(&mut self) -> u64 {
24 |         let result = u64::wrapping_mul(rotl(u64::wrapping_mul(self.state[1], 5), 7), 9);
25 |         let t = self.state[1] << 17;
26 | 
27 |         self.state[2] ^= self.state[0];
28 |         self.state[3] ^= self.state[1];
29 |         self.state[1] ^= self.state[2];
30 |         self.state[0] ^= self.state[3];
31 | 
32 |         self.state[2] ^= t;
33 | 
34 |         self.state[3] = rotl(self.state[3], 45);
35 | 
36 |         result
37 |     }
38 | 
39 |     pub fn long_jump(&mut self) {
40 |         let mut s0 = 0u64;
41 |         let mut s1 = 0u64;
42 |         let mut s2 = 0u64;
43 |         let mut s3 = 0u64;
44 |         for jmp in LONG_JUMP {
45 |             for b in 0..64 {
46 |                 if jmp & 1u64 << b != 0 {
47 |                     s0 ^= self.state[0];
48 |                     s1 ^= self.state[1];
49 |                     s2 ^= self.state[2];
50 |                     s3 ^= self.state[3];
51 |                 }
52 |                 self.next_u64();
53 |             }
54 | 
55 |             self.state[0] = s0;
56 |             self.state[1] = s1;
57 |             self.state[2] = s2;
58 |             self.state[3] = s3;
59 |         }
60 |     }
61 | 
62 |     pub fn iter_jump_state(&self) -> impl Iterator<Item = [u64; 4]> {
63 |         let current = Xoshiro256StarStar::new(&self.state);
64 |         Xoshiro256StarStarStateIter { current }
65 |     }
66 | }
67 | 
68 | impl Iterator for Xoshiro256StarStarStateIter {
69 |     type Item = [u64; 4];
70 | 
71 |     fn next(&mut self) -> Option<[u64; 4]> {
72 |         self.current.long_jump();
73 |         Some(self.current.state)
74 |     }
75 | }
76 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "kaspa-miner"
 3 | version = "0.2.1-GPU-0.7"
 4 | edition = "2021"
 5 | license = "MIT/Apache-2.0"
 6 | authors = ["Elichai <elichai.turkel@gmail.com>"]
 7 | repository = "https://github.com/tmrlvi/kaspa-miner"
 8 | readme = "README.md"
 9 | description = "A fast CPU & GPU miner for Kaspa"
10 | categories = ["command-line-utilities"]
11 | keywords = ["blockchain", "cli"]
12 | include = [
13 |     "src/**/*.rs",
14 |     "src/**/*.s",
15 |     "proto/**",
16 |     "Cargo.toml",
17 |     "Cargo.lock",
18 |     "build.rs",
19 |     "LICENSE-MIT",
20 |     "LICENSE-APACHE",
21 |     "README.md",
22 | ]
23 | 
24 | [dependencies]
25 | tonic = "0.8"
26 | tokio = { version = "1.17", features = ["macros", "rt-multi-thread"] }
27 | prost = "0.11"
28 | futures-util = "0.3"
29 | tokio-stream = {version = "0.1", features = ["net"]}
30 | once_cell = "1"
31 | num_cpus = "1"
32 | rand = "0.8"
33 | blake2b_simd = "1.0.0"
34 | clap = { version = "3.0", features = ["color", "derive"]}
35 | log = "0.4"
36 | env_logger = "0.9"
37 | keccak = { version = "0.1", optional = true }
38 | parking = { package = "parking_lot", version = "0.12", optional = true }
39 | shuttle = { version = "0.2.0", optional = true }
40 | libloading = "0.7"
41 | tokio-util = {version = "0.7.0", features = ["codec"]}
42 | serde_json = "1.0"
43 | serde_repr = "0.1"
44 | serde =  {version="1.0", features=["derive"]}
45 | futures = "0.3.21"
46 | bytes = "1.1.0"
47 | async-trait = "0.1"
48 | num = "0.4"
49 | nix = "0.25"
50 | hex = "0.4"
51 | semver = "1.0"
52 | time = { version = "0.3", features = ["formatting", "macros"] }
53 | 
54 | [features]
55 | default = ["parking_lot"]
56 | parking_lot = ["parking", "tokio/parking_lot"]
57 | bench = []
58 | no-asm = ["keccak"]
59 | 
60 | [target.'cfg(target_os = "windows")'.dependencies]
61 | keccak = "0.1"
62 | kernel32-sys = "0.2"
63 | win32console = "0.1"
64 | 
65 | [profile.release]
66 | lto = true
67 | codegen-units = 1
68 | 
69 | [build-dependencies]
70 | tonic-build = { version = "0.8", default-features = false, features = ["prost", "transport"] }
71 | cc = "1"
72 | time = { version = "0.3", features = ["formatting"] }
73 | 
74 | [dev-dependencies]
75 | sha3 = { git = "https://github.com/elichai/hashes", branch = "cSHAKE" }
76 | 
77 | [workspace]
78 | members = ["plugins/*"]
79 | default-members = [".", "plugins/cuda", "plugins/opencl"]


--------------------------------------------------------------------------------
/plugins/opencl/src/cli.rs:
--------------------------------------------------------------------------------
 1 | use crate::Error;
 2 | use std::str::FromStr;
 3 | 
 4 | #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 5 | pub enum NonceGenEnum {
 6 |     Lean,
 7 |     Xoshiro,
 8 | }
 9 | 
10 | impl FromStr for NonceGenEnum {
11 |     type Err = Error;
12 | 
13 |     fn from_str(s: &str) -> Result<Self, Self::Err> {
14 |         match s.to_lowercase().as_str() {
15 |             "lean" => Ok(Self::Lean),
16 |             "xoshiro" => Ok(Self::Xoshiro),
17 |             _ => Err("Unknown string".into()),
18 |         }
19 |     }
20 | }
21 | 
22 | #[derive(clap::Args, Debug)]
23 | pub struct OpenCLOpt {
24 |     #[clap(long = "opencl-platform", help = "Which OpenCL platform to use (limited to one per executable)")]
25 |     pub opencl_platform: Option<u16>,
26 |     #[clap(long = "opencl-device", use_delimiter = true, help = "Which OpenCL GPUs to use on a specific platform")]
27 |     pub opencl_device: Option<Vec<u16>>,
28 |     #[clap(long = "opencl-workload", help = "Ratio of nonces to GPU possible parrallel run in OpenCL [default: 512]")]
29 |     pub opencl_workload: Option<Vec<f32>>,
30 |     #[clap(
31 |         long = "opencl-workload-absolute",
32 |         help = "The values given by workload are not ratio, but absolute number of nonces in OpenCL [default: false]"
33 |     )]
34 |     pub opencl_workload_absolute: bool,
35 |     #[clap(long = "opencl-enable", help = "Enable opencl, and take all devices of the chosen platform")]
36 |     pub opencl_enable: bool,
37 |     #[clap(long = "opencl-amd-disable", help = "Disables AMD mining (does not override opencl-enable)")]
38 |     pub opencl_amd_disable: bool,
39 |     #[clap(long = "opencl-no-amd-binary", help = "Disable fetching of precompiled AMD kernel (if exists)")]
40 |     pub opencl_no_amd_binary: bool,
41 |     #[clap(
42 |         long = "experimental-amd",
43 |         help = "Uses SMID instructions in AMD. Miner will crash if instruction is not supported"
44 |     )]
45 |     pub experimental_amd: bool,
46 |     #[clap(
47 |         long = "opencl-nonce-gen",
48 |         help = "The random method used to generate nonces. Options: (i) xoshiro (ii) lean",
49 |         long_help = "The random method used to generate nonces. Options: (i) xoshiro - each thread in GPU will have its own random state, creating a (pseudo-)independent xoshiro sequence (ii) lean - each GPU will have a single random nonce, and each GPU thread will work on nonce + thread id.",
50 |         default_value = "lean"
51 |     )]
52 |     pub opencl_nonce_gen: NonceGenEnum,
53 | }
54 | 


--------------------------------------------------------------------------------
/plugins/cuda/src/cli.rs:
--------------------------------------------------------------------------------
 1 | use crate::Error;
 2 | use std::str::FromStr;
 3 | 
 4 | #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 5 | pub enum NonceGenEnum {
 6 |     Lean,
 7 |     Xoshiro,
 8 | }
 9 | 
10 | impl FromStr for NonceGenEnum {
11 |     type Err = Error;
12 | 
13 |     fn from_str(s: &str) -> Result<Self, Self::Err> {
14 |         match s.to_lowercase().as_str() {
15 |             "lean" => Ok(Self::Lean),
16 |             "xoshiro" => Ok(Self::Xoshiro),
17 |             _ => Err("Unknown string".into()),
18 |         }
19 |     }
20 | }
21 | 
// Overclocking options; only compiled when the `overclock` feature is enabled.
//
// Each option takes a delimiter-separated list of `u32` values.
// NOTE(review): the help examples (",810,") suggest one entry per GPU with empty slots
// allowed; confirm how the worker maps entries to devices.
//
// Plain `//` comments are used on purpose: clap's derive turns `///` doc comments into
// help text, which would change the generated CLI output.
#[cfg(feature = "overclock")]
#[derive(clap::Args, Debug, Default)]
pub struct OverClock {
    #[clap(long = "cuda-lock-mem-clocks", use_delimiter = true, help = "Lock mem clocks eg: ,810, [default: 0]")]
    pub cuda_lock_mem_clocks: Option<Vec<u32>>,
    #[clap(long = "cuda-lock-core-clocks", use_delimiter = true, help = "Lock core clocks eg: ,1200, [default: 0]")]
    pub cuda_lock_core_clocks: Option<Vec<u32>>,
    #[clap(long = "cuda-power-limits", use_delimiter = true, help = "Lock power limits eg: ,150, [default: 0]")]
    pub cuda_power_limits: Option<Vec<u32>>,
}
32 | 
// Command-line options of the CUDA plugin, parsed by clap.
// NOTE: plain `//` comments are used on purpose — clap derives help text from `///` comments.
#[derive(clap::Args, Debug)]
pub struct CudaOpt {
    // Delimiter-separated device indices; `None` when the flag is absent.
    #[clap(long = "cuda-device", use_delimiter = true, help = "Which CUDA GPUs to use [default: all]")]
    pub cuda_device: Option<Vec<u16>>,
    // Workload values; unlike `cuda-device`, no `use_delimiter` is set here.
    #[clap(long = "cuda-workload", help = "Ratio of nonces to GPU possible parrallel run [default: 64]")]
    pub cuda_workload: Option<Vec<f32>>,
    // Boolean switch: interpret the workload values as absolute nonce counts.
    #[clap(
        long = "cuda-workload-absolute",
        help = "The values given by workload are not ratio, but absolute number of nonces [default: false]"
    )]
    pub cuda_workload_absolute: bool,
    // Boolean switch: disable the CUDA workers.
    #[clap(long = "cuda-disable", help = "Disable cuda workers")]
    pub cuda_disable: bool,
    // Boolean switch: see the help text for the CPU-usage trade-off.
    #[clap(
        long = "cuda-no-blocking-sync",
        help = "Actively wait for result. Higher CPU usage, but less red blocks. Can have lower workload.",
        long_help = "Actively wait for GPU result. Increases CPU usage, but removes delays that might result in red blocks. Can have lower workload."
    )]
    pub cuda_no_blocking_sync: bool,
    // Parsed through `NonceGenEnum::from_str`; defaults to "lean".
    #[clap(
        long = "cuda-nonce-gen",
        help = "The random method used to generate nonces. Options: (i) xoshiro - each thread in GPU will have its own random state, creating a (pseudo-)independent xoshiro sequence (ii) lean - each GPU will have a single random nonce, and each GPU thread will work on nonce + thread id.",
        default_value = "lean"
    )]
    pub cuda_nonce_gen: NonceGenEnum,

    // The overclock options are flattened into this option group when the feature is enabled.
    #[cfg(feature = "overclock")]
    #[clap(flatten)]
    pub overclock: OverClock,
}
63 | 


--------------------------------------------------------------------------------
/plugins/cuda/kaspa-cuda-native/src/xoshiro256starstar.c:
--------------------------------------------------------------------------------
  1 | /*  Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org)
  2 | 
  3 | To the extent possible under law, the author has dedicated all copyright
  4 | and related and neighboring rights to this software to the public domain
  5 | worldwide. This software is distributed without any warranty.
  6 | 
  7 | See <http://creativecommons.org/publicdomain/zero/1.0/>. */
  8 | 
  9 | #include <stdint.h>
 10 | 
 11 | /* This is xoshiro256** 1.0, one of our all-purpose, rock-solid
 12 |    generators. It has excellent (sub-ns) speed, a state (256 bits) that is
 13 |    large enough for any parallel application, and it passes all tests we
 14 |    are aware of.
 15 | 
 16 |    For generating just floating-point numbers, xoshiro256+ is even faster.
 17 | 
 18 |    The state must be seeded so that it is not everywhere zero. If you have
 19 |    a 64-bit seed, we suggest to seed a splitmix64 generator and use its
 20 |    output to fill s. */
 21 | 
 22 | __device__ static inline uint64_t rotl(const uint64_t x, int k) {
 23 | 	return (x << k) | (x >> (64 - k));
 24 | }
 25 | 
 26 | __device__ inline uint64_t xoshiro256_next(ulonglong4 *s) {
 27 | 	const uint64_t result = rotl(s->y * 5, 7) * 9;
 28 | 
 29 | 	const uint64_t t = s->y << 17;
 30 | 
 31 | 	s->z ^= s->x;
 32 | 	s->w ^= s->y;
 33 | 	s->y ^= s->z;
 34 | 	s->x ^= s->w;
 35 | 
 36 | 	s->z ^= t;
 37 | 
 38 | 	s->w = rotl(s->w, 45);
 39 | 
 40 | 	return result;
 41 | }
 42 | 
 43 | 
 44 | /* This is the jump function for the generator. It is equivalent
 45 |    to 2^128 calls to next(); it can be used to generate 2^128
 46 |    non-overlapping subsequences for parallel computations. */
 47 | 
 48 | __device__ void xoshiro256_jump(ulonglong4 *s) {
 49 | 	static const uint64_t JUMP[] = { 0x180ec6d33cfd0aba, 0xd5a61266f0c9392c, 0xa9582618e03fc9aa, 0x39abdc4529b1661c };
 50 | 
 51 | 	uint64_t s0 = 0;
 52 | 	uint64_t s1 = 0;
 53 | 	uint64_t s2 = 0;
 54 | 	uint64_t s3 = 0;
 55 | 	for(int i = 0; i < sizeof JUMP / sizeof *JUMP; i++)
 56 | 		for(int b = 0; b < 64; b++) {
 57 | 			if (JUMP[i] & UINT64_C(1) << b) {
 58 | 				s0 ^= s->x;
 59 | 				s1 ^= s->y;
 60 | 				s2 ^= s->z;
 61 | 				s3 ^= s->w;
 62 | 			}
 63 | 			xoshiro256_next(s);
 64 | 		}
 65 | 		
 66 | 	s->x = s0;
 67 | 	s->y = s1;
 68 | 	s->z = s2;
 69 | 	s->w = s3;
 70 | }
 71 | 
 72 | 
 73 | 
 74 | /* This is the long-jump function for the generator. It is equivalent to
 75 |    2^192 calls to next(); it can be used to generate 2^64 starting points,
 76 |    from each of which jump() will generate 2^64 non-overlapping
 77 |    subsequences for parallel distributed computations. */
 78 | 
 79 | __device__ void xoshiro256_long_jump(ulonglong4 *s) {
 80 | 	static const uint64_t LONG_JUMP[] = { 0x76e15d3efefdcbbf, 0xc5004e441c522fb3, 0x77710069854ee241, 0x39109bb02acbe635 };
 81 | 
 82 | 	uint64_t s0 = 0;
 83 | 	uint64_t s1 = 0;
 84 | 	uint64_t s2 = 0;
 85 | 	uint64_t s3 = 0;
 86 | 	for(int i = 0; i < sizeof LONG_JUMP / sizeof *LONG_JUMP; i++)
 87 | 		for(int b = 0; b < 64; b++) {
 88 | 			if (LONG_JUMP[i] & UINT64_C(1) << b) {
 89 | 				s0 ^= s->x;
 90 | 				s1 ^= s->y;
 91 | 				s2 ^= s->z;
 92 | 				s3 ^= s->w;
 93 | 			}
 94 | 			xoshiro256_next(s);
 95 | 		}
 96 | 		
 97 | 	s->x = s0;
 98 | 	s->y = s1;
 99 | 	s->z = s2;
100 | 	s->w = s3;
101 | }
102 | 


--------------------------------------------------------------------------------
/plugins/cuda/kaspa-cuda-native/src/keccak-tiny.c:
--------------------------------------------------------------------------------
  1 | /** libkeccak-tiny
  2 |  *
  3 |  * A single-file implementation of SHA-3 and SHAKE.
  4 |  *
  5 |  * Implementor: David Leon Gil
  6 |  * License: CC0, attribution kindly requested. Blame taken too,
  7 |  * but not liability.
  8 |  */
  9 | #define __STDC_WANT_LIB_EXT1__ 1
 10 | 
 11 | #include <stdint.h>
 12 | #include <stdio.h>
 13 | #include <stdlib.h>
 14 | #include <string.h>
 15 | 
 16 | /******** The Keccak-f[1600] permutation ********/
 17 | 
 18 | /*** Constants. ***/
/* Rotation amounts: keccakf rotates the travelling lane by rho[x] at step x
   of its 24-step "rho and pi" walk. */
__device__ static const uint8_t rho[24] = \
  { 1,  3,   6, 10, 15, 21,
    28, 36, 45, 55,  2, 14,
    27, 41, 56,  8, 25, 43,
    62, 18, 39, 61, 20, 44};
/* Lane indices: at step x of the same walk, keccakf reads and overwrites
   lane a[pi[x]]. */
__device__ static const uint8_t pi[24] = \
  {10,  7, 11, 17, 18, 3,
    5, 16,  8, 21, 24, 4,
   15, 23, 19, 13, 12, 2,
   20, 14, 22,  9, 6,  1};
/* Round constants: keccakf XORs RC[i] into lane 0 at the end of round i. */
__device__ static const uint64_t RC[24] = \
  {1ULL, 0x8082ULL, 0x800000000000808aULL, 0x8000000080008000ULL,
   0x808bULL, 0x80000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL,
   0x8aULL, 0x88ULL, 0x80008009ULL, 0x8000000aULL,
   0x8000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL,
   0x8000000000008002ULL, 0x8000000000000080ULL, 0x800aULL, 0x800000008000000aULL,
   0x8000000080008081ULL, 0x8000000000008080ULL, 0x80000001ULL, 0x8000000080008008ULL};

/*** Helper macros to unroll the permutation. ***/
/* Rotate the 64-bit value x left by s bits (s is used in both shifts, so it
   must be in 1..63). */
#define rol(x, s) (((x) << s) | ((x) >> (64 - s)))
/* REPEATn(e): paste the statement sequence e n times in a row. */
#define REPEAT6(e) e e e e e e
#define REPEAT24(e) REPEAT6(e e e e)
#define REPEAT5(e) e e e e e
/* FOR5(v, s, e): set v to 0, then five times run e followed by v += s. */
#define FOR5(v, s, e) \
  v = 0;            \
  REPEAT5(e; v += s;)
 45 | 
 46 | /*** Keccak-f[1600] ***/
/* Apply the 24-round permutation in place.
 *
 * state is accessed as an array of uint64_t lanes (indices 0..24 are touched),
 * so it must point to at least 200 bytes that are valid for 64-bit access.
 * Each round runs the theta, rho/pi, chi and iota steps marked below; the
 * FOR5/REPEAT24 macros unroll the inner loops at preprocessing time. */
__device__ static inline void keccakf(void* state) {
  uint64_t* a = (uint64_t*)state;
  uint64_t b[5] = {0};
  uint64_t t = 0;
  uint8_t x, y;

  for (int i = 0; i < 24; i++) {
    // Theta
    // b[x] becomes the XOR of the five lanes a[x], a[x+5], ..., a[x+20].
    FOR5(x, 1,
         b[x] = 0;
         FOR5(y, 5,
              b[x] ^= a[x + y]; ))
    FOR5(x, 1,
         FOR5(y, 5,
              a[y + x] ^= b[(x + 4) % 5] ^ rol(b[(x + 1) % 5], 1); ))
    // Rho and pi
    // t carries the lane being moved; b[0] is reused as scratch for the lane
    // that is about to be overwritten.
    t = a[1];
    x = 0;
    REPEAT24(b[0] = a[pi[x]];
             a[pi[x]] = rol(t, rho[x]);
             t = b[0];
             x++; )
    // Chi
    // Works on one group of five lanes at a time (y = 0, 5, ..., 20), copying
    // it to b first so the update reads the old values.
    FOR5(y,
       5,
       FOR5(x, 1,
            b[x] = a[y + x];)
       FOR5(x, 1,
            a[y + x] = b[x] ^ ((~b[(x + 1) % 5]) & b[(x + 2) % 5]); ))
    // Iota
    a[0] ^= RC[i];
  }
}
 80 | 
 81 | /******** The FIPS202-defined functions. ********/
 82 | 
 83 | /*** Some helper macros. ***/
/* P: the permutation applied by hash(). */
#define P keccakf
/* Plen: size of the permutation state in bytes (25 lanes of 8 bytes). */
#define Plen 200
 86 | 
 87 | 
 88 | /** The sponge-based hash construction. **/
 89 | __device__ __forceinline__ static void hash(
 90 |                        const uint8_t initP[Plen],
 91 |                        uint8_t* out,
 92 |                        const uint8_t* in) {
 93 |   uint8_t a[Plen] = {0};
 94 | 
 95 |   #pragma unroll
 96 |   for (int i=0; i<10; i++) ((uint64_t *)a)[i] = ((uint64_t *)initP)[i] ^ ((uint64_t *)in)[i];
 97 |   #pragma unroll
 98 |   for (int i=10; i<25; i++) ((uint64_t *)a)[i] = ((uint64_t *)initP)[i];
 99 | 
100 |   // Apply P
101 |   P(a);
102 |   // Squeeze output.
103 |   #pragma unroll
104 |   for (int i=0; i<4; i++) ((uint64_t *)out)[i] = ((uint64_t *)a)[i];
105 | 
106 | }
107 | 
108 | 


--------------------------------------------------------------------------------
/integrations/hiveos/h-stats.sh:
--------------------------------------------------------------------------------
  1 | ####################################################################################
  2 | ###
  3 | ### kaspa-miner
  4 | ### https://github.com/tmrlvi/kaspa-miner/releases
  5 | ###
  6 | ### Hive integration: Merlin
  7 | ###
  8 | ####################################################################################
  9 | 
 10 | #!/usr/bin/env bash
 11 | 
 12 | #######################
 13 | # MAIN script body
 14 | #######################
 15 | 
 16 | . /hive/miners/custom/kaspa-miner/h-manifest.conf
 17 | stats_raw=`cat $CUSTOM_LOG_BASENAME.log | grep -w "hashrate" | tail -n 1 `
 18 | #echo $stats_raw
 19 | 
 20 | #Calculate miner log freshness
 21 | 
 22 | maxDelay=120
 23 | time_now=`date +%s`
 24 | datetime_rep=`echo $stats_raw | awk '{print $1}' | awk -F[ '{print $2}'`
 25 | time_rep=`date -d $datetime_rep +%s`
 26 | diffTime=`echo $((time_now-time_rep)) | tr -d '-'`
 27 | 
 28 | if [ "$diffTime" -lt "$maxDelay" ]; then
 29 |         total_hashrate=`echo $stats_raw | awk '{print $7}' | cut -d "." -f 1,2 --output-delimiter='' | sed 's/$/0/'`
 30 | 	if [[ $stats_raw == *"Ghash"* ]]; then
 31 | 		total_hashrate=$(($total_hashrate*1000))
 32 | 	fi
 33 | 
 34 |         #GPU Status
 35 |         gpu_stats=$(< $GPU_STATS_JSON)
 36 | 
 37 |         readarray -t gpu_stats < <( jq --slurp -r -c '.[] | .busids, .brand, .temp, .fan | join(" ")' $GPU_STATS_JSON 2>/dev/null)
 38 |         busids=(${gpu_stats[0]})
 39 |         brands=(${gpu_stats[1]})
 40 |         temps=(${gpu_stats[2]})
 41 |         fans=(${gpu_stats[3]})
 42 |         gpu_count=${#busids[@]}
 43 | 
 44 |         hash_arr=()
 45 |         busid_arr=()
 46 |         fan_arr=()
 47 |         temp_arr=()
 48 |         lines=()
 49 | 
 50 |         if [ $(gpu-detect NVIDIA) -gt 0 ]; then
 51 |                 brand_gpu_count=$(gpu-detect NVIDIA)
 52 |                 BRAND_MINER="nvidia"
 53 |         elif [ $(gpu-detect AMD) -gt 0 ]; then
 54 |                 brand_gpu_count=$(gpu-detect AMD)
 55 |                 BRAND_MINER="amd"
 56 |         fi
 57 | 
 58 |         for(( i=0; i < gpu_count; i++ )); do
 59 |                 [[ "${brands[i]}" != $BRAND_MINER ]] && continue
 60 |                 [[ "${busids[i]}" =~ ^([A-Fa-f0-9]+): ]]
 61 |                 busid_arr+=($((16#${BASH_REMATCH[1]})))
 62 |                 temp_arr+=(${temps[i]})
 63 |                 fan_arr+=(${fans[i]})                
 64 |                 gpu_raw=`cat $CUSTOM_LOG_BASENAME.log | grep -w "Device #"$i | tail -n 1 `
 65 |                 hashrate=`echo $gpu_raw | awk '{print $(NF-1)}' | cut -d "." -f 1,2 --output-delimiter='' | sed 's/$/0/'`
 66 |                 if [[ $gpu_raw == *"Ghash"* ]]; then
 67 |                         hashrate=$(($hashrate*1000))
 68 |                 fi
 69 |                 hash_arr+=($hashrate)		
 70 |         done
 71 | 
 72 |         hash_json=`printf '%s\n' "${hash_arr[@]}" | jq -cs '.'`
 73 |         bus_numbers=`printf '%s\n' "${busid_arr[@]}"  | jq -cs '.'`
 74 |         fan_json=`printf '%s\n' "${fan_arr[@]}"  | jq -cs '.'`
 75 |         temp_json=`printf '%s\n' "${temp_arr[@]}"  | jq -cs '.'`
 76 | 
 77 |         uptime=$(( `date +%s` - `stat -c %Y $CUSTOM_CONFIG_FILENAME` ))
 78 | 
 79 | 
 80 |         #Compile stats/khs
 81 |         stats=$(jq -nc \
 82 |                 --argjson hs "$hash_json"\
 83 |                 --arg ver "$CUSTOM_VERSION" \
 84 |                 --arg ths "$total_hashrate" \
 85 |                 --argjson bus_numbers "$bus_numbers" \
 86 |                 --argjson fan "$fan_json" \
 87 |                 --argjson temp "$temp_json" \
 88 |                 --arg uptime "$uptime" \
 89 |                 '{ hs: $hs, hs_units: "khs", algo : "heavyhash", ver:$ver , $uptime, $bus_numbers, $temp, $fan}')
 90 |         khs=$total_hashrate
 91 | else
 92 |   khs=0
 93 |   stats="null"
 94 | fi
 95 | 
 96 | echo Debug info:
 97 | echo Log file : $CUSTOM_LOG_BASENAME.log
 98 | echo Time since last log entry : $diffTime
 99 | echo Raw stats : $stats_raw
100 | echo KHS : $khs
101 | echo Output : $stats
102 | 
103 | [[ -z $khs ]] && khs=0
104 | [[ -z $stats ]] && stats="null"
105 | 


--------------------------------------------------------------------------------
/src/target.rs:
--------------------------------------------------------------------------------
  1 | use core::cmp::Ordering;
  2 | use std::fmt;
  3 | 
  4 | pub fn u256_from_compact_target(bits: u32) -> Uint256 {
  5 |     // This is a floating-point "compact" encoding originally used by
  6 |     // OpenSSL, which satoshi put into consensus code, so we're stuck
  7 |     // with it. The exponent needs to have 3 subtracted from it, hence
  8 |     // this goofy decoding code:
  9 |     let (mant, expt) = {
 10 |         let unshifted_expt = bits >> 24;
 11 |         if unshifted_expt <= 3 {
 12 |             ((bits & 0xFFFFFF) >> (8 * (3 - unshifted_expt as usize)), 0)
 13 |         } else {
 14 |             (bits & 0xFFFFFF, 8 * ((bits >> 24) - 3))
 15 |         }
 16 |     };
 17 | 
 18 |     // The mantissa is signed but may not be negative
 19 |     if mant > 0x7FFFFF {
 20 |         Default::default()
 21 |     } else {
 22 |         Uint256::from_u64(mant as u64) << (expt as usize)
 23 |     }
 24 | }
 25 | 
/// Little-endian large integer type
///
/// A 256-bit unsigned integer stored as four `u64` limbs; index 0 holds the
/// least significant 64 bits. The derived `PartialEq`/`Hash` operate on the
/// limb array, while ordering is implemented manually (most significant limb
/// first).
#[derive(Copy, Clone, PartialEq, Eq, Hash, Default, Debug)]
pub struct Uint256(pub [u64; 4]);
 29 | 
 30 | impl Uint256 {
 31 |     #[inline(always)]
 32 |     pub fn new(v: [u64; 4]) -> Self {
 33 |         Self(v)
 34 |     }
 35 |     /// Create an object from a given unsigned 64-bit integer
 36 |     #[inline]
 37 |     pub fn from_u64(init: u64) -> Uint256 {
 38 |         let mut ret = [0; 4];
 39 |         ret[0] = init;
 40 |         Uint256(ret)
 41 |     }
 42 | 
 43 |     /// Creates big integer value from a byte slice using
 44 |     /// little-endian encoding
 45 |     #[inline(always)]
 46 |     pub fn from_le_bytes(bytes: [u8; 32]) -> Uint256 {
 47 |         let mut out = [0u64; 4];
 48 |         // This should optimize to basically a transmute.
 49 |         out.iter_mut()
 50 |             .zip(bytes.chunks_exact(8))
 51 |             .for_each(|(word, bytes)| *word = u64::from_le_bytes(bytes.try_into().unwrap()));
 52 |         Self(out)
 53 |     }
 54 | 
 55 |     #[inline(always)]
 56 |     pub fn to_le_bytes(self) -> [u8; 32] {
 57 |         let mut out = [0u8; 32];
 58 |         // This should optimize to basically a transmute.
 59 |         out.chunks_exact_mut(8).zip(self.0).for_each(|(bytes, word)| bytes.copy_from_slice(&word.to_le_bytes()));
 60 |         out
 61 |     }
 62 | 
 63 |     #[inline(always)]
 64 |     pub fn to_be_bytes(self) -> [u8; 32] {
 65 |         let mut out = [0u8; 32];
 66 |         // This should optimize to basically a transmute.
 67 |         out.chunks_exact_mut(8)
 68 |             .zip(self.0.iter().rev())
 69 |             .for_each(|(bytes, word)| bytes.copy_from_slice(&word.to_be_bytes()));
 70 |         out
 71 |     }
 72 | }
 73 | 
 74 | impl fmt::LowerHex for Uint256 {
 75 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 76 |         self.to_le_bytes().iter().try_for_each(|&c| write!(f, "{:02x}", c))
 77 |     }
 78 | }
 79 | 
 80 | impl PartialOrd for Uint256 {
 81 |     #[inline(always)]
 82 |     fn partial_cmp(&self, other: &Uint256) -> Option<Ordering> {
 83 |         Some(self.cmp(other))
 84 |     }
 85 | }
 86 | 
 87 | impl Ord for Uint256 {
 88 |     #[inline(always)]
 89 |     fn cmp(&self, other: &Uint256) -> Ordering {
 90 |         // We need to manually implement ordering because we use little-endian
 91 |         // and the auto derive is a lexicographic ordering(i.e. memcmp)
 92 |         // which with numbers is equivalent to big-endian
 93 |         Iterator::cmp(self.0.iter().rev(), other.0.iter().rev())
 94 |     }
 95 | }
 96 | 
 97 | impl core::ops::Shl<usize> for Uint256 {
 98 |     type Output = Uint256;
 99 | 
100 |     fn shl(self, shift: usize) -> Uint256 {
101 |         let Uint256(ref original) = self;
102 |         let mut ret = [0u64; 4];
103 |         let word_shift = shift / 64;
104 |         let bit_shift = shift % 64;
105 |         for i in 0..4 {
106 |             // Shift
107 |             if bit_shift < 64 && i + word_shift < 4 {
108 |                 ret[i + word_shift] += original[i] << bit_shift;
109 |             }
110 |             // Carry
111 |             if bit_shift > 0 && i + word_shift + 1 < 4 {
112 |                 ret[i + word_shift + 1] += original[i] >> (64 - bit_shift);
113 |             }
114 |         }
115 |         Uint256(ret)
116 |     }
117 | }
118 | 


--------------------------------------------------------------------------------
/src/cli.rs:
--------------------------------------------------------------------------------
  1 | use clap::Parser;
  2 | use log::LevelFilter;
  3 | 
  4 | use crate::Error;
  5 | 
// Command-line options of the miner binary, parsed by clap.
// `Opt::process` must be called after parsing: it normalizes `kaspad_address`, fills in
// defaults for `port`/`num_threads` and sets `devfund_address`.
// NOTE: plain `//` comments are used on purpose — clap derives help text from `///` comments.
#[derive(Parser, Debug)]
#[clap(name = "kaspa-miner", version, about = "A Kaspa high performance CPU miner", term_width = 0)]
pub struct Opt {
    #[clap(short, long, help = "Enable debug logging level")]
    pub debug: bool,
    #[clap(short = 'a', long = "mining-address", help = "The Kaspa address for the miner reward")]
    pub mining_address: String,
    // After `process`, holds a full URL (`grpc://host:port` unless a scheme was given).
    #[clap(short = 's', long = "kaspad-address", default_value = "127.0.0.1", help = "The IP of the kaspad instance")]
    pub kaspad_address: String,

    // Stored in units of 1/10_000 (so 2% is 200); see `parse_devfund_percent`.
    #[clap(long = "devfund-percent", help = "The percentage of blocks to send to the devfund (minimum 2%)", default_value = "2", parse(try_from_str = parse_devfund_percent))]
    pub devfund_percent: u16,

    // Left as `None` by clap when not given; `Opt::port` fills in the network default.
    #[clap(short, long, help = "Kaspad port [default: Mainnet = 16110, Testnet = 16211]")]
    port: Option<u16>,

    #[clap(long, help = "Use testnet instead of mainnet [default: false]")]
    testnet: bool,
    // `process` replaces `None` with `Some(0)`.
    #[clap(short = 't', long = "threads", help = "Amount of CPU miner threads to launch [default: 0]")]
    pub num_threads: Option<u16>,
    #[clap(
        long = "mine-when-not-synced",
        help = "Mine even when kaspad says it is not synced",
        long_help = "Mine even when kaspad says it is not synced, only useful when passing `--allow-submit-block-when-not-synced` to kaspad  [default: false]"
    )]
    pub mine_when_not_synced: bool,

    // Not a CLI argument (`skip`); assigned by `process`.
    #[clap(skip)]
    pub devfund_address: String,
}
 36 | 
 37 | fn parse_devfund_percent(s: &str) -> Result<u16, &'static str> {
 38 |     let err = "devfund-percent should be --devfund-percent=XX.YY up to 2 numbers after the dot";
 39 |     let mut splited = s.split('.');
 40 |     let prefix = splited.next().ok_or(err)?;
 41 |     // if there's no postfix then it's 0.
 42 |     let postfix = splited.next().ok_or(err).unwrap_or("0");
 43 |     // error if there's more than a single dot
 44 |     if splited.next().is_some() {
 45 |         return Err(err);
 46 |     };
 47 |     // error if there are more than 2 numbers before or after the dot
 48 |     if prefix.len() > 2 || postfix.len() > 2 {
 49 |         return Err(err);
 50 |     }
 51 |     let postfix: u16 = postfix.parse().map_err(|_| err)?;
 52 |     let prefix: u16 = prefix.parse().map_err(|_| err)?;
 53 |     // can't be more than 99.99%,
 54 |     if prefix >= 100 || postfix >= 100 {
 55 |         return Err(err);
 56 |     }
 57 |     if prefix < 2 {
 58 |         // Force at least 2 percent
 59 |         return Ok(200u16);
 60 |     }
 61 |     // DevFund is out of 10_000
 62 |     Ok(prefix * 100 + postfix)
 63 | }
 64 | 
 65 | impl Opt {
 66 |     pub fn process(&mut self) -> Result<(), Error> {
 67 |         //self.gpus = None;
 68 |         if self.kaspad_address.is_empty() {
 69 |             self.kaspad_address = "127.0.0.1".to_string();
 70 |         }
 71 | 
 72 |         if !self.kaspad_address.contains("://") {
 73 |             let port_str = self.port().to_string();
 74 |             let (kaspad, port) = match self.kaspad_address.contains(':') {
 75 |                 true => self.kaspad_address.split_once(':').expect("We checked for `:`"),
 76 |                 false => (self.kaspad_address.as_str(), port_str.as_str()),
 77 |             };
 78 |             self.kaspad_address = format!("grpc://{}:{}", kaspad, port);
 79 |         }
 80 |         log::info!("kaspad address: {}", self.kaspad_address);
 81 | 
 82 |         if self.num_threads.is_none() {
 83 |             self.num_threads = Some(0);
 84 |         }
 85 | 
 86 |         let miner_network = self.mining_address.split(':').next();
 87 |         self.devfund_address = String::from("kaspa:pzhh76qc82wzduvsrd9xh4zde9qhp0xc8rl7qu2mvl2e42uvdqt75zrcgpm00");
 88 |         let devfund_network = self.devfund_address.split(':').next();
 89 |         if miner_network.is_some() && devfund_network.is_some() && miner_network != devfund_network {
 90 |             self.devfund_percent = 0;
 91 |             log::info!(
 92 |                 "Mining address ({}) and devfund ({}) are not from the same network. Disabling devfund.",
 93 |                 miner_network.unwrap(),
 94 |                 devfund_network.unwrap()
 95 |             )
 96 |         }
 97 |         Ok(())
 98 |     }
 99 | 
100 |     fn port(&mut self) -> u16 {
101 |         *self.port.get_or_insert(if self.testnet { 16211 } else { 16110 })
102 |     }
103 | 
104 |     pub fn log_level(&self) -> LevelFilter {
105 |         if self.debug {
106 |             LevelFilter::Debug
107 |         } else {
108 |             LevelFilter::Info
109 |         }
110 |     }
111 | }
112 | 


--------------------------------------------------------------------------------
/plugins/cuda/kaspa-cuda-native/src/keccak-tiny-unrolled.c:
--------------------------------------------------------------------------------
  1 | /** libkeccak-tiny
  2 |  *
  3 |  * A single-file implementation of SHA-3 and SHAKE.
  4 |  *
  5 |  * Implementor: David Leon Gil
  6 |  * License: CC0, attribution kindly requested. Blame taken too,
  7 |  * but not liability.
  8 |  */
  9 | #define __STDC_WANT_LIB_EXT1__ 1
 10 | 
 11 | #include <stdint.h>
 12 | #include <stdio.h>
 13 | #include <stdlib.h>
 14 | #include <string.h>
 15 | 
 16 | /******** The Keccak-f[1600] permutation ********/
 17 | 
 18 | 
 19 | /*** Constants. ***/
/* Rotation amounts: keccakf rotates the travelling lane by rho[x] at step x
   of its 24-step "rho and pi" walk. */
__device__ static const uint8_t rho[24] = \
  { 1,  3,   6, 10, 15, 21,
    28, 36, 45, 55,  2, 14,
    27, 41, 56,  8, 25, 43,
    62, 18, 39, 61, 20, 44};
/* Lane indices: at step x of the same walk, keccakf reads and overwrites
   lane a[pi[x]]. */
__device__ static const uint8_t pi[24] = \
  {10,  7, 11, 17, 18, 3,
    5, 16,  8, 21, 24, 4,
   15, 23, 19, 13, 12, 2,
   20, 14, 22,  9, 6,  1};
/* Round constants: keccakf XORs RC[i] into lane 0 at the end of round i. */
__device__ static const uint64_t RC[24] = \
  {1ULL, 0x8082ULL, 0x800000000000808aULL, 0x8000000080008000ULL,
   0x808bULL, 0x80000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL,
   0x8aULL, 0x88ULL, 0x80008009ULL, 0x8000000aULL,
   0x8000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL,
   0x8000000000008002ULL, 0x8000000000000080ULL, 0x800aULL, 0x800000008000000aULL,
   0x8000000080008081ULL, 0x8000000000008080ULL, 0x80000001ULL, 0x8000000080008008ULL};

/*** Helper macros to unroll the permutation. ***/
/* Rotate the 64-bit value x left by s bits (s is used in both shifts, so it
   must be in 1..63). */
#define rol(x, s) (((x) << s) | ((x) >> (64 - s)))
/* REPEATn(e): paste the statement sequence e n times in a row. */
#define REPEAT6(e) e e e e e e
#define REPEAT24(e) REPEAT6(e e e e)
#define REPEAT5(e) e e e e e
/* FOR5(v, s, e): set v to 0, then five times run e followed by v += s. */
#define FOR5(v, s, e) \
  v = 0;            \
  REPEAT5(e; v += s;)
 46 | 
 47 | /*** Keccak-f[1600] ***/
/* Apply the 24-round permutation in place, with the round loop itself unrolled.
 *
 * state is accessed as an array of uint64_t lanes (indices 0..24 are touched),
 * so it must point to at least 200 bytes that are valid for 64-bit access.
 * The outer REPEAT24 pastes the round body 24 times; i counts the rounds and
 * selects the round constant. Each round runs the theta, rho/pi, chi and iota
 * steps marked below. */
__device__ static inline void keccakf(void* state) {
  uint64_t* a = (uint64_t*)state;
  uint64_t b[5] = {0};
  uint64_t t = 0;
  uint8_t x, y, i = 0;

  REPEAT24(
      // Theta
      FOR5(x, 1,
           b[x] = 0;
           FOR5(y, 5,
                b[x] ^= a[x + y]; ))
      FOR5(x, 1,
           FOR5(y, 5,
                a[y + x] ^= b[(x + 4) % 5] ^ rol(b[(x + 1) % 5], 1); ))
      // Rho and pi
      t = a[1];
      x = 0;
      REPEAT24(b[0] = a[pi[x]];
               a[pi[x]] = rol(t, rho[x]);
               t = b[0];
               x++; )
      // Chi
      FOR5(y,
         5,
         FOR5(x, 1,
              b[x] = a[y + x];)
         FOR5(x, 1,
              a[y + x] = b[x] ^ ((~b[(x + 1) % 5]) & b[(x + 2) % 5]); ))
      // Iota
      a[0] ^= RC[i];
      i++; )
}
 81 | 
 82 | /******** The FIPS202-defined functions. ********/
 83 | 
 84 | /*** Some helper macros. ***/
 85 | 
/* _(S): wrap the statements S in do { } while (0) so they act as one statement. */
#define _(S) do { S } while (0)
/* FOR(i, ST, L, S): run S for i = 0, ST, 2*ST, ... while i < L. */
#define FOR(i, ST, L, S) \
  _(for (size_t i = 0; i < L; i += ST) { S; })
/* mkapply_ds(NAME, S): define NAME(dst, src, len), which runs S for every
   byte index i in [0, len). */
#define mkapply_ds(NAME, S)                                          \
  __device__ static inline void NAME(uint8_t* dst,                              \
                          const uint8_t* src,                        \
                          size_t len) {                              \
    FOR(i, 1, len, S);                                               \
  }
/* mkapply_sd(NAME, S): same as mkapply_ds, but the generated function takes
   the source first: NAME(src, dst, len). */
#define mkapply_sd(NAME, S)                                          \
  __device__ static inline void NAME(const uint8_t* src,                        \
                          uint8_t* dst,                              \
                          size_t len) {                              \
    FOR(i, 1, len, S);                                               \
  }

/* xorin(dst, src, len): XOR len bytes of src into dst. */
mkapply_ds(xorin, dst[i] ^= src[i])  // xorin
/* setout(src, dst, len): copy len bytes from src to dst. */
mkapply_sd(setout, dst[i] = src[i])  // setout

/* P: the permutation applied by hash(). */
#define P keccakf
/* Plen: size of the permutation state in bytes. */
#define Plen 200
typedef uint8_t ShaState[Plen];

// Fold P*F over the full blocks of an input.
// Expects the variables `a` (the state) and `rate` to be in scope at the
// expansion site; advances I and decrements L by rate per block.
#define foldP(I, L, F) \
  while (L >= rate) {  \
    F(a, I, rate);     \
    P(a);              \
    I += rate;         \
    L -= rate;         \
  }
117 | 
118 | /** The sponge-based hash construction. **/
/** The sponge-based hash construction, started from a caller-supplied state.
 *
 * initP:  initial permutation state (Plen bytes), copied into the working state.
 * out:    output buffer, receives outlen bytes.
 * in:     input buffer of inlen bytes (may be NULL only when inlen is 0).
 * rate:   block size in bytes; must be in 1..Plen.
 * delim:  domain-separation/padding byte XORed in after the last full block.
 *
 * Returns 0 on success and -1 on invalid arguments. A rate of 0 is rejected
 * explicitly: it would make the block-folding loops spin forever (the length
 * never decreases) and would index a[rate - 1] out of bounds.
 */
__device__ __forceinline__ static int hash(
                       const uint8_t initP[Plen],
                       uint8_t* out, size_t outlen,
                       const uint8_t* in, size_t inlen,
                       size_t rate, uint8_t delim) {
  if ((out == NULL) || ((in == NULL) && inlen != 0) || (rate == 0) || (rate > Plen)) {
    return -1;
  }
  uint8_t a[Plen] = {0};
  memcpy(a, initP, Plen);
  // Absorb input.
  foldP(in, inlen, xorin);
  // Xor in the DS and pad frame.
  // (inlen is now the length of the trailing partial block, so inlen < rate.)
  a[inlen] ^= delim;
  a[rate - 1] ^= 0x80;
  // Xor in the last block.
  xorin(a, in, inlen);
  // Apply P
  P(a);
  // Squeeze output.
  foldP(out, outlen, setout);
  setout(a, out, outlen);
  // Wipe the working state before returning.
  //memset_s(a, 200, 0, 200);
  memset(a,0,200);
  return 0;
}
145 | 
146 | 


--------------------------------------------------------------------------------
/plugins/opencl/src/lib.rs:
--------------------------------------------------------------------------------
  1 | #[macro_use]
  2 | extern crate kaspa_miner;
  3 | 
  4 | use clap::{ArgMatches, FromArgMatches};
  5 | use kaspa_miner::{Plugin, Worker, WorkerSpec};
  6 | use log::{info, LevelFilter};
  7 | use opencl3::device::{Device, CL_DEVICE_TYPE_ALL};
  8 | use opencl3::platform::{get_platforms, Platform};
  9 | use opencl3::types::cl_device_id;
 10 | use std::error::Error as StdError;
 11 | 
/// Boxed, thread-safe error type used by this plugin.
pub type Error = Box<dyn StdError + Send + Sync + 'static>;
 13 | 
 14 | mod cli;
 15 | mod worker;
 16 | 
 17 | use crate::cli::{NonceGenEnum, OpenCLOpt};
 18 | use crate::worker::OpenCLGPUWorker;
 19 | 
/// Workload value given to a device when the `opencl_workload` option supplies none.
const DEFAULT_WORKLOAD_SCALE: f32 = 512.;
 21 | 
/// Plugin state for the OpenCL workers.
pub struct OpenCLPlugin {
    /// One spec per selected device; filled by `process_option`.
    specs: Vec<OpenCLWorkerSpec>,
    /// Value reported by `Plugin::enabled`; starts as `false` and is updated by `process_option`.
    _enabled: bool,
}
 26 | 
 27 | impl OpenCLPlugin {
 28 |     fn new() -> Result<Self, Error> {
 29 |         env_logger::builder().filter_level(LevelFilter::Info).parse_default_env().init();
 30 |         Ok(Self { specs: Vec::new(), _enabled: false })
 31 |     }
 32 | }
 33 | 
 34 | impl Plugin for OpenCLPlugin {
 35 |     fn name(&self) -> &'static str {
 36 |         "OpenCL Worker"
 37 |     }
 38 | 
 39 |     fn enabled(&self) -> bool {
 40 |         self._enabled
 41 |     }
 42 | 
 43 |     fn get_worker_specs(&self) -> Vec<Box<dyn WorkerSpec>> {
 44 |         self.specs.iter().map(|spec| Box::new(*spec) as Box<dyn WorkerSpec>).collect::<Vec<Box<dyn WorkerSpec>>>()
 45 |     }
 46 | 
 47 |     //noinspection RsTypeCheck
 48 |     fn process_option(&mut self, matches: &ArgMatches) -> Result<usize, kaspa_miner::Error> {
 49 |         let opts: OpenCLOpt = OpenCLOpt::from_arg_matches(matches)?;
 50 | 
 51 |         self._enabled = opts.opencl_enable;
 52 |         let platforms = match get_platforms() {
 53 |             Ok(p) => p,
 54 |             Err(e) => {
 55 |                 return Err(e.to_string().into());
 56 |             }
 57 |         };
 58 |         info!("OpenCL Found Platforms:");
 59 |         info!("=======================");
 60 |         for platform in &platforms {
 61 |             let vendor = &platform.vendor().unwrap_or_else(|_| "Unk".into());
 62 |             let name = &platform.name().unwrap_or_else(|_| "Unk".into());
 63 |             let num_devices = platform.get_devices(CL_DEVICE_TYPE_ALL).unwrap_or_default().len();
 64 |             info!("{}: {} ({} devices available)", vendor, name, num_devices);
 65 |         }
 66 |         let amd_platforms = (&platforms)
 67 |             .iter()
 68 |             .filter(|p| {
 69 |                 p.vendor().unwrap_or_else(|_| "Unk".into()) == "Advanced Micro Devices, Inc."
 70 |                     && !p.get_devices(CL_DEVICE_TYPE_ALL).unwrap_or_default().is_empty()
 71 |             })
 72 |             .collect::<Vec<&Platform>>();
 73 |         let _platform: &Platform = match opts.opencl_platform {
 74 |             Some(idx) => {
 75 |                 self._enabled = true;
 76 |                 &platforms[idx as usize]
 77 |             }
 78 |             None if !opts.opencl_amd_disable && !amd_platforms.is_empty() => {
 79 |                 self._enabled = true;
 80 |                 amd_platforms[0]
 81 |             }
 82 |             None => &platforms[0],
 83 |         };
 84 |         if self._enabled {
 85 |             info!(
 86 |                 "Chose to mine on {}: {}.",
 87 |                 &_platform.vendor().unwrap_or_else(|_| "Unk".into()),
 88 |                 &_platform.name().unwrap_or_else(|_| "Unk".into())
 89 |             );
 90 | 
 91 |             let device_ids = _platform.get_devices(CL_DEVICE_TYPE_ALL).unwrap();
 92 |             let gpus = match opts.opencl_device {
 93 |                 Some(dev) => {
 94 |                     self._enabled = true;
 95 |                     dev.iter().map(|d| device_ids[*d as usize]).collect::<Vec<cl_device_id>>()
 96 |                 }
 97 |                 None => device_ids,
 98 |             };
 99 | 
100 |             self.specs = (0..gpus.len())
101 |                 .map(|i| OpenCLWorkerSpec {
102 |                     _platform: *_platform,
103 |                     index: i,
104 |                     device_id: Device::new(gpus[i]),
105 |                     workload: match &opts.opencl_workload {
106 |                         Some(workload) if i < workload.len() => workload[i],
107 |                         Some(workload) if !workload.is_empty() => *workload.last().unwrap(),
108 |                         _ => DEFAULT_WORKLOAD_SCALE,
109 |                     },
110 |                     is_absolute: opts.opencl_workload_absolute,
111 |                     experimental_amd: opts.experimental_amd,
112 |                     use_amd_binary: !opts.opencl_no_amd_binary,
113 |                     random: opts.opencl_nonce_gen,
114 |                 })
115 |                 .collect();
116 |         }
117 |         Ok(self.specs.len())
118 |     }
119 | }
120 | 
 121 | #[derive(Copy, Clone)]
 122 | struct OpenCLWorkerSpec { // Settings for one OpenCL worker; one spec is created per selected device
 123 |     _platform: Platform, // Platform the device was taken from; not read by the `WorkerSpec` impl
 124 |     index: usize, // Position in the selected device list; shown as `#<index>` by `id()`
 125 |     device_id: Device, // Device the worker is created on
 126 |     workload: f32, // Per-device `opencl_workload` value, else the last value given, else `DEFAULT_WORKLOAD_SCALE`
 127 |     is_absolute: bool, // Copied from the `opencl_workload_absolute` option
 128 |     experimental_amd: bool, // Copied from the `experimental_amd` option
 129 |     use_amd_binary: bool, // False when `opencl_no_amd_binary` was passed
 130 |     random: NonceGenEnum, // Nonce generator choice, copied from `opencl_nonce_gen`
 131 | }
132 | 
133 | impl WorkerSpec for OpenCLWorkerSpec {
134 |     fn id(&self) -> String {
135 |         format!(
136 |             "#{} {}",
137 |             self.index,
138 |             self.device_id
139 |                 .board_name_amd()
140 |                 .unwrap_or_else(|_| self.device_id.name().unwrap_or_else(|_| "Unknown Device".into()))
141 |         )
142 |     }
143 | 
144 |     fn build(&self) -> Box<dyn Worker> {
145 |         Box::new(
146 |             OpenCLGPUWorker::new(
147 |                 self.device_id,
148 |                 self.workload,
149 |                 self.is_absolute,
150 |                 self.experimental_amd,
151 |                 self.use_amd_binary,
152 |                 &self.random,
153 |             )
154 |             .unwrap(),
155 |         )
156 |     }
157 | }
158 | 
159 | declare_plugin!(OpenCLPlugin, OpenCLPlugin::new, OpenCLOpt);
160 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
  1 | use clap::ArgMatches;
  2 | use std::any::Any;
  3 | use std::error::Error as StdError;
  4 | 
  5 | pub mod xoshiro256starstar;
  6 | use libloading::{Library, Symbol};
  7 | 
  8 | pub type Error = Box<dyn StdError + Send + Sync + 'static>;
  9 | 
  10 | #[derive(Default)]
  11 | pub struct PluginManager { // Owns the loaded plugins and the dynamic libraries they were created from
  12 |     plugins: Vec<Box<dyn Plugin>>, // Declared first, so it is dropped before the libraries (fields drop in declaration order)
  13 |     loaded_libraries: Vec<Library>, // Kept so every loaded library persists in memory
  14 | }
 15 | 
 16 | /**
 17 |  Plugin Manager class - allows inserting your own hashers
 18 |  Inspired by https://michael-f-bryan.github.io/rust-ffi-guide/dynamic_loading.html
 19 | */
 20 | impl PluginManager {
 21 |     pub fn new() -> Self {
 22 |         Self { plugins: Vec::new(), loaded_libraries: Vec::new() }
 23 |     }
 24 | 
 25 |     pub(crate) unsafe fn load_single_plugin<'help>(
 26 |         &mut self,
 27 |         app: clap::App<'help>,
 28 |         path: &str,
 29 |     ) -> Result<clap::App<'help>, (clap::App<'help>, Error)> {
 30 |         type PluginCreate<'help> =
 31 |             unsafe fn(*const clap::App<'help>) -> (*mut clap::App<'help>, *mut dyn Plugin, *mut Error);
 32 | 
 33 |         let lib = match Library::new(path) {
 34 |             Ok(l) => l,
 35 |             Err(e) => return Err((app, e.to_string().into())),
 36 |         };
 37 | 
 38 |         self.loaded_libraries.push(lib); // Save library so it persists in memory
 39 |         let lib = self.loaded_libraries.last().unwrap();
 40 | 
 41 |         let constructor: Symbol<PluginCreate> = match lib.get(b"_plugin_create") {
 42 |             Ok(cons) => cons,
 43 |             Err(e) => return Err((app, e.to_string().into())),
 44 |         };
 45 | 
 46 |         let (app, boxed_raw, error) = constructor(Box::into_raw(Box::new(app)));
 47 |         let app = *Box::from_raw(app);
 48 | 
 49 |         if boxed_raw.is_null() {
 50 |             return Err((app, *Box::from_raw(error)));
 51 |         }
 52 |         let plugin = Box::from_raw(boxed_raw);
 53 |         self.plugins.push(plugin);
 54 | 
 55 |         Ok(app)
 56 |     }
 57 | 
 58 |     pub fn build(&self) -> Result<Vec<Box<dyn WorkerSpec + 'static>>, Error> {
 59 |         let mut specs = Vec::<Box<dyn WorkerSpec + 'static>>::new();
 60 |         for plugin in &self.plugins {
 61 |             if plugin.enabled() {
 62 |                 specs.extend(plugin.get_worker_specs());
 63 |             }
 64 |         }
 65 |         Ok(specs)
 66 |     }
 67 | 
 68 |     /**
 69 |     Process the options for a plugin, and reports how many workers are available
 70 |     */
 71 |     pub fn process_options(&mut self, matchs: &ArgMatches) -> Result<usize, Error> {
 72 |         let mut count = 0usize;
 73 |         self.plugins.iter_mut().for_each(|plugin| {
 74 |             count += match plugin.process_option(matchs) {
 75 |                 Ok(n) => n,
 76 |                 Err(e) => {
 77 |                     eprintln!(
 78 |                         "WARNING: Failed processing options for {} (ignore if you do not intend to use): {}",
 79 |                         plugin.name(),
 80 |                         e
 81 |                     );
 82 |                     0
 83 |                 }
 84 |             }
 85 |         });
 86 |         Ok(count)
 87 |     }
 88 | 
 89 |     pub fn has_specs(&self) -> bool {
 90 |         !self.plugins.is_empty()
 91 |     }
 92 | }
 93 | 
  94 | pub trait Plugin: Any + Send + Sync { // Interface of a loadable mining plugin; `PluginManager` stores these as `Box<dyn Plugin>`
  95 |     fn name(&self) -> &'static str; // Name printed in the warning when option processing fails
  96 |     fn enabled(&self) -> bool; // `PluginManager::build` only collects specs from plugins returning true
  97 |     fn get_worker_specs(&self) -> Vec<Box<dyn WorkerSpec>>; // Specs of the workers this plugin offers
  98 |     fn process_option(&mut self, matchs: &ArgMatches) -> Result<usize, Error>; // Reads the parsed CLI arguments; `Ok(n)` is added to the manager's worker count
  99 | }
100 | 
 101 | pub trait WorkerSpec: Any + Send + Sync { // Description of one worker that can be built later
 102 |     /*type_: GPUWorkType,
 103 |     opencl_platform: u16,
 104 |     device_id: u32,
 105 |     workload: f32,
 106 |     is_absolute: bool*/
 107 |     fn id(&self) -> String; // Identifier string for the described worker
 108 |     fn build(&self) -> Box<dyn Worker>; // Creates the worker; infallible by signature, so implementations cannot report errors through it
 109 | }
110 | 
 111 | pub trait Worker { // A single mining worker, as produced by `WorkerSpec::build`
 112 |     //fn new(device_id: u32, workload: f32, is_absolute: bool) -> Result<Self, Error>;
 113 |     fn id(&self) -> String; // Identifier string for this worker
 114 |     fn load_block_constants(&mut self, hash_header: &[u8; 72], matrix: &[[u16; 64]; 64], target: &[u64; 4]); // Receives the per-block inputs: 72-byte header buffer, 64x64 `u16` matrix and 4-word target
 115 | 
 116 |     fn calculate_hash(&mut self, nonces: Option<&Vec<u64>>, nonce_mask: u64, nonce_fixed: u64); // NOTE(review): presumably starts hashing one batch of nonces - confirm against an implementation
 117 |     fn sync(&self) -> Result<(), Error>; // NOTE(review): presumably waits for the work started by `calculate_hash` - confirm
 118 | 
 119 |     fn get_workload(&self) -> usize; // NOTE(review): presumably the number of nonces handled per batch - confirm
 120 |     fn copy_output_to(&mut self, nonces: &mut Vec<u64>) -> Result<(), Error>; // Writes the worker's output into `nonces`
 121 | }
122 | 
123 | pub fn load_plugins<'help>(
124 |     app: clap::App<'help>,
125 |     paths: &[String],
126 | ) -> Result<(clap::App<'help>, PluginManager), Error> {
127 |     let mut factory = PluginManager::new();
128 |     let mut app = app;
129 |     for path in paths {
130 |         app = unsafe {
131 |             factory.load_single_plugin(app, path.as_str()).unwrap_or_else(|(app, e)| {
132 |                 eprintln!("WARNING: Failed loading plugin {} (ignore if you do not intend to use): {}", path, e);
133 |                 app
134 |             })
135 |         };
136 |     }
137 |     Ok((app, factory))
138 | }
139 | 
140 | #[macro_export]
141 | macro_rules! declare_plugin {
142 |     ($plugin_type:ty, $constructor:path, $args:ty) => {
143 |         use clap::Args;
144 |         #[no_mangle]
145 |         pub unsafe extern "C" fn _plugin_create(
146 |             app: *mut clap::App,
147 |         ) -> (*mut clap::App, *mut dyn $crate::Plugin, *const $crate::Error) {
148 |             // make sure the constructor is the correct type.
149 |             let constructor: fn() -> Result<$plugin_type, $crate::Error> = $constructor;
150 | 
151 |             let object = match constructor() {
152 |                 Ok(obj) => obj,
153 |                 Err(e) => {
154 |                     return (
155 |                         app,
156 |                         unsafe { std::mem::MaybeUninit::zeroed().assume_init() }, // Translates to null pointer
157 |                         Box::into_raw(Box::new(e)),
158 |                     );
159 |                 }
160 |             };
161 | 
162 |             let boxed: Box<dyn $crate::Plugin> = Box::new(object);
163 | 
164 |             let boxed_app = Box::new(<$args>::augment_args(unsafe { *Box::from_raw(app) }));
165 |             (Box::into_raw(boxed_app), Box::into_raw(boxed), std::ptr::null::<Error>())
166 |         }
167 |     };
168 | }
169 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Kaspa-miner
  2 | [![Build status](https://github.com/tmrlvi/kaspa-miner/workflows/ci/badge.svg)](https://github.com/tmrlvi/kaspa-miner/actions)
  3 | [![Latest version](https://img.shields.io/crates/v/kaspa-miner.svg)](https://crates.io/crates/kaspa-miner)
  4 | ![License](https://img.shields.io/crates/l/kaspa-miner.svg)
  5 | [![dependency status](https://deps.rs/repo/github/tmrlvi/kaspa-miner/status.svg)](https://deps.rs/repo/github/tmrlvi/kaspa-miner)
  6 | 
  7 | [![Discord](https://discordapp.com/api/guilds/599153230659846165/embed.png)](https://discord.gg/kS3SK5F36R)
  8 | [![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?style=for-the-badge&logo=telegram&logoColor=white)](https://t.me/Kaspaenglish)
  9 | 
 10 | 
 11 | ## Installation
 12 | ### From Sources
 13 | Installing via `cargo install` is not supported for the latest version.
 14 | 
 15 | The regular version is still available at
 16 | ```sh
 17 | cargo install kaspa-miner
 18 | ```
 19 | 
 20 | ### From Git Sources
 21 | 
 22 | If you are looking to build from the repository (for debug / extension), note that the plugins are additional
 23 | packages in the workspace. To compile a specific package, you run the following command or any subset of it
 24 | 
 25 | ```sh
 26 | git clone git@github.com:tmrlvi/kaspa-miner.git
 27 | cd kaspa-miner
 28 | cargo build --release -p kaspa-miner -p kaspacuda -p kaspaopencl
 29 | ```
  30 | The miner (and plugins) will then be in `target/release`. You can replace the last line with
 31 | ```sh
 32 | cargo build --release --all
 33 | ```
 34 | 
 35 | ### From Binaries
 36 | The [release page](https://github.com/tmrlvi/kaspa-miner/releases) includes precompiled binaries for Linux, and Windows (for the GPU version).
 37 | 
 38 | ### Removing Plugins
  39 | To remove a plugin, simply remove the corresponding `dll`/`so` from the directory of the miner.
 40 | 
 41 | * `libkaspacuda.so`, `libkaspacuda.dll`: Cuda support for Kaspa-Miner
 42 | * `libkaspaopencl.so`, `libkaspaopencl.dll`: OpenCL support for Kaspa-Miner
 43 | 
 44 | # Usage
 45 | To start mining, you need to run [kaspad](https://github.com/kaspanet/kaspad) and have an address to send the rewards to.
  46 | Here is a guide on how to run a full node and how to generate addresses: https://github.com/kaspanet/docs/blob/main/Getting%20Started/Full%20Node%20Installation.md
 47 | 
 48 | Help:
 49 | ```
 50 | kaspa-miner 
 51 | A Kaspa high performance CPU miner
 52 | 
 53 | USAGE:
 54 |     kaspa-miner [OPTIONS] --mining-address <MINING_ADDRESS>
 55 | 
 56 | OPTIONS:
 57 |     -a, --mining-address <MINING_ADDRESS>                  The Kaspa address for the miner reward
 58 |         --cuda-device <CUDA_DEVICE>                        Which CUDA GPUs to use [default: all]
 59 |         --cuda-disable                                     Disable cuda workers
 60 |         --cuda-lock-core-clocks <CUDA_LOCK_CORE_CLOCKS>    Lock core clocks eg: ,1200, [default: 0]
 61 |         --cuda-lock-mem-clocks <CUDA_LOCK_MEM_CLOCKS>      Lock mem clocks eg: ,810, [default: 0]
 62 |         --cuda-no-blocking-sync                            Actively wait for result. Higher CPU usage, but less red blocks. Can have lower workload.
 63 |         --cuda-power-limits <CUDA_POWER_LIMITS>            Lock power limits eg: ,150, [default: 0]
 64 |         --cuda-workload <CUDA_WORKLOAD>                    Ratio of nonces to GPU possible parrallel run [default: 64]
 65 |         --cuda-workload-absolute                           The values given by workload are not ratio, but absolute number of nonces [default: false]
 66 |     -d, --debug                                            Enable debug logging level
 67 |         --devfund-percent <DEVFUND_PERCENT>                The percentage of blocks to send to the devfund (minimum 2%) [default: 2]
 68 |         --experimental-amd                                 Uses SMID instructions in AMD. Miner will crash if instruction is not supported
 69 |     -h, --help                                             Print help information
 70 |         --mine-when-not-synced                             Mine even when kaspad says it is not synced
 71 |         --nonce-gen <NONCE_GEN>                            The random method used to generate nonces. Options: (i) xoshiro (ii) lean [default: lean]
 72 |         --opencl-amd-disable                               Disables AMD mining (does not override opencl-enable)
 73 |         --opencl-device <OPENCL_DEVICE>                    Which OpenCL GPUs to use on a specific platform
 74 |         --opencl-enable                                    Enable opencl, and take all devices of the chosen platform
 75 |         --opencl-no-amd-binary                             Disable fetching of precompiled AMD kernel (if exists)
 76 |         --opencl-platform <OPENCL_PLATFORM>                Which OpenCL platform to use (limited to one per executable)
 77 |         --opencl-workload <OPENCL_WORKLOAD>                Ratio of nonces to GPU possible parrallel run in OpenCL [default: 512]
 78 |         --opencl-workload-absolute                         The values given by workload are not ratio, but absolute number of nonces in OpenCL [default: false]
 79 |     -p, --port <PORT>                                      Kaspad port [default: Mainnet = 16110, Testnet = 16211]
 80 |     -s, --kaspad-address <KASPAD_ADDRESS>                  The IP of the kaspad instance [default: 127.0.0.1]
 81 |     -t, --threads <NUM_THREADS>                            Amount of CPU miner threads to launch [default: 0]
 82 |         --testnet                                          Use testnet instead of mainnet [default: false]
 83 | ```
 84 | 
 85 | To start mining, you just need to run the following:
 86 | 
 87 | `./kaspa-miner --mining-address kaspa:XXXXX`
 88 | 
  89 | This will run the miner on all the available GPU devices.
 90 | 
 91 | # Devfund
 92 | 
 93 | The devfund is a fund managed by the Kaspa community in order to fund Kaspa development <br>
  94 | A miner that wants to mine a higher percentage into the dev-fund can pass the following flag: <br>
  95 | `--devfund-percent=XX.YY` to mine only XX.YY% of the blocks into the devfund.
 96 | 
 97 | **This version automatically sets the devfund donation to the community designated address. 
 98 | Due to community decision, the minimum amount in the precompiled binaries is 2%**
 99 | 
100 | # Donation Addresses
101 | 
102 | **Elichai**: `kaspa:qzvqtx5gkvl3tc54up6r8pk5mhuft9rtr0lvn624w9mtv4eqm9rvc9zfdmmpu`
103 | 
104 | **HauntedCook**: `kaspa:qz4jdyu04hv4hpyy00pl6trzw4gllnhnwy62xattejv2vaj5r0p5quvns058f`
105 | 


--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
  1 | #![cfg_attr(all(test, feature = "bench"), feature(test))]
  2 | 
  3 | use std::env::consts::DLL_EXTENSION;
  4 | use std::env::current_exe;
  5 | use std::error::Error as StdError;
  6 | use std::ffi::OsStr;
  7 | 
  8 | use clap::{App, FromArgMatches, IntoApp};
  9 | use kaspa_miner::PluginManager;
 10 | use log::{error, info};
 11 | use rand::{thread_rng, RngCore};
 12 | use std::fs;
 13 | use std::sync::atomic::AtomicU16;
 14 | use std::sync::Arc;
 15 | use std::thread::sleep;
 16 | use std::time::Duration;
 17 | 
 18 | use crate::cli::Opt;
 19 | use crate::client::grpc::KaspadHandler;
 20 | use crate::client::stratum::StratumHandler;
 21 | use crate::client::Client;
 22 | use crate::miner::MinerManager;
 23 | use crate::target::Uint256;
 24 | 
 25 | mod cli;
 26 | mod client;
 27 | mod kaspad_messages;
 28 | mod miner;
 29 | mod pow;
 30 | mod target;
 31 | mod watch;
 32 | 
 33 | const WHITELIST: [&str; 4] = ["libkaspacuda", "libkaspaopencl", "kaspacuda", "kaspaopencl"];
 34 | 
 35 | pub mod proto {
 36 |     #![allow(clippy::derive_partial_eq_without_eq)]
 37 |     tonic::include_proto!("protowire");
 38 |     // include!("protowire.rs"); // FIXME: https://github.com/intellij-rust/intellij-rust/issues/6579
 39 | }
 40 | 
 41 | pub type Error = Box<dyn StdError + Send + Sync + 'static>;
 42 | 
 43 | type Hash = Uint256;
 44 | 
 45 | #[cfg(target_os = "windows")]
 46 | fn adjust_console() -> Result<(), Error> {
 47 |     let console = win32console::console::WinConsole::input();
 48 |     let mut mode = console.get_mode()?;
 49 |     mode = (mode & !win32console::console::ConsoleMode::ENABLE_QUICK_EDIT_MODE)
 50 |         | win32console::console::ConsoleMode::ENABLE_EXTENDED_FLAGS;
 51 |     console.set_mode(mode)?;
 52 |     Ok(())
 53 | }
 54 | 
 55 | fn filter_plugins(dirname: &str) -> Vec<String> {
 56 |     match fs::read_dir(dirname) {
 57 |         Ok(readdir) => readdir
 58 |             .map(|entry| entry.unwrap().path())
 59 |             .filter(|fname| {
 60 |                 fname.is_file()
 61 |                     && fname.extension().is_some()
 62 |                     && fname.extension().and_then(OsStr::to_str).unwrap_or_default().starts_with(DLL_EXTENSION)
 63 |             })
 64 |             .filter(|fname| WHITELIST.iter().any(|lib| *lib == fname.file_stem().and_then(OsStr::to_str).unwrap()))
 65 |             .map(|path| path.to_str().unwrap().to_string())
 66 |             .collect::<Vec<String>>(),
 67 |         _ => Vec::<String>::new(),
 68 |     }
 69 | }
 70 | 
 71 | async fn get_client(
 72 |     kaspad_address: String,
 73 |     mining_address: String,
 74 |     mine_when_not_synced: bool,
 75 |     block_template_ctr: Arc<AtomicU16>,
 76 | ) -> Result<Box<dyn Client + 'static>, Error> {
 77 |     if kaspad_address.starts_with("stratum+tcp://") {
 78 |         let (_schema, address) = kaspad_address.split_once("://").unwrap();
 79 |         Ok(StratumHandler::connect(
 80 |             address.to_string().clone(),
 81 |             mining_address.clone(),
 82 |             mine_when_not_synced,
 83 |             Some(block_template_ctr.clone()),
 84 |         )
 85 |         .await?)
 86 |     } else if kaspad_address.starts_with("grpc://") {
 87 |         Ok(KaspadHandler::connect(
 88 |             kaspad_address.clone(),
 89 |             mining_address.clone(),
 90 |             mine_when_not_synced,
 91 |             Some(block_template_ctr.clone()),
 92 |         )
 93 |         .await?)
 94 |     } else {
 95 |         Err("Did not recognize pool/grpc address schema".into())
 96 |     }
 97 | }
 98 | 
 99 | async fn client_main(
100 |     opt: &Opt,
101 |     block_template_ctr: Arc<AtomicU16>,
102 |     plugin_manager: &PluginManager,
103 | ) -> Result<(), Error> {
104 |     let mut client = get_client(
105 |         opt.kaspad_address.clone(),
106 |         opt.mining_address.clone(),
107 |         opt.mine_when_not_synced,
108 |         block_template_ctr.clone(),
109 |     )
110 |     .await?;
111 | 
112 |     if opt.devfund_percent > 0 {
113 |         client.add_devfund(opt.devfund_address.clone(), opt.devfund_percent);
114 |     }
115 |     client.register().await?;
116 |     let mut miner_manager = MinerManager::new(client.get_block_channel(), opt.num_threads, plugin_manager);
117 |     client.listen(&mut miner_manager).await?;
118 |     drop(miner_manager);
119 |     Ok(())
120 | }
121 | 
122 | #[tokio::main]
123 | async fn main() -> Result<(), Error> {
124 |     #[cfg(target_os = "windows")]
125 |     adjust_console().unwrap_or_else(|e| {
126 |         eprintln!("WARNING: Failed to protect console ({}). Any selection in console will freeze the miner.", e)
127 |     });
128 |     let mut path = current_exe().unwrap_or_default();
129 |     path.pop(); // Getting the parent directory
130 |     let plugins = filter_plugins(path.to_str().unwrap_or("."));
131 |     let (app, mut plugin_manager): (App, PluginManager) = kaspa_miner::load_plugins(Opt::into_app(), &plugins)?;
132 | 
133 |     let matches = app.get_matches();
134 | 
135 |     let worker_count = plugin_manager.process_options(&matches)?;
136 |     let mut opt: Opt = Opt::from_arg_matches(&matches)?;
137 |     opt.process()?;
138 |     env_logger::builder().filter_level(opt.log_level()).parse_default_env().init();
139 |     info!("=================================================================================");
140 |     info!("                 Kaspa-Miner GPU {}", env!("CARGO_PKG_VERSION"));
141 |     info!(" Mining for: {}", opt.mining_address);
142 |     info!("=================================================================================");
143 |     info!("Found plugins: {:?}", plugins);
144 |     info!("Plugins found {} workers", worker_count);
145 |     if worker_count == 0 && opt.num_threads.unwrap_or(0) == 0 {
146 |         error!("No workers specified");
147 |         return Err("No workers specified".into());
148 |     }
149 | 
150 |     let block_template_ctr = Arc::new(AtomicU16::new((thread_rng().next_u64() % 10_000u64) as u16));
151 |     if opt.devfund_percent > 0 {
152 |         info!(
153 |             "devfund enabled, mining {}.{}% of the time to devfund address: {} ",
154 |             opt.devfund_percent / 100,
155 |             opt.devfund_percent % 100,
156 |             opt.devfund_address
157 |         );
158 |     }
159 |     loop {
160 |         match client_main(&opt, block_template_ctr.clone(), &plugin_manager).await {
161 |             Ok(_) => info!("Client closed gracefully"),
162 |             Err(e) => error!("Client closed with error {:?}", e),
163 |         }
164 |         info!("Client closed, reconnecting");
165 |         sleep(Duration::from_millis(100));
166 |     }
167 | }
168 | 


--------------------------------------------------------------------------------
/src/pow/hasher.rs:
--------------------------------------------------------------------------------
  1 | use crate::Hash;
  2 | use blake2b_simd::State as Blake2bState;
  3 | 
  4 | const BLOCK_HASH_DOMAIN: &[u8] = b"BlockHash";
  5 | 
   6 | #[derive(Clone, Copy)]
   7 | pub(super) struct PowHasher([u64; 25]); // 25-word Keccak state; `new` mixes in the pre-PoW hash and timestamp, the nonce is added at finalization
   8 | 
   9 | #[derive(Clone, Copy)]
  10 | pub(super) struct HeavyHasher; // Stateless; all work happens in the associated `hash` function
  11 | 
  12 | #[derive(Clone)]
  13 | pub struct HeaderHasher(Blake2bState); // Incremental BLAKE2b state keyed with `BLOCK_HASH_DOMAIN`
 14 | 
 15 | impl PowHasher {
 16 |     // The initial state of `cSHAKE256("ProofOfWorkHash")`
 17 |     // [10] -> 1123092876221303310 ^ 0x04(padding byte) = 1123092876221303306
 18 |     // [16] -> 10306167911662716186 ^ 0x8000000000000000(final padding) = 1082795874807940378
 19 |     #[rustfmt::skip]
 20 |     const INITIAL_STATE: [u64; 25] = [
 21 |         1242148031264380989, 3008272977830772284, 2188519011337848018, 1992179434288343456, 8876506674959887717,
 22 |         5399642050693751366, 1745875063082670864, 8605242046444978844, 17936695144567157056, 3343109343542796272,
 23 |         1123092876221303306, 4963925045340115282, 17037383077651887893, 16629644495023626889, 12833675776649114147,
 24 |         3784524041015224902, 1082795874807940378, 13952716920571277634, 13411128033953605860, 15060696040649351053,
 25 |         9928834659948351306, 5237849264682708699, 12825353012139217522, 6706187291358897596, 196324915476054915,
 26 |     ];
 27 |     #[inline(always)]
 28 |     pub(super) fn new(pre_pow_hash: Hash, timestamp: u64) -> Self {
 29 |         let mut start = Self::INITIAL_STATE;
 30 |         for (&pre_pow_word, state_word) in pre_pow_hash.0.iter().zip(start.iter_mut()) {
 31 |             *state_word ^= pre_pow_word;
 32 |         }
 33 |         start[4] ^= timestamp;
 34 |         Self(start)
 35 |     }
 36 | 
 37 |     #[inline(always)]
 38 |     pub(super) fn finalize_with_nonce(mut self, nonce: u64) -> Hash {
 39 |         self.0[9] ^= nonce;
 40 |         super::keccak::f1600(&mut self.0);
 41 |         Hash::new(self.0[..4].try_into().unwrap())
 42 |     }
 43 | }
 44 | 
 45 | impl HeavyHasher {
 46 |     // The initial state of `cSHAKE256("ProofOfWorkHash")`
 47 |     // [4] -> 16654558671554924254 ^ 0x04(padding byte) = 16654558671554924250
 48 |     // [16] -> 9793466274154320918 ^ 0x8000000000000000(final padding) = 570094237299545110
 49 |     #[rustfmt::skip]
 50 |     const INITIAL_STATE: [u64; 25] = [
 51 |         4239941492252378377, 8746723911537738262, 8796936657246353646, 1272090201925444760, 16654558671554924250,
 52 |         8270816933120786537, 13907396207649043898, 6782861118970774626, 9239690602118867528, 11582319943599406348,
 53 |         17596056728278508070, 15212962468105129023, 7812475424661425213, 3370482334374859748, 5690099369266491460,
 54 |         8596393687355028144, 570094237299545110, 9119540418498120711, 16901969272480492857, 13372017233735502424,
 55 |         14372891883993151831, 5171152063242093102, 10573107899694386186, 6096431547456407061, 1592359455985097269,
 56 |     ];
 57 |     #[inline(always)]
 58 |     pub(super) fn hash(in_hash: Hash) -> Hash {
 59 |         let mut state = Self::INITIAL_STATE;
 60 |         for (&pre_pow_word, state_word) in in_hash.0.iter().zip(state.iter_mut()) {
 61 |             *state_word ^= pre_pow_word;
 62 |         }
 63 |         super::keccak::f1600(&mut state);
 64 |         Hash::new(state[..4].try_into().unwrap())
 65 |     }
 66 | }
 67 | 
 68 | impl HeaderHasher {
 69 |     #[inline(always)]
 70 |     pub fn new() -> Self {
 71 |         Self(blake2b_simd::Params::new().hash_length(32).key(BLOCK_HASH_DOMAIN).to_state())
 72 |     }
 73 | 
 74 |     pub fn write<A: AsRef<[u8]>>(&mut self, data: A) {
 75 |         self.0.update(data.as_ref());
 76 |     }
 77 | 
 78 |     #[inline(always)]
 79 |     pub fn finalize(self) -> Hash {
 80 |         Hash::from_le_bytes(self.0.finalize().as_bytes().try_into().expect("this is 32 bytes"))
 81 |     }
 82 | }
 83 | 
  84 | pub trait Hasher { // Chainable hashing interface
  85 |     fn update<A: AsRef<[u8]>>(&mut self, data: A) -> &mut Self; // Absorbs `data` and hands back the hasher so calls can be chained
  86 | }
 87 | 
 88 | impl Hasher for HeaderHasher {
 89 |     fn update<A: AsRef<[u8]>>(&mut self, data: A) -> &mut Self {
 90 |         self.write(data);
 91 |         self
 92 |     }
 93 | }
 94 | 
 95 | #[cfg(test)]
 96 | mod tests {
 97 |     use crate::pow::hasher::{HeavyHasher, PowHasher};
 98 |     use crate::Hash;
 99 |     use sha3::digest::{ExtendableOutput, Update, XofReader};
100 |     use sha3::CShake256;
101 | 
102 |     const PROOF_OF_WORK_DOMAIN: &[u8] = b"ProofOfWorkHash";
103 |     const HEAVY_HASH_DOMAIN: &[u8] = b"HeavyHash";
104 | 
105 |     #[test]
106 |     fn test_pow_hash() {
107 |         let timestamp: u64 = 5435345234;
108 |         let nonce: u64 = 432432432;
109 |         let pre_pow_hash = Hash::from_le_bytes([42; 32]);
110 |         let hasher = PowHasher::new(pre_pow_hash, timestamp);
111 |         let hash1 = hasher.finalize_with_nonce(nonce);
112 | 
113 |         let hasher = CShake256::new(PROOF_OF_WORK_DOMAIN)
114 |             .chain(pre_pow_hash.to_le_bytes())
115 |             .chain(timestamp.to_le_bytes())
116 |             .chain([0u8; 32])
117 |             .chain(nonce.to_le_bytes());
118 |         let mut hash2 = [0u8; 32];
119 |         hasher.finalize_xof().read(&mut hash2);
120 |         assert_eq!(Hash::from_le_bytes(hash2), hash1);
121 |     }
122 | 
123 |     #[test]
124 |     fn test_heavy_hash() {
125 |         let val = Hash::from_le_bytes([42; 32]);
126 |         let hash1 = HeavyHasher::hash(val);
127 | 
128 |         let hasher = CShake256::new(HEAVY_HASH_DOMAIN).chain(val.to_le_bytes());
129 |         let mut hash2 = [0u8; 32];
130 |         hasher.finalize_xof().read(&mut hash2);
131 |         assert_eq!(Hash::from_le_bytes(hash2), hash1);
132 |     }
133 | }
134 | 
135 | #[cfg(all(test, feature = "bench"))]
136 | mod benches {
137 |     extern crate test;
138 | 
139 |     use self::test::{black_box, Bencher};
140 |     use super::{HeavyHasher, PowHasher};
141 |     use crate::Hash;
142 | 
143 |     #[bench]
144 |     pub fn bench_pow_hash(bh: &mut Bencher) {
145 |         let timestamp: u64 = 5435345234;
146 |         let mut nonce: u64 = 432432432;
147 |         let pre_pow_hash = Hash::from_le_bytes([42; 32]);
148 |         let mut hasher = PowHasher::new(pre_pow_hash, timestamp);
149 | 
150 |         bh.iter(|| {
151 |             for _ in 0..100 {
152 |                 black_box(&mut hasher);
153 |                 black_box(&mut nonce);
154 |                 black_box(hasher.finalize_with_nonce(nonce));
155 |             }
156 |         });
157 |     }
158 | 
159 |     #[bench]
160 |     pub fn bench_heavy_hash(bh: &mut Bencher) {
161 |         let mut data = Hash::from_le_bytes([42; 32]);
162 |         bh.iter(|| {
163 |             for _ in 0..100 {
164 |                 black_box(&mut data);
165 |                 black_box(HeavyHasher::hash(data));
166 |             }
167 |         });
168 |     }
169 | }
170 | 


--------------------------------------------------------------------------------
/proto/p2p.proto:
--------------------------------------------------------------------------------
  1 | syntax = "proto3";
  2 | package protowire;
  3 | 
  4 | option go_package = "github.com/kaspanet/kaspad/protowire";
  5 | 
  6 | message RequestAddressesMessage{
  7 |   bool includeAllSubnetworks = 1;
  8 |   SubnetworkId subnetworkId = 2;
  9 | }
 10 | 
 11 | message AddressesMessage{
 12 |   repeated NetAddress addressList = 1;
 13 | }
 14 | 
  15 | message NetAddress{
  16 |   int64 timestamp = 1;
  17 |   bytes ip = 3; // field number 2 is not assigned in this message - NOTE(review): presumably retired, confirm before reusing
  18 |   uint32 port = 4;
  19 | }
 20 | 
 21 | message SubnetworkId{
 22 |   bytes bytes = 1;
 23 | }
 24 | 
  25 | message TransactionMessage{
  26 |   uint32 version = 1;
  27 |   repeated TransactionInput inputs = 2;
  28 |   repeated TransactionOutput outputs = 3;
  29 |   uint64 lockTime = 4;
  30 |   SubnetworkId subnetworkId = 5;
  31 |   uint64 gas = 6;
  32 |   bytes payload = 8; // field number 7 is not assigned in this message - NOTE(review): presumably retired, confirm before reusing
  33 | }
 34 | 
 35 | message TransactionInput{
 36 |   Outpoint previousOutpoint = 1;
 37 |   bytes signatureScript = 2;
 38 |   uint64 sequence = 3;
 39 |   uint32 sigOpCount = 4;
 40 | }
 41 | 
 42 | message Outpoint{
 43 |   TransactionId transactionId = 1;
 44 |   uint32 index = 2;
 45 | }
 46 | 
 47 | message TransactionId{
 48 |   bytes bytes = 1;
 49 | }
 50 | message ScriptPublicKey {
 51 |   bytes script = 1;
 52 |   uint32 version = 2;
 53 | }
 54 | 
 55 | message TransactionOutput{
 56 |   uint64 value = 1;
 57 |   ScriptPublicKey scriptPublicKey = 2;
 58 | }
 59 | 
 60 | message BlockMessage{
 61 |   BlockHeader header = 1;
 62 |   repeated TransactionMessage transactions = 2;
 63 | }
 64 | 
  65 | message BlockHeader{
  66 |   uint32 version = 1;
  67 |   repeated BlockLevelParents parents = 12; // declared out of numeric order; field number 2 is not assigned - NOTE(review): presumably retired, confirm before reusing
  68 |   Hash hashMerkleRoot = 3;
  69 |   Hash acceptedIdMerkleRoot = 4;
  70 |   Hash utxoCommitment = 5;
  71 |   int64 timestamp = 6;
  72 |   uint32 bits = 7;
  73 |   uint64 nonce = 8;
  74 |   uint64 daaScore = 9;
  75 |   bytes blueWork = 10;
  76 |   Hash pruningPoint = 14; // declared before field 13; field number 11 is not assigned in this message
  77 |   uint64 blueScore = 13;
  78 | }
 79 | 
 80 | message BlockLevelParents {
 81 |   repeated Hash parentHashes = 1;
 82 | }
 83 | 
 84 | message Hash{
 85 |   bytes bytes = 1;
 86 | }
 87 | 
 88 | message RequestBlockLocatorMessage{
 89 |   Hash highHash = 1;
 90 |   uint32 limit = 2;
 91 | }
 92 | 
 93 | message BlockLocatorMessage{
 94 |   repeated Hash hashes = 1;
 95 | }
 96 | 
 97 | message RequestHeadersMessage{
 98 |   Hash lowHash = 1;
 99 |   Hash highHash = 2;
100 | }
101 | 
102 | message RequestNextHeadersMessage{
103 | }
104 | 
105 | message DoneHeadersMessage{
106 | }
107 | 
108 | message RequestRelayBlocksMessage{
109 |   repeated Hash hashes = 1;
110 | }
111 | 
112 | message RequestTransactionsMessage {
113 |   repeated TransactionId ids = 1;
114 | }
115 | 
116 | message TransactionNotFoundMessage{
117 |   TransactionId id = 1;
118 | }
119 | 
120 | message InvRelayBlockMessage{
121 |   Hash hash = 1;
122 | }
123 | 
124 | message InvTransactionsMessage{
125 |   repeated TransactionId ids = 1;
126 | }
127 | 
128 | message PingMessage{
129 |   uint64 nonce = 1;
130 | }
131 | 
132 | message PongMessage{
133 |   uint64 nonce = 1;
134 | }
135 | 
136 | message VerackMessage{
137 | }
138 | 
139 | message VersionMessage{
140 |   uint32 protocolVersion = 1;
141 |   uint64 services = 2;
142 |   int64 timestamp = 3;
143 |   NetAddress address = 4;
144 |   bytes id = 5;
145 |   string userAgent = 6;
146 |   bool disableRelayTx = 8;
147 |   SubnetworkId subnetworkId = 9;
148 |   string network = 10;
149 | }
150 | 
151 | message RejectMessage{
152 |   string reason = 1;
153 | }
154 | 
155 | message RequestPruningPointUTXOSetMessage{
156 |   Hash pruningPointHash = 1;
157 | }
158 | 
159 | message PruningPointUtxoSetChunkMessage{
160 |   repeated OutpointAndUtxoEntryPair outpointAndUtxoEntryPairs = 1;
161 | }
162 | 
163 | message OutpointAndUtxoEntryPair{
164 |   Outpoint outpoint = 1;
165 |   UtxoEntry utxoEntry = 2;
166 | }
167 | 
168 | message UtxoEntry {
169 |   uint64 amount = 1;
170 |   ScriptPublicKey scriptPublicKey = 2;
171 |   uint64 blockDaaScore = 3;
172 |   bool isCoinbase = 4;
173 | }
174 | 
175 | message RequestNextPruningPointUtxoSetChunkMessage {
176 | }
177 | 
178 | message DonePruningPointUtxoSetChunksMessage {
179 | }
180 | 
181 | message RequestIBDBlocksMessage{
182 |   repeated Hash hashes = 1;
183 | }
184 | 
185 | message UnexpectedPruningPointMessage{
186 | }
187 | 
188 | message IbdBlockLocatorMessage {
189 |   Hash targetHash = 1;
190 |   repeated Hash blockLocatorHashes = 2;
191 | }
192 | 
193 | message RequestIBDChainBlockLocatorMessage{
194 |   Hash lowHash = 1;
195 |   Hash highHash = 2;
196 | }
197 | 
198 | message IbdChainBlockLocatorMessage {
199 |   repeated Hash blockLocatorHashes = 1;
200 | }
201 | 
202 | message RequestAnticoneMessage{
203 |   Hash blockHash = 1;
204 |   Hash contextHash = 2;
205 | }
206 | 
207 | message IbdBlockLocatorHighestHashMessage {
208 |   Hash highestHash = 1;
209 | }
210 | 
211 | message IbdBlockLocatorHighestHashNotFoundMessage {
212 | }
213 | 
214 | message BlockHeadersMessage {
215 |   repeated BlockHeader blockHeaders = 1;
216 | }
217 | 
218 | message RequestPruningPointAndItsAnticoneMessage {
219 | }
220 | 
221 | message RequestNextPruningPointAndItsAnticoneBlocksMessage{
222 | }
223 | 
224 | message BlockWithTrustedDataMessage {
225 |   BlockMessage block = 1;
226 |   uint64 daaScore = 2;
227 |   repeated DaaBlock daaWindow = 3;
228 |   repeated BlockGhostdagDataHashPair ghostdagData = 4;
229 | }
230 | 
231 | message DaaBlock {
232 |   BlockMessage block = 3;
233 |   GhostdagData ghostdagData = 2;
234 | }
235 | 
236 | message DaaBlockV4 {
237 |   BlockHeader header = 1;
238 |   GhostdagData ghostdagData = 2;
239 | }
240 | 
241 | message BlockGhostdagDataHashPair {
242 |   Hash hash = 1;
243 |   GhostdagData ghostdagData = 2;
244 | }
245 | 
246 | message GhostdagData {
247 |   uint64 blueScore = 1;
248 |   bytes blueWork = 2;
249 |   Hash selectedParent = 3;
250 |   repeated Hash mergeSetBlues = 4;
251 |   repeated Hash mergeSetReds = 5;
252 |   repeated BluesAnticoneSizes bluesAnticoneSizes = 6;
253 | }
254 | 
255 | message BluesAnticoneSizes {
256 |   Hash blueHash = 1;
257 |   uint32 anticoneSize = 2;
258 | }
259 | 
260 | message DoneBlocksWithTrustedDataMessage {
261 | }
262 | 
263 | message PruningPointsMessage {
264 |   repeated BlockHeader headers = 1;
265 | }
266 | 
267 | message RequestPruningPointProofMessage {
268 | }
269 | 
270 | message PruningPointProofMessage {
271 |   repeated PruningPointProofHeaderArray headers = 1;
272 | }
273 | 
274 | message PruningPointProofHeaderArray {
275 |   repeated BlockHeader headers = 1;
276 | }
277 | 
278 | message ReadyMessage {
279 | }
280 | 
281 | message BlockWithTrustedDataV4Message {
282 |   BlockMessage block = 1;
283 |   repeated uint64 daaWindowIndices = 2;
284 |   repeated uint64 ghostdagDataIndices = 3;
285 | }
286 | 
287 | message TrustedDataMessage {
288 |   repeated DaaBlockV4 daaWindow = 1;
289 |   repeated BlockGhostdagDataHashPair ghostdagData = 2;
290 | }
291 | 


--------------------------------------------------------------------------------
/plugins/cuda/src/lib.rs:
--------------------------------------------------------------------------------
  1 | #[macro_use]
  2 | extern crate kaspa_miner;
  3 | 
  4 | use clap::{ArgMatches, FromArgMatches};
  5 | use cust::prelude::*;
  6 | use kaspa_miner::{Plugin, Worker, WorkerSpec};
  7 | use log::LevelFilter;
  8 | use std::error::Error as StdError;
  9 | #[cfg(feature = "overclock")]
 10 | use {
 11 |     log::{error, info},
 12 |     nvml_wrapper::Device as NvmlDevice,
 13 |     nvml_wrapper::Nvml,
 14 | };
 15 | 
 16 | pub type Error = Box<dyn StdError + Send + Sync + 'static>;
 17 | 
 18 | mod cli;
 19 | mod worker;
 20 | 
 21 | use crate::cli::{CudaOpt, NonceGenEnum};
 22 | use crate::worker::CudaGPUWorker;
 23 | 
 24 | const DEFAULT_WORKLOAD_SCALE: f32 = 1024.;
 25 | 
 26 | pub struct CudaPlugin {
 27 |     specs: Vec<CudaWorkerSpec>,
 28 |     #[cfg(feature = "overclock")]
 29 |     nvml_instance: Nvml,
 30 |     _enabled: bool,
 31 | }
 32 | 
 33 | impl CudaPlugin {
 34 |     fn new() -> Result<Self, Error> {
 35 |         cust::init(CudaFlags::empty())?;
 36 |         env_logger::builder().filter_level(LevelFilter::Info).parse_default_env().init();
 37 |         Ok(Self {
 38 |             specs: Vec::new(),
 39 |             _enabled: false,
 40 |             #[cfg(feature = "overclock")]
 41 |             nvml_instance: Nvml::init()?,
 42 |         })
 43 |     }
 44 | }
 45 | 
 46 | impl Plugin for CudaPlugin {
 47 |     fn name(&self) -> &'static str {
 48 |         "CUDA Worker"
 49 |     }
 50 | 
 51 |     fn enabled(&self) -> bool {
 52 |         self._enabled
 53 |     }
 54 | 
 55 |     fn get_worker_specs(&self) -> Vec<Box<dyn WorkerSpec>> {
 56 |         self.specs.iter().map(|spec| Box::new(*spec) as Box<dyn WorkerSpec>).collect::<Vec<Box<dyn WorkerSpec>>>()
 57 |     }
 58 |     /// Builds one worker spec per selected GPU from the parsed CLI options and returns the number of specs held.
 59 |     //noinspection RsTypeCheck
 60 |     fn process_option(&mut self, matches: &ArgMatches) -> Result<usize, kaspa_miner::Error> {
 61 |         let opts: CudaOpt = CudaOpt::from_arg_matches(matches)?;
 62 | 
 63 |         self._enabled = !opts.cuda_disable;
 64 |         if self._enabled {
 65 |             let gpus: Vec<u16> = match &opts.cuda_device {
 66 |                 Some(devices) => devices.clone(),
 67 |                 None => {
 68 |                     let gpu_count = Device::num_devices()? as u16;
 69 |                     (0..gpu_count).collect()
 70 |                 }
 71 |             };
 72 |             // If any of cuda_lock_core_clocks / cuda_lock_mem_clocks / cuda_power_limits is set, apply it per GPU
 73 |             // through the NVML handle created in `new`; a list shorter than the GPU list reuses its last value.
 74 |             #[cfg(feature = "overclock")]
 75 |             if opts.overclock.cuda_lock_core_clocks.is_some()
 76 |                 || opts.overclock.cuda_lock_mem_clocks.is_some()
 77 |                 || opts.overclock.cuda_power_limits.is_some()
 78 |             {
 79 |                 for i in 0..gpus.len() {
 80 |                     let lock_mem_clock: Option<u32> = match &opts.overclock.cuda_lock_mem_clocks {
 81 |                         Some(mem_clocks) if i < mem_clocks.len() => Some(mem_clocks[i]),
 82 |                         Some(mem_clocks) if !mem_clocks.is_empty() => Some(*mem_clocks.last().unwrap()),
 83 |                         _ => None,
 84 |                     };
 85 | 
 86 |                     let lock_core_clock: Option<u32> = match &opts.overclock.cuda_lock_core_clocks {
 87 |                         Some(core_clocks) if i < core_clocks.len() => Some(core_clocks[i]),
 88 |                         Some(core_clocks) if !core_clocks.is_empty() => Some(*core_clocks.last().unwrap()),
 89 |                         _ => None,
 90 |                     };
 91 | 
 92 |                     let power_limit: Option<u32> = match &opts.overclock.cuda_power_limits {
 93 |                         Some(power_limits) if i < power_limits.len() => Some(power_limits[i]),
 94 |                         Some(power_limits) if !power_limits.is_empty() => Some(*power_limits.last().unwrap()),
 95 |                         _ => None,
 96 |                     };
 97 |                     // NOTE(review): assumes NVML device indices match CUDA device ordinals — confirm.
 98 |                     let mut nvml_device: NvmlDevice = self.nvml_instance.device_by_index(gpus[i] as u32)?;
 99 | 
100 |                     if let Some(lmc) = lock_mem_clock {
101 |                         match nvml_device.set_mem_locked_clocks(lmc, lmc) {
102 |                             Err(e) => error!("set mem locked clocks {:?}", e),
103 |                             _ => info!("GPU #{} #{} lock mem clock at {} Mhz", i, &nvml_device.name()?, &lmc),
104 |                         };
105 |                     }
106 | 
107 |                     if let Some(lcc) = lock_core_clock {
108 |                         match nvml_device.set_gpu_locked_clocks(lcc, lcc) {
109 |                             Err(e) => error!("set gpu locked clocks {:?}", e),
110 |                             _ => info!("GPU #{} #{} lock core clock at {} Mhz", i, &nvml_device.name()?, &lcc),
111 |                         };
112 |                     };
113 | 
114 |                     if let Some(pl) = power_limit {
115 |                         match nvml_device.set_power_management_limit(pl * 1000) {
116 |                             Err(e) => error!("set power limit {:?}", e),
117 |                             _ => info!("GPU #{} #{} power limit at {} W", i, &nvml_device.name()?, &pl),
118 |                         };
119 |                     };
120 |                 }
121 |             }
122 | 
123 |             self.specs = (0..gpus.len())
124 |                 .map(|i| CudaWorkerSpec {
125 |                     device_id: gpus[i] as u32,
126 |                     workload: match &opts.cuda_workload {
127 |                         Some(workload) if i < workload.len() => workload[i],
128 |                         Some(workload) if !workload.is_empty() => *workload.last().unwrap(),
129 |                         _ => DEFAULT_WORKLOAD_SCALE,
130 |                     },
131 |                     is_absolute: opts.cuda_workload_absolute,
132 |                     blocking_sync: !opts.cuda_no_blocking_sync,
133 |                     random: opts.cuda_nonce_gen,
134 |                 })
135 |                 .collect();
136 |         }
137 |         Ok(self.specs.len())
138 |     }
139 | }
140 | 
141 | #[derive(Copy, Clone)]
142 | struct CudaWorkerSpec {
143 |     device_id: u32,
144 |     workload: f32,
145 |     is_absolute: bool,
146 |     blocking_sync: bool,
147 |     random: NonceGenEnum,
148 | }
149 | 
150 | impl WorkerSpec for CudaWorkerSpec {
151 |     fn id(&self) -> String {
152 |         let device = Device::get_device(self.device_id).unwrap();
153 |         format!("#{} ({})", self.device_id, device.name().unwrap())
154 |     }
155 | 
156 |     fn build(&self) -> Box<dyn Worker> {
157 |         Box::new(
158 |             CudaGPUWorker::new(self.device_id, self.workload, self.is_absolute, self.blocking_sync, self.random)
159 |                 .unwrap(),
160 |         )
161 |     }
162 | }
163 | 
164 | declare_plugin!(CudaPlugin, CudaPlugin::new, CudaOpt);
165 | 


--------------------------------------------------------------------------------
/.github/workflows/deploy.yaml:
--------------------------------------------------------------------------------
  1 | name: Build and upload assets
  2 | on:
  3 |   release:
  4 |     types: [ published ]
  5 | 
  6 | jobs:
  7 |   build:
  8 |     runs-on: ${{ matrix.os }}
  9 |     strategy:
 10 |       fail-fast: false
 11 |       matrix:
 12 |         # Linux builds run on ubuntu-18.04 (gnu target); Windows and macOS use the latest runners
 13 |         os: [ ubuntu-18.04, windows-latest, macos-latest ]
 14 |         features: ["default", "kaspacuda/overclock"]
 15 |     name: Building, ${{ matrix.os }} ${{ matrix.features }}
 16 |     steps:
 17 |       - name: Fix CRLF on Windows
 18 |         if: runner.os == 'Windows'
 19 |         run: git config --global core.autocrlf false
 20 | 
 21 |       - name: Fix LibOpenCL on Linux
 22 |         if: runner.os == 'Linux'
 23 |         run: |
 24 |           sudo apt update
 25 |           sudo apt install ocl-icd-opencl-dev -y
 26 | 
 27 |       - name: Check out code into the Go module directory
 28 |         uses: actions/checkout@v2
 29 | 
 30 |       - name: Install Protoc
 31 |         uses: arduino/setup-protoc@v1
 32 | 
 33 |       - name: Setup Rust
 34 |         uses: actions-rs/toolchain@v1
 35 |         with:
 36 |           profile: minimal
 37 |           toolchain: stable
 38 |           override: true
 39 | 
 40 |       - name: Install CUDA Linux
 41 |         if: runner.os == 'Linux'
 42 |         uses: Jimver/cuda-toolkit@v0.2.8
 43 |         with:
 44 |           cuda: '11.2.2'
 45 |           method: 'network'
 46 |           sub-packages: '["nvcc", "cudart"]'
 47 | 
 48 |       - name: Install CUDA Windows
 49 |         if: runner.os == 'Windows'
 50 |         uses: Jimver/cuda-toolkit@v0.2.8
 51 |         with:
 52 |           cuda: '11.5.1'
 53 |           method: 'network'
 54 |           sub-packages: '["nvcc", "cudart"]'
 55 | 
 56 |       - name: Build on Linux GNU
 57 |         if: matrix.os == 'ubuntu-18.04'
 58 |         # Targets x86_64-unknown-linux-gnu (not musl); the plugin .so libraries are packaged next to the binary
 59 |         run: |
 60 |           cargo build --target=x86_64-unknown-linux-gnu --release --all --features ${{ matrix.features }}
 61 |           feature_name=${{ matrix.features }}
 62 |           asset_name="kaspa-miner-${{ github.event.release.tag_name }}-${feature_name/\//-}-linux-gnu-amd64"
 63 |           strip ./target/x86_64-unknown-linux-gnu/release/kaspa-miner
 64 |           mkdir ${asset_name}
 65 |           mv ./target/x86_64-unknown-linux-gnu/release/kaspa-miner ${asset_name}/${asset_name}
 66 |           mv ./target/x86_64-unknown-linux-gnu/release/libkaspa*.so ${asset_name}/
 67 |           tar czvf ${asset_name}.tgz ${asset_name}
 68 |           echo "archive=${asset_name}.tgz" >> $GITHUB_ENV
 69 |           echo "asset_name=${asset_name}.tgz" >> $GITHUB_ENV
 70 | 
 71 | 
 72 |       - name: Build on Windows
 73 |         if: matrix.os == 'windows-latest'
 74 |         shell: bash
 75 |         run: |
 76 |           cargo build --target=x86_64-pc-windows-msvc --release --all --features ${{ matrix.features }}
 77 |           feature_name=${{ matrix.features }}
 78 |           asset_name="kaspa-miner-${{ github.event.release.tag_name }}-${feature_name/\//-}-win64-amd64"
 79 |           mkdir ${asset_name}
 80 |           mv ./target/x86_64-pc-windows-msvc/release/kaspa-miner.exe ${asset_name}/${asset_name}.exe
 81 |           mv ./target/x86_64-pc-windows-msvc/release/kaspa*.dll ${asset_name}/
 82 |           bash ./integrations/windows/create_bat.sh ${asset_name}
 83 |           7z a -tzip -r ${asset_name}.zip ${asset_name}
 84 |           echo "archive=${asset_name}.zip" >> $GITHUB_ENV
 85 |           echo "asset_name=${asset_name}.zip" >> $GITHUB_ENV
 86 | 
 87 |       - name: Build on MacOS
 88 |         if: matrix.os == 'macos-latest'
 89 |         run: |
 90 |           cargo build --target=x86_64-apple-darwin --release -p kaspa-miner
 91 |           asset_name="kaspa-miner-${{ github.event.release.tag_name }}-cpu-only-osx-amd64"
 92 |           mkdir ${asset_name}
 93 |           mv ./target/x86_64-apple-darwin/release/kaspa-miner ${asset_name}/${asset_name}
 94 |           tar czvf ${asset_name}.tgz ${asset_name}
 95 |           echo "archive=${asset_name}.tgz" >> $GITHUB_ENV
 96 |           echo "asset_name=${asset_name}.tgz" >> $GITHUB_ENV
 97 | 
 98 | 
 99 |       - name: Upload release asset
100 |         uses: actions/upload-release-asset@v1
101 |         env:
102 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
103 |         with:
104 |           upload_url: ${{ github.event.release.upload_url }}
105 |           asset_path: "./${{ env.archive }}"
106 |           asset_name: "${{ env.asset_name }}"
107 |           asset_content_type: application/zip
108 | 
109 |   intergrations:
110 |     runs-on: ubuntu-18.04
111 |     strategy:
112 |       fail-fast: false
113 |       matrix:
114 |         # Integration packages to build; each name maps to integrations/<name>/build.sh
115 |         itegration: [ hiveos ]
116 |     name: Integrating, ${{ matrix.itegration }}
117 |     steps:
118 |       - name: Fix LibOpenCL on Linux
119 |         run: |
120 |           sudo apt update
121 |           sudo apt install ocl-icd-opencl-dev -y
122 | 
123 |       - name: Check out code into the module directory
124 |         uses: actions/checkout@v2
125 | 
126 |       - name: Install Protoc
127 |         uses: arduino/setup-protoc@v1
128 | 
129 |       - name: Setup Rust
130 |         uses: actions-rs/toolchain@v1
131 |         with:
132 |           profile: minimal
133 |           toolchain: stable
134 |           override: true
135 | 
136 |       - name: Install CUDA Linux
137 |         if: runner.os == 'Linux'
138 |         uses: Jimver/cuda-toolkit@v0.2.8
139 |         with:
140 |           cuda: '11.2.2'
141 |           method: 'network'
142 |           sub-packages: '["nvcc", "cudart"]'
143 | 
144 |       - name: Build ${{ matrix.itegration }} Script
145 |         run: |
146 |           cargo build --target=x86_64-unknown-linux-gnu --release --all
147 |           binary_name="kaspa-miner-${{ github.event.release.tag_name }}-linux-gnu-amd64"
148 |           asset_name="kaspa-miner-${{ matrix.itegration }}"
149 |           strip ./target/x86_64-unknown-linux-gnu/release/kaspa-miner
150 |           mkdir kaspa-miner
151 |           mv ./target/x86_64-unknown-linux-gnu/release/kaspa-miner kaspa-miner/${binary_name}
152 |           mv ./target/x86_64-unknown-linux-gnu/release/libkaspa*.so kaspa-miner/
153 |           bash integrations/${{ matrix.itegration }}/build.sh "${{ github.event.release.tag_name }}" "${binary_name}" kaspa-miner 
154 |           echo "archive=${asset_name}.tgz" >> $GITHUB_ENV
155 |           echo "asset_name=${asset_name}.tgz" >> $GITHUB_ENV
156 | 
157 |       - name: Upload release asset
158 |         uses: actions/upload-release-asset@v1
159 |         env:
160 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
161 |         with:
162 |           upload_url: ${{ github.event.release.upload_url }}
163 |           asset_path: "./${{ env.archive }}"
164 |           asset_name: "${{ env.asset_name }}"
165 |           asset_content_type: application/zip
166 | 


--------------------------------------------------------------------------------
/src/client/stratum/statum_codec.rs:
--------------------------------------------------------------------------------
  1 | use bytes::BytesMut;
  2 | use log::error;
  3 | use serde::{Deserialize, Serialize};
  4 | use serde_json::Value;
  5 | use serde_repr::*;
  6 | use std::fmt::{Display, Formatter};
  7 | use std::{fmt, io};
  8 | use tokio_util::codec::{Decoder, Encoder, LinesCodec};
  9 | 
 10 | #[derive(Serialize_repr, Deserialize_repr, Debug, Clone)]
 11 | #[repr(u8)]
 12 | pub enum ErrorCode {
 13 |     Unknown = 20,
 14 |     JobNotFound = 21,
 15 |     DuplicateShare = 22,
 16 |     LowDifficultyShare = 23,
 17 |     Unauthorized = 24,
 18 |     NotSubscribed = 25,
 19 | }
 20 | 
 21 | impl Display for ErrorCode {
 22 |     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
 23 |         match &self {
 24 |             ErrorCode::Unknown => write!(f, "Unknown"),
 25 |             ErrorCode::JobNotFound => write!(f, "JobNotFound"),
 26 |             ErrorCode::DuplicateShare => write!(f, "DuplicateShare"),
 27 |             ErrorCode::LowDifficultyShare => write!(f, "LowDifficultyShare"),
 28 |             ErrorCode::Unauthorized => write!(f, "Unauthorized"),
 29 |             ErrorCode::NotSubscribed => write!(f, "NotSubscribed"),
 30 |         }
 31 |     }
 32 | }
 33 | 
 34 | #[derive(Serialize, Deserialize, Debug, Clone)]
 35 | pub(crate) struct StratumError(pub(crate) ErrorCode, pub(crate) String, #[serde(default)] pub(crate) Option<Value>);
 36 | 
 37 | #[derive(Serialize, Deserialize, Debug, Clone)]
 38 | #[serde(untagged)]
 39 | pub(crate) enum MiningNotify {
 40 |     MiningNotifyShort((String, [u64; 4], u64)),
 41 |     MiningNotifyLong((String, String, String, String, Vec<String>, String, String, String, bool)),
 42 | }
 43 | 
 44 | #[derive(Serialize, Deserialize, Debug, Clone)]
 45 | #[serde(untagged)]
 46 | pub enum MiningSubmit {
 47 |     MiningSubmitShort((String, String, String)),
 48 |     MiningSubmitLong((String, String, String, String, String)),
 49 | }
 50 | 
 51 | #[derive(Serialize, Deserialize, Debug, Clone)]
 52 | #[serde(untagged)]
 53 | pub enum MiningSubscribe {
 54 |     MiningSubscribeDefault((String,)),
 55 |     MiningSubscribeOptions((String, String)),
 56 | }
 57 | 
 58 | #[derive(Serialize, Deserialize, Debug, Clone)]
 59 | #[serde(untagged)]
 60 | pub enum SetExtranonce {
 61 |     SetExtranoncePlain((String, u32)),
 62 |     SetExtranoncePlainEth((String,)),
 63 | }
 64 | 
 65 | #[derive(Serialize, Deserialize, Debug, Clone)]
 66 | #[serde(tag = "method", content = "params")]
 67 | pub(crate) enum StratumCommand {
 68 |     #[serde(rename = "mining.set_extranonce", alias = "set_extranonce")]
 69 |     SetExtranonce(SetExtranonce),
 70 |     #[serde(rename = "mining.set_difficulty")]
 71 |     MiningSetDifficulty((f32,)),
 72 |     #[serde(rename = "mining.notify")]
 73 |     MiningNotify(MiningNotify),
 74 |     #[serde(rename = "mining.subscribe")]
 75 |     Subscribe(MiningSubscribe),
 76 |     #[serde(rename = "mining.authorize")]
 77 |     Authorize((String, String)),
 78 |     #[serde(rename = "mining.submit")]
 79 |     MiningSubmit(MiningSubmit),
 80 |     /*#[serde(rename = "mining.submit_hashrate")]
 81 |     MiningSubmitHashrate {
 82 |         params: (String, String),
 83 |         worker: String,
 84 |     },*/ //{"id":9,"method":"mining.submit_hashrate","jsonrpc":"2.0","worker":"rig","params":["0x00000000000000000000000000000000","0x85198cd10b915d560722cdfdf490d4d93892d2cc3fa5f2ff2195d499d04ee54c"]}
 85 | }
 86 | 
 87 | #[derive(Serialize, Deserialize, Debug, Clone)]
 88 | #[serde(untagged)]
 89 | pub(crate) enum StratumResult {
 90 |     Plain(Option<bool>),
 91 |     Eth((bool, String)),
 92 |     Subscribe((Vec<(String, String)>, String, u32)),
 93 | }
 94 | 
 95 | #[derive(Serialize, Deserialize, Debug, Clone)]
 96 | #[serde(untagged)]
 97 | pub(crate) enum StratumLinePayload {
 98 |     StratumCommand(StratumCommand),
 99 |     StratumResult { result: StratumResult },
100 | }
101 | 
102 | #[derive(Serialize, Deserialize, Debug, Clone)]
103 | pub(crate) struct StratumLine {
104 |     pub(crate) id: Option<u32>,
105 |     #[serde(flatten)]
106 |     pub(crate) payload: StratumLinePayload,
107 |     #[serde(skip_serializing_if = "Option::is_none")]
108 |     pub(crate) jsonrpc: Option<String>,
109 |     pub(crate) error: Option<StratumError>,
110 | }
111 | 
112 | /// An error occurred while encoding or decoding a line.
113 | #[derive(Debug)]
114 | pub(crate) enum NewLineJsonCodecError {
115 |     JsonParseError(String),
116 |     JsonEncodeError,
117 |     LineSplitError,
118 |     LineEncodeError,
119 |     Io(io::Error),
120 | }
121 | 
122 | impl fmt::Display for NewLineJsonCodecError {
123 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
124 |         write!(f, "Stratum codec error: {:?}", self) // Debug form keeps the variant and its payload
125 |     }
126 | }
127 | impl From<io::Error> for NewLineJsonCodecError {
128 |     fn from(e: io::Error) -> NewLineJsonCodecError {
129 |         NewLineJsonCodecError::Io(e)
130 |     }
131 | }
132 | impl std::error::Error for NewLineJsonCodecError {}
133 | 
134 | impl From<(String, String)> for NewLineJsonCodecError {
135 |     fn from(e: (String, String)) -> Self {
136 |         NewLineJsonCodecError::JsonParseError(format!("{}: {}", e.0, e.1))
137 |     }
138 | }
139 | 
140 | pub(crate) struct NewLineJsonCodec {
141 |     lines_codec: LinesCodec,
142 | }
143 | 
144 | impl NewLineJsonCodec {
145 |     pub fn new() -> Self {
146 |         Self { lines_codec: LinesCodec::new() }
147 |     }
148 | }
149 | 
150 | impl Decoder for NewLineJsonCodec {
151 |     type Item = StratumLine;
152 |     type Error = NewLineJsonCodecError;
153 |     /// Takes one line from `src` via the inner `LinesCodec` and parses it as a JSON `StratumLine`.
154 |     fn decode(&mut self, src: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
155 |         match self.lines_codec.decode(src) {
156 |             Ok(Some(s)) => {
157 |                 serde_json::from_str::<StratumLine>(s.as_str()).map_err(|e| (e.to_string(), s).into()).map(Some)
158 |             }
159 |             Err(_) => Err(NewLineJsonCodecError::LineSplitError),
160 |             _ => Ok(None),
161 |         }
162 |     }
163 |     /// End-of-stream variant: parses the final line with the same explicit target type as `decode`.
164 |     fn decode_eof(&mut self, buf: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
165 |         match self.lines_codec.decode_eof(buf) {
166 |             Ok(Some(s)) => serde_json::from_str::<StratumLine>(&s).map(Some).map_err(|e| (e.to_string(), s).into()),
167 |             Err(_) => Err(NewLineJsonCodecError::LineSplitError),
168 |             _ => Ok(None),
169 |         }
170 |     }
171 | }
172 | 
173 | impl Encoder<StratumLine> for NewLineJsonCodec {
174 |     type Error = NewLineJsonCodecError;
175 | 
176 |     fn encode(&mut self, item: StratumLine, dst: &mut BytesMut) -> Result<(), Self::Error> {
177 |         match serde_json::to_string(&item) {
178 |             Ok(json) => self.lines_codec.encode(json, dst).map_err(|_| NewLineJsonCodecError::LineEncodeError),
179 |             Err(e) => {
180 |                 error!("Error! {:?}", e);
181 |                 Err(NewLineJsonCodecError::JsonEncodeError)
182 |             }
183 |         }
184 |     }
185 | }
186 | 
187 | impl Default for NewLineJsonCodec {
188 |     fn default() -> Self {
189 |         Self::new()
190 |     }
191 | }
192 | 


--------------------------------------------------------------------------------
/plugins/cuda/kaspa-cuda-native/src/kaspa-cuda.cu:
--------------------------------------------------------------------------------
  1 | #include<stdint.h>
  2 | #include <assert.h>
  3 | #include "keccak-tiny.c"
  4 | #include "xoshiro256starstar.c"
  5 | 
  6 | 
  7 | 
  8 | typedef uint8_t Hash[32];
  9 | 
 10 | typedef union _uint256_t {
 11 |     uint64_t number[4];
 12 |     uint8_t hash[32];
 13 | } uint256_t;
 14 | 
 15 | #define BLOCKDIM 1024
 16 | #define MATRIX_SIZE 64
 17 | #define HALF_MATRIX_SIZE 32
 18 | #define QUARTER_MATRIX_SIZE 16
 19 | #define HASH_HEADER_SIZE 72
 20 | 
 21 | #define RANDOM_LEAN 0
 22 | #define RANDOM_XOSHIRO 1
 23 | 
 24 | #define LT_U256(X,Y) (X.number[3] != Y.number[3] ? X.number[3] < Y.number[3] : X.number[2] != Y.number[2] ? X.number[2] < Y.number[2] : X.number[1] != Y.number[1] ? X.number[1] < Y.number[1] : X.number[0] < Y.number[0])
 25 | 
 26 | __constant__ uint8_t matrix[MATRIX_SIZE][MATRIX_SIZE];
 27 | __constant__ uint8_t hash_header[HASH_HEADER_SIZE];
 28 | __constant__ uint256_t target;
 29 | __constant__ static const uint8_t powP[Plen] = { 0x3d, 0xd8, 0xf6, 0xa1, 0x0d, 0xff, 0x3c, 0x11, 0x3c, 0x7e, 0x02, 0xb7, 0x55, 0x88, 0xbf, 0x29, 0xd2, 0x44, 0xfb, 0x0e, 0x72, 0x2e, 0x5f, 0x1e, 0xa0, 0x69, 0x98, 0xf5, 0xa3, 0xa4, 0xa5, 0x1b, 0x65, 0x2d, 0x5e, 0x87, 0xca, 0xaf, 0x2f, 0x7b, 0x46, 0xe2, 0xdc, 0x29, 0xd6, 0x61, 0xef, 0x4a, 0x10, 0x5b, 0x41, 0xad, 0x1e, 0x98, 0x3a, 0x18, 0x9c, 0xc2, 0x9b, 0x78, 0x0c, 0xf6, 0x6b, 0x77, 0x40, 0x31, 0x66, 0x88, 0x33, 0xf1, 0xeb, 0xf8, 0xf0, 0x5f, 0x28, 0x43, 0x3c, 0x1c, 0x65, 0x2e, 0x0a, 0x4a, 0xf1, 0x40, 0x05, 0x07, 0x96, 0x0f, 0x52, 0x91, 0x29, 0x5b, 0x87, 0x67, 0xe3, 0x44, 0x15, 0x37, 0xb1, 0x25, 0xa4, 0xf1, 0x70, 0xec, 0x89, 0xda, 0xe9, 0x82, 0x8f, 0x5d, 0xc8, 0xe6, 0x23, 0xb2, 0xb4, 0x85, 0x1f, 0x60, 0x1a, 0xb2, 0x46, 0x6a, 0xa3, 0x64, 0x90, 0x54, 0x85, 0x34, 0x1a, 0x85, 0x2f, 0x7a, 0x1c, 0xdd, 0x06, 0x0f, 0x42, 0xb1, 0x3b, 0x56, 0x1d, 0x02, 0xa2, 0xc1, 0xe4, 0x68, 0x16, 0x45, 0xe4, 0xe5, 0x1d, 0xba, 0x8d, 0x5f, 0x09, 0x05, 0x41, 0x57, 0x02, 0xd1, 0x4a, 0xcf, 0xce, 0x9b, 0x84, 0x4e, 0xca, 0x89, 0xdb, 0x2e, 0x74, 0xa8, 0x27, 0x94, 0xb0, 0x48, 0x72, 0x52, 0x8b, 0xe7, 0x9c, 0xce, 0xfc, 0xb1, 0xbc, 0xa5, 0xaf, 0x82, 0xcf, 0x29, 0x11, 0x5d, 0x83, 0x43, 0x82, 0x6f, 0x78, 0x7c, 0xb9, 0x02 };
 30 | __constant__ static const uint8_t heavyP[Plen] = { 0x09, 0x85, 0x24, 0xb2, 0x52, 0x4c, 0xd7, 0x3a, 0x16, 0x42, 0x9f, 0x2f, 0x0e, 0x9b, 0x62, 0x79, 0xee, 0xf8, 0xc7, 0x16, 0x48, 0xff, 0x14, 0x7a, 0x98, 0x64, 0x05, 0x80, 0x4c, 0x5f, 0xa7, 0x11, 0xda, 0xce, 0xee, 0x44, 0xdf, 0xe0, 0x20, 0xe7, 0x69, 0x40, 0xf3, 0x14, 0x2e, 0xd8, 0xc7, 0x72, 0xba, 0x35, 0x89, 0x93, 0x2a, 0xff, 0x00, 0xc1, 0x62, 0xc4, 0x0f, 0x25, 0x40, 0x90, 0x21, 0x5e, 0x48, 0x6a, 0xcf, 0x0d, 0xa6, 0xf9, 0x39, 0x80, 0x0c, 0x3d, 0x2a, 0x79, 0x9f, 0xaa, 0xbc, 0xa0, 0x26, 0xa2, 0xa9, 0xd0, 0x5d, 0xc0, 0x31, 0xf4, 0x3f, 0x8c, 0xc1, 0x54, 0xc3, 0x4c, 0x1f, 0xd3, 0x3d, 0xcc, 0x69, 0xa7, 0x01, 0x7d, 0x6b, 0x6c, 0xe4, 0x93, 0x24, 0x56, 0xd3, 0x5b, 0xc6, 0x2e, 0x44, 0xb0, 0xcd, 0x99, 0x3a, 0x4b, 0xf7, 0x4e, 0xb0, 0xf2, 0x34, 0x54, 0x83, 0x86, 0x4c, 0x77, 0x16, 0x94, 0xbc, 0x36, 0xb0, 0x61, 0xe9, 0x07, 0x07, 0xcc, 0x65, 0x77, 0xb1, 0x1d, 0x8f, 0x7e, 0x39, 0x6d, 0xc4, 0xba, 0x80, 0xdb, 0x8f, 0xea, 0x58, 0xca, 0x34, 0x7b, 0xd3, 0xf2, 0x92, 0xb9, 0x57, 0xb9, 0x81, 0x84, 0x04, 0xc5, 0x76, 0xc7, 0x2e, 0xc2, 0x12, 0x51, 0x67, 0x9f, 0xc3, 0x47, 0x0a, 0x0c, 0x29, 0xb5, 0x9d, 0x39, 0xbb, 0x92, 0x15, 0xc6, 0x9f, 0x2f, 0x31, 0xe0, 0x9a, 0x54, 0x35, 0xda, 0xb9, 0x10, 0x7d, 0x32, 0x19, 0x16 };
 31 | 
 32 | __device__ __inline__ void amul4bit(uint32_t packed_vec1[32], uint32_t packed_vec2[32], uint32_t *ret) {
 33 |     // Dot product over the first QUARTER_MATRIX_SIZE words; each 32-bit word packs four 8-bit values: A0 B0 C0 D0
 34 |     unsigned int res = 0;
 35 |     #if __CUDA_ARCH__ < 610
 36 |     char4 *a4 = (char4*)packed_vec1;
 37 |     char4 *b4 = (char4*)packed_vec2;
 38 |     #endif
 39 |     #pragma unroll
 40 |     for (int i=0; i<QUARTER_MATRIX_SIZE; i++) {
 41 |         #if __CUDA_ARCH__ >= 610
 42 |         res = __dp4a(packed_vec1[i], packed_vec2[i], res);
 43 |         #else
 44 |         res += a4[i].x*b4[i].x;
 45 |         res += a4[i].y*b4[i].y;
 46 |         res += a4[i].z*b4[i].z;
 47 |         res += a4[i].w*b4[i].w;
 48 |         #endif
 49 |     }
 50 |     // NOTE(review): the pre-6.1 fallback reads values through char4, so it presumably expects each value below 0x80 — confirm
 51 |     *ret = res;
 52 | }
 53 | 
 54 | 
 55 | extern "C" {
 56 | 
 57 | 
    // Kernel: each thread derives one candidate nonce, hashes header+nonce with hash(powP, ...),
    // mixes the digest with `matrix`, hashes the result again with hash(heavyP, ...) and, if the
    // final value compares below `target`, tries to publish the nonce through *final_nonce.
    //
    //   nonce_mask / nonce_fixed - final nonce is (generated & nonce_mask) | nonce_fixed
    //   nonces_len               - number of threads that do work; threads with a larger id exit
    //   random_type              - RANDOM_LEAN: states[0] ^ thread id; otherwise one xoshiro256
    //                              state (ulonglong4) per thread, advanced in place
    //   states                   - generator state buffer, interpreted according to random_type
    //   final_nonce              - output slot; 0 means "nothing found" (a found nonce equal to 0
    //                              is therefore indistinguishable from no result)
    //
    // hash_header, matrix, target, powP and hash() are defined outside this view.
    __global__ void heavy_hash(const uint64_t nonce_mask, const uint64_t nonce_fixed, const uint64_t nonces_len, uint8_t random_type, void* states, uint64_t *final_nonce) {
        // assuming header_len is 72
        int nonceId = threadIdx.x + blockIdx.x*blockDim.x;
        if (nonceId < nonces_len) {
            // NOTE(review): global thread 0 clears the result slot with a plain store while other
            // threads may already be running their atomicCAS below; nothing in this kernel orders
            // the two, so a hit could presumably be overwritten -- confirm how the host handles it.
            if (nonceId == 0) *final_nonce = 0;
            uint64_t nonce;
            switch (random_type) {
                case RANDOM_LEAN:
                    nonce = ((uint64_t *)states)[0] ^ nonceId;
                    break;
                case RANDOM_XOSHIRO:
                default:
                    nonce = xoshiro256_next(((ulonglong4 *)states) + nonceId);
                    break;
            }
            nonce = (nonce & nonce_mask) | nonce_fixed;
            // header
            uint8_t input[80];
            memcpy(input, hash_header, HASH_HEADER_SIZE);
            // data
            // TODO: check endianness? (the nonce is appended in the device's native byte order)
            uint256_t hash_;
            memcpy(input +  HASH_HEADER_SIZE, (uint8_t *)(&nonce), 8);
            hash(powP, hash_.hash, input);

            //assert((rowId != 0) || (hashId != 0) );
            // Split every digest byte into its high and low nibble, one nibble per uchar4 lane,
            // so that four nibbles share a 32-bit word as amul4bit expects.
            uchar4 packed_hash[QUARTER_MATRIX_SIZE] = {0};
            #pragma unroll
            for (int i=0; i<QUARTER_MATRIX_SIZE; i++) {
                packed_hash[i] = make_uchar4(
                    (hash_.hash[2*i] & 0xF0) >> 4 ,
                    (hash_.hash[2*i] & 0x0F),
                    (hash_.hash[2*i+1] & 0xF0) >> 4,
                    (hash_.hash[2*i+1] & 0x0F)
                );
            }
            uint32_t product1, product2;
            // Two matrix rows produce one output byte: row 2*rowId supplies the high nibble,
            // row 2*rowId+1 the low nibble; the byte is XORed into the digest in place.
            #pragma unroll
            for (int rowId=0; rowId<HALF_MATRIX_SIZE; rowId++){

                amul4bit((uint32_t *)(matrix[(2*rowId)]), (uint32_t *)(packed_hash), &product1);
                amul4bit((uint32_t *)(matrix[(2*rowId+1)]), (uint32_t *)(packed_hash), &product2);
                // (product1 >> 6) & 0xF0 is bits 10..13 of product1 moved into the high nibble.
                product1 >>= 6;
                product1 &= 0xF0;
                product2 >>= 10;
                #if __CUDA_ARCH__ < 500 || __CUDA_ARCH__ > 700
                hash_.hash[rowId] = hash_.hash[rowId] ^ ((uint8_t)(product1) | (uint8_t)(product2));
                #else
                // Same (product1 | product2) ^ byte as the branch above, as a single lop3
                // instruction: lookup table 0x56 == (0xF0 | 0xCC) ^ 0xAA. The store back to a
                // uint8_t keeps only the low byte.
                uint32_t lop_temp = hash_.hash[rowId];
                asm("lop3.b32" " %0, %1, %2, %3, 0x56;": "=r" (lop_temp): "r" (product1), "r" (product2), "r" (lop_temp));
                hash_.hash[rowId] = lop_temp;
                #endif
            }
            // Second pass: the 32 mixed bytes, zero-padded to 80, are hashed with heavyP.
            memset(input, 0, 80);
            memcpy(input, hash_.hash, 32);
            hash(heavyP, hash_.hash, input);
            if (LT_U256(hash_, target)){
                // Publish only if the slot still holds 0, i.e. the first writer wins.
                atomicCAS((unsigned long long int*) final_nonce, 0, (unsigned long long int) nonce);
            }
        }
    }
119 | 
120 | }


--------------------------------------------------------------------------------
/src/keccakf1600_x86-64-osx.s:
--------------------------------------------------------------------------------
  1 | # Source: https://github.com/dot-asm/cryptogams/blob/master/x86_64/keccak1600-x86_64.pl
  2 | 
  3 | .text
  4 | 
  5 | 
# __KeccakF1600 -- internal worker called by _KeccakF1600 further down; not exported.
# Register contract, as set up by that caller:
#   %rdi = state pointer + 100 (the 25 64-bit lanes are addressed as -100..92(%rdi))
#   %rsi = 200-byte scratch buffer + 100, addressed the same way
#   %r15 = address of the `iotas` constant table
# Every pass of the loop reads lanes through %rdi, writes the transformed lanes through %rsi
# and then swaps the two pointers. %rax, %rbx, %rcx, %rdx, %rbp and %r8-%r14 are clobbered;
# %r15 is advanced inside the loop and restored before returning.
.p2align	5
__KeccakF1600:
.cfi_startproc
# The four bytes below encode endbr64.
	.byte	0xf3,0x0f,0x1e,0xfa

# Preload the last five lanes (byte offsets 160..192 of the state).
	movq	60(%rdi),%rax
	movq	68(%rdi),%rbx
	movq	76(%rdi),%rcx
	movq	84(%rdi),%rdx
	movq	92(%rdi),%rbp
	jmp	L$oop

.p2align	5
L$oop:
# Column sums: %rax, %rbx, %rcx, %rdx, %rbp each accumulate the XOR of the five lanes whose
# index is congruent to 0, 1, 2, 3, 4 (mod 5). Each register is then replaced by
# rol(one column sum, 1) XOR another column sum.
	movq	-100(%rdi),%r8
	movq	-52(%rdi),%r9
	movq	-4(%rdi),%r10
	movq	44(%rdi),%r11

	xorq	-84(%rdi),%rcx
	xorq	-76(%rdi),%rdx
	xorq	%r8,%rax
	xorq	-92(%rdi),%rbx
	xorq	-44(%rdi),%rcx
	xorq	-60(%rdi),%rax
	movq	%rbp,%r12
	xorq	-68(%rdi),%rbp

	xorq	%r10,%rcx
	xorq	-20(%rdi),%rax
	xorq	-36(%rdi),%rdx
	xorq	%r9,%rbx
	xorq	-28(%rdi),%rbp

	xorq	36(%rdi),%rcx
	xorq	20(%rdi),%rax
	xorq	4(%rdi),%rdx
	xorq	-12(%rdi),%rbx
	xorq	12(%rdi),%rbp

	movq	%rcx,%r13
	rolq	$1,%rcx
	xorq	%rax,%rcx
	xorq	%r11,%rdx

	rolq	$1,%rax
	xorq	%rdx,%rax
	xorq	28(%rdi),%rbx

	rolq	$1,%rdx
	xorq	%rbx,%rdx
	xorq	52(%rdi),%rbp

	rolq	$1,%rbx
	xorq	%rbp,%rbx

	rolq	$1,%rbp
	xorq	%r13,%rbp
# From here on, five input lanes at a time are XORed with the terms above, rotated by fixed
# amounts, combined with and/or/not and stored into the destination buffer.
	xorq	%rcx,%r9
	xorq	%rdx,%r10
	rolq	$44,%r9
	xorq	%rbp,%r11
	xorq	%rax,%r12
	rolq	$43,%r10
	xorq	%rbx,%r8
	movq	%r9,%r13
	rolq	$21,%r11
	orq	%r10,%r9
	xorq	%r8,%r9
	rolq	$14,%r12

# XOR this iteration's constant from the table into the first output lane, then step %r15
# to the next table entry.
	xorq	(%r15),%r9
	leaq	8(%r15),%r15

	movq	%r12,%r14
	andq	%r11,%r12
	movq	%r9,-100(%rsi)
	xorq	%r10,%r12
	notq	%r10
	movq	%r12,-84(%rsi)

	orq	%r11,%r10
	movq	76(%rdi),%r12
	xorq	%r13,%r10
	movq	%r10,-92(%rsi)

	andq	%r8,%r13
	movq	-28(%rdi),%r9
	xorq	%r14,%r13
	movq	-20(%rdi),%r10
	movq	%r13,-68(%rsi)

	orq	%r8,%r14
	movq	-76(%rdi),%r8
	xorq	%r11,%r14
	movq	28(%rdi),%r11
	movq	%r14,-76(%rsi)


	xorq	%rbp,%r8
	xorq	%rdx,%r12
	rolq	$28,%r8
	xorq	%rcx,%r11
	xorq	%rax,%r9
	rolq	$61,%r12
	rolq	$45,%r11
	xorq	%rbx,%r10
	rolq	$20,%r9
	movq	%r8,%r13
	orq	%r12,%r8
	rolq	$3,%r10

	xorq	%r11,%r8
	movq	%r8,-36(%rsi)

	movq	%r9,%r14
	andq	%r13,%r9
	movq	-92(%rdi),%r8
	xorq	%r12,%r9
	notq	%r12
	movq	%r9,-28(%rsi)

	orq	%r11,%r12
	movq	-44(%rdi),%r9
	xorq	%r10,%r12
	movq	%r12,-44(%rsi)

	andq	%r10,%r11
	movq	60(%rdi),%r12
	xorq	%r14,%r11
	movq	%r11,-52(%rsi)

	orq	%r10,%r14
	movq	4(%rdi),%r10
	xorq	%r13,%r14
	movq	52(%rdi),%r11
	movq	%r14,-60(%rsi)


	xorq	%rbp,%r10
	xorq	%rax,%r11
	rolq	$25,%r10
	xorq	%rdx,%r9
	rolq	$8,%r11
	xorq	%rbx,%r12
	rolq	$6,%r9
	xorq	%rcx,%r8
	rolq	$18,%r12
	movq	%r10,%r13
	andq	%r11,%r10
	rolq	$1,%r8

	notq	%r11
	xorq	%r9,%r10
	movq	%r10,-12(%rsi)

	movq	%r12,%r14
	andq	%r11,%r12
	movq	-12(%rdi),%r10
	xorq	%r13,%r12
	movq	%r12,-4(%rsi)

	orq	%r9,%r13
	movq	84(%rdi),%r12
	xorq	%r8,%r13
	movq	%r13,-20(%rsi)

	andq	%r8,%r9
	xorq	%r14,%r9
	movq	%r9,12(%rsi)

	orq	%r8,%r14
	movq	-60(%rdi),%r9
	xorq	%r11,%r14
	movq	36(%rdi),%r11
	movq	%r14,4(%rsi)


	movq	-68(%rdi),%r8

	xorq	%rcx,%r10
	xorq	%rdx,%r11
	rolq	$10,%r10
	xorq	%rbx,%r9
	rolq	$15,%r11
	xorq	%rbp,%r12
	rolq	$36,%r9
	xorq	%rax,%r8
	rolq	$56,%r12
	movq	%r10,%r13
	orq	%r11,%r10
	rolq	$27,%r8

	notq	%r11
	xorq	%r9,%r10
	movq	%r10,28(%rsi)

	movq	%r12,%r14
	orq	%r11,%r12
	xorq	%r13,%r12
	movq	%r12,36(%rsi)

	andq	%r9,%r13
	xorq	%r8,%r13
	movq	%r13,20(%rsi)

	orq	%r8,%r9
	xorq	%r14,%r9
	movq	%r9,52(%rsi)

	andq	%r14,%r8
	xorq	%r11,%r8
	movq	%r8,44(%rsi)


	xorq	-84(%rdi),%rdx
	xorq	-36(%rdi),%rbp
	rolq	$62,%rdx
	xorq	68(%rdi),%rcx
	rolq	$55,%rbp
	xorq	12(%rdi),%rax
	rolq	$2,%rcx
	xorq	20(%rdi),%rbx
# Swap source and destination: the buffer just written becomes the input of the next pass,
# and the last five lanes are stored through the new %rdi.
	xchgq	%rsi,%rdi
	rolq	$39,%rax
	rolq	$41,%rbx
	movq	%rdx,%r13
	andq	%rbp,%rdx
	notq	%rbp
	xorq	%rcx,%rdx
	movq	%rdx,92(%rdi)

	movq	%rax,%r14
	andq	%rbp,%rax
	xorq	%r13,%rax
	movq	%rax,60(%rdi)

	orq	%rcx,%r13
	xorq	%rbx,%r13
	movq	%r13,84(%rdi)

	andq	%rbx,%rcx
	xorq	%r14,%rcx
	movq	%rcx,76(%rdi)

	orq	%r14,%rbx
	xorq	%rbp,%rbx
	movq	%rbx,68(%rdi)

# Re-establish the loop-entry assignment: %rax..%rbp again hold the lanes at 60..92(%rdi).
	movq	%rdx,%rbp
	movq	%r13,%rdx

# The table ends on a 256-byte boundary, so the low byte of %r15 becomes zero exactly when
# all 24 constants have been consumed.
	testq	$255,%r15
	jnz	L$oop

# Rewind %r15 by the table size (24 * 8 bytes) so it points at `iotas` again.
	leaq	-192(%r15),%r15
# 0xf3,0xc3 encodes "rep ret".
	.byte	0xf3,0xc3
.cfi_endproc
264 | 
265 | 
# _KeccakF1600 -- exported entry point. Takes the address of the 200-byte state (25 lanes of
# 64 bits) in %rdi and transforms it in place by running the __KeccakF1600 loop over the
# whole `iotas` table. Saves and restores %rbx, %rbp and %r12-%r15; uses 200 bytes of stack
# as the second buffer of the worker.
.globl	_KeccakF1600

.p2align	5
_KeccakF1600:
.cfi_startproc
# The four bytes below encode endbr64.
	.byte	0xf3,0x0f,0x1e,0xfa


	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

# Bias the state pointer by 100 so every lane is reachable at -100..92(%rdi)
# (presumably to keep all displacements within a signed byte), and reserve the scratch buffer.
	leaq	100(%rdi),%rdi
	subq	$200,%rsp
.cfi_adjust_cfa_offset	200

# Complement lanes 1, 2, 8, 12, 17 and 20 before the loop; the same six lanes are
# complemented again after it. The worker's and/or/not sequences operate on this
# representation.
	notq	-92(%rdi)
	notq	-84(%rdi)
	notq	-36(%rdi)
	notq	-4(%rdi)
	notq	36(%rdi)
	notq	60(%rdi)

	leaq	iotas(%rip),%r15
	leaq	100(%rsp),%rsi

	call	__KeccakF1600

# The worker swaps %rdi/%rsi once per pass; after its 24 passes %rdi is the caller's state again.
	notq	-92(%rdi)
	notq	-84(%rdi)
	notq	-36(%rdi)
	notq	-4(%rdi)
	notq	36(%rdi)
	notq	60(%rdi)
	leaq	-100(%rdi),%rdi

	addq	$200,%rsp
.cfi_adjust_cfa_offset	-200

	popq	%r15
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r15
	popq	%r14
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r14
	popq	%r13
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r13
	popq	%r12
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r12
	popq	%rbp
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbp
	popq	%rbx
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbx
# 0xf3,0xc3 encodes "rep ret".
	.byte	0xf3,0xc3
.cfi_endproc
340 | 
# Constant table for the loop in __KeccakF1600: 24 quadwords, one XORed in per pass.
# The values are the 24 Keccak-f[1600] round constants.
# Layout matters: 256-byte alignment plus 64 bytes of zero padding puts the end of the
# 192-byte table on a 256-byte boundary, which the loop's `testq $255,%r15` exit test
# depends on. Do not insert or remove anything between the alignment directive and the
# last .quad.
.p2align	8
.quad	0,0,0,0,0,0,0,0

iotas:
.quad	0x0000000000000001
.quad	0x0000000000008082
.quad	0x800000000000808a
.quad	0x8000000080008000
.quad	0x000000000000808b
.quad	0x0000000080000001
.quad	0x8000000080008081
.quad	0x8000000000008009
.quad	0x000000000000008a
.quad	0x0000000000000088
.quad	0x0000000080008009
.quad	0x000000008000000a
.quad	0x000000008000808b
.quad	0x800000000000008b
.quad	0x8000000000008089
.quad	0x8000000000008003
.quad	0x8000000000008002
.quad	0x8000000000000080
.quad	0x000000000000800a
.quad	0x800000008000000a
.quad	0x8000000080008081
.quad	0x8000000000008080
.quad	0x0000000080000001
.quad	0x8000000080008008

# NUL-terminated ASCII identification string:
# "Keccak-1600 absorb and squeeze for x86_64, CRYPTOGAMS by <appro@openssl.org>"
.byte	75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
371 | 


--------------------------------------------------------------------------------
/src/keccakf1600_x86-64.s:
--------------------------------------------------------------------------------
  1 | # Source: https://github.com/dot-asm/cryptogams/blob/master/x86_64/keccak1600-x86_64.pl
  2 | 
  3 | .text
  4 | 
# __KeccakF1600 -- internal worker called by KeccakF1600 further down; not exported.
# Register contract, as set up by that caller:
#   %rdi = state pointer + 100 (the 25 64-bit lanes are addressed as -100..92(%rdi))
#   %rsi = 200-byte scratch buffer + 100, addressed the same way
#   %r15 = address of the `iotas` constant table
# Every pass of the loop reads lanes through %rdi, writes the transformed lanes through %rsi
# and then swaps the two pointers. %rax, %rbx, %rcx, %rdx, %rbp and %r8-%r14 are clobbered;
# %r15 is advanced inside the loop and restored before returning.
.type	__KeccakF1600,@function
.align	32
__KeccakF1600:
.cfi_startproc
# The four bytes below encode endbr64.
	.byte	0xf3,0x0f,0x1e,0xfa

# Preload the last five lanes (byte offsets 160..192 of the state).
	movq	60(%rdi),%rax
	movq	68(%rdi),%rbx
	movq	76(%rdi),%rcx
	movq	84(%rdi),%rdx
	movq	92(%rdi),%rbp
	jmp	.Loop

.align	32
.Loop:
# Column sums: %rax, %rbx, %rcx, %rdx, %rbp each accumulate the XOR of the five lanes whose
# index is congruent to 0, 1, 2, 3, 4 (mod 5). Each register is then replaced by
# rol(one column sum, 1) XOR another column sum.
	movq	-100(%rdi),%r8
	movq	-52(%rdi),%r9
	movq	-4(%rdi),%r10
	movq	44(%rdi),%r11

	xorq	-84(%rdi),%rcx
	xorq	-76(%rdi),%rdx
	xorq	%r8,%rax
	xorq	-92(%rdi),%rbx
	xorq	-44(%rdi),%rcx
	xorq	-60(%rdi),%rax
	movq	%rbp,%r12
	xorq	-68(%rdi),%rbp

	xorq	%r10,%rcx
	xorq	-20(%rdi),%rax
	xorq	-36(%rdi),%rdx
	xorq	%r9,%rbx
	xorq	-28(%rdi),%rbp

	xorq	36(%rdi),%rcx
	xorq	20(%rdi),%rax
	xorq	4(%rdi),%rdx
	xorq	-12(%rdi),%rbx
	xorq	12(%rdi),%rbp

	movq	%rcx,%r13
	rolq	$1,%rcx
	xorq	%rax,%rcx
	xorq	%r11,%rdx

	rolq	$1,%rax
	xorq	%rdx,%rax
	xorq	28(%rdi),%rbx

	rolq	$1,%rdx
	xorq	%rbx,%rdx
	xorq	52(%rdi),%rbp

	rolq	$1,%rbx
	xorq	%rbp,%rbx

	rolq	$1,%rbp
	xorq	%r13,%rbp
# From here on, five input lanes at a time are XORed with the terms above, rotated by fixed
# amounts, combined with and/or/not and stored into the destination buffer.
	xorq	%rcx,%r9
	xorq	%rdx,%r10
	rolq	$44,%r9
	xorq	%rbp,%r11
	xorq	%rax,%r12
	rolq	$43,%r10
	xorq	%rbx,%r8
	movq	%r9,%r13
	rolq	$21,%r11
	orq	%r10,%r9
	xorq	%r8,%r9
	rolq	$14,%r12

# XOR this iteration's constant from the table into the first output lane, then step %r15
# to the next table entry.
	xorq	(%r15),%r9
	leaq	8(%r15),%r15

	movq	%r12,%r14
	andq	%r11,%r12
	movq	%r9,-100(%rsi)
	xorq	%r10,%r12
	notq	%r10
	movq	%r12,-84(%rsi)

	orq	%r11,%r10
	movq	76(%rdi),%r12
	xorq	%r13,%r10
	movq	%r10,-92(%rsi)

	andq	%r8,%r13
	movq	-28(%rdi),%r9
	xorq	%r14,%r13
	movq	-20(%rdi),%r10
	movq	%r13,-68(%rsi)

	orq	%r8,%r14
	movq	-76(%rdi),%r8
	xorq	%r11,%r14
	movq	28(%rdi),%r11
	movq	%r14,-76(%rsi)


	xorq	%rbp,%r8
	xorq	%rdx,%r12
	rolq	$28,%r8
	xorq	%rcx,%r11
	xorq	%rax,%r9
	rolq	$61,%r12
	rolq	$45,%r11
	xorq	%rbx,%r10
	rolq	$20,%r9
	movq	%r8,%r13
	orq	%r12,%r8
	rolq	$3,%r10

	xorq	%r11,%r8
	movq	%r8,-36(%rsi)

	movq	%r9,%r14
	andq	%r13,%r9
	movq	-92(%rdi),%r8
	xorq	%r12,%r9
	notq	%r12
	movq	%r9,-28(%rsi)

	orq	%r11,%r12
	movq	-44(%rdi),%r9
	xorq	%r10,%r12
	movq	%r12,-44(%rsi)

	andq	%r10,%r11
	movq	60(%rdi),%r12
	xorq	%r14,%r11
	movq	%r11,-52(%rsi)

	orq	%r10,%r14
	movq	4(%rdi),%r10
	xorq	%r13,%r14
	movq	52(%rdi),%r11
	movq	%r14,-60(%rsi)


	xorq	%rbp,%r10
	xorq	%rax,%r11
	rolq	$25,%r10
	xorq	%rdx,%r9
	rolq	$8,%r11
	xorq	%rbx,%r12
	rolq	$6,%r9
	xorq	%rcx,%r8
	rolq	$18,%r12
	movq	%r10,%r13
	andq	%r11,%r10
	rolq	$1,%r8

	notq	%r11
	xorq	%r9,%r10
	movq	%r10,-12(%rsi)

	movq	%r12,%r14
	andq	%r11,%r12
	movq	-12(%rdi),%r10
	xorq	%r13,%r12
	movq	%r12,-4(%rsi)

	orq	%r9,%r13
	movq	84(%rdi),%r12
	xorq	%r8,%r13
	movq	%r13,-20(%rsi)

	andq	%r8,%r9
	xorq	%r14,%r9
	movq	%r9,12(%rsi)

	orq	%r8,%r14
	movq	-60(%rdi),%r9
	xorq	%r11,%r14
	movq	36(%rdi),%r11
	movq	%r14,4(%rsi)


	movq	-68(%rdi),%r8

	xorq	%rcx,%r10
	xorq	%rdx,%r11
	rolq	$10,%r10
	xorq	%rbx,%r9
	rolq	$15,%r11
	xorq	%rbp,%r12
	rolq	$36,%r9
	xorq	%rax,%r8
	rolq	$56,%r12
	movq	%r10,%r13
	orq	%r11,%r10
	rolq	$27,%r8

	notq	%r11
	xorq	%r9,%r10
	movq	%r10,28(%rsi)

	movq	%r12,%r14
	orq	%r11,%r12
	xorq	%r13,%r12
	movq	%r12,36(%rsi)

	andq	%r9,%r13
	xorq	%r8,%r13
	movq	%r13,20(%rsi)

	orq	%r8,%r9
	xorq	%r14,%r9
	movq	%r9,52(%rsi)

	andq	%r14,%r8
	xorq	%r11,%r8
	movq	%r8,44(%rsi)


	xorq	-84(%rdi),%rdx
	xorq	-36(%rdi),%rbp
	rolq	$62,%rdx
	xorq	68(%rdi),%rcx
	rolq	$55,%rbp
	xorq	12(%rdi),%rax
	rolq	$2,%rcx
	xorq	20(%rdi),%rbx
# Swap source and destination: the buffer just written becomes the input of the next pass,
# and the last five lanes are stored through the new %rdi.
	xchgq	%rsi,%rdi
	rolq	$39,%rax
	rolq	$41,%rbx
	movq	%rdx,%r13
	andq	%rbp,%rdx
	notq	%rbp
	xorq	%rcx,%rdx
	movq	%rdx,92(%rdi)

	movq	%rax,%r14
	andq	%rbp,%rax
	xorq	%r13,%rax
	movq	%rax,60(%rdi)

	orq	%rcx,%r13
	xorq	%rbx,%r13
	movq	%r13,84(%rdi)

	andq	%rbx,%rcx
	xorq	%r14,%rcx
	movq	%rcx,76(%rdi)

	orq	%r14,%rbx
	xorq	%rbp,%rbx
	movq	%rbx,68(%rdi)

# Re-establish the loop-entry assignment: %rax..%rbp again hold the lanes at 60..92(%rdi).
	movq	%rdx,%rbp
	movq	%r13,%rdx

# The table ends on a 256-byte boundary, so the low byte of %r15 becomes zero exactly when
# all 24 constants have been consumed.
	testq	$255,%r15
	jnz	.Loop

# Rewind %r15 by the table size (24 * 8 bytes) so it points at `iotas` again.
	leaq	-192(%r15),%r15
# 0xf3,0xc3 encodes "rep ret".
	.byte	0xf3,0xc3
.cfi_endproc
.size	__KeccakF1600,.-__KeccakF1600
265 | 
# KeccakF1600 -- exported entry point. Takes the address of the 200-byte state (25 lanes of
# 64 bits) in %rdi and transforms it in place by running the __KeccakF1600 loop over the
# whole `iotas` table. Saves and restores %rbx, %rbp and %r12-%r15; uses 200 bytes of stack
# as the second buffer of the worker.
.globl	KeccakF1600
.type	KeccakF1600,@function
.align	32
KeccakF1600:
.cfi_startproc
# The four bytes below encode endbr64.
	.byte	0xf3,0x0f,0x1e,0xfa


	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

# Bias the state pointer by 100 so every lane is reachable at -100..92(%rdi)
# (presumably to keep all displacements within a signed byte), and reserve the scratch buffer.
	leaq	100(%rdi),%rdi
	subq	$200,%rsp
.cfi_adjust_cfa_offset	200

# Complement lanes 1, 2, 8, 12, 17 and 20 before the loop; the same six lanes are
# complemented again after it. The worker's and/or/not sequences operate on this
# representation.
	notq	-92(%rdi)
	notq	-84(%rdi)
	notq	-36(%rdi)
	notq	-4(%rdi)
	notq	36(%rdi)
	notq	60(%rdi)

	leaq	iotas(%rip),%r15
	leaq	100(%rsp),%rsi

	call	__KeccakF1600

# The worker swaps %rdi/%rsi once per pass; after its 24 passes %rdi is the caller's state again.
	notq	-92(%rdi)
	notq	-84(%rdi)
	notq	-36(%rdi)
	notq	-4(%rdi)
	notq	36(%rdi)
	notq	60(%rdi)
	leaq	-100(%rdi),%rdi

	addq	$200,%rsp
.cfi_adjust_cfa_offset	-200

	popq	%r15
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r15
	popq	%r14
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r14
	popq	%r13
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r13
	popq	%r12
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r12
	popq	%rbp
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbp
	popq	%rbx
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbx
# 0xf3,0xc3 encodes "rep ret".
	.byte	0xf3,0xc3
.cfi_endproc
.size	KeccakF1600,.-KeccakF1600
# Constant table for the loop in __KeccakF1600: 24 quadwords, one XORed in per pass.
# The values are the 24 Keccak-f[1600] round constants.
# Layout matters: 256-byte alignment plus 64 bytes of zero padding puts the end of the
# 192-byte table on a 256-byte boundary, which the loop's `testq $255,%r15` exit test
# depends on. Do not insert or remove anything between the alignment directive and the
# last .quad.
.align	256
.quad	0,0,0,0,0,0,0,0
.type	iotas,@object
iotas:
.quad	0x0000000000000001
.quad	0x0000000000008082
.quad	0x800000000000808a
.quad	0x8000000080008000
.quad	0x000000000000808b
.quad	0x0000000080000001
.quad	0x8000000080008081
.quad	0x8000000000008009
.quad	0x000000000000008a
.quad	0x0000000000000088
.quad	0x0000000080008009
.quad	0x000000008000000a
.quad	0x000000008000808b
.quad	0x800000000000008b
.quad	0x8000000000008089
.quad	0x8000000000008003
.quad	0x8000000000008002
.quad	0x8000000000000080
.quad	0x000000000000800a
.quad	0x800000008000000a
.quad	0x8000000080008081
.quad	0x8000000000008080
.quad	0x0000000080000001
.quad	0x8000000080008008
.size	iotas,.-iotas
# NUL-terminated ASCII identification string:
# "Keccak-1600 absorb and squeeze for x86_64, CRYPTOGAMS by <appro@openssl.org>"
.byte	75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
371 | 
# ELF note in .note.gnu.property: name size 4, descriptor size 2f-1f, note type 5,
# owner name "GNU\0", followed by one property record (type 0xc0000002, data size 4, value 3).
# NOTE(review): presumably GNU_PROPERTY_X86_FEATURE_1_AND advertising IBT and SHSTK, which
# would match the endbr64 bytes at both function entries -- confirm against the x86-64 psABI.
.section	.note.gnu.property,"a",@note
	.long	4,2f-1f,5
	.byte	0x47,0x4E,0x55,0
1:	.long	0xc0000002,4,3
.align	8
2:
378 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
  1 | name: Tests
  2 | 
  3 | on: [push, pull_request]
  4 | 
  5 | jobs:
  6 | 
  # Compile-only gate: runs `cargo check --tests` on Linux with protoc and the CUDA
  # toolkit (nvcc + cudart) installed.
  # NOTE(review): actions/checkout@v2, actions/cache@v2 and actions-rs/* are old releases --
  # confirm they are still accepted by current GitHub-hosted runners.
  check:
    name: Check
    runs-on: ubuntu-latest
    steps:
      - name: Checkout sources
        uses: actions/checkout@v2

      - name: Install Protoc
        uses: arduino/setup-protoc@v1

      - name: Install stable toolchain
        uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true

      - name: Install cuda
        uses: Jimver/cuda-toolkit@v0.2.8
        with:
          cuda: '11.2.2'
          method: 'network'
          sub-packages: '["nvcc", "cudart"]'

      # Cargo registry, git checkouts and target/ are cached, keyed on OS and every Cargo.lock.
      - name: Cache
        uses: actions/cache@v2
        with:
          path: |
            ~/.cargo/bin/
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
            target/
          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}

      - name: Run cargo check
        uses: actions-rs/cargo@v1
        with:
          command: check
          args: --tests
 47 | 
  # Debug-profile test matrix on Linux and Windows. kaspa-miner is tested with four
  # feature combinations (default, no-asm, no default features, shuttle), then the
  # kaspaopencl plugin is tested. fail-fast is off, so one OS failing does not cancel the other.
  test:
    name: Test Suite
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ ubuntu-latest, windows-latest ]
    steps:
      - name: Checkout sources
        uses: actions/checkout@v2

      # The OpenCL ICD loader development package is installed so the OpenCL plugin can link.
      - name: Fix LibOpenCL on Linux
        if: runner.os == 'Linux'
        run: |
          sudo apt update
          sudo apt install ocl-icd-opencl-dev -y

      - name: Fix CRLF on Windows
        if: runner.os == 'Windows'
        run: git config --global core.autocrlf false

      - name: Install Protoc
        uses: arduino/setup-protoc@v1

      - name: Install stable toolchain
        uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true

      # The two operating systems pin different CUDA toolkit versions.
      - name: Install CUDA Linux
        if: runner.os == 'Linux'
        uses: Jimver/cuda-toolkit@v0.2.8
        with:
          cuda: '11.2.2'
          method: 'network'
          sub-packages: '["nvcc", "cudart"]'

      - name: Install CUDA Windows
        if: runner.os == 'Windows'
        uses: Jimver/cuda-toolkit@v0.2.8
        with:
          cuda: '11.5.1'
          method: 'network'
          sub-packages: '["nvcc", "cudart"]'

      - name: Cache
        uses: actions/cache@v2
        with:
          path: |
            ~/.cargo/bin/
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
            target/
          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}

      - name: Run cargo test regular features
        uses: actions-rs/cargo@v1
        with:
          command: test
          args: -p kaspa-miner

      - name: Run cargo test no asm
        uses: actions-rs/cargo@v1
        with:
          command: test
          args: -p kaspa-miner --features=no-asm


      - name: Run cargo test no parking_lot
        uses: actions-rs/cargo@v1
        with:
          command: test
          args: -p kaspa-miner --no-default-features

      - name: Run cargo test shuttle
        uses: actions-rs/cargo@v1
        with:
          command: test
          args: -p kaspa-miner --no-default-features --features=shuttle

      - name: Run cargo test for kaspaopencl
        uses: actions-rs/cargo@v1
        with:
          command: test
          args: -p kaspaopencl
136 | 
137 | 
  # Same matrix and feature combinations as the `test` job, but every cargo invocation
  # adds --release so the optimized build is exercised as well.
  test-release:
    name: Test Suite Release
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ ubuntu-latest, windows-latest ]
    steps:
      - name: Checkout sources
        uses: actions/checkout@v2

      - name: Fix LibOpenCL on Linux
        if: runner.os == 'Linux'
        run: |
          sudo apt update
          sudo apt install ocl-icd-opencl-dev -y

      - name: Fix CRLF on Windows
        if: runner.os == 'Windows'
        run: git config --global core.autocrlf false

      - name: Install Protoc
        uses: arduino/setup-protoc@v1

      - name: Install stable toolchain
        uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true

      - name: Install CUDA Linux
        if: runner.os == 'Linux'
        uses: Jimver/cuda-toolkit@v0.2.8
        with:
          cuda: '11.2.2'
          method: 'network'
          sub-packages: '["nvcc", "cudart"]'

      - name: Install CUDA Windows
        if: runner.os == 'Windows'
        uses: Jimver/cuda-toolkit@v0.2.8
        with:
          cuda: '11.5.1'
          method: 'network'
          sub-packages: '["nvcc", "cudart"]'

      # Same cache key as the debug jobs, so debug and release artifacts share one cache entry.
      - name: Cache
        uses: actions/cache@v2
        with:
          path: |
            ~/.cargo/bin/
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
            target/
          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}

      - name: Run cargo test release regular features
        uses: actions-rs/cargo@v1
        with:
          command: test
          args: --release -p kaspa-miner

      - name: Run cargo test release no asm
        uses: actions-rs/cargo@v1
        with:
          command: test
          args: --features=no-asm --release -p kaspa-miner

      - name: Run cargo test release no parking_lot
        uses: actions-rs/cargo@v1
        with:
          command: test
          args: --no-default-features --release -p kaspa-miner

      - name: Run cargo test release shuttle
        uses: actions-rs/cargo@v1
        with:
          command: test
          args: --no-default-features --features=shuttle --release -p kaspa-miner

      - name: Run cargo test for kaspaopencl
        uses: actions-rs/cargo@v1
        with:
          command: test
          args: --release -p kaspaopencl
225 | 
  # Formatting and lint gate on Linux: `cargo fmt --all -- --check` must report no diff and
  # `cargo clippy --tests` runs with warnings promoted to errors (-D warnings).
  lints:
    name: Lints
    runs-on: ubuntu-latest
    steps:
      - name: Checkout sources
        uses: actions/checkout@v2

      - name: Install Protoc
        uses: arduino/setup-protoc@v1

      # Unlike the other jobs, the toolchain here also installs the rustfmt and clippy components.
      - name: Install stable toolchain
        uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true
          components: rustfmt, clippy

      - name: Install cuda
        uses: Jimver/cuda-toolkit@v0.2.8
        with:
          cuda: '11.2.2'
          method: 'network'
          sub-packages: '["nvcc", "cudart"]'

      - name: Cache
        uses: actions/cache@v2
        with:
          path: |
            ~/.cargo/bin/
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
            target/
          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}


      - name: Run cargo fmt
        uses: actions-rs/cargo@v1
        with:
          command: fmt
          args: --all -- --check

      - name: Run cargo clippy
        uses: actions-rs/cargo@v1
        with:
          command: clippy
          args: --tests -- -D warnings
274 | 


--------------------------------------------------------------------------------
/src/client/grpc.rs:
--------------------------------------------------------------------------------
  1 | use crate::client::Client;
  2 | use crate::pow::BlockSeed;
  3 | use crate::pow::BlockSeed::{FullBlock, PartialBlock};
  4 | use crate::proto::kaspad_message::Payload;
  5 | use crate::proto::rpc_client::RpcClient;
  6 | use crate::proto::{
  7 |     GetBlockTemplateRequestMessage, GetInfoRequestMessage, KaspadMessage, NotifyBlockAddedRequestMessage,
  8 |     NotifyNewBlockTemplateRequestMessage,
  9 | };
 10 | use crate::{miner::MinerManager, Error};
 11 | use async_trait::async_trait;
 12 | use futures_util::StreamExt;
 13 | use log::{error, info, warn};
 14 | use rand::{thread_rng, RngCore};
 15 | use semver::Version;
 16 | use std::sync::atomic::{AtomicU16, Ordering};
 17 | use std::sync::Arc;
 18 | use tokio::sync::mpsc::{self, error::SendError, Sender};
 19 | use tokio::task::JoinHandle;
 20 | use tokio_stream::wrappers::ReceiverStream;
 21 | use tokio_util::sync::{PollSendError, PollSender};
 22 | use tonic::{transport::Channel as TonicChannel, Streaming};
 23 | 
/// Extra data attached to every block-template request: `<crate version>/<compile time>`.
/// Both parts are read from environment variables at compile time.
static EXTRA_DATA: &str = concat!(env!("CARGO_PKG_VERSION"), "/", env!("PACKAGE_COMPILE_TIME"));
/// A kaspad version string.
// NOTE(review): not referenced in the visible part of this file; presumably the version
// that incoming node info is compared against -- confirm at the use site.
static VERSION_UPDATE: &str = "0.11.15";
/// Join handle of the task spawned by `create_block_channel`, which forwards mined blocks
/// into the outgoing kaspad message channel.
type BlockHandle = JoinHandle<Result<(), PollSendError<KaspadMessage>>>;
 27 | 
/// gRPC client for a kaspad node. Owns the RPC connection, both directions of the message
/// stream, and the channel through which mined blocks are submitted.
#[allow(dead_code)]
pub struct KaspadHandler {
    /// RPC client on which the bidirectional message stream was opened in `connect`.
    client: RpcClient<TonicChannel>,
    /// Sender for outgoing messages; its receiver feeds the request side of the stream.
    pub send_channel: Sender<KaspadMessage>,
    /// Incoming messages from kaspad, drained by `listen`.
    stream: Streaming<KaspadMessage>,
    /// Address requested as pay address when the devfund is not selected.
    miner_address: String,
    // NOTE(review): not read in the visible part of this file; presumably consulted when
    // handling templates from a node that is not synced -- confirm in `handle_message`.
    mine_when_not_synced: bool,
    /// Devfund pay address, `None` until `add_devfund` is called.
    devfund_address: Option<String>,
    /// Devfund share; compared against `block_template_ctr`, which runs modulo 10,000.
    devfund_percent: u16,
    /// Template-request counter, incremented modulo 10,000 on every request.
    block_template_ctr: Arc<AtomicU16>,

    /// Sender handed out by `get_block_channel` for submitting mined blocks.
    block_channel: Sender<BlockSeed>,
    /// Handle of the task forwarding `block_channel` into `send_channel`.
    block_handle: BlockHandle,
}
 42 | 
 43 | #[async_trait(?Send)]
 44 | impl Client for KaspadHandler {
 45 |     fn add_devfund(&mut self, address: String, percent: u16) {
 46 |         self.devfund_address = Some(address);
 47 |         self.devfund_percent = percent;
 48 |     }
 49 | 
 50 |     async fn register(&mut self) -> Result<(), Error> {
 51 |         // We actually register in connect
 52 |         Ok(())
 53 |     }
 54 | 
 55 |     async fn listen(&mut self, miner: &mut MinerManager) -> Result<(), Error> {
 56 |         while let Some(msg) = self.stream.message().await? {
 57 |             match msg.payload {
 58 |                 Some(payload) => self.handle_message(payload, miner).await?,
 59 |                 None => warn!("kaspad message payload is empty"),
 60 |             }
 61 |         }
 62 |         Ok(())
 63 |     }
 64 | 
 65 |     fn get_block_channel(&self) -> Sender<BlockSeed> {
 66 |         self.block_channel.clone()
 67 |     }
 68 | }
 69 | 
 70 | impl KaspadHandler {
 71 |     pub async fn connect<D>(
 72 |         address: D,
 73 |         miner_address: String,
 74 |         mine_when_not_synced: bool,
 75 |         block_template_ctr: Option<Arc<AtomicU16>>,
 76 |     ) -> Result<Box<Self>, Error>
 77 |     where
 78 |         D: std::convert::TryInto<tonic::transport::Endpoint>,
 79 |         D::Error: Into<Error>,
 80 |     {
 81 |         let mut client = RpcClient::connect(address).await?;
 82 |         let (send_channel, recv) = mpsc::channel(2);
 83 |         send_channel.send(GetInfoRequestMessage {}.into()).await?;
 84 |         let stream = client.message_stream(ReceiverStream::new(recv)).await?.into_inner();
 85 |         let (block_channel, block_handle) = Self::create_block_channel(send_channel.clone());
 86 |         Ok(Box::new(Self {
 87 |             client,
 88 |             stream,
 89 |             send_channel,
 90 |             miner_address,
 91 |             mine_when_not_synced,
 92 |             devfund_address: None,
 93 |             devfund_percent: 0,
 94 |             block_template_ctr: block_template_ctr
 95 |                 .unwrap_or_else(|| Arc::new(AtomicU16::new((thread_rng().next_u64() % 10_000u64) as u16))),
 96 |             block_channel,
 97 |             block_handle,
 98 |         }))
 99 |     }
100 | 
101 |     fn create_block_channel(send_channel: Sender<KaspadMessage>) -> (Sender<BlockSeed>, BlockHandle) {
102 |         // KaspadMessage::submit_block(block)
103 |         let (send, recv) = mpsc::channel::<BlockSeed>(1);
104 |         (
105 |             send,
106 |             tokio::spawn(async move {
107 |                 ReceiverStream::new(recv)
108 |                     .map(|block_seed| match block_seed {
109 |                         FullBlock(block) => KaspadMessage::submit_block(*block),
110 |                         PartialBlock { .. } => unreachable!("All blocks sent here should have arrived from here"),
111 |                     })
112 |                     .map(Ok)
113 |                     .forward(PollSender::new(send_channel))
114 |                     .await
115 |             }),
116 |         )
117 |     }
118 | 
119 |     async fn client_send(&self, msg: impl Into<KaspadMessage>) -> Result<(), SendError<KaspadMessage>> {
120 |         self.send_channel.send(msg.into()).await
121 |     }
122 | 
123 |     async fn client_get_block_template(&mut self) -> Result<(), SendError<KaspadMessage>> {
124 |         let pay_address = match &self.devfund_address {
125 |             Some(devfund_address) if self.block_template_ctr.load(Ordering::SeqCst) <= self.devfund_percent => {
126 |                 devfund_address.clone()
127 |             }
128 |             _ => self.miner_address.clone(),
129 |         };
130 |         self.block_template_ctr.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |v| Some((v + 1) % 10_000)).unwrap();
131 |         self.client_send(GetBlockTemplateRequestMessage { pay_address, extra_data: EXTRA_DATA.into() }).await
132 |     }
133 | 
134 |     async fn handle_message(&mut self, msg: Payload, miner: &mut MinerManager) -> Result<(), Error> {
135 |         match msg {
136 |             Payload::BlockAddedNotification(_) => self.client_get_block_template().await?,
137 |             Payload::NewBlockTemplateNotification(_) => self.client_get_block_template().await?,
138 |             Payload::GetBlockTemplateResponse(template) => match (template.block, template.is_synced, template.error) {
139 |                 (Some(b), true, None) => miner.process_block(Some(FullBlock(Box::new(b)))).await?,
140 |                 (Some(b), false, None) if self.mine_when_not_synced => {
141 |                     miner.process_block(Some(FullBlock(Box::new(b)))).await?
142 |                 }
143 |                 (_, false, None) => miner.process_block(None).await?,
144 |                 (_, _, Some(e)) => {
145 |                     return Err(format!("GetTemplate returned with an error: {:?}", e).into());
146 |                 }
147 |                 (None, true, None) => error!("No block and No Error!"),
148 |             },
149 |             Payload::SubmitBlockResponse(res) => match res.error {
150 |                 None => info!("block submitted successfully!"),
151 |                 Some(e) => warn!("Failed submitting block: {:?}", e),
152 |             },
153 |             Payload::GetBlockResponse(msg) => {
154 |                 if let Some(e) = msg.error {
155 |                     return Err(e.message.into());
156 |                 } else {
157 |                     info!("Get block response: {:?}", msg);
158 |                 }
159 |             }
160 |             Payload::GetInfoResponse(info) => {
161 |                 info!("Kaspad version: {}", info.server_version);
162 |                 let kaspad_version = Version::parse(&info.server_version)?;
163 |                 let update_version = Version::parse(VERSION_UPDATE)?;
164 |                 match kaspad_version >= update_version {
165 |                     true => self.client_send(NotifyNewBlockTemplateRequestMessage {}).await?,
166 |                     false => self.client_send(NotifyBlockAddedRequestMessage {}).await?,
167 |                 };
168 | 
169 |                 self.client_get_block_template().await?;
170 |             }
171 |             Payload::NotifyNewBlockTemplateResponse(res) => match res.error {
172 |                 None => info!("Registered for new template notifications"),
173 |                 Some(e) => error!("Failed registering for new template notifications: {:?}", e),
174 |             },
175 |             Payload::NotifyBlockAddedResponse(res) => match res.error {
176 |                 None => info!("Registered for block notifications (upgrade your Kaspad for better experience)"),
177 |                 Some(e) => error!("Failed registering for block notifications: {:?}", e),
178 |             },
179 |             msg => info!("got unknown msg: {:?}", msg),
180 |         }
181 |         Ok(())
182 |     }
183 | }
184 | 
185 | impl Drop for KaspadHandler {
186 |     fn drop(&mut self) {
187 |         self.block_handle.abort();
188 |     }
189 | }
190 | 


--------------------------------------------------------------------------------
/proto/messages.proto:
--------------------------------------------------------------------------------
  1 | syntax = "proto3";
  2 | package protowire;
  3 | 
  4 | option go_package = "github.com/kaspanet/kaspad/protowire";
  5 | 
  6 | import "p2p.proto";
  7 | import "rpc.proto";
  8 | 
   9 | message KaspadMessage { // Envelope exchanged on the MessageStream RPCs; at most one payload variant is set.
  10 |   oneof payload {
  11 |     AddressesMessage addresses = 1; // Field numbers 1-56: non-RPC messages (presumably the P2P types from p2p.proto).
  12 |     BlockMessage block = 2;
  13 |     TransactionMessage transaction = 3;
  14 |     BlockLocatorMessage blockLocator = 5;
  15 |     RequestAddressesMessage requestAddresses = 6;
  16 |     RequestRelayBlocksMessage requestRelayBlocks = 10;
  17 |     RequestTransactionsMessage requestTransactions = 12;
  18 |     BlockMessage ibdBlock = 13;
  19 |     InvRelayBlockMessage invRelayBlock = 14;
  20 |     InvTransactionsMessage invTransactions = 15;
  21 |     PingMessage ping = 16;
  22 |     PongMessage pong = 17;
  23 |     VerackMessage verack = 19;
  24 |     VersionMessage version = 20;
  25 |     TransactionNotFoundMessage transactionNotFound = 21;
  26 |     RejectMessage reject = 22;
  27 |     PruningPointUtxoSetChunkMessage pruningPointUtxoSetChunk = 25;
  28 |     RequestIBDBlocksMessage requestIBDBlocks = 26;
  29 |     UnexpectedPruningPointMessage unexpectedPruningPoint = 27;
  30 |     IbdBlockLocatorMessage ibdBlockLocator = 30;
  31 |     IbdBlockLocatorHighestHashMessage ibdBlockLocatorHighestHash = 31;
  32 |     RequestNextPruningPointUtxoSetChunkMessage requestNextPruningPointUtxoSetChunk = 33;
  33 |     DonePruningPointUtxoSetChunksMessage donePruningPointUtxoSetChunks = 34;
  34 |     IbdBlockLocatorHighestHashNotFoundMessage ibdBlockLocatorHighestHashNotFound = 35;
  35 |     BlockWithTrustedDataMessage blockWithTrustedData = 36;
  36 |     DoneBlocksWithTrustedDataMessage doneBlocksWithTrustedData = 37;
  37 |     RequestPruningPointAndItsAnticoneMessage requestPruningPointAndItsAnticone = 40;
  38 |     BlockHeadersMessage blockHeaders = 41;
  39 |     RequestNextHeadersMessage requestNextHeaders = 42;
  40 |     DoneHeadersMessage DoneHeaders = 43;
  41 |     RequestPruningPointUTXOSetMessage requestPruningPointUTXOSet = 44;
  42 |     RequestHeadersMessage requestHeaders = 45;
  43 |     RequestBlockLocatorMessage requestBlockLocator = 46;
  44 |     PruningPointsMessage pruningPoints = 47;
  45 |     RequestPruningPointProofMessage requestPruningPointProof = 48;
  46 |     PruningPointProofMessage pruningPointProof = 49;
  47 |     ReadyMessage ready = 50;
  48 |     BlockWithTrustedDataV4Message blockWithTrustedDataV4 = 51;
  49 |     TrustedDataMessage trustedData = 52;
  50 |     RequestIBDChainBlockLocatorMessage requestIBDChainBlockLocator = 53;
  51 |     IbdChainBlockLocatorMessage ibdChainBlockLocator = 54;
  52 |     RequestAnticoneMessage requestAnticone = 55;
  53 |     RequestNextPruningPointAndItsAnticoneBlocksMessage requestNextPruningPointAndItsAnticoneBlocks = 56;
  54 |     // Field numbers 1001 and up: RPC requests, their responses, and notifications.
  55 |     GetCurrentNetworkRequestMessage getCurrentNetworkRequest = 1001;
  56 |     GetCurrentNetworkResponseMessage getCurrentNetworkResponse = 1002;
  57 |     SubmitBlockRequestMessage submitBlockRequest = 1003;
  58 |     SubmitBlockResponseMessage submitBlockResponse = 1004;
  59 |     GetBlockTemplateRequestMessage getBlockTemplateRequest = 1005;
  60 |     GetBlockTemplateResponseMessage getBlockTemplateResponse = 1006;
  61 |     NotifyBlockAddedRequestMessage notifyBlockAddedRequest = 1007;
  62 |     NotifyBlockAddedResponseMessage notifyBlockAddedResponse = 1008;
  63 |     BlockAddedNotificationMessage blockAddedNotification = 1009;
  64 |     GetPeerAddressesRequestMessage getPeerAddressesRequest = 1010;
  65 |     GetPeerAddressesResponseMessage getPeerAddressesResponse = 1011;
  66 |     GetSelectedTipHashRequestMessage getSelectedTipHashRequest = 1012;
  67 |     GetSelectedTipHashResponseMessage getSelectedTipHashResponse = 1013;
  68 |     GetMempoolEntryRequestMessage getMempoolEntryRequest = 1014;
  69 |     GetMempoolEntryResponseMessage getMempoolEntryResponse = 1015;
  70 |     GetConnectedPeerInfoRequestMessage getConnectedPeerInfoRequest = 1016;
  71 |     GetConnectedPeerInfoResponseMessage getConnectedPeerInfoResponse = 1017;
  72 |     AddPeerRequestMessage addPeerRequest = 1018;
  73 |     AddPeerResponseMessage addPeerResponse = 1019;
  74 |     SubmitTransactionRequestMessage submitTransactionRequest = 1020;
  75 |     SubmitTransactionResponseMessage submitTransactionResponse = 1021;
  76 |     NotifyVirtualSelectedParentChainChangedRequestMessage notifyVirtualSelectedParentChainChangedRequest = 1022;
  77 |     NotifyVirtualSelectedParentChainChangedResponseMessage notifyVirtualSelectedParentChainChangedResponse = 1023;
  78 |     VirtualSelectedParentChainChangedNotificationMessage virtualSelectedParentChainChangedNotification = 1024;
  79 |     GetBlockRequestMessage getBlockRequest = 1025;
  80 |     GetBlockResponseMessage getBlockResponse = 1026;
  81 |     GetSubnetworkRequestMessage getSubnetworkRequest = 1027;
  82 |     GetSubnetworkResponseMessage getSubnetworkResponse = 1028;
  83 |     GetVirtualSelectedParentChainFromBlockRequestMessage getVirtualSelectedParentChainFromBlockRequest = 1029;
  84 |     GetVirtualSelectedParentChainFromBlockResponseMessage getVirtualSelectedParentChainFromBlockResponse = 1030;
  85 |     GetBlocksRequestMessage getBlocksRequest = 1031;
  86 |     GetBlocksResponseMessage getBlocksResponse = 1032;
  87 |     GetBlockCountRequestMessage getBlockCountRequest = 1033;
  88 |     GetBlockCountResponseMessage getBlockCountResponse = 1034;
  89 |     GetBlockDagInfoRequestMessage getBlockDagInfoRequest = 1035;
  90 |     GetBlockDagInfoResponseMessage getBlockDagInfoResponse = 1036;
  91 |     ResolveFinalityConflictRequestMessage resolveFinalityConflictRequest = 1037;
  92 |     ResolveFinalityConflictResponseMessage resolveFinalityConflictResponse = 1038;
  93 |     NotifyFinalityConflictsRequestMessage notifyFinalityConflictsRequest = 1039;
  94 |     NotifyFinalityConflictsResponseMessage notifyFinalityConflictsResponse = 1040;
  95 |     FinalityConflictNotificationMessage finalityConflictNotification = 1041;
  96 |     FinalityConflictResolvedNotificationMessage finalityConflictResolvedNotification = 1042;
  97 |     GetMempoolEntriesRequestMessage getMempoolEntriesRequest = 1043;
  98 |     GetMempoolEntriesResponseMessage getMempoolEntriesResponse = 1044;
  99 |     ShutDownRequestMessage shutDownRequest = 1045;
 100 |     ShutDownResponseMessage shutDownResponse = 1046;
 101 |     GetHeadersRequestMessage getHeadersRequest = 1047;
 102 |     GetHeadersResponseMessage getHeadersResponse = 1048;
 103 |     NotifyUtxosChangedRequestMessage notifyUtxosChangedRequest = 1049;
 104 |     NotifyUtxosChangedResponseMessage notifyUtxosChangedResponse = 1050;
 105 |     UtxosChangedNotificationMessage utxosChangedNotification = 1051;
 106 |     GetUtxosByAddressesRequestMessage getUtxosByAddressesRequest = 1052;
 107 |     GetUtxosByAddressesResponseMessage getUtxosByAddressesResponse = 1053;
 108 |     GetVirtualSelectedParentBlueScoreRequestMessage getVirtualSelectedParentBlueScoreRequest = 1054;
 109 |     GetVirtualSelectedParentBlueScoreResponseMessage getVirtualSelectedParentBlueScoreResponse = 1055;
 110 |     NotifyVirtualSelectedParentBlueScoreChangedRequestMessage notifyVirtualSelectedParentBlueScoreChangedRequest = 1056;
 111 |     NotifyVirtualSelectedParentBlueScoreChangedResponseMessage notifyVirtualSelectedParentBlueScoreChangedResponse = 1057;
 112 |     VirtualSelectedParentBlueScoreChangedNotificationMessage virtualSelectedParentBlueScoreChangedNotification = 1058;
 113 |     BanRequestMessage banRequest = 1059;
 114 |     BanResponseMessage banResponse = 1060;
 115 |     UnbanRequestMessage unbanRequest = 1061;
 116 |     UnbanResponseMessage unbanResponse = 1062;
 117 |     GetInfoRequestMessage getInfoRequest = 1063;
 118 |     GetInfoResponseMessage getInfoResponse = 1064;
 119 |     StopNotifyingUtxosChangedRequestMessage stopNotifyingUtxosChangedRequest = 1065;
 120 |     StopNotifyingUtxosChangedResponseMessage stopNotifyingUtxosChangedResponse = 1066;
 121 |     NotifyPruningPointUTXOSetOverrideRequestMessage notifyPruningPointUTXOSetOverrideRequest = 1067;
 122 |     NotifyPruningPointUTXOSetOverrideResponseMessage notifyPruningPointUTXOSetOverrideResponse = 1068;
 123 |     PruningPointUTXOSetOverrideNotificationMessage pruningPointUTXOSetOverrideNotification = 1069;
 124 |     StopNotifyingPruningPointUTXOSetOverrideRequestMessage stopNotifyingPruningPointUTXOSetOverrideRequest = 1070;
 125 |     StopNotifyingPruningPointUTXOSetOverrideResponseMessage stopNotifyingPruningPointUTXOSetOverrideResponse = 1071;
 126 |     EstimateNetworkHashesPerSecondRequestMessage estimateNetworkHashesPerSecondRequest = 1072;
 127 |     EstimateNetworkHashesPerSecondResponseMessage estimateNetworkHashesPerSecondResponse = 1073;
 128 |     NotifyVirtualDaaScoreChangedRequestMessage notifyVirtualDaaScoreChangedRequest = 1074;
 129 |     NotifyVirtualDaaScoreChangedResponseMessage notifyVirtualDaaScoreChangedResponse = 1075;
 130 |     VirtualDaaScoreChangedNotificationMessage virtualDaaScoreChangedNotification = 1076;
 131 |     GetBalanceByAddressRequestMessage getBalanceByAddressRequest = 1077;
 132 |     GetBalanceByAddressResponseMessage getBalanceByAddressResponse = 1078;
 133 |     GetBalancesByAddressesRequestMessage getBalancesByAddressesRequest = 1079;
 134 |     GetBalancesByAddressesResponseMessage getBalancesByAddressesResponse = 1080;
 135 |     NotifyNewBlockTemplateRequestMessage notifyNewBlockTemplateRequest = 1081;
 136 |     NotifyNewBlockTemplateResponseMessage notifyNewBlockTemplateResponse = 1082;
 137 |     NewBlockTemplateNotificationMessage newBlockTemplateNotification = 1083;
 138 |   }
 139 | }
140 | 
 141 | service P2P { // Exposes one bidirectional stream of KaspadMessage envelopes.
 142 |   rpc MessageStream (stream KaspadMessage) returns (stream KaspadMessage) {}
 143 | }
144 | 
 145 | service RPC { // Same streaming shape as P2P; presumably the service behind the miner's gRPC client.
 146 |   rpc MessageStream (stream KaspadMessage) returns (stream KaspadMessage) {}
 147 | }
148 | 


--------------------------------------------------------------------------------
/plugins/cuda/src/worker.rs:
--------------------------------------------------------------------------------
  1 | use crate::{Error, NonceGenEnum};
  2 | use cust::context::CurrentContext;
  3 | use cust::device::DeviceAttribute;
  4 | use cust::function::Function;
  5 | use cust::module::{ModuleJitOption, OptLevel};
  6 | use cust::prelude::*;
  7 | use kaspa_miner::xoshiro256starstar::Xoshiro256StarStar;
  8 | use kaspa_miner::Worker;
  9 | use log::{error, info};
 10 | use rand::{Fill, RngCore};
 11 | use std::ffi::CString;
 12 | use std::sync::{Arc, Weak};
 13 | 
  14 | static BPS: f32 = 1.; // Presumably blocks per second; `sync` rejects a launch that takes longer than `1000. / BPS`.
  15 | 
  16 | static PTX_86: &str = include_str!("../resources/kaspa-cuda-sm86.ptx"); // Loaded for compute capability 8.6 and above.
  17 | static PTX_75: &str = include_str!("../resources/kaspa-cuda-sm75.ptx"); // Loaded for compute capability 7.5 up to (not including) 8.6.
  18 | static PTX_61: &str = include_str!("../resources/kaspa-cuda-sm61.ptx"); // Loaded for compute capability 6.1 up to (not including) 7.5.
  19 | static PTX_30: &str = include_str!("../resources/kaspa-cuda-sm30.ptx"); // Loaded for major version 3 and above, below 6.1.
  20 | static PTX_20: &str = include_str!("../resources/kaspa-cuda-sm20.ptx"); // Loaded for major version 2.
 21 | 
  22 | pub struct Kernel<'kernel> { // A function looked up in a CUDA module plus the launch dimensions used for it.
  23 |     func: Arc<Function<'kernel>>, // Handle returned by the module's `get_function`.
  24 |     block_size: u32, // Threads per block, taken from `suggested_launch_configuration`.
  25 |     grid_size: u32, // Number of blocks; `block_size * grid_size` is the workload.
  26 | }
 27 | 
 28 | impl<'kernel> Kernel<'kernel> {
 29 |     pub fn new(module: Weak<Module>, name: &'kernel str) -> Result<Kernel<'kernel>, Error> {
 30 |         let func = Arc::new(unsafe {
 31 |             module.as_ptr().as_ref().unwrap().get_function(name).map_err(|e| {
 32 |                 error!("Error loading function: {}", e);
 33 |                 e
 34 |             })?
 35 |         });
 36 |         let (_, block_size) = func.suggested_launch_configuration(0, 0.into())?;
 37 | 
 38 |         let device = CurrentContext::get_device()?;
 39 |         let sm_count = device.get_attribute(DeviceAttribute::MultiprocessorCount)? as u32;
 40 |         let grid_size = sm_count * func.max_active_blocks_per_multiprocessor(block_size.into(), 0)?;
 41 | 
 42 |         Ok(Self { func, block_size, grid_size })
 43 |     }
 44 | 
 45 |     pub fn get_workload(&self) -> u32 {
 46 |         self.block_size * self.grid_size
 47 |     }
 48 | 
 49 |     pub fn set_workload(&mut self, workload: u32) {
 50 |         self.grid_size = (workload + self.block_size - 1) / self.block_size
 51 |     }
 52 | }
 53 | 
  54 | pub struct CudaGPUWorker<'gpu> { // State for running the `heavy_hash` kernel on one CUDA device.
  55 |     // NOTE: The order is important! context must be closed last
  56 |     heavy_hash_kernel: Kernel<'gpu>,
  57 |     stream: Stream, // Stream on which the kernel launch and both events are enqueued.
  58 |     start_event: Event, // Recorded right before the kernel launch.
  59 |     stop_event: Event, // Recorded right after the launch; `sync` waits on it.
  60 |     _module: Arc<Module>, // Loaded PTX module; also provides the `hash_header`, `matrix` and `target` globals.
  61 | 
  62 |     rand_state: DeviceBuffer<u64>, // Xoshiro: four words per work item; Lean: one seed word, refreshed on every launch.
  63 |     final_nonce_buff: DeviceBuffer<u64>, // One-element device buffer read back by `copy_output_to`.
  64 | 
  65 |     device_id: u32,
  66 |     pub workload: usize, // Work-item count passed to the kernel on every launch.
  67 |     _context: Context,
  68 | 
  69 |     random: NonceGenEnum, // Nonce-generation mode consulted by `calculate_hash`.
  70 | }
 71 | 
 72 | impl<'gpu> Worker for CudaGPUWorker<'gpu> {
 73 |     fn id(&self) -> String {
 74 |         let device = CurrentContext::get_device().unwrap();
 75 |         format!("#{} ({})", self.device_id, device.name().unwrap())
 76 |     }
 77 | 
 78 |     fn load_block_constants(&mut self, hash_header: &[u8; 72], matrix: &[[u16; 64]; 64], target: &[u64; 4]) {
 79 |         let u8matrix: Arc<[[u8; 64]; 64]> = Arc::new(matrix.map(|row| row.map(|v| v as u8)));
 80 |         let mut hash_header_gpu = self._module.get_global::<[u8; 72]>(&CString::new("hash_header").unwrap()).unwrap();
 81 |         hash_header_gpu.copy_from(hash_header).map_err(|e| e.to_string()).unwrap();
 82 | 
 83 |         let mut matrix_gpu = self._module.get_global::<[[u8; 64]; 64]>(&CString::new("matrix").unwrap()).unwrap();
 84 |         matrix_gpu.copy_from(&u8matrix).map_err(|e| e.to_string()).unwrap();
 85 | 
 86 |         let mut target_gpu = self._module.get_global::<[u64; 4]>(&CString::new("target").unwrap()).unwrap();
 87 |         target_gpu.copy_from(target).map_err(|e| e.to_string()).unwrap();
 88 |     }
 89 | 
 90 |     #[inline(always)]
 91 |     fn calculate_hash(&mut self, _nonces: Option<&Vec<u64>>, nonce_mask: u64, nonce_fixed: u64) {
 92 |         let func = &self.heavy_hash_kernel.func;
 93 |         let stream = &self.stream;
 94 |         let random: u8 = match self.random {
 95 |             NonceGenEnum::Lean => {
 96 |                 self.rand_state.copy_from(&[rand::thread_rng().next_u64()]).unwrap();
 97 |                 0
 98 |             }
 99 |             NonceGenEnum::Xoshiro => 1,
100 |         };
101 | 
102 |         self.start_event.record(stream).unwrap();
103 |         unsafe {
104 |             launch!(
105 |                 func<<<
106 |                     self.heavy_hash_kernel.grid_size, self.heavy_hash_kernel.block_size,
107 |                     0, stream
108 |                 >>>(
109 |                     nonce_mask, nonce_fixed,
110 |                     self.workload,
111 |                     random,
112 |                     self.rand_state.as_device_ptr(),
113 |                     self.final_nonce_buff.as_device_ptr()
114 |                 )
115 |             )
116 |             .unwrap(); // We see errors in sync
117 |         }
118 |         self.stop_event.record(stream).unwrap();
119 |     }
120 | 
121 |     #[inline(always)]
122 |     fn sync(&self) -> Result<(), Error> {
123 |         //self.stream.synchronize()?;
124 |         self.stop_event.synchronize()?;
125 |         if self.stop_event.elapsed_time_f32(&self.start_event)? > 1000. / BPS {
126 |             return Err("Cuda takes longer then block rate. Please reduce your workload.".into());
127 |         }
128 |         Ok(())
129 |     }
130 | 
131 |     fn get_workload(&self) -> usize {
132 |         self.workload
133 |     }
134 | 
135 |     #[inline(always)]
136 |     fn copy_output_to(&mut self, nonces: &mut Vec<u64>) -> Result<(), Error> {
137 |         self.final_nonce_buff.copy_to(nonces)?;
138 |         Ok(())
139 |     }
140 | }
141 | 
142 | impl<'gpu> CudaGPUWorker<'gpu> {
143 |     pub fn new(
144 |         device_id: u32,
145 |         workload: f32,
146 |         is_absolute: bool,
147 |         blocking_sync: bool,
148 |         random: NonceGenEnum,
149 |     ) -> Result<Self, Error> {
150 |         info!("Starting a CUDA worker");
151 |         let sync_flag = match blocking_sync {
152 |             true => ContextFlags::SCHED_BLOCKING_SYNC,
153 |             false => ContextFlags::SCHED_AUTO,
154 |         };
155 |         let device = Device::get_device(device_id).unwrap();
156 |         let _context = Context::new(device)?;
157 |         _context.set_flags(sync_flag)?;
158 | 
159 |         let major = device.get_attribute(DeviceAttribute::ComputeCapabilityMajor)?;
160 |         let minor = device.get_attribute(DeviceAttribute::ComputeCapabilityMinor)?;
161 |         let _module: Arc<Module>;
162 |         info!("Device #{} compute version is {}.{}", device_id, major, minor);
163 |         if major > 8 || (major == 8 && minor >= 6) {
164 |             _module = Arc::new(Module::from_ptx(PTX_86, &[ModuleJitOption::OptLevel(OptLevel::O4)]).map_err(|e| {
165 |                 error!("Error loading PTX. Make sure you have the updated driver for you devices");
166 |                 e
167 |             })?);
168 |         } else if major > 7 || (major == 7 && minor >= 5) {
169 |             _module = Arc::new(Module::from_ptx(PTX_75, &[ModuleJitOption::OptLevel(OptLevel::O4)]).map_err(|e| {
170 |                 error!("Error loading PTX. Make sure you have the updated driver for you devices");
171 |                 e
172 |             })?);
173 |         } else if major > 6 || (major == 6 && minor >= 1) {
174 |             _module = Arc::new(Module::from_ptx(PTX_61, &[ModuleJitOption::OptLevel(OptLevel::O4)]).map_err(|e| {
175 |                 error!("Error loading PTX. Make sure you have the updated driver for you devices");
176 |                 e
177 |             })?);
178 |         } else if major >= 3 {
179 |             _module = Arc::new(Module::from_ptx(PTX_30, &[ModuleJitOption::OptLevel(OptLevel::O4)]).map_err(|e| {
180 |                 error!("Error loading PTX. Make sure you have the updated driver for you devices");
181 |                 e
182 |             })?);
183 |         } else if major >= 2 {
184 |             _module = Arc::new(Module::from_ptx(PTX_20, &[ModuleJitOption::OptLevel(OptLevel::O4)]).map_err(|e| {
185 |                 error!("Error loading PTX. Make sure you have the updated driver for you devices");
186 |                 e
187 |             })?);
188 |         } else {
189 |             return Err("Cuda compute version not supported".into());
190 |         }
191 | 
192 |         let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
193 | 
194 |         let mut heavy_hash_kernel = Kernel::new(Arc::downgrade(&_module), "heavy_hash")?;
195 | 
196 |         let mut chosen_workload = 0u32;
197 |         if is_absolute {
198 |             chosen_workload = 1;
199 |         } else {
200 |             let cur_workload = heavy_hash_kernel.get_workload();
201 |             if chosen_workload == 0 || chosen_workload < cur_workload {
202 |                 chosen_workload = cur_workload;
203 |             }
204 |         }
205 |         chosen_workload = (chosen_workload as f32 * workload) as u32;
206 |         info!("GPU #{} Chosen workload: {}", device_id, chosen_workload);
207 |         heavy_hash_kernel.set_workload(chosen_workload);
208 | 
209 |         let final_nonce_buff = vec![0u64; 1].as_slice().as_dbuf()?;
210 | 
211 |         let rand_state: DeviceBuffer<u64> = match random {
212 |             NonceGenEnum::Xoshiro => {
213 |                 info!("Using xoshiro for nonce-generation");
214 |                 let mut buffer = DeviceBuffer::<u64>::zeroed(4 * (chosen_workload as usize)).unwrap();
215 |                 info!("GPU #{} is generating initial seed. This may take some time.", device_id);
216 |                 let mut seed = [1u64; 4];
217 |                 seed.try_fill(&mut rand::thread_rng())?;
218 |                 buffer.copy_from(
219 |                     Xoshiro256StarStar::new(&seed)
220 |                         .iter_jump_state()
221 |                         .take(chosen_workload as usize)
222 |                         .flatten()
223 |                         .collect::<Vec<u64>>()
224 |                         .as_slice(),
225 |                 )?;
226 |                 info!("GPU #{} initialized", device_id);
227 |                 buffer
228 |             }
229 |             NonceGenEnum::Lean => {
230 |                 info!("Using lean nonce-generation");
231 |                 let mut buffer = DeviceBuffer::<u64>::zeroed(1).unwrap();
232 |                 let seed = rand::thread_rng().next_u64();
233 |                 buffer.copy_from(&[seed])?;
234 |                 buffer
235 |             }
236 |         };
237 |         Ok(Self {
238 |             device_id,
239 |             _context,
240 |             _module,
241 |             start_event: Event::new(EventFlags::DEFAULT)?,
242 |             stop_event: Event::new(EventFlags::DEFAULT)?,
243 |             workload: chosen_workload as usize,
244 |             stream,
245 |             rand_state,
246 |             final_nonce_buff,
247 |             heavy_hash_kernel,
248 |             random,
249 |         })
250 |     }
251 | }
252 | 


--------------------------------------------------------------------------------
/LICENSE-MIT:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/LICENSE-APACHE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/plugins/opencl/resources/kaspa-opencl.cl:
--------------------------------------------------------------------------------
  1 | // Catering for different flavors: request the AMD media-ops extension (the AMD rol() path below calls amd_bitalign).
  2 | #pragma OPENCL EXTENSION cl_amd_media_ops : enable
  3 | // STATIC expands to nothing when __OPENCL_VERSION__ <= CL_VERSION_1_1 and to `static` otherwise.
  4 | #if __OPENCL_VERSION__ <= CL_VERSION_1_1
  5 | #define STATIC
  6 | #else
  7 | #define STATIC static
  8 | #endif
  9 | /* TYPES: <stdint.h>-style names for the OpenCL built-in integer types */
 10 | // uchar/ushort/uint/ulong (8/16/32/64-bit unsigned) and char/short/int/long (signed) are aliased below.
 11 | typedef uchar uint8_t;
 12 | typedef char int8_t;
 13 | typedef ushort uint16_t;
 14 | typedef short int16_t;
 15 | typedef uint uint32_t;
 16 | typedef int int32_t;
 17 | typedef ulong uint64_t;
 18 | typedef long int64_t;
 19 | 
 20 | /* TINY KECCAK */
 21 | /** libkeccak-tiny
 22 |  *
 23 |  * A single-file implementation of SHA-3 and SHAKE.
 24 |  *
 25 |  * Implementor: David Leon Gil
 26 |  * License: CC0, attribution kindly requested. Blame taken too,
 27 |  * but not liability.
 28 |  */
 29 | 
 30 | /******** The Keccak-f[1600] permutation ********/
 31 | 
 32 | /*** Constants: three 24-entry tables read by keccakf (rho = rotation amounts, pi = lane indices, RC = round constants). ***/
 33 | constant STATIC const uint8_t rho[24] = \
 34 |   { 1,  3,   6, 10, 15, 21,
 35 |     28, 36, 45, 55,  2, 14,
 36 |     27, 41, 56,  8, 25, 43,
 37 |     62, 18, 39, 61, 20, 44};
 38 | constant STATIC const uint8_t pi[24] = \
 39 |   {10,  7, 11, 17, 18, 3,
 40 |     5, 16,  8, 21, 24, 4,
 41 |    15, 23, 19, 13, 12, 2,
 42 |    20, 14, 22,  9, 6,  1};
 43 | /* RC[i] is XORed into lane 0 at the end of round i (the Iota step of keccakf). */
 44 | constant STATIC const uint64_t RC[24] = \
 45 |   {1UL, 0x8082UL, 0x800000000000808aUL, 0x8000000080008000UL,
 46 |    0x808bUL, 0x80000001UL, 0x8000000080008081UL, 0x8000000000008009UL,
 47 |    0x8aUL, 0x88UL, 0x80008009UL, 0x8000000aUL,
 48 |    0x8000808bUL, 0x800000000000008bUL, 0x8000000000008089UL, 0x8000000000008003UL,
 49 |    0x8000000000008002UL, 0x8000000000000080UL, 0x800aUL, 0x800000008000000aUL,
 50 |    0x8000000080008081UL, 0x8000000000008080UL, 0x80000001UL, 0x8000000080008008UL};
 51 | 
 52 | 
 53 | /** Magic from fancyIX/sgminer-phi2-branch **/
 54 | #if defined(OPENCL_PLATFORM_AMD)
 55 | #pragma OPENCL EXTENSION cl_amd_media_ops : enable
 56 | #define dataType uint2
 57 | #define as_dataType as_uint2
 58 | /* AMD path: each 64-bit lane is handled as a uint2 (two 32-bit halves, x and y). */
 59 | /* rol() rotates such a lane left by r bits with amd_bitalign. The callers in */
 60 | /* this file pass either the literal 1 or a rho[] entry, so r is in 1..62; */
 61 | /* r == 0 is not a supported input of this path. */
 62 | /* Declared with STATIC rather than a bare `static` so that it follows the same */
 63 | /* OpenCL-version-dependent linkage as every other helper in this file. */
 64 | STATIC inline uint2 rol(const uint2 vv, const int r)
 65 | {
 66 | 	if (r <= 32) return amd_bitalign((vv).xy, (vv).yx, 32 - r);
 67 | 	return amd_bitalign((vv).yx, (vv).xy, 64 - r);
 68 | }
 69 | #else
 70 | #define dataType ulong
 71 | #define as_dataType as_ulong
 72 | #define rol(x, s) (((x) << (s)) | ((x) >> (64 - (s)))) /* generic path: 64-bit rotate-left, valid for 0 < s < 64 */
 73 | #endif
 74 | 
 75 | /*** Helper macros to unroll the permutation: REPEATn(e) pastes `e` n times; FOR5(v, s, e) sets v = 0, then runs `e; v += s;` five times. ***/
 76 | #define REPEAT6(e) e e e e e e
 77 | #define REPEAT24(e) REPEAT6(e e e e) /* 6 x 4 copies */
 78 | #define REPEAT23(e) REPEAT6(e e e) e e e e e /* 6 x 3 + 5 copies */
 79 | #define REPEAT5(e) e e e e e
 80 | #define FOR5(v, s, e) \
 81 |   v = 0;            \
 82 |   REPEAT5(e; v += s;)
 83 | 
 84 | /*** Keccak-f[1600]: permutes the 25-lane state behind `state` in place (rounds 0..22 in full, round 23 reduced). ***/
 85 | STATIC inline void keccakf(void *state) {
 86 |   dataType *a = (dataType *)state;
 87 |   dataType b[5] = {0};
 88 |   dataType t = 0, v = 0;
 89 |   uint8_t x, y;
 90 |   // The unroll hint below is only emitted when cl_amd_media_ops is defined.
 91 | #if defined(cl_amd_media_ops)
 92 |   #pragma unroll
 93 | #endif
 94 |   for (int i = 0; i < 23; i++) {
 95 |     // Theta: b[x] = XOR of the five lanes of column x
 96 |     FOR5(x, 1,
 97 |       b[x] = a[x] ^ a[x+5] ^ a[x+10] ^ a[x+15] ^ a[x+20];)
 98 |     // Combine neighbouring parities in place; v and t keep the original b[4] and b[0] for the last two updates.
 99 |     v = b[4]; t = b[0];
100 |     b[4] = b[4] ^ rol(b[1], 1);
101 |     b[0] = b[0] ^ rol(b[2], 1);
102 |     b[1] = b[1] ^ rol(b[3], 1);
103 |     b[2] = b[2] ^ rol(v, 1);
104 |     b[3] = b[3] ^ rol(t, 1);
105 |     // XOR b[(x + 4) % 5] into every lane of column x (y steps through the rows: 0, 5, 10, 15, 20).
106 |     FOR5(x, 1,
107 |       FOR5(y, 5, a[y + x] ^= b[(x + 4) % 5]; ))
108 | 
109 |     // Rho and pi: walking x from 23 down to 1, lane pi[x] receives lane pi[x-1] rotated by rho[x]; t saves a[1] for the final assignment.
110 |     t = a[1];
111 |     x = 23;
112 |     REPEAT23(a[pi[x]] = rol(a[pi[x-1]], rho[x]); x--; )
113 |     a[pi[ 0]] = rol(        t, rho[ 0]);
114 | 
115 |     // Chi: within each row of five lanes, a[k] ^= ~a[k+1] & a[k+2] (expressed with bitselect); v and t hold the row's original first two lanes.
116 |     FOR5(y, 5, 
117 |       v = a[y]; t = a[y+1];
118 |       a[y  ] = bitselect(a[y  ] ^ a[y+2], a[y  ], a[y+1]);
119 |       a[y+1] = bitselect(a[y+1] ^ a[y+3], a[y+1], a[y+2]);
120 |       a[y+2] = bitselect(a[y+2] ^ a[y+4], a[y+2], a[y+3]);
121 |       a[y+3] = bitselect(a[y+3] ^      v, a[y+3], a[y+4]);
122 |       a[y+4] = bitselect(a[y+4] ^      t, a[y+4], v);
123 |     )
124 | 
125 |     // Iota: XOR the round constant into lane 0
126 |     a[0] ^= as_dataType(RC[i]);
127 | }
128 |   /* Round 23 (last), reduced: only a[0]..a[3] receive complete round-23 values; hash() reads just those four lanes. */
129 |       // Theta: column parities, computed exactly as in the loop above
130 |     FOR5(x, 1,
131 |       b[x] = a[x] ^ a[x+5] ^ a[x+10] ^ a[x+15] ^ a[x+20];)
132 | 
133 |     v = b[4]; t = b[0];
134 |     b[4] = b[4] ^ rol(b[1], 1);
135 |     b[0] = b[0] ^ rol(b[2], 1);
136 |     b[1] = b[1] ^ rol(b[3], 1);
137 |     b[2] = b[2] ^ rol(v, 1);
138 |     b[3] = b[3] ^ rol(t, 1);
139 |     // Apply the parities only to a[0]..a[4] and to a[6], a[12], a[18], a[24] (the lanes read by the reduced Rho/pi step).
140 |     a[0] ^= b[4];
141 |     a[1] ^= b[0]; a[6] ^= b[0];
142 |     a[2] ^= b[1]; a[12] ^= b[1];
143 |     a[3] ^= b[2]; a[18] ^= b[2];
144 |     a[4] ^= b[3]; a[24] ^= b[3];
145 | 
146 |     // Rho and pi, restricted to the four moves that land in a[1]..a[4]
147 |     a[1]=rol(a[pi[22]], rho[23]);
148 |     a[2]=rol(a[pi[16]], rho[17]);
149 |     a[4]=rol(a[pi[10]], rho[11]);
150 |     a[3]=rol(a[pi[ 4]], rho[ 5]);
151 | 
152 |     // Chi on the first row only: a[0]..a[3] are rewritten, a[4] is only read; v keeps the original a[0]
153 |     v = a[0];
154 | 
155 |     a[0] = bitselect(a[0] ^ a[2], a[0], a[1]); 
156 |     a[1] = bitselect(a[1] ^ a[3], a[1], a[2]); 
157 |     a[2] = bitselect(a[2] ^ a[4], a[2], a[3]); 
158 |     a[3] = bitselect(a[3] ^    v, a[3], a[4]); 
159 | 
160 |     // Iota: last round constant into lane 0
161 |     a[0] ^= as_dataType(RC[23]);
162 | }
163 | 
164 | /******** The FIPS202-defined functions. ********/
165 | 
166 | /*** Some helper macros. ***/
167 | // P names the permutation that hash() applies; Plen is 200, i.e. 25 lanes x 8 bytes.
168 | 
169 | #define P keccakf
170 | #define Plen 200
171 | // Two 25-lane starting states. NOTE(review): presumably passed to hash() as `initP` (precomputed Keccak states for the PoW and heavy-hash stages) - confirm against the kernel body and host code.
172 | constant const ulong powP[25] = { 0x113cff0da1f6d83dUL, 0x29bf8855b7027e3cUL, 0x1e5f2e720efb44d2UL, 0x1ba5a4a3f59869a0UL, 0x7b2fafca875e2d65UL, 0x4aef61d629dce246UL, 0x183a981ead415b10UL, 0x776bf60c789bc29cUL, 0xf8ebf13388663140UL, 0x2e651c3c43285ff0UL, 0x0f96070540f14a0aUL, 0x44e367875b299152UL, 0xec70f1a425b13715UL, 0xe6c85d8f82e9da89UL, 0xb21a601f85b4b223UL, 0x3485549064a36a46UL, 0x0f06dd1c7a2f851aUL, 0xc1a2021d563bb142UL, 0xba1de5e4451668e4UL, 0xd102574105095f8dUL, 0x89ca4e849bcecf4aUL, 0x48b09427a8742edbUL, 0xb1fcce9ce78b5272UL, 0x5d1129cf82afa5bcUL, 0x02b97c786f824383UL };
173 | constant const ulong heavyP[25] = { 0x3ad74c52b2248509UL, 0x79629b0e2f9f4216UL, 0x7a14ff4816c7f8eeUL, 0x11a75f4c80056498UL, 0xe720e0df44eecedaUL, 0x72c7d82e14f34069UL, 0xc100ff2a938935baUL, 0x5e219040250fc462UL, 0x8039f9a60dcf6a48UL, 0xa0bcaa9f792a3d0cUL, 0xf431c05dd0a9a226UL, 0xd31f4cc354c18c3fUL, 0x6c6b7d01a769cc3dUL, 0x2ec65bd3562493e4UL, 0x4ef74b3a99cdb044UL, 0x774c86835434f2b0UL, 0x07e961b036bc9416UL, 0x7e8f1db17765cc07UL, 0xea8fdb80bac46d39UL, 0xb992f2d37b34ca58UL, 0xc776c5048481b957UL, 0x47c39f675112c22eUL, 0x92bb399db5290c0aUL, 0x549ae0312f9fc615UL, 0x1619327d10b9da35UL };
174 | 
175 | /** Single-block sponge: start from initP, XOR the 10 input lanes in, permute, and emit the first four lanes. **/
176 | STATIC inline void hash(constant const ulong *initP, const ulong* in, ulong4* out) {
177 |   private ulong lanes[25];
178 |   // Load the 25-lane starting state, then absorb the input into lanes 0..9.
179 |   #pragma unroll
180 |   for (size_t k = 0; k < 25; k++) lanes[k] = initP[k];
181 |   #pragma unroll
182 |   for (size_t k = 0; k < 10; k++) lanes[k] ^= in[k];
183 |   // Run the permutation (P expands to keccakf).
184 |   P(lanes);
185 |   // The result is the first 32 bytes of the permuted state.
186 |   *out = *(ulong4 *)lanes;
187 | }
188 | 
189 | /* RANDOM NUMBER GENERATOR: xoshiro256** (see the authors' notice below) */
190 | /* Earlier MWC64X reference, kept for history: http://cas.ee.ic.ac.uk/people/dt10/research/rngs-gpu-mwc64x.html */
191 | 
192 | /*  Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org)
193 | 
194 | To the extent possible under law, the author has dedicated all copyright
195 | and related and neighboring rights to this software to the public domain
196 | worldwide. This software is distributed without any warranty.
197 | 
198 | See <http://creativecommons.org/publicdomain/zero/1.0/>. */
199 | 
200 | 
201 | /* This is xoshiro256** 1.0, one of our all-purpose, rock-solid
202 |    generators. It has excellent (sub-ns) speed, a state (256 bits) that is
203 |    large enough for any parallel application, and it passes all tests we
204 |    are aware of.
205 | 
206 |    For generating just floating-point numbers, xoshiro256+ is even faster.
207 | 
208 |    The state must be seeded so that it is not everywhere zero. If you have
209 |    a 64-bit seed, we suggest to seed a splitmix64 generator and use its
210 |    output to fill s. */
211 | /* Rotate x left by k bits. Uses the built-in rotate(), which is defined for every k (a manual x >> (64 - k) would shift by 64 when k == 0). */
212 | STATIC inline uint64_t rotl(const uint64_t x, int k) {
213 | 	return rotate(x, (uint64_t)k);
214 | }
215 | /* One xoshiro256** step: returns the next 64-bit output and advances the state stored at `s` (x, y, z, w play the role of s[0..3] in the reference code). */
216 | STATIC inline uint64_t xoshiro256_next(global ulong4 *s) {
217 | 	// Work on a private copy so the state is loaded from and stored to global memory exactly once.
218 | 	ulong4 st = *s;
219 | 	const uint64_t result = rotl(st.y * 5, 7) * 9;
220 | 	const uint64_t t = st.y << 17;
221 | 
222 | 	st.z ^= st.x;
223 | 	st.w ^= st.y;
224 | 	st.y ^= st.z;
225 | 	st.x ^= st.w;
226 | 	st.z ^= t;
227 | 	st.w = rotl(st.w, 45);
228 | 
229 | 	*s = st;
230 | 	return result;
231 | }
232 | /* KERNEL CODE */
233 | // Enable 64-bit atomics when the device advertises them; without them the `lock` variable at the end of this section is compiled in instead.
234 | #ifdef cl_khr_int64_base_atomics
235 | #pragma OPENCL EXTENSION cl_khr_int64_base_atomics: enable
236 | #endif
237 | typedef union _Hash { // one 256-bit value, viewable as a ulong4 or as 32 raw bytes
238 |   ulong4 hash;
239 |   uchar bytes[32];
240 | } Hash;
241 | // Sizing constants: HALF_/QUARTER_MATRIX_SIZE are MATRIX_SIZE / 2 and / 4; HASH_HEADER_SIZE (72) is 9 x 8, the byte size of the kernel's hash_header[9] ulong array.
242 | #define BLOCKDIM 1024
243 | #define MATRIX_SIZE 64
244 | #define HALF_MATRIX_SIZE 32
245 | #define QUARTER_MATRIX_SIZE 16
246 | #define HASH_HEADER_SIZE 72
247 | // Values of the kernel's random_type argument: LEAN XORs one shared 64-bit word with the work-item id; XOSHIRO steps a per-work-item xoshiro256** state.
248 | #define RANDOM_TYPE_LEAN 0
249 | #define RANDOM_TYPE_XOSHIRO 1
250 | // LT_U256(X, Y): unsigned 256-bit X < Y, where X is a ulong4 value and Y a pointer to one; .w is compared first (most significant), .x last.
251 | #define LT_U256(X,Y) (X.w != Y->w ? X.w < Y->w : X.z != Y->z ? X.z < Y->z : X.y != Y->y ? X.y < Y->y : X.x < Y->x)
252 | // Program-scope flag, only present without cl_khr_int64_base_atomics; heavy_hash resets it to 0 from work-item 0. NOTE(review): presumably guards the final_nonce/final_hash update - that code is outside this view.
253 | #ifndef cl_khr_int64_base_atomics
254 | global int lock = false;
255 | #endif
256 | 
257 | #if defined(NVIDIA_CUDA) && (__COMPUTE_MAJOR__ > 6 || (__COMPUTE_MAJOR__ == 6 && __COMPUTE_MINOR__ >= 1))
258 | #define amul4bit(X,Y,Z) _amul4bit((constant uint32_t*)(X), (private uint32_t*)(Y), (uint32_t *)(Z))
259 | void STATIC inline _amul4bit(__constant uint32_t packed_vec1[32], uint32_t packed_vec2[32], uint32_t *ret) {
260 |     // We assume each 32 bits have four values: A0 B0 C0 D0
261 |     uint32_t res = 0;
262 |     #pragma unroll
263 |     for (int i=0; i<QUARTER_MATRIX_SIZE; i++) {
264 |         asm("dp4a.u32.u32" " %0, %1, %2, %3;": "=r" (res): "r" (packed_vec1[i]), "r" (packed_vec2[i]), "r" (res));
265 |     }
266 |     *ret = res;
267 | }
268 | #elif (defined(OFFLINE) && (defined(__gfx906__) || defined(__gfx908__))) || defined(__gfx1011__) || defined(__gfx1012__) || defined(__gfx1030__) || defined(__gfx1031__) || defined(__gfx1032__) || defined(__gfx1034__)
269 | #define amul4bit(X,Y,Z) _amul4bit((constant uint32_t*)(X), (private uint32_t*)(Y), (uint32_t *)(Z))
270 | void STATIC inline _amul4bit(__constant uint32_t packed_vec1[32], uint32_t packed_vec2[32], uint32_t *ret) {
271 |     // We assume each 32 bits have four values: A0 B0 C0 D0
272 |     uint32_t res = 0;
273 | #if defined(__FORCE_AMD_V_DOT8_U32_U4__)
274 |     for (int i=0; i<8; i++) {
275 |         __asm__("v_dot8_u32_u4" " %0, %1, %2, %3;": "=v" (res): "r" (packed_vec1[i]), "r" (packed_vec2[i]), "v" (res));
276 |     }
277 | #else
278 |     for (int i=0; i<QUARTER_MATRIX_SIZE; i++) {
279 |         __asm__("v_dot4_u32_u8" " %0, %1, %2, %3;": "=v" (res): "r" (packed_vec1[i]), "r" (packed_vec2[i]), "v" (res));
280 |     }
281 | #endif
282 |     *ret = res;
283 | }
284 | #else
285 | #define amul4bit(X,Y,Z) _amul4bit((constant uchar4*)(X), (private uchar4*)(Y), (uint32_t *)(Z))
286 | void STATIC inline _amul4bit(__constant uchar4 packed_vec1[32], uchar4 packed_vec2[32], uint32_t *ret) {
287 |     // Portable fallback: sums the element-wise products of two packed vectors into *ret. Each 32-bit word holds four byte values (A0 B0 C0 D0); on the DOT8 path every byte is read as two 4-bit values.
288 | #if __FORCE_AMD_V_DOT8_U32_U4__ == 1
289 |     uint32_t res = 0;
290 |     __constant uchar4 *a4 = packed_vec1;
291 |     uchar4 *b4 = packed_vec2;
292 |     for (int i=0; i<8; i++) { // 8 uchar4 = 32 bytes = 64 nibble pairs, multiplied pairwise and summed
293 |         res += ((a4[i].x>>0)&0xf)*((b4[i].x>>0)&0xf); // low nibble of byte x
294 |         res += ((a4[i].x>>4)&0xf)*((b4[i].x>>4)&0xf); // high nibble of byte x
295 |         res += ((a4[i].y>>0)&0xf)*((b4[i].y>>0)&0xf);
296 |         res += ((a4[i].y>>4)&0xf)*((b4[i].y>>4)&0xf);
297 |         res += ((a4[i].z>>0)&0xf)*((b4[i].z>>0)&0xf);
298 |         res += ((a4[i].z>>4)&0xf)*((b4[i].z>>4)&0xf);
299 |         res += ((a4[i].w>>0)&0xf)*((b4[i].w>>0)&0xf);
300 |         res += ((a4[i].w>>4)&0xf)*((b4[i].w>>4)&0xf);
301 |     }
302 |     *ret = res;
303 | #else
304 |     ushort4 res = 0; // four 16-bit partial sums, one per byte lane; NOTE(review): presumably relies on the inputs being small (4-bit) values so a lane cannot overflow - confirm
305 |     for (int i=0; i<QUARTER_MATRIX_SIZE; i++) { // QUARTER_MATRIX_SIZE is defined outside this block
306 |         res += convert_ushort4(packed_vec1[i])*convert_ushort4(packed_vec2[i]); // widen each byte to 16 bits before multiplying
307 |     }
308 |     res.s01 = res.s01 + res.s23; // fold the four lanes down to two ...
309 |     *ret = res.s0 + res.s1; // ... and then to the scalar result
310 | #endif
311 | }
312 | #endif
313 | #define SWAP4( x ) as_uint( as_uchar4( x ).wzyx ) // reverses the byte order of a 32-bit value
314 | 
315 | kernel void heavy_hash( // one work item tests one nonce: hash(powP) of header+nonce -> matrix mixing -> hash(heavyP) -> compare with target
316 |     const ulong local_size, // work-group size as supplied by the host; only read on the PAL path
317 |     const ulong nonce_mask, // bits of the generated nonce that are kept
318 |     const ulong nonce_fixed, // bits OR-ed into every nonce after masking
319 |     __constant const ulong hash_header[9], // the first 9 of the 10 words fed to the first hash
320 |     __constant const uint8_t matrix[4096], // mixing matrix; each iteration of the row loop below reads two consecutive blocks of it
321 |     __constant const ulong4 *target, // 256-bit bound the final hash is tested against with LT_U256
322 |     const uint8_t random_type, // selects the nonce source (RANDOM_TYPE_*)
323 |     global void * restrict random_state, // lean: one 64-bit base value; xoshiro: one ulong4 state per work item
324 |     volatile global uint64_t *final_nonce, // out: nonce of a hash that passed the target test
325 |     volatile global ulong4 *final_hash // only referenced by the commented-out debug code below
326 | ) {
327 |     #if defined(PAL)
328 |     int nonceId = get_group_id(0)*local_size + get_local_id(0); // index rebuilt from group/local ids using the host-provided group size
329 |     #else
330 |     int nonceId = get_global_id(0);
331 |     #endif
332 |     // Without 64-bit atomics the result is guarded by `lock` (declared outside this kernel), which work item 0 resets here.
333 |     #ifndef cl_khr_int64_base_atomics
334 |     if (nonceId == 0)
335 |        lock = 0;
336 |     work_group_barrier(CLK_GLOBAL_MEM_FENCE); // NOTE(review): a work-group barrier does not order other work-groups against the reset - confirm this is sufficient
337 |     #endif
338 |     // Pick this work item's nonce according to random_type.
339 |     private uint64_t nonce;
340 |     switch (random_type){
341 |       case RANDOM_TYPE_LEAN:
342 |         // nonce = ((uint64_t *)random_state)[0] + nonceId;
343 |         nonce = (((__global uint64_t *)random_state)[0]) ^ nonceId; // shared base value XOR the work-item index
344 |         break;
345 |       case RANDOM_TYPE_XOSHIRO:
346 |       default:
347 |         nonce = xoshiro256_next(((global ulong4 *)random_state) + nonceId); // draws from this work item's own state slot (xoshiro256_next is defined elsewhere)
348 |     }
349 |     nonce = (nonce & nonce_mask) | nonce_fixed;
350 |     // Input of the first hash: the 9 header words followed by the nonce.
351 |     int64_t buffer[10];
352 | 
353 |     // header
354 |     #pragma unroll
355 |     for(int i=0; i<9; i++) buffer[i] = hash_header[i];
356 |     // data
357 |     buffer[9] = nonce;
358 | 
359 |     Hash hash_, hash2_; // Hash is declared elsewhere; it is accessed here both as .hash (ulong4) and as .bytes
360 |     hash(powP, (const ulong*)buffer, &hash_.hash); // first hash, parameterized by powP
361 |     #if __FORCE_AMD_V_DOT8_U32_U4__ == 1
362 |     #else
363 |     private uchar hash_part[64]; // the 32 hash bytes split into 64 nibbles, high nibble first
364 |     #if defined(NVIDIA_CUDA)
365 |     #pragma unroll
366 |     #endif
367 |     for (int i=0; i<32; i++) {
368 |          hash_part[2*i] = (hash_.bytes[i] & 0xF0) >> 4;
369 |          hash_part[2*i+1] = hash_.bytes[i] & 0x0F;
370 |     }
371 |     #endif
372 |     // Matrix step: each output byte is built from two matrix-by-hash dot products, each shifted right by 10.
373 |     uint32_t product1, product2;
374 |     #if defined(NVIDIA_CUDA) || defined(__FORCE_AMD_V_DOT8_U32_U4__)
375 |     #pragma unroll
376 |     #endif
377 |     for (int rowId=0; rowId<32; rowId++){
378 |     #if __FORCE_AMD_V_DOT8_U32_U4__ == 1
379 |         amul4bit(matrix + 64*rowId, hash_.bytes, &product1); // packed layout: two 32-byte blocks per iteration, hash bytes used as-is
380 |         amul4bit(matrix + 64*rowId+32, hash_.bytes, &product2);
381 |     #else
382 |         amul4bit(matrix + 128*rowId, hash_part, &product1); // unpacked layout: two 64-byte blocks per iteration, multiplied with the nibble array
383 |         amul4bit(matrix + 128*rowId+64, hash_part, &product2);
384 |     #endif
385 |         product1 >>= 10;
386 |         product2 >>= 10;
387 | //        hash2_.bytes[rowId] = hash_.bytes[rowId] ^ bitselect(product1, product2, 0x0000000FU);
388 |         hash2_.bytes[rowId] = hash_.bytes[rowId] ^ ((uint8_t)((product1 << 4) | (uint8_t)(product2))); // (product1 << 4) | product2, truncated to 8 bits and XOR-ed into the hash byte
389 |     }
390 |     buffer[0] = hash2_.hash.x; // input of the second hash: the four mixed words followed by zero padding
391 |     buffer[1] = hash2_.hash.y;
392 |     buffer[2] = hash2_.hash.z;
393 |     buffer[3] = hash2_.hash.w;
394 |     #pragma unroll
395 |     for(int i=4; i<10; i++) buffer[i] = 0;
396 | 
397 |     hash(heavyP, (const ulong*)buffer, &hash_.hash); // second hash, parameterized by heavyP; the result overwrites hash_
398 | 
399 |     if (LT_U256(hash_.hash, target)){
400 |         //printf("%lu: %lu < %lu: %d %d\n", nonce, ((uint64_t *)hash_)[3], target[3], ((uint64_t *)hash_)[3] < target[3], LT_U256((uint64_t *)hash_, target));
401 |         #ifdef cl_khr_int64_base_atomics
402 |         atom_cmpxchg(final_nonce, 0, nonce); // stored only while final_nonce is still 0, so the first hit is kept
403 |         #else
404 |         if (!atom_cmpxchg(&lock, 0, 1)) { // only the work item that flips lock from 0 to 1 writes the result
405 |             *final_nonce = nonce;
406 |             //for(int i=0;i<4;i++) final_hash[i] = ((uint64_t volatile *)hash_)[i];
407 |         }
408 |         #endif
409 |     }
410 |     /*if (nonceId==1) {
411 |         //printf("%lu: %lu < %lu: %d %d\n", nonce, ((uint64_t *)hash2_)[3], target[3], ((uint64_t *)hash_)[3] < target[3]);
412 |         *final_nonce = nonce;
413 |         for(int i=0;i<4;i++) final_hash[i] = ((uint64_t volatile *)hash_)[i];
414 |     }*/
415 | }
416 | 


--------------------------------------------------------------------------------
/plugins/opencl/src/worker.rs:
--------------------------------------------------------------------------------
  1 | use crate::cli::NonceGenEnum;
  2 | use crate::Error;
  3 | use include_dir::{include_dir, Dir};
  4 | use kaspa_miner::xoshiro256starstar::Xoshiro256StarStar;
  5 | use kaspa_miner::Worker;
  6 | use log::{info, warn};
  7 | use opencl3::command_queue::{CommandQueue, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE};
  8 | use opencl3::context::Context;
  9 | use opencl3::device::Device;
 10 | use opencl3::event::{release_event, retain_event, wait_for_events};
 11 | use opencl3::kernel::{ExecuteKernel, Kernel};
 12 | use opencl3::memory::{Buffer, ClMem, CL_MAP_WRITE, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE, CL_MEM_WRITE_ONLY};
 13 | use opencl3::platform::Platform;
 14 | use opencl3::program::{Program, CL_FINITE_MATH_ONLY, CL_MAD_ENABLE, CL_STD_2_0};
 15 | use opencl3::types::{cl_event, cl_uchar, cl_ulong, CL_BLOCKING};
 16 | use rand::{thread_rng, Fill, RngCore};
 17 | use std::borrow::Borrow;
 18 | use std::ffi::c_void;
 19 | use std::ptr;
 20 | use std::sync::Arc;
 21 | 
 22 | static BINARY_DIR: Dir = include_dir!("./plugins/opencl/resources/bin/"); // pre-built kernel binaries embedded at compile time; looked up as "<device>_kaspa-opencl.bin"
 23 | static PROGRAM_SOURCE: &str = include_str!("../resources/kaspa-opencl.cl"); // OpenCL C kernel source embedded at compile time; compiled by `from_source`
 24 | /// Mining worker bound to one OpenCL device: owns the context, queue, compiled `heavy_hash` kernel and all device buffers.
 25 | pub struct OpenCLGPUWorker {
 26 |     context: Arc<Context>, // context created from the device handed to `new`
 27 |     random: NonceGenEnum, // nonce generation scheme; also decides the layout of `random_state`
 28 |     local_size: usize, // the device's maximum work-group size; passed to the kernel as its first argument
 29 |     workload: usize, // global work size of every kernel launch
 30 |     // Compiled kernel entry point.
 31 |     heavy_hash: Kernel,
 32 |     // Queue used for every transfer and launch (created with out-of-order execution enabled).
 33 |     queue: CommandQueue,
 34 |     // Device-side generator state and result slots.
 35 |     random_state: Buffer<cl_ulong>, // xoshiro: 4 words per work item; lean: a single word rewritten before each launch
 36 |     final_nonce: Buffer<cl_ulong>, // one word; zeroed per block and read back by `copy_output_to`
 37 |     final_hash: Buffer<[cl_ulong; 4]>, // passed to the kernel but never read back by the host
 38 |     // Per-block kernel inputs, rewritten by `load_block_constants`.
 39 |     hash_header: Buffer<cl_uchar>, // 72 bytes
 40 |     matrix: Buffer<cl_uchar>, // 64 * 64 bytes; only partly filled when the packed layout is used
 41 |     target: Buffer<cl_ulong>, // 4 words
 42 |     // Host-side bookkeeping.
 43 |     events: Vec<cl_event>, // raw event handles of the last constant upload; used as the kernel wait list and by `sync`
 44 |     experimental_amd: bool, // true when the matrix is uploaded packed, two entries per byte
 45 | }
 46 | 
 47 | impl Worker for OpenCLGPUWorker {
 48 |     /// Returns the OpenCL name of the context's default device.
 49 |     ///
 50 |     /// # Panics
 51 |     /// Panics if the driver cannot report the device name.
 52 |     fn id(&self) -> String {
 53 |         Device::new(self.context.default_device()).name().unwrap()
 54 |     }
 55 | 
 56 |     /// Uploads the per-block kernel inputs and resets the result slot.
 57 |     ///
 58 |     /// `final_nonce` is zeroed, then `hash_header`, the matrix and `target` are written to their
 59 |     /// device buffers with blocking writes. With `experimental_amd` set, each pair of adjacent
 60 |     /// matrix entries is packed into one byte (first entry in the high nibble); otherwise every
 61 |     /// entry is narrowed to a single byte.
 62 |     ///
 63 |     /// # Panics
 64 |     /// Panics if a buffer write or retaining the resulting event fails.
 65 |     fn load_block_constants(&mut self, hash_header: &[u8; 72], matrix: &[[u16; 64]; 64], target: &[u64; 4]) {
 66 |         let cl_uchar_matrix: Vec<cl_uchar> = if self.experimental_amd {
 67 |             matrix.iter().flat_map(|row| row.chunks(2).map(|v| ((v[0] << 4) | v[1]) as cl_uchar)).collect()
 68 |         } else {
 69 |             matrix.iter().flat_map(|row| row.map(|v| v as cl_uchar)).collect()
 70 |         };
 71 |         self.queue
 72 |             .enqueue_write_buffer(&mut self.final_nonce, CL_BLOCKING, 0, &[0], &[])
 73 |             .map_err(|e| e.to_string())
 74 |             .unwrap()
 75 |             .wait()
 76 |             .unwrap();
 77 |         self.queue
 78 |             .enqueue_write_buffer(&mut self.hash_header, CL_BLOCKING, 0, hash_header, &[])
 79 |             .map_err(|e| e.to_string())
 80 |             .unwrap()
 81 |             .wait()
 82 |             .unwrap();
 83 |         self.queue
 84 |             .enqueue_write_buffer(&mut self.matrix, CL_BLOCKING, 0, cl_uchar_matrix.as_slice(), &[])
 85 |             .map_err(|e| e.to_string())
 86 |             .unwrap()
 87 |             .wait()
 88 |             .unwrap();
 89 |         let copy_target = self
 90 |             .queue
 91 |             .enqueue_write_buffer(&mut self.target, CL_BLOCKING, 0, target, &[])
 92 |             .map_err(|e| e.to_string())
 93 |             .unwrap();
 94 | 
 95 |         // Give up our reference to the previous block's events only now that they are replaced:
 96 |         // `calculate_hash` and `sync` keep handing these handles to the driver, so they must stay
 97 |         // retained for as long as they sit in `self.events`.
 98 |         for stale in self.events.drain(..) {
 99 |             if let Err(e) = release_event(stale) {
100 |                 warn!("failed to release OpenCL event: {:?}", e);
101 |             }
102 |         }
103 |         // `copy_target` gives up its own reference when dropped; keep one for `self.events`.
104 |         let event = copy_target.get();
105 |         retain_event(event).unwrap();
106 |         self.events.push(event);
107 |     }
108 | 
109 |     /// Runs one `heavy_hash` launch over `workload` work items and waits for it to finish.
110 |     ///
111 |     /// `_nonces` is ignored: nonces are produced on the device from `random_state`. In lean mode
112 |     /// a fresh random 64-bit base value is uploaded before every launch.
113 |     ///
114 |     /// # Panics
115 |     /// Panics if uploading the base value, enqueueing the kernel or waiting for it fails.
116 |     fn calculate_hash(&mut self, _nonces: Option<&Vec<u64>>, nonce_mask: u64, nonce_fixed: u64) {
117 |         if self.random == NonceGenEnum::Lean {
118 |             self.queue
119 |                 .enqueue_write_buffer(&mut self.random_state, CL_BLOCKING, 0, &[thread_rng().next_u64()], &[])
120 |                 .map_err(|e| e.to_string())
121 |                 .unwrap()
122 |                 .wait()
123 |                 .unwrap();
124 |         }
125 |         // Numeric selector handed to the kernel's `random_type` switch.
126 |         let random_type: cl_uchar = match self.random {
127 |             NonceGenEnum::Lean => 0,
128 |             NonceGenEnum::Xoshiro => 1,
129 |         };
130 |         // Arguments are bound positionally, in the order of the kernel's parameter list.
131 |         let kernel_event = ExecuteKernel::new(&self.heavy_hash)
132 |             .set_arg(&(self.local_size as u64))
133 |             .set_arg(&nonce_mask)
134 |             .set_arg(&nonce_fixed)
135 |             .set_arg(&self.hash_header)
136 |             .set_arg(&self.matrix)
137 |             .set_arg(&self.target)
138 |             .set_arg(&random_type)
139 |             .set_arg(&self.random_state)
140 |             .set_arg(&self.final_nonce)
141 |             .set_arg(&self.final_hash)
142 |             .set_global_work_size(self.workload)
143 |             .set_event_wait_list(self.events.borrow())
144 |             .enqueue_nd_range(&self.queue)
145 |             .map_err(|e| e.to_string())
146 |             .unwrap();
147 | 
148 |         kernel_event.wait().unwrap();
149 |     }
150 | 
151 |     /// Blocks until every event in `self.events` has completed.
152 |     ///
153 |     /// The handles stay retained; `load_block_constants` releases them when it replaces them.
154 |     ///
155 |     /// # Errors
156 |     /// Returns the OpenCL error code if waiting fails.
157 |     fn sync(&self) -> Result<(), Error> {
158 |         // clWaitForEvents rejects an empty list, and there is nothing to wait for anyway.
159 |         if self.events.is_empty() {
160 |             return Ok(());
161 |         }
162 |         wait_for_events(&self.events).map_err(|e| format!("waiting error code {}", e))?;
163 |         Ok(())
164 |     }
165 | 
166 |     /// Number of work items (candidate nonces) processed per kernel launch.
167 |     fn get_workload(&self) -> usize {
168 |         self.workload
169 |     }
170 | 
171 |     /// Reads the device's `final_nonce` buffer into `nonces` with a blocking read.
172 |     ///
173 |     /// # Errors
174 |     /// Returns the driver error if the read fails.
175 |     fn copy_output_to(&mut self, nonces: &mut Vec<u64>) -> Result<(), Error> {
176 |         self.queue
177 |             .enqueue_read_buffer(&self.final_nonce, CL_BLOCKING, 0, nonces, &[])
178 |             .map_err(|e| e.to_string())?;
179 |         Ok(())
180 |     }
181 | }
156 | 
157 | impl OpenCLGPUWorker {
158 |     /// Creates a worker for `device`: builds (or loads) the kernel program, allocates the device
159 |     /// buffers and seeds the on-device nonce generator.
160 |     ///
161 |     /// * `workload` - work items per launch: an absolute count when `is_absolute` is set,
162 |     ///   otherwise a multiplier of `max_work_group_size * max_compute_units`.
163 |     /// * `experimental_amd` - build the kernel variant that consumes a matrix packed two entries
164 |     ///   per byte; ignored on the architectures excluded below.
165 |     /// * `use_binary` - first try a pre-built binary embedded for the device, falling back to a
166 |     ///   source build.
167 |     /// * `random` - nonce generation scheme used on the device.
168 |     ///
169 |     /// # Errors
170 |     /// Fails if device limits cannot be queried, the seed cannot be drawn, or the xoshiro state
171 |     /// buffer cannot be mapped.
172 |     ///
173 |     /// # Panics
174 |     /// Panics if creating the context, program, kernel, queue or any buffer fails.
175 |     pub fn new(
176 |         device: Device,
177 |         workload: f32,
178 |         is_absolute: bool,
179 |         experimental_amd: bool,
180 |         mut use_binary: bool,
181 |         random: &NonceGenEnum,
182 |     ) -> Result<Self, Error> {
183 |         let name =
184 |             device.board_name_amd().unwrap_or_else(|_| device.name().unwrap_or_else(|_| "Unknown Device".into()));
185 |         info!("{}: Using OpenCL", name);
186 |         let version = device.version().unwrap_or_else(|_| "unkown version".into());
187 |         info!(
188 |             "{}: Device supports {} with extensions: {}",
189 |             name,
190 |             version,
191 |             device.extensions().unwrap_or_else(|_| "NA".into())
192 |         );
193 | 
194 |         let local_size = device.max_work_group_size().map_err(|e| e.to_string())?;
195 |         let chosen_workload = if is_absolute {
196 |             workload as usize
197 |         } else {
198 |             let compute_units = device.max_compute_units().map_err(|e| e.to_string())? as usize;
199 |             (workload * (local_size * compute_units) as f32) as usize
200 |         };
201 |         info!("{}: Chosen workload is {}", name, chosen_workload);
202 |         let context =
203 |             Arc::new(Context::from_device(&device).unwrap_or_else(|_| panic!("{}::Context::from_device failed", name)));
204 |         // A plain borrow is enough: nothing created from it holds on to the reference, so
205 |         // `context` can still be moved into the worker at the end.
206 |         let context_ref: &Context = &context;
207 | 
208 |         // Device names may carry ':'-separated feature suffixes (e.g. "gfx906:sramecc+:xnack-");
209 |         // only the part before the first ':' names the architecture.
210 |         let device_name = device.name().unwrap_or_else(|_| "Unknown".into()).to_lowercase();
211 |         let arch_name = device_name.split(':').next().unwrap_or(device_name.as_str());
212 |         // On these architectures the host always uploads the unpacked matrix.
213 |         let experimental_amd_use = !matches!(arch_name, "tahiti" | "ellesmere" | "gfx1010" | "gfx906" | "gfx908");
214 |         // The kernel may only expect the packed matrix when the host is going to upload it, so
215 |         // the define is gated by the same exclusion list as the `experimental_amd` field.
216 |         let packed_kernel = experimental_amd && experimental_amd_use;
217 |         if experimental_amd && !experimental_amd_use {
218 |             warn!("{}: the experimental AMD kernel is not used on {}; ignoring the flag", name, arch_name);
219 |         }
220 |         let options = if packed_kernel { "-D __FORCE_AMD_V_DOT8_U32_U4__=1 " } else { "" };
221 | 
222 |         let build_from_source = || {
223 |             from_source(&context, &device, options)
224 |                 .unwrap_or_else(|e| panic!("{}::Program::create_and_build_from_source failed: {}", name, e))
225 |         };
226 |         let program = if use_binary {
227 |             info!("{}: Looking for binary for {}", name, arch_name);
228 |             let prebuilt = match BINARY_DIR.get_file(format!("{}_kaspa-opencl.bin", arch_name)) {
229 |                 Some(binary) => match Program::create_and_build_from_binary(&context, &[binary.contents()], "") {
230 |                     Ok(program) => Some(program),
231 |                     Err(e) => {
232 |                         warn!(
233 |                             "{}::Program::create_and_build_from_binary failed: {}. Reverting to compiling from source",
234 |                             name, e
235 |                         );
236 |                         None
237 |                     }
238 |                 },
239 |                 None => {
240 |                     warn!("Binary file not found for {}. Reverting to compiling from source.", arch_name);
241 |                     None
242 |                 }
243 |             };
244 |             // Remember the fallback: the matrix layout chosen below depends on it.
245 |             prebuilt.unwrap_or_else(|| {
246 |                 use_binary = false;
247 |                 build_from_source()
248 |             })
249 |         } else {
250 |             build_from_source()
251 |         };
252 |         info!("Kernels: {:?}", program.kernel_names());
253 |         let heavy_hash =
254 |             Kernel::create(&program, "heavy_hash").unwrap_or_else(|_| panic!("{}::Kernel::create failed", name));
255 | 
256 |         let queue =
257 |             CommandQueue::create_with_properties(&context, device.id(), CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0)
258 |                 .unwrap_or_else(|_| panic!("{}::CommandQueue::create_with_properties failed", name));
259 | 
260 |         let final_nonce = Buffer::<cl_ulong>::create(context_ref, CL_MEM_READ_WRITE, 1, ptr::null_mut())
261 |             .expect("Buffer allocation failed");
262 |         let final_hash = Buffer::<[cl_ulong; 4]>::create(context_ref, CL_MEM_WRITE_ONLY, 1, ptr::null_mut())
263 |             .expect("Buffer allocation failed");
264 | 
265 |         let hash_header = Buffer::<cl_uchar>::create(context_ref, CL_MEM_READ_ONLY, 72, ptr::null_mut())
266 |             .expect("Buffer allocation failed");
267 |         let matrix = Buffer::<cl_uchar>::create(context_ref, CL_MEM_READ_ONLY, 64 * 64, ptr::null_mut())
268 |             .expect("Buffer allocation failed");
269 |         let target = Buffer::<cl_ulong>::create(context_ref, CL_MEM_READ_ONLY, 4, ptr::null_mut())
270 |             .expect("Buffer allocation failed");
271 | 
272 |         let mut seed = [1u64; 4];
273 |         seed.try_fill(&mut rand::thread_rng())?;
274 | 
275 |         let random_state = match random {
276 |             NonceGenEnum::Xoshiro => {
277 |                 info!("Using xoshiro for nonce-generation");
278 |                 let random_state =
279 |                     Buffer::<cl_ulong>::create(context_ref, CL_MEM_READ_WRITE, 4 * chosen_workload, ptr::null_mut())
280 |                         .expect("Buffer allocation failed");
281 |                 // One generator state of 4 x u64 (32 bytes) per work item.
282 |                 let rand_state =
283 |                     Xoshiro256StarStar::new(&seed).iter_jump_state().take(chosen_workload).collect::<Vec<[u64; 4]>>();
284 |                 let mut random_state_local: *mut c_void = ptr::null_mut();
285 |                 info!("{}: Generating initial seed. This may take some time.", name);
286 | 
287 |                 queue
288 |                     .enqueue_map_buffer(
289 |                         &random_state,
290 |                         CL_BLOCKING,
291 |                         CL_MAP_WRITE,
292 |                         0,
293 |                         32 * chosen_workload,
294 |                         &mut random_state_local,
295 |                         &[],
296 |                     )
297 |                     .map_err(|e| e.to_string())?
298 |                     .wait()
299 |                     .unwrap();
300 |                 if random_state_local.is_null() {
301 |                     return Err(format!("{}::could not load random state vector to memory. Consider changing random or lowering workload", name).into());
302 |                 }
303 |                 let copy_bytes = std::mem::size_of_val(rand_state.as_slice());
304 |                 // SAFETY: the mapping is non-null and was requested for `32 * chosen_workload` bytes;
305 |                 // `rand_state` holds at most `chosen_workload` states of 32 bytes, so `copy_bytes`
306 |                 // fits both regions, and a host vector cannot overlap the driver's mapping.
307 |                 unsafe {
308 |                     random_state_local.copy_from(rand_state.as_ptr() as *const c_void, copy_bytes);
309 |                 }
310 |                 queue
311 |                     .enqueue_unmap_mem_object(random_state.get(), random_state_local, &[])
312 |                     .map_err(|e| e.to_string())
313 |                     .unwrap()
314 |                     .wait()
315 |                     .unwrap();
316 |                 info!("{}: Done generating initial seed", name);
317 |                 random_state
318 |             }
319 |             NonceGenEnum::Lean => {
320 |                 info!("Using lean nonce-generation");
321 |                 let mut random_state = Buffer::<cl_ulong>::create(context_ref, CL_MEM_READ_WRITE, 1, ptr::null_mut())
322 |                     .expect("Buffer allocation failed");
323 |                 queue
324 |                     .enqueue_write_buffer(&mut random_state, CL_BLOCKING, 0, &[thread_rng().next_u64()], &[])
325 |                     .map_err(|e| e.to_string())
326 |                     .unwrap()
327 |                     .wait()
328 |                     .unwrap();
329 |                 random_state
330 |             }
331 |         };
332 |         Ok(Self {
333 |             context,
334 |             local_size,
335 |             workload: chosen_workload,
336 |             random: *random,
337 |             heavy_hash,
338 |             random_state,
339 |             queue,
340 |             final_nonce,
341 |             final_hash,
342 |             hash_header,
343 |             matrix,
344 |             target,
345 |             events: Vec::<cl_event>::new(),
346 |             // Pre-built binaries are treated like the experimental kernel: both take the packed matrix.
347 |             experimental_amd: (experimental_amd || use_binary) && experimental_amd_use,
348 |         })
349 |     }
350 | }
323 | 
324 | /// Compiles the embedded kernel source for `device`.
325 | ///
326 | /// `options` becomes the start of the compiler command line. Appended to it are
327 | /// `CL_MAD_ENABLE`, `CL_FINITE_MATH_ONLY`, `CL_STD_2_0` for devices reporting version 2.0, 2.1
328 | /// or 3.0, a `-D<PLATFORM_NAME>` define derived from the platform name, the NVIDIA compute
329 | /// capability when the device reports one and, for AMD devices,
330 | /// `-D OPENCL_PLATFORM_AMD -D __<arch>__`.
331 | ///
332 | /// # Errors
333 | /// Returns the driver's message if the version or platform query fails or the build fails.
334 | fn from_source(context: &Context, device: &Device, options: &str) -> Result<Program, String> {
335 |     let version = device.version()?;
336 |     // The second space-separated field of the version string is compared below; when it is
337 |     // missing the CL2.0 flag is simply not added instead of panicking.
338 |     let v = version.split(' ').nth(1).unwrap_or("");
339 |     let mut compile_options = options.to_string();
340 |     compile_options += CL_MAD_ENABLE;
341 |     compile_options += CL_FINITE_MATH_ONLY;
342 |     if matches!(v, "2.0" | "2.1" | "3.0") {
343 |         info!("Compiling with OpenCl 2");
344 |         compile_options += CL_STD_2_0;
345 |     }
346 | 
347 |     // Platform name turned into a macro: non-alphanumeric characters become '_', then uppercase.
348 |     let platform = Platform::new(device.platform().map_err(|e| e.to_string())?);
349 |     if let Ok(platform_name) = platform.name() {
350 |         let define: String =
351 |             platform_name.chars().map(|c| if c.is_ascii_alphanumeric() { c } else { '_' }).collect();
352 |         compile_options += &format!("-D{} ", define.to_uppercase());
353 |     }
354 |     if let Ok(major) = device.compute_capability_major_nv() {
355 |         compile_options += &format!("-D __COMPUTE_MAJOR__={} ", major);
356 |     }
357 |     if let Ok(minor) = device.compute_capability_minor_nv() {
358 |         compile_options += &format!("-D __COMPUTE_MINOR__={} ", minor);
359 |     }
360 | 
361 |     // Hack to recreate the AMD flags
362 |     if device.pcie_id_amd().is_ok() {
363 |         let device_name = device.name().unwrap_or_else(|_| "Unknown".into()).to_lowercase();
364 |         // Keep only the part before the first ':' so that names with feature suffixes still
365 |         // yield a valid macro name.
366 |         let arch = device_name.split(':').next().unwrap_or("");
367 |         compile_options += &format!("-D OPENCL_PLATFORM_AMD -D __{}__ ", arch);
368 |     }
369 | 
370 |     info!("Build OpenCL with {}", compile_options);
371 | 
372 |     Program::create_and_build_from_source(context, PROGRAM_SOURCE, compile_options.as_str())
373 | }
369 | 


--------------------------------------------------------------------------------