├── .gitignore ├── plugins ├── opencl │ ├── resources │ │ ├── bin │ │ │ ├── gfx906_kaspa-opencl.bin │ │ │ ├── gfx1010_kaspa-opencl.bin │ │ │ ├── gfx1011_kaspa-opencl.bin │ │ │ ├── gfx1012_kaspa-opencl.bin │ │ │ ├── gfx1030_kaspa-opencl.bin │ │ │ ├── gfx1031_kaspa-opencl.bin │ │ │ ├── gfx1032_kaspa-opencl.bin │ │ │ ├── gfx1034_kaspa-opencl.bin │ │ │ └── ellesmere_kaspa-opencl.bin │ │ └── kaspa-opencl.cl │ ├── Cargo.lock │ ├── Cargo.toml │ ├── README.md │ └── src │ │ ├── cli.rs │ │ ├── lib.rs │ │ └── worker.rs ├── cuda │ ├── kaspa-cuda-native │ │ └── src │ │ │ ├── keccak-tiny.h │ │ │ ├── xoshiro256starstar.c │ │ │ ├── keccak-tiny.c │ │ │ ├── keccak-tiny-unrolled.c │ │ │ └── kaspa-cuda.cu │ ├── Cargo.toml │ ├── README.md │ └── src │ │ ├── cli.rs │ │ ├── lib.rs │ │ └── worker.rs └── README.md ├── integrations ├── hiveos │ ├── build.sh │ ├── h-config.sh │ ├── h-run.sh │ ├── createmanifest.sh │ └── h-stats.sh └── windows │ └── create_bat.sh ├── .rustfmt.toml ├── src ├── pow │ ├── keccak.rs │ ├── xoshiro.rs │ └── hasher.rs ├── client.rs ├── kaspad_messages.rs ├── xoshiro256starstar.rs ├── target.rs ├── cli.rs ├── lib.rs ├── main.rs ├── client │ ├── stratum │ │ └── statum_codec.rs │ └── grpc.rs ├── keccakf1600_x86-64-osx.s └── keccakf1600_x86-64.s ├── Cargo.toml ├── README.md ├── proto ├── p2p.proto └── messages.proto ├── .github └── workflows │ ├── deploy.yaml │ └── ci.yaml ├── LICENSE-MIT └── LICENSE-APACHE /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /plugins/opencl/resources/bin/gfx906_kaspa-opencl.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmrlvi/kaspa-miner/HEAD/plugins/opencl/resources/bin/gfx906_kaspa-opencl.bin -------------------------------------------------------------------------------- /plugins/opencl/resources/bin/gfx1010_kaspa-opencl.bin: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmrlvi/kaspa-miner/HEAD/plugins/opencl/resources/bin/gfx1010_kaspa-opencl.bin -------------------------------------------------------------------------------- /plugins/opencl/resources/bin/gfx1011_kaspa-opencl.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmrlvi/kaspa-miner/HEAD/plugins/opencl/resources/bin/gfx1011_kaspa-opencl.bin -------------------------------------------------------------------------------- /plugins/opencl/resources/bin/gfx1012_kaspa-opencl.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmrlvi/kaspa-miner/HEAD/plugins/opencl/resources/bin/gfx1012_kaspa-opencl.bin -------------------------------------------------------------------------------- /plugins/opencl/resources/bin/gfx1030_kaspa-opencl.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmrlvi/kaspa-miner/HEAD/plugins/opencl/resources/bin/gfx1030_kaspa-opencl.bin -------------------------------------------------------------------------------- /plugins/opencl/resources/bin/gfx1031_kaspa-opencl.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmrlvi/kaspa-miner/HEAD/plugins/opencl/resources/bin/gfx1031_kaspa-opencl.bin -------------------------------------------------------------------------------- /plugins/opencl/resources/bin/gfx1032_kaspa-opencl.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmrlvi/kaspa-miner/HEAD/plugins/opencl/resources/bin/gfx1032_kaspa-opencl.bin -------------------------------------------------------------------------------- /plugins/opencl/resources/bin/gfx1034_kaspa-opencl.bin: 
# Package the HiveOS integration into "<package>-hiveos.tgz".
#
# Usage: build.sh <version> <miner> <package>
#   $1  version, forwarded to createmanifest.sh
#   $2  miner name, forwarded to createmanifest.sh; also the directory whose
#       contents are copied into the package
#   $3  package directory to create; the archive is named after it
#
# Stop at the first failing step so a broken manifest or copy never ends up
# inside a published archive.
set -e

integrations/hiveos/createmanifest.sh "$1" "$2"

# -p keeps the previous tolerance for an already-existing package directory.
mkdir -p "$3"

# Quote every expansion so paths containing spaces are not word-split;
# the globs stay outside the quotes so they still expand.
cp h-manifest.conf integrations/hiveos/*.sh "$2"/* "$3"
tar czvf "$3-hiveos.tgz" "$3"
/// Applies the Keccak-f[1600] permutation to `state` in place.
///
/// Portable variant: compiled when the target is not x86_64, when the `no-asm`
/// feature is enabled, or when building for Windows. It simply forwards to the
/// `keccak` crate.
#[cfg(any(not(target_arch = "x86_64"), feature = "no-asm", target_os = "windows"))]
pub(super) fn f1600(state: &mut [u64; 25]) {
    keccak::f1600(state);
}

/// Applies the Keccak-f[1600] permutation to `state` in place.
///
/// Native variant: compiled on x86_64 (non-Windows) when the `no-asm` feature is
/// off. It calls the externally linked `KeccakF1600` symbol.
// NOTE(review): the symbol is presumably provided by the `keccakf1600_x86-64*.s`
// assembly sources compiled by the build script -- confirm against build.rs.
#[cfg(all(target_arch = "x86_64", not(feature = "no-asm"), not(target_os = "windows")))]
pub(super) fn f1600(state: &mut [u64; 25]) {
    extern "C" {
        fn KeccakF1600(state: &mut [u64; 25]);
    }
    // SAFETY: `state` is an exclusive reference to exactly 25 `u64` words, so the
    // pointer handed over is valid and unaliased for the duration of the call.
    // Assumes the external routine touches only those 25 words -- TODO confirm.
    unsafe { KeccakF1600(state) }
}
# Generate "<dir>/mine.bat", a default Windows launcher that restarts the miner
# in a loop.
#
# Usage: create_bat.sh <name>
#   $1  release name: both the directory that receives mine.bat and the stem of
#       the executable the batch file runs ("<name>.exe")
#
# The target path is quoted so names containing spaces work, and the file is
# opened once instead of being re-opened for every line.
bat_file="${1}/mine.bat"

{
    echo 'REM When mining to a local node, you can drop the -s option.'
    echo 'echo ============================================================'
    echo 'echo = Running Kaspa Miner with default .bat. Edit to configure ='
    echo 'echo ============================================================'
    echo ':start'
    echo "${1}.exe -a kaspa:qz4jdyu04hv4hpyy00pl6trzw4gllnhnwy62xattejv2vaj5r0p5quvns058f -s n.seeder1.kaspad.net"
    echo 'goto start'
} > "$bat_file"
| cust ="0.3" 11 | log = "0.4" 12 | rand = "0.8" 13 | clap = { version = "3.0", features = ["color", "derive"]} 14 | env_logger = "0.9" 15 | nvml-wrapper = { git = "https://github.com/benrod3k/nvml-wrapper", branch = "495.29.05", optional = true } 16 | 17 | [lib] 18 | crate-type = ["cdylib", "rlib"] 19 | 20 | [features] 21 | overclock = [ "nvml-wrapper" ] 22 | -------------------------------------------------------------------------------- /integrations/hiveos/h-config.sh: -------------------------------------------------------------------------------- 1 | #################################################################################### 2 | ### 3 | ### kaspa-miner 4 | ### https://github.com/tmrlvi/kaspa-miner/releases 5 | ### 6 | ### Hive integration: Merlin 7 | ### 8 | #################################################################################### 9 | 10 | #!/usr/bin/env bash 11 | [[ -e /hive/custom ]] && . /hive/custom/kaspa-miner/h-manifest.conf 12 | [[ -e /hive/miners/custom ]] && . /hive/miners/custom/kaspa-miner/h-manifest.conf 13 | conf="" 14 | conf+=" --kaspad-address=$CUSTOM_URL --mining-address $CUSTOM_TEMPLATE" 15 | 16 | 17 | [[ ! 
-z $CUSTOM_USER_CONFIG ]] && conf+=" $CUSTOM_USER_CONFIG" 18 | 19 | echo "$conf" 20 | echo "$conf" > $CUSTOM_CONFIG_FILENAME 21 | 22 | -------------------------------------------------------------------------------- /src/pow/xoshiro.rs: -------------------------------------------------------------------------------- 1 | use crate::Hash; 2 | use std::num::Wrapping; 3 | 4 | pub(super) struct XoShiRo256PlusPlus { 5 | s0: Wrapping, 6 | s1: Wrapping, 7 | s2: Wrapping, 8 | s3: Wrapping, 9 | } 10 | 11 | impl XoShiRo256PlusPlus { 12 | #[inline] 13 | pub(super) fn new(hash: Hash) -> Self { 14 | Self { s0: Wrapping(hash.0[0]), s1: Wrapping(hash.0[1]), s2: Wrapping(hash.0[2]), s3: Wrapping(hash.0[3]) } 15 | } 16 | 17 | #[inline] 18 | pub(super) fn u64(&mut self) -> u64 { 19 | let res = self.s0 + Wrapping((self.s0 + self.s3).0.rotate_left(23)); 20 | let t = self.s1 << 17; 21 | self.s2 ^= self.s0; 22 | self.s3 ^= self.s1; 23 | self.s1 ^= self.s2; 24 | self.s0 ^= self.s3; 25 | 26 | self.s2 ^= t; 27 | self.s3 = Wrapping(self.s3.0.rotate_left(45)); 28 | 29 | res.0 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /integrations/hiveos/h-run.sh: -------------------------------------------------------------------------------- 1 | #################################################################################### 2 | ### 3 | ### kaspa-miner 4 | ### https://github.com/tmrlvi/kaspa-miner/releases 5 | ### 6 | ### Hive integration: Merlin 7 | ### 8 | #################################################################################### 9 | 10 | #!/usr/bin/env bash 11 | 12 | cd `dirname $0` 13 | 14 | [ -t 1 ] && . colors 15 | 16 | . 
h-manifest.conf 17 | 18 | echo $CUSTOM_NAME 19 | echo $CUSTOM_LOG_BASENAME 20 | echo $CUSTOM_CONFIG_FILENAME 21 | 22 | [[ -z $CUSTOM_LOG_BASENAME ]] && echo -e "${RED}No CUSTOM_LOG_BASENAME is set${NOCOLOR}" && exit 1 23 | [[ -z $CUSTOM_CONFIG_FILENAME ]] && echo -e "${RED}No CUSTOM_CONFIG_FILENAME is set${NOCOLOR}" && exit 1 24 | [[ ! -f $CUSTOM_CONFIG_FILENAME ]] && echo -e "${RED}Custom config ${YELLOW}$CUSTOM_CONFIG_FILENAME${RED} is not found${NOCOLOR}" && exit 1 25 | 26 | 27 | ./$CUSTOM_MINERBIN $(< $CUSTOM_CONFIG_FILENAME) $@ 2>&1 | tee $CUSTOM_LOG_BASENAME.log 28 | 29 | 30 | -------------------------------------------------------------------------------- /plugins/README.md: -------------------------------------------------------------------------------- 1 | # Miner Plugins 2 | 3 | **CAUTION! The plugin api is brand new and might change without prior notice.** 4 | 5 | **CAUTION! Old plugins might not be compatible with new plugins: check the plugin version.** 6 | 7 | **CAUTION! Plugins can run arbitrary code: if you use precompiled plugins, make sure they come from 8 | a reputable source.** 9 | 10 | The plugin system relies on three interfaces defined in `lib.rs` of `kaspa-miner`. 11 | Each interface refers to an object which has a different job: 12 | * **Plugin** - the environment and configuration of a type of worker. 13 | * **WorkerSpec** - Lightweight struct containing the initialization arguments for a worker. 14 | Can be (and is) sent between threads. 15 | * **Worker** - The worker object, which contains references to device memory and functions. Usually not thread safe. 16 | 17 | To implement your own plugin, create a `crate`, and implement the required methods. Build it as a `cdylib` 18 | and place it in the plugins directory. Add the plugin's name to the `main.rs` code to whitelist it. 
-------------------------------------------------------------------------------- /plugins/opencl/README.md: -------------------------------------------------------------------------------- 1 | # OpenCL support for Kaspa-Miner 2 | 3 | This is an experimental plugin to support opencl. 4 | 5 | # Compiling to AMD 6 | Download and install Radeon GPU Analyzer, which allows you to compile OpenCL for AMD 7 | 8 | ```shell 9 | for arch in gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx906 10 | do 11 | rga --O3 -s opencl -c "$arch" --OpenCLoption "-cl-finite-math-only -cl-mad-enable " -b plugins/opencl/resources/bin/kaspa-opencl.bin plugins/opencl/resources/kaspa-opencl.cl -D __FORCE_AMD_V_DOT8_U32_U4__=1 -D OPENCL_PLATFORM_AMD -D OFFLINE 12 | done 13 | 14 | for arch in gfx1010 15 | do 16 | rga --O3 -s opencl -c "$arch" --OpenCLoption "-cl-finite-math-only -cl-mad-enable " -b plugins/opencl/resources/bin/kaspa-opencl.bin plugins/opencl/resources/kaspa-opencl.cl -D OPENCL_PLATFORM_AMD 17 | done 18 | 19 | for arch in Ellesmere 20 | do 21 | rga --O3 -s opencl -c "$arch" --OpenCLoption "-cl-finite-math-only -cl-mad-enable -target amdgcn-amd-amdpal" -b plugins/opencl/resources/bin/kaspa-opencl.bin plugins/opencl/resources/kaspa-opencl.cl -D OPENCL_PLATFORM_AMD -D PAL 22 | done 23 | ``` -------------------------------------------------------------------------------- /integrations/hiveos/createmanifest.sh: -------------------------------------------------------------------------------- 1 | #################################################################################### 2 | ### 3 | ### kaspa-miner 4 | ### https://github.com/tmrlvi/kaspa-miner/releases 5 | ### 6 | ### Hive integration: Merlin 7 | ### 8 | #################################################################################### 9 | 10 | if [ "$#" -ne "2" ] 11 | then 12 | echo "No arguments supplied. 
Call using createmanifest.sh " 13 | exit 14 | fi 15 | cat > h-manifest.conf << EOF 16 | #################################################################################### 17 | ### 18 | ### kaspa-miner 19 | ### https://github.com/tmrlvi/kaspa-miner/releases 20 | ### 21 | ### Hive integration: Merlin 22 | ### 23 | #################################################################################### 24 | 25 | # The name of the miner 26 | CUSTOM_NAME=kaspa-miner 27 | 28 | # Optional version of your custom miner package 29 | CUSTOM_VERSION=$1 30 | CUSTOM_BUILD=0 31 | CUSTOM_MINERBIN=$2 32 | 33 | # Full path to miner config file 34 | CUSTOM_CONFIG_FILENAME=/hive/miners/custom/\$CUSTOM_NAME/config.ini 35 | 36 | # Full path to log file basename. WITHOUT EXTENSION (don't include .log at the end) 37 | # Used to truncate logs and rotate, 38 | # E.g. /var/log/miner/mysuperminer/somelogname (filename without .log at the end) 39 | CUSTOM_LOG_BASENAME=/var/log/miner/\$CUSTOM_NAME 40 | 41 | WEB_PORT=3338 42 | EOF -------------------------------------------------------------------------------- /plugins/cuda/README.md: -------------------------------------------------------------------------------- 1 | # Cuda Support For Kaspa-Miner 2 | 3 | ## Building 4 | 5 | The plugin is a shared library file that resides in the same library as the miner. 6 | You can build the library by running 7 | ```sh 8 | cargo build -p kaspacuda 9 | ``` 10 | 11 | This version includes a precompiled PTX, which would work with most modern GPUs. 
To compile the PTX yourself, 12 | you have to clone the project: 13 | 14 | ```sh 15 | git clone https://github.com/tmrlvi/kaspa-miner.git 16 | cd kaspa-miner 17 | # Compute version 8.6 18 | /usr/local/cuda-11.5/bin/nvcc plugins/cuda/kaspa-cuda-native/src/kaspa-cuda.cu -std=c++11 -O3 --restrict --ptx --gpu-architecture=compute_86 --gpu-code=sm_86 -o plugins/cuda/resources/kaspa-cuda-sm86.ptx -Xptxas -O3 -Xcompiler -O3 19 | # Compute version 7.5 20 | /usr/local/cuda-11.5/bin/nvcc plugins/cuda/kaspa-cuda-native/src/kaspa-cuda.cu -std=c++11 -O3 --restrict --ptx --gpu-architecture=compute_75 --gpu-code=sm_75 -o plugins/cuda/resources/kaspa-cuda-sm75.ptx -Xptxas -O3 -Xcompiler -O3 21 | # Compute version 6.1 22 | /usr/local/cuda-11.2/bin/nvcc plugins/cuda/kaspa-cuda-native/src/kaspa-cuda.cu -std=c++11 -O3 --restrict --ptx --gpu-architecture=compute_61 --gpu-code=sm_61 -o plugins/cuda/resources/kaspa-cuda-sm61.ptx -Xptxas -O3 -Xcompiler -O3 23 | # Compute version 3.0 24 | /usr/local/cuda-9.2/bin/nvcc plugins/cuda/kaspa-cuda-native/src/kaspa-cuda.cu -ccbin=gcc-7 -std=c++11 -O3 --restrict --ptx --gpu-architecture=compute_30 --gpu-code=sm_30 -o plugins/cuda/resources/kaspa-cuda-sm30.ptx 25 | # Compute version 2.0 26 | /usr/local/cuda-8.0/bin/nvcc plugins/cuda/kaspa-cuda-native/src/kaspa-cuda.cu -ccbin=gcc-5 -std=c++11 -O3 --restrict --ptx --gpu-architecture=compute_20 --gpu-code=sm_20 -o plugins/cuda/resources/kaspa-cuda-sm20.ptx 27 | 28 | cargo build --release 29 | ``` 30 | -------------------------------------------------------------------------------- /src/kaspad_messages.rs: -------------------------------------------------------------------------------- 1 | use crate::proto::{ 2 | kaspad_message::Payload, GetBlockTemplateRequestMessage, GetInfoRequestMessage, KaspadMessage, 3 | NotifyBlockAddedRequestMessage, NotifyNewBlockTemplateRequestMessage, RpcBlock, SubmitBlockRequestMessage, 4 | }; 5 | use crate::{ 6 | pow::{self, HeaderHasher}, 7 | Hash, 8 | }; 9 | 10 | impl 
/// Jump polynomial used by [`Xoshiro256StarStar::long_jump`], least-significant
/// word first. These are the `LONG_JUMP` constants of the xoshiro256** reference
/// implementation.
const LONG_JUMP: [u64; 4] = [0x76e15d3efefdcbbf, 0xc5004e441c522fb3, 0x77710069854ee241, 0x39109bb02acbe635];

/// xoshiro256** pseudo-random generator with a 256-bit state.
///
/// The state must not be all zero, otherwise every output is zero.
#[derive(Clone, Copy)]
pub struct Xoshiro256StarStar {
    pub(crate) state: [u64; 4],
}

/// Endless iterator over successive long-jumped states of a generator.
///
/// Created by [`Xoshiro256StarStar::iter_jump_state`].
pub struct Xoshiro256StarStarStateIter {
    current: Xoshiro256StarStar,
}

/// Rotates `x` left by `k` bits.
///
/// `k` is reduced modulo 64, so `k == 0` (which previously overflowed the
/// right shift by 64) and out-of-range values are well defined.
fn rotl(x: u64, k: i32) -> u64 {
    // `rem_euclid` yields 0..=63, so the cast to `u32` cannot truncate.
    x.rotate_left(k.rem_euclid(64) as u32)
}

impl Xoshiro256StarStar {
    /// Creates a generator whose state is a copy of `seed`.
    pub fn new(seed: &[u64; 4]) -> Self {
        Self { state: *seed }
    }

    /// Returns the next 64-bit output and advances the state by one step.
    pub fn next_u64(&mut self) -> u64 {
        // "**" scrambler: rotl(s1 * 5, 7) * 9, all modulo 2^64.
        let result = rotl(self.state[1].wrapping_mul(5), 7).wrapping_mul(9);

        // State transition; the order of the XORs is significant.
        let t = self.state[1] << 17;

        self.state[2] ^= self.state[0];
        self.state[3] ^= self.state[1];
        self.state[1] ^= self.state[2];
        self.state[0] ^= self.state[3];

        self.state[2] ^= t;

        self.state[3] = rotl(self.state[3], 45);

        result
    }

    /// Advances the generator by the reference "long jump" (equivalent to 2^192
    /// calls to [`Self::next_u64`] according to the reference implementation),
    /// so that jumped states give non-overlapping sub-sequences.
    ///
    /// The accumulated state is written back only after all 256 polynomial bits
    /// have been processed. Writing it back after every 64-bit word, as the
    /// previous version did, restarts the walk from a partially accumulated
    /// state and no longer computes the reference jump.
    pub fn long_jump(&mut self) {
        let mut jumped = [0u64; 4];
        for jmp in LONG_JUMP {
            for b in 0..64 {
                if jmp & (1u64 << b) != 0 {
                    for (acc, word) in jumped.iter_mut().zip(self.state) {
                        *acc ^= word;
                    }
                }
                self.next_u64();
            }
        }
        self.state = jumped;
    }

    /// Returns an endless iterator that yields the state after one long jump,
    /// two long jumps, and so on. `self` is left untouched.
    pub fn iter_jump_state(&self) -> impl Iterator<Item = [u64; 4]> {
        Xoshiro256StarStarStateIter { current: *self }
    }
}

impl Iterator for Xoshiro256StarStarStateIter {
    type Item = [u64; 4];

    /// Performs one long jump and returns the resulting state; never returns `None`.
    fn next(&mut self) -> Option<[u64; 4]> {
        self.current.long_jump();
        Some(self.current.state)
    }
}
"https://github.com/tmrlvi/kaspa-miner" 8 | readme = "README.md" 9 | description = "A fast CPU & GPU miner for Kaspa" 10 | categories = ["command-line-utilities"] 11 | keywords = ["blockchain", "cli"] 12 | include = [ 13 | "src/**/*.rs", 14 | "src/**/*.s", 15 | "proto/**", 16 | "Cargo.toml", 17 | "Cargo.lock", 18 | "build.rs", 19 | "LICENSE-MIT", 20 | "LICENSE-APACHE", 21 | "README.md", 22 | ] 23 | 24 | [dependencies] 25 | tonic = "0.8" 26 | tokio = { version = "1.17", features = ["macros", "rt-multi-thread"] } 27 | prost = "0.11" 28 | futures-util = "0.3" 29 | tokio-stream = {version = "0.1", features = ["net"]} 30 | once_cell = "1" 31 | num_cpus = "1" 32 | rand = "0.8" 33 | blake2b_simd = "1.0.0" 34 | clap = { version = "3.0", features = ["color", "derive"]} 35 | log = "0.4" 36 | env_logger = "0.9" 37 | keccak = { version = "0.1", optional = true } 38 | parking = { package = "parking_lot", version = "0.12", optional = true } 39 | shuttle = { version = "0.2.0", optional = true } 40 | libloading = "0.7" 41 | tokio-util = {version = "0.7.0", features = ["codec"]} 42 | serde_json = "1.0" 43 | serde_repr = "0.1" 44 | serde = {version="1.0", features=["derive"]} 45 | futures = "0.3.21" 46 | bytes = "1.1.0" 47 | async-trait = "0.1" 48 | num = "0.4" 49 | nix = "0.25" 50 | hex = "0.4" 51 | semver = "1.0" 52 | time = { version = "0.3", features = ["formatting", "macros"] } 53 | 54 | [features] 55 | default = ["parking_lot"] 56 | parking_lot = ["parking", "tokio/parking_lot"] 57 | bench = [] 58 | no-asm = ["keccak"] 59 | 60 | [target.'cfg(target_os = "windows")'.dependencies] 61 | keccak = "0.1" 62 | kernel32-sys = "0.2" 63 | win32console = "0.1" 64 | 65 | [profile.release] 66 | lto = true 67 | codegen-units = 1 68 | 69 | [build-dependencies] 70 | tonic-build = { version = "0.8", default-features = false, features = ["prost", "transport"] } 71 | cc = "1" 72 | time = { version = "0.3", features = ["formatting"] } 73 | 74 | [dev-dependencies] 75 | sha3 = { git = 
"https://github.com/elichai/hashes", branch = "cSHAKE" } 76 | 77 | [workspace] 78 | members = ["plugins/*"] 79 | default-members = [".", "plugins/cuda", "plugins/opencl"] -------------------------------------------------------------------------------- /plugins/opencl/src/cli.rs: -------------------------------------------------------------------------------- 1 | use crate::Error; 2 | use std::str::FromStr; 3 | 4 | #[derive(Copy, Clone, Debug, PartialEq, Eq)] 5 | pub enum NonceGenEnum { 6 | Lean, 7 | Xoshiro, 8 | } 9 | 10 | impl FromStr for NonceGenEnum { 11 | type Err = Error; 12 | 13 | fn from_str(s: &str) -> Result { 14 | match s.to_lowercase().as_str() { 15 | "lean" => Ok(Self::Lean), 16 | "xoshiro" => Ok(Self::Xoshiro), 17 | _ => Err("Unknown string".into()), 18 | } 19 | } 20 | } 21 | 22 | #[derive(clap::Args, Debug)] 23 | pub struct OpenCLOpt { 24 | #[clap(long = "opencl-platform", help = "Which OpenCL platform to use (limited to one per executable)")] 25 | pub opencl_platform: Option, 26 | #[clap(long = "opencl-device", use_delimiter = true, help = "Which OpenCL GPUs to use on a specific platform")] 27 | pub opencl_device: Option>, 28 | #[clap(long = "opencl-workload", help = "Ratio of nonces to GPU possible parrallel run in OpenCL [default: 512]")] 29 | pub opencl_workload: Option>, 30 | #[clap( 31 | long = "opencl-workload-absolute", 32 | help = "The values given by workload are not ratio, but absolute number of nonces in OpenCL [default: false]" 33 | )] 34 | pub opencl_workload_absolute: bool, 35 | #[clap(long = "opencl-enable", help = "Enable opencl, and take all devices of the chosen platform")] 36 | pub opencl_enable: bool, 37 | #[clap(long = "opencl-amd-disable", help = "Disables AMD mining (does not override opencl-enable)")] 38 | pub opencl_amd_disable: bool, 39 | #[clap(long = "opencl-no-amd-binary", help = "Disable fetching of precompiled AMD kernel (if exists)")] 40 | pub opencl_no_amd_binary: bool, 41 | #[clap( 42 | long = "experimental-amd", 43 
| help = "Uses SMID instructions in AMD. Miner will crash if instruction is not supported" 44 | )] 45 | pub experimental_amd: bool, 46 | #[clap( 47 | long = "opencl-nonce-gen", 48 | help = "The random method used to generate nonces. Options: (i) xoshiro (ii) lean", 49 | long_help = "The random method used to generate nonces. Options: (i) xoshiro - each thread in GPU will have its own random state, creating a (pseudo-)independent xoshiro sequence (ii) lean - each GPU will have a single random nonce, and each GPU thread will work on nonce + thread id.", 50 | default_value = "lean" 51 | )] 52 | pub opencl_nonce_gen: NonceGenEnum, 53 | } 54 | -------------------------------------------------------------------------------- /plugins/cuda/src/cli.rs: -------------------------------------------------------------------------------- 1 | use crate::Error; 2 | use std::str::FromStr; 3 | 4 | #[derive(Copy, Clone, Debug, PartialEq, Eq)] 5 | pub enum NonceGenEnum { 6 | Lean, 7 | Xoshiro, 8 | } 9 | 10 | impl FromStr for NonceGenEnum { 11 | type Err = Error; 12 | 13 | fn from_str(s: &str) -> Result { 14 | match s.to_lowercase().as_str() { 15 | "lean" => Ok(Self::Lean), 16 | "xoshiro" => Ok(Self::Xoshiro), 17 | _ => Err("Unknown string".into()), 18 | } 19 | } 20 | } 21 | 22 | #[cfg(feature = "overclock")] 23 | #[derive(clap::Args, Debug, Default)] 24 | pub struct OverClock { 25 | #[clap(long = "cuda-lock-mem-clocks", use_delimiter = true, help = "Lock mem clocks eg: ,810, [default: 0]")] 26 | pub cuda_lock_mem_clocks: Option>, 27 | #[clap(long = "cuda-lock-core-clocks", use_delimiter = true, help = "Lock core clocks eg: ,1200, [default: 0]")] 28 | pub cuda_lock_core_clocks: Option>, 29 | #[clap(long = "cuda-power-limits", use_delimiter = true, help = "Lock power limits eg: ,150, [default: 0]")] 30 | pub cuda_power_limits: Option>, 31 | } 32 | 33 | #[derive(clap::Args, Debug)] 34 | pub struct CudaOpt { 35 | #[clap(long = "cuda-device", use_delimiter = true, help = "Which CUDA GPUs to 
use [default: all]")] 36 | pub cuda_device: Option>, 37 | #[clap(long = "cuda-workload", help = "Ratio of nonces to GPU possible parrallel run [default: 64]")] 38 | pub cuda_workload: Option>, 39 | #[clap( 40 | long = "cuda-workload-absolute", 41 | help = "The values given by workload are not ratio, but absolute number of nonces [default: false]" 42 | )] 43 | pub cuda_workload_absolute: bool, 44 | #[clap(long = "cuda-disable", help = "Disable cuda workers")] 45 | pub cuda_disable: bool, 46 | #[clap( 47 | long = "cuda-no-blocking-sync", 48 | help = "Actively wait for result. Higher CPU usage, but less red blocks. Can have lower workload.", 49 | long_help = "Actively wait for GPU result. Increases CPU usage, but removes delays that might result in red blocks. Can have lower workload." 50 | )] 51 | pub cuda_no_blocking_sync: bool, 52 | #[clap( 53 | long = "cuda-nonce-gen", 54 | help = "The random method used to generate nonces. Options: (i) xoshiro - each thread in GPU will have its own random state, creating a (pseudo-)independent xoshiro sequence (ii) lean - each GPU will have a single random nonce, and each GPU thread will work on nonce + thread id.", 55 | default_value = "lean" 56 | )] 57 | pub cuda_nonce_gen: NonceGenEnum, 58 | 59 | #[cfg(feature = "overclock")] 60 | #[clap(flatten)] 61 | pub overclock: OverClock, 62 | } 63 | -------------------------------------------------------------------------------- /plugins/cuda/kaspa-cuda-native/src/xoshiro256starstar.c: -------------------------------------------------------------------------------- 1 | /* Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org) 2 | 3 | To the extent possible under law, the author has dedicated all copyright 4 | and related and neighboring rights to this software to the public domain 5 | worldwide. This software is distributed without any warranty. 6 | 7 | See . */ 8 | 9 | #include 10 | 11 | /* This is xoshiro256** 1.0, one of our all-purpose, rock-solid 12 | generators. 
/* Rotate x left by k bits. k must satisfy 0 < k < 64: shifting a 64-bit
   operand by 64 is undefined. */
__device__ static inline uint64_t rotl(const uint64_t x, int k) {
	return (x << k) | (x >> (64 - k));
}

/* Advance the generator state *s by one step and return the next 64-bit
   output. All four state words (x, y, z, w) are updated in place. */
__device__ inline uint64_t xoshiro256_next(ulonglong4 *s) {
	const uint64_t result = rotl(s->y * 5, 7) * 9;

	const uint64_t t = s->y << 17;

	s->z ^= s->x;
	s->w ^= s->y;
	s->y ^= s->z;
	s->x ^= s->w;

	s->z ^= t;

	s->w = rotl(s->w, 45);

	return result;
}

/* Shared body of the two jump functions. For each of the `words` 64-bit
   entries of `poly`, scanned from bit 0 to bit 63, the current state is
   XOR-ed into an accumulator whenever the bit is set, and the generator is
   stepped once per bit. The accumulator then replaces the state. */
__device__ static inline void xoshiro256_apply_jump(ulonglong4 *s, const uint64_t *poly, unsigned int words) {
	uint64_t s0 = 0;
	uint64_t s1 = 0;
	uint64_t s2 = 0;
	uint64_t s3 = 0;
	for (unsigned int i = 0; i < words; i++)
		for (int b = 0; b < 64; b++) {
			if (poly[i] & UINT64_C(1) << b) {
				s0 ^= s->x;
				s1 ^= s->y;
				s2 ^= s->z;
				s3 ^= s->w;
			}
			xoshiro256_next(s);
		}

	s->x = s0;
	s->y = s1;
	s->z = s2;
	s->w = s3;
}

/* This is the jump function for the generator. It is equivalent
   to 2^128 calls to next(); it can be used to generate 2^128
   non-overlapping subsequences for parallel computations. */
__device__ void xoshiro256_jump(ulonglong4 *s) {
	static const uint64_t JUMP[] = { 0x180ec6d33cfd0aba, 0xd5a61266f0c9392c, 0xa9582618e03fc9aa, 0x39abdc4529b1661c };

	xoshiro256_apply_jump(s, JUMP, (unsigned int)(sizeof JUMP / sizeof *JUMP));
}

/* This is the long-jump function for the generator. It is equivalent to
   2^192 calls to next(); it can be used to generate 2^64 starting points,
   from each of which jump() will generate 2^64 non-overlapping
   subsequences for parallel distributed computations. */
__device__ void xoshiro256_long_jump(ulonglong4 *s) {
	static const uint64_t LONG_JUMP[] = { 0x76e15d3efefdcbbf, 0xc5004e441c522fb3, 0x77710069854ee241, 0x39109bb02acbe635 };

	xoshiro256_apply_jump(s, LONG_JUMP, (unsigned int)(sizeof LONG_JUMP / sizeof *LONG_JUMP));
}
/*** Keccak-f[1600] ***/
/* Applies 24 rounds of the permutation in place to the state behind `state`,
 * which is read and written as 25 64-bit lanes a[0..24].
 * NOTE(review): the pointer is cast straight to uint64_t*, so callers
 * presumably must pass an 8-byte-aligned, 200-byte buffer - confirm at the
 * call sites. */
__device__ static inline void keccakf(void* state) {
  uint64_t* a = (uint64_t*)state;
  uint64_t b[5] = {0};   /* scratch: column parities (Theta) or one row (Chi) */
  uint64_t t = 0;        /* lane being carried along the Rho/Pi walk */
  uint8_t x, y;

  for (int i = 0; i < 24; i++) {
    // Theta
    /* b[x] becomes the XOR of the five lanes a[x], a[x+5], ..., a[x+20]. */
    FOR5(x, 1,
         b[x] = 0;
         FOR5(y, 5,
              b[x] ^= a[x + y]; ))
    /* Every lane absorbs the parity of one neighbouring column and the
     * parity of the other neighbouring column rotated left by one bit. */
    FOR5(x, 1,
         FOR5(y, 5,
              a[y + x] ^= b[(x + 4) % 5] ^ rol(b[(x + 1) % 5], 1); ))
    // Rho and pi
    /* Starting from a[1], visit the 24 lanes listed in pi[]: each visited
     * lane is saved in b[0], overwritten with the carried lane rotated by
     * rho[x], and then becomes the carried lane itself. */
    t = a[1];
    x = 0;
    REPEAT24(b[0] = a[pi[x]];
             a[pi[x]] = rol(t, rho[x]);
             t = b[0];
             x++; )
    // Chi
    /* Row by row (y advances in steps of 5): copy the row into b[], then
     * recombine each lane with the two lanes that follow it in the row. */
    FOR5(y,
       5,
       FOR5(x, 1,
            b[x] = a[y + x];)
       FOR5(x, 1,
            a[y + x] = b[x] ^ ((~b[(x + 1) % 5]) & b[(x + 2) % 5]); ))
    // Iota
    /* Mix this round's constant into lane 0. */
    a[0] ^= RC[i];
  }
}
/** The sponge-based hash construction.
 *
 * Single-block variant: the first 10 lanes (80 bytes) of `in` are XOR-ed
 * into the first 10 lanes of the precomputed state `initP`, the remaining
 * 15 lanes are copied from `initP` unchanged, the permutation P is applied
 * once, and the first 4 lanes (32 bytes) of the result are written to `out`.
 *
 * The working state is declared as 64-bit lanes rather than as a byte
 * array, so its alignment for the 64-bit accesses here and inside P is
 * guaranteed by its type.
 *
 * NOTE(review): `initP`, `in` and `out` are still accessed through
 * uint64_t pointers, so callers presumably pass 8-byte-aligned buffers -
 * confirm at the call sites.
 **/
__device__ __forceinline__ static void hash(
    const uint8_t initP[Plen],
    uint8_t* out,
    const uint8_t* in) {
    /* Every lane is written below before P reads it, so no initializer. */
    uint64_t lanes[Plen / sizeof(uint64_t)];
    const uint64_t* init_lanes = (const uint64_t*)initP;
    const uint64_t* in_lanes = (const uint64_t*)in;
    uint64_t* out_lanes = (uint64_t*)out;

    #pragma unroll
    for (int i = 0; i < 10; i++) lanes[i] = init_lanes[i] ^ in_lanes[i];
    #pragma unroll
    for (int i = 10; i < 25; i++) lanes[i] = init_lanes[i];

    // Apply P
    P(lanes);
    // Squeeze output.
    #pragma unroll
    for (int i = 0; i < 4; i++) out_lanes[i] = lanes[i];
}
-f 1,2 --output-delimiter='' | sed 's/$/0/'` 30 | if [[ $stats_raw == *"Ghash"* ]]; then 31 | total_hashrate=$(($total_hashrate*1000)) 32 | fi 33 | 34 | #GPU Status 35 | gpu_stats=$(< $GPU_STATS_JSON) 36 | 37 | readarray -t gpu_stats < <( jq --slurp -r -c '.[] | .busids, .brand, .temp, .fan | join(" ")' $GPU_STATS_JSON 2>/dev/null) 38 | busids=(${gpu_stats[0]}) 39 | brands=(${gpu_stats[1]}) 40 | temps=(${gpu_stats[2]}) 41 | fans=(${gpu_stats[3]}) 42 | gpu_count=${#busids[@]} 43 | 44 | hash_arr=() 45 | busid_arr=() 46 | fan_arr=() 47 | temp_arr=() 48 | lines=() 49 | 50 | if [ $(gpu-detect NVIDIA) -gt 0 ]; then 51 | brand_gpu_count=$(gpu-detect NVIDIA) 52 | BRAND_MINER="nvidia" 53 | elif [ $(gpu-detect AMD) -gt 0 ]; then 54 | brand_gpu_count=$(gpu-detect AMD) 55 | BRAND_MINER="amd" 56 | fi 57 | 58 | for(( i=0; i < gpu_count; i++ )); do 59 | [[ "${brands[i]}" != $BRAND_MINER ]] && continue 60 | [[ "${busids[i]}" =~ ^([A-Fa-f0-9]+): ]] 61 | busid_arr+=($((16#${BASH_REMATCH[1]}))) 62 | temp_arr+=(${temps[i]}) 63 | fan_arr+=(${fans[i]}) 64 | gpu_raw=`cat $CUSTOM_LOG_BASENAME.log | grep -w "Device #"$i | tail -n 1 ` 65 | hashrate=`echo $gpu_raw | awk '{print $(NF-1)}' | cut -d "." 
/// Expands a compact difficulty encoding into a full 256-bit target.
///
/// The top byte of `bits` is a base-256 exponent and the low 24 bits are the
/// mantissa. A mantissa with bit 23 set (it would be negative) yields zero.
pub fn u256_from_compact_target(bits: u32) -> Uint256 {
    // This is a floating-point "compact" encoding originally used by
    // OpenSSL, which satoshi put into consensus code, so we're stuck
    // with it. The exponent needs to have 3 subtracted from it.
    let exponent = bits >> 24;
    let raw_mantissa = bits & 0x00FF_FFFF;

    let (mantissa, shift_bits) = if exponent <= 3 {
        // Small exponents drop low mantissa bytes instead of shifting left.
        (raw_mantissa >> (8 * (3 - exponent)), 0)
    } else {
        (raw_mantissa, 8 * (exponent - 3))
    };

    // The mantissa is signed but may not be negative
    if mantissa > 0x7F_FFFF {
        return Uint256::default();
    }
    Uint256::from_u64(u64::from(mantissa)) << (shift_bits as usize)
}

/// Little-endian large integer type: word 0 holds the least significant
/// 64 bits.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Default, Debug)]
pub struct Uint256(pub [u64; 4]);

impl Uint256 {
    /// Wraps four little-endian 64-bit words.
    #[inline(always)]
    pub fn new(v: [u64; 4]) -> Self {
        Uint256(v)
    }

    /// Create an object from a given unsigned 64-bit integer
    #[inline]
    pub fn from_u64(init: u64) -> Uint256 {
        Uint256([init, 0, 0, 0])
    }

    /// Creates big integer value from a byte slice using
    /// little-endian encoding
    #[inline(always)]
    pub fn from_le_bytes(bytes: [u8; 32]) -> Uint256 {
        let mut words = [0u64; 4];
        for (index, word) in words.iter_mut().enumerate() {
            let mut chunk = [0u8; 8];
            chunk.copy_from_slice(&bytes[index * 8..index * 8 + 8]);
            *word = u64::from_le_bytes(chunk);
        }
        Uint256(words)
    }

    /// Serializes the value as 32 bytes, least significant byte first.
    #[inline(always)]
    pub fn to_le_bytes(self) -> [u8; 32] {
        let mut out = [0u8; 32];
        for (index, word) in self.0.iter().enumerate() {
            out[index * 8..index * 8 + 8].copy_from_slice(&word.to_le_bytes());
        }
        out
    }

    /// Serializes the value as 32 bytes, most significant byte first.
    #[inline(always)]
    pub fn to_be_bytes(self) -> [u8; 32] {
        let mut out = [0u8; 32];
        for (index, word) in self.0.iter().rev().enumerate() {
            out[index * 8..index * 8 + 8].copy_from_slice(&word.to_be_bytes());
        }
        out
    }
}

impl fmt::LowerHex for Uint256 {
    /// Writes the little-endian byte serialization as two lowercase hex
    /// digits per byte.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        for byte in self.to_le_bytes().iter() {
            write!(f, "{:02x}", byte)?;
        }
        Ok(())
    }
}

impl PartialOrd for Uint256 {
    #[inline(always)]
    fn partial_cmp(&self, other: &Uint256) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for Uint256 {
    #[inline(always)]
    fn cmp(&self, other: &Uint256) -> Ordering {
        // The derived ordering would compare word 0 first, which is the
        // least significant one here, so walk from the most significant
        // word down and stop at the first difference.
        for index in (0..4).rev() {
            match self.0[index].cmp(&other.0[index]) {
                Ordering::Equal => continue,
                decided => return decided,
            }
        }
        Ordering::Equal
    }
}

impl std::ops::Shl<usize> for Uint256 {
    type Output = Uint256;

    /// Shifts left by `shift` bits; bits moved past bit 255 are discarded.
    fn shl(self, shift: usize) -> Uint256 {
        let word_shift = shift / 64;
        let bit_shift = shift % 64;
        let mut shifted = [0u64; 4];
        for (index, &word) in self.0.iter().enumerate() {
            let target = index + word_shift;
            // Shift
            if target < 4 {
                shifted[target] |= word << bit_shift;
            }
            // Carry: the bits pushed out of this word land in the next one.
            if bit_shift > 0 && target + 1 < 4 {
                shifted[target + 1] |= word >> (64 - bit_shift);
            }
        }
        Uint256(shifted)
    }
}
/// Parses the `--devfund-percent` value into hundredths of a percent
/// (the devfund share is expressed out of 10_000).
///
/// Accepted forms are `X`, `XX`, `X.Y`, `X.YY`, `XX.Y` and `XX.YY`, digits
/// only. A single fractional digit counts as tenths, so `"2.5"` is 2.50%
/// and yields `250`. Any well-formed value below 2% is raised to the 2%
/// minimum (`200`).
///
/// # Errors
/// Returns the usage message when the value has more than one dot, an empty
/// part, more than two digits on either side of the dot, or any character
/// that is not an ASCII digit (including a sign).
fn parse_devfund_percent(s: &str) -> Result<u16, &'static str> {
    let err = "devfund-percent should be --devfund-percent=XX.YY up to 2 numbers after the dot";
    // One or two ASCII digits. This also bounds each parsed part to 0..=99
    // and rejects the leading '+' that integer parsing would accept.
    let is_short_number =
        |part: &str| !part.is_empty() && part.len() <= 2 && part.bytes().all(|b| b.is_ascii_digit());

    let mut parts = s.split('.');
    let whole = parts.next().ok_or(err)?;
    // if there's no postfix then it's 0.
    let fraction = parts.next();
    // error if there's more than a single dot
    if parts.next().is_some() {
        return Err(err);
    }
    if !is_short_number(whole) || !fraction.map_or(true, is_short_number) {
        return Err(err);
    }

    let whole: u16 = whole.parse().map_err(|_| err)?;
    let hundredths: u16 = match fraction {
        None => 0,
        Some(digits) => {
            let value: u16 = digits.parse().map_err(|_| err)?;
            // ".5" means five tenths, i.e. fifty hundredths.
            if digits.len() == 1 {
                value * 10
            } else {
                value
            }
        }
    };

    if whole < 2 {
        // Force at least 2 percent
        return Ok(200u16);
    }
    // DevFund is out of 10_000
    Ok(whole * 100 + hundredths)
}
Disabling devfund.", 93 | miner_network.unwrap(), 94 | devfund_network.unwrap() 95 | ) 96 | } 97 | Ok(()) 98 | } 99 | 100 | fn port(&mut self) -> u16 { 101 | *self.port.get_or_insert(if self.testnet { 16211 } else { 16110 }) 102 | } 103 | 104 | pub fn log_level(&self) -> LevelFilter { 105 | if self.debug { 106 | LevelFilter::Debug 107 | } else { 108 | LevelFilter::Info 109 | } 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /plugins/cuda/kaspa-cuda-native/src/keccak-tiny-unrolled.c: -------------------------------------------------------------------------------- 1 | /** libkeccak-tiny 2 | * 3 | * A single-file implementation of SHA-3 and SHAKE. 4 | * 5 | * Implementor: David Leon Gil 6 | * License: CC0, attribution kindly requested. Blame taken too, 7 | * but not liability. 8 | */ 9 | #define __STDC_WANT_LIB_EXT1__ 1 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | /******** The Keccak-f[1600] permutation ********/ 17 | 18 | 19 | /*** Constants. ***/ 20 | __device__ static const uint8_t rho[24] = \ 21 | { 1, 3, 6, 10, 15, 21, 22 | 28, 36, 45, 55, 2, 14, 23 | 27, 41, 56, 8, 25, 43, 24 | 62, 18, 39, 61, 20, 44}; 25 | __device__ static const uint8_t pi[24] = \ 26 | {10, 7, 11, 17, 18, 3, 27 | 5, 16, 8, 21, 24, 4, 28 | 15, 23, 19, 13, 12, 2, 29 | 20, 14, 22, 9, 6, 1}; 30 | __device__ static const uint64_t RC[24] = \ 31 | {1ULL, 0x8082ULL, 0x800000000000808aULL, 0x8000000080008000ULL, 32 | 0x808bULL, 0x80000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL, 33 | 0x8aULL, 0x88ULL, 0x80008009ULL, 0x8000000aULL, 34 | 0x8000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL, 35 | 0x8000000000008002ULL, 0x8000000000000080ULL, 0x800aULL, 0x800000008000000aULL, 36 | 0x8000000080008081ULL, 0x8000000000008080ULL, 0x80000001ULL, 0x8000000080008008ULL}; 37 | 38 | /*** Helper macros to unroll the permutation. 
/*** Keccak-f[1600] ***/
/* Applies the permutation in place to the state behind `state`, which is
 * read and written as 25 64-bit lanes a[0..24]. The 24 rounds are emitted by
 * textual repetition (REPEAT24) instead of a loop; `i` counts the rounds and
 * selects the round constant.
 * NOTE(review): the pointer is cast straight to uint64_t*, so callers
 * presumably must pass an 8-byte-aligned, 200-byte buffer - confirm at the
 * call sites. */
__device__ static inline void keccakf(void* state) {
  uint64_t* a = (uint64_t*)state;
  uint64_t b[5] = {0};   /* scratch: column parities (Theta) or one row (Chi) */
  uint64_t t = 0;        /* lane being carried along the Rho/Pi walk */
  uint8_t x, y, i = 0;

  REPEAT24(
      // Theta
      /* b[x] becomes the XOR of the five lanes a[x], a[x+5], ..., a[x+20]. */
      FOR5(x, 1,
           b[x] = 0;
           FOR5(y, 5,
                b[x] ^= a[x + y]; ))
      /* Every lane absorbs the parity of one neighbouring column and the
       * parity of the other neighbouring column rotated left by one bit. */
      FOR5(x, 1,
           FOR5(y, 5,
                a[y + x] ^= b[(x + 4) % 5] ^ rol(b[(x + 1) % 5], 1); ))
      // Rho and pi
      /* Starting from a[1], visit the 24 lanes listed in pi[]: each visited
       * lane is saved in b[0], overwritten with the carried lane rotated by
       * rho[x], and then becomes the carried lane itself. */
      t = a[1];
      x = 0;
      REPEAT24(b[0] = a[pi[x]];
               a[pi[x]] = rol(t, rho[x]);
               t = b[0];
               x++; )
      // Chi
      /* Row by row (y advances in steps of 5): copy the row into b[], then
       * recombine each lane with the two lanes that follow it in the row. */
      FOR5(y,
         5,
         FOR5(x, 1,
              b[x] = a[y + x];)
         FOR5(x, 1,
              a[y + x] = b[x] ^ ((~b[(x + 1) % 5]) & b[(x + 2) % 5]); ))
      // Iota
      /* Mix this round's constant into lane 0, then move to the next round. */
      a[0] ^= RC[i];
      i++; )
}
/** The sponge-based hash construction.
 *
 * Starts from the precomputed state `initP`, absorbs `inlen` bytes of `in`
 * in blocks of `rate` bytes, applies the domain-separation byte `delim` and
 * the final 0x80 padding bit, then squeezes `outlen` bytes into `out`.
 *
 * Returns 0 on success, or -1 when `out` is NULL, when `in` is NULL while
 * `inlen` is non-zero, or when `rate` is 0 or larger than Plen. A zero rate
 * is rejected because the absorb loop would never terminate and
 * `a[rate - 1]` would index outside the state.
 **/
__device__ __forceinline__ static int hash(
    const uint8_t initP[Plen],
    uint8_t* out, size_t outlen,
    const uint8_t* in, size_t inlen,
    size_t rate, uint8_t delim) {
  if ((out == NULL) || ((in == NULL) && inlen != 0) || (rate == 0) || (rate > Plen)) {
    return -1;
  }
  /* P reads the state through a uint64_t pointer, so the byte buffer is
   * explicitly aligned for 64-bit access. */
  __align__(8) uint8_t a[Plen] = {0};
  memcpy(a, initP, Plen);
  // Absorb input.
  foldP(in, inlen, xorin);
  // Xor in the DS and pad frame.
  a[inlen] ^= delim;
  a[rate - 1] ^= 0x80;
  // Xor in the last block.
  xorin(a, in, inlen);
  // Apply P
  P(a);
  // Squeeze output.
  foldP(out, outlen, setout);
  setout(a, out, outlen);
  // Wipe the working state before returning.
  //memset_s(a, 200, 0, 200);
  memset(a, 0, Plen);
  return 0;
}
_enabled: false }) 31 | } 32 | } 33 | 34 | impl Plugin for OpenCLPlugin { 35 | fn name(&self) -> &'static str { 36 | "OpenCL Worker" 37 | } 38 | 39 | fn enabled(&self) -> bool { 40 | self._enabled 41 | } 42 | 43 | fn get_worker_specs(&self) -> Vec> { 44 | self.specs.iter().map(|spec| Box::new(*spec) as Box).collect::>>() 45 | } 46 | 47 | //noinspection RsTypeCheck 48 | fn process_option(&mut self, matches: &ArgMatches) -> Result { 49 | let opts: OpenCLOpt = OpenCLOpt::from_arg_matches(matches)?; 50 | 51 | self._enabled = opts.opencl_enable; 52 | let platforms = match get_platforms() { 53 | Ok(p) => p, 54 | Err(e) => { 55 | return Err(e.to_string().into()); 56 | } 57 | }; 58 | info!("OpenCL Found Platforms:"); 59 | info!("======================="); 60 | for platform in &platforms { 61 | let vendor = &platform.vendor().unwrap_or_else(|_| "Unk".into()); 62 | let name = &platform.name().unwrap_or_else(|_| "Unk".into()); 63 | let num_devices = platform.get_devices(CL_DEVICE_TYPE_ALL).unwrap_or_default().len(); 64 | info!("{}: {} ({} devices available)", vendor, name, num_devices); 65 | } 66 | let amd_platforms = (&platforms) 67 | .iter() 68 | .filter(|p| { 69 | p.vendor().unwrap_or_else(|_| "Unk".into()) == "Advanced Micro Devices, Inc." 
70 | && !p.get_devices(CL_DEVICE_TYPE_ALL).unwrap_or_default().is_empty() 71 | }) 72 | .collect::>(); 73 | let _platform: &Platform = match opts.opencl_platform { 74 | Some(idx) => { 75 | self._enabled = true; 76 | &platforms[idx as usize] 77 | } 78 | None if !opts.opencl_amd_disable && !amd_platforms.is_empty() => { 79 | self._enabled = true; 80 | amd_platforms[0] 81 | } 82 | None => &platforms[0], 83 | }; 84 | if self._enabled { 85 | info!( 86 | "Chose to mine on {}: {}.", 87 | &_platform.vendor().unwrap_or_else(|_| "Unk".into()), 88 | &_platform.name().unwrap_or_else(|_| "Unk".into()) 89 | ); 90 | 91 | let device_ids = _platform.get_devices(CL_DEVICE_TYPE_ALL).unwrap(); 92 | let gpus = match opts.opencl_device { 93 | Some(dev) => { 94 | self._enabled = true; 95 | dev.iter().map(|d| device_ids[*d as usize]).collect::>() 96 | } 97 | None => device_ids, 98 | }; 99 | 100 | self.specs = (0..gpus.len()) 101 | .map(|i| OpenCLWorkerSpec { 102 | _platform: *_platform, 103 | index: i, 104 | device_id: Device::new(gpus[i]), 105 | workload: match &opts.opencl_workload { 106 | Some(workload) if i < workload.len() => workload[i], 107 | Some(workload) if !workload.is_empty() => *workload.last().unwrap(), 108 | _ => DEFAULT_WORKLOAD_SCALE, 109 | }, 110 | is_absolute: opts.opencl_workload_absolute, 111 | experimental_amd: opts.experimental_amd, 112 | use_amd_binary: !opts.opencl_no_amd_binary, 113 | random: opts.opencl_nonce_gen, 114 | }) 115 | .collect(); 116 | } 117 | Ok(self.specs.len()) 118 | } 119 | } 120 | 121 | #[derive(Copy, Clone)] 122 | struct OpenCLWorkerSpec { 123 | _platform: Platform, 124 | index: usize, 125 | device_id: Device, 126 | workload: f32, 127 | is_absolute: bool, 128 | experimental_amd: bool, 129 | use_amd_binary: bool, 130 | random: NonceGenEnum, 131 | } 132 | 133 | impl WorkerSpec for OpenCLWorkerSpec { 134 | fn id(&self) -> String { 135 | format!( 136 | "#{} {}", 137 | self.index, 138 | self.device_id 139 | .board_name_amd() 140 | .unwrap_or_else(|_| 
self.device_id.name().unwrap_or_else(|_| "Unknown Device".into())) 141 | ) 142 | } 143 | 144 | fn build(&self) -> Box { 145 | Box::new( 146 | OpenCLGPUWorker::new( 147 | self.device_id, 148 | self.workload, 149 | self.is_absolute, 150 | self.experimental_amd, 151 | self.use_amd_binary, 152 | &self.random, 153 | ) 154 | .unwrap(), 155 | ) 156 | } 157 | } 158 | 159 | declare_plugin!(OpenCLPlugin, OpenCLPlugin::new, OpenCLOpt); 160 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | use clap::ArgMatches; 2 | use std::any::Any; 3 | use std::error::Error as StdError; 4 | 5 | pub mod xoshiro256starstar; 6 | use libloading::{Library, Symbol}; 7 | 8 | pub type Error = Box; 9 | 10 | #[derive(Default)] 11 | pub struct PluginManager { 12 | plugins: Vec>, 13 | loaded_libraries: Vec, 14 | } 15 | 16 | /** 17 | Plugin Manager class - allows inserting your own hashers 18 | Inspired by https://michael-f-bryan.github.io/rust-ffi-guide/dynamic_loading.html 19 | */ 20 | impl PluginManager { 21 | pub fn new() -> Self { 22 | Self { plugins: Vec::new(), loaded_libraries: Vec::new() } 23 | } 24 | 25 | pub(crate) unsafe fn load_single_plugin<'help>( 26 | &mut self, 27 | app: clap::App<'help>, 28 | path: &str, 29 | ) -> Result, (clap::App<'help>, Error)> { 30 | type PluginCreate<'help> = 31 | unsafe fn(*const clap::App<'help>) -> (*mut clap::App<'help>, *mut dyn Plugin, *mut Error); 32 | 33 | let lib = match Library::new(path) { 34 | Ok(l) => l, 35 | Err(e) => return Err((app, e.to_string().into())), 36 | }; 37 | 38 | self.loaded_libraries.push(lib); // Save library so it persists in memory 39 | let lib = self.loaded_libraries.last().unwrap(); 40 | 41 | let constructor: Symbol = match lib.get(b"_plugin_create") { 42 | Ok(cons) => cons, 43 | Err(e) => return Err((app, e.to_string().into())), 44 | }; 45 | 46 | let (app, boxed_raw, error) = 
constructor(Box::into_raw(Box::new(app))); 47 | let app = *Box::from_raw(app); 48 | 49 | if boxed_raw.is_null() { 50 | return Err((app, *Box::from_raw(error))); 51 | } 52 | let plugin = Box::from_raw(boxed_raw); 53 | self.plugins.push(plugin); 54 | 55 | Ok(app) 56 | } 57 | 58 | pub fn build(&self) -> Result>, Error> { 59 | let mut specs = Vec::>::new(); 60 | for plugin in &self.plugins { 61 | if plugin.enabled() { 62 | specs.extend(plugin.get_worker_specs()); 63 | } 64 | } 65 | Ok(specs) 66 | } 67 | 68 | /** 69 | Process the options for a plugin, and reports how many workers are available 70 | */ 71 | pub fn process_options(&mut self, matchs: &ArgMatches) -> Result { 72 | let mut count = 0usize; 73 | self.plugins.iter_mut().for_each(|plugin| { 74 | count += match plugin.process_option(matchs) { 75 | Ok(n) => n, 76 | Err(e) => { 77 | eprintln!( 78 | "WARNING: Failed processing options for {} (ignore if you do not intend to use): {}", 79 | plugin.name(), 80 | e 81 | ); 82 | 0 83 | } 84 | } 85 | }); 86 | Ok(count) 87 | } 88 | 89 | pub fn has_specs(&self) -> bool { 90 | !self.plugins.is_empty() 91 | } 92 | } 93 | 94 | pub trait Plugin: Any + Send + Sync { 95 | fn name(&self) -> &'static str; 96 | fn enabled(&self) -> bool; 97 | fn get_worker_specs(&self) -> Vec>; 98 | fn process_option(&mut self, matchs: &ArgMatches) -> Result; 99 | } 100 | 101 | pub trait WorkerSpec: Any + Send + Sync { 102 | /*type_: GPUWorkType, 103 | opencl_platform: u16, 104 | device_id: u32, 105 | workload: f32, 106 | is_absolute: bool*/ 107 | fn id(&self) -> String; 108 | fn build(&self) -> Box; 109 | } 110 | 111 | pub trait Worker { 112 | //fn new(device_id: u32, workload: f32, is_absolute: bool) -> Result; 113 | fn id(&self) -> String; 114 | fn load_block_constants(&mut self, hash_header: &[u8; 72], matrix: &[[u16; 64]; 64], target: &[u64; 4]); 115 | 116 | fn calculate_hash(&mut self, nonces: Option<&Vec>, nonce_mask: u64, nonce_fixed: u64); 117 | fn sync(&self) -> Result<(), Error>; 118 | 119 
/// Generates the exported `_plugin_create` entry point of a plugin library.
///
/// Parameters:
/// * `$plugin_type` - the concrete type implementing `$crate::Plugin`.
/// * `$constructor` - path to a `fn() -> Result<$plugin_type, $crate::Error>`.
/// * `$args` - a `clap::Args` type whose arguments are added to the host's
///   `clap::App`.
///
/// The generated function takes a raw pointer to a boxed `clap::App` and
/// returns an `(app, plugin, error)` triple of raw pointers:
/// * on success: the re-boxed, augmented app, the boxed plugin, and a null
///   error pointer;
/// * when the constructor fails: the untouched `app` pointer, a null plugin
///   pointer, and the boxed error.
///
/// # Safety
///
/// `app` must have been produced by `Box::into_raw` on a `Box<clap::App>`;
/// on success the generated function takes ownership of it. The caller owns
/// every non-null pointer in the returned triple.
// NOTE(review): a tuple containing a trait-object pointer is not a C-compatible
// return type; this presumably relies on host and plugin being built with the
// same compiler - confirm.
#[macro_export]
macro_rules! declare_plugin {
    ($plugin_type:ty, $constructor:path, $args:ty) => {
        use clap::Args;
        #[no_mangle]
        pub unsafe extern "C" fn _plugin_create(
            app: *mut clap::App,
        ) -> (*mut clap::App, *mut dyn $crate::Plugin, *const $crate::Error) {
            // make sure the constructor is the correct type.
            let constructor: fn() -> Result<$plugin_type, $crate::Error> = $constructor;

            let object = match constructor() {
                Ok(obj) => obj,
                Err(e) => {
                    return (
                        app,
                        // Null data pointer with a valid vtable. An all-zero wide
                        // pointer would carry a null vtable, which is invalid
                        // metadata even for raw pointers. `is_null()` only looks
                        // at the data half, so the host still sees null.
                        std::ptr::null_mut::<$plugin_type>() as *mut dyn $crate::Plugin,
                        Box::into_raw(Box::new(e)),
                    );
                }
            };

            let boxed: Box<dyn $crate::Plugin> = Box::new(object);

            // SAFETY: per the contract above, `app` came from `Box::into_raw`
            // and is not used again by the caller before we hand back the
            // replacement pointer.
            let boxed_app = Box::new(<$args>::augment_args(unsafe { *Box::from_raw(app) }));
            (Box::into_raw(boxed_app), Box::into_raw(boxed), std::ptr::null::<$crate::Error>())
        }
    };
}
To compile a specific package, run the following command or any subset of it 24 | 25 | ```sh 26 | git clone git@github.com:tmrlvi/kaspa-miner.git 27 | cd kaspa-miner 28 | cargo build --release -p kaspa-miner -p kaspacuda -p kaspaopencl 29 | ``` 30 | The miner (and plugins) will then be in `target/release`. You can replace the last line with 31 | ```sh 32 | cargo build --release --all 33 | ``` 34 | 35 | ### From Binaries 36 | The [release page](https://github.com/tmrlvi/kaspa-miner/releases) includes precompiled binaries for Linux and Windows (for the GPU version). 37 | 38 | ### Removing Plugins 39 | To remove a plugin, simply remove the corresponding `dll`/`so` from the directory of the miner. 40 | 41 | * `libkaspacuda.so`, `kaspacuda.dll`: CUDA support for Kaspa-Miner 42 | * `libkaspaopencl.so`, `kaspaopencl.dll`: OpenCL support for Kaspa-Miner 43 | 44 | # Usage 45 | To start mining, you need to run [kaspad](https://github.com/kaspanet/kaspad) and have an address to send the rewards to. 46 | Here is a guide on how to run a full node and how to generate addresses: https://github.com/kaspanet/docs/blob/main/Getting%20Started/Full%20Node%20Installation.md 47 | 48 | Help: 49 | ``` 50 | kaspa-miner 51 | A Kaspa high performance CPU miner 52 | 53 | USAGE: 54 | kaspa-miner [OPTIONS] --mining-address 55 | 56 | OPTIONS: 57 | -a, --mining-address The Kaspa address for the miner reward 58 | --cuda-device Which CUDA GPUs to use [default: all] 59 | --cuda-disable Disable cuda workers 60 | --cuda-lock-core-clocks Lock core clocks eg: ,1200, [default: 0] 61 | --cuda-lock-mem-clocks Lock mem clocks eg: ,810, [default: 0] 62 | --cuda-no-blocking-sync Actively wait for result. Higher CPU usage, but less red blocks. Can have lower workload.
63 | --cuda-power-limits Lock power limits eg: ,150, [default: 0] 64 | --cuda-workload Ratio of nonces to GPU possible parrallel run [default: 64] 65 | --cuda-workload-absolute The values given by workload are not ratio, but absolute number of nonces [default: false] 66 | -d, --debug Enable debug logging level 67 | --devfund-percent The percentage of blocks to send to the devfund (minimum 2%) [default: 2] 68 | --experimental-amd Uses SMID instructions in AMD. Miner will crash if instruction is not supported 69 | -h, --help Print help information 70 | --mine-when-not-synced Mine even when kaspad says it is not synced 71 | --nonce-gen The random method used to generate nonces. Options: (i) xoshiro (ii) lean [default: lean] 72 | --opencl-amd-disable Disables AMD mining (does not override opencl-enable) 73 | --opencl-device Which OpenCL GPUs to use on a specific platform 74 | --opencl-enable Enable opencl, and take all devices of the chosen platform 75 | --opencl-no-amd-binary Disable fetching of precompiled AMD kernel (if exists) 76 | --opencl-platform Which OpenCL platform to use (limited to one per executable) 77 | --opencl-workload Ratio of nonces to GPU possible parrallel run in OpenCL [default: 512] 78 | --opencl-workload-absolute The values given by workload are not ratio, but absolute number of nonces in OpenCL [default: false] 79 | -p, --port Kaspad port [default: Mainnet = 16110, Testnet = 16211] 80 | -s, --kaspad-address The IP of the kaspad instance [default: 127.0.0.1] 81 | -t, --threads Amount of CPU miner threads to launch [default: 0] 82 | --testnet Use testnet instead of mainnet [default: false] 83 | ``` 84 | 85 | To start mining, you just need to run the following: 86 | 87 | `./kaspa-miner --mining-address kaspa:XXXXX` 88 | 89 | This will run the miner on all the available GPU devices. 90 | 91 | # Devfund 92 | 93 | The devfund is a fund managed by the Kaspa community in order to fund Kaspa development.
94 | A miner that wants to mine a higher percentage into the devfund can pass the following flag:
95 | `--devfund-percent=XX.YY` to mine only XX.YY% of the blocks into the devfund. 96 | 97 | **This version automatically sets the devfund donation to the community designated address. 98 | Due to a community decision, the minimum amount in the precompiled binaries is 2%** 99 | 100 | # Donation Addresses 101 | 102 | **Elichai**: `kaspa:qzvqtx5gkvl3tc54up6r8pk5mhuft9rtr0lvn624w9mtv4eqm9rvc9zfdmmpu` 103 | 104 | **HauntedCook**: `kaspa:qz4jdyu04hv4hpyy00pl6trzw4gllnhnwy62xattejv2vaj5r0p5quvns058f` 105 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(all(test, feature = "bench"), feature(test))] 2 | 3 | use std::env::consts::DLL_EXTENSION; 4 | use std::env::current_exe; 5 | use std::error::Error as StdError; 6 | use std::ffi::OsStr; 7 | 8 | use clap::{App, FromArgMatches, IntoApp}; 9 | use kaspa_miner::PluginManager; 10 | use log::{error, info}; 11 | use rand::{thread_rng, RngCore}; 12 | use std::fs; 13 | use std::sync::atomic::AtomicU16; 14 | use std::sync::Arc; 15 | use std::thread::sleep; 16 | use std::time::Duration; 17 | 18 | use crate::cli::Opt; 19 | use crate::client::grpc::KaspadHandler; 20 | use crate::client::stratum::StratumHandler; 21 | use crate::client::Client; 22 | use crate::miner::MinerManager; 23 | use crate::target::Uint256; 24 | 25 | mod cli; 26 | mod client; 27 | mod kaspad_messages; 28 | mod miner; 29 | mod pow; 30 | mod target; 31 | mod watch; 32 | 33 | const WHITELIST: [&str; 4] = ["libkaspacuda", "libkaspaopencl", "kaspacuda", "kaspaopencl"]; 34 | 35 | pub mod proto { 36 | #![allow(clippy::derive_partial_eq_without_eq)] 37 | tonic::include_proto!("protowire"); 38 | // include!("protowire.rs"); // FIXME: https://github.com/intellij-rust/intellij-rust/issues/6579 39 | } 40 | 41 | pub type Error = Box; 42 | 43 | type Hash = Uint256; 44 | 45 | #[cfg(target_os = "windows")] 46 | fn adjust_console() ->
Result<(), Error> { 47 | let console = win32console::console::WinConsole::input(); 48 | let mut mode = console.get_mode()?; 49 | mode = (mode & !win32console::console::ConsoleMode::ENABLE_QUICK_EDIT_MODE) 50 | | win32console::console::ConsoleMode::ENABLE_EXTENDED_FLAGS; 51 | console.set_mode(mode)?; 52 | Ok(()) 53 | } 54 | 55 | fn filter_plugins(dirname: &str) -> Vec { 56 | match fs::read_dir(dirname) { 57 | Ok(readdir) => readdir 58 | .map(|entry| entry.unwrap().path()) 59 | .filter(|fname| { 60 | fname.is_file() 61 | && fname.extension().is_some() 62 | && fname.extension().and_then(OsStr::to_str).unwrap_or_default().starts_with(DLL_EXTENSION) 63 | }) 64 | .filter(|fname| WHITELIST.iter().any(|lib| *lib == fname.file_stem().and_then(OsStr::to_str).unwrap())) 65 | .map(|path| path.to_str().unwrap().to_string()) 66 | .collect::>(), 67 | _ => Vec::::new(), 68 | } 69 | } 70 | 71 | async fn get_client( 72 | kaspad_address: String, 73 | mining_address: String, 74 | mine_when_not_synced: bool, 75 | block_template_ctr: Arc, 76 | ) -> Result, Error> { 77 | if kaspad_address.starts_with("stratum+tcp://") { 78 | let (_schema, address) = kaspad_address.split_once("://").unwrap(); 79 | Ok(StratumHandler::connect( 80 | address.to_string().clone(), 81 | mining_address.clone(), 82 | mine_when_not_synced, 83 | Some(block_template_ctr.clone()), 84 | ) 85 | .await?) 86 | } else if kaspad_address.starts_with("grpc://") { 87 | Ok(KaspadHandler::connect( 88 | kaspad_address.clone(), 89 | mining_address.clone(), 90 | mine_when_not_synced, 91 | Some(block_template_ctr.clone()), 92 | ) 93 | .await?) 
94 | } else { 95 | Err("Did not recognize pool/grpc address schema".into()) 96 | } 97 | } 98 | 99 | async fn client_main( 100 | opt: &Opt, 101 | block_template_ctr: Arc, 102 | plugin_manager: &PluginManager, 103 | ) -> Result<(), Error> { 104 | let mut client = get_client( 105 | opt.kaspad_address.clone(), 106 | opt.mining_address.clone(), 107 | opt.mine_when_not_synced, 108 | block_template_ctr.clone(), 109 | ) 110 | .await?; 111 | 112 | if opt.devfund_percent > 0 { 113 | client.add_devfund(opt.devfund_address.clone(), opt.devfund_percent); 114 | } 115 | client.register().await?; 116 | let mut miner_manager = MinerManager::new(client.get_block_channel(), opt.num_threads, plugin_manager); 117 | client.listen(&mut miner_manager).await?; 118 | drop(miner_manager); 119 | Ok(()) 120 | } 121 | 122 | #[tokio::main] 123 | async fn main() -> Result<(), Error> { 124 | #[cfg(target_os = "windows")] 125 | adjust_console().unwrap_or_else(|e| { 126 | eprintln!("WARNING: Failed to protect console ({}). 
Any selection in console will freeze the miner.", e) 127 | }); 128 | let mut path = current_exe().unwrap_or_default(); 129 | path.pop(); // Getting the parent directory 130 | let plugins = filter_plugins(path.to_str().unwrap_or(".")); 131 | let (app, mut plugin_manager): (App, PluginManager) = kaspa_miner::load_plugins(Opt::into_app(), &plugins)?; 132 | 133 | let matches = app.get_matches(); 134 | 135 | let worker_count = plugin_manager.process_options(&matches)?; 136 | let mut opt: Opt = Opt::from_arg_matches(&matches)?; 137 | opt.process()?; 138 | env_logger::builder().filter_level(opt.log_level()).parse_default_env().init(); 139 | info!("================================================================================="); 140 | info!(" Kaspa-Miner GPU {}", env!("CARGO_PKG_VERSION")); 141 | info!(" Mining for: {}", opt.mining_address); 142 | info!("================================================================================="); 143 | info!("Found plugins: {:?}", plugins); 144 | info!("Plugins found {} workers", worker_count); 145 | if worker_count == 0 && opt.num_threads.unwrap_or(0) == 0 { 146 | error!("No workers specified"); 147 | return Err("No workers specified".into()); 148 | } 149 | 150 | let block_template_ctr = Arc::new(AtomicU16::new((thread_rng().next_u64() % 10_000u64) as u16)); 151 | if opt.devfund_percent > 0 { 152 | info!( 153 | "devfund enabled, mining {}.{}% of the time to devfund address: {} ", 154 | opt.devfund_percent / 100, 155 | opt.devfund_percent % 100, 156 | opt.devfund_address 157 | ); 158 | } 159 | loop { 160 | match client_main(&opt, block_template_ctr.clone(), &plugin_manager).await { 161 | Ok(_) => info!("Client closed gracefully"), 162 | Err(e) => error!("Client closed with error {:?}", e), 163 | } 164 | info!("Client closed, reconnecting"); 165 | sleep(Duration::from_millis(100)); 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /src/pow/hasher.rs: 
impl HeavyHasher {
    // The initial state of `cSHAKE256("HeavyHash")`
    // (the unit test in this file checks `hash` against cSHAKE256 with the
    // `b"HeavyHash"` domain, not the proof-of-work domain).
    // The padding is folded into the constant so `hash` only has to XOR the input:
    // [4] -> 16654558671554924254 ^ 0x04(padding byte) = 16654558671554924250
    // [16] -> 9793466274154320918 ^ 0x8000000000000000(final padding) = 570094237299545110
    #[rustfmt::skip]
    const INITIAL_STATE: [u64; 25] = [
        4239941492252378377, 8746723911537738262, 8796936657246353646, 1272090201925444760, 16654558671554924250,
        8270816933120786537, 13907396207649043898, 6782861118970774626, 9239690602118867528, 11582319943599406348,
        17596056728278508070, 15212962468105129023, 7812475424661425213, 3370482334374859748, 5690099369266491460,
        8596393687355028144, 570094237299545110, 9119540418498120711, 16901969272480492857, 13372017233735502424,
        14372891883993151831, 5171152063242093102, 10573107899694386186, 6096431547456407061, 1592359455985097269,
    ];
    /// Hashes `in_hash` starting from the precomputed state above.
    ///
    /// The words of `in_hash` are XORed into the leading words of a copy of
    /// `INITIAL_STATE`, one Keccak-f[1600] permutation is applied, and the
    /// first four state words are returned as the resulting `Hash`.
    #[inline(always)]
    pub(super) fn hash(in_hash: Hash) -> Hash {
        let mut state = Self::INITIAL_STATE;
        // `zip` stops at the shorter side, so only as many state words as
        // `in_hash` has words are touched.
        for (&pre_pow_word, state_word) in in_hash.0.iter().zip(state.iter_mut()) {
            *state_word ^= pre_pow_word;
        }
        super::keccak::f1600(&mut state);
        // A four-element slice always converts to `[u64; 4]`, so this cannot panic.
        Hash::new(state[..4].try_into().unwrap())
    }
}
b"ProofOfWorkHash"; 103 | const HEAVY_HASH_DOMAIN: &[u8] = b"HeavyHash"; 104 | 105 | #[test] 106 | fn test_pow_hash() { 107 | let timestamp: u64 = 5435345234; 108 | let nonce: u64 = 432432432; 109 | let pre_pow_hash = Hash::from_le_bytes([42; 32]); 110 | let hasher = PowHasher::new(pre_pow_hash, timestamp); 111 | let hash1 = hasher.finalize_with_nonce(nonce); 112 | 113 | let hasher = CShake256::new(PROOF_OF_WORK_DOMAIN) 114 | .chain(pre_pow_hash.to_le_bytes()) 115 | .chain(timestamp.to_le_bytes()) 116 | .chain([0u8; 32]) 117 | .chain(nonce.to_le_bytes()); 118 | let mut hash2 = [0u8; 32]; 119 | hasher.finalize_xof().read(&mut hash2); 120 | assert_eq!(Hash::from_le_bytes(hash2), hash1); 121 | } 122 | 123 | #[test] 124 | fn test_heavy_hash() { 125 | let val = Hash::from_le_bytes([42; 32]); 126 | let hash1 = HeavyHasher::hash(val); 127 | 128 | let hasher = CShake256::new(HEAVY_HASH_DOMAIN).chain(val.to_le_bytes()); 129 | let mut hash2 = [0u8; 32]; 130 | hasher.finalize_xof().read(&mut hash2); 131 | assert_eq!(Hash::from_le_bytes(hash2), hash1); 132 | } 133 | } 134 | 135 | #[cfg(all(test, feature = "bench"))] 136 | mod benches { 137 | extern crate test; 138 | 139 | use self::test::{black_box, Bencher}; 140 | use super::{HeavyHasher, PowHasher}; 141 | use crate::Hash; 142 | 143 | #[bench] 144 | pub fn bench_pow_hash(bh: &mut Bencher) { 145 | let timestamp: u64 = 5435345234; 146 | let mut nonce: u64 = 432432432; 147 | let pre_pow_hash = Hash::from_le_bytes([42; 32]); 148 | let mut hasher = PowHasher::new(pre_pow_hash, timestamp); 149 | 150 | bh.iter(|| { 151 | for _ in 0..100 { 152 | black_box(&mut hasher); 153 | black_box(&mut nonce); 154 | black_box(hasher.finalize_with_nonce(nonce)); 155 | } 156 | }); 157 | } 158 | 159 | #[bench] 160 | pub fn bench_heavy_hash(bh: &mut Bencher) { 161 | let mut data = Hash::from_le_bytes([42; 32]); 162 | bh.iter(|| { 163 | for _ in 0..100 { 164 | black_box(&mut data); 165 | black_box(HeavyHasher::hash(data)); 166 | } 167 | }); 168 | } 
169 | } 170 | -------------------------------------------------------------------------------- /proto/p2p.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | package protowire; 3 | 4 | option go_package = "github.com/kaspanet/kaspad/protowire"; 5 | 6 | message RequestAddressesMessage{ 7 | bool includeAllSubnetworks = 1; 8 | SubnetworkId subnetworkId = 2; 9 | } 10 | 11 | message AddressesMessage{ 12 | repeated NetAddress addressList = 1; 13 | } 14 | 15 | message NetAddress{ 16 | int64 timestamp = 1; 17 | bytes ip = 3; 18 | uint32 port = 4; 19 | } 20 | 21 | message SubnetworkId{ 22 | bytes bytes = 1; 23 | } 24 | 25 | message TransactionMessage{ 26 | uint32 version = 1; 27 | repeated TransactionInput inputs = 2; 28 | repeated TransactionOutput outputs = 3; 29 | uint64 lockTime = 4; 30 | SubnetworkId subnetworkId = 5; 31 | uint64 gas = 6; 32 | bytes payload = 8; 33 | } 34 | 35 | message TransactionInput{ 36 | Outpoint previousOutpoint = 1; 37 | bytes signatureScript = 2; 38 | uint64 sequence = 3; 39 | uint32 sigOpCount = 4; 40 | } 41 | 42 | message Outpoint{ 43 | TransactionId transactionId = 1; 44 | uint32 index = 2; 45 | } 46 | 47 | message TransactionId{ 48 | bytes bytes = 1; 49 | } 50 | message ScriptPublicKey { 51 | bytes script = 1; 52 | uint32 version = 2; 53 | } 54 | 55 | message TransactionOutput{ 56 | uint64 value = 1; 57 | ScriptPublicKey scriptPublicKey = 2; 58 | } 59 | 60 | message BlockMessage{ 61 | BlockHeader header = 1; 62 | repeated TransactionMessage transactions = 2; 63 | } 64 | 65 | message BlockHeader{ 66 | uint32 version = 1; 67 | repeated BlockLevelParents parents = 12; 68 | Hash hashMerkleRoot = 3; 69 | Hash acceptedIdMerkleRoot = 4; 70 | Hash utxoCommitment = 5; 71 | int64 timestamp = 6; 72 | uint32 bits = 7; 73 | uint64 nonce = 8; 74 | uint64 daaScore = 9; 75 | bytes blueWork = 10; 76 | Hash pruningPoint = 14; 77 | uint64 blueScore = 13; 78 | } 79 | 80 | message BlockLevelParents 
{ 81 | repeated Hash parentHashes = 1; 82 | } 83 | 84 | message Hash{ 85 | bytes bytes = 1; 86 | } 87 | 88 | message RequestBlockLocatorMessage{ 89 | Hash highHash = 1; 90 | uint32 limit = 2; 91 | } 92 | 93 | message BlockLocatorMessage{ 94 | repeated Hash hashes = 1; 95 | } 96 | 97 | message RequestHeadersMessage{ 98 | Hash lowHash = 1; 99 | Hash highHash = 2; 100 | } 101 | 102 | message RequestNextHeadersMessage{ 103 | } 104 | 105 | message DoneHeadersMessage{ 106 | } 107 | 108 | message RequestRelayBlocksMessage{ 109 | repeated Hash hashes = 1; 110 | } 111 | 112 | message RequestTransactionsMessage { 113 | repeated TransactionId ids = 1; 114 | } 115 | 116 | message TransactionNotFoundMessage{ 117 | TransactionId id = 1; 118 | } 119 | 120 | message InvRelayBlockMessage{ 121 | Hash hash = 1; 122 | } 123 | 124 | message InvTransactionsMessage{ 125 | repeated TransactionId ids = 1; 126 | } 127 | 128 | message PingMessage{ 129 | uint64 nonce = 1; 130 | } 131 | 132 | message PongMessage{ 133 | uint64 nonce = 1; 134 | } 135 | 136 | message VerackMessage{ 137 | } 138 | 139 | message VersionMessage{ 140 | uint32 protocolVersion = 1; 141 | uint64 services = 2; 142 | int64 timestamp = 3; 143 | NetAddress address = 4; 144 | bytes id = 5; 145 | string userAgent = 6; 146 | bool disableRelayTx = 8; 147 | SubnetworkId subnetworkId = 9; 148 | string network = 10; 149 | } 150 | 151 | message RejectMessage{ 152 | string reason = 1; 153 | } 154 | 155 | message RequestPruningPointUTXOSetMessage{ 156 | Hash pruningPointHash = 1; 157 | } 158 | 159 | message PruningPointUtxoSetChunkMessage{ 160 | repeated OutpointAndUtxoEntryPair outpointAndUtxoEntryPairs = 1; 161 | } 162 | 163 | message OutpointAndUtxoEntryPair{ 164 | Outpoint outpoint = 1; 165 | UtxoEntry utxoEntry = 2; 166 | } 167 | 168 | message UtxoEntry { 169 | uint64 amount = 1; 170 | ScriptPublicKey scriptPublicKey = 2; 171 | uint64 blockDaaScore = 3; 172 | bool isCoinbase = 4; 173 | } 174 | 175 | message 
RequestNextPruningPointUtxoSetChunkMessage { 176 | } 177 | 178 | message DonePruningPointUtxoSetChunksMessage { 179 | } 180 | 181 | message RequestIBDBlocksMessage{ 182 | repeated Hash hashes = 1; 183 | } 184 | 185 | message UnexpectedPruningPointMessage{ 186 | } 187 | 188 | message IbdBlockLocatorMessage { 189 | Hash targetHash = 1; 190 | repeated Hash blockLocatorHashes = 2; 191 | } 192 | 193 | message RequestIBDChainBlockLocatorMessage{ 194 | Hash lowHash = 1; 195 | Hash highHash = 2; 196 | } 197 | 198 | message IbdChainBlockLocatorMessage { 199 | repeated Hash blockLocatorHashes = 1; 200 | } 201 | 202 | message RequestAnticoneMessage{ 203 | Hash blockHash = 1; 204 | Hash contextHash = 2; 205 | } 206 | 207 | message IbdBlockLocatorHighestHashMessage { 208 | Hash highestHash = 1; 209 | } 210 | 211 | message IbdBlockLocatorHighestHashNotFoundMessage { 212 | } 213 | 214 | message BlockHeadersMessage { 215 | repeated BlockHeader blockHeaders = 1; 216 | } 217 | 218 | message RequestPruningPointAndItsAnticoneMessage { 219 | } 220 | 221 | message RequestNextPruningPointAndItsAnticoneBlocksMessage{ 222 | } 223 | 224 | message BlockWithTrustedDataMessage { 225 | BlockMessage block = 1; 226 | uint64 daaScore = 2; 227 | repeated DaaBlock daaWindow = 3; 228 | repeated BlockGhostdagDataHashPair ghostdagData = 4; 229 | } 230 | 231 | message DaaBlock { 232 | BlockMessage block = 3; 233 | GhostdagData ghostdagData = 2; 234 | } 235 | 236 | message DaaBlockV4 { 237 | BlockHeader header = 1; 238 | GhostdagData ghostdagData = 2; 239 | } 240 | 241 | message BlockGhostdagDataHashPair { 242 | Hash hash = 1; 243 | GhostdagData ghostdagData = 2; 244 | } 245 | 246 | message GhostdagData { 247 | uint64 blueScore = 1; 248 | bytes blueWork = 2; 249 | Hash selectedParent = 3; 250 | repeated Hash mergeSetBlues = 4; 251 | repeated Hash mergeSetReds = 5; 252 | repeated BluesAnticoneSizes bluesAnticoneSizes = 6; 253 | } 254 | 255 | message BluesAnticoneSizes { 256 | Hash blueHash = 1; 257 | 
uint32 anticoneSize = 2; 258 | } 259 | 260 | message DoneBlocksWithTrustedDataMessage { 261 | } 262 | 263 | message PruningPointsMessage { 264 | repeated BlockHeader headers = 1; 265 | } 266 | 267 | message RequestPruningPointProofMessage { 268 | } 269 | 270 | message PruningPointProofMessage { 271 | repeated PruningPointProofHeaderArray headers = 1; 272 | } 273 | 274 | message PruningPointProofHeaderArray { 275 | repeated BlockHeader headers = 1; 276 | } 277 | 278 | message ReadyMessage { 279 | } 280 | 281 | message BlockWithTrustedDataV4Message { 282 | BlockMessage block = 1; 283 | repeated uint64 daaWindowIndices = 2; 284 | repeated uint64 ghostdagDataIndices = 3; 285 | } 286 | 287 | message TrustedDataMessage { 288 | repeated DaaBlockV4 daaWindow = 1; 289 | repeated BlockGhostdagDataHashPair ghostdagData = 2; 290 | } 291 | -------------------------------------------------------------------------------- /plugins/cuda/src/lib.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate kaspa_miner; 3 | 4 | use clap::{ArgMatches, FromArgMatches}; 5 | use cust::prelude::*; 6 | use kaspa_miner::{Plugin, Worker, WorkerSpec}; 7 | use log::LevelFilter; 8 | use std::error::Error as StdError; 9 | #[cfg(feature = "overclock")] 10 | use { 11 | log::{error, info}, 12 | nvml_wrapper::Device as NvmlDevice, 13 | nvml_wrapper::Nvml, 14 | }; 15 | 16 | pub type Error = Box; 17 | 18 | mod cli; 19 | mod worker; 20 | 21 | use crate::cli::{CudaOpt, NonceGenEnum}; 22 | use crate::worker::CudaGPUWorker; 23 | 24 | const DEFAULT_WORKLOAD_SCALE: f32 = 1024.; 25 | 26 | pub struct CudaPlugin { 27 | specs: Vec, 28 | #[cfg(feature = "overclock")] 29 | nvml_instance: Nvml, 30 | _enabled: bool, 31 | } 32 | 33 | impl CudaPlugin { 34 | fn new() -> Result { 35 | cust::init(CudaFlags::empty())?; 36 | env_logger::builder().filter_level(LevelFilter::Info).parse_default_env().init(); 37 | Ok(Self { 38 | specs: Vec::new(), 39 | _enabled: false, 
40 | #[cfg(feature = "overclock")] 41 | nvml_instance: Nvml::init()?, 42 | }) 43 | } 44 | } 45 | 46 | impl Plugin for CudaPlugin { 47 | fn name(&self) -> &'static str { 48 | "CUDA Worker" 49 | } 50 | 51 | fn enabled(&self) -> bool { 52 | self._enabled 53 | } 54 | 55 | fn get_worker_specs(&self) -> Vec> { 56 | self.specs.iter().map(|spec| Box::new(*spec) as Box).collect::>>() 57 | } 58 | 59 | //noinspection RsTypeCheck 60 | fn process_option(&mut self, matches: &ArgMatches) -> Result { 61 | let opts: CudaOpt = CudaOpt::from_arg_matches(matches)?; 62 | 63 | self._enabled = !opts.cuda_disable; 64 | if self._enabled { 65 | let gpus: Vec = match &opts.cuda_device { 66 | Some(devices) => devices.clone(), 67 | None => { 68 | let gpu_count = Device::num_devices().unwrap() as u16; 69 | (0..gpu_count).collect() 70 | } 71 | }; 72 | 73 | // if any of cuda_lock_core_clocks / cuda_lock_mem_clocks / cuda_power_limit is valid, init nvml and try to apply 74 | #[cfg(feature = "overclock")] 75 | if opts.overclock.cuda_lock_core_clocks.is_some() 76 | || opts.overclock.cuda_lock_mem_clocks.is_some() 77 | || opts.overclock.cuda_power_limits.is_some() 78 | { 79 | for i in 0..gpus.len() { 80 | let lock_mem_clock: Option = match &opts.overclock.cuda_lock_mem_clocks { 81 | Some(mem_clocks) if i < mem_clocks.len() => Some(mem_clocks[i]), 82 | Some(mem_clocks) if !mem_clocks.is_empty() => Some(*mem_clocks.last().unwrap()), 83 | _ => None, 84 | }; 85 | 86 | let lock_core_clock: Option = match &opts.overclock.cuda_lock_core_clocks { 87 | Some(core_clocks) if i < core_clocks.len() => Some(core_clocks[i]), 88 | Some(core_clocks) if !core_clocks.is_empty() => Some(*core_clocks.last().unwrap()), 89 | _ => None, 90 | }; 91 | 92 | let power_limit: Option = match &opts.overclock.cuda_power_limits { 93 | Some(power_limits) if i < power_limits.len() => Some(power_limits[i]), 94 | Some(power_limits) if !power_limits.is_empty() => Some(*power_limits.last().unwrap()), 95 | _ => None, 96 | }; 97 | 98 | let 
mut nvml_device: NvmlDevice = self.nvml_instance.device_by_index(gpus[i] as u32)?; 99 | 100 | if let Some(lmc) = lock_mem_clock { 101 | match nvml_device.set_mem_locked_clocks(lmc, lmc) { 102 | Err(e) => error!("set mem locked clocks {:?}", e), 103 | _ => info!("GPU #{} #{} lock mem clock at {} Mhz", i, &nvml_device.name()?, &lmc), 104 | }; 105 | } 106 | 107 | if let Some(lcc) = lock_core_clock { 108 | match nvml_device.set_gpu_locked_clocks(lcc, lcc) { 109 | Err(e) => error!("set gpu locked clocks {:?}", e), 110 | _ => info!("GPU #{} #{} lock core clock at {} Mhz", i, &nvml_device.name()?, &lcc), 111 | }; 112 | }; 113 | 114 | if let Some(pl) = power_limit { 115 | match nvml_device.set_power_management_limit(pl * 1000) { 116 | Err(e) => error!("set power limit {:?}", e), 117 | _ => info!("GPU #{} #{} power limit at {} W", i, &nvml_device.name()?, &pl), 118 | }; 119 | }; 120 | } 121 | } 122 | 123 | self.specs = (0..gpus.len()) 124 | .map(|i| CudaWorkerSpec { 125 | device_id: gpus[i] as u32, 126 | workload: match &opts.cuda_workload { 127 | Some(workload) if i < workload.len() => workload[i], 128 | Some(workload) if !workload.is_empty() => *workload.last().unwrap(), 129 | _ => DEFAULT_WORKLOAD_SCALE, 130 | }, 131 | is_absolute: opts.cuda_workload_absolute, 132 | blocking_sync: !opts.cuda_no_blocking_sync, 133 | random: opts.cuda_nonce_gen, 134 | }) 135 | .collect(); 136 | } 137 | Ok(self.specs.len()) 138 | } 139 | } 140 | 141 | #[derive(Copy, Clone)] 142 | struct CudaWorkerSpec { 143 | device_id: u32, 144 | workload: f32, 145 | is_absolute: bool, 146 | blocking_sync: bool, 147 | random: NonceGenEnum, 148 | } 149 | 150 | impl WorkerSpec for CudaWorkerSpec { 151 | fn id(&self) -> String { 152 | let device = Device::get_device(self.device_id).unwrap(); 153 | format!("#{} ({})", self.device_id, device.name().unwrap()) 154 | } 155 | 156 | fn build(&self) -> Box { 157 | Box::new( 158 | CudaGPUWorker::new(self.device_id, self.workload, self.is_absolute, self.blocking_sync, 
self.random) 159 | .unwrap(), 160 | ) 161 | } 162 | } 163 | 164 | declare_plugin!(CudaPlugin, CudaPlugin::new, CudaOpt); 165 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yaml: -------------------------------------------------------------------------------- 1 | name: Build and upload assets 2 | on: 3 | release: 4 | types: [ published ] 5 | 6 | jobs: 7 | build: 8 | runs-on: ${{ matrix.os }} 9 | strategy: 10 | fail-fast: false 11 | matrix: 12 | # Build gnu-linux on ubuntu-18.04 and musl on ubuntu latest 13 | os: [ ubuntu-18.04, windows-latest, macos-latest ] 14 | features: ["default", "kaspacuda/overclock "] 15 | name: Building, ${{ matrix.os }} ${{ matrix.features }} 16 | steps: 17 | - name: Fix CRLF on Windows 18 | if: runner.os == 'Windows' 19 | run: git config --global core.autocrlf false 20 | 21 | - name: Fix LibOpenCL on Linux 22 | if: runner.os == 'Linux' 23 | run: | 24 | sudo apt update 25 | sudo apt install ocl-icd-opencl-dev -y 26 | 27 | - name: Check out code into the Go module directory 28 | uses: actions/checkout@v2 29 | 30 | - name: Install Protoc 31 | uses: arduino/setup-protoc@v1 32 | 33 | - name: Setup Rust 34 | uses: actions-rs/toolchain@v1 35 | with: 36 | profile: minimal 37 | toolchain: stable 38 | override: true 39 | 40 | - name: Install CUDA Linux 41 | if: runner.os == 'Linux' 42 | uses: Jimver/cuda-toolkit@v0.2.8 43 | with: 44 | cuda: '11.2.2' 45 | method: 'network' 46 | sub-packages: '["nvcc", "cudart"]' 47 | 48 | - name: Install CUDA Windows 49 | if: runner.os == 'Windows' 50 | uses: Jimver/cuda-toolkit@v0.2.8 51 | with: 52 | cuda: '11.5.1' 53 | method: 'network' 54 | sub-packages: '["nvcc", "cudart"]' 55 | 56 | - name: Build on Linux GNU 57 | if: matrix.os == 'ubuntu-18.04' 58 | # We're using musl to make the binaries statically linked and portable 59 | run: | 60 | cargo build --target=x86_64-unknown-linux-gnu --release --all --features ${{ matrix.features }} 61 | 
feature_name=${{ matrix.features }} 62 | asset_name="kaspa-miner-${{ github.event.release.tag_name }}-${feature_name/\//-}-linux-gnu-amd64" 63 | strip ./target/x86_64-unknown-linux-gnu/release/kaspa-miner 64 | mkdir ${asset_name} 65 | mv ./target/x86_64-unknown-linux-gnu/release/kaspa-miner ${asset_name}/${asset_name} 66 | mv ./target/x86_64-unknown-linux-gnu/release/libkaspa*.so ${asset_name}/ 67 | tar czvf ${asset_name}.tgz ${asset_name} 68 | echo "archive=${asset_name}.tgz" >> $GITHUB_ENV 69 | echo "asset_name=${asset_name}.tgz" >> $GITHUB_ENV 70 | 71 | 72 | - name: Build on Windows 73 | if: matrix.os == 'windows-latest' 74 | shell: bash 75 | run: | 76 | cargo build --target=x86_64-pc-windows-msvc --release --all --features ${{ matrix.features }} 77 | feature_name=${{ matrix.features }} 78 | asset_name="kaspa-miner-${{ github.event.release.tag_name }}-${feature_name/\//-}-win64-amd64" 79 | mkdir ${asset_name} 80 | mv ./target/x86_64-pc-windows-msvc/release/kaspa-miner.exe ${asset_name}/${asset_name}.exe 81 | mv ./target/x86_64-pc-windows-msvc/release/kaspa*.dll ${asset_name}/ 82 | bash ./integrations/windows/create_bat.sh ${asset_name} 83 | 7z a -tzip -r ${asset_name}.zip ${asset_name} 84 | echo "archive=${asset_name}.zip" >> $GITHUB_ENV 85 | echo "asset_name=${asset_name}.zip" >> $GITHUB_ENV 86 | 87 | - name: Build on MacOS 88 | if: matrix.os == 'macos-latest' 89 | run: | 90 | cargo build --target=x86_64-apple-darwin --release -p kaspa-miner 91 | asset_name="kaspa-miner-${{ github.event.release.tag_name }}-cpu-only-osx-amd64" 92 | mkdir ${asset_name} 93 | mv ./target/x86_64-apple-darwin/release/kaspa-miner ${asset_name}/${asset_name} 94 | tar czvf ${asset_name}.tgz ${asset_name} 95 | echo "archive=${asset_name}.tgz" >> $GITHUB_ENV 96 | echo "asset_name=${asset_name}.tgz" >> $GITHUB_ENV 97 | 98 | 99 | - name: Upload release asset 100 | uses: actions/upload-release-asset@v1 101 | env: 102 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 103 | with: 104 | upload_url: 
${{ github.event.release.upload_url }} 105 | asset_path: "./${{ env.archive }}" 106 | asset_name: "${{ env.asset_name }}" 107 | asset_content_type: application/zip 108 | 109 | intergrations: 110 | runs-on: ubuntu-18.04 111 | strategy: 112 | fail-fast: false 113 | matrix: 114 | # Build gnu-linux on ubuntu-18.04 and musl on ubuntu latest 115 | itegration: [ hiveos ] 116 | name: Integrating, ${{ matrix.itegration }} 117 | steps: 118 | - name: Fix LibOpenCL on Linux 119 | run: | 120 | sudo apt update 121 | sudo apt install ocl-icd-opencl-dev -y 122 | 123 | - name: Check out code into the module directory 124 | uses: actions/checkout@v2 125 | 126 | - name: Install Protoc 127 | uses: arduino/setup-protoc@v1 128 | 129 | - name: Setup Rust 130 | uses: actions-rs/toolchain@v1 131 | with: 132 | profile: minimal 133 | toolchain: stable 134 | override: true 135 | 136 | - name: Install CUDA Linux 137 | if: runner.os == 'Linux' 138 | uses: Jimver/cuda-toolkit@v0.2.8 139 | with: 140 | cuda: '11.2.2' 141 | method: 'network' 142 | sub-packages: '["nvcc", "cudart"]' 143 | 144 | - name: Build ${{ matrix.itegration }} Script 145 | run: | 146 | cargo build --target=x86_64-unknown-linux-gnu --release --all 147 | binary_name="kaspa-miner-${{ github.event.release.tag_name }}-linux-gnu-amd64" 148 | asset_name="kaspa-miner-${{ matrix.itegration }}" 149 | strip ./target/x86_64-unknown-linux-gnu/release/kaspa-miner 150 | mkdir kaspa-miner 151 | mv ./target/x86_64-unknown-linux-gnu/release/kaspa-miner kaspa-miner/${binary_name} 152 | mv ./target/x86_64-unknown-linux-gnu/release/libkaspa*.so kaspa-miner/ 153 | bash integrations/${{ matrix.itegration }}/build.sh "${{ github.event.release.tag_name }}" "${binary_name}" kaspa-miner 154 | echo "archive=${asset_name}.tgz" >> $GITHUB_ENV 155 | echo "asset_name=${asset_name}.tgz" >> $GITHUB_ENV 156 | 157 | - name: Upload release asset 158 | uses: actions/upload-release-asset@v1 159 | env: 160 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 161 | with: 162 
| upload_url: ${{ github.event.release.upload_url }} 163 | asset_path: "./${{ env.archive }}" 164 | asset_name: "${{ env.asset_name }}" 165 | asset_content_type: application/zip 166 | -------------------------------------------------------------------------------- /src/client/stratum/statum_codec.rs: -------------------------------------------------------------------------------- 1 | use bytes::BytesMut; 2 | use log::error; 3 | use serde::{Deserialize, Serialize}; 4 | use serde_json::Value; 5 | use serde_repr::*; 6 | use std::fmt::{Display, Formatter}; 7 | use std::{fmt, io}; 8 | use tokio_util::codec::{Decoder, Encoder, LinesCodec}; 9 | 10 | #[derive(Serialize_repr, Deserialize_repr, Debug, Clone)] 11 | #[repr(u8)] 12 | pub enum ErrorCode { 13 | Unknown = 20, 14 | JobNotFound = 21, 15 | DuplicateShare = 22, 16 | LowDifficultyShare = 23, 17 | Unauthorized = 24, 18 | NotSubscribed = 25, 19 | } 20 | 21 | impl Display for ErrorCode { 22 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 23 | match &self { 24 | ErrorCode::Unknown => write!(f, "Unknown"), 25 | ErrorCode::JobNotFound => write!(f, "JobNotFound"), 26 | ErrorCode::DuplicateShare => write!(f, "DuplicateShare"), 27 | ErrorCode::LowDifficultyShare => write!(f, "LowDifficultyShare"), 28 | ErrorCode::Unauthorized => write!(f, "Unauthorized"), 29 | ErrorCode::NotSubscribed => write!(f, "NotSubscribed"), 30 | } 31 | } 32 | } 33 | 34 | #[derive(Serialize, Deserialize, Debug, Clone)] 35 | pub(crate) struct StratumError(pub(crate) ErrorCode, pub(crate) String, #[serde(default)] pub(crate) Option); 36 | 37 | #[derive(Serialize, Deserialize, Debug, Clone)] 38 | #[serde(untagged)] 39 | pub(crate) enum MiningNotify { 40 | MiningNotifyShort((String, [u64; 4], u64)), 41 | MiningNotifyLong((String, String, String, String, Vec, String, String, String, bool)), 42 | } 43 | 44 | #[derive(Serialize, Deserialize, Debug, Clone)] 45 | #[serde(untagged)] 46 | pub enum MiningSubmit { 47 | MiningSubmitShort((String, String, 
/// A stratum request or notification, keyed by its method name.
///
/// Serde's adjacently tagged representation is used: the variant's renamed
/// identifier is stored under `"method"` and its payload under `"params"`.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(tag = "method", content = "params")]
pub(crate) enum StratumCommand {
    /// `mining.set_extranonce`; the bare name `set_extranonce` is also accepted
    /// when deserializing.
    #[serde(rename = "mining.set_extranonce", alias = "set_extranonce")]
    SetExtranonce(SetExtranonce),
    /// `mining.set_difficulty`, carrying a single `f32` parameter.
    #[serde(rename = "mining.set_difficulty")]
    MiningSetDifficulty((f32,)),
    /// `mining.notify`, in either of the layouts described by `MiningNotify`.
    #[serde(rename = "mining.notify")]
    MiningNotify(MiningNotify),
    /// `mining.subscribe`, in either of the layouts described by `MiningSubscribe`.
    #[serde(rename = "mining.subscribe")]
    Subscribe(MiningSubscribe),
    /// `mining.authorize`, carrying two string parameters.
    // NOTE(review): presumably worker name and password — confirm against the caller.
    #[serde(rename = "mining.authorize")]
    Authorize((String, String)),
    /// `mining.submit`, in either of the layouts described by `MiningSubmit`.
    #[serde(rename = "mining.submit")]
    MiningSubmit(MiningSubmit),
    // The `mining.submit_hashrate` variant below is disabled; the trailing JSON
    // is a sample of such a message.
    /*#[serde(rename = "mining.submit_hashrate")]
    MiningSubmitHashrate {
        params: (String, String),
        worker: String,
    },*/ //{"id":9,"method":"mining.submit_hashrate","jsonrpc":"2.0","worker":"rig","params":["0x00000000000000000000000000000000","0x85198cd10b915d560722cdfdf490d4d93892d2cc3fa5f2ff2195d499d04ee54c"]}
}
/// An error occurred while encoding or decoding a line.
#[derive(Debug)]
pub(crate) enum NewLineJsonCodecError {
    /// A received line could not be deserialized. The payload is the parser
    /// message followed by the offending line, formatted as `"<error>: <line>"`
    /// by the `From<(String, String)>` conversion.
    JsonParseError(String),
    /// An outgoing message could not be serialized to JSON.
    JsonEncodeError,
    /// The underlying line codec failed while splitting incoming bytes.
    LineSplitError,
    /// The underlying line codec failed while writing an outgoing line.
    LineEncodeError,
    /// An I/O error reported by the transport.
    Io(io::Error),
}

impl fmt::Display for NewLineJsonCodecError {
    /// Describes the failure, including the parse or I/O detail when the
    /// variant carries one, so that logs identify what actually went wrong.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            NewLineJsonCodecError::JsonParseError(details) => {
                write!(f, "failed to parse JSON line: {}", details)
            }
            NewLineJsonCodecError::JsonEncodeError => f.write_str("failed to encode message as JSON"),
            NewLineJsonCodecError::LineSplitError => f.write_str("failed to split input into lines"),
            NewLineJsonCodecError::LineEncodeError => f.write_str("failed to write encoded line"),
            NewLineJsonCodecError::Io(e) => write!(f, "I/O error: {}", e),
        }
    }
}

impl From<io::Error> for NewLineJsonCodecError {
    /// Wraps a transport error so `?` can be used on I/O results.
    fn from(e: io::Error) -> NewLineJsonCodecError {
        NewLineJsonCodecError::Io(e)
    }
}

impl std::error::Error for NewLineJsonCodecError {}

impl From<(String, String)> for NewLineJsonCodecError {
    /// Builds a parse error from `(parser message, offending line)`.
    fn from(e: (String, String)) -> Self {
        NewLineJsonCodecError::JsonParseError(format!("{}: {}", e.0, e.1))
    }
}
| Err(_) => Err(NewLineJsonCodecError::LineSplitError), 168 | _ => Ok(None), 169 | } 170 | } 171 | } 172 | 173 | impl Encoder for NewLineJsonCodec { 174 | type Error = NewLineJsonCodecError; 175 | 176 | fn encode(&mut self, item: StratumLine, dst: &mut BytesMut) -> Result<(), Self::Error> { 177 | match serde_json::to_string(&item) { 178 | Ok(json) => self.lines_codec.encode(json, dst).map_err(|_| NewLineJsonCodecError::LineEncodeError), 179 | Err(e) => { 180 | error!("Error! {:?}", e); 181 | Err(NewLineJsonCodecError::JsonEncodeError) 182 | } 183 | } 184 | } 185 | } 186 | 187 | impl Default for NewLineJsonCodec { 188 | fn default() -> Self { 189 | Self::new() 190 | } 191 | } 192 | -------------------------------------------------------------------------------- /plugins/cuda/kaspa-cuda-native/src/kaspa-cuda.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "keccak-tiny.c" 4 | #include "xoshiro256starstar.c" 5 | 6 | 7 | 8 | typedef uint8_t Hash[32]; 9 | 10 | typedef union _uint256_t { 11 | uint64_t number[4]; 12 | uint8_t hash[32]; 13 | } uint256_t; 14 | 15 | #define BLOCKDIM 1024 16 | #define MATRIX_SIZE 64 17 | #define HALF_MATRIX_SIZE 32 18 | #define QUARTER_MATRIX_SIZE 16 19 | #define HASH_HEADER_SIZE 72 20 | 21 | #define RANDOM_LEAN 0 22 | #define RANDOM_XOSHIRO 1 23 | 24 | #define LT_U256(X,Y) (X.number[3] != Y.number[3] ? X.number[3] < Y.number[3] : X.number[2] != Y.number[2] ? X.number[2] < Y.number[2] : X.number[1] != Y.number[1] ? 
X.number[1] < Y.number[1] : X.number[0] < Y.number[0]) 25 | 26 | __constant__ uint8_t matrix[MATRIX_SIZE][MATRIX_SIZE]; 27 | __constant__ uint8_t hash_header[HASH_HEADER_SIZE]; 28 | __constant__ uint256_t target; 29 | __constant__ static const uint8_t powP[Plen] = { 0x3d, 0xd8, 0xf6, 0xa1, 0x0d, 0xff, 0x3c, 0x11, 0x3c, 0x7e, 0x02, 0xb7, 0x55, 0x88, 0xbf, 0x29, 0xd2, 0x44, 0xfb, 0x0e, 0x72, 0x2e, 0x5f, 0x1e, 0xa0, 0x69, 0x98, 0xf5, 0xa3, 0xa4, 0xa5, 0x1b, 0x65, 0x2d, 0x5e, 0x87, 0xca, 0xaf, 0x2f, 0x7b, 0x46, 0xe2, 0xdc, 0x29, 0xd6, 0x61, 0xef, 0x4a, 0x10, 0x5b, 0x41, 0xad, 0x1e, 0x98, 0x3a, 0x18, 0x9c, 0xc2, 0x9b, 0x78, 0x0c, 0xf6, 0x6b, 0x77, 0x40, 0x31, 0x66, 0x88, 0x33, 0xf1, 0xeb, 0xf8, 0xf0, 0x5f, 0x28, 0x43, 0x3c, 0x1c, 0x65, 0x2e, 0x0a, 0x4a, 0xf1, 0x40, 0x05, 0x07, 0x96, 0x0f, 0x52, 0x91, 0x29, 0x5b, 0x87, 0x67, 0xe3, 0x44, 0x15, 0x37, 0xb1, 0x25, 0xa4, 0xf1, 0x70, 0xec, 0x89, 0xda, 0xe9, 0x82, 0x8f, 0x5d, 0xc8, 0xe6, 0x23, 0xb2, 0xb4, 0x85, 0x1f, 0x60, 0x1a, 0xb2, 0x46, 0x6a, 0xa3, 0x64, 0x90, 0x54, 0x85, 0x34, 0x1a, 0x85, 0x2f, 0x7a, 0x1c, 0xdd, 0x06, 0x0f, 0x42, 0xb1, 0x3b, 0x56, 0x1d, 0x02, 0xa2, 0xc1, 0xe4, 0x68, 0x16, 0x45, 0xe4, 0xe5, 0x1d, 0xba, 0x8d, 0x5f, 0x09, 0x05, 0x41, 0x57, 0x02, 0xd1, 0x4a, 0xcf, 0xce, 0x9b, 0x84, 0x4e, 0xca, 0x89, 0xdb, 0x2e, 0x74, 0xa8, 0x27, 0x94, 0xb0, 0x48, 0x72, 0x52, 0x8b, 0xe7, 0x9c, 0xce, 0xfc, 0xb1, 0xbc, 0xa5, 0xaf, 0x82, 0xcf, 0x29, 0x11, 0x5d, 0x83, 0x43, 0x82, 0x6f, 0x78, 0x7c, 0xb9, 0x02 }; 30 | __constant__ static const uint8_t heavyP[Plen] = { 0x09, 0x85, 0x24, 0xb2, 0x52, 0x4c, 0xd7, 0x3a, 0x16, 0x42, 0x9f, 0x2f, 0x0e, 0x9b, 0x62, 0x79, 0xee, 0xf8, 0xc7, 0x16, 0x48, 0xff, 0x14, 0x7a, 0x98, 0x64, 0x05, 0x80, 0x4c, 0x5f, 0xa7, 0x11, 0xda, 0xce, 0xee, 0x44, 0xdf, 0xe0, 0x20, 0xe7, 0x69, 0x40, 0xf3, 0x14, 0x2e, 0xd8, 0xc7, 0x72, 0xba, 0x35, 0x89, 0x93, 0x2a, 0xff, 0x00, 0xc1, 0x62, 0xc4, 0x0f, 0x25, 0x40, 0x90, 0x21, 0x5e, 0x48, 0x6a, 0xcf, 0x0d, 0xa6, 0xf9, 0x39, 0x80, 0x0c, 0x3d, 0x2a, 0x79, 0x9f, 0xaa, 0xbc, 
0xa0, 0x26, 0xa2, 0xa9, 0xd0, 0x5d, 0xc0, 0x31, 0xf4, 0x3f, 0x8c, 0xc1, 0x54, 0xc3, 0x4c, 0x1f, 0xd3, 0x3d, 0xcc, 0x69, 0xa7, 0x01, 0x7d, 0x6b, 0x6c, 0xe4, 0x93, 0x24, 0x56, 0xd3, 0x5b, 0xc6, 0x2e, 0x44, 0xb0, 0xcd, 0x99, 0x3a, 0x4b, 0xf7, 0x4e, 0xb0, 0xf2, 0x34, 0x54, 0x83, 0x86, 0x4c, 0x77, 0x16, 0x94, 0xbc, 0x36, 0xb0, 0x61, 0xe9, 0x07, 0x07, 0xcc, 0x65, 0x77, 0xb1, 0x1d, 0x8f, 0x7e, 0x39, 0x6d, 0xc4, 0xba, 0x80, 0xdb, 0x8f, 0xea, 0x58, 0xca, 0x34, 0x7b, 0xd3, 0xf2, 0x92, 0xb9, 0x57, 0xb9, 0x81, 0x84, 0x04, 0xc5, 0x76, 0xc7, 0x2e, 0xc2, 0x12, 0x51, 0x67, 0x9f, 0xc3, 0x47, 0x0a, 0x0c, 0x29, 0xb5, 0x9d, 0x39, 0xbb, 0x92, 0x15, 0xc6, 0x9f, 0x2f, 0x31, 0xe0, 0x9a, 0x54, 0x35, 0xda, 0xb9, 0x10, 0x7d, 0x32, 0x19, 0x16 }; 31 | 32 | __device__ __inline__ void amul4bit(uint32_t packed_vec1[32], uint32_t packed_vec2[32], uint32_t *ret) { 33 | // We assume each 32 bits have four values: A0 B0 C0 D0 34 | unsigned int res = 0; 35 | #if __CUDA_ARCH__ < 610 36 | char4 *a4 = (char4*)packed_vec1; 37 | char4 *b4 = (char4*)packed_vec2; 38 | #endif 39 | #pragma unroll 40 | for (int i=0; i= 610 42 | res = __dp4a(packed_vec1[i], packed_vec2[i], res); 43 | #else 44 | res += a4[i].x*b4[i].x; 45 | res += a4[i].y*b4[i].y; 46 | res += a4[i].z*b4[i].z; 47 | res += a4[i].w*b4[i].w; 48 | #endif 49 | } 50 | 51 | *ret = res; 52 | } 53 | 54 | 55 | extern "C" { 56 | 57 | 58 | __global__ void heavy_hash(const uint64_t nonce_mask, const uint64_t nonce_fixed, const uint64_t nonces_len, uint8_t random_type, void* states, uint64_t *final_nonce) { 59 | // assuming header_len is 72 60 | int nonceId = threadIdx.x + blockIdx.x*blockDim.x; 61 | if (nonceId < nonces_len) { 62 | if (nonceId == 0) *final_nonce = 0; 63 | uint64_t nonce; 64 | switch (random_type) { 65 | case RANDOM_LEAN: 66 | nonce = ((uint64_t *)states)[0] ^ nonceId; 67 | break; 68 | case RANDOM_XOSHIRO: 69 | default: 70 | nonce = xoshiro256_next(((ulonglong4 *)states) + nonceId); 71 | break; 72 | } 73 | nonce = (nonce & nonce_mask) | 
nonce_fixed; 74 | // header 75 | uint8_t input[80]; 76 | memcpy(input, hash_header, HASH_HEADER_SIZE); 77 | // data 78 | // TODO: check endianity? 79 | uint256_t hash_; 80 | memcpy(input + HASH_HEADER_SIZE, (uint8_t *)(&nonce), 8); 81 | hash(powP, hash_.hash, input); 82 | 83 | //assert((rowId != 0) || (hashId != 0) ); 84 | uchar4 packed_hash[QUARTER_MATRIX_SIZE] = {0}; 85 | #pragma unroll 86 | for (int i=0; i> 4 , 89 | (hash_.hash[2*i] & 0x0F), 90 | (hash_.hash[2*i+1] & 0xF0) >> 4, 91 | (hash_.hash[2*i+1] & 0x0F) 92 | ); 93 | } 94 | uint32_t product1, product2; 95 | #pragma unroll 96 | for (int rowId=0; rowId>= 6; 101 | product1 &= 0xF0; 102 | product2 >>= 10; 103 | #if __CUDA_ARCH__ < 500 || __CUDA_ARCH__ > 700 104 | hash_.hash[rowId] = hash_.hash[rowId] ^ ((uint8_t)(product1) | (uint8_t)(product2)); 105 | #else 106 | uint32_t lop_temp = hash_.hash[rowId]; 107 | asm("lop3.b32" " %0, %1, %2, %3, 0x56;": "=r" (lop_temp): "r" (product1), "r" (product2), "r" (lop_temp)); 108 | hash_.hash[rowId] = lop_temp; 109 | #endif 110 | } 111 | memset(input, 0, 80); 112 | memcpy(input, hash_.hash, 32); 113 | hash(heavyP, hash_.hash, input); 114 | if (LT_U256(hash_, target)){ 115 | atomicCAS((unsigned long long int*) final_nonce, 0, (unsigned long long int) nonce); 116 | } 117 | } 118 | } 119 | 120 | } -------------------------------------------------------------------------------- /src/keccakf1600_x86-64-osx.s: -------------------------------------------------------------------------------- 1 | # Source: https://github.com/dot-asm/cryptogams/blob/master/x86_64/keccak1600-x86_64.pl 2 | 3 | .text 4 | 5 | 6 | .p2align 5 7 | __KeccakF1600: 8 | .cfi_startproc 9 | .byte 0xf3,0x0f,0x1e,0xfa 10 | 11 | movq 60(%rdi),%rax 12 | movq 68(%rdi),%rbx 13 | movq 76(%rdi),%rcx 14 | movq 84(%rdi),%rdx 15 | movq 92(%rdi),%rbp 16 | jmp L$oop 17 | 18 | .p2align 5 19 | L$oop: 20 | movq -100(%rdi),%r8 21 | movq -52(%rdi),%r9 22 | movq -4(%rdi),%r10 23 | movq 44(%rdi),%r11 24 | 25 | xorq -84(%rdi),%rcx 
26 | xorq -76(%rdi),%rdx 27 | xorq %r8,%rax 28 | xorq -92(%rdi),%rbx 29 | xorq -44(%rdi),%rcx 30 | xorq -60(%rdi),%rax 31 | movq %rbp,%r12 32 | xorq -68(%rdi),%rbp 33 | 34 | xorq %r10,%rcx 35 | xorq -20(%rdi),%rax 36 | xorq -36(%rdi),%rdx 37 | xorq %r9,%rbx 38 | xorq -28(%rdi),%rbp 39 | 40 | xorq 36(%rdi),%rcx 41 | xorq 20(%rdi),%rax 42 | xorq 4(%rdi),%rdx 43 | xorq -12(%rdi),%rbx 44 | xorq 12(%rdi),%rbp 45 | 46 | movq %rcx,%r13 47 | rolq $1,%rcx 48 | xorq %rax,%rcx 49 | xorq %r11,%rdx 50 | 51 | rolq $1,%rax 52 | xorq %rdx,%rax 53 | xorq 28(%rdi),%rbx 54 | 55 | rolq $1,%rdx 56 | xorq %rbx,%rdx 57 | xorq 52(%rdi),%rbp 58 | 59 | rolq $1,%rbx 60 | xorq %rbp,%rbx 61 | 62 | rolq $1,%rbp 63 | xorq %r13,%rbp 64 | xorq %rcx,%r9 65 | xorq %rdx,%r10 66 | rolq $44,%r9 67 | xorq %rbp,%r11 68 | xorq %rax,%r12 69 | rolq $43,%r10 70 | xorq %rbx,%r8 71 | movq %r9,%r13 72 | rolq $21,%r11 73 | orq %r10,%r9 74 | xorq %r8,%r9 75 | rolq $14,%r12 76 | 77 | xorq (%r15),%r9 78 | leaq 8(%r15),%r15 79 | 80 | movq %r12,%r14 81 | andq %r11,%r12 82 | movq %r9,-100(%rsi) 83 | xorq %r10,%r12 84 | notq %r10 85 | movq %r12,-84(%rsi) 86 | 87 | orq %r11,%r10 88 | movq 76(%rdi),%r12 89 | xorq %r13,%r10 90 | movq %r10,-92(%rsi) 91 | 92 | andq %r8,%r13 93 | movq -28(%rdi),%r9 94 | xorq %r14,%r13 95 | movq -20(%rdi),%r10 96 | movq %r13,-68(%rsi) 97 | 98 | orq %r8,%r14 99 | movq -76(%rdi),%r8 100 | xorq %r11,%r14 101 | movq 28(%rdi),%r11 102 | movq %r14,-76(%rsi) 103 | 104 | 105 | xorq %rbp,%r8 106 | xorq %rdx,%r12 107 | rolq $28,%r8 108 | xorq %rcx,%r11 109 | xorq %rax,%r9 110 | rolq $61,%r12 111 | rolq $45,%r11 112 | xorq %rbx,%r10 113 | rolq $20,%r9 114 | movq %r8,%r13 115 | orq %r12,%r8 116 | rolq $3,%r10 117 | 118 | xorq %r11,%r8 119 | movq %r8,-36(%rsi) 120 | 121 | movq %r9,%r14 122 | andq %r13,%r9 123 | movq -92(%rdi),%r8 124 | xorq %r12,%r9 125 | notq %r12 126 | movq %r9,-28(%rsi) 127 | 128 | orq %r11,%r12 129 | movq -44(%rdi),%r9 130 | xorq %r10,%r12 131 | movq %r12,-44(%rsi) 132 | 133 | andq 
%r10,%r11 134 | movq 60(%rdi),%r12 135 | xorq %r14,%r11 136 | movq %r11,-52(%rsi) 137 | 138 | orq %r10,%r14 139 | movq 4(%rdi),%r10 140 | xorq %r13,%r14 141 | movq 52(%rdi),%r11 142 | movq %r14,-60(%rsi) 143 | 144 | 145 | xorq %rbp,%r10 146 | xorq %rax,%r11 147 | rolq $25,%r10 148 | xorq %rdx,%r9 149 | rolq $8,%r11 150 | xorq %rbx,%r12 151 | rolq $6,%r9 152 | xorq %rcx,%r8 153 | rolq $18,%r12 154 | movq %r10,%r13 155 | andq %r11,%r10 156 | rolq $1,%r8 157 | 158 | notq %r11 159 | xorq %r9,%r10 160 | movq %r10,-12(%rsi) 161 | 162 | movq %r12,%r14 163 | andq %r11,%r12 164 | movq -12(%rdi),%r10 165 | xorq %r13,%r12 166 | movq %r12,-4(%rsi) 167 | 168 | orq %r9,%r13 169 | movq 84(%rdi),%r12 170 | xorq %r8,%r13 171 | movq %r13,-20(%rsi) 172 | 173 | andq %r8,%r9 174 | xorq %r14,%r9 175 | movq %r9,12(%rsi) 176 | 177 | orq %r8,%r14 178 | movq -60(%rdi),%r9 179 | xorq %r11,%r14 180 | movq 36(%rdi),%r11 181 | movq %r14,4(%rsi) 182 | 183 | 184 | movq -68(%rdi),%r8 185 | 186 | xorq %rcx,%r10 187 | xorq %rdx,%r11 188 | rolq $10,%r10 189 | xorq %rbx,%r9 190 | rolq $15,%r11 191 | xorq %rbp,%r12 192 | rolq $36,%r9 193 | xorq %rax,%r8 194 | rolq $56,%r12 195 | movq %r10,%r13 196 | orq %r11,%r10 197 | rolq $27,%r8 198 | 199 | notq %r11 200 | xorq %r9,%r10 201 | movq %r10,28(%rsi) 202 | 203 | movq %r12,%r14 204 | orq %r11,%r12 205 | xorq %r13,%r12 206 | movq %r12,36(%rsi) 207 | 208 | andq %r9,%r13 209 | xorq %r8,%r13 210 | movq %r13,20(%rsi) 211 | 212 | orq %r8,%r9 213 | xorq %r14,%r9 214 | movq %r9,52(%rsi) 215 | 216 | andq %r14,%r8 217 | xorq %r11,%r8 218 | movq %r8,44(%rsi) 219 | 220 | 221 | xorq -84(%rdi),%rdx 222 | xorq -36(%rdi),%rbp 223 | rolq $62,%rdx 224 | xorq 68(%rdi),%rcx 225 | rolq $55,%rbp 226 | xorq 12(%rdi),%rax 227 | rolq $2,%rcx 228 | xorq 20(%rdi),%rbx 229 | xchgq %rsi,%rdi 230 | rolq $39,%rax 231 | rolq $41,%rbx 232 | movq %rdx,%r13 233 | andq %rbp,%rdx 234 | notq %rbp 235 | xorq %rcx,%rdx 236 | movq %rdx,92(%rdi) 237 | 238 | movq %rax,%r14 239 | andq %rbp,%rax 240 
| xorq %r13,%rax 241 | movq %rax,60(%rdi) 242 | 243 | orq %rcx,%r13 244 | xorq %rbx,%r13 245 | movq %r13,84(%rdi) 246 | 247 | andq %rbx,%rcx 248 | xorq %r14,%rcx 249 | movq %rcx,76(%rdi) 250 | 251 | orq %r14,%rbx 252 | xorq %rbp,%rbx 253 | movq %rbx,68(%rdi) 254 | 255 | movq %rdx,%rbp 256 | movq %r13,%rdx 257 | 258 | testq $255,%r15 259 | jnz L$oop 260 | 261 | leaq -192(%r15),%r15 262 | .byte 0xf3,0xc3 263 | .cfi_endproc 264 | 265 | 266 | .globl _KeccakF1600 267 | 268 | .p2align 5 269 | _KeccakF1600: 270 | .cfi_startproc 271 | .byte 0xf3,0x0f,0x1e,0xfa 272 | 273 | 274 | pushq %rbx 275 | .cfi_adjust_cfa_offset 8 276 | .cfi_offset %rbx,-16 277 | pushq %rbp 278 | .cfi_adjust_cfa_offset 8 279 | .cfi_offset %rbp,-24 280 | pushq %r12 281 | .cfi_adjust_cfa_offset 8 282 | .cfi_offset %r12,-32 283 | pushq %r13 284 | .cfi_adjust_cfa_offset 8 285 | .cfi_offset %r13,-40 286 | pushq %r14 287 | .cfi_adjust_cfa_offset 8 288 | .cfi_offset %r14,-48 289 | pushq %r15 290 | .cfi_adjust_cfa_offset 8 291 | .cfi_offset %r15,-56 292 | 293 | leaq 100(%rdi),%rdi 294 | subq $200,%rsp 295 | .cfi_adjust_cfa_offset 200 296 | 297 | notq -92(%rdi) 298 | notq -84(%rdi) 299 | notq -36(%rdi) 300 | notq -4(%rdi) 301 | notq 36(%rdi) 302 | notq 60(%rdi) 303 | 304 | leaq iotas(%rip),%r15 305 | leaq 100(%rsp),%rsi 306 | 307 | call __KeccakF1600 308 | 309 | notq -92(%rdi) 310 | notq -84(%rdi) 311 | notq -36(%rdi) 312 | notq -4(%rdi) 313 | notq 36(%rdi) 314 | notq 60(%rdi) 315 | leaq -100(%rdi),%rdi 316 | 317 | addq $200,%rsp 318 | .cfi_adjust_cfa_offset -200 319 | 320 | popq %r15 321 | .cfi_adjust_cfa_offset -8 322 | .cfi_restore %r15 323 | popq %r14 324 | .cfi_adjust_cfa_offset -8 325 | .cfi_restore %r14 326 | popq %r13 327 | .cfi_adjust_cfa_offset -8 328 | .cfi_restore %r13 329 | popq %r12 330 | .cfi_adjust_cfa_offset -8 331 | .cfi_restore %r12 332 | popq %rbp 333 | .cfi_adjust_cfa_offset -8 334 | .cfi_restore %rbp 335 | popq %rbx 336 | .cfi_adjust_cfa_offset -8 337 | .cfi_restore %rbx 338 | .byte 
0xf3,0xc3 339 | .cfi_endproc 340 | 341 | .p2align 8 342 | .quad 0,0,0,0,0,0,0,0 343 | 344 | iotas: 345 | .quad 0x0000000000000001 346 | .quad 0x0000000000008082 347 | .quad 0x800000000000808a 348 | .quad 0x8000000080008000 349 | .quad 0x000000000000808b 350 | .quad 0x0000000080000001 351 | .quad 0x8000000080008081 352 | .quad 0x8000000000008009 353 | .quad 0x000000000000008a 354 | .quad 0x0000000000000088 355 | .quad 0x0000000080008009 356 | .quad 0x000000008000000a 357 | .quad 0x000000008000808b 358 | .quad 0x800000000000008b 359 | .quad 0x8000000000008089 360 | .quad 0x8000000000008003 361 | .quad 0x8000000000008002 362 | .quad 0x8000000000000080 363 | .quad 0x000000000000800a 364 | .quad 0x800000008000000a 365 | .quad 0x8000000080008081 366 | .quad 0x8000000000008080 367 | .quad 0x0000000080000001 368 | .quad 0x8000000080008008 369 | 370 | .byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 371 | -------------------------------------------------------------------------------- /src/keccakf1600_x86-64.s: -------------------------------------------------------------------------------- 1 | # Source: https://github.com/dot-asm/cryptogams/blob/master/x86_64/keccak1600-x86_64.pl 2 | 3 | .text 4 | 5 | .type __KeccakF1600,@function 6 | .align 32 7 | __KeccakF1600: 8 | .cfi_startproc 9 | .byte 0xf3,0x0f,0x1e,0xfa 10 | 11 | movq 60(%rdi),%rax 12 | movq 68(%rdi),%rbx 13 | movq 76(%rdi),%rcx 14 | movq 84(%rdi),%rdx 15 | movq 92(%rdi),%rbp 16 | jmp .Loop 17 | 18 | .align 32 19 | .Loop: 20 | movq -100(%rdi),%r8 21 | movq -52(%rdi),%r9 22 | movq -4(%rdi),%r10 23 | movq 44(%rdi),%r11 24 | 25 | xorq -84(%rdi),%rcx 26 | xorq -76(%rdi),%rdx 27 | xorq %r8,%rax 28 | xorq -92(%rdi),%rbx 29 | xorq -44(%rdi),%rcx 30 | xorq -60(%rdi),%rax 31 | movq %rbp,%r12 32 | xorq 
-68(%rdi),%rbp 33 | 34 | xorq %r10,%rcx 35 | xorq -20(%rdi),%rax 36 | xorq -36(%rdi),%rdx 37 | xorq %r9,%rbx 38 | xorq -28(%rdi),%rbp 39 | 40 | xorq 36(%rdi),%rcx 41 | xorq 20(%rdi),%rax 42 | xorq 4(%rdi),%rdx 43 | xorq -12(%rdi),%rbx 44 | xorq 12(%rdi),%rbp 45 | 46 | movq %rcx,%r13 47 | rolq $1,%rcx 48 | xorq %rax,%rcx 49 | xorq %r11,%rdx 50 | 51 | rolq $1,%rax 52 | xorq %rdx,%rax 53 | xorq 28(%rdi),%rbx 54 | 55 | rolq $1,%rdx 56 | xorq %rbx,%rdx 57 | xorq 52(%rdi),%rbp 58 | 59 | rolq $1,%rbx 60 | xorq %rbp,%rbx 61 | 62 | rolq $1,%rbp 63 | xorq %r13,%rbp 64 | xorq %rcx,%r9 65 | xorq %rdx,%r10 66 | rolq $44,%r9 67 | xorq %rbp,%r11 68 | xorq %rax,%r12 69 | rolq $43,%r10 70 | xorq %rbx,%r8 71 | movq %r9,%r13 72 | rolq $21,%r11 73 | orq %r10,%r9 74 | xorq %r8,%r9 75 | rolq $14,%r12 76 | 77 | xorq (%r15),%r9 78 | leaq 8(%r15),%r15 79 | 80 | movq %r12,%r14 81 | andq %r11,%r12 82 | movq %r9,-100(%rsi) 83 | xorq %r10,%r12 84 | notq %r10 85 | movq %r12,-84(%rsi) 86 | 87 | orq %r11,%r10 88 | movq 76(%rdi),%r12 89 | xorq %r13,%r10 90 | movq %r10,-92(%rsi) 91 | 92 | andq %r8,%r13 93 | movq -28(%rdi),%r9 94 | xorq %r14,%r13 95 | movq -20(%rdi),%r10 96 | movq %r13,-68(%rsi) 97 | 98 | orq %r8,%r14 99 | movq -76(%rdi),%r8 100 | xorq %r11,%r14 101 | movq 28(%rdi),%r11 102 | movq %r14,-76(%rsi) 103 | 104 | 105 | xorq %rbp,%r8 106 | xorq %rdx,%r12 107 | rolq $28,%r8 108 | xorq %rcx,%r11 109 | xorq %rax,%r9 110 | rolq $61,%r12 111 | rolq $45,%r11 112 | xorq %rbx,%r10 113 | rolq $20,%r9 114 | movq %r8,%r13 115 | orq %r12,%r8 116 | rolq $3,%r10 117 | 118 | xorq %r11,%r8 119 | movq %r8,-36(%rsi) 120 | 121 | movq %r9,%r14 122 | andq %r13,%r9 123 | movq -92(%rdi),%r8 124 | xorq %r12,%r9 125 | notq %r12 126 | movq %r9,-28(%rsi) 127 | 128 | orq %r11,%r12 129 | movq -44(%rdi),%r9 130 | xorq %r10,%r12 131 | movq %r12,-44(%rsi) 132 | 133 | andq %r10,%r11 134 | movq 60(%rdi),%r12 135 | xorq %r14,%r11 136 | movq %r11,-52(%rsi) 137 | 138 | orq %r10,%r14 139 | movq 4(%rdi),%r10 140 | xorq 
%r13,%r14 141 | movq 52(%rdi),%r11 142 | movq %r14,-60(%rsi) 143 | 144 | 145 | xorq %rbp,%r10 146 | xorq %rax,%r11 147 | rolq $25,%r10 148 | xorq %rdx,%r9 149 | rolq $8,%r11 150 | xorq %rbx,%r12 151 | rolq $6,%r9 152 | xorq %rcx,%r8 153 | rolq $18,%r12 154 | movq %r10,%r13 155 | andq %r11,%r10 156 | rolq $1,%r8 157 | 158 | notq %r11 159 | xorq %r9,%r10 160 | movq %r10,-12(%rsi) 161 | 162 | movq %r12,%r14 163 | andq %r11,%r12 164 | movq -12(%rdi),%r10 165 | xorq %r13,%r12 166 | movq %r12,-4(%rsi) 167 | 168 | orq %r9,%r13 169 | movq 84(%rdi),%r12 170 | xorq %r8,%r13 171 | movq %r13,-20(%rsi) 172 | 173 | andq %r8,%r9 174 | xorq %r14,%r9 175 | movq %r9,12(%rsi) 176 | 177 | orq %r8,%r14 178 | movq -60(%rdi),%r9 179 | xorq %r11,%r14 180 | movq 36(%rdi),%r11 181 | movq %r14,4(%rsi) 182 | 183 | 184 | movq -68(%rdi),%r8 185 | 186 | xorq %rcx,%r10 187 | xorq %rdx,%r11 188 | rolq $10,%r10 189 | xorq %rbx,%r9 190 | rolq $15,%r11 191 | xorq %rbp,%r12 192 | rolq $36,%r9 193 | xorq %rax,%r8 194 | rolq $56,%r12 195 | movq %r10,%r13 196 | orq %r11,%r10 197 | rolq $27,%r8 198 | 199 | notq %r11 200 | xorq %r9,%r10 201 | movq %r10,28(%rsi) 202 | 203 | movq %r12,%r14 204 | orq %r11,%r12 205 | xorq %r13,%r12 206 | movq %r12,36(%rsi) 207 | 208 | andq %r9,%r13 209 | xorq %r8,%r13 210 | movq %r13,20(%rsi) 211 | 212 | orq %r8,%r9 213 | xorq %r14,%r9 214 | movq %r9,52(%rsi) 215 | 216 | andq %r14,%r8 217 | xorq %r11,%r8 218 | movq %r8,44(%rsi) 219 | 220 | 221 | xorq -84(%rdi),%rdx 222 | xorq -36(%rdi),%rbp 223 | rolq $62,%rdx 224 | xorq 68(%rdi),%rcx 225 | rolq $55,%rbp 226 | xorq 12(%rdi),%rax 227 | rolq $2,%rcx 228 | xorq 20(%rdi),%rbx 229 | xchgq %rsi,%rdi 230 | rolq $39,%rax 231 | rolq $41,%rbx 232 | movq %rdx,%r13 233 | andq %rbp,%rdx 234 | notq %rbp 235 | xorq %rcx,%rdx 236 | movq %rdx,92(%rdi) 237 | 238 | movq %rax,%r14 239 | andq %rbp,%rax 240 | xorq %r13,%rax 241 | movq %rax,60(%rdi) 242 | 243 | orq %rcx,%r13 244 | xorq %rbx,%r13 245 | movq %r13,84(%rdi) 246 | 247 | andq %rbx,%rcx 
248 | xorq %r14,%rcx 249 | movq %rcx,76(%rdi) 250 | 251 | orq %r14,%rbx 252 | xorq %rbp,%rbx 253 | movq %rbx,68(%rdi) 254 | 255 | movq %rdx,%rbp 256 | movq %r13,%rdx 257 | 258 | testq $255,%r15 259 | jnz .Loop 260 | 261 | leaq -192(%r15),%r15 262 | .byte 0xf3,0xc3 263 | .cfi_endproc 264 | .size __KeccakF1600,.-__KeccakF1600 265 | 266 | .globl KeccakF1600 267 | .type KeccakF1600,@function 268 | .align 32 269 | KeccakF1600: 270 | .cfi_startproc 271 | .byte 0xf3,0x0f,0x1e,0xfa 272 | 273 | 274 | pushq %rbx 275 | .cfi_adjust_cfa_offset 8 276 | .cfi_offset %rbx,-16 277 | pushq %rbp 278 | .cfi_adjust_cfa_offset 8 279 | .cfi_offset %rbp,-24 280 | pushq %r12 281 | .cfi_adjust_cfa_offset 8 282 | .cfi_offset %r12,-32 283 | pushq %r13 284 | .cfi_adjust_cfa_offset 8 285 | .cfi_offset %r13,-40 286 | pushq %r14 287 | .cfi_adjust_cfa_offset 8 288 | .cfi_offset %r14,-48 289 | pushq %r15 290 | .cfi_adjust_cfa_offset 8 291 | .cfi_offset %r15,-56 292 | 293 | leaq 100(%rdi),%rdi 294 | subq $200,%rsp 295 | .cfi_adjust_cfa_offset 200 296 | 297 | notq -92(%rdi) 298 | notq -84(%rdi) 299 | notq -36(%rdi) 300 | notq -4(%rdi) 301 | notq 36(%rdi) 302 | notq 60(%rdi) 303 | 304 | leaq iotas(%rip),%r15 305 | leaq 100(%rsp),%rsi 306 | 307 | call __KeccakF1600 308 | 309 | notq -92(%rdi) 310 | notq -84(%rdi) 311 | notq -36(%rdi) 312 | notq -4(%rdi) 313 | notq 36(%rdi) 314 | notq 60(%rdi) 315 | leaq -100(%rdi),%rdi 316 | 317 | addq $200,%rsp 318 | .cfi_adjust_cfa_offset -200 319 | 320 | popq %r15 321 | .cfi_adjust_cfa_offset -8 322 | .cfi_restore %r15 323 | popq %r14 324 | .cfi_adjust_cfa_offset -8 325 | .cfi_restore %r14 326 | popq %r13 327 | .cfi_adjust_cfa_offset -8 328 | .cfi_restore %r13 329 | popq %r12 330 | .cfi_adjust_cfa_offset -8 331 | .cfi_restore %r12 332 | popq %rbp 333 | .cfi_adjust_cfa_offset -8 334 | .cfi_restore %rbp 335 | popq %rbx 336 | .cfi_adjust_cfa_offset -8 337 | .cfi_restore %rbx 338 | .byte 0xf3,0xc3 339 | .cfi_endproc 340 | .size KeccakF1600,.-KeccakF1600 341 | .align 256 
342 | .quad 0,0,0,0,0,0,0,0 343 | .type iotas,@object 344 | iotas: 345 | .quad 0x0000000000000001 346 | .quad 0x0000000000008082 347 | .quad 0x800000000000808a 348 | .quad 0x8000000080008000 349 | .quad 0x000000000000808b 350 | .quad 0x0000000080000001 351 | .quad 0x8000000080008081 352 | .quad 0x8000000000008009 353 | .quad 0x000000000000008a 354 | .quad 0x0000000000000088 355 | .quad 0x0000000080008009 356 | .quad 0x000000008000000a 357 | .quad 0x000000008000808b 358 | .quad 0x800000000000008b 359 | .quad 0x8000000000008089 360 | .quad 0x8000000000008003 361 | .quad 0x8000000000008002 362 | .quad 0x8000000000000080 363 | .quad 0x000000000000800a 364 | .quad 0x800000008000000a 365 | .quad 0x8000000080008081 366 | .quad 0x8000000000008080 367 | .quad 0x0000000080000001 368 | .quad 0x8000000080008008 369 | .size iotas,.-iotas 370 | .byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 371 | 372 | .section .note.gnu.property,"a",@note 373 | .long 4,2f-1f,5 374 | .byte 0x47,0x4E,0x55,0 375 | 1: .long 0xc0000002,4,3 376 | .align 8 377 | 2: 378 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | 7 | check: 8 | name: Check 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout sources 12 | uses: actions/checkout@v2 13 | 14 | - name: Install Protoc 15 | uses: arduino/setup-protoc@v1 16 | 17 | - name: Install stable toolchain 18 | uses: actions-rs/toolchain@v1 19 | with: 20 | profile: minimal 21 | toolchain: stable 22 | override: true 23 | 24 | - name: Install cuda 25 | uses: Jimver/cuda-toolkit@v0.2.8 26 | with: 27 | cuda: '11.2.2' 28 | method: 
'network' 29 | sub-packages: '["nvcc", "cudart"]' 30 | 31 | - name: Cache 32 | uses: actions/cache@v2 33 | with: 34 | path: | 35 | ~/.cargo/bin/ 36 | ~/.cargo/registry/index/ 37 | ~/.cargo/registry/cache/ 38 | ~/.cargo/git/db/ 39 | target/ 40 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 41 | 42 | - name: Run cargo check 43 | uses: actions-rs/cargo@v1 44 | with: 45 | command: check 46 | args: --tests 47 | 48 | test: 49 | name: Test Suite 50 | runs-on: ${{ matrix.os }} 51 | strategy: 52 | fail-fast: false 53 | matrix: 54 | os: [ ubuntu-latest, windows-latest ] 55 | steps: 56 | - name: Checkout sources 57 | uses: actions/checkout@v2 58 | 59 | - name: Fix LibOpenCL on Linux 60 | if: runner.os == 'Linux' 61 | run: | 62 | sudo apt update 63 | sudo apt install ocl-icd-opencl-dev -y 64 | 65 | - name: Fix CRLF on Windows 66 | if: runner.os == 'Windows' 67 | run: git config --global core.autocrlf false 68 | 69 | - name: Install Protoc 70 | uses: arduino/setup-protoc@v1 71 | 72 | - name: Install stable toolchain 73 | uses: actions-rs/toolchain@v1 74 | with: 75 | profile: minimal 76 | toolchain: stable 77 | override: true 78 | 79 | - name: Install CUDA Linux 80 | if: runner.os == 'Linux' 81 | uses: Jimver/cuda-toolkit@v0.2.8 82 | with: 83 | cuda: '11.2.2' 84 | method: 'network' 85 | sub-packages: '["nvcc", "cudart"]' 86 | 87 | - name: Install CUDA Windows 88 | if: runner.os == 'Windows' 89 | uses: Jimver/cuda-toolkit@v0.2.8 90 | with: 91 | cuda: '11.5.1' 92 | method: 'network' 93 | sub-packages: '["nvcc", "cudart"]' 94 | 95 | - name: Cache 96 | uses: actions/cache@v2 97 | with: 98 | path: | 99 | ~/.cargo/bin/ 100 | ~/.cargo/registry/index/ 101 | ~/.cargo/registry/cache/ 102 | ~/.cargo/git/db/ 103 | target/ 104 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 105 | 106 | - name: Run cargo test regular features 107 | uses: actions-rs/cargo@v1 108 | with: 109 | command: test 110 | args: -p kaspa-miner 111 | 112 | - name: Run cargo test no asm 113 
| uses: actions-rs/cargo@v1 114 | with: 115 | command: test 116 | args: -p kaspa-miner --features=no-asm 117 | 118 | 119 | - name: Run cargo test no parking_lot 120 | uses: actions-rs/cargo@v1 121 | with: 122 | command: test 123 | args: -p kaspa-miner --no-default-features 124 | 125 | - name: Run cargo test shuttle 126 | uses: actions-rs/cargo@v1 127 | with: 128 | command: test 129 | args: -p kaspa-miner --no-default-features --features=shuttle 130 | 131 | - name: Run cargo test for kaspaopencl 132 | uses: actions-rs/cargo@v1 133 | with: 134 | command: test 135 | args: -p kaspaopencl 136 | 137 | 138 | test-release: 139 | name: Test Suite Release 140 | runs-on: ${{ matrix.os }} 141 | strategy: 142 | fail-fast: false 143 | matrix: 144 | os: [ ubuntu-latest, windows-latest ] 145 | steps: 146 | - name: Checkout sources 147 | uses: actions/checkout@v2 148 | 149 | - name: Fix LibOpenCL on Linux 150 | if: runner.os == 'Linux' 151 | run: | 152 | sudo apt update 153 | sudo apt install ocl-icd-opencl-dev -y 154 | 155 | - name: Fix CRLF on Windows 156 | if: runner.os == 'Windows' 157 | run: git config --global core.autocrlf false 158 | 159 | - name: Install Protoc 160 | uses: arduino/setup-protoc@v1 161 | 162 | - name: Install stable toolchain 163 | uses: actions-rs/toolchain@v1 164 | with: 165 | profile: minimal 166 | toolchain: stable 167 | override: true 168 | 169 | - name: Install CUDA Linux 170 | if: runner.os == 'Linux' 171 | uses: Jimver/cuda-toolkit@v0.2.8 172 | with: 173 | cuda: '11.2.2' 174 | method: 'network' 175 | sub-packages: '["nvcc", "cudart"]' 176 | 177 | - name: Install CUDA Windows 178 | if: runner.os == 'Windows' 179 | uses: Jimver/cuda-toolkit@v0.2.8 180 | with: 181 | cuda: '11.5.1' 182 | method: 'network' 183 | sub-packages: '["nvcc", "cudart"]' 184 | 185 | - name: Cache 186 | uses: actions/cache@v2 187 | with: 188 | path: | 189 | ~/.cargo/bin/ 190 | ~/.cargo/registry/index/ 191 | ~/.cargo/registry/cache/ 192 | ~/.cargo/git/db/ 193 | target/ 194 | key: 
${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 195 | 196 | - name: Run cargo test release regular features 197 | uses: actions-rs/cargo@v1 198 | with: 199 | command: test 200 | args: --release -p kaspa-miner 201 | 202 | - name: Run cargo test release no asm 203 | uses: actions-rs/cargo@v1 204 | with: 205 | command: test 206 | args: --features=no-asm --release -p kaspa-miner 207 | 208 | - name: Run cargo test release no parking_lot 209 | uses: actions-rs/cargo@v1 210 | with: 211 | command: test 212 | args: --no-default-features --release -p kaspa-miner 213 | 214 | - name: Run cargo test release shuttle 215 | uses: actions-rs/cargo@v1 216 | with: 217 | command: test 218 | args: --no-default-features --features=shuttle --release -p kaspa-miner 219 | 220 | - name: Run cargo test for kaspaopencl 221 | uses: actions-rs/cargo@v1 222 | with: 223 | command: test 224 | args: --release -p kaspaopencl 225 | 226 | lints: 227 | name: Lints 228 | runs-on: ubuntu-latest 229 | steps: 230 | - name: Checkout sources 231 | uses: actions/checkout@v2 232 | 233 | - name: Install Protoc 234 | uses: arduino/setup-protoc@v1 235 | 236 | - name: Install stable toolchain 237 | uses: actions-rs/toolchain@v1 238 | with: 239 | profile: minimal 240 | toolchain: stable 241 | override: true 242 | components: rustfmt, clippy 243 | 244 | - name: Install cuda 245 | uses: Jimver/cuda-toolkit@v0.2.8 246 | with: 247 | cuda: '11.2.2' 248 | method: 'network' 249 | sub-packages: '["nvcc", "cudart"]' 250 | 251 | - name: Cache 252 | uses: actions/cache@v2 253 | with: 254 | path: | 255 | ~/.cargo/bin/ 256 | ~/.cargo/registry/index/ 257 | ~/.cargo/registry/cache/ 258 | ~/.cargo/git/db/ 259 | target/ 260 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 261 | 262 | 263 | - name: Run cargo fmt 264 | uses: actions-rs/cargo@v1 265 | with: 266 | command: fmt 267 | args: --all -- --check 268 | 269 | - name: Run cargo clippy 270 | uses: actions-rs/cargo@v1 271 | with: 272 | command: clippy 273 | 
args: --tests -- -D warnings 274 | -------------------------------------------------------------------------------- /src/client/grpc.rs: -------------------------------------------------------------------------------- 1 | use crate::client::Client; 2 | use crate::pow::BlockSeed; 3 | use crate::pow::BlockSeed::{FullBlock, PartialBlock}; 4 | use crate::proto::kaspad_message::Payload; 5 | use crate::proto::rpc_client::RpcClient; 6 | use crate::proto::{ 7 | GetBlockTemplateRequestMessage, GetInfoRequestMessage, KaspadMessage, NotifyBlockAddedRequestMessage, 8 | NotifyNewBlockTemplateRequestMessage, 9 | }; 10 | use crate::{miner::MinerManager, Error}; 11 | use async_trait::async_trait; 12 | use futures_util::StreamExt; 13 | use log::{error, info, warn}; 14 | use rand::{thread_rng, RngCore}; 15 | use semver::Version; 16 | use std::sync::atomic::{AtomicU16, Ordering}; 17 | use std::sync::Arc; 18 | use tokio::sync::mpsc::{self, error::SendError, Sender}; 19 | use tokio::task::JoinHandle; 20 | use tokio_stream::wrappers::ReceiverStream; 21 | use tokio_util::sync::{PollSendError, PollSender}; 22 | use tonic::{transport::Channel as TonicChannel, Streaming}; 23 | 24 | static EXTRA_DATA: &str = concat!(env!("CARGO_PKG_VERSION"), "/", env!("PACKAGE_COMPILE_TIME")); 25 | static VERSION_UPDATE: &str = "0.11.15"; 26 | type BlockHandle = JoinHandle>>; 27 | 28 | #[allow(dead_code)] 29 | pub struct KaspadHandler { 30 | client: RpcClient, 31 | pub send_channel: Sender, 32 | stream: Streaming, 33 | miner_address: String, 34 | mine_when_not_synced: bool, 35 | devfund_address: Option, 36 | devfund_percent: u16, 37 | block_template_ctr: Arc, 38 | 39 | block_channel: Sender, 40 | block_handle: BlockHandle, 41 | } 42 | 43 | #[async_trait(?Send)] 44 | impl Client for KaspadHandler { 45 | fn add_devfund(&mut self, address: String, percent: u16) { 46 | self.devfund_address = Some(address); 47 | self.devfund_percent = percent; 48 | } 49 | 50 | async fn register(&mut self) -> Result<(), Error> { 
51 | // We actually register in connect 52 | Ok(()) 53 | } 54 | 55 | async fn listen(&mut self, miner: &mut MinerManager) -> Result<(), Error> { 56 | while let Some(msg) = self.stream.message().await? { 57 | match msg.payload { 58 | Some(payload) => self.handle_message(payload, miner).await?, 59 | None => warn!("kaspad message payload is empty"), 60 | } 61 | } 62 | Ok(()) 63 | } 64 | 65 | fn get_block_channel(&self) -> Sender { 66 | self.block_channel.clone() 67 | } 68 | } 69 | 70 | impl KaspadHandler { 71 | pub async fn connect( 72 | address: D, 73 | miner_address: String, 74 | mine_when_not_synced: bool, 75 | block_template_ctr: Option>, 76 | ) -> Result, Error> 77 | where 78 | D: std::convert::TryInto, 79 | D::Error: Into, 80 | { 81 | let mut client = RpcClient::connect(address).await?; 82 | let (send_channel, recv) = mpsc::channel(2); 83 | send_channel.send(GetInfoRequestMessage {}.into()).await?; 84 | let stream = client.message_stream(ReceiverStream::new(recv)).await?.into_inner(); 85 | let (block_channel, block_handle) = Self::create_block_channel(send_channel.clone()); 86 | Ok(Box::new(Self { 87 | client, 88 | stream, 89 | send_channel, 90 | miner_address, 91 | mine_when_not_synced, 92 | devfund_address: None, 93 | devfund_percent: 0, 94 | block_template_ctr: block_template_ctr 95 | .unwrap_or_else(|| Arc::new(AtomicU16::new((thread_rng().next_u64() % 10_000u64) as u16))), 96 | block_channel, 97 | block_handle, 98 | })) 99 | } 100 | 101 | fn create_block_channel(send_channel: Sender) -> (Sender, BlockHandle) { 102 | // KaspadMessage::submit_block(block) 103 | let (send, recv) = mpsc::channel::(1); 104 | ( 105 | send, 106 | tokio::spawn(async move { 107 | ReceiverStream::new(recv) 108 | .map(|block_seed| match block_seed { 109 | FullBlock(block) => KaspadMessage::submit_block(*block), 110 | PartialBlock { .. 
} => unreachable!("All blocks sent here should have arrived from here"), 111 | }) 112 | .map(Ok) 113 | .forward(PollSender::new(send_channel)) 114 | .await 115 | }), 116 | ) 117 | } 118 | 119 | async fn client_send(&self, msg: impl Into) -> Result<(), SendError> { 120 | self.send_channel.send(msg.into()).await 121 | } 122 | 123 | async fn client_get_block_template(&mut self) -> Result<(), SendError> { 124 | let pay_address = match &self.devfund_address { 125 | Some(devfund_address) if self.block_template_ctr.load(Ordering::SeqCst) <= self.devfund_percent => { 126 | devfund_address.clone() 127 | } 128 | _ => self.miner_address.clone(), 129 | }; 130 | self.block_template_ctr.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |v| Some((v + 1) % 10_000)).unwrap(); 131 | self.client_send(GetBlockTemplateRequestMessage { pay_address, extra_data: EXTRA_DATA.into() }).await 132 | } 133 | 134 | async fn handle_message(&mut self, msg: Payload, miner: &mut MinerManager) -> Result<(), Error> { 135 | match msg { 136 | Payload::BlockAddedNotification(_) => self.client_get_block_template().await?, 137 | Payload::NewBlockTemplateNotification(_) => self.client_get_block_template().await?, 138 | Payload::GetBlockTemplateResponse(template) => match (template.block, template.is_synced, template.error) { 139 | (Some(b), true, None) => miner.process_block(Some(FullBlock(Box::new(b)))).await?, 140 | (Some(b), false, None) if self.mine_when_not_synced => { 141 | miner.process_block(Some(FullBlock(Box::new(b)))).await? 
142 | } 143 | (_, false, None) => miner.process_block(None).await?, 144 | (_, _, Some(e)) => { 145 | return Err(format!("GetTemplate returned with an error: {:?}", e).into()); 146 | } 147 | (None, true, None) => error!("No block and No Error!"), 148 | }, 149 | Payload::SubmitBlockResponse(res) => match res.error { 150 | None => info!("block submitted successfully!"), 151 | Some(e) => warn!("Failed submitting block: {:?}", e), 152 | }, 153 | Payload::GetBlockResponse(msg) => { 154 | if let Some(e) = msg.error { 155 | return Err(e.message.into()); 156 | } else { 157 | info!("Get block response: {:?}", msg); 158 | } 159 | } 160 | Payload::GetInfoResponse(info) => { 161 | info!("Kaspad version: {}", info.server_version); 162 | let kaspad_version = Version::parse(&info.server_version)?; 163 | let update_version = Version::parse(VERSION_UPDATE)?; 164 | match kaspad_version >= update_version { 165 | true => self.client_send(NotifyNewBlockTemplateRequestMessage {}).await?, 166 | false => self.client_send(NotifyBlockAddedRequestMessage {}).await?, 167 | }; 168 | 169 | self.client_get_block_template().await?; 170 | } 171 | Payload::NotifyNewBlockTemplateResponse(res) => match res.error { 172 | None => info!("Registered for new template notifications"), 173 | Some(e) => error!("Failed registering for new template notifications: {:?}", e), 174 | }, 175 | Payload::NotifyBlockAddedResponse(res) => match res.error { 176 | None => info!("Registered for block notifications (upgrade your Kaspad for better experience)"), 177 | Some(e) => error!("Failed registering for block notifications: {:?}", e), 178 | }, 179 | msg => info!("got unknown msg: {:?}", msg), 180 | } 181 | Ok(()) 182 | } 183 | } 184 | 185 | impl Drop for KaspadHandler { 186 | fn drop(&mut self) { 187 | self.block_handle.abort(); 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /proto/messages.proto: 
-------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | package protowire; 3 | 4 | option go_package = "github.com/kaspanet/kaspad/protowire"; 5 | 6 | import "p2p.proto"; 7 | import "rpc.proto"; 8 | 9 | message KaspadMessage { 10 | oneof payload { 11 | AddressesMessage addresses = 1; 12 | BlockMessage block = 2; 13 | TransactionMessage transaction = 3; 14 | BlockLocatorMessage blockLocator = 5; 15 | RequestAddressesMessage requestAddresses = 6; 16 | RequestRelayBlocksMessage requestRelayBlocks = 10; 17 | RequestTransactionsMessage requestTransactions = 12; 18 | BlockMessage ibdBlock = 13; 19 | InvRelayBlockMessage invRelayBlock = 14; 20 | InvTransactionsMessage invTransactions = 15; 21 | PingMessage ping = 16; 22 | PongMessage pong = 17; 23 | VerackMessage verack = 19; 24 | VersionMessage version = 20; 25 | TransactionNotFoundMessage transactionNotFound = 21; 26 | RejectMessage reject = 22; 27 | PruningPointUtxoSetChunkMessage pruningPointUtxoSetChunk = 25; 28 | RequestIBDBlocksMessage requestIBDBlocks = 26; 29 | UnexpectedPruningPointMessage unexpectedPruningPoint = 27; 30 | IbdBlockLocatorMessage ibdBlockLocator = 30; 31 | IbdBlockLocatorHighestHashMessage ibdBlockLocatorHighestHash = 31; 32 | RequestNextPruningPointUtxoSetChunkMessage requestNextPruningPointUtxoSetChunk = 33; 33 | DonePruningPointUtxoSetChunksMessage donePruningPointUtxoSetChunks = 34; 34 | IbdBlockLocatorHighestHashNotFoundMessage ibdBlockLocatorHighestHashNotFound = 35; 35 | BlockWithTrustedDataMessage blockWithTrustedData = 36; 36 | DoneBlocksWithTrustedDataMessage doneBlocksWithTrustedData = 37; 37 | RequestPruningPointAndItsAnticoneMessage requestPruningPointAndItsAnticone = 40; 38 | BlockHeadersMessage blockHeaders = 41; 39 | RequestNextHeadersMessage requestNextHeaders = 42; 40 | DoneHeadersMessage DoneHeaders = 43; 41 | RequestPruningPointUTXOSetMessage requestPruningPointUTXOSet = 44; 42 | RequestHeadersMessage requestHeaders = 45; 43 
| RequestBlockLocatorMessage requestBlockLocator = 46; 44 | PruningPointsMessage pruningPoints = 47; 45 | RequestPruningPointProofMessage requestPruningPointProof = 48; 46 | PruningPointProofMessage pruningPointProof = 49; 47 | ReadyMessage ready = 50; 48 | BlockWithTrustedDataV4Message blockWithTrustedDataV4 = 51; 49 | TrustedDataMessage trustedData = 52; 50 | RequestIBDChainBlockLocatorMessage requestIBDChainBlockLocator = 53; 51 | IbdChainBlockLocatorMessage ibdChainBlockLocator = 54; 52 | RequestAnticoneMessage requestAnticone = 55; 53 | RequestNextPruningPointAndItsAnticoneBlocksMessage requestNextPruningPointAndItsAnticoneBlocks = 56; 54 | 55 | GetCurrentNetworkRequestMessage getCurrentNetworkRequest = 1001; 56 | GetCurrentNetworkResponseMessage getCurrentNetworkResponse = 1002; 57 | SubmitBlockRequestMessage submitBlockRequest = 1003; 58 | SubmitBlockResponseMessage submitBlockResponse = 1004; 59 | GetBlockTemplateRequestMessage getBlockTemplateRequest = 1005; 60 | GetBlockTemplateResponseMessage getBlockTemplateResponse = 1006; 61 | NotifyBlockAddedRequestMessage notifyBlockAddedRequest = 1007; 62 | NotifyBlockAddedResponseMessage notifyBlockAddedResponse = 1008; 63 | BlockAddedNotificationMessage blockAddedNotification = 1009; 64 | GetPeerAddressesRequestMessage getPeerAddressesRequest = 1010; 65 | GetPeerAddressesResponseMessage getPeerAddressesResponse = 1011; 66 | GetSelectedTipHashRequestMessage getSelectedTipHashRequest = 1012; 67 | GetSelectedTipHashResponseMessage getSelectedTipHashResponse = 1013; 68 | GetMempoolEntryRequestMessage getMempoolEntryRequest = 1014; 69 | GetMempoolEntryResponseMessage getMempoolEntryResponse = 1015; 70 | GetConnectedPeerInfoRequestMessage getConnectedPeerInfoRequest = 1016; 71 | GetConnectedPeerInfoResponseMessage getConnectedPeerInfoResponse = 1017; 72 | AddPeerRequestMessage addPeerRequest = 1018; 73 | AddPeerResponseMessage addPeerResponse = 1019; 74 | SubmitTransactionRequestMessage submitTransactionRequest = 1020; 
75 | SubmitTransactionResponseMessage submitTransactionResponse = 1021; 76 | NotifyVirtualSelectedParentChainChangedRequestMessage notifyVirtualSelectedParentChainChangedRequest = 1022; 77 | NotifyVirtualSelectedParentChainChangedResponseMessage notifyVirtualSelectedParentChainChangedResponse = 1023; 78 | VirtualSelectedParentChainChangedNotificationMessage virtualSelectedParentChainChangedNotification = 1024; 79 | GetBlockRequestMessage getBlockRequest = 1025; 80 | GetBlockResponseMessage getBlockResponse = 1026; 81 | GetSubnetworkRequestMessage getSubnetworkRequest = 1027; 82 | GetSubnetworkResponseMessage getSubnetworkResponse = 1028; 83 | GetVirtualSelectedParentChainFromBlockRequestMessage getVirtualSelectedParentChainFromBlockRequest = 1029; 84 | GetVirtualSelectedParentChainFromBlockResponseMessage getVirtualSelectedParentChainFromBlockResponse = 1030; 85 | GetBlocksRequestMessage getBlocksRequest = 1031; 86 | GetBlocksResponseMessage getBlocksResponse = 1032; 87 | GetBlockCountRequestMessage getBlockCountRequest = 1033; 88 | GetBlockCountResponseMessage getBlockCountResponse = 1034; 89 | GetBlockDagInfoRequestMessage getBlockDagInfoRequest = 1035; 90 | GetBlockDagInfoResponseMessage getBlockDagInfoResponse = 1036; 91 | ResolveFinalityConflictRequestMessage resolveFinalityConflictRequest = 1037; 92 | ResolveFinalityConflictResponseMessage resolveFinalityConflictResponse = 1038; 93 | NotifyFinalityConflictsRequestMessage notifyFinalityConflictsRequest = 1039; 94 | NotifyFinalityConflictsResponseMessage notifyFinalityConflictsResponse = 1040; 95 | FinalityConflictNotificationMessage finalityConflictNotification = 1041; 96 | FinalityConflictResolvedNotificationMessage finalityConflictResolvedNotification = 1042; 97 | GetMempoolEntriesRequestMessage getMempoolEntriesRequest = 1043; 98 | GetMempoolEntriesResponseMessage getMempoolEntriesResponse = 1044; 99 | ShutDownRequestMessage shutDownRequest = 1045; 100 | ShutDownResponseMessage shutDownResponse = 1046; 101 
| GetHeadersRequestMessage getHeadersRequest = 1047; 102 | GetHeadersResponseMessage getHeadersResponse = 1048; 103 | NotifyUtxosChangedRequestMessage notifyUtxosChangedRequest = 1049; 104 | NotifyUtxosChangedResponseMessage notifyUtxosChangedResponse = 1050; 105 | UtxosChangedNotificationMessage utxosChangedNotification = 1051; 106 | GetUtxosByAddressesRequestMessage getUtxosByAddressesRequest = 1052; 107 | GetUtxosByAddressesResponseMessage getUtxosByAddressesResponse = 1053; 108 | GetVirtualSelectedParentBlueScoreRequestMessage getVirtualSelectedParentBlueScoreRequest = 1054; 109 | GetVirtualSelectedParentBlueScoreResponseMessage getVirtualSelectedParentBlueScoreResponse = 1055; 110 | NotifyVirtualSelectedParentBlueScoreChangedRequestMessage notifyVirtualSelectedParentBlueScoreChangedRequest = 1056; 111 | NotifyVirtualSelectedParentBlueScoreChangedResponseMessage notifyVirtualSelectedParentBlueScoreChangedResponse = 1057; 112 | VirtualSelectedParentBlueScoreChangedNotificationMessage virtualSelectedParentBlueScoreChangedNotification = 1058; 113 | BanRequestMessage banRequest = 1059; 114 | BanResponseMessage banResponse = 1060; 115 | UnbanRequestMessage unbanRequest = 1061; 116 | UnbanResponseMessage unbanResponse = 1062; 117 | GetInfoRequestMessage getInfoRequest = 1063; 118 | GetInfoResponseMessage getInfoResponse = 1064; 119 | StopNotifyingUtxosChangedRequestMessage stopNotifyingUtxosChangedRequest = 1065; 120 | StopNotifyingUtxosChangedResponseMessage stopNotifyingUtxosChangedResponse = 1066; 121 | NotifyPruningPointUTXOSetOverrideRequestMessage notifyPruningPointUTXOSetOverrideRequest = 1067; 122 | NotifyPruningPointUTXOSetOverrideResponseMessage notifyPruningPointUTXOSetOverrideResponse = 1068; 123 | PruningPointUTXOSetOverrideNotificationMessage pruningPointUTXOSetOverrideNotification = 1069; 124 | StopNotifyingPruningPointUTXOSetOverrideRequestMessage stopNotifyingPruningPointUTXOSetOverrideRequest = 1070; 125 | 
StopNotifyingPruningPointUTXOSetOverrideResponseMessage stopNotifyingPruningPointUTXOSetOverrideResponse = 1071; 126 | EstimateNetworkHashesPerSecondRequestMessage estimateNetworkHashesPerSecondRequest = 1072; 127 | EstimateNetworkHashesPerSecondResponseMessage estimateNetworkHashesPerSecondResponse = 1073; 128 | NotifyVirtualDaaScoreChangedRequestMessage notifyVirtualDaaScoreChangedRequest = 1074; 129 | NotifyVirtualDaaScoreChangedResponseMessage notifyVirtualDaaScoreChangedResponse = 1075; 130 | VirtualDaaScoreChangedNotificationMessage virtualDaaScoreChangedNotification = 1076; 131 | GetBalanceByAddressRequestMessage getBalanceByAddressRequest = 1077; 132 | GetBalanceByAddressResponseMessage getBalanceByAddressResponse = 1078; 133 | GetBalancesByAddressesRequestMessage getBalancesByAddressesRequest = 1079; 134 | GetBalancesByAddressesResponseMessage getBalancesByAddressesResponse = 1080; 135 | NotifyNewBlockTemplateRequestMessage notifyNewBlockTemplateRequest = 1081; 136 | NotifyNewBlockTemplateResponseMessage notifyNewBlockTemplateResponse = 1082; 137 | NewBlockTemplateNotificationMessage newBlockTemplateNotification = 1083; 138 | } 139 | } 140 | 141 | service P2P { 142 | rpc MessageStream (stream KaspadMessage) returns (stream KaspadMessage) {} 143 | } 144 | 145 | service RPC { 146 | rpc MessageStream (stream KaspadMessage) returns (stream KaspadMessage) {} 147 | } 148 | -------------------------------------------------------------------------------- /plugins/cuda/src/worker.rs: -------------------------------------------------------------------------------- 1 | use crate::{Error, NonceGenEnum}; 2 | use cust::context::CurrentContext; 3 | use cust::device::DeviceAttribute; 4 | use cust::function::Function; 5 | use cust::module::{ModuleJitOption, OptLevel}; 6 | use cust::prelude::*; 7 | use kaspa_miner::xoshiro256starstar::Xoshiro256StarStar; 8 | use kaspa_miner::Worker; 9 | use log::{error, info}; 10 | use rand::{Fill, RngCore}; 11 | use std::ffi::CString; 12 | 
use std::sync::{Arc, Weak}; 13 | 14 | static BPS: f32 = 1.; 15 | 16 | static PTX_86: &str = include_str!("../resources/kaspa-cuda-sm86.ptx"); 17 | static PTX_75: &str = include_str!("../resources/kaspa-cuda-sm75.ptx"); 18 | static PTX_61: &str = include_str!("../resources/kaspa-cuda-sm61.ptx"); 19 | static PTX_30: &str = include_str!("../resources/kaspa-cuda-sm30.ptx"); 20 | static PTX_20: &str = include_str!("../resources/kaspa-cuda-sm20.ptx"); 21 | 22 | pub struct Kernel<'kernel> { 23 | func: Arc>, 24 | block_size: u32, 25 | grid_size: u32, 26 | } 27 | 28 | impl<'kernel> Kernel<'kernel> { 29 | pub fn new(module: Weak, name: &'kernel str) -> Result, Error> { 30 | let func = Arc::new(unsafe { 31 | module.as_ptr().as_ref().unwrap().get_function(name).map_err(|e| { 32 | error!("Error loading function: {}", e); 33 | e 34 | })? 35 | }); 36 | let (_, block_size) = func.suggested_launch_configuration(0, 0.into())?; 37 | 38 | let device = CurrentContext::get_device()?; 39 | let sm_count = device.get_attribute(DeviceAttribute::MultiprocessorCount)? as u32; 40 | let grid_size = sm_count * func.max_active_blocks_per_multiprocessor(block_size.into(), 0)?; 41 | 42 | Ok(Self { func, block_size, grid_size }) 43 | } 44 | 45 | pub fn get_workload(&self) -> u32 { 46 | self.block_size * self.grid_size 47 | } 48 | 49 | pub fn set_workload(&mut self, workload: u32) { 50 | self.grid_size = (workload + self.block_size - 1) / self.block_size 51 | } 52 | } 53 | 54 | pub struct CudaGPUWorker<'gpu> { 55 | // NOTE: The order is important! 
context must be closed last 56 | heavy_hash_kernel: Kernel<'gpu>, 57 | stream: Stream, 58 | start_event: Event, 59 | stop_event: Event, 60 | _module: Arc, 61 | 62 | rand_state: DeviceBuffer, 63 | final_nonce_buff: DeviceBuffer, 64 | 65 | device_id: u32, 66 | pub workload: usize, 67 | _context: Context, 68 | 69 | random: NonceGenEnum, 70 | } 71 | 72 | impl<'gpu> Worker for CudaGPUWorker<'gpu> { 73 | fn id(&self) -> String { 74 | let device = CurrentContext::get_device().unwrap(); 75 | format!("#{} ({})", self.device_id, device.name().unwrap()) 76 | } 77 | 78 | fn load_block_constants(&mut self, hash_header: &[u8; 72], matrix: &[[u16; 64]; 64], target: &[u64; 4]) { 79 | let u8matrix: Arc<[[u8; 64]; 64]> = Arc::new(matrix.map(|row| row.map(|v| v as u8))); 80 | let mut hash_header_gpu = self._module.get_global::<[u8; 72]>(&CString::new("hash_header").unwrap()).unwrap(); 81 | hash_header_gpu.copy_from(hash_header).map_err(|e| e.to_string()).unwrap(); 82 | 83 | let mut matrix_gpu = self._module.get_global::<[[u8; 64]; 64]>(&CString::new("matrix").unwrap()).unwrap(); 84 | matrix_gpu.copy_from(&u8matrix).map_err(|e| e.to_string()).unwrap(); 85 | 86 | let mut target_gpu = self._module.get_global::<[u64; 4]>(&CString::new("target").unwrap()).unwrap(); 87 | target_gpu.copy_from(target).map_err(|e| e.to_string()).unwrap(); 88 | } 89 | 90 | #[inline(always)] 91 | fn calculate_hash(&mut self, _nonces: Option<&Vec>, nonce_mask: u64, nonce_fixed: u64) { 92 | let func = &self.heavy_hash_kernel.func; 93 | let stream = &self.stream; 94 | let random: u8 = match self.random { 95 | NonceGenEnum::Lean => { 96 | self.rand_state.copy_from(&[rand::thread_rng().next_u64()]).unwrap(); 97 | 0 98 | } 99 | NonceGenEnum::Xoshiro => 1, 100 | }; 101 | 102 | self.start_event.record(stream).unwrap(); 103 | unsafe { 104 | launch!( 105 | func<<< 106 | self.heavy_hash_kernel.grid_size, self.heavy_hash_kernel.block_size, 107 | 0, stream 108 | >>>( 109 | nonce_mask, nonce_fixed, 110 | self.workload, 111 | 
random, 112 | self.rand_state.as_device_ptr(), 113 | self.final_nonce_buff.as_device_ptr() 114 | ) 115 | ) 116 | .unwrap(); // We see errors in sync 117 | } 118 | self.stop_event.record(stream).unwrap(); 119 | } 120 | 121 | #[inline(always)] 122 | fn sync(&self) -> Result<(), Error> { 123 | //self.stream.synchronize()?; 124 | self.stop_event.synchronize()?; 125 | if self.stop_event.elapsed_time_f32(&self.start_event)? > 1000. / BPS { 126 | return Err("Cuda takes longer then block rate. Please reduce your workload.".into()); 127 | } 128 | Ok(()) 129 | } 130 | 131 | fn get_workload(&self) -> usize { 132 | self.workload 133 | } 134 | 135 | #[inline(always)] 136 | fn copy_output_to(&mut self, nonces: &mut Vec) -> Result<(), Error> { 137 | self.final_nonce_buff.copy_to(nonces)?; 138 | Ok(()) 139 | } 140 | } 141 | 142 | impl<'gpu> CudaGPUWorker<'gpu> { 143 | pub fn new( 144 | device_id: u32, 145 | workload: f32, 146 | is_absolute: bool, 147 | blocking_sync: bool, 148 | random: NonceGenEnum, 149 | ) -> Result { 150 | info!("Starting a CUDA worker"); 151 | let sync_flag = match blocking_sync { 152 | true => ContextFlags::SCHED_BLOCKING_SYNC, 153 | false => ContextFlags::SCHED_AUTO, 154 | }; 155 | let device = Device::get_device(device_id).unwrap(); 156 | let _context = Context::new(device)?; 157 | _context.set_flags(sync_flag)?; 158 | 159 | let major = device.get_attribute(DeviceAttribute::ComputeCapabilityMajor)?; 160 | let minor = device.get_attribute(DeviceAttribute::ComputeCapabilityMinor)?; 161 | let _module: Arc; 162 | info!("Device #{} compute version is {}.{}", device_id, major, minor); 163 | if major > 8 || (major == 8 && minor >= 6) { 164 | _module = Arc::new(Module::from_ptx(PTX_86, &[ModuleJitOption::OptLevel(OptLevel::O4)]).map_err(|e| { 165 | error!("Error loading PTX. 
Make sure you have the updated driver for you devices"); 166 | e 167 | })?); 168 | } else if major > 7 || (major == 7 && minor >= 5) { 169 | _module = Arc::new(Module::from_ptx(PTX_75, &[ModuleJitOption::OptLevel(OptLevel::O4)]).map_err(|e| { 170 | error!("Error loading PTX. Make sure you have the updated driver for you devices"); 171 | e 172 | })?); 173 | } else if major > 6 || (major == 6 && minor >= 1) { 174 | _module = Arc::new(Module::from_ptx(PTX_61, &[ModuleJitOption::OptLevel(OptLevel::O4)]).map_err(|e| { 175 | error!("Error loading PTX. Make sure you have the updated driver for you devices"); 176 | e 177 | })?); 178 | } else if major >= 3 { 179 | _module = Arc::new(Module::from_ptx(PTX_30, &[ModuleJitOption::OptLevel(OptLevel::O4)]).map_err(|e| { 180 | error!("Error loading PTX. Make sure you have the updated driver for you devices"); 181 | e 182 | })?); 183 | } else if major >= 2 { 184 | _module = Arc::new(Module::from_ptx(PTX_20, &[ModuleJitOption::OptLevel(OptLevel::O4)]).map_err(|e| { 185 | error!("Error loading PTX. 
Make sure you have the updated driver for you devices"); 186 | e 187 | })?); 188 | } else { 189 | return Err("Cuda compute version not supported".into()); 190 | } 191 | 192 | let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?; 193 | 194 | let mut heavy_hash_kernel = Kernel::new(Arc::downgrade(&_module), "heavy_hash")?; 195 | 196 | let mut chosen_workload = 0u32; 197 | if is_absolute { 198 | chosen_workload = 1; 199 | } else { 200 | let cur_workload = heavy_hash_kernel.get_workload(); 201 | if chosen_workload == 0 || chosen_workload < cur_workload { 202 | chosen_workload = cur_workload; 203 | } 204 | } 205 | chosen_workload = (chosen_workload as f32 * workload) as u32; 206 | info!("GPU #{} Chosen workload: {}", device_id, chosen_workload); 207 | heavy_hash_kernel.set_workload(chosen_workload); 208 | 209 | let final_nonce_buff = vec![0u64; 1].as_slice().as_dbuf()?; 210 | 211 | let rand_state: DeviceBuffer = match random { 212 | NonceGenEnum::Xoshiro => { 213 | info!("Using xoshiro for nonce-generation"); 214 | let mut buffer = DeviceBuffer::::zeroed(4 * (chosen_workload as usize)).unwrap(); 215 | info!("GPU #{} is generating initial seed. 
This may take some time.", device_id); 216 | let mut seed = [1u64; 4]; 217 | seed.try_fill(&mut rand::thread_rng())?; 218 | buffer.copy_from( 219 | Xoshiro256StarStar::new(&seed) 220 | .iter_jump_state() 221 | .take(chosen_workload as usize) 222 | .flatten() 223 | .collect::>() 224 | .as_slice(), 225 | )?; 226 | info!("GPU #{} initialized", device_id); 227 | buffer 228 | } 229 | NonceGenEnum::Lean => { 230 | info!("Using lean nonce-generation"); 231 | let mut buffer = DeviceBuffer::::zeroed(1).unwrap(); 232 | let seed = rand::thread_rng().next_u64(); 233 | buffer.copy_from(&[seed])?; 234 | buffer 235 | } 236 | }; 237 | Ok(Self { 238 | device_id, 239 | _context, 240 | _module, 241 | start_event: Event::new(EventFlags::DEFAULT)?, 242 | stop_event: Event::new(EventFlags::DEFAULT)?, 243 | workload: chosen_workload as usize, 244 | stream, 245 | rand_state, 246 | final_nonce_buff, 247 | heavy_hash_kernel, 248 | random, 249 | }) 250 | } 251 | } 252 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /plugins/opencl/resources/kaspa-opencl.cl: -------------------------------------------------------------------------------- 1 | // Catering for different flavors 2 | #pragma OPENCL EXTENSION cl_amd_media_ops : enable 3 | 4 | #if __OPENCL_VERSION__ <= CL_VERSION_1_1 5 | #define STATIC 6 | #else 7 | #define STATIC static 8 | #endif 9 | /* TYPES */ 10 | 11 | typedef uchar uint8_t; 12 | typedef char int8_t; 13 | typedef ushort uint16_t; 14 | typedef short int16_t; 15 | typedef uint uint32_t; 16 | typedef int int32_t; 17 | typedef ulong uint64_t; 18 | typedef long int64_t; 19 | 20 | /* TINY KECCAK */ 21 | /** libkeccak-tiny 22 | * 23 | * A single-file implementation of SHA-3 and SHAKE. 24 | * 25 | * Implementor: David Leon Gil 26 | * License: CC0, attribution kindly requested. Blame taken too, 27 | * but not liability. 28 | */ 29 | 30 | /******** The Keccak-f[1600] permutation ********/ 31 | 32 | /*** Constants. 
***/ 33 | constant STATIC const uint8_t rho[24] = \ 34 | { 1, 3, 6, 10, 15, 21, 35 | 28, 36, 45, 55, 2, 14, 36 | 27, 41, 56, 8, 25, 43, 37 | 62, 18, 39, 61, 20, 44}; 38 | constant STATIC const uint8_t pi[24] = \ 39 | {10, 7, 11, 17, 18, 3, 40 | 5, 16, 8, 21, 24, 4, 41 | 15, 23, 19, 13, 12, 2, 42 | 20, 14, 22, 9, 6, 1}; 43 | 44 | constant STATIC const uint64_t RC[24] = \ 45 | {1UL, 0x8082UL, 0x800000000000808aUL, 0x8000000080008000UL, 46 | 0x808bUL, 0x80000001UL, 0x8000000080008081UL, 0x8000000000008009UL, 47 | 0x8aUL, 0x88UL, 0x80008009UL, 0x8000000aUL, 48 | 0x8000808bUL, 0x800000000000008bUL, 0x8000000000008089UL, 0x8000000000008003UL, 49 | 0x8000000000008002UL, 0x8000000000000080UL, 0x800aUL, 0x800000008000000aUL, 50 | 0x8000000080008081UL, 0x8000000000008080UL, 0x80000001UL, 0x8000000080008008UL}; 51 | 52 | 53 | /** Magic from fancyIX/sgminer-phi2-branch **/ 54 | #if defined(OPENCL_PLATFORM_AMD) 55 | #pragma OPENCL EXTENSION cl_amd_media_ops : enable 56 | #define dataType uint2 57 | #define as_dataType as_uint2 58 | static inline uint2 rol(const uint2 vv, const int r) 59 | { 60 | if (r <= 32) 61 | { 62 | return amd_bitalign((vv).xy, (vv).yx, 32 - r); 63 | } 64 | else 65 | { 66 | return amd_bitalign((vv).yx, (vv).xy, 64 - r); 67 | } 68 | } 69 | #else 70 | #define dataType ulong 71 | #define as_dataType as_ulong 72 | #define rol(x, s) (((x) << s) | ((x) >> (64 - s))) 73 | #endif 74 | 75 | /*** Helper macros to unroll the permutation. 
***/ 76 | #define REPEAT6(e) e e e e e e 77 | #define REPEAT24(e) REPEAT6(e e e e) 78 | #define REPEAT23(e) REPEAT6(e e e) e e e e e 79 | #define REPEAT5(e) e e e e e 80 | #define FOR5(v, s, e) \ 81 | v = 0; \ 82 | REPEAT5(e; v += s;) 83 | 84 | /*** Keccak-f[1600] ***/ 85 | STATIC inline void keccakf(void *state) { 86 | dataType *a = (dataType *)state; 87 | dataType b[5] = {0}; 88 | dataType t = 0, v = 0; 89 | uint8_t x, y; 90 | 91 | #if defined(cl_amd_media_ops) 92 | #pragma unroll 93 | #endif 94 | for (int i = 0; i < 23; i++) { 95 | // Theta 96 | FOR5(x, 1, 97 | b[x] = a[x] ^ a[x+5] ^ a[x+10] ^ a[x+15] ^ a[x+20];) 98 | 99 | v = b[4]; t = b[0]; 100 | b[4] = b[4] ^ rol(b[1], 1); 101 | b[0] = b[0] ^ rol(b[2], 1); 102 | b[1] = b[1] ^ rol(b[3], 1); 103 | b[2] = b[2] ^ rol(v, 1); 104 | b[3] = b[3] ^ rol(t, 1); 105 | 106 | FOR5(x, 1, 107 | FOR5(y, 5, a[y + x] ^= b[(x + 4) % 5]; )) 108 | 109 | // Rho and pi 110 | t = a[1]; 111 | x = 23; 112 | REPEAT23(a[pi[x]] = rol(a[pi[x-1]], rho[x]); x--; ) 113 | a[pi[ 0]] = rol( t, rho[ 0]); 114 | 115 | // Chi 116 | FOR5(y, 5, 117 | v = a[y]; t = a[y+1]; 118 | a[y ] = bitselect(a[y ] ^ a[y+2], a[y ], a[y+1]); 119 | a[y+1] = bitselect(a[y+1] ^ a[y+3], a[y+1], a[y+2]); 120 | a[y+2] = bitselect(a[y+2] ^ a[y+4], a[y+2], a[y+3]); 121 | a[y+3] = bitselect(a[y+3] ^ v, a[y+3], a[y+4]); 122 | a[y+4] = bitselect(a[y+4] ^ t, a[y+4], v); 123 | ) 124 | 125 | // Iota 126 | a[0] ^= as_dataType(RC[i]); 127 | } 128 | /*******************************************************/ 129 | // Theta 130 | FOR5(x, 1, 131 | b[x] = a[x] ^ a[x+5] ^ a[x+10] ^ a[x+15] ^ a[x+20];) 132 | 133 | v = b[4]; t = b[0]; 134 | b[4] = b[4] ^ rol(b[1], 1); 135 | b[0] = b[0] ^ rol(b[2], 1); 136 | b[1] = b[1] ^ rol(b[3], 1); 137 | b[2] = b[2] ^ rol(v, 1); 138 | b[3] = b[3] ^ rol(t, 1); 139 | 140 | a[0] ^= b[4]; 141 | a[1] ^= b[0]; a[6] ^= b[0]; 142 | a[2] ^= b[1]; a[12] ^= b[1]; 143 | a[3] ^= b[2]; a[18] ^= b[2]; 144 | a[4] ^= b[3]; a[24] ^= b[3]; 145 | 146 | // Rho and pi 147 | 
a[1]=rol(a[pi[22]], rho[23]); 148 | a[2]=rol(a[pi[16]], rho[17]); 149 | a[4]=rol(a[pi[10]], rho[11]); 150 | a[3]=rol(a[pi[ 4]], rho[ 5]); 151 | 152 | // Chi 153 | v = a[0]; 154 | 155 | a[0] = bitselect(a[0] ^ a[2], a[0], a[1]); 156 | a[1] = bitselect(a[1] ^ a[3], a[1], a[2]); 157 | a[2] = bitselect(a[2] ^ a[4], a[2], a[3]); 158 | a[3] = bitselect(a[3] ^ v, a[3], a[4]); 159 | 160 | // Iota 161 | a[0] ^= as_dataType(RC[23]); 162 | } 163 | 164 | /******** The FIPS202-defined functions. ********/ 165 | 166 | /*** Some helper macros. ***/ 167 | 168 | 169 | #define P keccakf 170 | #define Plen 200 171 | 172 | constant const ulong powP[25] = { 0x113cff0da1f6d83dUL, 0x29bf8855b7027e3cUL, 0x1e5f2e720efb44d2UL, 0x1ba5a4a3f59869a0UL, 0x7b2fafca875e2d65UL, 0x4aef61d629dce246UL, 0x183a981ead415b10UL, 0x776bf60c789bc29cUL, 0xf8ebf13388663140UL, 0x2e651c3c43285ff0UL, 0x0f96070540f14a0aUL, 0x44e367875b299152UL, 0xec70f1a425b13715UL, 0xe6c85d8f82e9da89UL, 0xb21a601f85b4b223UL, 0x3485549064a36a46UL, 0x0f06dd1c7a2f851aUL, 0xc1a2021d563bb142UL, 0xba1de5e4451668e4UL, 0xd102574105095f8dUL, 0x89ca4e849bcecf4aUL, 0x48b09427a8742edbUL, 0xb1fcce9ce78b5272UL, 0x5d1129cf82afa5bcUL, 0x02b97c786f824383UL }; 173 | constant const ulong heavyP[25] = { 0x3ad74c52b2248509UL, 0x79629b0e2f9f4216UL, 0x7a14ff4816c7f8eeUL, 0x11a75f4c80056498UL, 0xe720e0df44eecedaUL, 0x72c7d82e14f34069UL, 0xc100ff2a938935baUL, 0x5e219040250fc462UL, 0x8039f9a60dcf6a48UL, 0xa0bcaa9f792a3d0cUL, 0xf431c05dd0a9a226UL, 0xd31f4cc354c18c3fUL, 0x6c6b7d01a769cc3dUL, 0x2ec65bd3562493e4UL, 0x4ef74b3a99cdb044UL, 0x774c86835434f2b0UL, 0x07e961b036bc9416UL, 0x7e8f1db17765cc07UL, 0xea8fdb80bac46d39UL, 0xb992f2d37b34ca58UL, 0xc776c5048481b957UL, 0x47c39f675112c22eUL, 0x92bb399db5290c0aUL, 0x549ae0312f9fc615UL, 0x1619327d10b9da35UL }; 174 | 175 | /** The sponge-based hash construction. **/ 176 | STATIC inline void hash(constant const ulong *initP, const ulong* in, ulong4* out) { 177 | private ulong a[25]; 178 | // Xor in the last block. 
179 | #pragma unroll 180 | for (size_t i = 0; i < 10; i++) a[i] = initP[i] ^ in[i]; 181 | #pragma unroll 182 | for (size_t i = 10; i < 25; i++) a[i] = initP[i]; 183 | // Apply P 184 | P(a); 185 | // Squeeze output. 186 | *out = ((ulong4 *)(a))[0]; 187 | } 188 | 189 | /* RANDOM NUMBER GENERATOR BASED ON MWC64X */ 190 | /* http://cas.ee.ic.ac.uk/people/dt10/research/rngs-gpu-mwc64x.html */ 191 | 192 | /* Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org) 193 | 194 | To the extent possible under law, the author has dedicated all copyright 195 | and related and neighboring rights to this software to the public domain 196 | worldwide. This software is distributed without any warranty. 197 | 198 | See <http://creativecommons.org/publicdomain/zero/1.0/>. */ 199 | 200 | 201 | /* This is xoshiro256** 1.0, one of our all-purpose, rock-solid 202 | generators. It has excellent (sub-ns) speed, a state (256 bits) that is 203 | large enough for any parallel application, and it passes all tests we 204 | are aware of. 205 | 206 | For generating just floating-point numbers, xoshiro256+ is even faster. 207 | 208 | The state must be seeded so that it is not everywhere zero. If you have 209 | a 64-bit seed, we suggest to seed a splitmix64 generator and use its 210 | output to fill s. 
*/ 211 | 212 | inline uint64_t rotl(const uint64_t x, int k) { 213 | return (x << k) | (x >> (64 - k)); 214 | } 215 | 216 | inline uint64_t xoshiro256_next(global ulong4 *s) { 217 | const uint64_t result = rotl(s->y * 5, 7) * 9; 218 | 219 | const uint64_t t = s->y << 17; 220 | 221 | s->z ^= s->x; 222 | s->w ^= s->y; 223 | s->y ^= s->z; 224 | s->x ^= s->w; 225 | 226 | s->z ^= t; 227 | 228 | s->w = rotl(s->w, 45); 229 | 230 | return result; 231 | } 232 | /* KERNEL CODE */ 233 | 234 | #ifdef cl_khr_int64_base_atomics 235 | #pragma OPENCL EXTENSION cl_khr_int64_base_atomics: enable 236 | #endif 237 | typedef union _Hash { 238 | ulong4 hash; 239 | uchar bytes[32]; 240 | } Hash; 241 | 242 | #define BLOCKDIM 1024 243 | #define MATRIX_SIZE 64 244 | #define HALF_MATRIX_SIZE 32 245 | #define QUARTER_MATRIX_SIZE 16 246 | #define HASH_HEADER_SIZE 72 247 | 248 | #define RANDOM_TYPE_LEAN 0 249 | #define RANDOM_TYPE_XOSHIRO 1 250 | 251 | #define LT_U256(X,Y) (X.w != Y->w ? X.w < Y->w : X.z != Y->z ? X.z < Y->z : X.y != Y->y ? 
X.y < Y->y : X.x < Y->x) 252 | 253 | #ifndef cl_khr_int64_base_atomics 254 | global int lock = false; 255 | #endif 256 | 257 | #if defined(NVIDIA_CUDA) && (__COMPUTE_MAJOR__ > 6 || (__COMPUTE_MAJOR__ == 6 && __COMPUTE_MINOR__ >= 1)) 258 | #define amul4bit(X,Y,Z) _amul4bit((constant uint32_t*)(X), (private uint32_t*)(Y), (uint32_t *)(Z)) 259 | void STATIC inline _amul4bit(__constant uint32_t packed_vec1[32], uint32_t packed_vec2[32], uint32_t *ret) { 260 | // We assume each 32 bits have four values: A0 B0 C0 D0 261 | uint32_t res = 0; 262 | #pragma unroll 263 | for (int i=0; i>0)&0xf)*((b4[i].x>>0)&0xf); 294 | res += ((a4[i].x>>4)&0xf)*((b4[i].x>>4)&0xf); 295 | res += ((a4[i].y>>0)&0xf)*((b4[i].y>>0)&0xf); 296 | res += ((a4[i].y>>4)&0xf)*((b4[i].y>>4)&0xf); 297 | res += ((a4[i].z>>0)&0xf)*((b4[i].z>>0)&0xf); 298 | res += ((a4[i].z>>4)&0xf)*((b4[i].z>>4)&0xf); 299 | res += ((a4[i].w>>0)&0xf)*((b4[i].w>>0)&0xf); 300 | res += ((a4[i].w>>4)&0xf)*((b4[i].w>>4)&0xf); 301 | } 302 | *ret = res; 303 | #else 304 | ushort4 res = 0; 305 | for (int i=0; i> 4; 369 | hash_part[2*i+1] = hash_.bytes[i] & 0x0F; 370 | } 371 | #endif 372 | 373 | uint32_t product1, product2; 374 | #if defined(NVIDIA_CUDA) || defined(__FORCE_AMD_V_DOT8_U32_U4__) 375 | #pragma unroll 376 | #endif 377 | for (int rowId=0; rowId<32; rowId++){ 378 | #if __FORCE_AMD_V_DOT8_U32_U4__ == 1 379 | amul4bit(matrix + 64*rowId, hash_.bytes, &product1); 380 | amul4bit(matrix + 64*rowId+32, hash_.bytes, &product2); 381 | #else 382 | amul4bit(matrix + 128*rowId, hash_part, &product1); 383 | amul4bit(matrix + 128*rowId+64, hash_part, &product2); 384 | #endif 385 | product1 >>= 10; 386 | product2 >>= 10; 387 | // hash2_.bytes[rowId] = hash_.bytes[rowId] ^ bitselect(product1, product2, 0x0000000FU); 388 | hash2_.bytes[rowId] = hash_.bytes[rowId] ^ ((uint8_t)((product1 << 4) | (uint8_t)(product2))); 389 | } 390 | buffer[0] = hash2_.hash.x; 391 | buffer[1] = hash2_.hash.y; 392 | buffer[2] = hash2_.hash.z; 393 | buffer[3] = 
hash2_.hash.w; 394 | #pragma unroll 395 | for(int i=4; i<10; i++) buffer[i] = 0; 396 | 397 | hash(heavyP, (const ulong*)buffer, &hash_.hash); 398 | 399 | if (LT_U256(hash_.hash, target)){ 400 | //printf("%lu: %lu < %lu: %d %d\n", nonce, ((uint64_t *)hash_)[3], target[3], ((uint64_t *)hash_)[3] < target[3], LT_U256((uint64_t *)hash_, target)); 401 | #ifdef cl_khr_int64_base_atomics 402 | atom_cmpxchg(final_nonce, 0, nonce); 403 | #else 404 | if (!atom_cmpxchg(&lock, 0, 1)) { 405 | *final_nonce = nonce; 406 | //for(int i=0;i<4;i++) final_hash[i] = ((uint64_t volatile *)hash_)[i]; 407 | } 408 | #endif 409 | } 410 | /*if (nonceId==1) { 411 | //printf("%lu: %lu < %lu: %d %d\n", nonce, ((uint64_t *)hash2_)[3], target[3], ((uint64_t *)hash_)[3] < target[3]); 412 | *final_nonce = nonce; 413 | for(int i=0;i<4;i++) final_hash[i] = ((uint64_t volatile *)hash_)[i]; 414 | }*/ 415 | } 416 | -------------------------------------------------------------------------------- /plugins/opencl/src/worker.rs: -------------------------------------------------------------------------------- 1 | use crate::cli::NonceGenEnum; 2 | use crate::Error; 3 | use include_dir::{include_dir, Dir}; 4 | use kaspa_miner::xoshiro256starstar::Xoshiro256StarStar; 5 | use kaspa_miner::Worker; 6 | use log::{info, warn}; 7 | use opencl3::command_queue::{CommandQueue, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE}; 8 | use opencl3::context::Context; 9 | use opencl3::device::Device; 10 | use opencl3::event::{release_event, retain_event, wait_for_events}; 11 | use opencl3::kernel::{ExecuteKernel, Kernel}; 12 | use opencl3::memory::{Buffer, ClMem, CL_MAP_WRITE, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE, CL_MEM_WRITE_ONLY}; 13 | use opencl3::platform::Platform; 14 | use opencl3::program::{Program, CL_FINITE_MATH_ONLY, CL_MAD_ENABLE, CL_STD_2_0}; 15 | use opencl3::types::{cl_event, cl_uchar, cl_ulong, CL_BLOCKING}; 16 | use rand::{thread_rng, Fill, RngCore}; 17 | use std::borrow::Borrow; 18 | use std::ffi::c_void; 19 | use 
std::ptr; 20 | use std::sync::Arc; 21 | 22 | static BINARY_DIR: Dir = include_dir!("./plugins/opencl/resources/bin/"); 23 | static PROGRAM_SOURCE: &str = include_str!("../resources/kaspa-opencl.cl"); 24 | 25 | pub struct OpenCLGPUWorker { 26 | context: Arc, 27 | random: NonceGenEnum, 28 | local_size: usize, 29 | workload: usize, 30 | 31 | heavy_hash: Kernel, 32 | 33 | queue: CommandQueue, 34 | 35 | random_state: Buffer, 36 | final_nonce: Buffer, 37 | final_hash: Buffer<[cl_ulong; 4]>, 38 | 39 | hash_header: Buffer, 40 | matrix: Buffer, 41 | target: Buffer, 42 | 43 | events: Vec, 44 | experimental_amd: bool, 45 | } 46 | 47 | impl Worker for OpenCLGPUWorker { 48 | fn id(&self) -> String { 49 | let device = Device::new(self.context.default_device()); 50 | device.name().unwrap() 51 | } 52 | 53 | fn load_block_constants(&mut self, hash_header: &[u8; 72], matrix: &[[u16; 64]; 64], target: &[u64; 4]) { 54 | let cl_uchar_matrix = match self.experimental_amd { 55 | true => matrix 56 | .iter() 57 | .flat_map(|row| row.chunks(2).map(|v| ((v[0] << 4) | v[1]) as cl_uchar)) 58 | .collect::>(), 59 | false => matrix.iter().flat_map(|row| row.map(|v| v as cl_uchar)).collect::>(), 60 | }; 61 | self.queue 62 | .enqueue_write_buffer(&mut self.final_nonce, CL_BLOCKING, 0, &[0], &[]) 63 | .map_err(|e| e.to_string()) 64 | .unwrap() 65 | .wait() 66 | .unwrap(); 67 | self.queue 68 | .enqueue_write_buffer(&mut self.hash_header, CL_BLOCKING, 0, hash_header, &[]) 69 | .map_err(|e| e.to_string()) 70 | .unwrap() 71 | .wait() 72 | .unwrap(); 73 | self.queue 74 | .enqueue_write_buffer(&mut self.matrix, CL_BLOCKING, 0, cl_uchar_matrix.as_slice(), &[]) 75 | .map_err(|e| e.to_string()) 76 | .unwrap() 77 | .wait() 78 | .unwrap(); 79 | let copy_target = self 80 | .queue 81 | .enqueue_write_buffer(&mut self.target, CL_BLOCKING, 0, target, &[]) 82 | .map_err(|e| e.to_string()) 83 | .unwrap(); 84 | 85 | self.events = vec![copy_target.get()]; 86 | for event in &self.events { 87 | 
retain_event(*event).unwrap(); 88 | } 89 | } 90 | 91 | fn calculate_hash(&mut self, _nonces: Option<&Vec>, nonce_mask: u64, nonce_fixed: u64) { 92 | if self.random == NonceGenEnum::Lean { 93 | self.queue 94 | .enqueue_write_buffer(&mut self.random_state, CL_BLOCKING, 0, &[thread_rng().next_u64()], &[]) 95 | .map_err(|e| e.to_string()) 96 | .unwrap() 97 | .wait() 98 | .unwrap(); 99 | } 100 | let random_type: cl_uchar = match self.random { 101 | NonceGenEnum::Lean => 0, 102 | NonceGenEnum::Xoshiro => 1, 103 | }; 104 | let kernel_event = ExecuteKernel::new(&self.heavy_hash) 105 | .set_arg(&(self.local_size as u64)) 106 | .set_arg(&nonce_mask) 107 | .set_arg(&nonce_fixed) 108 | .set_arg(&self.hash_header) 109 | .set_arg(&self.matrix) 110 | .set_arg(&self.target) 111 | .set_arg(&random_type) 112 | .set_arg(&self.random_state) 113 | .set_arg(&self.final_nonce) 114 | .set_arg(&self.final_hash) 115 | .set_global_work_size(self.workload) 116 | .set_event_wait_list(self.events.borrow()) 117 | .enqueue_nd_range(&self.queue) 118 | .map_err(|e| e.to_string()) 119 | .unwrap(); 120 | 121 | kernel_event.wait().unwrap(); 122 | 123 | /*let mut nonces = [0u64; 1]; 124 | let mut hash = [[0u64; 4]]; 125 | self.queue.enqueue_read_buffer(&self.final_nonce, CL_BLOCKING, 0, &mut nonces, &[]).map_err(|e| e.to_string()).unwrap(); 126 | self.queue.enqueue_read_buffer(&self.final_hash, CL_BLOCKING, 0, &mut hash, &[]).map_err(|e| e.to_string()).unwrap(); 127 | log::info!("Hash from kernel: {:?}", hash);*/ 128 | /*for event in &self.events{ 129 | release_event(*event).unwrap(); 130 | } 131 | let event = kernel_event.get(); 132 | self.events = vec!(event); 133 | retain_event(event);*/ 134 | } 135 | 136 | fn sync(&self) -> Result<(), Error> { 137 | wait_for_events(&self.events).map_err(|e| format!("waiting error code {}", e))?; 138 | for event in &self.events { 139 | release_event(*event).unwrap(); 140 | } 141 | Ok(()) 142 | } 143 | 144 | fn get_workload(&self) -> usize { 145 | self.workload as 
usize 146 | } 147 | 148 | fn copy_output_to(&mut self, nonces: &mut Vec) -> Result<(), Error> { 149 | self.queue 150 | .enqueue_read_buffer(&self.final_nonce, CL_BLOCKING, 0, nonces, &[]) 151 | .map_err(|e| e.to_string()) 152 | .unwrap(); 153 | Ok(()) 154 | } 155 | } 156 | 157 | impl OpenCLGPUWorker { 158 | pub fn new( 159 | device: Device, 160 | workload: f32, 161 | is_absolute: bool, 162 | experimental_amd: bool, 163 | mut use_binary: bool, 164 | random: &NonceGenEnum, 165 | ) -> Result { 166 | let name = 167 | device.board_name_amd().unwrap_or_else(|_| device.name().unwrap_or_else(|_| "Unknown Device".into())); 168 | info!("{}: Using OpenCL", name); 169 | let version = device.version().unwrap_or_else(|_| "unkown version".into()); 170 | info!( 171 | "{}: Device supports {} with extensions: {}", 172 | name, 173 | version, 174 | device.extensions().unwrap_or_else(|_| "NA".into()) 175 | ); 176 | 177 | let local_size = device.max_work_group_size().map_err(|e| e.to_string())?; 178 | let chosen_workload = match is_absolute { 179 | true => workload as usize, 180 | false => { 181 | let max_work_group_size = 182 | (local_size * (device.max_compute_units().map_err(|e| e.to_string())? 
as usize)) as f32; 183 | (workload * max_work_group_size) as usize 184 | } 185 | }; 186 | info!("{}: Chosen workload is {}", name, chosen_workload); 187 | let context = 188 | Arc::new(Context::from_device(&device).unwrap_or_else(|_| panic!("{}::Context::from_device failed", name))); 189 | let context_ref = unsafe { Arc::as_ptr(&context).as_ref().unwrap() }; 190 | 191 | let options = match experimental_amd { 192 | // true => "-D __FORCE_AMD_V_DOT4_U32_U8__=1 ", 193 | true => "-D __FORCE_AMD_V_DOT8_U32_U4__=1 ", 194 | false => "", 195 | }; 196 | 197 | let experimental_amd_use = !matches!( 198 | device.name().unwrap_or_else(|_| "Unknown".into()).to_lowercase().as_str(), 199 | "tahiti" | "ellesmere" | "gfx1010" | "gfx906" | "gfx908" 200 | ); 201 | 202 | let program = match use_binary { 203 | true => { 204 | let mut device_name = device.name().unwrap_or_else(|_| "Unknown".into()).to_lowercase(); 205 | if device_name.contains(':') { 206 | device_name = device_name.split_once(':').expect("We checked for `:`").0.to_string(); 207 | } 208 | info!("{}: Looking for binary for {}", name, device_name); 209 | match BINARY_DIR.get_file(format!("{}_kaspa-opencl.bin", device_name)) { 210 | Some(binary) => { 211 | Program::create_and_build_from_binary(&context, &[binary.contents()], "").unwrap_or_else(|e|{ 212 | //Program::create_and_build_from_binary(&context, &[include_bytes!("../resources/kaspa-opencl-linked.bc")], "").unwrap_or_else(|e|{ 213 | warn!("{}::Program::create_and_build_from_source failed: {}. Reverting to compiling from source", name, e); 214 | use_binary = false; 215 | from_source(&context, &device, options).unwrap_or_else(|e| panic!("{}::Program::create_and_build_from_binary failed: {}", name, e)) 216 | }) 217 | } 218 | None => { 219 | warn!("Binary file not found for {}. 
Reverting to compiling from source.", device_name); 220 | use_binary = false; 221 | from_source(&context, &device, options) 222 | .unwrap_or_else(|e| panic!("{}::Program::create_and_build_from_binary failed: {}", name, e)) 223 | } 224 | } 225 | } 226 | false => from_source(&context, &device, options) 227 | .unwrap_or_else(|e| panic!("{}::Program::create_and_build_from_binary failed: {}", name, e)), 228 | }; 229 | info!("Kernels: {:?}", program.kernel_names()); 230 | let heavy_hash = 231 | Kernel::create(&program, "heavy_hash").unwrap_or_else(|_| panic!("{}::Kernel::create failed", name)); 232 | 233 | let queue = 234 | CommandQueue::create_with_properties(&context, device.id(), CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0) 235 | .unwrap_or_else(|_| panic!("{}::CommandQueue::create_with_properties failed", name)); 236 | 237 | let final_nonce = Buffer::::create(context_ref, CL_MEM_READ_WRITE, 1, ptr::null_mut()) 238 | .expect("Buffer allocation failed"); 239 | let final_hash = Buffer::<[cl_ulong; 4]>::create(context_ref, CL_MEM_WRITE_ONLY, 1, ptr::null_mut()) 240 | .expect("Buffer allocation failed"); 241 | 242 | let hash_header = Buffer::::create(context_ref, CL_MEM_READ_ONLY, 72, ptr::null_mut()) 243 | .expect("Buffer allocation failed"); 244 | let matrix = Buffer::::create(context_ref, CL_MEM_READ_ONLY, 64 * 64, ptr::null_mut()) 245 | .expect("Buffer allocation failed"); 246 | let target = Buffer::::create(context_ref, CL_MEM_READ_ONLY, 4, ptr::null_mut()) 247 | .expect("Buffer allocation failed"); 248 | 249 | let mut seed = [1u64; 4]; 250 | seed.try_fill(&mut rand::thread_rng())?; 251 | 252 | let random_state = match random { 253 | NonceGenEnum::Xoshiro => { 254 | info!("Using xoshiro for nonce-generation"); 255 | let random_state = 256 | Buffer::::create(context_ref, CL_MEM_READ_WRITE, 4 * chosen_workload, ptr::null_mut()) 257 | .expect("Buffer allocation failed"); 258 | let rand_state = 259 | 
Xoshiro256StarStar::new(&seed).iter_jump_state().take(chosen_workload).collect::>(); 260 | let mut random_state_local: *mut c_void = std::ptr::null_mut::(); 261 | info!("{}: Generating initial seed. This may take some time.", name); 262 | 263 | queue 264 | .enqueue_map_buffer( 265 | &random_state, 266 | CL_BLOCKING, 267 | CL_MAP_WRITE, 268 | 0, 269 | 32 * chosen_workload, 270 | &mut random_state_local, 271 | &[], 272 | ) 273 | .map_err(|e| e.to_string())? 274 | .wait() 275 | .unwrap(); 276 | if random_state_local.is_null() { 277 | return Err(format!("{}::could not load random state vector to memory. Consider changing random or lowering workload", name).into()); 278 | } 279 | unsafe { 280 | random_state_local.copy_from(rand_state.as_ptr() as *mut c_void, 32 * chosen_workload); 281 | } 282 | // queue.enqueue_svm_unmap(&random_state,&[]).map_err(|e| e.to_string())?; 283 | queue 284 | .enqueue_unmap_mem_object(random_state.get(), random_state_local, &[]) 285 | .map_err(|e| e.to_string()) 286 | .unwrap() 287 | .wait() 288 | .unwrap(); 289 | info!("{}: Done generating initial seed", name); 290 | random_state 291 | } 292 | NonceGenEnum::Lean => { 293 | info!("Using lean nonce-generation"); 294 | let mut random_state = Buffer::::create(context_ref, CL_MEM_READ_WRITE, 1, ptr::null_mut()) 295 | .expect("Buffer allocation failed"); 296 | queue 297 | .enqueue_write_buffer(&mut random_state, CL_BLOCKING, 0, &[thread_rng().next_u64()], &[]) 298 | .map_err(|e| e.to_string()) 299 | .unwrap() 300 | .wait() 301 | .unwrap(); 302 | random_state 303 | } 304 | }; 305 | Ok(Self { 306 | context, 307 | local_size, 308 | workload: chosen_workload, 309 | random: *random, 310 | heavy_hash, 311 | random_state, 312 | queue, 313 | final_nonce, 314 | final_hash, 315 | hash_header, 316 | matrix, 317 | target, 318 | events: Vec::::new(), 319 | experimental_amd: ((experimental_amd | use_binary) & experimental_amd_use), 320 | }) 321 | } 322 | } 323 | 324 | fn from_source(context: &Context, device: 
&Device, options: &str) -> Result { 325 | let version = device.version()?; 326 | let v = version.split(' ').nth(1).unwrap(); 327 | let mut compile_options = options.to_string(); 328 | compile_options += CL_MAD_ENABLE; 329 | compile_options += CL_FINITE_MATH_ONLY; 330 | if v == "2.0" || v == "2.1" || v == "3.0" { 331 | info!("Compiling with OpenCl 2"); 332 | compile_options += CL_STD_2_0; 333 | } 334 | compile_options += &match Platform::new(device.platform().unwrap()).name() { 335 | Ok(name) => format!( 336 | "-D{} ", 337 | name.chars() 338 | .map(|c| match c.is_ascii_alphanumeric() { 339 | true => c, 340 | false => '_', 341 | }) 342 | .collect::() 343 | .to_uppercase() 344 | ), 345 | Err(_) => String::new(), 346 | }; 347 | compile_options += &match device.compute_capability_major_nv() { 348 | Ok(major) => format!("-D __COMPUTE_MAJOR__={} ", major), 349 | Err(_) => String::new(), 350 | }; 351 | compile_options += &match device.compute_capability_minor_nv() { 352 | Ok(minor) => format!("-D __COMPUTE_MINOR__={} ", minor), 353 | Err(_) => String::new(), 354 | }; 355 | 356 | // Hack to recreate the AMD flags 357 | compile_options += &match device.pcie_id_amd() { 358 | Ok(_) => { 359 | let device_name = device.name().unwrap_or_else(|_| "Unknown".into()).to_lowercase(); 360 | format!("-D OPENCL_PLATFORM_AMD -D __{}__ ", device_name) 361 | } 362 | Err(_) => String::new(), 363 | }; 364 | 365 | info!("Build OpenCL with {}", compile_options); 366 | 367 | Program::create_and_build_from_source(context, PROGRAM_SOURCE, compile_options.as_str()) 368 | } 369 | --------------------------------------------------------------------------------