├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── ci ├── build.sh ├── buildkite.yml ├── docker-run.sh ├── docker-sgx │ ├── Dockerfile │ └── build.sh ├── env.sh ├── upload-ci-artifact.sh └── upload-github-release-asset.sh └── src ├── Makefile ├── config.h ├── cuda-crypt ├── aes.h ├── aes_cbc.cu ├── aes_core.cu ├── aes_locl.h ├── chacha.h ├── chacha20_core.cu ├── chacha_cbc.cu ├── common.cu ├── cryptoerr.h ├── modes.h ├── modes_lcl.h ├── perftime.h └── test.cu ├── cuda-ecc-ed25519 ├── common.cu ├── ed25519.h ├── fe.cu ├── fe.h ├── fixedint.h ├── ge.cu ├── ge.h ├── gpu_ctx.cu ├── gpu_ctx.h ├── keypair.cu ├── license.txt ├── main.cu ├── perftime.h ├── precomp_data.h ├── sc.cu ├── sc.h ├── seed.cu ├── sha512.cu ├── sha512.h ├── sign.cu ├── vanity.cu └── verify.cu ├── cuda-headers └── gpu_common.h ├── cuda-poh-verify └── poh_verify.cu ├── cuda-sha256 ├── sha256.cu └── tomcrypt_macros.h ├── gpu-common.mk ├── jerasure-sys ├── Cargo.toml ├── build.rs ├── gf-complete └── jerasure ├── sgx-ecc-ed25519 ├── Makefile ├── add_scalar.c ├── build.sh ├── ed25519.h ├── fe.c ├── fe.h ├── fixedint.h ├── ge.c ├── ge.h ├── key_exchange.c ├── keypair.c ├── precomp_data.h ├── sc.c ├── sc.h ├── seed.c ├── sha512.c ├── sha512.h ├── sign.c └── verify.c └── sgx ├── build.sh ├── signing ├── Makefile ├── signing.config.xml ├── signing.edl ├── signing.lds ├── signing_internal.h ├── signing_public.h ├── signing_trusted.c └── signing_untrusted.c └── test ├── Makefile └── signing_test.c /.gitignore: -------------------------------------------------------------------------------- 1 | # temp folder 2 | /temp/ 3 | 4 | # build output folders 5 | /libs/ 6 | /dist/ 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 Solana Labs, Inc. 
2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | OS := $(shell uname) 2 | 3 | all: 4 | ifeq ($(OS),Darwin) 5 | SO=dylib 6 | else 7 | SO=so 8 | all: cuda_crypt 9 | endif 10 | 11 | V=release 12 | 13 | .PHONY:cuda_crypt 14 | cuda_crypt: 15 | $(MAKE) V=$(V) -C src 16 | 17 | DESTDIR ?= dist 18 | install: 19 | mkdir -p $(DESTDIR) 20 | ifneq ($(OS),Darwin) 21 | cp -f src/$(V)/libcuda-crypt.so $(DESTDIR) 22 | endif 23 | ls -lh $(DESTDIR) 24 | 25 | .PHONY:clean 26 | clean: 27 | $(MAKE) V=$(V) -C src clean 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # A CUDA based ed25519 vanity key finder (in Base58) 2 | 3 | This is a GPU based vanity key finder. It does not currently use a CSPRNG and 4 | any key generated by this tool is 100% not secure to use. Great fun for Tour de 5 | Sol though. 6 | 7 | ## Configure 8 | Open `src/config.h` and add any prefixes you want to scan for to the list. 
9 | 10 | ## Building 11 | Make sure your CUDA binaries are in your path, and build: 12 | 13 | ```bash 14 | $ export PATH=/usr/local/cuda/bin:$PATH 15 | $ make -j$(nproc) 16 | ``` 17 | 18 | ## Running 19 | 20 | ```bash 21 | LD_LIBRARY_PATH=./src/release ./src/release/cuda_ed25519_vanity 22 | ``` 23 | -------------------------------------------------------------------------------- /ci/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | cd "$(dirname "$0")/.." 4 | 5 | source ci/env.sh 6 | source ci/upload-ci-artifact.sh 7 | 8 | CUDA_HOMES=( 9 | /usr/local/cuda-10.0 10 | /usr/local/cuda-10.1 11 | ) 12 | 13 | for CUDA_HOME in "${CUDA_HOMES[@]}"; do 14 | CUDA_HOME_BASE="$(basename "$CUDA_HOME")" 15 | echo "--- Build: $CUDA_HOME_BASE" 16 | ( 17 | if [[ ! -d $CUDA_HOME/lib64 ]]; then 18 | echo "Invalid CUDA_HOME: $CUDA_HOME" 19 | exit 1 20 | fi 21 | 22 | set -x 23 | export LD_LIBRARY_PATH=$CUDA_HOME/lib64 24 | export PATH=$PATH:$HOME/.cargo/bin/:$CUDA_HOME/bin 25 | export DESTDIR=dist/$CUDA_HOME_BASE 26 | 27 | make -j"$(nproc)" 28 | make install 29 | make clean 30 | 31 | cp -vf "$CUDA_HOME"/version.txt "$DESTDIR"/cuda-version.txt 32 | ) 33 | done 34 | 35 | echo --- Build SGX 36 | ( 37 | set -x 38 | ci/docker-run.sh solanalabs/sgxsdk src/sgx-ecc-ed25519/build.sh 39 | ci/docker-run.sh solanalabs/sgxsdk src/sgx/build.sh 40 | ) 41 | 42 | echo --- Create tarball 43 | ( 44 | set -x 45 | cd dist 46 | git rev-parse HEAD | tee solana-perf-HEAD.txt 47 | tar zcvf ../solana-perf.tgz ./* 48 | ) 49 | 50 | upload-ci-artifact solana-perf.tgz 51 | 52 | [[ -n $CI_TAG ]] || exit 0 53 | ci/upload-github-release-asset.sh solana-perf.tgz 54 | exit 0 55 | -------------------------------------------------------------------------------- /ci/buildkite.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - command: "ci/build.sh" 3 | name: "build" 4 | timeout_in_minutes: 20 5 | 
agents: 6 | - "queue=cuda" 7 | -------------------------------------------------------------------------------- /ci/docker-run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | usage() { 5 | echo "Usage: $0 [--nopull] [docker image name] [command]" 6 | echo 7 | echo Runs command in the specified docker image with 8 | echo a CI-appropriate environment. 9 | echo 10 | echo "--nopull Skip the dockerhub image update" 11 | echo "--shell Skip command and enter an interactive shell" 12 | echo 13 | } 14 | 15 | cd "$(dirname "$0")/.." 16 | 17 | INTERACTIVE=false 18 | if [[ $1 = --shell ]]; then 19 | INTERACTIVE=true 20 | shift 21 | fi 22 | 23 | NOPULL=false 24 | if [[ $1 = --nopull ]]; then 25 | NOPULL=true 26 | shift 27 | fi 28 | 29 | IMAGE="$1" 30 | if [[ -z "$IMAGE" ]]; then 31 | echo Error: image not defined 32 | exit 1 33 | fi 34 | 35 | $NOPULL || docker pull "$IMAGE" 36 | shift 37 | 38 | ARGS=( 39 | --workdir /solana 40 | --volume "$PWD:/solana" 41 | --rm 42 | ) 43 | 44 | if [[ -n $CI ]]; then 45 | # Share the real ~/.cargo between docker containers in CI for speed 46 | ARGS+=(--volume "$HOME:/home") 47 | else 48 | # Avoid sharing ~/.cargo when building locally to avoid a mixed macOS/Linux 49 | # ~/.cargo 50 | ARGS+=(--volume "$PWD:/home") 51 | fi 52 | ARGS+=(--env "CARGO_HOME=/home/.cargo") 53 | 54 | # kcov tries to set the personality of the binary which docker 55 | # doesn't allow by default. 
56 | ARGS+=(--security-opt "seccomp=unconfined") 57 | 58 | # Ensure files are created with the current host uid/gid 59 | if [[ -z "$SOLANA_DOCKER_RUN_NOSETUID" ]]; then 60 | ARGS+=(--user "$(id -u):$(id -g)") 61 | fi 62 | 63 | # Environment variables to propagate into the container 64 | ARGS+=( 65 | --env BUILDKITE 66 | --env BUILDKITE_AGENT_ACCESS_TOKEN 67 | --env BUILDKITE_BRANCH 68 | --env BUILDKITE_JOB_ID 69 | --env BUILDKITE_TAG 70 | --env CODECOV_TOKEN 71 | --env CRATES_IO_TOKEN 72 | --env SNAPCRAFT_CREDENTIALS_KEY 73 | ) 74 | 75 | if $INTERACTIVE; then 76 | if [[ -n $1 ]]; then 77 | echo 78 | echo "Note: '$*' ignored due to --shell argument" 79 | echo 80 | fi 81 | set -x 82 | exec docker run --interactive --tty "${ARGS[@]}" "$IMAGE" bash 83 | fi 84 | 85 | set -x 86 | exec docker run "${ARGS[@]}" "$IMAGE" "$@" 87 | -------------------------------------------------------------------------------- /ci/docker-sgx/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | RUN apt-get update && \ 5 | apt-get install -y build-essential ocaml ocamlbuild automake autoconf libtool wget python libssl-dev libcurl4-openssl-dev protobuf-compiler libprotobuf-dev sudo kmod vim curl git-core libprotobuf-c0-dev libboost-thread-dev libboost-system-dev liblog4cpp5-dev libjsoncpp-dev alien uuid-dev libxml2-dev cmake pkg-config expect 6 | 7 | 8 | RUN mkdir /root/sgx && mkdir /etc/init/ && \ 9 | wget -O /root/sgx/sdk.bin https://download.01.org/intel-sgx/linux-2.3.1/ubuntu18.04/sgx_linux_x64_sdk_2.3.101.46683.bin && \ 10 | wget -O /root/sgx/psw.deb https://download.01.org/intel-sgx/linux-2.3.1/ubuntu18.04/libsgx-enclave-common_2.3.101.46683-1_amd64.deb && \ 11 | cd /root/sgx && \ 12 | dpkg -i /root/sgx/psw.deb && \ 13 | chmod +x /root/sgx/sdk.bin && \ 14 | echo -e 'no\n/opt' | /root/sgx/sdk.bin && \ 15 | echo 'source /opt/sgxsdk/environment' >> /root/.bashrc && \ 16 | rm -rf 
/root/sgx/* 17 | 18 | WORKDIR /root 19 | 20 | -------------------------------------------------------------------------------- /ci/docker-sgx/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -ex 3 | 4 | cd "$(dirname "$0")" 5 | 6 | docker build -t solanalabs/sgxsdk . 7 | docker push solanalabs/sgxsdk 8 | 9 | -------------------------------------------------------------------------------- /ci/env.sh: -------------------------------------------------------------------------------- 1 | # 2 | # Normalized CI environment variables 3 | # 4 | # |source| me 5 | # 6 | 7 | if [[ -n $CI ]]; then 8 | export CI=1 9 | if [[ -n $TRAVIS ]]; then 10 | export CI_BRANCH=$TRAVIS_BRANCH 11 | export CI_BUILD_ID=$TRAVIS_BUILD_ID 12 | export CI_COMMIT=$TRAVIS_COMMIT 13 | export CI_JOB_ID=$TRAVIS_JOB_ID 14 | if $TRAVIS_PULL_REQUEST; then 15 | export CI_PULL_REQUEST=true 16 | else 17 | export CI_PULL_REQUEST= 18 | fi 19 | export CI_OS_NAME=$TRAVIS_OS_NAME 20 | export CI_REPO_SLUG=$TRAVIS_REPO_SLUG 21 | export CI_TAG=$TRAVIS_TAG 22 | elif [[ -n $BUILDKITE ]]; then 23 | export CI_BRANCH=$BUILDKITE_BRANCH 24 | export CI_BUILD_ID=$BUILDKITE_BUILD_ID 25 | export CI_COMMIT=$BUILDKITE_COMMIT 26 | export CI_JOB_ID=$BUILDKITE_JOB_ID 27 | # The standard BUILDKITE_PULL_REQUEST environment variable is always "false" due 28 | # to how solana-ci-gate is used to trigger PR builds rather than using the 29 | # standard Buildkite PR trigger. 
30 | if [[ $CI_BRANCH =~ ^pull/ ]]; then 31 | export CI_PULL_REQUEST=true 32 | else 33 | export CI_PULL_REQUEST= 34 | fi 35 | export CI_OS_NAME=linux 36 | export CI_REPO_SLUG=$BUILDKITE_ORGANIZATION_SLUG/$BUILDKITE_PIPELINE_SLUG 37 | # TRIGGERED_BUILDKITE_TAG is a workaround to propagate BUILDKITE_TAG into 38 | # the solana-secondary builder 39 | if [[ -n $TRIGGERED_BUILDKITE_TAG ]]; then 40 | export CI_TAG=$TRIGGERED_BUILDKITE_TAG 41 | else 42 | export CI_TAG=$BUILDKITE_TAG 43 | fi 44 | elif [[ -n $APPVEYOR ]]; then 45 | export CI_BRANCH=$APPVEYOR_REPO_BRANCH 46 | export CI_BUILD_ID=$APPVEYOR_BUILD_ID 47 | export CI_COMMIT=$APPVEYOR_REPO_COMMIT 48 | export CI_JOB_ID=$APPVEYOR_JOB_ID 49 | if [[ -n $APPVEYOR_PULL_REQUEST_NUMBER ]]; then 50 | export CI_PULL_REQUEST=true 51 | else 52 | export CI_PULL_REQUEST= 53 | fi 54 | if [[ $CI_LINUX = True ]]; then 55 | export CI_OS_NAME=linux 56 | elif [[ $CI_WINDOWS = True ]]; then 57 | export CI_OS_NAME=windows 58 | fi 59 | export CI_REPO_SLUG=$APPVEYOR_REPO_NAME 60 | export CI_TAG=$APPVEYOR_REPO_TAG_NAME 61 | fi 62 | else 63 | export CI= 64 | export CI_BRANCH= 65 | export CI_BUILD_ID= 66 | export CI_COMMIT= 67 | export CI_JOB_ID= 68 | export CI_OS_NAME= 69 | export CI_PULL_REQUEST= 70 | export CI_REPO_SLUG= 71 | export CI_TAG= 72 | fi 73 | 74 | cat < 14 | # include 15 | # ifdef __cplusplus 16 | extern "C" { 17 | # endif 18 | 19 | # define AES_ENCRYPT 1 20 | # define AES_DECRYPT 0 21 | 22 | /* 23 | * Because array size can't be a const in C, the following two are macros. 24 | * Both sizes are in bytes. 
25 | */ 26 | # define AES_MAXNR 14 27 | # define AES_BLOCK_SIZE 16 28 | 29 | /* This should be a hidden type, but EVP requires that the size be known */ 30 | struct aes_key_st { 31 | # ifdef AES_LONG 32 | unsigned long rd_key[4 * (AES_MAXNR + 1)]; 33 | # else 34 | unsigned int rd_key[4 * (AES_MAXNR + 1)]; 35 | # endif 36 | int rounds; 37 | }; 38 | typedef struct aes_key_st AES_KEY; 39 | 40 | const char *AES_options(void); 41 | 42 | int AES_set_encrypt_key(const unsigned char *userKey, const int bits, 43 | AES_KEY *key); 44 | int AES_set_decrypt_key(const unsigned char *userKey, const int bits, 45 | AES_KEY *key); 46 | 47 | __host__ __device__ void AES_encrypt(const unsigned char *in, unsigned char *out, 48 | const AES_KEY *key); 49 | 50 | void AES_decrypt(const unsigned char *in, unsigned char *out, 51 | const AES_KEY *key); 52 | 53 | void AES_ecb_encrypt(const unsigned char *in, unsigned char *out, 54 | const AES_KEY *key, const int enc); 55 | 56 | void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, 57 | size_t length, const AES_KEY *key, 58 | unsigned char *ivec, const int enc); 59 | 60 | void AES_cbc_encrypt_many(const unsigned char *in, unsigned char *out, 61 | size_t length, const AES_KEY *key, 62 | unsigned char *ivec, uint32_t num_keys, float* time_us); 63 | 64 | void AES_cfb128_encrypt(const unsigned char *in, unsigned char *out, 65 | size_t length, const AES_KEY *key, 66 | unsigned char *ivec, int *num, const int enc); 67 | void AES_cfb1_encrypt(const unsigned char *in, unsigned char *out, 68 | size_t length, const AES_KEY *key, 69 | unsigned char *ivec, int *num, const int enc); 70 | void AES_cfb8_encrypt(const unsigned char *in, unsigned char *out, 71 | size_t length, const AES_KEY *key, 72 | unsigned char *ivec, int *num, const int enc); 73 | void AES_ofb128_encrypt(const unsigned char *in, unsigned char *out, 74 | size_t length, const AES_KEY *key, 75 | unsigned char *ivec, int *num); 76 | /* NB: the IV is _two_ blocks long */ 77 | void 
AES_ige_encrypt(const unsigned char *in, unsigned char *out, 78 | size_t length, const AES_KEY *key, 79 | unsigned char *ivec, const int enc); 80 | /* NB: the IV is _four_ blocks long */ 81 | void AES_bi_ige_encrypt(const unsigned char *in, unsigned char *out, 82 | size_t length, const AES_KEY *key, 83 | const AES_KEY *key2, const unsigned char *ivec, 84 | const int enc); 85 | 86 | int AES_wrap_key(AES_KEY *key, const unsigned char *iv, 87 | unsigned char *out, 88 | const unsigned char *in, unsigned int inlen); 89 | int AES_unwrap_key(AES_KEY *key, const unsigned char *iv, 90 | unsigned char *out, 91 | const unsigned char *in, unsigned int inlen); 92 | 93 | 94 | # ifdef __cplusplus 95 | } 96 | # endif 97 | 98 | #endif 99 | -------------------------------------------------------------------------------- /src/cuda-crypt/aes_cbc.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved. 3 | * 4 | * Licensed under the OpenSSL license (the "License"). You may not use 5 | * this file except in compliance with the License. 
You can obtain a copy 6 | * in the file LICENSE in the source distribution or at 7 | * https://www.openssl.org/source/license.html 8 | */ 9 | 10 | #include 11 | #include "common.cu" 12 | #include "aes.h" 13 | #include "modes.h" 14 | #include "perftime.h" 15 | #include "modes_lcl.h" 16 | #include "aes_core.cu" 17 | #include "gpu_common.h" 18 | 19 | #if !defined(STRICT_ALIGNMENT) && !defined(PEDANTIC) 20 | # define STRICT_ALIGNMENT 0 21 | #endif 22 | 23 | __host__ __device__ void aes_cbc128_encrypt(const unsigned char* in, unsigned char* out, 24 | uint32_t len, const AES_KEY* key, 25 | unsigned char* ivec, 26 | const u32* l_te) 27 | { 28 | size_t n; 29 | unsigned char *iv = ivec; 30 | 31 | if (len == 0) 32 | return; 33 | 34 | #if !defined(OPENSSL_SMALL_FOOTPRINT) 35 | if (STRICT_ALIGNMENT && 36 | ((size_t)in | (size_t)out | (size_t)ivec) % sizeof(size_t) != 0) { 37 | while (len >= 16) { 38 | for (n = 0; n < 16; ++n) 39 | out[n] = in[n] ^ iv[n]; 40 | AES_encrypt(out, out, key, l_te); 41 | iv = out; 42 | len -= 16; 43 | in += 16; 44 | out += 16; 45 | } 46 | } else { 47 | while (len >= 16) { 48 | for (n = 0; n < 16; n += sizeof(size_t)) 49 | *(size_t *)(out + n) = 50 | *(size_t *)(in + n) ^ *(size_t *)(iv + n); 51 | AES_encrypt(out, out, key, l_te); 52 | iv = out; 53 | len -= 16; 54 | in += 16; 55 | out += 16; 56 | } 57 | } 58 | #endif 59 | while (len) { 60 | for (n = 0; n < 16 && n < len; ++n) 61 | out[n] = in[n] ^ iv[n]; 62 | for (; n < 16; ++n) 63 | out[n] = iv[n]; 64 | AES_encrypt(out, out, key, l_te); 65 | iv = out; 66 | if (len <= 16) 67 | break; 68 | len -= 16; 69 | in += 16; 70 | out += 16; 71 | } 72 | memcpy(ivec, iv, 16); 73 | } 74 | 75 | void CRYPTO_cbc128_decrypt(const unsigned char *in, unsigned char *out, 76 | size_t len, const AES_KEY *key, 77 | unsigned char ivec[16], block128_f block) 78 | { 79 | size_t n; 80 | union { 81 | size_t t[16 / sizeof(size_t)]; 82 | unsigned char c[16]; 83 | } tmp; 84 | 85 | if (len == 0) 86 | return; 87 | 88 | #if 
!defined(OPENSSL_SMALL_FOOTPRINT) 89 | if (in != out) { 90 | const unsigned char *iv = ivec; 91 | 92 | if (STRICT_ALIGNMENT && 93 | ((size_t)in | (size_t)out | (size_t)ivec) % sizeof(size_t) != 0) { 94 | while (len >= 16) { 95 | (*block) (in, out, key); 96 | for (n = 0; n < 16; ++n) 97 | out[n] ^= iv[n]; 98 | iv = in; 99 | len -= 16; 100 | in += 16; 101 | out += 16; 102 | } 103 | } else if (16 % sizeof(size_t) == 0) { /* always true */ 104 | while (len >= 16) { 105 | size_t *out_t = (size_t *)out, *iv_t = (size_t *)iv; 106 | 107 | (*block) (in, out, key); 108 | for (n = 0; n < 16 / sizeof(size_t); n++) 109 | out_t[n] ^= iv_t[n]; 110 | iv = in; 111 | len -= 16; 112 | in += 16; 113 | out += 16; 114 | } 115 | } 116 | memcpy(ivec, iv, 16); 117 | } else { 118 | if (STRICT_ALIGNMENT && 119 | ((size_t)in | (size_t)out | (size_t)ivec) % sizeof(size_t) != 0) { 120 | unsigned char c; 121 | while (len >= 16) { 122 | (*block) (in, tmp.c, key); 123 | for (n = 0; n < 16; ++n) { 124 | c = in[n]; 125 | out[n] = tmp.c[n] ^ ivec[n]; 126 | ivec[n] = c; 127 | } 128 | len -= 16; 129 | in += 16; 130 | out += 16; 131 | } 132 | } else if (16 % sizeof(size_t) == 0) { /* always true */ 133 | while (len >= 16) { 134 | size_t c, *out_t = (size_t *)out, *ivec_t = (size_t *)ivec; 135 | const size_t *in_t = (const size_t *)in; 136 | 137 | (*block) (in, tmp.c, key); 138 | for (n = 0; n < 16 / sizeof(size_t); n++) { 139 | c = in_t[n]; 140 | out_t[n] = tmp.t[n] ^ ivec_t[n]; 141 | ivec_t[n] = c; 142 | } 143 | len -= 16; 144 | in += 16; 145 | out += 16; 146 | } 147 | } 148 | } 149 | #endif 150 | while (len) { 151 | unsigned char c; 152 | (*block) (in, tmp.c, key); 153 | for (n = 0; n < 16 && n < len; ++n) { 154 | c = in[n]; 155 | out[n] = tmp.c[n] ^ ivec[n]; 156 | ivec[n] = c; 157 | } 158 | if (len <= 16) { 159 | for (; n < 16; ++n) 160 | ivec[n] = in[n]; 161 | break; 162 | } 163 | len -= 16; 164 | in += 16; 165 | out += 16; 166 | } 167 | } 168 | 169 | 170 | void AES_cbc_encrypt(const unsigned char 
*in, unsigned char *out, 171 | size_t len, const AES_KEY *key, 172 | unsigned char *ivec, const int enc) 173 | { 174 | 175 | if (enc) 176 | aes_cbc128_encrypt(in, out, len, key, ivec, g_Te0); 177 | else 178 | CRYPTO_cbc128_decrypt(in, out, len, key, ivec, 179 | (block128_f) AES_decrypt); 180 | } 181 | 182 | __global__ void CRYPTO_cbc128_encrypt_kernel(const unsigned char* input, unsigned char* output, 183 | size_t length, const AES_KEY* keys, 184 | unsigned char* ivec, uint32_t num_keys, 185 | unsigned char* sha_state, 186 | uint32_t* sample_idx, 187 | uint32_t sample_len, 188 | uint32_t block_offset) 189 | { 190 | size_t i = (size_t)(blockIdx.x * blockDim.x + threadIdx.x); 191 | 192 | //#if 0 193 | #ifdef __CUDA_ARCH__ 194 | __shared__ u32 l_te[256]; 195 | uint32_t tid = threadIdx.x; 196 | l_te[tid] = g_Te0[tid]; 197 | __syncthreads(); 198 | #else 199 | const u32* l_te = g_Te0; 200 | #endif 201 | 202 | if (i < num_keys) { 203 | aes_cbc128_encrypt(input, &output[i * length], length, &keys[i], &ivec[i * AES_BLOCK_SIZE], l_te); 204 | 205 | /*for (uint32_t j = 0; j < sample_len; j++) { 206 | if (sample_idx[j] > block_offset && sample_idx[j] < (block_offset + length)) { 207 | } 208 | }*/ 209 | } 210 | } 211 | 212 | void AES_cbc_encrypt_many(const unsigned char *in, unsigned char *out, 213 | size_t length, const AES_KEY *keys, 214 | unsigned char *ivec, 215 | uint32_t num_keys, 216 | float* time_us) 217 | { 218 | 219 | if (length < BLOCK_SIZE) { 220 | printf("ERROR! 
block size(%d) > length(%zu)\n", BLOCK_SIZE, length); 221 | return; 222 | } 223 | uint8_t* in_device = NULL; 224 | uint8_t* in_device0 = NULL; 225 | uint8_t* in_device1 = NULL; 226 | AES_KEY* keys_device = NULL; 227 | uint8_t* output_device = NULL; 228 | uint8_t* output_device0 = NULL; 229 | uint8_t* output_device1 = NULL; 230 | uint8_t* ivec_device = NULL; 231 | 232 | uint8_t* sha_state_device = NULL; 233 | 234 | uint32_t sample_len = 0; 235 | uint32_t* samples_device = NULL; 236 | 237 | CUDA_CHK(cudaMalloc(&in_device0, BLOCK_SIZE)); 238 | CUDA_CHK(cudaMalloc(&in_device1, BLOCK_SIZE)); 239 | 240 | size_t ctx_size = sizeof(AES_KEY) * num_keys; 241 | CUDA_CHK(cudaMalloc(&keys_device, ctx_size)); 242 | CUDA_CHK(cudaMemcpy(keys_device, keys, ctx_size, cudaMemcpyHostToDevice)); 243 | 244 | size_t ivec_size = AES_BLOCK_SIZE * num_keys; 245 | CUDA_CHK(cudaMalloc(&ivec_device, ivec_size)); 246 | CUDA_CHK(cudaMemcpy(ivec_device, ivec, ivec_size, cudaMemcpyHostToDevice)); 247 | 248 | size_t output_size = (size_t)num_keys * (size_t)BLOCK_SIZE; 249 | CUDA_CHK(cudaMalloc(&output_device0, output_size)); 250 | CUDA_CHK(cudaMalloc(&output_device1, output_size)); 251 | 252 | int num_threads_per_block = 256; 253 | int num_blocks = (num_keys + num_threads_per_block - 1) / num_threads_per_block; 254 | 255 | perftime_t start, end; 256 | 257 | get_time(&start); 258 | 259 | cudaStream_t stream, stream0, stream1; 260 | cudaStreamCreate(&stream0); 261 | cudaStreamCreate(&stream1); 262 | 263 | ssize_t slength = length; 264 | size_t num_data_blocks = (length + BLOCK_SIZE - 1) / (BLOCK_SIZE); 265 | 266 | printf("num_blocks: %d threads_per_block: %d ivec_size: %zu keys size: %zu in: %p ind0: %p ind1: %p output_size: %zu num_data_blocks: %zu\n", 267 | num_blocks, num_threads_per_block, ivec_size, ctx_size, in, in_device0, in_device1, output_size, num_data_blocks); 268 | 269 | for (uint32_t i = 0;; i++) { 270 | //if (i & 0x1) { 271 | if (0) { 272 | in_device = in_device1; 273 | output_device = 
output_device1; 274 | stream = stream1; 275 | } else { 276 | in_device = in_device0; 277 | output_device = output_device0; 278 | stream = stream0; 279 | } 280 | size_t size = std::min(slength, (ssize_t)BLOCK_SIZE); 281 | //printf("copying to in_device: %p in: %p size: %zu num_data_blocks: %zu\n", in_device, in, size, num_data_blocks); 282 | CUDA_CHK(cudaMemcpyAsync(in_device, in, size, cudaMemcpyHostToDevice, stream)); 283 | 284 | CRYPTO_cbc128_encrypt_kernel<<<num_blocks, num_threads_per_block, 0, stream>>>( 285 | in_device, output_device, size, 286 | keys_device, ivec_device, num_keys, 287 | sha_state_device, 288 | samples_device, 289 | sample_len, 290 | i * BLOCK_SIZE); 291 | #if 0 292 | for (uint32_t j = 0; j < num_keys; j++) { 293 | size_t block_offset = j * length + i * BLOCK_SIZE; 294 | size_t out_offset = j * size; 295 | //printf("i: %d j: %d copy %zi b block offset: %zu output offset: %zu num_data_blocks: %zu\n", 296 | // i, j, size, block_offset, out_offset, num_data_blocks); 297 | CUDA_CHK(cudaMemcpy(&out[block_offset], &output_device[out_offset], size, cudaMemcpyDeviceToHost)); 298 | } 299 | #endif 300 | 301 | slength -= BLOCK_SIZE; 302 | in += BLOCK_SIZE; 303 | if (slength <= 0) { 304 | break; 305 | } 306 | } 307 | 308 | CUDA_CHK(cudaMemcpy(ivec, ivec_device, ivec_size, cudaMemcpyDeviceToHost)); 309 | get_time(&end); 310 | *time_us = get_diff(&start, &end); 311 | 312 | //printf("gpu time: %f us\n", get_diff(&start, &end)); 313 | 314 | // Release the per-call device buffers and streams so repeated calls do not leak GPU resources 315 | CUDA_CHK(cudaFree(in_device0)); 316 | CUDA_CHK(cudaFree(in_device1)); 317 | CUDA_CHK(cudaFree(keys_device)); 318 | CUDA_CHK(cudaFree(ivec_device)); 319 | CUDA_CHK(cudaFree(output_device0)); 320 | CUDA_CHK(cudaFree(output_device1)); 321 | CUDA_CHK(cudaStreamDestroy(stream0)); 322 | CUDA_CHK(cudaStreamDestroy(stream1)); 323 | } 324 | 325 | 326 | -------------------------------------------------------------------------------- /src/cuda-crypt/aes_locl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved. 3 | * 4 | * Licensed under the OpenSSL license (the "License"). You may not use 5 | * this file except in compliance with the License. 
You can obtain a copy 6 | * in the file LICENSE in the source distribution or at 7 | * https://www.openssl.org/source/license.html 8 | */ 9 | 10 | #ifndef HEADER_AES_LOCL_H 11 | # define HEADER_AES_LOCL_H 12 | 13 | # include 14 | # include 15 | # include 16 | 17 | #if 0 18 | # if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) 19 | # define SWAP(x) (_lrotl(x, 8) & 0x00ff00ff | _lrotr(x, 8) & 0xff00ff00) 20 | # define GETU32(p) SWAP(*((u32 *)(p))) 21 | # define PUTU32(ct, st) { *((u32 *)(ct)) = SWAP((st)); } 22 | # else 23 | # define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] << 8) ^ ((u32)(pt)[3])) 24 | # define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); } 25 | # endif 26 | #endif 27 | 28 | # ifdef AES_LONG 29 | typedef unsigned long u32; 30 | # else 31 | typedef unsigned int u32; 32 | # endif 33 | typedef unsigned short u16; 34 | typedef unsigned char u8; 35 | 36 | # define MAXKC (256/32) 37 | # define MAXKB (256/8) 38 | # define MAXNR 14 39 | 40 | /* This controls loop-unrolling in aes_core.c */ 41 | #ifndef __CUDA_ARCH__ 42 | # define FULL_UNROLL 43 | #endif 44 | 45 | #endif /* !HEADER_AES_LOCL_H */ 46 | -------------------------------------------------------------------------------- /src/cuda-crypt/chacha.h: -------------------------------------------------------------------------------- 1 | #ifndef HEADER_CHACHA_H 2 | # define HEADER_CHACHA_H 3 | 4 | #include 5 | # include 6 | # ifdef __cplusplus 7 | extern "C" { 8 | # endif 9 | 10 | #define CHACHA_KEY_SIZE 32 11 | #define CHACHA_NONCE_SIZE 12 12 | #define CHACHA_BLOCK_SIZE 64 13 | #define CHACHA_ROUNDS 500 14 | #define SAMPLE_SIZE 32 15 | 16 | void __host__ __device__ chacha20_ctr_encrypt(const uint8_t *in, uint8_t *out, size_t in_len, 17 | const uint8_t key[CHACHA_KEY_SIZE], const uint8_t nonce[CHACHA_NONCE_SIZE], 18 | uint32_t counter); 19 | 20 | void 
cuda_chacha20_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t in_len, 21 | const uint8_t key[CHACHA_KEY_SIZE], uint8_t* ivec); 22 | 23 | void chacha_ctr_encrypt_many(const unsigned char* in, unsigned char* out, 24 | size_t length, 25 | const uint8_t* keys, 26 | const uint8_t* nonces, 27 | uint32_t num_keys, 28 | float* time_us); 29 | 30 | void chacha_cbc_encrypt_many(const uint8_t* in, uint8_t* out, 31 | size_t length, const uint8_t *keys, 32 | uint8_t* ivec, 33 | uint32_t num_keys, 34 | float* time_us); 35 | 36 | void chacha_cbc_encrypt_many_sample(const uint8_t* in, 37 | void* out, 38 | size_t length, 39 | const uint8_t *keys, 40 | uint8_t* ivecs, 41 | uint32_t num_keys, 42 | const uint64_t* samples, 43 | uint32_t num_samples, 44 | uint64_t starting_block_offset, 45 | float* time_us); 46 | 47 | void chacha_end_sha_state(const void* sha_state, uint8_t* out, uint32_t num_keys); 48 | 49 | void chacha_init_sha_state(void* sha_state, uint32_t num_keys); 50 | 51 | # ifdef __cplusplus 52 | } 53 | # endif 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /src/cuda-crypt/chacha20_core.cu: -------------------------------------------------------------------------------- 1 | #include "chacha.h" 2 | 3 | #define ROTL32(v, n) (((v) << (n)) | ((v) >> (32 - (n)))) 4 | 5 | #define ROTATE(v, c) ROTL32((v), (c)) 6 | 7 | #define XOR(v, w) ((v) ^ (w)) 8 | 9 | #define PLUS(x, y) ((x) + (y)) 10 | 11 | #define U32TO8_LITTLE(p, v) \ 12 | { (p)[0] = ((v) ) & 0xff; (p)[1] = ((v) >> 8) & 0xff; \ 13 | (p)[2] = ((v) >> 16) & 0xff; (p)[3] = ((v) >> 24) & 0xff; } 14 | 15 | #define U8TO32_LITTLE(p) \ 16 | (((u32)((p)[0]) ) | ((u32)((p)[1]) << 8) | \ 17 | ((u32)((p)[2]) << 16) | ((u32)((p)[3]) << 24) ) 18 | 19 | #define QUARTERROUND(a,b,c,d) \ 20 | x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \ 21 | x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \ 22 | x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \ 23 | 
x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7); 24 | 25 | #ifdef __CUDA_ARCH__ 26 | #define SIGMA_DEF __device__ __constant__ 27 | #else 28 | #define SIGMA_DEF 29 | #endif 30 | 31 | // sigma contains the ChaCha constants, which happen to be an ASCII string. 32 | static const uint8_t SIGMA_DEF sigma[16] = { 'e', 'x', 'p', 'a', 'n', 'd', ' ', '3', 33 | '2', '-', 'b', 'y', 't', 'e', ' ', 'k' }; 34 | 35 | static void __host__ __device__ chacha20_encrypt(const u32 input[16], 36 | unsigned char output[64], 37 | int num_rounds) 38 | { 39 | u32 x[16]; 40 | int i; 41 | memcpy(x, input, sizeof(u32) * 16); 42 | for (i = num_rounds; i > 0; i -= 2) { 43 | QUARTERROUND( 0, 4, 8,12) 44 | QUARTERROUND( 1, 5, 9,13) 45 | QUARTERROUND( 2, 6,10,14) 46 | QUARTERROUND( 3, 7,11,15) 47 | QUARTERROUND( 0, 5,10,15) 48 | QUARTERROUND( 1, 6,11,12) 49 | QUARTERROUND( 2, 7, 8,13) 50 | QUARTERROUND( 3, 4, 9,14) 51 | } 52 | for (i = 0; i < 16; ++i) { 53 | x[i] = PLUS(x[i], input[i]); 54 | } 55 | for (i = 0; i < 16; ++i) { 56 | U32TO8_LITTLE(output + 4 * i, x[i]); 57 | } 58 | } 59 | 60 | void __host__ __device__ chacha20_ctr_encrypt(const uint8_t *in, uint8_t *out, size_t in_len, 61 | const uint8_t key[CHACHA_KEY_SIZE], 62 | const uint8_t nonce[CHACHA_NONCE_SIZE], 63 | uint32_t counter) 64 | { 65 | uint32_t input[16]; 66 | uint8_t buf[64]; 67 | size_t todo, i; 68 | 69 | input[0] = U8TO32_LITTLE(sigma + 0); 70 | input[1] = U8TO32_LITTLE(sigma + 4); 71 | input[2] = U8TO32_LITTLE(sigma + 8); 72 | input[3] = U8TO32_LITTLE(sigma + 12); 73 | 74 | input[4] = U8TO32_LITTLE(key + 0); 75 | input[5] = U8TO32_LITTLE(key + 4); 76 | input[6] = U8TO32_LITTLE(key + 8); 77 | input[7] = U8TO32_LITTLE(key + 12); 78 | 79 | input[8] = U8TO32_LITTLE(key + 16); 80 | input[9] = U8TO32_LITTLE(key + 20); 81 | input[10] = U8TO32_LITTLE(key + 24); 82 | input[11] = U8TO32_LITTLE(key + 28); 83 | 84 | input[12] = counter; 85 | input[13] = U8TO32_LITTLE(nonce + 0); 86 | input[14] = U8TO32_LITTLE(nonce + 4); 87 | 
input[15] = U8TO32_LITTLE(nonce + 8); 88 | 89 | while (in_len > 0) { 90 | todo = sizeof(buf); 91 | if (in_len < todo) { 92 | todo = in_len; 93 | } 94 | 95 | chacha20_encrypt(input, buf, CHACHA_ROUNDS); 96 | for (i = 0; i < todo; i++) { 97 | out[i] = in[i] ^ buf[i]; 98 | } 99 | 100 | out += todo; 101 | in += todo; 102 | in_len -= todo; 103 | 104 | input[12]++; 105 | } 106 | } 107 | 108 | 109 | -------------------------------------------------------------------------------- /src/cuda-crypt/common.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifndef COMMON_CU 4 | #define COMMON_CU 5 | 6 | #define BLOCK_SIZE (4 * 1024) 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /src/cuda-crypt/cryptoerr.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated by util/mkerr.pl DO NOT EDIT 3 | * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. 4 | * 5 | * Licensed under the OpenSSL license (the "License"). You may not use 6 | * this file except in compliance with the License. You can obtain a copy 7 | * in the file LICENSE in the source distribution or at 8 | * https://www.openssl.org/source/license.html 9 | */ 10 | 11 | #ifndef HEADER_CRYPTOERR_H 12 | # define HEADER_CRYPTOERR_H 13 | 14 | # ifdef __cplusplus 15 | extern "C" 16 | # endif 17 | int ERR_load_CRYPTO_strings(void); 18 | 19 | /* 20 | * CRYPTO function codes. 
21 | */ 22 | # define CRYPTO_F_CMAC_CTX_NEW 120 23 | # define CRYPTO_F_CRYPTO_DUP_EX_DATA 110 24 | # define CRYPTO_F_CRYPTO_FREE_EX_DATA 111 25 | # define CRYPTO_F_CRYPTO_GET_EX_NEW_INDEX 100 26 | # define CRYPTO_F_CRYPTO_MEMDUP 115 27 | # define CRYPTO_F_CRYPTO_NEW_EX_DATA 112 28 | # define CRYPTO_F_CRYPTO_OCB128_COPY_CTX 121 29 | # define CRYPTO_F_CRYPTO_OCB128_INIT 122 30 | # define CRYPTO_F_CRYPTO_SET_EX_DATA 102 31 | # define CRYPTO_F_FIPS_MODE_SET 109 32 | # define CRYPTO_F_GET_AND_LOCK 113 33 | # define CRYPTO_F_OPENSSL_ATEXIT 114 34 | # define CRYPTO_F_OPENSSL_BUF2HEXSTR 117 35 | # define CRYPTO_F_OPENSSL_FOPEN 119 36 | # define CRYPTO_F_OPENSSL_HEXSTR2BUF 118 37 | # define CRYPTO_F_OPENSSL_INIT_CRYPTO 116 38 | # define CRYPTO_F_OPENSSL_LH_NEW 126 39 | # define CRYPTO_F_OPENSSL_SK_DEEP_COPY 127 40 | # define CRYPTO_F_OPENSSL_SK_DUP 128 41 | # define CRYPTO_F_PKEY_HMAC_INIT 123 42 | # define CRYPTO_F_PKEY_POLY1305_INIT 124 43 | # define CRYPTO_F_PKEY_SIPHASH_INIT 125 44 | # define CRYPTO_F_SK_RESERVE 129 45 | 46 | /* 47 | * CRYPTO reason codes. 48 | */ 49 | # define CRYPTO_R_FIPS_MODE_NOT_SUPPORTED 101 50 | # define CRYPTO_R_ILLEGAL_HEX_DIGIT 102 51 | # define CRYPTO_R_ODD_NUMBER_OF_DIGITS 103 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /src/cuda-crypt/modes.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved. 3 | * 4 | * Licensed under the OpenSSL license (the "License"). You may not use 5 | * this file except in compliance with the License. 
You can obtain a copy 6 | * in the file LICENSE in the source distribution or at 7 | * https://www.openssl.org/source/license.html 8 | */ 9 | 10 | #ifndef HEADER_MODES_H 11 | # define HEADER_MODES_H 12 | 13 | # include 14 | 15 | # ifdef __cplusplus 16 | extern "C" { 17 | # endif 18 | typedef void (*block128_f) (const unsigned char in[16], 19 | unsigned char out[16], const void *key); 20 | 21 | typedef void (*cbc128_f) (const unsigned char *in, unsigned char *out, 22 | size_t len, const void *key, 23 | unsigned char ivec[16], int enc); 24 | 25 | typedef void (*ctr128_f) (const unsigned char *in, unsigned char *out, 26 | size_t blocks, const void *key, 27 | const unsigned char ivec[16]); 28 | 29 | typedef void (*ccm128_f) (const unsigned char *in, unsigned char *out, 30 | size_t blocks, const void *key, 31 | const unsigned char ivec[16], 32 | unsigned char cmac[16]); 33 | 34 | __host__ __device__ void CRYPTO_cbc128_encrypt(const unsigned char *in, unsigned char *out, 35 | uint32_t len, const void *key, 36 | unsigned char* ivec, const uint32_t* Te3); 37 | 38 | void CRYPTO_cbc128_decrypt(const unsigned char *in, unsigned char *out, 39 | size_t len, const void *key, 40 | unsigned char ivec[16], block128_f block); 41 | 42 | void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out, 43 | size_t len, const void *key, 44 | unsigned char ivec[16], 45 | unsigned char ecount_buf[16], unsigned int *num, 46 | block128_f block); 47 | 48 | void CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out, 49 | size_t len, const void *key, 50 | unsigned char ivec[16], 51 | unsigned char ecount_buf[16], 52 | unsigned int *num, ctr128_f ctr); 53 | 54 | void CRYPTO_ofb128_encrypt(const unsigned char *in, unsigned char *out, 55 | size_t len, const void *key, 56 | unsigned char ivec[16], int *num, 57 | block128_f block); 58 | 59 | void CRYPTO_cfb128_encrypt(const unsigned char *in, unsigned char *out, 60 | size_t len, const void *key, 61 | unsigned char ivec[16], 
int *num, 62 | int enc, block128_f block); 63 | void CRYPTO_cfb128_8_encrypt(const unsigned char *in, unsigned char *out, 64 | size_t length, const void *key, 65 | unsigned char ivec[16], int *num, 66 | int enc, block128_f block); 67 | void CRYPTO_cfb128_1_encrypt(const unsigned char *in, unsigned char *out, 68 | size_t bits, const void *key, 69 | unsigned char ivec[16], int *num, 70 | int enc, block128_f block); 71 | 72 | size_t CRYPTO_cts128_encrypt_block(const unsigned char *in, 73 | unsigned char *out, size_t len, 74 | const void *key, unsigned char ivec[16], 75 | block128_f block); 76 | size_t CRYPTO_cts128_encrypt(const unsigned char *in, unsigned char *out, 77 | size_t len, const void *key, 78 | unsigned char ivec[16], cbc128_f cbc); 79 | size_t CRYPTO_cts128_decrypt_block(const unsigned char *in, 80 | unsigned char *out, size_t len, 81 | const void *key, unsigned char ivec[16], 82 | block128_f block); 83 | size_t CRYPTO_cts128_decrypt(const unsigned char *in, unsigned char *out, 84 | size_t len, const void *key, 85 | unsigned char ivec[16], cbc128_f cbc); 86 | 87 | size_t CRYPTO_nistcts128_encrypt_block(const unsigned char *in, 88 | unsigned char *out, size_t len, 89 | const void *key, 90 | unsigned char ivec[16], 91 | block128_f block); 92 | size_t CRYPTO_nistcts128_encrypt(const unsigned char *in, unsigned char *out, 93 | size_t len, const void *key, 94 | unsigned char ivec[16], cbc128_f cbc); 95 | size_t CRYPTO_nistcts128_decrypt_block(const unsigned char *in, 96 | unsigned char *out, size_t len, 97 | const void *key, 98 | unsigned char ivec[16], 99 | block128_f block); 100 | size_t CRYPTO_nistcts128_decrypt(const unsigned char *in, unsigned char *out, 101 | size_t len, const void *key, 102 | unsigned char ivec[16], cbc128_f cbc); 103 | 104 | typedef struct gcm128_context GCM128_CONTEXT; 105 | 106 | GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block); 107 | void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block); 108 | void 
CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, 109 | size_t len); 110 | int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad, 111 | size_t len); 112 | int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, 113 | const unsigned char *in, unsigned char *out, 114 | size_t len); 115 | int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, 116 | const unsigned char *in, unsigned char *out, 117 | size_t len); 118 | int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, 119 | const unsigned char *in, unsigned char *out, 120 | size_t len, ctr128_f stream); 121 | int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, 122 | const unsigned char *in, unsigned char *out, 123 | size_t len, ctr128_f stream); 124 | int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag, 125 | size_t len); 126 | void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len); 127 | void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx); 128 | 129 | typedef struct ccm128_context CCM128_CONTEXT; 130 | 131 | void CRYPTO_ccm128_init(CCM128_CONTEXT *ctx, 132 | unsigned int M, unsigned int L, void *key, 133 | block128_f block); 134 | int CRYPTO_ccm128_setiv(CCM128_CONTEXT *ctx, const unsigned char *nonce, 135 | size_t nlen, size_t mlen); 136 | void CRYPTO_ccm128_aad(CCM128_CONTEXT *ctx, const unsigned char *aad, 137 | size_t alen); 138 | int CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx, const unsigned char *inp, 139 | unsigned char *out, size_t len); 140 | int CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx, const unsigned char *inp, 141 | unsigned char *out, size_t len); 142 | int CRYPTO_ccm128_encrypt_ccm64(CCM128_CONTEXT *ctx, const unsigned char *inp, 143 | unsigned char *out, size_t len, 144 | ccm128_f stream); 145 | int CRYPTO_ccm128_decrypt_ccm64(CCM128_CONTEXT *ctx, const unsigned char *inp, 146 | unsigned char *out, size_t len, 147 | ccm128_f stream); 148 | size_t CRYPTO_ccm128_tag(CCM128_CONTEXT *ctx, unsigned char *tag, size_t len); 149 | 150 | typedef struct 
xts128_context XTS128_CONTEXT; 151 | 152 | int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, 153 | const unsigned char iv[16], 154 | const unsigned char *inp, unsigned char *out, 155 | size_t len, int enc); 156 | 157 | size_t CRYPTO_128_wrap(void *key, const unsigned char *iv, 158 | unsigned char *out, 159 | const unsigned char *in, size_t inlen, 160 | block128_f block); 161 | 162 | size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv, 163 | unsigned char *out, 164 | const unsigned char *in, size_t inlen, 165 | block128_f block); 166 | size_t CRYPTO_128_wrap_pad(void *key, const unsigned char *icv, 167 | unsigned char *out, const unsigned char *in, 168 | size_t inlen, block128_f block); 169 | size_t CRYPTO_128_unwrap_pad(void *key, const unsigned char *icv, 170 | unsigned char *out, const unsigned char *in, 171 | size_t inlen, block128_f block); 172 | 173 | # ifndef OPENSSL_NO_OCB 174 | typedef struct ocb128_context OCB128_CONTEXT; 175 | 176 | typedef void (*ocb128_f) (const unsigned char *in, unsigned char *out, 177 | size_t blocks, const void *key, 178 | size_t start_block_num, 179 | unsigned char offset_i[16], 180 | const unsigned char L_[][16], 181 | unsigned char checksum[16]); 182 | 183 | OCB128_CONTEXT *CRYPTO_ocb128_new(void *keyenc, void *keydec, 184 | block128_f encrypt, block128_f decrypt, 185 | ocb128_f stream); 186 | int CRYPTO_ocb128_init(OCB128_CONTEXT *ctx, void *keyenc, void *keydec, 187 | block128_f encrypt, block128_f decrypt, 188 | ocb128_f stream); 189 | int CRYPTO_ocb128_copy_ctx(OCB128_CONTEXT *dest, OCB128_CONTEXT *src, 190 | void *keyenc, void *keydec); 191 | int CRYPTO_ocb128_setiv(OCB128_CONTEXT *ctx, const unsigned char *iv, 192 | size_t len, size_t taglen); 193 | int CRYPTO_ocb128_aad(OCB128_CONTEXT *ctx, const unsigned char *aad, 194 | size_t len); 195 | int CRYPTO_ocb128_encrypt(OCB128_CONTEXT *ctx, const unsigned char *in, 196 | unsigned char *out, size_t len); 197 | int CRYPTO_ocb128_decrypt(OCB128_CONTEXT *ctx, const 
unsigned char *in, 198 | unsigned char *out, size_t len); 199 | int CRYPTO_ocb128_finish(OCB128_CONTEXT *ctx, const unsigned char *tag, 200 | size_t len); 201 | int CRYPTO_ocb128_tag(OCB128_CONTEXT *ctx, unsigned char *tag, size_t len); 202 | void CRYPTO_ocb128_cleanup(OCB128_CONTEXT *ctx); 203 | # endif /* OPENSSL_NO_OCB */ 204 | 205 | # ifdef __cplusplus 206 | } 207 | # endif 208 | 209 | #endif 210 | -------------------------------------------------------------------------------- /src/cuda-crypt/modes_lcl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved. 3 | * 4 | * Licensed under the OpenSSL license (the "License"). You may not use 5 | * this file except in compliance with the License. You can obtain a copy 6 | * in the file LICENSE in the source distribution or at 7 | * https://www.openssl.org/source/license.html 8 | */ 9 | 10 | #include "modes.h" 11 | 12 | #if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__) 13 | typedef __int64 i64; 14 | typedef unsigned __int64 u64; 15 | # define U64(C) C##UI64 16 | #elif defined(__arch64__) 17 | typedef long i64; 18 | typedef unsigned long u64; 19 | # define U64(C) C##UL 20 | #else 21 | typedef long long i64; 22 | typedef unsigned long long u64; 23 | # define U64(C) C##ULL 24 | #endif 25 | 26 | typedef unsigned int u32; 27 | typedef unsigned char u8; 28 | 29 | #define STRICT_ALIGNMENT 1 30 | #ifndef PEDANTIC 31 | # if defined(__i386) || defined(__i386__) || \ 32 | defined(__x86_64) || defined(__x86_64__) || \ 33 | defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \ 34 | defined(__aarch64__) || \ 35 | defined(__s390__) || defined(__s390x__) 36 | # undef STRICT_ALIGNMENT 37 | # endif 38 | #endif 39 | 40 | #if !defined(PEDANTIC) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) 41 | 42 | # if defined(__CUDA_ARCH__) 43 | # undef STRICT_ALIGNMENT 44 | # define BSWAP4(x) 
__byte_perm(x, 0, 0x123) 45 | 46 | # elif defined(__GNUC__) && __GNUC__>=2 47 | 48 | # if defined(__x86_64) || defined(__x86_64__) 49 | # define BSWAP8(x) ({ u64 ret_=(x); \ 50 | asm ("bswapq %0" \ 51 | : "+r"(ret_)); ret_; }) 52 | # define BSWAP4(x) ({ u32 ret_=(x); \ 53 | asm ("bswapl %0" \ 54 | : "+r"(ret_)); ret_; }) 55 | # elif (defined(__i386) || defined(__i386__)) && !defined(I386_ONLY) 56 | # define BSWAP8(x) ({ u32 lo_=(u64)(x)>>32,hi_=(x); \ 57 | asm ("bswapl %0; bswapl %1" \ 58 | : "+r"(hi_),"+r"(lo_)); \ 59 | (u64)hi_<<32|lo_; }) 60 | # define BSWAP4(x) ({ u32 ret_=(x); \ 61 | asm ("bswapl %0" \ 62 | : "+r"(ret_)); ret_; }) 63 | # elif defined(__aarch64__) 64 | # define BSWAP8(x) ({ u64 ret_; \ 65 | asm ("rev %0,%1" \ 66 | : "=r"(ret_) : "r"(x)); ret_; }) 67 | # define BSWAP4(x) ({ u32 ret_; \ 68 | asm ("rev %w0,%w1" \ 69 | : "=r"(ret_) : "r"(x)); ret_; }) 70 | # elif (defined(__arm__) || defined(__arm)) && !defined(STRICT_ALIGNMENT) 71 | # define BSWAP8(x) ({ u32 lo_=(u64)(x)>>32,hi_=(x); \ 72 | asm ("rev %0,%0; rev %1,%1" \ 73 | : "+r"(hi_),"+r"(lo_)); \ 74 | (u64)hi_<<32|lo_; }) 75 | # define BSWAP4(x) ({ u32 ret_; \ 76 | asm ("rev %0,%1" \ 77 | : "=r"(ret_) : "r"((u32)(x))); \ 78 | ret_; }) 79 | # endif 80 | 81 | # elif defined(_MSC_VER) 82 | # if _MSC_VER>=1300 83 | # include 84 | # pragma intrinsic(_byteswap_uint64,_byteswap_ulong) 85 | # define BSWAP8(x) _byteswap_uint64((u64)(x)) 86 | # define BSWAP4(x) _byteswap_ulong((u32)(x)) 87 | 88 | # elif defined(_M_IX86) 89 | __inline u32 _bswap4(u32 val) 90 | { 91 | _asm mov eax, val _asm bswap eax} 92 | # define BSWAP4(x) _bswap4(x) 93 | 94 | # endif // MSC_VER > 1300 95 | # endif // def(MSC_VER) 96 | #endif 97 | 98 | #if defined(BSWAP4) && !defined(STRICT_ALIGNMENT) 99 | # define GETU32(p) BSWAP4(*(const u32 *)(p)) 100 | # define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v) 101 | #else 102 | # define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] << 8) ^ ((u32)(pt)[3])) 103 | # define 
PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); } 104 | #endif 105 | /*- GCM definitions */ typedef struct { 106 | u64 hi, lo; 107 | } u128; 108 | 109 | #ifdef TABLE_BITS 110 | # undef TABLE_BITS 111 | #endif 112 | /* 113 | * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should 114 | * never be set to 8 [or 1]. For further information see gcm128.c. 115 | */ 116 | #define TABLE_BITS 4 117 | 118 | struct gcm128_context { 119 | /* Following 6 names follow names in GCM specification */ 120 | union { 121 | u64 u[2]; 122 | u32 d[4]; 123 | u8 c[16]; 124 | size_t t[16 / sizeof(size_t)]; 125 | } Yi, EKi, EK0, len, Xi, H; 126 | /* 127 | * Relative position of Xi, H and pre-computed Htable is used in some 128 | * assembler modules, i.e. don't change the order! 129 | */ 130 | #if TABLE_BITS==8 131 | u128 Htable[256]; 132 | #else 133 | u128 Htable[16]; 134 | void (*gmult) (u64 Xi[2], const u128 Htable[16]); 135 | void (*ghash) (u64 Xi[2], const u128 Htable[16], const u8 *inp, 136 | size_t len); 137 | #endif 138 | unsigned int mres, ares; 139 | block128_f block; 140 | void *key; 141 | #if !defined(OPENSSL_SMALL_FOOTPRINT) 142 | unsigned char Xn[48]; 143 | #endif 144 | }; 145 | 146 | struct xts128_context { 147 | void *key1, *key2; 148 | block128_f block1, block2; 149 | }; 150 | 151 | struct ccm128_context { 152 | union { 153 | u64 u[2]; 154 | u8 c[16]; 155 | } nonce, cmac; 156 | u64 blocks; 157 | block128_f block; 158 | void *key; 159 | }; 160 | 161 | #ifndef OPENSSL_NO_OCB 162 | 163 | typedef union { 164 | u64 a[2]; 165 | unsigned char c[16]; 166 | } OCB_BLOCK; 167 | # define ocb_block16_xor(in1,in2,out) \ 168 | ( (out)->a[0]=(in1)->a[0]^(in2)->a[0], \ 169 | (out)->a[1]=(in1)->a[1]^(in2)->a[1] ) 170 | # if STRICT_ALIGNMENT 171 | # define ocb_block16_xor_misaligned(in1,in2,out) \ 172 | ocb_block_xor((in1)->c,(in2)->c,16,(out)->c) 173 | # else 174 | # define ocb_block16_xor_misaligned 
ocb_block16_xor 175 | # endif 176 | 177 | struct ocb128_context { 178 | /* Need both encrypt and decrypt key schedules for decryption */ 179 | block128_f encrypt; 180 | block128_f decrypt; 181 | void *keyenc; 182 | void *keydec; 183 | ocb128_f stream; /* direction dependent */ 184 | /* Key dependent variables. Can be reused if key remains the same */ 185 | size_t l_index; 186 | size_t max_l_index; 187 | OCB_BLOCK l_star; 188 | OCB_BLOCK l_dollar; 189 | OCB_BLOCK *l; 190 | /* Must be reset for each session */ 191 | struct { 192 | u64 blocks_hashed; 193 | u64 blocks_processed; 194 | OCB_BLOCK offset_aad; 195 | OCB_BLOCK sum; 196 | OCB_BLOCK offset; 197 | OCB_BLOCK checksum; 198 | } sess; 199 | }; 200 | #endif /* OPENSSL_NO_OCB */ 201 | -------------------------------------------------------------------------------- /src/cuda-crypt/perftime.h: -------------------------------------------------------------------------------- 1 | #ifndef PERFTIME_H 2 | #define PERFTIME_H 3 | 4 | #ifdef USE_RDTSC 5 | static inline uint64_t rdtsc() 6 | { 7 | unsigned int hi, lo; 8 | __asm__ volatile("rdtsc" : "=a" (lo), "=d" (hi)); 9 | return ((uint64_t)hi << 32) | lo; 10 | } 11 | 12 | typedef struct { 13 | uint64_t count; 14 | } perftime_t; 15 | 16 | #elif defined(USE_CLOCK_GETTIME) 17 | #include 18 | typedef struct timespec perftime_t; 19 | #else 20 | #include 21 | typedef struct timeval perftime_t; 22 | #endif 23 | 24 | static int get_time(perftime_t* t) { 25 | #ifdef USE_RDTSC 26 | t->count = rdtsc(); 27 | return 0; 28 | #elif defined(USE_CLOCK_GETTIME) 29 | return clock_gettime(CLOCK_MONOTONIC_RAW, t); 30 | //return clock_gettime(CLOCK_PROCESS_CPUTIME_ID, t); 31 | #else 32 | return gettimeofday(t, NULL /* timezone */); 33 | #endif 34 | } 35 | 36 | static double get_us(const perftime_t* time) { 37 | #ifdef USE_RDTSC 38 | return time->count; 39 | #elif defined(USE_CLOCK_GETTIME) 40 | return ((time->tv_nsec/1000) + (double)time->tv_sec * 1000000); 41 | #else 42 | return (time->tv_usec + 
(double)time->tv_sec * 1000000); 43 | #endif 44 | } 45 | 46 | static double get_diff(const perftime_t* start, const perftime_t* end) { 47 | return get_us(end) - get_us(start); 48 | } 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/common.cu: -------------------------------------------------------------------------------- 1 | 2 | #ifndef COMMON_CU 3 | #define COMMON_CU 4 | 5 | static uint64_t __host__ __device__ load_3(const unsigned char *in) { 6 | uint64_t result; 7 | 8 | result = (uint64_t) in[0]; 9 | result |= ((uint64_t) in[1]) << 8; 10 | result |= ((uint64_t) in[2]) << 16; 11 | 12 | return result; 13 | } 14 | 15 | static uint64_t __host__ __device__ load_4(const unsigned char *in) { 16 | uint64_t result; 17 | 18 | result = (uint64_t) in[0]; 19 | result |= ((uint64_t) in[1]) << 8; 20 | result |= ((uint64_t) in[2]) << 16; 21 | result |= ((uint64_t) in[3]) << 24; 22 | 23 | return result; 24 | } 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/ed25519.h: -------------------------------------------------------------------------------- 1 | #ifndef ED25519_H 2 | #define ED25519_H 3 | 4 | #include 5 | #include 6 | 7 | #if defined(_WIN32) 8 | #if defined(ED25519_BUILD_DLL) 9 | #define ED25519_DECLSPEC __declspec(dllexport) 10 | #elif defined(ED25519_DLL) 11 | #define ED25519_DECLSPEC __declspec(dllimport) 12 | #else 13 | #define ED25519_DECLSPEC 14 | #endif 15 | #else 16 | #define ED25519_DECLSPEC 17 | #endif 18 | 19 | 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | 24 | #ifndef ED25519_NO_SEED 25 | int ED25519_DECLSPEC ed25519_create_seed(unsigned char *seed); 26 | #endif 27 | 28 | #define PUB_KEY_SIZE 32 29 | #define PRIV_KEY_SIZE 64 30 | #define SEED_SIZE 32 31 | #define SCALAR_SIZE 32 32 | #define SIG_SIZE 64 33 | 34 | typedef struct { 35 | uint8_t* elems; 36 | uint32_t num; 37 | } gpu_Elems; 38 | 39 | 
void ED25519_DECLSPEC ed25519_create_keypair(unsigned char *public_key, unsigned char *private_key, const unsigned char *seed); 40 | void ED25519_DECLSPEC ed25519_sign(unsigned char *signature, const unsigned char *message, size_t message_len, const unsigned char *public_key, const unsigned char *private_key); 41 | 42 | void ED25519_DECLSPEC ed25519_sign_many(const gpu_Elems* elems, 43 | uint32_t num_elems, 44 | uint32_t message_size, 45 | uint32_t total_packets, 46 | uint32_t total_signatures, 47 | const uint32_t* message_lens, 48 | const uint32_t* public_key_offsets, 49 | const uint32_t* private_key_offsets, 50 | const uint32_t* message_start_offsets, 51 | uint8_t* signatures_out, 52 | uint8_t use_non_default_stream); 53 | 54 | int ED25519_DECLSPEC ed25519_verify(const unsigned char *signature, const unsigned char *message, uint32_t message_len, const unsigned char *public_key); 55 | 56 | void ED25519_DECLSPEC ed25519_verify_many(const gpu_Elems* elems, 57 | uint32_t num_elems, 58 | uint32_t message_size, 59 | uint32_t total_packets, 60 | uint32_t total_signatures, 61 | const uint32_t* message_lens, 62 | const uint32_t* public_key_offsets, 63 | const uint32_t* private_key_offsets, 64 | const uint32_t* message_start_offsets, 65 | uint8_t* out, 66 | uint8_t use_non_default_stream); 67 | 68 | void ED25519_DECLSPEC ed25519_add_scalar(unsigned char *public_key, unsigned char *private_key, const unsigned char *scalar); 69 | void ED25519_DECLSPEC ed25519_key_exchange(unsigned char *shared_secret, const unsigned char *public_key, const unsigned char *private_key); 70 | void ED25519_DECLSPEC ed25519_set_verbose(bool val); 71 | 72 | const char* ED25519_DECLSPEC ed25519_license(); 73 | bool ED25519_DECLSPEC ed25519_init(); 74 | 75 | int cuda_host_register(void* ptr, size_t size, unsigned int flags); 76 | int cuda_host_unregister(void* ptr); 77 | 78 | #ifdef __cplusplus 79 | } 80 | #endif 81 | 82 | #endif 83 | 
-------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/fe.h: -------------------------------------------------------------------------------- 1 | #ifndef FE_H 2 | #define FE_H 3 | 4 | #include "fixedint.h" 5 | 6 | 7 | /* 8 | fe means field element. 9 | Here the field is \Z/(2^255-19). 10 | An element t, entries t[0]...t[9], represents the integer 11 | t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9]. 12 | Bounds on each t[i] vary depending on context. 13 | */ 14 | 15 | 16 | typedef int32_t fe[10]; 17 | 18 | 19 | void __host__ __device__ fe_0(fe h); 20 | void __device__ __host__ fe_1(fe h); 21 | 22 | void __device__ __host__ fe_frombytes(fe h, const unsigned char *s); 23 | void __device__ __host__ fe_tobytes(unsigned char *s, const fe h); 24 | 25 | void __host__ __device__ fe_copy(fe h, const fe f); 26 | int __host__ __device__ fe_isnegative(const fe f); 27 | int __device__ __host__ fe_isnonzero(const fe f); 28 | void __host__ __device__ fe_cmov(fe f, const fe g, unsigned int b); 29 | void fe_cswap(fe f, fe g, unsigned int b); 30 | 31 | void __device__ __host__ fe_neg(fe h, const fe f); 32 | void __device__ __host__ fe_add(fe h, const fe f, const fe g); 33 | void __device__ __host__ fe_invert(fe out, const fe z); 34 | void __device__ __host__ fe_sq(fe h, const fe f); 35 | void __host__ __device__ fe_sq2(fe h, const fe f); 36 | void __device__ __host__ fe_mul(fe h, const fe f, const fe g); 37 | void fe_mul121666(fe h, fe f); 38 | void __device__ __host__ fe_pow22523(fe out, const fe z); 39 | void __device__ __host__ fe_sub(fe h, const fe f, const fe g); 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/fixedint.h: -------------------------------------------------------------------------------- 1 | /* 2 | Portable header to provide the 32 and 64 bits type. 3 | 4 | Not a compatible replacement for <stdint.h>, do not blindly use it as such. 
5 | */ 6 | 7 | #if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined(__WATCOMC__) && (defined(_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_) || defined(__UINT_FAST64_TYPE__)) )) && !defined(FIXEDINT_H_INCLUDED) 8 | #include 9 | #define FIXEDINT_H_INCLUDED 10 | 11 | #if defined(__WATCOMC__) && __WATCOMC__ >= 1250 && !defined(UINT64_C) 12 | #include 13 | #define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX)) 14 | #endif 15 | #endif 16 | 17 | 18 | #ifndef FIXEDINT_H_INCLUDED 19 | #define FIXEDINT_H_INCLUDED 20 | 21 | #include 22 | 23 | /* (u)int32_t */ 24 | #ifndef uint32_t 25 | #if (ULONG_MAX == 0xffffffffUL) 26 | typedef unsigned long uint32_t; 27 | #elif (UINT_MAX == 0xffffffffUL) 28 | typedef unsigned int uint32_t; 29 | #elif (USHRT_MAX == 0xffffffffUL) 30 | typedef unsigned short uint32_t; 31 | #endif 32 | #endif 33 | 34 | 35 | #ifndef int32_t 36 | #if (LONG_MAX == 0x7fffffffL) 37 | typedef signed long int32_t; 38 | #elif (INT_MAX == 0x7fffffffL) 39 | typedef signed int int32_t; 40 | #elif (SHRT_MAX == 0x7fffffffL) 41 | typedef signed short int32_t; 42 | #endif 43 | #endif 44 | 45 | 46 | /* (u)int64_t */ 47 | #if (defined(__STDC__) && defined(__STDC_VERSION__) && __STDC__ && __STDC_VERSION__ >= 199901L) 48 | typedef long long int64_t; 49 | typedef unsigned long long uint64_t; 50 | 51 | #define UINT64_C(v) v ##ULL 52 | #define INT64_C(v) v ##LL 53 | #elif defined(__GNUC__) 54 | __extension__ typedef long long int64_t; 55 | __extension__ typedef unsigned long long uint64_t; 56 | 57 | #define UINT64_C(v) v ##ULL 58 | #define INT64_C(v) v ##LL 59 | #elif defined(__MWERKS__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) || defined(__APPLE_CC__) || defined(_LONG_LONG) || defined(_CRAYC) 60 | typedef long long int64_t; 61 | typedef unsigned long long uint64_t; 62 | 63 | #define UINT64_C(v) v ##ULL 64 | #define INT64_C(v) v ##LL 65 | #elif (defined(__WATCOMC__) && 
defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined(__BORLANDC__) && __BORLANDC__ > 0x460) || defined(__alpha) || defined(__DECC) 66 | typedef __int64 int64_t; 67 | typedef unsigned __int64 uint64_t; 68 | 69 | #define UINT64_C(v) v ##UI64 70 | #define INT64_C(v) v ##I64 71 | #endif 72 | #endif 73 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/ge.cu: -------------------------------------------------------------------------------- 1 | #include "ge.h" 2 | #include "precomp_data.h" 3 | 4 | 5 | /* 6 | r = p + q 7 | */ 8 | 9 | void __host__ __device__ ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) { 10 | fe t0; 11 | fe_add(r->X, p->Y, p->X); 12 | fe_sub(r->Y, p->Y, p->X); 13 | fe_mul(r->Z, r->X, q->YplusX); 14 | fe_mul(r->Y, r->Y, q->YminusX); 15 | fe_mul(r->T, q->T2d, p->T); 16 | fe_mul(r->X, p->Z, q->Z); 17 | fe_add(t0, r->X, r->X); 18 | fe_sub(r->X, r->Z, r->Y); 19 | fe_add(r->Y, r->Z, r->Y); 20 | fe_add(r->Z, t0, r->T); 21 | fe_sub(r->T, t0, r->T); 22 | } 23 | 24 | 25 | static void __host__ __device__ slide(signed char *r, const unsigned char *a) { 26 | int i; 27 | int b; 28 | int k; 29 | 30 | for (i = 0; i < 256; ++i) { 31 | r[i] = 1 & (a[i >> 3] >> (i & 7)); 32 | } 33 | 34 | for (i = 0; i < 256; ++i) 35 | if (r[i]) { 36 | for (b = 1; b <= 6 && i + b < 256; ++b) { 37 | if (r[i + b]) { 38 | if (r[i] + (r[i + b] << b) <= 15) { 39 | r[i] += r[i + b] << b; 40 | r[i + b] = 0; 41 | } else if (r[i] - (r[i + b] << b) >= -15) { 42 | r[i] -= r[i + b] << b; 43 | 44 | for (k = i + b; k < 256; ++k) { 45 | if (!r[k]) { 46 | r[k] = 1; 47 | break; 48 | } 49 | 50 | r[k] = 0; 51 | } 52 | } else { 53 | break; 54 | } 55 | } 56 | } 57 | } 58 | } 59 | 60 | /* 61 | r = a * A + b * B 62 | where a = a[0]+256*a[1]+...+256^31 a[31]. 63 | and b = b[0]+256*b[1]+...+256^31 b[31]. 64 | B is the Ed25519 base point (x,4/5) with x positive. 
65 | */ 66 | 67 | void __host__ __device__ ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a, const ge_p3 *A, const unsigned char *b) { 68 | signed char aslide[256]; 69 | signed char bslide[256]; 70 | ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */ 71 | ge_p1p1 t; 72 | ge_p3 u; 73 | ge_p3 A2; 74 | int i; 75 | slide(aslide, a); 76 | slide(bslide, b); 77 | ge_p3_to_cached(&Ai[0], A); 78 | ge_p3_dbl(&t, A); 79 | ge_p1p1_to_p3(&A2, &t); 80 | ge_add(&t, &A2, &Ai[0]); 81 | ge_p1p1_to_p3(&u, &t); 82 | ge_p3_to_cached(&Ai[1], &u); 83 | ge_add(&t, &A2, &Ai[1]); 84 | ge_p1p1_to_p3(&u, &t); 85 | ge_p3_to_cached(&Ai[2], &u); 86 | ge_add(&t, &A2, &Ai[2]); 87 | ge_p1p1_to_p3(&u, &t); 88 | ge_p3_to_cached(&Ai[3], &u); 89 | ge_add(&t, &A2, &Ai[3]); 90 | ge_p1p1_to_p3(&u, &t); 91 | ge_p3_to_cached(&Ai[4], &u); 92 | ge_add(&t, &A2, &Ai[4]); 93 | ge_p1p1_to_p3(&u, &t); 94 | ge_p3_to_cached(&Ai[5], &u); 95 | ge_add(&t, &A2, &Ai[5]); 96 | ge_p1p1_to_p3(&u, &t); 97 | ge_p3_to_cached(&Ai[6], &u); 98 | ge_add(&t, &A2, &Ai[6]); 99 | ge_p1p1_to_p3(&u, &t); 100 | ge_p3_to_cached(&Ai[7], &u); 101 | ge_p2_0(r); 102 | 103 | for (i = 255; i >= 0; --i) { 104 | if (aslide[i] || bslide[i]) { 105 | break; 106 | } 107 | } 108 | 109 | for (; i >= 0; --i) { 110 | ge_p2_dbl(&t, r); 111 | 112 | if (aslide[i] > 0) { 113 | ge_p1p1_to_p3(&u, &t); 114 | ge_add(&t, &u, &Ai[aslide[i] / 2]); 115 | } else if (aslide[i] < 0) { 116 | ge_p1p1_to_p3(&u, &t); 117 | ge_sub(&t, &u, &Ai[(-aslide[i]) / 2]); 118 | } 119 | 120 | if (bslide[i] > 0) { 121 | ge_p1p1_to_p3(&u, &t); 122 | ge_madd(&t, &u, &Bi[bslide[i] / 2]); 123 | } else if (bslide[i] < 0) { 124 | ge_p1p1_to_p3(&u, &t); 125 | ge_msub(&t, &u, &Bi[(-bslide[i]) / 2]); 126 | } 127 | 128 | ge_p1p1_to_p2(r, &t); 129 | } 130 | } 131 | 132 | 133 | static const __device__ fe d = { 134 | -10913610, 13857413, -15372611, 6949391, 114729, -8787816, -6275908, -3247719, -18696448, -12055116 135 | }; 136 | 137 | static const __device__ fe sqrtm1 = { 138 | 
-32595792, -7943725, 9377950, 3500415, 12389472, -272473, -25146209, -2005654, 326686, 11406482 139 | }; 140 | 141 | int __device__ __host__ ge_frombytes_negate_vartime(ge_p3 *h, const unsigned char *s) { 142 | fe u; 143 | fe v; 144 | fe v3; 145 | fe vxx; 146 | fe check; 147 | fe_frombytes(h->Y, s); 148 | fe_1(h->Z); 149 | fe_sq(u, h->Y); 150 | fe_mul(v, u, d); 151 | fe_sub(u, u, h->Z); /* u = y^2-1 */ 152 | fe_add(v, v, h->Z); /* v = dy^2+1 */ 153 | fe_sq(v3, v); 154 | fe_mul(v3, v3, v); /* v3 = v^3 */ 155 | fe_sq(h->X, v3); 156 | fe_mul(h->X, h->X, v); 157 | fe_mul(h->X, h->X, u); /* x = uv^7 */ 158 | fe_pow22523(h->X, h->X); /* x = (uv^7)^((q-5)/8) */ 159 | fe_mul(h->X, h->X, v3); 160 | fe_mul(h->X, h->X, u); /* x = uv^3(uv^7)^((q-5)/8) */ 161 | fe_sq(vxx, h->X); 162 | fe_mul(vxx, vxx, v); 163 | fe_sub(check, vxx, u); /* vx^2-u */ 164 | 165 | if (fe_isnonzero(check)) { 166 | fe_add(check, vxx, u); /* vx^2+u */ 167 | 168 | if (fe_isnonzero(check)) { 169 | return -1; 170 | } 171 | 172 | fe_mul(h->X, h->X, sqrtm1); 173 | } 174 | 175 | if (fe_isnegative(h->X) == (s[31] >> 7)) { 176 | fe_neg(h->X, h->X); 177 | } 178 | 179 | fe_mul(h->T, h->X, h->Y); 180 | return 0; 181 | } 182 | 183 | 184 | /* 185 | r = p + q 186 | */ 187 | 188 | void __host__ __device__ ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) { 189 | fe t0; 190 | fe_add(r->X, p->Y, p->X); 191 | fe_sub(r->Y, p->Y, p->X); 192 | fe_mul(r->Z, r->X, q->yplusx); 193 | fe_mul(r->Y, r->Y, q->yminusx); 194 | fe_mul(r->T, q->xy2d, p->T); 195 | fe_add(t0, p->Z, p->Z); 196 | fe_sub(r->X, r->Z, r->Y); 197 | fe_add(r->Y, r->Z, r->Y); 198 | fe_add(r->Z, t0, r->T); 199 | fe_sub(r->T, t0, r->T); 200 | } 201 | 202 | 203 | /* 204 | r = p - q 205 | */ 206 | 207 | void __host__ __device__ ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) { 208 | fe t0; 209 | 210 | fe_add(r->X, p->Y, p->X); 211 | fe_sub(r->Y, p->Y, p->X); 212 | fe_mul(r->Z, r->X, q->yminusx); 213 | fe_mul(r->Y, r->Y, q->yplusx); 214 | fe_mul(r->T, 
q->xy2d, p->T); 215 | fe_add(t0, p->Z, p->Z); 216 | fe_sub(r->X, r->Z, r->Y); 217 | fe_add(r->Y, r->Z, r->Y); 218 | fe_sub(r->Z, t0, r->T); 219 | fe_add(r->T, t0, r->T); 220 | } 221 | 222 | 223 | /* 224 | r = p 225 | */ 226 | 227 | void __host__ __device__ ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p) { 228 | fe_mul(r->X, p->X, p->T); 229 | fe_mul(r->Y, p->Y, p->Z); 230 | fe_mul(r->Z, p->Z, p->T); 231 | } 232 | 233 | 234 | 235 | /* 236 | r = p 237 | */ 238 | 239 | void __host__ __device__ ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) { 240 | fe_mul(r->X, p->X, p->T); 241 | fe_mul(r->Y, p->Y, p->Z); 242 | fe_mul(r->Z, p->Z, p->T); 243 | fe_mul(r->T, p->X, p->Y); 244 | } 245 | 246 | 247 | void __host__ __device__ ge_p2_0(ge_p2 *h) { 248 | fe_0(h->X); 249 | fe_1(h->Y); 250 | fe_1(h->Z); 251 | } 252 | 253 | 254 | 255 | /* 256 | r = 2 * p 257 | */ 258 | 259 | void __host__ __device__ ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) { 260 | fe t0; 261 | 262 | fe_sq(r->X, p->X); 263 | fe_sq(r->Z, p->Y); 264 | fe_sq2(r->T, p->Z); 265 | fe_add(r->Y, p->X, p->Y); 266 | fe_sq(t0, r->Y); 267 | fe_add(r->Y, r->Z, r->X); 268 | fe_sub(r->Z, r->Z, r->X); 269 | fe_sub(r->X, t0, r->Y); 270 | fe_sub(r->T, r->T, r->Z); 271 | } 272 | 273 | 274 | void __host__ __device__ ge_p3_0(ge_p3 *h) { 275 | fe_0(h->X); 276 | fe_1(h->Y); 277 | fe_1(h->Z); 278 | fe_0(h->T); 279 | } 280 | 281 | 282 | /* 283 | r = 2 * p 284 | */ 285 | 286 | void __host__ __device__ ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p) { 287 | ge_p2 q; 288 | ge_p3_to_p2(&q, p); 289 | ge_p2_dbl(r, &q); 290 | } 291 | 292 | 293 | 294 | /* 295 | r = p 296 | */ 297 | 298 | static const __device__ fe d2 = { 299 | -21827239, -5839606, -30745221, 13898782, 229458, 15978800, -12551817, -6495438, 29715968, 9444199 300 | }; 301 | 302 | void __host__ __device__ ge_p3_to_cached(ge_cached *r, const ge_p3 *p) { 303 | fe_add(r->YplusX, p->Y, p->X); 304 | fe_sub(r->YminusX, p->Y, p->X); 305 | fe_copy(r->Z, p->Z); 306 | fe_mul(r->T2d, p->T, d2); 307 | } 308 | 309 | 310 | 
/*
r = p
*/

/*
 * Converts an extended (p3) point to projective (p2) form by copying X, Y and Z;
 * the T coordinate is dropped.
 *
 * Marked __host__ __device__ to match the declaration in ge.h and because
 * ge_p3_dbl(), which is itself __host__ __device__, calls it.
 */
void __host__ __device__ ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) {
    fe_copy(r->X, p->X);
    fe_copy(r->Y, p->Y);
    fe_copy(r->Z, p->Z);
}


/*
 * Serializes a p3 point into s: computes x = X/Z and y = Y/Z, writes y with
 * fe_tobytes() and XORs the result of fe_isnegative(x) into the top bit of s[31].
 *
 * Marked __host__ __device__ to match the declaration in ge.h (and the
 * otherwise identical ge_tobytes() for p2 points).
 */
void __host__ __device__ ge_p3_tobytes(unsigned char *s, const ge_p3 *h) {
    fe recip;
    fe x;
    fe y;
    fe_invert(recip, h->Z);
    fe_mul(x, h->X, recip);
    fe_mul(y, h->Y, recip);
    fe_tobytes(s, y);
    s[31] ^= fe_isnegative(x) << 7;
}


/* Branch-free equality test: returns 1 when b == c, 0 otherwise. */
static unsigned char __host__ __device__ equal(signed char b, signed char c) {
    unsigned char ub = b;
    unsigned char uc = c;
    unsigned char x = ub ^ uc; /* 0: yes; 1..255: no */
    uint64_t y = x; /* 0: yes; 1..255: no */
    y -= 1; /* large: yes; 0..254: no */
    y >>= 63; /* 1: yes; 0: no */
    return (unsigned char) y;
}

/* Branch-free sign test: returns 1 when b < 0, 0 otherwise. */
static unsigned char __host__ __device__ negative(signed char b) {
    uint64_t x = b; /* 18446744073709551361..18446744073709551615: yes; 0..255: no */
    x >>= 63; /* 1: yes; 0: no */
    return (unsigned char) x;
}

/* Applies fe_cmov() with flag b to each of the three fields of a ge_precomp. */
static void __host__ __device__ cmov(ge_precomp *t, const ge_precomp *u, unsigned char b) {
    fe_cmov(t->yplusx, u->yplusx, b);
    fe_cmov(t->yminusx, u->yminusx, b);
    fe_cmov(t->xy2d, u->xy2d, b);
}


/*
 * Table lookup of base[pos][|b| - 1] without data-dependent branches or indices:
 * t starts as (1, 1, 0), every one of the eight entries of base[pos] is passed
 * through cmov() with an equal() flag, and the result is conditionally replaced
 * by its negation (yplusx/yminusx swapped, xy2d negated) when b is negative.
 * b == 0 leaves t at (1, 1, 0).
 */
static void __host__ __device__ select(ge_precomp *t, int pos, signed char b) {
    ge_precomp minust;
    unsigned char bnegative = negative(b);
    /* |b|: subtracts 2*b only when bnegative is 1 (mask -bnegative is all ones) */
    unsigned char babs = b - (((-bnegative) & b) << 1);
    fe_1(t->yplusx);
    fe_1(t->yminusx);
    fe_0(t->xy2d);
    cmov(t, &base[pos][0], equal(babs, 1));
    cmov(t, &base[pos][1], equal(babs, 2));
    cmov(t, &base[pos][2], equal(babs, 3));
    cmov(t, &base[pos][3], equal(babs, 4));
    cmov(t, &base[pos][4], equal(babs, 5));
    cmov(t, &base[pos][5], equal(babs, 6));
    cmov(t, &base[pos][6], equal(babs, 7));
    cmov(t, &base[pos][7], equal(babs, 8));
    fe_copy(minust.yplusx, t->yminusx);
    fe_copy(minust.yminusx, t->yplusx);
    fe_neg(minust.xy2d, t->xy2d);
    cmov(t, &minust, bnegative);
}

/*
h = a * B
where a = a[0]+256*a[1]+...+256^31 a[31]
B is the Ed25519 base point (x,4/5) with x positive.

Preconditions:
  a[31] <= 127
*/

void __device__ __host__ ge_scalarmult_base(ge_p3 *h, const unsigned char *a) {
    signed char e[64];
    signed char carry;
    ge_p1p1 r;
    ge_p2 s;
    ge_precomp t;
    int i;

    /* split the 32 scalar bytes into 64 nibbles, low nibble first */
    for (i = 0; i < 32; ++i) {
        e[2 * i + 0] = (a[i] >> 0) & 15;
        e[2 * i + 1] = (a[i] >> 4) & 15;
    }

    /* each e[i] is between 0 and 15 */
    /* e[63] is between 0 and 7 */
    carry = 0;

    /* recode the nibbles into signed digits by propagating a carry upwards */
    for (i = 0; i < 63; ++i) {
        e[i] += carry;
        carry = e[i] + 8;
        carry >>= 4;
        e[i] -= carry << 4;
    }

    e[63] += carry;
    /* each e[i] is between -8 and 8 */
    ge_p3_0(h);

    /* accumulate the odd-indexed digits first ... */
    for (i = 1; i < 64; i += 2) {
        select(&t, i / 2, e[i]);
        ge_madd(&r, h, &t);
        ge_p1p1_to_p3(h, &r);
    }

    /* ... double four times (multiply by 16) ... */
    ge_p3_dbl(&r, h);
    ge_p1p1_to_p2(&s, &r);
    ge_p2_dbl(&r, &s);
    ge_p1p1_to_p2(&s, &r);
    ge_p2_dbl(&r, &s);
    ge_p1p1_to_p2(&s, &r);
    ge_p2_dbl(&r, &s);
    ge_p1p1_to_p3(h, &r);

    /* ... then add the even-indexed digits */
    for (i = 0; i < 64; i += 2) {
        select(&t, i / 2, e[i]);
        ge_madd(&r, h, &t);
        ge_p1p1_to_p3(h, &r);
    }
}


/*
r = p - q
*/

/*
 * Same sequence as ge_add() with q->YplusX / q->YminusX exchanged and the
 * final fe_add / fe_sub on T swapped.
 */
void __host__ __device__ ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
    fe t0;

    fe_add(r->X, p->Y, p->X);
    fe_sub(r->Y, p->Y, p->X);
    fe_mul(r->Z, r->X, q->YminusX);
    fe_mul(r->Y, r->Y, q->YplusX);
    fe_mul(r->T, q->T2d, p->T);
    fe_mul(r->X, p->Z, q->Z);
    fe_add(t0, r->X, r->X);
    fe_sub(r->X, r->Z, r->Y);
    fe_add(r->Y, r->Z, r->Y);
    fe_sub(r->Z, t0, r->T);
    fe_add(r->T, t0, r->T);
}
| 457 | 458 | void __host__ __device__ ge_tobytes(unsigned char *s, const ge_p2 *h) { 459 | fe recip; 460 | fe x; 461 | fe y; 462 | fe_invert(recip, h->Z); 463 | fe_mul(x, h->X, recip); 464 | fe_mul(y, h->Y, recip); 465 | fe_tobytes(s, y); 466 | s[31] ^= fe_isnegative(x) << 7; 467 | } 468 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/ge.h: -------------------------------------------------------------------------------- 1 | #ifndef GE_H 2 | #define GE_H 3 | 4 | #include "fe.h" 5 | 6 | 7 | /* 8 | ge means group element. 9 | 10 | Here the group is the set of pairs (x,y) of field elements (see fe.h) 11 | satisfying -x^2 + y^2 = 1 + d x^2y^2 12 | where d = -121665/121666. 13 | 14 | Representations: 15 | ge_p2 (projective): (X:Y:Z) satisfying x=X/Z, y=Y/Z 16 | ge_p3 (extended): (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT 17 | ge_p1p1 (completed): ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T 18 | ge_precomp (Duif): (y+x,y-x,2dxy) 19 | */ 20 | 21 | typedef struct { 22 | fe X; 23 | fe Y; 24 | fe Z; 25 | } ge_p2; 26 | 27 | typedef struct { 28 | fe X; 29 | fe Y; 30 | fe Z; 31 | fe T; 32 | } ge_p3; 33 | 34 | typedef struct { 35 | fe X; 36 | fe Y; 37 | fe Z; 38 | fe T; 39 | } ge_p1p1; 40 | 41 | typedef struct { 42 | fe yplusx; 43 | fe yminusx; 44 | fe xy2d; 45 | } ge_precomp; 46 | 47 | typedef struct { 48 | fe YplusX; 49 | fe YminusX; 50 | fe Z; 51 | fe T2d; 52 | } ge_cached; 53 | 54 | void __host__ __device__ ge_p3_tobytes(unsigned char *s, const ge_p3 *h); 55 | void __host__ __device__ ge_tobytes(unsigned char *s, const ge_p2 *h); 56 | int __host__ __device__ ge_frombytes_negate_vartime(ge_p3 *h, const unsigned char *s); 57 | 58 | void __host__ __device__ ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q); 59 | void __host__ __device__ ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q); 60 | void __host__ __device__ ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a, const ge_p3 *A, const unsigned char *b); 
61 | void __host__ __device__ ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q); 62 | void __host__ __device__ ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q); 63 | void __host__ __device__ ge_scalarmult_base(ge_p3 *h, const unsigned char *a); 64 | 65 | void __host__ __device__ ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p); 66 | void __host__ __device__ ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p); 67 | void __host__ __device__ ge_p2_0(ge_p2 *h); 68 | void __host__ __device__ ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p); 69 | void __host__ __device__ ge_p3_0(ge_p3 *h); 70 | void __host__ __device__ ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p); 71 | void __host__ __device__ ge_p3_to_cached(ge_cached *r, const ge_p3 *p); 72 | void __host__ __device__ ge_p3_to_p2(ge_p2 *r, const ge_p3 *p); 73 | 74 | #endif 75 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/gpu_ctx.cu: -------------------------------------------------------------------------------- 1 | #include "ed25519.h" 2 | #include "gpu_ctx.h" 3 | #include 4 | #include "gpu_common.h" 5 | 6 | static pthread_mutex_t g_ctx_mutex = PTHREAD_MUTEX_INITIALIZER; 7 | 8 | #define MAX_NUM_GPUS 8 9 | #define MAX_QUEUE_SIZE 8 10 | 11 | static gpu_ctx_t g_gpu_ctx[MAX_NUM_GPUS][MAX_QUEUE_SIZE] = {0}; 12 | static uint32_t g_cur_gpu = 0; 13 | static uint32_t g_cur_queue[MAX_NUM_GPUS] = {0}; 14 | static int32_t g_total_gpus = -1; 15 | 16 | static bool cuda_crypt_init_locked() { 17 | if (g_total_gpus == -1) { 18 | cudaGetDeviceCount(&g_total_gpus); 19 | g_total_gpus = min(MAX_NUM_GPUS, g_total_gpus); 20 | LOG("total_gpus: %d\n", g_total_gpus); 21 | for (int gpu = 0; gpu < g_total_gpus; gpu++) { 22 | CUDA_CHK(cudaSetDevice(gpu)); 23 | for (int queue = 0; queue < MAX_QUEUE_SIZE; queue++) { 24 | int err = pthread_mutex_init(&g_gpu_ctx[gpu][queue].mutex, NULL); 25 | if (err != 0) { 26 | fprintf(stderr, "pthread_mutex_init error %d gpu: %d queue: %d\n", 27 | err, gpu, queue); 28 | 
g_total_gpus = 0; 29 | return false; 30 | } 31 | CUDA_CHK(cudaStreamCreate(&g_gpu_ctx[gpu][queue].stream)); 32 | } 33 | } 34 | } 35 | return g_total_gpus > 0; 36 | } 37 | /* Runs the one-time context initialisation under g_ctx_mutex and returns true when at least one GPU is usable. NOTE(review): the leading cudaFree(0) presumably forces CUDA runtime initialisation before the lock is taken - confirm. */ 38 | bool ed25519_init() { 39 | cudaFree(0); 40 | pthread_mutex_lock(&g_ctx_mutex); 41 | bool success = cuda_crypt_init_locked(); 42 | pthread_mutex_unlock(&g_ctx_mutex); 43 | return success; 44 | } 45 | /* Picks the next GPU, and the next queue on that GPU, in round-robin order while holding g_ctx_mutex, then returns that context with its own mutex locked and the device selected. Returns NULL when initialisation reports no usable GPU. Every successful call must be paired with release_gpu_ctx(). */ 46 | gpu_ctx_t* get_gpu_ctx() { 47 | int32_t cur_gpu, cur_queue; 48 | 49 | LOG("locking global mutex"); 50 | pthread_mutex_lock(&g_ctx_mutex); 51 | if (!cuda_crypt_init_locked()) { 52 | pthread_mutex_unlock(&g_ctx_mutex); 53 | LOG("No GPUs, exiting...\n"); 54 | return NULL; 55 | } 56 | cur_gpu = g_cur_gpu; 57 | g_cur_gpu++; 58 | g_cur_gpu %= g_total_gpus; 59 | cur_queue = g_cur_queue[cur_gpu]; 60 | g_cur_queue[cur_gpu]++; 61 | g_cur_queue[cur_gpu] %= MAX_QUEUE_SIZE; 62 | pthread_mutex_unlock(&g_ctx_mutex); 63 | 64 | gpu_ctx_t* cur_ctx = &g_gpu_ctx[cur_gpu][cur_queue]; 65 | LOG("locking contex mutex queue: %d gpu: %d", cur_queue, cur_gpu); 66 | pthread_mutex_lock(&cur_ctx->mutex); 67 | 68 | CUDA_CHK(cudaSetDevice(cur_gpu)); 69 | 70 | LOG("selecting gpu: %d queue: %d\n", cur_gpu, cur_queue); 71 | 72 | return cur_ctx; 73 | } 74 | /* Grows the device buffers held in cur_ctx when they are missing or too small, then queues asynchronous host-to-device copies of the four offset/length tables and of every elems[i] packet run on `stream`. Buffers are never shrunk. */ 75 | void setup_gpu_ctx(verify_ctx_t* cur_ctx, 76 | const gpu_Elems* elems, 77 | uint32_t num_elems, 78 | uint32_t message_size, 79 | uint32_t total_packets, 80 | uint32_t total_packets_size, 81 | uint32_t total_signatures, 82 | const uint32_t* message_lens, 83 | const uint32_t* public_key_offsets, 84 | const uint32_t* signature_offsets, 85 | const uint32_t* message_start_offsets, 86 | size_t out_size, 87 | cudaStream_t stream 88 | ) { 89 | size_t offsets_size = total_signatures * sizeof(uint32_t); 90 | 91 | LOG("device allocate.
packets: %d out: %d offsets_size: %zu\n", 92 | total_packets_size, (int)out_size, offsets_size); 93 | 94 | if (cur_ctx->packets == NULL || 95 | total_packets_size > cur_ctx->packets_size_bytes) { 96 | CUDA_CHK(cudaFree(cur_ctx->packets)); 97 | CUDA_CHK(cudaMalloc(&cur_ctx->packets, total_packets_size)); 98 | 99 | cur_ctx->packets_size_bytes = total_packets_size; 100 | } 101 | 102 | if (cur_ctx->out == NULL || cur_ctx->out_size_bytes < out_size) { 103 | CUDA_CHK(cudaFree(cur_ctx->out)); 104 | CUDA_CHK(cudaMalloc(&cur_ctx->out, out_size)); 105 | /* record the allocated byte count (previously total_signatures) so the `< out_size` test above compares like with like and the buffer is not reallocated on every call */ 106 | cur_ctx->out_size_bytes = out_size; 107 | } 108 | 109 | if (cur_ctx->public_key_offsets == NULL || cur_ctx->offsets_len < total_signatures) { 110 | CUDA_CHK(cudaFree(cur_ctx->public_key_offsets)); 111 | CUDA_CHK(cudaMalloc(&cur_ctx->public_key_offsets, offsets_size)); 112 | 113 | CUDA_CHK(cudaFree(cur_ctx->signature_offsets)); 114 | CUDA_CHK(cudaMalloc(&cur_ctx->signature_offsets, offsets_size)); 115 | 116 | CUDA_CHK(cudaFree(cur_ctx->message_start_offsets)); 117 | CUDA_CHK(cudaMalloc(&cur_ctx->message_start_offsets, offsets_size)); 118 | 119 | CUDA_CHK(cudaFree(cur_ctx->message_lens)); 120 | CUDA_CHK(cudaMalloc(&cur_ctx->message_lens, offsets_size)); 121 | 122 | cur_ctx->offsets_len = total_signatures; 123 | } 124 | 125 | LOG("Done alloc"); 126 | 127 | CUDA_CHK(cudaMemcpyAsync(cur_ctx->public_key_offsets, public_key_offsets, offsets_size, cudaMemcpyHostToDevice, stream)); 128 | CUDA_CHK(cudaMemcpyAsync(cur_ctx->signature_offsets, signature_offsets, offsets_size, cudaMemcpyHostToDevice, stream)); 129 | CUDA_CHK(cudaMemcpyAsync(cur_ctx->message_start_offsets, message_start_offsets, offsets_size, cudaMemcpyHostToDevice, stream)); 130 | CUDA_CHK(cudaMemcpyAsync(cur_ctx->message_lens, message_lens, offsets_size, cudaMemcpyHostToDevice, stream)); 131 | /* cur counts packets already queued, so each elems[i] run lands directly after the previous one */ 132 | size_t cur = 0; 133 | for (size_t i = 0; i < num_elems; i++) { 134 | LOG("i: %zu size: %d\n", i, elems[i].num * message_size); 135 |
CUDA_CHK(cudaMemcpyAsync(&cur_ctx->packets[cur * message_size], elems[i].elems, elems[i].num * message_size, cudaMemcpyHostToDevice, stream)); 136 | cur += elems[i].num; 137 | } 138 | } 139 | 140 | /* Unlocks the per-context mutex that get_gpu_ctx() left locked. */ 141 | void release_gpu_ctx(gpu_ctx_t* cur_ctx) { 142 | pthread_mutex_unlock(&cur_ctx->mutex); 143 | } 144 | /* Frees the device buffers and destroys the stream of every context slot, then resets the slot so that a later setup_gpu_ctx() reallocates instead of reusing freed memory. NOTE(review): takes neither g_ctx_mutex nor the per-context mutexes, so the caller must ensure no context is in use. */ 145 | void ed25519_free_gpu_mem() { 146 | for (size_t gpu = 0; gpu < MAX_NUM_GPUS; gpu++) { 147 | for (size_t queue = 0; queue < MAX_QUEUE_SIZE; queue++) { 148 | gpu_ctx_t* cur_ctx = &g_gpu_ctx[gpu][queue]; 149 | CUDA_CHK(cudaFree(cur_ctx->verify_ctx.packets)); 150 | CUDA_CHK(cudaFree(cur_ctx->verify_ctx.out)); 151 | CUDA_CHK(cudaFree(cur_ctx->verify_ctx.message_lens)); 152 | CUDA_CHK(cudaFree(cur_ctx->verify_ctx.public_key_offsets)); 153 | CUDA_CHK(cudaFree(cur_ctx->verify_ctx.private_key_offsets)); 154 | CUDA_CHK(cudaFree(cur_ctx->verify_ctx.signature_offsets)); 155 | CUDA_CHK(cudaFree(cur_ctx->verify_ctx.message_start_offsets)); /* clear the stale pointers and sizes so nothing dangles after the free */ cur_ctx->verify_ctx.packets = NULL; cur_ctx->verify_ctx.packets_size_bytes = 0; cur_ctx->verify_ctx.out = NULL; cur_ctx->verify_ctx.out_size_bytes = 0; cur_ctx->verify_ctx.message_lens = NULL; cur_ctx->verify_ctx.public_key_offsets = NULL; cur_ctx->verify_ctx.private_key_offsets = NULL; cur_ctx->verify_ctx.signature_offsets = NULL; cur_ctx->verify_ctx.message_start_offsets = NULL; cur_ctx->verify_ctx.offsets_len = 0; 156 | if (cur_ctx->stream != 0) { 157 | CUDA_CHK(cudaStreamDestroy(cur_ctx->stream)); cur_ctx->stream = 0; /* do not keep a destroyed stream handle */ 158 | } 159 | } 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/gpu_ctx.h: -------------------------------------------------------------------------------- 1 | #ifndef GPU_CTX_H 2 | #define GPU_CTX_H 3 | 4 | #include 5 | #include "ed25519.h" 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | typedef struct { 12 | uint8_t* packets; 13 | uint32_t packets_size_bytes; 14 | 15 | uint8_t* out; 16 | size_t out_size_bytes; 17 | 18 | uint32_t* public_key_offsets; 19 | uint32_t* private_key_offsets; 20 | uint32_t* message_start_offsets; 21 | uint32_t* signature_offsets; 22 | uint32_t* message_lens; 23 | size_t offsets_len; 24 | } verify_ctx_t; 25 | 26 | typedef struct { 27 | verify_ctx_t verify_ctx; 28 | 29 | pthread_mutex_t mutex; 30 | cudaStream_t stream; 31 | } gpu_ctx_t; 32 | 33 | extern gpu_ctx_t* get_gpu_ctx(); 34 | extern void release_gpu_ctx(gpu_ctx_t*); 35 | 36 |
extern void ed25519_free_gpu_mem(); 37 | 38 | extern void setup_gpu_ctx(verify_ctx_t* cur_ctx, 39 | const gpu_Elems* elems, 40 | uint32_t num_elems, 41 | uint32_t message_size, 42 | uint32_t total_packets, 43 | uint32_t total_packets_size, 44 | uint32_t total_signatures, 45 | const uint32_t* message_lens, 46 | const uint32_t* public_key_offsets, 47 | const uint32_t* signature_offsets, 48 | const uint32_t* message_start_offsets, 49 | size_t out_size, 50 | cudaStream_t stream 51 | ); 52 | 53 | #ifdef __cplusplus 54 | } 55 | #endif 56 | 57 | #endif 58 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/keypair.cu: -------------------------------------------------------------------------------- 1 | #include "ed25519.h" 2 | #include "sha512.h" 3 | #include "ge.h" 4 | 5 | 6 | void ed25519_create_keypair(unsigned char *public_key, unsigned char *private_key, const unsigned char *seed) { 7 | ge_p3 A; 8 | 9 | sha512(seed, 32, private_key); 10 | private_key[0] &= 248; 11 | private_key[31] &= 63; 12 | private_key[31] |= 64; 13 | 14 | ge_scalarmult_base(&A, private_key); 15 | ge_p3_tobytes(public_key, &A); 16 | } 17 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Orson Peters 2 | 3 | This software is provided 'as-is', without any express or implied warranty. In no event will the 4 | authors be held liable for any damages arising from the use of this software. 5 | 6 | Permission is granted to anyone to use this software for any purpose, including commercial 7 | applications, and to alter it and redistribute it freely, subject to the following restrictions: 8 | 9 | 1. The origin of this software must not be misrepresented; you must not claim that you wrote the 10 | original software. 
If you use this software in a product, an acknowledgment in the product 11 | documentation would be appreciated but is not required. 12 | 13 | 2. Altered source versions must be plainly marked as such, and must not be misrepresented as 14 | being the original software. 15 | 16 | 3. This notice may not be removed or altered from any source distribution. 17 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/main.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include "ed25519.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "gpu_common.h" 8 | #include "gpu_ctx.h" 9 | 10 | #define USE_CLOCK_GETTIME 11 | #include "perftime.h" 12 | 13 | #define PACKET_SIZE 512 14 | 15 | typedef struct { 16 | size_t size; 17 | uint64_t num_retransmits; 18 | uint16_t addr[8]; 19 | uint16_t port; 20 | bool v6; 21 | } streamer_Meta; 22 | 23 | typedef struct { 24 | uint8_t data[PACKET_SIZE]; 25 | streamer_Meta meta; 26 | } streamer_Packet; 27 | 28 | void print_dwords(unsigned char* ptr, int size) { 29 | for (int j = 0; j < (size)/(int)sizeof(uint32_t); j++) { 30 | LOG("%x ", ((uint32_t*)ptr)[j]); 31 | } 32 | } 33 | 34 | typedef struct { 35 | uint8_t signature[SIG_SIZE]; 36 | uint8_t public_key[PUB_KEY_SIZE]; 37 | uint32_t message_len; 38 | uint8_t message[8]; 39 | } packet_t; 40 | 41 | typedef struct { 42 | gpu_Elems* elems_h; 43 | uint32_t num_elems; 44 | uint32_t total_packets; 45 | uint32_t total_signatures; 46 | uint32_t* message_lens; 47 | uint32_t* public_key_offsets; 48 | uint32_t* signature_offsets; 49 | uint32_t* message_start_offsets; 50 | uint8_t* out_h; 51 | int num_iterations; 52 | uint8_t use_non_default_stream; 53 | } verify_cpu_ctx_t; 54 | 55 | static void* verify_proc(void* ctx) { 56 | verify_cpu_ctx_t* vctx = (verify_cpu_ctx_t*)ctx; 57 | LOG("Start iterations\n"); 58 | for (int i = 0; i < vctx->num_iterations; i++) { 59 | 
ed25519_verify_many(&vctx->elems_h[0], 60 | vctx->num_elems, 61 | sizeof(streamer_Packet), 62 | vctx->total_packets, 63 | vctx->total_signatures, 64 | vctx->message_lens, 65 | vctx->public_key_offsets, 66 | vctx->signature_offsets, 67 | vctx->message_start_offsets, 68 | vctx->out_h, 69 | vctx->use_non_default_stream); 70 | } 71 | LOG("Done iterations\n"); 72 | return NULL; 73 | } 74 | 75 | const static bool USE_CUDA_ALLOC = true; 76 | 77 | template static void ed25519_alloc(T** ptr, size_t num) { 78 | if (USE_CUDA_ALLOC) { 79 | CUDA_CHK(cudaMallocHost(ptr, sizeof(T) * num)); 80 | } else { 81 | *ptr = (T*)calloc(sizeof(T), num); 82 | } 83 | } 84 | 85 | static void ed25519_free(void* ptr) { 86 | if (USE_CUDA_ALLOC) { 87 | CUDA_CHK(cudaFreeHost(ptr)); 88 | } else { 89 | free(ptr); 90 | } 91 | 92 | } 93 | 94 | int main(int argc, const char* argv[]) { 95 | int arg; 96 | bool verbose = false; 97 | for (arg = 1; arg < argc; arg++) { 98 | if (0 == strcmp(argv[arg], "-v")) { 99 | verbose = true; 100 | } else { 101 | break; 102 | } 103 | } 104 | 105 | if ((argc - arg) != 6) { 106 | printf("usage: %s [-v] \n", argv[0]); 107 | return 1; 108 | } 109 | 110 | ed25519_set_verbose(verbose); 111 | 112 | int num_signatures_per_elem = strtol(argv[arg++], NULL, 10); 113 | if (num_signatures_per_elem <= 0) { 114 | printf("num_signatures_per_elem should be > 0! %d\n", num_signatures_per_elem); 115 | return 1; 116 | } 117 | 118 | int num_elems = strtol(argv[arg++], NULL, 10); 119 | if (num_elems <= 0) { 120 | printf("num_elems should be > 0! %d\n", num_elems); 121 | return 1; 122 | } 123 | 124 | int num_sigs_per_packet = strtol(argv[arg++], NULL, 10); 125 | if (num_sigs_per_packet <= 0) { 126 | printf("num_sigs_per_packet should be > 0! %d\n", num_sigs_per_packet); 127 | return 1; 128 | } 129 | 130 | int num_threads = strtol(argv[arg++], NULL, 10); 131 | if (num_threads <= 0) { 132 | printf("num_threads should be > 0! 
%d\n", num_threads); 133 | return 1; 134 | } 135 | 136 | int num_iterations = strtol(argv[arg++], NULL, 10); 137 | if (num_iterations <= 0) { 138 | printf("num_iterations should be > 0! %d\n", num_iterations); 139 | return 1; 140 | } 141 | 142 | uint8_t use_non_default_stream = (uint8_t)strtol(argv[arg++], NULL, 10); 143 | if (use_non_default_stream != 0 && use_non_default_stream != 1) { 144 | printf("non_default_stream should be 0 or 1! %d\n", use_non_default_stream); 145 | return 1; 146 | } 147 | 148 | LOG("streamer size: %zu elems size: %zu\n", sizeof(streamer_Packet), sizeof(gpu_Elems)); 149 | 150 | std::vector vctx = std::vector(num_threads); 151 | 152 | // Host allocate 153 | unsigned char* seed_h = (unsigned char*)calloc(num_signatures_per_elem * SEED_SIZE, sizeof(uint32_t)); 154 | unsigned char* private_key_h = (unsigned char*)calloc(num_signatures_per_elem, PRIV_KEY_SIZE); 155 | unsigned char message_h[] = "abcd1234"; 156 | int message_h_len = strlen((char*)message_h); 157 | 158 | uint32_t total_signatures = num_elems * num_signatures_per_elem; 159 | 160 | uint32_t* message_lens = NULL; 161 | ed25519_alloc(&message_lens, total_signatures); 162 | 163 | uint32_t* signature_offsets = NULL; 164 | ed25519_alloc(&signature_offsets, total_signatures); 165 | 166 | uint32_t* public_key_offsets = NULL; 167 | ed25519_alloc(&public_key_offsets, total_signatures); 168 | 169 | uint32_t* message_start_offsets = NULL; 170 | ed25519_alloc(&message_start_offsets, total_signatures); 171 | 172 | for (uint32_t i = 0; i < total_signatures; i++) { 173 | uint32_t base_offset = i * sizeof(streamer_Packet); 174 | signature_offsets[i] = base_offset + offsetof(packet_t, signature); 175 | public_key_offsets[i] = base_offset + offsetof(packet_t, public_key); 176 | message_start_offsets[i] = base_offset + offsetof(packet_t, message); 177 | message_lens[i] = message_h_len; 178 | } 179 | 180 | for (int i = 0; i < num_threads; i++) { 181 | vctx[i].message_lens = message_lens; 182 | 
vctx[i].signature_offsets = signature_offsets; 183 | vctx[i].public_key_offsets = public_key_offsets; 184 | vctx[i].message_start_offsets = message_start_offsets; 185 | vctx[i].num_iterations = num_iterations; 186 | vctx[i].use_non_default_stream = use_non_default_stream; 187 | } 188 | 189 | streamer_Packet* packets_h = NULL; 190 | ed25519_alloc(&packets_h, num_signatures_per_elem); 191 | uint32_t total_packets = 0; 192 | 193 | gpu_Elems* elems_h = NULL; 194 | ed25519_alloc(&elems_h, num_elems); 195 | for (int i = 0; i < num_elems; i++) { 196 | elems_h[i].num = num_signatures_per_elem; 197 | elems_h[i].elems = (uint8_t*)&packets_h[0]; 198 | 199 | total_packets += num_signatures_per_elem; 200 | } 201 | 202 | LOG("initing signatures..\n"); 203 | for (int i = 0; i < num_signatures_per_elem; i++) { 204 | packet_t* packet = (packet_t*)packets_h[i].data; 205 | memcpy(packet->message, message_h, message_h_len); 206 | 207 | LOG("message_len: %d\n", 208 | message_h_len); 209 | } 210 | 211 | for (uint32_t i = 0; i < total_signatures; i++) { 212 | LOG("sig_offset: %d pub_key_offset: %d message_start_offset: %d message_len: %d\n", 213 | signature_offsets[i], public_key_offsets[i], message_start_offsets[i], message_lens[i]); 214 | } 215 | 216 | int out_size = total_signatures * sizeof(uint8_t); 217 | for (int i = 0; i < num_threads; i++) { 218 | vctx[i].num_elems = num_elems; 219 | ed25519_alloc(&vctx[i].out_h, out_size); 220 | vctx[i].elems_h = &elems_h[0]; 221 | vctx[i].total_signatures = total_signatures; 222 | vctx[i].total_packets = total_packets; 223 | } 224 | 225 | LOG("creating seed..\n"); 226 | int ret = ed25519_create_seed(seed_h); 227 | LOG("create_seed: %d\n", ret); 228 | packet_t* first_packet_h = (packet_t*)packets_h[0].data; 229 | ed25519_create_keypair(first_packet_h->public_key, private_key_h, seed_h); 230 | ed25519_sign(first_packet_h->signature, first_packet_h->message, message_h_len, first_packet_h->public_key, private_key_h); 231 | ret = 
ed25519_verify(first_packet_h->signature, message_h, message_h_len, first_packet_h->public_key); 232 | LOG("verify: %d\n", ret); 233 | 234 | for (int i = 1; i < num_signatures_per_elem; i++) { 235 | packet_t* packet_h = (packet_t*)packets_h[i].data; 236 | memcpy(packet_h->signature, first_packet_h->signature, SIG_SIZE); 237 | memcpy(packet_h->public_key, first_packet_h->public_key, PUB_KEY_SIZE); 238 | } 239 | 240 | for (int i = 0; i < num_signatures_per_elem; i++ ) { 241 | packet_t* packet_h = (packet_t*)packets_h[i].data; 242 | unsigned char* sig_ptr = packet_h->signature; 243 | unsigned char* messages_ptr = packet_h->message; 244 | LOG("sig:"); 245 | print_dwords(sig_ptr, SIG_SIZE); 246 | LOG("\nmessage: "); 247 | print_dwords(messages_ptr, message_h_len); 248 | LOG("\n\n"); 249 | } 250 | LOG("\n"); 251 | 252 | std::vector threads = std::vector(num_threads); 253 | pthread_attr_t attr; 254 | ret = pthread_attr_init(&attr); 255 | if (ret != 0) { 256 | LOG("ERROR: pthread_attr_init: %d\n", ret); 257 | return 1; 258 | } 259 | 260 | perftime_t start, end; 261 | get_time(&start); 262 | for (int i = 0; i < num_threads; i++) { 263 | ret = pthread_create(&threads[i], 264 | &attr, 265 | verify_proc, 266 | &vctx[i]); 267 | if (ret != 0) { 268 | LOG("ERROR: pthread_create: %d\n", ret); 269 | return 1; 270 | } 271 | } 272 | 273 | void* res = NULL; 274 | for (int i = 0; i < num_threads; i++) { 275 | ret = pthread_join(threads[i], &res); 276 | if (ret != 0) { 277 | LOG("ERROR: pthread_join: %d\n", ret); 278 | return 1; 279 | } 280 | } 281 | get_time(&end); 282 | 283 | int total = (num_threads * total_signatures * num_iterations); 284 | double diff = get_diff(&start, &end); 285 | printf("time diff: %f total: %d sigs/sec: %f\n", 286 | diff, 287 | total, 288 | (double)total / (diff / 1e6)); 289 | 290 | for (int thread = 0; thread < num_threads; thread++) { 291 | LOG("ret:\n"); 292 | bool verify_failed = false; 293 | for (int i = 0; i < out_size / (int)sizeof(uint8_t); i++) { 294 
| LOG("%x ", vctx[thread].out_h[i]); 295 | if (vctx[thread].out_h[i] != 1) { 296 | verify_failed = true; 297 | } 298 | } 299 | LOG("\n"); 300 | fflush(stdout); 301 | assert(verify_failed == false); 302 | } 303 | 304 | ed25519_free(elems_h); 305 | ed25519_free(packets_h); 306 | ed25519_free(message_lens); 307 | ed25519_free(signature_offsets); 308 | ed25519_free(public_key_offsets); 309 | ed25519_free(message_start_offsets); 310 | for (int thread = 0; thread < num_threads; thread++) { 311 | ed25519_free(vctx[thread].out_h); 312 | } 313 | free(seed_h); 314 | free(private_key_h); 315 | ed25519_free_gpu_mem(); 316 | return 0; 317 | } 318 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/perftime.h: -------------------------------------------------------------------------------- 1 | #ifndef PERFTIME_H 2 | #define PERFTIME_H 3 | 4 | #ifdef USE_RDTSC 5 | static inline uint64_t rdtsc() 6 | { 7 | unsigned int hi, lo; 8 | __asm__ volatile("rdtsc" : "=a" (lo), "=d" (hi)); 9 | return ((uint64_t)hi << 32) | lo; 10 | } 11 | 12 | typedef struct { 13 | uint64_t count; 14 | } perftime_t; 15 | 16 | #elif defined(USE_CLOCK_GETTIME) 17 | #include 18 | typedef struct timespec perftime_t; 19 | #else 20 | #include 21 | typedef struct timeval perftime_t; 22 | #endif 23 | 24 | static int get_time(perftime_t* t) { 25 | #ifdef USE_RDTSC 26 | t->count = rdtsc(); 27 | return 0; 28 | #elif defined(USE_CLOCK_GETTIME) 29 | return clock_gettime(CLOCK_MONOTONIC_RAW, t); 30 | //return clock_gettime(CLOCK_PROCESS_CPUTIME_ID, t); 31 | #else 32 | return gettimeofday(t, NULL /* timezone */); 33 | #endif 34 | } 35 | 36 | static double get_us(const perftime_t* time) { 37 | #ifdef USE_RDTSC 38 | return time->count; 39 | #elif defined(USE_CLOCK_GETTIME) 40 | return ((time->tv_nsec/1000) + (double)time->tv_sec * 1000000); 41 | #else 42 | return (time->tv_usec + (double)time->tv_sec * 1000000); 43 | #endif 44 | } 45 | 46 | static double get_diff(const 
perftime_t* start, const perftime_t* end) { 47 | return get_us(end) - get_us(start); 48 | } 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/sc.h: -------------------------------------------------------------------------------- 1 | #ifndef SC_H 2 | #define SC_H 3 | 4 | /* 5 | The set of scalars is \Z/l 6 | where l = 2^252 + 27742317777372353535851937790883648493. 7 | */ 8 | 9 | void __host__ __device__ sc_reduce(unsigned char *s); 10 | void __host__ __device__ sc_muladd(unsigned char *s, const unsigned char *a, const unsigned char *b, const unsigned char *c); 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/seed.cu: -------------------------------------------------------------------------------- 1 | #include "ed25519.h" 2 | 3 | #ifndef ED25519_NO_SEED 4 | 5 | #ifdef _WIN32 6 | #include 7 | #include 8 | #else 9 | #include 10 | #endif 11 | /* Fills seed[0..31] with 32 random bytes: CryptGenRandom on Windows, /dev/urandom elsewhere. Returns 0 on success and 1 when the random source cannot be opened or yields fewer than 32 bytes. */ 12 | int ed25519_create_seed(unsigned char *seed) { 13 | #ifdef _WIN32 14 | HCRYPTPROV prov; 15 | 16 | if (!CryptAcquireContext(&prov, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) { 17 | return 1; 18 | } 19 | 20 | if (!CryptGenRandom(prov, 32, seed)) { 21 | CryptReleaseContext(prov, 0); 22 | return 1; 23 | } 24 | 25 | CryptReleaseContext(prov, 0); 26 | #else 27 | FILE *f = fopen("/dev/urandom", "rb"); 28 | 29 | if (f == NULL) { 30 | return 1; 31 | } 32 | 33 | size_t res = fread(seed, 1, 32, f); 34 | fclose(f); /* close before inspecting the result so the short-read path no longer leaks the handle */ 35 | if (res != 32) { 36 | return 1; 37 | } 38 | #endif 39 | 40 | return 0; 41 | } 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/sha512.cu: -------------------------------------------------------------------------------- 1 | /* LibTomCrypt, modular cryptographic library -- Tom St Denis 2 | * 3 | * LibTomCrypt is a library that provides various cryptographic 4 | * algorithms in a highly modular
and flexible manner. 5 | * 6 | * The library is free for all purposes without any express 7 | * guarantee it works. 8 | * 9 | * Tom St Denis, tomstdenis@gmail.com, http://libtom.org 10 | */ 11 | 12 | #include "fixedint.h" 13 | #include "sha512.h" 14 | 15 | #ifdef __CUDA_ARCH__ 16 | #define K_DEF __device__ 17 | #else 18 | #define K_DEF 19 | #endif 20 | 21 | static const uint64_t K_DEF K[80] = { 22 | UINT64_C(0x428a2f98d728ae22), UINT64_C(0x7137449123ef65cd), 23 | UINT64_C(0xb5c0fbcfec4d3b2f), UINT64_C(0xe9b5dba58189dbbc), 24 | UINT64_C(0x3956c25bf348b538), UINT64_C(0x59f111f1b605d019), 25 | UINT64_C(0x923f82a4af194f9b), UINT64_C(0xab1c5ed5da6d8118), 26 | UINT64_C(0xd807aa98a3030242), UINT64_C(0x12835b0145706fbe), 27 | UINT64_C(0x243185be4ee4b28c), UINT64_C(0x550c7dc3d5ffb4e2), 28 | UINT64_C(0x72be5d74f27b896f), UINT64_C(0x80deb1fe3b1696b1), 29 | UINT64_C(0x9bdc06a725c71235), UINT64_C(0xc19bf174cf692694), 30 | UINT64_C(0xe49b69c19ef14ad2), UINT64_C(0xefbe4786384f25e3), 31 | UINT64_C(0x0fc19dc68b8cd5b5), UINT64_C(0x240ca1cc77ac9c65), 32 | UINT64_C(0x2de92c6f592b0275), UINT64_C(0x4a7484aa6ea6e483), 33 | UINT64_C(0x5cb0a9dcbd41fbd4), UINT64_C(0x76f988da831153b5), 34 | UINT64_C(0x983e5152ee66dfab), UINT64_C(0xa831c66d2db43210), 35 | UINT64_C(0xb00327c898fb213f), UINT64_C(0xbf597fc7beef0ee4), 36 | UINT64_C(0xc6e00bf33da88fc2), UINT64_C(0xd5a79147930aa725), 37 | UINT64_C(0x06ca6351e003826f), UINT64_C(0x142929670a0e6e70), 38 | UINT64_C(0x27b70a8546d22ffc), UINT64_C(0x2e1b21385c26c926), 39 | UINT64_C(0x4d2c6dfc5ac42aed), UINT64_C(0x53380d139d95b3df), 40 | UINT64_C(0x650a73548baf63de), UINT64_C(0x766a0abb3c77b2a8), 41 | UINT64_C(0x81c2c92e47edaee6), UINT64_C(0x92722c851482353b), 42 | UINT64_C(0xa2bfe8a14cf10364), UINT64_C(0xa81a664bbc423001), 43 | UINT64_C(0xc24b8b70d0f89791), UINT64_C(0xc76c51a30654be30), 44 | UINT64_C(0xd192e819d6ef5218), UINT64_C(0xd69906245565a910), 45 | UINT64_C(0xf40e35855771202a), UINT64_C(0x106aa07032bbd1b8), 46 | UINT64_C(0x19a4c116b8d2d0c8), 
UINT64_C(0x1e376c085141ab53), 47 | UINT64_C(0x2748774cdf8eeb99), UINT64_C(0x34b0bcb5e19b48a8), 48 | UINT64_C(0x391c0cb3c5c95a63), UINT64_C(0x4ed8aa4ae3418acb), 49 | UINT64_C(0x5b9cca4f7763e373), UINT64_C(0x682e6ff3d6b2b8a3), 50 | UINT64_C(0x748f82ee5defb2fc), UINT64_C(0x78a5636f43172f60), 51 | UINT64_C(0x84c87814a1f0ab72), UINT64_C(0x8cc702081a6439ec), 52 | UINT64_C(0x90befffa23631e28), UINT64_C(0xa4506cebde82bde9), 53 | UINT64_C(0xbef9a3f7b2c67915), UINT64_C(0xc67178f2e372532b), 54 | UINT64_C(0xca273eceea26619c), UINT64_C(0xd186b8c721c0c207), 55 | UINT64_C(0xeada7dd6cde0eb1e), UINT64_C(0xf57d4f7fee6ed178), 56 | UINT64_C(0x06f067aa72176fba), UINT64_C(0x0a637dc5a2c898a6), 57 | UINT64_C(0x113f9804bef90dae), UINT64_C(0x1b710b35131c471b), 58 | UINT64_C(0x28db77f523047d84), UINT64_C(0x32caab7b40c72493), 59 | UINT64_C(0x3c9ebe0a15c9bebc), UINT64_C(0x431d67c49c100d4c), 60 | UINT64_C(0x4cc5d4becb3e42b6), UINT64_C(0x597f299cfc657e2a), 61 | UINT64_C(0x5fcb6fab3ad6faec), UINT64_C(0x6c44198c4a475817) 62 | }; 63 | 64 | /* Various logical functions */ 65 | 66 | #define ROR64c(x, y) \ 67 | ( ((((x)&UINT64_C(0xFFFFFFFFFFFFFFFF))>>((uint64_t)(y)&UINT64_C(63))) | \ 68 | ((x)<<((uint64_t)(64-((y)&UINT64_C(63)))))) & UINT64_C(0xFFFFFFFFFFFFFFFF)) 69 | 70 | #define STORE64H(x, y) \ 71 | { (y)[0] = (unsigned char)(((x)>>56)&255); (y)[1] = (unsigned char)(((x)>>48)&255); \ 72 | (y)[2] = (unsigned char)(((x)>>40)&255); (y)[3] = (unsigned char)(((x)>>32)&255); \ 73 | (y)[4] = (unsigned char)(((x)>>24)&255); (y)[5] = (unsigned char)(((x)>>16)&255); \ 74 | (y)[6] = (unsigned char)(((x)>>8)&255); (y)[7] = (unsigned char)((x)&255); } 75 | 76 | #define LOAD64H(x, y) \ 77 | { x = (((uint64_t)((y)[0] & 255))<<56)|(((uint64_t)((y)[1] & 255))<<48) | \ 78 | (((uint64_t)((y)[2] & 255))<<40)|(((uint64_t)((y)[3] & 255))<<32) | \ 79 | (((uint64_t)((y)[4] & 255))<<24)|(((uint64_t)((y)[5] & 255))<<16) | \ 80 | (((uint64_t)((y)[6] & 255))<<8)|(((uint64_t)((y)[7] & 255))); } 81 | 82 | 83 | #define Ch(x,y,z) 
(z ^ (x & (y ^ z))) 84 | #define Maj(x,y,z) (((x | y) & z) | (x & y)) 85 | #define S(x, n) ROR64c(x, n) 86 | #define R(x, n) (((x) &UINT64_C(0xFFFFFFFFFFFFFFFF))>>((uint64_t)n)) 87 | #define Sigma0(x) (S(x, 28) ^ S(x, 34) ^ S(x, 39)) 88 | #define Sigma1(x) (S(x, 14) ^ S(x, 18) ^ S(x, 41)) 89 | #define Gamma0(x) (S(x, 1) ^ S(x, 8) ^ R(x, 7)) 90 | #define Gamma1(x) (S(x, 19) ^ S(x, 61) ^ R(x, 6)) 91 | #ifndef MIN 92 | #define MIN(x, y) ( ((x)<(y))?(x):(y) ) 93 | #endif 94 | 95 | /* compress 1024-bits */ 96 | static int __device__ __host__ sha512_compress(sha512_context *md, unsigned char *buf) 97 | { 98 | uint64_t S[8], W[80], t0, t1; 99 | int i; 100 | 101 | /* copy state into S */ 102 | for (i = 0; i < 8; i++) { 103 | S[i] = md->state[i]; 104 | } 105 | 106 | /* copy the state into 1024-bits into W[0..15] */ 107 | for (i = 0; i < 16; i++) { 108 | LOAD64H(W[i], buf + (8*i)); 109 | } 110 | 111 | /* fill W[16..79] */ 112 | for (i = 16; i < 80; i++) { 113 | W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16]; 114 | } 115 | 116 | /* Compress */ 117 | #define RND(a,b,c,d,e,f,g,h,i) \ 118 | t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \ 119 | t1 = Sigma0(a) + Maj(a, b, c);\ 120 | d += t0; \ 121 | h = t0 + t1; 122 | 123 | for (i = 0; i < 80; i += 8) { 124 | RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0); 125 | RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1); 126 | RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2); 127 | RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3); 128 | RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4); 129 | RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5); 130 | RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6); 131 | RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7); 132 | } 133 | 134 | #undef RND 135 | 136 | 137 | 138 | /* feedback */ 139 | for (i = 0; i < 8; i++) { 140 | md->state[i] = md->state[i] + S[i]; 141 | } 142 | 143 | return 0; 144 | } 145 | 146 | 147 | /** 148 | Initialize the hash state 149 | @param md The hash state you 
wish to initialize 150 | @return 0 if successful, 1 if md is NULL 151 | */ 152 | int __device__ __host__ sha512_init(sha512_context * md) { 153 | if (md == NULL) return 1; 154 | 155 | md->curlen = 0; 156 | md->length = 0; 157 | md->state[0] = UINT64_C(0x6a09e667f3bcc908); 158 | md->state[1] = UINT64_C(0xbb67ae8584caa73b); 159 | md->state[2] = UINT64_C(0x3c6ef372fe94f82b); 160 | md->state[3] = UINT64_C(0xa54ff53a5f1d36f1); 161 | md->state[4] = UINT64_C(0x510e527fade682d1); 162 | md->state[5] = UINT64_C(0x9b05688c2b3e6c1f); 163 | md->state[6] = UINT64_C(0x1f83d9abfb41bd6b); 164 | md->state[7] = UINT64_C(0x5be0cd19137e2179); 165 | 166 | return 0; 167 | } 168 | 169 | /** 170 | Process a block of memory through the hash 171 | @param md The hash state 172 | @param in The data to hash 173 | @param inlen The length of the data (octets) 174 | @return 0 if successful; 1 if md or in is NULL or md->curlen exceeds the buffer size; otherwise the non-zero status of sha512_compress 175 | */ 176 | int sha512_update(sha512_context * md, const unsigned char *in, size_t inlen) 177 | { 178 | size_t n; 179 | size_t i; 180 | int err; 181 | if (md == NULL) return 1; 182 | if (in == NULL) return 1; 183 | if (md->curlen > sizeof(md->buf)) { 184 | return 1; 185 | } 186 | while (inlen > 0) { 187 | if (md->curlen == 0 && inlen >= 128) { /* nothing buffered and a whole 128-byte block is available: compress straight from the caller's data */ 188 | if ((err = sha512_compress (md, (unsigned char *)in)) != 0) { 189 | return err; 190 | } 191 | md->length += 128 * 8; 192 | in += 128; 193 | inlen -= 128; 194 | } else { /* otherwise stage as many bytes as fit into md->buf and compress once it holds 128 bytes */ 195 | n = MIN(inlen, (128 - md->curlen)); 196 | 197 | for (i = 0; i < n; i++) { 198 | md->buf[i + md->curlen] = in[i]; 199 | } 200 | 201 | 202 | md->curlen += n; 203 | in += n; 204 | inlen -= n; 205 | if (md->curlen == 128) { 206 | if ((err = sha512_compress (md, md->buf)) != 0) { 207 | return err; 208 | } 209 | md->length += 8*128; 210 | md->curlen = 0; 211 | } 212 | } 213 | } 214 | return 0; 215 | } 216 | 217 | /** 218 | Terminate the hash to get the digest 219 | @param md The hash state 220 | @param out [out] The destination of the hash (64 bytes) 221 | @return 0 if successful, 1 if md or out is NULL or md->curlen is not below the buffer size 222 | */ 223 | int
sha512_final(sha512_context * md, unsigned char *out) 224 | { 225 | int i; 226 | 227 | if (md == NULL) return 1; 228 | if (out == NULL) return 1; 229 | 230 | if (md->curlen >= sizeof(md->buf)) { 231 | return 1; 232 | } 233 | 234 | /* increase the length of the message */ 235 | md->length += md->curlen * UINT64_C(8); 236 | 237 | /* append the '1' bit */ 238 | md->buf[md->curlen++] = (unsigned char)0x80; 239 | 240 | /* if the length is currently above 112 bytes we append zeros 241 | * then compress. Then we can fall back to padding zeros and length 242 | * encoding like normal. 243 | */ 244 | if (md->curlen > 112) { 245 | while (md->curlen < 128) { 246 | md->buf[md->curlen++] = (unsigned char)0; 247 | } 248 | sha512_compress(md, md->buf); 249 | md->curlen = 0; 250 | } 251 | 252 | /* pad up to 120 bytes with zeroes 253 | * note: bytes 112 to 120 are the 64 MSB of the length. We assume that you won't hash 254 | * > 2^64 bits of data... :-) 255 | */ 256 | while (md->curlen < 120) { 257 | md->buf[md->curlen++] = (unsigned char)0; 258 | } 259 | 260 | /* store length */ 261 | STORE64H(md->length, md->buf+120); 262 | sha512_compress(md, md->buf); 263 | 264 | /* copy output */ 265 | for (i = 0; i < 8; i++) { 266 | STORE64H(md->state[i], out+(8*i)); 267 | } 268 | 269 | return 0; 270 | } 271 | /* One-shot helper: runs sha512_init, sha512_update over the whole message and sha512_final on a local context; returns the first non-zero status, or 0 on success. */ 272 | int sha512(const unsigned char *message, size_t message_len, unsigned char *out) 273 | { 274 | sha512_context ctx; 275 | int ret; 276 | if ((ret = sha512_init(&ctx))) return ret; 277 | if ((ret = sha512_update(&ctx, message, message_len))) return ret; 278 | if ((ret = sha512_final(&ctx, out))) return ret; 279 | return 0; 280 | } 281 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/sha512.h: -------------------------------------------------------------------------------- 1 | #ifndef SHA512_H 2 | #define SHA512_H 3 | 4 | #include 5 | 6 | #include "fixedint.h" 7 | 8 | /* state */ 9 | typedef struct sha512_context_ { 10 | uint64_t
length, state[8]; 11 | size_t curlen; 12 | unsigned char buf[128]; 13 | } sha512_context; 14 | 15 | 16 | int __device__ __host__ sha512_init(sha512_context * md); 17 | int __device__ __host__ sha512_final(sha512_context * md, unsigned char *out); 18 | int __device__ __host__ sha512_update(sha512_context * md, const unsigned char *in, size_t inlen); 19 | int __device__ __host__ sha512(const unsigned char *message, size_t message_len, unsigned char *out); 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/sign.cu: -------------------------------------------------------------------------------- 1 | #include "ed25519.h" 2 | #include "sha512.h" 3 | #include "ge.h" 4 | #include "sc.h" 5 | #include "gpu_common.h" 6 | #include "gpu_ctx.h" 7 | 8 | 9 | static void __device__ __host__ 10 | ed25519_sign_device(unsigned char *signature, 11 | const unsigned char *message, 12 | size_t message_len, 13 | const unsigned char *public_key, 14 | const unsigned char *private_key) { 15 | sha512_context hash; 16 | unsigned char hram[64]; 17 | unsigned char r[64]; 18 | ge_p3 R; 19 | 20 | 21 | sha512_init(&hash); 22 | sha512_update(&hash, private_key + 32, 32); 23 | sha512_update(&hash, message, message_len); 24 | sha512_final(&hash, r); 25 | 26 | sc_reduce(r); 27 | ge_scalarmult_base(&R, r); 28 | ge_p3_tobytes(signature, &R); 29 | 30 | sha512_init(&hash); 31 | sha512_update(&hash, signature, 32); 32 | sha512_update(&hash, public_key, 32); 33 | sha512_update(&hash, message, message_len); 34 | sha512_final(&hash, hram); 35 | 36 | sc_reduce(hram); 37 | sc_muladd(signature + 32, hram, private_key, r); 38 | } 39 | 40 | void ed25519_sign(unsigned char *signature, 41 | const unsigned char *message, 42 | size_t message_len, 43 | const unsigned char *public_key, 44 | const unsigned char *private_key) { 45 | ed25519_sign_device(signature, message, message_len, public_key, private_key); 46 | } 47 | 48 | 49 | 50 | __global__ 
void ed25519_sign_kernel(unsigned char* packets, 51 | uint32_t message_size, 52 | uint32_t* public_key_offsets, 53 | uint32_t* private_key_offsets, 54 | uint32_t* message_start_offsets, 55 | uint32_t* message_lens, 56 | size_t num_transactions, 57 | uint8_t* out) 58 | { 59 | int i = blockIdx.x * blockDim.x + threadIdx.x; 60 | if (i < num_transactions) { 61 | uint32_t message_start_offset = message_start_offsets[i]; 62 | uint32_t public_key_offset = public_key_offsets[i]; 63 | uint32_t private_key_offset = private_key_offsets[i]; 64 | uint32_t message_len = message_lens[i]; 65 | 66 | ed25519_sign_device(&out[i * SIG_SIZE], 67 | &packets[message_start_offset], 68 | message_len, 69 | &packets[public_key_offset], 70 | &packets[private_key_offset]); 71 | } 72 | } 73 | 74 | 75 | 76 | void ed25519_sign_many(const gpu_Elems* elems, 77 | uint32_t num_elems, 78 | uint32_t message_size, 79 | uint32_t total_packets, 80 | uint32_t total_signatures, 81 | const uint32_t* message_lens, 82 | const uint32_t* public_key_offsets, 83 | const uint32_t* private_key_offsets, 84 | const uint32_t* message_start_offsets, 85 | uint8_t* signatures_out, 86 | uint8_t use_non_default_stream 87 | ) { 88 | int num_threads_per_block = 64; 89 | int num_blocks = ROUND_UP_DIV(total_signatures, num_threads_per_block); 90 | size_t sig_out_size = SIG_SIZE * total_signatures; 91 | 92 | if (0 == total_packets) { 93 | return; 94 | } 95 | 96 | uint32_t total_packets_size = total_packets * message_size; 97 | 98 | LOG("signing %d packets sig_size: %zu message_size: %d\n", 99 | total_packets, sig_out_size, message_size); 100 | 101 | gpu_ctx_t* gpu_ctx = get_gpu_ctx(); 102 | verify_ctx_t* cur_ctx = &gpu_ctx->verify_ctx; 103 | 104 | cudaStream_t stream = 0; 105 | if (0 != use_non_default_stream) { 106 | stream = gpu_ctx->stream; 107 | } 108 | 109 | setup_gpu_ctx(cur_ctx, 110 | elems, 111 | num_elems, 112 | message_size, 113 | total_packets, 114 | total_packets_size, 115 | total_signatures, 116 | message_lens, 117 
| public_key_offsets, 118 | private_key_offsets, 119 | message_start_offsets, 120 | sig_out_size, 121 | stream 122 | ); 123 | 124 | LOG("signing blocks: %d threads_per_block: %d\n", num_blocks, num_threads_per_block); 125 | ed25519_sign_kernel<<>> 126 | (cur_ctx->packets, 127 | message_size, 128 | cur_ctx->public_key_offsets, 129 | cur_ctx->signature_offsets, 130 | cur_ctx->message_start_offsets, 131 | cur_ctx->message_lens, 132 | total_signatures, 133 | cur_ctx->out); /* report kernel launch failures immediately, as ed25519_verify_many does after its launch */ CUDA_CHK(cudaPeekAtLastError()); 134 | 135 | cudaError_t err = cudaMemcpyAsync(signatures_out, cur_ctx->out, sig_out_size, cudaMemcpyDeviceToHost, stream); 136 | if (err != cudaSuccess) { 137 | fprintf(stderr, "sign: cudaMemcpy(out) error: out = %p cur_ctx->out = %p size = %zu num: %d elems = %p\n", 138 | signatures_out, cur_ctx->out, sig_out_size, num_elems, elems); 139 | } 140 | CUDA_CHK(err); 141 | 142 | CUDA_CHK(cudaStreamSynchronize(stream)); 143 | 144 | release_gpu_ctx(gpu_ctx); 145 | } 146 | 147 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/verify.cu: -------------------------------------------------------------------------------- 1 | #include "sha512.h" 2 | #include 3 | #include 4 | #include "sc.cu" 5 | #include "fe.cu" 6 | #include "ge.cu" 7 | #include "sha512.cu" 8 | 9 | #include "ed25519.h" 10 | #include 11 | 12 | #include "gpu_common.h" 13 | #include "gpu_ctx.h" 14 | 15 | #define USE_CLOCK_GETTIME 16 | #include "perftime.h" 17 | 18 | static int __host__ __device__ consttime_equal(const unsigned char *x, const unsigned char *y) { 19 | unsigned char r = 0; 20 | 21 | r = x[0] ^ y[0]; 22 | #define F(i) r |= x[i] ^ y[i] 23 | F(1); 24 | F(2); 25 | F(3); 26 | F(4); 27 | F(5); 28 | F(6); 29 | F(7); 30 | F(8); 31 | F(9); 32 | F(10); 33 | F(11); 34 | F(12); 35 | F(13); 36 | F(14); 37 | F(15); 38 | F(16); 39 | F(17); 40 | F(18); 41 | F(19); 42 | F(20); 43 | F(21); 44 | F(22); 45 | F(23); 46 | F(24); 47 | F(25); 48 | F(26); 49 | F(27); 50 | F(28); 51 | F(29); 52 | F(30); 53
| F(31); 54 | #undef F 55 | 56 | return !r; 57 | } 58 | 59 | static int __device__ __host__ 60 | ed25519_verify_device(const unsigned char *signature, 61 | const unsigned char *message, 62 | uint32_t message_len, 63 | const unsigned char *public_key) { 64 | unsigned char h[64]; 65 | unsigned char checker[32]; 66 | sha512_context hash; 67 | ge_p3 A; 68 | ge_p2 R; 69 | 70 | if (signature[63] & 224) { 71 | return 0; 72 | } 73 | 74 | if (ge_frombytes_negate_vartime(&A, public_key) != 0) { 75 | return 0; 76 | } 77 | 78 | sha512_init(&hash); 79 | sha512_update(&hash, signature, 32); 80 | sha512_update(&hash, public_key, 32); 81 | sha512_update(&hash, message, message_len); 82 | sha512_final(&hash, h); 83 | 84 | sc_reduce(h); 85 | ge_double_scalarmult_vartime(&R, h, &A, signature + 32); 86 | ge_tobytes(checker, &R); 87 | 88 | if (!consttime_equal(checker, signature)) { 89 | return 0; 90 | } 91 | 92 | return 1; 93 | } 94 | 95 | int 96 | ed25519_verify(const unsigned char *signature, 97 | const unsigned char *message, 98 | uint32_t message_len, 99 | const unsigned char *public_key) { 100 | return ed25519_verify_device(signature, message, message_len, public_key); 101 | } 102 | 103 | __global__ void ed25519_verify_kernel(const uint8_t* packets, 104 | uint32_t message_size, 105 | uint32_t* message_lens, 106 | uint32_t* public_key_offsets, 107 | uint32_t* signature_offsets, 108 | uint32_t* message_start_offsets, 109 | size_t num_keys, 110 | uint8_t* out) 111 | { 112 | int i = blockIdx.x * blockDim.x + threadIdx.x; 113 | if (i < num_keys) { 114 | uint32_t message_start_offset = message_start_offsets[i]; 115 | uint32_t signature_offset = signature_offsets[i]; 116 | uint32_t public_key_offset = public_key_offsets[i]; 117 | uint32_t message_len = message_lens[i]; 118 | 119 | out[i] = ed25519_verify_device(&packets[signature_offset], 120 | &packets[message_start_offset], 121 | message_len, 122 | &packets[public_key_offset]); 123 | } 124 | } 125 | 126 | bool g_verbose = false; 127 | 
128 | void ed25519_set_verbose(bool val) { 129 | g_verbose = val; 130 | } 131 | 132 | void ed25519_verify_many(const gpu_Elems* elems, 133 | uint32_t num_elems, 134 | uint32_t message_size, 135 | uint32_t total_packets, 136 | uint32_t total_signatures, 137 | const uint32_t* message_lens, 138 | const uint32_t* public_key_offsets, 139 | const uint32_t* signature_offsets, 140 | const uint32_t* message_start_offsets, 141 | uint8_t* out, 142 | uint8_t use_non_default_stream) 143 | { 144 | LOG("Starting verify_many: num_elems: %d total_signatures: %d total_packets: %d message_size: %d\n", 145 | num_elems, total_signatures, total_packets, message_size); 146 | 147 | size_t out_size = total_signatures * sizeof(uint8_t); 148 | 149 | uint32_t total_packets_size = total_packets * message_size; 150 | 151 | if (0 == total_packets) { 152 | return; 153 | } 154 | 155 | // Device allocate 156 | 157 | gpu_ctx_t* gpu_ctx = get_gpu_ctx(); 158 | 159 | verify_ctx_t* cur_ctx = &gpu_ctx->verify_ctx; 160 | 161 | cudaStream_t stream = 0; 162 | if (0 != use_non_default_stream) { 163 | stream = gpu_ctx->stream; 164 | } 165 | 166 | setup_gpu_ctx(cur_ctx, 167 | elems, 168 | num_elems, 169 | message_size, 170 | total_packets, 171 | total_packets_size, 172 | total_signatures, 173 | message_lens, 174 | public_key_offsets, 175 | signature_offsets, 176 | message_start_offsets, 177 | out_size, 178 | stream 179 | ); 180 | 181 | int num_threads_per_block = 64; 182 | int num_blocks = ROUND_UP_DIV(total_signatures, num_threads_per_block); 183 | LOG("num_blocks: %d threads_per_block: %d keys: %d out: %p stream: %p\n", 184 | num_blocks, num_threads_per_block, (int)total_packets, out, gpu_ctx->stream); 185 | 186 | perftime_t start, end; 187 | get_time(&start); 188 | ed25519_verify_kernel<<>> 189 | (cur_ctx->packets, 190 | message_size, 191 | cur_ctx->message_lens, 192 | cur_ctx->public_key_offsets, 193 | cur_ctx->signature_offsets, 194 | cur_ctx->message_start_offsets, 195 | cur_ctx->offsets_len, 196 | 
cur_ctx->out); 197 | CUDA_CHK(cudaPeekAtLastError()); 198 | 199 | cudaError_t err = cudaMemcpyAsync(out, cur_ctx->out, out_size, cudaMemcpyDeviceToHost, stream); 200 | if (err != cudaSuccess) { 201 | fprintf(stderr, "verify: cudaMemcpy(out) error: out = %p cur_ctx->out = %p size = %zu num: %d elems = %p\n", 202 | out, cur_ctx->out, out_size, num_elems, elems); 203 | } 204 | CUDA_CHK(err); 205 | 206 | CUDA_CHK(cudaStreamSynchronize(stream)); 207 | 208 | release_gpu_ctx(gpu_ctx); 209 | 210 | get_time(&end); 211 | LOG("time diff: %f\n", get_diff(&start, &end)); 212 | } 213 | 214 | // Ensure copyright and license notice is embedded in the binary 215 | const char* ed25519_license() { 216 | return "Copyright (c) 2018 Solana Labs, Inc. " 217 | "Licensed under the Apache License, Version 2.0 " 218 | ""; 219 | } 220 | 221 | int cuda_host_register(void* ptr, size_t size, unsigned int flags) { 222 | return cudaHostRegister(ptr, size, flags); 223 | } 224 | 225 | int cuda_host_unregister(void* ptr) { 226 | return cudaHostUnregister(ptr); 227 | } 228 | -------------------------------------------------------------------------------- /src/cuda-headers/gpu_common.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #ifndef GPU_COMMON_H 5 | #define GPU_COMMON_H 6 | 7 | extern bool g_verbose; 8 | 9 | #define LOG(...) 
if (g_verbose) { printf(__VA_ARGS__); } 10 | 11 | #define ROUND_UP_DIV(x, y) (((x) + (y) - 1) / (y)) 12 | 13 | #define CUDA_CHK(ans) { cuda_assert((ans), __FILE__, __LINE__); } 14 | 15 | inline void cuda_assert(cudaError_t err, const char *file, int line) 16 | { 17 | if (err != cudaSuccess) 18 | { 19 | fprintf(stderr,"ERR: %s %s %d\n", cudaGetErrorString(err), file, line); 20 | assert(0); 21 | } 22 | } 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/cuda-poh-verify/poh_verify.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "gpu_common.h" 5 | #include "sha256.cu" 6 | 7 | #define MAX_NUM_GPUS 8 8 | #define MAX_QUEUE_SIZE 8 9 | #define NUM_THREADS_PER_BLOCK 64 10 | 11 | 12 | __global__ void poh_verify_kernel(uint8_t* hashes, uint64_t* num_hashes_arr, size_t num_elems) { 13 | size_t idx = (size_t)blockIdx.x * blockDim.x + threadIdx.x; /* widen before multiplying so the global index cannot wrap at 2^32 */ 14 | if (idx >= num_elems) return; 15 | 16 | uint8_t hash[SHA256_BLOCK_SIZE]; 17 | 18 | memcpy(hash, &hashes[idx * SHA256_BLOCK_SIZE], SHA256_BLOCK_SIZE); 19 | 20 | for (size_t i = 0; i < num_hashes_arr[idx]; i++) { 21 | hash_state sha_state; 22 | sha256_init(&sha_state); 23 | sha256_process(&sha_state, hash, SHA256_BLOCK_SIZE); 24 | sha256_done(&sha_state, hash); 25 | } 26 | 27 | memcpy(&hashes[idx * SHA256_BLOCK_SIZE], hash, SHA256_BLOCK_SIZE); 28 | } 29 | 30 | typedef struct { 31 | uint8_t* hashes; 32 | uint64_t* num_hashes_arr; 33 | size_t num_elems_alloc; 34 | pthread_mutex_t mutex; 35 | cudaStream_t stream; 36 | } gpu_ctx; 37 | 38 | static pthread_mutex_t g_ctx_mutex = PTHREAD_MUTEX_INITIALIZER; 39 | 40 | static gpu_ctx g_gpu_ctx[MAX_NUM_GPUS][MAX_QUEUE_SIZE] = {0}; 41 | static uint32_t g_cur_gpu = 0; 42 | static uint32_t g_cur_queue[MAX_NUM_GPUS] = {0}; 43 | static int32_t g_total_gpus = -1; 44 | 45 | static bool poh_init_locked() { 46 | if (g_total_gpus == -1) { 47 |
cudaGetDeviceCount(&g_total_gpus); 48 | g_total_gpus = min(MAX_NUM_GPUS, g_total_gpus); 49 | LOG("total_gpus: %d\n", g_total_gpus); 50 | for (int gpu = 0; gpu < g_total_gpus; gpu++) { 51 | CUDA_CHK(cudaSetDevice(gpu)); 52 | for (int queue = 0; queue < MAX_QUEUE_SIZE; queue++) { 53 | int err = pthread_mutex_init(&g_gpu_ctx[gpu][queue].mutex, NULL); 54 | if (err != 0) { 55 | fprintf(stderr, "pthread_mutex_init error %d gpu: %d queue: %d\n", 56 | err, gpu, queue); 57 | g_total_gpus = 0; 58 | return false; 59 | } 60 | CUDA_CHK(cudaStreamCreate(&g_gpu_ctx[gpu][queue].stream)); 61 | } 62 | } 63 | } 64 | return g_total_gpus > 0; 65 | } 66 | 67 | bool poh_init() { 68 | cudaFree(0); 69 | pthread_mutex_lock(&g_ctx_mutex); 70 | bool success = poh_init_locked(); 71 | pthread_mutex_unlock(&g_ctx_mutex); 72 | return success; 73 | } 74 | 75 | extern "C" { 76 | int poh_verify_many(uint8_t* hashes, 77 | const uint64_t* num_hashes_arr, 78 | size_t num_elems, 79 | uint8_t use_non_default_stream) 80 | { 81 | LOG("Starting poh_verify_many: num_elems: %zu\n", num_elems); 82 | 83 | if (num_elems == 0) return 0; 84 | 85 | int32_t cur_gpu, cur_queue; 86 | 87 | pthread_mutex_lock(&g_ctx_mutex); 88 | if (!poh_init_locked()) { 89 | pthread_mutex_unlock(&g_ctx_mutex); 90 | LOG("No GPUs, exiting...\n"); 91 | return 1; 92 | } 93 | cur_gpu = g_cur_gpu; 94 | g_cur_gpu++; 95 | g_cur_gpu %= g_total_gpus; 96 | cur_queue = g_cur_queue[cur_gpu]; 97 | g_cur_queue[cur_gpu]++; 98 | g_cur_queue[cur_gpu] %= MAX_QUEUE_SIZE; 99 | pthread_mutex_unlock(&g_ctx_mutex); 100 | 101 | gpu_ctx* cur_ctx = &g_gpu_ctx[cur_gpu][cur_queue]; 102 | pthread_mutex_lock(&cur_ctx->mutex); 103 | 104 | CUDA_CHK(cudaSetDevice(cur_gpu)); 105 | 106 | LOG("cur gpu: %d cur queue: %d\n", cur_gpu, cur_queue); 107 | 108 | size_t hashes_size = num_elems * SHA256_BLOCK_SIZE * sizeof(uint8_t); 109 | size_t num_hashes_size = num_elems * sizeof(uint64_t); 110 | 111 | // Ensure there is enough memory allocated 112 | if (cur_ctx->hashes == NULL 
|| cur_ctx->num_elems_alloc < num_elems) { 113 | CUDA_CHK(cudaFree(cur_ctx->hashes)); 114 | CUDA_CHK(cudaMalloc(&cur_ctx->hashes, hashes_size)); 115 | CUDA_CHK(cudaFree(cur_ctx->num_hashes_arr)); 116 | CUDA_CHK(cudaMalloc(&cur_ctx->num_hashes_arr, num_hashes_size)); 117 | 118 | cur_ctx->num_elems_alloc = num_elems; 119 | } 120 | 121 | cudaStream_t stream = 0; 122 | if (0 != use_non_default_stream) { 123 | stream = cur_ctx->stream; 124 | } 125 | 126 | CUDA_CHK(cudaMemcpyAsync(cur_ctx->hashes, hashes, hashes_size, cudaMemcpyHostToDevice, stream)); 127 | CUDA_CHK(cudaMemcpyAsync(cur_ctx->num_hashes_arr, num_hashes_arr, num_hashes_size, cudaMemcpyHostToDevice, stream)); 128 | 129 | int num_blocks = ROUND_UP_DIV(num_elems, NUM_THREADS_PER_BLOCK); 130 | 131 | poh_verify_kernel<<>>(cur_ctx->hashes, cur_ctx->num_hashes_arr, num_elems); 132 | CUDA_CHK(cudaPeekAtLastError()); 133 | 134 | CUDA_CHK(cudaMemcpyAsync(hashes, cur_ctx->hashes, hashes_size, cudaMemcpyDeviceToHost, stream)); 135 | 136 | CUDA_CHK(cudaStreamSynchronize(stream)); 137 | 138 | pthread_mutex_unlock(&cur_ctx->mutex); 139 | 140 | return 0; 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /src/cuda-sha256/sha256.cu: -------------------------------------------------------------------------------- 1 | /* LibTomCrypt, modular cryptographic library -- Tom St Denis 2 | * 3 | * LibTomCrypt is a library that provides various cryptographic 4 | * algorithms in a highly modular and flexible manner. 5 | * 6 | * The library is free for all purposes without any express 7 | * guarantee it works. 
8 | */ 9 | 10 | /** 11 | @file sha256.c 12 | LTC_SHA256 by Tom St Denis 13 | */ 14 | 15 | #include "tomcrypt_macros.h" 16 | 17 | 18 | #define SHA256_BLOCK_SIZE 32 19 | 20 | struct sha256_state { 21 | ulong64 length; 22 | ulong32 state[8], curlen; 23 | unsigned char buf[64]; 24 | }; 25 | 26 | typedef struct { 27 | struct sha256_state sha256; 28 | } hash_state; 29 | 30 | #ifdef LTC_SMALL_CODE 31 | /* the K array */ 32 | static const ulong32 K[64] = { 33 | 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, 0x3956c25bUL, 34 | 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, 0xd807aa98UL, 0x12835b01UL, 35 | 0x243185beUL, 0x550c7dc3UL, 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 36 | 0xc19bf174UL, 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL, 37 | 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, 0x983e5152UL, 38 | 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, 0xc6e00bf3UL, 0xd5a79147UL, 39 | 0x06ca6351UL, 0x14292967UL, 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 40 | 0x53380d13UL, 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL, 41 | 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, 0xd192e819UL, 42 | 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, 0x19a4c116UL, 0x1e376c08UL, 43 | 0x2748774cUL, 0x34b0bcb5UL, 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 44 | 0x682e6ff3UL, 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL, 45 | 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL 46 | }; 47 | #endif 48 | 49 | /* Various logical functions */ 50 | #define Ch(x,y,z) (z ^ (x & (y ^ z))) 51 | #define Maj(x,y,z) (((x | y) & z) | (x & y)) 52 | #define S(x, n) RORc((x),(n)) 53 | #define R(x, n) (((x)&0xFFFFFFFFUL)>>(n)) 54 | #define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22)) 55 | #define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25)) 56 | #define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3)) 57 | #define Gamma1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10)) 58 | 59 | /* compress 512-bits */ 60 | #ifdef LTC_CLEAN_STACK 61 | static int _sha256_compress(hash_state * md, const unsigned 
char *buf) 62 | #else 63 | static int __host__ __device__ sha256_compress(hash_state * md, const unsigned char *buf) 64 | #endif 65 | { 66 | ulong32 S[8], W[64], t0, t1; 67 | #ifdef LTC_SMALL_CODE 68 | ulong32 t; 69 | #endif 70 | int i; 71 | 72 | /* copy state into S */ 73 | for (i = 0; i < 8; i++) { 74 | S[i] = md->sha256.state[i]; 75 | } 76 | 77 | /* copy the state into 512-bits into W[0..15] */ 78 | for (i = 0; i < 16; i++) { 79 | LOAD32H(W[i], buf + (4*i)); 80 | } 81 | 82 | /* fill W[16..63] */ 83 | for (i = 16; i < 64; i++) { 84 | W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16]; 85 | } 86 | 87 | /* Compress */ 88 | #ifdef LTC_SMALL_CODE 89 | #define RND(a,b,c,d,e,f,g,h,i) \ 90 | t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \ 91 | t1 = Sigma0(a) + Maj(a, b, c); \ 92 | d += t0; \ 93 | h = t0 + t1; 94 | 95 | for (i = 0; i < 64; ++i) { 96 | RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i); 97 | t = S[7]; S[7] = S[6]; S[6] = S[5]; S[5] = S[4]; 98 | S[4] = S[3]; S[3] = S[2]; S[2] = S[1]; S[1] = S[0]; S[0] = t; 99 | } 100 | #else 101 | #define RND(a,b,c,d,e,f,g,h,i,ki) \ 102 | t0 = h + Sigma1(e) + Ch(e, f, g) + ki + W[i]; \ 103 | t1 = Sigma0(a) + Maj(a, b, c); \ 104 | d += t0; \ 105 | h = t0 + t1; 106 | 107 | RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],0,0x428a2f98); 108 | RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],1,0x71374491); 109 | RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],2,0xb5c0fbcf); 110 | RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],3,0xe9b5dba5); 111 | RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],4,0x3956c25b); 112 | RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],5,0x59f111f1); 113 | RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],6,0x923f82a4); 114 | RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],7,0xab1c5ed5); 115 | RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],8,0xd807aa98); 116 | RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],9,0x12835b01); 117 | RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],10,0x243185be); 118 | 
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],11,0x550c7dc3); 119 | RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],12,0x72be5d74); 120 | RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],13,0x80deb1fe); 121 | RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],14,0x9bdc06a7); 122 | RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],15,0xc19bf174); 123 | RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],16,0xe49b69c1); 124 | RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],17,0xefbe4786); 125 | RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],18,0x0fc19dc6); 126 | RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],19,0x240ca1cc); 127 | RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],20,0x2de92c6f); 128 | RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],21,0x4a7484aa); 129 | RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],22,0x5cb0a9dc); 130 | RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],23,0x76f988da); 131 | RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],24,0x983e5152); 132 | RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],25,0xa831c66d); 133 | RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],26,0xb00327c8); 134 | RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],27,0xbf597fc7); 135 | RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],28,0xc6e00bf3); 136 | RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],29,0xd5a79147); 137 | RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],30,0x06ca6351); 138 | RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],31,0x14292967); 139 | RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],32,0x27b70a85); 140 | RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],33,0x2e1b2138); 141 | RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],34,0x4d2c6dfc); 142 | RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],35,0x53380d13); 143 | RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],36,0x650a7354); 144 | RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],37,0x766a0abb); 145 | RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],38,0x81c2c92e); 146 | RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],39,0x92722c85); 147 | RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],40,0xa2bfe8a1); 148 | 
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],41,0xa81a664b); 149 | RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],42,0xc24b8b70); 150 | RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],43,0xc76c51a3); 151 | RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],44,0xd192e819); 152 | RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],45,0xd6990624); 153 | RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],46,0xf40e3585); 154 | RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],47,0x106aa070); 155 | RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],48,0x19a4c116); 156 | RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],49,0x1e376c08); 157 | RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],50,0x2748774c); 158 | RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],51,0x34b0bcb5); 159 | RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],52,0x391c0cb3); 160 | RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],53,0x4ed8aa4a); 161 | RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],54,0x5b9cca4f); 162 | RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],55,0x682e6ff3); 163 | RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],56,0x748f82ee); 164 | RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],57,0x78a5636f); 165 | RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],58,0x84c87814); 166 | RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],59,0x8cc70208); 167 | RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],60,0x90befffa); 168 | RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],61,0xa4506ceb); 169 | RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],62,0xbef9a3f7); 170 | RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],63,0xc67178f2); 171 | 172 | #undef RND 173 | 174 | #endif 175 | 176 | /* feedback */ 177 | for (i = 0; i < 8; i++) { 178 | md->sha256.state[i] = md->sha256.state[i] + S[i]; 179 | } 180 | return CRYPT_OK; 181 | } 182 | 183 | #ifdef LTC_CLEAN_STACK 184 | static int sha256_compress(hash_state * md, const unsigned char *buf) 185 | { 186 | int err; 187 | err = _sha256_compress(md, buf); 188 | burn_stack(sizeof(ulong32) * 74); 189 | return err; 190 | } 191 | #endif 192 | 193 | /** 194 | Initialize the 
hash state 195 | @param md The hash state you wish to initialize 196 | @return CRYPT_OK if successful 197 | */ 198 | inline int __host__ __device__ sha256_init(hash_state * md) 199 | { 200 | LTC_ARGCHK(md != NULL); 201 | 202 | md->sha256.curlen = 0; 203 | md->sha256.length = 0; 204 | md->sha256.state[0] = 0x6A09E667UL; 205 | md->sha256.state[1] = 0xBB67AE85UL; 206 | md->sha256.state[2] = 0x3C6EF372UL; 207 | md->sha256.state[3] = 0xA54FF53AUL; 208 | md->sha256.state[4] = 0x510E527FUL; 209 | md->sha256.state[5] = 0x9B05688CUL; 210 | md->sha256.state[6] = 0x1F83D9ABUL; 211 | md->sha256.state[7] = 0x5BE0CD19UL; 212 | return CRYPT_OK; 213 | } 214 | 215 | /** 216 | Process a block of memory though the hash 217 | @param md The hash state 218 | @param in The data to hash 219 | @param inlen The length of the data (octets) 220 | @return CRYPT_OK if successful 221 | */ 222 | inline HASH_PROCESS(sha256_process, sha256_compress, sha256, 64) 223 | 224 | /** 225 | Terminate the hash to get the digest 226 | @param md The hash state 227 | @param out [out] The destination of the hash (32 bytes) 228 | @return CRYPT_OK if successful 229 | */ 230 | inline int __host__ __device__ sha256_done(hash_state * md, unsigned char *out) 231 | { 232 | int i; 233 | 234 | LTC_ARGCHK(md != NULL); 235 | LTC_ARGCHK(out != NULL); 236 | 237 | if (md->sha256.curlen >= sizeof(md->sha256.buf)) { 238 | return CRYPT_INVALID_ARG; 239 | } 240 | 241 | 242 | /* increase the length of the message */ 243 | md->sha256.length += md->sha256.curlen * 8; 244 | 245 | /* append the '1' bit */ 246 | md->sha256.buf[md->sha256.curlen++] = (unsigned char)0x80; 247 | 248 | /* if the length is currently above 56 bytes we append zeros 249 | * then compress. Then we can fall back to padding zeros and length 250 | * encoding like normal. 
251 | */ 252 | if (md->sha256.curlen > 56) { 253 | while (md->sha256.curlen < 64) { 254 | md->sha256.buf[md->sha256.curlen++] = (unsigned char)0; 255 | } 256 | sha256_compress(md, md->sha256.buf); 257 | md->sha256.curlen = 0; 258 | } 259 | 260 | /* pad upto 56 bytes of zeroes */ 261 | while (md->sha256.curlen < 56) { 262 | md->sha256.buf[md->sha256.curlen++] = (unsigned char)0; 263 | } 264 | 265 | /* store length */ 266 | STORE64H(md->sha256.length, md->sha256.buf+56); 267 | sha256_compress(md, md->sha256.buf); 268 | 269 | /* copy output */ 270 | for (i = 0; i < 8; i++) { 271 | STORE32H(md->sha256.state[i], out+(4*i)); 272 | } 273 | #ifdef LTC_CLEAN_STACK 274 | zeromem(md, sizeof(hash_state)); 275 | #endif 276 | return CRYPT_OK; 277 | } 278 | 279 | /** 280 | Self-test the hash 281 | @return CRYPT_OK if successful, CRYPT_NOP if self-tests have been disabled 282 | */ 283 | #if 0 284 | int sha256_test(void) 285 | { 286 | #ifndef LTC_TEST 287 | return CRYPT_NOP; 288 | #else 289 | static const struct { 290 | const char *msg; 291 | unsigned char hash[32]; 292 | } tests[] = { 293 | { "abc", 294 | { 0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, 295 | 0x41, 0x41, 0x40, 0xde, 0x5d, 0xae, 0x22, 0x23, 296 | 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, 297 | 0xb4, 0x10, 0xff, 0x61, 0xf2, 0x00, 0x15, 0xad } 298 | }, 299 | { "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 300 | { 0x24, 0x8d, 0x6a, 0x61, 0xd2, 0x06, 0x38, 0xb8, 301 | 0xe5, 0xc0, 0x26, 0x93, 0x0c, 0x3e, 0x60, 0x39, 302 | 0xa3, 0x3c, 0xe4, 0x59, 0x64, 0xff, 0x21, 0x67, 303 | 0xf6, 0xec, 0xed, 0xd4, 0x19, 0xdb, 0x06, 0xc1 } 304 | }, 305 | }; 306 | 307 | int i; 308 | unsigned char tmp[32]; 309 | hash_state md; 310 | 311 | for (i = 0; i < (int)(sizeof(tests) / sizeof(tests[0])); i++) { 312 | sha256_init(&md); 313 | sha256_process(&md, (unsigned char*)tests[i].msg, (unsigned long)strlen(tests[i].msg)); 314 | sha256_done(&md, tmp); 315 | if (compare_testvector(tmp, sizeof(tmp), tests[i].hash, 
sizeof(tests[i].hash), "SHA256", i)) { 316 | return CRYPT_FAIL_TESTVECTOR; 317 | } 318 | } 319 | return CRYPT_OK; 320 | #endif 321 | } 322 | #endif 323 | 324 | 325 | 326 | /* ref: $Format:%D$ */ 327 | /* git commit: $Format:%H$ */ 328 | /* commit time: $Format:%ai$ */ 329 | -------------------------------------------------------------------------------- /src/gpu-common.mk: -------------------------------------------------------------------------------- 1 | NVCC:=nvcc 2 | GPU_PTX_ARCH:=compute_35 3 | GPU_ARCHS?=sm_37,sm_50,sm_61,sm_70 4 | GPU_CFLAGS:=--gpu-code=$(GPU_ARCHS),$(GPU_PTX_ARCH) --gpu-architecture=$(GPU_PTX_ARCH) 5 | CFLAGS_release:=--ptxas-options=-v $(GPU_CFLAGS) -O3 -Xcompiler "-Wall -Werror -fPIC -Wno-strict-aliasing" 6 | CFLAGS_debug:=$(CFLAGS_release) -g 7 | CFLAGS:=$(CFLAGS_$V) 8 | -------------------------------------------------------------------------------- /src/jerasure-sys/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "jerasure-sys" 3 | description = "Rust bindings for jerasure 2.0" 4 | version = "0.1.0" 5 | homepage = "https://solana.com/" 6 | readme = "../jerasure/README" 7 | repository = "https://github.com/solana-labs/solana-perf-libs" 8 | authors = ["Solana Maintainers "] 9 | license-file = "../jerasure/COPYING" 10 | links = "Jerasure" 11 | build = "build.rs" 12 | 13 | [build-dependencies] 14 | cc = "1.0" 15 | -------------------------------------------------------------------------------- /src/jerasure-sys/build.rs: -------------------------------------------------------------------------------- 1 | extern crate cc; 2 | 3 | fn main() { 4 | cc::Build::new() 5 | .files(&[ 6 | "jerasure/src/galois.c", 7 | "jerasure/src/jerasure.c", 8 | "jerasure/src/reed_sol.c", 9 | "jerasure/src/cauchy.c", 10 | "jerasure/src/liberation.c", 11 | ]) 12 | .include("jerasure/include") 13 | .include("gf-complete/include") 14 | .compile("Jerasure"); 15 |
println!("cargo:rustc-link-lib=static=Jerasure"); 16 | } 17 | -------------------------------------------------------------------------------- /src/jerasure-sys/gf-complete: -------------------------------------------------------------------------------- 1 | ../gf-complete/ -------------------------------------------------------------------------------- /src/jerasure-sys/jerasure: -------------------------------------------------------------------------------- 1 | ../jerasure -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/Makefile: -------------------------------------------------------------------------------- 1 | SGX_SDK ?= /opt/sgxsdk 2 | 3 | C_Flags := -O2 -fpic -I. 4 | 5 | SGX_C_Flags := -Wno-implicit-function-declaration -std=c11 -m64 -O2 -nostdinc -DSGX_COMPAT -fpie -fstack-protector \ 6 | -IInclude -I. -I$(SGX_SDK)/include -I$(SGX_SDK)/include/tlibc -I$(SGX_SDK)/include/libcxx -fno-builtin-printf -I. 7 | 8 | C_Files := $(wildcard *.c) 9 | 10 | OUT ?= libs 11 | 12 | SGX_OBJ := sgxobj 13 | SGX_C_Objects := $(C_Files:%.c=$(SGX_OBJ)/%.o) 14 | 15 | NONSGX_OBJ := nonsgxobj 16 | NONSGX_C_Objects := $(C_Files:%.c=$(NONSGX_OBJ)/%.o) 17 | 18 | # none of these targets creates a file of its own name; clean is listed so a stray file called "clean" cannot mask it 19 | .PHONY: all run clean 20 | all: $(OUT)/libed25519.sgx.static.a $(OUT)/libed25519.static.a 21 | run: all 22 | 23 | $(SGX_OBJ)/%.o: %.c 24 | @echo "CC <= $<" 25 | @mkdir -p $(SGX_OBJ) 26 | $(CC) $(SGX_C_Flags) -c $< -o $@ 27 | 28 | $(NONSGX_OBJ)/%.o: %.c 29 | @echo "CC <= $<" 30 | @mkdir -p $(NONSGX_OBJ) 31 | $(CC) $(C_Flags) -c $< -o $@ 32 | 33 | $(OUT)/libed25519.sgx.static.a: $(SGX_C_Objects) 34 | @mkdir -p $(OUT) 35 | ar rcs $@ $^ 36 | 37 | $(OUT)/libed25519.static.a: $(NONSGX_C_Objects) 38 | @mkdir -p $(OUT) 39 | ar rcs $@ $^ 40 | 41 | clean: 42 | @rm -rf $(SGX_OBJ) $(NONSGX_OBJ) $(OUT) 43 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/add_scalar.c: 
-------------------------------------------------------------------------------- 1 | #include "ed25519.h" 2 | #include "ge.h" 3 | #include "sc.h" 4 | #include "sha512.h" 5 | 6 | 7 | /* see http://crypto.stackexchange.com/a/6215/4697 */ 8 | void ed25519_add_scalar(unsigned char *public_key, unsigned char *private_key, const unsigned char *scalar) { 9 | const unsigned char SC_1[32] = {1}; /* scalar with value 1 */ 10 | 11 | unsigned char n[32]; 12 | ge_p3 nB; 13 | ge_p1p1 A_p1p1; 14 | ge_p3 A; 15 | ge_p3 public_key_unpacked; 16 | ge_cached T; 17 | 18 | sha512_context hash; 19 | unsigned char hashbuf[64]; 20 | 21 | int i; 22 | 23 | /* copy the scalar and clear highest bit */ 24 | for (i = 0; i < 31; ++i) { 25 | n[i] = scalar[i]; 26 | } 27 | n[31] = scalar[31] & 127; 28 | 29 | /* private key: a = n + t */ 30 | if (private_key) { 31 | sc_muladd(private_key, SC_1, n, private_key); 32 | 33 | // https://github.com/orlp/ed25519/issues/3 34 | sha512_init(&hash); 35 | sha512_update(&hash, private_key + 32, 32); 36 | sha512_update(&hash, scalar, 32); 37 | sha512_final(&hash, hashbuf); 38 | for (i = 0; i < 32; ++i) { 39 | private_key[32 + i] = hashbuf[i]; 40 | } 41 | } 42 | 43 | /* public key: A = nB + T */ 44 | if (public_key) { 45 | /* if we know the private key we don't need a point addition, which is faster */ 46 | /* using a "timing attack" you could find out wether or not we know the private 47 | key, but this information seems rather useless - if this is important pass 48 | public_key and private_key seperately in 2 function calls */ 49 | if (private_key) { 50 | ge_scalarmult_base(&A, private_key); 51 | } else { 52 | /* unpack public key into T */ 53 | ge_frombytes_negate_vartime(&public_key_unpacked, public_key); 54 | fe_neg(public_key_unpacked.X, public_key_unpacked.X); /* undo negate */ 55 | fe_neg(public_key_unpacked.T, public_key_unpacked.T); /* undo negate */ 56 | ge_p3_to_cached(&T, &public_key_unpacked); 57 | 58 | /* calculate n*B */ 59 | ge_scalarmult_base(&nB, 
n); 60 | 61 | /* A = n*B + T */ 62 | ge_add(&A_p1p1, &nB, &T); 63 | ge_p1p1_to_p3(&A, &A_p1p1); 64 | } 65 | 66 | /* pack public key */ 67 | ge_p3_tobytes(public_key, &A); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | pwd=$PWD 5 | cd "$(dirname "$0")" 6 | 7 | echo --- Build 8 | ( 9 | set -x 10 | make OUT="$pwd"/libs 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/ed25519.h: -------------------------------------------------------------------------------- 1 | #ifndef ED25519_H 2 | #define ED25519_H 3 | 4 | #include 5 | 6 | #if defined(_WIN32) 7 | #if defined(ED25519_BUILD_DLL) 8 | #define ED25519_DECLSPEC __declspec(dllexport) 9 | #elif defined(ED25519_DLL) 10 | #define ED25519_DECLSPEC __declspec(dllimport) 11 | #else 12 | #define ED25519_DECLSPEC 13 | #endif 14 | #else 15 | #define ED25519_DECLSPEC 16 | #endif 17 | 18 | 19 | #ifdef __cplusplus 20 | extern "C" { 21 | #endif 22 | 23 | #ifndef ED25519_NO_SEED 24 | int ED25519_DECLSPEC ed25519_create_seed(unsigned char *seed); 25 | #endif 26 | 27 | void ED25519_DECLSPEC ed25519_create_keypair(unsigned char *public_key, unsigned char *private_key, const unsigned char *seed); 28 | void ED25519_DECLSPEC ed25519_sign(unsigned char *signature, const unsigned char *message, size_t message_len, const unsigned char *public_key, const unsigned char *private_key); 29 | int ED25519_DECLSPEC ed25519_verify(const unsigned char *signature, const unsigned char *message, size_t message_len, const unsigned char *public_key); 30 | void ED25519_DECLSPEC ed25519_add_scalar(unsigned char *public_key, unsigned char *private_key, const unsigned char *scalar); 31 | void ED25519_DECLSPEC ed25519_key_exchange(unsigned char *shared_secret, const unsigned char *public_key, 
const unsigned char *private_key); 32 | 33 | 34 | #ifdef __cplusplus 35 | } 36 | #endif 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/fe.h: -------------------------------------------------------------------------------- 1 | #ifndef FE_H 2 | #define FE_H 3 | 4 | #include "fixedint.h" 5 | 6 | 7 | /* 8 | fe means field element. 9 | Here the field is \Z/(2^255-19). 10 | An element t, entries t[0]...t[9], represents the integer 11 | t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9]. 12 | Bounds on each t[i] vary depending on context. 13 | */ 14 | 15 | 16 | typedef int32_t fe[10]; 17 | 18 | 19 | void fe_0(fe h); 20 | void fe_1(fe h); 21 | 22 | void fe_frombytes(fe h, const unsigned char *s); 23 | void fe_tobytes(unsigned char *s, const fe h); 24 | 25 | void fe_copy(fe h, const fe f); 26 | int fe_isnegative(const fe f); 27 | int fe_isnonzero(const fe f); 28 | void fe_cmov(fe f, const fe g, unsigned int b); 29 | void fe_cswap(fe f, fe g, unsigned int b); 30 | 31 | void fe_neg(fe h, const fe f); 32 | void fe_add(fe h, const fe f, const fe g); 33 | void fe_invert(fe out, const fe z); 34 | void fe_sq(fe h, const fe f); 35 | void fe_sq2(fe h, const fe f); 36 | void fe_mul(fe h, const fe f, const fe g); 37 | void fe_mul121666(fe h, fe f); 38 | void fe_pow22523(fe out, const fe z); 39 | void fe_sub(fe h, const fe f, const fe g); 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/fixedint.h: -------------------------------------------------------------------------------- 1 | /* 2 | Portable header to provide the 32 and 64 bits type. 3 | 4 | Not a compatible replacement for , do not blindly use it as such. 
5 | */ 6 | 7 | #if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined(__WATCOMC__) && (defined(_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_) || defined(__UINT_FAST64_TYPE__)) )) && !defined(FIXEDINT_H_INCLUDED) 8 | #include 9 | #define FIXEDINT_H_INCLUDED 10 | 11 | #if defined(__WATCOMC__) && __WATCOMC__ >= 1250 && !defined(UINT64_C) 12 | #include 13 | #define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX)) 14 | #endif 15 | #endif 16 | 17 | 18 | #ifndef FIXEDINT_H_INCLUDED 19 | #define FIXEDINT_H_INCLUDED 20 | 21 | #include 22 | 23 | /* (u)int32_t */ 24 | #ifndef uint32_t 25 | #if (ULONG_MAX == 0xffffffffUL) 26 | typedef unsigned long uint32_t; 27 | #elif (UINT_MAX == 0xffffffffUL) 28 | typedef unsigned int uint32_t; 29 | #elif (USHRT_MAX == 0xffffffffUL) 30 | typedef unsigned short uint32_t; 31 | #endif 32 | #endif 33 | 34 | 35 | #ifndef int32_t 36 | #if (LONG_MAX == 0x7fffffffL) 37 | typedef signed long int32_t; 38 | #elif (INT_MAX == 0x7fffffffL) 39 | typedef signed int int32_t; 40 | #elif (SHRT_MAX == 0x7fffffffL) 41 | typedef signed short int32_t; 42 | #endif 43 | #endif 44 | 45 | 46 | /* (u)int64_t */ 47 | #if (defined(__STDC__) && defined(__STDC_VERSION__) && __STDC__ && __STDC_VERSION__ >= 199901L) 48 | typedef long long int64_t; 49 | typedef unsigned long long uint64_t; 50 | 51 | #define UINT64_C(v) v ##ULL 52 | #define INT64_C(v) v ##LL 53 | #elif defined(__GNUC__) 54 | __extension__ typedef long long int64_t; 55 | __extension__ typedef unsigned long long uint64_t; 56 | 57 | #define UINT64_C(v) v ##ULL 58 | #define INT64_C(v) v ##LL 59 | #elif defined(__MWERKS__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) || defined(__APPLE_CC__) || defined(_LONG_LONG) || defined(_CRAYC) 60 | typedef long long int64_t; 61 | typedef unsigned long long uint64_t; 62 | 63 | #define UINT64_C(v) v ##ULL 64 | #define INT64_C(v) v ##LL 65 | #elif (defined(__WATCOMC__) && 
defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined(__BORLANDC__) && __BORLANDC__ > 0x460) || defined(__alpha) || defined(__DECC) 66 | typedef __int64 int64_t; 67 | typedef unsigned __int64 uint64_t; 68 | 69 | #define UINT64_C(v) v ##UI64 70 | #define INT64_C(v) v ##I64 71 | #endif 72 | #endif 73 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/ge.c: -------------------------------------------------------------------------------- 1 | #include "ge.h" 2 | #include "precomp_data.h" 3 | 4 | 5 | /* 6 | r = p + q 7 | */ 8 | 9 | void ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) { 10 | fe t0; 11 | fe_add(r->X, p->Y, p->X); 12 | fe_sub(r->Y, p->Y, p->X); 13 | fe_mul(r->Z, r->X, q->YplusX); 14 | fe_mul(r->Y, r->Y, q->YminusX); 15 | fe_mul(r->T, q->T2d, p->T); 16 | fe_mul(r->X, p->Z, q->Z); 17 | fe_add(t0, r->X, r->X); 18 | fe_sub(r->X, r->Z, r->Y); 19 | fe_add(r->Y, r->Z, r->Y); 20 | fe_add(r->Z, t0, r->T); 21 | fe_sub(r->T, t0, r->T); 22 | } 23 | 24 | 25 | static void slide(signed char *r, const unsigned char *a) { 26 | int i; 27 | int b; 28 | int k; 29 | 30 | for (i = 0; i < 256; ++i) { 31 | r[i] = 1 & (a[i >> 3] >> (i & 7)); 32 | } 33 | 34 | for (i = 0; i < 256; ++i) 35 | if (r[i]) { 36 | for (b = 1; b <= 6 && i + b < 256; ++b) { 37 | if (r[i + b]) { 38 | if (r[i] + (r[i + b] << b) <= 15) { 39 | r[i] += r[i + b] << b; 40 | r[i + b] = 0; 41 | } else if (r[i] - (r[i + b] << b) >= -15) { 42 | r[i] -= r[i + b] << b; 43 | 44 | for (k = i + b; k < 256; ++k) { 45 | if (!r[k]) { 46 | r[k] = 1; 47 | break; 48 | } 49 | 50 | r[k] = 0; 51 | } 52 | } else { 53 | break; 54 | } 55 | } 56 | } 57 | } 58 | } 59 | 60 | /* 61 | r = a * A + b * B 62 | where a = a[0]+256*a[1]+...+256^31 a[31]. 63 | and b = b[0]+256*b[1]+...+256^31 b[31]. 64 | B is the Ed25519 base point (x,4/5) with x positive. 
65 | */ 66 | 67 | void ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a, const ge_p3 *A, const unsigned char *b) { 68 | signed char aslide[256]; 69 | signed char bslide[256]; 70 | ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */ 71 | ge_p1p1 t; 72 | ge_p3 u; 73 | ge_p3 A2; 74 | int i; 75 | slide(aslide, a); 76 | slide(bslide, b); 77 | ge_p3_to_cached(&Ai[0], A); 78 | ge_p3_dbl(&t, A); 79 | ge_p1p1_to_p3(&A2, &t); 80 | ge_add(&t, &A2, &Ai[0]); 81 | ge_p1p1_to_p3(&u, &t); 82 | ge_p3_to_cached(&Ai[1], &u); 83 | ge_add(&t, &A2, &Ai[1]); 84 | ge_p1p1_to_p3(&u, &t); 85 | ge_p3_to_cached(&Ai[2], &u); 86 | ge_add(&t, &A2, &Ai[2]); 87 | ge_p1p1_to_p3(&u, &t); 88 | ge_p3_to_cached(&Ai[3], &u); 89 | ge_add(&t, &A2, &Ai[3]); 90 | ge_p1p1_to_p3(&u, &t); 91 | ge_p3_to_cached(&Ai[4], &u); 92 | ge_add(&t, &A2, &Ai[4]); 93 | ge_p1p1_to_p3(&u, &t); 94 | ge_p3_to_cached(&Ai[5], &u); 95 | ge_add(&t, &A2, &Ai[5]); 96 | ge_p1p1_to_p3(&u, &t); 97 | ge_p3_to_cached(&Ai[6], &u); 98 | ge_add(&t, &A2, &Ai[6]); 99 | ge_p1p1_to_p3(&u, &t); 100 | ge_p3_to_cached(&Ai[7], &u); 101 | ge_p2_0(r); 102 | 103 | for (i = 255; i >= 0; --i) { 104 | if (aslide[i] || bslide[i]) { 105 | break; 106 | } 107 | } 108 | 109 | for (; i >= 0; --i) { 110 | ge_p2_dbl(&t, r); 111 | 112 | if (aslide[i] > 0) { 113 | ge_p1p1_to_p3(&u, &t); 114 | ge_add(&t, &u, &Ai[aslide[i] / 2]); 115 | } else if (aslide[i] < 0) { 116 | ge_p1p1_to_p3(&u, &t); 117 | ge_sub(&t, &u, &Ai[(-aslide[i]) / 2]); 118 | } 119 | 120 | if (bslide[i] > 0) { 121 | ge_p1p1_to_p3(&u, &t); 122 | ge_madd(&t, &u, &Bi[bslide[i] / 2]); 123 | } else if (bslide[i] < 0) { 124 | ge_p1p1_to_p3(&u, &t); 125 | ge_msub(&t, &u, &Bi[(-bslide[i]) / 2]); 126 | } 127 | 128 | ge_p1p1_to_p2(r, &t); 129 | } 130 | } 131 | 132 | 133 | static const fe d = { 134 | -10913610, 13857413, -15372611, 6949391, 114729, -8787816, -6275908, -3247719, -18696448, -12055116 135 | }; 136 | 137 | static const fe sqrtm1 = { 138 | -32595792, -7943725, 9377950, 3500415, 12389472, 
-272473, -25146209, -2005654, 326686, 11406482 139 | }; 140 | 141 | int ge_frombytes_negate_vartime(ge_p3 *h, const unsigned char *s) { 142 | fe u; 143 | fe v; 144 | fe v3; 145 | fe vxx; 146 | fe check; 147 | fe_frombytes(h->Y, s); 148 | fe_1(h->Z); 149 | fe_sq(u, h->Y); 150 | fe_mul(v, u, d); 151 | fe_sub(u, u, h->Z); /* u = y^2-1 */ 152 | fe_add(v, v, h->Z); /* v = dy^2+1 */ 153 | fe_sq(v3, v); 154 | fe_mul(v3, v3, v); /* v3 = v^3 */ 155 | fe_sq(h->X, v3); 156 | fe_mul(h->X, h->X, v); 157 | fe_mul(h->X, h->X, u); /* x = uv^7 */ 158 | fe_pow22523(h->X, h->X); /* x = (uv^7)^((q-5)/8) */ 159 | fe_mul(h->X, h->X, v3); 160 | fe_mul(h->X, h->X, u); /* x = uv^3(uv^7)^((q-5)/8) */ 161 | fe_sq(vxx, h->X); 162 | fe_mul(vxx, vxx, v); 163 | fe_sub(check, vxx, u); /* vx^2-u */ 164 | 165 | if (fe_isnonzero(check)) { 166 | fe_add(check, vxx, u); /* vx^2+u */ 167 | 168 | if (fe_isnonzero(check)) { 169 | return -1; 170 | } 171 | 172 | fe_mul(h->X, h->X, sqrtm1); 173 | } 174 | 175 | if (fe_isnegative(h->X) == (s[31] >> 7)) { 176 | fe_neg(h->X, h->X); 177 | } 178 | 179 | fe_mul(h->T, h->X, h->Y); 180 | return 0; 181 | } 182 | 183 | 184 | /* 185 | r = p + q 186 | */ 187 | 188 | void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) { 189 | fe t0; 190 | fe_add(r->X, p->Y, p->X); 191 | fe_sub(r->Y, p->Y, p->X); 192 | fe_mul(r->Z, r->X, q->yplusx); 193 | fe_mul(r->Y, r->Y, q->yminusx); 194 | fe_mul(r->T, q->xy2d, p->T); 195 | fe_add(t0, p->Z, p->Z); 196 | fe_sub(r->X, r->Z, r->Y); 197 | fe_add(r->Y, r->Z, r->Y); 198 | fe_add(r->Z, t0, r->T); 199 | fe_sub(r->T, t0, r->T); 200 | } 201 | 202 | 203 | /* 204 | r = p - q 205 | */ 206 | 207 | void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) { 208 | fe t0; 209 | 210 | fe_add(r->X, p->Y, p->X); 211 | fe_sub(r->Y, p->Y, p->X); 212 | fe_mul(r->Z, r->X, q->yminusx); 213 | fe_mul(r->Y, r->Y, q->yplusx); 214 | fe_mul(r->T, q->xy2d, p->T); 215 | fe_add(t0, p->Z, p->Z); 216 | fe_sub(r->X, r->Z, r->Y); 217 | fe_add(r->Y, r->Z, 
r->Y); 218 | fe_sub(r->Z, t0, r->T); 219 | fe_add(r->T, t0, r->T); 220 | } 221 | 222 | 223 | /* 224 | r = p 225 | */ 226 | 227 | void ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p) { 228 | fe_mul(r->X, p->X, p->T); 229 | fe_mul(r->Y, p->Y, p->Z); 230 | fe_mul(r->Z, p->Z, p->T); 231 | } 232 | 233 | 234 | 235 | /* 236 | r = p 237 | */ 238 | 239 | void ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) { 240 | fe_mul(r->X, p->X, p->T); 241 | fe_mul(r->Y, p->Y, p->Z); 242 | fe_mul(r->Z, p->Z, p->T); 243 | fe_mul(r->T, p->X, p->Y); 244 | } 245 | 246 | 247 | void ge_p2_0(ge_p2 *h) { 248 | fe_0(h->X); 249 | fe_1(h->Y); 250 | fe_1(h->Z); 251 | } 252 | 253 | 254 | 255 | /* 256 | r = 2 * p 257 | */ 258 | 259 | void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) { 260 | fe t0; 261 | 262 | fe_sq(r->X, p->X); 263 | fe_sq(r->Z, p->Y); 264 | fe_sq2(r->T, p->Z); 265 | fe_add(r->Y, p->X, p->Y); 266 | fe_sq(t0, r->Y); 267 | fe_add(r->Y, r->Z, r->X); 268 | fe_sub(r->Z, r->Z, r->X); 269 | fe_sub(r->X, t0, r->Y); 270 | fe_sub(r->T, r->T, r->Z); 271 | } 272 | 273 | 274 | void ge_p3_0(ge_p3 *h) { 275 | fe_0(h->X); 276 | fe_1(h->Y); 277 | fe_1(h->Z); 278 | fe_0(h->T); 279 | } 280 | 281 | 282 | /* 283 | r = 2 * p 284 | */ 285 | 286 | void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p) { 287 | ge_p2 q; 288 | ge_p3_to_p2(&q, p); 289 | ge_p2_dbl(r, &q); 290 | } 291 | 292 | 293 | 294 | /* 295 | r = p 296 | */ 297 | 298 | static const fe d2 = { 299 | -21827239, -5839606, -30745221, 13898782, 229458, 15978800, -12551817, -6495438, 29715968, 9444199 300 | }; 301 | 302 | void ge_p3_to_cached(ge_cached *r, const ge_p3 *p) { 303 | fe_add(r->YplusX, p->Y, p->X); 304 | fe_sub(r->YminusX, p->Y, p->X); 305 | fe_copy(r->Z, p->Z); 306 | fe_mul(r->T2d, p->T, d2); 307 | } 308 | 309 | 310 | /* 311 | r = p 312 | */ 313 | 314 | void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) { 315 | fe_copy(r->X, p->X); 316 | fe_copy(r->Y, p->Y); 317 | fe_copy(r->Z, p->Z); 318 | } 319 | 320 | 321 | void ge_p3_tobytes(unsigned char *s, const ge_p3 *h) { 322 | fe 
recip; 323 | fe x; 324 | fe y; 325 | fe_invert(recip, h->Z); 326 | fe_mul(x, h->X, recip); 327 | fe_mul(y, h->Y, recip); 328 | fe_tobytes(s, y); 329 | s[31] ^= fe_isnegative(x) << 7; 330 | } 331 | 332 | 333 | static unsigned char equal(signed char b, signed char c) { 334 | unsigned char ub = b; 335 | unsigned char uc = c; 336 | unsigned char x = ub ^ uc; /* 0: yes; 1..255: no */ 337 | uint64_t y = x; /* 0: yes; 1..255: no */ 338 | y -= 1; /* large: yes; 0..254: no */ 339 | y >>= 63; /* 1: yes; 0: no */ 340 | return (unsigned char) y; 341 | } 342 | 343 | static unsigned char negative(signed char b) { 344 | uint64_t x = b; /* 18446744073709551361..18446744073709551615: yes; 0..255: no */ 345 | x >>= 63; /* 1: yes; 0: no */ 346 | return (unsigned char) x; 347 | } 348 | 349 | static void cmov(ge_precomp *t, const ge_precomp *u, unsigned char b) { 350 | fe_cmov(t->yplusx, u->yplusx, b); 351 | fe_cmov(t->yminusx, u->yminusx, b); 352 | fe_cmov(t->xy2d, u->xy2d, b); 353 | } 354 | 355 | 356 | static void select(ge_precomp *t, int pos, signed char b) { 357 | ge_precomp minust; 358 | unsigned char bnegative = negative(b); 359 | unsigned char babs = b - (((-bnegative) & b) << 1); 360 | fe_1(t->yplusx); 361 | fe_1(t->yminusx); 362 | fe_0(t->xy2d); 363 | cmov(t, &base[pos][0], equal(babs, 1)); 364 | cmov(t, &base[pos][1], equal(babs, 2)); 365 | cmov(t, &base[pos][2], equal(babs, 3)); 366 | cmov(t, &base[pos][3], equal(babs, 4)); 367 | cmov(t, &base[pos][4], equal(babs, 5)); 368 | cmov(t, &base[pos][5], equal(babs, 6)); 369 | cmov(t, &base[pos][6], equal(babs, 7)); 370 | cmov(t, &base[pos][7], equal(babs, 8)); 371 | fe_copy(minust.yplusx, t->yminusx); 372 | fe_copy(minust.yminusx, t->yplusx); 373 | fe_neg(minust.xy2d, t->xy2d); 374 | cmov(t, &minust, bnegative); 375 | } 376 | 377 | /* 378 | h = a * B 379 | where a = a[0]+256*a[1]+...+256^31 a[31] 380 | B is the Ed25519 base point (x,4/5) with x positive. 
381 | 382 | Preconditions: 383 | a[31] <= 127 384 | */ 385 | 386 | void ge_scalarmult_base(ge_p3 *h, const unsigned char *a) { 387 | signed char e[64]; 388 | signed char carry; 389 | ge_p1p1 r; 390 | ge_p2 s; 391 | ge_precomp t; 392 | int i; 393 | 394 | for (i = 0; i < 32; ++i) { 395 | e[2 * i + 0] = (a[i] >> 0) & 15; 396 | e[2 * i + 1] = (a[i] >> 4) & 15; 397 | } 398 | 399 | /* each e[i] is between 0 and 15 */ 400 | /* e[63] is between 0 and 7 */ 401 | carry = 0; 402 | 403 | for (i = 0; i < 63; ++i) { 404 | e[i] += carry; 405 | carry = e[i] + 8; 406 | carry >>= 4; 407 | e[i] -= carry << 4; 408 | } 409 | 410 | e[63] += carry; 411 | /* each e[i] is between -8 and 8 */ 412 | ge_p3_0(h); 413 | 414 | for (i = 1; i < 64; i += 2) { 415 | select(&t, i / 2, e[i]); 416 | ge_madd(&r, h, &t); 417 | ge_p1p1_to_p3(h, &r); 418 | } 419 | 420 | ge_p3_dbl(&r, h); 421 | ge_p1p1_to_p2(&s, &r); 422 | ge_p2_dbl(&r, &s); 423 | ge_p1p1_to_p2(&s, &r); 424 | ge_p2_dbl(&r, &s); 425 | ge_p1p1_to_p2(&s, &r); 426 | ge_p2_dbl(&r, &s); 427 | ge_p1p1_to_p3(h, &r); 428 | 429 | for (i = 0; i < 64; i += 2) { 430 | select(&t, i / 2, e[i]); 431 | ge_madd(&r, h, &t); 432 | ge_p1p1_to_p3(h, &r); 433 | } 434 | } 435 | 436 | 437 | /* 438 | r = p - q 439 | */ 440 | 441 | void ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) { 442 | fe t0; 443 | 444 | fe_add(r->X, p->Y, p->X); 445 | fe_sub(r->Y, p->Y, p->X); 446 | fe_mul(r->Z, r->X, q->YminusX); 447 | fe_mul(r->Y, r->Y, q->YplusX); 448 | fe_mul(r->T, q->T2d, p->T); 449 | fe_mul(r->X, p->Z, q->Z); 450 | fe_add(t0, r->X, r->X); 451 | fe_sub(r->X, r->Z, r->Y); 452 | fe_add(r->Y, r->Z, r->Y); 453 | fe_sub(r->Z, t0, r->T); 454 | fe_add(r->T, t0, r->T); 455 | } 456 | 457 | 458 | void ge_tobytes(unsigned char *s, const ge_p2 *h) { 459 | fe recip; 460 | fe x; 461 | fe y; 462 | fe_invert(recip, h->Z); 463 | fe_mul(x, h->X, recip); 464 | fe_mul(y, h->Y, recip); 465 | fe_tobytes(s, y); 466 | s[31] ^= fe_isnegative(x) << 7; 467 | } 468 | 
-------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/ge.h: -------------------------------------------------------------------------------- 1 | #ifndef GE_H 2 | #define GE_H 3 | 4 | #include "fe.h" 5 | 6 | 7 | /* 8 | ge means group element. 9 | 10 | Here the group is the set of pairs (x,y) of field elements (see fe.h) 11 | satisfying -x^2 + y^2 = 1 + d x^2y^2 12 | where d = -121665/121666. 13 | 14 | Representations: 15 | ge_p2 (projective): (X:Y:Z) satisfying x=X/Z, y=Y/Z 16 | ge_p3 (extended): (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT 17 | ge_p1p1 (completed): ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T 18 | ge_precomp (Duif): (y+x,y-x,2dxy) 19 | */ 20 | 21 | typedef struct { 22 | fe X; 23 | fe Y; 24 | fe Z; 25 | } ge_p2; 26 | 27 | typedef struct { 28 | fe X; 29 | fe Y; 30 | fe Z; 31 | fe T; 32 | } ge_p3; 33 | 34 | typedef struct { 35 | fe X; 36 | fe Y; 37 | fe Z; 38 | fe T; 39 | } ge_p1p1; 40 | 41 | typedef struct { 42 | fe yplusx; 43 | fe yminusx; 44 | fe xy2d; 45 | } ge_precomp; 46 | 47 | typedef struct { 48 | fe YplusX; 49 | fe YminusX; 50 | fe Z; 51 | fe T2d; 52 | } ge_cached; 53 | 54 | void ge_p3_tobytes(unsigned char *s, const ge_p3 *h); 55 | void ge_tobytes(unsigned char *s, const ge_p2 *h); 56 | int ge_frombytes_negate_vartime(ge_p3 *h, const unsigned char *s); 57 | 58 | void ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q); 59 | void ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q); 60 | void ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a, const ge_p3 *A, const unsigned char *b); 61 | void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q); 62 | void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q); 63 | void ge_scalarmult_base(ge_p3 *h, const unsigned char *a); 64 | 65 | void ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p); 66 | void ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p); 67 | void ge_p2_0(ge_p2 *h); 68 | void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p); 69 | void ge_p3_0(ge_p3 
*h); 70 | void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p); 71 | void ge_p3_to_cached(ge_cached *r, const ge_p3 *p); 72 | void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p); 73 | 74 | #endif 75 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/key_exchange.c: -------------------------------------------------------------------------------- 1 | #include "ed25519.h" 2 | #include "fe.h" 3 | 4 | void ed25519_key_exchange(unsigned char *shared_secret, const unsigned char *public_key, const unsigned char *private_key) { 5 | unsigned char e[32]; 6 | unsigned int i; 7 | 8 | fe x1; 9 | fe x2; 10 | fe z2; 11 | fe x3; 12 | fe z3; 13 | fe tmp0; 14 | fe tmp1; 15 | 16 | int pos; 17 | unsigned int swap; 18 | unsigned int b; 19 | 20 | /* copy the private key and make sure it's valid */ 21 | for (i = 0; i < 32; ++i) { 22 | e[i] = private_key[i]; 23 | } 24 | 25 | e[0] &= 248; 26 | e[31] &= 63; 27 | e[31] |= 64; 28 | 29 | /* unpack the public key and convert edwards to montgomery */ 30 | /* due to CodesInChaos: montgomeryX = (edwardsY + 1)*inverse(1 - edwardsY) mod p */ 31 | fe_frombytes(x1, public_key); 32 | fe_1(tmp1); 33 | fe_add(tmp0, x1, tmp1); 34 | fe_sub(tmp1, tmp1, x1); 35 | fe_invert(tmp1, tmp1); 36 | fe_mul(x1, tmp0, tmp1); 37 | 38 | fe_1(x2); 39 | fe_0(z2); 40 | fe_copy(x3, x1); 41 | fe_1(z3); 42 | 43 | swap = 0; 44 | for (pos = 254; pos >= 0; --pos) { 45 | b = e[pos / 8] >> (pos & 7); 46 | b &= 1; 47 | swap ^= b; 48 | fe_cswap(x2, x3, swap); 49 | fe_cswap(z2, z3, swap); 50 | swap = b; 51 | 52 | /* from montgomery.h */ 53 | fe_sub(tmp0, x3, z3); 54 | fe_sub(tmp1, x2, z2); 55 | fe_add(x2, x2, z2); 56 | fe_add(z2, x3, z3); 57 | fe_mul(z3, tmp0, x2); 58 | fe_mul(z2, z2, tmp1); 59 | fe_sq(tmp0, tmp1); 60 | fe_sq(tmp1, x2); 61 | fe_add(x3, z3, z2); 62 | fe_sub(z2, z3, z2); 63 | fe_mul(x2, tmp1, tmp0); 64 | fe_sub(tmp1, tmp1, tmp0); 65 | fe_sq(z2, z2); 66 | fe_mul121666(z3, tmp1); 67 | fe_sq(x3, x3); 68 | fe_add(tmp0, tmp0, z3); 69 | 
fe_mul(z3, x1, z2); 70 | fe_mul(z2, tmp1, tmp0); 71 | } 72 | 73 | fe_cswap(x2, x3, swap); 74 | fe_cswap(z2, z3, swap); 75 | 76 | fe_invert(z2, z2); 77 | fe_mul(x2, x2, z2); 78 | fe_tobytes(shared_secret, x2); 79 | } 80 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/keypair.c: -------------------------------------------------------------------------------- 1 | #include "ed25519.h" 2 | #include "sha512.h" 3 | #include "ge.h" 4 | 5 | 6 | void ed25519_create_keypair(unsigned char *public_key, unsigned char *private_key, const unsigned char *seed) { 7 | ge_p3 A; 8 | 9 | sha512(seed, 32, private_key); 10 | private_key[0] &= 248; 11 | private_key[31] &= 63; 12 | private_key[31] |= 64; 13 | 14 | ge_scalarmult_base(&A, private_key); 15 | ge_p3_tobytes(public_key, &A); 16 | } 17 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/sc.h: -------------------------------------------------------------------------------- 1 | #ifndef SC_H 2 | #define SC_H 3 | 4 | /* 5 | The set of scalars is \Z/l 6 | where l = 2^252 + 27742317777372353535851937790883648493. 
7 | */ 8 | 9 | void sc_reduce(unsigned char *s); 10 | void sc_muladd(unsigned char *s, const unsigned char *a, const unsigned char *b, const unsigned char *c); 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/seed.c: -------------------------------------------------------------------------------- 1 | #include "ed25519.h" 2 | 3 | #ifndef ED25519_NO_SEED 4 | 5 | #ifdef _WIN32 6 | #include <windows.h> 7 | #include <wincrypt.h> 8 | #else 9 | #include <stdio.h> 10 | #endif 11 | 12 | /* 13 | Fill seed[0..31] with 32 bytes read from the OS random source. 14 | Returns 0 on success, 1 if the source cannot be opened or read in full. 15 | NOTE(review): when SGX_COMPAT is defined nothing is written to seed and 0 16 | is still returned; confirm those builds obtain their seed elsewhere. 17 | */ 18 | int ed25519_create_seed(unsigned char *seed) { 19 | #ifndef SGX_COMPAT 20 | #ifdef _WIN32 21 | HCRYPTPROV prov; 22 | 23 | if (!CryptAcquireContext(&prov, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) { 24 | return 1; 25 | } 26 | 27 | if (!CryptGenRandom(prov, 32, seed)) { 28 | CryptReleaseContext(prov, 0); 29 | return 1; 30 | } 31 | 32 | CryptReleaseContext(prov, 0); 33 | #else 34 | FILE *f = fopen("/dev/urandom", "rb"); 35 | size_t got; 36 | 37 | if (f == NULL) { 38 | return 1; 39 | } 40 | 41 | /* a short read would leave part of the seed uninitialized */ 42 | got = fread(seed, 1, 32, f); 43 | fclose(f); 44 | 45 | if (got != 32) { 46 | return 1; 47 | } 48 | #endif 49 | #endif 50 | 51 | return 0; 52 | } 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/sha512.c: -------------------------------------------------------------------------------- 1 | /* LibTomCrypt, modular cryptographic library -- Tom St Denis 2 | * 3 | * LibTomCrypt is a library that provides various cryptographic 4 | * algorithms in a highly modular and flexible manner. 5 | * 6 | * The library is free for all purposes without any express 7 | * guarantee it works. 
8 | * 9 | * Tom St Denis, tomstdenis@gmail.com, http://libtom.org 10 | */ 11 | 12 | #include "fixedint.h" 13 | #include "sha512.h" 14 | 15 | /* the K array */ 16 | static const uint64_t K[80] = { 17 | UINT64_C(0x428a2f98d728ae22), UINT64_C(0x7137449123ef65cd), 18 | UINT64_C(0xb5c0fbcfec4d3b2f), UINT64_C(0xe9b5dba58189dbbc), 19 | UINT64_C(0x3956c25bf348b538), UINT64_C(0x59f111f1b605d019), 20 | UINT64_C(0x923f82a4af194f9b), UINT64_C(0xab1c5ed5da6d8118), 21 | UINT64_C(0xd807aa98a3030242), UINT64_C(0x12835b0145706fbe), 22 | UINT64_C(0x243185be4ee4b28c), UINT64_C(0x550c7dc3d5ffb4e2), 23 | UINT64_C(0x72be5d74f27b896f), UINT64_C(0x80deb1fe3b1696b1), 24 | UINT64_C(0x9bdc06a725c71235), UINT64_C(0xc19bf174cf692694), 25 | UINT64_C(0xe49b69c19ef14ad2), UINT64_C(0xefbe4786384f25e3), 26 | UINT64_C(0x0fc19dc68b8cd5b5), UINT64_C(0x240ca1cc77ac9c65), 27 | UINT64_C(0x2de92c6f592b0275), UINT64_C(0x4a7484aa6ea6e483), 28 | UINT64_C(0x5cb0a9dcbd41fbd4), UINT64_C(0x76f988da831153b5), 29 | UINT64_C(0x983e5152ee66dfab), UINT64_C(0xa831c66d2db43210), 30 | UINT64_C(0xb00327c898fb213f), UINT64_C(0xbf597fc7beef0ee4), 31 | UINT64_C(0xc6e00bf33da88fc2), UINT64_C(0xd5a79147930aa725), 32 | UINT64_C(0x06ca6351e003826f), UINT64_C(0x142929670a0e6e70), 33 | UINT64_C(0x27b70a8546d22ffc), UINT64_C(0x2e1b21385c26c926), 34 | UINT64_C(0x4d2c6dfc5ac42aed), UINT64_C(0x53380d139d95b3df), 35 | UINT64_C(0x650a73548baf63de), UINT64_C(0x766a0abb3c77b2a8), 36 | UINT64_C(0x81c2c92e47edaee6), UINT64_C(0x92722c851482353b), 37 | UINT64_C(0xa2bfe8a14cf10364), UINT64_C(0xa81a664bbc423001), 38 | UINT64_C(0xc24b8b70d0f89791), UINT64_C(0xc76c51a30654be30), 39 | UINT64_C(0xd192e819d6ef5218), UINT64_C(0xd69906245565a910), 40 | UINT64_C(0xf40e35855771202a), UINT64_C(0x106aa07032bbd1b8), 41 | UINT64_C(0x19a4c116b8d2d0c8), UINT64_C(0x1e376c085141ab53), 42 | UINT64_C(0x2748774cdf8eeb99), UINT64_C(0x34b0bcb5e19b48a8), 43 | UINT64_C(0x391c0cb3c5c95a63), UINT64_C(0x4ed8aa4ae3418acb), 44 | UINT64_C(0x5b9cca4f7763e373), 
UINT64_C(0x682e6ff3d6b2b8a3), 45 | UINT64_C(0x748f82ee5defb2fc), UINT64_C(0x78a5636f43172f60), 46 | UINT64_C(0x84c87814a1f0ab72), UINT64_C(0x8cc702081a6439ec), 47 | UINT64_C(0x90befffa23631e28), UINT64_C(0xa4506cebde82bde9), 48 | UINT64_C(0xbef9a3f7b2c67915), UINT64_C(0xc67178f2e372532b), 49 | UINT64_C(0xca273eceea26619c), UINT64_C(0xd186b8c721c0c207), 50 | UINT64_C(0xeada7dd6cde0eb1e), UINT64_C(0xf57d4f7fee6ed178), 51 | UINT64_C(0x06f067aa72176fba), UINT64_C(0x0a637dc5a2c898a6), 52 | UINT64_C(0x113f9804bef90dae), UINT64_C(0x1b710b35131c471b), 53 | UINT64_C(0x28db77f523047d84), UINT64_C(0x32caab7b40c72493), 54 | UINT64_C(0x3c9ebe0a15c9bebc), UINT64_C(0x431d67c49c100d4c), 55 | UINT64_C(0x4cc5d4becb3e42b6), UINT64_C(0x597f299cfc657e2a), 56 | UINT64_C(0x5fcb6fab3ad6faec), UINT64_C(0x6c44198c4a475817) 57 | }; 58 | 59 | /* Various logical functions */ 60 | 61 | #define ROR64c(x, y) \ 62 | ( ((((x)&UINT64_C(0xFFFFFFFFFFFFFFFF))>>((uint64_t)(y)&UINT64_C(63))) | \ 63 | ((x)<<((uint64_t)(64-((y)&UINT64_C(63)))))) & UINT64_C(0xFFFFFFFFFFFFFFFF)) 64 | 65 | #define STORE64H(x, y) \ 66 | { (y)[0] = (unsigned char)(((x)>>56)&255); (y)[1] = (unsigned char)(((x)>>48)&255); \ 67 | (y)[2] = (unsigned char)(((x)>>40)&255); (y)[3] = (unsigned char)(((x)>>32)&255); \ 68 | (y)[4] = (unsigned char)(((x)>>24)&255); (y)[5] = (unsigned char)(((x)>>16)&255); \ 69 | (y)[6] = (unsigned char)(((x)>>8)&255); (y)[7] = (unsigned char)((x)&255); } 70 | 71 | #define LOAD64H(x, y) \ 72 | { x = (((uint64_t)((y)[0] & 255))<<56)|(((uint64_t)((y)[1] & 255))<<48) | \ 73 | (((uint64_t)((y)[2] & 255))<<40)|(((uint64_t)((y)[3] & 255))<<32) | \ 74 | (((uint64_t)((y)[4] & 255))<<24)|(((uint64_t)((y)[5] & 255))<<16) | \ 75 | (((uint64_t)((y)[6] & 255))<<8)|(((uint64_t)((y)[7] & 255))); } 76 | 77 | 78 | #define Ch(x,y,z) (z ^ (x & (y ^ z))) 79 | #define Maj(x,y,z) (((x | y) & z) | (x & y)) 80 | #define S(x, n) ROR64c(x, n) 81 | #define R(x, n) (((x) &UINT64_C(0xFFFFFFFFFFFFFFFF))>>((uint64_t)n)) 82 | #define 
Sigma0(x) (S(x, 28) ^ S(x, 34) ^ S(x, 39)) 83 | #define Sigma1(x) (S(x, 14) ^ S(x, 18) ^ S(x, 41)) 84 | #define Gamma0(x) (S(x, 1) ^ S(x, 8) ^ R(x, 7)) 85 | #define Gamma1(x) (S(x, 19) ^ S(x, 61) ^ R(x, 6)) 86 | #ifndef MIN 87 | #define MIN(x, y) ( ((x)<(y))?(x):(y) ) 88 | #endif 89 | 90 | /* compress 1024-bits */ 91 | static int sha512_compress(sha512_context *md, unsigned char *buf) 92 | { 93 | uint64_t S[8], W[80], t0, t1; 94 | int i; 95 | 96 | /* copy state into S */ 97 | for (i = 0; i < 8; i++) { 98 | S[i] = md->state[i]; 99 | } 100 | 101 | /* copy the state into 1024-bits into W[0..15] */ 102 | for (i = 0; i < 16; i++) { 103 | LOAD64H(W[i], buf + (8*i)); 104 | } 105 | 106 | /* fill W[16..79] */ 107 | for (i = 16; i < 80; i++) { 108 | W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16]; 109 | } 110 | 111 | /* Compress */ 112 | #define RND(a,b,c,d,e,f,g,h,i) \ 113 | t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \ 114 | t1 = Sigma0(a) + Maj(a, b, c);\ 115 | d += t0; \ 116 | h = t0 + t1; 117 | 118 | for (i = 0; i < 80; i += 8) { 119 | RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0); 120 | RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1); 121 | RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2); 122 | RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3); 123 | RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4); 124 | RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5); 125 | RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6); 126 | RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7); 127 | } 128 | 129 | #undef RND 130 | 131 | 132 | 133 | /* feedback */ 134 | for (i = 0; i < 8; i++) { 135 | md->state[i] = md->state[i] + S[i]; 136 | } 137 | 138 | return 0; 139 | } 140 | 141 | 142 | /** 143 | Initialize the hash state 144 | @param md The hash state you wish to initialize 145 | @return 0 if successful 146 | */ 147 | int sha512_init(sha512_context * md) { 148 | if (md == NULL) return 1; 149 | 150 | md->curlen = 0; 151 | md->length = 0; 152 | md->state[0] = 
UINT64_C(0x6a09e667f3bcc908); 153 | md->state[1] = UINT64_C(0xbb67ae8584caa73b); 154 | md->state[2] = UINT64_C(0x3c6ef372fe94f82b); 155 | md->state[3] = UINT64_C(0xa54ff53a5f1d36f1); 156 | md->state[4] = UINT64_C(0x510e527fade682d1); 157 | md->state[5] = UINT64_C(0x9b05688c2b3e6c1f); 158 | md->state[6] = UINT64_C(0x1f83d9abfb41bd6b); 159 | md->state[7] = UINT64_C(0x5be0cd19137e2179); 160 | 161 | return 0; 162 | } 163 | 164 | /** 165 | Process a block of memory though the hash 166 | @param md The hash state 167 | @param in The data to hash 168 | @param inlen The length of the data (octets) 169 | @return 0 if successful 170 | */ 171 | int sha512_update (sha512_context * md, const unsigned char *in, size_t inlen) 172 | { 173 | size_t n; 174 | size_t i; 175 | int err; 176 | if (md == NULL) return 1; 177 | if (in == NULL) return 1; 178 | if (md->curlen > sizeof(md->buf)) { 179 | return 1; 180 | } 181 | while (inlen > 0) { 182 | if (md->curlen == 0 && inlen >= 128) { 183 | if ((err = sha512_compress (md, (unsigned char *)in)) != 0) { 184 | return err; 185 | } 186 | md->length += 128 * 8; 187 | in += 128; 188 | inlen -= 128; 189 | } else { 190 | n = MIN(inlen, (128 - md->curlen)); 191 | 192 | for (i = 0; i < n; i++) { 193 | md->buf[i + md->curlen] = in[i]; 194 | } 195 | 196 | 197 | md->curlen += n; 198 | in += n; 199 | inlen -= n; 200 | if (md->curlen == 128) { 201 | if ((err = sha512_compress (md, md->buf)) != 0) { 202 | return err; 203 | } 204 | md->length += 8*128; 205 | md->curlen = 0; 206 | } 207 | } 208 | } 209 | return 0; 210 | } 211 | 212 | /** 213 | Terminate the hash to get the digest 214 | @param md The hash state 215 | @param out [out] The destination of the hash (64 bytes) 216 | @return 0 if successful 217 | */ 218 | int sha512_final(sha512_context * md, unsigned char *out) 219 | { 220 | int i; 221 | 222 | if (md == NULL) return 1; 223 | if (out == NULL) return 1; 224 | 225 | if (md->curlen >= sizeof(md->buf)) { 226 | return 1; 227 | } 228 | 229 | /* increase 
the length of the message */ 230 | md->length += md->curlen * UINT64_C(8); 231 | 232 | /* append the '1' bit */ 233 | md->buf[md->curlen++] = (unsigned char)0x80; 234 | 235 | /* if the length is currently above 112 bytes we append zeros 236 | * then compress. Then we can fall back to padding zeros and length 237 | * encoding like normal. 238 | */ 239 | if (md->curlen > 112) { 240 | while (md->curlen < 128) { 241 | md->buf[md->curlen++] = (unsigned char)0; 242 | } 243 | sha512_compress(md, md->buf); 244 | md->curlen = 0; 245 | } 246 | 247 | /* pad upto 120 bytes of zeroes 248 | * note: that from 112 to 120 is the 64 MSB of the length. We assume that you won't hash 249 | * > 2^64 bits of data... :-) 250 | */ 251 | while (md->curlen < 120) { 252 | md->buf[md->curlen++] = (unsigned char)0; 253 | } 254 | 255 | /* store length */ 256 | STORE64H(md->length, md->buf+120); 257 | sha512_compress(md, md->buf); 258 | 259 | /* copy output */ 260 | for (i = 0; i < 8; i++) { 261 | STORE64H(md->state[i], out+(8*i)); 262 | } 263 | 264 | return 0; 265 | } 266 | 267 | int sha512(const unsigned char *message, size_t message_len, unsigned char *out) 268 | { 269 | sha512_context ctx; 270 | int ret; 271 | if ((ret = sha512_init(&ctx))) return ret; 272 | if ((ret = sha512_update(&ctx, message, message_len))) return ret; 273 | if ((ret = sha512_final(&ctx, out))) return ret; 274 | return 0; 275 | } 276 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/sha512.h: -------------------------------------------------------------------------------- 1 | #ifndef SHA512_H 2 | #define SHA512_H 3 | 4 | #include 5 | 6 | #include "fixedint.h" 7 | 8 | /* state */ 9 | typedef struct sha512_context_ { 10 | uint64_t length, state[8]; 11 | size_t curlen; 12 | unsigned char buf[128]; 13 | } sha512_context; 14 | 15 | 16 | int sha512_init(sha512_context * md); 17 | int sha512_final(sha512_context * md, unsigned char *out); 18 | int 
sha512_update(sha512_context * md, const unsigned char *in, size_t inlen); 19 | int sha512(const unsigned char *message, size_t message_len, unsigned char *out); 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/sign.c: -------------------------------------------------------------------------------- 1 | #include "ed25519.h" 2 | #include "sha512.h" 3 | #include "ge.h" 4 | #include "sc.h" 5 | 6 | 7 | void ed25519_sign(unsigned char *signature, const unsigned char *message, size_t message_len, const unsigned char *public_key, const unsigned char *private_key) { 8 | sha512_context hash; 9 | unsigned char hram[64]; 10 | unsigned char r[64]; 11 | ge_p3 R; 12 | 13 | 14 | sha512_init(&hash); 15 | sha512_update(&hash, private_key + 32, 32); 16 | sha512_update(&hash, message, message_len); 17 | sha512_final(&hash, r); 18 | 19 | sc_reduce(r); 20 | ge_scalarmult_base(&R, r); 21 | ge_p3_tobytes(signature, &R); 22 | 23 | sha512_init(&hash); 24 | sha512_update(&hash, signature, 32); 25 | sha512_update(&hash, public_key, 32); 26 | sha512_update(&hash, message, message_len); 27 | sha512_final(&hash, hram); 28 | 29 | sc_reduce(hram); 30 | sc_muladd(signature + 32, hram, private_key, r); 31 | } 32 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/verify.c: -------------------------------------------------------------------------------- 1 | #include "ed25519.h" 2 | #include "sha512.h" 3 | #include "ge.h" 4 | #include "sc.h" 5 | 6 | static int consttime_equal(const unsigned char *x, const unsigned char *y) { 7 | unsigned char r = 0; 8 | 9 | r = x[0] ^ y[0]; 10 | #define F(i) r |= x[i] ^ y[i] 11 | F(1); 12 | F(2); 13 | F(3); 14 | F(4); 15 | F(5); 16 | F(6); 17 | F(7); 18 | F(8); 19 | F(9); 20 | F(10); 21 | F(11); 22 | F(12); 23 | F(13); 24 | F(14); 25 | F(15); 26 | F(16); 27 | F(17); 28 | F(18); 29 | F(19); 30 | F(20); 31 | F(21); 32 | F(22); 33 | F(23); 34 | 
F(24); 35 | F(25); 36 | F(26); 37 | F(27); 38 | F(28); 39 | F(29); 40 | F(30); 41 | F(31); 42 | #undef F 43 | 44 | return !r; 45 | } 46 | 47 | int ed25519_verify(const unsigned char *signature, const unsigned char *message, size_t message_len, const unsigned char *public_key) { 48 | unsigned char h[64]; 49 | unsigned char checker[32]; 50 | sha512_context hash; 51 | ge_p3 A; 52 | ge_p2 R; 53 | 54 | if (signature[63] & 224) { 55 | return 0; 56 | } 57 | 58 | if (ge_frombytes_negate_vartime(&A, public_key) != 0) { 59 | return 0; 60 | } 61 | 62 | sha512_init(&hash); 63 | sha512_update(&hash, signature, 32); 64 | sha512_update(&hash, public_key, 32); 65 | sha512_update(&hash, message, message_len); 66 | sha512_final(&hash, h); 67 | 68 | sc_reduce(h); 69 | ge_double_scalarmult_vartime(&R, h, &A, signature + 32); 70 | ge_tobytes(checker, &R); 71 | 72 | if (!consttime_equal(checker, signature)) { 73 | return 0; 74 | } 75 | 76 | return 1; 77 | } 78 | -------------------------------------------------------------------------------- /src/sgx/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | pwd=$PWD 5 | 6 | echo --- Build 7 | cd "$(dirname "$0")/signing" 8 | ( 9 | set -x 10 | mkdir -p "$pwd"/temp 11 | openssl genrsa -out "$pwd"/temp/priv_key.pem -3 3072 12 | openssl rsa -in "$pwd"/temp/priv_key.pem -pubout -out "$pwd"/temp/pub_key.pem 13 | make LIBS_PATH="$pwd"/libs OUT="$pwd"/dist PRIV_KEY="$pwd"/temp/priv_key.pem PUB_KEY="$pwd"/temp/pub_key.pem 14 | ) 15 | 16 | echo --- Build Enclave Test 17 | cd "../test" 18 | ( 19 | set -x 20 | make LIBS_PATH="$pwd"/libs OUT="$pwd"/dist 21 | ) 22 | -------------------------------------------------------------------------------- /src/sgx/signing/Makefile: -------------------------------------------------------------------------------- 1 | SGX_SDK ?= /opt/sgxsdk 2 | SIGN_ENCLAVE ?= 1 3 | PRIV_KEY ?= ../keys/private_key.pem 4 | PUB_KEY ?= ../keys/public_key.pem 
5 | LIBS_PATH ?= ../../../libs 6 | OUT ?= ../../../dist 7 | 8 | SGX_COMMON_CFLAGS := -m64 -O2 9 | SGX_LIBRARY_PATH := $(SGX_SDK)/lib64 10 | SGX_ENCLAVE_SIGNER := $(SGX_SDK)/bin/x64/sgx_sign 11 | SGX_EDGER8R := $(SGX_SDK)/bin/x64/sgx_edger8r 12 | 13 | Trusted_C_Flags := -Wno-implicit-function-declaration -std=c11 $(SGX_COMMON_CFLAGS) -nostdinc -fpie -fstack-protector \ 14 | -IInclude -I. -I$(SGX_SDK)/include -I$(SGX_SDK)/include/tlibc -I$(SGX_SDK)/include/libcxx -fno-builtin-printf \ 15 | -I. -I../../sgx-ecc-ed25519 -fvisibility=hidden 16 | 17 | Untrusted_C_Flags := -fPIC -O0 -g -Wno-attributes -I$(SGX_SDK)/include -I. -I../../sgx-ecc-ed25519 18 | Test_C_Flags := $(Untrusted_C_Flags) 19 | 20 | Link_Flags := $(SGX_COMMON_CFLAGS) -Wl,--no-undefined -nostdlib -nodefaultlibs -nostartfiles -L$(SGX_LIBRARY_PATH) \ 21 | -Wl,--whole-archive -lsgx_trts -Wl,--no-whole-archive \ 22 | -L$(LIBS_PATH) -led25519.sgx.static \ 23 | -Wl,--start-group -lsgx_tstdc -lsgx_tcxx -lsgx_tkey_exchange -lsgx_tcrypto -lsgx_tservice -Wl,--end-group \ 24 | -Wl,-Bstatic -Wl,-Bsymbolic -Wl,--no-undefined \ 25 | -Wl,-pie,-eenclave_entry -Wl,--export-dynamic \ 26 | -Wl,--defsym,__ImageBase=0 \ 27 | -Wl,--version-script=./signing.lds 28 | 29 | Trusted_C_Files := $(filter $(wildcard *_trusted.c), $(wildcard *.c)) 30 | Trusted_C_Files += signing_t.c 31 | Trusted_C_Objects := $(Trusted_C_Files:.c=.o) 32 | 33 | Untrusted_C_Files := $(filter $(wildcard *_untrusted.c), $(wildcard *.c)) 34 | Untrusted_C_Files += signing_u.c 35 | Untrusted_C_Objects := $(Untrusted_C_Files:.c=.o) 36 | 37 | .PHONY: all run 38 | 39 | ifneq ($(SIGN_ENCLAVE), 0) 40 | all: signing_u.c signing_t.c signing.signed.so libsigning.so 41 | else 42 | all: signing_u.c signing_t.c signing.so libsigning.so 43 | @echo "Build enclave signing.so success!" 
44 | @echo 45 | @echo "**********************************************************************************************" 46 | @echo "PLEASE NOTE: In this mode, please sign the enclave first using Two Step Sign mechanism, before" 47 | @echo "you run the app to launch and access the enclave." 48 | @echo "**********************************************************************************************" 49 | @echo 50 | endif 51 | 52 | run: all 53 | 54 | signing_t.c: $(SGX_EDGER8R) signing.edl 55 | @echo "GEN => $@" 56 | @$(SGX_EDGER8R) --trusted signing.edl --search-path $(SGX_SDK)/include 57 | 58 | signing_u.c: $(SGX_EDGER8R) signing.edl 59 | @echo "GEN => $@" 60 | @$(SGX_EDGER8R) --untrusted signing.edl --search-path $(SGX_SDK)/include 61 | 62 | $(Trusted_C_Objects): %.o: %.c 63 | @echo "CC <= $<" 64 | $(CC) $(Trusted_C_Flags) -c $< -o $@ 65 | 66 | $(Untrusted_C_Objects): %.o: %.c 67 | @echo "CC <= $<" 68 | $(CC) $(Untrusted_C_Flags) -c $< -o $@ 69 | 70 | signing.so: signing_t.o $(Trusted_C_Objects) 71 | @echo "LINK => $@" 72 | $(CC) $^ -o $@ $(Link_Flags) 73 | mkdir -p $(OUT) 74 | cp $@ $(OUT) 75 | 76 | signing.signed.so: signing.so 77 | @echo "SIGN => $@" 78 | $(SGX_ENCLAVE_SIGNER) gendata -enclave $< -config signing.config.xml -out /tmp/enclave_hash.hex 79 | openssl dgst -sha256 -out /tmp/signature.hex -sign $(PRIV_KEY) -keyform PEM /tmp/enclave_hash.hex 80 | $(SGX_ENCLAVE_SIGNER) catsig -enclave $< -config signing.config.xml -out $@ -key $(PUB_KEY) -sig /tmp/signature.hex -unsigned /tmp/enclave_hash.hex 81 | mkdir -p $(OUT) 82 | cp $@ $(OUT) 83 | 84 | libsigning.so: signing_u.o signing_untrusted.o $(Untrusted_C_Objects) 85 | @echo "LINK => $@" 86 | $(CC) $^ -o $@ -shared -L$(SGX_LIBRARY_PATH) -lsgx_uae_service -lsgx_ukey_exchange -lsgx_urts -L$(LIBS_PATH) -led25519.static 87 | mkdir -p $(OUT) 88 | cp $@ $(OUT) 89 | cp signing_public.h $(OUT) 90 | 91 | clean: 92 | @rm -f signing_t.* signing_u.* $(Trusted_C_Objects) $(Untrusted_C_Objects) signing.signed.so signing.so 
        /* This function initializes the enclave's remote attestation
           parameters.

           Parameters:
           b_pse: Use Intel's Platform Services
           pub_key: The caller's public key, which it will use for
                    communication with Intel's IAS service.
           pctxt: Remote attestation context
        */
The data is encrypted using 47 | enclave specific keys. 48 | */ 49 | public sgx_status_t get_sgx_ed25519_data( 50 | uint32_t data_size, 51 | [out, size=data_size] uint8_t* sealed_data, 52 | [out] uint32_t* data_size_needed); 53 | 54 | /* This function initializes enclave using sealed data. The data 55 | was sealed using get_sgx_ed25519_data() function. 56 | 57 | The caller can also update the lockout parameters. 58 | */ 59 | public sgx_status_t init_sgx_ed25519_from_data( 60 | uint32_t data_size, 61 | [in, size=data_size] uint8_t* sealed_data, 62 | uint32_t update_lockout_params, 63 | uint32_t lockout_period, 64 | uint32_t lockout_multiplier, 65 | uint32_t lockout_max_depth, 66 | uint32_t key_len, 67 | [out, size=key_len] uint8_t* pubkey); 68 | 69 | /* This function signs the message by using private key generated 70 | during init function. The caller provides the new history entries. 71 | The enclave checks the lockout parameters, past history and the 72 | new history to compute slashing conditions. The enclave will not 73 | sign the message if it'll result in slashing. 
74 | */ 75 | public sgx_status_t sign_sgx_ed25519( 76 | uint32_t msg_len, 77 | [in, size=msg_len] const uint8_t* msg, 78 | uint32_t history_len, 79 | [in, count=history_len] const history_entry_t* entries, 80 | uint32_t sig_len, 81 | [out, size=sig_len] uint8_t* signature); 82 | }; 83 | }; 84 | -------------------------------------------------------------------------------- /src/sgx/signing/signing.lds: -------------------------------------------------------------------------------- 1 | signing.so 2 | { 3 | global: 4 | g_global_data_sim; 5 | g_global_data; 6 | signing_entry; 7 | local: 8 | *; 9 | }; 10 | -------------------------------------------------------------------------------- /src/sgx/signing/signing_internal.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "signing_public.h" 4 | 5 | // The following definitions are specific to ed25519 specifications 6 | #define ED25519_PRIV_KEY_LEN 64 7 | #define ED25519_SIGNATURE_LEN 64 8 | #define ED25519_SEED_LEN 32 9 | -------------------------------------------------------------------------------- /src/sgx/signing/signing_public.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include "sgx_eid.h" 5 | #include "sgx_error.h" 6 | 7 | #define ED25519_PUB_KEY_LEN 32 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | typedef struct ed25519_context { 14 | bool enclaveEnabled; 15 | sgx_enclave_id_t eid; 16 | uint8_t public_key[ED25519_PUB_KEY_LEN]; 17 | } ed25519_context_t; 18 | 19 | typedef struct history_entry { 20 | uint32_t num_hashes; 21 | uint32_t optional_input_hash[4]; 22 | uint32_t result_hash[4]; 23 | } history_entry_t; 24 | 25 | /* This function initializes SGX enclave. It loads enclave_file 26 | to SGX, which internally creates a new public/private keypair. 
/* This function returns the sealed data (private key and associated
   information). The sealed data can be used to reinitialize the enclave
   using init_ed25519_from_data().
*/
64 | */ 65 | sgx_status_t sign_ed25519(ed25519_context_t* pctxt, 66 | uint32_t msg_len, 67 | const uint8_t* msg, 68 | uint32_t history_len, 69 | const history_entry_t* entries, 70 | uint32_t sig_len, 71 | uint8_t* signature); 72 | 73 | /* This function releases SGX enclave */ 74 | void release_ed25519_context(ed25519_context_t* pctxt); 75 | 76 | #ifdef __cplusplus 77 | } 78 | #endif 79 | -------------------------------------------------------------------------------- /src/sgx/signing/signing_trusted.c: -------------------------------------------------------------------------------- 1 | /* 2 | * This file contains Solana's SGX enclave code for signing data. 3 | */ 4 | 5 | #include 6 | #include 7 | 8 | #include "sgx_key.h" 9 | #include "sgx_tseal.h" 10 | 11 | #include "ed25519.h" 12 | #include "signing_t.h" 13 | 14 | typedef struct signing_parameters { 15 | bool initialized; 16 | uint8_t public_key[ED25519_PUB_KEY_LEN]; 17 | uint8_t private_key[ED25519_PRIV_KEY_LEN]; 18 | uint32_t nonce; 19 | uint32_t lockout_period; 20 | uint32_t lockout_multiplier; 21 | uint32_t lockout_max_depth; 22 | sgx_mc_uuid_t counter; 23 | uint32_t counter_value; 24 | } signing_parameters_t; 25 | 26 | static signing_parameters_t g_signing_params; 27 | 28 | sgx_status_t init_remote_attestation(int b_pse, 29 | sgx_ec256_public_t* sp_pub_key, 30 | sgx_ra_context_t* pctxt) { 31 | sgx_status_t ret; 32 | if (b_pse) { 33 | int busy_retry_times = 2; 34 | do { 35 | ret = sgx_create_pse_session(); 36 | } while (ret == SGX_ERROR_BUSY && busy_retry_times--); 37 | if (ret != SGX_SUCCESS) 38 | return ret; 39 | } 40 | ret = sgx_ra_init(sp_pub_key, b_pse, pctxt); 41 | if (b_pse) { 42 | sgx_close_pse_session(); 43 | } 44 | return ret; 45 | } 46 | 47 | sgx_status_t close_remote_attestation(sgx_ra_context_t ctxt) { 48 | return sgx_ra_close(ctxt); 49 | } 50 | 51 | /* This function creates a new public/private keypair in 52 | enclave trusted space. 
53 | */ 54 | sgx_status_t init_sgx_ed25519(uint32_t lockout_period, 55 | uint32_t lockout_multiplier, 56 | uint32_t lockout_max_depth, 57 | uint32_t key_len, 58 | uint8_t* pubkey) { 59 | if (key_len < sizeof(g_signing_params.public_key)) { 60 | return SGX_ERROR_INVALID_PARAMETER; 61 | } 62 | 63 | sgx_status_t status = SGX_SUCCESS; 64 | int busy_retry_times = 3; 65 | do { 66 | status = sgx_create_pse_session(); 67 | } while (status == SGX_ERROR_BUSY && (busy_retry_times-- > 0)); 68 | 69 | if (SGX_SUCCESS != status) { 70 | return status; 71 | } 72 | 73 | status = sgx_create_monotonic_counter(&g_signing_params.counter, 74 | &g_signing_params.counter_value); 75 | sgx_close_pse_session(); 76 | if (SGX_SUCCESS != status) { 77 | return status; 78 | } 79 | 80 | uint8_t seed[ED25519_SEED_LEN]; 81 | status = sgx_read_rand(seed, sizeof(seed)); 82 | if (SGX_SUCCESS != status) { 83 | return status; 84 | } 85 | 86 | ed25519_create_keypair(g_signing_params.public_key, 87 | g_signing_params.private_key, seed); 88 | 89 | memcpy(pubkey, g_signing_params.public_key, 90 | sizeof(g_signing_params.public_key)); 91 | 92 | g_signing_params.initialized = true; 93 | g_signing_params.lockout_max_depth = lockout_max_depth; 94 | g_signing_params.lockout_multiplier = lockout_multiplier; 95 | g_signing_params.lockout_period = lockout_period; 96 | 97 | return SGX_SUCCESS; 98 | } 99 | 100 | sgx_status_t get_sgx_ed25519_data(uint32_t data_size, 101 | uint8_t* sealed_data, 102 | uint32_t* data_size_needed) { 103 | *data_size_needed = 104 | sgx_calc_sealed_data_size(0, sizeof(signing_parameters_t)); 105 | 106 | if (*data_size_needed > data_size) { 107 | return SGX_ERROR_INVALID_PARAMETER; 108 | } 109 | 110 | sgx_status_t status = sgx_read_rand((uint8_t*)&g_signing_params.nonce, 111 | sizeof(g_signing_params.nonce)); 112 | if (SGX_SUCCESS != status) { 113 | return status; 114 | } 115 | 116 | sgx_attributes_t attribute_mask; 117 | attribute_mask.flags = SGX_FLAGS_INITTED | SGX_FLAGS_DEBUG; 118 | 
attribute_mask.xfrm = 0x0; 119 | 120 | return sgx_seal_data_ex(SGX_KEYPOLICY_MRENCLAVE, attribute_mask, 0xF0000000, 121 | 0, NULL, sizeof(g_signing_params), 122 | (const uint8_t*)&g_signing_params, *data_size_needed, 123 | (sgx_sealed_data_t*)sealed_data); 124 | } 125 | 126 | sgx_status_t init_sgx_ed25519_from_data(uint32_t data_size, 127 | uint8_t* sealed_data, 128 | uint32_t update_lockout_params, 129 | uint32_t lockout_period, 130 | uint32_t lockout_multiplier, 131 | uint32_t lockout_max_depth, 132 | uint32_t key_len, 133 | uint8_t* pubkey) { 134 | if (key_len < sizeof(g_signing_params.public_key)) { 135 | return SGX_ERROR_INVALID_PARAMETER; 136 | } 137 | 138 | signing_parameters_t data; 139 | uint32_t datalen = sizeof(data); 140 | sgx_status_t status = sgx_unseal_data((const sgx_sealed_data_t*)sealed_data, 141 | NULL, 0, (uint8_t*)&data, &datalen); 142 | if (SGX_SUCCESS != status) { 143 | return status; 144 | } 145 | 146 | if (datalen != sizeof(data)) { 147 | return SGX_ERROR_INVALID_PARAMETER; 148 | } 149 | 150 | int busy_retry_times = 3; 151 | do { 152 | status = sgx_create_pse_session(); 153 | } while (status == SGX_ERROR_BUSY && (busy_retry_times-- > 0)); 154 | 155 | if (SGX_SUCCESS != status) { 156 | return status; 157 | } 158 | 159 | uint32_t counter_value = 0xffffffff; 160 | status = 161 | sgx_read_monotonic_counter(&g_signing_params.counter, &counter_value); 162 | if (SGX_SUCCESS != status) { 163 | sgx_close_pse_session(); 164 | return status; 165 | } 166 | 167 | if (counter_value != g_signing_params.counter_value) { 168 | sgx_close_pse_session(); 169 | return SGX_ERROR_INVALID_PARAMETER; 170 | } 171 | 172 | status = sgx_increment_monotonic_counter(&g_signing_params.counter, 173 | &g_signing_params.counter_value); 174 | 175 | sgx_close_pse_session(); 176 | if (SGX_SUCCESS != status) { 177 | return status; 178 | } 179 | 180 | memcpy(&g_signing_params, &data, sizeof(g_signing_params)); 181 | 182 | memcpy(pubkey, g_signing_params.public_key, 183 | 
sizeof(g_signing_params.public_key)); 184 | 185 | g_signing_params.initialized = true; 186 | if (update_lockout_params != 0) { 187 | g_signing_params.lockout_max_depth = lockout_max_depth; 188 | g_signing_params.lockout_multiplier = lockout_multiplier; 189 | g_signing_params.lockout_period = lockout_period; 190 | } 191 | return SGX_SUCCESS; 192 | } 193 | 194 | /* This function signs the msg using private key. 195 | */ 196 | sgx_status_t sign_sgx_ed25519(uint32_t msg_len, 197 | const uint8_t* msg, 198 | uint32_t history_len, 199 | const history_entry_t* entries, 200 | uint32_t sig_len, 201 | uint8_t* signature) { 202 | if (!g_signing_params.initialized) { 203 | return SGX_ERROR_INVALID_STATE; 204 | } 205 | 206 | if (sig_len < ED25519_SIGNATURE_LEN) { 207 | return SGX_ERROR_INVALID_PARAMETER; 208 | } 209 | 210 | ed25519_sign(signature, msg, msg_len, g_signing_params.public_key, 211 | g_signing_params.private_key); 212 | 213 | return SGX_SUCCESS; 214 | } 215 | -------------------------------------------------------------------------------- /src/sgx/signing/signing_untrusted.c: -------------------------------------------------------------------------------- 1 | /* 2 | * This file contains Solana's SGX enclave code for signing data. 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include "ed25519.h" 10 | #include "sgx_urts.h" 11 | #include "signing_public.h" 12 | #include "signing_u.h" 13 | 14 | /* This function initializes SGX enclave. It loads enclave_file 15 | to SGX, which internally creates a new public/private keypair. 
16 | */ 17 | sgx_status_t init_ed25519(const char* enclave_file, 18 | uint32_t lockout_period, 19 | uint32_t lockout_multiplier, 20 | uint32_t lockout_max_depth, 21 | ed25519_context_t* pctxt) { 22 | int updated = 0; 23 | sgx_launch_token_t token = {0}; 24 | sgx_enclave_id_t eid; 25 | 26 | // Try to load the SGX enclave 27 | sgx_status_t status = 28 | sgx_create_enclave(enclave_file, 1, &token, &updated, &eid, NULL); 29 | 30 | if (SGX_SUCCESS != status) { 31 | return status; 32 | } 33 | 34 | sgx_status_t retval = SGX_SUCCESS; 35 | status = init_sgx_ed25519(eid, &retval, lockout_period, lockout_multiplier, 36 | lockout_max_depth, sizeof(pctxt->public_key), 37 | &pctxt->public_key[0]); 38 | 39 | if (SGX_SUCCESS != status) { 40 | sgx_destroy_enclave(eid); 41 | return status; 42 | } 43 | 44 | if (SGX_SUCCESS != retval) { 45 | sgx_destroy_enclave(eid); 46 | return retval; 47 | } 48 | 49 | pctxt->enclaveEnabled = true; 50 | pctxt->eid = eid; 51 | 52 | return status; 53 | } 54 | 55 | sgx_status_t get_ed25519_data(ed25519_context_t* pctxt, 56 | uint32_t* datalen, 57 | uint8_t* data) { 58 | if (!pctxt->enclaveEnabled) { 59 | return SGX_ERROR_INVALID_ENCLAVE; 60 | } 61 | 62 | sgx_status_t retval = SGX_SUCCESS; 63 | sgx_status_t status = 64 | get_sgx_ed25519_data(pctxt->eid, &retval, *datalen, data, datalen); 65 | 66 | if (SGX_SUCCESS != status) { 67 | return status; 68 | } 69 | 70 | if (SGX_SUCCESS != retval) { 71 | return retval; 72 | } 73 | 74 | return status; 75 | } 76 | 77 | sgx_status_t init_ed25519_from_data(ed25519_context_t* pctxt, 78 | uint32_t datalen, 79 | uint8_t* data, 80 | uint32_t update_lockout_params, 81 | uint32_t lockout_period, 82 | uint32_t lockout_multiplier, 83 | uint32_t lockout_max_depth) { 84 | if (!pctxt->enclaveEnabled) { 85 | return SGX_ERROR_INVALID_ENCLAVE; 86 | } 87 | 88 | sgx_status_t retval = SGX_SUCCESS; 89 | sgx_status_t status = init_sgx_ed25519_from_data( 90 | pctxt->eid, &retval, datalen, data, update_lockout_params, lockout_period, 91 
| lockout_multiplier, lockout_max_depth, sizeof(pctxt->public_key), 92 | &pctxt->public_key[0]); 93 | 94 | if (SGX_SUCCESS != status) { 95 | return status; 96 | } 97 | 98 | if (SGX_SUCCESS != retval) { 99 | return retval; 100 | } 101 | 102 | return status; 103 | } 104 | 105 | /* This function signs the msg using the internally stored private 106 | key. The signature is returned in the output "signature" buffer. 107 | 108 | This function must only be called after init_ed25519() function. 109 | */ 110 | sgx_status_t sign_ed25519(ed25519_context_t* pctxt, 111 | uint32_t msg_len, 112 | const uint8_t* msg, 113 | uint32_t history_len, 114 | const history_entry_t* entries, 115 | uint32_t sig_len, 116 | uint8_t* signature) { 117 | if (!pctxt->enclaveEnabled) { 118 | return SGX_ERROR_INVALID_ENCLAVE; 119 | } 120 | 121 | sgx_status_t retval = SGX_SUCCESS; 122 | sgx_status_t status = 123 | sign_sgx_ed25519(pctxt->eid, &retval, msg_len, msg, history_len, entries, 124 | sig_len, signature); 125 | 126 | if (SGX_SUCCESS != status) { 127 | return status; 128 | } 129 | 130 | if (SGX_SUCCESS != retval) { 131 | return retval; 132 | } 133 | 134 | return status; 135 | } 136 | 137 | void release_ed25519_context(ed25519_context_t* pctxt) { 138 | sgx_destroy_enclave(pctxt->eid); 139 | } -------------------------------------------------------------------------------- /src/sgx/test/Makefile: -------------------------------------------------------------------------------- 1 | SGX_SDK ?= /opt/sgxsdk 2 | LIBS_PATH ?= ../../../libs 3 | OUT ?= ../../../dist 4 | 5 | C_Flags := -O2 -fpic -I. 
-I$(SGX_SDK)/include -I$(OUT) -I../../sgx-ecc-ed25519 6 | 7 | C_Files := $(wildcard *.c) 8 | C_Objects := $(C_Files:%.c=%.o) 9 | 10 | .PHONY: all run 11 | all: $(OUT)/signing_test 12 | run: all 13 | 14 | %.o: %.c 15 | @echo "CC <= $<" 16 | $(CC) $(C_Flags) -c $< -o $@ 17 | 18 | $(OUT)/signing_test: $(C_Objects) 19 | @mkdir -p $(OUT) 20 | $(CC) $^ -o $@ -L$(OUT) -L$(LIBS_PATH) -lsigning -led25519.static 21 | 22 | clean: 23 | @rm -rf $(C_Objects) $(OUT)/signing_test 24 | -------------------------------------------------------------------------------- /src/sgx/test/signing_test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "signing_public.h" 6 | 7 | #include "ed25519.h" 8 | 9 | void print_buffer(const uint8_t* buf, int len) { 10 | char str[BUFSIZ] = {'\0'}; 11 | int offset = 0; 12 | for (int i = 0; i < len; i++) { 13 | offset += snprintf(&str[offset], BUFSIZ - offset, "0x%02x ", buf[i]); 14 | if (!((i + 1) % 8)) 15 | offset += snprintf(&str[offset], BUFSIZ - offset, "\n"); 16 | } 17 | offset += snprintf(&str[offset], BUFSIZ - offset, "\n"); 18 | printf("%s", str); 19 | } 20 | 21 | int main(int argc, char* argv[]) { 22 | if (argc < 2) { 23 | printf("Usage: %s \n", argv[0]); 24 | return -1; 25 | } 26 | 27 | ed25519_context_t ctxt; 28 | uint32_t lockout_period = 10, lockout_multiplier = 2, lockout_max_depth = 32; 29 | sgx_status_t status = init_ed25519( 30 | argv[1], lockout_period, lockout_multiplier, lockout_max_depth, &ctxt); 31 | if (SGX_SUCCESS != status) { 32 | printf("Failed in init_ed25519. Error %d\n", status); 33 | return -1; 34 | } 35 | 36 | printf("Loaded the enclave. 
eid: %d\n", (uint32_t)ctxt.eid); 37 | 38 | uint32_t datalen = 0; 39 | status = get_ed25519_data(&ctxt, &datalen, NULL); 40 | 41 | uint8_t* sealed_data = malloc(datalen); 42 | status = get_ed25519_data(&ctxt, &datalen, sealed_data); 43 | if (SGX_SUCCESS != status) { 44 | printf("Failed in get_ed25519_data. Error %d\n", status); 45 | release_ed25519_context(&ctxt); 46 | free(sealed_data); 47 | return -1; 48 | } 49 | 50 | status = 51 | init_ed25519_from_data(&ctxt, datalen, sealed_data, 1, lockout_period, 52 | lockout_multiplier, lockout_max_depth); 53 | free(sealed_data); 54 | if (SGX_SUCCESS != status) { 55 | printf("Failed in init_ed25519_from_data. Error %d\n", status); 56 | release_ed25519_context(&ctxt); 57 | return -1; 58 | } 59 | 60 | const history_entry_t entries; 61 | uint8_t* data = 62 | "This is a test string. We'll sign it using SGX enclave. Hope it works!!"; 63 | uint8_t signature[64]; 64 | memset(signature, 0, sizeof(signature)); 65 | status = sign_ed25519(&ctxt, sizeof(data), data, 1, &entries, 66 | sizeof(signature), signature); 67 | if (SGX_SUCCESS != status) { 68 | printf("Failed in sign_ed25519. Error %d\n", status); 69 | release_ed25519_context(&ctxt); 70 | return -1; 71 | } 72 | 73 | printf("Signature:\n"); 74 | print_buffer(signature, sizeof(signature)); 75 | 76 | if (ed25519_verify(signature, data, sizeof(data), ctxt.public_key) == 0) { 77 | printf("Failed in verifying the signature\n"); 78 | } else { 79 | printf("Signature verified\n"); 80 | } 81 | 82 | release_ed25519_context(&ctxt); 83 | return 0; 84 | } --------------------------------------------------------------------------------