├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── ci ├── build.sh ├── buildkite.yml ├── docker-ispc │ └── Dockerfile ├── docker-run.sh ├── docker-sgx │ ├── Dockerfile │ └── build.sh ├── env.sh ├── upload-ci-artifact.sh └── upload-github-release-asset.sh └── src ├── Makefile ├── common ├── gpu_common.h └── perftime.h ├── cuda-ecc-ed25519 ├── common.cu ├── ed25519.h ├── fe.cu ├── fe.h ├── fixedint.h ├── ge.cu ├── ge.h ├── gpu_ctx.cu ├── gpu_ctx.h ├── int128.h ├── keypair.cu ├── license.txt ├── main.cu ├── precomp_data.h ├── sc.cu ├── sc.h ├── seed.cu ├── sha512.cu ├── sha512.h ├── sign.cu └── verify.cu ├── cuda-poh-verify └── poh_verify.cu ├── cuda-sha256 ├── sha256.cu └── tomcrypt_macros.h ├── gpu-common.mk ├── jerasure-sys ├── Cargo.toml ├── build.rs ├── gf-complete └── jerasure ├── opencl-ecc-ed25519 ├── gpu_ctx.cpp ├── gpu_ctx.h ├── main.cpp ├── sign.cpp └── verify.cpp ├── opencl-platform ├── cl_common.h ├── cl_init_platform.cpp ├── kernels_precomp_data.h ├── kernels_sha256.h └── kernels_verify.h ├── opencl-poh-verify └── cl_poh_verify.cpp ├── poh-simd ├── Makefile ├── build.sh ├── poh-verify.ispc └── sha256.h ├── poh-verify-test ├── main.cpp ├── test_hashes_332_129 ├── test_hashes_output_332 ├── test_num_elems_332 └── test_num_hashes_arr_332 ├── sgx-ecc-ed25519 ├── Makefile ├── add_scalar.c ├── build.sh ├── ed25519.h ├── fe.c ├── fe.h ├── fixedint.h ├── ge.c ├── ge.h ├── key_exchange.c ├── keypair.c ├── precomp_data.h ├── sc.c ├── sc.h ├── seed.c ├── sha512.c ├── sha512.h ├── sign.c └── verify.c └── sgx ├── build.sh ├── signing ├── Makefile ├── signing.config.xml ├── signing.edl ├── signing.lds ├── signing_internal.h ├── signing_public.h ├── signing_trusted.c └── signing_untrusted.c └── test ├── Makefile └── signing_test.c /.gitignore: -------------------------------------------------------------------------------- 1 | # temp folder 2 | /temp/ 3 | 4 | # build output folders 5 | /libs/ 6 | /dist/ 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 Solana Labs, Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | OS := $(shell uname) 2 | 3 | all: 4 | ifeq ($(OS),Darwin) 5 | SO=dylib 6 | else 7 | SO=so 8 | all: cuda_crypt 9 | endif 10 | 11 | V=release 12 | 13 | .PHONY:cuda_crypt 14 | cuda_crypt: 15 | $(MAKE) V=$(V) -C src 16 | 17 | DESTDIR ?= dist 18 | install: 19 | mkdir -p $(DESTDIR) 20 | ifneq ($(OS),Darwin) 21 | cp -f src/$(V)/libcuda-crypt.so $(DESTDIR) 22 | cp -f src/$(V)/libcl-crypt.so $(DESTDIR) 23 | endif 24 | ls -lh $(DESTDIR) 25 | 26 | .PHONY:clean 27 | clean: 28 | $(MAKE) V=$(V) -C src clean 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build status](https://badge.buildkite.com/dcc97a44f655a7473ff0f836a2cf154dff016a66db8e4f7405.svg?branch=master)](https://buildkite.com/solana-labs/wool) 2 | 3 | # solana-perf-libs 4 | CUDA, and more! 5 | 6 | ## Building 7 | After cloning this repo use the makefile in the root to build the tree 8 | with nvcc in your path: 9 | 10 | ```bash 11 | $ export PATH=/usr/local/cuda/bin:$PATH 12 | $ make -j$(nproc) 13 | ``` 14 | 15 | This should generate the libraries: 16 | * libcuda-crypt.so - ed25519 verify and poh verify cuda implementations 17 | * libcl-crypt.so - ed25519 verify and poh verify OpenCL implementations 18 | 19 | Copy libraries to the main Solana repo: 20 | ```bash 21 | $ make DESTDIR=${SOLANA_ROOT:?}/target/perf-libs install 22 | ``` 23 | 24 | Build Solana: 25 | ```bash 26 | $ cd $SOLANA_ROOT 27 | $ cargo build --release 28 | ``` 29 | 30 | The library is loaded at startup by `solana_perf::perf_libs`. 31 | See `perf/src/perf_libs.rs` in the main Solana repo for details. 32 | -------------------------------------------------------------------------------- /ci/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | cd "$(dirname "$0")/.." 4 | 5 | source ci/env.sh 6 | source ci/upload-ci-artifact.sh 7 | 8 | CUDA_HOMES=( 9 | /usr/local/cuda-11.1 10 | /usr/local/cuda-11.2 11 | /usr/local/cuda-11.3 12 | /usr/local/cuda-11.4 13 | /usr/local/cuda-11.5 14 | ) 15 | 16 | for CUDA_HOME in "${CUDA_HOMES[@]}"; do 17 | CUDA_HOME_BASE="$(basename "$CUDA_HOME")" 18 | echo "--- Build: $CUDA_HOME_BASE" 19 | ( 20 | if [[ ! -d $CUDA_HOME/lib64 ]]; then 21 | echo "Invalid CUDA_HOME: $CUDA_HOME" 22 | exit 1 23 | fi 24 | 25 | set -x 26 | export LD_LIBRARY_PATH=$CUDA_HOME/lib64 27 | export PATH=$PATH:$HOME/.cargo/bin/:$CUDA_HOME/bin 28 | export DESTDIR=dist/$CUDA_HOME_BASE 29 | 30 | make -j"$(nproc)" 31 | make install 32 | make clean 33 | 34 | cp -vf "$CUDA_HOME"/version.txt "$DESTDIR"/cuda-version.txt 35 | ) 36 | done 37 | 38 | echo --- Build SGX 39 | ( 40 | set -x 41 | ci/docker-run.sh solanalabs/sgxsdk src/sgx-ecc-ed25519/build.sh 42 | ci/docker-run.sh solanalabs/sgxsdk src/sgx/build.sh 43 | ) 44 | 45 | echo --- Build ISPC 46 | ( 47 | set -x 48 | ci/docker-run.sh solanalabs/ispc src/poh-simd/build.sh 49 | ) 50 | 51 | echo --- Create tarball 52 | ( 53 | set -x 54 | cd dist 55 | git rev-parse HEAD | tee solana-perf-HEAD.txt 56 | tar zcvf ../solana-perf.tgz ./* 57 | ) 58 | 59 | upload-ci-artifact solana-perf.tgz 60 | 61 | [[ -n $CI_TAG ]] || exit 0 62 | ci/upload-github-release-asset.sh solana-perf.tgz 63 | exit 0 64 | -------------------------------------------------------------------------------- /ci/buildkite.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - command: "ci/build.sh" 3 | name: "build" 4 | timeout_in_minutes: 120 5 | agents: 6 | - "queue=perf-cuda" 7 | -------------------------------------------------------------------------------- /ci/docker-ispc/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM buildpack-deps:stretch 2 | 3 | ARG ISPC_HOME=/usr/local/src/ispc 4 | ARG LLVM_HOME=/usr/local/src/llvm 5 | ARG LLVM_VERSION=8.0 6 | 7 | ENV PATH=$LLVM_HOME/bin-$LLVM_VERSION/bin:$ISPC_HOME/bin/bin:$PATH 8 | 9 | RUN set -x \ 10 | && apt-get update \ 11 | && apt purge -y --auto-remove cmake \ 12 | && apt-get install -y bison flex \ 13 | && wget https://cmake.org/files/v3.8/cmake-3.8.0-Linux-x86_64.sh \ 14 | && mkdir /opt/cmake \ 15 | && sh cmake-3.8.0-Linux-x86_64.sh --prefix=/opt/cmake --skip-license \ 16 | && ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake \ 17 | && rm cmake-3.8.0-Linux-x86_64.sh \ 18 | && cmake --version \ 19 | && git clone git://github.com/ispc/ispc.git $ISPC_HOME \ 20 | && cd $ISPC_HOME \ 21 | && python alloy.py -b --version=$LLVM_VERSION --git --selfbuild \ 22 | && rm -rf $LLVM_HOME/build-$LLVM_VERSION $LLVM_HOME/llvm-$LLVM_VERSION $LLVM_HOME/bin-$LLVM_VERSION_temp $LLVM_HOME/build-$LLVM_VERSION_temp \ 23 | && mkdir build \ 24 | && cd build \ 25 | && echo $PATH \ 26 | && ls -la /usr/local/src/llvm/bin-8.0/bin \ 27 | && cmake -DCMAKE_INSTALL_PREFIX=$ISPC_HOME/bin -DCMAKE_CXX_COMPILER=clang++ $ISPC_HOME \ 28 | && make -j$(nproc) \ 29 | && make install \ 30 | && cd .. \ 31 | && rm -rf build \ 32 | && mv $LLVM_HOME/bin-$LLVM_VERSION / \ 33 | && rm -rf $LLVM_HOME \ 34 | && mkdir -p $LLVM_HOME \ 35 | && mv /bin-$LLVM_VERSION $LLVM_HOME \ 36 | && cd / \ 37 | && mv $ISPC_HOME/bin /ispcbin \ 38 | && rm -rf $ISPC_HOME \ 39 | && mkdir $ISPC_HOME \ 40 | && mv /ispcbin $ISPC_HOME/bin \ 41 | && ispc --version 42 | -------------------------------------------------------------------------------- /ci/docker-run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | usage() { 5 | echo "Usage: $0 [--nopull] [docker image name] [command]" 6 | echo 7 | echo Runs command in the specified docker image with 8 | echo a CI-appropriate environment. 9 | echo 10 | echo "--nopull Skip the dockerhub image update" 11 | echo "--shell Skip command and enter an interactive shell" 12 | echo 13 | } 14 | 15 | cd "$(dirname "$0")/.." 16 | 17 | INTERACTIVE=false 18 | if [[ $1 = --shell ]]; then 19 | INTERACTIVE=true 20 | shift 21 | fi 22 | 23 | NOPULL=false 24 | if [[ $1 = --nopull ]]; then 25 | NOPULL=true 26 | shift 27 | fi 28 | 29 | IMAGE="$1" 30 | if [[ -z "$IMAGE" ]]; then 31 | echo Error: image not defined 32 | exit 1 33 | fi 34 | 35 | $NOPULL || docker pull "$IMAGE" 36 | shift 37 | 38 | ARGS=( 39 | --workdir /solana 40 | --volume "$PWD:/solana" 41 | --rm 42 | ) 43 | 44 | if [[ -n $CI ]]; then 45 | # Share the real ~/.cargo between docker containers in CI for speed 46 | ARGS+=(--volume "$HOME:/home") 47 | else 48 | # Avoid sharing ~/.cargo when building locally to avoid a mixed macOS/Linux 49 | # ~/.cargo 50 | ARGS+=(--volume "$PWD:/home") 51 | fi 52 | ARGS+=(--env "CARGO_HOME=/home/.cargo") 53 | 54 | # kcov tries to set the personality of the binary which docker 55 | # doesn't allow by default. 56 | ARGS+=(--security-opt "seccomp=unconfined") 57 | 58 | # Ensure files are created with the current host uid/gid 59 | if [[ -z "$SOLANA_DOCKER_RUN_NOSETUID" ]]; then 60 | ARGS+=(--user "$(id -u):$(id -g)") 61 | fi 62 | 63 | # Environment variables to propagate into the container 64 | ARGS+=( 65 | --env BUILDKITE 66 | --env BUILDKITE_AGENT_ACCESS_TOKEN 67 | --env BUILDKITE_BRANCH 68 | --env BUILDKITE_JOB_ID 69 | --env BUILDKITE_TAG 70 | --env CODECOV_TOKEN 71 | --env CRATES_IO_TOKEN 72 | --env SNAPCRAFT_CREDENTIALS_KEY 73 | ) 74 | 75 | if $INTERACTIVE; then 76 | if [[ -n $1 ]]; then 77 | echo 78 | echo "Note: '$*' ignored due to --shell argument" 79 | echo 80 | fi 81 | set -x 82 | exec docker run --interactive --tty "${ARGS[@]}" "$IMAGE" bash 83 | fi 84 | 85 | set -x 86 | exec docker run "${ARGS[@]}" "$IMAGE" "$@" 87 | -------------------------------------------------------------------------------- /ci/docker-sgx/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | RUN apt-get update && \ 5 | apt-get install -y build-essential ocaml ocamlbuild automake autoconf libtool wget python libssl-dev libcurl4-openssl-dev protobuf-compiler libprotobuf-dev sudo kmod vim curl git-core libprotobuf-c0-dev libboost-thread-dev libboost-system-dev liblog4cpp5-dev libjsoncpp-dev alien uuid-dev libxml2-dev cmake pkg-config expect 6 | 7 | 8 | RUN mkdir /root/sgx && mkdir /etc/init/ && \ 9 | wget -O /root/sgx/sdk.bin https://download.01.org/intel-sgx/linux-2.3.1/ubuntu18.04/sgx_linux_x64_sdk_2.3.101.46683.bin && \ 10 | wget -O /root/sgx/psw.deb https://download.01.org/intel-sgx/linux-2.3.1/ubuntu18.04/libsgx-enclave-common_2.3.101.46683-1_amd64.deb && \ 11 | cd /root/sgx && \ 12 | dpkg -i /root/sgx/psw.deb && \ 13 | chmod +x /root/sgx/sdk.bin && \ 14 | echo -e 'no\n/opt' | /root/sgx/sdk.bin && \ 15 | echo 'source /opt/sgxsdk/environment' >> /root/.bashrc && \ 16 | rm -rf /root/sgx/* 17 | 18 | WORKDIR /root 19 | 20 | -------------------------------------------------------------------------------- /ci/docker-sgx/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -ex 3 | 4 | cd "$(dirname "$0")" 5 | 6 | docker build -t solanalabs/sgxsdk . 7 | docker push solanalabs/sgxsdk 8 | 9 | -------------------------------------------------------------------------------- /ci/env.sh: -------------------------------------------------------------------------------- 1 | # 2 | # Normalized CI environment variables 3 | # 4 | # |source| me 5 | # 6 | 7 | if [[ -n $CI ]]; then 8 | export CI=1 9 | if [[ -n $TRAVIS ]]; then 10 | export CI_BRANCH=$TRAVIS_BRANCH 11 | export CI_BUILD_ID=$TRAVIS_BUILD_ID 12 | export CI_COMMIT=$TRAVIS_COMMIT 13 | export CI_JOB_ID=$TRAVIS_JOB_ID 14 | if $TRAVIS_PULL_REQUEST; then 15 | export CI_PULL_REQUEST=true 16 | else 17 | export CI_PULL_REQUEST= 18 | fi 19 | export CI_OS_NAME=$TRAVIS_OS_NAME 20 | export CI_REPO_SLUG=$TRAVIS_REPO_SLUG 21 | export CI_TAG=$TRAVIS_TAG 22 | elif [[ -n $BUILDKITE ]]; then 23 | export CI_BRANCH=$BUILDKITE_BRANCH 24 | export CI_BUILD_ID=$BUILDKITE_BUILD_ID 25 | export CI_COMMIT=$BUILDKITE_COMMIT 26 | export CI_JOB_ID=$BUILDKITE_JOB_ID 27 | # The standard BUILDKITE_PULL_REQUEST environment variable is always "false" due 28 | # to how solana-ci-gate is used to trigger PR builds rather than using the 29 | # standard Buildkite PR trigger. 30 | if [[ $CI_BRANCH =~ pull/* ]]; then 31 | export CI_PULL_REQUEST=true 32 | else 33 | export CI_PULL_REQUEST= 34 | fi 35 | export CI_OS_NAME=linux 36 | export CI_REPO_SLUG=$BUILDKITE_ORGANIZATION_SLUG/$BUILDKITE_PIPELINE_SLUG 37 | # TRIGGERED_BUILDKITE_TAG is a workaround to propagate BUILDKITE_TAG into 38 | # the solana-secondary builder 39 | if [[ -n $TRIGGERED_BUILDKITE_TAG ]]; then 40 | export CI_TAG=$TRIGGERED_BUILDKITE_TAG 41 | else 42 | export CI_TAG=$BUILDKITE_TAG 43 | fi 44 | elif [[ -n $APPVEYOR ]]; then 45 | export CI_BRANCH=$APPVEYOR_REPO_BRANCH 46 | export CI_BUILD_ID=$APPVEYOR_BUILD_ID 47 | export CI_COMMIT=$APPVEYOR_REPO_COMMIT 48 | export CI_JOB_ID=$APPVEYOR_JOB_ID 49 | if [[ -n $APPVEYOR_PULL_REQUEST_NUMBER ]]; then 50 | export CI_PULL_REQUEST=true 51 | else 52 | export CI_PULL_REQUEST= 53 | fi 54 | if [[ $CI_LINUX = True ]]; then 55 | export CI_OS_NAME=linux 56 | elif [[ $CI_WINDOWS = True ]]; then 57 | export CI_OS_NAME=windows 58 | fi 59 | export CI_REPO_SLUG=$APPVEYOR_REPO_NAME 60 | export CI_TAG=$APPVEYOR_REPO_TAG_NAME 61 | fi 62 | else 63 | export CI= 64 | export CI_BRANCH= 65 | export CI_BUILD_ID= 66 | export CI_COMMIT= 67 | export CI_JOB_ID= 68 | export CI_OS_NAME= 69 | export CI_PULL_REQUEST= 70 | export CI_REPO_SLUG= 71 | export CI_TAG= 72 | fi 73 | 74 | cat < 2 | #include 3 | 4 | #ifndef GPU_COMMON_H 5 | #define GPU_COMMON_H 6 | 7 | extern bool g_verbose; 8 | 9 | #define LOG(...) if (g_verbose) { printf(__VA_ARGS__); } 10 | 11 | #define ROUND_UP_DIV(x, y) (((x) + (y) - 1) / (y)) 12 | 13 | #ifndef OPENCL_VARIANT 14 | 15 | #define CUDA_CHK(ans) { cuda_assert((ans), __FILE__, __LINE__); } 16 | 17 | inline void cuda_assert(cudaError_t err, const char *file, int line) 18 | { 19 | if (err != cudaSuccess) 20 | { 21 | fprintf(stderr,"ERR: %s %s %d\n", cudaGetErrorString(err), file, line); 22 | assert(0); 23 | } 24 | } 25 | 26 | #endif 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /src/common/perftime.h: -------------------------------------------------------------------------------- 1 | #ifndef PERFTIME_H 2 | #define PERFTIME_H 3 | 4 | #ifdef USE_RDTSC 5 | static inline uint64_t rdtsc() 6 | { 7 | unsigned int hi, lo; 8 | __asm__ volatile("rdtsc" : "=a" (lo), "=d" (hi)); 9 | return ((uint64_t)hi << 32) | lo; 10 | } 11 | 12 | typedef struct { 13 | uint64_t count; 14 | } perftime_t; 15 | 16 | #elif defined(USE_CLOCK_GETTIME) 17 | #include 18 | typedef struct timespec perftime_t; 19 | #else 20 | #include 21 | typedef struct timeval perftime_t; 22 | #endif 23 | 24 | static int get_time(perftime_t* t) { 25 | #ifdef USE_RDTSC 26 | t->count = rdtsc(); 27 | return 0; 28 | #elif defined(USE_CLOCK_GETTIME) 29 | return clock_gettime(CLOCK_MONOTONIC_RAW, t); 30 | //return clock_gettime(CLOCK_PROCESS_CPUTIME_ID, t); 31 | #else 32 | return gettimeofday(t, NULL /* timezone */); 33 | #endif 34 | } 35 | 36 | static double get_us(const perftime_t* time) { 37 | #ifdef USE_RDTSC 38 | return time->count; 39 | #elif defined(USE_CLOCK_GETTIME) 40 | return ((time->tv_nsec/1000) + (double)time->tv_sec * 1000000); 41 | #else 42 | return (time->tv_usec + (double)time->tv_sec * 1000000); 43 | #endif 44 | } 45 | 46 | static double get_diff(const perftime_t* start, const perftime_t* end) { 47 | return get_us(end) - get_us(start); 48 | } 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/common.cu: -------------------------------------------------------------------------------- 1 | 2 | #ifndef COMMON_CU 3 | #define COMMON_CU 4 | 5 | static uint64_t __host__ __device__ load_3(const unsigned char *in) { 6 | uint64_t result; 7 | 8 | result = (uint64_t) in[0]; 9 | result |= ((uint64_t) in[1]) << 8; 10 | result |= ((uint64_t) in[2]) << 16; 11 | 12 | return result; 13 | } 14 | 15 | static uint64_t __host__ __device__ load_4(const unsigned char *in) { 16 | uint64_t result; 17 | 18 | result = (uint64_t) in[0]; 19 | result |= ((uint64_t) in[1]) << 8; 20 | result |= ((uint64_t) in[2]) << 16; 21 | result |= ((uint64_t) in[3]) << 24; 22 | 23 | return result; 24 | } 25 | 26 | static uint64_t __host__ __device__ load_7(const unsigned char *in) { 27 | uint64_t result; 28 | 29 | result = (uint64_t) in[0]; 30 | result |= ((uint64_t) in[1]) << 8; 31 | result |= ((uint64_t) in[2]) << 16; 32 | result |= ((uint64_t) in[3]) << 24; 33 | result |= ((uint64_t) in[4]) << 32; 34 | result |= ((uint64_t) in[5]) << 40; 35 | result |= ((uint64_t) in[6]) << 48; 36 | 37 | return result; 38 | } 39 | 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/ed25519.h: -------------------------------------------------------------------------------- 1 | #ifndef ED25519_H 2 | #define ED25519_H 3 | 4 | #include 5 | #include 6 | 7 | #if defined(_WIN32) 8 | #if defined(ED25519_BUILD_DLL) 9 | #define ED25519_DECLSPEC __declspec(dllexport) 10 | #elif defined(ED25519_DLL) 11 | #define ED25519_DECLSPEC __declspec(dllimport) 12 | #else 13 | #define ED25519_DECLSPEC 14 | #endif 15 | #else 16 | #define ED25519_DECLSPEC 17 | #endif 18 | 19 | 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | 24 | #ifndef ED25519_NO_SEED 25 | int ED25519_DECLSPEC ed25519_create_seed(unsigned char *seed); 26 | #endif 27 | 28 | #define SHA512_SIZE 64 29 | #define PUB_KEY_SIZE 32 30 | #define PRIV_KEY_SIZE 64 31 | #define SEED_SIZE 32 32 | #define SCALAR_SIZE 32 33 | #define SIG_SIZE 64 34 | 35 | typedef struct { 36 | uint8_t* elems; 37 | uint32_t num; 38 | } gpu_Elems; 39 | 40 | void ED25519_DECLSPEC ed25519_create_keypair(unsigned char *public_key, unsigned char *private_key, const unsigned char *seed); 41 | void ED25519_DECLSPEC ed25519_sign(unsigned char *signature, const unsigned char *message, size_t message_len, const unsigned char *public_key, const unsigned char *private_key); 42 | 43 | void ED25519_DECLSPEC ed25519_sign_many(const gpu_Elems* elems, 44 | uint32_t num_elems, 45 | uint32_t message_size, 46 | uint32_t total_packets, 47 | uint32_t total_signatures, 48 | const uint32_t* message_lens, 49 | const uint32_t* public_key_offsets, 50 | const uint32_t* private_key_offsets, 51 | const uint32_t* message_start_offsets, 52 | uint8_t* signatures_out, 53 | uint8_t use_non_default_stream); 54 | 55 | int ED25519_DECLSPEC ed25519_verify(const unsigned char *signature, const unsigned char *message, uint32_t message_len, const unsigned char *public_key); 56 | 57 | void ED25519_DECLSPEC ed25519_verify_many(const gpu_Elems* elems, 58 | uint32_t num_elems, 59 | uint32_t message_size, 60 | uint32_t total_packets, 61 | uint32_t total_signatures, 62 | const uint32_t* message_lens, 63 | const uint32_t* public_key_offsets, 64 | const uint32_t* private_key_offsets, 65 | const uint32_t* message_start_offsets, 66 | uint8_t* out, 67 | uint8_t use_non_default_stream); 68 | 69 | void ED25519_DECLSPEC ed25519_add_scalar(unsigned char *public_key, unsigned char *private_key, const unsigned char *scalar); 70 | void ED25519_DECLSPEC ed25519_key_exchange(unsigned char *shared_secret, const unsigned char *public_key, const unsigned char *private_key); 71 | void ED25519_DECLSPEC ed25519_set_verbose(bool val); 72 | 73 | const char* ED25519_DECLSPEC ed25519_license(); 74 | bool ED25519_DECLSPEC ed25519_init(); 75 | 76 | int cuda_host_register(void* ptr, size_t size, unsigned int flags); 77 | int cuda_host_unregister(void* ptr); 78 | 79 | int ED25519_DECLSPEC ed25519_get_checked_scalar(unsigned char* out_scalar, const unsigned char* in_scalar); 80 | 81 | int ED25519_DECLSPEC ed25519_check_packed_ge_small_order(const unsigned char* packed_group_element); 82 | 83 | #ifdef __cplusplus 84 | } 85 | #endif 86 | 87 | #endif 88 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/fe.h: -------------------------------------------------------------------------------- 1 | #ifndef FE_H 2 | #define FE_H 3 | 4 | #include "fixedint.h" 5 | 6 | 7 | /* 8 | fe means field element. 9 | Here the field is \Z/(2^255-19). 10 | An element t, entries t[0]...t[9], represents the integer 11 | t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9]. 12 | Bounds on each t[i] vary depending on context. 13 | */ 14 | 15 | 16 | typedef int32_t fe[10]; 17 | 18 | 19 | void __host__ __device__ fe_0(fe h); 20 | void __device__ __host__ fe_1(fe h); 21 | int __device__ __host__ fe_is_0(fe h); 22 | int __device__ __host__ fe_is_1(fe h); 23 | 24 | void __device__ __host__ fe_frombytes(fe h, const unsigned char *s); 25 | void __device__ __host__ fe_tobytes(unsigned char *s, const fe h); 26 | 27 | void __host__ __device__ fe_copy(fe h, const fe f); 28 | int __host__ __device__ fe_isnegative(const fe f); 29 | int __device__ __host__ fe_isnonzero(const fe f); 30 | void __host__ __device__ fe_cmov(fe f, const fe g, unsigned int b); 31 | void fe_cswap(fe f, fe g, unsigned int b); 32 | 33 | void __device__ __host__ fe_neg(fe h, const fe f); 34 | void __device__ __host__ fe_add(fe h, const fe f, const fe g); 35 | void __device__ __host__ fe_invert(fe out, const fe z); 36 | void __device__ __host__ fe_sq(fe h, const fe f); 37 | void __host__ __device__ fe_sq2(fe h, const fe f); 38 | void __device__ __host__ fe_mul(fe h, const fe f, const fe g); 39 | void fe_mul121666(fe h, fe f); 40 | void __device__ __host__ fe_pow22523(fe out, const fe z); 41 | void __device__ __host__ fe_sub(fe h, const fe f, const fe g); 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/fixedint.h: -------------------------------------------------------------------------------- 1 | /* 2 | Portable header to provide the 32 and 64 bits type. 3 | 4 | Not a compatible replacement for , do not blindly use it as such. 5 | */ 6 | 7 | #if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined(__WATCOMC__) && (defined(_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_) || defined(__UINT_FAST64_TYPE__)) )) && !defined(FIXEDINT_H_INCLUDED) 8 | #include 9 | #define FIXEDINT_H_INCLUDED 10 | 11 | #if defined(__WATCOMC__) && __WATCOMC__ >= 1250 && !defined(UINT64_C) 12 | #include 13 | #define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX)) 14 | #endif 15 | #endif 16 | 17 | 18 | #ifndef FIXEDINT_H_INCLUDED 19 | #define FIXEDINT_H_INCLUDED 20 | 21 | #include 22 | 23 | /* (u)int32_t */ 24 | #ifndef uint32_t 25 | #if (ULONG_MAX == 0xffffffffUL) 26 | typedef unsigned long uint32_t; 27 | #elif (UINT_MAX == 0xffffffffUL) 28 | typedef unsigned int uint32_t; 29 | #elif (USHRT_MAX == 0xffffffffUL) 30 | typedef unsigned short uint32_t; 31 | #endif 32 | #endif 33 | 34 | 35 | #ifndef int32_t 36 | #if (LONG_MAX == 0x7fffffffL) 37 | typedef signed long int32_t; 38 | #elif (INT_MAX == 0x7fffffffL) 39 | typedef signed int int32_t; 40 | #elif (SHRT_MAX == 0x7fffffffL) 41 | typedef signed short int32_t; 42 | #endif 43 | #endif 44 | 45 | 46 | /* (u)int64_t */ 47 | #if (defined(__STDC__) && defined(__STDC_VERSION__) && __STDC__ && __STDC_VERSION__ >= 199901L) 48 | typedef long long int64_t; 49 | typedef unsigned long long uint64_t; 50 | 51 | #define UINT64_C(v) v ##ULL 52 | #define INT64_C(v) v ##LL 53 | #elif defined(__GNUC__) 54 | __extension__ typedef long long int64_t; 55 | __extension__ typedef unsigned long long uint64_t; 56 | 57 | #define UINT64_C(v) v ##ULL 58 | #define INT64_C(v) v ##LL 59 | #elif defined(__MWERKS__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) || defined(__APPLE_CC__) || defined(_LONG_LONG) || defined(_CRAYC) 60 | typedef long long int64_t; 61 | typedef unsigned long long uint64_t; 62 | 63 | #define UINT64_C(v) v ##ULL 64 | #define INT64_C(v) v ##LL 65 | #elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined(__BORLANDC__) && __BORLANDC__ > 0x460) || defined(__alpha) || defined(__DECC) 66 | typedef __int64 int64_t; 67 | typedef unsigned __int64 uint64_t; 68 | 69 | #define UINT64_C(v) v ##UI64 70 | #define INT64_C(v) v ##I64 71 | #endif 72 | #endif 73 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/ge.h: -------------------------------------------------------------------------------- 1 | #ifndef GE_H 2 | #define GE_H 3 | 4 | #include "fe.h" 5 | 6 | 7 | /* 8 | ge means group element. 9 | 10 | Here the group is the set of pairs (x,y) of field elements (see fe.h) 11 | satisfying -x^2 + y^2 = 1 + d x^2y^2 12 | where d = -121665/121666. 13 | 14 | Representations: 15 | ge_p2 (projective): (X:Y:Z) satisfying x=X/Z, y=Y/Z 16 | ge_p3 (extended): (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT 17 | ge_p1p1 (completed): ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T 18 | ge_precomp (Duif): (y+x,y-x,2dxy) 19 | */ 20 | 21 | typedef struct { 22 | fe X; 23 | fe Y; 24 | fe Z; 25 | } ge_p2; 26 | 27 | typedef struct { 28 | fe X; 29 | fe Y; 30 | fe Z; 31 | fe T; 32 | } ge_p3; 33 | 34 | typedef struct { 35 | fe X; 36 | fe Y; 37 | fe Z; 38 | fe T; 39 | } ge_p1p1; 40 | 41 | typedef struct { 42 | fe yplusx; 43 | fe yminusx; 44 | fe xy2d; 45 | } ge_precomp; 46 | 47 | typedef struct { 48 | fe YplusX; 49 | fe YminusX; 50 | fe Z; 51 | fe T2d; 52 | } ge_cached; 53 | 54 | #define GE_LOOKUP_SIZE 8 55 | 56 | void __host__ __device__ ge_p3_tobytes(unsigned char *s, const ge_p3 *h); 57 | void __host__ __device__ ge_tobytes(unsigned char *s, const ge_p2 *h); 58 | int __host__ __device__ ge_frombytes_negate_vartime(ge_p3 *h, const unsigned char *s); 59 | 60 | void __host__ __device__ ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q); 61 | void __host__ __device__ ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q); 62 | void __host__ __device__ ge_addsub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q, bool add); 63 | void __host__ __device__ ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a, const ge_cached *A, const unsigned char *b); 64 | void __host__ __device__ ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q); 65 | void __host__ __device__ ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q); 66 | void __host__ __device__ ge_maddsub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q, bool add); 67 | void __host__ __device__ ge_scalarmult_base(ge_p3 *h, const unsigned char *a); 68 | 69 | void __host__ __device__ ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p); 70 | void __host__ __device__ ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p); 71 | void __host__ __device__ ge_p2_0(ge_p2 *h); 72 | void __host__ __device__ ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p); 73 | void __host__ __device__ ge_p3_0(ge_p3 *h); 74 | void __host__ __device__ ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p); 75 | void __host__ __device__ ge_p3_to_cached(ge_cached *r, const ge_p3 *p); 76 | void __host__ __device__ ge_p3_to_p2(ge_p2 *r, const ge_p3 *p); 77 | 78 | #endif 79 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/gpu_ctx.cu: -------------------------------------------------------------------------------- 1 | #include "ed25519.h" 2 | #include "gpu_ctx.h" 3 | #include 4 | #include "gpu_common.h" 5 | 6 | static pthread_mutex_t g_ctx_mutex = PTHREAD_MUTEX_INITIALIZER; 7 | 8 | #define MAX_NUM_GPUS 8 9 | #define MAX_QUEUE_SIZE 8 10 | 11 | static gpu_ctx_t g_gpu_ctx[MAX_NUM_GPUS][MAX_QUEUE_SIZE] = {0}; 12 | static uint32_t g_cur_gpu = 0; 13 | static uint32_t g_cur_queue[MAX_NUM_GPUS] = {0}; 14 | static int32_t g_total_gpus = -1; 15 | 16 | static bool cuda_crypt_init_locked() { 17 | if (g_total_gpus == -1) { 18 | cudaGetDeviceCount(&g_total_gpus); 19 | g_total_gpus = min(MAX_NUM_GPUS, g_total_gpus); 20 | LOG("total_gpus: %d\n", g_total_gpus); 21 | for (int gpu = 0; gpu < g_total_gpus; gpu++) { 22 | CUDA_CHK(cudaSetDevice(gpu)); 23 | for (int queue = 0; queue < MAX_QUEUE_SIZE; queue++) { 24 | int err = pthread_mutex_init(&g_gpu_ctx[gpu][queue].mutex, NULL); 25 | if (err != 0) { 26 | fprintf(stderr, "pthread_mutex_init error %d gpu: %d queue: %d\n", 27 | err, gpu, queue); 28 | g_total_gpus = 0; 29 | return false; 30 | } 31 | CUDA_CHK(cudaStreamCreate(&g_gpu_ctx[gpu][queue].stream)); 32 | } 33 | } 34 | } 35 | return g_total_gpus > 0; 36 | } 37 | 38 | bool ed25519_init() { 39 | cudaFree(0); 40 | pthread_mutex_lock(&g_ctx_mutex); 41 | bool success = cuda_crypt_init_locked(); 42 | pthread_mutex_unlock(&g_ctx_mutex); 43 | return success; 44 | } 45 | 46 | gpu_ctx_t* get_gpu_ctx() { 47 | int32_t cur_gpu, cur_queue; 48 | 49 | LOG("locking global mutex\n"); 50 | pthread_mutex_lock(&g_ctx_mutex); 51 | if (!cuda_crypt_init_locked()) { 52 | pthread_mutex_unlock(&g_ctx_mutex); 53 | LOG("No GPUs, exiting...\n"); 54 | return NULL; 55 | } 56 | cur_gpu = g_cur_gpu; 57 | g_cur_gpu++; 58 | g_cur_gpu %= g_total_gpus; 59 | cur_queue = g_cur_queue[cur_gpu]; 60 | g_cur_queue[cur_gpu]++; 61 | g_cur_queue[cur_gpu] %= MAX_QUEUE_SIZE; 62 | pthread_mutex_unlock(&g_ctx_mutex); 63 | 64 | gpu_ctx_t* cur_ctx = &g_gpu_ctx[cur_gpu][cur_queue]; 65 | LOG("locking contex mutex queue: %d gpu: %d\n", cur_queue, cur_gpu); 66 | pthread_mutex_lock(&cur_ctx->mutex); 67 | 68 | CUDA_CHK(cudaSetDevice(cur_gpu)); 69 | 70 | LOG("selecting gpu: %d queue: %d\n", cur_gpu, cur_queue); 71 | 72 | return cur_ctx; 73 | } 74 | 75 | void setup_gpu_ctx(verify_ctx_t* cur_ctx, 76 | const gpu_Elems* elems, 77 | uint32_t num_elems, 78 | uint32_t message_size, 79 | uint32_t total_packets, 80 | uint32_t total_packets_size, 81 | uint32_t total_signatures, 82 | const uint32_t* message_lens, 83 | const uint32_t* public_key_offsets, 84 | const uint32_t* signature_offsets, 85 | const uint32_t* message_start_offsets, 86 | size_t out_size, 87 | cudaStream_t stream 88 | ) { 89 | size_t offsets_size = total_signatures * sizeof(uint32_t); 90 | 91 | LOG("device allocate. packets: %d out: %d offsets_size: %zu\n", 92 | total_packets_size, (int)out_size, offsets_size); 93 | 94 | if (cur_ctx->packets == NULL || 95 | total_packets_size > cur_ctx->packets_size_bytes) { 96 | CUDA_CHK(cudaFree(cur_ctx->packets)); 97 | CUDA_CHK(cudaMalloc(&cur_ctx->packets, total_packets_size)); 98 | 99 | cur_ctx->packets_size_bytes = total_packets_size; 100 | } 101 | 102 | if (cur_ctx->out == NULL || cur_ctx->out_size_bytes < out_size) { 103 | CUDA_CHK(cudaFree(cur_ctx->out)); 104 | CUDA_CHK(cudaMalloc(&cur_ctx->out, out_size)); 105 | 106 | cur_ctx->out_size_bytes = out_size; 107 | } 108 | 109 | if (cur_ctx->public_key_offsets == NULL || cur_ctx->offsets_len < total_signatures) { 110 | CUDA_CHK(cudaFree(cur_ctx->Ai)); 111 | CUDA_CHK(cudaMalloc(&cur_ctx->Ai, total_signatures * sizeof(ge_cached) * GE_LOOKUP_SIZE)); 112 | 113 | CUDA_CHK(cudaFree(cur_ctx->h)); 114 | CUDA_CHK(cudaMalloc(&cur_ctx->h, total_signatures * SHA512_SIZE)); 115 | 116 | CUDA_CHK(cudaFree(cur_ctx->public_key_offsets)); 117 | CUDA_CHK(cudaMalloc(&cur_ctx->public_key_offsets, offsets_size)); 118 | 119 | CUDA_CHK(cudaFree(cur_ctx->signature_offsets)); 120 | CUDA_CHK(cudaMalloc(&cur_ctx->signature_offsets, offsets_size)); 121 | 122 | CUDA_CHK(cudaFree(cur_ctx->message_start_offsets)); 123 | CUDA_CHK(cudaMalloc(&cur_ctx->message_start_offsets, offsets_size)); 124 | 125 | CUDA_CHK(cudaFree(cur_ctx->message_lens)); 126 | CUDA_CHK(cudaMalloc(&cur_ctx->message_lens, offsets_size)); 127 | 128 | cur_ctx->offsets_len = total_signatures; 129 | } 130 | 131 | LOG("Done alloc\n"); 132 | 133 | CUDA_CHK(cudaMemcpyAsync(cur_ctx->public_key_offsets, public_key_offsets, offsets_size, cudaMemcpyHostToDevice, stream)); 134 | CUDA_CHK(cudaMemcpyAsync(cur_ctx->signature_offsets, signature_offsets, offsets_size, cudaMemcpyHostToDevice, stream)); 135 | CUDA_CHK(cudaMemcpyAsync(cur_ctx->message_start_offsets, message_start_offsets, offsets_size, cudaMemcpyHostToDevice, stream)); 136 | CUDA_CHK(cudaMemcpyAsync(cur_ctx->message_lens, message_lens, offsets_size, cudaMemcpyHostToDevice, stream)); 137 | 138 | size_t cur = 0; 139 | for (size_t i = 0; i < num_elems; i++) { 140 | LOG("i: %zu size: %d\n", i, elems[i].num * message_size); 141 | CUDA_CHK(cudaMemcpyAsync(&cur_ctx->packets[cur * message_size], elems[i].elems, elems[i].num * message_size, cudaMemcpyHostToDevice, stream)); 142 | cur += elems[i].num; 143 | } 144 | } 145 | 146 | 147 | void release_gpu_ctx(gpu_ctx_t* cur_ctx) { 148 | pthread_mutex_unlock(&cur_ctx->mutex); 149 | } 150 | 151 | void ed25519_free_gpu_mem() { 152 | for (size_t gpu = 0; gpu < MAX_NUM_GPUS; gpu++) { 153 | for (size_t queue = 0; queue < MAX_QUEUE_SIZE; queue++) { 154 | gpu_ctx_t* cur_ctx = &g_gpu_ctx[gpu][queue]; 155 | CUDA_CHK(cudaFree(cur_ctx->verify_ctx.packets)); 156 | CUDA_CHK(cudaFree(cur_ctx->verify_ctx.out)); 157 | CUDA_CHK(cudaFree(cur_ctx->verify_ctx.message_lens)); 158 | CUDA_CHK(cudaFree(cur_ctx->verify_ctx.public_key_offsets)); 159 | CUDA_CHK(cudaFree(cur_ctx->verify_ctx.private_key_offsets)); 160 | CUDA_CHK(cudaFree(cur_ctx->verify_ctx.signature_offsets)); 161 | CUDA_CHK(cudaFree(cur_ctx->verify_ctx.message_start_offsets)); 162 | if (cur_ctx->stream != 0) { 163 | CUDA_CHK(cudaStreamDestroy(cur_ctx->stream)); 164 | } 165 | } 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/gpu_ctx.h: -------------------------------------------------------------------------------- 1 | #ifndef GPU_CTX_H 2 | #define GPU_CTX_H 3 | 4 | #include 5 | #include "ed25519.h" 6 | #include "ge.h" 7 | 8 | #ifdef __cplusplus 9 | extern "C" { 10 | #endif 11 | 12 | typedef struct { 13 | uint8_t* packets; 14 | uint32_t packets_size_bytes; 15 | 16 | ge_cached* Ai; 17 | uint8_t* h; 18 | uint8_t* out; 19 | size_t out_size_bytes; 20 | 21 | uint32_t* public_key_offsets; 22 | uint32_t* private_key_offsets; 23 | uint32_t* message_start_offsets; 24 | uint32_t* signature_offsets; 25 | uint32_t* message_lens; 26 | size_t offsets_len; 27 | } verify_ctx_t; 28 | 29 | typedef struct { 30 | verify_ctx_t verify_ctx; 31 | 32 | pthread_mutex_t mutex; 33 | cudaStream_t stream; 34 | } gpu_ctx_t; 35 | 36 | extern gpu_ctx_t* get_gpu_ctx(); 37 | extern void release_gpu_ctx(gpu_ctx_t*); 38 | 39 | extern void ed25519_free_gpu_mem(); 40 | 41 | extern void setup_gpu_ctx(verify_ctx_t* cur_ctx, 42 | const gpu_Elems* elems, 43 | uint32_t num_elems, 44 | uint32_t message_size, 45 | uint32_t total_packets, 46 | uint32_t total_packets_size, 47 | uint32_t total_signatures, 48 | const uint32_t* message_lens, 49 | const uint32_t* public_key_offsets, 50 | const uint32_t* signature_offsets, 51 | const uint32_t* message_start_offsets, 52 | size_t out_size, 53 | cudaStream_t stream 54 | ); 55 | 56 | #ifdef __cplusplus 57 | } 58 | #endif 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/int128.h: -------------------------------------------------------------------------------- 1 | #ifndef INT128_H 2 | #define INT128_H 3 | 4 | struct uint128_t { 5 | uint64_t low; 6 | uint64_t high; 7 | }; 8 | 9 | static __device__ __host__ uint128_t mul_128(uint64_t a, uint64_t b) { 10 | uint128_t result; 11 | #ifdef __CUDA_ARCH__ 12 | result.low = a * b; 13 | result.high = __mul64hi(a, b); 14 | #elif __x86_64__ 15 | asm( "mulq %3\n\t" 16 | : "=a" (result.low), "=d" (result.high) 17 | : "%0" (a), "rm" (b)); 18 | #endif 19 | return result; 20 | } 21 | 22 | static __device__ __host__ uint128_t add_128(uint128_t a, uint128_t b) { 23 | uint128_t result; 24 | #ifdef __CUDA_ARCH__ 25 | asm( "add.cc.u64 %0, %2, %4;\n\t" 26 | "addc.u64 %1, %3, %5;\n\t" 27 | : "=l" (result.low), "=l" (result.high) 28 | : "l" (a.low), "l" (a.high), 29 | "l" (b.low), "l" (b.high)); 30 | #else 31 | result.low = a.low + b.low; 32 | result.high = a.high + b.high + (result.low < a.low); 33 | #endif 34 | return result; 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/keypair.cu: -------------------------------------------------------------------------------- 1 | #include "ed25519.h" 2 | #include "sha512.h" 3 | #include "ge.h" 4 | 5 | 6 | void ed25519_create_keypair(unsigned char *public_key, unsigned char *private_key, const unsigned char *seed) { 7 | ge_p3 A; 8 | 9 | sha512(seed, 32, private_key); 10 | private_key[0] &= 248; 11 | private_key[31] &= 63; 12 | private_key[31] |= 64; 13 | 14 | ge_scalarmult_base(&A, private_key); 15 | ge_p3_tobytes(public_key, &A); 16 | } 17 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Orson Peters 2 | 3 | This software is provided 'as-is', without any express or implied warranty. In no event will the 4 | authors be held liable for any damages arising from the use of this software. 5 | 6 | Permission is granted to anyone to use this software for any purpose, including commercial 7 | applications, and to alter it and redistribute it freely, subject to the following restrictions: 8 | 9 | 1. The origin of this software must not be misrepresented; you must not claim that you wrote the 10 | original software. If you use this software in a product, an acknowledgment in the product 11 | documentation would be appreciated but is not required. 12 | 13 | 2. Altered source versions must be plainly marked as such, and must not be misrepresented as 14 | being the original software. 15 | 16 | 3. This notice may not be removed or altered from any source distribution. 17 | 18 | ================================ 19 | 20 | Copyright (c) 2017-2019 isis agora lovecruft. All rights reserved. 21 | 22 | Redistribution and use in source and binary forms, with or without 23 | modification, are permitted provided that the following conditions are 24 | met: 25 | 26 | 1. Redistributions of source code must retain the above copyright 27 | notice, this list of conditions and the following disclaimer. 28 | 29 | 2. Redistributions in binary form must reproduce the above copyright 30 | notice, this list of conditions and the following disclaimer in the 31 | documentation and/or other materials provided with the distribution. 32 | 33 | 3. Neither the name of the copyright holder nor the names of its 34 | contributors may be used to endorse or promote products derived from 35 | this software without specific prior written permission. 36 | 37 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 38 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 39 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 40 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 41 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 42 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 43 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 44 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 46 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 47 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/sc.h: -------------------------------------------------------------------------------- 1 | #ifndef SC_H 2 | #define SC_H 3 | 4 | /* 5 | The set of scalars is \Z/l 6 | where l = 2^252 + 27742317777372353535851937790883648493. 7 | */ 8 | 9 | void __host__ __device__ scalar32_reduce(unsigned char* s); 10 | void __host__ __device__ sc_reduce(unsigned char *s); 11 | void __host__ __device__ sc_muladd(unsigned char *s, const unsigned char *a, const unsigned char *b, const unsigned char *c); 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/seed.cu: -------------------------------------------------------------------------------- 1 | #include "ed25519.h" 2 | 3 | #ifndef ED25519_NO_SEED 4 | 5 | #ifdef _WIN32 6 | #include 7 | #include 8 | #else 9 | #include 10 | #endif 11 | 12 | int ed25519_create_seed(unsigned char *seed) { 13 | #ifdef _WIN32 14 | HCRYPTPROV prov; 15 | 16 | if (!CryptAcquireContext(&prov, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) { 17 | return 1; 18 | } 19 | 20 | if (!CryptGenRandom(prov, 32, seed)) { 21 | CryptReleaseContext(prov, 0); 22 | return 1; 23 | } 24 | 25 | CryptReleaseContext(prov, 0); 26 | #else 27 | FILE *f = fopen("/dev/urandom", "rb"); 28 | 29 | if (f == NULL) { 30 | return 1; 31 | } 32 | 33 | size_t res = fread(seed, 1, 32, f); 34 | if (res != 32) { 35 | return 1; 36 | } 37 | fclose(f); 38 | #endif 39 | 40 | return 0; 41 | } 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/sha512.cu: -------------------------------------------------------------------------------- 1 | /* LibTomCrypt, modular cryptographic library -- Tom St Denis 2 | * 3 | * LibTomCrypt is a library that provides various cryptographic 4 | * algorithms in a highly modular and flexible manner. 5 | * 6 | * The library is free for all purposes without any express 7 | * guarantee it works. 8 | * 9 | * Tom St Denis, tomstdenis@gmail.com, http://libtom.org 10 | */ 11 | 12 | #include "fixedint.h" 13 | #include "sha512.h" 14 | 15 | #ifdef __CUDA_ARCH__ 16 | #define K_DEF __device__ 17 | #else 18 | #define K_DEF 19 | #endif 20 | 21 | static const uint64_t K_DEF K[80] = { 22 | UINT64_C(0x428a2f98d728ae22), UINT64_C(0x7137449123ef65cd), 23 | UINT64_C(0xb5c0fbcfec4d3b2f), UINT64_C(0xe9b5dba58189dbbc), 24 | UINT64_C(0x3956c25bf348b538), UINT64_C(0x59f111f1b605d019), 25 | UINT64_C(0x923f82a4af194f9b), UINT64_C(0xab1c5ed5da6d8118), 26 | UINT64_C(0xd807aa98a3030242), UINT64_C(0x12835b0145706fbe), 27 | UINT64_C(0x243185be4ee4b28c), UINT64_C(0x550c7dc3d5ffb4e2), 28 | UINT64_C(0x72be5d74f27b896f), UINT64_C(0x80deb1fe3b1696b1), 29 | UINT64_C(0x9bdc06a725c71235), UINT64_C(0xc19bf174cf692694), 30 | UINT64_C(0xe49b69c19ef14ad2), UINT64_C(0xefbe4786384f25e3), 31 | UINT64_C(0x0fc19dc68b8cd5b5), UINT64_C(0x240ca1cc77ac9c65), 32 | UINT64_C(0x2de92c6f592b0275), UINT64_C(0x4a7484aa6ea6e483), 33 | UINT64_C(0x5cb0a9dcbd41fbd4), UINT64_C(0x76f988da831153b5), 34 | UINT64_C(0x983e5152ee66dfab), UINT64_C(0xa831c66d2db43210), 35 | UINT64_C(0xb00327c898fb213f), UINT64_C(0xbf597fc7beef0ee4), 36 | UINT64_C(0xc6e00bf33da88fc2), UINT64_C(0xd5a79147930aa725), 37 | UINT64_C(0x06ca6351e003826f), UINT64_C(0x142929670a0e6e70), 38 | UINT64_C(0x27b70a8546d22ffc), UINT64_C(0x2e1b21385c26c926), 39 | UINT64_C(0x4d2c6dfc5ac42aed), UINT64_C(0x53380d139d95b3df), 40 | UINT64_C(0x650a73548baf63de), UINT64_C(0x766a0abb3c77b2a8), 41 | UINT64_C(0x81c2c92e47edaee6), UINT64_C(0x92722c851482353b), 42 | UINT64_C(0xa2bfe8a14cf10364), UINT64_C(0xa81a664bbc423001), 43 | UINT64_C(0xc24b8b70d0f89791), UINT64_C(0xc76c51a30654be30), 44 | UINT64_C(0xd192e819d6ef5218), UINT64_C(0xd69906245565a910), 45 | UINT64_C(0xf40e35855771202a), UINT64_C(0x106aa07032bbd1b8), 46 | UINT64_C(0x19a4c116b8d2d0c8), UINT64_C(0x1e376c085141ab53), 47 | UINT64_C(0x2748774cdf8eeb99), UINT64_C(0x34b0bcb5e19b48a8), 48 | UINT64_C(0x391c0cb3c5c95a63), UINT64_C(0x4ed8aa4ae3418acb), 49 | UINT64_C(0x5b9cca4f7763e373), UINT64_C(0x682e6ff3d6b2b8a3), 50 | UINT64_C(0x748f82ee5defb2fc), UINT64_C(0x78a5636f43172f60), 51 | UINT64_C(0x84c87814a1f0ab72), UINT64_C(0x8cc702081a6439ec), 52 | UINT64_C(0x90befffa23631e28), UINT64_C(0xa4506cebde82bde9), 53 | UINT64_C(0xbef9a3f7b2c67915), UINT64_C(0xc67178f2e372532b), 54 | UINT64_C(0xca273eceea26619c), UINT64_C(0xd186b8c721c0c207), 55 | UINT64_C(0xeada7dd6cde0eb1e), UINT64_C(0xf57d4f7fee6ed178), 56 | UINT64_C(0x06f067aa72176fba), UINT64_C(0x0a637dc5a2c898a6), 57 | UINT64_C(0x113f9804bef90dae), UINT64_C(0x1b710b35131c471b), 58 | UINT64_C(0x28db77f523047d84), UINT64_C(0x32caab7b40c72493), 59 | UINT64_C(0x3c9ebe0a15c9bebc), UINT64_C(0x431d67c49c100d4c), 60 | UINT64_C(0x4cc5d4becb3e42b6), UINT64_C(0x597f299cfc657e2a), 61 | UINT64_C(0x5fcb6fab3ad6faec), UINT64_C(0x6c44198c4a475817) 62 | }; 63 | 64 | /* Various logical functions */ 65 | 66 | #define ROR64c(x, y) \ 67 | ( ((((x)&UINT64_C(0xFFFFFFFFFFFFFFFF))>>((uint64_t)(y)&UINT64_C(63))) | \ 68 | ((x)<<((uint64_t)(64-((y)&UINT64_C(63)))))) & UINT64_C(0xFFFFFFFFFFFFFFFF)) 69 | 70 | #define STORE64H(x, y) \ 71 | { (y)[0] = (unsigned char)(((x)>>56)&255); (y)[1] = (unsigned char)(((x)>>48)&255); \ 72 | (y)[2] = (unsigned char)(((x)>>40)&255); (y)[3] = (unsigned char)(((x)>>32)&255); \ 73 | (y)[4] = (unsigned char)(((x)>>24)&255); (y)[5] = (unsigned char)(((x)>>16)&255); \ 74 | (y)[6] = (unsigned char)(((x)>>8)&255); (y)[7] = (unsigned char)((x)&255); } 75 | 76 | #define LOAD64H(x, y) \ 77 | { x = (((uint64_t)((y)[0] & 255))<<56)|(((uint64_t)((y)[1] & 255))<<48) | \ 78 | (((uint64_t)((y)[2] & 255))<<40)|(((uint64_t)((y)[3] & 255))<<32) | \ 79 | (((uint64_t)((y)[4] & 255))<<24)|(((uint64_t)((y)[5] & 255))<<16) | \ 80 | (((uint64_t)((y)[6] & 255))<<8)|(((uint64_t)((y)[7] & 255))); } 81 | 82 | 83 | #define Ch(x,y,z) (z ^ (x & (y ^ z))) 84 | #define Maj(x,y,z) (((x | y) & z) | (x & y)) 85 | #define S(x, n) ROR64c(x, n) 86 | #define R(x, n) (((x) &UINT64_C(0xFFFFFFFFFFFFFFFF))>>((uint64_t)n)) 87 | #define Sigma0(x) (S(x, 28) ^ S(x, 34) ^ S(x, 39)) 88 | #define Sigma1(x) (S(x, 14) ^ S(x, 18) ^ S(x, 41)) 89 | #define Gamma0(x) (S(x, 1) ^ S(x, 8) ^ R(x, 7)) 90 | #define Gamma1(x) (S(x, 19) ^ S(x, 61) ^ R(x, 6)) 91 | #ifndef MIN 92 | #define MIN(x, y) ( ((x)<(y))?(x):(y) ) 93 | #endif 94 | 95 | /* compress 1024-bits */ 96 | static int __device__ __host__ sha512_compress(sha512_context *md, unsigned char *buf) 97 | { 98 | uint64_t S[8], W[80], t0, t1; 99 | int i; 100 | 101 | /* copy state into S */ 102 | for (i = 0; i < 8; i++) { 103 | S[i] = md->state[i]; 104 | } 105 | 106 | /* copy the state into 1024-bits into W[0..15] */ 107 | for (i = 0; i < 16; i++) { 108 | LOAD64H(W[i], buf + (8*i)); 109 | } 110 | 111 | /* fill W[16..79] */ 112 | for (i = 16; i < 80; i++) { 113 | W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16]; 114 | } 115 | 116 | /* Compress */ 117 | #define RND(a,b,c,d,e,f,g,h,i) \ 118 | t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \ 119 | t1 = Sigma0(a) + Maj(a, b, c);\ 120 | d += t0; \ 121 | h = t0 + t1; 122 | 123 | for (i = 0; i < 80; i += 8) { 124 | RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0); 125 | RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1); 126 | RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2); 127 | RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3); 128 | RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4); 129 | RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5); 130 | RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6); 131 | RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7); 132 | } 133 | 134 | #undef RND 135 | 136 | 137 | 138 | /* feedback */ 139 | for (i = 0; i < 8; i++) { 140 | md->state[i] = md->state[i] + S[i]; 141 | } 142 | 143 | return 0; 144 | } 145 | 146 | 147 | /** 148 | Initialize the hash state 149 | @param md The hash state you wish to initialize 150 | @return 0 if successful 151 | */ 152 | int __device__ __host__ sha512_init(sha512_context * md) { 153 | if (md == NULL) return 1; 154 | 155 | md->curlen = 0; 156 | md->length = 0; 157 | md->state[0] = UINT64_C(0x6a09e667f3bcc908); 158 | md->state[1] = UINT64_C(0xbb67ae8584caa73b); 159 | md->state[2] = UINT64_C(0x3c6ef372fe94f82b); 160 | md->state[3] = UINT64_C(0xa54ff53a5f1d36f1); 161 | md->state[4] = UINT64_C(0x510e527fade682d1); 162 | md->state[5] = UINT64_C(0x9b05688c2b3e6c1f); 163 | md->state[6] = UINT64_C(0x1f83d9abfb41bd6b); 164 | md->state[7] = UINT64_C(0x5be0cd19137e2179); 165 | 166 | return 0; 167 | } 168 | 169 | /** 170 | Process a block of memory though the hash 171 | @param md The hash state 172 | @param in The data to hash 173 | @param inlen The length of the data (octets) 174 | @return 0 if successful 175 | */ 176 | int sha512_update(sha512_context * md, const unsigned char *in, size_t inlen) 177 | { 178 | size_t n; 179 | size_t i; 180 | int err; 181 | if (md == NULL) return 1; 182 | if (in == NULL) return 1; 183 | if (md->curlen > sizeof(md->buf)) { 184 | return 1; 185 | } 186 | while (inlen > 0) { 187 | if (md->curlen == 0 && inlen >= 128) { 188 | if ((err = sha512_compress (md, (unsigned char *)in)) != 0) { 189 | return err; 190 | } 191 | md->length += 128 * 8; 192 | in += 128; 193 | inlen -= 128; 194 | } else { 195 | n = MIN(inlen, (128 - md->curlen)); 196 | 197 | for (i = 0; i < n; i++) { 198 | md->buf[i + md->curlen] = in[i]; 199 | } 200 | 201 | 202 | md->curlen += n; 203 | in += n; 204 | inlen -= n; 205 | if (md->curlen == 128) { 206 | if ((err = sha512_compress (md, md->buf)) != 0) { 207 | return err; 208 | } 209 | md->length += 8*128; 210 | md->curlen = 0; 211 | } 212 | } 213 | } 214 | return 0; 215 | } 216 | 217 | /** 218 | Terminate the hash to get the digest 219 | @param md The hash state 220 | @param out [out] The destination of the hash (64 bytes) 221 | @return 0 if successful 222 | */ 223 | int sha512_final(sha512_context * md, unsigned char *out) 224 | { 225 | int i; 226 | 227 | if (md == NULL) return 1; 228 | if (out == NULL) return 1; 229 | 230 | if (md->curlen >= sizeof(md->buf)) { 231 | return 1; 232 | } 233 | 234 | /* increase the length of the message */ 235 | md->length += md->curlen * UINT64_C(8); 236 | 237 | /* append the '1' bit */ 238 | md->buf[md->curlen++] = (unsigned char)0x80; 239 | 240 | /* if the length is currently above 112 bytes we append zeros 241 | * then compress. Then we can fall back to padding zeros and length 242 | * encoding like normal. 243 | */ 244 | if (md->curlen > 112) { 245 | while (md->curlen < 128) { 246 | md->buf[md->curlen++] = (unsigned char)0; 247 | } 248 | sha512_compress(md, md->buf); 249 | md->curlen = 0; 250 | } 251 | 252 | /* pad upto 120 bytes of zeroes 253 | * note: that from 112 to 120 is the 64 MSB of the length. We assume that you won't hash 254 | * > 2^64 bits of data... :-) 255 | */ 256 | while (md->curlen < 120) { 257 | md->buf[md->curlen++] = (unsigned char)0; 258 | } 259 | 260 | /* store length */ 261 | STORE64H(md->length, md->buf+120); 262 | sha512_compress(md, md->buf); 263 | 264 | /* copy output */ 265 | for (i = 0; i < 8; i++) { 266 | STORE64H(md->state[i], out+(8*i)); 267 | } 268 | 269 | return 0; 270 | } 271 | 272 | int sha512(const unsigned char *message, size_t message_len, unsigned char *out) 273 | { 274 | sha512_context ctx; 275 | int ret; 276 | if ((ret = sha512_init(&ctx))) return ret; 277 | if ((ret = sha512_update(&ctx, message, message_len))) return ret; 278 | if ((ret = sha512_final(&ctx, out))) return ret; 279 | return 0; 280 | } 281 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/sha512.h: -------------------------------------------------------------------------------- 1 | #ifndef SHA512_H 2 | #define SHA512_H 3 | 4 | #include 5 | 6 | #include "fixedint.h" 7 | 8 | /* state */ 9 | typedef struct sha512_context_ { 10 | uint64_t length, state[8]; 11 | size_t curlen; 12 | unsigned char buf[128]; 13 | } sha512_context; 14 | 15 | 16 | int __device__ __host__ sha512_init(sha512_context * md); 17 | int __device__ __host__ sha512_final(sha512_context * md, unsigned char *out); 18 | int __device__ __host__ sha512_update(sha512_context * md, const unsigned char *in, size_t inlen); 19 | int __device__ __host__ sha512(const unsigned char *message, size_t message_len, unsigned char *out); 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/sign.cu: -------------------------------------------------------------------------------- 1 | #include "ed25519.h" 2 | #include "sha512.h" 3 | #include "ge.h" 4 | #include "sc.h" 5 | #include "gpu_common.h" 6 | #include "gpu_ctx.h" 7 | 8 | 9 | static void __device__ __host__ 10 | ed25519_sign_device(unsigned char *signature, 11 | const unsigned char *message, 12 | size_t message_len, 13 | const unsigned char *public_key, 14 | const unsigned char *private_key) { 15 | sha512_context hash; 16 | unsigned char hram[64]; 17 | unsigned char r[64]; 18 | ge_p3 R; 19 | 20 | 21 | sha512_init(&hash); 22 | sha512_update(&hash, private_key + 32, 32); 23 | sha512_update(&hash, message, message_len); 24 | sha512_final(&hash, r); 25 | 26 | sc_reduce(r); 27 | ge_scalarmult_base(&R, r); 28 | ge_p3_tobytes(signature, &R); 29 | 30 | sha512_init(&hash); 31 | sha512_update(&hash, signature, 32); 32 | sha512_update(&hash, public_key, 32); 33 | sha512_update(&hash, message, message_len); 34 | sha512_final(&hash, hram); 35 | 36 | sc_reduce(hram); 37 | sc_muladd(signature + 32, hram, private_key, r); 38 | } 39 | 40 | void ed25519_sign(unsigned char *signature, 41 | const unsigned char *message, 42 | size_t message_len, 43 | const unsigned char *public_key, 44 | const unsigned char *private_key) { 45 | ed25519_sign_device(signature, message, message_len, public_key, private_key); 46 | } 47 | 48 | 49 | 50 | __global__ void ed25519_sign_kernel(unsigned char* packets, 51 | uint32_t message_size, 52 | uint32_t* public_key_offsets, 53 | uint32_t* private_key_offsets, 54 | uint32_t* message_start_offsets, 55 | uint32_t* message_lens, 56 | size_t num_transactions, 57 | uint8_t* out) 58 | { 59 | int i = blockIdx.x * blockDim.x + threadIdx.x; 60 | if (i < num_transactions) { 61 | uint32_t message_start_offset = message_start_offsets[i]; 62 | uint32_t public_key_offset = public_key_offsets[i]; 63 | uint32_t private_key_offset = private_key_offsets[i]; 64 | uint32_t message_len = message_lens[i]; 65 | 66 | ed25519_sign_device(&out[i * SIG_SIZE], 67 | &packets[message_start_offset], 68 | message_len, 69 | &packets[public_key_offset], 70 | &packets[private_key_offset]); 71 | } 72 | } 73 | 74 | 75 | 76 | void ed25519_sign_many(const gpu_Elems* elems, 77 | uint32_t num_elems, 78 | uint32_t message_size, 79 | uint32_t total_packets, 80 | uint32_t total_signatures, 81 | const uint32_t* message_lens, 82 | const uint32_t* public_key_offsets, 83 | const uint32_t* private_key_offsets, 84 | const uint32_t* message_start_offsets, 85 | uint8_t* signatures_out, 86 | uint8_t use_non_default_stream 87 | ) { 88 | int num_threads_per_block = 64; 89 | int num_blocks = ROUND_UP_DIV(total_signatures, num_threads_per_block); 90 | size_t sig_out_size = SIG_SIZE * total_signatures; 91 | 92 | if (0 == total_packets) { 93 | return; 94 | } 95 | 96 | uint32_t total_packets_size = total_packets * message_size; 97 | 98 | LOG("signing %d packets sig_size: %zu message_size: %d\n", 99 | total_packets, sig_out_size, message_size); 100 | 101 | gpu_ctx_t* gpu_ctx = get_gpu_ctx(); 102 | verify_ctx_t* cur_ctx = &gpu_ctx->verify_ctx; 103 | 104 | cudaStream_t stream = 0; 105 | if (0 != use_non_default_stream) { 106 | stream = gpu_ctx->stream; 107 | } 108 | 109 | setup_gpu_ctx(cur_ctx, 110 | elems, 111 | num_elems, 112 | message_size, 113 | total_packets, 114 | total_packets_size, 115 | total_signatures, 116 | message_lens, 117 | public_key_offsets, 118 | private_key_offsets, 119 | message_start_offsets, 120 | sig_out_size, 121 | stream 122 | ); 123 | 124 | LOG("signing blocks: %d threads_per_block: %d\n", num_blocks, num_threads_per_block); 125 | ed25519_sign_kernel<<>> 126 | (cur_ctx->packets, 127 | message_size, 128 | cur_ctx->public_key_offsets, 129 | cur_ctx->signature_offsets, 130 | cur_ctx->message_start_offsets, 131 | cur_ctx->message_lens, 132 | total_signatures, 133 | cur_ctx->out); 134 | 135 | cudaError_t err = cudaMemcpyAsync(signatures_out, cur_ctx->out, sig_out_size, cudaMemcpyDeviceToHost, stream); 136 | if (err != cudaSuccess) { 137 | fprintf(stderr, "sign: cudaMemcpy(out) error: out = %p cur_ctx->out = %p size = %zu num: %d elems = %p\n", 138 | signatures_out, cur_ctx->out, sig_out_size, num_elems, elems); 139 | } 140 | CUDA_CHK(err); 141 | 142 | CUDA_CHK(cudaStreamSynchronize(stream)); 143 | 144 | release_gpu_ctx(gpu_ctx); 145 | } 146 | 147 | -------------------------------------------------------------------------------- /src/cuda-ecc-ed25519/verify.cu: -------------------------------------------------------------------------------- 1 | #include "sha512.h" 2 | #include 3 | #include 4 | #include "sc.cu" 5 | #include "fe.cu" 6 | #include "ge.cu" 7 | #include "sha512.cu" 8 | 9 | #include "ed25519.h" 10 | #include 11 | 12 | #include "gpu_common.h" 13 | #include "gpu_ctx.h" 14 | 15 | #define USE_CLOCK_GETTIME 16 | #include "perftime.h" 17 | 18 | static int __host__ __device__ consttime_equal(const unsigned char *x, const unsigned char *y) { 19 | unsigned char r = 0; 20 | 21 | r = x[0] ^ y[0]; 22 | #define F(i) r |= x[i] ^ y[i] 23 | F(1); 24 | F(2); 25 | F(3); 26 | F(4); 27 | F(5); 28 | F(6); 29 | F(7); 30 | F(8); 31 | F(9); 32 | F(10); 33 | F(11); 34 | F(12); 35 | F(13); 36 | F(14); 37 | F(15); 38 | F(16); 39 | F(17); 40 | F(18); 41 | F(19); 42 | F(20); 43 | F(21); 44 | F(22); 45 | F(23); 46 | F(24); 47 | F(25); 48 | F(26); 49 | F(27); 50 | F(28); 51 | F(29); 52 | F(30); 53 | F(31); 54 | #undef F 55 | 56 | return !r; 57 | } 58 | 59 | // 0 == success 60 | static int __host__ __device__ 61 | get_checked_scalar(unsigned char* scalar, const unsigned char* signature) { 62 | // Check if top 4-bits are clear 63 | // then scalar is reduced. 64 | if ((signature[31] & 0xf0) == 0) { 65 | for (int i = 0; i < 32; i++) { 66 | scalar[i] = signature[i]; 67 | } 68 | return 0; 69 | } 70 | 71 | if ((signature[31] >> 7) != 0) { 72 | return 1; 73 | } 74 | 75 | scalar32_reduce(scalar); 76 | if (!consttime_equal(scalar, signature)) { 77 | return 1; 78 | } 79 | return 0; 80 | 81 | } 82 | 83 | int ed25519_get_checked_scalar(unsigned char* out_scalar, const unsigned char* in_scalar) { 84 | return get_checked_scalar(out_scalar, in_scalar); 85 | } 86 | 87 | // Return 0=success if ge unpacks and is not small order 88 | static int __device__ __host__ 89 | check_packed_ge_small_order(const unsigned char* packed_group_element) { 90 | ge_p3 signature_R; 91 | 92 | // fail if ge does not unpack 93 | if (0 != ge_frombytes_negate_vartime(&signature_R, packed_group_element)) { 94 | return 1; 95 | } 96 | 97 | // fail if ge is small order 98 | if (0 != ge_is_small_order(&signature_R)) { 99 | return 1; 100 | } 101 | 102 | return 0; 103 | } 104 | 105 | int ed25519_check_packed_ge_small_order(const unsigned char* packed_group_element) { 106 | return check_packed_ge_small_order(packed_group_element); 107 | } 108 | 109 | static int __device__ __host__ 110 | ed25519_verify_device(const unsigned char *signature, 111 | const unsigned char *message, 112 | uint32_t message_len, 113 | const unsigned char *public_key, 114 | unsigned char* h) { 115 | sha512_context hash; 116 | unsigned char checker[32]; 117 | 118 | // Check that s.reduce() == s 119 | if (0 != get_checked_scalar(checker, signature + 32)) { 120 | return 0; 121 | } 122 | 123 | if (0 != check_packed_ge_small_order(signature)) { 124 | return 0; 125 | } 126 | 127 | sha512_init(&hash); 128 | sha512_update(&hash, signature, 32); 129 | sha512_update(&hash, public_key, 32); 130 | sha512_update(&hash, message, message_len); 131 | sha512_final(&hash, h); 132 | 133 | sc_reduce(h); 134 | return 1; 135 | } 136 | 137 | static int __device__ __host__ 138 | ed25519_verify_scalar_double(const unsigned char* signature, 139 | const unsigned char* h, 140 | ge_cached* Ai) { 141 | unsigned char checker[32]; 142 | ge_p2 R; 143 | 144 | ge_double_scalarmult_vartime(&R, h, Ai, signature + 32); 145 | ge_tobytes(checker, &R); 146 | 147 | if (!consttime_equal(checker, signature)) { 148 | return 0; 149 | } 150 | 151 | return 1; 152 | } 153 | 154 | int 155 | ed25519_verify(const unsigned char *signature, 156 | const unsigned char *message, 157 | uint32_t message_len, 158 | const unsigned char *public_key) { 159 | unsigned char h[SHA512_SIZE]; 160 | if (0 == ed25519_verify_device(signature, message, message_len, public_key, h)) { 161 | return 0; 162 | } 163 | 164 | ge_cached Ai[GE_LOOKUP_SIZE]; 165 | if (0 == ge_gen_lookup(public_key, Ai)) { 166 | return 0; 167 | } 168 | 169 | if (0 == ed25519_verify_scalar_double(signature, h, Ai)) { 170 | return 0; 171 | } 172 | 173 | return 1; 174 | } 175 | 176 | __global__ void 177 | ed25519_scalar_double_kernel(const uint8_t* packets, 178 | uint32_t* signature_offsets, 179 | uint8_t* out, 180 | ge_cached* Ai, 181 | size_t num_keys, 182 | uint8_t* h) { 183 | int i = blockIdx.x * blockDim.x + threadIdx.x; 184 | if (i < num_keys && (0 != out[i])) { 185 | uint32_t signature_offset = signature_offsets[i]; 186 | out[i] = ed25519_verify_scalar_double(&packets[signature_offset], 187 | &h[i * SHA512_SIZE], 188 | &Ai[i * GE_LOOKUP_SIZE] 189 | ); 190 | } 191 | } 192 | 193 | __global__ void 194 | ed25519_gen_lookup_kernel(const uint8_t* packets, 195 | uint32_t* public_key_offsets, 196 | ge_cached* Ai, 197 | size_t num_keys, 198 | uint8_t* out 199 | ) { 200 | int i = blockIdx.x * blockDim.x + threadIdx.x; 201 | if (i < num_keys && (0 != out[i])) { 202 | uint32_t public_key_offset = public_key_offsets[i]; 203 | out[i] = ge_gen_lookup(&packets[public_key_offset], &Ai[i * GE_LOOKUP_SIZE]); 204 | } 205 | } 206 | 207 | __global__ void 208 | ed25519_verify_kernel(const uint8_t* packets, 209 | uint32_t message_size, 210 | uint32_t* message_lens, 211 | uint32_t* public_key_offsets, 212 | uint32_t* signature_offsets, 213 | uint32_t* message_start_offsets, 214 | size_t num_keys, 215 | uint8_t* out, 216 | uint8_t* h) 217 | { 218 | int i = blockIdx.x * blockDim.x + threadIdx.x; 219 | if (i < num_keys) { 220 | uint32_t message_start_offset = message_start_offsets[i]; 221 | uint32_t signature_offset = signature_offsets[i]; 222 | uint32_t public_key_offset = public_key_offsets[i]; 223 | uint32_t message_len = message_lens[i]; 224 | 225 | out[i] = ed25519_verify_device(&packets[signature_offset], 226 | &packets[message_start_offset], 227 | message_len, 228 | &packets[public_key_offset], 229 | &h[i * SHA512_SIZE] 230 | ); 231 | } 232 | } 233 | 234 | bool g_verbose = false; 235 | 236 | void ed25519_set_verbose(bool val) { 237 | g_verbose = val; 238 | } 239 | 240 | void ed25519_verify_many(const gpu_Elems* elems, 241 | uint32_t num_elems, 242 | uint32_t message_size, 243 | uint32_t total_packets, 244 | uint32_t total_signatures, 245 | const uint32_t* message_lens, 246 | const uint32_t* public_key_offsets, 247 | const uint32_t* signature_offsets, 248 | const uint32_t* message_start_offsets, 249 | uint8_t* out, 250 | uint8_t use_non_default_stream) 251 | { 252 | LOG("Starting verify_many: num_elems: %d total_signatures: %d total_packets: %d message_size: %d\n", 253 | num_elems, total_signatures, total_packets, message_size); 254 | 255 | size_t out_size = total_signatures * sizeof(uint8_t); 256 | 257 | uint32_t total_packets_size = total_packets * message_size; 258 | 259 | if (0 == total_packets) { 260 | return; 261 | } 262 | 263 | // Device allocate 264 | 265 | gpu_ctx_t* gpu_ctx = get_gpu_ctx(); 266 | 267 | verify_ctx_t* cur_ctx = &gpu_ctx->verify_ctx; 268 | 269 | cudaStream_t stream = 0; 270 | if (0 != use_non_default_stream) { 271 | stream = gpu_ctx->stream; 272 | } 273 | 274 | setup_gpu_ctx(cur_ctx, 275 | elems, 276 | num_elems, 277 | message_size, 278 | total_packets, 279 | total_packets_size, 280 | total_signatures, 281 | message_lens, 282 | public_key_offsets, 283 | signature_offsets, 284 | message_start_offsets, 285 | out_size, 286 | stream 287 | ); 288 | 289 | int num_threads_per_block = 64; 290 | int num_blocks = ROUND_UP_DIV(total_signatures, num_threads_per_block); 291 | LOG("num_blocks: %d threads_per_block: %d keys: %d out: %p stream: %p\n", 292 | num_blocks, num_threads_per_block, (int)total_packets, out, gpu_ctx->stream); 293 | 294 | perftime_t start, end; 295 | get_time(&start); 296 | ed25519_verify_kernel<<>> 297 | (cur_ctx->packets, 298 | message_size, 299 | cur_ctx->message_lens, 300 | cur_ctx->public_key_offsets, 301 | cur_ctx->signature_offsets, 302 | cur_ctx->message_start_offsets, 303 | cur_ctx->offsets_len, 304 | cur_ctx->out, 305 | cur_ctx->h 306 | ); 307 | CUDA_CHK(cudaPeekAtLastError()); 308 | 309 | ed25519_gen_lookup_kernel<<>> 310 | (cur_ctx->packets, 311 | cur_ctx->public_key_offsets, 312 | cur_ctx->Ai, 313 | cur_ctx->offsets_len, 314 | cur_ctx->out 315 | ); 316 | CUDA_CHK(cudaPeekAtLastError()); 317 | 318 | ed25519_scalar_double_kernel<<>> 319 | (cur_ctx->packets, 320 | cur_ctx->signature_offsets, 321 | cur_ctx->out, 322 | cur_ctx->Ai, 323 | cur_ctx->offsets_len, 324 | cur_ctx->h); 325 | CUDA_CHK(cudaPeekAtLastError()); 326 | 327 | cudaError_t err = cudaMemcpyAsync(out, cur_ctx->out, out_size, cudaMemcpyDeviceToHost, stream); 328 | if (err != cudaSuccess) { 329 | fprintf(stderr, "verify: cudaMemcpy(out) error: out = %p cur_ctx->out = %p size = %zu num: %d elems = %p\n", 330 | out, cur_ctx->out, out_size, num_elems, elems); 331 | } 332 | CUDA_CHK(err); 333 | 334 | CUDA_CHK(cudaStreamSynchronize(stream)); 335 | 336 | release_gpu_ctx(gpu_ctx); 337 | 338 | get_time(&end); 339 | LOG("time diff: %f\n", get_diff(&start, &end)); 340 | } 341 | 342 | // Ensure copyright and license notice is embedded in the binary 343 | const char* ed25519_license() { 344 | return "Copyright (c) 2018 Solana Labs, Inc. " 345 | "Licensed under the Apache License, Version 2.0 " 346 | ""; 347 | } 348 | 349 | int cuda_host_register(void* ptr, size_t size, unsigned int flags) { 350 | return cudaHostRegister(ptr, size, flags); 351 | } 352 | 353 | int cuda_host_unregister(void* ptr) { 354 | return cudaHostUnregister(ptr); 355 | } 356 | -------------------------------------------------------------------------------- /src/cuda-poh-verify/poh_verify.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "gpu_common.h" 5 | #include "sha256.cu" 6 | 7 | #define MAX_NUM_GPUS 8 8 | #define MAX_QUEUE_SIZE 8 9 | #define NUM_THREADS_PER_BLOCK 64 10 | 11 | 12 | __global__ void poh_verify_kernel(uint8_t* hashes, uint64_t* num_hashes_arr, size_t num_elems) { 13 | size_t idx = (size_t)(blockIdx.x * blockDim.x + threadIdx.x); 14 | if (idx >= num_elems) return; 15 | 16 | uint8_t hash[SHA256_BLOCK_SIZE]; 17 | 18 | memcpy(hash, &hashes[idx * SHA256_BLOCK_SIZE], SHA256_BLOCK_SIZE); 19 | 20 | for (size_t i = 0; i < num_hashes_arr[idx]; i++) { 21 | hash_state sha_state; 22 | sha256_init(&sha_state); 23 | sha256_process(&sha_state, hash, SHA256_BLOCK_SIZE); 24 | sha256_done(&sha_state, hash); 25 | } 26 | memcpy(&hashes[idx * SHA256_BLOCK_SIZE], hash, SHA256_BLOCK_SIZE); 27 | } 28 | 29 | typedef struct { 30 | uint8_t* hashes; 31 | uint64_t* num_hashes_arr; 32 | size_t num_elems_alloc; 33 | pthread_mutex_t mutex; 34 | cudaStream_t stream; 35 | } gpu_ctx; 36 | 37 | static pthread_mutex_t g_ctx_mutex = PTHREAD_MUTEX_INITIALIZER; 38 | 39 | static gpu_ctx g_gpu_ctx[MAX_NUM_GPUS][MAX_QUEUE_SIZE] = {0}; 40 | static uint32_t g_cur_gpu = 0; 41 | static uint32_t g_cur_queue[MAX_NUM_GPUS] = {0}; 42 | static int32_t g_total_gpus = -1; 43 | 44 | static bool poh_init_locked() { 45 | if (g_total_gpus == -1) { 46 | cudaGetDeviceCount(&g_total_gpus); 47 | g_total_gpus = min(MAX_NUM_GPUS, g_total_gpus); 48 | LOG("total_gpus: %d\n", g_total_gpus); 49 | for (int gpu = 0; gpu < g_total_gpus; gpu++) { 50 | CUDA_CHK(cudaSetDevice(gpu)); 51 | for (int queue = 0; queue < MAX_QUEUE_SIZE; queue++) { 52 | int err = pthread_mutex_init(&g_gpu_ctx[gpu][queue].mutex, NULL); 53 | if (err != 0) { 54 | fprintf(stderr, "pthread_mutex_init error %d gpu: %d queue: %d\n", 55 | err, gpu, queue); 56 | g_total_gpus = 0; 57 | return false; 58 | } 59 | CUDA_CHK(cudaStreamCreate(&g_gpu_ctx[gpu][queue].stream)); 60 | } 61 | } 62 | } 63 | return g_total_gpus > 0; 64 | } 65 | 66 | bool poh_init() { 67 | cudaFree(0); 68 | pthread_mutex_lock(&g_ctx_mutex); 69 | bool success = poh_init_locked(); 70 | pthread_mutex_unlock(&g_ctx_mutex); 71 | return success; 72 | } 73 | 74 | extern "C" { 75 | 76 | void poh_verify_many_set_verbose(bool val) { 77 | g_verbose = val; 78 | } 79 | 80 | int poh_verify_many(uint8_t* hashes, 81 | const uint64_t* num_hashes_arr, 82 | size_t num_elems, 83 | uint8_t use_non_default_stream) 84 | { 85 | LOG("Starting poh_verify_many: num_elems: %zu\n", num_elems); 86 | 87 | if (num_elems == 0) return 0; 88 | 89 | int32_t cur_gpu, cur_queue; 90 | 91 | pthread_mutex_lock(&g_ctx_mutex); 92 | if (!poh_init_locked()) { 93 | pthread_mutex_unlock(&g_ctx_mutex); 94 | LOG("No GPUs, exiting...\n"); 95 | return 1; 96 | } 97 | cur_gpu = g_cur_gpu; 98 | g_cur_gpu++; 99 | g_cur_gpu %= g_total_gpus; 100 | cur_queue = g_cur_queue[cur_gpu]; 101 | g_cur_queue[cur_gpu]++; 102 | g_cur_queue[cur_gpu] %= MAX_QUEUE_SIZE; 103 | pthread_mutex_unlock(&g_ctx_mutex); 104 | 105 | gpu_ctx* cur_ctx = &g_gpu_ctx[cur_gpu][cur_queue]; 106 | pthread_mutex_lock(&cur_ctx->mutex); 107 | 108 | CUDA_CHK(cudaSetDevice(cur_gpu)); 109 | 110 | LOG("cur gpu: %d cur queue: %d\n", cur_gpu, cur_queue); 111 | 112 | size_t hashes_size = num_elems * SHA256_BLOCK_SIZE * sizeof(uint8_t); 113 | size_t num_hashes_size = num_elems * sizeof(uint64_t); 114 | 115 | // Ensure there is enough memory allocated 116 | if (cur_ctx->hashes == NULL || cur_ctx->num_elems_alloc < num_elems) { 117 | CUDA_CHK(cudaFree(cur_ctx->hashes)); 118 | CUDA_CHK(cudaMalloc(&cur_ctx->hashes, hashes_size)); 119 | CUDA_CHK(cudaFree(cur_ctx->num_hashes_arr)); 120 | CUDA_CHK(cudaMalloc(&cur_ctx->num_hashes_arr, num_hashes_size)); 121 | 122 | cur_ctx->num_elems_alloc = num_elems; 123 | } 124 | 125 | cudaStream_t stream = 0; 126 | if (0 != use_non_default_stream) { 127 | stream = cur_ctx->stream; 128 | } 129 | 130 | CUDA_CHK(cudaMemcpyAsync(cur_ctx->hashes, hashes, hashes_size, cudaMemcpyHostToDevice, stream)); 131 | CUDA_CHK(cudaMemcpyAsync(cur_ctx->num_hashes_arr, num_hashes_arr, num_hashes_size, cudaMemcpyHostToDevice, stream)); 132 | 133 | int num_blocks = ROUND_UP_DIV(num_elems, NUM_THREADS_PER_BLOCK); 134 | 135 | poh_verify_kernel<<>>(cur_ctx->hashes, cur_ctx->num_hashes_arr, num_elems); 136 | CUDA_CHK(cudaPeekAtLastError()); 137 | 138 | CUDA_CHK(cudaMemcpyAsync(hashes, cur_ctx->hashes, hashes_size, cudaMemcpyDeviceToHost, stream)); 139 | CUDA_CHK(cudaStreamSynchronize(stream)); 140 | 141 | pthread_mutex_unlock(&cur_ctx->mutex); 142 | 143 | return 0; 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /src/gpu-common.mk: -------------------------------------------------------------------------------- 1 | NVCC:=nvcc 2 | #GPU_PTX_ARCH:=compute_35 3 | #GPU_ARCHS?=sm_37,sm_50,sm_61,sm_70 4 | GPU_PTX_ARCH:=compute_60 5 | GPU_ARCHS?=sm_61,sm_70,sm_75,sm_80,sm_86 6 | HOST_CFLAGS:=-Wall -Werror -fPIC -Wno-strict-aliasing 7 | GPU_CFLAGS:=--gpu-code=$(GPU_ARCHS),$(GPU_PTX_ARCH) --gpu-architecture=$(GPU_PTX_ARCH) 8 | 9 | # enable for profiling 10 | #GPU_CFLAGS+=-lineinfo 11 | 12 | # enable to see kernel register stats 13 | #GPU_CFLAGS+=--ptxas-options=-v 14 | 15 | CFLAGS_release:=-Icommon $(GPU_CFLAGS) -O3 -Xcompiler "$(HOST_CFLAGS)" 16 | CFLAGS_debug:=$(CFLAGS_release) -g 17 | CFLAGS:=$(CFLAGS_$V) 18 | -------------------------------------------------------------------------------- /src/jerasure-sys/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "jerasure-sys" 3 | description = "Rust bindings for jerasure 2.0" 4 | version = "0.1.0" 5 | homepage = "https://solana.com/" 6 | readme = "../jerasure/README" 7 | repository = "https://github.com/solana-labs/solana-perf-libs" 8 | authors = ["Solana Maintainers "] 9 | license = "../jerasure/COPYING" 10 | links = "Jerasure" 11 | build = "build.rs" 12 | 13 | [build-dependencies] 14 | cc = "1.0" 15 | -------------------------------------------------------------------------------- /src/jerasure-sys/build.rs: -------------------------------------------------------------------------------- 1 | extern crate cc; 2 | 3 | fn main() { 4 | cc::Build::new() 5 | .files(&[ 6 | "jerasure/src/galois.c", 7 | "jerasure/src/jerasure.c", 8 | "jerasure/src/reed_sol.c", 9 | "jerasure/src/cauchy.c", 10 | "jerasure/src/liberation.c", 11 | ]) 12 | .include("jerasure/include") 13 | .include("gf-complete/include") 14 | .compile("Jerasure"); 15 | println!("cargo:rustc-link-lib=static=Jerasure"); 16 | } 17 | -------------------------------------------------------------------------------- /src/jerasure-sys/gf-complete: -------------------------------------------------------------------------------- 1 | ../gf-complete/ -------------------------------------------------------------------------------- /src/jerasure-sys/jerasure: -------------------------------------------------------------------------------- 1 | ../jerasure -------------------------------------------------------------------------------- /src/opencl-ecc-ed25519/gpu_ctx.cpp: -------------------------------------------------------------------------------- 1 | #include "ed25519.h" 2 | #include "gpu_ctx.h" 3 | #include 4 | #include "gpu_common.h" 5 | 6 | static pthread_mutex_t g_ctx_mutex = PTHREAD_MUTEX_INITIALIZER; 7 | 8 | #define MAX_NUM_GPUS 1 9 | #define MAX_QUEUE_SIZE 1 10 | 11 | static gpu_ctx_t g_gpu_ctx[MAX_NUM_GPUS][MAX_QUEUE_SIZE] = {0}; 12 | static uint32_t g_cur_gpu = 0; 13 | static uint32_t g_cur_queue[MAX_NUM_GPUS] = {0}; 14 | static int32_t g_total_gpus = -1; 15 | 16 | static bool cl_crypt_init_locked() { 17 | if (g_total_gpus == -1) { 18 | g_total_gpus = MAX_NUM_GPUS; 19 | LOG("total_gpus: %d\n", g_total_gpus); 20 | for (int gpu = 0; gpu < g_total_gpus; gpu++) { 21 | for (int queue = 0; queue < MAX_QUEUE_SIZE; queue++) { 22 | int err = pthread_mutex_init(&g_gpu_ctx[gpu][queue].mutex, NULL); 23 | if (err != 0) { 24 | fprintf(stderr, "pthread_mutex_init error %d gpu: %d queue: %d\n", 25 | err, gpu, queue); 26 | g_total_gpus = 0; 27 | return false; 28 | } 29 | } 30 | } 31 | } 32 | return g_total_gpus > 0; 33 | } 34 | 35 | bool ed25519_init() { 36 | pthread_mutex_lock(&g_ctx_mutex); 37 | bool success = cl_crypt_init_locked(); 38 | pthread_mutex_unlock(&g_ctx_mutex); 39 | return success; 40 | } 41 | 42 | gpu_ctx_t* get_gpu_ctx() { 43 | int32_t cur_gpu, cur_queue; 44 | 45 | LOG("locking global mutex\n"); 46 | pthread_mutex_lock(&g_ctx_mutex); 47 | if (!cl_crypt_init_locked()) { 48 | pthread_mutex_unlock(&g_ctx_mutex); 49 | LOG("No GPUs, exiting...\n"); 50 | return NULL; 51 | } 52 | cur_gpu = g_cur_gpu; 53 | g_cur_gpu++; 54 | g_cur_gpu %= g_total_gpus; 55 | cur_queue = g_cur_queue[cur_gpu]; 56 | g_cur_queue[cur_gpu]++; 57 | g_cur_queue[cur_gpu] %= MAX_QUEUE_SIZE; 58 | pthread_mutex_unlock(&g_ctx_mutex); 59 | 60 | gpu_ctx_t* cur_ctx = &g_gpu_ctx[cur_gpu][cur_queue]; 61 | LOG("locking contex mutex queue: %d gpu: %d\n", cur_queue, cur_gpu); 62 | pthread_mutex_lock(&cur_ctx->mutex); 63 | 64 | LOG("selecting gpu: %d queue: %d\n", cur_gpu, cur_queue); 65 | 66 | return cur_ctx; 67 | } 68 | 69 | void setup_gpu_ctx(verify_ctx_t* cur_ctx, 70 | const gpu_Elems* elems, 71 | uint32_t num_elems, 72 | uint32_t message_size, 73 | uint32_t total_packets, 74 | uint32_t total_packets_size, 75 | uint32_t total_signatures, 76 | const uint32_t* message_lens, 77 | const uint32_t* public_key_offsets, 78 | const uint32_t* signature_offsets, 79 | const uint32_t* message_start_offsets, 80 | size_t out_size 81 | ) { 82 | int ret; 83 | size_t offsets_size = total_signatures * sizeof(uint32_t); 84 | 85 | LOG("device allocate. packets: %d out: %d offsets_size: %zu\n", 86 | total_packets_size, (int)out_size, offsets_size); 87 | 88 | if (cur_ctx->packets == NULL || 89 | total_packets_size > cur_ctx->packets_size_bytes) { 90 | clReleaseMemObject(cur_ctx->packets); 91 | cur_ctx->packets = clCreateBuffer(context, CL_MEM_READ_WRITE, total_packets_size, NULL, &ret); 92 | CL_ERR( ret ); 93 | 94 | cur_ctx->packets_size_bytes = total_packets_size; 95 | } 96 | 97 | if (cur_ctx->out == NULL || cur_ctx->out_size_bytes < out_size) { 98 | clReleaseMemObject(cur_ctx->out); 99 | cur_ctx->out = clCreateBuffer(context, CL_MEM_READ_WRITE, out_size, NULL, &ret); 100 | CL_ERR( ret ); 101 | 102 | cur_ctx->out_size_bytes = total_signatures; 103 | } 104 | 105 | if (cur_ctx->public_key_offsets == NULL || cur_ctx->offsets_len < total_signatures) { 106 | 107 | clReleaseMemObject(cur_ctx->public_key_offsets); 108 | cur_ctx->public_key_offsets = clCreateBuffer(context, CL_MEM_READ_WRITE, offsets_size, NULL, &ret); 109 | CL_ERR( ret ); 110 | 111 | clReleaseMemObject(cur_ctx->signature_offsets); 112 | cur_ctx->signature_offsets = clCreateBuffer(context, CL_MEM_READ_WRITE, offsets_size, NULL, &ret); 113 | CL_ERR( ret ); 114 | 115 | clReleaseMemObject(cur_ctx->message_start_offsets); 116 | cur_ctx->message_start_offsets = clCreateBuffer(context, CL_MEM_READ_WRITE, offsets_size, NULL, &ret); 117 | CL_ERR( ret ); 118 | 119 | clReleaseMemObject(cur_ctx->message_lens); 120 | cur_ctx->message_lens = clCreateBuffer(context, CL_MEM_READ_WRITE, offsets_size, NULL, &ret); 121 | CL_ERR( ret ); 122 | 123 | cur_ctx->offsets_len = total_signatures; 124 | } 125 | 126 | CL_ERR( clEnqueueWriteBuffer(cmd_queue, cur_ctx->public_key_offsets, CL_TRUE, 0, offsets_size, public_key_offsets, 0, NULL, NULL)); 127 | CL_ERR( clEnqueueWriteBuffer(cmd_queue, cur_ctx->signature_offsets, CL_TRUE, 0, offsets_size, signature_offsets, 0, NULL, NULL)); 128 | CL_ERR( clEnqueueWriteBuffer(cmd_queue, cur_ctx->message_start_offsets, CL_TRUE, 0, offsets_size, message_start_offsets, 0, NULL, NULL)); 129 | CL_ERR( clEnqueueWriteBuffer(cmd_queue, cur_ctx->message_lens, CL_TRUE, 0, offsets_size, message_lens, 0, NULL, NULL)); 130 | 131 | size_t cur = 0; 132 | for (size_t i = 0; i < num_elems; i++) { 133 | LOG("i: %zu size: %d\n", i, elems[i].num * message_size); 134 | CL_ERR( clEnqueueWriteBuffer(cmd_queue, cur_ctx->packets, CL_TRUE, cur * message_size, elems[i].num * message_size, elems[i].elems, 0, NULL, NULL)); 135 | cur += elems[i].num; 136 | } 137 | } 138 | 139 | 140 | void release_gpu_ctx(gpu_ctx_t* cur_ctx) { 141 | pthread_mutex_unlock(&cur_ctx->mutex); 142 | } 143 | 144 | void ed25519_free_gpu_mem() { 145 | for (size_t gpu = 0; gpu < MAX_NUM_GPUS; gpu++) { 146 | for (size_t queue = 0; queue < MAX_QUEUE_SIZE; queue++) { 147 | verify_ctx_t* verify_ctx = &g_gpu_ctx[gpu][queue].verify_ctx; 148 | 149 | CL_ERR(clReleaseMemObject(verify_ctx->packets)); 150 | CL_ERR(clReleaseMemObject(verify_ctx->out)); 151 | CL_ERR(clReleaseMemObject(verify_ctx->message_lens)); 152 | CL_ERR(clReleaseMemObject(verify_ctx->public_key_offsets)); 153 | CL_ERR(clReleaseMemObject(verify_ctx->signature_offsets)); 154 | CL_ERR(clReleaseMemObject(verify_ctx->message_start_offsets)); 155 | } 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /src/opencl-ecc-ed25519/gpu_ctx.h: -------------------------------------------------------------------------------- 1 | #ifndef GPU_CTX_H 2 | #define GPU_CTX_H 3 | 4 | #include "cl_common.h" 5 | 6 | #include 7 | #include "ed25519.h" 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | typedef struct { 14 | cl_mem packets; 15 | uint32_t packets_size_bytes; 16 | 17 | cl_mem out; 18 | size_t out_size_bytes; 19 | 20 | cl_mem public_key_offsets; 21 | cl_mem message_start_offsets; 22 | cl_mem signature_offsets; 23 | cl_mem message_lens; 24 | size_t offsets_len; 25 | 26 | size_t num; 27 | size_t num_signatures; 28 | uint32_t total_packets_len; 29 | } verify_ctx_t; 30 | 31 | typedef struct { 32 | verify_ctx_t verify_ctx; 33 | 34 | pthread_mutex_t mutex; 35 | } gpu_ctx_t; 36 | 37 | extern gpu_ctx_t* get_gpu_ctx(); 38 | extern void release_gpu_ctx(gpu_ctx_t*); 39 | 40 | extern void ed25519_free_gpu_mem(); 41 | 42 | extern void setup_gpu_ctx(verify_ctx_t* cur_ctx, 43 | const gpu_Elems* elems, 44 | uint32_t num_elems, 45 | uint32_t message_size, 46 | uint32_t total_packets, 47 | uint32_t total_packets_size, 48 | uint32_t total_signatures, 49 | const uint32_t* message_lens, 50 | const uint32_t* public_key_offsets, 51 | const uint32_t* signature_offsets, 52 | const uint32_t* message_start_offsets, 53 | size_t out_size 54 | ); 55 | 56 | #ifdef __cplusplus 57 | } 58 | #endif 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /src/opencl-ecc-ed25519/sign.cpp: -------------------------------------------------------------------------------- 1 | #include "cl_common.h" 2 | 3 | #include "ed25519.h" 4 | #include "sha512.h" 5 | #include "ge.h" 6 | #include "sc.h" 7 | #include "gpu_common.h" 8 | #include "gpu_ctx.h" 9 | 10 | static void 11 | ed25519_sign_device(unsigned char *signature, 12 | const unsigned char *message, 13 | size_t message_len, 14 | const unsigned char *public_key, 15 | const unsigned char *private_key) { 16 | sha512_context hash; 17 | unsigned char hram[64]; 18 | unsigned char r[64]; 19 | ge_p3 R; 20 | 21 | 22 | sha512_init(&hash); 23 | sha512_update(&hash, private_key + 32, 32); 24 | sha512_update(&hash, message, message_len); 25 | sha512_final(&hash, r); 26 | 27 | sc_reduce(r); 28 | ge_scalarmult_base(&R, r); 29 | ge_p3_tobytes(signature, &R); 30 | 31 | sha512_init(&hash); 32 | sha512_update(&hash, signature, 32); 33 | sha512_update(&hash, public_key, 32); 34 | sha512_update(&hash, message, message_len); 35 | sha512_final(&hash, hram); 36 | 37 | sc_reduce(hram); 38 | sc_muladd(signature + 32, hram, private_key, r); 39 | } 40 | 41 | void ed25519_sign(unsigned char *signature, 42 | const unsigned char *message, 43 | size_t message_len, 44 | const unsigned char *public_key, 45 | const unsigned char *private_key) { 46 | ed25519_sign_device(signature, message, message_len, public_key, private_key); 47 | } 48 | 49 | void ed25519_sign_many(const gpu_Elems* elems, 50 | uint32_t num_elems, 51 | uint32_t message_size, 52 | uint32_t total_packets, 53 | uint32_t total_signatures, 54 | const uint32_t* message_lens, 55 | const uint32_t* public_key_offsets, 56 | const uint32_t* private_key_offsets, 57 | const uint32_t* message_start_offsets, 58 | uint8_t* signatures_out, 59 | uint8_t use_non_default_stream 60 | ) { 61 | DIE(cl_check_init() == false, "OpenCL could not be init"); 62 | 63 | cl_int ret; 64 | 65 | size_t sig_out_size = SIG_SIZE * total_signatures; 66 | 67 | if (0 == total_packets) { 68 | return; 69 | } 70 | 71 | uint32_t total_packets_size = total_packets * message_size; 72 | 73 | LOG("signing %d packets sig_size: %zu message_size: %d\n", 74 | total_packets, sig_out_size, message_size); 75 | 76 | gpu_ctx_t* gpu_ctx = get_gpu_ctx(); 77 | verify_ctx_t* cur_ctx = &gpu_ctx->verify_ctx; 78 | 79 | setup_gpu_ctx(cur_ctx, 80 | elems, 81 | num_elems, 82 | message_size, 83 | total_packets, 84 | total_packets_size, 85 | total_signatures, 86 | message_lens, 87 | public_key_offsets, 88 | private_key_offsets, 89 | message_start_offsets, 90 | sig_out_size 91 | ); 92 | 93 | size_t num_threads_per_block = 64; 94 | size_t num_blocks = ROUND_UP_DIV(total_signatures, num_threads_per_block) * num_threads_per_block; 95 | LOG("signing blocks: %zu threads_per_block: %zu\n", num_blocks, num_threads_per_block); 96 | 97 | /* 98 | __kernel void ed25519_sign_kernel(__global unsigned char* packets, 99 | uint32_t message_size, 100 | __global uint32_t* public_key_offsets, 101 | __global uint32_t* private_key_offsets, 102 | __global uint32_t* message_start_offsets, 103 | __global uint32_t* message_lens, 104 | uint32_t num_transactions, 105 | __global uint8_t* out) 106 | */ 107 | CL_ERR( clSetKernelArg(ed25519_sign_kernel, 0, sizeof(cl_mem), (void *)&cur_ctx->packets) ); 108 | CL_ERR( clSetKernelArg(ed25519_sign_kernel, 1, sizeof(cl_uint), (void *)&message_size) ); 109 | CL_ERR( clSetKernelArg(ed25519_sign_kernel, 2, sizeof(cl_mem), (void *)&cur_ctx->public_key_offsets) ); 110 | CL_ERR( clSetKernelArg(ed25519_sign_kernel, 3, sizeof(cl_mem), (void *)&cur_ctx->signature_offsets) ); 111 | CL_ERR( clSetKernelArg(ed25519_sign_kernel, 4, sizeof(cl_mem), (void *)&cur_ctx->message_start_offsets) ); 112 | CL_ERR( clSetKernelArg(ed25519_sign_kernel, 5, sizeof(cl_mem), (void *)&cur_ctx->message_lens) ); 113 | CL_ERR( clSetKernelArg(ed25519_sign_kernel, 6, sizeof(cl_uint), (void *)&total_signatures) ); 114 | CL_ERR( clSetKernelArg(ed25519_sign_kernel, 7, sizeof(cl_mem), (void *)&cur_ctx->out) ); 115 | 116 | size_t globalSize[2] = {num_blocks * num_threads_per_block, 0}; 117 | size_t localSize[2] = {num_threads_per_block, 0}; 118 | 119 | ret = clEnqueueNDRangeKernel(cmd_queue, ed25519_sign_kernel, 1, NULL, 120 | globalSize, localSize, 0, NULL, NULL); 121 | CL_ERR( ret ); 122 | 123 | CL_ERR( clEnqueueReadBuffer(cmd_queue, cur_ctx->out, CL_TRUE, 0, sig_out_size, signatures_out, 0, NULL, NULL)); 124 | 125 | release_gpu_ctx(gpu_ctx); 126 | } 127 | -------------------------------------------------------------------------------- /src/opencl-ecc-ed25519/verify.cpp: -------------------------------------------------------------------------------- 1 | #include "cl_common.h" 2 | 3 | #include "sha512.h" 4 | #include 5 | #include 6 | #include "ge.cu" 7 | #include "sc.cu" 8 | #include "fe.cu" 9 | #include "sha512.cu" 10 | 11 | #include "ed25519.h" 12 | #include 13 | 14 | #include "gpu_common.h" 15 | #include "gpu_ctx.h" 16 | 17 | #define USE_CLOCK_GETTIME 18 | #include "perftime.h" 19 | 20 | static int consttime_equal(const unsigned char *x, const unsigned char *y) { 21 | unsigned char r = 0; 22 | 23 | r = x[0] ^ y[0]; 24 | #define F(i) r |= x[i] ^ y[i] 25 | F(1); 26 | F(2); 27 | F(3); 28 | F(4); 29 | F(5); 30 | F(6); 31 | F(7); 32 | F(8); 33 | F(9); 34 | F(10); 35 | F(11); 36 | F(12); 37 | F(13); 38 | F(14); 39 | F(15); 40 | F(16); 41 | F(17); 42 | F(18); 43 | F(19); 44 | F(20); 45 | F(21); 46 | F(22); 47 | F(23); 48 | F(24); 49 | F(25); 50 | F(26); 51 | F(27); 52 | F(28); 53 | F(29); 54 | F(30); 55 | F(31); 56 | #undef F 57 | 58 | return !r; 59 | } 60 | 61 | static int ed25519_verify_device(const unsigned char *signature, 62 | const unsigned char *message, 63 | uint32_t message_len, 64 | const unsigned char *public_key) { 65 | unsigned char h[64]; 66 | unsigned char checker[32]; 67 | sha512_context hash; 68 | ge_p2 R; 69 | 70 | if (signature[63] & 224) { 71 | return 0; 72 | } 73 | 74 | sha512_init(&hash); 75 | sha512_update(&hash, signature, 32); 76 | sha512_update(&hash, public_key, 32); 77 | sha512_update(&hash, message, message_len); 78 | sha512_final(&hash, h); 79 | 80 | sc_reduce(h); 81 | ge_cached Ai[8]; 82 | if (0 != ge_gen_lookup(public_key, Ai)) { 83 | return 0; 84 | } 85 | ge_double_scalarmult_vartime(&R, h, Ai, signature + 32); 86 | ge_tobytes(checker, &R); 87 | 88 | if (!consttime_equal(checker, signature)) { 89 | return 0; 90 | } 91 | 92 | return 1; 93 | } 94 | 95 | int ed25519_verify(const unsigned char *signature, 96 | const unsigned char *message, 97 | uint32_t message_len, 98 | const unsigned char *public_key) { 99 | return ed25519_verify_device(signature, message, message_len, public_key); 100 | } 101 | 102 | bool g_verbose = true; 103 | 104 | void ed25519_set_verbose(bool val) { 105 | g_verbose = val; 106 | } 107 | 108 | void ed25519_verify_many(const gpu_Elems* elems, 109 | uint32_t num_elems, 110 | uint32_t message_size, 111 | uint32_t total_packets, 112 | uint32_t total_signatures, 113 | const uint32_t* message_lens, 114 | const uint32_t* public_key_offsets, 115 | const uint32_t* signature_offsets, 116 | const uint32_t* message_start_offsets, 117 | uint8_t* out, 118 | uint8_t use_non_default_stream) 119 | { 120 | DIE(cl_check_init() == false, "OpenCL could not be init"); 121 | 122 | cl_int ret; 123 | 124 | LOG("Starting verify_many: num_elems: %d total_signatures: %d total_packets: %d message_size: %d\n", 125 | num_elems, total_signatures, total_packets, message_size); 126 | 127 | size_t out_size = total_signatures * sizeof(uint8_t); 128 | 129 | uint32_t total_packets_size = total_packets * message_size; 130 | 131 | if (0 == total_packets) { 132 | return; 133 | } 134 | 135 | // Device allocate 136 | 137 | gpu_ctx_t* gpu_ctx = get_gpu_ctx(); 138 | 139 | verify_ctx_t* cur_ctx = &gpu_ctx->verify_ctx; 140 | 141 | setup_gpu_ctx(cur_ctx, 142 | elems, 143 | num_elems, 144 | message_size, 145 | total_packets, 146 | total_packets_size, 147 | total_signatures, 148 | message_lens, 149 | public_key_offsets, 150 | signature_offsets, 151 | message_start_offsets, 152 | out_size 153 | ); 154 | 155 | size_t num_threads_per_block = 64; 156 | size_t num_blocks = ROUND_UP_DIV(total_signatures, num_threads_per_block) * num_threads_per_block; 157 | LOG("num_blocks: %zu threads_per_block: %zu keys: %d out: %p\n", 158 | num_blocks, num_threads_per_block, (int)total_packets, out); 159 | 160 | CL_ERR( clSetKernelArg(ed25519_verify_kernel, 0, sizeof(cl_mem), (void *)&cur_ctx->packets) ); 161 | CL_ERR( clSetKernelArg(ed25519_verify_kernel, 1, sizeof(cl_uint), (void *)&message_size) ); 162 | CL_ERR( clSetKernelArg(ed25519_verify_kernel, 2, sizeof(cl_mem), (void *)&cur_ctx->message_lens) ); 163 | CL_ERR( clSetKernelArg(ed25519_verify_kernel, 3, sizeof(cl_mem), (void *)&cur_ctx->public_key_offsets) ); 164 | CL_ERR( clSetKernelArg(ed25519_verify_kernel, 4, sizeof(cl_mem), (void *)&cur_ctx->signature_offsets) ); 165 | CL_ERR( clSetKernelArg(ed25519_verify_kernel, 5, sizeof(cl_mem), (void *)&cur_ctx->message_start_offsets) ); 166 | CL_ERR( clSetKernelArg(ed25519_verify_kernel, 6, sizeof(cl_uint), (void *)&cur_ctx->offsets_len) ); 167 | CL_ERR( clSetKernelArg(ed25519_verify_kernel, 7, sizeof(cl_mem), (void *)&cur_ctx->out) ); 168 | 169 | perftime_t start, end; 170 | get_time(&start); 171 | 172 | size_t globalSize[2] = {num_blocks * num_threads_per_block, 0}; 173 | size_t localSize[2] = {num_threads_per_block, 0}; 174 | ret = clEnqueueNDRangeKernel(cmd_queue, ed25519_verify_kernel, 1, NULL, 175 | globalSize, localSize, 0, NULL, NULL); 176 | CL_ERR( ret ); 177 | 178 | CL_ERR( clEnqueueReadBuffer(cmd_queue, cur_ctx->out, CL_TRUE, 0, out_size, out, 0, NULL, NULL)); 179 | 180 | release_gpu_ctx(gpu_ctx); 181 | 182 | get_time(&end); 183 | LOG("time diff: %f\n", get_diff(&start, &end)); 184 | } 185 | 186 | // Ensure copyright and license notice is embedded in the binary 187 | const char* ed25519_license() { 188 | return "Copyright (c) 2018 Solana Labs, Inc. " 189 | "Licensed under the Apache License, Version 2.0 " 190 | ""; 191 | } 192 | 193 | // Supported by the cuda lib, so stub them here. 194 | int cuda_host_register(void* ptr, size_t size, unsigned int flags) 195 | { 196 | return 0; 197 | } 198 | 199 | int cuda_host_unregister(void* ptr) 200 | { 201 | return 0; 202 | } 203 | 204 | static int 205 | get_checked_scalar(unsigned char* scalar, const unsigned char* signature) { 206 | // Check if top 4-bits are clear 207 | // then scalar is reduced. 208 | // if ((signature[31] & 0xf0) == 0) { 209 | // for (int i = 0; i < 32; i++) { 210 | // scalar[i] = signature[i]; 211 | // } 212 | // return 0; 213 | // } 214 | 215 | // if ((signature[31] >> 7) != 0) { 216 | // return 1; 217 | // } 218 | 219 | // scalar32_reduce(scalar); 220 | // if (!consttime_equal(scalar, signature)) { 221 | // return 1; 222 | // } 223 | fprintf(stderr, "get_checked_scalar not implemented.\n"); 224 | exit(1); 225 | return 0; 226 | 227 | } 228 | 229 | int ed25519_get_checked_scalar(unsigned char* out_scalar, const unsigned char* in_scalar) { 230 | return get_checked_scalar(out_scalar, in_scalar); 231 | } 232 | 233 | // Return 0=success if ge unpacks and is not small order 234 | static int 235 | check_packed_ge_small_order(const unsigned char* packed_group_element) { 236 | // ge_p3 signature_R; 237 | 238 | // fail if ge does not unpack 239 | // if (0 != ge_frombytes_negate_vartime(&signature_R, packed_group_element)) { 240 | // return 1; 241 | // } 242 | 243 | // // fail if ge is small order 244 | // if (0 != ge_is_small_order(&signature_R)) { 245 | // return 1; 246 | // } 247 | fprintf(stderr, "check_packed_ge_small_order not implemented.\n"); 248 | exit(1); 249 | return 0; 250 | } 251 | 252 | int ed25519_check_packed_ge_small_order(const unsigned char* packed_group_element) { 253 | return check_packed_ge_small_order(packed_group_element); 254 | } 255 | -------------------------------------------------------------------------------- /src/opencl-platform/cl_common.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | using namespace std; 6 | 7 | #ifndef GPU_COMMON_H 8 | #define GPU_COMMON_H 9 | 10 | // override CUDA directives 11 | #ifndef __device__ 12 | #define __device__ 13 | #endif 14 | 15 | #ifndef __host__ 16 | #define __host__ 17 | #endif 18 | 19 | /* this is to silent warnings about opencl version (without this Werror flag will prevent compiling)*/ 20 | #ifndef CL_TARGET_OPENCL_VERSION 21 | #define CL_TARGET_OPENCL_VERSION 120 22 | #endif 23 | 24 | extern bool g_verbose; 25 | 26 | #define LOG(...) if (g_verbose) { printf(__VA_ARGS__); } 27 | 28 | #define ROUND_UP_DIV(x, y) (((x) + (y) - 1) / (y)) 29 | 30 | #ifndef UINT64_C 31 | #define UINT64_C uint64_t 32 | #endif 33 | 34 | /************************************ 35 | * OpenCL compile path 36 | *************************************/ 37 | 38 | #if __APPLE__ 39 | #include 40 | #else 41 | #include 42 | #endif 43 | 44 | // runs at the start of any OpenCL entry point crypto function 45 | bool cl_check_init(cl_uint sel_device_type); 46 | bool cl_check_init(void); 47 | 48 | // do only 1 init, kernel compilation etc 49 | extern bool cl_is_init; 50 | 51 | extern cl_context context; 52 | extern cl_command_queue cmd_queue; 53 | extern cl_program program; 54 | 55 | extern cl_kernel init_sha256_state_kernel; 56 | extern cl_kernel end_sha256_state_kernel; 57 | 58 | extern cl_kernel ed25519_sign_kernel; 59 | extern cl_kernel ed25519_verify_kernel; 60 | extern cl_kernel poh_verify_kernel; 61 | 62 | // override any CUDA function qualifiers 63 | #define __host__ 64 | #define __device__ 65 | #define __global__ 66 | 67 | #include 68 | 69 | using namespace std; 70 | 71 | // OpenCL utilities 72 | #define CL_ERR(cl_ret) if(cl_ret != CL_SUCCESS){ cout << endl << cl_get_string_err(cl_ret) << " file " << __FILE__ << "@" << __LINE__ << endl; } 73 | 74 | int CL_COMPILE_ERR(int cl_ret, 75 | cl_program program, 76 | cl_device_id device); 77 | 78 | const char* cl_get_string_err(cl_int err); 79 | void cl_get_compiler_err_log(cl_program program, 80 | cl_device_id device); 81 | 82 | void read_kernel(string file_name, string &str_kernel); 83 | 84 | #define DIE(assertion, call_description) \ 85 | do { \ 86 | if (assertion) { \ 87 | fprintf(stderr, "(%d): ", \ 88 | __LINE__); \ 89 | perror(call_description); \ 90 | exit(EXIT_FAILURE); \ 91 | } \ 92 | } while(0); 93 | 94 | #endif 95 | -------------------------------------------------------------------------------- /src/opencl-poh-verify/cl_poh_verify.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "../opencl-platform/cl_common.h" 9 | #if __APPLE__ 10 | #include 11 | #else 12 | #include 13 | #endif 14 | 15 | #include "perftime.h" 16 | 17 | #define MAX_NUM_GPUS 1 18 | #define MAX_QUEUE_SIZE 8 19 | 20 | #define SHA256_BLOCK_SIZE 32 21 | 22 | typedef struct { 23 | cl_mem in_out_hashes; 24 | cl_mem in_num_hashes_arr; 25 | size_t in_num_elems; 26 | 27 | pthread_mutex_t mutex; 28 | } gpu_ctx_t; 29 | 30 | 31 | static gpu_ctx_t g_gpu_ctx[MAX_NUM_GPUS][MAX_QUEUE_SIZE] = {0}; 32 | static uint32_t g_cur_gpu = 0; 33 | static uint32_t g_cur_queue[MAX_NUM_GPUS] = {0}; 34 | static int32_t g_total_gpus = -1; 35 | 36 | extern bool cl_check_init(); 37 | static pthread_mutex_t clg_ctx_mutex = PTHREAD_MUTEX_INITIALIZER; 38 | static bool initialized = false; 39 | 40 | static bool cl_crypt_init_locked() { 41 | if (g_total_gpus == -1) { 42 | g_total_gpus = MAX_NUM_GPUS; 43 | LOG("total_gpus: %d\n", g_total_gpus); 44 | for (int gpu = 0; gpu < g_total_gpus; gpu++) { 45 | for (int queue = 0; queue < MAX_QUEUE_SIZE; queue++) { 46 | int err = pthread_mutex_init(&g_gpu_ctx[gpu][queue].mutex, NULL); 47 | if (err != 0) { 48 | fprintf(stderr, "pthread_mutex_init error %d gpu: %d queue: %d\n", 49 | err, gpu, queue); 50 | g_total_gpus = 0; 51 | return false; 52 | } 53 | } 54 | } 55 | } 56 | return g_total_gpus > 0; 57 | } 58 | 59 | gpu_ctx_t* get_gpu_ctx() { 60 | int32_t cur_gpu, cur_queue; 61 | 62 | LOG("getting gpu_ctx\n"); 63 | 64 | cur_gpu = g_cur_gpu; 65 | g_cur_gpu++; 66 | g_cur_gpu %= g_total_gpus; 67 | cur_queue = g_cur_queue[cur_gpu]; 68 | g_cur_queue[cur_gpu]++; 69 | g_cur_queue[cur_gpu] %= MAX_QUEUE_SIZE; 70 | 71 | gpu_ctx_t* cur_ctx = &g_gpu_ctx[cur_gpu][cur_queue]; 72 | LOG("locking contex mutex queue: %d gpu: %d\n", cur_queue, cur_gpu); 73 | pthread_mutex_lock(&cur_ctx->mutex); 74 | 75 | LOG("selecting gpu: %d queue: %d\n", cur_gpu, cur_queue); 76 | 77 | return cur_ctx; 78 | } 79 | 80 | void setup_gpu_ctx(gpu_ctx_t *cur_ctx, 81 | uint8_t* hashes, 82 | const uint64_t* num_hashes_arr, 83 | size_t num_elems, 84 | size_t nr_bytes_hashes, 85 | size_t nr_bytes_num_hashes_arr 86 | ) { 87 | int ret; 88 | 89 | LOG("device allocate. num hashes: %lu sizes in MB: hashes: %f num_hashes_arr: %f\n", 90 | num_elems, (double)nr_bytes_hashes/(1024*1024), (double)nr_bytes_num_hashes_arr/(1024*1024)); 91 | 92 | if (cur_ctx->in_out_hashes == NULL || cur_ctx->in_num_elems < num_elems) { 93 | clReleaseMemObject(cur_ctx->in_out_hashes); 94 | cur_ctx->in_out_hashes = clCreateBuffer(context, CL_MEM_READ_WRITE, nr_bytes_hashes, NULL, &ret); 95 | CL_ERR( ret ); 96 | } 97 | 98 | if (cur_ctx->in_num_hashes_arr == NULL || cur_ctx->in_num_elems < num_elems) { 99 | clReleaseMemObject(cur_ctx->in_num_hashes_arr); 100 | cur_ctx->in_num_hashes_arr = clCreateBuffer(context, CL_MEM_READ_ONLY, nr_bytes_num_hashes_arr, NULL, &ret); 101 | CL_ERR( ret ); 102 | cur_ctx->in_num_elems = num_elems; 103 | } 104 | 105 | 106 | CL_ERR( clEnqueueWriteBuffer(cmd_queue, cur_ctx->in_out_hashes, CL_TRUE, 0, nr_bytes_hashes, hashes, 0, NULL, NULL)); 107 | CL_ERR( clEnqueueWriteBuffer(cmd_queue, cur_ctx->in_num_hashes_arr, CL_TRUE, 0, nr_bytes_num_hashes_arr, num_hashes_arr, 0, NULL, NULL)); 108 | } 109 | 110 | void release_gpu_ctx(gpu_ctx_t* cur_ctx) { 111 | pthread_mutex_unlock(&cur_ctx->mutex); 112 | } 113 | 114 | extern "C" { 115 | 116 | void poh_verify_many_set_verbose(bool val) { 117 | g_verbose = val; 118 | } 119 | 120 | int poh_verify_many(uint8_t* hashes, 121 | const uint64_t* num_hashes_arr, 122 | size_t num_elems, 123 | uint8_t use_non_default_stream) 124 | { 125 | LOG("Starting poh_verify_many: num_elems: %zu\n", num_elems); 126 | 127 | if (num_elems == 0) return 0; 128 | 129 | pthread_mutex_lock(&clg_ctx_mutex); 130 | bool success = false; 131 | 132 | if (initialized == false) { 133 | success = cl_check_init(); 134 | DIE(success == false, "OpenCL could not be init"); 135 | DIE(cl_crypt_init_locked() == false, "cl_crypt_init_locked failed"); 136 | initialized = true; 137 | } else { 138 | LOG("cl_poh_verify_many already initialized\n"); 139 | } 140 | 141 | gpu_ctx_t *cur_ctx = get_gpu_ctx(); 142 | pthread_mutex_unlock(&clg_ctx_mutex); 143 | 144 | size_t nr_bytes_hashes = num_elems * SHA256_BLOCK_SIZE * sizeof(uint8_t); 145 | size_t nr_bytes_num_hashes_arr = num_elems * sizeof(uint64_t); 146 | setup_gpu_ctx(cur_ctx, 147 | hashes, 148 | num_hashes_arr, 149 | num_elems, 150 | nr_bytes_hashes, 151 | nr_bytes_num_hashes_arr); 152 | 153 | size_t num_threads_per_block = 64; 154 | size_t num_blocks = ROUND_UP_DIV(num_elems, num_threads_per_block) * num_threads_per_block; 155 | LOG("num_blocks: %zu threads_per_block: %zu nr hashes: %lu\n", 156 | num_blocks, num_threads_per_block, num_elems); 157 | 158 | CL_ERR( clSetKernelArg(poh_verify_kernel, 0, sizeof(cl_mem), (void *)&cur_ctx->in_out_hashes) ); 159 | CL_ERR( clSetKernelArg(poh_verify_kernel, 1, sizeof(cl_mem), (void *)&cur_ctx->in_num_hashes_arr) ); 160 | CL_ERR( clSetKernelArg(poh_verify_kernel, 2, sizeof(cl_uint), (void *)&cur_ctx->in_num_elems) ); 161 | 162 | perftime_t start, end; 163 | get_time(&start); 164 | 165 | size_t globalSize[2] = {num_blocks * num_threads_per_block, 0}; 166 | size_t localSize[2] = {num_threads_per_block, 0}; 167 | cl_int ret = clEnqueueNDRangeKernel(cmd_queue, poh_verify_kernel, 1, NULL, globalSize, localSize, 0, NULL, NULL); 168 | CL_ERR( ret ); 169 | 170 | ret = clFinish(cmd_queue); 171 | CL_ERR( ret ); 172 | 173 | ret = clEnqueueReadBuffer(cmd_queue, cur_ctx->in_out_hashes, CL_TRUE, 0, nr_bytes_hashes, hashes, 0, NULL, NULL); 174 | CL_ERR( ret ); 175 | 176 | get_time(&end); 177 | LOG("time diff: %f\n", get_diff(&start, &end)); 178 | release_gpu_ctx(cur_ctx); 179 | 180 | return 0; 181 | } 182 | 183 | } 184 | -------------------------------------------------------------------------------- /src/poh-simd/Makefile: -------------------------------------------------------------------------------- 1 | CC=ispc 2 | ISPC_FLAGS := -O2 --pic -I. 3 | DEPS := sha256.h 4 | 5 | ISPC_OBJ := ispcobj 6 | ISPC_C_Objects := $(ISPC_OBJ)/poh-verify-sse2.o \ 7 | $(ISPC_OBJ)/poh-verify-sse4.o \ 8 | $(ISPC_OBJ)/poh-verify-avx1.o \ 9 | $(ISPC_OBJ)/poh-verify-avx2.o \ 10 | $(ISPC_OBJ)/poh-verify-avx512skx.o 11 | 12 | OUT ?= libs 13 | 14 | .PHONY: all run 15 | all: $(OUT)/libpoh-simd.so 16 | run: all 17 | 18 | $(ISPC_OBJ)/poh-verify-sse2.o: poh-verify.ispc $(DEPS) 19 | @mkdir -p $(ISPC_OBJ) 20 | $(CC) --target=sse2-i32x4 -DNAME_SUFFIX=sse2 $(ISPC_FLAGS) $< -o $@ 21 | 22 | $(ISPC_OBJ)/poh-verify-sse4.o: poh-verify.ispc $(DEPS) 23 | @mkdir -p $(ISPC_OBJ) 24 | $(CC) --target=sse4-i32x4 -DNAME_SUFFIX=sse4 $(ISPC_FLAGS) $< -o $@ 25 | 26 | $(ISPC_OBJ)/poh-verify-avx1.o: poh-verify.ispc $(DEPS) 27 | @mkdir -p $(ISPC_OBJ) 28 | $(CC) --target=avx1-i32x8 -DNAME_SUFFIX=avx1 $(ISPC_FLAGS) $< -o $@ 29 | 30 | $(ISPC_OBJ)/poh-verify-avx2.o: poh-verify.ispc $(DEPS) 31 | @mkdir -p $(ISPC_OBJ) 32 | $(CC) --target=avx2-i32x8 -DNAME_SUFFIX=avx2 $(ISPC_FLAGS) $< -o $@ 33 | 34 | $(ISPC_OBJ)/poh-verify-avx512skx.o: poh-verify.ispc $(DEPS) 35 | @mkdir -p $(ISPC_OBJ) 36 | $(CC) --target=avx512skx-i32x16 -DNAME_SUFFIX=avx512skx $(ISPC_FLAGS) $< -o $@ 37 | 38 | $(OUT)/libpoh-simd.so: $(ISPC_C_Objects) 39 | @mkdir -p $(OUT) 40 | gcc -shared -o $@ $^ 41 | 42 | clean: 43 | @rm -rf $(ISPC_OBJ) $(OUT) 44 | -------------------------------------------------------------------------------- /src/poh-simd/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | pwd=$PWD 5 | cd "$(dirname "$0")" 6 | 7 | echo --- Build 8 | ( 9 | set -x 10 | make OUT="$pwd"/dist 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /src/poh-simd/poh-verify.ispc: -------------------------------------------------------------------------------- 1 | #include "sha256.h" 2 | 3 | #define MAKE_FN_NAME(x) export void poh_verify_many_simd_ ## x (uniform u8 hashes[], uniform const unsigned int64 num_hashes_arr[]) 4 | #define FUNCTION_NAME(signal) MAKE_FN_NAME(signal) 5 | 6 | FUNCTION_NAME(NAME_SUFFIX) 7 | { 8 | foreach(i = 0 ... programCount) { 9 | u8* hash = &hashes[i * SHA256_BLOCK_SIZE]; 10 | varying u32 s[8]; 11 | varying u32 w[64]; 12 | varying u32 T0; 13 | varying u32 T1; 14 | 15 | // Load words 16 | for (int j = 0; j < SHA256_BLOCK_SIZE / 4; j++) { 17 | w[j] = (((u32)hash[j * 4] << 24) | 18 | ((u32)hash[j * 4 + 1] << 16) | 19 | ((u32)hash[j * 4 + 2] << 8) | 20 | ((u32)hash[j * 4 + 3])); 21 | } 22 | 23 | if (num_hashes_arr[i] > 0) { 24 | for (int j = 0; j < num_hashes_arr[i]; j++) { 25 | s[0] = 0x6a09e667; 26 | s[1] = 0xbb67ae85; 27 | s[2] = 0x3c6ef372; 28 | s[3] = 0xa54ff53a; 29 | s[4] = 0x510e527f; 30 | s[5] = 0x9b05688c; 31 | s[6] = 0x1f83d9ab; 32 | s[7] = 0x5be0cd19; 33 | 34 | w[8] = 0x80000000; 35 | w[9] = 0; 36 | w[10] = 0; 37 | w[11] = 0; 38 | w[12] = 0; 39 | w[13] = 0; 40 | w[14] = 0; 41 | w[15] = 0x00000100; 42 | 43 | SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 0, w[0]); 44 | SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 1, w[1]); 45 | SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 2, w[2]); 46 | SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 3, w[3]); 47 | SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 4, w[4]); 48 | SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 5, w[5]); 49 | SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 6, w[6]); 50 | SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 7, w[7]); 51 | SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 8, w[8]); 52 | SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 9, w[9]); 53 | SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 10, w[10]); 54 | SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 11, w[11]); 55 | SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 12, w[12]); 56 | SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 13, w[13]); 57 | SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 14, w[14]); 58 | SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 15, w[15]); 59 | w[16] = WSIGMA1(w[14]) + w[0] + w[9] + WSIGMA0(w[1]); 60 | SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 16, w[16]); 61 | w[17] = WSIGMA1(w[15]) + w[1] + w[10] + WSIGMA0(w[2]); 62 | SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 17, w[17]); 63 | w[18] = WSIGMA1(w[16]) + w[2] + w[11] + WSIGMA0(w[3]); 64 | SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 18, w[18]); 65 | w[19] = WSIGMA1(w[17]) + w[3] + w[12] + WSIGMA0(w[4]); 66 | SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 19, w[19]); 67 | w[20] = WSIGMA1(w[18]) + w[4] + w[13] + WSIGMA0(w[5]); 68 | SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 20, w[20]); 69 | w[21] = WSIGMA1(w[19]) + w[5] + w[14] + WSIGMA0(w[6]); 70 | SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 21, w[21]); 71 | w[22] = WSIGMA1(w[20]) + w[6] + w[15] + WSIGMA0(w[7]); 72 | SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 22, w[22]); 73 | w[23] = WSIGMA1(w[21]) + w[7] + w[16] + WSIGMA0(w[8]); 74 | SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 23, w[23]); 75 | w[24] = WSIGMA1(w[22]) + w[8] + w[17] + WSIGMA0(w[9]); 76 | SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 24, w[24]); 77 | w[25] = WSIGMA1(w[23]) + w[9] + w[18] + WSIGMA0(w[10]); 78 | SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 25, w[25]); 79 | w[26] = WSIGMA1(w[24]) + w[10] + w[19] + WSIGMA0(w[11]); 80 | SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 26, w[26]); 81 | w[27] = WSIGMA1(w[25]) + w[11] + w[20] + WSIGMA0(w[12]); 82 | SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 27, w[27]); 83 | w[28] = WSIGMA1(w[26]) + w[12] + w[21] + WSIGMA0(w[13]); 84 | SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 28, w[28]); 85 | w[29] = WSIGMA1(w[27]) + w[13] + w[22] + WSIGMA0(w[14]); 86 | SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 29, w[29]); 87 | w[30] = WSIGMA1(w[28]) + w[14] + w[23] + WSIGMA0(w[15]); 88 | SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 30, w[30]); 89 | w[31] = WSIGMA1(w[29]) + w[15] + w[24] + WSIGMA0(w[16]); 90 | SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 31, w[31]); 91 | w[32] = WSIGMA1(w[30]) + w[16] + w[25] + WSIGMA0(w[17]); 92 | SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 32, w[32]); 93 | w[33] = WSIGMA1(w[31]) + w[17] + w[26] + WSIGMA0(w[18]); 94 | SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 33, w[33]); 95 | w[34] = WSIGMA1(w[32]) + w[18] + w[27] + WSIGMA0(w[19]); 96 | SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 34, w[34]); 97 | w[35] = WSIGMA1(w[33]) + w[19] + w[28] + WSIGMA0(w[20]); 98 | SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 35, w[35]); 99 | w[36] = WSIGMA1(w[34]) + w[20] + w[29] + WSIGMA0(w[21]); 100 | SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 36, w[36]); 101 | w[37] = WSIGMA1(w[35]) + w[21] + w[30] + WSIGMA0(w[22]); 102 | SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 37, w[37]); 103 | w[38] = WSIGMA1(w[36]) + w[22] + w[31] + WSIGMA0(w[23]); 104 | SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 38, w[38]); 105 | w[39] = WSIGMA1(w[37]) + w[23] + w[32] + WSIGMA0(w[24]); 106 | SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 39, w[39]); 107 | w[40] = WSIGMA1(w[38]) + w[24] + w[33] + WSIGMA0(w[25]); 108 | SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 40, w[40]); 109 | w[41] = WSIGMA1(w[39]) + w[25] + w[34] + WSIGMA0(w[26]); 110 | SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 41, w[41]); 111 | w[42] = WSIGMA1(w[40]) + w[26] + w[35] + WSIGMA0(w[27]); 112 | SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 42, w[42]); 113 | w[43] = WSIGMA1(w[41]) + w[27] + w[36] + WSIGMA0(w[28]); 114 | SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 43, w[43]); 115 | w[44] = WSIGMA1(w[42]) + w[28] + w[37] + WSIGMA0(w[29]); 116 | SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 44, w[44]); 117 | w[45] = WSIGMA1(w[43]) + w[29] + w[38] + WSIGMA0(w[30]); 118 | SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 45, w[45]); 119 | w[46] = WSIGMA1(w[44]) + w[30] + w[39] + WSIGMA0(w[31]); 120 | SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 46, w[46]); 121 | w[47] = WSIGMA1(w[45]) + w[31] + w[40] + WSIGMA0(w[32]); 122 | SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 47, w[47]); 123 | w[48] = WSIGMA1(w[46]) + w[32] + w[41] + WSIGMA0(w[33]); 124 | SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 48, w[48]); 125 | w[49] = WSIGMA1(w[47]) + w[33] + w[42] + WSIGMA0(w[34]); 126 | SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 49, w[49]); 127 | w[50] = WSIGMA1(w[48]) + w[34] + w[43] + WSIGMA0(w[35]); 128 | SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 50, w[50]); 129 | w[51] = WSIGMA1(w[49]) + w[35] + w[44] + WSIGMA0(w[36]); 130 | SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 51, w[51]); 131 | w[52] = WSIGMA1(w[50]) + w[36] + w[45] + WSIGMA0(w[37]); 132 | SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 52, w[52]); 133 | w[53] = WSIGMA1(w[51]) + w[37] + w[46] + WSIGMA0(w[38]); 134 | SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 53, w[53]); 135 | w[54] = WSIGMA1(w[52]) + w[38] + w[47] + WSIGMA0(w[39]); 136 | SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 54, w[54]); 137 | w[55] = WSIGMA1(w[53]) + w[39] + w[48] + WSIGMA0(w[40]); 138 | SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 55, w[55]); 139 | w[56] = WSIGMA1(w[54]) + w[40] + w[49] + WSIGMA0(w[41]); 140 | SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 56, w[56]); 141 | w[57] = WSIGMA1(w[55]) + w[41] + w[50] + WSIGMA0(w[42]); 142 | SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 57, w[57]); 143 | w[58] = WSIGMA1(w[56]) + w[42] + w[51] + WSIGMA0(w[43]); 144 | SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 58, w[58]); 145 | w[59] = WSIGMA1(w[57]) + w[43] + w[52] + WSIGMA0(w[44]); 146 | SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 59, w[59]); 147 | w[60] = WSIGMA1(w[58]) + w[44] + w[53] + WSIGMA0(w[45]); 148 | SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 60, w[60]); 149 | w[61] = WSIGMA1(w[59]) + w[45] + w[54] + WSIGMA0(w[46]); 150 | SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 61, w[61]); 151 | w[62] = WSIGMA1(w[60]) + w[46] + w[55] + WSIGMA0(w[47]); 152 | SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 62, w[62]); 153 | w[63] = WSIGMA1(w[61]) + w[47] + w[56] + WSIGMA0(w[48]); 154 | SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 63, w[63]); 155 | 156 | // Feed Forward 157 | s[0] = s[0] + 0x6a09e667; 158 | s[1] = s[1] + 0xbb67ae85; 159 | s[2] = s[2] + 0x3c6ef372; 160 | s[3] = s[3] + 0xa54ff53a; 161 | s[4] = s[4] + 0x510e527f; 162 | s[5] = s[5] + 0x9b05688c; 163 | s[6] = s[6] + 0x1f83d9ab; 164 | s[7] = s[7] + 0x5be0cd19; 165 | 166 | // Store Hash value 167 | for (int k = 0; k < 8; k++) { 168 | w[k] = s[k]; 169 | } 170 | } 171 | 172 | // Store Hash value 173 | for (int j = 0; j < SHA256_BLOCK_SIZE / 4; j++) { 174 | hash[j * 4 + 3] = s[j] & 0xff; 175 | hash[j * 4 + 2] = (s[j] >> 8) & 0xff; 176 | hash[j * 4 + 1] = (s[j] >> 16) & 0xff; 177 | hash[j * 4 + 0] = (s[j] >> 24) & 0xff; 178 | } 179 | } 180 | } 181 | } 182 | -------------------------------------------------------------------------------- /src/poh-simd/sha256.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Adapted from kste's sha256 implementation, accessible at https://github.com/kste/sha256_avx 3 | * 4 | * MIT License 5 | * 6 | * Copyright (c) 2017 7 | * 8 | * Permission is hereby granted, free of charge, to any person obtaining a copy 9 | * of this software and associated documentation files (the "Software"), to deal 10 | * in the Software without restriction, including without limitation the rights 11 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | * copies of the Software, and to permit persons to whom the Software is 13 | * furnished to do so, subject to the following conditions: 14 | * 15 | * The above copyright notice and this permission notice shall be included in all 16 | * copies or substantial portions of the Software. 17 | * 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | * SOFTWARE. 25 | */ 26 | 27 | #ifndef SHA256_H 28 | #define SHA256_H 29 | 30 | #define u32 unsigned int32 31 | #define u8 unsigned int8 32 | 33 | #define SHA256_BLOCK_SIZE 32 34 | 35 | static const u32 RC[] = { 36 | 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 37 | 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 38 | 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 39 | 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 40 | 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 41 | 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 42 | 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 43 | 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, 44 | 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 45 | 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 46 | 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 47 | 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, 48 | 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 49 | 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, 50 | 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 51 | 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 52 | }; 53 | 54 | #define MAJ(a, b, c) ((a&b) ^ (a&c) ^ (b&c)) 55 | #define CH(a, b, c) ((a&b) ^ (~(a)&c)) 56 | 57 | #define ROTR32(x, r) ((x >> r) | (x << (SHA256_BLOCK_SIZE - r))) 58 | 59 | #define SIGMA1(x) (ROTR32(x, 6) ^ ROTR32(x, 11) ^ ROTR32(x, 25)) 60 | #define SIGMA0(x) (ROTR32(x, 2) ^ ROTR32(x, 13) ^ ROTR32(x, 22)) 61 | 62 | #define WSIGMA1(x) (ROTR32(x, 17) ^ ROTR32(x, 19) ^ (x >> 10)) 63 | #define WSIGMA0(x) (ROTR32(x, 7) ^ ROTR32(x, 18) ^ (x >> 3)) 64 | 65 | #define SHA256ROUND(a, b, c, d, e, f, g, h, rc, w) \ 66 | T0 = h + SIGMA1(e) + CH(e, f, g) + RC[rc] + w; \ 67 | d = d + T0; \ 68 | T1 = SIGMA0(a) + MAJ(a, b, c); \ 69 | h = T0 + T1; 70 | 71 | #endif -------------------------------------------------------------------------------- /src/poh-verify-test/main.cpp: -------------------------------------------------------------------------------- 1 | #include "cl_common.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #define USE_CLOCK_GETTIME 9 | #define ROUND_UP_DIV(x, y) (((x) + (y) - 1) / (y)) 10 | #define SHA256_BLOCK_SIZE 32 11 | 12 | #include "perftime.h" 13 | 14 | bool g_verbose = false; 15 | 16 | typedef struct input_poh_ { 17 | 18 | uint8_t* hashes; 19 | uint64_t* num_hashes_arr; 20 | size_t num_elems; 21 | 22 | } input_poh; 23 | 24 | void static inline save_out(uint8_t* hashes, 25 | size_t num_elems, size_t index_thread) { 26 | 27 | FILE * fp; 28 | 29 | const char *file_name = "test_hashes_output"; 30 | char temp_string[50]; 31 | sprintf(temp_string, "%s_%lu", file_name, index_thread); 32 | 33 | fp = fopen (temp_string, "w"); 34 | if (fp == NULL) { 35 | fprintf(stderr, "Could not create file %s\n", temp_string); 36 | exit(-1); 37 | } 38 | 39 | for (size_t i = 0; i < num_elems; ++i) { 40 | fprintf(fp, "%hhu ", hashes[i]); 41 | } 42 | fclose(fp); 43 | } 44 | 45 | 46 | input_poh* allocate_input_poh(size_t num_elems) { 47 | 48 | input_poh* input_result = (input_poh*)calloc(1, sizeof(input_poh)); 49 | DIE(input_result == NULL, "Error while allocating an input_poh structure"); 50 | 51 | input_result->num_elems = num_elems; 52 | 53 | input_result->hashes = (uint8_t*)calloc(input_result->num_elems, sizeof(uint8_t)); 54 | DIE(input_result->hashes == NULL, "Error while allocating input_result->hashes"); 55 | 56 | input_result->num_hashes_arr = (uint64_t*)calloc(input_result->num_elems, sizeof(uint64_t)); 57 | DIE(input_result->num_hashes_arr == NULL, "Error while allocating input_result->num_hashes_arr"); 58 | 59 | return input_result; 60 | } 61 | 62 | void free_input_poh(input_poh** poh) { 63 | free((*poh)->hashes); 64 | (*poh)->hashes = NULL; 65 | free((*poh)->num_hashes_arr); 66 | (*poh)->num_hashes_arr = NULL; 67 | free(*poh); 68 | (*poh) = NULL; 69 | 70 | } 71 | 72 | input_poh* get_input(const char* file_hashes, const char* file_hashes_arr, const char* file_num_elems) { 73 | 74 | FILE * fp; 75 | fp = fopen(file_hashes, "r"); 76 | 77 | if (fp == NULL) { 78 | fprintf(stderr, "Could not open file %s\n", file_hashes); 79 | exit(-1); 80 | } 81 | 82 | FILE * fp2; 83 | fp2 = fopen(file_hashes_arr, "r"); 84 | 85 | if (fp2 == NULL) { 86 | fprintf(stderr, "Could not open file %s\n", file_hashes_arr); 87 | exit(-1); 88 | } 89 | 90 | FILE * fp3; 91 | fp3 = fopen(file_num_elems, "r"); 92 | 93 | if (fp3 == NULL) { 94 | fprintf(stderr, "Could not open file %s\n", file_num_elems); 95 | exit(-1); 96 | } 97 | 98 | size_t num_elems; 99 | DIE( 0 == fscanf(fp3, "%zu", &num_elems), "Error while reading num_elems from file"); 100 | fprintf(stderr, "num_elems read from file %s is %zu\n", file_num_elems, num_elems); 101 | 102 | input_poh* input_result = allocate_input_poh(num_elems); 103 | 104 | for (size_t i=0; inum_elems; ++i) { 105 | if( 0 == fscanf(fp, "%hhu", &input_result->hashes[i])) { 106 | fprintf(stderr, "Error while reading hashes from file %s at index %lu \n", file_hashes, i); 107 | exit(-2); 108 | } 109 | } 110 | 111 | for (size_t i=0; inum_elems/SHA256_BLOCK_SIZE; ++i) { 112 | if( 0 == fscanf(fp2, "%lu", &input_result->num_hashes_arr[i])) { 113 | fprintf(stderr, "Error while reading input num_hashes_arr from file %s at index %lu \n", file_hashes_arr, i); 114 | exit(-2); 115 | } 116 | } 117 | 118 | fclose(fp); 119 | fclose(fp2); 120 | fclose(fp3); 121 | 122 | return input_result; 123 | } 124 | 125 | void generate_input(input_poh* input_result) { 126 | srand(1); // keep the same seed in cuda and opencl variants 127 | for (size_t i = 0 ; i < input_result->num_elems; ++i) { 128 | input_result->hashes[i] = rand() % 100000; 129 | } 130 | 131 | for (size_t i = 0 ; i < input_result->num_elems/SHA256_BLOCK_SIZE; ++i) { 132 | input_result->num_hashes_arr[i] = 20000; 133 | } 134 | } 135 | 136 | extern "C" { 137 | extern int poh_verify_many(uint8_t*, const uint64_t*, size_t, uint8_t); 138 | void poh_verify_many_set_verbose(bool); 139 | } 140 | 141 | void* work(void *param) { 142 | input_poh* input_result = (input_poh*)param; 143 | poh_verify_many(input_result->hashes, input_result->num_hashes_arr, input_result->num_elems/SHA256_BLOCK_SIZE, 0); 144 | return nullptr; 145 | } 146 | 147 | int main(int argc, const char* argv[]) { 148 | 149 | if (argc == 1 || argc == 2) { 150 | printf("usage 1: %s [-v] [-save_output] generate \n", argv[0]); 151 | printf("usage 2: %s [-v] [-save_output] [-check_result] \n", argv[0]); 152 | printf("usage: argc is %i \n", argc); 153 | return 1; 154 | } 155 | 156 | int arg = 1; 157 | bool verbose = false; 158 | bool save_output_file = false; 159 | bool check_result = false; 160 | if (0 == strcmp(argv[arg], "-v")) { 161 | verbose = true; 162 | arg++; 163 | } 164 | if (0 == strcmp(argv[arg], "-save_output")) { 165 | save_output_file = true; 166 | arg++; 167 | } 168 | if (0 == strcmp(argv[arg], "-check_result")) { 169 | check_result = true; 170 | arg++; 171 | } 172 | 173 | poh_verify_many_set_verbose(verbose); 174 | 175 | if (0 == strcmp(argv[arg], "generate")) { 176 | ++arg; 177 | if ((argc - arg) != 2) { 178 | printf("usage 1: %s [-v] [-save_output] generate \n", argv[0]); 179 | printf("usage 2: %s [-v] [-save_output] [-check_result] \n", argv[0]); 180 | printf("usage: argc is %i \n", argc); 181 | return 1; 182 | } 183 | 184 | if (0 == strcmp(argv[arg], "0")) { 185 | printf("nr_elements is 0!\n"); 186 | return 1; 187 | } 188 | 189 | size_t num_elems = strtoul(argv[arg], nullptr, 10); 190 | if (num_elems == 0) { 191 | printf("nr_elements is not a number %s!\n", argv[arg]); 192 | exit(-1); 193 | } 194 | ++arg; 195 | 196 | size_t num_threads = strtoul(argv[arg], nullptr, 10); 197 | if (num_threads == 0) { 198 | printf("nr_inputs is not a number %s!\n", argv[arg]); 199 | exit(-1); 200 | } 201 | 202 | pthread_t *threads; 203 | 204 | num_elems = ROUND_UP_DIV(num_elems, SHA256_BLOCK_SIZE) * SHA256_BLOCK_SIZE; 205 | printf("nr_elements rounded up to %lu \n", num_elems); 206 | 207 | threads = (pthread_t*)calloc(num_threads, sizeof(pthread_t)); 208 | if (threads == NULL) { 209 | fprintf(stderr, "Error while allocating threads\n"); 210 | exit(-1); 211 | } 212 | 213 | input_poh** vinput_result = (input_poh**)calloc(num_threads, sizeof(input_poh)); 214 | 215 | for (size_t i = 0; i < num_threads; ++i) { 216 | vinput_result[i] = allocate_input_poh(num_elems); 217 | generate_input(vinput_result[i]); 218 | } 219 | LOG("Created and filled input_poh with %lu elements for %lu threads\n", num_elems, num_threads); 220 | 221 | perftime_t start, end; 222 | get_time(&start); 223 | 224 | for (size_t i = 0; i < num_threads; ++i) { 225 | if (pthread_create (&threads[i], NULL, work, (void*)vinput_result[i]) != 0) { 226 | fprintf(stderr, "Error while creating threads %lu\n", i); 227 | exit(-1); 228 | } 229 | } 230 | 231 | for (size_t i = 0; i < num_threads; ++i) { 232 | if (pthread_join (threads[i], NULL) != 0) { 233 | fprintf(stderr, "Error while creating threads %lu\n", i); 234 | exit(-1); 235 | } 236 | } 237 | 238 | get_time(&end); 239 | 240 | double diff = get_diff(&start, &end); 241 | printf("Total time hashing diff: %f microSeconds or %f seconds \n", diff, diff/1000000); 242 | 243 | 244 | for (size_t i = 0; i < num_threads; ++i) { 245 | if (save_output_file) { 246 | save_out(vinput_result[i]->hashes, vinput_result[i]->num_elems, i); 247 | } 248 | } 249 | 250 | for (size_t i = 0; i < num_threads; ++i) { 251 | free_input_poh(&vinput_result[i]); 252 | } 253 | free(vinput_result); 254 | 255 | } 256 | else { 257 | if ((argc - arg) != 3) { 258 | printf("usage 1: %s [-v] [-save_output] generate \n", argv[0]); 259 | printf("usage 2: %s [-v] [-save_output] [-check_result] \n", argv[0]); 260 | printf("usage: argc is %i \n", argc); 261 | return 1; 262 | } 263 | 264 | input_poh* input_result = get_input(argv[arg], argv[arg+1], argv[arg+2]); 265 | perftime_t start, end; 266 | 267 | get_time(&start); 268 | work(input_result); 269 | get_time(&end); 270 | 271 | double diff = get_diff(&start, &end); 272 | printf("Total time hashing diff: %f microSeconds or %f seconds \n", diff, diff/1000000); 273 | 274 | if (save_output_file) { 275 | save_out(input_result->hashes, input_result->num_elems, 0); 276 | } 277 | 278 | if (check_result) { 279 | FILE * fp; 280 | fp = fopen(argv[arg+2], "r"); 281 | 282 | if (fp == NULL) { 283 | fprintf(stderr, "Could not open file %s\n", argv[arg+2]); 284 | exit(-1); 285 | } 286 | 287 | FILE * fp2; 288 | const char* file_with_results = "..//poh-verify-test//test_hashes_output_332"; 289 | fp2 = fopen(file_with_results, "r"); 290 | 291 | if (fp2 == NULL) { 292 | fprintf(stderr, "Could not open file %s\n", file_with_results); 293 | exit(-1); 294 | } 295 | 296 | size_t num_elems; 297 | DIE( 0 == fscanf(fp, "%zu", &num_elems), "Error while reading num_elems from file"); 298 | fprintf(stderr, "num_elems read from file %s is %zu\n", argv[arg+2], num_elems); 299 | 300 | uint8_t* test_result_hashes = (uint8_t*)calloc(input_result->num_elems, sizeof(uint8_t)); 301 | DIE(test_result_hashes == NULL, "Error while allocating test_result_hashes"); 302 | 303 | size_t i = 0; 304 | for (i=0; i < num_elems; ++i) { 305 | if( 0 == fscanf(fp2, "%hhu", &test_result_hashes[i])) { 306 | fprintf(stderr, "Error while reading hashes from file %s at index %lu \n", file_with_results, i); 307 | exit(-2); 308 | } 309 | } 310 | 311 | for (i=0; i < num_elems; ++i) { 312 | if (test_result_hashes[i] != input_result->hashes[i]) { 313 | fprintf(stderr, "Different result detected at index %lu of %lu actual result: %hhu expected: %hhu \n TEST FAILED", 314 | i, num_elems, input_result->hashes[i], test_result_hashes[i]); 315 | break; 316 | } 317 | } 318 | 319 | if (num_elems == i) { 320 | printf("TEST PASSED num_elems %lu\n", num_elems); 321 | } 322 | free(test_result_hashes); 323 | fclose(fp); 324 | fclose(fp2); 325 | } 326 | 327 | 328 | free_input_poh(&input_result); 329 | } 330 | 331 | return 0; 332 | 333 | } 334 | -------------------------------------------------------------------------------- /src/poh-verify-test/test_num_elems_332: -------------------------------------------------------------------------------- 1 | 4128 -------------------------------------------------------------------------------- /src/poh-verify-test/test_num_hashes_arr_332: -------------------------------------------------------------------------------- 1 | 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 16901 2451 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/Makefile: -------------------------------------------------------------------------------- 1 | SGX_SDK ?= /opt/sgxsdk 2 | 3 | C_Flags := -O2 -fpic -I. 4 | 5 | SGX_C_Flags := -Wno-implicit-function-declaration -std=c11 -m64 -O2 -nostdinc -DSGX_COMPAT -fpie -fstack-protector \ 6 | -IInclude -I. -I$(SGX_SDK)/include -I$(SGX_SDK)/include/tlibc -I$(SGX_SDK)/include/libcxx -fno-builtin-printf -I. 7 | 8 | C_Files := $(wildcard *.c) 9 | 10 | OUT ?= libs 11 | 12 | SGX_OBJ := sgxobj 13 | SGX_C_Objects := $(C_Files:%.c=$(SGX_OBJ)/%.o) 14 | 15 | NONSGX_OBJ := nonsgxobj 16 | NONSGX_C_Objects := $(C_Files:%.c=$(NONSGX_OBJ)/%.o) 17 | 18 | .PHONY: all run 19 | all: $(OUT)/libed25519.sgx.static.a $(OUT)/libed25519.static.a 20 | run: all 21 | 22 | $(SGX_OBJ)/%.o: %.c 23 | @echo "CC <= $<" 24 | @mkdir -p $(SGX_OBJ) 25 | $(CC) $(SGX_C_Flags) -c $< -o $@ 26 | 27 | $(NONSGX_OBJ)/%.o: %.c 28 | @echo "CC <= $<" 29 | @mkdir -p $(NONSGX_OBJ) 30 | $(CC) $(C_Flags) -c $< -o $@ 31 | 32 | $(OUT)/libed25519.sgx.static.a: $(SGX_C_Objects) 33 | @mkdir -p $(OUT) 34 | ar rcs $@ $^ 35 | 36 | $(OUT)/libed25519.static.a: $(NONSGX_C_Objects) 37 | @mkdir -p $(OUT) 38 | ar rcs $@ $^ 39 | 40 | clean: 41 | @rm -rf $(SGX_OBJ) $(NONSGX_OBJ) $(OUT) 42 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/add_scalar.c: -------------------------------------------------------------------------------- 1 | #include "ed25519.h" 2 | #include "ge.h" 3 | #include "sc.h" 4 | #include "sha512.h" 5 | 6 | 7 | /* see http://crypto.stackexchange.com/a/6215/4697 */ 8 | void ed25519_add_scalar(unsigned char *public_key, unsigned char *private_key, const unsigned char *scalar) { 9 | const unsigned char SC_1[32] = {1}; /* scalar with value 1 */ 10 | 11 | unsigned char n[32]; 12 | ge_p3 nB; 13 | ge_p1p1 A_p1p1; 14 | ge_p3 A; 15 | ge_p3 public_key_unpacked; 16 | ge_cached T; 17 | 18 | sha512_context hash; 19 | unsigned char hashbuf[64]; 20 | 21 | int i; 22 | 23 | /* copy the scalar and clear highest bit */ 24 | for (i = 0; i < 31; ++i) { 25 | n[i] = scalar[i]; 26 | } 27 | n[31] = scalar[31] & 127; 28 | 29 | /* private key: a = n + t */ 30 | if (private_key) { 31 | sc_muladd(private_key, SC_1, n, private_key); 32 | 33 | // https://github.com/orlp/ed25519/issues/3 34 | sha512_init(&hash); 35 | sha512_update(&hash, private_key + 32, 32); 36 | sha512_update(&hash, scalar, 32); 37 | sha512_final(&hash, hashbuf); 38 | for (i = 0; i < 32; ++i) { 39 | private_key[32 + i] = hashbuf[i]; 40 | } 41 | } 42 | 43 | /* public key: A = nB + T */ 44 | if (public_key) { 45 | /* if we know the private key we don't need a point addition, which is faster */ 46 | /* using a "timing attack" you could find out wether or not we know the private 47 | key, but this information seems rather useless - if this is important pass 48 | public_key and private_key seperately in 2 function calls */ 49 | if (private_key) { 50 | ge_scalarmult_base(&A, private_key); 51 | } else { 52 | /* unpack public key into T */ 53 | ge_frombytes_negate_vartime(&public_key_unpacked, public_key); 54 | fe_neg(public_key_unpacked.X, public_key_unpacked.X); /* undo negate */ 55 | fe_neg(public_key_unpacked.T, public_key_unpacked.T); /* undo negate */ 56 | ge_p3_to_cached(&T, &public_key_unpacked); 57 | 58 | /* calculate n*B */ 59 | ge_scalarmult_base(&nB, n); 60 | 61 | /* A = n*B + T */ 62 | ge_add(&A_p1p1, &nB, &T); 63 | ge_p1p1_to_p3(&A, &A_p1p1); 64 | } 65 | 66 | /* pack public key */ 67 | ge_p3_tobytes(public_key, &A); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | pwd=$PWD 5 | cd "$(dirname "$0")" 6 | 7 | echo --- Build 8 | ( 9 | set -x 10 | make OUT="$pwd"/libs 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/ed25519.h: -------------------------------------------------------------------------------- 1 | #ifndef ED25519_H 2 | #define ED25519_H 3 | 4 | #include 5 | 6 | #if defined(_WIN32) 7 | #if defined(ED25519_BUILD_DLL) 8 | #define ED25519_DECLSPEC __declspec(dllexport) 9 | #elif defined(ED25519_DLL) 10 | #define ED25519_DECLSPEC __declspec(dllimport) 11 | #else 12 | #define ED25519_DECLSPEC 13 | #endif 14 | #else 15 | #define ED25519_DECLSPEC 16 | #endif 17 | 18 | 19 | #ifdef __cplusplus 20 | extern "C" { 21 | #endif 22 | 23 | #ifndef ED25519_NO_SEED 24 | int ED25519_DECLSPEC ed25519_create_seed(unsigned char *seed); 25 | #endif 26 | 27 | void ED25519_DECLSPEC ed25519_create_keypair(unsigned char *public_key, unsigned char *private_key, const unsigned char *seed); 28 | void ED25519_DECLSPEC ed25519_sign(unsigned char *signature, const unsigned char *message, size_t message_len, const unsigned char *public_key, const unsigned char *private_key); 29 | int ED25519_DECLSPEC ed25519_verify(const unsigned char *signature, const unsigned char *message, size_t message_len, const unsigned char *public_key); 30 | void ED25519_DECLSPEC ed25519_add_scalar(unsigned char *public_key, unsigned char *private_key, const unsigned char *scalar); 31 | void ED25519_DECLSPEC ed25519_key_exchange(unsigned char *shared_secret, const unsigned char *public_key, const unsigned char *private_key); 32 | 33 | 34 | #ifdef __cplusplus 35 | } 36 | #endif 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/fe.h: -------------------------------------------------------------------------------- 1 | #ifndef FE_H 2 | #define FE_H 3 | 4 | #include "fixedint.h" 5 | 6 | 7 | /* 8 | fe means field element. 9 | Here the field is \Z/(2^255-19). 10 | An element t, entries t[0]...t[9], represents the integer 11 | t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9]. 12 | Bounds on each t[i] vary depending on context. 13 | */ 14 | 15 | 16 | typedef int32_t fe[10]; 17 | 18 | 19 | void fe_0(fe h); 20 | void fe_1(fe h); 21 | 22 | void fe_frombytes(fe h, const unsigned char *s); 23 | void fe_tobytes(unsigned char *s, const fe h); 24 | 25 | void fe_copy(fe h, const fe f); 26 | int fe_isnegative(const fe f); 27 | int fe_isnonzero(const fe f); 28 | void fe_cmov(fe f, const fe g, unsigned int b); 29 | void fe_cswap(fe f, fe g, unsigned int b); 30 | 31 | void fe_neg(fe h, const fe f); 32 | void fe_add(fe h, const fe f, const fe g); 33 | void fe_invert(fe out, const fe z); 34 | void fe_sq(fe h, const fe f); 35 | void fe_sq2(fe h, const fe f); 36 | void fe_mul(fe h, const fe f, const fe g); 37 | void fe_mul121666(fe h, fe f); 38 | void fe_pow22523(fe out, const fe z); 39 | void fe_sub(fe h, const fe f, const fe g); 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/fixedint.h: -------------------------------------------------------------------------------- 1 | /* 2 | Portable header to provide the 32 and 64 bits type. 3 | 4 | Not a compatible replacement for , do not blindly use it as such. 5 | */ 6 | 7 | #if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined(__WATCOMC__) && (defined(_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_) || defined(__UINT_FAST64_TYPE__)) )) && !defined(FIXEDINT_H_INCLUDED) 8 | #include 9 | #define FIXEDINT_H_INCLUDED 10 | 11 | #if defined(__WATCOMC__) && __WATCOMC__ >= 1250 && !defined(UINT64_C) 12 | #include 13 | #define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX)) 14 | #endif 15 | #endif 16 | 17 | 18 | #ifndef FIXEDINT_H_INCLUDED 19 | #define FIXEDINT_H_INCLUDED 20 | 21 | #include 22 | 23 | /* (u)int32_t */ 24 | #ifndef uint32_t 25 | #if (ULONG_MAX == 0xffffffffUL) 26 | typedef unsigned long uint32_t; 27 | #elif (UINT_MAX == 0xffffffffUL) 28 | typedef unsigned int uint32_t; 29 | #elif (USHRT_MAX == 0xffffffffUL) 30 | typedef unsigned short uint32_t; 31 | #endif 32 | #endif 33 | 34 | 35 | #ifndef int32_t 36 | #if (LONG_MAX == 0x7fffffffL) 37 | typedef signed long int32_t; 38 | #elif (INT_MAX == 0x7fffffffL) 39 | typedef signed int int32_t; 40 | #elif (SHRT_MAX == 0x7fffffffL) 41 | typedef signed short int32_t; 42 | #endif 43 | #endif 44 | 45 | 46 | /* (u)int64_t */ 47 | #if (defined(__STDC__) && defined(__STDC_VERSION__) && __STDC__ && __STDC_VERSION__ >= 199901L) 48 | typedef long long int64_t; 49 | typedef unsigned long long uint64_t; 50 | 51 | #define UINT64_C(v) v ##ULL 52 | #define INT64_C(v) v ##LL 53 | #elif defined(__GNUC__) 54 | __extension__ typedef long long int64_t; 55 | __extension__ typedef unsigned long long uint64_t; 56 | 57 | #define UINT64_C(v) v ##ULL 58 | #define INT64_C(v) v ##LL 59 | #elif defined(__MWERKS__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) || defined(__APPLE_CC__) || defined(_LONG_LONG) || defined(_CRAYC) 60 | typedef long long int64_t; 61 | typedef unsigned long long uint64_t; 62 | 63 | #define UINT64_C(v) v ##ULL 64 | #define INT64_C(v) v ##LL 65 | #elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined(__BORLANDC__) && __BORLANDC__ > 0x460) || defined(__alpha) || defined(__DECC) 66 | typedef __int64 int64_t; 67 | typedef unsigned __int64 uint64_t; 68 | 69 | #define UINT64_C(v) v ##UI64 70 | #define INT64_C(v) v ##I64 71 | #endif 72 | #endif 73 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/ge.c: -------------------------------------------------------------------------------- 1 | #include "ge.h" 2 | #include "precomp_data.h" 3 | 4 | 5 | /* 6 | r = p + q 7 | */ 8 | 9 | void ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) { 10 | fe t0; 11 | fe_add(r->X, p->Y, p->X); 12 | fe_sub(r->Y, p->Y, p->X); 13 | fe_mul(r->Z, r->X, q->YplusX); 14 | fe_mul(r->Y, r->Y, q->YminusX); 15 | fe_mul(r->T, q->T2d, p->T); 16 | fe_mul(r->X, p->Z, q->Z); 17 | fe_add(t0, r->X, r->X); 18 | fe_sub(r->X, r->Z, r->Y); 19 | fe_add(r->Y, r->Z, r->Y); 20 | fe_add(r->Z, t0, r->T); 21 | fe_sub(r->T, t0, r->T); 22 | } 23 | 24 | 25 | static void slide(signed char *r, const unsigned char *a) { 26 | int i; 27 | int b; 28 | int k; 29 | 30 | for (i = 0; i < 256; ++i) { 31 | r[i] = 1 & (a[i >> 3] >> (i & 7)); 32 | } 33 | 34 | for (i = 0; i < 256; ++i) 35 | if (r[i]) { 36 | for (b = 1; b <= 6 && i + b < 256; ++b) { 37 | if (r[i + b]) { 38 | if (r[i] + (r[i + b] << b) <= 15) { 39 | r[i] += r[i + b] << b; 40 | r[i + b] = 0; 41 | } else if (r[i] - (r[i + b] << b) >= -15) { 42 | r[i] -= r[i + b] << b; 43 | 44 | for (k = i + b; k < 256; ++k) { 45 | if (!r[k]) { 46 | r[k] = 1; 47 | break; 48 | } 49 | 50 | r[k] = 0; 51 | } 52 | } else { 53 | break; 54 | } 55 | } 56 | } 57 | } 58 | } 59 | 60 | /* 61 | r = a * A + b * B 62 | where a = a[0]+256*a[1]+...+256^31 a[31]. 63 | and b = b[0]+256*b[1]+...+256^31 b[31]. 64 | B is the Ed25519 base point (x,4/5) with x positive. 65 | */ 66 | 67 | void ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a, const ge_p3 *A, const unsigned char *b) { 68 | signed char aslide[256]; 69 | signed char bslide[256]; 70 | ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */ 71 | ge_p1p1 t; 72 | ge_p3 u; 73 | ge_p3 A2; 74 | int i; 75 | slide(aslide, a); 76 | slide(bslide, b); 77 | ge_p3_to_cached(&Ai[0], A); 78 | ge_p3_dbl(&t, A); 79 | ge_p1p1_to_p3(&A2, &t); 80 | ge_add(&t, &A2, &Ai[0]); 81 | ge_p1p1_to_p3(&u, &t); 82 | ge_p3_to_cached(&Ai[1], &u); 83 | ge_add(&t, &A2, &Ai[1]); 84 | ge_p1p1_to_p3(&u, &t); 85 | ge_p3_to_cached(&Ai[2], &u); 86 | ge_add(&t, &A2, &Ai[2]); 87 | ge_p1p1_to_p3(&u, &t); 88 | ge_p3_to_cached(&Ai[3], &u); 89 | ge_add(&t, &A2, &Ai[3]); 90 | ge_p1p1_to_p3(&u, &t); 91 | ge_p3_to_cached(&Ai[4], &u); 92 | ge_add(&t, &A2, &Ai[4]); 93 | ge_p1p1_to_p3(&u, &t); 94 | ge_p3_to_cached(&Ai[5], &u); 95 | ge_add(&t, &A2, &Ai[5]); 96 | ge_p1p1_to_p3(&u, &t); 97 | ge_p3_to_cached(&Ai[6], &u); 98 | ge_add(&t, &A2, &Ai[6]); 99 | ge_p1p1_to_p3(&u, &t); 100 | ge_p3_to_cached(&Ai[7], &u); 101 | ge_p2_0(r); 102 | 103 | for (i = 255; i >= 0; --i) { 104 | if (aslide[i] || bslide[i]) { 105 | break; 106 | } 107 | } 108 | 109 | for (; i >= 0; --i) { 110 | ge_p2_dbl(&t, r); 111 | 112 | if (aslide[i] > 0) { 113 | ge_p1p1_to_p3(&u, &t); 114 | ge_add(&t, &u, &Ai[aslide[i] / 2]); 115 | } else if (aslide[i] < 0) { 116 | ge_p1p1_to_p3(&u, &t); 117 | ge_sub(&t, &u, &Ai[(-aslide[i]) / 2]); 118 | } 119 | 120 | if (bslide[i] > 0) { 121 | ge_p1p1_to_p3(&u, &t); 122 | ge_madd(&t, &u, &Bi[bslide[i] / 2]); 123 | } else if (bslide[i] < 0) { 124 | ge_p1p1_to_p3(&u, &t); 125 | ge_msub(&t, &u, &Bi[(-bslide[i]) / 2]); 126 | } 127 | 128 | ge_p1p1_to_p2(r, &t); 129 | } 130 | } 131 | 132 | 133 | static const fe d = { 134 | -10913610, 13857413, -15372611, 6949391, 114729, -8787816, -6275908, -3247719, -18696448, -12055116 135 | }; 136 | 137 | static const fe sqrtm1 = { 138 | -32595792, -7943725, 9377950, 3500415, 12389472, -272473, -25146209, -2005654, 326686, 11406482 139 | }; 140 | 141 | int ge_frombytes_negate_vartime(ge_p3 *h, const unsigned char *s) { 142 | fe u; 143 | fe v; 144 | fe v3; 145 | fe vxx; 146 | fe check; 147 | fe_frombytes(h->Y, s); 148 | fe_1(h->Z); 149 | fe_sq(u, h->Y); 150 | fe_mul(v, u, d); 151 | fe_sub(u, u, h->Z); /* u = y^2-1 */ 152 | fe_add(v, v, h->Z); /* v = dy^2+1 */ 153 | fe_sq(v3, v); 154 | fe_mul(v3, v3, v); /* v3 = v^3 */ 155 | fe_sq(h->X, v3); 156 | fe_mul(h->X, h->X, v); 157 | fe_mul(h->X, h->X, u); /* x = uv^7 */ 158 | fe_pow22523(h->X, h->X); /* x = (uv^7)^((q-5)/8) */ 159 | fe_mul(h->X, h->X, v3); 160 | fe_mul(h->X, h->X, u); /* x = uv^3(uv^7)^((q-5)/8) */ 161 | fe_sq(vxx, h->X); 162 | fe_mul(vxx, vxx, v); 163 | fe_sub(check, vxx, u); /* vx^2-u */ 164 | 165 | if (fe_isnonzero(check)) { 166 | fe_add(check, vxx, u); /* vx^2+u */ 167 | 168 | if (fe_isnonzero(check)) { 169 | return -1; 170 | } 171 | 172 | fe_mul(h->X, h->X, sqrtm1); 173 | } 174 | 175 | if (fe_isnegative(h->X) == (s[31] >> 7)) { 176 | fe_neg(h->X, h->X); 177 | } 178 | 179 | fe_mul(h->T, h->X, h->Y); 180 | return 0; 181 | } 182 | 183 | 184 | /* 185 | r = p + q 186 | */ 187 | 188 | void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) { 189 | fe t0; 190 | fe_add(r->X, p->Y, p->X); 191 | fe_sub(r->Y, p->Y, p->X); 192 | fe_mul(r->Z, r->X, q->yplusx); 193 | fe_mul(r->Y, r->Y, q->yminusx); 194 | fe_mul(r->T, q->xy2d, p->T); 195 | fe_add(t0, p->Z, p->Z); 196 | fe_sub(r->X, r->Z, r->Y); 197 | fe_add(r->Y, r->Z, r->Y); 198 | fe_add(r->Z, t0, r->T); 199 | fe_sub(r->T, t0, r->T); 200 | } 201 | 202 | 203 | /* 204 | r = p - q 205 | */ 206 | 207 | void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) { 208 | fe t0; 209 | 210 | fe_add(r->X, p->Y, p->X); 211 | fe_sub(r->Y, p->Y, p->X); 212 | fe_mul(r->Z, r->X, q->yminusx); 213 | fe_mul(r->Y, r->Y, q->yplusx); 214 | fe_mul(r->T, q->xy2d, p->T); 215 | fe_add(t0, p->Z, p->Z); 216 | fe_sub(r->X, r->Z, r->Y); 217 | fe_add(r->Y, r->Z, r->Y); 218 | fe_sub(r->Z, t0, r->T); 219 | fe_add(r->T, t0, r->T); 220 | } 221 | 222 | 223 | /* 224 | r = p 225 | */ 226 | 227 | void ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p) { 228 | fe_mul(r->X, p->X, p->T); 229 | fe_mul(r->Y, p->Y, p->Z); 230 | fe_mul(r->Z, p->Z, p->T); 231 | } 232 | 233 | 234 | 235 | /* 236 | r = p 237 | */ 238 | 239 | void ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) { 240 | fe_mul(r->X, p->X, p->T); 241 | fe_mul(r->Y, p->Y, p->Z); 242 | fe_mul(r->Z, p->Z, p->T); 243 | fe_mul(r->T, p->X, p->Y); 244 | } 245 | 246 | 247 | void ge_p2_0(ge_p2 *h) { 248 | fe_0(h->X); 249 | fe_1(h->Y); 250 | fe_1(h->Z); 251 | } 252 | 253 | 254 | 255 | /* 256 | r = 2 * p 257 | */ 258 | 259 | void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) { 260 | fe t0; 261 | 262 | fe_sq(r->X, p->X); 263 | fe_sq(r->Z, p->Y); 264 | fe_sq2(r->T, p->Z); 265 | fe_add(r->Y, p->X, p->Y); 266 | fe_sq(t0, r->Y); 267 | fe_add(r->Y, r->Z, r->X); 268 | fe_sub(r->Z, r->Z, r->X); 269 | fe_sub(r->X, t0, r->Y); 270 | fe_sub(r->T, r->T, r->Z); 271 | } 272 | 273 | 274 | void ge_p3_0(ge_p3 *h) { 275 | fe_0(h->X); 276 | fe_1(h->Y); 277 | fe_1(h->Z); 278 | fe_0(h->T); 279 | } 280 | 281 | 282 | /* 283 | r = 2 * p 284 | */ 285 | 286 | void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p) { 287 | ge_p2 q; 288 | ge_p3_to_p2(&q, p); 289 | ge_p2_dbl(r, &q); 290 | } 291 | 292 | 293 | 294 | /* 295 | r = p 296 | */ 297 | 298 | static const fe d2 = { 299 | -21827239, -5839606, -30745221, 13898782, 229458, 15978800, -12551817, -6495438, 29715968, 9444199 300 | }; 301 | 302 | void ge_p3_to_cached(ge_cached *r, const ge_p3 *p) { 303 | fe_add(r->YplusX, p->Y, p->X); 304 | fe_sub(r->YminusX, p->Y, p->X); 305 | fe_copy(r->Z, p->Z); 306 | fe_mul(r->T2d, p->T, d2); 307 | } 308 | 309 | 310 | /* 311 | r = p 312 | */ 313 | 314 | void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) { 315 | fe_copy(r->X, p->X); 316 | fe_copy(r->Y, p->Y); 317 | fe_copy(r->Z, p->Z); 318 | } 319 | 320 | 321 | void ge_p3_tobytes(unsigned char *s, const ge_p3 *h) { 322 | fe recip; 323 | fe x; 324 | fe y; 325 | fe_invert(recip, h->Z); 326 | fe_mul(x, h->X, recip); 327 | fe_mul(y, h->Y, recip); 328 | fe_tobytes(s, y); 329 | s[31] ^= fe_isnegative(x) << 7; 330 | } 331 | 332 | 333 | static unsigned char equal(signed char b, signed char c) { 334 | unsigned char ub = b; 335 | unsigned char uc = c; 336 | unsigned char x = ub ^ uc; /* 0: yes; 1..255: no */ 337 | uint64_t y = x; /* 0: yes; 1..255: no */ 338 | y -= 1; /* large: yes; 0..254: no */ 339 | y >>= 63; /* 1: yes; 0: no */ 340 | return (unsigned char) y; 341 | } 342 | 343 | static unsigned char negative(signed char b) { 344 | uint64_t x = b; /* 18446744073709551361..18446744073709551615: yes; 0..255: no */ 345 | x >>= 63; /* 1: yes; 0: no */ 346 | return (unsigned char) x; 347 | } 348 | 349 | static void cmov(ge_precomp *t, const ge_precomp *u, unsigned char b) { 350 | fe_cmov(t->yplusx, u->yplusx, b); 351 | fe_cmov(t->yminusx, u->yminusx, b); 352 | fe_cmov(t->xy2d, u->xy2d, b); 353 | } 354 | 355 | 356 | static void select(ge_precomp *t, int pos, signed char b) { 357 | ge_precomp minust; 358 | unsigned char bnegative = negative(b); 359 | unsigned char babs = b - (((-bnegative) & b) << 1); 360 | fe_1(t->yplusx); 361 | fe_1(t->yminusx); 362 | fe_0(t->xy2d); 363 | cmov(t, &base[pos][0], equal(babs, 1)); 364 | cmov(t, &base[pos][1], equal(babs, 2)); 365 | cmov(t, &base[pos][2], equal(babs, 3)); 366 | cmov(t, &base[pos][3], equal(babs, 4)); 367 | cmov(t, &base[pos][4], equal(babs, 5)); 368 | cmov(t, &base[pos][5], equal(babs, 6)); 369 | cmov(t, &base[pos][6], equal(babs, 7)); 370 | cmov(t, &base[pos][7], equal(babs, 8)); 371 | fe_copy(minust.yplusx, t->yminusx); 372 | fe_copy(minust.yminusx, t->yplusx); 373 | fe_neg(minust.xy2d, t->xy2d); 374 | cmov(t, &minust, bnegative); 375 | } 376 | 377 | /* 378 | h = a * B 379 | where a = a[0]+256*a[1]+...+256^31 a[31] 380 | B is the Ed25519 base point (x,4/5) with x positive. 381 | 382 | Preconditions: 383 | a[31] <= 127 384 | */ 385 | 386 | void ge_scalarmult_base(ge_p3 *h, const unsigned char *a) { 387 | signed char e[64]; 388 | signed char carry; 389 | ge_p1p1 r; 390 | ge_p2 s; 391 | ge_precomp t; 392 | int i; 393 | 394 | for (i = 0; i < 32; ++i) { 395 | e[2 * i + 0] = (a[i] >> 0) & 15; 396 | e[2 * i + 1] = (a[i] >> 4) & 15; 397 | } 398 | 399 | /* each e[i] is between 0 and 15 */ 400 | /* e[63] is between 0 and 7 */ 401 | carry = 0; 402 | 403 | for (i = 0; i < 63; ++i) { 404 | e[i] += carry; 405 | carry = e[i] + 8; 406 | carry >>= 4; 407 | e[i] -= carry << 4; 408 | } 409 | 410 | e[63] += carry; 411 | /* each e[i] is between -8 and 8 */ 412 | ge_p3_0(h); 413 | 414 | for (i = 1; i < 64; i += 2) { 415 | select(&t, i / 2, e[i]); 416 | ge_madd(&r, h, &t); 417 | ge_p1p1_to_p3(h, &r); 418 | } 419 | 420 | ge_p3_dbl(&r, h); 421 | ge_p1p1_to_p2(&s, &r); 422 | ge_p2_dbl(&r, &s); 423 | ge_p1p1_to_p2(&s, &r); 424 | ge_p2_dbl(&r, &s); 425 | ge_p1p1_to_p2(&s, &r); 426 | ge_p2_dbl(&r, &s); 427 | ge_p1p1_to_p3(h, &r); 428 | 429 | for (i = 0; i < 64; i += 2) { 430 | select(&t, i / 2, e[i]); 431 | ge_madd(&r, h, &t); 432 | ge_p1p1_to_p3(h, &r); 433 | } 434 | } 435 | 436 | 437 | /* 438 | r = p - q 439 | */ 440 | 441 | void ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) { 442 | fe t0; 443 | 444 | fe_add(r->X, p->Y, p->X); 445 | fe_sub(r->Y, p->Y, p->X); 446 | fe_mul(r->Z, r->X, q->YminusX); 447 | fe_mul(r->Y, r->Y, q->YplusX); 448 | fe_mul(r->T, q->T2d, p->T); 449 | fe_mul(r->X, p->Z, q->Z); 450 | fe_add(t0, r->X, r->X); 451 | fe_sub(r->X, r->Z, r->Y); 452 | fe_add(r->Y, r->Z, r->Y); 453 | fe_sub(r->Z, t0, r->T); 454 | fe_add(r->T, t0, r->T); 455 | } 456 | 457 | 458 | void ge_tobytes(unsigned char *s, const ge_p2 *h) { 459 | fe recip; 460 | fe x; 461 | fe y; 462 | fe_invert(recip, h->Z); 463 | fe_mul(x, h->X, recip); 464 | fe_mul(y, h->Y, recip); 465 | fe_tobytes(s, y); 466 | s[31] ^= fe_isnegative(x) << 7; 467 | } 468 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/ge.h: -------------------------------------------------------------------------------- 1 | #ifndef GE_H 2 | #define GE_H 3 | 4 | #include "fe.h" 5 | 6 | 7 | /* 8 | ge means group element. 9 | 10 | Here the group is the set of pairs (x,y) of field elements (see fe.h) 11 | satisfying -x^2 + y^2 = 1 + d x^2y^2 12 | where d = -121665/121666. 13 | 14 | Representations: 15 | ge_p2 (projective): (X:Y:Z) satisfying x=X/Z, y=Y/Z 16 | ge_p3 (extended): (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT 17 | ge_p1p1 (completed): ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T 18 | ge_precomp (Duif): (y+x,y-x,2dxy) 19 | */ 20 | 21 | typedef struct { 22 | fe X; 23 | fe Y; 24 | fe Z; 25 | } ge_p2; 26 | 27 | typedef struct { 28 | fe X; 29 | fe Y; 30 | fe Z; 31 | fe T; 32 | } ge_p3; 33 | 34 | typedef struct { 35 | fe X; 36 | fe Y; 37 | fe Z; 38 | fe T; 39 | } ge_p1p1; 40 | 41 | typedef struct { 42 | fe yplusx; 43 | fe yminusx; 44 | fe xy2d; 45 | } ge_precomp; 46 | 47 | typedef struct { 48 | fe YplusX; 49 | fe YminusX; 50 | fe Z; 51 | fe T2d; 52 | } ge_cached; 53 | 54 | void ge_p3_tobytes(unsigned char *s, const ge_p3 *h); 55 | void ge_tobytes(unsigned char *s, const ge_p2 *h); 56 | int ge_frombytes_negate_vartime(ge_p3 *h, const unsigned char *s); 57 | 58 | void ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q); 59 | void ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q); 60 | void ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a, const ge_p3 *A, const unsigned char *b); 61 | void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q); 62 | void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q); 63 | void ge_scalarmult_base(ge_p3 *h, const unsigned char *a); 64 | 65 | void ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p); 66 | void ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p); 67 | void ge_p2_0(ge_p2 *h); 68 | void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p); 69 | void ge_p3_0(ge_p3 *h); 70 | void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p); 71 | void ge_p3_to_cached(ge_cached *r, const ge_p3 *p); 72 | void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p); 73 | 74 | #endif 75 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/key_exchange.c: -------------------------------------------------------------------------------- 1 | #include "ed25519.h" 2 | #include "fe.h" 3 | 4 | void ed25519_key_exchange(unsigned char *shared_secret, const unsigned char *public_key, const unsigned char *private_key) { 5 | unsigned char e[32]; 6 | unsigned int i; 7 | 8 | fe x1; 9 | fe x2; 10 | fe z2; 11 | fe x3; 12 | fe z3; 13 | fe tmp0; 14 | fe tmp1; 15 | 16 | int pos; 17 | unsigned int swap; 18 | unsigned int b; 19 | 20 | /* copy the private key and make sure it's valid */ 21 | for (i = 0; i < 32; ++i) { 22 | e[i] = private_key[i]; 23 | } 24 | 25 | e[0] &= 248; 26 | e[31] &= 63; 27 | e[31] |= 64; 28 | 29 | /* unpack the public key and convert edwards to montgomery */ 30 | /* due to CodesInChaos: montgomeryX = (edwardsY + 1)*inverse(1 - edwardsY) mod p */ 31 | fe_frombytes(x1, public_key); 32 | fe_1(tmp1); 33 | fe_add(tmp0, x1, tmp1); 34 | fe_sub(tmp1, tmp1, x1); 35 | fe_invert(tmp1, tmp1); 36 | fe_mul(x1, tmp0, tmp1); 37 | 38 | fe_1(x2); 39 | fe_0(z2); 40 | fe_copy(x3, x1); 41 | fe_1(z3); 42 | 43 | swap = 0; 44 | for (pos = 254; pos >= 0; --pos) { 45 | b = e[pos / 8] >> (pos & 7); 46 | b &= 1; 47 | swap ^= b; 48 | fe_cswap(x2, x3, swap); 49 | fe_cswap(z2, z3, swap); 50 | swap = b; 51 | 52 | /* from montgomery.h */ 53 | fe_sub(tmp0, x3, z3); 54 | fe_sub(tmp1, x2, z2); 55 | fe_add(x2, x2, z2); 56 | fe_add(z2, x3, z3); 57 | fe_mul(z3, tmp0, x2); 58 | fe_mul(z2, z2, tmp1); 59 | fe_sq(tmp0, tmp1); 60 | fe_sq(tmp1, x2); 61 | fe_add(x3, z3, z2); 62 | fe_sub(z2, z3, z2); 63 | fe_mul(x2, tmp1, tmp0); 64 | fe_sub(tmp1, tmp1, tmp0); 65 | fe_sq(z2, z2); 66 | fe_mul121666(z3, tmp1); 67 | fe_sq(x3, x3); 68 | fe_add(tmp0, tmp0, z3); 69 | fe_mul(z3, x1, z2); 70 | fe_mul(z2, tmp1, tmp0); 71 | } 72 | 73 | fe_cswap(x2, x3, swap); 74 | fe_cswap(z2, z3, swap); 75 | 76 | fe_invert(z2, z2); 77 | fe_mul(x2, x2, z2); 78 | fe_tobytes(shared_secret, x2); 79 | } 80 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/keypair.c: -------------------------------------------------------------------------------- 1 | #include "ed25519.h" 2 | #include "sha512.h" 3 | #include "ge.h" 4 | 5 | 6 | void ed25519_create_keypair(unsigned char *public_key, unsigned char *private_key, const unsigned char *seed) { 7 | ge_p3 A; 8 | 9 | sha512(seed, 32, private_key); 10 | private_key[0] &= 248; 11 | private_key[31] &= 63; 12 | private_key[31] |= 64; 13 | 14 | ge_scalarmult_base(&A, private_key); 15 | ge_p3_tobytes(public_key, &A); 16 | } 17 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/sc.h: -------------------------------------------------------------------------------- 1 | #ifndef SC_H 2 | #define SC_H 3 | 4 | /* 5 | The set of scalars is \Z/l 6 | where l = 2^252 + 27742317777372353535851937790883648493. 7 | */ 8 | 9 | void sc_reduce(unsigned char *s); 10 | void sc_muladd(unsigned char *s, const unsigned char *a, const unsigned char *b, const unsigned char *c); 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/seed.c: -------------------------------------------------------------------------------- 1 | #include "ed25519.h" 2 | 3 | #ifndef ED25519_NO_SEED 4 | 5 | #ifdef _WIN32 6 | #include 7 | #include 8 | #else 9 | #include 10 | #endif 11 | 12 | int ed25519_create_seed(unsigned char *seed) { 13 | #ifndef SGX_COMPAT 14 | #ifdef _WIN32 15 | HCRYPTPROV prov; 16 | 17 | if (!CryptAcquireContext(&prov, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) { 18 | return 1; 19 | } 20 | 21 | if (!CryptGenRandom(prov, 32, seed)) { 22 | CryptReleaseContext(prov, 0); 23 | return 1; 24 | } 25 | 26 | CryptReleaseContext(prov, 0); 27 | #else 28 | FILE *f = fopen("/dev/urandom", "rb"); 29 | 30 | if (f == NULL) { 31 | return 1; 32 | } 33 | 34 | size_t ret = fread(seed, 1, 32, f); 35 | if (ret != 32) { 36 | fprintf(stderr, "Seed read error"); 37 | return 1; 38 | } 39 | fclose(f); 40 | #endif 41 | #endif 42 | 43 | return 0; 44 | } 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/sha512.c: -------------------------------------------------------------------------------- 1 | /* LibTomCrypt, modular cryptographic library -- Tom St Denis 2 | * 3 | * LibTomCrypt is a library that provides various cryptographic 4 | * algorithms in a highly modular and flexible manner. 5 | * 6 | * The library is free for all purposes without any express 7 | * guarantee it works. 8 | * 9 | * Tom St Denis, tomstdenis@gmail.com, http://libtom.org 10 | */ 11 | 12 | #include "fixedint.h" 13 | #include "sha512.h" 14 | 15 | /* the K array */ 16 | static const uint64_t K[80] = { 17 | UINT64_C(0x428a2f98d728ae22), UINT64_C(0x7137449123ef65cd), 18 | UINT64_C(0xb5c0fbcfec4d3b2f), UINT64_C(0xe9b5dba58189dbbc), 19 | UINT64_C(0x3956c25bf348b538), UINT64_C(0x59f111f1b605d019), 20 | UINT64_C(0x923f82a4af194f9b), UINT64_C(0xab1c5ed5da6d8118), 21 | UINT64_C(0xd807aa98a3030242), UINT64_C(0x12835b0145706fbe), 22 | UINT64_C(0x243185be4ee4b28c), UINT64_C(0x550c7dc3d5ffb4e2), 23 | UINT64_C(0x72be5d74f27b896f), UINT64_C(0x80deb1fe3b1696b1), 24 | UINT64_C(0x9bdc06a725c71235), UINT64_C(0xc19bf174cf692694), 25 | UINT64_C(0xe49b69c19ef14ad2), UINT64_C(0xefbe4786384f25e3), 26 | UINT64_C(0x0fc19dc68b8cd5b5), UINT64_C(0x240ca1cc77ac9c65), 27 | UINT64_C(0x2de92c6f592b0275), UINT64_C(0x4a7484aa6ea6e483), 28 | UINT64_C(0x5cb0a9dcbd41fbd4), UINT64_C(0x76f988da831153b5), 29 | UINT64_C(0x983e5152ee66dfab), UINT64_C(0xa831c66d2db43210), 30 | UINT64_C(0xb00327c898fb213f), UINT64_C(0xbf597fc7beef0ee4), 31 | UINT64_C(0xc6e00bf33da88fc2), UINT64_C(0xd5a79147930aa725), 32 | UINT64_C(0x06ca6351e003826f), UINT64_C(0x142929670a0e6e70), 33 | UINT64_C(0x27b70a8546d22ffc), UINT64_C(0x2e1b21385c26c926), 34 | UINT64_C(0x4d2c6dfc5ac42aed), UINT64_C(0x53380d139d95b3df), 35 | UINT64_C(0x650a73548baf63de), UINT64_C(0x766a0abb3c77b2a8), 36 | UINT64_C(0x81c2c92e47edaee6), UINT64_C(0x92722c851482353b), 37 | UINT64_C(0xa2bfe8a14cf10364), UINT64_C(0xa81a664bbc423001), 38 | UINT64_C(0xc24b8b70d0f89791), UINT64_C(0xc76c51a30654be30), 39 | UINT64_C(0xd192e819d6ef5218), UINT64_C(0xd69906245565a910), 40 | UINT64_C(0xf40e35855771202a), UINT64_C(0x106aa07032bbd1b8), 41 | UINT64_C(0x19a4c116b8d2d0c8), UINT64_C(0x1e376c085141ab53), 42 | UINT64_C(0x2748774cdf8eeb99), UINT64_C(0x34b0bcb5e19b48a8), 43 | UINT64_C(0x391c0cb3c5c95a63), UINT64_C(0x4ed8aa4ae3418acb), 44 | UINT64_C(0x5b9cca4f7763e373), UINT64_C(0x682e6ff3d6b2b8a3), 45 | UINT64_C(0x748f82ee5defb2fc), UINT64_C(0x78a5636f43172f60), 46 | UINT64_C(0x84c87814a1f0ab72), UINT64_C(0x8cc702081a6439ec), 47 | UINT64_C(0x90befffa23631e28), UINT64_C(0xa4506cebde82bde9), 48 | UINT64_C(0xbef9a3f7b2c67915), UINT64_C(0xc67178f2e372532b), 49 | UINT64_C(0xca273eceea26619c), UINT64_C(0xd186b8c721c0c207), 50 | UINT64_C(0xeada7dd6cde0eb1e), UINT64_C(0xf57d4f7fee6ed178), 51 | UINT64_C(0x06f067aa72176fba), UINT64_C(0x0a637dc5a2c898a6), 52 | UINT64_C(0x113f9804bef90dae), UINT64_C(0x1b710b35131c471b), 53 | UINT64_C(0x28db77f523047d84), UINT64_C(0x32caab7b40c72493), 54 | UINT64_C(0x3c9ebe0a15c9bebc), UINT64_C(0x431d67c49c100d4c), 55 | UINT64_C(0x4cc5d4becb3e42b6), UINT64_C(0x597f299cfc657e2a), 56 | UINT64_C(0x5fcb6fab3ad6faec), UINT64_C(0x6c44198c4a475817) 57 | }; 58 | 59 | /* Various logical functions */ 60 | 61 | #define ROR64c(x, y) \ 62 | ( ((((x)&UINT64_C(0xFFFFFFFFFFFFFFFF))>>((uint64_t)(y)&UINT64_C(63))) | \ 63 | ((x)<<((uint64_t)(64-((y)&UINT64_C(63)))))) & UINT64_C(0xFFFFFFFFFFFFFFFF)) 64 | 65 | #define STORE64H(x, y) \ 66 | { (y)[0] = (unsigned char)(((x)>>56)&255); (y)[1] = (unsigned char)(((x)>>48)&255); \ 67 | (y)[2] = (unsigned char)(((x)>>40)&255); (y)[3] = (unsigned char)(((x)>>32)&255); \ 68 | (y)[4] = (unsigned char)(((x)>>24)&255); (y)[5] = (unsigned char)(((x)>>16)&255); \ 69 | (y)[6] = (unsigned char)(((x)>>8)&255); (y)[7] = (unsigned char)((x)&255); } 70 | 71 | #define LOAD64H(x, y) \ 72 | { x = (((uint64_t)((y)[0] & 255))<<56)|(((uint64_t)((y)[1] & 255))<<48) | \ 73 | (((uint64_t)((y)[2] & 255))<<40)|(((uint64_t)((y)[3] & 255))<<32) | \ 74 | (((uint64_t)((y)[4] & 255))<<24)|(((uint64_t)((y)[5] & 255))<<16) | \ 75 | (((uint64_t)((y)[6] & 255))<<8)|(((uint64_t)((y)[7] & 255))); } 76 | 77 | 78 | #define Ch(x,y,z) (z ^ (x & (y ^ z))) 79 | #define Maj(x,y,z) (((x | y) & z) | (x & y)) 80 | #define S(x, n) ROR64c(x, n) 81 | #define R(x, n) (((x) &UINT64_C(0xFFFFFFFFFFFFFFFF))>>((uint64_t)n)) 82 | #define Sigma0(x) (S(x, 28) ^ S(x, 34) ^ S(x, 39)) 83 | #define Sigma1(x) (S(x, 14) ^ S(x, 18) ^ S(x, 41)) 84 | #define Gamma0(x) (S(x, 1) ^ S(x, 8) ^ R(x, 7)) 85 | #define Gamma1(x) (S(x, 19) ^ S(x, 61) ^ R(x, 6)) 86 | #ifndef MIN 87 | #define MIN(x, y) ( ((x)<(y))?(x):(y) ) 88 | #endif 89 | 90 | /* compress 1024-bits */ 91 | static int sha512_compress(sha512_context *md, unsigned char *buf) 92 | { 93 | uint64_t S[8], W[80], t0, t1; 94 | int i; 95 | 96 | /* copy state into S */ 97 | for (i = 0; i < 8; i++) { 98 | S[i] = md->state[i]; 99 | } 100 | 101 | /* copy the state into 1024-bits into W[0..15] */ 102 | for (i = 0; i < 16; i++) { 103 | LOAD64H(W[i], buf + (8*i)); 104 | } 105 | 106 | /* fill W[16..79] */ 107 | for (i = 16; i < 80; i++) { 108 | W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16]; 109 | } 110 | 111 | /* Compress */ 112 | #define RND(a,b,c,d,e,f,g,h,i) \ 113 | t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \ 114 | t1 = Sigma0(a) + Maj(a, b, c);\ 115 | d += t0; \ 116 | h = t0 + t1; 117 | 118 | for (i = 0; i < 80; i += 8) { 119 | RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0); 120 | RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1); 121 | RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2); 122 | RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3); 123 | RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4); 124 | RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5); 125 | RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6); 126 | RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7); 127 | } 128 | 129 | #undef RND 130 | 131 | 132 | 133 | /* feedback */ 134 | for (i = 0; i < 8; i++) { 135 | md->state[i] = md->state[i] + S[i]; 136 | } 137 | 138 | return 0; 139 | } 140 | 141 | 142 | /** 143 | Initialize the hash state 144 | @param md The hash state you wish to initialize 145 | @return 0 if successful 146 | */ 147 | int sha512_init(sha512_context * md) { 148 | if (md == NULL) return 1; 149 | 150 | md->curlen = 0; 151 | md->length = 0; 152 | md->state[0] = UINT64_C(0x6a09e667f3bcc908); 153 | md->state[1] = UINT64_C(0xbb67ae8584caa73b); 154 | md->state[2] = UINT64_C(0x3c6ef372fe94f82b); 155 | md->state[3] = UINT64_C(0xa54ff53a5f1d36f1); 156 | md->state[4] = UINT64_C(0x510e527fade682d1); 157 | md->state[5] = UINT64_C(0x9b05688c2b3e6c1f); 158 | md->state[6] = UINT64_C(0x1f83d9abfb41bd6b); 159 | md->state[7] = UINT64_C(0x5be0cd19137e2179); 160 | 161 | return 0; 162 | } 163 | 164 | /** 165 | Process a block of memory though the hash 166 | @param md The hash state 167 | @param in The data to hash 168 | @param inlen The length of the data (octets) 169 | @return 0 if successful 170 | */ 171 | int sha512_update (sha512_context * md, const unsigned char *in, size_t inlen) 172 | { 173 | size_t n; 174 | size_t i; 175 | int err; 176 | if (md == NULL) return 1; 177 | if (in == NULL) return 1; 178 | if (md->curlen > sizeof(md->buf)) { 179 | return 1; 180 | } 181 | while (inlen > 0) { 182 | if (md->curlen == 0 && inlen >= 128) { 183 | if ((err = sha512_compress (md, (unsigned char *)in)) != 0) { 184 | return err; 185 | } 186 | md->length += 128 * 8; 187 | in += 128; 188 | inlen -= 128; 189 | } else { 190 | n = MIN(inlen, (128 - md->curlen)); 191 | 192 | for (i = 0; i < n; i++) { 193 | md->buf[i + md->curlen] = in[i]; 194 | } 195 | 196 | 197 | md->curlen += n; 198 | in += n; 199 | inlen -= n; 200 | if (md->curlen == 128) { 201 | if ((err = sha512_compress (md, md->buf)) != 0) { 202 | return err; 203 | } 204 | md->length += 8*128; 205 | md->curlen = 0; 206 | } 207 | } 208 | } 209 | return 0; 210 | } 211 | 212 | /** 213 | Terminate the hash to get the digest 214 | @param md The hash state 215 | @param out [out] The destination of the hash (64 bytes) 216 | @return 0 if successful 217 | */ 218 | int sha512_final(sha512_context * md, unsigned char *out) 219 | { 220 | int i; 221 | 222 | if (md == NULL) return 1; 223 | if (out == NULL) return 1; 224 | 225 | if (md->curlen >= sizeof(md->buf)) { 226 | return 1; 227 | } 228 | 229 | /* increase the length of the message */ 230 | md->length += md->curlen * UINT64_C(8); 231 | 232 | /* append the '1' bit */ 233 | md->buf[md->curlen++] = (unsigned char)0x80; 234 | 235 | /* if the length is currently above 112 bytes we append zeros 236 | * then compress. Then we can fall back to padding zeros and length 237 | * encoding like normal. 238 | */ 239 | if (md->curlen > 112) { 240 | while (md->curlen < 128) { 241 | md->buf[md->curlen++] = (unsigned char)0; 242 | } 243 | sha512_compress(md, md->buf); 244 | md->curlen = 0; 245 | } 246 | 247 | /* pad upto 120 bytes of zeroes 248 | * note: that from 112 to 120 is the 64 MSB of the length. We assume that you won't hash 249 | * > 2^64 bits of data... :-) 250 | */ 251 | while (md->curlen < 120) { 252 | md->buf[md->curlen++] = (unsigned char)0; 253 | } 254 | 255 | /* store length */ 256 | STORE64H(md->length, md->buf+120); 257 | sha512_compress(md, md->buf); 258 | 259 | /* copy output */ 260 | for (i = 0; i < 8; i++) { 261 | STORE64H(md->state[i], out+(8*i)); 262 | } 263 | 264 | return 0; 265 | } 266 | 267 | int sha512(const unsigned char *message, size_t message_len, unsigned char *out) 268 | { 269 | sha512_context ctx; 270 | int ret; 271 | if ((ret = sha512_init(&ctx))) return ret; 272 | if ((ret = sha512_update(&ctx, message, message_len))) return ret; 273 | if ((ret = sha512_final(&ctx, out))) return ret; 274 | return 0; 275 | } 276 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/sha512.h: -------------------------------------------------------------------------------- 1 | #ifndef SHA512_H 2 | #define SHA512_H 3 | 4 | #include 5 | 6 | #include "fixedint.h" 7 | 8 | /* state */ 9 | typedef struct sha512_context_ { 10 | uint64_t length, state[8]; 11 | size_t curlen; 12 | unsigned char buf[128]; 13 | } sha512_context; 14 | 15 | 16 | int sha512_init(sha512_context * md); 17 | int sha512_final(sha512_context * md, unsigned char *out); 18 | int sha512_update(sha512_context * md, const unsigned char *in, size_t inlen); 19 | int sha512(const unsigned char *message, size_t message_len, unsigned char *out); 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/sign.c: -------------------------------------------------------------------------------- 1 | #include "ed25519.h" 2 | #include "sha512.h" 3 | #include "ge.h" 4 | #include "sc.h" 5 | 6 | 7 | void ed25519_sign(unsigned char *signature, const unsigned char *message, size_t message_len, const unsigned char *public_key, const unsigned char *private_key) { 8 | sha512_context hash; 9 | unsigned char hram[64]; 10 | unsigned char r[64]; 11 | ge_p3 R; 12 | 13 | 14 | sha512_init(&hash); 15 | sha512_update(&hash, private_key + 32, 32); 16 | sha512_update(&hash, message, message_len); 17 | sha512_final(&hash, r); 18 | 19 | sc_reduce(r); 20 | ge_scalarmult_base(&R, r); 21 | ge_p3_tobytes(signature, &R); 22 | 23 | sha512_init(&hash); 24 | sha512_update(&hash, signature, 32); 25 | sha512_update(&hash, public_key, 32); 26 | sha512_update(&hash, message, message_len); 27 | sha512_final(&hash, hram); 28 | 29 | sc_reduce(hram); 30 | sc_muladd(signature + 32, hram, private_key, r); 31 | } 32 | -------------------------------------------------------------------------------- /src/sgx-ecc-ed25519/verify.c: -------------------------------------------------------------------------------- 1 | #include "ed25519.h" 2 | #include "sha512.h" 3 | #include "ge.h" 4 | #include "sc.h" 5 | 6 | static int consttime_equal(const unsigned char *x, const unsigned char *y) { 7 | unsigned char r = 0; 8 | 9 | r = x[0] ^ y[0]; 10 | #define F(i) r |= x[i] ^ y[i] 11 | F(1); 12 | F(2); 13 | F(3); 14 | F(4); 15 | F(5); 16 | F(6); 17 | F(7); 18 | F(8); 19 | F(9); 20 | F(10); 21 | F(11); 22 | F(12); 23 | F(13); 24 | F(14); 25 | F(15); 26 | F(16); 27 | F(17); 28 | F(18); 29 | F(19); 30 | F(20); 31 | F(21); 32 | F(22); 33 | F(23); 34 | F(24); 35 | F(25); 36 | F(26); 37 | F(27); 38 | F(28); 39 | F(29); 40 | F(30); 41 | F(31); 42 | #undef F 43 | 44 | return !r; 45 | } 46 | 47 | int ed25519_verify(const unsigned char *signature, const unsigned char *message, size_t message_len, const unsigned char *public_key) { 48 | unsigned char h[64]; 49 | unsigned char checker[32]; 50 | sha512_context hash; 51 | ge_p3 A; 52 | ge_p2 R; 53 | 54 | if (signature[63] & 224) { 55 | return 0; 56 | } 57 | 58 | if (ge_frombytes_negate_vartime(&A, public_key) != 0) { 59 | return 0; 60 | } 61 | 62 | sha512_init(&hash); 63 | sha512_update(&hash, signature, 32); 64 | sha512_update(&hash, public_key, 32); 65 | sha512_update(&hash, message, message_len); 66 | sha512_final(&hash, h); 67 | 68 | sc_reduce(h); 69 | ge_double_scalarmult_vartime(&R, h, &A, signature + 32); 70 | ge_tobytes(checker, &R); 71 | 72 | if (!consttime_equal(checker, signature)) { 73 | return 0; 74 | } 75 | 76 | return 1; 77 | } 78 | -------------------------------------------------------------------------------- /src/sgx/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | pwd=$PWD 5 | 6 | echo --- Build 7 | cd "$(dirname "$0")/signing" 8 | ( 9 | set -x 10 | mkdir -p "$pwd"/temp 11 | openssl genrsa -out "$pwd"/temp/priv_key.pem -3 3072 12 | openssl rsa -in "$pwd"/temp/priv_key.pem -pubout -out "$pwd"/temp/pub_key.pem 13 | make LIBS_PATH="$pwd"/libs OUT="$pwd"/dist PRIV_KEY="$pwd"/temp/priv_key.pem PUB_KEY="$pwd"/temp/pub_key.pem 14 | ) 15 | 16 | echo --- Build Enclave Test 17 | cd "../test" 18 | ( 19 | set -x 20 | make LIBS_PATH="$pwd"/libs OUT="$pwd"/dist 21 | ) 22 | -------------------------------------------------------------------------------- /src/sgx/signing/Makefile: -------------------------------------------------------------------------------- 1 | SGX_SDK ?= /opt/sgxsdk 2 | SIGN_ENCLAVE ?= 1 3 | PRIV_KEY ?= ../keys/private_key.pem 4 | PUB_KEY ?= ../keys/public_key.pem 5 | LIBS_PATH ?= ../../../libs 6 | OUT ?= ../../../dist 7 | 8 | SGX_COMMON_CFLAGS := -m64 -O2 9 | SGX_LIBRARY_PATH := $(SGX_SDK)/lib64 10 | SGX_ENCLAVE_SIGNER := $(SGX_SDK)/bin/x64/sgx_sign 11 | SGX_EDGER8R := $(SGX_SDK)/bin/x64/sgx_edger8r 12 | 13 | Trusted_C_Flags := -Wno-implicit-function-declaration -std=c11 $(SGX_COMMON_CFLAGS) -nostdinc -fpie -fstack-protector \ 14 | -IInclude -I. -I$(SGX_SDK)/include -I$(SGX_SDK)/include/tlibc -I$(SGX_SDK)/include/libcxx -fno-builtin-printf \ 15 | -I. -I../../sgx-ecc-ed25519 -fvisibility=hidden 16 | 17 | Untrusted_C_Flags := -fPIC -O0 -g -Wno-attributes -I$(SGX_SDK)/include -I. -I../../sgx-ecc-ed25519 18 | Test_C_Flags := $(Untrusted_C_Flags) 19 | 20 | Link_Flags := $(SGX_COMMON_CFLAGS) -Wl,--no-undefined -nostdlib -nodefaultlibs -nostartfiles -L$(SGX_LIBRARY_PATH) \ 21 | -Wl,--whole-archive -lsgx_trts -Wl,--no-whole-archive \ 22 | -L$(LIBS_PATH) -led25519.sgx.static \ 23 | -Wl,--start-group -lsgx_tstdc -lsgx_tcxx -lsgx_tkey_exchange -lsgx_tcrypto -lsgx_tservice -Wl,--end-group \ 24 | -Wl,-Bstatic -Wl,-Bsymbolic -Wl,--no-undefined \ 25 | -Wl,-pie,-eenclave_entry -Wl,--export-dynamic \ 26 | -Wl,--defsym,__ImageBase=0 \ 27 | -Wl,--version-script=./signing.lds 28 | 29 | Trusted_C_Files := $(filter $(wildcard *_trusted.c), $(wildcard *.c)) 30 | Trusted_C_Files += signing_t.c 31 | Trusted_C_Objects := $(Trusted_C_Files:.c=.o) 32 | 33 | Untrusted_C_Files := $(filter $(wildcard *_untrusted.c), $(wildcard *.c)) 34 | Untrusted_C_Files += signing_u.c 35 | Untrusted_C_Objects := $(Untrusted_C_Files:.c=.o) 36 | 37 | .PHONY: all run 38 | 39 | ifneq ($(SIGN_ENCLAVE), 0) 40 | all: signing_u.c signing_t.c signing.signed.so libsigning.so 41 | else 42 | all: signing_u.c signing_t.c signing.so libsigning.so 43 | @echo "Build enclave signing.so success!" 44 | @echo 45 | @echo "**********************************************************************************************" 46 | @echo "PLEASE NOTE: In this mode, please sign the enclave first using Two Step Sign mechanism, before" 47 | @echo "you run the app to launch and access the enclave." 48 | @echo "**********************************************************************************************" 49 | @echo 50 | endif 51 | 52 | run: all 53 | 54 | signing_t.c: $(SGX_EDGER8R) signing.edl 55 | @echo "GEN => $@" 56 | @$(SGX_EDGER8R) --trusted signing.edl --search-path $(SGX_SDK)/include 57 | 58 | signing_u.c: $(SGX_EDGER8R) signing.edl 59 | @echo "GEN => $@" 60 | @$(SGX_EDGER8R) --untrusted signing.edl --search-path $(SGX_SDK)/include 61 | 62 | $(Trusted_C_Objects): %.o: %.c 63 | @echo "CC <= $<" 64 | $(CC) $(Trusted_C_Flags) -c $< -o $@ 65 | 66 | $(Untrusted_C_Objects): %.o: %.c 67 | @echo "CC <= $<" 68 | $(CC) $(Untrusted_C_Flags) -c $< -o $@ 69 | 70 | signing.so: signing_t.o $(Trusted_C_Objects) 71 | @echo "LINK => $@" 72 | $(CC) $^ -o $@ $(Link_Flags) 73 | mkdir -p $(OUT) 74 | cp $@ $(OUT) 75 | 76 | signing.signed.so: signing.so 77 | @echo "SIGN => $@" 78 | $(SGX_ENCLAVE_SIGNER) gendata -enclave $< -config signing.config.xml -out /tmp/enclave_hash.hex 79 | openssl dgst -sha256 -out /tmp/signature.hex -sign $(PRIV_KEY) -keyform PEM /tmp/enclave_hash.hex 80 | $(SGX_ENCLAVE_SIGNER) catsig -enclave $< -config signing.config.xml -out $@ -key $(PUB_KEY) -sig /tmp/signature.hex -unsigned /tmp/enclave_hash.hex 81 | mkdir -p $(OUT) 82 | cp $@ $(OUT) 83 | 84 | libsigning.so: signing_u.o signing_untrusted.o $(Untrusted_C_Objects) 85 | @echo "LINK => $@" 86 | $(CC) $^ -o $@ -shared -L$(SGX_LIBRARY_PATH) -lsgx_uae_service -lsgx_ukey_exchange -lsgx_urts -L$(LIBS_PATH) -led25519.static 87 | mkdir -p $(OUT) 88 | cp $@ $(OUT) 89 | cp signing_public.h $(OUT) 90 | 91 | clean: 92 | @rm -f signing_t.* signing_u.* $(Trusted_C_Objects) $(Untrusted_C_Objects) signing.signed.so signing.so libsigning.so 93 | -------------------------------------------------------------------------------- /src/sgx/signing/signing.config.xml: -------------------------------------------------------------------------------- 1 | 2 | 0 3 | 0 4 | 0x20000 5 | 0x80000 6 | 4 7 | 1 8 | 0 9 | 10 | -------------------------------------------------------------------------------- /src/sgx/signing/signing.edl: -------------------------------------------------------------------------------- 1 | /* 2 | * This file contains Solana's SGX enclave interface. 3 | */ 4 | 5 | enclave { 6 | from "sgx_tkey_exchange.edl" import *; 7 | include "sgx_key_exchange.h" 8 | include "sgx_trts.h" 9 | include "signing_internal.h" 10 | 11 | trusted { 12 | /* This function initializes enclave's remote attestation 13 | parameters. 14 | 15 | Parameters: 16 | b_pse: Use Intel's Platform Services 17 | pub_key: The caller's public key, that it'll use for 18 | communication with Intel's IAS service. 19 | pctxt: Remmote attestation context 20 | */ 21 | public sgx_status_t init_remote_attestation( 22 | int b_pse, 23 | [in] sgx_ec256_public_t* pub_key, 24 | [out] sgx_ra_context_t *pctxt); 25 | 26 | /* This function frees enclave's remote attestation 27 | context (returned by init_remote_attestation). 28 | */ 29 | public sgx_status_t close_remote_attestation(sgx_ra_context_t ctxt); 30 | 31 | /* This function initializes an ED25519 keypair in enclave. It 32 | returns the public key to the caller. The private key will 33 | be used by sign_sgx_ed25519 function to sign the data. 34 | 35 | Enclave uses lockout parameters to compute if signing the 36 | data will cause slashing. 37 | */ 38 | public sgx_status_t init_sgx_ed25519( 39 | uint32_t lockout_period, 40 | uint32_t lockout_multiplier, 41 | uint32_t lockout_max_depth, 42 | uint32_t key_len, 43 | [out, size=key_len] uint8_t* pubkey); 44 | 45 | /* This function returns sealed enclave data (keypair, lockout 46 | parameters) to the caller. The data is encrypted using 47 | enclave specific keys. 48 | */ 49 | public sgx_status_t get_sgx_ed25519_data( 50 | uint32_t data_size, 51 | [out, size=data_size] uint8_t* sealed_data, 52 | [out] uint32_t* data_size_needed); 53 | 54 | /* This function initializes enclave using sealed data. The data 55 | was sealed using get_sgx_ed25519_data() function. 56 | 57 | The caller can also update the lockout parameters. 58 | */ 59 | public sgx_status_t init_sgx_ed25519_from_data( 60 | uint32_t data_size, 61 | [in, size=data_size] uint8_t* sealed_data, 62 | uint32_t update_lockout_params, 63 | uint32_t lockout_period, 64 | uint32_t lockout_multiplier, 65 | uint32_t lockout_max_depth, 66 | uint32_t key_len, 67 | [out, size=key_len] uint8_t* pubkey); 68 | 69 | /* This function signs the message by using private key generated 70 | during init function. The caller provides the new history entries. 71 | The enclave checks the lockout parameters, past history and the 72 | new history to compute slashing conditions. The enclave will not 73 | sign the message if it'll result in slashing. 74 | */ 75 | public sgx_status_t sign_sgx_ed25519( 76 | uint32_t msg_len, 77 | [in, size=msg_len] const uint8_t* msg, 78 | uint32_t history_len, 79 | [in, count=history_len] const history_entry_t* entries, 80 | uint32_t sig_len, 81 | [out, size=sig_len] uint8_t* signature); 82 | }; 83 | }; 84 | -------------------------------------------------------------------------------- /src/sgx/signing/signing.lds: -------------------------------------------------------------------------------- 1 | signing.so 2 | { 3 | global: 4 | g_global_data_sim; 5 | g_global_data; 6 | signing_entry; 7 | local: 8 | *; 9 | }; 10 | -------------------------------------------------------------------------------- /src/sgx/signing/signing_internal.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "signing_public.h" 4 | 5 | // The following definitions are specific to ed25519 specifications 6 | #define ED25519_PRIV_KEY_LEN 64 7 | #define ED25519_SIGNATURE_LEN 64 8 | #define ED25519_SEED_LEN 32 9 | -------------------------------------------------------------------------------- /src/sgx/signing/signing_public.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include "sgx_eid.h" 5 | #include "sgx_error.h" 6 | 7 | #define ED25519_PUB_KEY_LEN 32 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | typedef struct ed25519_context { 14 | bool enclaveEnabled; 15 | sgx_enclave_id_t eid; 16 | uint8_t public_key[ED25519_PUB_KEY_LEN]; 17 | } ed25519_context_t; 18 | 19 | typedef struct history_entry { 20 | uint32_t num_hashes; 21 | uint32_t optional_input_hash[4]; 22 | uint32_t result_hash[4]; 23 | } history_entry_t; 24 | 25 | /* This function initializes SGX enclave. It loads enclave_file 26 | to SGX, which internally creates a new public/private keypair. 27 | 28 | If the platform does not support SGX, it creates a public/private 29 | keypair in untrusted space. An error is returned in this scenario. 30 | The user can choose to not use the library if SGX encalve is not 31 | being used for signing. 32 | 33 | Note: The user must release the enclave by calling release_ed25519_context() 34 | after they are done using it. 35 | */ 36 | sgx_status_t init_ed25519(const char* enclave_file, 37 | uint32_t lockout_period, 38 | uint32_t lockout_multiplier, 39 | uint32_t lockout_max_depth, 40 | ed25519_context_t* pctxt); 41 | 42 | /* This function returns the sealed data (private key and associated 43 | informatio). The sealed data can be used to reinit the enclave using 44 | init_ed25519_from_data(). 45 | */ 46 | sgx_status_t get_ed25519_data(ed25519_context_t* pctxt, 47 | uint32_t* datalen, 48 | uint8_t* data); 49 | 50 | /* This function reinitializes the enclave using sealed data. 51 | */ 52 | sgx_status_t init_ed25519_from_data(ed25519_context_t* pctxt, 53 | uint32_t datalen, 54 | uint8_t* data, 55 | uint32_t update_lockout_params, 56 | uint32_t lockout_period, 57 | uint32_t lockout_multiplier, 58 | uint32_t lockout_max_depth); 59 | 60 | /* This function signs the msg using the internally stored private 61 | key. The signature is returned in the output "signature" buffer. 62 | 63 | This function must only be called after init_ed25519() function. 64 | */ 65 | sgx_status_t sign_ed25519(ed25519_context_t* pctxt, 66 | uint32_t msg_len, 67 | const uint8_t* msg, 68 | uint32_t history_len, 69 | const history_entry_t* entries, 70 | uint32_t sig_len, 71 | uint8_t* signature); 72 | 73 | /* This function releases SGX enclave */ 74 | void release_ed25519_context(ed25519_context_t* pctxt); 75 | 76 | #ifdef __cplusplus 77 | } 78 | #endif 79 | -------------------------------------------------------------------------------- /src/sgx/signing/signing_trusted.c: -------------------------------------------------------------------------------- 1 | /* 2 | * This file contains Solana's SGX enclave code for signing data. 3 | */ 4 | 5 | #include 6 | #include 7 | 8 | #include "sgx_key.h" 9 | #include "sgx_tseal.h" 10 | 11 | #include "ed25519.h" 12 | #include "signing_t.h" 13 | 14 | typedef struct signing_parameters { 15 | bool initialized; 16 | uint8_t public_key[ED25519_PUB_KEY_LEN]; 17 | uint8_t private_key[ED25519_PRIV_KEY_LEN]; 18 | uint32_t nonce; 19 | uint32_t lockout_period; 20 | uint32_t lockout_multiplier; 21 | uint32_t lockout_max_depth; 22 | sgx_mc_uuid_t counter; 23 | uint32_t counter_value; 24 | } signing_parameters_t; 25 | 26 | static signing_parameters_t g_signing_params; 27 | 28 | sgx_status_t init_remote_attestation(int b_pse, 29 | sgx_ec256_public_t* sp_pub_key, 30 | sgx_ra_context_t* pctxt) { 31 | sgx_status_t ret; 32 | if (b_pse) { 33 | int busy_retry_times = 2; 34 | do { 35 | ret = sgx_create_pse_session(); 36 | } while (ret == SGX_ERROR_BUSY && busy_retry_times--); 37 | if (ret != SGX_SUCCESS) 38 | return ret; 39 | } 40 | ret = sgx_ra_init(sp_pub_key, b_pse, pctxt); 41 | if (b_pse) { 42 | sgx_close_pse_session(); 43 | } 44 | return ret; 45 | } 46 | 47 | sgx_status_t close_remote_attestation(sgx_ra_context_t ctxt) { 48 | return sgx_ra_close(ctxt); 49 | } 50 | 51 | /* This function creates a new public/private keypair in 52 | enclave trusted space. 53 | */ 54 | sgx_status_t init_sgx_ed25519(uint32_t lockout_period, 55 | uint32_t lockout_multiplier, 56 | uint32_t lockout_max_depth, 57 | uint32_t key_len, 58 | uint8_t* pubkey) { 59 | if (key_len < sizeof(g_signing_params.public_key)) { 60 | return SGX_ERROR_INVALID_PARAMETER; 61 | } 62 | 63 | sgx_status_t status = SGX_SUCCESS; 64 | int busy_retry_times = 3; 65 | do { 66 | status = sgx_create_pse_session(); 67 | } while (status == SGX_ERROR_BUSY && (busy_retry_times-- > 0)); 68 | 69 | if (SGX_SUCCESS != status) { 70 | return status; 71 | } 72 | 73 | status = sgx_create_monotonic_counter(&g_signing_params.counter, 74 | &g_signing_params.counter_value); 75 | sgx_close_pse_session(); 76 | if (SGX_SUCCESS != status) { 77 | return status; 78 | } 79 | 80 | uint8_t seed[ED25519_SEED_LEN]; 81 | status = sgx_read_rand(seed, sizeof(seed)); 82 | if (SGX_SUCCESS != status) { 83 | return status; 84 | } 85 | 86 | ed25519_create_keypair(g_signing_params.public_key, 87 | g_signing_params.private_key, seed); 88 | 89 | memcpy(pubkey, g_signing_params.public_key, 90 | sizeof(g_signing_params.public_key)); 91 | 92 | g_signing_params.initialized = true; 93 | g_signing_params.lockout_max_depth = lockout_max_depth; 94 | g_signing_params.lockout_multiplier = lockout_multiplier; 95 | g_signing_params.lockout_period = lockout_period; 96 | 97 | return SGX_SUCCESS; 98 | } 99 | 100 | sgx_status_t get_sgx_ed25519_data(uint32_t data_size, 101 | uint8_t* sealed_data, 102 | uint32_t* data_size_needed) { 103 | *data_size_needed = 104 | sgx_calc_sealed_data_size(0, sizeof(signing_parameters_t)); 105 | 106 | if (*data_size_needed > data_size) { 107 | return SGX_ERROR_INVALID_PARAMETER; 108 | } 109 | 110 | sgx_status_t status = sgx_read_rand((uint8_t*)&g_signing_params.nonce, 111 | sizeof(g_signing_params.nonce)); 112 | if (SGX_SUCCESS != status) { 113 | return status; 114 | } 115 | 116 | sgx_attributes_t attribute_mask; 117 | attribute_mask.flags = SGX_FLAGS_INITTED | SGX_FLAGS_DEBUG; 118 | attribute_mask.xfrm = 0x0; 119 | 120 | return sgx_seal_data_ex(SGX_KEYPOLICY_MRENCLAVE, attribute_mask, 0xF0000000, 121 | 0, NULL, sizeof(g_signing_params), 122 | (const uint8_t*)&g_signing_params, *data_size_needed, 123 | (sgx_sealed_data_t*)sealed_data); 124 | } 125 | 126 | sgx_status_t init_sgx_ed25519_from_data(uint32_t data_size, 127 | uint8_t* sealed_data, 128 | uint32_t update_lockout_params, 129 | uint32_t lockout_period, 130 | uint32_t lockout_multiplier, 131 | uint32_t lockout_max_depth, 132 | uint32_t key_len, 133 | uint8_t* pubkey) { 134 | if (key_len < sizeof(g_signing_params.public_key)) { 135 | return SGX_ERROR_INVALID_PARAMETER; 136 | } 137 | 138 | signing_parameters_t data; 139 | uint32_t datalen = sizeof(data); 140 | sgx_status_t status = sgx_unseal_data((const sgx_sealed_data_t*)sealed_data, 141 | NULL, 0, (uint8_t*)&data, &datalen); 142 | if (SGX_SUCCESS != status) { 143 | return status; 144 | } 145 | 146 | if (datalen != sizeof(data)) { 147 | return SGX_ERROR_INVALID_PARAMETER; 148 | } 149 | 150 | int busy_retry_times = 3; 151 | do { 152 | status = sgx_create_pse_session(); 153 | } while (status == SGX_ERROR_BUSY && (busy_retry_times-- > 0)); 154 | 155 | if (SGX_SUCCESS != status) { 156 | return status; 157 | } 158 | 159 | uint32_t counter_value = 0xffffffff; 160 | status = 161 | sgx_read_monotonic_counter(&g_signing_params.counter, &counter_value); 162 | if (SGX_SUCCESS != status) { 163 | sgx_close_pse_session(); 164 | return status; 165 | } 166 | 167 | if (counter_value != g_signing_params.counter_value) { 168 | sgx_close_pse_session(); 169 | return SGX_ERROR_INVALID_PARAMETER; 170 | } 171 | 172 | status = sgx_increment_monotonic_counter(&g_signing_params.counter, 173 | &g_signing_params.counter_value); 174 | 175 | sgx_close_pse_session(); 176 | if (SGX_SUCCESS != status) { 177 | return status; 178 | } 179 | 180 | memcpy(&g_signing_params, &data, sizeof(g_signing_params)); 181 | 182 | memcpy(pubkey, g_signing_params.public_key, 183 | sizeof(g_signing_params.public_key)); 184 | 185 | g_signing_params.initialized = true; 186 | if (update_lockout_params != 0) { 187 | g_signing_params.lockout_max_depth = lockout_max_depth; 188 | g_signing_params.lockout_multiplier = lockout_multiplier; 189 | g_signing_params.lockout_period = lockout_period; 190 | } 191 | return SGX_SUCCESS; 192 | } 193 | 194 | /* This function signs the msg using private key. 195 | */ 196 | sgx_status_t sign_sgx_ed25519(uint32_t msg_len, 197 | const uint8_t* msg, 198 | uint32_t history_len, 199 | const history_entry_t* entries, 200 | uint32_t sig_len, 201 | uint8_t* signature) { 202 | if (!g_signing_params.initialized) { 203 | return SGX_ERROR_INVALID_STATE; 204 | } 205 | 206 | if (sig_len < ED25519_SIGNATURE_LEN) { 207 | return SGX_ERROR_INVALID_PARAMETER; 208 | } 209 | 210 | ed25519_sign(signature, msg, msg_len, g_signing_params.public_key, 211 | g_signing_params.private_key); 212 | 213 | return SGX_SUCCESS; 214 | } 215 | -------------------------------------------------------------------------------- /src/sgx/signing/signing_untrusted.c: -------------------------------------------------------------------------------- 1 | /* 2 | * This file contains Solana's SGX enclave code for signing data. 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include "ed25519.h" 10 | #include "sgx_urts.h" 11 | #include "signing_public.h" 12 | #include "signing_u.h" 13 | 14 | /* This function initializes SGX enclave. It loads enclave_file 15 | to SGX, which internally creates a new public/private keypair. 16 | */ 17 | sgx_status_t init_ed25519(const char* enclave_file, 18 | uint32_t lockout_period, 19 | uint32_t lockout_multiplier, 20 | uint32_t lockout_max_depth, 21 | ed25519_context_t* pctxt) { 22 | int updated = 0; 23 | sgx_launch_token_t token = {0}; 24 | sgx_enclave_id_t eid; 25 | 26 | // Try to load the SGX enclave 27 | sgx_status_t status = 28 | sgx_create_enclave(enclave_file, 1, &token, &updated, &eid, NULL); 29 | 30 | if (SGX_SUCCESS != status) { 31 | return status; 32 | } 33 | 34 | sgx_status_t retval = SGX_SUCCESS; 35 | status = init_sgx_ed25519(eid, &retval, lockout_period, lockout_multiplier, 36 | lockout_max_depth, sizeof(pctxt->public_key), 37 | &pctxt->public_key[0]); 38 | 39 | if (SGX_SUCCESS != status) { 40 | sgx_destroy_enclave(eid); 41 | return status; 42 | } 43 | 44 | if (SGX_SUCCESS != retval) { 45 | sgx_destroy_enclave(eid); 46 | return retval; 47 | } 48 | 49 | pctxt->enclaveEnabled = true; 50 | pctxt->eid = eid; 51 | 52 | return status; 53 | } 54 | 55 | sgx_status_t get_ed25519_data(ed25519_context_t* pctxt, 56 | uint32_t* datalen, 57 | uint8_t* data) { 58 | if (!pctxt->enclaveEnabled) { 59 | return SGX_ERROR_INVALID_ENCLAVE; 60 | } 61 | 62 | sgx_status_t retval = SGX_SUCCESS; 63 | sgx_status_t status = 64 | get_sgx_ed25519_data(pctxt->eid, &retval, *datalen, data, datalen); 65 | 66 | if (SGX_SUCCESS != status) { 67 | return status; 68 | } 69 | 70 | if (SGX_SUCCESS != retval) { 71 | return retval; 72 | } 73 | 74 | return status; 75 | } 76 | 77 | sgx_status_t init_ed25519_from_data(ed25519_context_t* pctxt, 78 | uint32_t datalen, 79 | uint8_t* data, 80 | uint32_t update_lockout_params, 81 | uint32_t lockout_period, 82 | uint32_t lockout_multiplier, 83 | uint32_t lockout_max_depth) { 84 | if (!pctxt->enclaveEnabled) { 85 | return SGX_ERROR_INVALID_ENCLAVE; 86 | } 87 | 88 | sgx_status_t retval = SGX_SUCCESS; 89 | sgx_status_t status = init_sgx_ed25519_from_data( 90 | pctxt->eid, &retval, datalen, data, update_lockout_params, lockout_period, 91 | lockout_multiplier, lockout_max_depth, sizeof(pctxt->public_key), 92 | &pctxt->public_key[0]); 93 | 94 | if (SGX_SUCCESS != status) { 95 | return status; 96 | } 97 | 98 | if (SGX_SUCCESS != retval) { 99 | return retval; 100 | } 101 | 102 | return status; 103 | } 104 | 105 | /* This function signs the msg using the internally stored private 106 | key. The signature is returned in the output "signature" buffer. 107 | 108 | This function must only be called after init_ed25519() function. 109 | */ 110 | sgx_status_t sign_ed25519(ed25519_context_t* pctxt, 111 | uint32_t msg_len, 112 | const uint8_t* msg, 113 | uint32_t history_len, 114 | const history_entry_t* entries, 115 | uint32_t sig_len, 116 | uint8_t* signature) { 117 | if (!pctxt->enclaveEnabled) { 118 | return SGX_ERROR_INVALID_ENCLAVE; 119 | } 120 | 121 | sgx_status_t retval = SGX_SUCCESS; 122 | sgx_status_t status = 123 | sign_sgx_ed25519(pctxt->eid, &retval, msg_len, msg, history_len, entries, 124 | sig_len, signature); 125 | 126 | if (SGX_SUCCESS != status) { 127 | return status; 128 | } 129 | 130 | if (SGX_SUCCESS != retval) { 131 | return retval; 132 | } 133 | 134 | return status; 135 | } 136 | 137 | void release_ed25519_context(ed25519_context_t* pctxt) { 138 | sgx_destroy_enclave(pctxt->eid); 139 | } -------------------------------------------------------------------------------- /src/sgx/test/Makefile: -------------------------------------------------------------------------------- 1 | SGX_SDK ?= /opt/sgxsdk 2 | LIBS_PATH ?= ../../../libs 3 | OUT ?= ../../../dist 4 | 5 | C_Flags := -O2 -fpic -I. -I$(SGX_SDK)/include -I$(OUT) -I../../sgx-ecc-ed25519 6 | 7 | C_Files := $(wildcard *.c) 8 | C_Objects := $(C_Files:%.c=%.o) 9 | 10 | .PHONY: all run 11 | all: $(OUT)/signing_test 12 | run: all 13 | 14 | %.o: %.c 15 | @echo "CC <= $<" 16 | $(CC) $(C_Flags) -c $< -o $@ 17 | 18 | $(OUT)/signing_test: $(C_Objects) 19 | @mkdir -p $(OUT) 20 | $(CC) $^ -o $@ -L$(OUT) -L$(LIBS_PATH) -lsigning -led25519.static 21 | 22 | clean: 23 | @rm -rf $(C_Objects) $(OUT)/signing_test 24 | -------------------------------------------------------------------------------- /src/sgx/test/signing_test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "signing_public.h" 6 | 7 | #include "ed25519.h" 8 | 9 | void print_buffer(const uint8_t* buf, int len) { 10 | char str[BUFSIZ] = {'\0'}; 11 | int offset = 0; 12 | for (int i = 0; i < len; i++) { 13 | offset += snprintf(&str[offset], BUFSIZ - offset, "0x%02x ", buf[i]); 14 | if (!((i + 1) % 8)) 15 | offset += snprintf(&str[offset], BUFSIZ - offset, "\n"); 16 | } 17 | offset += snprintf(&str[offset], BUFSIZ - offset, "\n"); 18 | printf("%s", str); 19 | } 20 | 21 | int main(int argc, char* argv[]) { 22 | if (argc < 2) { 23 | printf("Usage: %s \n", argv[0]); 24 | return -1; 25 | } 26 | 27 | ed25519_context_t ctxt; 28 | uint32_t lockout_period = 10, lockout_multiplier = 2, lockout_max_depth = 32; 29 | sgx_status_t status = init_ed25519( 30 | argv[1], lockout_period, lockout_multiplier, lockout_max_depth, &ctxt); 31 | if (SGX_SUCCESS != status) { 32 | printf("Failed in init_ed25519. Error %d\n", status); 33 | return -1; 34 | } 35 | 36 | printf("Loaded the enclave. eid: %d\n", (uint32_t)ctxt.eid); 37 | 38 | uint32_t datalen = 0; 39 | status = get_ed25519_data(&ctxt, &datalen, NULL); 40 | 41 | uint8_t* sealed_data = malloc(datalen); 42 | status = get_ed25519_data(&ctxt, &datalen, sealed_data); 43 | if (SGX_SUCCESS != status) { 44 | printf("Failed in get_ed25519_data. Error %d\n", status); 45 | release_ed25519_context(&ctxt); 46 | free(sealed_data); 47 | return -1; 48 | } 49 | 50 | status = 51 | init_ed25519_from_data(&ctxt, datalen, sealed_data, 1, lockout_period, 52 | lockout_multiplier, lockout_max_depth); 53 | free(sealed_data); 54 | if (SGX_SUCCESS != status) { 55 | printf("Failed in init_ed25519_from_data. Error %d\n", status); 56 | release_ed25519_context(&ctxt); 57 | return -1; 58 | } 59 | 60 | const history_entry_t entries; 61 | uint8_t* data = 62 | "This is a test string. We'll sign it using SGX enclave. Hope it works!!"; 63 | uint8_t signature[64]; 64 | memset(signature, 0, sizeof(signature)); 65 | status = sign_ed25519(&ctxt, sizeof(data), data, 1, &entries, 66 | sizeof(signature), signature); 67 | if (SGX_SUCCESS != status) { 68 | printf("Failed in sign_ed25519. Error %d\n", status); 69 | release_ed25519_context(&ctxt); 70 | return -1; 71 | } 72 | 73 | printf("Signature:\n"); 74 | print_buffer(signature, sizeof(signature)); 75 | 76 | if (ed25519_verify(signature, data, sizeof(data), ctxt.public_key) == 0) { 77 | printf("Failed in verifying the signature\n"); 78 | } else { 79 | printf("Signature verified\n"); 80 | } 81 | 82 | release_ed25519_context(&ctxt); 83 | return 0; 84 | } --------------------------------------------------------------------------------