├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── ci
    ├── build.sh
    ├── buildkite.yml
    ├── docker-ispc
    │   └── Dockerfile
    ├── docker-run.sh
    ├── docker-sgx
    │   ├── Dockerfile
    │   └── build.sh
    ├── env.sh
    ├── upload-ci-artifact.sh
    └── upload-github-release-asset.sh
└── src
    ├── Makefile
    ├── common
        ├── gpu_common.h
        └── perftime.h
    ├── cuda-ecc-ed25519
        ├── common.cu
        ├── ed25519.h
        ├── fe.cu
        ├── fe.h
        ├── fixedint.h
        ├── ge.cu
        ├── ge.h
        ├── gpu_ctx.cu
        ├── gpu_ctx.h
        ├── int128.h
        ├── keypair.cu
        ├── license.txt
        ├── main.cu
        ├── precomp_data.h
        ├── sc.cu
        ├── sc.h
        ├── seed.cu
        ├── sha512.cu
        ├── sha512.h
        ├── sign.cu
        └── verify.cu
    ├── cuda-poh-verify
        └── poh_verify.cu
    ├── cuda-sha256
        ├── sha256.cu
        └── tomcrypt_macros.h
    ├── gpu-common.mk
    ├── jerasure-sys
        ├── Cargo.toml
        ├── build.rs
        ├── gf-complete
        └── jerasure
    ├── opencl-ecc-ed25519
        ├── gpu_ctx.cpp
        ├── gpu_ctx.h
        ├── main.cpp
        ├── sign.cpp
        └── verify.cpp
    ├── opencl-platform
        ├── cl_common.h
        ├── cl_init_platform.cpp
        ├── kernels_precomp_data.h
        ├── kernels_sha256.h
        └── kernels_verify.h
    ├── opencl-poh-verify
        └── cl_poh_verify.cpp
    ├── poh-simd
        ├── Makefile
        ├── build.sh
        ├── poh-verify.ispc
        └── sha256.h
    ├── poh-verify-test
        ├── main.cpp
        ├── test_hashes_332_129
        ├── test_hashes_output_332
        ├── test_num_elems_332
        └── test_num_hashes_arr_332
    ├── sgx-ecc-ed25519
        ├── Makefile
        ├── add_scalar.c
        ├── build.sh
        ├── ed25519.h
        ├── fe.c
        ├── fe.h
        ├── fixedint.h
        ├── ge.c
        ├── ge.h
        ├── key_exchange.c
        ├── keypair.c
        ├── precomp_data.h
        ├── sc.c
        ├── sc.h
        ├── seed.c
        ├── sha512.c
        ├── sha512.h
        ├── sign.c
        └── verify.c
    └── sgx
        ├── build.sh
        ├── signing
            ├── Makefile
            ├── signing.config.xml
            ├── signing.edl
            ├── signing.lds
            ├── signing_internal.h
            ├── signing_public.h
            ├── signing_trusted.c
            └── signing_untrusted.c
        └── test
            ├── Makefile
            └── signing_test.c


/.gitignore:
--------------------------------------------------------------------------------
1 | # temp folder
2 | /temp/
3 | 
4 | # build output folders
5 | /libs/
6 | /dist/
7 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright 2018 Solana Labs, Inc.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | You may obtain a copy of the License at
 6 | 
 7 |     http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Top-level build driver: the real work is delegated to src/Makefile.
 2 | #
 3 | # User-tunable knobs:
 4 | #   V        build flavour handed down to src/ (release or debug)
 5 | #   DESTDIR  directory that `make install` copies the shared libraries into
 6 | 
 7 | OS := $(shell uname)
 8 | 
 9 | # None of these targets names a real file. Declaring them phony keeps a stray
10 | # file called `all`, `install` or `clean` from silently disabling the target.
11 | .PHONY: all cuda_crypt install clean
12 | 
13 | # Declared first so that `all` is the default goal on every platform.
14 | all:
15 | ifeq ($(OS),Darwin)
16 | SO=dylib
17 | else
18 | SO=so
19 | # The GPU libraries are only built on non-Darwin hosts.
20 | all: cuda_crypt
21 | endif
22 | 
23 | V=release
24 | 
25 | cuda_crypt:
26 | 	$(MAKE) V=$(V) -C src
27 | 
28 | DESTDIR ?= dist
29 | install:
30 | 	mkdir -p $(DESTDIR)
31 | ifneq ($(OS),Darwin)
32 | 	cp -f src/$(V)/libcuda-crypt.so $(DESTDIR)
33 | 	cp -f src/$(V)/libcl-crypt.so $(DESTDIR)
34 | endif
35 | 	ls -lh $(DESTDIR)
36 | 
37 | clean:
38 | 	$(MAKE) V=$(V) -C src clean
39 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | [![Build status](https://badge.buildkite.com/dcc97a44f655a7473ff0f836a2cf154dff016a66db8e4f7405.svg?branch=master)](https://buildkite.com/solana-labs/wool)
 2 | 
 3 | # solana-perf-libs
 4 | CUDA, and more!
 5 | 
 6 | ## Building
 7 | After cloning this repo use the makefile in the root to build the tree
 8 | with nvcc in your path:
 9 | 
10 | ```bash
11 | $ export PATH=/usr/local/cuda/bin:$PATH
12 | $ make -j$(nproc)
13 | ```
14 | 
15 | This should generate the libraries:
16 | * libcuda-crypt.so - ed25519 verify and PoH verify CUDA implementations
17 | * libcl-crypt.so - ed25519 verify and PoH verify OpenCL implementations
18 | 
19 | Copy libraries to the main Solana repo:
20 | ```bash
21 | $ make DESTDIR=${SOLANA_ROOT:?}/target/perf-libs install
22 | ```
23 | 
24 | Build Solana:
25 | ```bash
26 | $ cd $SOLANA_ROOT
27 | $ cargo build --release
28 | ```
29 | 
30 | The library is loaded at startup by `solana_perf::perf_libs`.
31 | See `perf/src/perf_libs.rs` in the main Solana repo for details.
32 | 


--------------------------------------------------------------------------------
/ci/build.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | #
 3 | # CI build: compiles the libraries once per CUDA toolkit listed in CUDA_HOMES,
 4 | # runs the SGX and ISPC builds through ci/docker-run.sh, then tars up dist/
 5 | # and uploads the tarball.
 6 | #
 7 | set -e
 8 | cd "$(dirname "$0")/.."
 9 | 
10 | source ci/env.sh
11 | source ci/upload-ci-artifact.sh
12 | 
13 | CUDA_HOMES=(
14 |   /usr/local/cuda-11.1
15 |   /usr/local/cuda-11.2
16 |   /usr/local/cuda-11.3
17 |   /usr/local/cuda-11.4
18 |   /usr/local/cuda-11.5
19 | )
20 | 
21 | for CUDA_HOME in "${CUDA_HOMES[@]}"; do
22 |   CUDA_HOME_BASE="$(basename "$CUDA_HOME")"
23 |   echo "--- Build: $CUDA_HOME_BASE"
24 |   (
25 |     if [[ ! -d $CUDA_HOME/lib64 ]]; then
26 |       echo "Invalid CUDA_HOME: $CUDA_HOME"
27 |       exit 1
28 |     fi
29 | 
30 |     set -x
31 |     export LD_LIBRARY_PATH=$CUDA_HOME/lib64
32 |     # Put this toolkit's bin directory first so that its nvcc wins over any
33 |     # other CUDA install that is already on PATH.
34 |     export PATH=$CUDA_HOME/bin:$PATH:$HOME/.cargo/bin/
35 |     export DESTDIR=dist/$CUDA_HOME_BASE
36 | 
37 |     make -j"$(nproc)"
38 |     make install
39 |     make clean
40 | 
41 |     cp -vf "$CUDA_HOME"/version.txt "$DESTDIR"/cuda-version.txt
42 |   )
43 | done
44 | 
45 | echo --- Build SGX
46 | (
47 |   set -x
48 |   ci/docker-run.sh solanalabs/sgxsdk src/sgx-ecc-ed25519/build.sh
49 |   ci/docker-run.sh solanalabs/sgxsdk src/sgx/build.sh
50 | )
51 | 
52 | echo --- Build ISPC
53 | (
54 |   set -x
55 |   ci/docker-run.sh solanalabs/ispc src/poh-simd/build.sh
56 | )
57 | 
58 | echo --- Create tarball
59 | (
60 |   set -x
61 |   cd dist
62 |   git rev-parse HEAD | tee solana-perf-HEAD.txt
63 |   tar zcvf ../solana-perf.tgz ./*
64 | )
65 | 
66 | upload-ci-artifact solana-perf.tgz
67 | 
68 | # Only tagged builds publish a GitHub release asset.
69 | [[ -n $CI_TAG ]] || exit 0
70 | ci/upload-github-release-asset.sh solana-perf.tgz
71 | exit 0
72 | 


--------------------------------------------------------------------------------
/ci/buildkite.yml:
--------------------------------------------------------------------------------
1 | steps:
2 |   - command: "ci/build.sh"
3 |     name: "build"
4 |     timeout_in_minutes: 120
5 |     agents:
6 |       - "queue=perf-cuda"
7 | 


--------------------------------------------------------------------------------
/ci/docker-ispc/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM buildpack-deps:stretch
 2 | 
 3 | ARG ISPC_HOME=/usr/local/src/ispc
 4 | ARG LLVM_HOME=/usr/local/src/llvm
 5 | ARG LLVM_VERSION=8.0
 6 | 
 7 | ENV PATH=$LLVM_HOME/bin-$LLVM_VERSION/bin:$ISPC_HOME/bin/bin:$PATH
 8 | 
 9 | # Installs cmake 3.8, runs ispc's alloy.py for LLVM $LLVM_VERSION, builds and
10 | # installs ispc, then keeps only the LLVM bin-$LLVM_VERSION and ispc bin trees.
11 | # Notes:
12 | #  - the clone uses https:// because GitHub no longer serves git://
13 | #  - ${LLVM_VERSION}_temp needs the braces; $LLVM_VERSION_temp would expand an
14 | #    undefined variable named LLVM_VERSION_temp
15 | RUN set -x \
16 |  && apt-get update \
17 |  && apt purge -y --auto-remove cmake \
18 |  && apt-get install -y bison flex \
19 |  && wget https://cmake.org/files/v3.8/cmake-3.8.0-Linux-x86_64.sh \
20 |  && mkdir /opt/cmake \
21 |  && sh cmake-3.8.0-Linux-x86_64.sh --prefix=/opt/cmake --skip-license \
22 |  && ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake \
23 |  && rm cmake-3.8.0-Linux-x86_64.sh \
24 |  && cmake --version \
25 |  && git clone https://github.com/ispc/ispc.git $ISPC_HOME \
26 |  && cd $ISPC_HOME \
27 |  && python alloy.py -b --version=$LLVM_VERSION --git --selfbuild \
28 |  && rm -rf $LLVM_HOME/build-$LLVM_VERSION $LLVM_HOME/llvm-$LLVM_VERSION $LLVM_HOME/bin-${LLVM_VERSION}_temp $LLVM_HOME/build-${LLVM_VERSION}_temp \
29 |  && mkdir build \
30 |  && cd build \
31 |  && echo $PATH \
32 |  && ls -la $LLVM_HOME/bin-$LLVM_VERSION/bin \
33 |  && cmake -DCMAKE_INSTALL_PREFIX=$ISPC_HOME/bin -DCMAKE_CXX_COMPILER=clang++ $ISPC_HOME \
34 |  && make -j$(nproc) \
35 |  && make install \
36 |  && cd .. \
37 |  && rm -rf build \
38 |  && mv $LLVM_HOME/bin-$LLVM_VERSION / \
39 |  && rm -rf $LLVM_HOME \
40 |  && mkdir -p $LLVM_HOME \
41 |  && mv /bin-$LLVM_VERSION $LLVM_HOME \
42 |  && cd / \
43 |  && mv $ISPC_HOME/bin /ispcbin \
44 |  && rm -rf $ISPC_HOME \
45 |  && mkdir $ISPC_HOME \
46 |  && mv /ispcbin $ISPC_HOME/bin \
47 |  && ispc --version
48 | 


--------------------------------------------------------------------------------
/ci/docker-run.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -e
 3 | # Prints usage to stdout; flags are positional: --shell, when given, must precede --nopull.
 4 | usage() {
 5 |   echo "Usage: $0 [--shell] [--nopull] [docker image name] [command]"
 6 |   echo
 7 |   echo Runs command in the specified docker image with
 8 |   echo a CI-appropriate environment.
 9 |   echo
10 |   echo "--nopull   Skip the dockerhub image update"
11 |   echo "--shell    Skip command and enter an interactive shell"
12 |   echo
13 | }
14 | 
15 | cd "$(dirname "$0")/.."
16 | 
17 | INTERACTIVE=false
18 | if [[ $1 = --shell ]]; then
19 |   INTERACTIVE=true
20 |   shift
21 | fi
22 | 
23 | NOPULL=false
24 | if [[ $1 = --nopull ]]; then
25 |   NOPULL=true
26 |   shift
27 | fi
28 | 
29 | IMAGE="$1"
30 | if [[ -z "$IMAGE" ]]; then
31 |   echo Error: image not defined
32 |   exit 1
33 | fi
34 | 
35 | $NOPULL || docker pull "$IMAGE"
36 | shift
37 | 
38 | ARGS=(
39 |   --workdir /solana
40 |   --volume "$PWD:/solana"
41 |   --rm
42 | )
43 | 
44 | if [[ -n $CI ]]; then
45 |   # Share the real ~/.cargo between docker containers in CI for speed
46 |   ARGS+=(--volume "$HOME:/home")
47 | else
48 |   # Avoid sharing ~/.cargo when building locally to avoid a mixed macOS/Linux
49 |   # ~/.cargo
50 |   ARGS+=(--volume "$PWD:/home")
51 | fi
52 | ARGS+=(--env "CARGO_HOME=/home/.cargo")
53 | 
54 | # kcov tries to set the personality of the binary which docker
55 | # doesn't allow by default.
56 | ARGS+=(--security-opt "seccomp=unconfined")
57 | 
58 | # Ensure files are created with the current host uid/gid
59 | if [[ -z "$SOLANA_DOCKER_RUN_NOSETUID" ]]; then
60 |   ARGS+=(--user "$(id -u):$(id -g)")
61 | fi
62 | 
63 | # Environment variables to propagate into the container
64 | ARGS+=(
65 |   --env BUILDKITE
66 |   --env BUILDKITE_AGENT_ACCESS_TOKEN
67 |   --env BUILDKITE_BRANCH
68 |   --env BUILDKITE_JOB_ID
69 |   --env BUILDKITE_TAG
70 |   --env CODECOV_TOKEN
71 |   --env CRATES_IO_TOKEN
72 |   --env SNAPCRAFT_CREDENTIALS_KEY
73 | )
74 | 
75 | if $INTERACTIVE; then
76 |   if [[ -n $1 ]]; then
77 |     echo
78 |     echo "Note: '$*' ignored due to --shell argument"
79 |     echo
80 |   fi
81 |   set -x
82 |   exec docker run --interactive --tty "${ARGS[@]}" "$IMAGE" bash
83 | fi
84 | 
85 | set -x
86 | exec docker run "${ARGS[@]}" "$IMAGE" "$@"
87 | 


--------------------------------------------------------------------------------
/ci/docker-sgx/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:18.04
 2 | 
 3 | ENV DEBIAN_FRONTEND=noninteractive
 4 | RUN apt-get update && \
 5 |     apt-get install -y build-essential ocaml ocamlbuild automake autoconf libtool wget python libssl-dev libcurl4-openssl-dev protobuf-compiler libprotobuf-dev sudo kmod vim curl git-core libprotobuf-c0-dev libboost-thread-dev libboost-system-dev liblog4cpp5-dev libjsoncpp-dev alien uuid-dev libxml2-dev cmake pkg-config expect
 6 | 
 7 | # printf feeds the SDK installer its two answers; echo -e is not portable to /bin/sh.
 8 | RUN mkdir -p /root/sgx /etc/init && \
 9 |     wget -O /root/sgx/sdk.bin https://download.01.org/intel-sgx/linux-2.3.1/ubuntu18.04/sgx_linux_x64_sdk_2.3.101.46683.bin && \
10 |     wget -O /root/sgx/psw.deb https://download.01.org/intel-sgx/linux-2.3.1/ubuntu18.04/libsgx-enclave-common_2.3.101.46683-1_amd64.deb && \
11 |     cd /root/sgx && \
12 |     dpkg -i /root/sgx/psw.deb && \
13 |     chmod +x /root/sgx/sdk.bin && \
14 |     printf 'no\n/opt\n' | /root/sgx/sdk.bin && \
15 |     echo 'source /opt/sgxsdk/environment' >> /root/.bashrc && \
16 |     rm -rf /root/sgx/*
17 | 
18 | WORKDIR /root
19 | 
20 | 


--------------------------------------------------------------------------------
/ci/docker-sgx/build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -ex
3 | 
4 | cd "$(dirname "$0")"
5 | 
6 | docker build -t solanalabs/sgxsdk .
7 | docker push solanalabs/sgxsdk
8 | 
9 | 


--------------------------------------------------------------------------------
/ci/env.sh:
--------------------------------------------------------------------------------
 1 | #
 2 | # Normalized CI environment variables
 3 | #
 4 | # |source| me
 5 | #
 6 | # Maps the Travis, Buildkite or AppVeyor variables onto a common set of CI_*
 7 | # variables (all exported empty when $CI is unset) and prints a summary.
 8 | #
 9 | 
10 | if [[ -n $CI ]]; then
11 |   export CI=1
12 |   if [[ -n $TRAVIS ]]; then
13 |     export CI_BRANCH=$TRAVIS_BRANCH
14 |     export CI_BUILD_ID=$TRAVIS_BUILD_ID
15 |     export CI_COMMIT=$TRAVIS_COMMIT
16 |     export CI_JOB_ID=$TRAVIS_JOB_ID
17 |     # TRAVIS_PULL_REQUEST is the PR number on PR builds and the string "false"
18 |     # otherwise, so it has to be compared, not executed as a command.
19 |     if [[ -n $TRAVIS_PULL_REQUEST && $TRAVIS_PULL_REQUEST != false ]]; then
20 |       export CI_PULL_REQUEST=true
21 |     else
22 |       export CI_PULL_REQUEST=
23 |     fi
24 |     export CI_OS_NAME=$TRAVIS_OS_NAME
25 |     export CI_REPO_SLUG=$TRAVIS_REPO_SLUG
26 |     export CI_TAG=$TRAVIS_TAG
27 |   elif [[ -n $BUILDKITE ]]; then
28 |     export CI_BRANCH=$BUILDKITE_BRANCH
29 |     export CI_BUILD_ID=$BUILDKITE_BUILD_ID
30 |     export CI_COMMIT=$BUILDKITE_COMMIT
31 |     export CI_JOB_ID=$BUILDKITE_JOB_ID
32 |     # The standard BUILDKITE_PULL_REQUEST environment variable is always "false" due
33 |     # to how solana-ci-gate is used to trigger PR builds rather than using the
34 |     # standard Buildkite PR trigger.
35 |     # Glob match on the branch name: "=~ pull/*" would be a regex that accepts
36 |     # any branch merely containing "pull".
37 |     if [[ $CI_BRANCH = pull/* ]]; then
38 |       export CI_PULL_REQUEST=true
39 |     else
40 |       export CI_PULL_REQUEST=
41 |     fi
42 |     export CI_OS_NAME=linux
43 |     export CI_REPO_SLUG=$BUILDKITE_ORGANIZATION_SLUG/$BUILDKITE_PIPELINE_SLUG
44 |     # TRIGGERED_BUILDKITE_TAG is a workaround to propagate BUILDKITE_TAG into
45 |     # the solana-secondary builder
46 |     if [[ -n $TRIGGERED_BUILDKITE_TAG ]]; then
47 |       export CI_TAG=$TRIGGERED_BUILDKITE_TAG
48 |     else
49 |       export CI_TAG=$BUILDKITE_TAG
50 |     fi
51 |   elif [[ -n $APPVEYOR ]]; then
52 |     export CI_BRANCH=$APPVEYOR_REPO_BRANCH
53 |     export CI_BUILD_ID=$APPVEYOR_BUILD_ID
54 |     export CI_COMMIT=$APPVEYOR_REPO_COMMIT
55 |     export CI_JOB_ID=$APPVEYOR_JOB_ID
56 |     if [[ -n $APPVEYOR_PULL_REQUEST_NUMBER ]]; then
57 |       export CI_PULL_REQUEST=true
58 |     else
59 |       export CI_PULL_REQUEST=
60 |     fi
61 |     if [[ $CI_LINUX = True ]]; then
62 |       export CI_OS_NAME=linux
63 |     elif [[ $CI_WINDOWS = True ]]; then
64 |       export CI_OS_NAME=windows
65 |     fi
66 |     export CI_REPO_SLUG=$APPVEYOR_REPO_NAME
67 |     export CI_TAG=$APPVEYOR_REPO_TAG_NAME
68 |   fi
69 | else
70 |   export CI=
71 |   export CI_BRANCH=
72 |   export CI_BUILD_ID=
73 |   export CI_COMMIT=
74 |   export CI_JOB_ID=
75 |   export CI_OS_NAME=
76 |   export CI_PULL_REQUEST=
77 |   export CI_REPO_SLUG=
78 |   export CI_TAG=
79 | fi
80 | 
81 | cat <<EOF
82 | CI=$CI
83 | CI_BRANCH=$CI_BRANCH
84 | CI_BUILD_ID=$CI_BUILD_ID
85 | CI_COMMIT=$CI_COMMIT
86 | CI_JOB_ID=$CI_JOB_ID
87 | CI_OS_NAME=$CI_OS_NAME
88 | CI_PULL_REQUEST=$CI_PULL_REQUEST
89 | CI_TAG=$CI_TAG
90 | EOF
91 | 


--------------------------------------------------------------------------------
/ci/upload-ci-artifact.sh:
--------------------------------------------------------------------------------
 1 | # |source| me
 2 | 
 3 | upload-ci-artifact() {
 4 |   echo "--- artifact: $1"
 5 |   if [[ -r "$1" ]]; then
 6 |     ls -l "$1"
 7 |     if ${BUILDKITE:-false}; then
 8 |       (
 9 |         set -x
10 |         buildkite-agent artifact upload "$1"
11 |       )
12 |     fi
13 |   else
14 |     echo ^^^ +++
15 |     echo "$1 not found"
16 |   fi
17 | }
18 | 
19 | 


--------------------------------------------------------------------------------
/ci/upload-github-release-asset.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | #
 3 | # Uploads one or more files to a github release
 4 | #
 5 | # Prerequisites
 6 | # 1) GITHUB_TOKEN defined in the environment
 7 | # 2) CI_TAG defined in the environment (tag of an existing release)
 8 | # 3) CI_REPO_SLUG defined in the environment
 9 | #
10 | set -e
11 | 
12 | if [[ -z $1 ]]; then
13 |   echo No files specified
14 |   exit 1
15 | fi
16 | 
17 | if [[ -z $GITHUB_TOKEN ]]; then
18 |   echo Error: GITHUB_TOKEN not defined
19 |   exit 1
20 | fi
21 | 
22 | if [[ -z $CI_TAG ]]; then
23 |   echo Error: CI_TAG not defined
24 |   exit 1
25 | fi
26 | 
27 | if [[ -z $CI_REPO_SLUG ]]; then
28 |   echo Error: CI_REPO_SLUG not defined
29 |   exit 1
30 | fi
31 | 
32 | # Look up the numeric id of the release that belongs to $CI_TAG.
33 | releaseId=$( \
34 |   curl -s "https://api.github.com/repos/$CI_REPO_SLUG/releases/tags/$CI_TAG" \
35 |   | grep -m 1 \"id\": \
36 |   | sed -ne 's/^[^0-9]*\([0-9]*\),$/\1/p' \
37 | )
38 | # Without an id the upload URL below would be malformed, so stop here.
39 | if [[ -z $releaseId ]]; then
40 |   echo "Error: no Github release id found for tag $CI_TAG of $CI_REPO_SLUG"
41 |   exit 1
42 | fi
43 | echo "Github release id for $CI_TAG is $releaseId"
44 | 
45 | for file in "$@"; do
46 |   echo "--- Uploading $file to tag $CI_TAG of $CI_REPO_SLUG"
47 |   curl \
48 |     --data-binary @"$file" \
49 |     -H "Authorization: token $GITHUB_TOKEN" \
50 |     -H "Content-Type: application/octet-stream" \
51 |     "https://uploads.github.com/repos/$CI_REPO_SLUG/releases/$releaseId/assets?name=$(basename "$file")"
52 |   echo
53 | done
54 | 
55 | 


--------------------------------------------------------------------------------
/src/Makefile:
--------------------------------------------------------------------------------
  1 | #
  2 | #  Makefile to build libcuda-crypt.so and libcl-crypt.so
  3 | #   From sources in cuda-ecc-ed25519/, cuda-poh-verify/ and the opencl-* directories
  4 | #
  5 | #   nvcc inserts conflicting driver symbols into a static lib (.so)
  6 | #   so more than one .so cannot be linked into a single program.
  7 | #   Shared objects with device kernels also did not seem to work--
  8 | #   it can build, but the kernel launch is not successful. (with cuda 9.2)
  9 | #   Hence, build ed25519 signature verify and poh verify device
 10 | #   code into a single binary.
 11 | 
 12 | V=debug
 13 | include gpu-common.mk
 14 | 
 15 | ECC_TEST_BIN=cuda_ed25519_verify
 16 | POH_VERIFY_MANY_TEST_BIN=cuda_poh_verify_many
 17 | LIB=cuda-crypt
 18 | 
 19 | CL_POH_VERIFY_MANY_TEST_BIN=cl_poh_verify_many
 20 | CL_ECC_TEST_BIN=cl_ed25519_verify
 21 | CL_LIB=cl-crypt
 22 | 
 23 | CL_HEADER_DIR:=opencl-platform
 24 | 
 25 | CUDA_HEADER_DIR:=cuda-headers
 26 | CUDA_SHA256_DIR:=cuda-sha256
 27 | 
 28 | CUDA_DIR ?= /usr/local/cuda
 29 | 
 30 | POH_VERIFY_TEST_DIR=poh-verify-test
 31 | 
 32 | CXX ?= g++
 33 | 
 34 | CFLAGS_COMMON:=-DENDIAN_NEUTRAL -DLTC_NO_ASM
 35 | CFLAGS+=$(CFLAGS_COMMON) -I$(CUDA_HEADER_DIR) -I$(CUDA_SHA256_DIR)
 36 | 
 37 | #use -DUSE_RDTSC for Windows compilation
 38 | CL_CFLAGS_common:=-fPIC -std=c++11 $(CFLAGS_COMMON) -DOPENCL_VARIANT \
 39 | 		  -I$(CL_HEADER_DIR) -Icommon/ \
 40 | 		  -I$(CUDA_DIR)/targets/x86_64-linux/include $(HOST_CFLAGS)
 41 | CL_CFLAGS_release:=$(CL_CFLAGS_common) -O3
 42 | CL_CFLAGS_debug:=$(CL_CFLAGS_common) -O0 -g
 43 | CL_CFLAGS:=$(CL_CFLAGS_$V)
 44 | 
 45 | all: $(V)/$(ECC_TEST_BIN) \
 46 |      $(V)/$(POH_VERIFY_MANY_TEST_BIN) \
 47 |      $(V)/$(CL_ECC_TEST_BIN) \
 48 | 	 $(V)/$(CL_POH_VERIFY_MANY_TEST_BIN) \
 49 |      $(V)/lib$(LIB).so \
 50 |      $(V)/lib$(CL_LIB).so
 51 | 
 52 | ECC_DIR:=cuda-ecc-ed25519
 53 | POH_VERIFY_MANY_DIR:=cuda-poh-verify
 54 | 
 55 | KEYPAIR_SRCS:=$(addprefix $(ECC_DIR)/,keypair.cu ed25519.h ge.h)
 56 | $V/keypair.o: $(KEYPAIR_SRCS)
 57 | 	@mkdir -p $(@D)
 58 | 	$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
 59 | 
 60 | SEED_SRCS:=$(addprefix $(ECC_DIR)/,seed.cu ed25519.h)
 61 | $V/seed.o: $(SEED_SRCS)
 62 | 	@mkdir -p $(@D)
 63 | 	$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
 64 | 
 65 | SIGN_SRCS:=$(addprefix $(ECC_DIR)/,sign.cu sha512.h ge.h sc.h fe.cu ../common/gpu_common.h ed25519.h)
 66 | $V/sign.o: $(SIGN_SRCS)
 67 | 	@mkdir -p $(@D)
 68 | 	$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
 69 | 
 70 | VERIFY_SRCS:=$(addprefix $(ECC_DIR)/,verify.cu sha512.cu ge.cu sc.cu fe.cu keypair.cu common.cu ed25519.h)
 71 | $V/verify.o: $(VERIFY_SRCS)
 72 | 	@mkdir -p $(@D)
 73 | 	$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
 74 | 
 75 | $V/gpu_ctx.o: $(addprefix $(ECC_DIR)/,gpu_ctx.cu gpu_ctx.h)
 76 | 	@mkdir -p $(@D)
 77 | 	$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
 78 | 
 79 | CL_ECC_DIR:=opencl-ecc-ed25519
 80 | CL_CRYPT_DIR:=opencl-crypt
 81 | CL_POH_VERIFY_DIR:=opencl-poh-verify
 82 | 
 83 | CL_POH_VERIFY_SRCS:=$(CL_POH_VERIFY_DIR)/cl_poh_verify.cpp
 84 | $V/cl_poh_verify.o: $(CL_POH_VERIFY_SRCS)
 85 | 	@mkdir -p $(@D)
 86 | 	$(CXX) $(CL_CFLAGS) -I$(ECC_DIR) -c $< -o $@
 87 | 
 88 | CL_SIGN_SRCS:=$(CL_ECC_DIR)/sign.cpp $(ECC_DIR)/fe.cu $(ECC_DIR)/ed25519.h
 89 | $V/cl_sign.o: $(CL_SIGN_SRCS)
 90 | 	@mkdir -p $(@D)
 91 | 	$(CXX) $(CL_CFLAGS) -I$(ECC_DIR) -c $< -o $@
 92 | 
 93 | CL_VERIFY_SRCS:=$(CL_ECC_DIR)/verify.cpp $(ECC_DIR)/seed.cu $(ECC_DIR)/ed25519.h
 94 | $V/cl_verify.o: $(CL_VERIFY_SRCS)
 95 | 	@mkdir -p $(@D)
 96 | 	$(CXX) $(CL_CFLAGS) -I$(ECC_DIR) -c $< -o $@
 97 | 	
 98 | $V/cl_gpu_ctx.o: $(addprefix $(CL_ECC_DIR)/,gpu_ctx.cpp gpu_ctx.h)
 99 | 	@mkdir -p $(@D)
100 | 	$(CXX) $(CL_CFLAGS) -I$(ECC_DIR) -c $< -o $@
101 | 	
102 | $V/cl_init_platform.o: opencl-platform/cl_init_platform.cpp
103 | 	@mkdir -p $(@D)
104 | 	$(CXX) $(CL_CFLAGS) -c $< -o $@
105 | 
106 | POH_DIR:=cuda-poh-verify
107 | POH_SRCS:=$(addprefix $(POH_DIR)/,poh_verify.cu)
108 | 
109 | $V/poh_verify.o: $(POH_SRCS)
110 | 	@mkdir -p $(@D)
111 | 	$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
112 | 
113 | CL_CPU_GPU_OBJS=$(addprefix $V/,cl_init_platform.o cl_verify.o cl_gpu_ctx.o cl_sign.o cl_poh_verify.o)
114 | 
115 | $V/lib$(CL_LIB).so: $(CL_CPU_GPU_OBJS)
116 | 	$(CXX) -shared $^ -L$(CUDA_DIR)/lib64 -lOpenCL -o $@
117 | 
118 | $V/cl_ecc_main.o: $(CL_ECC_DIR)/main.cpp $(ECC_DIR)/ed25519.h
119 | 	@mkdir -p $(@D)
120 | 	$(CXX) $(CL_CFLAGS) -pthread -I$(ECC_DIR) -c $< -o $@
121 | 
122 | $V/poh_many_main.o: $(POH_VERIFY_TEST_DIR)/main.cpp
123 | 	@mkdir -p $(@D)
124 | 	$(CXX) $(CL_CFLAGS) -pthread -I$(ECC_DIR) -c $< -o $@
125 | 
126 | $V/$(CL_ECC_TEST_BIN): $V/cl_ecc_main.o $V/lib$(CL_LIB).so
127 | 	$(CXX) $(CL_CFLAGS) -L$(CUDA_DIR)/lib64 -L$V -pthread $< -l$(CL_LIB) -lOpenCL -o $@
128 | 
129 | $V/$(CL_POH_VERIFY_MANY_TEST_BIN): $V/poh_many_main.o $V/lib$(CL_LIB).so
130 | 	$(CXX) $(CL_CFLAGS) -L$(CUDA_DIR)/lib64 -L$V -pthread $< -l$(CL_LIB) -lOpenCL -o $@
131 | 
132 | CPU_GPU_OBJS=$(addprefix $V/,verify.o poh_verify.o gpu_ctx.o sign.o seed.o keypair.o)
133 | 
134 | $V/crypt-dlink.o: $(CPU_GPU_OBJS)
135 | 	$(NVCC) -Xcompiler "-fPIC" $(CFLAGS) --device-link $^ --output-file $@
136 | 
137 | $V/lib$(LIB).so: $V/crypt-dlink.o $(CPU_GPU_OBJS)
138 | 	$(NVCC) -Xcompiler "-fPIC" --shared --output-file $@ $^
139 | 
140 | $V/ecc_main.o: $(addprefix $(ECC_DIR)/,main.cu ed25519.h)
141 | 	@mkdir -p $(@D)
142 | 	$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
143 | 
144 | $V/$(ECC_TEST_BIN): $V/ecc_main.o $V/lib$(LIB).so
145 | 	$(NVCC) $(CFLAGS) -L$V -l$(LIB) $< -o $@
146 | 
147 | $V/$(POH_VERIFY_MANY_TEST_BIN): $V/poh_many_main.o $V/lib$(LIB).so
148 | 	$(NVCC) $(CFLAGS) -L$V -l$(LIB) $< -o $@
149 | 
150 | # Utility targets: none of these names a file, so mark them all phony.
151 | .PHONY: all clean test
152 | clean:
153 | 	rm -rf $V
154 | 
155 | # Runs the CUDA and OpenCL PoH checks and three ed25519 runs from inside $(V), with LD_LIBRARY_PATH=. so the fresh libraries load.
156 | test: $V/$(ECC_TEST_BIN) $V/$(POH_VERIFY_MANY_TEST_BIN) $V/$(CL_POH_VERIFY_MANY_TEST_BIN)
157 | 	cd $(V) && LD_LIBRARY_PATH=. ./$(POH_VERIFY_MANY_TEST_BIN) -check_result ../$(POH_VERIFY_TEST_DIR)/test_hashes_332_129 ../$(POH_VERIFY_TEST_DIR)/test_num_hashes_arr_332 ../$(POH_VERIFY_TEST_DIR)/test_num_elems_332
158 | 	cd $(V) && LD_LIBRARY_PATH=. ./$(CL_POH_VERIFY_MANY_TEST_BIN) -check_result ../$(POH_VERIFY_TEST_DIR)/test_hashes_332_129 ../$(POH_VERIFY_TEST_DIR)/test_num_hashes_arr_332 ../$(POH_VERIFY_TEST_DIR)/test_num_elems_332
159 | 	cd $(V) && LD_LIBRARY_PATH=. ./$(ECC_TEST_BIN) 1 1 1 1 1 1
160 | 	cd $(V) && LD_LIBRARY_PATH=. ./$(ECC_TEST_BIN) 64 1 1 1 1 0
161 | 	cd $(V) && LD_LIBRARY_PATH=. ./$(ECC_TEST_BIN) 100201 1 1 4 10 1
162 | 


--------------------------------------------------------------------------------
/src/common/gpu_common.h:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <stdio.h>
 3 | 
 4 | #ifndef GPU_COMMON_H
 5 | #define GPU_COMMON_H
 6 | 
 7 | extern bool g_verbose;
 8 | 
 9 | #define LOG(...) if (g_verbose) { printf(__VA_ARGS__); }
10 | 
11 | #define ROUND_UP_DIV(x, y) (((x) + (y) - 1) / (y))
12 | 
13 | #ifndef OPENCL_VARIANT
14 | 
15 | #define CUDA_CHK(ans) { cuda_assert((ans), __FILE__, __LINE__); }
16 | 
17 | /* Reports a failed CUDA call on stderr (message, file, line) and hits assert(0); does nothing on cudaSuccess. */
18 | inline void cuda_assert(cudaError_t err, const char *file, int line)
19 | {
20 |     if (err == cudaSuccess) return;
21 | 
22 |     fprintf(stderr,"ERR: %s %s %d\n", cudaGetErrorString(err), file, line);
23 |     assert(0);
24 | }
25 | 
26 | #endif
27 | 
28 | #endif
29 | 


--------------------------------------------------------------------------------
/src/common/perftime.h:
--------------------------------------------------------------------------------
 1 | #ifndef PERFTIME_H
 2 | #define PERFTIME_H
 3 | 
 4 | /*
 5 |  * Small timing helpers. The time source is chosen at compile time:
 6 |  *   USE_RDTSC         - the x86 rdtsc instruction (raw counter ticks)
 7 |  *   USE_CLOCK_GETTIME - clock_gettime(CLOCK_MONOTONIC_RAW)
 8 |  *   otherwise         - gettimeofday()
 9 |  */
10 | 
11 | #include <stddef.h> /* NULL */
12 | 
13 | #ifdef USE_RDTSC
14 | #include <stdint.h> /* uint64_t */
15 | 
16 | /* Reads the counter via rdtsc and joins its high and low 32-bit halves. */
17 | static inline uint64_t rdtsc()
18 | {
19 |     unsigned int hi, lo;
20 |     __asm__ volatile("rdtsc" : "=a" (lo), "=d" (hi));
21 |     return ((uint64_t)hi << 32) | lo;
22 | }
23 | 
24 | typedef struct {
25 |     uint64_t count;
26 | } perftime_t;
27 | 
28 | #elif defined(USE_CLOCK_GETTIME)
29 | #include <time.h>
30 | typedef struct timespec perftime_t;
31 | #else
32 | #include <sys/time.h>
33 | typedef struct timeval perftime_t;
34 | #endif
35 | 
36 | /* Stores the current reading of the selected time source in *t.
37 |  * Returns 0 with USE_RDTSC, otherwise the result of the underlying call. */
38 | static int get_time(perftime_t* t) {
39 | #ifdef USE_RDTSC
40 |     t->count = rdtsc();
41 |     return 0;
42 | #elif defined(USE_CLOCK_GETTIME)
43 |     return clock_gettime(CLOCK_MONOTONIC_RAW, t);
44 |     //return clock_gettime(CLOCK_PROCESS_CPUTIME_ID, t);
45 | #else
46 |     return gettimeofday(t, NULL /* timezone */);
47 | #endif
48 | }
49 | 
50 | /* Converts a reading to microseconds as a double. With USE_RDTSC the raw
51 |  * tick count is returned unchanged (it is not scaled to microseconds). */
52 | static double get_us(const perftime_t* time) {
53 | #ifdef USE_RDTSC
54 |     return time->count;
55 | #elif defined(USE_CLOCK_GETTIME)
56 |     /* Divide as double so the sub-microsecond part of tv_nsec is kept. */
57 |     return ((time->tv_nsec / 1000.0) + (double)time->tv_sec * 1000000);
58 | #else
59 |     return (time->tv_usec + (double)time->tv_sec * 1000000);
60 | #endif
61 | }
62 | 
63 | /* Returns get_us(end) - get_us(start). */
64 | static double get_diff(const perftime_t* start, const perftime_t* end) {
65 |     return get_us(end) - get_us(start);
66 | }
67 | 
68 | #endif
69 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/common.cu:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef COMMON_CU
 3 | #define COMMON_CU
 4 | 
 5 | /* Assembles in[0..2] into a 64-bit value, in[0] being the lowest byte. */
 6 | static uint64_t __host__ __device__ load_3(const unsigned char *in) {
 7 |     const uint64_t b0 = (uint64_t) in[0];
 8 |     const uint64_t b1 = ((uint64_t) in[1]) << 8;
 9 |     const uint64_t b2 = ((uint64_t) in[2]) << 16;
10 | 
11 |     /* The three pieces occupy disjoint bit ranges, so OR merges them. */
12 |     return b0 | b1 | b2;
13 | }
14 | 
15 | /* Assembles in[0..3] into a 64-bit value, in[0] being the lowest byte. */
16 | static uint64_t __host__ __device__ load_4(const unsigned char *in) {
17 |     uint64_t word = 0;
18 |     int i;
19 | 
20 |     for (i = 3; i >= 0; --i) {
21 |         word = (word << 8) | (uint64_t) in[i];
22 |     }
23 |     return word;
24 | }
25 | 
26 | /*
27 |  * Assembles in[0..6] into a 64-bit value, in[0] being the lowest byte;
28 |  * the top 8 bits of the result are always zero.
29 |  */
30 | static uint64_t __host__ __device__ load_7(const unsigned char *in) {
31 |     uint64_t word = 0;
32 |     unsigned int shift;
33 | 
34 |     for (shift = 0; shift < 56; shift += 8) {
35 |         word |= ((uint64_t) in[shift / 8]) << shift;
36 |     }
37 |     return word;
38 | }
39 | 
40 | 
41 | #endif
42 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/ed25519.h:
--------------------------------------------------------------------------------
 1 | #ifndef ED25519_H
 2 | #define ED25519_H
 3 | 
 4 | #include <stddef.h>
 5 | #include <inttypes.h>
 6 | 
 7 | #if defined(_WIN32)
 8 |     #if defined(ED25519_BUILD_DLL)
 9 |         #define ED25519_DECLSPEC __declspec(dllexport)
10 |     #elif defined(ED25519_DLL)
11 |         #define ED25519_DECLSPEC __declspec(dllimport)
12 |     #else
13 |         #define ED25519_DECLSPEC
14 |     #endif
15 | #else
16 |     #define ED25519_DECLSPEC
17 | #endif
18 | 
19 | 
20 | #ifdef __cplusplus
21 | extern "C" {
22 | #endif
23 | 
24 | #ifndef ED25519_NO_SEED
25 | int ED25519_DECLSPEC ed25519_create_seed(unsigned char *seed);
26 | #endif
27 | 
28 | #define SHA512_SIZE 64
29 | #define PUB_KEY_SIZE 32
30 | #define PRIV_KEY_SIZE 64
31 | #define SEED_SIZE 32
32 | #define SCALAR_SIZE 32
33 | #define SIG_SIZE 64
34 | 
35 | typedef struct {
36 |     uint8_t* elems;
37 |     uint32_t num;
38 | } gpu_Elems;
39 | 
40 | void ED25519_DECLSPEC ed25519_create_keypair(unsigned char *public_key, unsigned char *private_key, const unsigned char *seed);
41 | void ED25519_DECLSPEC ed25519_sign(unsigned char *signature, const unsigned char *message, size_t message_len, const unsigned char *public_key, const unsigned char *private_key);
42 | 
43 | void ED25519_DECLSPEC ed25519_sign_many(const gpu_Elems* elems,
44 |                                         uint32_t num_elems,
45 |                                         uint32_t message_size,
46 |                                         uint32_t total_packets,
47 |                                         uint32_t total_signatures,
48 |                                         const uint32_t* message_lens,
49 |                                         const uint32_t* public_key_offsets,
50 |                                         const uint32_t* private_key_offsets,
51 |                                         const uint32_t* message_start_offsets,
52 |                                         uint8_t* signatures_out,
53 |                                         uint8_t use_non_default_stream);
54 | 
55 | int ED25519_DECLSPEC ed25519_verify(const unsigned char *signature, const unsigned char *message, uint32_t message_len, const unsigned char *public_key);
56 | 
57 | void ED25519_DECLSPEC ed25519_verify_many(const gpu_Elems* elems,
58 |                                           uint32_t num_elems,
59 |                                           uint32_t message_size,
60 |                                           uint32_t total_packets,
61 |                                           uint32_t total_signatures,
62 |                                           const uint32_t* message_lens,
63 |                                           const uint32_t* public_key_offsets,
64 |                                           const uint32_t* private_key_offsets,
65 |                                           const uint32_t* message_start_offsets,
66 |                                           uint8_t* out,
67 |                                           uint8_t use_non_default_stream);
68 | 
69 | void ED25519_DECLSPEC ed25519_add_scalar(unsigned char *public_key, unsigned char *private_key, const unsigned char *scalar);
70 | void ED25519_DECLSPEC ed25519_key_exchange(unsigned char *shared_secret, const unsigned char *public_key, const unsigned char *private_key);
71 | void ED25519_DECLSPEC ed25519_set_verbose(bool val);
72 | 
73 | const char* ED25519_DECLSPEC ed25519_license();
74 | bool ED25519_DECLSPEC ed25519_init();
75 | 
76 | int cuda_host_register(void* ptr, size_t size, unsigned int flags);
77 | int cuda_host_unregister(void* ptr);
78 | 
79 | int ED25519_DECLSPEC ed25519_get_checked_scalar(unsigned char* out_scalar, const unsigned char* in_scalar);
80 | 
81 | int ED25519_DECLSPEC ed25519_check_packed_ge_small_order(const unsigned char* packed_group_element);
82 | 
83 | #ifdef __cplusplus
84 | }
85 | #endif
86 | 
87 | #endif
88 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/fe.h:
--------------------------------------------------------------------------------
 1 | #ifndef FE_H
 2 | #define FE_H
 3 | 
 4 | #include "fixedint.h"
 5 | 
 6 | 
 7 | /*
 8 |     fe means field element.
 9 |     Here the field is \Z/(2^255-19).
10 |     An element t, entries t[0]...t[9], represents the integer
11 |     t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9].
12 |     Bounds on each t[i] vary depending on context.
13 | */
14 | 
15 | 
16 | typedef int32_t fe[10];
17 | 
18 | 
/*
    Field-element API. Unless noted otherwise, every routine is declared for
    both host and device compilation.
*/

/* Constants and constant tests. */
void __host__ __device__ fe_0(fe h);
void __device__ __host__ fe_1(fe h);
int __device__ __host__ fe_is_0(fe h);
int __device__ __host__ fe_is_1(fe h);

/* Conversion between the limb form and a byte string. */
void __device__ __host__ fe_frombytes(fe h, const unsigned char *s);
void __device__ __host__ fe_tobytes(unsigned char *s, const fe h);

/* Copies, predicates and conditional moves. */
void __host__ __device__ fe_copy(fe h, const fe f);
int __host__ __device__ fe_isnegative(const fe f);
int __device__ __host__ fe_isnonzero(const fe f);
void __host__ __device__ fe_cmov(fe f, const fe g, unsigned int b);
/* NOTE(review): no __host__/__device__ qualifiers here, unlike its
   neighbours, so this is host-only as declared -- confirm that is intended. */
void fe_cswap(fe f, fe g, unsigned int b);

/* Arithmetic. */
void __device__ __host__ fe_neg(fe h, const fe f);
void __device__ __host__ fe_add(fe h, const fe f, const fe g);
void __device__ __host__ fe_invert(fe out, const fe z);
void __device__ __host__ fe_sq(fe h, const fe f);
void __host__ __device__ fe_sq2(fe h, const fe f);
void __device__ __host__ fe_mul(fe h, const fe f, const fe g);
/* NOTE(review): also declared without __host__/__device__ qualifiers, and
   takes a non-const source operand -- confirm both are intended. */
void fe_mul121666(fe h, fe f);
void __device__ __host__ fe_pow22523(fe out, const fe z);
void __device__ __host__ fe_sub(fe h, const fe f, const fe g);
42 | 
43 | #endif
44 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/fixedint.h:
--------------------------------------------------------------------------------
 1 | /*
    Portable header that provides the 32-bit and 64-bit integer types.
 3 | 
 4 |     Not a compatible replacement for <stdint.h>, do not blindly use it as such.
 5 | */
 6 | 
 7 | #if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined(__WATCOMC__) && (defined(_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_) || defined(__UINT_FAST64_TYPE__)) )) && !defined(FIXEDINT_H_INCLUDED)
 8 |     #include <stdint.h>
 9 |     #define FIXEDINT_H_INCLUDED
10 | 
11 |     #if defined(__WATCOMC__) && __WATCOMC__ >= 1250 && !defined(UINT64_C)
12 |         #include <limits.h>
13 |         #define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX))
14 |     #endif
15 | #endif
16 | 
17 | 
18 | #ifndef FIXEDINT_H_INCLUDED
19 |     #define FIXEDINT_H_INCLUDED
20 |     
21 |     #include <limits.h>
22 | 
23 |     /* (u)int32_t */
24 |     #ifndef uint32_t
25 |         #if (ULONG_MAX == 0xffffffffUL)
26 |             typedef unsigned long uint32_t;
27 |         #elif (UINT_MAX == 0xffffffffUL)
28 |             typedef unsigned int uint32_t;
29 |         #elif (USHRT_MAX == 0xffffffffUL)
30 |             typedef unsigned short uint32_t;
31 |         #endif
32 |     #endif
33 | 
34 | 
35 |     #ifndef int32_t
36 |         #if (LONG_MAX == 0x7fffffffL)
37 |             typedef signed long int32_t;
38 |         #elif (INT_MAX == 0x7fffffffL)
39 |             typedef signed int int32_t;
40 |         #elif (SHRT_MAX == 0x7fffffffL)
41 |             typedef signed short int32_t;
42 |         #endif
43 |     #endif
44 | 
45 | 
46 |     /* (u)int64_t */
47 |     #if (defined(__STDC__) && defined(__STDC_VERSION__) && __STDC__ && __STDC_VERSION__ >= 199901L)
48 |         typedef long long int64_t;
49 |         typedef unsigned long long uint64_t;
50 | 
51 |         #define UINT64_C(v) v ##ULL
52 |         #define INT64_C(v) v ##LL
53 |     #elif defined(__GNUC__)
54 |         __extension__ typedef long long int64_t;
55 |         __extension__ typedef unsigned long long uint64_t;
56 | 
57 |         #define UINT64_C(v) v ##ULL
58 |         #define INT64_C(v) v ##LL
59 |     #elif defined(__MWERKS__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) || defined(__APPLE_CC__) || defined(_LONG_LONG) || defined(_CRAYC)
60 |         typedef long long int64_t;
61 |         typedef unsigned long long uint64_t;
62 | 
63 |         #define UINT64_C(v) v ##ULL
64 |         #define INT64_C(v) v ##LL
65 |     #elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined(__BORLANDC__) && __BORLANDC__ > 0x460) || defined(__alpha) || defined(__DECC)
66 |         typedef __int64 int64_t;
67 |         typedef unsigned __int64 uint64_t;
68 | 
69 |         #define UINT64_C(v) v ##UI64
70 |         #define INT64_C(v) v ##I64
71 |     #endif
72 | #endif
73 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/ge.h:
--------------------------------------------------------------------------------
 1 | #ifndef GE_H
 2 | #define GE_H
 3 | 
 4 | #include "fe.h"
 5 | 
 6 | 
 7 | /*
 8 | ge means group element.
 9 | 
10 | Here the group is the set of pairs (x,y) of field elements (see fe.h)
11 | satisfying -x^2 + y^2 = 1 + d x^2y^2
12 | where d = -121665/121666.
13 | 
14 | Representations:
15 |   ge_p2 (projective): (X:Y:Z) satisfying x=X/Z, y=Y/Z
16 |   ge_p3 (extended): (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT
17 |   ge_p1p1 (completed): ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T
18 |   ge_precomp (Duif): (y+x,y-x,2dxy)
19 | */
20 | 
/* Projective representation: (X:Y:Z) with x = X/Z, y = Y/Z. */
typedef struct {
  fe X;
  fe Y;
  fe Z;
} ge_p2;

/* Extended representation: (X:Y:Z:T) with x = X/Z, y = Y/Z, XY = ZT. */
typedef struct {
  fe X;
  fe Y;
  fe Z;
  fe T;
} ge_p3;

/* Completed representation: ((X:Z),(Y:T)) with x = X/Z, y = Y/T. */
typedef struct {
  fe X;
  fe Y;
  fe Z;
  fe T;
} ge_p1p1;

/* Precomputed affine form: (y+x, y-x, 2dxy). */
typedef struct {
  fe yplusx;
  fe yminusx;
  fe xy2d;
} ge_precomp;

/* Cached form used as the second operand of ge_add / ge_sub.
   Presumably (Y+X, Y-X, Z, 2dT), going by the field names -- confirm
   against ge_p3_to_cached. */
typedef struct {
  fe YplusX;
  fe YminusX;
  fe Z;
  fe T2d;
} ge_cached;
53 | 
54 | #define GE_LOOKUP_SIZE 8
55 | 
56 | void __host__ __device__ ge_p3_tobytes(unsigned char *s, const ge_p3 *h);
57 | void __host__ __device__ ge_tobytes(unsigned char *s, const ge_p2 *h);
58 | int  __host__ __device__ ge_frombytes_negate_vartime(ge_p3 *h, const unsigned char *s);
59 | 
60 | void __host__ __device__ ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q);
61 | void __host__ __device__ ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q);
62 | void __host__ __device__ ge_addsub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q, bool add);
63 | void __host__ __device__ ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a, const ge_cached *A, const unsigned char *b);
64 | void __host__ __device__ ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q);
65 | void __host__ __device__ ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q);
66 | void __host__ __device__ ge_maddsub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q, bool add);
67 | void __host__ __device__ ge_scalarmult_base(ge_p3 *h, const unsigned char *a);
68 | 
69 | void __host__ __device__ ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p);
70 | void __host__ __device__ ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p);
71 | void __host__ __device__ ge_p2_0(ge_p2 *h);
72 | void __host__ __device__ ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p);
73 | void __host__ __device__ ge_p3_0(ge_p3 *h);
74 | void __host__ __device__ ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p);
75 | void __host__ __device__ ge_p3_to_cached(ge_cached *r, const ge_p3 *p);
76 | void __host__ __device__ ge_p3_to_p2(ge_p2 *r, const ge_p3 *p);
77 | 
78 | #endif
79 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/gpu_ctx.cu:
--------------------------------------------------------------------------------
  1 | #include "ed25519.h"
  2 | #include "gpu_ctx.h"
  3 | #include <pthread.h>
  4 | #include "gpu_common.h"
  5 | 
// Global lock: held while lazily initialising the context table and while
// advancing the round-robin cursors (g_cur_gpu / g_cur_queue).
static pthread_mutex_t g_ctx_mutex = PTHREAD_MUTEX_INITIALIZER;

// Upper bound on the number of devices used; the detected device count is
// clamped to this value.
#define MAX_NUM_GPUS 8
// Number of contexts (each with its own mutex and stream) kept per device.
#define MAX_QUEUE_SIZE 8

// Context table indexed as [gpu][queue]; zero-initialised so that all device
// pointers start out NULL.
static gpu_ctx_t g_gpu_ctx[MAX_NUM_GPUS][MAX_QUEUE_SIZE] = {0};
// Device that the next get_gpu_ctx() call will hand out.
static uint32_t g_cur_gpu = 0;
// Per-device queue slot that the next get_gpu_ctx() call will hand out.
static uint32_t g_cur_queue[MAX_NUM_GPUS] = {0};
// Number of usable devices; -1 means "not probed yet".
static int32_t g_total_gpus = -1;
 15 | 
 16 | static bool cuda_crypt_init_locked() {
 17 |     if (g_total_gpus == -1) {
 18 |         cudaGetDeviceCount(&g_total_gpus);
 19 |         g_total_gpus = min(MAX_NUM_GPUS, g_total_gpus);
 20 |         LOG("total_gpus: %d\n", g_total_gpus);
 21 |         for (int gpu = 0; gpu < g_total_gpus; gpu++) {
 22 |             CUDA_CHK(cudaSetDevice(gpu));
 23 |             for (int queue = 0; queue < MAX_QUEUE_SIZE; queue++) {
 24 |                 int err = pthread_mutex_init(&g_gpu_ctx[gpu][queue].mutex, NULL);
 25 |                 if (err != 0) {
 26 |                     fprintf(stderr, "pthread_mutex_init error %d gpu: %d queue: %d\n",
 27 |                             err, gpu, queue);
 28 |                     g_total_gpus = 0;
 29 |                     return false;
 30 |                 }
 31 |                 CUDA_CHK(cudaStreamCreate(&g_gpu_ctx[gpu][queue].stream));
 32 |             }
 33 |         }
 34 |     }
 35 |     return g_total_gpus > 0;
 36 | }
 37 | 
 38 | bool ed25519_init() {
 39 |     cudaFree(0);
 40 |     pthread_mutex_lock(&g_ctx_mutex);
 41 |     bool success = cuda_crypt_init_locked();
 42 |     pthread_mutex_unlock(&g_ctx_mutex);
 43 |     return success;
 44 | }
 45 | 
 46 | gpu_ctx_t* get_gpu_ctx() {
 47 |     int32_t cur_gpu, cur_queue;
 48 | 
 49 |     LOG("locking global mutex\n");
 50 |     pthread_mutex_lock(&g_ctx_mutex);
 51 |     if (!cuda_crypt_init_locked()) {
 52 |         pthread_mutex_unlock(&g_ctx_mutex);
 53 |         LOG("No GPUs, exiting...\n");
 54 |         return NULL;
 55 |     }
 56 |     cur_gpu = g_cur_gpu;
 57 |     g_cur_gpu++;
 58 |     g_cur_gpu %= g_total_gpus;
 59 |     cur_queue = g_cur_queue[cur_gpu];
 60 |     g_cur_queue[cur_gpu]++;
 61 |     g_cur_queue[cur_gpu] %= MAX_QUEUE_SIZE;
 62 |     pthread_mutex_unlock(&g_ctx_mutex);
 63 | 
 64 |     gpu_ctx_t* cur_ctx = &g_gpu_ctx[cur_gpu][cur_queue];
 65 |     LOG("locking contex mutex queue: %d gpu: %d\n", cur_queue, cur_gpu);
 66 |     pthread_mutex_lock(&cur_ctx->mutex);
 67 | 
 68 |     CUDA_CHK(cudaSetDevice(cur_gpu));
 69 | 
 70 |     LOG("selecting gpu: %d queue: %d\n", cur_gpu, cur_queue);
 71 | 
 72 |     return cur_ctx;
 73 | }
 74 | 
 75 | void setup_gpu_ctx(verify_ctx_t* cur_ctx,
 76 |                    const gpu_Elems* elems,
 77 |                    uint32_t num_elems,
 78 |                    uint32_t message_size,
 79 |                    uint32_t total_packets,
 80 |                    uint32_t total_packets_size,
 81 |                    uint32_t total_signatures,
 82 |                    const uint32_t* message_lens,
 83 |                    const uint32_t* public_key_offsets,
 84 |                    const uint32_t* signature_offsets,
 85 |                    const uint32_t* message_start_offsets,
 86 |                    size_t out_size,
 87 |                    cudaStream_t stream
 88 |                    ) {
 89 |     size_t offsets_size = total_signatures * sizeof(uint32_t);
 90 | 
 91 |     LOG("device allocate. packets: %d out: %d offsets_size: %zu\n",
 92 |         total_packets_size, (int)out_size, offsets_size);
 93 | 
 94 |     if (cur_ctx->packets == NULL ||
 95 |         total_packets_size > cur_ctx->packets_size_bytes) {
 96 |         CUDA_CHK(cudaFree(cur_ctx->packets));
 97 |         CUDA_CHK(cudaMalloc(&cur_ctx->packets, total_packets_size));
 98 | 
 99 |         cur_ctx->packets_size_bytes = total_packets_size;
100 |     }
101 | 
102 |     if (cur_ctx->out == NULL || cur_ctx->out_size_bytes < out_size) {
103 |         CUDA_CHK(cudaFree(cur_ctx->out));
104 |         CUDA_CHK(cudaMalloc(&cur_ctx->out, out_size));
105 | 
106 |         cur_ctx->out_size_bytes = out_size;
107 |     }
108 | 
109 |     if (cur_ctx->public_key_offsets == NULL || cur_ctx->offsets_len < total_signatures) {
110 |         CUDA_CHK(cudaFree(cur_ctx->Ai));
111 |         CUDA_CHK(cudaMalloc(&cur_ctx->Ai, total_signatures * sizeof(ge_cached) * GE_LOOKUP_SIZE));
112 | 
113 |         CUDA_CHK(cudaFree(cur_ctx->h));
114 |         CUDA_CHK(cudaMalloc(&cur_ctx->h, total_signatures * SHA512_SIZE));
115 | 
116 |         CUDA_CHK(cudaFree(cur_ctx->public_key_offsets));
117 |         CUDA_CHK(cudaMalloc(&cur_ctx->public_key_offsets, offsets_size));
118 | 
119 |         CUDA_CHK(cudaFree(cur_ctx->signature_offsets));
120 |         CUDA_CHK(cudaMalloc(&cur_ctx->signature_offsets, offsets_size));
121 | 
122 |         CUDA_CHK(cudaFree(cur_ctx->message_start_offsets));
123 |         CUDA_CHK(cudaMalloc(&cur_ctx->message_start_offsets, offsets_size));
124 | 
125 |         CUDA_CHK(cudaFree(cur_ctx->message_lens));
126 |         CUDA_CHK(cudaMalloc(&cur_ctx->message_lens, offsets_size));
127 | 
128 |         cur_ctx->offsets_len = total_signatures;
129 |     }
130 | 
131 |     LOG("Done alloc\n");
132 | 
133 |     CUDA_CHK(cudaMemcpyAsync(cur_ctx->public_key_offsets, public_key_offsets, offsets_size, cudaMemcpyHostToDevice, stream));
134 |     CUDA_CHK(cudaMemcpyAsync(cur_ctx->signature_offsets, signature_offsets, offsets_size, cudaMemcpyHostToDevice, stream));
135 |     CUDA_CHK(cudaMemcpyAsync(cur_ctx->message_start_offsets, message_start_offsets, offsets_size, cudaMemcpyHostToDevice, stream));
136 |     CUDA_CHK(cudaMemcpyAsync(cur_ctx->message_lens, message_lens, offsets_size, cudaMemcpyHostToDevice, stream));
137 | 
138 |     size_t cur = 0;
139 |     for (size_t i = 0; i < num_elems; i++) {
140 |         LOG("i: %zu size: %d\n", i, elems[i].num * message_size);
141 |         CUDA_CHK(cudaMemcpyAsync(&cur_ctx->packets[cur * message_size], elems[i].elems, elems[i].num * message_size, cudaMemcpyHostToDevice, stream));
142 |         cur += elems[i].num;
143 |     }
144 | }
145 | 
146 | 
147 | void release_gpu_ctx(gpu_ctx_t* cur_ctx) {
148 |     pthread_mutex_unlock(&cur_ctx->mutex);
149 | }
150 | 
151 | void ed25519_free_gpu_mem() {
152 |     for (size_t gpu = 0; gpu < MAX_NUM_GPUS; gpu++) {
153 |         for (size_t queue = 0; queue < MAX_QUEUE_SIZE; queue++) {
154 |             gpu_ctx_t* cur_ctx = &g_gpu_ctx[gpu][queue];
155 |             CUDA_CHK(cudaFree(cur_ctx->verify_ctx.packets));
156 |             CUDA_CHK(cudaFree(cur_ctx->verify_ctx.out));
157 |             CUDA_CHK(cudaFree(cur_ctx->verify_ctx.message_lens));
158 |             CUDA_CHK(cudaFree(cur_ctx->verify_ctx.public_key_offsets));
159 |             CUDA_CHK(cudaFree(cur_ctx->verify_ctx.private_key_offsets));
160 |             CUDA_CHK(cudaFree(cur_ctx->verify_ctx.signature_offsets));
161 |             CUDA_CHK(cudaFree(cur_ctx->verify_ctx.message_start_offsets));
162 |             if (cur_ctx->stream != 0) {
163 |                 CUDA_CHK(cudaStreamDestroy(cur_ctx->stream));
164 |             }
165 |         }
166 |     }
167 | }
168 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/gpu_ctx.h:
--------------------------------------------------------------------------------
 1 | #ifndef GPU_CTX_H
 2 | #define GPU_CTX_H
 3 | 
 4 | #include <inttypes.h>
 5 | #include "ed25519.h"
 6 | #include "ge.h"
 7 | 
 8 | #ifdef __cplusplus
 9 | extern "C" {
10 | #endif
11 | 
/* Device-side buffers for one batch; pointers are NULL until allocated.
   The buffers written by setup_gpu_ctx() are grown on demand and reused
   across batches. */
typedef struct {
    /* Packed packet data and the capacity of that buffer in bytes. */
    uint8_t* packets;
    uint32_t packets_size_bytes;

    /* GE_LOOKUP_SIZE ge_cached entries per signature. */
    ge_cached* Ai;
    /* SHA512_SIZE bytes per signature. */
    uint8_t* h;
    /* Result buffer and its capacity in bytes. */
    uint8_t* out;
    size_t out_size_bytes;

    /* Per-signature uint32_t tables, offsets_len entries each. */
    uint32_t* public_key_offsets;
    /* Not allocated by setup_gpu_ctx(); presumably owned by the signing
       path -- TODO confirm. */
    uint32_t* private_key_offsets;
    uint32_t* message_start_offsets;
    uint32_t* signature_offsets;
    uint32_t* message_lens;
    /* Capacity of the per-signature tables, in entries (not bytes). */
    size_t offsets_len;
} verify_ctx_t;
28 | 
/* One work slot: device buffers plus the mutex and stream that go with them.
   get_gpu_ctx() returns a slot with `mutex` locked; release_gpu_ctx()
   unlocks it. */
typedef struct {
    verify_ctx_t verify_ctx;

    pthread_mutex_t mutex;
    cudaStream_t stream;
} gpu_ctx_t;
35 | 
36 | extern gpu_ctx_t* get_gpu_ctx();
37 | extern void release_gpu_ctx(gpu_ctx_t*);
38 | 
39 | extern void ed25519_free_gpu_mem();
40 | 
41 | extern void setup_gpu_ctx(verify_ctx_t* cur_ctx,
42 |                           const gpu_Elems* elems,
43 |                           uint32_t num_elems,
44 |                           uint32_t message_size,
45 |                           uint32_t total_packets,
46 |                           uint32_t total_packets_size,
47 |                           uint32_t total_signatures,
48 |                           const uint32_t* message_lens,
49 |                           const uint32_t* public_key_offsets,
50 |                           const uint32_t* signature_offsets,
51 |                           const uint32_t* message_start_offsets,
52 |                           size_t out_size,
53 |                           cudaStream_t stream
54 |                           );
55 | 
56 | #ifdef __cplusplus
57 | }
58 | #endif
59 | 
60 | #endif
61 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/int128.h:
--------------------------------------------------------------------------------
 1 | #ifndef INT128_H
 2 | #define INT128_H
 3 | 
 4 | struct uint128_t {
 5 |   uint64_t low;
 6 |   uint64_t high;
 7 | };
 8 | 
 9 | static __device__ __host__ uint128_t mul_128(uint64_t a, uint64_t b) {
10 |   uint128_t result;
11 | #ifdef __CUDA_ARCH__
12 |   result.low = a * b;
13 |   result.high = __mul64hi(a, b);
14 | #elif __x86_64__
15 |   asm( "mulq %3\n\t"
16 |       : "=a" (result.low), "=d" (result.high)
17 |       : "%0" (a), "rm" (b));
18 | #endif
19 |   return result;
20 | }
21 | 
22 | static __device__ __host__ uint128_t add_128(uint128_t a, uint128_t b) {
23 |   uint128_t result;
24 | #ifdef __CUDA_ARCH__
25 |   asm( "add.cc.u64    %0, %2, %4;\n\t"
26 |        "addc.u64      %1, %3, %5;\n\t"
27 |        : "=l" (result.low), "=l" (result.high)
28 |        : "l" (a.low), "l" (a.high),
29 |        "l" (b.low), "l" (b.high));
30 | #else
31 |   result.low = a.low + b.low;
32 |   result.high = a.high + b.high + (result.low < a.low);
33 | #endif
34 |   return result;
35 | }
36 | 
37 | #endif
38 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/keypair.cu:
--------------------------------------------------------------------------------
 1 | #include "ed25519.h"
 2 | #include "sha512.h"
 3 | #include "ge.h"
 4 | 
 5 | 
 6 | void ed25519_create_keypair(unsigned char *public_key, unsigned char *private_key, const unsigned char *seed) {
 7 |     ge_p3 A;
 8 | 
 9 |     sha512(seed, 32, private_key);
10 |     private_key[0] &= 248;
11 |     private_key[31] &= 63;
12 |     private_key[31] |= 64;
13 | 
14 |     ge_scalarmult_base(&A, private_key);
15 |     ge_p3_tobytes(public_key, &A);
16 | }
17 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/license.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2015 Orson Peters <orsonpeters@gmail.com>
 2 | 
 3 | This software is provided 'as-is', without any express or implied warranty. In no event will the
 4 | authors be held liable for any damages arising from the use of this software.
 5 | 
 6 | Permission is granted to anyone to use this software for any purpose, including commercial
 7 | applications, and to alter it and redistribute it freely, subject to the following restrictions:
 8 | 
 9 | 1. The origin of this software must not be misrepresented; you must not claim that you wrote the
10 |    original software. If you use this software in a product, an acknowledgment in the product
11 |    documentation would be appreciated but is not required.
12 | 
13 | 2. Altered source versions must be plainly marked as such, and must not be misrepresented as
14 |    being the original software.
15 | 
16 | 3. This notice may not be removed or altered from any source distribution.
17 | 
18 | ================================
19 | 
20 | Copyright (c) 2017-2019 isis agora lovecruft. All rights reserved.
21 | 
22 | Redistribution and use in source and binary forms, with or without
23 | modification, are permitted provided that the following conditions are
24 | met:
25 | 
26 | 1. Redistributions of source code must retain the above copyright
27 | notice, this list of conditions and the following disclaimer.
28 | 
29 | 2. Redistributions in binary form must reproduce the above copyright
30 | notice, this list of conditions and the following disclaimer in the
31 | documentation and/or other materials provided with the distribution.
32 | 
33 | 3. Neither the name of the copyright holder nor the names of its
34 | contributors may be used to endorse or promote products derived from
35 | this software without specific prior written permission.
36 | 
37 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
38 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
39 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
40 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
43 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
44 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
46 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
47 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/sc.h:
--------------------------------------------------------------------------------
 1 | #ifndef SC_H
 2 | #define SC_H
 3 | 
 4 | /*
 5 | The set of scalars is \Z/l
 6 | where l = 2^252 + 27742317777372353535851937790883648493.
 7 | */
 8 | 
 9 | void __host__ __device__ scalar32_reduce(unsigned char* s);
10 | void __host__ __device__ sc_reduce(unsigned char *s);
11 | void __host__ __device__ sc_muladd(unsigned char *s, const unsigned char *a, const unsigned char *b, const unsigned char *c);
12 | 
13 | #endif
14 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/seed.cu:
--------------------------------------------------------------------------------
 1 | #include "ed25519.h"
 2 | 
 3 | #ifndef ED25519_NO_SEED
 4 | 
 5 | #ifdef _WIN32
 6 | #include <windows.h>
 7 | #include <wincrypt.h>
 8 | #else
 9 | #include <stdio.h>
10 | #endif
11 | 
/*
 * Fills seed[0..31] with 32 random bytes from the operating system
 * (CryptGenRandom on Windows, /dev/urandom elsewhere).
 *
 * Returns 0 on success and 1 if the random source could not be opened or did
 * not deliver all 32 bytes.
 */
int ed25519_create_seed(unsigned char *seed) {
#ifdef _WIN32
    HCRYPTPROV prov;

    if (!CryptAcquireContext(&prov, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))  {
        return 1;
    }

    if (!CryptGenRandom(prov, 32, seed))  {
        CryptReleaseContext(prov, 0);
        return 1;
    }

    CryptReleaseContext(prov, 0);
#else
    FILE *f = fopen("/dev/urandom", "rb");

    if (f == NULL) {
        return 1;
    }

    size_t res = fread(seed, 1, 32, f);
    /* Close before checking the count so a short read does not leak the handle. */
    fclose(f);
    if (res != 32) {
        return 1;
    }
#endif

    return 0;
}
42 | 
43 | #endif
44 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/sha512.cu:
--------------------------------------------------------------------------------
  1 | /* LibTomCrypt, modular cryptographic library -- Tom St Denis
  2 |  *
  3 |  * LibTomCrypt is a library that provides various cryptographic
  4 |  * algorithms in a highly modular and flexible manner.
  5 |  *
  6 |  * The library is free for all purposes without any express
  7 |  * guarantee it works.
  8 |  *
  9 |  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
 10 |  */
 11 | 
 12 | #include "fixedint.h"
 13 | #include "sha512.h"
 14 | 
/* K_DEF qualifies the round-constant table below: it expands to __device__
   when __CUDA_ARCH__ is defined and to nothing otherwise. */
#ifdef __CUDA_ARCH__
#define K_DEF __device__
#else
#define K_DEF
#endif
 20 | 
 21 | static const uint64_t K_DEF K[80] = {
 22 |     UINT64_C(0x428a2f98d728ae22), UINT64_C(0x7137449123ef65cd),
 23 |     UINT64_C(0xb5c0fbcfec4d3b2f), UINT64_C(0xe9b5dba58189dbbc),
 24 |     UINT64_C(0x3956c25bf348b538), UINT64_C(0x59f111f1b605d019),
 25 |     UINT64_C(0x923f82a4af194f9b), UINT64_C(0xab1c5ed5da6d8118),
 26 |     UINT64_C(0xd807aa98a3030242), UINT64_C(0x12835b0145706fbe),
 27 |     UINT64_C(0x243185be4ee4b28c), UINT64_C(0x550c7dc3d5ffb4e2),
 28 |     UINT64_C(0x72be5d74f27b896f), UINT64_C(0x80deb1fe3b1696b1),
 29 |     UINT64_C(0x9bdc06a725c71235), UINT64_C(0xc19bf174cf692694),
 30 |     UINT64_C(0xe49b69c19ef14ad2), UINT64_C(0xefbe4786384f25e3),
 31 |     UINT64_C(0x0fc19dc68b8cd5b5), UINT64_C(0x240ca1cc77ac9c65),
 32 |     UINT64_C(0x2de92c6f592b0275), UINT64_C(0x4a7484aa6ea6e483),
 33 |     UINT64_C(0x5cb0a9dcbd41fbd4), UINT64_C(0x76f988da831153b5),
 34 |     UINT64_C(0x983e5152ee66dfab), UINT64_C(0xa831c66d2db43210),
 35 |     UINT64_C(0xb00327c898fb213f), UINT64_C(0xbf597fc7beef0ee4),
 36 |     UINT64_C(0xc6e00bf33da88fc2), UINT64_C(0xd5a79147930aa725),
 37 |     UINT64_C(0x06ca6351e003826f), UINT64_C(0x142929670a0e6e70),
 38 |     UINT64_C(0x27b70a8546d22ffc), UINT64_C(0x2e1b21385c26c926),
 39 |     UINT64_C(0x4d2c6dfc5ac42aed), UINT64_C(0x53380d139d95b3df),
 40 |     UINT64_C(0x650a73548baf63de), UINT64_C(0x766a0abb3c77b2a8),
 41 |     UINT64_C(0x81c2c92e47edaee6), UINT64_C(0x92722c851482353b),
 42 |     UINT64_C(0xa2bfe8a14cf10364), UINT64_C(0xa81a664bbc423001),
 43 |     UINT64_C(0xc24b8b70d0f89791), UINT64_C(0xc76c51a30654be30),
 44 |     UINT64_C(0xd192e819d6ef5218), UINT64_C(0xd69906245565a910),
 45 |     UINT64_C(0xf40e35855771202a), UINT64_C(0x106aa07032bbd1b8),
 46 |     UINT64_C(0x19a4c116b8d2d0c8), UINT64_C(0x1e376c085141ab53),
 47 |     UINT64_C(0x2748774cdf8eeb99), UINT64_C(0x34b0bcb5e19b48a8),
 48 |     UINT64_C(0x391c0cb3c5c95a63), UINT64_C(0x4ed8aa4ae3418acb),
 49 |     UINT64_C(0x5b9cca4f7763e373), UINT64_C(0x682e6ff3d6b2b8a3),
 50 |     UINT64_C(0x748f82ee5defb2fc), UINT64_C(0x78a5636f43172f60),
 51 |     UINT64_C(0x84c87814a1f0ab72), UINT64_C(0x8cc702081a6439ec),
 52 |     UINT64_C(0x90befffa23631e28), UINT64_C(0xa4506cebde82bde9),
 53 |     UINT64_C(0xbef9a3f7b2c67915), UINT64_C(0xc67178f2e372532b),
 54 |     UINT64_C(0xca273eceea26619c), UINT64_C(0xd186b8c721c0c207),
 55 |     UINT64_C(0xeada7dd6cde0eb1e), UINT64_C(0xf57d4f7fee6ed178),
 56 |     UINT64_C(0x06f067aa72176fba), UINT64_C(0x0a637dc5a2c898a6),
 57 |     UINT64_C(0x113f9804bef90dae), UINT64_C(0x1b710b35131c471b),
 58 |     UINT64_C(0x28db77f523047d84), UINT64_C(0x32caab7b40c72493),
 59 |     UINT64_C(0x3c9ebe0a15c9bebc), UINT64_C(0x431d67c49c100d4c),
 60 |     UINT64_C(0x4cc5d4becb3e42b6), UINT64_C(0x597f299cfc657e2a),
 61 |     UINT64_C(0x5fcb6fab3ad6faec), UINT64_C(0x6c44198c4a475817)
 62 | };
 63 | 
/* Various logical functions */

/* Rotate the 64-bit value x right by (y mod 64) bits.
   NOTE: when (y & 63) == 0 the left shift count becomes 64; every use in this
   file passes a constant between 1 and 61. */
#define ROR64c(x, y) \
    ( ((((x)&UINT64_C(0xFFFFFFFFFFFFFFFF))>>((uint64_t)(y)&UINT64_C(63))) | \
      ((x)<<((uint64_t)(64-((y)&UINT64_C(63)))))) & UINT64_C(0xFFFFFFFFFFFFFFFF))

/* Store the 64-bit value x into y[0..7], most significant byte first. */
#define STORE64H(x, y)                                                                     \
   { (y)[0] = (unsigned char)(((x)>>56)&255); (y)[1] = (unsigned char)(((x)>>48)&255);     \
     (y)[2] = (unsigned char)(((x)>>40)&255); (y)[3] = (unsigned char)(((x)>>32)&255);     \
     (y)[4] = (unsigned char)(((x)>>24)&255); (y)[5] = (unsigned char)(((x)>>16)&255);     \
     (y)[6] = (unsigned char)(((x)>>8)&255); (y)[7] = (unsigned char)((x)&255); }

/* Load a 64-bit value into x from y[0..7], most significant byte first. */
#define LOAD64H(x, y)                                                      \
   { x = (((uint64_t)((y)[0] & 255))<<56)|(((uint64_t)((y)[1] & 255))<<48) | \
         (((uint64_t)((y)[2] & 255))<<40)|(((uint64_t)((y)[3] & 255))<<32) | \
         (((uint64_t)((y)[4] & 255))<<24)|(((uint64_t)((y)[5] & 255))<<16) | \
         (((uint64_t)((y)[6] & 255))<<8)|(((uint64_t)((y)[7] & 255))); }


/* Round helpers. Ch and Maj do not parenthesise their arguments, so pass
   simple expressions only. */
#define Ch(x,y,z)       (z ^ (x & (y ^ z)))
#define Maj(x,y,z)      (((x | y) & z) | (x & y))
/* S rotates right, R shifts right (logical). */
#define S(x, n)         ROR64c(x, n)
#define R(x, n)         (((x) &UINT64_C(0xFFFFFFFFFFFFFFFF))>>((uint64_t)n))
#define Sigma0(x)       (S(x, 28) ^ S(x, 34) ^ S(x, 39))
#define Sigma1(x)       (S(x, 14) ^ S(x, 18) ^ S(x, 41))
#define Gamma0(x)       (S(x, 1) ^ S(x, 8) ^ R(x, 7))
#define Gamma1(x)       (S(x, 19) ^ S(x, 61) ^ R(x, 6))
/* Smaller of two values; arguments are evaluated more than once. */
#ifndef MIN
   #define MIN(x, y) ( ((x)<(y))?(x):(y) )
#endif
 94 | 
/* compress 1024-bits
 *
 * Mixes one 128-byte block into the running hash state.
 *   md   hash context whose state[0..7] is read and updated
 *   buf  the 128 input bytes (read only)
 * Always returns 0.
 */
static int __device__ __host__ sha512_compress(sha512_context *md, unsigned char *buf)
{
    uint64_t S[8], W[80], t0, t1;
    int i;

    /* copy state into S */
    for (i = 0; i < 8; i++) {
        S[i] = md->state[i];
    }

    /* load the 1024-bit input block into W[0..15] as big-endian 64-bit words */
    for (i = 0; i < 16; i++) {
        LOAD64H(W[i], buf + (8*i));
    }

    /* fill W[16..79] */
    for (i = 16; i < 80; i++) {
        W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16];
    }

    /* Compress */
    /* One round: only d and h are written. The eight calls below rotate the
       argument list instead of shuffling the working variables. */
    #define RND(a,b,c,d,e,f,g,h,i) \
    t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \
    t1 = Sigma0(a) + Maj(a, b, c);\
    d += t0; \
    h  = t0 + t1;

    /* 80 rounds, unrolled eight at a time */
    for (i = 0; i < 80; i += 8) {
       RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0);
       RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1);
       RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2);
       RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3);
       RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4);
       RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5);
       RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6);
       RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7);
   }

   #undef RND



    /* feedback: add the working variables back into the state */
    for (i = 0; i < 8; i++) {
        md->state[i] = md->state[i] + S[i];
    }

    return 0;
}
145 | 
146 | 
147 | /**
148 |    Reset a SHA-512 context so that a new message can be hashed
149 |    @param md   The context to reset; its length, buffered count and state are overwritten
150 |    @return 0 on success, 1 when md is NULL
151 | */
152 | int __device__ __host__ sha512_init(sha512_context * md) {
153 |     /* initial chaining values, listed in state[] order */
154 |     const uint64_t initial_state[8] = {
155 |         UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
156 |         UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
157 |         UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
158 |         UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179)
159 |     };
160 |     int word;
161 | 
162 |     if (md == NULL) { return 1; }
163 |     md->length = 0;
164 |     md->curlen = 0;
165 |     for (word = 0; word < 8; word++) { md->state[word] = initial_state[word]; }
166 |     return 0;
167 | }
168 | 
169 | /**
170 |    Process a block of memory through the hash
171 |    @param md     The hash state
172 |    @param in     The data to hash
173 |    @param inlen  The length of the data (octets)
174 |    @return 0 if successful, 1 if md or in is NULL or md->curlen is out of range
175 | */
176 | int __device__ __host__ sha512_update(sha512_context * md, const unsigned char *in, size_t inlen)
177 | {
178 |     size_t n;
179 |     size_t i;
180 |     int           err;
181 |     if (md == NULL) return 1;
182 |     if (in == NULL) return 1;
183 |     if (md->curlen > sizeof(md->buf)) {   /* more bytes "buffered" than buf can hold */
184 |        return 1;
185 |     }
186 |     while (inlen > 0) {
187 |         if (md->curlen == 0 && inlen >= 128) {   /* nothing buffered: hash a full block straight from the input */
188 |            if ((err = sha512_compress (md, (unsigned char *)in)) != 0) {
189 |               return err;
190 |            }
191 |            md->length += 128 * 8;   /* length is kept in bits */
192 |            in             += 128;
193 |            inlen          -= 128;
194 |         } else {
195 |            n = MIN(inlen, (128 - md->curlen));
196 |            /* top up the partial block with as many input bytes as still fit */
197 |            for (i = 0; i < n; i++) {
198 |             md->buf[i + md->curlen] = in[i];
199 |            }
200 | 
201 |            /* advance past the copied bytes; compress once the block is full */
202 |            md->curlen += n;
203 |            in             += n;
204 |            inlen          -= n;
205 |            if (md->curlen == 128) {
206 |               if ((err = sha512_compress (md, md->buf)) != 0) {
207 |                  return err;
208 |               }
209 |               md->length += 8*128;
210 |               md->curlen = 0;
211 |            }
212 |        }
213 |     }
214 |     return 0;
215 | }
216 | 
217 | /**
218 |    Terminate the hash to get the digest
219 |    @param md  The hash state
220 |    @param out [out] The destination of the hash (64 bytes)
221 |    @return 0 if successful, 1 if md or out is NULL or md->curlen is out of range
222 | */
223 | int __device__ __host__ sha512_final(sha512_context * md, unsigned char *out)
224 | {
225 |     int i;
226 | 
227 |     if (md == NULL) return 1;
228 |     if (out == NULL) return 1;
229 | 
230 |     if (md->curlen >= sizeof(md->buf)) {   /* a full block should already have been compressed */
231 |         return 1;
232 |     }
233 | 
234 |     /* increase the length of the message */
235 |     md->length += md->curlen * UINT64_C(8);
236 | 
237 |     /* append the '1' bit */
238 |     md->buf[md->curlen++] = (unsigned char)0x80;
239 | 
240 |     /* if the length is currently above 112 bytes we append zeros
241 |      * then compress.  Then we can fall back to padding zeros and length
242 |      * encoding like normal.
243 |      */
244 |     if (md->curlen > 112) {
245 |         while (md->curlen < 128) {
246 |             md->buf[md->curlen++] = (unsigned char)0;
247 |         }
248 |         sha512_compress(md, md->buf);
249 |         md->curlen = 0;
250 |     }
251 | 
252 |     /* pad up to 120 bytes of zeroes
253 |      * note: that from 112 to 120 is the 64 MSB of the length.  We assume that you won't hash
254 |      * > 2^64 bits of data... :-)
255 |      */
256 |     while (md->curlen < 120) {
257 |         md->buf[md->curlen++] = (unsigned char)0;
258 |     }
259 | 
260 |     /* store length */
261 |     STORE64H(md->length, md->buf+120);
262 |     sha512_compress(md, md->buf);
263 | 
264 |     /* copy output */
265 |     for (i = 0; i < 8; i++) {
266 |         STORE64H(md->state[i], out+(8*i));
267 |     }
268 | 
269 |     return 0;
270 | }
271 | 
272 | int __device__ __host__ sha512(const unsigned char *message, size_t message_len, unsigned char *out)
273 | {   /* one-shot helper: init, update, final; writes the digest to out */
274 |     sha512_context ctx;
275 |     int ret;   /* first non-zero status from a step is returned as-is */
276 |     if ((ret = sha512_init(&ctx))) return ret;
277 |     if ((ret = sha512_update(&ctx, message, message_len))) return ret;
278 |     if ((ret = sha512_final(&ctx, out))) return ret;
279 |     return 0;
280 | }
281 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/sha512.h:
--------------------------------------------------------------------------------
 1 | #ifndef SHA512_H
 2 | #define SHA512_H
 3 | 
 4 | #include <stddef.h>
 5 | 
 6 | #include "fixedint.h"
 7 | 
 8 | /* Streaming SHA-512 state shared by the sha512_* functions declared below */
 9 | typedef struct sha512_context_ {
10 |     uint64_t  length, state[8];   /* length: message bits absorbed so far; state: the 8 chaining words */
11 |     size_t curlen;                /* number of bytes currently held in buf */
12 |     unsigned char buf[128];       /* partial input block waiting to be compressed */
13 | } sha512_context;
14 | 
15 | /* Each function returns 0 on success and a non-zero value on failure. */
16 | int __device__ __host__ sha512_init(sha512_context * md);
17 | int __device__ __host__ sha512_final(sha512_context * md, unsigned char *out);
18 | int __device__ __host__ sha512_update(sha512_context * md, const unsigned char *in, size_t inlen);
19 | int __device__ __host__ sha512(const unsigned char *message, size_t message_len, unsigned char *out);
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/sign.cu:
--------------------------------------------------------------------------------
  1 | #include "ed25519.h"
  2 | #include "sha512.h"
  3 | #include "ge.h"
  4 | #include "sc.h"
  5 | #include "gpu_common.h"
  6 | #include "gpu_ctx.h"
  7 | 
  8 | 
  9 | static void __device__ __host__
 10 | ed25519_sign_device(unsigned char *signature,
 11 |                    const unsigned char *message,
 12 |                    size_t message_len,
 13 |                    const unsigned char *public_key,
 14 |                    const unsigned char *private_key) {
 15 |     /* Signs one message: encoded point at signature, scalar at signature + 32. */
 16 |     unsigned char nonce[64];
 17 |     unsigned char challenge[64];
 18 |     sha512_context ctx;
 19 |     ge_p3 nonce_point;
 20 | 
 21 |     /* nonce = SHA-512(private_key[32..63] || message), then passed through sc_reduce() */
 22 |     sha512_init(&ctx);
 23 |     sha512_update(&ctx, private_key + 32, 32);
 24 |     sha512_update(&ctx, message, message_len);
 25 |     sha512_final(&ctx, nonce);
 26 |     sc_reduce(nonce);
 27 |     /* first half: ge_p3_tobytes() encoding of the point ge_scalarmult_base() derives from nonce */
 28 |     ge_scalarmult_base(&nonce_point, nonce);
 29 |     ge_p3_tobytes(signature, &nonce_point);
 30 |     /* challenge = SHA-512(signature[0..31] || public_key[0..31] || message), also sc_reduce()d */
 31 |     sha512_init(&ctx);
 32 |     sha512_update(&ctx, signature, 32);
 33 |     sha512_update(&ctx, public_key, 32);
 34 |     sha512_update(&ctx, message, message_len);
 35 |     sha512_final(&ctx, challenge);
 36 |     sc_reduce(challenge);
 37 |     sc_muladd(signature + 32, challenge, private_key, nonce);
 38 | }
 39 | 
 40 | void ed25519_sign(unsigned char *signature,
 41 |                    const unsigned char *message,
 42 |                    size_t message_len,
 43 |                    const unsigned char *public_key,
 44 |                    const unsigned char *private_key) {  // host entry point for signing a single message
 45 |     ed25519_sign_device(signature, message, message_len, public_key, private_key);  // same routine the GPU kernel calls
 46 | }
 47 | 
 48 | 
 49 | 
 50 | __global__ void ed25519_sign_kernel(unsigned char* packets,
 51 |                                     uint32_t message_size,
 52 |                                     uint32_t* public_key_offsets,
 53 |                                     uint32_t* private_key_offsets,
 54 |                                     uint32_t* message_start_offsets,
 55 |                                     uint32_t* message_lens,
 56 |                                     size_t num_transactions,
 57 |                                     uint8_t* out)
 58 | {
 59 |     // One thread per signature; message_size is accepted but not read here.
 60 |     int tid = blockIdx.x * blockDim.x + threadIdx.x;
 61 |     if (!(tid < num_transactions)) {
 62 |         return;  // surplus threads in the last block have nothing to sign
 63 |     }
 64 | 
 65 |     // Every offset is a byte index into the packets buffer.
 66 |     const unsigned char* message = &packets[message_start_offsets[tid]];
 67 |     const unsigned char* public_key = &packets[public_key_offsets[tid]];
 68 |     const unsigned char* private_key = &packets[private_key_offsets[tid]];
 69 | 
 70 |     // Thread tid writes its signature at byte offset tid * SIG_SIZE of out.
 71 |     ed25519_sign_device(&out[tid * SIG_SIZE], message, message_lens[tid], public_key, private_key);
 72 | }
 73 | 
 74 | 
 75 | 
 76 | void ed25519_sign_many(const gpu_Elems* elems,
 77 |                        uint32_t num_elems,
 78 |                        uint32_t message_size,
 79 |                        uint32_t total_packets,
 80 |                        uint32_t total_signatures,
 81 |                        const uint32_t* message_lens,
 82 |                        const uint32_t* public_key_offsets,
 83 |                        const uint32_t* private_key_offsets,
 84 |                        const uint32_t* message_start_offsets,
 85 |                        uint8_t* signatures_out,
 86 |                        uint8_t use_non_default_stream
 87 |                        ) {
 88 |     // Signs total_signatures messages on the GPU, one thread each, and copies
 89 |     // SIG_SIZE bytes per signature into signatures_out before returning.
 90 |     int num_threads_per_block = 64;
 91 |     int num_blocks = ROUND_UP_DIV(total_signatures, num_threads_per_block);
 92 |     size_t sig_out_size = SIG_SIZE * total_signatures;
 93 |     if (0 == total_packets) {
 94 |         return;  // no packet data, so signatures_out is left untouched
 95 |     }
 96 |     uint32_t total_packets_size = total_packets * message_size;
 97 | 
 98 |     LOG("signing %d packets sig_size: %zu message_size: %d\n",
 99 |         total_packets, sig_out_size, message_size);
100 | 
101 |     gpu_ctx_t* gpu_ctx = get_gpu_ctx();
102 |     verify_ctx_t* cur_ctx = &gpu_ctx->verify_ctx;
103 |     // Stream 0 unless the caller asked for the context's own stream.
104 |     cudaStream_t stream = 0;
105 |     if (0 != use_non_default_stream) {
106 |         stream = gpu_ctx->stream;
107 |     }
108 | 
109 |     setup_gpu_ctx(cur_ctx,
110 |                   elems,
111 |                   num_elems,
112 |                   message_size,
113 |                   total_packets,
114 |                   total_packets_size,
115 |                   total_signatures,
116 |                   message_lens,
117 |                   public_key_offsets,
118 |                   private_key_offsets,
119 |                   message_start_offsets,
120 |                   sig_out_size,
121 |                   stream
122 |                  );
123 | 
124 |     LOG("signing blocks: %d threads_per_block: %d\n", num_blocks, num_threads_per_block);
125 |     ed25519_sign_kernel<<<num_blocks, num_threads_per_block, 0, stream>>>
126 |                             (cur_ctx->packets,
127 |                              message_size,
128 |                              cur_ctx->public_key_offsets,
129 |                              cur_ctx->signature_offsets,  // presumably filled from private_key_offsets by setup_gpu_ctx() - confirm
130 |                              cur_ctx->message_start_offsets,
131 |                              cur_ctx->message_lens,
132 |                              total_signatures,
133 |                              cur_ctx->out);
134 |     CUDA_CHK(cudaPeekAtLastError());  // report a failed launch here rather than as a memcpy error below
135 |     cudaError_t err = cudaMemcpyAsync(signatures_out, cur_ctx->out, sig_out_size, cudaMemcpyDeviceToHost, stream);
136 |     if (err != cudaSuccess)  {
137 |         fprintf(stderr, "sign: cudaMemcpy(out) error: out = %p cur_ctx->out = %p size = %zu num: %d elems = %p\n",
138 |                         signatures_out, cur_ctx->out, sig_out_size, num_elems, elems);
139 |     }
140 |     CUDA_CHK(err);
141 |     // Wait for the copy so signatures_out is complete when this function returns.
142 |     CUDA_CHK(cudaStreamSynchronize(stream));
143 | 
144 |     release_gpu_ctx(gpu_ctx);
145 | }
146 | 
147 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/verify.cu:
--------------------------------------------------------------------------------
  1 | #include "sha512.h"
  2 | #include <algorithm>
  3 | #include <stdio.h>
  4 | #include "sc.cu"
  5 | #include "fe.cu"
  6 | #include "ge.cu"
  7 | #include "sha512.cu"
  8 | 
  9 | #include "ed25519.h"
 10 | #include <pthread.h>
 11 | 
 12 | #include "gpu_common.h"
 13 | #include "gpu_ctx.h"
 14 | 
 15 | #define USE_CLOCK_GETTIME
 16 | #include "perftime.h"
 17 | 
 18 | static int __host__ __device__ consttime_equal(const unsigned char *x, const unsigned char *y) {  // 1 if the first 32 bytes of x and y match, else 0
 19 |     unsigned char r = 0;
 20 |     // Fold the XOR of all 32 byte pairs into r; no step branches on the data.
 21 |     r = x[0] ^ y[0];
 22 |     #define F(i) r |= x[i] ^ y[i]
 23 |     F(1);
 24 |     F(2);
 25 |     F(3);
 26 |     F(4);
 27 |     F(5);
 28 |     F(6);
 29 |     F(7);
 30 |     F(8);
 31 |     F(9);
 32 |     F(10);
 33 |     F(11);
 34 |     F(12);
 35 |     F(13);
 36 |     F(14);
 37 |     F(15);
 38 |     F(16);
 39 |     F(17);
 40 |     F(18);
 41 |     F(19);
 42 |     F(20);
 43 |     F(21);
 44 |     F(22);
 45 |     F(23);
 46 |     F(24);
 47 |     F(25);
 48 |     F(26);
 49 |     F(27);
 50 |     F(28);
 51 |     F(29);
 52 |     F(30);
 53 |     F(31);
 54 |     #undef F
 55 |     // r is zero only when every byte pair matched, so !r is the equality flag.
 56 |     return !r;
 57 | }
 58 | 
 59 | // Returns 0 and fills `scalar` when the 32 bytes at `signature` pass the reduced-scalar check, else 1.
 60 | static int __host__ __device__
 61 | get_checked_scalar(unsigned char* scalar, const unsigned char* signature) {
 62 |     // Top bit set: rejected outright, `scalar` is left untouched.
 63 |     if ((signature[31] >> 7) != 0) {
 64 |         return 1;
 65 |     }
 66 | 
 67 |     // Copy before any reduction: scalar32_reduce() is handed only `scalar`,
 68 |     // so that buffer must already hold the candidate bytes.
 69 |     for (int i = 0; i < 32; i++) {
 70 |         scalar[i] = signature[i];
 71 |     }
 72 | 
 73 |     // Top 4 bits clear: accepted as already reduced, no further check needed.
 74 |     if ((signature[31] & 0xf0) == 0) {
 75 |         return 0;
 76 |     }
 77 | 
 78 |     // Otherwise the bytes must come out of scalar32_reduce() unchanged.
 79 |     scalar32_reduce(scalar);
 80 |     return consttime_equal(scalar, signature) ? 0 : 1;
 81 | }
 82 | 
 83 | int ed25519_get_checked_scalar(unsigned char* out_scalar, const unsigned char* in_scalar) {  // exported wrapper; 0 == success
 84 |     return get_checked_scalar(out_scalar, in_scalar);
 85 | }
 86 | 
 87 | // Validate a packed group element: 0 when it unpacks and is not of small order, 1 otherwise
 88 | static int __device__ __host__
 89 | check_packed_ge_small_order(const unsigned char* packed_group_element) {
 90 |     ge_p3 unpacked;
 91 | 
 92 |     // Short-circuit evaluation keeps the order of the checks: the small-order
 93 |     // test only runs on an element that unpacked successfully.
 94 |     const bool rejected =
 95 |         (ge_frombytes_negate_vartime(&unpacked, packed_group_element) != 0) ||
 96 |         (ge_is_small_order(&unpacked) != 0);
 97 | 
 98 |     if (rejected) {
 99 |         return 1;
100 |     }
101 | 
102 |     return 0;
103 | }
104 | 
105 | int ed25519_check_packed_ge_small_order(const unsigned char* packed_group_element) {  // exported wrapper; 0 == success
106 |     return check_packed_ge_small_order(packed_group_element);
107 | }
108 | 
109 | static int __device__ __host__
110 | ed25519_verify_device(const unsigned char *signature,
111 |                       const unsigned char *message,
112 |                       uint32_t message_len,
113 |                       const unsigned char *public_key,
114 |                       unsigned char* h) {  // first verification stage: returns 1 to continue, 0 to reject
115 |     sha512_context hash;
116 |     unsigned char checker[32];  // scratch output of get_checked_scalar(); not read afterwards
117 | 
118 |     // Reject unless the scalar half, signature[32..63], passes get_checked_scalar()
119 |     if (0 != get_checked_scalar(checker, signature + 32)) {
120 |         return 0;
121 |     }
122 |     // Reject unless the first half, signature[0..31], unpacks and is not of small order
123 |     if (0 != check_packed_ge_small_order(signature)) {
124 |         return 0;
125 |     }
126 |     // h = SHA-512(signature[0..31] || public_key[0..31] || message)
127 |     sha512_init(&hash);
128 |     sha512_update(&hash, signature, 32);
129 |     sha512_update(&hash, public_key, 32);
130 |     sha512_update(&hash, message, message_len);
131 |     sha512_final(&hash, h);
132 |     // h is passed through sc_reduce() in place before being handed back to the caller
133 |     sc_reduce(h);
134 |     return 1;
135 | }
136 | 
137 | static int __device__ __host__
138 | ed25519_verify_scalar_double(const unsigned char* signature,
139 |                              const unsigned char* h,
140 |                              ge_cached* Ai) {
141 |     // Returns 1 when the encoding of the point computed from h, Ai and
142 |     // signature[32..63] equals signature[0..31], otherwise 0.
143 |     ge_p2 recomputed;
144 |     unsigned char encoded[32];
145 | 
146 |     ge_double_scalarmult_vartime(&recomputed, h, Ai, signature + 32);
147 |     ge_tobytes(encoded, &recomputed);
148 | 
149 |     // consttime_equal() is non-zero on a match; normalise the result to 0/1.
150 |     const int matches = consttime_equal(encoded, signature);
151 |     return matches ? 1 : 0;
152 | }
153 | 
154 | int
155 | ed25519_verify(const unsigned char *signature,
156 |                const unsigned char *message,
157 |                uint32_t message_len,
158 |                const unsigned char *public_key) {
159 |     // Host-side check of a single signature: returns 1 if every stage passes, else 0.
160 |     unsigned char digest[SHA512_SIZE];
161 |     ge_cached lookup[GE_LOOKUP_SIZE];
162 | 
163 |     // Stage 1: validate both signature halves and hash into digest.
164 |     int ok = (0 != ed25519_verify_device(signature, message, message_len, public_key, digest));
165 | 
166 |     // Stage 2: ge_gen_lookup() fills lookup from the public key.
167 |     // The && short-circuit skips each later stage once one has failed.
168 |     ok = ok && (0 != ge_gen_lookup(public_key, lookup));
169 | 
170 |     // Stage 3: the double scalar multiplication check against the signature.
171 |     ok = ok && (0 != ed25519_verify_scalar_double(signature, digest, lookup));
172 | 
173 |     return ok ? 1 : 0;
174 | }
175 | 
176 | __global__ void
177 | ed25519_scalar_double_kernel(const uint8_t* packets,
178 |                              uint32_t* signature_offsets,
179 |                              uint8_t* out,
180 |                              ge_cached* Ai,
181 |                              size_t num_keys,
182 |                              uint8_t* h) {
183 |     // One thread per entry: overwrite out[k] with the ed25519_verify_scalar_double()
184 |     // result, but only for entries whose out byte is still non-zero.
185 |     int k = blockIdx.x * blockDim.x + threadIdx.x;
186 |     if (!(k < num_keys) || (0 == out[k])) {
187 |         return;
188 |     }
189 |     const uint8_t* sig = &packets[signature_offsets[k]];
190 |     out[k] = ed25519_verify_scalar_double(sig, &h[k * SHA512_SIZE], &Ai[k * GE_LOOKUP_SIZE]);
191 | }
192 | 
193 | __global__ void
194 | ed25519_gen_lookup_kernel(const uint8_t* packets,
195 |                           uint32_t* public_key_offsets,
196 |                           ge_cached* Ai,
197 |                           size_t num_keys,
198 |                           uint8_t* out
199 | ) {
200 |     // One thread per entry; entries already marked 0 in out are left untouched.
201 |     int k = blockIdx.x * blockDim.x + threadIdx.x;
202 |     if (!(k < num_keys) || (0 == out[k])) { return; }
203 |     const uint8_t* public_key = &packets[public_key_offsets[k]];
204 |     out[k] = ge_gen_lookup(public_key, &Ai[k * GE_LOOKUP_SIZE]);
205 | }
206 | 
207 | __global__ void
208 | ed25519_verify_kernel(const uint8_t* packets,
209 |                       uint32_t message_size,
210 |                       uint32_t* message_lens,
211 |                       uint32_t* public_key_offsets,
212 |                       uint32_t* signature_offsets,
213 |                       uint32_t* message_start_offsets,
214 |                       size_t num_keys,
215 |                       uint8_t* out,
216 |                       uint8_t* h)
217 | {
218 |     // One thread per entry: store the ed25519_verify_device() result in out[k]
219 |     // and its hash output at byte offset k * SHA512_SIZE of h.
220 |     // message_size is accepted but not read here.
221 |     int k = blockIdx.x * blockDim.x + threadIdx.x;
222 |     if (!(k < num_keys)) {
223 |         return;
224 |     }
225 | 
226 |     // Every offset is a byte index into the packets buffer.
227 |     const uint8_t* sig = &packets[signature_offsets[k]];
228 |     const uint8_t* msg = &packets[message_start_offsets[k]];
229 |     const uint8_t* public_key = &packets[public_key_offsets[k]];
230 | 
231 |     out[k] = ed25519_verify_device(sig, msg, message_lens[k], public_key, &h[k * SHA512_SIZE]);
232 | }
233 | 
234 | bool g_verbose = false;  // global verbosity flag, off by default
235 | 
236 | void ed25519_set_verbose(bool val) {  // setter for g_verbose
237 |     g_verbose = val;
238 | }
239 | 
240 | void ed25519_verify_many(const gpu_Elems* elems,
241 |                          uint32_t num_elems,
242 |                          uint32_t message_size,
243 |                          uint32_t total_packets,
244 |                          uint32_t total_signatures,
245 |                          const uint32_t* message_lens,
246 |                          const uint32_t* public_key_offsets,
247 |                          const uint32_t* signature_offsets,
248 |                          const uint32_t* message_start_offsets,
249 |                          uint8_t* out,
250 |                          uint8_t use_non_default_stream)
251 | {
252 |     LOG("Starting verify_many: num_elems: %d total_signatures: %d total_packets: %d message_size: %d\n",
253 |         num_elems, total_signatures, total_packets, message_size);
254 |     // Runs the three verification kernels over all entries and copies one result
255 |     // byte per signature into out (1 = every stage passed, 0 = rejected).
256 |     size_t out_size = total_signatures * sizeof(uint8_t);
257 |     uint32_t total_packets_size = total_packets * message_size;
258 | 
259 |     if (0 == total_packets) {
260 |         return;  // no packet data, so out is left untouched
261 |     }
262 | 
263 |     // Acquire a GPU context; it is released again at the end of this function
264 | 
265 |     gpu_ctx_t* gpu_ctx = get_gpu_ctx();
266 | 
267 |     verify_ctx_t* cur_ctx = &gpu_ctx->verify_ctx;
268 |     // Stream 0 unless the caller asked for the context's own stream.
269 |     cudaStream_t stream = 0;
270 |     if (0 != use_non_default_stream) {
271 |         stream = gpu_ctx->stream;
272 |     }
273 | 
274 |     setup_gpu_ctx(cur_ctx,
275 |                   elems,
276 |                   num_elems,
277 |                   message_size,
278 |                   total_packets,
279 |                   total_packets_size,
280 |                   total_signatures,
281 |                   message_lens,
282 |                   public_key_offsets,
283 |                   signature_offsets,
284 |                   message_start_offsets,
285 |                   out_size,
286 |                   stream
287 |                  );
288 | 
289 |     int num_threads_per_block = 64;
290 |     int num_blocks = ROUND_UP_DIV(total_signatures, num_threads_per_block);
291 |     LOG("num_blocks: %d threads_per_block: %d keys: %d out: %p stream: %p\n",
292 |            num_blocks, num_threads_per_block, (int)total_packets, out, stream);  // log the stream actually used
293 |     // All three kernels share one launch geometry and are queued on the same stream.
294 |     perftime_t start, end;
295 |     get_time(&start);
296 |     ed25519_verify_kernel<<<num_blocks, num_threads_per_block, 0, stream>>>
297 |                             (cur_ctx->packets,
298 |                              message_size,
299 |                              cur_ctx->message_lens,
300 |                              cur_ctx->public_key_offsets,
301 |                              cur_ctx->signature_offsets,
302 |                              cur_ctx->message_start_offsets,
303 |                              cur_ctx->offsets_len,
304 |                              cur_ctx->out,
305 |                              cur_ctx->h
306 |                              );
307 |     CUDA_CHK(cudaPeekAtLastError());
308 |     // The next two kernels skip entries whose out byte is already 0.
309 |     ed25519_gen_lookup_kernel<<<num_blocks, num_threads_per_block, 0, stream>>>
310 |                              (cur_ctx->packets,
311 |                               cur_ctx->public_key_offsets,
312 |                               cur_ctx->Ai,
313 |                               cur_ctx->offsets_len,
314 |                               cur_ctx->out
315 |                              );
316 |     CUDA_CHK(cudaPeekAtLastError());
317 | 
318 |     ed25519_scalar_double_kernel<<<num_blocks, num_threads_per_block, 0, stream>>>
319 |                              (cur_ctx->packets,
320 |                               cur_ctx->signature_offsets,
321 |                               cur_ctx->out,
322 |                               cur_ctx->Ai,
323 |                               cur_ctx->offsets_len,
324 |                               cur_ctx->h);
325 |     CUDA_CHK(cudaPeekAtLastError());
326 | 
327 |     cudaError_t err = cudaMemcpyAsync(out, cur_ctx->out, out_size, cudaMemcpyDeviceToHost, stream);
328 |     if (err != cudaSuccess)  {
329 |         fprintf(stderr, "verify: cudaMemcpy(out) error: out = %p cur_ctx->out = %p size = %zu num: %d elems = %p\n",
330 |                         out, cur_ctx->out, out_size, num_elems, elems);
331 |     }
332 |     CUDA_CHK(err);
333 |     // Wait for the copy so out is complete before the context is released.
334 |     CUDA_CHK(cudaStreamSynchronize(stream));
335 | 
336 |     release_gpu_ctx(gpu_ctx);
337 | 
338 |     get_time(&end);
339 |     LOG("time diff: %f\n", get_diff(&start, &end));
340 | }
341 | 
342 | // Ensure copyright and license notice is embedded in the binary
343 | const char* ed25519_license() {
344 |    return "Copyright (c) 2018 Solana Labs, Inc. "
345 |           "Licensed under the Apache License, Version 2.0 "
346 |           "<http://www.apache.org/licenses/LICENSE-2.0>";
347 | }
348 | 
349 | int cuda_host_register(void* ptr, size_t size, unsigned int flags) {
350 |    return cudaHostRegister(ptr, size, flags);
351 | }
352 | 
353 | int cuda_host_unregister(void* ptr) {
354 |    return cudaHostUnregister(ptr);
355 | }
356 | 


--------------------------------------------------------------------------------
/src/cuda-poh-verify/poh_verify.cu:
--------------------------------------------------------------------------------
  1 | #include <stddef.h>
  2 | #include <inttypes.h>
  3 | #include <pthread.h>
  4 | #include "gpu_common.h"
  5 | #include "sha256.cu"
  6 | 
  7 | #define MAX_NUM_GPUS 8
  8 | #define MAX_QUEUE_SIZE 8
  9 | #define NUM_THREADS_PER_BLOCK 64
 10 | 
 11 | 
 12 | __global__ void poh_verify_kernel(uint8_t* hashes, uint64_t* num_hashes_arr, size_t num_elems) {
 13 |     // One thread per element: replace the element's SHA256_BLOCK_SIZE-byte slot in
 14 |     // hashes with the result of re-hashing it num_hashes_arr[elem] times in a row.
 15 |     size_t elem = (size_t)(blockIdx.x * blockDim.x + threadIdx.x);
 16 |     if (!(elem < num_elems)) return;
 17 |     uint8_t* slot = &hashes[elem * SHA256_BLOCK_SIZE];
 18 |     uint8_t current[SHA256_BLOCK_SIZE];
 19 |     memcpy(current, slot, SHA256_BLOCK_SIZE);
 20 |     for (size_t round = 0; round < num_hashes_arr[elem]; round++) {
 21 |         hash_state sha_state;
 22 |         sha256_init(&sha_state);
 23 |         sha256_process(&sha_state, current, SHA256_BLOCK_SIZE);
 24 |         sha256_done(&sha_state, current);
 25 |     }
 26 |     memcpy(slot, current, SHA256_BLOCK_SIZE);
 27 | }
 28 | 
 29 | typedef struct {  // one reusable work slot; a [gpu][queue] grid of these is declared below
 30 |     uint8_t* hashes;  // device buffer, cudaMalloc'd in poh_verify_many()
 31 |     uint64_t* num_hashes_arr;  // device buffer with one iteration count per element
 32 |     size_t num_elems_alloc;  // element capacity of the two buffers above
 33 |     pthread_mutex_t mutex;  // held by poh_verify_many() while it uses this slot
 34 |     cudaStream_t stream;  // created in poh_init_locked(); used when a non-default stream is requested
 35 | } gpu_ctx;
 36 | 
 37 | static pthread_mutex_t g_ctx_mutex = PTHREAD_MUTEX_INITIALIZER;  // guards initialisation and the round-robin counters below
 38 | 
 39 | static gpu_ctx g_gpu_ctx[MAX_NUM_GPUS][MAX_QUEUE_SIZE] = {0};
 40 | static uint32_t g_cur_gpu = 0;  // next GPU to hand out
 41 | static uint32_t g_cur_queue[MAX_NUM_GPUS] = {0};  // next queue to hand out, per GPU
 42 | static int32_t g_total_gpus = -1;  // -1 until poh_init_locked() has run
 43 | 
 44 | static bool poh_init_locked() {
 45 |     // One-time setup of every per-GPU, per-queue slot; -1 means "not done yet".
 46 |     // Both callers in this file hold g_ctx_mutex around the call.
 47 |     if (g_total_gpus != -1) return g_total_gpus > 0;
 48 |     cudaGetDeviceCount(&g_total_gpus);
 49 |     g_total_gpus = min(MAX_NUM_GPUS, g_total_gpus);
 50 |     LOG("total_gpus: %d\n", g_total_gpus);
 51 |     for (int gpu = 0; gpu < g_total_gpus; gpu++) {
 52 |         CUDA_CHK(cudaSetDevice(gpu));
 53 |         for (int queue = 0; queue < MAX_QUEUE_SIZE; queue++) {
 54 |             gpu_ctx* slot = &g_gpu_ctx[gpu][queue];
 55 |             int err = pthread_mutex_init(&slot->mutex, NULL);
 56 |             if (err != 0) {
 57 |                 fprintf(stderr, "pthread_mutex_init error %d gpu: %d queue: %d\n", err, gpu, queue);
 58 |                 g_total_gpus = 0;  // later calls then report "no GPUs" without retrying
 59 |                 return false;
 60 |             }
 61 |             CUDA_CHK(cudaStreamCreate(&slot->stream));
 62 |         }
 63 |     }
 64 |     return g_total_gpus > 0;
 65 | }
 65 | 
 66 | bool poh_init() {  // returns true when at least one GPU slot set is ready
 67 |     cudaFree(0);  // NOTE(review): presumably forces CUDA context creation up front - confirm
 68 |     pthread_mutex_lock(&g_ctx_mutex);
 69 |     bool success = poh_init_locked();
 70 |     pthread_mutex_unlock(&g_ctx_mutex);
 71 |     return success;
 72 | }
 73 | 
 74 | extern "C" {
 75 | 
 76 | void poh_verify_many_set_verbose(bool val) {  // setter for g_verbose, which is declared outside this file
 77 |     g_verbose = val;
 78 | }
 79 | 
 80 | int poh_verify_many(uint8_t* hashes,
 81 |                     const uint64_t* num_hashes_arr,
 82 |                     size_t num_elems,
 83 |                     uint8_t use_non_default_stream)
 84 | {
 85 |     LOG("Starting poh_verify_many: num_elems: %zu\n", num_elems);
 86 |     // Re-hashes each element of hashes in place on a GPU; returns 0 on success, 1 when no GPU is usable.
 87 |     if (num_elems == 0) return 0;
 88 | 
 89 |     int32_t cur_gpu, cur_queue;
 90 |     // Under the global lock: initialise on first use, then pick a GPU and one of its queues round-robin.
 91 |     pthread_mutex_lock(&g_ctx_mutex);
 92 |     if (!poh_init_locked()) {
 93 |         pthread_mutex_unlock(&g_ctx_mutex);
 94 |         LOG("No GPUs, exiting...\n");
 95 |         return 1;
 96 |     }
 97 |     cur_gpu = g_cur_gpu;
 98 |     g_cur_gpu++;
 99 |     g_cur_gpu %= g_total_gpus;
100 |     cur_queue = g_cur_queue[cur_gpu];
101 |     g_cur_queue[cur_gpu]++;
102 |     g_cur_queue[cur_gpu] %= MAX_QUEUE_SIZE;
103 |     pthread_mutex_unlock(&g_ctx_mutex);
104 |     // From here on only the chosen slot's own mutex is held.
105 |     gpu_ctx* cur_ctx = &g_gpu_ctx[cur_gpu][cur_queue];
106 |     pthread_mutex_lock(&cur_ctx->mutex);
107 | 
108 |     CUDA_CHK(cudaSetDevice(cur_gpu));
109 | 
110 |     LOG("cur gpu: %d cur queue: %d\n", cur_gpu, cur_queue);
111 | 
112 |     size_t hashes_size = num_elems * SHA256_BLOCK_SIZE * sizeof(uint8_t);
113 |     size_t num_hashes_size = num_elems * sizeof(uint64_t);
114 | 
115 |     // Ensure there is enough memory allocated
116 |     if (cur_ctx->hashes == NULL || cur_ctx->num_elems_alloc < num_elems) {
117 |         CUDA_CHK(cudaFree(cur_ctx->hashes));
118 |         CUDA_CHK(cudaMalloc(&cur_ctx->hashes, hashes_size));
119 |         CUDA_CHK(cudaFree(cur_ctx->num_hashes_arr));
120 |         CUDA_CHK(cudaMalloc(&cur_ctx->num_hashes_arr, num_hashes_size));
121 |         // Buffers are only ever replaced by larger ones; smaller requests reuse them.
122 |         cur_ctx->num_elems_alloc = num_elems;
123 |     }
124 |     // Stream 0 unless the caller asked for the slot's own stream.
125 |     cudaStream_t stream = 0;
126 |     if (0 != use_non_default_stream) {
127 |         stream = cur_ctx->stream;
128 |     }
129 |     // Upload, launch and download are all queued on the same stream, in this order.
130 |     CUDA_CHK(cudaMemcpyAsync(cur_ctx->hashes, hashes, hashes_size, cudaMemcpyHostToDevice, stream));
131 |     CUDA_CHK(cudaMemcpyAsync(cur_ctx->num_hashes_arr, num_hashes_arr, num_hashes_size, cudaMemcpyHostToDevice, stream));
132 | 
133 |     int num_blocks = ROUND_UP_DIV(num_elems, NUM_THREADS_PER_BLOCK);
134 | 
135 |     poh_verify_kernel<<<num_blocks, NUM_THREADS_PER_BLOCK, 0, stream>>>(cur_ctx->hashes, cur_ctx->num_hashes_arr, num_elems);
136 |     CUDA_CHK(cudaPeekAtLastError());
137 |     // The results overwrite the caller's hashes array.
138 |     CUDA_CHK(cudaMemcpyAsync(hashes, cur_ctx->hashes, hashes_size, cudaMemcpyDeviceToHost, stream));
139 |     CUDA_CHK(cudaStreamSynchronize(stream));
140 |     // NOTE(review): if CUDA_CHK can return or throw, the slot mutex stays locked - confirm its behaviour
141 |     pthread_mutex_unlock(&cur_ctx->mutex);
142 | 
143 |     return 0;
144 | }
145 | }
146 | 


--------------------------------------------------------------------------------
/src/gpu-common.mk:
--------------------------------------------------------------------------------
 1 | NVCC:=nvcc
 2 | #GPU_PTX_ARCH:=compute_35
 3 | #GPU_ARCHS?=sm_37,sm_50,sm_61,sm_70
 4 | GPU_PTX_ARCH:=compute_60
 5 | GPU_ARCHS?=sm_61,sm_70,sm_75,sm_80,sm_86
 6 | HOST_CFLAGS:=-Wall -Werror -fPIC -Wno-strict-aliasing
 7 | GPU_CFLAGS:=--gpu-code=$(GPU_ARCHS),$(GPU_PTX_ARCH) --gpu-architecture=$(GPU_PTX_ARCH)
 8 | # GPU_ARCHS is set with ?= so a value from the environment or command line wins; lines 2-3 are an older, disabled target set
 9 | # enable for profiling
10 | #GPU_CFLAGS+=-lineinfo
11 | 
12 | # enable to see kernel register stats
13 | #GPU_CFLAGS+=--ptxas-options=-v
14 | # V selects the flavour: CFLAGS becomes CFLAGS_release or CFLAGS_debug, and is empty for any other value of V
15 | CFLAGS_release:=-Icommon $(GPU_CFLAGS) -O3 -Xcompiler "$(HOST_CFLAGS)"
16 | CFLAGS_debug:=$(CFLAGS_release) -g
17 | CFLAGS:=$(CFLAGS_$V)
18 | 


--------------------------------------------------------------------------------
/src/jerasure-sys/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "jerasure-sys"
 3 | description = "Rust bindings for jerasure 2.0"
 4 | version = "0.1.0"
 5 | homepage = "https://solana.com/"
 6 | readme = "../jerasure/README"
 7 | repository = "https://github.com/solana-labs/solana-perf-libs"
 8 | authors = ["Solana Maintainers <maintainers@solana.com>"]
 9 | license-file = "../jerasure/COPYING"  # a path belongs in license-file; `license` takes an SPDX expression
10 | links = "Jerasure"
11 | build = "build.rs"
12 | 
13 | [build-dependencies]
14 | cc = "1.0"
15 | 


--------------------------------------------------------------------------------
/src/jerasure-sys/build.rs:
--------------------------------------------------------------------------------
 1 | extern crate cc;
 2 | 
 3 | // Build script: compile the listed jerasure C sources into a static library named Jerasure.
 4 | fn main() {
 5 |     let sources = [
 6 |         "jerasure/src/galois.c",
 7 |         "jerasure/src/jerasure.c",
 8 |         "jerasure/src/reed_sol.c",
 9 |         "jerasure/src/cauchy.c",
10 |         "jerasure/src/liberation.c",
11 |     ];
12 |     let mut build = cc::Build::new();
13 |     build.files(&sources).include("jerasure/include").include("gf-complete/include");
14 |     build.compile("Jerasure");
15 |     println!("cargo:rustc-link-lib=static=Jerasure");
16 | }
17 | 


--------------------------------------------------------------------------------
/src/jerasure-sys/gf-complete:
--------------------------------------------------------------------------------
1 | ../gf-complete/


--------------------------------------------------------------------------------
/src/jerasure-sys/jerasure:
--------------------------------------------------------------------------------
1 | ../jerasure


--------------------------------------------------------------------------------
/src/opencl-ecc-ed25519/gpu_ctx.cpp:
--------------------------------------------------------------------------------
  1 | #include "ed25519.h"
  2 | #include "gpu_ctx.h"
  3 | #include <pthread.h>
  4 | #include "gpu_common.h"
  5 | 
  6 | static pthread_mutex_t g_ctx_mutex = PTHREAD_MUTEX_INITIALIZER;
  7 | 
  8 | #define MAX_NUM_GPUS 	1
  9 | #define MAX_QUEUE_SIZE 	1
 10 | 
// One context slot per (GPU, queue) pair; zero-initialised, so every cl_mem
// handle and size inside starts out as NULL / 0.
static gpu_ctx_t g_gpu_ctx[MAX_NUM_GPUS][MAX_QUEUE_SIZE] = {0};
// Round-robin cursor: GPU index the next get_gpu_ctx() call will hand out.
static uint32_t g_cur_gpu = 0;
// Per-GPU round-robin cursor over that GPU's queue slots.
static uint32_t g_cur_queue[MAX_NUM_GPUS] = {0};
// -1 until cl_crypt_init_locked() has run; afterwards MAX_NUM_GPUS, or 0 when
// initialising a slot mutex failed.
static int32_t g_total_gpus = -1;
 15 | 
 16 | static bool cl_crypt_init_locked() {
 17 |     if (g_total_gpus == -1) {
 18 |         g_total_gpus = MAX_NUM_GPUS;
 19 |         LOG("total_gpus: %d\n", g_total_gpus);
 20 |         for (int gpu = 0; gpu < g_total_gpus; gpu++) {
 21 |             for (int queue = 0; queue < MAX_QUEUE_SIZE; queue++) {
 22 |                 int err = pthread_mutex_init(&g_gpu_ctx[gpu][queue].mutex, NULL);
 23 |                 if (err != 0) {
 24 |                     fprintf(stderr, "pthread_mutex_init error %d gpu: %d queue: %d\n",
 25 |                             err, gpu, queue);
 26 |                     g_total_gpus = 0;
 27 |                     return false;
 28 |                 }
 29 |             }
 30 |         }
 31 |     }
 32 |     return g_total_gpus > 0;
 33 | }
 34 | 
 35 | bool ed25519_init() {
 36 |     pthread_mutex_lock(&g_ctx_mutex);
 37 |     bool success = cl_crypt_init_locked();
 38 |     pthread_mutex_unlock(&g_ctx_mutex);
 39 |     return success;
 40 | }
 41 | 
 42 | gpu_ctx_t* get_gpu_ctx() {
 43 |     int32_t cur_gpu, cur_queue;
 44 | 
 45 |     LOG("locking global mutex\n");
 46 |     pthread_mutex_lock(&g_ctx_mutex);
 47 |     if (!cl_crypt_init_locked()) {
 48 |         pthread_mutex_unlock(&g_ctx_mutex);
 49 |         LOG("No GPUs, exiting...\n");
 50 |         return NULL;
 51 |     }
 52 |     cur_gpu = g_cur_gpu;
 53 |     g_cur_gpu++;
 54 |     g_cur_gpu %= g_total_gpus;
 55 |     cur_queue = g_cur_queue[cur_gpu];
 56 |     g_cur_queue[cur_gpu]++;
 57 |     g_cur_queue[cur_gpu] %= MAX_QUEUE_SIZE;
 58 |     pthread_mutex_unlock(&g_ctx_mutex);
 59 | 
 60 |     gpu_ctx_t* cur_ctx = &g_gpu_ctx[cur_gpu][cur_queue];
 61 |     LOG("locking contex mutex queue: %d gpu: %d\n", cur_queue, cur_gpu);
 62 |     pthread_mutex_lock(&cur_ctx->mutex);
 63 | 
 64 |     LOG("selecting gpu: %d queue: %d\n", cur_gpu, cur_queue);
 65 | 
 66 |     return cur_ctx;
 67 | }
 68 | 
 69 | void setup_gpu_ctx(verify_ctx_t* cur_ctx,
 70 |                    const gpu_Elems* elems,
 71 |                    uint32_t num_elems,
 72 |                    uint32_t message_size,
 73 |                    uint32_t total_packets,
 74 |                    uint32_t total_packets_size,
 75 |                    uint32_t total_signatures,
 76 |                    const uint32_t* message_lens,
 77 |                    const uint32_t* public_key_offsets,
 78 |                    const uint32_t* signature_offsets,
 79 |                    const uint32_t* message_start_offsets,
 80 |                    size_t out_size
 81 |                    ) {
 82 | 	int ret;
 83 |     size_t offsets_size = total_signatures * sizeof(uint32_t);
 84 | 
 85 |     LOG("device allocate. packets: %d out: %d offsets_size: %zu\n",
 86 |         total_packets_size, (int)out_size, offsets_size);
 87 | 
 88 |     if (cur_ctx->packets == NULL ||
 89 |         total_packets_size > cur_ctx->packets_size_bytes) {
 90 |         clReleaseMemObject(cur_ctx->packets);
 91 |         cur_ctx->packets = clCreateBuffer(context, CL_MEM_READ_WRITE, total_packets_size, NULL, &ret);
 92 |         CL_ERR( ret );
 93 | 
 94 |         cur_ctx->packets_size_bytes = total_packets_size;
 95 |     }
 96 | 	
 97 | 	if (cur_ctx->out == NULL || cur_ctx->out_size_bytes < out_size) {
 98 |         clReleaseMemObject(cur_ctx->out);
 99 |         cur_ctx->out = clCreateBuffer(context, CL_MEM_READ_WRITE, out_size, NULL, &ret);
100 |         CL_ERR( ret );
101 | 
102 |         cur_ctx->out_size_bytes = total_signatures;
103 |     }
104 | 	
105 | 	if (cur_ctx->public_key_offsets == NULL || cur_ctx->offsets_len < total_signatures) {
106 |         
107 |         clReleaseMemObject(cur_ctx->public_key_offsets);
108 |         cur_ctx->public_key_offsets = clCreateBuffer(context, CL_MEM_READ_WRITE, offsets_size, NULL, &ret);
109 |         CL_ERR( ret );
110 |         
111 |         clReleaseMemObject(cur_ctx->signature_offsets);
112 |         cur_ctx->signature_offsets = clCreateBuffer(context, CL_MEM_READ_WRITE, offsets_size, NULL, &ret);
113 |         CL_ERR( ret );
114 |         
115 |         clReleaseMemObject(cur_ctx->message_start_offsets);
116 |         cur_ctx->message_start_offsets = clCreateBuffer(context, CL_MEM_READ_WRITE, offsets_size, NULL, &ret);
117 |         CL_ERR( ret );
118 |         
119 |         clReleaseMemObject(cur_ctx->message_lens);
120 |         cur_ctx->message_lens = clCreateBuffer(context, CL_MEM_READ_WRITE, offsets_size, NULL, &ret);
121 |         CL_ERR( ret );
122 | 
123 |         cur_ctx->offsets_len = total_signatures;
124 |     }
125 | 
126 |     CL_ERR( clEnqueueWriteBuffer(cmd_queue, cur_ctx->public_key_offsets, CL_TRUE, 0, offsets_size, public_key_offsets, 0, NULL, NULL));
127 |     CL_ERR( clEnqueueWriteBuffer(cmd_queue, cur_ctx->signature_offsets, CL_TRUE, 0, offsets_size, signature_offsets, 0, NULL, NULL));
128 |     CL_ERR( clEnqueueWriteBuffer(cmd_queue, cur_ctx->message_start_offsets, CL_TRUE, 0, offsets_size, message_start_offsets, 0, NULL, NULL));
129 |     CL_ERR( clEnqueueWriteBuffer(cmd_queue, cur_ctx->message_lens, CL_TRUE, 0, offsets_size, message_lens, 0, NULL, NULL));
130 | 
131 |     size_t cur = 0;
132 |     for (size_t i = 0; i < num_elems; i++) {
133 |         LOG("i: %zu size: %d\n", i, elems[i].num * message_size);
134 |         CL_ERR( clEnqueueWriteBuffer(cmd_queue, cur_ctx->packets, CL_TRUE, cur * message_size, elems[i].num * message_size, elems[i].elems, 0, NULL, NULL));
135 |         cur += elems[i].num;
136 |     }
137 | }
138 | 
139 | 
140 | void release_gpu_ctx(gpu_ctx_t* cur_ctx) {
141 |     pthread_mutex_unlock(&cur_ctx->mutex);
142 | }
143 | 
144 | void ed25519_free_gpu_mem() {
145 | 	for (size_t gpu = 0; gpu < MAX_NUM_GPUS; gpu++) {
146 |         for (size_t queue = 0; queue < MAX_QUEUE_SIZE; queue++) {
147 |             verify_ctx_t* verify_ctx = &g_gpu_ctx[gpu][queue].verify_ctx;
148 | 			
149 | 			CL_ERR(clReleaseMemObject(verify_ctx->packets));
150 | 			CL_ERR(clReleaseMemObject(verify_ctx->out));
151 | 			CL_ERR(clReleaseMemObject(verify_ctx->message_lens));
152 | 			CL_ERR(clReleaseMemObject(verify_ctx->public_key_offsets));
153 | 			CL_ERR(clReleaseMemObject(verify_ctx->signature_offsets));
154 | 			CL_ERR(clReleaseMemObject(verify_ctx->message_start_offsets));
155 |         }
156 |     }
157 | }
158 | 


--------------------------------------------------------------------------------
/src/opencl-ecc-ed25519/gpu_ctx.h:
--------------------------------------------------------------------------------
 1 | #ifndef GPU_CTX_H
 2 | #define GPU_CTX_H
 3 | 
 4 | #include "cl_common.h"
 5 | 
 6 | #include <inttypes.h>
 7 | #include "ed25519.h"
 8 | 
 9 | #ifdef __cplusplus
10 | extern "C" {
11 | #endif
12 | 
// Device buffers and their recorded capacities for one context slot. The
// buffers are created and grown by setup_gpu_ctx() and reused across calls.
typedef struct {
    // Packet data uploaded from the host, and the byte size the buffer was
    // created with.
    cl_mem packets;
	uint32_t packets_size_bytes;
	
    // Result buffer read back by the sign/verify entry points, and the size
    // recorded for it.
    cl_mem out;
	size_t out_size_bytes;
	
    // Per-signature uint32_t arrays; offsets_len is the entry count all four
    // buffers were created for.
    cl_mem public_key_offsets;
    cl_mem message_start_offsets;
    cl_mem signature_offsets;
    cl_mem message_lens;
	size_t offsets_len;

    // NOTE(review): the three fields below are not referenced by the OpenCL
    // host sources next to this header; confirm whether they are still needed.
    size_t num;
    size_t num_signatures;
    uint32_t total_packets_len;
} verify_ctx_t;
30 | 
// One context slot: the device buffers plus the mutex that get_gpu_ctx()
// locks and release_gpu_ctx() unlocks around each use of the slot.
typedef struct {
    verify_ctx_t verify_ctx;

    pthread_mutex_t mutex;
} gpu_ctx_t;
36 | 
37 | extern gpu_ctx_t* get_gpu_ctx();
38 | extern void release_gpu_ctx(gpu_ctx_t*);
39 | 
40 | extern void ed25519_free_gpu_mem();
41 | 
42 | extern void setup_gpu_ctx(verify_ctx_t* cur_ctx,
43 |                           const gpu_Elems* elems,
44 |                           uint32_t num_elems,
45 |                           uint32_t message_size,
46 |                           uint32_t total_packets,
47 |                           uint32_t total_packets_size,
48 |                           uint32_t total_signatures,
49 |                           const uint32_t* message_lens,
50 |                           const uint32_t* public_key_offsets,
51 |                           const uint32_t* signature_offsets,
52 |                           const uint32_t* message_start_offsets,
53 |                           size_t out_size
54 | 						  );
55 | 
56 | #ifdef __cplusplus
57 | }
58 | #endif
59 | 
60 | #endif
61 | 


--------------------------------------------------------------------------------
/src/opencl-ecc-ed25519/sign.cpp:
--------------------------------------------------------------------------------
  1 | #include "cl_common.h"
  2 | 
  3 | #include "ed25519.h"
  4 | #include "sha512.h"
  5 | #include "ge.h"
  6 | #include "sc.h"
  7 | #include "gpu_common.h"
  8 | #include "gpu_ctx.h"
  9 | 
 10 | static void
 11 | ed25519_sign_device(unsigned char *signature,
 12 |                    const unsigned char *message,
 13 |                    size_t message_len,
 14 |                    const unsigned char *public_key,
 15 |                    const unsigned char *private_key) {
 16 |     sha512_context hash;
 17 |     unsigned char hram[64];
 18 |     unsigned char r[64];
 19 |     ge_p3 R;
 20 | 
 21 | 
 22 |     sha512_init(&hash);
 23 |     sha512_update(&hash, private_key + 32, 32);
 24 |     sha512_update(&hash, message, message_len);
 25 |     sha512_final(&hash, r);
 26 | 
 27 |     sc_reduce(r);
 28 |     ge_scalarmult_base(&R, r);
 29 |     ge_p3_tobytes(signature, &R);
 30 | 
 31 |     sha512_init(&hash);
 32 |     sha512_update(&hash, signature, 32);
 33 |     sha512_update(&hash, public_key, 32);
 34 |     sha512_update(&hash, message, message_len);
 35 |     sha512_final(&hash, hram);
 36 | 
 37 |     sc_reduce(hram);
 38 |     sc_muladd(signature + 32, hram, private_key, r);
 39 | }
 40 | 
 41 | void ed25519_sign(unsigned char *signature,
 42 |                    const unsigned char *message,
 43 |                    size_t message_len,
 44 |                    const unsigned char *public_key,
 45 |                    const unsigned char *private_key) {
 46 |     ed25519_sign_device(signature, message, message_len, public_key, private_key);
 47 | }
 48 | 
 49 | void ed25519_sign_many(const gpu_Elems* elems,
 50 |                        uint32_t num_elems,
 51 |                        uint32_t message_size,
 52 |                        uint32_t total_packets,
 53 |                        uint32_t total_signatures,
 54 |                        const uint32_t* message_lens,
 55 |                        const uint32_t* public_key_offsets,
 56 |                        const uint32_t* private_key_offsets,
 57 |                        const uint32_t* message_start_offsets,
 58 |                        uint8_t* signatures_out,
 59 |                        uint8_t use_non_default_stream
 60 |                        ) {
 61 |     DIE(cl_check_init() == false, "OpenCL could not be init");
 62 |     
 63 |     cl_int ret;
 64 |     
 65 |     size_t sig_out_size = SIG_SIZE * total_signatures;
 66 | 
 67 |     if (0 == total_packets) {
 68 |         return;
 69 |     }
 70 | 	
 71 |     uint32_t total_packets_size = total_packets * message_size;
 72 | 	
 73 | 	LOG("signing %d packets sig_size: %zu message_size: %d\n",
 74 |         total_packets, sig_out_size, message_size);
 75 | 
 76 |     gpu_ctx_t* gpu_ctx = get_gpu_ctx();
 77 |     verify_ctx_t* cur_ctx = &gpu_ctx->verify_ctx;
 78 | 
 79 |     setup_gpu_ctx(cur_ctx,
 80 |                   elems,
 81 |                   num_elems,
 82 |                   message_size,
 83 |                   total_packets,
 84 |                   total_packets_size,
 85 |                   total_signatures,
 86 |                   message_lens,
 87 |                   public_key_offsets,
 88 |                   private_key_offsets,
 89 |                   message_start_offsets,
 90 |                   sig_out_size
 91 |                  );
 92 | 
 93 |     size_t num_threads_per_block = 64;
 94 |     size_t num_blocks = ROUND_UP_DIV(total_signatures, num_threads_per_block) * num_threads_per_block;
 95 |     LOG("signing blocks: %zu threads_per_block: %zu\n", num_blocks, num_threads_per_block);
 96 | 	
 97 |     /*
 98 | 	__kernel void ed25519_sign_kernel(__global unsigned char* packets,
 99 | 										uint32_t message_size,
100 | 										__global uint32_t* public_key_offsets,
101 | 										__global uint32_t* private_key_offsets,
102 | 										__global uint32_t* message_start_offsets,
103 | 										__global uint32_t* message_lens,
104 | 										uint32_t num_transactions,
105 | 										__global uint8_t* out)
106 | 	*/				 
107 |     CL_ERR( clSetKernelArg(ed25519_sign_kernel, 0, sizeof(cl_mem), (void *)&cur_ctx->packets) );
108 |     CL_ERR( clSetKernelArg(ed25519_sign_kernel, 1, sizeof(cl_uint), (void *)&message_size) );
109 |     CL_ERR( clSetKernelArg(ed25519_sign_kernel, 2, sizeof(cl_mem), (void *)&cur_ctx->public_key_offsets) );
110 |     CL_ERR( clSetKernelArg(ed25519_sign_kernel, 3, sizeof(cl_mem), (void *)&cur_ctx->signature_offsets) );
111 |     CL_ERR( clSetKernelArg(ed25519_sign_kernel, 4, sizeof(cl_mem), (void *)&cur_ctx->message_start_offsets) );
112 |     CL_ERR( clSetKernelArg(ed25519_sign_kernel, 5, sizeof(cl_mem), (void *)&cur_ctx->message_lens) );
113 |     CL_ERR( clSetKernelArg(ed25519_sign_kernel, 6, sizeof(cl_uint), (void *)&total_signatures) );
114 |     CL_ERR( clSetKernelArg(ed25519_sign_kernel, 7, sizeof(cl_mem), (void *)&cur_ctx->out) );
115 | 
116 |     size_t globalSize[2] = {num_blocks * num_threads_per_block, 0};
117 |     size_t localSize[2] = {num_threads_per_block, 0};    
118 |     
119 |     ret = clEnqueueNDRangeKernel(cmd_queue, ed25519_sign_kernel, 1, NULL,
120 |         globalSize, localSize, 0, NULL, NULL);
121 |         CL_ERR( ret );
122 |         
123 |     CL_ERR( clEnqueueReadBuffer(cmd_queue, cur_ctx->out, CL_TRUE, 0, sig_out_size, signatures_out, 0, NULL, NULL));
124 | 
125 |     release_gpu_ctx(gpu_ctx);
126 | }
127 | 


--------------------------------------------------------------------------------
/src/opencl-ecc-ed25519/verify.cpp:
--------------------------------------------------------------------------------
  1 | #include "cl_common.h"
  2 | 
  3 | #include "sha512.h"
  4 | #include <algorithm>
  5 | #include <stdio.h>
  6 | #include "ge.cu"
  7 | #include "sc.cu"
  8 | #include "fe.cu"
  9 | #include "sha512.cu"
 10 | 
 11 | #include "ed25519.h"
 12 | #include <pthread.h>
 13 | 
 14 | #include "gpu_common.h"
 15 | #include "gpu_ctx.h"
 16 | 
 17 | #define USE_CLOCK_GETTIME
 18 | #include "perftime.h"
 19 | 
 20 | static int consttime_equal(const unsigned char *x, const unsigned char *y) {
 21 |     unsigned char r = 0;
 22 | 
 23 |     r = x[0] ^ y[0];
 24 |     #define F(i) r |= x[i] ^ y[i]
 25 |     F(1);
 26 |     F(2);
 27 |     F(3);
 28 |     F(4);
 29 |     F(5);
 30 |     F(6);
 31 |     F(7);
 32 |     F(8);
 33 |     F(9);
 34 |     F(10);
 35 |     F(11);
 36 |     F(12);
 37 |     F(13);
 38 |     F(14);
 39 |     F(15);
 40 |     F(16);
 41 |     F(17);
 42 |     F(18);
 43 |     F(19);
 44 |     F(20);
 45 |     F(21);
 46 |     F(22);
 47 |     F(23);
 48 |     F(24);
 49 |     F(25);
 50 |     F(26);
 51 |     F(27);
 52 |     F(28);
 53 |     F(29);
 54 |     F(30);
 55 |     F(31);
 56 |     #undef F
 57 | 
 58 |     return !r;
 59 | }
 60 | 
 61 | static int ed25519_verify_device(const unsigned char *signature,
 62 |                       const unsigned char *message,
 63 |                       uint32_t message_len,
 64 |                       const unsigned char *public_key) {
 65 |     unsigned char h[64];
 66 |     unsigned char checker[32];
 67 |     sha512_context hash;
 68 |     ge_p2 R;
 69 | 
 70 |     if (signature[63] & 224) {
 71 |         return 0;
 72 |     }
 73 | 
 74 |     sha512_init(&hash);
 75 |     sha512_update(&hash, signature, 32);
 76 |     sha512_update(&hash, public_key, 32);
 77 |     sha512_update(&hash, message, message_len);
 78 |     sha512_final(&hash, h);
 79 |     
 80 |     sc_reduce(h);
 81 |     ge_cached Ai[8];
 82 |     if (0 != ge_gen_lookup(public_key, Ai)) {
 83 |         return 0;
 84 |     }
 85 |     ge_double_scalarmult_vartime(&R, h, Ai, signature + 32);
 86 |     ge_tobytes(checker, &R);
 87 | 
 88 |     if (!consttime_equal(checker, signature)) {
 89 |         return 0;
 90 |     }
 91 | 
 92 |     return 1;
 93 | }
 94 | 
 95 | int ed25519_verify(const unsigned char *signature,
 96 |                const unsigned char *message,
 97 |                uint32_t message_len,
 98 |                const unsigned char *public_key) {
 99 |     return ed25519_verify_device(signature, message, message_len, public_key);
100 | }
101 | 
// Process-wide logging switch; starts enabled.
bool g_verbose = true;

// Turns logging on (true) or off (false) by updating g_verbose.
void ed25519_set_verbose(bool val)
{
    g_verbose = val;
}
107 | 
108 | void ed25519_verify_many(const gpu_Elems* elems,
109 |                          uint32_t num_elems,
110 |                          uint32_t message_size,
111 |                          uint32_t total_packets,
112 |                          uint32_t total_signatures,
113 |                          const uint32_t* message_lens,
114 |                          const uint32_t* public_key_offsets,
115 |                          const uint32_t* signature_offsets,
116 |                          const uint32_t* message_start_offsets,
117 |                          uint8_t* out,
118 |                          uint8_t use_non_default_stream)
119 | {
120 |     DIE(cl_check_init() == false, "OpenCL could not be init");
121 |     
122 |     cl_int ret;
123 |     
124 |     LOG("Starting verify_many: num_elems: %d total_signatures: %d total_packets: %d message_size: %d\n",
125 |         num_elems, total_signatures, total_packets, message_size);
126 | 
127 |     size_t out_size = total_signatures * sizeof(uint8_t);
128 | 
129 |     uint32_t total_packets_size = total_packets * message_size;
130 | 
131 |     if (0 == total_packets) {
132 |         return;
133 |     }
134 | 	
135 | 	// Device allocate
136 | 
137 |     gpu_ctx_t* gpu_ctx = get_gpu_ctx();
138 | 
139 |     verify_ctx_t* cur_ctx = &gpu_ctx->verify_ctx;
140 | 
141 |     setup_gpu_ctx(cur_ctx,
142 |                   elems,
143 |                   num_elems,
144 |                   message_size,
145 |                   total_packets,
146 |                   total_packets_size,
147 |                   total_signatures,
148 |                   message_lens,
149 |                   public_key_offsets,
150 |                   signature_offsets,
151 |                   message_start_offsets,
152 |                   out_size
153 |                  );
154 | 
155 |     size_t num_threads_per_block = 64;
156 |     size_t num_blocks = ROUND_UP_DIV(total_signatures, num_threads_per_block) * num_threads_per_block;
157 |     LOG("num_blocks: %zu threads_per_block: %zu keys: %d out: %p\n",
158 |            num_blocks, num_threads_per_block, (int)total_packets, out);                     
159 |                              
160 |     CL_ERR( clSetKernelArg(ed25519_verify_kernel, 0, sizeof(cl_mem), (void *)&cur_ctx->packets) );
161 |     CL_ERR( clSetKernelArg(ed25519_verify_kernel, 1, sizeof(cl_uint), (void *)&message_size) );
162 |     CL_ERR( clSetKernelArg(ed25519_verify_kernel, 2, sizeof(cl_mem), (void *)&cur_ctx->message_lens) );
163 |     CL_ERR( clSetKernelArg(ed25519_verify_kernel, 3, sizeof(cl_mem), (void *)&cur_ctx->public_key_offsets) );
164 |     CL_ERR( clSetKernelArg(ed25519_verify_kernel, 4, sizeof(cl_mem), (void *)&cur_ctx->signature_offsets) );
165 |     CL_ERR( clSetKernelArg(ed25519_verify_kernel, 5, sizeof(cl_mem), (void *)&cur_ctx->message_start_offsets) );
166 |     CL_ERR( clSetKernelArg(ed25519_verify_kernel, 6, sizeof(cl_uint), (void *)&cur_ctx->offsets_len) );
167 |     CL_ERR( clSetKernelArg(ed25519_verify_kernel, 7, sizeof(cl_mem), (void *)&cur_ctx->out) );
168 | 
169 | 	perftime_t start, end;
170 |     get_time(&start);
171 | 
172 |     size_t globalSize[2] = {num_blocks * num_threads_per_block, 0};
173 |     size_t localSize[2] = {num_threads_per_block, 0};    
174 |     ret = clEnqueueNDRangeKernel(cmd_queue, ed25519_verify_kernel, 1, NULL,
175 |         globalSize, localSize, 0, NULL, NULL);
176 |         CL_ERR( ret );
177 |         
178 |     CL_ERR( clEnqueueReadBuffer(cmd_queue, cur_ctx->out, CL_TRUE, 0, out_size, out, 0, NULL, NULL));
179 | 	
180 | 	release_gpu_ctx(gpu_ctx);
181 | 
182 |     get_time(&end);
183 |     LOG("time diff: %f\n", get_diff(&start, &end));
184 | }
185 | 
// Returns the copyright and license notice. Keeping the text in a function
// that is part of the library ensures the notice is embedded in the binary.
const char* ed25519_license() {
    static const char* const license_notice =
        "Copyright (c) 2018 Solana Labs, Inc. "
        "Licensed under the Apache License, Version 2.0 "
        "<http://www.apache.org/licenses/LICENSE-2.0>";

    return license_notice;
}
192 | 
// The CUDA library supports host-memory registration; this OpenCL build only
// provides a stub that ignores its arguments and reports success (0).
int cuda_host_register(void* ptr, size_t size, unsigned int flags)
{
    (void)ptr;
    (void)size;
    (void)flags;

    return 0;
}
198 | 
// Stub counterpart of cuda_host_register(): ignores `ptr` and returns 0.
int cuda_host_unregister(void* ptr)
{
    (void)ptr;

    return 0;
}
203 | 
// Not implemented in the OpenCL build: reports that on stderr and terminates
// the process with exit status 1. Neither argument is read or written.
//
// The commented-out code below is kept as the reference for the intended
// behaviour.
static int
get_checked_scalar(unsigned char* scalar, const unsigned char* signature) {
    (void)scalar;
    (void)signature;

    // Check if top 4-bits are clear
    // then scalar is reduced.
    // if ((signature[31] & 0xf0) == 0) {
    //     for (int i = 0; i < 32; i++) {
    //         scalar[i] = signature[i];
    //     }
    //     return 0;
    // }

    // if ((signature[31] >> 7) != 0) {
    //     return 1;
    // }

    // scalar32_reduce(scalar);
    // if (!consttime_equal(scalar, signature)) {
    //     return 1;
    // }

    fprintf(stderr, "get_checked_scalar not implemented.\n");
    exit(1);

    // Not reached; present so every path of the function returns a value.
    return 0;
}
228 | 
229 | int ed25519_get_checked_scalar(unsigned char* out_scalar, const unsigned char* in_scalar) {
230 |     return get_checked_scalar(out_scalar, in_scalar);
231 | }
232 | 
// Intended contract: return 0=success if ge unpacks and is not small order.
// Not implemented in the OpenCL build: reports that on stderr and terminates
// the process with exit status 1. The argument is not read.
//
// The commented-out code below is kept as the reference for the intended
// behaviour.
static int
check_packed_ge_small_order(const unsigned char* packed_group_element) {
    (void)packed_group_element;

    // ge_p3 signature_R;

    // fail if ge does not unpack
    // if (0 != ge_frombytes_negate_vartime(&signature_R, packed_group_element)) {
    //     return 1;
    // }

    // // fail if ge is small order
    // if (0 != ge_is_small_order(&signature_R)) {
    //     return 1;
    // }

    fprintf(stderr, "check_packed_ge_small_order not implemented.\n");
    exit(1);

    // Not reached; present so every path of the function returns a value.
    return 0;
}
251 | 
252 | int ed25519_check_packed_ge_small_order(const unsigned char* packed_group_element) {
253 |     return check_packed_ge_small_order(packed_group_element);
254 | }
255 | 


--------------------------------------------------------------------------------
/src/opencl-platform/cl_common.h:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <stdio.h>
 3 | #include <string>
 4 | 
 5 | using namespace std;
 6 | 
 7 | #ifndef GPU_COMMON_H
 8 | #define GPU_COMMON_H
 9 | 
10 | // override CUDA directives
11 | #ifndef __device__
12 | #define __device__
13 | #endif
14 | 
15 | #ifndef __host__
16 | #define __host__
17 | #endif
18 | 
/* This silences warnings about the OpenCL version; without it, the -Werror flag prevents compilation. */
20 | #ifndef CL_TARGET_OPENCL_VERSION
21 | #define CL_TARGET_OPENCL_VERSION 120
22 | #endif
23 | 
24 | extern bool g_verbose;
25 | 
26 | #define LOG(...) if (g_verbose) { printf(__VA_ARGS__); }
27 | 
28 | #define ROUND_UP_DIV(x, y) (((x) + (y) - 1) / (y))
29 | 
30 | #ifndef UINT64_C
31 | #define UINT64_C uint64_t
32 | #endif
33 | 
34 | /************************************
35 | * OpenCL compile path
36 | *************************************/
37 | 
38 | #if __APPLE__
39 |    #include <OpenCL/opencl.h>
40 | #else
41 |    #include <CL/cl.h>
42 | #endif
43 | 
44 | // runs at the start of any OpenCL entry point crypto function
45 | bool cl_check_init(cl_uint sel_device_type);
46 | bool cl_check_init(void);
47 | 
48 | // do only 1 init, kernel compilation etc
49 | extern bool cl_is_init;
50 | 
51 | extern cl_context context;
52 | extern cl_command_queue cmd_queue;
53 | extern cl_program program;
54 | 
55 | extern cl_kernel init_sha256_state_kernel;
56 | extern cl_kernel end_sha256_state_kernel;
57 | 
58 | extern cl_kernel ed25519_sign_kernel;
59 | extern cl_kernel ed25519_verify_kernel;
60 | extern cl_kernel poh_verify_kernel;
61 | 
62 | // override any CUDA function qualifiers
63 | #define __host__
64 | #define __device__
65 | #define __global__
66 | 
67 | #include <iostream>
68 | 
69 | using namespace std;
70 | 
// OpenCL utilities
//
// CL_ERR(expr): reports a failed OpenCL status on stdout together with the
// file and line of the call site. `expr` is evaluated exactly once, so it is
// safe to pass an API call directly (the call is not repeated on failure).
// The expansion is a self-contained braced statement, so it cannot capture a
// following `else`; it works with or without a trailing semicolon.
#define CL_ERR(cl_ret) {                                                   \
    const cl_int cl_err_status_ = (cl_ret);                                \
    if (cl_err_status_ != CL_SUCCESS) {                                    \
        std::cout << std::endl << cl_get_string_err(cl_err_status_)        \
                  << " file " << __FILE__ << "@" << __LINE__ << std::endl; \
    }                                                                      \
}
73 | 
74 | int CL_COMPILE_ERR(int cl_ret,
75 |                   cl_program program,
76 |                   cl_device_id device);
77 | 
78 | const char* cl_get_string_err(cl_int err);
79 | void cl_get_compiler_err_log(cl_program program,
80 |                              cl_device_id device);
81 | 
82 | void read_kernel(string file_name, string &str_kernel);
83 | 
// DIE(assertion, call_description): when `assertion` is true, prints the
// current line number to stderr, calls perror(call_description) and exits
// with EXIT_FAILURE.
// NOTE(review): the expansion already ends in ';', so `DIE(...);` produces an
// extra empty statement and the macro cannot be the unbraced body of an
// if/else.
#define DIE(assertion, call_description)                    \
do {                                                        \
    if (assertion) {                                        \
            fprintf(stderr, "(%d): ",                       \
                            __LINE__);                      \
            perror(call_description);                       \
            exit(EXIT_FAILURE);                             \
    }                                                       \
} while(0);
93 | 
94 | #endif
95 | 


--------------------------------------------------------------------------------
/src/opencl-poh-verify/cl_poh_verify.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdint.h>
  2 | #include <stddef.h>
  3 | #include <stdlib.h>
  4 | #include <stdio.h>
  5 | #include <inttypes.h>
  6 | #include <pthread.h>
  7 | 
  8 | #include "../opencl-platform/cl_common.h"
  9 | #if __APPLE__
 10 |    #include <OpenCL/opencl.h>
 11 | #else
 12 |    #include <CL/cl.h>
 13 | #endif
 14 | 
 15 | #include "perftime.h"
 16 | 
 17 | #define MAX_NUM_GPUS 	1
 18 | #define MAX_QUEUE_SIZE 	8
 19 | 
 20 | #define SHA256_BLOCK_SIZE 32
 21 | 
// One context slot for PoH verification: its device buffers, the element
// count they were sized for, and the mutex guarding the slot.
typedef struct {
    // Hash buffer: written before the kernel runs and read back afterwards.
    cl_mem in_out_hashes;
    // Per-element uint64_t values uploaded from num_hashes_arr.
    cl_mem in_num_hashes_arr;
    // Element count recorded by setup_gpu_ctx() when the buffers are created.
    size_t in_num_elems;

    // Locked by get_gpu_ctx(), unlocked by release_gpu_ctx().
    pthread_mutex_t mutex;
} gpu_ctx_t;
 29 | 
 30 | 
// One context slot per (GPU, queue) pair; zero-initialised, so the cl_mem
// handles start out NULL.
static gpu_ctx_t g_gpu_ctx[MAX_NUM_GPUS][MAX_QUEUE_SIZE] = {0};
// Round-robin cursor: GPU index the next get_gpu_ctx() call will hand out.
static uint32_t g_cur_gpu = 0;
// Per-GPU round-robin cursor over that GPU's queue slots.
static uint32_t g_cur_queue[MAX_NUM_GPUS] = {0};
// -1 until cl_crypt_init_locked() has run; afterwards MAX_NUM_GPUS, or 0 when
// initialising a slot mutex failed.
static int32_t g_total_gpus = -1;

extern bool cl_check_init();
// Serialises initialisation and slot selection in poh_verify_many().
static pthread_mutex_t clg_ctx_mutex = PTHREAD_MUTEX_INITIALIZER;
// Set once poh_verify_many() has completed OpenCL and slot initialisation.
static bool initialized = false;
 39 | 
 40 | static bool cl_crypt_init_locked() {
 41 |     if (g_total_gpus == -1) {
 42 |         g_total_gpus = MAX_NUM_GPUS;
 43 |         LOG("total_gpus: %d\n", g_total_gpus);
 44 |         for (int gpu = 0; gpu < g_total_gpus; gpu++) {
 45 |             for (int queue = 0; queue < MAX_QUEUE_SIZE; queue++) {
 46 |                 int err = pthread_mutex_init(&g_gpu_ctx[gpu][queue].mutex, NULL);
 47 |                 if (err != 0) {
 48 |                     fprintf(stderr, "pthread_mutex_init error %d gpu: %d queue: %d\n",
 49 |                             err, gpu, queue);
 50 |                     g_total_gpus = 0;
 51 |                     return false;
 52 |                 }
 53 |             }
 54 |         }
 55 |     }
 56 |     return g_total_gpus > 0;
 57 | }
 58 | 
 59 | gpu_ctx_t* get_gpu_ctx() {
 60 |     int32_t cur_gpu, cur_queue;
 61 | 
 62 |     LOG("getting gpu_ctx\n");
 63 | 
 64 |     cur_gpu = g_cur_gpu;
 65 |     g_cur_gpu++;
 66 |     g_cur_gpu %= g_total_gpus;
 67 |     cur_queue = g_cur_queue[cur_gpu];
 68 |     g_cur_queue[cur_gpu]++;
 69 |     g_cur_queue[cur_gpu] %= MAX_QUEUE_SIZE;
 70 | 
 71 |     gpu_ctx_t* cur_ctx = &g_gpu_ctx[cur_gpu][cur_queue];
 72 |     LOG("locking contex mutex queue: %d gpu: %d\n", cur_queue, cur_gpu);
 73 |     pthread_mutex_lock(&cur_ctx->mutex);
 74 | 
 75 |     LOG("selecting gpu: %d queue: %d\n", cur_gpu, cur_queue);
 76 | 
 77 |     return cur_ctx;
 78 | }
 79 | 
 80 | void setup_gpu_ctx(gpu_ctx_t *cur_ctx,
 81 |                     uint8_t* hashes,
 82 |                     const uint64_t* num_hashes_arr,
 83 |                     size_t num_elems,
 84 |                     size_t nr_bytes_hashes,
 85 |                     size_t nr_bytes_num_hashes_arr
 86 |                    ) {
 87 | 	int ret;
 88 | 
 89 |     LOG("device allocate. num hashes: %lu sizes in MB: hashes: %f num_hashes_arr: %f\n",
 90 |         num_elems, (double)nr_bytes_hashes/(1024*1024), (double)nr_bytes_num_hashes_arr/(1024*1024));
 91 | 
 92 |     if (cur_ctx->in_out_hashes == NULL || cur_ctx->in_num_elems < num_elems) {
 93 |         clReleaseMemObject(cur_ctx->in_out_hashes);
 94 |         cur_ctx->in_out_hashes = clCreateBuffer(context, CL_MEM_READ_WRITE, nr_bytes_hashes, NULL, &ret);
 95 |         CL_ERR( ret );
 96 |     }
 97 | 
 98 |     if (cur_ctx->in_num_hashes_arr == NULL || cur_ctx->in_num_elems < num_elems) {
 99 |         clReleaseMemObject(cur_ctx->in_num_hashes_arr);
100 |         cur_ctx->in_num_hashes_arr = clCreateBuffer(context, CL_MEM_READ_ONLY, nr_bytes_num_hashes_arr, NULL, &ret);
101 |         CL_ERR( ret );
102 |         cur_ctx->in_num_elems = num_elems;
103 |     }
104 | 
105 | 
106 |     CL_ERR( clEnqueueWriteBuffer(cmd_queue, cur_ctx->in_out_hashes, CL_TRUE, 0, nr_bytes_hashes, hashes, 0, NULL, NULL));
107 |     CL_ERR( clEnqueueWriteBuffer(cmd_queue, cur_ctx->in_num_hashes_arr, CL_TRUE, 0, nr_bytes_num_hashes_arr, num_hashes_arr, 0, NULL, NULL));
108 | }
109 | 
110 | void release_gpu_ctx(gpu_ctx_t* cur_ctx) {
111 |     pthread_mutex_unlock(&cur_ctx->mutex);
112 | }
113 | 
114 | extern "C" {
115 | 
116 | void poh_verify_many_set_verbose(bool val) {
117 |     g_verbose = val;
118 | }
119 | // Runs the OpenCL PoH verify kernel over num_elems hashes, overwriting `hashes` in place; returns 0 on every path visible here.
120 | int poh_verify_many(uint8_t* hashes,                 // in/out: num_elems * SHA256_BLOCK_SIZE bytes
121 |                     const uint64_t* num_hashes_arr,  // in: num_elems 64-bit counts, one per hash
122 |                     size_t num_elems,                // number of hashes; 0 returns immediately
123 |                     uint8_t use_non_default_stream)  // not read by this implementation
124 | {
125 |     LOG("Starting poh_verify_many: num_elems: %zu\n", num_elems);
126 | 
127 |     if (num_elems == 0) return 0;
128 |     // The global mutex covers one-time OpenCL initialisation and the context lookup.
129 |     pthread_mutex_lock(&clg_ctx_mutex);
130 |     bool success = false;
131 | 
132 |     if (initialized == false) {
133 |         success = cl_check_init();
134 |         DIE(success == false, "OpenCL could not be init");
135 |         DIE(cl_crypt_init_locked() == false, "cl_crypt_init_locked failed");
136 |         initialized = true;
137 |     } else {
138 |         LOG("cl_poh_verify_many already initialized\n");
139 |     }
140 | 
141 |     gpu_ctx_t *cur_ctx = get_gpu_ctx();
142 |     pthread_mutex_unlock(&clg_ctx_mutex);
143 |     // Upload the hashes and their iteration counts into the context's device buffers.
144 |     size_t nr_bytes_hashes = num_elems * SHA256_BLOCK_SIZE * sizeof(uint8_t);
145 |     size_t nr_bytes_num_hashes_arr = num_elems * sizeof(uint64_t);
146 |     setup_gpu_ctx(cur_ctx,
147 |                     hashes,
148 |                     num_hashes_arr,
149 |                     num_elems,
150 |                     nr_bytes_hashes,
151 |                     nr_bytes_num_hashes_arr);
152 |     // num_blocks is a count of work-groups; it is scaled to work-items exactly once, in globalSize below.
153 |     size_t num_threads_per_block = 64;
154 |     size_t num_blocks = ROUND_UP_DIV(num_elems, num_threads_per_block);
155 |     LOG("num_blocks: %zu threads_per_block: %zu nr hashes: %zu\n",
156 |            num_blocks, num_threads_per_block, num_elems);
157 |     // Kernel arguments: hash buffer (in/out), iteration counts, element count.
158 |     CL_ERR( clSetKernelArg(poh_verify_kernel, 0, sizeof(cl_mem), (void *)&cur_ctx->in_out_hashes) );
159 |     CL_ERR( clSetKernelArg(poh_verify_kernel, 1, sizeof(cl_mem), (void *)&cur_ctx->in_num_hashes_arr) );
160 |     CL_ERR( clSetKernelArg(poh_verify_kernel, 2, sizeof(cl_uint), (void *)&cur_ctx->in_num_elems) );
161 | 
162 |     perftime_t start, end;
163 |     get_time(&start);
164 |     // One-dimensional launch (work_dim == 1): only the first entry of each size array is used.
165 |     size_t globalSize[2] = {num_blocks * num_threads_per_block, 0};
166 |     size_t localSize[2] = {num_threads_per_block, 0};
167 |     cl_int ret = clEnqueueNDRangeKernel(cmd_queue, poh_verify_kernel, 1, NULL, globalSize, localSize, 0, NULL, NULL);
168 |     CL_ERR( ret );
169 | 
170 |     ret = clFinish(cmd_queue);
171 |     CL_ERR( ret );
172 |     // Blocking read (CL_TRUE) copies the results back over the caller's input buffer.
173 |     ret = clEnqueueReadBuffer(cmd_queue, cur_ctx->in_out_hashes, CL_TRUE, 0, nr_bytes_hashes, hashes, 0, NULL, NULL);
174 |     CL_ERR( ret );
175 | 
176 |     get_time(&end);
177 |     LOG("time diff: %f\n", get_diff(&start, &end));
178 |     release_gpu_ctx(cur_ctx);
179 | 
180 |     return 0;
181 | }
182 | 
183 | }
184 | 


--------------------------------------------------------------------------------
/src/poh-simd/Makefile:
--------------------------------------------------------------------------------
 1 | CC=ispc
 2 | ISPC_FLAGS := -O2 --pic -I.
 3 | DEPS := sha256.h
 4 | # NOTE: CC is the ISPC compiler in this file; the final link step calls gcc directly.
 5 | ISPC_OBJ := ispcobj
 6 | ISPC_C_Objects := $(ISPC_OBJ)/poh-verify-sse2.o \
 7 | 		  $(ISPC_OBJ)/poh-verify-sse4.o \
 8 | 		  $(ISPC_OBJ)/poh-verify-avx1.o \
 9 | 		  $(ISPC_OBJ)/poh-verify-avx2.o \
10 | 		  $(ISPC_OBJ)/poh-verify-avx512skx.o
11 | # Directory that receives the shared library; override with `make OUT=<dir>`.
12 | OUT ?= libs
13 | 
14 | .PHONY: all run clean
15 | all: $(OUT)/libpoh-simd.so
16 | run: all
17 | # Each rule compiles poh-verify.ispc for one ISA; NAME_SUFFIX is pasted into the exported function name.
18 | $(ISPC_OBJ)/poh-verify-sse2.o: poh-verify.ispc $(DEPS)
19 | 	@mkdir -p $(ISPC_OBJ)
20 | 	$(CC) --target=sse2-i32x4 -DNAME_SUFFIX=sse2 $(ISPC_FLAGS) $< -o $@
21 | 
22 | $(ISPC_OBJ)/poh-verify-sse4.o: poh-verify.ispc $(DEPS)
23 | 	@mkdir -p $(ISPC_OBJ)
24 | 	$(CC) --target=sse4-i32x4 -DNAME_SUFFIX=sse4 $(ISPC_FLAGS) $< -o $@
25 | 
26 | $(ISPC_OBJ)/poh-verify-avx1.o: poh-verify.ispc $(DEPS)
27 | 	@mkdir -p $(ISPC_OBJ)
28 | 	$(CC) --target=avx1-i32x8 -DNAME_SUFFIX=avx1 $(ISPC_FLAGS) $< -o $@
29 | 
30 | $(ISPC_OBJ)/poh-verify-avx2.o: poh-verify.ispc $(DEPS)
31 | 	@mkdir -p $(ISPC_OBJ)
32 | 	$(CC) --target=avx2-i32x8 -DNAME_SUFFIX=avx2 $(ISPC_FLAGS) $< -o $@
33 | 
34 | $(ISPC_OBJ)/poh-verify-avx512skx.o: poh-verify.ispc $(DEPS)
35 | 	@mkdir -p $(ISPC_OBJ)
36 | 	$(CC) --target=avx512skx-i32x16 -DNAME_SUFFIX=avx512skx $(ISPC_FLAGS) $< -o $@
37 | # Link every per-ISA object into one shared library.
38 | $(OUT)/libpoh-simd.so: $(ISPC_C_Objects)
39 | 	@mkdir -p $(OUT)
40 | 	gcc -shared -o $@ $^
41 | 
42 | clean:
43 | 	@rm -rf $(ISPC_OBJ) $(OUT)
44 | 


--------------------------------------------------------------------------------
/src/poh-simd/build.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -e
 3 | # Runs make from this script's directory, sending the output to <caller's cwd>/dist.
 4 | caller_dir=$PWD
 5 | cd "$(dirname "$0")"
 6 | 
 7 | echo "--- Build"
 8 | (
 9 |   set -x
10 |   make OUT="$caller_dir/dist"
11 | )
12 | 
13 | 


--------------------------------------------------------------------------------
/src/poh-simd/poh-verify.ispc:
--------------------------------------------------------------------------------
  1 | #include "sha256.h"
  2 | 
  3 | #define MAKE_FN_NAME(x) export void  poh_verify_many_simd_ ## x (uniform u8 hashes[], uniform const unsigned int64 num_hashes_arr[])
  4 | #define FUNCTION_NAME(signal) MAKE_FN_NAME(signal)
  5 | // Exported as poh_verify_many_simd_<NAME_SUFFIX>: iterates SHA-256 in place over programCount consecutive 32-byte hashes.
  6 | FUNCTION_NAME(NAME_SUFFIX)
  7 | {
  8 |     foreach(i = 0 ... programCount) { // i selects one hash and its entry in num_hashes_arr
  9 |         u8* hash = &hashes[i * SHA256_BLOCK_SIZE];
 10 |         varying u32 s[8];
 11 |         varying u32 w[64];
 12 |         varying u32 T0;
 13 |         varying u32 T1;
 14 | 
 15 |         // Load the 32-byte hash as eight big-endian 32-bit words w[0..7]
 16 |         for (int j = 0; j < SHA256_BLOCK_SIZE / 4; j++) {
 17 |             w[j] = (((u32)hash[j * 4] << 24) |
 18 |                     ((u32)hash[j * 4 + 1] << 16) |
 19 |                     ((u32)hash[j * 4 + 2] << 8) |
 20 |                     ((u32)hash[j * 4 + 3]));
 21 |         }
 22 |         // A count of zero skips both the hashing loop and the write-back, leaving the hash untouched.
 23 |         if (num_hashes_arr[i] > 0) {
 24 |             for (int j = 0; j < num_hashes_arr[i]; j++) { // NOTE(review): int counter vs unsigned int64 count - confirm counts stay below INT_MAX
 25 |                 s[0] = 0x6a09e667;
 26 |                 s[1] = 0xbb67ae85;
 27 |                 s[2] = 0x3c6ef372;
 28 |                 s[3] = 0xa54ff53a;
 29 |                 s[4] = 0x510e527f;
 30 |                 s[5] = 0x9b05688c;
 31 |                 s[6] = 0x1f83d9ab;
 32 |                 s[7] = 0x5be0cd19;
 33 |                 // Padding for a 32-byte message: 0x80 marker byte, zeros, then the bit length 0x100 (256).
 34 |                 w[8] = 0x80000000;
 35 |                 w[9] = 0;
 36 |                 w[10] = 0;
 37 |                 w[11] = 0;
 38 |                 w[12] = 0;
 39 |                 w[13] = 0;
 40 |                 w[14] = 0;
 41 |                 w[15] = 0x00000100;
 42 |                 // 64 unrolled rounds; each schedule word w[16..63] is computed immediately before the round that uses it.
 43 |                 SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 0, w[0]);
 44 |                 SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 1, w[1]);
 45 |                 SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 2, w[2]);
 46 |                 SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 3, w[3]);
 47 |                 SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 4, w[4]);
 48 |                 SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 5, w[5]);
 49 |                 SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 6, w[6]);
 50 |                 SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 7, w[7]);
 51 |                 SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 8, w[8]);
 52 |                 SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 9, w[9]);
 53 |                 SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 10, w[10]);
 54 |                 SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 11, w[11]);
 55 |                 SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 12, w[12]);
 56 |                 SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 13, w[13]);
 57 |                 SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 14, w[14]);
 58 |                 SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 15, w[15]);
 59 |                 w[16] = WSIGMA1(w[14]) + w[0] + w[9] + WSIGMA0(w[1]);
 60 |                 SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 16, w[16]);
 61 |                 w[17] = WSIGMA1(w[15]) + w[1] + w[10] + WSIGMA0(w[2]);
 62 |                 SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 17, w[17]);
 63 |                 w[18] = WSIGMA1(w[16]) + w[2] + w[11] + WSIGMA0(w[3]);
 64 |                 SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 18, w[18]);
 65 |                 w[19] = WSIGMA1(w[17]) + w[3] + w[12] + WSIGMA0(w[4]);
 66 |                 SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 19, w[19]);
 67 |                 w[20] = WSIGMA1(w[18]) + w[4] + w[13] + WSIGMA0(w[5]);
 68 |                 SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 20, w[20]);
 69 |                 w[21] = WSIGMA1(w[19]) + w[5] + w[14] + WSIGMA0(w[6]);
 70 |                 SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 21, w[21]);
 71 |                 w[22] = WSIGMA1(w[20]) + w[6] + w[15] + WSIGMA0(w[7]);
 72 |                 SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 22, w[22]);
 73 |                 w[23] = WSIGMA1(w[21]) + w[7] + w[16] + WSIGMA0(w[8]);
 74 |                 SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 23, w[23]);
 75 |                 w[24] = WSIGMA1(w[22]) + w[8] + w[17] + WSIGMA0(w[9]);
 76 |                 SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 24, w[24]);
 77 |                 w[25] = WSIGMA1(w[23]) + w[9] + w[18] + WSIGMA0(w[10]);
 78 |                 SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 25, w[25]);
 79 |                 w[26] = WSIGMA1(w[24]) + w[10] + w[19] + WSIGMA0(w[11]);
 80 |                 SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 26, w[26]);
 81 |                 w[27] = WSIGMA1(w[25]) + w[11] + w[20] + WSIGMA0(w[12]);
 82 |                 SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 27, w[27]);
 83 |                 w[28] = WSIGMA1(w[26]) + w[12] + w[21] + WSIGMA0(w[13]);
 84 |                 SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 28, w[28]);
 85 |                 w[29] = WSIGMA1(w[27]) + w[13] + w[22] + WSIGMA0(w[14]);
 86 |                 SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 29, w[29]);
 87 |                 w[30] = WSIGMA1(w[28]) + w[14] + w[23] + WSIGMA0(w[15]);
 88 |                 SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 30, w[30]);
 89 |                 w[31] = WSIGMA1(w[29]) + w[15] + w[24] + WSIGMA0(w[16]);
 90 |                 SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 31, w[31]);
 91 |                 w[32] = WSIGMA1(w[30]) + w[16] + w[25] + WSIGMA0(w[17]);
 92 |                 SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 32, w[32]);
 93 |                 w[33] = WSIGMA1(w[31]) + w[17] + w[26] + WSIGMA0(w[18]);
 94 |                 SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 33, w[33]);
 95 |                 w[34] = WSIGMA1(w[32]) + w[18] + w[27] + WSIGMA0(w[19]);
 96 |                 SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 34, w[34]);
 97 |                 w[35] = WSIGMA1(w[33]) + w[19] + w[28] + WSIGMA0(w[20]);
 98 |                 SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 35, w[35]);
 99 |                 w[36] = WSIGMA1(w[34]) + w[20] + w[29] + WSIGMA0(w[21]);
100 |                 SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 36, w[36]);
101 |                 w[37] = WSIGMA1(w[35]) + w[21] + w[30] + WSIGMA0(w[22]);
102 |                 SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 37, w[37]);
103 |                 w[38] = WSIGMA1(w[36]) + w[22] + w[31] + WSIGMA0(w[23]);
104 |                 SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 38, w[38]);
105 |                 w[39] = WSIGMA1(w[37]) + w[23] + w[32] + WSIGMA0(w[24]);
106 |                 SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 39, w[39]);
107 |                 w[40] = WSIGMA1(w[38]) + w[24] + w[33] + WSIGMA0(w[25]);
108 |                 SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 40, w[40]);
109 |                 w[41] = WSIGMA1(w[39]) + w[25] + w[34] + WSIGMA0(w[26]);
110 |                 SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 41, w[41]);
111 |                 w[42] = WSIGMA1(w[40]) + w[26] + w[35] + WSIGMA0(w[27]);
112 |                 SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 42, w[42]);
113 |                 w[43] = WSIGMA1(w[41]) + w[27] + w[36] + WSIGMA0(w[28]);
114 |                 SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 43, w[43]);
115 |                 w[44] = WSIGMA1(w[42]) + w[28] + w[37] + WSIGMA0(w[29]);
116 |                 SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 44, w[44]);
117 |                 w[45] = WSIGMA1(w[43]) + w[29] + w[38] + WSIGMA0(w[30]);
118 |                 SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 45, w[45]);
119 |                 w[46] = WSIGMA1(w[44]) + w[30] + w[39] + WSIGMA0(w[31]);
120 |                 SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 46, w[46]);
121 |                 w[47] = WSIGMA1(w[45]) + w[31] + w[40] + WSIGMA0(w[32]);
122 |                 SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 47, w[47]);
123 |                 w[48] = WSIGMA1(w[46]) + w[32] + w[41] + WSIGMA0(w[33]);
124 |                 SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 48, w[48]);
125 |                 w[49] = WSIGMA1(w[47]) + w[33] + w[42] + WSIGMA0(w[34]);
126 |                 SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 49, w[49]);
127 |                 w[50] = WSIGMA1(w[48]) + w[34] + w[43] + WSIGMA0(w[35]);
128 |                 SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 50, w[50]);
129 |                 w[51] = WSIGMA1(w[49]) + w[35] + w[44] + WSIGMA0(w[36]);
130 |                 SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 51, w[51]);
131 |                 w[52] = WSIGMA1(w[50]) + w[36] + w[45] + WSIGMA0(w[37]);
132 |                 SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 52, w[52]);
133 |                 w[53] = WSIGMA1(w[51]) + w[37] + w[46] + WSIGMA0(w[38]);
134 |                 SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 53, w[53]);
135 |                 w[54] = WSIGMA1(w[52]) + w[38] + w[47] + WSIGMA0(w[39]);
136 |                 SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 54, w[54]);
137 |                 w[55] = WSIGMA1(w[53]) + w[39] + w[48] + WSIGMA0(w[40]);
138 |                 SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 55, w[55]);
139 |                 w[56] = WSIGMA1(w[54]) + w[40] + w[49] + WSIGMA0(w[41]);
140 |                 SHA256ROUND(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 56, w[56]);
141 |                 w[57] = WSIGMA1(w[55]) + w[41] + w[50] + WSIGMA0(w[42]);
142 |                 SHA256ROUND(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 57, w[57]);
143 |                 w[58] = WSIGMA1(w[56]) + w[42] + w[51] + WSIGMA0(w[43]);
144 |                 SHA256ROUND(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 58, w[58]);
145 |                 w[59] = WSIGMA1(w[57]) + w[43] + w[52] + WSIGMA0(w[44]);
146 |                 SHA256ROUND(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 59, w[59]);
147 |                 w[60] = WSIGMA1(w[58]) + w[44] + w[53] + WSIGMA0(w[45]);
148 |                 SHA256ROUND(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 60, w[60]);
149 |                 w[61] = WSIGMA1(w[59]) + w[45] + w[54] + WSIGMA0(w[46]);
150 |                 SHA256ROUND(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 61, w[61]);
151 |                 w[62] = WSIGMA1(w[60]) + w[46] + w[55] + WSIGMA0(w[47]);
152 |                 SHA256ROUND(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 62, w[62]);
153 |                 w[63] = WSIGMA1(w[61]) + w[47] + w[56] + WSIGMA0(w[48]);
154 |                 SHA256ROUND(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 63, w[63]);
155 | 
156 |                 // Feed forward: add the initial state constants back into the working state
157 |                 s[0] = s[0] + 0x6a09e667;
158 |                 s[1] = s[1] + 0xbb67ae85;
159 |                 s[2] = s[2] + 0x3c6ef372;
160 |                 s[3] = s[3] + 0xa54ff53a;
161 |                 s[4] = s[4] + 0x510e527f;
162 |                 s[5] = s[5] + 0x9b05688c;
163 |                 s[6] = s[6] + 0x1f83d9ab;
164 |                 s[7] = s[7] + 0x5be0cd19;
165 | 
166 |                 // Chain: this digest becomes message words w[0..7] of the next iteration
167 |                 for (int k = 0; k < 8; k++) {
168 |                     w[k] = s[k];
169 |                 }
170 |             }
171 | 
172 |             // Write the final digest back over the input hash, most significant byte first
173 |             for (int j = 0; j < SHA256_BLOCK_SIZE / 4; j++) {
174 |                 hash[j * 4 + 3] = s[j] & 0xff;
175 |                 hash[j * 4 + 2] = (s[j] >> 8) & 0xff;
176 |                 hash[j * 4 + 1] = (s[j] >> 16) & 0xff;
177 |                 hash[j * 4 + 0] = (s[j] >> 24) & 0xff;
178 |             }
179 |         }
180 |     }
181 | }
182 | 


--------------------------------------------------------------------------------
/src/poh-simd/sha256.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Adapted from kste's sha256 implementation, accessible at https://github.com/kste/sha256_avx
 3 |  *
 4 |  * MIT License
 5 |  *
 6 |  * Copyright (c) 2017
 7 |  *
 8 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 9 |  * of this software and associated documentation files (the "Software"), to deal
10 |  * in the Software without restriction, including without limitation the rights
11 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 |  * copies of the Software, and to permit persons to whom the Software is
13 |  * furnished to do so, subject to the following conditions:
14 |  *
15 |  * The above copyright notice and this permission notice shall be included in all
16 |  * copies or substantial portions of the Software.
17 |  *
18 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 |  * SOFTWARE.
25 |  */
26 | 
27 | #ifndef SHA256_H
28 | #define SHA256_H
29 | 
30 | #define u32 unsigned int32
31 | #define u8 unsigned int8
32 | 
33 | #define SHA256_BLOCK_SIZE 32
34 | 
35 | static const u32 RC[] = {
36 |     0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
37 |     0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
38 |     0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
39 |     0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
40 |     0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
41 |     0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
42 |     0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
43 |     0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
44 |     0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
45 |     0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
46 |     0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
47 |     0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
48 |     0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
49 |     0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
50 |     0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
51 |     0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
52 | };
53 | 
54 | #define MAJ(a, b, c) (((a) & (b)) ^ ((a) & (c)) ^ ((b) & (c)))
55 | #define CH(a, b, c) (((a) & (b)) ^ (~(a) & (c)))
56 | // Rotate a 32-bit word right by r bits (0 < r < 32); 32 is the word width, unrelated to SHA256_BLOCK_SIZE.
57 | #define ROTR32(x, r) (((x) >> (r)) | ((x) << (32 - (r))))
58 | // Round-function sigmas (SIGMA*) and message-schedule sigmas (WSIGMA*).
59 | #define SIGMA1(x) (ROTR32(x, 6) ^ ROTR32(x, 11) ^ ROTR32(x, 25))
60 | #define SIGMA0(x) (ROTR32(x, 2) ^ ROTR32(x, 13) ^ ROTR32(x, 22))
61 | 
62 | #define WSIGMA1(x) (ROTR32(x, 17) ^ ROTR32(x, 19) ^ ((x) >> 10))
63 | #define WSIGMA0(x) (ROTR32(x, 7) ^ ROTR32(x, 18) ^ ((x) >> 3))
64 | // One round. Expands to several statements: use only as a standalone statement with T0 and T1 in scope; d and h must be lvalues.
65 | #define SHA256ROUND(a, b, c, d, e, f, g, h, rc, w) \
66 |     T0 = (h) + SIGMA1(e) + CH(e, f, g) + RC[rc] + (w); \
67 |     d = (d) + T0; \
68 |     T1 = SIGMA0(a) + MAJ(a, b, c); \
69 |     h = T0 + T1;
70 | 
71 | #endif


--------------------------------------------------------------------------------
/src/poh-verify-test/main.cpp:
--------------------------------------------------------------------------------
  1 | #include "cl_common.h"
  2 | 
  3 | #include <string.h>
  4 | #include <stdio.h>
  5 | #include <inttypes.h>
  6 | #include <pthread.h>
  7 | 
  8 | #define USE_CLOCK_GETTIME
  9 | #define ROUND_UP_DIV(x, y) (((x) + (y) - 1) / (y))
 10 | #define SHA256_BLOCK_SIZE 32
 11 | 
 12 | #include "perftime.h"
 13 | 
 14 | bool g_verbose = false;
 15 | 
 16 | typedef struct input_poh_ { // One batch of input for poh_verify_many.
 17 | 
 18 |     uint8_t* hashes; // num_elems bytes of hash data, SHA256_BLOCK_SIZE bytes per hash
 19 |     uint64_t* num_hashes_arr; // per-hash iteration counts; allocated with num_elems slots, num_elems / SHA256_BLOCK_SIZE are filled
 20 |     size_t num_elems; // size of hashes in bytes (not the number of hashes)
 21 | 
 22 | } input_poh;
 23 | 
 24 | void static inline save_out(uint8_t* hashes,
 25 |     size_t num_elems, size_t index_thread) {
 26 |     // Writes num_elems bytes as space-separated decimals to "test_hashes_output_<index_thread>"; exits on open failure.
 27 |     FILE * fp;
 28 | 
 29 |     const char *file_name = "test_hashes_output";
 30 |     char temp_string[50];
 31 |     snprintf(temp_string, sizeof(temp_string), "%s_%zu", file_name, index_thread); // bounded; %zu matches size_t
 32 | 
 33 |     fp = fopen (temp_string, "w");
 34 |     if (fp == NULL) {
 35 |         fprintf(stderr, "Could not create file %s\n", temp_string);
 36 |         exit(-1);
 37 |     }
 38 | 
 39 |     for (size_t i = 0; i < num_elems; ++i) {
 40 |         fprintf(fp, "%hhu ", hashes[i]);
 41 |     }
 42 |     fclose(fp);
 43 | }
 44 | 
 45 | 
 46 | input_poh* allocate_input_poh(size_t num_elems) {
 47 |     // Allocates a zero-initialised batch; num_elems is the size of the hash buffer in bytes.
 48 |     input_poh* input_result = (input_poh*)calloc(1, sizeof(input_poh));
 49 |     DIE(input_result == NULL, "Error while allocating an input_poh structure");
 50 | 
 51 |     input_result->num_elems = num_elems;
 52 | 
 53 |     input_result->hashes = (uint8_t*)calloc(input_result->num_elems, sizeof(uint8_t));
 54 |     DIE(input_result->hashes == NULL, "Error while allocating input_result->hashes");
 55 |     // Sized for num_elems counts, although the fillers in this file only write num_elems / SHA256_BLOCK_SIZE of them.
 56 |     input_result->num_hashes_arr = (uint64_t*)calloc(input_result->num_elems, sizeof(uint64_t));
 57 |     DIE(input_result->num_hashes_arr == NULL, "Error while allocating input_result->num_hashes_arr");
 58 | 
 59 |     return input_result; // caller releases with free_input_poh()
 60 | }
 61 | 
 62 | void free_input_poh(input_poh** poh) {
 63 |     // Frees a batch and both of its buffers; a NULL handle or an already-cleared handle is a no-op.
 64 |     if (poh == NULL || *poh == NULL) return;
 65 |     input_poh* batch = *poh;
 66 |     free(batch->hashes);
 67 |     free(batch->num_hashes_arr);
 68 |     free(batch);
 69 |     *poh = NULL; // clear the caller's handle so it cannot be reused or double-freed
 70 | }
 71 | 
 72 | input_poh* get_input(const char* file_hashes, const char* file_hashes_arr, const char* file_num_elems) {
 73 |     // Loads a batch from three text files (hash bytes, per-hash counts, byte count); exits on any open or parse failure.
 74 |     FILE * fp;
 75 |     fp = fopen(file_hashes, "r");
 76 | 
 77 |     if (fp == NULL) {
 78 |         fprintf(stderr, "Could not open file %s\n", file_hashes);
 79 |         exit(-1);
 80 |     }
 81 | 
 82 |     FILE * fp2;
 83 |     fp2 = fopen(file_hashes_arr, "r");
 84 |     
 85 |     if (fp2 == NULL) {
 86 |         fprintf(stderr, "Could not open file %s\n", file_hashes_arr);
 87 |         exit(-1);
 88 |     }
 89 | 
 90 |     FILE * fp3;
 91 |     fp3 = fopen(file_num_elems, "r");
 92 |     
 93 |     if (fp3 == NULL) {
 94 |         fprintf(stderr, "Could not open file %s\n", file_num_elems);
 95 |         exit(-1);
 96 |     }
 97 |     // fscanf returns EOF (-1), not 0, when input runs out, so every read requires exactly one converted item.
 98 |     size_t num_elems;
 99 |     DIE( 1 != fscanf(fp3, "%zu", &num_elems), "Error while reading num_elems from file");
100 |     fprintf(stderr, "num_elems read from file %s is %zu\n", file_num_elems, num_elems);
101 | 
102 |     input_poh* input_result = allocate_input_poh(num_elems);
103 |     // num_elems is a byte count: one decimal value per byte of hash data.
104 |     for (size_t i=0; i<input_result->num_elems; ++i) {
105 |         if( 1 != fscanf(fp, "%hhu", &input_result->hashes[i])) {
106 |             fprintf(stderr, "Error while reading hashes from file %s at index %zu \n", file_hashes, i);
107 |             exit(-2);
108 |         }
109 |     }    
110 |     // One iteration count per SHA256_BLOCK_SIZE-byte hash; SCNu64 is the portable scan format for uint64_t.
111 |     for (size_t i=0; i<input_result->num_elems/SHA256_BLOCK_SIZE; ++i) {  
112 |         if( 1 != fscanf(fp2, "%" SCNu64, &input_result->num_hashes_arr[i])) {
113 |             fprintf(stderr, "Error while reading input num_hashes_arr from file %s at index %zu \n", file_hashes_arr, i);
114 |             exit(-2);
115 |         }
116 |     }
117 | 
118 |     fclose(fp);
119 |     fclose(fp2);
120 |     fclose(fp3);
121 | 
122 |     return input_result;
123 | }
124 | 
125 | void generate_input(input_poh* input_result) {
126 |     srand(1); // fixed seed: the cuda and opencl variants must be fed identical input
127 |     for (size_t byte_idx = 0; byte_idx != input_result->num_elems; ++byte_idx) {
128 |         input_result->hashes[byte_idx] = rand() % 100000; // implicitly narrowed to uint8_t on assignment
129 |     }
130 |     const size_t hash_count = input_result->num_elems / SHA256_BLOCK_SIZE; // one count per hash
131 |     for (size_t hash_idx = 0; hash_idx != hash_count; ++hash_idx) {
132 |         input_result->num_hashes_arr[hash_idx] = 20000;
133 |     }
134 | }
135 | 
136 | extern "C" {
137 |     extern int poh_verify_many(uint8_t*, const uint64_t*, size_t, uint8_t);
138 |     void poh_verify_many_set_verbose(bool);
139 | }
140 | 
141 | void* work(void *param) { // pthread entry point: param is the input_poh batch to verify
142 |     input_poh* batch = static_cast<input_poh*>(param);
143 |     poh_verify_many(batch->hashes, batch->num_hashes_arr, batch->num_elems / SHA256_BLOCK_SIZE, 0); // byte count -> hash count
144 |     return nullptr;
145 | }
146 | 
147 | int main(int argc, const char* argv[]) {
148 |     // Test driver: "generate" mode hashes synthetic batches on several threads; file mode hashes one batch read from disk.
149 |     if (argc == 1 || argc == 2) {
150 |         printf("usage 1: %s [-v] [-save_output] generate <nr_elements> <nr_inputs>\n", argv[0]);
151 |         printf("usage 2: %s [-v] [-save_output] [-check_result] <file_num_hashes> <file_num_hashes_arr> <file_num_elems>\n", argv[0]);
152 |         printf("usage: argc is %i \n", argc);
153 |         return 1;
154 |     }
155 |     int exit_code = 0; // becomes 1 when -check_result finds a mismatch
156 |     int arg = 1;
157 |     bool verbose = false;
158 |     bool save_output_file = false;
159 |     bool check_result = false;
160 |     if (arg < argc && 0 == strcmp(argv[arg], "-v")) {
161 |         verbose = true;
162 |         arg++;
163 |     }
164 |     if (arg < argc && 0 == strcmp(argv[arg], "-save_output")) {
165 |         save_output_file = true;
166 |         arg++;
167 |     }
168 |     if (arg < argc && 0 == strcmp(argv[arg], "-check_result")) {
169 |         check_result = true;
170 |         arg++;
171 |     }
172 |     // The arg < argc guards keep a flags-only command line from reading argv[argc] (NULL); it falls through to usage.
173 |     poh_verify_many_set_verbose(verbose);
174 | 
175 |     if (arg < argc && 0 == strcmp(argv[arg], "generate")) {
176 |         ++arg;
177 |         if ((argc - arg) != 2) {
178 |             printf("usage 1: %s [-v] [-save_output] generate <nr_elements> <nr_inputs>\n", argv[0]);
179 |             printf("usage 2: %s [-v] [-save_output] [-check_result] <file_num_hashes> <file_num_hashes_arr> <file_num_elems>\n", argv[0]);
180 |             printf("usage: argc is %i \n", argc);
181 |             return 1;
182 |         }
183 | 
184 |         if (0 == strcmp(argv[arg], "0")) {
185 |             printf("nr_elements is 0!\n");
186 |             return 1;
187 |         }
188 | 
189 |         size_t num_elems = strtoul(argv[arg], nullptr, 10);
190 |         if (num_elems == 0) {
191 |             printf("nr_elements is not a number %s!\n", argv[arg]);
192 |             exit(-1);
193 |         }
194 |         ++arg;
195 | 
196 |         size_t num_threads = strtoul(argv[arg], nullptr, 10);
197 |         if (num_threads == 0) {
198 |             printf("nr_inputs is not a number %s!\n", argv[arg]);
199 |             exit(-1);
200 |         }
201 | 
202 |         pthread_t *threads;
203 |         // num_elems counts bytes; round it up to a whole number of SHA256_BLOCK_SIZE-byte hashes.
204 |         num_elems = ROUND_UP_DIV(num_elems, SHA256_BLOCK_SIZE) * SHA256_BLOCK_SIZE;
205 |         printf("nr_elements rounded up to %zu \n", num_elems);
206 | 
207 |         threads = (pthread_t*)calloc(num_threads, sizeof(pthread_t));
208 |         if (threads == NULL) {
209 |             fprintf(stderr, "Error while allocating threads\n");
210 |             exit(-1);
211 |         }
212 |         // Array of batch pointers, one per thread.
213 |         input_poh** vinput_result = (input_poh**)calloc(num_threads, sizeof(input_poh*));
214 |         DIE(vinput_result == NULL, "Error while allocating vinput_result");
215 |         for (size_t i = 0; i < num_threads; ++i) {
216 |             vinput_result[i] = allocate_input_poh(num_elems);
217 |             generate_input(vinput_result[i]);
218 |         }
219 |         LOG("Created and filled input_poh with %zu elements for %zu threads\n", num_elems, num_threads);
220 |         
221 |         perftime_t start, end;
222 |         get_time(&start);
223 | 
224 |         for (size_t i = 0; i < num_threads; ++i) {
225 |             if (pthread_create (&threads[i], NULL, work, (void*)vinput_result[i]) != 0) {
226 |                 fprintf(stderr, "Error while creating threads %zu\n", i);
227 |                 exit(-1);
228 |             }
229 |         }
230 | 
231 |         for (size_t i = 0; i < num_threads; ++i) {
232 |             if (pthread_join (threads[i], NULL) != 0) {
233 |                 fprintf(stderr, "Error while joining threads %zu\n", i);
234 |                 exit(-1);
235 |             }
236 |         } 
237 | 
238 |         get_time(&end);
239 | 
240 |         double diff = get_diff(&start, &end);
241 |         printf("Total time hashing diff: %f microSeconds or %f seconds \n", diff, diff/1000000);
242 | 
243 | 
244 |         for (size_t i = 0; i < num_threads; ++i) {
245 |             if (save_output_file) {
246 |                 save_out(vinput_result[i]->hashes, vinput_result[i]->num_elems, i);
247 |             }
248 |         }
249 | 
250 |         for (size_t i = 0; i < num_threads; ++i) {
251 |             free_input_poh(&vinput_result[i]);
252 |         }
253 |         free(vinput_result);
254 |         free(threads);
255 |     }
256 |     else {
257 |         if ((argc - arg) != 3) {
258 |             printf("usage 1: %s [-v] [-save_output] generate <nr_elements> <nr_inputs>\n", argv[0]);
259 |             printf("usage 2: %s [-v] [-save_output] [-check_result] <file_num_hashes> <file_num_hashes_arr> <file_num_elems> \n", argv[0]);
260 |             printf("usage: argc is %i \n", argc);
261 |             return 1;
262 |         }
263 | 
264 |         input_poh* input_result = get_input(argv[arg], argv[arg+1], argv[arg+2]);
265 |         perftime_t start, end;
266 |         
267 |         get_time(&start);
268 |         work(input_result);
269 |         get_time(&end);
270 |         
271 |         double diff = get_diff(&start, &end);
272 |         printf("Total time hashing diff: %f microSeconds or %f seconds \n", diff, diff/1000000);
273 | 
274 |         if (save_output_file) {
275 |             save_out(input_result->hashes, input_result->num_elems, 0);
276 |         }
277 |         // Compare the computed hashes byte-for-byte against the stored reference output.
278 |         if (check_result) {
279 |             FILE * fp;
280 |             fp = fopen(argv[arg+2], "r");
281 | 
282 |             if (fp == NULL) {
283 |                 fprintf(stderr, "Could not open file %s\n", argv[arg+2]);
284 |                 exit(-1);
285 |             }
286 | 
287 |             FILE * fp2;
288 |             const char* file_with_results = "..//poh-verify-test//test_hashes_output_332";
289 |             fp2 = fopen(file_with_results, "r");
290 | 
291 |             if (fp2 == NULL) {
292 |                 fprintf(stderr, "Could not open file %s\n", file_with_results);
293 |                 exit(-1);
294 |             }
295 |             // fscanf returns EOF (-1) on empty input, so require exactly one converted item.
296 |             size_t num_elems;
297 |             DIE( 1 != fscanf(fp, "%zu", &num_elems), "Error while reading num_elems from file");
298 |             fprintf(stderr, "num_elems read from file %s is %zu\n", argv[arg+2], num_elems);
299 | 
300 |             uint8_t* test_result_hashes = (uint8_t*)calloc(input_result->num_elems, sizeof(uint8_t));
301 |             DIE(test_result_hashes == NULL, "Error while allocating test_result_hashes");
302 | 
303 |             size_t i = 0;
304 |             for (i=0; i < num_elems; ++i) {
305 |                 if( 1 != fscanf(fp2, "%hhu", &test_result_hashes[i])) {
306 |                     fprintf(stderr, "Error while reading hashes from file %s at index %zu \n", file_with_results, i);
307 |                     exit(-2);
308 |                 }
309 |             }
310 | 
311 |             for (i=0; i < num_elems; ++i) {
312 |                 if (test_result_hashes[i] != input_result->hashes[i]) {
313 |                     fprintf(stderr, "Different result detected at index %zu of %zu  actual result: %hhu expected: %hhu \n TEST FAILED", 
314 |                         i, num_elems, input_result->hashes[i], test_result_hashes[i]);
315 |                     exit_code = 1; // make the failure visible to scripts and CI
316 |                     break;
317 |                 }
318 |             }
319 |             if (num_elems == i) {
320 |                 printf("TEST PASSED num_elems %zu\n", num_elems);
321 |             }
322 |             free(test_result_hashes);
323 |             fclose(fp);
324 |             fclose(fp2);
325 |         }
326 | 
327 | 
328 |         free_input_poh(&input_result);
329 |     }
330 | 
331 |     return exit_code;
332 | 
333 | }
334 | 


--------------------------------------------------------------------------------
/src/poh-verify-test/test_num_elems_332:
--------------------------------------------------------------------------------
1 | 4128


--------------------------------------------------------------------------------
/src/poh-verify-test/test_num_hashes_arr_332:
--------------------------------------------------------------------------------
1 | 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 16901 2451 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 19353 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/Makefile:
--------------------------------------------------------------------------------
 1 | SGX_SDK ?= /opt/sgxsdk
 2 | # Compiler flags for the regular (non-enclave) build.
 3 | C_Flags := -O2 -fpic -I.
 4 | # Compiler flags for the SGX build: -nostdinc drops the host header search path, SGX SDK headers are added instead.
 5 | SGX_C_Flags := -Wno-implicit-function-declaration -std=c11 -m64 -O2 -nostdinc -DSGX_COMPAT -fpie -fstack-protector \
 6 | 	-IInclude -I. -I$(SGX_SDK)/include -I$(SGX_SDK)/include/tlibc -I$(SGX_SDK)/include/libcxx -fno-builtin-printf -I.
 7 | # Every .c file in this directory is compiled once per flavour.
 8 | C_Files := $(wildcard *.c)
 9 | # Directory that receives the two static archives.
10 | OUT ?= libs
11 | 
12 | SGX_OBJ := sgxobj
13 | SGX_C_Objects := $(C_Files:%.c=$(SGX_OBJ)/%.o)
14 | 
15 | NONSGX_OBJ := nonsgxobj
16 | NONSGX_C_Objects := $(C_Files:%.c=$(NONSGX_OBJ)/%.o)
17 | # `clean` names no file, so it is declared phony together with `all` and `run`.
18 | .PHONY: all run clean
19 | all: $(OUT)/libed25519.sgx.static.a $(OUT)/libed25519.static.a
20 | run: all
21 | 
22 | $(SGX_OBJ)/%.o: %.c
23 | 	@echo "CC  <=  $<"
24 | 	@mkdir -p $(SGX_OBJ)
25 | 	$(CC) $(SGX_C_Flags) -c $< -o $@
26 | 
27 | $(NONSGX_OBJ)/%.o: %.c
28 | 	@echo "CC  <=  $<"
29 | 	@mkdir -p $(NONSGX_OBJ)
30 | 	$(CC) $(C_Flags) -c $< -o $@
31 | # $(AR) defaults to `ar` and lets cross builds substitute their own archiver.
32 | $(OUT)/libed25519.sgx.static.a: $(SGX_C_Objects)
33 | 	@mkdir -p $(OUT)
34 | 	$(AR) rcs $@ $^
35 | 
36 | $(OUT)/libed25519.static.a: $(NONSGX_C_Objects)
37 | 	@mkdir -p $(OUT)
38 | 	$(AR) rcs $@ $^
39 | # Removes both object directories and the archive output directory.
40 | clean:
41 | 	@rm -rf $(SGX_OBJ) $(NONSGX_OBJ) $(OUT)
42 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/add_scalar.c:
--------------------------------------------------------------------------------
 1 | #include "ed25519.h"
 2 | #include "ge.h"
 3 | #include "sc.h"
 4 | #include "sha512.h"
 5 | /* Adds scalar (32 bytes) to a key pair in place; public_key (32 bytes) and private_key (64 bytes) may each be NULL. */
 6 | /* When private_key is NULL, a public_key that does not decode to a curve point is left unchanged. */
 7 | /* see http://crypto.stackexchange.com/a/6215/4697 */
 8 | void ed25519_add_scalar(unsigned char *public_key, unsigned char *private_key, const unsigned char *scalar) {
 9 |     const unsigned char SC_1[32] = {1}; /* scalar with value 1 */
10 |     
11 |     unsigned char n[32]; 
12 |     ge_p3 nB;
13 |     ge_p1p1 A_p1p1;
14 |     ge_p3 A;
15 |     ge_p3 public_key_unpacked;
16 |     ge_cached T;
17 | 
18 |     sha512_context hash;
19 |     unsigned char hashbuf[64];
20 | 
21 |     int i;
22 | 
23 |     /* copy the scalar and clear highest bit */
24 |     for (i = 0; i < 31; ++i) {
25 |         n[i] = scalar[i];
26 |     }
27 |     n[31] = scalar[31] & 127;
28 | 
29 |     /* private key: a = n + t, computed as 1 * n + private_key */
30 |     if (private_key) {
31 |         sc_muladd(private_key, SC_1, n, private_key);
32 | 
33 |         /* replace the upper 32 bytes with a hash of (old upper half || scalar); see https://github.com/orlp/ed25519/issues/3 */
34 |         sha512_init(&hash);
35 |         sha512_update(&hash, private_key + 32, 32);
36 |         sha512_update(&hash, scalar, 32);
37 |         sha512_final(&hash, hashbuf);
38 |         for (i = 0; i < 32; ++i) {
39 |             private_key[32 + i] = hashbuf[i];
40 |         }
41 |     }
42 | 
43 |     /* public key: A = nB + T */
44 |     if (public_key) {
45 |         /* if we know the private key we don't need a point addition, which is faster */
46 |         /* using a "timing attack" you could find out whether or not we know the private
47 |            key, but this information seems rather useless - if this is important pass
48 |            public_key and private_key separately in 2 function calls */
49 |         if (private_key) {
50 |             ge_scalarmult_base(&A, private_key);
51 |         } else {
52 |             /* unpack public key into T; on failure the unpacked T field is never written, so stop here */
53 |             if (ge_frombytes_negate_vartime(&public_key_unpacked, public_key) != 0) {
54 |                 return; /* no error channel in a void function: leave public_key untouched */
55 |             }
56 |             fe_neg(public_key_unpacked.X, public_key_unpacked.X); /* undo negate */
57 |             fe_neg(public_key_unpacked.T, public_key_unpacked.T); /* undo negate */
58 |             ge_p3_to_cached(&T, &public_key_unpacked);
59 |             /* calculate n*B */
60 |             ge_scalarmult_base(&nB, n);
61 |             /* A = n*B + T */
62 |             ge_add(&A_p1p1, &nB, &T);
63 |             ge_p1p1_to_p3(&A, &A_p1p1);
64 |         }
65 |             
66 |         /* pack public key */
67 |         ge_p3_tobytes(public_key, &A);
68 |     }
69 | }
70 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/build.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -e
 3 | # Remember the directory the script was invoked from; it is handed to make as the parent of OUT.
 4 | invoked_from=$PWD
 5 | cd "$(dirname "$0")"
 6 | # Run make in a subshell so command tracing is limited to the make invocation.
 7 | echo --- Build
 8 | (
 9 |   set -x
10 |   make OUT="${invoked_from}/libs"
11 | )
12 | 
13 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/ed25519.h:
--------------------------------------------------------------------------------
 1 | #ifndef ED25519_H
 2 | #define ED25519_H
 3 | /* Public entry points of the Ed25519 library in this directory. */
 4 | #include <stddef.h>
 5 | /* ED25519_DECLSPEC: dllexport/dllimport on Windows DLL builds, empty everywhere else. */
 6 | #if defined(_WIN32)
 7 |     #if defined(ED25519_BUILD_DLL)
 8 |         #define ED25519_DECLSPEC __declspec(dllexport)
 9 |     #elif defined(ED25519_DLL)
10 |         #define ED25519_DECLSPEC __declspec(dllimport)
11 |     #else
12 |         #define ED25519_DECLSPEC
13 |     #endif
14 | #else
15 |     #define ED25519_DECLSPEC
16 | #endif
17 | 
18 | /* Give the declarations C linkage when this header is included from C++. */
19 | #ifdef __cplusplus
20 | extern "C" {
21 | #endif
22 | /* Seed generation is compiled out when ED25519_NO_SEED is defined; seed.c returns 0 on success and 1 on failure. */
23 | #ifndef ED25519_NO_SEED
24 | int ED25519_DECLSPEC ed25519_create_seed(unsigned char *seed);
25 | #endif
26 | /* Buffer sizes are not expressed in the prototypes below; consult the matching .c file for what each pointer must reference. */
27 | void ED25519_DECLSPEC ed25519_create_keypair(unsigned char *public_key, unsigned char *private_key, const unsigned char *seed);
28 | void ED25519_DECLSPEC ed25519_sign(unsigned char *signature, const unsigned char *message, size_t message_len, const unsigned char *public_key, const unsigned char *private_key);
29 | int ED25519_DECLSPEC ed25519_verify(const unsigned char *signature, const unsigned char *message, size_t message_len, const unsigned char *public_key);
30 | void ED25519_DECLSPEC ed25519_add_scalar(unsigned char *public_key, unsigned char *private_key, const unsigned char *scalar);
31 | void ED25519_DECLSPEC ed25519_key_exchange(unsigned char *shared_secret, const unsigned char *public_key, const unsigned char *private_key);
32 | 
33 | 
34 | #ifdef __cplusplus
35 | }
36 | #endif
37 | 
38 | #endif
39 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/fe.h:
--------------------------------------------------------------------------------
 1 | #ifndef FE_H
 2 | #define FE_H
 3 | 
 4 | #include "fixedint.h"
 5 | /* fixedint.h supplies the int32_t type used by the fe typedef below. */
 6 | 
 7 | /*
 8 |     fe means field element.
 9 |     Here the field is \Z/(2^255-19).
10 |     An element t, entries t[0]...t[9], represents the integer
11 |     t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9].
12 |     Bounds on each t[i] vary depending on context.
13 | */
14 | 
15 | /* A field element is stored as an array of ten int32_t limbs. */
16 | typedef int32_t fe[10];
17 | 
18 | /* NOTE(review): only prototypes appear here; the grouping remarks below are inferred from names and call sites - confirm against fe.c. */
19 | void fe_0(fe h);
20 | void fe_1(fe h);
21 | /* conversion from and to byte strings */
22 | void fe_frombytes(fe h, const unsigned char *s);
23 | void fe_tobytes(unsigned char *s, const fe h);
24 | /* copy, predicates and conditional move/swap controlled by b */
25 | void fe_copy(fe h, const fe f);
26 | int fe_isnegative(const fe f);
27 | int fe_isnonzero(const fe f);
28 | void fe_cmov(fe f, const fe g, unsigned int b);
29 | void fe_cswap(fe f, fe g, unsigned int b);
30 | /* arithmetic; the first parameter is the destination */
31 | void fe_neg(fe h, const fe f);
32 | void fe_add(fe h, const fe f, const fe g);
33 | void fe_invert(fe out, const fe z);
34 | void fe_sq(fe h, const fe f);
35 | void fe_sq2(fe h, const fe f);
36 | void fe_mul(fe h, const fe f, const fe g);
37 | void fe_mul121666(fe h, fe f);
38 | void fe_pow22523(fe out, const fe z);
39 | void fe_sub(fe h, const fe f, const fe g);
40 | 
41 | #endif
42 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/fixedint.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |     Portable header to provide the 32 and 64 bits type.
 3 | 
 4 |     Not a compatible replacement for <stdint.h>, do not blindly use it as such.
 5 | */
 6 | /* First choice: use <stdint.h> when the compiler-identification tests below indicate it is available. */
 7 | #if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined(__WATCOMC__) && (defined(_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_) || defined(__UINT_FAST64_TYPE__)) )) && !defined(FIXEDINT_H_INCLUDED)
 8 |     #include <stdint.h>
 9 |     #define FIXEDINT_H_INCLUDED
10 |     /* Supply UINT64_C for Watcom >= 1250 when <stdint.h> did not define it. */
11 |     #if defined(__WATCOMC__) && __WATCOMC__ >= 1250 && !defined(UINT64_C)
12 |         #include <limits.h>
13 |         #define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX))
14 |     #endif
15 | #endif
16 | 
17 | /* Fallback: reached only when the branch above did not define FIXEDINT_H_INCLUDED. */
18 | #ifndef FIXEDINT_H_INCLUDED
19 |     #define FIXEDINT_H_INCLUDED
20 |     /* <limits.h> provides the *_MAX constants tested below. */
21 |     #include <limits.h>
22 | 
23 |     /* (u)int32_t */
24 |     #ifndef uint32_t
25 |         #if (ULONG_MAX == 0xffffffffUL)
26 |             typedef unsigned long uint32_t;
27 |         #elif (UINT_MAX == 0xffffffffUL)
28 |             typedef unsigned int uint32_t;
29 |         #elif (USHRT_MAX == 0xffffffffUL)
30 |             typedef unsigned short uint32_t;
31 |         #endif
32 |     #endif
33 | 
34 |     /* Same selection for the signed 32-bit type; no typedef is emitted if none of the tests match. */
35 |     #ifndef int32_t
36 |         #if (LONG_MAX == 0x7fffffffL)
37 |             typedef signed long int32_t;
38 |         #elif (INT_MAX == 0x7fffffffL)
39 |             typedef signed int int32_t;
40 |         #elif (SHRT_MAX == 0x7fffffffL)
41 |             typedef signed short int32_t;
42 |         #endif
43 |     #endif
44 | 
45 |     /* The 64-bit types and the literal-suffix macros are chosen by compiler-identification macros. */
46 |     /* (u)int64_t */
47 |     #if (defined(__STDC__) && defined(__STDC_VERSION__) && __STDC__ && __STDC_VERSION__ >= 199901L)
48 |         typedef long long int64_t;
49 |         typedef unsigned long long uint64_t;
50 | 
51 |         #define UINT64_C(v) v ##ULL
52 |         #define INT64_C(v) v ##LL
53 |     #elif defined(__GNUC__)
54 |         __extension__ typedef long long int64_t;
55 |         __extension__ typedef unsigned long long uint64_t;
56 | 
57 |         #define UINT64_C(v) v ##ULL
58 |         #define INT64_C(v) v ##LL
59 |     #elif defined(__MWERKS__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) || defined(__APPLE_CC__) || defined(_LONG_LONG) || defined(_CRAYC)
60 |         typedef long long int64_t;
61 |         typedef unsigned long long uint64_t;
62 | 
63 |         #define UINT64_C(v) v ##ULL
64 |         #define INT64_C(v) v ##LL
65 |     #elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined(__BORLANDC__) && __BORLANDC__ > 0x460) || defined(__alpha) || defined(__DECC)
66 |         typedef __int64 int64_t;
67 |         typedef unsigned __int64 uint64_t;
68 | 
69 |         #define UINT64_C(v) v ##UI64
70 |         #define INT64_C(v) v ##I64
71 |     #endif
72 | #endif
73 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/ge.c:
--------------------------------------------------------------------------------
  1 | #include "ge.h"
  2 | #include "precomp_data.h"
  3 | 
  4 | 
  5 | /*
  6 | r = p + q, where p is a ge_p3, q is a ge_cached and the sum is written to r as a ge_p1p1
  7 | */
  8 | /* The fields of r double as scratch space, so the statement order below matters. */
  9 | void ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
 10 |     fe t0;
 11 |     fe_add(r->X, p->Y, p->X);          /* r->X = p.Y + p.X */
 12 |     fe_sub(r->Y, p->Y, p->X);          /* r->Y = p.Y - p.X */
 13 |     fe_mul(r->Z, r->X, q->YplusX);     /* r->Z = (p.Y + p.X) * q.YplusX */
 14 |     fe_mul(r->Y, r->Y, q->YminusX);    /* r->Y = (p.Y - p.X) * q.YminusX */
 15 |     fe_mul(r->T, q->T2d, p->T);        /* r->T = q.T2d * p.T */
 16 |     fe_mul(r->X, p->Z, q->Z);          /* r->X = p.Z * q.Z (temporary) */
 17 |     fe_add(t0, r->X, r->X);            /* t0 = 2 * p.Z * q.Z */
 18 |     fe_sub(r->X, r->Z, r->Y);          /* final X: difference of the two products */
 19 |     fe_add(r->Y, r->Z, r->Y);          /* final Y: sum of the two products */
 20 |     fe_add(r->Z, t0, r->T);            /* final Z = t0 + T-product */
 21 |     fe_sub(r->T, t0, r->T);            /* final T = t0 - T-product */
 22 | }
 23 | /* Recodes the 256 bits of a into signed digits r[0..255]; the bound checks below keep each merged digit within [-15, 15]. */
 24 | /* Bit i is taken from a[i >> 3], so a is read as 32 bytes; r must provide room for 256 entries. */
 25 | static void slide(signed char *r, const unsigned char *a) {
 26 |     int i;
 27 |     int b;
 28 |     int k;
 29 | 
 30 |     for (i = 0; i < 256; ++i) {
 31 |         r[i] = 1 & (a[i >> 3] >> (i & 7)); /* start from the plain binary expansion */
 32 |     }
 33 | 
 34 |     for (i = 0; i < 256; ++i)
 35 |         if (r[i]) {
 36 |             for (b = 1; b <= 6 && i + b < 256; ++b) { /* look at up to 6 higher positions */
 37 |                 if (r[i + b]) {
 38 |                     if (r[i] + (r[i + b] << b) <= 15) {
 39 |                         r[i] += r[i + b] << b; /* absorb the higher digit into position i */
 40 |                         r[i + b] = 0;
 41 |                     } else if (r[i] - (r[i + b] << b) >= -15) {
 42 |                         r[i] -= r[i + b] << b; /* subtract instead, then add 1 at position i + b */
 43 | 
 44 |                         for (k = i + b; k < 256; ++k) { /* ripple that +1 through a run of nonzero entries */
 45 |                             if (!r[k]) {
 46 |                                 r[k] = 1;
 47 |                                 break;
 48 |                             }
 49 | 
 50 |                             r[k] = 0;
 51 |                         }
 52 |                     } else {
 53 |                         break; /* neither merge fits the bound: stop extending this digit */
 54 |                     }
 55 |                 }
 56 |             }
 57 |         }
 58 | }
 59 | 
 60 | /*
 61 | r = a * A + b * B
 62 | where a = a[0]+256*a[1]+...+256^31 a[31].
 63 | and b = b[0]+256*b[1]+...+256^31 b[31].
 64 | B is the Ed25519 base point (x,4/5) with x positive.
 65 | */
 66 | /* Variable time: the digits produced by slide() decide which branches run. Bi is not defined in this file (presumably precomp_data.h). */
 67 | void ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a, const ge_p3 *A, const unsigned char *b) {
 68 |     signed char aslide[256];
 69 |     signed char bslide[256];
 70 |     ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */
 71 |     ge_p1p1 t;
 72 |     ge_p3 u;
 73 |     ge_p3 A2;
 74 |     int i;
 75 |     slide(aslide, a);
 76 |     slide(bslide, b);
 77 |     ge_p3_to_cached(&Ai[0], A);        /* Ai[0] = A */
 78 |     ge_p3_dbl(&t, A);
 79 |     ge_p1p1_to_p3(&A2, &t);            /* A2 = 2A */
 80 |     ge_add(&t, &A2, &Ai[0]);           /* each following entry is the previous one plus 2A */
 81 |     ge_p1p1_to_p3(&u, &t);
 82 |     ge_p3_to_cached(&Ai[1], &u);
 83 |     ge_add(&t, &A2, &Ai[1]);
 84 |     ge_p1p1_to_p3(&u, &t);
 85 |     ge_p3_to_cached(&Ai[2], &u);
 86 |     ge_add(&t, &A2, &Ai[2]);
 87 |     ge_p1p1_to_p3(&u, &t);
 88 |     ge_p3_to_cached(&Ai[3], &u);
 89 |     ge_add(&t, &A2, &Ai[3]);
 90 |     ge_p1p1_to_p3(&u, &t);
 91 |     ge_p3_to_cached(&Ai[4], &u);
 92 |     ge_add(&t, &A2, &Ai[4]);
 93 |     ge_p1p1_to_p3(&u, &t);
 94 |     ge_p3_to_cached(&Ai[5], &u);
 95 |     ge_add(&t, &A2, &Ai[5]);
 96 |     ge_p1p1_to_p3(&u, &t);
 97 |     ge_p3_to_cached(&Ai[6], &u);
 98 |     ge_add(&t, &A2, &Ai[6]);
 99 |     ge_p1p1_to_p3(&u, &t);
100 |     ge_p3_to_cached(&Ai[7], &u);
101 |     ge_p2_0(r);                        /* accumulator starts from ge_p2_0 */
102 | 
103 |     for (i = 255; i >= 0; --i) {       /* skip positions where both digit arrays are zero */
104 |         if (aslide[i] || bslide[i]) {
105 |             break;
106 |         }
107 |     }
108 | 
109 |     for (; i >= 0; --i) {              /* double once per position, then add/subtract table entries */
110 |         ge_p2_dbl(&t, r);
111 | 
112 |         if (aslide[i] > 0) {
113 |             ge_p1p1_to_p3(&u, &t);
114 |             ge_add(&t, &u, &Ai[aslide[i] / 2]);      /* odd digit k maps to table index k / 2 */
115 |         } else if (aslide[i] < 0) {
116 |             ge_p1p1_to_p3(&u, &t);
117 |             ge_sub(&t, &u, &Ai[(-aslide[i]) / 2]);
118 |         }
119 | 
120 |         if (bslide[i] > 0) {
121 |             ge_p1p1_to_p3(&u, &t);
122 |             ge_madd(&t, &u, &Bi[bslide[i] / 2]);
123 |         } else if (bslide[i] < 0) {
124 |             ge_p1p1_to_p3(&u, &t);
125 |             ge_msub(&t, &u, &Bi[(-bslide[i]) / 2]);
126 |         }
127 | 
128 |         ge_p1p1_to_p2(r, &t);
129 |     }
130 | }
131 | /* Field constants used while decoding a point. */
132 | /* NOTE(review): presumably the curve parameter d described in ge.h - confirm the limb values. */
133 | static const fe d = {
134 |     -10913610, 13857413, -15372611, 6949391, 114729, -8787816, -6275908, -3247719, -18696448, -12055116
135 | };
136 | /* NOTE(review): presumably a square root of -1 in the field, judging by the name and its use below - confirm. */
137 | static const fe sqrtm1 = {
138 |     -32595792, -7943725, 9377950, 3500415, 12389472, -272473, -25146209, -2005654, 326686, 11406482
139 | };
140 | /* Decodes s into h with the x-coordinate negated. Returns 0 on success and -1 when no x is found; on -1, h->T is never written. */
141 | int ge_frombytes_negate_vartime(ge_p3 *h, const unsigned char *s) {
142 |     fe u;
143 |     fe v;
144 |     fe v3;
145 |     fe vxx;
146 |     fe check;
147 |     fe_frombytes(h->Y, s);
148 |     fe_1(h->Z);
149 |     fe_sq(u, h->Y);
150 |     fe_mul(v, u, d);
151 |     fe_sub(u, u, h->Z);     /* u = y^2-1 */
152 |     fe_add(v, v, h->Z);     /* v = dy^2+1 */
153 |     fe_sq(v3, v);
154 |     fe_mul(v3, v3, v);      /* v3 = v^3 */
155 |     fe_sq(h->X, v3);
156 |     fe_mul(h->X, h->X, v);
157 |     fe_mul(h->X, h->X, u);  /* x = uv^7 */
158 |     fe_pow22523(h->X, h->X); /* x = (uv^7)^((q-5)/8) */
159 |     fe_mul(h->X, h->X, v3);
160 |     fe_mul(h->X, h->X, u);  /* x = uv^3(uv^7)^((q-5)/8) */
161 |     fe_sq(vxx, h->X);
162 |     fe_mul(vxx, vxx, v);
163 |     fe_sub(check, vxx, u);  /* vx^2-u */
164 |     /* the candidate is accepted when vx^2 == u; otherwise try the vx^2 == -u case */
165 |     if (fe_isnonzero(check)) {
166 |         fe_add(check, vxx, u); /* vx^2+u */
167 | 
168 |         if (fe_isnonzero(check)) {
169 |             return -1; /* neither case holds */
170 |         }
171 | 
172 |         fe_mul(h->X, h->X, sqrtm1);
173 |     }
174 |     /* flip x when its sign equals the sign bit stored in s[31], so the result carries the opposite sign */
175 |     if (fe_isnegative(h->X) == (s[31] >> 7)) {
176 |         fe_neg(h->X, h->X);
177 |     }
178 | 
179 |     fe_mul(h->T, h->X, h->Y);
180 |     return 0;
181 | }
182 | 
183 | 
184 | /*
185 | r = p + q, where q is a ge_precomp (y+x, y-x, 2dxy) and the sum is written to r as a ge_p1p1
186 | */
187 | /* Same shape as ge_add, except that t0 is 2 * p.Z because ge_precomp has no Z field. */
188 | void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
189 |     fe t0;
190 |     fe_add(r->X, p->Y, p->X);          /* r->X = p.Y + p.X */
191 |     fe_sub(r->Y, p->Y, p->X);          /* r->Y = p.Y - p.X */
192 |     fe_mul(r->Z, r->X, q->yplusx);     /* r->Z = (p.Y + p.X) * q.yplusx */
193 |     fe_mul(r->Y, r->Y, q->yminusx);    /* r->Y = (p.Y - p.X) * q.yminusx */
194 |     fe_mul(r->T, q->xy2d, p->T);       /* r->T = q.xy2d * p.T */
195 |     fe_add(t0, p->Z, p->Z);            /* t0 = 2 * p.Z */
196 |     fe_sub(r->X, r->Z, r->Y);          /* final X: difference of the two products */
197 |     fe_add(r->Y, r->Z, r->Y);          /* final Y: sum of the two products */
198 |     fe_add(r->Z, t0, r->T);            /* final Z = t0 + T-product */
199 |     fe_sub(r->T, t0, r->T);            /* final T = t0 - T-product */
200 | }
201 | 
202 | 
203 | /*
204 | r = p - q, where q is a ge_precomp and the difference is written to r as a ge_p1p1
205 | */
206 | /* Differs from ge_madd by using yminusx/yplusx in swapped roles and by exchanging the final add and sub on Z and T. */
207 | void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
208 |     fe t0;
209 | 
210 |     fe_add(r->X, p->Y, p->X);          /* r->X = p.Y + p.X */
211 |     fe_sub(r->Y, p->Y, p->X);          /* r->Y = p.Y - p.X */
212 |     fe_mul(r->Z, r->X, q->yminusx);    /* r->Z = (p.Y + p.X) * q.yminusx */
213 |     fe_mul(r->Y, r->Y, q->yplusx);     /* r->Y = (p.Y - p.X) * q.yplusx */
214 |     fe_mul(r->T, q->xy2d, p->T);       /* r->T = q.xy2d * p.T */
215 |     fe_add(t0, p->Z, p->Z);            /* t0 = 2 * p.Z */
216 |     fe_sub(r->X, r->Z, r->Y);          /* final X: difference of the two products */
217 |     fe_add(r->Y, r->Z, r->Y);          /* final Y: sum of the two products */
218 |     fe_sub(r->Z, t0, r->T);            /* final Z = t0 - T-product */
219 |     fe_add(r->T, t0, r->T);            /* final T = t0 + T-product */
220 | }
221 | 
222 | 
223 | /*
224 | r = p, converting from the completed (ge_p1p1) form to the projective (ge_p2) form
225 | */
226 | 
227 | void ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p) {
228 |     fe_mul(r->Z, p->Z, p->T);   /* Z = Z * T */
229 |     fe_mul(r->Y, p->Y, p->Z);   /* Y = Y * Z */
230 |     fe_mul(r->X, p->X, p->T);   /* X = X * T */
231 | }
232 | 
233 | 
234 | 
235 | /*
236 | r = p, converting from the completed (ge_p1p1) form to the extended (ge_p3) form
237 | */
238 | 
239 | void ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) {
240 |     fe_mul(r->T, p->X, p->Y);   /* T = X * Y */
241 |     fe_mul(r->Z, p->Z, p->T);   /* Z = Z * T */
242 |     fe_mul(r->Y, p->Y, p->Z);   /* Y = Y * Z */
243 |     fe_mul(r->X, p->X, p->T);   /* X = X * T */
244 | }
245 | 
246 | /* Initialises h through fe_0/fe_1 so that X is zero while Y and Z are one. */
247 | void ge_p2_0(ge_p2 *h) {
248 |     fe_1(h->Z);
249 |     fe_1(h->Y);
250 |     fe_0(h->X);
251 | }
252 | 
253 | 
254 | 
255 | /*
256 | r = 2 * p, where p is a ge_p2 and the result is written to r as a ge_p1p1
257 | */
258 | /* The fields of r are reused as temporaries, so the statement order below matters. */
259 | void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) {
260 |     fe t0;
261 | 
262 |     fe_sq(r->X, p->X);                 /* r->X = X^2 */
263 |     fe_sq(r->Z, p->Y);                 /* r->Z = Y^2 */
264 |     fe_sq2(r->T, p->Z);                /* r->T from fe_sq2 of Z (presumably 2 * Z^2 - confirm in fe.c) */
265 |     fe_add(r->Y, p->X, p->Y);          /* r->Y = X + Y (temporary) */
266 |     fe_sq(t0, r->Y);                   /* t0 = (X + Y)^2 */
267 |     fe_add(r->Y, r->Z, r->X);          /* final Y = Y^2 + X^2 */
268 |     fe_sub(r->Z, r->Z, r->X);          /* final Z = Y^2 - X^2 */
269 |     fe_sub(r->X, t0, r->Y);            /* final X = (X + Y)^2 - (Y^2 + X^2) */
270 |     fe_sub(r->T, r->T, r->Z);          /* final T = r->T - final Z */
271 | }
272 | 
273 | /* Initialises h through fe_0/fe_1 so that X and T are zero while Y and Z are one. */
274 | void ge_p3_0(ge_p3 *h) {
275 |     fe_0(h->T);
276 |     fe_1(h->Z);
277 |     fe_1(h->Y);
278 |     fe_0(h->X);
279 | }
280 | 
281 | 
282 | /*
283 | r = 2 * p, obtained by converting p to a ge_p2 and reusing ge_p2_dbl
284 | */
285 | 
286 | void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p) {
287 |     ge_p2 projective;
288 |     ge_p3_to_p2(&projective, p);
289 |     ge_p2_dbl(r, &projective);
290 | }
291 | 
292 | 
293 | 
294 | /*
295 | r = p, converting a ge_p3 into the ge_cached form taken by ge_add and ge_sub
296 | */
297 | 
298 | static const fe d2 = {
299 |     -21827239, -5839606, -30745221, 13898782, 229458, 15978800, -12551817, -6495438, 29715968, 9444199
300 | };
301 | 
302 | void ge_p3_to_cached(ge_cached *r, const ge_p3 *p) {
303 |     fe_mul(r->T2d, p->T, d2);          /* T multiplied by the constant d2 */
304 |     fe_copy(r->Z, p->Z);
305 |     fe_sub(r->YminusX, p->Y, p->X);
306 |     fe_add(r->YplusX, p->Y, p->X);
307 | }
308 | 
309 | 
310 | /*
311 | r = p, keeping X, Y and Z of the ge_p3 and leaving out T
312 | */
313 | 
314 | void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) {
315 |     fe_copy(r->Z, p->Z);
316 |     fe_copy(r->Y, p->Y);
317 |     fe_copy(r->X, p->X);
318 | }
319 | 
320 | /* Packs h into s: y = Y/Z goes through fe_tobytes, then the sign of x = X/Z is folded into the top bit of s[31]. */
321 | void ge_p3_tobytes(unsigned char *s, const ge_p3 *h) {
322 |     fe z_inv;
323 |     fe affine_x;
324 |     fe affine_y;
325 |     fe_invert(z_inv, h->Z);
326 |     fe_mul(affine_y, h->Y, z_inv);
327 |     fe_mul(affine_x, h->X, z_inv);
328 |     fe_tobytes(s, affine_y);
329 |     s[31] = s[31] ^ (fe_isnegative(affine_x) << 7);
330 | }
331 | 
332 | /* Branch-free equality test: returns 1 when b == c and 0 otherwise. */
333 | static unsigned char equal(signed char b, signed char c) {
334 |     unsigned char diff = (unsigned char) b ^ (unsigned char) c; /* zero only when the bytes match */
335 |     uint64_t wide = diff;
336 | 
337 |     /* 0 - 1 wraps around and sets the top bit; 1..255 minus 1 leaves the top bit clear */
338 |     wide -= 1;
339 |     wide >>= 63;
340 |     return (unsigned char) wide;
341 | }
342 | /* Returns 1 when b is negative and 0 otherwise, without branching. */
343 | static unsigned char negative(signed char b) {
344 |     uint64_t extended = b; /* converting a negative value wraps it to the top of the 64-bit range */
345 |     unsigned char sign_bit = (unsigned char) (extended >> 63);
346 |     return sign_bit;
347 | }
348 | /* Conditional move of a whole ge_precomp under control of b, delegating each field to fe_cmov. */
349 | static void cmov(ge_precomp *t, const ge_precomp *u, unsigned char b) {
350 |     fe_cmov(t->xy2d, u->xy2d, b);
351 |     fe_cmov(t->yminusx, u->yminusx, b);
352 |     fe_cmov(t->yplusx, u->yplusx, b);
353 | }
354 | /* Loads into t the table entry for digit b at table row pos, using only cmov calls (no data-dependent branch or index). */
355 | /* base is not defined in this file (presumably precomp_data.h); base[pos][k] is used for |b| == k + 1. */
356 | static void select(ge_precomp *t, int pos, signed char b) {
357 |     ge_precomp minust;
358 |     unsigned char bnegative = negative(b);                   /* 1 when b < 0 */
359 |     unsigned char babs = b - (((-bnegative) & b) << 1);       /* b - 2b when negative, i.e. |b| */
360 |     fe_1(t->yplusx);                                          /* t starts as (1, 1, 0) and stays so when b == 0 */
361 |     fe_1(t->yminusx);
362 |     fe_0(t->xy2d);
363 |     cmov(t, &base[pos][0], equal(babs, 1));                   /* at most one of these eight flags is 1 */
364 |     cmov(t, &base[pos][1], equal(babs, 2));
365 |     cmov(t, &base[pos][2], equal(babs, 3));
366 |     cmov(t, &base[pos][3], equal(babs, 4));
367 |     cmov(t, &base[pos][4], equal(babs, 5));
368 |     cmov(t, &base[pos][5], equal(babs, 6));
369 |     cmov(t, &base[pos][6], equal(babs, 7));
370 |     cmov(t, &base[pos][7], equal(babs, 8));
371 |     fe_copy(minust.yplusx, t->yminusx);                       /* minust: y+x and y-x exchanged ... */
372 |     fe_copy(minust.yminusx, t->yplusx);
373 |     fe_neg(minust.xy2d, t->xy2d);                             /* ... and xy2d negated */
374 |     cmov(t, &minust, bnegative);                              /* use minust when b was negative */
375 | }
376 | 
377 | /*
378 | h = a * B
379 | where a = a[0]+256*a[1]+...+256^31 a[31]
380 | B is the Ed25519 base point (x,4/5) with x positive.
381 | 
382 | Preconditions:
383 |   a[31] <= 127
384 | */
385 | /* The scalar is split into 64 signed radix-16 digits; odd-indexed digits are added first, then four doublings, then even-indexed digits. */
386 | void ge_scalarmult_base(ge_p3 *h, const unsigned char *a) {
387 |     signed char e[64];
388 |     signed char carry;
389 |     ge_p1p1 r;
390 |     ge_p2 s;
391 |     ge_precomp t;
392 |     int i;
393 | 
394 |     for (i = 0; i < 32; ++i) {
395 |         e[2 * i + 0] = (a[i] >> 0) & 15; /* low nibble */
396 |         e[2 * i + 1] = (a[i] >> 4) & 15; /* high nibble */
397 |     }
398 | 
399 |     /* each e[i] is between 0 and 15 */
400 |     /* e[63] is between 0 and 7 */
401 |     carry = 0;
402 | 
403 |     for (i = 0; i < 63; ++i) {
404 |         e[i] += carry;
405 |         carry = e[i] + 8;
406 |         carry >>= 4;         /* carry is 1 when the digit is 8 or more */
407 |         e[i] -= carry << 4;  /* subtract 16 in that case and pass the carry on */
408 |     }
409 | 
410 |     e[63] += carry;
411 |     /* each e[i] is between -8 and 8 */
412 |     ge_p3_0(h);
413 | 
414 |     for (i = 1; i < 64; i += 2) { /* odd-indexed digits */
415 |         select(&t, i / 2, e[i]);
416 |         ge_madd(&r, h, &t);
417 |         ge_p1p1_to_p3(h, &r);
418 |     }
419 |     /* four doublings in a row: h = 16 * h */
420 |     ge_p3_dbl(&r, h);
421 |     ge_p1p1_to_p2(&s, &r);
422 |     ge_p2_dbl(&r, &s);
423 |     ge_p1p1_to_p2(&s, &r);
424 |     ge_p2_dbl(&r, &s);
425 |     ge_p1p1_to_p2(&s, &r);
426 |     ge_p2_dbl(&r, &s);
427 |     ge_p1p1_to_p3(h, &r);
428 | 
429 |     for (i = 0; i < 64; i += 2) { /* even-indexed digits */
430 |         select(&t, i / 2, e[i]);
431 |         ge_madd(&r, h, &t);
432 |         ge_p1p1_to_p3(h, &r);
433 |     }
434 | }
435 | 
436 | 
437 | /*
438 | r = p - q, where p is a ge_p3, q is a ge_cached and the difference is written to r as a ge_p1p1
439 | */
440 | /* Differs from ge_add by using YminusX/YplusX in swapped roles and by exchanging the final add and sub on Z and T. */
441 | void ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
442 |     fe t0;
443 |     
444 |     fe_add(r->X, p->Y, p->X);          /* r->X = p.Y + p.X */
445 |     fe_sub(r->Y, p->Y, p->X);          /* r->Y = p.Y - p.X */
446 |     fe_mul(r->Z, r->X, q->YminusX);    /* r->Z = (p.Y + p.X) * q.YminusX */
447 |     fe_mul(r->Y, r->Y, q->YplusX);     /* r->Y = (p.Y - p.X) * q.YplusX */
448 |     fe_mul(r->T, q->T2d, p->T);        /* r->T = q.T2d * p.T */
449 |     fe_mul(r->X, p->Z, q->Z);          /* r->X = p.Z * q.Z (temporary) */
450 |     fe_add(t0, r->X, r->X);            /* t0 = 2 * p.Z * q.Z */
451 |     fe_sub(r->X, r->Z, r->Y);          /* final X: difference of the two products */
452 |     fe_add(r->Y, r->Z, r->Y);          /* final Y: sum of the two products */
453 |     fe_sub(r->Z, t0, r->T);            /* final Z = t0 - T-product */
454 |     fe_add(r->T, t0, r->T);            /* final T = t0 + T-product */
455 | }
456 | 
457 | /* Packs h into s: y = Y/Z goes through fe_tobytes, then the sign of x = X/Z is folded into the top bit of s[31]. */
458 | void ge_tobytes(unsigned char *s, const ge_p2 *h) {
459 |     fe z_inv;
460 |     fe affine_x;
461 |     fe affine_y;
462 |     fe_invert(z_inv, h->Z);
463 |     fe_mul(affine_y, h->Y, z_inv);
464 |     fe_mul(affine_x, h->X, z_inv);
465 |     fe_tobytes(s, affine_y);
466 |     s[31] = s[31] ^ (fe_isnegative(affine_x) << 7);
467 | }
468 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/ge.h:
--------------------------------------------------------------------------------
 1 | #ifndef GE_H
 2 | #define GE_H
 3 | 
 4 | #include "fe.h"
 5 | /* fe.h supplies the fe field-element type used by every struct below. */
 6 | 
 7 | /*
 8 | ge means group element.
 9 | 
10 | Here the group is the set of pairs (x,y) of field elements (see fe.h)
11 | satisfying -x^2 + y^2 = 1 + d x^2y^2
12 | where d = -121665/121666.
13 | 
14 | Representations:
15 |   ge_p2 (projective): (X:Y:Z) satisfying x=X/Z, y=Y/Z
16 |   ge_p3 (extended): (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT
17 |   ge_p1p1 (completed): ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T
18 |   ge_precomp (Duif): (y+x,y-x,2dxy)
19 | */
20 | /* projective form */
21 | typedef struct {
22 |   fe X;
23 |   fe Y;
24 |   fe Z;
25 | } ge_p2;
26 | /* extended form */
27 | typedef struct {
28 |   fe X;
29 |   fe Y;
30 |   fe Z;
31 |   fe T;
32 | } ge_p3;
33 | /* completed form, the output type of the add/sub/double routines */
34 | typedef struct {
35 |   fe X;
36 |   fe Y;
37 |   fe Z;
38 |   fe T;
39 | } ge_p1p1;
40 | /* precomputed form taken by ge_madd and ge_msub */
41 | typedef struct {
42 |   fe yplusx;
43 |   fe yminusx;
44 |   fe xy2d;
45 | } ge_precomp;
46 | /* cached form taken by ge_add and ge_sub; produced from a ge_p3 by ge_p3_to_cached */
47 | typedef struct {
48 |   fe YplusX;
49 |   fe YminusX;
50 |   fe Z;
51 |   fe T2d;
52 | } ge_cached;
53 | /* byte encoding and decoding */
54 | void ge_p3_tobytes(unsigned char *s, const ge_p3 *h);
55 | void ge_tobytes(unsigned char *s, const ge_p2 *h);
56 | int ge_frombytes_negate_vartime(ge_p3 *h, const unsigned char *s);
57 | /* group operations; the first parameter receives the result */
58 | void ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q);
59 | void ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q);
60 | void ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a, const ge_p3 *A, const unsigned char *b);
61 | void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q);
62 | void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q);
63 | void ge_scalarmult_base(ge_p3 *h, const unsigned char *a);
64 | /* initialisation, doubling and conversions between representations */
65 | void ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p);
66 | void ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p);
67 | void ge_p2_0(ge_p2 *h);
68 | void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p);
69 | void ge_p3_0(ge_p3 *h);
70 | void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p);
71 | void ge_p3_to_cached(ge_cached *r, const ge_p3 *p);
72 | void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p);
73 | 
74 | #endif
75 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/key_exchange.c:
--------------------------------------------------------------------------------
 1 | #include "ed25519.h"
 2 | #include "fe.h"
 3 | /* Derives shared_secret (written by fe_tobytes) from the peer's public_key and the first 32 bytes of private_key. */
 4 | void ed25519_key_exchange(unsigned char *shared_secret, const unsigned char *public_key, const unsigned char *private_key) {
 5 |     unsigned char e[32];
 6 |     unsigned int i;
 7 |     
 8 |     fe x1;
 9 |     fe x2;
10 |     fe z2;
11 |     fe x3;
12 |     fe z3;
13 |     fe tmp0;
14 |     fe tmp1;
15 | 
16 |     int pos;
17 |     unsigned int swap;
18 |     unsigned int b;
19 | 
20 |     /* copy the private key and make sure it's valid */
21 |     for (i = 0; i < 32; ++i) {
22 |         e[i] = private_key[i];
23 |     }
24 | 
25 |     e[0] &= 248;   /* clear the three lowest bits */
26 |     e[31] &= 63;   /* clear the two highest bits */
27 |     e[31] |= 64;   /* set bit 254 */
28 | 
29 |     /* unpack the public key and convert edwards to montgomery */
30 |     /* due to CodesInChaos: montgomeryX = (edwardsY + 1)*inverse(1 - edwardsY) mod p */
31 |     fe_frombytes(x1, public_key);
32 |     fe_1(tmp1);
33 |     fe_add(tmp0, x1, tmp1);    /* tmp0 = y + 1 */
34 |     fe_sub(tmp1, tmp1, x1);    /* tmp1 = 1 - y */
35 |     fe_invert(tmp1, tmp1);
36 |     fe_mul(x1, tmp0, tmp1);    /* x1 now holds the montgomery x */
37 | 
38 |     fe_1(x2);
39 |     fe_0(z2);
40 |     fe_copy(x3, x1);
41 |     fe_1(z3);
42 | 
43 |     swap = 0;
44 |     for (pos = 254; pos >= 0; --pos) { /* one ladder step per scalar bit, from bit 254 down to bit 0 */
45 |         b = e[pos / 8] >> (pos & 7);
46 |         b &= 1;                 /* b = bit pos of e */
47 |         swap ^= b;              /* swap only when this bit differs from the previous one */
48 |         fe_cswap(x2, x3, swap);
49 |         fe_cswap(z2, z3, swap);
50 |         swap = b;
51 | 
52 |         /* from montgomery.h */
53 |         fe_sub(tmp0, x3, z3);
54 |         fe_sub(tmp1, x2, z2);
55 |         fe_add(x2, x2, z2);
56 |         fe_add(z2, x3, z3);
57 |         fe_mul(z3, tmp0, x2);
58 |         fe_mul(z2, z2, tmp1);
59 |         fe_sq(tmp0, tmp1);
60 |         fe_sq(tmp1, x2);
61 |         fe_add(x3, z3, z2);
62 |         fe_sub(z2, z3, z2);
63 |         fe_mul(x2, tmp1, tmp0);
64 |         fe_sub(tmp1, tmp1, tmp0);
65 |         fe_sq(z2, z2);
66 |         fe_mul121666(z3, tmp1);
67 |         fe_sq(x3, x3);
68 |         fe_add(tmp0, tmp0, z3);
69 |         fe_mul(z3, x1, z2);
70 |         fe_mul(z2, tmp1, tmp0);
71 |     }
72 |     /* apply the swap still pending from the last processed bit */
73 |     fe_cswap(x2, x3, swap);
74 |     fe_cswap(z2, z3, swap);
75 |     /* result = x2 / z2 */
76 |     fe_invert(z2, z2);
77 |     fe_mul(x2, x2, z2);
78 |     fe_tobytes(shared_secret, x2);
79 | }
80 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/keypair.c:
--------------------------------------------------------------------------------
 1 | #include "ed25519.h"
 2 | #include "sha512.h"
 3 | #include "ge.h"
 4 | 
 5 | /* Builds a key pair from seed: private_key is filled by sha512() over 32 seed bytes and clamped, public_key is its base-point multiple. */
 6 | void ed25519_create_keypair(unsigned char *public_key, unsigned char *private_key, const unsigned char *seed) {
 7 |     ge_p3 point;
 8 | 
 9 |     sha512(seed, 32, private_key);
10 |     private_key[0] = private_key[0] & 248;
11 |     private_key[31] = (private_key[31] & 63) | 64;
12 | 
13 |     /* public key = private scalar times the base point, packed into bytes */
14 |     ge_scalarmult_base(&point, private_key);
15 |     ge_p3_tobytes(public_key, &point);
16 | }
17 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/sc.h:
--------------------------------------------------------------------------------
 1 | #ifndef SC_H
 2 | #define SC_H
 3 | 
 4 | /*
 5 | The set of scalars is \Z/l
 6 | where l = 2^252 + 27742317777372353535851937790883648493.
 7 | */
 8 | /* NOTE(review): presumably sc_reduce reduces s in place and sc_muladd stores a*b + c in s (both mod l) - inferred from names and the call in add_scalar.c; confirm in sc.c. */
 9 | void sc_reduce(unsigned char *s);
10 | void sc_muladd(unsigned char *s, const unsigned char *a, const unsigned char *b, const unsigned char *c);
11 | 
12 | #endif
13 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/seed.c:
--------------------------------------------------------------------------------
 1 | #include "ed25519.h"
 2 | 
 3 | #ifndef ED25519_NO_SEED
 4 | 
 5 | #ifdef _WIN32
 6 | #include <windows.h>
 7 | #include <wincrypt.h>
 8 | #else
 9 | #include <stdio.h>
10 | #endif
11 | /* Writes 32 random bytes to seed; returns 0 on success, 1 on failure. NOTE(review): with SGX_COMPAT defined nothing is written and 0 is still returned - confirm callers obtain entropy elsewhere. */
12 | int ed25519_create_seed(unsigned char *seed) {
13 | #ifndef SGX_COMPAT
14 | #ifdef _WIN32
15 |     HCRYPTPROV prov;
16 | 
17 |     if (!CryptAcquireContext(&prov, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))  {
18 |         return 1;
19 |     }
20 | 
21 |     if (!CryptGenRandom(prov, 32, seed))  {
22 |         CryptReleaseContext(prov, 0);
23 |         return 1;
24 |     }
25 | 
26 |     CryptReleaseContext(prov, 0);
27 | #else
28 |     FILE *f = fopen("/dev/urandom", "rb");
29 | 
30 |     if (f == NULL) {
31 |         return 1;
32 |     }
33 |     /* close the stream before looking at the result, so a short read no longer leaks it */
34 |     size_t ret = fread(seed, 1, 32, f);
35 |     fclose(f);
36 |     if (ret != 32) {
37 |         fprintf(stderr, "Seed read error");
38 |         return 1;
39 |     }
40 | #endif
41 | #endif
42 | 
43 |     return 0;
44 | }
45 | 
46 | #endif
47 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/sha512.c:
--------------------------------------------------------------------------------
  1 | /* LibTomCrypt, modular cryptographic library -- Tom St Denis
  2 |  *
  3 |  * LibTomCrypt is a library that provides various cryptographic
  4 |  * algorithms in a highly modular and flexible manner.
  5 |  *
  6 |  * The library is free for all purposes without any express
  7 |  * guarantee it works.
  8 |  *
  9 |  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
 10 |  */
 11 | 
 12 | #include "fixedint.h"
 13 | #include "sha512.h"
 14 | 
/* the K array: the 80 SHA-512 round constants (FIPS 180-4, section 4.2.3).
 * sha512_compress adds K[i] to the working state in round i. */
static const uint64_t K[80] = {
    UINT64_C(0x428a2f98d728ae22), UINT64_C(0x7137449123ef65cd), 
    UINT64_C(0xb5c0fbcfec4d3b2f), UINT64_C(0xe9b5dba58189dbbc),
    UINT64_C(0x3956c25bf348b538), UINT64_C(0x59f111f1b605d019), 
    UINT64_C(0x923f82a4af194f9b), UINT64_C(0xab1c5ed5da6d8118),
    UINT64_C(0xd807aa98a3030242), UINT64_C(0x12835b0145706fbe), 
    UINT64_C(0x243185be4ee4b28c), UINT64_C(0x550c7dc3d5ffb4e2),
    UINT64_C(0x72be5d74f27b896f), UINT64_C(0x80deb1fe3b1696b1), 
    UINT64_C(0x9bdc06a725c71235), UINT64_C(0xc19bf174cf692694),
    UINT64_C(0xe49b69c19ef14ad2), UINT64_C(0xefbe4786384f25e3), 
    UINT64_C(0x0fc19dc68b8cd5b5), UINT64_C(0x240ca1cc77ac9c65),
    UINT64_C(0x2de92c6f592b0275), UINT64_C(0x4a7484aa6ea6e483), 
    UINT64_C(0x5cb0a9dcbd41fbd4), UINT64_C(0x76f988da831153b5),
    UINT64_C(0x983e5152ee66dfab), UINT64_C(0xa831c66d2db43210), 
    UINT64_C(0xb00327c898fb213f), UINT64_C(0xbf597fc7beef0ee4),
    UINT64_C(0xc6e00bf33da88fc2), UINT64_C(0xd5a79147930aa725), 
    UINT64_C(0x06ca6351e003826f), UINT64_C(0x142929670a0e6e70),
    UINT64_C(0x27b70a8546d22ffc), UINT64_C(0x2e1b21385c26c926), 
    UINT64_C(0x4d2c6dfc5ac42aed), UINT64_C(0x53380d139d95b3df),
    UINT64_C(0x650a73548baf63de), UINT64_C(0x766a0abb3c77b2a8), 
    UINT64_C(0x81c2c92e47edaee6), UINT64_C(0x92722c851482353b),
    UINT64_C(0xa2bfe8a14cf10364), UINT64_C(0xa81a664bbc423001),
    UINT64_C(0xc24b8b70d0f89791), UINT64_C(0xc76c51a30654be30),
    UINT64_C(0xd192e819d6ef5218), UINT64_C(0xd69906245565a910), 
    UINT64_C(0xf40e35855771202a), UINT64_C(0x106aa07032bbd1b8),
    UINT64_C(0x19a4c116b8d2d0c8), UINT64_C(0x1e376c085141ab53), 
    UINT64_C(0x2748774cdf8eeb99), UINT64_C(0x34b0bcb5e19b48a8),
    UINT64_C(0x391c0cb3c5c95a63), UINT64_C(0x4ed8aa4ae3418acb), 
    UINT64_C(0x5b9cca4f7763e373), UINT64_C(0x682e6ff3d6b2b8a3),
    UINT64_C(0x748f82ee5defb2fc), UINT64_C(0x78a5636f43172f60), 
    UINT64_C(0x84c87814a1f0ab72), UINT64_C(0x8cc702081a6439ec),
    UINT64_C(0x90befffa23631e28), UINT64_C(0xa4506cebde82bde9), 
    UINT64_C(0xbef9a3f7b2c67915), UINT64_C(0xc67178f2e372532b),
    UINT64_C(0xca273eceea26619c), UINT64_C(0xd186b8c721c0c207), 
    UINT64_C(0xeada7dd6cde0eb1e), UINT64_C(0xf57d4f7fee6ed178),
    UINT64_C(0x06f067aa72176fba), UINT64_C(0x0a637dc5a2c898a6), 
    UINT64_C(0x113f9804bef90dae), UINT64_C(0x1b710b35131c471b),
    UINT64_C(0x28db77f523047d84), UINT64_C(0x32caab7b40c72493), 
    UINT64_C(0x3c9ebe0a15c9bebc), UINT64_C(0x431d67c49c100d4c),
    UINT64_C(0x4cc5d4becb3e42b6), UINT64_C(0x597f299cfc657e2a), 
    UINT64_C(0x5fcb6fab3ad6faec), UINT64_C(0x6c44198c4a475817)
};
 58 | 
 59 | /* Various logical functions */
 60 | 
 61 | #define ROR64c(x, y) \
 62 |     ( ((((x)&UINT64_C(0xFFFFFFFFFFFFFFFF))>>((uint64_t)(y)&UINT64_C(63))) | \
 63 |       ((x)<<((uint64_t)(64-((y)&UINT64_C(63)))))) & UINT64_C(0xFFFFFFFFFFFFFFFF))
 64 | 
 65 | #define STORE64H(x, y)                                                                     \
 66 |    { (y)[0] = (unsigned char)(((x)>>56)&255); (y)[1] = (unsigned char)(((x)>>48)&255);     \
 67 |      (y)[2] = (unsigned char)(((x)>>40)&255); (y)[3] = (unsigned char)(((x)>>32)&255);     \
 68 |      (y)[4] = (unsigned char)(((x)>>24)&255); (y)[5] = (unsigned char)(((x)>>16)&255);     \
 69 |      (y)[6] = (unsigned char)(((x)>>8)&255); (y)[7] = (unsigned char)((x)&255); }
 70 | 
 71 | #define LOAD64H(x, y)                                                      \
 72 |    { x = (((uint64_t)((y)[0] & 255))<<56)|(((uint64_t)((y)[1] & 255))<<48) | \
 73 |          (((uint64_t)((y)[2] & 255))<<40)|(((uint64_t)((y)[3] & 255))<<32) | \
 74 |          (((uint64_t)((y)[4] & 255))<<24)|(((uint64_t)((y)[5] & 255))<<16) | \
 75 |          (((uint64_t)((y)[6] & 255))<<8)|(((uint64_t)((y)[7] & 255))); }
 76 | 
 77 | 
 78 | #define Ch(x,y,z)       (z ^ (x & (y ^ z)))
 79 | #define Maj(x,y,z)      (((x | y) & z) | (x & y)) 
 80 | #define S(x, n)         ROR64c(x, n)
 81 | #define R(x, n)         (((x) &UINT64_C(0xFFFFFFFFFFFFFFFF))>>((uint64_t)n))
 82 | #define Sigma0(x)       (S(x, 28) ^ S(x, 34) ^ S(x, 39))
 83 | #define Sigma1(x)       (S(x, 14) ^ S(x, 18) ^ S(x, 41))
 84 | #define Gamma0(x)       (S(x, 1) ^ S(x, 8) ^ R(x, 7))
 85 | #define Gamma1(x)       (S(x, 19) ^ S(x, 61) ^ R(x, 6))
 86 | #ifndef MIN
 87 |    #define MIN(x, y) ( ((x)<(y))?(x):(y) )
 88 | #endif
 89 | 
 90 | /* compress 1024-bits */
 91 | static int sha512_compress(sha512_context *md, unsigned char *buf)
 92 | {
 93 |     uint64_t S[8], W[80], t0, t1;
 94 |     int i;
 95 | 
 96 |     /* copy state into S */
 97 |     for (i = 0; i < 8; i++) {
 98 |         S[i] = md->state[i];
 99 |     }
100 | 
101 |     /* copy the state into 1024-bits into W[0..15] */
102 |     for (i = 0; i < 16; i++) {
103 |         LOAD64H(W[i], buf + (8*i));
104 |     }
105 | 
106 |     /* fill W[16..79] */
107 |     for (i = 16; i < 80; i++) {
108 |         W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16];
109 |     }        
110 | 
111 | /* Compress */
112 |     #define RND(a,b,c,d,e,f,g,h,i) \
113 |     t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \
114 |     t1 = Sigma0(a) + Maj(a, b, c);\
115 |     d += t0; \
116 |     h  = t0 + t1;
117 | 
118 |     for (i = 0; i < 80; i += 8) {
119 |        RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0);
120 |        RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1);
121 |        RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2);
122 |        RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3);
123 |        RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4);
124 |        RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5);
125 |        RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6);
126 |        RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7);
127 |    }
128 | 
129 |    #undef RND
130 | 
131 | 
132 | 
133 |     /* feedback */
134 |    for (i = 0; i < 8; i++) {
135 |         md->state[i] = md->state[i] + S[i];
136 |     }
137 | 
138 |     return 0;
139 | }
140 | 
141 | 
142 | /**
143 |    Initialize the hash state
144 |    @param md   The hash state you wish to initialize
145 |    @return 0 if successful
146 | */
147 | int sha512_init(sha512_context * md) {
148 |     if (md == NULL) return 1;
149 | 
150 |     md->curlen = 0;
151 |     md->length = 0;
152 |     md->state[0] = UINT64_C(0x6a09e667f3bcc908);
153 |     md->state[1] = UINT64_C(0xbb67ae8584caa73b);
154 |     md->state[2] = UINT64_C(0x3c6ef372fe94f82b);
155 |     md->state[3] = UINT64_C(0xa54ff53a5f1d36f1);
156 |     md->state[4] = UINT64_C(0x510e527fade682d1);
157 |     md->state[5] = UINT64_C(0x9b05688c2b3e6c1f);
158 |     md->state[6] = UINT64_C(0x1f83d9abfb41bd6b);
159 |     md->state[7] = UINT64_C(0x5be0cd19137e2179);
160 | 
161 |     return 0;
162 | }
163 | 
164 | /**
165 |    Process a block of memory though the hash
166 |    @param md     The hash state
167 |    @param in     The data to hash
168 |    @param inlen  The length of the data (octets)
169 |    @return 0 if successful
170 | */
171 | int sha512_update (sha512_context * md, const unsigned char *in, size_t inlen)               
172 | {                                                                                           
173 |     size_t n;
174 |     size_t i;                                                                        
175 |     int           err;     
176 |     if (md == NULL) return 1;  
177 |     if (in == NULL) return 1;                                                              
178 |     if (md->curlen > sizeof(md->buf)) {                             
179 |        return 1;                                                            
180 |     }                                                                                       
181 |     while (inlen > 0) {                                                                     
182 |         if (md->curlen == 0 && inlen >= 128) {                           
183 |            if ((err = sha512_compress (md, (unsigned char *)in)) != 0) {               
184 |               return err;                                                                   
185 |            }                                                                                
186 |            md->length += 128 * 8;                                        
187 |            in             += 128;                                                    
188 |            inlen          -= 128;                                                    
189 |         } else {                                                                            
190 |            n = MIN(inlen, (128 - md->curlen));
191 | 
192 |            for (i = 0; i < n; i++) {
193 |             md->buf[i + md->curlen] = in[i];
194 |            }
195 | 
196 | 
197 |            md->curlen += n;                                                     
198 |            in             += n;                                                             
199 |            inlen          -= n;                                                             
200 |            if (md->curlen == 128) {                                      
201 |               if ((err = sha512_compress (md, md->buf)) != 0) {            
202 |                  return err;                                                                
203 |               }                                                                             
204 |               md->length += 8*128;                                       
205 |               md->curlen = 0;                                                   
206 |            }                                                                                
207 |        }                                                                                    
208 |     }                                                                                       
209 |     return 0;                                                                        
210 | }
211 | 
212 | /**
213 |    Terminate the hash to get the digest
214 |    @param md  The hash state
215 |    @param out [out] The destination of the hash (64 bytes)
216 |    @return 0 if successful
217 | */
218 |    int sha512_final(sha512_context * md, unsigned char *out)
219 |    {
220 |     int i;
221 | 
222 |     if (md == NULL) return 1;
223 |     if (out == NULL) return 1;
224 | 
225 |     if (md->curlen >= sizeof(md->buf)) {
226 |      return 1;
227 |  }
228 | 
229 |     /* increase the length of the message */
230 |  md->length += md->curlen * UINT64_C(8);
231 | 
232 |     /* append the '1' bit */
233 |  md->buf[md->curlen++] = (unsigned char)0x80;
234 | 
235 |     /* if the length is currently above 112 bytes we append zeros
236 |      * then compress.  Then we can fall back to padding zeros and length
237 |      * encoding like normal.
238 |      */
239 |      if (md->curlen > 112) {
240 |         while (md->curlen < 128) {
241 |             md->buf[md->curlen++] = (unsigned char)0;
242 |         }
243 |         sha512_compress(md, md->buf);
244 |         md->curlen = 0;
245 |     }
246 | 
247 |     /* pad upto 120 bytes of zeroes 
248 |      * note: that from 112 to 120 is the 64 MSB of the length.  We assume that you won't hash
249 |      * > 2^64 bits of data... :-)
250 |      */
251 | while (md->curlen < 120) {
252 |     md->buf[md->curlen++] = (unsigned char)0;
253 | }
254 | 
255 |     /* store length */
256 | STORE64H(md->length, md->buf+120);
257 | sha512_compress(md, md->buf);
258 | 
259 |     /* copy output */
260 | for (i = 0; i < 8; i++) {
261 |     STORE64H(md->state[i], out+(8*i));
262 | }
263 | 
264 | return 0;
265 | }
266 | 
267 | int sha512(const unsigned char *message, size_t message_len, unsigned char *out)
268 | {
269 |     sha512_context ctx;
270 |     int ret;
271 |     if ((ret = sha512_init(&ctx))) return ret;
272 |     if ((ret = sha512_update(&ctx, message, message_len))) return ret;
273 |     if ((ret = sha512_final(&ctx, out))) return ret;
274 |     return 0;
275 | }
276 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/sha512.h:
--------------------------------------------------------------------------------
 1 | #ifndef SHA512_H
 2 | #define SHA512_H
 3 | 
 4 | #include <stddef.h>
 5 | 
 6 | #include "fixedint.h"
 7 | 
/* state */
typedef struct sha512_context_ {
    /* length: number of message bits hashed so far (whole blocks during
     * update, buffered bytes added in final);
     * state:  the eight 64-bit chaining words */
    uint64_t  length, state[8];
    size_t curlen;           /* number of bytes currently held in buf */
    unsigned char buf[128];  /* input waiting to fill one 128-byte block */
} sha512_context;


/* Resets md to the initial state. Returns 0, or 1 if md is NULL. */
int sha512_init(sha512_context * md);
/* Pads and finishes the hash, writing the 64-byte digest to out.
 * Returns 0 on success, 1 on a NULL argument or an inconsistent context. */
int sha512_final(sha512_context * md, unsigned char *out);
/* Feeds inlen bytes from in into the hash. Returns 0 on success. */
int sha512_update(sha512_context * md, const unsigned char *in, size_t inlen);
/* One-shot init + update + final over a single buffer; out receives 64 bytes.
 * Returns 0 on success. */
int sha512(const unsigned char *message, size_t message_len, unsigned char *out);
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/sign.c:
--------------------------------------------------------------------------------
 1 | #include "ed25519.h"
 2 | #include "sha512.h"
 3 | #include "ge.h"
 4 | #include "sc.h"
 5 | 
 6 | 
 7 | void ed25519_sign(unsigned char *signature, const unsigned char *message, size_t message_len, const unsigned char *public_key, const unsigned char *private_key) {
 8 |     sha512_context hash;
 9 |     unsigned char hram[64];
10 |     unsigned char r[64];
11 |     ge_p3 R;
12 | 
13 | 
14 |     sha512_init(&hash);
15 |     sha512_update(&hash, private_key + 32, 32);
16 |     sha512_update(&hash, message, message_len);
17 |     sha512_final(&hash, r);
18 | 
19 |     sc_reduce(r);
20 |     ge_scalarmult_base(&R, r);
21 |     ge_p3_tobytes(signature, &R);
22 | 
23 |     sha512_init(&hash);
24 |     sha512_update(&hash, signature, 32);
25 |     sha512_update(&hash, public_key, 32);
26 |     sha512_update(&hash, message, message_len);
27 |     sha512_final(&hash, hram);
28 | 
29 |     sc_reduce(hram);
30 |     sc_muladd(signature + 32, hram, private_key, r);
31 | }
32 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/verify.c:
--------------------------------------------------------------------------------
 1 | #include "ed25519.h"
 2 | #include "sha512.h"
 3 | #include "ge.h"
 4 | #include "sc.h"
 5 | 
 6 | static int consttime_equal(const unsigned char *x, const unsigned char *y) {
 7 |     unsigned char r = 0;
 8 | 
 9 |     r = x[0] ^ y[0];
10 |     #define F(i) r |= x[i] ^ y[i]
11 |     F(1);
12 |     F(2);
13 |     F(3);
14 |     F(4);
15 |     F(5);
16 |     F(6);
17 |     F(7);
18 |     F(8);
19 |     F(9);
20 |     F(10);
21 |     F(11);
22 |     F(12);
23 |     F(13);
24 |     F(14);
25 |     F(15);
26 |     F(16);
27 |     F(17);
28 |     F(18);
29 |     F(19);
30 |     F(20);
31 |     F(21);
32 |     F(22);
33 |     F(23);
34 |     F(24);
35 |     F(25);
36 |     F(26);
37 |     F(27);
38 |     F(28);
39 |     F(29);
40 |     F(30);
41 |     F(31);
42 |     #undef F
43 | 
44 |     return !r;
45 | }
46 | 
47 | int ed25519_verify(const unsigned char *signature, const unsigned char *message, size_t message_len, const unsigned char *public_key) {
48 |     unsigned char h[64];
49 |     unsigned char checker[32];
50 |     sha512_context hash;
51 |     ge_p3 A;
52 |     ge_p2 R;
53 | 
54 |     if (signature[63] & 224) {
55 |         return 0;
56 |     }
57 | 
58 |     if (ge_frombytes_negate_vartime(&A, public_key) != 0) {
59 |         return 0;
60 |     }
61 | 
62 |     sha512_init(&hash);
63 |     sha512_update(&hash, signature, 32);
64 |     sha512_update(&hash, public_key, 32);
65 |     sha512_update(&hash, message, message_len);
66 |     sha512_final(&hash, h);
67 |     
68 |     sc_reduce(h);
69 |     ge_double_scalarmult_vartime(&R, h, &A, signature + 32);
70 |     ge_tobytes(checker, &R);
71 | 
72 |     if (!consttime_equal(checker, signature)) {
73 |         return 0;
74 |     }
75 | 
76 |     return 1;
77 | }
78 | 


--------------------------------------------------------------------------------
/src/sgx/build.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -e
 3 | 
 4 | pwd=$PWD
 5 | 
 6 | echo --- Build
 7 | cd "$(dirname "$0")/signing"
 8 | (
 9 |   set -x
10 |   mkdir -p "$pwd"/temp
11 |   openssl genrsa -out "$pwd"/temp/priv_key.pem -3 3072
12 |   openssl rsa -in "$pwd"/temp/priv_key.pem -pubout -out "$pwd"/temp/pub_key.pem
13 |   make LIBS_PATH="$pwd"/libs OUT="$pwd"/dist PRIV_KEY="$pwd"/temp/priv_key.pem PUB_KEY="$pwd"/temp/pub_key.pem
14 | )
15 | 
16 | echo --- Build Enclave Test
17 | cd "../test"
18 | (
19 |   set -x
20 |   make LIBS_PATH="$pwd"/libs OUT="$pwd"/dist
21 | )
22 | 


--------------------------------------------------------------------------------
/src/sgx/signing/Makefile:
--------------------------------------------------------------------------------
# User-overridable settings (?= keeps a value given on the command line or in
# the environment): SDK location, whether `all` builds the signed enclave,
# the signing key pair, where liblibs live and where products are copied.
SGX_SDK ?= /opt/sgxsdk
SIGN_ENCLAVE ?= 1
PRIV_KEY ?= ../keys/private_key.pem
PUB_KEY ?= ../keys/public_key.pem
LIBS_PATH ?= ../../../libs
OUT ?= ../../../dist

# Paths and tools derived from the SDK location.
SGX_COMMON_CFLAGS := -m64 -O2
SGX_LIBRARY_PATH := $(SGX_SDK)/lib64
SGX_ENCLAVE_SIGNER := $(SGX_SDK)/bin/x64/sgx_sign
SGX_EDGER8R := $(SGX_SDK)/bin/x64/sgx_edger8r

# Compiler flags for enclave (trusted) sources: system headers are disabled
# with -nostdinc and replaced by the SDK's include directories.
Trusted_C_Flags := -Wno-implicit-function-declaration -std=c11 $(SGX_COMMON_CFLAGS) -nostdinc  -fpie -fstack-protector \
	-IInclude -I. -I$(SGX_SDK)/include -I$(SGX_SDK)/include/tlibc -I$(SGX_SDK)/include/libcxx -fno-builtin-printf \
	-I. -I../../sgx-ecc-ed25519 -fvisibility=hidden

# Compiler flags for the host-side (untrusted) sources.
Untrusted_C_Flags := -fPIC -O0 -g -Wno-attributes -I$(SGX_SDK)/include -I. -I../../sgx-ecc-ed25519 
Test_C_Flags := $(Untrusted_C_Flags)

# Link flags for the enclave image signing.so: no default libraries or start
# files, SDK trusted libraries plus the static ed25519 build from LIBS_PATH,
# entry point enclave_entry, exports limited by signing.lds.
Link_Flags := $(SGX_COMMON_CFLAGS) -Wl,--no-undefined -nostdlib -nodefaultlibs -nostartfiles -L$(SGX_LIBRARY_PATH) \
	-Wl,--whole-archive -lsgx_trts -Wl,--no-whole-archive \
	-L$(LIBS_PATH) -led25519.sgx.static \
	-Wl,--start-group -lsgx_tstdc -lsgx_tcxx -lsgx_tkey_exchange -lsgx_tcrypto -lsgx_tservice -Wl,--end-group \
	-Wl,-Bstatic -Wl,-Bsymbolic -Wl,--no-undefined \
	-Wl,-pie,-eenclave_entry -Wl,--export-dynamic  \
	-Wl,--defsym,__ImageBase=0 \
	-Wl,--version-script=./signing.lds

# Trusted sources: every *_trusted.c present in this directory plus the
# generated signing_t.c (which may not exist yet when the makefile is parsed).
Trusted_C_Files := $(filter $(wildcard *_trusted.c), $(wildcard *.c))
Trusted_C_Files += signing_t.c
Trusted_C_Objects := $(Trusted_C_Files:.c=.o)

# Untrusted sources: every *_untrusted.c plus the generated signing_u.c.
Untrusted_C_Files := $(filter $(wildcard *_untrusted.c), $(wildcard *.c))
Untrusted_C_Files += signing_u.c
Untrusted_C_Objects := $(Untrusted_C_Files:.c=.o)
36 | 
# Targets that name an action rather than a file. `clean` is listed as well so
# that a stray file called "clean" can never make `make clean` a no-op.
.PHONY: all run clean

# Default goal. With SIGN_ENCLAVE != 0 the enclave is signed as part of the
# build; with SIGN_ENCLAVE=0 only the unsigned signing.so is produced and the
# user is reminded to sign it separately.
ifneq ($(SIGN_ENCLAVE), 0)
all: signing_u.c signing_t.c signing.signed.so libsigning.so
else
all: signing_u.c signing_t.c signing.so libsigning.so
	@echo "Build enclave signing.so success!"
	@echo
	@echo "**********************************************************************************************"
	@echo "PLEASE NOTE: In this mode, please sign the enclave first using Two Step Sign mechanism, before"
	@echo "you run the app to launch and access the enclave."
	@echo "**********************************************************************************************"
	@echo
endif

# Alias kept for compatibility: it only builds everything.
run: all
53 | 
# Generate the trusted (enclave-side) bridge code from the EDL interface.
# NOTE(review): sgx_edger8r presumably writes signing_t.h alongside
# signing_t.c (signing_trusted.c includes it and `clean` removes signing_t.*);
# only the .c file is tracked as a target here.
signing_t.c: $(SGX_EDGER8R) signing.edl
	@echo "GEN  =>  $@"
	@$(SGX_EDGER8R) --trusted signing.edl --search-path $(SGX_SDK)/include

# Generate the untrusted (host-side) proxy code from the same EDL file.
signing_u.c: $(SGX_EDGER8R) signing.edl
	@echo "GEN  =>  $@"
	@$(SGX_EDGER8R) --untrusted signing.edl --search-path $(SGX_SDK)/include
61 | 
# Enclave (trusted) objects. Besides its own source, every object depends on
# signing_t.c: signing_trusted.c includes the generated signing_t.h, so the
# edger8r step must have run first. Declaring it here (instead of relying on
# the prerequisite order of `all`) keeps `make -j` and single-target builds
# correct. $< is still the object's own %.c because it is listed first.
$(Trusted_C_Objects): %.o: %.c signing_t.c
	@echo "CC  <=  $<"
	$(CC) $(Trusted_C_Flags) -c $< -o $@

# Host-side (untrusted) objects, with the same ordering guarantee for the
# generated proxy. NOTE(review): assumes the *_untrusted.c sources include
# signing_u.h -- the extra prerequisite is harmless if they do not.
$(Untrusted_C_Objects): %.o: %.c signing_u.c
	@echo "CC  <=  $<"
	$(CC) $(Untrusted_C_Flags) -c $< -o $@
69 | 
# Link the unsigned enclave image and copy it to $(OUT).
# signing_t.o is also a member of $(Trusted_C_Objects); $^ drops the
# duplicate, so it is passed to the linker only once.
signing.so: signing_t.o $(Trusted_C_Objects)
	@echo "LINK =>  $@"
	$(CC) $^ -o $@ $(Link_Flags)
	mkdir -p $(OUT)
	cp $@ $(OUT)
75 | 
# Two-step signing of the enclave:
#   1. sgx_sign gendata  - emit the signing material for signing.so
#   2. openssl dgst      - sign that material with PRIV_KEY
#   3. sgx_sign catsig   - combine enclave, signature and PUB_KEY into $@
# The two scratch files are named after the target and kept in the build
# directory instead of using fixed names in /tmp, so concurrent builds on one
# machine cannot overwrite each other's hash/signature and the paths are not
# predictable to other users. They are removed once the signed image exists.
signing.signed.so: signing.so
	@echo "SIGN =>  $@"
	$(SGX_ENCLAVE_SIGNER) gendata -enclave $< -config signing.config.xml -out $@.hash.hex
	openssl dgst -sha256 -out $@.sig.hex -sign $(PRIV_KEY) -keyform PEM $@.hash.hex
	$(SGX_ENCLAVE_SIGNER) catsig -enclave $< -config signing.config.xml -out $@ -key $(PUB_KEY) -sig $@.sig.hex -unsigned $@.hash.hex
	$(RM) $@.hash.hex $@.sig.hex
	mkdir -p $(OUT)
	cp $@ $(OUT)
83 | 
# Build the host-side shared library that loads and talks to the enclave, then
# copy it and its public header to $(OUT).
# signing_u.o and signing_untrusted.o are listed explicitly and are normally
# also part of $(Untrusted_C_Objects); $^ removes the duplicates.
libsigning.so: signing_u.o signing_untrusted.o $(Untrusted_C_Objects)
	@echo "LINK =>  $@"
	$(CC) $^ -o $@ -shared -L$(SGX_LIBRARY_PATH) -lsgx_uae_service -lsgx_ukey_exchange -lsgx_urts -L$(LIBS_PATH) -led25519.static
	mkdir -p $(OUT)
	cp $@ $(OUT)
	cp signing_public.h $(OUT)
90 | 
# Remove generated bridge sources/headers, objects and linked images from this
# directory. Copies placed in $(OUT) are left alone.
clean:
	@rm -f signing_t.* signing_u.* $(Trusted_C_Objects) $(Untrusted_C_Objects) signing.signed.so signing.so libsigning.so
93 | 


--------------------------------------------------------------------------------
/src/sgx/signing/signing.config.xml:
--------------------------------------------------------------------------------
 1 | <EnclaveConfiguration>
 2 |   <ProdID>0</ProdID>
 3 |   <ISVSVN>0</ISVSVN>
 4 |   <StackMaxSize>0x20000</StackMaxSize>
 5 |   <HeapMaxSize>0x80000</HeapMaxSize>
 6 |   <TCSNum>4</TCSNum>
 7 |   <TCSPolicy>1</TCSPolicy>
 8 |   <DisableDebug>0</DisableDebug>
 9 | </EnclaveConfiguration>
10 | 


--------------------------------------------------------------------------------
/src/sgx/signing/signing.edl:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file contains Solana's SGX enclave interface.
 3 |  */
 4 | 
 5 | enclave {
 6 |     from "sgx_tkey_exchange.edl" import *;
 7 |     include "sgx_key_exchange.h"
 8 |     include "sgx_trts.h"
 9 |     include "signing_internal.h"
10 | 
11 |    trusted {
12 |       /* This function initializes enclave's remote attestation
13 |          parameters.
14 | 
15 |          Parameters:
16 |          b_pse:   Use Intel's Platform Services
17 |          pub_key: The caller's public key, that it'll use for
18 |                   communication with Intel's IAS service.
19 |          pctxt:   Remote attestation context
20 |       */
21 |       public sgx_status_t init_remote_attestation(
22 |          int b_pse,
23 |          [in] sgx_ec256_public_t* pub_key,
24 |          [out] sgx_ra_context_t *pctxt);
25 | 
26 |       /* This function frees enclave's remote attestation
27 |          context (returned by init_remote_attestation).
28 |       */
29 |       public sgx_status_t close_remote_attestation(sgx_ra_context_t ctxt);
30 | 
31 |       /* This function initializes an ED25519 keypair in enclave. It
32 |          returns the public key to the caller. The private key will
33 |          be used by sign_sgx_ed25519 function to sign the data.
34 | 
35 |          Enclave uses lockout parameters to compute if signing the
36 |          data will cause slashing.
37 |       */
38 |       public sgx_status_t init_sgx_ed25519(
39 |          uint32_t lockout_period,
40 |          uint32_t lockout_multiplier,
41 |          uint32_t lockout_max_depth,
42 |          uint32_t key_len,
43 |          [out, size=key_len] uint8_t* pubkey);
44 | 
45 |       /* This function returns sealed enclave data (keypair, lockout
46 |          parameters) to the caller. The data is encrypted using
47 |          enclave specific keys.
48 |       */
49 |       public sgx_status_t get_sgx_ed25519_data(
50 |          uint32_t data_size,
51 |          [out, size=data_size] uint8_t* sealed_data,
52 |          [out] uint32_t* data_size_needed);
53 | 
54 |       /* This function initializes enclave using sealed data. The data
55 |          was sealed using get_sgx_ed25519_data() function.
56 | 
57 |          The caller can also update the lockout parameters.
58 |       */
59 |       public sgx_status_t init_sgx_ed25519_from_data(
60 |          uint32_t data_size,
61 |          [in, size=data_size] uint8_t* sealed_data,
62 |          uint32_t update_lockout_params,
63 |          uint32_t lockout_period,
64 |          uint32_t lockout_multiplier,
65 |          uint32_t lockout_max_depth,
66 |          uint32_t key_len,
67 |          [out, size=key_len] uint8_t* pubkey);
68 | 
69 |       /* This function signs the message by using private key generated
70 |          during init function. The caller provides the new history entries.
71 |          The enclave checks the lockout parameters, past history and the
72 |          new history to compute slashing conditions. The enclave will not
73 |          sign the message if it'll result in slashing.
74 |       */
75 |       public sgx_status_t sign_sgx_ed25519(
76 |          uint32_t msg_len,
77 |          [in, size=msg_len] const uint8_t* msg,
78 |          uint32_t history_len,
79 |          [in, count=history_len] const history_entry_t* entries,
80 |          uint32_t sig_len,
81 |          [out, size=sig_len] uint8_t* signature);
82 |    };
83 | };
84 | 


--------------------------------------------------------------------------------
/src/sgx/signing/signing.lds:
--------------------------------------------------------------------------------
 1 | signing.so
 2 | {
 3 |     global:
 4 |         g_global_data_sim;
 5 |         g_global_data;
 6 |         signing_entry;
 7 |     local:
 8 |         *;
 9 | };
10 | 


--------------------------------------------------------------------------------
/src/sgx/signing/signing_internal.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include "signing_public.h"
4 | 
5 | // The following definitions are specific to ed25519 specifications
6 | #define ED25519_PRIV_KEY_LEN 64
7 | #define ED25519_SIGNATURE_LEN 64
8 | #define ED25519_SEED_LEN 32
9 | 


--------------------------------------------------------------------------------
/src/sgx/signing/signing_public.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <stdbool.h>
 4 | #include "sgx_eid.h"
 5 | #include "sgx_error.h"
 6 | 
 7 | #define ED25519_PUB_KEY_LEN 32
 8 | 
 9 | #ifdef __cplusplus
10 | extern "C" {
11 | #endif
12 | 
/* Caller-owned handle passed to every function declared below. */
typedef struct ed25519_context {
  /* NOTE(review): presumably true when signing is backed by a loaded SGX
     enclave -- confirm in signing_untrusted.c */
  bool enclaveEnabled;
  /* NOTE(review): presumably the id of that enclave -- confirm */
  sgx_enclave_id_t eid;
  /* ed25519 public key, ED25519_PUB_KEY_LEN (32) bytes */
  uint8_t public_key[ED25519_PUB_KEY_LEN];
} ed25519_context_t;

/* One history record handed to sign_ed25519() alongside the message.
   NOTE(review): the exact meaning of the fields is defined by the enclave's
   lockout logic, which is not visible here -- confirm before relying on it. */
typedef struct history_entry {
  uint32_t num_hashes;
  uint32_t optional_input_hash[4]; /* four 32-bit words */
  uint32_t result_hash[4];         /* four 32-bit words */
} history_entry_t;
24 | 
25 | /* This function initializes SGX enclave. It loads enclave_file
26 |    to SGX, which internally creates a new public/private keypair.
27 | 
28 |    If the platform does not support SGX, it creates a public/private
29 |    keypair in untrusted space. An error is returned in this scenario.
30 |    The user can choose to not use the library if SGX enclave is not
31 |    being used for signing.
32 | 
33 |    Note: The user must release the enclave by calling release_ed25519_context()
34 |          after they are done using it.
35 | */
36 | sgx_status_t init_ed25519(const char* enclave_file,
37 |                           uint32_t lockout_period,
38 |                           uint32_t lockout_multiplier,
39 |                           uint32_t lockout_max_depth,
40 |                           ed25519_context_t* pctxt);
41 | 
42 | /* This function returns the sealed data (private key and associated
43 |    information). The sealed data can be used to reinit the enclave using
44 |    init_ed25519_from_data().
45 | */
46 | sgx_status_t get_ed25519_data(ed25519_context_t* pctxt,
47 |                               uint32_t* datalen,
48 |                               uint8_t* data);
49 | 
50 | /* This function reinitializes the enclave using sealed data.
51 |  */
52 | sgx_status_t init_ed25519_from_data(ed25519_context_t* pctxt,
53 |                                     uint32_t datalen,
54 |                                     uint8_t* data,
55 |                                     uint32_t update_lockout_params,
56 |                                     uint32_t lockout_period,
57 |                                     uint32_t lockout_multiplier,
58 |                                     uint32_t lockout_max_depth);
59 | 
60 | /* This function signs the msg using the internally stored private
61 |    key. The signature is returned in the output "signature" buffer.
62 | 
63 |    This function must only be called after init_ed25519() function.
64 | */
65 | sgx_status_t sign_ed25519(ed25519_context_t* pctxt,
66 |                           uint32_t msg_len,
67 |                           const uint8_t* msg,
68 |                           uint32_t history_len,
69 |                           const history_entry_t* entries,
70 |                           uint32_t sig_len,
71 |                           uint8_t* signature);
72 | 
73 | /* This function releases SGX enclave */
74 | void release_ed25519_context(ed25519_context_t* pctxt);
75 | 
76 | #ifdef __cplusplus
77 | }
78 | #endif
79 | 


--------------------------------------------------------------------------------
/src/sgx/signing/signing_trusted.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file contains Solana's SGX enclave code for signing data.
  3 |  */
  4 | 
  5 | #include <stdbool.h>
  6 | #include <string.h>
  7 | 
  8 | #include "sgx_key.h"
  9 | #include "sgx_tseal.h"
 10 | 
 11 | #include "ed25519.h"
 12 | #include "signing_t.h"
 13 | 
/* Everything the enclave keeps about its signing identity. The whole struct
   is what get_sgx_ed25519_data() sizes its sealed blob for. */
typedef struct signing_parameters {
  bool initialized;                           /* set once a keypair exists */
  uint8_t public_key[ED25519_PUB_KEY_LEN];    /* from ed25519_create_keypair */
  uint8_t private_key[ED25519_PRIV_KEY_LEN];  /* from ed25519_create_keypair */
  uint32_t nonce;              /* refreshed with sgx_read_rand when sealing */
  uint32_t lockout_period;     /* lockout settings supplied by the caller */
  uint32_t lockout_multiplier;
  uint32_t lockout_max_depth;
  sgx_mc_uuid_t counter;       /* id of the SGX monotonic counter */
  uint32_t counter_value;      /* value reported when it was created */
} signing_parameters_t;

/* Single, enclave-global instance; zero-initialised as a static object. */
static signing_parameters_t g_signing_params;
 27 | 
 28 | sgx_status_t init_remote_attestation(int b_pse,
 29 |                                      sgx_ec256_public_t* sp_pub_key,
 30 |                                      sgx_ra_context_t* pctxt) {
 31 |   sgx_status_t ret;
 32 |   if (b_pse) {
 33 |     int busy_retry_times = 2;
 34 |     do {
 35 |       ret = sgx_create_pse_session();
 36 |     } while (ret == SGX_ERROR_BUSY && busy_retry_times--);
 37 |     if (ret != SGX_SUCCESS)
 38 |       return ret;
 39 |   }
 40 |   ret = sgx_ra_init(sp_pub_key, b_pse, pctxt);
 41 |   if (b_pse) {
 42 |     sgx_close_pse_session();
 43 |   }
 44 |   return ret;
 45 | }
 46 | 
 47 | sgx_status_t close_remote_attestation(sgx_ra_context_t ctxt) {
 48 |   return sgx_ra_close(ctxt);
 49 | }
 50 | 
 51 | /* Creates a fresh ed25519 keypair inside the enclave, ties it to a newly
 52 |    created monotonic counter, and copies the public key to `pubkey`.
 53 |    The three lockout_* values are stored as given. `pubkey` must be non-NULL
 54 |    with key_len >= ED25519_PUB_KEY_LEN, else SGX_ERROR_INVALID_PARAMETER.
 55 |    Any failing SDK call's status is returned and no key is installed. */
 56 | sgx_status_t init_sgx_ed25519(uint32_t lockout_period,
 57 |                               uint32_t lockout_multiplier,
 58 |                               uint32_t lockout_max_depth,
 59 |                               uint32_t key_len,
 60 |                               uint8_t* pubkey) {
 61 |   if (pubkey == NULL || key_len < sizeof(g_signing_params.public_key)) {
 62 |     return SGX_ERROR_INVALID_PARAMETER;
 63 |   }
 64 | 
 65 |   /* Draw the seed before touching platform services, so a randomness
 66 |      failure cannot use up a monotonic counter for nothing. */
 67 |   uint8_t seed[ED25519_SEED_LEN];
 68 |   sgx_status_t status = sgx_read_rand(seed, sizeof(seed));
 69 |   if (SGX_SUCCESS != status) {
 70 |     return status;
 71 |   }
 72 | 
 73 |   int busy_retry_times = 3;
 74 |   do {
 75 |     status = sgx_create_pse_session();
 76 |   } while (status == SGX_ERROR_BUSY && (busy_retry_times-- > 0));
 77 |   if (SGX_SUCCESS == status) {
 78 |     status = sgx_create_monotonic_counter(&g_signing_params.counter,
 79 |                                           &g_signing_params.counter_value);
 80 |     sgx_close_pse_session();
 81 |   }
 82 |   if (SGX_SUCCESS != status) {
 83 |     memset_s(seed, sizeof(seed), 0, sizeof(seed));
 84 |     return status;
 85 |   }
 86 | 
 87 |   ed25519_create_keypair(g_signing_params.public_key,
 88 |                          g_signing_params.private_key, seed);
 89 |   /* The seed determines the private key; wipe the stack copy. */
 90 |   memset_s(seed, sizeof(seed), 0, sizeof(seed));
 91 |   memcpy(pubkey, g_signing_params.public_key,
 92 |          sizeof(g_signing_params.public_key));
 93 |   g_signing_params.initialized = true;
 94 |   g_signing_params.lockout_max_depth = lockout_max_depth;
 95 |   g_signing_params.lockout_multiplier = lockout_multiplier;
 96 |   g_signing_params.lockout_period = lockout_period;
 97 |   return SGX_SUCCESS;
 98 | }
 99 | 
100 | /* Seals the signing parameters into `sealed_data` (capacity data_size).
101 |    *data_size_needed always receives the blob size, so an undersized call
102 |    doubles as a size query. SGX_ERROR_INVALID_STATE if no key is loaded. */
103 | sgx_status_t get_sgx_ed25519_data(uint32_t data_size,
104 |                                   uint8_t* sealed_data,
105 |                                   uint32_t* data_size_needed) {
106 |   *data_size_needed =
107 |       sgx_calc_sealed_data_size(0, sizeof(signing_parameters_t));
108 |   /* UINT32_MAX is the SDK's "cannot compute" sentinel, not a real size. */
109 |   if (*data_size_needed == UINT32_MAX || *data_size_needed > data_size ||
110 |       sealed_data == NULL) {
111 |     return SGX_ERROR_INVALID_PARAMETER;
112 |   }
113 |   if (!g_signing_params.initialized) return SGX_ERROR_INVALID_STATE;
114 |   sgx_status_t status = sgx_read_rand((uint8_t*)&g_signing_params.nonce,
115 |                                       sizeof(g_signing_params.nonce));
116 |   if (SGX_SUCCESS != status) return status;
117 |   sgx_attributes_t attribute_mask;
118 |   attribute_mask.flags = SGX_FLAGS_INITTED | SGX_FLAGS_DEBUG;
119 |   attribute_mask.xfrm = 0x0;
120 |   return sgx_seal_data_ex(SGX_KEYPOLICY_MRENCLAVE, attribute_mask, 0xF0000000,
121 |                           0, NULL, sizeof(g_signing_params),
122 |                           (const uint8_t*)&g_signing_params, *data_size_needed,
123 |                           (sgx_sealed_data_t*)sealed_data);
124 | }
125 | 
126 | /* Restores signing state from a blob produced by get_sgx_ed25519_data().
127 |    The blob is unsealed, its monotonic counter is checked against the value
128 |    recorded at sealing time (a mismatch means the blob is stale), and the
129 |    counter is then advanced so the same blob cannot be restored again.
130 |    Only after all of that succeeds is the enclave state replaced; `pubkey`
131 |    (capacity key_len) receives the restored public key. When
132 |    update_lockout_params is non-zero the three lockout_* arguments override
133 |    the sealed ones. Returns SGX_SUCCESS, SGX_ERROR_INVALID_PARAMETER for bad
134 |    arguments or a stale/ill-sized blob, or a failing SDK call's status. */
135 | sgx_status_t init_sgx_ed25519_from_data(uint32_t data_size,
136 |                                         uint8_t* sealed_data,
137 |                                         uint32_t update_lockout_params,
138 |                                         uint32_t lockout_period,
139 |                                         uint32_t lockout_multiplier,
140 |                                         uint32_t lockout_max_depth,
141 |                                         uint32_t key_len,
142 |                                         uint8_t* pubkey) {
143 |   /* A blob shorter than the sealed size would let unsealing read past it. */
144 |   if (pubkey == NULL || sealed_data == NULL ||
145 |       key_len < sizeof(g_signing_params.public_key) ||
146 |       data_size < sgx_calc_sealed_data_size(0, sizeof(signing_parameters_t))) {
147 |     return SGX_ERROR_INVALID_PARAMETER;
148 |   }
149 | 
150 |   signing_parameters_t data;
151 |   uint32_t datalen = sizeof(data);
152 |   sgx_status_t status = sgx_unseal_data((const sgx_sealed_data_t*)sealed_data,
153 |                                         NULL, 0, (uint8_t*)&data, &datalen);
154 |   if (SGX_SUCCESS == status && datalen != sizeof(data)) {
155 |     status = SGX_ERROR_INVALID_PARAMETER;
156 |   }
157 | 
158 |   if (SGX_SUCCESS == status) {
159 |     int busy_retry_times = 3;
160 |     do {
161 |       status = sgx_create_pse_session();
162 |     } while (status == SGX_ERROR_BUSY && (busy_retry_times-- > 0));
163 |   }
164 |   if (SGX_SUCCESS == status) {
165 |     /* Use the counter named in the blob itself, not whatever counter the
166 |        enclave currently holds, so the check is actually about this blob. */
167 |     uint32_t counter_value = 0xffffffff;
168 |     status = sgx_read_monotonic_counter(&data.counter, &counter_value);
169 |     if (SGX_SUCCESS == status && counter_value != data.counter_value) {
170 |       status = SGX_ERROR_INVALID_PARAMETER;
171 |     }
172 |     if (SGX_SUCCESS == status) {
173 |       /* The new value lands in `data`, so it survives the copy below. */
174 |       status = sgx_increment_monotonic_counter(&data.counter,
175 |                                                &data.counter_value);
176 |     }
177 |     sgx_close_pse_session();
178 |   }
179 | 
180 |   if (SGX_SUCCESS == status) {
181 |     if (update_lockout_params != 0) {
182 |       data.lockout_max_depth = lockout_max_depth;
183 |       data.lockout_multiplier = lockout_multiplier;
184 |       data.lockout_period = lockout_period;
185 |     }
186 |     data.initialized = true;
187 |     memcpy(&g_signing_params, &data, sizeof(g_signing_params));
188 |     memcpy(pubkey, data.public_key, sizeof(data.public_key));
189 |   }
190 |   memset_s(&data, sizeof(data), 0, sizeof(data)); /* held the private key */
191 |   return status;
192 | }
193 | 
194 | /* Signs msg[0..msg_len) with the enclave-held private key, writing the
195 |    signature to `signature` (sig_len >= ED25519_SIGNATURE_LEN required).
196 |    history_len/entries are accepted but not consulted here. Fails with
197 |    INVALID_STATE before a key is loaded, INVALID_PARAMETER on bad buffers. */
198 | sgx_status_t sign_sgx_ed25519(uint32_t msg_len,
199 |                               const uint8_t* msg,
200 |                               uint32_t history_len,
201 |                               const history_entry_t* entries,
202 |                               uint32_t sig_len,
203 |                               uint8_t* signature) {
204 |   if (!g_signing_params.initialized) {
205 |     return SGX_ERROR_INVALID_STATE;
206 |   }
207 |   if (signature == NULL || sig_len < ED25519_SIGNATURE_LEN ||
208 |       (msg == NULL && msg_len != 0)) {
209 |     return SGX_ERROR_INVALID_PARAMETER;
210 |   }
211 |   ed25519_sign(signature, msg, msg_len, g_signing_params.public_key,
212 |                g_signing_params.private_key);
213 |   return SGX_SUCCESS;
214 | }
215 | 


--------------------------------------------------------------------------------
/src/sgx/signing/signing_untrusted.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file contains the untrusted (host-side) wrappers that load Solana's SGX signing enclave and call into it.
  3 |  */
  4 | 
  5 | #include <stdbool.h>
  6 | #include <string.h>
  7 | #include <time.h>
  8 | 
  9 | #include "ed25519.h"
 10 | #include "sgx_urts.h"
 11 | #include "signing_public.h"
 12 | #include "signing_u.h"
 13 | 
 14 | /* Loads the enclave image `enclave_file`, has it generate a new keypair,
 15 |    and fills `pctxt` with the enclave id and the public key.
 16 |    The lockout_* values are forwarded to the enclave unchanged.
 17 |    On any failure the enclave (if created) is destroyed, the context is
 18 |    left marked as disabled, and the failing status is returned. */
 19 | sgx_status_t init_ed25519(const char* enclave_file,
 20 |                           uint32_t lockout_period,
 21 |                           uint32_t lockout_multiplier,
 22 |                           uint32_t lockout_max_depth,
 23 |                           ed25519_context_t* pctxt) {
 24 |   if (enclave_file == NULL || pctxt == NULL) {
 25 |     return SGX_ERROR_INVALID_PARAMETER;
 26 |   }
 27 |   /* The context may arrive uninitialized; clear the flag first so an early
 28 |      return never leaves it looking usable. */
 29 |   pctxt->enclaveEnabled = false;
 30 | 
 31 |   int updated = 0;
 32 |   sgx_launch_token_t token = {0};
 33 |   sgx_enclave_id_t eid;
 34 |   sgx_status_t status =
 35 |       sgx_create_enclave(enclave_file, 1, &token, &updated, &eid, NULL);
 36 |   if (SGX_SUCCESS != status) {
 37 |     return status;
 38 |   }
 39 |   sgx_status_t retval = SGX_SUCCESS;
 40 |   status = init_sgx_ed25519(eid, &retval, lockout_period, lockout_multiplier,
 41 |                             lockout_max_depth, sizeof(pctxt->public_key),
 42 |                             &pctxt->public_key[0]);
 43 |   if (SGX_SUCCESS == status) {
 44 |     status = retval; /* the ECALL ran; report the enclave's own result */
 45 |   }
 46 |   if (SGX_SUCCESS != status) {
 47 |     sgx_destroy_enclave(eid);
 48 |     return status;
 49 |   }
 50 |   pctxt->eid = eid;
 51 |   pctxt->enclaveEnabled = true;
 52 |   return SGX_SUCCESS;
 53 | }
 54 | 
 55 | /* Fetches a sealed copy of the enclave's signing state into `data`.
 56 |    *datalen is read as the capacity of `data` and is also handed to the
 57 |    enclave as the place to report the size a sealed blob requires.
 58 |    Returns SGX_ERROR_INVALID_ENCLAVE if the context is not enabled. */
 59 | sgx_status_t get_ed25519_data(ed25519_context_t* pctxt,
 60 |                               uint32_t* datalen,
 61 |                               uint8_t* data) {
 62 |   sgx_status_t enclave_rc = SGX_SUCCESS;
 63 |   sgx_status_t ecall_rc;
 64 | 
 65 |   if (!pctxt->enclaveEnabled)
 66 |     return SGX_ERROR_INVALID_ENCLAVE;
 67 | 
 68 |   ecall_rc = get_sgx_ed25519_data(pctxt->eid, &enclave_rc, *datalen, data,
 69 |                                   datalen);
 70 | 
 71 |   /* A failed ECALL takes precedence; otherwise relay the enclave's result. */
 72 |   if (ecall_rc != SGX_SUCCESS)
 73 |     return ecall_rc;
 74 |   return enclave_rc;
 75 | }
 76 | 
 77 | /* Asks the enclave to replace its signing state with the sealed blob in
 78 |    `data` (datalen bytes). pctxt->public_key is passed as the output buffer
 79 |    for the restored public key. A non-zero update_lockout_params tells the
 80 |    enclave to adopt the lockout_* values given here.
 81 |    Returns SGX_ERROR_INVALID_ENCLAVE if the context is not enabled. */
 82 | sgx_status_t init_ed25519_from_data(ed25519_context_t* pctxt,
 83 |                                     uint32_t datalen,
 84 |                                     uint8_t* data,
 85 |                                     uint32_t update_lockout_params,
 86 |                                     uint32_t lockout_period,
 87 |                                     uint32_t lockout_multiplier,
 88 |                                     uint32_t lockout_max_depth) {
 89 |   sgx_status_t enclave_rc = SGX_SUCCESS;
 90 |   sgx_status_t ecall_rc;
 91 | 
 92 |   if (!pctxt->enclaveEnabled)
 93 |     return SGX_ERROR_INVALID_ENCLAVE;
 94 | 
 95 |   ecall_rc = init_sgx_ed25519_from_data(
 96 |       pctxt->eid, &enclave_rc, datalen, data, update_lockout_params,
 97 |       lockout_period, lockout_multiplier, lockout_max_depth,
 98 |       sizeof(pctxt->public_key), &pctxt->public_key[0]);
 99 | 
100 |   /* A failed ECALL takes precedence; otherwise relay the enclave's result. */
101 |   if (ecall_rc != SGX_SUCCESS) return ecall_rc;
102 |   return enclave_rc;
103 | }
104 | 
105 | /* Has the enclave sign msg[0..msg_len) with its internally held private
106 |    key; the signature is written to `signature` (capacity sig_len).
107 |    history_len and entries are handed to the enclave unchanged.
108 | 
109 |    Only meaningful on a context enabled by init_ed25519(); otherwise
110 |    SGX_ERROR_INVALID_ENCLAVE is returned without calling the enclave.
111 |    Returns the ECALL status if the call itself failed, else the enclave's. */
112 | sgx_status_t sign_ed25519(ed25519_context_t* pctxt,
113 |                           uint32_t msg_len,
114 |                           const uint8_t* msg,
115 |                           uint32_t history_len,
116 |                           const history_entry_t* entries,
117 |                           uint32_t sig_len,
118 |                           uint8_t* signature) {
119 |   sgx_status_t enclave_rc = SGX_SUCCESS;
120 |   sgx_status_t ecall_rc;
121 | 
122 |   if (!pctxt->enclaveEnabled) {
123 |     return SGX_ERROR_INVALID_ENCLAVE;
124 |   }
125 | 
126 |   ecall_rc = sign_sgx_ed25519(pctxt->eid, &enclave_rc, msg_len, msg,
127 |                               history_len, entries, sig_len, signature);
128 | 
129 |   /* Two status codes come back: one for the call into the enclave and one
130 |      from the enclave function itself. Report the first that failed. */
131 |   if (ecall_rc != SGX_SUCCESS) {
132 |     return ecall_rc;
133 |   }
134 |   return enclave_rc;
135 | }
136 | 
137 | /* Destroys the enclave owned by `pctxt` and marks the context disabled.
138 |    A NULL context, or one that is not currently enabled (for example one
139 |    this function already released), is left alone. */
140 | void release_ed25519_context(ed25519_context_t* pctxt) {
141 |   if (pctxt == NULL || !pctxt->enclaveEnabled) {
142 |     return;
143 |   }
144 |   sgx_destroy_enclave(pctxt->eid);
145 |   /* The other wrappers check this flag, so later calls through this context
146 |      are refused instead of being sent to a destroyed enclave. */
147 |   pctxt->enclaveEnabled = false;
148 | }


--------------------------------------------------------------------------------
/src/sgx/test/Makefile:
--------------------------------------------------------------------------------
 1 | SGX_SDK ?= /opt/sgxsdk
 2 | LIBS_PATH ?= ../../../libs
 3 | OUT ?= ../../../dist
 4 | # Compile flags for the test sources: SDK, $(OUT) and ed25519 include paths.
 5 | C_Flags := -O2 -fpic -I. -I$(SGX_SDK)/include -I$(OUT) -I../../sgx-ecc-ed25519
 6 | 
 7 | C_Files := $(wildcard *.c)
 8 | C_Objects := $(C_Files:%.c=%.o)
 9 | # These are commands, not files; a stray file named "clean" must not mask them.
10 | .PHONY: all run clean
11 | all: $(OUT)/signing_test
12 | run: all
13 | 
14 | %.o: %.c
15 | 	@echo "CC  <=  $<"
16 | 	$(CC) $(C_Flags) -c $< -o $@
17 | 
18 | $(OUT)/signing_test: $(C_Objects)
19 | 	@mkdir -p $(OUT)
20 | 	$(CC) $^ -o $@ -L$(OUT) -L$(LIBS_PATH) -lsigning -led25519.static
21 | 
22 | clean:
23 | 	@$(RM) $(C_Objects) $(OUT)/signing_test
24 | 


--------------------------------------------------------------------------------
/src/sgx/test/signing_test.c:
--------------------------------------------------------------------------------
 1 | #include <stdbool.h>
 2 | #include <stdio.h>
 3 | #include <stdlib.h>
 4 | #include <string.h>
 5 | #include "signing_public.h"
 6 | 
 7 | #include "ed25519.h"
 8 | 
 9 | /* Prints `len` bytes of `buf` as "0xNN " tokens, eight per line, followed
10 |    by a final newline. Writes straight to stdout, so output is not limited
11 |    by (and cannot overrun) a fixed-size staging buffer. */
12 | void print_buffer(const uint8_t* buf, int len) {
13 |   for (int i = 0; i < len; i++) {
14 |     printf("0x%02x ", buf[i]);
15 |     if (!((i + 1) % 8))
16 |       printf("\n");
17 |   }
18 |   printf("\n");
19 | }
20 | 
21 | /* End-to-end check of the signing enclave: create a key, seal the enclave
22 |    state and restore it, sign a message, and verify the signature with the
23 |    public key. argv[1] is the enclave image path.
24 |    Exits 0 only if every step, including verification, succeeds. */
25 | int main(int argc, char* argv[]) {
26 |   if (argc < 2) {
27 |     printf("Usage: %s <enclave file path>\n", argv[0]);
28 |     return -1;
29 |   }
30 | 
31 |   ed25519_context_t ctxt;
32 |   uint32_t lockout_period = 10, lockout_multiplier = 2, lockout_max_depth = 32;
33 |   sgx_status_t status = init_ed25519(
34 |       argv[1], lockout_period, lockout_multiplier, lockout_max_depth, &ctxt);
35 |   if (SGX_SUCCESS != status) {
36 |     printf("Failed in init_ed25519. Error %d\n", status);
37 |     return -1;
38 |   }
39 |   printf("Loaded the enclave. eid: %u\n", (uint32_t)ctxt.eid);
40 | 
41 |   /* Size query: with no buffer the call is expected to fail, but it leaves
42 |      the required blob size in datalen. */
43 |   uint32_t datalen = 0;
44 |   status = get_ed25519_data(&ctxt, &datalen, NULL);
45 |   uint8_t* sealed_data = (datalen > 0) ? malloc(datalen) : NULL;
46 |   if (sealed_data == NULL) {
47 |     printf("Failed to size or allocate sealed data. Error %d\n", status);
48 |     release_ed25519_context(&ctxt);
49 |     return -1;
50 |   }
51 | 
52 |   status = get_ed25519_data(&ctxt, &datalen, sealed_data);
53 |   if (SGX_SUCCESS != status) {
54 |     printf("Failed in get_ed25519_data. Error %d\n", status);
55 |     release_ed25519_context(&ctxt);
56 |     free(sealed_data);
57 |     return -1;
58 |   }
59 | 
60 |   status =
61 |       init_ed25519_from_data(&ctxt, datalen, sealed_data, 1, lockout_period,
62 |                              lockout_multiplier, lockout_max_depth);
63 |   free(sealed_data);
64 |   if (SGX_SUCCESS != status) {
65 |     printf("Failed in init_ed25519_from_data. Error %d\n", status);
66 |     release_ed25519_context(&ctxt);
67 |     return -1;
68 |   }
69 | 
70 |   /* An array (not a pointer) so sizeof yields the text length; the
71 |      terminating NUL is excluded from what gets signed. */
72 |   static const uint8_t data[] =
73 |       "This is a test string. We'll sign it using SGX enclave. Hope it works!!";
74 |   const uint32_t data_len = sizeof(data) - 1;
75 |   history_entry_t entries;
76 |   memset(&entries, 0, sizeof(entries));
77 |   uint8_t signature[64];
78 |   memset(signature, 0, sizeof(signature));
79 |   status = sign_ed25519(&ctxt, data_len, data, 1, &entries,
80 |                         sizeof(signature), signature);
81 |   if (SGX_SUCCESS != status) {
82 |     printf("Failed in sign_ed25519. Error %d\n", status);
83 |     release_ed25519_context(&ctxt);
84 |     return -1;
85 |   }
86 | 
87 |   printf("Signature:\n");
88 |   print_buffer(signature, sizeof(signature));
89 | 
90 |   int exit_code = 0;
91 |   if (ed25519_verify(signature, data, data_len, ctxt.public_key) == 0) {
92 |     printf("Failed in verifying the signature\n");
93 |     exit_code = -1; /* a failed verification must fail the test run */
94 |   } else {
95 |     printf("Signature verified\n");
96 |   }
97 | 
98 |   release_ed25519_context(&ctxt);
99 |   return exit_code;
100 | }


--------------------------------------------------------------------------------