├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── ci
    ├── build.sh
    ├── buildkite.yml
    ├── docker-run.sh
    ├── docker-sgx
    │   ├── Dockerfile
    │   └── build.sh
    ├── env.sh
    ├── upload-ci-artifact.sh
    └── upload-github-release-asset.sh
└── src
    ├── Makefile
    ├── config.h
    ├── cuda-crypt
        ├── aes.h
        ├── aes_cbc.cu
        ├── aes_core.cu
        ├── aes_locl.h
        ├── chacha.h
        ├── chacha20_core.cu
        ├── chacha_cbc.cu
        ├── common.cu
        ├── cryptoerr.h
        ├── modes.h
        ├── modes_lcl.h
        ├── perftime.h
        └── test.cu
    ├── cuda-ecc-ed25519
        ├── common.cu
        ├── ed25519.h
        ├── fe.cu
        ├── fe.h
        ├── fixedint.h
        ├── ge.cu
        ├── ge.h
        ├── gpu_ctx.cu
        ├── gpu_ctx.h
        ├── keypair.cu
        ├── license.txt
        ├── main.cu
        ├── perftime.h
        ├── precomp_data.h
        ├── sc.cu
        ├── sc.h
        ├── seed.cu
        ├── sha512.cu
        ├── sha512.h
        ├── sign.cu
        ├── vanity.cu
        └── verify.cu
    ├── cuda-headers
        └── gpu_common.h
    ├── cuda-poh-verify
        └── poh_verify.cu
    ├── cuda-sha256
        ├── sha256.cu
        └── tomcrypt_macros.h
    ├── gpu-common.mk
    ├── jerasure-sys
        ├── Cargo.toml
        ├── build.rs
        ├── gf-complete
        └── jerasure
    ├── sgx-ecc-ed25519
        ├── Makefile
        ├── add_scalar.c
        ├── build.sh
        ├── ed25519.h
        ├── fe.c
        ├── fe.h
        ├── fixedint.h
        ├── ge.c
        ├── ge.h
        ├── key_exchange.c
        ├── keypair.c
        ├── precomp_data.h
        ├── sc.c
        ├── sc.h
        ├── seed.c
        ├── sha512.c
        ├── sha512.h
        ├── sign.c
        └── verify.c
    └── sgx
        ├── build.sh
        ├── signing
            ├── Makefile
            ├── signing.config.xml
            ├── signing.edl
            ├── signing.lds
            ├── signing_internal.h
            ├── signing_public.h
            ├── signing_trusted.c
            └── signing_untrusted.c
        └── test
            ├── Makefile
            └── signing_test.c


/.gitignore:
--------------------------------------------------------------------------------
1 | # temp folder
2 | /temp/
3 | 
4 | # build output folders
5 | /libs/
6 | /dist/
7 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright 2018 Solana Labs, Inc.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | You may obtain a copy of the License at
 6 | 
 7 |     http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Top-level build driver: delegates the real work to src/Makefile.
 2 | # On macOS (Darwin) `all` has no prerequisites, so nothing is built there and
 3 | # `install` only creates and lists $(DESTDIR).
 4 | OS := $(shell uname)
 5 | 
 6 | # These targets are commands, not files; declaring them phony keeps a stray
 7 | # file named `all`, `install`, `cuda_crypt` or `clean` from masking them.
 8 | .PHONY: all cuda_crypt install clean
 9 | 
10 | all:
11 | ifeq ($(OS),Darwin)
12 | SO=dylib
13 | else
14 | SO=so
15 | all: cuda_crypt
16 | endif
17 | 
18 | # Build flavour handed to src/Makefile; build products end up in src/$(V).
19 | V=release
20 | 
21 | cuda_crypt:
22 | 	$(MAKE) V=$(V) -C src
23 | 
24 | # Directory that `install` copies the built library into.
25 | DESTDIR ?= dist
26 | install:
27 | 	mkdir -p $(DESTDIR)
28 | ifneq ($(OS),Darwin)
29 | 	cp -f src/$(V)/libcuda-crypt.so $(DESTDIR)
30 | endif
31 | 	ls -lh $(DESTDIR)
32 | 
33 | clean:
34 | 	$(MAKE) V=$(V) -C src clean
35 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # A CUDA based ed25519 vanity key finder (in Base58)
 2 | 
 3 | This is a GPU based vanity key finder. It does not currently use a CSPRNG and
 4 | any key generated by this tool is 100% not secure to use. Great fun for Tour de
 5 | Sol though.
 6 | 
 7 | ## Configure
 8 | Open `src/config.h` and add any prefixes you want to scan for to the list.
 9 | 
10 | ## Building
11 | Make sure the CUDA binaries are in your `PATH`, then build:
12 | 
13 | ```bash
14 | $ export PATH=/usr/local/cuda/bin:$PATH
15 | $ make -j$(nproc)
16 | ```
17 | 
18 | ## Running
19 | 
20 | ```bash
21 | LD_LIBRARY_PATH=./src/release ./src/release/cuda_ed25519_vanity
22 | ```
23 | 


--------------------------------------------------------------------------------
/ci/build.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -e
 3 | cd "$(dirname "$0")/.."
 4 | # From the repository root, load the normalized CI_* variables and the upload-ci-artifact helper.
 5 | source ci/env.sh
 6 | source ci/upload-ci-artifact.sh
 7 | # CUDA toolkit installations to build against; each one gets its own dist/<basename> directory.
 8 | CUDA_HOMES=(
 9 |   /usr/local/cuda-10.0
10 |   /usr/local/cuda-10.1
11 | )
12 | # Each toolkit is built in a subshell so its exported environment does not carry over to the next one.
13 | for CUDA_HOME in "${CUDA_HOMES[@]}"; do
14 |   CUDA_HOME_BASE="$(basename "$CUDA_HOME")"
15 |   echo "--- Build: $CUDA_HOME_BASE"
16 |   (
17 |     if [[ ! -d $CUDA_HOME/lib64 ]]; then
18 |       echo "Invalid CUDA_HOME: $CUDA_HOME"
19 |       exit 1
20 |     fi
21 | 
22 |     set -x
23 |     export LD_LIBRARY_PATH=$CUDA_HOME/lib64
24 |     export PATH=$PATH:$HOME/.cargo/bin/:$CUDA_HOME/bin
25 |     export DESTDIR=dist/$CUDA_HOME_BASE
26 |     # Build, copy the result into $DESTDIR, then clean so the next toolkit starts from scratch.
27 |     make -j"$(nproc)"
28 |     make install
29 |     make clean
30 |     # Keep the toolkit's version file next to the artifacts it produced.
31 |     cp -vf "$CUDA_HOME"/version.txt "$DESTDIR"/cuda-version.txt
32 |   )
33 | done
34 | # Run both SGX build scripts inside the solanalabs/sgxsdk docker image.
35 | echo --- Build SGX
36 | (
37 |   set -x
38 |   ci/docker-run.sh solanalabs/sgxsdk src/sgx-ecc-ed25519/build.sh
39 |   ci/docker-run.sh solanalabs/sgxsdk src/sgx/build.sh
40 | )
41 | # Bundle everything under dist/ together with a file recording the current git commit.
42 | echo --- Create tarball
43 | (
44 |   set -x
45 |   cd dist
46 |   git rev-parse HEAD | tee solana-perf-HEAD.txt
47 |   tar zcvf ../solana-perf.tgz ./*
48 | )
49 | 
50 | upload-ci-artifact solana-perf.tgz
51 | # Only builds with CI_TAG set also publish the tarball as a GitHub release asset.
52 | [[ -n $CI_TAG ]] || exit 0
53 | ci/upload-github-release-asset.sh solana-perf.tgz
54 | exit 0
55 | 


--------------------------------------------------------------------------------
/ci/buildkite.yml:
--------------------------------------------------------------------------------
1 | steps:
2 |   - command: "ci/build.sh"
3 |     name: "build"
4 |     timeout_in_minutes: 20
5 |     agents:
6 |       - "queue=cuda"
7 | 


--------------------------------------------------------------------------------
/ci/docker-run.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -e
 3 | 
 4 | usage() {
 5 |   echo "Usage: $0 [--shell] [--nopull] <docker image name> [command]"
 6 |   echo
 7 |   echo Runs command in the specified docker image with
 8 |   echo a CI-appropriate environment.
 9 |   echo
10 |   echo "--nopull   Skip the dockerhub image update"
11 |   echo "--shell    Skip command and enter an interactive shell"
12 |   echo
13 | }
14 | 
15 | cd "$(dirname "$0")/.."
16 | 
17 | # Leading options are accepted in any order; anything else starting with "--" is rejected.
18 | INTERACTIVE=false
19 | NOPULL=false
20 | while [[ $1 = --* ]]; do
21 |   case $1 in
22 |   --shell) INTERACTIVE=true ;;
23 |   --nopull) NOPULL=true ;;
24 |   *) usage; echo "Error: unknown option: $1"; exit 1 ;;
25 |   esac
26 |   shift
27 | done
28 | 
29 | IMAGE="$1"
30 | if [[ -z "$IMAGE" ]]; then
31 |   usage
32 |   echo Error: image not defined
33 |   exit 1
34 | fi
35 | $NOPULL || docker pull "$IMAGE"
36 | shift
37 | # Base docker-run arguments: mount the current directory at /solana, work from there, remove the container on exit.
38 | ARGS=(
39 |   --workdir /solana
40 |   --volume "$PWD:/solana"
41 |   --rm
42 | )
43 | 
44 | if [[ -n $CI ]]; then
45 |   # Share the real ~/.cargo between docker containers in CI for speed
46 |   ARGS+=(--volume "$HOME:/home")
47 | else
48 |   # Avoid sharing ~/.cargo when building locally to avoid a mixed macOS/Linux
49 |   # ~/.cargo
50 |   ARGS+=(--volume "$PWD:/home")
51 | fi
52 | ARGS+=(--env "CARGO_HOME=/home/.cargo")
53 | 
54 | # kcov tries to set the personality of the binary which docker
55 | # doesn't allow by default.
56 | ARGS+=(--security-opt "seccomp=unconfined")
57 | 
58 | # Ensure files are created with the current host uid/gid
59 | if [[ -z "$SOLANA_DOCKER_RUN_NOSETUID" ]]; then
60 |   ARGS+=(--user "$(id -u):$(id -g)")
61 | fi
62 | 
63 | # Environment variables to propagate into the container
64 | ARGS+=(
65 |   --env BUILDKITE
66 |   --env BUILDKITE_AGENT_ACCESS_TOKEN
67 |   --env BUILDKITE_BRANCH
68 |   --env BUILDKITE_JOB_ID
69 |   --env BUILDKITE_TAG
70 |   --env CODECOV_TOKEN
71 |   --env CRATES_IO_TOKEN
72 |   --env SNAPCRAFT_CREDENTIALS_KEY
73 | )
74 | # With --shell, any command given is ignored and an interactive bash is started instead.
75 | if $INTERACTIVE; then
76 |   if [[ -n $1 ]]; then
77 |     echo
78 |     echo "Note: '$*' ignored due to --shell argument"
79 |     echo
80 |   fi
81 |   set -x
82 |   exec docker run --interactive --tty "${ARGS[@]}" "$IMAGE" bash
83 | fi
84 | # Otherwise run the remaining arguments as the command; exec replaces this script with docker.
85 | set -x
86 | exec docker run "${ARGS[@]}" "$IMAGE" "$@"
87 | 


--------------------------------------------------------------------------------
/ci/docker-sgx/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:18.04
 2 | # Packages installed up front for the SGX SDK/PSW installation below.
 3 | ENV DEBIAN_FRONTEND=noninteractive
 4 | RUN apt-get update && \
 5 |     apt-get install -y build-essential ocaml ocamlbuild automake autoconf libtool wget python libssl-dev libcurl4-openssl-dev protobuf-compiler libprotobuf-dev sudo kmod vim curl git-core libprotobuf-c0-dev libboost-thread-dev libboost-system-dev liblog4cpp5-dev libjsoncpp-dev alien uuid-dev libxml2-dev cmake pkg-config expect
 6 | 
 7 | # Download and install the SGX enclave-common package and SDK 2.3.1. The SDK installer reads the answers "no" and "/opt" from stdin; printf is used because `echo -e` is not portable to the /bin/sh that RUN uses (it can emit a literal "-e").
 8 | RUN mkdir /root/sgx && mkdir /etc/init/ && \
 9 |     wget -O /root/sgx/sdk.bin https://download.01.org/intel-sgx/linux-2.3.1/ubuntu18.04/sgx_linux_x64_sdk_2.3.101.46683.bin && \
10 |     wget -O /root/sgx/psw.deb https://download.01.org/intel-sgx/linux-2.3.1/ubuntu18.04/libsgx-enclave-common_2.3.101.46683-1_amd64.deb && \
11 |     cd /root/sgx && \
12 |     dpkg -i /root/sgx/psw.deb && \
13 |     chmod +x /root/sgx/sdk.bin && \
14 |     printf 'no\n/opt\n' | /root/sgx/sdk.bin && \
15 |     echo 'source /opt/sgxsdk/environment' >> /root/.bashrc && \
16 |     rm -rf /root/sgx/*
17 | 
18 | WORKDIR /root
19 | 
20 | 


--------------------------------------------------------------------------------
/ci/docker-sgx/build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -ex
3 | # Work from this script's directory so the Dockerfile next to it is the build context.
4 | cd "$(dirname "$0")"
5 | # Build the image and push it under the name ci/build.sh passes to ci/docker-run.sh.
6 | docker build -t solanalabs/sgxsdk .
7 | docker push solanalabs/sgxsdk
8 | 
9 | 


--------------------------------------------------------------------------------
/ci/env.sh:
--------------------------------------------------------------------------------
 1 | #
 2 | # Normalized CI environment variables
 3 | #
 4 | # |source| me
 5 | #
 6 | 
 7 | if [[ -n $CI ]]; then
 8 |   export CI=1
 9 |   if [[ -n $TRAVIS ]]; then
10 |     export CI_BRANCH=$TRAVIS_BRANCH
11 |     export CI_BUILD_ID=$TRAVIS_BUILD_ID
12 |     export CI_COMMIT=$TRAVIS_COMMIT
13 |     export CI_JOB_ID=$TRAVIS_JOB_ID
14 |     if [[ -n $TRAVIS_PULL_REQUEST && $TRAVIS_PULL_REQUEST != false ]]; then # holds the PR number or "false": compare it, never execute it
15 |       export CI_PULL_REQUEST=true
16 |     else
17 |       export CI_PULL_REQUEST=
18 |     fi
19 |     export CI_OS_NAME=$TRAVIS_OS_NAME
20 |     export CI_REPO_SLUG=$TRAVIS_REPO_SLUG
21 |     export CI_TAG=$TRAVIS_TAG
22 |   elif [[ -n $BUILDKITE ]]; then
23 |     export CI_BRANCH=$BUILDKITE_BRANCH
24 |     export CI_BUILD_ID=$BUILDKITE_BUILD_ID
25 |     export CI_COMMIT=$BUILDKITE_COMMIT
26 |     export CI_JOB_ID=$BUILDKITE_JOB_ID
27 |     # The standard BUILDKITE_PULL_REQUEST environment variable is always "false" due
28 |     # to how solana-ci-gate is used to trigger PR builds rather than using the
29 |     # standard Buildkite PR trigger, so the branch name is inspected instead.
30 |     if [[ $CI_BRANCH =~ ^pull/ ]]; then # anchored regex: only branches that start with "pull/"
31 |       export CI_PULL_REQUEST=true
32 |     else
33 |       export CI_PULL_REQUEST=
34 |     fi
35 |     export CI_OS_NAME=linux
36 |     export CI_REPO_SLUG=$BUILDKITE_ORGANIZATION_SLUG/$BUILDKITE_PIPELINE_SLUG
37 |     # TRIGGERED_BUILDKITE_TAG is a workaround to propagate BUILDKITE_TAG into
38 |     # the solana-secondary builder
39 |     if [[ -n $TRIGGERED_BUILDKITE_TAG ]]; then
40 |       export CI_TAG=$TRIGGERED_BUILDKITE_TAG
41 |     else
42 |       export CI_TAG=$BUILDKITE_TAG
43 |     fi
44 |   elif [[ -n $APPVEYOR ]]; then
45 |     export CI_BRANCH=$APPVEYOR_REPO_BRANCH
46 |     export CI_BUILD_ID=$APPVEYOR_BUILD_ID
47 |     export CI_COMMIT=$APPVEYOR_REPO_COMMIT
48 |     export CI_JOB_ID=$APPVEYOR_JOB_ID
49 |     if [[ -n $APPVEYOR_PULL_REQUEST_NUMBER ]]; then
50 |       export CI_PULL_REQUEST=true
51 |     else
52 |       export CI_PULL_REQUEST=
53 |     fi
54 |     if [[ $CI_LINUX = True ]]; then
55 |       export CI_OS_NAME=linux
56 |     elif [[ $CI_WINDOWS = True ]]; then
57 |       export CI_OS_NAME=windows
58 |     fi
59 |     export CI_REPO_SLUG=$APPVEYOR_REPO_NAME
60 |     export CI_TAG=$APPVEYOR_REPO_TAG_NAME
61 |   fi
62 | else
63 |   export CI=
64 |   export CI_BRANCH=
65 |   export CI_BUILD_ID=
66 |   export CI_COMMIT=
67 |   export CI_JOB_ID=
68 |   export CI_OS_NAME=
69 |   export CI_PULL_REQUEST=
70 |   export CI_REPO_SLUG=
71 |   export CI_TAG=
72 | fi
73 | 
74 | cat <<EOF
75 | CI=$CI
76 | CI_BRANCH=$CI_BRANCH
77 | CI_BUILD_ID=$CI_BUILD_ID
78 | CI_COMMIT=$CI_COMMIT
79 | CI_JOB_ID=$CI_JOB_ID
80 | CI_OS_NAME=$CI_OS_NAME
81 | CI_PULL_REQUEST=$CI_PULL_REQUEST
82 | CI_REPO_SLUG=$CI_REPO_SLUG
83 | CI_TAG=$CI_TAG
84 | EOF


--------------------------------------------------------------------------------
/ci/upload-ci-artifact.sh:
--------------------------------------------------------------------------------
 1 | # |source| me
 2 | 
 3 | upload-ci-artifact() {
 4 |   echo "--- artifact: $1"
 5 |   if [[ -r "$1" ]]; then
 6 |     ls -l "$1"
 7 |     if ${BUILDKITE:-false}; then
 8 |       (
 9 |         set -x
10 |         buildkite-agent artifact upload "$1"
11 |       )
12 |     fi
13 |   else
14 |     echo ^^^ +++
15 |     echo "$1 not found"
16 |   fi
17 | }
18 | 
19 | 


--------------------------------------------------------------------------------
/ci/upload-github-release-asset.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | #
 3 | # Uploads one or more files to a github release
 4 | #
 5 | # Prerequisites (all defined in the environment)
 6 | # 1) GITHUB_TOKEN: sent as the Authorization token for the upload
 7 | # 2) CI_TAG: tag whose release receives the files
 8 | # 3) CI_REPO_SLUG: repository path used in the GitHub API URLs
 9 | #
10 | set -e
11 | 
12 | if [[ -z $1 ]]; then
13 |   echo No files specified
14 |   exit 1
15 | fi
16 | 
17 | # Every required variable must be non-empty before any request is made.
18 | for var in GITHUB_TOKEN CI_TAG CI_REPO_SLUG; do
19 |   if [[ -z ${!var} ]]; then
20 |     echo "Error: $var not defined"
21 |     exit 1
22 |   fi
23 | done
24 | 
25 | # Extract the first numeric "id" field from the release-by-tag API response.
26 | releaseId=$( \
27 |   curl -s "https://api.github.com/repos/$CI_REPO_SLUG/releases/tags/$CI_TAG" \
28 |   | grep -m 1 \"id\": \
29 |   | sed -ne 's/^[^0-9]*\([0-9]*\),$/\1/p' \
30 | )
31 | # An empty id means the lookup failed; stop instead of uploading to a malformed URL.
32 | if [[ -z $releaseId ]]; then
33 |   echo "Error: unable to find a Github release for tag $CI_TAG of $CI_REPO_SLUG"
34 |   exit 1
35 | fi
36 | echo "Github release id for $CI_TAG is $releaseId"
37 | 
38 | for file in "$@"; do
39 |   echo "--- Uploading $file to tag $CI_TAG of $CI_REPO_SLUG"
40 |   curl \
41 |     --data-binary @"$file" \
42 |     -H "Authorization: token $GITHUB_TOKEN" \
43 |     -H "Content-Type: application/octet-stream" \
44 |     "https://uploads.github.com/repos/$CI_REPO_SLUG/releases/$releaseId/assets?name=$(basename "$file")"
45 |   echo
46 | done
47 | 


--------------------------------------------------------------------------------
/src/Makefile:
--------------------------------------------------------------------------------
  1 | #
  2 | #  Makefile to build libcuda-crypt.so
  3 | #   From sources in cuda-crypt, cuda-ecc-ed25519 and cuda-poh-verify
  4 | #
  5 | #   nvcc inserts conflicting driver symbols into a library (.so),
  6 | #   so more than one .so cannot be linked into a single program.
  7 | #   Shared objects with device kernels also did not seem to work--
  8 | #   they build, but the kernel launch is not successful (with cuda 9.2).
  9 | #   Hence, build the ed25519 signature verify and chacha encryption device
 10 | #   code into a single binary.
 11 | # V names the output directory; it defaults to debug and can be overridden on the command line (e.g. make V=release).
 12 | V=debug
 13 | include gpu-common.mk
 14 | # Names of the executables and of the shared library (built as lib$(LIB).so).
 15 | CHACHA_TEST_BIN=cuda_chacha_test
 16 | ECC_TEST_BIN=cuda_ed25519_verify
 17 | ECC_SCAN_BIN=cuda_ed25519_vanity
 18 | LIB=cuda-crypt
 19 | 
 20 | CUDA_HEADER_DIR:=cuda-headers
 21 | CUDA_SHA256_DIR:=cuda-sha256
 22 | 
 23 | CFLAGS+=-DENDIAN_NEUTRAL -DLTC_NO_ASM -I$(CUDA_HEADER_DIR) -I$(CUDA_SHA256_DIR)
 24 | # Aggregate target: the vanity scanner executable plus the shared library, both under $V/.
 25 | all: $V/$(ECC_SCAN_BIN) $(V)/lib$(LIB).so
 26 | 
 27 | ECC_DIR:=cuda-ecc-ed25519
 28 | # Each object rule compiles only its first prerequisite ($<); the other prerequisites are listed so that changing them triggers a rebuild.
 29 | KEYPAIR_SRCS:=$(addprefix $(ECC_DIR)/,keypair.cu ed25519.h ge.h)
 30 | $V/keypair.o: $(KEYPAIR_SRCS)
 31 | 	@mkdir -p $(@D)
 32 | 	$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
 33 | 
 34 | SEED_SRCS:=$(addprefix $(ECC_DIR)/,seed.cu ed25519.h)
 35 | $V/seed.o: $(SEED_SRCS)
 36 | 	@mkdir -p $(@D)
 37 | 	$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
 38 | 
 39 | SIGN_SRCS:=$(addprefix $(ECC_DIR)/,sign.cu sha512.h ge.h sc.h fe.cu ../$(CUDA_HEADER_DIR)/gpu_common.h ed25519.h)
 40 | $V/sign.o: $(SIGN_SRCS)
 41 | 	@mkdir -p $(@D)
 42 | 	$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
 43 | 
 44 | VERIFY_SRCS:=$(addprefix $(ECC_DIR)/,verify.cu sha512.cu ge.cu sc.cu fe.cu keypair.cu common.cu ed25519.h)
 45 | $V/verify.o: $(VERIFY_SRCS)
 46 | 	@mkdir -p $(@D)
 47 | 	$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
 48 | 
 49 | $V/gpu_ctx.o: $(addprefix $(ECC_DIR)/,gpu_ctx.cu gpu_ctx.h)
 50 | 	@mkdir -p $(@D)
 51 | 	$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
 52 | # The ChaCha and AES sources both live in cuda-crypt, so both lists use the CHACHA_DIR prefix.
 53 | CHACHA_DIR:=cuda-crypt
 54 | CHACHA_SRCS:=$(addprefix $(CHACHA_DIR)/,chacha_cbc.cu chacha.h common.cu)
 55 | 
 56 | $V/chacha_cbc.o: $(CHACHA_SRCS)
 57 | 	@mkdir -p $(@D)
 58 | 	$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
 59 | 
 60 | AES_SRCS:=$(addprefix $(CHACHA_DIR)/,aes_cbc.cu aes_core.cu modes_lcl.h common.cu)
 61 | 
 62 | $V/aes_cbc.o: $(AES_SRCS)
 63 | 	@mkdir -p $(@D)
 64 | 	$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
 65 | 
 66 | POH_DIR:=cuda-poh-verify
 67 | POH_SRCS:=$(addprefix $(POH_DIR)/,poh_verify.cu)
 68 | 
 69 | $V/poh_verify.o: $(POH_SRCS)
 70 | 	@mkdir -p $(@D)
 71 | 	$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
 72 | # All objects that go into the shared library.
 73 | CPU_GPU_OBJS=$(addprefix $V/,chacha_cbc.o aes_cbc.o verify.o poh_verify.o gpu_ctx.o sign.o seed.o keypair.o)
 74 | # The objects are compiled with -rdc=true, so a separate --device-link pass produces crypt-dlink.o from them.
 75 | $V/crypt-dlink.o: $(CPU_GPU_OBJS)
 76 | 	$(NVCC) -Xcompiler "-fPIC" $(CFLAGS) --device-link $^ --output-file $@
 77 | # The shared library combines the device-link object with the same set of objects.
 78 | $V/lib$(LIB).so: $V/crypt-dlink.o $(CPU_GPU_OBJS)
 79 | 	$(NVCC) -Xcompiler "-fPIC" --shared --output-file $@ $^
 80 | # The executables below link against the shared library through -L$V -l$(LIB).
 81 | $V/$(CHACHA_TEST_BIN): $(CHACHA_DIR)/test.cu $V/lib$(LIB).so
 82 | 	$(NVCC) $(CFLAGS) -L$V -l$(LIB) $< -o $@
 83 | 
 84 | $V/ecc_main.o: $(addprefix $(ECC_DIR)/,main.cu ed25519.h)
 85 | 	@mkdir -p $(@D)
 86 | 	$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
 87 | 
 88 | $V/$(ECC_TEST_BIN): $V/ecc_main.o $V/lib$(LIB).so
 89 | 	$(NVCC) $(CFLAGS) -L$V -l$(LIB) $< -o $@
 90 | 
 91 | $V/ecc_scan.o: $(addprefix $(ECC_DIR)/,vanity.cu ed25519.h)
 92 | 	@mkdir -p $(@D)
 93 | 	$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
 94 | 
 95 | $V/$(ECC_SCAN_BIN): $V/ecc_scan.o $V/lib$(LIB).so
 96 | 	$(NVCC) $(CFLAGS) -L$V -l$(LIB) $< -o $@
 97 | 
 98 | .PHONY: clean test
 99 | clean:
100 | 	rm -rf $V
101 | # Run the ChaCha and ed25519 test binaries from inside $(V); each recipe line is a separate command, so none may end in a backslash.
102 | test: $V/$(ECC_TEST_BIN) $V/$(CHACHA_TEST_BIN)
103 | 	cd $(V) && ./$(CHACHA_TEST_BIN) 64
104 | 	cd $(V) && ./$(ECC_TEST_BIN) 1 1 1 1 1 1
105 | 	cd $(V) && ./$(ECC_TEST_BIN) 64 1 1 1 1 0
106 | 	cd $(V) && ./$(ECC_TEST_BIN) 100201 1 1 4 10 1
107 | 


--------------------------------------------------------------------------------
/src/config.h:
--------------------------------------------------------------------------------
1 | #ifndef VANITY_CONFIG
2 | #define VANITY_CONFIG
3 | /* Key prefixes to scan for; per the README, add any wanted prefix strings to this list. */
4 | __device__ static char const *prefixes[] = {
5 | 	"hello",
6 | };
7 | 
8 | #endif
9 | 


--------------------------------------------------------------------------------
/src/cuda-crypt/aes.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved.
 3 |  *
 4 |  * Licensed under the OpenSSL license (the "License").  You may not use
 5 |  * this file except in compliance with the License.  You can obtain a copy
 6 |  * in the file LICENSE in the source distribution or at
 7 |  * https://www.openssl.org/source/license.html
 8 |  */
 9 | 
10 | #ifndef HEADER_AES_H
11 | # define HEADER_AES_H
12 | 
13 | #include <inttypes.h>
14 | # include <stddef.h>
15 | # ifdef  __cplusplus
16 | extern "C" {
17 | # endif
18 | 
19 | # define AES_ENCRYPT     1
20 | # define AES_DECRYPT     0
21 | 
22 | /*
23 |  * Because array size can't be a const in C, the following two are macros.
24 |  * Both sizes are in bytes.
25 |  */
26 | # define AES_MAXNR 14
27 | # define AES_BLOCK_SIZE 16
28 | 
29 | /* This should be a hidden type, but EVP requires that the size be known */
30 | struct aes_key_st {
31 | # ifdef AES_LONG
32 |     unsigned long rd_key[4 * (AES_MAXNR + 1)]; /* 4 * (14 + 1) = 60 words; presumably the expanded round keys -- TODO confirm */
33 | # else
34 |     unsigned int rd_key[4 * (AES_MAXNR + 1)]; /* same 60-word layout with a narrower word type */
35 | # endif
36 |     int rounds; /* presumably the round count for this key -- TODO confirm against the key-setup code */
37 | };
38 | typedef struct aes_key_st AES_KEY;
39 | 
40 | const char *AES_options(void);
41 | 
42 | int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
43 |                         AES_KEY *key);
44 | int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
45 |                         AES_KEY *key);
46 | 
47 | __host__ __device__ void AES_encrypt(const unsigned char *in, unsigned char *out,
48 |                  const AES_KEY *key);
49 | 
50 | void AES_decrypt(const unsigned char *in, unsigned char *out,
51 |                  const AES_KEY *key);
52 | 
53 | void AES_ecb_encrypt(const unsigned char *in, unsigned char *out,
54 |                      const AES_KEY *key, const int enc);
55 | 
56 | void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
57 |                      size_t length, const AES_KEY *key,
58 |                      unsigned char *ivec, const int enc);
59 | 
60 | void AES_cbc_encrypt_many(const unsigned char *in, unsigned char *out,
61 |                           size_t length, const AES_KEY *key,
62 |                           unsigned char *ivec, uint32_t num_keys, float* time_us);
63 | 
64 | void AES_cfb128_encrypt(const unsigned char *in, unsigned char *out,
65 |                         size_t length, const AES_KEY *key,
66 |                         unsigned char *ivec, int *num, const int enc);
67 | void AES_cfb1_encrypt(const unsigned char *in, unsigned char *out,
68 |                       size_t length, const AES_KEY *key,
69 |                       unsigned char *ivec, int *num, const int enc);
70 | void AES_cfb8_encrypt(const unsigned char *in, unsigned char *out,
71 |                       size_t length, const AES_KEY *key,
72 |                       unsigned char *ivec, int *num, const int enc);
73 | void AES_ofb128_encrypt(const unsigned char *in, unsigned char *out,
74 |                         size_t length, const AES_KEY *key,
75 |                         unsigned char *ivec, int *num);
76 | /* NB: the IV is _two_ blocks long */
77 | void AES_ige_encrypt(const unsigned char *in, unsigned char *out,
78 |                      size_t length, const AES_KEY *key,
79 |                      unsigned char *ivec, const int enc);
80 | /* NB: the IV is _four_ blocks long */
81 | void AES_bi_ige_encrypt(const unsigned char *in, unsigned char *out,
82 |                         size_t length, const AES_KEY *key,
83 |                         const AES_KEY *key2, const unsigned char *ivec,
84 |                         const int enc);
85 | 
86 | int AES_wrap_key(AES_KEY *key, const unsigned char *iv,
87 |                  unsigned char *out,
88 |                  const unsigned char *in, unsigned int inlen);
89 | int AES_unwrap_key(AES_KEY *key, const unsigned char *iv,
90 |                    unsigned char *out,
91 |                    const unsigned char *in, unsigned int inlen);
92 | 
93 | 
94 | # ifdef  __cplusplus
95 | }
96 | # endif
97 | 
98 | #endif
99 | 


--------------------------------------------------------------------------------
/src/cuda-crypt/aes_cbc.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved.
  3 |  *
  4 |  * Licensed under the OpenSSL license (the "License").  You may not use
  5 |  * this file except in compliance with the License.  You can obtain a copy
  6 |  * in the file LICENSE in the source distribution or at
  7 |  * https://www.openssl.org/source/license.html
  8 |  */
  9 | 
 10 | #include <algorithm>
 11 | #include "common.cu"
 12 | #include "aes.h"
 13 | #include "modes.h"
 14 | #include "perftime.h"
 15 | #include "modes_lcl.h"
 16 | #include "aes_core.cu"
 17 | #include "gpu_common.h"
 18 | 
 19 | #if !defined(STRICT_ALIGNMENT) && !defined(PEDANTIC)
 20 | # define STRICT_ALIGNMENT 0
 21 | #endif
 22 | /* CBC-encrypts len bytes from in to out with key. ivec supplies the IV and, unless len is 0, receives the last ciphertext block. l_te is passed through to AES_encrypt. */
 23 | __host__ __device__ void aes_cbc128_encrypt(const unsigned char* in, unsigned char* out,
 24 |                                             uint32_t len, const AES_KEY* key,
 25 |                                             unsigned char* ivec,
 26 |                                             const u32* l_te)
 27 | {
 28 |     size_t n;
 29 |     unsigned char *iv = ivec;
 30 |     /* iv always points at the block that the next plaintext block is XORed with */
 31 |     if (len == 0)
 32 |         return; /* nothing to do; ivec is left untouched */
 33 |     /* Fast path for whole 16-byte blocks; compiled out when OPENSSL_SMALL_FOOTPRINT is defined */
 34 | #if !defined(OPENSSL_SMALL_FOOTPRINT)
 35 |     if (STRICT_ALIGNMENT &&
 36 |         ((size_t)in | (size_t)out | (size_t)ivec) % sizeof(size_t) != 0) {
 37 |         while (len >= 16) { /* byte-wise XOR for pointers that are not size_t-aligned */
 38 |             for (n = 0; n < 16; ++n)
 39 |                 out[n] = in[n] ^ iv[n];
 40 |             AES_encrypt(out, out, key, l_te); /* encrypt the XORed block in place */
 41 |             iv = out; /* the ciphertext just produced chains into the next block */
 42 |             len -= 16;
 43 |             in += 16;
 44 |             out += 16;
 45 |         }
 46 |     } else {
 47 |         while (len >= 16) { /* same loop, XORing one size_t word at a time */
 48 |             for (n = 0; n < 16; n += sizeof(size_t))
 49 |                 *(size_t *)(out + n) =
 50 |                     *(size_t *)(in + n) ^ *(size_t *)(iv + n);
 51 |             AES_encrypt(out, out, key, l_te);
 52 |             iv = out;
 53 |             len -= 16;
 54 |             in += 16;
 55 |             out += 16;
 56 |         }
 57 |     }
 58 | #endif
 59 |     while (len) { /* whatever is left: a partial final block, or everything when the fast path is compiled out */
 60 |         for (n = 0; n < 16 && n < len; ++n)
 61 |             out[n] = in[n] ^ iv[n];
 62 |         for (; n < 16; ++n)
 63 |             out[n] = iv[n]; /* bytes past len: equivalent to XORing zero padding with iv */
 64 |         AES_encrypt(out, out, key, l_te);
 65 |         iv = out;
 66 |         if (len <= 16)
 67 |             break; /* that was the final block */
 68 |         len -= 16;
 69 |         in += 16;
 70 |         out += 16;
 71 |     }
 72 |     memcpy(ivec, iv, 16); /* hand the last ciphertext block back to the caller as the next IV */
 73 | }
 74 | /* CBC-decrypts len bytes from in to out, using block as the single-block decrypt function. ivec holds the IV on entry and is updated with ciphertext from in as blocks are consumed. */
 75 | void CRYPTO_cbc128_decrypt(const unsigned char *in, unsigned char *out,
 76 |                            size_t len, const AES_KEY *key,
 77 |                            unsigned char ivec[16], block128_f block)
 78 | {
 79 |     size_t n;
 80 |     union {
 81 |         size_t t[16 / sizeof(size_t)];
 82 |         unsigned char c[16];
 83 |     } tmp; /* scratch block, viewable as bytes or as size_t words */
 84 | 
 85 |     if (len == 0)
 86 |         return;
 87 | 
 88 | #if !defined(OPENSSL_SMALL_FOOTPRINT)
 89 |     if (in != out) { /* separate buffers: the previous ciphertext block can be read straight from in */
 90 |         const unsigned char *iv = ivec;
 91 | 
 92 |         if (STRICT_ALIGNMENT &&
 93 |             ((size_t)in | (size_t)out | (size_t)ivec) % sizeof(size_t) != 0) {
 94 |             while (len >= 16) {
 95 |                 (*block) (in, out, key);
 96 |                 for (n = 0; n < 16; ++n)
 97 |                     out[n] ^= iv[n];
 98 |                 iv = in; /* this ciphertext block is the IV for the next one */
 99 |                 len -= 16;
100 |                 in += 16;
101 |                 out += 16;
102 |             }
103 |         } else if (16 % sizeof(size_t) == 0) { /* always true */
104 |             while (len >= 16) {
105 |                 size_t *out_t = (size_t *)out, *iv_t = (size_t *)iv;
106 | 
107 |                 (*block) (in, out, key);
108 |                 for (n = 0; n < 16 / sizeof(size_t); n++)
109 |                     out_t[n] ^= iv_t[n];
110 |                 iv = in;
111 |                 len -= 16;
112 |                 in += 16;
113 |                 out += 16;
114 |             }
115 |         }
116 |         memcpy(ivec, iv, 16); /* store the last ciphertext block consumed as the new IV */
117 |     } else { /* in-place: decrypt into tmp and save each ciphertext byte/word before it is overwritten */
118 |         if (STRICT_ALIGNMENT &&
119 |             ((size_t)in | (size_t)out | (size_t)ivec) % sizeof(size_t) != 0) {
120 |             unsigned char c;
121 |             while (len >= 16) {
122 |                 (*block) (in, tmp.c, key);
123 |                 for (n = 0; n < 16; ++n) {
124 |                     c = in[n];
125 |                     out[n] = tmp.c[n] ^ ivec[n];
126 |                     ivec[n] = c;
127 |                 }
128 |                 len -= 16;
129 |                 in += 16;
130 |                 out += 16;
131 |             }
132 |         } else if (16 % sizeof(size_t) == 0) { /* always true */
133 |             while (len >= 16) {
134 |                 size_t c, *out_t = (size_t *)out, *ivec_t = (size_t *)ivec;
135 |                 const size_t *in_t = (const size_t *)in;
136 | 
137 |                 (*block) (in, tmp.c, key);
138 |                 for (n = 0; n < 16 / sizeof(size_t); n++) {
139 |                     c = in_t[n];
140 |                     out_t[n] = tmp.t[n] ^ ivec_t[n];
141 |                     ivec_t[n] = c;
142 |                 }
143 |                 len -= 16;
144 |                 in += 16;
145 |                 out += 16;
146 |             }
147 |         }
148 |     }
149 | #endif
150 |     while (len) { /* remaining bytes: a partial final block, or everything when the fast path is compiled out */
151 |         unsigned char c;
152 |         (*block) (in, tmp.c, key);
153 |         for (n = 0; n < 16 && n < len; ++n) {
154 |             c = in[n];
155 |             out[n] = tmp.c[n] ^ ivec[n];
156 |             ivec[n] = c;
157 |         }
158 |         if (len <= 16) {
159 |             for (; n < 16; ++n)
160 |                 ivec[n] = in[n]; /* NOTE(review): reads in[n] for n >= len, so in must hold a full 16-byte block here */
161 |             break;
162 |         }
163 |         len -= 16;
164 |         in += 16;
165 |         out += 16;
166 |     }
167 | }
168 | 
169 | /* CBC entry point: a non-zero enc encrypts via aes_cbc128_encrypt with the g_Te0 table, zero decrypts via CRYPTO_cbc128_decrypt using AES_decrypt. */
170 | void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
171 |                      size_t len, const AES_KEY *key,
172 |                      unsigned char *ivec, const int enc)
173 | {
174 |     if (!enc) {
175 |         CRYPTO_cbc128_decrypt(in, out, len, key, ivec,
176 |                               (block128_f) AES_decrypt);
177 |         return;
178 |     }
179 |     aes_cbc128_encrypt(in, out, len, key, ivec, g_Te0);
180 | }
181 | /* One thread per key: thread i CBC-encrypts the same input (length bytes) with keys[i] and the 16-byte IV at ivec[i * AES_BLOCK_SIZE], writing to output[i * length]. */
182 | __global__ void CRYPTO_cbc128_encrypt_kernel(const unsigned char* input, unsigned char* output,
183 |                                              size_t length, const AES_KEY* keys,
184 |                                              unsigned char* ivec, uint32_t num_keys,
185 |                                              unsigned char* sha_state,
186 |                                              uint32_t* sample_idx,
187 |                                              uint32_t sample_len,
188 |                                              uint32_t block_offset)
189 | {
190 |     size_t i = (size_t)(blockIdx.x * blockDim.x + threadIdx.x); /* global thread index, used as the key index */
191 |     /* On the device, stage g_Te0 in shared memory; otherwise use g_Te0 directly */
192 | //#if 0
193 | #ifdef __CUDA_ARCH__
194 |     __shared__ u32 l_te[256];
195 |     uint32_t tid = threadIdx.x;
196 |     l_te[tid] = g_Te0[tid]; /* one entry per thread: fills all 256 entries without overrun only when blockDim.x == 256 -- confirm at the launch site */
197 |     __syncthreads(); /* the whole table must be written before any thread encrypts */
198 | #else
199 |     const u32* l_te = g_Te0;
200 | #endif
201 | 
202 |     if (i < num_keys) { /* threads beyond the last key do nothing */
203 |         aes_cbc128_encrypt(input, &output[i * length], length, &keys[i], &ivec[i * AES_BLOCK_SIZE], l_te);
204 |         /* sha_state, sample_idx, sample_len and block_offset appear only in the disabled code below */
205 |         /*for (uint32_t j = 0; j < sample_len; j++) {
206 |             if (sample_idx[j] > block_offset && sample_idx[j] < (block_offset + length)) {
207 |             }
208 |         }*/
209 |     }
210 | }
211 | 
212 | /*
213 |  * Encrypt one input buffer with AES-CBC under num_keys keys on the GPU.
214 |  *
215 |  * in/length  host plaintext, uploaded BLOCK_SIZE bytes at a time; length < BLOCK_SIZE is rejected.
216 |  * out        currently NOT written: the device-to-host ciphertext copy is disabled (see loop).
217 |  * keys/ivec  num_keys key schedules and num_keys * AES_BLOCK_SIZE IV bytes; ivec is read back.
218 |  * time_us    receives get_diff() taken from just before stream creation to the ivec read-back.
219 |  */
220 | void AES_cbc_encrypt_many(const unsigned char *in, unsigned char *out,
221 |                           size_t length, const AES_KEY *keys,
222 |                           unsigned char *ivec,
223 |                           uint32_t num_keys,
224 |                           float* time_us)
225 | {
226 |     if (length < BLOCK_SIZE) {
227 |         printf("ERROR! block size(%d) > length(%zu)\n", BLOCK_SIZE, length);
228 |         return;
229 |     }
230 |     uint8_t* in_device = NULL;
231 |     uint8_t* in_device0 = NULL;
232 |     uint8_t* in_device1 = NULL;
233 |     AES_KEY* keys_device = NULL;
234 |     uint8_t* output_device = NULL;
235 |     uint8_t* output_device0 = NULL;
236 |     uint8_t* output_device1 = NULL;
237 |     uint8_t* ivec_device = NULL;
238 |     // Sampling arguments are forwarded to the kernel as-is; nothing is allocated for them here.
239 |     uint8_t* sha_state_device = NULL;
240 |     uint32_t sample_len = 0;
241 |     uint32_t* samples_device = NULL;
242 | 
243 |     CUDA_CHK(cudaMalloc(&in_device0, BLOCK_SIZE));
244 |     CUDA_CHK(cudaMalloc(&in_device1, BLOCK_SIZE));
245 | 
246 |     size_t ctx_size = sizeof(AES_KEY) * num_keys;
247 |     CUDA_CHK(cudaMalloc(&keys_device, ctx_size));
248 |     CUDA_CHK(cudaMemcpy(keys_device, keys, ctx_size, cudaMemcpyHostToDevice));
249 | 
250 |     size_t ivec_size = AES_BLOCK_SIZE * num_keys;
251 |     CUDA_CHK(cudaMalloc(&ivec_device, ivec_size));
252 |     CUDA_CHK(cudaMemcpy(ivec_device, ivec, ivec_size, cudaMemcpyHostToDevice));
253 | 
254 |     size_t output_size = (size_t)num_keys * (size_t)BLOCK_SIZE;
255 |     CUDA_CHK(cudaMalloc(&output_device0, output_size));
256 |     CUDA_CHK(cudaMalloc(&output_device1, output_size));
257 | 
258 |     int num_threads_per_block = 256;
259 |     int num_blocks = (num_keys + num_threads_per_block - 1) / num_threads_per_block;
260 | 
261 |     perftime_t start, end;
262 |     get_time(&start);
263 | 
264 |     cudaStream_t stream, stream0, stream1;
265 |     CUDA_CHK(cudaStreamCreate(&stream0));
266 |     CUDA_CHK(cudaStreamCreate(&stream1));
267 | 
268 |     ssize_t slength = length;
269 |     size_t num_data_blocks = (length + BLOCK_SIZE - 1) / (BLOCK_SIZE);
270 | 
271 |     printf("num_blocks: %d threads_per_block: %d ivec_size: %zu keys size: %zu in: %p ind0: %p ind1: %p output_size: %zu num_data_blocks: %zu\n",
272 |                     num_blocks, num_threads_per_block, ivec_size, ctx_size, in, in_device0, in_device1, output_size, num_data_blocks);
273 | 
274 |     for (uint32_t i = 0;; i++) {
275 |         if (0) { // double buffering is switched off; the intended test was (i & 0x1)
276 |             in_device = in_device1;
277 |             output_device = output_device1;
278 |             stream = stream1;
279 |         } else {
280 |             in_device = in_device0;
281 |             output_device = output_device0;
282 |             stream = stream0;
283 |         }
284 |         size_t size = std::min(slength, (ssize_t)BLOCK_SIZE);
285 |         CUDA_CHK(cudaMemcpyAsync(in_device, in, size, cudaMemcpyHostToDevice, stream));
286 | 
287 |         CRYPTO_cbc128_encrypt_kernel<<<num_blocks, num_threads_per_block, 0, stream>>>(
288 |                             in_device, output_device, size,
289 |                             keys_device, ivec_device, num_keys,
290 |                             sha_state_device,
291 |                             samples_device,
292 |                             sample_len,
293 |                             i * BLOCK_SIZE);
294 |         // Ciphertext stays on the device: the per-key copy of output_device[j * size] into
295 |         // out[j * length + i * BLOCK_SIZE] that used to follow the launch is disabled.
296 | 
297 |         slength -= BLOCK_SIZE;
298 |         in += BLOCK_SIZE;
299 |         if (slength <= 0) {
300 |             break;
301 |         }
302 |     }
303 | 
304 |     CUDA_CHK(cudaMemcpy(ivec, ivec_device, ivec_size, cudaMemcpyDeviceToHost));
305 |     get_time(&end);
306 |     *time_us = get_diff(&start, &end);
307 |     // Release every device buffer and stream; kept outside the timed region.
308 |     CUDA_CHK(cudaFree(in_device0));
309 |     CUDA_CHK(cudaFree(in_device1));
310 |     CUDA_CHK(cudaFree(keys_device));
311 |     CUDA_CHK(cudaFree(ivec_device));
312 |     CUDA_CHK(cudaFree(output_device0));
313 |     CUDA_CHK(cudaFree(output_device1));
314 |     CUDA_CHK(cudaStreamDestroy(stream0));
315 |     CUDA_CHK(cudaStreamDestroy(stream1));
316 | }


--------------------------------------------------------------------------------
/src/cuda-crypt/aes_locl.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved.
 3 |  *
 4 |  * Licensed under the OpenSSL license (the "License").  You may not use
 5 |  * this file except in compliance with the License.  You can obtain a copy
 6 |  * in the file LICENSE in the source distribution or at
 7 |  * https://www.openssl.org/source/license.html
 8 |  */
 9 | 
10 | #ifndef HEADER_AES_LOCL_H
11 | # define HEADER_AES_LOCL_H
12 | 
13 | # include <stdio.h>
14 | # include <stdlib.h>
15 | # include <string.h>
16 | 
17 | #if 0
18 | # if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
19 | #  define SWAP(x) (_lrotl(x, 8) & 0x00ff00ff | _lrotr(x, 8) & 0xff00ff00)
20 | #  define GETU32(p) SWAP(*((u32 *)(p)))
21 | #  define PUTU32(ct, st) { *((u32 *)(ct)) = SWAP((st)); }
22 | # else
23 | #  define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] <<  8) ^ ((u32)(pt)[3]))
24 | #  define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >>  8); (ct)[3] = (u8)(st); }
25 | # endif
26 | #endif
27 | 
28 | # ifdef AES_LONG
29 | typedef unsigned long u32;
30 | # else
31 | typedef unsigned int u32;
32 | # endif
33 | typedef unsigned short u16;
34 | typedef unsigned char u8;
35 | 
36 | # define MAXKC   (256/32)
37 | # define MAXKB   (256/8)
38 | # define MAXNR   14
39 | 
40 | /* This controls loop-unrolling in aes_core.cu; it is only defined when __CUDA_ARCH__ is not (host passes). */
41 | #ifndef __CUDA_ARCH__
42 | # define FULL_UNROLL
43 | #endif
44 | 
45 | #endif                          /* !HEADER_AES_LOCL_H */
46 | 


--------------------------------------------------------------------------------
/src/cuda-crypt/chacha.h:
--------------------------------------------------------------------------------
 1 | #ifndef HEADER_CHACHA_H
 2 | # define HEADER_CHACHA_H
 3 | 
 4 | #include <inttypes.h>
 5 | # include <stddef.h>
 6 | # ifdef  __cplusplus
 7 | extern "C" {
 8 | # endif
 9 | /* Key and nonce sizes are in bytes: they dimension the uint8_t array parameters below. */
10 | #define CHACHA_KEY_SIZE 32
11 | #define CHACHA_NONCE_SIZE 12
12 | #define CHACHA_BLOCK_SIZE 64
13 | #define CHACHA_ROUNDS 500 /* round count chacha20_ctr_encrypt hands to the block function */
14 | #define SAMPLE_SIZE 32
15 | /* XORs in_len bytes of in with the keystream for (key, nonce), starting at block number counter. */
16 | void __host__ __device__ chacha20_ctr_encrypt(const uint8_t *in, uint8_t *out, size_t in_len,
17 |                                               const uint8_t key[CHACHA_KEY_SIZE], const uint8_t nonce[CHACHA_NONCE_SIZE],
18 |                                               uint32_t counter);
19 | 
20 | void cuda_chacha20_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t in_len,
21 |                                const uint8_t key[CHACHA_KEY_SIZE], uint8_t* ivec);
22 | 
23 | void chacha_ctr_encrypt_many(const unsigned char* in, unsigned char* out,
24 |                              size_t length,
25 |                              const uint8_t* keys,
26 |                              const uint8_t* nonces,
27 |                              uint32_t num_keys,
28 |                              float* time_us);
29 | 
30 | void chacha_cbc_encrypt_many(const uint8_t* in, uint8_t* out,
31 |                              size_t length, const uint8_t *keys,
32 |                              uint8_t* ivec,
33 |                              uint32_t num_keys,
34 |                              float* time_us);
35 | 
36 | void chacha_cbc_encrypt_many_sample(const uint8_t* in,
37 |                                     void* out,
38 |                                     size_t length,
39 |                                     const uint8_t *keys,
40 |                                     uint8_t* ivecs,
41 |                                     uint32_t num_keys,
42 |                                     const uint64_t* samples,
43 |                                     uint32_t num_samples,
44 |                                     uint64_t starting_block_offset,
45 |                                     float* time_us);
46 | /* NOTE(review): sha_state is opaque (void*) here; presumably one state per key — confirm in the implementation. */
47 | void chacha_end_sha_state(const void* sha_state, uint8_t* out, uint32_t num_keys);
48 | 
49 | void chacha_init_sha_state(void* sha_state, uint32_t num_keys);
50 | 
51 | # ifdef  __cplusplus
52 | }
53 | # endif
54 | 
55 | #endif
56 | 


--------------------------------------------------------------------------------
/src/cuda-crypt/chacha20_core.cu:
--------------------------------------------------------------------------------
  1 | #include "chacha.h"
  2 | // 32-bit left rotate; n must stay in 1..31 because the right shift uses (32 - n).
  3 | #define ROTL32(v, n) (((v) << (n)) | ((v) >> (32 - (n))))
  4 | 
  5 | #define ROTATE(v, c) ROTL32((v), (c))
  6 | 
  7 | #define XOR(v, w) ((v) ^ (w))
  8 | 
  9 | #define PLUS(x, y) ((x) + (y))
 10 | // Store the 32-bit value v at p as four bytes, least significant byte first.
 11 | #define U32TO8_LITTLE(p, v) \
 12 | { (p)[0] = ((v)      ) & 0xff; (p)[1] = ((v) >>  8) & 0xff; \
 13 |   (p)[2] = ((v) >> 16) & 0xff; (p)[3] = ((v) >> 24) & 0xff; }
 14 | // Load four bytes at p as a 32-bit value, least significant byte first.
 15 | #define U8TO32_LITTLE(p)   \
 16 |      (((u32)((p)[0])      ) | ((u32)((p)[1]) <<  8) | \
 17 |       ((u32)((p)[2]) << 16) | ((u32)((p)[3]) << 24)   )
 18 | // Quarter round on words a, b, c, d; expects an array named x to be in scope at the use site.
 19 | #define QUARTERROUND(a,b,c,d) \
 20 |   x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \
 21 |   x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \
 22 |   x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \
 23 |   x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7);
 24 | // When __CUDA_ARCH__ is defined sigma is declared __device__ __constant__; otherwise it is a plain static array.
 25 | #ifdef __CUDA_ARCH__
 26 | #define SIGMA_DEF __device__ __constant__
 27 | #else
 28 | #define SIGMA_DEF
 29 | #endif
 30 | 
 31 | // sigma contains the ChaCha constants, which happen to be an ASCII string.
 32 | static const uint8_t SIGMA_DEF sigma[16] = { 'e', 'x', 'p', 'a', 'n', 'd', ' ', '3',
 33 |                                              '2', '-', 'b', 'y', 't', 'e', ' ', 'k' };
 34 | // One ChaCha block: mixes a copy of input for num_rounds rounds, adds input back, emits 64 bytes.
 35 | static void __host__ __device__ chacha20_encrypt(const u32 input[16],
 36 |                                                  unsigned char output[64],
 37 |                                                  int num_rounds)
 38 | {
 39 |     u32 x[16];
 40 |     int r, w;
 41 |     for (w = 0; w < 16; ++w) x[w] = input[w];
 42 |     for (r = num_rounds; r > 0; r -= 2) {  // two rounds per pass: columns, then diagonals
 43 |         QUARTERROUND( 0, 4, 8,12)
 44 |         QUARTERROUND( 1, 5, 9,13)
 45 |         QUARTERROUND( 2, 6,10,14)
 46 |         QUARTERROUND( 3, 7,11,15)
 47 |         QUARTERROUND( 0, 5,10,15)
 48 |         QUARTERROUND( 1, 6,11,12)
 49 |         QUARTERROUND( 2, 7, 8,13)
 50 |         QUARTERROUND( 3, 4, 9,14)
 51 |     }
 52 |     for (w = 0; w < 16; ++w) {
 53 |         x[w] = PLUS(x[w], input[w]);  // feed the original words back in
 54 |     }
 55 |     for (w = 0; w < 16; ++w) {
 56 |         U32TO8_LITTLE(output + 4 * w, x[w]);  // serialise least significant byte first
 57 |     }
 58 | }
 59 | 
 60 | /*
 61 |  * XOR in_len bytes of `in` with a ChaCha keystream and write the result to `out`.
 62 |  *
 63 |  * The state is built from sigma (words 0-3), key (words 4-11), counter (word 12)
 64 |  * and nonce (words 13-15), each loaded as little-endian 32-bit words. Every pass
 65 |  * generates one 64-byte keystream block with CHACHA_ROUNDS rounds, consumes up to
 66 |  * 64 bytes of input, and then increments the counter word.
 67 |  */
 68 | void __host__ __device__ chacha20_ctr_encrypt(const uint8_t *in, uint8_t *out, size_t in_len,
 69 |                                               const uint8_t key[CHACHA_KEY_SIZE],
 70 |                                               const uint8_t nonce[CHACHA_NONCE_SIZE],
 71 |                                               uint32_t counter)
 72 | {
 73 |   uint32_t state[16];
 74 |   uint8_t keystream[64];
 75 |   size_t w, n, chunk;
 76 | 
 77 |   // Words 0-3: the sigma constant.
 78 |   for (w = 0; w < 4; w++) {
 79 |     state[w] = U8TO32_LITTLE(sigma + 4 * w);
 80 |   }
 81 | 
 82 |   // Words 4-11: the key.
 83 |   for (w = 0; w < 8; w++) {
 84 |     state[4 + w] = U8TO32_LITTLE(key + 4 * w);
 85 |   }
 86 | 
 87 |   // Word 12: starting block counter. Words 13-15: the nonce.
 88 |   state[12] = counter;
 89 |   for (w = 0; w < 3; w++) {
 90 |     state[13 + w] = U8TO32_LITTLE(nonce + 4 * w);
 91 |   }
 92 | 
 93 |   for (; in_len > 0; in_len -= chunk) {
 94 |     // A trailing partial block only uses the first in_len keystream bytes.
 95 |     chunk = (in_len < sizeof(keystream)) ? in_len : sizeof(keystream);
 96 | 
 97 |     chacha20_encrypt(state, keystream, CHACHA_ROUNDS);
 98 |     for (n = 0; n < chunk; n++) {
 99 |       out[n] = in[n] ^ keystream[n];
100 |     }
101 | 
102 |     out += chunk;
103 |     in += chunk;
104 |     state[12]++;
105 |   }
106 | }
107 | 
108 | 
109 | 


--------------------------------------------------------------------------------
/src/cuda-crypt/common.cu:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | 
3 | #ifndef COMMON_CU
4 | #define COMMON_CU
5 | /* 4 KiB; AES_cbc_encrypt_many uses it as the per-launch input chunk size and staging buffer size. */
6 | #define BLOCK_SIZE (4 * 1024)
7 | 
8 | #endif
9 | 


--------------------------------------------------------------------------------
/src/cuda-crypt/cryptoerr.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Generated by util/mkerr.pl DO NOT EDIT
 3 |  * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
 4 |  *
 5 |  * Licensed under the OpenSSL license (the "License").  You may not use
 6 |  * this file except in compliance with the License.  You can obtain a copy
 7 |  * in the file LICENSE in the source distribution or at
 8 |  * https://www.openssl.org/source/license.html
 9 |  */
10 | 
11 | #ifndef HEADER_CRYPTOERR_H
12 | # define HEADER_CRYPTOERR_H
13 | 
14 | # ifdef  __cplusplus
15 | extern "C"
16 | # endif
17 | int ERR_load_CRYPTO_strings(void);
18 | 
19 | /*
20 |  * CRYPTO function codes.
21 |  */
22 | # define CRYPTO_F_CMAC_CTX_NEW                            120
23 | # define CRYPTO_F_CRYPTO_DUP_EX_DATA                      110
24 | # define CRYPTO_F_CRYPTO_FREE_EX_DATA                     111
25 | # define CRYPTO_F_CRYPTO_GET_EX_NEW_INDEX                 100
26 | # define CRYPTO_F_CRYPTO_MEMDUP                           115
27 | # define CRYPTO_F_CRYPTO_NEW_EX_DATA                      112
28 | # define CRYPTO_F_CRYPTO_OCB128_COPY_CTX                  121
29 | # define CRYPTO_F_CRYPTO_OCB128_INIT                      122
30 | # define CRYPTO_F_CRYPTO_SET_EX_DATA                      102
31 | # define CRYPTO_F_FIPS_MODE_SET                           109
32 | # define CRYPTO_F_GET_AND_LOCK                            113
33 | # define CRYPTO_F_OPENSSL_ATEXIT                          114
34 | # define CRYPTO_F_OPENSSL_BUF2HEXSTR                      117
35 | # define CRYPTO_F_OPENSSL_FOPEN                           119
36 | # define CRYPTO_F_OPENSSL_HEXSTR2BUF                      118
37 | # define CRYPTO_F_OPENSSL_INIT_CRYPTO                     116
38 | # define CRYPTO_F_OPENSSL_LH_NEW                          126
39 | # define CRYPTO_F_OPENSSL_SK_DEEP_COPY                    127
40 | # define CRYPTO_F_OPENSSL_SK_DUP                          128
41 | # define CRYPTO_F_PKEY_HMAC_INIT                          123
42 | # define CRYPTO_F_PKEY_POLY1305_INIT                      124
43 | # define CRYPTO_F_PKEY_SIPHASH_INIT                       125
44 | # define CRYPTO_F_SK_RESERVE                              129
45 | 
46 | /*
47 |  * CRYPTO reason codes.
48 |  */
49 | # define CRYPTO_R_FIPS_MODE_NOT_SUPPORTED                 101
50 | # define CRYPTO_R_ILLEGAL_HEX_DIGIT                       102
51 | # define CRYPTO_R_ODD_NUMBER_OF_DIGITS                    103
52 | 
53 | #endif
54 | 


--------------------------------------------------------------------------------
/src/cuda-crypt/modes.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved.
  3 |  *
  4 |  * Licensed under the OpenSSL license (the "License").  You may not use
  5 |  * this file except in compliance with the License.  You can obtain a copy
  6 |  * in the file LICENSE in the source distribution or at
  7 |  * https://www.openssl.org/source/license.html
  8 |  */
  9 | 
 10 | #ifndef HEADER_MODES_H
 11 | # define HEADER_MODES_H
 12 | 
 13 | # include <stddef.h>
 14 | # include <stdint.h>
 15 | # ifdef  __cplusplus
 16 | extern "C" {
 17 | # endif
 18 | typedef void (*block128_f) (const unsigned char in[16],
 19 |                             unsigned char out[16], const void *key);
 20 | 
 21 | typedef void (*cbc128_f) (const unsigned char *in, unsigned char *out,
 22 |                           size_t len, const void *key,
 23 |                           unsigned char ivec[16], int enc);
 24 | 
 25 | typedef void (*ctr128_f) (const unsigned char *in, unsigned char *out,
 26 |                           size_t blocks, const void *key,
 27 |                           const unsigned char ivec[16]);
 28 | 
 29 | typedef void (*ccm128_f) (const unsigned char *in, unsigned char *out,
 30 |                           size_t blocks, const void *key,
 31 |                           const unsigned char ivec[16],
 32 |                           unsigned char cmac[16]);
 33 | /* Host/device CBC encrypt: unlike the decrypt prototype below, takes a uint32_t length and a uint32_t table pointer (Te3), not a block128_f. */
 34 | __host__ __device__ void CRYPTO_cbc128_encrypt(const unsigned char *in, unsigned char *out,
 35 |                            uint32_t len, const void *key,
 36 |                            unsigned char* ivec, const uint32_t* Te3);
 37 | 
 38 | void CRYPTO_cbc128_decrypt(const unsigned char *in, unsigned char *out,
 39 |                            size_t len, const void *key,
 40 |                            unsigned char ivec[16], block128_f block);
 41 | 
 42 | void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out,
 43 |                            size_t len, const void *key,
 44 |                            unsigned char ivec[16],
 45 |                            unsigned char ecount_buf[16], unsigned int *num,
 46 |                            block128_f block);
 47 | 
 48 | void CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out,
 49 |                                  size_t len, const void *key,
 50 |                                  unsigned char ivec[16],
 51 |                                  unsigned char ecount_buf[16],
 52 |                                  unsigned int *num, ctr128_f ctr);
 53 | 
 54 | void CRYPTO_ofb128_encrypt(const unsigned char *in, unsigned char *out,
 55 |                            size_t len, const void *key,
 56 |                            unsigned char ivec[16], int *num,
 57 |                            block128_f block);
 58 | 
 59 | void CRYPTO_cfb128_encrypt(const unsigned char *in, unsigned char *out,
 60 |                            size_t len, const void *key,
 61 |                            unsigned char ivec[16], int *num,
 62 |                            int enc, block128_f block);
 63 | void CRYPTO_cfb128_8_encrypt(const unsigned char *in, unsigned char *out,
 64 |                              size_t length, const void *key,
 65 |                              unsigned char ivec[16], int *num,
 66 |                              int enc, block128_f block);
 67 | void CRYPTO_cfb128_1_encrypt(const unsigned char *in, unsigned char *out,
 68 |                              size_t bits, const void *key,
 69 |                              unsigned char ivec[16], int *num,
 70 |                              int enc, block128_f block);
 71 | 
 72 | size_t CRYPTO_cts128_encrypt_block(const unsigned char *in,
 73 |                                    unsigned char *out, size_t len,
 74 |                                    const void *key, unsigned char ivec[16],
 75 |                                    block128_f block);
 76 | size_t CRYPTO_cts128_encrypt(const unsigned char *in, unsigned char *out,
 77 |                              size_t len, const void *key,
 78 |                              unsigned char ivec[16], cbc128_f cbc);
 79 | size_t CRYPTO_cts128_decrypt_block(const unsigned char *in,
 80 |                                    unsigned char *out, size_t len,
 81 |                                    const void *key, unsigned char ivec[16],
 82 |                                    block128_f block);
 83 | size_t CRYPTO_cts128_decrypt(const unsigned char *in, unsigned char *out,
 84 |                              size_t len, const void *key,
 85 |                              unsigned char ivec[16], cbc128_f cbc);
 86 | 
 87 | size_t CRYPTO_nistcts128_encrypt_block(const unsigned char *in,
 88 |                                        unsigned char *out, size_t len,
 89 |                                        const void *key,
 90 |                                        unsigned char ivec[16],
 91 |                                        block128_f block);
 92 | size_t CRYPTO_nistcts128_encrypt(const unsigned char *in, unsigned char *out,
 93 |                                  size_t len, const void *key,
 94 |                                  unsigned char ivec[16], cbc128_f cbc);
 95 | size_t CRYPTO_nistcts128_decrypt_block(const unsigned char *in,
 96 |                                        unsigned char *out, size_t len,
 97 |                                        const void *key,
 98 |                                        unsigned char ivec[16],
 99 |                                        block128_f block);
100 | size_t CRYPTO_nistcts128_decrypt(const unsigned char *in, unsigned char *out,
101 |                                  size_t len, const void *key,
102 |                                  unsigned char ivec[16], cbc128_f cbc);
103 | 
104 | typedef struct gcm128_context GCM128_CONTEXT;
105 | 
106 | GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block);
107 | void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block);
108 | void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
109 |                          size_t len);
110 | int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
111 |                       size_t len);
112 | int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
113 |                           const unsigned char *in, unsigned char *out,
114 |                           size_t len);
115 | int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
116 |                           const unsigned char *in, unsigned char *out,
117 |                           size_t len);
118 | int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
119 |                                 const unsigned char *in, unsigned char *out,
120 |                                 size_t len, ctr128_f stream);
121 | int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
122 |                                 const unsigned char *in, unsigned char *out,
123 |                                 size_t len, ctr128_f stream);
124 | int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
125 |                          size_t len);
126 | void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len);
127 | void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx);
128 | 
129 | typedef struct ccm128_context CCM128_CONTEXT;
130 | 
131 | void CRYPTO_ccm128_init(CCM128_CONTEXT *ctx,
132 |                         unsigned int M, unsigned int L, void *key,
133 |                         block128_f block);
134 | int CRYPTO_ccm128_setiv(CCM128_CONTEXT *ctx, const unsigned char *nonce,
135 |                         size_t nlen, size_t mlen);
136 | void CRYPTO_ccm128_aad(CCM128_CONTEXT *ctx, const unsigned char *aad,
137 |                        size_t alen);
138 | int CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx, const unsigned char *inp,
139 |                           unsigned char *out, size_t len);
140 | int CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx, const unsigned char *inp,
141 |                           unsigned char *out, size_t len);
142 | int CRYPTO_ccm128_encrypt_ccm64(CCM128_CONTEXT *ctx, const unsigned char *inp,
143 |                                 unsigned char *out, size_t len,
144 |                                 ccm128_f stream);
145 | int CRYPTO_ccm128_decrypt_ccm64(CCM128_CONTEXT *ctx, const unsigned char *inp,
146 |                                 unsigned char *out, size_t len,
147 |                                 ccm128_f stream);
148 | size_t CRYPTO_ccm128_tag(CCM128_CONTEXT *ctx, unsigned char *tag, size_t len);
149 | 
150 | typedef struct xts128_context XTS128_CONTEXT;
151 | 
152 | int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx,
153 |                           const unsigned char iv[16],
154 |                           const unsigned char *inp, unsigned char *out,
155 |                           size_t len, int enc);
156 | 
157 | size_t CRYPTO_128_wrap(void *key, const unsigned char *iv,
158 |                        unsigned char *out,
159 |                        const unsigned char *in, size_t inlen,
160 |                        block128_f block);
161 | 
162 | size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv,
163 |                          unsigned char *out,
164 |                          const unsigned char *in, size_t inlen,
165 |                          block128_f block);
166 | size_t CRYPTO_128_wrap_pad(void *key, const unsigned char *icv,
167 |                            unsigned char *out, const unsigned char *in,
168 |                            size_t inlen, block128_f block);
169 | size_t CRYPTO_128_unwrap_pad(void *key, const unsigned char *icv,
170 |                              unsigned char *out, const unsigned char *in,
171 |                              size_t inlen, block128_f block);
172 | 
173 | # ifndef OPENSSL_NO_OCB
174 | typedef struct ocb128_context OCB128_CONTEXT;
175 | 
176 | typedef void (*ocb128_f) (const unsigned char *in, unsigned char *out,
177 |                           size_t blocks, const void *key,
178 |                           size_t start_block_num,
179 |                           unsigned char offset_i[16],
180 |                           const unsigned char L_[][16],
181 |                           unsigned char checksum[16]);
182 | 
183 | OCB128_CONTEXT *CRYPTO_ocb128_new(void *keyenc, void *keydec,
184 |                                   block128_f encrypt, block128_f decrypt,
185 |                                   ocb128_f stream);
186 | int CRYPTO_ocb128_init(OCB128_CONTEXT *ctx, void *keyenc, void *keydec,
187 |                        block128_f encrypt, block128_f decrypt,
188 |                        ocb128_f stream);
189 | int CRYPTO_ocb128_copy_ctx(OCB128_CONTEXT *dest, OCB128_CONTEXT *src,
190 |                            void *keyenc, void *keydec);
191 | int CRYPTO_ocb128_setiv(OCB128_CONTEXT *ctx, const unsigned char *iv,
192 |                         size_t len, size_t taglen);
193 | int CRYPTO_ocb128_aad(OCB128_CONTEXT *ctx, const unsigned char *aad,
194 |                       size_t len);
195 | int CRYPTO_ocb128_encrypt(OCB128_CONTEXT *ctx, const unsigned char *in,
196 |                           unsigned char *out, size_t len);
197 | int CRYPTO_ocb128_decrypt(OCB128_CONTEXT *ctx, const unsigned char *in,
198 |                           unsigned char *out, size_t len);
199 | int CRYPTO_ocb128_finish(OCB128_CONTEXT *ctx, const unsigned char *tag,
200 |                          size_t len);
201 | int CRYPTO_ocb128_tag(OCB128_CONTEXT *ctx, unsigned char *tag, size_t len);
202 | void CRYPTO_ocb128_cleanup(OCB128_CONTEXT *ctx);
203 | # endif                          /* OPENSSL_NO_OCB */
204 | 
205 | # ifdef  __cplusplus
206 | }
207 | # endif
208 | 
209 | #endif
210 | 


--------------------------------------------------------------------------------
/src/cuda-crypt/modes_lcl.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
  3 |  *
  4 |  * Licensed under the OpenSSL license (the "License").  You may not use
  5 |  * this file except in compliance with the License.  You can obtain a copy
  6 |  * in the file LICENSE in the source distribution or at
  7 |  * https://www.openssl.org/source/license.html
  8 |  */
  9 | 
 10 | #include "modes.h"
 11 | 
 12 | #if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
 13 | typedef __int64 i64;
 14 | typedef unsigned __int64 u64;
 15 | # define U64(C) C##UI64
 16 | #elif defined(__arch64__)
 17 | typedef long i64;
 18 | typedef unsigned long u64;
 19 | # define U64(C) C##UL
 20 | #else
 21 | typedef long long i64;
 22 | typedef unsigned long long u64;
 23 | # define U64(C) C##ULL
 24 | #endif
 25 | 
 26 | typedef unsigned int u32;
 27 | typedef unsigned char u8;
 28 | 
 29 | #define STRICT_ALIGNMENT 1
 30 | #ifndef PEDANTIC
 31 | # if defined(__i386)    || defined(__i386__)    || \
 32 |      defined(__x86_64)  || defined(__x86_64__)  || \
 33 |      defined(_M_IX86)   || defined(_M_AMD64)    || defined(_M_X64) || \
 34 |      defined(__aarch64__)                       || \
 35 |      defined(__s390__)  || defined(__s390x__)
 36 | #  undef STRICT_ALIGNMENT
 37 | # endif
 38 | #endif
 39 | 
 40 | #if !defined(PEDANTIC) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
 41 | /* Device passes: selector 0x123 reverses the four bytes of x, and GETU32/PUTU32 below become direct u32 accesses. NOTE(review): presumably needs 4-byte-aligned pointers on the GPU — confirm callers. */
 42 | # if defined(__CUDA_ARCH__)
 43 | #  undef STRICT_ALIGNMENT
 44 | #  define BSWAP4(x) __byte_perm(x, 0, 0x123)
 45 | 
 46 | # elif defined(__GNUC__) && __GNUC__>=2
 47 | 
 48 | #  if defined(__x86_64) || defined(__x86_64__)
 49 | #   define BSWAP8(x) ({ u64 ret_=(x);                   \
 50 |                         asm ("bswapq %0"                \
 51 |                         : "+r"(ret_));   ret_;          })
 52 | #   define BSWAP4(x) ({ u32 ret_=(x);                   \
 53 |                         asm ("bswapl %0"                \
 54 |                         : "+r"(ret_));   ret_;          })
 55 | #  elif (defined(__i386) || defined(__i386__)) && !defined(I386_ONLY)
 56 | #   define BSWAP8(x) ({ u32 lo_=(u64)(x)>>32,hi_=(x);   \
 57 |                         asm ("bswapl %0; bswapl %1"     \
 58 |                         : "+r"(hi_),"+r"(lo_));         \
 59 |                         (u64)hi_<<32|lo_;               })
 60 | #   define BSWAP4(x) ({ u32 ret_=(x);                   \
 61 |                         asm ("bswapl %0"                \
 62 |                         : "+r"(ret_));   ret_;          })
 63 | #  elif defined(__aarch64__)
 64 | #   define BSWAP8(x) ({ u64 ret_;                       \
 65 |                         asm ("rev %0,%1"                \
 66 |                         : "=r"(ret_) : "r"(x)); ret_;   })
 67 | #   define BSWAP4(x) ({ u32 ret_;                       \
 68 |                         asm ("rev %w0,%w1"              \
 69 |                         : "=r"(ret_) : "r"(x)); ret_;   })
 70 | #  elif (defined(__arm__) || defined(__arm)) && !defined(STRICT_ALIGNMENT)
 71 | #   define BSWAP8(x) ({ u32 lo_=(u64)(x)>>32,hi_=(x);   \
 72 |                         asm ("rev %0,%0; rev %1,%1"     \
 73 |                         : "+r"(hi_),"+r"(lo_));         \
 74 |                         (u64)hi_<<32|lo_;               })
 75 | #   define BSWAP4(x) ({ u32 ret_;                       \
 76 |                         asm ("rev %0,%1"                \
 77 |                         : "=r"(ret_) : "r"((u32)(x)));  \
 78 |                         ret_;                           })
 79 | #  endif
 80 | 
 81 | # elif defined(_MSC_VER)
 82 | #  if _MSC_VER>=1300
 83 | #   include <stdlib.h>
 84 | #   pragma intrinsic(_byteswap_uint64,_byteswap_ulong)
 85 | #   define BSWAP8(x)    _byteswap_uint64((u64)(x))
 86 | #   define BSWAP4(x)    _byteswap_ulong((u32)(x))
 87 | 
 88 | #  elif defined(_M_IX86)
 89 | __inline u32 _bswap4(u32 val)
 90 | {
 91 | _asm mov eax, val _asm bswap eax}
 92 | #   define BSWAP4(x)    _bswap4(x)
 93 | 
 94 | #  endif // MSC_VER > 1300
 95 | # endif // def(MSC_VER)
 96 | #endif
 97 | 
/*
 * GETU32(p)    - read a 32-bit value from the four bytes at p.
 * PUTU32(p, v) - write the 32-bit value v to the four bytes at p.
 *
 * The fallback variants work byte by byte and place the most significant
 * byte at index 0.  When BSWAP4 is available and STRICT_ALIGNMENT is not
 * defined, a single u32 access through a cast pointer plus BSWAP4 is used
 * instead.
 * NOTE(review): the BSWAP4 variants match the fallback byte order only on a
 * little-endian host; the guard that selects BSWAP4 lies outside this view.
 *
 * The fallback PUTU32 expands to a brace-enclosed statement block, while the
 * BSWAP4 variant expands to a plain assignment expression.
 */
#if defined(BSWAP4) && !defined(STRICT_ALIGNMENT)
# define GETU32(p)       BSWAP4(*(const u32 *)(p))
# define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
#else
#  define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] <<  8) ^ ((u32)(pt)[3]))
#  define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >>  8); (ct)[3] = (u8)(st); }
#endif
105 | /*- GCM definitions */ typedef struct {
106 |     u64 hi, lo;
107 | } u128;
108 | 
/* Drop any TABLE_BITS defined earlier so the value set below always wins. */
#ifdef  TABLE_BITS
# undef  TABLE_BITS
#endif
/*
 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
 * never be set to 8 [or 1]. For further information see gcm128.c.
 */
#define TABLE_BITS 4
117 | 
/*
 * Per-operation GCM state.
 *
 * The member order is part of the contract with assembler modules (see the
 * comment inside), so members must not be reordered.  The size of Htable and
 * the presence of the gmult/ghash pointers depend on TABLE_BITS; Xn exists
 * only when OPENSSL_SMALL_FOOTPRINT is not defined.
 */
struct gcm128_context {
    /* Following 6 names follow names in GCM specification */
    /* Each of the six is one 16-byte block, viewable as u64/u32/u8/size_t. */
    union {
        u64 u[2];
        u32 d[4];
        u8 c[16];
        size_t t[16 / sizeof(size_t)];
    } Yi, EKi, EK0, len, Xi, H;
    /*
     * Relative position of Xi, H and pre-computed Htable is used in some
     * assembler modules, i.e. don't change the order!
     */
#if TABLE_BITS==8
    u128 Htable[256];
#else
    u128 Htable[16];
    /* Function pointers taking Xi and Htable; ghash also takes input bytes. */
    void (*gmult) (u64 Xi[2], const u128 Htable[16]);
    void (*ghash) (u64 Xi[2], const u128 Htable[16], const u8 *inp,
                   size_t len);
#endif
    /* NOTE(review): presumably residue counters for message and AAD - confirm in gcm128.c */
    unsigned int mres, ares;
    /* Block function and the opaque key pointer that goes with it. */
    block128_f block;
    void *key;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    /* 48-byte scratch buffer; its use is not visible in this header. */
    unsigned char Xn[48];
#endif
};
145 | 
146 | struct xts128_context {
147 |     void *key1, *key2;
148 |     block128_f block1, block2;
149 | };
150 | 
/*
 * CCM state: two 16-byte blocks (nonce and cmac), a 64-bit block counter,
 * and the block function with its opaque key pointer.
 */
struct ccm128_context {
    /* Each block can be accessed as two u64 words or as 16 bytes. */
    union {
        u64 u[2];
        u8 c[16];
    } nonce, cmac;
    u64 blocks;
    block128_f block;
    void *key;
};
160 | 
/* Everything OCB-related is compiled out when OPENSSL_NO_OCB is defined. */
#ifndef OPENSSL_NO_OCB

/* One 16-byte OCB block, viewable as two u64 words or as 16 bytes. */
typedef union {
    u64 a[2];
    unsigned char c[16];
} OCB_BLOCK;
/* XOR two OCB_BLOCKs word-wise into out; all three arguments are pointers. */
# define ocb_block16_xor(in1,in2,out) \
    ( (out)->a[0]=(in1)->a[0]^(in2)->a[0], \
      (out)->a[1]=(in1)->a[1]^(in2)->a[1] )
/*
 * Variant for operands that may be misaligned: under STRICT_ALIGNMENT it
 * goes through ocb_block_xor() on the byte view (declared elsewhere),
 * otherwise it is the same as ocb_block16_xor.
 * NOTE(review): this tests the *value* of STRICT_ALIGNMENT, whereas the
 * GETU32/PUTU32 guard in this header tests whether it is *defined*.
 */
# if STRICT_ALIGNMENT
#  define ocb_block16_xor_misaligned(in1,in2,out) \
    ocb_block_xor((in1)->c,(in2)->c,16,(out)->c)
# else
#  define ocb_block16_xor_misaligned ocb_block16_xor
# endif

/* OCB state, split into key-dependent members and a per-session sub-struct. */
struct ocb128_context {
    /* Need both encrypt and decrypt key schedules for decryption */
    block128_f encrypt;
    block128_f decrypt;
    void *keyenc;
    void *keydec;
    ocb128_f stream;    /* direction dependent */
    /* Key dependent variables. Can be reused if key remains the same */
    size_t l_index;
    size_t max_l_index;
    OCB_BLOCK l_star;
    OCB_BLOCK l_dollar;
    OCB_BLOCK *l;       /* pointer to OCB_BLOCK storage; ownership not visible here */
    /* Must be reset for each session */
    struct {
        u64 blocks_hashed;
        u64 blocks_processed;
        OCB_BLOCK offset_aad;
        OCB_BLOCK sum;
        OCB_BLOCK offset;
        OCB_BLOCK checksum;
    } sess;
};
#endif                          /* OPENSSL_NO_OCB */
201 | 


--------------------------------------------------------------------------------
/src/cuda-crypt/perftime.h:
--------------------------------------------------------------------------------
 1 | #ifndef PERFTIME_H
 2 | #define PERFTIME_H
 3 | 
 4 | #ifdef USE_RDTSC
 5 | static inline uint64_t rdtsc()
 6 | {
 7 |     unsigned int hi, lo;
 8 |     __asm__ volatile("rdtsc" : "=a" (lo), "=d" (hi));
 9 |     return ((uint64_t)hi << 32) | lo;
10 | }
11 | 
12 | typedef struct {
13 |     uint64_t count;
14 | } perftime_t;
15 | 
16 | #elif defined(USE_CLOCK_GETTIME)
17 | #include <time.h>
18 | typedef struct timespec perftime_t;
19 | #else
20 | #include <sys/time.h>
21 | typedef struct timeval perftime_t;
22 | #endif
23 | 
24 | static int get_time(perftime_t* t) {
25 | #ifdef USE_RDTSC
26 |     t->count = rdtsc();
27 |     return 0;
28 | #elif defined(USE_CLOCK_GETTIME)
29 |     return clock_gettime(CLOCK_MONOTONIC_RAW, t);
30 |     //return clock_gettime(CLOCK_PROCESS_CPUTIME_ID, t);
31 | #else
32 |     return gettimeofday(t, NULL /* timezone */);
33 | #endif
34 | }
35 | 
36 | static double get_us(const perftime_t* time) {
37 | #ifdef USE_RDTSC
38 |     return time->count;
39 | #elif defined(USE_CLOCK_GETTIME)
40 |     return ((time->tv_nsec/1000) + (double)time->tv_sec * 1000000);
41 | #else
42 |     return (time->tv_usec + (double)time->tv_sec * 1000000);
43 | #endif
44 | }
45 | 
46 | static double get_diff(const perftime_t* start, const perftime_t* end) {
47 |     return get_us(end) - get_us(start);
48 | }
49 | 
50 | #endif
51 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/common.cu:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef COMMON_CU
 3 | #define COMMON_CU
 4 | 
 5 | static uint64_t __host__ __device__ load_3(const unsigned char *in) {
 6 |     uint64_t result;
 7 | 
 8 |     result = (uint64_t) in[0];
 9 |     result |= ((uint64_t) in[1]) << 8;
10 |     result |= ((uint64_t) in[2]) << 16;
11 | 
12 |     return result;
13 | }
14 | 
/* Assemble in[0..3] into a 64-bit value, with in[0] as the lowest byte. */
static uint64_t __host__ __device__ load_4(const unsigned char *in) {
    uint64_t value = 0;
    int idx;

    /* Each byte is widened before shifting, so the top byte cannot sign-extend. */
    for (idx = 0; idx < 4; ++idx) {
        value |= ((uint64_t) in[idx]) << (8 * idx);
    }

    return value;
}
25 | 
26 | #endif
27 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/ed25519.h:
--------------------------------------------------------------------------------
 1 | #ifndef ED25519_H
 2 | #define ED25519_H
 3 | 
#include <stddef.h>
#include <inttypes.h>
#include <stdbool.h>
 6 | 
 7 | #if defined(_WIN32)
 8 |     #if defined(ED25519_BUILD_DLL)
 9 |         #define ED25519_DECLSPEC __declspec(dllexport)
10 |     #elif defined(ED25519_DLL)
11 |         #define ED25519_DECLSPEC __declspec(dllimport)
12 |     #else
13 |         #define ED25519_DECLSPEC
14 |     #endif
15 | #else
16 |     #define ED25519_DECLSPEC
17 | #endif
18 | 
19 | 
20 | #ifdef __cplusplus
21 | extern "C" {
22 | #endif
23 | 
/* The seed generator prototype is omitted when ED25519_NO_SEED is defined. */
#ifndef ED25519_NO_SEED
int ED25519_DECLSPEC ed25519_create_seed(unsigned char *seed);
#endif

/* Sizes of the fixed-length objects - presumably in bytes; confirm against the .cu sources. */
#define PUB_KEY_SIZE 32
#define PRIV_KEY_SIZE 64
#define SEED_SIZE 32
#define SCALAR_SIZE 32
#define SIG_SIZE 64

/*
 * Buffer descriptor handed to the *_many entry points: a byte pointer plus a
 * count.
 * NOTE(review): whether `num` counts bytes or packets is not visible in this
 * header - confirm against the callers.
 */
typedef struct {
    uint8_t* elems;
    uint32_t num;
} gpu_Elems;
38 | 
39 | void ED25519_DECLSPEC ed25519_create_keypair(unsigned char *public_key, unsigned char *private_key, const unsigned char *seed);
40 | void ED25519_DECLSPEC ed25519_sign(unsigned char *signature, const unsigned char *message, size_t message_len, const unsigned char *public_key, const unsigned char *private_key);
41 | 
42 | void ED25519_DECLSPEC ed25519_sign_many(const gpu_Elems* elems,
43 |                                         uint32_t num_elems,
44 |                                         uint32_t message_size,
45 |                                         uint32_t total_packets,
46 |                                         uint32_t total_signatures,
47 |                                         const uint32_t* message_lens,
48 |                                         const uint32_t* public_key_offsets,
49 |                                         const uint32_t* private_key_offsets,
50 |                                         const uint32_t* message_start_offsets,
51 |                                         uint8_t* signatures_out,
52 |                                         uint8_t use_non_default_stream);
53 | 
54 | int ED25519_DECLSPEC ed25519_verify(const unsigned char *signature, const unsigned char *message, uint32_t message_len, const unsigned char *public_key);
55 | 
56 | void ED25519_DECLSPEC ed25519_verify_many(const gpu_Elems* elems,
57 |                                           uint32_t num_elems,
58 |                                           uint32_t message_size,
59 |                                           uint32_t total_packets,
60 |                                           uint32_t total_signatures,
61 |                                           const uint32_t* message_lens,
62 |                                           const uint32_t* public_key_offsets,
63 |                                           const uint32_t* private_key_offsets,
64 |                                           const uint32_t* message_start_offsets,
65 |                                           uint8_t* out,
66 |                                           uint8_t use_non_default_stream);
67 | 
68 | void ED25519_DECLSPEC ed25519_add_scalar(unsigned char *public_key, unsigned char *private_key, const unsigned char *scalar);
69 | void ED25519_DECLSPEC ed25519_key_exchange(unsigned char *shared_secret, const unsigned char *public_key, const unsigned char *private_key);
70 | void ED25519_DECLSPEC ed25519_set_verbose(bool val);
71 | 
72 | const char* ED25519_DECLSPEC ed25519_license();
73 | bool ED25519_DECLSPEC ed25519_init();
74 | 
75 | int cuda_host_register(void* ptr, size_t size, unsigned int flags);
76 | int cuda_host_unregister(void* ptr);
77 | 
78 | #ifdef __cplusplus
79 | }
80 | #endif
81 | 
82 | #endif
83 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/fe.h:
--------------------------------------------------------------------------------
 1 | #ifndef FE_H
 2 | #define FE_H
 3 | 
 4 | #include "fixedint.h"
 5 | 
 6 | 
 7 | /*
 8 |     fe means field element.
 9 |     Here the field is \Z/(2^255-19).
10 |     An element t, entries t[0]...t[9], represents the integer
11 |     t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9].
12 |     Bounds on each t[i] vary depending on context.
13 | */
14 | 
15 | 
/* A field element: ten signed 32-bit limbs. */
typedef int32_t fe[10];


/* --- constants ----------------------------------------------------------- */
void __host__ __device__ fe_0(fe h);
void __host__ __device__ fe_1(fe h);

/* --- conversion to and from bytes ---------------------------------------- */
void __host__ __device__ fe_frombytes(fe h, const unsigned char *s);
void __host__ __device__ fe_tobytes(unsigned char *s, const fe h);

/* --- copy, predicates, conditional move / swap --------------------------- */
void __host__ __device__ fe_copy(fe h, const fe f);
int __host__ __device__ fe_isnegative(const fe f);
int __host__ __device__ fe_isnonzero(const fe f);
void __host__ __device__ fe_cmov(fe f, const fe g, unsigned int b);
/* declared without execution-space qualifiers, as in the original header */
void fe_cswap(fe f, fe g, unsigned int b);

/* --- arithmetic ---------------------------------------------------------- */
void __host__ __device__ fe_neg(fe h, const fe f);
void __host__ __device__ fe_add(fe h, const fe f, const fe g);
void __host__ __device__ fe_invert(fe out, const fe z);
void __host__ __device__ fe_sq(fe h, const fe f);
void __host__ __device__ fe_sq2(fe h, const fe f);
void __host__ __device__ fe_mul(fe h, const fe f, const fe g);
/* declared without execution-space qualifiers, as in the original header */
void fe_mul121666(fe h, fe f);
void __host__ __device__ fe_pow22523(fe out, const fe z);
void __host__ __device__ fe_sub(fe h, const fe f, const fe g);
40 | 
41 | #endif
42 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/fixedint.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |     Portable header to provide the 32 and 64 bits type.
 3 | 
 4 |     Not a compatible replacement for <stdint.h>, do not blindly use it as such.
 5 | */
 6 | 
/*
 * Preferred path: if the toolchain is known to ship <stdint.h> (C99 or later,
 * a sufficiently new Watcom, or GCC with stdint markers) and this header has
 * not been processed yet, include <stdint.h> and mark the header as done so
 * the fallback section is skipped.
 */
#if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined(__WATCOMC__) && (defined(_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_) || defined(__UINT_FAST64_TYPE__)) )) && !defined(FIXEDINT_H_INCLUDED)
    #include <stdint.h>
    #define FIXEDINT_H_INCLUDED

    /* Watcom >= 12.50 without UINT64_C: supply a replacement macro. */
    #if defined(__WATCOMC__) && __WATCOMC__ >= 1250 && !defined(UINT64_C)
        #include <limits.h>
        #define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX))
    #endif
#endif
16 | 
17 | 
/*
 * Fallback path, used only when the section above did not include
 * <stdint.h>: derive the 32-bit types from <limits.h> and pick 64-bit types
 * per compiler.
 */
#ifndef FIXEDINT_H_INCLUDED
    #define FIXEDINT_H_INCLUDED
    
    #include <limits.h>

    /* (u)int32_t */
    /* Note: #ifndef only detects a *macro* named uint32_t, not an existing typedef. */
    #ifndef uint32_t
        #if (ULONG_MAX == 0xffffffffUL)
            typedef unsigned long uint32_t;
        #elif (UINT_MAX == 0xffffffffUL)
            typedef unsigned int uint32_t;
        #elif (USHRT_MAX == 0xffffffffUL)
            typedef unsigned short uint32_t;
        #endif
    #endif


    /* Same caveat as above: only a macro named int32_t suppresses this. */
    #ifndef int32_t
        #if (LONG_MAX == 0x7fffffffL)
            typedef signed long int32_t;
        #elif (INT_MAX == 0x7fffffffL)
            typedef signed int int32_t;
        #elif (SHRT_MAX == 0x7fffffffL)
            typedef signed short int32_t;
        #endif
    #endif


    /* (u)int64_t */
    /* If no branch below matches, no 64-bit types or *_C macros are defined. */
    #if (defined(__STDC__) && defined(__STDC_VERSION__) && __STDC__ && __STDC_VERSION__ >= 199901L)
        typedef long long int64_t;
        typedef unsigned long long uint64_t;

        #define UINT64_C(v) v ##ULL
        #define INT64_C(v) v ##LL
    #elif defined(__GNUC__)
        __extension__ typedef long long int64_t;
        __extension__ typedef unsigned long long uint64_t;

        #define UINT64_C(v) v ##ULL
        #define INT64_C(v) v ##LL
    #elif defined(__MWERKS__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) || defined(__APPLE_CC__) || defined(_LONG_LONG) || defined(_CRAYC)
        typedef long long int64_t;
        typedef unsigned long long uint64_t;

        #define UINT64_C(v) v ##ULL
        #define INT64_C(v) v ##LL
    #elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined(__BORLANDC__) && __BORLANDC__ > 0x460) || defined(__alpha) || defined(__DECC)
        typedef __int64 int64_t;
        typedef unsigned __int64 uint64_t;

        #define UINT64_C(v) v ##UI64
        #define INT64_C(v) v ##I64
    #endif
#endif
73 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/ge.cu:
--------------------------------------------------------------------------------
  1 | #include "ge.h"
  2 | #include "precomp_data.h"
  3 | 
  4 | 
  5 | /*
  6 | r = p + q
  7 | */
  8 | 
/*
 * Computes r = p + q, with p in ge_p3 form, q in ge_cached form and the
 * result written in ge_p1p1 form.
 *
 * r->X, r->Y and r->Z are reused as scratch before receiving their final
 * values, so the statement order must be preserved.
 */
void __host__ __device__ ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
    fe t0;                              /* receives r->X + r->X after r->X was set from p->Z and q->Z */
    fe_add(r->X, p->Y, p->X);
    fe_sub(r->Y, p->Y, p->X);
    fe_mul(r->Z, r->X, q->YplusX);
    fe_mul(r->Y, r->Y, q->YminusX);
    fe_mul(r->T, q->T2d, p->T);
    fe_mul(r->X, p->Z, q->Z);
    fe_add(t0, r->X, r->X);
    /* final outputs: from here on each field is written exactly once */
    fe_sub(r->X, r->Z, r->Y);
    fe_add(r->Y, r->Z, r->Y);
    fe_add(r->Z, t0, r->T);
    fe_sub(r->T, t0, r->T);
}
 23 | 
 24 | 
 25 | static void __host__ __device__ slide(signed char *r, const unsigned char *a) {
 26 |     int i;
 27 |     int b;
 28 |     int k;
 29 | 
 30 |     for (i = 0; i < 256; ++i) {
 31 |         r[i] = 1 & (a[i >> 3] >> (i & 7));
 32 |     }
 33 | 
 34 |     for (i = 0; i < 256; ++i)
 35 |         if (r[i]) {
 36 |             for (b = 1; b <= 6 && i + b < 256; ++b) {
 37 |                 if (r[i + b]) {
 38 |                     if (r[i] + (r[i + b] << b) <= 15) {
 39 |                         r[i] += r[i + b] << b;
 40 |                         r[i + b] = 0;
 41 |                     } else if (r[i] - (r[i + b] << b) >= -15) {
 42 |                         r[i] -= r[i + b] << b;
 43 | 
 44 |                         for (k = i + b; k < 256; ++k) {
 45 |                             if (!r[k]) {
 46 |                                 r[k] = 1;
 47 |                                 break;
 48 |                             }
 49 | 
 50 |                             r[k] = 0;
 51 |                         }
 52 |                     } else {
 53 |                         break;
 54 |                     }
 55 |                 }
 56 |             }
 57 |         }
 58 | }
 59 | 
 60 | /*
 61 | r = a * A + b * B
 62 | where a = a[0]+256*a[1]+...+256^31 a[31].
 63 | and b = b[0]+256*b[1]+...+256^31 b[31].
 64 | B is the Ed25519 base point (x,4/5) with x positive.
 65 | */
 66 | 
 67 | void __host__ __device__ ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a, const ge_p3 *A, const unsigned char *b) {
 68 |     signed char aslide[256];
 69 |     signed char bslide[256];
 70 |     ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */
 71 |     ge_p1p1 t;
 72 |     ge_p3 u;
 73 |     ge_p3 A2;
 74 |     int i;
 75 |     slide(aslide, a);
 76 |     slide(bslide, b);
 77 |     ge_p3_to_cached(&Ai[0], A);
 78 |     ge_p3_dbl(&t, A);
 79 |     ge_p1p1_to_p3(&A2, &t);
 80 |     ge_add(&t, &A2, &Ai[0]);
 81 |     ge_p1p1_to_p3(&u, &t);
 82 |     ge_p3_to_cached(&Ai[1], &u);
 83 |     ge_add(&t, &A2, &Ai[1]);
 84 |     ge_p1p1_to_p3(&u, &t);
 85 |     ge_p3_to_cached(&Ai[2], &u);
 86 |     ge_add(&t, &A2, &Ai[2]);
 87 |     ge_p1p1_to_p3(&u, &t);
 88 |     ge_p3_to_cached(&Ai[3], &u);
 89 |     ge_add(&t, &A2, &Ai[3]);
 90 |     ge_p1p1_to_p3(&u, &t);
 91 |     ge_p3_to_cached(&Ai[4], &u);
 92 |     ge_add(&t, &A2, &Ai[4]);
 93 |     ge_p1p1_to_p3(&u, &t);
 94 |     ge_p3_to_cached(&Ai[5], &u);
 95 |     ge_add(&t, &A2, &Ai[5]);
 96 |     ge_p1p1_to_p3(&u, &t);
 97 |     ge_p3_to_cached(&Ai[6], &u);
 98 |     ge_add(&t, &A2, &Ai[6]);
 99 |     ge_p1p1_to_p3(&u, &t);
100 |     ge_p3_to_cached(&Ai[7], &u);
101 |     ge_p2_0(r);
102 | 
103 |     for (i = 255; i >= 0; --i) {
104 |         if (aslide[i] || bslide[i]) {
105 |             break;
106 |         }
107 |     }
108 | 
109 |     for (; i >= 0; --i) {
110 |         ge_p2_dbl(&t, r);
111 | 
112 |         if (aslide[i] > 0) {
113 |             ge_p1p1_to_p3(&u, &t);
114 |             ge_add(&t, &u, &Ai[aslide[i] / 2]);
115 |         } else if (aslide[i] < 0) {
116 |             ge_p1p1_to_p3(&u, &t);
117 |             ge_sub(&t, &u, &Ai[(-aslide[i]) / 2]);
118 |         }
119 | 
120 |         if (bslide[i] > 0) {
121 |             ge_p1p1_to_p3(&u, &t);
122 |             ge_madd(&t, &u, &Bi[bslide[i] / 2]);
123 |         } else if (bslide[i] < 0) {
124 |             ge_p1p1_to_p3(&u, &t);
125 |             ge_msub(&t, &u, &Bi[(-bslide[i]) / 2]);
126 |         }
127 | 
128 |         ge_p1p1_to_p2(r, &t);
129 |     }
130 | }
131 | 
132 | 
/*
 * Field constants used by ge_frombytes_negate_vartime below.
 * NOTE(review): presumably `d` is the curve constant d from ge.h and `sqrtm1`
 * a square root of -1 in the field - confirm against the reference tables.
 * NOTE(review): both are declared __device__ but are read from a
 * __host__ __device__ function - confirm the host path sees these values.
 */
static const __device__ fe d = {
    -10913610, 13857413, -15372611, 6949391, 114729, -8787816, -6275908, -3247719, -18696448, -12055116
};

static const __device__ fe sqrtm1 = {
    -32595792, -7943725, 9377950, 3500415, 12389472, -272473, -25146209, -2005654, 326686, 11406482
};

/*
 * Decode the 32-byte encoding s into the extended point h.
 *
 * Y is read from s and Z is set to one.  A candidate X is computed from Y
 * and checked; if the first check fails the candidate is multiplied by
 * sqrtm1, and if the second check also fails the function gives up.
 * X is negated when its sign equals the top bit of s[31].
 *
 * Returns 0 on success and -1 when no valid X exists for the given Y.
 * On failure h->X, h->Y and h->Z have been written but h->T has not.
 */
int __device__ __host__ ge_frombytes_negate_vartime(ge_p3 *h, const unsigned char *s) {
    fe u;
    fe v;
    fe v3;
    fe vxx;
    fe check;
    fe_frombytes(h->Y, s);
    fe_1(h->Z);
    fe_sq(u, h->Y);
    fe_mul(v, u, d);
    fe_sub(u, u, h->Z);     /* u = y^2-1 */
    fe_add(v, v, h->Z);     /* v = dy^2+1 */
    fe_sq(v3, v);
    fe_mul(v3, v3, v);      /* v3 = v^3 */
    fe_sq(h->X, v3);
    fe_mul(h->X, h->X, v);
    fe_mul(h->X, h->X, u);  /* x = uv^7 */
    fe_pow22523(h->X, h->X); /* x = (uv^7)^((q-5)/8) */
    fe_mul(h->X, h->X, v3);
    fe_mul(h->X, h->X, u);  /* x = uv^3(uv^7)^((q-5)/8) */
    fe_sq(vxx, h->X);
    fe_mul(vxx, vxx, v);
    fe_sub(check, vxx, u);  /* vx^2-u */

    /* first check: vx^2 == u ? */
    if (fe_isnonzero(check)) {
        fe_add(check, vxx, u); /* vx^2+u */

        /* second check: vx^2 == -u ?  If not, s does not decode. */
        if (fe_isnonzero(check)) {
            return -1;
        }

        fe_mul(h->X, h->X, sqrtm1);
    }

    /* negate X when its sign matches the sign bit stored in s[31] */
    if (fe_isnegative(h->X) == (s[31] >> 7)) {
        fe_neg(h->X, h->X);
    }

    fe_mul(h->T, h->X, h->Y);
    return 0;
}
182 | 
183 | 
184 | /*
185 | r = p + q
186 | */
187 | 
/*
 * Computes r = p + q, with p in ge_p3 form, q a ge_precomp table entry and
 * the result written in ge_p1p1 form.
 *
 * Same shape as ge_add, except that t0 is taken directly from p->Z + p->Z
 * (a ge_precomp entry has no Z member).  r's fields are reused as scratch,
 * so the statement order must be preserved.
 */
void __host__ __device__ ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
    fe t0;                              /* p->Z + p->Z */
    fe_add(r->X, p->Y, p->X);
    fe_sub(r->Y, p->Y, p->X);
    fe_mul(r->Z, r->X, q->yplusx);
    fe_mul(r->Y, r->Y, q->yminusx);
    fe_mul(r->T, q->xy2d, p->T);
    fe_add(t0, p->Z, p->Z);
    fe_sub(r->X, r->Z, r->Y);
    fe_add(r->Y, r->Z, r->Y);
    fe_add(r->Z, t0, r->T);
    fe_sub(r->T, t0, r->T);
}
201 | 
202 | 
203 | /*
204 | r = p - q
205 | */
206 | 
/*
 * Computes r = p - q, with p in ge_p3 form, q a ge_precomp table entry and
 * the result written in ge_p1p1 form.
 *
 * Differs from ge_madd only in that q->yplusx / q->yminusx are swapped and
 * the final add/sub on r->Z and r->T are exchanged.  Statement order must be
 * preserved because r's fields are reused as scratch.
 */
void __host__ __device__ ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
    fe t0;                              /* p->Z + p->Z */

    fe_add(r->X, p->Y, p->X);
    fe_sub(r->Y, p->Y, p->X);
    fe_mul(r->Z, r->X, q->yminusx);
    fe_mul(r->Y, r->Y, q->yplusx);
    fe_mul(r->T, q->xy2d, p->T);
    fe_add(t0, p->Z, p->Z);
    fe_sub(r->X, r->Z, r->Y);
    fe_add(r->Y, r->Z, r->Y);
    fe_sub(r->Z, t0, r->T);
    fe_add(r->T, t0, r->T);
}
221 | 
222 | 
223 | /*
224 | r = p
225 | */
226 | 
/*
 * Convert a completed point (ge_p1p1) to projective form (ge_p2).
 * Takes three field multiplications; r and p are distinct objects.
 */
void __host__ __device__ ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p)
{
    fe_mul(r->X, p->X, p->T);   /* X <- X * T */
    fe_mul(r->Y, p->Y, p->Z);   /* Y <- Y * Z */
    fe_mul(r->Z, p->Z, p->T);   /* Z <- Z * T */
}
232 | 
233 | 
234 | 
235 | /*
236 | r = p
237 | */
238 | 
/*
 * Convert a completed point (ge_p1p1) to extended form (ge_p3).
 * Performs the same three products as ge_p1p1_to_p2, plus T = X * Y.
 */
void __host__ __device__ ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p)
{
    fe_mul(r->X, p->X, p->T);   /* X <- X * T */
    fe_mul(r->Y, p->Y, p->Z);   /* Y <- Y * Z */
    fe_mul(r->Z, p->Z, p->T);   /* Z <- Z * T */
    fe_mul(r->T, p->X, p->Y);   /* T <- X * Y */
}
245 | 
246 | 
/* Initialise h to (X:Y:Z) = (0:1:1) via fe_0 / fe_1. */
void __host__ __device__ ge_p2_0(ge_p2 *h)
{
    fe_0(h->X);     /* X = 0 */
    fe_1(h->Y);     /* Y = 1 */
    fe_1(h->Z);     /* Z = 1 */
}
252 | 
253 | 
254 | 
255 | /*
256 | r = 2 * p
257 | */
258 | 
/*
 * Computes r = 2 * p, with p in ge_p2 form and the result in ge_p1p1 form.
 *
 * r's fields are reused as scratch (r->Y first holds p->X + p->Y, and later
 * receives its final value), so the statement order must be preserved.
 */
void __host__ __device__ ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) {
    fe t0;                      /* square of (p->X + p->Y) */

    fe_sq(r->X, p->X);
    fe_sq(r->Z, p->Y);
    fe_sq2(r->T, p->Z);
    fe_add(r->Y, p->X, p->Y);
    fe_sq(t0, r->Y);
    fe_add(r->Y, r->Z, r->X);
    fe_sub(r->Z, r->Z, r->X);
    fe_sub(r->X, t0, r->Y);
    fe_sub(r->T, r->T, r->Z);
}
272 | 
273 | 
/* Initialise h to (X:Y:Z:T) = (0:1:1:0) via fe_0 / fe_1. */
void __host__ __device__ ge_p3_0(ge_p3 *h)
{
    fe_0(h->X);     /* X = 0 */
    fe_1(h->Y);     /* Y = 1 */
    fe_1(h->Z);     /* Z = 1 */
    fe_0(h->T);     /* T = 0 */
}
280 | 
281 | 
282 | /*
283 | r = 2 * p
284 | */
285 | 
/*
 * Computes r = 2 * p for an extended point: p is first reduced to projective
 * form, then doubled with ge_p2_dbl.
 */
void __host__ __device__ ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p)
{
    ge_p2 projective;

    ge_p3_to_p2(&projective, p);
    ge_p2_dbl(r, &projective);
}
291 | 
292 | 
293 | 
294 | /*
295 | r = p
296 | */
297 | 
/*
 * Multiplier applied to T when building a cached point.
 * NOTE(review): presumably 2*d for the curve constant d - confirm.
 */
static const __device__ fe d2 = {
    -21827239, -5839606, -30745221, 13898782, 229458, 15978800, -12551817, -6495438, 29715968, 9444199
};

/*
 * Convert an extended point to cached form:
 * (Y+X, Y-X, Z, T*d2), the operand layout ge_add / ge_sub expect for q.
 */
void __host__ __device__ ge_p3_to_cached(ge_cached *r, const ge_p3 *p)
{
    fe_add(r->YplusX,  p->Y, p->X);
    fe_sub(r->YminusX, p->Y, p->X);
    fe_copy(r->Z, p->Z);
    fe_mul(r->T2d, p->T, d2);
}
308 | 
309 | 
310 | /*
311 | r = p
312 | */
313 | 
/*
 * Convert an extended point to projective form by copying X, Y and Z
 * (T is dropped).
 *
 * Carries the same __host__ __device__ qualifiers as its declaration in ge.h;
 * it is called from __host__ __device__ code (ge_p3_dbl).
 */
void __host__ __device__ ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) {
    fe_copy(r->X, p->X);
    fe_copy(r->Y, p->Y);
    fe_copy(r->Z, p->Z);
}
319 | 
320 | 
/*
 * Encode the extended point h into the 32 bytes at s.
 *
 * X and Y are divided by Z (via one inversion), Y is serialised with
 * fe_tobytes, and the sign of X is folded into the top bit of s[31].
 *
 * Carries the same __host__ __device__ qualifiers as its declaration in ge.h.
 */
void __host__ __device__ ge_p3_tobytes(unsigned char *s, const ge_p3 *h) {
    fe recip;
    fe x;
    fe y;

    fe_invert(recip, h->Z);
    fe_mul(x, h->X, recip);
    fe_mul(y, h->Y, recip);
    fe_tobytes(s, y);
    s[31] ^= fe_isnegative(x) << 7;
}
331 | 
332 | 
/*
 * Branch-free equality test: returns 1 when b == c and 0 otherwise.
 */
static unsigned char __host__ __device__ equal(signed char b, signed char c) {
    /* XOR of the two byte patterns is zero exactly when they match */
    const unsigned char diff = (unsigned char) b ^ (unsigned char) c;
    /* 0 - 1 wraps to all ones (top bit set); 1..255 minus 1 leaves it clear */
    const uint64_t wrapped = (uint64_t) diff - 1;

    return (unsigned char) (wrapped >> 63);
}
342 | 
/*
 * Branch-free sign test: returns 1 when b < 0 and 0 otherwise.
 * Widening to 64 bits sign-extends b, so bit 63 carries its sign.
 */
static unsigned char __host__ __device__ negative(signed char b) {
    const uint64_t widened = (uint64_t) b;

    return (unsigned char) (widened >> 63);
}
348 | 
/*
 * Conditional move of a whole ge_precomp entry: applies fe_cmov with flag b
 * to each of the three members of t, taking the source from u.
 */
static void __host__ __device__ cmov(ge_precomp *t, const ge_precomp *u, unsigned char b)
{
    fe_cmov(t->yplusx,  u->yplusx,  b);
    fe_cmov(t->yminusx, u->yminusx, b);
    fe_cmov(t->xy2d,    u->xy2d,    b);
}
354 | 
355 | 
/*
 * Table lookup without data-dependent branches or indices: set t to entry
 * |b| of row `pos` of the `base` table (entries 1..8 are stored at indices
 * 0..7), with the negated form selected when b < 0.  For b == 0 the result
 * stays at the initial value (1, 1, 0).
 *
 * Every one of the eight entries is always passed through cmov; only the
 * flag differs.
 */
static void __host__ __device__ select(ge_precomp *t, int pos, signed char b) {
    ge_precomp negated;
    const unsigned char is_neg = negative(b);
    /* |b|: subtracts 2*b from b when b is negative, nothing otherwise */
    const unsigned char magnitude = b - (((-is_neg) & b) << 1);
    int entry;

    fe_1(t->yplusx);
    fe_1(t->yminusx);
    fe_0(t->xy2d);

    for (entry = 0; entry < 8; ++entry) {
        cmov(t, &base[pos][entry], equal(magnitude, entry + 1));
    }

    /* negation of a precomp entry: swap y+x with y-x and negate xy2d */
    fe_copy(negated.yplusx, t->yminusx);
    fe_copy(negated.yminusx, t->yplusx);
    fe_neg(negated.xy2d, t->xy2d);
    cmov(t, &negated, is_neg);
}
376 | 
377 | /*
378 | h = a * B
379 | where a = a[0]+256*a[1]+...+256^31 a[31]
380 | B is the Ed25519 base point (x,4/5) with x positive.
381 | 
382 | Preconditions:
383 |   a[31] <= 127
384 | */
385 | 
/*
 * Computes h = a * B using the precomputed `base` table (through select()).
 *
 * a is a 32-byte little-endian scalar; per the stated precondition
 * a[31] <= 127.  The scalar is split into 64 nibbles, which are recoded to
 * signed digits in -8..8.  The odd-indexed digits are accumulated first, the
 * accumulator is doubled four times, then the even-indexed digits are added.
 */
void __device__ __host__ ge_scalarmult_base(ge_p3 *h, const unsigned char *a) {
    signed char digit[64];
    signed char carry = 0;
    ge_p1p1 sum;
    ge_p2 proj;
    ge_precomp entry;
    int k;

    /* split every byte into its low and high nibble */
    for (k = 0; k < 32; ++k) {
        digit[2 * k]     = a[k] & 15;
        digit[2 * k + 1] = (a[k] >> 4) & 15;
    }

    /* each digit is between 0 and 15; digit[63] is between 0 and 7 */
    for (k = 0; k < 63; ++k) {
        digit[k] += carry;
        carry = (digit[k] + 8) >> 4;
        digit[k] -= carry << 4;
    }
    digit[63] += carry;
    /* each digit is now between -8 and 8 */

    ge_p3_0(h);

    /* odd-indexed digits */
    for (k = 0; k < 32; ++k) {
        select(&entry, k, digit[2 * k + 1]);
        ge_madd(&sum, h, &entry);
        ge_p1p1_to_p3(h, &sum);
    }

    /* four doublings: one from p3, three more through p2 */
    ge_p3_dbl(&sum, h);
    for (k = 0; k < 3; ++k) {
        ge_p1p1_to_p2(&proj, &sum);
        ge_p2_dbl(&sum, &proj);
    }
    ge_p1p1_to_p3(h, &sum);

    /* even-indexed digits */
    for (k = 0; k < 32; ++k) {
        select(&entry, k, digit[2 * k]);
        ge_madd(&sum, h, &entry);
        ge_p1p1_to_p3(h, &sum);
    }
}
435 | 
436 | 
437 | /*
438 | r = p - q
439 | */
440 | 
/*
 * Computes r = p - q, with p in ge_p3 form, q in ge_cached form and the
 * result written in ge_p1p1 form.
 *
 * Differs from ge_add only in that q->YplusX / q->YminusX are swapped and
 * the final add/sub on r->Z and r->T are exchanged.  Statement order must be
 * preserved because r's fields are reused as scratch.
 */
void __host__ __device__ ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
    fe t0;                              /* receives r->X + r->X after r->X was set from p->Z and q->Z */
    
    fe_add(r->X, p->Y, p->X);
    fe_sub(r->Y, p->Y, p->X);
    fe_mul(r->Z, r->X, q->YminusX);
    fe_mul(r->Y, r->Y, q->YplusX);
    fe_mul(r->T, q->T2d, p->T);
    fe_mul(r->X, p->Z, q->Z);
    fe_add(t0, r->X, r->X);
    fe_sub(r->X, r->Z, r->Y);
    fe_add(r->Y, r->Z, r->Y);
    fe_sub(r->Z, t0, r->T);
    fe_add(r->T, t0, r->T);
}
456 | 
457 | 
/*
 * Encode the projective point h into the 32 bytes at s.
 *
 * X and Y are divided by Z (via one inversion), Y is serialised with
 * fe_tobytes, and the sign of X is folded into the top bit of s[31].
 */
void __host__ __device__ ge_tobytes(unsigned char *s, const ge_p2 *h)
{
    fe z_inv;
    fe x_aff;
    fe y_aff;

    fe_invert(z_inv, h->Z);
    fe_mul(x_aff, h->X, z_inv);
    fe_mul(y_aff, h->Y, z_inv);

    fe_tobytes(s, y_aff);
    s[31] ^= fe_isnegative(x_aff) << 7;
}
468 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/ge.h:
--------------------------------------------------------------------------------
 1 | #ifndef GE_H
 2 | #define GE_H
 3 | 
 4 | #include "fe.h"
 5 | 
 6 | 
 7 | /*
 8 | ge means group element.
 9 | 
10 | Here the group is the set of pairs (x,y) of field elements (see fe.h)
11 | satisfying -x^2 + y^2 = 1 + d x^2y^2
12 | where d = -121665/121666.
13 | 
14 | Representations:
15 |   ge_p2 (projective): (X:Y:Z) satisfying x=X/Z, y=Y/Z
16 |   ge_p3 (extended): (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT
17 |   ge_p1p1 (completed): ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T
18 |   ge_precomp (Duif): (y+x,y-x,2dxy)
19 | */
20 | 
/* Projective point (X:Y:Z). */
typedef struct {
  fe X, Y, Z;
} ge_p2;

/* Extended point (X:Y:Z:T). */
typedef struct {
  fe X, Y, Z, T;
} ge_p3;

/* Completed point ((X:Z),(Y:T)); same member layout as ge_p3. */
typedef struct {
  fe X, Y, Z, T;
} ge_p1p1;

/* Precomputed table entry (y+x, y-x, 2dxy). */
typedef struct {
  fe yplusx, yminusx, xy2d;
} ge_precomp;

/* Cached operand for ge_add / ge_sub: (Y+X, Y-X, Z, T2d). */
typedef struct {
  fe YplusX, YminusX, Z, T2d;
} ge_cached;
53 | 
/* Encoding to, and decoding from, the 32-byte form.  The decoder returns 0 on success and -1 on failure. */
void __host__ __device__ ge_p3_tobytes(unsigned char *s, const ge_p3 *h);
void __host__ __device__ ge_tobytes(unsigned char *s, const ge_p2 *h);
int  __host__ __device__ ge_frombytes_negate_vartime(ge_p3 *h, const unsigned char *s);

/*
 * Group operations.  ge_add / ge_madd compute r = p + q, ge_sub / ge_msub
 * compute r = p - q; the "m" variants take a ge_precomp operand instead of a
 * ge_cached one.  ge_double_scalarmult_vartime computes r = a*A + b*B and
 * ge_scalarmult_base computes h = a*B, with B the Ed25519 base point.
 */
void __host__ __device__ ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q);
void __host__ __device__ ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q);
void __host__ __device__ ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a, const ge_p3 *A, const unsigned char *b);
void __host__ __device__ ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q);
void __host__ __device__ ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q);
void __host__ __device__ ge_scalarmult_base(ge_p3 *h, const unsigned char *a);

/* Conversions between representations, initialisers (*_0) and doublings (*_dbl). */
void __host__ __device__ ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p);
void __host__ __device__ ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p);
void __host__ __device__ ge_p2_0(ge_p2 *h);
void __host__ __device__ ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p);
void __host__ __device__ ge_p3_0(ge_p3 *h);
void __host__ __device__ ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p);
void __host__ __device__ ge_p3_to_cached(ge_cached *r, const ge_p3 *p);
void __host__ __device__ ge_p3_to_p2(ge_p2 *r, const ge_p3 *p);
73 | 
74 | #endif
75 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/gpu_ctx.cu:
--------------------------------------------------------------------------------
  1 | #include "ed25519.h"
  2 | #include "gpu_ctx.h"
  3 | #include <pthread.h>
  4 | #include "gpu_common.h"
  5 | 
/* Guards the round-robin cursors and the one-time initialisation below. */
static pthread_mutex_t g_ctx_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Upper bounds: at most this many GPUs are used, each with this many context slots. */
#define MAX_NUM_GPUS 8
#define MAX_QUEUE_SIZE 8

/* One context (own mutex and stream) per [gpu][queue] slot. */
static gpu_ctx_t g_gpu_ctx[MAX_NUM_GPUS][MAX_QUEUE_SIZE] = {0};
/* Next GPU to hand out, and per GPU the next queue slot to hand out. */
static uint32_t g_cur_gpu = 0;
static uint32_t g_cur_queue[MAX_NUM_GPUS] = {0};
/* -1 means "not initialised yet"; afterwards the number of usable GPUs. */
static int32_t g_total_gpus = -1;
 15 | 
 // Lazily discovers the usable GPUs and creates the mutex and CUDA stream for
 // every [gpu][queue] slot of g_gpu_ctx.
 //
 // Must be called with g_ctx_mutex held. The setup work runs only while
 // g_total_gpus still holds its initial -1 value.
 //
 // Returns true when at least one GPU is available, false otherwise.
 static bool cuda_crypt_init_locked() {
     if (g_total_gpus == -1) {
         // Query into a local and check the result: the original ignored the
         // return code, so a failed query was never reported and the GPU count
         // was derived from whatever the failed call left in the variable.
         int device_count = 0;
         cudaError_t count_err = cudaGetDeviceCount(&device_count);
         if (count_err != cudaSuccess) {
             fprintf(stderr, "cudaGetDeviceCount error %d: %s\n",
                     (int)count_err, cudaGetErrorString(count_err));
             // g_total_gpus stays at -1 so a later call can try again.
             return false;
         }

         g_total_gpus = min(MAX_NUM_GPUS, device_count);
         LOG("total_gpus: %d\n", g_total_gpus);
         for (int gpu = 0; gpu < g_total_gpus; gpu++) {
             // Select the device before creating its streams.
             CUDA_CHK(cudaSetDevice(gpu));
             for (int queue = 0; queue < MAX_QUEUE_SIZE; queue++) {
                 int err = pthread_mutex_init(&g_gpu_ctx[gpu][queue].mutex, NULL);
                 if (err != 0) {
                     fprintf(stderr, "pthread_mutex_init error %d gpu: %d queue: %d\n",
                             err, gpu, queue);
                     // Record "no GPUs" so later calls fail fast instead of
                     // re-running a half-finished initialization.
                     g_total_gpus = 0;
                     return false;
                 }
                 CUDA_CHK(cudaStreamCreate(&g_gpu_ctx[gpu][queue].stream));
             }
         }
     }
     return g_total_gpus > 0;
 }
 37 | 
 // Public initialization entry point: runs the one-time GPU context setup
 // under the global context mutex.
 //
 // Returns true when at least one GPU is usable.
 bool ed25519_init() {
     // NOTE(review): presumably issued to force CUDA context creation up
     // front -- confirm. The result is intentionally not checked here.
     cudaFree(0);

     bool initialized;
     pthread_mutex_lock(&g_ctx_mutex);
     initialized = cuda_crypt_init_locked();
     pthread_mutex_unlock(&g_ctx_mutex);

     return initialized;
 }
 45 | 
 // Picks the next GPU context in round-robin order (first across GPUs, then
 // across that GPU's queues), locks it and makes its GPU the current device.
 //
 // Returns the context with its mutex held; the caller hands it back through
 // release_gpu_ctx(). Returns NULL when initialization reports no GPUs.
 gpu_ctx_t* get_gpu_ctx() {
     int32_t cur_gpu, cur_queue;

     LOG("locking global mutex");
     pthread_mutex_lock(&g_ctx_mutex);

     const bool have_gpus = cuda_crypt_init_locked();
     if (!have_gpus) {
         pthread_mutex_unlock(&g_ctx_mutex);
         LOG("No GPUs, exiting...\n");
         return NULL;
     }

     // Take the current GPU slot and advance the GPU cursor for the next caller.
     cur_gpu = g_cur_gpu;
     g_cur_gpu = (g_cur_gpu + 1) % g_total_gpus;

     // Same for the queue cursor of the GPU that was just selected.
     cur_queue = g_cur_queue[cur_gpu];
     g_cur_queue[cur_gpu] = (g_cur_queue[cur_gpu] + 1) % MAX_QUEUE_SIZE;

     pthread_mutex_unlock(&g_ctx_mutex);

     // The per-context lock is taken only after the global lock is dropped.
     gpu_ctx_t* cur_ctx = &g_gpu_ctx[cur_gpu][cur_queue];
     LOG("locking contex mutex queue: %d gpu: %d", cur_queue, cur_gpu);
     pthread_mutex_lock(&cur_ctx->mutex);

     CUDA_CHK(cudaSetDevice(cur_gpu));

     LOG("selecting gpu: %d queue: %d\n", cur_gpu, cur_queue);

     return cur_ctx;
 }
 74 | 
 // Ensures the device buffers of cur_ctx are large enough for this batch and
 // queues the host-to-device copies of the offset tables and packet data on
 // `stream`.
 //
 // cur_ctx               context whose device buffers are grown on demand
 // elems, num_elems      host packet arrays; elems[i].num entries of
 //                       message_size bytes each are copied back to back
 // message_size          size in bytes of one packet entry
 // total_packets         not used by this function
 // total_packets_size    required capacity in bytes of the packets buffer
 // total_signatures      number of entries in each offset / length table
 // message_lens, public_key_offsets, signature_offsets, message_start_offsets
 //                       host tables with total_signatures uint32_t entries
 // out_size              required capacity in bytes of the output buffer
 // stream                CUDA stream the asynchronous copies are issued on
 //
 // The copies are asynchronous: this function does not synchronize `stream`.
 void setup_gpu_ctx(verify_ctx_t* cur_ctx,
                    const gpu_Elems* elems,
                    uint32_t num_elems,
                    uint32_t message_size,
                    uint32_t total_packets,
                    uint32_t total_packets_size,
                    uint32_t total_signatures,
                    const uint32_t* message_lens,
                    const uint32_t* public_key_offsets,
                    const uint32_t* signature_offsets,
                    const uint32_t* message_start_offsets,
                    size_t out_size,
                    cudaStream_t stream
                    ) {
     size_t offsets_size = total_signatures * sizeof(uint32_t);

     LOG("device allocate. packets: %d out: %d offsets_size: %zu\n",
         total_packets_size, (int)out_size, offsets_size);

     // Packet buffer: reallocate only when missing or too small.
     if (cur_ctx->packets == NULL ||
         total_packets_size > cur_ctx->packets_size_bytes) {
         CUDA_CHK(cudaFree(cur_ctx->packets));
         CUDA_CHK(cudaMalloc(&cur_ctx->packets, total_packets_size));

         cur_ctx->packets_size_bytes = total_packets_size;
     }

     // Output buffer: reallocate only when missing or too small.
     if (cur_ctx->out == NULL || cur_ctx->out_size_bytes < out_size) {
         CUDA_CHK(cudaFree(cur_ctx->out));
         CUDA_CHK(cudaMalloc(&cur_ctx->out, out_size));

         // Record the capacity that was actually allocated. The original
         // stored total_signatures here, so the size check above compared
         // against a number unrelated to the allocation.
         cur_ctx->out_size_bytes = out_size;
     }

     // The four offset / length tables share one capacity (offsets_len) and
     // are therefore always reallocated together.
     if (cur_ctx->public_key_offsets == NULL || cur_ctx->offsets_len < total_signatures) {
         CUDA_CHK(cudaFree(cur_ctx->public_key_offsets));
         CUDA_CHK(cudaMalloc(&cur_ctx->public_key_offsets, offsets_size));

         CUDA_CHK(cudaFree(cur_ctx->signature_offsets));
         CUDA_CHK(cudaMalloc(&cur_ctx->signature_offsets, offsets_size));

         CUDA_CHK(cudaFree(cur_ctx->message_start_offsets));
         CUDA_CHK(cudaMalloc(&cur_ctx->message_start_offsets, offsets_size));

         CUDA_CHK(cudaFree(cur_ctx->message_lens));
         CUDA_CHK(cudaMalloc(&cur_ctx->message_lens, offsets_size));

         cur_ctx->offsets_len = total_signatures;
     }

     LOG("Done alloc");

     CUDA_CHK(cudaMemcpyAsync(cur_ctx->public_key_offsets, public_key_offsets, offsets_size, cudaMemcpyHostToDevice, stream));
     CUDA_CHK(cudaMemcpyAsync(cur_ctx->signature_offsets, signature_offsets, offsets_size, cudaMemcpyHostToDevice, stream));
     CUDA_CHK(cudaMemcpyAsync(cur_ctx->message_start_offsets, message_start_offsets, offsets_size, cudaMemcpyHostToDevice, stream));
     CUDA_CHK(cudaMemcpyAsync(cur_ctx->message_lens, message_lens, offsets_size, cudaMemcpyHostToDevice, stream));

     // Pack the packet arrays contiguously; `cur` counts the entries already
     // placed, so the byte offset of the next array is cur * message_size.
     size_t cur = 0;
     for (size_t i = 0; i < num_elems; i++) {
         LOG("i: %zu size: %d\n", i, elems[i].num * message_size);
         CUDA_CHK(cudaMemcpyAsync(&cur_ctx->packets[cur * message_size], elems[i].elems, elems[i].num * message_size, cudaMemcpyHostToDevice, stream));
         cur += elems[i].num;
     }
 }
139 | 
140 | 
// Hands a context obtained from get_gpu_ctx() back by unlocking its mutex.
void release_gpu_ctx(gpu_ctx_t* cur_ctx) {
    pthread_mutex_t* ctx_lock = &cur_ctx->mutex;
    pthread_mutex_unlock(ctx_lock);
}
144 | 
// Frees every device buffer and destroys every stream held in g_gpu_ctx.
//
// All slots are visited, including ones that were never initialized: their
// pointers are NULL and their stream is 0 because the table is
// zero-initialized. After each resource is released its handle and recorded
// capacity are cleared, so a second call or a later setup_gpu_ctx() never
// frees or uses a stale handle.
//
// No mutex is taken here, so the caller must make sure no context is in use.
void ed25519_free_gpu_mem() {
    for (size_t gpu = 0; gpu < MAX_NUM_GPUS; gpu++) {
        for (size_t queue = 0; queue < MAX_QUEUE_SIZE; queue++) {
            gpu_ctx_t* cur_ctx = &g_gpu_ctx[gpu][queue];
            verify_ctx_t* vctx = &cur_ctx->verify_ctx;

            CUDA_CHK(cudaFree(vctx->packets));
            vctx->packets = NULL;
            vctx->packets_size_bytes = 0;

            CUDA_CHK(cudaFree(vctx->out));
            vctx->out = NULL;
            vctx->out_size_bytes = 0;

            CUDA_CHK(cudaFree(vctx->message_lens));
            vctx->message_lens = NULL;

            CUDA_CHK(cudaFree(vctx->public_key_offsets));
            vctx->public_key_offsets = NULL;

            CUDA_CHK(cudaFree(vctx->private_key_offsets));
            vctx->private_key_offsets = NULL;

            CUDA_CHK(cudaFree(vctx->signature_offsets));
            vctx->signature_offsets = NULL;

            CUDA_CHK(cudaFree(vctx->message_start_offsets));
            vctx->message_start_offsets = NULL;

            // The offset tables are gone, so their shared capacity is zero.
            vctx->offsets_len = 0;

            if (cur_ctx->stream != 0) {
                CUDA_CHK(cudaStreamDestroy(cur_ctx->stream));
                // Clear the handle so the destroyed stream is never reused or
                // destroyed twice.
                cur_ctx->stream = 0;
            }
        }
    }
}
162 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/gpu_ctx.h:
--------------------------------------------------------------------------------
 1 | #ifndef GPU_CTX_H
 2 | #define GPU_CTX_H
 3 | 
 4 | #include <inttypes.h>
 5 | #include "ed25519.h"
 6 | 
 7 | #ifdef __cplusplus
 8 | extern "C" {
 9 | #endif
10 | 
11 | typedef struct {
12 |     uint8_t* packets;
13 |     uint32_t packets_size_bytes;
14 | 
15 |     uint8_t* out;
16 |     size_t out_size_bytes;
17 | 
18 |     uint32_t* public_key_offsets;
19 |     uint32_t* private_key_offsets;
20 |     uint32_t* message_start_offsets;
21 |     uint32_t* signature_offsets;
22 |     uint32_t* message_lens;
23 |     size_t offsets_len;
24 | } verify_ctx_t;
25 | 
26 | typedef struct {
27 |     verify_ctx_t verify_ctx;
28 | 
29 |     pthread_mutex_t mutex;
30 |     cudaStream_t stream;
31 | } gpu_ctx_t;
32 | 
33 | extern gpu_ctx_t* get_gpu_ctx();
34 | extern void release_gpu_ctx(gpu_ctx_t*);
35 | 
36 | extern void ed25519_free_gpu_mem();
37 | 
38 | extern void setup_gpu_ctx(verify_ctx_t* cur_ctx,
39 |                           const gpu_Elems* elems,
40 |                           uint32_t num_elems,
41 |                           uint32_t message_size,
42 |                           uint32_t total_packets,
43 |                           uint32_t total_packets_size,
44 |                           uint32_t total_signatures,
45 |                           const uint32_t* message_lens,
46 |                           const uint32_t* public_key_offsets,
47 |                           const uint32_t* signature_offsets,
48 |                           const uint32_t* message_start_offsets,
49 |                           size_t out_size,
50 |                           cudaStream_t stream
51 |                           );
52 | 
53 | #ifdef __cplusplus
54 | }
55 | #endif
56 | 
57 | #endif
58 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/keypair.cu:
--------------------------------------------------------------------------------
 1 | #include "ed25519.h"
 2 | #include "sha512.h"
 3 | #include "ge.h"
 4 | 
 5 | 
 /*
  * Derives a key pair from a seed.
  *
  * public_key   receives the encoding of the point computed from private_key
  * private_key  receives the hash of the seed, with bits of bytes 0 and 31
  *              adjusted as shown below
  * seed         input; the first 32 bytes are hashed
  */
 void ed25519_create_keypair(unsigned char *public_key, unsigned char *private_key, const unsigned char *seed) {
     ge_p3 public_point;

     /* Hash the 32-byte seed directly into the private key buffer. */
     sha512(seed, 32, private_key);

     /* Clear the low three bits of byte 0; in byte 31 clear the top two bits
        and then set bit 6. */
     private_key[0]  &= 0xF8;
     private_key[31]  = (unsigned char)((private_key[31] & 0x3F) | 0x40);

     /* Multiply the base point by the adjusted scalar and encode the result. */
     ge_scalarmult_base(&public_point, private_key);
     ge_p3_tobytes(public_key, &public_point);
 }
17 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/license.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2015 Orson Peters <orsonpeters@gmail.com>
 2 | 
 3 | This software is provided 'as-is', without any express or implied warranty. In no event will the
 4 | authors be held liable for any damages arising from the use of this software.
 5 | 
 6 | Permission is granted to anyone to use this software for any purpose, including commercial
 7 | applications, and to alter it and redistribute it freely, subject to the following restrictions:
 8 | 
 9 | 1. The origin of this software must not be misrepresented; you must not claim that you wrote the
10 |    original software. If you use this software in a product, an acknowledgment in the product
11 |    documentation would be appreciated but is not required.
12 | 
13 | 2. Altered source versions must be plainly marked as such, and must not be misrepresented as
14 |    being the original software.
15 | 
16 | 3. This notice may not be removed or altered from any source distribution.
17 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/main.cu:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include "ed25519.h"
  3 | #include <inttypes.h>
  4 | #include <assert.h>
  5 | #include <vector>
  6 | #include <pthread.h>
  7 | #include "gpu_common.h"
  8 | #include "gpu_ctx.h"
  9 | 
 10 | #define USE_CLOCK_GETTIME
 11 | #include "perftime.h"
 12 | 
 13 | #define PACKET_SIZE 512
 14 | 
 15 | typedef struct {
 16 |     size_t size;
 17 |     uint64_t num_retransmits;
 18 |     uint16_t addr[8];
 19 |     uint16_t port;
 20 |     bool v6;
 21 | } streamer_Meta;
 22 | 
 23 | typedef struct {
 24 |     uint8_t data[PACKET_SIZE];
 25 |     streamer_Meta meta;
 26 | } streamer_Packet;
 27 | 
 // Logs the buffer as a sequence of 32-bit words in hexadecimal.
 // `size` is in bytes; trailing bytes that do not fill a whole word are skipped.
 void print_dwords(unsigned char* ptr, int size) {
     const uint32_t* words = (const uint32_t*)ptr;
     const int word_count = size / (int)sizeof(uint32_t);

     for (int idx = 0; idx < word_count; idx++) {
         LOG("%x ", words[idx]);
     }
 }
 33 | 
 34 | typedef struct {
 35 |     uint8_t signature[SIG_SIZE];
 36 |     uint8_t public_key[PUB_KEY_SIZE];
 37 |     uint32_t message_len;
 38 |     uint8_t message[8];
 39 | } packet_t;
 40 | 
 41 | typedef struct {
 42 |     gpu_Elems* elems_h;
 43 |     uint32_t num_elems;
 44 |     uint32_t total_packets;
 45 |     uint32_t total_signatures;
 46 |     uint32_t* message_lens;
 47 |     uint32_t* public_key_offsets;
 48 |     uint32_t* signature_offsets;
 49 |     uint32_t* message_start_offsets;
 50 |     uint8_t* out_h;
 51 |     int num_iterations;
 52 |     uint8_t use_non_default_stream;
 53 | } verify_cpu_ctx_t;
 54 | 
 // pthread entry point: runs the batched verification num_iterations times
 // using the inputs described by the verify_cpu_ctx_t passed as `ctx`.
 // Always returns NULL; results are written to the context's out_h buffer.
 static void* verify_proc(void* ctx) {
     verify_cpu_ctx_t* job = (verify_cpu_ctx_t*)ctx;

     LOG("Start iterations\n");
     for (int iter = 0; iter < job->num_iterations; ++iter) {
         ed25519_verify_many(job->elems_h,
                             job->num_elems,
                             sizeof(streamer_Packet),
                             job->total_packets,
                             job->total_signatures,
                             job->message_lens,
                             job->public_key_offsets,
                             job->signature_offsets,
                             job->message_start_offsets,
                             job->out_h,
                             job->use_non_default_stream);
     }
     LOG("Done iterations\n");

     return NULL;
 }
 74 | 
 75 | const static bool USE_CUDA_ALLOC = true;
 76 | 
 // Allocates room for `num` objects of type T and stores the pointer in *ptr.
 // Uses cudaMallocHost when USE_CUDA_ALLOC is set, calloc otherwise.
 template<typename T> static void ed25519_alloc(T** ptr, size_t num) {
     if (!USE_CUDA_ALLOC) {
         *ptr = (T*)calloc(sizeof(T), num);
         return;
     }

     CUDA_CHK(cudaMallocHost(ptr, sizeof(T) * num));
 }
 84 | 
 // Releases memory obtained from ed25519_alloc(), using the deallocator that
 // matches the USE_CUDA_ALLOC setting.
 static void ed25519_free(void* ptr) {
     if (!USE_CUDA_ALLOC) {
         free(ptr);
         return;
     }

     CUDA_CHK(cudaFreeHost(ptr));
 }
 93 | 
 // Benchmark and self-check driver for ed25519_verify_many().
 //
 // usage: <prog> [-v] <num_signatures> <num_elems> <num_sigs_per_packet>
 //               <num_threads> <num_iterations> <use_non_default_stream>
 //
 // One packet is signed on the CPU and replicated num_signatures times; every
 // element points at that same packet array. num_threads worker threads then
 // each run num_iterations batched verifications, and the elapsed time and
 // throughput are printed.
 //
 // Returns 0 when every result byte written for every thread equals 1, and 1
 // on a usage error, a setup failure or any failed verification.
 int main(int argc, const char* argv[]) {
     int arg;
     bool verbose = false;
     for (arg = 1; arg < argc; arg++) {
         if (0 == strcmp(argv[arg], "-v")) {
             verbose = true;
         } else {
             break;
         }
     }

     if ((argc - arg) != 6) {
         printf("usage: %s [-v] <num_signatures> <num_elems> <num_sigs_per_packet> <num_threads> <num_iterations> <use_non_default_stream>\n", argv[0]);
         return 1;
     }

     ed25519_set_verbose(verbose);

     int num_signatures_per_elem = strtol(argv[arg++], NULL, 10);
     if (num_signatures_per_elem <= 0) {
         printf("num_signatures_per_elem should be > 0! %d\n", num_signatures_per_elem);
         return 1;
     }

     int num_elems = strtol(argv[arg++], NULL, 10);
     if (num_elems <= 0) {
         printf("num_elems should be > 0! %d\n", num_elems);
         return 1;
     }

     // Validated for command-line compatibility; not used further in this
     // function.
     int num_sigs_per_packet = strtol(argv[arg++], NULL, 10);
     if (num_sigs_per_packet <= 0) {
         printf("num_sigs_per_packet should be > 0! %d\n", num_sigs_per_packet);
         return 1;
     }

     int num_threads = strtol(argv[arg++], NULL, 10);
     if (num_threads <= 0) {
         printf("num_threads should be > 0! %d\n", num_threads);
         return 1;
     }

     int num_iterations = strtol(argv[arg++], NULL, 10);
     if (num_iterations <= 0) {
         printf("num_iterations should be > 0! %d\n", num_iterations);
         return 1;
     }

     // Range-check the parsed value before narrowing it: casting straight to
     // uint8_t would let values such as 256 wrap to 0 and pass the check.
     long stream_arg = strtol(argv[arg++], NULL, 10);
     if (stream_arg != 0 && stream_arg != 1) {
         printf("non_default_stream should be 0 or 1! %ld\n", stream_arg);
         return 1;
     }
     uint8_t use_non_default_stream = (uint8_t)stream_arg;

     LOG("streamer size: %zu elems size: %zu\n", sizeof(streamer_Packet), sizeof(gpu_Elems));

     std::vector<verify_cpu_ctx_t> vctx = std::vector<verify_cpu_ctx_t>(num_threads);

     // Host allocate
     unsigned char* seed_h = (unsigned char*)calloc(num_signatures_per_elem * SEED_SIZE, sizeof(uint32_t));
     unsigned char* private_key_h = (unsigned char*)calloc(num_signatures_per_elem, PRIV_KEY_SIZE);
     if (seed_h == NULL || private_key_h == NULL) {
         fprintf(stderr, "ERROR: host allocation failed\n");
         free(seed_h);
         free(private_key_h);
         return 1;
     }
     unsigned char message_h[] = "abcd1234";
     int message_h_len = strlen((char*)message_h);

     // Multiply as unsigned so a large product cannot overflow a signed int.
     uint32_t total_signatures = (uint32_t)num_elems * (uint32_t)num_signatures_per_elem;

     uint32_t* message_lens = NULL;
     ed25519_alloc(&message_lens, total_signatures);

     uint32_t* signature_offsets = NULL;
     ed25519_alloc(&signature_offsets, total_signatures);

     uint32_t* public_key_offsets = NULL;
     ed25519_alloc(&public_key_offsets, total_signatures);

     uint32_t* message_start_offsets = NULL;
     ed25519_alloc(&message_start_offsets, total_signatures);

     // Signature i lives in packet slot i: each offset is the slot's base
     // plus the field's position inside packet_t.
     for (uint32_t i = 0; i < total_signatures; i++) {
         uint32_t base_offset = i * sizeof(streamer_Packet);
         signature_offsets[i] = base_offset + offsetof(packet_t, signature);
         public_key_offsets[i] = base_offset + offsetof(packet_t, public_key);
         message_start_offsets[i] = base_offset + offsetof(packet_t, message);
         message_lens[i] = message_h_len;
     }

     // All threads share the same read-only offset tables.
     for (int i = 0; i < num_threads; i++) {
         vctx[i].message_lens = message_lens;
         vctx[i].signature_offsets = signature_offsets;
         vctx[i].public_key_offsets = public_key_offsets;
         vctx[i].message_start_offsets = message_start_offsets;
         vctx[i].num_iterations = num_iterations;
         vctx[i].use_non_default_stream = use_non_default_stream;
     }

     streamer_Packet* packets_h = NULL;
     ed25519_alloc(&packets_h, num_signatures_per_elem);
     uint32_t total_packets = 0;

     // Every element refers to the same host packet array.
     gpu_Elems* elems_h = NULL;
     ed25519_alloc(&elems_h, num_elems);
     for (int i = 0; i < num_elems; i++) {
         elems_h[i].num = num_signatures_per_elem;
         elems_h[i].elems = (uint8_t*)&packets_h[0];

         total_packets += num_signatures_per_elem;
     }

     LOG("initing signatures..\n");
     for (int i = 0; i < num_signatures_per_elem; i++) {
         packet_t* packet = (packet_t*)packets_h[i].data;
         memcpy(packet->message, message_h, message_h_len);

         LOG("message_len: %d\n",
             message_h_len);
     }

     for (uint32_t i = 0; i < total_signatures; i++) {
         LOG("sig_offset: %d pub_key_offset: %d message_start_offset: %d message_len: %d\n",
             signature_offsets[i], public_key_offsets[i], message_start_offsets[i], message_lens[i]);
     }

     // One result byte per signature, per thread.
     size_t out_size = total_signatures * sizeof(uint8_t);
     for (int i = 0; i < num_threads; i++) {
         vctx[i].num_elems = num_elems;
         ed25519_alloc(&vctx[i].out_h, out_size);
         vctx[i].elems_h = &elems_h[0];
         vctx[i].total_signatures = total_signatures;
         vctx[i].total_packets = total_packets;
     }

     LOG("creating seed..\n");
     int ret = ed25519_create_seed(seed_h);
     LOG("create_seed: %d\n", ret);
     if (ret != 0) {
         // Without a seed the key pair below would come from a zeroed buffer.
         fprintf(stderr, "ERROR: ed25519_create_seed: %d\n", ret);
         return 1;
     }
     packet_t* first_packet_h = (packet_t*)packets_h[0].data;
     ed25519_create_keypair(first_packet_h->public_key, private_key_h, seed_h);
     ed25519_sign(first_packet_h->signature, first_packet_h->message, message_h_len, first_packet_h->public_key, private_key_h);
     ret = ed25519_verify(first_packet_h->signature, message_h, message_h_len, first_packet_h->public_key);
     LOG("verify: %d\n", ret);

     // Replicate the signed packet's signature and key into every other slot.
     for (int i = 1; i < num_signatures_per_elem; i++) {
         packet_t* packet_h = (packet_t*)packets_h[i].data;
         memcpy(packet_h->signature, first_packet_h->signature, SIG_SIZE);
         memcpy(packet_h->public_key, first_packet_h->public_key, PUB_KEY_SIZE);
     }

     for (int i = 0; i < num_signatures_per_elem; i++ ) {
         packet_t* packet_h = (packet_t*)packets_h[i].data;
         unsigned char* sig_ptr = packet_h->signature;
         unsigned char* messages_ptr = packet_h->message;
         LOG("sig:");
         print_dwords(sig_ptr, SIG_SIZE);
         LOG("\nmessage: ");
         print_dwords(messages_ptr, message_h_len);
         LOG("\n\n");
     }
     LOG("\n");

     std::vector<pthread_t> threads = std::vector<pthread_t>(num_threads);
     pthread_attr_t attr;
     ret = pthread_attr_init(&attr);
     if (ret != 0) {
         LOG("ERROR: pthread_attr_init: %d\n", ret);
         return 1;
     }

     perftime_t start, end;
     get_time(&start);
     for (int i = 0; i < num_threads; i++) {
         ret = pthread_create(&threads[i],
                              &attr,
                              verify_proc,
                              &vctx[i]);
         if (ret != 0) {
             LOG("ERROR: pthread_create: %d\n", ret);
             return 1;
         }
     }

     void* res = NULL;
     for (int i = 0; i < num_threads; i++) {
         ret = pthread_join(threads[i], &res);
         if (ret != 0) {
             LOG("ERROR: pthread_join: %d\n", ret);
             return 1;
         }
     }
     get_time(&end);

     // The attribute object is no longer needed once all threads are joined.
     pthread_attr_destroy(&attr);

     // Accumulate in 64 bits: threads * signatures * iterations easily
     // exceeds the range of int.
     unsigned long long total =
         (unsigned long long)num_threads * total_signatures * (unsigned long long)num_iterations;
     double diff = get_diff(&start, &end);
     printf("time diff: %f total: %llu sigs/sec: %f\n",
            diff,
            total,
            (double)total / (diff / 1e6));

     // Check the results explicitly instead of through assert(), which is
     // compiled out under NDEBUG and would let a failed run exit with 0.
     bool any_failed = false;
     for (int thread = 0; thread < num_threads; thread++) {
         LOG("ret:\n");
         bool verify_failed = false;
         for (size_t i = 0; i < out_size; i++) {
             LOG("%x ", vctx[thread].out_h[i]);
             if (vctx[thread].out_h[i] != 1) {
                 verify_failed = true;
             }
         }
         LOG("\n");
         fflush(stdout);
         if (verify_failed) {
             fprintf(stderr, "ERROR: signature verification failed on thread %d\n", thread);
             any_failed = true;
         }
     }

     ed25519_free(elems_h);
     ed25519_free(packets_h);
     ed25519_free(message_lens);
     ed25519_free(signature_offsets);
     ed25519_free(public_key_offsets);
     ed25519_free(message_start_offsets);
     for (int thread = 0; thread < num_threads; thread++) {
         ed25519_free(vctx[thread].out_h);
     }
     free(seed_h);
     free(private_key_h);
     ed25519_free_gpu_mem();
     return any_failed ? 1 : 0;
 }
318 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/perftime.h:
--------------------------------------------------------------------------------
 #ifndef PERFTIME_H
 #define PERFTIME_H

 /*
  * Small timing helpers. The time source is chosen at compile time:
  *   USE_RDTSC          raw rdtsc counter values
  *   USE_CLOCK_GETTIME  clock_gettime(CLOCK_MONOTONIC_RAW)
  *   (neither)          gettimeofday()
  *
  * The helpers are `static inline` so that including this header without
  * calling all of them does not produce unused-function warnings.
  */

 #ifdef USE_RDTSC
 #include <stdint.h> /* uint64_t; the header did not include this itself */

 /* Reads the counter returned by the rdtsc instruction as one 64-bit value. */
 static inline uint64_t rdtsc(void)
 {
     unsigned int hi, lo;
     __asm__ volatile("rdtsc" : "=a" (lo), "=d" (hi));
     return ((uint64_t)hi << 32) | lo;
 }

 typedef struct {
     uint64_t count;
 } perftime_t;

 #elif defined(USE_CLOCK_GETTIME)
 #include <time.h>
 typedef struct timespec perftime_t;
 #else
 #include <sys/time.h>
 typedef struct timeval perftime_t;
 #endif

 /* Stores the current time in *t. In rdtsc mode this always returns 0;
    otherwise it returns the result of the underlying clock call. */
 static inline int get_time(perftime_t* t) {
 #ifdef USE_RDTSC
     t->count = rdtsc();
     return 0;
 #elif defined(USE_CLOCK_GETTIME)
     return clock_gettime(CLOCK_MONOTONIC_RAW, t);
 #else
     return gettimeofday(t, NULL /* timezone */);
 #endif
 }

 /* Converts a timestamp to microseconds as a double. In rdtsc mode the raw
    counter value is returned unchanged, so the unit is counter ticks. */
 static inline double get_us(const perftime_t* time) {
 #ifdef USE_RDTSC
     return time->count;
 #elif defined(USE_CLOCK_GETTIME)
     /* Divide in floating point: integer division dropped the
        sub-microsecond part before it reached the double result. */
     return (((double)time->tv_nsec / 1000.0) + (double)time->tv_sec * 1000000);
 #else
     return (time->tv_usec + (double)time->tv_sec * 1000000);
 #endif
 }

 /* Returns end - start in the unit produced by get_us(). */
 static inline double get_diff(const perftime_t* start, const perftime_t* end) {
     return get_us(end) - get_us(start);
 }

 #endif
51 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/sc.h:
--------------------------------------------------------------------------------
 1 | #ifndef SC_H
 2 | #define SC_H
 3 | 
 4 | /*
 5 | The set of scalars is \Z/l
 6 | where l = 2^252 + 27742317777372353535851937790883648493.
 7 | */
 8 | 
 9 | void __host__ __device__ sc_reduce(unsigned char *s);
10 | void __host__ __device__ sc_muladd(unsigned char *s, const unsigned char *a, const unsigned char *b, const unsigned char *c);
11 | 
12 | #endif
13 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/seed.cu:
--------------------------------------------------------------------------------
 1 | #include "ed25519.h"
 2 | 
 3 | #ifndef ED25519_NO_SEED
 4 | 
 5 | #ifdef _WIN32
 6 | #include <windows.h>
 7 | #include <wincrypt.h>
 8 | #else
 9 | #include <stdio.h>
10 | #endif
11 | 
/*
 * Fills seed[0..31] with 32 random bytes from the operating system:
 * CryptGenRandom on Windows, /dev/urandom elsewhere.
 *
 * Returns 0 on success and 1 when the random source cannot be opened or
 * does not deliver all 32 bytes.
 */
int ed25519_create_seed(unsigned char *seed) {
#ifdef _WIN32
    HCRYPTPROV prov;

    if (!CryptAcquireContext(&prov, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))  {
        return 1;
    }

    if (!CryptGenRandom(prov, 32, seed))  {
        CryptReleaseContext(prov, 0);
        return 1;
    }

    CryptReleaseContext(prov, 0);
#else
    FILE *f = fopen("/dev/urandom", "rb");

    if (f == NULL) {
        return 1;
    }

    size_t res = fread(seed, 1, 32, f);
    /* Close before checking the count: the original returned early on a
       short read and leaked the open FILE. */
    fclose(f);
    if (res != 32) {
        return 1;
    }
#endif

    return 0;
}
42 | 
43 | #endif
44 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/sha512.cu:
--------------------------------------------------------------------------------
  1 | /* LibTomCrypt, modular cryptographic library -- Tom St Denis
  2 |  *
  3 |  * LibTomCrypt is a library that provides various cryptographic
  4 |  * algorithms in a highly modular and flexible manner.
  5 |  *
  6 |  * The library is free for all purposes without any express
  7 |  * guarantee it works.
  8 |  *
  9 |  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
 10 |  */
 11 | 
 12 | #include "fixedint.h"
 13 | #include "sha512.h"
 14 | 
 15 | #ifdef __CUDA_ARCH__
 16 | #define K_DEF __device__
 17 | #else
 18 | #define K_DEF
 19 | #endif
 20 | 
 21 | static const uint64_t K_DEF K[80] = {
 22 |     UINT64_C(0x428a2f98d728ae22), UINT64_C(0x7137449123ef65cd),
 23 |     UINT64_C(0xb5c0fbcfec4d3b2f), UINT64_C(0xe9b5dba58189dbbc),
 24 |     UINT64_C(0x3956c25bf348b538), UINT64_C(0x59f111f1b605d019),
 25 |     UINT64_C(0x923f82a4af194f9b), UINT64_C(0xab1c5ed5da6d8118),
 26 |     UINT64_C(0xd807aa98a3030242), UINT64_C(0x12835b0145706fbe),
 27 |     UINT64_C(0x243185be4ee4b28c), UINT64_C(0x550c7dc3d5ffb4e2),
 28 |     UINT64_C(0x72be5d74f27b896f), UINT64_C(0x80deb1fe3b1696b1),
 29 |     UINT64_C(0x9bdc06a725c71235), UINT64_C(0xc19bf174cf692694),
 30 |     UINT64_C(0xe49b69c19ef14ad2), UINT64_C(0xefbe4786384f25e3),
 31 |     UINT64_C(0x0fc19dc68b8cd5b5), UINT64_C(0x240ca1cc77ac9c65),
 32 |     UINT64_C(0x2de92c6f592b0275), UINT64_C(0x4a7484aa6ea6e483),
 33 |     UINT64_C(0x5cb0a9dcbd41fbd4), UINT64_C(0x76f988da831153b5),
 34 |     UINT64_C(0x983e5152ee66dfab), UINT64_C(0xa831c66d2db43210),
 35 |     UINT64_C(0xb00327c898fb213f), UINT64_C(0xbf597fc7beef0ee4),
 36 |     UINT64_C(0xc6e00bf33da88fc2), UINT64_C(0xd5a79147930aa725),
 37 |     UINT64_C(0x06ca6351e003826f), UINT64_C(0x142929670a0e6e70),
 38 |     UINT64_C(0x27b70a8546d22ffc), UINT64_C(0x2e1b21385c26c926),
 39 |     UINT64_C(0x4d2c6dfc5ac42aed), UINT64_C(0x53380d139d95b3df),
 40 |     UINT64_C(0x650a73548baf63de), UINT64_C(0x766a0abb3c77b2a8),
 41 |     UINT64_C(0x81c2c92e47edaee6), UINT64_C(0x92722c851482353b),
 42 |     UINT64_C(0xa2bfe8a14cf10364), UINT64_C(0xa81a664bbc423001),
 43 |     UINT64_C(0xc24b8b70d0f89791), UINT64_C(0xc76c51a30654be30),
 44 |     UINT64_C(0xd192e819d6ef5218), UINT64_C(0xd69906245565a910),
 45 |     UINT64_C(0xf40e35855771202a), UINT64_C(0x106aa07032bbd1b8),
 46 |     UINT64_C(0x19a4c116b8d2d0c8), UINT64_C(0x1e376c085141ab53),
 47 |     UINT64_C(0x2748774cdf8eeb99), UINT64_C(0x34b0bcb5e19b48a8),
 48 |     UINT64_C(0x391c0cb3c5c95a63), UINT64_C(0x4ed8aa4ae3418acb),
 49 |     UINT64_C(0x5b9cca4f7763e373), UINT64_C(0x682e6ff3d6b2b8a3),
 50 |     UINT64_C(0x748f82ee5defb2fc), UINT64_C(0x78a5636f43172f60),
 51 |     UINT64_C(0x84c87814a1f0ab72), UINT64_C(0x8cc702081a6439ec),
 52 |     UINT64_C(0x90befffa23631e28), UINT64_C(0xa4506cebde82bde9),
 53 |     UINT64_C(0xbef9a3f7b2c67915), UINT64_C(0xc67178f2e372532b),
 54 |     UINT64_C(0xca273eceea26619c), UINT64_C(0xd186b8c721c0c207),
 55 |     UINT64_C(0xeada7dd6cde0eb1e), UINT64_C(0xf57d4f7fee6ed178),
 56 |     UINT64_C(0x06f067aa72176fba), UINT64_C(0x0a637dc5a2c898a6),
 57 |     UINT64_C(0x113f9804bef90dae), UINT64_C(0x1b710b35131c471b),
 58 |     UINT64_C(0x28db77f523047d84), UINT64_C(0x32caab7b40c72493),
 59 |     UINT64_C(0x3c9ebe0a15c9bebc), UINT64_C(0x431d67c49c100d4c),
 60 |     UINT64_C(0x4cc5d4becb3e42b6), UINT64_C(0x597f299cfc657e2a),
 61 |     UINT64_C(0x5fcb6fab3ad6faec), UINT64_C(0x6c44198c4a475817)
 62 | };
 63 | 
 64 | /* Various logical functions */
 65 | 
 66 | #define ROR64c(x, y) \
 67 |     ( ((((x)&UINT64_C(0xFFFFFFFFFFFFFFFF))>>((uint64_t)(y)&UINT64_C(63))) | \
 68 |       ((x)<<((uint64_t)(64-((y)&UINT64_C(63)))))) & UINT64_C(0xFFFFFFFFFFFFFFFF))
 69 | 
 70 | #define STORE64H(x, y)                                                                     \
 71 |    { (y)[0] = (unsigned char)(((x)>>56)&255); (y)[1] = (unsigned char)(((x)>>48)&255);     \
 72 |      (y)[2] = (unsigned char)(((x)>>40)&255); (y)[3] = (unsigned char)(((x)>>32)&255);     \
 73 |      (y)[4] = (unsigned char)(((x)>>24)&255); (y)[5] = (unsigned char)(((x)>>16)&255);     \
 74 |      (y)[6] = (unsigned char)(((x)>>8)&255); (y)[7] = (unsigned char)((x)&255); }
 75 | 
 76 | #define LOAD64H(x, y)                                                      \
 77 |    { x = (((uint64_t)((y)[0] & 255))<<56)|(((uint64_t)((y)[1] & 255))<<48) | \
 78 |          (((uint64_t)((y)[2] & 255))<<40)|(((uint64_t)((y)[3] & 255))<<32) | \
 79 |          (((uint64_t)((y)[4] & 255))<<24)|(((uint64_t)((y)[5] & 255))<<16) | \
 80 |          (((uint64_t)((y)[6] & 255))<<8)|(((uint64_t)((y)[7] & 255))); }
 81 | 
 82 | 
 83 | #define Ch(x,y,z)       (z ^ (x & (y ^ z)))
 84 | #define Maj(x,y,z)      (((x | y) & z) | (x & y))
 85 | #define S(x, n)         ROR64c(x, n)
 86 | #define R(x, n)         (((x) &UINT64_C(0xFFFFFFFFFFFFFFFF))>>((uint64_t)n))
 87 | #define Sigma0(x)       (S(x, 28) ^ S(x, 34) ^ S(x, 39))
 88 | #define Sigma1(x)       (S(x, 14) ^ S(x, 18) ^ S(x, 41))
 89 | #define Gamma0(x)       (S(x, 1) ^ S(x, 8) ^ R(x, 7))
 90 | #define Gamma1(x)       (S(x, 19) ^ S(x, 61) ^ R(x, 6))
 91 | #ifndef MIN
 92 |    #define MIN(x, y) ( ((x)<(y))?(x):(y) )
 93 | #endif
 94 | 
 /* Compression function: folds one 128-byte (1024-bit) block at `buf` into
    md->state. Always returns 0. */
 static int __device__ __host__ sha512_compress(sha512_context *md, unsigned char *buf)
 {
     uint64_t work[8], sched[80], t0, t1;
     int i;

     /* load the block as 16 big-endian 64-bit words into sched[0..15] */
     for (i = 0; i < 16; i++) {
         LOAD64H(sched[i], buf + (8*i));
     }

     /* expand the message schedule into sched[16..79] */
     for (i = 16; i < 80; i++) {
         sched[i] = Gamma1(sched[i - 2]) + sched[i - 7] + Gamma0(sched[i - 15]) + sched[i - 16];
     }

     /* working variables start out as a copy of the chaining state */
     for (i = 0; i < 8; i++) {
         work[i] = md->state[i];
     }

     /* One round. The eight variables are passed in rotated order on each
        call below, so no values have to be shuffled between rounds. */
     #define SHA512_ROUND(a,b,c,d,e,f,g,h,idx) \
     t0 = h + Sigma1(e) + Ch(e, f, g) + K[idx] + sched[idx]; \
     t1 = Sigma0(a) + Maj(a, b, c); \
     d += t0; \
     h  = t0 + t1;

     /* 80 rounds, eight per loop iteration */
     for (i = 0; i < 80; i += 8) {
         SHA512_ROUND(work[0],work[1],work[2],work[3],work[4],work[5],work[6],work[7],i+0);
         SHA512_ROUND(work[7],work[0],work[1],work[2],work[3],work[4],work[5],work[6],i+1);
         SHA512_ROUND(work[6],work[7],work[0],work[1],work[2],work[3],work[4],work[5],i+2);
         SHA512_ROUND(work[5],work[6],work[7],work[0],work[1],work[2],work[3],work[4],i+3);
         SHA512_ROUND(work[4],work[5],work[6],work[7],work[0],work[1],work[2],work[3],i+4);
         SHA512_ROUND(work[3],work[4],work[5],work[6],work[7],work[0],work[1],work[2],i+5);
         SHA512_ROUND(work[2],work[3],work[4],work[5],work[6],work[7],work[0],work[1],i+6);
         SHA512_ROUND(work[1],work[2],work[3],work[4],work[5],work[6],work[7],work[0],i+7);
     }

     #undef SHA512_ROUND

     /* feedback: add the working variables back into the state */
     for (i = 0; i < 8; i++) {
         md->state[i] += work[i];
     }

     return 0;
 }
145 | 
146 | 
/**
   Reset a hash state so a new message can be hashed
   @param md   The hash state to reset
   @return 0 if successful, 1 if md is NULL
*/
int __device__ __host__ sha512_init(sha512_context * md) {
    /* SHA-512 initial state words */
    const uint64_t initial_state[8] = {
        UINT64_C(0x6a09e667f3bcc908),
        UINT64_C(0xbb67ae8584caa73b),
        UINT64_C(0x3c6ef372fe94f82b),
        UINT64_C(0xa54ff53a5f1d36f1),
        UINT64_C(0x510e527fade682d1),
        UINT64_C(0x9b05688c2b3e6c1f),
        UINT64_C(0x1f83d9abfb41bd6b),
        UINT64_C(0x5be0cd19137e2179)
    };
    int word;

    if (md == NULL) {
        return 1;
    }

    /* nothing buffered and nothing hashed yet */
    md->curlen = 0;
    md->length = 0;

    for (word = 0; word < 8; word++) {
        md->state[word] = initial_state[word];
    }

    return 0;
}
168 | 
/**
   Process a block of memory through the hash
   @param md     The hash state
   @param in     The data to hash
   @param inlen  The length of the data (octets)
   @return 0 if successful, 1 if md or in is NULL or the state is corrupt
*/
/* Carries the same __device__ __host__ qualifiers as the declaration in
 * sha512.h and as sha512_init, because it is called from device code. */
int __device__ __host__ sha512_update(sha512_context * md, const unsigned char *in, size_t inlen)
{
    size_t n;
    size_t i;
    int           err;
    if (md == NULL) return 1;
    if (in == NULL) return 1;
    /* curlen can never legitimately exceed the 128-byte buffer */
    if (md->curlen > sizeof(md->buf)) {
       return 1;
    }
    while (inlen > 0) {
        if (md->curlen == 0 && inlen >= 128) {
           /* fast path: nothing buffered, so compress a full block straight
            * from the caller's memory without copying it into md->buf */
           if ((err = sha512_compress (md, (unsigned char *)in)) != 0) {
              return err;
           }
           md->length += 128 * 8;
           in             += 128;
           inlen          -= 128;
        } else {
           /* slow path: top up the internal buffer with as much as fits */
           n = MIN(inlen, (128 - md->curlen));

           for (i = 0; i < n; i++) {
            md->buf[i + md->curlen] = in[i];
           }

           md->curlen += n;
           in             += n;
           inlen          -= n;
           /* buffer full: compress it and start a fresh one */
           if (md->curlen == 128) {
              if ((err = sha512_compress (md, md->buf)) != 0) {
                 return err;
              }
              md->length += 8*128;
              md->curlen = 0;
           }
       }
    }
    return 0;
}
216 | 
/**
   Terminate the hash to get the digest
   @param md  The hash state
   @param out [out] The destination of the hash (64 bytes)
   @return 0 if successful, 1 if md or out is NULL or the state is corrupt
*/
/* Carries the same __device__ __host__ qualifiers as the declaration in
 * sha512.h and as sha512_init, because it is called from device code. */
int __device__ __host__ sha512_final(sha512_context * md, unsigned char *out)
{
    int i;

    if (md == NULL) return 1;
    if (out == NULL) return 1;

    /* a full buffer should already have been compressed by sha512_update */
    if (md->curlen >= sizeof(md->buf)) {
        return 1;
    }

    /* account for the bytes still sitting in the buffer (length is in bits) */
    md->length += md->curlen * UINT64_C(8);

    /* append the '1' bit */
    md->buf[md->curlen++] = (unsigned char)0x80;

    /* if the length is currently above 112 bytes there is no room left for
     * the length field: pad this block with zeros, compress it, and put the
     * length in a fresh block.
     */
    if (md->curlen > 112) {
        while (md->curlen < 128) {
            md->buf[md->curlen++] = (unsigned char)0;
        }
        sha512_compress(md, md->buf);
        md->curlen = 0;
    }

    /* pad up to 120 bytes with zeroes
     * note: bytes 112..119 are the upper 64 bits of the 128-bit length field
     * and are always zero here, since md->length is only 64 bits wide.
     */
    while (md->curlen < 120) {
        md->buf[md->curlen++] = (unsigned char)0;
    }

    /* store the bit length in the last 8 bytes and compress the final block */
    STORE64H(md->length, md->buf+120);
    sha512_compress(md, md->buf);

    /* copy output: eight 64-bit state words, 8 bytes each */
    for (i = 0; i < 8; i++) {
        STORE64H(md->state[i], out+(8*i));
    }

    return 0;
}
271 | 
/**
   One-shot SHA-512 of a single buffer
   @param message      The data to hash
   @param message_len  The length of the data (octets)
   @param out [out]    The destination of the hash (64 bytes)
   @return 0 if successful, otherwise the first non-zero code returned by
           sha512_init, sha512_update or sha512_final
*/
/* Carries the same __device__ __host__ qualifiers as the declaration in sha512.h. */
int __device__ __host__ sha512(const unsigned char *message, size_t message_len, unsigned char *out)
{
    sha512_context ctx;
    int ret;
    if ((ret = sha512_init(&ctx))) return ret;
    if ((ret = sha512_update(&ctx, message, message_len))) return ret;
    if ((ret = sha512_final(&ctx, out))) return ret;
    return 0;
}
281 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/sha512.h:
--------------------------------------------------------------------------------
 1 | #ifndef SHA512_H
 2 | #define SHA512_H
 3 | 
 4 | #include <stddef.h>
 5 | 
 6 | #include "fixedint.h"
 7 | 
 8 | /* state */
 9 | typedef struct sha512_context_ {
10 |     uint64_t  length, state[8];
11 |     size_t curlen;
12 |     unsigned char buf[128];
13 | } sha512_context;
14 | 
15 | 
/* All functions return 0 on success and a non-zero value on failure. */

/* Reset md so a new message can be hashed. */
int __device__ __host__ sha512_init(sha512_context * md);
/* Pad and finish the hash; writes the 64-byte digest to out. */
int __device__ __host__ sha512_final(sha512_context * md, unsigned char *out);
/* Absorb inlen bytes from in; may be called repeatedly between init and final. */
int __device__ __host__ sha512_update(sha512_context * md, const unsigned char *in, size_t inlen);
/* One-shot helper: init, update with the whole message, final. */
int __device__ __host__ sha512(const unsigned char *message, size_t message_len, unsigned char *out);
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/sign.cu:
--------------------------------------------------------------------------------
  1 | #include "ed25519.h"
  2 | #include "sha512.h"
  3 | #include "ge.h"
  4 | #include "sc.h"
  5 | #include "gpu_common.h"
  6 | #include "gpu_ctx.h"
  7 | 
  8 | 
  9 | static void __device__ __host__
 10 | ed25519_sign_device(unsigned char *signature,
 11 |                    const unsigned char *message,
 12 |                    size_t message_len,
 13 |                    const unsigned char *public_key,
 14 |                    const unsigned char *private_key) {
 15 |     sha512_context hash;
 16 |     unsigned char hram[64];
 17 |     unsigned char r[64];
 18 |     ge_p3 R;
 19 | 
 20 | 
 21 |     sha512_init(&hash);
 22 |     sha512_update(&hash, private_key + 32, 32);
 23 |     sha512_update(&hash, message, message_len);
 24 |     sha512_final(&hash, r);
 25 | 
 26 |     sc_reduce(r);
 27 |     ge_scalarmult_base(&R, r);
 28 |     ge_p3_tobytes(signature, &R);
 29 | 
 30 |     sha512_init(&hash);
 31 |     sha512_update(&hash, signature, 32);
 32 |     sha512_update(&hash, public_key, 32);
 33 |     sha512_update(&hash, message, message_len);
 34 |     sha512_final(&hash, hram);
 35 | 
 36 |     sc_reduce(hram);
 37 |     sc_muladd(signature + 32, hram, private_key, r);
 38 | }
 39 | 
// Host entry point for signing a single message: forwards all arguments
// unchanged to ed25519_sign_device, which writes the result into `signature`.
void ed25519_sign(unsigned char *signature,
                   const unsigned char *message,
                   size_t message_len,
                   const unsigned char *public_key,
                   const unsigned char *private_key) {
    ed25519_sign_device(signature, message, message_len, public_key, private_key);
}
 47 | 
 48 | 
 49 | 
 50 | __global__ void ed25519_sign_kernel(unsigned char* packets,
 51 |                                     uint32_t message_size,
 52 |                                     uint32_t* public_key_offsets,
 53 |                                     uint32_t* private_key_offsets,
 54 |                                     uint32_t* message_start_offsets,
 55 |                                     uint32_t* message_lens,
 56 |                                     size_t num_transactions,
 57 |                                     uint8_t* out)
 58 | {
 59 |     int i = blockIdx.x * blockDim.x + threadIdx.x;
 60 |     if (i < num_transactions) {
 61 |         uint32_t message_start_offset = message_start_offsets[i];
 62 |         uint32_t public_key_offset = public_key_offsets[i];
 63 |         uint32_t private_key_offset = private_key_offsets[i];
 64 |         uint32_t message_len = message_lens[i];
 65 | 
 66 |         ed25519_sign_device(&out[i * SIG_SIZE],
 67 |                             &packets[message_start_offset],
 68 |                             message_len,
 69 |                             &packets[public_key_offset],
 70 |                             &packets[private_key_offset]);
 71 |     }
 72 | }
 73 | 
 74 | 
 75 | 
 76 | void ed25519_sign_many(const gpu_Elems* elems,
 77 |                        uint32_t num_elems,
 78 |                        uint32_t message_size,
 79 |                        uint32_t total_packets,
 80 |                        uint32_t total_signatures,
 81 |                        const uint32_t* message_lens,
 82 |                        const uint32_t* public_key_offsets,
 83 |                        const uint32_t* private_key_offsets,
 84 |                        const uint32_t* message_start_offsets,
 85 |                        uint8_t* signatures_out,
 86 |                        uint8_t use_non_default_stream
 87 |                        ) {
 88 |     int num_threads_per_block = 64;
 89 |     int num_blocks = ROUND_UP_DIV(total_signatures, num_threads_per_block);
 90 |     size_t sig_out_size = SIG_SIZE * total_signatures;
 91 | 
 92 |     if (0 == total_packets) {
 93 |         return;
 94 |     }
 95 | 
 96 |     uint32_t total_packets_size = total_packets * message_size;
 97 | 
 98 |     LOG("signing %d packets sig_size: %zu message_size: %d\n",
 99 |         total_packets, sig_out_size, message_size);
100 | 
101 |     gpu_ctx_t* gpu_ctx = get_gpu_ctx();
102 |     verify_ctx_t* cur_ctx = &gpu_ctx->verify_ctx;
103 | 
104 |     cudaStream_t stream = 0;
105 |     if (0 != use_non_default_stream) {
106 |         stream = gpu_ctx->stream;
107 |     }
108 | 
109 |     setup_gpu_ctx(cur_ctx,
110 |                   elems,
111 |                   num_elems,
112 |                   message_size,
113 |                   total_packets,
114 |                   total_packets_size,
115 |                   total_signatures,
116 |                   message_lens,
117 |                   public_key_offsets,
118 |                   private_key_offsets,
119 |                   message_start_offsets,
120 |                   sig_out_size,
121 |                   stream
122 |                  );
123 | 
124 |     LOG("signing blocks: %d threads_per_block: %d\n", num_blocks, num_threads_per_block);
125 |     ed25519_sign_kernel<<<num_blocks, num_threads_per_block, 0, stream>>>
126 |                             (cur_ctx->packets,
127 |                              message_size,
128 |                              cur_ctx->public_key_offsets,
129 |                              cur_ctx->signature_offsets,
130 |                              cur_ctx->message_start_offsets,
131 |                              cur_ctx->message_lens,
132 |                              total_signatures,
133 |                              cur_ctx->out);
134 | 
135 |     cudaError_t err = cudaMemcpyAsync(signatures_out, cur_ctx->out, sig_out_size, cudaMemcpyDeviceToHost, stream);
136 |     if (err != cudaSuccess)  {
137 |         fprintf(stderr, "sign: cudaMemcpy(out) error: out = %p cur_ctx->out = %p size = %zu num: %d elems = %p\n",
138 |                         signatures_out, cur_ctx->out, sig_out_size, num_elems, elems);
139 |     }
140 |     CUDA_CHK(err);
141 | 
142 |     CUDA_CHK(cudaStreamSynchronize(stream));
143 | 
144 |     release_gpu_ctx(gpu_ctx);
145 | }
146 | 
147 | 


--------------------------------------------------------------------------------
/src/cuda-ecc-ed25519/verify.cu:
--------------------------------------------------------------------------------
  1 | #include "sha512.h"
  2 | #include <algorithm>
  3 | #include <stdio.h>
  4 | #include "sc.cu"
  5 | #include "fe.cu"
  6 | #include "ge.cu"
  7 | #include "sha512.cu"
  8 | 
  9 | #include "ed25519.h"
 10 | #include <pthread.h>
 11 | 
 12 | #include "gpu_common.h"
 13 | #include "gpu_ctx.h"
 14 | 
 15 | #define USE_CLOCK_GETTIME
 16 | #include "perftime.h"
 17 | 
 18 | static int __host__ __device__ consttime_equal(const unsigned char *x, const unsigned char *y) {
 19 |     unsigned char r = 0;
 20 | 
 21 |     r = x[0] ^ y[0];
 22 |     #define F(i) r |= x[i] ^ y[i]
 23 |     F(1);
 24 |     F(2);
 25 |     F(3);
 26 |     F(4);
 27 |     F(5);
 28 |     F(6);
 29 |     F(7);
 30 |     F(8);
 31 |     F(9);
 32 |     F(10);
 33 |     F(11);
 34 |     F(12);
 35 |     F(13);
 36 |     F(14);
 37 |     F(15);
 38 |     F(16);
 39 |     F(17);
 40 |     F(18);
 41 |     F(19);
 42 |     F(20);
 43 |     F(21);
 44 |     F(22);
 45 |     F(23);
 46 |     F(24);
 47 |     F(25);
 48 |     F(26);
 49 |     F(27);
 50 |     F(28);
 51 |     F(29);
 52 |     F(30);
 53 |     F(31);
 54 |     #undef F
 55 | 
 56 |     return !r;
 57 | }
 58 | 
 59 | static int __device__ __host__
 60 | ed25519_verify_device(const unsigned char *signature,
 61 |                       const unsigned char *message,
 62 |                       uint32_t message_len,
 63 |                       const unsigned char *public_key) {
 64 |     unsigned char h[64];
 65 |     unsigned char checker[32];
 66 |     sha512_context hash;
 67 |     ge_p3 A;
 68 |     ge_p2 R;
 69 | 
 70 |     if (signature[63] & 224) {
 71 |         return 0;
 72 |     }
 73 | 
 74 |     if (ge_frombytes_negate_vartime(&A, public_key) != 0) {
 75 |         return 0;
 76 |     }
 77 | 
 78 |     sha512_init(&hash);
 79 |     sha512_update(&hash, signature, 32);
 80 |     sha512_update(&hash, public_key, 32);
 81 |     sha512_update(&hash, message, message_len);
 82 |     sha512_final(&hash, h);
 83 | 
 84 |     sc_reduce(h);
 85 |     ge_double_scalarmult_vartime(&R, h, &A, signature + 32);
 86 |     ge_tobytes(checker, &R);
 87 | 
 88 |     if (!consttime_equal(checker, signature)) {
 89 |         return 0;
 90 |     }
 91 | 
 92 |     return 1;
 93 | }
 94 | 
// Host entry point for verifying a single signature: forwards all arguments
// unchanged to ed25519_verify_device and returns its result
// (1 when the signature is accepted, 0 otherwise).
int 
ed25519_verify(const unsigned char *signature,
               const unsigned char *message,
               uint32_t message_len,
               const unsigned char *public_key) {
    return ed25519_verify_device(signature, message, message_len, public_key);
}
102 | 
// Verification kernel: each thread checks one signature.
//
// packets                buffer holding signatures, keys and messages, addressed by byte offsets
// message_size           not used by this kernel
// message_lens           per-signature message length in bytes
// public_key_offsets     per-signature offset of the public key inside packets
// signature_offsets      per-signature offset of the signature inside packets
// message_start_offsets  per-signature offset of the message inside packets
// num_keys               number of signatures to check
// out                    out[i] receives the ed25519_verify_device result (1 accepted, 0 rejected)
__global__ void ed25519_verify_kernel(const uint8_t* packets,
                                      uint32_t message_size,
                                      uint32_t* message_lens,
                                      uint32_t* public_key_offsets,
                                      uint32_t* signature_offsets,
                                      uint32_t* message_start_offsets,
                                      size_t num_keys,
                                      uint8_t* out)
{
    // Compute the index as size_t: it is compared against a size_t count, so a
    // signed int could wrap negative on very large grids and skip entries.
    size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
    if (i < num_keys) {
        uint32_t message_start_offset = message_start_offsets[i];
        uint32_t signature_offset = signature_offsets[i];
        uint32_t public_key_offset = public_key_offsets[i];
        uint32_t message_len = message_lens[i];

        out[i] = ed25519_verify_device(&packets[signature_offset],
                                       &packets[message_start_offset],
                                       message_len,
                                       &packets[public_key_offset]);
    }
}
125 | 
// Global switch read by the LOG() macro in gpu_common.h; logging is off by default.
bool g_verbose = false;

// Turn LOG() output on (true) or off (false).
void ed25519_set_verbose(bool val) {
    g_verbose = val;
}
131 | 
// Verify a batch of signatures on the GPU and copy one result byte per
// signature back to the host.
//
// elems / num_elems       packet batches handed to setup_gpu_ctx
// message_size            size of one packet; used to size the device packet buffer
// total_packets           number of packets; the function returns at once when this is 0
// total_signatures        number of signatures; determines the launch size and out_size
// message_lens, public_key_offsets, signature_offsets, message_start_offsets
//                         per-signature arrays handed to setup_gpu_ctx
// out                     [out] receives total_signatures bytes copied from the device
//                         result buffer (the kernel stores 1 for accepted, 0 for rejected)
// use_non_default_stream  non-zero to run on the context's stream instead of stream 0
//
// Blocks until the GPU work has finished. CUDA errors are reported via CUDA_CHK.
void ed25519_verify_many(const gpu_Elems* elems,
                         uint32_t num_elems,
                         uint32_t message_size,
                         uint32_t total_packets,
                         uint32_t total_signatures,
                         const uint32_t* message_lens,
                         const uint32_t* public_key_offsets,
                         const uint32_t* signature_offsets,
                         const uint32_t* message_start_offsets,
                         uint8_t* out,
                         uint8_t use_non_default_stream)
{
    LOG("Starting verify_many: num_elems: %d total_signatures: %d total_packets: %d message_size: %d\n",
        num_elems, total_signatures, total_packets, message_size);

    size_t out_size = total_signatures * sizeof(uint8_t);

    // NOTE(review): this product is computed in 32 bits — confirm callers keep it in range
    uint32_t total_packets_size = total_packets * message_size;

    if (0 == total_packets) {
        return;
    }

    // Device allocate

    gpu_ctx_t* gpu_ctx = get_gpu_ctx();

    verify_ctx_t* cur_ctx = &gpu_ctx->verify_ctx;

    // Stream 0 is the default stream; the context's own stream is opt-in
    cudaStream_t stream = 0;
    if (0 != use_non_default_stream) {
        stream = gpu_ctx->stream;
    }

    setup_gpu_ctx(cur_ctx,
                  elems,
                  num_elems,
                  message_size,
                  total_packets,
                  total_packets_size,
                  total_signatures,
                  message_lens,
                  public_key_offsets,
                  signature_offsets,
                  message_start_offsets,
                  out_size,
                  stream
                 );

    // One thread per signature
    int num_threads_per_block = 64;
    int num_blocks = ROUND_UP_DIV(total_signatures, num_threads_per_block);
    // NOTE(review): this logs gpu_ctx->stream even when the default stream is
    // the one actually used, and "keys" prints total_packets
    LOG("num_blocks: %d threads_per_block: %d keys: %d out: %p stream: %p\n",
           num_blocks, num_threads_per_block, (int)total_packets, out, gpu_ctx->stream);

    perftime_t start, end;
    get_time(&start);
    // NOTE(review): the kernel bound is cur_ctx->offsets_len, presumably set by
    // setup_gpu_ctx from total_signatures — confirm in gpu_ctx.cu
    ed25519_verify_kernel<<<num_blocks, num_threads_per_block, 0, stream>>>
                            (cur_ctx->packets,
                             message_size,
                             cur_ctx->message_lens,
                             cur_ctx->public_key_offsets,
                             cur_ctx->signature_offsets,
                             cur_ctx->message_start_offsets,
                             cur_ctx->offsets_len,
                             cur_ctx->out);
    // Catch launch failures right away
    CUDA_CHK(cudaPeekAtLastError());

    cudaError_t err = cudaMemcpyAsync(out, cur_ctx->out, out_size, cudaMemcpyDeviceToHost, stream);
    if (err != cudaSuccess)  {
        fprintf(stderr, "verify: cudaMemcpy(out) error: out = %p cur_ctx->out = %p size = %zu num: %d elems = %p\n",
                        out, cur_ctx->out, out_size, num_elems, elems);
    }
    CUDA_CHK(err);

    // Wait for the kernel and the copy before handing the context back
    CUDA_CHK(cudaStreamSynchronize(stream));

    release_gpu_ctx(gpu_ctx);

    // The measured interval covers launch, copy-back, synchronize and release
    get_time(&end);
    LOG("time diff: %f\n", get_diff(&start, &end));
}
213 | 
// Returns the copyright and license notice; keeping the text in a function
// ensures the notice is embedded in the binary
const char* ed25519_license() {
   static const char notice[] =
          "Copyright (c) 2018 Solana Labs, Inc. "
          "Licensed under the Apache License, Version 2.0 "
          "<http://www.apache.org/licenses/LICENSE-2.0>";
   return notice;
}
220 | 
// Thin wrapper over cudaHostRegister; returns its status code as an int.
int cuda_host_register(void* ptr, size_t size, unsigned int flags) {
   cudaError_t status = cudaHostRegister(ptr, size, flags);
   return (int)status;
}
224 | 
// Thin wrapper over cudaHostUnregister; returns its status code as an int.
int cuda_host_unregister(void* ptr) {
   cudaError_t status = cudaHostUnregister(ptr);
   return (int)status;
}
228 | 


--------------------------------------------------------------------------------
/src/cuda-headers/gpu_common.h:
--------------------------------------------------------------------------------
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
 3 | 
 4 | #ifndef GPU_COMMON_H
 5 | #define GPU_COMMON_H
 6 | 
 7 | extern bool g_verbose;
 8 | 
 9 | #define LOG(...) if (g_verbose) { printf(__VA_ARGS__); }
10 | 
11 | #define ROUND_UP_DIV(x, y) (((x) + (y) - 1) / (y))
12 | 
13 | #define CUDA_CHK(ans) { cuda_assert((ans), __FILE__, __LINE__); }
14 | 
// Report a failed CUDA call and terminate the process.
//
// err   status returned by a CUDA runtime call
// file  source file of the call site (supplied by CUDA_CHK)
// line  source line of the call site (supplied by CUDA_CHK)
//
// Does nothing when err is cudaSuccess.
inline void cuda_assert(cudaError_t err, const char *file, int line)
{
    if (err != cudaSuccess)
    {
        fprintf(stderr,"ERR: %s %s %d\n", cudaGetErrorString(err), file, line);
        // abort() instead of assert(0): assert compiles to nothing under
        // NDEBUG, which would let callers carry on after a failed CUDA call.
        abort();
    }
}
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/src/cuda-poh-verify/poh_verify.cu:
--------------------------------------------------------------------------------
  1 | #include <stddef.h>
  2 | #include <inttypes.h>
  3 | #include <pthread.h>
  4 | #include "gpu_common.h"
  5 | #include "sha256.cu"
  6 | 
// Upper bound on the number of GPUs used; the detected device count is clamped to this
#define MAX_NUM_GPUS 8
// Number of per-GPU contexts (each with its own mutex and stream), used round-robin
#define MAX_QUEUE_SIZE 8
// Threads per block for the poh_verify_kernel launch
#define NUM_THREADS_PER_BLOCK 64
 10 | 
 11 | 
 12 | __global__ void poh_verify_kernel(uint8_t* hashes, uint64_t* num_hashes_arr, size_t num_elems) {
 13 |     size_t idx = (size_t)(blockIdx.x * blockDim.x + threadIdx.x);
 14 |     if (idx >= num_elems) return;
 15 | 
 16 |     uint8_t hash[SHA256_BLOCK_SIZE];
 17 | 
 18 |     memcpy(hash, &hashes[idx * SHA256_BLOCK_SIZE], SHA256_BLOCK_SIZE);
 19 | 
 20 |     for (size_t i = 0; i < num_hashes_arr[idx]; i++) {
 21 |         hash_state sha_state;
 22 |         sha256_init(&sha_state);
 23 |         sha256_process(&sha_state, hash, SHA256_BLOCK_SIZE);
 24 |         sha256_done(&sha_state, hash);
 25 |     }
 26 | 
 27 |     memcpy(&hashes[idx * SHA256_BLOCK_SIZE], hash, SHA256_BLOCK_SIZE);
 28 | }
 29 | 
// One reusable work slot for poh_verify_many: device buffers plus the mutex
// and stream that go with them.
typedef struct {
    uint8_t* hashes;            // device buffer for the hash entries (cudaMalloc'd on demand)
    uint64_t* num_hashes_arr;   // device buffer for the per-entry iteration counts
    size_t num_elems_alloc;     // number of entries the two buffers were last sized for
    pthread_mutex_t mutex;      // held by poh_verify_many while it uses this slot
    cudaStream_t stream;        // stream used when the caller asks for a non-default stream
} gpu_ctx;
 37 | 
// Guards initialization and the round-robin counters below
static pthread_mutex_t g_ctx_mutex = PTHREAD_MUTEX_INITIALIZER;

// Work slots, indexed [gpu][queue]; zero-initialized so buffer pointers start out NULL
static gpu_ctx g_gpu_ctx[MAX_NUM_GPUS][MAX_QUEUE_SIZE] = {0};
// GPU to hand to the next poh_verify_many call
static uint32_t g_cur_gpu = 0;
// Per-GPU index of the slot to hand to the next call on that GPU
static uint32_t g_cur_queue[MAX_NUM_GPUS] = {0};
// Number of usable GPUs; -1 means initialization has not run yet
static int32_t g_total_gpus = -1;
 44 | 
 45 | static bool poh_init_locked() {
 46 |     if (g_total_gpus == -1) {
 47 |         cudaGetDeviceCount(&g_total_gpus);
 48 |         g_total_gpus = min(MAX_NUM_GPUS, g_total_gpus);
 49 |         LOG("total_gpus: %d\n", g_total_gpus);
 50 |         for (int gpu = 0; gpu < g_total_gpus; gpu++) {
 51 |             CUDA_CHK(cudaSetDevice(gpu));
 52 |             for (int queue = 0; queue < MAX_QUEUE_SIZE; queue++) {
 53 |                 int err = pthread_mutex_init(&g_gpu_ctx[gpu][queue].mutex, NULL);
 54 |                 if (err != 0) {
 55 |                     fprintf(stderr, "pthread_mutex_init error %d gpu: %d queue: %d\n",
 56 |                             err, gpu, queue);
 57 |                     g_total_gpus = 0;
 58 |                     return false;
 59 |                 }
 60 |                 CUDA_CHK(cudaStreamCreate(&g_gpu_ctx[gpu][queue].stream));
 61 |             }
 62 |         }
 63 |     }
 64 |     return g_total_gpus > 0;
 65 | }
 66 | 
// Public initialization entry point: runs the one-time setup under
// g_ctx_mutex. Returns true when at least one GPU is available.
bool poh_init() {
    // presumably forces CUDA runtime/context creation up front — TODO confirm intent
    cudaFree(0);
    pthread_mutex_lock(&g_ctx_mutex);
    bool success = poh_init_locked();
    pthread_mutex_unlock(&g_ctx_mutex);
    return success;
}
 74 | 
extern "C" {
// Run the repeated-hash computation for a batch of entries on a GPU.
//
// hashes                  [in/out] num_elems entries of SHA256_BLOCK_SIZE bytes each;
//                         every entry is replaced by the result of hashing it
//                         num_hashes_arr[i] times
// num_hashes_arr          per-entry iteration counts
// num_elems               number of entries
// use_non_default_stream  non-zero to run on the slot's own stream instead of stream 0
//
// Returns 0 on success (including num_elems == 0) and 1 when no GPU is
// available. Blocks until the results have been copied back.
int poh_verify_many(uint8_t* hashes,
                    const uint64_t* num_hashes_arr,
                    size_t num_elems,
                    uint8_t use_non_default_stream)
{
    LOG("Starting poh_verify_many: num_elems: %zu\n", num_elems);

    if (num_elems == 0) return 0;

    int32_t cur_gpu, cur_queue;

    // Pick the next GPU and the next slot on it, round-robin, under the global lock
    pthread_mutex_lock(&g_ctx_mutex);
    if (!poh_init_locked()) {
        pthread_mutex_unlock(&g_ctx_mutex);
        LOG("No GPUs, exiting...\n");
        return 1;
    }
    cur_gpu = g_cur_gpu;
    g_cur_gpu++;
    g_cur_gpu %= g_total_gpus;
    cur_queue = g_cur_queue[cur_gpu];
    g_cur_queue[cur_gpu]++;
    g_cur_queue[cur_gpu] %= MAX_QUEUE_SIZE;
    pthread_mutex_unlock(&g_ctx_mutex);

    // The slot's own mutex is held for the whole GPU operation below
    gpu_ctx* cur_ctx = &g_gpu_ctx[cur_gpu][cur_queue];
    pthread_mutex_lock(&cur_ctx->mutex);

    CUDA_CHK(cudaSetDevice(cur_gpu));

    LOG("cur gpu: %d cur queue: %d\n", cur_gpu, cur_queue);

    size_t hashes_size = num_elems * SHA256_BLOCK_SIZE * sizeof(uint8_t);
    size_t num_hashes_size = num_elems * sizeof(uint64_t);

    // Ensure there is enough memory allocated; buffers only ever grow
    if (cur_ctx->hashes == NULL || cur_ctx->num_elems_alloc < num_elems) {
        CUDA_CHK(cudaFree(cur_ctx->hashes));
        CUDA_CHK(cudaMalloc(&cur_ctx->hashes, hashes_size));
        CUDA_CHK(cudaFree(cur_ctx->num_hashes_arr));
        CUDA_CHK(cudaMalloc(&cur_ctx->num_hashes_arr, num_hashes_size));

        cur_ctx->num_elems_alloc = num_elems;
    }

    // Stream 0 is the default stream; the slot's own stream is opt-in
    cudaStream_t stream = 0;
    if (0 != use_non_default_stream) {
        stream = cur_ctx->stream;
    }

    // Upload inputs, run the kernel, download results — all queued on one stream
    CUDA_CHK(cudaMemcpyAsync(cur_ctx->hashes, hashes, hashes_size, cudaMemcpyHostToDevice, stream));
    CUDA_CHK(cudaMemcpyAsync(cur_ctx->num_hashes_arr, num_hashes_arr, num_hashes_size, cudaMemcpyHostToDevice, stream));

    // One thread per entry
    int num_blocks = ROUND_UP_DIV(num_elems, NUM_THREADS_PER_BLOCK);

    poh_verify_kernel<<<num_blocks, NUM_THREADS_PER_BLOCK, 0, stream>>>(cur_ctx->hashes, cur_ctx->num_hashes_arr, num_elems);
    // Catch launch failures right away
    CUDA_CHK(cudaPeekAtLastError());

    CUDA_CHK(cudaMemcpyAsync(hashes, cur_ctx->hashes, hashes_size, cudaMemcpyDeviceToHost, stream));

    // Wait for the copy-back to finish before releasing the slot
    CUDA_CHK(cudaStreamSynchronize(stream));

    pthread_mutex_unlock(&cur_ctx->mutex);

    return 0;
}
}
143 | 


--------------------------------------------------------------------------------
/src/cuda-sha256/sha256.cu:
--------------------------------------------------------------------------------
  1 | /* LibTomCrypt, modular cryptographic library -- Tom St Denis
  2 |  *
  3 |  * LibTomCrypt is a library that provides various cryptographic
  4 |  * algorithms in a highly modular and flexible manner.
  5 |  *
  6 |  * The library is free for all purposes without any express
  7 |  * guarantee it works.
  8 |  */
  9 | 
 10 | /**
 11 |   @file sha256.c
 12 |   LTC_SHA256 by Tom St Denis
 13 | */
 14 | 
 15 | #include "tomcrypt_macros.h"
 16 | 
 17 | 
/* Byte size of one hash entry as used by poh_verify.cu (both the input and
 * the output of each hashing step).
 * NOTE(review): despite the name, 32 is the SHA-256 digest length; the
 * compression function's input block is 512 bits (see buf[64] below). */
#define SHA256_BLOCK_SIZE 32

/* SHA-256 hashing context */
struct sha256_state {
    ulong64 length;             /* presumably the message length in bits -- verify against sha256_process/sha256_done */
    ulong32 state[8], curlen;   /* running hash state words; presumably bytes currently held in buf */
    unsigned char buf[64];      /* one 512-bit input block */
};

/* Wrapper type kept from LibTomCrypt so the sha256_* functions take a hash_state */
typedef struct {
    struct sha256_state sha256;
} hash_state;
 29 | 
/* The round-constant table is only compiled for LTC_SMALL_CODE builds; the
 * default build passes the same constants as literals to each RND() call.
 * NOTE(review): the table has no __device__/__constant__ qualifier, so the
 * LTC_SMALL_CODE path may not be usable from device code -- confirm before enabling. */
#ifdef LTC_SMALL_CODE
/* the K array */
static const ulong32 K[64] = {
    0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, 0x3956c25bUL,
    0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, 0xd807aa98UL, 0x12835b01UL,
    0x243185beUL, 0x550c7dc3UL, 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL,
    0xc19bf174UL, 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL,
    0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, 0x983e5152UL,
    0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, 0xc6e00bf3UL, 0xd5a79147UL,
    0x06ca6351UL, 0x14292967UL, 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL,
    0x53380d13UL, 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL,
    0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, 0xd192e819UL,
    0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, 0x19a4c116UL, 0x1e376c08UL,
    0x2748774cUL, 0x34b0bcb5UL, 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL,
    0x682e6ff3UL, 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL,
    0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL
};
#endif
 48 | 
/* Various logical functions used by sha256_compress.
 * Ch and Maj do not parenthesize their arguments, so pass simple operands only. */
#define Ch(x,y,z)       (z ^ (x & (y ^ z)))
#define Maj(x,y,z)      (((x | y) & z) | (x & y))
/* S: 32-bit rotate via RORc (from tomcrypt_macros.h, presumably rotate-right) */
#define S(x, n)         RORc((x),(n))
/* R: logical right shift after masking x to 32 bits */
#define R(x, n)         (((x)&0xFFFFFFFFUL)>>(n))
/* Sigma0/Sigma1 feed the round macro RND */
#define Sigma0(x)       (S(x, 2) ^ S(x, 13) ^ S(x, 22))
#define Sigma1(x)       (S(x, 6) ^ S(x, 11) ^ S(x, 25))
/* Gamma0/Gamma1 expand the message schedule W[16..63] */
#define Gamma0(x)       (S(x, 7) ^ S(x, 18) ^ R(x, 3))
#define Gamma1(x)       (S(x, 17) ^ S(x, 19) ^ R(x, 10))
 58 | 
 59 | /* compress 512-bits */
 60 | #ifdef LTC_CLEAN_STACK
 61 | static int _sha256_compress(hash_state * md, const unsigned char *buf)
 62 | #else
 63 | static int __host__ __device__ sha256_compress(hash_state * md, const unsigned char *buf)
 64 | #endif
 65 | {
 66 |     ulong32 S[8], W[64], t0, t1;
 67 | #ifdef LTC_SMALL_CODE
 68 |     ulong32 t;
 69 | #endif
 70 |     int i;
 71 | 
 72 |     /* copy state into S */
 73 |     for (i = 0; i < 8; i++) {
 74 |         S[i] = md->sha256.state[i];
 75 |     }
 76 | 
 77 |     /* copy the state into 512-bits into W[0..15] */
 78 |     for (i = 0; i < 16; i++) {
 79 |         LOAD32H(W[i], buf + (4*i));
 80 |     }
 81 | 
 82 |     /* fill W[16..63] */
 83 |     for (i = 16; i < 64; i++) {
 84 |         W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16];
 85 |     }
 86 | 
 87 |     /* Compress */
 88 | #ifdef LTC_SMALL_CODE
 89 | #define RND(a,b,c,d,e,f,g,h,i)                         \
 90 |      t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i];   \
 91 |      t1 = Sigma0(a) + Maj(a, b, c);                    \
 92 |      d += t0;                                          \
 93 |      h  = t0 + t1;
 94 | 
 95 |      for (i = 0; i < 64; ++i) {
 96 |          RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i);
 97 |          t = S[7]; S[7] = S[6]; S[6] = S[5]; S[5] = S[4];
 98 |          S[4] = S[3]; S[3] = S[2]; S[2] = S[1]; S[1] = S[0]; S[0] = t;
 99 |      }
100 | #else
101 | #define RND(a,b,c,d,e,f,g,h,i,ki)                    \
102 |      t0 = h + Sigma1(e) + Ch(e, f, g) + ki + W[i];   \
103 |      t1 = Sigma0(a) + Maj(a, b, c);                  \
104 |      d += t0;                                        \
105 |      h  = t0 + t1;
106 | 
107 |     RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],0,0x428a2f98);
108 |     RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],1,0x71374491);
109 |     RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],2,0xb5c0fbcf);
110 |     RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],3,0xe9b5dba5);
111 |     RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],4,0x3956c25b);
112 |     RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],5,0x59f111f1);
113 |     RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],6,0x923f82a4);
114 |     RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],7,0xab1c5ed5);
115 |     RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],8,0xd807aa98);
116 |     RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],9,0x12835b01);
117 |     RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],10,0x243185be);
118 |     RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],11,0x550c7dc3);
119 |     RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],12,0x72be5d74);
120 |     RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],13,0x80deb1fe);
121 |     RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],14,0x9bdc06a7);
122 |     RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],15,0xc19bf174);
123 |     RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],16,0xe49b69c1);
124 |     RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],17,0xefbe4786);
125 |     RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],18,0x0fc19dc6);
126 |     RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],19,0x240ca1cc);
127 |     RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],20,0x2de92c6f);
128 |     RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],21,0x4a7484aa);
129 |     RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],22,0x5cb0a9dc);
130 |     RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],23,0x76f988da);
131 |     RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],24,0x983e5152);
132 |     RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],25,0xa831c66d);
133 |     RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],26,0xb00327c8);
134 |     RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],27,0xbf597fc7);
135 |     RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],28,0xc6e00bf3);
136 |     RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],29,0xd5a79147);
137 |     RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],30,0x06ca6351);
138 |     RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],31,0x14292967);
139 |     RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],32,0x27b70a85);
140 |     RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],33,0x2e1b2138);
141 |     RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],34,0x4d2c6dfc);
142 |     RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],35,0x53380d13);
143 |     RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],36,0x650a7354);
144 |     RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],37,0x766a0abb);
145 |     RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],38,0x81c2c92e);
146 |     RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],39,0x92722c85);
147 |     RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],40,0xa2bfe8a1);
148 |     RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],41,0xa81a664b);
149 |     RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],42,0xc24b8b70);
150 |     RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],43,0xc76c51a3);
151 |     RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],44,0xd192e819);
152 |     RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],45,0xd6990624);
153 |     RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],46,0xf40e3585);
154 |     RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],47,0x106aa070);
155 |     RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],48,0x19a4c116);
156 |     RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],49,0x1e376c08);
157 |     RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],50,0x2748774c);
158 |     RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],51,0x34b0bcb5);
159 |     RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],52,0x391c0cb3);
160 |     RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],53,0x4ed8aa4a);
161 |     RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],54,0x5b9cca4f);
162 |     RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],55,0x682e6ff3);
163 |     RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],56,0x748f82ee);
164 |     RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],57,0x78a5636f);
165 |     RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],58,0x84c87814);
166 |     RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],59,0x8cc70208);
167 |     RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],60,0x90befffa);
168 |     RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],61,0xa4506ceb);
169 |     RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],62,0xbef9a3f7);
170 |     RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],63,0xc67178f2);
171 | 
172 | #undef RND
173 | 
174 | #endif
175 | 
176 |     /* feedback */
177 |     for (i = 0; i < 8; i++) {
178 |         md->sha256.state[i] = md->sha256.state[i] + S[i];
179 |     }
180 |     return CRYPT_OK;
181 | }
182 | 
183 | #ifdef LTC_CLEAN_STACK
184 | /* Runs _sha256_compress(), then calls burn_stack() for 74 ulong32 words; returns the compression status. */
185 | static int sha256_compress(hash_state * md, const unsigned char *buf)
186 | {
187 |     const int status = _sha256_compress(md, buf);
188 |     burn_stack(sizeof(ulong32) * 74);
189 |     return status;
190 | }
191 | #endif
192 | 
193 | /**
194 |    Reset a hash state: zero the counters and load the eight initial state words
195 |    @param md   The hash state you wish to initialize
196 |    @return CRYPT_OK if successful
197 | */
198 | inline int __host__ __device__ sha256_init(hash_state * md)
199 | {
200 |     const unsigned long iv[8] = {   /* initial values for state[0..7], in order */
201 |         0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
202 |         0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
203 |     };
204 |     int k;
205 | 
206 |     LTC_ARGCHK(md != NULL);
207 |     for (k = 0; k < 8; k++) {
208 |         md->sha256.state[k] = iv[k];
209 |     }
210 |     md->sha256.length = 0;
211 |     md->sha256.curlen = 0;
212 |     return CRYPT_OK;
213 | }
214 | 
215 | /**
216 |    Process a block of memory through the hash (sha256_process is generated by the HASH_PROCESS macro)
217 |    @param md     The hash state
218 |    @param in     The data to hash
219 |    @param inlen  The length of the data (octets)
220 |    @return CRYPT_OK if successful
221 | */
222 | inline HASH_PROCESS(sha256_process, sha256_compress, sha256, 64)
223 | 
224 | /**
225 |    Finish the hash: pad the buffered data, compress it and write out the digest
226 |    @param md  The hash state
227 |    @param out [out] The destination of the hash (32 bytes)
228 |    @return CRYPT_OK if successful, CRYPT_INVALID_ARG if md->sha256.curlen is out of range
229 | */
230 | inline int __host__ __device__ sha256_done(hash_state * md, unsigned char *out)
231 | {
232 |     int word;
233 | 
234 |     LTC_ARGCHK(md  != NULL);
235 |     LTC_ARGCHK(out != NULL);
236 | 
237 |     /* the buffer must still have room for the marker byte appended below */
238 |     if (md->sha256.curlen >= sizeof(md->sha256.buf)) {
239 |        return CRYPT_INVALID_ARG;
240 |     }
241 | 
242 |     /* count the still-buffered bytes in the total message length (in bits) */
243 |     md->sha256.length += md->sha256.curlen * 8;
244 | 
245 |     /* append the '1' bit */
246 |     md->sha256.buf[md->sha256.curlen] = (unsigned char)0x80;
247 |     md->sha256.curlen += 1;
248 | 
249 |     /* with more than 56 bytes in use there is no room left for the
250 |      * 8-byte length field: zero-fill this block, compress it, and
251 |      * continue the padding in a fresh block */
252 |     if (md->sha256.curlen > 56) {
253 |         for (; md->sha256.curlen < 64; md->sha256.curlen++) {
254 |             md->sha256.buf[md->sha256.curlen] = (unsigned char)0;
255 |         }
256 |         sha256_compress(md, md->sha256.buf);
257 |         md->sha256.curlen = 0;
258 |     }
259 | 
260 |     /* zero-fill up to byte 56 */
261 |     for (; md->sha256.curlen < 56; md->sha256.curlen++) {
262 |         md->sha256.buf[md->sha256.curlen] = (unsigned char)0;
263 |     }
264 | 
265 |     /* the message length occupies the last 8 bytes of the final block */
266 |     STORE64H(md->sha256.length, md->sha256.buf+56);
267 |     sha256_compress(md, md->sha256.buf);
268 | 
269 |     /* copy the eight state words to the output, 4 bytes each */
270 |     for (word = 0; word < 8; word++) {
271 |         STORE32H(md->sha256.state[word], out+(4*word));
272 |     }
273 | #ifdef LTC_CLEAN_STACK
274 |     zeromem(md, sizeof(hash_state));
275 | #endif
276 |     return CRYPT_OK;
277 | }
278 | 
279 | /**
280 |   Self-test the hash against two fixed message/digest pairs (currently compiled out by the surrounding #if 0)
281 |   @return CRYPT_OK if successful, CRYPT_FAIL_TESTVECTOR on a mismatch, CRYPT_NOP if LTC_TEST is not defined
282 | */
283 | #if 0
284 | int  sha256_test(void)
285 | {
286 |  #ifndef LTC_TEST
287 |     return CRYPT_NOP;
288 |  #else
289 |   static const struct {
290 |       const char *msg;
291 |       unsigned char hash[32];
292 |   } tests[] = {
293 |     { "abc",
294 |       { 0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea,
295 |         0x41, 0x41, 0x40, 0xde, 0x5d, 0xae, 0x22, 0x23,
296 |         0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c,
297 |         0xb4, 0x10, 0xff, 0x61, 0xf2, 0x00, 0x15, 0xad }
298 |     },
299 |     { "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
300 |       { 0x24, 0x8d, 0x6a, 0x61, 0xd2, 0x06, 0x38, 0xb8,
301 |         0xe5, 0xc0, 0x26, 0x93, 0x0c, 0x3e, 0x60, 0x39,
302 |         0xa3, 0x3c, 0xe4, 0x59, 0x64, 0xff, 0x21, 0x67,
303 |         0xf6, 0xec, 0xed, 0xd4, 0x19, 0xdb, 0x06, 0xc1 }
304 |     },
305 |   };
306 | 
307 |   int i;
308 |   unsigned char tmp[32];
309 |   hash_state md;
310 | 
311 |   for (i = 0; i < (int)(sizeof(tests) / sizeof(tests[0])); i++) {
312 |       sha256_init(&md);
313 |       sha256_process(&md, (unsigned char*)tests[i].msg, (unsigned long)strlen(tests[i].msg));
314 |       sha256_done(&md, tmp);
315 |       if (compare_testvector(tmp, sizeof(tmp), tests[i].hash, sizeof(tests[i].hash), "SHA256", i)) {
316 |          return CRYPT_FAIL_TESTVECTOR;
317 |       }
318 |   }
319 |   return CRYPT_OK;
320 |  #endif
321 | }
322 | #endif
323 | 
324 | 
325 | 
326 | /* ref:         $Format:%D$ */
327 | /* git commit:  $Format:%H$ */
328 | /* commit time: $Format:%ai$ */
329 | 


--------------------------------------------------------------------------------
/src/gpu-common.mk:
--------------------------------------------------------------------------------
1 | # Shared nvcc settings. V must name one of the CFLAGS_<V> sets below (release or debug).
2 | NVCC := nvcc
3 | GPU_PTX_ARCH := compute_35
4 | GPU_ARCHS ?= sm_37,sm_50,sm_61,sm_70
5 | GPU_CFLAGS := --gpu-code=$(GPU_ARCHS),$(GPU_PTX_ARCH) --gpu-architecture=$(GPU_PTX_ARCH)
6 | CFLAGS_release := --ptxas-options=-v $(GPU_CFLAGS) -O3 -Xcompiler "-Wall -Werror -fPIC -Wno-strict-aliasing"
7 | CFLAGS_debug := $(CFLAGS_release) -g
8 | CFLAGS := $(CFLAGS_$(V))


--------------------------------------------------------------------------------
/src/jerasure-sys/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "jerasure-sys"
 3 | description = "Rust bindings for jerasure 2.0"
 4 | version = "0.1.0"
 5 | homepage = "https://solana.com/"
 6 | readme = "../jerasure/README"
 7 | repository = "https://github.com/solana-labs/solana-perf-libs"
 8 | authors = ["Solana Maintainers <maintainers@solana.com>"]
 9 | license-file = "../jerasure/COPYING" # `license` takes an SPDX expression; a path to the license text goes in `license-file`
10 | links = "Jerasure"
11 | build = "build.rs"
12 | # build.rs compiles the C sources with the `cc` crate
13 | [build-dependencies]
14 | cc = "1.0"
15 | 


--------------------------------------------------------------------------------
/src/jerasure-sys/build.rs:
--------------------------------------------------------------------------------
 1 | extern crate cc;
 2 | 
 3 | // Build script: compiles five jerasure C sources into the static library `Jerasure`.
 4 | fn main() {
 5 |     let sources = [
 6 |         "jerasure/src/galois.c",
 7 |         "jerasure/src/jerasure.c",
 8 |         "jerasure/src/reed_sol.c",
 9 |         "jerasure/src/cauchy.c",
10 |         "jerasure/src/liberation.c",
11 |     ];
12 |     let mut builder = cc::Build::new();
13 |     builder.files(&sources).include("jerasure/include").include("gf-complete/include");
14 |     builder.compile("Jerasure");
15 |     println!("cargo:rustc-link-lib=static=Jerasure");
16 | }
17 | 


--------------------------------------------------------------------------------
/src/jerasure-sys/gf-complete:
--------------------------------------------------------------------------------
1 | ../gf-complete/


--------------------------------------------------------------------------------
/src/jerasure-sys/jerasure:
--------------------------------------------------------------------------------
1 | ../jerasure


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/Makefile:
--------------------------------------------------------------------------------
 1 | SGX_SDK ?= /opt/sgxsdk
 2 | # Flags for the plain (non-SGX) build.
 3 | C_Flags := -O2 -fpic -I.
 4 | # Flags for the SGX build: -nostdinc drops the host headers, the SGX SDK include dirs replace them.
 5 | SGX_C_Flags := -Wno-implicit-function-declaration -std=c11 -m64 -O2 -nostdinc -DSGX_COMPAT -fpie -fstack-protector \
 6 | 	-IInclude -I. -I$(SGX_SDK)/include -I$(SGX_SDK)/include/tlibc -I$(SGX_SDK)/include/libcxx -fno-builtin-printf -I.
 7 | # Every .c file in this directory is compiled once per flavour.
 8 | C_Files := $(wildcard *.c)
 9 | # Directory that receives the two archives; overridable (build.sh passes OUT=...).
10 | OUT ?= libs
11 | 
12 | SGX_OBJ := sgxobj
13 | SGX_C_Objects := $(C_Files:%.c=$(SGX_OBJ)/%.o)
14 | 
15 | NONSGX_OBJ := nonsgxobj
16 | NONSGX_C_Objects := $(C_Files:%.c=$(NONSGX_OBJ)/%.o)
17 | # clean is declared phony as well, so a stray file named "clean" cannot disable it.
18 | .PHONY: all run clean
19 | all: $(OUT)/libed25519.sgx.static.a $(OUT)/libed25519.static.a
20 | run: all
21 | 
22 | $(SGX_OBJ)/%.o: %.c
23 | 	@echo "CC  <=  $<"
24 | 	@mkdir -p $(SGX_OBJ)
25 | 	$(CC) $(SGX_C_Flags) -c $< -o $@
26 | 
27 | $(NONSGX_OBJ)/%.o: %.c
28 | 	@echo "CC  <=  $<"
29 | 	@mkdir -p $(NONSGX_OBJ)
30 | 	$(CC) $(C_Flags) -c $< -o $@
31 | # Archives go through $(AR) (default: ar) so the archiver can be overridden like $(CC).
32 | $(OUT)/libed25519.sgx.static.a: $(SGX_C_Objects)
33 | 	@mkdir -p $(OUT)
34 | 	$(AR) rcs $@ $^
35 | 
36 | $(OUT)/libed25519.static.a: $(NONSGX_C_Objects)
37 | 	@mkdir -p $(OUT)
38 | 	$(AR) rcs $@ $^
39 | 
40 | clean:
41 | 	@rm -rf $(SGX_OBJ) $(NONSGX_OBJ) $(OUT)
42 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/add_scalar.c:
--------------------------------------------------------------------------------
 1 | #include "ed25519.h"
 2 | #include "ge.h"
 3 | #include "sc.h"
 4 | #include "sha512.h"
 5 | 
 6 | 
 7 | /* see http://crypto.stackexchange.com/a/6215/4697 */
 8 | void ed25519_add_scalar(unsigned char *public_key, unsigned char *private_key, const unsigned char *scalar) {
 9 |     const unsigned char one[32] = {1}; /* scalar with value 1 */
10 | 
11 |     unsigned char delta[32]; /* the scalar with its highest bit cleared (n) */
12 |     unsigned char digest[64];
13 |     sha512_context hasher;
14 |     ge_p3 sum;
15 |     ge_p3 delta_base;
16 |     ge_p3 unpacked;
17 |     ge_p1p1 sum_p1p1;
18 |     ge_cached unpacked_cached;
19 |     int k;
20 | 
21 |     /* copy the scalar and clear highest bit */
22 |     for (k = 0; k < 32; ++k) {
23 |         delta[k] = scalar[k];
24 |     }
25 |     delta[31] &= 127;
26 | 
27 |     /* private key: a = n + t, evaluated as 1 * n + t */
28 |     if (private_key) {
29 |         unsigned char *tail = private_key + 32; /* second half of the private key */
30 |         sc_muladd(private_key, one, delta, private_key);
31 | 
32 |         /* https://github.com/orlp/ed25519/issues/3 */
33 |         /* replace the second half with the first 32 bytes of hash(second half || scalar) */
34 |         sha512_init(&hasher);
35 |         sha512_update(&hasher, tail, 32);
36 |         sha512_update(&hasher, scalar, 32);
37 |         sha512_final(&hasher, digest);
38 |         for (k = 0; k < 32; ++k) {
39 |             tail[k] = digest[k];
40 |         }
41 |     }
42 | 
43 |     /* nothing more to do unless a public key was supplied */
44 |     if (!public_key) {
45 |         return;
46 |     }
47 | 
48 |     /* public key: A = nB + T */
49 |     if (private_key) {
50 |         /* the updated private key is known, so A is derived from it directly
51 |            and no point addition is needed; timing therefore reveals whether a
52 |            private key was passed - use two separate calls if that matters */
53 |         ge_scalarmult_base(&sum, private_key);
54 |     } else {
55 |         /* unpack public key into T and undo the decoder's negation (its return value is not checked) */
56 |         ge_frombytes_negate_vartime(&unpacked, public_key);
57 |         fe_neg(unpacked.X, unpacked.X);
58 |         fe_neg(unpacked.T, unpacked.T);
59 |         ge_p3_to_cached(&unpacked_cached, &unpacked);
60 | 
61 |         /* A = n*B + T */
62 |         ge_scalarmult_base(&delta_base, delta);
63 |         ge_add(&sum_p1p1, &delta_base, &unpacked_cached);
64 |         ge_p1p1_to_p3(&sum, &sum_p1p1);
65 |     }
66 | 
67 |     /* pack public key */
68 |     ge_p3_tobytes(public_key, &sum);
69 | }
70 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/build.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -e
 3 | # Remember the caller's directory: the make output directory is <caller dir>/libs.
 4 | invoked_from=$PWD
 5 | cd "$(dirname "$0")"
 6 | # Run make from the script's own directory, echoing the command via set -x.
 7 | echo --- Build
 8 | (
 9 |   set -x
10 |   make OUT="$invoked_from"/libs
11 | )
12 | 
13 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/ed25519.h:
--------------------------------------------------------------------------------
 1 | #ifndef ED25519_H
 2 | #define ED25519_H
 3 | /* Public declarations for the ed25519 routines. */
 4 | #include <stddef.h>
 5 | /* ED25519_DECLSPEC: dllexport with ED25519_BUILD_DLL, dllimport with ED25519_DLL (both _WIN32 only), empty otherwise. */
 6 | #if defined(_WIN32)
 7 |     #if defined(ED25519_BUILD_DLL)
 8 |         #define ED25519_DECLSPEC __declspec(dllexport)
 9 |     #elif defined(ED25519_DLL)
10 |         #define ED25519_DECLSPEC __declspec(dllimport)
11 |     #else
12 |         #define ED25519_DECLSPEC
13 |     #endif
14 | #else
15 |     #define ED25519_DECLSPEC
16 | #endif
17 | 
18 | /* Give the declarations C linkage when included from C++. */
19 | #ifdef __cplusplus
20 | extern "C" {
21 | #endif
22 | /* ed25519_create_seed is declared only when ED25519_NO_SEED is not defined. */
23 | #ifndef ED25519_NO_SEED
24 | int ED25519_DECLSPEC ed25519_create_seed(unsigned char *seed);
25 | #endif
26 | /* ed25519_add_scalar (add_scalar.c) skips whichever of public_key / private_key is passed as NULL. */
27 | void ED25519_DECLSPEC ed25519_create_keypair(unsigned char *public_key, unsigned char *private_key, const unsigned char *seed);
28 | void ED25519_DECLSPEC ed25519_sign(unsigned char *signature, const unsigned char *message, size_t message_len, const unsigned char *public_key, const unsigned char *private_key);
29 | int ED25519_DECLSPEC ed25519_verify(const unsigned char *signature, const unsigned char *message, size_t message_len, const unsigned char *public_key);
30 | void ED25519_DECLSPEC ed25519_add_scalar(unsigned char *public_key, unsigned char *private_key, const unsigned char *scalar);
31 | void ED25519_DECLSPEC ed25519_key_exchange(unsigned char *shared_secret, const unsigned char *public_key, const unsigned char *private_key);
32 | 
33 | 
34 | #ifdef __cplusplus
35 | }
36 | #endif
37 | 
38 | #endif
39 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/fe.h:
--------------------------------------------------------------------------------
 1 | #ifndef FE_H
 2 | #define FE_H
 3 | 
 4 | #include "fixedint.h"
 5 | 
 6 | 
 7 | /*
 8 |     fe means field element.
 9 |     Here the field is \Z/(2^255-19).
10 |     An element t, entries t[0]...t[9], represents the integer
11 |     t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9].
12 |     Bounds on each t[i] vary depending on context.
13 | */
14 | 
15 | /* Ten signed 32-bit limbs; being an array type, an fe parameter is received as a pointer. */
16 | typedef int32_t fe[10];
17 | 
18 | /* NOTE(review): the definitions are not in this header; the group remarks below are inferred from names and from the call sites in ge.c. */
19 | void fe_0(fe h);
20 | void fe_1(fe h);
21 | /* Conversion between field elements and byte strings. */
22 | void fe_frombytes(fe h, const unsigned char *s);
23 | void fe_tobytes(unsigned char *s, const fe h);
24 | /* Copy, predicates, and conditional move/swap controlled by b. */
25 | void fe_copy(fe h, const fe f);
26 | int fe_isnegative(const fe f);
27 | int fe_isnonzero(const fe f);
28 | void fe_cmov(fe f, const fe g, unsigned int b);
29 | void fe_cswap(fe f, fe g, unsigned int b);
30 | /* Arithmetic; at the ge.c call sites the first argument receives the result. */
31 | void fe_neg(fe h, const fe f);
32 | void fe_add(fe h, const fe f, const fe g);
33 | void fe_invert(fe out, const fe z);
34 | void fe_sq(fe h, const fe f);
35 | void fe_sq2(fe h, const fe f);
36 | void fe_mul(fe h, const fe f, const fe g);
37 | void fe_mul121666(fe h, fe f);
38 | void fe_pow22523(fe out, const fe z);
39 | void fe_sub(fe h, const fe f, const fe g);
40 | 
41 | #endif
42 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/fixedint.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |     Portable header to provide the 32 and 64 bits type.
 3 | 
 4 |     Not a compatible replacement for <stdint.h>, do not blindly use it as such.
 5 | */
 6 | /* First choice: include <stdint.h> when the compiler reports C99, or is a Watcom/GNU compiler passing the checks below. */
 7 | #if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined(__WATCOMC__) && (defined(_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_) || defined(__UINT_FAST64_TYPE__)) )) && !defined(FIXEDINT_H_INCLUDED)
 8 |     #include <stdint.h>
 9 |     #define FIXEDINT_H_INCLUDED
10 | 
11 |     #if defined(__WATCOMC__) && __WATCOMC__ >= 1250 && !defined(UINT64_C)
12 |         #include <limits.h>
13 |         #define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX))
14 |     #endif
15 | #endif
16 | 
17 | /* Fallback: FIXEDINT_H_INCLUDED is still unset, so define the types by hand from <limits.h> and compiler checks. */
18 | #ifndef FIXEDINT_H_INCLUDED
19 |     #define FIXEDINT_H_INCLUDED
20 |     
21 |     #include <limits.h>
22 | 
23 |     /* (u)int32_t */
24 |     #ifndef uint32_t
25 |         #if (ULONG_MAX == 0xffffffffUL)
26 |             typedef unsigned long uint32_t;
27 |         #elif (UINT_MAX == 0xffffffffUL)
28 |             typedef unsigned int uint32_t;
29 |         #elif (USHRT_MAX == 0xffffffffUL)
30 |             typedef unsigned short uint32_t;
31 |         #endif
32 |     #endif
33 | 
34 | 
35 |     #ifndef int32_t
36 |         #if (LONG_MAX == 0x7fffffffL)
37 |             typedef signed long int32_t;
38 |         #elif (INT_MAX == 0x7fffffffL)
39 |             typedef signed int int32_t;
40 |         #elif (SHRT_MAX == 0x7fffffffL)
41 |             typedef signed short int32_t;
42 |         #endif
43 |     #endif
44 | 
45 | 
46 |     /* (u)int64_t */
47 |     #if (defined(__STDC__) && defined(__STDC_VERSION__) && __STDC__ && __STDC_VERSION__ >= 199901L)
48 |         typedef long long int64_t;
49 |         typedef unsigned long long uint64_t;
50 | 
51 |         #define UINT64_C(v) v ##ULL
52 |         #define INT64_C(v) v ##LL
53 |     #elif defined(__GNUC__)
54 |         __extension__ typedef long long int64_t;
55 |         __extension__ typedef unsigned long long uint64_t;
56 | 
57 |         #define UINT64_C(v) v ##ULL
58 |         #define INT64_C(v) v ##LL
59 |     #elif defined(__MWERKS__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) || defined(__APPLE_CC__) || defined(_LONG_LONG) || defined(_CRAYC)
60 |         typedef long long int64_t;
61 |         typedef unsigned long long uint64_t;
62 | 
63 |         #define UINT64_C(v) v ##ULL
64 |         #define INT64_C(v) v ##LL
65 |     #elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined(__BORLANDC__) && __BORLANDC__ > 0x460) || defined(__alpha) || defined(__DECC)
66 |         typedef __int64 int64_t;
67 |         typedef unsigned __int64 uint64_t;
68 | 
69 |         #define UINT64_C(v) v ##UI64
70 |         #define INT64_C(v) v ##I64
71 |     #endif
72 | #endif
73 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/ge.c:
--------------------------------------------------------------------------------
  1 | #include "ge.h"
  2 | #include "precomp_data.h"
  3 | 
  4 | 
  5 | /*
  6 | r = p + q, with p in p3 form, q in cached form and the result in p1p1 form
  7 | */
  8 | 
  9 | void ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
 10 |     fe t0;
 11 |     fe_add(r->X, p->Y, p->X);       /* p->Y + p->X */
 12 |     fe_sub(r->Y, p->Y, p->X);       /* p->Y - p->X */
 13 |     fe_mul(r->Z, r->X, q->YplusX);  /* A = (p->Y + p->X) * q->YplusX */
 14 |     fe_mul(r->Y, r->Y, q->YminusX); /* B = (p->Y - p->X) * q->YminusX */
 15 |     fe_mul(r->T, q->T2d, p->T);     /* C = q->T2d * p->T */
 16 |     fe_mul(r->X, p->Z, q->Z);       /* p->Z * q->Z (r->X used as scratch) */
 17 |     fe_add(t0, r->X, r->X);         /* D = 2 * p->Z * q->Z */
 18 |     fe_sub(r->X, r->Z, r->Y);       /* r->X = A - B */
 19 |     fe_add(r->Y, r->Z, r->Y);       /* r->Y = A + B */
 20 |     fe_add(r->Z, t0, r->T);         /* r->Z = D + C */
 21 |     fe_sub(r->T, t0, r->T);         /* r->T = D - C */
 22 | }
 23 | 
 24 | /* Recode the 256 bits of a into signed digits r[0..255], each kept within [-15, 15]. */
 25 | static void slide(signed char *r, const unsigned char *a) {
 26 |     int i;
 27 |     int b;
 28 |     int k;
 29 |     /* expand a into 256 single bits, least significant bit of a[0] first */
 30 |     for (i = 0; i < 256; ++i) {
 31 |         r[i] = 1 & (a[i >> 3] >> (i & 7));
 32 |     }
 33 |     /* fold up to 6 following bits into each nonzero digit while it stays within [-15, 15] */
 34 |     for (i = 0; i < 256; ++i)
 35 |         if (r[i]) {
 36 |             for (b = 1; b <= 6 && i + b < 256; ++b) {
 37 |                 if (r[i + b]) {
 38 |                     if (r[i] + (r[i + b] << b) <= 15) {
 39 |                         r[i] += r[i + b] << b;
 40 |                         r[i + b] = 0;
 41 |                     } else if (r[i] - (r[i + b] << b) >= -15) {
 42 |                         r[i] -= r[i + b] << b;
 43 |                         /* compensate by adding 2^(i+b) into the higher bits, rippling the carry upward */
 44 |                         for (k = i + b; k < 256; ++k) {
 45 |                             if (!r[k]) {
 46 |                                 r[k] = 1;
 47 |                                 break;
 48 |                             }
 49 | 
 50 |                             r[k] = 0;
 51 |                         }
 52 |                     } else {
 53 |                         break;
 54 |                     }
 55 |                 }
 56 |             }
 57 |         }
 58 | }
 59 | 
 60 | /*
 61 | r = a * A + b * B
 62 | where a = a[0]+256*a[1]+...+256^31 a[31].
 63 | and b = b[0]+256*b[1]+...+256^31 b[31].
 64 | B is the Ed25519 base point (x,4/5) with x positive.
 65 | */
 66 | /* Variable time: which additions run depends on the recoded digits of a and b. */
 67 | void ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a, const ge_p3 *A, const unsigned char *b) {
 68 |     signed char aslide[256];
 69 |     signed char bslide[256];
 70 |     ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */
 71 |     ge_p1p1 t;
 72 |     ge_p3 u;
 73 |     ge_p3 A2;
 74 |     int i;
 75 |     slide(aslide, a);
 76 |     slide(bslide, b);
 77 |     ge_p3_to_cached(&Ai[0], A);
 78 |     ge_p3_dbl(&t, A);
 79 |     ge_p1p1_to_p3(&A2, &t);         /* A2 = 2A; each table entry below is the previous one plus A2 */
 80 |     ge_add(&t, &A2, &Ai[0]);
 81 |     ge_p1p1_to_p3(&u, &t);
 82 |     ge_p3_to_cached(&Ai[1], &u);
 83 |     ge_add(&t, &A2, &Ai[1]);
 84 |     ge_p1p1_to_p3(&u, &t);
 85 |     ge_p3_to_cached(&Ai[2], &u);
 86 |     ge_add(&t, &A2, &Ai[2]);
 87 |     ge_p1p1_to_p3(&u, &t);
 88 |     ge_p3_to_cached(&Ai[3], &u);
 89 |     ge_add(&t, &A2, &Ai[3]);
 90 |     ge_p1p1_to_p3(&u, &t);
 91 |     ge_p3_to_cached(&Ai[4], &u);
 92 |     ge_add(&t, &A2, &Ai[4]);
 93 |     ge_p1p1_to_p3(&u, &t);
 94 |     ge_p3_to_cached(&Ai[5], &u);
 95 |     ge_add(&t, &A2, &Ai[5]);
 96 |     ge_p1p1_to_p3(&u, &t);
 97 |     ge_p3_to_cached(&Ai[6], &u);
 98 |     ge_add(&t, &A2, &Ai[6]);
 99 |     ge_p1p1_to_p3(&u, &t);
100 |     ge_p3_to_cached(&Ai[7], &u);
101 |     ge_p2_0(r);
102 |     /* skip the leading positions where both digit strings are zero */
103 |     for (i = 255; i >= 0; --i) {
104 |         if (aslide[i] || bslide[i]) {
105 |             break;
106 |         }
107 |     }
108 |     /* one doubling per remaining position; digit d selects table entry |d| / 2 (added if d > 0, subtracted if d < 0) */
109 |     for (; i >= 0; --i) {
110 |         ge_p2_dbl(&t, r);
111 | 
112 |         if (aslide[i] > 0) {
113 |             ge_p1p1_to_p3(&u, &t);
114 |             ge_add(&t, &u, &Ai[aslide[i] / 2]);
115 |         } else if (aslide[i] < 0) {
116 |             ge_p1p1_to_p3(&u, &t);
117 |             ge_sub(&t, &u, &Ai[(-aslide[i]) / 2]);
118 |         }
119 | 
120 |         if (bslide[i] > 0) {
121 |             ge_p1p1_to_p3(&u, &t);
122 |             ge_madd(&t, &u, &Bi[bslide[i] / 2]);
123 |         } else if (bslide[i] < 0) {
124 |             ge_p1p1_to_p3(&u, &t);
125 |             ge_msub(&t, &u, &Bi[(-bslide[i]) / 2]);
126 |         }
127 | 
128 |         ge_p1p1_to_p2(r, &t);
129 |     }
130 | }
131 | 
132 | /* Field-element constants used by ge_frombytes_negate_vartime below; d multiplies y^2 there. */
133 | static const fe d = {
134 |     -10913610, 13857413, -15372611, 6949391, 114729, -8787816, -6275908, -3247719, -18696448, -12055116
135 | };
136 | /* sqrtm1 multiplies the candidate x when vx^2 = -u; the name suggests sqrt(-1) - TODO confirm */
137 | static const fe sqrtm1 = {
138 |     -32595792, -7943725, 9377950, 3500415, 12389472, -272473, -25146209, -2005654, 326686, 11406482
139 | };
140 | /* Decode s into h (Z = 1), choosing X with the sign opposite to bit 7 of s[31]; returns 0, or -1 if neither root check passes. */
141 | int ge_frombytes_negate_vartime(ge_p3 *h, const unsigned char *s) {
142 |     fe u;
143 |     fe v;
144 |     fe v3;
145 |     fe vxx;
146 |     fe check;
147 |     fe_frombytes(h->Y, s);
148 |     fe_1(h->Z);
149 |     fe_sq(u, h->Y);
150 |     fe_mul(v, u, d);
151 |     fe_sub(u, u, h->Z);     /* u = y^2-1 */
152 |     fe_add(v, v, h->Z);     /* v = dy^2+1 */
153 |     fe_sq(v3, v);
154 |     fe_mul(v3, v3, v);      /* v3 = v^3 */
155 |     fe_sq(h->X, v3);
156 |     fe_mul(h->X, h->X, v);
157 |     fe_mul(h->X, h->X, u);  /* x = uv^7 */
158 |     fe_pow22523(h->X, h->X); /* x = (uv^7)^((q-5)/8) */
159 |     fe_mul(h->X, h->X, v3);
160 |     fe_mul(h->X, h->X, u);  /* x = uv^3(uv^7)^((q-5)/8) */
161 |     fe_sq(vxx, h->X);
162 |     fe_mul(vxx, vxx, v);
163 |     fe_sub(check, vxx, u);  /* vx^2-u */
164 |     /* keep x if vx^2 = u; if instead vx^2 = -u, multiply x by sqrtm1; otherwise fail */
165 |     if (fe_isnonzero(check)) {
166 |         fe_add(check, vxx, u); /* vx^2+u */
167 | 
168 |         if (fe_isnonzero(check)) {
169 |             return -1;
170 |         }
171 | 
172 |         fe_mul(h->X, h->X, sqrtm1);
173 |     }
174 |     /* flip X when its sign equals the encoded sign bit, so the stored X is the negated one */
175 |     if (fe_isnegative(h->X) == (s[31] >> 7)) {
176 |         fe_neg(h->X, h->X);
177 |     }
178 | 
179 |     fe_mul(h->T, h->X, h->Y);
180 |     return 0;
181 | }
182 | 
183 | 
184 | /*
185 | r = p + q, with q a precomputed point (yplusx, yminusx, xy2d); unlike ge_add there is no q->Z factor
186 | */
187 | 
188 | void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
189 |     fe t0;
190 |     fe_add(r->X, p->Y, p->X);       /* p->Y + p->X */
191 |     fe_sub(r->Y, p->Y, p->X);       /* p->Y - p->X */
192 |     fe_mul(r->Z, r->X, q->yplusx);  /* A = (p->Y + p->X) * q->yplusx */
193 |     fe_mul(r->Y, r->Y, q->yminusx); /* B = (p->Y - p->X) * q->yminusx */
194 |     fe_mul(r->T, q->xy2d, p->T);    /* C = q->xy2d * p->T */
195 |     fe_add(t0, p->Z, p->Z);         /* D = 2 * p->Z */
196 |     fe_sub(r->X, r->Z, r->Y);       /* r->X = A - B */
197 |     fe_add(r->Y, r->Z, r->Y);       /* r->Y = A + B */
198 |     fe_add(r->Z, t0, r->T);         /* r->Z = D + C */
199 |     fe_sub(r->T, t0, r->T);         /* r->T = D - C */
200 | }
201 | 
202 | 
203 | /*
204 | r = p - q, with q a precomputed point (yplusx, yminusx, xy2d)
205 | */
206 | 
207 | void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
208 |     fe t0;
209 |     /* same steps as ge_madd with q's yplusx/yminusx swapped and the sign of the xy2d term flipped */
210 |     fe_add(r->X, p->Y, p->X);
211 |     fe_sub(r->Y, p->Y, p->X);
212 |     fe_mul(r->Z, r->X, q->yminusx); /* A = (p->Y + p->X) * q->yminusx */
213 |     fe_mul(r->Y, r->Y, q->yplusx);  /* B = (p->Y - p->X) * q->yplusx */
214 |     fe_mul(r->T, q->xy2d, p->T);    /* C = q->xy2d * p->T */
215 |     fe_add(t0, p->Z, p->Z);         /* D = 2 * p->Z */
216 |     fe_sub(r->X, r->Z, r->Y);       /* r->X = A - B */
217 |     fe_add(r->Y, r->Z, r->Y);       /* r->Y = A + B */
218 |     fe_sub(r->Z, t0, r->T);         /* r->Z = D - C */
219 |     fe_add(r->T, t0, r->T);         /* r->T = D + C */
220 | }
221 | 
222 | 
223 | /*
224 | r = p, converted from p1p1 form to p2 form
225 | */
226 | 
227 | void ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p) {
228 |     fe_mul(r->Z, p->Z, p->T);
229 |     fe_mul(r->Y, p->Y, p->Z);
230 |     fe_mul(r->X, p->X, p->T);
231 | }
232 | 
233 | 
234 | 
235 | /*
236 | r = p, converted from p1p1 form to p3 form
237 | */
238 | 
239 | void ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) {
240 |     fe_mul(r->T, p->X, p->Y);
241 |     fe_mul(r->Z, p->Z, p->T);
242 |     fe_mul(r->Y, p->Y, p->Z);
243 |     fe_mul(r->X, p->X, p->T);
244 | }
245 | 
246 | /* h = (X, Y, Z) = (0, 1, 1) */
247 | void ge_p2_0(ge_p2 *h) {
248 |     fe_1(h->Z);
249 |     fe_1(h->Y);
250 |     fe_0(h->X);
251 | }
252 | 
253 | 
254 | 
255 | /*
256 | r = 2 * p, with p in p2 form and the result in p1p1 form
257 | */
258 | 
259 | void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) {
260 |     fe t0;
261 |     /* the r fields double as scratch space, so the statement order below matters */
262 |     fe_sq(r->X, p->X);              /* XX = X^2 */
263 |     fe_sq(r->Z, p->Y);              /* YY = Y^2 */
264 |     fe_sq2(r->T, p->Z);             /* presumably 2 * Z^2 (fe_sq2 is defined elsewhere) */
265 |     fe_add(r->Y, p->X, p->Y);       /* X + Y */
266 |     fe_sq(t0, r->Y);                /* (X + Y)^2 */
267 |     fe_add(r->Y, r->Z, r->X);       /* r->Y = YY + XX */
268 |     fe_sub(r->Z, r->Z, r->X);       /* r->Z = YY - XX */
269 |     fe_sub(r->X, t0, r->Y);         /* r->X = (X + Y)^2 - (YY + XX) */
270 |     fe_sub(r->T, r->T, r->Z);       /* r->T = r->T - (YY - XX) */
271 | }
272 | 
273 | /* h = (X, Y, Z, T) = (0, 1, 1, 0) */
274 | void ge_p3_0(ge_p3 *h) {
275 |     fe_0(h->T);
276 |     fe_1(h->Z);
277 |     fe_1(h->Y);
278 |     fe_0(h->X);
279 | }
280 | 
281 | 
282 | /*
283 | r = 2 * p, computed by converting p to p2 form and doubling that
284 | */
285 | 
286 | void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p) {
287 |     ge_p2 projective;
288 |     ge_p3_to_p2(&projective, p);
289 |     ge_p2_dbl(r, &projective);
290 | }
291 | 
292 | 
293 | 
294 | /*
295 | r = p, converted from p3 form to the cached form taken by ge_add and ge_sub
296 | */
297 | 
298 | static const fe d2 = {
299 |     -21827239, -5839606, -30745221, 13898782, 229458, 15978800, -12551817, -6495438, 29715968, 9444199
300 | };
301 | 
302 | void ge_p3_to_cached(ge_cached *r, const ge_p3 *p) {
303 |     fe_copy(r->Z, p->Z);
304 |     fe_mul(r->T2d, p->T, d2);
305 |     fe_sub(r->YminusX, p->Y, p->X);
306 |     fe_add(r->YplusX, p->Y, p->X);
307 | }
308 | 
309 | 
310 | /*
311 | r = p, copying X, Y and Z and leaving out T
312 | */
313 | 
314 | void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) {
315 |     fe_copy(r->Z, p->Z);
316 |     fe_copy(r->Y, p->Y);
317 |     fe_copy(r->X, p->X);
318 | }
319 | 
320 | /* s = bytes of Y/Z, with the sign of X/Z XORed into bit 7 of s[31] */
321 | void ge_p3_tobytes(unsigned char *s, const ge_p3 *h) {
322 |     fe z_inv;
323 |     fe affine_x;
324 |     fe affine_y;
325 |     fe_invert(z_inv, h->Z);
326 |     fe_mul(affine_y, h->Y, z_inv);
327 |     fe_mul(affine_x, h->X, z_inv);
328 |     fe_tobytes(s, affine_y);
329 |     s[31] ^= fe_isnegative(affine_x) << 7;
330 | }
331 | 
332 | 
333 | static unsigned char equal(signed char b, signed char c) {
334 |     /* XOR of the two bytes is zero exactly when they match */
335 |     const unsigned char diff = (unsigned char) b ^ (unsigned char) c;
336 |     /* diff - 1 wraps around to all ones only for diff == 0, so bit 63 carries the answer */
337 |     uint64_t wrapped = diff;
338 |     wrapped -= 1;
339 |     /* branch-free result: 1 if b == c, 0 otherwise */
340 |     return (unsigned char) (wrapped >> 63);
341 | }
342 | /* Branch-free sign test: returns 1 if b < 0, 0 otherwise. */
343 | static unsigned char negative(signed char b) {
344 |     /* the conversion to uint64_t sign-extends, so bit 63 mirrors the sign of b */
345 |     const uint64_t widened = (uint64_t) b;
346 |     return (unsigned char) (widened >> 63);
347 | }
348 | /* Apply fe_cmov with the same flag b to each of the three fields of t, taking the values from u. */
349 | static void cmov(ge_precomp *t, const ge_precomp *u, unsigned char b) {
350 |     fe_cmov(t->xy2d, u->xy2d, b);
351 |     fe_cmov(t->yminusx, u->yminusx, b);
352 |     fe_cmov(t->yplusx, u->yplusx, b);
353 | }
354 | 
355 | /* t = base[pos][|b| - 1], negated when b < 0, or (1, 1, 0) when no entry matches (b == 0); all 8 entries are always visited. */
356 | static void select(ge_precomp *t, int pos, signed char b) {
357 |     ge_precomp minust;
358 |     unsigned char bnegative = negative(b);              /* 1 if b < 0, else 0 */
359 |     unsigned char babs = b - (((-bnegative) & b) << 1); /* |b|: subtracts 2b only when b is negative */
360 |     fe_1(t->yplusx);
361 |     fe_1(t->yminusx);
362 |     fe_0(t->xy2d);
363 |     cmov(t, &base[pos][0], equal(babs, 1));
364 |     cmov(t, &base[pos][1], equal(babs, 2));
365 |     cmov(t, &base[pos][2], equal(babs, 3));
366 |     cmov(t, &base[pos][3], equal(babs, 4));
367 |     cmov(t, &base[pos][4], equal(babs, 5));
368 |     cmov(t, &base[pos][5], equal(babs, 6));
369 |     cmov(t, &base[pos][6], equal(babs, 7));
370 |     cmov(t, &base[pos][7], equal(babs, 8));
371 |     fe_copy(minust.yplusx, t->yminusx);  /* minust: t with yplusx/yminusx swapped ... */
372 |     fe_copy(minust.yminusx, t->yplusx);
373 |     fe_neg(minust.xy2d, t->xy2d);        /* ... and xy2d negated */
374 |     cmov(t, &minust, bnegative);
375 | }
376 | 
377 | /*
378 | h = a * B
379 | where a = a[0]+256*a[1]+...+256^31 a[31]
380 | B is the Ed25519 base point (x,4/5) with x positive.
381 | 
382 | Preconditions:
383 |   a[31] <= 127
384 | */
385 | 
386 | void ge_scalarmult_base(ge_p3 *h, const unsigned char *a) {
387 |     signed char e[64];
388 |     signed char carry;
389 |     ge_p1p1 r;
390 |     ge_p2 s;
391 |     ge_precomp t;
392 |     int i;
393 |     /* split a into 64 4-bit digits, least significant first */
394 |     for (i = 0; i < 32; ++i) {
395 |         e[2 * i + 0] = (a[i] >> 0) & 15;
396 |         e[2 * i + 1] = (a[i] >> 4) & 15;
397 |     }
398 | 
399 |     /* each e[i] is between 0 and 15 */
400 |     /* e[63] is between 0 and 7 */
401 |     carry = 0;
402 |     /* recode into signed digits: subtract 16 and carry 1 upward whenever a digit reaches 8 or more */
403 |     for (i = 0; i < 63; ++i) {
404 |         e[i] += carry;
405 |         carry = e[i] + 8;
406 |         carry >>= 4;
407 |         e[i] -= carry << 4;
408 |     }
409 | 
410 |     e[63] += carry;
411 |     /* each e[i] is between -8 and 8 */
412 |     ge_p3_0(h);
413 |     /* first pass: add the table entries selected by the odd-indexed digits */
414 |     for (i = 1; i < 64; i += 2) {
415 |         select(&t, i / 2, e[i]);
416 |         ge_madd(&r, h, &t);
417 |         ge_p1p1_to_p3(h, &r);
418 |     }
419 |     /* four doublings: h = 16 * h */
420 |     ge_p3_dbl(&r, h);
421 |     ge_p1p1_to_p2(&s, &r);
422 |     ge_p2_dbl(&r, &s);
423 |     ge_p1p1_to_p2(&s, &r);
424 |     ge_p2_dbl(&r, &s);
425 |     ge_p1p1_to_p2(&s, &r);
426 |     ge_p2_dbl(&r, &s);
427 |     ge_p1p1_to_p3(h, &r);
428 |     /* second pass: add the table entries selected by the even-indexed digits */
429 |     for (i = 0; i < 64; i += 2) {
430 |         select(&t, i / 2, e[i]);
431 |         ge_madd(&r, h, &t);
432 |         ge_p1p1_to_p3(h, &r);
433 |     }
434 | }
435 | 
436 | 
/*
r = p - q

p is a ge_p3, q a ge_cached, and the result is stored as a ge_p1p1.
The per-line notes below assume fe_add/fe_sub/fe_mul compute
h = f + g, h = f - g and h = f * g respectively (defined in fe.c).
The fields of r are reused as scratch space, so the statement order matters.
*/

void ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
    fe t0;
    
    fe_add(r->X, p->Y, p->X);         /* X  = Y1 + X1 */
    fe_sub(r->Y, p->Y, p->X);         /* Y  = Y1 - X1 */
    fe_mul(r->Z, r->X, q->YminusX);   /* Z  = (Y1 + X1) * q.YminusX */
    fe_mul(r->Y, r->Y, q->YplusX);    /* Y  = (Y1 - X1) * q.YplusX */
    fe_mul(r->T, q->T2d, p->T);       /* T  = q.T2d * T1 */
    fe_mul(r->X, p->Z, q->Z);         /* X  = Z1 * q.Z (scratch) */
    fe_add(t0, r->X, r->X);           /* t0 = 2 * Z1 * q.Z */
    fe_sub(r->X, r->Z, r->Y);         /* final X = Z - Y */
    fe_add(r->Y, r->Z, r->Y);         /* final Y = Z + Y */
    fe_sub(r->Z, t0, r->T);           /* final Z = t0 - T */
    fe_add(r->T, t0, r->T);           /* final T = t0 + T */
}
456 | 
457 | 
458 | void ge_tobytes(unsigned char *s, const ge_p2 *h) {
459 |     fe recip;
460 |     fe x;
461 |     fe y;
462 |     fe_invert(recip, h->Z);
463 |     fe_mul(x, h->X, recip);
464 |     fe_mul(y, h->Y, recip);
465 |     fe_tobytes(s, y);
466 |     s[31] ^= fe_isnegative(x) << 7;
467 | }
468 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/ge.h:
--------------------------------------------------------------------------------
#ifndef GE_H
#define GE_H

#include "fe.h"


/*
ge means group element.

Here the group is the set of pairs (x,y) of field elements (see fe.h)
satisfying -x^2 + y^2 = 1 + d x^2y^2
where d = -121665/121666.

Representations:
  ge_p2 (projective): (X:Y:Z) satisfying x=X/Z, y=Y/Z
  ge_p3 (extended): (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT
  ge_p1p1 (completed): ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T
  ge_precomp (Duif): (y+x,y-x,2dxy)
*/

/* Projective representation (X:Y:Z). */
typedef struct {
  fe X;
  fe Y;
  fe Z;
} ge_p2;

/* Extended representation (X:Y:Z:T) with XY = ZT. */
typedef struct {
  fe X;
  fe Y;
  fe Z;
  fe T;
} ge_p3;

/* Completed representation ((X:Z),(Y:T)); produced by the add/sub/double routines. */
typedef struct {
  fe X;
  fe Y;
  fe Z;
  fe T;
} ge_p1p1;

/* Precomputed affine point stored as (y+x, y-x, 2dxy). */
typedef struct {
  fe yplusx;
  fe yminusx;
  fe xy2d;
} ge_precomp;

/*
Cached operand for ge_add / ge_sub.
NOTE(review): presumably (Y+X, Y-X, Z, 2dT) of a ge_p3, as the field names
suggest -- confirm against ge_p3_to_cached.
*/
typedef struct {
  fe YplusX;
  fe YminusX;
  fe Z;
  fe T2d;
} ge_cached;

/* Encoding and decoding. ge_tobytes writes y = Y/Z and folds the sign of x = X/Z into s[31]. */
void ge_p3_tobytes(unsigned char *s, const ge_p3 *h);
void ge_tobytes(unsigned char *s, const ge_p2 *h);
/* Callers (see verify.c) treat a non-zero return value as a decoding failure. */
int ge_frombytes_negate_vartime(ge_p3 *h, const unsigned char *s);

/* Group arithmetic. ge_sub computes r = p - q; ge_scalarmult_base computes h = a * B and requires a[31] <= 127. */
void ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q);
void ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q);
void ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a, const ge_p3 *A, const unsigned char *b);
void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q);
void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q);
void ge_scalarmult_base(ge_p3 *h, const unsigned char *a);

/* Conversions between representations, neutral-element setters (*_0) and doublings (*_dbl). */
void ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p);
void ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p);
void ge_p2_0(ge_p2 *h);
void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p);
void ge_p3_0(ge_p3 *h);
void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p);
void ge_p3_to_cached(ge_cached *r, const ge_p3 *p);
void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p);

#endif
75 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/key_exchange.c:
--------------------------------------------------------------------------------
 1 | #include "ed25519.h"
 2 | #include "fe.h"
 3 | 
/*
 * Derives a 32-byte shared secret from an Ed25519 public key and a private key.
 *
 * shared_secret: output, written by fe_tobytes.
 * public_key:    peer public key, read with fe_frombytes as the Edwards y coordinate.
 * private_key:   only the first 32 bytes are read; they are copied and clamped locally.
 *
 * The public key is mapped to a Montgomery x coordinate and multiplied by the
 * clamped scalar with an x-only ladder that uses fe_cswap rather than
 * branching on scalar bits.
 */
void ed25519_key_exchange(unsigned char *shared_secret, const unsigned char *public_key, const unsigned char *private_key) {
    unsigned char e[32];
    unsigned int i;
    
    fe x1;
    fe x2;
    fe z2;
    fe x3;
    fe z3;
    fe tmp0;
    fe tmp1;

    int pos;
    unsigned int swap;
    unsigned int b;

    /* copy the private key and make sure it's valid */
    for (i = 0; i < 32; ++i) {
        e[i] = private_key[i];
    }

    /* clamp: clear the low three bits, clear the top two bits, then set bit 254 */
    e[0] &= 248;
    e[31] &= 63;
    e[31] |= 64;

    /* unpack the public key and convert edwards to montgomery */
    /* due to CodesInChaos: montgomeryX = (edwardsY + 1)*inverse(1 - edwardsY) mod p */
    fe_frombytes(x1, public_key);
    fe_1(tmp1);
    fe_add(tmp0, x1, tmp1);    /* tmp0 = y + 1 */
    fe_sub(tmp1, tmp1, x1);    /* tmp1 = 1 - y */
    fe_invert(tmp1, tmp1);
    fe_mul(x1, tmp0, tmp1);    /* x1 = (y + 1) / (1 - y) */

    /* ladder state: (x2:z2) starts at (1:0), (x3:z3) at (x1:1) */
    fe_1(x2);
    fe_0(z2);
    fe_copy(x3, x1);
    fe_1(z3);

    /* walk the scalar from bit 254 down to bit 0 */
    swap = 0;
    for (pos = 254; pos >= 0; --pos) {
        b = e[pos / 8] >> (pos & 7);
        b &= 1;
        /* swap only when this bit differs from the previous one */
        swap ^= b;
        fe_cswap(x2, x3, swap);
        fe_cswap(z2, z3, swap);
        swap = b;

        /* from montgomery.h */
        /* one combined ladder step; temporaries are reused, so the order below is significant */
        fe_sub(tmp0, x3, z3);
        fe_sub(tmp1, x2, z2);
        fe_add(x2, x2, z2);
        fe_add(z2, x3, z3);
        fe_mul(z3, tmp0, x2);
        fe_mul(z2, z2, tmp1);
        fe_sq(tmp0, tmp1);
        fe_sq(tmp1, x2);
        fe_add(x3, z3, z2);
        fe_sub(z2, z3, z2);
        fe_mul(x2, tmp1, tmp0);
        fe_sub(tmp1, tmp1, tmp0);
        fe_sq(z2, z2);
        fe_mul121666(z3, tmp1);
        fe_sq(x3, x3);
        fe_add(tmp0, tmp0, z3);
        fe_mul(z3, x1, z2);
        fe_mul(z2, tmp1, tmp0);
    }

    /* apply the swap left pending by the last processed bit */
    fe_cswap(x2, x3, swap);
    fe_cswap(z2, z3, swap);

    /* back to affine: x2 / z2, then serialize */
    fe_invert(z2, z2);
    fe_mul(x2, x2, z2);
    fe_tobytes(shared_secret, x2);
}
80 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/keypair.c:
--------------------------------------------------------------------------------
 1 | #include "ed25519.h"
 2 | #include "sha512.h"
 3 | #include "ge.h"
 4 | 
 5 | 
 6 | void ed25519_create_keypair(unsigned char *public_key, unsigned char *private_key, const unsigned char *seed) {
 7 |     ge_p3 A;
 8 | 
 9 |     sha512(seed, 32, private_key);
10 |     private_key[0] &= 248;
11 |     private_key[31] &= 63;
12 |     private_key[31] |= 64;
13 | 
14 |     ge_scalarmult_base(&A, private_key);
15 |     ge_p3_tobytes(public_key, &A);
16 | }
17 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/sc.h:
--------------------------------------------------------------------------------
#ifndef SC_H
#define SC_H

/*
The set of scalars is \Z/l
where l = 2^252 + 27742317777372353535851937790883648493.

NOTE(review): the implementations live in sc.c; the descriptions below are
inferred from the callers in sign.c and verify.c -- confirm against sc.c.
*/

/* Reduces s in place. Callers pass a 64-byte SHA-512 digest and afterwards use s as a scalar (presumably s mod l). */
void sc_reduce(unsigned char *s);
/* Presumably s = (a * b + c) mod l; sign.c uses it to write the second half of a signature. */
void sc_muladd(unsigned char *s, const unsigned char *a, const unsigned char *b, const unsigned char *c);

#endif
13 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/seed.c:
--------------------------------------------------------------------------------
 1 | #include "ed25519.h"
 2 | 
 3 | #ifndef ED25519_NO_SEED
 4 | 
 5 | #ifdef _WIN32
 6 | #include <windows.h>
 7 | #include <wincrypt.h>
 8 | #else
 9 | #include <stdio.h>
10 | #endif
11 | 
/*
 * Fills seed with 32 bytes of randomness from the operating system.
 *
 * Windows:  CryptGenRandom on a CRYPT_VERIFYCONTEXT provider.
 * Elsewhere: reads 32 bytes from /dev/urandom.
 *
 * Returns 0 on success and 1 on failure. A short read from /dev/urandom is
 * reported as a failure so a partially filled seed is never accepted.
 *
 * NOTE(review): when SGX_COMPAT is defined nothing is written to seed and
 * 0 is still returned -- callers in that configuration presumably obtain
 * randomness elsewhere; confirm.
 */
int ed25519_create_seed(unsigned char *seed) {
#ifndef SGX_COMPAT
#ifdef _WIN32
    HCRYPTPROV prov;

    if (!CryptAcquireContext(&prov, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))  {
        return 1;
    }

    if (!CryptGenRandom(prov, 32, seed))  {
        CryptReleaseContext(prov, 0);
        return 1;
    }

    CryptReleaseContext(prov, 0);
#else
    size_t got;
    FILE *f = fopen("/dev/urandom", "rb");

    if (f == NULL) {
        return 1;
    }

    got = fread(seed, 1, 32, f);
    fclose(f);

    /* fewer than 32 bytes would leave the tail of the seed uninitialized */
    if (got != 32) {
        return 1;
    }
#endif
#endif

    return 0;
}
41 | 
42 | #endif
43 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/sha512.c:
--------------------------------------------------------------------------------
  1 | /* LibTomCrypt, modular cryptographic library -- Tom St Denis
  2 |  *
  3 |  * LibTomCrypt is a library that provides various cryptographic
  4 |  * algorithms in a highly modular and flexible manner.
  5 |  *
  6 |  * The library is free for all purposes without any express
  7 |  * guarantee it works.
  8 |  *
  9 |  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
 10 |  */
 11 | 
 12 | #include "fixedint.h"
 13 | #include "sha512.h"
 14 | 
/* the K array: one 64-bit constant per compression round; sha512_compress adds K[i] in round i (80 rounds) */
static const uint64_t K[80] = {
    UINT64_C(0x428a2f98d728ae22), UINT64_C(0x7137449123ef65cd), 
    UINT64_C(0xb5c0fbcfec4d3b2f), UINT64_C(0xe9b5dba58189dbbc),
    UINT64_C(0x3956c25bf348b538), UINT64_C(0x59f111f1b605d019), 
    UINT64_C(0x923f82a4af194f9b), UINT64_C(0xab1c5ed5da6d8118),
    UINT64_C(0xd807aa98a3030242), UINT64_C(0x12835b0145706fbe), 
    UINT64_C(0x243185be4ee4b28c), UINT64_C(0x550c7dc3d5ffb4e2),
    UINT64_C(0x72be5d74f27b896f), UINT64_C(0x80deb1fe3b1696b1), 
    UINT64_C(0x9bdc06a725c71235), UINT64_C(0xc19bf174cf692694),
    UINT64_C(0xe49b69c19ef14ad2), UINT64_C(0xefbe4786384f25e3), 
    UINT64_C(0x0fc19dc68b8cd5b5), UINT64_C(0x240ca1cc77ac9c65),
    UINT64_C(0x2de92c6f592b0275), UINT64_C(0x4a7484aa6ea6e483), 
    UINT64_C(0x5cb0a9dcbd41fbd4), UINT64_C(0x76f988da831153b5),
    UINT64_C(0x983e5152ee66dfab), UINT64_C(0xa831c66d2db43210), 
    UINT64_C(0xb00327c898fb213f), UINT64_C(0xbf597fc7beef0ee4),
    UINT64_C(0xc6e00bf33da88fc2), UINT64_C(0xd5a79147930aa725), 
    UINT64_C(0x06ca6351e003826f), UINT64_C(0x142929670a0e6e70),
    UINT64_C(0x27b70a8546d22ffc), UINT64_C(0x2e1b21385c26c926), 
    UINT64_C(0x4d2c6dfc5ac42aed), UINT64_C(0x53380d139d95b3df),
    UINT64_C(0x650a73548baf63de), UINT64_C(0x766a0abb3c77b2a8), 
    UINT64_C(0x81c2c92e47edaee6), UINT64_C(0x92722c851482353b),
    UINT64_C(0xa2bfe8a14cf10364), UINT64_C(0xa81a664bbc423001),
    UINT64_C(0xc24b8b70d0f89791), UINT64_C(0xc76c51a30654be30),
    UINT64_C(0xd192e819d6ef5218), UINT64_C(0xd69906245565a910), 
    UINT64_C(0xf40e35855771202a), UINT64_C(0x106aa07032bbd1b8),
    UINT64_C(0x19a4c116b8d2d0c8), UINT64_C(0x1e376c085141ab53), 
    UINT64_C(0x2748774cdf8eeb99), UINT64_C(0x34b0bcb5e19b48a8),
    UINT64_C(0x391c0cb3c5c95a63), UINT64_C(0x4ed8aa4ae3418acb), 
    UINT64_C(0x5b9cca4f7763e373), UINT64_C(0x682e6ff3d6b2b8a3),
    UINT64_C(0x748f82ee5defb2fc), UINT64_C(0x78a5636f43172f60), 
    UINT64_C(0x84c87814a1f0ab72), UINT64_C(0x8cc702081a6439ec),
    UINT64_C(0x90befffa23631e28), UINT64_C(0xa4506cebde82bde9), 
    UINT64_C(0xbef9a3f7b2c67915), UINT64_C(0xc67178f2e372532b),
    UINT64_C(0xca273eceea26619c), UINT64_C(0xd186b8c721c0c207), 
    UINT64_C(0xeada7dd6cde0eb1e), UINT64_C(0xf57d4f7fee6ed178),
    UINT64_C(0x06f067aa72176fba), UINT64_C(0x0a637dc5a2c898a6), 
    UINT64_C(0x113f9804bef90dae), UINT64_C(0x1b710b35131c471b),
    UINT64_C(0x28db77f523047d84), UINT64_C(0x32caab7b40c72493), 
    UINT64_C(0x3c9ebe0a15c9bebc), UINT64_C(0x431d67c49c100d4c),
    UINT64_C(0x4cc5d4becb3e42b6), UINT64_C(0x597f299cfc657e2a), 
    UINT64_C(0x5fcb6fab3ad6faec), UINT64_C(0x6c44198c4a475817)
};
 58 | 
 59 | /* Various logical functions */
 60 | 
 61 | #define ROR64c(x, y) \
 62 |     ( ((((x)&UINT64_C(0xFFFFFFFFFFFFFFFF))>>((uint64_t)(y)&UINT64_C(63))) | \
 63 |       ((x)<<((uint64_t)(64-((y)&UINT64_C(63)))))) & UINT64_C(0xFFFFFFFFFFFFFFFF))
 64 | 
 65 | #define STORE64H(x, y)                                                                     \
 66 |    { (y)[0] = (unsigned char)(((x)>>56)&255); (y)[1] = (unsigned char)(((x)>>48)&255);     \
 67 |      (y)[2] = (unsigned char)(((x)>>40)&255); (y)[3] = (unsigned char)(((x)>>32)&255);     \
 68 |      (y)[4] = (unsigned char)(((x)>>24)&255); (y)[5] = (unsigned char)(((x)>>16)&255);     \
 69 |      (y)[6] = (unsigned char)(((x)>>8)&255); (y)[7] = (unsigned char)((x)&255); }
 70 | 
 71 | #define LOAD64H(x, y)                                                      \
 72 |    { x = (((uint64_t)((y)[0] & 255))<<56)|(((uint64_t)((y)[1] & 255))<<48) | \
 73 |          (((uint64_t)((y)[2] & 255))<<40)|(((uint64_t)((y)[3] & 255))<<32) | \
 74 |          (((uint64_t)((y)[4] & 255))<<24)|(((uint64_t)((y)[5] & 255))<<16) | \
 75 |          (((uint64_t)((y)[6] & 255))<<8)|(((uint64_t)((y)[7] & 255))); }
 76 | 
 77 | 
 78 | #define Ch(x,y,z)       (z ^ (x & (y ^ z)))
 79 | #define Maj(x,y,z)      (((x | y) & z) | (x & y)) 
 80 | #define S(x, n)         ROR64c(x, n)
 81 | #define R(x, n)         (((x) &UINT64_C(0xFFFFFFFFFFFFFFFF))>>((uint64_t)n))
 82 | #define Sigma0(x)       (S(x, 28) ^ S(x, 34) ^ S(x, 39))
 83 | #define Sigma1(x)       (S(x, 14) ^ S(x, 18) ^ S(x, 41))
 84 | #define Gamma0(x)       (S(x, 1) ^ S(x, 8) ^ R(x, 7))
 85 | #define Gamma1(x)       (S(x, 19) ^ S(x, 61) ^ R(x, 6))
 86 | #ifndef MIN
 87 |    #define MIN(x, y) ( ((x)<(y))?(x):(y) )
 88 | #endif
 89 | 
 90 | /* compress 1024-bits */
 91 | static int sha512_compress(sha512_context *md, unsigned char *buf)
 92 | {
 93 |     uint64_t S[8], W[80], t0, t1;
 94 |     int i;
 95 | 
 96 |     /* copy state into S */
 97 |     for (i = 0; i < 8; i++) {
 98 |         S[i] = md->state[i];
 99 |     }
100 | 
101 |     /* copy the state into 1024-bits into W[0..15] */
102 |     for (i = 0; i < 16; i++) {
103 |         LOAD64H(W[i], buf + (8*i));
104 |     }
105 | 
106 |     /* fill W[16..79] */
107 |     for (i = 16; i < 80; i++) {
108 |         W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16];
109 |     }        
110 | 
111 | /* Compress */
112 |     #define RND(a,b,c,d,e,f,g,h,i) \
113 |     t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \
114 |     t1 = Sigma0(a) + Maj(a, b, c);\
115 |     d += t0; \
116 |     h  = t0 + t1;
117 | 
118 |     for (i = 0; i < 80; i += 8) {
119 |        RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0);
120 |        RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1);
121 |        RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2);
122 |        RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3);
123 |        RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4);
124 |        RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5);
125 |        RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6);
126 |        RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7);
127 |    }
128 | 
129 |    #undef RND
130 | 
131 | 
132 | 
133 |     /* feedback */
134 |    for (i = 0; i < 8; i++) {
135 |         md->state[i] = md->state[i] + S[i];
136 |     }
137 | 
138 |     return 0;
139 | }
140 | 
141 | 
142 | /**
143 |    Initialize the hash state
144 |    @param md   The hash state you wish to initialize
145 |    @return 0 if successful
146 | */
147 | int sha512_init(sha512_context * md) {
148 |     if (md == NULL) return 1;
149 | 
150 |     md->curlen = 0;
151 |     md->length = 0;
152 |     md->state[0] = UINT64_C(0x6a09e667f3bcc908);
153 |     md->state[1] = UINT64_C(0xbb67ae8584caa73b);
154 |     md->state[2] = UINT64_C(0x3c6ef372fe94f82b);
155 |     md->state[3] = UINT64_C(0xa54ff53a5f1d36f1);
156 |     md->state[4] = UINT64_C(0x510e527fade682d1);
157 |     md->state[5] = UINT64_C(0x9b05688c2b3e6c1f);
158 |     md->state[6] = UINT64_C(0x1f83d9abfb41bd6b);
159 |     md->state[7] = UINT64_C(0x5be0cd19137e2179);
160 | 
161 |     return 0;
162 | }
163 | 
164 | /**
165 |    Process a block of memory though the hash
166 |    @param md     The hash state
167 |    @param in     The data to hash
168 |    @param inlen  The length of the data (octets)
169 |    @return 0 if successful
170 | */
171 | int sha512_update (sha512_context * md, const unsigned char *in, size_t inlen)               
172 | {                                                                                           
173 |     size_t n;
174 |     size_t i;                                                                        
175 |     int           err;     
176 |     if (md == NULL) return 1;  
177 |     if (in == NULL) return 1;                                                              
178 |     if (md->curlen > sizeof(md->buf)) {                             
179 |        return 1;                                                            
180 |     }                                                                                       
181 |     while (inlen > 0) {                                                                     
182 |         if (md->curlen == 0 && inlen >= 128) {                           
183 |            if ((err = sha512_compress (md, (unsigned char *)in)) != 0) {               
184 |               return err;                                                                   
185 |            }                                                                                
186 |            md->length += 128 * 8;                                        
187 |            in             += 128;                                                    
188 |            inlen          -= 128;                                                    
189 |         } else {                                                                            
190 |            n = MIN(inlen, (128 - md->curlen));
191 | 
192 |            for (i = 0; i < n; i++) {
193 |             md->buf[i + md->curlen] = in[i];
194 |            }
195 | 
196 | 
197 |            md->curlen += n;                                                     
198 |            in             += n;                                                             
199 |            inlen          -= n;                                                             
200 |            if (md->curlen == 128) {                                      
201 |               if ((err = sha512_compress (md, md->buf)) != 0) {            
202 |                  return err;                                                                
203 |               }                                                                             
204 |               md->length += 8*128;                                       
205 |               md->curlen = 0;                                                   
206 |            }                                                                                
207 |        }                                                                                    
208 |     }                                                                                       
209 |     return 0;                                                                        
210 | }
211 | 
212 | /**
213 |    Terminate the hash to get the digest
214 |    @param md  The hash state
215 |    @param out [out] The destination of the hash (64 bytes)
216 |    @return 0 if successful
217 | */
218 |    int sha512_final(sha512_context * md, unsigned char *out)
219 |    {
220 |     int i;
221 | 
222 |     if (md == NULL) return 1;
223 |     if (out == NULL) return 1;
224 | 
225 |     if (md->curlen >= sizeof(md->buf)) {
226 |      return 1;
227 |  }
228 | 
229 |     /* increase the length of the message */
230 |  md->length += md->curlen * UINT64_C(8);
231 | 
232 |     /* append the '1' bit */
233 |  md->buf[md->curlen++] = (unsigned char)0x80;
234 | 
235 |     /* if the length is currently above 112 bytes we append zeros
236 |      * then compress.  Then we can fall back to padding zeros and length
237 |      * encoding like normal.
238 |      */
239 |      if (md->curlen > 112) {
240 |         while (md->curlen < 128) {
241 |             md->buf[md->curlen++] = (unsigned char)0;
242 |         }
243 |         sha512_compress(md, md->buf);
244 |         md->curlen = 0;
245 |     }
246 | 
247 |     /* pad upto 120 bytes of zeroes 
248 |      * note: that from 112 to 120 is the 64 MSB of the length.  We assume that you won't hash
249 |      * > 2^64 bits of data... :-)
250 |      */
251 | while (md->curlen < 120) {
252 |     md->buf[md->curlen++] = (unsigned char)0;
253 | }
254 | 
255 |     /* store length */
256 | STORE64H(md->length, md->buf+120);
257 | sha512_compress(md, md->buf);
258 | 
259 |     /* copy output */
260 | for (i = 0; i < 8; i++) {
261 |     STORE64H(md->state[i], out+(8*i));
262 | }
263 | 
264 | return 0;
265 | }
266 | 
267 | int sha512(const unsigned char *message, size_t message_len, unsigned char *out)
268 | {
269 |     sha512_context ctx;
270 |     int ret;
271 |     if ((ret = sha512_init(&ctx))) return ret;
272 |     if ((ret = sha512_update(&ctx, message, message_len))) return ret;
273 |     if ((ret = sha512_final(&ctx, out))) return ret;
274 |     return 0;
275 | }
276 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/sha512.h:
--------------------------------------------------------------------------------
#ifndef SHA512_H
#define SHA512_H

#include <stddef.h>

#include "fixedint.h"

/*
 * Streaming SHA-512 state:
 *   length - number of message bits accounted for so far
 *   state  - the eight 64-bit chaining words
 *   curlen - number of bytes currently held in buf
 *   buf    - partial 128-byte block awaiting compression
 */
typedef struct sha512_context_ {
    uint64_t  length, state[8];
    size_t curlen;
    unsigned char buf[128];
} sha512_context;


/* All functions return 0 on success and non-zero on failure (NULL argument or inconsistent state). */

/* Resets md to the initial hash state. */
int sha512_init(sha512_context * md);
/* Pads the message, writes the 64-byte digest to out. */
int sha512_final(sha512_context * md, unsigned char *out);
/* Absorbs inlen bytes from in. */
int sha512_update(sha512_context * md, const unsigned char *in, size_t inlen);
/* One-shot init + update + final. */
int sha512(const unsigned char *message, size_t message_len, unsigned char *out);

#endif
22 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/sign.c:
--------------------------------------------------------------------------------
 1 | #include "ed25519.h"
 2 | #include "sha512.h"
 3 | #include "ge.h"
 4 | #include "sc.h"
 5 | 
 6 | 
 7 | void ed25519_sign(unsigned char *signature, const unsigned char *message, size_t message_len, const unsigned char *public_key, const unsigned char *private_key) {
 8 |     sha512_context hash;
 9 |     unsigned char hram[64];
10 |     unsigned char r[64];
11 |     ge_p3 R;
12 | 
13 | 
14 |     sha512_init(&hash);
15 |     sha512_update(&hash, private_key + 32, 32);
16 |     sha512_update(&hash, message, message_len);
17 |     sha512_final(&hash, r);
18 | 
19 |     sc_reduce(r);
20 |     ge_scalarmult_base(&R, r);
21 |     ge_p3_tobytes(signature, &R);
22 | 
23 |     sha512_init(&hash);
24 |     sha512_update(&hash, signature, 32);
25 |     sha512_update(&hash, public_key, 32);
26 |     sha512_update(&hash, message, message_len);
27 |     sha512_final(&hash, hram);
28 | 
29 |     sc_reduce(hram);
30 |     sc_muladd(signature + 32, hram, private_key, r);
31 | }
32 | 


--------------------------------------------------------------------------------
/src/sgx-ecc-ed25519/verify.c:
--------------------------------------------------------------------------------
 1 | #include "ed25519.h"
 2 | #include "sha512.h"
 3 | #include "ge.h"
 4 | #include "sc.h"
 5 | 
 6 | static int consttime_equal(const unsigned char *x, const unsigned char *y) {
 7 |     unsigned char r = 0;
 8 | 
 9 |     r = x[0] ^ y[0];
10 |     #define F(i) r |= x[i] ^ y[i]
11 |     F(1);
12 |     F(2);
13 |     F(3);
14 |     F(4);
15 |     F(5);
16 |     F(6);
17 |     F(7);
18 |     F(8);
19 |     F(9);
20 |     F(10);
21 |     F(11);
22 |     F(12);
23 |     F(13);
24 |     F(14);
25 |     F(15);
26 |     F(16);
27 |     F(17);
28 |     F(18);
29 |     F(19);
30 |     F(20);
31 |     F(21);
32 |     F(22);
33 |     F(23);
34 |     F(24);
35 |     F(25);
36 |     F(26);
37 |     F(27);
38 |     F(28);
39 |     F(29);
40 |     F(30);
41 |     F(31);
42 |     #undef F
43 | 
44 |     return !r;
45 | }
46 | 
47 | int ed25519_verify(const unsigned char *signature, const unsigned char *message, size_t message_len, const unsigned char *public_key) {
48 |     unsigned char h[64];
49 |     unsigned char checker[32];
50 |     sha512_context hash;
51 |     ge_p3 A;
52 |     ge_p2 R;
53 | 
54 |     if (signature[63] & 224) {
55 |         return 0;
56 |     }
57 | 
58 |     if (ge_frombytes_negate_vartime(&A, public_key) != 0) {
59 |         return 0;
60 |     }
61 | 
62 |     sha512_init(&hash);
63 |     sha512_update(&hash, signature, 32);
64 |     sha512_update(&hash, public_key, 32);
65 |     sha512_update(&hash, message, message_len);
66 |     sha512_final(&hash, h);
67 |     
68 |     sc_reduce(h);
69 |     ge_double_scalarmult_vartime(&R, h, &A, signature + 32);
70 |     ge_tobytes(checker, &R);
71 | 
72 |     if (!consttime_equal(checker, signature)) {
73 |         return 0;
74 |     }
75 | 
76 |     return 1;
77 | }
78 | 


--------------------------------------------------------------------------------
/src/sgx/build.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -e
 3 | 
 4 | pwd=$PWD
 5 | 
 6 | echo --- Build
 7 | cd "$(dirname "$0")/signing"
 8 | (
 9 |   set -x
10 |   mkdir -p "$pwd"/temp
11 |   openssl genrsa -out "$pwd"/temp/priv_key.pem -3 3072
12 |   openssl rsa -in "$pwd"/temp/priv_key.pem -pubout -out "$pwd"/temp/pub_key.pem
13 |   make LIBS_PATH="$pwd"/libs OUT="$pwd"/dist PRIV_KEY="$pwd"/temp/priv_key.pem PUB_KEY="$pwd"/temp/pub_key.pem
14 | )
15 | 
16 | echo --- Build Enclave Test
17 | cd "../test"
18 | (
19 |   set -x
20 |   make LIBS_PATH="$pwd"/libs OUT="$pwd"/dist
21 | )
22 | 


--------------------------------------------------------------------------------
/src/sgx/signing/Makefile:
--------------------------------------------------------------------------------
 1 | SGX_SDK ?= /opt/sgxsdk
 2 | SIGN_ENCLAVE ?= 1
 3 | PRIV_KEY ?= ../keys/private_key.pem
 4 | PUB_KEY ?= ../keys/public_key.pem
 5 | LIBS_PATH ?= ../../../libs
 6 | OUT ?= ../../../dist
 7 | 
 8 | SGX_COMMON_CFLAGS := -m64 -O2
 9 | SGX_LIBRARY_PATH := $(SGX_SDK)/lib64
10 | SGX_ENCLAVE_SIGNER := $(SGX_SDK)/bin/x64/sgx_sign
11 | SGX_EDGER8R := $(SGX_SDK)/bin/x64/sgx_edger8r
12 | 
13 | Trusted_C_Flags := -Wno-implicit-function-declaration -std=c11 $(SGX_COMMON_CFLAGS) -nostdinc  -fpie -fstack-protector \
14 | 	-IInclude -I. -I$(SGX_SDK)/include -I$(SGX_SDK)/include/tlibc -I$(SGX_SDK)/include/libcxx -fno-builtin-printf \
15 | 	-I. -I../../sgx-ecc-ed25519 -fvisibility=hidden
16 | 
17 | Untrusted_C_Flags := -fPIC -O0 -g -Wno-attributes -I$(SGX_SDK)/include -I. -I../../sgx-ecc-ed25519 
18 | Test_C_Flags := $(Untrusted_C_Flags)
19 | 
20 | Link_Flags := $(SGX_COMMON_CFLAGS) -Wl,--no-undefined -nostdlib -nodefaultlibs -nostartfiles -L$(SGX_LIBRARY_PATH) \
21 | 	-Wl,--whole-archive -lsgx_trts -Wl,--no-whole-archive \
22 | 	-L$(LIBS_PATH) -led25519.sgx.static \
23 | 	-Wl,--start-group -lsgx_tstdc -lsgx_tcxx -lsgx_tkey_exchange -lsgx_tcrypto -lsgx_tservice -Wl,--end-group \
24 | 	-Wl,-Bstatic -Wl,-Bsymbolic -Wl,--no-undefined \
25 | 	-Wl,-pie,-eenclave_entry -Wl,--export-dynamic  \
26 | 	-Wl,--defsym,__ImageBase=0 \
27 | 	-Wl,--version-script=./signing.lds
28 | 
29 | Trusted_C_Files := $(filter $(wildcard *_trusted.c), $(wildcard *.c))
30 | Trusted_C_Files += signing_t.c
31 | Trusted_C_Objects := $(Trusted_C_Files:.c=.o)
32 | 
33 | Untrusted_C_Files := $(filter $(wildcard *_untrusted.c), $(wildcard *.c))
34 | Untrusted_C_Files += signing_u.c
35 | Untrusted_C_Objects := $(Untrusted_C_Files:.c=.o)
36 | 
37 | .PHONY: all run
38 | 
39 | ifneq ($(SIGN_ENCLAVE), 0)
40 | all: signing_u.c signing_t.c signing.signed.so libsigning.so
41 | else
42 | all: signing_u.c signing_t.c signing.so libsigning.so
43 | 	@echo "Build enclave signing.so success!"
44 | 	@echo
45 | 	@echo "**********************************************************************************************"
46 | 	@echo "PLEASE NOTE: In this mode, please sign the enclave first using Two Step Sign mechanism, before"
47 | 	@echo "you run the app to launch and access the enclave."
48 | 	@echo "**********************************************************************************************"
49 | 	@echo
50 | endif
51 | 
52 | run: all
53 | 
54 | signing_t.c: $(SGX_EDGER8R) signing.edl
55 | 	@echo "GEN  =>  $@"
56 | 	@$(SGX_EDGER8R) --trusted signing.edl --search-path $(SGX_SDK)/include
57 | 
58 | signing_u.c: $(SGX_EDGER8R) signing.edl
59 | 	@echo "GEN  =>  $@"
60 | 	@$(SGX_EDGER8R) --untrusted signing.edl --search-path $(SGX_SDK)/include
61 | 
62 | $(Trusted_C_Objects): %.o: %.c
63 | 	@echo "CC  <=  $<"
64 | 	$(CC) $(Trusted_C_Flags) -c $< -o $@
65 | 
66 | $(Untrusted_C_Objects): %.o: %.c
67 | 	@echo "CC  <=  $<"
68 | 	$(CC) $(Untrusted_C_Flags) -c $< -o $@
69 | 
70 | signing.so: signing_t.o $(Trusted_C_Objects)
71 | 	@echo "LINK =>  $@"
72 | 	$(CC) $^ -o $@ $(Link_Flags)
73 | 	mkdir -p $(OUT)
74 | 	cp $@ $(OUT)
75 | 
76 | signing.signed.so: signing.so
77 | 	@echo "SIGN =>  $@"
78 | 	$(SGX_ENCLAVE_SIGNER) gendata -enclave $< -config signing.config.xml -out /tmp/enclave_hash.hex
79 | 	openssl dgst -sha256 -out /tmp/signature.hex -sign $(PRIV_KEY) -keyform PEM /tmp/enclave_hash.hex
80 | 	$(SGX_ENCLAVE_SIGNER) catsig -enclave $< -config signing.config.xml -out $@ -key $(PUB_KEY) -sig /tmp/signature.hex -unsigned /tmp/enclave_hash.hex
81 | 	mkdir -p $(OUT)
82 | 	cp $@ $(OUT)
83 | 
84 | libsigning.so: signing_u.o signing_untrusted.o $(Untrusted_C_Objects)
85 | 	@echo "LINK =>  $@"
86 | 	$(CC) $^ -o $@ -shared -L$(SGX_LIBRARY_PATH) -lsgx_uae_service -lsgx_ukey_exchange -lsgx_urts -L$(LIBS_PATH) -led25519.static
87 | 	mkdir -p $(OUT)
88 | 	cp $@ $(OUT)
89 | 	cp signing_public.h $(OUT)
90 | 
91 | clean:
92 | 	@rm -f signing_t.* signing_u.* $(Trusted_C_Objects) $(Untrusted_C_Objects) signing.signed.so signing.so libsigning.so
93 | 


--------------------------------------------------------------------------------
/src/sgx/signing/signing.config.xml:
--------------------------------------------------------------------------------
 1 | <EnclaveConfiguration>
 2 |   <ProdID>0</ProdID>
 3 |   <ISVSVN>0</ISVSVN>
 4 |   <StackMaxSize>0x20000</StackMaxSize>
 5 |   <HeapMaxSize>0x80000</HeapMaxSize>
 6 |   <TCSNum>4</TCSNum>
 7 |   <TCSPolicy>1</TCSPolicy>
 8 |   <DisableDebug>0</DisableDebug>
 9 | </EnclaveConfiguration>
10 | 


--------------------------------------------------------------------------------
/src/sgx/signing/signing.edl:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file contains Solana's SGX enclave interface.
 3 |  */
 4 | 
 5 | enclave {
 6 |     from "sgx_tkey_exchange.edl" import *;
 7 |     include "sgx_key_exchange.h"
 8 |     include "sgx_trts.h"
 9 |     include "signing_internal.h"
10 | 
11 |    trusted {
12 |       /* This function initializes enclave's remote attestation
13 |          parameters.
14 | 
15 |          Parameters:
16 |          b_pse:   Use Intel's Platform Services
17 |          pub_key: The caller's public key, that it'll use for
18 |                   communication with Intel's IAS service.
19 |          pctxt:   Remote attestation context
20 |       */
21 |       public sgx_status_t init_remote_attestation(
22 |          int b_pse,
23 |          [in] sgx_ec256_public_t* pub_key,
24 |          [out] sgx_ra_context_t *pctxt);
25 | 
26 |       /* This function frees enclave's remote attestation
27 |          context (returned by init_remote_attestation).
28 |       */
29 |       public sgx_status_t close_remote_attestation(sgx_ra_context_t ctxt);
30 | 
31 |       /* This function initializes an ED25519 keypair in enclave. It
32 |          returns the public key to the caller. The private key will
33 |          be used by sign_sgx_ed25519 function to sign the data.
34 | 
35 |          Enclave uses lockout parameters to compute if signing the
36 |          data will cause slashing.
37 |       */
38 |       public sgx_status_t init_sgx_ed25519(
39 |          uint32_t lockout_period,
40 |          uint32_t lockout_multiplier,
41 |          uint32_t lockout_max_depth,
42 |          uint32_t key_len,
43 |          [out, size=key_len] uint8_t* pubkey);
44 | 
45 |       /* This function returns sealed enclave data (keypair, lockout
46 |          parameters) to the caller. The data is encrypted using
47 |          enclave specific keys.
48 |       */
49 |       public sgx_status_t get_sgx_ed25519_data(
50 |          uint32_t data_size,
51 |          [out, size=data_size] uint8_t* sealed_data,
52 |          [out] uint32_t* data_size_needed);
53 | 
54 |       /* This function initializes enclave using sealed data. The data
55 |          was sealed using get_sgx_ed25519_data() function.
56 | 
57 |          The caller can also update the lockout parameters.
58 |       */
59 |       public sgx_status_t init_sgx_ed25519_from_data(
60 |          uint32_t data_size,
61 |          [in, size=data_size] uint8_t* sealed_data,
62 |          uint32_t update_lockout_params,
63 |          uint32_t lockout_period,
64 |          uint32_t lockout_multiplier,
65 |          uint32_t lockout_max_depth,
66 |          uint32_t key_len,
67 |          [out, size=key_len] uint8_t* pubkey);
68 | 
69 |       /* This function signs the message by using private key generated
70 |          during init function. The caller provides the new history entries.
71 |          The enclave checks the lockout parameters, past history and the
72 |          new history to compute slashing conditions. The enclave will not
73 |          sign the message if it'll result in slashing.
74 |       */
75 |       public sgx_status_t sign_sgx_ed25519(
76 |          uint32_t msg_len,
77 |          [in, size=msg_len] const uint8_t* msg,
78 |          uint32_t history_len,
79 |          [in, count=history_len] const history_entry_t* entries,
80 |          uint32_t sig_len,
81 |          [out, size=sig_len] uint8_t* signature);
82 |    };
83 | };
84 | 


--------------------------------------------------------------------------------
/src/sgx/signing/signing.lds:
--------------------------------------------------------------------------------
/* Symbol version script for the enclave image: only the three symbols
   listed under "global" stay visible, every other symbol is made local. */
signing.so
{
    global:
        g_global_data_sim;
        g_global_data;
        signing_entry;
    local:
        *;
};
10 | 


--------------------------------------------------------------------------------
/src/sgx/signing/signing_internal.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include "signing_public.h"
4 | 
// The following definitions are specific to ed25519 specifications.
// All values are sizes in bytes.
// Size of the private key buffer kept inside the enclave.
#define ED25519_PRIV_KEY_LEN 64
// Minimum size of the signature buffer accepted by sign_sgx_ed25519().
#define ED25519_SIGNATURE_LEN 64
// Size of the random seed from which the keypair is generated.
#define ED25519_SEED_LEN 32
9 | 


--------------------------------------------------------------------------------
/src/sgx/signing/signing_public.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <stdbool.h>
 4 | #include "sgx_eid.h"
 5 | #include "sgx_error.h"
 6 | 
 7 | #define ED25519_PUB_KEY_LEN 32
 8 | 
 9 | #ifdef __cplusplus
10 | extern "C" {
11 | #endif
12 | 
/* Untrusted-side handle for a loaded signing enclave. It is filled in by
   init_ed25519() and passed to every other function of this library. */
typedef struct ed25519_context {
  /* Set to true once init_ed25519() has loaded and initialized the enclave;
     the other API functions return SGX_ERROR_INVALID_ENCLAVE while false. */
  bool enclaveEnabled;
  /* Enclave id returned by sgx_create_enclave(). */
  sgx_enclave_id_t eid;
  /* Public half of the keypair held inside the enclave. */
  uint8_t public_key[ED25519_PUB_KEY_LEN];
} ed25519_context_t;
18 | 
/* One history entry handed to sign_ed25519(). The layout is shared with the
   enclave through signing.edl, so fields must not be reordered or resized.
   NOTE(review): the field meanings are not visible in this module; the
   names suggest a hash count plus input/result hashes - confirm against
   the caller that produces these entries. */
typedef struct history_entry {
  uint32_t num_hashes;
  uint32_t optional_input_hash[4];
  uint32_t result_hash[4];
} history_entry_t;
24 | 
/* This function initializes SGX enclave. It loads enclave_file
   to SGX, which internally creates a new public/private keypair.
   On success the enclave id and the public key are stored in pctxt.

   If the enclave cannot be created or initialized (for example because
   the platform does not support SGX), the SGX error code is returned
   and no keypair is created. The user can choose to not use the library
   if SGX enclave is not being used for signing.

   The lockout_* values are forwarded to the enclave, which stores them
   together with the keypair.

   Note: The user must release the enclave by calling release_ed25519_context()
         after they are done using it.
*/
sgx_status_t init_ed25519(const char* enclave_file,
                          uint32_t lockout_period,
                          uint32_t lockout_multiplier,
                          uint32_t lockout_max_depth,
                          ed25519_context_t* pctxt);
41 | 
/* This function returns the sealed data (private key and associated
   information). The sealed data can be used to reinit the enclave using
   init_ed25519_from_data().

   On input *datalen is the size of the data buffer in bytes; on return it
   holds the size required for the sealed data. When the buffer is too
   small an error is returned, so the required size can be queried by
   calling with *datalen == 0 and data == NULL (as signing_test.c does).
*/
sgx_status_t get_ed25519_data(ed25519_context_t* pctxt,
                              uint32_t* datalen,
                              uint8_t* data);
49 | 
/* This function reinitializes the enclave using sealed data.

   pctxt must refer to an enclave that was loaded with init_ed25519();
   otherwise SGX_ERROR_INVALID_ENCLAVE is returned. data/datalen describe
   a blob obtained from get_ed25519_data(). The lockout_* values replace
   the sealed ones only when update_lockout_params is non-zero. On success
   pctxt->public_key is refreshed with the restored public key.
 */
sgx_status_t init_ed25519_from_data(ed25519_context_t* pctxt,
                                    uint32_t datalen,
                                    uint8_t* data,
                                    uint32_t update_lockout_params,
                                    uint32_t lockout_period,
                                    uint32_t lockout_multiplier,
                                    uint32_t lockout_max_depth);
59 | 
/* This function signs the msg using the internally stored private
   key. The signature is returned in the output "signature" buffer.

   msg/msg_len describe the bytes to sign. entries/history_len describe
   the history entries forwarded to the enclave. sig_len is the size of
   the signature buffer; the enclave rejects buffers smaller than 64 bytes.

   This function must only be called after init_ed25519() function.
   It returns SGX_ERROR_INVALID_ENCLAVE when pctxt has no enclave.
*/
sgx_status_t sign_ed25519(ed25519_context_t* pctxt,
                          uint32_t msg_len,
                          const uint8_t* msg,
                          uint32_t history_len,
                          const history_entry_t* entries,
                          uint32_t sig_len,
                          uint8_t* signature);
72 | 
/* This function releases SGX enclave by destroying the enclave whose id
   is stored in pctxt. The context must not be used for signing afterwards. */
void release_ed25519_context(ed25519_context_t* pctxt);
75 | 
76 | #ifdef __cplusplus
77 | }
78 | #endif
79 | 


--------------------------------------------------------------------------------
/src/sgx/signing/signing_trusted.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file contains Solana's SGX enclave code for signing data.
  3 |  */
  4 | 
  5 | #include <stdbool.h>
  6 | #include <string.h>
  7 | 
  8 | #include "sgx_key.h"
  9 | #include "sgx_tseal.h"
 10 | 
 11 | #include "ed25519.h"
 12 | #include "signing_t.h"
 13 | 
/* Complete signing state of the enclave. The whole structure is what
   get_sgx_ed25519_data() seals and init_sgx_ed25519_from_data() restores,
   so changing its layout invalidates previously sealed blobs. */
typedef struct signing_parameters {
  /* True once a keypair has been generated or restored. */
  bool initialized;
  uint8_t public_key[ED25519_PUB_KEY_LEN];
  uint8_t private_key[ED25519_PRIV_KEY_LEN];
  /* Refreshed with random bytes each time the state is sealed. */
  uint32_t nonce;
  uint32_t lockout_period;
  uint32_t lockout_multiplier;
  uint32_t lockout_max_depth;
  /* Monotonic counter id and the last counter value seen by the enclave. */
  sgx_mc_uuid_t counter;
  uint32_t counter_value;
} signing_parameters_t;

/* Single enclave-wide instance of the signing state. */
static signing_parameters_t g_signing_params;
 27 | 
 28 | sgx_status_t init_remote_attestation(int b_pse,
 29 |                                      sgx_ec256_public_t* sp_pub_key,
 30 |                                      sgx_ra_context_t* pctxt) {
 31 |   sgx_status_t ret;
 32 |   if (b_pse) {
 33 |     int busy_retry_times = 2;
 34 |     do {
 35 |       ret = sgx_create_pse_session();
 36 |     } while (ret == SGX_ERROR_BUSY && busy_retry_times--);
 37 |     if (ret != SGX_SUCCESS)
 38 |       return ret;
 39 |   }
 40 |   ret = sgx_ra_init(sp_pub_key, b_pse, pctxt);
 41 |   if (b_pse) {
 42 |     sgx_close_pse_session();
 43 |   }
 44 |   return ret;
 45 | }
 46 | 
 47 | sgx_status_t close_remote_attestation(sgx_ra_context_t ctxt) {
 48 |   return sgx_ra_close(ctxt);
 49 | }
 50 | 
 51 | /* This function creates a new public/private keypair in
 52 |    enclave trusted space.
 53 | */
 54 | sgx_status_t init_sgx_ed25519(uint32_t lockout_period,
 55 |                               uint32_t lockout_multiplier,
 56 |                               uint32_t lockout_max_depth,
 57 |                               uint32_t key_len,
 58 |                               uint8_t* pubkey) {
 59 |   if (key_len < sizeof(g_signing_params.public_key)) {
 60 |     return SGX_ERROR_INVALID_PARAMETER;
 61 |   }
 62 | 
 63 |   sgx_status_t status = SGX_SUCCESS;
 64 |   int busy_retry_times = 3;
 65 |   do {
 66 |     status = sgx_create_pse_session();
 67 |   } while (status == SGX_ERROR_BUSY && (busy_retry_times-- > 0));
 68 | 
 69 |   if (SGX_SUCCESS != status) {
 70 |     return status;
 71 |   }
 72 | 
 73 |   status = sgx_create_monotonic_counter(&g_signing_params.counter,
 74 |                                         &g_signing_params.counter_value);
 75 |   sgx_close_pse_session();
 76 |   if (SGX_SUCCESS != status) {
 77 |     return status;
 78 |   }
 79 | 
 80 |   uint8_t seed[ED25519_SEED_LEN];
 81 |   status = sgx_read_rand(seed, sizeof(seed));
 82 |   if (SGX_SUCCESS != status) {
 83 |     return status;
 84 |   }
 85 | 
 86 |   ed25519_create_keypair(g_signing_params.public_key,
 87 |                          g_signing_params.private_key, seed);
 88 | 
 89 |   memcpy(pubkey, g_signing_params.public_key,
 90 |          sizeof(g_signing_params.public_key));
 91 | 
 92 |   g_signing_params.initialized = true;
 93 |   g_signing_params.lockout_max_depth = lockout_max_depth;
 94 |   g_signing_params.lockout_multiplier = lockout_multiplier;
 95 |   g_signing_params.lockout_period = lockout_period;
 96 | 
 97 |   return SGX_SUCCESS;
 98 | }
 99 | 
100 | sgx_status_t get_sgx_ed25519_data(uint32_t data_size,
101 |                                   uint8_t* sealed_data,
102 |                                   uint32_t* data_size_needed) {
103 |   *data_size_needed =
104 |       sgx_calc_sealed_data_size(0, sizeof(signing_parameters_t));
105 | 
106 |   if (*data_size_needed > data_size) {
107 |     return SGX_ERROR_INVALID_PARAMETER;
108 |   }
109 | 
110 |   sgx_status_t status = sgx_read_rand((uint8_t*)&g_signing_params.nonce,
111 |                                       sizeof(g_signing_params.nonce));
112 |   if (SGX_SUCCESS != status) {
113 |     return status;
114 |   }
115 | 
116 |   sgx_attributes_t attribute_mask;
117 |   attribute_mask.flags = SGX_FLAGS_INITTED | SGX_FLAGS_DEBUG;
118 |   attribute_mask.xfrm = 0x0;
119 | 
120 |   return sgx_seal_data_ex(SGX_KEYPOLICY_MRENCLAVE, attribute_mask, 0xF0000000,
121 |                           0, NULL, sizeof(g_signing_params),
122 |                           (const uint8_t*)&g_signing_params, *data_size_needed,
123 |                           (sgx_sealed_data_t*)sealed_data);
124 | }
125 | 
126 | sgx_status_t init_sgx_ed25519_from_data(uint32_t data_size,
127 |                                         uint8_t* sealed_data,
128 |                                         uint32_t update_lockout_params,
129 |                                         uint32_t lockout_period,
130 |                                         uint32_t lockout_multiplier,
131 |                                         uint32_t lockout_max_depth,
132 |                                         uint32_t key_len,
133 |                                         uint8_t* pubkey) {
134 |   if (key_len < sizeof(g_signing_params.public_key)) {
135 |     return SGX_ERROR_INVALID_PARAMETER;
136 |   }
137 | 
138 |   signing_parameters_t data;
139 |   uint32_t datalen = sizeof(data);
140 |   sgx_status_t status = sgx_unseal_data((const sgx_sealed_data_t*)sealed_data,
141 |                                         NULL, 0, (uint8_t*)&data, &datalen);
142 |   if (SGX_SUCCESS != status) {
143 |     return status;
144 |   }
145 | 
146 |   if (datalen != sizeof(data)) {
147 |     return SGX_ERROR_INVALID_PARAMETER;
148 |   }
149 | 
150 |   int busy_retry_times = 3;
151 |   do {
152 |     status = sgx_create_pse_session();
153 |   } while (status == SGX_ERROR_BUSY && (busy_retry_times-- > 0));
154 | 
155 |   if (SGX_SUCCESS != status) {
156 |     return status;
157 |   }
158 | 
159 |   uint32_t counter_value = 0xffffffff;
160 |   status =
161 |       sgx_read_monotonic_counter(&g_signing_params.counter, &counter_value);
162 |   if (SGX_SUCCESS != status) {
163 |     sgx_close_pse_session();
164 |     return status;
165 |   }
166 | 
167 |   if (counter_value != g_signing_params.counter_value) {
168 |     sgx_close_pse_session();
169 |     return SGX_ERROR_INVALID_PARAMETER;
170 |   }
171 | 
172 |   status = sgx_increment_monotonic_counter(&g_signing_params.counter,
173 |                                            &g_signing_params.counter_value);
174 | 
175 |   sgx_close_pse_session();
176 |   if (SGX_SUCCESS != status) {
177 |     return status;
178 |   }
179 | 
180 |   memcpy(&g_signing_params, &data, sizeof(g_signing_params));
181 | 
182 |   memcpy(pubkey, g_signing_params.public_key,
183 |          sizeof(g_signing_params.public_key));
184 | 
185 |   g_signing_params.initialized = true;
186 |   if (update_lockout_params != 0) {
187 |     g_signing_params.lockout_max_depth = lockout_max_depth;
188 |     g_signing_params.lockout_multiplier = lockout_multiplier;
189 |     g_signing_params.lockout_period = lockout_period;
190 |   }
191 |   return SGX_SUCCESS;
192 | }
193 | 
194 | /* This function signs the msg using private key.
195 |  */
196 | sgx_status_t sign_sgx_ed25519(uint32_t msg_len,
197 |                               const uint8_t* msg,
198 |                               uint32_t history_len,
199 |                               const history_entry_t* entries,
200 |                               uint32_t sig_len,
201 |                               uint8_t* signature) {
202 |   if (!g_signing_params.initialized) {
203 |     return SGX_ERROR_INVALID_STATE;
204 |   }
205 | 
206 |   if (sig_len < ED25519_SIGNATURE_LEN) {
207 |     return SGX_ERROR_INVALID_PARAMETER;
208 |   }
209 | 
210 |   ed25519_sign(signature, msg, msg_len, g_signing_params.public_key,
211 |                g_signing_params.private_key);
212 | 
213 |   return SGX_SUCCESS;
214 | }
215 | 


--------------------------------------------------------------------------------
/src/sgx/signing/signing_untrusted.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file contains Solana's SGX enclave code for signing data.
  3 |  */
  4 | 
  5 | #include <stdbool.h>
  6 | #include <string.h>
  7 | #include <time.h>
  8 | 
  9 | #include "ed25519.h"
 10 | #include "sgx_urts.h"
 11 | #include "signing_public.h"
 12 | #include "signing_u.h"
 13 | 
 14 | /* This function initializes SGX enclave. It loads enclave_file
 15 |    to SGX, which internally creates a new public/private keypair.
 16 | */
 17 | sgx_status_t init_ed25519(const char* enclave_file,
 18 |                           uint32_t lockout_period,
 19 |                           uint32_t lockout_multiplier,
 20 |                           uint32_t lockout_max_depth,
 21 |                           ed25519_context_t* pctxt) {
 22 |   int updated = 0;
 23 |   sgx_launch_token_t token = {0};
 24 |   sgx_enclave_id_t eid;
 25 | 
 26 |   // Try to load the SGX enclave
 27 |   sgx_status_t status =
 28 |       sgx_create_enclave(enclave_file, 1, &token, &updated, &eid, NULL);
 29 | 
 30 |   if (SGX_SUCCESS != status) {
 31 |     return status;
 32 |   }
 33 | 
 34 |   sgx_status_t retval = SGX_SUCCESS;
 35 |   status = init_sgx_ed25519(eid, &retval, lockout_period, lockout_multiplier,
 36 |                             lockout_max_depth, sizeof(pctxt->public_key),
 37 |                             &pctxt->public_key[0]);
 38 | 
 39 |   if (SGX_SUCCESS != status) {
 40 |     sgx_destroy_enclave(eid);
 41 |     return status;
 42 |   }
 43 | 
 44 |   if (SGX_SUCCESS != retval) {
 45 |     sgx_destroy_enclave(eid);
 46 |     return retval;
 47 |   }
 48 | 
 49 |   pctxt->enclaveEnabled = true;
 50 |   pctxt->eid = eid;
 51 | 
 52 |   return status;
 53 | }
 54 | 
 55 | sgx_status_t get_ed25519_data(ed25519_context_t* pctxt,
 56 |                               uint32_t* datalen,
 57 |                               uint8_t* data) {
 58 |   if (!pctxt->enclaveEnabled) {
 59 |     return SGX_ERROR_INVALID_ENCLAVE;
 60 |   }
 61 | 
 62 |   sgx_status_t retval = SGX_SUCCESS;
 63 |   sgx_status_t status =
 64 |       get_sgx_ed25519_data(pctxt->eid, &retval, *datalen, data, datalen);
 65 | 
 66 |   if (SGX_SUCCESS != status) {
 67 |     return status;
 68 |   }
 69 | 
 70 |   if (SGX_SUCCESS != retval) {
 71 |     return retval;
 72 |   }
 73 | 
 74 |   return status;
 75 | }
 76 | 
 77 | sgx_status_t init_ed25519_from_data(ed25519_context_t* pctxt,
 78 |                                     uint32_t datalen,
 79 |                                     uint8_t* data,
 80 |                                     uint32_t update_lockout_params,
 81 |                                     uint32_t lockout_period,
 82 |                                     uint32_t lockout_multiplier,
 83 |                                     uint32_t lockout_max_depth) {
 84 |   if (!pctxt->enclaveEnabled) {
 85 |     return SGX_ERROR_INVALID_ENCLAVE;
 86 |   }
 87 | 
 88 |   sgx_status_t retval = SGX_SUCCESS;
 89 |   sgx_status_t status = init_sgx_ed25519_from_data(
 90 |       pctxt->eid, &retval, datalen, data, update_lockout_params, lockout_period,
 91 |       lockout_multiplier, lockout_max_depth, sizeof(pctxt->public_key),
 92 |       &pctxt->public_key[0]);
 93 | 
 94 |   if (SGX_SUCCESS != status) {
 95 |     return status;
 96 |   }
 97 | 
 98 |   if (SGX_SUCCESS != retval) {
 99 |     return retval;
100 |   }
101 | 
102 |   return status;
103 | }
104 | 
105 | /* This function signs the msg using the internally stored private
106 |    key. The signature is returned in the output "signature" buffer.
107 | 
108 |    This function must only be called after init_ed25519() function.
109 | */
110 | sgx_status_t sign_ed25519(ed25519_context_t* pctxt,
111 |                           uint32_t msg_len,
112 |                           const uint8_t* msg,
113 |                           uint32_t history_len,
114 |                           const history_entry_t* entries,
115 |                           uint32_t sig_len,
116 |                           uint8_t* signature) {
117 |   if (!pctxt->enclaveEnabled) {
118 |     return SGX_ERROR_INVALID_ENCLAVE;
119 |   }
120 | 
121 |   sgx_status_t retval = SGX_SUCCESS;
122 |   sgx_status_t status =
123 |       sign_sgx_ed25519(pctxt->eid, &retval, msg_len, msg, history_len, entries,
124 |                        sig_len, signature);
125 | 
126 |   if (SGX_SUCCESS != status) {
127 |     return status;
128 |   }
129 | 
130 |   if (SGX_SUCCESS != retval) {
131 |     return retval;
132 |   }
133 | 
134 |   return status;
135 | }
136 | 
137 | void release_ed25519_context(ed25519_context_t* pctxt) {
138 |   sgx_destroy_enclave(pctxt->eid);
139 | }


--------------------------------------------------------------------------------
/src/sgx/test/Makefile:
--------------------------------------------------------------------------------
 1 | SGX_SDK ?= /opt/sgxsdk
 2 | LIBS_PATH ?= ../../../libs
 3 | OUT ?= ../../../dist
 4 | 
 5 | C_Flags := -O2 -fpic -I. -I$(SGX_SDK)/include -I$(OUT) -I../../sgx-ecc-ed25519
 6 | 
 7 | C_Files := $(wildcard *.c)
 8 | C_Objects := $(C_Files:%.c=%.o)
 9 | 
10 | .PHONY: all run
11 | all: $(OUT)/signing_test
12 | run: all
13 | 
14 | %.o: %.c
15 | 	@echo "CC  <=  $<"
16 | 	$(CC) $(C_Flags) -c $< -o $@
17 | 
18 | $(OUT)/signing_test: $(C_Objects)
19 | 	@mkdir -p $(OUT)
20 | 	$(CC) $^ -o $@ -L$(OUT) -L$(LIBS_PATH) -lsigning -led25519.static
21 | 
22 | clean:
23 | 	@rm -rf $(C_Objects) $(OUT)/signing_test
24 | 


--------------------------------------------------------------------------------
/src/sgx/test/signing_test.c:
--------------------------------------------------------------------------------
 1 | #include <stdbool.h>
 2 | #include <stdio.h>
 3 | #include <stdlib.h>
 4 | #include <string.h>
 5 | #include "signing_public.h"
 6 | 
 7 | #include "ed25519.h"
 8 | 
 9 | void print_buffer(const uint8_t* buf, int len) {
10 |   char str[BUFSIZ] = {'\0'};
11 |   int offset = 0;
12 |   for (int i = 0; i < len; i++) {
13 |     offset += snprintf(&str[offset], BUFSIZ - offset, "0x%02x ", buf[i]);
14 |     if (!((i + 1) % 8))
15 |       offset += snprintf(&str[offset], BUFSIZ - offset, "\n");
16 |   }
17 |   offset += snprintf(&str[offset], BUFSIZ - offset, "\n");
18 |   printf("%s", str);
19 | }
20 | 
21 | int main(int argc, char* argv[]) {
22 |   if (argc < 2) {
23 |     printf("Usage: %s <enclave file path>\n", argv[0]);
24 |     return -1;
25 |   }
26 | 
27 |   ed25519_context_t ctxt;
28 |   uint32_t lockout_period = 10, lockout_multiplier = 2, lockout_max_depth = 32;
29 |   sgx_status_t status = init_ed25519(
30 |       argv[1], lockout_period, lockout_multiplier, lockout_max_depth, &ctxt);
31 |   if (SGX_SUCCESS != status) {
32 |     printf("Failed in init_ed25519. Error %d\n", status);
33 |     return -1;
34 |   }
35 | 
36 |   printf("Loaded the enclave. eid: %d\n", (uint32_t)ctxt.eid);
37 | 
38 |   uint32_t datalen = 0;
39 |   status = get_ed25519_data(&ctxt, &datalen, NULL);
40 | 
41 |   uint8_t* sealed_data = malloc(datalen);
42 |   status = get_ed25519_data(&ctxt, &datalen, sealed_data);
43 |   if (SGX_SUCCESS != status) {
44 |     printf("Failed in get_ed25519_data. Error %d\n", status);
45 |     release_ed25519_context(&ctxt);
46 |     free(sealed_data);
47 |     return -1;
48 |   }
49 | 
50 |   status =
51 |       init_ed25519_from_data(&ctxt, datalen, sealed_data, 1, lockout_period,
52 |                              lockout_multiplier, lockout_max_depth);
53 |   free(sealed_data);
54 |   if (SGX_SUCCESS != status) {
55 |     printf("Failed in init_ed25519_from_data. Error %d\n", status);
56 |     release_ed25519_context(&ctxt);
57 |     return -1;
58 |   }
59 | 
60 |   const history_entry_t entries;
61 |   uint8_t* data =
62 |       "This is a test string. We'll sign it using SGX enclave. Hope it works!!";
63 |   uint8_t signature[64];
64 |   memset(signature, 0, sizeof(signature));
65 |   status = sign_ed25519(&ctxt, sizeof(data), data, 1, &entries,
66 |                         sizeof(signature), signature);
67 |   if (SGX_SUCCESS != status) {
68 |     printf("Failed in sign_ed25519. Error %d\n", status);
69 |     release_ed25519_context(&ctxt);
70 |     return -1;
71 |   }
72 | 
73 |   printf("Signature:\n");
74 |   print_buffer(signature, sizeof(signature));
75 | 
76 |   if (ed25519_verify(signature, data, sizeof(data), ctxt.public_key) == 0) {
77 |     printf("Failed in verifying the signature\n");
78 |   } else {
79 |     printf("Signature verified\n");
80 |   }
81 | 
82 |   release_ed25519_context(&ctxt);
83 |   return 0;
84 | }


--------------------------------------------------------------------------------