├── Makefile ├── README.md ├── data ├── tables.txt └── template.md ├── package.json ├── plots ├── filesize.gnuplot ├── fusion.gnuplot ├── instructions.gnuplot ├── mips.gnuplot.template ├── operations.gnuplot ├── optimisation.gnuplot ├── registers.gnuplot └── runtime.gnuplot ├── rv8-bench.js ├── scripts └── update-plots.sh └── src ├── aes.c ├── bigint.cc ├── dhrystone.c ├── miniz.c ├── norx.c ├── primes.c ├── qsort.c └── sha512.c /Makefile: --------------------------------------------------------------------------------
#
# rv8-bench
#
# Cross-compiles every program in PROGRAMS from src/ for six architectures
# at three optimisation levels and writes a stripped copy of each binary:
#
#   bin/<arch>/<program>.<level>            linked with $(LDFLAGS)
#   bin/<arch>/<program>.<level>.stripped   output of strip --strip-all
#
# <level> is one of O3, O2 or Os.
#

CFLAGS = -fPIE -g
LDFLAGS = -static

# Toolchain prefixes: <prefix>gcc, <prefix>g++ and <prefix>strip are invoked.
RV32 = riscv32-linux-musl-
RV64 = riscv64-linux-musl-
I386 = i386-linux-musl-
X86_64 = x86_64-linux-musl-
ARM32 = arm-linux-musleabihf-
ARM64 = aarch64-linux-musl-

PROGRAMS = aes bigint dhrystone miniz norx primes qsort sha512

# Optimisation levels, written without the leading dash.
OPT_LEVELS = O3 O2 Os

RV32_PROGS = $(addprefix bin/riscv32/, $(PROGRAMS))
RV64_PROGS = $(addprefix bin/riscv64/, $(PROGRAMS))
I386_PROGS = $(addprefix bin/i386/, $(PROGRAMS))
X86_64_PROGS = $(addprefix bin/x86_64/, $(PROGRAMS))
ARM32_PROGS = $(addprefix bin/arm/, $(PROGRAMS))
ARM64_PROGS = $(addprefix bin/aarch64/, $(PROGRAMS))

ALL_PROGS = $(RV32_PROGS) $(RV64_PROGS) $(I386_PROGS) $(X86_64_PROGS) $(ARM32_PROGS) $(ARM64_PROGS)
O3_PROGS = $(addsuffix .O3, $(ALL_PROGS)) $(addsuffix .O3.stripped, $(ALL_PROGS))
O2_PROGS = $(addsuffix .O2, $(ALL_PROGS)) $(addsuffix .O2.stripped, $(ALL_PROGS))
OS_PROGS = $(addsuffix .Os, $(ALL_PROGS)) $(addsuffix .Os.stripped, $(ALL_PROGS))

# These targets name actions, not files; without this declaration a file
# called "all", "npm" or "clean" in the directory would disable them.
.PHONY: all npm clean

# Delete a target whose recipe failed so that a truncated binary is never
# mistaken for an up-to-date one on the next run.
.DELETE_ON_ERROR:

# npm is order-only: it is run, but never makes the binaries out of date.
all: $(O3_PROGS) $(O2_PROGS) $(OS_PROGS) | npm

npm: ; npm install

clean: ; rm -fr bin

# BUILD_RULES: pattern rules for one architecture at one optimisation level.
#   $(1)  directory name under bin/ (riscv32, i386, arm, ...)
#   $(2)  name of the variable holding the toolchain prefix (RV32, I386, ...)
#   $(3)  optimisation level without the leading dash (O3, O2 or Os)
# Automatic variables and the prefix/flag lookups are written with $$ so
# they pass through $(call) unexpanded and are evaluated only when the
# recipe runs; command-line overrides such as `make RV64=...` still apply.
# The .c rule precedes the .cc rule, so a C source is preferred when both
# exist. Commands are chained with && so a failing mkdir stops the recipe.
define BUILD_RULES
bin/$(1)/%.$(3): src/%.c
	@echo CC $$@ && mkdir -p $$(@D) && $$($(2))gcc $$(LDFLAGS) -$(3) $$(CFLAGS) $$< -o $$@
bin/$(1)/%.$(3): src/%.cc
	@echo CC $$@ && mkdir -p $$(@D) && $$($(2))g++ $$(LDFLAGS) -$(3) $$(CFLAGS) $$< -o $$@
bin/$(1)/%.$(3).stripped: bin/$(1)/%.$(3)
	@echo STRIP $$@ && $$($(2))strip --strip-all $$< -o $$@
endef

# Instantiate the rules: one line per architecture, every optimisation level.
$(foreach opt,$(OPT_LEVELS),$(eval $(call BUILD_RULES,riscv32,RV32,$(opt))))
$(foreach opt,$(OPT_LEVELS),$(eval $(call BUILD_RULES,riscv64,RV64,$(opt))))
$(foreach opt,$(OPT_LEVELS),$(eval $(call BUILD_RULES,i386,I386,$(opt))))
$(foreach opt,$(OPT_LEVELS),$(eval $(call BUILD_RULES,x86_64,X86_64,$(opt))))
$(foreach opt,$(OPT_LEVELS),$(eval $(call BUILD_RULES,arm,ARM32,$(opt))))
$(foreach opt,$(OPT_LEVELS),$(eval $(call BUILD_RULES,aarch64,ARM64,$(opt))))
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | rv8-bench 2 | =============== 3 | 4 | The rv8 benchmark suite contains a small set of currently integer 5 | centric benchmarks for regression 
testing of the rv8 binary 6 | translation engine. The suite contains the following test programs: 7 | 8 | - qsort 9 | - aes 10 | - norx 11 | - dhrystone 12 | - primes 13 | - miniz 14 | - sha512 15 | 16 | ## Dependencies 17 | 18 | rv8-bench depends on the following software: 19 | 20 | - [rv8](https://github.com/rv8-io/rv8/) 21 | - [riscv-qemu](https://github.com/riscv/riscv-qemu/) 22 | - [musl-riscv-toolchain-7.1.0-2](https://github.com/rv8-io/musl-riscv-toolchain/releases/tag/v7.1.0-2) 23 | - [Node.js](https://nodejs.org/) 24 | 25 | _Installing Node.js on macOS with brew_ 26 | ``` 27 | brew install node 28 | ``` 29 | 30 | _Installing Node.js on Debian Stretch_ 31 | ``` 32 | curl -sL https://deb.nodesource.com/setup_6.x | sudo -E bash - 33 | sudo apt-get install nodejs 34 | ``` 35 | 36 | _Installing musl-riscv-toolchain_ 37 | ``` 38 | git clone https://github.com/rv8-io/musl-riscv-toolchain.git 39 | cd musl-riscv-toolchain 40 | for i in riscv32 riscv64 i386 x86_64 arm aarch64; do sh bootstrap.sh $i ; done 41 | ``` 42 | 43 | _Installing rv8_ 44 | ``` 45 | git clone https://github.com/rv8-io/rv8.git 46 | cd rv8 47 | git submodule update --init 48 | make -j4 && sudo make install 49 | ``` 50 | 51 | _Installing riscv-qemu_ 52 | ``` 53 | git clone https://github.com/riscv/riscv-qemu.git 54 | cd riscv-qemu 55 | git submodule update --init 56 | ./configure --target-list=riscv64-softmmu,riscv32-softmmu 57 | make -j4 && sudo make install 58 | ``` 59 | 60 | ## Building 61 | 62 | To build the rv8 benchmarks for riscv32, riscv64, i386, x86_64, 63 | arm and aarch64 run `make` in the rv8-bench directory: 64 | 65 | ``` 66 | cd rv8-bench 67 | make 68 | ``` 69 | 70 | ## Running 71 | 72 | To see how to run the benchmarks execute `npm start` for usage instructions: 73 | 74 | ``` 75 | npm start bench 76 | npm start gather 77 | ``` 78 | -------------------------------------------------------------------------------- /data/tables.txt: 
-------------------------------------------------------------------------------- 1 | runtime-O3-64 "Runtime 64-bit -O3 (seconds)" 2 | program "%s" program 3 | qemu-aarch64 "%s" qemu-aarch64-O3-runtime 4 | qemu-riscv64 "%s" qemu-riscv64-O3-runtime 5 | rv8-riscv64 "%s" rv-jit-riscv64-O3-runtime 6 | native-x86-64 "%s" native-x86-64-O3-runtime 7 | 8 | runtime-O2-64 "Runtime 64-bit -O2 (seconds)" 9 | program "%s" program 10 | qemu-aarch64 "%s" qemu-aarch64-O2-runtime 11 | qemu-riscv64 "%s" qemu-riscv64-O2-runtime 12 | rv8-riscv64 "%s" rv-jit-riscv64-O2-runtime 13 | native-x86-64 "%s" native-x86-64-O2-runtime 14 | 15 | runtime-Os-64 "Runtime 64-bit -Os (seconds)" 16 | program "%s" program 17 | qemu-aarch64 "%s" qemu-aarch64-Os-runtime 18 | qemu-riscv64 "%s" qemu-riscv64-Os-runtime 19 | rv8-riscv64 "%s" rv-jit-riscv64-Os-runtime 20 | native-x86-64 "%s" native-x86-64-Os-runtime 21 | 22 | runtime-O3-32 "Runtime 32-bit -O3 (seconds)" 23 | program "%s" program 24 | qemu-arm32 "%s" qemu-arm-O3-runtime 25 | qemu-riscv32 "%s" qemu-riscv32-O3-runtime 26 | rv8-riscv32 "%s" rv-jit-riscv32-O3-runtime 27 | native-x86-32 "%s" native-x86-32-O3-runtime 28 | 29 | runtime-O2-32 "Runtime 32-bit -O2 (seconds)" 30 | program "%s" program 31 | qemu-arm32 "%s" qemu-arm-O2-runtime 32 | qemu-riscv32 "%s" qemu-riscv32-O2-runtime 33 | rv8-riscv32 "%s" rv-jit-riscv32-O2-runtime 34 | native-x86-32 "%s" native-x86-32-O2-runtime 35 | 36 | runtime-Os-32 "Runtime 32-bit -Os (seconds)" 37 | program "%s" program 38 | qemu-arm32 "%s" qemu-arm-Os-runtime 39 | qemu-riscv32 "%s" qemu-riscv32-Os-runtime 40 | rv8-riscv32 "%s" rv-jit-riscv32-Os-runtime 41 | native-x86-32 "%s" native-x86-32-Os-runtime 42 | 43 | ratio-O3-64 "Performance Ratio 64-bit -O3 (smaller is better)" 44 | program "%s" program 45 | qemu-aarch64 "%s" qemu-aarch64-O3-runtime/native-x86-64-O3-runtime 46 | qemu-riscv64 "%s" qemu-riscv64-O3-runtime/native-x86-64-O3-runtime 47 | rv8-riscv64 "%s" rv-jit-riscv64-O3-runtime/native-x86-64-O3-runtime 48 
| native-x86-64 "%s" 1.00 49 | 50 | ratio-O2-64 "Performance Ratio 64-bit -O2 (smaller is better)" 51 | program "%s" program 52 | qemu-aarch64 "%s" qemu-aarch64-O2-runtime/native-x86-64-O2-runtime 53 | qemu-riscv64 "%s" qemu-riscv64-O2-runtime/native-x86-64-O2-runtime 54 | rv8-riscv64 "%s" rv-jit-riscv64-O2-runtime/native-x86-64-O2-runtime 55 | native-x86-64 "%s" 1.00 56 | 57 | ratio-Os-64 "Performance Ratio 64-bit -Os (smaller is better)" 58 | program "%s" program 59 | qemu-aarch64 "%s" qemu-aarch64-Os-runtime/native-x86-64-Os-runtime 60 | qemu-riscv64 "%s" qemu-riscv64-Os-runtime/native-x86-64-Os-runtime 61 | rv8-riscv64 "%s" rv-jit-riscv64-Os-runtime/native-x86-64-Os-runtime 62 | native-x86-64 "%s" 1.00 63 | 64 | ratio-O3-32 "Performance Ratio 32-bit -O3 (smaller is better)" 65 | program "%s" program 66 | qemu-arm32 "%s" qemu-arm-O3-runtime/native-x86-32-O3-runtime 67 | qemu-riscv32 "%s" qemu-riscv32-O3-runtime/native-x86-32-O3-runtime 68 | rv8-riscv32 "%s" rv-jit-riscv32-O3-runtime/native-x86-32-O3-runtime 69 | native-x86-32 "%s" 1.00 70 | 71 | ratio-O2-32 "Performance Ratio 32-bit -O2 (smaller is better)" 72 | program "%s" program 73 | qemu-arm32 "%s" qemu-arm-O2-runtime/native-x86-32-O2-runtime 74 | qemu-riscv32 "%s" qemu-riscv32-O2-runtime/native-x86-32-O2-runtime 75 | rv8-riscv32 "%s" rv-jit-riscv32-O2-runtime/native-x86-32-O2-runtime 76 | native-x86-32 "%s" 1.00 77 | 78 | ratio-Os-32 "Performance Ratio 32-bit -Os (smaller is better)" 79 | program "%s" program 80 | qemu-arm32 "%s" qemu-arm-Os-runtime/native-x86-32-Os-runtime 81 | qemu-riscv32 "%s" qemu-riscv32-Os-runtime/native-x86-32-Os-runtime 82 | rv8-riscv32 "%s" rv-jit-riscv32-Os-runtime/native-x86-32-Os-runtime 83 | native-x86-32 "%s" 1.00 84 | 85 | opt-x86-64 "Optimisation native x86-64, -O3, -O2 and -Os" 86 | program "%s" program 87 | x86-64-O3 "%s" native-x86-64-O3-runtime 88 | x86-64-O2 "%s" native-x86-64-O2-runtime 89 | x86-64-Os "%s" native-x86-64-Os-runtime 90 | x86-64-O3:O2 "%s" 
native-x86-64-O3-runtime/native-x86-64-O2-runtime 91 | x86-64-O3:Os "%s" native-x86-64-O3-runtime/native-x86-64-Os-runtime 92 | x86-64-O2:Os "%s" native-x86-64-O2-runtime/native-x86-64-Os-runtime 93 | 94 | opt-x86-32 "Optimisation native x86-32, -O3, -O2 and -Os" 95 | program "%s" program 96 | x86-32-O3 "%s" native-x86-32-O3-runtime 97 | x86-32-O2 "%s" native-x86-32-O2-runtime 98 | x86-32-Os "%s" native-x86-32-Os-runtime 99 | x86-32-O3:O2 "%s" native-x86-32-O3-runtime/native-x86-32-O2-runtime 100 | x86-32-O3:Os "%s" native-x86-32-O3-runtime/native-x86-32-Os-runtime 101 | x86-32-O2:Os "%s" native-x86-32-O2-runtime/native-x86-32-Os-runtime 102 | 103 | opt-riscv64 "Optimisation rv8 riscv64, -O3, -O2 and -Os" 104 | program "%s" program 105 | rv8-rv64-O3 "%s" rv-jit-riscv64-O3-runtime 106 | rv8-rv64-O2 "%s" rv-jit-riscv64-O2-runtime 107 | rv8-rv64-Os "%s" rv-jit-riscv64-Os-runtime 108 | rv8-rv64-O3:O2 "%s" rv-jit-riscv64-O3-runtime/rv-jit-riscv64-O2-runtime 109 | rv8-rv64-O3:Os "%s" rv-jit-riscv64-O3-runtime/rv-jit-riscv64-Os-runtime 110 | rv8-rv64-O2:Os "%s" rv-jit-riscv64-O2-runtime/rv-jit-riscv64-Os-runtime 111 | 112 | opt-riscv32 "Optimisation rv8 riscv32, -O3, -O2 and -Os" 113 | program "%s" program 114 | rv8-rv32-O3 "%s" rv-jit-riscv32-O3-runtime 115 | rv8-rv32-O2 "%s" rv-jit-riscv32-O2-runtime 116 | rv8-rv32-Os "%s" rv-jit-riscv32-Os-runtime 117 | rv8-rv32-O3:O2 "%s" rv-jit-riscv32-O3-runtime/rv-jit-riscv32-O2-runtime 118 | rv8-rv32-O3:Os "%s" rv-jit-riscv32-O3-runtime/rv-jit-riscv32-Os-runtime 119 | rv8-rv32-O2:Os "%s" rv-jit-riscv32-O2-runtime/rv-jit-riscv32-Os-runtime 120 | 121 | fusion-O3-64 "Macro-op fusion performance 64-bit -O3" 122 | program "%s" program 123 | rv8-fuse-off-O3 "%s" rv-jit-df-riscv64-O3-runtime 124 | rv8-fuse-on-O3 "%s" rv-jit-riscv64-O3-runtime 125 | rv8-fuse-ratio-O3 "%s" rv-jit-riscv64-O3-runtime/rv-jit-df-riscv64-O3-runtime 126 | 127 | fusion-O2-64 "Macro-op fusion performance 64-bit -O2" 128 | program "%s" program 129 | rv8-fuse-off-O2 
"%s" rv-jit-df-riscv64-O2-runtime 130 | rv8-fuse-on-O2 "%s" rv-jit-riscv64-O2-runtime 131 | rv8-fuse-ratio-O2 "%s" rv-jit-riscv64-O2-runtime/rv-jit-df-riscv64-O2-runtime 132 | 133 | fusion-Os-64 "Macro-op fusion performance 64-bit -Os" 134 | program "%s" program 135 | rv8-fuse-off-Os "%s" rv-jit-df-riscv64-Os-runtime 136 | rv8-fuse-on-Os "%s" rv-jit-riscv64-Os-runtime 137 | rv8-fuse-ratio-Os "%s" rv-jit-riscv64-Os-runtime/rv-jit-df-riscv64-Os-runtime 138 | 139 | fusion-O3-32 "Macro-op fusion performance 32-bit -O3" 140 | program "%s" program 141 | rv8-fuse-off-O3 "%s" rv-jit-df-riscv32-O3-runtime 142 | rv8-fuse-on-O3 "%s" rv-jit-riscv32-O3-runtime 143 | rv8-fuse-ratio-O3 "%s" rv-jit-riscv32-O3-runtime/rv-jit-df-riscv32-O3-runtime 144 | 145 | fusion-O2-32 "Macro-op fusion performance 32-bit -O2" 146 | program "%s" program 147 | rv8-fuse-off-O2 "%s" rv-jit-df-riscv32-O2-runtime 148 | rv8-fuse-on-O2 "%s" rv-jit-riscv32-O2-runtime 149 | rv8-fuse-ratio-O2 "%s" rv-jit-riscv32-O2-runtime/rv-jit-df-riscv32-O2-runtime 150 | 151 | fusion-Os-32 "Macro-op fusion performance 32-bit -Os" 152 | program "%s" program 153 | rv8-fuse-off-Os "%s" rv-jit-df-riscv32-Os-runtime 154 | rv8-fuse-on-Os "%s" rv-jit-riscv32-Os-runtime 155 | rv8-fuse-ratio-Os "%s" rv-jit-riscv32-Os-runtime/rv-jit-df-riscv32-Os-runtime 156 | 157 | filesize-O3 "Compiled File Size (bytes) -O3" 158 | program "%s" program 159 | arm32 "%d" arm-O3-filesize 160 | aarch64 "%d" aarch64-O3-filesize 161 | riscv32 "%d" riscv32-O3-filesize 162 | riscv64 "%d" riscv64-O3-filesize 163 | x86-32 "%d" x86-32-O3-filesize 164 | x86-64 "%d" x86-64-O3-filesize 165 | 166 | filesize-O2 "Compiled File Size (bytes) -O2" 167 | program "%s" program 168 | arm32 "%d" arm-O2-filesize 169 | aarch64 "%d" aarch64-O2-filesize 170 | riscv32 "%d" riscv32-O2-filesize 171 | riscv64 "%d" riscv64-O2-filesize 172 | x86-32 "%d" x86-32-O2-filesize 173 | x86-64 "%d" x86-64-O2-filesize 174 | 175 | filesize-Os "Compiled File Size (bytes) -Os" 176 | program 
"%s" program 177 | arm32 "%d" arm-Os-filesize 178 | aarch64 "%d" aarch64-Os-filesize 179 | riscv32 "%d" riscv32-Os-filesize 180 | riscv64 "%d" riscv64-Os-filesize 181 | x86-32 "%d" x86-32-Os-filesize 182 | x86-64 "%d" x86-64-Os-filesize 183 | 184 | mips-O3-64 "Instructions per second (MIPS) qemu, rv8 and native 64-bit -O3" 185 | program "%s" program 186 | qemu-riscv64-mips "%d" rv-sim-riscv64-O3-instret/qemu-riscv64-O3-runtime 187 | rv8-riscv64-mips "%d" rv-sim-riscv64-O3-instret/rv-jit-riscv64-O3-runtime 188 | native-x86-mips "%d" native-x86-64-O3-instret/native-x86-64-O3-runtime 189 | 190 | mips-O2-64 "Instructions per second (MIPS) qemu, rv8 and native 64-bit -O2" 191 | program "%s" program 192 | qemu-riscv64-mips "%d" rv-sim-riscv64-O2-instret/qemu-riscv64-O2-runtime 193 | rv8-riscv64-mips "%d" rv-sim-riscv64-O2-instret/rv-jit-riscv64-O2-runtime 194 | native-x86-mips "%d" native-x86-64-O2-instret/native-x86-64-O2-runtime 195 | 196 | mips-Os-64 "Instructions per second (MIPS) qemu, rv8 and native 64-bit -Os" 197 | program "%s" program 198 | qemu-riscv64-mips "%d" rv-sim-riscv64-Os-instret/qemu-riscv64-Os-runtime 199 | rv8-riscv64-mips "%d" rv-sim-riscv64-Os-instret/rv-jit-riscv64-Os-runtime 200 | native-x86-mips "%d" native-x86-64-Os-instret/native-x86-64-Os-runtime 201 | 202 | mips-O3-32 "Instructions per second (MIPS) qemu, rv8 and native 32-bit -O3" 203 | program "%s" program 204 | qemu-riscv32-mips "%d" rv-sim-riscv32-O3-instret/qemu-riscv32-O3-runtime 205 | rv8-riscv32-mips "%d" rv-sim-riscv32-O3-instret/rv-jit-riscv32-O3-runtime 206 | native-x86-mips "%d" native-x86-32-O3-instret/native-x86-32-O3-runtime 207 | 208 | mips-O2-32 "Instructions per second (MIPS) qemu, rv8 and native 32-bit -O2" 209 | program "%s" program 210 | qemu-riscv32-mips "%d" rv-sim-riscv32-O2-instret/qemu-riscv32-O2-runtime 211 | rv8-riscv32-mips "%d" rv-sim-riscv32-O2-instret/rv-jit-riscv32-O2-runtime 212 | native-x86-mips "%d" native-x86-32-O2-instret/native-x86-32-O2-runtime 213 | 
214 | mips-Os-32 "Instructions per second (MIPS) qemu, rv8 and native 32-bit -Os" 215 | program "%s" program 216 | qemu-riscv32-mips "%d" rv-sim-riscv32-Os-instret/qemu-riscv32-Os-runtime 217 | rv8-riscv32-mips "%d" rv-sim-riscv32-Os-instret/rv-jit-riscv32-Os-runtime 218 | native-x86-mips "%d" native-x86-32-Os-instret/native-x86-32-Os-runtime 219 | 220 | operations-O3-64 "Retired Operations (Mops) x86-64 vs riscv64 -O3" 221 | program "%s" program 222 | x86-instret "%d" native-x86-64-O3-instret 223 | x86-uops-executed "%d" native-x86-64-O3-uops-executed 224 | x86-uops-issued "%d" native-x86-64-O3-uops-issued 225 | x86-uops-retired "%d" native-x86-64-O3-uops-retired-all 226 | x86-uops-slots "%d" native-x86-64-O3-uops-retired-slots 227 | riscv64-instret "%d" rv-sim-riscv64-O3-instret 228 | 229 | operations-O2-64 "Retired Operations (Mops) x86-64 vs riscv64 -O2" 230 | program "%s" program 231 | x86-instret "%d" native-x86-64-O2-instret 232 | x86-uops-executed "%d" native-x86-64-O2-uops-executed 233 | x86-uops-issued "%d" native-x86-64-O2-uops-issued 234 | x86-uops-retired "%d" native-x86-64-O2-uops-retired-all 235 | x86-uops-slots "%d" native-x86-64-O2-uops-retired-slots 236 | riscv64-instret "%d" rv-sim-riscv64-O2-instret 237 | 238 | operations-Os-64 "Retired Operations (Mops) x86-64 vs riscv64 -Os" 239 | program "%s" program 240 | x86-instret "%d" native-x86-64-Os-instret 241 | x86-uops-executed "%d" native-x86-64-Os-uops-executed 242 | x86-uops-issued "%d" native-x86-64-Os-uops-issued 243 | x86-uops-retired "%d" native-x86-64-Os-uops-retired-all 244 | x86-uops-slots "%d" native-x86-64-Os-uops-retired-slots 245 | riscv64-instret "%d" rv-sim-riscv64-Os-instret 246 | 247 | operations-O3-32 "Retired Operations (Mops) x86-32 vs riscv32 -O3" 248 | program "%s" program 249 | x86-instret "%d" native-x86-32-O3-instret 250 | x86-uops-executed "%d" native-x86-32-O3-uops-executed 251 | x86-uops-issued "%d" native-x86-32-O3-uops-issued 252 | x86-uops-retired "%d" 
native-x86-32-O3-uops-retired-all 253 | x86-uops-slots "%d" native-x86-32-O3-uops-retired-slots 254 | riscv32-instret "%d" rv-sim-riscv32-O3-instret 255 | 256 | operations-O2-32 "Retired Operations (Mops) x86-32 vs riscv32 -O2" 257 | program "%s" program 258 | x86-instret "%d" native-x86-32-O2-instret 259 | x86-uops-executed "%d" native-x86-32-O2-uops-executed 260 | x86-uops-issued "%d" native-x86-32-O2-uops-issued 261 | x86-uops-retired "%d" native-x86-32-O2-uops-retired-all 262 | x86-uops-slots "%d" native-x86-32-O2-uops-retired-slots 263 | riscv32-instret "%d" rv-sim-riscv32-O2-instret 264 | 265 | operations-Os-32 "Retired Operations (Mops) x86-32 vs riscv32 -Os" 266 | program "%s" program 267 | x86-instret "%d" native-x86-32-Os-instret 268 | x86-uops-executed "%d" native-x86-32-Os-uops-executed 269 | x86-uops-issued "%d" native-x86-32-Os-uops-issued 270 | x86-uops-retired "%d" native-x86-32-Os-uops-retired-all 271 | x86-uops-slots "%d" native-x86-32-Os-uops-retired-slots 272 | riscv32-instret "%d" rv-sim-riscv32-Os-instret 273 | -------------------------------------------------------------------------------- /data/template.md: -------------------------------------------------------------------------------- 1 | ## Benchmark results 2 | 3 | This document contains [rv8-bench](https://github.com/rv8-io/rv8-bench/) 4 | results compiled using GCC 7.1.0 and musl libc. The results include runtime 5 | and instructions per second comparisons for the QEMU and rv8 JIT engines 6 | and native x86. The benchmark suite is compiled for aarch64, arm32, riscv64, 7 | riscv32, x86-64 and x86-32. 8 | 9 | The results include runtime neutral metrics such as retired RISC-V 10 | instructions, x86 micro-ops, executable file sizes plus dynamic register and 11 | instruction histograms for RISC-V. 
12 | 13 | #### Benchmark source 14 | 15 | The following sources have been used to run the benchmarks: 16 | 17 | - rv8 - [https://github.com/rv8-io/rv8/](https://github.com/rv8-io/rv8/) 18 | - rv8-bench - [https://github.com/rv8-io/rv8-bench/](https://github.com/rv8-io/rv8-bench/) 19 | - qemu-riscv - [https://github.com/riscv/riscv-qemu/](https://github.com/riscv/riscv-qemu/) 20 | - musl-riscv-toolchain - [https://github.com/rv8-io/musl-riscv-toolchain/](https://github.com/rv8-io/musl-riscv-toolchain/) 21 | 22 | #### Benchmark metrics 23 | 24 | The following benchmark metrics have been plotted and tabulated: 25 | 26 | - [Runtimes](#runtimes) 27 | - [Optimisation](#optimisation) 28 | - [Instructions Per Second](#instructions-per-second) 29 | - [Retired Micro-ops](#retired-micro-ops) 30 | - [Executable File Sizes](#executable-file-sizes) 31 | - [Dynamic Register Usage](#dynamic-register-usage) 32 | - [Dynamic Instruction Usage](#dynamic-instruction-usage) 33 | 34 | #### Benchmark details 35 | 36 | The [rv8-bench](https://github.com/rv8-io/rv8-bench/) 37 | benchmark suite contains the following test programs: 38 | 39 | Benchmark | Type | Description 40 | :-- | :-- | :-- 41 | aes | crypto | encrypt, decrypt and compare 30MiB of data 42 | bigint | numeric | compute 23 ^ 111121 and count base 10 digits 43 | dhrystone | synthetic | synthetic integer workload 44 | miniz | compression | compress, decompress and compare 8MiB of data 45 | norx | crypto | encrypt, decrypt and compare 30MiB of data 46 | primes | numeric | calculate largest prime number below 33333333 47 | qsort | sorting | sort array containing 50 million items 48 | sha512 | digest | calculate SHA-512 hash of 64MiB of data 49 | 50 | #### Compiler details 51 | 52 | The following compiler architectures, versions, compile options 53 | and runtime libraries are used to run the benchmarks: 54 | 55 | Architecture | Compiler | C Library | Compile options 56 | :-- | :-- | :-- | :-- 57 | x86-32 | GCC 7.2.0 | musl libc | 
`'-O3'`, `'-O2'` and `'-Os'` 58 | x86-64 | GCC 7.2.0 | musl libc | `'-O3'`, `'-O2'` and `'-Os'` 59 | riscv32 | GCC 7.2.0 | musl libc | `'-O3'`, `'-O2'` and `'-Os'` 60 | riscv64 | GCC 7.2.0 | musl libc | `'-O3'`, `'-O2'` and `'-Os'` 61 | arm32 | GCC 7.2.0 | musl libc | `'-O3'`, `'-O2'` and `'-Os'` 62 | aarch64 | GCC 7.2.0 | musl libc | `'-O3'`, `'-O2'` and `'-Os'` 63 | 64 | #### Measurement details 65 | 66 | - Dynamic instruction counts are measured using `rv-sim -E` 67 | - Benchmarks are run 20 times and the best result is taken 68 | - All programs are compiled as position independent executables (`-fPIE`) 69 | - Host: Intel® 5th-gen Core™ i7-5557U Broadwell (3.10-3.40GHz, 4MB cache) 70 | - x86-64 μops measured with 71 | - `perf stat -e cycles,instructions,r1b1,r10e,r2c2,r1c2 ` 72 | 73 | 74 | ### Runtimes 75 | 76 | Runtime results comparing qemu, rv8 and native x86: 77 | 78 | ![benchmark runtimes -O3 64-bit]({{ site.url }}/plots/runtime-O3-64.svg) 79 | 80 | _Figure 1: Benchmark runtimes -O3 64-bit_ 81 | 82 | TABLE runtime-O3-64 83 | 84 | TABLE ratio-O3-64 85 | 86 | ![benchmark runtimes -O2 64-bit]({{ site.url }}/plots/runtime-O2-64.svg) 87 | 88 | _Figure 2: Benchmark runtimes -O2 64-bit_ 89 | 90 | TABLE runtime-O2-64 91 | 92 | TABLE ratio-O2-64 93 | 94 | ![benchmark runtimes -Os 64-bit]({{ site.url }}/plots/runtime-Os-64.svg) 95 | 96 | _Figure 3: Benchmark runtimes -Os 64-bit_ 97 | 98 | TABLE runtime-Os-64 99 | 100 | TABLE ratio-Os-64 101 | 102 | ![benchmark runtimes -O3 32-bit]({{ site.url }}/plots/runtime-O3-32.svg) 103 | 104 | _Figure 4: Benchmark runtimes -O3 32-bit_ 105 | 106 | TABLE runtime-O3-32 107 | 108 | TABLE ratio-O3-32 109 | 110 | ![benchmark runtimes -O2 32-bit]({{ site.url }}/plots/runtime-O2-32.svg) 111 | 112 | _Figure 5: Benchmark runtimes -O2 32-bit_ 113 | 114 | TABLE runtime-O2-32 115 | 116 | TABLE ratio-O2-32 117 | 118 | ![benchmark runtimes -Os 32-bit]({{ site.url }}/plots/runtime-Os-32.svg) 119 | 120 | _Figure 6: Benchmark runtimes -Os 32-bit_ 
121 | 122 | TABLE runtime-Os-32 123 | 124 | TABLE ratio-Os-32 125 | 126 | 127 | ### Optimisation 128 | 129 | Runtimes and ratios for optimisation levels (-O3, -O2 and -Os): 130 | 131 | ![optimisation comparison native x86-64, -O3, -O2 and -Os]({{ site.url }}/plots/optimisation-x86-64.svg ) 132 | 133 | _Figure 7: Optimisation native x86-64, -O3, -O2 and -Os_ 134 | 135 | TABLE opt-x86-64 136 | 137 | ![optimisation comparison native x86-32, -O3, -O2 and -Os]({{ site.url }}/plots/optimisation-x86-32.svg ) 138 | 139 | _Figure 8: Optimisation native x86-32, -O3, -O2 and -Os_ 140 | 141 | TABLE opt-x86-32 142 | 143 | ![optimisation comparison rv8 riscv64, -O3, -O2 and -Os]({{ site.url }}/plots/optimisation-riscv64.svg ) 144 | 145 | _Figure 9: Optimisation rv8 riscv64, -O3, -O2 and -Os_ 146 | 147 | TABLE opt-riscv64 148 | 149 | ![optimisation comparison rv8 riscv32, -O3, -O2 and -Os]({{ site.url }}/plots/optimisation-riscv32.svg ) 150 | 151 | _Figure 10: Optimisation rv8 riscv32, -O3, -O2 and -Os_ 152 | 153 | TABLE opt-riscv32 154 | 155 | 156 | ### Instructions Per Second 157 | 158 | Instructions per second in millions comparing qemu, rv8 and native x86: 159 | 160 | ![operation counts -O3 64-bit]({{ site.url }}/plots/mips-O3-64.svg) 161 | 162 | _Figure 11: Millions of Instructions Per Second -O3 64-bit_ 163 | 164 | TABLE mips-O3-64 165 | 166 | ![operation counts -O2 64-bit]({{ site.url }}/plots/mips-O2-64.svg) 167 | 168 | _Figure 12: Millions of Instructions Per Second -O2 64-bit_ 169 | 170 | TABLE mips-O2-64 171 | 172 | ![operation counts -Os 64-bit]({{ site.url }}/plots/mips-Os-64.svg) 173 | 174 | _Figure 13: Millions of Instructions Per Second -Os 64-bit_ 175 | 176 | TABLE mips-Os-64 177 | 178 | ![operation counts -O3 32-bit]({{ site.url }}/plots/mips-O3-32.svg) 179 | 180 | _Figure 14: Millions of Instructions Per Second -O3 32-bit_ 181 | 182 | TABLE mips-O3-32 183 | 184 | ![operation counts -O2 32-bit]({{ site.url }}/plots/mips-O2-32.svg) 185 | 186 | _Figure 15: 
Millions of Instructions Per Second -O2 32-bit_ 187 | 188 | TABLE mips-O2-32 189 | 190 | ![operation counts -Os 32-bit]({{ site.url }}/plots/mips-Os-32.svg) 191 | 192 | _Figure 16: Millions of Instructions Per Second -Os 32-bit_ 193 | 194 | TABLE mips-Os-32 195 | 196 | 197 | ### Retired Micro-ops 198 | 199 | The following table describes the measured x86 performance counters: 200 | 201 | counter | x86 event mask | description 202 | :------------ | :-------------------------- | :----------------------------------- 203 | instret | `INST_RETIRED` | instructions retired 204 | uops-executed | `UOPS_EXECUTED.THREAD` | uops executed 205 | uops-issued | `UOPS_ISSUED.ANY` | uops issued 206 | uops-slots | `UOPS_RETIRED.RETIRE_SLOTS` | uop retirement slots used 207 | uops-retired | `UOPS_RETIRED.ANY` | uops retired 208 | 209 | Total retired micro-op/instruction counts comparing RISC-V and x86: 210 | 211 | ![operation counts -O3 64-bit]({{ site.url }}/plots/operations-O3-64.svg) 212 | 213 | _Figure 17: Retired operation counts -O3 64-bit_ 214 | 215 | TABLE operations-O3-64 216 | 217 | ![operation counts -O2 64-bit]({{ site.url }}/plots/operations-O2-64.svg) 218 | 219 | _Figure 18: Retired operation counts -O2 64-bit_ 220 | 221 | TABLE operations-O2-64 222 | 223 | ![operation counts -Os 64-bit]({{ site.url }}/plots/operations-Os-64.svg) 224 | 225 | _Figure 19: Retired operation counts -Os 64-bit_ 226 | 227 | TABLE operations-Os-64 228 | 229 | ![operation counts -O3 32-bit]({{ site.url }}/plots/operations-O3-32.svg) 230 | 231 | _Figure 20: Retired operation counts -O3 32-bit_ 232 | 233 | TABLE operations-O3-32 234 | 235 | ![operation counts -O2 32-bit]({{ site.url }}/plots/operations-O2-32.svg) 236 | 237 | _Figure 21: Retired operation counts -O2 32-bit_ 238 | 239 | TABLE operations-O2-32 240 | 241 | ![operation counts -Os 32-bit]({{ site.url }}/plots/operations-Os-32.svg) 242 | 243 | _Figure 22: Retired operation counts -Os 32-bit_ 244 | 245 | TABLE operations-Os-32 246 | 247 | 
248 | ### Executable File Sizes 249 | 250 | GCC stripped executable sizes comparing arm32, aarch64, riscv32, riscv64, x86-32 and x86-64: 251 | 252 | ![benchmark filesizes -O3]({{ site.url }}/plots/filesize-O3.svg) 253 | 254 | _Figure 23: Compiled file sizes -O3_ 255 | 256 | TABLE filesize-O3 257 | 258 | ![benchmark filesizes -O2]({{ site.url }}/plots/filesize-O2.svg) 259 | 260 | _Figure 24: Compiled file sizes -O2_ 261 | 262 | TABLE filesize-O2 263 | 264 | ![benchmark filesizes -Os]({{ site.url }}/plots/filesize-Os.svg) 265 | 266 | _Figure 25: Compiled file sizes -Os_ 267 | 268 | TABLE filesize-Os 269 | 270 | 271 | ### Dynamic Register Usage 272 | 273 | Dynamic register usage results comparing riscv64 -O3, -O2, -Os 274 | 275 | ![aes register usage -O3, -O2, -Os]({{ site.url }}/plots/registers-aes-rv64-1.svg) 276 | 277 | _Figure 26: Dynamic register usage - aes -O3, -O2, -Os (sorted by frequency)_ 278 | 279 | ![aes register usage -O3, -O2, -Os]({{ site.url }}/plots/registers-aes-rv64-2.svg) 280 | 281 | _Figure 27: Dynamic register usage - aes -O3, -O2, -Os (sorted alphabetically)_ 282 | 283 | ![bigint register usage -O3, -O2, -Os]({{ site.url }}/plots/registers-bigint-rv64-1.svg) 284 | 285 | _Figure 28: Dynamic register usage - bigint -O3, -O2, -Os (sorted by frequency)_ 286 | 287 | ![bigint register usage -O3, -O2, -Os]({{ site.url }}/plots/registers-bigint-rv64-2.svg) 288 | 289 | _Figure 29: Dynamic register usage - bigint -O3, -O2, -Os (sorted alphabetically)_ 290 | 291 | ![dhrystone register usage -O3, -O2, -Os]({{ site.url }}/plots/registers-dhrystone-rv64-1.svg) 292 | 293 | _Figure 30: Dynamic register usage - dhrystone -O3, -O2, -Os (sorted by frequency)_ 294 | 295 | ![dhrystone register usage -O3, -O2, -Os]({{ site.url }}/plots/registers-dhrystone-rv64-2.svg) 296 | 297 | _Figure 31: Dynamic register usage - dhrystone -O3, -O2, -Os (sorted alphabetically)_ 298 | 299 | ![miniz register usage -O3, -O2, -Os]({{ site.url }}/plots/registers-miniz-rv64-1.svg) 300 
| 301 | _Figure 32: Dynamic register usage - miniz -O3, -O2, -Os (sorted by frequency)_ 302 | 303 | ![miniz register usage -O3, -O2, -Os]({{ site.url }}/plots/registers-miniz-rv64-2.svg) 304 | 305 | _Figure 33: Dynamic register usage - miniz -O3, -O2, -Os (sorted alphabetically)_ 306 | 307 | ![norx register usage -O3, -O2, -Os]({{ site.url }}/plots/registers-norx-rv64-1.svg) 308 | 309 | _Figure 34: Dynamic register usage - norx -O3, -O2, -Os (sorted by frequency)_ 310 | 311 | ![norx register usage -O3, -O2, -Os]({{ site.url }}/plots/registers-norx-rv64-2.svg) 312 | 313 | _Figure 35: Dynamic register usage - norx -O3, -O2, -Os (sorted alphabetically)_ 314 | 315 | ![primes register usage -O3, -O2, -Os]({{ site.url }}/plots/registers-primes-rv64-1.svg) 316 | 317 | _Figure 36: Dynamic register usage - primes -O3, -O2, -Os (sorted by frequency)_ 318 | 319 | ![primes register usage -O3, -O2, -Os]({{ site.url }}/plots/registers-primes-rv64-2.svg) 320 | 321 | _Figure 37: Dynamic register usage - primes -O3, -O2, -Os (sorted alphabetically)_ 322 | 323 | ![qsort register usage -O3, -O2, -Os]({{ site.url }}/plots/registers-qsort-rv64-1.svg) 324 | 325 | _Figure 38: Dynamic register usage - qsort -O3, -O2, -Os (sorted by frequency)_ 326 | 327 | ![qsort register usage -O3, -O2, -Os]({{ site.url }}/plots/registers-qsort-rv64-2.svg) 328 | 329 | _Figure 39: Dynamic register usage - qsort -O3, -O2, -Os (sorted alphabetically)_ 330 | 331 | ![sha512 register usage -O3, -O2, -Os]({{ site.url }}/plots/registers-sha512-rv64-1.svg) 332 | 333 | _Figure 40: Dynamic register usage - sha512 -O3, -O2, -Os (sorted by frequency)_ 334 | 335 | ![sha512 register usage -O3, -O2, -Os]({{ site.url }}/plots/registers-sha512-rv64-2.svg) 336 | 337 | _Figure 41: Dynamic register usage - sha512 -O3, -O2, -Os (sorted alphabetically)_ 338 | 339 | 340 | ### Dynamic Instruction Usage 341 | 342 | Dynamic instruction usage results comparing riscv64 -O3, -O2, -Os 343 | 344 | ![aes instruction usage
-O3, -O2, -Os]({{ site.url }}/plots/instructions-aes-rv64-1.svg) 345 | 346 | _Figure 42: Dynamic instruction usage - aes -O3, -O2, -Os (sorted by frequency)_ 347 | 348 | ![aes instruction usage -O3, -O2, -Os]({{ site.url }}/plots/instructions-aes-rv64-2.svg) 349 | 350 | _Figure 43: Dynamic instruction usage - aes -O3, -O2, -Os (sorted alphabetically)_ 351 | 352 | ![bigint instruction usage -O3, -O2, -Os]({{ site.url }}/plots/instructions-bigint-rv64-1.svg) 353 | 354 | _Figure 44: Dynamic instruction usage - bigint -O3, -O2, -Os (sorted by frequency)_ 355 | 356 | ![bigint instruction usage -O3, -O2, -Os]({{ site.url }}/plots/instructions-bigint-rv64-2.svg) 357 | 358 | _Figure 45: Dynamic instruction usage - bigint -O3, -O2, -Os (sorted alphabetically)_ 359 | 360 | ![dhrystone instruction usage -O3, -O2, -Os]({{ site.url }}/plots/instructions-dhrystone-rv64-1.svg) 361 | 362 | _Figure 46: Dynamic instruction usage - dhrystone -O3, -O2, -Os (sorted by frequency)_ 363 | 364 | ![dhrystone instruction usage -O3, -O2, -Os]({{ site.url }}/plots/instructions-dhrystone-rv64-2.svg) 365 | 366 | _Figure 47: Dynamic instruction usage - dhrystone -O3, -O2, -Os (sorted alphabetically)_ 367 | 368 | ![miniz instruction usage -O3, -O2, -Os]({{ site.url }}/plots/instructions-miniz-rv64-1.svg) 369 | 370 | _Figure 48: Dynamic instruction usage - miniz -O3, -O2, -Os (sorted by frequency)_ 371 | 372 | ![miniz instruction usage -O3, -O2, -Os]({{ site.url }}/plots/instructions-miniz-rv64-2.svg) 373 | 374 | _Figure 49: Dynamic instruction usage - miniz -O3, -O2, -Os (sorted alphabetically)_ 375 | 376 | ![norx instruction usage -O3, -O2, -Os]({{ site.url }}/plots/instructions-norx-rv64-1.svg) 377 | 378 | _Figure 50: Dynamic instruction usage - norx -O3, -O2, -Os (sorted by frequency)_ 379 | 380 | ![norx instruction usage -O3, -O2, -Os]({{ site.url }}/plots/instructions-norx-rv64-2.svg) 381 | 382 | _Figure 51: Dynamic instruction usage - norx -O3, -O2, -Os (sorted
alphabetically)_ 383 | 384 | ![primes instruction usage -O3, -O2, -Os]({{ site.url }}/plots/instructions-primes-rv64-1.svg) 385 | 386 | _Figure 52: Dynamic instruction usage - primes -O3, -O2, -Os (sorted by frequency)_ 387 | 388 | ![primes instruction usage -O3, -O2, -Os]({{ site.url }}/plots/instructions-primes-rv64-2.svg) 389 | 390 | _Figure 53: Dynamic instruction usage - primes -O3, -O2, -Os (sorted alphabetically)_ 391 | 392 | ![qsort instruction usage -O3, -O2, -Os]({{ site.url }}/plots/instructions-qsort-rv64-1.svg) 393 | 394 | _Figure 54: Dynamic instruction usage - qsort -O3, -O2, -Os (sorted by frequency)_ 395 | 396 | ![qsort instruction usage -O3, -O2, -Os]({{ site.url }}/plots/instructions-qsort-rv64-2.svg) 397 | 398 | _Figure 55: Dynamic instruction usage - qsort -O3, -O2, -Os (sorted alphabetically)_ 399 | 400 | ![sha512 instruction usage -O3, -O2, -Os]({{ site.url }}/plots/instructions-sha512-rv64-1.svg) 401 | 402 | _Figure 56: Dynamic instruction usage - sha512 -O3, -O2, -Os (sorted by frequency)_ 403 | 404 | ![sha512 instruction usage -O3, -O2, -Os]({{ site.url }}/plots/instructions-sha512-rv64-2.svg) 405 | 406 | _Figure 57: Dynamic instruction usage - sha512 -O3, -O2, -Os (sorted alphabetically)_ 407 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "rv8-bench", 3 | "version": "0.0.1", 4 | "description": "rv8 benchmark suite", 5 | "main": "rv8-bench.js", 6 | "scripts": { 7 | "start": "node rv8-bench.js" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "git+https://github.com/rv8-io/rv8-bench.git" 12 | }, 13 | "keywords": [ 14 | "benchmarks" 15 | ], 16 | "author": "Michael Clark ", 17 | "license": "MIT", 18 | "bugs": { 19 | "url": "https://github.com/rv8-io/rv8-bench/issues" 20 | }, 21 | "homepage": "https://github.com/rv8-io/rv8-bench#readme", 22 | "dependencies": { 23 |
"child_process": "^1.0.2" 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /plots/filesize.gnuplot: -------------------------------------------------------------------------------- 1 | set term svg size 640,480 2 | set auto x 3 | set style data histogram 4 | set style histogram cluster gap 2 5 | set style fill solid border -1 6 | set boxwidth 0.9 7 | set xtic scale 0 8 | 9 | set style line 101 lc rgb '#606060' lt 1 lw 1 10 | set border ls 101 11 | 12 | set ylabel "Filesize (bytes)" offset 2,0,0 13 | set yrange [0:150000] 14 | 15 | set grid xtics ytics 16 | 17 | set output "svg/filesize-O3.svg" 18 | set title "rv8-bench (Filesize -O3)" 19 | plot 'data/benchmarks.dat' using \ 20 | 'arm-O3-filesize':xtic(1) ti 'arm32-O3-filesize', \ 21 | '' u 'aarch64-O3-filesize' ti col, \ 22 | '' u 'riscv32-O3-filesize' ti col, \ 23 | '' u 'riscv64-O3-filesize' ti col, \ 24 | '' u 'x86-32-O3-filesize' ti col, \ 25 | '' u 'x86-64-O3-filesize' ti col 26 | 27 | set output "svg/filesize-O2.svg" 28 | set title "rv8-bench (Filesize -O2)" 29 | plot 'data/benchmarks.dat' using \ 30 | 'arm-O2-filesize':xtic(1) ti 'arm32-O2-filesize', \ 31 | '' u 'aarch64-O2-filesize' ti col, \ 32 | '' u 'riscv32-O2-filesize' ti col, \ 33 | '' u 'riscv64-O2-filesize' ti col, \ 34 | '' u 'x86-32-O2-filesize' ti col, \ 35 | '' u 'x86-64-O2-filesize' ti col 36 | 37 | set output "svg/filesize-Os.svg" 38 | set title "rv8-bench (Filesize -Os)" 39 | plot 'data/benchmarks.dat' using \ 40 | 'arm-Os-filesize':xtic(1) ti 'arm32-Os-filesize', \ 41 | '' u 'aarch64-Os-filesize' ti col, \ 42 | '' u 'riscv32-Os-filesize' ti col, \ 43 | '' u 'riscv64-Os-filesize' ti col, \ 44 | '' u 'x86-32-Os-filesize' ti col, \ 45 | '' u 'x86-64-Os-filesize' ti col 46 | -------------------------------------------------------------------------------- /plots/fusion.gnuplot: -------------------------------------------------------------------------------- 1 | set term svg size 640,480 2 | set 
auto x 3 | set style data histogram 4 | set style histogram cluster gap 2 5 | set style fill solid border -1 6 | set boxwidth 0.9 7 | set xtic scale 0 8 | 9 | set style line 101 lc rgb '#606060' lt 1 lw 1 10 | set border ls 101 11 | 12 | set ylabel "Runtime (secs)" offset 2,0,0 13 | 14 | set grid xtics ytics 15 | 16 | set yrange [0:3.0] 17 | 18 | set output "svg/fusion-64.svg" 19 | set title "rv8-bench (Runtime fusion=on,off 64-bit)" 20 | plot 'data/benchmarks.dat' using \ 21 | 'rv-jit-nf-riscv64-O3-runtime':xtic(1) ti 'rv8-fuse-off-rv64-O3-runtime', \ 22 | '' u 'rv-jit-nf-riscv64-O2-runtime' ti 'rv8-fuse-off-rv64-O2-runtime', \ 23 | '' u 'rv-jit-nf-riscv64-Os-runtime' ti 'rv8-fuse-off-rv64-Os-runtime', \ 24 | '' u 'rv-jit-riscv64-O3-runtime' ti 'rv8-fuse-on-rv64-O3-runtime', \ 25 | '' u 'rv-jit-riscv64-O2-runtime' ti 'rv8-fuse-on-rv64-O2-runtime', \ 26 | '' u 'rv-jit-riscv64-Os-runtime' ti 'rv8-fuse-on-rv64-Os-runtime' 27 | 28 | set yrange [0:4] 29 | 30 | set output "svg/fusion-32.svg" 31 | set title "rv8-bench (Runtime fusion=on,off 32-bit)" 32 | plot 'data/benchmarks.dat' using \ 33 | 'rv-jit-nf-riscv32-O3-runtime':xtic(1) ti 'rv8-fuse-off-rv32-O3-runtime', \ 34 | '' u 'rv-jit-nf-riscv32-O2-runtime' ti 'rv8-fuse-off-rv32-O2-runtime', \ 35 | '' u 'rv-jit-nf-riscv32-Os-runtime' ti 'rv8-fuse-off-rv32-Os-runtime', \ 36 | '' u 'rv-jit-riscv32-O3-runtime' ti 'rv8-fuse-on-rv32-O3-runtime', \ 37 | '' u 'rv-jit-riscv32-O2-runtime' ti 'rv8-fuse-on-rv32-O2-runtime', \ 38 | '' u 'rv-jit-riscv32-Os-runtime' ti 'rv8-fuse-on-rv32-Os-runtime' 39 | -------------------------------------------------------------------------------- /plots/instructions.gnuplot: -------------------------------------------------------------------------------- 1 | set term svg size 640,480 2 | set auto x 3 | set xtic scale 0 4 | set xtics nomirror rotate by -60 5 | set style line 101 lc rgb '#606060' lt 1 lw 1 6 | set border ls 101 7 | set ylabel "Frequency (count)" offset 2,0,0 8 | set grid xtics ytics 
9 | 10 | set style line 1 lt 1 lw 2.5 pt 7 ps 0.5 lc '#9400D3' 11 | set style line 2 lt 1 lw 2.0 pt 7 ps 0.5 lc '#228B22' 12 | set style line 3 lt 1 lw 1.5 pt 7 ps 0.5 lc '#00BFFF' 13 | 14 | filenames = "aes bigint dhrystone miniz norx primes qsort sha512" 15 | 16 | do for [file in filenames] { 17 | infile1 = sprintf('stats/rv-hist-riscv64-%s-O3.dir/hist-inst.csv',file) 18 | infile2 = sprintf('stats/rv-hist-riscv64-%s-O2.dir/hist-inst.csv',file) 19 | infile3 = sprintf('stats/rv-hist-riscv64-%s-Os.dir/hist-inst.csv',file) 20 | outfile = sprintf('svg/instructions-%s-rv64-1.svg',file) 21 | set output outfile 22 | set title sprintf('rv8-bench (%s instructions riscv64 -O3, -O2, -Os)',file) 23 | plot infile1 using 2:xtic(1) title 'O3' with linespoints ls 1, \ 24 | infile2 using 2:xtic(1) title 'O2' with linespoints ls 2, \ 25 | infile3 using 2:xtic(1) title 'Os' with linespoints ls 3 26 | } 27 | 28 | set style data histogram 29 | set style fill solid border -1 30 | set boxwidth 0.9 31 | set style histogram rowstacked 32 | 33 | do for [file in filenames] { 34 | infile = sprintf('stats/rv-hist-inst-riscv64-%s.csv',file) 35 | outfile = sprintf('svg/instructions-%s-rv64-2.svg',file) 36 | set output outfile 37 | set title sprintf('rv8-bench (%s instructions riscv64 -O3, -O2, -Os)',file) 38 | plot infile using 2:xtic(1) ti col, '' using 3 ti col, '' using 4 ti col 39 | } 40 | -------------------------------------------------------------------------------- /plots/mips.gnuplot.template: -------------------------------------------------------------------------------- 1 | set term svg size 640,480 2 | set auto x 3 | set style data histogram 4 | set style histogram cluster gap 1 5 | set style fill solid border -1 6 | set boxwidth 0.9 7 | set xtic scale 0 8 | 9 | set style line 101 lc rgb '#606060' lt 1 lw 1 10 | set border ls 101 11 | 12 | set ylabel "MIPS (Millions of Instructions Per Second)" offset 2,0,0 13 | set yrange [0:15000] 14 | 15 | set grid xtics ytics 16 | 17 | set 
output "svg/mips-O3-64.svg" 18 | set title "rv8-bench (MIPS -O3 64-bit)" 19 | plot 'data/benchmarks_noheader.dat' using \ 20 | ((native_x86_64_O3_instret/native_x86_64_O3_runtime)/1000000):xtic(1) title 'native-x86-64-O3-mips', \ 21 | '' u ((rv_sim_riscv64_O3_instret/rv_jit_riscv64_O3_runtime)/1000000) title 'rv8-riscv64-O3-mips', \ 22 | '' u ((rv_sim_riscv64_O3_instret/qemu_riscv64_O3_runtime)/1000000) title 'qemu-riscv64-O3-mips' 23 | 24 | set output "svg/mips-O2-64.svg" 25 | set title "rv8-bench (MIPS -O2 64-bit)" 26 | plot 'data/benchmarks_noheader.dat' using \ 27 | ((native_x86_64_O2_instret/native_x86_64_O2_runtime)/1000000):xtic(1) title 'native-x86-64-O2-mips', \ 28 | '' u ((rv_sim_riscv64_O2_instret/rv_jit_riscv64_O2_runtime)/1000000) title 'rv8-riscv64-O2-mips', \ 29 | '' u ((rv_sim_riscv64_O2_instret/qemu_riscv64_O2_runtime)/1000000) title 'qemu-riscv64-O2-mips' 30 | 31 | set output "svg/mips-Os-64.svg" 32 | set title "rv8-bench (MIPS -Os 64-bit)" 33 | plot 'data/benchmarks_noheader.dat' using \ 34 | ((native_x86_64_Os_instret/native_x86_64_Os_runtime)/1000000):xtic(1) title 'native-x86-64-Os-mips', \ 35 | '' u ((rv_sim_riscv64_Os_instret/rv_jit_riscv64_Os_runtime)/1000000) title 'rv8-riscv64-Os-mips', \ 36 | '' u ((rv_sim_riscv64_Os_instret/qemu_riscv64_Os_runtime)/1000000) title 'qemu-riscv64-Os-mips' 37 | 38 | set output "svg/mips-O3-32.svg" 39 | set title "rv8-bench (MIPS -O3 32-bit)" 40 | plot 'data/benchmarks_noheader.dat' using \ 41 | ((native_x86_32_O3_instret/native_x86_32_O3_runtime)/1000000):xtic(1) title 'native-x86-32-O3-mips', \ 42 | '' u ((rv_sim_riscv32_O3_instret/rv_jit_riscv32_O3_runtime)/1000000) title 'rv8-riscv32-O3-mips', \ 43 | '' u ((rv_sim_riscv32_O3_instret/qemu_riscv32_O3_runtime)/1000000) title 'qemu-riscv32-O3-mips' 44 | 45 | set output "svg/mips-O2-32.svg" 46 | set title "rv8-bench (MIPS -O2 32-bit)" 47 | plot 'data/benchmarks_noheader.dat' using \ 48 | ((native_x86_32_O2_instret/native_x86_32_O2_runtime)/1000000):xtic(1) 
title 'native-x86-32-O2-mips', \ 49 | '' u ((rv_sim_riscv32_O2_instret/rv_jit_riscv32_O2_runtime)/1000000) title 'rv8-riscv32-O2-mips', \ 50 | '' u ((rv_sim_riscv32_O2_instret/qemu_riscv32_O2_runtime)/1000000) title 'qemu-riscv32-O2-mips' 51 | 52 | set output "svg/mips-Os-32.svg" 53 | set title "rv8-bench (MIPS -Os 32-bit)" 54 | plot 'data/benchmarks_noheader.dat' using \ 55 | ((native_x86_32_Os_instret/native_x86_32_Os_runtime)/1000000):xtic(1) title 'native-x86-32-Os-mips', \ 56 | '' u ((rv_sim_riscv32_Os_instret/rv_jit_riscv32_Os_runtime)/1000000) title 'rv8-riscv32-Os-mips', \ 57 | '' u ((rv_sim_riscv32_Os_instret/qemu_riscv32_Os_runtime)/1000000) title 'qemu-riscv32-Os-mips' 58 | -------------------------------------------------------------------------------- /plots/operations.gnuplot: -------------------------------------------------------------------------------- 1 | set term svg size 640,480 2 | set auto x 3 | set style data histogram 4 | set style histogram cluster gap 2 5 | set style fill solid border -1 6 | set boxwidth 0.9 7 | set xtic scale 0 8 | 9 | set style line 101 lc rgb '#606060' lt 1 lw 1 10 | set border ls 101 11 | 12 | set ylabel "Retired Operations" offset 2,0,0 13 | set yrange [0:1e10] 14 | 15 | set grid xtics ytics 16 | 17 | set output "svg/operations-O3-64.svg" 18 | set title "rv8-bench (Retired Operations -O3 64-bit)" 19 | plot 'data/benchmarks.dat' using \ 20 | 'native-x86-64-O3-instret':xtic(1) ti col, \ 21 | '' u 'native-x86-64-O3-uops-executed' ti col, \ 22 | '' u 'native-x86-64-O3-uops-issued' ti col, \ 23 | '' u 'native-x86-64-O3-uops-retired-all' ti col, \ 24 | '' u 'native-x86-64-O3-uops-retired-slots' ti col, \ 25 | '' u 'rv-sim-riscv64-O3-instret' ti col 26 | 27 | set output "svg/operations-O2-64.svg" 28 | set title "rv8-bench (Retired Operations -O2 64-bit)" 29 | plot 'data/benchmarks.dat' using \ 30 | 'native-x86-64-O2-instret':xtic(1) ti col, \ 31 | '' u 'native-x86-64-O2-uops-executed' ti col, \ 32 | '' u 
'native-x86-64-O2-uops-issued' ti col, \ 33 | '' u 'native-x86-64-O2-uops-retired-all' ti col, \ 34 | '' u 'native-x86-64-O2-uops-retired-slots' ti col, \ 35 | '' u 'rv-sim-riscv64-O2-instret' ti col 36 | 37 | set output "svg/operations-Os-64.svg" 38 | set title "rv8-bench (Retired Operations -Os 64-bit)" 39 | plot 'data/benchmarks.dat' using \ 40 | 'native-x86-64-Os-instret':xtic(1) ti col, \ 41 | '' u 'native-x86-64-Os-uops-executed' ti col, \ 42 | '' u 'native-x86-64-Os-uops-issued' ti col, \ 43 | '' u 'native-x86-64-Os-uops-retired-all' ti col, \ 44 | '' u 'native-x86-64-Os-uops-retired-slots' ti col, \ 45 | '' u 'rv-sim-riscv64-Os-instret' ti col 46 | 47 | set yrange [0:2e10] 48 | 49 | set output "svg/operations-O3-32.svg" 50 | set title "rv8-bench (Retired Operations -O3 32-bit)" 51 | plot 'data/benchmarks.dat' using \ 52 | 'native-x86-32-O3-instret':xtic(1) ti col, \ 53 | '' u 'native-x86-32-O3-uops-executed' ti col, \ 54 | '' u 'native-x86-32-O3-uops-issued' ti col, \ 55 | '' u 'native-x86-32-O3-uops-retired-all' ti col, \ 56 | '' u 'native-x86-32-O3-uops-retired-slots' ti col, \ 57 | '' u 'rv-sim-riscv32-O3-instret' ti col 58 | 59 | set output "svg/operations-O2-32.svg" 60 | set title "rv8-bench (Retired Operations -O2 32-bit)" 61 | plot 'data/benchmarks.dat' using \ 62 | 'native-x86-32-O2-instret':xtic(1) ti col, \ 63 | '' u 'native-x86-32-O2-uops-executed' ti col, \ 64 | '' u 'native-x86-32-O2-uops-issued' ti col, \ 65 | '' u 'native-x86-32-O2-uops-retired-all' ti col, \ 66 | '' u 'native-x86-32-O2-uops-retired-slots' ti col, \ 67 | '' u 'rv-sim-riscv32-O2-instret' ti col 68 | 69 | set output "svg/operations-Os-32.svg" 70 | set title "rv8-bench (Retired Operations -Os 32-bit)" 71 | plot 'data/benchmarks.dat' using \ 72 | 'native-x86-32-Os-instret':xtic(1) ti col, \ 73 | '' u 'native-x86-32-Os-uops-executed' ti col, \ 74 | '' u 'native-x86-32-Os-uops-issued' ti col, \ 75 | '' u 'native-x86-32-Os-uops-retired-all' ti col, \ 76 | '' u 
'native-x86-32-Os-uops-retired-slots' ti col, \ 77 | '' u 'rv-sim-riscv32-Os-instret' ti col 78 | -------------------------------------------------------------------------------- /plots/optimisation.gnuplot: -------------------------------------------------------------------------------- 1 | set term svg size 640,480 2 | set auto x 3 | set style data histogram 4 | set style histogram cluster gap 2 5 | set style fill solid border -1 6 | set boxwidth 0.9 7 | set xtic scale 0 8 | 9 | set style line 101 lc rgb '#606060' lt 1 lw 1 10 | set border ls 101 11 | 12 | set ylabel "Runtime (secs)" offset 2,0,0 13 | 14 | set grid xtics ytics 15 | 16 | set yrange [0:2.0] 17 | 18 | set output "svg/optimisation-x86-64.svg" 19 | set title "rv8-bench (Optimisation native x86-64 -O3, -O2 and -Os)" 20 | plot 'data/benchmarks.dat' using \ 21 | 'native-x86-64-O3-runtime':xtic(1) ti 'native-x86-64-O3-runtime', \ 22 | '' u 'native-x86-64-O2-runtime' ti 'native-x86-64-O2-runtime', \ 23 | '' u 'native-x86-64-Os-runtime' ti 'native-x86-64-Os-runtime' 24 | 25 | set output "svg/optimisation-x86-32.svg" 26 | set title "rv8-bench (Optimisation native x86-32 -O3, -O2 and -Os)" 27 | plot 'data/benchmarks.dat' using \ 28 | 'native-x86-32-O3-runtime':xtic(1) ti 'native-x86-32-O3-runtime', \ 29 | '' u 'native-x86-32-O2-runtime' ti 'native-x86-32-O2-runtime', \ 30 | '' u 'native-x86-32-Os-runtime' ti 'native-x86-32-Os-runtime' 31 | 32 | set yrange [0:4] 33 | 34 | set output "svg/optimisation-riscv64.svg" 35 | set title "rv8-bench (Optimisation rv8 riscv64 -O3, -O2 and -Os)" 36 | plot 'data/benchmarks.dat' using \ 37 | 'rv-jit-riscv64-O3-runtime':xtic(1) ti 'rv8-riscv64-O3-runtime', \ 38 | '' u 'rv-jit-riscv64-O2-runtime' ti 'rv8-riscv64-O2-runtime', \ 39 | '' u 'rv-jit-riscv64-Os-runtime' ti 'rv8-riscv64-Os-runtime' 40 | 41 | set output "svg/optimisation-riscv32.svg" 42 | set title "rv8-bench (Optimisation rv8 riscv32 -O3, -O2 and -Os)" 43 | plot 'data/benchmarks.dat' using \ 44 | 
'rv-jit-riscv32-O3-runtime':xtic(1) ti 'rv8-riscv32-O3-runtime', \ 45 | '' u 'rv-jit-riscv32-O2-runtime' ti 'rv8-riscv32-O2-runtime', \ 46 | '' u 'rv-jit-riscv32-Os-runtime' ti 'rv8-riscv32-Os-runtime' 47 | -------------------------------------------------------------------------------- /plots/registers.gnuplot: -------------------------------------------------------------------------------- 1 | set term svg size 640,480 2 | set auto x 3 | set xtic scale 0 4 | set xtics nomirror rotate by -60 5 | set style line 101 lc rgb '#606060' lt 1 lw 1 6 | set border ls 101 7 | set ylabel "Frequency (count)" offset 2,0,0 8 | set grid xtics ytics 9 | 10 | set style line 1 lt 1 lw 2.5 pt 7 ps 0.5 lc '#9400D3' 11 | set style line 2 lt 1 lw 2.0 pt 7 ps 0.5 lc '#228B22' 12 | set style line 3 lt 1 lw 1.5 pt 7 ps 0.5 lc '#00BFFF' 13 | 14 | filenames = "aes bigint dhrystone miniz norx primes qsort sha512" 15 | 16 | do for [file in filenames] { 17 | infile1 = sprintf('stats/rv-hist-riscv64-%s-O3.dir/hist-reg.csv',file) 18 | infile2 = sprintf('stats/rv-hist-riscv64-%s-O2.dir/hist-reg.csv',file) 19 | infile3 = sprintf('stats/rv-hist-riscv64-%s-Os.dir/hist-reg.csv',file) 20 | outfile = sprintf('svg/registers-%s-rv64-1.svg',file) 21 | set output outfile 22 | set title sprintf('rv8-bench (%s registers riscv64 -O3, -O2, -Os)',file) 23 | plot infile1 using 2:xtic(1) title 'O3' with linespoints ls 1, \ 24 | infile2 using 2:xtic(1) title 'O2' with linespoints ls 2, \ 25 | infile3 using 2:xtic(1) title 'Os' with linespoints ls 3 26 | } 27 | 28 | set style data histogram 29 | set style fill solid border -1 30 | set boxwidth 0.9 31 | set style histogram rowstacked 32 | 33 | do for [file in filenames] { 34 | infile = sprintf('stats/rv-hist-reg-riscv64-%s.csv',file) 35 | outfile = sprintf('svg/registers-%s-rv64-2.svg',file) 36 | set output outfile 37 | set title sprintf('rv8-bench (%s registers riscv64 -O3, -O2, -Os)',file) 38 | plot infile using 2:xtic(1) ti col, '' using 3 ti col, '' using 4 ti 
col 39 | } 40 | -------------------------------------------------------------------------------- /plots/runtime.gnuplot: -------------------------------------------------------------------------------- 1 | set term svg size 640,480 2 | set auto x 3 | set style data histogram 4 | set style histogram cluster gap 2 5 | set style fill solid border -1 6 | set boxwidth 0.9 7 | set xtic scale 0 8 | 9 | set style line 101 lc rgb '#606060' lt 1 lw 1 10 | set border ls 101 11 | 12 | set ylabel "Runtime (secs)" offset 2,0,0 13 | set yrange [0:10] 14 | 15 | set grid xtics ytics 16 | 17 | set output "svg/runtime-O3-64.svg" 18 | set title "rv8-bench (Runtime -O3 64-bit)" 19 | plot 'data/benchmarks.dat' using \ 20 | 'native-x86-64-O3-runtime':xtic(1) ti col, \ 21 | '' u 'rv-jit-riscv64-O3-runtime' ti 'rv8-riscv64-O3-runtime', \ 22 | '' u 'qemu-riscv64-O3-runtime' ti col, \ 23 | '' u 'qemu-aarch64-O3-runtime' ti col 24 | 25 | set output "svg/runtime-O2-64.svg" 26 | set title "rv8-bench (Runtime -O2 64-bit)" 27 | plot 'data/benchmarks.dat' using \ 28 | 'native-x86-64-O2-runtime':xtic(1) ti col, \ 29 | '' u 'rv-jit-riscv64-O2-runtime' ti 'rv8-riscv64-O2-runtime', \ 30 | '' u 'qemu-riscv64-O2-runtime' ti col, \ 31 | '' u 'qemu-aarch64-O2-runtime' ti col 32 | 33 | set output "svg/runtime-Os-64.svg" 34 | set title "rv8-bench (Runtime -Os 64-bit)" 35 | plot 'data/benchmarks.dat' using \ 36 | 'native-x86-64-Os-runtime':xtic(1) ti col, \ 37 | '' u 'rv-jit-riscv64-Os-runtime' ti 'rv8-riscv64-Os-runtime', \ 38 | '' u 'qemu-riscv64-Os-runtime' ti col, \ 39 | '' u 'qemu-aarch64-Os-runtime' ti col 40 | 41 | set output "svg/runtime-O3-32.svg" 42 | set title "rv8-bench (Runtime -O3 32-bit)" 43 | plot 'data/benchmarks.dat' using \ 44 | 'native-x86-32-O3-runtime':xtic(1) ti col, \ 45 | '' u 'rv-jit-riscv32-O3-runtime' ti 'rv8-riscv32-O3-runtime', \ 46 | '' u 'qemu-riscv32-O3-runtime' ti col, \ 47 | '' u 'qemu-arm-O3-runtime' ti 'qemu-arm32-O3-runtime' 48 | 49 | set output "svg/runtime-O2-32.svg" 
50 | set title "rv8-bench (Runtime -O2 32-bit)" 51 | plot 'data/benchmarks.dat' using \ 52 | 'native-x86-32-O2-runtime':xtic(1) ti col, \ 53 | '' u 'rv-jit-riscv32-O2-runtime' ti 'rv8-riscv32-O2-runtime', \ 54 | '' u 'qemu-riscv32-O2-runtime' ti col, \ 55 | '' u 'qemu-arm-O2-runtime' ti 'qemu-arm32-O2-runtime' 56 | 57 | set output "svg/runtime-Os-32.svg" 58 | set title "rv8-bench (Runtime -Os 32-bit)" 59 | plot 'data/benchmarks.dat' using \ 60 | 'native-x86-32-Os-runtime':xtic(1) ti col, \ 61 | '' u 'rv-jit-riscv32-Os-runtime' ti 'rv8-riscv32-Os-runtime', \ 62 | '' u 'qemu-riscv32-Os-runtime' ti col, \ 63 | '' u 'qemu-arm-Os-runtime' ti 'qemu-arm32-Os-runtime' 64 | -------------------------------------------------------------------------------- /rv8-bench.js: -------------------------------------------------------------------------------- 1 | /* 2 | * rv8-bench 3 | */ 4 | 5 | var fs = require("fs"); 6 | var util = require("util"); 7 | var child_process = require('child_process'); 8 | 9 | var benchmarks = [ 'aes', 'bigint', 'dhrystone', 'miniz', 'norx', 'primes', 'qsort', 'sha512' ]; 10 | 11 | var targets = [ 'rv-hist-riscv32', 12 | 'rv-hist-riscv64', 13 | 'rv-sim-riscv32', 14 | 'rv-sim-riscv64', 15 | 'rv-jit-riscv32', 16 | 'rv-jit-riscv64', 17 | 'rv-jit-mem-riscv32', 18 | 'rv-jit-mem-riscv64', 19 | 'qemu-riscv32', 20 | 'qemu-riscv64', 21 | 'qemu-arm', 22 | 'qemu-aarch64', 23 | 'native-i386', 24 | 'native-x86_64', 25 | 'size-riscv32', 26 | 'size-riscv64', 27 | 'size-arm', 28 | 'size-aarch64', 29 | 'size-i386', 30 | 'size-x86_64' ]; 31 | 32 | var opts = [ 'O3', 'O2', 'Os' ]; 33 | 34 | var fmt_time = [ ['benchmark', 15], ['system', 15], ['opt', 3], ['runtime', 8] ] 35 | 36 | var fmt_inst = [ ['benchmark', 15], ['system', 15], ['opt', 3], ['runtime', 8], ['instret', 8] ] 37 | 38 | var fmt_size = [ ['benchmark', 15], ['system', 15], ['opt', 3], ['filesize', 8] ] 39 | 40 | var stats_dir = "./stats" 41 | var data_dir = "./data" 42 | 43 | var max_sim_runs = 5; 44 | 45 | 
var row_count = 0; 46 | 47 | var last_fmt = null; 48 | 49 | 50 | function padl(n, width, z) { 51 | z = z || ' '; 52 | n = n + ''; 53 | return (n.length >= width ? '' : new Array(width - n.length + 1).join(z)) + n; 54 | } 55 | 56 | function padr(n, width, z) { 57 | z = z || ' '; 58 | n = n + ''; 59 | return n + (n.length >= width ? '' : new Array(width - n.length + 1).join(z)); 60 | } 61 | 62 | function repeat(width, z) { 63 | z = z || ' '; 64 | return new Array(width).join(z); 65 | } 66 | 67 | function benchmark_cmd(bench, cmd, args) 68 | { 69 | var start = process.hrtime(); 70 | var obj = child_process.spawnSync(cmd, args) 71 | var elapsed = process.hrtime(start); 72 | var elapsed_secs = elapsed[0] + elapsed[1] / 1000000000.0 73 | var data = {}; 74 | if (obj.status == 0) { 75 | var dmips = obj.stdout.toString().match(/\s([0-9]+)\sDMIPS/); 76 | //dhrystone mips 77 | if (dmips) { 78 | data['dmips'] = dmips[1]; 79 | } 80 | // riscv instructions retired 81 | var rv_inst = obj.stdout.toString().match(/instret\s+:\s+([0-9]+)/m); 82 | if (rv_inst) { 83 | data['instret'] = rv_inst[1]; 84 | } 85 | // x86 cycles 86 | var x86_cycles = obj.stderr.toString().match(/([0-9,]+)\s+cycles/m); 87 | if (x86_cycles) { 88 | data['cycles'] = x86_cycles[1].split(',').join(''); 89 | } 90 | // x86 instructions retired 91 | var x86_inst = obj.stderr.toString().match(/([0-9,]+)\s+instructions/m); 92 | if (x86_inst) { 93 | data['instret'] = x86_inst[1].split(',').join(''); 94 | } 95 | // unfused domain, uops sent to execution port 96 | var x86_uops_executed = obj.stderr.toString().match(/([0-9,]+)\s+r1b1/m); 97 | if (x86_uops_executed) { 98 | data['x86_uops_executed'] = x86_uops_executed[1].split(',').join(''); 99 | } 100 | // fused domain, uops issued 101 | var x86_uops_issued = obj.stderr.toString().match(/([0-9,]+)\s+r10e/m); 102 | if (x86_uops_issued) { 103 | data['x86_uops_issued'] = x86_uops_issued[1].split(',').join(''); 104 | } 105 | // fused domain, uop retirement slots used 106 | 
var x86_uops_retired_slots = obj.stderr.toString().match(/([0-9,]+)\s+r2c2/m); 107 | if (x86_uops_retired_slots) { 108 | data['x86_uops_retired_slots'] = x86_uops_retired_slots[1].split(',').join(''); 109 | } 110 | // unfused domain, uops retired all 111 | var x86_uops_retired_all = obj.stderr.toString().match(/([0-9,]+)\s+r1c2/m); 112 | if (x86_uops_retired_all) { 113 | data['x86_uops_retired_all'] = x86_uops_retired_all[1].split(',').join(''); 114 | } 115 | data['runtime'] = Math.round(elapsed_secs* 1000) / 1000; 116 | } else { 117 | data['error'] = true; 118 | } 119 | return data; 120 | } 121 | 122 | function benchmark_format_headings(fmt) 123 | { 124 | var output = ""; 125 | for (var i = 0; i < fmt.length; i++) { 126 | if (i != 0) output += ' | ' ; 127 | output += padr(fmt[i][0], fmt[i][1]); 128 | } 129 | return output; 130 | } 131 | 132 | function benchmark_format_rule(fmt) 133 | { 134 | var output = ""; 135 | for (var i = 0; i < fmt.length; i++) { 136 | if (i != 0) output += ' | ' ; 137 | output += padr(repeat(fmt[i][1], '-'), fmt[i][1]); 138 | } 139 | return output; 140 | } 141 | 142 | function benchmark_format_row(fmt, data) 143 | { 144 | var output = ""; 145 | for (var i = 0; i < fmt.length; i++) { 146 | if (i != 0) output += ' | ' ; 147 | output += padr(data[fmt[i][0]], fmt[i][1]); 148 | } 149 | return output; 150 | } 151 | 152 | function benchmark_print_heading(fmt) 153 | { 154 | console.log(benchmark_format_headings(fmt)); 155 | console.log(benchmark_format_rule(fmt)); 156 | } 157 | 158 | function benchmark_print_row(fmt, data) 159 | { 160 | if (fmt != last_fmt || row_count % 20 == 0) { 161 | console.log(''); 162 | benchmark_print_heading(fmt); 163 | } 164 | console.log(benchmark_format_row(fmt, data)); 165 | row_count++; 166 | last_fmt = fmt; 167 | } 168 | 169 | function benchmark_add_row(benchmark, system, opt, data) 170 | { 171 | data['benchmark'] = benchmark; 172 | data['system'] = system; 173 | data['opt'] = opt; 174 | if 
(!fs.existsSync(stats_dir)){ 175 | fs.mkdirSync(stats_dir); 176 | } 177 | var path = stats_dir + '/' + system + '.json'; 178 | var contents, arr; 179 | try { 180 | contents = fs.readFileSync(path); 181 | } catch (error) {} 182 | if (contents) { 183 | arr = JSON.parse(contents); 184 | arr.push(data); 185 | } else { 186 | arr = [ data ]; 187 | } 188 | fs.writeFileSync(path, JSON.stringify(arr)); 189 | } 190 | 191 | function benchmark_size(benchmark, target, opt, runs) 192 | { 193 | var system = 'size-' + target; 194 | var binary = 'bin/' + target + '/' + benchmark + "." + opt + ".stripped"; 195 | var stats = fs.statSync(binary); 196 | var data = { 197 | runtime: 0, 198 | filesize: stats.size 199 | }; 200 | benchmark_add_row(benchmark, system, opt, data); 201 | benchmark_print_row(fmt_size, data); 202 | } 203 | 204 | function benchmark_hist(benchmark, target, opt, runs) 205 | { 206 | var system = 'rv-hist-' + target; 207 | var hist_dir = stats_dir + '/' + system + '-' + benchmark + '-' + opt + '.dir'; 208 | if (!fs.existsSync(stats_dir)){ 209 | fs.mkdirSync(stats_dir); 210 | } 211 | if (!fs.existsSync(hist_dir)){ 212 | fs.mkdirSync(hist_dir); 213 | } 214 | var data = benchmark_cmd(benchmark, 'rv-sim', 215 | ['-I', '-R', '-D', hist_dir, '-E', 'bin/' + target + '/' + benchmark + "." + opt]); 216 | benchmark_add_row(benchmark, system, opt, data); 217 | benchmark_print_row(fmt_inst, data); 218 | } 219 | 220 | function benchmark_sim(benchmark, target, opt, runs) 221 | { 222 | var system = 'rv-sim-' + target; 223 | for (var i = 0; i < Math.min(runs, max_sim_runs); i++) { 224 | var data = benchmark_cmd(benchmark, 'rv-sim', 225 | ['-E', 'bin/' + target + '/' + benchmark + "." 
+ opt]); 226 | benchmark_add_row(benchmark, system, opt, data); 227 | benchmark_print_row(fmt_inst, data); 228 | } 229 | } 230 | 231 | function benchmark_jit(benchmark, target, opt, runs) 232 | { 233 | var system = 'rv-jit-' + target; 234 | for (var i = 0; i < runs; i++) { 235 | var data = benchmark_cmd(benchmark, 'rv-jit', 236 | ['bin/' + target + '/' + benchmark + "." + opt]); 237 | benchmark_add_row(benchmark, system, opt, data); 238 | benchmark_print_row(fmt_time, data); 239 | } 240 | } 241 | 242 | function benchmark_jit_ir(benchmark, target, opt, runs) 243 | { 244 | var system = 'rv-jit-ir-' + target; 245 | for (var i = 0; i < runs; i++) { 246 | var data = benchmark_cmd(benchmark, 'rv-jit', 247 | ['-i', '-N', '-E', 'bin/' + target + '/' + benchmark + "." + opt]); 248 | benchmark_add_row(benchmark, system, opt, data); 249 | benchmark_print_row(fmt_inst, data); 250 | } 251 | } 252 | 253 | function benchmark_jit_nf(benchmark, target, opt, runs) 254 | { 255 | var system = 'rv-jit-nf-' + target; 256 | for (var i = 0; i < runs; i++) { 257 | var data = benchmark_cmd(benchmark, 'rv-jit', 258 | ['-N', 'bin/' + target + '/' + benchmark + "." + opt]); 259 | benchmark_add_row(benchmark, system, opt, data); 260 | benchmark_print_row(fmt_time, data); 261 | } 262 | } 263 | 264 | function benchmark_jit_mem(benchmark, target, opt, runs) 265 | { 266 | var system = 'rv-jit-mem-' + target; 267 | for (var i = 0; i < runs; i++) { 268 | var data = benchmark_cmd(benchmark, 'rv-jit', 269 | ['-M', 'bin/' + target + '/' + benchmark + "." + opt]); 270 | benchmark_add_row(benchmark, system, opt, data); 271 | benchmark_print_row(fmt_time, data); 272 | } 273 | } 274 | 275 | function benchmark_qemu(benchmark, target, opt, runs) 276 | { 277 | var system = 'qemu-' + target; 278 | for (var i = 0; i < runs; i++) { 279 | var data = benchmark_cmd(benchmark, 'qemu-' + target, 280 | ['bin/' + target + '/' + benchmark + "." 
+ opt]); 281 | benchmark_add_row(benchmark, system, opt, data); 282 | benchmark_print_row(fmt_time, data); 283 | } 284 | } 285 | 286 | function benchmark_native(benchmark, target, opt, runs) 287 | { 288 | var system = 'native-' + target; 289 | for (var i = 0; i < runs; i++) { 290 | var data = benchmark_cmd(benchmark, 'perf', 291 | ['stat', '-e', 'cycles,instructions,r1b1,r10e,r2c2,r1c2', 292 | 'bin/' + target + '/' + benchmark + "." + opt]); 293 | benchmark_add_row(benchmark, system, opt, data); 294 | benchmark_print_row(fmt_inst, data); 295 | } 296 | } 297 | 298 | function benchmark_run(benchmark, target, opt, runs) 299 | { 300 | switch (target) { 301 | case 'rv-hist-riscv32': benchmark_hist(benchmark, 'riscv32', opt, runs); break; 302 | case 'rv-hist-riscv64': benchmark_hist(benchmark, 'riscv64', opt, runs); break; 303 | case 'rv-sim-riscv32': benchmark_sim(benchmark, 'riscv32', opt, runs); break; 304 | case 'rv-sim-riscv64': benchmark_sim(benchmark, 'riscv64', opt, runs); break; 305 | case 'rv-jit-riscv32': benchmark_jit(benchmark, 'riscv32', opt, runs); break; 306 | case 'rv-jit-riscv64': benchmark_jit(benchmark, 'riscv64', opt, runs); break; 307 | case 'rv-jit-ir-riscv32': benchmark_jit_ir(benchmark, 'riscv32', opt, runs); break; 308 | case 'rv-jit-ir-riscv64': benchmark_jit_ir(benchmark, 'riscv64', opt, runs); break; 309 | case 'rv-jit-nf-riscv32': benchmark_jit_nf(benchmark, 'riscv32', opt, runs); break; 310 | case 'rv-jit-nf-riscv64': benchmark_jit_nf(benchmark, 'riscv64', opt, runs); break; 311 | case 'rv-jit-mem-riscv32': benchmark_jit_mem(benchmark, 'riscv32', opt, runs); break; 312 | case 'rv-jit-mem-riscv64': benchmark_jit_mem(benchmark, 'riscv64', opt, runs); break; 313 | case 'qemu-riscv32': benchmark_qemu(benchmark, 'riscv32', opt, runs); break; 314 | case 'qemu-riscv64': benchmark_qemu(benchmark, 'riscv64', opt, runs); break; 315 | case 'qemu-arm': benchmark_qemu(benchmark, 'arm', opt, runs); break; 316 | case 'qemu-aarch64': 
benchmark_qemu(benchmark, 'aarch64', opt, runs); break; 317 | case 'native-i386': benchmark_native(benchmark, 'i386', opt, runs); break; 318 | case 'native-x86_64': benchmark_native(benchmark, 'x86_64', opt, runs); break; 319 | case 'size-riscv32': benchmark_size(benchmark, 'riscv32', opt, runs); break; 320 | case 'size-riscv64': benchmark_size(benchmark, 'riscv64', opt, runs); break; 321 | case 'size-arm': benchmark_size(benchmark, 'arm', opt, runs); break; 322 | case 'size-aarch64': benchmark_size(benchmark, 'aarch64', opt, runs); break; 323 | case 'size-i386': benchmark_size(benchmark, 'i386', opt, runs); break; 324 | case 'size-x86_64': benchmark_size(benchmark, 'x86_64', opt, runs); break; 325 | } 326 | } 327 | 328 | function benchmark_peel_opt(benchmark, target, opt, runs) 329 | { 330 | if (opt == 'all') { 331 | opts.forEach(function(opt) { 332 | benchmark_run(benchmark, target, opt, runs); 333 | }); 334 | } else { 335 | benchmark_run(benchmark, target, opt, runs); 336 | } 337 | } 338 | function benchmark_peel_target(benchmark, target, opt, runs) 339 | { 340 | if (target == 'all') { 341 | targets.forEach(function(target) { 342 | benchmark_peel_opt(benchmark, target, opt, runs); 343 | }); 344 | } else { 345 | benchmark_peel_opt(benchmark, target, opt, runs); 346 | } 347 | } 348 | 349 | function benchmark_peel_benchmark(benchmark, target, opt, runs) 350 | { 351 | if (benchmark == 'all') { 352 | benchmarks.forEach(function(benchmark) { 353 | benchmark_peel_target(benchmark, target, opt, runs); 354 | }); 355 | } else { 356 | benchmark_peel_target(benchmark, target, opt, runs); 357 | } 358 | } 359 | 360 | function parse_table_line(line) 361 | { 362 | var state = 0; 363 | var comps = []; 364 | var buf = ""; 365 | for (var i = 0; i < line.length; i++) { 366 | var c = line.charAt(i); 367 | switch (state) { 368 | case 0: /* whitespace */ 369 | if (c == ' ' || c == '\t') { 370 | // skip 371 | } else if (c == '"') { 372 | state = 1; 373 | } else { 374 | state = 2; 375 | 
buf += c; 376 | } 377 | break; 378 | case 1: /* quoted string */ 379 | if (c == '"') { 380 | comps.push(buf); 381 | buf = ''; 382 | state = 0; 383 | } else { 384 | buf += c; 385 | } 386 | break; 387 | case 2: /* unquoted string */ 388 | if (c == ' ') { 389 | comps.push(buf); 390 | buf = ''; 391 | state = 0; 392 | } else { 393 | buf += c; 394 | } 395 | break; 396 | } 397 | } 398 | if (buf.length > 0) { 399 | comps.push(buf); 400 | } 401 | return comps; 402 | } 403 | 404 | function roundWithTrailingZeros(n, d) 405 | { 406 | b = Math.pow(10, d); 407 | n = Math.round(n * b) / b; 408 | var s = n.toString(); 409 | if (s.indexOf('.') == -1) s += '.'; 410 | while (s.length < (s.indexOf('.') + d + 1)) s += '0'; 411 | return s; 412 | } 413 | 414 | function benchmark_gather_all() 415 | { 416 | var keys = {}; 417 | var gather = {}; 418 | benchmarks.forEach(function(benchmark) { 419 | gather[benchmark] = {}; 420 | }); 421 | targets.forEach(function(target) { 422 | var path = stats_dir + '/' + target + '.json'; 423 | var contents = fs.readFileSync(path); 424 | var arr = JSON.parse(contents); 425 | opts.forEach(function(opt) { 426 | benchmarks.forEach(function(benchmark) { 427 | var best_runtime = -1, best_index = -1; 428 | for (var i = 0; i < arr.length; i++) { 429 | var data = arr[i]; 430 | if (data['opt'] != opt) continue; 431 | if (data['benchmark'] != benchmark) continue; 432 | if (best_index == -1 || data['runtime'] < best_runtime) { 433 | best_index = i; 434 | best_runtime = data['runtime']; 435 | } 436 | } 437 | if (best_index == -1) return; 438 | var data = arr[best_index]; 439 | for (var key in data) { 440 | if (!data.hasOwnProperty(key)) continue; 441 | if (key == 'benchmark') continue; 442 | if (key == 'opt') continue; 443 | if (key == 'system') continue; 444 | if (key == 'dmips') continue; 445 | if (key == 'runtime' && target.startsWith('size')) continue; 446 | if (target.startsWith('rv-hist')) continue; 447 | var new_key = target + '-' + opt + '-' + key; 448 | 
new_key = new_key.replace(/_/g, '-'); 449 | new_key = new_key.replace(/^size-/, ''); 450 | new_key = new_key.replace(/x86-uops/, 'uops'); 451 | new_key = new_key.replace(/i386/, 'x86-32'); 452 | gather[benchmark][new_key] = data[key]; 453 | keys[new_key] = true; 454 | } 455 | }); 456 | }); 457 | }); 458 | 459 | // sort keys 460 | var keylist = []; 461 | for (var key in keys) { 462 | if (keys.hasOwnProperty(key)) { 463 | keylist.push(key); 464 | } 465 | } 466 | keylist.sort(); 467 | 468 | // write keylist 469 | fs.writeFileSync(data_dir + '/keylist.txt', '#key\n' + keylist.join("\n")); 470 | 471 | // write format line 472 | var arr = []; 473 | var format = 'benchmark'; 474 | for (var i = 0; i < keylist.length; i++) { 475 | format += '\t' + keylist[i]; 476 | } 477 | arr.push(format); 478 | 479 | // write data, one row per benchmark 480 | benchmarks.forEach(function(benchmark) { 481 | var data = gather[benchmark]; 482 | var row = benchmark; 483 | for (var i = 0; i < keylist.length; i++) { 484 | row += '\t' + gather[benchmark][keylist[i]]; 485 | } 486 | arr.push(row); 487 | }); 488 | arr.push(''); 489 | 490 | // write to file 491 | if (!fs.existsSync(data_dir)) { 492 | fs.mkdirSync(data_dir); 493 | } 494 | fs.writeFileSync(data_dir + '/benchmarks.dat', arr.join("\n")); 495 | 496 | // read tables 497 | var tables = []; 498 | var table = null, column = null; 499 | var content = fs.readFileSync(data_dir + '/tables.txt'); 500 | var tmeta_arr = content.toString().split('\n'); 501 | for (var i = 0; i < tmeta_arr.length; i++) { 502 | var comps = parse_table_line(tmeta_arr[i]); 503 | if (comps.length == 2) { 504 | table = { 505 | name: comps[0], 506 | title: comps[1], 507 | columns: [] 508 | }; 509 | tables.push(table); 510 | } else if (comps.length == 3) { 511 | column = { 512 | title: comps[0], 513 | fmt: comps[1], 514 | name: comps[2] 515 | }; 516 | table.columns.push(column); 517 | } 518 | } 519 | 520 | // create tables 521 | var table_markup = {}; 522 | for (var i = 0; i 
< tables.length; i++) { 523 | var table = tables[i]; 524 | var title = table.title; 525 | var columns = table.columns; 526 | var head1 = '', head2 = ''; 527 | for (var j = 0; j < columns.length; j++) { 528 | var column = columns[j]; 529 | if (j != 0) { head1 += ' | '; head2 += ' | '; } 530 | head1 += column.title; 531 | if (j != 0) { head2 += '--:'; } else { head2 += ':--'; } 532 | } 533 | var output = []; 534 | table_markup[table.name] = output; 535 | output.push('**' + table.title + '**') 536 | output.push(''); 537 | output.push(head1); 538 | output.push(head2); 539 | var total_sum = {}, total_geo = {}; 540 | for (var j = 0; j < columns.length; j++) { 541 | var column = columns[j]; 542 | var name = column.name; 543 | total_sum[name] = 0; 544 | total_geo[name] = 1; 545 | } 546 | benchmarks.forEach(function(benchmark) { 547 | var data = gather[benchmark]; 548 | var row = ''; 549 | for (var j = 0; j < columns.length; j++) { 550 | var column = columns[j]; 551 | var fmt = column.fmt; 552 | var name = column.name; 553 | var ratio = name.split('/'); 554 | var field_data; 555 | if (name > 0) { 556 | field_data = name; 557 | } else if (name == 'program') { 558 | field_data = benchmark; 559 | } else if (ratio.length > 1) { 560 | if (table.name.indexOf('mips') == 0) { 561 | field_data = Math.round((data[ratio[0]] / data[ratio[1]]) / 1000000); 562 | } else { 563 | field_data = roundWithTrailingZeros(data[ratio[0]] / data[ratio[1]], 2); 564 | } 565 | } else { 566 | if (table.name.indexOf('operations') == 0) { 567 | field_data = Math.round(parseInt(data[name]) / 1000000); 568 | } else if (table.name.indexOf('filesize') == 0) { 569 | field_data = data[name]; 570 | } else { 571 | field_data = roundWithTrailingZeros(data[name], 2); 572 | } 573 | } 574 | if (field_data > 0) { 575 | total_sum[name] = total_sum[name] + parseFloat(field_data); 576 | total_geo[name] = total_geo[name] * parseFloat(field_data); 577 | } 578 | var field_text = util.format(fmt, field_data); 579 | if (j != 
0) { row += ' | '; } 580 | row += field_text; 581 | } 582 | output.push(row); 583 | }); 584 | var row = ''; 585 | var type = 'Sum'; 586 | if (table.name.indexOf('ratio') == 0) { type = 'Geomean'; } 587 | else if (table.name.indexOf('mips') == 0) { type = 'Geomean'; } 588 | else if (table.name.indexOf('fusion') == 0) { type = 'Geomean'; } 589 | else if (table.name.indexOf('opt') == 0) { type = 'Geomean'; } 590 | for (var j = 0; j < columns.length; j++) { 591 | var column = columns[j]; 592 | var fmt = column.fmt; 593 | var name = column.name; 594 | var field_data; 595 | if (name == 'program') { 596 | field_data = '_(' + type + ')_'; 597 | } else if (type == 'Geomean') { 598 | if (table.name.indexOf('mips') == 0 || table.name.indexOf('operations') == 0) { 599 | field_data = Math.round(Math.pow(parseFloat(total_geo[name]), 1.0/benchmarks.length)); 600 | } else { 601 | field_data = roundWithTrailingZeros(Math.pow(parseFloat(total_geo[name]), 1.0/benchmarks.length), 2); 602 | } 603 | } else if (type == 'Sum') { 604 | field_data = roundWithTrailingZeros(total_sum[name], 2); 605 | } 606 | var field_text = util.format(fmt, field_data); 607 | if (j != 0) { row += ' | '; } 608 | row += field_text; 609 | } 610 | output.push(row); 611 | output.push(); 612 | } 613 | 614 | // substitute template 615 | var content = fs.readFileSync(data_dir + '/template.md'); 616 | var template_arr = content.toString().split('\n'); 617 | var output = []; 618 | for (var i = 0; i < template_arr.length; i++) { 619 | var line = template_arr[i]; 620 | if (line.indexOf('TABLE ') == 0) { 621 | var table_name = line.substr(6); 622 | var table_rows = table_markup[table_name]; 623 | for (var j = 0; j < table_rows.length; j++) { 624 | output.push(table_rows[j]); 625 | } 626 | } else { 627 | output.push(line); 628 | } 629 | } 630 | fs.writeFileSync(data_dir + '/bench.md', output.join('\n')); 631 | } 632 | 633 | function benchmark_print_all() 634 | { 635 | targets.forEach(function(target) { 636 | var path = 
stats_dir + '/' + target + '.json'; 637 | var contents = fs.readFileSync(path); 638 | var arr = JSON.parse(contents); 639 | opts.forEach(function(opt) { 640 | benchmarks.forEach(function(benchmark) { 641 | var best_runtime = -1, best_index = -1; 642 | for (var i = 0; i < arr.length; i++) { 643 | var data = arr[i]; 644 | if (data['opt'] != opt) continue; 645 | if (data['benchmark'] != benchmark) continue; 646 | if (best_runtime == -1 || data['runtime'] < best_runtime) { 647 | best_index = i; 648 | best_runtime = data['runtime']; 649 | } 650 | } 651 | if (best_index == -1) return; 652 | var data = arr[best_index]; 653 | if ('filesize' in data) { 654 | benchmark_print_row(fmt_size, data); 655 | } else if ('instret' in data) { 656 | benchmark_print_row(fmt_inst, data); 657 | } else { 658 | benchmark_print_row(fmt_time, data); 659 | } 660 | }); 661 | }); 662 | }); 663 | } 664 | 665 | var help_or_error = false; 666 | 667 | var task = process.argv[2]; 668 | if (task == "gather" && process.argv.length == 3) { 669 | // 670 | } else if (task == "print" && process.argv.length == 3) { 671 | // 672 | } else if (task == "bench" && process.argv.length == 7) { 673 | // 674 | } else { 675 | help_or_error = true; 676 | } 677 | 678 | if (help_or_error) { 679 | console.log('usage: npm start gather'); 680 | console.log(''); 681 | console.log('usage: npm start print'); 682 | console.log(''); 683 | console.log('usage: npm start bench '); 684 | console.log(''); 685 | console.log(' [ all | ' + benchmarks.join(' | ') + ' ]'); 686 | console.log(''); 687 | console.log(' [ all | ' + targets.join(' | ') + ' ]'); 688 | console.log(''); 689 | console.log(' [ all | ' + opts.join(' | ') + ' ]'); 690 | console.log(''); 691 | process.exit(); 692 | } 693 | 694 | if (task == "gather") 695 | { 696 | benchmark_gather_all(); 697 | } 698 | 699 | if (task == "print") 700 | { 701 | benchmark_print_all(); 702 | } 703 | 704 | if (task == "bench") 705 | { 706 | var benchmark = process.argv[3]; 707 | var target 
= process.argv[4]; 708 | var opt = process.argv[5]; 709 | var runs = parseInt(process.argv[6]); 710 | 711 | if (benchmarks.indexOf(benchmark) == -1 && benchmark != 'all') { 712 | console.log('unknown benchmark ' + benchmark); 713 | process.exit(); 714 | } else if (targets.indexOf(target) == -1 && target != 'all') { 715 | console.log('unknown target ' + target); 716 | process.exit(); 717 | } else if (opts.indexOf(opt) == -1 && opt != 'all') { 718 | console.log('unknown opt ' + opt); 719 | process.exit(); 720 | } 721 | 722 | console.log('benchmark : ' + benchmark); 723 | console.log('target : ' + target); 724 | console.log('opt : ' + opt); 725 | console.log('runs : ' + runs); 726 | console.log(''); 727 | 728 | benchmark_peel_benchmark(benchmark, target, opt, runs); 729 | } -------------------------------------------------------------------------------- /scripts/update-plots.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | keynum() { 4 | nl data/keylist.txt | grep $1 | awk '{ print $1; }' 5 | } 6 | 7 | substitute() { 8 | cp $1 $2 9 | for i in $(cat data/keylist.txt | grep -v '#'); do 10 | n=$(keynum $i) 11 | k=$(echo $i | tr '-' '_') 12 | sed -i '' "s#$k#\$$n#g" $2 13 | done 14 | } 15 | 16 | # gather benchmark results 17 | npm start gather 18 | 19 | # join Os and O3 histograms 20 | for i in aes bigint dhrystone miniz norx primes qsort sha512; 21 | do 22 | # rename zero register to x0 23 | sed -i '' s#zero#x0# stats/rv-hist-riscv64-$i-O3.dir/hist-reg.csv 24 | sed -i '' s#zero#x0# stats/rv-hist-riscv64-$i-Os.dir/hist-reg.csv 25 | 26 | # combine O3 and Os register histograms 27 | join <(sort -k1,1 stats/rv-hist-riscv64-$i-O3.dir/hist-reg.csv) \ 28 | <(sort -k1,1 stats/rv-hist-riscv64-$i-O2.dir/hist-reg.csv) \ 29 | >stats/rv-hist-reg-riscv64-$i.tmp.1 30 | join <(sort -k1,1 stats/rv-hist-reg-riscv64-$i.tmp.1) \ 31 | <(sort -k1,1 stats/rv-hist-riscv64-$i-Os.dir/hist-reg.csv) \ 32 | 
>stats/rv-hist-reg-riscv64-$i.tmp.2 33 | echo "register O3 O2 Os" > stats/rv-hist-reg-riscv64-$i.csv 34 | grep -v register >stats/rv-hist-reg-riscv64-$i.csv 35 | 36 | # combine O3 and Os instruction histograms 37 | join <(sort -k1,1 stats/rv-hist-riscv64-$i-O3.dir/hist-inst.csv) \ 38 | <(sort -k1,1 stats/rv-hist-riscv64-$i-O2.dir/hist-inst.csv) \ 39 | >stats/rv-hist-inst-riscv64-$i.tmp.1 40 | join <(sort -k1,1 stats/rv-hist-inst-riscv64-$i.tmp.1) \ 41 | <(sort -k1,1 stats/rv-hist-riscv64-$i-Os.dir/hist-inst.csv) \ 42 | >stats/rv-hist-inst-riscv64-$i.tmp.2 43 | echo "opcode O3 O2 Os" > stats/rv-hist-inst-riscv64-$i.csv 44 | grep -v opcode >stats/rv-hist-inst-riscv64-$i.csv 45 | 46 | # remove temporary files 47 | rm -f stats/rv-hist-*-riscv64-$i.tmp.* 48 | done 49 | 50 | # substitute names with column numbers for the mips plot 51 | grep -v benchmark data/benchmarks.dat >data/benchmarks_noheader.dat 52 | substitute plots/mips.gnuplot.template plots/mips.gnuplot 53 | 54 | # generate the plots 55 | #gnuplot plots/fusion.gnuplot 56 | gnuplot plots/filesize.gnuplot 57 | gnuplot plots/mips.gnuplot 58 | gnuplot plots/operations.gnuplot 59 | gnuplot plots/optimisation.gnuplot 60 | gnuplot plots/registers.gnuplot 61 | gnuplot plots/instructions.gnuplot 62 | gnuplot plots/runtime.gnuplot 63 | -------------------------------------------------------------------------------- /src/aes.c: -------------------------------------------------------------------------------- 1 | /* 2 | * AES (Rijndael) cipher 3 | * 4 | * Modifications to public domain implementation: 5 | * 6 | * - use C pre-processor to make it easier to change S table access 7 | * 8 | * Copyright (c) 2003-2012, Jouni Malinen 9 | * 10 | * This software may be distributed under the terms of the BSD license. 11 | * See README for more details. 
12 | */ 13 | 14 | /* 15 | * rijndael-alg-fst.c 16 | * 17 | * @version 3.0 (December 2000) 18 | * 19 | * Optimised ANSI C code for the Rijndael cipher (now AES) 20 | * 21 | * @author Vincent Rijmen 22 | * @author Antoon Bosselaers 23 | * @author Paulo Barreto 24 | * 25 | * This code is hereby placed in the public domain. 26 | * 27 | * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS 28 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 29 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE 31 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 34 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 35 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 36 | * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 37 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

/*
 * NOTE(review): the header names of the three #include directives below are
 * missing in this copy of the file (an angle-bracketed name appears to have
 * been dropped from each) - restore them from the original source.
 */
#include
#include
#include

/* Short fixed-width style aliases used throughout this file.
 * NOTE(review): u32 presumably relies on unsigned int being 32 bits wide on
 * every build target - confirm. */
typedef unsigned char u8;
typedef unsigned int u32;

/* Lookup tables; declared here and defined further down in this file. */
extern const u32 Te0[256];
extern const u32 Te1[256];
extern const u32 Te2[256];
extern const u32 Te3[256];
extern const u32 Te4[256];
extern const u32 Td0[256];
extern const u32 Td1[256];
extern const u32 Td2[256];
extern const u32 Td3[256];
extern const u32 Td4[256];
extern const u32 rcon[10];
extern const u8 Td4s[256];
extern const u8 rcons[10];

/*
 * Table access macros. Each one selects a byte of the 32-bit word i as the
 * table index: >> 24 takes the top byte, >> 16 and >> 8 the middle bytes,
 * no shift the low byte.
 */

/* TEn: index Te<n> with byte n of i, counted from the most significant. */
#define TE0(i) Te0[((i) >> 24) & 0xff]
#define TE1(i) Te1[((i) >> 16) & 0xff]
#define TE2(i) Te2[((i) >> 8) & 0xff]
#define TE3(i) Te3[(i) & 0xff]
/* TE4n and TE4xy: index Te4 with one byte of i and keep a single byte lane
 * of the entry, as selected by the mask. */
#define TE41(i) (Te4[((i) >> 24) & 0xff] & 0xff000000)
#define TE42(i) (Te4[((i) >> 16) & 0xff] & 0x00ff0000)
#define TE43(i) (Te4[((i) >> 8) & 0xff] & 0x0000ff00)
#define TE44(i) (Te4[(i) & 0xff] & 0x000000ff)
#define TE421(i) (Te4[((i) >> 16) & 0xff] & 0xff000000)
#define TE432(i) (Te4[((i) >> 8) & 0xff] & 0x00ff0000)
#define TE443(i) (Te4[(i) & 0xff] & 0x0000ff00)
#define TE414(i) (Te4[((i) >> 24) & 0xff] & 0x000000ff)
#define TE411(i) (Te4[((i) >> 24) & 0xff] & 0xff000000)
#define TE422(i) (Te4[((i) >> 16) & 0xff] & 0x00ff0000)
#define TE433(i) (Te4[((i) >> 8) & 0xff] & 0x0000ff00)
#define TE444(i) (Te4[(i) & 0xff] & 0x000000ff)
/* TE4: i is used directly as the index; only the low byte lane is kept. */
#define TE4(i) (Te4[(i)] & 0x000000ff)
/* TDn / TD4n: the same access patterns for the Td tables. */
#define TD0(i) Td0[((i) >> 24) & 0xff]
#define TD1(i) Td1[((i) >> 16) & 0xff]
#define TD2(i) Td2[((i) >> 8) & 0xff]
#define TD3(i) Td3[(i) & 0xff]
#define TD41(i) (Td4[((i) >> 24) & 0xff] & 0xff000000)
#define TD42(i) (Td4[((i) >> 16) & 0xff] & 0x00ff0000)
#define TD43(i) (Td4[((i) >> 8) & 0xff] & 0x0000ff00)
#define TD44(i) (Td4[(i) & 0xff] & 0x000000ff)
/* TDn_: index Td<n> with the low byte of i, without any shift. */
#define TD0_(i) Td0[(i) & 0xff]
#define TD1_(i) Td1[(i) & 0xff]
#define TD2_(i) Td2[(i) & 0xff]
#define TD3_(i) Td3[(i) &
0xff] 90 | #define RCON(i) rcon[(i)] 91 | 92 | #define GETU32(pt) (((u32)(pt)[0] << 24) ^ \ 93 | ((u32)(pt)[1] << 16) ^ \ 94 | ((u32)(pt)[2] << 8) ^ \ 95 | ((u32)(pt)[3])) 96 | #define PUTU32(ct, st) { \ 97 | (ct)[0] = (u8)((st) >> 24); \ 98 | (ct)[1] = (u8)((st) >> 16); \ 99 | (ct)[2] = (u8)((st) >> 8); \ 100 | (ct)[3] = (u8)(st); } 101 | 102 | #define AES_PRIV_SIZE (4 * 4 * 15 + 4) 103 | #define AES_PRIV_NR_POS (4 * 15) 104 | 105 | const u32 Te0[256] = { 106 | 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU, 107 | 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U, 108 | 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU, 109 | 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU, 110 | 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U, 111 | 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU, 112 | 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU, 113 | 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU, 114 | 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU, 115 | 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU, 116 | 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U, 117 | 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU, 118 | 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU, 119 | 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U, 120 | 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU, 121 | 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU, 122 | 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU, 123 | 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU, 124 | 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU, 125 | 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U, 126 | 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU, 127 | 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU, 128 | 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU, 129 | 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU, 130 | 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U, 131 | 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U, 132 | 0x8a4545cfU, 
0xe9f9f910U, 0x04020206U, 0xfe7f7f81U, 133 | 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U, 134 | 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU, 135 | 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U, 136 | 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U, 137 | 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU, 138 | 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU, 139 | 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U, 140 | 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U, 141 | 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U, 142 | 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU, 143 | 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U, 144 | 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU, 145 | 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U, 146 | 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU, 147 | 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U, 148 | 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U, 149 | 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU, 150 | 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U, 151 | 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U, 152 | 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U, 153 | 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U, 154 | 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U, 155 | 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U, 156 | 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U, 157 | 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U, 158 | 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU, 159 | 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U, 160 | 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U, 161 | 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U, 162 | 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U, 163 | 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U, 164 | 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U, 165 | 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU, 166 | 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 
0x1a0d0d17U, 167 | 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U, 168 | 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U, 169 | 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU, 170 | }; 171 | const u32 Te1[256] = { 172 | 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU, 173 | 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U, 174 | 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU, 175 | 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U, 176 | 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU, 177 | 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U, 178 | 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU, 179 | 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U, 180 | 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U, 181 | 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU, 182 | 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U, 183 | 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U, 184 | 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U, 185 | 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU, 186 | 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U, 187 | 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U, 188 | 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU, 189 | 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U, 190 | 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U, 191 | 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U, 192 | 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU, 193 | 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU, 194 | 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U, 195 | 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU, 196 | 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU, 197 | 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U, 198 | 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU, 199 | 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U, 200 | 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU, 201 | 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U, 202 | 0xdf63bcbcU, 0xc177b6b6U, 
0x75afdadaU, 0x63422121U, 203 | 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U, 204 | 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU, 205 | 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U, 206 | 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU, 207 | 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U, 208 | 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU, 209 | 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U, 210 | 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U, 211 | 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU, 212 | 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU, 213 | 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU, 214 | 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U, 215 | 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U, 216 | 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU, 217 | 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U, 218 | 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU, 219 | 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U, 220 | 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU, 221 | 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U, 222 | 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU, 223 | 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU, 224 | 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U, 225 | 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU, 226 | 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U, 227 | 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU, 228 | 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U, 229 | 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U, 230 | 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U, 231 | 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU, 232 | 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU, 233 | 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U, 234 | 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU, 235 | 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U, 236 | }; 237 | const u32 Te2[256] = { 238 | 0x63a5c663U, 
0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU, 239 | 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U, 240 | 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU, 241 | 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U, 242 | 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU, 243 | 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U, 244 | 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU, 245 | 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U, 246 | 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U, 247 | 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU, 248 | 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U, 249 | 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U, 250 | 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U, 251 | 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU, 252 | 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U, 253 | 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U, 254 | 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU, 255 | 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U, 256 | 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U, 257 | 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U, 258 | 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU, 259 | 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU, 260 | 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U, 261 | 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU, 262 | 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU, 263 | 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U, 264 | 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU, 265 | 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U, 266 | 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU, 267 | 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U, 268 | 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U, 269 | 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U, 270 | 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU, 271 | 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U, 272 | 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 
0x3d477a3dU, 273 | 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U, 274 | 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU, 275 | 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U, 276 | 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U, 277 | 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU, 278 | 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU, 279 | 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU, 280 | 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U, 281 | 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U, 282 | 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU, 283 | 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U, 284 | 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU, 285 | 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U, 286 | 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU, 287 | 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U, 288 | 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU, 289 | 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU, 290 | 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U, 291 | 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU, 292 | 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U, 293 | 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU, 294 | 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U, 295 | 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U, 296 | 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U, 297 | 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU, 298 | 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU, 299 | 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U, 300 | 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU, 301 | 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U, 302 | }; 303 | const u32 Te3[256] = { 304 | 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U, 305 | 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U, 306 | 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U, 307 | 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU, 308 | 0xcaca458fU, 0x82829d1fU, 
0xc9c94089U, 0x7d7d87faU, 309 | 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU, 310 | 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U, 311 | 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU, 312 | 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU, 313 | 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U, 314 | 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U, 315 | 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU, 316 | 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU, 317 | 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU, 318 | 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU, 319 | 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU, 320 | 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U, 321 | 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU, 322 | 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU, 323 | 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U, 324 | 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U, 325 | 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U, 326 | 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U, 327 | 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U, 328 | 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU, 329 | 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U, 330 | 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU, 331 | 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU, 332 | 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U, 333 | 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U, 334 | 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U, 335 | 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU, 336 | 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U, 337 | 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU, 338 | 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU, 339 | 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U, 340 | 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U, 341 | 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU, 342 | 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U, 
343 | 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU, 344 | 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U, 345 | 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U, 346 | 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U, 347 | 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U, 348 | 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU, 349 | 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U, 350 | 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU, 351 | 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U, 352 | 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU, 353 | 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U, 354 | 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU, 355 | 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU, 356 | 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU, 357 | 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU, 358 | 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U, 359 | 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U, 360 | 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U, 361 | 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U, 362 | 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U, 363 | 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U, 364 | 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU, 365 | 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U, 366 | 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU, 367 | 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU, 368 | }; 369 | const u32 Te4[256] = { 370 | 0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU, 371 | 0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U, 372 | 0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU, 373 | 0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U, 374 | 0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU, 375 | 0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U, 376 | 0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU, 377 | 0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U, 378 | 0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 
0x26262626U, 379 | 0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU, 380 | 0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U, 381 | 0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U, 382 | 0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U, 383 | 0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU, 384 | 0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U, 385 | 0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U, 386 | 0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU, 387 | 0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U, 388 | 0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U, 389 | 0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U, 390 | 0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU, 391 | 0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU, 392 | 0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U, 393 | 0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU, 394 | 0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU, 395 | 0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U, 396 | 0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU, 397 | 0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U, 398 | 0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU, 399 | 0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U, 400 | 0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U, 401 | 0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U, 402 | 0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU, 403 | 0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U, 404 | 0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU, 405 | 0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U, 406 | 0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU, 407 | 0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U, 408 | 0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U, 409 | 0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU, 410 | 0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU, 411 | 0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU, 412 | 0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U, 413 | 
0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U, 414 | 0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU, 415 | 0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U, 416 | 0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU, 417 | 0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U, 418 | 0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU, 419 | 0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U, 420 | 0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU, 421 | 0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU, 422 | 0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U, 423 | 0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU, 424 | 0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U, 425 | 0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU, 426 | 0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U, 427 | 0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U, 428 | 0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U, 429 | 0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU, 430 | 0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU, 431 | 0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U, 432 | 0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU, 433 | 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U, 434 | }; 435 | const u32 Td0[256] = { 436 | 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U, 437 | 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U, 438 | 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U, 439 | 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU, 440 | 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U, 441 | 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U, 442 | 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU, 443 | 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U, 444 | 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU, 445 | 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U, 446 | 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U, 447 | 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U, 448 | 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 
0x527bf8b7U, 449 | 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU, 450 | 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U, 451 | 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU, 452 | 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U, 453 | 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU, 454 | 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U, 455 | 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U, 456 | 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U, 457 | 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU, 458 | 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U, 459 | 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU, 460 | 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U, 461 | 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU, 462 | 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U, 463 | 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU, 464 | 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU, 465 | 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U, 466 | 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU, 467 | 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U, 468 | 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU, 469 | 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U, 470 | 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U, 471 | 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U, 472 | 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU, 473 | 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U, 474 | 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U, 475 | 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU, 476 | 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U, 477 | 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U, 478 | 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U, 479 | 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U, 480 | 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U, 481 | 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU, 482 | 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U, 483 | 
0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U, 484 | 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U, 485 | 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U, 486 | 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U, 487 | 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU, 488 | 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU, 489 | 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU, 490 | 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU, 491 | 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U, 492 | 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U, 493 | 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU, 494 | 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU, 495 | 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U, 496 | 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU, 497 | 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U, 498 | 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U, 499 | 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U, 500 | }; 501 | const u32 Td1[256] = { 502 | 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU, 503 | 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U, 504 | 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU, 505 | 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U, 506 | 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U, 507 | 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U, 508 | 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U, 509 | 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U, 510 | 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U, 511 | 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU, 512 | 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU, 513 | 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU, 514 | 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U, 515 | 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU, 516 | 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U, 517 | 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U, 518 | 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 
0xa14e69e2U, 519 | 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU, 520 | 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU, 521 | 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U, 522 | 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU, 523 | 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U, 524 | 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU, 525 | 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU, 526 | 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U, 527 | 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U, 528 | 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U, 529 | 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU, 530 | 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U, 531 | 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU, 532 | 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U, 533 | 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U, 534 | 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U, 535 | 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU, 536 | 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U, 537 | 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U, 538 | 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U, 539 | 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U, 540 | 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U, 541 | 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U, 542 | 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU, 543 | 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU, 544 | 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U, 545 | 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU, 546 | 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U, 547 | 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU, 548 | 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU, 549 | 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U, 550 | 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU, 551 | 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U, 552 | 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U, 553 | 
0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U, 554 | 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U, 555 | 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U, 556 | 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U, 557 | 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U, 558 | 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU, 559 | 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U, 560 | 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U, 561 | 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU, 562 | 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U, 563 | 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U, 564 | 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U, 565 | 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U, 566 | }; 567 | const u32 Td2[256] = { 568 | 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U, 569 | 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U, 570 | 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U, 571 | 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U, 572 | 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU, 573 | 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U, 574 | 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U, 575 | 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U, 576 | 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U, 577 | 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU, 578 | 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U, 579 | 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U, 580 | 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU, 581 | 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U, 582 | 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U, 583 | 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U, 584 | 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U, 585 | 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U, 586 | 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U, 587 | 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU, 588 | 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 
0xbd464de6U, 589 | 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U, 590 | 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U, 591 | 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U, 592 | 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U, 593 | 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU, 594 | 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU, 595 | 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U, 596 | 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU, 597 | 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U, 598 | 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU, 599 | 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU, 600 | 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU, 601 | 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU, 602 | 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U, 603 | 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U, 604 | 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U, 605 | 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U, 606 | 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U, 607 | 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U, 608 | 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U, 609 | 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU, 610 | 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU, 611 | 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U, 612 | 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U, 613 | 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU, 614 | 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU, 615 | 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U, 616 | 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U, 617 | 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U, 618 | 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U, 619 | 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U, 620 | 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U, 621 | 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U, 622 | 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU, 623 | 
0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U, 624 | 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U, 625 | 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U, 626 | 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U, 627 | 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U, 628 | 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U, 629 | 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU, 630 | 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U, 631 | 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U, 632 | }; 633 | const u32 Td3[256] = { 634 | 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU, 635 | 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU, 636 | 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U, 637 | 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U, 638 | 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU, 639 | 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU, 640 | 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U, 641 | 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU, 642 | 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U, 643 | 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU, 644 | 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U, 645 | 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U, 646 | 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U, 647 | 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U, 648 | 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U, 649 | 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU, 650 | 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU, 651 | 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U, 652 | 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U, 653 | 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU, 654 | 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU, 655 | 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U, 656 | 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U, 657 | 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U, 658 | 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 
0x00000000U, 659 | 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU, 660 | 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U, 661 | 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U, 662 | 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU, 663 | 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU, 664 | 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U, 665 | 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U, 666 | 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U, 667 | 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU, 668 | 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U, 669 | 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U, 670 | 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U, 671 | 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U, 672 | 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U, 673 | 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U, 674 | 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U, 675 | 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU, 676 | 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U, 677 | 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U, 678 | 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU, 679 | 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU, 680 | 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U, 681 | 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU, 682 | 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U, 683 | 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U, 684 | 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U, 685 | 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U, 686 | 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U, 687 | 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U, 688 | 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU, 689 | 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU, 690 | 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU, 691 | 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU, 692 | 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U, 693 | 
0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U, 694 | 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U, 695 | 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU, 696 | 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U, 697 | 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U, 698 | }; 699 | const u32 Td4[256] = { 700 | 0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U, 701 | 0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U, 702 | 0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU, 703 | 0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU, 704 | 0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U, 705 | 0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U, 706 | 0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U, 707 | 0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU, 708 | 0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U, 709 | 0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU, 710 | 0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU, 711 | 0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU, 712 | 0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U, 713 | 0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U, 714 | 0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U, 715 | 0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U, 716 | 0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U, 717 | 0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U, 718 | 0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU, 719 | 0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U, 720 | 0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U, 721 | 0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU, 722 | 0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U, 723 | 0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U, 724 | 0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U, 725 | 0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU, 726 | 0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U, 727 | 0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U, 728 | 0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 
0x8f8f8f8fU, 729 | 0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U, 730 | 0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U, 731 | 0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU, 732 | 0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U, 733 | 0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU, 734 | 0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU, 735 | 0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U, 736 | 0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U, 737 | 0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U, 738 | 0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U, 739 | 0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU, 740 | 0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U, 741 | 0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U, 742 | 0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU, 743 | 0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU, 744 | 0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU, 745 | 0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U, 746 | 0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU, 747 | 0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U, 748 | 0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U, 749 | 0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U, 750 | 0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U, 751 | 0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU, 752 | 0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U, 753 | 0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU, 754 | 0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU, 755 | 0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU, 756 | 0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU, 757 | 0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U, 758 | 0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU, 759 | 0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U, 760 | 0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU, 761 | 0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U, 762 | 0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U, 763 | 
0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU, 764 | }; 765 | const u32 rcon[] = { 766 | 0x01000000, 0x02000000, 0x04000000, 0x08000000, 767 | 0x10000000, 0x20000000, 0x40000000, 0x80000000, 768 | 0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */ 769 | }; 770 | 771 | 772 | /** 773 | * Expand the cipher key into the encryption key schedule. 774 | * 775 | * @return the number of rounds for the given cipher key size. 776 | */ 777 | int aes_rijndael_key_setup_enc(u32 rk[], const u8 cipherKey[], size_t keyBits) 778 | { 779 | int i; 780 | u32 temp; 781 | 782 | rk[0] = GETU32(cipherKey ); 783 | rk[1] = GETU32(cipherKey + 4); 784 | rk[2] = GETU32(cipherKey + 8); 785 | rk[3] = GETU32(cipherKey + 12); 786 | 787 | if (keyBits == 128) { 788 | for (i = 0; i < 10; i++) { 789 | temp = rk[3]; 790 | rk[4] = rk[0] ^ TE421(temp) ^ TE432(temp) ^ 791 | TE443(temp) ^ TE414(temp) ^ RCON(i); 792 | rk[5] = rk[1] ^ rk[4]; 793 | rk[6] = rk[2] ^ rk[5]; 794 | rk[7] = rk[3] ^ rk[6]; 795 | rk += 4; 796 | } 797 | return 10; 798 | } 799 | 800 | rk[4] = GETU32(cipherKey + 16); 801 | rk[5] = GETU32(cipherKey + 20); 802 | 803 | if (keyBits == 192) { 804 | for (i = 0; i < 8; i++) { 805 | temp = rk[5]; 806 | rk[6] = rk[0] ^ TE421(temp) ^ TE432(temp) ^ 807 | TE443(temp) ^ TE414(temp) ^ RCON(i); 808 | rk[7] = rk[1] ^ rk[6]; 809 | rk[8] = rk[2] ^ rk[7]; 810 | rk[9] = rk[3] ^ rk[8]; 811 | if (i == 7) 812 | return 12; 813 | rk[10] = rk[4] ^ rk[9]; 814 | rk[11] = rk[5] ^ rk[10]; 815 | rk += 6; 816 | } 817 | } 818 | 819 | rk[6] = GETU32(cipherKey + 24); 820 | rk[7] = GETU32(cipherKey + 28); 821 | 822 | if (keyBits == 256) { 823 | for (i = 0; i < 7; i++) { 824 | temp = rk[7]; 825 | rk[8] = rk[0] ^ TE421(temp) ^ TE432(temp) ^ 826 | TE443(temp) ^ TE414(temp) ^ RCON(i); 827 | rk[9] = rk[1] ^ rk[8]; 828 | rk[10] = rk[2] ^ rk[9]; 829 | rk[11] = rk[3] ^ rk[10]; 830 | if (i == 6) 831 | return 14; 832 | temp = rk[11]; 833 | rk[12] = rk[4] ^ TE411(temp) ^ TE422(temp) ^ 
834 | TE433(temp) ^ TE444(temp); 835 | rk[13] = rk[5] ^ rk[12]; 836 | rk[14] = rk[6] ^ rk[13]; 837 | rk[15] = rk[7] ^ rk[14]; 838 | rk += 8; 839 | } 840 | } 841 | 842 | return -1; 843 | } 844 | 845 | void aes_rijndael_encrypt(const u32 rk[], int Nr, const u8 pt[16], u8 ct[16]) 846 | { 847 | u32 s0, s1, s2, s3, t0, t1, t2, t3; 848 | #ifndef AES_FULL_UNROLL 849 | int r; 850 | #endif /* AES_FULL_UNROLL */ 851 | 852 | /* 853 | * map byte array block to cipher state 854 | * and add initial round key: 855 | */ 856 | s0 = GETU32(pt ) ^ rk[0]; 857 | s1 = GETU32(pt + 4) ^ rk[1]; 858 | s2 = GETU32(pt + 8) ^ rk[2]; 859 | s3 = GETU32(pt + 12) ^ rk[3]; 860 | 861 | #define ROUND(i,d,s) \ 862 | d##0 = TE0(s##0) ^ TE1(s##1) ^ TE2(s##2) ^ TE3(s##3) ^ rk[4 * i]; \ 863 | d##1 = TE0(s##1) ^ TE1(s##2) ^ TE2(s##3) ^ TE3(s##0) ^ rk[4 * i + 1]; \ 864 | d##2 = TE0(s##2) ^ TE1(s##3) ^ TE2(s##0) ^ TE3(s##1) ^ rk[4 * i + 2]; \ 865 | d##3 = TE0(s##3) ^ TE1(s##0) ^ TE2(s##1) ^ TE3(s##2) ^ rk[4 * i + 3] 866 | 867 | #ifdef AES_FULL_UNROLL 868 | 869 | ROUND(1,t,s); 870 | ROUND(2,s,t); 871 | ROUND(3,t,s); 872 | ROUND(4,s,t); 873 | ROUND(5,t,s); 874 | ROUND(6,s,t); 875 | ROUND(7,t,s); 876 | ROUND(8,s,t); 877 | ROUND(9,t,s); 878 | if (Nr > 10) { 879 | ROUND(10,s,t); 880 | ROUND(11,t,s); 881 | if (Nr > 12) { 882 | ROUND(12,s,t); 883 | ROUND(13,t,s); 884 | } 885 | } 886 | 887 | rk += Nr << 2; 888 | 889 | #else 890 | 891 | /* Nr - 1 full rounds: */ 892 | r = Nr >> 1; 893 | for (;;) { 894 | ROUND(1,t,s); 895 | rk += 8; 896 | if (--r == 0) 897 | break; 898 | ROUND(0,s,t); 899 | } 900 | 901 | #endif /* AES_FULL_UNROLL */ 902 | 903 | #undef ROUND 904 | 905 | /* 906 | * apply last round and 907 | * map cipher state to byte array block: 908 | */ 909 | s0 = TE41(t0) ^ TE42(t1) ^ TE43(t2) ^ TE44(t3) ^ rk[0]; 910 | PUTU32(ct , s0); 911 | s1 = TE41(t1) ^ TE42(t2) ^ TE43(t3) ^ TE44(t0) ^ rk[1]; 912 | PUTU32(ct + 4, s1); 913 | s2 = TE41(t2) ^ TE42(t3) ^ TE43(t0) ^ TE44(t1) ^ rk[2]; 914 | PUTU32(ct + 8, s2); 915 | 
s3 = TE41(t3) ^ TE42(t0) ^ TE43(t1) ^ TE44(t2) ^ rk[3]; 916 | PUTU32(ct + 12, s3); 917 | } 918 | 919 | 920 | /** 921 | * Expand the cipher key into the decryption key schedule. 922 | * 923 | * @return the number of rounds for the given cipher key size. 924 | */ 925 | int aes_rijndael_key_setup_dec(u32 rk[], const u8 cipherKey[], size_t keyBits) 926 | { 927 | int Nr, i, j; 928 | u32 temp; 929 | 930 | /* expand the cipher key: */ 931 | Nr = aes_rijndael_key_setup_enc(rk, cipherKey, keyBits); 932 | if (Nr < 0) 933 | return Nr; 934 | /* invert the order of the round keys: */ 935 | for (i = 0, j = 4*Nr; i < j; i += 4, j -= 4) { 936 | temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp; 937 | temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp; 938 | temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp; 939 | temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp; 940 | } 941 | /* apply the inverse MixColumn transform to all round keys but the 942 | * first and the last: */ 943 | for (i = 1; i < Nr; i++) { 944 | rk += 4; 945 | for (j = 0; j < 4; j++) { 946 | rk[j] = TD0_(TE4((rk[j] >> 24) )) ^ 947 | TD1_(TE4((rk[j] >> 16) & 0xff)) ^ 948 | TD2_(TE4((rk[j] >> 8) & 0xff)) ^ 949 | TD3_(TE4((rk[j] ) & 0xff)); 950 | } 951 | } 952 | 953 | return Nr; 954 | } 955 | 956 | void aes_rijndael_decrypt(const u32 rk[], int Nr, const u8 ct[16], u8 pt[16]) 957 | { 958 | u32 s0, s1, s2, s3, t0, t1, t2, t3; 959 | #ifndef AES_FULL_UNROLL 960 | int r; 961 | #endif /* AES_FULL_UNROLL */ 962 | 963 | /* 964 | * map byte array block to cipher state 965 | * and add initial round key: 966 | */ 967 | s0 = GETU32(ct ) ^ rk[0]; 968 | s1 = GETU32(ct + 4) ^ rk[1]; 969 | s2 = GETU32(ct + 8) ^ rk[2]; 970 | s3 = GETU32(ct + 12) ^ rk[3]; 971 | 972 | #define ROUND(i,d,s) \ 973 | d##0 = TD0(s##0) ^ TD1(s##3) ^ TD2(s##2) ^ TD3(s##1) ^ rk[4 * i]; \ 974 | d##1 = TD0(s##1) ^ TD1(s##0) ^ TD2(s##3) ^ TD3(s##2) ^ rk[4 * i + 1]; \ 975 | d##2 = TD0(s##2) ^ TD1(s##1) ^ TD2(s##0) ^ TD3(s##3) ^ rk[4 * i + 2]; \ 
976 | d##3 = TD0(s##3) ^ TD1(s##2) ^ TD2(s##1) ^ TD3(s##0) ^ rk[4 * i + 3] 977 | 978 | #ifdef AES_FULL_UNROLL 979 | 980 | ROUND(1,t,s); 981 | ROUND(2,s,t); 982 | ROUND(3,t,s); 983 | ROUND(4,s,t); 984 | ROUND(5,t,s); 985 | ROUND(6,s,t); 986 | ROUND(7,t,s); 987 | ROUND(8,s,t); 988 | ROUND(9,t,s); 989 | if (Nr > 10) { 990 | ROUND(10,s,t); 991 | ROUND(11,t,s); 992 | if (Nr > 12) { 993 | ROUND(12,s,t); 994 | ROUND(13,t,s); 995 | } 996 | } 997 | 998 | rk += Nr << 2; 999 | 1000 | #else 1001 | 1002 | /* Nr - 1 full rounds: */ 1003 | r = Nr >> 1; 1004 | for (;;) { 1005 | ROUND(1,t,s); 1006 | rk += 8; 1007 | if (--r == 0) 1008 | break; 1009 | ROUND(0,s,t); 1010 | } 1011 | 1012 | #endif /* AES_FULL_UNROLL */ 1013 | 1014 | #undef ROUND 1015 | 1016 | /* 1017 | * apply last round and 1018 | * map cipher state to byte array block: 1019 | */ 1020 | s0 = TD41(t0) ^ TD42(t3) ^ TD43(t2) ^ TD44(t1) ^ rk[0]; 1021 | PUTU32(pt , s0); 1022 | s1 = TD41(t1) ^ TD42(t0) ^ TD43(t3) ^ TD44(t2) ^ rk[1]; 1023 | PUTU32(pt + 4, s1); 1024 | s2 = TD41(t2) ^ TD42(t1) ^ TD43(t0) ^ TD44(t3) ^ rk[2]; 1025 | PUTU32(pt + 8, s2); 1026 | s3 = TD41(t3) ^ TD42(t2) ^ TD43(t1) ^ TD44(t0) ^ rk[3]; 1027 | PUTU32(pt + 12, s3); 1028 | } 1029 | 1030 | 1031 | /* AES decrypt interface */ 1032 | 1033 | void * aes_decrypt_init(const u8 *key, size_t len) 1034 | { 1035 | u32 *rk; 1036 | int res; 1037 | rk = malloc(AES_PRIV_SIZE); 1038 | if (rk == NULL) 1039 | return NULL; 1040 | res = aes_rijndael_key_setup_dec(rk, key, len * 8); 1041 | if (res < 0) { 1042 | free(rk); 1043 | return NULL; 1044 | } 1045 | rk[AES_PRIV_NR_POS] = res; 1046 | return rk; 1047 | } 1048 | 1049 | void aes_decrypt(void *ctx, const u8 *crypt, u8 *plain) 1050 | { 1051 | u32 *rk = ctx; 1052 | aes_rijndael_decrypt(ctx, rk[AES_PRIV_NR_POS], crypt, plain); 1053 | } 1054 | 1055 | void aes_decrypt_deinit(void *ctx) 1056 | { 1057 | memset(ctx, 0, AES_PRIV_SIZE); 1058 | free(ctx); 1059 | } 1060 | 1061 | 1062 | /* AES encrypt interface */ 1063 | 1064 | void * 
/*
 * Benchmark driver: fill a 32 MiB buffer with a test pattern, encrypt it
 * block by block with AES-128, decrypt the result and print the memcmp()
 * result of the round trip (0 on success).
 *
 * Returns 0 when the round trip ran, 1 if a buffer or an AES context could
 * not be allocated.
 */
int main(void)
{
	static const u8 key[16] = {
		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
		0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
	};
	static const size_t DATA_SIZE = 32 * 1024 * 1024;

	int status = 1;
	void *rk1 = NULL, *rk2 = NULL;
	u8 c = 0x01; /* unsigned: the pattern must not depend on char signedness */
	size_t j;

	/*
	 * The pattern loop below writes only every sizeof(int)-th byte, so the
	 * plaintext is zero-initialised to keep the remaining bytes defined.
	 */
	u8 *pt1 = calloc(DATA_SIZE, 1);
	u8 *ct = malloc(DATA_SIZE);
	u8 *pt2 = malloc(DATA_SIZE);

	if (pt1 == NULL || ct == NULL || pt2 == NULL) {
		fprintf(stderr, "aes: out of memory\n");
		goto out;
	}

	/* create test pattern */
	for (j = 0; j < DATA_SIZE; j += sizeof(int)) {
		pt1[j] = (c ^= c * 7);
	}

	/* encrypt test pattern */
	rk1 = aes_encrypt_init(key, sizeof(key));
	if (rk1 == NULL) {
		fprintf(stderr, "aes: encrypt init failed\n");
		goto out;
	}
	for (j = 0; j < DATA_SIZE; j += 16) {
		aes_encrypt(rk1, pt1 + j, ct + j);
	}
	aes_encrypt_deinit(rk1);

	/* decrypt test pattern */
	rk2 = aes_decrypt_init(key, sizeof(key));
	if (rk2 == NULL) {
		fprintf(stderr, "aes: decrypt init failed\n");
		goto out;
	}
	for (j = 0; j < DATA_SIZE; j += 16) {
		aes_decrypt(rk2, ct + j, pt2 + j);
	}
	aes_decrypt_deinit(rk2);

	/* compare */
	printf("%d\n", memcmp(pt1, pt2, DATA_SIZE));
	status = 0;

out:
	/* free(NULL) is a no-op, so partial allocation failures are fine here */
	free(pt1);
	free(ct);
	free(pt2);
	return status;
}
/*!
 * Unsigned natural number of arbitrary size, stored as a little-endian
 * vector of 32-bit limbs.
 */
struct Nat
{
	/*! limb type */
	typedef unsigned int limb_t;
	typedef unsigned long long limb2_t;
	typedef signed long long slimb2_t;

	/*! limb bit width and bit shift */
	enum { limb_bits = 32, limb_shift = 5 };

	/*!
	 * limbs is a vector of words with the little end at offset 0
	 */
	std::vector<limb_t> limbs;


	/*
	 * constructors
	 */

	/*! default constructor */
	Nat();

	/*! integral constructor */
	Nat(const limb_t n);

	/*! array constructor */
	Nat(const std::initializer_list<limb_t> l);

	/*! string constructor */
	Nat(std::string str, size_t radix = 0);

	/*! copy constructor */
	Nat(const Nat &operand);

	/*! move constructor (non-const rvalue so the limbs are really moved) */
	Nat(Nat&& operand) noexcept;


	/*
	 * assignment operators
	 */

	/*! integral copy assignment operator */
	Nat& operator=(const limb_t l);

	/*! Nat copy assignment operator */
	Nat& operator=(const Nat &operand);

	/*! Nat move assignment operator */
	Nat& operator=(Nat &&operand);


	/*
	 * internal methods
	 */

	/*! expand limbs to match operand */
	void _expand(const Nat &operand);

	/*! contract zero big end limbs */
	void _contract();

	/*! resize number of limbs */
	void _resize(size_t n);


	/*
	 * handy limb and bit accessor methods
	 */

	/*! return number of limbs */
	size_t num_limbs() const;

	/*! access word at limb offset */
	limb_t limb_at(size_t n) const;

	/*! test bit at bit offset */
	int test_bit(size_t n) const;

	/*! set bit at bit offset */
	void set_bit(size_t n);


	/*
	 * add, subtract, shifts and logical operators
	 */

	/*! add with carry equals */
	Nat& operator+=(const Nat &operand);

	/*! subtract with borrow equals */
	Nat& operator-=(const Nat &operand);

	/*! left shift equals */
	Nat& operator<<=(size_t shamt);

	/*! right shift equals */
	Nat& operator>>=(size_t shamt);

	/*! logical and equals */
	Nat& operator&=(const Nat &operand);

	/*! logical or equals */
	Nat& operator|=(const Nat &operand);

	/*! add with carry */
	Nat operator+(const Nat &operand) const;

	/*! subtract with borrow */
	Nat operator-(const Nat &operand) const;

	/*! left shift */
	Nat operator<<(size_t shamt) const;

	/*! right shift */
	Nat operator>>(size_t shamt) const;

	/*! logical and */
	Nat operator&(const Nat &operand) const;

	/*! logical or */
	Nat operator|(const Nat &operand) const;


	/*
	 * comparison operators
	 */

	/*! equals */
	bool operator==(const Nat &operand) const;

	/*! less than */
	bool operator<(const Nat &operand) const;

	/*! not equals */
	bool operator!=(const Nat &operand) const;

	/*! less than or equal */
	bool operator<=(const Nat &operand) const;

	/*! greater than */
	bool operator>(const Nat &operand) const;

	/*! greater than or equal */
	bool operator>=(const Nat &operand) const;

	/*! not */
	bool operator!() const;


	/*
	 * multiply, divide and pow
	 */

	/*! base 2^limb_bits multiply */
	static void mult(const Nat &multiplicand, const Nat multiplier, Nat &result);

	/*! base 2^limb_bits division */
	static void divrem(const Nat &dividend, const Nat &divisor, Nat &quotient, Nat &remainder);

	/*! multiply */
	Nat operator*(const Nat &operand) const;

	/*! division quotient */
	Nat operator/(const Nat &divisor) const;

	/*! division remainder */
	Nat operator%(const Nat &divisor) const;

	/*! multiply equals */
	Nat& operator*=(const Nat &operand);

	/*! divide equals */
	Nat& operator/=(const Nat &operand);

	/*! modulus equals */
	Nat& operator%=(const Nat &operand);

	/*! raise to the power */
	Nat pow(size_t exp) const;


	/*
	 * convert natural number to string
	 */

	/*! convert Nat to string */
	std::string to_string(size_t radix = 10) const;

	/*! convert Nat from string */
	void from_string(const char *str, size_t len, size_t radix);

};

using limb_t = Nat::limb_t;
using limb2_t = Nat::limb2_t;

/*
 * constructors
 */

/*! default constructor */
Nat::Nat() : limbs{ 0 } {}

/*! integral constructor */
Nat::Nat(const limb_t n) : limbs{ n } {}

/*! array constructor */
Nat::Nat(const std::initializer_list<limb_t> l) : limbs(l) { _contract(); }

/*! string constructor */
Nat::Nat(std::string str, size_t radix) : limbs{0} { from_string(str.c_str(), str.size(), radix); }

/*! copy constructor */
Nat::Nat(const Nat &operand) : limbs(operand.limbs) {}

/*!
 * move constructor
 *
 * Takes a non-const rvalue reference: std::move() on a const object yields
 * a const rvalue, which can only be copied, so a `const Nat&&` parameter
 * would silently copy the limb vector instead of stealing it.
 */
Nat::Nat(Nat&& operand) noexcept : limbs(std::move(operand.limbs)) {}


/*
 * assignment operators
 */

/*! integral copy assignment operator */
Nat& Nat::operator=(const limb_t l)
{
	_resize(1);
	limbs[0] = l;
	return *this;
}

/*! Nat copy assignment operator */
Nat& Nat::operator=(const Nat &operand)
{
	limbs = operand.limbs;
	return *this;
}

/*! Nat move assignment operator */
Nat& Nat::operator=(Nat &&operand)
{
	limbs = std::move(operand.limbs);
	return *this;
}


/*
 * internal methods
 */

/*! expand limbs to match operand */
void Nat::_expand(const Nat &operand)
{
	limbs.resize(std::max(num_limbs(), operand.num_limbs()));
}
contract zero big end limbs */ 311 | void Nat::_contract() 312 | { 313 | while(num_limbs() > 1 && limbs.back() == 0) { 314 | limbs.pop_back(); 315 | } 316 | } 317 | 318 | /*! resize number of limbs */ 319 | void Nat::_resize(size_t n) 320 | { 321 | limbs.resize(n); 322 | } 323 | 324 | 325 | /* 326 | * handy limb and bit accessor methods 327 | */ 328 | 329 | /*! return number of limbs */ 330 | size_t Nat::num_limbs() const { return limbs.size(); } 331 | 332 | /*! access word at limb offset */ 333 | limb_t Nat::limb_at(size_t n) const { return n < num_limbs() ? limbs[n] : 0; } 334 | 335 | /*! test bit at bit offset */ 336 | int Nat::test_bit(size_t n) const 337 | { 338 | size_t word = n >> limb_shift; 339 | if (word < num_limbs()) return (limbs[word] >> (n & (limb_bits-1))) & 1; 340 | else return 0; 341 | } 342 | 343 | /*! set bit at bit offset */ 344 | void Nat::set_bit(size_t n) 345 | { 346 | size_t word = n >> limb_shift; 347 | if (word >= num_limbs()) _resize(word + 1); 348 | limbs[word] |= (1ULL << (n & (limb_bits-1))); 349 | } 350 | 351 | 352 | /* 353 | * multiply and divide require add with carry, subtract 354 | * with borrow, left and right shift logical operators 355 | */ 356 | 357 | 358 | /* define self mutating operations */ 359 | 360 | /*! add with carry equals */ 361 | Nat& Nat::operator+=(const Nat &operand) 362 | { 363 | _expand(operand); 364 | limb_t carry = 0; 365 | for (size_t i = 0; i < num_limbs(); i++) { 366 | limb_t old_val = limbs[i]; 367 | limb_t new_val = old_val + operand.limb_at(i) + carry; 368 | limbs[i] = new_val; 369 | carry = new_val < old_val; 370 | } 371 | if (carry) { 372 | limbs.push_back(1); 373 | } 374 | return *this; 375 | } 376 | 377 | /*! 
subtract with borrow equals */ 378 | Nat& Nat::operator-=(const Nat &operand) 379 | { 380 | _expand(operand); 381 | limb_t borrow = 0; 382 | for (size_t i = 0; i < num_limbs(); i++) { 383 | limb_t old_val = limbs[i]; 384 | limb_t new_val = old_val - operand.limb_at(i) - borrow; 385 | limbs[i] = new_val; 386 | borrow = new_val > old_val; 387 | } 388 | assert(borrow == 0); /* unsigned underflow */ 389 | _contract(); 390 | return *this; 391 | } 392 | 393 | /*! left shift equals */ 394 | Nat& Nat::operator<<=(size_t shamt) 395 | { 396 | size_t limb_shamt = shamt >> limb_shift; 397 | if (limb_shamt > 0) { 398 | limbs.insert(limbs.begin(), limb_shamt, 0); 399 | shamt -= limb_shamt; 400 | } 401 | if (!shamt) return *this; 402 | 403 | limb_t carry = 0; 404 | for (size_t j = 0; j < num_limbs(); j++) { 405 | limb_t old_val = limbs[j]; 406 | limb_t new_val = (old_val << shamt) | carry; 407 | limbs[j] = new_val; 408 | carry = old_val >> (limb_bits - shamt); 409 | } 410 | if (carry) { 411 | limbs.push_back(carry); 412 | } 413 | return *this; 414 | } 415 | 416 | /*! right shift equals */ 417 | Nat& Nat::operator>>=(size_t shamt) 418 | { 419 | size_t limb_shamt = shamt >> limb_shift; 420 | if (limb_shamt > 0) { 421 | limbs.erase(limbs.begin(), limbs.begin() + limb_shamt); 422 | shamt -= limb_shamt; 423 | } 424 | if (!shamt) return *this; 425 | 426 | limb_t carry = 0; 427 | for (size_t j = num_limbs(); j > 0; j--) { 428 | limb_t old_val = limbs[j - 1]; 429 | limb_t new_val = (old_val >> shamt) | carry; 430 | limbs[j - 1] = new_val; 431 | carry = old_val << (limb_bits - shamt); 432 | } 433 | _contract(); 434 | return *this; 435 | } 436 | 437 | /*! logical and equals */ 438 | Nat& Nat::operator&=(const Nat &operand) 439 | { 440 | _expand(operand); 441 | for (size_t i = 0; i < num_limbs(); i++) { 442 | limbs[i] = operand.limb_at(i) & limbs[i]; 443 | } 444 | _contract(); 445 | return *this; 446 | } 447 | 448 | /*! 
logical or equals */ 449 | Nat& Nat::operator|=(const Nat &operand) 450 | { 451 | _expand(operand); 452 | for (size_t i = 0; i < num_limbs(); i++) { 453 | limbs[i] = operand.limb_at(i) | limbs[i]; 454 | } 455 | _contract(); 456 | return *this; 457 | } 458 | 459 | 460 | /* const operations copy and use the mutating operations */ 461 | 462 | /*! add with carry */ 463 | Nat Nat::operator+(const Nat &operand) const 464 | { 465 | Nat result(*this); 466 | return result += operand; 467 | } 468 | 469 | /*! subtract with borrow */ 470 | Nat Nat::operator-(const Nat &operand) const 471 | { 472 | Nat result(*this); 473 | return result -= operand; 474 | } 475 | 476 | /*! left shift */ 477 | Nat Nat::operator<<(size_t shamt) const 478 | { 479 | Nat result(*this); 480 | return result <<= shamt; 481 | } 482 | 483 | /*! right shift */ 484 | Nat Nat::operator>>(size_t shamt) const 485 | { 486 | Nat result(*this); 487 | return result >>= shamt; 488 | } 489 | 490 | /*! logical and */ 491 | Nat Nat::operator&(const Nat &operand) const 492 | { 493 | Nat result(*this); 494 | return result &= operand; 495 | } 496 | 497 | /*! logical or */ 498 | Nat Nat::operator|(const Nat &operand) const 499 | { 500 | Nat result(*this); 501 | return result |= operand; 502 | } 503 | 504 | 505 | /* 506 | * comparison are defined in terms of "equals" and "less than" 507 | */ 508 | 509 | /*! equals */ 510 | bool Nat::operator==(const Nat &operand) const 511 | { 512 | if (num_limbs() != operand.num_limbs()) return false; 513 | for (size_t i = 0; i < num_limbs(); i++) { 514 | if (limbs[i] != operand.limbs[i]) return false; 515 | } 516 | return true; 517 | } 518 | 519 | /*! 
less than */ 520 | bool Nat::operator<(const Nat &operand) const 521 | { 522 | if (num_limbs() > operand.num_limbs()) return false; 523 | else if (num_limbs() < operand.num_limbs()) return true; 524 | for (ptrdiff_t i = num_limbs()-1; i >= 0; i--) { 525 | if (limbs[i] > operand.limbs[i]) return false; 526 | else if (limbs[i] < operand.limbs[i]) return true; 527 | } 528 | return false; 529 | } 530 | 531 | 532 | /* 533 | * axiomatically define other comparisons in terms of "equals" and "less than" 534 | */ 535 | 536 | /*! not equals */ 537 | bool Nat::operator!=(const Nat &operand) const { return !(*this == operand); } 538 | 539 | /*! less than or equal*/ 540 | bool Nat::operator<=(const Nat &operand) const { return *this < operand || *this == operand; } 541 | 542 | /*! greater than */ 543 | bool Nat::operator>(const Nat &operand) const { return !(*this <= operand); } 544 | 545 | /*! less than or equal*/ 546 | bool Nat::operator>=(const Nat &operand) const { return !(*this < operand) || *this == operand; } 547 | 548 | /*! not */ 549 | bool Nat::operator!() const { return *this == 0; } 550 | 551 | /* 552 | * multply and divide 553 | * 554 | * These routines are derived from Hacker's Delight 555 | */ 556 | 557 | /*! base 2^limb_bits multiply */ 558 | void Nat::mult(const Nat &multiplicand, const Nat multiplier, Nat &result) 559 | { 560 | size_t m = multiplicand.num_limbs(), n = multiplier.num_limbs(); 561 | result._resize(m + n); 562 | for (size_t j = 0; j < n; j++) { 563 | limb_t k = 0; 564 | for (size_t i = 0; i < m; i++) { 565 | limb2_t t = limb2_t(multiplicand.limbs[i]) * limb2_t(multiplier.limbs[j]) + 566 | limb2_t(result.limbs[i + j]) + limb2_t(k); 567 | result.limbs[i + j] = limb_t(t); 568 | k = t >> limb_bits; 569 | } 570 | result.limbs[j + m] = k; 571 | } 572 | result._contract(); 573 | } 574 | 575 | /*! 
base 2^limb_bits division */ 576 | void Nat::divrem(const Nat ÷nd, const Nat &divisor, Nat "ient, Nat &remainder) 577 | { 578 | quotient = 0; 579 | remainder = 0; 580 | ptrdiff_t m = dividend.num_limbs(), n = divisor.num_limbs(); 581 | quotient._resize(std::max(m - n + 1, ptrdiff_t(1))); 582 | remainder._resize(n); 583 | limb_t *q = quotient.limbs.data(), *r = remainder.limbs.data(); 584 | const limb_t *u = dividend.limbs.data(), *v = divisor.limbs.data(); 585 | 586 | const limb2_t b = (1ULL << limb_bits); // Number base 587 | limb_t *un, *vn; // Normalized form of u, v. 588 | limb2_t qhat; // Estimated quotient digit. 589 | limb2_t rhat; // A remainder. 590 | 591 | if (m < n || n <= 0 || v[n-1] == 0) { 592 | quotient = 0; 593 | remainder = dividend; 594 | return; 595 | } 596 | 597 | // Single digit divisor 598 | if (n == 1) { 599 | limb2_t k = 0; 600 | for (ptrdiff_t j = m - 1; j >= 0; j--) { 601 | q[j] = limb_t((k*b + u[j]) / v[0]); 602 | k = (k*b + u[j]) - q[j]*v[0]; 603 | } 604 | r[0] = limb_t(k); 605 | quotient._contract(); 606 | remainder._contract(); 607 | return; 608 | } 609 | 610 | // Normalize by shifting v left just enough so that 611 | // its high-order bit is on, and shift u left the 612 | // same amount. We may have to append a high-order 613 | // digit on the dividend; we do that unconditionally. 614 | 615 | int s = clz(v[n-1]); // 0 <= s <= limb_bits. 616 | vn = (limb_t *)alloca(sizeof(limb_t) * n); 617 | for (ptrdiff_t i = n - 1; i > 0; i--) { 618 | vn[i] = (v[i] << s) | (v[i-1] >> (limb_bits-s)); 619 | } 620 | vn[0] = v[0] << s; 621 | 622 | un = (limb_t *)alloca(sizeof(limb_t) * (m + 1)); 623 | un[m] = u[m-1] >> (limb_bits-s); 624 | for (ptrdiff_t i = m - 1; i > 0; i--) { 625 | un[i] = (u[i] << s) | (u[i-1] >> (limb_bits-s)); 626 | } 627 | un[0] = u[0] << s; 628 | for (ptrdiff_t j = m - n; j >= 0; j--) { // Main loop. 629 | // Compute estimate qhat of q[j]. 
630 | qhat = (un[j+n]*b + un[j+n-1]) / vn[n-1]; 631 | rhat = (un[j+n]*b + un[j+n-1]) - qhat * vn[n-1]; 632 | again: 633 | if (qhat >= b || qhat*vn[n-2] > b*rhat + un[j+n-2]) { 634 | qhat = qhat - 1; 635 | rhat = rhat + vn[n-1]; 636 | if (rhat < b) goto again; 637 | } 638 | // Multiply and subtract. 639 | limb2_t k = 0; 640 | slimb2_t t = 0; 641 | for (ptrdiff_t i = 0; i < n; i++) { 642 | unsigned long long p = qhat*vn[i]; 643 | t = un[i+j] - k - (p & ((1ULL<> limb_bits) - (t >> limb_bits); 646 | } 647 | t = un[j+n] - k; 648 | un[j+n] = limb_t(t); 649 | 650 | q[j] = limb_t(qhat); // Store quotient digit. 651 | if (t < 0) { // If we subtracted too 652 | q[j] = q[j] - 1; // much, add back. 653 | k = 0; 654 | for (ptrdiff_t i = 0; i < n; i++) { 655 | t = un[i+j] + vn[i] + k; 656 | un[i+j] = limb_t(t); 657 | k = t >> limb_bits; 658 | } 659 | un[j+n] = limb_t(un[j+n] + k); 660 | } 661 | } 662 | 663 | // normalize remainder 664 | for (ptrdiff_t i = 0; i < n; i++) { 665 | r[i] = (un[i] >> s) | (un[i + 1] << (limb_bits - s)); 666 | } 667 | 668 | quotient._contract(); 669 | remainder._contract(); 670 | } 671 | 672 | /*! multiply */ 673 | Nat Nat::operator*(const Nat &operand) const 674 | { 675 | Nat result(0); 676 | mult(*this, operand, result); 677 | return result; 678 | } 679 | 680 | /*! division quotient */ 681 | Nat Nat::operator/(const Nat &divisor) const 682 | { 683 | Nat quotient(0), remainder(0); 684 | divrem(*this, divisor, quotient, remainder); 685 | return quotient; 686 | } 687 | 688 | /*! division remainder */ 689 | Nat Nat::operator%(const Nat &divisor) const 690 | { 691 | Nat quotient(0), remainder(0); 692 | divrem(*this, divisor, quotient, remainder); 693 | return remainder; 694 | } 695 | 696 | /*! multiply equals */ 697 | Nat& Nat::operator*=(const Nat &operand) 698 | { 699 | Nat result = *this * operand; 700 | *this = std::move(result); 701 | return *this; 702 | } 703 | 704 | /*! 
divide equals */ 705 | Nat& Nat::operator/=(const Nat &operand) 706 | { 707 | Nat result = *this / operand; 708 | *this = std::move(result); 709 | return *this; 710 | } 711 | 712 | /*! modulus equals */ 713 | Nat& Nat::operator%=(const Nat &operand) 714 | { 715 | Nat result = *this % operand; 716 | *this = std::move(result); 717 | return *this; 718 | } 719 | 720 | /* 721 | * pow 722 | * 723 | * power via squaring 724 | */ 725 | 726 | /*! raise to the power */ 727 | Nat Nat::pow(size_t exp) const 728 | { 729 | if (exp == 0) return 1; 730 | Nat x = *this, y = 1; 731 | while (exp > 1) { 732 | if ((exp & 1) == 0) { 733 | exp >>= 1; 734 | } else { 735 | y = x * y; 736 | exp = (exp - 1) >> 1; 737 | } 738 | x = x * x; 739 | } 740 | return x * y; 741 | } 742 | 743 | 744 | /* 745 | * helpers for recursive divide and conquer conversion to string 746 | */ 747 | 748 | static inline ptrdiff_t _to_string_c(const Nat &val, std::string &s, ptrdiff_t offset) 749 | { 750 | limb2_t v = limb2_t(val.limb_at(0)) | (limb2_t(val.limb_at(1)) << Nat::limb_bits); 751 | do { 752 | s[--offset] = '0' + char(v % 10); 753 | } while ((v /= 10) != 0); 754 | return offset; 755 | } 756 | 757 | static ptrdiff_t _to_string_r(const Nat &val, std::vector &sq, size_t level, 758 | std::string &s, size_t digits, ptrdiff_t offset) 759 | { 760 | Nat q, r; 761 | Nat::divrem(val, sq[level], q, r); 762 | if (level > 0) { 763 | if (r != 0) { 764 | if (q != 0) { 765 | _to_string_r(r, sq, level-1, s, digits >> 1, offset); 766 | return _to_string_r(q, sq, level-1, s, digits >> 1, offset - digits); 767 | } else { 768 | return _to_string_r(r, sq, level-1, s, digits >> 1, offset); 769 | } 770 | } 771 | } else { 772 | if (r != 0) { 773 | if (q != 0) { 774 | _to_string_c(r, s, offset); 775 | offset = _to_string_c(q, s, offset - digits); 776 | } else { 777 | offset = _to_string_c(r, s, offset); 778 | } 779 | } 780 | } 781 | return offset; 782 | } 783 | 784 | /* 785 | * convert natural number to string 786 | */ 787 | 788 | 
/*! convert Nat to string */ 789 | std::string Nat::to_string(size_t radix) const 790 | { 791 | static const char* hexdigits = "0123456789abcdef"; 792 | static const Nat tenp18{0xa7640000, 0xde0b6b3}; 793 | static const size_t dgib = 3566893131; /* log2(10) * 1024^3 */ 794 | 795 | switch (radix) { 796 | case 10: { 797 | /* estimate string length */ 798 | std::string s; 799 | size_t climit = (size_t)(((long long)(num_limbs()) << (limb_shift + 30)) / (long long)dgib) + 1; 800 | s.resize(climit, '0'); 801 | 802 | /* square the chunk size until ~= sqrt(n) */ 803 | Nat chunk = tenp18; 804 | size_t digits = 18; 805 | std::vector sq = { tenp18 }; 806 | do { 807 | chunk *= chunk; 808 | digits <<= 1; 809 | sq.push_back(chunk); 810 | } while ((chunk.num_limbs() < (num_limbs() >> 1))); 811 | 812 | /* recursively divide by chunk squares */ 813 | ptrdiff_t offset = _to_string_r(*this, sq, sq.size() - 1, s, digits, climit); 814 | 815 | /* return less reserve */ 816 | return s.substr(offset); 817 | } 818 | case 2: { 819 | std::string s("0b"); 820 | limb_t l1 = limbs.back(); 821 | size_t n = limb_bits - clz(l1); 822 | size_t t = n + ((num_limbs() - 1) << limb_shift); 823 | s.resize(t + 2); 824 | auto i = s.begin() + 2; 825 | for (ptrdiff_t k = n - 1; k >= 0; k--) { 826 | *(i++) = '0' + ((l1 >> k) & 1); 827 | } 828 | for (ptrdiff_t j = num_limbs() - 2; j >= 0; j--) { 829 | limb_t l = limbs[j]; 830 | for (ptrdiff_t k = limb_bits - 1; k >= 0; k--) { 831 | *(i++) = '0' + ((l >> k) & 1); 832 | } 833 | } 834 | return s; 835 | } 836 | case 16: { 837 | std::string s("0x"); 838 | limb_t l1 = limbs.back(); 839 | size_t n = ((limb_bits >> 2) - (clz(l1) >> 2)); 840 | size_t t = n + ((num_limbs() - 1) << (limb_shift - 2)); 841 | s.resize(t + 2); 842 | auto i = s.begin() + 2; 843 | for (ptrdiff_t k = n - 1; k >= 0; k--) { 844 | *(i++) = hexdigits[(l1 >> (k << 2)) & 0xf]; 845 | } 846 | for (ptrdiff_t j = num_limbs() - 2; j >= 0; j--) { 847 | limb_t l = limbs[j]; 848 | for (ptrdiff_t k = 
(limb_bits >> 2) - 1; k >= 0; k--) { 849 | *(i++) = hexdigits[(l >> (k << 2)) & 0xf]; 850 | } 851 | } 852 | return s; 853 | } 854 | default: { 855 | return std::string(); 856 | } 857 | } 858 | } 859 | 860 | void Nat::from_string(const char *str, size_t len, size_t radix) 861 | { 862 | static const Nat tenp18{0xa7640000, 0xde0b6b3}; 863 | static const Nat twop64{0,0,1}; 864 | if (len > 2) { 865 | if (strncmp(str, "0b", 2) == 0) { 866 | radix = 2; 867 | str += 2; 868 | len -= 2; 869 | } else if (strncmp(str, "0x", 2) == 0) { 870 | radix = 16; 871 | str += 2; 872 | len -= 2; 873 | } 874 | } 875 | if (radix == 0) { 876 | radix = 10; 877 | } 878 | switch (radix) { 879 | case 10: { 880 | for (size_t i = 0; i < len; i += 18) { 881 | size_t chunklen = i + 18 < len ? 18 : len - i; 882 | std::string chunk(str + i, chunklen); 883 | limb2_t num = strtoull(chunk.c_str(), nullptr, 10); 884 | if (chunklen == 18) { 885 | *this *= tenp18; 886 | } else { 887 | *this *= Nat(10).pow(chunklen); 888 | } 889 | *this += Nat{limb_t(num), limb_t(num >> limb_bits)}; 890 | } 891 | break; 892 | } 893 | case 2: { 894 | for (size_t i = 0; i < len; i += 64) { 895 | size_t chunklen = i + 64 < len ? 64 : len - i; 896 | std::string chunk(str + i, chunklen); 897 | limb2_t num = strtoull(chunk.c_str(), nullptr, 2); 898 | if (chunklen == 64) { 899 | *this *= twop64; 900 | } else { 901 | *this *= Nat(2).pow(chunklen); 902 | } 903 | *this += Nat{limb_t(num), limb_t(num >> limb_bits)}; 904 | } 905 | break; 906 | } 907 | case 16: { 908 | for (size_t i = 0; i < len; i += 16) { 909 | size_t chunklen = i + 16 < len ? 
16 : len - i; 910 | std::string chunk(str + i, chunklen); 911 | limb2_t num = strtoull(chunk.c_str(), nullptr, 16); 912 | if (chunklen == 16) { 913 | *this *= twop64; 914 | } else { 915 | *this *= Nat(16).pow(chunklen); 916 | } 917 | *this += Nat{limb_t(num), limb_t(num >> limb_bits)}; 918 | } 919 | break; 920 | } 921 | default: { 922 | limbs.push_back(0); 923 | } 924 | } 925 | } 926 | 927 | int main(int argc, char const *argv[]) 928 | { 929 | int val = 23, power = 111121; 930 | Nat result = Nat(val).pow(power); 931 | printf("%d ^ %d has %d digits\n", val, power, result.to_string().size()); 932 | assert(result.limb_at(0) == 2367549335); 933 | } 934 | -------------------------------------------------------------------------------- /src/dhrystone.c: -------------------------------------------------------------------------------- 1 | /***** hpda:net.sources / homxb!gemini / 1:58 am Apr 1, 1986*/ 2 | /* EVERBODY: Please read "APOLOGY" below. -rick 01/06/85 3 | * See introduction in net.arch, or net.micro 4 | * 5 | * "DHRYSTONE" Benchmark Program 6 | * 7 | * Version: C/1.1-mc, 23/04/2017 8 | * 9 | * Author: Michael Clark 10 | * 11 | * - Use gettimeofday instead of time/getrusage. 12 | * - Changed K&R declarations to typesafe versions. 13 | * - Fix bug and call Proc3 with the address of RecordPtr 14 | * to match the types in the original K&R declaration. 15 | * 16 | * Version: C/1.1, 12/01/84 17 | * 18 | * Date: PROGRAM updated 01/06/86, RESULTS updated 03/31/86 19 | * 20 | * Author: Reinhold P. Weicker, CACM Vol 27, No 10, 10/84 pg. 1013 21 | * Translated from ADA by Rick Richardson 22 | * Every method to preserve ADA-likeness has been used, 23 | * at the expense of C-ness. 24 | * 25 | * Compile: cc -O dry.c -o drynr : No registers 26 | * cc -O -DREG=register dry.c -o dryr : Registers 27 | * 28 | * Defines: Defines are provided for old C compiler's 29 | * which don't have enums, and can't assign structures. 
30 | * The time(2) function is library dependent; Most 31 | * return the time in seconds, but beware of some, like 32 | * Aztec C, which return other units. 33 | * The LOOPS define is initially set for 50000 loops. 34 | * If you have a machine with large integers and is 35 | * very fast, please change this number to 500000 to 36 | * get better accuracy. Please select the way to 37 | * measure the execution time using the TIME define. 38 | * For single user machines, time(2) is adequate. For 39 | * multi-user machines where you cannot get single-user 40 | * access, use the times(2) function. If you have 41 | * neither, use a stopwatch in the dead of night. 42 | * Use a "printf" at the point marked "start timer" 43 | * to begin your timings. DO NOT use the UNIX "time(1)" 44 | * command, as this will measure the total time to 45 | * run this program, which will (erroneously) include 46 | * the time to malloc(3) storage and to compute the 47 | * time it takes to do nothing. 48 | * 49 | * Run: drynr; dryr 50 | * 51 | * Results: If you get any new machine/OS results, please send to: 52 | * 53 | * ihnp4!castor!pcrat!rick 54 | * 55 | * and thanks to all that do. Space prevents listing 56 | * the names of those who have provided some of these 57 | * results. I'll be forwarding these results to 58 | * Reinhold Weicker. 59 | * 60 | * Note: I order the list in increasing performance of the 61 | * "with registers" benchmark. If the compiler doesn't 62 | * provide register variables, then the benchmark 63 | * is the same for both REG and NOREG. 64 | * 65 | * PLEASE: Send complete information about the machine type, 66 | * clock speed, OS and C manufacturer/version. If 67 | * the machine is modified, tell me what was done. 68 | * On UNIX, execute uname -a and cc -V to get this info. 69 | * 70 | * 80x8x NOTE: 80x8x benchers: please try to do all memory models 71 | * for a particular compiler. 72 | * 73 | * APOLOGY (1/30/86): 74 | * Well, I goofed things up!
As pointed out by Haakon Bugge, 75 | * the line of code marked "GOOF" below was missing from the 76 | * Dhrystone distribution for the last several months. It 77 | * *WAS* in a backup copy I made last winter, so no doubt it 78 | * was victimized by sleepy fingers operating vi! 79 | * 80 | * The effect of the line missing is that the reported benchmarks 81 | * are 15% too fast (at least on a 80286). Now, this creates 82 | * a dilemma - do I throw out ALL the data so far collected 83 | * and use only results from this (corrected) version, or 84 | * do I just keep collecting data for the old version? 85 | * 86 | * Since the data collected so far *is* valid as long as it 87 | * is compared with like data, I have decided to keep 88 | * TWO lists- one for the old benchmark, and one for the 89 | * new. This also gives me an opportunity to correct one 90 | * other error I made in the instructions for this benchmark. 91 | * My experience with C compilers has been mostly with 92 | * UNIX 'pcc' derived compilers, where the 'optimizer' simply 93 | * fixes sloppy code generation (peephole optimization). 94 | * But today, there exist C compiler optimizers that will actually 95 | * perform optimization in the Computer Science sense of the word, 96 | * by removing, for example, assignments to a variable whose 97 | * value is never used. Dhrystone, unfortunately, provides 98 | * lots of opportunities for this sort of optimization. 99 | * 100 | * I request that benchmarkers re-run this new, corrected 101 | * version of Dhrystone, turning off or bypassing optimizers 102 | * which perform more than peephole optimization. Please 103 | * indicate the version of Dhrystone used when reporting the 104 | * results to me. 105 | * 106 | * RESULTS BEGIN HERE 107 | * 108 | *----------------DHRYSTONE VERSION 1.1 RESULTS BEGIN-------------------------- 109 | * 110 | * MACHINE MICROPROCESSOR OPERATING COMPILER DHRYSTONES/SEC.
111 | * TYPE SYSTEM NO REG REGS 112 | * -------------------------- ------------ ----------- --------------- 113 | * Apple IIe 65C02-1.02Mhz DOS 3.3 Aztec CII v1.05i 37 37 114 | * - Z80-2.5Mhz CPM-80 v2.2 Aztec CII v1.05g 91 91 115 | * - 8086-8Mhz RMX86 V6 Intel C-86 V2.0 197 203LM?? 116 | * IBM PC/XT 8088-4.77Mhz COHERENT 2.3.43 Mark Wiiliams 259 275 117 | * - 8086-8Mhz RMX86 V6 Intel C-86 V2.0 287 304 ?? 118 | * Fortune 32:16 68000-6Mhz V7+sys3+4.1BSD cc 360 346 119 | * PDP-11/34A w/FP-11C UNIX V7m cc 406 449 120 | * Macintosh512 68000-7.7Mhz Mac ROM O/S DeSmet(C ware) 625 625 121 | * VAX-11/750 w/FPA UNIX 4.2BSD cc 831 852 122 | * DataMedia 932 68000-10Mhz UNIX sysV cc 837 888 123 | * Plexus P35 68000-12.5Mhz UNIX sysIII cc 835 894 124 | * ATT PC7300 68010-10Mhz UNIX 5.0.3 cc 973 1034 125 | * Compaq II 80286-8Mhz MSDOS 3.1 MS C 3.0 1086 1140 LM 126 | * IBM PC/AT 80286-7.5Mhz Venix/286 SVR2 cc 1159 1254 *15 127 | * Compaq II 80286-8Mhz MSDOS 3.1 MS C 3.0 1190 1282 MM 128 | * MicroVAX II - Mach/4.3 cc 1361 1385 129 | * DEC uVAX II - Ultrix-32m v1.1 cc 1385 1399 130 | * Compaq II 80286-8Mhz MSDOS 3.1 MS C 3.0 1351 1428 131 | * VAX 11/780 - UNIX 4.2BSD cc 1417 1441 132 | * VAX-780/MA780 Mach/4.3 cc 1428 1470 133 | * VAX 11/780 - UNIX 5.0.1 cc 4.1.1.31 1650 1640 134 | * Ridge 32C V1 - ROS 3.3 Ridge C (older) 1628 1695 135 | * Gould PN6005 - UTX 1.1c+ (4.2) cc 1732 1884 136 | * Gould PN9080 custom ECL UTX-32 1.1C cc 4745 4992 137 | * VAX-784 - Mach/4.3 cc 5263 5555 &4 138 | * VAX 8600 - 4.3 BSD cc 6329 6423 139 | * Amdahl 5860 - UTS sysV cc 1.22 28735 28846 140 | * IBM3090/200 - ? ? 31250 31250 141 | * 142 | * 143 | *----------------DHRYSTONE VERSION 1.0 RESULTS BEGIN-------------------------- 144 | * 145 | * MACHINE MICROPROCESSOR OPERATING COMPILER DHRYSTONES/SEC. 
146 | * TYPE SYSTEM NO REG REGS 147 | * -------------------------- ------------ ----------- --------------- 148 | * Commodore 64 6510-1MHz C64 ROM C Power 2.8 36 36 149 | * HP-110 8086-5.33Mhz MSDOS 2.11 Lattice 2.14 284 284 150 | * IBM PC/XT 8088-4.77Mhz PC/IX cc 271 294 151 | * CCC 3205 - Xelos(SVR2) cc 558 592 152 | * Perq-II 2901 bitslice Accent S5c cc (CMU) 301 301 153 | * IBM PC/XT 8088-4.77Mhz COHERENT 2.3.43 MarkWilliams cc 296 317 154 | * Cosmos 68000-8Mhz UniSoft cc 305 322 155 | * IBM PC/XT 8088-4.77Mhz Venix/86 2.0 cc 297 324 156 | * DEC PRO 350 11/23 Venix/PRO SVR2 cc 299 325 157 | * IBM PC 8088-4.77Mhz MSDOS 2.0 b16cc 2.0 310 340 158 | * PDP11/23 11/23 Venix (V7) cc 320 358 159 | * Commodore Amiga ? Lattice 3.02 368 371 160 | * PC/XT 8088-4.77Mhz Venix/86 SYS V cc 339 377 161 | * IBM PC 8088-4.77Mhz MSDOS 2.0 CI-C86 2.20M 390 390 162 | * IBM PC/XT 8088-4.77Mhz PCDOS 2.1 Wizard 2.1 367 403 163 | * IBM PC/XT 8088-4.77Mhz PCDOS 3.1 Lattice 2.15 403 403 @ 164 | * Colex DM-6 68010-8Mhz Unisoft SYSV cc 378 410 165 | * IBM PC 8088-4.77Mhz PCDOS 3.1 Datalight 1.10 416 416 166 | * IBM PC NEC V20-4.77Mhz MSDOS 3.1 MS 3.1 387 420 167 | * IBM PC/XT 8088-4.77Mhz PCDOS 2.1 Microsoft 3.0 390 427 168 | * IBM PC NEC V20-4.77Mhz MSDOS 3.1 MS 3.1 (186) 393 427 169 | * PDP-11/34 - UNIX V7M cc 387 438 170 | * IBM PC 8088, 4.77mhz PC-DOS 2.1 Aztec C v3.2d 423 454 171 | * Tandy 1000 V20, 4.77mhz MS-DOS 2.11 Aztec C v3.2d 423 458 172 | * Tandy TRS-16B 68000-6Mhz Xenix 1.3.5 cc 438 458 173 | * PDP-11/34 - RSTS/E decus c 438 495 174 | * Onyx C8002 Z8000-4Mhz IS/1 1.1 (V7) cc 476 511 175 | * Tandy TRS-16B 68000-6Mhz Xenix 1.3.5 Green Hills 609 617 176 | * DEC PRO 380 11/73 Venix/PRO SVR2 cc 577 628 177 | * FHL QT+ 68000-10Mhz Os9/68000 version 1.3 603 649 FH 178 | * Apollo DN550 68010-?Mhz AegisSR9/IX cc 3.12 666 666 179 | * HP-110 8086-5.33Mhz MSDOS 2.11 Aztec-C 641 676 180 | * ATT PC6300 8086-8Mhz MSDOS 2.11 b16cc 2.0 632 684 181 | * IBM PC/AT 80286-6Mhz PCDOS 3.0 CI-C86 2.1 
666 684 182 | * Tandy 6000 68000-8Mhz Xenix 3.0 cc 694 694 183 | * IBM PC/AT 80286-6Mhz Xenix 3.0 cc 684 704 MM 184 | * Macintosh 68000-7.8Mhz 2M Mac Rom Mac C 32 bit int 694 704 185 | * Macintosh 68000-7.7Mhz - MegaMax C 2.0 661 709 186 | * Macintosh512 68000-7.7Mhz Mac ROM O/S DeSmet(C ware) 714 714 187 | * IBM PC/AT 80286-6Mhz Xenix 3.0 cc 704 714 LM 188 | * Codata 3300 68000-8Mhz UniPlus+ (v7) cc 678 725 189 | * WICAT MB 68000-8Mhz System V WICAT C 4.1 585 731 ~ 190 | * Cadmus 9000 68010-10Mhz UNIX cc 714 735 191 | * AT&T 6300 8086-8Mhz Venix/86 SVR2 cc 668 743 192 | * Cadmus 9790 68010-10Mhz 1MB SVR0,Cadmus3.7 cc 720 747 193 | * NEC PC9801F 8086-8Mhz PCDOS 2.11 Lattice 2.15 768 - @ 194 | * ATT PC6300 8086-8Mhz MSDOS 2.11 CI-C86 2.20M 769 769 195 | * Burroughs XE550 68010-10Mhz Centix 2.10 cc 769 769 CT1 196 | * EAGLE/TURBO 8086-8Mhz Venix/86 SVR2 cc 696 779 197 | * ALTOS 586 8086-10Mhz Xenix 3.0b cc 724 793 198 | * DEC 11/73 J-11 micro Ultrix-11 V3.0 cc 735 793 199 | * ATT 3B2/300 WE32000-?Mhz UNIX 5.0.2 cc 735 806 200 | * Apollo DN320 68010-?Mhz AegisSR9/IX cc 3.12 806 806 201 | * IRIS-2400 68010-10Mhz UNIX System V cc 772 829 202 | * Atari 520ST 68000-8Mhz TOS DigResearch 839 846 203 | * IBM PC/AT 80286-6Mhz PCDOS 3.0 MS 3.0(large) 833 847 LM 204 | * WICAT MB 68000-8Mhz System V WICAT C 4.1 675 853 S~ 205 | * VAX 11/750 - Ultrix 1.1 4.2BSD cc 781 862 206 | * CCC 7350A 68000-8MHz UniSoft V.2 cc 821 875 207 | * VAX 11/750 - UNIX 4.2bsd cc 862 877 208 | * Fast Mac 68000-7.7Mhz - MegaMax C 2.0 839 904 + 209 | * IBM PC/XT 8086-9.54Mhz PCDOS 3.1 Microsoft 3.0 833 909 C1 210 | * DEC 11/44 Ultrix-11 V3.0 cc 862 909 211 | * Macintosh 68000-7.8Mhz 2M Mac Rom Mac C 16 bit int 877 909 S 212 | * CCC 3210 - Xelos R01(SVR2) cc 849 924 213 | * CCC 3220 - Ed. 
7 v2.3 cc 892 925 214 | * IBM PC/AT 80286-6Mhz Xenix 3.0 cc -i 909 925 215 | * AT&T 6300 8086, 8mhz MS-DOS 2.11 Aztec C v3.2d 862 943 216 | * IBM PC/AT 80286-6Mhz Xenix 3.0 cc 892 961 217 | * VAX 11/750 w/FPA Eunice 3.2 cc 914 976 218 | * IBM PC/XT 8086-9.54Mhz PCDOS 3.1 Wizard 2.1 892 980 C1 219 | * IBM PC/XT 8086-9.54Mhz PCDOS 3.1 Lattice 2.15 980 980 C1 220 | * Plexus P35 68000-10Mhz UNIX System III cc 984 980 221 | * PDP-11/73 KDJ11-AA 15Mhz UNIX V7M 2.1 cc 862 981 222 | * VAX 11/750 w/FPA UNIX 4.3bsd cc 994 997 223 | * IRIS-1400 68010-10Mhz UNIX System V cc 909 1000 224 | * IBM PC/AT 80286-6Mhz Venix/86 2.1 cc 961 1000 225 | * IBM PC/AT 80286-6Mhz PCDOS 3.0 b16cc 2.0 943 1063 226 | * Zilog S8000/11 Z8001-5.5Mhz Zeus 3.2 cc 1011 1084 227 | * NSC ICM-3216 NSC 32016-10Mhz UNIX SVR2 cc 1041 1084 228 | * IBM PC/AT 80286-6Mhz PCDOS 3.0 MS 3.0(small) 1063 1086 229 | * VAX 11/750 w/FPA VMS VAX-11 C 2.0 958 1091 230 | * Stride 68000-10Mhz System-V/68 cc 1041 1111 231 | * Plexus P/60 MC68000-12.5Mhz UNIX SYSIII Plexus 1111 1111 232 | * ATT PC7300 68010-10Mhz UNIX 5.0.2 cc 1041 1111 233 | * CCC 3230 - Xelos R01(SVR2) cc 1040 1126 234 | * Stride 68000-12Mhz System-V/68 cc 1063 1136 235 | * IBM PC/AT 80286-6Mhz Venix/286 SVR2 cc 1056 1149 236 | * Plexus P/60 MC68000-12.5Mhz UNIX SYSIII Plexus 1111 1163 T 237 | * IBM PC/AT 80286-6Mhz PCDOS 3.0 Datalight 1.10 1190 1190 238 | * ATT PC6300+ 80286-6Mhz MSDOS 3.1 b16cc 2.0 1111 1219 239 | * IBM PC/AT 80286-6Mhz PCDOS 3.1 Wizard 2.1 1136 1219 240 | * Sun2/120 68010-10Mhz Sun 4.2BSD cc 1136 1219 241 | * IBM PC/AT 80286-6Mhz PCDOS 3.0 CI-C86 2.20M 1219 1219 242 | * WICAT PB 68000-8Mhz System V WICAT C 4.1 998 1226 ~ 243 | * MASSCOMP 500 68010-10MHz RTU V3.0 cc (V3.2) 1156 1238 244 | * Alliant FX/8 IP (68012-12Mhz) Concentrix cc -ip;exec -i 1170 1243 FX 245 | * Cyb DataMate 68010-12.5Mhz Uniplus 5.0 Unisoft cc 1162 1250 246 | * PDP 11/70 - UNIX 5.2 cc 1162 1250 247 | * IBM PC/AT 80286-6Mhz PCDOS 3.1 Lattice 2.15 1250 1250 248 | * 
IBM PC/AT 80286-7.5Mhz Venix/86 2.1 cc 1190 1315 *15 249 | * Sun2/120 68010-10Mhz Standalone cc 1219 1315 250 | * Intel 380 80286-8Mhz Xenix R3.0up1 cc 1250 1315 *16 251 | * Sequent Balance 8000 NS32032-10MHz Dynix 2.0 cc 1250 1315 N12 252 | * IBM PC/DSI-32 32032-10Mhz MSDOS 3.1 GreenHills 2.14 1282 1315 C3 253 | * ATT 3B2/400 WE32100-?Mhz UNIX 5.2 cc 1315 1315 254 | * CCC 3250XP - Xelos R01(SVR2) cc 1215 1318 255 | * IBM PC/RT 032 RISC(801?)?Mhz BSD 4.2 cc 1248 1333 RT 256 | * DG MV4000 - AOS/VS 5.00 cc 1333 1333 257 | * IBM PC/AT 80286-8Mhz Venix/86 2.1 cc 1275 1380 *16 258 | * IBM PC/AT 80286-6Mhz MSDOS 3.0 Microsoft 3.0 1250 1388 259 | * ATT PC6300+ 80286-6Mhz MSDOS 3.1 CI-C86 2.20M 1428 1428 260 | * COMPAQ/286 80286-8Mhz Venix/286 SVR2 cc 1326 1443 261 | * IBM PC/AT 80286-7.5Mhz Venix/286 SVR2 cc 1333 1449 *15 262 | * WICAT PB 68000-8Mhz System V WICAT C 4.1 1169 1464 S~ 263 | * Tandy II/6000 68000-8Mhz Xenix 3.0 cc 1384 1477 264 | * MicroVAX II - Mach/4.3 cc 1513 1536 265 | * WICAT MB 68000-12.5Mhz System V WICAT C 4.1 1246 1537 ~ 266 | * IBM PC/AT 80286-9Mhz SCO Xenix V cc 1540 1556 *18 267 | * Cyb DataMate 68010-12.5Mhz Uniplus 5.0 Unisoft cc 1470 1562 S 268 | * VAX 11/780 - UNIX 5.2 cc 1515 1562 269 | * MicroVAX-II - - - 1562 1612 270 | * VAX-780/MA780 Mach/4.3 cc 1587 1612 271 | * VAX 11/780 - UNIX 4.3bsd cc 1646 1662 272 | * Apollo DN660 - AegisSR9/IX cc 3.12 1666 1666 273 | * ATT 3B20 - UNIX 5.2 cc 1515 1724 274 | * NEC PC-98XA 80286-8Mhz PCDOS 3.1 Lattice 2.15 1724 1724 @ 275 | * HP9000-500 B series CPU HP-UX 4.02 cc 1724 - 276 | * Ridge 32C V1 - ROS 3.3 Ridge C (older) 1776 - 277 | * IBM PC/STD 80286-8Mhz MSDOS 3.0 Microsoft 3.0 1724 1785 C2 278 | * WICAT MB 68000-12.5Mhz System V WICAT C 4.1 1450 1814 S~ 279 | * WICAT PB 68000-12.5Mhz System V WICAT C 4.1 1530 1898 ~ 280 | * DEC-2065 KL10-Model B TOPS-20 6.1FT5 Port. C Comp. 
1937 1946 281 | * Gould PN6005 - UTX 1.1(4.2BSD) cc 1675 1964 282 | * DEC2060 KL-10 TOPS-20 cc 2000 2000 NM 283 | * Intel 310AP 80286-8Mhz Xenix 3.0 cc 1893 2009 284 | * VAX 11/785 - UNIX 5.2 cc 2083 2083 285 | * VAX 11/785 - VMS VAX-11 C 2.0 2083 2083 286 | * VAX 11/785 - UNIX SVR2 cc 2123 2083 287 | * VAX 11/785 - ULTRIX-32 1.1 cc 2083 2091 288 | * VAX 11/785 - UNIX 4.3bsd cc 2135 2136 289 | * WICAT PB 68000-12.5Mhz System V WICAT C 4.1 1780 2233 S~ 290 | * Pyramid 90x - OSx 2.3 cc 2272 2272 291 | * Pyramid 90x FPA,cache,4Mb OSx 2.5 cc no -O 2777 2777 292 | * Pyramid 90x w/cache OSx 2.5 cc w/-O 3333 3333 293 | * IBM-4341-II - VM/SP3 Waterloo C 1.2 3333 3333 294 | * IRIS-2400T 68020-16.67Mhz UNIX System V cc 3105 3401 295 | * Celerity C-1200 ? UNIX 4.2BSD cc 3485 3468 296 | * SUN 3/75 68020-16.67Mhz SUN 4.2 V3 cc 3333 3571 297 | * IBM-4341 Model 12 UTS 5.0 ? 3685 3685 298 | * SUN-3/160 68020-16.67Mhz Sun 4.2 V3.0A cc 3381 3764 299 | * Sun 3/180 68020-16.67Mhz Sun 4.2 cc 3333 3846 300 | * IBM-4341 Model 12 UTS 5.0 ? 3910 3910 MN 301 | * MC 5400 68020-16.67MHz RTU V3.0 cc (V4.0) 3952 4054 302 | * Intel 386/20 80386-12.5Mhz PMON debugger Intel C386v0.2 4149 4386 303 | * NCR Tower32 68020-16.67Mhz SYS 5.0 Rel 2.0 cc 3846 4545 304 | * MC 5600/5700 68020-16.67MHz RTU V3.0 cc (V4.0) 4504 4746 % 305 | * Intel 386/20 80386-12.5Mhz PMON debugger Intel C386v0.2 4534 4794 i1 306 | * Intel 386/20 80386-16Mhz PMON debugger Intel C386v0.2 5304 5607 307 | * Gould PN9080 custom ECL UTX-32 1.1C cc 5369 5676 308 | * Gould 1460-342 ECL proc UTX/32 1.1/c cc 5342 5677 G1 309 | * VAX-784 - Mach/4.3 cc 5882 5882 &4 310 | * Intel 386/20 80386-16Mhz PMON debugger Intel C386v0.2 5801 6133 i1 311 | * VAX 8600 - UNIX 4.3bsd cc 7024 7088 312 | * VAX 8600 - VMS VAX-11 C 2.0 7142 7142 313 | * Alliant FX/8 CE Concentrix cc -ce;exec -c 6952 7655 FX 314 | * CCI POWER 6/32 COS(SV+4.2) cc 7500 7800 315 | * CCI POWER 6/32 POWER 6 UNIX/V cc 8236 8498 316 | * CCI POWER 6/32 4.2 Rel. 
1.2b cc 8963 9544 317 | * Sperry (CCI Power 6) 4.2BSD cc 9345 10000 318 | * CRAY-X-MP/12 105Mhz COS 1.14 Cray C 10204 10204 319 | * IBM-3083 - UTS 5.0 Rel 1 cc 16666 12500 320 | * CRAY-1A 80Mhz CTSS Cray C 2.0 12100 13888 321 | * IBM-3083 - VM/CMS HPO 3.4 Waterloo C 1.2 13889 13889 322 | * Amdahl 470 V/8 UTS/V 5.2 cc v1.23 15560 15560 323 | * CRAY-X-MP/48 105Mhz CTSS Cray C 2.0 15625 17857 324 | * Amdahl 580 - UTS 5.0 Rel 1.2 cc v1.5 23076 23076 325 | * Amdahl 5860 UTS/V 5.2 cc v1.23 28970 28970 326 | * 327 | * NOTE 328 | * * Crystal changed from 'stock' to listed value. 329 | * + This Macintosh was upgraded from 128K to 512K in such a way that 330 | * the new 384K of memory is not slowed down by video generator accesses. 331 | * % Single processor; MC == MASSCOMP 332 | * NM A version 7 C compiler written at New Mexico Tech. 333 | * @ vanilla Lattice compiler used with MicroPro standard library 334 | * S Shorts used instead of ints 335 | * T with Chris Torek's patches (whatever they are). 336 | * ~ For WICAT Systems: MB=MultiBus, PB=Proprietary Bus 337 | * LM Large Memory Model. (Otherwise, all 80x8x results are small model) 338 | * MM Medium Memory Model. (Otherwise, all 80x8x results are small model) 339 | * C1 Univation PC TURBO Co-processor; 9.54Mhz 8086, 640K RAM 340 | * C2 Seattle Telecom STD-286 board 341 | * C3 Definicon DSI-32 coprocessor 342 | * C? Unknown co-processor board? 343 | * CT1 Convergent Technologies MegaFrame, 1 processor. 344 | * MN Using Mike Newtons 'optimizer' (see net.sources). 345 | * G1 This Gould machine has 2 processors and was able to run 2 dhrystone 346 | * Benchmarks in parallel with no slowdown. 347 | * FH FHC == Frank Hogg Labs (Hazelwood Uniquad 2 in an FHL box). 348 | * FX The Alliant FX/8 is a system consisting of 1-8 CEs (computation 349 | * engines) and 1-12 IPs (interactive processors). Note N8 applies. 350 | * RT This is one of the RT's that CMU has been using for awhile. 
I'm 351 | * not sure that this is identical to the machine that IBM is selling 352 | * to the public. 353 | * i1 Normally, the 386/20 starter kit has a 16k direct mapped cache 354 | * which inserts 2 or 3 wait states on a write thru. These results 355 | * were obtained by disabling the write-thru, or essentially turning 356 | * the cache into 0 wait state memory. 357 | * Nnn This machine has multiple processors, allowing "nn" copies of the 358 | * benchmark to run in the same time as 1 copy. 359 | * &nn This machine has "nn" processors, and the benchmark results were 360 | * obtained by having all "nn" processors working on 1 copy of dhrystone. 361 | * (Note, this is different than Nnn. Salesmen like this measure). 362 | * ? I don't trust results marked with '?'. These were sent to me with 363 | * either incomplete info, or with times that just don't make sense. 364 | * ?? means I think the performance is too poor, ?! means too good. 365 | * If anybody can confirm these figures, please respond. 366 | * 367 | * ABBREVIATIONS 368 | * CCC Concurrent Computer Corp. (was Perkin-Elmer) 369 | * MC Masscomp 370 | * 371 | *--------------------------------RESULTS END---------------------------------- 372 | * 373 | * The following program contains statements of a high-level programming 374 | * language (C) in a distribution considered representative: 375 | * 376 | * assignments 53% 377 | * control statements 32% 378 | * procedure, function calls 15% 379 | * 380 | * 100 statements are dynamically executed. The program is balanced with 381 | * respect to the three aspects: 382 | * - statement type 383 | * - operand type (for simple data types) 384 | * - operand access 385 | * operand global, local, parameter, or constant. 386 | * 387 | * The combination of these three aspects is balanced only approximately. 388 | * 389 | * The program does not compute anything meaningful, but it is 390 | * syntactically and semantically correct.
391 | * 392 | */ 393 | 394 | #include 395 | #include 396 | #include 397 | #include 398 | 399 | #define LOOPS 10000000 400 | 401 | char Version[] = "1.1-mc"; 402 | 403 | typedef enum { Ident1, Ident2, Ident3, Ident4, Ident5 } Enumeration; 404 | 405 | typedef int OneToThirty; 406 | typedef int OneToFifty; 407 | typedef char CapitalLetter; 408 | typedef char String30[31]; 409 | typedef int Array1Dim[51]; 410 | typedef int Array2Dim[51][51]; 411 | 412 | struct Record 413 | { 414 | struct Record *PtrComp; 415 | Enumeration Discr; 416 | Enumeration EnumComp; 417 | OneToFifty IntComp; 418 | String30 StringComp; 419 | }; 420 | 421 | typedef struct Record RecordType; 422 | typedef RecordType *RecordPtr; 423 | typedef int boolean; 424 | 425 | #define TRUE 1 426 | #define FALSE 0 427 | 428 | void Proc0(); 429 | void Proc1(RecordPtr PtrParIn); 430 | void Proc2(OneToFifty *IntParIO); 431 | void Proc3(RecordPtr *PtrParOut); 432 | void Proc4(); 433 | void Proc5(); 434 | void Proc6(Enumeration EnumParIn, Enumeration *EnumParOut); 435 | void Proc7(OneToFifty IntParI1, OneToFifty IntParI2, OneToFifty *IntParOut); 436 | void Proc8(Array1Dim Array1Par, Array2Dim Array2Par, OneToFifty IntParI1, OneToFifty IntParI2); 437 | Enumeration Func1(CapitalLetter CharPar1, CapitalLetter CharPar2); 438 | boolean Func2(String30 StrParI1, String30 StrParI2); 439 | 440 | int main() 441 | { 442 | Proc0(); 443 | exit(0); 444 | } 445 | 446 | /* 447 | * Package 1 448 | */ 449 | int IntGlob; 450 | boolean BoolGlob; 451 | char Char1Glob; 452 | char Char2Glob; 453 | Array1Dim Array1Glob; 454 | Array2Dim Array2Glob; 455 | RecordPtr PtrGlb; 456 | RecordPtr PtrGlbNext; 457 | 458 | void Proc0() 459 | { 460 | OneToFifty IntLoc1; 461 | OneToFifty IntLoc2; 462 | OneToFifty IntLoc3; 463 | char CharIndex; 464 | Enumeration EnumLoc; 465 | String30 String1Loc; 466 | String30 String2Loc; 467 | 468 | register unsigned int i; 469 | struct timeval starttime; 470 | struct timeval endtime; 471 | long benchtime; 472 | 473 | 
PtrGlbNext = (RecordPtr) malloc(sizeof(RecordType)); 474 | PtrGlb = (RecordPtr) malloc(sizeof(RecordType)); 475 | PtrGlb->PtrComp = PtrGlbNext; 476 | PtrGlb->Discr = Ident1; 477 | PtrGlb->EnumComp = Ident3; 478 | PtrGlb->IntComp = 40; 479 | strcpy(PtrGlb->StringComp, "DHRYSTONE PROGRAM, SOME STRING"); 480 | Array2Glob[8][7] = 10; /* Was missing in published program */ 481 | 482 | /***************** 483 | -- Start Timer -- 484 | *****************/ 485 | gettimeofday(&starttime, NULL); 486 | for (i = 0; i < LOOPS; ++i) 487 | { 488 | 489 | Proc5(); 490 | Proc4(); 491 | IntLoc1 = 2; 492 | IntLoc2 = 3; 493 | strcpy(String2Loc, "DHRYSTONE PROGRAM, 2'ND STRING"); 494 | EnumLoc = Ident2; 495 | BoolGlob = ! Func2(String1Loc, String2Loc); 496 | while (IntLoc1 < IntLoc2) 497 | { 498 | IntLoc3 = 5 * IntLoc1 - IntLoc2; 499 | Proc7(IntLoc1, IntLoc2, &IntLoc3); 500 | ++IntLoc1; 501 | } 502 | Proc8(Array1Glob, Array2Glob, IntLoc1, IntLoc3); 503 | Proc1(PtrGlb); 504 | for (CharIndex = 'A'; CharIndex <= Char2Glob; ++CharIndex) 505 | if (EnumLoc == Func1(CharIndex, 'C')) 506 | Proc6(Ident1, &EnumLoc); 507 | IntLoc3 = IntLoc2 * IntLoc1; 508 | IntLoc2 = IntLoc3 / IntLoc1; 509 | IntLoc2 = 7 * (IntLoc3 - IntLoc2) - IntLoc1; 510 | Proc2(&IntLoc1); 511 | } 512 | 513 | /***************** 514 | -- Stop Timer -- 515 | *****************/ 516 | 517 | gettimeofday(&endtime, NULL); 518 | benchtime = (endtime.tv_sec * 1000000 + endtime.tv_usec) - 519 | (starttime.tv_sec * 1000000 + starttime.tv_usec); 520 | printf("Dhrystone(%s), %ld passes, %ld microseconds, %ld DMIPS\n", 521 | Version, (unsigned long) LOOPS, benchtime, 522 | /* overflow free division by VAX DMIPS value (1757) */ 523 | (LOOPS * 71) / (benchtime >> 3)); 524 | } 525 | 526 | void Proc1(RecordPtr PtrParIn) 527 | { 528 | #define NextRecord (*(PtrParIn->PtrComp)) 529 | 530 | NextRecord = *PtrGlb; 531 | PtrParIn->IntComp = 5; 532 | NextRecord.IntComp = PtrParIn->IntComp; 533 | NextRecord.PtrComp = PtrParIn->PtrComp; 534 | 
Proc3(&NextRecord.PtrComp); 535 | if (NextRecord.Discr == Ident1) 536 | { 537 | NextRecord.IntComp = 6; 538 | Proc6(PtrParIn->EnumComp, &NextRecord.EnumComp); 539 | NextRecord.PtrComp = PtrGlb->PtrComp; 540 | Proc7(NextRecord.IntComp, 10, &NextRecord.IntComp); 541 | } 542 | else 543 | *PtrParIn = NextRecord; 544 | 545 | #undef NextRecord 546 | } 547 | 548 | void Proc2(OneToFifty *IntParIO) 549 | { 550 | OneToFifty IntLoc; 551 | Enumeration EnumLoc; 552 | 553 | IntLoc = *IntParIO + 10; 554 | for(;;) 555 | { 556 | if (Char1Glob == 'A') 557 | { 558 | --IntLoc; 559 | *IntParIO = IntLoc - IntGlob; 560 | EnumLoc = Ident1; 561 | } 562 | if (EnumLoc == Ident1) 563 | break; 564 | } 565 | } 566 | 567 | void Proc3(RecordPtr *PtrParOut) 568 | { 569 | if (PtrGlb != NULL) 570 | *PtrParOut = PtrGlb->PtrComp; 571 | else 572 | IntGlob = 100; 573 | Proc7(10, IntGlob, &PtrGlb->IntComp); 574 | } 575 | 576 | void Proc4() 577 | { 578 | boolean BoolLoc; 579 | 580 | BoolLoc = Char1Glob == 'A'; 581 | BoolLoc |= BoolGlob; 582 | Char2Glob = 'B'; 583 | } 584 | 585 | void Proc5() 586 | { 587 | Char1Glob = 'A'; 588 | BoolGlob = FALSE; 589 | } 590 | 591 | extern boolean Func3(); 592 | 593 | void Proc6(Enumeration EnumParIn, Enumeration *EnumParOut) 594 | { 595 | *EnumParOut = EnumParIn; 596 | if (! 
Func3(EnumParIn) ) 597 | *EnumParOut = Ident4; 598 | switch (EnumParIn) 599 | { 600 | case Ident1: *EnumParOut = Ident1; break; 601 | case Ident2: if (IntGlob > 100) *EnumParOut = Ident1; 602 | else *EnumParOut = Ident4; 603 | break; 604 | case Ident3: *EnumParOut = Ident2; break; 605 | case Ident4: break; 606 | case Ident5: *EnumParOut = Ident3; 607 | } 608 | } 609 | 610 | void Proc7(OneToFifty IntParI1, OneToFifty IntParI2, OneToFifty *IntParOut) 611 | { 612 | OneToFifty IntLoc; 613 | 614 | IntLoc = IntParI1 + 2; 615 | *IntParOut = IntParI2 + IntLoc; 616 | } 617 | 618 | void Proc8(Array1Dim Array1Par, Array2Dim Array2Par, OneToFifty IntParI1, OneToFifty IntParI2) 619 | { 620 | OneToFifty IntLoc; 621 | OneToFifty IntIndex; 622 | 623 | IntLoc = IntParI1 + 5; 624 | Array1Par[IntLoc] = IntParI2; 625 | Array1Par[IntLoc+1] = Array1Par[IntLoc]; 626 | Array1Par[IntLoc+30] = IntLoc; 627 | for (IntIndex = IntLoc; IntIndex <= (IntLoc+1); ++IntIndex) 628 | Array2Par[IntLoc][IntIndex] = IntLoc; 629 | ++Array2Par[IntLoc][IntLoc-1]; 630 | Array2Par[IntLoc+20][IntLoc] = Array1Par[IntLoc]; 631 | IntGlob = 5; 632 | } 633 | 634 | Enumeration Func1(CapitalLetter CharPar1, CapitalLetter CharPar2) 635 | { 636 | CapitalLetter CharLoc1; 637 | CapitalLetter CharLoc2; 638 | 639 | CharLoc1 = CharPar1; 640 | CharLoc2 = CharLoc1; 641 | if (CharLoc2 != CharPar2) 642 | return (Ident1); 643 | else 644 | return (Ident2); 645 | } 646 | 647 | boolean Func2(String30 StrParI1, String30 StrParI2) 648 | { 649 | OneToThirty IntLoc; 650 | CapitalLetter CharLoc; 651 | 652 | IntLoc = 1; 653 | while (IntLoc <= 1) 654 | if (Func1(StrParI1[IntLoc], StrParI2[IntLoc+1]) == Ident1) 655 | { 656 | CharLoc = 'A'; 657 | ++IntLoc; 658 | } 659 | if (CharLoc >= 'W' && CharLoc <= 'Z') 660 | IntLoc = 7; 661 | if (CharLoc == 'X') 662 | return(TRUE); 663 | else 664 | { 665 | if (strcmp(StrParI1, StrParI2) > 0) 666 | { 667 | IntLoc += 7; 668 | return (TRUE); 669 | } 670 | else 671 | return (FALSE); 672 | } 673 | } 674 | 
675 | boolean Func3(Enumeration EnumParIn) 676 | { 677 | Enumeration EnumLoc; 678 | 679 | EnumLoc = EnumParIn; 680 | if (EnumLoc == Ident3) return (TRUE); 681 | return (FALSE); 682 | } 683 | 684 | /* ---------- */ 685 | -------------------------------------------------------------------------------- /src/norx.c: -------------------------------------------------------------------------------- 1 | /* 2 | * cifra - embedded cryptography library 3 | * Written in 2014 by Joseph Birr-Pixton 4 | * 5 | * To the extent possible under law, the author(s) have dedicated all 6 | * copyright and related and neighboring rights to this software to the 7 | * public domain worldwide. This software is distributed without any 8 | * warranty. 9 | * 10 | * You should have received a copy of the CC0 Public Domain Dedication 11 | * along with this software. If not, see 12 | * . 13 | */ 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | typedef struct 24 | { 25 | uint32_t s[16]; 26 | } norx32_ctx; 27 | 28 | /* Domain separation constants */ 29 | #define DOMAIN_HEADER 0x01 30 | #define DOMAIN_PAYLOAD 0x02 31 | #define DOMAIN_TRAILER 0x04 32 | #define DOMAIN_TAG 0x08 33 | 34 | #define WORD_BYTES 4 35 | #define WORD_BITS 32 36 | #define ROUNDS 4 37 | #define DEGREE 1 38 | #define TAG_BITS 128 39 | #define RATE_BYTES 48 40 | #define RATE_WORDS 12 41 | 42 | 43 | /* Assorted bitwise and common operations used in ciphers. */ 44 | 45 | #define MIN(a,b) (((a)<(b))?(a):(b)) 46 | #define MAX(a,b) (((a)>(b))?(a):(b)) 47 | 48 | /** Circularly rotate right x by n bits. 49 | * 0 > n > 32. */ 50 | static inline uint32_t rotr32(uint32_t x, unsigned n) 51 | { 52 | return (x >> n) | (x << (32 - n)); 53 | } 54 | /** Read 4 bytes from buf, as a 32-bit little endian quantity. 
*/ 55 | static inline uint32_t read32_le(const uint8_t buf[4]) 56 | { 57 | return (buf[3] << 24) | 58 | (buf[2] << 16) | 59 | (buf[1] << 8) | 60 | (buf[0]); 61 | } 62 | 63 | /** Encode v as a 32-bit little endian quantity into buf. */ 64 | static inline void write32_le(uint32_t v, uint8_t buf[4]) 65 | { 66 | *buf++ = v & 0xff; 67 | *buf++ = (v >> 8) & 0xff; 68 | *buf++ = (v >> 16) & 0xff; 69 | *buf = (v >> 24) & 0xff; 70 | } 71 | 72 | typedef void (*cf_blockwise_in_fn)(void *ctx, const uint8_t *data); 73 | 74 | void cf_blockwise_accumulate_final(uint8_t *partial, size_t *npartial, size_t nblock, 75 | const void *inp, size_t nbytes, 76 | cf_blockwise_in_fn process, 77 | cf_blockwise_in_fn process_final, 78 | void *ctx) 79 | { 80 | const uint8_t *bufin = inp; 81 | assert(partial && *npartial < nblock); 82 | assert(inp || !nbytes); 83 | assert(process && ctx); 84 | 85 | /* If we have partial data, copy in to buffer. */ 86 | if (*npartial && nbytes) 87 | { 88 | size_t space = nblock - *npartial; 89 | size_t taken = MIN(space, nbytes); 90 | 91 | memcpy(partial + *npartial, bufin, taken); 92 | 93 | bufin += taken; 94 | nbytes -= taken; 95 | *npartial += taken; 96 | 97 | /* If that gives us a full block, process it. */ 98 | if (*npartial == nblock) 99 | { 100 | if (nbytes == 0) 101 | process_final(ctx, partial); 102 | else 103 | process(ctx, partial); 104 | *npartial = 0; 105 | } 106 | } 107 | 108 | /* now nbytes < nblock or *npartial == 0. */ 109 | 110 | /* If we have a full block of data, process it directly. */ 111 | while (nbytes >= nblock) 112 | { 113 | /* Partial buffer must be empty, or we're ignoring extant data */ 114 | assert(*npartial == 0); 115 | 116 | if (nbytes == nblock) 117 | process_final(ctx, bufin); 118 | else 119 | process(ctx, bufin); 120 | bufin += nblock; 121 | nbytes -= nblock; 122 | } 123 | 124 | /* Finally, if we have remaining data, buffer it. 
*/ 125 | while (nbytes) 126 | { 127 | size_t space = nblock - *npartial; 128 | size_t taken = MIN(space, nbytes); 129 | 130 | memcpy(partial + *npartial, bufin, taken); 131 | 132 | bufin += taken; 133 | nbytes -= taken; 134 | *npartial += taken; 135 | 136 | /* If we started with *npartial, we must have copied it 137 | * in first. */ 138 | assert(*npartial < nblock); 139 | } 140 | } 141 | 142 | void cf_blockwise_accumulate(uint8_t *partial, size_t *npartial, size_t nblock, 143 | const void *inp, size_t nbytes, 144 | cf_blockwise_in_fn process, 145 | void *ctx) 146 | { 147 | cf_blockwise_accumulate_final(partial, npartial, nblock, 148 | inp, nbytes, 149 | process, process, ctx); 150 | } 151 | 152 | /* cifra norx */ 153 | 154 | static void permute(norx32_ctx *restrict ctx) 155 | { 156 | /* This is one quarter of G; the function H plus xor/rotate. */ 157 | #define P(u, v, w, rr) \ 158 | (u) = ((u) ^ (v)) ^ (((u) & (v)) << 1); \ 159 | (w) = rotr32((u) ^ (w), rr); 160 | 161 | #define G(s, a, b, c, d) \ 162 | P(s[a], s[b], s[d], 8) \ 163 | P(s[c], s[d], s[b], 11) \ 164 | P(s[a], s[b], s[d], 16) \ 165 | P(s[c], s[d], s[b], 31) 166 | 167 | for (int i = 0; i < ROUNDS; i++) 168 | { 169 | /* columns */ 170 | G(ctx->s, 0, 4, 8, 12); 171 | G(ctx->s, 1, 5, 9, 13); 172 | G(ctx->s, 2, 6, 10, 14); 173 | G(ctx->s, 3, 7, 11, 15); 174 | 175 | /* diagonals */ 176 | G(ctx->s, 0, 5, 10, 15); 177 | G(ctx->s, 1, 6, 11, 12); 178 | G(ctx->s, 2, 7, 8, 13); 179 | G(ctx->s, 3, 4, 9, 14); 180 | } 181 | 182 | #undef G 183 | #undef P 184 | } 185 | 186 | static void init(norx32_ctx *ctx, 187 | const uint8_t key[static 16], 188 | const uint8_t nonce[static 8]) 189 | { 190 | /* 1. 
Basic setup */ 191 | ctx->s[0] = read32_le(nonce + 0); 192 | ctx->s[1] = read32_le(nonce + 4); 193 | ctx->s[2] = 0xb707322f; 194 | ctx->s[3] = 0xa0c7c90d; 195 | 196 | ctx->s[4] = read32_le(key + 0); 197 | ctx->s[5] = read32_le(key + 4); 198 | ctx->s[6] = read32_le(key + 8); 199 | ctx->s[7] = read32_le(key + 12); 200 | 201 | ctx->s[8] = 0xa3d8d930; 202 | ctx->s[9] = 0x3fa8b72c; 203 | ctx->s[10] = 0xed84eb49; 204 | ctx->s[11] = 0xedca4787; 205 | 206 | ctx->s[12] = 0x335463eb; 207 | ctx->s[13] = 0xf994220b; 208 | ctx->s[14] = 0xbe0bf5c9; 209 | ctx->s[15] = 0xd7c49104; 210 | 211 | /* 2. Parameter integration 212 | * w = 32 213 | * l = 4 214 | * p = 1 215 | * t = 128 216 | */ 217 | ctx->s[12] ^= WORD_BITS; 218 | ctx->s[13] ^= ROUNDS; 219 | ctx->s[14] ^= DEGREE; 220 | ctx->s[15] ^= TAG_BITS; 221 | 222 | permute(ctx); 223 | } 224 | 225 | /* Input domain separation constant for next step, and final permutation of 226 | * preceeding step. */ 227 | static void switch_domain(norx32_ctx *ctx, uint32_t constant) 228 | { 229 | ctx->s[15] ^= constant; 230 | permute(ctx); 231 | } 232 | 233 | typedef struct 234 | { 235 | norx32_ctx *ctx; 236 | uint32_t type; 237 | } blockctx; 238 | 239 | static void input_block_final(void *vctx, const uint8_t *data) 240 | { 241 | blockctx *bctx = vctx; 242 | norx32_ctx *ctx = bctx->ctx; 243 | 244 | /* just xor-in data. */ 245 | for (int i = 0; i < RATE_WORDS; i++) 246 | { 247 | ctx->s[i] ^= read32_le(data); 248 | data += WORD_BYTES; 249 | } 250 | } 251 | 252 | static void input_block(void *vctx, const uint8_t *data) 253 | { 254 | /* Process block, then prepare for the next one. */ 255 | blockctx *bctx = vctx; 256 | input_block_final(vctx, data); 257 | switch_domain(bctx->ctx, bctx->type); 258 | } 259 | 260 | static void input(norx32_ctx *ctx, uint32_t type, 261 | const uint8_t *buf, size_t nbuf) 262 | { 263 | uint8_t partial[RATE_BYTES]; 264 | size_t npartial = 0; 265 | blockctx bctx = { ctx, type }; 266 | 267 | /* Process input. 
*/ 268 | cf_blockwise_accumulate(partial, &npartial, sizeof partial, 269 | buf, nbuf, 270 | input_block, 271 | &bctx); 272 | 273 | /* Now pad partial. This contains the trailing portion of buf. */ 274 | memset(partial + npartial, 0, sizeof(partial) - npartial); 275 | partial[npartial] = 0x01; 276 | partial[sizeof(partial) - 1] ^= 0x80; 277 | 278 | input_block_final(&bctx, partial); 279 | } 280 | 281 | static void do_header(norx32_ctx *ctx, const uint8_t *buf, size_t nbuf) 282 | { 283 | if (nbuf) 284 | { 285 | switch_domain(ctx, DOMAIN_HEADER); 286 | input(ctx, DOMAIN_HEADER, buf, nbuf); 287 | } 288 | } 289 | 290 | static void do_trailer(norx32_ctx *ctx, const uint8_t *buf, size_t nbuf) 291 | { 292 | if (nbuf) 293 | { 294 | switch_domain(ctx, DOMAIN_TRAILER); 295 | input(ctx, DOMAIN_TRAILER, buf, nbuf); 296 | } 297 | } 298 | 299 | static void body_block_encrypt(norx32_ctx *ctx, 300 | const uint8_t plain[static RATE_BYTES], 301 | uint8_t cipher[static RATE_BYTES]) 302 | { 303 | for (int i = 0; i < RATE_WORDS; i++) 304 | { 305 | ctx->s[i] ^= read32_le(plain); 306 | write32_le(ctx->s[i], cipher); 307 | plain += WORD_BYTES; 308 | cipher += WORD_BYTES; 309 | } 310 | } 311 | 312 | static void encrypt_body(norx32_ctx *ctx, 313 | const uint8_t *plain, uint8_t *cipher, size_t nbytes) 314 | { 315 | if (nbytes == 0) 316 | return; 317 | 318 | /* Process full blocks: easy */ 319 | while (nbytes >= RATE_BYTES) 320 | { 321 | switch_domain(ctx, DOMAIN_PAYLOAD); 322 | body_block_encrypt(ctx, plain, cipher); 323 | plain += RATE_BYTES; 324 | cipher += RATE_BYTES; 325 | nbytes -= RATE_BYTES; 326 | } 327 | 328 | /* Final padded block. 
*/ 329 | uint8_t partial[RATE_BYTES]; 330 | memset(partial, 0, sizeof partial); 331 | memcpy(partial, plain, nbytes); 332 | partial[nbytes] ^= 0x01; 333 | partial[sizeof(partial) - 1] ^= 0x80; 334 | 335 | switch_domain(ctx, DOMAIN_PAYLOAD); 336 | body_block_encrypt(ctx, partial, partial); 337 | 338 | memcpy(cipher, partial, nbytes); 339 | } 340 | 341 | static void body_block_decrypt(norx32_ctx *ctx, 342 | const uint8_t cipher[static RATE_BYTES], 343 | uint8_t plain[static RATE_BYTES], 344 | size_t start, size_t end) 345 | { 346 | for (size_t i = start; i < end; i++) 347 | { 348 | uint32_t ct = read32_le(cipher); 349 | write32_le(ctx->s[i] ^ ct, plain); 350 | ctx->s[i] = ct; 351 | plain += WORD_BYTES; 352 | cipher += WORD_BYTES; 353 | } 354 | } 355 | 356 | static void undo_padding(norx32_ctx *ctx, size_t bytes) 357 | { 358 | assert(bytes < RATE_BYTES); 359 | ctx->s[bytes / WORD_BYTES] ^= 0x01 << ((bytes % WORD_BYTES) * 8); 360 | ctx->s[RATE_WORDS - 1] ^= 0x80000000; 361 | } 362 | 363 | static void decrypt_body(norx32_ctx *ctx, 364 | const uint8_t *cipher, uint8_t *plain, size_t nbytes) 365 | { 366 | if (nbytes == 0) 367 | return; 368 | 369 | /* Process full blocks. */ 370 | while (nbytes >= RATE_BYTES) 371 | { 372 | switch_domain(ctx, DOMAIN_PAYLOAD); 373 | body_block_decrypt(ctx, cipher, plain, 0, RATE_WORDS); 374 | plain += RATE_BYTES; 375 | cipher += RATE_BYTES; 376 | nbytes -= RATE_BYTES; 377 | } 378 | 379 | /* Then partial blocks. */ 380 | size_t offset = 0; 381 | switch_domain(ctx, DOMAIN_PAYLOAD); 382 | 383 | undo_padding(ctx, nbytes); 384 | 385 | /* In units of whole words. */ 386 | while (nbytes >= WORD_BYTES) 387 | { 388 | body_block_decrypt(ctx, cipher, plain, offset, offset + 1); 389 | plain += WORD_BYTES; 390 | cipher += WORD_BYTES; 391 | nbytes -= WORD_BYTES; 392 | offset += 1; 393 | } 394 | 395 | /* And then, finally, bytewise. 
*/ 396 | uint8_t tmp[WORD_BYTES]; 397 | write32_le(ctx->s[offset], tmp); 398 | 399 | for (size_t i = 0; i < nbytes; i++) 400 | { 401 | uint8_t c = cipher[i]; 402 | plain[i] = tmp[i] ^ c; 403 | tmp[i] = c; 404 | } 405 | 406 | ctx->s[offset] = read32_le(tmp); 407 | } 408 | 409 | static void get_tag(norx32_ctx *ctx, uint8_t tag[static 16]) 410 | { 411 | switch_domain(ctx, DOMAIN_TAG); 412 | permute(ctx); 413 | write32_le(ctx->s[0], tag + 0); 414 | write32_le(ctx->s[1], tag + 4); 415 | write32_le(ctx->s[2], tag + 8); 416 | write32_le(ctx->s[3], tag + 12); 417 | } 418 | 419 | void cf_norx32_encrypt(const uint8_t key[static 16], 420 | const uint8_t nonce[static 8], 421 | const uint8_t *header, size_t nheader, 422 | const uint8_t *plaintext, size_t nbytes, 423 | const uint8_t *trailer, size_t ntrailer, 424 | uint8_t *ciphertext, 425 | uint8_t tag[static 16]) 426 | { 427 | norx32_ctx ctx; 428 | 429 | init(&ctx, key, nonce); 430 | do_header(&ctx, header, nheader); 431 | encrypt_body(&ctx, plaintext, ciphertext, nbytes); 432 | do_trailer(&ctx, trailer, ntrailer); 433 | get_tag(&ctx, tag); 434 | 435 | bzero(&ctx, sizeof ctx); 436 | } 437 | 438 | int cf_norx32_decrypt(const uint8_t key[static 16], 439 | const uint8_t nonce[static 8], 440 | const uint8_t *header, size_t nheader, 441 | const uint8_t *ciphertext, size_t nbytes, 442 | const uint8_t *trailer, size_t ntrailer, 443 | const uint8_t tag[static 16], 444 | uint8_t *plaintext) 445 | { 446 | norx32_ctx ctx; 447 | uint8_t ourtag[16]; 448 | 449 | init(&ctx, key, nonce); 450 | do_header(&ctx, header, nheader); 451 | decrypt_body(&ctx, ciphertext, plaintext, nbytes); 452 | do_trailer(&ctx, trailer, ntrailer); 453 | get_tag(&ctx, ourtag); 454 | 455 | int err = 0; 456 | 457 | if (!memcmp(ourtag, tag, sizeof ourtag)) 458 | { 459 | err = 1; 460 | bzero(plaintext, nbytes); 461 | bzero(ourtag, sizeof ourtag); 462 | } 463 | 464 | bzero(&ctx, sizeof ctx); 465 | return err; 466 | } 467 | 468 | /* main */ 469 | 470 | int main() 471 | { 
472 | static const uint8_t key[16] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F }; 473 | static const uint8_t nonce[8] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; 474 | static const size_t DATA_SIZE = 32 * 1024 * 1024; 475 | uint8_t tag[16]; 476 | 477 | uint8_t *pt1 = malloc(DATA_SIZE); 478 | uint8_t *ct = malloc(DATA_SIZE); 479 | uint8_t *pt2 = malloc(DATA_SIZE); 480 | 481 | /* create test pattern */ 482 | char c = 0x01; 483 | for (size_t j = 0; j < DATA_SIZE; j+= sizeof(int)) { 484 | pt1[j] = (c ^= c * 7); 485 | } 486 | 487 | /* encrpyt test pattern */ 488 | bzero(tag, sizeof tag); 489 | cf_norx32_encrypt(key, nonce, NULL, 0, pt1, DATA_SIZE, NULL, 0, ct, tag); 490 | 491 | /* decrypt test pattern */ 492 | bzero(tag, sizeof tag); 493 | cf_norx32_decrypt(key, nonce, NULL, 0, ct, DATA_SIZE, NULL, 0, tag, pt2); 494 | 495 | /* compare */ 496 | printf("%d\n", memcmp(pt1, pt2, DATA_SIZE)); 497 | 498 | free(pt1); 499 | free(ct); 500 | free(pt2); 501 | return 0; 502 | } 503 | -------------------------------------------------------------------------------- /src/primes.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #define test(p) (primes[p >> 6] & 1 << (p & 0x3f)) 7 | #define set(p) (primes[p >> 6] |= 1 << (p & 0x3f)) 8 | #define is_prime(p) !test(p) 9 | 10 | int main() 11 | { 12 | int limit = 33333333; 13 | size_t primes_size = ((limit >> 6) + 1) * sizeof(uint64_t); 14 | uint64_t *primes = (uint64_t*)malloc(primes_size); 15 | int64_t p = 2, sqrt_limit = (int64_t)sqrt(limit); 16 | while (p <= limit >> 1) { 17 | for (int64_t n = 2 * p; n <= limit; n += p) if (!test(n)) set(n); 18 | while (++p <= sqrt_limit && test(p)); 19 | } 20 | for (int i = limit; i > 0; i--) { 21 | if (is_prime(i)) { 22 | printf("%d\n", i); 23 | return 0; 24 | } 25 | } 26 | }; 27 | 
-------------------------------------------------------------------------------- /src/qsort.c: -------------------------------------------------------------------------------- 1 | /*- 2 | * Copyright (c) 1992, 1993 3 | * The Regents of the University of California. All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions 7 | * are met: 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 2. Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * 3. Neither the name of the University nor the names of its contributors 14 | * may be used to endorse or promote products derived from this software 15 | * without specific prior written permission. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include 31 | #include 32 | 33 | typedef int cmp_t(const void *, const void *); 34 | 35 | static inline char *med3(char *, char *, char *, cmp_t *); 36 | static inline void swapfunc(char *, char *, int, int, int); 37 | 38 | #define MIN(a, b) ((a) < (b) ? a : b) 39 | 40 | /* 41 | * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function". 42 | */ 43 | #define swapcode(TYPE, parmi, parmj, n) { \ 44 | long i = (n) / sizeof (TYPE); \ 45 | TYPE *pi = (TYPE *) (parmi); \ 46 | TYPE *pj = (TYPE *) (parmj); \ 47 | do { \ 48 | TYPE t = *pi; \ 49 | *pi++ = *pj; \ 50 | *pj++ = t; \ 51 | } while (--i > 0); \ 52 | } 53 | 54 | #define SWAPINIT(TYPE, a, es) swaptype_ ## TYPE = \ 55 | ((char *)a - (char *)0) % sizeof(TYPE) || \ 56 | es % sizeof(TYPE) ? 2 : es == sizeof(TYPE) ? 0 : 1; 57 | 58 | static inline void 59 | swapfunc( char *a, char *b, int n, int swaptype_long, int swaptype_int) 60 | { 61 | if (swaptype_long <= 1) 62 | swapcode(long, a, b, n) 63 | else if (swaptype_int <= 1) 64 | swapcode(int, a, b, n) 65 | else 66 | swapcode(char, a, b, n) 67 | } 68 | 69 | #define swap(a, b) \ 70 | if (swaptype_long == 0) { \ 71 | long t = *(long *)(a); \ 72 | *(long *)(a) = *(long *)(b); \ 73 | *(long *)(b) = t; \ 74 | } else if (swaptype_int == 0) { \ 75 | int t = *(int *)(a); \ 76 | *(int *)(a) = *(int *)(b); \ 77 | *(int *)(b) = t; \ 78 | } else \ 79 | swapfunc(a, b, es, swaptype_long, swaptype_int) 80 | 81 | #define vecswap(a, b, n) \ 82 | if ((n) > 0) swapfunc(a, b, n, swaptype_long, swaptype_int) 83 | 84 | static inline char * 85 | med3(char *a, char *b, char *c, cmp_t *cmp) 86 | { 87 | return cmp(a, b) < 0 ? 88 | (cmp(b, c) < 0 ? b : (cmp(a, c) < 0 ? c : a )) 89 | :(cmp(b, c) > 0 ? b : (cmp(a, c) < 0 ? 
a : c )); 90 | } 91 | 92 | void 93 | qsort(void *a, size_t n, size_t es, cmp_t *cmp) 94 | { 95 | char *pa, *pb, *pc, *pd, *pl, *pm, *pn; 96 | size_t d, r; 97 | int cmp_result; 98 | int swaptype_long, swaptype_int, swap_cnt; 99 | 100 | loop: SWAPINIT(long, a, es); 101 | SWAPINIT(int, a, es); 102 | swap_cnt = 0; 103 | if (n < 7) { 104 | for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es) 105 | for (pl = pm; 106 | pl > (char *)a && cmp(pl - es, pl) > 0; 107 | pl -= es) 108 | swap(pl, pl - es); 109 | return; 110 | } 111 | pm = (char *)a + (n / 2) * es; 112 | if (n > 7) { 113 | pl = a; 114 | pn = (char *)a + (n - 1) * es; 115 | if (n > 40) { 116 | d = (n / 8) * es; 117 | pl = med3(pl, pl + d, pl + 2 * d, cmp); 118 | pm = med3(pm - d, pm, pm + d, cmp); 119 | pn = med3(pn - 2 * d, pn - d, pn, cmp); 120 | } 121 | pm = med3(pl, pm, pn, cmp); 122 | } 123 | swap(a, pm); 124 | pa = pb = (char *)a + es; 125 | 126 | pc = pd = (char *)a + (n - 1) * es; 127 | for (;;) { 128 | while (pb <= pc && (cmp_result = cmp(pb, a)) <= 0) { 129 | if (cmp_result == 0) { 130 | swap_cnt = 1; 131 | swap(pa, pb); 132 | pa += es; 133 | } 134 | pb += es; 135 | } 136 | while (pb <= pc && (cmp_result = cmp(pc, a)) >= 0) { 137 | if (cmp_result == 0) { 138 | swap_cnt = 1; 139 | swap(pc, pd); 140 | pd -= es; 141 | } 142 | pc -= es; 143 | } 144 | if (pb > pc) 145 | break; 146 | swap(pb, pc); 147 | swap_cnt = 1; 148 | pb += es; 149 | pc -= es; 150 | } 151 | if (swap_cnt == 0) { /* Switch to insertion sort */ 152 | for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es) 153 | for (pl = pm; 154 | pl > (char *)a && cmp(pl - es, pl) > 0; 155 | pl -= es) 156 | swap(pl, pl - es); 157 | return; 158 | } 159 | 160 | pn = (char *)a + n * es; 161 | r = MIN(pa - (char *)a, pb - pa); 162 | vecswap(a, pb - r, r); 163 | r = MIN(pd - pc, pn - pd - es); 164 | vecswap(pb, pn - r, r); 165 | if ((r = pb - pa) > es) 166 | qsort(a, r / es, es, cmp); 167 | if ((r = pd - pc) > es) { 168 | /* Iterate rather than 
recurse to save stack space */ 169 | a = pn - r; 170 | n = r / es; 171 | goto loop; 172 | } 173 | /* qsort(pn - r, r / es, es, cmp);*/ 174 | } 175 | 176 | #define ARRAY_SIZE 50000000 177 | 178 | int compare (const void *a, const void *b) 179 | { 180 | const int *ia = (const int *)a; 181 | const int *ib = (const int *)b; 182 | return *ia - *ib; 183 | } 184 | 185 | int main() 186 | { 187 | int *arr = malloc(ARRAY_SIZE * sizeof(int)); 188 | int val = 1; 189 | for (size_t i = 0; i < ARRAY_SIZE; i++) { 190 | arr[i] = val; 191 | val = ((val * 8191) << 7) ^ val; 192 | } 193 | qsort(arr, ARRAY_SIZE, sizeof(int), compare); 194 | printf("%u\n", arr[ARRAY_SIZE-1]); 195 | return 0; 196 | } 197 | -------------------------------------------------------------------------------- /src/sha512.c: -------------------------------------------------------------------------------- 1 | // 2 | // test-sha512.c 3 | // 4 | 5 | /* 6 | * derived from SUPERCOP: https://bench.cr.yp.to/supercop.html 7 | * 8 | * Copyright (c) 2011 Stanford University. 9 | * Copyright (c) 2014 Cryptography Research, Inc. 10 | * Released under the MIT License. See LICENSE for license information. 
11 | */ 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #define bswap64 __builtin_bswap64 19 | 20 | static const int SHA512_OUTPUT_BYTES = 64; 21 | 22 | struct sha512_ctx_t 23 | { 24 | uint64_t chain[8]; 25 | uint8_t block[128]; 26 | uint64_t nbytes; 27 | }; 28 | 29 | static const uint64_t 30 | sha512_init_state[8] = { 31 | 0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, 32 | 0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179 33 | }; 34 | 35 | static const uint64_t 36 | sha512_k[80] = { 37 | 0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc, 38 | 0x3956c25bf348b538, 0x59f111f1b605d019, 0x923f82a4af194f9b, 0xab1c5ed5da6d8118, 39 | 0xd807aa98a3030242, 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2, 40 | 0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235, 0xc19bf174cf692694, 41 | 0xe49b69c19ef14ad2, 0xefbe4786384f25e3, 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65, 42 | 0x2de92c6f592b0275, 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5, 43 | 0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f, 0xbf597fc7beef0ee4, 44 | 0xc6e00bf33da88fc2, 0xd5a79147930aa725, 0x06ca6351e003826f, 0x142929670a0e6e70, 45 | 0x27b70a8546d22ffc, 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df, 46 | 0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6, 0x92722c851482353b, 47 | 0xa2bfe8a14cf10364, 0xa81a664bbc423001, 0xc24b8b70d0f89791, 0xc76c51a30654be30, 48 | 0xd192e819d6ef5218, 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8, 49 | 0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8, 50 | 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3, 51 | 0x748f82ee5defb2fc, 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec, 52 | 0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915, 0xc67178f2e372532b, 53 | 0xca273eceea26619c, 0xd186b8c721c0c207, 0xeada7dd6cde0eb1e, 
0xf57d4f7fee6ed178, 54 | 0x06f067aa72176fba, 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b, 55 | 0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c, 56 | 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, 0x5fcb6fab3ad6faec, 0x6c44198c4a475817 57 | }; 58 | 59 | static inline uint64_t rotate_r(uint64_t x, int d) { 60 | return (x >> d) | (x << (64 - d)); 61 | } 62 | 63 | static inline uint64_t S0 (uint64_t h1) { 64 | return rotate_r(h1, 28) ^ rotate_r(h1, 34) ^ rotate_r(h1, 39); 65 | } 66 | 67 | static inline uint64_t S1 (uint64_t h4) { 68 | return rotate_r(h4,14) ^ rotate_r(h4,18) ^ rotate_r(h4,41); 69 | } 70 | 71 | static inline uint64_t s0 (uint64_t a) { 72 | return rotate_r(a,1) ^ rotate_r(a,8) ^ a>>7; 73 | } 74 | 75 | static inline uint64_t s1 (uint64_t b) { 76 | return rotate_r(b,19) ^ rotate_r(b,61) ^ b>>6; 77 | } 78 | 79 | static inline uint64_t ch (uint64_t h4, uint64_t h5, uint64_t h6) { 80 | return h6^(h4 & (h6^h5)); 81 | } 82 | 83 | static inline uint64_t maj(uint64_t h1, uint64_t h2, uint64_t h3) { 84 | return (h1&h2) ^ (h3&(h1^h2)); 85 | } 86 | 87 | static void sha512_process_block(struct sha512_ctx_t *ctx) 88 | { 89 | uint64_t i, tmp, a, b, 90 | *w = (uint64_t *) ctx->block, 91 | *state = ctx->chain, 92 | h0 = state[0], h1 = state[1], h2 = state[2], h3 = state[3], 93 | h4 = state[4], h5 = state[5], h6 = state[6], h7 = state[7]; 94 | 95 | /* Clang doesn't unswitch this automatically */ 96 | for (i=0; i<16; i++) { 97 | /* load up the input word for this round */ 98 | tmp = w[i] = bswap64(w[i]); 99 | tmp = tmp + h7 + S1(h4) + ch(h4,h5,h6) + sha512_k[i]; 100 | 101 | /* shift register */ 102 | h7 = h6; h6 = h5; h5 = h4; 103 | h4 = h3 + tmp; 104 | h3 = h2; h2 = h1; h1 = h0; 105 | h0 = tmp + maj(h1,h2,h3) + S0(h1); 106 | } 107 | 108 | for (; i<80; i++) { 109 | /* load up the input word for this round */ 110 | a = w[(i+1 ) & 15]; 111 | b = w[(i+14) & 15]; 112 | tmp = w[i&15] = s0(a) + s1(b) + w[i&15] + w[(i+9) & 15]; 113 | tmp = 
tmp + h7 + S1(h4) + ch(h4,h5,h6) + sha512_k[i]; 114 | 115 | /* shift register */ 116 | h7 = h6; h6 = h5; h5 = h4; 117 | h4 = h3 + tmp; 118 | h3 = h2; h2 = h1; h1 = h0; 119 | h0 = tmp + maj(h1,h2,h3) + S0(h1); 120 | } 121 | 122 | state[0] += h0; 123 | state[1] += h1; 124 | state[2] += h2; 125 | state[3] += h3; 126 | state[4] += h4; 127 | state[5] += h5; 128 | state[6] += h6; 129 | state[7] += h7; 130 | } 131 | 132 | void sha512_init(struct sha512_ctx_t *ctx) 133 | { 134 | ctx->nbytes = 0; 135 | memcpy(ctx->chain, sha512_init_state, sizeof(sha512_init_state)); 136 | memset(ctx->block, 0, sizeof(ctx->block)); 137 | } 138 | 139 | void sha512_update(struct sha512_ctx_t *ctx, const unsigned char *data, uint64_t bytes) 140 | { 141 | assert(ctx->nbytes < 1ull<<56); 142 | assert(bytes < 1ull<<56); 143 | 144 | while (bytes) { 145 | uint64_t fill = ctx->nbytes % 128, accept = 128 - fill; 146 | if (accept > bytes) accept = bytes; 147 | ctx->nbytes += accept; 148 | memcpy(ctx->block + fill, data, accept); 149 | 150 | if (fill+accept == 128) { 151 | sha512_process_block(ctx); 152 | } 153 | 154 | bytes -= accept; 155 | data += accept; 156 | } 157 | 158 | assert(ctx->nbytes < 1ull<<56); 159 | } 160 | 161 | void sha512_final(struct sha512_ctx_t *ctx, uint8_t result[64]) 162 | { 163 | uint64_t fill = ctx->nbytes % 128, i; 164 | ctx->block[fill++] = 0x80; 165 | if (fill > 112) { 166 | memset(ctx->block + fill, 0, 128-fill); 167 | sha512_process_block(ctx); 168 | fill = 0; 169 | } 170 | memset(ctx->block + fill, 0, 112-fill); 171 | 172 | uint64_t highCount = 0, lowCount = bswap64((ctx->nbytes * 8)); 173 | memcpy(&ctx->block[112],&highCount,8); 174 | memcpy(&ctx->block[120],&lowCount,8); 175 | sha512_process_block(ctx); 176 | for (i=0; i<8; i++) { 177 | ctx->chain[i] = bswap64(ctx->chain[i]); 178 | } 179 | memcpy(result, ctx->chain, sizeof(ctx->chain)); 180 | sha512_init(ctx); 181 | } 182 | 183 | int main() 184 | { 185 | struct sha512_ctx_t sha512; 186 | uint8_t 
buf[SHA512_OUTPUT_BYTES]; 187 | uint8_t output[SHA512_OUTPUT_BYTES]; 188 | 189 | memset(buf, 0, SHA512_OUTPUT_BYTES); 190 | sha512_init(&sha512); 191 | for (size_t i = 0; i < 1000000; i++) { 192 | sha512_update(&sha512, buf, SHA512_OUTPUT_BYTES); 193 | } 194 | sha512_final(&sha512, output); 195 | 196 | for (size_t j = 0; j < SHA512_OUTPUT_BYTES; j++) { 197 | printf("%hhx", output[j]); 198 | } 199 | printf("\n"); 200 | } 201 | --------------------------------------------------------------------------------