├── autogen.sh ├── doc └── Implementing_Lightweight_Block_Ciphers_on_x86_Architectures.pdf ├── autoclean.sh ├── Makefile.default ├── Makefile.in ├── LICENCE.txt ├── src ├── common │ ├── lib_common.c │ ├── test_vectors.h │ ├── basic_helpers.h │ ├── log.h │ └── bitslice_common.h ├── table │ ├── gen_tables │ │ ├── Piccolo_generateTables.c │ │ ├── PRESENT_generateTables.c │ │ └── LED_generateTables.c │ ├── LED │ │ └── LED.c │ └── Piccolo │ │ └── Piccolo.c └── bitslice │ ├── Piccolo │ ├── Piccolo.c │ └── Piccolo_util.h │ ├── LED │ ├── LED_utils.h │ └── LED.c │ └── PRESENT │ └── PRESENT.c ├── README.md └── configure.ac /autogen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | echo "[Generating configure file ...]" 4 | echo " |-> Run ./configure, and then make" 5 | autoconf configure.ac > configure 6 | chmod +x ./configure 7 | -------------------------------------------------------------------------------- /doc/Implementing_Lightweight_Block_Ciphers_on_x86_Architectures.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rben-dev/lightweight-crypto-lib/HEAD/doc/Implementing_Lightweight_Block_Ciphers_on_x86_Architectures.pdf -------------------------------------------------------------------------------- /autoclean.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | main_dir="./" 4 | 5 | clean_dirs=($main_dir) 6 | 7 | echo "Cleaning the project ..." 8 | make clean &> /dev/null 9 | echo "Cleaning AUTOCONF files ..." 10 | rm -rf autom4te.cache config.log config.status configure 11 | echo "Cleaning Makefiles ..." 
12 | for (( i = 0 ; i < ${#clean_dirs[*]} ; i++ )) 13 | do 14 | rm -f ${clean_dirs[i]}/Makefile 15 | done 16 | -------------------------------------------------------------------------------- /Makefile.default: -------------------------------------------------------------------------------- 1 | table_dir=src/table 2 | vperm_dir=src/vperm 3 | bitslice_dir=src/bitslice 4 | common_dir=src/common 5 | include_dir=src 6 | LIB_COPTS= -Wall -Wextra -Wpointer-arith -Wcast-align -Wredundant-decls -Wnested-externs -Winline -Wuninitialized -fPIC -O3 -shared -mssse3 7 | LIB_LDOPTS=-lpthread 8 | BIN_COPTS= -Wall -Wextra -Wpointer-arith -Wcast-align -Wredundant-decls -Wnested-externs -Winline -Wuninitialized -O3 9 | BIN_LDOPTS=-ldl 10 | defines=-DTABLE -DVPERM -DBITSLICE -DAVX -DBASHCOLORS -DTEST_VECTORS -DMEASURE_PERF -DLED64 -DLED128 -DPRESENT80 -DPRESENT128 -DPiccolo80 -DPiccolo128 -DTHREAD_SAFE 11 | outdir=bin 12 | outlib=$(outdir)/liblightweight-ciphers.so 13 | outbin=$(outdir)/check_all_ciphers 14 | # all, lib, checks and clean are commands, not files: declare every one of them phony so a stray file with the same name cannot disable the target 15 | .PHONY: all lib checks clean 16 | 17 | all: lib checks 18 | 19 | lib: 20 | @mkdir -p $(outdir) 21 | $(CC) $(defines) $(LIB_COPTS) -I $(include_dir) $(table_dir)/LED/LED.c $(vperm_dir)/LED/LED.c $(bitslice_dir)/LED/LED.c $(table_dir)/PRESENT/PRESENT.c $(vperm_dir)/PRESENT/PRESENT.c $(bitslice_dir)/PRESENT/PRESENT.c $(table_dir)/Piccolo/Piccolo.c $(vperm_dir)/Piccolo/Piccolo.c $(bitslice_dir)/Piccolo/Piccolo.c $(common_dir)/lib_common.c -o $(outlib) $(LIB_LDOPTS) 22 | 23 | checks: 24 | @mkdir -p $(outdir) 25 | $(CC) $(defines) $(BIN_COPTS) -I $(include_dir) $(common_dir)/check_all.c -o $(outbin) $(BIN_LDOPTS) 26 | 27 | clean: 28 | @rm -f $(outlib) $(outbin) 29 | @rm -rf $(outdir) 30 | -------------------------------------------------------------------------------- /Makefile.in: -------------------------------------------------------------------------------- 1 | table_dir=src/table 2 | vperm_dir=src/vperm 3 | bitslice_dir=src/bitslice 4 | common_dir=src/common 5 | include_dir=src 6 | 
LIB_COPTS=@arch32_opt@ @arch64_opt@ -Wall -Wextra -Wpointer-arith -Wcast-align -Wredundant-decls -Wnested-externs -Winline -Wuninitialized -fPIC -O3 -shared @ssse_link@ 7 | LIB_LDOPTS=@threadsafe_link@ 8 | BIN_COPTS=@arch32_opt@ @arch64_opt@ -Wall -Wextra -Wpointer-arith -Wcast-align -Wredundant-decls -Wnested-externs -Winline -Wuninitialized -O3 9 | BIN_LDOPTS=-ldl 10 | defines=@table_define@ @vperm_define@ @bitslice_define@ @avx_define@ -DBASHCOLORS -DTEST_VECTORS -DMEASURE_PERF @led64_define@ @led128_define@ @present80_define@ @present128_define@ @piccolo80_define@ @piccolo128_define@ @threadsafe_define@ 11 | outdir=bin 12 | outlib=$(outdir)/liblightweight-ciphers.so 13 | outbin=$(outdir)/check_all_ciphers 14 | # all, lib, checks and clean are commands, not files: declare every one of them phony so a stray file with the same name cannot disable the target 15 | .PHONY: all lib checks clean 16 | 17 | all: lib checks 18 | 19 | lib: 20 | @mkdir -p $(outdir) 21 | $(CC) $(defines) $(LIB_COPTS) -I $(include_dir) $(table_dir)/LED/LED.c $(vperm_dir)/LED/LED.c $(bitslice_dir)/LED/LED.c $(table_dir)/PRESENT/PRESENT.c $(vperm_dir)/PRESENT/PRESENT.c $(bitslice_dir)/PRESENT/PRESENT.c $(table_dir)/Piccolo/Piccolo.c $(vperm_dir)/Piccolo/Piccolo.c $(bitslice_dir)/Piccolo/Piccolo.c $(common_dir)/lib_common.c -o $(outlib) $(LIB_LDOPTS) 22 | 23 | checks: 24 | @mkdir -p $(outdir) 25 | $(CC) $(defines) $(BIN_COPTS) -I $(include_dir) $(common_dir)/check_all.c -o $(outbin) $(BIN_LDOPTS) 26 | 27 | clean: 28 | @rm -f $(outlib) $(outbin) 29 | @rm -rf $(outdir) 30 | -------------------------------------------------------------------------------- /LICENCE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright ANSSI and NTU (2013-2015) 4 | Contributors: 5 | Ryad BENADJILA [ryadbenadjila@gmail.com] and 6 | Jian GUO [ntu.guo@gmail.com] and 7 | Victor LOMNE [victor.lomne@gmail.com] and 8 | Thomas PEYRIN [thomas.peyrin@gmail.com] 9 | 10 | Permission is hereby granted, free of charge, to any person obtaining a copy 11 | of this software and associated documentation files (the 
"Software"), to deal 12 | in the Software without restriction, including without limitation the rights 13 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 | copies of the Software, and to permit persons to whom the Software is 15 | furnished to do so, subject to the following conditions: 16 | 17 | The above copyright notice and this permission notice shall be included in 18 | all copies or substantial portions of the Software. 19 | 20 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 26 | THE SOFTWARE. 27 | 28 | Except as contained in this notice, the name(s) of the above copyright holders 29 | shall not be used in advertising or otherwise to promote the sale, use or other 30 | dealings in this Software without prior written authorization. 31 | -------------------------------------------------------------------------------- /src/common/lib_common.c: -------------------------------------------------------------------------------- 1 | /*------------------------ MIT License HEADER ------------------------------------ 2 | Copyright ANSSI and NTU (2015) 3 | Contributors: 4 | Ryad BENADJILA [ryadbenadjila@gmail.com] and 5 | Jian GUO [ntu.guo@gmail.com] and 6 | Victor LOMNE [victor.lomne@gmail.com] and 7 | Thomas PEYRIN [thomas.peyrin@gmail.com] 8 | 9 | This software is a computer program whose purpose is to implement 10 | lightweight block ciphers with different optimizations for the x86 11 | platform. Three algorithms have been implemented: PRESENT, LED and 12 | Piccolo. 
Three techniques have been explored: table based 13 | implementations, vperm (for vector permutation) and bitslice 14 | implementations. For more details, please refer to the SAC 2013 15 | paper: 16 | http://eprint.iacr.org/2013/445 17 | as well as the documentation of the project. 18 | Here is a big picture of how the code is divided: 19 | - src/common contains common headers, structures and functions. 20 | - src/table contains table based implementations, with the code 21 | that generates the tables in src/table/gen_tables. The code here 22 | is written in pure C so it should compile on any platform (x86 23 | and other architectures), as well as any OS flavour (*nix, 24 | Windows ...). 25 | - src/vperm contains vperm based implementations. They are written 26 | in inline assembly for x86_64 and will only compile and work on 27 | this platform. The code only compiles with gcc, but porting it to 28 | other assembly flavours should not be too complicated. 29 | - src/bitslice contains bitslice based implementations. They are 30 | written in asm intrinsics. It should compile and run on i386 as 31 | well as x86_64 platforms, and it should be portable to other OS 32 | flavours since intrinsics are standard among many compilers. 33 | Note: vperm and bitslice implementations require a x86 CPU with at least 34 | SSSE3 extensions. 35 | 36 | Permission is hereby granted, free of charge, to any person obtaining a copy 37 | of this software and associated documentation files (the "Software"), to deal 38 | in the Software without restriction, including without limitation the rights 39 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 40 | copies of the Software, and to permit persons to whom the Software is 41 | furnished to do so, subject to the following conditions: 42 | 43 | The above copyright notice and this permission notice shall be included in 44 | all copies or substantial portions of the Software. 
45 | 46 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 47 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 48 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 49 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 50 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 51 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 52 | THE SOFTWARE. 53 | 54 | Except as contained in this notice, the name(s) of the above copyright holders 55 | shall not be used in advertising or otherwise to promote the sale, use or other 56 | dealings in this Software without prior written authorization. 57 | 58 | 59 | -------------------------- MIT License HEADER ----------------------------------*/ 60 | #include 61 | 62 | /*** Common stuff for all the algorithms ****/ 63 | /* For thread-safety */ 64 | /* This is only useful for bitslice based implementations */ 65 | #ifdef THREAD_SAFE 66 | #ifdef BITSLICE 67 | #include 68 | pthread_mutex_t bitslice_init_mutex = PTHREAD_MUTEX_INITIALIZER; 69 | #endif 70 | #endif 71 | 72 | #ifdef MEASURE_PERF 73 | /* Global variables to keep track of */ 74 | /* performance measurements */ 75 | uint64_t key_schedule_start = 0; 76 | uint64_t key_schedule_end = 0; 77 | uint64_t encrypt_start = 0; 78 | uint64_t encrypt_end = 0; 79 | #endif 80 | -------------------------------------------------------------------------------- /src/common/test_vectors.h: -------------------------------------------------------------------------------- 1 | /*------------------------ MIT License HEADER ------------------------------------ 2 | Copyright ANSSI and NTU (2015) 3 | Contributors: 4 | Ryad BENADJILA [ryadbenadjila@gmail.com] and 5 | Jian GUO [ntu.guo@gmail.com] and 6 | Victor LOMNE [victor.lomne@gmail.com] and 7 | Thomas PEYRIN [thomas.peyrin@gmail.com] 8 | 9 | This software is a computer program whose purpose is to 
implement 10 | lightweight block ciphers with different optimizations for the x86 11 | platform. Three algorithms have been implemented: PRESENT, LED and 12 | Piccolo. Three techniques have been explored: table based 13 | implementations, vperm (for vector permutation) and bitslice 14 | implementations. For more details, please refer to the SAC 2013 15 | paper: 16 | http://eprint.iacr.org/2013/445 17 | as well as the documentation of the project. 18 | Here is a big picture of how the code is divided: 19 | - src/common contains common headers, structures and functions. 20 | - src/table contains table based implementations, with the code 21 | that generates the tables in src/table/gen_tables. The code here 22 | is written in pure C so it should compile on any platform (x86 23 | and other architectures), as well as any OS flavour (*nix, 24 | Windows ...). 25 | - src/vperm contains vperm based implementations. They are written 26 | in inline assembly for x86_64 and will only compile and work on 27 | this platform. The code only compiles with gcc, but porting it to 28 | other assembly flavours should not be too complicated. 29 | - src/bitslice contains bitslice based implementations. They are 30 | written in asm intrinsics. It should compile and run on i386 as 31 | well as x86_64 platforms, and it should be portable to other OS 32 | flavours since intrinsics are standard among many compilers. 33 | Note: vperm and bitslice implementations require a x86 CPU with at least 34 | SSSE3 extensions. 
35 | 36 | Permission is hereby granted, free of charge, to any person obtaining a copy 37 | of this software and associated documentation files (the "Software"), to deal 38 | in the Software without restriction, including without limitation the rights 39 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 40 | copies of the Software, and to permit persons to whom the Software is 41 | furnished to do so, subject to the following conditions: 42 | 43 | The above copyright notice and this permission notice shall be included in 44 | all copies or substantial portions of the Software. 45 | 46 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 47 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 48 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 49 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 50 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 51 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 52 | THE SOFTWARE. 53 | 54 | Except as contained in this notice, the name(s) of the above copyright holders 55 | shall not be used in advertising or otherwise to promote the sale, use or other 56 | dealings in this Software without prior written authorization. 
57 | 58 | 59 | -------------------------- MIT License HEADER ----------------------------------*/ 60 | /* Test vectors for all the ciphers */ 61 | /**** LED ***/ 62 | /* LED64 test vectors */ 63 | u64 test_vectorsLED64[] = {0x0ULL, 0xefcdab8967452301ULL}; 64 | u16 keys64LED64[sizeof(test_vectorsLED64)/sizeof(u64)][KEY64] = {{0}, {0x2301, 0x6745, 0xab89, 0xefcd}}; 65 | u64 test_vectorsLED64results[sizeof(test_vectorsLED64)/sizeof(u64)] = {0x98c7a0031040c239ULL, 0x58fc93381e5503a0ULL}; 66 | /* LED128 test vectors */ 67 | u64 test_vectorsLED128[] = {0x0ULL, 0xefcdab8967452301ULL}; 68 | u16 keys128LED128[sizeof(test_vectorsLED128)/sizeof(u64)][KEY128] = {{0}, {0x2301, 0x6745, 0xab89, 0xefcd, 0x2301, 0x6745, 0xab89, 0xefcd}}; 69 | u64 test_vectorsLED128results[sizeof(test_vectorsLED128)/sizeof(u64)] = {0xa1db0c85a0b2ec3dULL, 0xc24f017f5824b8d6ULL}; 70 | 71 | /*** PRESENT ***/ 72 | /* PRESENT80 test vectors */ 73 | u64 test_vectorsPRESENT80[] = {0x0ULL, 0x0ULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL}; 74 | u16 keys80PRESENT80[sizeof(test_vectorsPRESENT80)/sizeof(u64)][KEY80] = {{0}, {0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, {0}, {0xffff, 0xffff, 0xffff, 0xffff, 0xffff}}; 75 | u64 test_vectorsPRESENT80results[sizeof(test_vectorsPRESENT80)/sizeof(u64)] = {0x4584227b38c17955ULL, 0x495094f5c0462ce7ULL, 0x7b41682fc7ff12a1ULL, 0xd2103221d3dc3333ULL}; 76 | /* PRESENT128 test vectors */ 77 | u64 test_vectorsPRESENT128[] = {0x0ULL}; 78 | u16 keys128PRESENT128[sizeof(test_vectorsPRESENT128)/sizeof(u64)][KEY128] = {{0}}; 79 | u64 test_vectorsPRESENT128results[sizeof(test_vectorsPRESENT128)/sizeof(u64)] = {0xaf00692e2a70db96ULL}; 80 | 81 | /*** Piccolo ***/ 82 | /* Piccolo80 test vectors */ 83 | u64 test_vectorsPiccolo80[] = {0xefcdab8967452301ULL, 0x517fe322009f2f67}; 84 | u16 keys80Piccolo80[sizeof(test_vectorsPiccolo80)/sizeof(u64)][KEY80] = {{0x1100, 0x3322, 0x5544, 0x7766, 0x9988}, {0x0fbb, 0x83f6, 0x94d5, 0xa445, 0x9120}}; 85 | u64 
test_vectorsPiccolo80results[sizeof(test_vectorsPiccolo80)/sizeof(u64)] = {0x5640f83599ff2b8dULL, 0xccbf57920c60a302ULL}; 86 | /* Piccolo128 test vectors */ 87 | u64 test_vectorsPiccolo128[] = {0xefcdab8967452301ULL, 0x517fe322009f2f67}; 88 | u16 keys128Piccolo128[sizeof(test_vectorsPiccolo128)/sizeof(u64)][KEY128] = {{0x1100, 0x3322, 0x5544, 0x7766, 0x9988, 0xbbaa, 0xddcc, 0xffee}, {0x0fbb, 0x83f6, 0x94d5, 0xa445, 0x9120, 0x5926, 0x74f7, 0x7d76}}; 89 | u64 test_vectorsPiccolo128results[sizeof(test_vectorsPiccolo128)/sizeof(u64)] = {0xff897b65ea2cc45eULL, 0x6b806ca361beee6cULL}; 90 | -------------------------------------------------------------------------------- /src/common/basic_helpers.h: -------------------------------------------------------------------------------- 1 | /*------------------------ MIT License HEADER ------------------------------------ 2 | Copyright ANSSI and NTU (2015) 3 | Contributors: 4 | Ryad BENADJILA [ryadbenadjila@gmail.com] and 5 | Jian GUO [ntu.guo@gmail.com] and 6 | Victor LOMNE [victor.lomne@gmail.com] and 7 | Thomas PEYRIN [thomas.peyrin@gmail.com] 8 | 9 | This software is a computer program whose purpose is to implement 10 | lightweight block ciphers with different optimizations for the x86 11 | platform. Three algorithms have been implemented: PRESENT, LED and 12 | Piccolo. Three techniques have been explored: table based 13 | implementations, vperm (for vector permutation) and bitslice 14 | implementations. For more details, please refer to the SAC 2013 15 | paper: 16 | http://eprint.iacr.org/2013/445 17 | as well as the documentation of the project. 18 | Here is a big picture of how the code is divided: 19 | - src/common contains common headers, structures and functions. 20 | - src/table contains table based implementations, with the code 21 | that generates the tables in src/table/gen_tables. 
The code here 22 | is written in pure C so it should compile on any platform (x86 23 | and other architectures), as well as any OS flavour (*nix, 24 | Windows ...). 25 | - src/vperm contains vperm based implementations. They are written 26 | in inline assembly for x86_64 and will only compile and work on 27 | this platform. The code only compiles with gcc, but porting it to 28 | other assembly flavours should not be too complicated. 29 | - src/bitslice contains bitslice based implementations. They are 30 | written in asm intrinsics. It should compile and run on i386 as 31 | well as x86_64 platforms, and it should be portable to other OS 32 | flavours since intrinsics are standard among many compilers. 33 | Note: vperm and bitslice implementations require a x86 CPU with at least 34 | SSSE3 extensions. 35 | 36 | Permission is hereby granted, free of charge, to any person obtaining a copy 37 | of this software and associated documentation files (the "Software"), to deal 38 | in the Software without restriction, including without limitation the rights 39 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 40 | copies of the Software, and to permit persons to whom the Software is 41 | furnished to do so, subject to the following conditions: 42 | 43 | The above copyright notice and this permission notice shall be included in 44 | all copies or substantial portions of the Software. 45 | 46 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 47 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 48 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 49 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 50 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 51 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 52 | THE SOFTWARE. 
53 | 54 | Except as contained in this notice, the name(s) of the above copyright holders 55 | shall not be used in advertising or otherwise to promote the sale, use or other 56 | dealings in this Software without prior written authorization. 57 | 58 | 59 | -------------------------- MIT License HEADER ----------------------------------*/ 60 | #define _GNU_SOURCE 61 | #include 62 | #include 63 | #include 64 | #include 65 | #include 66 | #include 67 | #include 68 | #include 69 | 70 | #define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) 71 | 72 | #ifndef MAIN_INCLUDE 73 | /* Includes useful for instrinsics */ 74 | #ifdef BITSLICE 75 | /* For SSE2 */ 76 | #include 77 | #include 78 | 79 | /* For SSE3 and SSSE3 */ 80 | #include 81 | #include 82 | #endif 83 | #endif 84 | 85 | /* For thread-safety */ 86 | /* This is only useful for bitslice based implementations */ 87 | #ifdef THREAD_SAFE 88 | #ifdef BITSLICE 89 | #include 90 | extern pthread_mutex_t bitslice_init_mutex; 91 | #endif 92 | #endif 93 | 94 | /* Stringifying macros for inline assembly */ 95 | #define tos(a) #a 96 | #define tostr(a) tos(a) 97 | 98 | /* Types definitions */ 99 | typedef uint64_t u64; 100 | typedef uint32_t u32; 101 | typedef uint16_t u16; 102 | typedef uint8_t byte; 103 | typedef uint8_t u8; 104 | #ifndef MAIN_INCLUDE 105 | #ifdef BITSLICE 106 | typedef __m128i word; 107 | #endif 108 | #endif 109 | 110 | /* Parallelism definition */ 111 | #define TABLE_P 1 112 | #define VPERM_P 2 113 | #define BITSLICE8_P 8 114 | #define BITSLICE16_P 16 115 | #define BITSLICE32_P 32 116 | 117 | /* Key size in u16 basic type */ 118 | #define KEY64 4 119 | #define KEY80 5 120 | #define KEY128 8 121 | 122 | /* Specific number of subkeys for ciphers */ 123 | #define LED64_SUBKEYS_SIZE (1 * sizeof(u64)) 124 | #define LED128_SUBKEYS_SIZE (2 * sizeof(u64)) 125 | #define PRESENT80_SUBKEYS_SIZE (32 * sizeof(u64)) 126 | #define PRESENT128_SUBKEYS_SIZE (32 * sizeof(u64)) 127 | #define 
Piccolo80_SUBKEYS_SIZE (27 * sizeof(u64)) 128 | #define Piccolo128_SUBKEYS_SIZE (33 * sizeof(u64)) 129 | 130 | /* Context saving macros for inline assembly code. Push_All_Regs() pushes rdx, rcx, rax and rbx (one asm statement each); Pop_All_Regs() pops them back in the reverse order. Both expansions already end in ';' after while(0), so a call site that adds its own ';' produces an extra empty statement. */ 131 | #define Push_All_Regs() do{\ 132 | asm("push rdx");\ 133 | asm("push rcx");\ 134 | asm("push rax");\ 135 | asm("push rbx");\ 136 | }while(0); 137 | 138 | #define Pop_All_Regs() do{\ 139 | asm("pop rbx");\ 140 | asm("pop rax");\ 141 | asm("pop rcx");\ 142 | asm("pop rdx");\ 143 | }while(0); 144 | 145 | /******* Performance measurement helpers *********/ 146 | 147 | /* Number of samples for performance measurement (2^21 unless SAMPLES is already defined by the build) */ 148 | #ifndef SAMPLES 149 | #define SAMPLES (0x1ULL << 21) 150 | #endif 151 | 152 | #ifndef MAIN_INCLUDE 153 | #ifdef MEASURE_PERF 154 | /* Global variables to keep track of */ 155 | /* performance measurements */ 156 | extern u64 key_schedule_start; 157 | extern u64 key_schedule_end; 158 | extern u64 encrypt_start; 159 | extern u64 encrypt_end; 160 | #endif 161 | #endif 162 | /* rdtsc(): returns a 64-bit cycle-counter reading. The i386 path emits the raw bytes 0x0f 0x31 and reads the result through the "=A" constraint; the x86_64 path issues rdtsc and joins the "=a" (low) and "=d" (high) 32-bit halves. Any other architecture stops the build with #error. */ 163 | #ifdef MEASURE_PERF 164 | inline static u64 rdtsc(){ 165 | #if defined(__i386__) 166 | u64 cycles; 167 | __asm__ volatile (".byte 0x0f, 0x31" : "=A"(cycles)); 168 | return cycles; 169 | #else 170 | #if defined(__x86_64__) 171 | u32 hi, lo; 172 | __asm__ volatile ("rdtsc" : "=a"(lo), "=d"(hi)); 173 | return (((u64)lo) | ((u64)(hi) << 32)); 174 | #else 175 | #error "Unsupported architecture for counting cycles" 176 | #endif 177 | #endif 178 | } 179 | 180 | /* Setting the CPU affinity: restricts the calling process (getpid()) to the CPU set built below and prints a message if sched_setaffinity() fails. NOTE(review): cpu_mask is handed to CPU_SET() as its first argument, which glibc documents as a CPU number rather than a bitmask, so the value 0x1 appears to select CPU 1 -- confirm this is intended. */ 181 | inline static void setCPUaffinity(){ 182 | int cpu_mask = 0x1; 183 | cpu_set_t set; 184 | CPU_ZERO(&set); 185 | CPU_SET(cpu_mask, &set); 186 | if(sched_setaffinity(getpid(), sizeof(set), &set) == -1){ 187 | printf("Impossible to set CPU affinity...\n"); 188 | } 189 | return; 190 | } 191 | #endif 192 | -------------------------------------------------------------------------------- /src/common/log.h: -------------------------------------------------------------------------------- 1 | /*------------------------ MIT License 
HEADER ------------------------------------ 2 | Copyright ANSSI and NTU (2015) 3 | Contributors: 4 | Ryad BENADJILA [ryadbenadjila@gmail.com] and 5 | Jian GUO [ntu.guo@gmail.com] and 6 | Victor LOMNE [victor.lomne@gmail.com] and 7 | Thomas PEYRIN [thomas.peyrin@gmail.com] 8 | 9 | This software is a computer program whose purpose is to implement 10 | lightweight block ciphers with different optimizations for the x86 11 | platform. Three algorithms have been implemented: PRESENT, LED and 12 | Piccolo. Three techniques have been explored: table based 13 | implementations, vperm (for vector permutation) and bitslice 14 | implementations. For more details, please refer to the SAC 2013 15 | paper: 16 | http://eprint.iacr.org/2013/445 17 | as well as the documentation of the project. 18 | Here is a big picture of how the code is divided: 19 | - src/common contains common headers, structures and functions. 20 | - src/table contains table based implementations, with the code 21 | that generates the tables in src/table/gen_tables. The code here 22 | is written in pure C so it should compile on any platform (x86 23 | and other architectures), as well as any OS flavour (*nix, 24 | Windows ...). 25 | - src/vperm contains vperm based implementations. They are written 26 | in inline assembly for x86_64 and will only compile and work on 27 | this platform. The code only compiles with gcc, but porting it to 28 | other assembly flavours should not be too complicated. 29 | - src/bitslice contains bitslice based implementations. They are 30 | written in asm intrinsics. It should compile and run on i386 as 31 | well as x86_64 platforms, and it should be portable to other OS 32 | flavours since intrinsics are standard among many compilers. 33 | Note: vperm and bitslice implementations require a x86 CPU with at least 34 | SSSE3 extensions. 
35 | 36 | Permission is hereby granted, free of charge, to any person obtaining a copy 37 | of this software and associated documentation files (the "Software"), to deal 38 | in the Software without restriction, including without limitation the rights 39 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 40 | copies of the Software, and to permit persons to whom the Software is 41 | furnished to do so, subject to the following conditions: 42 | 43 | The above copyright notice and this permission notice shall be included in 44 | all copies or substantial portions of the Software. 45 | 46 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 47 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 48 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 49 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 50 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 51 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 52 | THE SOFTWARE. 53 | 54 | Except as contained in this notice, the name(s) of the above copyright holders 55 | shall not be used in advertising or otherwise to promote the sale, use or other 56 | dealings in this Software without prior written authorization. 
57 | 58 | 59 | -------------------------- MIT License HEADER ----------------------------------*/ 60 | /* Logging facilities */ 61 | 62 | #ifdef BASHCOLORS 63 | #define ANSI_COLOR_RED "\x1b[31m" 64 | #define ANSI_COLOR_GREEN "\x1b[32m" 65 | #define ANSI_COLOR_YELLOW "\x1b[33m" 66 | #define ANSI_COLOR_BLUE "\x1b[34m" 67 | #define ANSI_COLOR_MAGENTA "\x1b[35m" 68 | #define ANSI_COLOR_CYAN "\x1b[36m" 69 | #define ANSI_COLOR_LBLUE "\x1b[94m" 70 | #define ANSI_COLOR_LGRAY "\x1b[37m" 71 | #define ANSI_COLOR_NONE "" 72 | 73 | #define ANSI_STYLE_BOLD "\x1b[1m" 74 | #define ANSI_STYLE_UNDERLINED "\x1b[4m" 75 | #define ANSI_STYLE_INVERTED "\x1b[7m" 76 | #define ANSI_STYLE_DIM "\x1b[2m" 77 | #define ANSI_STYLE_BLINK "\x1b[5m" 78 | #define ANSI_STYLE_HIDDEN "\x1b[8m" 79 | #define ANSI_STYLE_BOLDUNDERLINED "\x1b[1m\x1b[4m" 80 | #define ANSI_STYLE_NONE "" 81 | 82 | #define ANSI_BACKGROUND_RED "\x1b[41m" 83 | #define ANSI_BACKGROUND_GREEN "\x1b[42m" 84 | #define ANSI_BACKGROUND_YELLOW "\x1b[43m" 85 | #define ANSI_BACKGROUND_BLUE "\x1b[44m" 86 | #define ANSI_BACKGROUND_MAGENTA "\x1b[45m" 87 | #define ANSI_BACKGROUND_CYAN "\x1b[46m" 88 | #define ANSI_BACKGROUND_NONE "" 89 | 90 | #define ANSI_RESET "\x1b[0m" 91 | 92 | #else 93 | 94 | #define ANSI_COLOR_RED "" 95 | #define ANSI_COLOR_GREEN "" 96 | #define ANSI_COLOR_YELLOW "" 97 | #define ANSI_COLOR_BLUE "" 98 | #define ANSI_COLOR_MAGENTA "" 99 | #define ANSI_COLOR_CYAN "" 100 | #define ANSI_COLOR_LBLUE "" 101 | #define ANSI_COLOR_LGRAY "" 102 | #define ANSI_COLOR_NONE "" 103 | 104 | #define ANSI_STYLE_BOLD "" 105 | #define ANSI_STYLE_UNDERLINED "" 106 | #define ANSI_STYLE_INVERTED "" 107 | #define ANSI_STYLE_DIM "" 108 | #define ANSI_STYLE_BLINK "" 109 | #define ANSI_STYLE_HIDDEN "" 110 | #define ANSI_STYLE_BOLDUNDERLINED "" 111 | #define ANSI_STYLE_NONE "" 112 | 113 | #define ANSI_BACKGROUND_RED "" 114 | #define ANSI_BACKGROUND_GREEN "" 115 | #define ANSI_BACKGROUND_YELLOW "" 116 | #define ANSI_BACKGROUND_BLUE "" 117 | #define 
ANSI_BACKGROUND_MAGENTA "" 118 | #define ANSI_BACKGROUND_CYAN "" 119 | #define ANSI_BACKGROUND_NONE "" 120 | 121 | #define ANSI_RESET "" 122 | 123 | #endif 124 | /* Runtime colour switch: custom_printf emits the escape sequences only while this equals 1. NOTE(review): this variable and the three tables below are defined (not merely declared) in a header, so they would presumably clash at link time if log.h were included from more than one translation unit -- confirm it is included only once. */ 125 | unsigned char use_colors = 1; 126 | /* Each enum below lists the indices of the string table declared right after it (e.g. bash_colors[red] is ANSI_COLOR_RED). */ 127 | typedef enum {red = 0, green = 1, yellow = 2, blue = 3, magenta = 4, cyan = 5, lblue = 6, lgray = 7, nocolor = 8} color; 128 | const char* bash_colors[] = {ANSI_COLOR_RED, ANSI_COLOR_GREEN, ANSI_COLOR_YELLOW, ANSI_COLOR_BLUE, ANSI_COLOR_MAGENTA, ANSI_COLOR_CYAN, ANSI_COLOR_LBLUE, ANSI_COLOR_LGRAY, ANSI_COLOR_NONE}; 129 | 130 | typedef enum {bold = 0, underlined = 1, inverted = 2, dim = 3, blink = 4, hidden = 5, boldunderlined = 6, nostyle = 7} style; 131 | const char* bash_styles[] = {ANSI_STYLE_BOLD, ANSI_STYLE_UNDERLINED, ANSI_STYLE_INVERTED, ANSI_STYLE_DIM, ANSI_STYLE_BLINK, ANSI_STYLE_HIDDEN, ANSI_STYLE_BOLDUNDERLINED, ANSI_STYLE_NONE}; 132 | 133 | typedef enum {redbg = 0, greenbg = 1, yellowbg = 2, bluebg = 3, magentabg = 4, cyanbg = 5, nobg = 6} background; 134 | const char* bash_backgrounds[] = {ANSI_BACKGROUND_RED, ANSI_BACKGROUND_GREEN, ANSI_BACKGROUND_YELLOW, ANSI_BACKGROUND_BLUE, ANSI_BACKGROUND_MAGENTA, ANSI_BACKGROUND_CYAN, ANSI_BACKGROUND_NONE}; 135 | /* custom_printf(col, bg, st, format, ...): with BASHCOLORS, prefixes the output with the colour, background and style strings and appends ANSI_RESET when use_colors == 1, otherwise prints format as-is; format is glued to adjacent string literals, so it has to be a string literal itself. Without BASHCOLORS, col/bg/st are ignored and the remaining arguments go straight to printf. The expansion already ends in ';'. */ 136 | #ifdef BASHCOLORS 137 | #define custom_printf(col, bg, st, format, ...) do {\ 138 | if(use_colors == 1){\ 139 | printf("%s%s%s"format ANSI_RESET, bash_colors[col], bash_backgrounds[bg], bash_styles[st], ##__VA_ARGS__);\ 140 | }\ 141 | else{\ 142 | printf(format, ##__VA_ARGS__);\ 143 | }\ 144 | } while(0); 145 | #else 146 | #define custom_printf(col, bg, st, ...) do {\ 147 | printf(__VA_ARGS__);\ 148 | } while(0); 149 | #endif 150 | /* custom_printf_warning(...): prints a yellow, bold-underlined "Warning:" tag, a single space, then the caller's printf-style message with no colour or style. */ 151 | #define custom_printf_warning(...) do {\ 152 | custom_printf(yellow, nobg, boldunderlined, "Warning:");\ 153 | custom_printf(nocolor, nobg, nostyle, " ");\ 154 | custom_printf(nocolor, nobg, nostyle, ##__VA_ARGS__);\ 155 | } while(0); 156 | 157 | #define custom_printf_error(...) 
do {\ 158 | custom_printf(red, nobg, boldunderlined, "Error:");\ 159 | custom_printf(nocolor, nobg, nostyle, " ");\ 160 | custom_printf(nocolor, nobg, nostyle, ##__VA_ARGS__);\ 161 | } while(0); 162 | 163 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | 3 | This software is a computer program whose purpose is to implement 4 | lightweight block ciphers with different optimizations for the x86 5 | platform. Three algorithms have been implemented: [PRESENT](http://homes.esat.kuleuven.be/.../papers/present_ches07.pdf), 6 | [LED](https://sites.google.com/site/ledblockcipher/) and [Piccolo](http://link.springer.com/chapter/10.1007%2F978-3-642-23951-9_23#page-1). 7 | Three techniques have been explored: table based 8 | implementations, vperm (for vector permutation) and bitslice 9 | implementations. For more details on these techniques and their 10 | adaptation to the algorithms, please refer to the 11 | [SAC 2013 paper](http://eprint.iacr.org/2013/445). The pdf 12 | of the extended paper is [here](doc/Implementing_Lightweight_Block_Ciphers_on_x86_Architectures.pdf). 13 | 14 | Here is a big picture of how the code is divided: 15 | 16 | * `src/common` contains common headers, structures and functions. 17 | * `src/table` contains table based implementations, with the code 18 | that generates the tables in src/table/gen_tables. The code here 19 | is written in pure C so it should compile on any platform (x86 20 | and other architectures), as well as any OS flavour (*nix, 21 | Windows ...). 22 | * `src/vperm` contains vperm based implementations. They are written 23 | in inline assembly for x86_64 and will only compile and work on 24 | this platform. The code only compiles with gcc, but porting it to 25 | other assembly flavours should not be too complicated. 26 | * `src/bitslice` contains bitslice based implementations. 
They are 27 | written in asm intrinsics. It should compile and run on i386 as 28 | well as x86_64 platforms, and it should be portable to other OS 29 | flavours since intrinsics are standard among many compilers. 30 | 31 | **NOTE1**: vperm and bitslice implementations require an x86 CPU with at least 32 | **SSSE3 extensions**. 33 | 34 | **NOTE2**: the code has been tested on Linux (Debian), but it should work 35 | on any environment with a decent gcc compiler (Mac OS, Windows with Cygwin ...). 36 | 37 | ## Authors 38 | 39 | * Ryad Benadjila () 40 | * Jian Guo () 41 | * Victor Lomné () 42 | * Thomas Peyrin () 43 | 44 | ## Quick start 45 | 46 | ### Dependencies 47 | The program only requires the [autotools](http://www.gnu.org/software/autoconf/) package, though it is not mandatory. 48 | If you don't want to use autotools, just copy the [Makefile.default](Makefile.default) 49 | as a Makefile and compile the code. Please note however that depending on your 50 | CPU type, you might need to adapt the Makefile. 51 | 52 | For now, the program **is only compatible with gcc** (e.g. it cannot be compiled with clang). 53 | 54 | **NOTE**: the autotools are used in the project to automatically detect the CPU 55 | and adapt the compilation options. 56 | 57 | ### Configure and build the project 58 | ./autoclean.sh 59 | 60 | ./autogen.sh 61 | 62 | ./configure 63 | 64 | make 65 | 66 | The produced files are in the `bin` directory. They consist of a `.so` dynamic 67 | library containing the ciphers as well as a standalone binary that tests the 68 | **reference vectors** of the ciphers and **their performance** for each implementation 69 | type. The standalone binary dynamically loads the library with _dlopen_. 70 | 71 | The configure script takes some options to restrict the ciphers and the implementation flavours 72 | one wants to compile in the library.
One can also force a given architecture (i386 or 73 | x86\_64) as well as other specific options described through the script help: 74 | 75 | ./configure --help 76 | 77 | --with-led LED cipher 78 | --with-led64 LED cipher/64-bit key variant 79 | --with-led128 LED cipher/128-bit key variant 80 | --with-present PRESENT cipher 81 | --with-present80 PRESENT cipher/80-bit key variant 82 | --with-present128 PRESENT cipher/128-bit key variant 83 | --with-piccolo Piccolo cipher 84 | --with-piccolo80 Piccolo cipher/80-bit key variant 85 | --with-piccolo128 Piccolo cipher/128-bit key variant 86 | --with-table Table implementations 87 | --with-vperm Vperm implementations 88 | --with-bitslice Bitslice implementations 89 | --with-ssse SSSE implementations 90 | --with-avx AVX implementations 91 | --with-thread-safe Thread safe library (link with pthread) 92 | --with-arch32 Force 32-bit compilation 93 | --with-arch64 Force 64-bit compilation 94 | 95 | ### Running the program 96 | 97 | Running the tests is as simple as: 98 | 99 | cd bin 100 | 101 | ./check_all_ciphers 102 | 103 | The main binary takes options that restrict the execution to test vectors or performance 104 | benchmark only, or to some implementations only. The number of samples used for benchmark can also 105 | be tuned here. The default is to use bash colors for a better readability, but one can turn this 106 | off (e.g. for logging the results in a file). 107 | 108 | ./check_all_ciphers -h 109 | Usage: ./check_all_ciphers (LED|PRESENT|Piccolo|LED64|LED128|PRESENT80|PRESENT128|Piccolo80|Piccolo128) 110 | (table|vperm|bitslice|bitslice8|bitslice16|bitslice32) 111 | Other options: 112 | -no-colors 113 | -perf-only 114 | -vectors-only 115 | -samples=[0-9]* 116 | 117 | ### The lightweight block ciphers API 118 | 119 | The **key schedule** as well as the **core encryption cipher** of the three lightweight block 120 | ciphers have been implemented in the different flavours (table, vperm and bitslice).
We have 121 | tried to make it as easy to use as possible for users by choosing a common API that takes 122 | into account the **parallelism** offered by each technique (please refer to the 123 | [paper](doc/Implementing_Lightweight_Block_Ciphers_on_x86_Architectures.pdf) for details about 124 | how performances really depend on use cases of the cipher and how the chosen parallelism 125 | can impact them). 126 | 127 | The common API naming convention is: 128 | 129 | **Cipher ## key\_size ## implementation\_type ## implementation** (with **##** being the 130 | concatenation), where: 131 | 132 | * **Cipher** is one of `LED`, `PRESENT`, `Piccolo` 133 | * **key\_size** is one of `64` or `128` for LED, `80` or `128` for PRESENT and Piccolo 134 | * **implementation\_type** is one of `table` (for LED, PRESENT, Piccolo), `vperm` (for 135 | LED, PRESENT, Piccolo), `bitslice8` (for PRESENT), `bitslice16` (for LED, PRESENT and Piccolo) and 136 | `bitslice32` (for LED) 137 | * **implementation** is one of `_key_schedule`, `_core` (for encryption) and `_cipher` (for combined 138 | key schedule and encryption) 139 | 140 | For instance, **LED64bitslice16\_core** is the bitslice implementation of the encryption core of LED 141 | for a 64-bit key size and 16 parallel blocks as input. Similarly, **PRESENT80vperm\_key\_schedule** is 142 | the vperm key schedule of PRESENT for an 80-bit key size (vperm has a 2 blocks parallelism). 143 | Finally, **Piccolo128table\_cipher** is the table based implementation of the combined key schedule and 144 | encryption of Piccolo for a 128-bit key (table based implementations have a parallelism of 1 block). 145 | 146 | ### A note about performance measurements 147 | 148 | We emphasize the fact that in order to get results consistent with the ones 149 | given in the [SAC 2013 paper](http://eprint.iacr.org/2013/445), one must 150 | **disable new Intel CPUs Turbo Boost technology**.
This technology uses 151 | dynamic upscale of the processor to locally _overclock_ it. We use 152 | the *rdtsc* instruction to perform CPU cycles measurements: the cycle 153 | count obtained here can be desynchronized with the real CPU clock when 154 | Turbo Boost is active. 155 | 156 | Turbo Boost can be disabled from the BIOS. A more user-friendly solution 157 | under Linux is to use MSR (model specific) register 0x1a0 to dynamically 158 | disable/enable it on chosen CPU cores. One must first insert the `msr` 159 | module: 160 | 161 | 162 | modprobe -i msr 163 | 164 | Then, to disable Turbo Boost on the CPU number 'cpu': 165 | 166 | wrmsr -p cpu 0x1a0 0x4000850089 167 | 168 | To enable it back: 169 | 170 | wrmsr -p cpu 0x1a0 0x850089 171 | -------------------------------------------------------------------------------- /src/table/gen_tables/Piccolo_generateTables.c: -------------------------------------------------------------------------------- 1 | /*------------------------ MIT License HEADER ------------------------------------ 2 | Copyright ANSSI and NTU (2015) 3 | Contributors: 4 | Ryad BENADJILA [ryadbenadjila@gmail.com] and 5 | Jian GUO [ntu.guo@gmail.com] and 6 | Victor LOMNE [victor.lomne@gmail.com] and 7 | Thomas PEYRIN [thomas.peyrin@gmail.com] 8 | 9 | This software is a computer program whose purpose is to implement 10 | lightweight block ciphers with different optimizations for the x86 11 | platform. Three algorithms have been implemented: PRESENT, LED and 12 | Piccolo. Three techniques have been explored: table based 13 | implementations, vperm (for vector permutation) and bitslice 14 | implementations. For more details, please refer to the SAC 2013 15 | paper: 16 | http://eprint.iacr.org/2013/445 17 | as well as the documentation of the project. 18 | Here is a big picture of how the code is divided: 19 | - src/common contains common headers, structures and functions. 
20 | - src/table contains table based implementations, with the code 21 | that generates the tables in src/table/gen_tables. The code here 22 | is written in pure C so it should compile on any platform (x86 23 | and other architectures), as well as any OS flavour (*nix, 24 | Windows ...). 25 | - src/vperm contains vperm based implementations. They are written 26 | in inline assembly for x86_64 and will only compile and work on 27 | this platform. The code only compiles with gcc, but porting it to 28 | other assembly flavours should not be too complicated. 29 | - src/bitslice contains bitslice based implementations. They are 30 | written in asm intrinsics. It should compile and run on i386 as 31 | well as x86_64 platforms, and it should be portable to other OS 32 | flavours since intrinsics are standard among many compilers. 33 | Note: vperm and bitslice implementations require a x86 CPU with at least 34 | SSSE3 extensions. 35 | 36 | Permission is hereby granted, free of charge, to any person obtaining a copy 37 | of this software and associated documentation files (the "Software"), to deal 38 | in the Software without restriction, including without limitation the rights 39 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 40 | copies of the Software, and to permit persons to whom the Software is 41 | furnished to do so, subject to the following conditions: 42 | 43 | The above copyright notice and this permission notice shall be included in 44 | all copies or substantial portions of the Software. 45 | 46 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 47 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 48 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 49 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 50 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 51 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 52 | THE SOFTWARE. 53 | 54 | Except as contained in this notice, the name(s) of the above copyright holders 55 | shall not be used in advertising or otherwise to promote the sale, use or other 56 | dealings in this Software without prior written authorization. 57 | 58 | 59 | -------------------------- MIT License HEADER ----------------------------------*/ 60 | /*PICCOLOgenerateTables.c */ 61 | /* author: Victor LOMNE - victor.lomne@gmail.com */ 62 | 63 | 64 | 65 | #include <stdio.h> 66 | 67 | /* PICCOLO Sbox */ 68 | unsigned char sbox[16] = {0x0E, 0x04, 0x0B, 0x02, 0x03, 0x08, 0x00, 0X09, 0x01, 0x0A, 0x07, 0x0F, 0x06, 0x0C, 0x05, 0x0D}; 69 | 70 | /* PICCOLO xtime (tabulated multiplication by x in the finite field x^4 + x + 1) */ 71 | unsigned char xtime[16] = {0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, 0x03, 0x01, 0x07, 0x05, 0x0B, 0x09, 0x0F, 0x0D}; 72 | 73 | 74 | /* Table generator: computes T0..T5 (256 entries each, indexed by one input byte) plus the Tcon80 (25 entries) and Tcon128 (31 entries) constant tables, and writes them as C array definitions to ../Piccolo/Piccolo_tables.h. Returns 0 on success and 1 when the output file cannot be created or fully written, so that a calling script can detect the failure. */ 75 | /****************************************************************************************************/ 76 | int main(void) 77 | { 78 | unsigned int i, temp; 79 | unsigned char il, ih; 80 | unsigned int T0_Piccolo[256], T1_Piccolo[256]; 81 | unsigned long long T2_Piccolo[256], T3_Piccolo[256], T4_Piccolo[256], T5_Piccolo[256], Tcon80_Piccolo[25], Tcon128_Piccolo[31]; 82 | FILE * ptr; 83 | 84 | /* loop over the possible input values to compute for T0, T1, T2, T3, T4 and T5 */ 85 | /* T0, T1, T2, T3 T4 and T5 allow to compute the Feistel function */ 86 | for(i = 0; i < 256; i++) 87 | { 88 | /* compute low and high parts of i */ 89 | il = i & 0x0f; 90 | ih = (i & 0xf0) >> 4; 91 | 92 | /* compute T0 */ 93 | T0_Piccolo[i] = (sbox[il] ^ sbox[ih]) << 12; 94 | T0_Piccolo[i] |= (sbox[il] ^ xtime[sbox[ih]] ^ sbox[ih]) << 8; 95 | T0_Piccolo[i] |=
(xtime[sbox[il]] ^ sbox[il] ^ xtime[sbox[ih]]) << 4; 96 | T0_Piccolo[i] |= (xtime[sbox[il]] ^ sbox[ih]); 97 | 98 | /* compute T1 */ 99 | T1_Piccolo[i] = (xtime[sbox[il]] ^ sbox[il] ^ xtime[sbox[ih]]) << 12; 100 | T1_Piccolo[i] |= (xtime[sbox[il]] ^ sbox[ih]) << 8; 101 | T1_Piccolo[i] |= (sbox[il] ^ sbox[ih]) << 4; 102 | T1_Piccolo[i] |= (sbox[il] ^ xtime[sbox[ih]] ^ sbox[ih]); 103 | 104 | /* compute T2 and T3 */ 105 | T2_Piccolo[i] = ((unsigned long long)(sbox[il] | (sbox[ih] << 4))) << 16; 106 | T3_Piccolo[i] = ((unsigned long long)(sbox[il] | (sbox[ih] << 4))) << 24; 107 | 108 | /* compute T4 and T5 */ 109 | T4_Piccolo[i] = (unsigned long long)(sbox[il] | (sbox[ih] << 4)) << 48; 110 | T5_Piccolo[i] = (unsigned long long)(sbox[il] | (sbox[ih] << 4)) << 56; 111 | } 112 | 113 | /* loop over the 25 rounds to compute constants (in Tcon80_Piccolo) for PICCOLO80 key schedule */ 114 | for(i = 0; i < 25; i++) 115 | { 116 | temp = ( (((i+1) & 0x1f) << 27) | (((i+1) & 0x1f) << 17) | (((i+1) & 0x1f) << 10) | ((i+1) & 0x1f) ) ^ 0x0f1e2d3c; 117 | Tcon80_Piccolo[i] = ((unsigned long long)(temp & 0xff000000) >> 8) | ((unsigned long long)(temp & 0x00ff0000) << 8) | ((unsigned long long)(temp & 0x0000ff00) << 40) | ((unsigned long long)(temp & 0x000000ff) << 56); 118 | } 119 | 120 | /* loop over the 31 rounds to compute constants (in Tcon128_Piccolo) for PICCOLO128 key schedule */ 121 | for(i = 0; i < 31; i++) 122 | { 123 | temp = ( (((i+1) & 0x1f) << 27) | (((i+1) & 0x1f) << 17) | (((i+1) & 0x1f) << 10) | ((i+1) & 0x1f) ) ^ 0x6547a98b; 124 | Tcon128_Piccolo[i] = ((unsigned long long)(temp & 0xff000000) >> 8) | ((unsigned long long)(temp & 0x00ff0000) << 8) | ((unsigned long long)(temp & 0x0000ff00) << 40) | ((unsigned long long)(temp & 0x000000ff) << 56); 125 | } 126 | 127 | /* open a pointer and create the file PICCOLOtables.h */ 128 | ptr = fopen("../Piccolo/Piccolo_tables.h", "w"); 129 | if(ptr == NULL) 130 | { 131 | printf("Unable to create file Piccolo_tables.h\n"); 132 |
return 1; 133 | } 134 | 135 | /* write T0 */ 136 | fprintf(ptr, "unsigned int T0_Piccolo[256] = {"); 137 | for(i = 0; i < 256; i++) 138 | { 139 | fprintf(ptr, "0x%04x", T0_Piccolo[i]); 140 | if(i == 255) 141 | fprintf(ptr, "};\n\n"); 142 | else 143 | fprintf(ptr, ", "); 144 | } 145 | 146 | /* write T1 */ 147 | fprintf(ptr, "unsigned int T1_Piccolo[256] = {"); 148 | for(i = 0; i < 256; i++) 149 | { 150 | fprintf(ptr, "0x%04x", T1_Piccolo[i]); 151 | if(i == 255) 152 | fprintf(ptr, "};\n\n"); 153 | else 154 | fprintf(ptr, ", "); 155 | } 156 | 157 | /* write T2 */ 158 | fprintf(ptr, "unsigned long long T2_Piccolo[256] = {"); 159 | for(i = 0; i < 256; i++) 160 | { 161 | fprintf(ptr, "0x%016llx", T2_Piccolo[i]); 162 | if(i == 255) 163 | fprintf(ptr, "};\n\n"); 164 | else 165 | fprintf(ptr, ", "); 166 | } 167 | 168 | /* write T3 */ 169 | fprintf(ptr, "unsigned long long T3_Piccolo[256] = {"); 170 | for(i = 0; i < 256; i++) 171 | { 172 | fprintf(ptr, "0x%016llx", T3_Piccolo[i]); 173 | if(i == 255) 174 | fprintf(ptr, "};\n\n"); 175 | else 176 | fprintf(ptr, ", "); 177 | } 178 | 179 | /* write T4 */ 180 | fprintf(ptr, "unsigned long long T4_Piccolo[256] = {"); 181 | for(i = 0; i < 256; i++) 182 | { 183 | fprintf(ptr, "0x%016llx", T4_Piccolo[i]); 184 | if(i == 255) 185 | fprintf(ptr, "};\n\n"); 186 | else 187 | fprintf(ptr, ", "); 188 | } 189 | 190 | /* write T5 */ 191 | fprintf(ptr, "unsigned long long T5_Piccolo[256] = {"); 192 | for(i = 0; i < 256; i++) 193 | { 194 | fprintf(ptr, "0x%016llx", T5_Piccolo[i]); 195 | if(i == 255) 196 | fprintf(ptr, "};\n\n"); 197 | else 198 | fprintf(ptr, ", "); 199 | } 200 | 201 | /* write Tcon80_Piccolo */ 202 | fprintf(ptr, "unsigned long long Tcon80_Piccolo[25] = {"); 203 | for(i = 0; i < 25; i++) 204 | { 205 | fprintf(ptr, "0x%016llx", Tcon80_Piccolo[i]); 206 | if(i == 24) 207 | fprintf(ptr, "};\n\n"); 208 | else 209 | fprintf(ptr, ", "); 210 | } 211 | 212 | /* write Tcon128_Piccolo */ 213 | fprintf(ptr, "unsigned long long
Tcon128_Piccolo[31] = {"); 214 | for(i = 0; i < 31; i++) 215 | { 216 | fprintf(ptr, "0x%016llx", Tcon128_Piccolo[i]); 217 | if(i == 30) 218 | fprintf(ptr, "};\n\n"); 219 | else 220 | fprintf(ptr, ", "); 221 | } 222 | 223 | /* close the pointer; a failing fclose means buffered table data did not reach the file */ 224 | if(fclose(ptr) != 0){ printf("Unable to write file Piccolo_tables.h\n"); return 1; } 225 | 226 | return 0; 227 | } 228 | -------------------------------------------------------------------------------- /src/bitslice/Piccolo/Piccolo.c: -------------------------------------------------------------------------------- 1 | /*------------------------ MIT License HEADER ------------------------------------ 2 | Copyright ANSSI and NTU (2015) 3 | Contributors: 4 | Ryad BENADJILA [ryadbenadjila@gmail.com] and 5 | Jian GUO [ntu.guo@gmail.com] and 6 | Victor LOMNE [victor.lomne@gmail.com] and 7 | Thomas PEYRIN [thomas.peyrin@gmail.com] 8 | 9 | This software is a computer program whose purpose is to implement 10 | lightweight block ciphers with different optimizations for the x86 11 | platform. Three algorithms have been implemented: PRESENT, LED and 12 | Piccolo. Three techniques have been explored: table based 13 | implementations, vperm (for vector permutation) and bitslice 14 | implementations. For more details, please refer to the SAC 2013 15 | paper: 16 | http://eprint.iacr.org/2013/445 17 | as well as the documentation of the project. 18 | Here is a big picture of how the code is divided: 19 | - src/common contains common headers, structures and functions. 20 | - src/table contains table based implementations, with the code 21 | that generates the tables in src/table/gen_tables. The code here 22 | is written in pure C so it should compile on any platform (x86 23 | and other architectures), as well as any OS flavour (*nix, 24 | Windows ...). 25 | - src/vperm contains vperm based implementations. 
They are written 26 | in inline assembly for x86_64 and will only compile and work on 27 | this platform.
The code only compiles with gcc, but porting it to 28 | other assembly flavours should not be too complicated. 29 | - src/bitslice contains bitslice based implementations. They are 30 | written in asm intrinsics. It should compile and run on i386 as 31 | well as x86_64 platforms, and it should be portable to other OS 32 | flavours since intrinsics are standard among many compilers. 33 | Note: vperm and bitslice implementations require a x86 CPU with at least 34 | SSSE3 extensions. 35 | 36 | Permission is hereby granted, free of charge, to any person obtaining a copy 37 | of this software and associated documentation files (the "Software"), to deal 38 | in the Software without restriction, including without limitation the rights 39 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 40 | copies of the Software, and to permit persons to whom the Software is 41 | furnished to do so, subject to the following conditions: 42 | 43 | The above copyright notice and this permission notice shall be included in 44 | all copies or substantial portions of the Software. 45 | 46 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 47 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 48 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 49 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 50 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 51 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 52 | THE SOFTWARE. 53 | 54 | Except as contained in this notice, the name(s) of the above copyright holders 55 | shall not be used in advertising or otherwise to promote the sale, use or other 56 | dealings in this Software without prior written authorization. 
57 | 58 | 59 | -------------------------- MIT License HEADER ----------------------------------*/ 60 | #ifdef BITSLICE 61 | #include "Piccolo_util.h" 62 | /* Prototypes of the public entry points, one set per enabled key size. */ 63 | #ifdef BITSLICE 64 | #ifdef Piccolo80 65 | void Piccolo80bitslice16_key_schedule(const u8* masterKey, u8* roundKeys); 66 | void Piccolo80bitslice16_core(const u8* message, const u8* subkeys, u8* ciphertext); 67 | void Piccolo80bitslice16_cipher(const u64 plaintext[BITSLICE16_P], const u16 masterkeys[BITSLICE16_P][KEY80], u64 ciphertext[BITSLICE16_P]); 68 | #endif 69 | #ifdef Piccolo128 70 | void Piccolo128bitslice16_key_schedule(const u8* masterKey, u8* roundKeys); 71 | void Piccolo128bitslice16_core(const u8* message, const u8* subkeys, u8* ciphertext); 72 | void Piccolo128bitslice16_cipher(const u64 plaintext[BITSLICE16_P], const u16 masterkeys[BITSLICE16_P][KEY128], u64 ciphertext[BITSLICE16_P]); 73 | #endif 74 | #endif 75 | /* Piccolo-80 key schedule: copies masterKey into a local [KEY80][BITSLICE16_P] array with Piccolo_rearrange_keys, then Piccolo_pack_keys80 writes the rk23, rk01, rk44, wk01 and wk23 areas of roundKeys. */ 76 | #ifdef Piccolo80 77 | void Piccolo80bitslice16_key_schedule(const u8* masterKey, u8* roundKeys){ 78 | /* FIXME: this should not be necessary ...
*/ 79 | /* Input keys rearrangement for convenience */ 80 | u16 key[KEY80][BITSLICE16_P]; 81 | Piccolo_rearrange_keys(masterKey, key, BITSLICE16_P, KEY80); 82 | 83 | /* load key and pack key */ 84 | Piccolo_pack_keys80((const u16 (*)[BITSLICE16_P])key, Piccolo80_rk23(roundKeys), Piccolo80_rk01(roundKeys), Piccolo80_rk44(roundKeys), Piccolo80_wk01(roundKeys), Piccolo80_wk23(roundKeys)); 85 | 86 | return; 87 | } 88 | /* Piccolo-80 encryption core: loads BITSLICE16_P/2 words from message (two u64 per word), applies inverse_bytes_endian64 and Piccolo_packing16, XORs wk01 into s[4..7], runs 25 rounds (F16, the round key selected by r%5, the round constant, and the round permutation on every round but the last), XORs wk23 into s[4..7], then unpacks and stores to ciphertext. subkeys is expected in the layout written by Piccolo80bitslice16_key_schedule. */ 89 | void Piccolo80bitslice16_core(const u8* message, const u8* subkeys, u8* ciphertext){ 90 | word s[(BITSLICE16_P/2)], t0, t1, t2, t3, t4; 91 | int i; 92 | 93 | /* load state */ 94 | for(i = 0; i < BITSLICE16_P/2; i++){ 95 | s[i] = LOAD(((u64*)message) + 2*i); 96 | inverse_bytes_endian64(s[i]); 97 | } 98 | /* pack state */ 99 | Piccolo_packing16(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], t0, t1, t2); 100 | 101 | /* the encryption process */ 102 | int r; 103 | AddKey(s[4], s[5], s[6], s[7], Piccolo80_wk01(subkeys)[0], Piccolo80_wk01(subkeys)[1], Piccolo80_wk01(subkeys)[2], Piccolo80_wk01(subkeys)[3]); 104 | for(r = 0; r < 25; r++) 105 | { 106 | F16(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], t0, t1, t2, t3, t4); 107 | switch(r%5){ 108 | case 0: 109 | case 2: AddKey(s[0], s[1], s[2], s[3], Piccolo80_rk23(subkeys)[0], Piccolo80_rk23(subkeys)[1], Piccolo80_rk23(subkeys)[2], Piccolo80_rk23(subkeys)[3]); 110 | break; 111 | case 1: 112 | case 4: AddKey(s[0], s[1], s[2], s[3], Piccolo80_rk01(subkeys)[0], Piccolo80_rk01(subkeys)[1], Piccolo80_rk01(subkeys)[2], Piccolo80_rk01(subkeys)[3]); 113 | break; 114 | case 3: AddKey(s[0], s[1], s[2], s[3], Piccolo80_rk44(subkeys)[0], Piccolo80_rk44(subkeys)[1], Piccolo80_rk44(subkeys)[2], Piccolo80_rk44(subkeys)[3]); 115 | break; 116 | } 117 | AddKey(s[0], s[1], s[2], s[3], Piccolo80_constants[r][0], Piccolo80_constants[r][1], Piccolo80_constants[r][2], Piccolo80_constants[r][3]); 118 | if(r < 24){ 119 | Piccolo_RoundPermutation(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7]); 120 | } 121 | } 122 | 123 |
AddKey(s[4], s[5], s[6], s[7], Piccolo80_wk23(subkeys)[0], Piccolo80_wk23(subkeys)[1], Piccolo80_wk23(subkeys)[2], Piccolo80_wk23(subkeys)[3]); 124 | 125 | /* unpack state */ 126 | Piccolo_unpacking16(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], t0, t1, t2); 127 | /* store the results back to ciphertext */ 128 | for(i = 0; i < (BITSLICE16_P/2); i++){ 129 | inverse_bytes_endian64(s[i]); 130 | STORE(s[i], ((u64*)ciphertext) + 2*i); 131 | } 132 | 133 | return; 134 | } 135 | /* Combined Piccolo-80 entry point: calls Piccolo_init(), runs the key schedule into a local subkeys buffer of BITSLICE16_P * Piccolo80_SUBKEYS_SIZE bytes, then the core. With MEASURE_PERF defined, rdtsc() readings around each phase are stored in key_schedule_start/end and encrypt_start/end. */ 136 | void Piccolo80bitslice16_cipher(const u64 plaintext[BITSLICE16_P], const u16 masterkeys[BITSLICE16_P][KEY80], u64 ciphertext[BITSLICE16_P]) 137 | { 138 | u8 subkeys[BITSLICE16_P * Piccolo80_SUBKEYS_SIZE]; 139 | 140 | Piccolo_init(); 141 | 142 | #ifdef MEASURE_PERF 143 | key_schedule_start = rdtsc(); 144 | #endif 145 | Piccolo80bitslice16_key_schedule((u8*)masterkeys, (u8*)subkeys); 146 | #ifdef MEASURE_PERF 147 | key_schedule_end = rdtsc(); 148 | #endif 149 | 150 | #ifdef MEASURE_PERF 151 | encrypt_start = rdtsc(); 152 | #endif 153 | Piccolo80bitslice16_core((u8*)plaintext, (u8*)subkeys, (u8*)ciphertext); 154 | #ifdef MEASURE_PERF 155 | encrypt_end = rdtsc(); 156 | #endif 157 | 158 | return; 159 | } 160 | #endif 161 | /* For round r of the Piccolo-128 core, key_constants0[r] and key_constants1[r] are the indices passed to Piccolo128_rk() to pick the two round keys XORed into the state (31 entries, one per round). */ 162 | #ifdef Piccolo128 163 | int Piccolo128_key_constants0[31] = {2, 4, 6, 2, 6, 0, 4, 6, 4, 2, 0, 4, 0, 6, 2, 0, 2, 4, 6, 2, 6, 0, 4, 6, 4, 2, 0, 4, 0, 6, 2}; 164 | int Piccolo128_key_constants1[31] = {3, 5, 7, 1, 7, 3, 5, 1, 5, 7, 3, 1, 3, 5, 7, 1, 7, 3, 5, 1, 5, 7, 3, 1, 3, 5, 7, 1, 7, 3, 5}; 165 | /* Piccolo-128 key schedule: rearranges masterKey into a local [KEY128][BITSLICE16_P] array, then Piccolo_pack_keys128 writes the round keys starting at Piccolo128_rk(roundKeys, 0) and the wk01/wk23 areas. */ 166 | void Piccolo128bitslice16_key_schedule(const u8* masterKey, u8* roundKeys){ 167 | /* FIXME: this should not be necessary ...
*/ 168 | /* Input keys rearrangement for convenience */ 169 | u16 key[KEY128][BITSLICE16_P]; 170 | Piccolo_rearrange_keys(masterKey, key, BITSLICE16_P, KEY128); 171 | 172 | /* load key and pack key */ 173 | Piccolo_pack_keys128((const u16 (*)[BITSLICE16_P])key, Piccolo128_rk(roundKeys, 0), Piccolo128_wk01(roundKeys), Piccolo128_wk23(roundKeys)); 174 | 175 | return; 176 | } 177 | /* Piccolo-128 encryption core: same load/pack, whitening and unpack/store sequence as the Piccolo-80 core, but with 31 rounds; each round applies F16, XORs the two round keys indexed by Piccolo128_key_constants0[r] and Piccolo128_key_constants1[r], XORs the round constant, and applies the round permutation on every round but the last. */ 178 | void Piccolo128bitslice16_core(const u8* message, const u8* subkeys, u8* ciphertext){ 179 | int i; 180 | word s[(BITSLICE16_P/2)], t0, t1, t2, t3, t4; 181 | 182 | /* load state */ 183 | for(i = 0; i < (BITSLICE16_P/2); i++){ 184 | s[i] = LOAD(((u64*)message) + 2*i); 185 | inverse_bytes_endian64(s[i]); 186 | } 187 | /* pack state */ 188 | Piccolo_packing16(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], t0, t1, t2); 189 | 190 | /* the encryption process */ 191 | int r; 192 | AddKey(s[4], s[5], s[6], s[7], Piccolo128_wk01(subkeys)[0], Piccolo128_wk01(subkeys)[1], Piccolo128_wk01(subkeys)[2], Piccolo128_wk01(subkeys)[3]); 193 | for(r = 0; r < 31; r++) 194 | { 195 | F16(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], t0, t1, t2, t3, t4); 196 | int keyc0 = Piccolo128_key_constants0[r]; 197 | int keyc1 = Piccolo128_key_constants1[r]; 198 | AddKey(s[0], s[1], s[2], s[3], Piccolo128_rk(subkeys,keyc0)[0], Piccolo128_rk(subkeys,keyc0)[1], Piccolo128_rk(subkeys,keyc0)[2], Piccolo128_rk(subkeys,keyc0)[3]); 199 | AddKey(s[0], s[1], s[2], s[3], Piccolo128_rk(subkeys,keyc1)[0], Piccolo128_rk(subkeys,keyc1)[1], Piccolo128_rk(subkeys,keyc1)[2], Piccolo128_rk(subkeys,keyc1)[3]); 200 | AddKey(s[0], s[1], s[2], s[3], Piccolo128_constants[r][0], Piccolo128_constants[r][1], Piccolo128_constants[r][2], Piccolo128_constants[r][3]); 201 | if(r < 30){ 202 | Piccolo_RoundPermutation(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7]); 203 | } 204 | } 205 | 206 | AddKey(s[4], s[5], s[6], s[7], Piccolo128_wk23(subkeys)[0], Piccolo128_wk23(subkeys)[1], Piccolo128_wk23(subkeys)[2], Piccolo128_wk23(subkeys)[3]); 207 |
208 | /* unpack state */ 209 | Piccolo_unpacking16(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], t0, t1, t2); 210 | /* store the results back to ciphertext */ 211 | for(i = 0; i < (BITSLICE16_P/2); i++){ 212 | inverse_bytes_endian64(s[i]); 213 | STORE(s[i], ((u64*)ciphertext) + 2*i); 214 | } 215 | 216 | return; 217 | } 218 | /* Combined Piccolo-128 entry point: Piccolo_init(), key schedule into a local subkeys buffer of BITSLICE16_P * Piccolo128_SUBKEYS_SIZE bytes, then the core; with MEASURE_PERF defined each phase is bracketed by rdtsc() readings. */ 219 | void Piccolo128bitslice16_cipher(const u64 plaintext[BITSLICE16_P], const u16 masterkeys[BITSLICE16_P][KEY128], u64 ciphertext[BITSLICE16_P]) 220 | { 221 | u8 subkeys[BITSLICE16_P * Piccolo128_SUBKEYS_SIZE]; 222 | 223 | Piccolo_init(); 224 | 225 | #ifdef MEASURE_PERF 226 | key_schedule_start = rdtsc(); 227 | #endif 228 | Piccolo128bitslice16_key_schedule((u8*)masterkeys, (u8*)subkeys); 229 | #ifdef MEASURE_PERF 230 | key_schedule_end = rdtsc(); 231 | #endif 232 | 233 | #ifdef MEASURE_PERF 234 | encrypt_start = rdtsc(); 235 | #endif 236 | Piccolo128bitslice16_core((u8*)plaintext, (u8*)subkeys, (u8*)ciphertext); 237 | #ifdef MEASURE_PERF 238 | encrypt_end = rdtsc(); 239 | #endif 240 | 241 | return; 242 | } 243 | #endif 244 | 245 | #endif 246 | -------------------------------------------------------------------------------- /src/bitslice/LED/LED_utils.h: -------------------------------------------------------------------------------- 1 | /*------------------------ MIT License HEADER ------------------------------------ 2 | Copyright ANSSI and NTU (2015) 3 | Contributors: 4 | Ryad BENADJILA [ryadbenadjila@gmail.com] and 5 | Jian GUO [ntu.guo@gmail.com] and 6 | Victor LOMNE [victor.lomne@gmail.com] and 7 | Thomas PEYRIN [thomas.peyrin@gmail.com] 8 | 9 | This software is a computer program whose purpose is to implement 10 | lightweight block ciphers with different optimizations for the x86 11 | platform. 
Three algorithms have been implemented: PRESENT, LED and 12 | Piccolo. Three techniques have been explored: table based 13 | implementations, vperm (for vector permutation) and bitslice 14 | implementations.
For more details, please refer to the SAC 2013 15 | paper: 16 | http://eprint.iacr.org/2013/445 17 | as well as the documentation of the project. 18 | Here is a big picture of how the code is divided: 19 | - src/common contains common headers, structures and functions. 20 | - src/table contains table based implementations, with the code 21 | that generates the tables in src/table/gen_tables. The code here 22 | is written in pure C so it should compile on any platform (x86 23 | and other architectures), as well as any OS flavour (*nix, 24 | Windows ...). 25 | - src/vperm contains vperm based implementations. They are written 26 | in inline assembly for x86_64 and will only compile and work on 27 | this platform. The code only compiles with gcc, but porting it to 28 | other assembly flavours should not be too complicated. 29 | - src/bitslice contains bitslice based implementations. They are 30 | written in asm intrinsics. It should compile and run on i386 as 31 | well as x86_64 platforms, and it should be portable to other OS 32 | flavours since intrinsics are standard among many compilers. 33 | Note: vperm and bitslice implementations require a x86 CPU with at least 34 | SSSE3 extensions. 35 | 36 | Permission is hereby granted, free of charge, to any person obtaining a copy 37 | of this software and associated documentation files (the "Software"), to deal 38 | in the Software without restriction, including without limitation the rights 39 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 40 | copies of the Software, and to permit persons to whom the Software is 41 | furnished to do so, subject to the following conditions: 42 | 43 | The above copyright notice and this permission notice shall be included in 44 | all copies or substantial portions of the Software. 
45 | 46 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 47 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 48 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 49 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 50 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 51 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 52 | THE SOFTWARE. 53 | 54 | Except as contained in this notice, the name(s) of the above copyright holders 55 | shall not be used in advertising or otherwise to promote the sale, use or other 56 | dealings in this Software without prior written authorization. 57 | 58 | 59 | -------------------------- MIT License HEADER ----------------------------------*/ 60 | #include "../../common/basic_helpers.h" 61 | #include "../../common/bitslice_common.h" 62 | 63 | #define ru64() ((((u64) rand()) << 32) | ((u64)rand())) 64 | 65 | 66 | #undef SWAP8 67 | #define SWAP8(l, h, t0)\ 68 | t0 = l;\ 69 | l = PUNPCKLBW(l , h);\ 70 | h = PUNPCKHBW(t0, h); 71 | 72 | #undef uSWAP8 73 | #define uSWAP8(x, y, t0);\ 74 | t0 = x;\ 75 | x = XOR(PSHUFB(x , mask_unpack8_l0), PSHUFB(y, mask_unpack8_h0));\ 76 | y = XOR(PSHUFB(t0, mask_unpack8_l1), PSHUFB(y, mask_unpack8_h1)); 77 | 78 | word constants_LED64_BITSLICE16_P[32][BITSLICE16_P/2]; 79 | word constants_LED64_BITSLICE32_P[32][BITSLICE32_P/2]; 80 | word constants_LED128_BITSLICE16_P[48][BITSLICE16_P/2]; 81 | word constants_LED128_BITSLICE32_P[48][BITSLICE32_P/2]; 82 | 83 | const u8 LED_RC[48] = { 84 | 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3E, 0x3D, 0x3B, 0x37, 0x2F, 85 | 0x1E, 0x3C, 0x39, 0x33, 0x27, 0x0E, 0x1D, 0x3A, 0x35, 0x2B, 86 | 0x16, 0x2C, 0x18, 0x30, 0x21, 0x02, 0x05, 0x0B, 0x17, 0x2E, 87 | 0x1C, 0x38, 0x31, 0x23, 0x06, 0x0D, 0x1B, 0x36, 0x2D, 0x1A, 88 | 0x34, 0x29, 0x12, 0x24, 0x08, 0x11, 0x22, 0x04 89 | }; 90 | #define generate_constants(P, LED) do {\ 91 | /* 
precompute the constants */\ 92 | u64 c[32];\ 93 | word t0, t1, t2;\ 94 | int r;\ 95 | int R;\ 96 | if(LED == 64){\ 97 | R = 32;\ 98 | }\ 99 | else{\ 100 | R = 48;\ 101 | }\ 102 | for(r = 0; r < R; r++)\ 103 | {\ 104 | u64 t = 0;\ 105 | t |= (u64) (LED>>4)&0xF;\ 106 | t |= ((u64) 1^((LED>>4) & 0xFF)) << 16;\ 107 | t |= ((u64) 2^(LED&0xF)) << 32;\ 108 | t |= ((u64) 3^(LED&0xF)) << 48;\ 109 | \ 110 | t |= ((u64) (LED_RC[r] >> 3) & 7) << 4;\ 111 | t |= ((u64) (LED_RC[r] >> 3) & 7) << (4+32);\ 112 | t |= ((u64) (LED_RC[r] >> 0) & 7) << (4+16);\ 113 | t |= ((u64) (LED_RC[r] >> 0) & 7) << (4+16+32);\ 114 | int i;\ 115 | for(i = 0; i < P; i++){\ 116 | c[i] = t;\ 117 | }\ 118 | for(i = 0; i < P/2; i++){\ 119 | constants_LED ## LED ## _ ## BITSLICE ## P ## _P[r][i] = LOAD(c+2*i);\ 120 | }\ 121 | if(P == BITSLICE32_P){\ 122 | packing32(\ 123 | constants_LED ## LED ## _ ## BITSLICE ## 32 ## _P[r][0],constants_LED ## LED ## _ ## BITSLICE ## 32 ## _P[r][1],constants_LED ## LED ## _ ## BITSLICE ## 32 ## _P[r][2],constants_LED ## LED ## _ ## BITSLICE ## 32 ## _P[r][3],\ 124 | constants_LED ## LED ## _ ## BITSLICE ## 32 ## _P[r][4],constants_LED ## LED ## _ ## BITSLICE ## 32 ## _P[r][5],constants_LED ## LED ## _ ## BITSLICE ## 32 ## _P[r][6],constants_LED ## LED ## _ ## BITSLICE ## 32 ## _P[r][7],\ 125 | constants_LED ## LED ## _ ## BITSLICE ## 32 ## _P[r][8],constants_LED ## LED ## _ ## BITSLICE ## 32 ## _P[r][9],constants_LED ## LED ## _ ## BITSLICE ## 32 ## _P[r][10],constants_LED ## LED ## _ ## BITSLICE ## 32 ## _P[r][11],\ 126 | constants_LED ## LED ## _ ## BITSLICE ## 32 ## _P[r][12],constants_LED ## LED ## _ ## BITSLICE ## 32 ## _P[r][13],constants_LED ## LED ## _ ## BITSLICE ## 32 ## _P[r][14],constants_LED ## LED ## _ ## BITSLICE ## 32 ## _P[r][15],\ 127 | t0, t1, t2);\ 128 | }\ 129 | else{\ 130 | packing16(\ 131 | constants_LED ## LED ## _ ## BITSLICE ## 16 ## _P[r][0],constants_LED ## LED ## _ ## BITSLICE ## 16 ## _P[r][1],constants_LED ## LED ## _ ## BITSLICE ## 16 ## 
_P[r][2],constants_LED ## LED ## _ ## BITSLICE ## 16 ## _P[r][3],\ 132 | constants_LED ## LED ## _ ## BITSLICE ## 16 ## _P[r][4],constants_LED ## LED ## _ ## BITSLICE ## 16 ## _P[r][5],constants_LED ## LED ## _ ## BITSLICE ## 16 ## _P[r][6],constants_LED ## LED ## _ ## BITSLICE ## 16 ## _P[r][7],\ 133 | t0, t1, t2);\ 134 | }\ 135 | }\ 136 | } while(0); 137 |

/* Guard flag for LED_init(): 0 until the one-time setup has run, 1 afterwards. */
u8 LED_init_check = 0;

/*
 * One-time setup of the bitslice LED implementation.
 *
 * While LED_init_check is still 0, this calls init() and then expands
 * generate_constants once per argument pair (16, 64), (32, 64), (16, 128)
 * and (32, 128). The flag is set to 1 on every call, so later calls skip
 * the setup.
 *
 * When THREAD_SAFE is defined, the test of the flag, the setup and the
 * update of the flag all happen while bitslice_init_mutex is held.
 * Without THREAD_SAFE no locking is performed here.
 *
 * NOTE(review): init() and bitslice_init_mutex are declared outside this
 * part of the file; presumably init() prepares the shared masks used by
 * the macros below -- confirm against its definition.
 */
void LED_init(){
#ifdef THREAD_SAFE
	pthread_mutex_lock(&bitslice_init_mutex);
#endif
	if(LED_init_check == 0){
		init();
		/* first argument: number of parallel blocks, second: LED key size */
		generate_constants(16, 64);
		generate_constants(32, 64);
		generate_constants(16, 128);
		generate_constants(32, 128);
	}
	LED_init_check = 1;
#ifdef THREAD_SAFE
	pthread_mutex_unlock(&bitslice_init_mutex);
#endif
	return;
}

/* 16 copies of the data,
	ai: i-th bit of x0,0 (16 copies), followed by i-th bit of x0,1, x0,2, x0,3, then i-th bit of x1,0, x1,1, x1,2, x1,3 for i = 0, 1, 2, 3 (a3 for MSB)
	bi: i-th bit of x2,0 (16 copies), followed by i-th bit of x2,1, x2,2, x2,3, then i-th bit of x3,0, x3,1, x3,2, x3,3 for i = 0, 1, 2, 3 (b3 for MSB)
*/

/* Sbox Layer 16 */
/*
 * Applies the Sbox macro to two groups of four words: (a0..a3) and (a4..a7).
 * t0 is handed to both Sbox invocations as scratch, so its value on exit is
 * whatever the second invocation leaves in it.
 * NOTE(review): Sbox is defined outside this part of the file.
 * The expansion ends in "while(0);" -- the semicolon is part of the macro.
 */
#define SboxLayer16(a0, a1, a2, a3, a4, a5, a6, a7, t0) do {\
	Sbox(a0, a1, a2, a3, t0);\
	Sbox(a4, a5, a6, a7, t0);\
} while(0);

/* Sbox Layer 32 */
/*
 * Same as SboxLayer16 but for four groups of four words (a0..a15),
 * again sharing the single scratch word t0 between the Sbox invocations.
 */
#define SboxLayer32(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, t0) do{\
	Sbox( a0,  a1,  a2,  a3, t0);\
	Sbox( a4,  a5,  a6,  a7, t0);\
	Sbox( a8,  a9, a10, a11, t0);\
	Sbox(a12, a13, a14, a15, t0);\
} while(0);

/*
 * One output row of the mix-columns step.
 *
 * Inputs are four 4-word groups a, b, c, d (index 3 first in the parameter
 * list); the result is written to e3..e0. Only XOR is used:
 *   e3 = a1 ^ b3 ^ c2 ^ d2
 *   e2 = a3 ^ a0 ^ b2 ^ c1 ^ d1
 *   e1 = a3 ^ a2 ^ b1 ^ c0 ^ c3 ^ d0 ^ d3
 *   e0 = a2 ^ b0 ^ c3 ^ d3
 * NOTE(review): assuming XOR is a plain bitwise exclusive-or and x3..x0 are
 * the bit planes of 4-bit values (x3 = MSB), these equations equal
 * e = 4*a ^ b ^ 2*c ^ 2*d in GF(2^4) modulo x^4 + x + 1 -- confirm against
 * the LED specification.
 * The e words must be distinct from the inputs: e3 is written before a3 is
 * read for e2 and e1.
 */
#define MCS32(a3, a2, a1, a0, b3, b2, b1, b0, c3, c2, c1, c0, d3, d2, d1, d0, e3, e2, e1, e0) do {\
	e3 = XOR(XOR(a1,b3), XOR(c2,d2));\
	e2 = XOR(XOR(XOR(a3,a0), XOR(b2,c1)), d1);\
	e1 = XOR(XOR(XOR(a3,a2), XOR(b1,c0)), XOR(XOR(c3, d0), d3));\
	e0 = XOR(XOR(a2,b0), XOR(c3,d3));\
} while(0);

/*
 * Mix-columns step for the 32-block layout.
 *
 * Note the parameter order: the groups are listed as a, c, b, d, and e3..e0
 * are caller-supplied temporaries.
 * MCS32 is applied four times, each time on the four most recent groups,
 * writing into the group that is no longer needed (e, then a, then b,
 * then c). The trailing assignments move the results back so that, on exit,
 * a holds the first MCS32 result, b the second, c the third and d the fourth.
 * e keeps a copy of the first result.
 */
#define MIXCOL32(a3, a2, a1, a0, c3, c2, c1, c0,b3, b2, b1, b0, d3, d2, d1, d0, e3, e2, e1, e0) do {\
	MCS32(a3, a2, a1, a0, b3, b2, b1, b0, c3, c2, c1, c0, d3, d2, d1, d0, e3, e2, e1, e0);\
	MCS32(b3, b2, b1, b0, c3, c2, c1, c0, d3, d2, d1, d0, e3, e2, e1, e0, a3, a2, a1, a0);\
	MCS32(c3, c2, c1, c0, d3, d2, d1, d0, e3, e2, e1, e0, a3, a2, a1, a0, b3, b2, b1, b0);\
	MCS32(d3, d2, d1, d0, e3, e2, e1, e0, a3, a2, a1, a0, b3, b2, b1, b0, c3, c2, c1, c0);\
	d3 = c3; d2 = c2; d1=c1; d0=c0;\
	c3 = b3; c2 = b2; c1=b1; c0=b0;\
	b3 = a3; b2 = a2; b1=a1; b0=a0;\
	a3 = e3; a2 = e2; a1=e1; a0=e0;\
} while(0);


/*
 * Mix-columns step for the 16-block layout.
 *
 * Only the a and c groups carry data on entry; b, d and e are overwritten.
 * 1. b = SHRB128(a, 8) and d = SHRB128(c, 8) derive the two missing groups.
 * 2. The same four MCS32 applications as in MIXCOL32 leave the results in
 *    e, a, b, c (in that order).
 * 3. The results are folded back into two groups:
 *    a = (e AND mask_l64) XOR SHLB128(a, 8),
 *    c = (b AND mask_l64) XOR SHLB128(c, 8).
 * NOTE(review): SHRB128/SHLB128 and mask_l64 are defined elsewhere;
 * presumably they are byte-wise 128-bit shifts (here by 8 bytes) and a mask
 * selecting the low 64 bits, i.e. each register holds two rows -- confirm.
 */
#define MIXCOL16(a3, a2, a1, a0, c3, c2, c1, c0, b3, b2, b1, b0, d3, d2, d1, d0, e3, e2, e1, e0) do {\
	b3 = SHRB128(a3,8);\
	b2 = SHRB128(a2,8);\
	b1 = SHRB128(a1,8);\
	b0 = SHRB128(a0,8);\
	d3 = SHRB128(c3,8);\
	d2 = SHRB128(c2,8);\
	d1 = SHRB128(c1,8);\
	d0 = SHRB128(c0,8);\
	\
	MCS32(a3, a2, a1, a0, b3, b2, b1, b0, c3, c2, c1, c0, d3, d2, d1, d0, e3, e2, e1, e0);\
	MCS32(b3, b2, b1, b0, c3, c2, c1, c0, d3, d2, d1, d0, e3, e2, e1, e0, a3, a2, a1, a0);\
	MCS32(c3, c2, c1, c0, d3, d2, d1, d0, e3, e2, e1, e0, a3, a2, a1, a0, b3, b2, b1, b0);\
	MCS32(d3, d2, d1, d0, e3, e2, e1, e0, a3, a2, a1, a0, b3, b2, b1, b0, c3, c2, c1, c0);\
	\
	a3 = XOR(AND(e3, mask_l64), SHLB128(a3,8));\
	a2 = XOR(AND(e2, mask_l64), SHLB128(a2,8));\
	a1 = XOR(AND(e1, mask_l64), SHLB128(a1,8));\
	a0 = XOR(AND(e0, mask_l64), SHLB128(a0,8));\
	c3 = XOR(AND(b3, mask_l64), SHLB128(c3,8));\
	c2 = XOR(AND(b2, mask_l64), SHLB128(c2,8));\
	c1 = XOR(AND(b1, mask_l64), SHLB128(c1,8));\
	c0 = XOR(AND(b0, mask_l64), SHLB128(c0,8));\
} while(0);


/* shift row operation, data structure follows above p16 */
/*
 * Each word is permuted in place with PSHUFB: a0..a3 use mask16_sr01,
 * a4..a7 use mask16_sr23.
 * NOTE(review): the masks are defined elsewhere; by their names they
 * presumably encode the shifts of rows 0/1 and rows 2/3 -- confirm.
 */
#define SR16(a0, a1, a2, a3, a4, a5, a6, a7) do {\
	a0 = PSHUFB(a0, mask16_sr01);\
	a1 = PSHUFB(a1, mask16_sr01);\
	a2 = PSHUFB(a2, mask16_sr01);\
	a3 = PSHUFB(a3, mask16_sr01);\
	a4 = PSHUFB(a4, mask16_sr23);\
	a5 = PSHUFB(a5, mask16_sr23);\
	a6 = PSHUFB(a6, mask16_sr23);\
	a7 = PSHUFB(a7, mask16_sr23);\
} while(0);

/* shift row operation, data structure follows above p32 */
/*
 * The a group is listed but never modified. The b, c and d groups are
 * permuted in place with PSHUF32 using _MM_SHUFFLE(2, 1, 0, 3),
 * _MM_SHUFFLE(1, 0, 3, 2) and _MM_SHUFFLE(0, 3, 2, 1) respectively.
 * As in MIXCOL32, the parameter list orders the groups a, c, b, d.
 * NOTE(review): PSHUF32 is defined elsewhere; presumably a 32-bit lane
 * shuffle, making these three rotations by one, two and three lanes --
 * confirm.
 */
#define SR32(a3, a2, a1, a0, c3, c2, c1, c0, b3, b2, b1, b0, d3, d2, d1, d0) do {\
	b3 = PSHUF32(b3, _MM_SHUFFLE(2, 1, 0, 3));\
	b2 = PSHUF32(b2, _MM_SHUFFLE(2, 1, 0, 3));\
	b1 = PSHUF32(b1, _MM_SHUFFLE(2, 1, 0, 3));\
	b0 = PSHUF32(b0, _MM_SHUFFLE(2, 1, 0, 3));\
	\
	c3 = PSHUF32(c3, _MM_SHUFFLE(1, 0, 3, 2));\
	c2 = PSHUF32(c2, _MM_SHUFFLE(1, 0, 3, 2));\
	c1 = PSHUF32(c1, _MM_SHUFFLE(1, 0, 3, 2));\
	c0 = PSHUF32(c0, _MM_SHUFFLE(1, 0, 3, 2));\
	\
	d3 = PSHUF32(d3, _MM_SHUFFLE(0, 3, 2, 1));\
	d2 = PSHUF32(d2, _MM_SHUFFLE(0, 3, 2, 1));\
	d1 = PSHUF32(d1, _MM_SHUFFLE(0, 3, 2, 1));\
	d0 = PSHUF32(d0, _MM_SHUFFLE(0, 3, 2, 1));\
} while(0);


-------------------------------------------------------------------------------- /src/bitslice/PRESENT/PRESENT.c: -------------------------------------------------------------------------------- 1 | /*------------------------ MIT License HEADER ------------------------------------ 2 | Copyright ANSSI and NTU (2015) 3 | Contributors: 4 | Ryad BENADJILA [ryadbenadjila@gmail.com] and 5 | Jian GUO [ntu.guo@gmail.com] and 6 | Victor LOMNE [victor.lomne@gmail.com] and 7 | Thomas PEYRIN [thomas.peyrin@gmail.com] 8 | 9 | This software is a computer program whose purpose is to implement 10 | lightweight block ciphers with different optimizations for the x86 11 | platform. Three algorithms have been implemented: PRESENT, LED and 12 | Piccolo. Three techniques have been explored: table based 13 | implementations, vperm (for vector permutation) and bitslice 14 | implementations. 
For more details, please refer to the SAC 2013 15 | paper: 16 | http://eprint.iacr.org/2013/445 17 | as well as the documentation of the project. 18 | Here is a big picture of how the code is divided: 19 | - src/common contains common headers, structures and functions. 20 | - src/table contains table based implementations, with the code 21 | that generates the tables in src/table/gen_tables. The code here 22 | is written in pure C so it should compile on any platform (x86 23 | and other architectures), as well as any OS flavour (*nix, 24 | Windows ...). 25 | - src/vperm contains vperm based implementations. They are written 26 | in inline assembly for x86_64 and will only compile and work on 27 | this platform. The code only compiles with gcc, but porting it to 28 | other assembly flavours should not be too complicated. 29 | - src/bitslice contains bitslice based implementations. They are 30 | written in asm intrinsics. It should compile and run on i386 as 31 | well as x86_64 platforms, and it should be portable to other OS 32 | flavours since intrinsics are standard among many compilers. 33 | Note: vperm and bitslice implementations require a x86 CPU with at least 34 | SSSE3 extensions. 35 | 36 | Permission is hereby granted, free of charge, to any person obtaining a copy 37 | of this software and associated documentation files (the "Software"), to deal 38 | in the Software without restriction, including without limitation the rights 39 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 40 | copies of the Software, and to permit persons to whom the Software is 41 | furnished to do so, subject to the following conditions: 42 | 43 | The above copyright notice and this permission notice shall be included in 44 | all copies or substantial portions of the Software. 
45 | 46 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 47 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 48 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 49 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 50 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 51 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 52 | THE SOFTWARE. 53 | 54 | Except as contained in this notice, the name(s) of the above copyright holders 55 | shall not be used in advertising or otherwise to promote the sale, use or other 56 | dealings in this Software without prior written authorization. 57 | 58 | 59 | -------------------------- MIT License HEADER ----------------------------------*/ 60 | #ifdef BITSLICE 61 | #include "PRESENT_utils.h" 62 | 63 | #ifdef BITSLICE 64 | #ifdef PRESENT80 65 | void PRESENT80bitslice8_key_schedule(const u8* masterKey, u8* roundKeys); 66 | void PRESENT80bitslice8_core(const u8* message, const u8* subkeys, u8* ciphertext); 67 | void PRESENT80bitslice8_cipher(const u64 plaintext[BITSLICE8_P], const u16 key[BITSLICE8_P][KEY80], u64 ciphertext[BITSLICE8_P]); 68 | void PRESENT80bitslice8_cipher_(const u64 plaintext[BITSLICE8_P], const u16 key[BITSLICE8_P][KEY80], u64 ciphertext[BITSLICE8_P]); 69 | void PRESENT80bitslice16_key_schedule(const u8* masterKey, u8* roundKeys); 70 | void PRESENT80bitslice16_core(const u8* message, const u8* subkeys, u8* ciphertext); 71 | void PRESENT80bitslice16_cipher(const u64 plaintext[BITSLICE16_P], const u16 key[BITSLICE16_P][KEY80], u64 ciphertext[BITSLICE16_P]); 72 | void PRESENT80bitslice16_cipher_(const u64 plaintext[BITSLICE16_P], const u16 key[BITSLICE16_P][KEY80], u64 ciphertext[BITSLICE16_P]); 73 | #endif 74 | #ifdef PRESENT128 75 | void PRESENT128bitslice8_key_schedule(const u8* masterKey, u8* roundKeys); 76 | void PRESENT128bitslice8_core(const u8* message, 
const u8* subkeys, u8* ciphertext); 77 | void PRESENT128bitslice8_cipher(const u64 plaintext[BITSLICE8_P], const u16 key[BITSLICE8_P][KEY128], u64 ciphertext[BITSLICE8_P]); 78 | void PRESENT128bitslice8_cipher_(const u64 plaintext[BITSLICE8_P], const u16 key[BITSLICE8_P][KEY128], u64 ciphertext[BITSLICE8_P]); 79 | void PRESENT128bitslice16_key_schedule(const u8* masterKey, u8* roundKeys); 80 | void PRESENT128bitslice16_core(const u8* message, const u8* subkeys, u8* ciphertext); 81 | void PRESENT128bitslice16_cipher(const u64 plaintext[BITSLICE16_P], const u16 key[BITSLICE16_P][KEY128], u64 ciphertext[BITSLICE16_P]); 82 | void PRESENT128bitslice16_cipher_(const u64 plaintext[BITSLICE16_P], const u16 key[BITSLICE16_P][KEY128], u64 ciphertext[BITSLICE16_P]); 83 | #endif 84 | #endif 85 | 86 | #ifdef PRESENT80 87 | /********************************************************************************/ 88 | /********************************************************************************/ 89 | void PRESENT80bitslice8_key_schedule(const u8* masterKey, u8* roundKeys){ 90 | PRESENT_KEY_SCHEDULE(BITSLICE8_P, 80); 91 | return; 92 | } 93 | 94 | void PRESENT80bitslice8_core(const u8* message, const u8* subkeys, u8* ciphertext){ 95 | PRESENT_CORE_ENCRYPT(BITSLICE8_P, 80); 96 | return; 97 | } 98 | 99 | void PRESENT80bitslice8_cipher(const u64 plaintext[BITSLICE8_P], const u16 key[BITSLICE8_P][KEY80], u64 ciphertext[BITSLICE8_P]) 100 | { 101 | u8 subkeys[BITSLICE8_P * PRESENT80_SUBKEYS_SIZE]; 102 | 103 | /* Initialize the constants: done once and for all */ 104 | PRESENT_init(); 105 | 106 | #ifdef MEASURE_PERF 107 | key_schedule_start = rdtsc(); 108 | #endif 109 | PRESENT80bitslice8_key_schedule((u8*)key, (u8*)subkeys); 110 | #ifdef MEASURE_PERF 111 | key_schedule_end = rdtsc(); 112 | #endif 113 | 114 | #ifdef MEASURE_PERF 115 | encrypt_start = rdtsc(); 116 | #endif 117 | PRESENT80bitslice8_core((u8*)plaintext, (u8*)subkeys, (u8*)ciphertext); 118 | #ifdef MEASURE_PERF 119 | encrypt_end 
= rdtsc(); 120 | #endif 121 | return; 122 | } 123 | 124 | /* Ciphering with mixed key schedule and encryption */ 125 | /* It takes less memory since subkeys are not stored */ 126 | void PRESENT80bitslice8_cipher_(const u64 plaintext[BITSLICE8_P], const u16 key[BITSLICE8_P][KEY80], u64 ciphertext[BITSLICE8_P]) 127 | { 128 | PRESENT_init(); 129 | #ifdef MEASURE_PERF 130 | key_schedule_start = 0; 131 | #endif 132 | #ifdef MEASURE_PERF 133 | key_schedule_start = 0; 134 | #endif 135 | 136 | #ifdef MEASURE_PERF 137 | encrypt_start = rdtsc(); 138 | #endif 139 | PRESENT_CORE_ENCRYPT_AND_KEY_SCHED(BITSLICE8_P, 80); 140 | #ifdef MEASURE_PERF 141 | encrypt_end = rdtsc(); 142 | #endif 143 | return; 144 | } 145 | 146 | /********************************************************************************/ 147 | /********************************************************************************/ 148 | void PRESENT80bitslice16_key_schedule(const u8* masterKey, u8* roundKeys){ 149 | PRESENT_KEY_SCHEDULE(BITSLICE16_P, 80); 150 | return; 151 | } 152 | 153 | void PRESENT80bitslice16_core(const u8* message, const u8* subkeys, u8* ciphertext){ 154 | PRESENT_CORE_ENCRYPT(BITSLICE16_P, 80); 155 | return; 156 | } 157 | 158 | void PRESENT80bitslice16_cipher(const u64 plaintext[BITSLICE16_P], const u16 key[BITSLICE16_P][KEY80], u64 ciphertext[BITSLICE16_P]) 159 | { 160 | u8 subkeys[BITSLICE16_P * PRESENT80_SUBKEYS_SIZE]; 161 | 162 | /* Initialize the constants: done once and for all */ 163 | PRESENT_init(); 164 | 165 | #ifdef MEASURE_PERF 166 | key_schedule_start = rdtsc(); 167 | #endif 168 | PRESENT80bitslice16_key_schedule((u8*)key, (u8*)subkeys); 169 | #ifdef MEASURE_PERF 170 | key_schedule_end = rdtsc(); 171 | #endif 172 | 173 | #ifdef MEASURE_PERF 174 | encrypt_start = rdtsc(); 175 | #endif 176 | PRESENT80bitslice16_core((u8*)plaintext, (u8*)subkeys, (u8*)ciphertext); 177 | #ifdef MEASURE_PERF 178 | encrypt_end = rdtsc(); 179 | #endif 180 | return; 181 | } 182 | 183 | /* Ciphering with mixed 
key schedule and encryption */ 184 | /* It takes less memory since subkeys are not stored */ 185 | void PRESENT80bitslice16_cipher_(const u64 plaintext[BITSLICE16_P], const u16 key[BITSLICE16_P][KEY80], u64 ciphertext[BITSLICE16_P]) 186 | { 187 | PRESENT_init(); 188 | #ifdef MEASURE_PERF 189 | key_schedule_start = 0; 190 | #endif 191 | #ifdef MEASURE_PERF 192 | key_schedule_start = 0; 193 | #endif 194 | 195 | #ifdef MEASURE_PERF 196 | encrypt_start = rdtsc(); 197 | #endif 198 | PRESENT_CORE_ENCRYPT_AND_KEY_SCHED(BITSLICE16_P, 80); 199 | #ifdef MEASURE_PERF 200 | encrypt_end = rdtsc(); 201 | #endif 202 | return; 203 | } 204 | #endif 205 | 206 | #ifdef PRESENT128 207 | /********************************************************************************/ 208 | /********************************************************************************/ 209 | void PRESENT128bitslice8_key_schedule(const u8* masterKey, u8* roundKeys){ 210 | PRESENT_KEY_SCHEDULE(BITSLICE8_P, 128); 211 | return; 212 | } 213 | 214 | void PRESENT128bitslice8_core(const u8* message, const u8* subkeys, u8* ciphertext){ 215 | PRESENT_CORE_ENCRYPT(BITSLICE8_P, 128); 216 | return; 217 | } 218 | 219 | void PRESENT128bitslice8_cipher(const u64 plaintext[BITSLICE8_P], const u16 key[BITSLICE8_P][KEY128], u64 ciphertext[BITSLICE8_P]) 220 | { 221 | u8 subkeys[BITSLICE8_P * PRESENT128_SUBKEYS_SIZE]; 222 | 223 | /* Initialize the constants: done once and for all */ 224 | PRESENT_init(); 225 | 226 | #ifdef MEASURE_PERF 227 | key_schedule_start = rdtsc(); 228 | #endif 229 | PRESENT128bitslice8_key_schedule((u8*)key, (u8*)subkeys); 230 | #ifdef MEASURE_PERF 231 | key_schedule_end = rdtsc(); 232 | #endif 233 | 234 | #ifdef MEASURE_PERF 235 | encrypt_start = rdtsc(); 236 | #endif 237 | PRESENT128bitslice8_core((u8*)plaintext, (u8*)subkeys, (u8*)ciphertext); 238 | #ifdef MEASURE_PERF 239 | encrypt_end = rdtsc(); 240 | #endif 241 | return; 242 | } 243 | 244 | /* Ciphering with mixed key schedule and encryption */ 245 | /* It 
takes less memory since subkeys are not stored */ 246 | void PRESENT128bitslice8_cipher_(const u64 plaintext[BITSLICE8_P], const u16 key[BITSLICE8_P][KEY128], u64 ciphertext[BITSLICE8_P]) 247 | { 248 | PRESENT_init(); 249 | #ifdef MEASURE_PERF 250 | key_schedule_start = 0; 251 | #endif 252 | #ifdef MEASURE_PERF 253 | key_schedule_start = 0; 254 | #endif 255 | 256 | #ifdef MEASURE_PERF 257 | encrypt_start = rdtsc(); 258 | #endif 259 | PRESENT_CORE_ENCRYPT_AND_KEY_SCHED(BITSLICE8_P, 128); 260 | #ifdef MEASURE_PERF 261 | encrypt_end = rdtsc(); 262 | #endif 263 | return; 264 | } 265 | 266 | 267 | /********************************************************************************/ 268 | /********************************************************************************/ 269 | void PRESENT128bitslice16_key_schedule(const u8* masterKey, u8* roundKeys){ 270 | PRESENT_KEY_SCHEDULE(BITSLICE16_P, 128); 271 | return; 272 | } 273 | 274 | void PRESENT128bitslice16_core(const u8* message, const u8* subkeys, u8* ciphertext){ 275 | PRESENT_CORE_ENCRYPT(BITSLICE16_P, 128); 276 | return; 277 | } 278 | 279 | void PRESENT128bitslice16_cipher(const u64 plaintext[BITSLICE16_P], const u16 key[BITSLICE16_P][KEY128], u64 ciphertext[BITSLICE16_P]) 280 | { 281 | u8 subkeys[BITSLICE16_P * PRESENT128_SUBKEYS_SIZE]; 282 | 283 | /* Initialize the constants: done once and for all */ 284 | PRESENT_init(); 285 | 286 | #ifdef MEASURE_PERF 287 | key_schedule_start = rdtsc(); 288 | #endif 289 | PRESENT128bitslice16_key_schedule((u8*)key, (u8*)subkeys); 290 | #ifdef MEASURE_PERF 291 | key_schedule_end = rdtsc(); 292 | #endif 293 | 294 | #ifdef MEASURE_PERF 295 | encrypt_start = rdtsc(); 296 | #endif 297 | PRESENT128bitslice16_core((u8*)plaintext, (u8*)subkeys, (u8*)ciphertext); 298 | #ifdef MEASURE_PERF 299 | encrypt_end = rdtsc(); 300 | #endif 301 | return; 302 | } 303 | 304 | /* Ciphering with mixed key schedule and encryption */ 305 | /* It takes less memory since subkeys are not stored */ 306 | void 
PRESENT128bitslice16_cipher_(const u64 plaintext[BITSLICE16_P], const u16 key[BITSLICE16_P][KEY128], u64 ciphertext[BITSLICE16_P]) 307 | { 308 | PRESENT_init(); 309 | #ifdef MEASURE_PERF 310 | key_schedule_start = 0; 311 | #endif 312 | #ifdef MEASURE_PERF 313 | key_schedule_start = 0; 314 | #endif 315 | 316 | #ifdef MEASURE_PERF 317 | encrypt_start = rdtsc(); 318 | #endif 319 | PRESENT_CORE_ENCRYPT_AND_KEY_SCHED(BITSLICE16_P, 128); 320 | #ifdef MEASURE_PERF 321 | encrypt_end = rdtsc(); 322 | #endif 323 | return; 324 | } 325 | #endif 326 | 327 | #endif 328 | -------------------------------------------------------------------------------- /src/table/gen_tables/PRESENT_generateTables.c: -------------------------------------------------------------------------------- 1 | /*------------------------ MIT License HEADER ------------------------------------ 2 | Copyright ANSSI and NTU (2015) 3 | Contributors: 4 | Ryad BENADJILA [ryadbenadjila@gmail.com] and 5 | Jian GUO [ntu.guo@gmail.com] and 6 | Victor LOMNE [victor.lomne@gmail.com] and 7 | Thomas PEYRIN [thomas.peyrin@gmail.com] 8 | 9 | This software is a computer program whose purpose is to implement 10 | lightweight block ciphers with different optimizations for the x86 11 | platform. Three algorithms have been implemented: PRESENT, LED and 12 | Piccolo. Three techniques have been explored: table based 13 | implementations, vperm (for vector permutation) and bitslice 14 | implementations. For more details, please refer to the SAC 2013 15 | paper: 16 | http://eprint.iacr.org/2013/445 17 | as well as the documentation of the project. 18 | Here is a big picture of how the code is divided: 19 | - src/common contains common headers, structures and functions. 20 | - src/table contains table based implementations, with the code 21 | that generates the tables in src/table/gen_tables. 
The code here 22 | is written in pure C so it should compile on any platform (x86 23 | and other architectures), as well as any OS flavour (*nix, 24 | Windows ...). 25 | - src/vperm contains vperm based implementations. They are written 26 | in inline assembly for x86_64 and will only compile and work on 27 | this platform. The code only compiles with gcc, but porting it to 28 | other assembly flavours should not be too complicated. 29 | - src/bitslice contains bitslice based implementations. They are 30 | written in asm intrinsics. It should compile and run on i386 as 31 | well as x86_64 platforms, and it should be portable to other OS 32 | flavours since intrinsics are standard among many compilers. 33 | Note: vperm and bitslice implementations require a x86 CPU with at least 34 | SSSE3 extensions. 35 | 36 | Permission is hereby granted, free of charge, to any person obtaining a copy 37 | of this software and associated documentation files (the "Software"), to deal 38 | in the Software without restriction, including without limitation the rights 39 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 40 | copies of the Software, and to permit persons to whom the Software is 41 | furnished to do so, subject to the following conditions: 42 | 43 | The above copyright notice and this permission notice shall be included in 44 | all copies or substantial portions of the Software. 45 | 46 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 47 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 48 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 49 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 50 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 51 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 52 | THE SOFTWARE. 
53 | 54 | Except as contained in this notice, the name(s) of the above copyright holders 55 | shall not be used in advertising or otherwise to promote the sale, use or other 56 | dealings in this Software without prior written authorization. 57 | 58 | 59 | -------------------------- MIT License HEADER ----------------------------------*/ 60 | /* PRESENTgenerateTables.c */ 61 | /* author: Victor LOMNE - victor.lomne@gmail.com */ 62 | 63 | 64 | 65 | #include 66 | 67 | /* PRESENT Sbox */ 68 | unsigned char sbox[16] = {0x0C, 0x05, 0x06, 0x0B, 0x09, 0x00, 0x0A, 0X0D, 0x03, 0x0E, 0x0F, 0x08, 0x04, 0x07, 0x01, 0x02}; 69 | 70 | 71 | 72 | /****************************************************************************************************/ 73 | void main(void) 74 | { 75 | unsigned char il, ih, twoSboxes; 76 | unsigned int i; 77 | unsigned long long T0_PRESENT[256], T1_PRESENT[256], T2_PRESENT[256], T3_PRESENT[256], T4_PRESENT[256], T5_PRESENT[256], T6_PRESENT[256], T7_PRESENT[256], TroundCounters80[31], TroundCounters128[31], TsboxKS80[16], TsboxKS128[256]; 78 | FILE * ptr; 79 | 80 | /* loop over the possible input values to compute for T0, T1, T2, T3, T4, T5, T6 and T7 */ 81 | for(i = 0; i < 256; i++) 82 | { 83 | /* compute low and high parts of i */ 84 | il = i & 0x0f; 85 | ih = (i & 0xf0) >> 4; 86 | 87 | /* compute two sboxes look-up */ 88 | twoSboxes = (sbox[ih] << 4) | sbox[il]; 89 | 90 | /* compute T0 */ 91 | T0_PRESENT[i] = ((unsigned long long)( (twoSboxes >> 0) & 0x01 )) << 0; 92 | T0_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 1) & 0x01 )) << 16; 93 | T0_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 2) & 0x01 )) << 32; 94 | T0_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 3) & 0x01 )) << 48; 95 | T0_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 4) & 0x01 )) << 1; 96 | T0_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 5) & 0x01 )) << 17; 97 | T0_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 6) & 0x01 )) << 33; 98 | T0_PRESENT[i] |= 
((unsigned long long)( (twoSboxes >> 7) & 0x01 )) << 49; 99 | 100 | /* compute T1 */ 101 | T1_PRESENT[i] = ((unsigned long long)( (twoSboxes >> 0) & 0x01 )) << 2; 102 | T1_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 1) & 0x01 )) << 18; 103 | T1_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 2) & 0x01 )) << 34; 104 | T1_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 3) & 0x01 )) << 50; 105 | T1_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 4) & 0x01 )) << 3; 106 | T1_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 5) & 0x01 )) << 19; 107 | T1_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 6) & 0x01 )) << 35; 108 | T1_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 7) & 0x01 )) << 51; 109 | 110 | /* compute T2 */ 111 | T2_PRESENT[i] = ((unsigned long long)( (twoSboxes >> 0) & 0x01 )) << 4; 112 | T2_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 1) & 0x01 )) << 20; 113 | T2_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 2) & 0x01 )) << 36; 114 | T2_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 3) & 0x01 )) << 52; 115 | T2_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 4) & 0x01 )) << 5; 116 | T2_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 5) & 0x01 )) << 21; 117 | T2_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 6) & 0x01 )) << 37; 118 | T2_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 7) & 0x01 )) << 53; 119 | 120 | /* compute T3 */ 121 | T3_PRESENT[i] = ((unsigned long long)( (twoSboxes >> 0) & 0x01 )) << 6; 122 | T3_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 1) & 0x01 )) << 22; 123 | T3_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 2) & 0x01 )) << 38; 124 | T3_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 3) & 0x01 )) << 54; 125 | T3_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 4) & 0x01 )) << 7; 126 | T3_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 5) & 0x01 )) << 23; 127 | T3_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 6) & 0x01 )) << 39; 128 | T3_PRESENT[i] |= 
((unsigned long long)( (twoSboxes >> 7) & 0x01 )) << 55; 129 | 130 | /* compute T4 */ 131 | T4_PRESENT[i] = ((unsigned long long)( (twoSboxes >> 0) & 0x01 )) << 8; 132 | T4_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 1) & 0x01 )) << 24; 133 | T4_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 2) & 0x01 )) << 40; 134 | T4_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 3) & 0x01 )) << 56; 135 | T4_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 4) & 0x01 )) << 9; 136 | T4_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 5) & 0x01 )) << 25; 137 | T4_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 6) & 0x01 )) << 41; 138 | T4_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 7) & 0x01 )) << 57; 139 | 140 | /* compute T5 */ 141 | T5_PRESENT[i] = ((unsigned long long)( (twoSboxes >> 0) & 0x01 )) << 10; 142 | T5_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 1) & 0x01 )) << 26; 143 | T5_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 2) & 0x01 )) << 42; 144 | T5_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 3) & 0x01 )) << 58; 145 | T5_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 4) & 0x01 )) << 11; 146 | T5_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 5) & 0x01 )) << 27; 147 | T5_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 6) & 0x01 )) << 43; 148 | T5_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 7) & 0x01 )) << 59; 149 | 150 | /* compute T6 */ 151 | T6_PRESENT[i] = ((unsigned long long)( (twoSboxes >> 0) & 0x01 )) << 12; 152 | T6_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 1) & 0x01 )) << 28; 153 | T6_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 2) & 0x01 )) << 44; 154 | T6_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 3) & 0x01 )) << 60; 155 | T6_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 4) & 0x01 )) << 13; 156 | T6_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 5) & 0x01 )) << 29; 157 | T6_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 6) & 0x01 )) << 45; 158 | T6_PRESENT[i] 
|= ((unsigned long long)( (twoSboxes >> 7) & 0x01 )) << 61; 159 | 160 | /* compute T7 */ 161 | T7_PRESENT[i] = ((unsigned long long)( (twoSboxes >> 0) & 0x01 )) << 14; 162 | T7_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 1) & 0x01 )) << 30; 163 | T7_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 2) & 0x01 )) << 46; 164 | T7_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 3) & 0x01 )) << 62; 165 | T7_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 4) & 0x01 )) << 15; 166 | T7_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 5) & 0x01 )) << 31; 167 | T7_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 6) & 0x01 )) << 47; 168 | T7_PRESENT[i] |= ((unsigned long long)( (twoSboxes >> 7) & 0x01 )) << 63; 169 | 170 | /* compute TsboxKS128 */ 171 | TsboxKS128[i] = ((unsigned long long)twoSboxes) << 56; 172 | } 173 | 174 | /* compute TroundCounters80 */ 175 | for(i = 0; i < 31; i++) 176 | TroundCounters80[i] = ((unsigned long long)(i+1)) << 18; 177 | 178 | /* compute TsboxKS80 */ 179 | for(i = 0; i < 16; i++) 180 | TsboxKS80[i] = ((unsigned long long)sbox[i]) << 60; 181 | 182 | /* compute TroundCounters128 */ 183 | for(i = 0; i < 31; i++) 184 | TroundCounters128[i] = ((unsigned long long)(i+1)) << 1; 185 | 186 | 187 | 188 | /* open a pointer and create the file PRESENTtables.h */ 189 | ptr = fopen("../PRESENT/PRESENT_tables.h", "w"); 190 | if(ptr == NULL) 191 | { 192 | printf("Unable to create file PRESENT_tables.h\n"); 193 | return; 194 | } 195 | 196 | /* write T0 */ 197 | fprintf(ptr, "unsigned long long T0_PRESENT[256] = {"); 198 | for(i = 0; i < 256; i++) 199 | { 200 | fprintf(ptr, "0x%016llx", T0_PRESENT[i]); 201 | if(i == 255) 202 | fprintf(ptr, "};\n\n"); 203 | else 204 | fprintf(ptr, ", "); 205 | } 206 | 207 | /* write T1 */ 208 | fprintf(ptr, "unsigned long long T1_PRESENT[256] = {"); 209 | for(i = 0; i < 256; i++) 210 | { 211 | fprintf(ptr, "0x%016llx", T1_PRESENT[i]); 212 | if(i == 255) 213 | fprintf(ptr, "};\n\n"); 214 | else 215 | 
fprintf(ptr, ", "); 216 | } 217 | 218 | /* write T2 */ 219 | fprintf(ptr, "unsigned long long T2_PRESENT[256] = {"); 220 | for(i = 0; i < 256; i++) 221 | { 222 | fprintf(ptr, "0x%016llx", T2_PRESENT[i]); 223 | if(i == 255) 224 | fprintf(ptr, "};\n\n"); 225 | else 226 | fprintf(ptr, ", "); 227 | } 228 | 229 | /* write T3 */ 230 | fprintf(ptr, "unsigned long long T3_PRESENT[256] = {"); 231 | for(i = 0; i < 256; i++) 232 | { 233 | fprintf(ptr, "0x%016llx", T3_PRESENT[i]); 234 | if(i == 255) 235 | fprintf(ptr, "};\n\n"); 236 | else 237 | fprintf(ptr, ", "); 238 | } 239 | 240 | /* write T4 */ 241 | fprintf(ptr, "unsigned long long T4_PRESENT[256] = {"); 242 | for(i = 0; i < 256; i++) 243 | { 244 | fprintf(ptr, "0x%016llx", T4_PRESENT[i]); 245 | if(i == 255) 246 | fprintf(ptr, "};\n\n"); 247 | else 248 | fprintf(ptr, ", "); 249 | } 250 | 251 | /* write T5 */ 252 | fprintf(ptr, "unsigned long long T5_PRESENT[256] = {"); 253 | for(i = 0; i < 256; i++) 254 | { 255 | fprintf(ptr, "0x%016llx", T5_PRESENT[i]); 256 | if(i == 255) 257 | fprintf(ptr, "};\n\n"); 258 | else 259 | fprintf(ptr, ", "); 260 | } 261 | 262 | /* write T6 */ 263 | fprintf(ptr, "unsigned long long T6_PRESENT[256] = {"); 264 | for(i = 0; i < 256; i++) 265 | { 266 | fprintf(ptr, "0x%016llx", T6_PRESENT[i]); 267 | if(i == 255) 268 | fprintf(ptr, "};\n\n"); 269 | else 270 | fprintf(ptr, ", "); 271 | } 272 | 273 | /* write T7 */ 274 | fprintf(ptr, "unsigned long long T7_PRESENT[256] = {"); 275 | for(i = 0; i < 256; i++) 276 | { 277 | fprintf(ptr, "0x%016llx", T7_PRESENT[i]); 278 | if(i == 255) 279 | fprintf(ptr, "};\n\n"); 280 | else 281 | fprintf(ptr, ", "); 282 | } 283 | 284 | /* write TroundCounters80 */ 285 | fprintf(ptr, "unsigned long long TroundCounters80[31] = {"); 286 | for(i = 0; i < 31; i++) 287 | { 288 | fprintf(ptr, "0x%016llx", TroundCounters80[i]); 289 | if(i == 30) 290 | fprintf(ptr, "};\n\n"); 291 | else 292 | fprintf(ptr, ", "); 293 | } 294 | 295 | /* write TsboxKS80 */ 296 | fprintf(ptr, 
"unsigned long long TsboxKS80[16] = {"); 297 | for(i = 0; i < 16; i++) 298 | { 299 | fprintf(ptr, "0x%016llx", TsboxKS80[i]); 300 | if(i == 15) 301 | fprintf(ptr, "};\n\n"); 302 | else 303 | fprintf(ptr, ", "); 304 | } 305 | 306 | /* write TroundCounters128 */ 307 | fprintf(ptr, "unsigned long long TroundCounters128[31] = {"); 308 | for(i = 0; i < 31; i++) 309 | { 310 | fprintf(ptr, "0x%016llx", TroundCounters128[i]); 311 | if(i == 30) 312 | fprintf(ptr, "};\n\n"); 313 | else 314 | fprintf(ptr, ", "); 315 | } 316 | 317 | /* write TsboxKS128 */ 318 | fprintf(ptr, "unsigned long long TsboxKS128[256] = {"); 319 | for(i = 0; i < 256; i++) 320 | { 321 | fprintf(ptr, "0x%016llx", TsboxKS128[i]); 322 | if(i == 255) 323 | fprintf(ptr, "};\n\n"); 324 | else 325 | fprintf(ptr, ", "); 326 | } 327 | 328 | /* close the pointer */ 329 | fclose(ptr); 330 | 331 | return; 332 | 333 | } 334 | -------------------------------------------------------------------------------- /src/table/LED/LED.c: -------------------------------------------------------------------------------- 1 | /*------------------------ MIT License HEADER ------------------------------------ 2 | Copyright ANSSI and NTU (2015) 3 | Contributors: 4 | Ryad BENADJILA [ryadbenadjila@gmail.com] and 5 | Jian GUO [ntu.guo@gmail.com] and 6 | Victor LOMNE [victor.lomne@gmail.com] and 7 | Thomas PEYRIN [thomas.peyrin@gmail.com] 8 | 9 | This software is a computer program whose purpose is to implement 10 | lightweight block ciphers with different optimizations for the x86 11 | platform. Three algorithms have been implemented: PRESENT, LED and 12 | Piccolo. Three techniques have been explored: table based 13 | implementations, vperm (for vector permutation) and bitslice 14 | implementations. For more details, please refer to the SAC 2013 15 | paper: 16 | http://eprint.iacr.org/2013/445 17 | as well as the documentation of the project. 
18 | Here is a big picture of how the code is divided: 19 | - src/common contains common headers, structures and functions. 20 | - src/table contains table based implementations, with the code 21 | that generates the tables in src/table/gen_tables. The code here 22 | is written in pure C so it should compile on any platform (x86 23 | and other architectures), as well as any OS flavour (*nix, 24 | Windows ...). 25 | - src/vperm contains vperm based implementations. They are written 26 | in inline assembly for x86_64 and will only compile and work on 27 | this platform. The code only compiles with gcc, but porting it to 28 | other assembly flavours should not be too complicated. 29 | - src/bitslice contains bitslice based implementations. They are 30 | written in asm intrinsics. It should compile and run on i386 as 31 | well as x86_64 platforms, and it should be portable to other OS 32 | flavours since intrinsics are standard among many compilers. 33 | Note: vperm and bitslice implementations require a x86 CPU with at least 34 | SSSE3 extensions. 35 | 36 | Permission is hereby granted, free of charge, to any person obtaining a copy 37 | of this software and associated documentation files (the "Software"), to deal 38 | in the Software without restriction, including without limitation the rights 39 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 40 | copies of the Software, and to permit persons to whom the Software is 41 | furnished to do so, subject to the following conditions: 42 | 43 | The above copyright notice and this permission notice shall be included in 44 | all copies or substantial portions of the Software. 45 | 46 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 47 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 48 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 49 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 50 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 51 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 52 | THE SOFTWARE. 53 | 54 | Except as contained in this notice, the name(s) of the above copyright holders 55 | shall not be used in advertising or otherwise to promote the sale, use or other 56 | dealings in this Software without prior written authorization. 57 | 58 | 59 | -------------------------- MIT License HEADER ----------------------------------*/ 60 | #ifdef TABLE 61 | #include 62 | #include "LED_tables.h" 63 | 64 | #ifdef TABLE 65 | #ifdef LED64 66 | void LED128table_key_schedule(const u8* masterKey128, u8* roundKeys128); 67 | void LED128table_core(const u8* plaintext, const u8* roundKeys128, u8* ciphertext); 68 | void LED128table_cipher(const u64 plaintext_in[TABLE_P], const u16 keys_in[TABLE_P][KEY128], u64 ciphertext_out[TABLE_P]); 69 | #endif 70 | #ifdef LED128 71 | void LED64table_cipher(const u64 plaintext_in[TABLE_P], const u16 keys_in[TABLE_P][KEY64], u64 ciphertext_out[TABLE_P]); 72 | void LED64table_key_schedule(const u8* masterKey64, u8* roundKeys64); 73 | void LED64table_core(const u8* plaintext, const u8* roundKeys64, u8* ciphertext); 74 | #endif 75 | #endif 76 | 77 | /****************************************************************************************************/ 78 | /* some macros */ 79 | 80 | #define ROTL16(in, l) ((in) << l) ^ ((in) >> (16-l)) 81 | #define ROTR16(in, l) ((in) >> l) ^ ((in) << (16-l)) 82 | #define MASK4 0x0f 83 | #define MASK8 0xff 84 | #define MASK16 0xffff 85 | 86 | #define LEDROUND(state) do {\ 87 | unsigned long long stateIn;\ 88 | stateIn = state;\ 89 | state = T0_LED[stateIn & MASK8];\ 90 | state ^= T1_LED[(stateIn >> 8) & MASK8];\ 91 | state ^= T2_LED[(stateIn >> 16) & MASK8];\ 92 | state ^= T3_LED[(stateIn >> 24) & MASK8];\ 93 | state ^= T4_LED[(stateIn 
>> 32) & MASK8];\ 94 | state ^= T5_LED[(stateIn >> 40) & MASK8];\ 95 | state ^= T6_LED[(stateIn >> 48) & MASK8];\ 96 | state ^= T7_LED[(stateIn >> 56) & MASK8];\ 97 | } while(0); 98 | 99 | 100 | 101 | /****************************************************************************************************/ 102 | /* LED64 key schedule */ 103 | #ifdef LED64 104 | void LED64table_key_schedule(const u8* masterKey64, u8* roundKeys64) 105 | { 106 | ((u64*)roundKeys64)[0] = ((u64*)masterKey64)[0]; 107 | 108 | return; 109 | } 110 | #endif 111 | 112 | 113 | 114 | /****************************************************************************************************/ 115 | /* LED128 key schedule */ 116 | #ifdef LED128 117 | void LED128table_key_schedule(const u8* masterKey128, u8* roundKeys128) 118 | { 119 | ((u64*)roundKeys128)[0] = ((u64*)masterKey128)[0]; 120 | ((u64*)roundKeys128)[1] = ((u64*)masterKey128)[1]; 121 | 122 | return; 123 | } 124 | #endif 125 | 126 | 127 | 128 | /****************************************************************************************************/ 129 | /* LED64 encryption core */ 130 | #ifdef LED64 131 | void LED64table_core(const u8* plaintext, const u8* roundKeys64, u8* ciphertext) 132 | { 133 | u64 * state, * roundKeys; 134 | 135 | /* cast variables */ 136 | *((u64*)ciphertext) = *((u64*)plaintext); 137 | state = (u64 *)ciphertext; 138 | roundKeys = (u64 *)roundKeys64; 139 | 140 | /* addRoundKey */ 141 | state[0] ^= roundKeys[0]; 142 | 143 | /* step 1 */ 144 | state[0] ^= Tcon64LED[0]; 145 | LEDROUND(state[0]); 146 | state[0] ^= Tcon64LED[1]; 147 | LEDROUND(state[0]); 148 | state[0] ^= Tcon64LED[2]; 149 | LEDROUND(state[0]); 150 | state[0] ^= Tcon64LED[3]; 151 | LEDROUND(state[0]); 152 | 153 | /* addRoundKey */ 154 | state[0] ^= roundKeys[0]; 155 | 156 | /* step 2 */ 157 | state[0] ^= Tcon64LED[4]; 158 | LEDROUND(state[0]); 159 | state[0] ^= Tcon64LED[5]; 160 | LEDROUND(state[0]); 161 | state[0] ^= Tcon64LED[6]; 162 | LEDROUND(state[0]); 163 | 
state[0] ^= Tcon64LED[7]; 164 | LEDROUND(state[0]); 165 | 166 | /* addRoundKey */ 167 | state[0] ^= roundKeys[0]; 168 | 169 | /* step 3 */ 170 | state[0] ^= Tcon64LED[8]; 171 | LEDROUND(state[0]); 172 | state[0] ^= Tcon64LED[9]; 173 | LEDROUND(state[0]); 174 | state[0] ^= Tcon64LED[10]; 175 | LEDROUND(state[0]); 176 | state[0] ^= Tcon64LED[11]; 177 | LEDROUND(state[0]); 178 | 179 | /* addRoundKey */ 180 | state[0] ^= roundKeys[0]; 181 | 182 | /* step 4 */ 183 | state[0] ^= Tcon64LED[12]; 184 | LEDROUND(state[0]); 185 | state[0] ^= Tcon64LED[13]; 186 | LEDROUND(state[0]); 187 | state[0] ^= Tcon64LED[14]; 188 | LEDROUND(state[0]); 189 | state[0] ^= Tcon64LED[15]; 190 | LEDROUND(state[0]); 191 | 192 | /* addRoundKey */ 193 | state[0] ^= roundKeys[0]; 194 | 195 | /* step 5 */ 196 | state[0] ^= Tcon64LED[16]; 197 | LEDROUND(state[0]); 198 | state[0] ^= Tcon64LED[17]; 199 | LEDROUND(state[0]); 200 | state[0] ^= Tcon64LED[18]; 201 | LEDROUND(state[0]); 202 | state[0] ^= Tcon64LED[19]; 203 | LEDROUND(state[0]); 204 | 205 | /* addRoundKey */ 206 | state[0] ^= roundKeys[0]; 207 | 208 | /* step 6 */ 209 | state[0] ^= Tcon64LED[20]; 210 | LEDROUND(state[0]); 211 | state[0] ^= Tcon64LED[21]; 212 | LEDROUND(state[0]); 213 | state[0] ^= Tcon64LED[22]; 214 | LEDROUND(state[0]); 215 | state[0] ^= Tcon64LED[23]; 216 | LEDROUND(state[0]); 217 | 218 | /* addRoundKey */ 219 | state[0] ^= roundKeys[0]; 220 | 221 | /* step 7 */ 222 | state[0] ^= Tcon64LED[24]; 223 | LEDROUND(state[0]); 224 | state[0] ^= Tcon64LED[25]; 225 | LEDROUND(state[0]); 226 | state[0] ^= Tcon64LED[26]; 227 | LEDROUND(state[0]); 228 | state[0] ^= Tcon64LED[27]; 229 | LEDROUND(state[0]); 230 | 231 | /* addRoundKey */ 232 | state[0] ^= roundKeys[0]; 233 | 234 | /* step 8 */ 235 | state[0] ^= Tcon64LED[28]; 236 | LEDROUND(state[0]); 237 | state[0] ^= Tcon64LED[29]; 238 | LEDROUND(state[0]); 239 | state[0] ^= Tcon64LED[30]; 240 | LEDROUND(state[0]); 241 | state[0] ^= Tcon64LED[31]; 242 | LEDROUND(state[0]); 243 | 244 | 
/* addRoundKey */ 245 | state[0] ^= roundKeys[0]; 246 | 247 | return; 248 | } 249 | #endif 250 | 251 | 252 | 253 | /****************************************************************************************************/ 254 | /* LED128 encryption core */ 255 | #ifdef LED128 256 | void LED128table_core(const u8* plaintext, const u8* roundKeys128, u8* ciphertext) 257 | { 258 | u64 * state, * roundKeys; 259 | 260 | /* cast variables */ 261 | *((u64*)ciphertext) = *((u64*)plaintext); 262 | state = (u64 *)ciphertext; 263 | roundKeys = (u64 *)roundKeys128; 264 | 265 | /* addRoundKey */ 266 | state[0] ^= roundKeys[0]; 267 | 268 | /* step 1 */ 269 | state[0] ^= Tcon128LED[0]; 270 | LEDROUND(state[0]); 271 | state[0] ^= Tcon128LED[1]; 272 | LEDROUND(state[0]); 273 | state[0] ^= Tcon128LED[2]; 274 | LEDROUND(state[0]); 275 | state[0] ^= Tcon128LED[3]; 276 | LEDROUND(state[0]); 277 | 278 | /* addRoundKey */ 279 | state[0] ^= roundKeys[1]; 280 | 281 | /* step 2 */ 282 | state[0] ^= Tcon128LED[4]; 283 | LEDROUND(state[0]); 284 | state[0] ^= Tcon128LED[5]; 285 | LEDROUND(state[0]); 286 | state[0] ^= Tcon128LED[6]; 287 | LEDROUND(state[0]); 288 | state[0] ^= Tcon128LED[7]; 289 | LEDROUND(state[0]); 290 | 291 | /* addRoundKey */ 292 | state[0] ^= roundKeys[0]; 293 | 294 | /* step 3 */ 295 | state[0] ^= Tcon128LED[8]; 296 | LEDROUND(state[0]); 297 | state[0] ^= Tcon128LED[9]; 298 | LEDROUND(state[0]); 299 | state[0] ^= Tcon128LED[10]; 300 | LEDROUND(state[0]); 301 | state[0] ^= Tcon128LED[11]; 302 | LEDROUND(state[0]); 303 | 304 | /* addRoundKey */ 305 | state[0] ^= roundKeys[1]; 306 | 307 | /* step 4 */ 308 | state[0] ^= Tcon128LED[12]; 309 | LEDROUND(state[0]); 310 | state[0] ^= Tcon128LED[13]; 311 | LEDROUND(state[0]); 312 | state[0] ^= Tcon128LED[14]; 313 | LEDROUND(state[0]); 314 | state[0] ^= Tcon128LED[15]; 315 | LEDROUND(state[0]); 316 | 317 | /* addRoundKey */ 318 | state[0] ^= roundKeys[0]; 319 | 320 | /* step 5 */ 321 | state[0] ^= Tcon128LED[16]; 322 | LEDROUND(state[0]); 
323 | state[0] ^= Tcon128LED[17]; 324 | LEDROUND(state[0]); 325 | state[0] ^= Tcon128LED[18]; 326 | LEDROUND(state[0]); 327 | state[0] ^= Tcon128LED[19]; 328 | LEDROUND(state[0]); 329 | 330 | /* addRoundKey */ 331 | state[0] ^= roundKeys[1]; 332 | 333 | /* step 6 */ 334 | state[0] ^= Tcon128LED[20]; 335 | LEDROUND(state[0]); 336 | state[0] ^= Tcon128LED[21]; 337 | LEDROUND(state[0]); 338 | state[0] ^= Tcon128LED[22]; 339 | LEDROUND(state[0]); 340 | state[0] ^= Tcon128LED[23]; 341 | LEDROUND(state[0]); 342 | 343 | /* addRoundKey */ 344 | state[0] ^= roundKeys[0]; 345 | 346 | /* step 7 */ 347 | state[0] ^= Tcon128LED[24]; 348 | LEDROUND(state[0]); 349 | state[0] ^= Tcon128LED[25]; 350 | LEDROUND(state[0]); 351 | state[0] ^= Tcon128LED[26]; 352 | LEDROUND(state[0]); 353 | state[0] ^= Tcon128LED[27]; 354 | LEDROUND(state[0]); 355 | 356 | /* addRoundKey */ 357 | state[0] ^= roundKeys[1]; 358 | 359 | /* step 8 */ 360 | state[0] ^= Tcon128LED[28]; 361 | LEDROUND(state[0]); 362 | state[0] ^= Tcon128LED[29]; 363 | LEDROUND(state[0]); 364 | state[0] ^= Tcon128LED[30]; 365 | LEDROUND(state[0]); 366 | state[0] ^= Tcon128LED[31]; 367 | LEDROUND(state[0]); 368 | 369 | /* addRoundKey */ 370 | state[0] ^= roundKeys[0]; 371 | 372 | /* step 9 */ 373 | state[0] ^= Tcon128LED[32]; 374 | LEDROUND(state[0]); 375 | state[0] ^= Tcon128LED[33]; 376 | LEDROUND(state[0]); 377 | state[0] ^= Tcon128LED[34]; 378 | LEDROUND(state[0]); 379 | state[0] ^= Tcon128LED[35]; 380 | LEDROUND(state[0]); 381 | 382 | /* addRoundKey */ 383 | state[0] ^= roundKeys[1]; 384 | 385 | /* step 10 */ 386 | state[0] ^= Tcon128LED[36]; 387 | LEDROUND(state[0]); 388 | state[0] ^= Tcon128LED[37]; 389 | LEDROUND(state[0]); 390 | state[0] ^= Tcon128LED[38]; 391 | LEDROUND(state[0]); 392 | state[0] ^= Tcon128LED[39]; 393 | LEDROUND(state[0]); 394 | 395 | /* addRoundKey */ 396 | state[0] ^= roundKeys[0]; 397 | 398 | /* step 11 */ 399 | state[0] ^= Tcon128LED[40]; 400 | LEDROUND(state[0]); 401 | state[0] ^= Tcon128LED[41]; 
402 | LEDROUND(state[0]); 403 | state[0] ^= Tcon128LED[42]; 404 | LEDROUND(state[0]); 405 | state[0] ^= Tcon128LED[43]; 406 | LEDROUND(state[0]); 407 | 408 | /* addRoundKey */ 409 | state[0] ^= roundKeys[1]; 410 | 411 | /* step 12 */ 412 | state[0] ^= Tcon128LED[44]; 413 | LEDROUND(state[0]); 414 | state[0] ^= Tcon128LED[45]; 415 | LEDROUND(state[0]); 416 | state[0] ^= Tcon128LED[46]; 417 | LEDROUND(state[0]); 418 | state[0] ^= Tcon128LED[47]; 419 | LEDROUND(state[0]); 420 | 421 | /* addRoundKey */ 422 | state[0] ^= roundKeys[0]; 423 | 424 | return; 425 | } 426 | #endif 427 | 428 | 429 | 430 | /****************************************************************************************************/ 431 | /* LED64 key schedule + encryption */ 432 | #ifdef LED64 433 | void LED64table_cipher(const u64 plaintext_in[TABLE_P], const u16 keys_in[TABLE_P][KEY64], u64 ciphertext_out[TABLE_P]) 434 | { 435 | /* Key schedule: subkeys are of size 1*8 bytes */ 436 | u8 subkeys[TABLE_P * LED64_SUBKEYS_SIZE]; 437 | 438 | /* The key schedule does merely nothing */ 439 | #ifdef MEASURE_PERF 440 | key_schedule_start = 0; 441 | #endif 442 | 443 | /* Compute the subkeys */ 444 | LED64table_key_schedule((const u8*)keys_in, subkeys); 445 | 446 | #ifdef MEASURE_PERF 447 | key_schedule_end = 0; 448 | #endif 449 | 450 | #ifdef MEASURE_PERF 451 | encrypt_start = rdtsc(); 452 | #endif 453 | 454 | /* Call the core encryption */ 455 | LED64table_core((const u8*)plaintext_in, subkeys, (u8*)ciphertext_out); 456 | 457 | #ifdef MEASURE_PERF 458 | encrypt_end = rdtsc(); 459 | #endif 460 | 461 | return; 462 | } 463 | #endif 464 | 465 | 466 | 467 | /****************************************************************************************************/ 468 | /* LED128 key schedule + encryption */ 469 | #ifdef LED128 470 | void LED128table_cipher(const u64 plaintext_in[TABLE_P], const u16 keys_in[TABLE_P][KEY128], u64 ciphertext_out[TABLE_P]) 471 | { 472 | /* Key schedule: subkeys are of size 1*16 bytes */ 
473 | u8 subkeys[TABLE_P * LED128_SUBKEYS_SIZE]; 474 | 475 | /* The key schedule does merely nothing */ 476 | #ifdef MEASURE_PERF 477 | key_schedule_start = 0; 478 | #endif 479 | 480 | /* Compute the subkeys */ 481 | LED128table_key_schedule((const u8*)keys_in, subkeys); 482 | 483 | #ifdef MEASURE_PERF 484 | key_schedule_end = 0; 485 | #endif 486 | 487 | #ifdef MEASURE_PERF 488 | encrypt_start = rdtsc(); 489 | #endif 490 | 491 | /* Call the core encryption */ 492 | LED128table_core((const u8*)plaintext_in, subkeys, (u8*)ciphertext_out); 493 | 494 | #ifdef MEASURE_PERF 495 | encrypt_end = rdtsc(); 496 | #endif 497 | 498 | return; 499 | } 500 | #endif 501 | 502 | #endif 503 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | AC_INIT(myconfig, version-0.1) 2 | 3 | main_dir="./" 4 | table_dir="./src/table" 5 | vperm_dir="./src/vperm" 6 | bitslice_dir="./src/bitslice" 7 | 8 | AC_CONFIG_FILES([$main_dir/Makefile]) 9 | 10 | AC_DEFUN(CHECK_OPTION,[ 11 | if test "$1" != "yes"; then 12 | if test "$1" != "no"; then 13 | if test "$1" != "default"; then 14 | AC_MSG_ERROR([value $1 is not a proper value for option $2 (use "yes", "no" or "default")]) 15 | fi 16 | fi 17 | fi 18 | ]) 19 | 20 | ## Default values 21 | w_led="default" 22 | w_present="default" 23 | w_piccolo="default" 24 | w_led64="default" 25 | w_present80="default" 26 | w_piccolo80="default" 27 | w_led128="default" 28 | w_present128="default" 29 | w_piccolo128="default" 30 | 31 | w_table="default" 32 | w_vperm="default" 33 | w_bitslice="default" 34 | 35 | w_ssse="default" 36 | w_avx="default" 37 | 38 | w_threadsafe="default" 39 | 40 | w_arch32="default" 41 | w_arch64="default" 42 | 43 | curr_arch="" 44 | 45 | AC_ARG_WITH(led, [ --with-led LED cipher], w_led="$withval") 46 | AC_ARG_WITH(led64, [ --with-led64 LED cipher/64-bit key variant], w_led64="$withval") 47 | AC_ARG_WITH(led128, [ 
--with-led128 LED cipher/128-bit key variant], w_led128="$withval")
AC_ARG_WITH(present, [ --with-present PRESENT cipher], w_present="$withval")
AC_ARG_WITH(present80, [ --with-present80 PRESENT cipher/80-bit key variant], w_present80="$withval")
AC_ARG_WITH(present128, [ --with-present128 PRESENT cipher/128-bit key variant], w_present128="$withval")
AC_ARG_WITH(piccolo, [ --with-piccolo Piccolo cipher], w_piccolo="$withval")
AC_ARG_WITH(piccolo80, [ --with-piccolo80 Piccolo cipher/80-bit key variant], w_piccolo80="$withval")
AC_ARG_WITH(piccolo128, [ --with-piccolo128 Piccolo cipher/128-bit key variant], w_piccolo128="$withval")
AC_ARG_WITH(table, [ --with-table Table implementations], w_table="$withval")
AC_ARG_WITH(vperm, [ --with-vperm Vperm implementations], w_vperm="$withval")
AC_ARG_WITH(bitslice, [ --with-bitslice Bitslice implementations], w_bitslice="$withval")
AC_ARG_WITH(ssse, [ --with-ssse SSSE implementations], w_ssse="$withval")
AC_ARG_WITH(avx, [ --with-avx AVX implementations], w_avx="$withval")
AC_ARG_WITH(threadsafe, [ --with-threadsafe Thread safe library (link with pthread)], w_threadsafe="$withval")
AC_ARG_WITH(arch32, [ --with-arch32 Force 32-bit compilation], w_arch32="$withval")
AC_ARG_WITH(arch64, [ --with-arch64 Force 64-bit compilation], w_arch64="$withval")

# Every option only accepts "yes", "no" or "default"
CHECK_OPTION($w_led, "--with-led")
CHECK_OPTION($w_led64, "--with-led64")
CHECK_OPTION($w_led128, "--with-led128")
CHECK_OPTION($w_present, "--with-present")
CHECK_OPTION($w_present80, "--with-present80")
CHECK_OPTION($w_present128, "--with-present128")
CHECK_OPTION($w_piccolo, "--with-piccolo")
CHECK_OPTION($w_piccolo80, "--with-piccolo80")
CHECK_OPTION($w_piccolo128, "--with-piccolo128")
CHECK_OPTION($w_table, "--with-table")
CHECK_OPTION($w_vperm, "--with-vperm")
CHECK_OPTION($w_bitslice, "--with-bitslice")
CHECK_OPTION($w_ssse, "--with-ssse")
CHECK_OPTION($w_avx, "--with-avx")
CHECK_OPTION($w_threadsafe, "--with-threadsafe")
CHECK_OPTION($w_arch32, "--with-arch32")
CHECK_OPTION($w_arch64, "--with-arch64")

# Reject contradictory architecture requests.
# String comparisons use the portable "=" operator: configure runs under
# /bin/sh, and "==" is rejected by strictly POSIX shells.
if test "$w_arch32" = "yes"; then
	if test "$w_arch64" = "yes"; then
		AC_MSG_ERROR([--with-arch32 and --with-arch64 cannot be set to "yes" together!])
	fi
	if test "$w_vperm" = "yes"; then
		AC_MSG_ERROR([--with-arch32 and --with-vperm cannot be set to "yes" together! (vperm code is in x86_64 assembly)])
	fi
fi
if test "$w_arch32" = "no"; then
	if test "$w_arch64" = "no"; then
		AC_MSG_ERROR([--with-arch32 and --with-arch64 cannot be set to "no" together!])
	fi
fi


# Summary helpers
# WRITE_TO_FILE(file, label, value): append one aligned "label: value" line
AC_DEFUN(WRITE_TO_FILE,[
	printf "%-40s: %s\n" $2 $3 >> $1
])
# SHOW_SUMMARY(file): print the summary file and delete it
AC_DEFUN(SHOW_SUMMARY,[
	cat $1 && rm -f $1
])

# CIPHER_VARIANT(family value, variant value, label, substituted variable, define)
# A key-size variant is built unless its own option or the option of its
# cipher family is "no". The decision is written to the summary and the
# substituted variable is set to the define, or to the empty string.
AC_DEFUN(CIPHER_VARIANT,[
if test "$1" != "no" && test "$2" != "no"; then
	WRITE_TO_FILE(summary, "$3", "YES")
	AC_SUBST($4, "$5")
else
	WRITE_TO_FILE(summary, "$3", "NO")
	AC_SUBST($4, "")
fi
])

# Remove any existing summary file
rm -f ./summary
echo "###########################################" >> ./summary
echo "### SUMMARY ###" >> ./summary
echo "###########################################" >> ./summary

##########
# CHECK Make
# NOTE(review): nothing visible in this file sets host_type, so the freebsd
# branch looks unreachable and the default branch is always taken - confirm
# whether a host detection step was intended here.
case $host_type in
	*freebsd*)
		AC_CHECK_PROG(MAKEPROG,gmake,gmake,no)
		if test "$MAKEPROG" = "no"
		then
			AC_MSG_ERROR(Cannot find GNU gmake.)
		fi
		AC_SUBST(MAKEPROG,$MAKEPROG)
	;;
	*)
		AC_CHECK_PROG(MAKEPROG,make,make,no)
		if test "$MAKEPROG" = "no"
		then
			AC_MSG_ERROR(Cannot find GNU make.)
		fi
		AC_SUBST(MAKEPROG,$MAKEPROG)
	;;
esac

AC_PROG_CC(gcc cc)

######### INTERNAL OPTIONS ###########
#########
summary=""

####### CHECKING CPU CAPABILITIES
##################################################################
# Check for CPU arch
# Compiles and runs a tiny program whose exit status encodes the target:
# 1 = x86 32-bit, 2 = x86 64-bit, otherwise the pointer size in bytes
# (4 or 8) for a non x86 target. Sets curr_arch and turns the "default"
# value of w_vperm / w_bitslice into "no" where they cannot work.
AC_DEFUN(CHECK_CPU_ARCH,[
C_FILE="./check_cpu_arch.c"
FILE="./check_cpu_arch"
rm -f $C_FILE $FILE
cat > $C_FILE <<EOM
#include <stdio.h>
#include <stdlib.h>
int main(void){
#ifdef __i386__
	return 1;
#endif
#ifdef __x86_64__
	return 2;
#endif
	return (sizeof(void*));
}
EOM
$CC $C_FILE -o $FILE > /dev/null 2>&1
OUT=$?
if test "$OUT" != "0"; then
	AC_MSG_ERROR([problem when compiling $C_FILE])
fi
# Check for output value
$FILE
OUT=$?
if test "$OUT" = "1"; then
	echo "### Detected CPU: x86 32-bit ###" >> ./summary
	AC_MSG_NOTICE([---> Your CPU is in 32-bit mode])
	if test "$w_vperm" = "default"; then
		w_vperm="no"
	fi
	curr_arch="32-bit"
fi
if test "$OUT" = "2"; then
	echo "### Detected CPU: x86 64-bit ###" >> ./summary
	AC_MSG_NOTICE([---> Your CPU is in 64-bit mode])
	curr_arch="64-bit"
fi
if test "$OUT" = "4"; then
	echo "### Detected CPU: no x86 (32-bit) ###" >> ./summary
	AC_MSG_NOTICE([---> Your CPU has a non x86 architecture])
	if test "$w_vperm" = "default"; then
		w_vperm="no"
	fi
	if test "$w_bitslice" = "default"; then
		w_bitslice="no"
	fi
	curr_arch="32-bit"
fi
if test "$OUT" = "8"; then
	echo "### Detected CPU: no x86 (64-bit) ###" >> ./summary
	AC_MSG_NOTICE([---> Your CPU has a non x86 architecture])
	if test "$w_vperm" = "default"; then
		w_vperm="no"
	fi
	if test "$w_bitslice" = "default"; then
		w_bitslice="no"
	fi
	curr_arch="64-bit"
fi
if test "$curr_arch" = ""; then
	AC_MSG_ERROR([problem when detecting CPU architecture: unknown architecture!])
fi

# Job done: remove unnecessary files
rm -f $C_FILE $FILE
])

##################################################################
# Check for CPU features
# Compiles and runs a program that includes common/helpers.h and interprets
# its exit status: 2 = no SSSE3, 3 = SSSE3 without AVX, 0 = SSSE3 and AVX.
# NOTE(review): main itself returns 0, so the non-zero statuses presumably come
# from code pulled in by common/helpers.h - confirm. The names of the two
# system headers were missing in the reviewed copy; stdio.h and stdlib.h are
# assumed.
AC_DEFUN(CHECK_CPU_FEATURES,[
C_FILE="./check_cpu_features.c"
FILE="./check_cpu_features"
rm -f $C_FILE $FILE
cat > $C_FILE <<EOM
#include <stdio.h>
#include <stdlib.h>
#include "common/helpers.h"
int main(void){
	return 0;
}
EOM
$CC -DBITSLICE -DVPERM -DAVX $C_FILE -I $main_dir/src -o $FILE > /dev/null 2>&1
OUT=$?
if test "$OUT" != "0"; then
	AC_MSG_ERROR([problem when compiling $C_FILE])
fi
# Check for output value
$FILE
OUT=$?
if test "$OUT" = "2"; then
	echo "### Detected CPU: no SSSE3 ###" >> ./summary
	echo "###########################################" >> ./summary
	AC_MSG_NOTICE([---> Your CPU does not support SSSE3])
	w_avx="no"
	w_ssse="no"
	if test "$w_vperm" = "default"; then
		w_vperm="no"
	fi
	if test "$w_bitslice" = "default"; then
		w_bitslice="no"
	fi
fi
if test "$OUT" = "3"; then
	echo "### Detected CPU: SSSE3, no AVX ###" >> ./summary
	echo "###########################################" >> ./summary
	AC_MSG_NOTICE([---> Your CPU supports SSSE3 but not AVX])
	w_avx="no"
fi
if test "$OUT" = "0"; then
	echo "### Detected CPU: SSSE3 and AVX ###" >> ./summary
	echo "###########################################" >> ./summary
	AC_MSG_NOTICE([---> Your CPU supports both SSSE3 and AVX])
fi
# Job done: remove unnecessary files
rm -f $C_FILE $FILE
])

#### Check for CPU architecture and features (instruction set)
CHECK_CPU_ARCH()
CHECK_CPU_FEATURES()


# If we force arch32, vperm and AVX are disabled
if test "$w_arch32" = "yes"; then
	w_vperm="no"
	w_avx="no"
fi

# table handling
if test "$w_table" != "no"; then
	AC_MSG_NOTICE([Using table based functions])
	WRITE_TO_FILE(summary, "table", "YES")
	AC_SUBST(table_define, "-DTABLE")
else
	WRITE_TO_FILE(summary, "table", "NO")
	AC_SUBST(table_define, "")
fi

# vperm handling
if test "$w_vperm" != "no"; then
	AC_MSG_NOTICE([Using vperm functions])
	WRITE_TO_FILE(summary, "vperm", "YES")
	AC_SUBST(vperm_define, "-DVPERM")
else
	WRITE_TO_FILE(summary, "vperm", "NO")
	AC_SUBST(vperm_define, "")
fi

# bitslice handling
if test "$w_bitslice" != "no"; then
	AC_MSG_NOTICE([Using bitslice functions])
	WRITE_TO_FILE(summary, "bitslice", "YES")
	AC_SUBST(bitslice_define, "-DBITSLICE")
else
	WRITE_TO_FILE(summary, "bitslice", "NO")
	AC_SUBST(bitslice_define, "")
fi

# led handling
CIPHER_VARIANT([$w_led], [$w_led64], [LED64], [led64_define], [-DLED64])
CIPHER_VARIANT([$w_led], [$w_led128], [LED128], [led128_define], [-DLED128])

# present handling
CIPHER_VARIANT([$w_present], [$w_present80], [PRESENT80], [present80_define], [-DPRESENT80])
CIPHER_VARIANT([$w_present], [$w_present128], [PRESENT128], [present128_define], [-DPRESENT128])

# piccolo handling
CIPHER_VARIANT([$w_piccolo], [$w_piccolo80], [Piccolo80], [piccolo80_define], [-DPiccolo80])
CIPHER_VARIANT([$w_piccolo], [$w_piccolo128], [Piccolo128], [piccolo128_define], [-DPiccolo128])

# ssse handling
if test "$w_ssse" != "no"; then
	WRITE_TO_FILE(summary, "Using SSSE", "YES")
	AC_SUBST(ssse_link, "-mssse3")
else
	WRITE_TO_FILE(summary, "Using SSSE", "NO")
	AC_SUBST(ssse_link, "")
fi

# avx handling
if test "$w_avx" != "no"; then
	WRITE_TO_FILE(summary, "Using AVX", "YES")
	AC_SUBST(avx_define, "-DAVX")
else
	WRITE_TO_FILE(summary, "Using AVX", "NO")
	AC_SUBST(avx_define, "")
fi

# threadsafe handling
if test "$w_threadsafe" != "no"; then
	WRITE_TO_FILE(summary, "Using thread-safe functions", "YES")
	AC_SUBST(threadsafe_define, "-DTHREAD_SAFE")
	AC_SUBST(threadsafe_link, "-lpthread")
else
	WRITE_TO_FILE(summary, "Using thread-safe functions", "NO")
	AC_SUBST(threadsafe_define, "")
	AC_SUBST(threadsafe_link, "")
fi

# arch32 handling
if test "$w_arch32" = "yes"; then
	WRITE_TO_FILE(summary, "Compiling for architecture (forced)", "32-bit")
	AC_SUBST(arch32_opt, "-m32")
else
	AC_SUBST(arch32_opt, "")
fi

# arch64 handling
if test "$w_arch64" = "yes"; then
	WRITE_TO_FILE(summary, "Compiling for architecture (forced)", "64-bit")
	AC_SUBST(arch64_opt, "-m64")
else
	AC_SUBST(arch64_opt, "")
fi

# Default arch
if test "$w_arch32" = "default"; then
	if test "$w_arch64" = "default"; then
		WRITE_TO_FILE(summary, "Compiling for architecture (detected)", "$curr_arch")
	fi
fi
AC_OUTPUT(Makefile)

SHOW_SUMMARY([summary])
--------------------------------------------------------------------------------
/src/bitslice/Piccolo/Piccolo_util.h:
--------------------------------------------------------------------------------
/*------------------------ MIT License HEADER ------------------------------------
Copyright ANSSI and NTU (2015)
Contributors:
Ryad BENADJILA [ryadbenadjila@gmail.com] and
Jian GUO [ntu.guo@gmail.com] and
Victor LOMNE [victor.lomne@gmail.com] and
Thomas
PEYRIN [thomas.peyrin@gmail.com] 8 | 9 | This software is a computer program whose purpose is to implement 10 | lightweight block ciphers with different optimizations for the x86 11 | platform. Three algorithms have been implemented: PRESENT, LED and 12 | Piccolo. Three techniques have been explored: table based 13 | implementations, vperm (for vector permutation) and bitslice 14 | implementations. For more details, please refer to the SAC 2013 15 | paper: 16 | http://eprint.iacr.org/2013/445 17 | as well as the documentation of the project. 18 | Here is a big picture of how the code is divided: 19 | - src/common contains common headers, structures and functions. 20 | - src/table contains table based implementations, with the code 21 | that generates the tables in src/table/gen_tables. The code here 22 | is written in pure C so it should compile on any platform (x86 23 | and other architectures), as well as any OS flavour (*nix, 24 | Windows ...). 25 | - src/vperm contains vperm based implementations. They are written 26 | in inline assembly for x86_64 and will only compile and work on 27 | this platform. The code only compiles with gcc, but porting it to 28 | other assembly flavours should not be too complicated. 29 | - src/bitslice contains bitslice based implementations. They are 30 | written in asm intrinsics. It should compile and run on i386 as 31 | well as x86_64 platforms, and it should be portable to other OS 32 | flavours since intrinsics are standard among many compilers. 33 | Note: vperm and bitslice implementations require a x86 CPU with at least 34 | SSSE3 extensions. 
35 | 36 | Permission is hereby granted, free of charge, to any person obtaining a copy 37 | of this software and associated documentation files (the "Software"), to deal 38 | in the Software without restriction, including without limitation the rights 39 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 40 | copies of the Software, and to permit persons to whom the Software is 41 | furnished to do so, subject to the following conditions: 42 | 43 | The above copyright notice and this permission notice shall be included in 44 | all copies or substantial portions of the Software. 45 | 46 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 47 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 48 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 49 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 50 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 51 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 52 | THE SOFTWARE. 53 | 54 | Except as contained in this notice, the name(s) of the above copyright holders 55 | shall not be used in advertising or otherwise to promote the sale, use or other 56 | dealings in this Software without prior written authorization. 57 | 58 | 59 | -------------------------- MIT License HEADER ----------------------------------*/ 60 | #include "../../common/basic_helpers.h" 61 | #include "../../common/bitslice_common.h" 62 | 63 | /* FIXME: this should not be used ... 
*/ 64 | /* Rearranging the input key for bitslice convenience */ 65 | #define Piccolo_rearrange_keys(a, b, parallelism, key_type) do {\ 66 | int i, j;\ 67 | for(i=0; i < parallelism; i++){\ 68 | for(j=0; j < key_type; j++){\ 69 | ((u16 (*)[parallelism])b)[j][i] = (((u16 (*)[key_type])a)[i][j] << 8) ^ (((u16 (*)[key_type])a)[i][j] >> 8);\ 70 | }\ 71 | }\ 72 | } while(0); 73 | 74 | word mask_rp0, mask_rp1; 75 | word mask_wk0, mask_wk1; 76 | word mask_rotl16, mask_rotl32, mask_rotl48; 77 | word mask_shf0, mask_shf16, mask_shf32; 78 | 79 | #undef uSWAP8 80 | #define uSWAP8(x, y, t0); do {\ 81 | t0 = x; x = y; y = t0;\ 82 | t0 = x;\ 83 | x = XOR(PSHUFB(x , mask_unpack8_l0), PSHUFB(y, mask_unpack8_h0));\ 84 | y = XOR(PSHUFB(t0, mask_unpack8_l1), PSHUFB(y, mask_unpack8_h1));\ 85 | } while(0); 86 | 87 | #define Piccolo_packing16(a0, a1, a2, a3, a4, a5, a6, a7, t0, t1, t2) do {\ 88 | packing16(a0, a1, a2, a3, a4, a5, a6, a7, t0, t1, t2);\ 89 | SWAP64(a0, a4, t0);\ 90 | SWAP64(a1, a5, t0);\ 91 | SWAP64(a2, a6, t0);\ 92 | SWAP64(a3, a7, t0);\ 93 | } while(0); 94 | 95 | #define Piccolo_unpacking16(a0, a1, a2, a3, a4, a5, a6, a7, t0, t1, t2) do {\ 96 | uSWAP64(a0, a4, t0); \ 97 | uSWAP64(a1, a5, t0); \ 98 | uSWAP64(a2, a6, t0); \ 99 | uSWAP64(a3, a7, t0); \ 100 | unpacking16(a0, a1, a2, a3, a4, a5, a6, a7, t0, t1, t2);\ 101 | } while(0); 102 | 103 | #define ROTL16(x) PSHUFB((x), mask_rotl16) 104 | #define ROTL32(x) PSHUFB((x), mask_rotl32) 105 | #define ROTL48(x) PSHUFB((x), mask_rotl48) 106 | #define TIMES3(x) XOR((x), ROTL16(x)) 107 | #define TIMES33(x) XOR(ROTL16(x), ROTL32(TIMES3(x))) 108 | 109 | #define SHF0(x) PSHUFB(x, mask_shf0) 110 | #define SHF16(x) PSHUFB(x, mask_shf16) 111 | #define SHF32(x) PSHUFB(x, mask_shf32) 112 | 113 | #define SHF3(x) XOR(SHF0(x), SHF16(x)) 114 | #define SHF33(x) XOR(SHF16(x), SHF32(SHF3(x))) 115 | 116 | word Piccolo80_constants[25][(BITSLICE16_P/2)/2]; 117 | word Piccolo128_constants[31][(BITSLICE16_P/2)/2]; 118 | u32 Piccolo80_RC[25]; 119 | 
u32 Piccolo128_RC[31]; 120 | #define Piccolo_generate_constants(P, TYPE) do {\ 121 | int i, j;\ 122 | int R;\ 123 | u16 t[2][P];\ 124 | if(TYPE == 80){\ 125 | R = 25;\ 126 | }\ 127 | else{\ 128 | R = 31;\ 129 | }\ 130 | for(i = 0; i < R; i++){\ 131 | Piccolo ## TYPE ## _RC[i] = (i+1) & 0x1fU;\ 132 | Piccolo ## TYPE ## _RC[i] <<= 10;\ 133 | Piccolo ## TYPE ## _RC[i] ^= (i+1) & 0x1fU;\ 134 | Piccolo ## TYPE ## _RC[i] ^= Piccolo ## TYPE ## _RC[i] << 17;\ 135 | if(TYPE == 128){\ 136 | Piccolo ## TYPE ## _RC[i] ^= 0x6547a98bU;\ 137 | }\ 138 | else{\ 139 | Piccolo ## TYPE ## _RC[i] ^= 0x0f1e2d3cU;\ 140 | }\ 141 | for(j = 0; j < P; j++){\ 142 | t[0][j] = (u16)(Piccolo ## TYPE ## _RC[i] >> 16);\ 143 | t[1][j] = (u16)(Piccolo ## TYPE ## _RC[i] & 0xffff);\ 144 | }\ 145 | pack_one(Piccolo ## TYPE ## _ ## constants[i], t[0], t[1]);\ 146 | }\ 147 | } while(0); 148 | 149 | 150 | /* 151 | #define pack_one(w, a, b) do {\ 152 | word t0, t1, t2, t3, t4, t5, t6;\ 153 | u64 x[BITSLICE16_P];\ 154 | int i;\ 155 | for(i = 0; i < BITSLICE16_P; i++){\ 156 | x[i] = (((u64)a[i]) << 48) ^ (((u64)b[i]) << 16);\ 157 | }\ 158 | t0 = LOAD(x+0);\ 159 | t1 = LOAD(x+2);\ 160 | t2 = LOAD(x+4);\ 161 | t3 = LOAD(x+6);\ 162 | \ 163 | w[0] = LOAD(x+8);\ 164 | w[1] = LOAD(x+10);\ 165 | w[2] = LOAD(x+12);\ 166 | w[3] = LOAD(x+14);\ 167 | Piccolo_packing16(t0, t1, t2, t3, w[0], w[1], w[2], w[3], t4, t5, t6);\ 168 | } while(0); 169 | */ 170 | void pack_one(word* w, const u16 a[BITSLICE16_P], const u16 b[BITSLICE16_P]) 171 | { 172 | word t0, t1, t2, t3, t4, t5, t6; 173 | u64 x[BITSLICE16_P]; 174 | int i; 175 | for(i = 0; i < BITSLICE16_P; i++){ 176 | x[i] = (((u64)a[i]) << 48) ^ (((u64)b[i]) << 16); 177 | } 178 | t0 = LOAD(x+0); 179 | t1 = LOAD(x+2); 180 | t2 = LOAD(x+4); 181 | t3 = LOAD(x+6); 182 | 183 | w[0] = LOAD(x+8); 184 | w[1] = LOAD(x+10); 185 | w[2] = LOAD(x+12); 186 | w[3] = LOAD(x+14); 187 | Piccolo_packing16(t0, t1, t2, t3, w[0], w[1], w[2], w[3], t4, t5, t6); 188 | } 189 | 190 | /* 191 | CHES 12 
paper: 192 | Input: r3 (MSB), r2, r1, r0, tmp 193 | Output: r0 (MSB), r1, r2, r3 194 | 1. tmp = r1; r1 |= r2; r3 = ~r3; 195 | 2. r0 ^= r2; r1 ^= r3; r3 |= r2; 196 | 3. r0 ^= r3; r3 = r1; 197 | 4. r3 |= r0; 198 | 5. r3 ^= tmp; tmp |= r0; 199 | 6. r2 ^= tmp; r3 = ~r3; 200 | */ 201 | #define piccoloSbox(r3, r2, r1, r0, t) do {\ 202 | t = r1; r1 = OR(r1, r2); r3 = XOR(r3, mask_one);\ 203 | r0 = XOR(r0, r2); r1 = XOR(r1, r3); r3 = OR(r3, r2);\ 204 | r0 = XOR(r0, r3);\ 205 | r3 = OR(r1, r0);\ 206 | r3 = XOR(r3, t); t = OR(t, r0);\ 207 | r2 = XOR(r2, t); r3 = XOR(r3, mask_one);\ 208 | } while(0); 209 | 210 | #define SboxLayer16(a0, a1, a2, a3, a4, a5, a6, a7) do {\ 211 | piccoloSbox(a0, a1, a2, a3);\ 212 | piccoloSbox(a4, a5, a6, a7);\ 213 | } while(0); 214 | 215 | #define MIXCOL16(r0, r1, r2, r3, t) do {\ 216 | t = r0;\ 217 | r0 = XOR(TIMES3(r1), TIMES33(r0));\ 218 | r1 = XOR(TIMES3(r2), TIMES33(r1));\ 219 | r2 = XOR(XOR(TIMES3(t), TIMES3(r3)), TIMES33(r2));\ 220 | r3 = XOR(TIMES3(t), TIMES33(r3));\ 221 | } while(0); 222 | 223 | #define F16(a0, a1, a2, a3, a4, a5, a6, a7, t0, t1, t2, t3, t4) do {\ 224 | t0 = a0; t1 = a1; t2 = a2;t3 = a3;\ 225 | piccoloSbox(t0, t1, t2, t3, t4);\ 226 | MIXCOL16(t3, t2, t1, t0, t4);\ 227 | piccoloSbox(t3, t2, t1, t0, t4);\ 228 | AddKey(a4, a5, a6, a7, t0, t1, t2, t3);\ 229 | } while(0); 230 | 231 | #define PERMUTE16(r0, r1, r2, r3) do {\ 232 | r0 = XOR(SHF3(r1), SHF33(r0));\ 233 | r1 = XOR(SHF3(r2), SHF33(r1));\ 234 | r2 = XOR(XOR(SHF3(r0), SHF3(r3)), SHF33(r2));\ 235 | r3 = XOR(SHF3(r0), SHF33(r3));\ 236 | } while(0); 237 | 238 | /* Piccolo round permutation */ 239 | #define Piccolo_RoundPermutation(a0, a1, a2, a3, a4, a5, a6, a7) do {\ 240 | a0 = PSHUFB(a0, mask_rp0);\ 241 | a1 = PSHUFB(a1, mask_rp0);\ 242 | a2 = PSHUFB(a2, mask_rp0);\ 243 | a3 = PSHUFB(a3, mask_rp0);\ 244 | \ 245 | a4 = PSHUFB(a4, mask_rp1);\ 246 | a5 = PSHUFB(a5, mask_rp1);\ 247 | a6 = PSHUFB(a6, mask_rp1);\ 248 | a7 = PSHUFB(a7, mask_rp1);\ 249 | word t;\ 250 | t = a0; 
a0 = a4; a4 = t;\ 251 | t = a1; a1 = a5; a5 = t;\ 252 | t = a2; a2 = a6; a6 = t;\ 253 | t = a3; a3 = a7; a7 = t;\ 254 | } while(0); 255 | 256 | void pack_two(word* w0, word* w1, const u16 a[BITSLICE16_P], const u16 b[BITSLICE16_P], const u16 c[BITSLICE16_P], const u16 d[BITSLICE16_P]) 257 | { 258 | u64 x[BITSLICE16_P]; 259 | word t4, t5, t6; 260 | int i; 261 | for(i = 0; i < BITSLICE16_P; i++){ 262 | x[i] = (((u64)a[i]) << 48) ^ (((u64)b[i]) << 16) ^ (((u64)c[i]) << 32) ^ ((u64)d[i]); 263 | } 264 | w0[0] = LOAD(x+0); 265 | w0[1] = LOAD(x+2); 266 | w0[2] = LOAD(x+4); 267 | w0[3] = LOAD(x+6); 268 | 269 | w1[0] = LOAD(x+8); 270 | w1[1] = LOAD(x+10); 271 | w1[2] = LOAD(x+12); 272 | w1[3] = LOAD(x+14); 273 | 274 | Piccolo_packing16(w0[0], w0[1], w0[2], w0[3], w1[0], w1[1], w1[2], w1[3], t4, t5, t6); 275 | } 276 | 277 | void pack_four(word* w0, word* w1, word* w2, word* w3, const u16 a[BITSLICE16_P], const u16 b[BITSLICE16_P], const u16 c[BITSLICE16_P], const u16 d[BITSLICE16_P], int h) 278 | { 279 | u64 x[BITSLICE16_P]; 280 | word t4, t5, t6; 281 | int i; 282 | for(i = 0; i < BITSLICE16_P; i++) 283 | { 284 | x[i] = (((u64)a[i]) << 48) ^ (((u64)b[i]) << 16) ^ (((u64)c[i]) << 32) ^ ((u64)d[i]); 285 | } 286 | w0[0] = LOAD(x+0); 287 | w0[1] = LOAD(x+2); 288 | w0[2] = LOAD(x+4); 289 | w0[3] = LOAD(x+6); 290 | 291 | w1[0] = LOAD(x+8); 292 | w1[1] = LOAD(x+10); 293 | w1[2] = LOAD(x+12); 294 | w1[3] = LOAD(x+14); 295 | 296 | Piccolo_packing16(w0[0], w0[1], w0[2], w0[3], w1[0], w1[1], w1[2], w1[3], t4, t5, t6); 297 | 298 | if(h){ 299 | w2[0] = PSHUFB(w0[0], mask_unpack64_l0); 300 | w2[1] = PSHUFB(w0[1], mask_unpack64_l0); 301 | w2[2] = PSHUFB(w0[2], mask_unpack64_l0); 302 | w2[3] = PSHUFB(w0[3], mask_unpack64_l0); 303 | 304 | w3[0] = PSHUFB(w0[0], mask_unpack64_l1); 305 | w3[1] = PSHUFB(w0[1], mask_unpack64_l1); 306 | w3[2] = PSHUFB(w0[2], mask_unpack64_l1); 307 | w3[3] = PSHUFB(w0[3], mask_unpack64_l1); 308 | 309 | w0[0] = PSHUFB(w1[0], mask_unpack64_l0); 310 | w0[1] = 
PSHUFB(w1[1], mask_unpack64_l0); 311 | w0[2] = PSHUFB(w1[2], mask_unpack64_l0); 312 | w0[3] = PSHUFB(w1[3], mask_unpack64_l0); 313 | 314 | w1[0] = PSHUFB(w1[0], mask_unpack64_l1); 315 | w1[1] = PSHUFB(w1[1], mask_unpack64_l1); 316 | w1[2] = PSHUFB(w1[2], mask_unpack64_l1); 317 | w1[3] = PSHUFB(w1[3], mask_unpack64_l1); 318 | } 319 | else{ 320 | w2[0] = PSHUFB(w0[0], mask_unpack64_h0); 321 | w2[1] = PSHUFB(w0[1], mask_unpack64_h0); 322 | w2[2] = PSHUFB(w0[2], mask_unpack64_h0); 323 | w2[3] = PSHUFB(w0[3], mask_unpack64_h0); 324 | 325 | w3[0] = PSHUFB(w0[0], mask_unpack64_h1); 326 | w3[1] = PSHUFB(w0[1], mask_unpack64_h1); 327 | w3[2] = PSHUFB(w0[2], mask_unpack64_h1); 328 | w3[3] = PSHUFB(w0[3], mask_unpack64_h1); 329 | 330 | w0[0] = PSHUFB(w1[0], mask_unpack64_h0); 331 | w0[1] = PSHUFB(w1[1], mask_unpack64_h0); 332 | w0[2] = PSHUFB(w1[2], mask_unpack64_h0); 333 | w0[3] = PSHUFB(w1[3], mask_unpack64_h0); 334 | 335 | w1[0] = PSHUFB(w1[0], mask_unpack64_h1); 336 | w1[1] = PSHUFB(w1[1], mask_unpack64_h1); 337 | w1[2] = PSHUFB(w1[2], mask_unpack64_h1); 338 | w1[3] = PSHUFB(w1[3], mask_unpack64_h1); 339 | } 340 | } 341 | 342 | 343 | /* 344 | rk23: 0,2 mod 5, k[2], k[3] 345 | rk01: round 1,4 mod 5, k[0], [1] 346 | rk44: 3 mod 5, k[4], k[4] 347 | wk0, k0L | k1R 348 | wk1, K1L | K0R 349 | 350 | wk2, k4L | k3R 351 | wk3, K3L | k4R */ 352 | 353 | #define Piccolo80_rk23(k) (((word*)k) + ((BITSLICE16_P/2)/2)*0) 354 | #define Piccolo80_rk01(k) (((word*)k) + ((BITSLICE16_P/2)/2)*1) 355 | #define Piccolo80_rk44(k) (((word*)k) + ((BITSLICE16_P/2)/2)*2) 356 | #define Piccolo80_wk01(k) (((word*)k) + ((BITSLICE16_P/2)/2)*3) 357 | #define Piccolo80_wk23(k) (((word*)k) + ((BITSLICE16_P/2)/2)*4) 358 | 359 | void Piccolo_pack_keys80(const u16 keys[KEY80][BITSLICE16_P], word* rk23, word* rk01, word* rk44, word* wk01, word* wk23) 360 | { 361 | int i; 362 | pack_two(rk23, rk01, keys[0], keys[1], keys[2], keys[3]); 363 | pack_one(rk44, keys[4], keys[4]); 364 | for(i = 0; i < 
((BITSLICE16_P/2)/2); i++){ 365 | wk01[i] = PSHUFB(rk01[i], mask_wk0); 366 | wk01[i] = XOR(wk01[i], PSHUFB(rk01[i], mask_wk1)); 367 | wk23[i] = PSHUFB(rk44[i], mask_wk0); 368 | wk23[i] = XOR(wk23[i], PSHUFB(rk23[i], mask_wk1)); 369 | } 370 | } 371 | 372 | /* word rk0[(BITSLICE16_P/2)/2], rk1[(BITSLICE16_P/2)/2], rk2[(BITSLICE16_P/2)/2], rk3[(BITSLICE16_P/2)/2], rk4[(BITSLICE16_P/2)/2], rk5[(BITSLICE16_P/2)/2], rk6[(BITSLICE16_P/2)/2], rk7[(BITSLICE16_P/2)/2]; */ 373 | #define Piccolo128_rk(k,i) (((word*)k) + ((i*BITSLICE16_P/2)/2)) 374 | #define Piccolo128_wk01(k) (((word*)k) + (KEY128) *((BITSLICE16_P/2)/2)) 375 | #define Piccolo128_wk23(k) (((word*)k) + (KEY128+1)*((BITSLICE16_P/2)/2)) 376 | 377 | void Piccolo_pack_keys128(const u16 keys[KEY128][BITSLICE16_P], word* rk, word* wk01, word* wk23) 378 | { 379 | int i; 380 | 381 | pack_four(Piccolo128_rk(rk,0), Piccolo128_rk(rk,2), Piccolo128_rk(rk,4), Piccolo128_rk(rk,6), keys[0], keys[2], keys[4], keys[6], 1); 382 | pack_four(Piccolo128_rk(rk,1), Piccolo128_rk(rk,3), Piccolo128_rk(rk,5), Piccolo128_rk(rk,7), keys[1], keys[3], keys[5], keys[7], 0); 383 | 384 | for(i = 0; i < ((BITSLICE16_P/2)/2); i++){ 385 | wk01[i] = PSHUFB(Piccolo128_rk(rk,0)[i], mask_wk0); 386 | wk01[i] = XOR(wk01[i], PSHUFB(Piccolo128_rk(rk,1)[i], mask_wk1)); 387 | wk23[i] = PSHUFB(Piccolo128_rk(rk, 4)[i], mask_wk0); 388 | wk23[i] = XOR(wk23[i], PSHUFB(Piccolo128_rk(rk,7)[i], mask_wk1)); 389 | } 390 | } 391 | 392 | /* Initialize masks and constants */ 393 | unsigned char Piccolo_init_check = 0; 394 | void Piccolo_init() 395 | { 396 | #ifdef THREAD_SAFE 397 | pthread_mutex_lock(&bitslice_init_mutex); 398 | #endif 399 | if(Piccolo_init_check == 0){ 400 | init(); 401 | mask_one = CONSTANT(0xff); 402 | mask_rotl16 = SET(9, 8, 15, 14, 13, 12, 11, 10, 1, 0, 7, 6, 5, 4, 3, 2); 403 | mask_rotl32 = SET(11, 10, 9, 8, 15, 14, 13, 12, 3, 2, 1, 0, 7, 6, 5, 4); 404 | mask_rotl48 = SET(13, 12, 11, 10, 9, 8, 15, 14, 5, 4, 3, 2, 1, 0, 7, 6); 405 | mask_shf0 = 
SET(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); 406 | mask_shf16 = SET(9, 8, 7, 6, 13, 12, 3, 2, 1, 0, 15, 14, 5, 4, 11, 10); 407 | mask_shf32 = SET(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); 408 | mask_rp0 = SET(15, 14, 13, 12, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8); 409 | mask_rp1 = SET(7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12, 3, 2, 1, 0); 410 | mask_wk0 = SET(0x80, 0x80, 0x80, 0x80, 3, 2, 1, 0, 7, 6, 5, 4, 0x80, 0x80, 0x80, 0x80); 411 | mask_wk1 = SET(15, 14, 13, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 11, 10, 9, 8); 412 | mask_l16 = SET(0x80, 0x80, 13, 12, 0x80, 0x80, 9, 8, 0x80, 0x80, 5, 4, 0x80, 0x80, 1, 0); 413 | mask_r16 = SET(15, 14, 0x80, 0x80, 11, 10, 0x80, 0x80, 7, 6, 0x80, 0x80, 3, 2, 0x80, 0x80); 414 | Piccolo_generate_constants(BITSLICE16_P, 80); 415 | Piccolo_generate_constants(BITSLICE16_P, 128); 416 | } 417 | Piccolo_init_check = 1; 418 | #ifdef THREAD_SAFE 419 | pthread_mutex_unlock(&bitslice_init_mutex); 420 | #endif 421 | return; 422 | } 423 | -------------------------------------------------------------------------------- /src/bitslice/LED/LED.c: -------------------------------------------------------------------------------- 1 | /*------------------------ MIT License HEADER ------------------------------------ 2 | Copyright ANSSI and NTU (2015) 3 | Contributors: 4 | Ryad BENADJILA [ryadbenadjila@gmail.com] and 5 | Jian GUO [ntu.guo@gmail.com] and 6 | Victor LOMNE [victor.lomne@gmail.com] and 7 | Thomas PEYRIN [thomas.peyrin@gmail.com] 8 | 9 | This software is a computer program whose purpose is to implement 10 | lightweight block ciphers with different optimizations for the x86 11 | platform. Three algorithms have been implemented: PRESENT, LED and 12 | Piccolo. Three techniques have been explored: table based 13 | implementations, vperm (for vector permutation) and bitslice 14 | implementations. 
For more details, please refer to the SAC 2013 15 | paper: 16 | http://eprint.iacr.org/2013/445 17 | as well as the documentation of the project. 18 | Here is a big picture of how the code is divided: 19 | - src/common contains common headers, structures and functions. 20 | - src/table contains table based implementations, with the code 21 | that generates the tables in src/table/gen_tables. The code here 22 | is written in pure C so it should compile on any platform (x86 23 | and other architectures), as well as any OS flavour (*nix, 24 | Windows ...). 25 | - src/vperm contains vperm based implementations. They are written 26 | in inline assembly for x86_64 and will only compile and work on 27 | this platform. The code only compiles with gcc, but porting it to 28 | other assembly flavours should not be too complicated. 29 | - src/bitslice contains bitslice based implementations. They are 30 | written in asm intrinsics. It should compile and run on i386 as 31 | well as x86_64 platforms, and it should be portable to other OS 32 | flavours since intrinsics are standard among many compilers. 33 | Note: vperm and bitslice implementations require a x86 CPU with at least 34 | SSSE3 extensions. 35 | 36 | Permission is hereby granted, free of charge, to any person obtaining a copy 37 | of this software and associated documentation files (the "Software"), to deal 38 | in the Software without restriction, including without limitation the rights 39 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 40 | copies of the Software, and to permit persons to whom the Software is 41 | furnished to do so, subject to the following conditions: 42 | 43 | The above copyright notice and this permission notice shall be included in 44 | all copies or substantial portions of the Software. 
45 | 46 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 47 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 48 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 49 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 50 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 51 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 52 | THE SOFTWARE. 53 | 54 | Except as contained in this notice, the name(s) of the above copyright holders 55 | shall not be used in advertising or otherwise to promote the sale, use or other 56 | dealings in this Software without prior written authorization. 57 | 58 | 59 | -------------------------- MIT License HEADER ----------------------------------*/ 60 | #ifdef BITSLICE 61 | #include "LED_utils.h" 62 | 63 | #ifdef BITSLICE 64 | #ifdef LED64 65 | void LED64bitslice16_key_schedule(const u8* masterKey, u8* roundKeys); 66 | void LED64bitslice16_core(const u8* message, const u8* subkeys, u8* ciphertext); 67 | void LED64bitslice16_cipher(const u64 plaintext[BITSLICE16_P], const u16 key[BITSLICE16_P][KEY64], u64 ciphertext[BITSLICE16_P]); 68 | void LED64bitslice32_key_schedule(const u8* masterKey, u8* roundKeys); 69 | void LED64bitslice32_core(const u8* message, const u8* subkeys, u8* ciphertext); 70 | void LED64bitslice32_cipher(const u64 plaintext[BITSLICE16_P], const u16 key[BITSLICE16_P][KEY64], u64 ciphertext[BITSLICE16_P]); 71 | #endif 72 | #ifdef LED128 73 | void LED128bitslice16_key_schedule(const u8* masterKey, u8* roundKeys); 74 | void LED128bitslice16_core(const u8* message, const u8* subkeys, u8* ciphertext); 75 | void LED128bitslice16_cipher(const u64 plaintext[BITSLICE16_P], const u16 key[BITSLICE16_P][KEY128], u64 ciphertext[BITSLICE16_P]); 76 | void LED128bitslice32_key_schedule(const u8* masterKey, u8* roundKeys); 77 | void LED128bitslice32_core(const u8* message, const u8* 
subkeys, u8* ciphertext); 78 | void LED128bitslice32_cipher(const u64 plaintext[BITSLICE32_P], const u16 key[BITSLICE32_P][KEY128], u64 ciphertext[BITSLICE32_P]); 79 | #endif 80 | #endif 81 | 82 | /*************************************************************************************/ 83 | /*************************************************************************************/ 84 | #ifdef LED64 85 | void LED64bitslice16_key_schedule(const u8* masterKey, u8* roundKeys){ 86 | word t0, t1, t2; 87 | word* k = (word*)roundKeys; 88 | int i; 89 | /* loading key */ 90 | for(i = 0; i < (BITSLICE16_P/2); i++){ 91 | k[i] = LOAD(((u16(*)[KEY64])masterKey)[2*i]); 92 | inverse_nibble_endian(k[i], t0, t1); 93 | } 94 | /* packing key */ 95 | packing16(k[0], k[1], k[2], k[3],k[4], k[5], k[6], k[7], t0, t1, t2); 96 | 97 | return; 98 | } 99 | 100 | void LED64bitslice16_core(const u8* message, const u8* subkeys, u8* ciphertext){ 101 | word s[BITSLICE16_P/2], t0, t1, t2 ,t3, t4, t5, t6, t7, t8, t9, t10, t11; 102 | word* k = (word*)subkeys; 103 | int i, r; 104 | 105 | for(i = 0; i < (BITSLICE16_P/2); i++){ 106 | s[i] = LOAD(((u64*)message)+2*i); 107 | inverse_nibble_endian(s[i], t0, t1); 108 | } 109 | /* packing plaintext */ 110 | packing16(s[0], s[1], s[2], s[3],s[4], s[5], s[6], s[7], t0, t1, t2); 111 | 112 | /* key addition */ 113 | for(i = 0; i < (BITSLICE16_P/2); i++) s[i] = XOR(s[i], k[i]); 114 | for(r = 0; r < 32; r++){ 115 | for(i = 0; i < (BITSLICE16_P/2); i++) s[i] = XOR(s[i], constants_LED64_BITSLICE16_P[r][i]); 116 | SboxLayer16(s[0], s[1], s[2], s[3],s[4], s[5], s[6], s[7], t0); 117 | SR16(s[0], s[1], s[2], s[3],s[4], s[5], s[6], s[7]); 118 | MIXCOL16(s[0], s[1], s[2], s[3],s[4], s[5], s[6], s[7], t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11); 119 | if((r&3) == 3){ 120 | for(i = 0; i < (BITSLICE16_P/2); i++) s[i] = XOR(s[i], k[i]); 121 | } 122 | } 123 | 124 | /* STORE the results */ 125 | unpacking16(s[0], s[1], s[2], s[3],s[4], s[5], s[6], s[7], t0, t1, t2); 126 | for(i 
= 0; i < (BITSLICE16_P/2); i++){ 127 | inverse_nibble_endian(s[i], t0, t1); 128 | STORE(s[i], ((u64*)ciphertext) + 2*i); 129 | } 130 | 131 | return; 132 | } 133 | 134 | void LED64bitslice16_cipher(const u64 plaintext[BITSLICE16_P], const u16 key[BITSLICE16_P][KEY64], u64 ciphertext[BITSLICE16_P]) 135 | { 136 | word subkeys[BITSLICE16_P/2]; 137 | 138 | /* Initialize the constants: done once and for all */ 139 | LED_init(); 140 | 141 | #ifdef MEASURE_PERF 142 | key_schedule_start = rdtsc(); 143 | #endif 144 | LED64bitslice16_key_schedule((u8*)key, (u8*)subkeys); 145 | #ifdef MEASURE_PERF 146 | key_schedule_end = rdtsc(); 147 | #endif 148 | 149 | #ifdef MEASURE_PERF 150 | encrypt_start = rdtsc(); 151 | #endif 152 | LED64bitslice16_core((u8*)plaintext, (u8*)subkeys, (u8*)ciphertext); 153 | #ifdef MEASURE_PERF 154 | encrypt_end = rdtsc(); 155 | #endif 156 | 157 | return; 158 | } 159 | #endif 160 | 161 | /*************************************************************************************/ 162 | /*************************************************************************************/ 163 | #ifdef LED128 164 | void LED128bitslice16_key_schedule(const u8* masterKey, u8* roundKeys){ 165 | word t0, t1, t2; 166 | word* k0 = (word*)roundKeys; 167 | word* k1 = k0 + (BITSLICE16_P/2); 168 | 169 | int i; 170 | /* loading key */ 171 | for(i = 0; i < (BITSLICE16_P/2); i++){ 172 | t0 = LOAD(((u16(*)[KEY128])masterKey)[2*i]); 173 | t1 = LOAD((((u16(*)[KEY128])masterKey)[2*i])+KEY128); 174 | k0[i] = PUNPCKLQDQ(t0, t1); 175 | k1[i] = PUNPCKHQDQ(t0, t1); 176 | inverse_nibble_endian(k0[i], t0, t1); 177 | inverse_nibble_endian(k1[i], t0, t1); 178 | } 179 | /* packing key */ 180 | packing16(k0[0], k0[1], k0[2], k0[3],k0[4], k0[5], k0[6], k0[7], t0, t1, t2); 181 | packing16(k1[0], k1[1], k1[2], k1[3],k1[4], k1[5], k1[6], k1[7], t0, t1, t2); 182 | 183 | return; 184 | } 185 | 186 | 187 | void LED128bitslice16_core(const u8* message, const u8* subkeys, u8* ciphertext){ 188 | word 
s[(BITSLICE16_P/2)], t0, t1, t2 ,t3, t4, t5, t6, t7, t8, t9, t10, t11; 189 | word* k0 = (word*)subkeys; 190 | word* k1 = k0 + (BITSLICE16_P/2); 191 | 192 | /* loading plaintext and key */ 193 | int i, r; 194 | 195 | for(i = 0; i < (BITSLICE16_P/2); i++){ 196 | s[i] = LOAD(((u64*)message)+2*i); 197 | inverse_nibble_endian(s[i], t0, t1); 198 | } 199 | /* packing the plaintext and key */ 200 | packing16(s[0], s[1], s[2], s[3],s[4], s[5], s[6], s[7], t0, t1, t2); 201 | for(i = 0; i < (BITSLICE16_P/2); i++) s[i] = XOR(s[i], k0[i]); 202 | for(r = 0; r < 48; r++){ 203 | for(i = 0; i < 8; i++) s[i] = XOR(s[i], constants_LED128_BITSLICE16_P[r][i]); 204 | SboxLayer16(s[0], s[1], s[2], s[3],s[4], s[5], s[6], s[7], t0); 205 | SR16(s[0], s[1], s[2], s[3],s[4], s[5], s[6], s[7]); 206 | MIXCOL16(s[0], s[1], s[2], s[3],s[4], s[5], s[6], s[7], t0, t1, t2 ,t3, t4, t5, t6, t7, t8, t9, t10, t11); 207 | if((r&3) == 3){ 208 | if((r&4) == 0) 209 | for(i = 0; i < 8; i++) s[i] = XOR(s[i], k0[i]); 210 | else 211 | for(i = 0; i < 8; i++) s[i] = XOR(s[i], k1[i]); 212 | } 213 | } 214 | 215 | /* STORE the results */ 216 | unpacking16(s[0], s[1], s[2], s[3],s[4], s[5], s[6], s[7], t0, t1, t2); 217 | for(i = 0; i < (BITSLICE16_P/2); i++){ 218 | inverse_nibble_endian(s[i], t0, t1); 219 | STORE(s[i], ((u64*)ciphertext) + 2*i); 220 | } 221 | 222 | return; 223 | } 224 | 225 | void LED128bitslice16_cipher(const u64 plaintext[BITSLICE16_P], const u16 key[BITSLICE16_P][KEY128], u64 ciphertext[BITSLICE16_P]) 226 | { 227 | word subkeys[BITSLICE16_P]; 228 | 229 | LED_init(); 230 | 231 | #ifdef MEASURE_PERF 232 | key_schedule_start = rdtsc(); 233 | #endif 234 | LED128bitslice16_key_schedule((u8*)key, (u8*)subkeys); 235 | #ifdef MEASURE_PERF 236 | key_schedule_end = rdtsc(); 237 | #endif 238 | 239 | #ifdef MEASURE_PERF 240 | encrypt_start = rdtsc(); 241 | #endif 242 | LED128bitslice16_core((u8*)plaintext, (u8*)subkeys, (u8*)ciphertext); 243 | #ifdef MEASURE_PERF 244 | encrypt_end = rdtsc(); 245 | #endif 246 
| 247 | return; 248 | } 249 | #endif 250 | 251 | /*************************************************************************************/ 252 | /*************************************************************************************/ 253 | #ifdef LED64 254 | void LED64bitslice32_key_schedule(const u8* masterKey, u8* roundKeys){ 255 | word t0, t1, t2; 256 | word* k = (word*)roundKeys; 257 | int i; 258 | /* loading key */ 259 | for(i = 0; i < (BITSLICE32_P/2); i++){ 260 | k[i] = LOAD(((u16(*)[KEY64])masterKey)[2*i]); 261 | inverse_nibble_endian(k[i], t0, t1); 262 | } 263 | /* packing key */ 264 | packing32(k[0], k[1], k[2], k[3],k[4], k[5], k[6], k[7],k[8], k[9], k[10], k[11],k[12], k[13], k[14], k[15], t0, t1, t2); 265 | 266 | return; 267 | } 268 | 269 | void LED64bitslice32_core(const u8* message, const u8* subkeys, u8* ciphertext){ 270 | word s[BITSLICE32_P/2], t0, t1, t2 ,t3; 271 | word* k = (word*)subkeys; 272 | int i, r; 273 | 274 | for(i = 0; i < (BITSLICE32_P/2); i++){ 275 | s[i] = LOAD(((u64*)message)+2*i); 276 | inverse_nibble_endian(s[i], t0, t1); 277 | } 278 | 279 | /* packing the plaintext and key */ 280 | packing32(s[0], s[1], s[2], s[3],s[4], s[5], s[6], s[7],s[8], s[9], s[10], s[11],s[12], s[13], s[14], s[15], t0, t1, t2); 281 | 282 | for(i = 0; i < (BITSLICE32_P/2); i++) s[i] = XOR(s[i], k[i]); 283 | for(r = 0; r < 32; r++){ 284 | for(i = 0; i < (BITSLICE32_P/2); i++) s[i] = XOR(s[i], constants_LED64_BITSLICE32_P[r][i]); 285 | SboxLayer32(s[0], s[1], s[2], s[3],s[4], s[5], s[6], s[7],s[8], s[9], s[10], s[11],s[12], s[13], s[14], s[15], t0); 286 | SR32(s[0], s[1], s[2], s[3],s[4], s[5], s[6], s[7],s[8], s[9], s[10], s[11],s[12], s[13], s[14], s[15]); 287 | MIXCOL32(s[0], s[1], s[2], s[3],s[4], s[5], s[6], s[7],s[8], s[9], s[10], s[11],s[12], s[13], s[14], s[15], t0, t1, t2, t3); 288 | if((r&3) == 3){ 289 | for(i = 0; i < 16; i++) s[i] = XOR(s[i], k[i]); 290 | } 291 | } 292 | /* STORE the results */ 293 | unpacking32(s[0], s[1], s[2], s[3],s[4], s[5], 
s[6], s[7],s[8], s[9], s[10], s[11],s[12], s[13], s[14], s[15], t0, t1, t2); 294 | for(i = 0; i < (BITSLICE32_P/2); i++){ 295 | inverse_nibble_endian(s[i], t0, t1); 296 | STORE(s[i], ((u64*)ciphertext) + 2*i); 297 | } 298 | 299 | return; 300 | } 301 | 302 | void LED64bitslice32_cipher(const u64 plaintext[BITSLICE16_P], const u16 key[BITSLICE16_P][KEY64], u64 ciphertext[BITSLICE16_P]) 303 | { 304 | word subkeys[BITSLICE32_P/2]; 305 | 306 | /* Initialize the constants: done once and for all */ 307 | LED_init(); 308 | 309 | #ifdef MEASURE_PERF 310 | key_schedule_start = rdtsc(); 311 | #endif 312 | LED64bitslice32_key_schedule((u8*)key, (u8*)subkeys); 313 | #ifdef MEASURE_PERF 314 | key_schedule_end = rdtsc(); 315 | #endif 316 | 317 | #ifdef MEASURE_PERF 318 | encrypt_start = rdtsc(); 319 | #endif 320 | LED64bitslice32_core((u8*)plaintext, (u8*)subkeys, (u8*)ciphertext); 321 | #ifdef MEASURE_PERF 322 | encrypt_end = rdtsc(); 323 | #endif 324 | 325 | return; 326 | } 327 | #endif 328 | 329 | /*************************************************************************************/ 330 | /*************************************************************************************/ 331 | #ifdef LED128 332 | void LED128bitslice32_key_schedule(const u8* masterKey, u8* roundKeys){ 333 | word t0, t1, t2; 334 | word* k0 = (word*)roundKeys; 335 | word* k1 = k0 + (BITSLICE32_P/2); 336 | 337 | int i; 338 | /* loading key */ 339 | for(i = 0; i < (BITSLICE32_P/2); i++){ 340 | t0 = LOAD(((u16(*)[KEY128])masterKey)[2*i]); 341 | t1 = LOAD((((u16(*)[KEY128])masterKey)[2*i])+KEY128); 342 | k0[i] = PUNPCKLQDQ(t0, t1); 343 | k1[i] = PUNPCKHQDQ(t0, t1); 344 | inverse_nibble_endian(k0[i], t0, t1); 345 | inverse_nibble_endian(k1[i], t0, t1); 346 | } 347 | /* packing key */ 348 | packing32(k0[0], k0[1], k0[2], k0[3],k0[4], k0[5], k0[6], k0[7],k0[8], k0[9], k0[10], k0[11],k0[12], k0[13], k0[14], k0[15], t0, t1, t2); 349 | packing32(k1[0], k1[1], k1[2], k1[3],k1[4], k1[5], k1[6], k1[7],k1[8], k1[9], 
k1[10], k1[11],k1[12], k1[13], k1[14], k1[15], t0, t1, t2); 350 | 351 | return; 352 | } 353 | 354 | 355 | void LED128bitslice32_core(const u8* message, const u8* subkeys, u8* ciphertext){ 356 | word s[(BITSLICE32_P/2)], t0, t1, t2 ,t3; 357 | word* k0 = (word*)subkeys; 358 | word* k1 = k0 + (BITSLICE32_P/2); 359 | 360 | /* loading plaintext and key */ 361 | int i, r; 362 | 363 | for(i = 0; i < (BITSLICE32_P/2); i++){ 364 | s[i] = LOAD(((u64*)message)+2*i); 365 | inverse_nibble_endian(s[i], t0, t1); 366 | } 367 | 368 | /* packing the plaintext and key */ 369 | packing32(s[0], s[1], s[2], s[3],s[4], s[5], s[6], s[7],s[8], s[9], s[10], s[11],s[12], s[13], s[14], s[15], t0, t1, t2); 370 | 371 | for(i = 0; i < (BITSLICE32_P/2); i++) s[i] = XOR(s[i], k0[i]); 372 | for(r = 0; r < 48; r++){ 373 | for(i = 0; i < (BITSLICE32_P/2); i++) s[i] = XOR(s[i], constants_LED128_BITSLICE32_P[r][i]); 374 | SboxLayer32(s[0], s[1], s[2], s[3],s[4], s[5], s[6], s[7],s[8], s[9], s[10], s[11],s[12], s[13], s[14], s[15], t0); 375 | SR32(s[0], s[1], s[2], s[3],s[4], s[5], s[6], s[7],s[8], s[9], s[10], s[11],s[12], s[13], s[14], s[15]); 376 | MIXCOL32(s[0], s[1], s[2], s[3],s[4], s[5], s[6], s[7],s[8], s[9], s[10], s[11],s[12], s[13], s[14], s[15], t0, t1, t2, t3); 377 | if((r&3) == 3){ 378 | if((r&4) == 0) 379 | for(i = 0; i < (BITSLICE32_P/2); i++) s[i] = XOR(s[i], k0[i]); 380 | else 381 | for(i = 0; i < (BITSLICE32_P/2); i++) s[i] = XOR(s[i], k1[i]); 382 | } 383 | } 384 | 385 | /* STORE the results */ 386 | unpacking32(s[0], s[1], s[2], s[3],s[4], s[5], s[6], s[7],s[8], s[9], s[10], s[11],s[12], s[13], s[14], s[15], t0, t1, t2); 387 | for(i = 0; i < (BITSLICE32_P/2); i++){ 388 | inverse_nibble_endian(s[i], t0, t1); 389 | STORE(s[i], ((u64*)ciphertext) + 2*i); 390 | } 391 | 392 | return; 393 | } 394 | 395 | void LED128bitslice32_cipher(const u64 plaintext[BITSLICE32_P], const u16 key[BITSLICE32_P][KEY128], u64 ciphertext[BITSLICE32_P]) 396 | { 397 | word subkeys[BITSLICE32_P]; 398 | 399 | 
LED_init(); 400 | 401 | #ifdef MEASURE_PERF 402 | key_schedule_start = rdtsc(); 403 | #endif 404 | LED128bitslice32_key_schedule((u8*)key, (u8*)subkeys); 405 | #ifdef MEASURE_PERF 406 | key_schedule_end = rdtsc(); 407 | #endif 408 | 409 | #ifdef MEASURE_PERF 410 | encrypt_start = rdtsc(); 411 | #endif 412 | LED128bitslice32_core((u8*)plaintext, (u8*)subkeys, (u8*)ciphertext); 413 | #ifdef MEASURE_PERF 414 | encrypt_end = rdtsc(); 415 | #endif 416 | 417 | return; 418 | } 419 | #endif 420 | 421 | #endif 422 | -------------------------------------------------------------------------------- /src/table/gen_tables/LED_generateTables.c: -------------------------------------------------------------------------------- 1 | /*------------------------ MIT License HEADER ------------------------------------ 2 | Copyright ANSSI and NTU (2015) 3 | Contributors: 4 | Ryad BENADJILA [ryadbenadjila@gmail.com] and 5 | Jian GUO [ntu.guo@gmail.com] and 6 | Victor LOMNE [victor.lomne@gmail.com] and 7 | Thomas PEYRIN [thomas.peyrin@gmail.com] 8 | 9 | This software is a computer program whose purpose is to implement 10 | lightweight block ciphers with different optimizations for the x86 11 | platform. Three algorithms have been implemented: PRESENT, LED and 12 | Piccolo. Three techniques have been explored: table based 13 | implementations, vperm (for vector permutation) and bitslice 14 | implementations. For more details, please refer to the SAC 2013 15 | paper: 16 | http://eprint.iacr.org/2013/445 17 | as well as the documentation of the project. 18 | Here is a big picture of how the code is divided: 19 | - src/common contains common headers, structures and functions. 20 | - src/table contains table based implementations, with the code 21 | that generates the tables in src/table/gen_tables. The code here 22 | is written in pure C so it should compile on any platform (x86 23 | and other architectures), as well as any OS flavour (*nix, 24 | Windows ...). 
25 | - src/vperm contains vperm based implementations. They are written 26 | in inline assembly for x86_64 and will only compile and work on 27 | this platform. The code only compiles with gcc, but porting it to 28 | other assembly flavours should not be too complicated. 29 | - src/bitslice contains bitslice based implementations. They are 30 | written in asm intrinsics. It should compile and run on i386 as 31 | well as x86_64 platforms, and it should be portable to other OS 32 | flavours since intrinsics are standard among many compilers. 33 | Note: vperm and bitslice implementations require a x86 CPU with at least 34 | SSSE3 extensions. 35 | 36 | Permission is hereby granted, free of charge, to any person obtaining a copy 37 | of this software and associated documentation files (the "Software"), to deal 38 | in the Software without restriction, including without limitation the rights 39 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 40 | copies of the Software, and to permit persons to whom the Software is 41 | furnished to do so, subject to the following conditions: 42 | 43 | The above copyright notice and this permission notice shall be included in 44 | all copies or substantial portions of the Software. 45 | 46 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 47 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 48 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 49 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 50 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 51 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 52 | THE SOFTWARE. 53 | 54 | Except as contained in this notice, the name(s) of the above copyright holders 55 | shall not be used in advertising or otherwise to promote the sale, use or other 56 | dealings in this Software without prior written authorization. 
57 | 58 | 59 | -------------------------- MIT License HEADER ----------------------------------*/ 60 | /* LEDgenerateTables.c */ 61 | /* author: Victor LOMNE - victor.lomne@gmail.com */ 62 | 63 | 64 | 65 | #include 66 | 67 | /* LED Sbox */ 68 | unsigned char sbox[16] = {0x0C, 0x05, 0x06, 0x0B, 0x09, 0x00, 0x0A, 0X0D, 0x03, 0x0E, 0x0F, 0x08, 0x04, 0x07, 0x01, 0x02}; 69 | 70 | /* LED xtime (tabulated multiplication by x in the finite field x^4 + x + 1) */ 71 | unsigned char xtime[16] = {0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, 0x03, 0x01, 0x07, 0x05, 0x0B, 0x09, 0x0F, 0x0D}; 72 | 73 | /* Round constants */ 74 | unsigned char RC[48] = {0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3E, 0x3D, 0x3B, 0x37, 0x2F, 0x1E, 0x3C, 0x39, 0x33, 0x27, 0x0E, 0x1D, 0x3A, 0x35, 0x2B, 0x16, 0x2C, 0x18, 0x30, 0x21, 0x02, 0x05, 0x0B, 0x17, 0x2E, 0x1C, 0x38, 0x31, 0x23, 0x06, 0x0D, 0x1B, 0x36, 0x2D, 0x1A, 0x34, 0x29, 0x12, 0x24, 0x08, 0x11, 0x22, 0x04}; 75 | 76 | 77 | 78 | /****************************************************************************************************/ 79 | void main(void) 80 | { 81 | unsigned int i; 82 | unsigned char il, ih; 83 | unsigned long long T0_LED[256], T1_LED[256], T2_LED[256], T3_LED[256], T4_LED[256], T5_LED[256], T6_LED[256], T7_LED[256], Tcon64_LED[32], Tcon128_LED[48]; 84 | FILE * ptr; 85 | 86 | /* loop over the possible input values to compute for T0, T1, T2, T3, T4, T5, T6 and T7 */ 87 | /* T0, T1, T2, T3, T4, T5, T6 and T7 allow to compute MixColumnsSerial(ShiftRows(SubCells(i))) */ 88 | for(i = 0; i < 256; i++) 89 | { 90 | /* compute low and high parts of i */ 91 | ih = i & 0x0f; 92 | il = (i & 0xf0) >> 4; 93 | 94 | /* compute T0 */ 95 | T0_LED[i] = ((unsigned long long)( xtime[xtime[sbox[il]]] )) << 4; 96 | T0_LED[i] |= ((unsigned long long)( xtime[xtime[sbox[ih]]] )) << 0; 97 | T0_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[il]]]] )) << 20; 98 | T0_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[ih]]]] )) << 16; 99 | T0_LED[i] |= 
((unsigned long long)( xtime[xtime[xtime[sbox[il]]]] ^ xtime[sbox[il]] ^ sbox[il] )) << 36; 100 | T0_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[ih]]]] ^ xtime[sbox[ih]] ^ sbox[ih] )) << 32; 101 | T0_LED[i] |= ((unsigned long long)( xtime[sbox[il]] )) << 52; 102 | T0_LED[i] |= ((unsigned long long)( xtime[sbox[ih]] )) << 48; 103 | 104 | /* compute T1 */ 105 | T1_LED[i] = ((unsigned long long)( xtime[xtime[sbox[il]]] )) << 12; 106 | T1_LED[i] |= ((unsigned long long)( xtime[xtime[sbox[ih]]] )) << 8; 107 | T1_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[il]]]] )) << 28; 108 | T1_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[ih]]]] )) << 24; 109 | T1_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[il]]]] ^ xtime[sbox[il]] ^ sbox[il] )) << 44; 110 | T1_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[ih]]]] ^ xtime[sbox[ih]] ^ sbox[ih] )) << 40; 111 | T1_LED[i] |= ((unsigned long long)( xtime[sbox[il]] )) << 60; 112 | T1_LED[i] |= ((unsigned long long)( xtime[sbox[ih]] )) << 56; 113 | 114 | /* compute T2 */ 115 | T2_LED[i] = ((unsigned long long)( sbox[ih] )) << 4; 116 | T2_LED[i] |= ((unsigned long long)( sbox[il] )) << 8; 117 | T2_LED[i] |= ((unsigned long long)( xtime[xtime[sbox[ih]]] ^ xtime[sbox[ih]] )) << 20; 118 | T2_LED[i] |= ((unsigned long long)( xtime[xtime[sbox[il]]] ^ xtime[sbox[il]] )) << 24; 119 | T2_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[ih]]]] ^ xtime[xtime[sbox[ih]]] ^ xtime[sbox[ih]] )) << 36; 120 | T2_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[il]]]] ^ xtime[xtime[sbox[il]]] ^ xtime[sbox[il]] )) << 40; 121 | T2_LED[i] |= ((unsigned long long)( xtime[sbox[ih]] )) << 52; 122 | T2_LED[i] |= ((unsigned long long)( xtime[sbox[il]] )) << 56; 123 | 124 | /* compute T3 */ 125 | T3_LED[i] = ((unsigned long long)( sbox[il] )) << 0; 126 | T3_LED[i] |= ((unsigned long long)( sbox[ih] )) << 12; 127 | T3_LED[i] |= ((unsigned long long)( xtime[xtime[sbox[il]]] ^ xtime[sbox[il]] )) << 16; 
128 | T3_LED[i] |= ((unsigned long long)( xtime[xtime[sbox[ih]]] ^ xtime[sbox[ih]] )) << 28; 129 | T3_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[il]]]] ^ xtime[xtime[sbox[il]]] ^ xtime[sbox[il]] )) << 32; 130 | T3_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[ih]]]] ^ xtime[xtime[sbox[ih]]] ^ xtime[sbox[ih]] )) << 44; 131 | T3_LED[i] |= ((unsigned long long)( xtime[sbox[il]] )) << 48; 132 | T3_LED[i] |= ((unsigned long long)( xtime[sbox[ih]] )) << 60; 133 | 134 | /* compute T4 */ 135 | T4_LED[i] = ((unsigned long long)( xtime[sbox[il]] )) << 12; 136 | T4_LED[i] |= ((unsigned long long)( xtime[sbox[ih]] )) << 8; 137 | T4_LED[i] |= ((unsigned long long)( xtime[xtime[sbox[il]]] ^ sbox[il] )) << 28; 138 | T4_LED[i] |= ((unsigned long long)( xtime[xtime[sbox[ih]]] ^ sbox[ih] )) << 24; 139 | T4_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[il]]]] ^ xtime[sbox[il]] )) << 44; 140 | T4_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[ih]]]] ^ xtime[sbox[ih]] )) << 40; 141 | T4_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[il]]]] ^ xtime[xtime[sbox[il]]] ^ xtime[sbox[il]] ^ sbox[il] )) << 60; 142 | T4_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[ih]]]] ^ xtime[xtime[sbox[ih]]] ^ xtime[sbox[ih]] ^ sbox[ih] )) << 56; 143 | 144 | /* compute T5 */ 145 | T5_LED[i] = ((unsigned long long)( xtime[sbox[il]] )) << 4; 146 | T5_LED[i] |= ((unsigned long long)( xtime[sbox[ih]] )) << 0; 147 | T5_LED[i] |= ((unsigned long long)( xtime[xtime[sbox[il]]] ^ sbox[il] )) << 20; 148 | T5_LED[i] |= ((unsigned long long)( xtime[xtime[sbox[ih]]] ^ sbox[ih] )) << 16; 149 | T5_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[il]]]] ^ xtime[sbox[il]] )) << 36; 150 | T5_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[ih]]]] ^ xtime[sbox[ih]] )) << 32; 151 | T5_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[il]]]] ^ xtime[xtime[sbox[il]]] ^ xtime[sbox[il]] ^ sbox[il] )) << 52; 152 | T5_LED[i] |= ((unsigned long long)( 
xtime[xtime[xtime[sbox[ih]]]] ^ xtime[xtime[sbox[ih]]] ^ xtime[sbox[ih]] ^ sbox[ih] )) << 48; 153 | 154 | /* compute T6 */ 155 | T6_LED[i] = ((unsigned long long)( xtime[sbox[il]] )) << 0; 156 | T6_LED[i] |= ((unsigned long long)( xtime[sbox[ih]] )) << 12; 157 | T6_LED[i] |= ((unsigned long long)( xtime[xtime[sbox[il]]] ^ xtime[sbox[il]] )) << 16; 158 | T6_LED[i] |= ((unsigned long long)( xtime[xtime[sbox[ih]]] ^ xtime[sbox[ih]] )) << 28; 159 | T6_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[il]]]] ^ sbox[il] )) << 32; 160 | T6_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[ih]]]] ^ sbox[ih] )) << 44; 161 | T6_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[il]]]] ^ xtime[sbox[il]] ^ sbox[il] )) << 48; 162 | T6_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[ih]]]] ^ xtime[sbox[ih]] ^ sbox[ih] )) << 60; 163 | 164 | /* compute T7 */ 165 | T7_LED[i] = ((unsigned long long)( xtime[sbox[ih]] )) << 4; 166 | T7_LED[i] |= ((unsigned long long)( xtime[sbox[il]] )) << 8; 167 | T7_LED[i] |= ((unsigned long long)( xtime[xtime[sbox[ih]]] ^ xtime[sbox[ih]] )) << 20; 168 | T7_LED[i] |= ((unsigned long long)( xtime[xtime[sbox[il]]] ^ xtime[sbox[il]] )) << 24; 169 | T7_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[ih]]]] ^ sbox[ih] )) << 36; 170 | T7_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[il]]]] ^ sbox[il] )) << 40; 171 | T7_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[ih]]]] ^ xtime[sbox[ih]] ^ sbox[ih] )) << 52; 172 | T7_LED[i] |= ((unsigned long long)( xtime[xtime[xtime[sbox[il]]]] ^ xtime[sbox[il]] ^ sbox[il] )) << 56; 173 | } 174 | 175 | /* compute Tcon64_LED */ 176 | for(i = 0; i < 32; i++) 177 | { 178 | /* nibble 0 of row 0 */ 179 | Tcon64_LED[i] = ((unsigned long long)( 0 ^ ((64 & 0xF0) >> 4) )) << 4; 180 | 181 | /* nibble 0 of row 1 */ 182 | Tcon64_LED[i] |= ((unsigned long long)( 1 ^ ((64 & 0xF0) >> 4) )) << 20; 183 | 184 | /* nibble 0 of row 2 */ 185 | Tcon64_LED[i] |= ((unsigned long long)( 2 ^ 
(64 & 0x0F) )) << 36; 186 | 187 | /* nibble 0 of row 3 */ 188 | Tcon64_LED[i] |= ((unsigned long long)( 3 ^ (64 & 0x0F) )) << 52; 189 | 190 | /* nibble 1 of row 0 */ 191 | Tcon64_LED[i] |= ((unsigned long long)( (RC[i] & 0x38) >> 3)) << 0; 192 | 193 | /* nibble 1 of row 1 */ 194 | Tcon64_LED[i] |= ((unsigned long long)( (RC[i] & 0x07) )) << 16; 195 | 196 | /* nibble 1 of row 2 */ 197 | Tcon64_LED[i] |= ((unsigned long long)( (RC[i] & 0x38) >> 3)) << 32; 198 | 199 | /* nibble 1 of row 3 */ 200 | Tcon64_LED[i] |= ((unsigned long long)( (RC[i] & 0x07) )) << 48; 201 | } 202 | 203 | /* compute Tcon128_LED */ 204 | for(i = 0; i < 48; i++) 205 | { 206 | /* nibble 0 of row 0 */ 207 | Tcon128_LED[i] = ((unsigned long long)( 0 ^ ((128 & 0xF0) >> 4) )) << 4; 208 | 209 | /* nibble 0 of row 1 */ 210 | Tcon128_LED[i] |= ((unsigned long long)( 1 ^ ((128 & 0xF0) >> 4) )) << 20; 211 | 212 | /* nibble 0 of row 2 */ 213 | Tcon128_LED[i] |= ((unsigned long long)( 2 ^ (128 & 0x0F) )) << 36; 214 | 215 | /* nibble 0 of row 3 */ 216 | Tcon128_LED[i] |= ((unsigned long long)( 3 ^ (128 & 0x0F) )) << 52; 217 | 218 | /* nibble 1 of row 0 */ 219 | Tcon128_LED[i] |= ((unsigned long long)( (RC[i] & 0x38) >> 3)) << 0; 220 | 221 | /* nibble 1 of row 1 */ 222 | Tcon128_LED[i] |= ((unsigned long long)( (RC[i] & 0x07) )) << 16; 223 | 224 | /* nibble 1 of row 2 */ 225 | Tcon128_LED[i] |= ((unsigned long long)( (RC[i] & 0x38) >> 3)) << 32; 226 | 227 | /* nibble 1 of row 3 */ 228 | Tcon128_LED[i] |= ((unsigned long long)( (RC[i] & 0x07) )) << 48; 229 | } 230 | 231 | /* open a pointer and create the file LEDtables.h */ 232 | ptr = fopen("../LED/LED_tables.h", "w"); 233 | if(ptr == NULL) 234 | { 235 | printf("Unable to create file LED_tables.h\n"); 236 | return; 237 | } 238 | 239 | /* write T0 */ 240 | fprintf(ptr, "unsigned long long T0_LED[256] = {"); 241 | for(i = 0; i < 256; i++) 242 | { 243 | fprintf(ptr, "0x%016llx", T0_LED[i]); 244 | if(i == 255) 245 | fprintf(ptr, "};\n\n"); 246 | else 247 | 
fprintf(ptr, ", "); 248 | } 249 | 250 | /* write T1 */ 251 | fprintf(ptr, "unsigned long long T1_LED[256] = {"); 252 | for(i = 0; i < 256; i++) 253 | { 254 | fprintf(ptr, "0x%016llx", T1_LED[i]); 255 | if(i == 255) 256 | fprintf(ptr, "};\n\n"); 257 | else 258 | fprintf(ptr, ", "); 259 | } 260 | 261 | /* write T2 */ 262 | fprintf(ptr, "unsigned long long T2_LED[256] = {"); 263 | for(i = 0; i < 256; i++) 264 | { 265 | fprintf(ptr, "0x%016llx", T2_LED[i]); 266 | if(i == 255) 267 | fprintf(ptr, "};\n\n"); 268 | else 269 | fprintf(ptr, ", "); 270 | } 271 | 272 | /* write T3 */ 273 | fprintf(ptr, "unsigned long long T3_LED[256] = {"); 274 | for(i = 0; i < 256; i++) 275 | { 276 | fprintf(ptr, "0x%016llx", T3_LED[i]); 277 | if(i == 255) 278 | fprintf(ptr, "};\n\n"); 279 | else 280 | fprintf(ptr, ", "); 281 | } 282 | 283 | /* write T4 */ 284 | fprintf(ptr, "unsigned long long T4_LED[256] = {"); 285 | for(i = 0; i < 256; i++) 286 | { 287 | fprintf(ptr, "0x%016llx", T4_LED[i]); 288 | if(i == 255) 289 | fprintf(ptr, "};\n\n"); 290 | else 291 | fprintf(ptr, ", "); 292 | } 293 | 294 | /* write T5 */ 295 | fprintf(ptr, "unsigned long long T5_LED[256] = {"); 296 | for(i = 0; i < 256; i++) 297 | { 298 | fprintf(ptr, "0x%016llx", T5_LED[i]); 299 | if(i == 255) 300 | fprintf(ptr, "};\n\n"); 301 | else 302 | fprintf(ptr, ", "); 303 | } 304 | 305 | /* write T6 */ 306 | fprintf(ptr, "unsigned long long T6_LED[256] = {"); 307 | for(i = 0; i < 256; i++) 308 | { 309 | fprintf(ptr, "0x%016llx", T6_LED[i]); 310 | if(i == 255) 311 | fprintf(ptr, "};\n\n"); 312 | else 313 | fprintf(ptr, ", "); 314 | } 315 | 316 | /* write T7 */ 317 | fprintf(ptr, "unsigned long long T7_LED[256] = {"); 318 | for(i = 0; i < 256; i++) 319 | { 320 | fprintf(ptr, "0x%016llx", T7_LED[i]); 321 | if(i == 255) 322 | fprintf(ptr, "};\n\n"); 323 | else 324 | fprintf(ptr, ", "); 325 | } 326 | 327 | /* write Tcon64_LED */ 328 | fprintf(ptr, "unsigned long long Tcon64_LED[32] = {"); 329 | for(i = 0; i < 32; i++) 330 | { 331 
| fprintf(ptr, "0x%016llx", Tcon64_LED[i]); 332 | if(i == 31) 333 | fprintf(ptr, "};\n\n"); 334 | else 335 | fprintf(ptr, ", "); 336 | } 337 | 338 | /* write Tcon128_LED */ 339 | fprintf(ptr, "unsigned long long Tcon128_LED[48] = {"); 340 | for(i = 0; i < 48; i++) 341 | { 342 | fprintf(ptr, "0x%016llx", Tcon128_LED[i]); 343 | if(i == 47) 344 | fprintf(ptr, "};\n\n"); 345 | else 346 | fprintf(ptr, ", "); 347 | } 348 | 349 | /* close the pointer */ 350 | fclose(ptr); 351 | 352 | return; 353 | } 354 | -------------------------------------------------------------------------------- /src/common/bitslice_common.h: -------------------------------------------------------------------------------- 1 | /*------------------------ MIT License HEADER ------------------------------------ 2 | Copyright ANSSI and NTU (2015) 3 | Contributors: 4 | Ryad BENADJILA [ryadbenadjila@gmail.com] and 5 | Jian GUO [ntu.guo@gmail.com] and 6 | Victor LOMNE [victor.lomne@gmail.com] and 7 | Thomas PEYRIN [thomas.peyrin@gmail.com] 8 | 9 | This software is a computer program whose purpose is to implement 10 | lightweight block ciphers with different optimizations for the x86 11 | platform. Three algorithms have been implemented: PRESENT, LED and 12 | Piccolo. Three techniques have been explored: table based 13 | implementations, vperm (for vector permutation) and bitslice 14 | implementations. For more details, please refer to the SAC 2013 15 | paper: 16 | http://eprint.iacr.org/2013/445 17 | as well as the documentation of the project. 18 | Here is a big picture of how the code is divided: 19 | - src/common contains common headers, structures and functions. 20 | - src/table contains table based implementations, with the code 21 | that generates the tables in src/table/gen_tables. The code here 22 | is written in pure C so it should compile on any platform (x86 23 | and other architectures), as well as any OS flavour (*nix, 24 | Windows ...). 25 | - src/vperm contains vperm based implementations. 
They are written 26 | in inline assembly for x86_64 and will only compile and work on 27 | this platform. The code only compiles with gcc, but porting it to 28 | other assembly flavours should not be too complicated. 29 | - src/bitslice contains bitslice based implementations. They are 30 | written in asm intrinsics. It should compile and run on i386 as 31 | well as x86_64 platforms, and it should be portable to other OS 32 | flavours since intrinsics are standard among many compilers. 33 | Note: vperm and bitslice implementations require a x86 CPU with at least 34 | SSSE3 extensions. 35 | 36 | Permission is hereby granted, free of charge, to any person obtaining a copy 37 | of this software and associated documentation files (the "Software"), to deal 38 | in the Software without restriction, including without limitation the rights 39 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 40 | copies of the Software, and to permit persons to whom the Software is 41 | furnished to do so, subject to the following conditions: 42 | 43 | The above copyright notice and this permission notice shall be included in 44 | all copies or substantial portions of the Software. 45 | 46 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 47 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 48 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 49 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 50 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 51 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 52 | THE SOFTWARE. 53 | 54 | Except as contained in this notice, the name(s) of the above copyright holders 55 | shall not be used in advertising or otherwise to promote the sale, use or other 56 | dealings in this Software without prior written authorization. 
57 | 58 | 59 | -------------------------- MIT License HEADER ----------------------------------*/ 60 | static word mask0, mask1, mask2, mask3, mask4, mask5, mask6, mask7, mask_odd, mask_oddh, mask_key1, mask_key2, mask128_key1, mask128_key2, mask128_and_b21, mask128_and_b22, mask512_key1, mask512_key2; 61 | static word mask_unpack8_l0, mask_unpack8_l1, mask_unpack8_h0, mask_unpack8_h1; 62 | static word mask_unpack16_l0,mask_unpack16_l1, mask_unpack16_h0, mask_unpack16_h1; 63 | static word mask_unpack64_l0,mask_unpack64_l1, mask_unpack64_h0, mask_unpack64_h1; 64 | static word mask_u, mask_l, mask88, mask44, mask22, mask11, mask_one; /* masks for unpacking 4-bit nibbles */ 65 | static word mask16_sr01, mask16_sr23; /* masks for shift rows */ 66 | static word mask_byte_endian; 67 | static word mask_byte_endian64; 68 | static word mask_byte_endian80; 69 | 70 | static word mask_u64, mask_l64; 71 | /* Short names for the SSE intrinsics used by the bitslice code */ 72 | #define CONSTANT(b) _mm_set1_epi8((b)) /* set each byte in a 128-bit register to be "b" */ 73 | #define SET _mm_set_epi8 /* set bytes of a 128-bit vector (arguments go from the most significant byte down to the least significant one) */ 74 | #define SET32 _mm_set_epi32 /* set 32-bit words of a 128-bit vector */ 75 | #define XOR(x,y) _mm_xor_si128((x),(y)) /* XOR(x,y) = x ^ y, where x and y are two 128-bit words */ 76 | #define AND(x,y) _mm_and_si128((x),(y)) /* AND(x,y) = x & y, where x and y are two 128-bit words */ 77 | #define ANDNOT(x,y) _mm_andnot_si128((x),(y)) /* ANDNOT(x,y) = (~x) & y (bitwise complement of x), where x and y are two 128-bit words */ 78 | #define OR(x,y) _mm_or_si128((x),(y)) /* OR(x,y) = x | y, where x and y are two 128-bit words */ 79 | #define LOAD(p) _mm_loadu_si128((word *)(p)) /* load 16 bytes from the memory address p and return them as a 128-bit word; this is the unaligned load, so p does not have to be a multiple of 16 */ 80 | #define PSHUFB(x,y) _mm_shuffle_epi8((x),(y)) /* the bytes of x are reordered according to the byte indices held in y */ 81 | #define PUNPCKHBW(x,y) _mm_unpackhi_epi8((x),(y)) /* interleaves the higher 8 bytes of x and y */ 82 | #define PUNPCKLBW(x,y) _mm_unpacklo_epi8((x),(y)) /* 
interleaves the lower 8 bytes of x and y */ 83 | #define PUNPCKHDQ(x,y) _mm_unpackhi_epi32((x),(y)) /* interleaves the higher 32-bit words of x and y */ 84 | #define PUNPCKHQDQ(x,y) _mm_unpackhi_epi64((x),(y)) /* interleaves the higher 64-bit words of x and y */ 85 | #define PUNPCKLDQ(x,y) _mm_unpacklo_epi32((x),(y)) /* interleaves the lower 32-bit words of x and y */ 86 | #define PUNPCKLQDQ(x,y) _mm_unpacklo_epi64((x),(y)) /* interleaves the lower 64-bit words of x and y */ 87 | #define PMOVMSKB(x) _mm_movemask_epi8 ((x)) /* Creates a 16-bit mask made up of the most significant bit of each byte of the source */ 88 | /* operand and returns the result in the low 16-bit of the output */ 89 | #define PSHIFTR(x,i) _mm_srli_epi16((x),(i)) /* Shift right each 16-bit word of x by i positions */ 90 | #define PSHIFTL(x,i) _mm_slli_epi16((x),(i)) /* Shift left each 16-bit word of x by i positions */ 91 | #define STORE(x,p) _mm_storeu_si128((word *)(p), (x)) /* store the 128-bit register x back to memory addressed by p (unaligned store) */ 92 | #define INTERLEAVEL16(x,y) _mm_unpacklo_epi16((x), (y)) /* interleaves the lower 16-bit words of x and y */ 93 | #define INTERLEAVEH16(x,y) _mm_unpackhi_epi16((x), (y)) /* interleaves the higher 16-bit words of x and y */ 94 | #define INTERLEAVEL32(x,y) _mm_unpacklo_epi32((x), (y)) /* interleaves the lower 32-bit words of x and y */ 95 | #define INTERLEAVEH32(x,y) _mm_unpackhi_epi32((x), (y)) /* interleaves the higher 32-bit words of x and y */ 96 | #define INTERLEAVEL64(x,y) _mm_unpacklo_epi64((x), (y)) /* interleaves the lower 64-bit words of x and y */ 97 | #define INTERLEAVEH64(x,y) _mm_unpackhi_epi64((x), (y)) /* interleaves the higher 64-bit words of x and y */ 98 | /* shift the whole 128-bit register x left / right by i bytes */ 99 | #define SHLB128(x,i) _mm_slli_si128((x),(i)) 100 | #define SHRB128(x,i) _mm_srli_si128((x),(i)) 101 | /* shift each 64-bit word of x left / right by i bits */ 102 | #define SHLQ64(x, i) _mm_slli_epi64((x),(i)) 103 | #define SHRQ64(x, i) _mm_srli_epi64((x),(i)) 104 | /* reorder the four 32-bit words of x as selected by the immediate y */ 105 | #define PSHUF32(x,y) _mm_shuffle_epi32((x),(y)) 106 | /* XOR the four key words into the four state words r0..r3 (in place). NOTE(review): like the other multi-statement macros of this header, the expansion already ends with ';', so it cannot be used as the body of an if that is followed by an else */ 107 | #define XOR_Key(r0, r1, r2, r3, key0, key1, key2, key3) do {\ 108 | r0 = XOR(r0,key0);\ 109 | r1 = XOR(r1,key1);\ 110 | r2 = XOR(r2,key2);\ 111 | r3 = XOR(r3,key3);\ 112 | } while(0); 113 | /* S-box evaluated as a sequence of AND/OR/XOR on the four registers x3..x0, modified in place; t is a scratch register. XOR with mask_one (all bits set by init()) is a bitwise complement. Which cipher's S-box this is cannot be told from this header */ 114 | #define Sbox(x3, x2, x1, x0, t) do {\ 115 | x2 = XOR(x2, x1); x3 = 
XOR(x3, x1);\ 116 | t = x2; x2 = AND(x2, x3);\ 117 | x1 = XOR(x1, x2); t = XOR(t , x0);\ 118 | x2 = x1; x1 = AND(x1, t);\ 119 | x1 = XOR(x1, x3); t = XOR(t , x0);\ 120 | t = OR(t , x2); x2 = XOR(x2, x0);\ 121 | x2 = XOR(x2, x1); t = XOR(t , x3);\ 122 | x2 = XOR(x2, mask_one); x0 = XOR(x0, t);\ 123 | x3 = x2; x2 = AND(x2, x1);\ 124 | x2 = XOR(x2, t); x2 = XOR(x2, mask_one);\ 125 | } while(0); 126 | /* Bit regrouping of r0..r3 (result written back to r0..r3): each input is combined with a copy of itself shifted left by 4 through a byte interleave, then one bit position per shift amount is selected with mask0..mask7 and accumulated into w0..w3. t0, t1 and w0..w3 are scratch registers */ 127 | #define packing8(r0, r1, r2, r3, t0, t1, w0, w1, w2, w3) do {\ 128 | t1 = PSHIFTL(r0, 4);\ 129 | t0 = PUNPCKLBW(t1, r0);\ 130 | t1 = PUNPCKHBW(t1, r0);\ 131 | \ 132 | w0 = AND( t0 , mask0);\ 133 | w1 = AND(PSHIFTL(t0,1), mask0);\ 134 | w2 = AND(PSHIFTL(t0,2), mask0);\ 135 | w3 = AND(PSHIFTL(t0,3), mask0);\ 136 | \ 137 | w0 = XOR(w0, AND(PSHIFTR(t1, 1), mask1));\ 138 | w1 = XOR(w1, AND( t1 , mask1));\ 139 | w2 = XOR(w2, AND(PSHIFTL(t1, 1), mask1));\ 140 | w3 = XOR(w3, AND(PSHIFTL(t1, 2), mask1));\ 141 | \ 142 | t1 = PSHIFTL(r1, 4);\ 143 | t0 = PUNPCKLBW(t1, r1);\ 144 | t1 = PUNPCKHBW(t1, r1);\ 145 | \ 146 | w0 = XOR(w0, AND(PSHIFTR(t0, 2), mask2));\ 147 | w1 = XOR(w1, AND(PSHIFTR(t0, 1), mask2));\ 148 | w2 = XOR(w2, AND( t0 , mask2));\ 149 | w3 = XOR(w3, AND(PSHIFTL(t0, 1), mask2));\ 150 | \ 151 | w0 = XOR(w0, AND(PSHIFTR(t1, 3), mask3));\ 152 | w1 = XOR(w1, AND(PSHIFTR(t1, 2), mask3));\ 153 | w2 = XOR(w2, AND(PSHIFTR(t1, 1), mask3));\ 154 | w3 = XOR(w3, AND( t1 , mask3));\ 155 | \ 156 | t1 = PSHIFTL(r2, 4);\ 157 | t0 = PUNPCKLBW(t1, r2);\ 158 | t1 = PUNPCKHBW(t1, r2);\ 159 | \ 160 | w0 = XOR(w0, AND(PSHIFTR(t0, 4), mask4));\ 161 | w1 = XOR(w1, AND(PSHIFTR(t0, 3), mask4));\ 162 | w2 = XOR(w2, AND(PSHIFTR(t0, 2), mask4));\ 163 | w3 = XOR(w3, AND(PSHIFTR(t0, 1), mask4));\ 164 | \ 165 | w0 = XOR(w0, AND(PSHIFTR(t1, 5), mask5));\ 166 | w1 = XOR(w1, AND(PSHIFTR(t1, 4), mask5));\ 167 | w2 = XOR(w2, AND(PSHIFTR(t1, 3), mask5));\ 168 | w3 = XOR(w3, AND(PSHIFTR(t1, 2), mask5));\ 169 | \ 170 | t1 = PSHIFTL(r3, 4);\ 171 | t0 = PUNPCKLBW(t1, r3);\ 172 | t1 = PUNPCKHBW(t1, r3);\ 
173 | \ 174 | w0 = XOR(w0, AND(PSHIFTR(t0, 6), mask6));\ 175 | w1 = XOR(w1, AND(PSHIFTR(t0, 5), mask6));\ 176 | w2 = XOR(w2, AND(PSHIFTR(t0, 4), mask6));\ 177 | w3 = XOR(w3, AND(PSHIFTR(t0, 3), mask6));\ 178 | \ 179 | r0 = XOR(w0, AND(PSHIFTR(t1, 7), mask7));\ 180 | r1 = XOR(w1, AND(PSHIFTR(t1, 6), mask7));\ 181 | r2 = XOR(w2, AND(PSHIFTR(t1, 5), mask7));\ 182 | r3 = XOR(w3, AND(PSHIFTR(t1, 4), mask7));\ 183 | } while(0); 184 | /* Regroups the bits of in0..in3 selected by mask0..mask7 (presumably the inverse of packing8 -- TODO confirm). The four results are left in r0..r3; r4..r7 and t are overwritten as temporaries */ 185 | #define unpacking8(r0,r1,r2,r3,r4,r5,r6,r7,t,in0,in1,in2,in3) do {\ 186 | r0 = AND(in0, mask0);\ 187 | t = AND(in1, mask0);\ 188 | t = PSHIFTR(t , 1);\ 189 | r0 = XOR(r0, t );\ 190 | t = AND(in2, mask0);\ 191 | t = PSHIFTR(t , 2);\ 192 | r0 = XOR(r0, t );\ 193 | t = AND(in3, mask0);\ 194 | t = PSHIFTR(t , 3);\ 195 | r0 = XOR(r0, t );\ 196 | t = PSHIFTL(r0, 4);\ 197 | r0 = XOR(r0, t );\ 198 | r0 = PSHUFB(r0, mask_odd);\ 199 | \ 200 | r1 = AND(in0, mask1);\ 201 | r1 = PSHIFTL(r1, 1);\ 202 | t = AND(in1, mask1);\ 203 | r1 = XOR(r1, t );\ 204 | t = AND(in2, mask1);\ 205 | t = PSHIFTR(t , 1);\ 206 | r1 = XOR(r1, t );\ 207 | t = AND(in3, mask1);\ 208 | t = PSHIFTR(t , 2);\ 209 | r1 = XOR(r1, t );\ 210 | t = PSHIFTL(r1, 4);\ 211 | r1 = XOR(r1, t );\ 212 | r1 = PSHUFB(r1, mask_oddh);\ 213 | \ 214 | r2 = AND(in0, mask2);\ 215 | r2 = PSHIFTL(r2, 2);\ 216 | t = AND(in1, mask2);\ 217 | t = PSHIFTL(t , 1);\ 218 | r2 = XOR(r2, t );\ 219 | t = AND(in2, mask2);\ 220 | r2 = XOR(r2, t );\ 221 | t = AND(in3, mask2);\ 222 | t = PSHIFTR(t , 1);\ 223 | r2 = XOR(r2, t );\ 224 | t = PSHIFTL(r2, 4);\ 225 | r2 = XOR(r2, t );\ 226 | r2 = PSHUFB(r2, mask_odd);\ 227 | \ 228 | r3 = AND(in0, mask3);\ 229 | r3 = PSHIFTL(r3, 3);\ 230 | t = AND(in1, mask3);\ 231 | t = PSHIFTL(t , 2);\ 232 | r3 = XOR(r3, t );\ 233 | t = AND(in2, mask3);\ 234 | t = PSHIFTL(t , 1);\ 235 | r3 = XOR(r3, t );\ 236 | t = AND(in3, mask3);\ 237 | r3 = XOR(r3, t );\ 238 | t = PSHIFTL(r3, 4);\ 239 | r3 = XOR(r3, t );\ 240 | r3 = PSHUFB(r3, mask_oddh);\ 241 | \ 242 | r4 = AND(in0, mask4);\ 243 | 
r4 = PSHIFTL(r4, 4);\ 244 | t = AND(in1, mask4);\ 245 | t = PSHIFTL(t , 3);\ 246 | r4 = XOR(r4, t );\ 247 | t = AND(in2, mask4);\ 248 | t = PSHIFTL(t , 2);\ 249 | r4 = XOR(r4, t );\ 250 | t = AND(in3, mask4);\ 251 | t = PSHIFTL(t , 1);\ 252 | r4 = XOR(r4, t );\ 253 | t = PSHIFTL(r4, 4);\ 254 | r4 = XOR(r4, t );\ 255 | r4 = PSHUFB(r4, mask_odd);\ 256 | \ 257 | r5 = AND(in0, mask5);\ 258 | r5 = PSHIFTL(r5, 5);\ 259 | t = AND(in1, mask5);\ 260 | t = PSHIFTL(t , 4);\ 261 | r5 = XOR(r5, t );\ 262 | t = AND(in2, mask5);\ 263 | t = PSHIFTL(t , 3);\ 264 | r5 = XOR(r5, t );\ 265 | t = AND(in3, mask5);\ 266 | t = PSHIFTL(t , 2);\ 267 | r5 = XOR(r5, t );\ 268 | t = PSHIFTL(r5, 4);\ 269 | r5 = XOR(r5, t );\ 270 | r5 = PSHUFB(r5, mask_oddh);\ 271 | \ 272 | r6 = AND(in0, mask6);\ 273 | r6 = PSHIFTL(r6, 6);\ 274 | t = AND(in1, mask6);\ 275 | t = PSHIFTL(t , 5);\ 276 | r6 = XOR(r6, t );\ 277 | t = AND(in2, mask6);\ 278 | t = PSHIFTL(t , 4);\ 279 | r6 = XOR(r6, t );\ 280 | t = AND(in3, mask6);\ 281 | t = PSHIFTL(t , 3);\ 282 | r6 = XOR(r6, t );\ 283 | t = PSHIFTL(r6, 4);\ 284 | r6 = XOR(r6, t );\ 285 | r6 = PSHUFB(r6, mask_odd);\ 286 | \ 287 | r7 = AND(in0, mask7);\ 288 | r7 = PSHIFTL(r7, 7);\ 289 | t = AND(in1, mask7);\ 290 | t = PSHIFTL(t , 6);\ 291 | r7 = XOR(r7, t );\ 292 | t = AND(in2, mask7);\ 293 | t = PSHIFTL(t , 5);\ 294 | r7 = XOR(r7, t );\ 295 | t = AND(in3, mask7);\ 296 | t = PSHIFTL(t , 4);\ 297 | r7 = XOR(r7, t );\ 298 | t = PSHIFTL(r7, 4);\ 299 | r7 = XOR(r7, t );\ 300 | r7 = PSHUFB(r7, mask_oddh);\ 301 | r0 = XOR(r0, r1); r1 = XOR(r2, r3); r2=XOR(r4, r5); r3 = XOR(r6, r7);\ 302 | } while(0); 303 | /* Byte-interleave l and h, then exchange the two results: afterwards l holds the interleaved higher 8 bytes and h the interleaved lower 8 bytes. t0 is scratch */ 304 | #define SWAP8(l, h, t0) do {\ 305 | t0 = l;\ 306 | l = PUNPCKLBW(l , h);\ 307 | h = PUNPCKHBW(t0, h);\ 308 | t0 = l; l = h; h = t0;\ 309 | } while(0); 310 | /* Byte de-interleave of x and y through the mask_unpack8_* shuffles, followed by an exchange of the two results (presumably undoing SWAP8 -- TODO confirm). t0 is scratch */ 311 | #define uSWAP8(x, y, t0) do {\ 312 | t0 = x;\ 313 | x = XOR(PSHUFB(x , mask_unpack8_l0), PSHUFB(y, mask_unpack8_h0));\ 314 | y = XOR(PSHUFB(t0, mask_unpack8_l1), PSHUFB(y, mask_unpack8_h1));\ 315 | t0 = x; 
x = y; y = t0;\ 316 | } while(0); 317 | /* x <- interleaved lower 16-bit words of x and y, y <- interleaved higher 16-bit words; t0 is scratch */ 318 | #define SWAP16(x,y,t0) do {\ 319 | t0 = x;\ 320 | x = INTERLEAVEL16(x,y);\ 321 | y = INTERLEAVEH16(t0,y);\ 322 | } while(0); 323 | /* x <- (low 64 bits of x, low 64 bits of y), y <- (high 64 bits of x, high 64 bits of y); t0 is scratch */ 324 | #define SWAP64(x,y,t0) do {\ 325 | t0 = x;\ 326 | x = INTERLEAVEL64(x,y);\ 327 | y = INTERLEAVEH64(t0,y);\ 328 | } while(0); 329 | /* 16-bit word de-interleave of x and y through the mask_unpack16_* shuffles; t0 is scratch */ 330 | #define uINTERLEAVE16(x, y, t0) do {\ 331 | t0 = x;\ 332 | x = XOR(PSHUFB(x , mask_unpack16_l0), PSHUFB(y, mask_unpack16_h0));\ 333 | y = XOR(PSHUFB(t0, mask_unpack16_l1), PSHUFB(y, mask_unpack16_h1));\ 334 | } while(0); 335 | 336 | #define uSWAP16(x,y, t0) uINTERLEAVE16(x, y, t0); 337 | /* same selection as SWAP64, written with the mask_unpack64_* shuffles; t0 is scratch */ 338 | #define uSWAP64(x, y, t0) do {\ 339 | t0 = x;\ 340 | x = XOR(PSHUFB(x , mask_unpack64_l0), PSHUFB(y, mask_unpack64_h0));\ 341 | y = XOR(PSHUFB(t0, mask_unpack64_l1), PSHUFB(y, mask_unpack64_h1));\ 342 | } while(0); 343 | /* Nibble exchange between a and b: a <- (high nibbles of a, high nibbles of b), b <- (low nibbles of a, low nibbles of b), per byte; t0, t1 are scratch */ 344 | #define SWAP4(a, b, t0, t1) do {\ 345 | t0 = AND(a, mask_u);\ 346 | t1 = PSHIFTL(AND(a, mask_l), 4);\ 347 | \ 348 | a = PSHIFTR(AND(b, mask_u), 4);\ 349 | b = AND(b, mask_l);\ 350 | \ 351 | a = XOR(t0, a);\ 352 | b = XOR(t1, b);\ 353 | } while(0); 354 | 355 | /* t0 = a; a = b; b = t0; */ 356 | 357 | 358 | /* SWAP4 with big endian */ 359 | #define SWAP4BE(a, b, t0, t1) do {\ 360 | t0 = AND(a, mask_u);\ 361 | t1 = PSHIFTL(AND(a, mask_l), 4);\ 362 | \ 363 | a = AND(b, mask_l);\ 364 | b = PSHIFTR(AND(b, mask_u), 4);\ 365 | \ 366 | a = XOR(t1, a);\ 367 | b = XOR(t0, b);\ 368 | } while(0); 369 | 370 | /* t0 = a; a = b; b = t0; */ 371 | /* same per-byte nibble exchange as SWAP4, written with a different statement order; t0, t1 are scratch */ 372 | #define uSWAP4(a, b, t0, t1) do {\ 373 | t0 = AND(a, mask_u);\ 374 | t1 = AND(b, mask_u);\ 375 | a = AND(a, mask_l);\ 376 | b = AND(b, mask_l);\ 377 | b = XOR(PSHIFTL(a, 4), b);\ 378 | a = XOR(t0, PSHIFTR(t1,4));\ 379 | } while(0); 380 | 381 | /*uSWAP4 with big endian */ 382 | #define uSWAP4BE(a, b, t0, t1) do {\ 383 | t0 = AND(a, mask_u);\ 384 | t1 = AND(b, mask_u);\ 385 | a = AND(a, mask_l);\ 386 | b = AND(b, mask_l);\ 387 | a = XOR(PSHIFTL(a, 4), b);\ 388 | b = XOR(t0, PSHIFTR(t1,4));\ 389 | } while(0); 
390 | 391 | /* 4x4 bit transpose inside every nibble across a, b, c, d (in place): the new a collects bit 3 of the nibbles of a, b, c, d, the new b bit 2, the new c bit 1 and the new d bit 0. t0..t2 are scratch. Applying it twice restores the inputs, which is why uBitSlice4 is the same macro */ 392 | #define BitSlice4(a, b, c, d, t0, t1, t2) do {\ 393 | t0 = XOR(XOR( AND(a, mask88) , PSHIFTR(AND(b, mask88), 1)), XOR(PSHIFTR(AND(c, mask88), 2), PSHIFTR(AND(d, mask88), 3)));\ 394 | t1 = XOR(XOR(PSHIFTL(AND(a, mask44),1), AND(b, mask44)) , XOR(PSHIFTR(AND(c, mask44), 1), PSHIFTR(AND(d, mask44), 2)));\ 395 | t2 = XOR(XOR(PSHIFTL(AND(a, mask22),2), PSHIFTL(AND(b, mask22), 1)), XOR( AND(c, mask22), PSHIFTR(AND(d, mask22), 1)));\ 396 | d = XOR(XOR(PSHIFTL(AND(a, mask11),3), PSHIFTL(AND(b, mask11), 2)), XOR(PSHIFTL(AND(c, mask11), 1), AND(d, mask11) ));\ 397 | c = t2;\ 398 | b = t1;\ 399 | a = t0;\ 400 | } while(0); 401 | 402 | #define uBitSlice4(a, b, c, d, t0, t1, t2) BitSlice4(a, b, c, d, t0, t1, t2); 403 | 404 | 405 | /* 406 | Step 1: t0, a0, a1, a2, a3, a4, a5, a6 407 | t0 = A0A1 B0B1 A2A3 B2B3 ... A14A15B14B15 408 | a0 = A16A17 B16B17 A18A19 B18B19 ... A30A31B30B31 409 | ... 410 | Step 2: 411 | t1 = A0-- B0-- A1-- B1-- A07-- B07-- 412 | a7 = A8-- B8-- A9-- B9-- A15-- B15-- 413 | 414 | output sequence: t0, a3, a0, a4, a1, a5, a2, a6 415 | */ 416 | #define packing16(a0, a1, a2, a3, a4, a5, a6, a7, t0, t1, t2) do {\ 417 | SWAP4(a0, a1, t0, t1);\ 418 | SWAP4(a2, a3, t0, t1);\ 419 | SWAP4(a4, a5, t0, t1);\ 420 | SWAP4(a6, a7, t0, t1);\ 421 | \ 422 | SWAP8(a0, a1, t0);\ 423 | SWAP8(a2, a3, t0);\ 424 | SWAP8(a4, a5, t0);\ 425 | SWAP8(a6, a7, t0);\ 426 | \ 427 | SWAP8(a0, a1, t0);\ 428 | SWAP8(a2, a3, t0);\ 429 | SWAP8(a4, a5, t0);\ 430 | SWAP8(a6, a7, t0);\ 431 | \ 432 | BitSlice4(a0, a2, a4, a6, t0, t1, t2);\ 433 | BitSlice4(a1, a3, a5, a7, t0, t1, t2);\ 434 | \ 435 | t0 = a1; a1 = a2; a2 = a4; a4 = t0;\ 436 | t0 = a3; a3 = a6; a6 = a5; a5 = t0;\ 437 | } while(0); 438 | /* Applies the inverse steps of packing16 in reverse order: register permutation, uBitSlice4, two rounds of uSWAP8, then uSWAP4. Works in place on a0..a7; t0..t2 are scratch */ 439 | #define unpacking16(a0, a1, a2, a3, a4, a5, a6, a7, t0, t1, t2) do {\ 440 | t0 = a4; a4 = a2; a2 = a1; a1 = t0;\ 441 | t0 = a5; a5 = a6; a6 = a3; a3 = t0;\ 442 | \ 443 | uBitSlice4(a0, a2, a4, a6, t0, t1, t2);\ 444 | uBitSlice4(a1, a3, a5, a7, t0, t1, t2);\ 445 | \ 446 | 
uSWAP8(a0, a1, t0);\ 447 | uSWAP8(a2, a3, t0);\ 448 | uSWAP8(a4, a5, t0);\ 449 | uSWAP8(a6, a7, t0);\ 450 | \ 451 | uSWAP8(a0, a1, t0);\ 452 | uSWAP8(a2, a3, t0);\ 453 | uSWAP8(a4, a5, t0);\ 454 | uSWAP8(a6, a7, t0);\ 455 | \ 456 | uSWAP4(a0, a1, t0, t1);\ 457 | uSWAP4(a2, a3, t0, t1);\ 458 | uSWAP4(a4, a5, t0, t1);\ 459 | uSWAP4(a6, a7, t0, t1);\ 460 | } while(0); 461 | 462 | /* now the results are in the order t0, a3, a2, a1, a0, b3, b2, b1, b0, c3, c2, c1, c0, d3, d2, d1 // */ 463 | /* d0, t0, a2, a3, a1, a0, b2, b3, b1, b0, c2, c3, c1, c0 */ 464 | 465 | /* tbd */ /* packing32: runs packing16 on a0..a7 and on a8..a15, then interleaves the 16-bit words of each pair (ai, ai+8) with SWAP16. In place; t0..t2 are scratch */ 466 | #define packing32(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, t0, t1, t2) do {\ 467 | packing16(a0, a1, a2, a3, a4, a5, a6, a7, t0, t1, t2);\ 468 | packing16(a8, a9, a10, a11, a12, a13, a14, a15, t0, t1, t2);\ 469 | SWAP16(a0, a8, t0);\ 470 | SWAP16(a1, a9, t0);\ 471 | SWAP16(a2, a10, t0);\ 472 | SWAP16(a3, a11, t0);\ 473 | SWAP16(a4, a12, t0);\ 474 | SWAP16(a5, a13, t0);\ 475 | SWAP16(a6, a14, t0);\ 476 | SWAP16(a7, a15, t0);\ 477 | } while(0); 478 | /* unpacking32: de-interleaves each pair (ai, ai+8) with uSWAP16, then runs unpacking16 on both halves. In place; t0..t2 are scratch */ 479 | #define unpacking32(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, t0, t1, t2) do {\ 480 | uSWAP16(a0, a8, t0);\ 481 | uSWAP16(a1, a9, t0);\ 482 | uSWAP16(a2, a10, t0);\ 483 | uSWAP16(a3, a11, t0);\ 484 | uSWAP16(a4, a12, t0);\ 485 | uSWAP16(a5, a13, t0);\ 486 | uSWAP16(a6, a14, t0);\ 487 | uSWAP16(a7, a15, t0);\ 488 | unpacking16(a0, a1, a2, a3, a4, a5, a6, a7, t0, t1, t2);\ 489 | unpacking16(a8, a9, a10, a11, a12, a13, a14, a15, t0, t1, t2);\ 490 | } while(0); 491 | /* Assigns the file-scope mask registers declared at the top of this header; the macros above read these masks, so init() has to run before they are used. Because the masks are static, every translation unit including this header has its own copy */ 492 | #define init() do {\ 493 | mask0 = CONSTANT(0x80); /* set all 16 bytes of mask0 to the value 0x80 */\ 494 | mask1 = CONSTANT(0x40);\ 495 | mask2 = CONSTANT(0x20);\ 496 | mask3 = CONSTANT(0x10);\ 497 | mask4 = CONSTANT(0x08);\ 498 | mask5 = CONSTANT(0x04);\ 499 | mask6 = CONSTANT(0x02);\ 500 | mask7 = CONSTANT(0x01);\ 501 | mask_one = CONSTANT(0xff);\ 502 | mask_odd = SET(0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80, 
15,13,11,9,7,5,3,1);\ 503 | mask_oddh = SET( 15,13,11,9,7,5,3,1,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80);\ 504 | mask_key1 = SET(0x0, 0x80, 0x80, 0x80, 0x80, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6 ,5);\ 505 | mask_key2 = SET(0x80, 3, 2, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);\ 506 | mask128_key1 = SET(0x0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);\ 507 | mask128_key2 = SET(0x80, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6 ,5, 4, 3, 2, 1);\ 508 | \ 509 | mask512_key1 = SET(0x1, 0x0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);\ 510 | mask512_key2 = SET(0x80, 0x80, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6 ,5, 4, 3, 2);\ 511 | \ 512 | mask128_and_b21 = SET(0, 0, 0xff, 0xff,0xff, 0xff,0xff, 0xff,0xff, 0xff,0xff, 0xff,0xff, 0xff,0xff, 0xff);\ 513 | mask128_and_b22 = SET(0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);\ 514 | \ 515 | mask_u = CONSTANT(0xf0);\ 516 | mask_l = CONSTANT(0x0f);\ 517 | mask88 = CONSTANT(0x88);\ 518 | mask44 = CONSTANT(0x44);\ 519 | mask22 = CONSTANT(0x22);\ 520 | mask11 = CONSTANT(0x11);\ 521 | mask_one = CONSTANT(0xff); /* NOTE(review): redundant, mask_one was already set to the same value earlier in this macro */\ 522 | mask_unpack64_l0 = SET(0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80, 7, 6, 5, 4, 3, 2, 1, 0);\ 523 | mask_unpack64_l1 = SET(0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80, 15, 14, 13, 12, 11, 10, 9, 8);\ 524 | mask_unpack64_h0 = SET(7, 6, 5, 4, 3, 2, 1, 0, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80);\ 525 | mask_unpack64_h1 = SET(15, 14, 13, 12, 11, 10, 9, 8, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80);\ 526 | mask_unpack16_l0 = SET(0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80, 13, 12, 9, 8, 5, 4, 1, 0);\ 527 | mask_unpack16_l1 = SET(0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80, 15, 14, 11, 10, 7, 6, 3, 2);\ 528 | mask_unpack16_h0 = SET(13, 12, 9, 8, 5, 4, 1, 0, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80);\ 529 | mask_unpack16_h1 = SET(15, 14, 11, 10, 7, 6, 3, 2, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80);\ 530 | \ 531 | mask_unpack8_l0 = 
SET(0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80, 14, 12, 10, 8, 6, 4, 2, 0);\ 532 | mask_unpack8_l1 = SET(0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80, 15, 13, 11, 9, 7, 5, 3, 1);\ 533 | mask_unpack8_h0 = SET(14, 12, 10, 8, 6, 4, 2, 0, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80);\ 534 | mask_unpack8_h1 = SET(15, 13, 11, 9, 7, 5, 3, 1, 0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80);\ 535 | mask_u64 = SET(0xff, 0xff, 0xff, 0xff,0xff, 0xff, 0xff, 0xff,0,0,0,0,0,0,0,0);\ 536 | mask_l64 = SET(0,0,0,0,0,0,0,0, 0xff, 0xff, 0xff, 0xff,0xff, 0xff, 0xff, 0xff);\ 537 | \ 538 | mask16_sr01 = SET(13, 12, 11, 10, 9, 8, 15, 14, 7, 6, 5, 4, 3, 2, 1, 0);\ 539 | mask16_sr23 = SET(9, 8, 15, 14, 13, 12, 11, 10, 3, 2, 1, 0, 7, 6, 5, 4);\ 540 | mask_byte_endian = SET(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);\ 541 | mask_byte_endian64 = SET(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7);\ 542 | mask_byte_endian80 = SET(0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7);\ 543 | } while(0); 544 | /* NOTE(review): mask_l16 and mask_r16 are not assigned by init() above; they have to be set somewhere else before cross16 is used -- confirm */ 545 | static word mask_l16, mask_r16; 546 | #define cross16(r, a, b) do {\ 547 | r = XOR(AND(a, mask_l16), AND(b, mask_r16));\ 548 | } while(0); 549 | /* NOTE(review): expands to cross(...), which is not defined in this header (only cross16 is) -- confirm that cross is provided wherever pack_lr is used, or that cross16 was intended */ 550 | #define pack_lr(r0, r1, r2, r3, a0, a1, a2, a3, b0, b1, b2, b3) do {\ 551 | cross(r0, a0, b0);\ 552 | cross(r1, a1, b1);\ 553 | cross(r2, a2, b2);\ 554 | cross(r3, a3, b3);\ 555 | } while(0); 556 | /* alias of XOR_Key */ 557 | #define AddKey(r0, r1, r2, r3, a0, a1, a2, a3) XOR_Key(r0, r1, r2, r3, a0, a1, a2, a3) 558 | /* swap the high and the low nibble of every byte of a (in place); t0, t1 are scratch */ 559 | #define inverse_nibble_endian(a, t0, t1) do {\ 560 | t0 = AND(a, mask_l);\ 561 | t1 = AND(a, mask_u);\ 562 | t0 = PSHIFTL(t0, 4);\ 563 | t1 = PSHIFTR(t1, 4);\ 564 | a = XOR(t0, t1);\ 565 | } while(0); 566 | /* reverse the order of the 16 bytes of a */ 567 | #define inverse_bytes_endian(a) do {\ 568 | a = PSHUFB(a, mask_byte_endian);\ 569 | } while(0); 570 | /* reverse the byte order inside each 64-bit half of a */ 571 | #define inverse_bytes_endian64(a) do {\ 572 | a = PSHUFB(a, mask_byte_endian64);\ 573 | } while(0); 574 | /* reverse the low 8 bytes of a, exchange bytes 8 and 9, and clear the 6 most significant bytes */ 575 | #define inverse_bytes_endian80(a) do {\ 576 | a = PSHUFB(a, mask_byte_endian80);\ 577 | } while(0); 
578 | -------------------------------------------------------------------------------- /src/table/Piccolo/Piccolo.c: -------------------------------------------------------------------------------- 1 | /*------------------------ MIT License HEADER ------------------------------------ 2 | Copyright ANSSI and NTU (2015) 3 | Contributors: 4 | Ryad BENADJILA [ryadbenadjila@gmail.com] and 5 | Jian GUO [ntu.guo@gmail.com] and 6 | Victor LOMNE [victor.lomne@gmail.com] and 7 | Thomas PEYRIN [thomas.peyrin@gmail.com] 8 | 9 | This software is a computer program whose purpose is to implement 10 | lightweight block ciphers with different optimizations for the x86 11 | platform. Three algorithms have been implemented: PRESENT, LED and 12 | Piccolo. Three techniques have been explored: table based 13 | implementations, vperm (for vector permutation) and bitslice 14 | implementations. For more details, please refer to the SAC 2013 15 | paper: 16 | http://eprint.iacr.org/2013/445 17 | as well as the documentation of the project. 18 | Here is a big picture of how the code is divided: 19 | - src/common contains common headers, structures and functions. 20 | - src/table contains table based implementations, with the code 21 | that generates the tables in src/table/gen_tables. The code here 22 | is written in pure C so it should compile on any platform (x86 23 | and other architectures), as well as any OS flavour (*nix, 24 | Windows ...). 25 | - src/vperm contains vperm based implementations. They are written 26 | in inline assembly for x86_64 and will only compile and work on 27 | this platform. The code only compiles with gcc, but porting it to 28 | other assembly flavours should not be too complicated. 29 | - src/bitslice contains bitslice based implementations. They are 30 | written in asm intrinsics. It should compile and run on i386 as 31 | well as x86_64 platforms, and it should be portable to other OS 32 | flavours since intrinsics are standard among many compilers. 
33 | Note: vperm and bitslice implementations require a x86 CPU with at least 34 | SSSE3 extensions. 35 | 36 | Permission is hereby granted, free of charge, to any person obtaining a copy 37 | of this software and associated documentation files (the "Software"), to deal 38 | in the Software without restriction, including without limitation the rights 39 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 40 | copies of the Software, and to permit persons to whom the Software is 41 | furnished to do so, subject to the following conditions: 42 | 43 | The above copyright notice and this permission notice shall be included in 44 | all copies or substantial portions of the Software. 45 | 46 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 47 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 48 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 49 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 50 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 51 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 52 | THE SOFTWARE. 53 | 54 | Except as contained in this notice, the name(s) of the above copyright holders 55 | shall not be used in advertising or otherwise to promote the sale, use or other 56 | dealings in this Software without prior written authorization. 
57 | 58 | 59 | -------------------------- MIT License HEADER ----------------------------------*/ 60 | #ifdef TABLE 61 | #include 62 | #include "Piccolo_tables.h" 63 | 64 | #ifdef TABLE 65 | #ifdef Piccolo80 66 | void Piccolo80table_key_schedule(const u8* masterKey80, u8* roundKeys80); 67 | void Piccolo80table_core(const u8* plaintext, const u8* roundKeys80, u8* ciphertext); 68 | void Piccolo80table_cipher(const u64 plaintext_in[TABLE_P], const u16 keys_in[TABLE_P][KEY80], u64 ciphertext_out[TABLE_P]); 69 | #endif 70 | #ifdef Piccolo128 71 | void Piccolo128table_key_schedule(const u8* masterKey128, u8* roundKeys128); 72 | void Piccolo128table_core(const u8* plaintext, const u8* roundKeys128, u8* ciphertext); 73 | void Piccolo128table_cipher(const u64 plaintext_in[TABLE_P], const u16 keys_in[TABLE_P][KEY128], u64 ciphertext_out[TABLE_P]); 74 | #endif 75 | #endif 76 | 77 | /****************************************************************************************************/ 78 | /* some macros */ 79 | 80 | #define ROTL64(in, l) ((in) << l) ^ ((in) >> (64-l)) 81 | #define ROTR64(in, l) ((in) >> l) ^ ((in) << (64-l)) 82 | #define MASK4 0x0f 83 | #define MASK8 0xff 84 | #define MASK16 0xffff 85 | 86 | #define PICCOLOKEYSCHEDULE128PERM(key) do {\ 87 | u64 key0, key1, key2, key3, key45, key67;\ 88 | key0 = ((u64 *)key)[0] & 0x000000000000ffff;\ 89 | key1 = ((u64 *)key)[0] & 0x00000000ffff0000;\ 90 | key2 = ((u64 *)key)[0] & 0x0000ffff00000000;\ 91 | key3 = ((u64 *)key)[0] & 0xffff000000000000;\ 92 | key45 = ((u64 *)key)[1] & 0x00000000ffffffff;\ 93 | key67 = ((u64 *)key)[1] & 0xffffffff00000000;\ 94 | key2 = ROTR64(key2, 32);\ 95 | key3 = ROTR64(key3, 32);\ 96 | key45 = ROTL64(key45, 32);\ 97 | ((u64 *)key)[0] = key1 ^ key2 ^ key67;\ 98 | ((u64 *)key)[1] = key0 ^ key3 ^ key45;\ 99 | } while(0); 100 | 101 | #define PICCOLOROUNDPERM(state) do {\ 102 | u64 state0, state1;\ 103 | state0 = state & 0x00ff00ff00ff00ff;\ 104 | state1 = state & 0xff00ff00ff00ff00;\ 105 | state0 = 
ROTR64(state0, 16);\ 106 | state1 = ROTL64(state1, 16);\ 107 | state = state0 ^ state1;\ 108 | } while(0); 109 | 110 | #define PICCOLOROUND(state, key) do {\ 111 | unsigned int w0, w1;\ 112 | u64 outputFeistel0, outputFeistel1, outputFeistel2, outputFeistel3;\ 113 | w0 = state & MASK8;\ 114 | w1 = (state >> 8) & MASK8;\ 115 | w0 = T0_Piccolo[w0];\ 116 | w1 = T1_Piccolo[w1];\ 117 | w0 = w0 ^ w1;\ 118 | outputFeistel0 = T2_Piccolo[w0 & MASK8];\ 119 | outputFeistel1 = T3_Piccolo[(w0 >> 8) & MASK8];\ 120 | w0 = (state >> 32) & MASK8;\ 121 | w1 = (state >> 40) & MASK8;\ 122 | w0 = T0_Piccolo[w0];\ 123 | w1 = T1_Piccolo[w1];\ 124 | w0 = w0 ^ w1;\ 125 | outputFeistel2 = T4_Piccolo[w0 & MASK8];\ 126 | outputFeistel3 = T5_Piccolo[(w0 >> 8) & MASK8];\ 127 | state ^= outputFeistel0 ^ outputFeistel1 ^ outputFeistel2 ^ outputFeistel3 ^ key;\ 128 | } while(0); 129 | 130 | 131 | 132 | /****************************************************************************************************/ 133 | /* Piccolo80 key schedule */ 134 | #ifdef Piccolo80 135 | void Piccolo80table_key_schedule(const u8* masterKey80, u8* roundKeys80) 136 | { 137 | u64 k2k3, k0k1, k4k4; 138 | 139 | ((u64*)roundKeys80)[0] = 0; 140 | ((u64*)roundKeys80)[26] = 0; 141 | /* compute wk0 and wk1 such that roundKeys80[0..7] = wk0 | 0x0000 | wk1 | 0x0000 */ 142 | roundKeys80[0] = masterKey80[0]; 143 | roundKeys80[1] = masterKey80[3]; 144 | roundKeys80[4] = masterKey80[2]; 145 | roundKeys80[5] = masterKey80[1]; 146 | 147 | /* compute wk2 and wk3 such that roundKeys80[208..215] = wk2 | 0x0000 | wk3 | 0x0000 */ 148 | roundKeys80[208] = masterKey80[8]; 149 | roundKeys80[209] = masterKey80[7]; 150 | roundKeys80[212] = masterKey80[6]; 151 | roundKeys80[213] = masterKey80[9]; 152 | 153 | /* compute 0x0000 | k2 | 0x0000 | k3 */ 154 | k2k3 = ((u64)((u16 *)masterKey80)[2] << 16) | ((u64)((u16 *)masterKey80)[3] << 48); 155 | 156 | /* compute 0x0000 | k0 | 0x0000 | k1 */ 157 | k0k1 = ((u64)((u16 *)masterKey80)[0] << 16) | 
((u64)((u16 *)masterKey80)[1] << 48); 158 | 159 | /* compute 0x0000 | k4 | 0x0000 | k4 */ 160 | k4k4 = ((u64)((u16 *)masterKey80)[4] << 16) | ((u64)((u16 *)masterKey80)[4] << 48); 161 | 162 | /* compute 0x0000 | rk0 | 0x0000 | rk1 */ 163 | ((u64 *)roundKeys80)[1] = Tcon80_Piccolo[0] ^ k2k3; 164 | 165 | /* compute 0x0000 | rk2 | 0x0000 | rk3 */ 166 | ((u64 *)roundKeys80)[2] = Tcon80_Piccolo[1] ^ k0k1; 167 | 168 | /* compute 0x0000 | rk4 | 0x0000 | rk5 */ 169 | ((u64 *)roundKeys80)[3] = Tcon80_Piccolo[2] ^ k2k3; 170 | 171 | /* compute 0x0000 | rk6 | 0x0000 | rk7 */ 172 | ((u64 *)roundKeys80)[4] = Tcon80_Piccolo[3] ^ k4k4; 173 | 174 | /* compute 0x0000 | rk8 | 0x0000 | rk9 */ 175 | ((u64 *)roundKeys80)[5] = Tcon80_Piccolo[4] ^ k0k1; 176 | 177 | /* compute 0x0000 | rk10 | 0x0000 | rk11 */ 178 | ((u64 *)roundKeys80)[6] = Tcon80_Piccolo[5] ^ k2k3; 179 | 180 | /* compute 0x0000 | rk12 | 0x0000 | rk13 */ 181 | ((u64 *)roundKeys80)[7] = Tcon80_Piccolo[6] ^ k0k1; 182 | 183 | /* compute 0x0000 | rk14 | 0x0000 | rk15 */ 184 | ((u64 *)roundKeys80)[8] = Tcon80_Piccolo[7] ^ k2k3; 185 | 186 | /* compute 0x0000 | rk16 | 0x0000 | rk17 */ 187 | ((u64 *)roundKeys80)[9] = Tcon80_Piccolo[8] ^ k4k4; 188 | 189 | /* compute 0x0000 | rk18 | 0x0000 | rk19 */ 190 | ((u64 *)roundKeys80)[10] = Tcon80_Piccolo[9] ^ k0k1; 191 | 192 | /* compute 0x0000 | rk20 | 0x0000 | rk21 */ 193 | ((u64 *)roundKeys80)[11] = Tcon80_Piccolo[10] ^ k2k3; 194 | 195 | /* compute 0x0000 | rk22 | 0x0000 | rk23 */ 196 | ((u64 *)roundKeys80)[12] = Tcon80_Piccolo[11] ^ k0k1; 197 | 198 | /* compute 0x0000 | rk24 | 0x0000 | rk25 */ 199 | ((u64 *)roundKeys80)[13] = Tcon80_Piccolo[12] ^ k2k3; 200 | 201 | /* compute 0x0000 | rk26 | 0x0000 | rk27 */ 202 | ((u64 *)roundKeys80)[14] = Tcon80_Piccolo[13] ^ k4k4; 203 | 204 | /* compute 0x0000 | rk28 | 0x0000 | rk29 */ 205 | ((u64 *)roundKeys80)[15] = Tcon80_Piccolo[14] ^ k0k1; 206 | 207 | /* compute 0x0000 | rk30 | 0x0000 | rk31 */ 208 | ((u64 *)roundKeys80)[16] = Tcon80_Piccolo[15] 
^ k2k3; 209 | 210 | /* compute 0x0000 | rk32 | 0x0000 | rk33 */ 211 | ((u64 *)roundKeys80)[17] = Tcon80_Piccolo[16] ^ k0k1; 212 | 213 | /* compute 0x0000 | rk34 | 0x0000 | rk35 */ 214 | ((u64 *)roundKeys80)[18] = Tcon80_Piccolo[17] ^ k2k3; 215 | 216 | /* compute 0x0000 | rk36 | 0x0000 | rk37 */ 217 | ((u64 *)roundKeys80)[19] = Tcon80_Piccolo[18] ^ k4k4; 218 | 219 | /* compute 0x0000 | rk38 | 0x0000 | rk39 */ 220 | ((u64 *)roundKeys80)[20] = Tcon80_Piccolo[19] ^ k0k1; 221 | 222 | /* compute 0x0000 | rk40 | 0x0000 | rk41 */ 223 | ((u64 *)roundKeys80)[21] = Tcon80_Piccolo[20] ^ k2k3; 224 | 225 | /* compute 0x0000 | rk42 | 0x0000 | rk43 */ 226 | ((u64 *)roundKeys80)[22] = Tcon80_Piccolo[21] ^ k0k1; 227 | 228 | /* compute 0x0000 | rk44 | 0x0000 | rk45 */ 229 | ((u64 *)roundKeys80)[23] = Tcon80_Piccolo[22] ^ k2k3; 230 | 231 | /* compute 0x0000 | rk46 | 0x0000 | rk47 */ 232 | ((u64 *)roundKeys80)[24] = Tcon80_Piccolo[23] ^ k4k4; 233 | 234 | /* compute 0x0000 | rk48 | 0x0000 | rk49 */ 235 | ((u64 *)roundKeys80)[25] = Tcon80_Piccolo[24] ^ k0k1; 236 | 237 | return; 238 | } 239 | #endif 240 | 241 | 242 | 243 | /****************************************************************************************************/ 244 | /* Piccolo128 key schedule */ 245 | #ifdef Piccolo128 246 | void Piccolo128table_key_schedule(const u8* masterKey128, u8* roundKeys128) 247 | { 248 | u64 k0, k1, k2, k3, k4, k5, k6, k7, k4k5, k2k1, k0k3, k6k1, k2k7, k4k1, k6k5, k4k3, k0k7, k2k5; 249 | 250 | ((u64*)roundKeys128)[0] = 0; 251 | ((u64*)roundKeys128)[32] = 0; 252 | /* compute wk0 and wk1 such that roundKeys128[0..7] = wk0 | 0x0000 | wk1 | 0x0000 */ 253 | roundKeys128[0] = masterKey128[0]; 254 | roundKeys128[1] = masterKey128[3]; 255 | roundKeys128[4] = masterKey128[2]; 256 | roundKeys128[5] = masterKey128[1]; 257 | 258 | /* compute wk2 and wk3 such that roundKeys128[256..263] = wk2 | 0x0000 | wk3 | 0x0000 */ 259 | roundKeys128[256] = masterKey128[8]; 260 | roundKeys128[257] = masterKey128[15]; 261 | 
roundKeys128[260] = masterKey128[14]; 262 | roundKeys128[261] = masterKey128[9]; 263 | 264 | /* compute 0x0000 | k0 | 0x0000 | 0x0000 */ 265 | k0 = (u64)((u16 *)masterKey128)[0] << 16; 266 | 267 | /* compute 0x0000 | k2 | 0x0000 | 0x0000 */ 268 | k2 = (u64)((u16 *)masterKey128)[2] << 16; 269 | 270 | /* compute 0x0000 | k4 | 0x0000 | 0x0000 */ 271 | k4 = (u64)((u16 *)masterKey128)[4] << 16; 272 | 273 | /* compute 0x0000 | k6 | 0x0000 | 0x0000 */ 274 | k6 = (u64)((u16 *)masterKey128)[6] << 16; 275 | 276 | /* compute 0x0000 | 0x00 | 0x0000 | k1 */ 277 | k1 = (u64)((u16 *)masterKey128)[1] << 48; 278 | 279 | /* compute 0x0000 | 0x00 | 0x0000 | k3 */ 280 | k3 = (u64)((u16 *)masterKey128)[3] << 48; 281 | 282 | /* compute 0x0000 | 0x00 | 0x0000 | k5 */ 283 | k5 = (u64)((u16 *)masterKey128)[5] << 48; 284 | 285 | /* compute 0x0000 | 0x00 | 0x0000 | k7 */ 286 | k7 = (u64)((u16 *)masterKey128)[7] << 48; 287 | 288 | /* compute 0x0000 | k4 | 0x0000 | k5 */ 289 | k4k5 = k4 ^ k5; 290 | 291 | /* compute 0x0000 | k2 | 0x0000 | k1 */ 292 | k2k1 = k2 ^ k1; 293 | 294 | /* compute 0x0000 | k0 | 0x0000 | k3 */ 295 | k0k3 = k0 ^ k3; 296 | 297 | /* compute 0x0000 | k6 | 0x0000 | k1 */ 298 | k6k1 = k6 ^ k1; 299 | 300 | /* compute 0x0000 | k2 | 0x0000 | k7 */ 301 | k2k7 = k2 ^ k7; 302 | 303 | /* compute 0x0000 | k4 | 0x0000 | k1 */ 304 | k4k1 = k4 ^ k1; 305 | 306 | /* compute 0x0000 | k6 | 0x0000 | k5 */ 307 | k6k5 = k6 ^ k5; 308 | 309 | /* compute 0x0000 | k4 | 0x0000 | k3 */ 310 | k4k3 = k4 ^ k3; 311 | 312 | /* compute 0x0000 | k0 | 0x0000 | k7 */ 313 | k0k7 = k0 ^ k7; 314 | 315 | /* compute 0x0000 | k2 | 0x0000 | k5 */ 316 | k2k5 = k2 ^ k5; 317 | 318 | /* compute 0x0000 | rk0 | 0x0000 | rk1 */ 319 | ((u64 *)roundKeys128)[1] = Tcon128_Piccolo[0] ^ k2 ^ k3; 320 | 321 | /* compute 0x0000 | rk2 | 0x0000 | rk3 */ 322 | ((u64 *)roundKeys128)[2] = Tcon128_Piccolo[1] ^ k4k5; 323 | 324 | /* compute 0x0000 | rk4 | 0x0000 | rk5 */ 325 | ((u64 *)roundKeys128)[3] = Tcon128_Piccolo[2] ^ k6 ^ k7; 326 | 
327 | /* take into account Key Schedule permutation */ 328 | /* compute 0x0000 | rk6 | 0x0000 | rk7 */ 329 | ((u64 *)roundKeys128)[4] = Tcon128_Piccolo[3] ^ k2k1; 330 | 331 | /* compute 0x0000 | rk8 | 0x0000 | rk9 */ 332 | ((u64 *)roundKeys128)[5] = Tcon128_Piccolo[4] ^ k6 ^ k7; 333 | 334 | /* compute 0x0000 | rk10 | 0x0000 | rk11 */ 335 | ((u64 *)roundKeys128)[6] = Tcon128_Piccolo[5] ^ k0k3; 336 | 337 | /* compute 0x0000 | rk12 | 0x0000 | rk13 */ 338 | ((u64 *)roundKeys128)[7] = Tcon128_Piccolo[6] ^ k4k5; 339 | 340 | /* take into account Key Schedule permutation */ 341 | /* compute 0x0000 | rk14 | 0x0000 | rk15 */ 342 | ((u64 *)roundKeys128)[8] = Tcon128_Piccolo[7] ^ k6k1; 343 | 344 | /* compute 0x0000 | rk16 | 0x0000 | rk17 */ 345 | ((u64 *)roundKeys128)[9] = Tcon128_Piccolo[8] ^ k4k5; 346 | 347 | /* compute 0x0000 | rk18 | 0x0000 | rk19 */ 348 | ((u64 *)roundKeys128)[10] = Tcon128_Piccolo[9] ^ k2k7; 349 | 350 | /* compute 0x0000 | rk20 | 0x0000 | rk21 */ 351 | ((u64 *)roundKeys128)[11] = Tcon128_Piccolo[10] ^ k0k3; 352 | 353 | /* take into account Key Schedule permutation */ 354 | /* compute 0x0000 | rk22 | 0x0000 | rk23 */ 355 | ((u64 *)roundKeys128)[12] = Tcon128_Piccolo[11] ^ k4k1; 356 | 357 | /* compute 0x0000 | rk24 | 0x0000 | rk25 */ 358 | ((u64 *)roundKeys128)[13] = Tcon128_Piccolo[12] ^ k0k3; 359 | 360 | /* compute 0x0000 | rk26 | 0x0000 | rk27 */ 361 | ((u64 *)roundKeys128)[14] = Tcon128_Piccolo[13] ^ k6k5; 362 | 363 | /* compute 0x0000 | rk28 | 0x0000 | rk29 */ 364 | ((u64 *)roundKeys128)[15] = Tcon128_Piccolo[14] ^ k2k7; 365 | 366 | /* take into account Key Schedule permutation */ 367 | /* compute 0x0000 | rk29 | 0x0000 | rk30 */ 368 | ((u64 *)roundKeys128)[16] = Tcon128_Piccolo[15] ^ k0 ^ k1; 369 | 370 | /* compute 0x0000 | rk31 | 0x0000 | rk32 */ 371 | ((u64 *)roundKeys128)[17] = Tcon128_Piccolo[16] ^ k2k7; 372 | 373 | /* compute 0x0000 | rk33 | 0x0000 | rk34 */ 374 | ((u64 *)roundKeys128)[18] = Tcon128_Piccolo[17] ^ k4k3; 375 | 376 | /* compute 
0x0000 | rk35 | 0x0000 | rk36 */ 377 | ((u64 *)roundKeys128)[19] = Tcon128_Piccolo[18] ^ k6k5; 378 | 379 | /* take into account Key Schedule permutation */ 380 | /* compute 0x0000 | rk37 | 0x0000 | rk38 */ 381 | ((u64 *)roundKeys128)[20] = Tcon128_Piccolo[19] ^ k2k1; 382 | 383 | /* compute 0x0000 | rk39 | 0x0000 | rk40 */ 384 | ((u64 *)roundKeys128)[21] = Tcon128_Piccolo[20] ^ k6k5; 385 | 386 | /* compute 0x0000 | rk41 | 0x0000 | rk42 */ 387 | ((u64 *)roundKeys128)[22] = Tcon128_Piccolo[21] ^ k0k7; 388 | 389 | /* compute 0x0000 | rk43 | 0x0000 | rk44 */ 390 | ((u64 *)roundKeys128)[23] = Tcon128_Piccolo[22] ^ k4k3; 391 | 392 | /* take into account Key Schedule permutation */ 393 | /* compute 0x0000 | rk45 | 0x0000 | rk46 */ 394 | ((u64 *)roundKeys128)[24] = Tcon128_Piccolo[23] ^ k6k1; 395 | 396 | /* compute 0x0000 | rk47 | 0x0000 | rk48 */ 397 | ((u64 *)roundKeys128)[25] = Tcon128_Piccolo[24] ^ k4k3; 398 | 399 | /* compute 0x0000 | rk49 | 0x0000 | rk50 */ 400 | ((u64 *)roundKeys128)[26] = Tcon128_Piccolo[25] ^ k2k5; 401 | 402 | /* compute 0x0000 | rk51 | 0x0000 | rk52 */ 403 | ((u64 *)roundKeys128)[27] = Tcon128_Piccolo[26] ^ k0k7; 404 | 405 | /* take into account Key Schedule permutation */ 406 | /* compute 0x0000 | rk53 | 0x0000 | rk54 */ 407 | ((u64 *)roundKeys128)[28] = Tcon128_Piccolo[27] ^ k4k1; 408 | 409 | /* compute 0x0000 | rk55 | 0x0000 | rk56 */ 410 | ((u64 *)roundKeys128)[29] = Tcon128_Piccolo[28] ^ k0k7; 411 | 412 | /* compute 0x0000 | rk57 | 0x0000 | rk58 */ 413 | ((u64 *)roundKeys128)[30] = Tcon128_Piccolo[29] ^ k6 ^ k3; 414 | 415 | /* compute 0x0000 | rk59 | 0x0000 | rk60 */ 416 | ((u64 *)roundKeys128)[31] = Tcon128_Piccolo[30] ^ k2k5; 417 | 418 | return; 419 | } 420 | #endif 421 | 422 | 423 | 424 | /****************************************************************************************************/ 425 | /* Piccolo80 encryption core */ 426 | #ifdef Piccolo80 427 | void Piccolo80table_core(const u8* plaintext, const u8* roundKeys80, u8* ciphertext) 
428 | { 429 | u64 * state, * roundKeys; 430 | 431 | /* cast variables */ 432 | *((u64*)ciphertext) = *((u64*)plaintext); 433 | state = (u64 *)ciphertext; 434 | roundKeys = (u64 *)roundKeys80; 435 | 436 | /* initial key whitening */ 437 | state[0] ^= roundKeys[0]; 438 | 439 | /* perform one round */ 440 | PICCOLOROUND(state[0], roundKeys[1]); 441 | 442 | /* perform 6 permutation and round transformations */ 443 | PICCOLOROUNDPERM(state[0]); 444 | PICCOLOROUND(state[0], roundKeys[2]); 445 | PICCOLOROUNDPERM(state[0]); 446 | PICCOLOROUND(state[0], roundKeys[3]); 447 | PICCOLOROUNDPERM(state[0]); 448 | PICCOLOROUND(state[0], roundKeys[4]); 449 | PICCOLOROUNDPERM(state[0]); 450 | PICCOLOROUND(state[0], roundKeys[5]); 451 | PICCOLOROUNDPERM(state[0]); 452 | PICCOLOROUND(state[0], roundKeys[6]); 453 | PICCOLOROUNDPERM(state[0]); 454 | PICCOLOROUND(state[0], roundKeys[7]); 455 | 456 | /* perform 6 permutation and round transformations */ 457 | PICCOLOROUNDPERM(state[0]); 458 | PICCOLOROUND(state[0], roundKeys[8]); 459 | PICCOLOROUNDPERM(state[0]); 460 | PICCOLOROUND(state[0], roundKeys[9]); 461 | PICCOLOROUNDPERM(state[0]); 462 | PICCOLOROUND(state[0], roundKeys[10]); 463 | PICCOLOROUNDPERM(state[0]); 464 | PICCOLOROUND(state[0], roundKeys[11]); 465 | PICCOLOROUNDPERM(state[0]); 466 | PICCOLOROUND(state[0], roundKeys[12]); 467 | PICCOLOROUNDPERM(state[0]); 468 | PICCOLOROUND(state[0], roundKeys[13]); 469 | 470 | /* perform 6 permutation and round transformations */ 471 | PICCOLOROUNDPERM(state[0]); 472 | PICCOLOROUND(state[0], roundKeys[14]); 473 | PICCOLOROUNDPERM(state[0]); 474 | PICCOLOROUND(state[0], roundKeys[15]); 475 | PICCOLOROUNDPERM(state[0]); 476 | PICCOLOROUND(state[0], roundKeys[16]); 477 | PICCOLOROUNDPERM(state[0]); 478 | PICCOLOROUND(state[0], roundKeys[17]); 479 | PICCOLOROUNDPERM(state[0]); 480 | PICCOLOROUND(state[0], roundKeys[18]); 481 | PICCOLOROUNDPERM(state[0]); 482 | PICCOLOROUND(state[0], roundKeys[19]); 483 | 484 | /* perform 6 permutation and 
round transformations */ 485 | PICCOLOROUNDPERM(state[0]); 486 | PICCOLOROUND(state[0], roundKeys[20]); 487 | PICCOLOROUNDPERM(state[0]); 488 | PICCOLOROUND(state[0], roundKeys[21]); 489 | PICCOLOROUNDPERM(state[0]); 490 | PICCOLOROUND(state[0], roundKeys[22]); 491 | PICCOLOROUNDPERM(state[0]); 492 | PICCOLOROUND(state[0], roundKeys[23]); 493 | PICCOLOROUNDPERM(state[0]); 494 | PICCOLOROUND(state[0], roundKeys[24]); 495 | PICCOLOROUNDPERM(state[0]); 496 | PICCOLOROUND(state[0], roundKeys[25]); 497 | 498 | /* final key whitening */ 499 | state[0] ^= roundKeys[26]; 500 | 501 | return; 502 | } 503 | #endif 504 | 505 | 506 | 507 | /****************************************************************************************************/ 508 | /* Piccolo128 encryption core */ 509 | #ifdef Piccolo128 510 | void Piccolo128table_core(const u8* plaintext, const u8* roundKeys128, u8* ciphertext) 511 | { 512 | u64 * state, * roundKeys; 513 | 514 | /* cast variables */ 515 | *((u64*)ciphertext) = *((u64*)plaintext); 516 | state = (u64 *)ciphertext; 517 | roundKeys = (u64 *)roundKeys128; 518 | 519 | /* initial key whitening */ 520 | state[0] ^= roundKeys[0]; 521 | 522 | /* perform one round */ 523 | PICCOLOROUND(state[0], roundKeys[1]); 524 | 525 | /* perform 6 permutation and round transformations */ 526 | PICCOLOROUNDPERM(state[0]); 527 | PICCOLOROUND(state[0], roundKeys[2]); 528 | PICCOLOROUNDPERM(state[0]); 529 | PICCOLOROUND(state[0], roundKeys[3]); 530 | PICCOLOROUNDPERM(state[0]); 531 | PICCOLOROUND(state[0], roundKeys[4]); 532 | PICCOLOROUNDPERM(state[0]); 533 | PICCOLOROUND(state[0], roundKeys[5]); 534 | PICCOLOROUNDPERM(state[0]); 535 | PICCOLOROUND(state[0], roundKeys[6]); 536 | PICCOLOROUNDPERM(state[0]); 537 | PICCOLOROUND(state[0], roundKeys[7]); 538 | 539 | /* perform 6 permutation and round transformations */ 540 | PICCOLOROUNDPERM(state[0]); 541 | PICCOLOROUND(state[0], roundKeys[8]); 542 | PICCOLOROUNDPERM(state[0]); 543 | PICCOLOROUND(state[0], roundKeys[9]); 544 
| PICCOLOROUNDPERM(state[0]); 545 | PICCOLOROUND(state[0], roundKeys[10]); 546 | PICCOLOROUNDPERM(state[0]); 547 | PICCOLOROUND(state[0], roundKeys[11]); 548 | PICCOLOROUNDPERM(state[0]); 549 | PICCOLOROUND(state[0], roundKeys[12]); 550 | PICCOLOROUNDPERM(state[0]); 551 | PICCOLOROUND(state[0], roundKeys[13]); 552 | 553 | /* perform 6 permutation and round transformations */ 554 | PICCOLOROUNDPERM(state[0]); 555 | PICCOLOROUND(state[0], roundKeys[14]); 556 | PICCOLOROUNDPERM(state[0]); 557 | PICCOLOROUND(state[0], roundKeys[15]); 558 | PICCOLOROUNDPERM(state[0]); 559 | PICCOLOROUND(state[0], roundKeys[16]); 560 | PICCOLOROUNDPERM(state[0]); 561 | PICCOLOROUND(state[0], roundKeys[17]); 562 | PICCOLOROUNDPERM(state[0]); 563 | PICCOLOROUND(state[0], roundKeys[18]); 564 | PICCOLOROUNDPERM(state[0]); 565 | PICCOLOROUND(state[0], roundKeys[19]); 566 | 567 | /* perform 6 permutation and round transformations */ 568 | PICCOLOROUNDPERM(state[0]); 569 | PICCOLOROUND(state[0], roundKeys[20]); 570 | PICCOLOROUNDPERM(state[0]); 571 | PICCOLOROUND(state[0], roundKeys[21]); 572 | PICCOLOROUNDPERM(state[0]); 573 | PICCOLOROUND(state[0], roundKeys[22]); 574 | PICCOLOROUNDPERM(state[0]); 575 | PICCOLOROUND(state[0], roundKeys[23]); 576 | PICCOLOROUNDPERM(state[0]); 577 | PICCOLOROUND(state[0], roundKeys[24]); 578 | PICCOLOROUNDPERM(state[0]); 579 | PICCOLOROUND(state[0], roundKeys[25]); 580 | 581 | /* perform 6 permutation and round transformations */ 582 | PICCOLOROUNDPERM(state[0]); 583 | PICCOLOROUND(state[0], roundKeys[26]); 584 | PICCOLOROUNDPERM(state[0]); 585 | PICCOLOROUND(state[0], roundKeys[27]); 586 | PICCOLOROUNDPERM(state[0]); 587 | PICCOLOROUND(state[0], roundKeys[28]); 588 | PICCOLOROUNDPERM(state[0]); 589 | PICCOLOROUND(state[0], roundKeys[29]); 590 | PICCOLOROUNDPERM(state[0]); 591 | PICCOLOROUND(state[0], roundKeys[30]); 592 | PICCOLOROUNDPERM(state[0]); 593 | PICCOLOROUND(state[0], roundKeys[31]); 594 | 595 | /* final key whitening */ 596 | state[0] ^= 
roundKeys[32]; 597 | 598 | return; 599 | } 600 | #endif 601 | 602 | 603 | 604 | /****************************************************************************************************/ 605 | /* Piccolo80 key schedule + encryption */ 606 | #ifdef Piccolo80 607 | void Piccolo80table_cipher(const u64 plaintext_in[TABLE_P], const u16 keys_in[TABLE_P][KEY80], u64 ciphertext_out[TABLE_P]) 608 | { 609 | /* Key schedule: subkeys are of size 2*264 bytes */ 610 | u8 subkeys[TABLE_P * Piccolo80_SUBKEYS_SIZE]; 611 | 612 | #ifdef MEASURE_PERF 613 | key_schedule_start = rdtsc(); 614 | #endif 615 | 616 | /* Compute the subkeys */ 617 | Piccolo80table_key_schedule((const u8*)keys_in, subkeys); 618 | 619 | #ifdef MEASURE_PERF 620 | key_schedule_end = rdtsc(); 621 | #endif 622 | 623 | #ifdef MEASURE_PERF 624 | encrypt_start = rdtsc(); 625 | #endif 626 | 627 | /* Call the core encryption */ 628 | Piccolo80table_core((const u8*)plaintext_in, subkeys, (u8*)ciphertext_out); 629 | 630 | #ifdef MEASURE_PERF 631 | encrypt_end = rdtsc(); 632 | #endif 633 | 634 | return; 635 | } 636 | #endif 637 | 638 | 639 | 640 | /****************************************************************************************************/ 641 | /* Piccolo128 key schedule + encryption */ 642 | #ifdef Piccolo128 643 | void Piccolo128table_cipher(const u64 plaintext_in[TABLE_P], const u16 keys_in[TABLE_P][KEY128], u64 ciphertext_out[TABLE_P]) 644 | { 645 | /* Key schedule: subkeys are of size 2*264 bytes */ 646 | u8 subkeys[TABLE_P * Piccolo128_SUBKEYS_SIZE]; 647 | 648 | #ifdef MEASURE_PERF 649 | key_schedule_start = rdtsc(); 650 | #endif 651 | /* Compute the subkeys */ 652 | Piccolo128table_key_schedule((const u8*)keys_in, subkeys); 653 | 654 | #ifdef MEASURE_PERF 655 | key_schedule_end = rdtsc(); 656 | #endif 657 | 658 | #ifdef MEASURE_PERF 659 | encrypt_start = rdtsc(); 660 | #endif 661 | 662 | /* Call the core encryption */ 663 | Piccolo128table_core((const u8*)plaintext_in, subkeys, (u8*)ciphertext_out); 664 | 665 
| #ifdef MEASURE_PERF 666 | encrypt_end = rdtsc(); 667 | #endif 668 | 669 | return; 670 | } 671 | #endif 672 | 673 | #endif 674 | --------------------------------------------------------------------------------