├── .dockerignore ├── .editorconfig ├── .github └── workflows │ ├── linux.yml │ └── windows.yml ├── .gitignore ├── .gitmodules ├── CudaKeeloq.vcxproj ├── CudaKeeloq.vcxproj.filters ├── LICENSE ├── build.sh ├── dockerfile ├── examples ├── alphabet.bin ├── dictionary.bin └── dictionary.words ├── makefile ├── readme.md ├── run.sh └── src ├── algorithm ├── keeloq │ ├── keeloq_decryptor.h │ ├── keeloq_encrypted.h │ ├── keeloq_kernel.cu │ ├── keeloq_kernel.h │ ├── keeloq_kernel.inl │ ├── keeloq_kernel_input.cpp │ ├── keeloq_kernel_input.h │ ├── keeloq_learning_types.cpp │ ├── keeloq_learning_types.h │ ├── keeloq_single_result.cpp │ └── keeloq_single_result.h ├── multibase_digit.h ├── multibase_number.h └── multibase_system.h ├── bruteforce ├── bruteforce_config.cpp ├── bruteforce_config.h ├── bruteforce_filters.cpp ├── bruteforce_filters.h ├── bruteforce_pattern.cpp ├── bruteforce_pattern.h ├── bruteforce_round.cpp ├── bruteforce_round.h ├── bruteforce_type.cpp ├── bruteforce_type.h └── generators │ ├── generator_bruteforce.cpp │ ├── generator_bruteforce.h │ ├── generator_bruteforce_filtered_kernel.inl │ ├── generator_bruteforce_pattern_kernel.inl │ ├── generator_bruteforce_seed_kernel.inl │ ├── generator_bruteforce_simple_kernel.inl │ └── generator_kernel.cu ├── common.h ├── device ├── cuda_array.h ├── cuda_common.h ├── cuda_context.h ├── cuda_double_array.h ├── cuda_object.h ├── cuda_span.h └── cuda_vector.h ├── host ├── command_line_args.cpp ├── command_line_args.h ├── console.cpp ├── console.h ├── host_utils.cpp ├── host_utils.h └── timer.h ├── kernels └── kernel_result.h ├── main.cpp └── tests ├── test_all.h ├── test_alphabet.cpp ├── test_alphabet.h ├── test_benchmark.cpp ├── test_benchmark.h ├── test_console.cpp ├── test_console.h ├── test_filters.cpp ├── test_filters.h ├── test_keeloq.cpp ├── test_keeloq.h ├── test_kernel.cu ├── test_pattern.cpp └── test_pattern.h /.dockerignore: -------------------------------------------------------------------------------- 1 | 
x64/* -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 4 6 | end_of_line = lf 7 | charset = utf-8 8 | trim_trailing_whitespace = true 9 | insert_trailing_newline = true 10 | 11 | [*.md] 12 | trim_trailing_whitespace = false 13 | 14 | [makefile] 15 | indent_style = tab 16 | trim_trailing_whitespace = false -------------------------------------------------------------------------------- /.github/workflows/linux.yml: -------------------------------------------------------------------------------- 1 | name: Linux 2 | on: 3 | push: 4 | tags: 5 | - test-linux* 6 | - release-v* 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | permissions: 12 | packages: write 13 | contents: read 14 | 15 | steps: 16 | - id: actor 17 | uses: ASzc/change-string-case-action@v5 18 | with: 19 | string: ${{ github.actor }} 20 | 21 | - name: Checkout 22 | uses: actions/checkout@v3.3.0 23 | with: 24 | submodules: 'true' 25 | 26 | - name: Linux build using docker 27 | run: chmod +x build.sh && ./build.sh 28 | 29 | - name: Tag container 30 | run: docker tag cudakeeloq:local ghcr.io/${{ steps.actor.outputs.lowercase }}/cudakeeloq:${{ github.ref_name }} 31 | 32 | - name: Login to GitHub Container Registry 33 | uses: docker/login-action@v2 34 | with: 35 | registry: ghcr.io 36 | username: ${{ github.actor }} 37 | password: ${{ secrets.GITHUB_TOKEN }} 38 | 39 | - name: Docker Push to ghcr.io 40 | run: docker push ghcr.io/${{ steps.actor.outputs.lowercase }}/cudakeeloq:${{ github.ref_name }} 41 | 42 | -------------------------------------------------------------------------------- /.github/workflows/windows.yml: -------------------------------------------------------------------------------- 1 | name: Windows 2 | on: 3 | push: 4 | tags: 5 | - test-windows* 6 | - release-v* 7 | 8 | jobs: 9 | build: 10 
| 11 | env: 12 | CUDA_VERSION_MAJOR: 12 13 | CUDA_VERSION_MINOR: 2 14 | CUDA_VERSION_PACTH: 0 15 | 16 | runs-on: windows-2022 17 | permissions: 18 | contents: write 19 | steps: 20 | 21 | - name: Setup CUDA (manual) 22 | env: 23 | CUDA_VERSION_STR: "${{env.CUDA_VERSION_MAJOR}}.${{env.CUDA_VERSION_MINOR}}.${{env.CUDA_VERSION_PACTH}}" 24 | PKGS_VERSION_STR: "${{env.CUDA_VERSION_MAJOR}}.${{env.CUDA_VERSION_MINOR}}" 25 | run: | 26 | echo "Downloading CUDA version: ${{env.CUDA_VERSION_STR}}" 27 | Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/${{env.CUDA_VERSION_STR}}/network_installers/windows/x86_64/wddm2/CUDAVisualStudioIntegration.exe" -OutFile ${{RUNNER.TEMP}}\cuda-vs-installer.exe 28 | Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/${{env.CUDA_VERSION_STR}}/network_installers/windows/x86_64/wddm2/nvcc.exe" -OutFile ${{RUNNER.TEMP}}\cuda-nvcc-installer.exe 29 | Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/${{env.CUDA_VERSION_STR}}/network_installers/windows/x86_64/wddm2/cudart.exe" -OutFile ${{RUNNER.TEMP}}\cuda-rt-installer.exe 30 | 31 | echo "Unpacking archives" 32 | 7z x ${{RUNNER.TEMP}}\cuda-vs-installer.exe extras\visual_studio_integration\MSBuildExtensions -o${{RUNNER.TEMP}}\cuda 33 | 7z x ${{RUNNER.TEMP}}\cuda-nvcc-installer.exe -o${{RUNNER.TEMP}}\cuda 34 | 7z x ${{RUNNER.TEMP}}\cuda-rt-installer.exe -o${{RUNNER.TEMP}}\cuda 35 | 36 | echo "Exporting Environment variables for next steps" 37 | [Environment]::SetEnvironmentVariable("CUDA_PATH", "${{RUNNER.TEMP}}\cuda") 38 | 39 | Add-Content $env:GITHUB_ENV "CUDA_PATH=$env:CUDA_PATH" 40 | Add-Content $env:GITHUB_ENV "CUDA_PATH_V${{env.CUDA_VERSION_MAJOR}}_${{env.CUDA_VERSION_MINOR}}=$env:CUDA_PATH" 41 | Add-Content $env:GITHUB_PATH "$env:CUDA_PATH\bin" 42 | 43 | - name: CUDA check 44 | run: | 45 | echo "CUDA PATH: $env:CUDA_PATH" 46 | ${{env.CUDA_PATH}}\bin\nvcc.exe -V 47 | 48 | - name: Checkout 49 | uses: 
actions/checkout@v3.3.0 50 | with: 51 | submodules: 'true' 52 | 53 | - name: Setup MSBuild 54 | uses: microsoft/setup-msbuild@v1.3.1 55 | with: 56 | # vs-version: latest 57 | msbuild-architecture: "x64" 58 | 59 | 60 | - name: Build Application 61 | run: msbuild CudaKeeloq.vcxproj -t:Rebuild -p:Configuration=Release -p:Platform=x64 62 | 63 | - name: Create Artifacts 64 | run: | 65 | mkdir artifacts 66 | xcopy x64\Release\CudaKeeloq.exe artifacts\ 67 | xcopy .\examples artifacts\examples /E /H /I /C 68 | Compress-Archive -Path artifacts/* -DestinationPath artifacts/cudakeeloq.zip 69 | 70 | - name: Create Release 71 | uses: ncipollo/release-action@v1.12.0 72 | with: 73 | artifacts: artifacts/cudakeeloq.zip 74 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | x64/ 2 | *.user 3 | .vscode/ 4 | .vs/ 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "cxxopts"] 2 | path = ThirdParty/cxxopts 3 | url = https://github.com/jarro2783/cxxopts.git 4 | [submodule "ThirdParty/cpp-terminal"] 5 | path = ThirdParty/cpp-terminal 6 | url = https://github.com/jupyter-xeus/cpp-terminal.git 7 | [submodule "ThirdParty/cxxopts"] 8 | path = ThirdParty/cxxopts 9 | url = https://github.com/jarro2783/cxxopts.git 10 | -------------------------------------------------------------------------------- /CudaKeeloq.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | x64 7 | 8 | 9 | Profile 10 | x64 11 | 12 | 13 | Release 14 | x64 15 | 16 | 17 | 18 | {1CC91BE8-9FA0-4CC1-A597-9EBFBC8C21CA} 19 | CudaKeeloq 20 | 10.0 21 | 22 | 23 | 24 | Application 25 | true 26 | MultiByte 27 | v143 28 | 29 | 30 | Application 31 | false 32 | true 33 | MultiByte 34 | v143 35 | 36 | 37 | 
Application 38 | false 39 | true 40 | MultiByte 41 | v143 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | true 59 | $(IncludePath);$(projectdir)\src;$(CUDA_PATH)\include;$(projectdir)\ThirdParty\cpp-terminal;$(projectdir)\ThirdParty; 60 | 61 | 62 | $(IncludePath);$(projectdir)\src;$(CUDA_PATH)\include;$(projectdir)\ThirdParty\cpp-terminal;$(projectdir)\ThirdParty; 63 | 64 | 65 | $(IncludePath);$(projectdir)\src;$(CUDA_PATH)\include;$(projectdir)\ThirdParty\cpp-terminal;$(projectdir)\ThirdParty; 66 | 67 | 68 | 69 | Level3 70 | Disabled 71 | WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 72 | stdcpp17 73 | stdc17 74 | 75 | 76 | true 77 | Console 78 | cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 79 | libcmt.lib 80 | 81 | 82 | 64 83 | compute_80,sm_80 84 | 85 | 86 | true 87 | false 88 | false 89 | compile 90 | false 91 | --std=c++17 %(AdditionalOptions) 92 | 93 | 94 | 95 | 96 | Level3 97 | MaxSpeed 98 | true 99 | true 100 | WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 101 | stdcpp17 102 | stdc17 103 | 104 | 105 | true 106 | true 107 | true 108 | Console 109 | cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 110 | libcmt.lib 111 | 112 | 113 | 64 114 | true 115 | 116 | 117 | compute_80,sm_80 118 | false 119 | false 120 | --std=c++17 %(AdditionalOptions) 121 | 122 | 123 | 124 | 125 | Level3 126 | MaxSpeed 127 | true 128 | true 129 | WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 130 | stdcpp17 131 | stdc17 132 | 133 | 134 | true 135 | true 136 | true 137 | Console 138 | 
cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 139 | libcmt.lib 140 | 141 | 142 | 64 143 | true 144 | 145 | 146 | compute_80,sm_80 147 | true 148 | false 149 | --std=c++17 %(AdditionalOptions) 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | -------------------------------------------------------------------------------- /CudaKeeloq.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | tests 7 | 8 | 9 | tests 10 | 11 | 12 | host 13 | 14 | 15 | host 16 | 17 | 18 | device 19 | 20 | 21 | device 22 | 23 | 24 | device 25 | 26 | 27 | device 28 | 29 | 30 | bruteforce 31 | 32 | 33 | bruteforce 34 | 35 | 36 | bruteforce 37 | 38 | 39 | bruteforce 40 | 41 | 42 | bruteforce\generators 43 | 44 | 45 | algorithm\keeloq 46 | 47 | 48 | algorithm\keeloq 49 | 50 | 51 | algorithm\keeloq 52 | 53 | 54 | algorithm\keeloq 55 | 56 | 57 | algorithm\keeloq 58 | 59 | 60 | kernels 61 | 62 | 63 | algorithm\keeloq 64 | 65 | 66 | tests 67 | 68 | 69 | algorithm 70 | 71 | 72 | algorithm 73 | 74 | 75 | bruteforce 76 | 77 | 78 | algorithm 79 | 80 | 81 | device 82 | 83 | 84 | tests 85 | 86 | 87 | tests 88 | 89 | 90 | device 91 | 92 | 93 | tests 94 | 95 | 96 | host 97 | 98 | 99 | tests 100 | 101 | 102 | device 103 | 104 | 105 | host 106 | 107 | 108 | 109 | 110 | 111 | tests 112 | 113 | 114 | tests 115 | 116 | 117 | host 118 | 119 | 120 | host 121 | 122 | 123 | bruteforce 124 | 125 | 
126 | bruteforce 127 | 128 | 129 | bruteforce 130 | 131 | 132 | bruteforce 133 | 134 | 135 | bruteforce\generators 136 | 137 | 138 | algorithm\keeloq 139 | 140 | 141 | algorithm\keeloq 142 | 143 | 144 | algorithm\keeloq 145 | 146 | 147 | bruteforce 148 | 149 | 150 | tests 151 | 152 | 153 | tests 154 | 155 | 156 | tests 157 | 158 | 159 | host 160 | 161 | 162 | tests 163 | 164 | 165 | 166 | 167 | {5526a2e2-4a02-4459-977a-dbdbc1265827} 168 | 169 | 170 | {076c7b6e-a941-47c2-9514-8616f16b61e8} 171 | 172 | 173 | {7d4a1a80-1bf1-48ba-95df-5c102fb9c8ca} 174 | 175 | 176 | {bc3686d0-8983-4b00-bc95-074a07972e2b} 177 | 178 | 179 | {366584ba-328c-44ba-85d8-08d370307c4f} 180 | 181 | 182 | {348c2a54-dd3e-4683-bc68-b9a5c9dfd9fa} 183 | 184 | 185 | {9e8a7676-0ad7-4e6a-b3ff-a61ddb6b9e0e} 186 | 187 | 188 | {8636e053-53f4-43e2-b4f2-6ea75d4a849e} 189 | 190 | 191 | 192 | 193 | bruteforce\generators 194 | 195 | 196 | tests 197 | 198 | 199 | algorithm\keeloq 200 | 201 | 202 | 203 | 204 | bruteforce\generators 205 | 206 | 207 | bruteforce\generators 208 | 209 | 210 | bruteforce\generators 211 | 212 | 213 | algorithm\keeloq 214 | 215 | 216 | 217 | 218 | bruteforce\generators 219 | 220 | 221 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 https://github.com/X-Stuff 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or 
substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | CONTAINER="${CONTAINER:-cudakeeloq}" 4 | TAG="${TAG:-local}" 5 | 6 | docker build . -t $CONTAINER:$TAG 7 | 8 | -------------------------------------------------------------------------------- /dockerfile: -------------------------------------------------------------------------------- 1 | ARG CONFIGURATION="release" 2 | ARG CUDA_MAJOR=12 3 | ARG CUDA_MINOR=2 4 | ARG CUDA_PATCH=0 5 | 6 | FROM nvidia/cuda:${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH}-devel-ubuntu22.04 as builder 7 | ARG CONFIGURATION 8 | 9 | # 10 | WORKDIR /workspace 11 | 12 | # Copy sources 13 | COPY examples/ examples 14 | COPY src/ src 15 | COPY ThirdParty/ ThirdParty 16 | COPY makefile makefile 17 | 18 | # make 19 | RUN make $CONFIGURATION 20 | 21 | # runner 22 | FROM nvidia/cuda:${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH}-base-ubuntu22.04 23 | ARG CONFIGURATION 24 | 25 | RUN groupadd cuda && useradd -m -d /app -g cuda cuda 26 | USER cuda 27 | 28 | WORKDIR /app 29 | COPY --chown=cuda:cuda --from=builder /workspace/x64/$CONFIGURATION/bin /app/ 30 | ENTRYPOINT [ "/app/CudaKeeloq" ] 31 | CMD [ "--help" ] 32 | -------------------------------------------------------------------------------- /examples/alphabet.bin: 
-------------------------------------------------------------------------------- 1 | 01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/@#$THIS DUPLICATED BYTES WILL BE DELETED -------------------------------------------------------------------------------- /examples/dictionary.bin: -------------------------------------------------------------------------------- 1 | W¿OÖ‰6áø—(Ðt€ OáíÎtÙ_: HK( -------------------------------------------------------------------------------- /examples/dictionary.words: -------------------------------------------------------------------------------- 1 | 0xC0FFEE00DEAD6666 2 | 0b10101010101010101010101010101010101 3 | 9223372036854775807 4 | 0xDEAD6666C0FFEE00:1234567 5 | 9223372036854775807:31337 6 | 0b10101010101010101010101010101010101:0987654321 -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | # THIS MAKE FILE IS SUPPOSED TO BE USED IN WITH DOCKER IMAGE nvidia/cuda:12.0.1-devel 2 | 3 | TARGET_NAME=CudaKeeloq 4 | 5 | CUDA_ROOT_DIR=/usr/local/cuda 6 | 7 | ARCH=x64 8 | CONFIG_RELEASE=release 9 | CONFIG_PROFILE=profile 10 | CONFIG_DEBUG=debug 11 | 12 | # CC compiler options: 13 | CC=g++ 14 | CC_FLAGS=-std=c++17 -Wall 15 | CC_INCLUDE=-I./src/ -I./ThirdParty/ -I./ThirdParty/cpp-terminal 16 | 17 | NVCC=nvcc 18 | NVCC_FLAGS=--gpu-architecture=compute_80 --gpu-code=sm_80 --std=c++17 19 | NVCC_INCLUDE=-I./src/ 20 | 21 | # CUDA library directory: 22 | CUDA_LIB_DIR= -L$(CUDA_ROOT_DIR)/lib64 23 | # CUDA include directory: 24 | CUDA_INC_DIR= -I$(CUDA_ROOT_DIR)/include 25 | # CUDA linking libraries: 26 | CUDA_LINK_LIBS= -lcudart 27 | 28 | 29 | # Configurations, default debug 30 | all: debug 31 | @echo No target specified. 
Default is debug 32 | 33 | release: OBJ_DIR=./$(ARCH)/$(CONFIG_RELEASE)/obj 34 | release: EXE_DIR=./$(ARCH)/$(CONFIG_RELEASE)/bin 35 | release: NVCC_FLAGS+= -use_fast_math -O3 -Xptxas -O3 --m64 36 | release: CC_FLAGS+= -O3 -DNDEBUG 37 | release:link 38 | 39 | profile: OBJ_DIR=./$(ARCH)/$(CONFIG_PROFILE)/obj 40 | profile: EXE_DIR=./$(ARCH)/$(CONFIG_PROFILE)/bin 41 | profile: NVCC_FLAGS+= -lineinfo -use_fast_math 42 | profile: CC_FLAGS+= -DNDEBUG 43 | profile:link 44 | 45 | debug: OBJ_DIR=./$(ARCH)/$(CONFIG_DEBUG)/obj 46 | debug: EXE_DIR=./$(ARCH)/$(CONFIG_DEBUG)/bin 47 | debug: NVCC_FLAGS+= -G 48 | debug: CC_FLAGS+= -D_DEBUG 49 | debug:link 50 | 51 | # Sources C++ 52 | CPP_FILES = $(shell find src/ -iname "*.cpp") 53 | CPP_OBJECTS = $(CPP_FILES:%.cpp=%.o) 54 | 55 | # Sources CUDA 56 | CUDA_FILES = $(shell find src/ -iname "*.cu") 57 | CUDA_OBJECTS = $(CUDA_FILES:%.cu=%.o) 58 | 59 | # Link 60 | link: $(CPP_OBJECTS) $(CUDA_OBJECTS) 61 | $(CC) $(CC_FLAGS) \ 62 | $(addprefix $(OBJ_DIR)/, $(notdir $(CPP_OBJECTS))) \ 63 | $(addprefix $(OBJ_DIR)/, $(notdir $(CUDA_OBJECTS))) \ 64 | -o $(EXE_DIR)/$(TARGET_NAME) $(CUDA_INC_DIR) $(CUDA_LIB_DIR) $(CUDA_LINK_LIBS) 65 | 66 | # Compile C++ 67 | $(CPP_OBJECTS): | mkdirs 68 | $(CC) $(CC_FLAGS) $(CC_INCLUDE) $(CUDA_INC_DIR) -c $(basename $@).cpp -o $(OBJ_DIR)/$(notdir $@) 69 | 70 | # Compile CUDA 71 | $(CUDA_OBJECTS): | mkdirs 72 | $(NVCC) $(NVCC_FLAGS) $(NVCC_INCLUDE) -c $(basename $@).cu -o $(OBJ_DIR)/$(notdir $@) 73 | 74 | # Prepare 75 | mkdirs: 76 | mkdir -p $(OBJ_DIR) 77 | mkdir -p $(EXE_DIR) 78 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | ## Intro 2 | This is a CUDA accelerated simple bruteforcer of [KeeLoq](https://en.wikipedia.org/wiki/KeeLoq) algorithm. 3 | 4 | ## Disclaimer 5 | 6 | > 64-bit keeloq key is 18,446,744,073,709,551,615 possible combinations. 7 | EVEN! 
if your GPU were able to calculate 1 billion keys per second, 8 | you would need 18446744073709551615 / 1000000000 / 3600 / 24 / 365 = 584 YEARS! to brute a single key. 9 | > My laptop 3080Ti can do only 230 MKeys/s 10 | So it's practically impossible to use this application "as is" in a real-life attack. 11 | 12 | ## Version history 13 | 14 | * `0.1.2` 15 | - Fixed `dockerfile`, added `CMD` (issue: https://github.com/X-Stuff/CudaKeeloq/issues/4). 16 | - CUDA version updated to `12.2` 17 | * `0.1.1` 18 | - Added seed bruteforce mode (issue: https://github.com/x-stuff/CudaKeeloq/issues/2). 19 | - Added support for specifying a seed in text dictionaries. 20 | - Fixed some minor bugs and internal refactoring. 21 | - Slightly improved performance (1-5%) 22 | * `0.1.0` 23 | - Initial public release. 24 | 25 | ## Capabilities 26 | 27 | * **Simple** (+1) bruteforce 28 | > Regular straightforward bruteforce where keys are just incremented by 1 29 | * **Filtered** (+1) bruteforce 30 | > Some basic filters for simple bruteforce, like "keys with more than 6 consecutive zeros". Filters may apply to `Include` and/or `Exclude` rules. E.g. you can exclude keys with all ASCII symbols. You may also use the `Include` filter but its performance is incredibly bad, so I don't recommend this attack type at all. 31 | * **Alphabet** attack 32 | > You may specify the exact set of bytes which should be used for generating keys during bruteforce. For example, you may want to use only numbers, or only ASCII symbols. 33 | * **Pattern** attack 34 | > This is like an extended alphabet. You may specify an individual set of bytes for each byte in the 64-bit key. For example, you may want the first byte to be any value, but the others to be only numbers. 35 | * **Dictionary** attack 36 | > Manufacturer keys will be taken from input files. Supports binary and text modes. In text mode it's also allowed to specify a seed. 37 | 38 | ## Limitations 39 | 40 | * Doesn't support mixed bruteforce mode, when you need to bruteforce keys and seeds.
You have to specify either `seed` or `key` if you want to brute `secure` or `faac` types. 41 | * Can't do a binary dictionary window attack. If you want something similar, you can make 63 more files, where each file's content is shifted by 1 to 63 bits to the left. 42 | 43 | ## Build 44 | 45 | ### Windows 46 | #### Requirements 47 | 48 | * CUDA Toolkit v12.2.0 49 | - nvcc 50 | - cuda runtime 51 | - visual studio extension 52 | 53 | * Microsoft Visual Studio 2022 54 | 55 | #### Compiling 56 | Just open the `.vcxproj` file and build it 57 | 58 | ### Linux 59 | #### Requirements 60 | * docker 61 | * NVIDIA Container [Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) 62 | 63 | #### Compiling 64 | ``` 65 | $ ./build.sh 66 | ``` 67 | This will create a container image `cudakeeloq:local` with the compiled app 68 | 69 | Run the bruteforcer 70 | ``` 71 | $ ./run.sh 72 | ``` 73 | > NOTE: You may need to have the CUDA docker extension installed for the `--gpus` command line argument to work.
74 | 75 | ### Different CUDA Version 76 | 77 | #### Windows 78 | Open `.vcxproj`, then find and replace: 79 | * `CUDA 12.2.targets` with the desired version 80 | * `CUDA 12.2.props` with the desired version 81 | 82 | #### Linux 83 | Open `dockerfile` and change the `CUDA_MAJOR`, `CUDA_MINOR` and `CUDA_PATCH` variables 84 | 85 | ## Run 86 | 87 | ### Requirements 88 | * NVidia GPU (1GB+ RAM) 89 | * RAM 1GB+ 90 | * **(Linux only)** installed CUDA docker extension `nvidia-container-toolkit` 91 | 92 | ### Examples 93 | 94 | #### Simple bruteforce 95 | 96 | ``` 97 | ./CudaKeeloq --inputs xx,yy,zz --mode=1 --start=0x9876543210 --count=1000000 98 | ``` 99 | - bruteforce of 1 million keys starting from `0x9876543210` 100 | 101 | #### Alphabet bruteforce 102 | 103 | ``` 104 | ./CudaKeeloq --inputs xx,yy,zz --mode=3 --learning-type=0 --alphabet=examples/alphabet.bin,10:20:30:AA:BB:CC:DD:EE:FF:02:33 105 | ``` 106 | Specified 2 alphabets - 2 attacks will be launched: 107 | - The first will use the file `examples/alphabet.bin` as the alphabet source. 108 | - The second alphabet is provided via the command line: `10:20:30:AA:BB:CC:DD:EE:FF:02:33` 109 | 110 | #### Pattern bruteforce 111 | 112 | ``` 113 | ./CudaKeeloq --inputs xx,yy,zz --mode=4 --pattern=FF:11:*:*:AA-FF:01|10:00:FF,*:*:*:*:AB:CD:EF:00 114 | ``` 115 | Specified 2 patterns - 2 attacks will be launched: 116 | - The first will check keys starting (from the least significant bytes) with `..00FF`, followed by a byte that is either `01` or `10`, then a byte from the range `AA, AB, AC, AD ... FF`, then any 2 bytes [`0x00`:`0xFF`], and the final 2 bytes will be `11` and `FF`. 117 | - The second has a constant lower 32-bit value `ABCDEF00`, and the higher 32 bits will be bruted. 118 | 119 | #### Seed bruteforce 120 | 121 | ``` 122 | ./CudaKeeloq --inputs xx,yy,zz --mode=5 --start= 123 | ``` 124 | This will launch a seed attack with the specified manufacturer key: ``.
If `--learning-type=` is not specified, it will try to check all learning types with a seed - `FAAC` and `Secure` ( the `Rev` versions are dropped intentionally since the manufacturer key is explicitly set ). You can specify a start seed with `--seed=` but it's kind of useless. Checking all 32-bit values is a matter of minutes, and the check will be forced to cover the whole `uint32` range anyway. 125 | 126 | #### Dictionary bruteforce 127 | 128 | ``` 129 | ./CudaKeeloq --inputs xx,yy,zz --mode=0 --word-dict=0xFDE4531BBACAD12,examples/dictionary.words --bin-dict=examples/dictionary.bin --bin-dict-mode=1 130 | ``` 131 | This will launch 2 dictionary attacks: 132 | 1. The explicit key `0xFDE4531BBACAD12` from the command line and the keys parsed from `examples/dictionary.words` 133 | 2. Keys created from the bytes of the file `examples/dictionary.bin`. Since `--bin-dict-mode=1` is specified, every 8 bytes read from the file will be reversed. So the bytes `01 00 00 00 ...` will become a `uint64` number with value `1`. 134 | 135 | Other arguments for this mode aren't used. 136 | 137 | ## Command line arguments 138 | 139 | ### Inputs 140 | 141 | * `--inputs=[i1, i2, i3]` - inputs are captured "over the air" bytes; you need to provide 1-3 of them, each formatted as a hexadecimal number: `0x1122334455667788`. 142 | > NOTE: It is possible to launch a bruteforce over a single captured input, but there will be tons of false matches. You shouldn't use it. Even with 2 inputs the chances of false matches are pretty high. 143 | 144 | #### Bruteforce range 145 | 146 | * `--start=` - defines the initial value from which bruteforce begins. Applies to all types except dictionary ( default is `0` ). 147 | For alphabet or pattern types, the specified value should be one which can be converted to the pattern or alphabet. E.g. 148 | if you use alphabet `77:88:FF:AA:BB` and specify `--start=0x778899FFAABBAABB` - bruteforce will start from `0x7788`**`77`**`FFAABBAABB`: since `99` does not exist in the alphabet, it will be replaced with the first byte of the alphabet.
149 | If you specify `0` as the start value, bruteforce will start from `0x77777..`. 150 | 151 | * `--count=` - number of keys to generate and check ( default is `uint64 max` - means all ). 152 | If you are using simple +1 mode, it will define the last key to check. In other modes, determining the last key might not be a trivial task. 153 | 154 | * `--seed=` - seed value. It is used only in `SECURE` and `FAAC` learning modes. Providing `seed` without a learning mode will just significantly reduce bruteforce speed. 155 | If you definitely know that the captured data is encrypted with a `seed`'ed algorithm - specify `--learning-type=4,5,8,9` (`SECURE` and `FAAC` both with `_rev` variation); there is no sense in that case to calculate the others. And vice versa, if you don't know the `seed` - do not specify it, otherwise useless calculations will be done. 156 | Not supported in `dictionary` mode. 157 | 158 | 159 | #### Modes 160 | 161 | In case of a `single` input - the match check will be done only by matching 18 bits of `serial`. 162 | A Keeloq OTA packet is divided into 2 parts: `fix` and `hop`. 163 | - `fix` - 4-bit encoded `button` and 28-bit `serial` number of transmitter 164 | - `hop` - encoded `serial`, `button` and `counter` 165 | So in single mode the decoded `serial` will be matched to the stripped `serial` from the `fix` part. 166 | This is not accurate and gives you tons of *phantoms*. 167 | 168 | In case of `normal` inputs (2-3) - the analysis will be slightly more complex. 169 | - All inputs will be decoded with the same key 170 | - All decoded `hop` parts will be compared by `serial` 171 | - if `serial` matches - then `button` will be checked - button should be the same 172 | - if `button` matches - then `counter` will be checked - it should be increasing with each input 173 | 174 | 3 inputs are enough to eliminate *phantoms*; no need to provide more (however there is still a possibility to catch one). 175 | 176 | 2 inputs might give you less accurate results with *phantoms*.
177 | 178 | Obviously `single` mode is 3-4 times faster than `normal` due to optimizations. However, results in `single` mode might not be accurate. 179 | 180 | #### Capture 181 | 182 | The idea of the normal flow is: 183 | - Set up your radio capture device. 184 | - Click the same button 3 times on your transmitter (same serial, same button, increasing counter). 185 | - Convert the encoded signal to bytes. 186 | - Provide these bytes as inputs. 187 | 188 | ### CUDA Setup 189 | 190 | * `--cuda-blocks=N` - `N` is the number of thread blocks for each bruteforce attack round 191 | * `--cuda-threads=N` - `N` is the number of CUDA threads for each block (above). If `0` (default) - the maximum from device caps will be set. 192 | 193 | The overall number of threads is the product of `cuda-blocks` and `cuda-threads`. 194 | 195 | Keep in mind that the more overall threads you have, the more memory they will consume. (e.g. `8196` and `1024` consume approx. 2.5 GB RAM (and GPU RAM)) 196 | 197 | ### Attack modes 198 | 199 | Each bruteforce run must define a mode. 200 | Several modes can be specified at the same time like `--mode=0,1,3,4` but you should provide all expected arguments for each mode. 201 | 202 | * `--mode=0` - Dictionary. Expects (one or both): 203 | - `--word-dict=[f1, w1, ...]` - `f1` - text dictionary file(s) with hexadecimal values, `w1` - the hexadecimal word itself. Supports a seed as well; it should be provided with `:` as the delimiter (e.g.: `0xAABBCCDDEE:1234`) and as a decimal number only. 204 | - `--bin-dict=[f1, f2]` - binary file(s), each 8 bytes of which will be used as a dictionary word. 205 | Supports `--bin-dict-mode=M` where `M` = { `0`, `1`, `2`}. `0` - as is (big endian). `1` - reverse (little endian), `2` - both. 206 | * `--mode=1` - Simple bruteforce mode. 207 | - `--start` defines the first key from which bruteforce begins. 208 | - `--count` how many keys to check. 209 | * `--mode=2` - Filtered bruteforce mode.
Same as simple, but with filters: 210 | - `--exclude-filter=V` where `V` is a number representing a combination of filter flags. (see: [bruteforce_filters.h](src/bruteforce/bruteforce_filters.h)) 211 | - `--include-filter=V` (same as above) 212 | * `--mode=3` - Alphabet mode. Expects: 213 | - `--alphabet=[f1, a1, ...]` - where `f1` is a binary file, the contents of which will be interpreted as the allowed bytes in a key during bruteforce, and `a1` is a `:`-separated hex string of bytes in the alphabet (like: `AA:BB:01`) 214 | - It is also allowed to use the `--start` and `--count` arguments, with the same meaning 215 | * `--mode=4` - Pattern mode. Expects: 216 | - `--pattern=[f1, p1, ...]` - where `f1` is a text file with a hexadecimal pattern, and `p1` is a `:`-separated hexadecimal pattern like `*:aa:00-33:0xF3|0x11:AL0 ...`. 217 | - `*` - means any byte 218 | - `0xNN-0xMM` - means range from `0xNN` to `0xMM` 219 | - `A|B|C` - means `A` or `B` or `C` 220 | - `AL[0..N]` - means alphabet from inputs (must be specified with `--alphabet=`) 221 | - `--alphabet=` - if the pattern has `AL` - an alphabet must be specified. 222 | * `--mode=5` - Seed bruteforce mode. Expects: 223 | - `--start=` - Manufacturer key. This will be the only key to check. Instead of bruteforcing keys, this mode bruteforces seeds. 224 | 225 | ### Learning types 226 | 227 | By default the app will try to brute all known learning types (16 or 12 depending on whether a seed is specified); however, if you know the exact learning type, you might speed up your bruteforce 12-16 times. You may also specify several learning types simultaneously: `--learning-type=0,5,7`. 228 | > NOTE: Depending on your GPU, at some point, using `ALL` learning types *might be* faster than explicitly specified ones, due to the GPU branching problem. 229 | 230 | Each learning type has its `_REV` modification. That means it will use the byte-reversed key for decryption.
See more in [keeloq_learning_types.h](src/algorithm/keeloq/keeloq_learning_types.h) 231 | 232 | Here and below `=x[y]` means: `x` is the value for normal mode, `y` for reversed. 233 | 234 | * `--learning-type=0[1]` - Simple learning 235 | * `--learning-type=2[3]` - Normal learning 236 | * `--learning-type=4[5]` - Secure learning (requires seed) 237 | * `--learning-type=6[7]` - XOR learning 238 | * `--learning-type=8[9]` - FAAC learning (requires seed) 239 | * `--learning-type=10[11]` - *UNKNOWN TYPE1* 240 | * `--learning-type=12[13]` - *UNKNOWN TYPE2* 241 | * `--learning-type=14[15]` - *UNKNOWN TYPE3* 242 | 243 | ### Miscellaneous 244 | 245 | * `--first-match` - if `true` (default), stops all bruteforce on the first match 246 | * `--test` - launches internal debug tests (useful mostly if built in `Debug` configuration) 247 | * `--benchmark` - launches the CUDA setup benchmark. Shows a comparison of different CUDA setups (blocks and threads). 248 | * `--help`, `-h` - prints help 249 | 250 | 251 | ## Performance 252 | 253 | > Windows executable, release mode, MSVS 2022, CUDA 12.0.0 254 | 255 | For my laptop's GPU ( 3080Ti ) the best results are achieved with `8196` CUDA Blocks and maximum CUDA threads (from device info - `1024`) - it gives me approx.: 256 | * `28` MKeys/s for `ALL` learning types if `seed` **is** specified. 257 | * `49` MKeys/s for `ALL` learning types if `seed` is **not** provided. 258 | * `500` MKeys/s for `Simple` ( the easiest type - a single keeloq decryption ). 259 | * `250` MKeys/s for `Normal` and `Secure`. 260 | * `220` MKeys/s for `FAAC`. 
261 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | CONTAINER="${CONTAINER:-cudakeeloq}" 4 | TAG="${TAG:-local}" 5 | DOCKER_ARGS="${DOCKER_ARGS:-}" 6 | # ${DOCKER_ARGS} stays unquoted so it can expand to several options; the image name and the forwarded arguments are quoted so that arguments with spaces are passed through unchanged 7 | docker run --rm -it --init --gpus=all ${DOCKER_ARGS} "$CONTAINER:$TAG" "$@" 8 | 9 | -------------------------------------------------------------------------------- /src/algorithm/keeloq/keeloq_decryptor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.h" 4 | 5 | #include 6 | 7 | #include "device/cuda_common.h" 8 | 9 | 10 | /** 11 | * Data struct which allows to decrypt encrypted data 12 | * In most cases it is just a 64-bit integer (8 bytes array) plus a seed 13 | */ 14 | struct Decryptor 15 | { 16 | Decryptor() = default; 17 | // key_rev is computed from the `key` member, which is declared (and therefore initialized) before it 18 | __host__ __device__ Decryptor(uint64_t k, uint32_t s) : key(k), key_seed(s), key_rev(misc::rev_bytes(key)) {} 19 | // Equality compares manufacturer key and seed; const so it can be used on const objects 20 | __host__ __device__ inline bool operator==(const Decryptor& other) const 21 | { 22 | return key == other.key && key_seed == other.key_seed; 23 | } 24 | // Ordering uses the manufacturer key only (the seed is ignored) 25 | __host__ __device__ inline bool operator<(const Decryptor& other) const 26 | { 27 | return key < other.key; 28 | } 29 | 30 | // Get manufacturer key 31 | __host__ __device__ inline uint64_t man() const { return key; } 32 | 33 | // Get seed 34 | __host__ __device__ inline uint32_t seed() const { return key_seed; } 35 | 36 | // Get byte-reversed manufacturer key 37 | __host__ __device__ inline uint64_t nam() const { return key_rev; } 38 | 39 | // If decryptor was initialized properly (a zero key is treated as "not initialized") 40 | __host__ __device__ inline bool is_valid() const { return key != 0; } 41 | 42 | protected: 43 | 44 | // manufacturer key 45 | uint64_t key; 46 | 47 | // seed (for special learning types only) 48 | uint32_t key_seed; 49 | 50 | // reversed manufacturer key 51 | uint64_t key_rev; 52 | 53 | }; 
-------------------------------------------------------------------------------- /src/algorithm/keeloq/keeloq_encrypted.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.h" 4 | 5 | #include "device/cuda_common.h" 6 | 7 | 8 | /** 9 | * Convenience struct 10 | * Represents data sent over the air (OTA) 11 | * Since from engineering perspective normal byte (bit) order is big endian 12 | * OTA has to be bit-reversed in order to get fixed and hopping codes 13 | */ 14 | struct EncParcel 15 | { 16 | // Raw data transmitted over the air 17 | uint64_t ota; 18 | 19 | __host__ EncParcel() : EncParcel(0) { } 20 | // Bit-reverses the 64-bit OTA value: the upper 32 bits become the fixed code, the lower 32 bits the hopping code 21 | __device__ __host__ EncParcel(uint64_t data) : ota(data) 22 | { 23 | uint64_t key = misc::rev_bits(ota, sizeof(ota) * 8); 24 | 25 | fixed = (uint32_t)(key >> 32); 26 | hopping = (uint32_t)(key); 27 | } 28 | 29 | // Fixed code in parcel 30 | __device__ __host__ inline uint32_t fix() const { return fixed; } 31 | 32 | // hopping code in parcel 33 | __device__ __host__ inline uint32_t hop() const { return hopping; } 34 | 35 | // lowest 10 bits of fixed code (mask 0x3FF) - low part of the serial (can be used in decryption) 36 | __device__ __host__ inline uint32_t srl() const { return fixed & 0x3FF; } 37 | 38 | // highest 4 bits of fixed code - button (can be used in decryption) 39 | __device__ __host__ inline uint32_t btn() const { return fixed >> 28; } 40 | 41 | private: 42 | 43 | // Fixed part of the parcel ( 28-bit serial | 4-bit button ) 44 | uint32_t fixed; 45 | 46 | // Encrypted hopping code ( keeloq encrypted serial, button, counter ) 47 | uint32_t hopping; 48 | }; -------------------------------------------------------------------------------- /src/algorithm/keeloq/keeloq_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "keeloq_kernel.h" 2 | 3 | #include "keeloq_kernel.inl" 4 | -------------------------------------------------------------------------------- 
/src/algorithm/keeloq/keeloq_kernel.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.h" 4 | 5 | #include "device/cuda_context.h" 6 | #include "kernels/kernel_result.h" 7 | 8 | #include "algorithm/keeloq/keeloq_kernel_input.h" 9 | #include "algorithm/keeloq/keeloq_learning_types.h" 10 | 11 | #include 12 | // 32-bit lookup constant: one bit of it is selected by the 5-bit value built by g5 / g5dec 13 | #define NLF_LOOKUP_CONSTANT 0x3a5c742e 14 | 15 | #ifdef NO_INNER_LOOPS 16 | #define KEELOQ_INNER_LOOP(ctx, index, num) uint32_t index = ctx.thread_id; 17 | #else 18 | #define KEELOQ_INNER_LOOP(ctx, index, num) CUDA_FOR_THREAD_ID(ctx, index, num) 19 | #endif // !NO_INNER_LOOPS 20 | 21 | 22 | // bit(x, n): n-th bit of x; g5: packs the bits a, b, c, d, e of x into a 5-bit number (a is the lowest bit) 23 | #define bit(x, n) (((x) >> (n)) & 1) 24 | #define g5(x, a, b, c, d, e) \ 25 | (bit(x, a) + bit(x, b) * 2 + bit(x, c) * 4 + bit(x, d) * 8 + bit(x, e) * 16) 26 | 27 | // g5dec: same value as g5(x, 0, 8, 19, 25, 30) - the bits 0, 8, 19, 25, 30 are masked at once (0x42080101) and shifted into positions 0..4. NOTE: declares a local `m` 28 | #define g5dec(x, g) \ 29 | auto m = (x & 0x42080101); \ 30 | g = (0b11111 & ( m | (m >> 7) | (m >> 17) | ( m >> 22) | (m >> 26))) 31 | 32 | // Reference decryption: 528 rounds, one key bit per round 33 | __device__ __host__ inline uint32_t keeloq_common_decrypt_orig(const uint32_t data, const uint64_t key) { 34 | uint32_t x = data, r; 35 | for (r = 0; r < 528; r++) 36 | x = (x << 1) ^ bit(x, 31) ^ bit(x, 15) ^ (uint32_t)bit(key, (15 - r) & 63) ^ 37 | bit(NLF_LOOKUP_CONSTANT, g5(x, 0, 8, 19, 25, 30)); 38 | return x; 39 | } 40 | 41 | // Same rounds as keeloq_common_decrypt_orig (33 * 16 = 528) but uses g5dec and a single `& 1` per round; reported to be about 5 times faster 42 | __device__ __host__ inline uint32_t keeloq_common_decrypt(const uint32_t data, const uint64_t key) 43 | { 44 | uint32_t x = data, g, k, f; 45 | int32_t r = 15; 46 | 47 | // outer 33 cycles 48 | for (uint8_t outer = 0; outer < 33; ++outer) 49 | { 50 | // Inner 16 cycles which could be unrolled (improves performance in release - decreases in debug) 51 | UNROLL 52 | for (uint8_t inner = 0; inner < 16; ++inner) 53 | { 54 | uint32_t key_bit = r & 0b111111; 55 | 56 | g5dec(x, g); 57 | 58 | k = (uint32_t)((key >> key_bit)); 59 | f = ((x >> 31) ^ (x >> 15) ^ (NLF_LOOKUP_CONSTANT >> g) ^ k) & 
1; 60 | x = (x << 1) ^ f; 61 | 62 | --r; 63 | } 64 | } 65 | 66 | 67 | return x; 68 | } 69 | // Encryption: 528 rounds, one key bit per round 70 | __device__ __host__ inline uint32_t keeloq_common_encrypt(const uint32_t data, const uint64_t key) { 71 | uint32_t x = data, r; 72 | for (r = 0; r < 528; r++) 73 | x = (x >> 1) ^ ((bit(x, 0) ^ bit(x, 16) ^ (uint32_t)bit(key, r & 63) ^ 74 | bit(NLF_LOOKUP_CONSTANT, g5(x, 1, 9, 20, 26, 31))) 75 | << 31); 76 | return x; 77 | } 78 | 79 | namespace keeloq 80 | { 81 | namespace kernels 82 | { 83 | 84 | // launch simple keeloq calculation on GPU to check if everything is working 85 | __host__ bool cuda_is_working(); 86 | 87 | // Main kernel launcher wrapper 88 | __host__ KernelResult cuda_brute(KeeloqKernelInput & mainInputs, uint16_t ThreadBlocks, uint16_t ThreadsInBlock); 89 | 90 | } 91 | 92 | // Get encrypted OTA data for specific configuration with key and learning ( xor simple and normal supported ) 93 | __host__ EncParcel GetOTA(uint64_t key, uint32_t seed, uint32_t serial, uint8_t button, uint16_t count, KeeloqLearningType::Type learning); 94 | 95 | } -------------------------------------------------------------------------------- /src/algorithm/keeloq/keeloq_kernel_input.cpp: -------------------------------------------------------------------------------- 1 | #include "keeloq_kernel_input.h" 2 | #include "common.h" 3 | // Copies up to `num` decryptors of `source`, starting at index `from`, into the decryptors array (does nothing if the array is not set) 4 | void KeeloqKernelInput::WriteDecryptors(const std::vector& source, size_t from, size_t num) 5 | { 6 | if (decryptors != nullptr) 7 | { 8 | assert(config.type == BruteforceType::Dictionary); 9 | // clamp `from` first: `source.size() - from` is unsigned and would wrap around if `from` is past the end 10 | const size_t first = std::min(from, source.size()); 11 | decryptors->write(source.data() + first, std::min(num, source.size() - first)); 12 | } 13 | } 14 | 15 | 16 | void KeeloqKernelInput::NextDecryptor() 17 | { 18 | assert(config.type != BruteforceType::Dictionary); 19 | config.next_decryptor(); 20 | } 21 | 22 | void KeeloqKernelInput::Initialize(const BruteforceConfig& InConfig, const KeeloqLearningType::Mask& InLearnings) 23 | { 24 | config = InConfig; 25 | learnings = 
InLearnings; 26 | allLearnings = learnings.is_all_enabled(); 27 | } 28 | // Prepares generator inputs: for the Filtered attack the filters are synced with the current start key 29 | void KeeloqKernelInput::BeforeGenerateDecryptors() 30 | { 31 | switch (config.type) 32 | { 33 | case BruteforceType::Filtered: 34 | { 35 | config.filters.sync_key = config.start.man(); 36 | break; 37 | } 38 | default: 39 | break; 40 | } 41 | } 42 | 43 | void KeeloqKernelInput::AfterGeneratedDecryptors() 44 | { 45 | // the last generated decryptor is stored as the starting point for the next batch 46 | // Warning: In case of non-aligned calculations "real" last decryptor may be somewhere in the middle of array 47 | config.last = decryptors->host_last(); 48 | } 49 | 50 | size_t KeeloqKernelInput::NumInputs() const 51 | { 52 | assert(encdata != nullptr && "Encdata unknown yet!"); 53 | 54 | auto num = encdata ? encdata->host().num : 0; 55 | 56 | assert(num >= 1 && num <= 3 && "NumInputs(): Most probably something was wrong with memory copying!"); 57 | 58 | return num; 59 | } 60 | 61 | bool KeeloqKernelInput::InputsFixMatch() const 62 | { 63 | assert(encdata != nullptr && "Encdata unknown yet!"); 64 | 65 | if (encdata) 66 | { 67 | std::vector enc_data; 68 | encdata->copy(enc_data); 69 | 70 | assert(enc_data.size() >= 1 && enc_data.size() <= 3 && "InputsFixMatch(): Most probably something was wrong with memory copying!"); 71 | 72 | if (enc_data.size() > 2) 73 | { 74 | return enc_data[0].fix() == enc_data[1].fix() && enc_data[1].fix() == enc_data[2].fix(); 75 | } 76 | else if (enc_data.size() > 1) 77 | { 78 | return enc_data[0].fix() == enc_data[1].fix(); 79 | } 80 | } 81 | // a single input (or missing encdata) has nothing to compare with and is reported as "no match" 82 | return false; 83 | } 84 | -------------------------------------------------------------------------------- /src/algorithm/keeloq/keeloq_kernel_input.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.h" 4 | 5 | #include // memcpy 6 | 7 | #include "device/cuda_array.h" 8 | #include "device/cuda_object.h" 9 | 10 | #include "algorithm/keeloq/keeloq_encrypted.h" 11 | #include 
"algorithm/keeloq/keeloq_decryptor.h" 12 | #include "algorithm/keeloq/keeloq_single_result.h" 13 | #include "algorithm/keeloq/keeloq_learning_types.h" 14 | 15 | #include "bruteforce/bruteforce_config.h" 16 | 17 | 18 | // Input data for main keeloq calculation kernel 19 | struct KeeloqKernelInput : TGenericGpuObject 20 | { 21 | // Constant per-run input data (captured encoded) 22 | CudaArray* encdata = nullptr; 23 | 24 | // Single-run set of decryptors 25 | CudaArray* decryptors = nullptr; 26 | 27 | // Single-run results 28 | CudaArray* results = nullptr; 29 | 30 | KeeloqKernelInput() : TGenericGpuObject(this) 31 | { 32 | } 33 | // Takes over the array pointers, the config and the learning mask of `other` (the pointers of `other` are left untouched) 34 | KeeloqKernelInput(KeeloqKernelInput&& other) noexcept : TGenericGpuObject(this) 35 | { 36 | encdata = other.encdata; 37 | decryptors = other.decryptors; 38 | results = other.results; 39 | config = other.config; 40 | learnings = other.learnings; 41 | allLearnings = other.allLearnings; // the cached flag has to follow the copied mask, otherwise it silently resets to false 42 | } 43 | KeeloqKernelInput& operator=(KeeloqKernelInput&& other) = delete; 44 | KeeloqKernelInput& operator=(const KeeloqKernelInput& other) = delete; 45 | 46 | public: 47 | // Mask of learning types to use for decryption 48 | __device__ __inline__ const KeeloqLearningType::Mask& GetLearningMask() const { return learnings; } 49 | 50 | // Cached "all learning types are enabled" flag 51 | __device__ __inline__ bool AllLearningsEnabled() const { return allLearnings; } 52 | 53 | // Bruteforce configuration of this run 54 | __device__ __inline__ const BruteforceConfig& GetConfig() const { return config; } 55 | 56 | public: 57 | void WriteDecryptors(const std::vector& source, size_t from, size_t num); 58 | 59 | void NextDecryptor(); 60 | 61 | void Initialize(const BruteforceConfig& inConfig, const KeeloqLearningType::Mask& inLearnings); 62 | 63 | // A "callback" which is called by generator. Used to prepare inputs for generators 64 | void BeforeGenerateDecryptors(); 65 | 66 | // A "callback" which is called after generator creates Decryptors. Used to set correct last generated Decryptor 67 | void AfterGeneratedDecryptors(); 68 | 69 | // Get Number of OTA inputs. 
Will do a GPU->CPU copy 70 | size_t NumInputs() const; 71 | 72 | // Do the fixed parts of inputs match? Will do a GPU->CPU copy 73 | bool InputsFixMatch() const; 74 | private: 75 | // Which learning types to use for decryption 76 | KeeloqLearningType::Mask learnings; 77 | 78 | // optimization: result of learnings.is_all_enabled() stored as a plain bool that can be read from GPU 79 | bool allLearnings = false; 80 | 81 | // bruteforce configuration (holds the decryptor from which generation will start) 82 | BruteforceConfig config; 83 | }; 84 | -------------------------------------------------------------------------------- /src/algorithm/keeloq/keeloq_learning_types.cpp: -------------------------------------------------------------------------------- 1 | #include "keeloq_learning_types.h" 2 | 3 | #include 4 | 5 | // Names indexed by learning type value; the extra last entry ("ALL") sits at index KeeloqLearningType::LAST 6 | const char* KeeloqLearningType::LearningNames[] = { 7 | "KEELOQ_LEARNING_SIMPLE", 8 | "KEELOQ_LEARNING_SIMPLE_REV", 9 | "KEELOQ_LEARNING_NORMAL", 10 | "KEELOQ_LEARNING_NORMAL_REV", 11 | "KEELOQ_LEARNING_SECURE", 12 | "KEELOQ_LEARNING_SECURE_REV", 13 | "KEELOQ_LEARNING_MAGIC_XOR_TYPE_1", 14 | "KEELOQ_LEARNING_MAGIC_XOR_TYPE_1_REV", 15 | "KEELOQ_LEARNING_FAAC", 16 | "KEELOQ_LEARNING_FAAC_REV", 17 | "KEELOQ_LEARNING_MAGIC_SERIAL_TYPE_1", 18 | "KEELOQ_LEARNING_MAGIC_SERIAL_TYPE_1_REV", 19 | "KEELOQ_LEARNING_MAGIC_SERIAL_TYPE_2", 20 | "KEELOQ_LEARNING_MAGIC_SERIAL_TYPE_2_REV", 21 | "KEELOQ_LEARNING_MAGIC_SERIAL_TYPE_3", 22 | "KEELOQ_LEARNING_MAGIC_SERIAL_TYPE_3_REV", 23 | "ALL" 24 | }; 25 | 26 | const size_t KeeloqLearningType::LearningNamesCount = sizeof(LearningNames) / sizeof(char*); 27 | 28 | // An empty list means "all learning types" and is printed as "ALL" 29 | std::string KeeloqLearningType::to_string(const std::vector& learning_types) 30 | { 31 | if (learning_types.size() == 0) 32 | { 33 | return LearningNames[KeeloqLearningType::LAST]; 34 | } 35 | 36 | return to_mask(learning_types).to_string(); 37 | } 38 | // "ALL" if every learning type is enabled, otherwise a ", " separated list of the enabled type names 39 | std::string KeeloqLearningType::Mask::to_string() const 40 | { 41 | if (is_all_enabled()) 42 | { 43 | return LearningNames[KeeloqLearningType::LAST]; 44 | } 45 | 46 | 
std::string result; 47 | for (auto type = 0; type < KeeloqLearningType::LAST; ++type) 48 | { 49 | if (values[type]) 50 | { 51 | if (result.size() > 0) 52 | { 53 | result += ", "; 54 | } 55 | result += KeeloqLearningType::Name(type); 56 | } 57 | } 58 | 59 | return result; 60 | } 61 | 62 | KeeloqLearningType::Mask KeeloqLearningType::to_mask(const std::vector& in_types) 63 | { 64 | KeeloqLearningType::Mask result; 65 | 66 | if (in_types.size() > 0) 67 | { 68 | for (auto type : in_types) 69 | { 70 | result.values[type] = true; 71 | } 72 | } 73 | else 74 | { 75 | memcpy(result.values, KeeloqLearningType::Mask::All, sizeof(KeeloqLearningType::Mask::All)); 76 | } 77 | 78 | return result; 79 | } 80 | 81 | bool KeeloqLearningType::Mask::is_all_enabled() const 82 | { 83 | return std::memcmp(values, All, sizeof(All)) == 0; 84 | } 85 | -------------------------------------------------------------------------------- /src/algorithm/keeloq/keeloq_learning_types.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | enum class LearningDectyptionMode 12 | { 13 | // Explicit defined learning types 14 | Invalid = 0, 15 | 16 | Explicit = 1 << 0, 17 | 18 | Force = 1 << 1, 19 | 20 | Normal = 1 << 2, 21 | 22 | Seeded = 1 << 3, 23 | 24 | // Disable Reverse manufacturer key calculations 25 | NoRev = 1 << 4, 26 | 27 | // Run only learning types without seed 28 | ForceNormal = Force | Normal, 29 | 30 | // Run only learning types with seed 31 | ForceSeeded = Force | Seeded, 32 | 33 | // Explicit defined but without seed 34 | ExplicitNormal = Explicit | Normal, 35 | 36 | // Explicit defined but with seed only 37 | ExplicitSeeded = Explicit | Seeded, 38 | 39 | // RUNS ALL LEARNING TYPES. 
Seeded Included, even if seed is 0 40 | ForceAll = ForceNormal | ForceSeeded, 41 | 42 | // Runs runtime checks if learning type needs to be calculated (specified via mask) 43 | ExplicitAll = ExplicitNormal | ExplicitSeeded 44 | }; 45 | 46 | /** 47 | * reference: https://github.com/DarkFlippers/unleashed-firmware/blob/dev/lib/subghz/protocols/keeloq_common.h 48 | */ 49 | struct KeeloqLearningType 50 | { 51 | using Type = uint8_t; 52 | // Every learning type is directly followed by its `_Rev` variant 53 | enum : Type 54 | { 55 | Simple = 0, 56 | Simple_Rev, 57 | 58 | Normal, 59 | Normal_Rev, 60 | 61 | Secure, 62 | Secure_Rev, 63 | 64 | Xor, 65 | Xor_Rev, 66 | 67 | Faac, 68 | Faac_Rev, 69 | 70 | Serial1, 71 | Serial1_Rev, 72 | 73 | Serial2, 74 | Serial2_Rev, 75 | 76 | Serial3, 77 | Serial3_Rev, 78 | // number of learning types (not a learning type itself) 79 | LAST, 80 | 81 | INVALID = 0xff, 82 | }; 83 | // One enable flag per learning type 84 | struct Mask 85 | { 86 | friend struct KeeloqLearningType; 87 | 88 | // Default mask when all learning types are enabled 89 | static constexpr Type All[LAST] = { true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true }; 90 | // No bounds checks in operator[] and set(): index must be less than LAST 91 | __host__ __device__ __inline__ bool operator[](uint8_t index) const { return values[index]; } 92 | 93 | void set(uint8_t index, bool is_enabled) { values[index] = is_enabled; } 94 | 95 | bool is_all_enabled() const; 96 | 97 | std::string to_string() const; 98 | 99 | private: 100 | uint8_t values[LAST] = { 0 }; 101 | }; 102 | 103 | public: 104 | 105 | static std::string to_string(const std::vector& learning_types); 106 | 107 | static Mask to_mask(const std::vector& in_types); 108 | static Mask full_mask() { return to_mask({}); } 109 | // Decimal string of a learning type value 110 | static constexpr const char* ValueString(Type type) 111 | { 112 | // strings for 0..32 only; there is no bounds check, so `type` must not be greater than 32 113 | constexpr const char* LUT[]{ 114 | "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", 115 | "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", 116 | "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", 117 | "30", "31", "32" 118 | }; 119 | 120 | return LUT[type]; 121 | } 122 | 123 | static constexpr 
const char* Name(Type type) 124 | { 125 | if (type >= LearningNamesCount) { 126 | return "INVALID"; 127 | } 128 | 129 | return LearningNames[type]; 130 | } 131 | 132 | private: 133 | 134 | static const char* LearningNames[]; 135 | 136 | static const size_t LearningNamesCount; 137 | }; 138 | -------------------------------------------------------------------------------- /src/algorithm/keeloq/keeloq_single_result.cpp: -------------------------------------------------------------------------------- 1 | #include "keeloq_single_result.h" 2 | 3 | #include 4 | 5 | #include "device/cuda_common.h" 6 | 7 | 8 | namespace 9 | { 10 | uint32_t SerialFromOTA(uint64_t ota) // lower 28 bits of the upper half of the bit-reversed OTA value 11 | { 12 | return misc::rev_bits(ota, sizeof(ota) * 8) >> 32 & 0x0FFFFFFF; 13 | } 14 | } 15 | // Prints one decrypted element: learning type name, button, serial, serial from OTA, counter and the optional "(MATCH)" mark 16 | void SingleResult::DecryptedArray::print(uint8_t element, uint64_t ota, bool ismatch) const 17 | { 18 | printf("[%-40s] Btn:0x%X\tSerial:0x%X (0x%" PRIX32 ")\tCounter:0x%X\t%s\n", KeeloqLearningType::Name(element), 19 | (data[element] >> 28), // Button (highest 4 bits) 20 | (data[element] >> 16) & 0x3ff, // Serial (10 bits of the decrypted value) 21 | SerialFromOTA(ota), // Serial (OTA) 22 | data[element] & 0xFFFF, // Counter (lowest 16 bits) 23 | (ismatch ? 
"(MATCH)" : "")); 24 | } 25 | // Prints every element with ota = -1 and without the match mark 26 | void SingleResult::DecryptedArray::print() const 27 | { 28 | for (uint8_t i = 0; i < ResultsCount; ++i) 29 | { 30 | print(i, -1, false); 31 | } 32 | } 33 | // Prints the header and then either all decrypted elements or only the matched one (onlymatch) 34 | void SingleResult::print(bool onlymatch /* = true */) const 35 | { 36 | printf("Results (Input: 0x%" PRIX64 " - Man key: 0x%" PRIX64 " - Seed: %u )\n\n", 37 | encrypted.ota, decryptor.man(), decryptor.seed()); 38 | 39 | for (uint8_t i = 0; i < ResultsCount; ++i) 40 | { 41 | bool isMatch = match == i; 42 | if (!onlymatch) 43 | { 44 | decrypted.print(i, encrypted.ota, isMatch); 45 | } 46 | else if (isMatch) 47 | { 48 | decrypted.print(i, encrypted.ota, isMatch); 49 | } 50 | } 51 | printf("\n"); 52 | } -------------------------------------------------------------------------------- /src/algorithm/keeloq/keeloq_single_result.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.h" 4 | 5 | #include 6 | 7 | #include "algorithm/keeloq/keeloq_learning_types.h" 8 | #include "algorithm/keeloq/keeloq_encrypted.h" 9 | #include "algorithm/keeloq/keeloq_decryptor.h" 10 | 11 | 12 | /** 13 | * For each tested manufacturer key we retrieve these results 14 | * Depending on selected learning type you may have from 1 to KeeloqLearningType::LAST (16 at the moment) 15 | * decrypted results for further analysis 16 | */ 17 | struct SingleResult 18 | { 19 | static constexpr uint8_t ResultsCount = KeeloqLearningType::LAST; 20 | 21 | struct DecryptedArray 22 | { 23 | // fixed size array for every learning type 24 | // If it is in global memory (common case) - use operator[] - through cache 25 | // If it is thread local - use direct access 26 | uint32_t data[ResultsCount]; 27 | 28 | __host__ __device__ inline uint32_t operator[](uint32_t index) const 29 | { 30 | assert(index < ResultsCount && "Invalid index of decrypted data. 
Bigger than last element"); 31 | #if __CUDA_ARCH__ 32 | return __ldca(&data[index]); 33 | #else 34 | return data[index]; 35 | #endif 36 | } 37 | // Serial part of the decrypted value: 10 bits starting at bit 16 38 | __host__ __device__ inline uint32_t srl(KeeloqLearningType::Type learning) const 39 | { 40 | return ((*this)[learning] >> 16) & 0x3ff; 41 | } 42 | // Button part of the decrypted value: highest 4 bits 43 | __host__ __device__ inline uint32_t btn(KeeloqLearningType::Type learning) const 44 | { 45 | return ((*this)[learning] >> 28); 46 | } 47 | // Counter part of the decrypted value: lowest 16 bits 48 | __host__ __device__ inline uint32_t cnt(KeeloqLearningType::Type learning) const 49 | { 50 | return ((*this)[learning]) & 0x0000FFFF; 51 | } 52 | 53 | void print(uint8_t element, uint64_t ota, bool ismatch) const; 54 | 55 | void print() const; 56 | }; 57 | 58 | 59 | // Input encrypted data 60 | EncParcel encrypted; 61 | 62 | // used manufacturer key and seed for this result 63 | Decryptor decryptor; 64 | 65 | // Decrypted values for each known learning type 66 | DecryptedArray decrypted; 67 | 68 | // Set by GPU after analysis if there was a match 69 | KeeloqLearningType::Type match; 70 | 71 | void print(bool onlymatch = true) const; 72 | }; -------------------------------------------------------------------------------- /src/algorithm/multibase_digit.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.h" 4 | 5 | #include 6 | #include 7 | 8 | 9 | /** 10 | * This struct represents a single digit in a multi-base system (each digit has its own base) 11 | * Imagine this like a cylinder with N elements on it 12 | * 13 | * This struct represents all possible variants for a number (byte) in multi-base system (attack pattern) setup 14 | */ 15 | struct MultibaseDigit 16 | { 17 | template friend struct MultibaseSystem; 18 | 19 | // Creates digit config 20 | __host__ inline MultibaseDigit(const std::vector& numerals); 21 | 22 | public: 23 | // Return numeral by index 24 | __host__ __device__ inline uint8_t numeral(uint8_t in_index) const; 25 | 26 | // Cast from byte-255 value 
to Digit's config 27 | __host__ __device__ inline uint8_t cast(uint8_t value) const { return numeral(lookup(value)); } 28 | 29 | // return numeral index of the value 30 | // e.g. 31 | // num = { 0x04, 0xAB, 0xd7, 0x56 } 32 | // lookup(0xAB) returns 1 33 | // lookup(0x56) returns 3 34 | // lookup(0xFF) returns 0 (not found returns default) 35 | __host__ __device__ inline uint8_t lookup(uint8_t value) const { return lut[value]; } 36 | 37 | // return count of possible numerals for that digit 38 | __host__ __device__ inline uint8_t count() const { return size; } 39 | // copy of the first `size` numerals 40 | __host__ inline std::vector as_vector() const { return std::vector(&num[0], &num[0] + size); } 41 | // numerals as a ':' separated hex string 42 | __host__ inline std::string to_string() const; 43 | 44 | private: 45 | MultibaseDigit() : MultibaseDigit(DefaultByteArray<>::as_vector>()) 46 | { 47 | } 48 | 49 | // numeral values. it may be not just 0,1,2,3,4... 50 | // but for base 4 it may be: 0xA3, 0xCC, 0x01, 0x22 51 | uint8_t num[256] = { 0 }; 52 | 53 | // lookup table: 54 | // at index that equals numeral value there is a value which represents index in numerals 55 | // e.g. https://asciiflow.com/ 56 | // ┌───────────────────────┐ 57 | // ▲ │ 58 | // numerals = [ 0x03, 0x02, 0x01, 0x00, ... garbage. ] │ 59 | // ▲ │ 60 | // ┌──────────────────┘ │ 61 | // ▲ │ 62 | // lut = [ 0x03, 0x02, 0x01, 0x00, 0x00 ... 
0x00] │ 63 | // ▲ │ 64 | // └──────────────────────────────────────────┘ 65 | // 66 | uint8_t lut[256] = { 0 }; 67 | 68 | // Actual size of numerals (the base of the number represented by this digit) 69 | uint8_t size = 0; 70 | }; 71 | 72 | __host__ __device__ uint8_t MultibaseDigit::numeral(uint8_t in_index) const 73 | { 74 | assert(in_index < size); 75 | 76 | // Important for optimization purposes 77 | // WE ARE NOT USING (value % size) 78 | return num[in_index]; 79 | } 80 | // Builds the digit from the given numerals: duplicates are skipped, at most 255 numerals are kept 81 | __host__ inline MultibaseDigit::MultibaseDigit(const std::vector& numerals) 82 | { 83 | // incrementing in the loop, for the duplicate numerals cases 84 | size = 0; 85 | // size_t index: an 8-bit index wraps around and never terminates for 256 or more numerals 86 | for (size_t i = 0; i < numerals.size(); ++i) 87 | { 88 | // Getting next numeral candidate 89 | uint8_t numeral_value = numerals[i]; 90 | 91 | // lut holds 0 both for "not added yet" and for the numeral stored at index 0, 92 | // so the first numeral is compared explicitly to catch its duplicates 93 | const bool known = lut[numeral_value] != 0 || (size > 0 && num[0] == numeral_value); 94 | // `size` is 8-bit: stop adding at 255 numerals instead of wrapping around to 0 95 | if (!known && size < 0xFF) 96 | { 97 | // putting index of `numeral_value` to the lut 98 | lut[numeral_value] = size; 99 | 100 | // setting the size-th numeral 101 | num[size] = numeral_value; 102 | ++size; 103 | } 104 | } 105 | 106 | assert(size > 0 && "Digit base should be at least 1"); 107 | } 108 | 109 | __host__ inline std::string MultibaseDigit::to_string() const 110 | { 111 | std::string hex; 112 | 113 | for (int i = 0; i < count(); ++i) 114 | { 115 | std::string fmt(i == 0 ? 
"%X" : ":%X"); 116 | hex += str::format(fmt, numeral(i)); 117 | } 118 | 119 | return hex; 120 | } 121 | -------------------------------------------------------------------------------- /src/algorithm/multibase_number.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.h" 4 | 5 | #include 6 | #include 7 | 8 | 9 | // 64-bit value which can also be accessed as 8 separate bytes 10 | union U64Number 11 | { 12 | uint64_t u64; 13 | 14 | uint8_t u8[8]; 15 | }; 16 | 17 | 18 | // Number of a multibase system: the numeral of every digit plus the index of that numeral 19 | struct MultibaseNumber 20 | { 21 | template friend struct MultibaseSystem; 22 | 23 | // All digits (numerals) packed into a single 64-bit value 24 | __host__ __device__ uint64_t number() const { return value.u64; } 25 | 26 | private: 27 | // numeral (actual byte value) of every digit 28 | U64Number value = {0}; 29 | // index of the numeral of every digit 30 | U64Number indices = {0}; 31 | }; 32 | -------------------------------------------------------------------------------- /src/algorithm/multibase_system.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.h" 4 | 5 | #include 6 | #include 7 | 8 | #include "algorithm/multibase_digit.h" 9 | #include "algorithm/multibase_number.h" 10 | 11 | 12 | 13 | /** 14 | * This represents a number system where each digit has its own base 15 | * Imagine this as set of rolling cylinders, installed side-by-side. 16 | * This is stateless structure. It allows to do arithmetical calculations 17 | * and conversions. 18 | * 19 | * Since application of this struct is only byte bruteforce - the maximum supported base is 255 20 | * 21 | * e.g. 
NumDigits == 4 22 | * So there are 4 cylinders 23 | * For example 1st cylinder has base 2, second - 4, third - 6, fourth - 8 24 | * So the number 1 will be equal 0001 25 | * number 10 equals 0012 (1 * 8) + 2 = | 0 | 0 | 1 | 2 | ) 26 | * number 100 is 0204 (12 * 8) + 4 = | 0 | 2 | 0 | 4 | ) 27 | * number 500 is 2224 (62 * 8) + 4 = | 2 | 2 | 2 | 4 | ) = ((10 * 6 + 2) * 8) + 4 = ((((2 * 4 + 2)) * 6 + 2) * 8) + 4 28 | * (note: this system has only 2 * 4 * 6 * 8 = 384 numbers, so 500 does not fit - its first digit exceeds base 2) 29 | * Since this structure is pretty heavy 30 | * The idea is to NOT allow non-const methods on device 31 | * You should have a single const reference on device. 32 | */ 33 | template 34 | struct MultibaseSystem 35 | { 36 | static_assert(NumDigits <= 8, "At the moment we only support 8 bytes numbers"); 37 | 38 | // For easier usage with aliased types 39 | constexpr static uint8_t DigitsNumber = NumDigits; 40 | 41 | /** 42 | * A generic case when all digits have different bases (pattern usage) 43 | */ 44 | __host__ MultibaseSystem(const std::vector>& numerals); 45 | 46 | /** 47 | * Use the same MultibaseDigit for every digit in this value (alphabet usage) 48 | */ 49 | __host__ MultibaseSystem(const MultibaseDigit& digit); 50 | 51 | /** 52 | * Default constructor where all Digits are Default - full range 0-255 53 | */ 54 | __host__ MultibaseSystem() : MultibaseSystem(MultibaseDigit()) { } 55 | 56 | // It's a pretty heavy struct. If you want to clone it - use the constructors above 57 | // TODO: disable copy 58 | // MultibaseSystem(const MultibaseSystem& other) = delete; 59 | // MultibaseSystem& operator=(const MultibaseSystem& other) = delete; 60 | 61 | public: 62 | 63 | // count of all numbers in this system (product of the sizes of all digits) 64 | __host__ __device__ inline size_t invariants() const; 65 | 66 | // converts a raw 64-bit value into a number of this system: every byte is looked up in the matching digit 67 | __host__ __device__ inline MultibaseNumber cast(uint64_t input) const; 68 | 69 | // Adds @amount in base10 to the @target argument and returns it 70 | __host__ __device__ inline MultibaseNumber& increment(MultibaseNumber& target, uint64_t amount) 
const; 71 | 72 | // get digit config by its index 73 | __host__ __device__ inline const MultibaseDigit& get_config(uint8_t digit_index) const { assert(digit_index < NumDigits); return digits[digit_index]; } 74 | 75 | protected: 76 | 77 | // the digits 78 | MultibaseDigit digits[NumDigits]; 79 | }; 80 | 81 | // System with 8 digits - one digit per byte of a 64-bit value 82 | using Multibase8DigitsSystem = MultibaseSystem<8>; 83 | 84 | 85 | template 86 | __host__ __device__ size_t MultibaseSystem::invariants() const 87 | { 88 | size_t num = digits[0].size; 89 | 90 | UNROLL 91 | for (uint8_t i = 1; i < NumDigits; ++i) 92 | { 93 | num *= digits[i].size; 94 | } 95 | 96 | return num; 97 | } 98 | 99 | 100 | template 101 | __host__ __device__ MultibaseNumber MultibaseSystem::cast(uint64_t input) const 102 | { 103 | MultibaseNumber number; 104 | U64Number u64Input = { input }; 105 | // every input byte is replaced by the index of that value in the digit; values unknown to the digit get index 0 106 | UNROLL 107 | for (uint8_t i = 0; i < NumDigits; ++i) 108 | { 109 | number.indices.u8[i] = digits[i].lookup(u64Input.u8[i]); 110 | 111 | number.value.u8[i] = digits[i].numeral(number.indices.u8[i]); 112 | } 113 | 114 | return number; 115 | } 116 | 117 | template 118 | __host__ __device__ inline MultibaseNumber& MultibaseSystem::increment(MultibaseNumber& target, uint64_t amount) const 119 | { 120 | UNROLL 121 | for (uint8_t i = 0; i < NumDigits; ++i) 122 | { 123 | uint8_t index = target.indices.u8[i]; 124 | uint8_t size = digits[i].size; 125 | // addition with carry: the remainder is the new index of this digit, the quotient is carried to the next digit 126 | target.indices.u8[i] = static_cast((amount + index) % size); 127 | amount = (amount + index) / size; 128 | 129 | target.value.u8[i] = digits[i].numeral(target.indices.u8[i]); 130 | } 131 | 132 | // here `amount` contains the overflow (the carry out of the last digit) 133 | // not sure what to do with it 134 | 135 | return target; 136 | } 137 | 138 | // Digits without a matching entry in `numerals` are default-constructed 139 | template 140 | __host__ MultibaseSystem::MultibaseSystem(const std::vector>& numerals) 141 | { 142 | for (uint8_t i = 0; i < NumDigits; ++i) 143 | { 144 | if (i < numerals.size()) 145 | { 146 | digits[i] = MultibaseDigit(numerals[i]); 147 | } 148 | else 149 | { 150 | digits[i] = 
MultibaseDigit(); 151 | } 152 | } 153 | } 154 | 155 | template 156 | __host__ MultibaseSystem::MultibaseSystem(const MultibaseDigit& digit) 157 | { 158 | for (uint8_t i = 0; i < NumDigits; ++i) 159 | { 160 | digits[i] = digit; 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /src/bruteforce/bruteforce_config.cpp: -------------------------------------------------------------------------------- 1 | #include "bruteforce_config.h" 2 | #include "bruteforce_type.h" 3 | #include "bruteforce_filters.h" 4 | 5 | #include "algorithm/keeloq/keeloq_decryptor.h" 6 | 7 | 8 | BruteforceConfig BruteforceConfig::GetDictionary(std::vector&& dictionary) 9 | { 10 | BruteforceConfig result(Decryptor(0,0), BruteforceType::Dictionary, dictionary.size()); 11 | result.decryptors = std::move(dictionary); 12 | return result; 13 | }; 14 | 15 | BruteforceConfig BruteforceConfig::GetBruteforce(Decryptor first, size_t size) 16 | { 17 | return BruteforceConfig(first, BruteforceType::Simple, size); 18 | } 19 | 20 | BruteforceConfig BruteforceConfig::GetBruteforce(Decryptor first, size_t size, const BruteforceFilters& filters) 21 | { 22 | BruteforceConfig result(first, BruteforceType::Filtered, size); 23 | result.filters = filters; 24 | return result; 25 | } 26 | 27 | BruteforceConfig BruteforceConfig::GetSeedBruteforce(Decryptor first) 28 | { 29 | return BruteforceConfig(first, BruteforceType::Seed, (uint32_t)-1); 30 | } 31 | 32 | BruteforceConfig BruteforceConfig::GetAlphabet(Decryptor first, const MultibaseDigit& alphabet, size_t num) 33 | { 34 | auto result = GetPattern(first, BruteforcePattern(alphabet), num); 35 | result.type = BruteforceType::Alphabet; 36 | return result; 37 | } 38 | 39 | BruteforceConfig BruteforceConfig::GetPattern(Decryptor first, const BruteforcePattern& pattern, size_t num) 40 | { 41 | num = std::min(pattern.size() - 1, num); 42 | 43 | first = Decryptor(pattern.init(first.man()).number(), first.seed()); 44 | 45 | 
BruteforceConfig result(first, BruteforceType::Pattern, num); 46 | result.pattern = pattern; 47 | return result; 48 | } 49 | 50 | uint64_t BruteforceConfig::dict_size() const 51 | { 52 | if (type == BruteforceType::Dictionary) 53 | { 54 | return size; 55 | } 56 | return 0; 57 | } 58 | 59 | uint64_t BruteforceConfig::brute_size() const 60 | { 61 | if (type != BruteforceType::Dictionary) 62 | { 63 | return size; 64 | } 65 | return 0; 66 | } 67 | 68 | void BruteforceConfig::next_decryptor() 69 | { 70 | if (type != BruteforceType::Dictionary) 71 | { 72 | start = last; 73 | 74 | if (type == BruteforceType::Alphabet || type == BruteforceType::Pattern) 75 | { 76 | // +1 for these attacks cause next here is the last *checked* 77 | auto startnum = pattern.init(start.man()); 78 | start = Decryptor(pattern.next(startnum, 1).number(), start.seed()); 79 | } 80 | else if (type == BruteforceType::Simple || type == BruteforceType::Filtered) 81 | { 82 | start = Decryptor(start.man() + 1, start.seed()); 83 | } 84 | } 85 | } 86 | 87 | std::string BruteforceConfig::toString() const 88 | { 89 | const char* pGeneratorName = BruteforceType::Name(type); 90 | switch (type) 91 | { 92 | case BruteforceType::Simple: 93 | { 94 | return str::format("Type: %s. First: 0x%llX (seed:%u). Last: 0x%llX", 95 | pGeneratorName, start.man(), start.seed(), start.man() + brute_size()); 96 | } 97 | case BruteforceType::Filtered: 98 | { 99 | return str::format("Type: %s. Initial: 0x%llX (seed:%u). Brute count: %zd.\n\tFilters: %s", 100 | pGeneratorName, start.man(), start.seed(), brute_size(), filters.toString().c_str()); 101 | } 102 | case BruteforceType::Alphabet: 103 | case BruteforceType::Pattern: 104 | { 105 | MultibaseNumber begin = pattern.init(start.man()); 106 | MultibaseNumber end = pattern.next(begin, brute_size()); 107 | 108 | auto result = str::format("Type: %s. First: 0x%llX (seed:%u). Last: 0x%llX. 
(Count: %zd) All invariants: %zd", 109 | pGeneratorName, begin.number(), start.seed(), end.number(), brute_size(), pattern.size()); 110 | 111 | if (type == BruteforceType::Alphabet) 112 | { 113 | result += str::format("\n\tAlphabet: %s", pattern.bytes_variants(0).to_string().c_str()); 114 | } 115 | else 116 | { 117 | std::string pattern_string = pattern.to_string(true); 118 | result += str::format("\nPattern: %s", pattern_string.c_str()); 119 | } 120 | return result; 121 | } 122 | case BruteforceType::Dictionary: 123 | { 124 | return str::format("Type: %s. Words num: %zd", pGeneratorName, dict_size()); 125 | } 126 | case BruteforceType::Seed: 127 | { 128 | return str::format("Type: %s. Manufacturer key: 0x%llX Start Seed:%u", 129 | pGeneratorName, start.man(), start.seed()); 130 | } 131 | } 132 | return str::format("UNSUPPORTED Type (%d): %s", (int)type, pGeneratorName); 133 | } 134 | -------------------------------------------------------------------------------- /src/bruteforce/bruteforce_config.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.h" 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | #include "algorithm/keeloq/keeloq_decryptor.h" 11 | 12 | #include "bruteforce/bruteforce_pattern.h" 13 | #include "bruteforce/bruteforce_filters.h" 14 | #include "bruteforce/bruteforce_type.h" 15 | #include "bruteforce/bruteforce_config.h" 16 | 17 | 18 | /** 19 | * Single run attack configuration 20 | * Run - selected type with specific parameters 21 | */ 22 | struct BruteforceConfig 23 | { 24 | // HOST SET. ONCE. Which generator to use. 25 | BruteforceType::Type type; 26 | 27 | // HOST SET. UPDATING. PER BATCH. Decryption batch (or decryptors generation) will start from this 28 | Decryptor start; 29 | 30 | // HOST SET. ONCE. How many generator rounds should be taken (in fact how many times CUDA kernel will be called) 31 | size_t size; 32 | 33 | // Dictionary - HOST SET. ONCE. 
34 | // Brute - GPU SET. UPDATING. 35 | std::vector decryptors; 36 | 37 | // HOST SET. ONCE. for filtered type. 38 | BruteforceFilters filters; 39 | 40 | // HOST SET. ONCE. for pattern or alphabet type. (alphabet is just special case of pattern) 41 | BruteforcePattern pattern; 42 | 43 | // GPU SET. UPDATING. Last generated decryptor (will be initial for next block run) 44 | Decryptor last; 45 | 46 | public: 47 | 48 | BruteforceConfig() : BruteforceConfig(Decryptor(0, 0), BruteforceType::LAST, 0) 49 | { 50 | } 51 | 52 | public: 53 | 54 | static BruteforceConfig GetDictionary(std::vector&& dictionary); 55 | 56 | static BruteforceConfig GetBruteforce(Decryptor first, size_t size); 57 | 58 | static BruteforceConfig GetBruteforce(Decryptor first, size_t size, const BruteforceFilters& filters); 59 | 60 | static BruteforceConfig GetSeedBruteforce(Decryptor first); 61 | 62 | static BruteforceConfig GetAlphabet(Decryptor first, const MultibaseDigit& alphabet, size_t num = (size_t)-1); 63 | 64 | static BruteforceConfig GetPattern(Decryptor first, const BruteforcePattern& pattern, size_t num = (size_t)-1); 65 | 66 | public: 67 | 68 | uint64_t dict_size() const; 69 | 70 | uint64_t brute_size() const; 71 | 72 | std::string toString() const; 73 | 74 | void next_decryptor(); 75 | 76 | private: 77 | BruteforceConfig(Decryptor start, BruteforceType::Type t, size_t num) : 78 | type(t), start(start), size(num), decryptors(), filters(), pattern(), last(start) 79 | { 80 | } 81 | }; 82 | 83 | inline std::vector operator "" _b(const char* ascii, size_t num) 84 | { 85 | return std::vector(ascii, ascii + num); 86 | } -------------------------------------------------------------------------------- /src/bruteforce/bruteforce_filters.cpp: -------------------------------------------------------------------------------- 1 | #include "bruteforce_filters.h" 2 | #include "common.h" 3 | 4 | #include 5 | #include 6 | 7 | 8 | const std::vector> BruteforceFilters::FilterNames = 9 | { 10 | { 
BruteforceFilters::Flags::None, "None" }, 11 | { BruteforceFilters::Flags::All, "All" }, 12 | 13 | { BruteforceFilters::Flags::Max6ZerosInARow, "6 zero bit in a row" }, 14 | { BruteforceFilters::Flags::Max6OnesInARow, "6 one bit in a row" }, 15 | 16 | { BruteforceFilters::Flags::BytesIncremental, "Incremental bytes pattern" }, 17 | { BruteforceFilters::Flags::BytesRepeat4, "4 same byte in a row" }, 18 | 19 | { BruteforceFilters::Flags::AsciiNumbers, "ASCII numbers" }, 20 | { BruteforceFilters::Flags::AsciiAlpha, "ASCII letters" }, 21 | { BruteforceFilters::Flags::AsciiSpecial, "ASCII special characters" }, 22 | }; 23 | 24 | 25 | std::string BruteforceFilters::toString(Flags::Type flags) const 26 | { 27 | if (flags == Flags::None) { return "None"; } 28 | if (flags == Flags::All) { return "All"; } 29 | 30 | std::string result; 31 | 32 | for (const auto& pair : BruteforceFilters::FilterNames) 33 | { 34 | auto check = (uint64_t)std::get<0>(pair); 35 | if (check != 0 && (check & (uint64_t)flags) == check) 36 | { 37 | result += std::get<1>(pair); 38 | result += " | "; 39 | } 40 | } 41 | if (result.size() > 0) 42 | { 43 | result.erase(result.end() - 3, result.end()); 44 | } 45 | return result; 46 | } 47 | 48 | std::string BruteforceFilters::toString() const 49 | { 50 | std::string include_str = toString(include); 51 | std::string exclude_str = toString(exclude); 52 | 53 | return "Include: '" + include_str + "'\tExclude: '" + exclude_str + "'"; 54 | } 55 | -------------------------------------------------------------------------------- /src/bruteforce/bruteforce_filters.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.h" 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | 12 | constexpr uint8_t KeySizeBytes = sizeof(uint64_t); 13 | 14 | constexpr uint8_t KeySizeBits = sizeof(uint64_t) * 8; 15 | 16 | /** 17 | * Filters for +1 bruteforce. 
18 | * Will apply if `include(value) && !exclude(value)` 19 | * 20 | * Very little performance increase over simple +1 with exclude 21 | * The filters are not optimized quite good and have a lot of `if` blocks 22 | */ 23 | struct BruteforceFilters 24 | { 25 | struct Flags 26 | { 27 | using Type = uint64_t; 28 | 29 | enum : Type 30 | { 31 | // 32 | None = 0, 33 | 34 | // filter function return true if key has more than 6 consecutive 0 bits 35 | Max6ZerosInARow = (1 << 0), 36 | 37 | // filter function return true if key has more than 6 consecutive 1 bits 38 | Max6OnesInARow = (1 << 1), 39 | 40 | 41 | // filter function return true if key has patterns like 11:22:33:44.. or FF:EE:DD:CC 42 | // 6 bytes by default 43 | BytesIncremental = (1 << 5), 44 | 45 | // filter function return true if key has repeating patterns like xx11:11:11:11xx or xxAA:AA:AA:AAxx 46 | BytesRepeat4 = (1 << 6), 47 | 48 | // filter function return true if key consist from only ascii numbers 49 | AsciiNumbers = (1 << 11), 50 | 51 | // filter function return true if key consist from only letters 'a'-'z' 'A'-'Z' 52 | AsciiAlpha = (1 << 12), 53 | 54 | // filter function return true if key consist from ascii letters and numbers 55 | AsciiAlphaNum = AsciiAlpha | AsciiNumbers, 56 | 57 | // filter function return true if key consist from only ASCII special symbols like '^%#&* 58 | AsciiSpecial = (1 << 13), 59 | 60 | // filter function return true if key consist from only ASCII typed characters 61 | AsciiAny = AsciiAlphaNum | AsciiSpecial, 62 | 63 | // 64 | All = (uint64_t)-1, 65 | }; 66 | 67 | __host__ __device__ static inline bool HasAll(Type test, Type check) 68 | { 69 | return check == (test & check); 70 | } 71 | __host__ __device__ static inline bool HasAny(Type test, Type check) 72 | { 73 | return (test & check); 74 | } 75 | }; 76 | 77 | public: 78 | 79 | std::string toString(Flags::Type flags) const; 80 | 81 | std::string toString() const; 82 | 83 | // Return true if key pass current filters 84 | 
__host__ __device__ inline bool Pass(uint64_t key) const; 85 | 86 | __host__ __device__ inline static bool check_filters(uint64_t key, Flags::Type filter); 87 | 88 | private: 89 | 90 | __host__ __device__ static bool all_any_ascii(uint64_t key); 91 | 92 | __host__ __device__ static bool all_ascii_num(uint64_t key); 93 | 94 | __host__ __device__ static bool all_ascii_alpha(uint64_t key); 95 | 96 | __host__ __device__ static bool all_ascii_symbol(uint64_t key); 97 | 98 | template 99 | __host__ __device__ inline static bool all_min_max(uint64_t key); 100 | 101 | template 102 | __host__ __device__ inline static bool has_consecutive_bits(uint64_t key); 103 | 104 | template 105 | __host__ __device__ inline static bool has_consecutive_bytes(uint64_t key); 106 | 107 | template 108 | __host__ __device__ inline static bool has_incremental_pattern(uint64_t key); 109 | 110 | 111 | private: 112 | 113 | static const std::vector> FilterNames; 114 | 115 | public: 116 | // Filter for keys to include. 117 | // WARNING: 118 | // Could be executed INFINITELY LONG TIME 119 | // e.g. start: 0x00000000001 filter SmartFilterFlags::AsciiAny 120 | // it will took around trillions and trillions operations just to get to the first valid with simple +1 121 | // In case of specific input - use dictionary, pattern or alphabet 122 | Flags::Type include = Flags::All; 123 | 124 | // Filter for keys to exclude 125 | Flags::Type exclude = Flags::None; 126 | 127 | // A global for all CUDA threads uint64 value which is used for generating filtered keys 128 | // it is accessed via atomicAdd function. 
129 | uint64_t sync_key = 0; 130 | }; 131 | 132 | 133 | template 134 | __host__ __device__ bool BruteforceFilters::all_min_max(uint64_t key) 135 | { 136 | // for logical AND start should be with true 137 | bool result = true; 138 | uint8_t* bPtrKey = (uint8_t*)&key; 139 | 140 | UNROLL 141 | for (uint8_t i = 0; i < KeySizeBytes; ++i) 142 | { 143 | // TODO: Some vector instruction here 144 | result &= bPtrKey[i] >= ValueMin && bPtrKey[i] <= ValueMax; 145 | } 146 | 147 | return result; 148 | } 149 | 150 | template 151 | __host__ __device__ bool BruteforceFilters::has_consecutive_bits(uint64_t key) // true if key holds a run of at least MaxCount consecutive bits equal to `bit` 152 | { 153 | bool result = false; 154 | uint64_t mask = (1ULL << MaxCount) - 1; // 64-bit shift: an int-typed `1` is too narrow once MaxCount reaches 31 155 | 156 | key = bit ? key : ~key; // for bit == 0 invert the key so the scan always looks for a run of ones 157 | 158 | UNROLL 159 | for (uint8_t i = 0; i < KeySizeBits; ++i) 160 | { 161 | // inverse - filter pass if no consecutive bits 162 | result |= (key & mask) == mask; 163 | key = key >> 1; 164 | } 165 | 166 | return result; 167 | } 168 | 169 | template 170 | __host__ __device__ bool BruteforceFilters::has_consecutive_bytes(uint64_t key) 171 | { 172 | // for logical OR start should be with false 173 | bool result = false; 174 | 175 | uint8_t index = 0; 176 | uint8_t* bPtrKey = (uint8_t*)&key; 177 | 178 | UNROLL 179 | for (uint8_t i = 1; i < KeySizeBytes; ++i) 180 | { 181 | bool equal = bPtrKey[i] == bPtrKey[index]; 182 | index = equal * index + (1 - equal) * i; 183 | 184 | result |= (i - index) >= (MaxCount - 1); 185 | } 186 | 187 | return result; 188 | } 189 | 190 | template 191 | __host__ __device__ bool BruteforceFilters::has_incremental_pattern(uint64_t key) 192 | { 193 | // for logical OR start should be with false 194 | bool result = false; 195 | 196 | uint8_t index = 0; 197 | uint8_t* bPtrKey = (uint8_t*)&key; 198 | 199 | UNROLL 200 | for (uint8_t i = 1; i < KeySizeBytes; ++i) 201 | { 202 | uint8_t deltaIndex = (i - index); 203 | 204 | #ifdef __CUDA_ARCH__ 205 | uint8_t asbDeltaValue = __sad(bPtrKey[i], bPtrKey[index], 0); 206 | #else 207 | uint8_t 
asbDeltaValue = abs(bPtrKey[i] - bPtrKey[index]); 208 | #endif 209 | 210 | bool match = asbDeltaValue == (0x11 * deltaIndex); 211 | 212 | index = match * index + (1 - match) * i; 213 | 214 | result |= deltaIndex >= (MaxCount - 1); 215 | } 216 | 217 | return result; 218 | } 219 | 220 | 221 | __host__ __device__ inline bool BruteforceFilters::check_filters(uint64_t key, Flags::Type filter) 222 | { 223 | bool key_has_any = false; 224 | 225 | // fastest should go first 226 | if (!key_has_any && Flags::HasAll(filter, Flags::AsciiAny)) 227 | { 228 | key_has_any |= all_any_ascii(key); 229 | } 230 | 231 | if (!key_has_any && Flags::HasAny(filter, Flags::AsciiNumbers)) 232 | { 233 | key_has_any |= all_ascii_num(key); 234 | } 235 | 236 | if (!key_has_any && Flags::HasAny(filter, Flags::AsciiAlpha)) 237 | { 238 | key_has_any |= all_ascii_alpha(key); 239 | } 240 | 241 | if (!key_has_any && Flags::HasAny(filter, Flags::AsciiSpecial)) 242 | { 243 | key_has_any |= all_ascii_symbol(key); 244 | } 245 | 246 | // 247 | if (!key_has_any && Flags::HasAny(filter, Flags::Max6OnesInARow)) 248 | { 249 | key_has_any |= has_consecutive_bits<1>(key); 250 | } 251 | 252 | if (!key_has_any && Flags::HasAny(filter, Flags::Max6ZerosInARow)) 253 | { 254 | key_has_any |= has_consecutive_bits<0>(key); 255 | } 256 | 257 | if (!key_has_any && Flags::HasAny(filter, Flags::BytesRepeat4)) 258 | { 259 | key_has_any |= has_consecutive_bytes(key); 260 | } 261 | 262 | if (!key_has_any && Flags::HasAny(filter, BruteforceFilters::Flags::BytesIncremental)) 263 | { 264 | key_has_any |= has_incremental_pattern(key); 265 | } 266 | 267 | return key_has_any; 268 | } 269 | 270 | __host__ __device__ inline bool BruteforceFilters::Pass(uint64_t key) const 271 | { 272 | bool pass = true; 273 | 274 | if (include != Flags::All && include != Flags::None) 275 | { 276 | // Include keys match patterns 277 | pass &= check_filters(key, include); 278 | } 279 | 280 | if (exclude != Flags::None && exclude != Flags::All) 281 | { 282 
| // Exclude keys which match patterns 283 | pass &= !check_filters(key, exclude); 284 | } 285 | 286 | return pass; 287 | } 288 | 289 | 290 | __host__ __device__ inline bool BruteforceFilters::all_any_ascii(uint64_t key) 291 | { 292 | constexpr uint8_t value_min = '!'; 293 | constexpr uint8_t value_max = '~'; 294 | 295 | return all_min_max(key); 296 | } 297 | 298 | __host__ __device__ inline bool BruteforceFilters::all_ascii_num(uint64_t key) 299 | { 300 | constexpr uint8_t value_min = '0'; 301 | constexpr uint8_t value_max = '9'; 302 | 303 | return all_min_max(key); 304 | } 305 | 306 | __host__ __device__ inline bool BruteforceFilters::all_ascii_alpha(uint64_t key) 307 | { 308 | // for logical AND start should be with true 309 | bool result = true; 310 | uint8_t* bPtrKey = (uint8_t*)&key; 311 | 312 | UNROLL 313 | for (uint8_t i = 0; i < KeySizeBytes; ++i) 314 | { 315 | result &= (bPtrKey[i] >= 'a' && bPtrKey[i] <= 'z') || (bPtrKey[i] >= 'A' && bPtrKey[i] <= 'Z'); 316 | } 317 | 318 | return result; 319 | } 320 | 321 | __host__ __device__ inline bool BruteforceFilters::all_ascii_symbol(uint64_t key) 322 | { 323 | // for logical AND start should be with true 324 | bool result = true; 325 | uint8_t* bPtrKey = (uint8_t*)&key; 326 | 327 | UNROLL 328 | for (uint8_t i = 0; i < KeySizeBytes; ++i) 329 | { 330 | result &= 331 | (bPtrKey[i] >= '!' 
&& bPtrKey[i] <= '/') || 332 | (bPtrKey[i] >= ':' && bPtrKey[i] <= '@') || 333 | (bPtrKey[i] >= '[' && bPtrKey[i] <= '`') || 334 | (bPtrKey[i] >= '{' && bPtrKey[i] <= '~'); 335 | } 336 | 337 | return result; 338 | } 339 | -------------------------------------------------------------------------------- /src/bruteforce/bruteforce_pattern.cpp: -------------------------------------------------------------------------------- 1 | #include "bruteforce_pattern.h" 2 | 3 | #include 4 | 5 | 6 | namespace 7 | { 8 | std::vector split(const std::string& delim, std::string input) 9 | { 10 | std::vector result; 11 | 12 | auto delim_index = input.find(delim); 13 | while (delim_index != std::string::npos) 14 | { 15 | std::string part = input.substr(0, delim_index); 16 | 17 | result.push_back(part); 18 | 19 | input.erase(0, delim_index + delim.size()); 20 | 21 | delim_index = input.find(delim); 22 | } 23 | 24 | result.push_back(input); 25 | return result; 26 | } 27 | } 28 | 29 | 30 | BruteforcePattern::BruteforcePattern(std::vector>&& pattern_bytes, const std::string& pattern_string) 31 | : system(pattern_bytes), repr_string(pattern_string) 32 | { 33 | } 34 | 35 | 36 | BruteforcePattern::BruteforcePattern(const MultibaseDigit& same_bytes) 37 | : system(same_bytes), repr_string("N/A") 38 | { 39 | } 40 | 41 | __host__ std::string BruteforcePattern::to_string(bool extended) const 42 | { 43 | if (!extended) 44 | { 45 | return repr_string; 46 | } 47 | 48 | std::string result; 49 | 50 | for (int d = 0; d < PatternSystem::DigitsNumber; ++d) 51 | { 52 | const auto& digitConfig = system.get_config(d); 53 | 54 | result += str::format("\t 0%d: (", d); 55 | 56 | if (digitConfig.count() < 255) 57 | { 58 | for (int i = 0; i < digitConfig.count(); ++i) 59 | { 60 | result += str::format(i == 0 ? 
"%X" : ":%X", digitConfig.numeral(i)); 61 | } 62 | } 63 | else 64 | { 65 | result += " "; 66 | } 67 | 68 | result += ")\n"; 69 | } 70 | 71 | return "\"" + repr_string + "\": \n" + result; 72 | } 73 | 74 | bool BruteforcePattern::TryParseSingleByte(std::string text, uint8_t& out) 75 | { 76 | if (text.size() == 2 || (text.size() == 4 && text.rfind("0x", 0) == 0)) 77 | { 78 | auto value = (uint8_t)strtoul(text.c_str(), nullptr, 16); 79 | 80 | if (value != 0 || (text.rfind("00") != std::string::npos)) 81 | { 82 | out = value; 83 | return true; 84 | } 85 | } 86 | 87 | return false; 88 | } 89 | 90 | std::vector BruteforcePattern::TryParseRangeBytes(std::string text) // parses "AA-BB" into every byte of the inclusive range; empty result if either bound is not a valid single byte 91 | { 92 | auto delimeter_index = text.find("-"); 93 | std::string from = text.substr(0, delimeter_index); 94 | std::string to = text.substr(delimeter_index + 1); 95 | 96 | uint8_t byte_from; 97 | uint8_t byte_to; 98 | 99 | if (!TryParseSingleByte(from, byte_from) || !TryParseSingleByte(to, byte_to)) 100 | { 101 | return { }; 102 | } 103 | 104 | if (byte_from > byte_to) 105 | { 106 | byte_from = std::exchange(byte_to, byte_from); // reversed bounds are swapped, not rejected 107 | } 108 | 109 | std::vector result(byte_to - byte_from + 1); 110 | for (size_t i = 0; i < result.size(); ++i) // size_t index: the full 00-FF range holds 256 entries, a count a uint8_t index can never reach 111 | { 112 | result[i] = (uint8_t)(i + byte_from); 113 | } 114 | 115 | return result; 116 | } 117 | 118 | std::vector BruteforcePattern::ParseBytes(std::string text) 119 | { 120 | // easy - all 121 | if (text == "*") 122 | { 123 | return DefaultByteArray<>::as_vector>(); 124 | } 125 | 126 | // easy single byte 0xAA or AA 127 | uint8_t single_byte; 128 | if (TryParseSingleByte(text, single_byte)) 129 | { 130 | return { single_byte }; 131 | } 132 | 133 | // range bytes 134 | auto delimeter_index = text.find("-"); 135 | if (delimeter_index != std::string::npos) 136 | { 137 | return TryParseRangeBytes(text); 138 | } 139 | 140 | // set of bytes 141 | std::vector result; 142 | std::vector splitted = split("|", text); 143 | for (const auto& part : splitted) 144 | { 145 | uint8_t byte; 
146 | if (TryParseSingleByte(part, byte)) 147 | { 148 | result.push_back(byte); 149 | } 150 | } 151 | 152 | return result; 153 | } 154 | -------------------------------------------------------------------------------- /src/bruteforce/bruteforce_pattern.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.h" 4 | 5 | #include 6 | #include 7 | 8 | #include "algorithm/multibase_system.h" 9 | #include "algorithm/multibase_number.h" 10 | 11 | 12 | using PatternValue = MultibaseNumber; 13 | using PatternSystem = MultibaseSystem; 14 | 15 | /** 16 | * In pattern mode "cylinders" (see multi-base system) are different sizes 17 | * Alphabet - 8 cylinders with same set of bytes 18 | * Pattern - 8 cylinders with different set of bytes 19 | */ 20 | struct BruteforcePattern 21 | { 22 | // How many bytes in this pattern - this basically represent bye-length of the bruteforce target 23 | // if bruteforce target is 64-bit key - this number should be 8 24 | static constexpr uint8_t BytesNumber = PatternSystem::DigitsNumber; 25 | 26 | BruteforcePattern() = default; 27 | 28 | // Create pattern from bytes. 29 | // LITTLE ENDIAN. pattern_bytes[0] is lowest byte in pattern. e.g. ( [ { 0x01 }, ... 
] will ends as key 0x.....01 ) 30 | // Pattern string is just for reference 31 | BruteforcePattern(std::vector>&& pattern_bytes, const std::string& pattern_string = "N/A"); 32 | 33 | // Special case constructor when the pattern is the same for every digit in the underlying system 34 | // However this is not how this class is supposed to be constructed by public code 35 | // user-code should use @BruteforceAlphabet instead in that case 36 | BruteforcePattern(const MultibaseDigit& same_bytes); 37 | 38 | public: 39 | 40 | // Initialize value for this pattern from 64-bit number 41 | __host__ __device__ inline PatternValue init(uint64_t begin) const; 42 | 43 | // Roll cylinders, increment @curr by @amount 44 | __host__ __device__ inline PatternValue next(const PatternValue& curr, uint64_t amount) const; 45 | 46 | // how many numbers are in this pattern 47 | __host__ __device__ inline size_t size() const { return system.invariants(); } 48 | 49 | // This pattern is fixed size (same as target attacking key size) 50 | // according to this pattern each byte can be only a specific value 51 | // with this method you can retrieve configuration of that byte 52 | __host__ __device__ inline const MultibaseDigit& bytes_variants(uint8_t index) const; 53 | 54 | public: 55 | 56 | __host__ std::string to_string(bool extended = false) const; 57 | 58 | public: 59 | // Convert possible single-byte pattern string to set of bytes 60 | // 0xDA -> single byte 61 | // 0x19-0x2A -> range 62 | // * -> full 63 | // 0x91|0x23 -> set of specific bytes (parts are separated by '|') 64 | static std::vector ParseBytes(std::string text); 65 | 66 | // tries to parse single byte value like 0xA1 or FF 67 | static bool TryParseSingleByte(std::string text, uint8_t& out); 68 | 69 | // tries to parse an inclusive byte range like 0x19-0x2A; returns an empty vector when either bound is not a valid single byte 70 | static std::vector TryParseRangeBytes(std::string text); 71 | 72 | protected: 73 | 74 | PatternSystem system; 75 | 76 | // **cannot and not designed** to be accessed on GPU 77 | std::string repr_string; 78 | }; 79 | 80 | 
__host__ __device__ inline PatternValue BruteforcePattern::init(uint64_t begin) const 81 | { 82 | return system.cast(begin); 83 | } 84 | 85 | __host__ __device__ inline PatternValue BruteforcePattern::next(const PatternValue& curr, uint64_t amount) const 86 | { 87 | MultibaseNumber result = curr; 88 | return system.increment(result, amount); 89 | } 90 | 91 | __host__ __device__ inline const MultibaseDigit& BruteforcePattern::bytes_variants(uint8_t index) const 92 | { 93 | assert(index < BytesNumber && "Invalid byte index, it's bigger that bytes count in this pattern"); 94 | return system.get_config(index); 95 | } 96 | -------------------------------------------------------------------------------- /src/bruteforce/bruteforce_round.cpp: -------------------------------------------------------------------------------- 1 | #include "bruteforce_round.h" 2 | 3 | #include "common.h" 4 | #include "bruteforce_config.h" 5 | #include "kernels/kernel_result.h" 6 | #include "algorithm/keeloq/keeloq_encrypted.h" 7 | #include "algorithm/keeloq/keeloq_learning_types.h" 8 | #include "algorithm/keeloq/keeloq_single_result.h" 9 | 10 | #include 11 | 12 | 13 | BruteforceRound::BruteforceRound(const std::vector& enc, const BruteforceConfig& config, std::vector selected_learning, 14 | uint32_t blocks, uint32_t threads, uint32_t iterations) 15 | : encrypted_data(enc) 16 | { 17 | #if NO_INNER_LOOPS 18 | iterations = 1; 19 | #endif 20 | 21 | CUDASetup[0] = blocks; 22 | CUDASetup[1] = threads; 23 | CUDASetup[2] = iterations; 24 | 25 | num_decryptors_per_batch = iterations * threads * blocks; 26 | 27 | kernel_inputs.Initialize(config, KeeloqLearningType::to_mask(selected_learning)); 28 | } 29 | 30 | const std::vector& BruteforceRound::read_results_gpu() 31 | { 32 | kernel_inputs.results->copy(block_results); 33 | return block_results; 34 | } 35 | 36 | const std::vector& BruteforceRound::read_decryptors_gpu() 37 | { 38 | kernel_inputs.decryptors->copy(decryptors); 39 | return decryptors; 40 | } 
41 | 42 | bool BruteforceRound::check_results(const KernelResult& result) 43 | { 44 | if (result.error < 0) 45 | { 46 | printf("Kernel fatal error: %d\n Round should be finished!\n", result.error); 47 | return true; 48 | } 49 | else if (result.error != 0) 50 | { 51 | printf("CUDA calculations num errors: %d\n", result.error); 52 | } 53 | 54 | if (result.value > 0) 55 | { 56 | auto& all_results = read_results_gpu(); 57 | 58 | printf("Matches count: %d\n", result.value); 59 | 60 | for (const auto& result : all_results) 61 | { 62 | if (result.match == KeeloqLearningType::INVALID) 63 | { 64 | continue; 65 | } 66 | 67 | result.print(); 68 | } 69 | 70 | return true; 71 | } 72 | 73 | return false; 74 | } 75 | 76 | size_t BruteforceRound::get_mem_size() const 77 | { 78 | assert(inited); 79 | return 80 | encrypted_data.size() * sizeof(EncParcel) + 81 | decryptors.size() * sizeof(Decryptor) + 82 | block_results.size() * sizeof(SingleResult); 83 | } 84 | 85 | size_t BruteforceRound::num_batches() const 86 | { 87 | assert(inited); 88 | if (Type() == BruteforceType::Dictionary) 89 | { 90 | uint8_t non_align = Config().dict_size() % keys_per_batch() == 0 ? 0 : 1; 91 | return Config().dict_size() / keys_per_batch() + non_align; 92 | } 93 | else 94 | { 95 | uint8_t non_align = Config().brute_size() % keys_per_batch() == 0 ? 0 : 1; 96 | return Config().brute_size() / keys_per_batch() + non_align; 97 | } 98 | } 99 | 100 | size_t BruteforceRound::keys_per_batch() const 101 | { 102 | assert(inited); 103 | return decryptors.size(); 104 | } 105 | 106 | size_t BruteforceRound::results_per_batch() const 107 | { 108 | assert(inited); 109 | return block_results.size(); 110 | } 111 | 112 | void BruteforceRound::Init() 113 | { 114 | if (!inited) 115 | { 116 | // allocated once. updated every run on GPU 117 | decryptors = std::vector(num_decryptors_per_batch); 118 | 119 | // allocated once. updated evert run on GPU. copied to CPU only if match found. 
120 | block_results = std::vector(encrypted_data.size() * decryptors.size()); 121 | 122 | alloc(); 123 | 124 | inited = true; 125 | } 126 | } 127 | 128 | std::string BruteforceRound::to_string() const 129 | { 130 | assert(inited); 131 | 132 | return str::format("Setup:\n" 133 | "\tCUDA: Blocks:%u Threads:%u Iterations:%u\n" 134 | "\tEncrypted data size:%zd\n" 135 | "\tLearning type:%s\n" 136 | "\tResults per batch:%zd\n" 137 | "\tDecryptors per batch:%zd\n" 138 | "\tConfig: %s", 139 | CudaBlocks(), CudaThreads(), CudaThreadIterations(), 140 | encrypted_data.size(), kernel_inputs.GetLearningMask().to_string().c_str(), results_per_batch(), keys_per_batch(), Config().toString().c_str()); 141 | } 142 | 143 | 144 | void BruteforceRound::alloc() 145 | { 146 | // 147 | assert(kernel_inputs.encdata == nullptr && "Encrypted data already allocated on GPU"); 148 | assert(kernel_inputs.decryptors == nullptr && "Decryptors data already allocated on GPU"); 149 | assert(kernel_inputs.results == nullptr && "Results data already allocated on GPU"); 150 | 151 | // ALLOCATE ON GPU 152 | if (kernel_inputs.encdata == nullptr) 153 | { 154 | kernel_inputs.encdata = CudaArray::allocate(encrypted_data); 155 | } 156 | 157 | if (kernel_inputs.decryptors == nullptr) 158 | { 159 | kernel_inputs.decryptors = CudaArray::allocate(decryptors); 160 | } 161 | 162 | if (kernel_inputs.results == nullptr) 163 | { 164 | kernel_inputs.results = CudaArray::allocate(block_results); 165 | } 166 | } 167 | 168 | void BruteforceRound::free() 169 | { 170 | if (kernel_inputs.encdata != nullptr) 171 | { 172 | kernel_inputs.encdata->free(); 173 | kernel_inputs.encdata = nullptr; 174 | } 175 | 176 | if (kernel_inputs.decryptors != nullptr) 177 | { 178 | kernel_inputs.decryptors->free(); 179 | kernel_inputs.decryptors = nullptr; 180 | } 181 | 182 | if (kernel_inputs.results != nullptr) 183 | { 184 | kernel_inputs.results->free(); 185 | kernel_inputs.results = nullptr; 186 | } 187 | 188 | encrypted_data.clear(); 189 
| decryptors.clear(); 190 | block_results.clear(); 191 | } 192 | -------------------------------------------------------------------------------- /src/bruteforce/bruteforce_round.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.h" 4 | 5 | #include 6 | #include 7 | 8 | #include "algorithm/keeloq/keeloq_learning_types.h" 9 | #include "algorithm/keeloq/keeloq_single_result.h" 10 | #include "algorithm/keeloq/keeloq_encrypted.h" 11 | #include "algorithm/keeloq/keeloq_kernel_input.h" 12 | 13 | #include "bruteforce/bruteforce_config.h" 14 | #include "kernels/kernel_result.h" 15 | 16 | 17 | /** 18 | * Round is a set of bruteforce batches 19 | * Each batch runs N thread 20 | * Each thread checks 1 or more decryptor 21 | * Each check is 1 or more (configured in args) keeloq learnings 22 | * 23 | * Typical is: 24 | * Via command line some rounds were created - e.g. Dictionary and Simple attacks 25 | * Each attack has N decryptors to check, though num blocks B, num threads T, and I iteration 26 | * This means `(1 BATCH size) = B * T * I` decryptors check 27 | * This means num of batches = `N / (1 BATCH size)` 28 | */ 29 | struct BruteforceRound 30 | { 31 | // Construct round struct without specific learning type (means use all learnings) 32 | BruteforceRound(const std::vector& data, const BruteforceConfig& gen, uint32_t blocks, uint32_t threads, uint32_t iterations) : 33 | BruteforceRound(data, gen, {}, blocks, threads, iterations) {} 34 | 35 | // Construct round struct with only one selected learning type 36 | BruteforceRound(const std::vector& data, const BruteforceConfig& gen, KeeloqLearningType::Type single_learning, 37 | uint32_t blocks, uint32_t threads, uint32_t iterations) : 38 | BruteforceRound(data, gen, std::vector { single_learning }, blocks, threads, iterations) {} 39 | 40 | // Standard constructor 41 | BruteforceRound(const std::vector& data, const BruteforceConfig& gen, std::vector 
selected_learning, 42 | uint32_t blocks, uint32_t threads, uint32_t iterations); 43 | 44 | ~BruteforceRound() 45 | { 46 | free(); 47 | } 48 | 49 | public: 50 | // Allocates memory 51 | void Init(); 52 | 53 | // Reads results data from GPU memory into internal container and returns const reference to it 54 | const std::vector& read_results_gpu(); 55 | 56 | // Reads decryptors data from GPU memory into internal container and returns const reference to it 57 | const std::vector& read_decryptors_gpu(); 58 | 59 | // Checks Kernel's results 60 | // Return true if Round should be finished 61 | bool check_results(const KernelResult& result); 62 | 63 | // Get allocated memory amount for data 64 | size_t get_mem_size() const; 65 | 66 | // How many batches in this round (basically total keys to check divides by number of keys in a batch) 67 | size_t num_batches() const; 68 | 69 | // How many calculated results are in a batch (if use 3 inputs - 3 x keys_per_batch) 70 | size_t results_per_batch() const; 71 | 72 | // How many keys to check in this batch 73 | size_t keys_per_batch() const; 74 | 75 | std::string to_string() const; 76 | 77 | public: 78 | inline uint32_t CudaBlocks() const { return CUDASetup[0]; } 79 | 80 | inline uint32_t CudaThreads() const { return CUDASetup[1]; } 81 | 82 | inline uint32_t CudaThreadIterations() const { return CUDASetup[2]; } 83 | 84 | inline const BruteforceConfig& Config() const { assert(inited); return kernel_inputs.GetConfig(); } 85 | 86 | inline BruteforceType::Type Type() const { assert(inited); return Config().type; } 87 | 88 | inline KeeloqKernelInput& Inputs() { assert(inited); return kernel_inputs; } 89 | 90 | private: 91 | 92 | void alloc(); 93 | 94 | void free(); 95 | 96 | private: 97 | 98 | bool inited = false; 99 | 100 | uint32_t num_decryptors_per_batch = 0; 101 | 102 | // 103 | KeeloqKernelInput kernel_inputs; 104 | 105 | // Constant per run 106 | std::vector encrypted_data; 107 | 108 | // could be pretty much data here 109 | 
std::vector decryptors; 110 | 111 | // could be pretty much data here 112 | std::vector block_results; 113 | 114 | uint32_t CUDASetup[3] = { 0 }; 115 | }; -------------------------------------------------------------------------------- /src/bruteforce/bruteforce_type.cpp: -------------------------------------------------------------------------------- 1 | #include "bruteforce_type.h" 2 | 3 | 4 | const char* BruteforceType::GeneratorTypeName[] = { 5 | "Dictionary", 6 | "Simple", 7 | "Filtered", 8 | "Alphabet", 9 | "Pattern", 10 | "Seed" 11 | }; 12 | 13 | const size_t BruteforceType::GeneratorTypeNamesCount = sizeof(GeneratorTypeName) / sizeof(char*); 14 | 15 | const char* BruteforceType::Name(Type type) 16 | { 17 | if (type > GeneratorTypeNamesCount) 18 | { 19 | return "UNKNOWN"; 20 | } 21 | 22 | return GeneratorTypeName[type]; 23 | } -------------------------------------------------------------------------------- /src/bruteforce/bruteforce_type.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.h" 4 | 5 | /** 6 | * Type of bruteforce attack 7 | */ 8 | struct BruteforceType 9 | { 10 | using Type = uint8_t; 11 | 12 | enum : Type 13 | { 14 | // Generation will be skipped 15 | Dictionary = 0, 16 | None = Dictionary, 17 | 18 | // Simple +1 generator (very fast in terms of generation of decryptors candidates) 19 | Simple, 20 | 21 | // Simple +1 bruteforce but with filters applied performance may degrade) 22 | Filtered, 23 | 24 | // Specify alphabet and brute over it 25 | Alphabet, 26 | 27 | // ASCII pattern like ?A:01:??:3?:* 28 | Pattern, 29 | 30 | // Simple +1 seed bruteforce. 
Since seed is 32bit value, seed bruteforce may be done in a acceptable amount of time 31 | Seed, 32 | 33 | // Not for usage 34 | LAST, 35 | }; 36 | 37 | static const char* Name(Type type); 38 | 39 | private: 40 | 41 | static const char* GeneratorTypeName[]; 42 | 43 | static const size_t GeneratorTypeNamesCount; 44 | }; 45 | 46 | -------------------------------------------------------------------------------- /src/bruteforce/generators/generator_bruteforce.cpp: -------------------------------------------------------------------------------- 1 | #include "generator_bruteforce.h" 2 | 3 | #include "algorithm/keeloq/keeloq_kernel_input.h" 4 | #include "kernels/kernel_result.h" 5 | 6 | 7 | int GeneratorBruteforce::PrepareDecryptors(KeeloqKernelInput& inputs, uint16_t blocks, uint16_t threads) 8 | { 9 | const BruteforceConfig& config = inputs.GetConfig(); 10 | KernelResult generator_results; 11 | 12 | inputs.BeforeGenerateDecryptors(); 13 | 14 | switch (config.type) 15 | { 16 | case BruteforceType::Simple: 17 | { 18 | GeneratorBruteforceSimple::LaunchKernel(blocks, threads, inputs.ptr(), generator_results.ptr()); 19 | break; 20 | } 21 | case BruteforceType::Seed: 22 | { 23 | GeneratorBruteforceSeed::LaunchKernel(blocks, threads, inputs.ptr(), generator_results.ptr()); 24 | break; 25 | } 26 | case BruteforceType::Filtered: 27 | { 28 | GeneratorBruteforceFiltered::LaunchKernel(blocks, threads, inputs.ptr(), generator_results.ptr()); 29 | break; 30 | } 31 | case BruteforceType::Pattern: 32 | case BruteforceType::Alphabet: 33 | { 34 | GeneratorBruteforcePattern::LaunchKernel(blocks, threads, inputs.ptr(), generator_results.ptr()); 35 | break; 36 | } 37 | case BruteforceType::Dictionary: 38 | { 39 | return 0; 40 | } 41 | default: 42 | 43 | printf("Error: Invalid bruteforce type: %d %s! 
Don't know how to generate decryptors!\n",
            (int)config.type, BruteforceType::Name(config.type));
        return 0;
    }

    // Pull the (possibly updated) input object and the kernel result back to the host
    inputs.read(); // it will not cause underneath arrays copy
    generator_results.read();

    inputs.AfterGeneratedDecryptors();

    return generator_results.error;
}

--------------------------------------------------------------------------------
/src/bruteforce/generators/generator_bruteforce.h:
--------------------------------------------------------------------------------
#pragma once

#include "common.h"

#include

#include "algorithm/keeloq/keeloq_kernel_input.h"
#include "kernels/kernel_result.h"


/**
 *  Declare new struct which represents a wrapper around cu implementation
 *
 *  For a generator `name` this declares:
 *   - the extern "C" getter returning the kernel address as void*
 *   - the __global__ kernel itself with the given argument list
 *   - struct `name`, whose GetKernelFunctionPtr() casts the getter result to the
 *     kernel signature (used by IGenerator::LaunchKernel)
 */
#define DECLARE_GENERATOR(name, ...) \
    extern "C" void* GENERATOR_KERNEL_GETTER_NAME(name)(); \
    __global__ void GENERATOR_KERNEL_NAME(name)(__VA_ARGS__); \
    struct name : public IGenerator \
    {\
        typedef void(*func)(__VA_ARGS__); \
        inline static func GetKernelFunctionPtr() { return (func)GENERATOR_KERNEL_GETTER_NAME(name)(); } \
    };


// Static-dispatch base for generator wrappers: the derived type (TSelf) provides
// GetKernelFunctionPtr(), this base provides the launch code.
template
struct IGenerator
{
    typedef void(*KernelFunc)(KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr results);

    // Launches TSelf's kernel on a 1D grid of @blocks x @threads with no dynamic shared
    // memory on the default stream (the last two cudaLaunchKernel arguments are 0 / nullptr).
    // NOTE(review): blocks/threads are uint16_t here while BruteforceRound keeps them as
    // uint32_t - values above 65535 would be truncated by the caller's conversion; confirm.
    static inline void LaunchKernel(uint16_t blocks, uint16_t threads, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr results)
    {
        // cudaLaunchKernel takes an array of pointers to the kernel arguments
        void* args[] = { &input, &results };

        auto* func = TSelf::GetKernelFunctionPtr();

        auto error = cudaLaunchKernel((const void*)func, dim3(blocks), dim3(threads), args, 0, nullptr);
        CUDA_CHECK(error);
    }
};

struct GeneratorBruteforce
{
    // Checks type of used generator in inputs and launches kernel to generate next batch of decryptors
    // Decryptors are generated on GPU and stored in GPU memory
    static int PrepareDecryptors(KeeloqKernelInput& inputs, uint16_t blocks, uint16_t threads);
};


// Extern cuda kernels - Implementation are in inl.file
DECLARE_GENERATOR(GeneratorBruteforcePattern, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls);
DECLARE_GENERATOR(GeneratorBruteforceFiltered, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls);
DECLARE_GENERATOR(GeneratorBruteforceSimple, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls);
DECLARE_GENERATOR(GeneratorBruteforceSeed, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls);



--------------------------------------------------------------------------------
/src/bruteforce/generators/generator_bruteforce_filtered_kernel.inl:
--------------------------------------------------------------------------------
#include "device/cuda_context.h"

#include
#include

#include "kernels/kernel_result.h"
#include "algorithm/keeloq/keeloq_kernel_input.h"
#include "algorithm/keeloq/keeloq_decryptor.h"

#include "bruteforce/bruteforce_type.h"
#include "bruteforce/bruteforce_filters.h"

// Atomically adds @size to the counter at @from and returns the value it had before
// the addition, i.e. the first index of the block reserved for the calling thread.
template
__device__ inline uint64_t RequestNewBlock(TPtr* from, uint32_t size)
{
    return atomicAdd((unsigned long long int*)from, size);
}

// Fills input->decryptors with keys that pass config.filters, starting from the shared
// counter filters.sync_key; every generated decryptor uses config.start.seed().
__global__ void DEFINE_GENERATOR_KERNEL(GeneratorBruteforceFiltered, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls)
{
    const BruteforceConfig& config = input->GetConfig();

    assert(config.type == BruteforceType::Filtered);

    assert(config.start.man() > 0x100000000000 && "Starting key should be big enough to start bruteforcing. 
Consider pattern brute."); 26 | 27 | assert(config.filters.include != BruteforceFilters::Flags::None && "Include filter None is invalid - will lead to infinite loop!"); 28 | assert(config.filters.exclude != BruteforceFilters::Flags::All && "Exclude filter All is invalid - will lead to infinite loop!"); 29 | 30 | CudaContext ctx = CudaContext::Get(); 31 | 32 | const uint32_t seed = config.start.seed(); 33 | 34 | CudaArray& decryptors = *input->decryptors; 35 | size_t num_decryptors = decryptors.num; 36 | 37 | const BruteforceFilters& filters = config.filters; 38 | 39 | 40 | // if we need to generate 24 keys, and we have 64 threads, 40 last should do nothing 41 | uint8_t at_least_one = num_decryptors >= ctx.thread_id; 42 | 43 | // if we have to generate 75 keys with 64 threads, 11 threads should do +1 key generation 44 | size_t non_aligned = num_decryptors % ctx.thread_max; 45 | uint8_t additional_this_thread = non_aligned > 0 && non_aligned > ctx.thread_id; 46 | 47 | // decremental value how many keys should be generated by this thread 48 | uint32_t num_to_generate = static_cast(at_least_one * (num_decryptors / ctx.thread_max + additional_this_thread)); 49 | 50 | uint32_t block_size = 0; 51 | 52 | // Block (from first to num_to_generate) of keys to check 53 | uint64_t man_block_begin = 0; 54 | uint64_t man_block_end = 0; 55 | 56 | CUDA_FOR_THREAD_ID(ctx, write_index, num_decryptors) 57 | { 58 | bool written = false; 59 | 60 | do 61 | { 62 | if (block_size == 0) 63 | { 64 | // We have to acquire next block not more than we have to generate left 65 | // It's need to prevent situations like: 66 | // this thread need to generate 2 keys 67 | // but it has requested 100 to check 68 | // first to keys are valid and next 98 nobody will check, and thread cannot "return" it to unchecked pool 69 | // since check indication is just an atomic add operation 70 | block_size = num_to_generate; 71 | 72 | // get raw manufacturer key start index (number) which will be incremented and 
checked 73 | // for each thread. atomic instruction guaranties no overlapping key checks between threads 74 | // For example: 75 | // each thread should generate 16 keys 76 | // `next.man` is 123 right now, thread 1 do atomic add and get 77 | // start from 123, do 16 checks 78 | // `next.man` now 139, but thread 1 runs checks from 123 79 | // ... 80 | // thread 1 found 4 keys, so it should generate 12 more 81 | // `next.man` is 6383 on next iteration (other threads do jobs as well) 82 | // thread 1 adds 12 to `next.man` and get previous value 83 | // so now it starts check 12 keys from 6383 to 6395 84 | // 85 | man_block_begin = RequestNewBlock(&filters.sync_key, block_size); 86 | 87 | // TODO: Check how overflow behaves 88 | man_block_end = man_block_begin + block_size; 89 | } 90 | 91 | for (uint64_t key = man_block_begin; key < man_block_end; ++key) 92 | { 93 | if (filters.Pass(key)) 94 | { 95 | --num_to_generate; 96 | 97 | // next write should start testing keys from next key from current 98 | man_block_begin = key + 1; 99 | 100 | // break while loop 101 | written = true; 102 | 103 | decryptors[write_index] = Decryptor(key, seed); 104 | break; 105 | } 106 | } 107 | 108 | if (!written) 109 | { 110 | // request new block 111 | block_size = 0; 112 | } 113 | 114 | } while (!written); 115 | } 116 | } 117 | 118 | DEFINE_GENERATOR_GETTER(GeneratorBruteforceFiltered); -------------------------------------------------------------------------------- /src/bruteforce/generators/generator_bruteforce_pattern_kernel.inl: -------------------------------------------------------------------------------- 1 | #include "device/cuda_context.h" 2 | 3 | #include "kernels/kernel_result.h" 4 | #include "algorithm/keeloq/keeloq_kernel_input.h" 5 | 6 | #include "bruteforce/bruteforce_pattern.h" 7 | #include "bruteforce/bruteforce_type.h" 8 | 9 | 10 | __global__ void DEFINE_GENERATOR_KERNEL(GeneratorBruteforcePattern, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls) 11 | { 
12 | const BruteforceConfig& config = input->GetConfig(); 13 | 14 | assert((config.type == BruteforceType::Alphabet) || (config.type == BruteforceType::Pattern)); 15 | 16 | CudaContext ctx = CudaContext::Get(); 17 | 18 | const BruteforcePattern& pattern = config.pattern; 19 | CudaArray& decryptors = *input->decryptors; 20 | 21 | // Imagine alphabet as rotating rings with letters on it 22 | // we have 8-bytes key so there will be 8 rings 23 | // bytes in the key show how much ring is rotated 24 | // and what 'letter' it should have. 25 | // Or also it can be considered as 8-digit N-based number 26 | MultibaseNumber start = pattern.init(config.start.man()); 27 | 28 | const uint32_t seed = config.start.seed(); 29 | 30 | CUDA_FOR_THREAD_ID(ctx, decryptor_index, decryptors.num) 31 | { 32 | decryptors[decryptor_index] = Decryptor(pattern.next(start, decryptor_index).number(), seed); 33 | } 34 | } 35 | 36 | DEFINE_GENERATOR_GETTER(GeneratorBruteforcePattern); -------------------------------------------------------------------------------- /src/bruteforce/generators/generator_bruteforce_seed_kernel.inl: -------------------------------------------------------------------------------- 1 | #include "device/cuda_context.h" 2 | 3 | #include "kernels/kernel_result.h" 4 | #include "algorithm/keeloq/keeloq_kernel_input.h" 5 | #include "bruteforce/bruteforce_type.h" 6 | 7 | __global__ void DEFINE_GENERATOR_KERNEL(GeneratorBruteforceSeed, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls) 8 | { 9 | CudaContext ctx = CudaContext::Get(); 10 | 11 | assert(input->GetConfig().type == BruteforceType::Seed); 12 | 13 | const Decryptor& start = input->GetConfig().start; 14 | 15 | CudaArray& decryptors = *input->decryptors; 16 | 17 | CUDA_FOR_THREAD_ID(ctx, decryptor_index, decryptors.num) 18 | { 19 | decryptors[decryptor_index] = Decryptor(start.man(), start.seed() + decryptor_index); 20 | } 21 | } 22 | 23 | DEFINE_GENERATOR_GETTER(GeneratorBruteforceSeed); 
-------------------------------------------------------------------------------- /src/bruteforce/generators/generator_bruteforce_simple_kernel.inl: -------------------------------------------------------------------------------- 1 | #include "device/cuda_context.h" 2 | 3 | #include "kernels/kernel_result.h" 4 | #include "algorithm/keeloq/keeloq_kernel_input.h" 5 | #include "bruteforce/bruteforce_type.h" 6 | 7 | __global__ void DEFINE_GENERATOR_KERNEL(GeneratorBruteforceSimple, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls) 8 | { 9 | CudaContext ctx = CudaContext::Get(); 10 | 11 | assert(input->GetConfig().type == BruteforceType::Simple); 12 | 13 | const Decryptor& start = input->GetConfig().start; 14 | 15 | CudaArray& decryptors = *input->decryptors; 16 | 17 | CUDA_FOR_THREAD_ID(ctx, decryptor_index, decryptors.num) 18 | { 19 | decryptors[decryptor_index] = Decryptor(start.man() + decryptor_index, start.seed()); 20 | } 21 | } 22 | 23 | DEFINE_GENERATOR_GETTER(GeneratorBruteforceSimple); -------------------------------------------------------------------------------- /src/bruteforce/generators/generator_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | #include "generator_bruteforce_pattern_kernel.inl" 4 | #include "generator_bruteforce_filtered_kernel.inl" 5 | #include "generator_bruteforce_simple_kernel.inl" 6 | #include "generator_bruteforce_seed_kernel.inl" -------------------------------------------------------------------------------- /src/common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | #define CUDA_CHECK(error) \ 11 | if (error != 0) { printf("\nASSERTION FAILED. 
CUDA ERROR!\n%s: %s\n", cudaGetErrorName((cudaError_t)error), cudaGetErrorString((cudaError_t)error)); }\ 12 | assert(error == 0) 13 | 14 | 15 | #define GENERATOR_KERNEL_NAME(name) \ 16 | Kernel_##name 17 | 18 | #define GENERATOR_KERNEL_GETTER_NAME(name) \ 19 | GetKernel_##name 20 | 21 | #define DEFINE_GENERATOR_KERNEL(name, ...) \ 22 | GENERATOR_KERNEL_NAME(name)(__VA_ARGS__) 23 | 24 | #define DEFINE_GENERATOR_GETTER(name) \ 25 | extern "C" void* GENERATOR_KERNEL_GETTER_NAME(name)() { return (void*)&Kernel_##name; } 26 | 27 | 28 | #if __CUDA_ARCH__ 29 | #define UNROLL #pragma unroll 30 | #else 31 | #define UNROLL 32 | #endif 33 | 34 | #ifndef NO_INNER_LOOPS 35 | #define NO_INNER_LOOPS 1 36 | #endif // !NO_INNER_LOOPS 37 | 38 | 39 | 40 | template 41 | struct DefaultByteArray 42 | { 43 | uint8_t element[NSize]; 44 | 45 | constexpr DefaultByteArray() : element() 46 | { 47 | for (uint8_t i = 0; i < NSize; ++i) 48 | { 49 | element[i] = i; 50 | } 51 | } 52 | 53 | template 54 | static inline Vector as_vector() 55 | { 56 | constexpr DefaultByteArray array = DefaultByteArray(); 57 | return Vector(&array.element[0], &array.element[0] + NSize); 58 | } 59 | }; 60 | 61 | 62 | namespace str 63 | { 64 | 65 | template 66 | inline String format(const String& format, Args ... args) 67 | { 68 | int size_s = snprintf(nullptr, 0, format.c_str(), args ...) + 1; // Extra space for '\0' 69 | if (size_s <= 0) 70 | { 71 | return {}; 72 | } 73 | 74 | auto size = static_cast(size_s); 75 | String result(size, '\0'); 76 | 77 | snprintf(&result[0], size, format.c_str(), args ...); 78 | result.pop_back(); // remove '\0' from the end 79 | 80 | return result; 81 | } 82 | 83 | } 84 | 85 | 86 | inline uint64_t operator "" _u64(const char* ascii, size_t num) 87 | { 88 | const uint8_t size = sizeof(uint64_t); 89 | 90 | uint64_t number = 0; 91 | uint8_t* pNumber = (uint8_t*)&number; 92 | 93 | for (uint8_t i = 0; i < size; ++i) 94 | { 95 | pNumber[size - i - 1] = num > i ? 
ascii[i] : 0; 96 | } 97 | 98 | return number; 99 | } -------------------------------------------------------------------------------- /src/device/cuda_array.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.h" 4 | 5 | #include 6 | #include 7 | 8 | 9 | /** 10 | * A helper class for arrays allocated in GPU memory 11 | */ 12 | template 13 | struct CudaArray 14 | { 15 | using TCudaArray = CudaArray; 16 | 17 | T* CUDA_data; 18 | 19 | size_t num; 20 | 21 | __host__ __device__ inline T& operator[](uint32_t index) 22 | { 23 | assert(index < num && "Index is out of range in CudaArray"); 24 | return CUDA_data[index]; 25 | } 26 | __host__ __device__ inline const T& operator[](uint32_t index) const 27 | { 28 | assert(index < num && "Index is out of range in CudaArray"); 29 | return CUDA_data[index]; 30 | } 31 | 32 | // Frees all associated resources assuming this pointer is GPU address 33 | inline void free() 34 | { 35 | TCudaArray::free(this); 36 | } 37 | 38 | // Copies bytes from @source to this array 39 | // Assumes array was allocated on GPU, will not fail if num > size 40 | inline void write(const T* source, size_t num) 41 | { 42 | TCudaArray::write(this, source, num); 43 | } 44 | 45 | // Copies data from GPU to @target array 46 | inline size_t copy(std::vector& target) 47 | { 48 | return TCudaArray::copy(this, target); 49 | } 50 | 51 | // Copies self GPU object (without all underlying data in array) 52 | // into CPU memory 53 | inline TCudaArray host() 54 | { 55 | // thiscall should work even with invalid pointer 56 | return TCudaArray::host(this); 57 | } 58 | 59 | // Copies this pointer (which assumes to be GPU) to host and return copy of the last element 60 | inline T host_last() 61 | { 62 | // thiscall should work even with invalid pointer 63 | TCudaArray HOST_array = TCudaArray::host(this); 64 | return TCudaArray::read(HOST_array, HOST_array.num - 1); 65 | } 66 | 67 | public: 68 | static 
TCudaArray* allocate(const std::vector& source); 69 | static void free(TCudaArray* array); 70 | 71 | static TCudaArray host(const TCudaArray* device); 72 | 73 | static T read(const TCudaArray& HOST_Array, size_t index); 74 | 75 | static size_t copy(const TCudaArray* array, std::vector& target); 76 | 77 | static void write(TCudaArray* dest, const T* source, size_t num); 78 | }; 79 | 80 | template 81 | T CudaArray::read(const TCudaArray& HOST_Array, size_t index) 82 | { 83 | assert(index < HOST_Array.num); 84 | 85 | T result; 86 | auto error = cudaMemcpy(&result, &HOST_Array.CUDA_data[index], sizeof(T), cudaMemcpyDeviceToHost); 87 | CUDA_CHECK(error); 88 | 89 | return result; 90 | } 91 | 92 | template 93 | CudaArray* CudaArray::allocate(const std::vector& source) 94 | { 95 | // Allocate memory of vector itself (ptr + size_t == 16 bytes) 96 | CudaArray* result = nullptr; 97 | uint32_t error = cudaMalloc((void**) & result, sizeof(CudaArray)); 98 | CUDA_CHECK(error); 99 | 100 | // Write size_t - size of the data 101 | size_t source_size = source.size(); 102 | error = cudaMemcpy(&result->num, &source_size, sizeof(size_t), cudaMemcpyHostToDevice); 103 | CUDA_CHECK(error); 104 | 105 | // Device pointer of data (if available) 106 | T* data_ptr = nullptr; 107 | if (source_size > 0) 108 | { 109 | size_t allocated_bytes = sizeof(T) * source_size; 110 | 111 | // allocate data on device and copy from RAW 112 | error = cudaMalloc((void**)&data_ptr, allocated_bytes); 113 | CUDA_CHECK(error); 114 | 115 | error = cudaMemcpy(data_ptr, source.data(), allocated_bytes, cudaMemcpyHostToDevice); 116 | CUDA_CHECK(error); 117 | } 118 | 119 | // Write data pointer (null if no data) 120 | error = cudaMemcpy(&result->CUDA_data, &data_ptr, sizeof(T*), cudaMemcpyHostToDevice); 121 | CUDA_CHECK(error); 122 | 123 | return result; 124 | } 125 | 126 | template 127 | CudaArray CudaArray::host(const TCudaArray* device) 128 | { 129 | assert(device && "Invalid CUDA pointer"); 130 | TCudaArray HOST_dest 
= { 0 }; 131 | 132 | auto error = cudaMemcpy(&HOST_dest, device, sizeof(TCudaArray), cudaMemcpyDeviceToHost); 133 | CUDA_CHECK(error); 134 | 135 | return HOST_dest; 136 | } 137 | 138 | template 139 | void CudaArray::free(TCudaArray* array) 140 | { 141 | TCudaArray HOST_array = host(array); 142 | 143 | if (HOST_array.CUDA_data) 144 | { 145 | auto error = cudaFree(HOST_array.CUDA_data); 146 | CUDA_CHECK(error); 147 | } 148 | 149 | auto error = cudaFree(array); 150 | CUDA_CHECK(error); 151 | } 152 | 153 | template 154 | size_t CudaArray::copy(const TCudaArray* array, std::vector& target) 155 | { 156 | TCudaArray HOST_array = host(array); 157 | if (HOST_array.num > 0) 158 | { 159 | size_t allocated_bytes = HOST_array.num * sizeof(T); 160 | target.resize(HOST_array.num); 161 | 162 | auto error = cudaMemcpy((T*)target.data(), HOST_array.CUDA_data, allocated_bytes, cudaMemcpyDeviceToHost); 163 | CUDA_CHECK(error); 164 | } 165 | 166 | return HOST_array.num; 167 | } 168 | 169 | template 170 | void CudaArray::write(TCudaArray* dest, const T* source, size_t num) 171 | { 172 | TCudaArray HOST_dest = host(dest); 173 | 174 | assert(HOST_dest.CUDA_data && "CUDA Data wasn't allocated"); 175 | if (HOST_dest.CUDA_data) 176 | { 177 | auto copy_bytes = sizeof(T) * std::min(num, HOST_dest.num); 178 | 179 | auto error = cudaMemcpy(HOST_dest.CUDA_data, source, copy_bytes, cudaMemcpyHostToDevice); 180 | CUDA_CHECK(error); 181 | } 182 | } -------------------------------------------------------------------------------- /src/device/cuda_common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.h" 4 | 5 | #include 6 | 7 | 8 | namespace misc 9 | { 10 | 11 | // device-faster version of byte reversing function 12 | __host__ __device__ __forceinline__ uint64_t rev_bytes(uint64_t input) 13 | { 14 | #if __CUDA_ARCH__ 15 | uint32_t key_lo = input; 16 | uint32_t key_hi = input >> 32; 17 | 18 | constexpr uint32_t selector_0 = 
0x4567; 19 | constexpr uint32_t selector_1 = 0x0123; 20 | 21 | uint32_t key_rev_lo = __byte_perm(key_lo, key_hi, selector_0); 22 | uint32_t key_rev_hi = __byte_perm(key_lo, key_hi, selector_1); 23 | 24 | return ((uint64_t)key_rev_hi << 32) | key_rev_lo; 25 | #else 26 | uint64_t input_rev = 0; 27 | uint64_t input_rev_byte = 0; 28 | for (uint8_t i = 0; i < 64; i += 8) 29 | { 30 | input_rev_byte = (uint8_t)(input >> i); 31 | input_rev = input_rev | input_rev_byte << (56 - i); 32 | } 33 | 34 | return input_rev; 35 | #endif 36 | } 37 | 38 | // Reverses amount of bits in @input 39 | __device__ __host__ __forceinline__ uint64_t rev_bits(uint64_t input, uint8_t rev_bit_count) 40 | { 41 | uint64_t reverse_key = 0; 42 | for (uint8_t i = 0; i < rev_bit_count; i++) 43 | { 44 | reverse_key = reverse_key << 1 | ((input >> i) & 1); 45 | } 46 | return reverse_key; 47 | } 48 | } -------------------------------------------------------------------------------- /src/device/cuda_context.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.h" 4 | 5 | #include 6 | 7 | 8 | // For loop macro 9 | // indexer @i Will be incremented by amount of all threads in the context 10 | #define CUDA_FOR_THREAD_ID(ctx, i, count) for(uint32_t i = ctx.thread_id; i < count; i += ctx.thread_max) 11 | 12 | // Custom struct of CUDA thread execution context 13 | struct CudaContext 14 | { 15 | // Maximum overall threads 16 | uint32_t thread_max; 17 | 18 | // Global thread id 19 | uint32_t thread_id; 20 | 21 | __host__ __device__ static inline CudaContext Get() 22 | { 23 | #if __CUDA_ARCH__ 24 | return CudaContext 25 | { 26 | gridDim.x * blockDim.x, 27 | blockIdx.x * blockDim.x + threadIdx.x 28 | }; 29 | #else 30 | assert(false && "CUDA context is not available on host"); 31 | return {}; 32 | #endif 33 | } 34 | }; -------------------------------------------------------------------------------- /src/device/cuda_double_array.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.h" 4 | 5 | #include 6 | 7 | /** 8 | * This is convenience wrapper allows for easier array management and 9 | * copying between CPU and GPU (and vice versa). 10 | * 11 | * WARNING: 12 | * If this object is data owner (constructed without input data) - it will free memory in destructor 13 | * If this object is just pointer container - will not do anything destructive with pointers in destructor 14 | */ 15 | template 16 | struct DoubleArray 17 | { 18 | using TCUDAPtr = T*; 19 | using THOSTPtr = T*; 20 | 21 | THOSTPtr HOST_mem; 22 | TCUDAPtr CUDA_mem; 23 | 24 | size_t size; 25 | 26 | DoubleArray(size_t num, bool zeros = true) 27 | : hostOwner(true) 28 | { 29 | size = sizeof(T) * num; 30 | HOST_mem = (T*)malloc(size); 31 | 32 | uint32_t error = cudaMalloc(&CUDA_mem, size); 33 | CUDA_CHECK(error); 34 | 35 | if (zeros) 36 | { 37 | memset(HOST_mem, 0, size); 38 | cudaMemset(CUDA_mem, 0, size); 39 | } 40 | } 41 | 42 | DoubleArray(THOSTPtr array, size_t num) 43 | : hostOwner(false) 44 | { 45 | HOST_mem = array; 46 | size = sizeof(T) * num; 47 | 48 | cudaError error = cudaMalloc((void**) & CUDA_mem, size); 49 | CUDA_CHECK(error); 50 | 51 | error = cudaMemcpy(CUDA_mem, HOST_mem, size, cudaMemcpyHostToDevice); 52 | CUDA_CHECK(error); 53 | } 54 | 55 | ~DoubleArray() 56 | { 57 | if (CUDA_mem) 58 | { 59 | cudaFree(CUDA_mem); 60 | CUDA_mem = nullptr; 61 | } 62 | 63 | if (HOST_mem && hostOwner) 64 | { 65 | free(HOST_mem); 66 | HOST_mem = nullptr; 67 | } 68 | } 69 | 70 | void write_GPU() 71 | { 72 | uint32_t error = cudaMemcpy(CUDA_mem, HOST_mem, size, cudaMemcpyHostToDevice); 73 | CUDA_CHECK(error); 74 | } 75 | 76 | void read_GPU() 77 | { 78 | uint32_t error = cudaMemcpy(HOST_mem, CUDA_mem, size, cudaMemcpyDeviceToHost); 79 | CUDA_CHECK(error); 80 | } 81 | 82 | private: 83 | bool hostOwner; 84 | }; 
--------------------------------------------------------------------------------
/src/device/cuda_object.h:
--------------------------------------------------------------------------------
#pragma once

#include "common.h"

#include


// Keeps a lazily allocated GPU copy of a host object.
// The host object is NOT owned; the GPU copy is allocated on first ptr() call
// and released in destructor. Neither copyable nor movable.
template
struct CudaObject
{
    TTarget* CUDA_Ptr;
    TTarget* HOST_Ptr;

    CudaObject(TTarget* source)
    {
        CUDA_Ptr = nullptr;
        HOST_Ptr = source;
    }
    CudaObject(CudaObject&& other) = delete;

    CudaObject(const CudaObject& other) = delete;
    CudaObject& operator =(const CudaObject& other) = delete;

    ~CudaObject()
    {
        if (CUDA_Ptr)
        {
            uint32_t error = cudaFree(CUDA_Ptr);
            CUDA_CHECK(error);
            CUDA_Ptr = nullptr;
        }

        HOST_Ptr = nullptr;
    }

    // Returns the device pointer, allocating device memory on first use.
    // When @sync is true the host object bytes are copied to the device on every call.
    TTarget* ptr(bool sync = true)
    {
        if (HOST_Ptr == nullptr)
        {
            assert(false && "OBJECT IS NO LONGER CAN BE USED IN GPU");
            return nullptr;
        }

        if (CUDA_Ptr == nullptr)
        {
            auto error = cudaMalloc((void**)&CUDA_Ptr, sizeof(TTarget));
            CUDA_CHECK(error);
        }

        if (CUDA_Ptr && sync)
        {
            auto error = cudaMemcpy(CUDA_Ptr, HOST_Ptr, sizeof(TTarget), cudaMemcpyHostToDevice);
            CUDA_CHECK(error);
        }

        return CUDA_Ptr;
    }

    // Copies the device object bytes back over the host object
    // (does nothing if the device copy was never allocated)
    void read()
    {
        if (HOST_Ptr == nullptr)
        {
            assert(false && "OBJECT IS NO LONGER CAN BE USED IN GPU");
            return;
        }

        if (CUDA_Ptr)
        {
            auto error = cudaMemcpy(HOST_Ptr, CUDA_Ptr, sizeof(TTarget), cudaMemcpyDeviceToHost);
            CUDA_CHECK(error);
        }
    }
};

// Self owned GPU object
// Base for types which mirror themselves on GPU: pass `this` as @Self
template
struct TGenericGpuObject
{
    using TCudaPtr = T*;

    TGenericGpuObject(T* Self) : SelfGpu(Self) {
    }

    TGenericGpuObject(const TGenericGpuObject& other) = delete;
    TGenericGpuObject& operator =(const TGenericGpuObject& other) = delete;

    // Device pointer to the mirrored object (synchronized host -> device, see CudaObject::ptr)
    virtual TCudaPtr ptr()
    {
        return SelfGpu.ptr();
    }

    // Device -> host copy of the mirrored object
    void read()
    {
        SelfGpu.read();
    }

protected:

    CudaObject SelfGpu;
};
--------------------------------------------------------------------------------
/src/device/cuda_span.h:
--------------------------------------------------------------------------------
#pragma once

#include "common.h"

#include
#include


/**
 *  Something like C# span
 * Memory view
 * Does not own the memory it points to
 */
template
struct Span
{
    __host__ __device__ Span(T* ptr, uint32_t num) : data(ptr), size(num) { }

    // Bounds-asserted element access; device code goes through __ldca
    __host__ __device__ inline T& operator[](uint32_t index)
    {
        assert(index < size && "Index is out of range in Span");

#if __CUDA_ARCH__
        return __ldca(&data[index]);
#else
        return data[index];
#endif
    }

    __host__ __device__ inline const T& operator[](uint32_t index) const
    {
        assert(index < size && "Index is out of range in Span");
#if __CUDA_ARCH__
        return __ldca(&data[index]);
#else
        return data[index];
#endif
    }

    // Number of elements in Span
    __host__ __device__ inline uint32_t num() const { return size; }

    // fall back if T is not simple type
    __host__ __device__ typename std::enable_if, T&>::type __ldca(T* ptr) { return *ptr; }
    __host__ __device__ typename std::enable_if, const T&>::type __ldca(T* ptr) const { return *ptr; }

private:

    T* data;

    uint32_t size;
};
--------------------------------------------------------------------------------
/src/device/cuda_vector.h:
--------------------------------------------------------------------------------
#pragma once

#include "common.h"

#include "device/cuda_array.h"

#include
#include


/**
 *  Host owned, GPU copy, vector
 * Can be considered as wrapper around host vector
 * Owns GPU data, frees on destructor
 */
16 | template 17 | struct CudaVector 18 | { 19 | // Explicit construct using initializer list 20 | CudaVector(std::initializer_list&& initializer) : 21 | cpu_vector(std::move(initializer)), 22 | gpu_array(nullptr) 23 | { 24 | } 25 | 26 | // Forward construction to vector 27 | template 28 | CudaVector(Args&&... args) : 29 | cpu_vector(std::forward(args)...), 30 | gpu_array(nullptr) 31 | { 32 | } 33 | 34 | ~CudaVector(); 35 | 36 | // CPU memory is immutable since it should be 37 | // synchronized with gpu 38 | const std::vector& cpu() const { return cpu_vector; } 39 | 40 | // Size of CPU vector (gpu will be the same size once allocated) 41 | const size_t size() const { return cpu_vector.size(); } 42 | 43 | // Reads GPU pointer and copies data from GPU memory to CPU 44 | void read(); 45 | 46 | // Pointer to GPU array 47 | // Will allocate (and copy) by default 48 | CudaArray* gpu(bool allocate = true); 49 | 50 | private: 51 | 52 | std::vector cpu_vector; 53 | 54 | CudaArray* gpu_array; 55 | }; 56 | 57 | template void CudaVector::read() 58 | { 59 | assert(gpu_array); 60 | if (gpu_array) 61 | { 62 | gpu_array->copy(cpu_vector); 63 | } 64 | } 65 | 66 | template CudaArray* CudaVector::gpu(bool allocate /*= true*/) 67 | { 68 | if (gpu_array || !allocate) 69 | { 70 | return gpu_array; 71 | } 72 | 73 | gpu_array = CudaArray::allocate(cpu_vector); 74 | return gpu_array; 75 | } 76 | 77 | template CudaVector::~CudaVector() 78 | { 79 | if (gpu_array != nullptr) 80 | { 81 | gpu_array->free(); 82 | gpu_array = nullptr; 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/host/command_line_args.cpp: -------------------------------------------------------------------------------- 1 | #include "command_line_args.h" 2 | 3 | #include 4 | 5 | #include "host/host_utils.h" 6 | #include "host/console.h" 7 | 8 | //#define CXXOPTS_NO_EXCEPTIONS 9 | #include "cxxopts/include/cxxopts.hpp" 10 | 11 | 12 | namespace 13 | { 14 | 15 | inline void 
read_alphabets(CommandLineArgs& target, cxxopts::ParseResult& result) 16 | { 17 | if (result.count(ARG_ALPHABET) == 0) 18 | { 19 | // no alphabets available 20 | return; 21 | } 22 | 23 | const auto& alphabet_args = result[ARG_ALPHABET].as>(); 24 | 25 | for (const auto& alphabet_arg : alphabet_args) 26 | { 27 | auto alphabet_bytes = host::utils::read_alphabet_binary_file(alphabet_arg.c_str()); 28 | if (alphabet_bytes.size() > 0) 29 | { 30 | target.alphabets.emplace_back(alphabet_bytes); 31 | } 32 | else 33 | { 34 | // "61:ab:00:33..." -> "61,ab,00,33..." 35 | std::string alphabet_bytes_hex = alphabet_arg; 36 | std::replace(alphabet_bytes_hex.begin(), alphabet_bytes_hex.end(), ':', ','); 37 | 38 | // ["61","ab","00","33"] 39 | std::vector alphabet_hex; 40 | cxxopts::values::parse_value(alphabet_bytes_hex, alphabet_hex); 41 | 42 | // ["61","ab","00","33"] -> [0x61, 0xAB, 0x00, 0x33] 43 | std::vector alphabet_bytes; 44 | alphabet_bytes.reserve(alphabet_hex.size()); 45 | 46 | for (const auto& hex : alphabet_hex) 47 | { 48 | auto value = (uint8_t)strtoul(hex.c_str(), nullptr, 16); 49 | if (value != 0 && hex != "00") 50 | { 51 | alphabet_bytes.push_back(value); 52 | } 53 | else 54 | { 55 | printf("Error: cannot parse alphabet byte '%s'!\n", hex.c_str()); 56 | } 57 | } 58 | 59 | // 60 | if (alphabet_bytes.size() > 0) 61 | { 62 | target.alphabets.emplace_back(alphabet_bytes); 63 | 64 | if (alphabet_bytes.size() > 256) 65 | { 66 | printf("Warning: Alphabet: '%s' has to much bytes in hex string: %zd (should be less than 257)\n", 67 | result[ARG_ALPHABET].as().c_str(), alphabet_bytes.size()); 68 | } 69 | } 70 | else 71 | { 72 | printf("Error: Alphabet: '%s' is not valid file, neither valid alphabet hex string (like: AA:11:b3...)!\n", 73 | alphabet_arg.c_str()); 74 | } 75 | } 76 | } 77 | } 78 | 79 | inline void parse_dictionary_mode(CommandLineArgs& target, cxxopts::ParseResult& result) 80 | { 81 | if (result[ARG_WORDDICT].count() > 0) 82 | { 83 | auto dict = 
result[ARG_WORDDICT].as>(); 84 | 85 | std::vector decryptors; 86 | 87 | for (const auto& dict_arg : dict) 88 | { 89 | std::vector from_file = host::utils::read_word_dictionary_file(dict_arg.c_str()); 90 | 91 | if (from_file.size() > 0) 92 | { 93 | decryptors.insert(decryptors.end(), from_file.begin(), from_file.end()); 94 | } 95 | else if (auto key = strtoull(dict_arg.c_str(), nullptr, 16)) 96 | { 97 | decryptors.push_back(Decryptor(key, 0)); 98 | } 99 | else 100 | { 101 | printf("Error: invalid param passed to '--%s' argument: '%s' not a dictionary file neither word\n", ARG_WORDDICT, dict_arg.c_str()); 102 | } 103 | } 104 | 105 | if (decryptors.size() > 0) 106 | { 107 | target.brute_configs.push_back(BruteforceConfig::GetDictionary(std::move(decryptors))); 108 | } 109 | } 110 | 111 | if (result[ARG_BINDICT].count() > 0) 112 | { 113 | auto seed = result[ARG_SEED].as(); 114 | 115 | auto dicts = result[ARG_BINDICT].as>(); 116 | auto mode = result[ARG_BINDMODE].as(); 117 | 118 | std::vector decryptors; 119 | 120 | for (const auto& bin_dict_path : dicts) 121 | { 122 | std::vector decryptors = host::utils::read_binary_dictionary_file(bin_dict_path.c_str(), mode, seed); 123 | 124 | if (decryptors.size() > 0) 125 | { 126 | target.brute_configs.push_back(BruteforceConfig::GetDictionary(std::move(decryptors))); 127 | } 128 | else 129 | { 130 | printf("Error: invalid param passed to '--%s' argument: '%s'. 
Invalid file!\n", 131 | ARG_BINDICT, bin_dict_path.c_str()); 132 | } 133 | } 134 | 135 | } 136 | } 137 | 138 | inline void parse_bruteforce_mode(CommandLineArgs& target, cxxopts::ParseResult& result) 139 | { 140 | auto start_key = result[ARG_START].as(); 141 | auto seed = result[ARG_SEED].as(); 142 | Decryptor first_decryptor(start_key, seed); 143 | 144 | auto count_key = result[ARG_COUNT].as(); 145 | 146 | target.brute_configs.push_back(BruteforceConfig::GetBruteforce(first_decryptor, count_key)); 147 | } 148 | 149 | inline void parse_seed_mode(CommandLineArgs& target, cxxopts::ParseResult& result) 150 | { 151 | if (result.count(ARG_START) == 0) 152 | { 153 | printf("Error: For seed mode, it's necessary to specify a manufacturer key with '--" ARG_START "' argument!\n"); 154 | return; 155 | } 156 | 157 | auto start_key = result[ARG_START].as(); 158 | auto seed = result[ARG_SEED].as(); 159 | Decryptor first_decryptor(start_key, seed); 160 | 161 | target.brute_configs.push_back(BruteforceConfig::GetSeedBruteforce(first_decryptor)); 162 | } 163 | 164 | inline void parse_bruteforce_filtered_mode(CommandLineArgs& target, cxxopts::ParseResult& result) 165 | { 166 | auto start_key = result[ARG_START].as(); 167 | auto seed = result[ARG_SEED].as(); 168 | Decryptor first_decryptor(start_key, seed); 169 | 170 | auto count_key = result[ARG_COUNT].as(); 171 | 172 | auto include_filter = result[ARG_IFILTER].as(); 173 | auto exclude_filter = result[ARG_EFILTER].as(); 174 | 175 | BruteforceFilters filters 176 | { 177 | include_filter, 178 | exclude_filter, 179 | }; 180 | 181 | target.brute_configs.push_back(BruteforceConfig::GetBruteforce(first_decryptor, count_key, filters)); 182 | } 183 | 184 | inline void parse_alphabet_mode(CommandLineArgs& target, cxxopts::ParseResult& result) 185 | { 186 | auto start_key = result[ARG_START].as(); 187 | auto seed = result[ARG_SEED].as(); 188 | Decryptor first_decryptor(start_key, seed); 189 | 190 | auto count_key = result[ARG_COUNT].as(); 191 
| 192 | for (const auto& alphabet : target.alphabets) 193 | { 194 | target.brute_configs.push_back(BruteforceConfig::GetAlphabet(first_decryptor, alphabet, count_key)); 195 | } 196 | } 197 | 198 | inline void parse_pattern_mode(CommandLineArgs& target, cxxopts::ParseResult& result) 199 | { 200 | auto start_key = result[ARG_START].as(); 201 | auto seed = result[ARG_SEED].as(); 202 | Decryptor first_decryptor(start_key, seed); 203 | 204 | auto count_key = result[ARG_COUNT].as(); 205 | 206 | const auto& args = result[ARG_PATTERN].as>(); 207 | 208 | auto full_bytes = DefaultByteArray<>::as_vector>(); 209 | 210 | for (const auto& pattern_arg : args) 211 | { 212 | std::string pattern_bytes_hex = pattern_arg; 213 | std::replace(pattern_bytes_hex.begin(), pattern_bytes_hex.end(), ':', ','); 214 | 215 | std::vector bytes_hex; 216 | cxxopts::values::parse_value(pattern_bytes_hex, bytes_hex); 217 | 218 | 219 | std::vector> result; 220 | for (auto& hex : bytes_hex) 221 | { 222 | // append alphabets' bytes 223 | if (hex.find("AL") != std::string::npos) 224 | { 225 | if (target.alphabets.size() == 0) 226 | { 227 | printf("ERROR: Cannot use Alphabet in patterns - no provided alphabets. Replacing with *\n"); 228 | result.push_back(full_bytes); 229 | continue; 230 | } 231 | 232 | auto al_index = strtoul(hex.substr(2).c_str(), nullptr, 10); 233 | if (al_index >= target.alphabets.size()) 234 | { 235 | printf("ERROR: Argument %s referring alphabet: %ld (index: %ld), but there are only %zd available. " 236 | "Replacing with first one\n", hex.c_str(), al_index + 1, al_index, target.alphabets.size()); 237 | al_index = 0; 238 | } 239 | 240 | result.push_back(target.alphabets[al_index].as_vector()); 241 | } 242 | else 243 | { 244 | // append regular pattern bytes 245 | auto bytes = BruteforcePattern::ParseBytes(hex); 246 | 247 | if (bytes.size() == 0) 248 | { 249 | printf("ERROR: Invalid string '%s' for byte pattern! Ignoring. 
Replacing with *\n", hex.c_str()); 250 | result.push_back(full_bytes); 251 | } 252 | else 253 | { 254 | result.push_back(bytes); 255 | } 256 | } 257 | } 258 | 259 | // validate 260 | if (result.size() > 8) 261 | { 262 | printf("Warning: Pattern string: '%s' contains more than 8 per-bytes delimiters (%zd) other will be ignored.\n", 263 | pattern_arg.c_str(), bytes_hex.size()); 264 | } 265 | else if (result.size() < 8) 266 | { 267 | printf("Warning: Pattern string: '%s' contains less than 8 per-bytes delimiters (%zd) other fill with full pattern.\n", 268 | pattern_arg.c_str(), bytes_hex.size()); 269 | 270 | for (auto i = result.size(); i < 8; ++i) 271 | { 272 | result.push_back(full_bytes); 273 | } 274 | } 275 | 276 | 277 | // reverse bytes 278 | std::reverse(result.begin(), result.end()); 279 | 280 | // Add pattern attack to config 281 | target.brute_configs.push_back(BruteforceConfig::GetPattern(first_decryptor, BruteforcePattern(std::move(result), pattern_arg), count_key)); 282 | } 283 | } 284 | 285 | constexpr const char* Usage() 286 | { 287 | return "" 288 | "\nExample:\n" 289 | "\t./" APP_NAME " --" ARG_INPUTS " xxx,yy,zzz" 290 | " --" ARG_MODE "=1 --" ARG_START "=0x9876543210 --" ARG_COUNT "=1000000" 291 | "\n\n\tThis will launch simple bruteforce (+1) attack with 1 million checks from 0x9876543210. " 292 | "Will be checked ALL 16 (12 if no seed specified) keeloq learning types" 293 | 294 | "\nExample:\n" 295 | "\t./" APP_NAME " --" ARG_INPUTS " xxx,yy,zzz" 296 | " --" ARG_MODE "=3 --" ARG_LTYPE "=0 --" ARG_ALPHABET "=examples/alphabet.bin,10:20:30:AA:BB:CC:DD:EE:FF:02:33" 297 | "\n\n\tThis will launch 2 alphabets attacks for all possible combinations for SIMPLE learning Keeloq type. " 298 | "First alphabet will be taken from file, second - parsed from inputs." 
299 | 300 | "\nExample:\n" 301 | "\t./" APP_NAME " --" ARG_INPUTS " xxx,yy,zzz" 302 | " --" ARG_MODE "=4 --" ARG_LTYPE "=2 --" ARG_ALPHABET "=examples/alphabet.bin --" ARG_PATTERN "=AL0:11:AB|BC:*:00-44:AL0:AA-FF:01" 303 | "\n\n\tThis will launch pattern attacks with NORMAL keeloq learning type." 304 | "\n\tPattern applied 'as is' - big endian. The highest byte (0xXX.......) will be taken from 1st alphabet." 305 | "\n\tNext byte (0x..XX....) will be exact `0x11`." 306 | "\n\tNext byte (0x....XX..) will be `0xAB` or `0xBC`.\n" 307 | 308 | "\nExample:\n" 309 | "\t./" APP_NAME " --" ARG_INPUTS " xxx,yy,zzz" 310 | " --" ARG_MODE "=5 --" ARG_START "=0xAABBCCDDEEFF" 311 | "\n\n\tThis will launch seed bruteforce attack for all seed learning types. " 312 | "\n\tSpecifying '--" ARG_LTYPE "=a,b,c' will narrow learning types to provided ones." 313 | ; 314 | } 315 | 316 | } 317 | 318 | CommandLineArgs CommandLineArgs::parse(int argc, const char** argv) 319 | { 320 | cxxopts::Options options(APP_NAME, R"( 321 | 322 | _______ _____ ___ __ __ __ 323 | / ___/ / / / _ \/ _ | / //_/__ ___ / / ___ ___ _ 324 | / /__/ /_/ / // / __ | / ,< / -_) -_) /__/ _ \/ _ `/ 325 | \___/\____/____/_/ |_| /_/|_|\__/\__/____/\___/\_, / 326 | /_/ 327 | ___ __ ___ 328 | / _ )______ __/ /____ / _/__ ___________ ____ 329 | / _ / __/ // / __/ -_) _/ _ \/ __/ __/ -_) __/ 330 | /____/_/ \_,_/\__/\__/_/ \___/_/ \__/\__/_/ 331 | 332 | )"); 333 | options.set_width(::console::get_width()) 334 | .allow_unrecognised_options() 335 | .add_options() 336 | ("h," ARG_HELP, "Prints this help") 337 | 338 | // What to bruteforce 339 | (ARG_INPUTS, "Comma separated uint64 values (it's better to have 3)", 340 | cxxopts::value>(), "[k1, k1, k3]") 341 | 342 | // CUDA Setup 343 | (ARG_BLOCKS, "How many thread blocks to launch.", 344 | cxxopts::value()->default_value("32"), "") 345 | (ARG_THREADS, "How many threads will be launched in a block (if 0 - will use value from device).", 346 | cxxopts::value()->default_value("0"), 
"") 347 | 348 | #ifndef NO_INNER_LOOPS 349 | (ARG_LOOPS, "How many loop iterations will one thread perform (keep it low).", 350 | cxxopts::value()->default_value("2"), "") 351 | #endif 352 | 353 | // Mode - what bruteforce type will be used 354 | (ARG_MODE, 355 | "Bruteforce modes (comma separated):" 356 | "\n\t0: - Dictionary." 357 | "\n\t1: - Simple +1." 358 | "\n\t2: - Simple +1 with filters." 359 | "\n\t3: - Alphabet. Bruteforce +1 using only specified bytes." 360 | "\n\t4: - Pattern. Bruteforce with bytes selected by specified pattern." 361 | "\n\t5: - Seed. Bruteforce only seed with provided manufacturer key (applied only to algorithms with seed).", 362 | cxxopts::value>(), "[m1,m2..]") 363 | (ARG_LTYPE, 364 | "Specific learning type (if you know your target well). Increases approximately x16 times (since doesn't calculate other types)" 365 | "\n\tV+1 means with reverse key (There are also more types. see source code):" 366 | "\n\t0: - Simple" 367 | "\n\t2: - Normal" 368 | "\n\t4: - Secure" 369 | "\n\t6: - Xor" 370 | "\nALL", 371 | cxxopts::value>()->default_value(KeeloqLearningType::ValueString(KeeloqLearningType::LAST)), "") 372 | 373 | // Dictionaries files 374 | (ARG_WORDDICT, "Word dictionary file(s) or word(s) - contains hexadecimal strings which will be used as keys. e.g: 0xaabb1122 FFbb9800121212", 375 | cxxopts::value>(), "[f1,w1,...]") 376 | (ARG_BINDICT, "Binary dictionary file(s) - each 8 bytes of the file will be used as key (do not check duplicates or zeros)", 377 | cxxopts::value>(), "[b1,b2,...]") 378 | (ARG_BINDMODE, "Byte order mode for binary dictionary. 0 - as is. 1 - reverse, 2 - add both", 379 | cxxopts::value()->default_value("0"), "") 380 | 381 | // Common (Bruteforce, Alphabet) - set start and end of execution 382 | (ARG_START, "The first key value which will be used for selected mode(s)", 383 | cxxopts::value()->default_value("0"), "") 384 | (ARG_SEED, "The seed which is used for bruteforce. 
If you specify it, most probably you need to check seed-only learning types (SECURE, FAAC)", 385 | cxxopts::value()->default_value("0"), "") 386 | (ARG_COUNT, "How many keys selected mode(s) should check.", 387 | cxxopts::value()->default_value("0xFFFFFFFFFFFFFFFF"), "") 388 | 389 | // Alphabet 390 | (ARG_ALPHABET, "Alphabet binary file(s) or alphabet hex string(s) (like: AA:61:62:bb)", 391 | cxxopts::value>(), "[f1,a1,...]") 392 | 393 | // Pattern 394 | (ARG_PATTERN, "Pattern file (or pattern itself) - contains comma separated patterns like: AL1:0A:0x10-0x32:*:33|44|FA:FF\n" 395 | "Pattern is in big endian. That means first byte in patter is highest byte (e.g. 01:.... equals key 0x01......)\n" 396 | "Each byte in pattern separated by `:`, pattern types:\n" 397 | "\tAL[0-N] - alphabet N (index in " ARG_ALPHABET " )\n" 398 | "\t0A - constant. might be any byte as hex string\n" 399 | "\t0x10-0x32 - range. bytes from first to second (including)\n" 400 | "\t* - any byte\n" 401 | "\t33|44|FA - exact 3 bytes", 402 | cxxopts::value>(), "[f1,p1,...]") 403 | 404 | // Bruteforce filters 405 | (ARG_EFILTER, "Exclude filter: key matching this filters will not be used in bruteforce.", 406 | cxxopts::value()->default_value("0"), "") 407 | (ARG_IFILTER, "Include filter: only keys matching this filters will be used in bruteforce. (WARNING: may be EXTREMELY heavy to compute)", 408 | cxxopts::value()->default_value("0xFFFFFFFFFFFFFFFF"), "") 409 | 410 | // Stop config 411 | (ARG_FMATCH, "Boolean. Stop bruteforce on first match. If inputs are 3+ probably should set to true", 412 | cxxopts::value()->default_value("true")) 413 | 414 | // Tests run 415 | (ARG_TEST, "Boolean. Run application tests. You'd better use them in debug.", 416 | cxxopts::value()->default_value("false")) 417 | 418 | // Benchmarks run 419 | (ARG_BENCHMARK, "Boolean. Run application benchmarks. 
You can specify learning and num loops type from command line also.", 420 | cxxopts::value()->default_value("false")) 421 | ; 422 | 423 | CommandLineArgs args; 424 | 425 | auto result = options.parse(argc, argv); 426 | 427 | // tests 428 | args.run_tests = result[ARG_TEST].as(); 429 | 430 | // benchmarks 431 | args.run_bench = result[ARG_BENCHMARK].as(); 432 | 433 | // CUDA setup 434 | args.init_cuda(result[ARG_BLOCKS].as(), result[ARG_THREADS].as(), 435 | result.count(ARG_LOOPS) > 0 ? result[ARG_LOOPS].as() : 0); 436 | 437 | if (result.count(ARG_HELP) || result.arguments().size() == 0 || result.count(ARG_INPUTS) == 0) 438 | { 439 | if (!args.run_tests && !args.run_bench) 440 | { 441 | printf("\n%s\n", options.help().c_str()); 442 | printf("%s\n", Usage()); 443 | } 444 | return args; 445 | } 446 | 447 | // Inputs 448 | if (result.count(ARG_INPUTS) > 0) 449 | { 450 | args.init_inputs(result[ARG_INPUTS].as>()); 451 | if (args.inputs.size() < 3) 452 | { 453 | printf("WARNING: No enough inputs: '%zd'! Need at least 3!\nHowever we'll proceed...\n", args.inputs.size()); 454 | } 455 | } 456 | else 457 | { 458 | printf("Error: No inputs! 
Nothing to brute!\n%s\n", options.help().c_str()); 459 | return args; 460 | } 461 | 462 | // Stop if need 463 | args.match_stop = result[ARG_FMATCH].as(); 464 | 465 | // Alphabets 466 | read_alphabets(args, result); 467 | 468 | // Bruteforce configs 469 | if (result.count(ARG_MODE) > 0) 470 | { 471 | for (const auto& mode : result[ARG_MODE].as>()) 472 | { 473 | switch (mode) 474 | { 475 | case (uint8_t)BruteforceType::Dictionary: 476 | parse_dictionary_mode(args, result); 477 | break; 478 | case (uint8_t)BruteforceType::Simple: 479 | parse_bruteforce_mode(args, result); 480 | break; 481 | case (uint8_t)BruteforceType::Filtered: 482 | parse_bruteforce_filtered_mode(args, result); 483 | break; 484 | case (uint8_t)BruteforceType::Alphabet: 485 | parse_alphabet_mode(args, result); 486 | break; 487 | case (uint8_t)BruteforceType::Pattern: 488 | parse_pattern_mode(args, result); 489 | break; 490 | case (uint8_t)BruteforceType::Seed: 491 | parse_seed_mode(args, result); 492 | break; 493 | default: 494 | break; 495 | } 496 | } 497 | 498 | if (args.brute_configs.size() == 0) 499 | { 500 | printf("Error: Cannot parse inputs to even single brute config! 
Result arguments:\n'%s'\n", 501 | result.arguments_string().c_str()); 502 | } 503 | } 504 | else 505 | { 506 | printf("Error: you need to specify bruteforce mode!\n%s\n", 507 | options.help().c_str()); 508 | } 509 | 510 | if (result.count(ARG_LTYPE) > 0) 511 | { 512 | auto learning_type_bytes = result[ARG_LTYPE].as>(); 513 | 514 | args.selected_learning.clear(); 515 | for (auto value : learning_type_bytes) 516 | { 517 | if (value < KeeloqLearningType::LAST) 518 | { 519 | args.selected_learning.push_back(value); 520 | } 521 | } 522 | } 523 | 524 | return args; 525 | } 526 | 527 | bool CommandLineArgs::can_bruteforce() 528 | { 529 | return inputs.size() > 0 && brute_configs.size() > 0; 530 | } 531 | 532 | void CommandLineArgs::init_inputs(const std::vector& inp) 533 | { 534 | inputs.reserve(inp.size()); 535 | for (uint64_t ota : inp) 536 | { 537 | inputs.push_back(EncParcel(ota)); 538 | } 539 | } 540 | 541 | void CommandLineArgs::init_cuda(uint16_t b, uint16_t t, uint16_t l) 542 | { 543 | cuda_blocks = b; 544 | assert(cuda_blocks < max_cuda_blocks() && "This GPU cannot use this much blocks!"); 545 | 546 | cuda_threads = t; 547 | cuda_loops = l; 548 | 549 | if (cuda_threads == 0) 550 | { 551 | cuda_threads = (uint16_t)max_cuda_threads(); 552 | } 553 | } 554 | 555 | uint32_t CommandLineArgs::max_cuda_threads() 556 | { 557 | cudaDeviceProp prop; 558 | cudaGetDeviceProperties(&prop, 0); 559 | 560 | return prop.maxThreadsPerBlock; 561 | } 562 | 563 | uint32_t CommandLineArgs::max_cuda_blocks() 564 | { 565 | cudaDeviceProp prop; 566 | cudaGetDeviceProperties(&prop, 0); 567 | 568 | return prop.maxGridSize[0]; 569 | } 570 | -------------------------------------------------------------------------------- /src/host/command_line_args.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.h" 4 | 5 | #include 6 | 7 | #include "algorithm/keeloq/keeloq_learning_types.h" 8 | #include 
"algorithm/keeloq/keeloq_encrypted.h" 9 | #include "bruteforce/bruteforce_config.h" 10 | 11 | #define APP_NAME "CudaKeeloq" 12 | 13 | #define ARG_HELP "help" 14 | #define ARG_TEST "test" 15 | #define ARG_BENCHMARK "benchmark" 16 | #define ARG_INPUTS "inputs" 17 | #define ARG_BLOCKS "cuda-blocks" 18 | #define ARG_THREADS "cuda-threads" 19 | #define ARG_LOOPS "cuda-loops" 20 | #define ARG_MODE "mode" 21 | #define ARG_LTYPE "learning-type" 22 | #define ARG_WORDDICT "word-dict" 23 | #define ARG_BINDICT "bin-dict" 24 | #define ARG_BINDMODE "bin-dict-mode" 25 | #define ARG_START "start" 26 | #define ARG_SEED "seed" 27 | #define ARG_COUNT "count" 28 | #define ARG_ALPHABET "alphabet" 29 | #define ARG_PATTERN "pattern" 30 | #define ARG_IFILTER "include-filter" 31 | #define ARG_EFILTER "exclude-filter" 32 | #define ARG_FMATCH "first-match" 33 | 34 | 35 | /** 36 | * Aggregated configuration of application 37 | */ 38 | struct CommandLineArgs 39 | { 40 | // Input encrypted data (3 caught OTA values) 41 | std::vector inputs; 42 | 43 | // How brute will be performed (may be several iterations) 44 | std::vector brute_configs; 45 | 46 | // Do not do all 16 calculations, use predefined one 47 | std::vector selected_learning = {}; 48 | 49 | // Alphabets are just set of possible byte values 50 | // this sets may be shared between attacks 51 | std::vector alphabets; 52 | 53 | // Stop on first match 54 | bool match_stop; 55 | 56 | // Cuda setup 57 | uint16_t cuda_blocks; 58 | uint16_t cuda_threads; 59 | uint16_t cuda_loops; 60 | 61 | // run also tests 62 | bool run_tests; 63 | 64 | // Run only benchmarks (with selected values) 65 | bool run_bench; 66 | 67 | public: 68 | 69 | // Parse from standard terminal way 70 | static CommandLineArgs parse(int argc, const char** argv); 71 | 72 | public: 73 | // Checks if arguments enough for bruteforcing 74 | bool can_bruteforce(); 75 | 76 | // Init enc parcel collection with raw OTA values 77 | void init_inputs(const std::vector& inp); 78 | 79 | 
void init_cuda(uint16_t b, uint16_t t, uint16_t l); 80 | 81 | // Check device capabilities and returns maximum thread allowed for single block 82 | static uint32_t max_cuda_threads(); 83 | 84 | // Check device capabilities and returns maximum allowed number of blocks 85 | static uint32_t max_cuda_blocks(); 86 | }; -------------------------------------------------------------------------------- /src/host/console.cpp: -------------------------------------------------------------------------------- 1 | #include "console.h" 2 | 3 | #include 4 | 5 | #ifdef _MSC_VER 6 | #pragma warning(push) 7 | #pragma warning(disable: 4996) 8 | #endif 9 | #include "cpp-terminal/terminal_base.h" 10 | #include "cpp-terminal/terminal.h" 11 | #ifdef _MSC_VER 12 | #pragma warning(pop) 13 | #endif 14 | 15 | static Term::Terminal s_term = Term::Terminal(true, false); 16 | 17 | 18 | void console::progress_bar(double percent, const std::chrono::seconds& elapsed) 19 | { 20 | constexpr auto progress_width = 80; 21 | 22 | static char progress_fill[progress_width] = { 0 }; 23 | static char progress_none[progress_width] = { 0 }; 24 | if (progress_fill[0] == 0) 25 | { 26 | std::memset(progress_fill, '=', sizeof(progress_fill)); 27 | std::memset(progress_none, '-', sizeof(progress_none)); 28 | } 29 | 30 | std::chrono::seconds eta = elapsed.count() > 0 ? 
31 | std::chrono::seconds((uint64_t)(elapsed.count() / percent)) - elapsed : std::chrono::seconds(0); 32 | 33 | printf("[%.*s>", (int)(progress_width * percent), progress_fill); 34 | printf("%.*s]", (int)(progress_width * (1 - percent)), progress_none); 35 | printf("%d%% %02" PRId64 ":%02" PRId64 ":%02" PRId64 " ETA:%02" PRId64 ":%02" PRId64 ":%02" PRId64 " \n", (int)(percent * 100), 36 | elapsed.count() / 3600, (elapsed.count() / 60) % 60, elapsed.count() % 60, 37 | eta.count() / 3600, (eta.count() / 60) % 60, eta.count() % 60); 38 | } 39 | 40 | void console::clear_line(int width /*= 0*/) 41 | { 42 | int tWidth = 0; 43 | int tHeight = 0; 44 | 45 | s_term.get_term_size(tHeight, tWidth); 46 | printf("\r%*s", width > 0 ? width : (tWidth - 1), ""); 47 | printf("\r"); 48 | } 49 | 50 | 51 | int console::read_esc_press() 52 | { 53 | return s_term.read_key0() == Term::ESC; 54 | } 55 | 56 | 57 | void console::set_cursor_state(bool visible) 58 | { 59 | s_term.write(visible ? Term::cursor_on() : Term::cursor_off()); 60 | } 61 | 62 | uint32_t console::get_width() 63 | { 64 | int cols = CONSOLE_WIDTH; 65 | int rows = 0; 66 | 67 | s_term.get_term_size(rows, cols); 68 | 69 | return cols; 70 | } 71 | -------------------------------------------------------------------------------- /src/host/console.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.h" 4 | 5 | #include 6 | #include 7 | 8 | #include "host/command_line_args.h" 9 | 10 | constexpr char WAIT_SPIN[] = "|/-\\"; 11 | #define WAIT_CHAR(i) (WAIT_SPIN[i % (sizeof(WAIT_SPIN) - 1)]) 12 | 13 | #define CONSOLE_WIDTH 160 14 | 15 | #define console_clear() printf("\033[H\033[J") 16 | 17 | #define console_cursor_up(lines) printf("\033[%dA", (lines)) 18 | #define console_set_width(col) printf("\033[%du", (col)) 19 | 20 | #define console_cursor_ret_up(lines) printf("\033[%dF", (lines)) 21 | #define console_set_cursor(x,y) printf("\033[%d;%dH", (y), (x)) 22 | 23 
| #define save_cursor_pos() printf("\033[s") 24 | #define load_cursor_pos() printf("\033[u") 25 | 26 | namespace console 27 | { 28 | 29 | void progress_bar(double percent, const std::chrono::seconds& elapsed); 30 | 31 | void clear_line(int width = 0); 32 | 33 | int read_esc_press(); 34 | 35 | void set_cursor_state(bool visible); 36 | 37 | uint32_t get_width(); 38 | 39 | } -------------------------------------------------------------------------------- /src/host/host_utils.cpp: -------------------------------------------------------------------------------- 1 | #define _CRT_SECURE_NO_WARNINGS 2 | 3 | #include "common.h" 4 | 5 | #include 6 | #include 7 | 8 | #include "host_utils.h" 9 | 10 | #include "algorithm/keeloq/keeloq_decryptor.h" 11 | 12 | 13 | #ifndef _MSC_VER 14 | #include 15 | #else 16 | #define bswap_64(x) _byteswap_uint64(x) 17 | #endif 18 | 19 | 20 | namespace 21 | { 22 | 23 | bool parse_manufactorer_key(char* key_str, uint64_t& key) 24 | { 25 | if (!key_str) 26 | { 27 | return 0; 28 | } 29 | 30 | int base = 0; 31 | if (key_str[0] == '0' && (key_str[1] == 'b' || key_str[1] == 'B')) 32 | { 33 | key_str[0] = ' '; 34 | key_str[1] = ' '; 35 | base = 2; 36 | } 37 | 38 | if (auto parsed = strtoull(key_str, nullptr, base)) 39 | { 40 | key = parsed; 41 | return true; 42 | } 43 | else 44 | { 45 | return false; 46 | } 47 | } 48 | 49 | bool parse_seed(char* seed_str, uint32_t& seed) 50 | { 51 | if (!seed_str) 52 | { 53 | return 0; 54 | } 55 | 56 | int base = 10; 57 | 58 | if (auto parsed = strtoul(seed_str, nullptr, base)) 59 | { 60 | seed = parsed; 61 | return true; 62 | } 63 | else 64 | { 65 | return false; 66 | } 67 | } 68 | 69 | } 70 | 71 | 72 | 73 | std::vector host::utils::read_word_dictionary_file(const char* file) 74 | { 75 | std::vector results; 76 | 77 | if (FILE* file_dict = fopen(file, "r")) 78 | { 79 | char line[256] = { 0 }; 80 | char delim[2] = ":"; 81 | 82 | while (fgets(line, sizeof(line), file_dict)) 83 | { 84 | auto man_str = strtok(line, delim); 
85 | if (man_str == nullptr) 86 | { 87 | man_str = line; 88 | } 89 | 90 | uint64_t man = (uint64_t)0; 91 | uint32_t seed = (uint32_t)0; 92 | 93 | if (!parse_manufactorer_key(man_str, man)) 94 | { 95 | printf("Error: invalid line: `%s` in file: '%s'\n", line, file); 96 | } 97 | 98 | auto seed_str = strtok(NULL, delim); 99 | parse_seed(seed_str, seed); 100 | 101 | results.emplace_back(man, seed); 102 | } 103 | 104 | fclose(file_dict); 105 | } 106 | 107 | return results; 108 | } 109 | 110 | std::vector host::utils::read_binary_dictionary_file(const char* file, uint8_t mode, uint32_t seed) 111 | { 112 | std::vector decryptors; 113 | 114 | if (FILE* bin_file = fopen(file, "rb")) 115 | { 116 | uint8_t key[sizeof(uint64_t)] = { 0 }; 117 | 118 | while (fread(key, sizeof(uint64_t), sizeof(uint8_t), bin_file)) 119 | { 120 | uint64_t reversed = *(uint64_t*)key; 121 | uint64_t as_is = bswap_64(reversed); 122 | 123 | uint64_t key = mode == 0 ? as_is : reversed; 124 | 125 | decryptors.push_back(Decryptor(key, seed)); 126 | if (mode == 2) 127 | { 128 | // reversed already added above 129 | decryptors.push_back(Decryptor(as_is, seed)); 130 | } 131 | } 132 | 133 | fclose(bin_file); 134 | } 135 | 136 | return decryptors; 137 | } 138 | 139 | std::vector host::utils::read_alphabet_binary_file(const char* file) 140 | { 141 | constexpr uint32_t MaxFileSize = 256; 142 | 143 | if (FILE* alphabet_file = fopen(file, "rb")) 144 | { 145 | // alphabet with more than 256 bytes is impossible (or just has duplicates) 146 | uint8_t bytes[MaxFileSize]; 147 | size_t read_bytes = fread(bytes, sizeof(uint8_t), sizeof(bytes), alphabet_file); 148 | 149 | fseek(alphabet_file, 0, SEEK_END); 150 | uint64_t size = ftell(alphabet_file); 151 | if (size > MaxFileSize) 152 | { 153 | printf("Warning: File's '%s' is %" PRIu64 " bytes. It is bigger than read %u bytes. 
Alphabet bytes should be unique!\n", 154 | file, size, MaxFileSize); 155 | } 156 | 157 | fclose(alphabet_file); 158 | 159 | std::vector alphabet_bytes(&bytes[0], &bytes[read_bytes]); 160 | return alphabet_bytes; 161 | } 162 | 163 | return {}; 164 | } 165 | -------------------------------------------------------------------------------- /src/host/host_utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | struct Decryptor; 6 | 7 | namespace host 8 | { 9 | namespace utils 10 | { 11 | 12 | // Read file with set with hexadecimal string keys 13 | std::vector read_word_dictionary_file(const char* file); 14 | 15 | // Read binary file as set of keys 8-bytes each 16 | std::vector read_binary_dictionary_file(const char* file, uint8_t mode, uint32_t seed); 17 | 18 | // Read first 256 bytes of a binary file as alphabet 19 | std::vector read_alphabet_binary_file(const char* file); 20 | 21 | } 22 | } -------------------------------------------------------------------------------- /src/host/timer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | // Simple helper timer 6 | template 7 | struct Timer 8 | { 9 | static inline Timer start() 10 | { 11 | return Timer(TClock::now()); 12 | } 13 | 14 | template 15 | inline TDuration elapsed() 16 | { 17 | return std::chrono::duration_cast(TClock::now() - start_point); 18 | } 19 | 20 | inline std::chrono::seconds elapsed_secods() 21 | { 22 | return elapsed(); 23 | } 24 | 25 | protected: 26 | Timer(typename TClock::time_point initial) : start_point(initial) 27 | { 28 | } 29 | 30 | private: 31 | 32 | typename TClock::time_point start_point; 33 | }; -------------------------------------------------------------------------------- /src/kernels/kernel_result.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.h" 4 | 5 | 
/**
 * Generic output from CUDA kernel
 *
 * Carries two plain integers (error counter and overall value) and registers
 * itself with its TGenericGpuObject base by passing `this` to the base constructor.
 * NOTE(review): the base class template arguments are not visible in this extract;
 * presumably the base keeps a host pointer to this object for host/device copies - confirm in device/cuda_object.h.
 */
struct KernelResult : TGenericGpuObject
{
    // num errors. negative are kernel errors. positive - number of threads error
    int error = 0;

    // overall result
    int value = 0;

    // Default result: no errors, zero value; the base is bound to this instance
    KernelResult() : TGenericGpuObject(this)
    {
    }

    // Move construction copies only the two counters; the base is constructed fresh
    // with the address of the new object instead of taking over the state of `other`
    KernelResult(KernelResult&& other) noexcept : TGenericGpuObject(this)
    {
        error = other.error;
        value = other.value;
    }

    // Move assignment copies the two counters and re-points the host pointer to this
    // object, so it never refers to the moved-from instance
    KernelResult& operator=(KernelResult&& other) noexcept
    {
        error = other.error;
        value = other.value;
        SelfGpu.HOST_Ptr = this;
        return *this;
    }
};
tests::keeloq::default_seed), 35 | Decryptor(debugKey - 1, tests::keeloq::default_seed), 36 | Decryptor(debugKey, tests::keeloq::default_seed), 37 | Decryptor(debugKey + 1, tests::keeloq::default_seed) 38 | })); 39 | 40 | // Alphabet 41 | cmd.brute_configs.emplace_back(BruteforceConfig::GetAlphabet(first_decryptor_ptrn, cmd.alphabets[1])); 42 | cmd.brute_configs.emplace_back(BruteforceConfig::GetAlphabet(first_decryptor_ptrn, cmd.alphabets[0])); 43 | 44 | // Seed 45 | cmd.brute_configs.emplace_back(BruteforceConfig::GetSeedBruteforce(Decryptor(debugKey, 0))); 46 | 47 | // Pattern (reversed) 48 | cmd.brute_configs.emplace_back(BruteforceConfig::GetPattern(first_decryptor_ptrn, BruteforcePattern( 49 | { 50 | BruteforcePattern::ParseBytes("c0|c1|c2|c3"), 51 | BruteforcePattern::ParseBytes("F0-FF"), 52 | BruteforcePattern::ParseBytes("E0-EF"), 53 | BruteforcePattern::ParseBytes("00-99"), 54 | { 0xED, 0xDE }, 55 | { 0xDA, 0xAD }, 56 | { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66 }, 57 | { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66 } 58 | }, "N/A"))); 59 | 60 | // Simple 61 | cmd.brute_configs.emplace_back(BruteforceConfig::GetBruteforce(first_decryptor_brtf, count)); 62 | 63 | // Filters 64 | cmd.brute_configs.emplace_back(BruteforceConfig::GetBruteforce(first_decryptor_brtf, count, BruteforceFilters 65 | { 66 | // Include only 67 | BruteforceFilters::Flags::All, 68 | 69 | // Exclude 70 | BruteforceFilters::Flags::BytesIncremental 71 | })); 72 | 73 | cmd.selected_learning = { }; // ALL 74 | 75 | cmd.match_stop = false; 76 | cmd.run_bench = false; 77 | cmd.run_tests = false; 78 | 79 | #if _DEBUG 80 | cmd.init_cuda(512, 0, 1); 81 | #else 82 | cmd.init_cuda(4096, 0, 1); 83 | #endif 84 | 85 | return cmd; 86 | } 87 | 88 | void bruteforce(const CommandLineArgs& args) 89 | { 90 | if (args.selected_learning.size() == 0) 91 | { 92 | printf("Bruteforcing without specific learning type (slower)" 93 | "(1 KKey/s == %u Kkc (keeloq calcs) per second)\n" 94 | "In case of full range there also 
redundant checks since using _REV learning types ( X-00:11:22 == X_REV-22:11:00 )\n", KeeloqLearningType::LAST); 95 | } 96 | 97 | for (const auto& config : args.brute_configs) 98 | { 99 | BruteforceRound attackRound(args.inputs, config, args.selected_learning, args.cuda_blocks, args.cuda_threads, args.cuda_loops); 100 | 101 | printf("\nallocating..."); 102 | attackRound.Init(); 103 | 104 | printf("\rRunning... \n%s\n", attackRound.to_string().c_str()); 105 | 106 | bool match = false; 107 | 108 | size_t batchesInRound = attackRound.num_batches(); 109 | size_t keysInBatch = attackRound.keys_per_batch(); 110 | 111 | auto roundStartTime = std::chrono::system_clock::now(); 112 | 113 | for (size_t batch = 0; !match && batch < batchesInRound; ++batch) 114 | { 115 | auto batchStartTime = std::chrono::high_resolution_clock::now(); 116 | 117 | KeeloqKernelInput& kernelInput = attackRound.Inputs(); 118 | 119 | if (attackRound.Type() != BruteforceType::Dictionary) 120 | { 121 | // Generate decryptors (if available) 122 | int error = GeneratorBruteforce::PrepareDecryptors(kernelInput, attackRound.CudaBlocks(), attackRound.CudaThreads()); 123 | if (error) 124 | { 125 | printf("Error: Key generation resulted with error: %d", error); 126 | assert(false); 127 | return; 128 | } 129 | 130 | // Make previous last generated key be an initial for current generation batch 131 | kernelInput.NextDecryptor(); 132 | } 133 | else 134 | { 135 | // Write next batch of keys from dictionary 136 | kernelInput.WriteDecryptors(config.decryptors, batch * keysInBatch, keysInBatch); 137 | } 138 | 139 | // do the bruteforce 140 | auto kernelResults = keeloq::kernels::cuda_brute(kernelInput, attackRound.CudaBlocks(), attackRound.CudaThreads()); 141 | match = attackRound.check_results(kernelResults); 142 | 143 | auto duration = std::chrono::duration_cast( 144 | std::chrono::high_resolution_clock::now() - batchStartTime); 145 | 146 | if (batch == 0 || match) 147 | { 148 | console::set_cursor_state(false); 
149 | printf("\n\n\n"); 150 | } 151 | 152 | if (!match) 153 | { 154 | auto kilo_result_per_second = duration.count() == 0 ? 0 : keysInBatch / duration.count(); 155 | auto progress_percent = (double)(batch + 1) / batchesInRound; 156 | 157 | console_cursor_ret_up(2); 158 | 159 | const Decryptor& last_used_decryptor = kernelInput.GetConfig().last; 160 | 161 | printf("[%c][%zd/%zd] %" PRIu64 "(ms)/batch Speed: %" PRIu64 " KKeys/s Last key:0x%" PRIX64 " (%u) \n", 162 | WAIT_CHAR(batch), 163 | batch, batchesInRound, 164 | duration.count(), 165 | kilo_result_per_second, 166 | last_used_decryptor.man(), last_used_decryptor.seed()); 167 | 168 | auto overall = std::chrono::duration_cast( 169 | std::chrono::system_clock::now() - roundStartTime); 170 | 171 | console::progress_bar(progress_percent, overall); 172 | } 173 | } 174 | 175 | if (!match) 176 | { 177 | printf("\n\nAfter: %zd batches no results was found. Keys checked:%zd\n\n", 178 | batchesInRound, batchesInRound * keysInBatch); 179 | } 180 | else if (args.match_stop) 181 | { 182 | break; 183 | } 184 | } 185 | } 186 | 187 | int main(int argc, const char** argv) 188 | { 189 | assert(tests::cuda_check_working()); 190 | 191 | if (!keeloq::kernels::cuda_is_working()) 192 | { 193 | printf("Error: This device cannot compute keeloq right. Single encryption and decryption mismatch.\n"); 194 | assert(false); 195 | return 1; 196 | } 197 | 198 | // Be default if no arguments specified - launch demo mode 199 | bool demo_mode = argc <= 1; 200 | auto args = demo_mode ? 
demoTestCommandlineArgs() : CommandLineArgs::parse(argc, argv); 201 | 202 | bool had_tests = args.run_bench || args.run_tests; 203 | 204 | if (args.run_tests) 205 | { 206 | printf("\n...RUNNING TESTS...\n"); 207 | tests::console::run(); 208 | 209 | tests::pattern_generation(); 210 | tests::alphabet_generation(); 211 | tests::filters_generation(); 212 | 213 | printf("\n...TESTS FINISHED...\n"); 214 | } 215 | 216 | if (args.run_bench) 217 | { 218 | benchmark::all(args); 219 | } 220 | 221 | if (args.can_bruteforce()) 222 | { 223 | if (demo_mode) 224 | { 225 | printf(R"( 226 | ___ __ ___ __ 227 | / _ \___ __ _ ___ / |/ /__ ___/ /__ 228 | / // / -_) ' \/ _ \ / /|_/ / _ \/ _ / -_) 229 | /____/\__/_/_/_/\___/ /_/ /_/\___/\_,_/\__/ 230 | 231 | )"); 232 | } 233 | 234 | bruteforce(args); 235 | } 236 | else if (!had_tests) 237 | { 238 | printf("\nNot enough arguments for bruteforce\n"); 239 | return 1; 240 | } 241 | 242 | // this will free all memory as well 243 | cudaDeviceReset(); 244 | 245 | console::set_cursor_state(true); 246 | return 0; 247 | } -------------------------------------------------------------------------------- /src/tests/test_all.h: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | #include 4 | 5 | #include "tests/test_alphabet.h" 6 | #include "tests/test_benchmark.h" 7 | #include "tests/test_console.h" 8 | #include "tests/test_filters.h" 9 | #include "tests/test_keeloq.h" 10 | #include "tests/test_pattern.h" 11 | 12 | namespace tests 13 | { 14 | __host__ bool cuda_check_working(); 15 | } -------------------------------------------------------------------------------- /src/tests/test_alphabet.cpp: -------------------------------------------------------------------------------- 1 | #include "tests/test_alphabet.h" 2 | 3 | #include "device/cuda_vector.h" 4 | #include "kernels/kernel_result.h" 5 | #include "bruteforce/bruteforce_config.h" 6 | #include "bruteforce/generators/generator_bruteforce.h" 
7 | #include "algorithm/keeloq/keeloq_kernel_input.h" 8 | #include "algorithm/keeloq/keeloq_decryptor.h" 9 | 10 | 11 | bool tests::alphabet_generation() 12 | { 13 | // Filtered generator test itself 14 | constexpr auto NumBlocks = 64; 15 | constexpr auto NumThreads = 64; 16 | 17 | auto testConfig = BruteforceConfig::GetAlphabet(Decryptor(0,0), "abcd"_b, 0xFFFFFFFF); 18 | 19 | CudaVector decryptors(NumBlocks * NumThreads); 20 | 21 | KeeloqKernelInput generatorInputs; 22 | generatorInputs.decryptors = decryptors.gpu(); 23 | generatorInputs.Initialize(testConfig, KeeloqLearningType::full_mask()); 24 | 25 | for (int i = 0; i < 16; ++i) 26 | { 27 | GeneratorBruteforce::PrepareDecryptors(generatorInputs, NumBlocks, NumThreads); 28 | 29 | decryptors.read(); 30 | 31 | assert((decryptors.cpu()[0].man() & 0x0000FFFFFFFFFFFF) == 0x616161616161); 32 | 33 | generatorInputs.NextDecryptor(); 34 | } 35 | 36 | assert(decryptors.cpu()[4095].man() == 0x6464646464646464); 37 | 38 | return true; 39 | } -------------------------------------------------------------------------------- /src/tests/test_alphabet.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.h" 4 | 5 | #include 6 | 7 | namespace tests 8 | { 9 | bool alphabet_generation(); 10 | } -------------------------------------------------------------------------------- /src/tests/test_benchmark.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "test_benchmark.h" 7 | #include "host/console.h" 8 | #include "host/timer.h" 9 | 10 | #include "bruteforce/generators/generator_bruteforce.h" 11 | #include "bruteforce/bruteforce_config.h" 12 | #include "bruteforce/bruteforce_round.h" 13 | 14 | #include "algorithm/keeloq/keeloq_kernel.h" 15 | #include "tests/test_keeloq.h" 16 | 17 | 18 | void benchmark::run(const CommandLineArgs& args, const BruteforceConfig& 
benchmarkConfig, const std::vector& CudaBlocks, const std::vector& CudaThreads) 19 | { 20 | #if _DEBUG 21 | constexpr size_t TargetCalculations = 10000000; 22 | #else 23 | constexpr size_t TargetCalculations = 100000000; 24 | #endif 25 | 26 | static const uint32_t MaxCudaThreads = CommandLineArgs::max_cuda_threads(); 27 | static const uint32_t MaxCudaBlocks = CommandLineArgs::max_cuda_blocks(); 28 | 29 | printf("BENCHMARK BEGIN\n\nConfig is: Alphabet\n"); 30 | #ifndef NO_INNER_LOOPS 31 | printf("Num loops inside CUDA : %u\n", args.cuda_loops); 32 | #endif // !NO_INNER_LOOPS 33 | printf( 34 | "Max Available CUDA Threads per block : %u\n" 35 | "Max Available CUDA Blocks : %u\n" 36 | "Num total calculations : %" PRIu64 " (Millions)\n" 37 | "Learning : %s\n" 38 | "Seed specified : %s\n\n", 39 | MaxCudaThreads, MaxCudaBlocks, 40 | (TargetCalculations / 1000000), 41 | KeeloqLearningType::to_string(args.selected_learning).c_str(), 42 | (benchmarkConfig.start.seed() == 0 ? "false" : "true")); 43 | 44 | 45 | bool in_progress = true; 46 | for (auto NumCudaBlocks : CudaBlocks) 47 | { 48 | if (!in_progress || NumCudaBlocks > MaxCudaBlocks) 49 | { 50 | break; 51 | } 52 | 53 | for (auto NumCudaThreads : CudaThreads) 54 | { 55 | if (!in_progress || NumCudaThreads > MaxCudaThreads) 56 | { 57 | break; 58 | } 59 | 60 | BruteforceRound benchmarkRound(args.inputs, benchmarkConfig, args.selected_learning, NumCudaBlocks, NumCudaThreads, args.cuda_loops); 61 | benchmarkRound.Init(); 62 | 63 | size_t keysInBatch = benchmarkRound.keys_per_batch(); 64 | size_t numBatches = std::max(1, TargetCalculations / keysInBatch); 65 | 66 | std::vector batches_kResults_per_sec; 67 | 68 | console::set_cursor_state(false); 69 | printf("\n"); 70 | 71 | auto roundTimer = Timer::start(); 72 | 73 | for (size_t i = 0; in_progress && i < numBatches; ++i) 74 | { 75 | auto batchTimer = Timer::start(); 76 | 77 | KeeloqKernelInput& kernelInput = benchmarkRound.Inputs(); 78 | kernelInput.NextDecryptor(); 79 | 80 | 
GeneratorBruteforce::PrepareDecryptors(kernelInput, NumCudaBlocks, NumCudaThreads); 81 | keeloq::kernels::cuda_brute(kernelInput, NumCudaBlocks, NumCudaThreads); 82 | 83 | auto elapsedMs = batchTimer.elapsed().count(); 84 | if (elapsedMs > 0) 85 | { 86 | batches_kResults_per_sec.push_back(keysInBatch / elapsedMs); 87 | } 88 | 89 | console_cursor_ret_up(1); 90 | console::progress_bar(i / (double)numBatches, roundTimer.elapsed_secods()); 91 | 92 | if (console::read_esc_press()) 93 | { 94 | console_cursor_ret_up(1); 95 | console::clear_line(); 96 | printf("Benchmark skipped\n"); 97 | in_progress = false; 98 | } 99 | } 100 | 101 | if (in_progress) 102 | { 103 | std::sort(batches_kResults_per_sec.begin(), batches_kResults_per_sec.end()); 104 | 105 | uint64_t median = 0; 106 | uint64_t avg = 0; 107 | 108 | auto num_results = batches_kResults_per_sec.size(); 109 | if (num_results > 0) 110 | { 111 | avg = std::reduce(batches_kResults_per_sec.begin(), batches_kResults_per_sec.end()) / num_results; 112 | } 113 | 114 | if (num_results > 2) 115 | { 116 | median = num_results % 2 == 0 ? 117 | (batches_kResults_per_sec[num_results / 2] + batches_kResults_per_sec[num_results / 2 - 1]) / 2 : 118 | batches_kResults_per_sec[num_results / 2]; 119 | } 120 | 121 | console_cursor_ret_up(1); 122 | console::clear_line(); 123 | 124 | // Creating results 125 | printf("| CUDA: %" PRIu16 " x %" PRIu16 " \t| MEM: %" PRIu64 " MB\t | Time (ms): %" PRIu64 " \t |\tSpeed (K/s): %" PRIu64 " (avg.) 
%" PRIu64 " (median) |\t\t\t\t\n", 126 | NumCudaBlocks, NumCudaThreads, 127 | benchmarkRound.get_mem_size() / (1024 * 1024), 128 | roundTimer.elapsed().count(), 129 | avg, median); 130 | } 131 | } 132 | } 133 | } 134 | 135 | void benchmark::all(const CommandLineArgs& args) 136 | { 137 | cudaDeviceProp prop; 138 | cudaGetDeviceProperties(&prop, 0); 139 | 140 | std::vector CudaBlocks = { 256, 512, 1024, 2048, 4096, 8196 }; 141 | std::vector CudaThreads = { 128, 256, 512, 1024, 2048 }; 142 | 143 | BruteforceConfig benchmarkConfig_no_seed = BruteforceConfig::GetAlphabet(Decryptor(0, 0), "0123456789abcdefgh"_b); 144 | BruteforceConfig benchmarkConfig_wt_seed = BruteforceConfig::GetAlphabet(Decryptor(0, 1234567), "0123456789abcdefgh"_b); 145 | 146 | console_clear(); 147 | CommandLineArgs copy = args; 148 | copy.inputs = tests::keeloq::gen_inputs(0xFF123FF3434FFFFF); 149 | 150 | copy.selected_learning = {}; 151 | run(copy, benchmarkConfig_wt_seed, CudaBlocks, CudaThreads); 152 | run(copy, benchmarkConfig_no_seed, CudaBlocks, CudaThreads); 153 | 154 | copy.selected_learning = { KeeloqLearningType::Simple }; 155 | run(copy, benchmarkConfig_no_seed, CudaBlocks, CudaThreads); 156 | 157 | copy.selected_learning = { KeeloqLearningType::Normal }; 158 | run(copy, benchmarkConfig_no_seed, CudaBlocks, CudaThreads); 159 | 160 | copy.selected_learning = { KeeloqLearningType::Secure }; 161 | run(copy, benchmarkConfig_wt_seed, CudaBlocks, CudaThreads); 162 | 163 | copy.selected_learning = { KeeloqLearningType::Faac }; 164 | run(copy, benchmarkConfig_wt_seed, CudaBlocks, CudaThreads); 165 | 166 | copy.selected_learning = { KeeloqLearningType::Simple, KeeloqLearningType::Normal, KeeloqLearningType::Xor}; 167 | run(copy, benchmarkConfig_no_seed, CudaBlocks, CudaThreads); 168 | 169 | #ifndef NO_INNER_LOOPS 170 | copy.cuda_loops = 4; 171 | run(copy, CudaBlocks, CudaThreads); 172 | #endif 173 | } 174 | -------------------------------------------------------------------------------- 
// Command line parser test.
// Parses three hand-written command lines with CommandLineArgs::parse:
//  1. help request,
//  2. an invalid setup (mode 5 without the arguments it needs) - must produce no bruteforce configs,
//  3. a full command line using every supported argument - must produce 2 alphabets and 9 configs.
// Returns the arguments parsed from the full command line.
CommandLineArgs tests::console::run()
{
    // Full command line: every argument is exercised once
    const char* commandline[] = {
        APP_NAME,
        "--" ARG_INPUTS"=0xC65D52A0A81FD504,0xCCA9B335A81FD504,0xE0DA7372A81FD504",
        "--" ARG_BLOCKS"=32",
        "--" ARG_THREADS"=32",
        "--" ARG_LOOPS"=4",
        "--" ARG_MODE"=0,1,2,3,4,5",
        "--" ARG_LTYPE"=0,1,2,3,4",

        // Dictionaries: inline keys (with and without 0x prefix) mixed with a file name
        "--" ARG_WORDDICT"=0xFDE4531BBACAD12,FDE4531BBACAD13,0xFDE4531BBACAD14,examples/dictionary.words",
        "--" ARG_BINDICT"=examples/dictionary.bin",
        "--" ARG_BINDMODE"=2",

        "--" ARG_START"=1",
        "--" ARG_SEED"=777",
        "--" ARG_COUNT"=0xFFFF",

        // Two alphabets: inline bytes and a binary file
        "--" ARG_ALPHABET"=61:62:63:64:zz:AB,examples/alphabet.bin",

        "--" ARG_IFILTER"=0x2", //SmartFilterFlags::Max6OnesInARow other are very heavy, this one will allow all numbers less than 0x03FFFFFFFFFFFFFF
        "--" ARG_EFILTER"=64", //SmartFilterFlags::BytesRepeat4

        "--" ARG_PATTERN"=0x01:*:0x43-0x10:0xA0-FF:AA|0x34|0xBB:0x66|0x77:AL0,0x88:asd:w1:88:*:AL2:BB:73",

        "--" ARG_FMATCH,
        "--" ARG_BENCHMARK "=1",
        "--" ARG_TEST "=true",
    };

    // Help request
    const char* help[] = {
        "CudaKeeloq.exe",
        "-h"
    };

    // Mode 5 requested without the rest of the arguments; expected to yield no bruteforce configs
    const char* commandlineInvalidSeedMode[] = {
        APP_NAME,
        "--" ARG_INPUTS"=0xC65D52A0A81FD504,0xCCA9B335A81FD504,0xE0DA7372A81FD504",
        "--" ARG_BLOCKS"=32",
        "--" ARG_MODE"=5"
    };

    // Print Help
    print_args(help, sizeof(help) / sizeof(char*));
    CommandLineArgs args = CommandLineArgs::parse(sizeof(help) / sizeof(char*), help);

    // Invalid command line must not create any config
    print_args(commandlineInvalidSeedMode, sizeof(commandlineInvalidSeedMode) / sizeof(char*));
    args = CommandLineArgs::parse(sizeof(commandlineInvalidSeedMode) / sizeof(char*), commandlineInvalidSeedMode);
    assert(args.brute_configs.size() == 0);

    // Full command line
    print_args(commandline, sizeof(commandline) / sizeof(char*));
    args = CommandLineArgs::parse(sizeof(commandline) / sizeof(char*), commandline);

    assert(args.alphabets.size() == 2);
    assert(args.brute_configs.size() == 9);

    // Flags given on the full command line
    assert(args.match_stop);
    assert(args.run_bench);
    assert(args.run_tests);

    return args;
}
// Test of bruteforce filters. Three stages:
//  1. every entry of the table below is checked on the CPU with BruteforceFilters::check_filters,
//  2. the same table is checked on the GPU through cuda_check_bruteforce_filters,
//  3. the filtered key generator is run once and its output is searched for one specific key.
// Returns true when all stages succeeded.
bool tests::filters_generation()
{
    // { key value, filter flag to check, expected check_filters() result }
    BruteforceFiltersTestInputs test_cases[] = {
        { 0x1111334404bbccee, BruteforceFilters::Flags::Max6ZerosInARow, true },
        { 0x11113344aabbccee, BruteforceFilters::Flags::Max6ZerosInARow, false },
        { 0x11113344aabbccee, BruteforceFilters::Flags::Max6OnesInARow, false },
        { 0x11113344aFFbccee, BruteforceFilters::Flags::Max6OnesInARow, true },

        { 0x3132333435363738, BruteforceFilters::Flags::AsciiNumbers, true },
        { 0x3132333435363738, BruteforceFilters::Flags::AsciiAlphaNum, true },
        { 0x3132333435363738, BruteforceFilters::Flags::AsciiAny, true },
        { 0x2931323334353637, BruteforceFilters::Flags::AsciiNumbers, false },

        { 0x4142434465666768, BruteforceFilters::Flags::AsciiAlpha, true },
        { 0x4142434465666768, BruteforceFilters::Flags::AsciiAlphaNum, true },
        { 0x4142434465666768, BruteforceFilters::Flags::AsciiAny, true },
        { 0x3142434465666768, BruteforceFilters::Flags::AsciiAlpha, false },
        { 0x2142434465666768, BruteforceFilters::Flags::AsciiAlphaNum, false },
        { 0x1142434465666768, BruteforceFilters::Flags::AsciiAny, false },

        { 0x214023245e28297e, BruteforceFilters::Flags::AsciiSpecial, true },
        { 0x214023245e28297e, BruteforceFilters::Flags::AsciiAny, true },
        { 0x114023245e28297e, BruteforceFilters::Flags::AsciiSpecial, false },

        { 0x0022222222556677, BruteforceFilters::Flags::BytesRepeat4, true },
        { 0x0022222222226677, BruteforceFilters::Flags::BytesRepeat4, true },
        { 0x00Abcdef11111111, BruteforceFilters::Flags::BytesRepeat4, true },
        { 0x0011222222556677, BruteforceFilters::Flags::BytesRepeat4, false },
        { 0x0011223344556677, BruteforceFilters::Flags::BytesRepeat4, false },

        { 0x112233445566aa00, BruteforceFilters::Flags::BytesIncremental, true },
        { 0xFFEEDDCCBBAA1234, BruteforceFilters::Flags::BytesIncremental, true },
        { 0x1122334455778899, BruteforceFilters::Flags::BytesIncremental, false },
    };

    // Stored as uint8_t: the table must stay below 256 entries
    static uint8_t NumTests = sizeof(test_cases) / sizeof(BruteforceFiltersTestInputs);
    bool result_success = true;

    // CPU tests
    for (int i = 0; i < NumTests; ++i)
    {
        bool value = BruteforceFilters::check_filters(test_cases[i].value, test_cases[i].flags);
        result_success &= value == test_cases[i].result;

        assert(result_success);
    }

    // GPU tests
    DoubleArray test_inputs(test_cases, NumTests);
    cuda_check_bruteforce_filters(test_inputs.CUDA_mem, NumTests);
    test_inputs.read_GPU(); // for asserts

    // The kernel overwrites `value` of each entry with 1 when the GPU result matched the expectation
    for (uint8_t i = 0; i < NumTests; ++i)
    {
        result_success &= test_inputs.HOST_mem[i].value == 1;
        assert(result_success);
    }

    // Filtered generator test itself
    constexpr auto NumBlocks = 32;// 1; //
    constexpr auto NumThreads = 512;// 2;// 512;

    constexpr auto NumToGenerate = 0xFFFFF;

    auto first_decryptor = Decryptor(0xAADEADBEEFA00000, 0);

    // First filter argument is the include mask, second is the exclude mask
    auto testConfig = BruteforceConfig::GetBruteforce(first_decryptor, NumToGenerate,
        BruteforceFilters{
            BruteforceFilters::Flags::All, // SmartFilterFlags::AsciiAny; //
            BruteforceFilters::Flags::BytesIncremental | BruteforceFilters::Flags::BytesRepeat4, // SmartFilterFlags::BytesRepeat4; //
        });

    CudaVector decryptors(NumToGenerate);

    KeeloqKernelInput generatorInputs;
    generatorInputs.decryptors = decryptors.gpu();
    generatorInputs.Initialize(testConfig, KeeloqLearningType::full_mask());

    // NOTE(review): `result` is never passed to the generator; only result.read() is called at the end - confirm it is still needed
    KernelResult result;

    auto error = GeneratorBruteforce::PrepareDecryptors(generatorInputs, NumBlocks, NumThreads);
    result_success &= error == 0;

    // Copy generated keys back to the host
    decryptors.read();

    bool found = false;
    for (size_t i = 0; !found && i < decryptors.size(); ++i)
    {
        // looking for exact code - check nothing missed
        found |= decryptors.cpu()[i].man() == 0xAADEADBEEFA63ED2;
    }

    assert(found);
    result_success &= found;

    result.read();
    return result_success;
}
| namespace keeloq 11 | { 12 | constexpr uint32_t default_seed = 987654321; 13 | 14 | constexpr uint8_t default_button = 0x3; 15 | 16 | constexpr uint16_t default_counter = 0x123; 17 | 18 | constexpr uint32_t default_serial = 0xDEADBEEF; 19 | 20 | 21 | // Generate test inputs for specific key 22 | std::vector gen_inputs(uint64_t key, uint8_t num = 3, 23 | uint32_t serial = default_serial, uint16_t counter = default_counter, uint8_t button = default_button, uint32_t seed = default_seed, 24 | KeeloqLearningType::Type learning = KeeloqLearningType::Normal); 25 | 26 | template 27 | inline std::vector gen_inputs(uint64_t key, uint8_t num = 3, 28 | uint32_t serial = default_serial, uint16_t counter = default_counter, uint8_t button = default_button, uint32_t seed = default_seed) 29 | { 30 | return gen_inputs(key, num, serial, counter, button, seed, TLearning); 31 | } 32 | } 33 | } -------------------------------------------------------------------------------- /src/tests/test_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | #include "test_all.h" 4 | #include "device/cuda_common.h" 5 | 6 | #include 7 | 8 | namespace 9 | { 10 | __global__ void Kernel_Test(uint64_t* input) 11 | { 12 | *input = 0x1234567890ABCDEF; 13 | *input = misc::rev_bytes(*input); 14 | } 15 | 16 | __global__ void Kernel_RunFiltersTests(BruteforceFiltersTestInputs* tests, uint8_t num) 17 | { 18 | for (int i = 0; i < num; ++i) 19 | { 20 | bool value = BruteforceFilters::check_filters(tests[i].value, tests[i].flags); 21 | assert(value == tests[i].result); 22 | 23 | tests[i].value = value == tests[i].result; 24 | } 25 | } 26 | } 27 | 28 | __host__ void tests::cuda_check_bruteforce_filters(BruteforceFiltersTestInputs * tests, uint8_t num) 29 | { 30 | void* args[] = { &tests, &num }; 31 | auto error = cudaLaunchKernel((void*) & Kernel_RunFiltersTests, dim3(), dim3(), args, 0, nullptr); 32 | CUDA_CHECK(error); 33 | } 34 | 35 | __host__ 
// Smoke test for the CUDA setup: runs Kernel_Test on a single device-side uint64_t
// and checks the value copied back to the host.
// Kernel_Test stores 0x1234567890ABCDEF and then byte-reverses it with misc::rev_bytes,
// so the expected value is 0xEFCDAB9078563412.
// Returns true when the value read back matches.
bool tests::cuda_check_working()
{
    uint64_t result = 0;
    uint64_t* pInput;

    // Device memory for the single kernel output value
    auto error = cudaMalloc((void**)&pInput, sizeof(uint64_t));
    CUDA_CHECK(error);

    // Launch through the runtime API; args holds the address of each kernel parameter
    void *args[] = { &pInput };
    auto* func = (void*) &Kernel_Test;
    error = cudaLaunchKernel(func, dim3(), dim3(), args, 0, 0);
    CUDA_CHECK(error);

    error = cudaMemcpy(&result, pInput, sizeof(uint64_t), cudaMemcpyDeviceToHost);
    CUDA_CHECK(error);

    assert(result == 0xEFCDAB9078563412);

    // NOTE(review): second launch, this time with the <<<>>> syntax; its output is never copied back
    // or checked - presumably it only verifies that this launch form works, confirm the intent
    Kernel_Test<<<1, 1>>>(pInput);

    error = cudaFree(pInput);
    CUDA_CHECK(error);

    return result == 0xEFCDAB9078563412;
}
CudaVector encrypted = tests::keeloq::gen_inputs(debugKey); 48 | 49 | CudaVector decryptors(NumBlocks * NumThreads); 50 | CudaVector results(decryptors.size() * encrypted.size()); 51 | 52 | BruteforceConfig config = GetSingleKeyConfig(debugKey); 53 | if (config.pattern.init(0).number() != debugKey) 54 | { 55 | assert(false); 56 | return false; 57 | } 58 | 59 | KeeloqKernelInput generatorInputs; 60 | generatorInputs.encdata = encrypted.gpu(); 61 | generatorInputs.decryptors = decryptors.gpu(); 62 | generatorInputs.results = results.gpu(); 63 | generatorInputs.Initialize(config, KeeloqLearningType::full_mask()); 64 | 65 | GeneratorBruteforce::PrepareDecryptors(generatorInputs, NumBlocks, NumThreads); 66 | auto result = ::keeloq::kernels::cuda_brute(generatorInputs, NumBlocks, NumThreads); 67 | 68 | decryptors.read(); 69 | results.read(); 70 | assert(decryptors.cpu()[0].man() == debugKey); 71 | 72 | return decryptors.cpu()[0].man() == debugKey; 73 | } 74 | 75 | -------------------------------------------------------------------------------- /src/tests/test_pattern.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common.h" 4 | 5 | namespace tests 6 | { 7 | bool pattern_generation(); 8 | } --------------------------------------------------------------------------------