├── .dockerignore
├── .editorconfig
├── .github
└── workflows
│ ├── linux.yml
│ └── windows.yml
├── .gitignore
├── .gitmodules
├── CudaKeeloq.vcxproj
├── CudaKeeloq.vcxproj.filters
├── LICENSE
├── build.sh
├── dockerfile
├── examples
├── alphabet.bin
├── dictionary.bin
└── dictionary.words
├── makefile
├── readme.md
├── run.sh
└── src
├── algorithm
├── keeloq
│ ├── keeloq_decryptor.h
│ ├── keeloq_encrypted.h
│ ├── keeloq_kernel.cu
│ ├── keeloq_kernel.h
│ ├── keeloq_kernel.inl
│ ├── keeloq_kernel_input.cpp
│ ├── keeloq_kernel_input.h
│ ├── keeloq_learning_types.cpp
│ ├── keeloq_learning_types.h
│ ├── keeloq_single_result.cpp
│ └── keeloq_single_result.h
├── multibase_digit.h
├── multibase_number.h
└── multibase_system.h
├── bruteforce
├── bruteforce_config.cpp
├── bruteforce_config.h
├── bruteforce_filters.cpp
├── bruteforce_filters.h
├── bruteforce_pattern.cpp
├── bruteforce_pattern.h
├── bruteforce_round.cpp
├── bruteforce_round.h
├── bruteforce_type.cpp
├── bruteforce_type.h
└── generators
│ ├── generator_bruteforce.cpp
│ ├── generator_bruteforce.h
│ ├── generator_bruteforce_filtered_kernel.inl
│ ├── generator_bruteforce_pattern_kernel.inl
│ ├── generator_bruteforce_seed_kernel.inl
│ ├── generator_bruteforce_simple_kernel.inl
│ └── generator_kernel.cu
├── common.h
├── device
├── cuda_array.h
├── cuda_common.h
├── cuda_context.h
├── cuda_double_array.h
├── cuda_object.h
├── cuda_span.h
└── cuda_vector.h
├── host
├── command_line_args.cpp
├── command_line_args.h
├── console.cpp
├── console.h
├── host_utils.cpp
├── host_utils.h
└── timer.h
├── kernels
└── kernel_result.h
├── main.cpp
└── tests
├── test_all.h
├── test_alphabet.cpp
├── test_alphabet.h
├── test_benchmark.cpp
├── test_benchmark.h
├── test_console.cpp
├── test_console.h
├── test_filters.cpp
├── test_filters.h
├── test_keeloq.cpp
├── test_keeloq.h
├── test_kernel.cu
├── test_pattern.cpp
└── test_pattern.h
/.dockerignore:
--------------------------------------------------------------------------------
1 | x64/*
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | # Top-most EditorConfig file: editors stop searching parent directories here.
2 | root = true
3 |
4 | # Defaults for every file in the repository.
5 | [*]
6 | indent_style = space
7 | indent_size = 4
8 | end_of_line = lf
9 | charset = utf-8
10 | trim_trailing_whitespace = true
11 | # The property is named insert_final_newline; unknown keys are silently ignored.
12 | insert_final_newline = true
13 |
14 | # Markdown: trailing whitespace is left untouched.
15 | [*.md]
16 | trim_trailing_whitespace = false
17 |
18 | # The makefile is indented with tabs.
19 | [makefile]
20 | indent_style = tab
21 | trim_trailing_whitespace = false
--------------------------------------------------------------------------------
/.github/workflows/linux.yml:
--------------------------------------------------------------------------------
1 | name: Linux
2 | on:
3 | push:
4 | tags:
5 | - test-linux*
6 | - release-v*
7 | # Triggered only by pushed tags matching the patterns above; builds the docker image and pushes it to ghcr.io.
8 | jobs:
9 | build:
10 | runs-on: ubuntu-latest
11 | permissions:
12 | packages: write
13 | contents: read
14 | # The first step lower-cases the actor name because it is used as part of the image reference in later steps.
15 | steps:
16 | - id: actor
17 | uses: ASzc/change-string-case-action@v5
18 | with:
19 | string: ${{ github.actor }}
20 |
21 | - name: Checkout
22 | uses: actions/checkout@v3.3.0
23 | with:
24 | submodules: 'true'
25 |
26 | - name: Linux build using docker
27 | run: chmod +x build.sh && ./build.sh
28 | # build.sh tags the image as cudakeeloq:local by default; the next step re-tags it with the pushed tag name.
29 | - name: Tag container
30 | run: docker tag cudakeeloq:local ghcr.io/${{ steps.actor.outputs.lowercase }}/cudakeeloq:${{ github.ref_name }}
31 |
32 | - name: Login to GitHub Container Registry
33 | uses: docker/login-action@v2
34 | with:
35 | registry: ghcr.io
36 | username: ${{ github.actor }}
37 | password: ${{ secrets.GITHUB_TOKEN }}
38 |
39 | - name: Docker Push to ghcr.io
40 | run: docker push ghcr.io/${{ steps.actor.outputs.lowercase }}/cudakeeloq:${{ github.ref_name }}
41 |
42 |
--------------------------------------------------------------------------------
/.github/workflows/windows.yml:
--------------------------------------------------------------------------------
1 | name: Windows
2 | on:
3 | push:
4 | tags:
5 | - test-windows*
6 | - release-v*
7 |
8 | jobs:
9 | build:
10 | # CUDA toolkit version (major.minor.patch) whose installers are downloaded by the setup step.
11 | env:
12 | CUDA_VERSION_MAJOR: 12
13 | CUDA_VERSION_MINOR: 2
14 | CUDA_VERSION_PATCH: 0
15 |
16 | runs-on: windows-2022
17 | permissions:
18 | contents: write
19 | steps:
20 |
21 | - name: Setup CUDA (manual)
22 | env:
23 | CUDA_VERSION_STR: "${{env.CUDA_VERSION_MAJOR}}.${{env.CUDA_VERSION_MINOR}}.${{env.CUDA_VERSION_PATCH}}"
24 | PKGS_VERSION_STR: "${{env.CUDA_VERSION_MAJOR}}.${{env.CUDA_VERSION_MINOR}}"
25 | run: |
26 | echo "Downloading CUDA version: ${{env.CUDA_VERSION_STR}}"
27 | Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/${{env.CUDA_VERSION_STR}}/network_installers/windows/x86_64/wddm2/CUDAVisualStudioIntegration.exe" -OutFile ${{RUNNER.TEMP}}\cuda-vs-installer.exe
28 | Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/${{env.CUDA_VERSION_STR}}/network_installers/windows/x86_64/wddm2/nvcc.exe" -OutFile ${{RUNNER.TEMP}}\cuda-nvcc-installer.exe
29 | Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/${{env.CUDA_VERSION_STR}}/network_installers/windows/x86_64/wddm2/cudart.exe" -OutFile ${{RUNNER.TEMP}}\cuda-rt-installer.exe
30 |
31 | echo "Unpacking archives"
32 | 7z x ${{RUNNER.TEMP}}\cuda-vs-installer.exe extras\visual_studio_integration\MSBuildExtensions -o${{RUNNER.TEMP}}\cuda
33 | 7z x ${{RUNNER.TEMP}}\cuda-nvcc-installer.exe -o${{RUNNER.TEMP}}\cuda
34 | 7z x ${{RUNNER.TEMP}}\cuda-rt-installer.exe -o${{RUNNER.TEMP}}\cuda
35 |
36 | echo "Exporting Environment variables for next steps"
37 | [Environment]::SetEnvironmentVariable("CUDA_PATH", "${{RUNNER.TEMP}}\cuda")
38 |
39 | Add-Content $env:GITHUB_ENV "CUDA_PATH=$env:CUDA_PATH"
40 | Add-Content $env:GITHUB_ENV "CUDA_PATH_V${{env.CUDA_VERSION_MAJOR}}_${{env.CUDA_VERSION_MINOR}}=$env:CUDA_PATH"
41 | Add-Content $env:GITHUB_PATH "$env:CUDA_PATH\bin"
42 |
43 | - name: CUDA check
44 | run: |
45 | echo "CUDA PATH: $env:CUDA_PATH"
46 | ${{env.CUDA_PATH}}\bin\nvcc.exe -V
47 |
48 | - name: Checkout
49 | uses: actions/checkout@v3.3.0
50 | with:
51 | submodules: 'true'
52 |
53 | - name: Setup MSBuild
54 | uses: microsoft/setup-msbuild@v1.3.1
55 | with:
56 | # vs-version: latest
57 | msbuild-architecture: "x64"
58 |
59 |
60 | - name: Build Application
61 | run: msbuild CudaKeeloq.vcxproj -t:Rebuild -p:Configuration=Release -p:Platform=x64
62 |
63 | - name: Create Artifacts
64 | run: |
65 | mkdir artifacts
66 | xcopy x64\Release\CudaKeeloq.exe artifacts\
67 | xcopy .\examples artifacts\examples /E /H /I /C
68 | Compress-Archive -Path artifacts/* -DestinationPath artifacts/cudakeeloq.zip
69 |
70 | - name: Create Release
71 | uses: ncipollo/release-action@v1.12.0
72 | with:
73 | artifacts: artifacts/cudakeeloq.zip
74 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | x64/
2 | *.user
3 | .vscode/
4 | .vs/
5 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "ThirdParty/cpp-terminal"]
2 | path = ThirdParty/cpp-terminal
3 | url = https://github.com/jupyter-xeus/cpp-terminal.git
4 | # Single registration for ThirdParty/cxxopts (a duplicate section pointing at the same path was removed).
5 | [submodule "ThirdParty/cxxopts"]
6 | path = ThirdParty/cxxopts
7 | url = https://github.com/jarro2783/cxxopts.git
8 |
--------------------------------------------------------------------------------
/CudaKeeloq.vcxproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Debug
6 | x64
7 |
8 |
9 | Profile
10 | x64
11 |
12 |
13 | Release
14 | x64
15 |
16 |
17 |
18 | {1CC91BE8-9FA0-4CC1-A597-9EBFBC8C21CA}
19 | CudaKeeloq
20 | 10.0
21 |
22 |
23 |
24 | Application
25 | true
26 | MultiByte
27 | v143
28 |
29 |
30 | Application
31 | false
32 | true
33 | MultiByte
34 | v143
35 |
36 |
37 | Application
38 | false
39 | true
40 | MultiByte
41 | v143
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 | true
59 | $(IncludePath);$(projectdir)\src;$(CUDA_PATH)\include;$(projectdir)\ThirdParty\cpp-terminal;$(projectdir)\ThirdParty;
60 |
61 |
62 | $(IncludePath);$(projectdir)\src;$(CUDA_PATH)\include;$(projectdir)\ThirdParty\cpp-terminal;$(projectdir)\ThirdParty;
63 |
64 |
65 | $(IncludePath);$(projectdir)\src;$(CUDA_PATH)\include;$(projectdir)\ThirdParty\cpp-terminal;$(projectdir)\ThirdParty;
66 |
67 |
68 |
69 | Level3
70 | Disabled
71 | WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
72 | stdcpp17
73 | stdc17
74 |
75 |
76 | true
77 | Console
78 | cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
79 | libcmt.lib
80 |
81 |
82 | 64
83 | compute_80,sm_80
84 |
85 |
86 | true
87 | false
88 | false
89 | compile
90 | false
91 | --std=c++17 %(AdditionalOptions)
92 |
93 |
94 |
95 |
96 | Level3
97 | MaxSpeed
98 | true
99 | true
100 | WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
101 | stdcpp17
102 | stdc17
103 |
104 |
105 | true
106 | true
107 | true
108 | Console
109 | cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
110 | libcmt.lib
111 |
112 |
113 | 64
114 | true
115 |
116 |
117 | compute_80,sm_80
118 | false
119 | false
120 | --std=c++17 %(AdditionalOptions)
121 |
122 |
123 |
124 |
125 | Level3
126 | MaxSpeed
127 | true
128 | true
129 | WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
130 | stdcpp17
131 | stdc17
132 |
133 |
134 | true
135 | true
136 | true
137 | Console
138 | cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
139 | libcmt.lib
140 |
141 |
142 | 64
143 | true
144 |
145 |
146 | compute_80,sm_80
147 | true
148 | false
149 | --std=c++17 %(AdditionalOptions)
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
--------------------------------------------------------------------------------
/CudaKeeloq.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | tests
7 |
8 |
9 | tests
10 |
11 |
12 | host
13 |
14 |
15 | host
16 |
17 |
18 | device
19 |
20 |
21 | device
22 |
23 |
24 | device
25 |
26 |
27 | device
28 |
29 |
30 | bruteforce
31 |
32 |
33 | bruteforce
34 |
35 |
36 | bruteforce
37 |
38 |
39 | bruteforce
40 |
41 |
42 | bruteforce\generators
43 |
44 |
45 | algorithm\keeloq
46 |
47 |
48 | algorithm\keeloq
49 |
50 |
51 | algorithm\keeloq
52 |
53 |
54 | algorithm\keeloq
55 |
56 |
57 | algorithm\keeloq
58 |
59 |
60 | kernels
61 |
62 |
63 | algorithm\keeloq
64 |
65 |
66 | tests
67 |
68 |
69 | algorithm
70 |
71 |
72 | algorithm
73 |
74 |
75 | bruteforce
76 |
77 |
78 | algorithm
79 |
80 |
81 | device
82 |
83 |
84 | tests
85 |
86 |
87 | tests
88 |
89 |
90 | device
91 |
92 |
93 | tests
94 |
95 |
96 | host
97 |
98 |
99 | tests
100 |
101 |
102 | device
103 |
104 |
105 | host
106 |
107 |
108 |
109 |
110 |
111 | tests
112 |
113 |
114 | tests
115 |
116 |
117 | host
118 |
119 |
120 | host
121 |
122 |
123 | bruteforce
124 |
125 |
126 | bruteforce
127 |
128 |
129 | bruteforce
130 |
131 |
132 | bruteforce
133 |
134 |
135 | bruteforce\generators
136 |
137 |
138 | algorithm\keeloq
139 |
140 |
141 | algorithm\keeloq
142 |
143 |
144 | algorithm\keeloq
145 |
146 |
147 | bruteforce
148 |
149 |
150 | tests
151 |
152 |
153 | tests
154 |
155 |
156 | tests
157 |
158 |
159 | host
160 |
161 |
162 | tests
163 |
164 |
165 |
166 |
167 | {5526a2e2-4a02-4459-977a-dbdbc1265827}
168 |
169 |
170 | {076c7b6e-a941-47c2-9514-8616f16b61e8}
171 |
172 |
173 | {7d4a1a80-1bf1-48ba-95df-5c102fb9c8ca}
174 |
175 |
176 | {bc3686d0-8983-4b00-bc95-074a07972e2b}
177 |
178 |
179 | {366584ba-328c-44ba-85d8-08d370307c4f}
180 |
181 |
182 | {348c2a54-dd3e-4683-bc68-b9a5c9dfd9fa}
183 |
184 |
185 | {9e8a7676-0ad7-4e6a-b3ff-a61ddb6b9e0e}
186 |
187 |
188 | {8636e053-53f4-43e2-b4f2-6ea75d4a849e}
189 |
190 |
191 |
192 |
193 | bruteforce\generators
194 |
195 |
196 | tests
197 |
198 |
199 | algorithm\keeloq
200 |
201 |
202 |
203 |
204 | bruteforce\generators
205 |
206 |
207 | bruteforce\generators
208 |
209 |
210 | bruteforce\generators
211 |
212 |
213 | algorithm\keeloq
214 |
215 |
216 |
217 |
218 | bruteforce\generators
219 |
220 |
221 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 https://github.com/X-Stuff
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Builds the docker image from the dockerfile in the current directory.
3 | #   CONTAINER - image name (default: cudakeeloq)
4 | #   TAG       - image tag  (default: local)
5 |
6 | CONTAINER="${CONTAINER:-cudakeeloq}"
7 | TAG="${TAG:-local}"
8 |
9 | # Quoted so that an overridden name/tag is never word-split or glob-expanded.
10 | docker build . -t "$CONTAINER:$TAG"
11 |
12 |
--------------------------------------------------------------------------------
/dockerfile:
--------------------------------------------------------------------------------
1 | ARG CONFIGURATION="release"
2 | ARG CUDA_MAJOR=12
3 | ARG CUDA_MINOR=2
4 | ARG CUDA_PATCH=0
5 |
6 | FROM nvidia/cuda:${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH}-devel-ubuntu22.04 as builder
7 | ARG CONFIGURATION
8 |
9 | # Build stage: sources are copied into /workspace and compiled there
10 | WORKDIR /workspace
11 |
12 | # Copy sources, examples, third-party code and the makefile
13 | COPY examples/ examples
14 | COPY src/ src
15 | COPY ThirdParty/ ThirdParty
16 | COPY makefile makefile
17 |
18 | # Build the makefile target named by CONFIGURATION (default: release)
19 | RUN make $CONFIGURATION
20 |
21 | # Runtime stage: CUDA base image that receives only the built bin directory
22 | FROM nvidia/cuda:${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH}-base-ubuntu22.04
23 | ARG CONFIGURATION
24 |
25 | RUN groupadd cuda && useradd -m -d /app -g cuda cuda
26 | USER cuda
27 |
28 | WORKDIR /app
29 | COPY --chown=cuda:cuda --from=builder /workspace/x64/$CONFIGURATION/bin /app/
30 | ENTRYPOINT [ "/app/CudaKeeloq" ]
31 | CMD [ "--help" ]
32 |
--------------------------------------------------------------------------------
/examples/alphabet.bin:
--------------------------------------------------------------------------------
1 | 01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/@#$THIS DUPLICATED BYTES WILL BE DELETED
--------------------------------------------------------------------------------
/examples/dictionary.bin:
--------------------------------------------------------------------------------
1 | W¿OÖ‰6áø—(Ðt€ OáíÎtÙ_: HK(
--------------------------------------------------------------------------------
/examples/dictionary.words:
--------------------------------------------------------------------------------
1 | 0xC0FFEE00DEAD6666
2 | 0b10101010101010101010101010101010101
3 | 9223372036854775807
4 | 0xDEAD6666C0FFEE00:1234567
5 | 9223372036854775807:31337
6 | 0b10101010101010101010101010101010101:0987654321
--------------------------------------------------------------------------------
/makefile:
--------------------------------------------------------------------------------
1 | # THIS MAKEFILE IS SUPPOSED TO BE USED WITH THE DOCKER IMAGE nvidia/cuda:<version>-devel (see dockerfile)
2 |
3 | TARGET_NAME=CudaKeeloq
4 |
5 | CUDA_ROOT_DIR=/usr/local/cuda
6 |
7 | ARCH=x64
8 | CONFIG_RELEASE=release
9 | CONFIG_PROFILE=profile
10 | CONFIG_DEBUG=debug
11 |
12 | # CC compiler options:
13 | CC=g++
14 | CC_FLAGS=-std=c++17 -Wall
15 | CC_INCLUDE=-I./src/ -I./ThirdParty/ -I./ThirdParty/cpp-terminal
16 | # NVCC (CUDA compiler) options:
17 | NVCC=nvcc
18 | NVCC_FLAGS=--gpu-architecture=compute_80 --gpu-code=sm_80 --std=c++17
19 | NVCC_INCLUDE=-I./src/
20 |
21 | # CUDA library directory:
22 | CUDA_LIB_DIR= -L$(CUDA_ROOT_DIR)/lib64
23 | # CUDA include directory:
24 | CUDA_INC_DIR= -I$(CUDA_ROOT_DIR)/include
25 | # CUDA linking libraries:
26 | CUDA_LINK_LIBS= -lcudart
27 |
28 | # Each configuration sets target-specific OBJ_DIR/EXE_DIR and extra flags, then depends on `link`.
29 | # Configurations, default debug
30 | all: debug
31 | @echo No target specified. Default is debug
32 |
33 | release: OBJ_DIR=./$(ARCH)/$(CONFIG_RELEASE)/obj
34 | release: EXE_DIR=./$(ARCH)/$(CONFIG_RELEASE)/bin
35 | release: NVCC_FLAGS+= -use_fast_math -O3 -Xptxas -O3 --m64
36 | release: CC_FLAGS+= -O3 -DNDEBUG
37 | release:link
38 |
39 | profile: OBJ_DIR=./$(ARCH)/$(CONFIG_PROFILE)/obj
40 | profile: EXE_DIR=./$(ARCH)/$(CONFIG_PROFILE)/bin
41 | profile: NVCC_FLAGS+= -lineinfo -use_fast_math
42 | profile: CC_FLAGS+= -DNDEBUG
43 | profile:link
44 |
45 | debug: OBJ_DIR=./$(ARCH)/$(CONFIG_DEBUG)/obj
46 | debug: EXE_DIR=./$(ARCH)/$(CONFIG_DEBUG)/bin
47 | debug: NVCC_FLAGS+= -G
48 | debug: CC_FLAGS+= -D_DEBUG
49 | debug:link
50 |
51 | # Sources C++
52 | CPP_FILES = $(shell find src/ -iname "*.cpp")
53 | CPP_OBJECTS = $(CPP_FILES:%.cpp=%.o)
54 |
55 | # Sources CUDA
56 | CUDA_FILES = $(shell find src/ -iname "*.cu")
57 | CUDA_OBJECTS = $(CUDA_FILES:%.cu=%.o)
58 | # NOTE: object targets are named after the source path, but the files are written flat into $(OBJ_DIR) (see notdir/addprefix below).
59 | # Link
60 | link: $(CPP_OBJECTS) $(CUDA_OBJECTS)
61 | $(CC) $(CC_FLAGS) \
62 | $(addprefix $(OBJ_DIR)/, $(notdir $(CPP_OBJECTS))) \
63 | $(addprefix $(OBJ_DIR)/, $(notdir $(CUDA_OBJECTS))) \
64 | -o $(EXE_DIR)/$(TARGET_NAME) $(CUDA_INC_DIR) $(CUDA_LIB_DIR) $(CUDA_LINK_LIBS)
65 |
66 | # Compile C++
67 | $(CPP_OBJECTS): | mkdirs
68 | $(CC) $(CC_FLAGS) $(CC_INCLUDE) $(CUDA_INC_DIR) -c $(basename $@).cpp -o $(OBJ_DIR)/$(notdir $@)
69 |
70 | # Compile CUDA
71 | $(CUDA_OBJECTS): | mkdirs
72 | $(NVCC) $(NVCC_FLAGS) $(NVCC_INCLUDE) -c $(basename $@).cu -o $(OBJ_DIR)/$(notdir $@)
73 |
74 | # Prepare output directories (order-only prerequisite of the compile rules)
75 | mkdirs:
76 | mkdir -p $(OBJ_DIR)
77 | mkdir -p $(EXE_DIR)
78 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | ## Intro
2 | This is a CUDA accelerated simple bruteforcer of [KeeLoq](https://en.wikipedia.org/wiki/KeeLoq) algorithm.
3 |
4 | ## Disclaimer
5 |
6 | > 64-bit keeloq key is 18,446,744,073,709,551,615 possible combinations.
7 | Even if your GPU were able to check 1 billion keys per second,
8 | you would need 18446744073709551615 / 1000000000 / 3600 / 24 / 365 = 584 YEARS to brute-force a single key.
9 | > My laptop 3080Ti can do only 230 MKeys/s
10 | So it's practically impossible to use this application "as is" in real life attack.
11 |
12 | ## Version history
13 |
14 | * `0.1.2`
15 | - Fixed `dockerfile`, added `CMD` (issue: https://github.com/X-Stuff/CudaKeeloq/issues/4).
16 | - CUDA version updated to `12.2`
17 | * `0.1.1`
18 | - Added seed bruteforce mode (issue: https://github.com/x-stuff/CudaKeeloq/issues/2).
19 | - Added support for specifying a seed in text dictionaries.
20 | - Fixed some minor bugs and internal refactoring.
21 | - Slightly improved performance (1-5%)
22 | * `0.1.0`
23 | - Initial public release.
24 |
25 | ## Capabilities
26 |
27 | * **Simple** (+1) bruteforce
28 | > Regular straightforward bruteforce where keys just incremented +1
29 | * **Filtered** (+1) bruteforce
30 | > Some basic filters for simple bruteforce, like "keys with more than 6 consecutive zeros". Filters may be applied as `Include` and/or `Exclude` rules, e.g. you can exclude keys in which all bytes are ASCII symbols. You may also use an `Include` filter, but its performance is incredibly bad, so I don't recommend this attack type at all.
31 | * **Alphabet** attack
32 | > You may specify exact set of bytes which should be used for generating keys during bruteforce. For example you may want to use only numbers, or only ascii symbols.
33 | * **Pattern** attack
34 | > This is like extended alphabet. You may specify individual set of bytes for each byte in 64bit key. For example you may want first byte be any value, but others be only numbers.
35 | * **Dictionary** attack
36 | > Manufacturer keys will be taken from input files. Supports binary and text modes. In text mode it's allowed to specify a seed also.
37 |
38 | ## Limitations
39 |
40 | * Doesn't support mixed bruteforce mode, when you need to bruteforce keys and seeds. You have to specify either `seed` or `key` if you want to brute `secure` or `faac` types.
41 | * Can't do a binary dictionary window attack. If you want something similar, you can make 63 more files, where each file's content is shifted to the left by 1 to 63 bits.
42 |
43 | ## Build
44 |
45 | ### Windows
46 | #### Requirements
47 |
48 | * CUDA Toolkit v12.2.0
49 | - nvcc
50 | - cuda runtime
51 | - visual studio extension
52 |
53 | * Microsoft Visual Studio 2022
54 |
55 | #### Compiling
56 | Just open `.vcxproj` file and build it
57 |
58 | ### Linux
59 | #### Requirements
60 | * docker
61 | * NVIDIA Container [Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
62 |
63 | #### Compiling
64 | ```
65 | $ ./build.sh
66 | ```
67 | This will create a container `cudakeeloq:local` with compiled app
68 |
69 | Run the bruteforcer
70 | ```
71 | $ ./run.sh
72 | ```
73 | > NOTE: You may need to have the CUDA docker extension installed for the `--gpus` command line argument to work.
74 |
75 | ### Different CUDA Version
76 |
77 | #### Windows
78 | Open `.vcxproj` find and replace:
79 | * `CUDA 12.2.targets` with desired version
80 | * `CUDA 12.2.props` with desired version
81 |
82 | #### Linux
83 | Open `dockerfile` and change `CUDA_MAJOR`, `CUDA_MINOR` and `CUDA_PATCH` variables
84 |
85 | ## Run
86 |
87 | ### Requirements
88 | * NVidia GPU (1GB+ RAM)
89 | * RAM 1GB+
90 | * **(Linux only)** installed CUDA docker extension `nvidia-container-toolkit`
91 |
92 | ### Examples
93 |
94 | #### Simple bruteforce
95 |
96 | ```
97 | ./CudaKeeloq --inputs xx,yy,zz --mode=1 --start=0x9876543210 --count=1000000
98 | ```
99 | - bruteforce of 1 million keys starting from `0x9876543210`
100 |
101 | #### Alphabet bruteforce
102 |
103 | ```
104 | ./CudaKeeloq --inputs xx,yy,zz --mode=3 --learning-type=0 --alphabet=examples/alphabet.bin,10:20:30:AA:BB:CC:DD:EE:FF:02:33
105 | ```
106 | Specified 2 alphabets - 2 attacks will be launched:
107 | - First will use file `examples/alphabet.bin` as alphabet source.
108 | - Second alphabet is provided via command line `10:20:30:AA:BB:CC:DD:EE:FF:02:33`
109 |
110 | #### Pattern bruteforce
111 |
112 | ```
113 | ./CudaKeeloq --inputs xx,yy,zz --mode=4 --pattern=FF:11:*:*:AA-FF:01|10:00:FF,*:*:*:*:AB:CD:EF:00
114 | ```
115 | Specified 2 patterns - 2 attacks will be launched:
116 | - First will check keys started (less significant bytes) from `..00FF`, then will be either `01` or `10` bytes, then bytes range `AA, AB, AC, AD ... FF`, then 2 any bytes [`0x00`:`0xFF`], and final 2 bytes will be `11` and `FF`.
117 | - Second has constant lower 32 bit value `ABCDEF00` and higher 32 bits will be bruted.
118 |
119 | #### Seed bruteforce
120 |
121 | ```
122 | ./CudaKeeloq --inputs xx,yy,zz --mode=5 --start=
123 | ```
124 | This will launch a seed attack with the manufacturer key given in `--start=`. If `--learning-type=` is not specified, all learning types that use a seed will be checked - `FAAC` and `Secure` (the `Rev` versions are dropped intentionally since the manufacturer key is set explicitly). You can specify the start seed with `--seed=`, but it's kind of useless: checking all 32-bit values is a matter of minutes, and the check will be forced to cover the whole `uint32` range anyway.
125 |
126 | #### Dictionary bruteforce
127 |
128 | ```
129 | ./CudaKeeloq --inputs xx,yy,zz --mode=0 --word-dict=0xFDE4531BBACAD12,examples/dictionary.words --bin-dict=examples/dictionary.bin --bin-dict-mode=1
130 | ```
131 | This will launch 2 dictionary attacks:
132 | 1. Explicit key `0xFDE4531BBACAD12` from command line and parsed keys from `examples/dictionary.words`
133 | 2. Keys created from bytes of file `examples/dictionary.bin`. Since `--bin-dict-mode=1` specified, read 8-bytes from file will be reversed. So the bytes `01 00 00 00 ...` will become `uint64` number with value `1`.
134 |
135 | Other arguments for this mode aren't used.
136 |
137 | ## Command line arguments
138 |
139 | ### Inputs
140 |
141 | * `--inputs=[i1, i2, i3]` - inputs are captured "over the air" bytes, you need to provide 1-3 in format of hexadecimal number: `0x1122334455667788`.
142 | > NOTE: It is possible to launch bruteforce over the single captured data, but there will be tons of false matches. You shouldn't use it. Even with 2 inputs chances are pretty high.
143 |
144 | #### Bruteforce range
145 |
146 | * `--start=` - defines the initial value from which bruteforce begins. Applies to all types except dictionary ( default is `0` ).
147 | For alphabet or pattern types, the value should be convertible to the pattern or alphabet, e.g.
148 | if you use alphabet `77:88:FF:AA:BB` and specify `--start=0x778899FFAABBAABB` - bruteforce will start from `0x7788`**`77`**`FFAABBAABB`: since `99` does not exist in the alphabet, it will be replaced with the first byte of the alphabet.
149 | If you specify `0` as the start value, bruteforce will start from `0x77777..`.
150 |
151 | * `--count=` - number of keys to generate and check ( default is `uint64 max` - means all ).
152 | If you are using simple +1 mode, it defines the last key to check. In other modes, determining the last key might not be a trivial task.
153 |
154 | * `--seed=` - seed value. It is used only in `SECURE` and `FAAC` learning modes. Providing `seed` without a learning mode will just significantly reduce bruteforce speed.
155 | If you definitely know that the captured data is encrypted with a `seed`'ed algorithm - specify `--learning-type=4,5,8,9` (`SECURE` and `FAAC`, both with their `_rev` variations); there is no sense in calculating the others in that case. And vice versa, if you don't know the `seed` - do not specify it, otherwise useless calculations will be done.
156 | Not supported in `dictionary` mode.
157 |
158 |
159 | #### Modes
160 |
161 | In case of `single` input - the match check will be done only by match 18-bits of `serial`.
162 | Keeloq OTA packet divided into 2 parts `fix` and `hop`.
163 | - `fix` - 4-bit encoded `button` and 28-bit `serial` number of transmitter
164 | - `hop` - encoded `serial`, `button` and `counter`
165 | So in single mode decoded `serial` will be matched to stripped `serial` from `fix` part.
166 | This is not accurate and gives you tons of *phantoms*.
167 |
168 | In case of `normal` inputs (2-3) - the analysis will be slightly more complex.
169 | - All inputs will be decoded with the same key
170 | - All decoded `hop` parts will be compared by `serial`
171 | - if `serial` matches - then `button` will be checked - the button should be the same
172 | - if `button` matches - then `counter` will be checked - it should be increasing with each input
173 |
174 | 3 inputs is enough to eliminate *phantoms* no need to provide more (however there is still a possibility to catch one).
175 |
176 | 2 inputs might give you less accurate results with *phantoms*.
177 |
178 | Obviously `single` mode is 3-4 times faster than `normal` due to optimizations. However, results in `single` mode might not be accurate.
179 |
180 | #### Capture
181 |
182 | The idea of normal flow is:
183 | - Setup your radio capture device.
184 | - Click same button 3 times on your transmitter (same serial, same button, increasing counter).
185 | - Convert encoded signal to bytes.
186 | - Provide these bytes as inputs.
187 |
188 | ### CUDA Setup
189 |
190 | * `--cuda-blocks=N` - `N` is a number of thread blocks for each bruteforce attack round
191 | * `--cuda-threads=N` - `N` is a number of CUDA threads for each block (above). If `0` (default) - the maximum from device caps will be set.
192 |
193 | Overall number of threads is multiplication of `cuda-blocks` and `cuda-threads`.
194 |
195 | Keep in mind that the more overall threads you will have - the more memory they will consume. (e.g. `8196` and `1024` consumes approx. 2.5 GB RAM (and GPU RAM))
196 |
197 | ### Attack modes
198 |
199 | Each bruteforce run must define a mode.
200 | Several modes can be specified at the same time like `--mode=0,1,3,4` but you should provide all expected arguments for each mode.
201 |
202 | * `--mode=0` - Dictionary. Expects (one or both):
203 | - `--word-dict=[f1, w1, ...]` - `f1` - text dictionary file(s) with hexadecimal values, `w1` hexadecimal word itself. Supports seed also, should be provided with `:` as delimiter (e.g.: `0xAABBCCDDEE:1234`) and as decimal number only.
204 | - `--bin-dict=[f1, f2]` - binary file(s) each 8 bytes of which will be used as dictionary word.
205 | Supports `--bin-dict-mode=M` where `M` = { `0`, `1`, `2`}. `0` - as is (big endian). `1` - reverse (little endian), `2` - both.
206 | * `--mode=1` - Simple bruteforce mode.
207 | - `--start` defines the first key from which bruteforce begins.
208 | - `--count` how much keys to check.
209 | * `--mode=2` - Filtered bruteforce mode. Same as simple, but with filters:
210 | - `--exclude-filter=V` where `V` is a number representing a combination of filter flags. (see: [bruteforce_filters.h](src/bruteforce/bruteforce_filters.h))
211 | - `--include-filter=V` (same as above)
212 | * `--mode=3` - Alphabet mode. Expects:
213 | - `--alphabet=[f1, a1, ...]` - where `f1` is a binary file contents of which will be interpreted as allowed bytes in key during bruteforce. where `a1` is `:` separated hex string of bytes in alphabet (like: `AA:BB:01`)
214 | - Also allowed to use `--start` and `--count` arguments, with same meaning
215 | * `--mode=4` - Pattern mode. Expects:
216 | - `--pattern=[f1, p1, ...]` - where `f1` is a text file with hexadecimal pattern, and `p1` is `:` separated hexadecimal pattern like `*:aa:00-33:0xF3|0x11:AL0 ...`.
217 | - `*` - means any byte
218 | - `0xNN-0xMM` - means range from `0xNN` to `0xMM`
219 | - `A|B|C` - means `A` or `B` or `C`
220 | - `AL[0..N]` - means alphabet from inputs (must be specified with `--alphabet=`)
221 | - `--alphabet=` - if pattern has `AL` - alphabet must be specified.
222 | * `--mode=5` - Seed bruteforce mode. Expects:
223 | - `--start=` - Manufacturer key. This will be the only key to check. Instead of bruteforcing keys, this mode bruteforce seeds.
224 |
225 | ### Learning types
226 |
227 | By default the app will try to brute all known learning types (16 or 12, depending on whether a seed is specified); however, if you know the exact learning type, you might reduce your bruteforce time by 12-16 times. You may also specify several learning types simultaneously `--learning-type=0,5,7`.
228 | > NOTE: Depending on your GPU, at some point, using `ALL` learning types *might be* faster than explicitly specified, due to the GPU branching problem.
229 |
230 | Each learning type has its `_REV` modification. This means it will use a byte-reversed key for decryption. See more in [keeloq_learning_types.h](src/algorithm/keeloq/keeloq_learning_types.h)
231 |
232 | Here and below `=x[y]` - `x` value for normal mode, `y` for reversed.
233 |
234 | * `--learning-type=0[1]` - Simple learning
235 | * `--learning-type=2[3]` - Normal learning
236 | * `--learning-type=4[5]` - Secure learning (requires seed)
237 | * `--learning-type=6[7]` - XOR learning
238 | * `--learning-type=8[9]` - FAAC learning (requires seed)
239 | * `--learning-type=10[11]` - *UNKNOWN TYPE1*
240 | * `--learning-type=12[13]` - *UNKNOWN TYPE2*
241 | * `--learning-type=14[15]` - *UNKNOWN TYPE3*
242 |
243 | ### Miscellaneous
244 |
245 | * `--first-match` - if `true` (default) will stop all bruteforce on first match
246 | * `--test` - launches internal debug tests (useful mostly if built in `Debug` configuration)
247 | * `--benchmark` - launches CUDA setup benchmark. Will show a comparison of different CUDA setups (blocks and threads).
248 | * `--help`, `-h` - prints help
249 |
250 |
251 | ## Performance
252 |
253 | > Windows executable, release mode, MSVS 2022, CUDA 12.0.0
254 |
255 | For my laptop's GPU ( 3080Ti ) the best results are achieved with `8196` CUDA Blocks and maximum CUDA threads (from device info - `1024`) - it gives me approx.:
256 | * `28` MKeys/s for `ALL` learning types if `seed` **is** specified.
257 | * `49` MKeys/s for `ALL` learning types if `seed` is **not** provided.
258 | * `500` MKeys/s for `Simple` ( the easiest type - a single keeloq decryption ).
259 | * `250` MKeys/s for `Normal` and `Secure`.
260 | * `220` MKeys/s for `FAAC`.
261 |
--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Runs the image in docker; CONTAINER, TAG and DOCKER_ARGS may be overridden from the environment.
3 | CONTAINER="${CONTAINER:-cudakeeloq}"
4 | TAG="${TAG:-local}"
5 | DOCKER_ARGS="${DOCKER_ARGS:-}"
6 | # DOCKER_ARGS stays unquoted so it can expand to several docker options; "$@" keeps each script argument intact.
7 | docker run --rm -it --init --gpus=all ${DOCKER_ARGS} "${CONTAINER}:${TAG}" "$@"
8 |
9 |
--------------------------------------------------------------------------------
/src/algorithm/keeloq/keeloq_decryptor.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "common.h"
4 |
5 | #include
6 |
7 | #include "device/cuda_common.h"
8 |
9 |
10 | /**
11 | * Data struct which allows to decrypt encrypted data
12 | * In fact (in most cases) it is just a 64-bit integer (8-byte array)
13 | */
14 | struct Decryptor
15 | {
16 | Decryptor() = default;
17 | // Builds a decryptor from manufacturer key `k` and seed `s`; the byte-reversed key is computed once here
18 | __host__ __device__ Decryptor(uint64_t k, uint32_t s) : key(k), key_seed(s), key_rev(misc::rev_bytes(key)) {}
19 | // Equal when both the manufacturer key and the seed are equal (const, so usable on const objects)
20 | __host__ __device__ inline bool operator==(const Decryptor& other) const
21 | {
22 | return key == other.key && key_seed == other.key_seed;
23 | }
24 | // Ordering by manufacturer key only, the seed is not compared (const, so usable on const objects)
25 | __host__ __device__ inline bool operator<(const Decryptor& other) const
26 | {
27 | return key < other.key;
28 | }
29 |
30 | // Get manufacturer key
31 | __host__ __device__ inline uint64_t man() const { return key; }
32 |
33 | // Get seed
34 | __host__ __device__ inline uint32_t seed() const { return key_seed; }
35 |
36 | // Get byte-reversed manufacturer key
37 | __host__ __device__ inline uint64_t nam() const { return key_rev; }
38 |
39 | // A decryptor is treated as valid when its manufacturer key is non-zero
40 | __host__ __device__ inline bool is_valid() const { return key != 0; }
41 |
42 | protected:
43 |
44 | // manufacturer key
45 | uint64_t key;
46 |
47 | // seed (for special learning types only)
48 | uint32_t key_seed;
49 |
50 | // reversed manufacturer key
51 | uint64_t key_rev;
52 |
53 | };
--------------------------------------------------------------------------------
/src/algorithm/keeloq/keeloq_encrypted.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "common.h"
4 |
5 | #include "device/cuda_common.h"
6 |
7 |
8 | /**
9 | * struct for convenience
10 | * Represents sent over the air data
11 | * Since from engineering perspective normal byte (bit) order is big endian
12 | * In order to get fixed and hopping codes OTA has to be bit-reversed
13 | */
14 | struct EncParcel
15 | {
16 | // Raw data transmitted over the air
17 | uint64_t ota;
18 |
19 | __host__ EncParcel() : EncParcel(0) { }
20 |
21 | __device__ __host__ EncParcel(uint64_t data) : ota(data)
22 | {
23 | uint64_t key = misc::rev_bits(ota, sizeof(ota) * 8);
24 |
25 | fixed = (uint32_t)(key >> 32);
26 | hopping = (uint32_t)(key);
27 | }
28 |
29 | // Fixed code in parcel
30 | __device__ __host__ inline uint32_t fix() const { return fixed; }
31 |
32 | // hopping code in parcel
33 | __device__ __host__ inline uint32_t hop() const { return hopping; }
34 |
35 | // lowest 10 bits of fixed code (mask 0x3FF) - serial part (can be used in decryption)
36 | __device__ __host__ inline uint32_t srl() const { return fixed & 0x3FF; }
37 |
38 | // highest 4 bits of fixed code - button (can be used in decryption)
39 | __device__ __host__ inline uint32_t btn() const { return fixed >> 28; }
40 |
41 | private:
42 |
43 | // Fixed part of the parcel ( 28-bit serial | 4-bit button )
44 | uint32_t fixed;
45 |
46 | // Encrypted hopping code ( keeloq encrypted serial, button, counter )
47 | uint32_t hopping;
48 | };
--------------------------------------------------------------------------------
/src/algorithm/keeloq/keeloq_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "keeloq_kernel.h"
2 |
3 | #include "keeloq_kernel.inl"
4 |
--------------------------------------------------------------------------------
/src/algorithm/keeloq/keeloq_kernel.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "common.h"
4 |
5 | #include "device/cuda_context.h"
6 | #include "kernels/kernel_result.h"
7 |
8 | #include "algorithm/keeloq/keeloq_kernel_input.h"
9 | #include "algorithm/keeloq/keeloq_learning_types.h"
10 |
11 | #include
12 |
13 | #define NLF_LOOKUP_CONSTANT 0x3a5c742e
14 |
15 | #ifdef NO_INNER_LOOPS
16 | #define KEELOQ_INNER_LOOP(ctx, index, num) uint32_t index = ctx.thread_id;
17 | #else
18 | #define KEELOQ_INNER_LOOP(ctx, index, num) CUDA_FOR_THREAD_ID(ctx, index, num)
19 | #endif // !NO_INNER_LOOPS
20 |
21 |
22 |
23 | #define bit(x, n) (((x) >> (n)) & 1)
24 | #define g5(x, a, b, c, d, e) \
25 | (bit(x, a) + bit(x, b) * 2 + bit(x, c) * 4 + bit(x, d) * 8 + bit(x, e) * 16)
26 |
27 | // 0, 8, 19, 25, 30 == 0x42080101
28 | #define g5dec(x, g) \
29 | auto m = (x & 0x42080101); \
30 | g = (0b11111 & ( m | (m >> 7) | (m >> 17) | ( m >> 22) | (m >> 26)))
31 |
32 |
33 | __device__ __host__ inline uint32_t keeloq_common_decrypt_orig(const uint32_t data, const uint64_t key) {
34 | uint32_t x = data, r;
35 | for (r = 0; r < 528; r++)
36 | x = (x << 1) ^ bit(x, 31) ^ bit(x, 15) ^ (uint32_t)bit(key, (15 - r) & 63) ^
37 | bit(NLF_LOOKUP_CONSTANT, g5(x, 0, 8, 19, 25, 30));
38 | return x;
39 | }
40 |
41 | // This version is about 5 times faster
42 | __device__ __host__ inline uint32_t keeloq_common_decrypt(const uint32_t data, const uint64_t key)
43 | {
44 | uint32_t x = data, g, k, f;
45 | int32_t r = 15;
46 |
47 | // outer 33 cycles
48 | for (uint8_t outer = 0; outer < 33; ++outer)
49 | {
50 | // Inner 16 cycles which could be unrolled (improves performance in release - decreases in debug)
51 | UNROLL
52 | for (uint8_t inner = 0; inner < 16; ++inner)
53 | {
54 | uint32_t key_bit = r & 0b111111;
55 |
56 | g5dec(x, g);
57 |
58 | k = (uint32_t)((key >> key_bit));
59 | f = ((x >> 31) ^ (x >> 15) ^ (NLF_LOOKUP_CONSTANT >> g) ^ k) & 1;
60 | x = (x << 1) ^ f;
61 |
62 | --r;
63 | }
64 | }
65 |
66 |
67 | return x;
68 | }
69 |
70 | __device__ __host__ inline uint32_t keeloq_common_encrypt(const uint32_t data, const uint64_t key) {
71 | uint32_t x = data, r;
72 | for (r = 0; r < 528; r++)
73 | x = (x >> 1) ^ ((bit(x, 0) ^ bit(x, 16) ^ (uint32_t)bit(key, r & 63) ^
74 | bit(NLF_LOOKUP_CONSTANT, g5(x, 1, 9, 20, 26, 31)))
75 | << 31);
76 | return x;
77 | }
78 |
79 | namespace keeloq
80 | {
81 | namespace kernels
82 | {
83 |
84 | // launch simple keeloq calculation on GPU to check if everything working
85 | __host__ bool cuda_is_working();
86 |
87 | // Main kernel launcher wrapper
88 | __host__ KernelResult cuda_brute(KeeloqKernelInput & mainInputs, uint16_t ThreadBlocks, uint16_t ThreadsInBlock);
89 |
90 | }
91 |
92 | // Get encrypted OTA data for a specific configuration with key and learning ( xor, simple and normal supported )
93 | __host__ EncParcel GetOTA(uint64_t key, uint32_t seed, uint32_t serial, uint8_t button, uint16_t count, KeeloqLearningType::Type learning);
94 |
95 | }
--------------------------------------------------------------------------------
/src/algorithm/keeloq/keeloq_kernel_input.cpp:
--------------------------------------------------------------------------------
1 | #include "keeloq_kernel_input.h"
2 | #include "common.h"
3 | // Passes up to `num` decryptors of `source`, starting at index `from`, to decryptors->write() (no-op when `decryptors` is null)
4 | void KeeloqKernelInput::WriteDecryptors(const std::vector& source, size_t from, size_t num)
5 | {
6 | if (decryptors != nullptr)
7 | {
8 | assert(config.type == BruteforceType::Dictionary);
9 | const size_t first = std::min(from, source.size()); // clamp: unsigned `size() - from` would wrap around if `from` is past the end
10 | const size_t copy_num = std::min(num, source.size() - first);
11 | decryptors->write(source.data() + first, copy_num); // data() + first stays a valid pointer even when first == size()
12 | }
13 | }
14 |
15 |
16 | void KeeloqKernelInput::NextDecryptor()
17 | {
18 | assert(config.type != BruteforceType::Dictionary);
19 | config.next_decryptor();
20 | }
21 |
22 | void KeeloqKernelInput::Initialize(const BruteforceConfig& InConfig, const KeeloqLearningType::Mask& InLearnings)
23 | {
24 | config = InConfig;
25 | learnings = InLearnings;
26 | allLearnings = learnings.is_all_enabled();
27 | }
28 |
29 | void KeeloqKernelInput::BeforeGenerateDecryptors()
30 | {
31 | switch (config.type)
32 | {
33 | case BruteforceType::Filtered:
34 | {
35 | config.filters.sync_key = config.start.man();
36 | break;
37 | }
38 | default:
39 | break;
40 | }
41 | }
42 |
43 | void KeeloqKernelInput::AfterGeneratedDecryptors()
44 | {
45 | // last generated decryptor - is first on next batch
46 | // Warning: In case of non-aligned calculations "real" last decryptor may be somewhere in the middle of array
47 | config.last = decryptors->host_last();
48 | }
49 |
50 | size_t KeeloqKernelInput::NumInputs() const
51 | {
52 | assert(encdata != nullptr && "Encdata unknown yet!");
53 |
54 | auto num = encdata ? encdata->host().num : 0;
55 |
56 | assert(num >= 1 && num <= 3 && "NumInputs(): Most probably something was wrong with memory copying!");
57 |
58 | return num;
59 | }
60 |
61 | bool KeeloqKernelInput::InputsFixMatch() const
62 | {
63 | assert(encdata != nullptr && "Encdata unknown yet!");
64 |
65 | if (encdata)
66 | {
67 | std::vector enc_data;
68 | encdata->copy(enc_data);
69 |
70 | assert(enc_data.size() >= 1 && enc_data.size() <= 3 && "InputsFixMatch(): Most probably something was wrong with memory copying!");
71 |
72 | if (enc_data.size() > 2)
73 | {
74 | return enc_data[0].fix() == enc_data[1].fix() && enc_data[1].fix() == enc_data[2].fix();
75 | }
76 | else if (enc_data.size() > 1)
77 | {
78 | return enc_data[0].fix() == enc_data[1].fix();
79 | }
80 | }
81 |
82 | return false;
83 | }
84 |
--------------------------------------------------------------------------------
/src/algorithm/keeloq/keeloq_kernel_input.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "common.h"
4 |
5 | #include // memcpy
6 |
7 | #include "device/cuda_array.h"
8 | #include "device/cuda_object.h"
9 |
10 | #include "algorithm/keeloq/keeloq_encrypted.h"
11 | #include "algorithm/keeloq/keeloq_decryptor.h"
12 | #include "algorithm/keeloq/keeloq_single_result.h"
13 | #include "algorithm/keeloq/keeloq_learning_types.h"
14 |
15 | #include "bruteforce/bruteforce_config.h"
16 |
17 |
18 | // Input data for main keeloq calculation kernel
19 | struct KeeloqKernelInput : TGenericGpuObject
20 | {
21 | // Constant per-run input data (captured encoded)
22 | CudaArray* encdata = nullptr;
23 |
24 | // Single-run set of decryptors
25 | CudaArray* decryptors = nullptr;
26 |
27 | // Single-run results
28 | CudaArray* results = nullptr;
29 |
30 | KeeloqKernelInput() : TGenericGpuObject(this)
31 | {
32 | }
33 | // Move constructor: takes the array pointers and copies config, learning mask and the cached all-learnings flag
34 | KeeloqKernelInput(KeeloqKernelInput&& other) noexcept : TGenericGpuObject(this)
35 | {
36 | encdata = other.encdata;
37 | decryptors = other.decryptors;
38 | results = other.results;
39 | config = other.config;
40 | learnings = other.learnings;
41 | allLearnings = other.allLearnings; // keep the cached flag in sync with the copied mask
42 | }
43 | KeeloqKernelInput& operator=(KeeloqKernelInput&& other) = delete;
44 | KeeloqKernelInput& operator=(const KeeloqKernelInput& other) = delete;
45 |
46 | public:
47 | // Mask of learning types selected for decryption
48 | __device__ __inline__ const KeeloqLearningType::Mask& GetLearningMask() const { return learnings; }
49 |
50 | // Cached flag, set in Initialize() from learnings.is_all_enabled()
51 | __device__ __inline__ bool AllLearningsEnabled() const { return allLearnings; }
52 |
53 | // Bruteforce configuration stored by Initialize()
54 | __device__ __inline__ const BruteforceConfig& GetConfig() const { return config; }
55 |
56 | public:
57 | void WriteDecryptors(const std::vector& source, size_t from, size_t num);
58 |
59 | void NextDecryptor();
60 |
61 | void Initialize(const BruteforceConfig& inConfig, const KeeloqLearningType::Mask& inLearnings);
62 |
63 | // A "callback" which is called by generator. Used to prepare inputs for generators
64 | void BeforeGenerateDecryptors();
65 |
66 | // A "callback" which is called after generator creates Decryptors. Used to set correct last generated Decryptor
67 | void AfterGeneratedDecryptors();
68 |
69 | // Get Number of OTA inputs. Will do a GPU->CPU copy
70 | size_t NumInputs() const;
71 |
72 | // Does the fixed parts of inputs match? Will do a GPU->CPU copy
73 | bool InputsFixMatch() const;
74 | private:
75 | // Which type of learning use for decryption
76 | KeeloqLearningType::Mask learnings;
77 |
78 | // optimizations. Just a bool field that could be accessed from GPU
79 | bool allLearnings = false;
80 |
81 | // from this decryptor generation will start
82 | BruteforceConfig config;
83 | };
84 |
--------------------------------------------------------------------------------
/src/algorithm/keeloq/keeloq_learning_types.cpp:
--------------------------------------------------------------------------------
1 | #include "keeloq_learning_types.h"
2 |
3 | #include
4 |
5 |
6 | const char* KeeloqLearningType::LearningNames[] = {
7 | "KEELOQ_LEARNING_SIMPLE",
8 | "KEELOQ_LEARNING_SIMPLE_REV",
9 | "KEELOQ_LEARNING_NORMAL",
10 | "KEELOQ_LEARNING_NORMAL_REV",
11 | "KEELOQ_LEARNING_SECURE",
12 | "KEELOQ_LEARNING_SECURE_REV",
13 | "KEELOQ_LEARNING_MAGIC_XOR_TYPE_1",
14 | "KEELOQ_LEARNING_MAGIC_XOR_TYPE_1_REV",
15 | "KEELOQ_LEARNING_FAAC",
16 | "KEELOQ_LEARNING_FAAC_REV",
17 | "KEELOQ_LEARNING_MAGIC_SERIAL_TYPE_1",
18 | "KEELOQ_LEARNING_MAGIC_SERIAL_TYPE_1_REV",
19 | "KEELOQ_LEARNING_MAGIC_SERIAL_TYPE_2",
20 | "KEELOQ_LEARNING_MAGIC_SERIAL_TYPE_2_REV",
21 | "KEELOQ_LEARNING_MAGIC_SERIAL_TYPE_3",
22 | "KEELOQ_LEARNING_MAGIC_SERIAL_TYPE_3_REV",
23 | "ALL"
24 | };
25 |
26 | const size_t KeeloqLearningType::LearningNamesCount = sizeof(LearningNames) / sizeof(char*);
27 |
28 |
29 | std::string KeeloqLearningType::to_string(const std::vector& learning_types)
30 | {
31 | if (learning_types.size() == 0)
32 | {
33 | return LearningNames[KeeloqLearningType::LAST];
34 | }
35 |
36 | return to_mask(learning_types).to_string();
37 | }
38 |
39 | std::string KeeloqLearningType::Mask::to_string() const
40 | {
41 | if (is_all_enabled())
42 | {
43 | return LearningNames[KeeloqLearningType::LAST];
44 | }
45 |
46 | std::string result;
47 | for (auto type = 0; type < KeeloqLearningType::LAST; ++type)
48 | {
49 | if (values[type])
50 | {
51 | if (result.size() > 0)
52 | {
53 | result += ", ";
54 | }
55 | result += KeeloqLearningType::Name(type);
56 | }
57 | }
58 |
59 | return result;
60 | }
61 |
62 | KeeloqLearningType::Mask KeeloqLearningType::to_mask(const std::vector& in_types)
63 | {
64 | KeeloqLearningType::Mask result;
65 | // Non-empty list: enable only the listed types. Empty list: copy the `All` mask (every type enabled)
66 | if (in_types.size() > 0)
67 | {
68 | for (auto type : in_types)
69 | {
70 | if (type < KeeloqLearningType::LAST) { result.values[type] = true; } // skip out-of-range ids (e.g. INVALID = 0xff): `values` has only LAST slots
71 | }
72 | }
73 | else
74 | {
75 | memcpy(result.values, KeeloqLearningType::Mask::All, sizeof(KeeloqLearningType::Mask::All));
76 | }
77 |
78 | return result;
79 | }
80 |
81 | bool KeeloqLearningType::Mask::is_all_enabled() const
82 | {
83 | return std::memcmp(values, All, sizeof(All)) == 0;
84 | }
85 |
--------------------------------------------------------------------------------
/src/algorithm/keeloq/keeloq_learning_types.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "common.h"
4 |
5 | #include
6 | #include
7 | #include
8 | #include
9 |
10 |
11 | enum class LearningDectyptionMode
12 | {
13 | // Explicit defined learning types
14 | Invalid = 0,
15 |
16 | Explicit = 1 << 0,
17 |
18 | Force = 1 << 1,
19 |
20 | Normal = 1 << 2,
21 |
22 | Seeded = 1 << 3,
23 |
24 | // Disable Reverse manufacturer key calculations
25 | NoRev = 1 << 4,
26 |
27 | // Run only learning types without seed
28 | ForceNormal = Force | Normal,
29 |
30 | // Run only learning types with seed
31 | ForceSeeded = Force | Seeded,
32 |
33 | // Explicit defined but without seed
34 | ExplicitNormal = Explicit | Normal,
35 |
36 | // Explicit defined but with seed only
37 | ExplicitSeeded = Explicit | Seeded,
38 |
39 | // RUNS ALL LEARNING TYPES. Seeded Included, even if seed is 0
40 | ForceAll = ForceNormal | ForceSeeded,
41 |
42 | // Runs runtime checks if learning type need to be calculated (specified via mask)
43 | ExplicitAll = ExplicitNormal | ExplicitSeeded
44 | };
45 |
46 | /**
47 | * reference: https://github.com/DarkFlippers/unleashed-firmware/blob/dev/lib/subghz/protocols/keeloq_common.h
48 | */
49 | struct KeeloqLearningType
50 | {
51 | using Type = uint8_t;
52 |
53 | enum : Type
54 | {
55 | Simple = 0,
56 | Simple_Rev,
57 |
58 | Normal,
59 | Normal_Rev,
60 |
61 | Secure,
62 | Secure_Rev,
63 |
64 | Xor,
65 | Xor_Rev,
66 |
67 | Faac,
68 | Faac_Rev,
69 |
70 | Serial1,
71 | Serial1_Rev,
72 |
73 | Serial2,
74 | Serial2_Rev,
75 |
76 | Serial3,
77 | Serial3_Rev,
78 |
79 | LAST,
80 |
81 | INVALID = 0xff,
82 | };
83 |
84 | struct Mask
85 | {
86 | friend struct KeeloqLearningType;
87 |
88 | // Default mask when all learning types are enabled
89 | static constexpr Type All[LAST] = { true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true };
90 |
91 | __host__ __device__ __inline__ bool operator[](uint8_t index) const { return values[index]; }
92 |
93 | void set(uint8_t index, bool is_enabled) { values[index] = is_enabled; }
94 |
95 | bool is_all_enabled() const;
96 |
97 | std::string to_string() const;
98 |
99 | private:
100 | uint8_t values[LAST] = { 0 };
101 | };
102 |
103 | public:
104 |
105 | static std::string to_string(const std::vector& learning_types);
106 |
107 | static Mask to_mask(const std::vector& in_types);
108 | static Mask full_mask() { return to_mask({}); }
109 |
110 | static constexpr const char* ValueString(Type type)
111 | {
112 |
113 | constexpr const char* LUT[]{
114 | "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
115 | "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
116 | "20", "21", "22", "23", "24", "25", "26", "27", "28", "29",
117 | "30", "31", "32"
118 | };
119 |
120 | return LUT[type];
121 | }
122 |
123 | static constexpr const char* Name(Type type)
124 | {
125 | if (type >= LearningNamesCount) {
126 | return "INVALID";
127 | }
128 |
129 | return LearningNames[type];
130 | }
131 |
132 | private:
133 |
134 | static const char* LearningNames[];
135 |
136 | static const size_t LearningNamesCount;
137 | };
138 |
--------------------------------------------------------------------------------
/src/algorithm/keeloq/keeloq_single_result.cpp:
--------------------------------------------------------------------------------
1 | #include "keeloq_single_result.h"
2 |
3 | #include
4 |
5 | #include "device/cuda_common.h"
6 |
7 |
8 | namespace
9 | {
10 | uint32_t SerialFromOTA(uint64_t ota)
11 | {
12 | return misc::rev_bits(ota, sizeof(ota) * 8) >> 32 & 0x0FFFFFFF;
13 | }
14 | }
15 |
16 | void SingleResult::DecryptedArray::print(uint8_t element, uint64_t ota, bool ismatch) const
17 | {
18 | printf("[%-40s] Btn:0x%X\tSerial:0x%X (0x%" PRIX32 ")\tCounter:0x%X\t%s\n", KeeloqLearningType::Name(element),
19 | (data[element] >> 28), // Button
20 | (data[element] >> 16) & 0x3ff, // Serial
21 | SerialFromOTA(ota), // Serial (OTA)
22 | data[element] & 0xFFFF, // Counter
23 | (ismatch ? "(MATCH)" : ""));
24 | }
25 |
26 | void SingleResult::DecryptedArray::print() const
27 | {
28 | for (uint8_t i = 0; i < ResultsCount; ++i)
29 | {
30 | print(i, -1, false);
31 | }
32 | }
33 |
34 | void SingleResult::print(bool onlymatch /* = true */) const
35 | {
36 | printf("Results (Input: 0x%" PRIX64 " - Man key: 0x%" PRIX64 " - Seed: %u )\n\n",
37 | encrypted.ota, decryptor.man(), decryptor.seed());
38 |
39 | for (uint8_t i = 0; i < ResultsCount; ++i)
40 | {
41 | bool isMatch = match == i;
42 | if (!onlymatch)
43 | {
44 | decrypted.print(i, encrypted.ota, isMatch);
45 | }
46 | else if (isMatch)
47 | {
48 | decrypted.print(i, encrypted.ota, isMatch);
49 | }
50 | }
51 | printf("\n");
52 | }
--------------------------------------------------------------------------------
/src/algorithm/keeloq/keeloq_single_result.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "common.h"
4 |
5 | #include
6 |
7 | #include "algorithm/keeloq/keeloq_learning_types.h"
8 | #include "algorithm/keeloq/keeloq_encrypted.h"
9 | #include "algorithm/keeloq/keeloq_decryptor.h"
10 |
11 |
12 | /**
13 | * For each tested manufacturer key we retrieve these results
14 | * Depending on the selected learning types you may have from 1 to 16 (currently the value of `KeeloqLearningType::LAST`)
15 | * decrypted results for further analysis
16 | */
17 | struct SingleResult
18 | {
19 | static constexpr uint8_t ResultsCount = KeeloqLearningType::LAST;
20 |
21 | struct DecryptedArray
22 | {
23 | // fixed size array for every learning type
24 | // If it is in global memory (common case) - use operator[] - through cache
25 | // If it is thread local - use direct access
26 | uint32_t data[ResultsCount];
27 |
28 | __host__ __device__ inline uint32_t operator[](uint32_t index) const
29 | {
30 | assert(index < ResultsCount && "Invalid index of decrypted data. Bigger than last element");
31 | #if __CUDA_ARCH__
32 | return __ldca(&data[index]);
33 | #else
34 | return data[index];
35 | #endif
36 | }
37 |
38 | __host__ __device__ inline uint32_t srl(KeeloqLearningType::Type learning) const
39 | {
40 | return ((*this)[learning] >> 16) & 0x3ff;
41 | }
42 |
43 | __host__ __device__ inline uint32_t btn(KeeloqLearningType::Type learning) const
44 | {
45 | return ((*this)[learning] >> 28);
46 | }
47 |
48 | __host__ __device__ inline uint32_t cnt(KeeloqLearningType::Type learning) const
49 | {
50 | return ((*this)[learning]) & 0x0000FFFF;
51 | }
52 |
53 | void print(uint8_t element, uint64_t ota, bool ismatch) const;
54 |
55 | void print() const;
56 | };
57 |
58 |
59 | // Input encrypted data
60 | EncParcel encrypted;
61 |
62 | // used manufacturer key and seed for this result
63 | Decryptor decryptor;
64 |
65 | // Decrypted values for each known learning type
66 | DecryptedArray decrypted;
67 |
68 | // Set by GPU after analysis if there was a match
69 | KeeloqLearningType::Type match;
70 |
71 | void print(bool onlymatch = true) const;
72 | };
--------------------------------------------------------------------------------
/src/algorithm/multibase_digit.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "common.h"
4 |
5 | #include
6 | #include
7 |
8 |
9 | /**
10 | * This struct represents single digit in multi-base system (each digit has own base)
11 | * Imagine this like cylinder with N elements on it
12 | *
13 | * This struct represents all possible variants for a number (byte) in a multi-base system (attack pattern) setup
14 | */
15 | struct MultibaseDigit
16 | {
17 | template friend struct MultibaseSystem;
18 |
19 | // Creates digit config
20 | __host__ inline MultibaseDigit(const std::vector& numerals);
21 |
22 | public:
23 | // Return numeral by index
24 | __host__ __device__ inline uint8_t numeral(uint8_t in_index) const;
25 |
26 | // Cast from byte-255 value to Digit's config
27 | __host__ __device__ inline uint8_t cast(uint8_t value) const { return numeral(lookup(value)); }
28 |
29 | // return numeral index of the value
30 | // e.g.
31 | // num = { 0x04, 0xAB, 0xd7, 0x56 }
32 | // lookup(0xAB) returns 1
33 | // lookup(0x56) returns 3
34 | // lookup(0xFF) returns 0 (not found returns default)
35 | __host__ __device__ inline uint8_t lookup(uint8_t value) const { return lut[value]; }
36 |
37 | // return count of possible numerals for that digit
38 | __host__ __device__ inline uint8_t count() const { return size; }
39 |
40 | __host__ inline std::vector as_vector() const { return std::vector(&num[0], &num[0] + size); }
41 |
42 | __host__ inline std::string to_string() const;
43 |
44 | private:
45 | MultibaseDigit() : MultibaseDigit(DefaultByteArray<>::as_vector>())
46 | {
47 | }
48 |
49 | // numeral values. it may be not just 0,1,2,3,4...
50 | // but for base 4 it may be: 0xA3, 0xCC, 0x01, 0x22
51 | uint8_t num[256] = { 0 };
52 |
53 | // lookup table:
54 | // at index that equals numeral value there is a value which represents index in numerals
55 | // e.g. https://asciiflow.com/
56 | // ┌───────────────────────┐
57 | // ▲ │
58 | // numerals = [ 0x03, 0x02, 0x01, 0x00, ... garbage. ] │
59 | // ▲ │
60 | // ┌──────────────────┘ │
61 | // ▲ │
62 | // lut = [ 0x03, 0x02, 0x01, 0x00, 0x00 ... 0x00] │
63 | // ▲ │
64 | // └──────────────────────────────────────────┘
65 | //
66 | uint8_t lut[256] = { 0 };
67 |
68 | // Actual size of numerals (the base if number representing by this digit)
69 | uint8_t size = 0;
70 | };
71 |
72 | __host__ __device__ uint8_t MultibaseDigit::numeral(uint8_t in_index) const
73 | {
74 | assert(in_index < size);
75 |
76 | // Important for optimization purposes
77 | // WE ARE NOT USING (value % size)
78 | return num[in_index];
79 | }
80 |
81 | __host__ inline MultibaseDigit::MultibaseDigit(const std::vector& numerals)
82 | {
83 | // `size` counts unique numerals only: duplicates in the input are skipped
84 | size = 0;
85 | // size_t index: a uint8_t counter is always < 256, so the loop would never end on an input with 256 or more entries
86 | for (size_t i = 0; i < numerals.size(); ++i)
87 | {
88 | // Getting next numeral candidate
89 | const uint8_t numeral_value = numerals[i];
90 | // A zero lut entry is ambiguous: it means either "not added yet" or "stored at index 0",
91 | // so the numeral is new only if its entry is zero AND it is not the value already stored in num[0]
92 | const bool is_new = (lut[numeral_value] == 0) && (size == 0 || num[0] != numeral_value);
93 | if (is_new)
94 | {
95 | // putting index of `numeral_value` to the lut
96 | lut[numeral_value] = size;
97 |
98 | // setting the size-th numeral
99 | num[size] = numeral_value;
100 |
101 | // increasing the size
102 | ++size;
103 | }
104 | }
105 |
106 | assert(size > 0 && "Digit base should be at least 1");
107 | }
108 |
109 | __host__ inline std::string MultibaseDigit::to_string() const
110 | {
111 | std::string hex;
112 |
113 | for (int i = 0; i < count(); ++i)
114 | {
115 | std::string fmt(i == 0 ? "%X" : ":%X");
116 | hex += str::format(fmt, numeral(i));
117 | }
118 |
119 | return hex;
120 | }
121 |
--------------------------------------------------------------------------------
/src/algorithm/multibase_number.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "common.h"
4 |
5 | #include
6 | #include
7 |
8 |
9 | //
10 | union U64Number
11 | {
12 | uint64_t u64;
13 |
14 | uint8_t u8[8];
15 | };
16 |
17 |
18 | //
19 | struct MultibaseNumber
20 | {
21 | template friend struct MultibaseSystem;
22 |
23 | //
24 | __host__ __device__ uint64_t number() const { return value.u64; }
25 |
26 | private:
27 |
28 | U64Number value = {0};
29 |
30 | U64Number indices = {0};
31 | };
32 |
--------------------------------------------------------------------------------
/src/algorithm/multibase_system.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "common.h"
4 |
5 | #include
6 | #include
7 |
8 | #include "algorithm/multibase_digit.h"
9 | #include "algorithm/multibase_number.h"
10 |
11 |
12 |
13 | /**
14 | * This represents a number system where each digit has its own base
15 | * Imagine this as set of rolling cylinders, installed side-by-side.
16 | * This is stateless structure. It allows to do arithmetical calculations
17 | * and conversions.
18 | *
19 | * Since application of this struct is only byte bruteforce - the maximum supported base is 255
20 | *
21 | * e.g. NumDigits == 4
22 | * So there are 4 cylinders
23 | * For example 1st cylinder has base 2, second - 4, third - 6, fourth - 8
24 | * So the number 1 will be equal 0001
25 | * number 10 is equals 0012 (1 * 8) + 2 = | 0 | 0 | 1 | 2 | )
26 | * number 100 is 0204 (12 * 8) + 4 = | 0 | 2 | 0 | 4 | )
27 | * number 500 is 2224 (62 * 8) + 4 = | 2 | 2 | 2 | 4 | ) = ((10 * 6 + 2) * 8) + 4 = ((((2 * 4 + 2)) * 6 + 2) * 8) + 4
28 | *
29 | * Since this structure is pretty heavy
30 | * The idea is NOT allow non-const methods on device
31 | * You should have a single const reference on device.
32 | */
33 | template
34 | struct MultibaseSystem
35 | {
36 | static_assert(NumDigits <= 8, "At the moment we only support 8 bytes numbers");
37 |
38 | // For easier usage with aliased types
39 | constexpr static uint8_t DigitsNumber = NumDigits;
40 |
41 | /**
42 | * A generic case when all digits has different bases (pattern usage)
43 | */
44 | __host__ MultibaseSystem(const std::vector>& numerals);
45 |
46 | /**
47 | * Use the same ByteDigit for every digit in this value (alphabet usage)
48 | */
49 | __host__ MultibaseSystem(const MultibaseDigit& digit);
50 |
51 | /**
52 | * Default constructor where all Digits are Default - full range 0-255
53 | */
54 | __host__ MultibaseSystem() : MultibaseSystem(MultibaseDigit()) { }
55 |
56 | // It's pretty heavy struct if you want clone it - constructor above
57 | // TODO: disable copy
58 | // MultibaseSystem(const MultibaseSystem& other) = delete;
59 | // MultibaseSystem& operator=(const MultibaseSystem& other) = delete;
60 |
61 | public:
62 |
63 | // count of all numbers in this system
64 | __host__ __device__ inline size_t invariants() const;
65 |
66 | // cast base10 number into number of this system
67 | __host__ __device__ inline MultibaseNumber cast(uint64_t input) const;
68 |
69 | // Adds @amount in base10 to the @target argument and returns it
70 | __host__ __device__ inline MultibaseNumber& increment(MultibaseNumber& target, uint64_t amount) const;
71 |
72 | // get digit config by its index
73 | __host__ __device__ inline const MultibaseDigit& get_config(uint8_t digit_index) const { assert(digit_index < NumDigits); return digits[digit_index]; }
74 |
75 | protected:
76 |
77 | // the digits
78 | MultibaseDigit digits[NumDigits];
79 | };
80 |
81 | //
82 | using Multibase8DigitsSystem = MultibaseSystem<8>;
83 |
84 |
85 | template
86 | __host__ __device__ size_t MultibaseSystem::invariants() const
87 | {
88 | size_t num = digits[0].size;
89 |
90 | UNROLL
91 | for (uint8_t i = 1; i < NumDigits; ++i)
92 | {
93 | num *= digits[i].size;
94 | }
95 |
96 | return num;
97 | }
98 |
99 |
100 | template
101 | __host__ __device__ MultibaseNumber MultibaseSystem::cast(uint64_t input) const
102 | {
103 | MultibaseNumber number;
104 | U64Number u64Input = { input };
105 |
106 | UNROLL
107 | for (uint8_t i = 0; i < NumDigits; ++i)
108 | {
109 | number.indices.u8[i] = digits[i].lookup(u64Input.u8[i]);
110 |
111 | number.value.u8[i] = digits[i].numeral(number.indices.u8[i]);
112 | }
113 |
114 | return number;
115 | }
116 | // Adds @amount to @target in place, digit by digit starting at digit 0, and returns @target.
117 | template
118 | __host__ __device__ inline MultibaseNumber& MultibaseSystem::increment(MultibaseNumber& target, uint64_t amount) const
119 | {
120 | UNROLL
121 | for (uint8_t i = 0; i < NumDigits; ++i)
122 | {
123 | uint8_t index = target.indices.u8[i];
124 | uint8_t size = digits[i].size; // NOTE(review): a size of 0 would divide by zero below - confirm MultibaseDigit guarantees size >= 1
125 | // the remainder becomes the new index of this digit, the quotient is carried into the next digit
126 | target.indices.u8[i] = static_cast((amount + index) % size);
127 | amount = (amount + index) / size;
128 | // keep the stored byte value in sync with the updated index
129 | target.value.u8[i] = digits[i].numeral(target.indices.u8[i]);
130 | }
131 |
132 | // here @amount holds the carry that did not fit into the digits (overflow);
133 | // it is discarded, so the number wraps around
134 |
135 | return target;
136 | }
137 |
138 | // Host-only constructor: digit i is built from numerals[i]; positions past the end of @numerals get a default-constructed digit.
139 | template
140 | __host__ MultibaseSystem::MultibaseSystem(const std::vector>& numerals)
141 | {
142 | for (uint8_t i = 0; i < NumDigits; ++i) // entries of @numerals beyond NumDigits are never read
143 | {
144 | if (i < numerals.size())
145 | {
146 | digits[i] = MultibaseDigit(numerals[i]);
147 | }
148 | else
149 | {
150 | digits[i] = MultibaseDigit();
151 | }
152 | }
153 | }
154 | // Host-only constructor: every digit of the system becomes a copy of @digit.
155 | template
156 | __host__ MultibaseSystem::MultibaseSystem(const MultibaseDigit& digit)
157 | {
158 | for (uint8_t i = 0; i < NumDigits; ++i)
159 | {
160 | digits[i] = digit;
161 | }
162 | }
163 |
--------------------------------------------------------------------------------
/src/bruteforce/bruteforce_config.cpp:
--------------------------------------------------------------------------------
1 | #include "bruteforce_config.h"
2 | #include "bruteforce_type.h"
3 | #include "bruteforce_filters.h"
4 |
5 | #include "algorithm/keeloq/keeloq_decryptor.h"
6 |
7 | // Dictionary attack: takes ownership of @dictionary; the run size is its word count.
8 | BruteforceConfig BruteforceConfig::GetDictionary(std::vector&& dictionary)
9 | {
10 | BruteforceConfig result(Decryptor(0,0), BruteforceType::Dictionary, dictionary.size()); // size() is read before the move below
11 | result.decryptors = std::move(dictionary);
12 | return result;
13 | };
14 | // Simple attack: a run of @size keys starting from @first, with default (empty) filters and pattern.
15 | BruteforceConfig BruteforceConfig::GetBruteforce(Decryptor first, size_t size)
16 | {
17 | return BruteforceConfig(first, BruteforceType::Simple, size);
18 | }
19 | // Filtered attack: a run of `size` keys starting at `first`, carrying a copy of `filters`.
20 | BruteforceConfig BruteforceConfig::GetBruteforce(Decryptor first, size_t size, const BruteforceFilters& filters)
21 | {
22 |     BruteforceConfig filtered(first, BruteforceType::Filtered, size);
23 |     filtered.filters = filters;
24 |     return filtered;
25 | }
26 | // Seed attack: starts from @first; the run size is fixed to (uint32_t)-1, the largest 32-bit value.
27 | BruteforceConfig BruteforceConfig::GetSeedBruteforce(Decryptor first)
28 | {
29 | return BruteforceConfig(first, BruteforceType::Seed, (uint32_t)-1);
30 | }
31 | // Alphabet attack: a pattern run in which every byte position uses `alphabet`, retagged as Alphabet.
32 | BruteforceConfig BruteforceConfig::GetAlphabet(Decryptor first, const MultibaseDigit& alphabet, size_t num)
33 | {
34 |     BruteforceConfig config = GetPattern(first, BruteforcePattern(alphabet), num);
35 |     config.type = BruteforceType::Alphabet;
36 |     return config;
37 | }
38 | // Pattern attack: limits `num` to (pattern.size() - 1) and re-creates `first` from the pattern.
39 | BruteforceConfig BruteforceConfig::GetPattern(Decryptor first, const BruteforcePattern& pattern, size_t num)
40 | {
41 |     // never ask for more keys than (pattern.size() - 1)
42 |     const size_t limit = pattern.size() - 1;
43 |     const size_t count = (num < limit) ? num : limit;
44 |     const Decryptor aligned(pattern.init(first.man()).number(), first.seed());
45 |     BruteforceConfig config(aligned, BruteforceType::Pattern, count);
46 |     config.pattern = pattern;
47 |     return config;
48 | }
49 | // Number of dictionary words; zero for every run that is not of Dictionary type.
50 | uint64_t BruteforceConfig::dict_size() const
51 | {
52 |     if (type != BruteforceType::Dictionary)
53 |     {
54 |         return 0;
55 |     }
56 |     return size;
57 | }
58 | // Number of keys of a generated (non-dictionary) run; zero for a Dictionary run.
59 | uint64_t BruteforceConfig::brute_size() const
60 | {
61 |     if (type == BruteforceType::Dictionary)
62 |     {
63 |         return 0;
64 |     }
65 |     return size;
66 | }
67 | // Moves `start` to `last` and, for pattern/alphabet/simple/filtered runs, one key past it.
68 | void BruteforceConfig::next_decryptor()
69 | {
70 |     if (type == BruteforceType::Dictionary) { return; } // dictionary runs leave `start` untouched
71 |     start = last;
72 |     switch (type)
73 |     {
74 |     case BruteforceType::Alphabet:
75 |     case BruteforceType::Pattern:
76 |         // `last` is the last *checked* value, so roll the pattern forward by one
77 |         start = Decryptor(pattern.next(pattern.init(start.man()), 1).number(), start.seed());
78 |         break;
79 |     case BruteforceType::Simple:
80 |     case BruteforceType::Filtered:
81 |         start = Decryptor(start.man() + 1, start.seed());
82 |         break;
83 |     default: break; // remaining types continue directly from `last`
84 |     }
85 | }
86 | // Builds a human readable, type-specific description of this run.
87 | std::string BruteforceConfig::toString() const
88 | {
89 |     const char* type_name = BruteforceType::Name(type);
90 |     switch (type)
91 |     {
92 |     case BruteforceType::Simple:
93 |     {
94 |         const auto last_key = start.man() + brute_size();
95 |         return str::format("Type: %s. First: 0x%llX (seed:%u). Last: 0x%llX", type_name, start.man(), start.seed(), last_key);
96 |     }
97 |     case BruteforceType::Filtered:
98 |     {
99 |         const std::string filters_text = filters.toString();
100 |         return str::format("Type: %s. Initial: 0x%llX (seed:%u). Brute count: %zd.\n\tFilters: %s", type_name, start.man(), start.seed(), brute_size(), filters_text.c_str());
101 |     }
102 |     case BruteforceType::Alphabet:
103 |     case BruteforceType::Pattern:
104 |     {
105 |         MultibaseNumber first_value = pattern.init(start.man());
106 |         MultibaseNumber last_value = pattern.next(first_value, brute_size());
107 |
108 |         auto text = str::format("Type: %s. First: 0x%llX (seed:%u). Last: 0x%llX. (Count: %zd) All invariants: %zd",
109 |             type_name, first_value.number(), start.seed(), last_value.number(), brute_size(), pattern.size());
110 |
111 |         if (type != BruteforceType::Alphabet)
112 |         {
113 |             text += str::format("\nPattern: %s", pattern.to_string(true).c_str());
114 |         }
115 |         else
116 |         {
117 |             // an alphabet is described by the variants of byte 0
118 |             text += str::format("\n\tAlphabet: %s", pattern.bytes_variants(0).to_string().c_str());
119 |         }
120 |         return text;
121 |     }
122 |     case BruteforceType::Dictionary:
123 |     {
124 |         return str::format("Type: %s. Words num: %zd", type_name, dict_size());
125 |     }
126 |     case BruteforceType::Seed:
127 |     {
128 |         return str::format("Type: %s. Manufacturer key: 0x%llX Start Seed:%u",
129 |             type_name, start.man(), start.seed());
130 |     }
131 |     }
132 |     return str::format("UNSUPPORTED Type (%d): %s", static_cast<int>(type), type_name);
133 | }
134 |
--------------------------------------------------------------------------------
/src/bruteforce/bruteforce_config.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "common.h"
4 |
5 | #include
6 | #include
7 |
8 | #include
9 |
10 | #include "algorithm/keeloq/keeloq_decryptor.h"
11 |
12 | #include "bruteforce/bruteforce_pattern.h"
13 | #include "bruteforce/bruteforce_filters.h"
14 | #include "bruteforce/bruteforce_type.h"
15 | #include "bruteforce/bruteforce_config.h"
16 |
17 |
18 | /**
19 | * Configuration of a single attack run.
20 | * A run is one selected generator type together with the parameters specific to it.
21 | */
22 | struct BruteforceConfig
23 | {
24 | // HOST SET. ONCE. Which generator to use.
25 | BruteforceType::Type type;
26 |
27 | // HOST SET. UPDATING. PER BATCH. Decryption batch (or decryptors generation) will start from this
28 | Decryptor start;
29 |
30 | // HOST SET. ONCE. Total size of the run as reported by dict_size() (Dictionary) or brute_size() (every other type)
31 | size_t size;
32 |
33 | // Dictionary - HOST SET. ONCE.
34 | // Brute - GPU SET. UPDATING.
35 | std::vector decryptors;
36 |
37 | // HOST SET. ONCE. for filtered type.
38 | BruteforceFilters filters;
39 |
40 | // HOST SET. ONCE. for pattern or alphabet type. (alphabet is just a special case of pattern)
41 | BruteforcePattern pattern;
42 |
43 | // GPU SET. UPDATING. Last generated decryptor (will be initial for next block run)
44 | Decryptor last;
45 |
46 | public:
47 | // Default config: type BruteforceType::LAST, zero decryptor, zero size
48 | BruteforceConfig() : BruteforceConfig(Decryptor(0, 0), BruteforceType::LAST, 0)
49 | {
50 | }
51 |
52 | public:
53 | // Dictionary run: decryptors are moved from @dictionary, size is the number of entries
54 | static BruteforceConfig GetDictionary(std::vector&& dictionary);
55 | // Simple run: @size keys starting from @first
56 | static BruteforceConfig GetBruteforce(Decryptor first, size_t size);
57 | // Filtered run: @size keys starting from @first, carrying a copy of @filters
58 | static BruteforceConfig GetBruteforce(Decryptor first, size_t size, const BruteforceFilters& filters);
59 | // Seed run: starts from @first, size is fixed to (uint32_t)-1
60 | static BruteforceConfig GetSeedBruteforce(Decryptor first);
61 | // Alphabet run: a pattern run where every byte uses @alphabet
62 | static BruteforceConfig GetAlphabet(Decryptor first, const MultibaseDigit& alphabet, size_t num = (size_t)-1);
63 | // Pattern run: @num is limited to pattern.size() - 1 and @first is re-created through pattern.init()
64 | static BruteforceConfig GetPattern(Decryptor first, const BruteforcePattern& pattern, size_t num = (size_t)-1);
65 |
66 | public:
67 | // `size` for a Dictionary run, 0 otherwise
68 | uint64_t dict_size() const;
69 | // `size` for every non-Dictionary run, 0 otherwise
70 | uint64_t brute_size() const;
71 | // Human readable description, layout depends on `type`
72 | std::string toString() const;
73 | // Advances `start` based on `last` so that the next batch continues the run
74 | void next_decryptor();
75 |
76 | private:
77 | BruteforceConfig(Decryptor start, BruteforceType::Type t, size_t num) :
78 | type(t), start(start), size(num), decryptors(), filters(), pattern(), last(start)
79 | {
80 | }
81 | };
82 | // User-defined string literal: "abc"_b copies the @num characters of the literal into a vector.
83 | inline std::vector operator "" _b(const char* ascii, size_t num)
84 | {
85 | return std::vector(ascii, ascii + num);
86 | }
--------------------------------------------------------------------------------
/src/bruteforce/bruteforce_filters.cpp:
--------------------------------------------------------------------------------
1 | #include "bruteforce_filters.h"
2 | #include "common.h"
3 |
4 | #include
5 | #include
6 |
7 | // Display names used by toString(); AsciiAlphaNum and AsciiAny have no entry of their own and are printed through their component flags.
8 | const std::vector> BruteforceFilters::FilterNames =
9 | {
10 | { BruteforceFilters::Flags::None, "None" },
11 | { BruteforceFilters::Flags::All, "All" },
12 |
13 | { BruteforceFilters::Flags::Max6ZerosInARow, "6 zero bit in a row" },
14 | { BruteforceFilters::Flags::Max6OnesInARow, "6 one bit in a row" },
15 |
16 | { BruteforceFilters::Flags::BytesIncremental, "Incremental bytes pattern" },
17 | { BruteforceFilters::Flags::BytesRepeat4, "4 same byte in a row" },
18 |
19 | { BruteforceFilters::Flags::AsciiNumbers, "ASCII numbers" },
20 | { BruteforceFilters::Flags::AsciiAlpha, "ASCII letters" },
21 | { BruteforceFilters::Flags::AsciiSpecial, "ASCII special characters" },
22 | };
23 |
24 | // Renders a flag mask as "name | name | ..." using FilterNames; None and All get their own label.
25 | std::string BruteforceFilters::toString(Flags::Type flags) const
26 | {
27 |     if (flags == Flags::None) { return "None"; }
28 |     if (flags == Flags::All) { return "All"; }
29 |
30 |     std::string joined;
31 |     const auto mask = (uint64_t)flags;
32 |     for (const auto& entry : BruteforceFilters::FilterNames)
33 |     {
34 |         const auto bits = (uint64_t)std::get<0>(entry);
35 |         if (bits == 0 || (bits & mask) != bits)
36 |         {
37 |             continue; // skip the zero entry and flags that are not fully set in the mask
38 |         }
39 |         if (!joined.empty())
40 |         {
41 |             joined += " | ";
42 |         }
43 |         joined += std::get<1>(entry);
44 |     }
45 |     return joined;
46 | }
47 | // Describes both masks of this filter set: "Include: '<names>'<TAB>Exclude: '<names>'".
48 | std::string BruteforceFilters::toString() const
49 | {
50 |     std::string text = "Include: '" + toString(include);
51 |     text += "'\tExclude: '" + toString(exclude);
52 |     text += "'";
53 |     return text;
54 | }
55 |
--------------------------------------------------------------------------------
/src/bruteforce/bruteforce_filters.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "common.h"
4 |
5 | #include
6 | #include
7 | #include
8 |
9 | #include
10 |
11 | // Size of a uint64_t key in bytes
12 | constexpr uint8_t KeySizeBytes = sizeof(uint64_t);
13 | // Size of a uint64_t key in bits
14 | constexpr uint8_t KeySizeBits = sizeof(uint64_t) * 8;
15 |
16 | /**
17 | * Filters for the +1 bruteforce.
18 | * A key is accepted if `include(value) && !exclude(value)`
19 | *
20 | * Very little performance increase over simple +1 with exclude
21 | * The filters are not well optimized and contain a lot of `if` blocks
22 | */
23 | struct BruteforceFilters
24 | {
25 | struct Flags
26 | {
27 | using Type = uint64_t;
28 |
29 | enum : Type
30 | {
31 | // no filter bit set
32 | None = 0,
33 |
34 | // filter function returns true if the key has more than 6 consecutive 0 bits
35 | Max6ZerosInARow = (1 << 0),
36 |
37 | // filter function returns true if the key has more than 6 consecutive 1 bits
38 | Max6OnesInARow = (1 << 1),
39 |
40 |
41 | // filter function returns true if the key has patterns like 11:22:33:44.. or FF:EE:DD:CC
42 | // 6 bytes by default
43 | BytesIncremental = (1 << 5),
44 |
45 | // filter function returns true if the key has repeating patterns like xx11:11:11:11xx or xxAA:AA:AA:AAxx
46 | BytesRepeat4 = (1 << 6),
47 |
48 | // filter function returns true if the key consists of ASCII numbers only
49 | AsciiNumbers = (1 << 11),
50 |
51 | // filter function returns true if the key consists of letters 'a'-'z' 'A'-'Z' only
52 | AsciiAlpha = (1 << 12),
53 |
54 | // filter function returns true if the key consists of ASCII letters and numbers
55 | AsciiAlphaNum = AsciiAlpha | AsciiNumbers,
56 |
57 | // filter function returns true if the key consists of ASCII special symbols like '^%#&* only
58 | AsciiSpecial = (1 << 13),
59 |
60 | // filter function returns true if the key consists of typed ASCII characters only
61 | AsciiAny = AsciiAlphaNum | AsciiSpecial,
62 |
63 | // every bit set
64 | All = (uint64_t)-1,
65 | };
66 | // true if every bit of @check is also set in @test
67 | __host__ __device__ static inline bool HasAll(Type test, Type check)
68 | {
69 | return check == (test & check);
70 | }
71 | __host__ __device__ static inline bool HasAny(Type test, Type check) // true if @test and @check share at least one bit
72 | {
73 | return (test & check);
74 | }
75 | };
76 |
77 | public:
78 | // Names of the flags set in @flags joined with " | "; "None" / "All" for the two special masks
79 | std::string toString(Flags::Type flags) const;
80 | // Description of both the include and the exclude mask
81 | std::string toString() const;
82 |
83 | // Returns true if the key passes the current filters
84 | __host__ __device__ inline bool Pass(uint64_t key) const;
85 | // Returns true if at least one check selected in @filter matches @key
86 | __host__ __device__ inline static bool check_filters(uint64_t key, Flags::Type filter);
87 |
88 | private:
89 | // whole-key ASCII check used for Flags::AsciiAny
90 | __host__ __device__ static bool all_any_ascii(uint64_t key);
91 | // whole-key check used for Flags::AsciiNumbers
92 | __host__ __device__ static bool all_ascii_num(uint64_t key);
93 | // true if every byte of the key is 'a'-'z' or 'A'-'Z'
94 | __host__ __device__ static bool all_ascii_alpha(uint64_t key);
95 | // true if every byte of the key is in '!'-'/', ':'-'@', '['-'`' or '{'-'~'
96 | __host__ __device__ static bool all_ascii_symbol(uint64_t key);
97 | // true if every byte of the key lies within [ValueMin, ValueMax]
98 | template
99 | __host__ __device__ inline static bool all_min_max(uint64_t key);
100 | // true if the key contains a run of MaxCount equal bits
101 | template
102 | __host__ __device__ inline static bool has_consecutive_bits(uint64_t key);
103 | // true if the key contains a run of MaxCount equal bytes
104 | template
105 | __host__ __device__ inline static bool has_consecutive_bytes(uint64_t key);
106 | // true if the key contains a run of bytes changing by 0x11 per position
107 | template
108 | __host__ __device__ inline static bool has_incremental_pattern(uint64_t key);
109 |
110 |
111 | private:
112 | // flag -> display name table used by toString()
113 | static const std::vector> FilterNames;
114 |
115 | public:
116 | // Filter for keys to include.
117 | // WARNING:
118 | // Could be executed for an INFINITELY LONG TIME
119 | // e.g. start: 0x00000000001 filter Flags::AsciiAny
120 | // it will take around trillions and trillions of operations just to get to the first valid key with simple +1
121 | // In case of specific input - use dictionary, pattern or alphabet
122 | Flags::Type include = Flags::All;
123 |
124 | // Filter for keys to exclude
125 | Flags::Type exclude = Flags::None;
126 |
127 | // A uint64 value, global for all CUDA threads, which is used for generating filtered keys;
128 | // it is accessed via the atomicAdd function.
129 | uint64_t sync_key = 0;
130 | };
131 |
132 | // True only if every byte of @key lies within [ValueMin, ValueMax], both bounds inclusive.
133 | template
134 | __host__ __device__ bool BruteforceFilters::all_min_max(uint64_t key)
135 | {
136 | // AND-accumulator, so it has to start as true
137 | bool result = true;
138 | uint8_t* bPtrKey = (uint8_t*)&key;
139 | // the key is inspected byte by byte through bPtrKey
140 | UNROLL
141 | for (uint8_t i = 0; i < KeySizeBytes; ++i)
142 | {
143 | // TODO: Some vector instruction here
144 | result &= bPtrKey[i] >= ValueMin && bPtrKey[i] <= ValueMax;
145 | }
146 |
147 | return result;
148 | }
149 | // True if @key contains a run of at least MaxCount consecutive bits equal to `bit`.
150 | template
151 | __host__ __device__ bool BruteforceFilters::has_consecutive_bits(uint64_t key)
152 | {
153 | uint8_t result = false;
154 | uint64_t mask = (1 << MaxCount) - 1; // MaxCount low bits set; NOTE(review): `1` is an int here, which assumes MaxCount < 31 - confirm
155 | // zero runs are searched on the inverted key, so only runs of ones have to be detected below
156 | key = bit ? key : ~key;
157 |
158 | UNROLL
159 | for (uint8_t i = 0; i < KeySizeBits; ++i)
160 | {
161 | // all MaxCount low bits of the shifted key are ones -> a run was found
162 | result |= (key & mask) == mask;
163 | key = key >> 1;
164 | }
165 |
166 | return result;
167 | }
168 | // True if @key contains at least MaxCount equal bytes in a row.
169 | template
170 | __host__ __device__ bool BruteforceFilters::has_consecutive_bytes(uint64_t key)
171 | {
172 | // OR-accumulator, so it has to start as false
173 | bool result = false;
174 |
175 | uint8_t index = 0; // position where the current run of equal bytes started
176 | uint8_t* bPtrKey = (uint8_t*)&key;
177 |
178 | UNROLL
179 | for (uint8_t i = 1; i < KeySizeBytes; ++i)
180 | {
181 | bool equal = bPtrKey[i] == bPtrKey[index];
182 | index = equal * index + (1 - equal) * i; // branchless: keep the run start while bytes repeat, otherwise restart at i
183 | // (i - index + 1) is the length of the current run
184 | result |= (i - index) >= (MaxCount - 1);
185 | }
186 |
187 | return result;
188 | }
189 | // True if @key contains at least MaxCount bytes in a row that change by 0x11 per position (11:22:33.. or FF:EE:DD..).
190 | template
191 | __host__ __device__ bool BruteforceFilters::has_incremental_pattern(uint64_t key)
192 | {
193 |     // OR-accumulator, so it has to start as false
194 |     bool result = false;
195 |
196 |     uint8_t index = 0; // position where the current run started
197 |     uint8_t* bPtrKey = (uint8_t*)&key;
198 |
199 |     UNROLL
200 |     for (uint8_t i = 1; i < KeySizeBytes; ++i)
201 |     {
202 |         uint8_t deltaIndex = (i - index);
203 |
204 | #ifdef __CUDA_ARCH__
205 |         uint8_t asbDeltaValue = __sad(bPtrKey[i], bPtrKey[index], 0);
206 | #else
207 |         uint8_t asbDeltaValue = abs(bPtrKey[i] - bPtrKey[index]);
208 | #endif
209 |         // byte i continues the run when it is exactly deltaIndex steps of 0x11 away from the run start
210 |         bool match = asbDeltaValue == (0x11 * deltaIndex);
211 |         // branchless: keep the run start on a match, otherwise restart the run at i
212 |         index = match * index + (1 - match) * i;
213 |         // measure the run AFTER the update, so a mismatching byte is not counted towards the previous run
214 |         result |= (i - index) >= (MaxCount - 1);
215 |     }
216 |
217 |     return result;
218 | }
219 |
220 | // True as soon as one of the checks selected in @filter matches @key; false if none does.
221 | __host__ __device__ inline bool BruteforceFilters::check_filters(uint64_t key, Flags::Type filter)
222 | {
223 |     // checks run fastest-first and stop at the first hit
224 |
225 |     // whole-key ASCII check: only when every bit of AsciiAny is requested
226 |     if (Flags::HasAll(filter, Flags::AsciiAny) && all_any_ascii(key))
227 |     {
228 |         return true;
229 |     }
230 |
231 |     if (Flags::HasAny(filter, Flags::AsciiNumbers) && all_ascii_num(key))
232 |     {
233 |         return true;
234 |     }
235 |
236 |     if (Flags::HasAny(filter, Flags::AsciiAlpha) && all_ascii_alpha(key))
237 |     {
238 |         return true;
239 |     }
240 |
241 |     if (Flags::HasAny(filter, Flags::AsciiSpecial) && all_ascii_symbol(key))
242 |     {
243 |         return true;
244 |     }
245 |
246 |     // bit and byte run detectors
247 |     if (Flags::HasAny(filter, Flags::Max6OnesInARow) && has_consecutive_bits<1>(key))
248 |     {
249 |         return true;
250 |     }
251 |
252 |     if (Flags::HasAny(filter, Flags::Max6ZerosInARow) && has_consecutive_bits<0>(key))
253 |     {
254 |         return true;
255 |     }
256 |
257 |     if (Flags::HasAny(filter, Flags::BytesRepeat4) && has_consecutive_bytes(key))
258 |     {
259 |         return true;
260 |     }
261 |
262 |     if (Flags::HasAny(filter, BruteforceFilters::Flags::BytesIncremental) && has_incremental_pattern(key))
263 |     {
264 |         return true;
265 |     }
266 |
267 |     return false;
268 | }
269 | // A key passes when it matches the include mask (if active) and does not match the exclude mask (if active).
270 | __host__ __device__ inline bool BruteforceFilters::Pass(uint64_t key) const
271 | {
272 |     // the masks All and None switch the corresponding check off entirely
273 |     const bool include_active = (include != Flags::All) && (include != Flags::None);
274 |     const bool exclude_active = (exclude != Flags::None) && (exclude != Flags::All);
275 |
276 |     if (include_active && !check_filters(key, include))
277 |     {
278 |         return false;
279 |     }
280 |
281 |     if (exclude_active && check_filters(key, exclude))
282 |     {
283 |         return false;
284 |     }
285 |
286 |     return true;
287 | }
288 |
289 | // Forwards @key to all_min_max; NOTE(review): value_min ('!') and value_max ('~') are presumably meant as its bounds - confirm they are passed as template arguments
290 | __host__ __device__ inline bool BruteforceFilters::all_any_ascii(uint64_t key)
291 | {
292 | constexpr uint8_t value_min = '!';
293 | constexpr uint8_t value_max = '~';
294 |
295 | return all_min_max(key);
296 | }
297 | // Forwards @key to all_min_max; NOTE(review): value_min ('0') and value_max ('9') are presumably meant as its bounds - confirm they are passed as template arguments
298 | __host__ __device__ inline bool BruteforceFilters::all_ascii_num(uint64_t key)
299 | {
300 | constexpr uint8_t value_min = '0';
301 | constexpr uint8_t value_max = '9';
302 |
303 | return all_min_max(key);
304 | }
305 | // True when every byte of @key is an ASCII letter ('a'-'z' or 'A'-'Z').
306 | __host__ __device__ inline bool BruteforceFilters::all_ascii_alpha(uint64_t key)
307 | {
308 |     const uint8_t* bytes = (const uint8_t*)&key;
309 |     bool only_letters = true; // AND-accumulator, hence the initial true
310 |     UNROLL
311 |     for (uint8_t pos = 0; pos < KeySizeBytes; ++pos)
312 |     {
313 |         const uint8_t ch = bytes[pos];
314 |         const bool lower = (ch >= 'a' && ch <= 'z');
315 |         const bool upper = (ch >= 'A' && ch <= 'Z');
316 |         only_letters &= (lower || upper);
317 |     }
318 |     return only_letters;
319 | }
320 | // True when every byte of @key falls into one of the four ASCII symbol ranges checked below.
321 | __host__ __device__ inline bool BruteforceFilters::all_ascii_symbol(uint64_t key)
322 | {
323 |     const uint8_t* bytes = (const uint8_t*)&key;
324 |     bool only_symbols = true; // AND-accumulator, hence the initial true
325 |
326 |     UNROLL
327 |     for (uint8_t pos = 0; pos < KeySizeBytes; ++pos)
328 |     {
329 |         const uint8_t ch = bytes[pos];
330 |         const bool before_digits = (ch >= '!' && ch <= '/');
331 |         const bool after_digits = (ch >= ':' && ch <= '@');
332 |         const bool after_upper = (ch >= '[' && ch <= '`');
333 |         const bool after_lower = (ch >= '{' && ch <= '~');
334 |         only_symbols &= (before_digits || after_digits || after_upper || after_lower);
335 |     }
336 |
337 |     return only_symbols;
338 | }
339 |
--------------------------------------------------------------------------------
/src/bruteforce/bruteforce_pattern.cpp:
--------------------------------------------------------------------------------
1 | #include "bruteforce_pattern.h"
2 |
3 | #include
4 |
5 | // File-local helper: splits @input on every occurrence of @delim; the rest after the last delimiter is always appended, so the result is never empty.
6 | namespace
7 | {
8 | std::vector split(const std::string& delim, std::string input)
9 | {
10 | std::vector result;
11 | // NOTE(review): an empty @delim is found at position 0 every time, so the loop below would never end - pass a non-empty delimiter
12 | auto delim_index = input.find(delim);
13 | while (delim_index != std::string::npos)
14 | {
15 | std::string part = input.substr(0, delim_index);
16 |
17 | result.push_back(part);
18 | // drop the consumed part together with the delimiter itself
19 | input.erase(0, delim_index + delim.size());
20 |
21 | delim_index = input.find(delim);
22 | }
23 |
24 | result.push_back(input);
25 | return result;
26 | }
27 | }
28 |
29 | // Builds the underlying system from @pattern_bytes and keeps @pattern_string as the textual representation.
30 | BruteforcePattern::BruteforcePattern(std::vector>&& pattern_bytes, const std::string& pattern_string)
31 | : system(pattern_bytes), repr_string(pattern_string)
32 | {
33 | }
34 |
35 | // Builds the underlying system from the single digit @same_bytes; the textual representation is "N/A".
36 | BruteforcePattern::BruteforcePattern(const MultibaseDigit& same_bytes)
37 | : system(same_bytes), repr_string("N/A")
38 | {
39 | }
40 | // Returns the stored pattern text; with `extended` set, also one line per byte listing its allowed values.
41 | __host__ std::string BruteforcePattern::to_string(bool extended) const
42 | {
43 |     if (!extended) { return repr_string; }
44 |
45 |     // one line per byte position
46 |     std::string per_byte;
47 |     for (int byte_index = 0; byte_index < PatternSystem::DigitsNumber; ++byte_index)
48 |     {
49 |         const auto& variants = system.get_config(byte_index);
50 |         const auto variants_num = variants.count();
51 |         per_byte += str::format("\t 0%d: (", byte_index);
52 |
53 |         if (variants_num >= 255)
54 |         {
55 |             per_byte += " "; // 255 or more values: placeholder instead of the full list
56 |         }
57 |         else
58 |         {
59 |             for (int n = 0; n < variants_num; ++n)
60 |             {
61 |                 const char* fmt = (n == 0) ? "%X" : ":%X";
62 |                 per_byte += str::format(fmt, variants.numeral(n));
63 |             }
64 |         }
65 |
66 |         per_byte += ")\n";
67 |     }
68 |
69 |     std::string text = "\"" + repr_string + "\": \n";
70 |     text += per_byte;
71 |     return text;
72 | }
73 | // Parses one byte written as exactly two hex digits, optionally prefixed with "0x" (e.g. "FF", "0xA1").
74 | bool BruteforcePattern::TryParseSingleByte(std::string text, uint8_t& out)
75 | {
76 |     // accepted shapes are "HH" and "0xHH"; @out is left untouched when false is returned
77 |     const bool prefixed = text.size() == 4 && text.rfind("0x", 0) == 0;
78 |     if (text.size() != 2 && !prefixed) { return false; }
79 |
80 |     // strtoul() on its own is too lenient for validation: it skips whitespace,
81 |     // accepts a sign ("-5" -> 0xFB) and silently stops at the first bad character
82 |     // ("1G" -> 0x01), so both payload characters are checked before converting
83 |     const std::string digits = prefixed ? text.substr(2) : text;
84 |     if (digits.find_first_not_of("0123456789abcdefABCDEF") != std::string::npos) { return false; }
85 |
86 |     out = (uint8_t)strtoul(digits.c_str(), nullptr, 16);
87 |     return true;
88 | }
89 | // Parses "<from>-<to>" (bounds as accepted by TryParseSingleByte) into every byte of the inclusive range; empty on failure.
90 | std::vector BruteforcePattern::TryParseRangeBytes(std::string text)
91 | {
92 |     auto delimeter_index = text.find("-");
93 |     std::string from = text.substr(0, delimeter_index);
94 |     std::string to = text.substr(delimeter_index + 1);
95 |
96 |     uint8_t byte_from;
97 |     uint8_t byte_to;
98 |     if (!TryParseSingleByte(from, byte_from) || !TryParseSingleByte(to, byte_to))
99 |     {
100 |         return { }; // an empty result signals a malformed range
101 |     }
102 |
103 |     if (byte_from > byte_to)
104 |     {
105 |         byte_from = std::exchange(byte_to, byte_from); // reversed bounds are swapped, not rejected
106 |     }
107 |
108 |     std::vector result(byte_to - byte_from + 1);
109 |     // size_t counter on purpose: the full range 00-FF has 256 entries and a uint8_t
110 |     // counter would wrap to 0 before reaching that size, so the loop would never end
111 |     for (size_t i = 0; i < result.size(); ++i)
112 |     {
113 |         result[i] = (uint8_t)(byte_from + i);
114 |     }
115 |     return result;
116 | }
117 | // Converts one per-byte pattern token into the list of byte values it allows; an empty result means nothing could be parsed.
118 | std::vector BruteforcePattern::ParseBytes(std::string text)
119 | {
120 | // easy - all
121 | if (text == "*")
122 | {
123 | return DefaultByteArray<>::as_vector>();
124 | }
125 |
126 | // easy single byte 0xAA or AA
127 | uint8_t single_byte;
128 | if (TryParseSingleByte(text, single_byte))
129 | {
130 | return { single_byte };
131 | }
132 |
133 | // range bytes
134 | auto delimeter_index = text.find("-");
135 | if (delimeter_index != std::string::npos)
136 | {
137 | return TryParseRangeBytes(text);
138 | }
139 | // parts of the set that fail to parse are skipped silently
140 | // set of bytes separated by '|'
141 | std::vector result;
142 | std::vector splitted = split("|", text);
143 | for (const auto& part : splitted)
144 | {
145 | uint8_t byte;
146 | if (TryParseSingleByte(part, byte))
147 | {
148 | result.push_back(byte);
149 | }
150 | }
151 |
152 | return result;
153 | }
154 |
--------------------------------------------------------------------------------
/src/bruteforce/bruteforce_pattern.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "common.h"
4 |
5 | #include
6 | #include
7 |
8 | #include "algorithm/multibase_system.h"
9 | #include "algorithm/multibase_number.h"
10 |
11 | // Value type and number-system type used by BruteforcePattern
12 | using PatternValue = MultibaseNumber;
13 | using PatternSystem = MultibaseSystem;
14 |
15 | /**
16 | * In pattern mode the "cylinders" (see multi-base system) have different sizes
17 | * Alphabet - 8 cylinders with the same set of bytes
18 | * Pattern - 8 cylinders with different sets of bytes
19 | */
20 | struct BruteforcePattern
21 | {
22 | // How many bytes are in this pattern - this basically represents the byte-length of the bruteforce target
23 | // if the bruteforce target is a 64-bit key - this number should be 8
24 | static constexpr uint8_t BytesNumber = PatternSystem::DigitsNumber;
25 |
26 | BruteforcePattern() = default;
27 |
28 | // Create pattern from bytes.
29 | // LITTLE ENDIAN. pattern_bytes[0] is the lowest byte in the pattern. e.g. ( [ { 0x01 }, ... ] will end up as key 0x.....01 )
30 | // Pattern string is just for reference
31 | BruteforcePattern(std::vector>&& pattern_bytes, const std::string& pattern_string = "N/A");
32 |
33 | // Special case constructor for when the pattern is the same for every digit in the underlying system
34 | // However this is not how this class is supposed to be constructed by the public
35 | // user-code should use @BruteforceAlphabet instead in that case (NOTE(review): presumably BruteforceConfig::GetAlphabet - confirm)
36 | BruteforcePattern(const MultibaseDigit& same_bytes);
37 |
38 | public:
39 |
40 | // Initialize value for this pattern from 64-bit number
41 | __host__ __device__ inline PatternValue init(uint64_t begin) const;
42 |
43 | // Roll cylinders: returns @curr incremented by @amount
44 | __host__ __device__ inline PatternValue next(const PatternValue& curr, uint64_t amount) const;
45 |
46 | // how many numbers are in this pattern
47 | __host__ __device__ inline size_t size() const { return system.invariants(); }
48 |
49 | // This pattern is fixed size (same as target attacking key size)
50 | // according to this pattern each byte can be only a specific value
51 | // with this method you can retrieve configuration of that byte
52 | __host__ __device__ inline const MultibaseDigit& bytes_variants(uint8_t index) const;
53 |
54 | public:
55 | // Pattern text; with @extended also the allowed values of every byte
56 | __host__ std::string to_string(bool extended = false) const;
57 |
58 | public:
59 | // Convert possible single-byte pattern string to set of bytes
60 | // 0xDA -> single byte
61 | // 0x19-0x2A -> range
62 | // * -> full
63 | // 0x91|0x23 -> set of specific bytes
64 | static std::vector ParseBytes(std::string text);
65 |
66 | // tries to parse single byte value like 0xA1 or FF
67 | static bool TryParseSingleByte(std::string text, uint8_t& out);
68 |
69 | // tries to parse a byte range like 0x19-0x2A; returns an empty vector if a bound cannot be parsed
70 | static std::vector TryParseRangeBytes(std::string text);
71 |
72 | protected:
73 | // per-byte configuration of the pattern
74 | PatternSystem system;
75 |
76 | // **cannot and not designed** to be accessed on GPU
77 | std::string repr_string;
78 | };
79 | // Converts @begin into a value of this pattern by delegating to system.cast().
80 | __host__ __device__ inline PatternValue BruteforcePattern::init(uint64_t begin) const
81 | {
82 | return system.cast(begin);
83 | }
84 | // Returns a copy of @curr advanced by @amount; @curr itself is not modified.
85 | __host__ __device__ inline PatternValue BruteforcePattern::next(const PatternValue& curr, uint64_t amount) const
86 | {
87 | MultibaseNumber result = curr; // work on a copy because increment() modifies its argument in place
88 | return system.increment(result, amount);
89 | }
90 | // Returns the digit configuration of byte @index; @index must be below BytesNumber (checked by assert).
91 | __host__ __device__ inline const MultibaseDigit& BruteforcePattern::bytes_variants(uint8_t index) const
92 | {
93 | assert(index < BytesNumber && "Invalid byte index, it's bigger that bytes count in this pattern");
94 | return system.get_config(index);
95 | }
96 |
--------------------------------------------------------------------------------
/src/bruteforce/bruteforce_round.cpp:
--------------------------------------------------------------------------------
1 | #include "bruteforce_round.h"
2 |
3 | #include "common.h"
4 | #include "bruteforce_config.h"
5 | #include "kernels/kernel_result.h"
6 | #include "algorithm/keeloq/keeloq_encrypted.h"
7 | #include "algorithm/keeloq/keeloq_learning_types.h"
8 | #include "algorithm/keeloq/keeloq_single_result.h"
9 |
10 | #include
11 |
12 | // Stores the encrypted data and the CUDA setup, derives the batch size and initializes the kernel inputs.
13 | BruteforceRound::BruteforceRound(const std::vector& enc, const BruteforceConfig& config, std::vector selected_learning,
14 | uint32_t blocks, uint32_t threads, uint32_t iterations)
15 | : encrypted_data(enc)
16 | {
17 | #if NO_INNER_LOOPS
18 | iterations = 1;
19 | #endif
20 | // CUDASetup layout: [0] blocks, [1] threads, [2] iterations
21 | CUDASetup[0] = blocks;
22 | CUDASetup[1] = threads;
23 | CUDASetup[2] = iterations;
24 | // NOTE(review): the product below is computed on uint32_t operands - confirm it cannot overflow for large setups
25 | num_decryptors_per_batch = iterations * threads * blocks;
26 | // the selected learning types are handed to the kernel inputs as a mask
27 | kernel_inputs.Initialize(config, KeeloqLearningType::to_mask(selected_learning));
28 | }
29 | // Calls kernel_inputs.results->copy() into block_results (presumably a device-to-host copy - confirm in CudaArray) and returns that host vector.
30 | const std::vector& BruteforceRound::read_results_gpu()
31 | {
32 | kernel_inputs.results->copy(block_results);
33 | return block_results;
34 | }
35 | // Calls kernel_inputs.decryptors->copy() into decryptors (presumably a device-to-host copy - confirm in CudaArray) and returns that host vector.
36 | const std::vector& BruteforceRound::read_decryptors_gpu()
37 | {
38 | kernel_inputs.decryptors->copy(decryptors);
39 | return decryptors;
40 | }
41 |
42 | bool BruteforceRound::check_results(const KernelResult& result)
43 | {
44 | if (result.error < 0)
45 | {
46 | printf("Kernel fatal error: %d\n Round should be finished!\n", result.error);
47 | return true;
48 | }
49 | else if (result.error != 0)
50 | {
51 | printf("CUDA calculations num errors: %d\n", result.error);
52 | }
53 |
54 | if (result.value > 0)
55 | {
56 | auto& all_results = read_results_gpu();
57 |
58 | printf("Matches count: %d\n", result.value);
59 |
60 | for (const auto& result : all_results)
61 | {
62 | if (result.match == KeeloqLearningType::INVALID)
63 | {
64 | continue;
65 | }
66 |
67 | result.print();
68 | }
69 |
70 | return true;
71 | }
72 |
73 | return false;
74 | }
75 | // Combined size in bytes of the encrypted data, decryptors and block results vectors.
76 | size_t BruteforceRound::get_mem_size() const
77 | {
78 |     assert(inited);
79 |     const size_t encrypted_bytes = encrypted_data.size() * sizeof(EncParcel);
80 |     const size_t decryptor_bytes = decryptors.size() * sizeof(Decryptor);
81 |     const size_t result_bytes = block_results.size() * sizeof(SingleResult);
82 |     return encrypted_bytes + decryptor_bytes + result_bytes;
83 | }
84 | // Number of batches needed to cover the whole run: total keys divided by keys per batch, rounded up.
85 | size_t BruteforceRound::num_batches() const
86 | {
87 |     assert(inited);
88 |     // a dictionary run is sized by dict_size(), every other type by brute_size()
89 |     const auto total = (Type() == BruteforceType::Dictionary) ? Config().dict_size() : Config().brute_size();
90 |     const auto per_batch = keys_per_batch();
91 |     // a trailing partial batch still counts as a batch of its own
92 |     size_t batches = total / per_batch;
93 |     if (total % per_batch != 0)
94 |     {
95 |         ++batches;
96 |     }
97 |     return batches;
98 | }
99 | // Number of decryptors per batch, i.e. the size of the host decryptors vector; requires Init() (asserted).
100 | size_t BruteforceRound::keys_per_batch() const
101 | {
102 | assert(inited);
103 | return decryptors.size();
104 | }
105 | // Number of result entries per batch, i.e. the size of the host block_results vector; requires Init() (asserted).
106 | size_t BruteforceRound::results_per_batch() const
107 | {
108 | assert(inited);
109 | return block_results.size();
110 | }
111 | // One-time setup: sizes the host buffers and allocates their GPU counterparts; later calls do nothing.
112 | void BruteforceRound::Init()
113 | {
114 | if (!inited)
115 | {
116 | // allocated once. updated every run on GPU
117 | decryptors = std::vector(num_decryptors_per_batch);
118 |
119 | // allocated once. updated every run on GPU. copied to CPU only if match found.
120 | block_results = std::vector(encrypted_data.size() * decryptors.size());
121 | // GPU allocation relies on the host vectors sized above
122 | alloc();
123 |
124 | inited = true;
125 | }
126 | }
127 | // Multi-line summary of the round: CUDA setup, buffer sizes, learning mask and generator config.
128 | std::string BruteforceRound::to_string() const
129 | {
130 |     assert(inited);
131 |     const auto learning_text = kernel_inputs.GetLearningMask().to_string();
132 |     const auto config_text = Config().toString();
133 |     return str::format("Setup:\n"
134 |         "\tCUDA: Blocks:%u Threads:%u Iterations:%u\n"
135 |         "\tEncrypted data size:%zd\n"
136 |         "\tLearning type:%s\n"
137 |         "\tResults per batch:%zd\n"
138 |         "\tDecryptors per batch:%zd\n"
139 |         "\tConfig: %s",
140 |         CudaBlocks(), CudaThreads(), CudaThreadIterations(), encrypted_data.size(), learning_text.c_str(), results_per_batch(), keys_per_batch(), config_text.c_str());
141 | }
142 |
143 | // Allocates the GPU arrays for encrypted data, decryptors and results from the matching host vectors.
144 | void BruteforceRound::alloc()
145 | {
146 | // debug builds: a second allocation is treated as a programming error
147 | assert(kernel_inputs.encdata == nullptr && "Encrypted data already allocated on GPU");
148 | assert(kernel_inputs.decryptors == nullptr && "Decryptors data already allocated on GPU");
149 | assert(kernel_inputs.results == nullptr && "Results data already allocated on GPU");
150 |
151 | // ALLOCATE ON GPU (each array only if it is not allocated yet)
152 | if (kernel_inputs.encdata == nullptr)
153 | {
154 | kernel_inputs.encdata = CudaArray::allocate(encrypted_data);
155 | }
156 |
157 | if (kernel_inputs.decryptors == nullptr)
158 | {
159 | kernel_inputs.decryptors = CudaArray::allocate(decryptors);
160 | }
161 |
162 | if (kernel_inputs.results == nullptr)
163 | {
164 | kernel_inputs.results = CudaArray::allocate(block_results);
165 | }
166 | }
167 | // Releases the GPU arrays that are allocated and empties the host-side buffers.
168 | void BruteforceRound::free()
169 | {
170 |     // frees one GPU array and resets its handle, so an already released
171 |     // (or never allocated) array is simply skipped
172 |     auto release = [](auto& device_array)
173 |     {
174 |         if (device_array == nullptr)
175 |         {
176 |             return;
177 |         }
178 |         device_array->free();
179 |         device_array = nullptr;
180 |     };
181 |
182 |     // GPU side
183 |     release(kernel_inputs.encdata);
184 |     release(kernel_inputs.decryptors);
185 |     release(kernel_inputs.results);
186 |
187 |     // host side
188 |     encrypted_data.clear();
189 |     decryptors.clear();
190 |     block_results.clear();
191 | }
192 |
--------------------------------------------------------------------------------
/src/bruteforce/bruteforce_round.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "common.h"
4 |
5 | #include
6 | #include
7 |
8 | #include "algorithm/keeloq/keeloq_learning_types.h"
9 | #include "algorithm/keeloq/keeloq_single_result.h"
10 | #include "algorithm/keeloq/keeloq_encrypted.h"
11 | #include "algorithm/keeloq/keeloq_kernel_input.h"
12 |
13 | #include "bruteforce/bruteforce_config.h"
14 | #include "kernels/kernel_result.h"
15 |
16 |
/**
 * A round is a set of bruteforce batches.
 * Each batch runs N threads.
 * Each thread checks one or more decryptors.
 * Each check applies one or more (configured via arguments) keeloq learnings.
 *
 * Typical usage:
 * Some rounds are created from the command line - e.g. Dictionary and Simple attacks.
 * Each attack has N decryptors to check, using B blocks, T threads and I iterations per thread.
 * This means `(1 BATCH size) = B * T * I` decryptors are checked per batch,
 * and the number of batches is `N / (1 BATCH size)`.
 */
struct BruteforceRound
{
    // Construct round struct without specific learning type (means use all learnings)
    BruteforceRound(const std::vector& data, const BruteforceConfig& gen, uint32_t blocks, uint32_t threads, uint32_t iterations) :
        BruteforceRound(data, gen, {}, blocks, threads, iterations) {}

    // Construct round struct with only one selected learning type
    BruteforceRound(const std::vector& data, const BruteforceConfig& gen, KeeloqLearningType::Type single_learning,
        uint32_t blocks, uint32_t threads, uint32_t iterations) :
        BruteforceRound(data, gen, std::vector { single_learning }, blocks, threads, iterations) {}

    // Standard constructor (the two constructors above delegate to it)
    BruteforceRound(const std::vector& data, const BruteforceConfig& gen, std::vector selected_learning,
        uint32_t blocks, uint32_t threads, uint32_t iterations);

    // Releases GPU arrays and clears the host containers (see free())
    ~BruteforceRound()
    {
        free();
    }

public:
    // Allocates host containers and GPU memory; does nothing when already initialized
    void Init();

    // Reads results data from GPU memory into internal container and returns const reference to it
    const std::vector& read_results_gpu();

    // Reads decryptors data from GPU memory into internal container and returns const reference to it
    const std::vector& read_decryptors_gpu();

    // Checks the kernel's results
    // Returns true if the round should be finished
    bool check_results(const KernelResult& result);

    // Get allocated memory amount for data
    size_t get_mem_size() const;

    // How many batches are in this round (basically total keys to check divided by number of keys in a batch)
    size_t num_batches() const;

    // How many calculated results are in a batch (with 3 inputs - 3 x keys_per_batch)
    size_t results_per_batch() const;

    // How many keys to check in one batch
    size_t keys_per_batch() const;

    // Multi-line description of the setup; requires Init() to have been called
    std::string to_string() const;

public:
    // CUDASetup layout: [0] = blocks, [1] = threads, [2] = iterations per thread
    inline uint32_t CudaBlocks() const { return CUDASetup[0]; }

    inline uint32_t CudaThreads() const { return CUDASetup[1]; }

    inline uint32_t CudaThreadIterations() const { return CUDASetup[2]; }

    // The accessors below assert that Init() has been called
    inline const BruteforceConfig& Config() const { assert(inited); return kernel_inputs.GetConfig(); }

    inline BruteforceType::Type Type() const { assert(inited); return Config().type; }

    inline KeeloqKernelInput& Inputs() { assert(inited); return kernel_inputs; }

private:

    // Creates the GPU copies of encrypted_data, decryptors and block_results
    void alloc();

    // Frees the GPU copies and clears the host containers
    void free();

private:

    // Set by Init() once allocation has been done
    bool inited = false;

    // Number of elements Init() creates in `decryptors`
    uint32_t num_decryptors_per_batch = 0;

    // Kernel input object; holds the device pointers to the three arrays below
    KeeloqKernelInput kernel_inputs;

    // Constant per run
    std::vector encrypted_data;

    // May hold a large amount of data
    std::vector decryptors;

    // May hold a large amount of data
    std::vector block_results;

    // Launch configuration, see the Cuda*() accessors for the layout
    uint32_t CUDASetup[3] = { 0 };
};
--------------------------------------------------------------------------------
/src/bruteforce/bruteforce_type.cpp:
--------------------------------------------------------------------------------
1 | #include "bruteforce_type.h"
2 |
3 |
4 | const char* BruteforceType::GeneratorTypeName[] = {
5 | "Dictionary",
6 | "Simple",
7 | "Filtered",
8 | "Alphabet",
9 | "Pattern",
10 | "Seed"
11 | };
12 |
13 | const size_t BruteforceType::GeneratorTypeNamesCount = sizeof(GeneratorTypeName) / sizeof(char*);
14 |
15 | const char* BruteforceType::Name(Type type)
16 | {
17 | if (type > GeneratorTypeNamesCount)
18 | {
19 | return "UNKNOWN";
20 | }
21 |
22 | return GeneratorTypeName[type];
23 | }
--------------------------------------------------------------------------------
/src/bruteforce/bruteforce_type.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "common.h"
4 |
/**
 * Kind of bruteforce attack (selects how decryptor candidates are produced)
 */
struct BruteforceType
{
    using Type = uint8_t;

    enum : Type
    {
        // Candidates come from a dictionary - generation is skipped
        Dictionary = 0,
        None = Dictionary,

        // Plain +1 generator (very fast candidate generation)
        Simple = 1,

        // Plain +1 generator with filters applied (performance may degrade)
        Filtered = 2,

        // Bruteforce over a user supplied alphabet
        Alphabet = 3,

        // ASCII pattern like ?A:01:??:3?:*
        Pattern = 4,

        // Plain +1 bruteforce of the seed. The seed is a 32-bit value, so it may be done in an acceptable amount of time
        Seed = 5,

        // Sentinel - not a usable type
        LAST = 6,
    };

    // Display name of @type
    static const char* Name(Type type);

private:

    // Name table and its length; defined in the corresponding .cpp
    static const char* GeneratorTypeName[];

    static const size_t GeneratorTypeNamesCount;
};
45 |
46 |
--------------------------------------------------------------------------------
/src/bruteforce/generators/generator_bruteforce.cpp:
--------------------------------------------------------------------------------
1 | #include "generator_bruteforce.h"
2 |
3 | #include "algorithm/keeloq/keeloq_kernel_input.h"
4 | #include "kernels/kernel_result.h"
5 |
6 |
7 | int GeneratorBruteforce::PrepareDecryptors(KeeloqKernelInput& inputs, uint16_t blocks, uint16_t threads)
8 | {
9 | const BruteforceConfig& config = inputs.GetConfig();
10 | KernelResult generator_results;
11 |
12 | inputs.BeforeGenerateDecryptors();
13 |
14 | switch (config.type)
15 | {
16 | case BruteforceType::Simple:
17 | {
18 | GeneratorBruteforceSimple::LaunchKernel(blocks, threads, inputs.ptr(), generator_results.ptr());
19 | break;
20 | }
21 | case BruteforceType::Seed:
22 | {
23 | GeneratorBruteforceSeed::LaunchKernel(blocks, threads, inputs.ptr(), generator_results.ptr());
24 | break;
25 | }
26 | case BruteforceType::Filtered:
27 | {
28 | GeneratorBruteforceFiltered::LaunchKernel(blocks, threads, inputs.ptr(), generator_results.ptr());
29 | break;
30 | }
31 | case BruteforceType::Pattern:
32 | case BruteforceType::Alphabet:
33 | {
34 | GeneratorBruteforcePattern::LaunchKernel(blocks, threads, inputs.ptr(), generator_results.ptr());
35 | break;
36 | }
37 | case BruteforceType::Dictionary:
38 | {
39 | return 0;
40 | }
41 | default:
42 |
43 | printf("Error: Invalid bruteforce type: %d %s! Don't know how to generate decryptors!\n",
44 | (int)config.type, BruteforceType::Name(config.type));
45 | return 0;
46 | }
47 |
48 | inputs.read(); // it will not cause underneath arrays copy
49 | generator_results.read();
50 |
51 | inputs.AfterGeneratedDecryptors();
52 |
53 | return generator_results.error;
54 | }
55 |
--------------------------------------------------------------------------------
/src/bruteforce/generators/generator_bruteforce.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "common.h"
4 |
5 | #include
6 |
7 | #include "algorithm/keeloq/keeloq_kernel_input.h"
8 | #include "kernels/kernel_result.h"
9 |
10 |
/**
 * Declares a host-side wrapper struct `name` around a kernel implemented in a .cu/.inl file.
 * Expands to:
 *  - a declaration of the extern "C" getter that returns the kernel's address as void*,
 *  - a declaration of the __global__ kernel itself (parameters are __VA_ARGS__),
 *  - struct `name`, derived from IGenerator, whose GetKernelFunctionPtr() casts the getter's
 *    result to the kernel's function pointer type.
 */
#define DECLARE_GENERATOR(name, ...) \
    extern "C" void* GENERATOR_KERNEL_GETTER_NAME(name)(); \
    __global__ void GENERATOR_KERNEL_NAME(name)(__VA_ARGS__); \
    struct name : public IGenerator \
    {\
        typedef void(*func)(__VA_ARGS__); \
        inline static func GetKernelFunctionPtr() { return (func)GENERATOR_KERNEL_GETTER_NAME(name)(); } \
    };
22 |
23 |
24 | template
25 | struct IGenerator
26 | {
27 | typedef void(*KernelFunc)(KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr results);
28 |
29 | static inline void LaunchKernel(uint16_t blocks, uint16_t threads, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr results)
30 | {
31 | void* args[] = { &input, &results };
32 |
33 | auto* func = TSelf::GetKernelFunctionPtr();
34 |
35 | auto error = cudaLaunchKernel((const void*)func, dim3(blocks), dim3(threads), args, 0, nullptr);
36 | CUDA_CHECK(error);
37 | }
38 | };
39 |
// Entry point for decryptor generation; dispatches to one of the generator kernels.
struct GeneratorBruteforce
{
    // Checks the type of generator configured in @inputs and launches the matching kernel
    // (with @blocks x @threads) to generate the next batch of decryptors.
    // Decryptors are generated on the GPU and stay in GPU memory.
    // Returns the error value reported by the generator kernel; 0 when no kernel was launched.
    static int PrepareDecryptors(KeeloqKernelInput& inputs, uint16_t blocks, uint16_t threads);
};
46 |
47 |
48 | // Extern cuda kernels - Implementation are in inl.file
49 | DECLARE_GENERATOR(GeneratorBruteforcePattern, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls);
50 | DECLARE_GENERATOR(GeneratorBruteforceFiltered, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls);
51 | DECLARE_GENERATOR(GeneratorBruteforceSimple, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls);
52 | DECLARE_GENERATOR(GeneratorBruteforceSeed, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls);
53 |
54 |
55 |
56 |
--------------------------------------------------------------------------------
/src/bruteforce/generators/generator_bruteforce_filtered_kernel.inl:
--------------------------------------------------------------------------------
1 | #include "device/cuda_context.h"
2 |
3 | #include
4 | #include
5 |
6 | #include "kernels/kernel_result.h"
7 | #include "algorithm/keeloq/keeloq_kernel_input.h"
8 | #include "algorithm/keeloq/keeloq_decryptor.h"
9 |
10 | #include "bruteforce/bruteforce_type.h"
11 | #include "bruteforce/bruteforce_filters.h"
12 |
13 | template
14 | __device__ inline uint64_t RequestNewBlock(TPtr* from, uint32_t size)
15 | {
16 | return atomicAdd((unsigned long long int*)from, size);
17 | }
18 |
// Generator kernel for the Filtered attack.
// Every thread fills its share of `decryptors` with keys that pass the configured filters.
// Candidate keys are taken in contiguous blocks from a shared counter (filters.sync_key)
// through an atomic add, so two threads never test the same key.
__global__ void DEFINE_GENERATOR_KERNEL(GeneratorBruteforceFiltered, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls)
{
    const BruteforceConfig& config = input->GetConfig();

    assert(config.type == BruteforceType::Filtered);

    assert(config.start.man() > 0x100000000000 && "Starting key should be big enough to start bruteforcing. Consider pattern brute.");

    assert(config.filters.include != BruteforceFilters::Flags::None && "Include filter None is invalid - will lead to infinite loop!");
    assert(config.filters.exclude != BruteforceFilters::Flags::All && "Exclude filter All is invalid - will lead to infinite loop!");

    CudaContext ctx = CudaContext::Get();

    // Every generated decryptor uses the same seed
    const uint32_t seed = config.start.seed();

    CudaArray& decryptors = *input->decryptors;
    size_t num_decryptors = decryptors.num;

    const BruteforceFilters& filters = config.filters;


    // If we need to generate 24 keys and we have 64 threads, the last 40 should do nothing
    // NOTE(review): `>=` also selects the thread whose id equals num_decryptors; that thread still
    // ends up with 0 keys because both terms of the sum below are 0 for it - confirm `>` was not intended.
    uint8_t at_least_one = num_decryptors >= ctx.thread_id;

    // If we have to generate 75 keys with 64 threads, 11 threads should generate one extra key
    size_t non_aligned = num_decryptors % ctx.thread_max;
    uint8_t additional_this_thread = non_aligned > 0 && non_aligned > ctx.thread_id;

    // Countdown of how many keys this thread still has to generate
    uint32_t num_to_generate = static_cast(at_least_one * (num_decryptors / ctx.thread_max + additional_this_thread));

    // Size of the currently owned block; 0 means "no block, request a new one"
    uint32_t block_size = 0;

    // Not yet tested part [begin, end) of the currently owned block of keys
    uint64_t man_block_begin = 0;
    uint64_t man_block_end = 0;

    // One iteration per output slot written by this thread
    CUDA_FOR_THREAD_ID(ctx, write_index, num_decryptors)
    {
        bool written = false;

        // Repeat until a key passing the filters has been stored at write_index
        do
        {
            if (block_size == 0)
            {
                // Acquire no more keys than this thread still has to generate.
                // This prevents situations like:
                //   this thread needs to generate 2 keys
                //   but it has requested 100 keys to check;
                //   the first two keys are valid, the other 98 are never checked by anybody,
                //   and the thread cannot "return" them to the unchecked pool
                //   because taking a block is just an atomic add operation
                block_size = num_to_generate;

                // Get the raw manufacturer key (number) this block starts at.
                // The atomic instruction guarantees that blocks of different threads do not overlap.
                // For example:
                //   each thread should generate 16 keys
                //   `next.man` is 123 right now, thread 1 does the atomic add and gets 123:
                //     it starts from 123 and does 16 checks
                //   `next.man` is now 139, but thread 1 runs its checks from 123
                //   ...
                //   thread 1 found 4 keys, so it should generate 12 more
                //   `next.man` is 6383 on the next request (other threads do their jobs as well)
                //   thread 1 adds 12 to `next.man` and gets the previous value,
                //   so now it checks 12 keys from 6383 to 6395
                //
                man_block_begin = RequestNewBlock(&filters.sync_key, block_size);

                // TODO: Check how overflow behaves
                man_block_end = man_block_begin + block_size;
            }

            for (uint64_t key = man_block_begin; key < man_block_end; ++key)
            {
                if (filters.Pass(key))
                {
                    --num_to_generate;

                    // the next write continues testing with the key right after this one
                    man_block_begin = key + 1;

                    // leaves the do-while loop
                    written = true;

                    decryptors[write_index] = Decryptor(key, seed);
                    break;
                }
            }

            if (!written)
            {
                // block exhausted without a match - request a new block
                block_size = 0;
            }

        } while (!written);
    }
}

DEFINE_GENERATOR_GETTER(GeneratorBruteforceFiltered);
--------------------------------------------------------------------------------
/src/bruteforce/generators/generator_bruteforce_pattern_kernel.inl:
--------------------------------------------------------------------------------
1 | #include "device/cuda_context.h"
2 |
3 | #include "kernels/kernel_result.h"
4 | #include "algorithm/keeloq/keeloq_kernel_input.h"
5 |
6 | #include "bruteforce/bruteforce_pattern.h"
7 | #include "bruteforce/bruteforce_type.h"
8 |
9 |
10 | __global__ void DEFINE_GENERATOR_KERNEL(GeneratorBruteforcePattern, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls)
11 | {
12 | const BruteforceConfig& config = input->GetConfig();
13 |
14 | assert((config.type == BruteforceType::Alphabet) || (config.type == BruteforceType::Pattern));
15 |
16 | CudaContext ctx = CudaContext::Get();
17 |
18 | const BruteforcePattern& pattern = config.pattern;
19 | CudaArray& decryptors = *input->decryptors;
20 |
21 | // Imagine alphabet as rotating rings with letters on it
22 | // we have 8-bytes key so there will be 8 rings
23 | // bytes in the key show how much ring is rotated
24 | // and what 'letter' it should have.
25 | // Or also it can be considered as 8-digit N-based number
26 | MultibaseNumber start = pattern.init(config.start.man());
27 |
28 | const uint32_t seed = config.start.seed();
29 |
30 | CUDA_FOR_THREAD_ID(ctx, decryptor_index, decryptors.num)
31 | {
32 | decryptors[decryptor_index] = Decryptor(pattern.next(start, decryptor_index).number(), seed);
33 | }
34 | }
35 |
36 | DEFINE_GENERATOR_GETTER(GeneratorBruteforcePattern);
--------------------------------------------------------------------------------
/src/bruteforce/generators/generator_bruteforce_seed_kernel.inl:
--------------------------------------------------------------------------------
1 | #include "device/cuda_context.h"
2 |
3 | #include "kernels/kernel_result.h"
4 | #include "algorithm/keeloq/keeloq_kernel_input.h"
5 | #include "bruteforce/bruteforce_type.h"
6 |
7 | __global__ void DEFINE_GENERATOR_KERNEL(GeneratorBruteforceSeed, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls)
8 | {
9 | CudaContext ctx = CudaContext::Get();
10 |
11 | assert(input->GetConfig().type == BruteforceType::Seed);
12 |
13 | const Decryptor& start = input->GetConfig().start;
14 |
15 | CudaArray& decryptors = *input->decryptors;
16 |
17 | CUDA_FOR_THREAD_ID(ctx, decryptor_index, decryptors.num)
18 | {
19 | decryptors[decryptor_index] = Decryptor(start.man(), start.seed() + decryptor_index);
20 | }
21 | }
22 |
23 | DEFINE_GENERATOR_GETTER(GeneratorBruteforceSeed);
--------------------------------------------------------------------------------
/src/bruteforce/generators/generator_bruteforce_simple_kernel.inl:
--------------------------------------------------------------------------------
1 | #include "device/cuda_context.h"
2 |
3 | #include "kernels/kernel_result.h"
4 | #include "algorithm/keeloq/keeloq_kernel_input.h"
5 | #include "bruteforce/bruteforce_type.h"
6 |
7 | __global__ void DEFINE_GENERATOR_KERNEL(GeneratorBruteforceSimple, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls)
8 | {
9 | CudaContext ctx = CudaContext::Get();
10 |
11 | assert(input->GetConfig().type == BruteforceType::Simple);
12 |
13 | const Decryptor& start = input->GetConfig().start;
14 |
15 | CudaArray& decryptors = *input->decryptors;
16 |
17 | CUDA_FOR_THREAD_ID(ctx, decryptor_index, decryptors.num)
18 | {
19 | decryptors[decryptor_index] = Decryptor(start.man() + decryptor_index, start.seed());
20 | }
21 | }
22 |
23 | DEFINE_GENERATOR_GETTER(GeneratorBruteforceSimple);
--------------------------------------------------------------------------------
/src/bruteforce/generators/generator_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "common.h"
2 |
3 | #include "generator_bruteforce_pattern_kernel.inl"
4 | #include "generator_bruteforce_filtered_kernel.inl"
5 | #include "generator_bruteforce_simple_kernel.inl"
6 | #include "generator_bruteforce_seed_kernel.inl"
--------------------------------------------------------------------------------
/src/common.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 |
9 |
// Reports a failed CUDA call: prints the error name and description, then asserts.
// Wrapped in do { } while (0) so the macro behaves as one statement (safe inside an
// unbraced if/else); the argument is parenthesized so any expression may be passed.
#define CUDA_CHECK(error) \
    do { \
        if ((error) != 0) { printf("\nASSERTION FAILED. CUDA ERROR!\n%s: %s\n", cudaGetErrorName((cudaError_t)(error)), cudaGetErrorString((cudaError_t)(error))); } \
        assert((error) == 0); \
    } while (0)
13 |
14 |
// Symbol name of the __global__ kernel that belongs to generator `name`
#define GENERATOR_KERNEL_NAME(name) Kernel_##name

// Symbol name of the extern "C" function returning that kernel's address
#define GENERATOR_KERNEL_GETTER_NAME(name) GetKernel_##name

// Declarator (name plus parameter list) used to define the kernel of generator `name`
#define DEFINE_GENERATOR_KERNEL(name, ...) GENERATOR_KERNEL_NAME(name)(__VA_ARGS__)

// Defines the extern "C" getter that returns the address of the kernel of generator `name`
#define DEFINE_GENERATOR_GETTER(name) \
    extern "C" void* GENERATOR_KERNEL_GETTER_NAME(name)() { return (void*)&GENERATOR_KERNEL_NAME(name); }
26 |
27 |
// Loop unrolling hint for device code; expands to nothing on the host.
// A `#pragma` directive cannot be produced by macro expansion, so the
// _Pragma operator (which may appear inside a macro) is used instead.
#if __CUDA_ARCH__
#define UNROLL _Pragma("unroll")
#else
#define UNROLL
#endif

// Defaults to 1 unless the build system defines it
#ifndef NO_INNER_LOOPS
#define NO_INNER_LOOPS 1
#endif // !NO_INNER_LOOPS
37 |
38 |
39 |
// Compile-time byte table holding the identity sequence: element[i] == i for i in [0, NSize).
// NOTE(review): the template parameter lists are missing in this listing; NSize is used as the
// array length and Vector as the return type of as_vector() - confirm their declarations.
template
struct DefaultByteArray
{
    uint8_t element[NSize];

    // Fills the table with 0, 1, 2, ... NSize - 1
    constexpr DefaultByteArray() : element()
    {
        // NOTE(review): the counter is uint8_t - presumably NSize never exceeds 255; verify
        for (uint8_t i = 0; i < NSize; ++i)
        {
            element[i] = i;
        }
    }

    // Returns the table as a container, constructed from the [first, last) range of the bytes
    template
    static inline Vector as_vector()
    {
        constexpr DefaultByteArray array = DefaultByteArray();
        return Vector(&array.element[0], &array.element[0] + NSize);
    }
};
60 |
61 |
namespace str
{

    // printf-style formatting into a string object.
    // Returns an empty string when snprintf reports an error.
    template <typename String, typename... Args>
    inline String format(const String& format, Args ... args)
    {
        // First pass only measures; +1 reserves room for the terminating '\0'
        const int required = snprintf(nullptr, 0, format.c_str(), args ...) + 1;
        if (required <= 0)
        {
            return {};
        }

        const auto capacity = static_cast<size_t>(required);
        String formatted(capacity, '\0');

        // Second pass writes into the prepared buffer
        snprintf(&formatted[0], capacity, format.c_str(), args ...);

        // drop the '\0' that snprintf wrote at the end
        formatted.pop_back();

        return formatted;
    }

}
84 |
85 |
// User-defined literal packing up to the first 8 characters of an ASCII string into a uint64_t.
// The first character becomes the most significant byte; shorter strings leave the low bytes 0,
// characters after the 8th are ignored. E.g. "AB"_u64 == 0x4142000000000000.
// The value is built with shifts, so it does not depend on the byte order of the host.
inline uint64_t operator""_u64(const char* ascii, size_t num)
{
    constexpr size_t size = sizeof(uint64_t);

    uint64_t number = 0;

    for (size_t i = 0; i < size && i < num; ++i)
    {
        // go through uint8_t first so a negative char is not sign-extended
        const uint64_t byte = static_cast<uint8_t>(ascii[i]);
        number |= byte << (8 * (size - i - 1));
    }

    return number;
}
--------------------------------------------------------------------------------
/src/device/cuda_array.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "common.h"
4 |
5 | #include
6 | #include
7 |
8 |
/**
 * A helper class for arrays allocated in GPU memory.
 * Both the CudaArray object itself (data pointer + element count) and the elements live on the GPU;
 * the non-static helpers are called on the device address and forward it to the static functions,
 * which first copy the object to the host (see host()).
 */
template
struct CudaArray
{
    using TCudaArray = CudaArray;

    // Device address of the first element (null when the array is empty)
    T* CUDA_data;

    // Number of elements
    size_t num;

    // Element access with a range assert
    __host__ __device__ inline T& operator[](uint32_t index)
    {
        assert(index < num && "Index is out of range in CudaArray");
        return CUDA_data[index];
    }
    __host__ __device__ inline const T& operator[](uint32_t index) const
    {
        assert(index < num && "Index is out of range in CudaArray");
        return CUDA_data[index];
    }

    // Frees all associated resources assuming this pointer is a GPU address
    inline void free()
    {
        TCudaArray::free(this);
    }

    // Copies elements from @source to this array
    // Assumes the array was allocated on the GPU; when @num is larger than the array,
    // only as many elements as fit are copied
    inline void write(const T* source, size_t num)
    {
        TCudaArray::write(this, source, num);
    }

    // Copies data from the GPU to the @target array; returns the number of elements
    inline size_t copy(std::vector& target)
    {
        return TCudaArray::copy(this, target);
    }

    // Copies the GPU object itself (without the underlying array data)
    // into CPU memory
    inline TCudaArray host()
    {
        // thiscall should work even with invalid pointer
        return TCudaArray::host(this);
    }

    // Copies this object (assumed to be at a GPU address) to the host and returns a copy of the last element
    // NOTE(review): for an empty array `num - 1` wraps around and read() asserts - confirm callers never hit this
    inline T host_last()
    {
        // thiscall should work even with invalid pointer
        TCudaArray HOST_array = TCudaArray::host(this);
        return TCudaArray::read(HOST_array, HOST_array.num - 1);
    }

public:
    // Creates a GPU array holding a copy of @source; returns the device address of the new object
    static TCudaArray* allocate(const std::vector& source);
    // Frees the element storage and the object at device address @array
    static void free(TCudaArray* array);

    // Returns a host copy of the object stored at device address @device
    static TCudaArray host(const TCudaArray* device);

    // Reads one element from the GPU; @HOST_Array is a host copy obtained from host()
    static T read(const TCudaArray& HOST_Array, size_t index);

    // Copies all elements into @target; returns the number of elements
    static size_t copy(const TCudaArray* array, std::vector& target);

    // Copies up to @num elements from host memory @source into the GPU array
    static void write(TCudaArray* dest, const T* source, size_t num);
};
79 |
80 | template
81 | T CudaArray::read(const TCudaArray& HOST_Array, size_t index)
82 | {
83 | assert(index < HOST_Array.num);
84 |
85 | T result;
86 | auto error = cudaMemcpy(&result, &HOST_Array.CUDA_data[index], sizeof(T), cudaMemcpyDeviceToHost);
87 | CUDA_CHECK(error);
88 |
89 | return result;
90 | }
91 |
92 | template
93 | CudaArray* CudaArray::allocate(const std::vector& source)
94 | {
95 | // Allocate memory of vector itself (ptr + size_t == 16 bytes)
96 | CudaArray* result = nullptr;
97 | uint32_t error = cudaMalloc((void**) & result, sizeof(CudaArray));
98 | CUDA_CHECK(error);
99 |
100 | // Write size_t - size of the data
101 | size_t source_size = source.size();
102 | error = cudaMemcpy(&result->num, &source_size, sizeof(size_t), cudaMemcpyHostToDevice);
103 | CUDA_CHECK(error);
104 |
105 | // Device pointer of data (if available)
106 | T* data_ptr = nullptr;
107 | if (source_size > 0)
108 | {
109 | size_t allocated_bytes = sizeof(T) * source_size;
110 |
111 | // allocate data on device and copy from RAW
112 | error = cudaMalloc((void**)&data_ptr, allocated_bytes);
113 | CUDA_CHECK(error);
114 |
115 | error = cudaMemcpy(data_ptr, source.data(), allocated_bytes, cudaMemcpyHostToDevice);
116 | CUDA_CHECK(error);
117 | }
118 |
119 | // Write data pointer (null if no data)
120 | error = cudaMemcpy(&result->CUDA_data, &data_ptr, sizeof(T*), cudaMemcpyHostToDevice);
121 | CUDA_CHECK(error);
122 |
123 | return result;
124 | }
125 |
126 | template
127 | CudaArray CudaArray::host(const TCudaArray* device)
128 | {
129 | assert(device && "Invalid CUDA pointer");
130 | TCudaArray HOST_dest = { 0 };
131 |
132 | auto error = cudaMemcpy(&HOST_dest, device, sizeof(TCudaArray), cudaMemcpyDeviceToHost);
133 | CUDA_CHECK(error);
134 |
135 | return HOST_dest;
136 | }
137 |
138 | template
139 | void CudaArray::free(TCudaArray* array)
140 | {
141 | TCudaArray HOST_array = host(array);
142 |
143 | if (HOST_array.CUDA_data)
144 | {
145 | auto error = cudaFree(HOST_array.CUDA_data);
146 | CUDA_CHECK(error);
147 | }
148 |
149 | auto error = cudaFree(array);
150 | CUDA_CHECK(error);
151 | }
152 |
153 | template
154 | size_t CudaArray::copy(const TCudaArray* array, std::vector& target)
155 | {
156 | TCudaArray HOST_array = host(array);
157 | if (HOST_array.num > 0)
158 | {
159 | size_t allocated_bytes = HOST_array.num * sizeof(T);
160 | target.resize(HOST_array.num);
161 |
162 | auto error = cudaMemcpy((T*)target.data(), HOST_array.CUDA_data, allocated_bytes, cudaMemcpyDeviceToHost);
163 | CUDA_CHECK(error);
164 | }
165 |
166 | return HOST_array.num;
167 | }
168 |
169 | template
170 | void CudaArray::write(TCudaArray* dest, const T* source, size_t num)
171 | {
172 | TCudaArray HOST_dest = host(dest);
173 |
174 | assert(HOST_dest.CUDA_data && "CUDA Data wasn't allocated");
175 | if (HOST_dest.CUDA_data)
176 | {
177 | auto copy_bytes = sizeof(T) * std::min(num, HOST_dest.num);
178 |
179 | auto error = cudaMemcpy(HOST_dest.CUDA_data, source, copy_bytes, cudaMemcpyHostToDevice);
180 | CUDA_CHECK(error);
181 | }
182 | }
--------------------------------------------------------------------------------
/src/device/cuda_common.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "common.h"
4 |
5 | #include
6 |
7 |
8 | namespace misc
9 | {
10 |
11 | // device-faster version of byte reversing function
12 | __host__ __device__ __forceinline__ uint64_t rev_bytes(uint64_t input)
13 | {
14 | #if __CUDA_ARCH__
15 | uint32_t key_lo = input;
16 | uint32_t key_hi = input >> 32;
17 |
18 | constexpr uint32_t selector_0 = 0x4567;
19 | constexpr uint32_t selector_1 = 0x0123;
20 |
21 | uint32_t key_rev_lo = __byte_perm(key_lo, key_hi, selector_0);
22 | uint32_t key_rev_hi = __byte_perm(key_lo, key_hi, selector_1);
23 |
24 | return ((uint64_t)key_rev_hi << 32) | key_rev_lo;
25 | #else
26 | uint64_t input_rev = 0;
27 | uint64_t input_rev_byte = 0;
28 | for (uint8_t i = 0; i < 64; i += 8)
29 | {
30 | input_rev_byte = (uint8_t)(input >> i);
31 | input_rev = input_rev | input_rev_byte << (56 - i);
32 | }
33 |
34 | return input_rev;
35 | #endif
36 | }
37 |
38 | // Reverses amount of bits in @input
39 | __device__ __host__ __forceinline__ uint64_t rev_bits(uint64_t input, uint8_t rev_bit_count)
40 | {
41 | uint64_t reverse_key = 0;
42 | for (uint8_t i = 0; i < rev_bit_count; i++)
43 | {
44 | reverse_key = reverse_key << 1 | ((input >> i) & 1);
45 | }
46 | return reverse_key;
47 | }
48 | }
--------------------------------------------------------------------------------
/src/device/cuda_context.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "common.h"
4 |
5 | #include
6 |
7 |
// For-loop macro distributing `count` items over all threads of the context:
// indexer @i starts at the thread's global id and advances by the total number of threads
#define CUDA_FOR_THREAD_ID(ctx, i, count) \
    for (uint32_t i = (ctx).thread_id; i < (count); i += (ctx).thread_max)
11 |
12 | // Custom struct of CUDA thread execution context
13 | struct CudaContext
14 | {
15 | // Maximum overall threads
16 | uint32_t thread_max;
17 |
18 | // Global thread id
19 | uint32_t thread_id;
20 |
21 | __host__ __device__ static inline CudaContext Get()
22 | {
23 | #if __CUDA_ARCH__
24 | return CudaContext
25 | {
26 | gridDim.x * blockDim.x,
27 | blockIdx.x * blockDim.x + threadIdx.x
28 | };
29 | #else
30 | assert(false && "CUDA context is not available on host");
31 | return {};
32 | #endif
33 | }
34 | };
--------------------------------------------------------------------------------
/src/device/cuda_double_array.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "common.h"
4 |
5 | #include
6 |
7 | /**
8 | * This is convenience wrapper allows for easier array management and
9 | * copying between CPU and GPU (and vice versa).
10 | *
11 | * WARNING:
12 | * If this object is data owner (constructed without input data) - it will free memory in destructor
13 | * If this object is just pointer container - will not do anything destructive with pointers in destructor
14 | */
15 | template
16 | struct DoubleArray
17 | {
18 | using TCUDAPtr = T*;
19 | using THOSTPtr = T*;
20 |
21 | THOSTPtr HOST_mem;
22 | TCUDAPtr CUDA_mem;
23 |
24 | size_t size;
25 |
26 | DoubleArray(size_t num, bool zeros = true)
27 | : hostOwner(true)
28 | {
29 | size = sizeof(T) * num;
30 | HOST_mem = (T*)malloc(size);
31 |
32 | uint32_t error = cudaMalloc(&CUDA_mem, size);
33 | CUDA_CHECK(error);
34 |
35 | if (zeros)
36 | {
37 | memset(HOST_mem, 0, size);
38 | cudaMemset(CUDA_mem, 0, size);
39 | }
40 | }
41 |
42 | DoubleArray(THOSTPtr array, size_t num)
43 | : hostOwner(false)
44 | {
45 | HOST_mem = array;
46 | size = sizeof(T) * num;
47 |
48 | cudaError error = cudaMalloc((void**) & CUDA_mem, size);
49 | CUDA_CHECK(error);
50 |
51 | error = cudaMemcpy(CUDA_mem, HOST_mem, size, cudaMemcpyHostToDevice);
52 | CUDA_CHECK(error);
53 | }
54 |
55 | ~DoubleArray()
56 | {
57 | if (CUDA_mem)
58 | {
59 | cudaFree(CUDA_mem);
60 | CUDA_mem = nullptr;
61 | }
62 |
63 | if (HOST_mem && hostOwner)
64 | {
65 | free(HOST_mem);
66 | HOST_mem = nullptr;
67 | }
68 | }
69 |
70 | void write_GPU()
71 | {
72 | uint32_t error = cudaMemcpy(CUDA_mem, HOST_mem, size, cudaMemcpyHostToDevice);
73 | CUDA_CHECK(error);
74 | }
75 |
76 | void read_GPU()
77 | {
78 | uint32_t error = cudaMemcpy(HOST_mem, CUDA_mem, size, cudaMemcpyDeviceToHost);
79 | CUDA_CHECK(error);
80 | }
81 |
82 | private:
83 | bool hostOwner;
84 | };
--------------------------------------------------------------------------------
/src/device/cuda_object.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "common.h"
4 |
5 | #include
6 |
7 |
8 | template
9 | struct CudaObject // Pairs a caller-owned host object with a lazily allocated device copy of it (one TTarget).
10 | {
11 | TTarget* CUDA_Ptr; // device copy; nullptr until ptr() allocates it
12 | TTarget* HOST_Ptr; // host object supplied by the caller; never freed by this struct
13 |
14 | CudaObject(TTarget* source) // Remembers `source`; no device memory is allocated yet.
15 | {
16 | CUDA_Ptr = nullptr;
17 | HOST_Ptr = source;
18 | }
19 | CudaObject(CudaObject&& other) = delete;
20 | // copying is disabled too, so the device allocation is released exactly once
21 | CudaObject(const CudaObject& other) = delete;
22 | CudaObject& operator =(const CudaObject& other) = delete;
23 |
24 | ~CudaObject() // Releases the device copy (if any); the host object is left untouched.
25 | {
26 | if (CUDA_Ptr)
27 | {
28 | auto error = cudaFree(CUDA_Ptr); // `auto` keeps the CUDA status type, matching ptr() and read()
29 | CUDA_CHECK(error);
30 | CUDA_Ptr = nullptr;
31 | }
32 |
33 | HOST_Ptr = nullptr;
34 | }
35 |
36 | TTarget* ptr(bool sync = true) // Returns the device pointer, allocating it on first use; when `sync` is true the host object is uploaded on every call.
37 | {
38 | if (HOST_Ptr == nullptr)
39 | {
40 | assert(false && "OBJECT IS NO LONGER CAN BE USED IN GPU");
41 | return nullptr; // reached only when assert is compiled out
42 | }
43 |
44 | if (CUDA_Ptr == nullptr)
45 | {
46 | auto error = cudaMalloc((void**)&CUDA_Ptr, sizeof(TTarget));
47 | CUDA_CHECK(error);
48 | }
49 |
50 | if (CUDA_Ptr && sync)
51 | {
52 | auto error = cudaMemcpy(CUDA_Ptr, HOST_Ptr, sizeof(TTarget), cudaMemcpyHostToDevice);
53 | CUDA_CHECK(error);
54 | }
55 |
56 | return CUDA_Ptr;
57 | }
58 |
59 | void read() // Copies the device copy back into the host object; does nothing if no device copy was ever allocated.
60 | {
61 | if (HOST_Ptr == nullptr)
62 | {
63 | assert(false && "OBJECT IS NO LONGER CAN BE USED IN GPU");
64 | return; // reached only when assert is compiled out
65 | }
66 |
67 | if (CUDA_Ptr)
68 | {
69 | auto error = cudaMemcpy(HOST_Ptr, CUDA_Ptr, sizeof(TTarget), cudaMemcpyDeviceToHost);
70 | CUDA_CHECK(error);
71 | }
72 | }
73 | };
74 |
75 | // Self owned GPU object
76 | template
77 | struct TGenericGpuObject // Base for objects that keep a device mirror of themselves through a CudaObject member.
78 | {
79 | using TCudaPtr = T*;
80 |
81 | TGenericGpuObject(T* Self) : SelfGpu(Self) { // `Self` is the host object to mirror; it is handed to SelfGpu, not copied here
82 | }
83 |
84 | TGenericGpuObject(const TGenericGpuObject& other) = delete;
85 | TGenericGpuObject& operator =(const TGenericGpuObject& other) = delete;
86 | virtual ~TGenericGpuObject() = default; // the type is polymorphic (virtual ptr()), so destruction through a base pointer must be well-defined
87 | virtual TCudaPtr ptr() // Device pointer of the mirrored object; delegates to SelfGpu.ptr() with its default arguments.
88 | {
89 | return SelfGpu.ptr();
90 | }
91 |
92 | void read() // Pulls the device copy back into the host object via SelfGpu.read().
93 | {
94 | SelfGpu.read();
95 | }
96 |
97 | protected:
98 |
99 | CudaObject SelfGpu;
100 | };
--------------------------------------------------------------------------------
/src/device/cuda_span.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "common.h"
4 |
5 | #include
6 | #include
7 |
8 |
9 | /**
10 | * Something like C# span
11 | * Memory view
12 | */
13 | template
14 | struct Span // Non-owning (pointer, element count) view callable from both host and device code.
15 | {
16 | __host__ __device__ Span(T* ptr, uint32_t num) : data(ptr), size(num) { }
17 |
18 | __host__ __device__ inline T& operator[](uint32_t index) // Element access; the range is checked by assert only.
19 | {
20 | assert(index < size && "Index is out of range in Span");
21 | // when __CUDA_ARCH__ is set the element is read through __ldca, otherwise by plain indexing
22 | #if __CUDA_ARCH__
23 | return __ldca(&data[index]);
24 | #else
25 | return data[index];
26 | #endif
27 | }
28 |
29 | __host__ __device__ inline const T& operator[](uint32_t index) const // Const counterpart of the accessor above.
30 | {
31 | assert(index < size && "Index is out of range in Span");
32 | #if __CUDA_ARCH__
33 | return __ldca(&data[index]);
34 | #else
35 | return data[index];
36 | #endif
37 | }
38 |
39 | // Number of elements in Span
40 | __host__ __device__ inline uint32_t num() const { return size; }
41 |
42 | // fall back if T is not simple type
43 | __host__ __device__ typename std::enable_if, T&>::type __ldca(T* ptr) { return *ptr; }
44 | __host__ __device__ typename std::enable_if, const T&>::type __ldca(T* ptr) const { return *ptr; }
45 | // NOTE(review): operator[] returns a reference to whatever __ldca yields - confirm this is valid when a by-value CUDA builtin is selected instead of these members
46 | private:
47 |
48 | T* data; // viewed storage; not owned
49 |
50 | uint32_t size; // element count
51 | };
--------------------------------------------------------------------------------
/src/device/cuda_vector.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "common.h"
4 |
5 | #include "device/cuda_array.h"
6 |
7 | #include
8 | #include
9 |
10 |
11 | /**
12 | * Host owned, GPU copy, vector
13 | * Can be considered as wrapper around host vector
14 | * Owns GPU data, frees on destructor
15 | */
16 | template
17 | struct CudaVector
18 | {
19 | // Explicit construct using initializer list
20 | CudaVector(std::initializer_list&& initializer) :
21 | cpu_vector(std::move(initializer)),
22 | gpu_array(nullptr) // the device array is created lazily by gpu()
23 | {
24 | }
25 |
26 | // Forward construction to vector
27 | template
28 | CudaVector(Args&&... args) :
29 | cpu_vector(std::forward(args)...),
30 | gpu_array(nullptr) // the device array is created lazily by gpu()
31 | {
32 | }
33 |
34 | ~CudaVector(); // frees gpu_array when it was allocated (see the out-of-class definition)
35 |
36 | // CPU memory is immutable since it should be
37 | // synchronized with gpu
38 | const std::vector& cpu() const { return cpu_vector; }
39 |
40 | // Size of CPU vector (gpu will be the same size once allocated)
41 | const size_t size() const { return cpu_vector.size(); }
42 |
43 | // Reads GPU pointer and copies data from GPU memory to CPU
44 | void read();
45 |
46 | // Pointer to GPU array
47 | // Will allocate (and copy) by default
48 | CudaArray* gpu(bool allocate = true);
49 | // NOTE(review): no copy/move control is declared although the destructor frees gpu_array - confirm instances are never copied
50 | private:
51 |
52 | std::vector cpu_vector; // host-side data, exposed read-only through cpu()
53 |
54 | CudaArray* gpu_array; // nullptr until gpu() allocates it
55 | };
56 |
57 | template void CudaVector::read() // Hands cpu_vector to gpu_array->copy(); per the declaration comment this brings GPU data back to the CPU. Expects gpu() to have allocated the array.
58 | {
59 | assert(gpu_array);
60 | if (gpu_array) // still guarded for builds where assert is compiled out
61 | {
62 | gpu_array->copy(cpu_vector);
63 | }
64 | }
65 |
66 | template CudaArray* CudaVector::gpu(bool allocate /*= true*/) // Returns the cached GPU array; on the first call with `allocate` true it is created from cpu_vector.
67 | {
68 | if (gpu_array || !allocate) // already allocated, or the caller only wants to peek (may return nullptr)
69 | {
70 | return gpu_array;
71 | }
72 |
73 | gpu_array = CudaArray::allocate(cpu_vector);
74 | return gpu_array;
75 | }
76 |
77 | template CudaVector::~CudaVector() // Releases the GPU array if gpu() ever allocated one.
78 | {
79 | if (gpu_array != nullptr)
80 | {
81 | gpu_array->free();
82 | gpu_array = nullptr;
83 | }
84 | }
85 |
--------------------------------------------------------------------------------
/src/host/command_line_args.cpp:
--------------------------------------------------------------------------------
1 | #include "command_line_args.h"
2 |
3 | #include
4 |
5 | #include "host/host_utils.h"
6 | #include "host/console.h"
7 |
8 | //#define CXXOPTS_NO_EXCEPTIONS
9 | #include "cxxopts/include/cxxopts.hpp"
10 |
11 |
12 | namespace
13 | {
14 |
15 | inline void read_alphabets(CommandLineArgs& target, cxxopts::ParseResult& result) // Fills target.alphabets from every ARG_ALPHABET value: either a binary file path or a hex string such as "61:ab:00:33".
16 | {
17 | if (result.count(ARG_ALPHABET) == 0)
18 | {
19 | // no alphabets available
20 | return;
21 | }
22 |
23 | const auto& alphabet_args = result[ARG_ALPHABET].as>();
24 |
25 | for (const auto& alphabet_arg : alphabet_args)
26 | {
27 | auto alphabet_bytes = host::utils::read_alphabet_binary_file(alphabet_arg.c_str()); // first try the argument as a file path
28 | if (alphabet_bytes.size() > 0)
29 | {
30 | target.alphabets.emplace_back(alphabet_bytes);
31 | }
32 | else
33 | {
34 | // "61:ab:00:33..." -> "61,ab,00,33..."
35 | std::string alphabet_bytes_hex = alphabet_arg;
36 | std::replace(alphabet_bytes_hex.begin(), alphabet_bytes_hex.end(), ':', ',');
37 |
38 | // ["61","ab","00","33"]
39 | std::vector alphabet_hex;
40 | cxxopts::values::parse_value(alphabet_bytes_hex, alphabet_hex);
41 |
42 | // ["61","ab","00","33"] -> [0x61, 0xAB, 0x00, 0x33]
43 | std::vector alphabet_bytes;
44 | alphabet_bytes.reserve(alphabet_hex.size());
45 |
46 | for (const auto& hex : alphabet_hex)
47 | {
48 | auto value = (uint8_t)strtoul(hex.c_str(), nullptr, 16);
49 | if (value != 0 || hex == "00") // strtoul yields 0 on failure, so a zero result is trusted only for the literal "00"
50 | {
51 | alphabet_bytes.push_back(value);
52 | }
53 | else
54 | {
55 | printf("Error: cannot parse alphabet byte '%s'!\n", hex.c_str());
56 | }
57 | }
58 |
59 | // keep the alphabet only if at least one byte was parsed
60 | if (alphabet_bytes.size() > 0)
61 | {
62 | target.alphabets.emplace_back(alphabet_bytes);
63 |
64 | if (alphabet_bytes.size() > 256)
65 | {
66 | printf("Warning: Alphabet: '%s' has to much bytes in hex string: %zu (should be less than 257)\n",
67 | alphabet_arg.c_str(), alphabet_bytes.size()); // print the offending argument itself; the option holds a list, not a single string
68 | }
69 | }
70 | else
71 | {
72 | printf("Error: Alphabet: '%s' is not valid file, neither valid alphabet hex string (like: AA:11:b3...)!\n",
73 | alphabet_arg.c_str());
74 | }
75 | }
76 | }
77 | }
78 |
79 | inline void parse_dictionary_mode(CommandLineArgs& target, cxxopts::ParseResult& result) // Builds dictionary configs from ARG_WORDDICT (files or hex words) and ARG_BINDICT (binary files).
80 | {
81 | if (result[ARG_WORDDICT].count() > 0)
82 | {
83 | auto dict = result[ARG_WORDDICT].as>();
84 |
85 | std::vector decryptors; // all word sources are merged into a single config
86 |
87 | for (const auto& dict_arg : dict)
88 | {
89 | std::vector from_file = host::utils::read_word_dictionary_file(dict_arg.c_str());
90 |
91 | if (from_file.size() > 0)
92 | {
93 | decryptors.insert(decryptors.end(), from_file.begin(), from_file.end());
94 | }
95 | else if (auto key = strtoull(dict_arg.c_str(), nullptr, 16)) // nothing read from a file: treat the argument as a hex key (a parsed value of 0 counts as invalid)
96 | {
97 | decryptors.push_back(Decryptor(key, 0));
98 | }
99 | else
100 | {
101 | printf("Error: invalid param passed to '--%s' argument: '%s' not a dictionary file neither word\n", ARG_WORDDICT, dict_arg.c_str());
102 | }
103 | }
104 |
105 | if (decryptors.size() > 0)
106 | {
107 | target.brute_configs.push_back(BruteforceConfig::GetDictionary(std::move(decryptors)));
108 | }
109 | }
110 |
111 | if (result[ARG_BINDICT].count() > 0)
112 | {
113 | auto seed = result[ARG_SEED].as();
114 |
115 | auto dicts = result[ARG_BINDICT].as>();
116 | auto mode = result[ARG_BINDMODE].as();
117 |
118 | // each binary dictionary file becomes its own config, so no shared accumulator is kept here
119 |
120 | for (const auto& bin_dict_path : dicts)
121 | {
122 | std::vector decryptors = host::utils::read_binary_dictionary_file(bin_dict_path.c_str(), mode, seed);
123 |
124 | if (decryptors.size() > 0)
125 | {
126 | target.brute_configs.push_back(BruteforceConfig::GetDictionary(std::move(decryptors)));
127 | }
128 | else
129 | {
130 | printf("Error: invalid param passed to '--%s' argument: '%s'. Invalid file!\n",
131 | ARG_BINDICT, bin_dict_path.c_str());
132 | }
133 | }
134 |
135 | }
136 | }
137 |
138 | inline void parse_bruteforce_mode(CommandLineArgs& target, cxxopts::ParseResult& result) // Adds a plain bruteforce config built from ARG_START, ARG_SEED and ARG_COUNT.
139 | {
140 | auto start_key = result[ARG_START].as();
141 | auto seed = result[ARG_SEED].as();
142 | Decryptor first_decryptor(start_key, seed); // first candidate handed to the generator
143 |
144 | auto count_key = result[ARG_COUNT].as();
145 |
146 | target.brute_configs.push_back(BruteforceConfig::GetBruteforce(first_decryptor, count_key));
147 | }
148 |
149 | inline void parse_seed_mode(CommandLineArgs& target, cxxopts::ParseResult& result) // Adds a seed-bruteforce config; ARG_START (the manufacturer key, per the error text) must be given.
150 | {
151 | if (result.count(ARG_START) == 0)
152 | {
153 | printf("Error: For seed mode, it's necessary to specify a manufacturer key with '--" ARG_START "' argument!\n");
154 | return; // nothing is added to target.brute_configs in this case
155 | }
156 |
157 | auto start_key = result[ARG_START].as();
158 | auto seed = result[ARG_SEED].as();
159 | Decryptor first_decryptor(start_key, seed);
160 |
161 | target.brute_configs.push_back(BruteforceConfig::GetSeedBruteforce(first_decryptor));
162 | }
163 |
164 | inline void parse_bruteforce_filtered_mode(CommandLineArgs& target, cxxopts::ParseResult& result) // Same inputs as the plain bruteforce mode plus the ARG_IFILTER / ARG_EFILTER values.
165 | {
166 | auto start_key = result[ARG_START].as();
167 | auto seed = result[ARG_SEED].as();
168 | Decryptor first_decryptor(start_key, seed);
169 |
170 | auto count_key = result[ARG_COUNT].as();
171 |
172 | auto include_filter = result[ARG_IFILTER].as();
173 | auto exclude_filter = result[ARG_EFILTER].as();
174 |
175 | BruteforceFilters filters // aggregate-initialized in the order (include, exclude)
176 | {
177 | include_filter,
178 | exclude_filter,
179 | };
180 |
181 | target.brute_configs.push_back(BruteforceConfig::GetBruteforce(first_decryptor, count_key, filters));
182 | }
183 |
184 | inline void parse_alphabet_mode(CommandLineArgs& target, cxxopts::ParseResult& result) // Adds one alphabet config per entry already stored in target.alphabets.
185 | {
186 | auto start_key = result[ARG_START].as();
187 | auto seed = result[ARG_SEED].as();
188 | Decryptor first_decryptor(start_key, seed);
189 |
190 | auto count_key = result[ARG_COUNT].as();
191 |
192 | for (const auto& alphabet : target.alphabets) // nothing is added when no alphabets were loaded
193 | {
194 | target.brute_configs.push_back(BruteforceConfig::GetAlphabet(first_decryptor, alphabet, count_key));
195 | }
196 | }
197 |
198 | inline void parse_pattern_mode(CommandLineArgs& target, cxxopts::ParseResult& result) // Adds one pattern config per ARG_PATTERN value; each value lists per-byte sets separated by ':' and is padded to 8 entries.
199 | {
200 | auto start_key = result[ARG_START].as();
201 | auto seed = result[ARG_SEED].as();
202 | Decryptor first_decryptor(start_key, seed);
203 |
204 | auto count_key = result[ARG_COUNT].as();
205 |
206 | const auto& args = result[ARG_PATTERN].as>();
207 |
208 | auto full_bytes = DefaultByteArray<>::as_vector>(); // stand-in for '*' wherever a position cannot be parsed or is missing
209 |
210 | for (const auto& pattern_arg : args)
211 | {
212 | std::string pattern_bytes_hex = pattern_arg;
213 | std::replace(pattern_bytes_hex.begin(), pattern_bytes_hex.end(), ':', ','); // ':' -> ',' before handing the string to parse_value
214 |
215 | std::vector bytes_hex;
216 | cxxopts::values::parse_value(pattern_bytes_hex, bytes_hex);
217 |
218 | // one entry per key byte; named distinctly so it does not shadow the `result` parameter
219 | std::vector> per_byte;
220 | for (auto& hex : bytes_hex)
221 | {
222 | // append alphabets' bytes
223 | if (hex.find("AL") != std::string::npos)
224 | {
225 | if (target.alphabets.size() == 0)
226 | {
227 | printf("ERROR: Cannot use Alphabet in patterns - no provided alphabets. Replacing with *\n");
228 | per_byte.push_back(full_bytes);
229 | continue;
230 | }
231 |
232 | auto al_index = strtoul(hex.substr(2).c_str(), nullptr, 10); // NOTE(review): substr(2) assumes the token starts with "AL"
233 | if (al_index >= target.alphabets.size())
234 | {
235 | printf("ERROR: Argument %s referring alphabet: %lu (index: %lu), but there are only %zu available. "
236 | "Replacing with first one\n", hex.c_str(), al_index + 1, al_index, target.alphabets.size());
237 | al_index = 0;
238 | }
239 |
240 | per_byte.push_back(target.alphabets[al_index].as_vector());
241 | }
242 | else
243 | {
244 | // append regular pattern bytes
245 | auto bytes = BruteforcePattern::ParseBytes(hex);
246 |
247 | if (bytes.size() == 0)
248 | {
249 | printf("ERROR: Invalid string '%s' for byte pattern! Ignoring. Replacing with *\n", hex.c_str());
250 | per_byte.push_back(full_bytes);
251 | }
252 | else
253 | {
254 | per_byte.push_back(bytes);
255 | }
256 | }
257 | }
258 |
259 | // validate
260 | if (per_byte.size() > 8)
261 | {
262 | printf("Warning: Pattern string: '%s' contains more than 8 per-bytes delimiters (%zu) other will be ignored.\n",
263 | pattern_arg.c_str(), bytes_hex.size()); // NOTE(review): nothing is trimmed here; presumably BruteforcePattern drops the extras - confirm
264 | }
265 | else if (per_byte.size() < 8)
266 | {
267 | printf("Warning: Pattern string: '%s' contains less than 8 per-bytes delimiters (%zu) other fill with full pattern.\n",
268 | pattern_arg.c_str(), bytes_hex.size());
269 |
270 | for (auto i = per_byte.size(); i < 8; ++i)
271 | {
272 | per_byte.push_back(full_bytes);
273 | }
274 | }
275 |
276 |
277 | // reverse bytes
278 | std::reverse(per_byte.begin(), per_byte.end());
279 |
280 | // Add pattern attack to config
281 | target.brute_configs.push_back(BruteforceConfig::GetPattern(first_decryptor, BruteforcePattern(std::move(per_byte), pattern_arg), count_key));
282 | }
283 | }
284 |
285 | constexpr const char* Usage() // Returns the examples text: one string literal concatenated at compile time from APP_NAME and the ARG_* macros.
286 | {
287 | return ""
288 | "\nExample:\n"
289 | "\t./" APP_NAME " --" ARG_INPUTS " xxx,yy,zzz"
290 | " --" ARG_MODE "=1 --" ARG_START "=0x9876543210 --" ARG_COUNT "=1000000"
291 | "\n\n\tThis will launch simple bruteforce (+1) attack with 1 million checks from 0x9876543210. "
292 | "Will be checked ALL 16 (12 if no seed specified) keeloq learning types"
293 | // alphabet mode example
294 | "\nExample:\n"
295 | "\t./" APP_NAME " --" ARG_INPUTS " xxx,yy,zzz"
296 | " --" ARG_MODE "=3 --" ARG_LTYPE "=0 --" ARG_ALPHABET "=examples/alphabet.bin,10:20:30:AA:BB:CC:DD:EE:FF:02:33"
297 | "\n\n\tThis will launch 2 alphabets attacks for all possible combinations for SIMPLE learning Keeloq type. "
298 | "First alphabet will be taken from file, second - parsed from inputs."
299 | // pattern mode example
300 | "\nExample:\n"
301 | "\t./" APP_NAME " --" ARG_INPUTS " xxx,yy,zzz"
302 | " --" ARG_MODE "=4 --" ARG_LTYPE "=2 --" ARG_ALPHABET "=examples/alphabet.bin --" ARG_PATTERN "=AL0:11:AB|BC:*:00-44:AL0:AA-FF:01"
303 | "\n\n\tThis will launch pattern attacks with NORMAL keeloq learning type."
304 | "\n\tPattern applied 'as is' - big endian. The highest byte (0xXX.......) will be taken from 1st alphabet."
305 | "\n\tNext byte (0x..XX....) will be exact `0x11`."
306 | "\n\tNext byte (0x....XX..) will be `0xAB` or `0xBC`.\n"
307 | // seed mode example
308 | "\nExample:\n"
309 | "\t./" APP_NAME " --" ARG_INPUTS " xxx,yy,zzz"
310 | " --" ARG_MODE "=5 --" ARG_START "=0xAABBCCDDEEFF"
311 | "\n\n\tThis will launch seed bruteforce attack for all seed learning types. "
312 | "\n\tSpecifying '--" ARG_LTYPE "=a,b,c' will narrow learning types to provided ones."
313 | ;
314 | }
315 |
316 | }
317 |
318 | CommandLineArgs CommandLineArgs::parse(int argc, const char** argv)
319 | {
320 | cxxopts::Options options(APP_NAME, R"(
321 |
322 | _______ _____ ___ __ __ __
323 | / ___/ / / / _ \/ _ | / //_/__ ___ / / ___ ___ _
324 | / /__/ /_/ / // / __ | / ,< / -_) -_) /__/ _ \/ _ `/
325 | \___/\____/____/_/ |_| /_/|_|\__/\__/____/\___/\_, /
326 | /_/
327 | ___ __ ___
328 | / _ )______ __/ /____ / _/__ ___________ ____
329 | / _ / __/ // / __/ -_) _/ _ \/ __/ __/ -_) __/
330 | /____/_/ \_,_/\__/\__/_/ \___/_/ \__/\__/_/
331 |
332 | )");
333 | options.set_width(::console::get_width())
334 | .allow_unrecognised_options()
335 | .add_options()
336 | ("h," ARG_HELP, "Prints this help")
337 |
338 | // What to bruteforce
339 | (ARG_INPUTS, "Comma separated uint64 values (it's better to have 3)",
340 | cxxopts::value>(), "[k1, k1, k3]")
341 |
342 | // CUDA Setup
343 | (ARG_BLOCKS, "How many thread blocks to launch.",
344 | cxxopts::value()->default_value("32"), "")
345 | (ARG_THREADS, "How many threads will be launched in a block (if 0 - will use value from device).",
346 | cxxopts::value()->default_value("0"), "")
347 |
348 | #ifndef NO_INNER_LOOPS
349 | (ARG_LOOPS, "How many loop iterations will one thread perform (keep it low).",
350 | cxxopts::value()->default_value("2"), "")
351 | #endif
352 |
353 | // Mode - what bruteforce type will be used
354 | (ARG_MODE,
355 | "Bruteforce modes (comma separated):"
356 | "\n\t0: - Dictionary."
357 | "\n\t1: - Simple +1."
358 | "\n\t2: - Simple +1 with filters."
359 | "\n\t3: - Alphabet. Bruteforce +1 using only specified bytes."
360 | "\n\t4: - Pattern. Bruteforce with bytes selected by specified pattern."
361 | "\n\t5: - Seed. Bruteforce only seed with provided manufacturer key (applied only to algorithms with seed).",
362 | cxxopts::value>(), "[m1,m2..]")
363 | (ARG_LTYPE,
364 | "Specific learning type (if you know your target well). Increases approximately x16 times (since doesn't calculate other types)"
365 | "\n\tV+1 means with reverse key (There are also more types. see source code):"
366 | "\n\t0: - Simple"
367 | "\n\t2: - Normal"
368 | "\n\t4: - Secure"
369 | "\n\t6: - Xor"
370 | "\nALL",
371 | cxxopts::value>()->default_value(KeeloqLearningType::ValueString(KeeloqLearningType::LAST)), "")
372 |
373 | // Dictionaries files
374 | (ARG_WORDDICT, "Word dictionary file(s) or word(s) - contains hexadecimal strings which will be used as keys. e.g: 0xaabb1122 FFbb9800121212",
375 | cxxopts::value>(), "[f1,w1,...]")
376 | (ARG_BINDICT, "Binary dictionary file(s) - each 8 bytes of the file will be used as key (do not check duplicates or zeros)",
377 | cxxopts::value>(), "[b1,b2,...]")
378 | (ARG_BINDMODE, "Byte order mode for binary dictionary. 0 - as is. 1 - reverse, 2 - add both",
379 | cxxopts::value()->default_value("0"), "