├── .dockerignore
├── .editorconfig
├── .github
    └── workflows
    │   ├── linux.yml
    │   └── windows.yml
├── .gitignore
├── .gitmodules
├── CudaKeeloq.vcxproj
├── CudaKeeloq.vcxproj.filters
├── LICENSE
├── build.sh
├── dockerfile
├── examples
    ├── alphabet.bin
    ├── dictionary.bin
    └── dictionary.words
├── makefile
├── readme.md
├── run.sh
└── src
    ├── algorithm
        ├── keeloq
        │   ├── keeloq_decryptor.h
        │   ├── keeloq_encrypted.h
        │   ├── keeloq_kernel.cu
        │   ├── keeloq_kernel.h
        │   ├── keeloq_kernel.inl
        │   ├── keeloq_kernel_input.cpp
        │   ├── keeloq_kernel_input.h
        │   ├── keeloq_learning_types.cpp
        │   ├── keeloq_learning_types.h
        │   ├── keeloq_single_result.cpp
        │   └── keeloq_single_result.h
        ├── multibase_digit.h
        ├── multibase_number.h
        └── multibase_system.h
    ├── bruteforce
        ├── bruteforce_config.cpp
        ├── bruteforce_config.h
        ├── bruteforce_filters.cpp
        ├── bruteforce_filters.h
        ├── bruteforce_pattern.cpp
        ├── bruteforce_pattern.h
        ├── bruteforce_round.cpp
        ├── bruteforce_round.h
        ├── bruteforce_type.cpp
        ├── bruteforce_type.h
        └── generators
        │   ├── generator_bruteforce.cpp
        │   ├── generator_bruteforce.h
        │   ├── generator_bruteforce_filtered_kernel.inl
        │   ├── generator_bruteforce_pattern_kernel.inl
        │   ├── generator_bruteforce_seed_kernel.inl
        │   ├── generator_bruteforce_simple_kernel.inl
        │   └── generator_kernel.cu
    ├── common.h
    ├── device
        ├── cuda_array.h
        ├── cuda_common.h
        ├── cuda_context.h
        ├── cuda_double_array.h
        ├── cuda_object.h
        ├── cuda_span.h
        └── cuda_vector.h
    ├── host
        ├── command_line_args.cpp
        ├── command_line_args.h
        ├── console.cpp
        ├── console.h
        ├── host_utils.cpp
        ├── host_utils.h
        └── timer.h
    ├── kernels
        └── kernel_result.h
    ├── main.cpp
    └── tests
        ├── test_all.h
        ├── test_alphabet.cpp
        ├── test_alphabet.h
        ├── test_benchmark.cpp
        ├── test_benchmark.h
        ├── test_console.cpp
        ├── test_console.h
        ├── test_filters.cpp
        ├── test_filters.h
        ├── test_keeloq.cpp
        ├── test_keeloq.h
        ├── test_kernel.cu
        ├── test_pattern.cpp
        └── test_pattern.h


/.dockerignore:
--------------------------------------------------------------------------------
1 | x64/*


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | root = true
 2 | 
 3 | [*]
 4 | indent_style = space
 5 | indent_size = 4
 6 | end_of_line = lf
 7 | charset = utf-8
 8 | trim_trailing_whitespace = true
 9 | insert_final_newline = true
10 | 
11 | [*.md]
12 | trim_trailing_whitespace = false
13 | 
14 | [makefile]
15 | indent_style = tab
16 | trim_trailing_whitespace = false


--------------------------------------------------------------------------------
/.github/workflows/linux.yml:
--------------------------------------------------------------------------------
 1 | name: Linux
 2 | on:
 3 |   push:
 4 |     tags:
 5 |       - test-linux*
 6 |       - release-v*
 7 | 
 8 | jobs:
 9 |   build:
10 |     runs-on: ubuntu-latest
11 |     permissions:
12 |       packages: write
13 |       contents: read
14 | 
15 |     steps:
16 |       - id: actor
17 |         uses: ASzc/change-string-case-action@v5
18 |         with:
19 |           string: ${{ github.actor }}
20 | 
21 |       - name: Checkout
22 |         uses: actions/checkout@v3.3.0
23 |         with:
24 |           submodules: 'true'
25 | 
26 |       - name: Linux build using docker
27 |         run: chmod +x build.sh && ./build.sh
28 | 
29 |       - name: Tag container
30 |         run: docker tag cudakeeloq:local ghcr.io/${{ steps.actor.outputs.lowercase }}/cudakeeloq:${{ github.ref_name }}
31 | 
32 |       - name: Login to GitHub Container Registry
33 |         uses: docker/login-action@v2
34 |         with:
35 |           registry: ghcr.io
36 |           username: ${{ github.actor }}
37 |           password: ${{ secrets.GITHUB_TOKEN }}
38 | 
39 |       - name: Docker Push to ghcr.io
40 |         run: docker push ghcr.io/${{ steps.actor.outputs.lowercase }}/cudakeeloq:${{ github.ref_name }}
41 | 
42 | 


--------------------------------------------------------------------------------
/.github/workflows/windows.yml:
--------------------------------------------------------------------------------
 1 | name: Windows
 2 | on:
 3 |   push:
 4 |     tags:
 5 |       - test-windows*
 6 |       - release-v*
 7 | 
 8 | jobs:
 9 |   build:
10 | 
11 |     env:
12 |       CUDA_VERSION_MAJOR: 12
13 |       CUDA_VERSION_MINOR: 2
14 |       CUDA_VERSION_PATCH: 0
15 | 
16 |     runs-on: windows-2022
17 |     permissions:
18 |       contents: write
19 |     steps:
20 | 
21 |       - name: Setup CUDA (manual)
22 |         env:
23 |           CUDA_VERSION_STR: "${{env.CUDA_VERSION_MAJOR}}.${{env.CUDA_VERSION_MINOR}}.${{env.CUDA_VERSION_PATCH}}"
24 |           PKGS_VERSION_STR: "${{env.CUDA_VERSION_MAJOR}}.${{env.CUDA_VERSION_MINOR}}"
25 |         run: |
26 |           echo "Downloading CUDA version: ${{env.CUDA_VERSION_STR}}"
27 |           Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/${{env.CUDA_VERSION_STR}}/network_installers/windows/x86_64/wddm2/CUDAVisualStudioIntegration.exe" -OutFile ${{RUNNER.TEMP}}\cuda-vs-installer.exe
28 |           Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/${{env.CUDA_VERSION_STR}}/network_installers/windows/x86_64/wddm2/nvcc.exe" -OutFile ${{RUNNER.TEMP}}\cuda-nvcc-installer.exe
29 |           Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/${{env.CUDA_VERSION_STR}}/network_installers/windows/x86_64/wddm2/cudart.exe" -OutFile ${{RUNNER.TEMP}}\cuda-rt-installer.exe
30 | 
31 |           echo "Unpacking archives"
32 |           7z x ${{RUNNER.TEMP}}\cuda-vs-installer.exe extras\visual_studio_integration\MSBuildExtensions -o${{RUNNER.TEMP}}\cuda
33 |           7z x ${{RUNNER.TEMP}}\cuda-nvcc-installer.exe -o${{RUNNER.TEMP}}\cuda
34 |           7z x ${{RUNNER.TEMP}}\cuda-rt-installer.exe -o${{RUNNER.TEMP}}\cuda
35 | 
36 |           echo "Exporting Environment variables for next steps"
37 |           [Environment]::SetEnvironmentVariable("CUDA_PATH", "${{RUNNER.TEMP}}\cuda")
38 | 
39 |           Add-Content $env:GITHUB_ENV "CUDA_PATH=$env:CUDA_PATH"
40 |           Add-Content $env:GITHUB_ENV "CUDA_PATH_V${{env.CUDA_VERSION_MAJOR}}_${{env.CUDA_VERSION_MINOR}}=$env:CUDA_PATH"
41 |           Add-Content $env:GITHUB_PATH "$env:CUDA_PATH\bin"
42 | 
43 |       - name: CUDA check
44 |         run: |
45 |           echo "CUDA PATH: $env:CUDA_PATH"
46 |           ${{env.CUDA_PATH}}\bin\nvcc.exe -V
47 | 
48 |       - name: Checkout
49 |         uses: actions/checkout@v3.3.0
50 |         with:
51 |           submodules: 'true'
52 | 
53 |       - name: Setup MSBuild
54 |         uses: microsoft/setup-msbuild@v1.3.1
55 |         with:
56 |           # vs-version: latest
57 |           msbuild-architecture: "x64"
58 | 
59 | 
60 |       - name: Build Application
61 |         run: msbuild CudaKeeloq.vcxproj -t:Rebuild -p:Configuration=Release -p:Platform=x64
62 | 
63 |       - name: Create Artifacts
64 |         run: |
65 |           mkdir artifacts
66 |           xcopy x64\Release\CudaKeeloq.exe artifacts\
67 |           xcopy .\examples artifacts\examples /E /H /I /C
68 |           Compress-Archive -Path artifacts/* -DestinationPath artifacts/cudakeeloq.zip
69 | 
70 |       - name: Create Release
71 |         uses: ncipollo/release-action@v1.12.0
72 |         with:
73 |           artifacts: artifacts/cudakeeloq.zip
74 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | x64/
2 | *.user
3 | .vscode/
4 | .vs/
5 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
 1 | [submodule "cxxopts"]
 2 | 	path = ThirdParty/cxxopts
 3 | 	url = https://github.com/jarro2783/cxxopts.git
 4 | [submodule "ThirdParty/cpp-terminal"]
 5 | 	path = ThirdParty/cpp-terminal
 6 | 	url = https://github.com/jupyter-xeus/cpp-terminal.git
 7 | [submodule "ThirdParty/cxxopts"]
 8 | 	path = ThirdParty/cxxopts
 9 | 	url = https://github.com/jarro2783/cxxopts.git
10 | 


--------------------------------------------------------------------------------
/CudaKeeloq.vcxproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|x64">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>x64</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Profile|x64">
  9 |       <Configuration>Profile</Configuration>
 10 |       <Platform>x64</Platform>
 11 |     </ProjectConfiguration>
 12 |     <ProjectConfiguration Include="Release|x64">
 13 |       <Configuration>Release</Configuration>
 14 |       <Platform>x64</Platform>
 15 |     </ProjectConfiguration>
 16 |   </ItemGroup>
 17 |   <PropertyGroup Label="Globals">
 18 |     <ProjectGuid>{1CC91BE8-9FA0-4CC1-A597-9EBFBC8C21CA}</ProjectGuid>
 19 |     <RootNamespace>CudaKeeloq</RootNamespace>
 20 |     <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
 21 |   </PropertyGroup>
 22 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 23 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 24 |     <ConfigurationType>Application</ConfigurationType>
 25 |     <UseDebugLibraries>true</UseDebugLibraries>
 26 |     <CharacterSet>MultiByte</CharacterSet>
 27 |     <PlatformToolset>v143</PlatformToolset>
 28 |   </PropertyGroup>
 29 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 30 |     <ConfigurationType>Application</ConfigurationType>
 31 |     <UseDebugLibraries>false</UseDebugLibraries>
 32 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 33 |     <CharacterSet>MultiByte</CharacterSet>
 34 |     <PlatformToolset>v143</PlatformToolset>
 35 |   </PropertyGroup>
 36 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Profile|x64'" Label="Configuration">
 37 |     <ConfigurationType>Application</ConfigurationType>
 38 |     <UseDebugLibraries>false</UseDebugLibraries>
 39 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 40 |     <CharacterSet>MultiByte</CharacterSet>
 41 |     <PlatformToolset>v143</PlatformToolset>
 42 |   </PropertyGroup>
 43 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 44 |   <ImportGroup Label="ExtensionSettings">
 45 |     <Import Project="$(CUDA_PATH)\extras\visual_studio_integration\MSBuildExtensions\CUDA 12.2.props" />
 46 |   </ImportGroup>
 47 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 48 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 49 |   </ImportGroup>
 50 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 51 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 52 |   </ImportGroup>
 53 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Profile|x64'" Label="PropertySheets">
 54 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 55 |   </ImportGroup>
 56 |   <PropertyGroup Label="UserMacros" />
 57 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 58 |     <LinkIncremental>true</LinkIncremental>
 59 |     <IncludePath>$(IncludePath);$(projectdir)\src;$(CUDA_PATH)\include;$(projectdir)\ThirdParty\cpp-terminal;$(projectdir)\ThirdParty;</IncludePath>
 60 |   </PropertyGroup>
 61 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 62 |     <IncludePath>$(IncludePath);$(projectdir)\src;$(CUDA_PATH)\include;$(projectdir)\ThirdParty\cpp-terminal;$(projectdir)\ThirdParty;</IncludePath>
 63 |   </PropertyGroup>
 64 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">
 65 |     <IncludePath>$(IncludePath);$(projectdir)\src;$(CUDA_PATH)\include;$(projectdir)\ThirdParty\cpp-terminal;$(projectdir)\ThirdParty;</IncludePath>
 66 |   </PropertyGroup>
 67 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 68 |     <ClCompile>
 69 |       <WarningLevel>Level3</WarningLevel>
 70 |       <Optimization>Disabled</Optimization>
 71 |       <PreprocessorDefinitions>WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 72 |       <LanguageStandard>stdcpp17</LanguageStandard>
 73 |       <LanguageStandard_C>stdc17</LanguageStandard_C>
 74 |     </ClCompile>
 75 |     <Link>
 76 |       <GenerateDebugInformation>true</GenerateDebugInformation>
 77 |       <SubSystem>Console</SubSystem>
 78 |       <AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
 79 |       <IgnoreSpecificDefaultLibraries>libcmt.lib</IgnoreSpecificDefaultLibraries>
 80 |     </Link>
 81 |     <CudaCompile>
 82 |       <TargetMachinePlatform>64</TargetMachinePlatform>
 83 |       <CodeGeneration>compute_80,sm_80</CodeGeneration>
 84 |       <AdditionalCompilerOptions>
 85 |       </AdditionalCompilerOptions>
 86 |       <FastMath>true</FastMath>
 87 |       <GenerateRelocatableDeviceCode>false</GenerateRelocatableDeviceCode>
 88 |       <ExtensibleWholeProgramCompilation>false</ExtensibleWholeProgramCompilation>
 89 |       <NvccCompilation>compile</NvccCompilation>
 90 |       <Keep>false</Keep>
 91 |       <AdditionalOptions>--std=c++17 %(AdditionalOptions)</AdditionalOptions>
 92 |     </CudaCompile>
 93 |   </ItemDefinitionGroup>
 94 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 95 |     <ClCompile>
 96 |       <WarningLevel>Level3</WarningLevel>
 97 |       <Optimization>MaxSpeed</Optimization>
 98 |       <FunctionLevelLinking>true</FunctionLevelLinking>
 99 |       <IntrinsicFunctions>true</IntrinsicFunctions>
100 |       <PreprocessorDefinitions>WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
101 |       <LanguageStandard>stdcpp17</LanguageStandard>
102 |       <LanguageStandard_C>stdc17</LanguageStandard_C>
103 |     </ClCompile>
104 |     <Link>
105 |       <GenerateDebugInformation>true</GenerateDebugInformation>
106 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
107 |       <OptimizeReferences>true</OptimizeReferences>
108 |       <SubSystem>Console</SubSystem>
109 |       <AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
110 |       <IgnoreSpecificDefaultLibraries>libcmt.lib</IgnoreSpecificDefaultLibraries>
111 |     </Link>
112 |     <CudaCompile>
113 |       <TargetMachinePlatform>64</TargetMachinePlatform>
114 |       <FastMath>true</FastMath>
115 |       <AdditionalCompilerOptions>
116 |       </AdditionalCompilerOptions>
117 |       <CodeGeneration>compute_80,sm_80</CodeGeneration>
118 |       <GenerateLineInfo>false</GenerateLineInfo>
119 |       <GPUDebugInfo>false</GPUDebugInfo>
120 |       <AdditionalOptions>--std=c++17 %(AdditionalOptions)</AdditionalOptions>
121 |     </CudaCompile>
122 |   </ItemDefinitionGroup>
123 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">
124 |     <ClCompile>
125 |       <WarningLevel>Level3</WarningLevel>
126 |       <Optimization>MaxSpeed</Optimization>
127 |       <FunctionLevelLinking>true</FunctionLevelLinking>
128 |       <IntrinsicFunctions>true</IntrinsicFunctions>
129 |       <PreprocessorDefinitions>WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
130 |       <LanguageStandard>stdcpp17</LanguageStandard>
131 |       <LanguageStandard_C>stdc17</LanguageStandard_C>
132 |     </ClCompile>
133 |     <Link>
134 |       <GenerateDebugInformation>true</GenerateDebugInformation>
135 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
136 |       <OptimizeReferences>true</OptimizeReferences>
137 |       <SubSystem>Console</SubSystem>
138 |       <AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
139 |       <IgnoreSpecificDefaultLibraries>libcmt.lib</IgnoreSpecificDefaultLibraries>
140 |     </Link>
141 |     <CudaCompile>
142 |       <TargetMachinePlatform>64</TargetMachinePlatform>
143 |       <FastMath>true</FastMath>
144 |       <AdditionalCompilerOptions>
145 |       </AdditionalCompilerOptions>
146 |       <CodeGeneration>compute_80,sm_80</CodeGeneration>
147 |       <GenerateLineInfo>true</GenerateLineInfo>
148 |       <GPUDebugInfo>false</GPUDebugInfo>
149 |       <AdditionalOptions>--std=c++17 %(AdditionalOptions)</AdditionalOptions>
150 |     </CudaCompile>
151 |   </ItemDefinitionGroup>
152 |   <ItemGroup>
153 |     <ClInclude Include="src\algorithm\keeloq\keeloq_kernel.h" />
154 |     <ClInclude Include="src\algorithm\keeloq\keeloq_decryptor.h" />
155 |     <ClInclude Include="src\algorithm\keeloq\keeloq_encrypted.h" />
156 |     <ClInclude Include="src\algorithm\keeloq\keeloq_kernel_input.h" />
157 |     <ClInclude Include="src\algorithm\keeloq\keeloq_learning_types.h" />
158 |     <ClInclude Include="src\algorithm\keeloq\keeloq_single_result.h" />
159 |     <ClInclude Include="src\algorithm\multibase_digit.h" />
160 |     <ClInclude Include="src\algorithm\multibase_number.h" />
161 |     <ClInclude Include="src\algorithm\multibase_system.h" />
162 |     <ClInclude Include="src\bruteforce\bruteforce_config.h" />
163 |     <ClInclude Include="src\bruteforce\bruteforce_filters.h" />
164 |     <ClInclude Include="src\bruteforce\bruteforce_pattern.h" />
165 |     <ClInclude Include="src\bruteforce\bruteforce_round.h" />
166 |     <ClInclude Include="src\bruteforce\bruteforce_type.h" />
167 |     <ClInclude Include="src\bruteforce\generators\generator_bruteforce.h" />
168 |     <ClInclude Include="src\common.h" />
169 |     <ClInclude Include="src\device\cuda_array.h" />
170 |     <ClInclude Include="src\device\cuda_common.h" />
171 |     <ClInclude Include="src\device\cuda_context.h" />
172 |     <ClInclude Include="src\device\cuda_double_array.h" />
173 |     <ClInclude Include="src\device\cuda_object.h" />
174 |     <ClInclude Include="src\device\cuda_span.h" />
175 |     <ClInclude Include="src\device\cuda_vector.h" />
176 |     <ClInclude Include="src\host\command_line_args.h" />
177 |     <ClInclude Include="src\host\console.h" />
178 |     <ClInclude Include="src\host\host_utils.h" />
179 |     <ClInclude Include="src\host\timer.h" />
180 |     <ClInclude Include="src\kernels\kernel_result.h" />
181 |     <ClInclude Include="src\tests\test_console.h" />
182 |     <ClInclude Include="src\tests\test_keeloq.h" />
183 |     <ClInclude Include="src\tests\test_pattern.h" />
184 |     <ClInclude Include="src\tests\test_all.h" />
185 |     <ClInclude Include="src\tests\test_alphabet.h" />
186 |     <ClInclude Include="src\tests\test_benchmark.h" />
187 |     <ClInclude Include="src\tests\test_filters.h" />
188 |   </ItemGroup>
189 |   <ItemGroup>
190 |     <ClCompile Include="src\algorithm\keeloq\keeloq_kernel_input.cpp" />
191 |     <ClCompile Include="src\algorithm\keeloq\keeloq_learning_types.cpp" />
192 |     <ClCompile Include="src\algorithm\keeloq\keeloq_single_result.cpp" />
193 |     <ClCompile Include="src\bruteforce\bruteforce_config.cpp" />
194 |     <ClCompile Include="src\bruteforce\bruteforce_filters.cpp" />
195 |     <ClCompile Include="src\bruteforce\bruteforce_pattern.cpp" />
196 |     <ClCompile Include="src\bruteforce\bruteforce_round.cpp" />
197 |     <ClCompile Include="src\bruteforce\bruteforce_type.cpp" />
198 |     <ClCompile Include="src\bruteforce\generators\generator_bruteforce.cpp" />
199 |     <ClCompile Include="src\host\command_line_args.cpp" />
200 |     <ClCompile Include="src\host\console.cpp" />
201 |     <ClCompile Include="src\host\host_utils.cpp" />
202 |     <ClCompile Include="src\main.cpp" />
203 |     <ClCompile Include="src\tests\test_alphabet.cpp" />
204 |     <ClCompile Include="src\tests\test_benchmark.cpp" />
205 |     <ClCompile Include="src\tests\test_console.cpp" />
206 |     <ClCompile Include="src\tests\test_filters.cpp" />
207 |     <ClCompile Include="src\tests\test_keeloq.cpp" />
208 |     <ClCompile Include="src\tests\test_pattern.cpp" />
209 |   </ItemGroup>
210 |   <ItemGroup>
211 |     <CudaCompile Include="src\algorithm\keeloq\keeloq_kernel.cu" />
212 |     <CudaCompile Include="src\bruteforce\generators\generator_kernel.cu" />
213 |     <CudaCompile Include="src\tests\test_kernel.cu" />
214 |   </ItemGroup>
215 |   <ItemGroup>
216 |     <None Include=".editorconfig" />
217 |     <None Include="makefile" />
218 |     <None Include="src\algorithm\keeloq\keeloq_kernel.inl" />
219 |     <None Include="src\bruteforce\generators\generator_bruteforce_pattern_kernel.inl" />
220 |     <None Include="src\bruteforce\generators\generator_bruteforce_filtered_kernel.inl" />
221 |     <None Include="src\bruteforce\generators\generator_bruteforce_seed_kernel.inl" />
222 |     <None Include="src\bruteforce\generators\generator_bruteforce_simple_kernel.inl" />
223 |   </ItemGroup>
224 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
225 |   <ImportGroup Label="ExtensionTargets">
226 |     <Import Project="$(CUDA_PATH)\extras\visual_studio_integration\MSBuildExtensions\CUDA 12.2.targets" />
227 |   </ImportGroup>
228 | </Project>


--------------------------------------------------------------------------------
/CudaKeeloq.vcxproj.filters:
--------------------------------------------------------------------------------
  1 | ﻿<?xml version="1.0" encoding="utf-8"?>
  2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup>
  4 |     <ClInclude Include="src\common.h" />
  5 |     <ClInclude Include="src\tests\test_filters.h">
  6 |       <Filter>tests</Filter>
  7 |     </ClInclude>
  8 |     <ClInclude Include="src\tests\test_alphabet.h">
  9 |       <Filter>tests</Filter>
 10 |     </ClInclude>
 11 |     <ClInclude Include="src\host\command_line_args.h">
 12 |       <Filter>host</Filter>
 13 |     </ClInclude>
 14 |     <ClInclude Include="src\host\console.h">
 15 |       <Filter>host</Filter>
 16 |     </ClInclude>
 17 |     <ClInclude Include="src\device\cuda_object.h">
 18 |       <Filter>device</Filter>
 19 |     </ClInclude>
 20 |     <ClInclude Include="src\device\cuda_context.h">
 21 |       <Filter>device</Filter>
 22 |     </ClInclude>
 23 |     <ClInclude Include="src\device\cuda_double_array.h">
 24 |       <Filter>device</Filter>
 25 |     </ClInclude>
 26 |     <ClInclude Include="src\device\cuda_array.h">
 27 |       <Filter>device</Filter>
 28 |     </ClInclude>
 29 |     <ClInclude Include="src\bruteforce\bruteforce_filters.h">
 30 |       <Filter>bruteforce</Filter>
 31 |     </ClInclude>
 32 |     <ClInclude Include="src\bruteforce\bruteforce_config.h">
 33 |       <Filter>bruteforce</Filter>
 34 |     </ClInclude>
 35 |     <ClInclude Include="src\bruteforce\bruteforce_round.h">
 36 |       <Filter>bruteforce</Filter>
 37 |     </ClInclude>
 38 |     <ClInclude Include="src\bruteforce\bruteforce_type.h">
 39 |       <Filter>bruteforce</Filter>
 40 |     </ClInclude>
 41 |     <ClInclude Include="src\bruteforce\generators\generator_bruteforce.h">
 42 |       <Filter>bruteforce\generators</Filter>
 43 |     </ClInclude>
 44 |     <ClInclude Include="src\algorithm\keeloq\keeloq_decryptor.h">
 45 |       <Filter>algorithm\keeloq</Filter>
 46 |     </ClInclude>
 47 |     <ClInclude Include="src\algorithm\keeloq\keeloq_encrypted.h">
 48 |       <Filter>algorithm\keeloq</Filter>
 49 |     </ClInclude>
 50 |     <ClInclude Include="src\algorithm\keeloq\keeloq_learning_types.h">
 51 |       <Filter>algorithm\keeloq</Filter>
 52 |     </ClInclude>
 53 |     <ClInclude Include="src\algorithm\keeloq\keeloq_single_result.h">
 54 |       <Filter>algorithm\keeloq</Filter>
 55 |     </ClInclude>
 56 |     <ClInclude Include="src\algorithm\keeloq\keeloq_kernel_input.h">
 57 |       <Filter>algorithm\keeloq</Filter>
 58 |     </ClInclude>
 59 |     <ClInclude Include="src\kernels\kernel_result.h">
 60 |       <Filter>kernels</Filter>
 61 |     </ClInclude>
 62 |     <ClInclude Include="src\algorithm\keeloq\keeloq_kernel.h">
 63 |       <Filter>algorithm\keeloq</Filter>
 64 |     </ClInclude>
 65 |     <ClInclude Include="src\tests\test_all.h">
 66 |       <Filter>tests</Filter>
 67 |     </ClInclude>
 68 |     <ClInclude Include="src\algorithm\multibase_number.h">
 69 |       <Filter>algorithm</Filter>
 70 |     </ClInclude>
 71 |     <ClInclude Include="src\algorithm\multibase_system.h">
 72 |       <Filter>algorithm</Filter>
 73 |     </ClInclude>
 74 |     <ClInclude Include="src\bruteforce\bruteforce_pattern.h">
 75 |       <Filter>bruteforce</Filter>
 76 |     </ClInclude>
 77 |     <ClInclude Include="src\algorithm\multibase_digit.h">
 78 |       <Filter>algorithm</Filter>
 79 |     </ClInclude>
 80 |     <ClInclude Include="src\device\cuda_common.h">
 81 |       <Filter>device</Filter>
 82 |     </ClInclude>
 83 |     <ClInclude Include="src\tests\test_benchmark.h">
 84 |       <Filter>tests</Filter>
 85 |     </ClInclude>
 86 |     <ClInclude Include="src\tests\test_pattern.h">
 87 |       <Filter>tests</Filter>
 88 |     </ClInclude>
 89 |     <ClInclude Include="src\device\cuda_vector.h">
 90 |       <Filter>device</Filter>
 91 |     </ClInclude>
 92 |     <ClInclude Include="src\tests\test_keeloq.h">
 93 |       <Filter>tests</Filter>
 94 |     </ClInclude>
 95 |     <ClInclude Include="src\host\host_utils.h">
 96 |       <Filter>host</Filter>
 97 |     </ClInclude>
 98 |     <ClInclude Include="src\tests\test_console.h">
 99 |       <Filter>tests</Filter>
100 |     </ClInclude>
101 |     <ClInclude Include="src\device\cuda_span.h">
102 |       <Filter>device</Filter>
103 |     </ClInclude>
104 |     <ClInclude Include="src\host\timer.h">
105 |       <Filter>host</Filter>
106 |     </ClInclude>
107 |   </ItemGroup>
108 |   <ItemGroup>
109 |     <ClCompile Include="src\main.cpp" />
110 |     <ClCompile Include="src\tests\test_filters.cpp">
111 |       <Filter>tests</Filter>
112 |     </ClCompile>
113 |     <ClCompile Include="src\tests\test_alphabet.cpp">
114 |       <Filter>tests</Filter>
115 |     </ClCompile>
116 |     <ClCompile Include="src\host\command_line_args.cpp">
117 |       <Filter>host</Filter>
118 |     </ClCompile>
119 |     <ClCompile Include="src\host\console.cpp">
120 |       <Filter>host</Filter>
121 |     </ClCompile>
122 |     <ClCompile Include="src\bruteforce\bruteforce_filters.cpp">
123 |       <Filter>bruteforce</Filter>
124 |     </ClCompile>
125 |     <ClCompile Include="src\bruteforce\bruteforce_config.cpp">
126 |       <Filter>bruteforce</Filter>
127 |     </ClCompile>
128 |     <ClCompile Include="src\bruteforce\bruteforce_round.cpp">
129 |       <Filter>bruteforce</Filter>
130 |     </ClCompile>
131 |     <ClCompile Include="src\bruteforce\bruteforce_type.cpp">
132 |       <Filter>bruteforce</Filter>
133 |     </ClCompile>
134 |     <ClCompile Include="src\bruteforce\generators\generator_bruteforce.cpp">
135 |       <Filter>bruteforce\generators</Filter>
136 |     </ClCompile>
137 |     <ClCompile Include="src\algorithm\keeloq\keeloq_learning_types.cpp">
138 |       <Filter>algorithm\keeloq</Filter>
139 |     </ClCompile>
140 |     <ClCompile Include="src\algorithm\keeloq\keeloq_single_result.cpp">
141 |       <Filter>algorithm\keeloq</Filter>
142 |     </ClCompile>
143 |     <ClCompile Include="src\algorithm\keeloq\keeloq_kernel_input.cpp">
144 |       <Filter>algorithm\keeloq</Filter>
145 |     </ClCompile>
146 |     <ClCompile Include="src\bruteforce\bruteforce_pattern.cpp">
147 |       <Filter>bruteforce</Filter>
148 |     </ClCompile>
149 |     <ClCompile Include="src\tests\test_benchmark.cpp">
150 |       <Filter>tests</Filter>
151 |     </ClCompile>
152 |     <ClCompile Include="src\tests\test_pattern.cpp">
153 |       <Filter>tests</Filter>
154 |     </ClCompile>
155 |     <ClCompile Include="src\tests\test_keeloq.cpp">
156 |       <Filter>tests</Filter>
157 |     </ClCompile>
158 |     <ClCompile Include="src\host\host_utils.cpp">
159 |       <Filter>host</Filter>
160 |     </ClCompile>
161 |     <ClCompile Include="src\tests\test_console.cpp">
162 |       <Filter>tests</Filter>
163 |     </ClCompile>
164 |   </ItemGroup>
165 |   <ItemGroup>
166 |     <Filter Include="algorithm">
167 |       <UniqueIdentifier>{5526a2e2-4a02-4459-977a-dbdbc1265827}</UniqueIdentifier>
168 |     </Filter>
169 |     <Filter Include="bruteforce">
170 |       <UniqueIdentifier>{076c7b6e-a941-47c2-9514-8616f16b61e8}</UniqueIdentifier>
171 |     </Filter>
172 |     <Filter Include="bruteforce\generators">
173 |       <UniqueIdentifier>{7d4a1a80-1bf1-48ba-95df-5c102fb9c8ca}</UniqueIdentifier>
174 |     </Filter>
175 |     <Filter Include="device">
176 |       <UniqueIdentifier>{bc3686d0-8983-4b00-bc95-074a07972e2b}</UniqueIdentifier>
177 |     </Filter>
178 |     <Filter Include="host">
179 |       <UniqueIdentifier>{366584ba-328c-44ba-85d8-08d370307c4f}</UniqueIdentifier>
180 |     </Filter>
181 |     <Filter Include="kernels">
182 |       <UniqueIdentifier>{348c2a54-dd3e-4683-bc68-b9a5c9dfd9fa}</UniqueIdentifier>
183 |     </Filter>
184 |     <Filter Include="tests">
185 |       <UniqueIdentifier>{9e8a7676-0ad7-4e6a-b3ff-a61ddb6b9e0e}</UniqueIdentifier>
186 |     </Filter>
187 |     <Filter Include="algorithm\keeloq">
188 |       <UniqueIdentifier>{8636e053-53f4-43e2-b4f2-6ea75d4a849e}</UniqueIdentifier>
189 |     </Filter>
190 |   </ItemGroup>
191 |   <ItemGroup>
192 |     <CudaCompile Include="src\bruteforce\generators\generator_kernel.cu">
193 |       <Filter>bruteforce\generators</Filter>
194 |     </CudaCompile>
195 |     <CudaCompile Include="src\tests\test_kernel.cu">
196 |       <Filter>tests</Filter>
197 |     </CudaCompile>
198 |     <CudaCompile Include="src\algorithm\keeloq\keeloq_kernel.cu">
199 |       <Filter>algorithm\keeloq</Filter>
200 |     </CudaCompile>
201 |   </ItemGroup>
202 |   <ItemGroup>
203 |     <None Include="src\bruteforce\generators\generator_bruteforce_pattern_kernel.inl">
204 |       <Filter>bruteforce\generators</Filter>
205 |     </None>
206 |     <None Include="src\bruteforce\generators\generator_bruteforce_simple_kernel.inl">
207 |       <Filter>bruteforce\generators</Filter>
208 |     </None>
209 |     <None Include="src\bruteforce\generators\generator_bruteforce_filtered_kernel.inl">
210 |       <Filter>bruteforce\generators</Filter>
211 |     </None>
212 |     <None Include="src\algorithm\keeloq\keeloq_kernel.inl">
213 |       <Filter>algorithm\keeloq</Filter>
214 |     </None>
215 |     <None Include=".editorconfig" />
216 |     <None Include="makefile" />
217 |     <None Include="src\bruteforce\generators\generator_bruteforce_seed_kernel.inl">
218 |       <Filter>bruteforce\generators</Filter>
219 |     </None>
220 |   </ItemGroup>
221 | </Project>


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 https://github.com/X-Stuff
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Builds the docker image; the image name and tag can be overridden via the CONTAINER and TAG environment variables.
3 | CONTAINER="${CONTAINER:-cudakeeloq}"
4 | TAG="${TAG:-local}"
5 | 
6 | docker build . -t "$CONTAINER:$TAG"
7 | 
8 | 


--------------------------------------------------------------------------------
/dockerfile:
--------------------------------------------------------------------------------
 1 | ARG CONFIGURATION="release"
 2 | ARG CUDA_MAJOR=12
 3 | ARG CUDA_MINOR=2
 4 | ARG CUDA_PATCH=0
 5 | 
 6 | FROM nvidia/cuda:${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH}-devel-ubuntu22.04 AS builder
 7 | ARG CONFIGURATION
 8 | 
 9 | # Build stage: sources are copied into and compiled inside /workspace
10 | WORKDIR /workspace
11 | 
12 | # Copy sources
13 | COPY examples/ examples
14 | COPY src/ src
15 | COPY ThirdParty/ ThirdParty
16 | COPY makefile makefile
17 | 
18 | # make (the makefile target name is the build configuration)
19 | RUN make $CONFIGURATION
20 | 
21 | # runner: only the built binaries are copied from the builder stage
22 | FROM nvidia/cuda:${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH}-base-ubuntu22.04
23 | ARG CONFIGURATION
24 | 
25 | RUN groupadd cuda && useradd -m -d /app -g cuda cuda
26 | USER cuda
27 | 
28 | WORKDIR /app
29 | COPY --chown=cuda:cuda --from=builder /workspace/x64/$CONFIGURATION/bin /app/
30 | ENTRYPOINT [ "/app/CudaKeeloq" ]
31 | CMD [ "--help" ]
32 | 


--------------------------------------------------------------------------------
/examples/alphabet.bin:
--------------------------------------------------------------------------------
1 | 01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/@#$THIS DUPLICATED BYTES WILL BE DELETED


--------------------------------------------------------------------------------
/examples/dictionary.bin:
--------------------------------------------------------------------------------
1 | W¿OÖ‰6áø—(Ðt€ OáíÎtÙ_: HK(


--------------------------------------------------------------------------------
/examples/dictionary.words:
--------------------------------------------------------------------------------
1 | 0xC0FFEE00DEAD6666
2 | 0b10101010101010101010101010101010101
3 | 9223372036854775807
4 | 0xDEAD6666C0FFEE00:1234567
5 | 9223372036854775807:31337
6 | 0b10101010101010101010101010101010101:0987654321


--------------------------------------------------------------------------------
/makefile:
--------------------------------------------------------------------------------
 1 | # THIS MAKE FILE IS SUPPOSED TO BE USED WITH THE DOCKER IMAGE nvidia/cuda:12.2.0-devel-ubuntu22.04 (SEE dockerfile)
 2 | 
 3 | TARGET_NAME=CudaKeeloq
 4 | 
 5 | CUDA_ROOT_DIR=/usr/local/cuda
 6 | 
 7 | ARCH=x64
 8 | CONFIG_RELEASE=release
 9 | CONFIG_PROFILE=profile
10 | CONFIG_DEBUG=debug
11 | 
12 | # CC compiler options:
13 | CC=g++
14 | CC_FLAGS=-std=c++17 -Wall
15 | CC_INCLUDE=-I./src/ -I./ThirdParty/ -I./ThirdParty/cpp-terminal
16 | 
17 | NVCC=nvcc
18 | NVCC_FLAGS=--gpu-architecture=compute_80 --gpu-code=sm_80 --std=c++17
19 | NVCC_INCLUDE=-I./src/
20 | 
21 | # CUDA library directory:
22 | CUDA_LIB_DIR= -L$(CUDA_ROOT_DIR)/lib64
23 | # CUDA include directory:
24 | CUDA_INC_DIR= -I$(CUDA_ROOT_DIR)/include
25 | # CUDA linking libraries:
26 | CUDA_LINK_LIBS= -lcudart
27 | # These targets are commands, not files: keep them working even if a file with the same name exists
28 | .PHONY: all release profile debug link mkdirs
29 | # Configurations, default debug
30 | all: debug
31 | 	@echo No target specified. Default is debug
32 | 
33 | release: OBJ_DIR=./$(ARCH)/$(CONFIG_RELEASE)/obj
34 | release: EXE_DIR=./$(ARCH)/$(CONFIG_RELEASE)/bin
35 | release: NVCC_FLAGS+= -use_fast_math -O3 -Xptxas -O3 --m64
36 | release: CC_FLAGS+= -O3 -DNDEBUG
37 | release:link
38 | 
39 | profile: OBJ_DIR=./$(ARCH)/$(CONFIG_PROFILE)/obj
40 | profile: EXE_DIR=./$(ARCH)/$(CONFIG_PROFILE)/bin
41 | profile: NVCC_FLAGS+= -lineinfo -use_fast_math
42 | profile: CC_FLAGS+= -DNDEBUG
43 | profile:link
44 | 
45 | debug:   OBJ_DIR=./$(ARCH)/$(CONFIG_DEBUG)/obj
46 | debug:   EXE_DIR=./$(ARCH)/$(CONFIG_DEBUG)/bin
47 | debug:   NVCC_FLAGS+= -G
48 | debug:   CC_FLAGS+= -D_DEBUG
49 | debug:link
50 | 
51 | # Sources C++
52 | CPP_FILES = $(shell find src/ -iname "*.cpp")
53 | CPP_OBJECTS = $(CPP_FILES:%.cpp=%.o)
54 | 
55 | # Sources CUDA
56 | CUDA_FILES = $(shell find src/ -iname "*.cu")
57 | CUDA_OBJECTS = $(CUDA_FILES:%.cu=%.o)
58 | 
59 | # Link (object files are taken from OBJ_DIR, which is where the compile rules below write them)
60 | link: $(CPP_OBJECTS) $(CUDA_OBJECTS)
61 | 	$(CC) $(CC_FLAGS) \
62 | 		$(addprefix $(OBJ_DIR)/, $(notdir $(CPP_OBJECTS))) \
63 | 		$(addprefix $(OBJ_DIR)/, $(notdir $(CUDA_OBJECTS))) \
64 | 		-o $(EXE_DIR)/$(TARGET_NAME) $(CUDA_INC_DIR) $(CUDA_LIB_DIR) $(CUDA_LINK_LIBS)
65 | 
66 | # Compile C++ (output goes flat into OBJ_DIR, named after the source file)
67 | $(CPP_OBJECTS): | mkdirs
68 | 	$(CC) $(CC_FLAGS) $(CC_INCLUDE) $(CUDA_INC_DIR) -c $(basename $@).cpp -o $(OBJ_DIR)/$(notdir $@)
69 | 
70 | # Compile CUDA (output goes flat into OBJ_DIR, named after the source file)
71 | $(CUDA_OBJECTS): | mkdirs
72 | 	$(NVCC) $(NVCC_FLAGS) $(NVCC_INCLUDE) -c $(basename $@).cu -o $(OBJ_DIR)/$(notdir $@)
73 | 
74 | # Prepare output directories of the selected configuration
75 | mkdirs:
76 | 	mkdir -p $(OBJ_DIR)
77 | 	mkdir -p $(EXE_DIR)
78 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
  1 | ## Intro
  2 | This is a CUDA accelerated simple bruteforcer of [KeeLoq](https://en.wikipedia.org/wiki/KeeLoq) algorithm.
  3 | 
  4 | ## Disclaimer
  5 | 
  6 | > 64-bit keeloq key is 18,446,744,073,709,551,615 possible combinations.
  7 | EVEN! if your GPU will be able to calculate 1 billion keys in a second.
  8 | You will need 18446744073709551615 / 1000000000 / 3600 / 24 / 365 = 584 YEARS! to brute a single key.
  9 | > My laptop 3080Ti can do only 230 MKeys/s
 10 | So it's practically impossible to use this application "as is" in real life attack.
 11 | 
 12 | ## Version history
 13 | 
 14 |  * `0.1.2`
 15 |    - Fixed `dockerfile`, added `CMD` (issue: https://github.com/X-Stuff/CudaKeeloq/issues/4).
 16 |    - CUDA version updated to `12.2`
 17 |  * `0.1.1`
 18 |    - Added seed bruteforce mode (issue: https://github.com/x-stuff/CudaKeeloq/issues/2).
 19 |    - Added support of specifying seed in a text dictionaries.
 20 |    - Fixed some minor bugs and internal refactoring.
 21 |    - Slightly improved performance (1-5%)
 22 |  * `0.1.0`
 23 |    - Initial public release.
 24 | 
 25 | ## Capabilities
 26 | 
 27 | * **Simple** (+1) bruteforce
 28 |   > Regular straightforward bruteforce where keys just incremented +1
 29 | * **Filtered** (+1) bruteforce
 30 |   > Some basic filters for simple bruteforce, like "keys with more than 6 consecutive zeros". Filters may apply to `Include` and/or `Exclude` rules. e.g. you can exclude keys with all ASCII symbols. You may also use `Include` filter but its performance is incredibly bad, so I don't recommend this attack type at all.
 31 | * **Alphabet** attack
 32 |   > You may specify exact set of bytes which should be used for generating keys during bruteforce. For example you may want to use only numbers, or only ascii symbols.
 33 | * **Pattern** attack
 34 |   > This is like extended alphabet. You may specify individual set of bytes for each byte in 64bit key. For example you may want first byte be any value, but others be only numbers.
 35 | * **Dictionary** attack
 36 |   > Manufacturer keys will be taken from input files. Supports binary and text modes. In text mode it's allowed to specify a seed also.
 37 | 
 38 | ## Limitations
 39 | 
 40 |  * Doesn't support mixed bruteforce mode, when you need to bruteforce keys and seeds. You have to specify either `seed` or `key` if you want to brute `secure` or `faac` types.
 41 |  * Can't do binary dictionary window attack. If you want something similar, you can make 63 more files. Where each file's content will be shifted by 1-to-63 bit to the left.
 42 | 
 43 | ## Build
 44 | 
 45 | ### Windows
 46 | #### Requirements
 47 | 
 48 | * CUDA Toolkit v12.2.0
 49 |   - nvcc
 50 |   - cuda runtime
 51 |   - visual studio extension
 52 | 
 53 | * Microsoft Visual Studio 2022
 54 | 
 55 | #### Compiling
 56 |  Just open `.vcxproj` file and build it
 57 | 
 58 | ### Linux
 59 | #### Requirements
 60 | * docker
 61 | * NVIDIA Container [Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
 62 | 
 63 | #### Compiling
 64 | ```
 65 | $ ./build.sh
 66 | ```
 67 | This will create a container `cudakeeloq:local` with compiled app
 68 | 
 69 | Run the bruteforcer
 70 | ```
 71 | $ ./run.sh <ARGS>
 72 | ```
 73 | > NOTE: You may need to have CUDA docker extension installed in order to have `--gpus` command line argument works.
 74 | 
 75 | ### Different CUDA Version
 76 | 
 77 | #### Windows
 78 | Open `.vcxproj` find and replace:
 79 |  * `CUDA 12.2.targets` with desired version
 80 |  * `CUDA 12.2.props` with desired version
 81 | 
 82 | #### Linux
 83 | Open `dockerfile` and change `CUDA_MAJOR`, `CUDA_MINOR` and `CUDA_PATCH` variables
 84 | 
 85 | ## Run
 86 | 
 87 | ### Requirements
 88 | * NVidia GPU (1GB+ RAM)
 89 | * RAM 1GB+
 90 | * **(Linux only)** installed CUDA docker extension `nvidia-container-toolkit`
 91 | 
 92 | ### Examples
 93 | 
 94 | #### Simple bruteforce
 95 | 
 96 | ```
 97 | ./CudaKeeloq --inputs xx,yy,zz --mode=1 --start=0x9876543210 --count=1000000
 98 | ```
 99 |  - bruteforce of 1 million keys starting from `0x9876543210`
100 | 
101 | #### Alphabet bruteforce
102 | 
103 | ```
104 | ./CudaKeeloq --inputs xx,yy,zz --mode=3 --learning-type=0 --alphabet=examples/alphabet.bin,10:20:30:AA:BB:CC:DD:EE:FF:02:33
105 | ```
106 | Specified 2 alphabets - 2 attacks will be launched:
107 |  - First will use file `examples/alphabet.bin` as alphabet source.
108 |  - Second alphabet is provided via command line `10:20:30:AA:BB:CC:DD:EE:FF:02:33`
109 | 
110 | #### Pattern bruteforce
111 | 
112 | ```
113 | ./CudaKeeloq --inputs xx,yy,zz --mode=4 --pattern=FF:11:*:*:AA-FF:01|10:00:FF,*:*:*:*:AB:CD:EF:00
114 | ```
115 | Specified 2 patterns - 2 attacks will be launched:
116 |   - First will check keys started (less significant bytes) from `..00FF`, then will be either `01` or `10` bytes, then bytes range `AA, AB, AC, AD ... FF`, then 2 any bytes [`0x00`:`0xFF`], and final 2 bytes will be `11` and `FF`.
117 |   - Second has constant lower 32 bit value `ABCDEF00` and higher 32 bits will be bruted.
118 | 
119 | #### Seed bruteforce
120 | 
121 | ```
122 | ./CudaKeeloq --inputs xx,yy,zz --mode=5 --start=<man>
123 | ```
124 | This will launch seed attack with specified manufacturer key: `<man>`. If `--learning-type=` not specified - will try to check all learning types with seed - `FAAC` and `Secure` ( `Rev` version dropped intentionally since manufacturer key explicitly set ). You can specify start seed with `--seed=` but it's kind of useless. Check all 32-bit values is matter of minutes, and check will be forced to do all over the `uint32` range anyway.
125 | 
126 | #### Dictionary bruteforce
127 | 
128 | ```
129 | ./CudaKeeloq --inputs xx,yy,zz --mode=0 --word-dict=0xFDE4531BBACAD12,examples/dictionary.words --bin-dict=examples/dictionary.bin --bin-dict-mode=1
130 | ```
131 | This will launch 2 dictionary attacks:
132 |   1. Explicit key `0xFDE4531BBACAD12` from command line and parsed keys from `examples/dictionary.words`
133 |   2. Keys created from bytes of file `examples/dictionary.bin`. Since `--bin-dict-mode=1` specified, read 8-bytes from file will be reversed. So the bytes `01 00 00 00 ...` will become `uint64` number with value `1`.
134 | 
135 | Other arguments for this mode aren't used.
136 | 
137 | ## Command line arguments
138 | 
139 | ### Inputs
140 | 
141 | * `--inputs=[i1, i2, i3]` - inputs are captured "over the air" bytes, you need to provide 1-3 in format of hexadecimal number: `0x1122334455667788`.
142 |   > NOTE: It is possible to launch bruteforce over the single captured data, but there will be tons of false matches. You shouldn't use it. Even with 2 inputs chances are pretty high.
143 | 
144 | #### Bruteforce range
145 | 
146 | * `--start=<value>` - defines the initial value from which bruteforce begins. Applies to all types except dictionary ( default is `0` ).
147 | For alphabet or pattern types, should be specified value which can be converted to pattern or alphabet. e.g.
148 | if you use alphabet `77:88:FF:AA:BB` and specify `--start=0x778899FFAABBAABB` - bruteforce will start from `0x7788`**`77`**`FFAABBAABB`: since `99` does not exist in the alphabet, it will be replaced with the first byte of the alphabet.
149 | If you specify `0` as start value bruteforce will start from `0x77777..`.
150 | 
151 | * `--count=<value>` - number of keys to generate and check ( default is `uint64 max` - means all ).
152 | If you are using simple +1 mode it will define the last key to check. In other modes determining the last key might not be a trivial task.
153 | 
154 | * `--seed=<value>` - seed value. It used only in `SECURE` and `FAAC` learning modes. Providing `seed` without a learning mode will just significantly reduce bruteforce speed.
155 | If you definitely know that captured data encrypted with `seed`'ed algorithm - specify `--learning-type=4,5,8,9` (`SECURE` and `FAAC` both with `_rev` variation), no sense in that case to calculate the others. And vice versa, if you don't know the `seed` - do not specify it, otherwise - useless calculation would be done.
156 | Not supported in `dictionary` mode.
157 | 
158 | 
159 | #### Modes
160 | 
161 | In case of `single` input - the match check will be done only by match 18-bits of `serial`.
162 | Keeloq OTA packet divided into 2 parts `fix` and `hop`.
163 |   - `fix` - 4-bit encoded `button` and 28-bit `serial` number of transmitter
164 |   - `hop` - encoded `serial`, `button` and `counter`
165 | So in single mode decoded `serial` will be matched to stripped `serial` from `fix` part.
166 | This is not accurate and gives you tons of *phantoms*.
167 | 
168 | In case of `normal` inputs (2-3) - the analysis will be slightly more complex.
169 |   - All inputs will be decoded with the same key
170 |   - All decoded `hop` parts will be compared by `serial`
171 |     - if `serial` matches - then `button` will be checked - button should be the same
172 |     - if `button` matches - then `counter` will be checked - it should be increasing with each input
173 | 
174 | 3 inputs is enough to eliminate *phantoms* no need to provide more (however there is still a possibility to catch one).
175 | 
176 | 2 inputs might give you less accurate results with *phantoms*.
177 | 
178 | Obviously `single` mode is 3-4x times faster than `normal` due to optimizations. However results in `single` mode might not be accurate.
179 | 
180 | #### Capture
181 | 
182 | The idea of normal flow is:
183 |  - Setup your radio capture device.
184 |  - Click same button 3 times on your transmitter (same serial, same button, increasing counter).
185 |  - Convert encoded signal to bytes.
186 |  - Provide these bytes as inputs.
187 | 
188 | ### CUDA Setup
189 | 
190 | * `--cuda-blocks=N` - `N` is a number of thread blocks for each bruteforce attack round
191 | * `--cuda-threads=N` - `N` is a number of CUDA threads for each block (above). If `0` (default) - the maximum from device caps will be set.
192 | 
193 | Overall number of threads is multiplication of `cuda-blocks` and `cuda-threads`.
194 | 
195 | Keep in mind that the more overall threads you will have - the more memory they will consume. (e.g. `8196` and `1024` consumes approx. 2.5 GB RAM (and GPU RAM))
196 | 
197 | ### Attack modes
198 | 
199 | Each bruteforce run must define a mode.
200 | Several modes can be specified at the same time like `--mode=0,1,3,4` but you should provide all expected arguments for each mode.
201 | 
202 |  * `--mode=0` - Dictionary. Expects (one or both):
203 |     - `--word-dict=[f1, w1, ...]` - `f1` - text dictionary file(s) with hexadecimal values, `w1` hexadecimal word itself. Supports seed also, should be provided with `:` as delimiter (e.g.: `0xAABBCCDDEE:1234`) and as decimal number only.
204 |     - `--bin-dict=[f1, f2]` - binary file(s) each 8 bytes of which will be used as dictionary word.
205 |     Supports `--bin-dict-mode=M` where `M` = { `0`, `1`, `2`}. `0` - as is (big endian). `1` - reverse (little endian), `2` - both.
206 |  * `--mode=1` - Simple bruteforce mode.
207 |     - `--start` defines the first key from which bruteforce begins.
208 |     - `--count` how much keys to check.
209 |  * `--mode=2` - Filtered bruteforce mode. Same as simple, but with filters:
210 |     - `--exclude-filter=V` where `V` is a number representing a combination of filter flags. (see: [bruteforce_filters.h](src/bruteforce/bruteforce_filters.h))
211 |     - `--include-filter=V` (same as above)
212 |  * `--mode=3` - Alphabet mode. Expects:
213 |     - `--alphabet=[f1, a1, ...]` - where `f1` is a binary file contents of which will be interpreted as allowed bytes in key during bruteforce. where `a1` is `:` separated hex string of bytes in alphabet (like: `AA:BB:01`)
214 |     - Also allowed to use `--start` and `--count` arguments, with same meaning
215 |  * `--mode=4` - Pattern mode. Expects:
216 |     - `--pattern=[f1, p1, ...]` - where `f1` is a text file with hexadecimal pattern, and `p1` is `:` separated hexadecimal pattern like `*:aa:00-33:0xF3|0x11:AL0 ...`.
217 |         - `*` - means any byte
218 |         - `0xNN-0xMM` - means range from `0xNN` to `0xMM`
219 |         - `A|B|C` - means `A` or `B` or `C`
220 |         - `AL[0..N]` - means alphabet from inputs (must be specified with `--alphabet=`)
221 |     - `--alphabet=` - if pattern has `AL` - alphabet must be specified.
222 |  * `--mode=5` - Seed bruteforce mode. Expects:
223 |     - `--start=<man>` - Manufacturer key. This will be the only key to check. Instead of bruteforcing keys, this mode bruteforce seeds.
224 | 
225 | ### Learning types
226 | 
227 | By default the app will try to brute all known learning keys (16 or 12 depending if seed is specified), however if you know the exact learning type, you might speed up your bruteforce by 12-16 times. You may also specify several learning types simultaneously `--learning-type=0,5,7`.
228 | > NOTE: Depending on your GPU, at some point, using `ALL` learning types *might be* faster than explicitly specified, due to the GPU branching problem.
229 | 
230 | Each learning type has its `_REV` modification. That means it will use the byte-reversed key for decryption. See more [keeloq_learning_types.h](src/algorithm/keeloq/keeloq_learning_types.h)
231 | 
232 | Here and below `=x[y]` - `x` value for normal mode, `y` for reversed.
233 | 
234 | * `--learning-type=0[1]` - Simple learning
235 | * `--learning-type=2[3]` - Normal learning
236 | * `--learning-type=4[5]` - Secure learning (requires seed)
237 | * `--learning-type=6[7]` - XOR learning
238 | * `--learning-type=8[9]` - FAAC learning (requires seed)
239 | * `--learning-type=10[11]` - *UNKNOWN TYPE1*
240 | * `--learning-type=12[13]` - *UNKNOWN TYPE2*
241 | * `--learning-type=14[15]` - *UNKNOWN TYPE3*
242 | 
243 | ### Miscellaneous
244 | 
245 |  * `--first-match` - if `true` (default) will stop all bruteforce on first match
246 |  * `--test` - launches internal debug tests (useful mostly if built in `Debug` configuration)
247 |  * `--benchmark` - launches CUDA setup benchmark. Will show comparison of different CUDA setup (block and threads).
248 |  * `--help`, `-h` - prints help
249 | 
250 | 
251 |  ## Performance
252 | 
253 |  > Windows executable, release mode, MSVS 2022, CUDA 12.0.0
254 | 
255 |  For my laptop's GPU ( 3080Ti ) the best results with `8196` CUDA Blocks and maximum CUDA threads (from device info - `1024`) - it gives me approx.:
256 |   * `28` MKeys/s for `ALL` learning types if `seed` **is** specified.
257 |   * `49` MKeys/s for `ALL` learning types if `seed` is **not** provided.
258 |   * `500` MKeys for `Simple` ( the easiest type single keeloq decryption ).
259 |   * `250` MKeys for `Normal` and `Secure`.
260 |   * `220` MKeys for `FAAC`.
261 | 


--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Runs the image with all GPUs attached; the script's arguments are forwarded unchanged after the image name.
3 | CONTAINER="${CONTAINER:-cudakeeloq}"
4 | TAG="${TAG:-local}"
5 | DOCKER_ARGS="${DOCKER_ARGS:-}"
6 | # DOCKER_ARGS is left unquoted so that several extra docker options can be passed through one variable.
7 | docker run --rm -it --init --gpus=all ${DOCKER_ARGS} "$CONTAINER:$TAG" "$@"
8 | 
9 | 


--------------------------------------------------------------------------------
/src/algorithm/keeloq/keeloq_decryptor.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common.h"
 4 | 
 5 | #include <cuda_runtime_api.h>
 6 | 
 7 | #include "device/cuda_common.h"
 8 | 
 9 | 
10 | /**
11 |  * Data struct which allows to decrypt encrypted data
12 |  * In most cases effectively just a 64-bit integer key (8 bytes), plus an optional 32-bit seed
13 |  */
14 | struct Decryptor
15 | {
16 |     Decryptor() = default; // NOTE: members stay uninitialized unless the object is value-initialized
17 | 
18 |     __host__ __device__ Decryptor(uint64_t k, uint32_t s) : key(k), key_seed(s), key_rev(misc::rev_bytes(key)) {}
19 |     // Equality looks at key and seed only; const-qualified so it also works on const objects
20 |     __host__ __device__ inline bool operator==(const Decryptor& other) const
21 |     {
22 |         return key == other.key && key_seed == other.key_seed;
23 |     }
24 |     // Ordering is by manufacturer key only (the seed is ignored); const-qualified for use on const objects
25 |     __host__ __device__ inline bool operator<(const Decryptor& other) const
26 |     {
27 |         return key < other.key;
28 |     }
29 | 
30 |     // Get manufacturer key
31 |     __host__ __device__ inline uint64_t man() const { return key; }
32 | 
33 |     // Get seed
34 |     __host__ __device__ inline uint32_t seed() const { return key_seed; }
35 | 
36 |     // Get byte-reversed manufacturer key
37 |     __host__ __device__ inline uint64_t nam() const { return key_rev; }
38 | 
39 |     // If decryptor was initialized properly (a zero key counts as "not initialized")
40 |     __host__ __device__ inline bool is_valid() const { return key != 0; }
41 | 
42 | protected:
43 | 
44 |     // manufacturer key
45 |     uint64_t key;
46 | 
47 |     // seed (for special learning types only)
48 |     uint32_t key_seed;
49 | 
50 |     // reversed manufacturer key (computed once in the constructor via misc::rev_bytes)
51 |     uint64_t key_rev;
52 | 
53 | };


--------------------------------------------------------------------------------
/src/algorithm/keeloq/keeloq_encrypted.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common.h"
 4 | 
 5 | #include "device/cuda_common.h"
 6 | 
 7 | 
 8 | /**
 9 |  * Convenience struct
10 |  * Represents sent over the air data
11 |  * Since from engineering perspective normal byte (bit) order is big endian
12 |  * In order to get fixed and hopping codes OTA has to be bit-reversed
13 |  */
14 | struct EncParcel
15 | {
16 |     // Raw data transmitted over the air
17 |     uint64_t ota;
18 |     // Default: empty parcel (ota == 0)
19 |     __host__ EncParcel() : EncParcel(0) { }
20 |     // Bit-reverses the 64-bit OTA value: upper 32 bits become the fixed code, lower 32 bits the hopping code
21 |     __device__ __host__ EncParcel(uint64_t data) : ota(data)
22 |     {
23 |         uint64_t key = misc::rev_bits(ota, sizeof(ota) * 8);
24 | 
25 |         fixed = (uint32_t)(key >> 32);
26 |         hopping = (uint32_t)(key);
27 |     }
28 | 
29 |     // Fixed code in parcel
30 |     __device__ __host__ inline uint32_t fix() const { return fixed; }
31 | 
32 |     // hopping code in parcel
33 |     __device__ __host__ inline uint32_t hop() const { return hopping; }
34 | 
35 |     // lowest 10 bits of fixed code (mask 0x3FF) - serial part (can be used in decryption). NOTE(review): this comment used to say 18 bits - confirm the intended width
36 |     __device__ __host__ inline uint32_t srl() const { return fixed & 0x3FF; }
37 | 
38 |     // top 4 bits of fixed code (bits 28..31) - button (can be used in decryption)
39 |     __device__ __host__ inline uint32_t btn() const { return fixed >> 28; }
40 | 
41 | private:
42 | 
43 |     // Fixed part of the parcel ( 28-bit serial | 4-bit button )
44 |     uint32_t fixed;
45 | 
46 |     // Encrypted hopping code ( keeloq encrypted serial, button, counter )
47 |     uint32_t hopping;
48 | };


--------------------------------------------------------------------------------
/src/algorithm/keeloq/keeloq_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "keeloq_kernel.h"
2 | 
3 | #include "keeloq_kernel.inl"
4 | 


--------------------------------------------------------------------------------
/src/algorithm/keeloq/keeloq_kernel.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common.h"
 4 | 
 5 | #include "device/cuda_context.h"
 6 | #include "kernels/kernel_result.h"
 7 | 
 8 | #include "algorithm/keeloq/keeloq_kernel_input.h"
 9 | #include "algorithm/keeloq/keeloq_learning_types.h"
10 | 
11 | #include <cuda_runtime_api.h>
12 | // 32-bit lookup table of the non-linear function: bit number `g` (the 5-bit index built by g5/g5dec) is its output
13 | #define NLF_LOOKUP_CONSTANT 0x3a5c742e
14 | // KEELOQ_INNER_LOOP: with NO_INNER_LOOPS it only declares `index` as ctx.thread_id, otherwise it expands to CUDA_FOR_THREAD_ID
15 | #ifdef NO_INNER_LOOPS
16 |     #define KEELOQ_INNER_LOOP(ctx, index, num) uint32_t index = ctx.thread_id;
17 | #else
18 |     #define KEELOQ_INNER_LOOP(ctx, index, num) CUDA_FOR_THREAD_ID(ctx, index, num)
19 | #endif // !NO_INNER_LOOPS
20 | 
21 | // bit(x, n): value (0 or 1) of bit `n` of `x`
22 | // g5(x, a, b, c, d, e): packs bits a..e of `x` into a 5-bit number, bit `a` being the least significant
23 | #define bit(x, n) (((x) >> (n)) & 1)
24 | #define g5(x, a, b, c, d, e) \
25 |     (bit(x, a) + bit(x, b) * 2 + bit(x, c) * 4 + bit(x, d) * 8 + bit(x, e) * 16)
26 | 
27 | // g5dec: g5 specialised for bits 0, 8, 19, 25, 30 (mask 0x42080101); it declares a local `m`, so use it once per scope
28 | #define g5dec(x, g) \
29 |     auto m = (x & 0x42080101); \
30 |     g = (0b11111 & ( m | (m >> 7) | (m >> 17) | ( m >> 22) | (m >> 26)))
31 | 
32 | // Reference decryption: 528 left-shift rounds, each mixing bits 31 and 15 of x, key bit ((15 - r) & 63) and one NLF bit
33 | __device__ __host__ inline uint32_t keeloq_common_decrypt_orig(const uint32_t data, const uint64_t key) {
34 |     uint32_t x = data, r;
35 |     for (r = 0; r < 528; r++)
36 |         x = (x << 1) ^ bit(x, 31) ^ bit(x, 15) ^ (uint32_t)bit(key, (15 - r) & 63) ^
37 |         bit(NLF_LOOKUP_CONSTANT, g5(x, 0, 8, 19, 25, 30));
38 |     return x;
39 | }
40 | 
41 | // Optimized decryption: the same 528 rounds split as 33 x 16 (original note: about 5 times faster than the _orig version)
42 | __device__ __host__ inline uint32_t keeloq_common_decrypt(const uint32_t data, const uint64_t key)
43 | {
44 |     uint32_t x = data, g, k, f;
45 |     int32_t r = 15;
46 | 
47 |     // 33 outer iterations x 16 inner iterations = 528 rounds
48 |     for (uint8_t outer = 0; outer < 33; ++outer)
49 |     {
50 |         // Inner 16 cycles which could be unrolled (improves performance in release - decreases in debug)
51 |         UNROLL
52 |         for (uint8_t inner = 0; inner < 16; ++inner)
53 |         {
54 |             uint32_t key_bit = r & 0b111111; // r runs from 15 downwards and goes negative; masking keeps the key bit index in 0..63
55 | 
56 |             g5dec(x, g); // g = 5-bit NLF index built from bits 0, 8, 19, 25, 30 of x
57 | 
58 |             k = (uint32_t)((key >> key_bit));
59 |             f = ((x >> 31) ^ (x >> 15) ^ (NLF_LOOKUP_CONSTANT >> g) ^ k) & 1; // only the lowest bit of each term is kept
60 |             x = (x << 1) ^ f;
61 | 
62 |             --r;
63 |         }
64 |     }
65 | 
66 | 
67 |     return x;
68 | }
69 | // Encryption: 528 right-shift rounds, each feeding bits 0 and 16 of x, key bit (r & 63) and one NLF bit into bit 31
70 | __device__ __host__ inline uint32_t keeloq_common_encrypt(const uint32_t data, const uint64_t key) {
71 |     uint32_t x = data, r;
72 |     for (r = 0; r < 528; r++)
73 |         x = (x >> 1) ^ ((bit(x, 0) ^ bit(x, 16) ^ (uint32_t)bit(key, r & 63) ^
74 |             bit(NLF_LOOKUP_CONSTANT, g5(x, 1, 9, 20, 26, 31)))
75 |             << 31);
76 |     return x;
77 | }
78 | 
79 | namespace keeloq
80 | {
81 | namespace kernels
82 | {
83 | 
84 | // Launch a simple keeloq calculation on the GPU to check that everything is working
85 | __host__ bool cuda_is_working();
86 | 
87 | // Main kernel launcher wrapper (takes the kernel inputs plus the block count and threads-per-block count)
88 | __host__ KernelResult cuda_brute(KeeloqKernelInput & mainInputs, uint16_t ThreadBlocks, uint16_t ThreadsInBlock);
89 | 
90 | }
91 | 
92 | // Get encrypted OTA data for specific configuration with key and learning ( xor simple and normal supported )
93 | __host__ EncParcel GetOTA(uint64_t key, uint32_t seed, uint32_t serial, uint8_t button, uint16_t count, KeeloqLearningType::Type learning);
94 | 
95 | }


--------------------------------------------------------------------------------
/src/algorithm/keeloq/keeloq_kernel_input.cpp:
--------------------------------------------------------------------------------
 1 | #include "keeloq_kernel_input.h"
 2 | #include "common.h"
 3 | // Writes up to `num` decryptors of `source`, starting at index `from`, into the decryptors array (no-op when it is null)
 4 | void KeeloqKernelInput::WriteDecryptors(const std::vector<Decryptor>& source, size_t from, size_t num)
 5 | {
 6 | 	if (decryptors != nullptr)
 7 | 	{
 8 | 		assert(config.type == BruteforceType::Dictionary);
 9 | 		// Clamp the window to `source`: in unsigned math `size() - from` would wrap around when `from` is past the end
10 | 		const size_t first = std::min(from, source.size());
11 | 		decryptors->write(source.data() + first, std::min(num, source.size() - first));
12 | 	}
13 | }
14 | 
15 | // Advances the configuration via config.next_decryptor(); must not be used for Dictionary attacks (asserted)
16 | void KeeloqKernelInput::NextDecryptor()
17 | {
18 | 	assert(config.type != BruteforceType::Dictionary);
19 | 	config.next_decryptor();
20 | }
21 | // Stores the bruteforce configuration and learning mask and caches the result of learnings.is_all_enabled()
22 | void KeeloqKernelInput::Initialize(const BruteforceConfig& InConfig, const KeeloqLearningType::Mask& InLearnings)
23 | {
24 |     config = InConfig;
25 |     learnings = InLearnings;
26 |     allLearnings = learnings.is_all_enabled();
27 | }
28 | // Pre-generation hook: for Filtered attacks copies the start manufacturer key into filters.sync_key; other types do nothing
29 | void KeeloqKernelInput::BeforeGenerateDecryptors()
30 | {
31 |     switch (config.type)
32 |     {
33 |     case BruteforceType::Filtered:
34 |     {
35 |         config.filters.sync_key = config.start.man();
36 |         break;
37 |     }
38 |     default:
39 |         break;
40 |     }
41 | }
42 | // Post-generation hook: remembers decryptors->host_last() in config.last
43 | void KeeloqKernelInput::AfterGeneratedDecryptors()
44 | {
45 |     // The last generated decryptor is the first one of the next batch
46 |     //  Warning: In case of non-aligned calculations the "real" last decryptor may be somewhere in the middle of the array
47 |     config.last = decryptors->host_last(); // NOTE(review): no null check on decryptors here, unlike WriteDecryptors
48 | }
49 | // Returns the number of inputs stored in encdata (0 when encdata is null); asserts that the count is within 1..3
50 | size_t KeeloqKernelInput::NumInputs() const
51 | {
52 |     assert(encdata != nullptr && "Encdata unknown yet!");
53 | 
54 |     auto num = encdata ? encdata->host().num : 0;
55 | 
56 |     assert(num >= 1 && num <= 3 && "NumInputs(): Most probably something was wrong with memory copying!");
57 | 
58 |     return num;
59 | }
60 | // Returns true when the first two (or three) inputs share the same fixed code; false for a single input or null encdata
61 | bool KeeloqKernelInput::InputsFixMatch() const
62 | {
63 |     assert(encdata != nullptr && "Encdata unknown yet!");
64 | 
65 |     if (encdata)
66 |     {
67 |         std::vector<EncParcel> enc_data;
68 |         encdata->copy(enc_data); // work on a local copy of the parcels
69 | 
70 |         assert(enc_data.size() >= 1 && enc_data.size() <= 3 && "InputsFixMatch(): Most probably something was wrong with memory copying!");
71 | 
72 |         if (enc_data.size() > 2)
73 |         {
74 |             return enc_data[0].fix() == enc_data[1].fix() && enc_data[1].fix() == enc_data[2].fix();
75 |         }
76 |         else if (enc_data.size() > 1)
77 |         {
78 |             return enc_data[0].fix() == enc_data[1].fix();
79 |         }
80 |     }
81 | 
82 |     return false;
83 | }
84 | 


--------------------------------------------------------------------------------
/src/algorithm/keeloq/keeloq_kernel_input.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common.h"
 4 | 
 5 | #include <cstring> // memcpy
 6 | 
 7 | #include "device/cuda_array.h"
 8 | #include "device/cuda_object.h"
 9 | 
10 | #include "algorithm/keeloq/keeloq_encrypted.h"
11 | #include "algorithm/keeloq/keeloq_decryptor.h"
12 | #include "algorithm/keeloq/keeloq_single_result.h"
13 | #include "algorithm/keeloq/keeloq_learning_types.h"
14 | 
15 | #include "bruteforce/bruteforce_config.h"
16 | 
17 | 
18 | // Input data for main keeloq calculation kernel
19 | struct KeeloqKernelInput : TGenericGpuObject<KeeloqKernelInput>
20 | {
21 |     // Constant per-run input data (captured encoded)
22 |     CudaArray<EncParcel>* encdata = nullptr;
23 | 
24 |     // Single-run set of decryptors
25 |     CudaArray<Decryptor>* decryptors = nullptr;
26 | 
27 |     // Single-run results
28 |     CudaArray<SingleResult>* results = nullptr;
29 | 
30 |     KeeloqKernelInput() : TGenericGpuObject<KeeloqKernelInput>(this)
31 |     {
32 |     }
33 | 
34 |     KeeloqKernelInput(KeeloqKernelInput&& other) noexcept : TGenericGpuObject<KeeloqKernelInput>(this)
35 |     {
36 |         encdata = other.encdata;
37 |         decryptors = other.decryptors;
38 |         results = other.results;
39 |         config = other.config;
40 |         learnings = other.learnings;
41 |     }
42 | 
43 |     KeeloqKernelInput& operator=(KeeloqKernelInput&& other) = delete;
44 |     KeeloqKernelInput& operator=(const KeeloqKernelInput& other) = delete;
45 | 
46 | public:
47 |     //
48 |     __device__ __inline__ const KeeloqLearningType::Mask& GetLearningMask() const { return learnings; }
49 | 
50 |     //
51 |     __device__ __inline__ bool AllLearningsEnabled() const { return allLearnings; }
52 | 
53 |     //
54 |     __device__ __inline__ const BruteforceConfig& GetConfig() const { return config; }
55 | 
56 | public:
57 |     void WriteDecryptors(const std::vector<Decryptor>& source, size_t from, size_t num);
58 | 
59 |     void NextDecryptor();
60 | 
61 |     void Initialize(const BruteforceConfig& inConfig, const KeeloqLearningType::Mask& inLearnings);
62 | 
63 |     // A "callback" which is called by generator. Used to prepare inputs for generators
64 |     void BeforeGenerateDecryptors();
65 | 
66 |     // A "callback" which is called after generator creates Decryptors. Used to set correct last generated Decryptor
67 |     void AfterGeneratedDecryptors();
68 | 
69 |     // Get Number of OTA inputs. Will do a GPU->CPU copy
70 |     size_t NumInputs() const;
71 | 
72 |     // Does the fixed parts of inputs match? Will do a GPU->CPU copy
73 |     bool InputsFixMatch() const;
74 | private:
75 |     // Which type of learning use for decryption
76 |     KeeloqLearningType::Mask learnings;
77 | 
78 |     // optimizations. Just a bool field that could be accessed from GPU
79 |     bool allLearnings = false;
80 | 
81 |     // from this decryptor generation will start
82 |     BruteforceConfig config;
83 | };
84 | 


--------------------------------------------------------------------------------
/src/algorithm/keeloq/keeloq_learning_types.cpp:
--------------------------------------------------------------------------------
 1 | #include "keeloq_learning_types.h"
 2 | 
 3 | #include <cstring>
 4 | 
 5 | 
 6 | const char* KeeloqLearningType::LearningNames[] = {
 7 | 	"KEELOQ_LEARNING_SIMPLE",
 8 | 	"KEELOQ_LEARNING_SIMPLE_REV",
 9 | 	"KEELOQ_LEARNING_NORMAL",
10 | 	"KEELOQ_LEARNING_NORMAL_REV",
11 | 	"KEELOQ_LEARNING_SECURE",
12 | 	"KEELOQ_LEARNING_SECURE_REV",
13 | 	"KEELOQ_LEARNING_MAGIC_XOR_TYPE_1",
14 | 	"KEELOQ_LEARNING_MAGIC_XOR_TYPE_1_REV",
15 | 	"KEELOQ_LEARNING_FAAC",
16 | 	"KEELOQ_LEARNING_FAAC_REV",
17 | 	"KEELOQ_LEARNING_MAGIC_SERIAL_TYPE_1",
18 | 	"KEELOQ_LEARNING_MAGIC_SERIAL_TYPE_1_REV",
19 | 	"KEELOQ_LEARNING_MAGIC_SERIAL_TYPE_2",
20 | 	"KEELOQ_LEARNING_MAGIC_SERIAL_TYPE_2_REV",
21 | 	"KEELOQ_LEARNING_MAGIC_SERIAL_TYPE_3",
22 | 	"KEELOQ_LEARNING_MAGIC_SERIAL_TYPE_3_REV",
23 | 	"ALL"
24 | };
25 | 
26 | const size_t KeeloqLearningType::LearningNamesCount = sizeof(LearningNames) / sizeof(char*);
27 | 
28 | 
29 | std::string KeeloqLearningType::to_string(const std::vector<Type>& learning_types)
30 | {
31 | 	if (learning_types.size() == 0)
32 | 	{
33 | 		return LearningNames[KeeloqLearningType::LAST];
34 | 	}
35 | 
36 | 	return to_mask(learning_types).to_string();
37 | }
38 | 
39 | std::string KeeloqLearningType::Mask::to_string() const
40 | {
41 |     if (is_all_enabled())
42 |     {
43 |         return LearningNames[KeeloqLearningType::LAST];
44 |     }
45 | 
46 |     std::string result;
47 |     for (auto type = 0; type < KeeloqLearningType::LAST; ++type)
48 |     {
49 |         if (values[type])
50 |         {
51 |             if (result.size() > 0)
52 |             {
53 |                 result += ", ";
54 |             }
55 |             result += KeeloqLearningType::Name(type);
56 |         }
57 |     }
58 | 
59 |     return result;
60 | }
61 | 
62 | KeeloqLearningType::Mask KeeloqLearningType::to_mask(const std::vector<Type>& in_types)
63 | {
64 |     KeeloqLearningType::Mask result;
65 | 
66 |     if (in_types.size() > 0)
67 |     {
68 |         for (auto type : in_types)
69 |         {
70 |             result.values[type] = true;
71 |         }
72 |     }
73 |     else
74 |     {
75 |         memcpy(result.values, KeeloqLearningType::Mask::All, sizeof(KeeloqLearningType::Mask::All));
76 |     }
77 | 
78 |     return result;
79 | }
80 | 
81 | bool KeeloqLearningType::Mask::is_all_enabled() const
82 | {
83 |     return std::memcmp(values, All, sizeof(All)) == 0;
84 | }
85 | 


--------------------------------------------------------------------------------
/src/algorithm/keeloq/keeloq_learning_types.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "common.h"
  4 | 
  5 | #include <vector>
  6 | #include <string>
  7 | #include <type_traits>
  8 | #include <cuda_runtime_api.h>
  9 | 
 10 | 
 11 | enum class LearningDectyptionMode
 12 | {
 13 |     // Explicit defined learning types
 14 |     Invalid  = 0,
 15 | 
 16 |     Explicit = 1 << 0,
 17 | 
 18 |     Force = 1 << 1,
 19 | 
 20 |     Normal = 1 << 2,
 21 | 
 22 |     Seeded = 1 << 3,
 23 | 
 24 |     // Disable Reverse manufacturer key calculations
 25 |     NoRev = 1 << 4,
 26 | 
 27 |     // Run only learning types without seed
 28 |     ForceNormal = Force | Normal,
 29 | 
 30 |     // Run only learning types with seed
 31 |     ForceSeeded = Force | Seeded,
 32 | 
 33 |     // Explicit defined but without seed
 34 |     ExplicitNormal = Explicit | Normal,
 35 | 
 36 |     // Explicit defined but with seed only
 37 |     ExplicitSeeded = Explicit | Seeded,
 38 | 
 39 |     // RUNS ALL LEARNING TYPES. Seeded Included, even if seed is 0
 40 |     ForceAll = ForceNormal | ForceSeeded,
 41 | 
 42 |     // Runs runtime checks if learning type need to be calculated (specified via mask)
 43 |     ExplicitAll = ExplicitNormal | ExplicitSeeded
 44 | };
 45 | 
 46 | /**
 47 |  * reference: https://github.com/DarkFlippers/unleashed-firmware/blob/dev/lib/subghz/protocols/keeloq_common.h
 48 |  */
 49 | struct KeeloqLearningType
 50 | {
 51 |     using Type = uint8_t;
 52 | 
 53 |     enum : Type
 54 |     {
 55 |         Simple = 0,
 56 |         Simple_Rev,
 57 | 
 58 |         Normal,
 59 |         Normal_Rev,
 60 | 
 61 |         Secure,
 62 |         Secure_Rev,
 63 | 
 64 |         Xor,
 65 |         Xor_Rev,
 66 | 
 67 |         Faac,
 68 |         Faac_Rev,
 69 | 
 70 |         Serial1,
 71 |         Serial1_Rev,
 72 | 
 73 |         Serial2,
 74 |         Serial2_Rev,
 75 | 
 76 |         Serial3,
 77 |         Serial3_Rev,
 78 | 
 79 |         LAST,
 80 | 
 81 |         INVALID = 0xff,
 82 |     };
 83 | 
 84 |     struct Mask
 85 |     {
 86 |         friend struct KeeloqLearningType;
 87 | 
 88 |         // Default mask when all learning types are enabled
 89 |         static constexpr Type All[LAST] = { true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true };
 90 | 
 91 |         __host__ __device__ __inline__ bool operator[](uint8_t index) const { return values[index]; }
 92 | 
 93 |         void set(uint8_t index, bool is_enabled) { values[index] = is_enabled; }
 94 | 
 95 |         bool is_all_enabled() const;
 96 | 
 97 |         std::string to_string() const;
 98 | 
 99 |         private:
100 |             uint8_t values[LAST] = { 0 };
101 |     };
102 | 
103 | public:
104 | 
105 |     static std::string to_string(const std::vector<Type>& learning_types);
106 | 
107 |     static Mask to_mask(const std::vector<Type>& in_types);
108 |     static Mask full_mask() { return to_mask({}); }
109 | 
110 |     static constexpr const char* ValueString(Type type)
111 |     {
112 | 
113 |         constexpr const char* LUT[]{
114 |             "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
115 |             "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
116 |             "20", "21", "22", "23", "24", "25", "26", "27", "28", "29",
117 |             "30", "31", "32"
118 |         };
119 | 
120 |         return LUT[type];
121 |     }
122 | 
123 |     static constexpr const char* Name(Type type)
124 |     {
125 |         if (type >= LearningNamesCount) {
126 |             return "INVALID";
127 |         }
128 | 
129 |         return LearningNames[type];
130 |     }
131 | 
132 | private:
133 | 
134 |     static const char* LearningNames[];
135 | 
136 |     static const size_t LearningNamesCount;
137 | };
138 | 


--------------------------------------------------------------------------------
/src/algorithm/keeloq/keeloq_single_result.cpp:
--------------------------------------------------------------------------------
 1 | #include "keeloq_single_result.h"
 2 | 
 3 | #include <stdio.h>
 4 | 
 5 | #include "device/cuda_common.h"
 6 | 
 7 | 
 8 | namespace
 9 | {
10 |     uint32_t SerialFromOTA(uint64_t ota)
11 |     {
12 |         return misc::rev_bits(ota, sizeof(ota) * 8) >> 32 & 0x0FFFFFFF;
13 |     }
14 | }
15 | 
16 | void SingleResult::DecryptedArray::print(uint8_t element, uint64_t ota, bool ismatch) const
17 | {
18 |     printf("[%-40s] Btn:0x%X\tSerial:0x%X (0x%" PRIX32 ")\tCounter:0x%X\t%s\n", KeeloqLearningType::Name(element),
19 |         (data[element] >> 28),              // Button
20 |         (data[element] >> 16) & 0x3ff,      // Serial
21 |         SerialFromOTA(ota),                 // Serial (OTA)
22 |         data[element] & 0xFFFF,             // Counter
23 |         (ismatch ? "(MATCH)" : ""));
24 | }
25 | 
26 | void SingleResult::DecryptedArray::print() const
27 | {
28 |     for (uint8_t i = 0; i < ResultsCount; ++i)
29 |     {
30 |         print(i, -1, false);
31 |     }
32 | }
33 | 
34 | void SingleResult::print(bool onlymatch /* = true */) const
35 | {
36 |     printf("Results (Input: 0x%" PRIX64 " - Man key: 0x%" PRIX64 " - Seed: %u )\n\n",
37 |         encrypted.ota, decryptor.man(), decryptor.seed());
38 | 
39 |     for (uint8_t i = 0; i < ResultsCount; ++i)
40 |     {
41 |         bool isMatch = match == i;
42 |         if (!onlymatch)
43 |         {
44 |             decrypted.print(i, encrypted.ota, isMatch);
45 |         }
46 |         else if (isMatch)
47 |         {
48 |             decrypted.print(i, encrypted.ota, isMatch);
49 |         }
50 |     }
51 |     printf("\n");
52 | }


--------------------------------------------------------------------------------
/src/algorithm/keeloq/keeloq_single_result.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common.h"
 4 | 
 5 | #include <stdint.h>
 6 | 
 7 | #include "algorithm/keeloq/keeloq_learning_types.h"
 8 | #include "algorithm/keeloq/keeloq_encrypted.h"
 9 | #include "algorithm/keeloq/keeloq_decryptor.h"
10 | 
11 | 
/**
 *  Result of testing a single manufacturer key (decryptor) against a single input.
 * Holds one decrypted value per learning type (ResultsCount == KeeloqLearningType::LAST slots)
 * for further analysis.
 */
struct SingleResult
{
	// Number of decrypted slots - one per learning type
	static constexpr uint8_t ResultsCount = KeeloqLearningType::LAST;

	struct DecryptedArray
	{
		// fixed size array for every learning type
        // If it is in global memory (common case) - use operator[] - it reads through the cache
        // If it is thread local - use direct access
		uint32_t data[ResultsCount];

        // Read access by learning type index.
        // In device code the element is loaded with the __ldca intrinsic, on host it is a plain read.
        __host__ __device__ inline uint32_t operator[](uint32_t index) const
        {
            assert(index < ResultsCount && "Invalid index of decrypted data. Bigger than last element");
#if __CUDA_ARCH__
            return __ldca(&data[index]);
#else
            return data[index];
#endif
        }

        // Serial part of the decrypted value: 10 bits starting at bit 16
        __host__ __device__ inline uint32_t srl(KeeloqLearningType::Type learning) const
        {
            return ((*this)[learning] >> 16) & 0x3ff;
        }

        // Button part of the decrypted value: bits 28 and above
        __host__ __device__ inline uint32_t btn(KeeloqLearningType::Type learning) const
        {
            return ((*this)[learning] >> 28);
        }

        // Counter part of the decrypted value: low 16 bits
        __host__ __device__ inline uint32_t cnt(KeeloqLearningType::Type learning) const
        {
            return ((*this)[learning]) & 0x0000FFFF;
        }

        // Prints one element (learning type) together with the serial taken from `ota`
        void print(uint8_t element, uint64_t ota, bool ismatch) const;

        // Prints all elements
        void print() const;
    };


    // Input encrypted data
    EncParcel encrypted;

    // used manufacturer key and seed for this result
    Decryptor decryptor;

	// Decrypted values for each known learning type
	DecryptedArray decrypted;

	// Set by GPU after analysis if there was a match.
	// Compared against the learning type index when printing. Has no default initializer here.
	KeeloqLearningType::Type match;

	// Prints the result; with `onlymatch` only the matched learning type is printed
	void print(bool onlymatch = true) const;
};


--------------------------------------------------------------------------------
/src/algorithm/multibase_digit.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "common.h"
  4 | 
  5 | #include <vector>
  6 | #include <string>
  7 | 
  8 | 
/**
 *  This struct represents a single digit in a multi-base system (each digit has its own base).
 * Imagine it as a cylinder with N elements on it.
 *
 * It stores all possible variants (numerals) of one byte in a multi-base system (attack pattern) setup.
 */
struct MultibaseDigit
{
	template<uint8_t TNum> friend struct MultibaseSystem;

	// Creates digit config from the list of allowed byte values (numerals)
	__host__ inline MultibaseDigit(const std::vector<uint8_t>& numerals);

public:
	// Return numeral by index. The index must be below count() (asserted, no wrap-around)
	__host__ __device__ inline uint8_t numeral(uint8_t in_index) const;

	// Cast from byte-255 value to Digit's config:
	// a known value maps to itself, an unknown one maps to the numeral at index 0
	__host__ __device__ inline uint8_t cast(uint8_t value) const { return numeral(lookup(value)); }

	// return numeral index of the value
	// e.g.
	//  num = { 0x04, 0xAB, 0xd7, 0x56 }
	//  lookup(0xAB) returns 1
	//  lookup(0x56) returns 3
	//  lookup(0xFF) returns 0 (not found returns default)
	__host__ __device__ inline uint8_t lookup(uint8_t value) const { return lut[value]; }

	// return count of possible numerals for that digit
	__host__ __device__ inline uint8_t count() const { return size; }

	// Copy of the first count() numerals
	__host__ inline std::vector<uint8_t> as_vector() const { return std::vector<uint8_t>(&num[0], &num[0] + size); }

	// Numerals as ':' separated upper-case hex values
	__host__ inline std::string to_string() const;

private:
	// Default digit, built from DefaultByteArray<>. Private: reachable only through the friend MultibaseSystem
	MultibaseDigit() : MultibaseDigit(DefaultByteArray<>::as_vector<std::vector<uint8_t>>())
	{
	}

	// numeral values. it may be not just 0,1,2,3,4...
	// but for base 4 it may be: 0xA3, 0xCC, 0x01, 0x22
	uint8_t num[256] = { 0 };

	// lookup table:
	//  at index that equals numeral value there is a value which represents index in numerals
	//  e.g.                              https://asciiflow.com/
	//                                  ┌───────────────────────┐
	//                                  ▲                       │
	// numerals = [ 0x03, 0x02, 0x01, 0x00, ... garbage. ]      │
	//                                  ▲                       │
	//               ┌──────────────────┘                       │
	//               ▲                                          │
	//	    lut = [ 0x03, 0x02, 0x01, 0x00, 0x00 ... 0x00]      │
	//               ▲                                          │
	//               └──────────────────────────────────────────┘
	//
	uint8_t lut[256] = { 0 };

	// Actual size of numerals (the base of the number represented by this digit)
	uint8_t size = 0;
};
 71 | 
 72 | __host__ __device__ uint8_t MultibaseDigit::numeral(uint8_t in_index) const
 73 | {
 74 | 	assert(in_index < size);
 75 | 
 76 | 	// Important for optimization purposes
 77 | 	// WE ARE NOT USING (value % size)
 78 | 	return num[in_index];
 79 | }
 80 | 
 81 | __host__ inline MultibaseDigit::MultibaseDigit(const std::vector<uint8_t>& numerals)
 82 | {
 83 | 	// incrementing in the loop, for the duplicate numerals cases
 84 | 	size = 0;
 85 | 
 86 | 	for (uint8_t i = 0; i < numerals.size(); ++i)
 87 | 	{
 88 | 		// Getting next numeral candidate
 89 | 		uint8_t numeral_value = numerals[i];
 90 | 
 91 | 		// if there is 0 value (index) in the lookup table
 92 | 		// that means `numeral_value` wasn't added to available values yet
 93 | 		if (!lut[numeral_value])
 94 | 		{
 95 | 			// putting index of `numeric_value` to the lut
 96 | 			lut[numeral_value] = size;
 97 | 
 98 | 			// setting the size-th numeral
 99 | 			num[size] = numeral_value;
100 | 
101 | 			// increasing the size
102 | 			++size;
103 | 		}
104 | 	}
105 | 
106 | 	assert(size > 0 && "Digit base should be at least 0");
107 | }
108 | 
109 | __host__ inline std::string MultibaseDigit::to_string() const
110 | {
111 |     std::string hex;
112 | 
113 | 	for (int i = 0; i < count(); ++i)
114 | 	{
115 |         std::string fmt(i == 0 ? "%X" : ":%X");
116 |         hex += str::format<std::string>(fmt, numeral(i));
117 | 	}
118 | 
119 | 	return hex;
120 | }
121 | 


--------------------------------------------------------------------------------
/src/algorithm/multibase_number.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common.h"
 4 | 
 5 | #include <vector>
 6 | #include <cuda_runtime_api.h>
 7 | 
 8 | 
// The same 8 bytes of storage viewed either as one 64-bit value or as 8 separate bytes.
// NOTE(review): which byte of u8 is the least significant one depends on the platform's byte order.
union U64Number
{
	// Whole value
	uint64_t u64;

	// Per-byte access to the same storage
	uint8_t u8[8];
};
16 | 
17 | 
// A number in a multi-base system. Both fields are private and are
// filled in by the friend MultibaseSystem (see its cast() / increment()).
struct MultibaseNumber
{
	template<uint8_t TNum> friend struct MultibaseSystem;

	// The number as a plain 64-bit value (all numeral bytes combined)
	__host__ __device__ uint64_t number() const { return value.u64; }

private:

	// Per digit: the numeral (actual byte value) currently selected
	U64Number value   = {0};

	// Per digit: the index of that numeral inside the digit's numerals
	U64Number indices = {0};
};
32 | 


--------------------------------------------------------------------------------
/src/algorithm/multibase_system.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "common.h"
  4 | 
  5 | #include <vector>
  6 | #include <cuda_runtime_api.h>
  7 | 
  8 | #include "algorithm/multibase_digit.h"
  9 | #include "algorithm/multibase_number.h"
 10 | 
 11 | 
 12 | 
/**
*  This represents a number system where each digit has its own base.
* Imagine this as a set of rolling cylinders, installed side-by-side.
* This is a stateless structure. It allows doing arithmetical calculations
* and conversions.
*
* Since the application of this struct is only byte bruteforce - the maximum supported base is 255
*
* e.g. NumDigits == 4
*  So there are 4 cylinders
*  For example 1st cylinder has base 2, second - 4, third - 6, fourth - 8
*  So the number 1 will be equal 0001
*  number 10 is equals 0012    (1 * 8) + 2 = | 0 | 0 | 1 | 2 | )
*  number 100 is       0204   (12 * 8) + 4 = | 0 | 2 | 0 | 4 | )
*  number 500 is       2224   (62 * 8) + 4 = | 2 | 2 | 2 | 4 | ) = ((10 * 6 + 2) * 8) + 4 = ((((2 * 4 + 2)) * 6 + 2) * 8) + 4
*
*  Since this structure is pretty heavy
* the idea is to NOT allow non-const methods on device.
* You should have a single const reference on device.
*/
template<uint8_t NumDigits = 8>
struct MultibaseSystem
{
	static_assert(NumDigits <= 8, "At the moment we only support 8 bytes numbers");

	// For easier usage with aliased types
	constexpr static uint8_t DigitsNumber = NumDigits;

	/**
	 *  A generic case when all digits have different bases (pattern usage).
	 * Digits without an entry in @numerals get the default digit config.
	 */
	__host__ MultibaseSystem(const std::vector<std::vector<uint8_t>>& numerals);

	/**
	 *  Use the same digit config for every digit in this value (alphabet usage)
	 */
	__host__ MultibaseSystem(const MultibaseDigit& digit);

	/**
	 *  Default constructor where all digits use the default digit config
	 */
	__host__ MultibaseSystem() : MultibaseSystem(MultibaseDigit()) { }

	// It's a pretty heavy struct; if you want to clone it - use the constructor above
	// TODO: disable copy
	// MultibaseSystem(const MultibaseSystem& other) = delete;
	// MultibaseSystem& operator=(const MultibaseSystem& other) = delete;

public:

	// count of all numbers in this system (product of the sizes of all digits)
	__host__ __device__ inline size_t invariants() const;

	// cast base10 number into number of this system (byte by byte, through each digit's lookup table)
	__host__ __device__ inline MultibaseNumber cast(uint64_t input) const;

	// Adds @amount in base10 to the @target argument and returns it
	__host__ __device__ inline MultibaseNumber& increment(MultibaseNumber& target, uint64_t amount) const;

	// get digit config by its index (index must be below NumDigits, asserted)
	__host__ __device__ inline const MultibaseDigit& get_config(uint8_t digit_index) const { assert(digit_index < NumDigits); return digits[digit_index]; }

protected:

	// the digits
	MultibaseDigit digits[NumDigits];
};
 80 | 
 81 | //
 82 | using Multibase8DigitsSystem = MultibaseSystem<8>;
 83 | 
 84 | 
 85 | template<uint8_t NumDigits /*= 8*/>
 86 | __host__ __device__ size_t MultibaseSystem<NumDigits>::invariants() const
 87 | {
 88 | 	size_t num = digits[0].size;
 89 | 
 90 | 	UNROLL
 91 | 	for (uint8_t i = 1; i < NumDigits; ++i)
 92 | 	{
 93 | 		num *= digits[i].size;
 94 | 	}
 95 | 
 96 | 	return num;
 97 | }
 98 | 
 99 | 
100 | template<uint8_t NumDigits /*= 8*/>
101 | __host__ __device__ MultibaseNumber MultibaseSystem<NumDigits>::cast(uint64_t input) const
102 | {
103 | 	MultibaseNumber number;
104 | 	U64Number u64Input = { input };
105 | 
106 | 	UNROLL
107 | 	for (uint8_t i = 0; i < NumDigits; ++i)
108 | 	{
109 | 		number.indices.u8[i] = digits[i].lookup(u64Input.u8[i]);
110 | 
111 | 		number.value.u8[i] = digits[i].numeral(number.indices.u8[i]);
112 | 	}
113 | 
114 | 	return number;
115 | }
116 | 
117 | template<uint8_t NumDigits /*= 8*/>
118 | __host__ __device__ inline MultibaseNumber& MultibaseSystem<NumDigits>::increment(MultibaseNumber& target, uint64_t amount) const
119 | {
120 | 	UNROLL
121 | 	for (uint8_t i = 0; i < NumDigits; ++i)
122 | 	{
123 | 		uint8_t index = target.indices.u8[i];
124 | 		uint8_t size = digits[i].size;
125 | 
126 | 		target.indices.u8[i] = static_cast<uint8_t>((amount + index) % size);
127 | 		amount = (amount + index) / size;
128 | 
129 | 		target.value.u8[i] = digits[i].numeral(target.indices.u8[i]);
130 | 	}
131 | 
132 | 	// here base10value will contain overflow
133 | 	// not sure what to do with it
134 | 
135 | 	return target;
136 | }
137 | 
138 | 
139 | template<uint8_t NumDigits /*= 8*/>
140 | __host__ MultibaseSystem<NumDigits>::MultibaseSystem(const std::vector<std::vector<uint8_t>>& numerals)
141 | {
142 | 	for (uint8_t i = 0; i < NumDigits; ++i)
143 | 	{
144 | 		if (i < numerals.size())
145 | 		{
146 | 			digits[i] = MultibaseDigit(numerals[i]);
147 | 		}
148 | 		else
149 | 		{
150 | 			digits[i] = MultibaseDigit();
151 | 		}
152 | 	}
153 | }
154 | 
155 | template<uint8_t NumDigits /*= 8*/>
156 | __host__ MultibaseSystem<NumDigits>::MultibaseSystem(const MultibaseDigit& digit)
157 | {
158 | 	for (uint8_t i = 0; i < NumDigits; ++i)
159 | 	{
160 | 		digits[i] = digit;
161 | 	}
162 | }
163 | 


--------------------------------------------------------------------------------
/src/bruteforce/bruteforce_config.cpp:
--------------------------------------------------------------------------------
  1 | #include "bruteforce_config.h"
  2 | #include "bruteforce_type.h"
  3 | #include "bruteforce_filters.h"
  4 | 
  5 | #include "algorithm/keeloq/keeloq_decryptor.h"
  6 | 
  7 | 
  8 | BruteforceConfig BruteforceConfig::GetDictionary(std::vector<Decryptor>&& dictionary)
  9 | {
 10 |     BruteforceConfig result(Decryptor(0,0), BruteforceType::Dictionary, dictionary.size());
 11 |     result.decryptors = std::move(dictionary);
 12 |     return result;
 13 | };
 14 | 
 15 | BruteforceConfig BruteforceConfig::GetBruteforce(Decryptor first, size_t size)
 16 | {
 17 |     return BruteforceConfig(first, BruteforceType::Simple, size);
 18 | }
 19 | 
 20 | BruteforceConfig BruteforceConfig::GetBruteforce(Decryptor first, size_t size, const BruteforceFilters& filters)
 21 | {
 22 |     BruteforceConfig result(first, BruteforceType::Filtered, size);
 23 |     result.filters = filters;
 24 |     return result;
 25 | }
 26 | 
 27 | BruteforceConfig BruteforceConfig::GetSeedBruteforce(Decryptor first)
 28 | {
 29 |     return BruteforceConfig(first, BruteforceType::Seed, (uint32_t)-1);
 30 | }
 31 | 
 32 | BruteforceConfig BruteforceConfig::GetAlphabet(Decryptor first, const MultibaseDigit& alphabet, size_t num)
 33 | {
 34 |     auto result = GetPattern(first, BruteforcePattern(alphabet), num);
 35 |     result.type = BruteforceType::Alphabet;
 36 |     return result;
 37 | }
 38 | 
 39 | BruteforceConfig BruteforceConfig::GetPattern(Decryptor first, const BruteforcePattern& pattern, size_t num)
 40 | {
 41 |     num = std::min(pattern.size() - 1, num);
 42 | 
 43 |     first = Decryptor(pattern.init(first.man()).number(), first.seed());
 44 | 
 45 |     BruteforceConfig result(first, BruteforceType::Pattern, num);
 46 |     result.pattern = pattern;
 47 |     return result;
 48 | }
 49 | 
 50 | uint64_t BruteforceConfig::dict_size() const
 51 | {
 52 |     if (type == BruteforceType::Dictionary)
 53 |     {
 54 |         return size;
 55 |     }
 56 |     return 0;
 57 | }
 58 | 
 59 | uint64_t BruteforceConfig::brute_size() const
 60 | {
 61 |     if (type != BruteforceType::Dictionary)
 62 |     {
 63 |         return size;
 64 |     }
 65 |     return 0;
 66 | }
 67 | 
 68 | void BruteforceConfig::next_decryptor()
 69 | {
 70 |     if (type != BruteforceType::Dictionary)
 71 |     {
 72 |         start = last;
 73 | 
 74 |         if (type == BruteforceType::Alphabet || type == BruteforceType::Pattern)
 75 |         {
 76 |             // +1 for these attacks cause next here is the last *checked*
 77 |             auto startnum = pattern.init(start.man());
 78 |             start = Decryptor(pattern.next(startnum, 1).number(), start.seed());
 79 |         }
 80 |         else if (type == BruteforceType::Simple || type == BruteforceType::Filtered)
 81 |         {
 82 |             start = Decryptor(start.man() + 1, start.seed());
 83 |         }
 84 |     }
 85 | }
 86 | 
 87 | std::string BruteforceConfig::toString() const
 88 | {
 89 |     const char* pGeneratorName = BruteforceType::Name(type);
 90 |     switch (type)
 91 |     {
 92 |     case BruteforceType::Simple:
 93 |     {
 94 |         return str::format<std::string>("Type: %s. First: 0x%llX (seed:%u). Last: 0x%llX",
 95 |             pGeneratorName, start.man(), start.seed(), start.man() + brute_size());
 96 |     }
 97 |     case BruteforceType::Filtered:
 98 |     {
 99 |         return str::format<std::string>("Type: %s. Initial: 0x%llX (seed:%u). Brute count: %zd.\n\tFilters: %s",
100 |             pGeneratorName, start.man(), start.seed(), brute_size(), filters.toString().c_str());
101 |     }
102 |     case BruteforceType::Alphabet:
103 |     case BruteforceType::Pattern:
104 |     {
105 |         MultibaseNumber begin = pattern.init(start.man());
106 |         MultibaseNumber end = pattern.next(begin, brute_size());
107 | 
108 |         auto result =  str::format<std::string>("Type: %s. First: 0x%llX (seed:%u). Last: 0x%llX. (Count: %zd)  All invariants: %zd",
109 |             pGeneratorName, begin.number(), start.seed(), end.number(), brute_size(), pattern.size());
110 | 
111 |         if (type == BruteforceType::Alphabet)
112 |         {
113 |             result += str::format<std::string>("\n\tAlphabet: %s", pattern.bytes_variants(0).to_string().c_str());
114 |         }
115 |         else
116 |         {
117 |             std::string pattern_string = pattern.to_string(true);
118 |             result += str::format<std::string>("\nPattern: %s", pattern_string.c_str());
119 |         }
120 |         return result;
121 |     }
122 |     case BruteforceType::Dictionary:
123 |     {
124 |         return str::format<std::string>("Type: %s. Words num: %zd", pGeneratorName, dict_size());
125 |     }
126 |     case BruteforceType::Seed:
127 |     {
128 |         return str::format<std::string>("Type: %s. Manufacturer key: 0x%llX Start Seed:%u",
129 |             pGeneratorName, start.man(), start.seed());
130 |     }
131 |     }
132 |     return str::format<std::string>("UNSUPPORTED Type (%d): %s", (int)type, pGeneratorName);
133 | }
134 | 


--------------------------------------------------------------------------------
/src/bruteforce/bruteforce_config.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common.h"
 4 | 
 5 | #include <vector>
 6 | #include <string>
 7 | 
 8 | #include <cuda_runtime_api.h>
 9 | 
10 | #include "algorithm/keeloq/keeloq_decryptor.h"
11 | 
12 | #include "bruteforce/bruteforce_pattern.h"
13 | #include "bruteforce/bruteforce_filters.h"
14 | #include "bruteforce/bruteforce_type.h"
15 | #include "bruteforce/bruteforce_config.h"
16 | 
17 | 
18 | /**
19 |  *  Single run attack configuration
20 |  * Run - selected type with specific parameters
21 |  */
struct BruteforceConfig
{
    // HOST SET. ONCE. Which generator to use.
    BruteforceType::Type type;

    // HOST SET. UPDATING. PER BATCH. Decryption batch (or decryptors generation) will start from this
    Decryptor start;

    // HOST SET. ONCE. How many generator rounds should be taken (in fact how many times CUDA kernel will be called)
    size_t size;

    // Dictionary - HOST SET. ONCE.
    // Brute -      GPU SET. UPDATING.
    std::vector<Decryptor> decryptors;

    // HOST SET. ONCE. for filtered type.
    BruteforceFilters filters;

    // HOST SET. ONCE. for pattern or alphabet type. (alphabet is just special case of pattern)
    BruteforcePattern pattern;

    // GPU SET. UPDATING. Last generated decryptor (will be initial for next block run)
    Decryptor last;

public:

    // Empty configuration: zero start decryptor, type BruteforceType::LAST, size 0
    BruteforceConfig() : BruteforceConfig(Decryptor(0, 0), BruteforceType::LAST, 0)
    {
    }

public:
    // Factory functions, one per attack type

    // Dictionary attack over the given decryptors (the vector is moved in)
    static BruteforceConfig GetDictionary(std::vector<Decryptor>&& dictionary);

    // Simple bruteforce starting at `first`
    static BruteforceConfig GetBruteforce(Decryptor first, size_t size);

    // Filtered bruteforce starting at `first`
    static BruteforceConfig GetBruteforce(Decryptor first, size_t size, const BruteforceFilters& filters);

    // Seed bruteforce starting at `first`
    static BruteforceConfig GetSeedBruteforce(Decryptor first);

    // Alphabet attack (a pattern built from a single digit config); `num` defaults to "as many as possible"
    static BruteforceConfig GetAlphabet(Decryptor first, const MultibaseDigit& alphabet, size_t num = (size_t)-1);

    // Pattern attack; `num` defaults to "as many as possible"
    static BruteforceConfig GetPattern(Decryptor first, const BruteforcePattern& pattern, size_t num = (size_t)-1);

public:

    // `size` for dictionary attacks, otherwise 0
    uint64_t dict_size() const;

    // `size` for non-dictionary attacks, otherwise 0
    uint64_t brute_size() const;

    // Human readable description of the configuration
    std::string toString() const;

    // Updates `start` from `last` for the next batch (no-op for dictionary attacks)
    void next_decryptor();

private:
    // Common constructor used by the factories; `last` is initialized with the start decryptor
    BruteforceConfig(Decryptor start, BruteforceType::Type t, size_t num) :
        type(t), start(start), size(num), decryptors(), filters(), pattern(), last(start)
    {
    }
};
82 | 
// User-defined literal converting a string literal into a vector of its bytes,
// e.g. "abc"_b -> { 'a', 'b', 'c' }.
// The length supplied by the compiler is used, so the terminating NUL is not
// included while embedded NUL characters are preserved.
// Declared without whitespace between "" and the suffix: the spaced form
// (`operator "" _b`) is deprecated in recent C++ standards.
inline std::vector<uint8_t> operator""_b(const char* ascii, size_t num)
{
    return std::vector<uint8_t>(ascii, ascii + num);
}


--------------------------------------------------------------------------------
/src/bruteforce/bruteforce_filters.cpp:
--------------------------------------------------------------------------------
 1 | #include "bruteforce_filters.h"
 2 | #include "common.h"
 3 | 
 4 | #include <cuda_runtime.h>
 5 | #include <cuda_runtime_api.h>
 6 | 
 7 | 
// Display names for filter flags, used by BruteforceFilters::toString(Flags::Type).
// Each entry pairs a flag value with the text printed for it.
// The composite flags (AsciiAlphaNum, AsciiAny) have no entry of their own - they are
// printed as the list of the single flags they consist of.
const std::vector<std::tuple<BruteforceFilters::Flags::Type, const char*>> BruteforceFilters::FilterNames =
{
	{ BruteforceFilters::Flags::None,               "None" },
	{ BruteforceFilters::Flags::All,                "All" },

	{ BruteforceFilters::Flags::Max6ZerosInARow,    "6 zero bit in a row" },
	{ BruteforceFilters::Flags::Max6OnesInARow,     "6 one bit in a row" },

	{ BruteforceFilters::Flags::BytesIncremental,   "Incremental bytes pattern" },
	{ BruteforceFilters::Flags::BytesRepeat4,       "4 same byte in a row" },

	{ BruteforceFilters::Flags::AsciiNumbers,       "ASCII numbers" },
	{ BruteforceFilters::Flags::AsciiAlpha,         "ASCII letters" },
	{ BruteforceFilters::Flags::AsciiSpecial,       "ASCII special characters" },
};
23 | 
24 | 
25 | std::string BruteforceFilters::toString(Flags::Type flags) const
26 | {
27 | 	if (flags == Flags::None) { return "None"; }
28 | 	if (flags == Flags::All)  { return "All";  }
29 | 
30 | 	std::string result;
31 | 
32 | 	for (const auto& pair : BruteforceFilters::FilterNames)
33 | 	{
34 | 		auto check = (uint64_t)std::get<0>(pair);
35 | 		if (check != 0 && (check & (uint64_t)flags) == check)
36 | 		{
37 | 			result += std::get<1>(pair);
38 | 			result += " | ";
39 | 		}
40 | 	}
41 | 	if (result.size() > 0)
42 | 	{
43 | 		result.erase(result.end() - 3, result.end());
44 | 	}
45 | 	return result;
46 | }
47 | 
48 | std::string BruteforceFilters::toString() const
49 | {
50 | 	std::string include_str = toString(include);
51 | 	std::string exclude_str = toString(exclude);
52 | 
53 | 	return "Include: '" + include_str + "'\tExclude: '" + exclude_str + "'";
54 | }
55 | 


--------------------------------------------------------------------------------
/src/bruteforce/bruteforce_filters.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "common.h"
  4 | 
  5 | #include <vector>
  6 | #include <tuple>
  7 | #include <string>
  8 | 
  9 | #include <cuda_runtime_api.h>
 10 | 
 11 | 
// Size of a key in bytes: keys are handled as uint64_t values, i.e. 8 bytes.
constexpr uint8_t KeySizeBytes = sizeof(uint64_t);

// Size of a key in bits (8 bits per byte of uint64_t, i.e. 64).
constexpr uint8_t KeySizeBits = sizeof(uint64_t) * 8;
 15 | 
/**
 *  Filters for +1 bruteforce.
 * A key passes if `include(value) && !exclude(value)` (see Pass()).
 *
 * Gives very little performance increase over simple +1 with exclude.
 * The filters are not well optimized and contain a lot of `if` blocks.
 */
struct BruteforceFilters
{
    struct Flags
    {
        using Type = uint64_t;

        enum : Type
        {
            // No filter selected
            None = 0,

            // filter function returns true if key has a run of 6 (or more) consecutive 0 bits
            Max6ZerosInARow = (1 << 0),

            // filter function returns true if key has a run of 6 (or more) consecutive 1 bits
            Max6OnesInARow = (1 << 1),


            // filter function returns true if key has patterns like 11:22:33:44.. or FF:EE:DD:CC
            // 6 bytes by default
            BytesIncremental = (1 << 5),

            // filter function returns true if key has repeating patterns like xx11:11:11:11xx or xxAA:AA:AA:AAxx
            BytesRepeat4 = (1 << 6),

            // filter function returns true if key consists of ascii digits only ('0'-'9')
            AsciiNumbers = (1 << 11),

            // filter function returns true if key consists of letters only: 'a'-'z' 'A'-'Z'
            AsciiAlpha = (1 << 12),

            // combination of AsciiAlpha and AsciiNumbers
            AsciiAlphaNum = AsciiAlpha | AsciiNumbers,

            // filter function returns true if key consists of ASCII special symbols only, like '^%#&*
            AsciiSpecial = (1 << 13),

            // combination of all Ascii* flags; when all of them are set the key is
            // checked against the whole '!'..'~' range first (see check_filters)
            AsciiAny = AsciiAlphaNum | AsciiSpecial,

            // Every bit set
            All = (uint64_t)-1,
        };

        // True when every bit of @check is set in @test
        __host__ __device__ static inline bool HasAll(Type test, Type check)
        {
            return check == (test & check);
        }
        // True when at least one bit of @check is set in @test
        __host__ __device__ static inline bool HasAny(Type test, Type check)
        {
            return (test & check);
        }
    };

public:

    // Readable text for a flag mask
    std::string toString(Flags::Type flags) const;

    // Readable text for the include and exclude masks of this object
    std::string toString() const;

    // Return true if key pass current filters
    __host__ __device__ inline bool Pass(uint64_t key) const;

    // Return true if key matches at least one of the checks selected in @filter
    __host__ __device__ inline static bool check_filters(uint64_t key, Flags::Type filter);

private:

    // Every byte of the key is within '!'..'~'
    __host__ __device__ static bool all_any_ascii(uint64_t key);

    // Every byte of the key is within '0'..'9'
    __host__ __device__ static bool all_ascii_num(uint64_t key);

    // Every byte of the key is a latin letter
    __host__ __device__ static bool all_ascii_alpha(uint64_t key);

    // Every byte of the key is an ASCII punctuation/special character
    __host__ __device__ static bool all_ascii_symbol(uint64_t key);

    // Every byte of the key is within [ValueMin, ValueMax]
    template<uint8_t ValueMin, uint8_t ValueMax>
    __host__ __device__ inline static bool all_min_max(uint64_t key);

    // Key contains MaxCount consecutive bits equal to @bit
    template<uint8_t bit, uint8_t MaxCount = 6>
    __host__ __device__ inline static bool has_consecutive_bits(uint64_t key);

    // Key contains MaxCount consecutive equal bytes
    template<uint8_t MaxCount = 4>
    __host__ __device__ inline static bool has_consecutive_bytes(uint64_t key);

    // Key contains a run of bytes whose values differ by 0x11 per position
    template<uint8_t MaxCount = 6>
    __host__ __device__ inline static bool has_incremental_pattern(uint64_t key);


private:

    // Flag -> display name table used by toString(Flags::Type)
    static const std::vector<std::tuple<Flags::Type, const char*>> FilterNames;

public:
    // Filter for keys to include.
    // WARNING:
    //  Could be executed INFINITELY LONG TIME
    //  e.g. start: 0x00000000001 filter SmartFilterFlags::AsciiAny
    //  it will take around trillions and trillions operations just to get to the first valid with simple +1
    //  In case of specific input - use dictionary, pattern or alphabet
    Flags::Type include = Flags::All;

    // Filter for keys to exclude
    Flags::Type exclude = Flags::None;

    //  A global for all CUDA threads uint64 value which is used for generating filtered keys
    // it is accessed via atomicAdd function.
    uint64_t sync_key = 0;
};
131 | 
132 | 
133 | template<uint8_t ValueMin, uint8_t ValueMax>
134 | __host__ __device__ bool BruteforceFilters::all_min_max(uint64_t key)
135 | {
136 |     // for logical AND start should be with true
137 |     bool result = true;
138 |     uint8_t* bPtrKey = (uint8_t*)&key;
139 | 
140 |     UNROLL
141 |     for (uint8_t i = 0; i < KeySizeBytes; ++i)
142 |     {
143 |         // TODO: Some vector instruction here
144 |         result &= bPtrKey[i] >= ValueMin && bPtrKey[i] <= ValueMax;
145 |     }
146 | 
147 |     return result;
148 | }
149 | 
150 | template<uint8_t bit, uint8_t MaxCount /*= 6*/>
151 | __host__ __device__ bool BruteforceFilters::has_consecutive_bits(uint64_t key)
152 | {
153 |     uint8_t result = false;
154 |     uint64_t mask = (1 << MaxCount) - 1;
155 | 
156 |     key = bit ? key : ~key;
157 | 
158 |     UNROLL
159 |     for (uint8_t i = 0; i < KeySizeBits; ++i)
160 |     {
161 |         // inverse - filter pass if no consecutive bits
162 |         result |= (key & mask) == mask;
163 |         key = key >> 1;
164 |     }
165 | 
166 |     return result;
167 | }
168 | 
169 | template<uint8_t MaxCount /*= 4*/>
170 | __host__ __device__ bool BruteforceFilters::has_consecutive_bytes(uint64_t key)
171 | {
172 |     // for logical OR start should be with false
173 |     bool result = false;
174 | 
175 |     uint8_t index = 0;
176 |     uint8_t* bPtrKey = (uint8_t*)&key;
177 | 
178 |     UNROLL
179 |     for (uint8_t i = 1; i < KeySizeBytes; ++i)
180 |     {
181 |         bool equal = bPtrKey[i] == bPtrKey[index];
182 |         index = equal * index + (1 - equal) * i;
183 | 
184 |         result |= (i - index) >= (MaxCount - 1);
185 |     }
186 | 
187 |     return result;
188 | }
189 | 
190 | template<uint8_t MaxCount /*= 6*/>
191 | __host__ __device__ bool BruteforceFilters::has_incremental_pattern(uint64_t key)
192 | {
193 |     // for logical OR start should be with false
194 |     bool result = false;
195 | 
196 |     uint8_t index = 0;
197 |     uint8_t* bPtrKey = (uint8_t*)&key;
198 | 
199 |     UNROLL
200 |     for (uint8_t i = 1; i < KeySizeBytes; ++i)
201 |     {
202 |         uint8_t deltaIndex = (i - index);
203 | 
204 | #ifdef __CUDA_ARCH__
205 |         uint8_t asbDeltaValue = __sad(bPtrKey[i], bPtrKey[index], 0);
206 | #else
207 |         uint8_t asbDeltaValue = abs(bPtrKey[i] - bPtrKey[index]);
208 | #endif
209 | 
210 |         bool match = asbDeltaValue == (0x11 * deltaIndex);
211 | 
212 |         index = match * index + (1 - match) * i;
213 | 
214 |         result |= deltaIndex >= (MaxCount - 1);
215 |     }
216 | 
217 |     return result;
218 | }
219 | 
220 | 
221 | __host__ __device__ inline bool BruteforceFilters::check_filters(uint64_t key, Flags::Type filter)
222 | {
223 |     bool key_has_any = false;
224 | 
225 |     // fastest should go first
226 |     if (!key_has_any && Flags::HasAll(filter, Flags::AsciiAny))
227 |     {
228 |         key_has_any |= all_any_ascii(key);
229 |     }
230 | 
231 |     if (!key_has_any && Flags::HasAny(filter, Flags::AsciiNumbers))
232 |     {
233 |         key_has_any |= all_ascii_num(key);
234 |     }
235 | 
236 |     if (!key_has_any && Flags::HasAny(filter, Flags::AsciiAlpha))
237 |     {
238 |         key_has_any |= all_ascii_alpha(key);
239 |     }
240 | 
241 |     if (!key_has_any && Flags::HasAny(filter, Flags::AsciiSpecial))
242 |     {
243 |         key_has_any |= all_ascii_symbol(key);
244 |     }
245 | 
246 |     //
247 |     if (!key_has_any && Flags::HasAny(filter, Flags::Max6OnesInARow))
248 |     {
249 |         key_has_any |= has_consecutive_bits<1>(key);
250 |     }
251 | 
252 |     if (!key_has_any && Flags::HasAny(filter, Flags::Max6ZerosInARow))
253 |     {
254 |         key_has_any |= has_consecutive_bits<0>(key);
255 |     }
256 | 
257 |     if (!key_has_any && Flags::HasAny(filter, Flags::BytesRepeat4))
258 |     {
259 |         key_has_any |= has_consecutive_bytes(key);
260 |     }
261 | 
262 |     if (!key_has_any && Flags::HasAny(filter, BruteforceFilters::Flags::BytesIncremental))
263 |     {
264 |         key_has_any |= has_incremental_pattern(key);
265 |     }
266 | 
267 |     return key_has_any;
268 | }
269 | 
270 | __host__ __device__ inline bool BruteforceFilters::Pass(uint64_t key) const
271 | {
272 |     bool pass = true;
273 | 
274 |     if (include != Flags::All && include != Flags::None)
275 |     {
276 |         // Include keys match patterns
277 |         pass &= check_filters(key, include);
278 |     }
279 | 
280 |     if (exclude != Flags::None && exclude != Flags::All)
281 |     {
282 |         // Exclude keys  which match patterns
283 |         pass &= !check_filters(key, exclude);
284 |     }
285 | 
286 |     return pass;
287 | }
288 | 
289 | 
290 | __host__ __device__ inline bool BruteforceFilters::all_any_ascii(uint64_t key)
291 | {
292 |     constexpr uint8_t value_min = '!';
293 |     constexpr uint8_t value_max = '~';
294 | 
295 |     return all_min_max<value_min, value_max>(key);
296 | }
297 | 
298 | __host__ __device__ inline bool BruteforceFilters::all_ascii_num(uint64_t key)
299 | {
300 |     constexpr uint8_t value_min = '0';
301 |     constexpr uint8_t value_max = '9';
302 | 
303 |     return all_min_max<value_min, value_max>(key);
304 | }
305 | 
306 | __host__ __device__ inline bool BruteforceFilters::all_ascii_alpha(uint64_t key)
307 | {
308 |     // for logical AND start should be with true
309 |     bool result = true;
310 |     uint8_t* bPtrKey = (uint8_t*)&key;
311 | 
312 |     UNROLL
313 |         for (uint8_t i = 0; i < KeySizeBytes; ++i)
314 |         {
315 |             result &= (bPtrKey[i] >= 'a' && bPtrKey[i] <= 'z') || (bPtrKey[i] >= 'A' && bPtrKey[i] <= 'Z');
316 |         }
317 | 
318 |     return result;
319 | }
320 | 
321 | __host__ __device__ inline bool BruteforceFilters::all_ascii_symbol(uint64_t key)
322 | {
323 |     // for logical AND start should be with true
324 |     bool result = true;
325 |     uint8_t* bPtrKey = (uint8_t*)&key;
326 | 
327 |     UNROLL
328 |         for (uint8_t i = 0; i < KeySizeBytes; ++i)
329 |         {
330 |             result &=
331 |                 (bPtrKey[i] >= '!' && bPtrKey[i] <= '/') ||
332 |                 (bPtrKey[i] >= ':' && bPtrKey[i] <= '@') ||
333 |                 (bPtrKey[i] >= '[' && bPtrKey[i] <= '`') ||
334 |                 (bPtrKey[i] >= '{' && bPtrKey[i] <= '~');
335 |         }
336 | 
337 |     return result;
338 | }
339 | 


--------------------------------------------------------------------------------
/src/bruteforce/bruteforce_pattern.cpp:
--------------------------------------------------------------------------------
  1 | #include "bruteforce_pattern.h"
  2 | 
  3 | #include <utility>
  4 | 
  5 | 
  6 | namespace
  7 | {
  8 | std::vector<std::string> split(const std::string& delim, std::string input)
  9 | {
 10 |     std::vector<std::string> result;
 11 | 
 12 |     auto delim_index = input.find(delim);
 13 |     while (delim_index != std::string::npos)
 14 |     {
 15 |         std::string part = input.substr(0, delim_index);
 16 | 
 17 |         result.push_back(part);
 18 | 
 19 |         input.erase(0, delim_index + delim.size());
 20 | 
 21 |         delim_index = input.find(delim);
 22 |     }
 23 | 
 24 |     result.push_back(input);
 25 |     return result;
 26 | }
 27 | }
 28 | 
 29 | 
// Creates a pattern from per-byte sets of allowed values.
//  pattern_bytes  - allowed values for each byte, used to construct the underlying multibase system
//  pattern_string - textual form of the pattern, stored only for display (see to_string)
// NOTE(review): `pattern_bytes` is a named rvalue reference and is passed on as an lvalue;
// whether it gets copied depends on the PatternSystem constructor - confirm if a move was intended.
BruteforcePattern::BruteforcePattern(std::vector<std::vector<uint8_t>>&& pattern_bytes, const std::string& pattern_string)
    : system(pattern_bytes), repr_string(pattern_string)
{
}
 34 | 
 35 | 
// Creates a pattern from a single digit configuration which is handed to the underlying
// multibase system. There is no textual form in this case, so "N/A" is stored for display.
BruteforcePattern::BruteforcePattern(const MultibaseDigit& same_bytes)
    : system(same_bytes), repr_string("N/A")
{
}
 40 | 
 41 | __host__ std::string BruteforcePattern::to_string(bool extended) const
 42 | {
 43 |     if (!extended)
 44 |     {
 45 |         return repr_string;
 46 |     }
 47 | 
 48 |     std::string result;
 49 | 
 50 |     for (int d = 0; d < PatternSystem::DigitsNumber; ++d)
 51 |     {
 52 |         const auto& digitConfig = system.get_config(d);
 53 | 
 54 |         result += str::format<std::string>("\t 0%d: (", d);
 55 | 
 56 |         if (digitConfig.count() < 255)
 57 |         {
 58 |             for (int i = 0; i < digitConfig.count(); ++i)
 59 |             {
 60 |                 result += str::format<std::string>(i == 0 ? "%X" : ":%X", digitConfig.numeral(i));
 61 |             }
 62 |         }
 63 |         else
 64 |         {
 65 |             result += " <ANY> ";
 66 |         }
 67 | 
 68 |         result += ")\n";
 69 |     }
 70 | 
 71 |     return "\"" + repr_string + "\": \n" + result;
 72 | }
 73 | 
 74 | bool BruteforcePattern::TryParseSingleByte(std::string text, uint8_t& out)
 75 | {
 76 |     if (text.size() == 2 || (text.size() == 4 && text.rfind("0x", 0) == 0))
 77 |     {
 78 |         auto value = (uint8_t)strtoul(text.c_str(), nullptr, 16);
 79 | 
 80 |         if (value != 0 || (text.rfind("00") != std::string::npos))
 81 |         {
 82 |             out = value;
 83 |             return true;
 84 |         }
 85 |     }
 86 | 
 87 |     return false;
 88 | }
 89 | 
 90 | std::vector<uint8_t> BruteforcePattern::TryParseRangeBytes(std::string text)
 91 | {
 92 |     auto delimeter_index = text.find("-");
 93 |     std::string from = text.substr(0, delimeter_index);
 94 |     std::string to = text.substr(delimeter_index + 1);
 95 | 
 96 |     uint8_t byte_from;
 97 |     uint8_t byte_to;
 98 | 
 99 |     if (!TryParseSingleByte(from, byte_from) || !TryParseSingleByte(to, byte_to))
100 |     {
101 |         return { };
102 |     }
103 | 
104 |     if (byte_from > byte_to)
105 |     {
106 |         byte_from = std::exchange(byte_to, byte_from);
107 |     }
108 | 
109 |     std::vector<uint8_t> result(byte_to - byte_from + 1);
110 |     for (uint8_t i = 0; i < result.size(); ++i)
111 |     {
112 |         result[i] = i + byte_from;
113 |     }
114 | 
115 |     return result;
116 | }
117 | 
118 | std::vector<uint8_t> BruteforcePattern::ParseBytes(std::string text)
119 | {
120 |     // easy - all
121 |     if (text == "*")
122 |     {
123 |         return DefaultByteArray<>::as_vector<std::vector<uint8_t>>();
124 |     }
125 | 
126 |     // easy single byte 0xAA or AA
127 |     uint8_t single_byte;
128 |     if (TryParseSingleByte(text, single_byte))
129 |     {
130 |         return { single_byte };
131 |     }
132 | 
133 |     // range bytes
134 |     auto delimeter_index = text.find("-");
135 |     if (delimeter_index != std::string::npos)
136 |     {
137 |         return TryParseRangeBytes(text);
138 |     }
139 | 
140 |     // set of bytes
141 |     std::vector<uint8_t> result;
142 |     std::vector<std::string> splitted = split("|", text);
143 |     for (const auto& part : splitted)
144 |     {
145 |         uint8_t byte;
146 |         if (TryParseSingleByte(part, byte))
147 |         {
148 |             result.push_back(byte);
149 |         }
150 |     }
151 | 
152 |     return result;
153 | }
154 | 


--------------------------------------------------------------------------------
/src/bruteforce/bruteforce_pattern.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common.h"
 4 | 
 5 | #include <vector>
 6 | #include <string>
 7 | 
 8 | #include "algorithm/multibase_system.h"
 9 | #include "algorithm/multibase_number.h"
10 | 
11 | 
// Value produced by a pattern (a number in the multibase system below)
using PatternValue = MultibaseNumber;
// Multibase system with one digit per byte of a 64-bit key (sizeof(uint64_t) digits)
using PatternSystem = MultibaseSystem<sizeof(uint64_t)>;
14 | 
/**
 *  In pattern mode "cylinders" (see multi-base system) are of different sizes
 * Alphabet - 8 cylinders with the same set of bytes
 * Pattern  - 8 cylinders with different sets of bytes
 */
struct BruteforcePattern
{
    // How many bytes are in this pattern - this basically represents the byte-length of the bruteforce target
    // if bruteforce target is 64-bit key - this number should be 8
    static constexpr uint8_t BytesNumber = PatternSystem::DigitsNumber;

    BruteforcePattern() = default;

    //  Create pattern from bytes.
    // LITTLE ENDIAN. pattern_bytes[0] is lowest byte in pattern. e.g. ( [ { 0x01 }, ... ] will end up as key 0x.....01 )
    // Pattern string is just for reference
    BruteforcePattern(std::vector<std::vector<uint8_t>>&& pattern_bytes, const std::string& pattern_string = "N/A");

    //  Special case constructor when the pattern is the same for every digit in the underlying system
    // However this is not how this class is supposed to be constructed by public
    // user-code should use @BruteforceAlphabet instead in that case
    BruteforcePattern(const MultibaseDigit& same_bytes);

public:

    // Initialize value for this pattern from 64-bit number
    __host__ __device__ inline PatternValue init(uint64_t begin) const;

    // Roll cylinders, increment @curr by @amount
    __host__ __device__ inline PatternValue next(const PatternValue& curr, uint64_t amount) const;

    // how many numbers are in this pattern
    __host__ __device__ inline size_t size() const { return system.invariants(); }

    // This pattern is fixed size (same as target attacking key size)
    // according to this pattern each byte can be only a specific value
    // with this method you can retrieve configuration of that byte
    __host__ __device__ inline const MultibaseDigit& bytes_variants(uint8_t index) const;

public:

    // Pattern as text. With @extended the allowed values of every byte are listed as well
    __host__ std::string to_string(bool extended = false) const;

public:
    // Convert possible single-byte pattern string to set of bytes
    // 0xDA      -> single byte
    // 0x19-0x2A -> range
    // *         -> full
    // 0x91|0x23 -> set of specific bytes
    // The result may be empty if the text cannot be parsed
    static std::vector<uint8_t> ParseBytes(std::string text);

    // tries to parse single byte value like 0xA1 or FF
    // returns true and writes the value into @out on success
    static bool TryParseSingleByte(std::string text, uint8_t& out);

    // tries to parse an inclusive range of bytes like 0x19-0x2A
    // returns all bytes of the range, or an empty vector if a bound cannot be parsed
    static std::vector<uint8_t> TryParseRangeBytes(std::string text);

protected:

    // Multibase system which holds the allowed values of every byte
    PatternSystem system;

    // **cannot and not designed** to be accessed on GPU
    std::string repr_string;
};
79 | 
// Creates the pattern value for @begin by casting the 64-bit number
// into the underlying multibase system.
__host__ __device__ inline PatternValue BruteforcePattern::init(uint64_t begin) const
{
    return system.cast(begin);
}
84 | 
85 | __host__ __device__ inline PatternValue BruteforcePattern::next(const PatternValue& curr, uint64_t amount) const
86 | {
87 |     MultibaseNumber result = curr;
88 |     return system.increment(result, amount);
89 | }
90 | 
// Returns the digit configuration (allowed values) of the byte at @index.
// @index must be lower than BytesNumber - this is checked with an assert only.
__host__ __device__ inline const MultibaseDigit& BruteforcePattern::bytes_variants(uint8_t index) const
{
    assert(index < BytesNumber && "Invalid byte index, it's bigger that bytes count in this pattern");
    return system.get_config(index);
}
96 | 


--------------------------------------------------------------------------------
/src/bruteforce/bruteforce_round.cpp:
--------------------------------------------------------------------------------
  1 | #include "bruteforce_round.h"
  2 | 
  3 | #include "common.h"
  4 | #include "bruteforce_config.h"
  5 | #include "kernels/kernel_result.h"
  6 | #include "algorithm/keeloq/keeloq_encrypted.h"
  7 | #include "algorithm/keeloq/keeloq_learning_types.h"
  8 | #include "algorithm/keeloq/keeloq_single_result.h"
  9 | 
 10 | #include <cuda_runtime_api.h>
 11 | 
 12 | 
// Standard constructor.
//  enc               - encrypted inputs, copied into this round
//  config            - attack configuration, handed to the kernel inputs
//  selected_learning - learning types to use, converted to a mask for the kernel inputs
//  blocks, threads, iterations - CUDA launch setup
// No memory is allocated here - see Init().
BruteforceRound::BruteforceRound(const std::vector<EncParcel>& enc, const BruteforceConfig& config, std::vector<KeeloqLearningType::Type> selected_learning,
    uint32_t blocks, uint32_t threads, uint32_t iterations)
    : encrypted_data(enc)
{
#if NO_INNER_LOOPS
    // with inner loops disabled the requested iterations count is ignored
    iterations = 1;
#endif

    CUDASetup[0] = blocks;
    CUDASetup[1] = threads;
    CUDASetup[2] = iterations;

    // NOTE(review): the product is computed from uint32_t operands - confirm it cannot overflow for large setups
    num_decryptors_per_batch = iterations * threads * blocks;

    kernel_inputs.Initialize(config, KeeloqLearningType::to_mask(selected_learning));
}
 29 | 
// Copies the results held by the kernel inputs into the internal `block_results`
// container and returns a const reference to that container.
const std::vector<SingleResult>& BruteforceRound::read_results_gpu()
{
    kernel_inputs.results->copy(block_results);
    return block_results;
}
 35 | 
// Copies the decryptors held by the kernel inputs into the internal `decryptors`
// container and returns a const reference to that container.
const std::vector<Decryptor>& BruteforceRound::read_decryptors_gpu()
{
    kernel_inputs.decryptors->copy(decryptors);
    return decryptors;
}
 41 | 
 42 | bool BruteforceRound::check_results(const KernelResult& result)
 43 | {
 44 |     if (result.error < 0)
 45 |     {
 46 |         printf("Kernel fatal error: %d\n Round should be finished!\n", result.error);
 47 |         return true;
 48 |     }
 49 |     else if (result.error != 0)
 50 |     {
 51 |         printf("CUDA calculations num errors: %d\n", result.error);
 52 |     }
 53 | 
 54 |     if (result.value > 0)
 55 |     {
 56 |         auto& all_results = read_results_gpu();
 57 | 
 58 |         printf("Matches count: %d\n", result.value);
 59 | 
 60 |         for (const auto& result : all_results)
 61 |         {
 62 |             if (result.match == KeeloqLearningType::INVALID)
 63 |             {
 64 |                 continue;
 65 |             }
 66 | 
 67 |             result.print();
 68 |         }
 69 | 
 70 |         return true;
 71 |     }
 72 | 
 73 |     return false;
 74 | }
 75 | 
 76 | size_t BruteforceRound::get_mem_size() const
 77 | {
 78 |     assert(inited);
 79 |     return
 80 |         encrypted_data.size() * sizeof(EncParcel) +
 81 |         decryptors.size() * sizeof(Decryptor) +
 82 |         block_results.size() * sizeof(SingleResult);
 83 | }
 84 | 
 85 | size_t BruteforceRound::num_batches() const
 86 | {
 87 |     assert(inited);
 88 |     if (Type() == BruteforceType::Dictionary)
 89 |     {
 90 |         uint8_t non_align = Config().dict_size() % keys_per_batch() == 0 ? 0 : 1;
 91 |         return Config().dict_size() / keys_per_batch() + non_align;
 92 |     }
 93 |     else
 94 |     {
 95 |         uint8_t non_align = Config().brute_size() % keys_per_batch() == 0 ? 0 : 1;
 96 |         return Config().brute_size() / keys_per_batch() + non_align;
 97 |     }
 98 | }
 99 | 
// Number of keys checked in one batch - the size of the `decryptors` container
// (which is sized in Init(), hence the assert).
size_t BruteforceRound::keys_per_batch() const
{
    assert(inited);
    return decryptors.size();
}
105 | 
// Number of results produced by one batch - the size of the `block_results` container
// (which is sized in Init(), hence the assert).
size_t BruteforceRound::results_per_batch() const
{
    assert(inited);
    return block_results.size();
}
111 | 
112 | void BruteforceRound::Init()
113 | {
114 |     if (!inited)
115 |     {
116 |         // allocated once. updated every run on GPU
117 |         decryptors = std::vector<Decryptor>(num_decryptors_per_batch);
118 | 
119 |         // allocated once. updated evert run on GPU. copied to CPU only if match found.
120 |         block_results = std::vector<SingleResult>(encrypted_data.size() * decryptors.size());
121 | 
122 |         alloc();
123 | 
124 |         inited = true;
125 |     }
126 | }
127 | 
128 | std::string BruteforceRound::to_string() const
129 | {
130 |     assert(inited);
131 | 
132 |     return str::format<std::string>("Setup:\n"
133 |         "\tCUDA: Blocks:%u Threads:%u Iterations:%u\n"
134 |         "\tEncrypted data size:%zd\n"
135 |         "\tLearning type:%s\n"
136 |         "\tResults per batch:%zd\n"
137 |         "\tDecryptors per batch:%zd\n"
138 |         "\tConfig: %s",
139 |         CudaBlocks(), CudaThreads(), CudaThreadIterations(),
140 |         encrypted_data.size(), kernel_inputs.GetLearningMask().to_string().c_str(), results_per_batch(), keys_per_batch(), Config().toString().c_str());
141 | }
142 | 
143 | 
// Allocates the GPU side arrays for encrypted inputs, decryptors and results
// from the corresponding host containers.
// Expected to be called while nothing is allocated yet: this is asserted in debug builds,
// while the `if` checks below additionally make sure that an existing array is never replaced
// when asserts are compiled out.
void BruteforceRound::alloc()
{
    // nothing must be allocated at this point
    assert(kernel_inputs.encdata == nullptr		&& "Encrypted data already allocated on GPU");
    assert(kernel_inputs.decryptors == nullptr	&& "Decryptors data already allocated on GPU");
    assert(kernel_inputs.results == nullptr		&& "Results data already allocated on GPU");

    // ALLOCATE ON GPU
    if (kernel_inputs.encdata == nullptr)
    {
        kernel_inputs.encdata = CudaArray<EncParcel>::allocate(encrypted_data);
    }

    if (kernel_inputs.decryptors == nullptr)
    {
        kernel_inputs.decryptors = CudaArray<Decryptor>::allocate(decryptors);
    }

    if (kernel_inputs.results == nullptr)
    {
        kernel_inputs.results = CudaArray<SingleResult>::allocate(block_results);
    }
}
167 | 
// Releases everything held by this round: every allocated GPU array is freed and its
// handle reset to nullptr (so calling this repeatedly is harmless), then the host
// containers are cleared.
// NOTE(review): `inited` is not reset here - confirm the round is not meant to be re-initialized after free().
void BruteforceRound::free()
{
    if (kernel_inputs.encdata != nullptr)
    {
        kernel_inputs.encdata->free();
        kernel_inputs.encdata = nullptr;
    }

    if (kernel_inputs.decryptors != nullptr)
    {
        kernel_inputs.decryptors->free();
        kernel_inputs.decryptors = nullptr;
    }

    if (kernel_inputs.results != nullptr)
    {
        kernel_inputs.results->free();
        kernel_inputs.results = nullptr;
    }

    // host side copies
    encrypted_data.clear();
    decryptors.clear();
    block_results.clear();
}
192 | 


--------------------------------------------------------------------------------
/src/bruteforce/bruteforce_round.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "common.h"
  4 | 
  5 | #include <vector>
  6 | #include <string>
  7 | 
  8 | #include "algorithm/keeloq/keeloq_learning_types.h"
  9 | #include "algorithm/keeloq/keeloq_single_result.h"
 10 | #include "algorithm/keeloq/keeloq_encrypted.h"
 11 | #include "algorithm/keeloq/keeloq_kernel_input.h"
 12 | 
 13 | #include "bruteforce/bruteforce_config.h"
 14 | #include "kernels/kernel_result.h"
 15 | 
 16 | 
/**
 *  A round is a set of bruteforce batches.
 * Each batch runs N threads.
 * Each thread checks 1 or more decryptors.
 * Each check is 1 or more (configured in args) keeloq learnings.
 *
 * Typical flow:
 *  Some rounds are created from the command line - e.g. Dictionary and Simple attacks.
 *  Each attack has N decryptors to check, using B blocks, T threads and I iterations.
 *  This means `(1 BATCH size) = B * T * I` decryptors checked per batch.
 *  This means number of batches = `N / (1 BATCH size)`.
 *
 * NOTE(review): the destructor calls free() while copy/move operations are left
 * implicitly generated - confirm that instances are never copied while they own
 * GPU allocations.
 */
struct BruteforceRound
{
    // Construct round struct without specific learning type (means use all learnings)
    BruteforceRound(const std::vector<EncParcel>& data, const BruteforceConfig& gen, uint32_t blocks, uint32_t threads, uint32_t iterations) :
        BruteforceRound(data, gen, {}, blocks, threads, iterations) {}

    // Construct round struct with only one selected learning type
    BruteforceRound(const std::vector<EncParcel>& data, const BruteforceConfig& gen, KeeloqLearningType::Type single_learning,
        uint32_t blocks, uint32_t threads, uint32_t iterations) :
        BruteforceRound(data, gen, std::vector<KeeloqLearningType::Type> { single_learning }, blocks, threads, iterations) {}

    // Standard constructor (both constructors above delegate to it)
    BruteforceRound(const std::vector<EncParcel>& data, const BruteforceConfig& gen, std::vector<KeeloqLearningType::Type> selected_learning,
        uint32_t blocks, uint32_t threads, uint32_t iterations);

    // Releases everything the round holds by calling free()
    ~BruteforceRound()
    {
        free();
    }

public:
    // Allocates memory
    void Init();

    // Reads results data from GPU memory into internal container and returns const reference to it
    const std::vector<SingleResult>& read_results_gpu();

    // Reads decryptors data from GPU memory into internal container and returns const reference to it
    const std::vector<Decryptor>& read_decryptors_gpu();

    // Checks Kernel's results
    // Returns true if the round should be finished
    bool check_results(const KernelResult& result);

    // Get allocated memory amount for data
    size_t get_mem_size() const;

    // How many batches in this round (basically total keys to check divided by number of keys in a batch)
    size_t num_batches() const;

    // How many calculated results are in a batch (if 3 inputs are used - 3 x keys_per_batch)
    size_t results_per_batch() const;

    // How many keys to check in this batch
    size_t keys_per_batch() const;

    // Human-readable description of the round
    std::string to_string() const;

public:
    // Number of CUDA blocks (CUDASetup[0])
    inline uint32_t CudaBlocks() const { return CUDASetup[0]; }

    // Number of CUDA threads (CUDASetup[1])
    inline uint32_t CudaThreads() const { return CUDASetup[1]; }

    // Number of iterations per thread (CUDASetup[2])
    inline uint32_t CudaThreadIterations() const { return CUDASetup[2]; }

    // Bruteforce configuration stored in the kernel inputs; asserts that the round is inited
    inline const BruteforceConfig& Config() const { assert(inited); return kernel_inputs.GetConfig(); }

    // Attack type taken from the configuration; asserts that the round is inited
    inline BruteforceType::Type Type() const { assert(inited); return Config().type; }

    // Mutable access to the kernel inputs; asserts that the round is inited
    inline KeeloqKernelInput& Inputs() { assert(inited); return kernel_inputs; }

private:

    void alloc();

    void free();

private:

    // Checked by the asserting accessors above
    bool inited = false;

    uint32_t num_decryptors_per_batch = 0;

    // Inputs handed to the kernels (also the source of the configuration, see Config())
    KeeloqKernelInput kernel_inputs;

    // Constant per run
    std::vector<EncParcel> encrypted_data;

    // may hold a large amount of data
    std::vector<Decryptor> decryptors;

    // may hold a large amount of data
    std::vector<SingleResult> block_results;

    // { blocks, threads, iterations } - see the Cuda*() accessors
    uint32_t CUDASetup[3] = { 0 };
};


--------------------------------------------------------------------------------
/src/bruteforce/bruteforce_type.cpp:
--------------------------------------------------------------------------------
 1 | #include "bruteforce_type.h"
 2 | 
 3 | 
// Display names indexed by the attack type value (see BruteforceType::Name).
const char* BruteforceType::GeneratorTypeName[] = {
    "Dictionary",
    "Simple",
    "Filtered",
    "Alphabet",
    "Pattern",
    "Seed"
};

// Number of entries in GeneratorTypeName
const size_t BruteforceType::GeneratorTypeNamesCount = sizeof(GeneratorTypeName) / sizeof(char*);
14 | 
15 | const char* BruteforceType::Name(Type type)
16 | {
17 |     if (type > GeneratorTypeNamesCount)
18 |     {
19 |         return "UNKNOWN";
20 |     }
21 | 
22 |     return GeneratorTypeName[type];
23 | }


--------------------------------------------------------------------------------
/src/bruteforce/bruteforce_type.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common.h"
 4 | 
/**
 *  Type of bruteforce attack.
 *  The numeric value of each type is used as an index into GeneratorTypeName,
 *  so the order of the names table must follow the order of this enum.
 */
struct BruteforceType
{
    using Type = uint8_t;

    enum : Type
    {
        // Generation will be skipped
        Dictionary = 0,
        None = Dictionary,

        // Simple +1 generator (very fast in terms of generation of decryptors candidates)
        Simple,

        // Simple +1 bruteforce, but with filters applied (performance may degrade)
        Filtered,

        // Specify alphabet and brute over it
        Alphabet,

        // ASCII pattern like ?A:01:??:3?:*
        Pattern,

        // Simple +1 seed bruteforce. Since seed is a 32bit value, seed bruteforce may be done in an acceptable amount of time
        Seed,

        // Not for usage
        LAST,
    };

    // Returns the display name for @type
    static const char* Name(Type type);

private:

    // Display names, one per attack type
    static const char* GeneratorTypeName[];

    // Number of entries in GeneratorTypeName
    static const size_t GeneratorTypeNamesCount;
};
45 | 
46 | 


--------------------------------------------------------------------------------
/src/bruteforce/generators/generator_bruteforce.cpp:
--------------------------------------------------------------------------------
 1 | #include "generator_bruteforce.h"
 2 | 
 3 | #include "algorithm/keeloq/keeloq_kernel_input.h"
 4 | #include "kernels/kernel_result.h"
 5 | 
 6 | 
 7 | int GeneratorBruteforce::PrepareDecryptors(KeeloqKernelInput& inputs, uint16_t blocks, uint16_t threads)
 8 | {
 9 |     const BruteforceConfig& config = inputs.GetConfig();
10 |     KernelResult generator_results;
11 | 
12 |     inputs.BeforeGenerateDecryptors();
13 | 
14 |     switch (config.type)
15 |     {
16 |     case BruteforceType::Simple:
17 |     {
18 |         GeneratorBruteforceSimple::LaunchKernel(blocks, threads, inputs.ptr(), generator_results.ptr());
19 |         break;
20 |     }
21 |     case BruteforceType::Seed:
22 |     {
23 |         GeneratorBruteforceSeed::LaunchKernel(blocks, threads, inputs.ptr(), generator_results.ptr());
24 |         break;
25 |     }
26 |     case BruteforceType::Filtered:
27 |     {
28 |         GeneratorBruteforceFiltered::LaunchKernel(blocks, threads, inputs.ptr(), generator_results.ptr());
29 |         break;
30 |     }
31 |     case BruteforceType::Pattern:
32 |     case BruteforceType::Alphabet:
33 |     {
34 |         GeneratorBruteforcePattern::LaunchKernel(blocks, threads, inputs.ptr(), generator_results.ptr());
35 |         break;
36 |     }
37 |     case BruteforceType::Dictionary:
38 |     {
39 |         return 0;
40 |     }
41 |     default:
42 | 
43 |         printf("Error: Invalid bruteforce type: %d %s! Don't know how to generate decryptors!\n",
44 |             (int)config.type, BruteforceType::Name(config.type));
45 |         return 0;
46 |     }
47 | 
48 |     inputs.read();          // it will not cause underneath arrays copy
49 |     generator_results.read();
50 | 
51 |     inputs.AfterGeneratedDecryptors();
52 | 
53 |     return generator_results.error;
54 | }
55 | 


--------------------------------------------------------------------------------
/src/bruteforce/generators/generator_bruteforce.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common.h"
 4 | 
 5 | #include <cuda_runtime.h>
 6 | 
 7 | #include "algorithm/keeloq/keeloq_kernel_input.h"
 8 | #include "kernels/kernel_result.h"
 9 | 
10 | 
/**
 * Declares a new struct which represents a wrapper around a .cu kernel implementation.
 *
 * For a given @name this declares:
 *  - the extern "C" getter function that returns the kernel address,
 *  - the __global__ kernel itself (with the argument list passed in __VA_ARGS__),
 *  - struct @name deriving from IGenerator<name>, whose GetKernelFunctionPtr()
 *    casts the getter's result to the kernel function pointer type.
 */
#define DECLARE_GENERATOR(name, ...) \
	extern "C" void* GENERATOR_KERNEL_GETTER_NAME(name)(); \
	__global__ void GENERATOR_KERNEL_NAME(name)(__VA_ARGS__); \
	struct name : public IGenerator<name> \
	{\
		typedef void(*func)(__VA_ARGS__); \
		inline static func GetKernelFunctionPtr() { return (func)GENERATOR_KERNEL_GETTER_NAME(name)(); } \
	};
22 | 
23 | 
24 | template<typename TSelf>
25 | struct IGenerator
26 | {
27 | 	typedef void(*KernelFunc)(KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr results);
28 | 
29 | 	static inline void LaunchKernel(uint16_t blocks, uint16_t threads, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr results)
30 | 	{
31 | 		void* args[] = { &input, &results };
32 | 
33 | 		auto* func = TSelf::GetKernelFunctionPtr();
34 | 
35 | 		auto error = cudaLaunchKernel((const void*)func, dim3(blocks), dim3(threads), args, 0, nullptr);
36 | 		CUDA_CHECK(error);
37 | 	}
38 | };
39 | 
// Host-side entry point for decryptor generation
struct GeneratorBruteforce
{
	// Checks the type of generator configured in @inputs and launches the matching kernel
	// with @blocks x @threads to generate the next batch of decryptors.
	// Decryptors are generated on GPU and stored in GPU memory.
	// Returns the error value reported by the generator kernel (0 when nothing was launched).
	static int PrepareDecryptors(KeeloqKernelInput& inputs, uint16_t blocks, uint16_t threads);
};
46 | 
47 | 
// Extern CUDA kernels - implementations are in the *_kernel.inl files.
// Each line declares the kernel, its getter and the wrapper struct of the same name.
DECLARE_GENERATOR(GeneratorBruteforcePattern, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls);
DECLARE_GENERATOR(GeneratorBruteforceFiltered, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls);
DECLARE_GENERATOR(GeneratorBruteforceSimple, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls);
DECLARE_GENERATOR(GeneratorBruteforceSeed, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls);
53 | 
54 | 
55 | 
56 | 


--------------------------------------------------------------------------------
/src/bruteforce/generators/generator_bruteforce_filtered_kernel.inl:
--------------------------------------------------------------------------------
  1 | #include "device/cuda_context.h"
  2 | 
  3 | #include <cuda_runtime.h>
  4 | #include <device_atomic_functions.h>
  5 | 
  6 | #include "kernels/kernel_result.h"
  7 | #include "algorithm/keeloq/keeloq_kernel_input.h"
  8 | #include "algorithm/keeloq/keeloq_decryptor.h"
  9 | 
 10 | #include "bruteforce/bruteforce_type.h"
 11 | #include "bruteforce/bruteforce_filters.h"
 12 | 
 13 | template<typename TPtr>
 14 | __device__ inline uint64_t RequestNewBlock(TPtr* from, uint32_t size)
 15 | {
 16 |     return atomicAdd((unsigned long long int*)from, size);
 17 | }
 18 | 
// Generator kernel for the Filtered attack.
// Every thread fills its own slots of the decryptors array with keys that pass the configured
// filters. Candidate keys are claimed in ranges from the shared counter `filters.sync_key`
// via RequestNewBlock, so threads never test the same key twice.
// The @resuls argument is not used by this kernel.
__global__ void DEFINE_GENERATOR_KERNEL(GeneratorBruteforceFiltered, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls)
{
    const BruteforceConfig& config = input->GetConfig();

    assert(config.type == BruteforceType::Filtered);

    assert(config.start.man() > 0x100000000000 && "Starting key should be big enough to start bruteforcing. Consider pattern brute.");

    assert(config.filters.include != BruteforceFilters::Flags::None && "Include filter None is invalid - will lead to infinite loop!");
    assert(config.filters.exclude != BruteforceFilters::Flags::All && "Exclude filter All is invalid - will lead to infinite loop!");

    CudaContext ctx = CudaContext::Get();

    // All generated decryptors share the seed of the starting decryptor
    const uint32_t seed = config.start.seed();

    CudaArray<Decryptor>& decryptors = *input->decryptors;
    size_t num_decryptors = decryptors.num;

    const BruteforceFilters& filters = config.filters;


    // if we need to generate 24 keys, and we have 64 threads, the last 40 should do nothing
    // NOTE(review): the comparison is `>=`, so the thread with id == num_decryptors also gets 1 here;
    // its num_to_generate still evaluates to 0 because both terms of the sum below are 0 for it.
    uint8_t at_least_one = num_decryptors >= ctx.thread_id;

    // if we have to generate 75 keys with 64 threads, 11 threads should do +1 key generation
    size_t non_aligned = num_decryptors % ctx.thread_max;
    uint8_t additional_this_thread = non_aligned > 0 && non_aligned > ctx.thread_id;

    // decremental value: how many keys still have to be generated by this thread
    uint32_t num_to_generate = static_cast<uint32_t>(at_least_one * (num_decryptors / ctx.thread_max + additional_this_thread));

    // Size of the currently claimed range; 0 means a new range has to be requested
    uint32_t block_size = 0;

    // Block (from first to num_to_generate) of keys to check
    uint64_t man_block_begin = 0;
    uint64_t man_block_end = 0;

    CUDA_FOR_THREAD_ID(ctx, write_index, num_decryptors)
    {
        bool written = false;

        do
        {
            if (block_size == 0)
            {
                // We have to acquire a next block no bigger than what is left to generate
                // This is needed to prevent situations like:
                //  this thread needs to generate 2 keys
                //  but it has requested 100 to check
                //  the first two keys are valid and the next 98 nobody will check, and the thread cannot "return" them to the unchecked pool
                //  since check indication is just an atomic add operation
                block_size = num_to_generate;

                // get raw manufacturer key start index (number) which will be incremented and checked
                // for each thread. The atomic instruction guarantees no overlapping key checks between threads
                // For example:
                //   each thread should generate 16 keys
                //   `next.man` is 123 right now, thread 1 does an atomic add and gets
                //   start from 123, does 16 checks
                //   `next.man` is now 139, but thread 1 runs checks from 123
                //   ...
                //   thread 1 found 4 keys, so it should generate 12 more
                //   `next.man` is 6383 on next iteration (other threads do jobs as well)
                //   thread 1 adds 12 to `next.man` and gets the previous value
                //   so now it starts checking 12 keys from 6383 to 6395
                //
                man_block_begin = RequestNewBlock(&filters.sync_key, block_size);

                // TODO: Check how overflow behaves
                man_block_end = man_block_begin + block_size;
            }

            // Scan the remainder of the claimed range for the first key accepted by the filters
            for (uint64_t key = man_block_begin; key < man_block_end; ++key)
            {
                if (filters.Pass(key))
                {
                    --num_to_generate;

                    // the next write should start testing keys right after the current one
                    man_block_begin = key + 1;

                    // break while loop
                    written = true;

                    decryptors[write_index] = Decryptor(key, seed);
                    break;
                }
            }

            if (!written)
            {
                // range exhausted without a match - request new block
                block_size = 0;
            }

        } while (!written);
    }
}

// Defines the extern "C" getter that returns the address of the kernel above
DEFINE_GENERATOR_GETTER(GeneratorBruteforceFiltered);


--------------------------------------------------------------------------------
/src/bruteforce/generators/generator_bruteforce_pattern_kernel.inl:
--------------------------------------------------------------------------------
 1 | #include "device/cuda_context.h"
 2 | 
 3 | #include "kernels/kernel_result.h"
 4 | #include "algorithm/keeloq/keeloq_kernel_input.h"
 5 | 
 6 | #include "bruteforce/bruteforce_pattern.h"
 7 | #include "bruteforce/bruteforce_type.h"
 8 | 
 9 | 
10 | __global__ void DEFINE_GENERATOR_KERNEL(GeneratorBruteforcePattern, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls)
11 | {
12 |     const BruteforceConfig& config = input->GetConfig();
13 | 
14 | 	assert((config.type == BruteforceType::Alphabet) || (config.type == BruteforceType::Pattern));
15 | 
16 | 	CudaContext ctx = CudaContext::Get();
17 | 
18 | 	const BruteforcePattern& pattern = config.pattern;
19 | 	CudaArray<Decryptor>& decryptors = *input->decryptors;
20 | 
21 | 	// Imagine alphabet as rotating rings with letters on it
22 | 	// we have 8-bytes key so there will be 8 rings
23 | 	// bytes in the key show how much ring is rotated
24 | 	// and what 'letter' it should have.
25 | 	// Or also it can be considered as 8-digit N-based number
26 | 	MultibaseNumber start = pattern.init(config.start.man());
27 | 
28 |     const uint32_t seed = config.start.seed();
29 | 
30 | 	CUDA_FOR_THREAD_ID(ctx, decryptor_index, decryptors.num)
31 | 	{
32 |         decryptors[decryptor_index] = Decryptor(pattern.next(start, decryptor_index).number(), seed);
33 | 	}
34 | }
35 | 
36 | DEFINE_GENERATOR_GETTER(GeneratorBruteforcePattern);


--------------------------------------------------------------------------------
/src/bruteforce/generators/generator_bruteforce_seed_kernel.inl:
--------------------------------------------------------------------------------
 1 | #include "device/cuda_context.h"
 2 | 
 3 | #include "kernels/kernel_result.h"
 4 | #include "algorithm/keeloq/keeloq_kernel_input.h"
 5 | #include "bruteforce/bruteforce_type.h"
 6 | 
 7 | __global__ void DEFINE_GENERATOR_KERNEL(GeneratorBruteforceSeed, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls)
 8 | {
 9 | 	CudaContext ctx = CudaContext::Get();
10 | 
11 | 	assert(input->GetConfig().type == BruteforceType::Seed);
12 | 
13 | 	const Decryptor& start = input->GetConfig().start;
14 | 
15 | 	CudaArray<Decryptor>& decryptors = *input->decryptors;
16 | 
17 | 	CUDA_FOR_THREAD_ID(ctx, decryptor_index, decryptors.num)
18 | 	{
19 | 		decryptors[decryptor_index] = Decryptor(start.man(), start.seed() + decryptor_index);
20 | 	}
21 | }
22 | 
23 | DEFINE_GENERATOR_GETTER(GeneratorBruteforceSeed);


--------------------------------------------------------------------------------
/src/bruteforce/generators/generator_bruteforce_simple_kernel.inl:
--------------------------------------------------------------------------------
 1 | #include "device/cuda_context.h"
 2 | 
 3 | #include "kernels/kernel_result.h"
 4 | #include "algorithm/keeloq/keeloq_kernel_input.h"
 5 | #include "bruteforce/bruteforce_type.h"
 6 | 
 7 | __global__ void DEFINE_GENERATOR_KERNEL(GeneratorBruteforceSimple, KeeloqKernelInput::TCudaPtr input, KernelResult::TCudaPtr resuls)
 8 | {
 9 | 	CudaContext ctx = CudaContext::Get();
10 | 
11 | 	assert(input->GetConfig().type == BruteforceType::Simple);
12 | 
13 | 	const Decryptor& start = input->GetConfig().start;
14 | 
15 | 	CudaArray<Decryptor>& decryptors = *input->decryptors;
16 | 
17 | 	CUDA_FOR_THREAD_ID(ctx, decryptor_index, decryptors.num)
18 | 	{
19 | 		decryptors[decryptor_index] = Decryptor(start.man() + decryptor_index, start.seed());
20 | 	}
21 | }
22 | 
23 | DEFINE_GENERATOR_GETTER(GeneratorBruteforceSimple);


--------------------------------------------------------------------------------
/src/bruteforce/generators/generator_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "common.h"
2 | 
3 | #include "generator_bruteforce_pattern_kernel.inl"
4 | #include "generator_bruteforce_filtered_kernel.inl"
5 | #include "generator_bruteforce_simple_kernel.inl"
6 | #include "generator_bruteforce_seed_kernel.inl"


--------------------------------------------------------------------------------
/src/common.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <cstddef>
 4 | #include <stdint.h>
 5 | #include <assert.h>
 6 | #include <stdio.h>
 7 | #include <inttypes.h>
 8 | 
 9 | 
// Prints the CUDA error name and description when @error is non-zero, then asserts success.
// Wrapped in do { } while (0) so the macro expands to exactly one statement and is safe
// inside unbraced if/else; the argument is parenthesised so any expression can be passed.
// Note: @error is evaluated more than once - pass a variable, not a CUDA call.
#define CUDA_CHECK(error) \
    do { \
        if ((error) != 0) { printf("\nASSERTION FAILED. CUDA ERROR!\n%s: %s\n", cudaGetErrorName((cudaError_t)(error)), cudaGetErrorString((cudaError_t)(error))); } \
        assert((error) == 0); \
    } while (0)
13 | 
14 | 
// Name of the __global__ kernel function for generator @name: Kernel_<name>
#define GENERATOR_KERNEL_NAME(name) \
    Kernel_##name

// Name of the extern "C" function returning the kernel address: GetKernel_<name>
#define GENERATOR_KERNEL_GETTER_NAME(name) \
    GetKernel_##name

// Expands to `Kernel_<name>(<args>)`; written after `__global__ void` to open a kernel definition
#define DEFINE_GENERATOR_KERNEL(name, ...) \
    GENERATOR_KERNEL_NAME(name)(__VA_ARGS__)

// Defines the extern "C" getter which returns the address of Kernel_<name> as void*
#define DEFINE_GENERATOR_GETTER(name) \
    extern "C" void* GENERATOR_KERNEL_GETTER_NAME(name)() { return (void*)&Kernel_##name; }
26 | 
27 | 
// UNROLL expands to a loop-unrolling pragma when compiling device code and to nothing otherwise.
// NOTE(review): a `#pragma` produced by macro expansion is not a directive in standard C++;
// this presumably relies on how the CUDA toolchain preprocesses device code - confirm
// before replacing it with _Pragma("unroll").
#if __CUDA_ARCH__
    #define UNROLL #pragma unroll
#else
    #define UNROLL
#endif

// Defaults NO_INNER_LOOPS to 1 unless the build already defines it
#ifndef NO_INNER_LOOPS
    #define NO_INNER_LOOPS 1
#endif // !NO_INNER_LOOPS
37 | 
38 | 
39 | 
// Compile-time table of the byte values 0 .. NSize-1 (element[i] == i)
template<uint8_t NSize = 255>
struct DefaultByteArray
{
    uint8_t element[NSize];

    // Fills the table with the identity sequence
    constexpr DefaultByteArray() : element()
    {
        uint8_t value = 0;
        while (value < NSize)
        {
            element[value] = value;
            ++value;
        }
    }

    // Builds a @Vector (anything constructible from an iterator pair) holding the table
    template<typename Vector>
    static inline Vector as_vector()
    {
        constexpr DefaultByteArray table = DefaultByteArray();

        const uint8_t* first = &table.element[0];
        const uint8_t* last = first + NSize;

        return Vector(first, last);
    }
};
60 | 
61 | 
namespace str
{

// printf-style formatting into a string type.
// @format is a printf format string, @args are forwarded to snprintf as-is.
// Returns an empty string when snprintf reports an encoding error.
template<typename String, typename ... Args>
inline String format(const String& format, Args ... args)
{
    // First pass: measure only. +1 reserves space for the terminating '\0'
    const int required = snprintf(nullptr, 0, format.c_str(), args ...) + 1;
    if (required <= 0)
    {
        return {};
    }

    const auto capacity = static_cast<size_t>(required);
    String buffer(capacity, '\0');

    // Second pass: write the text, then drop the terminator from the end
    snprintf(&buffer[0], capacity, format.c_str(), args ...);
    buffer.pop_back();

    return buffer;
}

}
84 | 
85 | 
// String literal suffix that packs up to the first 8 characters of the literal into a uint64_t.
// The first character lands in the most significant byte; missing characters are zero.
// Example: "AB"_u64 == 0x4142000000000000.
// The value is built with shifts, so it does not depend on the byte order of the host.
inline uint64_t operator""_u64(const char* ascii, size_t num)
{
    constexpr size_t size = sizeof(uint64_t);

    uint64_t number = 0;

    for (size_t i = 0; i < size; ++i)
    {
        // go through uint8_t so that chars >= 0x80 are not sign-extended
        const uint8_t byte = i < num ? static_cast<uint8_t>(ascii[i]) : 0;
        number = (number << 8) | byte;
    }

    return number;
}


--------------------------------------------------------------------------------
/src/device/cuda_array.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "common.h"
  4 | 
  5 | #include <vector>
  6 | #include <cuda_runtime_api.h>
  7 | 
  8 | 
/**
 *  A helper class for arrays allocated in GPU memory.
 *
 *  The struct itself (data pointer + element count) is also placed in GPU memory by allocate(),
 *  so a `CudaArray<T>*` returned from allocate() is a device address. The non-static helpers
 *  below only forward `this` to the static functions, which copy the struct to the host first.
 */
template<typename T>
struct CudaArray
{
    using TCudaArray = CudaArray<T>;

    // Device pointer to the elements (null when the array is empty)
    T* CUDA_data;

    // Number of elements
    size_t num;

    // Element access; asserts that @index is in range
    __host__ __device__ inline T& operator[](uint32_t index)
    {
        assert(index < num && "Index is out of range in CudaArray");
        return CUDA_data[index];
    }
    __host__ __device__ inline const T& operator[](uint32_t index) const
    {
        assert(index < num && "Index is out of range in CudaArray");
        return CUDA_data[index];
    }

    // Frees all associated resources assuming this pointer is GPU address
    inline void free()
    {
        TCudaArray::free(this);
    }

    // Copies elements from @source to this array
    // Assumes array was allocated on GPU, will not fail if num > size
    // (at most the array's own element count is copied)
    inline void write(const T* source, size_t num)
    {
        TCudaArray::write(this, source, num);
    }

    // Copies data from GPU to @target array, returns the number of elements in the GPU array
    inline size_t copy(std::vector<T>& target)
    {
        return TCudaArray::copy(this, target);
    }

    // Copies self GPU object (without all underlying data in array)
    // into CPU memory
    inline TCudaArray host()
    {
        // thiscall should work even with invalid pointer
        return TCudaArray::host(this);
    }

    // Copies this pointer (which assumes to be GPU) to host and return copy of the last element
    inline T host_last()
    {
        // thiscall should work even with invalid pointer
        TCudaArray HOST_array = TCudaArray::host(this);
        return TCudaArray::read(HOST_array, HOST_array.num - 1);
    }

public:
    // Allocates the struct and its data on the GPU and fills the data from @source
    static TCudaArray* allocate(const std::vector<T>& source);
    // Frees the data and the struct itself; @array is a device address
    static void free(TCudaArray* array);

    // Returns a host copy of the struct stored at device address @device
    static TCudaArray host(const TCudaArray* device);

    // Copies one element at @index from the GPU; @HOST_Array is a host copy of the struct
    static T read(const TCudaArray& HOST_Array, size_t index);

    // Copies all elements of the GPU array into @target
    static size_t copy(const TCudaArray* array, std::vector<T>& target);

    // Copies up to @num elements from host memory @source into the GPU array
    static void write(TCudaArray* dest, const T* source, size_t num);
};
 79 | 
 80 | template<typename T>
 81 | T CudaArray<T>::read(const TCudaArray& HOST_Array, size_t index)
 82 | {
 83 |     assert(index < HOST_Array.num);
 84 | 
 85 |     T result;
 86 |     auto error = cudaMemcpy(&result, &HOST_Array.CUDA_data[index], sizeof(T), cudaMemcpyDeviceToHost);
 87 |     CUDA_CHECK(error);
 88 | 
 89 |     return result;
 90 | }
 91 | 
 92 | template<typename T>
 93 | CudaArray<T>* CudaArray<T>::allocate(const std::vector<T>& source)
 94 | {
 95 |     // Allocate memory of vector itself (ptr + size_t == 16 bytes)
 96 |     CudaArray<T>* result = nullptr;
 97 |     uint32_t error = cudaMalloc((void**) & result, sizeof(CudaArray<T>));
 98 |     CUDA_CHECK(error);
 99 | 
100 |     // Write size_t - size of the data
101 |     size_t source_size = source.size();
102 |     error = cudaMemcpy(&result->num, &source_size, sizeof(size_t), cudaMemcpyHostToDevice);
103 |     CUDA_CHECK(error);
104 | 
105 |     // Device pointer of data (if available)
106 |     T* data_ptr = nullptr;
107 |     if (source_size > 0)
108 |     {
109 |         size_t allocated_bytes = sizeof(T) * source_size;
110 | 
111 |         // allocate data on device and copy from RAW
112 |         error = cudaMalloc((void**)&data_ptr, allocated_bytes);
113 |         CUDA_CHECK(error);
114 | 
115 |         error = cudaMemcpy(data_ptr, source.data(), allocated_bytes, cudaMemcpyHostToDevice);
116 |         CUDA_CHECK(error);
117 |     }
118 | 
119 |     // Write data pointer (null if no data)
120 |     error = cudaMemcpy(&result->CUDA_data, &data_ptr, sizeof(T*), cudaMemcpyHostToDevice);
121 |     CUDA_CHECK(error);
122 | 
123 |     return result;
124 | }
125 | 
126 | template<typename T>
127 | CudaArray<T> CudaArray<T>::host(const TCudaArray* device)
128 | {
129 |     assert(device && "Invalid CUDA pointer");
130 |     TCudaArray HOST_dest = { 0 };
131 | 
132 |     auto error = cudaMemcpy(&HOST_dest, device, sizeof(TCudaArray), cudaMemcpyDeviceToHost);
133 |     CUDA_CHECK(error);
134 | 
135 |     return HOST_dest;
136 | }
137 | 
138 | template<typename T>
139 | void CudaArray<T>::free(TCudaArray* array)
140 | {
141 |     TCudaArray HOST_array = host(array);
142 | 
143 |     if (HOST_array.CUDA_data)
144 |     {
145 |         auto error = cudaFree(HOST_array.CUDA_data);
146 |         CUDA_CHECK(error);
147 |     }
148 | 
149 |     auto error = cudaFree(array);
150 |     CUDA_CHECK(error);
151 | }
152 | 
153 | template<typename T>
154 | size_t CudaArray<T>::copy(const TCudaArray* array, std::vector<T>& target)
155 | {
156 |     TCudaArray HOST_array = host(array);
157 |     if (HOST_array.num > 0)
158 |     {
159 |         size_t allocated_bytes = HOST_array.num * sizeof(T);
160 |         target.resize(HOST_array.num);
161 | 
162 |         auto error = cudaMemcpy((T*)target.data(), HOST_array.CUDA_data, allocated_bytes, cudaMemcpyDeviceToHost);
163 |         CUDA_CHECK(error);
164 |     }
165 | 
166 |     return HOST_array.num;
167 | }
168 | 
169 | template<typename T>
170 | void CudaArray<T>::write(TCudaArray* dest, const T* source, size_t num)
171 | {
172 |     TCudaArray HOST_dest = host(dest);
173 | 
174 |     assert(HOST_dest.CUDA_data && "CUDA Data wasn't allocated");
175 |     if (HOST_dest.CUDA_data)
176 |     {
177 |         auto copy_bytes = sizeof(T) * std::min(num, HOST_dest.num);
178 | 
179 |         auto error = cudaMemcpy(HOST_dest.CUDA_data, source, copy_bytes, cudaMemcpyHostToDevice);
180 |         CUDA_CHECK(error);
181 |     }
182 | }


--------------------------------------------------------------------------------
/src/device/cuda_common.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common.h"
 4 | 
 5 | #include <cuda_runtime_api.h>
 6 | 
 7 | 
 8 | namespace misc
 9 | {
10 | 
11 | // device-faster version of byte reversing function
12 | __host__ __device__ __forceinline__ uint64_t rev_bytes(uint64_t input)
13 | {
14 | #if __CUDA_ARCH__
15 |     uint32_t key_lo = input;
16 |     uint32_t key_hi = input >> 32;
17 | 
18 |     constexpr uint32_t selector_0 = 0x4567;
19 |     constexpr uint32_t selector_1 = 0x0123;
20 | 
21 |     uint32_t key_rev_lo = __byte_perm(key_lo, key_hi, selector_0);
22 |     uint32_t key_rev_hi = __byte_perm(key_lo, key_hi, selector_1);
23 | 
24 |     return ((uint64_t)key_rev_hi << 32) | key_rev_lo;
25 | #else
26 |     uint64_t input_rev = 0;
27 |     uint64_t input_rev_byte = 0;
28 |     for (uint8_t i = 0; i < 64; i += 8)
29 |     {
30 |         input_rev_byte = (uint8_t)(input >> i);
31 |         input_rev = input_rev | input_rev_byte << (56 - i);
32 |     }
33 | 
34 |     return input_rev;
35 | #endif
36 | }
37 | 
38 | // Reverses amount of bits in @input
39 | __device__ __host__ __forceinline__ uint64_t rev_bits(uint64_t input, uint8_t rev_bit_count)
40 | {
41 |     uint64_t reverse_key = 0;
42 |     for (uint8_t i = 0; i < rev_bit_count; i++)
43 |     {
44 |         reverse_key = reverse_key << 1 | ((input >> i) & 1);
45 |     }
46 |     return reverse_key;
47 | }
48 | }


--------------------------------------------------------------------------------
/src/device/cuda_context.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common.h"
 4 | 
 5 | #include <device_launch_parameters.h>
 6 | 
 7 | 
// For loop macro
// Declares a uint32_t indexer @i that starts at ctx.thread_id, runs while it is below @count
// and is incremented by ctx.thread_max (amount of all threads in the context) on each step
#define CUDA_FOR_THREAD_ID(ctx, i, count) for(uint32_t i = ctx.thread_id; i < count; i += ctx.thread_max)
11 | 
12 | // Custom struct of CUDA thread execution context
13 | struct CudaContext
14 | {
15 |     // Maximum overall threads
16 |     uint32_t thread_max;
17 | 
18 |     // Global thread id
19 |     uint32_t thread_id;
20 | 
21 |     __host__ __device__ static inline CudaContext Get()
22 |     {
23 |     #if __CUDA_ARCH__
24 |         return CudaContext
25 |         {
26 |             gridDim.x * blockDim.x,
27 |             blockIdx.x * blockDim.x + threadIdx.x
28 |         };
29 |     #else
30 |         assert(false && "CUDA context is not available on host");
31 |         return {};
32 |     #endif
33 |     }
34 | };


--------------------------------------------------------------------------------
/src/device/cuda_double_array.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common.h"
 4 | 
 5 | #include <cuda_runtime_api.h>
 6 | 
 7 | /**
 8 |  *  This is convenience wrapper allows for easier array management and
 9 |  * copying between CPU and GPU (and vice versa).
10 |  *
11 |  * WARNING:
12 |  *	If this object is data owner (constructed without input data) - it will free memory in destructor
13 |  *	If this object is just pointer container - will not do anything destructive with pointers in destructor
14 |  */
15 | template<typename T>
16 | struct DoubleArray
17 | {
18 | 	using TCUDAPtr = T*;
19 | 	using THOSTPtr = T*;
20 | 
21 | 	THOSTPtr HOST_mem;
22 | 	TCUDAPtr CUDA_mem;
23 | 
24 | 	size_t size;
25 | 
26 | 	DoubleArray(size_t num, bool zeros = true)
27 | 		: hostOwner(true)
28 | 	{
29 | 		size = sizeof(T) * num;
30 | 		HOST_mem = (T*)malloc(size);
31 | 
32 | 		uint32_t error = cudaMalloc(&CUDA_mem, size);
33 | 		CUDA_CHECK(error);
34 | 
35 | 		if (zeros)
36 | 		{
37 | 			memset(HOST_mem, 0, size);
38 | 			cudaMemset(CUDA_mem, 0, size);
39 | 		}
40 | 	}
41 | 
42 | 	DoubleArray(THOSTPtr array, size_t num)
43 | 		: hostOwner(false)
44 | 	{
45 | 		HOST_mem = array;
46 | 		size = sizeof(T) * num;
47 | 
48 | 		cudaError error = cudaMalloc((void**) & CUDA_mem, size);
49 | 		CUDA_CHECK(error);
50 | 
51 | 		error = cudaMemcpy(CUDA_mem, HOST_mem, size, cudaMemcpyHostToDevice);
52 | 		CUDA_CHECK(error);
53 | 	}
54 | 
55 | 	~DoubleArray()
56 | 	{
57 | 		if (CUDA_mem)
58 | 		{
59 | 			cudaFree(CUDA_mem);
60 | 			CUDA_mem = nullptr;
61 | 		}
62 | 
63 | 		if (HOST_mem && hostOwner)
64 | 		{
65 | 			free(HOST_mem);
66 | 			HOST_mem = nullptr;
67 | 		}
68 | 	}
69 | 
70 | 	void write_GPU()
71 | 	{
72 | 		uint32_t error = cudaMemcpy(CUDA_mem, HOST_mem, size, cudaMemcpyHostToDevice);
73 | 		CUDA_CHECK(error);
74 | 	}
75 | 
76 | 	void read_GPU()
77 | 	{
78 | 		uint32_t error = cudaMemcpy(HOST_mem, CUDA_mem, size, cudaMemcpyDeviceToHost);
79 | 		CUDA_CHECK(error);
80 | 	}
81 | 
82 | private:
83 | 	bool hostOwner;
84 | };


--------------------------------------------------------------------------------
/src/device/cuda_object.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "common.h"
  4 | 
  5 | #include <cuda_runtime_api.h>
  6 | 
  7 | 
  8 | template<typename TTarget>
  9 | struct CudaObject
 10 | {
 11 | 	TTarget* CUDA_Ptr;
 12 | 	TTarget* HOST_Ptr;
 13 | 
 14 | 	CudaObject(TTarget* source)
 15 | 	{
 16 | 		CUDA_Ptr = nullptr;
 17 | 		HOST_Ptr = source;
 18 | 	}
 19 | 	CudaObject(CudaObject<TTarget>&& other) = delete;
 20 | 
 21 | 	CudaObject(const CudaObject<TTarget>& other) = delete;
 22 | 	CudaObject<TTarget>& operator =(const CudaObject<TTarget>& other) = delete;
 23 | 
 24 | 	~CudaObject()
 25 | 	{
 26 | 		if (CUDA_Ptr)
 27 | 		{
 28 | 			uint32_t error = cudaFree(CUDA_Ptr);
 29 | 			CUDA_CHECK(error);
 30 | 			CUDA_Ptr = nullptr;
 31 | 		}
 32 | 
 33 | 		HOST_Ptr = nullptr;
 34 | 	}
 35 | 
 36 | 	TTarget* ptr(bool sync = true)
 37 | 	{
 38 | 		if (HOST_Ptr == nullptr)
 39 | 		{
 40 | 			assert(false && "OBJECT IS NO LONGER CAN BE USED IN GPU");
 41 | 			return nullptr;
 42 | 		}
 43 | 
 44 | 		if (CUDA_Ptr == nullptr)
 45 | 		{
 46 | 			auto error = cudaMalloc((void**)&CUDA_Ptr, sizeof(TTarget));
 47 | 			CUDA_CHECK(error);
 48 | 		}
 49 | 
 50 | 		if (CUDA_Ptr && sync)
 51 | 		{
 52 | 			auto error = cudaMemcpy(CUDA_Ptr, HOST_Ptr, sizeof(TTarget), cudaMemcpyHostToDevice);
 53 | 			CUDA_CHECK(error);
 54 | 		}
 55 | 
 56 | 		return CUDA_Ptr;
 57 | 	}
 58 | 
 59 | 	void read()
 60 | 	{
 61 | 		if (HOST_Ptr == nullptr)
 62 | 		{
 63 | 			assert(false && "OBJECT IS NO LONGER CAN BE USED IN GPU");
 64 | 			return;
 65 | 		}
 66 | 
 67 | 		if (CUDA_Ptr)
 68 | 		{
 69 | 			auto error = cudaMemcpy(HOST_Ptr, CUDA_Ptr, sizeof(TTarget), cudaMemcpyDeviceToHost);
 70 | 			CUDA_CHECK(error);
 71 | 		}
 72 | 	}
 73 | };
 74 | 
 75 | // Self owned GPU object
 76 | template<typename T>
 77 | struct TGenericGpuObject
 78 | {
 79 | 	using TCudaPtr = T*;
 80 | 
 81 | 	TGenericGpuObject(T* Self) : SelfGpu(Self) {
 82 | 	}
 83 | 
 84 | 	TGenericGpuObject(const TGenericGpuObject<T>& other) = delete;
 85 | 	TGenericGpuObject<T>& operator =(const TGenericGpuObject<T>& other) = delete;
 86 | 
 87 | 	virtual TCudaPtr ptr()
 88 | 	{
 89 | 		return SelfGpu.ptr();
 90 | 	}
 91 | 
 92 | 	void read()
 93 | 	{
 94 | 		SelfGpu.read();
 95 | 	}
 96 | 
 97 | protected:
 98 | 
 99 | 	CudaObject<T> SelfGpu;
100 | };


--------------------------------------------------------------------------------
/src/device/cuda_span.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common.h"
 4 | 
 5 | #include <type_traits>
 6 | #include <cuda_runtime_api.h>
 7 | 
 8 | 
 9 | /**
10 |  *  Something like C# span
11 |  * Memory view
12 |  */
13 | template<typename T>
14 | struct Span
15 | {
16 |     __host__ __device__ Span(T* ptr, uint32_t num) : data(ptr), size(num) { }
17 | 
18 |     __host__ __device__ inline T& operator[](uint32_t index)
19 |     {
20 |         assert(index < size && "Index is out of range in Span");
21 | 
22 | #if __CUDA_ARCH__
23 |         return __ldca(&data[index]);
24 | #else
25 |         return data[index];
26 | #endif
27 |     }
28 | 
29 |     __host__ __device__ inline const T& operator[](uint32_t index) const
30 |     {
31 |         assert(index < size && "Index is out of range in Span");
32 | #if __CUDA_ARCH__
33 |         return __ldca(&data[index]);
34 | #else
35 |         return data[index];
36 | #endif
37 |     }
38 | 
39 |     // Number of elements in Span
40 |     __host__ __device__ inline uint32_t num() const { return size; }
41 | 
42 |     // fall back if T is not simple type
43 |     __host__ __device__ typename std::enable_if<!std::is_integral_v<T>, T&>::type __ldca(T* ptr) { return *ptr; }
44 |     __host__ __device__ typename std::enable_if<!std::is_integral_v<T>, const T&>::type __ldca(T* ptr) const { return *ptr; }
45 | 
46 | private:
47 | 
48 |     T* data;
49 | 
50 |     uint32_t size;
51 | };


--------------------------------------------------------------------------------
/src/device/cuda_vector.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common.h"
 4 | 
 5 | #include "device/cuda_array.h"
 6 | 
 7 | #include <vector>
 8 | #include <cuda_runtime_api.h>
 9 | 
10 | 
11 | /**
12 |  *  Host owned, GPU copy, vector
13 |  * Can be considered as wrapper around host vector
14 |  * Owns GPU data, frees on destructor
15 |  */
16 | template<typename T>
17 | struct CudaVector
18 | {
19 |     // Explicit construct using initializer list
20 |     CudaVector(std::initializer_list<T>&& initializer) :
21 |         cpu_vector(std::move(initializer)),
22 |         gpu_array(nullptr)
23 |     {
24 |     }
25 | 
26 |     // Forward construction to vector
27 |     template<typename ... Args>
28 |     CudaVector(Args&&... args) :
29 |         cpu_vector(std::forward<Args>(args)...),
30 |         gpu_array(nullptr)
31 |     {
32 |     }
33 | 
34 |     ~CudaVector();
35 | 
36 |     // CPU memory is immutable since it should be
37 |     // synchronized with gpu
38 |     const std::vector<T>& cpu() const { return cpu_vector; }
39 | 
40 |     // Size of CPU vector (gpu will be the same size once allocated)
41 |     const size_t size() const { return cpu_vector.size(); }
42 | 
43 |     // Reads GPU pointer and copies data from GPU memory to CPU
44 |     void read();
45 | 
46 |     //  Pointer to GPU array
47 |     // Will allocate (and copy) by default
48 |     CudaArray<T>* gpu(bool allocate = true);
49 | 
50 | private:
51 | 
52 |     std::vector<T> cpu_vector;
53 | 
54 |     CudaArray<T>* gpu_array;
55 | };
56 | 
57 | template<typename T> void CudaVector<T>::read()
58 | {
59 |     assert(gpu_array);
60 |     if (gpu_array)
61 |     {
62 |         gpu_array->copy(cpu_vector);
63 |     }
64 | }
65 | 
66 | template<typename T> CudaArray<T>* CudaVector<T>::gpu(bool allocate /*= true*/)
67 | {
68 |     if (gpu_array || !allocate)
69 |     {
70 |         return gpu_array;
71 |     }
72 | 
73 |     gpu_array = CudaArray<T>::allocate(cpu_vector);
74 |     return gpu_array;
75 | }
76 | 
77 | template<typename T> CudaVector<T>::~CudaVector()
78 | {
79 |     if (gpu_array != nullptr)
80 |     {
81 |         gpu_array->free();
82 |         gpu_array = nullptr;
83 |     }
84 | }
85 | 


--------------------------------------------------------------------------------
/src/host/command_line_args.cpp:
--------------------------------------------------------------------------------
  1 | #include "command_line_args.h"
  2 | 
  3 | #include <cuda_runtime_api.h>
  4 | 
  5 | #include "host/host_utils.h"
  6 | #include "host/console.h"
  7 | 
  8 | //#define CXXOPTS_NO_EXCEPTIONS
  9 | #include "cxxopts/include/cxxopts.hpp"
 10 | 
 11 | 
 12 | namespace
 13 | {
 14 | 
 15 | inline void read_alphabets(CommandLineArgs& target, cxxopts::ParseResult& result)
 16 | {
 17 |     if (result.count(ARG_ALPHABET) == 0)
 18 |     {
 19 |         // no alphabets available
 20 |         return;
 21 |     }
 22 | 
 23 |     const auto& alphabet_args = result[ARG_ALPHABET].as<std::vector<std::string>>();
 24 | 
 25 |     for (const auto& alphabet_arg : alphabet_args)
 26 |     {
 27 |         auto alphabet_bytes = host::utils::read_alphabet_binary_file(alphabet_arg.c_str());
 28 |         if (alphabet_bytes.size() > 0)
 29 |         {
 30 |             target.alphabets.emplace_back(alphabet_bytes);
 31 |         }
 32 |         else
 33 |         {
 34 |             // "61:ab:00:33..." -> "61,ab,00,33..."
 35 |             std::string alphabet_bytes_hex = alphabet_arg;
 36 |             std::replace(alphabet_bytes_hex.begin(), alphabet_bytes_hex.end(), ':', ',');
 37 | 
 38 |             // ["61","ab","00","33"]
 39 |             std::vector<std::string> alphabet_hex;
 40 |             cxxopts::values::parse_value(alphabet_bytes_hex, alphabet_hex);
 41 | 
 42 |             // ["61","ab","00","33"] -> [0x61, 0xAB, 0x00, 0x33]
 43 |             std::vector<uint8_t> alphabet_bytes;
 44 |             alphabet_bytes.reserve(alphabet_hex.size());
 45 | 
 46 |             for (const auto& hex : alphabet_hex)
 47 |             {
 48 |                 auto value = (uint8_t)strtoul(hex.c_str(), nullptr, 16);
 49 |                 if (value != 0 && hex != "00")
 50 |                 {
 51 |                     alphabet_bytes.push_back(value);
 52 |                 }
 53 |                 else
 54 |                 {
 55 |                     printf("Error: cannot parse alphabet byte '%s'!\n", hex.c_str());
 56 |                 }
 57 |             }
 58 | 
 59 |             //
 60 |             if (alphabet_bytes.size() > 0)
 61 |             {
 62 |                 target.alphabets.emplace_back(alphabet_bytes);
 63 | 
 64 |                 if (alphabet_bytes.size() > 256)
 65 |                 {
 66 |                     printf("Warning: Alphabet: '%s' has to much bytes in hex string: %zd (should be less than 257)\n",
 67 |                         result[ARG_ALPHABET].as<std::string>().c_str(), alphabet_bytes.size());
 68 |                 }
 69 |             }
 70 |             else
 71 |             {
 72 |                 printf("Error: Alphabet: '%s' is not valid file, neither valid alphabet hex string (like: AA:11:b3...)!\n",
 73 |                     alphabet_arg.c_str());
 74 |             }
 75 |         }
 76 |     }
 77 | }
 78 | 
 79 | inline void parse_dictionary_mode(CommandLineArgs& target, cxxopts::ParseResult& result)
 80 | {
 81 |     if (result[ARG_WORDDICT].count() > 0)
 82 |     {
 83 |         auto dict = result[ARG_WORDDICT].as<std::vector<std::string>>();
 84 | 
 85 |         std::vector<Decryptor> decryptors;
 86 | 
 87 |         for (const auto& dict_arg : dict)
 88 |         {
 89 |             std::vector<Decryptor> from_file = host::utils::read_word_dictionary_file(dict_arg.c_str());
 90 | 
 91 |             if (from_file.size() > 0)
 92 |             {
 93 |                 decryptors.insert(decryptors.end(), from_file.begin(), from_file.end());
 94 |             }
 95 |             else if (auto key = strtoull(dict_arg.c_str(), nullptr, 16))
 96 |             {
 97 |                 decryptors.push_back(Decryptor(key, 0));
 98 |             }
 99 |             else
100 |             {
101 |                 printf("Error: invalid param passed to '--%s' argument: '%s' not a dictionary file neither word\n", ARG_WORDDICT, dict_arg.c_str());
102 |             }
103 |         }
104 | 
105 |         if (decryptors.size() > 0)
106 |         {
107 |             target.brute_configs.push_back(BruteforceConfig::GetDictionary(std::move(decryptors)));
108 |         }
109 |     }
110 | 
111 |     if (result[ARG_BINDICT].count() > 0)
112 |     {
113 |         auto seed = result[ARG_SEED].as<uint32_t>();
114 | 
115 |         auto dicts = result[ARG_BINDICT].as<std::vector<std::string>>();
116 |         auto mode = result[ARG_BINDMODE].as<uint8_t>();
117 | 
118 |         std::vector<Decryptor> decryptors;
119 | 
120 |         for (const auto& bin_dict_path : dicts)
121 |         {
122 |             std::vector<Decryptor> decryptors = host::utils::read_binary_dictionary_file(bin_dict_path.c_str(), mode, seed);
123 | 
124 |             if (decryptors.size() > 0)
125 |             {
126 |                 target.brute_configs.push_back(BruteforceConfig::GetDictionary(std::move(decryptors)));
127 |             }
128 |             else
129 |             {
130 |                 printf("Error: invalid param passed to '--%s' argument: '%s'. Invalid file!\n",
131 |                     ARG_BINDICT, bin_dict_path.c_str());
132 |             }
133 |         }
134 | 
135 |     }
136 | }
137 | 
138 | inline void parse_bruteforce_mode(CommandLineArgs& target, cxxopts::ParseResult& result)
139 | {
140 |     auto start_key = result[ARG_START].as<uint64_t>();
141 |     auto seed = result[ARG_SEED].as<uint32_t>();
142 |     Decryptor first_decryptor(start_key, seed);
143 | 
144 |     auto count_key = result[ARG_COUNT].as<size_t>();
145 | 
146 |     target.brute_configs.push_back(BruteforceConfig::GetBruteforce(first_decryptor, count_key));
147 | }
148 | 
149 | inline void parse_seed_mode(CommandLineArgs& target, cxxopts::ParseResult& result)
150 | {
151 |     if (result.count(ARG_START) == 0)
152 |     {
153 |         printf("Error: For seed mode, it's necessary to specify a manufacturer key with '--" ARG_START "' argument!\n");
154 |         return;
155 |     }
156 | 
157 |     auto start_key = result[ARG_START].as<uint64_t>();
158 |     auto seed = result[ARG_SEED].as<uint32_t>();
159 |     Decryptor first_decryptor(start_key, seed);
160 | 
161 |     target.brute_configs.push_back(BruteforceConfig::GetSeedBruteforce(first_decryptor));
162 | }
163 | 
164 | inline void parse_bruteforce_filtered_mode(CommandLineArgs& target, cxxopts::ParseResult& result)
165 | {
166 |     auto start_key = result[ARG_START].as<uint64_t>();
167 |     auto seed = result[ARG_SEED].as<uint32_t>();
168 |     Decryptor first_decryptor(start_key, seed);
169 | 
170 |     auto count_key = result[ARG_COUNT].as<size_t>();
171 | 
172 |     auto include_filter = result[ARG_IFILTER].as<BruteforceFilters::Flags::Type>();
173 |     auto exclude_filter = result[ARG_EFILTER].as<BruteforceFilters::Flags::Type>();
174 | 
175 |     BruteforceFilters filters
176 |     {
177 |         include_filter,
178 |         exclude_filter,
179 |     };
180 | 
181 |     target.brute_configs.push_back(BruteforceConfig::GetBruteforce(first_decryptor, count_key, filters));
182 | }
183 | 
184 | inline void parse_alphabet_mode(CommandLineArgs& target, cxxopts::ParseResult& result)
185 | {
186 |     auto start_key = result[ARG_START].as<uint64_t>();
187 |     auto seed = result[ARG_SEED].as<uint32_t>();
188 |     Decryptor first_decryptor(start_key, seed);
189 | 
190 |     auto count_key = result[ARG_COUNT].as<size_t>();
191 | 
192 |     for (const auto& alphabet : target.alphabets)
193 |     {
194 |         target.brute_configs.push_back(BruteforceConfig::GetAlphabet(first_decryptor, alphabet, count_key));
195 |     }
196 | }
197 | 
198 | inline void parse_pattern_mode(CommandLineArgs& target, cxxopts::ParseResult& result)
199 | {
200 |     auto start_key = result[ARG_START].as<uint64_t>();
201 |     auto seed = result[ARG_SEED].as<uint32_t>();
202 |     Decryptor first_decryptor(start_key, seed);
203 | 
204 |     auto count_key = result[ARG_COUNT].as<size_t>();
205 | 
206 |     const auto& args = result[ARG_PATTERN].as<std::vector<std::string>>();
207 | 
208 |     auto full_bytes = DefaultByteArray<>::as_vector<std::vector<uint8_t>>();
209 | 
210 |     for (const auto& pattern_arg : args)
211 |     {
212 |         std::string pattern_bytes_hex = pattern_arg;
213 |         std::replace(pattern_bytes_hex.begin(), pattern_bytes_hex.end(), ':', ',');
214 | 
215 |         std::vector<std::string> bytes_hex;
216 |         cxxopts::values::parse_value(pattern_bytes_hex, bytes_hex);
217 | 
218 | 
219 |         std::vector<std::vector<uint8_t>> result;
220 |         for (auto& hex : bytes_hex)
221 |         {
222 |             // append alphabets' bytes
223 |             if (hex.find("AL") != std::string::npos)
224 |             {
225 |                 if (target.alphabets.size() == 0)
226 |                 {
227 |                     printf("ERROR: Cannot use Alphabet in patterns - no provided alphabets. Replacing with *\n");
228 |                     result.push_back(full_bytes);
229 |                     continue;
230 |                 }
231 | 
232 |                 auto al_index = strtoul(hex.substr(2).c_str(), nullptr, 10);
233 |                 if (al_index >= target.alphabets.size())
234 |                 {
235 |                     printf("ERROR: Argument %s referring alphabet: %ld (index: %ld), but there are only %zd available. "
236 |                         "Replacing with first one\n", hex.c_str(), al_index + 1, al_index, target.alphabets.size());
237 |                     al_index = 0;
238 |                 }
239 | 
240 |                 result.push_back(target.alphabets[al_index].as_vector());
241 |             }
242 |             else
243 |             {
244 |                 // append regular pattern bytes
245 |                 auto bytes = BruteforcePattern::ParseBytes(hex);
246 | 
247 |                 if (bytes.size() == 0)
248 |                 {
249 |                     printf("ERROR: Invalid string '%s' for byte pattern! Ignoring. Replacing with *\n", hex.c_str());
250 |                     result.push_back(full_bytes);
251 |                 }
252 |                 else
253 |                 {
254 |                     result.push_back(bytes);
255 |                 }
256 |             }
257 |         }
258 | 
259 |         // validate
260 |         if (result.size() > 8)
261 |         {
262 |             printf("Warning: Pattern string: '%s' contains more than 8 per-bytes delimiters (%zd) other will be ignored.\n",
263 |                 pattern_arg.c_str(), bytes_hex.size());
264 |         }
265 |         else if (result.size() < 8)
266 |         {
267 |             printf("Warning: Pattern string: '%s' contains less than 8 per-bytes delimiters (%zd) other fill with full pattern.\n",
268 |                 pattern_arg.c_str(), bytes_hex.size());
269 | 
270 |             for (auto i = result.size(); i < 8; ++i)
271 |             {
272 |                 result.push_back(full_bytes);
273 |             }
274 |         }
275 | 
276 | 
277 |         // reverse bytes
278 |         std::reverse(result.begin(), result.end());
279 | 
280 |         // Add pattern attack to config
281 |         target.brute_configs.push_back(BruteforceConfig::GetPattern(first_decryptor, BruteforcePattern(std::move(result), pattern_arg), count_key));
282 |     }
283 | }
284 | 
// Returns the static "Example:" text which parse() prints after the generated option help.
// The whole text is a single literal assembled by the compiler from adjacent
// string literals and the APP_NAME / ARG_* macros.
constexpr const char* Usage()
{
    return ""
        // Mode 1: simple bruteforce
        "\nExample:\n"
        "\t./" APP_NAME " --" ARG_INPUTS " xxx,yy,zzz"
        " --" ARG_MODE "=1 --" ARG_START "=0x9876543210 --" ARG_COUNT "=1000000"
        "\n\n\tThis will launch simple bruteforce (+1) attack with 1 million checks from 0x9876543210. "
        "Will be checked ALL 16 (12 if no seed specified) keeloq learning types"

        // Mode 3: alphabet attack
        "\nExample:\n"
        "\t./" APP_NAME " --" ARG_INPUTS " xxx,yy,zzz"
        " --" ARG_MODE "=3 --" ARG_LTYPE "=0 --" ARG_ALPHABET "=examples/alphabet.bin,10:20:30:AA:BB:CC:DD:EE:FF:02:33"
        "\n\n\tThis will launch 2 alphabets attacks for all possible combinations for SIMPLE learning Keeloq type. "
        "First alphabet will be taken from file, second - parsed from inputs."

        // Mode 4: pattern attack
        "\nExample:\n"
        "\t./" APP_NAME " --"  ARG_INPUTS " xxx,yy,zzz"
        " --" ARG_MODE "=4 --" ARG_LTYPE "=2 --" ARG_ALPHABET "=examples/alphabet.bin --" ARG_PATTERN "=AL0:11:AB|BC:*:00-44:AL0:AA-FF:01"
        "\n\n\tThis will launch pattern attacks with NORMAL keeloq learning type."
        "\n\tPattern applied 'as is' - big endian. The highest byte (0xXX.......) will be taken from 1st alphabet."
        "\n\tNext byte (0x..XX....) will be exact `0x11`."
        "\n\tNext byte (0x....XX..) will be `0xAB` or `0xBC`.\n"

        // Mode 5: seed bruteforce
        "\nExample:\n"
        "\t./" APP_NAME " --"  ARG_INPUTS " xxx,yy,zzz"
        " --" ARG_MODE "=5 --" ARG_START "=0xAABBCCDDEEFF"
        "\n\n\tThis will launch seed bruteforce attack for all seed learning types. "
        "\n\tSpecifying '--" ARG_LTYPE "=a,b,c' will narrow learning types to provided ones."
        ;
}
315 | 
316 | }
317 | 
318 | CommandLineArgs CommandLineArgs::parse(int argc, const char** argv)
319 | {
320 |     cxxopts::Options options(APP_NAME, R"(
321 | 
322 |   _______  _____  ___     __ __        __
323 |  / ___/ / / / _ \/ _ |   / //_/__ ___ / /  ___  ___ _
324 | / /__/ /_/ / // / __ |  / ,< / -_) -_) /__/ _ \/ _ `/
325 | \___/\____/____/_/ |_| /_/|_|\__/\__/____/\___/\_, /
326 |                                                 /_/
327 |    ___           __      ___
328 |   / _ )______ __/ /____ / _/__  ___________ ____
329 |  / _  / __/ // / __/ -_) _/ _ \/ __/ __/ -_) __/
330 | /____/_/  \_,_/\__/\__/_/ \___/_/  \__/\__/_/
331 | 
332 | )");
333 |     options.set_width(::console::get_width())
334 |         .allow_unrecognised_options()
335 |         .add_options()
336 |         ("h," ARG_HELP, "Prints this help")
337 | 
338 |         // What to bruteforce
339 |         (ARG_INPUTS, "Comma separated uint64 values (it's better to have 3)",
340 |             cxxopts::value<std::vector<uint64_t>>(), "[k1, k1, k3]")
341 | 
342 |         // CUDA Setup
343 |         (ARG_BLOCKS, "How many thread blocks to launch.",
344 |             cxxopts::value<uint16_t>()->default_value("32"), "<num>")
345 |         (ARG_THREADS, "How many threads will be launched in a block (if 0 - will use value from device).",
346 |             cxxopts::value<uint16_t>()->default_value("0"), "<num>")
347 | 
348 | #ifndef NO_INNER_LOOPS
349 |         (ARG_LOOPS, "How many loop iterations will one thread perform (keep it low).",
350 |             cxxopts::value<uint16_t>()->default_value("2"), "<num>")
351 | #endif
352 | 
353 |         // Mode - what bruteforce type will be used
354 |         (ARG_MODE,
355 |             "Bruteforce modes (comma separated):"
356 |             "\n\t0: - Dictionary."
357 |             "\n\t1: - Simple +1."
358 |             "\n\t2: - Simple +1 with filters."
359 |             "\n\t3: - Alphabet. Bruteforce +1 using only specified bytes."
360 |             "\n\t4: - Pattern. Bruteforce with bytes selected by specified pattern."
361 |             "\n\t5: - Seed. Bruteforce only seed with provided manufacturer key (applied only to algorithms with seed).",
362 |             cxxopts::value<std::vector<uint8_t>>(), "[m1,m2..]")
363 |         (ARG_LTYPE,
364 |             "Specific learning type (if you know your target well). Increases approximately x16 times (since doesn't calculate other types)"
365 |             "\n\tV+1 means with reverse key (There are also more types. see source code):"
366 |             "\n\t0: - Simple"
367 |             "\n\t2: - Normal"
368 |             "\n\t4: - Secure"
369 |             "\n\t6: - Xor"
370 |             "\nALL",
371 |             cxxopts::value<std::vector<uint8_t>>()->default_value(KeeloqLearningType::ValueString(KeeloqLearningType::LAST)), "<type>")
372 | 
373 |         // Dictionaries files
374 |         (ARG_WORDDICT, "Word dictionary file(s) or word(s) - contains hexadecimal strings which will be used as keys. e.g: 0xaabb1122 FFbb9800121212",
375 |             cxxopts::value<std::vector<std::string>>(), "[f1,w1,...]")
376 |         (ARG_BINDICT, "Binary dictionary file(s) - each 8 bytes of the file will be used as key (do not check duplicates or zeros)",
377 |             cxxopts::value<std::vector<std::string>>(), "[b1,b2,...]")
378 |         (ARG_BINDMODE, "Byte order mode for binary dictionary. 0 - as is. 1 - reverse, 2 - add both",
379 |             cxxopts::value<uint8_t>()->default_value("0"), "<mode>")
380 | 
381 |         // Common (Bruteforce, Alphabet) - set start and end of execution
382 |         (ARG_START, "The first key value which will be used for selected mode(s)",
383 |             cxxopts::value<std::uint64_t>()->default_value("0"), "<value>")
384 |         (ARG_SEED, "The seed which is used for bruteforce. If you specify it, most probably you need to check seed-only learning types (SECURE, FAAC)",
385 |             cxxopts::value<std::uint32_t>()->default_value("0"), "<value>")
386 |         (ARG_COUNT, "How many keys selected mode(s) should check.",
387 |             cxxopts::value<std::uint64_t>()->default_value("0xFFFFFFFFFFFFFFFF"), "<value>")
388 | 
389 |         // Alphabet
390 |         (ARG_ALPHABET, "Alphabet binary file(s) or alphabet hex string(s) (like: AA:61:62:bb)",
391 |             cxxopts::value<std::vector<std::string>>(), "[f1,a1,...]")
392 | 
393 |         // Pattern
394 |         (ARG_PATTERN, "Pattern file (or pattern itself) - contains comma separated patterns like: AL1:0A:0x10-0x32:*:33|44|FA:FF\n"
395 |             "Pattern is in big endian. That means first byte in patter is highest byte (e.g. 01:.... equals key 0x01......)\n"
396 |             "Each byte in pattern separated by `:`, pattern types:\n"
397 |             "\tAL[0-N]   - alphabet N (index in " ARG_ALPHABET " )\n"
398 |             "\t0A        - constant. might be any byte as hex string\n"
399 |             "\t0x10-0x32 - range. bytes from first to second (including)\n"
400 |             "\t*         - any byte\n"
401 |             "\t33|44|FA  - exact 3 bytes",
402 |             cxxopts::value<std::vector<std::string>>(), "[f1,p1,...]")
403 | 
404 |         // Bruteforce filters
405 |         (ARG_EFILTER, "Exclude filter: key matching this filters will not be used in bruteforce.",
406 |             cxxopts::value<std::uint64_t>()->default_value("0"), "<value>")
407 |         (ARG_IFILTER, "Include filter: only keys matching this filters will be used in bruteforce. (WARNING: may be EXTREMELY heavy to compute)",
408 |             cxxopts::value<std::uint64_t>()->default_value("0xFFFFFFFFFFFFFFFF"), "<value>")
409 | 
410 |         // Stop config
411 |         (ARG_FMATCH, "Boolean. Stop bruteforce on first match. If inputs are 3+ probably should set to true",
412 |             cxxopts::value<bool>()->default_value("true"))
413 | 
414 |         // Tests run
415 |         (ARG_TEST, "Boolean. Run application tests. You'd better use them in debug.",
416 |             cxxopts::value<bool>()->default_value("false"))
417 | 
418 |         // Benchmarks run
419 |         (ARG_BENCHMARK, "Boolean. Run application benchmarks. You can specify learning and num loops type from command line also.",
420 |             cxxopts::value<bool>()->default_value("false"))
421 |         ;
422 | 
423 |     CommandLineArgs args;
424 | 
425 |     auto result = options.parse(argc, argv);
426 | 
427 |     // tests
428 |     args.run_tests = result[ARG_TEST].as<bool>();
429 | 
430 |     // benchmarks
431 |     args.run_bench = result[ARG_BENCHMARK].as<bool>();
432 | 
433 |     // CUDA setup
434 |     args.init_cuda(result[ARG_BLOCKS].as<uint16_t>(), result[ARG_THREADS].as<uint16_t>(),
435 |         result.count(ARG_LOOPS) > 0 ? result[ARG_LOOPS].as<uint16_t>() : 0);
436 | 
437 |     if (result.count(ARG_HELP) || result.arguments().size() == 0 || result.count(ARG_INPUTS) == 0)
438 |     {
439 |         if (!args.run_tests && !args.run_bench)
440 |         {
441 |             printf("\n%s\n", options.help().c_str());
442 |             printf("%s\n", Usage());
443 |         }
444 |         return args;
445 |     }
446 | 
447 |     // Inputs
448 |     if (result.count(ARG_INPUTS) > 0)
449 |     {
450 |         args.init_inputs(result[ARG_INPUTS].as<std::vector<uint64_t>>());
451 |         if (args.inputs.size() < 3)
452 |         {
453 |             printf("WARNING: No enough inputs: '%zd'! Need at least 3!\nHowever we'll proceed...\n", args.inputs.size());
454 |         }
455 |     }
456 |     else
457 |     {
458 |         printf("Error: No inputs! Nothing to brute!\n%s\n", options.help().c_str());
459 |         return args;
460 |     }
461 | 
462 |     // Stop if need
463 |     args.match_stop = result[ARG_FMATCH].as<bool>();
464 | 
465 |     // Alphabets
466 |     read_alphabets(args, result);
467 | 
468 |     // Bruteforce configs
469 |     if (result.count(ARG_MODE) > 0)
470 |     {
471 |         for (const auto& mode : result[ARG_MODE].as<std::vector<uint8_t>>())
472 |         {
473 |             switch (mode)
474 |             {
475 |             case (uint8_t)BruteforceType::Dictionary:
476 |                 parse_dictionary_mode(args, result);
477 |                 break;
478 |             case (uint8_t)BruteforceType::Simple:
479 |                 parse_bruteforce_mode(args, result);
480 |                 break;
481 |             case (uint8_t)BruteforceType::Filtered:
482 |                 parse_bruteforce_filtered_mode(args, result);
483 |                 break;
484 |             case (uint8_t)BruteforceType::Alphabet:
485 |                 parse_alphabet_mode(args, result);
486 |                 break;
487 |             case (uint8_t)BruteforceType::Pattern:
488 |                 parse_pattern_mode(args, result);
489 |                 break;
490 |             case (uint8_t)BruteforceType::Seed:
491 |                 parse_seed_mode(args, result);
492 |                 break;
493 |             default:
494 |                 break;
495 |             }
496 |         }
497 | 
498 |         if (args.brute_configs.size() == 0)
499 |         {
500 |             printf("Error: Cannot parse inputs to even single brute config! Result arguments:\n'%s'\n",
501 |                 result.arguments_string().c_str());
502 |         }
503 |     }
504 |     else
505 |     {
506 |         printf("Error: you need to specify bruteforce mode!\n%s\n",
507 |             options.help().c_str());
508 |     }
509 | 
510 |     if (result.count(ARG_LTYPE) > 0)
511 |     {
512 |         auto learning_type_bytes = result[ARG_LTYPE].as<std::vector<uint8_t>>();
513 | 
514 |         args.selected_learning.clear();
515 |         for (auto value : learning_type_bytes)
516 |         {
517 |             if (value < KeeloqLearningType::LAST)
518 |             {
519 |                 args.selected_learning.push_back(value);
520 |             }
521 |         }
522 |     }
523 | 
524 |     return args;
525 | }
526 | 
527 | bool CommandLineArgs::can_bruteforce()
528 | {
529 | 	return inputs.size() > 0 && brute_configs.size() > 0;
530 | }
531 | 
532 | void CommandLineArgs::init_inputs(const std::vector<uint64_t>& inp)
533 |  {
534 | 	 inputs.reserve(inp.size());
535 |      for (uint64_t ota : inp)
536 |      {
537 |         inputs.push_back(EncParcel(ota));
538 |      }
539 |  }
540 | 
541 | void CommandLineArgs::init_cuda(uint16_t b, uint16_t t, uint16_t l)
542 |  {
543 | 	 cuda_blocks	= b;
544 | 	 assert(cuda_blocks < max_cuda_blocks() && "This GPU cannot use this much blocks!");
545 | 
546 | 	 cuda_threads	= t;
547 | 	 cuda_loops		= l;
548 | 
549 | 	 if (cuda_threads == 0)
550 | 	 {
551 | 		 cuda_threads = (uint16_t)max_cuda_threads();
552 | 	 }
553 |  }
554 | 
555 | uint32_t CommandLineArgs::max_cuda_threads()
556 |  {
557 | 	 cudaDeviceProp prop;
558 | 	 cudaGetDeviceProperties(&prop, 0);
559 | 
560 | 	 return prop.maxThreadsPerBlock;
561 |  }
562 | 
563 | uint32_t CommandLineArgs::max_cuda_blocks()
564 |  {
565 | 	 cudaDeviceProp prop;
566 | 	 cudaGetDeviceProperties(&prop, 0);
567 | 
568 | 	 return prop.maxGridSize[0];
569 |  }
570 | 


--------------------------------------------------------------------------------
/src/host/command_line_args.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common.h"
 4 | 
 5 | #include <vector>
 6 | 
 7 | #include "algorithm/keeloq/keeloq_learning_types.h"
 8 | #include "algorithm/keeloq/keeloq_encrypted.h"
 9 | #include "bruteforce/bruteforce_config.h"
10 | 
// Application name
#define APP_NAME        "CudaKeeloq"

// Names of the command line options (used as lookup keys when arguments are parsed)
#define ARG_HELP        "help"
#define ARG_TEST        "test"
#define ARG_BENCHMARK   "benchmark"
#define ARG_INPUTS      "inputs"
#define ARG_BLOCKS      "cuda-blocks"
#define ARG_THREADS     "cuda-threads"
#define ARG_LOOPS       "cuda-loops"
#define ARG_MODE        "mode"
#define ARG_LTYPE       "learning-type"
#define ARG_WORDDICT    "word-dict"
#define ARG_BINDICT     "bin-dict"
#define ARG_BINDMODE    "bin-dict-mode"
#define ARG_START       "start"
#define ARG_SEED        "seed"
#define ARG_COUNT       "count"
#define ARG_ALPHABET    "alphabet"
#define ARG_PATTERN     "pattern"
#define ARG_IFILTER     "include-filter"
#define ARG_EFILTER     "exclude-filter"
#define ARG_FMATCH      "first-match"
33 | 
34 | 
35 | /**
36 |  *  Aggregated configuration of application
37 |  */
38 | struct CommandLineArgs
39 | {
40 |     // Input encrypted data (3 caught OTA values)
41 |     std::vector<EncParcel> inputs;
42 | 
43 |     // How brute will be performed (may be several iterations)
44 |     std::vector<BruteforceConfig> brute_configs;
45 | 
46 |     // Do not do all 16 calculations, use predefined one
47 |     std::vector<KeeloqLearningType::Type> selected_learning = {};
48 | 
49 |     //  Alphabets are just set of possible byte values
50 |     // this sets may be shared between attacks
51 |     std::vector<MultibaseDigit> alphabets;
52 | 
53 |     // Stop on first match
54 |     bool match_stop;
55 | 
56 |     // Cuda setup
57 |     uint16_t cuda_blocks;
58 |     uint16_t cuda_threads;
59 |     uint16_t cuda_loops;
60 | 
61 |     // run also tests
62 |     bool run_tests;
63 | 
64 |     // Run only benchmarks (with selected values)
65 |     bool run_bench;
66 | 
67 | public:
68 | 
69 |     // Parse from standard terminal way
70 |     static CommandLineArgs parse(int argc, const char** argv);
71 | 
72 | public:
73 |     // Checks if arguments enough for bruteforcing
74 |     bool can_bruteforce();
75 | 
76 |     // Init enc parcel collection with raw OTA values
77 |     void init_inputs(const std::vector<uint64_t>& inp);
78 | 
79 |     void init_cuda(uint16_t b, uint16_t t, uint16_t l);
80 | 
81 |     // Check device capabilities and returns maximum thread allowed for single block
82 |     static uint32_t max_cuda_threads();
83 | 
84 |     // Check device capabilities and returns maximum allowed number of blocks
85 |     static uint32_t max_cuda_blocks();
86 | };


--------------------------------------------------------------------------------
/src/host/console.cpp:
--------------------------------------------------------------------------------
 1 | #include "console.h"
 2 | 
 3 | #include <cstring>
 4 | 
 5 | #ifdef _MSC_VER
 6 |     #pragma warning(push)
 7 |     #pragma warning(disable: 4996)
 8 | #endif
 9 |     #include "cpp-terminal/terminal_base.h"
10 |     #include "cpp-terminal/terminal.h"
11 | #ifdef _MSC_VER
12 |     #pragma warning(pop)
13 | #endif
14 | 
15 | static Term::Terminal s_term = Term::Terminal(true, false);
16 | 
17 | 
18 | void console::progress_bar(double percent, const std::chrono::seconds& elapsed)
19 | {
20 |     constexpr auto progress_width = 80;
21 | 
22 |     static char progress_fill[progress_width] = { 0 };
23 |     static char progress_none[progress_width] = { 0 };
24 |     if (progress_fill[0] == 0)
25 |     {
26 |         std::memset(progress_fill, '=', sizeof(progress_fill));
27 |         std::memset(progress_none, '-', sizeof(progress_none));
28 |     }
29 | 
30 |     std::chrono::seconds eta = elapsed.count() > 0 ?
31 |         std::chrono::seconds((uint64_t)(elapsed.count() / percent)) - elapsed : std::chrono::seconds(0);
32 | 
33 |     printf("[%.*s>", (int)(progress_width * percent), progress_fill);
34 |     printf("%.*s]", (int)(progress_width * (1 - percent)), progress_none);
35 |     printf("%d%%  %02" PRId64 ":%02" PRId64 ":%02" PRId64 "   ETA:%02" PRId64 ":%02" PRId64 ":%02" PRId64 "   \n", (int)(percent * 100),
36 |         elapsed.count() / 3600, (elapsed.count() / 60) % 60, elapsed.count() % 60,
37 |         eta.count() / 3600, (eta.count() / 60) % 60, eta.count() % 60);
38 | }
39 | 
40 | void console::clear_line(int width /*= 0*/)
41 | {
42 |     int tWidth = 0;
43 |     int tHeight = 0;
44 | 
45 |     s_term.get_term_size(tHeight, tWidth);
46 |     printf("\r%*s", width > 0 ? width : (tWidth - 1), "");
47 |     printf("\r");
48 | }
49 | 
50 | 
51 | int console::read_esc_press()
52 | {
53 |     return s_term.read_key0() == Term::ESC;
54 | }
55 | 
56 | 
57 | void console::set_cursor_state(bool visible)
58 | {
59 |     s_term.write(visible ? Term::cursor_on() : Term::cursor_off());
60 | }
61 | 
62 | uint32_t console::get_width()
63 | {
64 |     int cols = CONSOLE_WIDTH;
65 |     int rows = 0;
66 | 
67 |     s_term.get_term_size(rows, cols);
68 | 
69 |     return cols;
70 | }
71 | 


--------------------------------------------------------------------------------
/src/host/console.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common.h"
 4 | 
 5 | #include <stdio.h>
 6 | #include <chrono>
 7 | 
 8 | #include "host/command_line_args.h"
 9 | 
// Frames of the textual "spinner" shown while work is in progress
constexpr char WAIT_SPIN[] = "|/-\\";

// Spinner frame for step `i`; wraps around and never returns the terminating NUL.
// The argument is parenthesised so that expressions such as `a + b` index correctly.
#define WAIT_CHAR(i) (WAIT_SPIN[(i) % (sizeof(WAIT_SPIN) - 1)])

// Initial column count used by console::get_width() before querying the terminal
#define CONSOLE_WIDTH 160

// ANSI: move the cursor home and erase from there to the end of the screen
#define console_clear() printf("\033[H\033[J")

// ANSI: move the cursor `lines` rows up
#define console_cursor_up(lines) printf("\033[%dA", (lines))
// NOTE(review): "CSI <n> u" is not a standard "set width" sequence - confirm the intent
#define console_set_width(col) printf("\033[%du", (col))

// ANSI: move the cursor to the beginning of the line `lines` rows up
#define console_cursor_ret_up(lines) printf("\033[%dF", (lines))
// ANSI: place the cursor at row `y`, column `x`
#define console_set_cursor(x,y) printf("\033[%d;%dH", (y), (x))

// ANSI: save / restore the cursor position
#define save_cursor_pos() printf("\033[s")
#define load_cursor_pos() printf("\033[u")
25 | 
namespace console
{

// Print a progress bar line for the completed fraction `percent`, with elapsed time and ETA
void progress_bar(double percent, const std::chrono::seconds& elapsed);

// Blank the current line; `width` <= 0 means "use the terminal width"
void clear_line(int width = 0);

// Non-zero when the key read from the terminal is ESC
int read_esc_press();

// Show or hide the terminal cursor
void set_cursor_state(bool visible);

// Number of columns reported by the terminal
uint32_t get_width();

}


--------------------------------------------------------------------------------
/src/host/host_utils.cpp:
--------------------------------------------------------------------------------
  1 | #define _CRT_SECURE_NO_WARNINGS
  2 | 
  3 | #include "common.h"
  4 | 
  5 | #include <stdlib.h>
  6 | #include <cstring>
  7 | 
  8 | #include "host_utils.h"
  9 | 
 10 | #include "algorithm/keeloq/keeloq_decryptor.h"
 11 | 
 12 | 
 13 | #ifndef _MSC_VER
 14 |     #include <byteswap.h>
 15 | #else
 16 |     #define bswap_64(x) _byteswap_uint64(x)
 17 | #endif
 18 | 
 19 | 
 20 | namespace
 21 | {
 22 | 
 23 | bool parse_manufactorer_key(char* key_str, uint64_t& key)
 24 | {
 25 |     if (!key_str)
 26 |     {
 27 |         return 0;
 28 |     }
 29 | 
 30 |     int base = 0;
 31 |     if (key_str[0] == '0' && (key_str[1] == 'b' || key_str[1] == 'B'))
 32 |     {
 33 |         key_str[0] = ' ';
 34 |         key_str[1] = ' ';
 35 |         base = 2;
 36 |     }
 37 | 
 38 |     if (auto parsed = strtoull(key_str, nullptr, base))
 39 |     {
 40 |         key = parsed;
 41 |         return true;
 42 |     }
 43 |     else
 44 |     {
 45 |         return false;
 46 |     }
 47 | }
 48 | 
 49 | bool parse_seed(char* seed_str, uint32_t& seed)
 50 | {
 51 |     if (!seed_str)
 52 |     {
 53 |         return 0;
 54 |     }
 55 | 
 56 |     int base = 10;
 57 | 
 58 |     if (auto parsed = strtoul(seed_str, nullptr, base))
 59 |     {
 60 |         seed = parsed;
 61 |         return true;
 62 |     }
 63 |     else
 64 |     {
 65 |         return false;
 66 |     }
 67 | }
 68 | 
 69 | }
 70 | 
 71 | 
 72 | 
 73 | std::vector<Decryptor> host::utils::read_word_dictionary_file(const char* file)
 74 | {
 75 |     std::vector<Decryptor> results;
 76 | 
 77 |     if (FILE* file_dict = fopen(file, "r"))
 78 |     {
 79 |         char line[256] = { 0 };
 80 |         char delim[2] = ":";
 81 | 
 82 |         while (fgets(line, sizeof(line), file_dict))
 83 |         {
 84 |             auto man_str = strtok(line, delim);
 85 |             if (man_str == nullptr)
 86 |             {
 87 |                 man_str = line;
 88 |             }
 89 | 
 90 |             uint64_t man = (uint64_t)0;
 91 |             uint32_t seed = (uint32_t)0;
 92 | 
 93 |             if (!parse_manufactorer_key(man_str, man))
 94 |             {
 95 |                 printf("Error: invalid line: `%s` in file: '%s'\n", line, file);
 96 |             }
 97 | 
 98 |             auto seed_str = strtok(NULL, delim);
 99 |             parse_seed(seed_str, seed);
100 | 
101 |             results.emplace_back(man, seed);
102 |         }
103 | 
104 |         fclose(file_dict);
105 |     }
106 | 
107 |     return results;
108 | }
109 | 
110 | std::vector<Decryptor> host::utils::read_binary_dictionary_file(const char* file, uint8_t mode, uint32_t seed)
111 | {
112 |     std::vector<Decryptor> decryptors;
113 | 
114 |     if (FILE* bin_file = fopen(file, "rb"))
115 |     {
116 |         uint8_t key[sizeof(uint64_t)] = { 0 };
117 | 
118 |         while (fread(key, sizeof(uint64_t), sizeof(uint8_t), bin_file))
119 |         {
120 |             uint64_t reversed = *(uint64_t*)key;
121 |             uint64_t as_is = bswap_64(reversed);
122 | 
123 |             uint64_t key = mode == 0 ? as_is : reversed;
124 | 
125 |             decryptors.push_back(Decryptor(key, seed));
126 |             if (mode == 2)
127 |             {
128 |                 // reversed already added above
129 |                 decryptors.push_back(Decryptor(as_is, seed));
130 |             }
131 |         }
132 | 
133 |         fclose(bin_file);
134 |     }
135 | 
136 |     return decryptors;
137 | }
138 | 
139 | std::vector<uint8_t> host::utils::read_alphabet_binary_file(const char* file)
140 | {
141 |     constexpr uint32_t MaxFileSize = 256;
142 | 
143 |     if (FILE* alphabet_file = fopen(file, "rb"))
144 |     {
145 |         // alphabet with more than 256 bytes is impossible (or just has duplicates)
146 |         uint8_t bytes[MaxFileSize];
147 |         size_t read_bytes = fread(bytes, sizeof(uint8_t), sizeof(bytes), alphabet_file);
148 | 
149 |         fseek(alphabet_file, 0, SEEK_END);
150 |         uint64_t size = ftell(alphabet_file);
151 |         if (size > MaxFileSize)
152 |         {
153 |             printf("Warning: File's '%s' is %" PRIu64 " bytes. It is bigger than read %u bytes. Alphabet bytes should be unique!\n",
154 |                 file, size, MaxFileSize);
155 |         }
156 | 
157 |         fclose(alphabet_file);
158 | 
159 |         std::vector<uint8_t> alphabet_bytes(&bytes[0], &bytes[read_bytes]);
160 |         return alphabet_bytes;
161 |     }
162 | 
163 |     return {};
164 | }
165 | 


--------------------------------------------------------------------------------
/src/host/host_utils.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <vector>
 4 | 
 5 | struct Decryptor;
 6 | 
 7 | namespace host
 8 | {
 9 | namespace utils
10 | {
11 | 
12 | // Read file with set with hexadecimal string keys
13 | std::vector<Decryptor> read_word_dictionary_file(const char* file);
14 | 
15 | // Read binary file as set of keys 8-bytes each
16 | std::vector<Decryptor> read_binary_dictionary_file(const char* file, uint8_t mode, uint32_t seed);
17 | 
18 | // Read first 256 bytes of a binary file as alphabet
19 | std::vector<uint8_t> read_alphabet_binary_file(const char* file);
20 | 
21 | }
22 | }


--------------------------------------------------------------------------------
/src/host/timer.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <chrono>
 4 | 
 5 | // Simple helper timer
 6 | template<typename TClock = std::chrono::high_resolution_clock>
 7 | struct Timer
 8 | {
 9 |     static inline Timer<TClock> start()
10 |     {
11 |         return Timer<TClock>(TClock::now());
12 |     }
13 | 
14 |     template <typename TDuration = std::chrono::milliseconds>
15 |     inline TDuration elapsed()
16 |     {
17 |         return std::chrono::duration_cast<TDuration>(TClock::now() - start_point);
18 |     }
19 | 
20 |     inline std::chrono::seconds elapsed_secods()
21 |     {
22 |         return elapsed<std::chrono::seconds>();
23 |     }
24 | 
25 | protected:
26 |     Timer(typename TClock::time_point initial) : start_point(initial)
27 |     {
28 |     }
29 | 
30 | private:
31 | 
32 |     typename TClock::time_point start_point;
33 | };


--------------------------------------------------------------------------------
/src/kernels/kernel_result.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common.h"
 4 | 
 5 | #include "device/cuda_object.h"
 6 | 
 7 | /**
 8 |  *  Generic output from CUDA kernel
 9 |  */
10 | struct KernelResult : TGenericGpuObject<KernelResult>
11 | {
12 |     // num errors. negative are kernel errors. positive - number of threads error
13 |     int error = 0;
14 | 
15 |     // overall result
16 |     int value = 0;
17 | 
18 |     KernelResult() : TGenericGpuObject<KernelResult>(this)
19 |     {
20 |     }
21 | 
22 |     KernelResult(KernelResult&& other) noexcept : TGenericGpuObject<KernelResult>(this)
23 |     {
24 |         error = other.error;
25 |         value = other.value;
26 |     }
27 | 
28 |     KernelResult& operator=(KernelResult&& other) noexcept
29 |     {
30 |         error = other.error;
31 |         value = other.value;
32 |         SelfGpu.HOST_Ptr = this;
33 |         return *this;
34 |     }
35 | };
36 | 


--------------------------------------------------------------------------------
/src/main.cpp:
--------------------------------------------------------------------------------
  1 | #include "common.h"
  2 | 
  3 | #include "stdio.h"
  4 | 
  5 | #include "host/console.h"
  6 | 
  7 | #include "algorithm/keeloq/keeloq_kernel.h"
  8 | #include "bruteforce/generators/generator_bruteforce.h"
  9 | #include "bruteforce/bruteforce_round.h"
 10 | 
 11 | #include "tests/test_all.h"
 12 | 
 13 | CommandLineArgs demoTestCommandlineArgs(int num_gen_input = 3)
 14 | {
 15 |     constexpr uint64_t debugKey =  0xC0FFEE00DEAD6666;
 16 | 
 17 | #if _DEBUG
 18 |     uint64_t first = debugKey & 0xFFFFFFFFFFC00000;
 19 | #else
 20 |     uint64_t first = debugKey & 0xFFFFFFFFF0000000;
 21 | #endif
 22 |     uint64_t count = 0xFFFFFFF;
 23 | 
 24 |     Decryptor first_decryptor_ptrn(0, tests::keeloq::default_seed);
 25 |     Decryptor first_decryptor_brtf(first, tests::keeloq::default_seed);
 26 | 
 27 |     CommandLineArgs cmd;
 28 |     cmd.inputs = tests::keeloq::gen_inputs<KeeloqLearningType::Faac>(debugKey, num_gen_input);
 29 |     cmd.alphabets.emplace_back(MultibaseDigit("abcdef"_b));
 30 |     cmd.alphabets.emplace_back(MultibaseDigit( { 0xC0, 0xFF, 0xEE, 0x00, 0xDE, 0xAD, 0x66 }));
 31 | 
 32 |     // Dictionary
 33 |     cmd.brute_configs.emplace_back(BruteforceConfig::GetDictionary({
 34 |         Decryptor(666, tests::keeloq::default_seed),
 35 |         Decryptor(debugKey - 1, tests::keeloq::default_seed),
 36 |         Decryptor(debugKey, tests::keeloq::default_seed),
 37 |         Decryptor(debugKey + 1, tests::keeloq::default_seed)
 38 |     }));
 39 | 
 40 |     // Alphabet
 41 |     cmd.brute_configs.emplace_back(BruteforceConfig::GetAlphabet(first_decryptor_ptrn, cmd.alphabets[1]));
 42 |     cmd.brute_configs.emplace_back(BruteforceConfig::GetAlphabet(first_decryptor_ptrn, cmd.alphabets[0]));
 43 | 
 44 |     // Seed
 45 |     cmd.brute_configs.emplace_back(BruteforceConfig::GetSeedBruteforce(Decryptor(debugKey, 0)));
 46 | 
 47 |     // Pattern (reversed)
 48 |     cmd.brute_configs.emplace_back(BruteforceConfig::GetPattern(first_decryptor_ptrn, BruteforcePattern(
 49 |         {
 50 |             BruteforcePattern::ParseBytes("c0|c1|c2|c3"),
 51 |             BruteforcePattern::ParseBytes("F0-FF"),
 52 |             BruteforcePattern::ParseBytes("E0-EF"),
 53 |             BruteforcePattern::ParseBytes("00-99"),
 54 |             { 0xED, 0xDE },
 55 |             { 0xDA, 0xAD },
 56 |             { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66 },
 57 |             { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66 }
 58 |         }, "N/A")));
 59 | 
 60 |     // Simple
 61 |     cmd.brute_configs.emplace_back(BruteforceConfig::GetBruteforce(first_decryptor_brtf, count));
 62 | 
 63 |     // Filters
 64 |     cmd.brute_configs.emplace_back(BruteforceConfig::GetBruteforce(first_decryptor_brtf, count, BruteforceFilters
 65 |         {
 66 |             // Include only
 67 |             BruteforceFilters::Flags::All,
 68 | 
 69 |             // Exclude
 70 |             BruteforceFilters::Flags::BytesIncremental
 71 |         }));
 72 | 
 73 |     cmd.selected_learning = { }; // ALL
 74 | 
 75 |     cmd.match_stop = false;
 76 |     cmd.run_bench = false;
 77 |     cmd.run_tests = false;
 78 | 
 79 | #if _DEBUG
 80 |     cmd.init_cuda(512, 0, 1);
 81 | #else
 82 |     cmd.init_cuda(4096, 0, 1);
 83 | #endif
 84 | 
 85 |     return cmd;
 86 | }
 87 | 
 88 | void bruteforce(const CommandLineArgs& args)
 89 | {
 90 |     if (args.selected_learning.size() == 0)
 91 |     {
 92 |         printf("Bruteforcing without specific learning type (slower)"
 93 |             "(1 KKey/s == %u Kkc (keeloq calcs) per second)\n"
 94 |             "In case of full range there also redundant checks since using _REV learning types ( X-00:11:22 == X_REV-22:11:00 )\n", KeeloqLearningType::LAST);
 95 |     }
 96 | 
 97 |     for (const auto& config : args.brute_configs)
 98 |     {
 99 |         BruteforceRound attackRound(args.inputs, config, args.selected_learning, args.cuda_blocks, args.cuda_threads, args.cuda_loops);
100 | 
101 |         printf("\nallocating...");
102 |         attackRound.Init();
103 | 
104 |         printf("\rRunning...    \n%s\n", attackRound.to_string().c_str());
105 | 
106 |         bool match = false;
107 | 
108 |         size_t batchesInRound = attackRound.num_batches();
109 |         size_t keysInBatch = attackRound.keys_per_batch();
110 | 
111 |         auto roundStartTime = std::chrono::system_clock::now();
112 | 
113 |         for (size_t batch = 0; !match && batch < batchesInRound; ++batch)
114 |         {
115 |             auto batchStartTime = std::chrono::high_resolution_clock::now();
116 | 
117 |             KeeloqKernelInput& kernelInput = attackRound.Inputs();
118 | 
119 |             if (attackRound.Type() != BruteforceType::Dictionary)
120 |             {
121 |                 // Generate decryptors (if available)
122 |                 int error = GeneratorBruteforce::PrepareDecryptors(kernelInput, attackRound.CudaBlocks(), attackRound.CudaThreads());
123 |                 if (error)
124 |                 {
125 |                     printf("Error: Key generation resulted with error: %d", error);
126 |                     assert(false);
127 |                     return;
128 |                 }
129 | 
130 |                 // Make previous last generated key be an initial for current generation batch
131 |                 kernelInput.NextDecryptor();
132 |             }
133 |             else
134 |             {
135 |                 // Write next batch of keys from dictionary
136 |                 kernelInput.WriteDecryptors(config.decryptors, batch * keysInBatch, keysInBatch);
137 |             }
138 | 
139 |             // do the bruteforce
140 |             auto kernelResults = keeloq::kernels::cuda_brute(kernelInput, attackRound.CudaBlocks(), attackRound.CudaThreads());
141 |             match = attackRound.check_results(kernelResults);
142 | 
143 |             auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(
144 |                 std::chrono::high_resolution_clock::now() - batchStartTime);
145 | 
146 |             if (batch == 0 || match)
147 |             {
148 |                 console::set_cursor_state(false);
149 |                 printf("\n\n\n");
150 |             }
151 | 
152 |             if (!match)
153 |             {
154 |                 auto kilo_result_per_second = duration.count() == 0 ? 0 : keysInBatch / duration.count();
155 |                 auto progress_percent = (double)(batch + 1) / batchesInRound;
156 | 
157 |                 console_cursor_ret_up(2);
158 | 
159 |                 const Decryptor& last_used_decryptor = kernelInput.GetConfig().last;
160 | 
161 |                 printf("[%c][%zd/%zd]    %" PRIu64 "(ms)/batch Speed: %" PRIu64 " KKeys/s   Last key:0x%" PRIX64 " (%u)         \n",
162 |                     WAIT_CHAR(batch),
163 |                     batch, batchesInRound,
164 |                     duration.count(),
165 |                     kilo_result_per_second,
166 |                     last_used_decryptor.man(), last_used_decryptor.seed());
167 | 
168 |                 auto overall = std::chrono::duration_cast<std::chrono::seconds>(
169 |                     std::chrono::system_clock::now() - roundStartTime);
170 | 
171 |                 console::progress_bar(progress_percent, overall);
172 |             }
173 |         }
174 | 
175 |         if (!match)
176 |         {
177 |             printf("\n\nAfter: %zd batches no results was found. Keys checked:%zd\n\n",
178 |                 batchesInRound, batchesInRound * keysInBatch);
179 |         }
180 |         else if (args.match_stop)
181 |         {
182 |             break;
183 |         }
184 |     }
185 | }
186 | 
187 | int main(int argc, const char** argv)
188 | {
189 |     assert(tests::cuda_check_working());
190 | 
191 |     if (!keeloq::kernels::cuda_is_working())
192 |     {
193 |         printf("Error: This device cannot compute keeloq right. Single encryption and decryption mismatch.\n");
194 |         assert(false);
195 |         return 1;
196 |     }
197 | 
198 |     // Be default if no arguments specified - launch demo mode
199 |     bool demo_mode = argc <= 1;
200 |     auto args = demo_mode ? demoTestCommandlineArgs() : CommandLineArgs::parse(argc, argv);
201 | 
202 |     bool had_tests = args.run_bench || args.run_tests;
203 | 
204 |     if (args.run_tests)
205 |     {
206 |         printf("\n...RUNNING TESTS...\n");
207 |         tests::console::run();
208 | 
209 |         tests::pattern_generation();
210 |         tests::alphabet_generation();
211 |         tests::filters_generation();
212 | 
213 |         printf("\n...TESTS FINISHED...\n");
214 |     }
215 | 
216 |     if (args.run_bench)
217 |     {
218 |         benchmark::all(args);
219 |     }
220 | 
221 |     if (args.can_bruteforce())
222 |     {
223 |         if (demo_mode)
224 |         {
225 |             printf(R"(
226 |                      ___                   __  ___        __
227 |                     / _ \___ __ _  ___    /  |/  /__  ___/ /__
228 |                    / // / -_)  ' \/ _ \  / /|_/ / _ \/ _  / -_)
229 |                   /____/\__/_/_/_/\___/ /_/  /_/\___/\_,_/\__/
230 | 
231 |                 )");
232 |         }
233 | 
234 |         bruteforce(args);
235 |     }
236 |     else if (!had_tests)
237 |     {
238 |         printf("\nNot enough arguments for bruteforce\n");
239 |         return 1;
240 |     }
241 | 
242 |     // this will free all memory as well
243 |     cudaDeviceReset();
244 | 
245 |     console::set_cursor_state(true);
246 |     return 0;
247 | }


--------------------------------------------------------------------------------
/src/tests/test_all.h:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
 3 | #include <cuda_runtime_api.h>
 4 | 
 5 | #include "tests/test_alphabet.h"
 6 | #include "tests/test_benchmark.h"
 7 | #include "tests/test_console.h"
 8 | #include "tests/test_filters.h"
 9 | #include "tests/test_keeloq.h"
10 | #include "tests/test_pattern.h"
11 | 
12 | namespace tests
13 | {
14 | 	__host__ bool cuda_check_working();
15 | }


--------------------------------------------------------------------------------
/src/tests/test_alphabet.cpp:
--------------------------------------------------------------------------------
 1 | #include "tests/test_alphabet.h"
 2 | 
 3 | #include "device/cuda_vector.h"
 4 | #include "kernels/kernel_result.h"
 5 | #include "bruteforce/bruteforce_config.h"
 6 | #include "bruteforce/generators/generator_bruteforce.h"
 7 | #include "algorithm/keeloq/keeloq_kernel_input.h"
 8 | #include "algorithm/keeloq/keeloq_decryptor.h"
 9 | 
10 | 
11 | bool tests::alphabet_generation()
12 | {
13 |     // Filtered generator test itself
14 |     constexpr auto NumBlocks = 64;
15 |     constexpr auto NumThreads = 64;
16 | 
17 |     auto testConfig = BruteforceConfig::GetAlphabet(Decryptor(0,0), "abcd"_b, 0xFFFFFFFF);
18 | 
19 |     CudaVector<Decryptor> decryptors(NumBlocks * NumThreads);
20 | 
21 |     KeeloqKernelInput generatorInputs;
22 |     generatorInputs.decryptors = decryptors.gpu();
23 |     generatorInputs.Initialize(testConfig, KeeloqLearningType::full_mask());
24 | 
25 |     for (int i = 0; i < 16; ++i)
26 |     {
27 |         GeneratorBruteforce::PrepareDecryptors(generatorInputs, NumBlocks, NumThreads);
28 | 
29 |         decryptors.read();
30 | 
31 |         assert((decryptors.cpu()[0].man() & 0x0000FFFFFFFFFFFF) == 0x616161616161);
32 | 
33 |         generatorInputs.NextDecryptor();
34 |     }
35 | 
36 |     assert(decryptors.cpu()[4095].man() == 0x6464646464646464);
37 | 
38 |     return true;
39 | }


--------------------------------------------------------------------------------
/src/tests/test_alphabet.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common.h"
 4 | 
 5 | #include <vector>
 6 | 
 7 | namespace tests
 8 | {
 9 | 	bool alphabet_generation();
10 | }


--------------------------------------------------------------------------------
/src/tests/test_benchmark.cpp:
--------------------------------------------------------------------------------
  1 | #include "common.h"
  2 | 
  3 | #include <numeric>
  4 | #include <algorithm>
  5 | 
  6 | #include "test_benchmark.h"
  7 | #include "host/console.h"
  8 | #include "host/timer.h"
  9 | 
 10 | #include "bruteforce/generators/generator_bruteforce.h"
 11 | #include "bruteforce/bruteforce_config.h"
 12 | #include "bruteforce/bruteforce_round.h"
 13 | 
 14 | #include "algorithm/keeloq/keeloq_kernel.h"
 15 | #include "tests/test_keeloq.h"
 16 | 
 17 | 
 18 | void benchmark::run(const CommandLineArgs& args, const BruteforceConfig& benchmarkConfig, const std::vector<uint16_t>& CudaBlocks, const std::vector<uint16_t>& CudaThreads)
 19 | {
 20 | #if _DEBUG
 21 |     constexpr size_t TargetCalculations = 10000000;
 22 | #else
 23 |     constexpr size_t TargetCalculations = 100000000;
 24 | #endif
 25 | 
 26 |     static const uint32_t MaxCudaThreads = CommandLineArgs::max_cuda_threads();
 27 |     static const uint32_t MaxCudaBlocks = CommandLineArgs::max_cuda_blocks();
 28 | 
 29 |     printf("BENCHMARK BEGIN\n\nConfig is: Alphabet\n");
 30 | #ifndef NO_INNER_LOOPS
 31 |     printf("Num loops inside CUDA                   : %u\n", args.cuda_loops);
 32 | #endif // !NO_INNER_LOOPS
 33 |     printf(
 34 |         "Max Available CUDA Threads per block       : %u\n"
 35 |         "Max Available CUDA Blocks                  : %u\n"
 36 |         "Num total calculations                     : %" PRIu64 " (Millions)\n"
 37 |         "Learning                                   : %s\n"
 38 |         "Seed specified                             : %s\n\n",
 39 |         MaxCudaThreads, MaxCudaBlocks,
 40 |         (TargetCalculations / 1000000),
 41 |         KeeloqLearningType::to_string(args.selected_learning).c_str(),
 42 |         (benchmarkConfig.start.seed() == 0 ? "false" : "true"));
 43 | 
 44 | 
 45 |     bool in_progress = true;
 46 |     for (auto NumCudaBlocks : CudaBlocks)
 47 |     {
 48 |         if (!in_progress || NumCudaBlocks > MaxCudaBlocks)
 49 |         {
 50 |             break;
 51 |         }
 52 | 
 53 |         for (auto NumCudaThreads : CudaThreads)
 54 |         {
 55 |             if (!in_progress || NumCudaThreads > MaxCudaThreads)
 56 |             {
 57 |                 break;
 58 |             }
 59 | 
 60 |             BruteforceRound benchmarkRound(args.inputs, benchmarkConfig, args.selected_learning, NumCudaBlocks, NumCudaThreads, args.cuda_loops);
 61 |             benchmarkRound.Init();
 62 | 
 63 |             size_t keysInBatch = benchmarkRound.keys_per_batch();
 64 |             size_t numBatches = std::max<size_t>(1, TargetCalculations / keysInBatch);
 65 | 
 66 |             std::vector<uint64_t> batches_kResults_per_sec;
 67 | 
 68 |             console::set_cursor_state(false);
 69 |             printf("\n");
 70 | 
 71 |             auto roundTimer = Timer<std::chrono::system_clock>::start();
 72 | 
 73 |             for (size_t i = 0; in_progress && i < numBatches; ++i)
 74 |             {
 75 |                 auto batchTimer = Timer<std::chrono::high_resolution_clock>::start();
 76 | 
 77 |                 KeeloqKernelInput& kernelInput = benchmarkRound.Inputs();
 78 |                 kernelInput.NextDecryptor();
 79 | 
 80 |                 GeneratorBruteforce::PrepareDecryptors(kernelInput, NumCudaBlocks, NumCudaThreads);
 81 |                 keeloq::kernels::cuda_brute(kernelInput, NumCudaBlocks, NumCudaThreads);
 82 | 
 83 |                 auto elapsedMs = batchTimer.elapsed().count();
 84 |                 if (elapsedMs > 0)
 85 |                 {
 86 |                     batches_kResults_per_sec.push_back(keysInBatch / elapsedMs);
 87 |                 }
 88 | 
 89 |                 console_cursor_ret_up(1);
 90 |                 console::progress_bar(i / (double)numBatches, roundTimer.elapsed_secods());
 91 | 
 92 |                 if (console::read_esc_press())
 93 |                 {
 94 |                     console_cursor_ret_up(1);
 95 |                     console::clear_line();
 96 |                     printf("Benchmark skipped\n");
 97 |                     in_progress = false;
 98 |                 }
 99 |             }
100 | 
101 |             if (in_progress)
102 |             {
103 |                 std::sort(batches_kResults_per_sec.begin(), batches_kResults_per_sec.end());
104 | 
105 |                 uint64_t median = 0;
106 |                 uint64_t avg = 0;
107 | 
108 |                 auto num_results = batches_kResults_per_sec.size();
109 |                 if (num_results > 0)
110 |                 {
111 |                     avg = std::reduce(batches_kResults_per_sec.begin(), batches_kResults_per_sec.end()) / num_results;
112 |                 }
113 | 
114 |                 if (num_results > 2)
115 |                 {
116 |                     median = num_results % 2 == 0 ?
117 |                         (batches_kResults_per_sec[num_results / 2] + batches_kResults_per_sec[num_results / 2 - 1]) / 2 :
118 |                         batches_kResults_per_sec[num_results / 2];
119 |                 }
120 | 
121 |                 console_cursor_ret_up(1);
122 |                 console::clear_line();
123 | 
124 |                 // Creating results
125 |                 printf("| CUDA: %" PRIu16 " x %" PRIu16 " \t| MEM: %" PRIu64 " MB\t | Time (ms): %" PRIu64 " \t |\tSpeed (K/s): %" PRIu64 " (avg.) %" PRIu64 " (median) |\t\t\t\t\n",
126 |                     NumCudaBlocks, NumCudaThreads,
127 |                     benchmarkRound.get_mem_size() / (1024 * 1024),
128 |                     roundTimer.elapsed().count(),
129 |                     avg, median);
130 |             }
131 |         }
132 |     }
133 | }
134 | 
135 | void benchmark::all(const CommandLineArgs& args)
136 | {
137 |     cudaDeviceProp prop;
138 |     cudaGetDeviceProperties(&prop, 0);
139 | 
140 |     std::vector<uint16_t> CudaBlocks  = { 256, 512, 1024, 2048, 4096, 8196 };
141 |     std::vector<uint16_t> CudaThreads = { 128, 256, 512, 1024, 2048 };
142 | 
143 |     BruteforceConfig benchmarkConfig_no_seed = BruteforceConfig::GetAlphabet(Decryptor(0, 0), "0123456789abcdefgh"_b);
144 |     BruteforceConfig benchmarkConfig_wt_seed = BruteforceConfig::GetAlphabet(Decryptor(0, 1234567), "0123456789abcdefgh"_b);
145 | 
146 |     console_clear();
147 |     CommandLineArgs copy = args;
148 |     copy.inputs = tests::keeloq::gen_inputs(0xFF123FF3434FFFFF);
149 | 
150 |     copy.selected_learning = {};
151 |     run(copy, benchmarkConfig_wt_seed, CudaBlocks, CudaThreads);
152 |     run(copy, benchmarkConfig_no_seed, CudaBlocks, CudaThreads);
153 | 
154 |     copy.selected_learning = { KeeloqLearningType::Simple };
155 |     run(copy, benchmarkConfig_no_seed, CudaBlocks, CudaThreads);
156 | 
157 |     copy.selected_learning = { KeeloqLearningType::Normal };
158 |     run(copy, benchmarkConfig_no_seed, CudaBlocks, CudaThreads);
159 | 
160 |     copy.selected_learning = { KeeloqLearningType::Secure };
161 |     run(copy, benchmarkConfig_wt_seed, CudaBlocks, CudaThreads);
162 | 
163 |     copy.selected_learning = { KeeloqLearningType::Faac };
164 |     run(copy, benchmarkConfig_wt_seed, CudaBlocks, CudaThreads);
165 | 
166 |     copy.selected_learning = { KeeloqLearningType::Simple, KeeloqLearningType::Normal, KeeloqLearningType::Xor};
167 |     run(copy, benchmarkConfig_no_seed, CudaBlocks, CudaThreads);
168 | 
169 | #ifndef NO_INNER_LOOPS
170 |     copy.cuda_loops = 4;
171 |     run(copy, CudaBlocks, CudaThreads);
172 | #endif
173 | }
174 | 


--------------------------------------------------------------------------------
/src/tests/test_benchmark.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common.h"
 4 | 
 5 | #include "host/command_line_args.h"
 6 | 
 7 | #include <vector>
 8 | 
 9 | struct BruteforceConfig;
10 | 
// Benchmark entry points.
namespace benchmark
{
    // Benchmarks one bruteforce configuration.
    //   args            - command line arguments supplying inputs and settings for the run
    //   benchmarkConfig - bruteforce configuration to benchmark
    //   CudaBlocks      - CUDA block counts to benchmark
    //   CudaThreads     - CUDA thread counts to benchmark
    // NOTE(review): presumably every blocks x threads combination is measured - confirm in test_benchmark.cpp.
    void run(const CommandLineArgs& args, const BruteforceConfig& benchmarkConfig, const std::vector<uint16_t>& CudaBlocks, const std::vector<uint16_t>& CudaThreads);

    // Runs the whole benchmark suite: calls run() for a fixed list of
    // learning-type selections using generated test inputs. `args` itself is not modified.
    void all(const CommandLineArgs& args);
}


--------------------------------------------------------------------------------
/src/tests/test_console.cpp:
--------------------------------------------------------------------------------
 1 | #include "test_console.h"
 2 | 
 3 | #include "host/command_line_args.h"
 4 | #include "host/console.h"
 5 | 
 6 | namespace
 7 | {
 8 | void print_args(const char** args, size_t num)
 9 | {
10 |     printf("\n\t*** TESTING COMMAND LINE:\n");
11 |     for (size_t i = 0; i < num; ++i)
12 |     {
13 |         printf("%s ", args[i]);
14 |     }
15 |     printf("\n\n");
16 | }
17 | }
18 | 
19 | 
20 | CommandLineArgs tests::console::run()
21 | {
22 |     const char* commandline[] = {
23 |         APP_NAME,
24 |         "--" ARG_INPUTS"=0xC65D52A0A81FD504,0xCCA9B335A81FD504,0xE0DA7372A81FD504",
25 |         "--" ARG_BLOCKS"=32",
26 |         "--" ARG_THREADS"=32",
27 |         "--" ARG_LOOPS"=4",
28 |         "--" ARG_MODE"=0,1,2,3,4,5",
29 |         "--" ARG_LTYPE"=0,1,2,3,4",
30 | 
31 |         "--" ARG_WORDDICT"=0xFDE4531BBACAD12,FDE4531BBACAD13,0xFDE4531BBACAD14,examples/dictionary.words",
32 |         "--" ARG_BINDICT"=examples/dictionary.bin",
33 |         "--" ARG_BINDMODE"=2",
34 | 
35 |         "--" ARG_START"=1",
36 |         "--" ARG_SEED"=777",
37 |         "--" ARG_COUNT"=0xFFFF",
38 | 
39 |         "--" ARG_ALPHABET"=61:62:63:64:zz:AB,examples/alphabet.bin",
40 | 
41 |         "--" ARG_IFILTER"=0x2", //SmartFilterFlags::Max6OnesInARow  other are very heavy, this one will allow all numbers less than 0x03FFFFFFFFFFFFFF
42 |         "--" ARG_EFILTER"=64",  //SmartFilterFlags::BytesRepeat4
43 | 
44 |         "--" ARG_PATTERN"=0x01:*:0x43-0x10:0xA0-FF:AA|0x34|0xBB:0x66|0x77:AL0,0x88:asd:w1:88:*:AL2:BB:73",
45 | 
46 |         "--" ARG_FMATCH,
47 |         "--" ARG_BENCHMARK "=1",
48 |         "--" ARG_TEST "=true",
49 |     };
50 | 
51 |     const char* help[] = {
52 |         "CudaKeeloq.exe",
53 |         "-h"
54 |     };
55 | 
56 |     const char* commandlineInvalidSeedMode[] = {
57 |         APP_NAME,
58 |         "--" ARG_INPUTS"=0xC65D52A0A81FD504,0xCCA9B335A81FD504,0xE0DA7372A81FD504",
59 |         "--" ARG_BLOCKS"=32",
60 |         "--" ARG_MODE"=5"
61 |     };
62 | 
63 |     // Print Help
64 |     print_args(help, sizeof(help) / sizeof(char*));
65 |     CommandLineArgs args = CommandLineArgs::parse(sizeof(help) / sizeof(char*), help);
66 | 
67 |     print_args(commandlineInvalidSeedMode, sizeof(commandlineInvalidSeedMode) / sizeof(char*));
68 |     args = CommandLineArgs::parse(sizeof(commandlineInvalidSeedMode) / sizeof(char*), commandlineInvalidSeedMode);
69 |     assert(args.brute_configs.size() == 0);
70 | 
71 |     print_args(commandline, sizeof(commandline) / sizeof(char*));
72 |     args = CommandLineArgs::parse(sizeof(commandline) / sizeof(char*), commandline);
73 | 
74 |     assert(args.alphabets.size() == 2);
75 |     assert(args.brute_configs.size() == 9);
76 | 
77 |     assert(args.match_stop);
78 |     assert(args.run_bench);
79 |     assert(args.run_tests);
80 | 
81 |     return args;
82 | }
83 | 


--------------------------------------------------------------------------------
/src/tests/test_console.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | struct CommandLineArgs;
 4 | 
namespace tests
{
namespace console
{
    // Command line parser self-test: parses a help request, a command line
    // that is expected to produce no bruteforce configurations, and a command
    // line using every argument. Expectations are checked with assert();
    // the arguments parsed from the last command line are returned.
    CommandLineArgs run();
}
}


--------------------------------------------------------------------------------
/src/tests/test_filters.cpp:
--------------------------------------------------------------------------------
  1 | #include "tests/test_filters.h"
  2 | 
  3 | #include <cuda_runtime_api.h>
  4 | 
  5 | #include "device/cuda_vector.h"
  6 | #include "device/cuda_double_array.h"
  7 | 
  8 | #include "bruteforce/bruteforce_filters.h"
  9 | #include "bruteforce/generators/generator_bruteforce.h"
 10 | 
 11 | 
 12 | bool tests::filters_generation()
 13 |     {
 14 |         BruteforceFiltersTestInputs test_cases[] = {
 15 |            { 0x1111334404bbccee, BruteforceFilters::Flags::Max6ZerosInARow, true },
 16 |            { 0x11113344aabbccee, BruteforceFilters::Flags::Max6ZerosInARow, false },
 17 |            { 0x11113344aabbccee, BruteforceFilters::Flags::Max6OnesInARow, false },
 18 |            { 0x11113344aFFbccee, BruteforceFilters::Flags::Max6OnesInARow, true },
 19 | 
 20 |            { 0x3132333435363738, BruteforceFilters::Flags::AsciiNumbers, true },
 21 |            { 0x3132333435363738, BruteforceFilters::Flags::AsciiAlphaNum, true },
 22 |            { 0x3132333435363738, BruteforceFilters::Flags::AsciiAny, true },
 23 |            { 0x2931323334353637, BruteforceFilters::Flags::AsciiNumbers, false },
 24 | 
 25 |            { 0x4142434465666768, BruteforceFilters::Flags::AsciiAlpha,     true },
 26 |            { 0x4142434465666768, BruteforceFilters::Flags::AsciiAlphaNum,  true },
 27 |            { 0x4142434465666768, BruteforceFilters::Flags::AsciiAny, true },
 28 |            { 0x3142434465666768, BruteforceFilters::Flags::AsciiAlpha,     false },
 29 |            { 0x2142434465666768, BruteforceFilters::Flags::AsciiAlphaNum,  false },
 30 |            { 0x1142434465666768, BruteforceFilters::Flags::AsciiAny, false },
 31 | 
 32 |            { 0x214023245e28297e, BruteforceFilters::Flags::AsciiSpecial, true },
 33 |            { 0x214023245e28297e, BruteforceFilters::Flags::AsciiAny, true },
 34 |            { 0x114023245e28297e, BruteforceFilters::Flags::AsciiSpecial, false },
 35 | 
 36 |            { 0x0022222222556677, BruteforceFilters::Flags::BytesRepeat4, true },
 37 |            { 0x0022222222226677, BruteforceFilters::Flags::BytesRepeat4, true },
 38 |            { 0x00Abcdef11111111, BruteforceFilters::Flags::BytesRepeat4, true },
 39 |            { 0x0011222222556677, BruteforceFilters::Flags::BytesRepeat4, false },
 40 |            { 0x0011223344556677, BruteforceFilters::Flags::BytesRepeat4, false },
 41 | 
 42 |            { 0x112233445566aa00, BruteforceFilters::Flags::BytesIncremental, true },
 43 |            { 0xFFEEDDCCBBAA1234, BruteforceFilters::Flags::BytesIncremental, true },
 44 |            { 0x1122334455778899, BruteforceFilters::Flags::BytesIncremental, false },
 45 |         };
 46 | 
 47 |         static uint8_t NumTests = sizeof(test_cases) / sizeof(BruteforceFiltersTestInputs);
 48 |         bool result_success = true;
 49 | 
 50 |         // CPU tests
 51 |         for (int i = 0; i < NumTests; ++i)
 52 |         {
 53 |             bool value = BruteforceFilters::check_filters(test_cases[i].value, test_cases[i].flags);
 54 |             result_success &= value == test_cases[i].result;
 55 | 
 56 |             assert(result_success);
 57 |         }
 58 | 
 59 |         // GPU tests
 60 |         DoubleArray<BruteforceFiltersTestInputs> test_inputs(test_cases, NumTests);
 61 |         cuda_check_bruteforce_filters(test_inputs.CUDA_mem, NumTests);
 62 |         test_inputs.read_GPU(); // for asserts
 63 | 
 64 |         for (uint8_t i = 0; i < NumTests; ++i)
 65 |         {
 66 |             result_success &= test_inputs.HOST_mem[i].value == 1;
 67 |             assert(result_success);
 68 |         }
 69 | 
 70 |         // Filtered generator test itself
 71 |         constexpr auto NumBlocks = 32;// 1; //
 72 |         constexpr auto NumThreads = 512;// 2;// 512;
 73 | 
 74 |         constexpr auto NumToGenerate = 0xFFFFF;
 75 | 
 76 |         auto first_decryptor = Decryptor(0xAADEADBEEFA00000, 0);
 77 | 
 78 |         auto testConfig = BruteforceConfig::GetBruteforce(first_decryptor, NumToGenerate,
 79 |             BruteforceFilters{
 80 |                 BruteforceFilters::Flags::All,     // SmartFilterFlags::AsciiAny;       //
 81 |                 BruteforceFilters::Flags::BytesIncremental | BruteforceFilters::Flags::BytesRepeat4,    // SmartFilterFlags::BytesRepeat4;   //
 82 |             });
 83 | 
 84 |         CudaVector<Decryptor> decryptors(NumToGenerate);
 85 | 
 86 |         KeeloqKernelInput generatorInputs;
 87 |         generatorInputs.decryptors = decryptors.gpu();
 88 |         generatorInputs.Initialize(testConfig, KeeloqLearningType::full_mask());
 89 | 
 90 |         KernelResult result;
 91 | 
 92 |         auto error = GeneratorBruteforce::PrepareDecryptors(generatorInputs, NumBlocks, NumThreads);
 93 |         result_success &= error == 0;
 94 | 
 95 |         decryptors.read();
 96 | 
 97 |         bool found = false;
 98 |         for (size_t i = 0; !found && i < decryptors.size(); ++i)
 99 |         {
100 |             // looking for exact code - check nothing missed
101 |             found |= decryptors.cpu()[i].man() == 0xAADEADBEEFA63ED2;
102 |         }
103 | 
104 |         assert(found);
105 |         result_success &= found;
106 | 
107 |         result.read();
108 |         return result_success;
109 |     }
110 | 


--------------------------------------------------------------------------------
/src/tests/test_filters.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common.h"
 4 | 
 5 | #include "bruteforce/bruteforce_filters.h"
 6 | 
// One entry of the filter self-test table; the same entries are checked on the
// host and handed to the GPU test kernel.
struct BruteforceFiltersTestInputs
{
	// Value the filter is evaluated on. The GPU test kernel overwrites it with
	// the outcome of the check (1 when the filter result matched `result`).
	uint64_t value;

	// Filter flag(s) passed to BruteforceFilters::check_filters.
	BruteforceFilters::Flags::Type flags;

	// Result check_filters() is expected to return for `value` and `flags`.
	bool result;
};
15 | 
namespace tests
{
	// Checks the bruteforce filters on the host and on the GPU, then runs the
	// filtered generator and looks for an expected key in its output.
	// Returns true when everything passed.
	bool filters_generation();

	// Launches the GPU kernel that evaluates `num` entries of `tests`; the kernel
	// stores each entry's outcome in its `value` field (1 on match).
	// NOTE(review): `tests` is passed straight to the kernel, so it presumably has to be device-accessible memory - confirm.
	__host__ void cuda_check_bruteforce_filters(BruteforceFiltersTestInputs* tests, uint8_t num);
}


--------------------------------------------------------------------------------
/src/tests/test_keeloq.cpp:
--------------------------------------------------------------------------------
 1 | #include "test_keeloq.h"
 2 | 
 3 | #include "algorithm/keeloq/keeloq_kernel.h"
 4 | 
 5 | 
 6 | std::vector<EncParcel> tests::keeloq::gen_inputs(uint64_t key, uint8_t num /*= 3*/,
 7 |     uint32_t serial /*= 0xDEADBEEF*/, uint16_t counter /*= 0x123*/, uint8_t button /*= 0x3*/, uint32_t seed /*= 987654321*/,
 8 |     KeeloqLearningType::Type learning /*= KeeloqLearningType::Normal*/)
 9 | {
10 |     std::vector<EncParcel> result { ::keeloq::GetOTA(key, seed, serial, button, counter, learning) };
11 | 
12 |     for (uint8_t i = 1; i < num; ++i)
13 |     {
14 |         result.emplace_back(::keeloq::GetOTA(key, seed, serial, button, counter + i, learning));
15 |     }
16 | 
17 |     return result;
18 | }
19 | 


--------------------------------------------------------------------------------
/src/tests/test_keeloq.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <vector>
 4 | 
 5 | #include "algorithm/keeloq/keeloq_learning_types.h"
 6 | #include "algorithm/keeloq/keeloq_encrypted.h"
 7 | 
namespace tests
{
namespace keeloq
{
    // Default values used by gen_inputs() when the caller does not override them.

    // Default seed.
    constexpr uint32_t default_seed = 987654321;

    // Default button code.
    constexpr uint8_t default_button = 0x3;

    // Default counter value of the first generated parcel.
    constexpr uint16_t default_counter = 0x123;

    // Default serial number.
    constexpr uint32_t default_serial = 0xDEADBEEF;


    // Generate test inputs for specific key.
    //   key      - key the parcels are generated for
    //   num      - number of parcels requested
    //   serial, counter, button, seed - values the parcels are built from
    //   learning - learning type used to produce the parcels
    std::vector<EncParcel> gen_inputs(uint64_t key, uint8_t num = 3,
        uint32_t serial = default_serial, uint16_t counter = default_counter, uint8_t button = default_button, uint32_t seed = default_seed,
        KeeloqLearningType::Type learning = KeeloqLearningType::Normal);

    // Same as the overload above, with the learning type supplied as a
    // template argument instead of a function argument.
    template<KeeloqLearningType::Type TLearning>
    inline std::vector<EncParcel> gen_inputs(uint64_t key, uint8_t num = 3,
        uint32_t serial = default_serial, uint16_t counter = default_counter, uint8_t button = default_button, uint32_t seed = default_seed)
    {
       return gen_inputs(key, num, serial, counter, button, seed, TLearning);
    }
}
}


--------------------------------------------------------------------------------
/src/tests/test_kernel.cu:
--------------------------------------------------------------------------------
 1 | #include "common.h"
 2 | 
 3 | #include "test_all.h"
 4 | #include "device/cuda_common.h"
 5 | 
 6 | #include <cuda_runtime_api.h>
 7 | 
 8 | namespace
 9 | {
10 |     __global__ void Kernel_Test(uint64_t* input)
11 |     {
12 |         *input = 0x1234567890ABCDEF;
13 |         *input = misc::rev_bytes(*input);
14 |     }
15 | 
16 |     __global__ void Kernel_RunFiltersTests(BruteforceFiltersTestInputs* tests, uint8_t num)
17 |     {
18 |         for (int i = 0; i < num; ++i)
19 |         {
20 |             bool value = BruteforceFilters::check_filters(tests[i].value, tests[i].flags);
21 |             assert(value == tests[i].result);
22 | 
23 |             tests[i].value = value == tests[i].result;
24 |         }
25 |     }
26 | }
27 | 
28 | __host__ void tests::cuda_check_bruteforce_filters(BruteforceFiltersTestInputs * tests, uint8_t num)
29 | {
30 |     void* args[] = { &tests, &num };
31 |     auto error = cudaLaunchKernel((void*) & Kernel_RunFiltersTests, dim3(), dim3(), args, 0, nullptr);
32 |     CUDA_CHECK(error);
33 | }
34 | 
35 | __host__ bool tests::cuda_check_working()
36 | {
37 |     uint64_t result = 0;
38 |     uint64_t* pInput;
39 | 
40 |     auto error = cudaMalloc((void**)&pInput, sizeof(uint64_t));
41 |     CUDA_CHECK(error);
42 | 
43 |     void *args[] = { &pInput };
44 |     auto* func = (void*) &Kernel_Test;
45 |     error = cudaLaunchKernel(func, dim3(), dim3(), args, 0, 0);
46 |     CUDA_CHECK(error);
47 | 
48 |     error = cudaMemcpy(&result, pInput, sizeof(uint64_t), cudaMemcpyDeviceToHost);
49 |     CUDA_CHECK(error);
50 | 
51 |     assert(result == 0xEFCDAB9078563412);
52 |     Kernel_Test<<<1, 1>>>(pInput);
53 | 
54 |     error = cudaFree(pInput);
55 |     CUDA_CHECK(error);
56 | 
57 |     return result == 0xEFCDAB9078563412;
58 | }


--------------------------------------------------------------------------------
/src/tests/test_pattern.cpp:
--------------------------------------------------------------------------------
 1 | #include "test_pattern.h"
 2 | 
 3 | #include <algorithm>
 4 | 
 5 | #include "tests/test_keeloq.h"
 6 | 
 7 | #include "bruteforce/bruteforce_config.h"
 8 | #include "bruteforce/bruteforce_pattern.h"
 9 | 
10 | #include "device/cuda_vector.h"
11 | 
12 | #include "algorithm/keeloq/keeloq_kernel.h"
13 | #include "algorithm/keeloq/keeloq_kernel_input.h"
14 | 
15 | #include "bruteforce/generators/generator_bruteforce.h"
16 | 
17 | 
18 | namespace
19 | {
20 | BruteforceConfig GetSingleKeyConfig(uint64_t key, bool rev = true)
21 | {
22 |     uint8_t* pKey = (uint8_t*)&key;
23 | 
24 |     std::vector<std::vector<uint8_t>> pattern =
25 |     {
26 |         { pKey[7] }, { pKey[6] }, { pKey[5] }, { pKey[4] }, { pKey[3] }, { pKey[2] }, { pKey[1] }, { pKey[0] },
27 |     };
28 | 
29 |     if (rev)
30 |     {
31 |         std::reverse(pattern.begin(), pattern.end());
32 |     }
33 | 
34 |     BruteforcePattern br_pattern(std::move(pattern), "Test");
35 |     return BruteforceConfig::GetPattern(Decryptor(0,0), br_pattern, 0xFFFFFFFF);
36 | }
37 | }
38 | 
39 | 
40 | bool tests::pattern_generation()
41 | {
42 |     constexpr auto NumBlocks = 64;
43 |     constexpr auto NumThreads = 64;
44 | 
45 |     const uint64_t debugKey = "hello_world"_u64;
46 | 
47 |     CudaVector<EncParcel> encrypted  = tests::keeloq::gen_inputs(debugKey);
48 | 
49 |     CudaVector<Decryptor> decryptors(NumBlocks * NumThreads);
50 |     CudaVector<SingleResult> results(decryptors.size() * encrypted.size());
51 | 
52 |     BruteforceConfig config = GetSingleKeyConfig(debugKey);
53 |     if (config.pattern.init(0).number() != debugKey)
54 |     {
55 |         assert(false);
56 |         return false;
57 |     }
58 | 
59 |     KeeloqKernelInput generatorInputs;
60 |     generatorInputs.encdata = encrypted.gpu();
61 |     generatorInputs.decryptors = decryptors.gpu();
62 |     generatorInputs.results = results.gpu();
63 |     generatorInputs.Initialize(config, KeeloqLearningType::full_mask());
64 | 
65 |     GeneratorBruteforce::PrepareDecryptors(generatorInputs, NumBlocks, NumThreads);
66 |     auto result = ::keeloq::kernels::cuda_brute(generatorInputs, NumBlocks, NumThreads);
67 | 
68 |     decryptors.read();
69 |     results.read();
70 |     assert(decryptors.cpu()[0].man() == debugKey);
71 | 
72 |     return decryptors.cpu()[0].man() == debugKey;
73 | }
74 | 
75 | 


--------------------------------------------------------------------------------
/src/tests/test_pattern.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include "common.h"
4 | 
namespace tests
{
    // Runs the pattern generator test on the GPU: a pattern describing a single
    // key is generated and the first produced decryptor is compared with that
    // key. Returns true when they match.
    bool pattern_generation();
}


--------------------------------------------------------------------------------