├── vendor └── CMakeLists.txt ├── clean.bat ├── .gitmodules ├── .gitignore ├── README.md ├── appveyor.yml ├── .clang-format ├── LICENSE.txt ├── CMakeLists.txt ├── patterns ├── std_regex.cpp ├── baseline.cpp ├── cfx.cpp ├── brick.cpp ├── mrexodia.cpp ├── darth_ton.cpp └── forza.cpp ├── src ├── pattern_entry.cpp └── main.cpp └── include └── pattern_entry.h /vendor/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(mem) 2 | add_subdirectory(fmt) 3 | -------------------------------------------------------------------------------- /clean.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | rmdir /S /Q "build" 4 | rmdir /S /Q "lib" 5 | rmdir /S /Q "bin" 6 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "vendor/mem"] 2 | path = vendor/mem 3 | url = https://github.com/0x1F9F1/mem.git 4 | [submodule "vendor/fmt"] 5 | path = vendor/fmt 6 | url = https://github.com/fmtlib/fmt.git 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # User-specific files 2 | *.suo 3 | *.user 4 | *.userosscache 5 | *.sln.docstates 6 | 7 | # Build results 8 | [Bb]uild/ 9 | [Dd]ebug/ 10 | [Dd]ebugPublic/ 11 | [Rr]elease/ 12 | [Rr]eleases/ 13 | x64/ 14 | x86/ 15 | bld/ 16 | [Ll]ib/ 17 | [Bb]in/ 18 | [Oo]bj/ 19 | 20 | .vs/ 21 | .vscode/ 22 | build32/ 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pattern-bench 2 | 3 | [![Build status](https://ci.appveyor.com/api/projects/status/ns9iau87x4dbugif?svg=true)](https://ci.appveyor.com/project/0x1F9F1/pattern-bench) 4 | 5 | A 
randomized benchmark for pattern scanners. Also good at finding bugs. 6 | 7 | ## Leaderboard: 8 | 9 | Scanning file: witcher3.exe
10 | Begin Scan: Seed: 0x7FE81C77, Size: 0x2AF5000, Tests: 256, Skip Fails: true, Scanners: 8 11 | 12 | Name | Speed 13 | --- | --- 14 | mem::simd_scanner | 1929148619 cycles = 0.167 cycles/byte 15 | mem::boyer_moore_scanner | 11840465471 cycles = 1.027 cycles/byte 16 | DarthTon | 36983612762 cycles = 3.207 cycles/byte 17 | Simple | 39810411009 cycles = 3.452 cycles/byte 18 | CFX | 45554670556 cycles = 3.951 cycles/byte 19 | Forza (Boyer-Moore Variant) | failed 20 | mrexodia (horspool) | failed 21 | DarthTon v2 | failed 22 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | version: 1.0.{build} 2 | build: 3 | verbosity: minimal 4 | 5 | environment: 6 | matrix: 7 | - APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu 8 | CMAKE_GENERATOR: Unix Makefiles 9 | 10 | - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 11 | CMAKE_GENERATOR: Visual Studio 15 2017 12 | 13 | - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 14 | CMAKE_GENERATOR: Visual Studio 15 2017 Win64 15 | 16 | before_build: 17 | - cmd: git submodule update --init --recursive 18 | - sh: git submodule update --init --recursive 19 | 20 | build_script: 21 | - cmd: mkdir build 22 | - cmd: cd build 23 | - cmd: cmake .. -G "%CMAKE_GENERATOR%" -DCMAKE_BUILD_TYPE=Release 24 | - cmd: cmake --build . --config Release 25 | - cmd: cd ../bin/Release 26 | - cmd: pattern-bench.exe --size=0x800000 --tests=512 --full 27 | 28 | - sh: mkdir build 29 | - sh: cd build 30 | - sh: cmake .. -G "$CMAKE_GENERATOR" -DCMAKE_BUILD_TYPE=Release 31 | - sh: cmake --build . 
--config Release 32 | - sh: ../bin/pattern-bench --size=0x800000 --tests=512 --full 33 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: WebKit 2 | 3 | AlignEscapedNewlines: Left 4 | AlignTrailingComments: true 5 | AllowShortCaseLabelsOnASingleLine: true 6 | AllowShortFunctionsOnASingleLine: None 7 | AlwaysBreakTemplateDeclarations: Yes 8 | 9 | BreakBeforeBraces: Custom 10 | BraceWrapping: 11 | AfterClass: true 12 | AfterControlStatement: true 13 | AfterEnum: true 14 | AfterExternBlock: true 15 | AfterFunction: true 16 | AfterNamespace: true 17 | AfterStruct: true 18 | AfterUnion: true 19 | BeforeCatch: true 20 | BeforeElse: true 21 | IndentBraces: false 22 | SplitEmptyFunction: false 23 | SplitEmptyNamespace: true 24 | SplitEmptyRecord: false 25 | 26 | BreakBeforeBinaryOperators: None 27 | BreakInheritanceList: BeforeComma 28 | ColumnLimit: 120 29 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 30 | Cpp11BracedListStyle: true 31 | FixNamespaceComments: true 32 | IndentCaseLabels: true 33 | IndentPPDirectives: AfterHash 34 | KeepEmptyLinesAtTheStartOfBlocks: false 35 | NamespaceIndentation: All 36 | SpaceAfterCStyleCast: true 37 | TabWidth: 4 38 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2018 Brick 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software 4 | and associated documentation files (the "Software"), to deal in the Software without restriction, 5 | including without limitation the rights to use, copy, modify, merge, publish, distribute, 6 | sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is 7 | furnished to do so, subject to the following conditions: 8 | 9 | The above 
copyright notice and this permission notice shall be included in all copies or 10 | substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING 13 | BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 14 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 15 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.4 FATAL_ERROR) 2 | 3 | set(CMAKE_DISABLE_SOURCE_CHANGES ON) 4 | set(CMAKE_DISABLE_IN_SOURCE_BUILD ON) 5 | 6 | project(pattern-bench CXX) 7 | 8 | if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") 9 | add_compile_options(/MP /EHa /arch:AVX2) 10 | else() 11 | add_compile_options(-mavx2) 12 | endif() 13 | 14 | add_executable(${PROJECT_NAME} 15 | src/main.cpp 16 | src/pattern_entry.cpp 17 | include/pattern_entry.h 18 | 19 | patterns/baseline.cpp 20 | patterns/brick.cpp 21 | patterns/cfx.cpp 22 | patterns/forza.cpp 23 | patterns/mrexodia.cpp 24 | patterns/darth_ton.cpp 25 | patterns/std_regex.cpp 26 | ) 27 | 28 | add_subdirectory(vendor EXCLUDE_FROM_ALL) 29 | 30 | set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT ${PROJECT_NAME}) 31 | 32 | target_include_directories(${PROJECT_NAME} PRIVATE 33 | include) 34 | 35 | target_link_libraries(${PROJECT_NAME} 36 | mem fmt) 37 | 38 | set_target_properties(${PROJECT_NAME} PROPERTIES 39 | CXX_STANDARD 11 40 | ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/bin 41 | LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/bin 42 | RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/bin 
43 | CXX_STANDARD_REQUIRED ON) 44 | -------------------------------------------------------------------------------- /patterns/std_regex.cpp: -------------------------------------------------------------------------------- 1 | #include "pattern_entry.h" 2 | 3 | #include 4 | #include 5 | /* Scanner that turns the byte pattern into a regular expression and searches the data with std::regex. NOTE(review): template arguments and include targets appear to have been stripped from this listing. */ 6 | struct std_regex_scanner : pattern_scanner 7 | { 8 | virtual std::vector Scan( 9 | const byte* pattern, const char* mask, const byte* data, size_t length) const override 10 | { 11 | std::string pattern_str; 12 | 13 | static constexpr const char HexChars[] = "0123456789ABCDEF"; 14 | /* Build the expression: each literal byte becomes a two-digit hex escape, each '?' in the mask becomes the any-character class. */ 15 | for (size_t i = 0, len = strlen(mask); i < len; ++i) 16 | { 17 | if (mask[i] != '?') 18 | { 19 | pattern_str += "\\x"; 20 | pattern_str += HexChars[pattern[i] >> 4]; 21 | pattern_str += HexChars[pattern[i] & 0xF]; 22 | } 23 | else 24 | { 25 | pattern_str += "[^]"; 26 | } 27 | } 28 | 29 | std::regex pattern_reg(pattern_str, std::regex_constants::optimize); 30 | 31 | std::vector results; 32 | std::cmatch cm; 33 | /* Search from offset i; after a hit, resume one byte past where it started so overlapping matches are reported too. */ 34 | for (size_t i = 0; i < length; i += cm.position() + 1) 35 | { 36 | if (!std::regex_search((const char*) data + i, (const char*) data + length, cm, pattern_reg)) 37 | break; 38 | /* cm.position() is relative to data + i. */ 39 | results.push_back(data + i + cm.position()); 40 | } 41 | 42 | return results; 43 | } 44 | 45 | virtual const char* GetName() const override 46 | { 47 | return "std::regex"; 48 | } 49 | }; 50 | /* The registration below is commented out, so this scanner is not added to PATTERN_SCANNERS. */ 51 | // REGISTER_PATTERN(std_regex_scanner); 52 | -------------------------------------------------------------------------------- /patterns/baseline.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2018 Brick 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software 5 | and associated documentation files (the "Software"), to deal in the Software without restriction, 6 | including without limitation the rights to use, copy, modify, merge, publish, distribute, 7 | sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or 11 | substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING 14 | BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 15 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 16 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 18 | */ 19 | 20 | #include "pattern_entry.h" 21 | /* Baseline scanner: forwards straight to FindPatternSimple (note the different argument order). */ 22 | struct simple_pattern_scanner : pattern_scanner 23 | { 24 | virtual std::vector Scan( 25 | const byte* pattern, const char* mask, const byte* data, size_t length) const override 26 | { 27 | return FindPatternSimple(data, length, pattern, mask); 28 | } 29 | 30 | virtual const char* GetName() const override 31 | { 32 | return "Simple"; 33 | } 34 | }; 35 | 36 | REGISTER_PATTERN(simple_pattern_scanner); -------------------------------------------------------------------------------- /patterns/cfx.cpp: -------------------------------------------------------------------------------- 1 | // https://github.com/citizenfx/fivem/blob/master/code/client/shared/Hooking.Patterns.cpp 2 | 3 | #include "pattern_entry.h" 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | /* Scanner adapted from the CitizenFX source linked above: compares right-to-left and shifts using a per-byte table of right-most pattern positions. */ 10 | struct cfx_pattern_scanner : pattern_scanner 11 | { 12 | virtual std::vector Scan( 13 | const byte* pattern, const char* mask, const byte* data, size_t length) const override 14 | { 15 | size_t mask_size = strlen(mask); 16 | ptrdiff_t last[256]; 17 | 18 | const char* findWild = strrchr(mask, '?'); 19 | /* Every table entry starts at the index of the last '?' in the mask, or -1 when the mask has no wildcard. */ 20 | std::fill(std::begin(last), std::end(last), findWild ?
(findWild - mask) : -1); 21 | /* Raise each byte's entry to the right-most pattern index that holds that byte. */ 22 | for (ptrdiff_t i = 0; i < static_cast<ptrdiff_t>(mask_size); ++i) 23 | { 24 | if (last[pattern[i]] < i) 25 | { 26 | last[pattern[i]] = i; 27 | } 28 | } 29 | 30 | std::vector<const byte*> results; 31 | if (mask_size > length) return results; /* pattern longer than the data: nothing can match, and data + length - mask_size below would point before data */ 32 | for (const byte *i = data, *end = data + length - mask_size; i <= end;) 33 | { 34 | ptrdiff_t j = mask_size - 1; 35 | /* Compare right-to-left; '?' positions accept any byte. */ 36 | while ((j >= 0) && (mask[j] == '?' || pattern[j] == i[j])) 37 | j--; 38 | /* j < 0 means every position matched. */ 39 | if (j < 0) 40 | { 41 | results.emplace_back(i); 42 | 43 | i++; 44 | } 45 | else 46 | { /* Mismatch at j: line i[j] up with its right-most table position, always advancing at least one byte. */ 47 | i += std::max((ptrdiff_t) 1, j - last[i[j]]); 48 | } 49 | } 50 | 51 | return results; 52 | } 53 | 54 | virtual const char* GetName() const override 55 | { 56 | return "CFX"; 57 | } 58 | }; 59 | 60 | REGISTER_PATTERN(cfx_pattern_scanner); 61 | -------------------------------------------------------------------------------- /src/pattern_entry.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2018 Brick 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software 5 | and associated documentation files (the "Software"), to deal in the Software without restriction, 6 | including without limitation the rights to use, copy, modify, merge, publish, distribute, 7 | sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or 11 | substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING 14 | BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 15 | NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 16 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 18 | */ 19 | 20 | #include "pattern_entry.h" 21 | /* Definition of the global scanner list declared in pattern_entry.h. NOTE(review): the template arguments of std::vector appear to have been stripped from this listing. */ 22 | std::vector> PATTERN_SCANNERS; 23 | /* Reference scanner: returns a pointer to every offset in data[0, length) at which pattern matches. strlen(masks) is the pattern length and a '?' in masks marks a position that accepts any byte. */ 24 | std::vector FindPatternSimple(const byte* data, size_t length, const byte* pattern, const char* masks) 25 | { 26 | size_t pattern_length = strlen(masks); 27 | /* A pattern longer than the buffer cannot match; this also keeps the subtraction below from wrapping. */ 28 | if (pattern_length > length) 29 | { 30 | return {}; 31 | } 32 | 33 | std::vector results; 34 | /* From here on, length is the last valid start offset. */ 35 | length -= pattern_length; 36 | 37 | for (size_t i = 0; i <= length; ++i) 38 | { 39 | bool found = true; 40 | /* Stop at the first non-wildcard position whose byte differs. */ 41 | for (size_t j = 0; j < pattern_length; ++j) 42 | { 43 | if ((data[i + j] != pattern[j]) && (masks[j] != '?')) 44 | { 45 | found = false; 46 | 47 | break; 48 | } 49 | } 50 | 51 | if (found) 52 | { 53 | results.push_back(data + i); 54 | } 55 | } 56 | 57 | return results; 58 | } 59 | -------------------------------------------------------------------------------- /include/pattern_entry.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2018 Brick 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software 5 | and associated documentation files (the "Software"), to deal in the Software without restriction, 6 | including without limitation the rights to use, copy, modify, merge, publish, distribute, 7 | sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or 11 | substantial portions of the Software.
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING 14 | BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 15 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 16 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 18 | */ 19 | 20 | #pragma once 21 | /* NOTE(review): the include targets and template arguments in this header appear to have been stripped from the listing. */ 22 | #include 23 | #include 24 | 25 | #include 26 | #include 27 | 28 | using mem::byte; 29 | /* Interface implemented by every benchmarked scanner. Scan receives the pattern bytes, a NUL-terminated mask in which '?' marks a wildcard position, and the buffer to search; GetName returns the display name. NOTE(review): Elapsed and Failed are presumably updated by the benchmark driver, which is not visible here - confirm. */ 30 | struct pattern_scanner 31 | { 32 | uint64_t Elapsed {0}; 33 | size_t Failed {0}; 34 | 35 | virtual ~pattern_scanner() = default; 36 | 37 | virtual std::vector Scan( 38 | const byte* pattern, const char* mask, const byte* data, size_t length) const = 0; 39 | virtual const char* GetName() const = 0; 40 | }; 41 | /* Global list of registered scanners; defined in src/pattern_entry.cpp. */ 42 | extern std::vector> PATTERN_SCANNERS; 43 | /* REGISTER_PATTERN(CLASS) defines a static mem::init_function, named after the current line number, whose callback appends a new CLASS instance to PATTERN_SCANNERS. The extra macro layer makes __LINE__ expand before token pasting. */ 44 | #define REGISTER_PATTERN__(CLASS, LINE) \ 45 | static mem::init_function DO_REGISTER_PATTERN_##LINE \ 46 | { \ 47 | [] { PATTERN_SCANNERS.emplace_back(new CLASS()); } \ 48 | } 49 | #define REGISTER_PATTERN_(CLASS, LINE) REGISTER_PATTERN__(CLASS, LINE) 50 | #define REGISTER_PATTERN(CLASS) REGISTER_PATTERN_(CLASS, __LINE__) 51 | /* Straightforward reference scanner; defined in src/pattern_entry.cpp. */ 52 | std::vector FindPatternSimple(const byte* data, size_t length, const byte* pattern, const char* masks); 53 | -------------------------------------------------------------------------------- /patterns/brick.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2018 Brick 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software 5 | and associated documentation files (the "Software"), to deal in the Software without restriction, 6 | including without limitation the rights to use, copy, modify, merge, publish, distribute, 7 | sublicense, and/or sell copies of the Software,
and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or 11 | substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING 14 | BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 15 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 16 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 18 | */ 19 | 20 | #include "pattern_entry.h" 21 | 22 | #include 23 | 24 | #include 25 | /* Adapter for mem::boyer_moore_scanner: builds a mem::pattern from the bytes and mask and collects every reported match. */ 26 | struct mem_boyer_moore_pattern_scanner : pattern_scanner 27 | { 28 | virtual std::vector Scan( 29 | const byte* bytes, const char* mask, const byte* data, size_t length) const override 30 | { 31 | mem::pattern pattern(bytes, mask); 32 | mem::boyer_moore_scanner scanner(pattern); 33 | 34 | std::vector results; 35 | /* The callback runs once per match. NOTE(review): returning false presumably tells mem to keep scanning - confirm against the mem documentation. */ 36 | scanner({data, length}, [&](mem::pointer result) { 37 | results.push_back(result.as()); 38 | 39 | return false; 40 | }); 41 | 42 | return results; 43 | } 44 | 45 | virtual const char* GetName() const override 46 | { 47 | return "mem::boyer_moore_scanner"; 48 | } 49 | }; 50 | 51 | REGISTER_PATTERN(mem_boyer_moore_pattern_scanner); 52 | 53 | #include 54 | /* Adapter for mem::simd_scanner; same shape as the Boyer-Moore adapter above. */ 55 | struct mem_simd_pattern_scanner : pattern_scanner 56 | { 57 | virtual std::vector Scan( 58 | const byte* bytes, const char* mask, const byte* data, size_t length) const override 59 | { 60 | mem::pattern pattern(bytes, mask); 61 | mem::simd_scanner scanner(pattern); 62 | 63 | std::vector results; 64 | 65 | scanner({data, length}, [&](mem::pointer result) { 66 | results.push_back(result.as()); 67 | 68 | return false; 69 | }); 70 | 71 | return results; 72 | } 73 | 74 | virtual const
char* GetName() const override 75 | { 76 | return "mem::simd_scanner"; 77 | } 78 | }; 79 | 80 | REGISTER_PATTERN(mem_simd_pattern_scanner); -------------------------------------------------------------------------------- /patterns/mrexodia.cpp: -------------------------------------------------------------------------------- 1 | // https://github.com/learn-more/findpattern-bench/blob/master/patterns/mrexodia_horspool.h 2 | 3 | #include "pattern_entry.h" 4 | 5 | // based on: https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm 6 | /* True when needle_ch is the wildcard byte or equals haystack_ch. */ 7 | bool matches(const byte haystack_ch, const byte needle_ch, const byte wildcard) 8 | { 9 | return needle_ch == wildcard || haystack_ch == needle_ch; 10 | } 11 | /* Boyer-Moore-Horspool search. Returns every position in haystack[0, hlen) at which needle[0, nlen) matches; a needle byte equal to wildcard matches any haystack byte. Returns an empty vector for an empty needle or null pointers. */ 12 | std::vector<const byte*> boyermoore_horspool_memmem( 13 | const byte* haystack, size_t hlen, const byte* needle, size_t nlen, const byte wildcard = '\0') 14 | { 15 | size_t bad_char_skip[UCHAR_MAX + 1]; /* Officially called: bad character shift */ 16 | 17 | /* Sanity checks on the parameters */ 18 | if (nlen <= 0 || !haystack || !needle) 19 | return {}; 20 | 21 | /* ---- Preprocess ---- */ 22 | /* Initialize the table to default value */ 23 | /* When a character is encountered that does not occur 24 | * in the needle, we can safely skip ahead for the whole 25 | * length of the needle. 26 | */ 27 | for (size_t scan = 0; scan <= UCHAR_MAX; scan = scan + 1) 28 | { 29 | bad_char_skip[scan] = nlen; 30 | } 31 | 32 | /* C arrays have the first byte at [0], therefore: 33 | * [nlen - 1] is the last byte of the array. */ 34 | size_t last = nlen - 1; 35 | 36 | /* Then populate it with the analysis of the needle. A wildcard position accepts every byte, so it caps the shift of all entries; literal bytes further right then lower their own entry. Without this cap a shift could jump over a real match. */ 37 | for (size_t scan = 0; scan < last; scan = scan + 1) 38 | { 39 | byte needleByte = needle[scan]; 40 | if (needleByte == wildcard) { for (size_t c = 0; c <= UCHAR_MAX; c = c + 1) bad_char_skip[c] = last - scan; } else { bad_char_skip[needleByte] = last - scan; } 41 | } 42 | 43 | /* ---- Do the matching ---- */ 44 | 45 | std::vector<const byte*> results; 46 | 47 | /* Search the haystack, while the needle can still be within it.
*/ 48 | while (hlen >= nlen) 49 | { 50 | /* scan from the end of the needle */ 51 | for (size_t scan = last; matches(haystack[scan], needle[scan], wildcard); scan = scan - 1) 52 | { 53 | if (scan == 0) /* If the first byte matches, we've found it. */ 54 | { results.push_back(haystack); break; /* scan is unsigned: decrementing past 0 would wrap around and read outside both buffers */ } 55 | } 56 | 57 | /* otherwise, we need to skip some bytes and start again. 58 | Note that here we are getting the skip value based on the last byte 59 | of needle, no matter where we didn't match. So if needle is: "abcd" 60 | then we are skipping based on 'd' and that value will be 4, and 61 | for "abcdd" we again skip on 'd' but the value will be only 1. 62 | The alternative of pretending that the mismatched character was 63 | the last character is slower in the normal case (E.g. finding 64 | "abcd" in "...azcd..." gives 4 by using 'd' but only 65 | 4-2==2 using 'z'. */ 66 | byte lastByte = haystack[last]; 67 | hlen -= bad_char_skip[lastByte]; 68 | haystack += bad_char_skip[lastByte]; 69 | } 70 | 71 | return results; 72 | } 73 | /* Adapter for boyermoore_horspool_memmem. NOTE(review): the mask is used only for its length and byte 0 is passed as the wildcard, so wildcard positions must hold 0 in pattern and a literal 0 byte also acts as a wildcard - confirm the driver generates patterns accordingly. */ 74 | struct mrexodia_pattern_scanner : pattern_scanner 75 | { 76 | virtual std::vector Scan( 77 | const byte* pattern, const char* mask, const byte* data, size_t length) const override 78 | { 79 | return boyermoore_horspool_memmem(data, length, pattern, strlen(mask), 0); 80 | } 81 | 82 | virtual const char* GetName() const override 83 | { 84 | return "mrexodia (horspool)"; 85 | } 86 | }; 87 | 88 | REGISTER_PATTERN(mrexodia_pattern_scanner); 89 | -------------------------------------------------------------------------------- /patterns/darth_ton.cpp: -------------------------------------------------------------------------------- 1 | // https://github.com/learn-more/findpattern-bench/blob/master/patterns/DarthTon.h 2 | 3 | #include "pattern_entry.h" 4 | 5 | #include 6 | 7 | #define min(a, b) (((a) < (b)) ?
(a) : (b)) 8 | /* FillShiftTable writes UCHAR_MAX + 1 entries into bad_char_skip: the shift to apply for each byte value found under the pattern's last position. It requires patternSize >= 1 because last is computed as patternSize - 1 on an unsigned type. */ 9 | // Boyer-Moore-Horspool with wildcards implementation 10 | void FillShiftTable(const uint8_t* pPattern, size_t patternSize, const char* pMask, size_t* bad_char_skip) 11 | { 12 | size_t idx = 0; 13 | size_t last = patternSize - 1; 14 | 15 | // Get last wildcard position 16 | for (idx = last; idx > 0 && pMask[idx] != '?'; --idx) 17 | ; 18 | size_t diff = last - idx; 19 | if (diff == 0) 20 | diff = 1; 21 | 22 | // Prepare shift table 23 | for (idx = 0; idx <= UCHAR_MAX; ++idx) 24 | bad_char_skip[idx] = diff; 25 | for (idx = last - diff; idx < last; ++idx) 26 | bad_char_skip[pPattern[idx]] = last - idx; 27 | } 28 | /* Returns every position in pScanPos[0, scanSize) at which pPattern matches; strlen(pMask) is the pattern length and '?' in pMask marks a wildcard byte. Returns an empty vector when the pattern is empty or longer than the buffer. */ 29 | std::vector<const byte*> Search(const uint8_t* pScanPos, size_t scanSize, const uint8_t* pPattern, const char* pMask) 30 | { 31 | size_t patternSize = strlen(pMask); 32 | if (patternSize == 0 || patternSize > scanSize) return {}; /* an empty pattern would wrap last in FillShiftTable; an oversized one would put scanEnd before the buffer */ 33 | size_t bad_char_skip[UCHAR_MAX + 1]; 34 | const uint8_t* scanEnd = pScanPos + scanSize - patternSize; 35 | intptr_t last = static_cast<intptr_t>(patternSize) - 1; 36 | 37 | FillShiftTable(pPattern, patternSize, pMask, bad_char_skip); 38 | 39 | std::vector<const byte*> results; 40 | 41 | // Search 42 | for (; pScanPos <= scanEnd; pScanPos += bad_char_skip[pScanPos[last]]) 43 | { 44 | for (intptr_t idx = last; idx >= 0; --idx) 45 | if (pMask[idx] != '?' && pScanPos[idx] != pPattern[idx]) 46 | goto skip; 47 | else if (idx == 0) 48 | results.push_back(pScanPos); 49 | skip:; 50 | } 51 | 52 | return results; 53 | } 54 | 55 | struct PartData 56 | { 57 | int32_t mask = 0; 58 | __m128i needle; // C2797: list initialization inside member initializer list or non-static data member initializer 59 | // is not implemented 60 | 61 | PartData() 62 | { 63 | memset(&needle, 0, sizeof(needle)); 64 | } 65 | }; 66 | 67 | std::vector Search2(const uint8_t* data, const uint32_t size, const uint8_t* pattern, const char* mask) 68 | { 69 | auto len = strlen(mask); 70 | auto first = strchr(mask, '?'); 71 | size_t len2 = (first != nullptr) ?
(first - mask) : len; 72 | auto firstlen = min(len2, 16); 73 | intptr_t num_parts = (len < 16 || len % 16) ? (len / 16 + 1) : (len / 16); 74 | PartData parts[4]; 75 | /* Split the pattern into 16-byte parts; bit j of parts[i].mask is set where the mask holds 'x'. NOTE(review): parts has 4 entries, so a mask longer than 64 characters indexes past it; the inner bound min(len, 16) - 1 also leaves the last position of each part out of the mask - confirm both are intended. */ 76 | for (intptr_t i = 0; i < num_parts; ++i, len -= 16) 77 | { 78 | for (size_t j = 0; j < min(len, 16) - 1; ++j) 79 | if (mask[16 * i + j] == 'x') 80 | parts[i].mask |= (1 << j); 81 | 82 | parts[i].needle = _mm_loadu_si128((const __m128i*) (pattern + i * 16)); 83 | } 84 | 85 | std::vector results; 86 | /* Walk the data in 32-byte steps; each step yields at most one candidate offset, found by searching for the first firstlen pattern bytes in either 16-byte half. */ 87 | for (intptr_t i = 0; i < static_cast(size) / 32 - 1; ++i) 88 | { 89 | // auto block = _mm256_loadu_si256( (const __m256i*)data + i ); 90 | // if (_mm256_testz_si256( block, block )) 91 | // continue; 92 | 93 | auto offset = _mm_cmpestri( 94 | parts->needle, firstlen, _mm_loadu_si128((const __m128i*) (data + i * 32)), 16, _SIDD_CMP_EQUAL_ORDERED); 95 | if (offset == 16) 96 | { 97 | offset += _mm_cmpestri(parts->needle, firstlen, _mm_loadu_si128((const __m128i*) (data + i * 32 + 16)), 16, 98 | _SIDD_CMP_EQUAL_ORDERED); 99 | if (offset == 32) 100 | continue; 101 | } 102 | /* Verify every part at the candidate offset, comparing only the bit positions set in that part's mask. */ 103 | for (intptr_t j = 0; j < num_parts; ++j) 104 | { 105 | auto hay = _mm_loadu_si128((const __m128i*) (data + (2 * i + j) * 16 + offset)); 106 | auto bitmask = _mm_movemask_epi8(_mm_cmpeq_epi8(hay, parts[j].needle)); 107 | if ((bitmask & parts[j].mask) != parts[j].mask) 108 | goto next; 109 | } 110 | 111 | results.push_back(data + 32 * i + offset); 112 | 113 | next:; 114 | } 115 | 116 | return results; 117 | } 118 | /* Adapter for the table-driven Search. */ 119 | struct darth_ton_pattern_scanner : pattern_scanner 120 | { 121 | virtual std::vector Scan( 122 | const byte* pattern, const char* mask, const byte* data, size_t length) const override 123 | { 124 | return Search(data, length, pattern, mask); 125 | } 126 | 127 | virtual const char* GetName() const override 128 | { 129 | return "DarthTon"; 130 | } 131 | }; 132 | 133 | REGISTER_PATTERN(darth_ton_pattern_scanner); 134 | /* Adapter for the SSE-based Search2. */ 135 | struct darth_ton2_pattern_scanner : pattern_scanner 136 | { 137 | virtual std::vector
Scan( 138 | const byte* pattern, const char* mask, const byte* data, size_t length) const override 139 | { /* NOTE(review): Search2 takes a uint32_t size, so length is narrowed at this call. */ 140 | return Search2(data, length, pattern, mask); 141 | } 142 | 143 | virtual const char* GetName() const override 144 | { 145 | return "DarthTon v2"; 146 | } 147 | }; 148 | 149 | REGISTER_PATTERN(darth_ton2_pattern_scanner); 150 | -------------------------------------------------------------------------------- /patterns/forza.cpp: -------------------------------------------------------------------------------- 1 | // https://github.com/learn-more/findpattern-bench/blob/master/patterns/Forza.h 2 | 3 | #include "pattern_entry.h" 4 | 5 | #include 6 | /* A pattern split into runs of literal bytes. For run c, Length[c] is the run length, Skip[c] is the run length plus the wildcards that follow it, and Value[c] holds the 16 bytes loaded from the start of the run. Count is the number of runs and Size the mask length. */ 7 | struct PatternData 8 | { 9 | uint32_t Count; 10 | uint32_t Size; 11 | uint32_t Length[16]; 12 | uint32_t Skip[16]; 13 | __m128i Value[16]; 14 | }; 15 | /* Fills *Out from Signature and Mask ('?' marks a wildcard). NOTE(review): Count is never checked against the 16-entry arrays, and each Value load reads 16 bytes starting inside Signature - confirm callers provide enough readable bytes. */ 16 | void GeneratePattern(const char* Signature, const char* Mask, PatternData* Out) 17 | { 18 | auto l = strlen(Mask); 19 | 20 | Out->Count = 0; 21 | 22 | for (auto i = 0; i < l; i++) 23 | { 24 | if (Mask[i] == '?') 25 | continue; 26 | /* sl counts the literal run starting at i (capped at 16); ml counts the wildcards directly after it. */ 27 | auto ml = 0, sl = 0; 28 | 29 | for (auto j = i; j < l; j++) 30 | { 31 | if (Mask[j] == '?'
|| sl >= 16) 32 | break; 33 | sl++; 34 | } 35 | 36 | for (auto j = i + sl; j < l; j++) 37 | { 38 | if (Mask[j] != '?') 39 | break; 40 | ml++; 41 | } 42 | 43 | auto c = Out->Count; 44 | 45 | Out->Length[c] = sl; 46 | Out->Skip[c] = sl + ml; 47 | Out->Value[c] = _mm_loadu_si128((const __m128i*) ((uint8_t*) Signature + i)); 48 | 49 | Out->Count++; 50 | /* Move to the end of the run; the loop's i++ then steps past it. */ 51 | i += sl - 1; 52 | } 53 | 54 | Out->Size = l; 55 | } 56 | /* Compares runs 1..Count-1 of the pattern against Data, advancing by each run's Skip; run 0 is not compared here. NOTE(review): returns false when Count <= 1 because true is only returned from inside the loop - confirm. */ 57 | MEM_STRONG_INLINE bool Matches(const uint8_t* Data, PatternData* Patterns) 58 | { 59 | auto k = Data + Patterns->Skip[0]; 60 | 61 | for (auto i = 1; i < Patterns->Count; i++) 62 | { 63 | auto l = Patterns->Length[i]; 64 | 65 | if (_mm_cmpestri(Patterns->Value[i], l, _mm_loadu_si128((const __m128i*) k), l, 66 | _SIDD_CMP_EQUAL_EACH | _SIDD_MASKED_NEGATIVE_POLARITY) != l) 67 | break; 68 | 69 | if (i + 1 == Patterns->Count) 70 | return true; 71 | 72 | k += Patterns->Skip[i]; 73 | } 74 | 75 | return false; 76 | } 77 | /* SIMD scan: walks Data in 32-byte blocks from the end towards the start and uses run 0 of the pattern to locate candidates. NOTE(review): the local out is never used in the visible code. */ 78 | std::vector FindEx(const uint8_t* Data, const uint32_t Length, const char* Signature, const char* Mask) 79 | { 80 | PatternData d; 81 | GeneratePattern(Signature, Mask, &d); 82 | 83 | auto out = static_cast(nullptr); 84 | auto end = Data + Length - d.Size; 85 | 86 | std::vector results; 87 | 88 | // C3010: 'break' : jump out of OpenMP structured block not allowed 89 | for (intptr_t i = Length - 32; i >= 0; i -= 32) 90 | { 91 | auto p = Data + i; 92 | auto b = _mm256_loadu_si256((const __m256i*) p); 93 | 94 | // if (_mm256_test_all_zeros(b, b) == 1) 95 | // continue; 96 | 97 | auto f = _mm_cmpestri(d.Value[0], d.Length[0], _mm256_extractf128_si256(b, 0), 16, _SIDD_CMP_EQUAL_ORDERED); 98 | 99 | if (f == 16) 100 | { 101 | f += _mm_cmpestri(d.Value[0], d.Length[0], _mm256_extractf128_si256(b, 1), 16, _SIDD_CMP_EQUAL_ORDERED); 102 | 103 | if (f == 32) 104 | continue; 105 | } 106 | 107 | PossibleMatch: 108 | p += f; 109 | 110 | if (p + d.Size > end) 111 | { 112 | for (auto j = 0; j < d.Size && j + i + f < Length; j++) 113 | { 114 | if (Mask[j]
== 'x' && (uint8_t) Signature[j] != p[j]) 115 | break; 116 | 117 | if (j + 1 == d.Size) 118 | results.push_back(p); 119 | } 120 | 121 | continue; 122 | } 123 | 124 | if (Matches(p, &d)) 125 | results.push_back(p); 126 | /* Look for another occurrence of run 0 in the 16 bytes after this candidate. */ 127 | p++; 128 | f = _mm_cmpestri(d.Value[0], d.Length[0], _mm_loadu_si128((const __m128i*) p), 16, _SIDD_CMP_EQUAL_ORDERED); 129 | 130 | if (f < 16) 131 | goto PossibleMatch; 132 | } 133 | 134 | return results; 135 | } 136 | /* Writes a start index to Out[0] and a length to Out[1] for a run of 'x' mask positions. NOTE(review): t2 starts as strlen(Signature) and is used both as the loop start and as the length to beat, so Signature is treated as a C string here - confirm this is intended. */ 137 | void FindLargestArray(const char* Signature, const char* Mask, int Out[2]) 138 | { 139 | uint32_t t1 = 0; 140 | uint32_t t2 = strlen(Signature); 141 | uint32_t len = strlen(Mask); 142 | 143 | for (auto j = t2; j < len; j++) 144 | { 145 | if (Mask[j] != 'x') 146 | continue; 147 | 148 | auto find = strrchr(&Mask[j], '?'); 149 | 150 | auto count = find ? (find - &Mask[j]) : (len - j); 151 | 152 | if (count > t2) 153 | { 154 | t1 = j; 155 | t2 = count; 156 | } 157 | 158 | j += (count - 1); 159 | } 160 | 161 | Out[0] = t1; 162 | Out[1] = t2; 163 | } 164 | /* Backward scan over Data using the run chosen by FindLargestArray. NOTE(review): len, mbeg and mlen are stored in uint8_t, so values of 256 or more are truncated. */ 165 | std::vector Find(const byte* Data, const uint32_t Length, const char* Signature, const char* Mask) 166 | { 167 | int d[2] = {0}; 168 | FindLargestArray(Signature, Mask, d); 169 | 170 | const uint8_t len = static_cast(strlen(Mask)); 171 | const uint8_t mbeg = static_cast(d[0]); 172 | const uint8_t mlen = static_cast(d[1]); 173 | const uint8_t mfirst = static_cast(Signature[mbeg]); 174 | 175 | uint8_t wildcard[UCHAR_MAX + 1] = {0}; 176 | /* wildcard[b] becomes 1 for every byte value that occurs in the selected run of the signature. */ 177 | for (auto i = mbeg; i < mbeg + mlen; i++) 178 | wildcard[(uint8_t) Signature[i]] = 1; 179 | 180 | std::vector results; 181 | 182 | for (int i = Length - len; i >= 0; i--) 183 | { 184 | auto c = Data[i]; 185 | auto w = wildcard[c]; 186 | auto k = 0; 187 | /* Jump back mlen bytes at a time while the current byte does not occur in the run. */ 188 | while (w == 0 && i > mlen) 189 | { 190 | i -= mlen; 191 | w = wildcard[Data[i]]; 192 | k = 1; 193 | } 194 | 195 | if (k == 1) 196 | { 197 | i++; 198 | continue; 199 | } 200 | 201 | if (c != mfirst) 202 | continue; 203 | 204 | if (i - mbeg < 0 || i - mbeg + len > Length) 205 |
break; 206 | 207 | for (auto j = 0; j < len - 1; j++) 208 | { 209 | if (j == mbeg || Mask[j] != 'x') 210 | continue; 211 | 212 | if (Data[i - mbeg + j] != (uint8_t) Signature[j]) 213 | break; 214 | 215 | if (j + 1 == len - 1) 216 | results.push_back((uint8_t*) (Data + i - mbeg)); 217 | } 218 | } 219 | 220 | return results; 221 | } 222 | 223 | struct forza_pattern_scanner : pattern_scanner 224 | { 225 | virtual std::vector Scan( 226 | const byte* pattern, const char* mask, const byte* data, size_t length) const override 227 | { 228 | return Find(data, length, (const char*) pattern, mask); 229 | } 230 | 231 | virtual const char* GetName() const override 232 | { 233 | return "Forza (Boyer-Moore Variant)"; 234 | } 235 | }; 236 | 237 | REGISTER_PATTERN(forza_pattern_scanner); 238 | 239 | struct forza_simd_pattern_scanner : pattern_scanner 240 | { 241 | virtual std::vector Scan( 242 | const byte* pattern, const char* mask, const byte* data, size_t length) const override 243 | { 244 | return FindEx(data, length, (const char*) pattern, mask); 245 | } 246 | 247 | virtual const char* GetName() const override 248 | { 249 | return "Forza (SIMD)"; 250 | } 251 | }; 252 | 253 | // REGISTER_PATTERN(forza_simd_pattern_scanner); -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2018 Brick 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software 5 | and associated documentation files (the "Software"), to deal in the Software without restriction, 6 | including without limitation the rights to use, copy, modify, merge, publish, distribute, 7 | sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or 11 | 
substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING 14 | BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 15 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 16 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | #include 31 | #include 32 | 33 | #include 34 | 35 | #include 36 | 37 | #include 38 | #include 39 | 40 | #include 41 | 42 | #include 43 | 44 | #include "pattern_entry.h" 45 | 46 | static size_t LOG_LEVEL = 0; 47 | 48 | using mem::byte; 49 | 50 | mem::byte_buffer read_file(const char* path) 51 | { 52 | std::ifstream input(path, std::ifstream::binary | std::ifstream::ate); 53 | 54 | size_t length = static_cast(input.tellg()); 55 | 56 | input.seekg(0); 57 | 58 | mem::byte_buffer result(length); 59 | 60 | if (!input.read(reinterpret_cast(result.data()), result.size())) 61 | { 62 | result.reset(); 63 | } 64 | 65 | return result; 66 | } 67 | 68 | struct scan_bench 69 | { 70 | private: 71 | byte* raw_data_ {nullptr}; 72 | size_t raw_size_ {0}; 73 | 74 | byte* full_data_ {nullptr}; 75 | size_t full_size_ {0}; 76 | 77 | byte* data_ {nullptr}; 78 | size_t size_ {0}; 79 | 80 | uint32_t seed_ {0}; 81 | std::mt19937 rng_ {}; 82 | 83 | std::vector pattern_; 84 | std::string masks_; 85 | std::unordered_set expected_; 86 | 87 | public: 88 | scan_bench(uint32_t seed) 89 | : seed_(seed) 90 | , rng_(seed_) 91 | {} 92 | 93 | scan_bench(const scan_bench&) = delete; 94 | scan_bench(scan_bench&&) = delete; 95 | 96 | ~scan_bench() 97 | { 98 | mem::protect_free(raw_data_, raw_size_); 99 | } 100 | 101 | void reset(size_t region_size) 
102 | { 103 | reset(nullptr, region_size); 104 | } 105 | 106 | void reset(const char* file_name) 107 | { 108 | mem::byte_buffer region_data = read_file(file_name); 109 | 110 | reset(region_data.data(), region_data.size()); 111 | } 112 | 113 | void reset(const byte* region_data, size_t region_size) 114 | { 115 | size_t page_size = mem::page_size(); 116 | 117 | full_size_ = (region_size + page_size - 1) / page_size * page_size; 118 | 119 | raw_size_ = full_size_ + (page_size * 2); 120 | raw_data_ = static_cast(mem::protect_alloc(raw_size_, mem::prot_flags::RW)); 121 | 122 | full_data_ = raw_data_ + page_size; 123 | 124 | mem::protect_modify(raw_data_, page_size, mem::prot_flags::NONE); 125 | mem::protect_modify(raw_data_ + raw_size_ - page_size, page_size, mem::prot_flags::NONE); 126 | 127 | if (region_data) 128 | { 129 | size_t extra = (full_size_ - region_size); 130 | 131 | std::memset(full_data_, 0, extra); 132 | std::memcpy(full_data_ + extra, region_data, region_size); 133 | } 134 | else 135 | { 136 | std::uniform_int_distribution byte_dist(0, 0xFF); 137 | 138 | std::generate_n(full_data_, full_size_, [&] { return (byte) byte_dist(rng_); }); 139 | } 140 | } 141 | 142 | size_t full_size() const noexcept 143 | { 144 | return full_size_; 145 | } 146 | 147 | const byte* data() const noexcept 148 | { 149 | return data_; 150 | } 151 | 152 | size_t size() const noexcept 153 | { 154 | return size_; 155 | } 156 | 157 | const byte* pattern() const noexcept 158 | { 159 | return pattern_.data(); 160 | } 161 | 162 | const char* masks() const noexcept 163 | { 164 | return masks_.data(); 165 | } 166 | 167 | uint32_t seed() const noexcept 168 | { 169 | return seed_; 170 | } 171 | 172 | std::unordered_set shift_results(const std::vector& results) 173 | { 174 | std::unordered_set shifted; 175 | 176 | for (const byte* result : results) 177 | { 178 | shifted.emplace(result - data()); 179 | } 180 | 181 | return shifted; 182 | } 183 | 184 | void generate() 185 | { 186 | 
std::uniform_int_distribution size_dist(0, 100); 187 | 188 | size_t variation = size_dist(rng_); 189 | 190 | data_ = full_data_ + variation; 191 | size_ = full_size_ - variation; 192 | 193 | std::uniform_int_distribution byte_dist(0, 0xFF); 194 | 195 | std::uniform_int_distribution length_dist(5, 32); 196 | 197 | size_t pattern_length = length_dist(rng_); 198 | 199 | pattern_.resize(pattern_length); 200 | masks_.resize(pattern_length); 201 | 202 | std::bernoulli_distribution mask_dist(0.9); 203 | 204 | bool all_masks = true; 205 | 206 | do 207 | { 208 | for (size_t i = 0; i < pattern_length; ++i) 209 | { 210 | if (mask_dist(rng_)) 211 | { 212 | pattern_[i] = (char) byte_dist(rng_); 213 | masks_[i] = 'x'; 214 | 215 | all_masks = false; 216 | } 217 | else 218 | { 219 | pattern_[i] = 0x00; 220 | masks_[i] = '?'; 221 | } 222 | } 223 | } while (all_masks); 224 | 225 | std::uniform_int_distribution count_dist(2, 10); 226 | 227 | size_t result_count = count_dist(rng_); 228 | 229 | std::uniform_int_distribution range_dist(0, size() - pattern_.size()); 230 | 231 | for (size_t i = 0; i < result_count; ++i) 232 | { 233 | size_t offset = range_dist(rng_); 234 | 235 | for (size_t j = 0; j < pattern_.size(); ++j) 236 | { 237 | if (masks_[j] != '?') 238 | data_[offset + j] = pattern_[j]; 239 | } 240 | } 241 | 242 | expected_ = shift_results(FindPatternSimple(data(), size(), pattern(), masks())); 243 | } 244 | 245 | bool check_results(const pattern_scanner& scanner, const std::vector& results) 246 | { 247 | std::unordered_set shifted = shift_results(results); 248 | 249 | if (shifted.size() != expected_.size()) 250 | { 251 | if (LOG_LEVEL > 2) 252 | fmt::print( 253 | "{0:<32} - Got {1} results, Expected {2}\n", scanner.GetName(), shifted.size(), expected_.size()); 254 | 255 | if (LOG_LEVEL > 3) 256 | { 257 | fmt::print("Got:\n"); 258 | 259 | for (size_t v : shifted) 260 | fmt::print("> 0x{0:X}\n", v); 261 | 262 | fmt::print("Expected:\n"); 263 | 264 | for (size_t v : expected_) 265 
| fmt::print("> 0x{0:X}\n", v); 266 | } 267 | 268 | return false; 269 | } 270 | 271 | for (size_t result : shifted) 272 | { 273 | if (expected_.find(result) == expected_.end()) 274 | { 275 | if (LOG_LEVEL > 2) 276 | fmt::print("{0:<32} - Wasn't expecting 0x{1:X}\n", scanner.GetName(), result); 277 | 278 | return false; 279 | } 280 | } 281 | 282 | return true; 283 | } 284 | }; 285 | 286 | static mem::cmd_param cmd_region_size {"size"}; 287 | static mem::cmd_param cmd_test_count {"tests"}; 288 | static mem::cmd_param cmd_rng_seed {"seed"}; 289 | static mem::cmd_param cmd_test_file {"file"}; 290 | static mem::cmd_param cmd_log_level {"loglevel"}; 291 | static mem::cmd_param cmd_full_scan {"full"}; 292 | static mem::cmd_param cmd_filter {"filter"}; 293 | static mem::cmd_param cmd_test_index {"test"}; 294 | 295 | int main(int argc, char** argv) 296 | { 297 | #if defined(_WIN32) 298 | SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST); 299 | #endif 300 | 301 | mem::init_function::init(); 302 | mem::cmd_param::init(argc, argv); 303 | 304 | LOG_LEVEL = cmd_log_level.get_or(0); 305 | 306 | const char* filter = cmd_filter.get(); 307 | 308 | if (filter) 309 | { 310 | fmt::print("Filter: {}\n", filter); 311 | 312 | auto iter = PATTERN_SCANNERS.begin(); 313 | 314 | while (iter != PATTERN_SCANNERS.end()) 315 | { 316 | const char* name = (*iter)->GetName(); 317 | 318 | if (std::strstr(name, filter)) 319 | { 320 | ++iter; 321 | } 322 | else 323 | { 324 | iter = PATTERN_SCANNERS.erase(iter); 325 | } 326 | } 327 | } 328 | 329 | if (PATTERN_SCANNERS.empty()) 330 | { 331 | fmt::print("No Scanners\n"); 332 | 333 | return 1; 334 | } 335 | 336 | uint32_t seed = 0; 337 | 338 | if (!cmd_rng_seed.get(seed)) 339 | { 340 | seed = std::random_device {}(); 341 | } 342 | 343 | scan_bench reg(seed); 344 | 345 | if (const char* file_name = cmd_test_file.get()) 346 | { 347 | fmt::print("Scanning file: {}\n", file_name); 348 | 349 | reg.reset(file_name); 350 | } 351 | else 352 | { 353 | 
size_t region_size = cmd_region_size.get_or(32 * 1024 * 1024); 354 | 355 | if (region_size == 0) 356 | { 357 | fmt::print("Invalid region size\n"); 358 | 359 | std::abort(); 360 | } 361 | 362 | fmt::print("Scanning random data\n"); 363 | 364 | reg.reset(region_size); 365 | } 366 | 367 | const size_t test_count = cmd_test_count.get_or(256); 368 | const bool skip_fails = !cmd_full_scan.get(); 369 | 370 | const size_t test_index = cmd_test_index.get_or(SIZE_MAX); 371 | 372 | fmt::print("Begin Scan: Seed: 0x{0:08X}, Size: 0x{1:X}, Tests: {2}, Skip Fails: {3}, Scanners: {4}\n", reg.seed(), 373 | reg.full_size(), test_count, skip_fails, PATTERN_SCANNERS.size()); 374 | 375 | mem::execution_handler handler; 376 | 377 | for (size_t i = 0; i < test_count; ++i) 378 | { 379 | reg.generate(); 380 | 381 | if (test_index != SIZE_MAX && i != test_index) 382 | continue; 383 | 384 | if (LOG_LEVEL > 0) 385 | { 386 | if (!(i % 25)) 387 | fmt::print("{}/{}...\n", i, test_count); 388 | } 389 | 390 | for (auto& pattern : PATTERN_SCANNERS) 391 | { 392 | if (skip_fails && pattern->Failed != 0) 393 | continue; 394 | 395 | uint64_t start_clock = mem::rdtsc(); 396 | 397 | try 398 | { 399 | std::vector results = 400 | handler.execute([&] { return pattern->Scan(reg.pattern(), reg.masks(), reg.data(), reg.size()); }); 401 | 402 | if (!reg.check_results(*pattern, results)) 403 | { 404 | if (LOG_LEVEL > 1) 405 | fmt::print("{0:<32} - Failed test {1} ({2}, {3})\n", pattern->GetName(), i, 406 | mem::as_hex({reg.pattern(), strlen(reg.masks())}), reg.masks()); 407 | 408 | pattern->Failed++; 409 | } 410 | } 411 | catch (const std::exception& ex) 412 | { 413 | if (LOG_LEVEL > 0) 414 | fmt::print("{0:<32} - Failed test {1}: {2}\n", pattern->GetName(), i, ex.what()); 415 | 416 | pattern->Failed++; 417 | } 418 | catch (...) 
419 | { 420 | if (LOG_LEVEL > 0) 421 | fmt::print("{0:<32} - Failed test {1} (Exception)\n", pattern->GetName(), i); 422 | 423 | pattern->Failed++; 424 | } 425 | 426 | uint64_t end_clock = mem::rdtsc(); 427 | 428 | pattern->Elapsed += end_clock - start_clock; 429 | } 430 | } 431 | 432 | std::sort(PATTERN_SCANNERS.begin(), PATTERN_SCANNERS.end(), 433 | [](const std::unique_ptr& lhs, const std::unique_ptr& rhs) { 434 | if ((lhs->Failed != 0) != (rhs->Failed != 0)) 435 | return lhs->Failed < rhs->Failed; 436 | 437 | return lhs->Elapsed < rhs->Elapsed; 438 | }); 439 | 440 | fmt::print("End Scan\n\n"); 441 | 442 | const uint64_t total_scan_length = static_cast(reg.full_size()) * test_count; 443 | 444 | double best_perf = 0.0f; 445 | 446 | for (size_t i = 0; i < PATTERN_SCANNERS.size(); ++i) 447 | { 448 | const auto& pattern = *PATTERN_SCANNERS[i]; 449 | 450 | fmt::print("{:<32} | ", pattern.GetName()); 451 | 452 | double cycles_per_byte = double(pattern.Elapsed) / total_scan_length; 453 | 454 | if (i == 0) 455 | best_perf = cycles_per_byte; 456 | 457 | double normalized_perf = cycles_per_byte / best_perf; 458 | 459 | if (skip_fails && pattern.Failed) 460 | { 461 | fmt::print("failed"); 462 | } 463 | else 464 | { 465 | fmt::print( 466 | "{:>12} cycles = {:>6.3f} cycles/byte | {:>5.2f}x", pattern.Elapsed, cycles_per_byte, normalized_perf); 467 | 468 | if (!skip_fails) 469 | { 470 | fmt::print(" | {} failed", pattern.Failed); 471 | } 472 | } 473 | 474 | fmt::print("\n"); 475 | } 476 | } 477 | --------------------------------------------------------------------------------