├── vendor
└── CMakeLists.txt
├── clean.bat
├── .gitmodules
├── .gitignore
├── README.md
├── appveyor.yml
├── .clang-format
├── LICENSE.txt
├── CMakeLists.txt
├── patterns
├── std_regex.cpp
├── baseline.cpp
├── cfx.cpp
├── brick.cpp
├── mrexodia.cpp
├── darth_ton.cpp
└── forza.cpp
├── src
├── pattern_entry.cpp
└── main.cpp
└── include
└── pattern_entry.h
/vendor/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_subdirectory(mem)
2 | add_subdirectory(fmt)
3 |
--------------------------------------------------------------------------------
/clean.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 |
3 | rmdir /S /Q "build"
4 | rmdir /S /Q "lib"
5 | rmdir /S /Q "bin"
6 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "vendor/mem"]
2 | path = vendor/mem
3 | url = https://github.com/0x1F9F1/mem.git
4 | [submodule "vendor/fmt"]
5 | path = vendor/fmt
6 | url = https://github.com/fmtlib/fmt.git
7 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # User-specific files
2 | *.suo
3 | *.user
4 | *.userosscache
5 | *.sln.docstates
6 |
7 | # Build results
8 | [Bb]uild/
9 | [Dd]ebug/
10 | [Dd]ebugPublic/
11 | [Rr]elease/
12 | [Rr]eleases/
13 | x64/
14 | x86/
15 | bld/
16 | [Ll]ib/
17 | [Bb]in/
18 | [Oo]bj/
19 |
20 | .vs/
21 | .vscode/
22 | build32/
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pattern-bench
2 |
[Build status](https://ci.appveyor.com/project/0x1F9F1/pattern-bench)
4 |
5 | A randomized benchmark for pattern scanners. Also good at finding bugs.
6 |
7 | ## Leaderboard:
8 |
9 | Scanning file: witcher3.exe
10 | Begin Scan: Seed: 0x7FE81C77, Size: 0x2AF5000, Tests: 256, Skip Fails: true, Scanners: 8
11 |
12 | Name | Speed
13 | --- | ---
14 | mem::simd_scanner | 1929148619 cycles = 0.167 cycles/byte
15 | mem::boyer_moore_scanner | 11840465471 cycles = 1.027 cycles/byte
16 | DarthTon | 36983612762 cycles = 3.207 cycles/byte
17 | Simple | 39810411009 cycles = 3.452 cycles/byte
18 | CFX | 45554670556 cycles = 3.951 cycles/byte
19 | Forza (Boyer-Moore Variant) | failed
20 | mrexodia (horspool) | failed
21 | DarthTon v2 | failed
22 |
--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
1 | version: 1.0.{build}
2 | build:
3 | verbosity: minimal
4 |
5 | environment:
6 | matrix:
7 | - APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu
8 | CMAKE_GENERATOR: Unix Makefiles
9 |
10 | - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
11 | CMAKE_GENERATOR: Visual Studio 15 2017
12 |
13 | - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
14 | CMAKE_GENERATOR: Visual Studio 15 2017 Win64
15 |
16 | before_build:
17 | - cmd: git submodule update --init --recursive
18 | - sh: git submodule update --init --recursive
19 |
20 | build_script:
21 | - cmd: mkdir build
22 | - cmd: cd build
23 | - cmd: cmake .. -G "%CMAKE_GENERATOR%" -DCMAKE_BUILD_TYPE=Release
24 | - cmd: cmake --build . --config Release
25 | - cmd: cd ../bin/Release
26 | - cmd: pattern-bench.exe --size=0x800000 --tests=512 --full
27 |
28 | - sh: mkdir build
29 | - sh: cd build
30 | - sh: cmake .. -G "$CMAKE_GENERATOR" -DCMAKE_BUILD_TYPE=Release
31 | - sh: cmake --build . --config Release
32 | - sh: ../bin/pattern-bench --size=0x800000 --tests=512 --full
33 |
--------------------------------------------------------------------------------
/.clang-format:
--------------------------------------------------------------------------------
1 | BasedOnStyle: WebKit
2 |
3 | AlignEscapedNewlines: Left
4 | AlignTrailingComments: true
5 | AllowShortCaseLabelsOnASingleLine: true
6 | AllowShortFunctionsOnASingleLine: None
7 | AlwaysBreakTemplateDeclarations: Yes
8 |
9 | BreakBeforeBraces: Custom
10 | BraceWrapping:
11 | AfterClass: true
12 | AfterControlStatement: true
13 | AfterEnum: true
14 | AfterExternBlock: true
15 | AfterFunction: true
16 | AfterNamespace: true
17 | AfterStruct: true
18 | AfterUnion: true
19 | BeforeCatch: true
20 | BeforeElse: true
21 | IndentBraces: false
22 | SplitEmptyFunction: false
23 | SplitEmptyNamespace: true
24 | SplitEmptyRecord: false
25 |
26 | BreakBeforeBinaryOperators: None
27 | BreakInheritanceList: BeforeComma
28 | ColumnLimit: 120
29 | ConstructorInitializerAllOnOneLineOrOnePerLine: false
30 | Cpp11BracedListStyle: true
31 | FixNamespaceComments: true
32 | IndentCaseLabels: true
33 | IndentPPDirectives: AfterHash
34 | KeepEmptyLinesAtTheStartOfBlocks: false
35 | NamespaceIndentation: All
36 | SpaceAfterCStyleCast: true
37 | TabWidth: 4
38 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright 2018 Brick
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software
4 | and associated documentation files (the "Software"), to deal in the Software without restriction,
5 | including without limitation the rights to use, copy, modify, merge, publish, distribute,
6 | sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
7 | furnished to do so, subject to the following conditions:
8 |
9 | The above copyright notice and this permission notice shall be included in all copies or
10 | substantial portions of the Software.
11 |
12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
13 | BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
14 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
15 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
17 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.4 FATAL_ERROR)
2 |
3 | set(CMAKE_DISABLE_SOURCE_CHANGES ON)
4 | set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
5 |
6 | project(pattern-bench CXX)
7 |
8 | if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
9 | add_compile_options(/MP /EHa /arch:AVX2)
10 | else()
11 | add_compile_options(-mavx2)
12 | endif()
13 |
14 | add_executable(${PROJECT_NAME}
15 | src/main.cpp
16 | src/pattern_entry.cpp
17 | include/pattern_entry.h
18 |
19 | patterns/baseline.cpp
20 | patterns/brick.cpp
21 | patterns/cfx.cpp
22 | patterns/forza.cpp
23 | patterns/mrexodia.cpp
24 | patterns/darth_ton.cpp
25 | patterns/std_regex.cpp
26 | )
27 |
28 | add_subdirectory(vendor EXCLUDE_FROM_ALL)
29 |
30 | set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT ${PROJECT_NAME})
31 |
32 | target_include_directories(${PROJECT_NAME} PRIVATE
33 | include)
34 |
35 | target_link_libraries(${PROJECT_NAME}
36 | mem fmt)
37 |
38 | set_target_properties(${PROJECT_NAME} PROPERTIES
39 | CXX_STANDARD 11
40 | ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/bin
41 | LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/bin
42 | RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/bin
43 | CXX_STANDARD_REQUIRED ON)
44 |
--------------------------------------------------------------------------------
/patterns/std_regex.cpp:
--------------------------------------------------------------------------------
1 | #include "pattern_entry.h"
2 |
3 | #include
4 | #include
5 |
6 | struct std_regex_scanner : pattern_scanner
7 | {
8 | virtual std::vector Scan(
9 | const byte* pattern, const char* mask, const byte* data, size_t length) const override
10 | {
11 | std::string pattern_str;
12 |
13 | static constexpr const char HexChars[] = "0123456789ABCDEF";
14 |
15 | for (size_t i = 0, len = strlen(mask); i < len; ++i)
16 | {
17 | if (mask[i] != '?')
18 | {
19 | pattern_str += "\\x";
20 | pattern_str += HexChars[pattern[i] >> 4];
21 | pattern_str += HexChars[pattern[i] & 0xF];
22 | }
23 | else
24 | {
25 | pattern_str += "[^]";
26 | }
27 | }
28 |
29 | std::regex pattern_reg(pattern_str, std::regex_constants::optimize);
30 |
31 | std::vector results;
32 | std::cmatch cm;
33 |
34 | for (size_t i = 0; i < length; i += cm.position() + 1)
35 | {
36 | if (!std::regex_search((const char*) data + i, (const char*) data + length, cm, pattern_reg))
37 | break;
38 |
39 | results.push_back(data + i + cm.position());
40 | }
41 |
42 | return results;
43 | }
44 |
45 | virtual const char* GetName() const override
46 | {
47 | return "std::regex";
48 | }
49 | };
50 |
51 | // REGISTER_PATTERN(std_regex_scanner);
52 |
--------------------------------------------------------------------------------
/patterns/baseline.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2018 Brick
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software
5 | and associated documentation files (the "Software"), to deal in the Software without restriction,
6 | including without limitation the rights to use, copy, modify, merge, publish, distribute,
7 | sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all copies or
11 | substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
14 | BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
16 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18 | */
19 |
20 | #include "pattern_entry.h"
21 |
22 | struct simple_pattern_scanner : pattern_scanner
23 | {
24 | virtual std::vector Scan(
25 | const byte* pattern, const char* mask, const byte* data, size_t length) const override
26 | {
27 | return FindPatternSimple(data, length, pattern, mask);
28 | }
29 |
30 | virtual const char* GetName() const override
31 | {
32 | return "Simple";
33 | }
34 | };
35 |
36 | REGISTER_PATTERN(simple_pattern_scanner);
--------------------------------------------------------------------------------
/patterns/cfx.cpp:
--------------------------------------------------------------------------------
1 | // https://github.com/citizenfx/fivem/blob/master/code/client/shared/Hooking.Patterns.cpp
2 |
3 | #include "pattern_entry.h"
4 |
5 | #include
6 | #include
7 |
8 | #include
9 |
10 | struct cfx_pattern_scanner : pattern_scanner
11 | {
12 | virtual std::vector Scan(
13 | const byte* pattern, const char* mask, const byte* data, size_t length) const override
14 | {
15 | size_t mask_size = strlen(mask);
16 | ptrdiff_t last[256];
17 |
18 | const char* findWild = strrchr(mask, '?');
19 |
20 | std::fill(std::begin(last), std::end(last), findWild ? (findWild - mask) : -1);
21 |
22 | for (ptrdiff_t i = 0; i < static_cast(mask_size); ++i)
23 | {
24 | if (last[pattern[i]] < i)
25 | {
26 | last[pattern[i]] = i;
27 | }
28 | }
29 |
30 | std::vector results;
31 |
32 | for (const byte *i = data, *end = data + length - mask_size; i <= end;)
33 | {
34 | ptrdiff_t j = mask_size - 1;
35 |
36 | while ((j >= 0) && (mask[j] == '?' || pattern[j] == i[j]))
37 | j--;
38 |
39 | if (j < 0)
40 | {
41 | results.emplace_back(i);
42 |
43 | i++;
44 | }
45 | else
46 | {
47 | i += std::max((ptrdiff_t) 1, j - last[i[j]]);
48 | }
49 | }
50 |
51 | return results;
52 | }
53 |
54 | virtual const char* GetName() const override
55 | {
56 | return "CFX";
57 | }
58 | };
59 |
60 | REGISTER_PATTERN(cfx_pattern_scanner);
61 |
--------------------------------------------------------------------------------
/src/pattern_entry.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2018 Brick
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software
5 | and associated documentation files (the "Software"), to deal in the Software without restriction,
6 | including without limitation the rights to use, copy, modify, merge, publish, distribute,
7 | sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all copies or
11 | substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
14 | BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
16 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18 | */
19 |
20 | #include "pattern_entry.h"
21 |
22 | std::vector> PATTERN_SCANNERS;
23 |
24 | std::vector FindPatternSimple(const byte* data, size_t length, const byte* pattern, const char* masks)
25 | {
26 | size_t pattern_length = strlen(masks);
27 |
28 | if (pattern_length > length)
29 | {
30 | return {};
31 | }
32 |
33 | std::vector results;
34 |
35 | length -= pattern_length;
36 |
37 | for (size_t i = 0; i <= length; ++i)
38 | {
39 | bool found = true;
40 |
41 | for (size_t j = 0; j < pattern_length; ++j)
42 | {
43 | if ((data[i + j] != pattern[j]) && (masks[j] != '?'))
44 | {
45 | found = false;
46 |
47 | break;
48 | }
49 | }
50 |
51 | if (found)
52 | {
53 | results.push_back(data + i);
54 | }
55 | }
56 |
57 | return results;
58 | }
59 |
--------------------------------------------------------------------------------
/include/pattern_entry.h:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2018 Brick
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software
5 | and associated documentation files (the "Software"), to deal in the Software without restriction,
6 | including without limitation the rights to use, copy, modify, merge, publish, distribute,
7 | sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all copies or
11 | substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
14 | BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
16 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18 | */
19 |
20 | #pragma once
21 |
22 | #include
23 | #include
24 |
25 | #include
26 | #include
27 |
28 | using mem::byte;
29 |
30 | struct pattern_scanner
31 | {
32 | uint64_t Elapsed {0};
33 | size_t Failed {0};
34 |
35 | virtual ~pattern_scanner() = default;
36 |
37 | virtual std::vector Scan(
38 | const byte* pattern, const char* mask, const byte* data, size_t length) const = 0;
39 | virtual const char* GetName() const = 0;
40 | };
41 |
42 | extern std::vector> PATTERN_SCANNERS;
43 |
44 | #define REGISTER_PATTERN__(CLASS, LINE) \
45 | static mem::init_function DO_REGISTER_PATTERN_##LINE \
46 | { \
47 | [] { PATTERN_SCANNERS.emplace_back(new CLASS()); } \
48 | }
49 | #define REGISTER_PATTERN_(CLASS, LINE) REGISTER_PATTERN__(CLASS, LINE)
50 | #define REGISTER_PATTERN(CLASS) REGISTER_PATTERN_(CLASS, __LINE__)
51 |
52 | std::vector FindPatternSimple(const byte* data, size_t length, const byte* pattern, const char* masks);
53 |
--------------------------------------------------------------------------------
/patterns/brick.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2018 Brick
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software
5 | and associated documentation files (the "Software"), to deal in the Software without restriction,
6 | including without limitation the rights to use, copy, modify, merge, publish, distribute,
7 | sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all copies or
11 | substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
14 | BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
16 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18 | */
19 |
20 | #include "pattern_entry.h"
21 |
22 | #include
23 |
24 | #include
25 |
26 | struct mem_boyer_moore_pattern_scanner : pattern_scanner
27 | {
28 | virtual std::vector Scan(
29 | const byte* bytes, const char* mask, const byte* data, size_t length) const override
30 | {
31 | mem::pattern pattern(bytes, mask);
32 | mem::boyer_moore_scanner scanner(pattern);
33 |
34 | std::vector results;
35 |
36 | scanner({data, length}, [&](mem::pointer result) {
37 | results.push_back(result.as());
38 |
39 | return false;
40 | });
41 |
42 | return results;
43 | }
44 |
45 | virtual const char* GetName() const override
46 | {
47 | return "mem::boyer_moore_scanner";
48 | }
49 | };
50 |
51 | REGISTER_PATTERN(mem_boyer_moore_pattern_scanner);
52 |
53 | #include
54 |
55 | struct mem_simd_pattern_scanner : pattern_scanner
56 | {
57 | virtual std::vector Scan(
58 | const byte* bytes, const char* mask, const byte* data, size_t length) const override
59 | {
60 | mem::pattern pattern(bytes, mask);
61 | mem::simd_scanner scanner(pattern);
62 |
63 | std::vector results;
64 |
65 | scanner({data, length}, [&](mem::pointer result) {
66 | results.push_back(result.as());
67 |
68 | return false;
69 | });
70 |
71 | return results;
72 | }
73 |
74 | virtual const char* GetName() const override
75 | {
76 | return "mem::simd_scanner";
77 | }
78 | };
79 |
80 | REGISTER_PATTERN(mem_simd_pattern_scanner);
--------------------------------------------------------------------------------
/patterns/mrexodia.cpp:
--------------------------------------------------------------------------------
1 | // https://github.com/learn-more/findpattern-bench/blob/master/patterns/mrexodia_horspool.h
2 |
3 | #include "pattern_entry.h"
4 |
5 | // based on: https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm
6 |
7 | bool matches(const byte haystack_ch, const byte needle_ch, const byte wildcard)
8 | {
9 | return needle_ch == wildcard || haystack_ch == needle_ch;
10 | }
11 |
12 | std::vector boyermoore_horspool_memmem(
13 | const byte* haystack, size_t hlen, const byte* needle, size_t nlen, const byte wildcard = '\0')
14 | {
15 | size_t bad_char_skip[UCHAR_MAX + 1]; /* Officially called: bad character shift */
16 |
17 | /* Sanity checks on the parameters */
18 | if (nlen <= 0 || !haystack || !needle)
19 | return {};
20 |
21 | /* ---- Preprocess ---- */
22 | /* Initialize the table to default value */
23 | /* When a character is encountered that does not occur
24 | * in the needle, we can safely skip ahead for the whole
25 | * length of the needle.
26 | */
27 | for (size_t scan = 0; scan <= UCHAR_MAX; scan = scan + 1)
28 | {
29 | bad_char_skip[scan] = nlen;
30 | }
31 |
32 | /* C arrays have the first byte at [0], therefore:
33 | * [nlen - 1] is the last byte of the array. */
34 | size_t last = nlen - 1;
35 |
36 | /* Then populate it with the analysis of the needle */
37 | for (size_t scan = 0; scan < last; scan = scan + 1)
38 | {
39 | byte needleByte = needle[scan];
40 | bad_char_skip[needleByte] = last - scan;
41 | }
42 |
43 | /* ---- Do the matching ---- */
44 |
45 | std::vector results;
46 |
47 | /* Search the haystack, while the needle can still be within it. */
48 | while (hlen >= nlen)
49 | {
50 | /* scan from the end of the needle */
51 | for (size_t scan = last; matches(haystack[scan], needle[scan], wildcard); scan = scan - 1)
52 | {
53 | if (scan == 0) /* If the first byte matches, we've found it. */
54 | results.push_back(haystack);
55 | }
56 |
57 | /* otherwise, we need to skip some bytes and start again.
58 | Note that here we are getting the skip value based on the last byte
59 | of needle, no matter where we didn't match. So if needle is: "abcd"
60 | then we are skipping based on 'd' and that value will be 4, and
61 | for "abcdd" we again skip on 'd' but the value will be only 1.
62 | The alternative of pretending that the mismatched character was
63 | the last character is slower in the normal case (E.g. finding
64 | "abcd" in "...azcd..." gives 4 by using 'd' but only
65 | 4-2==2 using 'z'. */
66 | byte lastByte = haystack[last];
67 | hlen -= bad_char_skip[lastByte];
68 | haystack += bad_char_skip[lastByte];
69 | }
70 |
71 | return results;
72 | }
73 |
74 | struct mrexodia_pattern_scanner : pattern_scanner
75 | {
76 | virtual std::vector Scan(
77 | const byte* pattern, const char* mask, const byte* data, size_t length) const override
78 | {
79 | return boyermoore_horspool_memmem(data, length, pattern, strlen(mask), 0);
80 | }
81 |
82 | virtual const char* GetName() const override
83 | {
84 | return "mrexodia (horspool)";
85 | }
86 | };
87 |
88 | REGISTER_PATTERN(mrexodia_pattern_scanner);
89 |
--------------------------------------------------------------------------------
/patterns/darth_ton.cpp:
--------------------------------------------------------------------------------
1 | // https://github.com/learn-more/findpattern-bench/blob/master/patterns/DarthTon.h
2 |
3 | #include "pattern_entry.h"
4 |
5 | #include
6 |
7 | #define min(a, b) (((a) < (b)) ? (a) : (b))
8 |
9 | // Boyer-Moore-Horspool with wildcards implementation
// Builds the bad-character shift table for the wildcard-aware Horspool search.
//
// pPattern/patternSize - pattern bytes and their count.
// pMask                - one character per pattern byte; '?' marks a wildcard.
// bad_char_skip        - output table with UCHAR_MAX + 1 entries; every entry
//                        ends up >= 1.
//
// An empty pattern fills the table with 1 instead of indexing before the start
// of the mask.
void FillShiftTable(const uint8_t* pPattern, size_t patternSize, const char* pMask, size_t* bad_char_skip)
{
    if (patternSize == 0)
    {
        for (size_t idx = 0; idx <= UCHAR_MAX; ++idx)
            bad_char_skip[idx] = 1;
        return;
    }

    const size_t last = patternSize - 1;

    // Walk back from the end to the last wildcard; stops at index 0 when the
    // mask contains none.
    size_t idx = last;
    while (idx > 0 && pMask[idx] != '?')
        --idx;

    // Largest shift that cannot step over that position; never less than 1.
    size_t diff = last - idx;
    if (diff == 0)
        diff = 1;

    // Default shift for every byte value.
    for (idx = 0; idx <= UCHAR_MAX; ++idx)
        bad_char_skip[idx] = diff;

    // Bytes inside the trailing part of the pattern get a shorter shift. When
    // last == 0 the start index wraps around, but `idx < last` is then false
    // and the loop body never runs.
    for (idx = last - diff; idx < last; ++idx)
        bad_char_skip[pPattern[idx]] = last - idx;
}
28 |
29 | std::vector Search(const uint8_t* pScanPos, size_t scanSize, const uint8_t* pPattern, const char* pMask)
30 | {
31 | size_t patternSize = strlen(pMask);
32 |
33 | size_t bad_char_skip[UCHAR_MAX + 1];
34 | const uint8_t* scanEnd = pScanPos + scanSize - patternSize;
35 | intptr_t last = static_cast(patternSize) - 1;
36 |
37 | FillShiftTable(pPattern, patternSize, pMask, bad_char_skip);
38 |
39 | std::vector results;
40 |
41 | // Search
42 | for (; pScanPos <= scanEnd; pScanPos += bad_char_skip[pScanPos[last]])
43 | {
44 | for (intptr_t idx = last; idx >= 0; --idx)
45 | if (pMask[idx] != '?' && pScanPos[idx] != pPattern[idx])
46 | goto skip;
47 | else if (idx == 0)
48 | results.push_back(pScanPos);
49 | skip:;
50 | }
51 |
52 | return results;
53 | }
54 |
// One 16-byte slice of a pattern: the bytes to compare against and a bit mask
// selecting which of them take part in the comparison.
struct PartData
{
    int32_t mask;
    __m128i needle;

    // Both members start out as all-zero. The initialisation is spelled in the
    // constructor because of MSVC error C2797 (list initialization inside a
    // member initializer list or non-static data member initializer is not
    // implemented).
    PartData()
        : mask(0)
        , needle(_mm_setzero_si128())
    {}
};
66 |
67 | std::vector Search2(const uint8_t* data, const uint32_t size, const uint8_t* pattern, const char* mask)
68 | {
69 | auto len = strlen(mask);
70 | auto first = strchr(mask, '?');
71 | size_t len2 = (first != nullptr) ? (first - mask) : len;
72 | auto firstlen = min(len2, 16);
73 | intptr_t num_parts = (len < 16 || len % 16) ? (len / 16 + 1) : (len / 16);
74 | PartData parts[4];
75 |
76 | for (intptr_t i = 0; i < num_parts; ++i, len -= 16)
77 | {
78 | for (size_t j = 0; j < min(len, 16) - 1; ++j)
79 | if (mask[16 * i + j] == 'x')
80 | parts[i].mask |= (1 << j);
81 |
82 | parts[i].needle = _mm_loadu_si128((const __m128i*) (pattern + i * 16));
83 | }
84 |
85 | std::vector results;
86 |
87 | for (intptr_t i = 0; i < static_cast(size) / 32 - 1; ++i)
88 | {
89 | // auto block = _mm256_loadu_si256( (const __m256i*)data + i );
90 | // if (_mm256_testz_si256( block, block ))
91 | // continue;
92 |
93 | auto offset = _mm_cmpestri(
94 | parts->needle, firstlen, _mm_loadu_si128((const __m128i*) (data + i * 32)), 16, _SIDD_CMP_EQUAL_ORDERED);
95 | if (offset == 16)
96 | {
97 | offset += _mm_cmpestri(parts->needle, firstlen, _mm_loadu_si128((const __m128i*) (data + i * 32 + 16)), 16,
98 | _SIDD_CMP_EQUAL_ORDERED);
99 | if (offset == 32)
100 | continue;
101 | }
102 |
103 | for (intptr_t j = 0; j < num_parts; ++j)
104 | {
105 | auto hay = _mm_loadu_si128((const __m128i*) (data + (2 * i + j) * 16 + offset));
106 | auto bitmask = _mm_movemask_epi8(_mm_cmpeq_epi8(hay, parts[j].needle));
107 | if ((bitmask & parts[j].mask) != parts[j].mask)
108 | goto next;
109 | }
110 |
111 | results.push_back(data + 32 * i + offset);
112 |
113 | next:;
114 | }
115 |
116 | return results;
117 | }
118 |
119 | struct darth_ton_pattern_scanner : pattern_scanner
120 | {
121 | virtual std::vector Scan(
122 | const byte* pattern, const char* mask, const byte* data, size_t length) const override
123 | {
124 | return Search(data, length, pattern, mask);
125 | }
126 |
127 | virtual const char* GetName() const override
128 | {
129 | return "DarthTon";
130 | }
131 | };
132 |
133 | REGISTER_PATTERN(darth_ton_pattern_scanner);
134 |
135 | struct darth_ton2_pattern_scanner : pattern_scanner
136 | {
137 | virtual std::vector Scan(
138 | const byte* pattern, const char* mask, const byte* data, size_t length) const override
139 | {
140 | return Search2(data, length, pattern, mask);
141 | }
142 |
143 | virtual const char* GetName() const override
144 | {
145 | return "DarthTon v2";
146 | }
147 | };
148 |
149 | REGISTER_PATTERN(darth_ton2_pattern_scanner);
150 |
--------------------------------------------------------------------------------
/patterns/forza.cpp:
--------------------------------------------------------------------------------
1 | // https://github.com/learn-more/findpattern-bench/blob/master/patterns/Forza.h
2 |
3 | #include "pattern_entry.h"
4 |
5 | #include
6 |
// Pattern split into literal segments of at most 16 bytes each.
struct PatternData
{
    uint32_t Count;      // number of segments in use
    uint32_t Size;       // total pattern length in bytes
    uint32_t Length[16]; // literal byte count of each segment
    uint32_t Skip[16];   // distance from the start of a segment to the start of the next one
    __m128i Value[16];   // literal bytes of each segment

    // Start fully zeroed: Matches() and FindEx() read Skip[0], Length[0] and
    // Value[0] even when GeneratePattern() produced no segment at all.
    PatternData()
        : Count(0)
        , Size(0)
    {
        memset(Length, 0, sizeof(Length));
        memset(Skip, 0, sizeof(Skip));
        memset(Value, 0, sizeof(Value));
    }
};
15 |
16 | void GeneratePattern(const char* Signature, const char* Mask, PatternData* Out)
17 | {
18 | auto l = strlen(Mask);
19 |
20 | Out->Count = 0;
21 |
22 | for (auto i = 0; i < l; i++)
23 | {
24 | if (Mask[i] == '?')
25 | continue;
26 |
27 | auto ml = 0, sl = 0;
28 |
29 | for (auto j = i; j < l; j++)
30 | {
31 | if (Mask[j] == '?' || sl >= 16)
32 | break;
33 | sl++;
34 | }
35 |
36 | for (auto j = i + sl; j < l; j++)
37 | {
38 | if (Mask[j] != '?')
39 | break;
40 | ml++;
41 | }
42 |
43 | auto c = Out->Count;
44 |
45 | Out->Length[c] = sl;
46 | Out->Skip[c] = sl + ml;
47 | Out->Value[c] = _mm_loadu_si128((const __m128i*) ((uint8_t*) Signature + i));
48 |
49 | Out->Count++;
50 |
51 | i += sl - 1;
52 | }
53 |
54 | Out->Size = l;
55 | }
56 |
57 | MEM_STRONG_INLINE bool Matches(const uint8_t* Data, PatternData* Patterns)
58 | {
59 | auto k = Data + Patterns->Skip[0];
60 |
61 | for (auto i = 1; i < Patterns->Count; i++)
62 | {
63 | auto l = Patterns->Length[i];
64 |
65 | if (_mm_cmpestri(Patterns->Value[i], l, _mm_loadu_si128((const __m128i*) k), l,
66 | _SIDD_CMP_EQUAL_EACH | _SIDD_MASKED_NEGATIVE_POLARITY) != l)
67 | break;
68 |
69 | if (i + 1 == Patterns->Count)
70 | return true;
71 |
72 | k += Patterns->Skip[i];
73 | }
74 |
75 | return false;
76 | }
77 |
78 | std::vector FindEx(const uint8_t* Data, const uint32_t Length, const char* Signature, const char* Mask)
79 | {
80 | PatternData d;
81 | GeneratePattern(Signature, Mask, &d);
82 |
83 | auto out = static_cast(nullptr);
84 | auto end = Data + Length - d.Size;
85 |
86 | std::vector results;
87 |
88 | // C3010: 'break' : jump out of OpenMP structured block not allowed
89 | for (intptr_t i = Length - 32; i >= 0; i -= 32)
90 | {
91 | auto p = Data + i;
92 | auto b = _mm256_loadu_si256((const __m256i*) p);
93 |
94 | // if (_mm256_test_all_zeros(b, b) == 1)
95 | // continue;
96 |
97 | auto f = _mm_cmpestri(d.Value[0], d.Length[0], _mm256_extractf128_si256(b, 0), 16, _SIDD_CMP_EQUAL_ORDERED);
98 |
99 | if (f == 16)
100 | {
101 | f += _mm_cmpestri(d.Value[0], d.Length[0], _mm256_extractf128_si256(b, 1), 16, _SIDD_CMP_EQUAL_ORDERED);
102 |
103 | if (f == 32)
104 | continue;
105 | }
106 |
107 | PossibleMatch:
108 | p += f;
109 |
110 | if (p + d.Size > end)
111 | {
112 | for (auto j = 0; j < d.Size && j + i + f < Length; j++)
113 | {
114 | if (Mask[j] == 'x' && (uint8_t) Signature[j] != p[j])
115 | break;
116 |
117 | if (j + 1 == d.Size)
118 | results.push_back(p);
119 | }
120 |
121 | continue;
122 | }
123 |
124 | if (Matches(p, &d))
125 | results.push_back(p);
126 |
127 | p++;
128 | f = _mm_cmpestri(d.Value[0], d.Length[0], _mm_loadu_si128((const __m128i*) p), 16, _SIDD_CMP_EQUAL_ORDERED);
129 |
130 | if (f < 16)
131 | goto PossibleMatch;
132 | }
133 |
134 | return results;
135 | }
136 |
// Picks a (start, length) pair describing a stretch of the pattern and writes
// it to Out[0] (start) and Out[1] (length).
//
// The scan starts at index strlen(Signature) with that value as the length to
// beat. For every 'x' it meets, the candidate length is the distance to the
// LAST '?' found from that position on, or to the end of the mask when no '?'
// follows. A candidate replaces the current best only when strictly longer.
void FindLargestArray(const char* Signature, const char* Mask, int Out[2])
{
    uint32_t best_start = 0;
    uint32_t best_length = strlen(Signature);
    const uint32_t mask_length = strlen(Mask);

    uint32_t index = best_length;

    while (index < mask_length)
    {
        if (Mask[index] == 'x')
        {
            const char* last_wildcard = strrchr(&Mask[index], '?');

            auto count = last_wildcard ? (last_wildcard - &Mask[index]) : (mask_length - index);

            if (count > best_length)
            {
                best_start = index;
                best_length = count;
            }

            // Jump to the last position covered by this candidate; the
            // increment below then moves past it.
            index += (count - 1);
        }

        index++;
    }

    Out[0] = best_start;
    Out[1] = best_length;
}
164 |
165 | std::vector Find(const byte* Data, const uint32_t Length, const char* Signature, const char* Mask)
166 | {
167 | int d[2] = {0};
168 | FindLargestArray(Signature, Mask, d);
169 |
170 | const uint8_t len = static_cast(strlen(Mask));
171 | const uint8_t mbeg = static_cast(d[0]);
172 | const uint8_t mlen = static_cast(d[1]);
173 | const uint8_t mfirst = static_cast(Signature[mbeg]);
174 |
175 | uint8_t wildcard[UCHAR_MAX + 1] = {0};
176 |
177 | for (auto i = mbeg; i < mbeg + mlen; i++)
178 | wildcard[(uint8_t) Signature[i]] = 1;
179 |
180 | std::vector results;
181 |
182 | for (int i = Length - len; i >= 0; i--)
183 | {
184 | auto c = Data[i];
185 | auto w = wildcard[c];
186 | auto k = 0;
187 |
188 | while (w == 0 && i > mlen)
189 | {
190 | i -= mlen;
191 | w = wildcard[Data[i]];
192 | k = 1;
193 | }
194 |
195 | if (k == 1)
196 | {
197 | i++;
198 | continue;
199 | }
200 |
201 | if (c != mfirst)
202 | continue;
203 |
204 | if (i - mbeg < 0 || i - mbeg + len > Length)
205 | break;
206 |
207 | for (auto j = 0; j < len - 1; j++)
208 | {
209 | if (j == mbeg || Mask[j] != 'x')
210 | continue;
211 |
212 | if (Data[i - mbeg + j] != (uint8_t) Signature[j])
213 | break;
214 |
215 | if (j + 1 == len - 1)
216 | results.push_back((uint8_t*) (Data + i - mbeg));
217 | }
218 | }
219 |
220 | return results;
221 | }
222 |
223 | struct forza_pattern_scanner : pattern_scanner
224 | {
225 | virtual std::vector Scan(
226 | const byte* pattern, const char* mask, const byte* data, size_t length) const override
227 | {
228 | return Find(data, length, (const char*) pattern, mask);
229 | }
230 |
231 | virtual const char* GetName() const override
232 | {
233 | return "Forza (Boyer-Moore Variant)";
234 | }
235 | };
236 |
237 | REGISTER_PATTERN(forza_pattern_scanner);
238 |
239 | struct forza_simd_pattern_scanner : pattern_scanner
240 | {
241 | virtual std::vector Scan(
242 | const byte* pattern, const char* mask, const byte* data, size_t length) const override
243 | {
244 | return FindEx(data, length, (const char*) pattern, mask);
245 | }
246 |
247 | virtual const char* GetName() const override
248 | {
249 | return "Forza (SIMD)";
250 | }
251 | };
252 |
253 | // REGISTER_PATTERN(forza_simd_pattern_scanner);
--------------------------------------------------------------------------------
/src/main.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2018 Brick
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software
5 | and associated documentation files (the "Software"), to deal in the Software without restriction,
6 | including without limitation the rights to use, copy, modify, merge, publish, distribute,
7 | sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all copies or
11 | substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
14 | BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
16 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18 | */
19 |
20 | #include
21 | #include
22 | #include
23 | #include
24 | #include
25 |
26 | #include
27 | #include
28 | #include
29 |
30 | #include
31 | #include
32 |
33 | #include
34 |
35 | #include
36 |
37 | #include
38 | #include
39 |
40 | #include
41 |
42 | #include
43 |
44 | #include "pattern_entry.h"
45 |
// Verbosity threshold for diagnostic output. main() overwrites it from the
// "loglevel" command-line parameter; messages are gated on LOG_LEVEL > 0 .. > 3.
46 | static size_t LOG_LEVEL = 0;
47 |
// Make mem::byte available in this file as plain `byte`.
48 | using mem::byte;
49 |
50 | mem::byte_buffer read_file(const char* path)
51 | {
52 | std::ifstream input(path, std::ifstream::binary | std::ifstream::ate);
53 |
54 | size_t length = static_cast(input.tellg());
55 |
56 | input.seekg(0);
57 |
58 | mem::byte_buffer result(length);
59 |
60 | if (!input.read(reinterpret_cast(result.data()), result.size()))
61 | {
62 | result.reset();
63 | }
64 |
65 | return result;
66 | }
67 |
// Test fixture for the benchmark. It owns a memory region fenced by inaccessible
// pages, generates a random pattern/mask per test, plants copies of the pattern
// in the region, and checks scanner output against FindPatternSimple().
68 | struct scan_bench
69 | {
70 | private:
// Whole allocation returned by mem::protect_alloc, including the first and last
// page that reset() marks as prot_flags::NONE.
71 | byte* raw_data_ {nullptr};
72 | size_t raw_size_ {0};
73 |
// Accessible region between the two fenced pages; full_size_ is the requested
// size rounded up to a multiple of the page size.
74 | byte* full_data_ {nullptr};
75 | size_t full_size_ {0};
76 |
// Window passed to the scanners for the current test; set by generate().
77 | byte* data_ {nullptr};
78 | size_t size_ {0};
79 |
// Seed given to the constructor and the generator initialised from it.
80 | uint32_t seed_ {0};
81 | std::mt19937 rng_ {};
82 |
// Current pattern bytes, its mask ('x' = literal, '?' = wildcard), and the
// offsets relative to data() at which FindPatternSimple() found the pattern.
83 | std::vector pattern_;
84 | std::string masks_;
85 | std::unordered_set expected_;
86 |
87 | public:
// Stores the seed and seeds rng_ with it. No memory is allocated until reset().
88 | scan_bench(uint32_t seed)
89 | : seed_(seed)
90 | , rng_(seed_)
91 | {}
92 |
// Neither copyable nor movable: the object frees raw_data_ in its destructor.
93 | scan_bench(const scan_bench&) = delete;
94 | scan_bench(scan_bench&&) = delete;
95 |
// Releases the allocation made by reset().
// NOTE(review): this also runs with raw_data_ == nullptr when reset() was never
// called; whether mem::protect_free accepts that is not visible here.
96 | ~scan_bench()
97 | {
98 | mem::protect_free(raw_data_, raw_size_);
99 | }
100 |
// Allocates a region of at least region_size bytes and fills it with random bytes.
101 | void reset(size_t region_size)
102 | {
103 | reset(nullptr, region_size);
104 | }
105 |
// Allocates a region and fills it with the contents of file_name.
// NOTE(review): if read_file() yields a null data pointer / zero size, the
// three-argument overload falls into its random-data branch with full_size_ == 0;
// generate() then computes size_ = full_size_ - variation, which wraps. Confirm
// that an unreadable file is handled before this point.
106 | void reset(const char* file_name)
107 | {
108 | mem::byte_buffer region_data = read_file(file_name);
109 |
110 | reset(region_data.data(), region_data.size());
111 | }
112 |
// Allocates the fenced region and fills it.
//   region_data: bytes to copy in, or nullptr to fill the region with random bytes.
//   region_size: number of bytes requested; rounded up to whole pages.
// NOTE(review): an allocation from an earlier reset() is not released here, so
// calling reset() twice would overwrite raw_data_ without freeing it.
113 | void reset(const byte* region_data, size_t region_size)
114 | {
115 | size_t page_size = mem::page_size();
116 |
// Round the usable size up to a multiple of the page size.
117 | full_size_ = (region_size + page_size - 1) / page_size * page_size;
118 |
// One extra page on each side of the usable region.
119 | raw_size_ = full_size_ + (page_size * 2);
120 | raw_data_ = static_cast(mem::protect_alloc(raw_size_, mem::prot_flags::RW));
121 |
122 | full_data_ = raw_data_ + page_size;
123 |
// Remove all access from the first and last page, presumably so that a scanner
// reading outside the region faults instead of silently succeeding.
124 | mem::protect_modify(raw_data_, page_size, mem::prot_flags::NONE);
125 | mem::protect_modify(raw_data_ + raw_size_ - page_size, page_size, mem::prot_flags::NONE);
126 |
127 | if (region_data)
128 | {
// The padding introduced by rounding goes at the front (zero-filled), so the
// copied data ends exactly at the trailing fenced page.
129 | size_t extra = (full_size_ - region_size);
130 |
131 | std::memset(full_data_, 0, extra);
132 | std::memcpy(full_data_ + extra, region_data, region_size);
133 | }
134 | else
135 | {
136 | std::uniform_int_distribution byte_dist(0, 0xFF);
137 |
138 | std::generate_n(full_data_, full_size_, [&] { return (byte) byte_dist(rng_); });
139 | }
140 | }
141 |
// Size of the usable region (a multiple of the page size).
142 | size_t full_size() const noexcept
143 | {
144 | return full_size_;
145 | }
146 |
// Start of the window scanned in the current test.
147 | const byte* data() const noexcept
148 | {
149 | return data_;
150 | }
151 |
// Size in bytes of the window scanned in the current test.
152 | size_t size() const noexcept
153 | {
154 | return size_;
155 | }
156 |
// Pattern bytes of the current test.
157 | const byte* pattern() const noexcept
158 | {
159 | return pattern_.data();
160 | }
161 |
// Mask of the current test as a C string.
162 | const char* masks() const noexcept
163 | {
164 | return masks_.data();
165 | }
166 |
// Seed this bench was constructed with.
167 | uint32_t seed() const noexcept
168 | {
169 | return seed_;
170 | }
171 |
// Converts result pointers into a set of offsets relative to data().
// Duplicate pointers collapse into a single entry.
172 | std::unordered_set shift_results(const std::vector& results)
173 | {
174 | std::unordered_set shifted;
175 |
176 | for (const byte* result : results)
177 | {
178 | shifted.emplace(result - data());
179 | }
180 |
181 | return shifted;
182 | }
183 |
// Prepares the next test: picks the scan window, builds a random pattern and
// mask, plants copies of the pattern in the window, and records the expected
// match offsets.
184 | void generate()
185 | {
186 | std::uniform_int_distribution size_dist(0, 100);
187 |
// Shift the start of the window by 0..100 bytes; the end of the window stays
// at the end of the usable region.
188 | size_t variation = size_dist(rng_);
189 |
190 | data_ = full_data_ + variation;
191 | size_ = full_size_ - variation;
192 |
193 | std::uniform_int_distribution byte_dist(0, 0xFF);
194 |
195 | std::uniform_int_distribution length_dist(5, 32);
196 |
197 | size_t pattern_length = length_dist(rng_);
198 |
199 | pattern_.resize(pattern_length);
200 | masks_.resize(pattern_length);
201 |
// Each position is a literal with probability 0.9, otherwise a wildcard.
202 | std::bernoulli_distribution mask_dist(0.9);
203 |
// Stays true only while no literal position has been generated.
204 | bool all_masks = true;
205 |
// Regenerate until the mask contains at least one literal position.
206 | do
207 | {
208 | for (size_t i = 0; i < pattern_length; ++i)
209 | {
210 | if (mask_dist(rng_))
211 | {
212 | pattern_[i] = (char) byte_dist(rng_);
213 | masks_[i] = 'x';
214 |
215 | all_masks = false;
216 | }
217 | else
218 | {
// Wildcard positions carry a zero byte in the pattern.
219 | pattern_[i] = 0x00;
220 | masks_[i] = '?';
221 | }
222 | }
223 | } while (all_masks);
224 |
225 | std::uniform_int_distribution count_dist(2, 10);
226 |
227 | size_t result_count = count_dist(rng_);
228 |
// Offsets at which the whole pattern still fits inside the window.
// NOTE(review): size() - pattern_.size() wraps if the window is smaller than
// the pattern.
229 | std::uniform_int_distribution range_dist(0, size() - pattern_.size());
230 |
// Plant the literal bytes of the pattern at random offsets; wildcard positions
// leave the existing data untouched.
231 | for (size_t i = 0; i < result_count; ++i)
232 | {
233 | size_t offset = range_dist(rng_);
234 |
235 | for (size_t j = 0; j < pattern_.size(); ++j)
236 | {
237 | if (masks_[j] != '?')
238 | data_[offset + j] = pattern_[j];
239 | }
240 | }
241 |
// The expected set comes from scanning the final data with FindPatternSimple(),
// not from the planted offsets.
242 | expected_ = shift_results(FindPatternSimple(data(), size(), pattern(), masks()));
243 | }
244 |
// Compares a scanner's results with the expected offsets.
// Returns true when both sets have the same size and every reported offset is
// expected. Mismatches are printed depending on LOG_LEVEL.
245 | bool check_results(const pattern_scanner& scanner, const std::vector& results)
246 | {
247 | std::unordered_set shifted = shift_results(results);
248 |
249 | if (shifted.size() != expected_.size())
250 | {
251 | if (LOG_LEVEL > 2)
252 | fmt::print(
253 | "{0:<32} - Got {1} results, Expected {2}\n", scanner.GetName(), shifted.size(), expected_.size());
254 |
// At the highest verbosity, dump both offset sets.
255 | if (LOG_LEVEL > 3)
256 | {
257 | fmt::print("Got:\n");
258 |
259 | for (size_t v : shifted)
260 | fmt::print("> 0x{0:X}\n", v);
261 |
262 | fmt::print("Expected:\n");
263 |
264 | for (size_t v : expected_)
265 | fmt::print("> 0x{0:X}\n", v);
266 | }
267 |
268 | return false;
269 | }
270 |
// Sizes are equal, so it is enough to check that every reported offset is expected.
271 | for (size_t result : shifted)
272 | {
273 | if (expected_.find(result) == expected_.end())
274 | {
275 | if (LOG_LEVEL > 2)
276 | fmt::print("{0:<32} - Wasn't expecting 0x{1:X}\n", scanner.GetName(), result);
277 |
278 | return false;
279 | }
280 | }
281 |
282 | return true;
283 | }
284 | };
285 |
// Command-line parameters, each constructed with the name it is looked up by.
// Their values are read in main() after mem::cmd_param::init(argc, argv).
286 | static mem::cmd_param cmd_region_size {"size"};
287 | static mem::cmd_param cmd_test_count {"tests"};
288 | static mem::cmd_param cmd_rng_seed {"seed"};
289 | static mem::cmd_param cmd_test_file {"file"};
290 | static mem::cmd_param cmd_log_level {"loglevel"};
291 | static mem::cmd_param cmd_full_scan {"full"};
292 | static mem::cmd_param cmd_filter {"filter"};
293 | static mem::cmd_param cmd_test_index {"test"};
294 |
// Entry point of the benchmark: selects the scanners, prepares the scan region,
// runs every scanner on each generated test while accumulating cycle counts and
// failures, then prints the scanners ordered by result.
// Returns 1 when no scanner is left to run.
295 | int main(int argc, char** argv)
296 | {
297 | #if defined(_WIN32)
298 | SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST);
299 | #endif
300 |
301 | mem::init_function::init();
302 | mem::cmd_param::init(argc, argv);
303 |
304 | LOG_LEVEL = cmd_log_level.get_or(0);
305 |
306 | const char* filter = cmd_filter.get();
307 |
// With a filter, keep only scanners whose name contains the filter text
// (case-sensitive substring match via strstr).
308 | if (filter)
309 | {
310 | fmt::print("Filter: {}\n", filter);
311 |
312 | auto iter = PATTERN_SCANNERS.begin();
313 |
314 | while (iter != PATTERN_SCANNERS.end())
315 | {
316 | const char* name = (*iter)->GetName();
317 |
318 | if (std::strstr(name, filter))
319 | {
320 | ++iter;
321 | }
322 | else
323 | {
// erase() returns the iterator following the removed element.
324 | iter = PATTERN_SCANNERS.erase(iter);
325 | }
326 | }
327 | }
328 |
329 | if (PATTERN_SCANNERS.empty())
330 | {
331 | fmt::print("No Scanners\n");
332 |
333 | return 1;
334 | }
335 |
336 | uint32_t seed = 0;
337 |
// Use the "seed" parameter when given, otherwise draw one from std::random_device.
338 | if (!cmd_rng_seed.get(seed))
339 | {
340 | seed = std::random_device {}();
341 | }
342 |
343 | scan_bench reg(seed);
344 |
// The region is filled either from the file named by "file" or with random bytes.
345 | if (const char* file_name = cmd_test_file.get())
346 | {
347 | fmt::print("Scanning file: {}\n", file_name);
348 |
349 | reg.reset(file_name);
350 | }
351 | else
352 | {
// Default region size: 32 MiB.
353 | size_t region_size = cmd_region_size.get_or(32 * 1024 * 1024);
354 |
355 | if (region_size == 0)
356 | {
357 | fmt::print("Invalid region size\n");
358 |
359 | std::abort();
360 | }
361 |
362 | fmt::print("Scanning random data\n");
363 |
364 | reg.reset(region_size);
365 | }
366 |
367 | const size_t test_count = cmd_test_count.get_or(256);
// Unless "full" is given, a scanner that has failed once is skipped afterwards.
368 | const bool skip_fails = !cmd_full_scan.get();
369 |
// SIZE_MAX means "run every test"; any other value selects a single test index.
370 | const size_t test_index = cmd_test_index.get_or(SIZE_MAX);
371 |
372 | fmt::print("Begin Scan: Seed: 0x{0:08X}, Size: 0x{1:X}, Tests: {2}, Skip Fails: {3}, Scanners: {4}\n", reg.seed(),
373 | reg.full_size(), test_count, skip_fails, PATTERN_SCANNERS.size());
374 |
375 | mem::execution_handler handler;
376 |
377 | for (size_t i = 0; i < test_count; ++i)
378 | {
// generate() runs even for tests that are skipped below, presumably so that a
// selected test sees the same generator state as it would in a full run.
379 | reg.generate();
380 |
381 | if (test_index != SIZE_MAX && i != test_index)
382 | continue;
383 |
// Progress line every 25 tests.
384 | if (LOG_LEVEL > 0)
385 | {
386 | if (!(i % 25))
387 | fmt::print("{}/{}...\n", i, test_count);
388 | }
389 |
390 | for (auto& pattern : PATTERN_SCANNERS)
391 | {
392 | if (skip_fails && pattern->Failed != 0)
393 | continue;
394 |
// The measured interval spans the scan, the result check and any failure logging.
395 | uint64_t start_clock = mem::rdtsc();
396 |
397 | try
398 | {
// The scan is invoked through the execution handler.
// NOTE(review): presumably this turns faults raised inside a scanner into C++
// exceptions caught below - confirm against mem::execution_handler.
399 | std::vector results =
400 | handler.execute([&] { return pattern->Scan(reg.pattern(), reg.masks(), reg.data(), reg.size()); });
401 |
402 | if (!reg.check_results(*pattern, results))
403 | {
404 | if (LOG_LEVEL > 1)
405 | fmt::print("{0:<32} - Failed test {1} ({2}, {3})\n", pattern->GetName(), i,
406 | mem::as_hex({reg.pattern(), strlen(reg.masks())}), reg.masks());
407 |
408 | pattern->Failed++;
409 | }
410 | }
// Any exception escaping the scan counts as a failed test.
411 | catch (const std::exception& ex)
412 | {
413 | if (LOG_LEVEL > 0)
414 | fmt::print("{0:<32} - Failed test {1}: {2}\n", pattern->GetName(), i, ex.what());
415 |
416 | pattern->Failed++;
417 | }
418 | catch (...)
419 | {
420 | if (LOG_LEVEL > 0)
421 | fmt::print("{0:<32} - Failed test {1} (Exception)\n", pattern->GetName(), i);
422 |
423 | pattern->Failed++;
424 | }
425 |
426 | uint64_t end_clock = mem::rdtsc();
427 |
428 | pattern->Elapsed += end_clock - start_clock;
429 | }
430 | }
431 |
// Order: scanners without failures before scanners with failures; within each
// group, fewer elapsed cycles first.
432 | std::sort(PATTERN_SCANNERS.begin(), PATTERN_SCANNERS.end(),
433 | [](const std::unique_ptr& lhs, const std::unique_ptr& rhs) {
434 | if ((lhs->Failed != 0) != (rhs->Failed != 0))
435 | return lhs->Failed < rhs->Failed;
436 |
437 | return lhs->Elapsed < rhs->Elapsed;
438 | });
439 |
440 | fmt::print("End Scan\n\n");
441 |
// Denominator for the cycles/byte figure.
// NOTE(review): this uses full_size() and test_count even though each test scans
// only size() bytes, skipped scanners stop accumulating cycles, and "test" may
// restrict the run to a single index - the figure is approximate in those cases.
442 | const uint64_t total_scan_length = static_cast(reg.full_size()) * test_count;
443 |
// cycles/byte of the first scanner after sorting; the others are shown relative to it.
// NOTE(review): if that value is 0 the normalised figure divides by zero.
444 | double best_perf = 0.0f;
445 |
446 | for (size_t i = 0; i < PATTERN_SCANNERS.size(); ++i)
447 | {
448 | const auto& pattern = *PATTERN_SCANNERS[i];
449 |
450 | fmt::print("{:<32} | ", pattern.GetName());
451 |
452 | double cycles_per_byte = double(pattern.Elapsed) / total_scan_length;
453 |
454 | if (i == 0)
455 | best_perf = cycles_per_byte;
456 |
457 | double normalized_perf = cycles_per_byte / best_perf;
458 |
// When failing scanners were skipped their timings are incomplete, so only
// "failed" is printed for them.
459 | if (skip_fails && pattern.Failed)
460 | {
461 | fmt::print("failed");
462 | }
463 | else
464 | {
465 | fmt::print(
466 | "{:>12} cycles = {:>6.3f} cycles/byte | {:>5.2f}x", pattern.Elapsed, cycles_per_byte, normalized_perf);
467 |
// In a full run every scanner ran every test, so its failure count is shown too.
468 | if (!skip_fails)
469 | {
470 | fmt::print(" | {} failed", pattern.Failed);
471 | }
472 | }
473 |
474 | fmt::print("\n");
475 | }
476 | }
477 |
--------------------------------------------------------------------------------