├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── basicdfa.h ├── common_defs.h ├── main.cpp └── sheng.h /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2018, geofflangdale 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | .SUFFIXES: 3 | # 4 | .SUFFIXES: .cpp .o .c .h 5 | 6 | 7 | .PHONY: clean cleandist 8 | 9 | CXXFLAGS = -std=c++17 -O2 -march=native -Wall -Wextra -Wshadow 10 | 11 | EXECUTABLES=sheng 12 | 13 | 14 | 15 | all: $(EXECUTABLES) 16 | 17 | sheng: main.cpp sheng.h common_defs.h basicdfa.h 18 | $(CXX) $(CXXFLAGS) -o sheng main.cpp 19 | 20 | 21 | clean: 22 | rm -f $(EXECUTABLES) 23 | 24 | cleandist: 25 | rm -f $(EXECUTABLES) 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sheng 2 | A small DFA for under 16 states, designed to execute around the 1 cycle per byte level at all times. 3 | 4 | This is adapted from a similarly-named engine used in the Hyperscan regular expression matcher (https://github.com/intel/hyperscan) but is presented here as a standalone project for clarity. 5 | -------------------------------------------------------------------------------- /basicdfa.h: -------------------------------------------------------------------------------- 1 | #ifndef BASIC_DFA_H 2 | #define BASIC_DFA_H 3 | #include "common_defs.h" 4 | #include 5 | 6 | struct BasicDFA { 7 | typedef u8 State; 8 | u8 transitions[16][256]; 9 | State start_state; 10 | BasicDFA(std::vector> & trans_vec, u8 start_state_, u8 default_state) { 11 | for (u32 i = 0; i < 16; ++i) { 12 | for (u32 j = 0; j < 256; ++j) { 13 | transitions[i][j] = default_state; 14 | } 15 | } 16 | for (auto p : trans_vec) { 17 | u32 from, to; 18 | u8 c; 19 | std::tie(from, to, c) = p; 20 | transitions[from][c] = to; 21 | } 22 | start_state = start_state_; 23 | } 24 | State apply(const u8 * data, size_t len, State s) { 25 | size_t i = 0; 26 | for (; i+7 < len; i+=8) { 27 | u8 c1 = data[i+0]; 28 | u8 c2 = data[i+1]; 29 | u8 c3 = data[i+2]; 30 | u8 c4 = data[i+3]; 31 | u8 c5 = data[i+4]; 32 | u8 c6 = data[i+5]; 33 | u8 c7 = data[i+6]; 34 | u8 c8 = data[i+7]; 35 | s = transitions[s][c1]; 36 | s = transitions[s][c2]; 37 | s = transitions[s][c3]; 38 | s = transitions[s][c4]; 39 | s = transitions[s][c5]; 40 | s = transitions[s][c6]; 41 | s = transitions[s][c7]; 42 | s = transitions[s][c8]; 43 | } 44 | for (; i < len; ++i) { 45 | s = transitions[s][data[i]]; 46 | } 47 | return s; 48 | } 49 | }; 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /common_defs.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | typedef unsigned char u8; 4 | typedef unsigned short u16; 5 | typedef unsigned int u32; 6 | typedef unsigned long long u64; 7 | typedef signed char s8; 8 | typedef signed short s16; 9 | typedef signed int s32; 10 | typedef signed long long s64; 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | typedef __m128i m128; 17 | typedef __m256i m256; 18 | 19 | // Snippets from Hyperscan 20 | 21 | // Align to N-byte boundary 22 | #define ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1)) 23 | #define ROUNDDOWN_N(a, n) ((a) & ~((n)-1)) 24 | 25 | #define ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n) - 1)) == 0) 26 | 27 | #define really_inline inline __attribute__ ((always_inline, unused)) 28 | #define never_inline inline __attribute__ ((noinline, unused)) 29 | 30 | #define UNUSED __attribute__ ((unused)) 31 | 32 | #ifndef likely 33 | #define likely(x) __builtin_expect(!!(x), 1) 34 | #endif 35 | #ifndef unlikely 36 | #define unlikely(x) __builtin_expect(!!(x), 0) 37 | #endif 38 | 39 | static inline 40 | u32 ctz64(u64 x) { 41 | assert(x); // behaviour not defined for x == 0 42 | #if defined(_WIN64) 43 | unsigned long r; 44 | _BitScanForward64(&r, x); 45 | return r; 46 | #elif defined(_WIN32) 47 | unsigned long r; 48 | if (_BitScanForward(&r, (u32)x)) { 49 | return (u32)r; 50 | } 51 | _BitScanForward(&r, x >> 32); 52 | return (u32)(r + 32); 53 | #else 54 | return (u32)__builtin_ctzll(x); 55 | #endif 56 | } 57 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "sheng.h" 5 | #include "basicdfa.h" 6 | #include 7 | 8 | const int BLOCK_SIZE = 16384; 9 | const int REPEATS = 100000; 10 | 11 | using namespace std; 12 | 13 | void demo_bdfa(BasicDFA & sh) { 14 | const u8 * char_data = (const u8 *)"Now is the time for all good men to come to the aid of the party. XXY"; 15 | for (u32 i = 0; i < strlen((const char *)char_data); ++i) { 16 | BasicDFA::State s = sh.apply(char_data, i, sh.start_state); 17 | cout << i << "/" << (int)s << " "; 18 | if (!((i + 1) % 10)) { 19 | cout << "\n"; 20 | } 21 | } 22 | cout << "\n"; 23 | } 24 | 25 | never_inline void performance_test_bdfa(BasicDFA & sh, u8 * data) { 26 | BasicDFA::State s = sh.start_state; 27 | auto start = std::chrono::steady_clock::now(); 28 | for (u32 i = 0; i < REPEATS; i++) { 29 | s = sh.apply(data, BLOCK_SIZE, s); 30 | } 31 | auto end = std::chrono::steady_clock::now(); 32 | std::chrono::duration secs_clock = end - start; 33 | double secs = secs_clock.count(); 34 | size_t bytes_scanned = BLOCK_SIZE * REPEATS; 35 | cout << "final state: " << (int)s 36 | << " bytes scanned: " << bytes_scanned 37 | << " seconds: " << secs << "\n"; 38 | cout << " bytes per ns " << (bytes_scanned/secs)/1000000000.0 << "\n"; 39 | } 40 | 41 | void demo(Sheng & sh) { 42 | const u8 * char_data = (const u8 *)"Now is the time for all good men to come to the aid of the party. XXY"; 43 | for (u32 i = 0; i < strlen((const char *)char_data); ++i) { 44 | Sheng::State s = sh.apply(char_data, i, sh.start_state); 45 | cout << i << "/" << (int)get_byte_at_offset(s, 0) << " "; 46 | if (!((i + 1) % 10)) { 47 | cout << "\n"; 48 | } 49 | } 50 | cout << "\n"; 51 | } 52 | 53 | never_inline void performance_test(Sheng & sh, u8 * data) { 54 | Sheng::State s = sh.start_state; 55 | auto start = std::chrono::steady_clock::now(); 56 | for (u32 i = 0; i < REPEATS; i++) { 57 | s = sh.apply(data, BLOCK_SIZE, s); 58 | } 59 | auto end = std::chrono::steady_clock::now(); 60 | std::chrono::duration secs_clock = end - start; 61 | double secs = secs_clock.count(); 62 | size_t bytes_scanned = BLOCK_SIZE * REPEATS; 63 | cout << "final state: " << (int)get_byte_at_offset(s, 0) 64 | << " bytes scanned: " << bytes_scanned 65 | << " seconds: " << secs << "\n"; 66 | cout << " bytes per ns " << (bytes_scanned/secs)/1000000000.0 << "\n"; 67 | } 68 | 69 | int main(UNUSED int argc, UNUSED char * argv[]) { 70 | vector> transitions; 71 | 72 | // detect the regex /good.*party/s by hand-building the DFA 73 | transitions.push_back(make_tuple(1,2,'g')); 74 | transitions.push_back(make_tuple(2,3,'o')); 75 | transitions.push_back(make_tuple(3,4,'o')); 76 | transitions.push_back(make_tuple(4,5,'d')); 77 | for (u32 i = 0; i < 256; i++) { 78 | if (i != 'p') { 79 | transitions.push_back(make_tuple(5,5,(u8)i)); 80 | } else { 81 | transitions.push_back(make_tuple(5,6,'p')); 82 | } 83 | } 84 | transitions.push_back(make_tuple(6,7,'a')); 85 | transitions.push_back(make_tuple(7,8,'r')); 86 | transitions.push_back(make_tuple(8,9,'t')); 87 | transitions.push_back(make_tuple(9,10,'y')); 88 | 89 | u8 * data = new u8[BLOCK_SIZE]; 90 | for (u32 i = 0; i < BLOCK_SIZE; i++) { 91 | data[i] = rand() % 256; 92 | } 93 | 94 | cout << "\nSheng\n"; 95 | Sheng sh(transitions, 1, 1); 96 | demo(sh); 97 | performance_test(sh, data); 98 | 99 | cout << "\nBasic DFA\n"; 100 | BasicDFA bdfa(transitions, 1, 1); 101 | demo_bdfa(bdfa); 102 | performance_test_bdfa(bdfa, data); 103 | cout << "\n"; 104 | } 105 | -------------------------------------------------------------------------------- /sheng.h: -------------------------------------------------------------------------------- 1 | #ifndef SHENG_H 2 | #define SHENG_H 3 | #include "common_defs.h" 4 | #include 5 | #include 6 | 7 | inline void set_byte_at_offset(m128 & in, u32 offset, u8 content) { 8 | union { 9 | m128 sse; 10 | u8 bytes[16]; 11 | } u; 12 | u.sse = in; 13 | u.bytes[offset] = content; 14 | in = u.sse; 15 | } 16 | 17 | inline u8 get_byte_at_offset(m128 in, u32 offset) { 18 | union { 19 | m128 sse; 20 | u8 bytes[16]; 21 | } u; 22 | u.sse = in; 23 | return u.bytes[offset]; 24 | } 25 | 26 | struct Sheng { 27 | typedef m128 State; 28 | m128 transitions[256]; 29 | State start_state; 30 | 31 | Sheng(std::vector> & trans_vec, u8 start_state_, u8 default_state) { 32 | // fill all transitions with default state 33 | for (u32 i = 0; i < 256; ++i) { 34 | transitions[i] = _mm_set1_epi8(default_state); 35 | } 36 | // fill in state transition for slot 'from' to point to 'to' for our character transition c 37 | for (auto p : trans_vec) { 38 | u32 from, to; 39 | u8 c; 40 | std::tie(from, to, c) = p; 41 | set_byte_at_offset(transitions[c], from, to); 42 | } 43 | start_state = _mm_set1_epi8(start_state_); // put everyone into start state - why not? 44 | } 45 | 46 | State apply(const u8 * data, size_t len, State s) { 47 | size_t i = 0; 48 | for (; i+7 < len; i+=8) { 49 | u8 c1 = data[i+0]; 50 | u8 c2 = data[i+1]; 51 | u8 c3 = data[i+2]; 52 | u8 c4 = data[i+3]; 53 | u8 c5 = data[i+4]; 54 | u8 c6 = data[i+5]; 55 | u8 c7 = data[i+6]; 56 | u8 c8 = data[i+7]; 57 | s = _mm_shuffle_epi8(transitions[c1], s); 58 | s = _mm_shuffle_epi8(transitions[c2], s); 59 | s = _mm_shuffle_epi8(transitions[c3], s); 60 | s = _mm_shuffle_epi8(transitions[c4], s); 61 | s = _mm_shuffle_epi8(transitions[c5], s); 62 | s = _mm_shuffle_epi8(transitions[c6], s); 63 | s = _mm_shuffle_epi8(transitions[c7], s); 64 | s = _mm_shuffle_epi8(transitions[c8], s); 65 | } 66 | for (; i < len; ++i) { 67 | s = _mm_shuffle_epi8(transitions[data[i]], s); 68 | } 69 | return s; 70 | } 71 | 72 | }; 73 | #endif 74 | --------------------------------------------------------------------------------