├── .gitmodules
├── LICENSE
├── Makefile
├── README.md
├── Switch
    ├── README.md
    ├── ansifmt.h
    ├── base64.cpp
    ├── base64.h
    ├── buffer.h
    ├── compress.h
    ├── data.cpp
    ├── data.h
    ├── date.h
    ├── ext
    │   ├── ext_xxhash.c
    │   ├── flat_hash_map.h
    │   ├── json.hpp
    │   └── tl
    │   │   └── optional.hpp
    ├── fs.h
    ├── network.h
    ├── portability.h
    ├── prioqueue.h
    ├── sparsefixedbitset.h
    ├── switch.h
    ├── switch_algorithms.h
    ├── switch_atomicops.h
    ├── switch_bitops.h
    ├── switch_common.h
    ├── switch_compiler_aux.h
    ├── switch_dictionary.h
    ├── switch_exceptions.h
    ├── switch_hash.h
    ├── switch_ll.h
    ├── switch_mallocators.h
    ├── switch_numops.h
    ├── switch_print.h
    ├── switch_ranges.h
    ├── switch_refcnt.h
    ├── switch_security.cpp
    ├── switch_security.h
    ├── switch_vector.h
    ├── text.cpp
    ├── text.h
    ├── thread.h
    └── timings.h
├── codecs.cpp
├── codecs.h
├── common.h
├── compilation_ctx.cpp
├── compilation_ctx.h
├── docidupdates.cpp
├── docidupdates.h
├── docset_iterators.cpp
├── docset_iterators.h
├── docset_iterators_base.h
├── docset_iterators_scorers.cpp
├── docset_spans.cpp
├── docset_spans.h
├── docwordspace.cpp
├── docwordspace.h
├── exec.cpp
├── exec.h
├── google_codec.cpp
├── google_codec.h
├── index_source.cpp
├── index_source.h
├── indexer.cpp
├── indexer.h
├── intersect.cpp
├── intersect.h
├── lucene_codec.cpp
├── lucene_codec.h
├── matches.h
├── merge.cpp
├── merge.h
├── percolator.cpp
├── percolator.h
├── queries.cpp
├── queries.h
├── queries_rewrite.h
├── queryexec_ctx.cpp
├── queryexec_ctx.h
├── relevant_documents.h
├── runtime.h
├── segment_index_source.cpp
├── segment_index_source.h
├── similarity.cpp
├── similarity.h
├── terms.cpp
├── terms.h
├── trinity_limits.h
├── utils.cpp
└── utils.h


/.gitmodules:
--------------------------------------------------------------------------------
 1 | [submodule "Switch/ext/FastPFor"]
 2 | 	path = Switch/ext/FastPFor
 3 | 	url = https://github.com/lemire/FastPFor.git
 4 | [submodule "Switch/ext_snappy"]
 5 | 	path = Switch/ext_snappy
 6 | 	url = https://github.com/google/snappy.git
 7 | [submodule "Switch/ext/streamvbyte"]
 8 | 	path = Switch/ext/streamvbyte
 9 | 	url = https://github.com/lemire/streamvbyte.git
10 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | HOST:=$(shell hostname)
 2 | 
 3 | ifeq ($(HOST), origin)
 4 |  ORIGIN=1
 5 | else
 6 |  ifeq ($(HOST), nigiro)
 7 |   ORIGIN=1
 8 |  endif
 9 | endif
10 | 
11 | # Please see lucene_codec.h comments
12 | # Your LUCENE_ENCODING_SCHEME value should match the defined LUCENE_USE_X macro set in lucene_codec.h
13 | LUCENE_ENCODING_SCHEME:=pfor
14 | EXTRA_CFLAGS:=
15 | 
16 | 
17 | ifeq ($(ORIGIN), 1)
18 | # When building on our dev.system
19 | 	include /home/system/Development/Switch/Makefile.dfl
20 | 	CPPFLAGS:=$(CPPFLAGS_SANITY) $(OPTIMIZER_CFLAGS) $(EXTRA_CFLAGS) #-Wold-style-cast
21 | 
22 | 	ifeq ($(LUCENE_ENCODING_SCHEME),streamvbyte)
23 | 		SWITCH_OBJS += $(SWITCH_BASE)/ext/streamvbyte/streamvbyte.o $(SWITCH_BASE)/ext/streamvbyte/streamvbytedelta.o
24 | 	else ifeq ($(LUCENE_ENCODING_SCHEME),maskedvbyte)
25 | 		# make sure you link against maskedvybte; -lmaskedvbyte
26 | 	else
27 | 		SWITCH_OBJS:=$(SWITCH_BASE)/ext/FastPFor/build/libFastPFor.a
28 | 		SWITCH_OBJS:=$(SWITCH_BASE)/ext/FastPFor/build/CMakeFiles/FastPFor.dir/src/bitpacking.cpp.o $(SWITCH_BASE)/ext/FastPFor/build/CMakeFiles/FastPFor.dir/src/bitpackingaligned.cpp.o $(SWITCH_BASE)/ext/FastPFor/build/CMakeFiles/FastPFor.dir/src/bitpackingunaligned.cpp.o $(SWITCH_BASE)/ext/FastPFor/build/CMakeFiles/FastPFor.dir/src/horizontalbitpacking.cpp.o $(SWITCH_BASE)/ext/FastPFor/build/CMakeFiles/FastPFor.dir/src/simdunalignedbitpacking.cpp.o $(SWITCH_BASE)/ext/FastPFor/build/CMakeFiles/FastPFor.dir/src/simdbitpacking.cpp.o $(SWITCH_BASE)/ext/FastPFor/build/CMakeFiles/FastPFor.dir/src/varintdecode.c.o $(SWITCH_BASE)/ext/FastPFor/build/CMakeFiles/FastPFor.dir/src/streamvbyte.c.o
29 | 	endif
30 | 
31 | else
32 | # Lean switch bundled in this repo
33 | 	CXX:=clang++
34 | 	CXXFLAGS:=-std=c++1z -Wstrict-aliasing=2 -Wsequence-point -Warray-bounds -Wextra -Winit-self -Wformat=2 -Wno-format-nonliteral -Wformat-security \
35 | 		-Wunused-variable -Wunused-value -Wreturn-type -Wparentheses -Wmissing-braces -Wno-invalid-source-encoding -Wno-invalid-offsetof \
36 | 		-Wno-unknown-pragmas -Wno-missing-field-initializers -Wno-unused-parameter -Wno-sign-compare -Wno-invalid-offsetof   \
37 | 		-fno-rtti -ffast-math  -D_REENTRANT -DREENTRANT  -g3 -ggdb -fno-omit-frame-pointer   \
38 | 		-fno-strict-aliasing    -DLEAN_SWITCH  -ISwitch/ -Wno-uninitialized -Wno-unused-function -Wno-uninitialized -funroll-loops  -Ofast $(EXTRA_CFLAGS)
39 | 	CXXFLAGS+=-I Switch/ext_snappy/build -I Switch/ext/FastPFor/headers/ -I Switch/ext/streamvbyte/include/
40 | 	LDFLAGS:=-ldl -ffunction-sections -lpthread -ldl -lz  Switch/ext_snappy/build/libsnappy.a
41 | 	SWITCH_LIB:=
42 | endif
43 | 
44 | OBJS:=percolator.o compilation_ctx.o similarity.o docset_iterators_scorers.o google_codec.o docset_spans.o lucene_codec.o queryexec_ctx.o docset_iterators.o utils.o codecs.o queries.o exec.o docidupdates.o indexer.o docwordspace.o terms.o segment_index_source.o index_source.o merge.o intersect.o
45 | 
46 | ifeq ($(ORIGIN), 1)
47 | all : lib #app
48 | app:  app.o lib
49 | 	$(CXX) app.o -o T $(LDFLAGS_SANITY) -lswitch -lpthread $(SWITCH_TLS_LDFLAGS) -lz \
50 | 	-L /home/system/Development/Switch/ext/MaskedVByte -lmaskedvbyte \
51 | 	-L./ -lthe_trinity -lswitch #-fsanitize=address
52 | else
53 | all: switch lib
54 | 
55 | switch:
56 | 	# if this fails, you probably didn't get to import/build the submodules
57 | 	# see https://github.com/phaistos-networks/Trinity/wiki/How-to-Build-Trinity
58 | endif
59 | 
60 | 
61 | lib: $(OBJS) 
62 | 	rm -f libthe_trinity.a
63 | 	ar rcs libthe_trinity.a $(SWITCH_OBJS) $(OBJS) 
64 | 
65 | clean:
66 | 	rm -f *.o T *.a Switch/ext_snappy/*o Switch/ext_snappy/*.a
67 | 
68 | .PHONY: clean switch
69 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |   Please read the **[blog post introducing Trinity](https://medium.com/@markpapadakis/introducing-trinity-a-modern-high-performance-elegant-ir-search-library-a1a3b4e287a7)**, and the follow-up discussing the **[new major release](https://medium.com/@markpapadakis/new-trinity-major-release-available-e5de40355b8d)**.
2 | 
3 | Trinity is a modern C++ information-retrieval library for building queries, indexing documents and other content, running queries and scoring documents matching them. It facilitates the development of search engines and other systems and applications that depend on that functionality, and has been designed with simplicity, performance, modularity and extensibility, and elegance in mind. [Read More](https://github.com/phaistos-networks/Trinity/wiki/Trinity)
4 | 
5 | Please check [the wiki](https://github.com/phaistos-networks/Trinity/wiki) for documentation. There is barely any documentation for now (though I encourage you to check the codebase comments), but this will change once I can find the time to improve the situation. Apologies for that, but, again, please spend some time studying the codebase; a lot of time went into designing the API and how the various subsystems interact with each other, and everything is documented in the codebase.
6 | 
7 | Trinity is developed by [Phaistos Networks, S.A](http://phaistosnetworks.gr/)
8 | 


--------------------------------------------------------------------------------
/Switch/README.md:
--------------------------------------------------------------------------------
1 | This is a very lean distribution of Switch; it includes the absolute minimum code for this project.
2 | Eventually, we plan to open-source all of Switch. Switch is similar in scope and goals to Facebook's Folly library.
3 | 
4 | Most of the methods and datastructures here are either wrappers for STL equivalent classes/functions, or are very trimmed down constructs written so that this project can be compiled with them.
5 | 


--------------------------------------------------------------------------------
/Switch/ansifmt.h:
--------------------------------------------------------------------------------
 1 | // http://en.wikipedia.org/wiki/ANSI_escape_code
 2 | #pragma once
 3 | 
 4 | namespace ansifmt
 5 | {
 6 | 	// http://en.wikipedia.org/wiki/ANSI_escape_code
 7 | 	static constexpr const char *bold = "\033[1m";
 8 | 	static constexpr const char *reset = "\033[0m";
 9 | 	static constexpr const char *inverse = "\033[3m";
10 | 
11 | 	static constexpr const char *color_black = "\033[30m";
12 | 	static constexpr const char *color_red = "\033[31m";
13 | 	static constexpr const char *color_green = "\033[32m";
14 | 	static constexpr const char *color_brown = "\033[33m";
15 | 	static constexpr const char *color_blue = "\033[34m";
16 | 	static constexpr const char *color_magenta = "\033[35m";
17 | 	static constexpr const char *color_cyan = "\033[36m";
18 | 	static constexpr const char *color_gray = "\033[37m";
19 | 
20 | 	
21 | 	static constexpr const char *bgcolor_black = "\033[40m";
22 | 	static constexpr const char *bgcolor_red = "\033[41m";
23 | 	static constexpr const char *bgcolor_green = "\033[42m";
24 | 	static constexpr const char *bgcolor_brown = "\033[43m";
25 | 	static constexpr const char *bgcolor_blue = "\033[44m";
26 | 	static constexpr const char *bgcolor_magenta = "\033[45m";
27 | 	static constexpr const char *bgcolor_cyan = "\033[46m";
28 | 	static constexpr const char *bgcolor_gray = "\033[47m";
29 | 	static constexpr const char *cls = "\033[2J\033[1;1H";
30 | 
31 | 	// http://www.tldp.org/HOWTO/Bash-Prompt-HOWTO/x361.html
32 | 	struct set_col
33 | 	{
34 | 		const uint32_t column;
35 | 
36 | 		set_col(const uint32_t v)
37 | 			: column{v}
38 | 		{
39 | 
40 | 		}
41 | 	};
42 | };
43 | 
44 | [[maybe_unused]] static void PrintImpl(Buffer &out, const ansifmt::set_col &c)
45 | {
46 | 	// We need to reset to 0 first with \r and then advance
47 | 	// maybe there's another escape sequence for explicitly setting the column
48 | 	out.AppendFmt("\r\033\[<%uC", c.column);
49 | }
50 | 


--------------------------------------------------------------------------------
/Switch/base64.cpp:
--------------------------------------------------------------------------------
  1 | #include "switch.h"
  2 | #include "base64.h"
  3 | 
  4 | std::size_t Base64::decoded_repr_length(const str_view32 s) {
  5 |         size_t l = s.size(), padding;
  6 | 
  7 |         if (l && s.data()[l - 1] == '=') {
  8 |                 if (l >= 2 && s.data()[l - 2] == '=')
  9 |                         padding = 2;
 10 |                 else
 11 |                         padding = 1;
 12 |         }
 13 |         padding = 0;
 14 | 
 15 |         return (l * 3) / 4 - padding;
 16 | }
 17 | 
 18 | uint16_t b64_int(const uint8_t c) noexcept {
 19 |         if (c == 43)
 20 |                 return 62;
 21 |         else if (c == 47)
 22 |                 return 63;
 23 |         else if (c == 61)
 24 |                 return 64;
 25 |         else if (c > 47 && c < 58)
 26 |                 return c + 4;
 27 |         else if (c > 64 && c < 91)
 28 |                 return c - 'A';
 29 |         else if (c > 96 && c < 123)
 30 |                 return (c - 'a') + 26;
 31 |         else
 32 |                 return 256;
 33 | }
 34 | 
 35 | uint32_t Base64::Encode(const uint8_t *in, size_t in_len, Buffer *out) {
 36 |         static constexpr const char *b64_chr = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 37 |         uint32_t                     s[3], j{0};
 38 |         const auto                   saved{out->size()};
 39 | 
 40 |         for (uint32_t i{0}; i != in_len; ++i) {
 41 |                 s[j++] = in[i];
 42 | 
 43 |                 if (j == 3) {
 44 |                         uint8_t o[4];
 45 | 
 46 |                         o[0] = b64_chr[(s[0] & 255) >> 2];
 47 |                         o[1] = b64_chr[((s[0] & 0x03) << 4) + ((s[1] & 0xF0) >> 4)];
 48 |                         o[2] = b64_chr[((s[1] & 0x0F) << 2) + ((s[2] & 0xC0) >> 6)];
 49 |                         o[3] = b64_chr[s[2] & 0x3F];
 50 |                         j    = 0;
 51 | 
 52 |                         out->append(str_view32(reinterpret_cast<const char *>(o), 4));
 53 |                 }
 54 |         }
 55 | 
 56 |         if (j) {
 57 |                 uint8_t o[4];
 58 | 
 59 |                 if (j == 1)
 60 |                         s[1] = 0;
 61 | 
 62 |                 o[0] = b64_chr[(s[0] & 255) >> 2];
 63 |                 o[1] = b64_chr[((s[0] & 0x03) << 4) + ((s[1] & 0xF0) >> 4)];
 64 | 
 65 |                 if (j == 2)
 66 |                         o[2] = b64_chr[((s[1] & 0x0F) << 2)];
 67 |                 else
 68 |                         o[2] = '=';
 69 | 
 70 |                 o[3] = '=';
 71 | 
 72 |                 out->append(str_view32(reinterpret_cast<const char *>(o), 4));
 73 |         }
 74 | 
 75 |         return out->size() - saved;
 76 | }
 77 | 
 78 | int32_t Base64::Decode(const uint8_t *in, const size_t in_len, Buffer *out) {
 79 |         uint32_t   s[4], j{0};
 80 |         const auto saved{out->size()};
 81 | 
 82 |         for (uint32_t i{0}; i < in_len; i++) {
 83 |                 if (const auto v = b64_int(in[i]); v == 256)
 84 |                         return -1;
 85 |                 else
 86 |                         s[j++] = v;
 87 | 
 88 |                 if (j == 4) {
 89 |                         uint8_t k{0}, o[4];
 90 | 
 91 |                         o[0] = ((s[0] & 255) << 2) + ((s[1] & 0x30) >> 4);
 92 |                         if (s[2] != 64) {
 93 |                                 o[1] = ((s[1] & 0x0F) << 4) + ((s[2] & 0x3C) >> 2);
 94 | 
 95 |                                 if ((s[3] != 64)) {
 96 |                                         o[2] = ((s[2] & 0x03) << 6) + (s[3]);
 97 |                                         k    = 3;
 98 |                                 } else {
 99 |                                         k = 2;
100 |                                 }
101 |                         } else {
102 |                                 k = 1;
103 |                         }
104 | 
105 |                         out->append(str_view32(reinterpret_cast<const char *>(o), k));
106 |                         j = 0;
107 |                 }
108 |         }
109 | 
110 |         return out->size() - saved;
111 | }
112 | 


--------------------------------------------------------------------------------
/Switch/base64.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "switch.h"
 3 | #include "buffer.h"
 4 | #include <openssl/bio.h>
 5 | #include <openssl/evp.h>
 6 | 
 7 | static inline bool is_base64(const uint8_t c) noexcept
 8 | {
 9 |         return c == '+' || c == '/' || (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
10 | }
11 | 
12 | 
13 | namespace Base64
14 | {
15 |         size_t decoded_repr_length(const str_view32 s);
16 | 
17 |         uint32_t Encode(const uint8_t *in, size_t in_len, Buffer *out);
18 | 
19 |         int32_t Decode(const uint8_t *in, const size_t in_len, Buffer *out);
20 | }
21 | 


--------------------------------------------------------------------------------
/Switch/compress.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include "ext_snappy/snappy.h"
  3 | #include "switch.h"
  4 | 
  5 | namespace Compression
  6 | {
  7 | 
  8 |         enum class Algo : int8_t
  9 |         {
 10 |                 UNKNOWN = -1,
 11 |                 SNAPPY
 12 |         };
 13 | 
 14 |         inline bool Compress(const Algo algorithm, const void *data, const uint32_t dataLen, Buffer *dest)
 15 |         {
 16 |                 switch (algorithm)
 17 |                 {
 18 |                         case Algo::SNAPPY:
 19 |                         {
 20 |                                 size_t outLen = 0;
 21 | 
 22 |                                 dest->reserve(snappy::MaxCompressedLength(dataLen + 2));
 23 |                                 snappy::RawCompress((char *)data, dataLen, dest->At(dest->size()), &outLen);
 24 |                                 dest->advance_size(outLen);
 25 | 
 26 |                                 return true;
 27 |                         }
 28 |                         break;
 29 | 
 30 |                         default:
 31 |                                 return false;
 32 |                 }
 33 |         }
 34 | 
 35 |         inline bool UnCompress(const Algo algorithm, const void *const source, const uint32_t sourceLen, Buffer *const dest)
 36 |         {
 37 |                 switch (algorithm)
 38 |                 {
 39 |                         case Algo::SNAPPY:
 40 |                         {
 41 |                                 size_t outLen;
 42 | 
 43 |                                 if (unlikely(!snappy::GetUncompressedLength((char *)source, sourceLen, &outLen)))
 44 |                                         return false;
 45 |                                 else
 46 |                                 {
 47 |                                         dest->reserve(outLen + 8);
 48 | 
 49 |                                         if (unlikely(!snappy::RawUncompress((char *)source, sourceLen, dest->At(dest->size()))))
 50 |                                                 return false;
 51 |                                         else
 52 |                                         {
 53 |                                                 dest->advance_size(outLen);
 54 |                                                 return true;
 55 |                                         }
 56 |                                 }
 57 |                         }
 58 |                         break;
 59 | 
 60 |                         default:
 61 |                                 return false;
 62 |                 }
 63 |         }
 64 | 
 65 |         inline uint8_t *PackUInt32(const uint32_t n, uint8_t *out)
 66 |         {
 67 | #define AS_FLIPPED(_v_) (_v_) | 128
 68 | // This would have worked if it wasn't for the edge cases of e.g (1<<7), etc
 69 | //#define AS_FLIPPED_F(v) (((v)&127) + 128)
 70 | #define AS_FLIPPED_F(_v_) (_v_) | 128
 71 | 
 72 |                 if (n < (1 << 7))
 73 |                 {
 74 |                         *(out++) = n;
 75 |                 }
 76 |                 else if (n < (1 << 14))
 77 |                 {
 78 |                         *(out++) = AS_FLIPPED(n);
 79 |                         *(out++) = n >> 7;
 80 |                 }
 81 |                 else if (n < (1 << 21))
 82 |                 {
 83 |                         *(out++) = AS_FLIPPED(n);
 84 |                         *(out++) = AS_FLIPPED_F(n >> 7);
 85 |                         *(out++) = n >> 14;
 86 |                 }
 87 |                 else if (n < (1 << 28))
 88 |                 {
 89 |                         *(out++) = AS_FLIPPED(n);
 90 |                         *(out++) = AS_FLIPPED_F(n >> 7);
 91 |                         *(out++) = AS_FLIPPED_F(n >> 14);
 92 |                         *(out++) = n >> 21;
 93 |                 }
 94 |                 else
 95 |                 {
 96 |                         *(out++) = AS_FLIPPED(n);
 97 |                         *(out++) = AS_FLIPPED_F(n >> 7);
 98 |                         *(out++) = AS_FLIPPED_F(n >> 14);
 99 |                         *(out++) = AS_FLIPPED_F(n >> 21);
100 |                         *(out++) = n >> 28;
101 |                 }
102 | 
103 |                 return out;
104 | #undef AS_FLIPPED
105 | #undef AS_FLIPPED_V
106 |         }
107 | 
108 |         inline uint8_t UnpackUInt32Check(const uint8_t *p, const uint8_t *const e) noexcept
109 |         {
110 |                 for (uint8_t i{0}; i != 5; ++i)
111 |                 {
112 |                         if (unlikely(p >= e))
113 |                                 break;
114 |                         else if (*p < 128)
115 |                                 return i + 1;
116 |                         else
117 |                                 ++p;
118 |                 }
119 | 
120 |                 return 0;
121 |         }
122 | 
123 |         inline uint32_t UnpackUInt32(const uint8_t *&buf) noexcept
124 |         {
125 | #define FLIPPED(v) ((v) & ~128)
126 |                 if (buf[0] > 127)
127 |                 {
128 |                         if (buf[1] > 127)
129 |                         {
130 |                                 if (buf[2] > 127)
131 |                                 {
132 |                                         if (buf[3] > 127)
133 |                                         {
134 |                                                 const uint32_t r = FLIPPED(buf[0]) | (FLIPPED(buf[1]) << 7) | (FLIPPED(buf[2]) << 14) | (FLIPPED(buf[3]) << 21) | (buf[4] << 28);
135 | 
136 |                                                 buf += 5;
137 |                                                 return r;
138 |                                         }
139 |                                         else
140 |                                         {
141 |                                                 const uint32_t r = FLIPPED(buf[0]) | (FLIPPED(buf[1]) << 7) | (FLIPPED(buf[2]) << 14) | (buf[3] << 21);
142 | 
143 |                                                 buf += 4;
144 |                                                 return r;
145 |                                         }
146 |                                 }
147 |                                 else
148 |                                 {
149 |                                         const uint32_t r = FLIPPED(buf[0]) | (FLIPPED(buf[1]) << 7) | (buf[2] << 14);
150 | 
151 |                                         buf += 3;
152 |                                         return r;
153 |                                 }
154 |                         }
155 |                         else
156 |                         {
157 |                                 const uint32_t r = FLIPPED(buf[0]) | (buf[1] << 7);
158 | 
159 |                                 buf += 2;
160 |                                 return r;
161 |                         }
162 |                 }
163 |                 else
164 |                         return *buf++;
165 | #undef FLIPPED
166 |         }
167 | 
168 |         inline auto encode_varuint32(const uint32_t n, uint8_t *out)
169 |         {
170 |                 return PackUInt32(n, out);
171 |         }
172 | 
173 |         inline uint32_t decode_varuint32(const uint8_t *&buf)
174 |         {
175 |                 return UnpackUInt32(buf);
176 |         }
177 | }
178 | 
179 | void IOBuffer::SerializeVarUInt32(const uint32_t n)
180 | {
181 |         reserve(8);
182 | 
183 |         uint8_t *e = (uint8_t *)(buffer + length_);
184 |         length_ += Compression::PackUInt32(n, e) - e;
185 | }
186 | 
187 | uint32_t IOBuffer::UnserializeVarUInt32(void)
188 | {
189 |         const uint8_t *e = (uint8_t *)(buffer + position), *const b = e;
190 |         const uint32_t r = Compression::UnpackUInt32(e);
191 | 
192 |         position += e - b;
193 |         return r;
194 | }
195 | 
196 | void IOBuffer::encode_varuint32(const uint32_t n)
197 | {
198 |         SerializeVarUInt32(n);
199 | }
200 | 


--------------------------------------------------------------------------------
/Switch/date.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | namespace Date
 4 | {
 5 |         struct ts_repr
 6 |         {
 7 |                 const time_t t;
 8 | 
 9 |                 ts_repr(const time_t v)
10 |                     : t{v}
11 |                 {
12 |                 }
13 | 
14 |                 strwlen8_t Get(char *const out) const
15 |                 {
16 |                         struct tm tm;
17 | 
18 |                         localtime_r(&t, &tm);
19 |                         return strwlen8_t(out, sprintf(out, "%02u.%02u.%02u %02u:%02u:%02u", tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec));
20 |                 }
21 |         };
22 | }
23 | 
24 | static inline void PrintImpl(Buffer &out, const Date::ts_repr &r)
25 | {
26 |         struct tm tm;
27 | 
28 |         localtime_r(&r.t, &tm);
29 |         out.AppendFmt("%02u.%02u.%02u %02u:%02u:%02u", tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec);
30 | }
31 | 


--------------------------------------------------------------------------------
/Switch/fs.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "switch.h"
 3 | #include <dirent.h>
 4 | 
 5 | class DirectoryEntries {
 6 |       private:
 7 |         DIR *const dh;
 8 | 
 9 |       public:
10 |         struct iterator {
11 | 		using de_t = struct dirent; // WAS: dirent64 
12 |                 DIR *const       dh;
13 | 		de_t *de, storage;
14 | 
15 |                 iterator(DIR *const h, de_t *d) 
16 |                     : dh(h), de(d) {
17 |                 }
18 | 
19 |                 inline bool operator!=(const iterator &o) const {
20 |                         return de != o.de;
21 |                 }
22 | 
23 |                 inline strwlen8_t operator*() const {
24 |                         return strwlen8_t(de->d_name, strlen(de->d_name));
25 |                 }
26 | 
27 |                 inline iterator &operator++() {
28 | 			// readdir64_r and readdir_r are deprecated
29 | 			de = readdir(dh);
30 |                         return *this;
31 |                 }
32 |         };
33 | 
34 |       public:
35 |         DirectoryEntries(const char *const path)
36 |             : dh(opendir(path)) {
37 |                 if (unlikely(!dh)) {
38 |                         throw Switch::exception("Failed to access directory ", path, ":", strerror(errno));
39 | 		}
40 |         }
41 | 
42 |         ~DirectoryEntries() {
43 |                 if (dh) {
44 |                         closedir(dh);
45 | 		}
46 |         }
47 | 
48 |         struct iterator begin() const {
49 |                 return iterator(dh, dh ? readdir(dh) : nullptr);
50 |         }
51 | 
52 |         struct iterator end() const {
53 |                 return iterator(dh, nullptr);
54 |         }
55 | 
56 |         operator bool() const {
57 |                 return dh;
58 |         }
59 | };
60 | 


--------------------------------------------------------------------------------
/Switch/portability.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <inttypes.h>
 3 | 
 4 | #ifdef __linux__
 5 | #define SWITCH_HAVE_MALLOC_USABLE_SIZE 1
 6 | #else
 7 | #define off64_t off_t
 8 | #define pread64 pread
 9 | #define pwrite64 pwrite
10 | #define dirent64 dirent
11 | #define readdir64_r readdir_r
12 | #define readdir64 readdir
13 | #endif
14 | 


--------------------------------------------------------------------------------
/Switch/switch.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include "portability.h"
  3 | #include <assert.h>
  4 | #include <ctype.h>
  5 | #include <stdint.h>
  6 | #include <cstdio>
  7 | #include <string.h>
  8 | #include <type_traits>
  9 | #include <unistd.h>
 10 | #include <cmath>
 11 | #include <cstddef>
 12 | #include <cstdlib>
 13 | #include <limits.h>
 14 | #include <utility>
 15 | #include <limits>
 16 | #include <memory>
 17 | #include <functional>
 18 | #include <algorithm>
 19 | #include <cmath>
 20 | 
 21 | #define require(x) assert(x)
 22 | #define Drequire(x) assert(x)
 23 | #define EXPECT(x) assert(x)
 24 | #define DEXPECT(x) assert(x)
 25 | 
 26 | 
 27 | [[gnu::noreturn]] static inline void Unreachable()
 28 | {
 29 |         __builtin_unreachable();
 30 | }
 31 | 
 32 | 
 33 | static inline size_t goodMallocSize(const size_t n) noexcept
 34 | {
 35 |         return n;
 36 | }
 37 | 
 38 | // Ref: http://cnicholson.net/2011/01/stupid-c-tricks-a-better-sizeof_array/
 39 | // our sizeof_array macro doesn no parameter type-checking; you can pass anything to it, and
 40 | // as long as x[0] can be evaluated, you 'll get a successful compilation.
 41 | namespace detail
 42 | {
 43 |         template <typename T, size_t N>
 44 |         char (&SIZEOF_ARRAY_REQUIRES_ARRAY_ARGUMENT(T (&)[N]))[N]; // XXX: why does this work and what does it do?
 45 | }
 46 | #define sizeof_array(x) sizeof(detail::SIZEOF_ARRAY_REQUIRES_ARRAY_ARGUMENT(x))
 47 | 
 48 | 
 49 | 
 50 | 
 51 | #define STRLEN(p) (uint32_t)(sizeof(p) - 1)
 52 | #define STRWITHLEN(p) (p), (uint32_t)(sizeof(p) - 1)
 53 | #define LENWITHSTR(p) (uint32_t)(sizeof(p) - 1), (p)
 54 | #define STRWLEN(p) STRWITHLEN(p)
 55 | #define LENWSTR(p) LENWITHSTR(p)
 56 | #define _S(p) STRWITHLEN(p)
 57 | 
 58 | 
 59 | 
 60 | #include "switch_common.h"
 61 | #include "switch_ranges.h"
 62 | #include "buffer.h"
 63 | #include "switch_exceptions.h"
 64 | #include "switch_numops.h"
 65 | #include "switch_ranges.h"
 66 | #include "timings.h"
 67 | 
 68 | // Src: folly
 69 | template <class Lambda>
 70 | class AtScopeExit
 71 | {
 72 |       private:
 73 |         Lambda &l;
 74 | 
 75 |       public:
 76 |         AtScopeExit(Lambda &action)
 77 |             : l(action)
 78 |         {
 79 |         }
 80 | 
 81 |         ~AtScopeExit(void)
 82 |         {
 83 |                 l();
 84 |         }
 85 | };
 86 | 
 87 | template<typename T>
 88 | static inline T Min(const T a, const T b)
 89 | {
 90 | 	return std::min(a, b);
 91 | }
 92 | 
 93 | template<typename T>
 94 | static inline T Max(const T a, const T b)
 95 | {
 96 | 	return std::max(a, b);
 97 | }
 98 | 
 99 | [[gnu::always_inline]] inline void assume(bool cond)
100 | {
101 | #if defined(__clang__)
102 |         __builtin_assume(cond);
103 | #elif defined(__GNUC__)
104 |         if (!cond)
105 |                 __builtin_unreachable();
106 | #elif defined(_MSC_VER)
107 |         __assume(cond);
108 | #endif
109 | }
110 | 
111 | [[ noreturn, gnu::always_inline ]] inline void assume_unreachable()
112 | {
113 |         assume(false);
114 | #if defined(__GNUC__)
115 |         __builtin_unreachable();
116 | #elif defined(_MSC_VER)
117 |         __assume(0);
118 | #else
119 |         std::abort();
120 | #endif
121 | }
122 | 
123 | #define TOKEN_PASTE(x, y) x##y
124 | #define TOKEN_PASTE2(x, y) TOKEN_PASTE(x, y)
125 | 
126 | #define Auto_INTERNAL1(lname, aname, ...) \
127 |         auto lname = [&]() {              \
128 |                 __VA_ARGS__;              \
129 |         };                                \
130 |         AtScopeExit<decltype(lname)> aname(lname);
131 | #define Auto_INTERNAL2(ctr, ...) Auto_INTERNAL1(TOKEN_PASTE(Auto_func_, ctr), TOKEN_PASTE(Auto_Instance_, ctr), __VA_ARGS__)
132 | #define Defer(...) Auto_INTERNAL2(__COUNTER__, __VA_ARGS__)
133 | 
// IMPLEMENT_ME(): prints a bold red "Implementation Missing" notice with the
// current file and line via Print(), then terminates with std::abort().
// Wrapped in do { } while (0) so it behaves as a single statement.
#define IMPLEMENT_ME()                                                                         \
        do                                                                                     \
        {                                                                                      \
                Print(ansifmt::bold, ansifmt::color_red, "Implementation Missing", ansifmt::reset, " at ", __FILE__, ":", __LINE__, ": Will Exit\n"); \
                std::abort();                                                                  \
        } while (0)

// IMPLEMENT_ME_NOEXIT(): same notice, worded as a warning, without aborting.
#define IMPLEMENT_ME_NOEXIT() Print(ansifmt::bold, ansifmt::color_red, "WARNING: Implementation Missing", ansifmt::reset, " at ", __FILE__, ":", __LINE__, "\n")
142 | 
143 | 
144 | 
145 | 
146 | 
147 | 
148 | 
149 | 
// Runs a callable exactly once: either when invoke() is called explicitly
// or, failing that, when the guard is destroyed. cancel() disarms the guard
// so the callable never runs.
template <typename L>
struct scope_guard
{
        L l_;                 // the deferred callable
        bool invoked_{false}; // true once l_ has run, or the guard was cancelled

        scope_guard(L &&l)
            : l_(std::move(l))
        {
        }

        // Moving hands the pending call over to the new guard; the moved-from
        // guard is disarmed so the callable cannot run twice.
        scope_guard(scope_guard &&o)
            : l_(std::forward<L>(o.l_)), invoked_(o.invoked_)
        {
                o.invoked_ = true;
        }

        // Two live copies would both run the callable.
        scope_guard(const scope_guard &) = delete;
        scope_guard &operator=(const scope_guard &) = delete;

        ~scope_guard()
        {
                invoke();
        }

        // Runs the callable now unless it already ran or was cancelled.
        void invoke()
        {
                if (!invoked_)
                {
                        // Mark first: the flag used to be reset to false here, which made
                        // the destructor run the callable a second time after an
                        // explicit invoke().
                        invoked_ = true;
                        l_();
                }
        }

        // Disarms the guard; the callable will not run.
        void cancel()
        {
                invoked_ = true;
        }
};
180 | 
181 | template <typename T>
182 | inline auto make_scope_guard(T &&l)
183 | {
184 |         return scope_guard<T>(std::forward<T>(l));
185 | }
186 | 
// DEFER(statements...): declares a scope_guard, uniquely named via
// __COUNTER__, around a capture-by-reference lambda holding the statements.
// The guard's destructor invokes the lambda, so the statements run when the
// enclosing scope is left.
#define DEFER(...) auto TOKEN_PASTE2(__deferred, __COUNTER__) = make_scope_guard([&] { __VA_ARGS__ ;});
188 | 
189 | #include "switch_exceptions.h"
190 | 


--------------------------------------------------------------------------------
/Switch/switch_algorithms.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <stdlib.h>
 3 | 
 4 | namespace SwitchAlgorithms
 5 | {
 6 | 	static inline uint64_t Uniform(const uint64_t low, const uint64_t high)
 7 | 	{
 8 | 		return low + (rand()%(high - low));
 9 | 	}
10 | 
11 |         inline uint32_t ComputeExponentialBackoffWithDeccorelatedJitter(const uint64_t cap, const uint64_t base, const uint64_t prevSleep)
12 |         {
13 |                 return std::min<uint64_t>(cap, Uniform(base, prevSleep * 3));
14 |         }
15 | }
16 | 


--------------------------------------------------------------------------------
/Switch/switch_atomicops.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <atomic>
 3 | 
// Named memory-ordering constants, scoped inside a struct.
// The upper-case enumerators alias the compiler's __ATOMIC_* constants.
// The order_* enumerators have no explicit values, so they continue the
// numbering after SEQ_CST and are NOT numerically equal to their __ATOMIC_*
// counterparts.
struct MemoryModel
{
        enum
        {
                // No barriers or synchronization.
                RELAXED = __ATOMIC_RELAXED,
                // Data dependency only for both barrier and synchronization with another thread.
                CONSUME = __ATOMIC_CONSUME,
                // Barrier to hoisting of code and synchronizes with release (or stronger) semantic stores from another thread.
                ACQUIRE = __ATOMIC_ACQUIRE,
                // Barrier to sinking of code and synchronizes with acquire (or stronger) semantic loads from another thread.
                RELEASE = __ATOMIC_RELEASE,
                // Full barrier in both directions and synchronizes with acquire loads and release stores in another thread.
                ACQ_REL = __ATOMIC_ACQ_REL,
                // Full barrier in both directions and synchronizes with acquire loads and release stores in all threads.
                SEQ_CST = __ATOMIC_SEQ_CST,

                // moved out of Barriers NS
                // NOTE(review): these take implicit values following SEQ_CST; do not
                // pass them where an __ATOMIC_* value is expected — confirm intended use.
                order_relaxed,
                order_acquire,
                order_release,
                order_acq_rel,
                order_seq_cst,

                // memory_order_sync: Forces a full sync:
                // #LoadLoad, #LoadStore, #StoreStore, and most significantly, #StoreLoad
                order_sync = order_seq_cst
        };
};
33 | 


--------------------------------------------------------------------------------
/Switch/switch_bitops.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include <switch.h>
  3 | #include <sys/mman.h>
  4 | #include <fcntl.h>
  5 | 
  6 | namespace SwitchBitOps
  7 | {
  8 | 	// http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
  9 | 	// Fast way to test if a zero byte is in a word
 10 | 	[[gnu::always_inline]] inline constexpr bool U32HasZero(const uint32_t v)
 11 | 	{
 12 | 		return (v - 0x01010101UL) & ~v & 0x80808080UL;
 13 | 	}
 14 | 
	// Tests whether byte `c` equals any of the four bytes packed into `mask`.
	//
	// The byte is replicated into all four lanes and XORed with `mask`; a lane
	// becomes zero exactly where it matched, and the zero-byte test from
	// U32HasZero() detects that:
	// v = (((unsigned char)c * 0x1010101U) ^ delimiter)
	// where delimiter is the 4-byte value to look for (u32)
	// and c is the character to check
	//
	// e.g. to check if a character is one of (?, /, -, :)
	// IsDelimter(uint8_t(c), '?' << 24 | '/' << 16 | '-' << 8 | ':');
	//
	// Those are used by HAProxy / pattern.c
	[[gnu::always_inline]] inline static bool IsDelimter(const uint8_t c, uint32_t mask)
	{
		// Multiply as unsigned: `c` promotes to int, and c * 0x01010101
		// overflows a signed 32-bit int for c >= 0x80.
		mask ^= (c * UINT32_C(0x01010101)); // propagate the char to all 4 bytes
		return ((mask - UINT32_C(0x01010101)) & ~mask & UINT32_C(0x80808080)) != 0;
	}
 29 | 		
 30 | 
 31 | 
 32 | 
	// True when bit `index` of `n` is set; bit 0 is the least significant bit.
	template<typename T>
	[[gnu::always_inline]] inline bool IsSet(const T n, const uint8_t index)
	{
		const auto probe = static_cast<T>(1) << index;

		return (n & probe) != 0;
	}	
 38 | 
	// Returns the `span` most significant bits of `n`, shifted down to the
	// low end of the result.
	// span == 0 yields 0 and span >= the bit width of T yields n unchanged;
	// both cases previously shifted by an out-of-range amount.
	template<typename T>  
	[[gnu::always_inline]] inline static constexpr T MSB(const T n, const uint8_t span)
	{
		return span == 0
			       ? T(0)
			       : (span >= sizeof(T) * 8 ? n : T(n >> ((sizeof(T) * 8) - span)));
	}
 44 | 
	// Mask with the `span` least significant bits set.
	// span == 0 yields 0 and span >= the bit width of T yields all ones; the
	// zero case previously shifted by the full width of T.
	template<typename T>
	[[gnu::always_inline]] inline static constexpr T LSBMask(const uint8_t span)
	{
		return span == 0
			       ? T(0)
			       : (span >= sizeof(T) * 8 ? T(-1) : T(((T)-1) >> ((sizeof(T) * 8) - span)));
	}

	// Returns the `span` least significant bits of `n`.
	template<typename T>
	[[gnu::always_inline]] inline static constexpr T LSB(const T n, const uint8_t span)
	{
		return n & LSBMask<T>(span);
	}
 56 | 
	// Number of zero bits above the most significant set bit of `v`.
	// The result is undefined for v == 0 (that is the contract of the
	// underlying compiler builtins).
	//
	// Generic version: types wider than unsigned int go through the 64-bit
	// builtin. Previously every type without a dedicated overload (e.g.
	// unsigned long long where uint64_t is unsigned long) was silently
	// truncated to 32 bits.
	template<typename T>
	[[gnu::always_inline]] inline static constexpr uint8_t LeadingZeros(const T v)
	{
		return sizeof(T) > sizeof(unsigned int)
			       ? __builtin_clzll(static_cast<unsigned long long>(v))
			       : __builtin_clz(static_cast<unsigned int>(v));
	}

	// Need specialized for (u16, u8) because __builtin_clz() operates on uints
	[[gnu::always_inline]] inline static constexpr uint8_t LeadingZeros(const uint16_t v) 
	{
		return __builtin_clz(v) - 16;
	}

	[[gnu::always_inline]] inline static constexpr uint8_t LeadingZeros(const uint8_t v) 
	{
		return __builtin_clz(v) - 24;
	}
	
	// NOTE: v == 0 is not special-cased here; a caller that wants
	// std::numeric_limits<T>::digits for zero has to check for it itself.
	[[gnu::always_inline]] inline static constexpr uint8_t LeadingZeros(const uint64_t v)
	{
		return __builtin_clzll(v);
	}
 78 | 
	// Number of zero bits below the least significant set bit of `v`.
	// The result is undefined for v == 0 (contract of the compiler builtins).
	//
	// Generic version: types wider than unsigned int go through the 64-bit
	// builtin instead of being truncated to 32 bits.
	template<typename T>
	[[gnu::always_inline]] inline static constexpr uint8_t TrailingZeros(const T v)
	{
		return sizeof(T) > sizeof(unsigned int)
			       ? __builtin_ctzll(static_cast<unsigned long long>(v))
			       : __builtin_ctz(static_cast<unsigned int>(v));
	}
	
	[[gnu::always_inline]] inline static constexpr uint8_t TrailingZeros(const uint64_t v)
	{
		return __builtin_ctzll(v); 
	}

	// Signed 64-bit: the value's bit pattern is examined as unsigned.
	[[gnu::always_inline]] inline static uint8_t TrailingZeros(const int64_t v)
	{
		return __builtin_ctzll(static_cast<uint64_t>(v));
	}


#ifdef SWITCH_ARCH_64BIT
	[[gnu::always_inline]] inline static constexpr uint8_t TrailingZeros(const long long v)
	{
		return __builtin_ctzll(v);
	}
#endif
102 | 
103 | 
104 | 	template<typename T>
105 | 	[[gnu::always_inline]] inline static constexpr uint8_t PopCnt(const T v)
106 | 	{
107 | 		return __builtin_popcount(v);
108 | 	}
109 | 
110 | 	[[gnu::always_inline]] inline static constexpr uint8_t PopCnt(const uint64_t v)
111 | 	{
112 | 		return __builtin_popcountll(v);
113 | 	}
114 | 
115 | 
116 | 	// Returns 1 + index of the least significant 1-bit of x, or if x == 0, returns 0
117 | 	template<typename T>
118 | 	[[gnu::always_inline]] inline static constexpr uint8_t LeastSignificantBitSet(const T v) 
119 | 	{
120 | 		return __builtin_ffs(v);
121 | 	}
122 | 
123 | 	[[gnu::always_inline]] inline static constexpr uint8_t LeastSignificantBitSet(const uint64_t v)
124 | 	{
125 | 		return __builtin_ffsll(v);
126 | 	}
127 | 
128 | 
129 | 
130 | 	// https://en.wikipedia.org/wiki/Hamming_distance
131 | 	// The number of positions where the corresponding bits differ.
132 | 	template<typename T>
133 | 	[[gnu::always_inline]] inline constexpr uint8_t HammingDistance(const T h1, const T h2)
134 | 	{
135 | 		return PopCnt(h1 ^ h2);
136 | 	}
137 | 
138 | 
139 | 	// A lean bitmap
140 | 	//
141 | 	// We could get log2(sizeof(T) << 8) and shift by that instead of
142 | 	// dividing but the compiler should be smart enough to figure that out anyway
143 | 	template<typename T>
144 | 		struct Bitmap
145 | 		{
146 | 			static_assert(std::numeric_limits<T>::is_integer, "T must be an integer");
147 | 			static_assert(!std::numeric_limits<T>::is_signed, "T must be an unsigned integer");
148 | 
149 | 			private:
150 | 			T *const bitmap;
151 | 
152 | 			public:
153 | 			static inline uint32_t FirstSet(const T *const bm, const uint32_t bmSize /* in Ts not in bits */)
154 | 			{
155 | 				for (uint32_t i{0}; i != bmSize; ++i)
156 | 				{
157 | 					if (const auto v = bm[i])
158 | 						return (63 - SwitchBitOps::LeadingZeros(v)) + (i * sizeof(T) << 3);
159 | 				}
160 | 
161 | 				return UINT32_MAX;
162 | 			}
163 | 
164 | 			static auto cardinality(const T *const bm, const uint32_t n /* in Ts, not in bits */)
165 | 			{
166 | 				size_t cnt{0};
167 | 
168 | 				for (uint32_t i{0}; i != n; ++i)
169 | 				{
170 | 					if (const auto v = bm[i])
171 | 						cnt+=PopCnt(v);
172 | 				}
173 | 
174 | 				return cnt;
175 | 			}
176 | 
177 | 			static auto anySet(const T *const bm, const uint32_t n /* in Ts, not in bits */)
178 | 			{
179 | 				for (uint32_t i{0}; i != n; ++i)
180 | 				{
181 | 					if (bm[i])
182 | 						return true;
183 | 				}
184 | 				return false;
185 | 			}
186 | 
187 | 			static inline void Set(T *const bm, const uint32_t index)
188 | 			{
189 | 				const auto i = index / (sizeof(T)<<3);
190 | 				const T mask = (T)1U << (index&((sizeof(T) * 8) - 1));
191 | 
192 | 				bm[i]|=mask;
193 | 			}
194 | 
195 | 			static inline void Toggle(T *const bm, const uint32_t index)
196 | 			{
197 | 				const auto i = index / (sizeof(T)<<3);
198 | 				const T mask = (T)1U << (index&((sizeof(T) * 8) - 1));
199 | 
200 | 				bm[i]^=mask;
201 | 			}
202 | 
203 | 			static inline bool SetIfUnset(T *const bm, const uint32_t index)
204 | 			{
205 | 				const auto i = index / (sizeof(T)<<3);
206 | 				const T mask = (T)1U << (index&((sizeof(T) * 8) - 1));
207 | 				auto &v = bm[i];
208 | 
209 | 				if (v&mask)
210 | 					return false;
211 | 				else
212 | 				{
213 | 					v|=mask;
214 | 					return true;
215 | 				}
216 | 			}
217 | 
218 | 			static inline void Unset(T *const bm, const uint32_t index)
219 | 			{
220 | 				const auto i = index / (sizeof(T)<<3);
221 | 				const T mask = (T)1U << (index&((sizeof(T) * 8) - 1));
222 | 
223 | 				bm[i]&=~mask;
224 | 			}
225 | 
226 | 			static inline bool IsSet(T *const bm, const uint32_t index)
227 | 			{
228 | 				const auto i = index / (sizeof(T)<<3);
229 | 				const T mask = (T)1U << (index&((sizeof(T) * 8) - 1));
230 | 
231 | 				return bm[i]&mask;
232 | 			}
233 | 
234 | 
235 | 
236 | 			public:
237 | 			Bitmap(const uint32_t capacity)
238 | 				: bitmap{ (T *)calloc((capacity / (sizeof(T) << 3)) + 1, sizeof(T)) }
239 | 			{
240 | 
241 | 			}
242 | 
243 | 			~Bitmap(void)
244 | 			{
245 | 				::free(bitmap);
246 | 			}
247 | 
248 | 			int MLock(const uint32_t capacity)
249 | 			{
250 | 				return mlock(bitmap, ((capacity / (sizeof(T) << 3)) + 1) * sizeof(T));
251 | 			}
252 | 				
253 | 			int MUnlock(const uint32_t capacity)
254 | 			{
255 | 				return munlock(bitmap, ((capacity / (sizeof(T) << 3)) + 1) * sizeof(T));
256 | 			}
257 | 
258 | 			inline void Set(const uint32_t index)
259 | 			{
260 | 				return Set(bitmap, index);
261 | 			}
262 | 
263 | 			inline bool SetIfUnset(const uint32_t index)
264 | 			{
265 | 				return SetIfUnset(bitmap, index);
266 | 			}
267 | 
268 | 			inline void Unset(const uint32_t index)
269 | 			{
270 | 				return Unset(bitmap, index);
271 | 			}
272 | 
273 | 			inline bool IsSet(const uint32_t index) const
274 | 			{
275 | 				return IsSet(bitmap, index);
276 | 			}
277 | 
278 | 			T *Data(void) const
279 | 			{
280 | 				return bitmap;
281 | 			}
282 | 
283 | 			bool FromFile(const char *const path)
284 | 			{
285 | 				int fd = open(path, O_RDONLY|O_LARGEFILE);
286 | 
287 | 				if (fd == -1)
288 | 					return false;
289 | 
290 | 				const auto fileSize = lseek64(fd, 0, SEEK_END);
291 | 
292 | 				if (pread64(fd, bitmap, fileSize, 0) != fileSize)
293 | 				{
294 | 					(void)close(fd);
295 | 					return false;
296 | 				}
297 | 				else
298 | 				{
299 | 					(void)close(fd);
300 | 					return true;
301 | 				}
302 | 			}
303 | 
304 | 		};
305 | };
306 | 


--------------------------------------------------------------------------------
/Switch/switch_dictionary.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <unordered_map>
 3 | 
 4 | namespace Switch
 5 | {
 6 |         template <class Key, class T>
 7 |         class unordered_map
 8 |             : public std::unordered_map<Key, T>
 9 |         {
10 | 		public:
11 | 			struct kv
12 | 			{
13 | 				T v;
14 | 
15 | 				const T &value() const
16 | 				{
17 | 					return v;
18 | 				}
19 | 			};
20 | 
21 | 		public:
22 | 			bool Add(const Key &k, const T &v)
23 | 			{
24 | 				return this->insert({k, v}).second;
25 | 			}
26 | 
27 | 			bool Remove(const Key &k)
28 | 			{
29 | 				return this->erase(k);
30 | 			}
31 | 
32 | 			kv detach(const Key &k)
33 | 			{
34 | 				auto it = this->find(k);
35 | 
36 | 				if (it != this->end())
37 | 				{
38 | 					auto v = std::move(it->second);
39 | 					const auto n = this->size();
40 | 
41 | 					this->erase(it);
42 | 					require(n == this->size() + 1);
43 | 					return {v};
44 | 				}
45 | 				else
46 | 					return {};
47 | 			}
48 |         };
49 | }
50 | 
namespace std
{
	// Hashes a raw pointer by hashing its address as a uintptr_t.
	// NOTE(review): the standard library already provides std::hash for pointer
	// types, and specializing std templates for non-program-defined types is not
	// sanctioned by the standard — confirm this specialization is still needed.
	template<>
	struct hash<void *>
	{
		using argument_type = void*;
                using result_type = std::size_t;

                std::size_t operator()(const void *const ptr) const
		{
			const auto address = reinterpret_cast<uintptr_t>(ptr);

			return std::hash<uintptr_t>{}(address);
		}
	};
}
65 | 
66 | 
67 | 


--------------------------------------------------------------------------------
/Switch/switch_exceptions.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "switch_print.h"
 3 | 
 4 | namespace Switch
 5 | {
 6 |         struct exception
 7 |             : public std::exception
 8 |         {
 9 |                 Buffer b;
10 | 
11 |                 [[gnu::noinline]] explicit exception(const strwithlen32_t &s)
12 |                 {
13 |                         b.Append(s.p, s.len);
14 |                 }
15 | 
16 |                 template <typename... T>
17 |                 [[gnu::noinline]] exception(const T &... args)
18 |                 {
19 |                         PrintImpl(b, args...);
20 |                 }
21 | 
22 |                 exception(const exception &o)
23 | 			: b(o.b)
24 |                 {
25 |                 }
26 | 
27 |                 exception(exception &&o)
28 | 			: b(std::move(o.b))
29 |                 {
30 | 
31 |                 }
32 | 
33 |                 exception() = delete;
34 | 
35 |                 const char *what() const noexcept override
36 |                 {
37 |                         return b.data();
38 |                 }
39 |         };
40 | 
41 |         struct recoverable_error
42 |             : public std::exception
43 |         {
44 |                 Buffer b;
45 | 
46 |                 [[gnu::noinline]] explicit recoverable_error(const strwithlen32_t &s)
47 |                 {
48 |                         b.Append(s.p, s.len);
49 |                 }
50 | 
51 |                 template <typename... T>
52 |                 [[gnu::noinline]] recoverable_error(const T &... args)
53 |                 {
54 |                         PrintImpl(b, args...);
55 |                 }
56 | 
57 |                 recoverable_error(const recoverable_error &o)
58 | 			: b(o.b)
59 |                 {
60 | 
61 |                 }
62 | 
63 |                 recoverable_error(recoverable_error &&o)
64 | 			: b(std::move(o.b))
65 |                 {
66 | 
67 |                 }
68 | 
69 |                 recoverable_error() = delete;
70 | 
71 |                 const char *what() const noexcept override
72 |                 {
73 |                         return b.data();
74 |                 }
75 |         };
76 | 
        // Standard-library-style error names. They are all aliases of the single
        // recoverable_error type, so a handler for one of them catches every one
        // of them.
        using runtime_error = recoverable_error;
        using range_error = recoverable_error;
        using overflow_error = recoverable_error;
        using underflow_error = recoverable_error;
        using system_error = recoverable_error;
        using invalid_argument = recoverable_error;
        using length_error = recoverable_error;
        using out_of_range = recoverable_error;
        using data_error = recoverable_error;
86 | }
87 | 
// SLog(args...): Print() with the value of srcline_repr() placed before the
// given arguments.
// NOTE(review): srcline_repr() is defined elsewhere; presumably it renders the
// current source file/line — confirm.
#define SLog(...) ::Print(srcline_repr(), __VA_ARGS__)
89 | 


--------------------------------------------------------------------------------
/Switch/switch_hash.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
// Declaration of XXH64(); the implementation lives outside this header.
// The extern "C" wrapper is applied only when the compiler is not clang.
// NOTE(review): under clang the declaration therefore has C++ linkage —
// confirm that this matches how the definition is compiled.
#ifndef __clang__
extern "C" {
#endif
unsigned long long XXH64(const void *input, unsigned int len, unsigned long long seed);
#ifndef __clang__
}
#endif
10 | 
// 64-bit FNV hash of the `len` bytes at `p`, starting from the standard offset
// basis. Each step multiplies by the FNV prime and then XORs in the next byte.
// An empty input returns the offset basis.
static inline uint64_t FNVHash64(const uint8_t *const p, const uint32_t len)
{
        uint64_t digest = 14695981039346656037ULL;
        const uint8_t *const end = p + len;

        for (const uint8_t *it = p; it != end; ++it)
                digest = (digest * 1099511628211ULL) ^ *it;

        return digest;
}
20 | 
// Initial state for an incremental FNV-64 hash: the offset basis. Pass it as
// the first argument of the seeded FNVHash64() overload.
static inline constexpr uint64_t BeginFNVHash64()
{
        return UINT64_C(0xcbf29ce484222325);
}
25 | 
// Continues a 64-bit FNV hash from state `h` over the `len` bytes at `p` and
// returns the new state. With len == 0 the state is returned unchanged.
static inline uint64_t FNVHash64(uint64_t h, const uint8_t *const p, const uint32_t len)
{
        const uint8_t *const end = p + len;

        for (const uint8_t *it = p; it != end; ++it)
        {
                h *= 1099511628211ULL;
                h ^= *it;
        }

        return h;
}
32 | 
33 | 
34 | 


--------------------------------------------------------------------------------
/Switch/switch_ll.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include "switch_compiler_aux.h"
  3 | 
  4 | struct switch_slist {
  5 |         struct switch_slist *next;
  6 | };
  7 | 
  8 | #define switch_slist_foreach(PTR, IT) for (I = (PTR)->next; I; I = I->next)
  9 | 
 10 | static inline void switch_slist_append(switch_slist *s, switch_slist *a) {
 11 |         a->next = s->next;
 12 |         s->next = a;
 13 | }
 14 | 
// Makes `l` an empty list: its next pointer refers back to itself.
inline void switch_slist_init(switch_slist *l) {
        l->next = l;
}
 18 | 
 19 | static inline switch_slist *switch_slist_removefirst(switch_slist *s) {
 20 |         switch_slist *r = s->next;
 21 | 
 22 |         s->next = r->next;
 23 |         return r;
 24 | }
 25 | 
 26 | inline bool switch_slist_isempty(const switch_slist *l) {
 27 |         return l->next == l;
 28 | }
 29 | 
 30 | inline bool switch_slist_any(const switch_slist *const l) {
 31 |         return l->next != l;
 32 | }
 33 | 
 34 | struct switch_dlist {
 35 |         switch_dlist *prev, *next;
 36 | 
 37 |         void push_back(switch_dlist *const a) noexcept {
 38 |                 // See switch_dlist_insert_after()
 39 |                 auto *const d  = this;
 40 |                 auto *const dn = d->next;
 41 | 
 42 |                 a->next  = dn;
 43 |                 a->prev  = d;
 44 |                 dn->prev = a;
 45 |                 d->next  = a;
 46 |         }
 47 | 
 48 |         void push_front(switch_dlist *const a) noexcept {
 49 |                 // Switch switch_dlist_insert_before()
 50 |                 auto *const d  = this;
 51 |                 auto *const dp = d->prev;
 52 | 
 53 |                 d->prev  = a;
 54 |                 a->next  = d;
 55 |                 a->prev  = dp;
 56 |                 dp->next = a;
 57 |         }
 58 | 
 59 |         void detach() noexcept {
 60 |                 // See switch_dlist_del
 61 |                 auto *const d  = this;
 62 |                 auto *const dp = d->prev;
 63 |                 auto *const dn = d->next;
 64 | 
 65 |                 dn->prev = dp;
 66 |                 dp->next = dn;
 67 |         }
 68 | 
 69 |         void detach_and_reset() noexcept {
 70 |                 detach();
 71 |                 reset();
 72 |         }
 73 | 
 74 |         constexpr void reset() noexcept {
 75 |                 next = this;
 76 |                 prev = this;
 77 |         }
 78 | 
 79 |         constexpr bool empty() const noexcept {
 80 |                 return next == this;
 81 |         }
 82 | 
 83 |         std::size_t size() const noexcept {
 84 |                 std::size_t res{0};
 85 | 
 86 |                 for (auto it{next}; it != this; it = it->next)
 87 |                         ++res;
 88 |                 return res;
 89 |         }
 90 | };
 91 | 
 92 | static inline void switch_dlist_init(switch_dlist *const l) {
 93 |         assume(l);
 94 |         l->next = l;
 95 |         l->prev = l;
 96 | }
 97 | 
 98 | // @a to head of the list @d
 99 | // e.g LIST.next = a
100 | // i.e d.push_back(a)
101 | static inline void switch_dlist_insert_after(switch_dlist *const d, switch_dlist *const a) {
102 |         auto *const dn = d->next;
103 | 
104 |         a->next  = dn;
105 |         a->prev  = d;
106 |         dn->prev = a;
107 |         d->next  = a;
108 | }
109 | 
110 | static inline void switch_dlist_del(switch_dlist *const d) {
111 |         auto *const dp = d->prev;
112 |         auto *const dn = d->next;
113 | 
114 |         dn->prev = dp;
115 |         dp->next = dn;
116 | }
117 | 
118 | // replace A with B
119 | static inline void switch_dlist_replace(switch_dlist *const a, switch_dlist *const b) {
120 |         switch_dlist_insert_after(a, b);
121 |         switch_dlist_del(a);
122 | }
123 | 
124 | // @a to tail of the list @d
125 | // e.g LIST.prev = a
126 | static inline void switch_dlist_insert_before(switch_dlist *const d, switch_dlist *const a) {
127 |         auto *const dp = d->prev;
128 | 
129 |         d->prev  = a;
130 |         a->next  = d;
131 |         a->prev  = dp;
132 |         dp->next = a;
133 | }
134 | 
135 | // switch_dlist_del() and switch_dlist_init() @d
136 | // If you want to e.g move an item in an list elsewhere (e.g LRU move to head), you need
137 | // to first switch_dlist_del(), then switch_dlist_init() it and THEN switch_dlist_insert_after() it, otherwise, if you only del()it but
138 | // don't initialize it, it will corrupt the list. Use switch_dlist_del_and_reset() always
139 | static inline void switch_dlist_del_and_reset(switch_dlist *const d) {
140 |         auto *const dp = d->prev;
141 |         auto *const dn = d->next;
142 | 
143 |         d->next = d;
144 |         d->prev = d;
145 | 
146 |         dn->prev = dp;
147 |         dp->next = dn;
148 | }
149 | 
150 | static inline switch_dlist *switch_dlist_poplast(switch_dlist *const d) {
151 |         auto *const dp = d->prev;
152 | 
153 |         if (dp == d)
154 |                 return nullptr;
155 |         else {
156 |                 switch_dlist_del(dp);
157 |                 return dp;
158 |         }
159 | }
160 | 
161 | static inline switch_dlist *switch_dlist_popfirst(switch_dlist *const d) {
162 |         auto *const dp = d->next;
163 | 
164 |         if (dp == d)
165 |                 return nullptr;
166 |         else {
167 |                 switch_dlist_del(dp);
168 |                 return dp;
169 |         }
170 | }
171 | 
172 | static inline void switch_dlist_merge(switch_dlist *const d, switch_dlist *const d2) {
173 |         auto *const dp  = d->prev;
174 |         auto *const d2n = d2->next;
175 |         auto *const d2p = d2->prev;
176 | 
177 |         if (d2n == d2)
178 |                 return; // Empty list, don't bother
179 | 
180 |         dp->next  = d2n;
181 |         d2n->prev = dp;
182 |         d->prev   = d2p;
183 |         d2p->next = d;
184 | }
185 | 
186 | static inline uint32_t switch_dlist_size(const switch_dlist *const l) {
187 |         uint32_t n = 0;
188 | 
189 |         for (auto *it = l->next; it != l; it = it->next)
190 |                 ++n;
191 | 
192 |         return n;
193 | }
194 | 
// Reverses, in place, a null-terminated singly-linked list whose nodes expose
// a `next` pointer. Returns the new head (the former last node), or nullptr
// for an empty list.
template <typename T>
static inline auto reverseSinglyList(T *h) {
        T *reversed = nullptr;

        for (T *cur = h; cur != nullptr;) {
                T *const following = cur->next;

                cur->next = reversed;
                reversed  = cur;
                cur       = following;
        }
        return reversed;
}
208 | 
209 | #ifdef __clang__
210 | #pragma GCC diagnostic push
211 | #pragma GCC diagnostic ignored "-Winvalid-offsetof"
212 | #endif
213 | 
// Iterate from head .. to tail: IT visits every node after the head PTR,
// following `next`, until it arrives back at PTR.
#define switch_dlist_foreach_fromhead(PTR, IT) for (IT = (PTR)->next; IT != (PTR); IT = IT->next)
// Iterate from tail .. to head: same walk, following `prev`.
#define switch_dlist_foreach_fromtail(PTR, IT) for (IT = (PTR)->prev; IT != (PTR); IT = IT->prev)

// Emptiness tests for a list head: a list is empty while the head links to itself.
#define switch_dlist_isempty(PTR) (((PTR)->next == (PTR)))
#define switch_dlist_any(PTR) (((PTR)->next != (PTR)))
// Maps a switch_dlist pointer back to the object of type T that embeds it as
// member M.
// e.g struct container{ ...; struct switch_dlist itemsList; ..};  struct item { ...; struct switch_dlist list; ... }
// struct item *const theItem = switch_list_entry(item, list, container.itemsList.next);
// NOTE(review): containerof() is defined elsewhere — confirm its argument order.
//#define switch_list_entry(T, M, PTR /* switch_dlist ptr */) 	(CONTAINER_OF(PTR, T, M))
#define switch_list_entry(T, M, PTR /* switch_dlist ptr */) (containerof(T, M, PTR))
225 | 
226 | #ifdef __clang__
227 | #pragma GCC diagnostic pop
228 | #endif
229 | 


--------------------------------------------------------------------------------
/Switch/switch_numops.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <algorithm>
 3 | 
 4 | template<typename T>
 5 | [[gnu::always_inline]] inline T Clamp(const T v, const T min, const T max)
 6 | {
 7 | 	static_assert(std::is_scalar<T>::value, "Expected scalar");
 8 | 	return std::min(std::max(v, min), max);
 9 | }
10 | 
// Rounds `v` up to the nearest multiple of `alignment`.
// Power-of-two alignments use the original mask arithmetic. Any other
// positive alignment, for which the mask arithmetic gave wrong results, is
// handled with a remainder. Non-positive alignments keep the historical mask
// arithmetic unchanged.
inline static int RoundToMultiple(const int v, const int alignment)
{
        const int mask = alignment - 1;

        if (alignment <= 0 || (alignment & mask) == 0)
                return (v + mask) & ~mask;

        const int rem = v % alignment;

        if (rem == 0)
                return v;

        // rem carries the sign of v: step up to the next multiple either way
        return rem > 0 ? v + (alignment - rem) : v - rem;
}
17 | 
18 | 


--------------------------------------------------------------------------------
/Switch/switch_print.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include "buffer.h"
  3 | #ifndef __linux__
  4 | #include <pthread.h>
  5 | #endif
  6 | 
  7 | 
  8 | struct ptr_repr
  9 | {
 10 | 	const void *const ptr;
 11 | 
 12 | 	ptr_repr(const void *const p)
 13 | 		: ptr(p)
 14 | 	{
 15 | 
 16 | 	}
 17 | 
 18 | 	inline strwlen8_t Get(char *const out) const
 19 | 	{
 20 | 		return strwlen8_t(out, sprintf(out, "%p", ptr));
 21 | 	}
 22 | 
 23 | };
 24 | 
// Appends the wrapped pointer, formatted with "%p".
static inline void PrintImpl(Buffer &out, const ptr_repr &repr)
{
	out.AppendFmt("%p", repr.ptr);
}

// No-argument overload; does nothing.
static inline void PrintImpl(void)
{

}

// Terminator for the variadic PrintImpl(): nothing left to print.
static inline void PrintImpl(Buffer &out)
{

}
 39 | 
 40 | static inline void PrintImpl(Buffer &out, const bool &v)
 41 | {
 42 | 	if (v)
 43 | 		out.Append(_S("true"));
 44 | 	else
 45 | 		out.Append(_S("false"));
 46 | }
 47 | 
// Scalar overloads: each appends its value using the format string shown.

// double, formatted with "%lf"
static inline void PrintImpl(Buffer &out, const double &v)
{
	out.AppendFmt("%lf", v);
}

// single character, formatted with "%c"
static inline void PrintImpl(Buffer &out, const char v)
{
	out.AppendFmt("%c", v);
}

// pointer to const, formatted with "%p"
static inline void PrintImpl(Buffer &out, const void *const ptr)
{
	out.AppendFmt("%p", ptr);
}

// pointer to non-const, formatted with "%p"
static inline void PrintImpl(Buffer &out, void *const ptr)
{
	out.AppendFmt("%p", ptr);
}

// float, formatted with "%f"
static inline void PrintImpl(Buffer &out, const float &v)
{
	out.AppendFmt("%f", v);
}

// int, formatted with "%d"
static inline void PrintImpl(Buffer &out, const int &v)
{
	out.AppendFmt("%d", v);
}
 77 | 
 78 | static inline void PrintImpl(Buffer &out, const char *p)
 79 | {
 80 | 	if (likely(p))
 81 | 		out.Append(p, strlen(p));
 82 | 	else
 83 | 		out.Append(_S("(nullptr)"));
 84 | }
 85 | 
 86 | static inline void PrintImpl(Buffer &out, char *p)
 87 | {
 88 | 	if (likely(p))
 89 | 		out.Append(p, strlen(p));
 90 | 	else
 91 | 		out.Append(_S("(nullptr)"));
 92 | 
 93 | }
 94 | 
// Fixed-width integer overloads. The 8- and 16-bit types are formatted with
// the 32-bit conversion specifiers (PRIu32 / PRId32), i.e. as numbers, not as
// characters.

static inline void PrintImpl(Buffer &out, const uint32_t &v)
{
	out.AppendFmt("%" PRIu32, v);
}

static inline void PrintImpl(Buffer &out, const uint8_t &v)
{
	out.AppendFmt("%" PRIu32, v);
}

static inline void PrintImpl(Buffer &out, const uint16_t &v)
{
	out.AppendFmt("%" PRIu32, v);
}

static inline void PrintImpl(Buffer &out, const int16_t &v)
{
	out.AppendFmt("%" PRId32, v);
}

static inline void PrintImpl(Buffer &out, const int8_t &v)
{
	out.AppendFmt("%" PRId32, v);
}

static inline void PrintImpl(Buffer &out, const uint64_t &v)
{
	out.AppendFmt("%" PRIu64, v);
}

static inline void PrintImpl(Buffer &out, const int64_t &v)
{
	out.AppendFmt("%" PRId64, v);
}
129 | 
// String-like overloads: each appends the argument via Buffer::Append().

// another Buffer
static inline void PrintImpl(Buffer &out, const Buffer &o)
{
	out.Append(o);
}

// strwlen8_t
static inline void PrintImpl(Buffer &out, const strwlen8_t &o)
{
	out.Append(o);
}

// strwlen16_t
static inline void PrintImpl(Buffer &out, const strwlen16_t &o)
{
	out.Append(o);
}

// strwlen32_t
static inline void PrintImpl(Buffer &out, const strwlen32_t &o)
{
	out.Append(o);
}
149 | 
150 | 
151 | 
152 | 
// Variadic driver: prints the first argument with the matching single-value
// PrintImpl() overload, then recurses on the rest. The recursion ends at
// PrintImpl(Buffer &).
// NOTE(review): the std::pair overload and the generic catch-all are declared
// further down in this header, after this template. Confirm that types handled
// only by those later overloads resolve to them from here, rather than back to
// this template with an empty pack.
template<typename T, typename... Args>
static void PrintImpl(Buffer &b, const T &v, const Args&... args)
{
	PrintImpl(b, v);
	PrintImpl(b, args...);
}
159 | 
#ifndef __linux__
// Key under which each thread's scratch Buffer (see thread_local_buf()) is
// stored on platforms where the thread_local path is not used.
static pthread_key_t bufKey;

// Runs before main(): creates the key.
[[gnu::constructor]] static void _init()
{
	// Register a destructor so that a thread's Buffer is freed when the thread
	// exits. With a null destructor every thread that printed leaked the
	// Buffer allocated by thread_local_buf().
	pthread_key_create(&bufKey, [](void *p) { delete static_cast<Buffer *>(p); });
}

// Runs at program teardown: releases the key.
[[gnu::destructor]] static void _tear_down()
{
	pthread_key_delete(bufKey);
}
#endif
173 | 
174 | static inline Buffer &thread_local_buf()
175 | {
176 | #ifdef __linux__
177 |         static thread_local Buffer b;
178 | 
179 |         return b;
180 | #else
181 |         auto p = (Buffer  *)pthread_getspecific(bufKey);
182 | 
183 |         if (!p)
184 |         {
185 |                 p = new Buffer();
186 |                 pthread_setspecific(bufKey, p);
187 |         }
188 | 
189 |         return *p;
190 | #endif
191 | }
192 | 
193 | template<typename T, typename... Args>
194 | static void Print(const T &v, const Args&... args)
195 | {
196 | 	auto &b = thread_local_buf();
197 | 
198 | 	b.clear();
199 | 	PrintImpl(b, v);
200 | 	PrintImpl(b, args...);
201 | 	const auto r = write(STDOUT_FILENO, b.data(), b.size());
202 | 
203 | 	(void)r; // (void)write triggers warning if  -Wunused-result is set
204 | 		 // and write() is declared like so
205 | }
206 | 
207 | 
// Appends the printed form of every argument, in order, to `out`.
// Unlike Print(), the buffer is not cleared first and nothing is written to
// standard output.
template<typename T, typename... Args>
static void ToBuffer(Buffer &out, const T &v, const Args&... args)
{
	PrintImpl(out, v);
	PrintImpl(out, args...);
}
214 | 
215 | 
// Prints a std::pair as "<first, second>", using the PrintImpl() overloads of
// its two members.
template<typename A, typename B>
static void PrintImpl(Buffer &b, const std::pair<A, B> &pair)
{
	b.Append('<');
	PrintImpl(b, pair.first);
	b.Append(_S(", "));
	PrintImpl(b, pair.second);
	b.Append('>');
}
225 | 
226 | 
227 | 
228 | template<typename T>
229 | static void PrintImpl(Buffer &b, const T &v)
230 | {
231 | 	if (std::is_enum<T>::value)
232 | 	{
233 | 		// We can now use 'naked' enums and they will be printed properly
234 | 		PrintImpl(b, (typename std::underlying_type<T>::type)v);
235 | 	}
236 | 	else
237 |         {
238 | 		// catch-all for when we have no PrintImpl() specialization
239 |                 fprintf(stderr, "Specialization for type not defined\n");
240 | 		std::abort();
241 |         }
242 | }
243 | 
244 | 
245 | // Handy alternative to snprintf()
246 | // See also:
247 | // Buffer::append<>
248 | // Buffer::build<>
249 | // RPCString::build<>
250 | template<typename... Args>
251 | static size_t Snprint(char *out, const size_t outLen, Args&&... args)
252 | {
253 | 	auto &b = thread_local_buf();
254 | 
255 | 	b.clear();
256 | 	ToBuffer(b, std::forward<Args>(args)...);
257 | 
258 | 	const auto l = b.size();
259 | 
260 | 	b.AsS32().ToCString(out, outLen);
261 | 	return l;
262 | }
263 | 


--------------------------------------------------------------------------------
/Switch/switch_ranges.h:
--------------------------------------------------------------------------------
  1 | // See also http://en.cppreference.com/w/cpp/algorithm/iota
  2 | #pragma once
  3 | #include <utility>
  4 | 
  5 | // range is (start():inclusive ... stop():exclusive)
  6 | // i.e [left, right)
  7 | template<typename VT = uint32_t, typename LT = uint32_t>
  8 | struct range_base final
  9 | {
 10 | 	struct iterator
 11 | 	{
 12 | 		VT i;
 13 | 
 14 | 		constexpr iterator(const VT index)
 15 | 			: i{index}
 16 | 		{
 17 | 
 18 | 		}
 19 | 
 20 | 		constexpr VT inline operator*() const
 21 | 		{
 22 | 			return i;
 23 | 		}
 24 | 
 25 | 		constexpr void operator++() noexcept
 26 | 		{
 27 | 			++i;
 28 | 		}
 29 | 
 30 | 		constexpr bool operator!=(const iterator &o) const noexcept
 31 | 		{
 32 | 			return i != o.i;
 33 | 		}
 34 | 	};
 35 | 
 36 | 	VT offset;
 37 | 	LT len;
 38 | 
 39 | 	constexpr iterator begin() const noexcept
 40 | 	{
 41 | 		return iterator(offset);
 42 | 	}
 43 | 
 44 | 	constexpr iterator end() const noexcept
 45 | 	{
 46 | 		return iterator(stop());
 47 | 	}
 48 | 
 49 | 	constexpr range_base()
 50 | 		: offset{0}, len{0}
 51 | 	{
 52 | 
 53 | 	}
 54 | 
 55 | 	constexpr range_base(const range_base &o)
 56 | 		: offset(o.offset), len(o.len)
 57 | 	{
 58 | 
 59 | 	}
 60 | 
 61 | 	constexpr range_base(const VT _o, const LT _l)
 62 | 		: offset(_o), len(_l)
 63 | 	{
 64 | 
 65 | 	}
 66 | 
 67 | 	//e.g range32_t{10,15}
 68 | 	constexpr range_base(const std::pair<VT, VT> p)
 69 | 		: offset{p.first}, len{p.second - p.first}
 70 | 	{
 71 | 
 72 | 	}
 73 | 
 74 | 	constexpr range_base(const LT l)
 75 | 		: offset{0}, len{l}
 76 | 	{
 77 | 
 78 | 	}
 79 | 
 80 | 	constexpr auto size() const noexcept
 81 | 	{
 82 | 		return len;
 83 | 	}
 84 | 
 85 | 	constexpr auto empty() const noexcept
 86 | 	{
 87 | 		return 0 == len;
 88 | 	}
 89 | 	constexpr operator bool() const noexcept
 90 | 	{
 91 | 		return len;
 92 | 	}
 93 | 
 94 | 	constexpr bool SpansAll() const noexcept
 95 | 	{
 96 | 		return std::numeric_limits<VT>::min() == offset && std::numeric_limits<VT>::max() == stop();
 97 | 	}
 98 | 
 99 | 	void SetSpansAll() noexcept
100 | 	{
101 | 		offset 	= std::numeric_limits<VT>::min();
102 | 		len 	= std::numeric_limits<VT>::max() - offset;
103 | 	}
104 | 
105 | 	constexpr void Set(const VT _o, const LT _l) noexcept
106 | 	{
107 | 		offset 	= _o;
108 | 		len 	= _l;
109 | 	}
110 | 	
111 | 	constexpr void setStartEnd(const VT lo, const VT hi) noexcept
112 | 	{
113 | 		offset = lo;
114 | 		len = hi - lo;
115 | 	}
116 | 
117 | 	constexpr auto &operator=(const range_base &o) noexcept
118 | 	{
119 | 		offset 	= o.offset;
120 | 		len 	= o.len;
121 | 		return *this;
122 | 	}
123 | 
124 | 	constexpr void SetEnd(const VT e) noexcept
125 | 	{
126 | 		len = e - offset;
127 | 	}
128 | 
129 | 	// Matching SetEnd(); adjusts offset of a valid range
130 | 	constexpr void reset_offset(const VT start) noexcept
131 | 	{
132 | 		len = stop() - start;
133 | 		offset = start;
134 | 	}
135 | 
136 | 	constexpr VT mid() const noexcept // (left + right) / 2
137 | 	{
138 | 		return offset + (len >> 1);
139 | 	}
140 | 
141 | 	constexpr VT stop() const noexcept
142 | 	{
143 | 		return offset + len;
144 | 	}
145 | 
146 | 	constexpr VT start() const noexcept
147 | 	{
148 | 		return offset;
149 | 	}
150 | 
151 | 	// TODO: optimize
152 | 	// Very handy for iterating a subset e.g
153 | 	// for (auto i : range32(offset, perPage).ClippedTo(total) { .. }
154 | 	constexpr range_base<VT, LT> ClippedTo(const VT lim) const
155 | 	{
156 | 		range_base<VT, LT> res;
157 | 
158 | 		res.offset 	= Min(offset, lim);
159 | 		res.len 	= Min(stop(), lim) - res.offset;
160 | 
161 | 		return res;
162 | 	}
163 | 
164 | 	constexpr bool Contains(const VT o) const noexcept
165 | 	{
166 | 		// https://twitter.com/EricLengyel/status/546120250450653184
167 | 		// Single comparison impl. Works fine except shouldn't work for 64bit scalars
168 | 
169 | 		return sizeof(VT) == 8
170 | 			? o >= offset && o < stop()
171 | 			: uint32_t(o - offset) < len; 	// o in [offset, offset+len)
172 | 	}
173 | 
174 |         constexpr bool operator<(const range_base &o) const noexcept
175 |         {
176 |                 return offset < o.offset || (offset == o.offset && len < o.len);
177 |         }
178 |         
179 |         constexpr bool operator<=(const range_base &o) const noexcept
180 |         {
181 |                 return offset < o.offset || (offset == o.offset && len <= o.len);
182 |         }
183 |         
184 |         constexpr bool operator>(const range_base &o) const noexcept
185 |         {       
186 |                 return offset > o.offset || (offset == o.offset && len > o.len);
187 |         }               
188 | 
189 |         constexpr bool operator>=(const range_base &o) const noexcept
190 |         {               
191 |                 return offset > o.offset || (offset == o.offset && len >= o.len);
192 |         }
193 | 
194 | 
195 | 	template<typename T>
196 | 	constexpr bool operator==(const T &o) const noexcept
197 | 	{
198 | 		return offset == o.offset && len == o.len;
199 | 	}
200 | 
201 | 	template<typename T>
202 | 	constexpr bool operator!=(const T &o) const noexcept
203 | 	{
204 | 		return offset != o.offset || len != o.len;
205 | 	}
206 | 
207 | 	range_base Intersection(const range_base &o) const noexcept
208 | 	{
209 | 		// A range containing the indices that exist in both ranges
210 | 
211 | 		if (stop() <= o.offset || o.stop() <= offset)
212 | 			return range_base(0, 0);
213 | 		else
214 | 		{
215 | 			const auto _o = Max(offset, o.offset);
216 | 
217 | 			return range_base(_o, Min(stop(), o.stop()) - _o);
218 | 		}
219 | 	}
220 | 
221 | 	void ClipOffsetTo(const VT o) noexcept
222 | 	{
223 | 		if (offset < o)
224 | 		{
225 | 			if (o >= stop())
226 | 			{
227 | 				offset = o;
228 | 				len 	= 0;
229 | 			}
230 | 			else
231 | 			{
232 | 				const auto d = o - offset;
233 | 
234 | 				offset = o;
235 | 				len -= d;
236 | 			}
237 | 		}
238 | 	}
239 | 
240 | 	void ClipEndTo(const VT e) noexcept
241 | 	{
242 | 		const auto end = stop();
243 | 
244 | 		if (e < end)
245 | 		{
246 | 			if (e < offset)
247 | 				len = 0;
248 | 			else	
249 | 				len -= end - e;
250 | 		}
251 | 	}
252 | 
253 | 	constexpr bool Overlaps(const range_base &o) const noexcept
254 | 	{
255 | 		// range is (start() inclusive, stop() non inclusive)
256 | 		// e.g [start, end)
257 | 		//
258 | 		// alternative formula: a0 <= b1 && b0 <= a1
259 | 		// https://fgiesen.wordpress.com/2011/10/16/checking-for-interval-overlap/
260 | 		return !(stop() <= o.offset || o.stop() <= offset);
261 | 	}
262 | 
263 | 	constexpr bool Contains(const range_base &o) const noexcept
264 | 	{
265 | 		return offset <= o.offset && stop() >= o.stop();
266 | 	}
267 | 
268 | 	constexpr auto Union(const range_base &o) const noexcept
269 | 	{
270 | 		const auto _o = Min(offset, o.offset);
271 | 
272 | 		return range_base(_o, Max(stop(), o.stop()) - _o);
273 | 	}
274 | 
275 | 	// http://en.wikipedia.org/wiki/Disjoint_union
276 | 	// Make sure they overlap
277 | 	uint8_t DisjointUnion(const range_base &o, range_base *out) const noexcept
278 | 	{
279 | 		const range_base *const b = out;
280 | 
281 | 		if (offset < o.offset)
282 | 		{
283 | 			out->offset 	= offset;
284 | 			out->len 	= o.offset - offset;
285 | 			++out;
286 | 		}
287 | 		else if (o.offset < offset)
288 | 		{
289 | 			out->offset 	= o.offset;
290 | 			out->len  	= offset - o.offset;
291 | 			++out;
292 | 		}
293 | 
294 | 		const auto thisEnd = stop(), thatEnd = o.stop();
295 | 
296 | 		if (thisEnd < thatEnd)
297 | 		{
298 | 			out->offset 	= thisEnd;
299 | 			out->len 	= thatEnd - thisEnd;
300 | 			++out;
301 | 			
302 | 		}
303 | 		else if (thatEnd < thisEnd)
304 | 		{
305 | 			out->offset 	= thatEnd;
306 | 			out->len 	= thisEnd - thatEnd;
307 | 			++out;
308 | 		}
309 | 
310 | 		return out - b;
311 | 	}
312 | 
313 | 	void TrimLeft(const LT span) noexcept
314 | 	{
315 | 		offset+=span;
316 | 		len-=span;
317 | 	}
318 | 		
319 | 
320 | 	[[deprecated("use reset() please")]] void Unset()
321 | 	{
322 | 		offset = 0;
323 | 		len = 0;
324 | 	}
325 | 
326 | 	constexpr void reset() noexcept
327 | 	{
328 | 		offset = 0;
329 | 		len = 0;
330 | 	}
331 | };
332 | 
333 | 
// e.g IsBetweenRange(tm.tm_hour, 1, 5)
// Whether v is in [s, e), i.e e is NOT included
template <typename VT>
static constexpr bool IsBetweenRange(const VT v, const VT s, const VT e) noexcept
{
	// 8-byte types get the two plain comparisons
	if (sizeof(VT) == 8)
		return v >= s && v < e;

	// everything else: a single comparison on the distance from s
	return uint32_t(v - s) < (e - s); // v in [s, e)
}
342 | 
// Whether v is in [s, e], i.e e IS included
template <typename VT>
static constexpr bool IsBetweenRangeInclusive(const VT v, const VT s, const VT e) noexcept
{
	// 8-byte types get the two plain comparisons
	if (sizeof(VT) == 8)
		return v >= s && v <= e;

	// everything else: a single comparison on the distance from s
	return uint32_t(v - s) <= (e - s); // v in [s, e]
}
350 | 
351 | 
352 | using range8_t = range_base<uint8_t, uint8_t>;
353 | using range16_t = range_base<uint16_t, uint16_t>;
354 | using range32_t = range_base<uint32_t, uint32_t>;
355 | using range64_t = range_base<uint64_t, uint64_t>;
356 | using rangestr_t = range_base<const char *, uint32_t>; // Please ust strwlen instead
357 | 
358 | // great for iteration e.g
359 | // {
360 | // 	struct foo values[128];
361 | // 	uint8_t cnt=5;
362 | //
363 | // 	for (const auto v : Switch::make_range(values, cnt)) { .. }
364 | // }
365 | template<typename VT, typename LT>
366 | static constexpr auto MakeRange(const VT s, const LT l) noexcept -> range_base<VT, LT> 
367 | {
368 | 	return {s, l};
369 | }
370 | 
371 | namespace Switch
372 | {
373 | 	template<typename VT, typename LT> 
374 | 	static constexpr auto make_range(const VT s, const LT l) noexcept
375 | 	{
376 | 		return range_base<VT, LT>(s, l);
377 | 	}
378 | }
379 | 


--------------------------------------------------------------------------------
/Switch/switch_refcnt.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include "switch_atomicops.h"
  3 | 
  4 | struct RCObject
  5 | {
  6 | 	virtual void Release() = 0;
  7 | 
  8 | 	virtual void Retain() = 0;
  9 | 
 10 | 	virtual int32_t RetainCount() const = 0;
 11 | 
 12 | 	virtual int32_t use_count() const
 13 | 	{
 14 | 		return RetainCount();
 15 | 	}
 16 | 
 17 | 	virtual ~RCObject()
 18 | 	{
 19 | 
 20 | 	}
 21 | };
 22 | 
 23 | 
 24 | template <class T>
 25 | struct RefCounted 
 26 | 	: public RCObject
 27 | {
 28 | 	private:
 29 | 		int32_t rc = 1;
 30 | 
 31 | 	public:
 32 | 		~RefCounted()
 33 | 		{
 34 | 			assert(__atomic_load_n(&rc, MemoryModel::RELAXED) == 0);
 35 | 		}
 36 | 
 37 | 		void SetOneRef()
 38 | 		{
 39 | 			__atomic_store_n(&rc, 1, MemoryModel::RELAXED);
 40 | 		}
 41 | 
 42 | 		void ResetRefs()
 43 | 		{
 44 | 			// Useful when you allocat on stack - otherwise destructor will abort
 45 | 			rc = 0;
 46 | 		}
 47 | 
 48 | 		[[gnu::always_inline]] inline int32_t RetainCount() const override
 49 | 		{
 50 | 			return rc;
 51 | 		}
 52 | 
 53 | 		[[gnu::always_inline]] inline void TryRelease()
 54 | 		{	
 55 | 			const auto p = (uintptr_t)this;
 56 | 
 57 | 			if (likely(p))
 58 | 				Release();
 59 | 		}
 60 | 
 61 | 		[[gnu::always_inline]] inline void Retain() override
 62 | 		{
 63 | 			const auto now = __atomic_add_fetch(&rc, 1, MemoryModel::RELAXED);
 64 | 
 65 | 			require(now > 1); // can't go from 0 to 1
 66 | 		}
 67 | 
 68 | 		[[gnu::always_inline]] inline bool ReleaseAndTestNoRefs()
 69 | 		{
 70 | 			return  __atomic_sub_fetch(&rc, 1, MemoryModel::RELEASE) == 0;
 71 | 		}
 72 | 
 73 | 
 74 | 		[[gnu::always_inline]] inline void Release() override
 75 | 		{
 76 | 			const auto res = __atomic_sub_fetch(&rc, 1, MemoryModel::RELEASE);
 77 | 
 78 | 			require(res >= 0);
 79 | 
 80 | 			if (!res)
 81 | 			{
 82 | 				std::atomic_thread_fence(std::memory_order_acquire);
 83 | 				delete static_cast<T *>(this);
 84 | 			}
 85 | 		}
 86 | 
 87 | 		[[gnu::always_inline]] inline void ReleaseNoDealloc()
 88 | 		{
 89 | 			if (!__atomic_sub_fetch(&rc, 1, MemoryModel::RELEASE))
 90 | 			{
 91 | 				std::atomic_thread_fence(std::memory_order_acquire);
 92 | 				static_cast<T *>(this)->~T();
 93 | 			}
 94 | 		}
 95 | };
 96 | 
 97 | 
 98 | namespace Switch
 99 | {
100 | 	template<class T>
101 | 	struct shared_refptr
102 | 	{
103 | 		T *v;
104 | 
105 | 		shared_refptr(T *p, const bool)
106 | 			: v{p}
107 | 		{
108 | 
109 | 		}
110 | 
111 | 		shared_refptr(T *p)
112 | 			: v{p}
113 | 		{
114 | 			if (v)
115 | 				v->Retain();
116 | 		}
117 | 
118 | 		~shared_refptr()
119 |                 {
120 |                         if (v)
121 |                                 v->Release();
122 |                 }
123 | 
124 | 		shared_refptr()
125 | 			: v{nullptr}
126 | 		{
127 | 
128 | 		}
129 | 
130 | 		shared_refptr(std::nullptr_t)
131 | 			: v{nullptr}
132 | 		{
133 | 
134 | 		}
135 | 
136 | 		shared_refptr(const shared_refptr &o)
137 | 			: v{o.v}
138 |                 {
139 |                         if (v)
140 |                                 v->Retain();
141 |                 }
142 | 
143 | 		shared_refptr(shared_refptr &&o)
144 | 			: v{o.v}
145 | 		{
146 | 			o.v = nullptr;
147 | 		}
148 | 
149 | 		auto &operator=(const shared_refptr &o)
150 | 		{
151 | 			if (o.v)
152 | 				o.v->Retain();
153 | 			if (v)
154 | 				v->Release();
155 | 			v = o.v;
156 | 			return *this;
157 | 		}
158 | 
159 |                 auto &operator=(shared_refptr &&o)
160 |                 {
161 |                         if (v != o.v)
162 |                         {
163 |                                 if (v)
164 |                                         v->Release();
165 |                                 v = o.v;
166 |                                 o.v = nullptr;
167 |                         }
168 |                         return *this;
169 |                 }
170 | 
171 | 		void reset(T *const o)
172 | 		{
173 | 			if (v != o)
174 | 			{
175 | 				if (v)
176 | 					v->Release();
177 | 
178 | 				v = o;
179 | 				if (v)
180 | 					v->Retain();
181 | 			}
182 | 		}
183 | 		T *release()
184 | 		{
185 | 			auto res = v;
186 | 
187 | 			v = nullptr;
188 | 			return res;
189 | 		}
190 | 
191 | 		[[gnu::always_inline]] inline T *get() const
192 | 		{
193 | 			return v;
194 | 		}
195 | 
196 | 		[[gnu::always_inline]] inline T &operator *() const
197 | 		{
198 | 			return *v;
199 | 		}
200 | 
201 | 		[[gnu::always_inline]] inline T *operator->() const
202 | 		{
203 | 			return v;
204 | 		}
205 | 
206 | 		uint32_t use_count() const
207 | 		{
208 | 			return v ? v->use_count() : 0;
209 | 		}
210 | 
211 | 		operator bool() const
212 | 		{
213 | 			return v != nullptr;
214 | 		}
215 | 
216 | 		auto unique() const
217 | 		{
218 | 			return use_count() == 1;
219 | 		}
220 | 
221 |                 // handy -- although not in std::unique_ptr<> API
222 |                 inline operator T *()
223 |                 {
224 |                         return v;
225 |                 }
226 |         };
227 | 
228 |         template <typename T, typename... Args>
229 |         static inline auto make_sharedref(Args &&... args)
230 |         {
231 |                 return shared_refptr<T>(new T(std::forward<Args>(args)...), true);
232 |         }
233 | 
234 |         template <typename T>
235 |         static inline auto make_sharedref_for(T *const ptr)
236 |         {
237 |                 return shared_refptr<T>(ptr);
238 |         }
239 | 
240 |         template <typename T>
241 |         static inline auto make_sharedref_with_ownership_transfer(T *const ptr)
242 |         {
243 |                 return shared_refptr<T>(ptr, true);
244 |         }
245 | };
246 | 
// p->Release(), unless `p` is null
template <typename T>
static inline void tryRelease(T *const p)
{
        if (!p)
                return;

        p->Release();
}
253 | 


--------------------------------------------------------------------------------
/Switch/switch_security.cpp:
--------------------------------------------------------------------------------
  1 | #include "switch_security.h"
  2 | 
  3 | Buffer switch_security::ciphers::block_cipher::decrypt(const range_base<const uint8_t *, std::size_t> ciphertext)
  4 | {
  5 |         reset_ctx();
  6 | 
  7 |         if (1 != EVP_DecryptInit_ex(ctx, cipher, nullptr, reinterpret_cast<const uint8_t *>(key.offset), reinterpret_cast<const uint8_t *>(iv.offset)))
  8 |                 throw Switch::runtime_error("Failed to initialize EVP ctx");
  9 | 
 10 |         if (1 != EVP_CIPHER_CTX_set_padding(ctx, 1))
 11 |                 throw Switch::runtime_error("Failed to initialize EVP ctx");
 12 | 
 13 |         // shouldn't need to do this, i.e size should be set to ciphertext.size()
 14 |         // because it's already padded and aligned.
 15 |         const auto size = (ciphertext.size() + block_size_in_bytes) & ~(block_size_in_bytes - 1);
 16 |         Buffer res;
 17 | 
 18 |         res.reserve(size + 1);
 19 | 
 20 |         auto out = reinterpret_cast<uint8_t *>(res.data());
 21 |         int len;
 22 |         const auto *p = reinterpret_cast<const uint8_t *>(ciphertext.offset), *const e = p + ciphertext.size();
 23 | 
 24 |         if (1 != EVP_DecryptUpdate(ctx, out, &len, p, std::distance(p, e)))
 25 |         {
 26 |                 EVP_CIPHER_CTX_free(ctx);
 27 |                 ctx = nullptr;
 28 |                 throw Switch::data_error("Failed to decrypt");
 29 |         }
 30 |         out += len;
 31 | 
 32 |         if (1 != EVP_DecryptFinal_ex(ctx, out, &len))
 33 |         {
 34 |                 EVP_CIPHER_CTX_free(ctx);
 35 |                 ctx = nullptr;
 36 |                 throw Switch::data_error("Failed to decrypt");
 37 |         }
 38 |         out += len;
 39 | 
 40 |         const auto plaintext_len = std::distance(reinterpret_cast<uint8_t *>(res.data()), out);
 41 | 
 42 |         EVP_CIPHER_CTX_free(ctx);
 43 |         ctx = nullptr;
 44 | 
 45 |         EXPECT(plaintext_len <= size);
 46 |         res.resize(plaintext_len);
 47 |         return res;
 48 | }
 49 | 
 50 | Buffer switch_security::ciphers::block_cipher::encrypt(const str_view32 plaintext)
 51 | {
 52 |         reset_ctx();
 53 | 
 54 |         if (1 != EVP_EncryptInit_ex(ctx, cipher, nullptr, reinterpret_cast<const uint8_t *>(key.offset), reinterpret_cast<const uint8_t *>(iv.offset)))
 55 |                 throw Switch::runtime_error("Failed to initialize EVP ctx");
 56 | 
 57 |         if (1 != EVP_CIPHER_CTX_set_padding(ctx, 1))
 58 |         {
 59 |                 // Buy default, encryption operations are padded using standard block padding and the padding
 60 |                 // is checked and removed when decrypting.
 61 |                 //
 62 |                 // Disabing the padding here would mean that no padding is performed, by the size of plaintext would
 63 |                 // have to be a multiple of the block size, or encryption would fail
 64 |                 //
 65 |                 // We are going to enable it explicitly here anyway
 66 |                 throw Switch::runtime_error("Failed to initialize EVP ctx");
 67 |         }
 68 | 
 69 |         // e.g AES is a block cipher with 128-bit blocks(16 bytes)
 70 |         // Note that AES-256 also uses 128-bit blocks; the "256" is about the key length, not the block length.
 71 |         const auto size = (plaintext.size() + block_size_in_bytes) & ~(block_size_in_bytes - 1);
 72 |         Buffer res;
 73 | 
 74 |         res.reserve(size + 1);
 75 | 
 76 |         auto out = reinterpret_cast<uint8_t *>(res.data());
 77 |         int len;
 78 |         const auto *p = reinterpret_cast<const uint8_t *>(plaintext.data()), *const e = p + plaintext.size();
 79 | 
 80 |         if (1 != EVP_EncryptUpdate(ctx, out, &len, p, std::distance(p, e)))
 81 |         {
 82 |                 EVP_CIPHER_CTX_free(ctx);
 83 |                 ctx = nullptr;
 84 |                 throw Switch::data_error("Failed to encrypt");
 85 |         }
 86 |         out += len;
 87 | 
 88 |         if (1 != EVP_EncryptFinal_ex(ctx, out, &len))
 89 |         {
 90 |                 EVP_CIPHER_CTX_free(ctx);
 91 |                 ctx = nullptr;
 92 |                 throw Switch::data_error("Failed to encrypt");
 93 |         }
 94 |         out += len;
 95 | 
 96 |         const auto ciphertext_len = std::distance(reinterpret_cast<uint8_t *>(res.data()), out);
 97 | 
 98 |         EVP_CIPHER_CTX_free(ctx);
 99 |         ctx = nullptr;
100 | 
101 |         EXPECT(ciphertext_len <= size);
102 |         res.resize(ciphertext_len);
103 |         return res;
104 | }
105 | 
106 | switch_security::hmac::hmac(const EVP_MD *md, const void *key, const int key_len)
107 | {
108 |         ds = md->md_size;
109 |         HMAC_CTX_init(&ctx);
110 |         HMAC_Init_ex(&ctx, key, key_len, md, nullptr);
111 | }
112 | 
113 | void switch_security::hmac::finalize(uint8_t *digest_out)
114 | {
115 |         unsigned len = digest_size();
116 | 
117 |         HMAC_Final(&ctx, digest_out, &len);
118 | }
119 | 
120 | switch_security::hmac::~hmac()
121 | {
122 |         HMAC_CTX_cleanup(&ctx);
123 | }
124 | 
125 | void switch_security::hmac::PBKDF2(const str_view32 password, const void *salt, const std::size_t salt_len, const std::size_t iterations,
126 |                                    const EVP_MD *md,
127 |                                    const std::size_t key_out_capacity,
128 |                                    uint8_t *key_out)
129 | {
130 |         require(key_out_capacity >= md->md_size);
131 | 
132 |         if (1 != PKCS5_PBKDF2_HMAC(password.data(), password.size(),
133 |                                    reinterpret_cast<const uint8_t *>(salt), salt_len,
134 |                                    iterations,
135 |                                    md,
136 |                                    key_out_capacity, key_out))
137 |         {
138 |                 throw Switch::data_error("Failed to PKCS5_PBKDF2_HMAC()");
139 |         }
140 | }
141 | 
142 | 
143 | switch_security::rsa switch_security::rsa::generate(const uint32_t bits)
144 | {
145 | 	require(bits >= 1024 && bits <= 8192);
146 | 
147 | 	std::unique_ptr<RSA, decltype(&::RSA_free)> r{RSA_new(), ::RSA_free};
148 | 	std::unique_ptr<BIGNUM, decltype(&::BN_free)> bn(BN_new(), ::BN_free);
149 | 
150 | 	BN_set_word(bn.get(), RSA_F4);
151 | 	if (1 != RSA_generate_key_ex(r.get(), 2048, bn.get(), nullptr))
152 | 		throw Switch::data_error("Failed to generate RSA keys pair");
153 | 
154 | 	if (!RSA_check_key(r.get()))
155 | 		throw Switch::data_error("Failed to verify RSA keys pair");
156 | 
157 | 	return rsa(r.release());
158 | }
159 | 
160 | switch_security::rsa switch_security::rsa::make_from_pubkcy_pkcs(const uint8_t *content, const std::size_t len)
161 | {
162 |         std::unique_ptr<RSA, decltype(&::RSA_free)> r{RSA_new(), ::RSA_free};
163 |         auto local{r.get()};
164 |         auto res = d2i_RSAPublicKey(&local, &content, len);
165 | 
166 |         EXPECT(res == local);
167 |         return rsa(r.release());
168 | }
169 | 
170 | switch_security::rsa switch_security::rsa::make_from_privkey_pkcs(const uint8_t *content, const std::size_t len)
171 | {
172 |         std::unique_ptr<RSA, decltype(&::RSA_free)> r{RSA_new(), ::RSA_free};
173 |         auto local{r.get()};
174 |         auto res = d2i_RSAPrivateKey(&local, &content, len);
175 | 
176 |         EXPECT(res == local);
177 |         return rsa(r.release());
178 | }
179 | 
180 | int switch_security::rsa::priv_decrypt(const uint8_t *content, const std::size_t content_len, uint8_t *to, int padding)
181 | {
182 |         // to must point to a memory section large enough to hold the plaintext data
183 |         // which is smaller than modulus_size()
184 |         // padding is the padding mode used to encrypt the data
185 | 
186 |         require(have_privkey());
187 |         return RSA_private_decrypt(content_len, content, to, r.get(), padding);
188 | }
189 | 
190 | void switch_security::rsa::pub_encrypt(const uint8_t *content, const std::size_t content_len, uint8_t *to, int padding)
191 | {
192 |         require(have_pubkey());
193 | 
194 |         switch (padding)
195 |         {
196 |                 case RSA_PKCS1_PADDING:
197 |                         require(content_len < modulus_size() - 11);
198 |                         break;
199 | 
200 |                 case RSA_PKCS1_OAEP_PADDING:
201 |                         require(content_len < modulus_size() - 41);
202 |                         break;
203 | 
204 |                 case RSA_NO_PADDING:
205 |                         require(content_len == modulus_size());
206 |                         break;
207 |         }
208 | 
209 |         auto res = RSA_public_encrypt(content_len, content, to, r.get(), padding);
210 | 
211 |         require(res == modulus_size());
212 | }
213 | 
214 | void switch_security::rsa::priv_encrypt(const uint8_t *content, const std::size_t content_len, uint8_t *to, int padding)
215 | {
216 |         require(have_privkey());
217 |         auto res = RSA_private_encrypt(content_len, content, to, r.get(), padding);
218 | 
219 |         require(res == modulus_size());
220 | }
221 | 
222 | int switch_security::rsa::pub_decrypt(const uint8_t *content, const std::size_t content_len, uint8_t *to, int padding)
223 | {
224 |         require(have_pubkey());
225 |         return RSA_public_decrypt(content_len, content, to, r.get(), padding);
226 | }
227 | 
228 | bool switch_security::rsa::sign(const int type, const uint8_t *m, const std::size_t m_len, uint8_t *sigret)
229 | {
230 |         require(have_privkey());
231 |         unsigned siglen = modulus_size();
232 | 
233 |         return RSA_sign(type, m, m_len, sigret, &siglen, r.get()) == 1;
234 | }
235 | 
236 | int switch_security::rsa::verify(const int type, const uint8_t *m, const uint8_t m_len, uint8_t *sigbuf, const std::size_t siglen)
237 | {
238 |         require(have_pubkey());
239 |         return RSA_verify(type, m, m_len, sigbuf, siglen, r.get());
240 | }
241 | 


--------------------------------------------------------------------------------
/Switch/switch_security.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include "switch.h"
  3 | #include "switch_ranges.h"
  4 | #include <openssl/conf.h>
  5 | #include <openssl/engine.h>
  6 | #include <openssl/err.h>
  7 | #include <openssl/evp.h>
  8 | #include <openssl/hmac.h>
  9 | #include <openssl/rand.h>
 10 | #include <openssl/rsa.h>
 11 | 
 12 | namespace switch_security
 13 | {
 14 | 
 15 | #pragma mark ciphers - symmetric key encryption
 16 |         struct ciphers
 17 |         {
 18 |                 struct block_cipher
 19 |                 {
 20 |                         static void verify_key_iv(const std::size_t key_size, const std::size_t iv_size, const std::size_t input_key_size, const std::size_t input_iv_size)
 21 |                         {
 22 |                                 EXPECT(input_key_size * (sizeof(uint8_t) << 3) == key_size);
 23 |                                 EXPECT(input_iv_size * (sizeof(uint8_t) << 3) == iv_size);
 24 |                         }
 25 | 
 26 |                         const std::size_t block_size_in_bytes;
 27 |                         const EVP_CIPHER *cipher;
 28 |                         const range_base<const uint8_t *, std::size_t> key, iv;
 29 |                         EVP_CIPHER_CTX *ctx{nullptr};
 30 | 
 31 |                         void reset_ctx()
 32 |                         {
 33 |                                 if (ctx)
 34 |                                         EVP_CIPHER_CTX_free(ctx);
 35 | 
 36 |                                 ctx = EVP_CIPHER_CTX_new();
 37 |                                 if (!ctx)
 38 |                                         throw Switch::data_error("Failed to initialize context");
 39 |                         }
 40 | 
 41 |                         block_cipher(const std::size_t block_size_, const EVP_CIPHER *cipher_,
 42 |                                      const range_base<const uint8_t *, std::size_t> key_, const range_base<const uint8_t *, std::size_t> iv_)
 43 |                             : block_size_in_bytes{block_size_ >> 3}, cipher{cipher_}, key{key_}, iv{iv_}
 44 |                         {
 45 |                         }
 46 | 
 47 |                         ~block_cipher()
 48 |                         {
 49 |                                 if (ctx)
 50 |                                         EVP_CIPHER_CTX_free(ctx);
 51 |                         }
 52 | 
 53 |                         Buffer decrypt(const range_base<const uint8_t *, std::size_t> ciphertext); // returns plaintext
 54 | 
 55 | 			Buffer decrypt(const str_view32 ciphertext)
 56 | 			{
 57 | 				return decrypt({reinterpret_cast<const uint8_t *>(ciphertext.data()), ciphertext.size()});
 58 | 			}
 59 | 
 60 |                         Buffer encrypt(const str_view32 plaintext); // returns ciphertext
 61 |                 };
 62 | 
 63 |                 struct aes256 final
 64 |                     : public block_cipher
 65 |                 {
 66 |                         aes256(const range_base<const uint8_t *, std::size_t> key, const range_base<const uint8_t *, std::size_t> iv)
 67 |                             : block_cipher(128, EVP_aes_256_cbc(), key, iv)
 68 |                         {
 69 |                                 verify_key_iv(256, 128, key.size(), iv.size());
 70 |                         }
 71 |                 };
 72 | 
 73 |                 struct aes128 final
 74 |                     : public block_cipher
 75 |                 {
 76 |                         aes128(const range_base<const uint8_t *, std::size_t> key, const range_base<const uint8_t *, std::size_t> iv)
 77 |                             : block_cipher(128, EVP_aes_128_cbc(), key, iv)
 78 |                         {
 79 |                                 verify_key_iv(128, 128, key.size(), iv.size());
 80 |                         }
 81 |                 };
 82 |         };
 83 | 
 84 | #pragma mark HMAC
 85 |         struct hmac final
 86 |         {
 87 |                 HMAC_CTX ctx;
 88 |                 uint8_t ds{0};
 89 | 
 90 |                 hmac(const EVP_MD *md, const void *key, const int key_len);
 91 | 
 92 |                 auto digest_size() noexcept
 93 |                 {
 94 |                         return ds;
 95 |                 }
 96 | 
 97 |                 // digest_out should be at least digest_size() in bytes
 98 |                 void finalize(uint8_t *digest_out);
 99 | 
100 |                 inline void update(const void *data, const std::size_t len)
101 |                 {
102 |                         HMAC_Update(&ctx, reinterpret_cast<const uint8_t *>(data), len);
103 |                 }
104 | 
105 |                 ~hmac();
106 | 
107 |                 static void PBKDF2(const str_view32 password, const void *salt, const std::size_t salt_len, const std::size_t iterations,
108 |                                    const EVP_MD *md,
109 |                                    const std::size_t key_out_capacity,
110 |                                    uint8_t *key_out);
111 |         };
112 | 
113 | #pragma mark RSA
114 |         struct rsa final
115 |         {
116 |                 std::unique_ptr<RSA, decltype(&::RSA_free)> r{nullptr, ::RSA_free};
117 | 
118 |               private:
119 |                 rsa(RSA *ptr)
120 |                     : r(ptr, ::RSA_free)
121 |                 {
122 |                 }
123 | 
124 |               public:
125 |                 rsa()
126 |                 {
127 |                 }
128 | 
129 |                 inline bool have_pubkey() const noexcept
130 |                 {
131 |                         return r && r->e && r->n;
132 |                 }
133 | 
134 |                 inline bool have_privkey() const noexcept
135 |                 {
136 |                         return r && r->d && r->p && r->q;
137 |                 }
138 | 
139 |                 static rsa generate(const uint32_t bits = 4096);
140 | 
141 |                 static rsa make_from_pubkcy_pkcs(const uint8_t *content, const std::size_t len);
142 | 
143 |                 static rsa make_from_privkey_pkcs(const uint8_t *content, const std::size_t len);
144 | 
145 |                 std::size_t pub_key_repr_pkcs(uint8_t *storage)
146 |                 {
147 |                         require(have_pubkey());
148 |                         return i2d_RSAPublicKey(r.get(), &storage);
149 |                 }
150 | 
151 |                 std::size_t priv_key_repr_pkcs(uint8_t *storage)
152 |                 {
153 |                         require(have_privkey());
154 |                         return i2d_RSAPrivateKey(r.get(), &storage);
155 |                 }
156 | 
157 |                 void print() const
158 |                 {
159 |                         RSA_print_fp(stdout, r.get(), 0);
160 |                 }
161 | 
162 |                 int modulus_size() const noexcept
163 |                 {
164 |                         return RSA_size(r.get());
165 |                 }
166 | 
167 |                 int priv_decrypt(const uint8_t *content, const std::size_t content_len, uint8_t *to, int padding = RSA_PKCS1_PADDING);
168 | 
169 |                 void pub_encrypt(const uint8_t *content, const std::size_t content_len, uint8_t *to, int padding = RSA_PKCS1_PADDING);
170 | 
171 |                 void priv_encrypt(const uint8_t *content, const std::size_t content_len, uint8_t *to, int padding = RSA_PKCS1_PADDING);
172 | 
173 |                 int pub_decrypt(const uint8_t *content, const std::size_t content_len, uint8_t *to, int padding = RSA_PKCS1_PADDING);
174 | 
175 |                 bool sign(const int type, const uint8_t *m, const std::size_t m_len, uint8_t *sigret);
176 | 
177 |                 int verify(const int type, const uint8_t *m, const uint8_t m_len, uint8_t *sigbuf, const std::size_t siglen);
178 |         };
179 | 
180 |         static void gen_rnd(const std::size_t size, uint8_t *out)
181 |         {
182 |                 if (1 != RAND_bytes(out, size))
183 |                         throw Switch::data_error("Failed to generate random sequence");
184 |         }
185 | 
186 |         static void gen_pseudo_rnd(const std::size_t size, uint8_t *out)
187 |         {
188 |                 if (1 != RAND_pseudo_bytes(out, size))
189 |                         throw Switch::data_error("Failed to generate random sequence");
190 |         }
191 | 
192 | 	static inline auto gen_rnd(const std::size_t size)
193 |         {
194 |                 auto res = std::make_unique<uint8_t[]>(size);
195 | 
196 |                 gen_rnd(size, res.get());
197 |                 return res;
198 |         }
199 | 
200 | 	static inline auto gen_pseudo_rnd(const std::size_t size)
201 | 	{
202 |                 auto res = std::make_unique<uint8_t[]>(size);
203 | 
204 | 		gen_pseudo_rnd(size, res.get());
205 | 		return res;
206 | 	}
207 | }
208 | 


--------------------------------------------------------------------------------
/Switch/switch_vector.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <vector>
 3 | 
 4 | namespace Switch
 5 | {
 6 |         template <class T>
 7 |         class vector
 8 |             : public std::vector<T>
 9 |         {
10 |               public:
11 |                 void RemoveByValue(const T v)
12 |                 {
13 |                         const auto e = this->end();
14 |                         auto it = std::find(this->begin(), e, v);
15 | 
16 |                         if (it != e)
17 |                         {
18 |                                 const auto n = this->size();
19 | 
20 |                                 this->erase(it);
21 |                                 require(this->size() + 1 == n);
22 |                         }
23 |                 }
24 | 
25 |                 T Pop()
26 |                 {
27 |                         auto last{this->back()};
28 | 
29 |                         this->pop_back();
30 |                         return last;
31 |                 }
32 | 
33 |                 auto values() noexcept
34 |                 {
35 |                         return this->data();
36 |                 }
37 | 
38 |                 void pop_front()
39 |                 {
40 |                         auto it = this->begin();
41 | 
42 |                         this->erase(it);
43 |                 }
44 | 
45 |                 void Append(T *const list, const size_t n)
46 |                 {
47 |                         this->reserve(n);
48 |                         for (size_t i{0}; i != n; ++i)
49 |                                 this->push_back(list[i]);
50 |                 }
51 | 
52 |                 void PopByIndex(const size_t idx)
53 |                 {
54 |                         this->erase(this->begin() + idx);
55 |                 }
56 |         };
57 | }
58 | 


--------------------------------------------------------------------------------
/Switch/text.cpp:
--------------------------------------------------------------------------------
  1 | #include "text.h"
  2 | 
  3 | static constexpr inline uint8_t idx_of(const char c) noexcept
  4 | {
  5 |         switch (c)
  6 |         {
  7 |                 case '0' ... '9':
  8 |                         return c - '0';
  9 | 
 10 |                 case 'a' ... 'z':
 11 |                         return c - 'a' + 10;
 12 | 
 13 |                 case 'A' ... 'Z':
 14 |                         return c - 'A' + 10 + 26;
 15 | 
 16 |                 case '_':
 17 |                         return 26 + 10 + 26 + 0;
 18 | 
 19 |                 case '!':
 20 |                         return 26 + 10 + 26 + 1;
 21 | 
 22 |                 case '#':
 23 |                         return 26 + 10 + 26 + 2;
 24 | 
 25 |                 case '$':
 26 |                         return 26 + 10 + 26 + 3;
 27 | 
 28 |                 case '%':
 29 |                         return 26 + 10 + 26 + 4;
 30 | 
 31 |                 case '&':
 32 |                         return 26 + 10 + 26 + 5;
 33 | 
 34 |                 case '*':
 35 |                         return 26 + 10 + 26 + 6;
 36 | 
 37 |                 case '+':
 38 |                         return 26 + 10 + 26 + 7;
 39 | 
 40 |                 case '-':
 41 |                         return 26 + 10 + 26 + 8;
 42 | 
 43 |                 case '.':
 44 |                         return 26 + 10 + 26 + 9;
 45 | 
 46 |                 case '^':
 47 |                         return 26 + 10 + 26 + 10;
 48 | 
 49 |                 case '|':
 50 |                         return 26 + 10 + 26 + 11;
 51 | 
 52 |                 case '~':
 53 |                         return 26 + 10 + 26 + 12;
 54 | 
 55 |                 case '\'':
 56 |                         return 26 + 10 + 26 + 13;
 57 | 
 58 |                 case '`':
 59 |                         return 26 + 10 + 26 + 14;
 60 | 
 61 |                 default:
 62 |                         return 0xff;
 63 |         }
 64 | }
 65 | 
 66 | static const char numChars[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_!#$%&*+-.^|~'`";
 67 | 
 68 | uint8_t Text::ToBase(uint64_t input, const uint32_t toBase, char *out)
 69 | {
 70 |         char bufr[128], *c = out;
 71 |         uint32_t i{0};
 72 | 
 73 |         do
 74 |         {
 75 |                 bufr[i++] = numChars[input % toBase];
 76 |                 input /= toBase;
 77 |         } while (input);
 78 | 
 79 |         do
 80 |         {
 81 |                 *out++ = bufr[--i];
 82 |         } while (i);
 83 | 
 84 |         *out = '\0';
 85 |         return out - c;
 86 | }
 87 | 
 88 | uint64_t Text::FromBase(const char *const input, const uint32_t len, const uint8_t base)
 89 | {
 90 |         uint64_t res{0};
 91 | 
 92 |         for (uint32_t i{0}; i != len; ++i)
 93 |         {
 94 |                 if (const auto idx = idx_of(input[i]); idx == 0xff)
 95 |                         return 0;
 96 |                 else
 97 |                         res = res * base + idx;
 98 |         }
 99 |         return res;
100 | }
101 | 
// Returns how many bytes p[0, len) occupies once every double quote, single quote,
// backslash, newline, carriage return, NUL and 0x1a (octal 032) byte is written as a
// two-byte escape; all other bytes count as one.
size_t as_escaped_repr_length(const char *p, const size_t len)
{
        size_t n{0};

        // The index has the same width as len so that lengths beyond 32 bits terminate correctly.
        for (size_t i{0}; i != len; ++i)
        {
                switch (p[i])
                {
                        case '\"':
                        case '\'':
                        case '\\':
                        case '\n':
                        case '\r':
                        case '\0':
                        case '\032':
                                n += 2;
                                break;

                        default:
                                ++n;
                                break;
                }
        }

        return n;
}
128 | 
129 | uint32_t escape_impl(const char *const p, const uint32_t len, char *out, const uint32_t available)
130 | {
131 |         const char *const base = out, *const end = p + len;
132 |         const char *ckpt = p;
133 |         char chr;
134 | 
135 |         for (const char *it = p; it != end;)
136 |         {
137 |                 const char c = *it;
138 | 
139 |                 if (c == '\"' || c == '\\' || c == '\'')
140 |                         chr = c;
141 | #if 0 // Stupid mySQL won't handle \t and \v
142 | 		else if (c == '\t')
143 | 			chr = 't';
144 | 		else if (c == '\v')
145 | 			chr = 'v';
146 | #endif
147 |                 else if (c == '\n')
148 |                         chr = 'n';
149 |                 else if (c == '\r')
150 |                         chr = 'r';
151 |                 else if (c == '\0')
152 |                         chr = '0';
153 |                 else if (c == '\032') // Issues on Win32 (ref: mysql's escape_string_for_mysql() implementation)
154 |                         chr = 'Z';
155 |                 else
156 |                 {
157 |                         ++it;
158 |                         continue;
159 |                 }
160 | 
161 |                 const uint32_t _len = it - ckpt;
162 | 
163 |                 memcpy(out, ckpt, _len);
164 |                 out += _len;
165 |                 out[0] = '\\';
166 |                 out[1] = chr;
167 |                 out += 2;
168 |                 ckpt = ++it;
169 |         }
170 | 
171 |         const uint32_t _len = end - ckpt;
172 | 
173 |         memcpy(out, ckpt, _len);
174 |         out += _len;
175 | 
176 |         const auto actual = out - base;
177 | 
178 |         EXPECT(actual <= available);
179 |         return actual;
180 | }
181 | 


--------------------------------------------------------------------------------
/Switch/text.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "switch.h"
 3 | 
 4 | struct size_repr
 5 | {
 6 |         const uint64_t v;
 7 | 
 8 |         size_repr(const uint64_t value)
 9 |             : v{value}
10 |         {
11 |         }
12 | 
13 |         strwlen8_t Get(char *const p) const
14 |         {
15 |                 static const std::pair<const uint64_t, const char *> bases[] =
16 |                     {
17 |                         {1024ULL * 1024ULL * 1024ULL * 1024ULL, "tb"},
18 |                         {1024UL * 1024UL * 1024UL, "gb"},
19 |                         {1024 * 1024, "mb"},
20 |                         {1024, "kb"},
21 |                     };
22 | 
23 |                 for (const auto &it : bases)
24 |                 {
25 |                         const auto r = double(v) / it.first;
26 | 
27 |                         if (r >= 1)
28 |                         {
29 |                                 const auto repr = it.second;
30 |                                 uint8_t len = sprintf(p, "%.2lf", r);
31 | 
32 |                                 while (len && p[len - 1] == '0')
33 |                                         --len;
34 |                                 if (len && p[len - 1] == '.')
35 |                                         --len;
36 | 
37 |                                 p[len] = *repr;
38 |                                 p[len + 1] = repr[1];
39 |                                 return strwlen8_t(p, len + 2);
40 |                         }
41 |                 }
42 | 
43 |                 return strwlen8_t(p, sprintf(p, "%ub", uint32_t(v)));
44 |         }
45 | };
46 | 
47 | static inline void PrintImpl(Buffer &out, const size_repr &s)
48 | {
49 |         out.reserve(32);
50 | 
51 |         out.advance_size(s.Get(out.end()).len);
52 | }
53 | 
54 | struct dotnotation_repr
55 | {
56 |         const uint64_t value;
57 |         const char sep;
58 | 
59 |         dotnotation_repr(const uint64_t v, const char separator = ',')
60 |             : value{v}, sep{separator}
61 |         {
62 |         }
63 | 
64 |         strwlen8_t Get(char *const out) const
65 |         {
66 |                 uint16_t t[8];
67 |                 uint8_t n{0};
68 |                 auto r = value;
69 |                 char *o;
70 | 
71 |                 do
72 |                 {
73 | 
74 |                         t[n++] = r % 1000;
75 |                         r /= 1000;
76 |                 } while (r);
77 | 
78 |                 for (o = out + sprintf(out, "%u", t[--n]); n;)
79 |                 {
80 |                         *o++ = sep;
81 |                         o += sprintf(o, "%03u", t[--n]);
82 |                 }
83 | 
84 |                 return {out, uint8_t(o - out)};
85 |         }
86 | };
87 | 
namespace Text
{
        // Writes `input` in base `toBase` to `out` (NUL-terminated) and returns the number of digits written.
        uint8_t ToBase(uint64_t input, uint32_t toBase, char *out);

        // Parses input[0, len) as a number in `base`; returns 0 if a character is not part of the digit alphabet.
        uint64_t FromBase(const char *input, uint32_t len, uint8_t base);
}
93 | 
94 | static inline void PrintImpl(Buffer &out, const dotnotation_repr &r)
95 | {
96 |         out.reserve(32);
97 |         out.advance_size(r.Get(out.end()).len);
98 | }
99 | 


--------------------------------------------------------------------------------
/Switch/thread.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <mutex>
3 | 
namespace Switch
{
	// The library's mutex type is simply the standard one.
	typedef std::mutex mutex;
}
8 | 
9 | 


--------------------------------------------------------------------------------
/Switch/timings.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include <time.h>
  3 | #include <errno.h>
  4 | #include <sys/time.h>
  5 | #include <sys/types.h>
  6 | #include <sys/syscall.h>
  7 | #ifdef __MACH__
  8 | #include <mach/clock.h>
  9 | #include <mach/mach.h>
 10 | #endif
 11 | 
 12 | #ifdef __MACH__
 13 | static clock_serv_t cclock;
 14 | 
 15 | [[gnu::constructor]] static void __init()
 16 | {
 17 |         host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock);
 18 | }
 19 | 
 20 | [[gnu::destructor]] static void __dtor()
 21 | {
 22 |         mach_port_deallocate(mach_task_self(), cclock);
 23 | }
 24 | #endif
 25 | 
 26 | namespace Timings
 27 | {
 28 |         namespace SystemClock
 29 |         {
 30 |                 static uint64_t Tick()
 31 |                 {
 32 |                         struct timespec res;
 33 | 
 34 | #ifdef __MACH__
 35 |                         mach_timespec_t mts;
 36 | 
 37 |                         clock_get_time(cclock, &mts);
 38 |                         res.tv_sec = mts.tv_sec;
 39 |                         res.tv_nsec = mts.tv_nsec;
 40 | #else
 41 |                         clock_gettime(CLOCK_MONOTONIC, &res);
 42 | #endif
 43 | 
 44 |                         return (res.tv_sec * 1000000000ULL) + res.tv_nsec;
 45 |                 }
 46 | 
 47 |                 static inline uint64_t Time();
 48 | 
 49 |                 [[gnu::always_inline]] inline static uint64_t CurTimeInNS()
 50 |                 {
 51 |                         return Tick();
 52 |                 }
 53 | 
 54 |                 [[gnu::always_inline]] inline static uint64_t CurTimeInMillis()
 55 |                 {
 56 |                         return Time();
 57 |                 }
 58 |         }
 59 | 
 60 |         template <uint64_t asNanoseconds>
 61 |         struct Unit
 62 |         {
 63 |                 static inline void Set(const uint64_t n, timespec *const ts)
 64 |                 {
 65 |                         ts->tv_sec = ToSeconds(n);
 66 |                         ts->tv_nsec = (n * asNanoseconds) - (ts->tv_sec * 1000000000ULL);
 67 |                 }
 68 | 
 69 |                 static inline uint64_t Tick()
 70 |                 {
 71 |                         return SystemClock::Tick() / asNanoseconds;
 72 |                 }
 73 | 
 74 |                 static inline uint64_t Since(const uint64_t t)
 75 |                 {
 76 |                         return Tick() - t;
 77 |                 }
 78 | 
 79 |                 static inline uint64_t ToSeconds(const uint64_t n)
 80 |                 {
 81 |                         return n * asNanoseconds / 1000000000ULL;
 82 |                 }
 83 | 
 84 |                 static inline timespec ToTimespec(const uint64_t n)
 85 |                 {
 86 |                         timespec res;
 87 |                         const auto asN(n * asNanoseconds);
 88 | 
 89 |                         res.tv_sec = asN / 1000000000ULL;
 90 |                         res.tv_nsec = asN - res.tv_sec * 1000000000ULL;
 91 | 
 92 |                         return res;
 93 |                 }
 94 | 
 95 |                 static constexpr inline uint64_t ToMinutes(const uint64_t n)
 96 |                 {
 97 |                         return ToSeconds(n) / 60;
 98 |                 }
 99 | 
100 |                 static constexpr inline uint64_t ToHours(const uint64_t n)
101 |                 {
102 |                         return ToMinutes(n) / 60;
103 |                 }
104 | 
105 |                 static constexpr inline uint64_t ToDays(const uint64_t n)
106 |                 {
107 |                         return ToHours(n) / 24;
108 |                 }
109 | 
110 |                 static constexpr inline uint64_t ToMicros(const uint64_t n)
111 |                 {
112 |                         return n * asNanoseconds / 1000;
113 |                 }
114 | 
115 |                 static constexpr inline uint64_t ToMillis(const uint64_t n)
116 |                 {
117 |                         return n * asNanoseconds / 1000000;
118 |                 }
119 | 
120 |                 static constexpr inline uint64_t ToNanos(const uint64_t n)
121 |                 {
122 |                         return n * asNanoseconds;
123 |                 }
124 | 
125 |                 static void Sleep(const uint64_t n)
126 |                 {
127 |                         timespec req, rem;
128 | 
129 |                         req.tv_sec = ToSeconds(n);
130 |                         req.tv_nsec = (n * asNanoseconds) - (req.tv_sec * 1000000000);
131 | 
132 |                         while (unlikely(nanosleep(&req, &rem) == -1 && errno == EINTR))
133 |                                 req = rem;
134 |                 }
135 | 
136 |                 // Need all those signatures to avoid ambiguity
137 |                 static inline void Sleep(const uint32_t n)
138 |                 {
139 |                         Sleep((uint64_t)n);
140 |                 }
141 | 
142 |                 static inline void Sleep(const int32_t n)
143 |                 {
144 |                         Sleep((uint64_t)n);
145 |                 }
146 | 
147 |                 static inline void Sleep(const uint16_t n)
148 |                 {
149 |                         Sleep((uint64_t)n);
150 |                 }
151 | 
152 |                 static inline void Sleep(const uint8_t n)
153 |                 {
154 |                         Sleep((uint64_t)n);
155 |                 }
156 | 
157 |                 static void SleepInterruptible(const uint64_t n)
158 |                 {
159 |                         struct timespec req, rem;
160 | 
161 |                         req.tv_sec = ToSeconds(n);
162 |                         req.tv_nsec = (n * asNanoseconds) - (req.tv_sec * 1000000000);
163 | 
164 |                         nanosleep(&req, &rem);
165 |                 }
166 | 
167 |                 static uint64_t SysTime()
168 |                 {
169 |                         struct timeval tv;
170 | 
171 |                         if (unlikely(gettimeofday(&tv, nullptr) == -1))
172 |                         {
173 |                                 abort();
174 |                         }
175 |                         else if (unlikely(tv.tv_sec < 1451982426u))
176 |                         {
177 |                                 abort();
178 |                         }
179 |                         else
180 |                                 return ((tv.tv_sec * 1000000000ULL) + (tv.tv_usec * 1000ULL)) / asNanoseconds;
181 |                 }
182 |         };
183 | 
184 |         struct Seconds
185 |             : public Unit<1000000000ULL>
186 |         {
187 |         };
188 | 
189 |         struct Milliseconds
190 |             : public Unit<1000000UL>
191 |         {
192 |         };
193 | 
194 |         struct Microseconds
195 |             : public Unit<1000>
196 |         {
197 |         };
198 | 
199 |         struct Nanoseconds
200 |             : public Unit<1>
201 |         {
202 |         };
203 | 
204 |         struct Minutes
205 |             : public Unit<1000000000ULL * 60>
206 |         {
207 |         };
208 | 
209 |         struct Hours
210 |             : public Unit<1000000000ULL * 60 * 60>
211 |         {
212 |         };
213 | 
214 |         struct Days
215 |             : public Unit<1000000000ULL * 60 * 60 * 24>
216 |         {
217 |         };
218 | 
219 |         struct Weeks
220 |             : public Unit<1000000000ULL * 60 * 60 * 24 * 7>
221 |         {
222 |         };
223 | 
224 |         uint64_t SystemClock::Time()
225 |         {
226 |                 return Timings::Nanoseconds::ToMillis(Tick());
227 |         }
228 | }
229 | 


--------------------------------------------------------------------------------
/codecs.cpp:
--------------------------------------------------------------------------------
 1 | #include "codecs.h"
 2 | #include "terms.h"
 3 | #include "utils.h"
 4 | #include "queryexec_ctx.h"
 5 | 
 6 | void Trinity::Codecs::IndexSession::flush_index(int fd) {
 7 |         if (indexOut.size()) {
 8 |                 if (Utilities::to_file(indexOut.data(), indexOut.size(), fd) == -1)
 9 |                         throw Switch::data_error("Failed to flush index");
10 |                 else {
11 |                         indexOutFlushed += indexOut.size();
12 |                         indexOut.clear();
13 |                 }
14 |         }
15 | }
16 | 
17 | void Trinity::Codecs::IndexSession::persist_terms(std::vector<std::pair<str8_t, term_index_ctx>> &v) {
18 |         IOBuffer data, index;
19 | 
20 |         pack_terms(v, &data, &index);
21 | 
22 |         if (Utilities::to_file(data.data(), data.size(), Buffer{}.append(basePath, "/terms.data"_s32).c_str()) == -1)
23 |                 throw Switch::system_error("Failed to persist terms.data");
24 | 
25 |         if (Utilities::to_file(index.data(), index.size(), Buffer{}.append(basePath, "/terms.idx"_s32).c_str()) == -1)
26 |                 throw Switch::system_error("Failed to persist terms.idx");
27 | }
28 | 


--------------------------------------------------------------------------------
/common.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <switch.h>
 3 | #include <text.h>
 4 | //#define TRINITY_ENABLE_PREFETCH 1
 5 | #ifdef TRINITY_ENABLE_PREFETCH
 6 | #include <emmintrin.h> // for _mm_prefetch() intrinsic . We could have also used __builtin_prefetch()
 7 | #endif
 8 | #include <unordered_map>
 9 | 
 10 | // For verifying the integrity of materialized hits.
 11 | // We no longer need to verify them, but this stays for future use.
12 | //#define TRINITY_VERIFY_HITS 1
13 | 
14 | #define TRINITY_VERSION (2 * 10 + 5)
15 | 
16 | // Define if you want to read in the contents of the index instead of memory mapping it to the process space
17 | // You probably don't want to do that though
18 | //#define TRINITY_MEMRESIDENT_INDEX 1
19 | 
20 | namespace Trinity {
21 |         // We will support unicode, so more appropriate string types will be better suited to the task.
22 |         // See: http://site.icu-project.org/
23 |         using str8_t             = strwlen8_t;
24 |         using str32_t            = strwlen32_t;
25 |         using char_t             = str8_t::value_type;
26 |         using query_term_flags_t = uint16_t;
27 | 
28 |         // Index Source Document ID
29 |         // It is specific to index sources and the execution engine (and by extension, to the various documents set iterators).
30 |         //
31 |         // Those can be translated to global docid_t via IndexSource::translate_docid() during query execution.
32 |         //
33 |         // When indexing, you are going to provide a meaningful isrc_docid. It can be the actual global ID of a document, or
34 |         // a translated - and you e.g store in a file at sizeof(docid_t) the actual value of the indexed isrc_docid and
35 |         // you consult it in translate_docid()
36 |         using isrc_docid_t = uint32_t;
37 | 
38 |         // The global document ID
39 |         using docid_t = uint32_t;
40 | 
 41 |         // magic value; end of postings list or documents set
42 |         // This is specific to index source document IDs and DocsSets iterators -- not related to global document IDs.
43 |         static constexpr isrc_docid_t DocIDsEND{std::numeric_limits<isrc_docid_t>::max()};
44 | 
45 |         // Represents the position of a token(i.e word) in a document
46 |         using tokenpos_t = uint16_t;
47 | 
48 |         static inline int32_t terms_cmp(const char_t *a, const uint8_t aLen, const char_t *b, const uint8_t bLen) {
49 |                 // Your impl. may ignore case completely so that you can
50 |                 // index and query without having to care for distinctions between lower and upper case (e.g use Text::StrnncasecmpISO88597() )
51 |                 // However, if you were to do that, you 'd need to account for that wherever in the codebase
52 |                 // you either track strings(tokens) or check for equality, e.g
53 |                 // - Trinity::IndexSource::resolve_term_ctx()
54 |                 // - query and parser_ctx
55 |                 // - exec.cpp caches etc
56 |                 return Trinity::str32_t(a, aLen).Cmp(b, bLen);
57 |         }
58 | 
59 |         // Returns how many characters(char_t) were parsed from `content`, and how many were stored into `out`
60 |         //
61 |         // You may want to translate e.g "spider-man" to "spiderman", which is why this is not simply expected to
62 |         // return the number of characters consumed. Or you may want to translate from whatever case to lower-case.
63 |         // Or, for example, you may want to consume 'I.B.M' as 'IBM', etc.
64 |         //
65 |         // It is possible to consume characters, but not actually store any in out. (i.e result.first to be != 0 and result.second to be == 0)
66 |         //
67 |         // This default implementation simply consumes a token based in very simple heuristics and return it as-is, with no translation.
68 |         //
69 |         // XXX: out must be at least (Limits::MaxTermLength + 1) in size, you can then check if
70 |         // return value.second > Limits::MaxTermLength, like parse_term() does.
71 |         // Your alternative implementations must comply with this rule.
72 |         std::pair<uint32_t, uint8_t> default_token_parser_impl(const str32_t content, char_t *out, const bool in_phrase);
73 | } // namespace Trinity
74 | 
75 | #include "trinity_limits.h"
76 | #ifdef LEAN_SWITCH
77 | #include <compress.h>
78 | #endif
79 | 


--------------------------------------------------------------------------------
/compilation_ctx.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include "common.h"
  3 | #include "queries.h"
  4 | #include "runtime.h"
  5 | #include <switch_mallocators.h>
  6 | 
  7 | namespace Trinity {
  8 |         enum class ENT : uint8_t {
  9 |                 matchterm = 0,
 10 |                 constfalse,
 11 |                 consttrue,
 12 |                 dummyop,
 13 |                 matchallterms,
 14 |                 matchanyterms,
 15 |                 unaryand,
 16 |                 unarynot,
 17 |                 matchanyphrases,
 18 |                 matchallphrases,
 19 |                 matchphrase,
 20 |                 consttrueexpr,
 21 |                 logicaland,
 22 |                 logicalnot,
 23 |                 logicalor,
 24 |                 matchsome,
 25 |                 // matchallnodes and matchanynodes are handled by the compiler/optimizer, though
 26 |                 // no exec. nodes of that type are generated during compilation.
 27 |                 matchallnodes,
 28 |                 matchanynodes,
 29 |                 SPECIALIMPL_COLLECTION_LOGICALOR,
 30 |                 SPECIALIMPL_COLLECTION_LOGICALAND,
 31 |         };
 32 | 
 33 |         struct exec_node final {
 34 |                 ENT fp;
 35 | 
 36 |                 union {
 37 |                         void *   ptr;
 38 |                         uint32_t u32;
 39 |                         uint16_t u16;
 40 |                 };
 41 | 
 42 |                 // cost is computed by reorder_execnode()
 43 |                 // and we keep track of this here so that we can make some higher level optimizations later
 44 |                 uint64_t cost;
 45 |         };
 46 | 
 47 |         struct compilation_ctx { // allocators + registration helpers used while compiling a query into exec_nodes
 48 |                 simple_allocator    allocator{4096 * 6};
 49 |                 simple_allocator    runsAllocator{4096}, ctxAllocator{4096};
 50 |                 std::vector<void *> large_allocs; // malloc()ed blocks handed out by allocate(); released by the destructor
 51 |                 // Virtual: the type is polymorphic (resolve_query_term() is pure virtual), so destroying a derived context through a compilation_ctx pointer must be well defined
 52 |                 virtual ~compilation_ctx() {
 53 |                         for (auto p : large_allocs)
 54 |                                 std::free(p);
 55 |                 }
 56 |                 // Returns size bytes: from ctxAllocator when can_allocate(size), otherwise from malloc() (tracked in large_allocs; nullptr if malloc() fails)
 57 |                 void *allocate(const std::size_t size) {
 58 |                         if (ctxAllocator.can_allocate(size))
 59 |                                 return ctxAllocator.Alloc(size);
 60 |                         else {
 61 |                                 // Reserve the tracking slot first: if emplace_back() throws, nothing has been malloc()ed yet, so nothing can leak
 62 |                                 large_allocs.emplace_back(nullptr);
 63 |                                 large_allocs.back() = malloc(size);
 64 |                                 return large_allocs.back();
 65 |                         }
 66 |                 }
 67 |                 // The structs below that end in a zero-length array (a compiler extension) are variable-length headers; presumably `size` elements follow -- confirm against the allocating code
 68 |                 struct partial_match_ctx final {
 69 |                         uint16_t  size;
 70 |                         uint16_t  min;
 71 |                         exec_node nodes[0];
 72 |                 };
 73 | 
 74 |                 struct nodes_group final {
 75 |                         uint16_t  size;
 76 |                         exec_node nodes[0];
 77 |                 };
 78 | 
 79 |                 struct termsrun final {
 80 |                         uint16_t       size;
 81 |                         exec_term_id_t terms[0];
 82 | 
 83 |                         static_assert(std::numeric_limits<decltype(size)>::max() >= Limits::MaxTermLength);
 84 | 
 85 |                         bool operator==(const termsrun &o) const noexcept;
 86 | 
 87 |                         bool is_set(const exec_term_id_t id) const noexcept;
 88 | 
 89 |                         bool erase(const exec_term_id_t id) noexcept;
 90 | 
 91 |                         bool erase(const termsrun &o);
 92 | 
 93 |                         auto empty() const noexcept {
 94 |                                 return !size;
 95 |                         }
 96 |                 };
 97 | 
 98 |                 struct cacheable_termsrun final {
 99 |                         isrc_docid_t    lastConsideredDID;
100 |                         const termsrun *run;
101 |                         bool            res;
102 |                 };
103 | 
104 |                 struct phrase final {
105 |                         uint8_t        size;
106 |                         exec_term_id_t termIDs[0];
107 | 
108 |                         static_assert(std::numeric_limits<decltype(size)>::max() >= Trinity::Limits::MaxPhraseSize);
109 | 
110 |                         uint8_t intersection(const termsrun *const tr, exec_term_id_t *const out) const noexcept;
111 | 
112 |                         // returns terms found in run, but missing from this phrase
113 |                         uint8_t disjoint_union(const termsrun *const tr, exec_term_id_t *const out) const noexcept;
114 | 
115 |                         bool intersected_by(const termsrun *const tr) const noexcept;
116 | 
117 |                         bool operator==(const phrase &o) const noexcept;
118 | 
119 |                         bool is_set(const exec_term_id_t id) const noexcept;
120 | 
121 |                         bool is_set(const exec_term_id_t *const l, const uint8_t n) const noexcept;
122 |                 };
123 | 
124 |                 struct phrasesrun final {
125 |                         uint16_t size;
126 |                         phrase * phrases[0];
127 |                 };
128 | 
129 |                 struct binop_ctx final {
130 |                         exec_node lhs;
131 |                         exec_node rhs;
132 |                 };
133 | 
134 |                 struct unaryop_ctx final {
135 |                         exec_node expr;
136 |                 };
137 |                 // Resolves only the first term of p (p->terms[0].token) through resolve_query_term()
138 |                 inline uint16_t register_token(const Trinity::phrase *p) {
139 |                         return resolve_query_term(p->terms[0].token);
140 |                 }
141 | 
142 |                 phrase *register_phrase(const Trinity::phrase *p);
143 |                 // Both register_*op() helpers take a context object from ctxAllocator and copy the operand(s) into it
144 |                 binop_ctx *register_binop(const exec_node lhs, const exec_node rhs) {
145 |                         auto ptr = ctxAllocator.New<binop_ctx>();
146 | 
147 |                         ptr->lhs = lhs;
148 |                         ptr->rhs = rhs;
149 |                         return ptr;
150 |                 }
151 | 
152 |                 unaryop_ctx *register_unaryop(const exec_node expr) {
153 |                         auto ptr = ctxAllocator.New<unaryop_ctx>();
154 | 
155 |                         ptr->expr = expr;
156 |                         return ptr;
157 |                 }
158 |                 // Maps a query term to a 16-bit value; must be implemented by the derived context
159 |                 virtual uint16_t resolve_query_term(const str8_t term) = 0;
160 |         };
161 | 
162 |         exec_node compile_query(ast_node *root, compilation_ctx &cctx); // defined elsewhere; presumably lowers the AST at root into an exec_node tree using cctx -- confirm
163 | 
164 |         void group_execnodes(exec_node &, simple_allocator &); // defined elsewhere; NOTE(review): semantics are not visible from this header
165 | } // namespace Trinity
166 | 


--------------------------------------------------------------------------------
/docidupdates.cpp:
--------------------------------------------------------------------------------
  1 | #include "docidupdates.h"
  2 | #include <ansifmt.h>
  3 | #include <switch_bitops.h>
  4 | #include <boost/sort/spreadsort/spreadsort.hpp>
  5 | 
  6 | // Packs a list of updated/deleted document IDs into a buffer that also contains
  7 | // a skiplist for random access to the bitmaps
  8 | void Trinity::pack_updates(std::vector<docid_t> &updatedDocumentIDs, IOBuffer *const buf) {
  9 |         // Output layout: [bitmap banks][bloom filter, optional][log2(bank size): u8][flag: u8][skiplist][skiplist entries: u32][lowest ID][highest ID]
 10 |         // The input vector is sorted in place. An empty input produces no output at all.
 11 |         if (updatedDocumentIDs.empty()) {
 12 |                 return;
 13 |         }
 14 | 
 15 |         static constexpr size_t BANK_SIZE{32 * 1024};
 16 |         static_assert((BANK_SIZE & 63) == 0, "Not divisable by 64");
 17 |         IOBuffer  skiplist;
 18 |         uint64_t *bloom{nullptr};
 19 | 
 20 |         // The bloom filter is only built when it REALLY makes sense: a 256k bits long filter
 21 |         // takes 32K, which likely only pays off if we got lots of documents
 22 |         if (updatedDocumentIDs.size() > (BANK_SIZE * 8)) {
 23 |                 bloom = reinterpret_cast<uint64_t *>(calloc(updated_documents::K_bloom_filter_size / 64 + 1, sizeof(uint64_t)));
 24 |         }
 25 | 
 26 |         boost::sort::spreadsort::spreadsort(updatedDocumentIDs.begin(), updatedDocumentIDs.end());
 27 | 
 28 |         // Duplicates are not filtered out here. Trinity::SegmentIndexSession throws if a document is
 29 |         // updated more than once, and erasing the same document however many times is safe
 30 |         const auto *      cur  = updatedDocumentIDs.data();
 31 |         const auto *const last = cur + updatedDocumentIDs.size();
 32 | 
 33 |         while (cur != last) {
 34 |                 // One bank is a bitmap of about 4k that covers 32k document IDs starting at the
 35 |                 // first ID stored in it, which allows for O(1) access once the bank is located
 36 |                 const auto first = *cur;
 37 |                 const auto limit = first + BANK_SIZE;
 38 | 
 39 |                 buf->reserve(BANK_SIZE / 8);
 40 | 
 41 |                 auto *const bank = reinterpret_cast<uint64_t *>(buf->end());
 42 |                 memset(bank, 0, BANK_SIZE / 8);
 43 |                 skiplist.pack(first);
 44 | 
 45 |                 // *cur == first on entry, so at least one ID always lands in the bank
 46 |                 for (; cur != last && *cur < limit; ++cur) {
 47 |                         const auto doc = *cur;
 48 | 
 49 |                         if (bloom) {
 50 |                                 const auto bit = doc & (updated_documents::K_bloom_filter_size - 1);
 51 | 
 52 |                                 bloom[bit / 64] |= static_cast<uint64_t>(1) << (bit & 63);
 53 |                         }
 54 | 
 55 |                         SwitchBitOps::Bitmap<uint64_t>::Set(bank, doc - first);
 56 |                 }
 57 | 
 58 |                 buf->advance_size(BANK_SIZE / 8);
 59 |         }
 60 | 
 61 |         const bool with_bloom = bloom != nullptr;
 62 | 
 63 |         if (with_bloom) {
 64 |                 buf->serialize(bloom, updated_documents::K_bloom_filter_size / 8);
 65 |                 std::free(bloom);
 66 |         }
 67 | 
 68 |         buf->pack(uint8_t(log2(BANK_SIZE)));                              // 1 byte will suffice
 69 |         buf->pack(static_cast<uint8_t>(with_bloom ? 0 : 1));              // 0 if bloom filter is included
 70 |         buf->serialize(skiplist.data(), skiplist.size());                 // skiplist
 71 |         buf->pack(uint32_t(skiplist.size() / sizeof(docid_t)));           // TODO: use varint encoding here
 72 |         buf->pack(updatedDocumentIDs.front(), updatedDocumentIDs.back()); //lowest, highest
 73 | }
 74 | 
 75 | // see pack_updates()
 76 | // use this function to unpack the representation we need in order to access the packed (into bitmaps)
 77 | // updated documents
 78 | Trinity::updated_documents Trinity::unpack_updates(const range_base<const uint8_t *, uint32_t> content) {
 79 |         // Fixed-size parts of the tail written by pack_updates(): bank size (log2) byte, bloom filter flag byte,
 80 |         // skiplist entries count, lowest ID, highest ID. (The skiplist itself sits between the flag byte and the count)
 81 |         static constexpr size_t fixed_tail{2 * sizeof(uint8_t) + sizeof(uint32_t) + 2 * sizeof(docid_t)};
 82 | 
 83 |         if (content.size() < fixed_tail) {
 84 |                 // empty, or too short to even hold the tail: decoding it would read before the start of the buffer
 85 |                 return {};
 86 |         }
 87 | 
 88 |         const auto *const b = content.start();
 89 |         const auto *      p = b + content.size();
 90 | 
 91 |         p -= sizeof(docid_t);
 92 |         const auto highest = *reinterpret_cast<const docid_t *>(p);
 93 |         p -= sizeof(docid_t);
 94 |         const auto lowest = *reinterpret_cast<const docid_t *>(p);
 95 |         p -= sizeof(uint32_t);
 96 |         const auto skiplistSize = *reinterpret_cast<const uint32_t *>(p);
 97 | 
 98 |         if (skiplistSize > (static_cast<size_t>(p - b) - 2 * sizeof(uint8_t)) / sizeof(docid_t)) {
 99 |                 // corrupt input: the skiplist (plus the two bytes in front of it) cannot fit in what is left
100 |                 return {};
101 |         }
102 |         // skiplist entries are docid_t (pack_updates() counts them in sizeof(docid_t) units), so step back by that size
103 |         p -= skiplistSize * sizeof(docid_t);
104 | 
105 |         const auto     skiplist  = reinterpret_cast<const docid_t *>(p);
106 |         const bool     with_bf   = *(--p) == 0; // flag byte: 0 if a bloom filter is included
107 |         const uint32_t bank_size = 1u << *(--p);
108 |         const size_t   bf_bytes  = with_bf ? updated_documents::K_bloom_filter_size / 8 : 0;
109 |         const auto     have      = static_cast<size_t>(p - b); // bytes in front of the two tail bytes: banks + optional bloom filter
110 |         const auto     banks_len = static_cast<size_t>(bank_size / 8) * skiplistSize;
111 | 
112 |         if (with_bf && have != banks_len + bf_bytes) {
113 |                 // Likely changed K_bloom_filter_size; play it safe and ignore the bloom filter
114 |                 return {skiplist, skiplistSize, bank_size, b, lowest, highest, nullptr};
115 |         }
116 | 
117 |         EXPECT(have == banks_len + bf_bytes);
118 |         return {skiplist, skiplistSize, bank_size, b, lowest, highest, with_bf ? reinterpret_cast<const uint64_t *>(p - bf_bytes) : nullptr};
119 | }
120 | 
121 | bool Trinity::updated_documents_scanner::test(const docid_t id) noexcept {
122 |         static constexpr bool trace{false}, traceAdvances{false};
123 |         // Returns true if id is set in one of the banks. The scanner only ever moves forward through the skiplist, so IDs are expected in ascending order
124 |         if (unlikely(id > maxDocID)) {
125 |                 // fast-path; flag it as drained
126 |                 curBankRange.offset = UINT32_MAX;
127 |                 return false;
128 |         } else if (id < low_doc_id) {
129 |                 // fast-path: definitely not here
130 |                 return false;
131 |         } else if (const auto m = bf) {
132 |                 const uint64_t h = id & (updated_documents::K_bloom_filter_size - 1);
133 | 
134 |                 if (0 == (m[h / 64] & (static_cast<uint64_t>(1) << (h & 63)))) {
135 |                         // fast-path: definitely not here
136 |                         return false;
137 |                 }
138 |         }
139 | 
140 |         if (id >= curBankRange.start()) {
141 |                 if (id < curBankRange.stop()) {
142 |                         if constexpr (trace) {
143 |                                 SLog("In bank range ", id - curBankRange.offset, "\n");
144 |                         }
145 | 
146 |                         return SwitchBitOps::Bitmap<uint64_t>::IsSet((uint64_t *)curBank, id - curBankRange.offset);
147 |                 } else {
148 |                         // id is past the current bank, so look for the bank to move to.
149 |                         // (id > maxDocID) was already handled at the top of the function, so it is
150 |                         // not checked again here
151 |                         int32_t btm{0};
152 | 
153 |                         if constexpr (traceAdvances) {
154 |                                 SLog("Binary search FOR ", id, " ", curBankRange, " ", curBankRange.size(), ", maxDocID = ", maxDocID, "\n");
155 |                         }
156 | 
157 |                         // binary search highest bank, where id < bank.end
158 |                         // There's no need to check for success, we already checked for (id > maxDocID)
159 |                         for (int32_t top{static_cast<int32_t>(end - skiplistBase) - 1}; btm <= top;) {
160 |                                 const auto mid     = (btm + top) / 2;
161 |                                 const auto bankEnd = skiplistBase[mid] + bankSize; // named so that it does not shadow the `end` member
162 | 
163 |                                 if (id < bankEnd) {
164 |                                         top = mid - 1;
165 |                                 } else {
166 |                                         btm = mid + 1;
167 |                                 }
168 |                         }
169 | 
170 |                         skiplistBase += btm;
171 |                         curBankRange.Set(*skiplistBase, bankSize);
172 |                         curBank = udBanks + ((skiplistBase - udSkipList) * (bankSize / 8));
173 | 
174 |                         if constexpr (trace || traceAdvances) {
175 |                                 SLog("Now at ", skiplistBase - udSkipList, " => ", curBankRange, " ", curBankRange.Contains(id), "\n");
176 |                         }
177 | 
178 |                         if (curBankRange.Contains(id)) {
179 |                                 if constexpr (trace) {
180 |                                         SLog("REL = ", id - curBankRange.offset, "\n");
181 |                                 }
182 | 
183 |                                 return SwitchBitOps::Bitmap<uint64_t>::IsSet((uint64_t *)curBank, id - curBankRange.offset);
184 |                         } else {
185 |                                 if constexpr (trace) {
186 |                                         SLog("id ", id, " out of range of ", curBankRange, "\n");
187 |                                 }
188 | 
189 |                                 return false;
190 |                         }
191 |                 }
192 |         } else {
193 |                 if constexpr (trace) {
194 |                         SLog("Not Even\n");
195 |                 }
196 | 
197 |                 return false;
198 |         }
199 | }
200 | 


--------------------------------------------------------------------------------
/docidupdates.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include "common.h"
  3 | #include <memory>
  4 | #include <switch.h>
  5 | 
  6 | // Efficient, lean, fixed-size bitmaps based document IDs tracking
  7 | // You are expected to test for document IDs in ascending order, but if you need a different behavior, it should be easy to modify
  8 | // the implementation to accomplish it.
  9 | //
 10 | // Note that it operates on docid_t, not on isrc_docid_t. Because we store isrc_docid_t in ascending order in
 11 | // postings lists, but that doesn't guarantee that the translated global document IDs will also be
 12 | // in ascending order, we should either account for that, or use e.g a SparseFixedBitSet which is
 13 | // almost as fast, takes up less memory and is great for random access
 14 | namespace Trinity {
 15 |         struct updated_documents final { // non-owning view over a buffer produced by pack_updates(); built by unpack_updates()
 16 |                 static constexpr size_t K_bloom_filter_size{256 * 1024};
 17 |                 static_assert(K_bloom_filter_size >= 64 && 0 == (K_bloom_filter_size & (K_bloom_filter_size - 1)), "K_bloom_filter_size must be a power of two: bits are selected with id & (size - 1) and stored in 64-bit words");
 18 |                 // Each bitmaps bank can be accessed by a skiplist via binary search
 19 |                 const docid_t *skiplist;
 20 |                 const uint32_t skiplistSize; // number of skiplist entries (one per bank)
 21 | 
 22 |                 // Fixed size bitmap banks
 23 |                 const uint32_t bankSize; // document IDs covered by one bank (so one bank takes bankSize / 8 bytes)
 24 |                 const uint8_t *banks;
 25 | 
 26 |                 docid_t lowestID;
 27 |                 docid_t highestID;
 28 | 
 29 |                 const uint64_t *bf; // bloom filter words, or nullptr if there is none
 30 | 
 31 |                 inline operator bool() const {
 32 |                         return banks != nullptr;
 33 |                 }
 34 |         };
 35 | 
 36 |         // Facilitates fast set test operations for updated/deleted documents packed
 37 |         // as bitmaps using pack_updates()
 38 |         struct updated_documents_scanner final {
 39 |                 const docid_t *const end; // one past the last skiplist entry
 40 |                 const uint32_t       bankSize;
 41 | 
 42 |                 range_base<docid_t, docid_t> curBankRange; // document IDs covered by the current bank
 43 |                 const docid_t *              skiplistBase; // skiplist entry of the current bank; only moves forward
 44 |                 const uint8_t *              curBank;      // bitmap of the current bank, or nullptr if there are no banks
 45 |                 docid_t                      low_doc_id;
 46 |                 docid_t                      maxDocID;
 47 |                 const docid_t *const         udSkipList;
 48 |                 const uint8_t *const         udBanks;
 49 |                 const uint64_t *const        bf;
 50 | 
 51 |                 void reset() {
 52 |                         maxDocID   = std::numeric_limits<docid_t>::max();
 53 |                         low_doc_id = 0;
 54 |                         curBankRange.Set(maxDocID, 0);
 55 |                 }
 56 |                 // curBank starts out as nullptr so that it is never left indeterminate when the skiplist is empty (the copy constructor and operator== read it)
 57 |                 updated_documents_scanner(const updated_documents &ud)
 58 |                     : end{ud.skiplist + ud.skiplistSize}, bankSize{ud.bankSize}, skiplistBase{ud.skiplist}, curBank{nullptr}, low_doc_id{ud.lowestID}, maxDocID{ud.highestID}, udSkipList{ud.skiplist}, udBanks{ud.banks}, bf{ud.bf} {
 59 |                         if (skiplistBase != end) {
 60 |                                 curBankRange.Set(*skiplistBase, ud.bankSize);
 61 |                                 curBank = udBanks;
 62 |                         }
 63 |                 }
 64 | 
 65 |                 updated_documents_scanner(const updated_documents_scanner &o)
 66 |                     : end{o.end}, bankSize{o.bankSize}, curBankRange(o.curBankRange),
 67 |                       skiplistBase{o.skiplistBase}, curBank{o.curBank},
 68 |                       low_doc_id{o.low_doc_id}, maxDocID{o.maxDocID},
 69 |                       udSkipList{o.udSkipList}, udBanks{o.udBanks}, bf{o.bf} {
 70 |                         // every member is copied by the initializers above, listed in declaration order
 71 |                 }
 72 | 
 73 |                 constexpr bool drained() const noexcept {
 74 |                         return curBankRange.offset == UINT32_MAX;
 75 |                 }
 76 | 
 77 |                 // You are expected to test monotonically increasing document IDs
 78 |                 bool test(const docid_t id) noexcept;
 79 | 
 80 |                 inline bool operator==(const updated_documents_scanner &o) const noexcept {
 81 |                         return end == o.end && bankSize == o.bankSize && curBankRange == o.curBankRange && skiplistBase == o.skiplistBase && curBank == o.curBank && udSkipList == o.udSkipList && udBanks == o.udBanks;
 82 |                 }
 83 |         };
 84 | 
 85 |         void pack_updates(std::vector<docid_t> &updatedDocumentIDs, IOBuffer *const buf); // sorts updatedDocumentIDs in place; appends nothing to buf if the list is empty
 86 | 
 87 |         updated_documents unpack_updates(const range_base<const uint8_t *, uint32_t> content); // the result points into content (nothing is copied), so content must outlive it
 88 | 
 89 |         // Manages multiple scanners and tests against all of them; a scanner that is exhausted (drained) is removed from the collection
 90 |         struct masked_documents_registry final {
 91 |                 bool test(const docid_t id) {
 92 |                         // O(1) checks first
 93 |                         // if we have 10s or 100s of scanner to iterate, we really
 94 |                         // want to be able to check here before we get to consider them all
 95 |                         if (id < min_doc_id || id > max_doc_id) {
 96 |                                 return false;
 97 |                         } else if (const auto m = bf) {
 98 |                                 const uint64_t h = id & (updated_documents::K_bloom_filter_size - 1);
 99 | 
100 |                                 if (0 == (m[h / 64] & (static_cast<uint64_t>(1) << (h & 63)))) {
101 |                                         // fast-path: definitely not here
102 |                                         return false;
103 |                                 }
104 |                         }
105 | 
106 |                         for (uint8_t i{0}; i < rem;) {
107 |                                 auto it = scanners + i;
108 | 
109 |                                 if (it->test(id)) {
110 |                                         return true;
111 |                                 } else if (it->drained()) {
112 |                                         new (it) updated_documents_scanner(scanners[--rem]);
113 |                                 } else {
114 |                                         ++i;
115 |                                 }
116 |                         }
117 | 
118 |                         return false;
119 |                 }
120 | 
121 |                 uint8_t                   rem;
122 |                 docid_t                   min_doc_id, max_doc_id;
123 |                 uint64_t *                bf{nullptr};
124 |                 updated_documents_scanner scanners[0];
125 | 
126 |                 masked_documents_registry()
127 |                     : rem{0}, min_doc_id{std::numeric_limits<docid_t>::max()}, max_doc_id{std::numeric_limits<docid_t>::min()} { // empty [min, max] range: test() rejects every id until make() sets the real bounds
128 |                 }
129 | 
130 |                 ~masked_documents_registry() noexcept {
131 |                         if (bf) {
132 |                                 free(bf);
133 |                         }
134 |                 }
135 | 
136 |                 inline auto size() const noexcept {
137 |                         return rem;
138 |                 }
139 | 
140 |                 inline auto empty() const noexcept {
141 |                         return 0 == rem;
142 |                 }
143 | 
144 |                 // Heap instances are created by make() on malloc()ed storage (the scanners live right after the header), so the
145 |                 // matching deallocation is free(). This is what `delete` -- e.g. std::unique_ptr's default deleter -- ends up calling
146 |                 static void operator delete(void *p) noexcept {
147 |                         free(p);
148 |                 }
149 | 
150 |                 // we no longer build a BF here
151 |                 // because this registry is materialized in every query and it's expensive-ish
152 |                 // we will instead rely on the per scanner bf
153 |                 static std::unique_ptr<Trinity::masked_documents_registry> make(const updated_documents *ud_list, const std::size_t n, const bool use_bf = false) {
154 |                         EXPECT(n <= std::numeric_limits<uint8_t>::max());
155 | 
156 |                         auto mem = malloc(sizeof(masked_documents_registry) + sizeof(updated_documents_scanner) * n);
157 | 
158 |                         EXPECT(mem); // placement new on a null pointer is undefined behavior
159 | 
160 |                         auto      ptr = new (mem) masked_documents_registry();
161 |                         docid_t   min_doc_id{std::numeric_limits<docid_t>::max()}, max_doc_id{std::numeric_limits<docid_t>::min()};
162 |                         uint64_t *bf = use_bf ? reinterpret_cast<uint64_t *>(calloc(updated_documents::K_bloom_filter_size / 64, sizeof(uint64_t))) : nullptr;
163 | 
164 |                         ptr->rem = n;
165 | 
166 |                         for (uint32_t i{0}; i != n; ++i) {
167 |                                 const auto ud    = ud_list + i;
168 |                                 const auto ud_bf = ud->bf;
169 | 
170 |                                 new (&ptr->scanners[i]) updated_documents_scanner(*ud);
171 | 
172 |                                 max_doc_id = std::max(max_doc_id, ud->highestID);
173 |                                 min_doc_id = std::min(min_doc_id, ud->lowestID);
174 |                                 if (bf) {
175 |                                         if (!ud_bf) { // a source without a bloom filter would make the merged filter report false negatives: give up on it
176 |                                                 free(bf);
177 |                                                 bf = nullptr;
178 |                                         } else {
179 |                                                 for (size_t w{0}; w < updated_documents::K_bloom_filter_size / 64; ++w) {
180 |                                                         bf[w] |= ud_bf[w];
181 |                                                 }
182 |                                         }
183 |                                 }
184 |                         }
185 | 
186 |                         ptr->min_doc_id = min_doc_id;
187 |                         ptr->max_doc_id = max_doc_id;
188 |                         ptr->bf         = bf;
189 |                         return std::unique_ptr<Trinity::masked_documents_registry>(ptr);
190 |                 }
191 |         };
192 | } // namespace Trinity
193 | 


--------------------------------------------------------------------------------
/docset_iterators_base.h:
--------------------------------------------------------------------------------
  1 | // Keep Trinity::DocsSetIterators::Iterator separate, so that we can
  2 | // include just this file, and not docset_iterators.h, which "pollutes" Trinity namespace with a forward decl of queryexec_ctx
  3 | // in case some application needs it and would result in amiguous reference errors
  4 | #pragma once
  5 | #include "relevant_documents.h"
  6 | #include <switch.h>
  7 | 
  8 | namespace Trinity {
  9 |         namespace DocsSetIterators {
 10 |                 enum class Type : uint8_t { // concrete kind of an iterator; every Iterator stores one in its `type` member
 11 |                         PostingsListIterator = 0,
 12 |                         DisjunctionSome,
 13 |                         Filter,
 14 |                         Optional,
 15 |                         Disjunction,
 16 |                         DisjunctionAllPLI,
 17 |                         Phrase,
 18 |                         Conjuction,
 19 |                         ConjuctionAllPLI,
 20 |                         AppIterator, // passed to the Iterator constructor by struct AppIterator
 21 |                         VectorIDs,
 22 |                         Dummy,
 23 |                 };
 24 | 
 25 |                 // An iterator provides access to a set of documents ordered by increasing ID.
 26 |                 // The two main methods, next() and advance(), are used to access the documents.
 27 |                 // It subclasses relevant_document_provider, which means it may also provide a score
 28 |                 // if any Iterator subclass implements score() -- which is useful/required for support
 29 |                 // of "Accumulated Score Scheme" execution mode.
 30 |                 //
 31 |                 //
 32 |                 // Subclassing relevant_document_provider is somewhat expensive, but we need it to
 33 |                 // support the semantics described in relevant_documents.h
 34 |                 //
 35 |                 // UPDATE: we no longer sub-class it. Instead, a relevant_document_provider may be set for
 36 |                 // the iterator. This is so that we can perhaps support different schemes for different iterators, like Lucene does with e.g
 37 |                 //  ReqMultiOptScorer and other such fancier scorers.
 38 | 		// UPDATE: well, we had to subclass it
 39 |                 //
 40 |                 // Trinity is iterator-centric, not scorer-centric, and the default exec mode is not based on scores accumulation. Trinity
 41 |                 // is optimized for that use case, so it doesn't make sense to do what Lucene does; create scorers where each scorer
 42 |                 // wraps/owns an iterator, etc. Instead, an Iterator here may own an relevant_document_provider, which is responsible
 43 |                 // for scoring whatever the iterator matched. It may not be optimal for when you have selected AccumulatedScoreScheme but
 44 |                 // it's more elegant for all other use cases.
 45 |                 struct Iterator
 46 |                      : public relevant_document_provider {
 47 |                         friend struct IteratorScorer;
 48 | 
 49 |                       public:
 50 |                         // The provider to use for this iterator: either the iterator itself (the default), or some other provider
 51 |                         relevant_document_provider *rdp{this};
 52 | 
 53 |                         // This is here so that we can directly access it without having to go through the vtable
 54 |                         // to invoke current(), which would otherwise be overridden by subclasses -- i.e subclasses are expected
 55 |                         // to update curDocument{} so that current() won't be virtual
 56 |                         struct __anonymous final {
 57 |                                 isrc_docid_t id{0};
 58 |                         } curDocument;
 59 | 
 60 |                         // This is handy, and beats bloating the vtable with e.g a virtual ~Iterator(), a virtual cost() etc,
 61 |                         // that are only used during engine bootstrap, not runtime execution - or are otherwise used rarely
 62 |                         const Type type;
 63 | 
 64 |                       public:
 65 |                         Iterator(const Type t)
 66 |                             : type{t} {
 67 |                         }
 68 | 
 69 |                         inline auto current() const noexcept {
 70 |                                 return curDocument.id;
 71 |                         }
 72 | 
 73 |                         // Advances to the first document beyond the current one whose ID is >= target, and returns that document ID
 74 |                         // Example:
 75 |                         // isrc_docid_t advance(const isrc_docid_t target) { isrc_docid_t id; while ((id = next()) < target) {} return id; }
 76 |                         //
 77 |                         // XXX: some of the Iterators will check if (current == target), i.e won't next() before they check
 78 |                         // for performance and simplicity reasons. It doesn't really affect our use so it is OK
 79 |                         // UPDATE: it actually does to some extent
 80 |                         virtual isrc_docid_t advance(const isrc_docid_t target) = 0;
 81 | 
 82 |                         // If at the end of the set, returns DocIDsEND; otherwise advances to the next document and returns its ID
 83 |                         virtual isrc_docid_t next() = 0;
 84 | 
 85 |                         // This is not virtual so that we won't bloat the vtable
 86 |                         // instead, it just invokes DocsSetIterators::cost(Iterator *) passing this
 87 |                         uint64_t cost();
 88 | 
 89 |                         // relevant_document_provider() overrides
 90 |                         // we are only going to need to implement total_matches()
 91 |                         // score() returns 0; if we needed actual scoring we would have
 92 |                         // set owner to an IteratorScorer
 93 |                         inline isrc_docid_t document() const noexcept override final {
 94 |                                 return curDocument.id;
 95 |                         }
 96 |                 };
 97 | 
 98 |                 class IndexSource;
 99 | 
100 |                 // If you are going to provide your own application iterator, you will need
101 |                 // to subclass AppIterator. Its main purpose is to provide a virtual destructor, which
102 |                 // is required for docsetsIterators destruction, and some other facilities specific to those iterators
103 |                 //
104 |                 // They are produced by factory functions and are tracked by the execution engine.
105 |                 // That is, ast_node nodes of Type::app_ids_set (or whatever) will embed a pointer to a factory class
106 |                 // which will be asked to provide an AppIterator instance (which would be passed the context embedded in
107 |                 // the ast_node).
108 |                 // Iterator subclass tagged Type::AppIterator that carries the IndexSource it was constructed with
109 |                 struct AppIterator : public Iterator {
110 |                         IndexSource *const isrc; // fixed at construction
111 | 
112 |                         AppIterator(IndexSource *const src)
113 |                             : Iterator(Type::AppIterator)
114 |                             , isrc{src} {
115 |                         }
116 | 
117 |                         virtual ~AppIterator() = default; // lets subclasses be destroyed through an AppIterator pointer
118 |                 };
119 |         } // namespace DocsSetIterators
120 | } // namespace Trinity
121 | 


--------------------------------------------------------------------------------
/docwordspace.cpp:
--------------------------------------------------------------------------------
 1 | #include "docwordspace.h"
 2 | 
 3 | bool Trinity::DocWordsSpace::test_phrase(const std::vector<exec_term_id_t> &phraseTerms, const tokenpos_t *phraseFirstTokenPositions, const tokenpos_t phraseFirstTokenPositionsCnt) const {
 4 |         // True if, for some candidate start position, terms [1, n) are set right after it. n == 0 matches nothing (and keeps k, which starts at 1, inside phraseTerms)
 5 |         const auto n = phraseTerms.size();
 6 |         for (uint32_t i{0}; n && i != phraseFirstTokenPositionsCnt; ++i) {
 7 |                 const auto pos = phraseFirstTokenPositions[i];
 8 |                 tokenpos_t k{1}; // term 0 is not re-tested; pos is taken to be one of its positions
 9 |                 while (k != n && test(phraseTerms[k], pos + k))
10 |                         ++k;
11 |                 if (k == n)
12 |                         return true;
13 |         }
14 |         return false;
15 | }
16 | 


--------------------------------------------------------------------------------
/docwordspace.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include "common.h"
  3 | #include "runtime.h"
  4 | #include "common.h"
  5 | 
  6 | 
  7 | namespace Trinity {
  8 |         class DocWordsSpace final {
  9 |               private:
 10 |                 // Just 4 bytes/position.
 11 |                 // We could have separated docSeq and termID into different arrays(as in, not in the same struct) so that reset()
 12 |                 // would only memset() the docSeq array(2 bytes vs 4 bytes) * maxPos
 13 |                 // but that would make access somewhat slower for set() and test() because we would get more cache misses, so we optimise for access
 14 |                 using seq_t = uint16_t;
 15 | 
 16 |                 struct position final {
 17 |                         exec_term_id_t termID; // See IMPL.md
 18 |                         seq_t          docSeq; // curSeq at the time of set(); any other value (e.g 0 after unset()) means the slot is not set
 19 |                 };
 20 | 
 21 |                 position *const positions;
 22 |                 const uint32_t  maxPos;
 23 |                 seq_t           curSeq;
 24 | 
 25 |               public:
 26 |                 // Allocating max + 1 + Trinity::Limits::MaxPhraseSize slots, because MaxPhraseSize is the theoretical maximum phrase size
 27 |                 // and if we are going to test starting from maxPos extending some positions ahead, we want to
 28 |                 // make sure we won't read outside positions.
 29 |                 // The extra positions will be always initialized to 0 and we won't need to reset those in reset()
 30 |                 DocWordsSpace(const uint32_t max = Trinity::Limits::MaxPosition)
 31 |                     : positions((position *)calloc(max + 1 + Trinity::Limits::MaxPhraseSize, sizeof(position))), maxPos{max} {
 32 |                         curSeq = 1; // IMPORTANT, start from (1)
 33 |                         EXPECT(max && max <= Trinity::Limits::MaxPosition);
 34 |                         EXPECT(positions); // calloc() can fail; check here rather than dereference nullptr in set()/test()
 35 |                 }
 36 | 
 37 |                 // positions is owned and released in the destructor, so a copy would free it twice
 38 |                 DocWordsSpace(const DocWordsSpace &) = delete;
 39 |                 DocWordsSpace &operator=(const DocWordsSpace &) = delete;
 40 | 
 41 |                 ~DocWordsSpace() noexcept {
 42 |                         std::free(positions);
 43 |                 }
 44 | 
 45 |                 void reset() {
 46 |                         // In order to avoid resetting/clearing positions[] for every other document
 47 |                         // we track a document-specific identifier in positions[] so if positions[idx].docSeq != curSeq
 48 |                         // then whatever is in positions[] is stale and should be considered unset.
 49 |                         //
 50 |                         // In a previous Trinity design we stored the document ID as u32 but that is excessive, we care about cache-misses
 51 |                         // so now we instead use a `uint16_t curSeq` and periodically clear. This is more efficient
 52 |                         if (unlikely(curSeq == std::numeric_limits<seq_t>::max())) {
 53 |                                 // we reset every 65k(for u16 seq_t) documents
 54 |                                 // this is preferable to using uint32_t to encode the actual document in position{}
 55 |                                 // no need to memset() for (maxPos + 1 + Trinity::Limits::MaxPhraseSize), just up to (maxPos + 1)
 56 |                                 memset(positions, 0, sizeof(position) * (maxPos + 1));
 57 |                                 curSeq = 1; // important; set to 1 not 0
 58 |                         } else {
 59 |                                 ++curSeq;
 60 |                         }
 61 |                 }
 62 | 
 63 |                 // XXX: pos must be > 0
 64 | #if !defined(TRINITY_VERIFY_HITS)
 65 |                 [[gnu::always_inline]] void set(const exec_term_id_t termID, const tokenpos_t pos) noexcept {
 66 |                         positions[pos] = {termID, curSeq};
 67 |                 }
 68 | #else
 69 |                 [[gnu::always_inline]] void set(const exec_term_id_t termID, const tokenpos_t pos) {
 70 |                         EXPECT(pos < maxPos);
 71 |                         positions[pos] = {termID, curSeq};
 72 |                 }
 73 | #endif
 74 | 
 75 | #if 1
 76 |                 /*
 77 |                  * -O1. NOTE(review): this listing compares the 16 bits at offset 0 of the slot with curSeq, i.e it looks like it was captured when docSeq preceded termID in position{}
 78 |         movq    (%rdi), %rax
 79 |         movzwl  (%rax,%rdx,4), %ecx
 80 |         cmpw    12(%rdi), %cx
 81 |         jne     .LBB3_1
 82 |         cmpw    %si, 2(%rax,%rdx,4)
 83 |         sete    %al
 84 |         retq
 85 | .LBB3_1:
 86 |         xorl    %eax, %eax
 87 |         retq
 88 |                  */
 89 |                 // Turns out with -O1 or higher, this is faster than the alternative impl
 90 |                 // based on my folly benchmarks
 91 |                 // XXX: pos must be > 0
 92 |                 inline bool test(const exec_term_id_t termID, const tokenpos_t pos) const noexcept {
 93 |                         return positions[pos].docSeq == curSeq && positions[pos].termID == termID;
 94 |                 }
 95 | 
 96 | #else
 97 |                 /*
 98 |                  * -O1
 99 |         shll    $16, %esi
100 |         movzwl  12(%rdi), %eax
101 |         orl     %esi, %eax
102 |         movq    (%rdi), %rcx
103 |         cmpl    (%rcx,%rdx,4), %eax
104 |         sete    %al
105 |         retq
106 |                  */
107 |                 inline bool test(const exec_term_id_t termID, const uint16_t pos) const noexcept {
108 |                         // this works only on little-endian archs.
109 |                         // not sure if this is faster than the older impl.
110 |                         // in -O1, this saves us 2 instructions and looks faster. Will investigate
111 |                         // NOTE(review): the packing below puts curSeq in the low 16 bits, which does not match position{termID, docSeq} as declared above -- fix before enabling
112 |                         //
113 |                         // TODO: We could in theory use this trick to check for _2_ positions ahead by
114 |                         // building a u64 and using *(uint64_t *)&positions[pos] which will also
115 |                         // access the adjacent positions[pos+1].
116 |                         // e.g test2() method
117 |                         static_assert(sizeof(termID) == sizeof(uint16_t));
118 |                         static_assert(sizeof(curSeq) == sizeof(uint16_t));
119 | 
120 |                         return ((uint32_t(termID) << 16) | curSeq) == *(uint32_t *)&positions[pos];
121 |                 }
122 | #endif
123 | 
124 |                 // This can facilitate tracking sequences(e.g 2+ query terms matches in a document) of a MatchedIndexDocumentsFilter::consider()  impl.
125 |                 inline void unset(const tokenpos_t pos) noexcept {
126 |                         positions[pos].docSeq = 0;
127 |                 }
128 | 
129 |                 // We can probably just sort all phrase terms by freq asc
130 |                 // and iterate across all hits, and for each hit, see if it is set
131 |                 // in the adjacent position for the next phrase term, and the next, and so on, but
132 |                 // we would need to track the offset(relative index in the phrase)
133 |                 // This is an example/reference implementation
134 |                 bool test_phrase(const std::vector<exec_term_id_t> &phraseTerms, const tokenpos_t *phraseFirstTokenPositions, const tokenpos_t phraseFirstTokenPositionsCnt) const;
135 | 
136 |                 auto max_pos() const noexcept {
137 |                         return maxPos;
138 |                 }
139 |         };
140 | } // namespace Trinity
141 | 


--------------------------------------------------------------------------------
/exec.h:
--------------------------------------------------------------------------------
  1 | // Please refer to https://github.com/phaistos-networks/Trinity/wiki/Query-Execution-Engine-Internals
  2 | #pragma once
  3 | #include "docidupdates.h"
  4 | #include "index_source.h"
  5 | #include "matches.h"
  6 | #include "queries.h"
  7 | #include "similarity.h"
  8 | #include <future>
  9 | 
 10 | namespace Trinity {
 11 |         enum class ExecFlags : uint32_t {
 12 |                 // Documents-only mode: MatchedIndexDocumentsFilter's subclass consider(const docid_t) is invoked with
 13 |                 // just the matching document ID, instead of consider(matched_document &) which the default execution
 14 |                 // mode invokes with rich information about any and all matched tokens etc.
 15 |                 //
 16 |                 // Use it when you only want to count or collect the documents matching a query and do not care
 17 |                 // which of the terms (in case of ORs) matched a document. You will not get a chance to
 18 |                 // compute a trinity/query score based on the matched terms.
 19 |                 //
 20 |                 // Another example is a prefix-search people search system (like LinkedIn's), where you want
 21 |                 // every user matching the query and really don't care
 22 |                 // which of the terms (or their hits) did so.
 23 |                 DocumentsOnly = 1u << 0,
 24 | 
 25 |                 // Selects a query execution mode that matches Lucene's. Useful for
 26 |                 // very specific use cases, like visual search, and wherever faster execution matters more than
 27 |                 // the higher relevancy you could compute from the rich information Trinity tracks and provides in matched_document
 28 |                 // in the default execution mode.
 29 |                 //
 30 |                 // In this mode the scores of the various iterators are accumulated into a "score" and
 31 |                 // MatchedIndexDocumentsFilter's subclass consider(const docid_t, const double score) is invoked instead.
 32 |                 //
 33 |                 // If your Similarity Scorer depends on index sources field statistics, check Trinity::merge() disableOptimizations parameter
 34 |                 AccumulatedScoreScheme = 1u << 1,
 35 | 
 36 |                 // If set, unique (termID, toNextSpan) pairs are tracked for MatchedIndexDocumentsFilter::queryIndicesTerms
 37 |                 // rather than unique (termID, toNextSpan, flags) triplets -- that is, the older semantics are respected.
 38 |                 // Use this flag if you are not interested in the triplet, only in the pair.
 39 |                 // When it is set, query_index_term::flags will be set to 0.
 40 |                 // This is really only relevant if the default exec. mode is selected
 41 |                 // i.e neither DocumentsOnly nor AccumulatedScoreScheme are set in the flags passed to exec_query()
 42 |                 DisregardTokenFlagsForQueryIndicesTerms = 1u << 2
 43 |         };
 44 | 
 45 |         static inline void validate_flags(const uint32_t f) { // throws when both mutually exclusive execution modes are requested
 46 |                 constexpr uint32_t exclusiveModes = unsigned(ExecFlags::DocumentsOnly) | unsigned(ExecFlags::AccumulatedScoreScheme);
 47 |                 if ((f & exclusiveModes) == exclusiveModes) throw Switch::invalid_argument("DocumentsOnly and AccumulatedScoreScheme are mutually exclusive modes");
 48 |         }
 49 | 
 50 |         void exec_query(const query &in, IndexSource *, masked_documents_registry *const maskedDocumentsRegistry, MatchedIndexDocumentsFilter *, IndexDocumentsFilter *const f = nullptr,
 51 |                         const uint32_t                      flags  = 0,
 52 |                         Similarity::IndexSourceTermsScorer *scorer = nullptr);
 53 | 
 54 |         // Handy utility function; executes query on all index sources in the provided collection in sequence and returns
 55 |         // a vector with the match filters/results of each execution.
 56 |         //
 57 |         // You are expected to merge/reduce/blend them.
 58 |         // It's trivial to do this in parallel using e.g std::async() or any other means of scheduling exec_query() for each index source in
 59 |         // a different thread. See exec_query_par() for a possible implementation.
 60 |         //
 61 |         // Note that execution of sources does not depend on state of other sources - they are isolated so parallel processing them requires
 62 |         // no coordination.
 63 |         template <typename T, typename... Arg>
 64 |         std::vector<std::unique_ptr<T>> exec_query(const query &in, IndexSourcesCollection *collection, IndexDocumentsFilter *f, const uint32_t flags, Arg &&... args) {
 65 |                 static_assert(std::is_base_of<MatchedIndexDocumentsFilter, T>::value, "Expected a MatchedIndexDocumentsFilter subclass");
 66 |                 const auto                      n = collection->sources.size();
 67 |                 std::vector<std::unique_ptr<T>> out;
 68 | 
 69 |                 validate_flags(flags);
 70 |                 out.reserve(n);
 71 |                 for (size_t i{0}; i != n; ++i) {
 72 |                         auto source  = collection->sources[i];
 73 |                         auto scanner = collection->scanner_registry_for(i);
 74 |                         // args are passed as lvalues: forwarding them would let the first filter move from
 75 |                         // rvalue arguments, leaving moved-from values for the filter of every later source
 76 |                         auto filter = std::make_unique<T>(args...);
 77 |                         exec_query(in, source, scanner.get(), filter.get(), f, flags);
 78 |                         out.push_back(std::move(filter));
 79 |                 }
 80 |                 return out;
 81 |         }
 82 | 
 83 |         // Parallel query execution: the first source is processed on the calling thread, every other one via std::async().
 84 |         // Empty sources (index_empty()) are skipped, so the result may hold fewer than collection->sources.size() filters.
 85 |         // This variant also supports ExecFlags::AccumulatedScoreScheme, for which you must provide cs (throws Switch::invalid_argument otherwise).
 86 |         template <typename T, typename... Arg>
 87 |         std::vector<std::unique_ptr<T>> exec_query_par(const query &in,
 88 |                                                        IndexSourcesCollection *collection,
 89 |                                                        IndexDocumentsFilter *f,
 90 |                                                        const uint32_t flags,
 91 |                                                        Trinity::Similarity::IndexSourcesCollectionTermsScorer *cs,
 92 |                                                        Arg &&... args) {
 93 |                 static_assert(std::is_base_of<MatchedIndexDocumentsFilter, T>::value, "Expected a MatchedIndexDocumentsFilter subclass");
 94 |                 const auto                      n = collection->sources.size();
 95 |                 std::vector<std::unique_ptr<T>> out;
 96 |                 // One T is built per non-empty source, on several threads, so args are always passed as lvalues (args...):
 97 |                 // std::forward would let one filter move from rvalue arguments that the others still have to read.
 98 |                 validate_flags(flags);
 99 |                 if (!n) {
100 |                         return out;
101 |                 }
102 | 
103 |                 const bool accumScoreScheme = flags & unsigned(ExecFlags::AccumulatedScoreScheme);
104 | 
105 |                 if (accumScoreScheme) {
106 |                         if (!cs) {
107 |                                 throw Switch::invalid_argument("IndexSourcesCollectionTermsScorer not set");
108 |                         }
109 | 
110 |                         // May or may not do something here
111 |                         cs->reset(collection);
112 |                 }
113 | 
114 |                 if (n == 1) {
115 |                         // fast-path: single source
116 |                         if (false == collection->sources[0]->index_empty()) {
117 |                                 auto                                                source  = collection->sources[0];
118 |                                 auto                                                scanner = collection->scanner_registry_for(0);
119 |                                 auto                                                filter  = std::make_unique<T>(args...);
120 |                                 std::unique_ptr<Similarity::IndexSourceTermsScorer> scorer;
121 | 
122 |                                 if (accumScoreScheme) {
123 |                                         scorer.reset(cs->new_source_scorer(source));
124 |                                 }
125 | 
126 |                                 exec_query(in, source, scanner.get(), filter.get(), f, flags, scorer.get());
127 |                                 out.push_back(std::move(filter));
128 |                         }
129 |                         return out;
130 |                 }
131 | 
132 |                 std::vector<std::future<std::unique_ptr<T>>> futures;
133 | 
134 |                 // Schedule all but the first via std::async()
135 |                 // we 'll handle the first here.
136 |                 for (size_t i{1}; i != n; ++i) {
137 |                         if (false == collection->sources[i]->index_empty()) {
138 |                                 futures.emplace_back(
139 |                                     std::async(std::launch::async, [&, accumScoreScheme](const size_t i) {
140 |                                             auto                                                source  = collection->sources[i];
141 |                                             auto                                                scanner = collection->scanner_registry_for(i);
142 |                                             auto                                                filter  = std::make_unique<T>(args...);
143 |                                             std::unique_ptr<Similarity::IndexSourceTermsScorer> scorer;
144 | 
145 |                                             if (accumScoreScheme) {
146 |                                                     scorer.reset(cs->new_source_scorer(source));
147 |                                             }
148 | 
149 |                                             exec_query(in, source, scanner.get(), filter.get(), f, flags, scorer.get());
150 |                                             return filter;
151 |                                     },
152 |                                                i));
153 |                         }
154 |                 }
155 | 
156 |                 if (auto source = collection->sources[0]; false == source->index_empty()) {
157 |                         auto                                                scanner = collection->scanner_registry_for(0);
158 |                         auto                                                filter  = std::make_unique<T>(args...);
159 |                         std::unique_ptr<Similarity::IndexSourceTermsScorer> scorer;
160 | 
161 |                         if (accumScoreScheme) {
162 |                                 scorer.reset(cs->new_source_scorer(source));
163 |                         }
164 | 
165 |                         exec_query(in, source, scanner.get(), filter.get(), f, flags, scorer.get());
166 |                         out.push_back(std::move(filter));
167 |                 }
168 | 
169 |                 while (!futures.empty()) {
170 |                         // get() blocks until that source has been processed (and rethrows whatever its task threw);
171 |                         // results are appended starting from the most recently scheduled source
172 |                         out.push_back(futures.back().get());
173 |                         futures.pop_back();
174 |                 }
175 | 
176 |                 return out;
177 |         }
178 | }; // namespace Trinity
179 | 


--------------------------------------------------------------------------------
/index_source.cpp:
--------------------------------------------------------------------------------
 1 | #include "index_source.h"
 2 | 
 3 | void Trinity::IndexSourcesCollection::commit() {
 4 |         // Highest generation first
 5 |         const auto newer_first = [](const auto lhs, const auto rhs) noexcept { return rhs->generation() < lhs->generation(); };
 6 | 
 7 |         std::sort(sources.begin(), sources.end(), newer_first);
 8 |         map.clear();
 9 |         all.clear();
10 | 
11 |         for (auto source : sources) {
12 |                 auto masked = source->masked_documents();
13 | 
14 |                 map.push_back({source, all.size()}); // pairs the source with the number of entries already in all
15 |                 if (masked) all.push_back(masked);
16 |         }
17 | }
18 | 
19 | Trinity::IndexSourcesCollection::~IndexSourcesCollection() {
20 |         // Release() the sources back to front, dropping each entry right after it has been released
21 |         for (; !sources.empty(); sources.pop_back()) {
22 |                 sources.back()->Release();
23 |         }
24 | }
25 | 
26 | std::unique_ptr<Trinity::masked_documents_registry> Trinity::IndexSourcesCollection::scanner_registry_for(const uint16_t idx) {
27 |         // map[idx].second is the count recorded for this source by commit(): that many leading entries of all are handed over
28 |         const auto maskedCnt = map[idx].second;
29 |         return masked_documents_registry::make(all.data(), maskedCnt);
30 | }
31 | 


--------------------------------------------------------------------------------
/intersect.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "docidupdates.h"
 3 | #include "index_source.h"
 4 | #include "matches.h"
 5 | #include "queries.h"
 6 | #include <unordered_set>
 7 | #include <switch_bitops.h>
 8 | 
 9 | // This only works(currently) if IndexSource::translate_docid() is an identity function
10 | // because we are merge-sorting expecting the order of IDs to be monotonically ascending
11 | namespace Trinity {
12 |         // tokens is a std::vector<> of std::unordered_set<> where each set represents the list of synonymous tokens (e.g ball, balls)
13 |         // that std::vector<> size cannot exceed 64 (each token index maps to one bit of a uint64_t, and sizeof(uint64_t) * 8 == 64)
14 | 
15 |         // If you want to ignore intersections here (as opposed to later when you process the results), by checking if the first or last token
16 |         // is either of the ones encoded in stopwordsMask, set stopwordsMask to a value != 0
17 |         // Just set uint64_t(1) << index, where index is the token index in tokens specified in the method
18 |         // You probably should use this mask because we ignore intersections that mask other intersections completely
19 |         //
20 |         // TODO: consider intersections for [world of warcraft gears of war]
21 |         // if we set 'of' as stop word in stopwordsMask, we 'll likely return (world of warcraft, gears, war)
22 |         // if we don't, we 'll return (world of warcraft, of gears war, of war)
23 |         // neither of them is optimal, but then again, there is not much we can do about this except perhaps
24 |         // consider past queries composed of those tokens and figure out which is the most popular arrangement of tokens
25 |         void intersect_impl(const uint64_t stopwordsMask, const std::vector<std::unordered_set<str8_t>> &tokens, IndexSource *src, masked_documents_registry *, std::vector<std::pair<uint64_t, uint32_t>> *);
26 | 
27 |         inline std::vector<std::pair<uint64_t, uint32_t>> intersect(const uint64_t stopwordsMask, const std::vector<std::unordered_set<str8_t>> &tokens, IndexSource *src, masked_documents_registry *reg) {
28 |                 // By-value convenience wrapper: intersect_impl() fills the vector through the pointer it is given
29 |                 std::vector<std::pair<uint64_t, uint32_t>> collected;
30 |                 intersect_impl(stopwordsMask, tokens, src, reg, &collected);
31 |                 return collected;
32 |         }
33 | 
34 |         // Should just merge from the collection and then return that
35 |         std::vector<std::pair<uint64_t, uint32_t>> intersect(const uint64_t                                 stopwordsMask,
36 |                                                              const std::vector<std::unordered_set<str8_t>> &tokens,
37 |                                                              IndexSourcesCollection *                       collection);
38 | 
39 |         // Returns the index (bits offsets in bitmap)
40 |         uint8_t intersection_indices(uint64_t bitmap, uint8_t *indices);
41 | 
42 |         static inline bool sort_intersections(const std::pair<uint64_t, uint32_t> &a, const std::pair<uint64_t, uint32_t> &b) noexcept {
43 |                 // Orders by number of set bits in .first, descending; ties go to the larger .second
44 |                 const auto bitsA = SwitchBitOps::PopCnt(a.first), bitsB = SwitchBitOps::PopCnt(b.first);
45 | 
46 |                 return bitsB < bitsA || (bitsB == bitsA && b.second < a.second);
47 |         }
48 | 
49 |         // A handy function that generates alternatives, in the right order, by intersecting the rewrittenQuery(which is the result of
50 |         // Trinity::rewrite_query(), and you need to use it even if you don't end up rewriting the query, because it will set
51 |         // phrase::rewrite_ctx for every phrase/token in the query), across the collection of index sources.
52 |         //
53 |         // You must set K = 1 when invoking rewrite_query(), and only accept single word token expansions.
54 |         // e.g for [macbook], it's correct to expand to [macbooks] but not correct to expand to [mac book](two words expansion)
55 |         // Those restrictions are required for intersections where query rewrites are applied.
56 |         std::vector<std::pair<range_base<str8_t *, uint8_t>, std::pair<uint8_t, std::size_t>>> intersection_alternatives(const query &           originalQuery,
57 |                                                                                                                          const query &           rewrittenQuery,
58 |                                                                                                                          IndexSourcesCollection &collection,
59 |                                                                                                                          simple_allocator *const a);
60 | } // namespace Trinity
61 | 


--------------------------------------------------------------------------------
/matches.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include "docwordspace.h"
  3 | #include "runtime.h"
  4 | 
  5 | namespace Trinity {
  6 |         // We assign an index (base 0) to each token in the query, which is monotonically increasing, except
  7 |         // when we are assigning to tokens in OR expressions, where we need to do more work and it gets more complicated (see assign_query_indices() for how that works).
  8 |         //
  9 |         // Long story short, we track all distinct (termIDs, toNextSpan) combinations for each query index, where
 10 |         // termID is the term ID (execution space) and toNextSpan is how many indices ahead to advance to get
 11 |         // to the next term (1 unless specific OR queries are processed;
 12 |         // can also be 0 if there is no other token to the right).
 13 |         // Please see Trinity::phrase comments
 14 |         //
 15 |         // This is built by exec_query() and passed to MatchedIndexDocumentsFilter::prepare()
 16 |         // It is useful for proximity checks in conjunction with DocWordsSpace
 17 |         struct query_index_term final {
 18 |                 exec_term_id_t     termID;
 19 |                 query_term_flags_t flags;
 20 |                 uint8_t            toNextSpan;
 21 | 
 22 |                 inline bool operator==(const query_index_term &rhs) const noexcept { // true only when all three members match
 23 |                         return toNextSpan == rhs.toNextSpan && flags == rhs.flags && termID == rhs.termID;
 24 |                 }
 25 |         };
 26 | 
 27 |         struct query_index_terms final {
 28 |                 uint16_t cnt; // presumably the number of entries in uniques[] -- confirm against exec_query()
 29 |                 // all distinct query_index_term entries
 30 |                 // uniques are sorted by (termID ASC, toNextSpan ASC, flags ASC)
 31 |                 query_index_term uniques[0]; // zero-length trailing array: storage for the entries has to be allocated past the end of this struct
 32 |         };
 33 | 
 34 |         // Materialized hits for a term and the current document
 35 |         // This is used both for evaluation and for scoring documents
 36 |         struct term_hits final {
 37 |                 // total hits for the term
 38 |                 tokenpos_t freq{0};
 39 |                 term_hit * all{nullptr};
 40 | 
 41 |                 // Facilitates execution -- ignored during scoring
 42 |                 // This is internal and specific to the execution engine impl.
 43 |                 uint16_t allCapacity{0};
 44 |                 union {
 45 |                         uint16_t     docSeq;
 46 |                         isrc_docid_t doc_id{0};
 47 |                 };
 48 | 
 49 |                 void set_docid(const isrc_docid_t id) {
 50 |                         doc_id = id;
 51 |                 }
 52 | 
 53 |                 // Sets freq, growing all first if it cannot hold newFreq hits. The previous contents of all are NOT preserved when it grows.
 54 |                 void set_freq(const tokenpos_t newFreq) {
 55 |                         if (unlikely(newFreq > allCapacity)) {
 56 |                                 // Add the slack in 32 bits and clamp: stored straight into the uint16_t, newFreq + 128 wraps for newFreq > 65407 and all would be under-allocated
 57 |                                 constexpr uint32_t maxCapacity = std::numeric_limits<decltype(allCapacity)>::max();
 58 |                                 const uint32_t     wanted      = uint32_t(newFreq) + 128;
 59 | 
 60 |                                 allCapacity = static_cast<uint16_t>(wanted < maxCapacity ? wanted : maxCapacity);
 61 |                                 std::free(all); // free(nullptr) is a no-op
 62 |                                 all = (term_hit *)std::malloc(sizeof(term_hit) * allCapacity);
 63 |                         }
 64 | 
 65 |                         freq = newFreq;
 66 |                 }
 67 | 
 68 |                 ~term_hits() noexcept { // NOTE(review): all is owned but the struct is still copyable; a copy would free it twice
 69 |                         std::free(all);
 70 |                 }
 71 |         };
 72 | 
 73 | 
 74 |         // We record an instance for each term instances in a original/input query
 75 |         // you can e.g use this information to determine if adjacent terms in the original query are both matched
 76 |         struct query_term_ctx final {
 77 |                 // Information about the term itself
 78 |                 // This is mostly for debugging during score consideration, but having access to
 79 |                 // the distinct termID may be used to facilitate fancy tracking schemes in your MatchedIndexDocumentsFilter::consider()
 80 |                 struct term_struct final {
 81 |                         exec_term_id_t id;
 82 |                         str8_t         token;
 83 |                 } term;
 84 | 
 85 |                 uint16_t instancesCnt; // i.e if your query is [world of warcraft mists of pandaria] then you will have 2 instances for token "of" in the query, with rep = 1
 86 | 
 87 |                 struct instance_struct {
 88 |                         // see Trinity::phrase decl. comments
 89 |                         uint16_t           index;
 90 |                         query_term_flags_t flags;
 91 |                         uint8_t            rep;
 92 |                         uint8_t            toNextSpan;
 93 |                         uint16_t           app_phrase_id;
 94 | 
 95 |                         // handy aux. method: pairs index with toNextSpan, or with (final_index - index) when toNextSpan is 0
 96 |                         inline range_base<uint16_t, uint8_t> subexpr_range_with_final_index(const uint16_t final_index) const noexcept {
 97 |                                 return {index, static_cast<uint8_t>(toNextSpan ?: static_cast<uint8_t>(final_index - index))}; // a ?: b is the GNU extension: yields a unless it is 0, else b
 98 |                         }
 99 | 
100 |                         struct
101 |                         {
102 |                                 // see phrase::rewrite_ctx
103 |                                 range_base<uint16_t, uint8_t> range;
104 |                                 float                         translationCoefficient;
105 |                                 uint8_t                       srcSeqSize;
106 |                         } rewrite_ctx;
107 |                 } instances[0]; // zero-length trailing array; presumably instancesCnt entries are allocated past the end of the struct -- confirm
108 |         };
109 | 
110 |         struct matched_query_term final { // pairs a query term's context with a term_hits instance
111 |                 const query_term_ctx *queryCtx; // the term and its instances in the query
112 |                 term_hits *           hits; // NOTE(review): presumably the term's hits in the matched document -- confirm against the exec. engine
113 |         };
114 | 
115 |         // Score functions are provided with a matched_document
116 |         // and are expected to return a score
117 |         struct matched_document final {
118 |                 docid_t             id; // document ID (GLOBAL); not initialized by the constructor
119 |                 uint16_t            matchedTermsCnt{0};
120 |                 matched_query_term *matchedTerms; // not initialized by the constructor; presumably matchedTermsCnt entries -- confirm
121 |                 // lazily initialized; deleted by the destructor
122 |                 DocWordsSpace *dws{nullptr};
123 | 
124 |                 matched_document() {
125 |                 }
126 | 
127 |                 ~matched_document() {
128 |                         delete dws; // NOTE(review): copy operations are implicitly generated, so a copied matched_document would delete the same dws twice
129 |                 }
130 |         };
131 | 
132 |         struct aborted_search_exception final // exception type whose what() reports "Search Aborted"
133 |             : public std::exception {
134 |                 const char *what() const noexcept override {
135 |                         return "Search Aborted";
136 |                 }
137 |         };
138 | 
139 |         struct MatchedIndexDocumentsFilter {
140 |                 const query_index_terms **queryIndicesTerms; // set by prepare()
141 |                 uint16_t                  query_final_term_index; // may be handy; set by prepare()
142 | 
143 |                 // There are 3 different consider() implementations, and which is invoked by the exec. engine depends on the
144 |                 // ExecFlags passed to Trinity::exec_query().
145 |                 //
146 |                 // Your subclass should override whichever method(s) of those 3 are required based on which flags you use.
147 |                 //
148 |                 // When the default execution mode is selected, this method will be invoked
149 |                 [[gnu::always_inline]] virtual void consider(const matched_document &match) {
150 | 			//
151 |                 }
152 | 
153 |                 // If the Documents Only mode is selected, this will be invoked instead, and it is passed
154 |                 // only the global document ID of the document.
155 |                 virtual void consider(const docid_t id) {
156 | 			// 
157 |                 }
158 | 
159 |                 // You may want to provide a specialized implementation here; the default just invokes consider(id) for each ID.
160 |                 // The runtime may decide to consider() a list of documents for performance reasons
161 |                 virtual void consider(const docid_t *const ids, const size_t cnt) {
162 |                         for (size_t i{0}; i != cnt; ++i) {
163 |                                 consider(ids[i]);
164 | 			}
165 |                 }
166 | 
167 |                 // If the Accumulated Score Scheme mode is selected instead, this
168 |                 // will be invoked; the global document ID and its score will be passed to the call
169 |                 virtual void consider(const docid_t id, const double score) {
170 | 			// 
171 |                 }
172 | 
173 |                 // Invoked before the query execution begins by the exec.engine
174 |                 // You may want to override this if you want to be notified and get a chance to do anything before
175 |                 // the engine executes the query in the index source
176 |                 //
177 |                 // This is only invoked if the default execution mode is selected; it makes no sense otherwise.
178 |                 virtual void prepare(const query_index_terms **queryIndicesTerms_, const uint16_t fi) {
179 |                         queryIndicesTerms      = queryIndicesTerms_;
180 |                         query_final_term_index = fi;
181 |                 }
182 | 
183 |                 virtual ~MatchedIndexDocumentsFilter() {
184 | 			// 
185 |                 }
186 |         };
187 | 
188 |         // You can provide an IndexDocumentsFilter derived class instance to exec_query() and friends, and if you do
189 |         // it will invoke filter(documentId) and if it returns true, the document will be ignored (in addition to
190 |         // checking maskedDocumentsRegistry->test(docID), that is).
191 |         //
192 |         // That way, you will be able to ignore documents before the query is evaluated for them, like we do with
193 |         // maskedDocumentsRegistry.  For example, say you only care for documents created in a specific time range, or have a specific state etc. Instead
194 |         // of evaluating the query, and for matching documents, filtering them in consider() - thereby incurring the cost and overhead of evaluating the
195 |         // query on a document you will eventually disregard anyway - you get to do that before the query is evaluated.
196 |         //
197 |         // In addition to that, you may have your own rules for ignoring documents and that can be implemented in your filter.
198 |         struct IndexDocumentsFilter {
199 |                 virtual bool filter(const docid_t) = 0; // return true if you want to disregard/ignore the document
200 |                 virtual ~IndexDocumentsFilter() = default; // polymorphic base: makes destruction through a base pointer well-defined
201 |         };
202 | } // namespace Trinity
203 | 


--------------------------------------------------------------------------------
/merge.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include "docidupdates.h"
  3 | #include "terms.h"
  4 | #include "index_source.h"
  5 | 
  6 | namespace Trinity {
  7 |         struct merge_candidate final {
  8 |                 // generation of the index source
  9 |                 // See IndexSource::gen
 10 |                 uint64_t gen;
 11 | 
 12 |                 // Access to all terms of the index source
 13 |                 IndexSourceTermsView *terms;
 14 | 
 15 |                 // Facilitates access to the index and other content
 16 |                 Trinity::Codecs::AccessProxy *ap;
 17 | 
 18 |                 // All documents masked in this index source
 19 |                 // more recent candidates(i.e candidates where gen < this gen) will use it
 20 |                 // see MergeCandidatesCollection::merge() impl.
 21 |                 updated_documents maskedDocuments;
 22 | 
 23 |                 merge_candidate &operator=(const merge_candidate &o) { // copies gen/terms/ap and re-constructs maskedDocuments in place
 24 |                         gen   = o.gen;
 25 |                         terms = o.terms;
 26 |                         ap    = o.ap;
 27 |                         new (&maskedDocuments) updated_documents(o.maskedDocuments); // NOTE(review): the previous object is not destroyed first; only safe if updated_documents is trivially destructible -- confirm
 28 |                         return *this;
 29 |                 }
 30 |         };
 31 | 
 32 |         // See IndexSourcesCollection
 33 |         class MergeCandidatesCollection final {
 34 |               private:
 35 |                 std::vector<updated_documents>                    all;
 36 |                 std::vector<std::pair<merge_candidate, uint16_t>> map;
 37 | 
 38 |               public:
 39 |                 std::vector<merge_candidate> candidates;
 40 | 
 41 |               public:
 42 | 		auto size() const noexcept { // number of registered candidates
 43 | 			return candidates.size();
 44 | 		}
 45 | 
 46 |                 void insert(const merge_candidate c) { // appends a copy of c to candidates
 47 |                         candidates.push_back(c);
 48 |                 }
 49 | 
 50 |                 void commit();
 51 | 
 52 |                 std::unique_ptr<Trinity::masked_documents_registry> scanner_registry_for(const uint16_t idx);
 53 | 
 54 |                 // This method will merge all registered merge candidates into a new index session and will also output all
 55 |                 // distinct terms and their term_index_ctx.
 56 |                 // It will properly and optimally handle different input codecs and mismatches between output codec(i.e is->codec_identifier() )
 57 |                 // and input codecs.
 58 |                 //
 59 |                 // You may want to use
 60 |                 // - Trinity::pack_terms() to build the terms files and then persist them
 61 |                 // - Trinity::persist_segment() to persist the actual index
 62 |                 //
 63 |                 // IMPORTANT:
 64 |                 // You should use consider_tracked_sources() after you have merge()ed to figure out what to do with all tracked sources.
 65 |                 //
 66 |                 // You are expected to outIndexSess->begin() before you merge(), and outIndexSess->end() afterwards, though you may
 67 |                 // want to use Trinity::persist_segment(outIndexSess) which will persist and invoke end() for you
 68 |                 //
 69 |                 // You may want to explicitly disable use of IndexSession::append_index_chunk() and IndexSession::merge(), even if it is supported by the outIndexSess's codec.
 70 |                 // If you are going to use ExecFlags::AccumulatedScoreScheme, and your scorer depends on IndexSource::field_statistics, those are
 71 |                 // only computed, during merge, for terms that are not handled by append_index_chunk(), so you may want to disable it, so that
 72 |                 // statistics for those terms as well will be collected.
 73 |                 void merge(Codecs::IndexSession *outIndexSess,
 74 |                            simple_allocator *,
 75 |                            std::vector<std::pair<str8_t, term_index_ctx>> *const outTerms,
 76 |                            IndexSource::field_statistics *                       fs,
 77 |                            const uint32_t                                        flushFreq            = 0,
 78 |                            const bool                                            disableOptimizations = false);
 79 | 
 80 |                 enum class IndexSourceRetention : uint8_t {
 81 |                         RetainAll = 0,
 82 |                         RetainDocumentIDsUpdates,
 83 |                         Delete
 84 |                 };
 85 | 
 86 |                 // Once you have commit()ed and merge()d, you should provide
 87 |                 // all tracked sources, and this method will return another vector with a std::pair<uint64_t, IndexSourceRetention>
 88 |                 // for each of the sources(identified by generation) that you should consider;
 89 |                 // RetainAll:  don't delete anything, leave it alone
 90 |                 // RetainDocumentIDsUpdates: delete all index files but retain document IDs updates files
 91 |                 // Delete: wipe out the directory, retain nothing
 92 |                 std::vector<std::pair<uint64_t, IndexSourceRetention>> consider_tracked_sources(std::vector<uint64_t> trackedSources);
 93 |         };
 94 | } // namespace Trinity
 95 | 
 96 | static inline void PrintImpl(Buffer &b, const Trinity::MergeCandidatesCollection::IndexSourceRetention r) {
 97 |         // Appends a short label naming the retention policy r to b
 98 |         using Retention = Trinity::MergeCandidatesCollection::IndexSourceRetention;
 99 |         switch (r) {
100 |                 case Retention::RetainAll:
101 |                         b.append("ALL"_s32);
102 |                         return;
103 |                 case Retention::RetainDocumentIDsUpdates:
104 |                         b.append("DocIDs"_s32);
105 |                         return;
106 |                 case Retention::Delete:
107 |                         b.append("DELETE"_s32);
108 |                         return;
109 |         }
110 | }
111 | 


--------------------------------------------------------------------------------
/percolator.cpp:
--------------------------------------------------------------------------------
  1 | #include "percolator.h"
  2 | 
  3 | using namespace Trinity;
  4 | 
  5 | bool percolator_query::match(percolator_document_proxy &src) const { // evaluates the compiled query against src, starting at the root exec node
  6 |         return exec(root, src);
  7 | }
  8 | 
  9 | bool percolator_query::exec(const exec_node n, percolator_document_proxy &src) const { // recursively evaluates exec node n against src; true if the node matches
 10 |         switch (n.fp) {
 11 |                 case ENT::matchterm:
 12 |                         return src.match_term(n.u16);
 13 | 
 14 |                 case ENT::constfalse:
 15 |                         return false;
 16 | 
 17 |                 case ENT::consttrue:
 18 |                         return true;
 19 | 
 20 |                 case ENT::matchallterms: {
 21 |                         const auto run = static_cast<const compilation_ctx::termsrun *>(n.ptr);
 22 | 
 23 |                         for (decltype(run->size) i{0}; i != run->size; ++i) {
 24 |                                 if (!src.match_term(run->terms[i]))
 25 |                                         return false;
 26 |                         }
 27 | 
 28 |                         return true;
 29 |                 }
 30 | 
 31 |                 case ENT::matchanyterms: {
 32 |                         const auto run = static_cast<const compilation_ctx::termsrun *>(n.ptr);
 33 | 
 34 |                         for (decltype(run->size) i{0}; i != run->size; ++i) {
 35 |                                 if (src.match_term(run->terms[i]))
 36 |                                         return true;
 37 |                         }
 38 | 
 39 |                         return false;
 40 |                 }
 41 | 
 42 |                 case ENT::unaryand:
 43 |                         return exec(static_cast<const compilation_ctx::unaryop_ctx *>(n.ptr)->expr, src);
 44 | 
 45 |                 case ENT::unarynot:
 46 |                         return !exec(static_cast<const compilation_ctx::unaryop_ctx *>(n.ptr)->expr, src);
 47 | 
 48 |                 case ENT::matchanyphrases: {
 49 |                         const auto run = static_cast<const compilation_ctx::phrasesrun *>(n.ptr);
 50 | 
 51 |                         for (decltype(run->size) i{0}; i != run->size; ++i) {
 52 |                                 const auto p = run->phrases[i];
 53 | 
 54 |                                 if (src.match_phrase(p->termIDs, p->size))
 55 |                                         return true;
 56 |                         }
 57 | 
 58 |                         return false;
 59 |                 }
 60 | 
 61 |                 case ENT::matchallphrases: {
 62 |                         const auto run = static_cast<const compilation_ctx::phrasesrun *>(n.ptr);
 63 | 
 64 |                         for (decltype(run->size) i{0}; i != run->size; ++i) {
 65 |                                 const auto p = run->phrases[i];
 66 | 
 67 |                                 if (!src.match_phrase(p->termIDs, p->size))
 68 |                                         return false;
 69 |                         }
 70 | 
 71 |                         return true;
 72 |                 }
 73 | 
 74 |                 case ENT::matchphrase: {
 75 |                         const auto p = static_cast<const compilation_ctx::phrase *>(n.ptr);
 76 | 
 77 |                         return src.match_phrase(p->termIDs, p->size);
 78 |                 }
 79 | 
 80 |                 case ENT::logicaland: {
 81 |                         const auto b = static_cast<const compilation_ctx::binop_ctx *>(n.ptr);
 82 | 
 83 |                         return exec(b->lhs, src) && exec(b->rhs, src);
 84 |                 }
 85 | 
 86 |                 case ENT::logicalnot: {
 87 |                         const auto b = static_cast<const compilation_ctx::binop_ctx *>(n.ptr);
 88 | 
 89 |                         return exec(b->lhs, src) && !exec(b->rhs, src); // lhs AND NOT rhs
 90 |                 }
 91 | 
 92 |                 case ENT::logicalor: {
 93 |                         const auto b = static_cast<const compilation_ctx::binop_ctx *>(n.ptr);
 94 | 
 95 |                         return exec(b->lhs, src) || exec(b->rhs, src);
 96 |                 }
 97 | 
 98 |                 case ENT::matchsome: {
 99 |                         const auto pm = static_cast<compilation_ctx::partial_match_ctx *>(n.ptr);
100 |                         uint16_t   matched{0};
101 | 
102 |                         for (decltype(pm->size) i{0}; i != pm->size; ++i) {
103 |                                 if (exec(pm->nodes[i], src) && ++matched == pm->min) // stop as soon as pm->min nodes have matched
104 |                                         return true;
105 |                         }
106 |                         return false;
107 |                 }
108 | 
109 |                 case ENT::matchallnodes: {
110 |                         const auto g = static_cast<compilation_ctx::nodes_group *>(n.ptr);
111 | 
112 |                         for (decltype(g->size) i{0}; i != g->size; ++i) {
113 |                                 if (!exec(g->nodes[i], src))
114 |                                         return false;
115 |                         }
116 |                         return true;
117 |                 }
118 | 
119 |                 case ENT::matchanynodes: {
120 |                         const auto g = static_cast<compilation_ctx::nodes_group *>(n.ptr);
121 | 
122 |                         for (decltype(g->size) i{0}; i != g->size; ++i) {
123 |                                 if (exec(g->nodes[i], src))
124 |                                         return true;
125 |                         }
126 |                         return false;
127 |                 }
128 | 
129 |                 case ENT::consttrueexpr:
130 |                         return true;
131 |                 case ENT::dummyop:
132 |                 case ENT::SPECIALIMPL_COLLECTION_LOGICALOR:
133 |                 case ENT::SPECIALIMPL_COLLECTION_LOGICALAND:
134 |                         std::abort(); // these node types cannot be evaluated here
135 |         }
136 |         std::abort(); // n.fp holds a value no case handles: never flow off the end of a non-void function (UB)
137 | }
138 | 


--------------------------------------------------------------------------------
/percolator.h:
--------------------------------------------------------------------------------
 1 | // See https://www.youtube.com/watch?v=f4lqBb1d7no&list=PLcGKfGEEONaDzd0Hkn2f1talsTu1HLDYu&index=21
 2 | //  Describes the Predicate Index Twitter employs to reduce number of distinct rules to
 3 | // attempt to match against a new tweet.
 4 | #include "common.h"
 5 | #include "compilation_ctx.h"
 6 | #include "queries.h"
 7 | 
 8 | namespace Trinity {
 9 |         struct percolator_document_proxy {
10 |                 // Just override match_term() and match_phrase().
11 |                 // You can access the actual term via percolator_query::term_by_index(idx)
12 |                 // You can e.g reset state, and then match()
13 |                 virtual bool match_term(const uint16_t term) = 0;
14 |                 virtual bool match_phrase(const uint16_t *, const uint16_t cnt) = 0; // receives a phrase's term IDs and how many there are
15 | 
16 |                 virtual ~percolator_document_proxy() = default; // polymorphic base: makes destruction through a base pointer well-defined
17 |         };
18 | 
19 |         class percolator_query final {
20 |               protected:
21 |                 struct CCTX final
22 |                     : public compilation_ctx {
23 |                         std::unordered_map<str8_t, uint16_t> localMap; // term => ID assigned by resolve_query_term()
24 |                         std::vector<str8_t>                  allTerms; // we need to keep track of those here
25 | 
26 |                         uint16_t resolve_query_term(const str8_t term) override final { // returns the ID of term, assigning the next 1-based ID the first time it is seen
27 |                                 const auto res = localMap.emplace(term, 0);	 // intern string
28 | 
29 |                                 if (res.second) {
30 |                                         res.first->second = localMap.size(); // IDs start at 1: the map already contains the new entry
31 |                                         const_cast<str8_t *>(&res.first->first)->Set(allocator.CopyOf(term.data(), term.size()), term.size()); // re-point the stored key at a copy of the term made by the allocator
32 | 
33 |                                         EXPECT(allTerms.size() == localMap.size() - 1);
34 |                                         allTerms.emplace_back(res.first->first);
35 |                                 }
36 | 
37 |                                 return res.first->second;
38 |                         }
39 | 
40 |                 } comp_ctx;
41 | 
42 |                 exec_node root;
43 | 
44 |               protected:
45 |                 bool exec(const exec_node, percolator_document_proxy &) const;
46 | 
47 |               public:
48 |                 auto term_by_index(const uint16_t idx) const { // idx is the 1-based ID returned by resolve_query_term()
49 |                         return comp_ctx.allTerms[idx - 1]; // NOTE(review): no bounds check; idx == 0 or idx > allTerms.size() is out of range
50 |                 }
51 | 
52 |                 auto &distinct_terms() noexcept {
53 |                         return comp_ctx.allTerms;
54 |                 }
55 | 
56 |                 const auto &distinct_terms() const noexcept {
57 |                         return comp_ctx.allTerms;
58 |                 }
59 | 
60 |               public:
61 |                 // After compilation, you can access all distinct terms, i.e all distinct terms you may be
62 |                 // interested in, in a document, via distinct_terms()
63 |                 percolator_query(const Trinity::query &q) {
64 |                         if (!q) {
65 |                                 root.fp = ENT::constfalse; // a false-y query compiles to a query that never matches
66 |                                 return;
67 |                         }
68 | 
69 |                         root = compile_query(q.root, comp_ctx);
70 |                         if (root.fp == ENT::constfalse || root.fp == ENT::dummyop)
71 |                                 root.fp = ENT::constfalse; // normalize dummyop to constfalse
72 |                         else
73 |                                 group_execnodes(root, comp_ctx.allocator);
74 |                 }
75 | 
76 |                 percolator_query() { // default: a query that never matches
77 |                         root.fp = ENT::constfalse;
78 |                 }
79 | 
80 |                 operator bool() const noexcept { // false when the root is constfalse or dummyop
81 |                         return root.fp != ENT::constfalse && root.fp != ENT::dummyop;
82 |                 }
83 | 
84 |                 bool match(percolator_document_proxy &) const; // percolator_document_proxy is not const, because you may want to do whatever there
85 |         };
86 | } // namespace Trinity
87 | 


--------------------------------------------------------------------------------
/queries.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaistos-networks/Trinity/a745a0c13719ca9d041e1dfcfeb81e6bf85a996f/queries.cpp


--------------------------------------------------------------------------------
/queries.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phaistos-networks/Trinity/a745a0c13719ca9d041e1dfcfeb81e6bf85a996f/queries.h


--------------------------------------------------------------------------------
/relevant_documents.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "common.h"
 3 | 
 4 | #ifdef __clang__
 5 | #pragma GCC diagnostic push
 6 | #pragma GCC diagnostic ignored "-Wundefined-inline"
 7 | #endif
 8 | 
 9 | namespace Trinity {
10 |         // Currently, we don't really need to query for total matches in the current document
11 |         // but if we do, or you need it in your handler for whatever reason, uncomment RDP_NEED_TOTAL_MATCHES
12 |         // and rebuild. This is not enabled because inflating the vtables is not worth it for a call that
13 |         // you may not use.
14 |         //
15 |         // This, along with moving score() out of relevant_document_provider is done so that we
16 |         // reduce the vtable size of Iterator and thereby reduce cache-misses.
17 |         //
18 |         // We could probably move Iterator::curDocument into relevant_document_provider thereby not having
19 |         // to make document() virtual, though I am not sure how we'd go about making IteratorScorer subclasses of
20 |         // relevant_document_provider do the right thing.
21 |         //#define RDP_NEED_TOTAL_MATCHES 1
22 |         struct relevant_document_provider {
23 |                 virtual isrc_docid_t document() const noexcept = 0;
24 | 
25 | #ifdef RDP_NEED_TOTAL_MATCHES
26 |                 virtual uint32_t total_matches() = 0;
27 | #endif
28 | 
29 |                 // In the past, relevant_document_provider had a virtual double score() decl.
30 |                 // but that meant we'd inflate the vtable of Iterators, which are also relevant_document_providers,
31 |                 // for no reason -- they do not provide scores.
32 |                 //
33 |                 // Instead, considering that if you invoke score() it must be because the relevant_document_provider you are
34 |                 // invoking it on is an IteratorScorer, we just do what the score() impl. does and thereby
35 |                 // no longer need to make score() virtual.
36 |                 //
37 |                 // The only downside is that relevant_document is somewhat ugly now. It contains a dummy iterator which is
38 |                 // used just for setting the current document. It's a fair tradeoff though, and considering we only
39 |                 // make use of it for ExecFlags::DocumentsOnly, it's OK.
40 |                 inline double score(); // non-virtual; its definition casts this to IteratorScorer and calls iterator_score()
41 |         };
42 | 
43 |         namespace DocsSetIterators {
44 |                 struct Iterator;
45 |         }
46 | 
47 |         // This simply wraps an iterator, and also provides a score for it when asked.
48 |         struct IteratorScorer
49 |             : public relevant_document_provider {
50 |                 DocsSetIterators::Iterator *const it; // the wrapped iterator
51 | 
52 |                 IteratorScorer(DocsSetIterators::Iterator *const it_)
53 |                     : it{it_} {
54 |                 }
55 | 
56 |                 virtual ~IteratorScorer() {
57 |                 }
58 | 
59 | #ifdef RDP_NEED_TOTAL_MATCHES
60 |                 inline uint32_t total_matches() override final;
61 | #endif
62 | 
63 |                 inline isrc_docid_t document() const noexcept override final; // declared inline here, defined elsewhere
64 | 
65 |                 // Will return the wrapped iterator, except for e.g Filter and Optional
66 |                 // where we only really want to advance the required or main respectively iterator
67 |                 // For now, this is not supported, but it may be in the future as an optimization.
68 |                 // TODO: consider this
69 |                 inline virtual DocsSetIterators::Iterator *iterator() {
70 |                         return it;
71 |                 }
72 | 
73 |                 virtual double iterator_score() = 0; // invoked by relevant_document_provider::score()
74 |         };
75 | 
76 |         double relevant_document_provider::score() {
77 |                 // If you tried to get a score, then this is an IteratorScorer.
78 |                 // We used to declare `virtual double score() = 0` here
79 |                 // but we really don't want to inflate Iterator's vtable with it
80 |                 return static_cast<IteratorScorer *>(this)->iterator_score(); // unchecked downcast: only valid when *this really is an IteratorScorer
81 |         }
82 | } // namespace Trinity
83 | 
84 | #ifdef __clang__
85 | #pragma GCC diagnostic pop
86 | #endif
87 | 


--------------------------------------------------------------------------------
/runtime.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "common.h"
 3 | 
 4 | namespace Trinity {
 5 |         using exec_term_id_t = uint16_t;
 6 | 
 7 |         // a materialized document term hit
 8 |         struct term_hit final {
 9 |                 uint64_t   payload; // payload storage; bytes() exposes it as raw bytes
10 |                 tokenpos_t pos;
11 |                 uint8_t    payloadLen; // NOTE(review): presumably the number of meaningful bytes in payload -- confirm
12 | 
13 |                 inline auto bytes() const noexcept { // read-only byte view of payload
14 |                         return reinterpret_cast<const uint8_t *>(&payload);
15 |                 }
16 | 
17 |                 inline auto bytes() noexcept { // mutable byte view of payload
18 |                         return reinterpret_cast<uint8_t *>(&payload);
19 |                 }
20 |         };
21 | } // namespace Trinity
22 | 
23 | 


--------------------------------------------------------------------------------
/segment_index_source.cpp:
--------------------------------------------------------------------------------
  1 | #include "segment_index_source.h"
  2 | #include "google_codec.h"
  3 | #include "lucene_codec.h"
  4 | 
  5 | Trinity::SegmentIndexSource::SegmentIndexSource(const char *basePath)
  6 | {
  7 |         int fd;
  8 |         char path[PATH_MAX];
  9 |         strwlen32_t bp(basePath);
 10 | 
 11 |         try
 12 |         {
 13 | 
 14 |                 bp.StripTrailingCharacter('/');
 15 | 
 16 |                 if (auto p = bp.SearchR('/'))
 17 |                         bp = bp.SuffixFrom(p + 1);
 18 | 
 19 |                 if (!bp.IsDigits())
 20 |                         throw Switch::data_error("Expected segment name to be a generation(digits)");
 21 | 
 22 |                 gen = bp.AsUint64();
 23 | 
 24 |                 snprintf(path, sizeof(path), "%s/updated_documents.ids", basePath);
 25 |                 fd = open(path, O_RDONLY | O_LARGEFILE);
 26 | 
 27 |                 if (fd == -1)
 28 |                 {
 29 |                         if (errno != ENOENT)
 30 |                                 throw Switch::system_error("open() failed for updated_documents.ids");
 31 |                 }
 32 |                 else if (const auto fileSize = lseek64(fd, 0, SEEK_END); fileSize > 0)
 33 |                 {
 34 |                         auto fileData = mmap(nullptr, fileSize, PROT_READ, MAP_SHARED, fd, 0);
 35 | 
 36 |                         close(fd);
 37 |                         if (unlikely(fileData == MAP_FAILED))
 38 |                                 throw Switch::data_error("Failed to access ", path, ":", strerror(errno));
 39 | 
 40 |                         madvise(fileData, fileSize, MADV_DONTDUMP);
 41 |                         maskedDocuments.fileData.Set(reinterpret_cast<uint8_t *>(fileData), fileSize);
 42 |                         new (&maskedDocuments.set) updated_documents(unpack_updates(maskedDocuments.fileData));
 43 |                 }
 44 |                 else
 45 |                         close(fd);
 46 | 
 47 |                 terms.reset(new SegmentTerms(basePath));
 48 | 
 49 |                 snprintf(path, sizeof(path), "%s/index", basePath);
 50 |                 fd = open(path, O_RDONLY | O_LARGEFILE);
 51 |                 if (fd == -1)
 52 |                 {
 53 |                         if (errno != ENOENT)
 54 |                                 throw Switch::data_error("Failed to access ", path);
 55 |                         else
 56 |                         {
 57 |                                 // Missing index? someone created a directory here that's incomplete? We can't proceed anyway, so
 58 |                                 // delegate responsibility to caller
 59 |                                 throw Switch::data_error("Unexpected index structure ", path);
 60 |                         }
 61 |                 }
 62 | 
 63 |                 auto fileSize = lseek64(fd, 0, SEEK_END);
 64 | 
 65 |                 if (0 == fileSize)
 66 |                 {
 67 |                         // just updated documents
 68 |                 }
 69 |                 else
 70 |                 {
 71 | #ifdef TRINITY_MEMRESIDENT_INDEX
 72 |                         auto p = (uint8_t *)malloc(fileSize + 1);
 73 | 
 74 |                         if (pread64(fd, p, fileSize, 0) != fileSize)
 75 |                         {
 76 |                                 free(p);
 77 |                                 close(fd);
 78 |                                 throw Switch::data_error("Failed to acess ", path);
 79 |                         }
 80 | 
 81 |                         close(fd);
 82 |                         index.Set(p, fileSize);
 83 | #else
 84 |                         auto fileData = mmap(nullptr, fileSize, PROT_READ, MAP_SHARED, fd, 0);
 85 | 
 86 |                         close(fd);
 87 |                         if (unlikely(fileData == MAP_FAILED))
 88 |                                 throw Switch::data_error("Failed to acess ", path);
 89 | 
 90 |                         madvise(fileData, fileSize, MADV_DONTDUMP);
 91 |                         index.Set(static_cast<const uint8_t *>(fileData), uint32_t(fileSize));
 92 | #endif
 93 |                 }
 94 | 
 95 |                 char codecStorage[128];
 96 |                 strwlen8_t codec;
 97 | 
 98 |                 snprintf(path, sizeof(path), "%s/id", basePath);
 99 |                 fd = open(path, O_RDONLY | O_LARGEFILE);
100 |                 if (fd == -1)
101 |                 {
102 |                         if (errno != ENOENT)
103 |                                 throw Switch::data_error("Failed to access ", path);
104 | 
105 |                         snprintf(path, sizeof(path), "%s/codec", basePath);
106 |                         fd = open(path, O_RDONLY | O_LARGEFILE);
107 |                         if (unlikely(fd == -1))
108 |                                 throw Switch::data_error("Failed to acess ", path);
109 | 
110 |                         fileSize = lseek64(fd, 0, SEEK_END);
111 | 
112 |                         if (!IsBetweenRange<size_t>(fileSize, 3, 128))
113 |                         {
114 |                                 close(fd);
115 |                                 throw Switch::data_error("Invalid segment codec file");
116 |                         }
117 | 
118 |                         if (pread64(fd, codecStorage, fileSize, 0) != fileSize)
119 |                         {
120 |                                 close(fd);
121 |                                 throw Switch::system_error("Failed to read codec");
122 |                         }
123 |                         else
124 |                         {
125 |                                 close(fd);
126 |                                 codec.Set(codecStorage, fileSize);
127 |                         }
128 |                 }
129 |                 else
130 |                 {
131 |                         const auto fileSize = lseek64(fd, 0, SEEK_END);
132 | 
133 |                         if (unlikely(fileSize > 1024 || (fileSize < sizeof(uint8_t) + 1 + sizeof(uint64_t) + sizeof(uint32_t) + sizeof(uint64_t) + sizeof(uint32_t))))
134 |                         {
135 |                                 close(fd);
136 |                                 throw Switch::system_error("Unexpected ID contents");
137 |                         }
138 | 
139 |                         uint8_t b[1024], *p{b};
140 | 
141 |                         if (pread64(fd, b, fileSize, 0) != fileSize)
142 |                         {
143 |                                 close(fd);
144 |                                 throw Switch::system_error("Failed to read ID");
145 |                         }
146 | 
147 |                         if (*p++ != 1)
148 |                         {
149 |                                 close(fd);
150 |                                 throw Switch::system_error("Failed to read ID: unsupported release");
151 |                         }
152 | 
153 |                         close(fd);
154 | 
155 |                         codec.len = *p++;
156 |                         codec.p = codecStorage;
157 |                         memcpy(codecStorage, p, codec.len);
158 |                         p += codec.len;
159 | 
160 |                         defaultFieldStats.sumTermHits = *(uint64_t *)p;
161 |                         p += sizeof(uint64_t);
162 |                         defaultFieldStats.totalTerms = *(uint32_t *)p;
163 |                         p += sizeof(uint32_t);
164 |                         defaultFieldStats.sumTermsDocs = *(uint64_t *)p;
165 |                         p += sizeof(uint64_t);
166 |                         defaultFieldStats.docsCnt = *(uint32_t *)p;
167 |                         p += sizeof(uint32_t);
168 | 
169 |                         // SLog("Restored codec '", codec, "' sumTermHits = ", dotnotation_repr(defaultFieldStats.sumTermHits), ", totalTerms = ", dotnotation_repr(defaultFieldStats.totalTerms), ", sumTermsDocs = ", dotnotation_repr(defaultFieldStats.sumTermsDocs), ", docsCnt = ", dotnotation_repr(defaultFieldStats.docsCnt), "\n");
170 |                 }
171 | 
172 |                 if (codec.Eq(_S("LUCENE")))
173 |                         accessProxy.reset(new Trinity::Codecs::Lucene::AccessProxy(basePath, index.start()));
174 | #ifdef TRINITY_CODECS_GOOGLE_AVAILABLE
175 |                 else if (codec.Eq(_S("GOOGLE")))
176 |                         accessProxy.reset(new Trinity::Codecs::Google::AccessProxy(basePath, index.start()));
177 | #endif
178 |                 else
179 |                         throw Switch::data_error("Unknown codec");
180 |         }
181 |         catch (...)
182 |         {
183 | 		ResetRefs();
184 | 		throw;
185 |         }
186 | }
187 | 


--------------------------------------------------------------------------------
/segment_index_source.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "index_source.h"
 3 | #include "terms.h"
 4 | #include "docidupdates.h"
 5 | 
 6 | namespace Trinity {
 7 |         // You can use SegmentIndexSession to create a new segment
 8 |         // This is a utility class
 9 |         class SegmentIndexSource final
10 |             : public IndexSource {
11 |               private:
12 |                 field_statistics                              defaultFieldStats;
13 |                 std::unique_ptr<Trinity::Codecs::AccessProxy> accessProxy;
14 |                 std::unique_ptr<SegmentTerms>                 terms; // terms dictionary of this segment
15 |                 range_base<const uint8_t *, uint32_t>         index; // backing index data; released by the destructor
16 | 
17 |                 // The masked documents set, together with the memory mapping referenced by fileData
18 |                 struct masked_documents_struct final {
19 |                         updated_documents                     set;
20 |                         range_base<const uint8_t *, uint32_t> fileData;
21 | 
22 |                         masked_documents_struct()
23 |                             : set{} {
24 |                         }
25 | 
26 |                         // unmaps fileData, if anything is mapped
27 |                         ~masked_documents_struct() noexcept {
28 |                                 auto mapped = (void *)(fileData.offset);
29 |                                 if (mapped)
30 |                                         munmap(mapped, fileData.size());
31 |                         }
32 |                 } maskedDocuments;
33 | 
34 |               public:
35 |                 SegmentIndexSource(const char *basePath);
36 | 
37 |                 // true when there is no codec access proxy for this segment
38 |                 bool index_empty() const noexcept override final {
39 |                         return !accessProxy;
40 |                 }
41 | 
42 |                 // the range that holds the raw index data
43 |                 auto backing_index() const noexcept {
44 |                         return index;
45 |                 }
46 | 
47 |                 auto access_proxy() {
48 |                         return accessProxy.get();
49 |                 }
50 | 
51 |                 field_statistics default_field_stats() override final {
52 |                         return defaultFieldStats;
53 |                 }
54 | 
55 |                 // resolves term against this segment's terms dictionary
56 |                 term_index_ctx resolve_term_ctx(const str8_t term) override final {
57 |                         return terms->lookup(term);
58 |                 }
59 | 
60 |                 auto segment_terms() const {
61 |                         return terms.get();
62 |                 }
63 | 
64 |                 // asks the access proxy for a new decoder for ctx; the term argument is not used
65 |                 Trinity::Codecs::Decoder *new_postings_decoder(strwlen8_t, const term_index_ctx ctx) override final {
66 |                         return accessProxy->new_decoder(ctx);
67 |                 }
68 | 
69 |                 // returns the masked documents set
70 |                 updated_documents masked_documents() override final {
71 |                         return maskedDocuments.set;
72 |                 }
73 | 
74 |                 // releases the index data: free() for memory-resident builds, munmap() otherwise
75 |                 ~SegmentIndexSource() noexcept {
76 |                         auto base = (void *)index.offset;
77 |                         if (!base)
78 |                                 return;
79 | #ifdef TRINITY_MEMRESIDENT_INDEX
80 |                         std::free(base);
81 | #else
82 |                         munmap(base, index.size());
83 | #endif
84 |                 }
85 |         };
86 | } // namespace Trinity
87 | 


--------------------------------------------------------------------------------
/similarity.cpp:
--------------------------------------------------------------------------------
 1 | #include "similarity.h"
 2 | 
 3 | static bool init() {
 4 |         float *const table = Trinity::Similarity::IndexSourcesCollectionBM25Scorer::Scorer::normalizationTable;
 5 |         // table[n] = 1 / n^2 for each of the 256 byte values; n == 0 is a division by zero (+inf under IEEE-754)
 6 |         for (uint32_t n = 0; n < 256; ++n) {
 7 |                 const auto asFloat = static_cast<float>(n);
 8 | 
 9 |                 table[n] = 1.0 / (asFloat * asFloat);
10 |         }
11 |         // the return value only exists so that init() can be used as a static initializer
12 |         return true;
13 | }
14 | 
15 | float Trinity::Similarity::IndexSourcesCollectionBM25Scorer::Scorer::normalizationTable[256]; // storage for the table populated by init()
16 | bool  Trinity::Similarity::IndexSourcesCollectionBM25Scorer::Scorer::initializer = init(); // initializing this member is what runs init()
17 | 


--------------------------------------------------------------------------------
/terms.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include "codecs.h"
  3 | #include <compress.h>
  4 | #include <switch_mallocators.h>
  5 | 
  6 | // Prefix-compressed terms dictionary
  7 | // Maps from str8_t=>term_index_ctx
  8 | namespace Trinity {
  9 | 
 10 |         // We can no longer omit (term, term_index_ctx) from the terms data file and keep
 11 |         // that just in the index, because while it works great for lookups, it means we can't trivially iterate
 12 |         // over all terms in the terms data file (see terms_data_view struct), and this is important for merging segments.
 13 |         //
 14 |         // For other applications that do not need access to all terms, one could take those structures, make sure TRINITY_TERMS_FAT_INDEX is defined,
 15 |         // and use it.
 16 |         //#define TRINITY_TERMS_FAT_INDEX
 17 |         // One entry of the terms skiplist: a term and the offset of its block in the terms datafile
 18 |         struct terms_skiplist_entry final {
 19 |                 str8_t   term;
 20 |                 uint32_t blockOffset; // offset in the terms datafile
 21 | #ifdef TRINITY_TERMS_FAT_INDEX
 22 |                 // only present when the fat index is enabled
 23 |                 term_index_ctx tctx; // payload
 24 | #endif
 25 |         };
 26 | 
 27 |         term_index_ctx lookup_term(range_base<const uint8_t *, uint32_t>    termsData,
 28 |                                    const str8_t                             term,
 29 |                                    const std::vector<terms_skiplist_entry> &skipList); // SegmentTerms::lookup() forwards here to resolve a term
 30 |         // NOTE(review): judging by the signature, fills skipList from termsIndex; allocator presumably backs the entries' storage -- confirm in terms.cpp
 31 |         void unpack_terms_skiplist(const range_base<const uint8_t *, const uint32_t> termsIndex,
 32 |                                    std::vector<terms_skiplist_entry> *               skipList,
 33 |                                    simple_allocator &                                allocator);
 34 |         // NOTE(review): presumably encodes terms into the data and index buffers read by the two functions above -- confirm in terms.cpp
 35 |         void pack_terms(std::vector<std::pair<str8_t, term_index_ctx>> &terms,
 36 |                         IOBuffer *const                                 data,
 37 |                         IOBuffer *const                                 index);
 38 | 
 39 |         // An abstract index source terms access wrapper
 40 |         //
 41 |         // For segments, you will likely use the prefix-compressed terms infra. but you may have
 42 |         // an index source that is e.g storing all those terms in an in-memory std::unordered_map<> or whatever else
 43 |         // for some reason and you can just write an IndexSourceTermsView subclass to access that.
 44 |         //
 45 |         // IndexSourceTermsView subclasses are used while merging index sources.
 46 |         // see merge.h
 47 |         struct IndexSourceTermsView {
 48 |                 // the (term, term_index_ctx) pair the view currently points at
 49 |                 virtual std::pair<str8_t, term_index_ctx> cur() = 0;
 50 |                 // moves on to the following term
 51 |                 virtual void next() = 0;
 52 |                 // true once there are no more terms to visit
 53 |                 virtual bool done() = 0;
 54 | 
 55 |                 virtual ~IndexSourceTermsView() = default;
 56 |         };
 57 | 
 58 |         // iterator access to the terms data
 59 |         // this is very useful for merging terms dictionaries (see IndexSourcePrefixCompressedTermsView)
 60 |         struct terms_data_view final {
 61 |               public:
 62 |                 struct iterator final {
 63 |                         friend struct terms_data_view;
 64 | 
 65 |                       private:
 66 |                         const uint8_t *p;
 67 |                         // Backing storage for cur.term.
 68 |                         //
 69 |                         // This used to be sized to Limits::MaxTermLength, but people make mistakes; sometimes they do index
 70 |                         // longer terms, and decoding those would write past the end of the array. It is sized to 256 so that
 71 |                         // any term with an 8-bit length fits (NOTE(review): assumes str8_t lengths are 8 bits wide -- confirm);
 72 |                         // thanks to RVO the cost shouldn't be felt by Trinity applications
 73 |                         str8_t::value_type termStorage[256];
 74 | 
 75 |                       public:
 76 |                         struct
 77 |                         {
 78 |                                 str8_t         term;
 79 |                                 term_index_ctx tctx;
 80 |                         } cur;
 81 | 
 82 |                         iterator(const uint8_t *ptr)
 83 |                             : p{ptr} {
 84 |                                 cur.term.p   = termStorage;
 85 |                                 cur.term.len = 0;
 86 |                         }
 87 |                         // not copyable: cur.term.p points into this object's own termStorage
 88 |                         iterator(const iterator &o) = delete;
 89 | 
 90 |                         iterator &operator=(const iterator &) = delete;
 91 | 
 92 |                         inline bool operator==(const iterator &o) const noexcept {
 93 |                                 return p == o.p;
 94 |                         }
 95 | 
 96 |                         inline bool operator!=(const iterator &o) const noexcept {
 97 |                                 return p != o.p;
 98 |                         }
 99 | 
100 |                         str8_t term() noexcept {
101 |                                 decode_cur();
102 |                                 return cur.term;
103 |                         }
104 | 
105 |                         term_index_ctx tctx() noexcept {
106 |                                 decode_cur();
107 |                                 return cur.tctx;
108 |                         }
109 |                         // only clears cur.term.len; NOTE(review): decode_cur() presumably decodes the next entry and advances p -- confirm in terms.cpp
110 |                         inline iterator &operator++() {
111 |                                 cur.term.len = 0;
112 |                                 return *this;
113 |                         }
114 | 
115 |                         inline std::pair<str8_t, term_index_ctx> operator*() noexcept {
116 |                                 decode_cur();
117 |                                 return {cur.term, cur.tctx};
118 |                         }
119 | 
120 |                       protected:
121 |                         void decode_cur();
122 |                 };
123 | 
124 |               private:
125 |                 const range_base<const uint8_t *, uint32_t> termsData;
126 | 
127 |               public:
128 |                 iterator begin() const {
129 |                         return {termsData.start()};
130 |                 }
131 | 
132 |                 iterator end() const {
133 |                         return {termsData.stop()};
134 |                 }
135 | 
136 |                 terms_data_view(const range_base<const uint8_t *, uint32_t> d)
137 |                     : termsData{d} {
138 |                 }
139 |         };
140 | 
141 |         // A specialised IndexSourceTermsView for accessing prefix-encoded terms dictionaries
142 |         struct IndexSourcePrefixCompressedTermsView final
143 |             : public IndexSourceTermsView {
144 |               private:
145 |                 terms_data_view::iterator       cursor; // where we currently are in the terms data
146 |                 const terms_data_view::iterator last;   // marks the end of the terms data
147 | 
148 |               public:
149 |                 IndexSourcePrefixCompressedTermsView(const range_base<const uint8_t *, uint32_t> termsData)
150 |                     : cursor{termsData.start()}, last{termsData.stop()} {
151 |                 }
152 | 
153 |                 std::pair<str8_t, term_index_ctx> cur() override final {
154 |                         return *cursor;
155 |                 }
156 | 
157 |                 void next() override final {
158 |                         ++cursor;
159 |                 }
160 |                 // done once the cursor has reached the end marker
161 |                 bool done() override final {
162 |                         return !(cursor != last);
163 |                 }
164 |         };
165 | 
166 |         //A handy wrapper for memory mapped terms data and a skiplist from the terms index
167 |         class SegmentTerms final {
168 |               private:
169 |                 std::vector<terms_skiplist_entry>     skiplist;
170 |                 simple_allocator                      allocator;
171 |                 range_base<const uint8_t *, uint32_t> termsData;
172 |               public:
173 |                 SegmentTerms(const char *segmentBasePath);
174 |                 SegmentTerms(const SegmentTerms &) = delete; // the destructor unmaps termsData: a copy would munmap() the same region twice
175 |                 SegmentTerms &operator=(const SegmentTerms &) = delete;
176 | 
177 |                 ~SegmentTerms() noexcept {
178 |                         if (auto ptr = (void *)(termsData.offset))
179 |                                 munmap(ptr, termsData.size());
180 |                 }
181 | 
182 |                 term_index_ctx lookup(const str8_t term) {
183 |                         return lookup_term(termsData, term, skiplist);
184 |                 }
185 |                 // a terms_data_view over this segment's terms data
186 |                 auto terms_data_access() const {
187 |                         return terms_data_view(termsData);
188 |                 }
189 |                 // allocates a new view with new; the caller receives the raw pointer
190 |                 auto new_terms_view() const {
191 |                         return new IndexSourcePrefixCompressedTermsView(termsData);
192 |                 }
193 |         };
194 | } // namespace Trinity
195 | 


--------------------------------------------------------------------------------
/trinity_limits.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "common.h"
 3 | 
 4 | namespace Trinity {
 5 |         // Google allows upto 32 different tokens in the query
 6 |         // e.g https://www.google.gr/?gfe_rd=cr&ei=hmzBWJJ3qd3wB427g_AP#q=apple+OR+samsung+OR+nokia+OR+iphone+OR+ipad+OR+microsoft+OR+the+OR+of+OR+in+OR+playstation+OR+pc+OR+xbox+OR+macbook+OR+case+OR+box+OR+mp3+OR+player+OR+tv+OR++panasonic+OR+windows+OR+out+OR+with+OR+over+OR+under+OR+soccer+OR+pro+OR+fifa+OR+ea+OR+ps2+OR+playstation+OR+nintendo+OR+fast+OR+imac+OR+pro+OR+lg+OR+adidas+OR+nike+OR+stan+OR+black+OR+white+OR+dpf+OR+air+OR+force+OR+indesit+OR+morris+OR+watch+OR+galaxy+OR+2016+OR+2017+OR+2105+OR+32gb++OR+1+OR+2+OR+10+OR+20+OR+50+OR+100+OR+a+OR+b+OR+c+OR+d+OR+foo+OR+ba++OR+greece+OR+UK+OR+US+OR+tea+OR+coffee+OR+water+OR+air+OR+space+OR+star+OR+sun+OR+foobarjuibar&*
 7 |         // > "nike" (and any subsequent words) was ignored because we limit queries to 32 words.  In fact, those are not 32 _distinct_ tokens, its 32 tokens, period. ( I checked )
 8 |         //
 9 |         // so if we want to support up to 32 query tokens and say up to 10 different synonyms for each of them, then we max out at 320 tokens/query. Just to be on the safe side though and because we want to
10 |         // make sure we can support a few more, MaxQueryTokens is set well above that (currently 8192). In practice, we can set it to however many but we need to keep things sane
11 |         namespace Limits {
12 |                 static constexpr size_t MaxPhraseSize  = 16;
13 |                 static constexpr size_t MaxQueryTokens = 8192;
14 |                 static constexpr size_t MaxTermLength  = 64;
15 |                 static constexpr size_t MaxPosition    = 1 << 14;
16 | 
17 |                 // Compile-time sanity checks on the limits above
18 |                 static_assert(MaxTermLength > 8 && MaxTermLength < 250);
19 |                 static_assert(MaxPhraseSize <= 128);
20 |                 static_assert(MaxQueryTokens <= 8192);
21 |                 static_assert(MaxPosition <= std::numeric_limits<tokenpos_t>::max());
22 |         } // namespace Limits
23 | } // namespace Trinity
24 | 


--------------------------------------------------------------------------------
/utils.cpp:
--------------------------------------------------------------------------------
 1 | #include "utils.h"
 2 | #include "common.h"
 3 | #include <fcntl.h>
 4 | #include <sys/stat.h>
 5 | #include <sys/types.h>
 6 | 
 7 | // Writes all `len` bytes starting at `p` to `fd`. Returns 0 on success; on failure closes `fd` and returns -1.
 8 | int8_t Trinity::Utilities::to_file(const char *p, uint64_t len, int fd) {
 9 |         // can't write more than sizeof(ssize_t) bytes/time (EINVAL), so every write() is capped at MaxSpan
10 |         static constexpr uint64_t MaxSpan{(2ul * 1024 * 1024 * 1024) - 1};
11 | 
12 |         // Loop on the bytes that remain rather than on a precomputed chunk count, so that the loop always
13 |         // terminates and a short write simply continues from where the previous write() stopped.
14 |         while (len) {
15 |                 const auto res = write(fd, p, len < MaxSpan ? len : MaxSpan);
16 | 
17 |                 if (res <= 0) { // error, or no progress at all
18 |                         close(fd);
19 |                         return -1;
20 |                 }
21 |                 p += res;
22 |                 len -= res;
23 |         }
24 |         return 0;
25 | }
26 | 
27 | // Creates or truncates `path`, writes `len` bytes from `p` to it and fsync()s it. Returns 0 on success, -1 on failure.
28 | int8_t Trinity::Utilities::to_file(const char *p, uint64_t len, const char *path) {
29 |         const int fd = open(path, O_WRONLY | O_TRUNC | O_CREAT | O_LARGEFILE, 0775);
30 | 
31 |         if (fd == -1)
32 |                 return -1;
33 |         // on failure the descriptor overload has already closed fd; closing it again here could hit a reused descriptor
34 |         if (to_file(p, len, fd) == -1)
35 |                 return -1;
36 | 
37 |         // http://www.jeffplaisance.com/2013/10/how-to-write-file.html
38 |         // fdatasync() is insufficient; it does not sync the file size
39 |         if (fsync(fd) == -1) {
40 |                 close(fd);
41 |                 return -1;
42 |         }
43 | 
44 |         // a failing close() is reported as a failure as well
45 |         return close(fd) == -1 ? -1 : 0;
46 | }
47 | 


--------------------------------------------------------------------------------
/utils.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <compress.h>
 3 | #include <switch.h>
 4 | 
 5 | namespace Trinity {
 6 |         namespace Utilities {
 7 |                 int8_t to_file(const char *p, uint64_t len, const char *path); // creates or truncates path, writes the buffer, then fsync()s and closes it; 0 on success, -1 on failure
 8 |                 // Writes the buffer to an already open descriptor; returns 0 on success, or -1 after closing fd on failure
 9 |                 int8_t to_file(const char *p, uint64_t len, int fd);
10 |         } // namespace Utilities
11 | } // namespace Trinity
12 | 


--------------------------------------------------------------------------------