├── swar.h
├── compiler.h
├── LICENSE
├── README.md
├── test
├── swar_bench.cpp
└── swar_test.cpp
├── swar_fwd.h
└── swar_inl.h
/swar.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "swar_fwd.h"
3 | #include "swar_inl.h"
4 |
--------------------------------------------------------------------------------
/compiler.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | // CODE_SECTION: on Linux with a GCC-compatible compiler, a section attribute naming ".text#"; elsewhere it expands to nothing. NOTE(review): the trailing '#' presumably comments out the section flags the compiler appends in the generated assembly - confirm.
3 | #if defined(__linux__) && defined(__GNUC__)
4 | #define CODE_SECTION __attribute__ ((section (".text#")))
5 | #else
6 | #define CODE_SECTION
7 | #endif
8 | // Branch-prediction hints. Every variant normalizes X to 0/1 and is fully parenthesized, so the macros behave the same inside larger expressions on any compiler.
9 | #if defined(__GNUC__)
10 | #define likely(X) __builtin_expect(!!(X), 1)
11 | #define unlikely(X) __builtin_expect(!!(X), 0)
12 | #else
13 | #define likely(X) (!!(X))
14 | #define unlikely(X) (!!(X))
15 | #endif
16 |
17 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 yb303
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SWAR
2 | Low level, branch-free functions for number and string conversion and other utils.
3 |
4 | SWAR stands for SIMD within a register. This means treating parts of a uint64_t as individual uint8_t's, uint16_t's, or uint32_t's.
5 |
6 | This library is header-only. There is nothing to build.
7 |
8 | Most functions have different variants optimized for limited lengths (8 and 4), for printable input (ASCII < 128), and more.
9 |
10 | ### Language and build
11 |
12 | I'm used to C++17, so I used `if constexpr`, but the rest of the code is C++03 compatible and can easily be converted to C.
13 | Include `swar.h` and build with -std=c++17
14 | For forward declarations only, include `swar_fwd.h` instead.
15 |
16 | ### Test and benchmark
17 |
18 | The test dir includes:
19 | - `swar_test.cpp` that is using google-test for unit testing. (**TODO** create a `build: passing` badge)
20 | - `swar_bench.cpp` that produces the numbers for the graph below.
21 |
22 | ### Performance
23 |
24 | Comparison of various atoi implementations (actually a-to-ull) on my home machine:
25 | 
26 | Machine and env spec: i5-3470, Cygwin on Windows 10 64 bit, g++ 9.3.0
27 |
28 | The not-perfectly-straight lines, in SWAR's performance, are just measurement artifacts. In *swar8*, for example, it's the same instructions executed for every input, so there should be no difference.
29 | You can see how SWAR is faster than the naive impl and is fixed cost per word. The stock implementation is surprisingly slow. I don't know why as I didn't read its code yet.
30 |
31 | Functions with 8, or 4, suffix are branchless and faster (see *swar8* vs *swar*). Functions with longer input must have a branch per word.
32 | An SSE implementation can follow the same ideas as here for longer inputs. However, using SSE instructions may switch some processors to a different P-state, if the BIOS allows, and the switching itself can take a few hundred cycles.
33 |
34 | Branchless code is not always faster than branched code.
35 | Benchmarks are typically less impacted by branch mispredictions than real-world applications. This also applies to my benchmark. I did not take special care to litter the BP caches before each function call, as this would make each call harder to measure.
36 | Loops may be fully predicted, especially if BP caches are all working for the benchmark. However, in a real world app, using branchless low level code means that BP caches have more room for the application logic so the app as a whole may become faster. The only way to know for sure is to test within the app.
37 |
38 | These performance characteristics are the same for strlen and similar functions.
39 |
40 | ### Functions
41 | All functions come in a few variants:
42 | * memchr and memrchr
43 | * strlen
44 | * atoi, htoi (hex string to int), atod
45 | * itoa
46 | * hasbyte - does word include a certain byte?
47 |
48 | ### Supported operating systems
49 | * Linux
50 | * Cygwin
51 |
52 | ### Supported programming languages
53 | * C++
54 |
55 | ### Supported compilers
56 | * g++
57 |
58 | ### Supported architectures
59 | * x86_64
60 |
61 |
--------------------------------------------------------------------------------
/test/swar_bench.cpp:
--------------------------------------------------------------------------------
1 | #include "../swar.h"
2 |
3 | #include
4 | #include
5 | #include
6 |
7 | #include
8 | #include
9 |
10 | inline int64_t rdtsc() {
11 |     // Read the x86 time-stamp counter. RDTSC returns the low 32 bits in EAX
12 |     // and the high 32 bits in EDX; they are combined with a shift instead of
13 |     // an anonymous-struct union, which relied on non-standard type punning.
14 |     uint32_t lo, hi;
15 |
16 |     asm volatile("rdtsc" : "=a"(lo), "=d"(hi) : : "memory");
17 |     return int64_t((uint64_t(hi) << 32) | lo);
18 | }
19 |
20 | inline uint64_t naive_atoull(const char* p, int n) {
21 |     uint64_t ret = 0;  // p must hold at least n ASCII digits; nothing is validated
22 |     for (int i = 0; i < n; i++)
23 |         ret = ret * 10 + p[i] - '0';  // unsigned arithmetic: overflow wraps
24 |     return ret;  // explicit 64-bit return type; it was missing before
25 | }
26 |
27 | void acc(uint64_t& dst, uint64_t src)
28 | {
29 |     // Keep the smallest sample seen so far; a zero dst means "no sample yet".
30 |     const bool unset = (dst == 0);
31 |     if (unset || src < dst)
32 |         dst = src;
33 | }
34 |
35 | int main(int argc, char* argv[]) {
36 |     // Benchmark driver: times several string-to-integer parsers over random
37 |     // decimal strings of 1..20 digits. Options: -n COUNT, -r REPETITIONS.
38 |
39 |     int test_size = 10000;
40 |     int test_repetitions = 10;
41 |
42 |     for (int i = 1; i < argc; i++) {
43 |         if (strcmp(argv[i], "-n") == 0 && i + 1 < argc) {  // option needs a value
44 |             test_size = atoi(argv[++i]);
45 |         }
46 |         else if (strcmp(argv[i], "-r") == 0 && i + 1 < argc) {
47 |             test_repetitions = atoi(argv[++i]);
48 |         }
49 |     }
50 |
51 |     // Per length: test_size random numbers, zero-padded to exactly len digits, each followed by NUL
52 |     std::vector<std::vector<char>> v(21);
53 |     std::mt19937_64 mt(rdtsc());
54 |     uint64_t mask = 1;
55 |     for (int len = 1; len < 21; len++) {
56 |         v[len].resize(test_size * (len + 1));
57 |         mask *= 10;  // wraps (unsigned) at len == 20; padding still gives 20 digits
58 |         int total_len = 0;
59 |         char* buf = v[len].data();
60 |         for (int i = 0; i < test_size; i++) {  // fill every slot, including the first
61 |             sprintf(buf + total_len, "%0*llu", len, (unsigned long long)(mt() % mask));
62 |             total_len += len + 1;
63 |         }
64 |     }
65 |
66 |     uint64_t junk = 0;
67 |     // Best (minimum) cycle count observed per input length, see acc()
68 |     std::vector<uint64_t> dt_no_op(21);
69 |     std::vector<uint64_t> dt_stock(21);
70 |     std::vector<uint64_t> dt_naive(21);
71 |     std::vector<uint64_t> dt_swar_(21);
72 |     std::vector<uint64_t> dt_swar8(21);
73 |     std::vector<uint64_t> dt_swar4(21);
74 |
75 |     for (int r = 0; r < test_repetitions; r++) {
76 |         for (int len = 1; len < 21; len++) {
77 |             char* buf = v[len].data();
78 |
79 |             // Test no-op
80 |             uint64_t t0 = rdtsc();
81 |             int total_len = 0;
82 |             for (int i = 0; i < test_size; i++ ) {
83 |                 junk += buf[total_len];
84 |                 total_len += len + 1;
85 |             }
86 |
87 |             // Test stock atoull
88 |             uint64_t t1 = rdtsc();
89 |             total_len = 0;
90 |             for (int i = 0; i < test_size; i++ ) {
91 |                 junk += atoll(buf + total_len);
92 |                 total_len += len + 1;
93 |             }
94 |
95 |             // Test naive atoull
96 |             uint64_t t2 = rdtsc();
97 |             total_len = 0;
98 |             for (int i = 0; i < test_size; i++ ) {
99 |                 junk += naive_atoull(buf + total_len, len);
100 |                 total_len += len + 1;
101 |             }
102 |
103 |             // Test swar atou
104 |             uint64_t t3 = rdtsc();
105 |             total_len = 0;
106 |             for (int i = 0; i < test_size; i++ ) {
107 |                 junk += swar::atou(buf + total_len, len);
108 |                 total_len += len + 1;
109 |             }
110 |
111 |             // Test swar atou8
112 |             uint64_t t4 = rdtsc();
113 |             int len8 = len <= 8 ? len : 8;
114 |             total_len = 0;
115 |             for (int i = 0; i < test_size; i++ ) {
116 |                 junk += swar::atou8(buf + total_len, len8);
117 |                 total_len += len + 1;
118 |             }
119 |
120 |             // Test swar atou4
121 |             uint64_t t5 = rdtsc();
122 |             int len4 = len <= 4 ? len : 4;
123 |             total_len = 0;
124 |             for (int i = 0; i < test_size; i++ ) {
125 |                 junk += swar::atou4(buf + total_len, len4);
126 |                 total_len += len + 1;
127 |             }
128 |
129 |             uint64_t t6 = rdtsc();
130 |             acc(dt_no_op[len], t1 - t0);
131 |             acc(dt_stock[len], t2 - t1);
132 |             acc(dt_naive[len], t3 - t2);
133 |             acc(dt_swar_[len], t4 - t3);
134 |             acc(dt_swar8[len], t5 - t4);
135 |             acc(dt_swar4[len], t6 - t5);
136 |         }
137 |     }
138 |
139 |     printf("%d%c", uint32_t(junk) % 10, 8);  // consume junk so the timed loops cannot be discarded
140 |     printf("len %7s %7s %7s %7s %7s\n",
141 |            "stock", "naive", "swar", "swar8", "swar4");
142 |     double f = 1.0 / test_size;
143 |     for (int len = 1; len < 21; len++) {  // subtract as double: a run faster than the no-op must not wrap
144 |         double tf_stock = (double(dt_stock[len]) - double(dt_no_op[len])) * f;
145 |         double tf_naive = (double(dt_naive[len]) - double(dt_no_op[len])) * f;
146 |         double tf_swar_ = (double(dt_swar_[len]) - double(dt_no_op[len])) * f;
147 |         double tf_swar8 = (double(dt_swar8[len]) - double(dt_no_op[len])) * f;
148 |         double tf_swar4 = (double(dt_swar4[len]) - double(dt_no_op[len])) * f;
149 |         printf("%3d %7.1f %7.1f %7.1f %7.1f %7.1f\n",
150 |                len, tf_stock, tf_naive, tf_swar_, tf_swar8, tf_swar4);
151 |     }
152 |
153 |     return 0;
154 | }
155 |
156 |
157 |
--------------------------------------------------------------------------------
/swar_fwd.h:
--------------------------------------------------------------------------------
1 | #include "compiler.h"
2 |
3 | #include
4 | #include // for memcpy, memset
5 | #include
6 |
7 | namespace swar {
8 |
9 | //
10 | // Function naming convention :
11 | // Length
12 | // 8 means input is 8 bytes
13 | // 4 means input is 4 bytes
14 | // None means input is any length
15 | // Prefix
16 | // p Printable. Ascii 0 to 127.
17 | // pmemchr vs memchr is slightly optimized for printable input
18 | // _ Mostly internal use
19 | // Suffix
20 | // k Haystack is known to contain needle
21 | // _nc Non-const, modifiable, input
22 | // These functions modify and restore the input. Not thread safe
23 | //
24 | // Performance
25 | // Functions with 8 suffix are branchless. Functions with longer input must
26 | // have a branch per word. This can be improved with SSE.
27 | // - SSE may switch some processors to different P state, if the BIOS allows,
28 | // and the switching itself can take a few hundred cycles
29 | // - Branchless code is not always faster than branched code.
30 | // Lab tests are typically less impacted by branch mispredictions than real
31 | // world applications.
32 | // Loops may be fully predicted, if BP caches are all working for the test.
33 | // However, in a real world app, using branchless low level code means that BP
34 | // caches have more room for the application logic so the app as a whole may
35 | // become faster.
36 | // The only way to know for sure, is to test within the app.
37 | //
38 |
39 | //
40 | // Utils
41 | //
42 |
43 | // Cast char* to T using memcpy. memcpy is optimized away on x86
44 | template inline T cast(const char* src);
45 |
46 | // Get the uint _cast_ of a string of up to 8 chars
47 | inline uint64_t cast8(const char* s, uint32_t len);
48 |
49 | // Fill T with c's
50 | template inline T extend(char c);
51 |
52 | // swap bytes
53 | inline uint64_t bswap(uint64_t x);
54 | inline uint32_t bswap(uint32_t x);
55 | inline uint16_t bswap(uint16_t x);
56 |
57 | //
58 | // Find byte in word
59 | //
60 |
61 | // Check if word has zero byte
62 | inline bool haszero(uint64_t x);
63 |
64 | // Check if word has some byte
65 | inline bool hasbyte(uint64_t x, uint8_t c);
66 |
67 | // Find char in string. Support all options.
68 | template
69 | inline uint32_t _memchr8(const char* s, uint8_t c);
70 |
71 | // Find char in string and trim it
72 | template
73 | inline uint32_t _trim8(const char* s, uint8_t c);
74 |
75 | // Find char in printable (chars < 128) string of 8 chars
76 | inline uint32_t pmemchr8(const char* s, uint8_t c);
77 |
78 | // Find char in printable (chars < 128) string of 8 chars
79 | // * The string is known to contain the char
80 | inline uint32_t pmemchr8k(const char* s, uint8_t c);
81 |
82 | // Find char in binary string of 8 chars
83 | inline uint32_t memchr8(const char* s, uint8_t c);
84 |
85 | // Find char in binary string of 8 chars
86 | // * The string is known to contain the char
87 | inline uint32_t memchr8k(const char* s, uint8_t c);
88 |
89 | //
90 | // Strlen variants
91 | //
92 |
93 | // Find zero byte in binary string up to 8 chars
94 | inline uint32_t strlen8(const char* s);
95 |
96 | // Find zero byte in printable string up to 8 chars
97 | inline uint32_t pstrlen8(const char* s);
98 |
99 | // Find zero byte in binary string
100 | inline uint32_t strlen(const char* s);
101 |
102 | // Find zero byte in printable string
103 | inline uint32_t pstrlen(const char* s);
104 |
105 | //
106 | // Find byte in word - reverse
107 | //
108 |
109 | // Find char in printable (chars < 128) string of 8 chars
110 | inline uint32_t pmemrchr8(const char* s, uint8_t c);
111 |
112 | // Find char in printable (chars < 128) string of 8 chars
113 | // * The string is known to contain the char
114 | inline uint32_t pmemrchr8k(const char* s, uint8_t c);
115 |
116 | // Find char in binary string of 8 chars
117 | inline uint32_t memrchr8(const char* s, uint8_t c);
118 |
119 | // Find char in binary string of 8 chars
120 | // * The string is known to contain the char
121 | inline uint32_t memrchr8k(const char* s, uint8_t c);
122 |
123 | //
124 | // Find byte in const string. Like memchr
125 | //
126 |
127 | // Find char in const binary string
128 | template
129 | inline uint32_t _memchr(const char* s, uint32_t len, uint8_t c);
130 |
131 | // Find char in binary string
132 | inline uint32_t memchr(const char* s, uint32_t len, uint8_t c);
133 |
134 | // Find char in binary string. Char c is known to be in s + len
135 | inline uint32_t memchrk(const char* s, uint32_t len, uint8_t c);
136 |
137 | // Find char in printable string
138 | inline uint32_t pmemchr(const char* s, uint32_t len, uint8_t c);
139 |
140 | // Find char in printable string. Char c is known to be in s + len
141 | inline uint32_t pmemchrk(const char* s, uint32_t len, uint8_t c);
142 |
143 | //
144 | // Find byte, from end, in const string. Like memrchr
145 | //
146 |
147 | // Find char, in reverse, in const binary string
148 | template
149 | inline uint32_t _memrchr(const char* s, uint32_t len, uint8_t c);
150 |
151 | // Find char in binary string
152 | inline uint32_t memrchr(const char* s, uint32_t len, uint8_t c);
153 |
154 | // Find char in binary string. Char c is known to be in s + len
155 | inline uint32_t memrchrk(const char* s, uint32_t len, uint8_t c);
156 |
157 | // Find char in printable string
158 | inline uint32_t pmemrchr(const char* s, uint32_t len, uint8_t c);
159 |
160 | // Find char in printable string. Char c is known to be in s + len
161 | inline uint32_t pmemrchrk(const char* s, uint32_t len, uint8_t c);
162 |
163 | //
164 | // Find byte in NON-CONST string
165 | //
166 |
167 | template
168 | inline uint32_t _memchr_nc(char* s, uint32_t len, uint8_t c);
169 |
170 | // Find char in binary NON-CONST string
171 | inline uint32_t memchr_nc(char* s, uint32_t len, uint8_t c);
172 |
173 | // Find char in printable NON-CONST string
174 | inline uint32_t pmemchr_nc(char* s, uint32_t len, uint8_t c);
175 |
176 | //// string to int
177 |
178 | // Parse uint from string of up to 4 chars
179 | inline uint16_t atou4(const char* s, uint32_t len);
180 |
181 | // Parse uint from string of up to 8 chars
182 | inline uint32_t atou8(const char* s, uint32_t len);
183 |
184 | // Parse uint64_t from string of up to 20 chars
185 | // *** More than 20 char returns junk.
186 | inline uint64_t atou(const char* s, uint32_t len);
187 |
188 | // Parse _signed_ int from string of up to 20 chars. No spaces
189 | inline int64_t atoi(const char* s, uint32_t len);
190 |
191 | // Parse hex int from string of up to 8 chars
192 | inline uint32_t htou8(const char* s, uint32_t len);
193 |
194 | // Parse hex int from string of up to 16 chars
195 | inline uint64_t htou(const char* s, uint32_t len);
196 |
197 | //// int to string
198 |
199 | // *** p suffix means zero-padded
200 |
201 | // Convert uint, of less than 100, to %02u, as int 16
202 | inline uint16_t utoa2p(uint64_t x);
203 |
204 | // Convert uint, of less than 100, to %02u
205 | inline void utoa2p(uint64_t x, char* s);
206 |
207 | // Convert uint to %0u, N <= 8
208 | template
209 | inline uint64_t _utoap(uint64_t x, char* s);
210 |
211 | // Convert uint to %0u, N <= 20
212 | template
213 | inline char* utoap(uint64_t x, char* s);
214 |
215 | // Convert signed int 32 to string of up to 8 bytes.
216 | inline uint32_t itoa8(int32_t x, char* buf);
217 |
218 | // Convert signed int 64 to string. String buffer is at least 22 bytes.
219 | // Returns length
220 | // *** this feels inefficient :( ***
221 | inline uint32_t itoa(int64_t x, char* buf);
222 |
223 | //// Double to string
224 |
225 | // Copy the sign from src to dst that is unsigned.
226 | // *** dst is up to 63 bit
227 | inline int64_t _copySign(int64_t src, uint64_t dst);
228 |
229 | // Parse double from string
230 | // *** More than 20 char integer part returns junk.
231 | // *** Too many decimal digits will be lost to precision
232 | inline double atod(const char* s, uint32_t len);
233 |
234 | } // namespace swar
235 |
--------------------------------------------------------------------------------
/test/swar_test.cpp:
--------------------------------------------------------------------------------
1 | #include "../swar.h"
2 | #include
3 | #include
4 | #include
5 |
6 | // memchr8 / memchr / memchr_nc: expected index of '=' in each input. NOTE(review): the -1 expectations are compared against functions declared as returning uint32_t - presumably "not found" is reported as 0xFFFFFFFF; confirm.
7 | TEST(r8, memchr) {
8 | EXPECT_EQ(swar::memchr8("12345678=90", '='), -1);
9 | EXPECT_EQ(swar::memchr8("1234567=890", '='), 7);
10 | EXPECT_EQ(swar::memchr8("123456=7890", '='), 6);
11 | EXPECT_EQ(swar::memchr8("12345=67890", '='), 5);
12 | EXPECT_EQ(swar::memchr8("1234=567890", '='), 4);
13 | EXPECT_EQ(swar::memchr8("123=4567890", '='), 3);
14 | EXPECT_EQ(swar::memchr8("12=34567890", '='), 2);
15 | EXPECT_EQ(swar::memchr8("1=234567890", '='), 1);
16 | EXPECT_EQ(swar::memchr8("=1234567890", '='), 0);
17 |
18 | EXPECT_EQ(swar::memchr8("1234=", '='), 4);
19 | EXPECT_EQ(swar::memchr8("123=4", '='), 3);
20 | EXPECT_EQ(swar::memchr8("12=34", '='), 2);
21 | EXPECT_EQ(swar::memchr8("1=234", '='), 1);
22 | EXPECT_EQ(swar::memchr8("=1234", '='), 0);
23 |
24 | EXPECT_EQ(swar::memchr8("===", '='), 0);
25 | EXPECT_EQ(swar::memchr8("==", '='), 0);
26 | EXPECT_EQ(swar::memchr8("=", '='), 0);
27 |
28 | // 123456789 123456789 123456789
29 | EXPECT_EQ(swar::memchr("1234567890abcdefghij=", 20, '='), -1);
30 | EXPECT_EQ(swar::memchr("12345678=90abcdefghi", 20, '='), 8);
31 | EXPECT_EQ(swar::memchr("1234=567890abcdefghi", 20, '='), 4);
32 |
33 | char nc[24] = "1234567890abcdefghij=12";
34 | EXPECT_EQ(swar::memchr_nc(nc, 20, '='), -1);
35 | EXPECT_EQ(swar::memchr_nc(nc, 21, '='), 20);
36 | EXPECT_EQ(swar::memchr_nc(nc, 23, '='), 20);
37 |
38 | }
39 | // cast8: the first len characters packed into a uint64_t, with the first character expected in the least significant byte and unused bytes zero
40 | TEST(r8, cast8) {
41 | EXPECT_EQ(swar::cast8("1234567890", 0), 0);
42 | EXPECT_EQ(swar::cast8("1234567890", 1), 0x31ull);
43 | EXPECT_EQ(swar::cast8("1234567890", 2), 0x3231ull);
44 | EXPECT_EQ(swar::cast8("1234567890", 3), 0x333231ull);
45 | EXPECT_EQ(swar::cast8("1234567890", 4), 0x34333231ull);
46 | EXPECT_EQ(swar::cast8("1234567890", 5), 0x3534333231ull);
47 | EXPECT_EQ(swar::cast8("1234567890", 6), 0x363534333231ull);
48 | EXPECT_EQ(swar::cast8("1234567890", 7), 0x37363534333231ull);
49 | EXPECT_EQ(swar::cast8("1234567890", 8), 0x3837363534333231ull);
50 | }
51 | // atou4 / atou8 / atou / atoi: parse the first len characters as decimal; in the signed cases the leading '+' or '-' counts toward len
52 | TEST(r8, atoi) {
53 | // short
54 | EXPECT_EQ(swar::atou4("1234567890", 0), 0);
55 | EXPECT_EQ(swar::atou4("1234567890", 1), 1);
56 | EXPECT_EQ(swar::atou4("1234567890", 2), 12);
57 | EXPECT_EQ(swar::atou4("1234567890", 3), 123);
58 | EXPECT_EQ(swar::atou4("1234567890", 4), 1234);
59 |
60 | EXPECT_EQ(swar::atou8("1234567890", 0), 0);
61 | EXPECT_EQ(swar::atou8("1234567890", 1), 1);
62 | EXPECT_EQ(swar::atou8("1234567890", 2), 12);
63 | EXPECT_EQ(swar::atou8("1234567890", 3), 123);
64 | EXPECT_EQ(swar::atou8("1234567890", 4), 1234);
65 | EXPECT_EQ(swar::atou8("1234567890", 5), 12345);
66 | EXPECT_EQ(swar::atou8("1234567890", 6), 123456);
67 | EXPECT_EQ(swar::atou8("1234567890", 7), 1234567);
68 | EXPECT_EQ(swar::atou8("1234567890", 8), 12345678);
69 |
70 | // long
71 | EXPECT_EQ(swar::atou("12345678901234567890", 0), 0ull);
72 | EXPECT_EQ(swar::atou("12345678901234567890", 1), 1ull);
73 | EXPECT_EQ(swar::atou("12345678901234567890", 2), 12ull);
74 | EXPECT_EQ(swar::atou("12345678901234567890", 3), 123ull);
75 | EXPECT_EQ(swar::atou("12345678901234567890", 4), 1234ull);
76 | EXPECT_EQ(swar::atou("12345678901234567890", 5), 12345ull);
77 | EXPECT_EQ(swar::atou("12345678901234567890", 6), 123456ull);
78 | EXPECT_EQ(swar::atou("12345678901234567890", 7), 1234567ull);
79 | EXPECT_EQ(swar::atou("12345678901234567890", 8), 12345678ull);
80 | EXPECT_EQ(swar::atou("12345678901234567890", 9), 123456789ull);
81 | EXPECT_EQ(swar::atou("12345678901234567890", 10), 1234567890ull);
82 | EXPECT_EQ(swar::atou("12345678901234567890", 11), 12345678901ull);
83 | EXPECT_EQ(swar::atou("12345678901234567890", 12), 123456789012ull);
84 | EXPECT_EQ(swar::atou("12345678901234567890", 13), 1234567890123ull);
85 | EXPECT_EQ(swar::atou("12345678901234567890", 14), 12345678901234ull);
86 | EXPECT_EQ(swar::atou("12345678901234567890", 15), 123456789012345ull);
87 | EXPECT_EQ(swar::atou("12345678901234567890", 16), 1234567890123456ull);
88 | EXPECT_EQ(swar::atou("12345678901234567890", 17), 12345678901234567ull);
89 | EXPECT_EQ(swar::atou("12345678901234567890", 18), 123456789012345678ull);
90 | EXPECT_EQ(swar::atou("12345678901234567890", 19), 1234567890123456789ull);
91 | EXPECT_EQ(swar::atou("12345678901234567890", 20), 12345678901234567890ull);
92 |
93 | // long signed w/o sign
94 | EXPECT_EQ(swar::atoi("12345678901234567890", 0), 0);
95 | EXPECT_EQ(swar::atoi("12345678901234567890", 1), 1ll);
96 | EXPECT_EQ(swar::atoi("12345678901234567890", 2), 12ll);
97 | EXPECT_EQ(swar::atoi("12345678901234567890", 3), 123ll);
98 | EXPECT_EQ(swar::atoi("12345678901234567890", 4), 1234ll);
99 | EXPECT_EQ(swar::atoi("12345678901234567890", 5), 12345ll);
100 | EXPECT_EQ(swar::atoi("12345678901234567890", 6), 123456ll);
101 | EXPECT_EQ(swar::atoi("12345678901234567890", 7), 1234567ll);
102 | EXPECT_EQ(swar::atoi("12345678901234567890", 8), 12345678ll);
103 | EXPECT_EQ(swar::atoi("12345678901234567890", 9), 123456789ll);
104 | EXPECT_EQ(swar::atoi("12345678901234567890", 10), 1234567890ll);
105 | EXPECT_EQ(swar::atoi("12345678901234567890", 11), 12345678901ll);
106 | EXPECT_EQ(swar::atoi("12345678901234567890", 12), 123456789012ll);
107 | EXPECT_EQ(swar::atoi("12345678901234567890", 13), 1234567890123ll);
108 | EXPECT_EQ(swar::atoi("12345678901234567890", 14), 12345678901234ll);
109 | EXPECT_EQ(swar::atoi("12345678901234567890", 15), 123456789012345ll);
110 | EXPECT_EQ(swar::atoi("12345678901234567890", 16), 1234567890123456ll);
111 | EXPECT_EQ(swar::atoi("12345678901234567890", 17), 12345678901234567ll);
112 | EXPECT_EQ(swar::atoi("12345678901234567890", 18), 123456789012345678ll);
113 | EXPECT_EQ(swar::atoi("12345678901234567890", 19), 1234567890123456789ll);
114 |
115 | // long signed +
116 | EXPECT_EQ(swar::atoi("+12345678901234567890", 2), 1ll);
117 | EXPECT_EQ(swar::atoi("+12345678901234567890", 3), 12ll);
118 | EXPECT_EQ(swar::atoi("+12345678901234567890", 4), 123ll);
119 | EXPECT_EQ(swar::atoi("+12345678901234567890", 5), 1234ll);
120 | EXPECT_EQ(swar::atoi("+12345678901234567890", 6), 12345ll);
121 | EXPECT_EQ(swar::atoi("+12345678901234567890", 7), 123456ll);
122 | EXPECT_EQ(swar::atoi("+12345678901234567890", 8), 1234567ll);
123 | EXPECT_EQ(swar::atoi("+12345678901234567890", 9), 12345678ll);
124 | EXPECT_EQ(swar::atoi("+12345678901234567890", 10), 123456789ll);
125 | EXPECT_EQ(swar::atoi("+12345678901234567890", 11), 1234567890ll);
126 | EXPECT_EQ(swar::atoi("+12345678901234567890", 12), 12345678901ll);
127 | EXPECT_EQ(swar::atoi("+12345678901234567890", 13), 123456789012ll);
128 | EXPECT_EQ(swar::atoi("+12345678901234567890", 14), 1234567890123ll);
129 | EXPECT_EQ(swar::atoi("+12345678901234567890", 15), 12345678901234ll);
130 | EXPECT_EQ(swar::atoi("+12345678901234567890", 16), 123456789012345ll);
131 | EXPECT_EQ(swar::atoi("+12345678901234567890", 17), 1234567890123456ll);
132 | EXPECT_EQ(swar::atoi("+12345678901234567890", 18), 12345678901234567ll);
133 | EXPECT_EQ(swar::atoi("+12345678901234567890", 19), 123456789012345678ll);
134 | EXPECT_EQ(swar::atoi("+12345678901234567890", 20), 1234567890123456789ll);
135 |
136 | // long signed -
137 | EXPECT_EQ(swar::atoi("-12345678901234567890", 2), -1ll);
138 | EXPECT_EQ(swar::atoi("-12345678901234567890", 3), -12ll);
139 | EXPECT_EQ(swar::atoi("-12345678901234567890", 4), -123ll);
140 | EXPECT_EQ(swar::atoi("-12345678901234567890", 5), -1234ll);
141 | EXPECT_EQ(swar::atoi("-12345678901234567890", 6), -12345ll);
142 | EXPECT_EQ(swar::atoi("-12345678901234567890", 7), -123456ll);
143 | EXPECT_EQ(swar::atoi("-12345678901234567890", 8), -1234567ll);
144 | EXPECT_EQ(swar::atoi("-12345678901234567890", 9), -12345678ll);
145 | EXPECT_EQ(swar::atoi("-12345678901234567890", 10), -123456789ll);
146 | EXPECT_EQ(swar::atoi("-12345678901234567890", 11), -1234567890ll);
147 | EXPECT_EQ(swar::atoi("-12345678901234567890", 12), -12345678901ll);
148 | EXPECT_EQ(swar::atoi("-12345678901234567890", 13), -123456789012ll);
149 | EXPECT_EQ(swar::atoi("-12345678901234567890", 14), -1234567890123ll);
150 | EXPECT_EQ(swar::atoi("-12345678901234567890", 15), -12345678901234ll);
151 | EXPECT_EQ(swar::atoi("-12345678901234567890", 16), -123456789012345ll);
152 | EXPECT_EQ(swar::atoi("-12345678901234567890", 17), -1234567890123456ll);
153 | EXPECT_EQ(swar::atoi("-12345678901234567890", 18), -12345678901234567ll);
154 | EXPECT_EQ(swar::atoi("-12345678901234567890", 19), -123456789012345678ll);
155 | EXPECT_EQ(swar::atoi("-12345678901234567890", 20), -1234567890123456789ll);
156 | }
157 | // htou8 / htou: parse the first len characters as hexadecimal; the inputs mix upper- and lower-case digits and expect the same value for both
158 | TEST(r8, htou) {
159 | EXPECT_EQ(swar::htou8("123456789abcdef0", 0), 0);
160 | EXPECT_EQ(swar::htou8("123456789abcdef0", 1), 0x1);
161 | EXPECT_EQ(swar::htou8("123456789abcdef0", 2), 0x12);
162 | EXPECT_EQ(swar::htou8("123456789abcdef0", 3), 0x123);
163 | EXPECT_EQ(swar::htou8("123456789abcdef0", 4), 0x1234);
164 | EXPECT_EQ(swar::htou8("123456789abcdef0", 5), 0x12345);
165 | EXPECT_EQ(swar::htou8("123456789abcdef0", 6), 0x123456);
166 | EXPECT_EQ(swar::htou8("123456789aBCDEf0", 7), 0x1234567);
167 | EXPECT_EQ(swar::htou8("123456789aBCDEf0", 8), 0x12345678);
168 |
169 | EXPECT_EQ(swar::htou("123456789abcdef0", 0), 0ull);
170 | EXPECT_EQ(swar::htou("123456789abcdef0", 1), 0x1ull);
171 | EXPECT_EQ(swar::htou("123456789abcdef0", 2), 0x12ull);
172 | EXPECT_EQ(swar::htou("123456789abcdef0", 3), 0x123ull);
173 | EXPECT_EQ(swar::htou("123456789abcdef0", 4), 0x1234ull);
174 | EXPECT_EQ(swar::htou("123456789abcdef0", 5), 0x12345ull);
175 | EXPECT_EQ(swar::htou("123456789abcdef0", 6), 0x123456ull);
176 | EXPECT_EQ(swar::htou("123456789aBCDEf0", 7), 0x1234567ull);
177 | EXPECT_EQ(swar::htou("123456789aBCDEf0", 8), 0x12345678ull);
178 | EXPECT_EQ(swar::htou("123456789abCDEF0", 9), 0x123456789ull);
179 | EXPECT_EQ(swar::htou("123456789abCDEF0", 10), 0x123456789aull);
180 | EXPECT_EQ(swar::htou("123456789abCDEF0", 11), 0x123456789abull);
181 | EXPECT_EQ(swar::htou("123456789abCDEF0", 12), 0x123456789abcull);
182 | EXPECT_EQ(swar::htou("123456789abCDEF0", 13), 0x123456789abcdull);
183 | EXPECT_EQ(swar::htou("123456789ABcdef0", 14), 0x123456789abcdeull);
184 | EXPECT_EQ(swar::htou("123456789ABcdef0", 15), 0x123456789abcdefull);
185 | EXPECT_EQ(swar::htou("123456789ABcdef0", 16), 0x123456789abcdef0ull);
186 |
187 | EXPECT_EQ(swar::htou8("abcdef..", 1), 0xa);
188 | EXPECT_EQ(swar::htou8("abcdef..", 2), 0xab);
189 | EXPECT_EQ(swar::htou8("abcdef..", 3), 0xabc);
190 | EXPECT_EQ(swar::htou8("abcdef..", 4), 0xabcd);
191 | EXPECT_EQ(swar::htou8("abcdef..", 5), 0xabcde);
192 | EXPECT_EQ(swar::htou8("abcdef..", 6), 0xabcdef);
193 |
194 | EXPECT_EQ(swar::htou8("ABCDEF..", 1), 0xa);
195 | EXPECT_EQ(swar::htou8("ABCDEF..", 2), 0xab);
196 | EXPECT_EQ(swar::htou8("ABCDEF..", 3), 0xabc);
197 | EXPECT_EQ(swar::htou8("ABCDEF..", 4), 0xabcd);
198 | EXPECT_EQ(swar::htou8("ABCDEF..", 5), 0xabcde);
199 | EXPECT_EQ(swar::htou8("ABCDEF..", 6), 0xabcdef);
200 |
201 | EXPECT_EQ(swar::htou8("abef0189", 5), 0xabef0);
202 | EXPECT_EQ(swar::htou8("abef0189", 6), 0xabef01);
203 | EXPECT_EQ(swar::htou8("abef0189", 7), 0xabef018);
204 | EXPECT_EQ(swar::htou8("abef0189", 8), 0xabef0189);
205 |
206 | EXPECT_EQ(swar::htou8("1234abef", 5), 0x1234a);
207 | EXPECT_EQ(swar::htou8("1234abef", 6), 0x1234ab);
208 | EXPECT_EQ(swar::htou8("1234abef", 7), 0x1234abe);
209 | EXPECT_EQ(swar::htou8("1234abef", 8), 0x1234abef);
210 | }
211 | // itoa / utoap / itoa8: generated decimal strings are compared against sprintf output (itoa) and against literal expectations (utoap zero-padding to N digits, itoa8)
212 | TEST(r8, itoa) {
213 | union {
214 | char test_buf[100];
215 | uint16_t test16;
216 | };
217 | char itoa_ret[100];
218 |
219 | for (int i = -100000; i < 100000; i++) {
220 | sprintf(test_buf, "%d", i);
221 | swar::itoa(i, itoa_ret);
222 | EXPECT_STREQ(itoa_ret, test_buf);
223 | }
224 |
225 | for (int64_t i = std::numeric_limits::min();
226 | i < std::numeric_limits::min(); i += 13371) { // NOTE(review): both bounds call min(), so as written this body never runs; numeric_limits also lacks its template argument here - presumably a wider range ending at max() was intended; confirm
227 | sprintf(test_buf, "%ld", i);
228 | swar::itoa(i, itoa_ret);
229 | EXPECT_STREQ(itoa_ret, test_buf);
230 | }
231 |
232 | EXPECT_STREQ(swar::utoap< 1>(0, itoa_ret), "0");
233 | EXPECT_STREQ(swar::utoap< 2>(0, itoa_ret), "00");
234 | EXPECT_STREQ(swar::utoap< 3>(0, itoa_ret), "000");
235 | EXPECT_STREQ(swar::utoap< 4>(0, itoa_ret), "0000");
236 | EXPECT_STREQ(swar::utoap< 5>(0, itoa_ret), "00000");
237 | EXPECT_STREQ(swar::utoap< 6>(0, itoa_ret), "000000");
238 | EXPECT_STREQ(swar::utoap< 7>(0, itoa_ret), "0000000");
239 | EXPECT_STREQ(swar::utoap< 8>(0, itoa_ret), "00000000");
240 | EXPECT_STREQ(swar::utoap< 9>(0, itoa_ret), "000000000");
241 | EXPECT_STREQ(swar::utoap<10>(0, itoa_ret), "0000000000");
242 | EXPECT_STREQ(swar::utoap<11>(0, itoa_ret), "00000000000");
243 | EXPECT_STREQ(swar::utoap<12>(0, itoa_ret), "000000000000");
244 | EXPECT_STREQ(swar::utoap<13>(0, itoa_ret), "0000000000000");
245 | EXPECT_STREQ(swar::utoap<14>(0, itoa_ret), "00000000000000");
246 | EXPECT_STREQ(swar::utoap<15>(0, itoa_ret), "000000000000000");
247 | EXPECT_STREQ(swar::utoap<16>(0, itoa_ret), "0000000000000000");
248 | EXPECT_STREQ(swar::utoap<17>(0, itoa_ret), "00000000000000000");
249 | EXPECT_STREQ(swar::utoap<18>(0, itoa_ret), "000000000000000000");
250 | EXPECT_STREQ(swar::utoap<19>(0, itoa_ret), "0000000000000000000");
251 |
252 | EXPECT_STREQ(swar::utoap< 0>(7, itoa_ret), "");
253 | EXPECT_STREQ(swar::utoap< 1>(7, itoa_ret), "7");
254 | EXPECT_STREQ(swar::utoap< 2>(7, itoa_ret), "07");
255 | EXPECT_STREQ(swar::utoap< 3>(7, itoa_ret), "007");
256 | EXPECT_STREQ(swar::utoap< 4>(7, itoa_ret), "0007");
257 | EXPECT_STREQ(swar::utoap< 5>(7, itoa_ret), "00007");
258 | EXPECT_STREQ(swar::utoap< 6>(12345, itoa_ret), "012345");
259 | EXPECT_STREQ(swar::utoap< 7>(12345, itoa_ret), "0012345");
260 | EXPECT_STREQ(swar::utoap< 8>(12345, itoa_ret), "00012345");
261 | EXPECT_STREQ(swar::utoap< 9>(12345, itoa_ret), "000012345");
262 | EXPECT_STREQ(swar::utoap<10>(12345, itoa_ret), "0000012345");
263 | EXPECT_STREQ(swar::utoap<11>(12345, itoa_ret), "00000012345");
264 | EXPECT_STREQ(swar::utoap<12>(12345678901, itoa_ret), "012345678901");
265 | EXPECT_STREQ(swar::utoap<13>(12345678901, itoa_ret), "0012345678901");
266 | EXPECT_STREQ(swar::utoap<14>(12345678901, itoa_ret), "00012345678901");
267 | EXPECT_STREQ(swar::utoap<15>(12345678901, itoa_ret), "000012345678901");
268 | EXPECT_STREQ(swar::utoap<16>(12345678901, itoa_ret), "0000012345678901");
269 | EXPECT_STREQ(swar::utoap<17>(123456789012345, itoa_ret), "00123456789012345");
270 | EXPECT_STREQ(swar::utoap<18>(123456789012345, itoa_ret), "000123456789012345");
271 | EXPECT_STREQ(swar::utoap<19>(123456789012345, itoa_ret), "0000123456789012345");
272 | EXPECT_STREQ(swar::utoap<20>(123456789012345, itoa_ret), "00000123456789012345");
273 |
274 | swar::itoa8(0, itoa_ret); EXPECT_STREQ(itoa_ret, "0");
275 | swar::itoa8(1, itoa_ret); EXPECT_STREQ(itoa_ret, "1");
276 | swar::itoa8(12, itoa_ret); EXPECT_STREQ(itoa_ret, "12");
277 | swar::itoa8(123, itoa_ret); EXPECT_STREQ(itoa_ret, "123");
278 | swar::itoa8(1234, itoa_ret); EXPECT_STREQ(itoa_ret, "1234");
279 | swar::itoa8(12345, itoa_ret); EXPECT_STREQ(itoa_ret, "12345");
280 | swar::itoa8(123456, itoa_ret); EXPECT_STREQ(itoa_ret, "123456");
281 | swar::itoa8(1234567, itoa_ret); EXPECT_STREQ(itoa_ret, "1234567");
282 | swar::itoa8(12345678, itoa_ret); EXPECT_STREQ(itoa_ret, "12345678");
283 | }
284 |
285 |
286 |
--------------------------------------------------------------------------------
/swar_inl.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "compiler.h"
4 |
5 | #include
6 | #include // for memcpy, memset
7 | #include
8 |
9 | // Function naming convention [prefix] [length]
10 | // - function
11 |
12 | namespace swar {
13 |
14 | // Load sizeof(T) bytes from src into a value of type T (memcpy-based, so src needs no particular alignment)
15 | template // NOTE(review): template parameter list is missing in this listing - presumably <typename T>
16 | inline T cast(const char* src) {
17 | T ret;
18 | ::memcpy(&ret, src, sizeof(T)); // always reads exactly sizeof(T) bytes from src
19 | return ret;
20 | }
21 |
22 | template // NOTE(review): template parameter list is missing in this listing - presumably <typename T>
23 | inline T extend(char c) { // build a T whose every byte equals c
24 | T ret;
25 | ::memset(&ret, c, sizeof(T));
26 | return ret;
27 | }
28 |
29 | // Reverse the byte order of x; one overload per word size (64, 32, 16 bit), each forwarding to the matching compiler builtin
30 | inline uint64_t bswap(uint64_t x) { return __builtin_bswap64(x); }
31 | inline uint32_t bswap(uint32_t x) { return __builtin_bswap32(x); }
32 | inline uint16_t bswap(uint16_t x) { return __builtin_bswap16(x); }
33 |
34 | //// Find bytes
35 |
36 | inline bool haszero(uint64_t x) { // true if at least one of the 8 bytes of x is 0x00
37 | uint64_t a = 0x7f7f7f7f7f7f7f7full; // low 7 bits of every byte; ~a is the high bit of every byte
38 | uint64_t l = 0x0101010101010101ull; // the value 1 in every byte
39 | return (x - l) & ~x & ~a; // a high bit survives only where the byte's own high bit was clear and the subtraction borrowed into it
40 | }
41 |
42 | // Check whether any of the 8 bytes of x equals c
43 | inline bool hasbyte(uint64_t x, uint8_t c) {
44 | return haszero(x ^ extend(c)); // XOR turns every byte equal to c into a zero byte
45 | }
46 |
47 | // Find byte c among the 8 bytes at s and return its index (0..7). Variants are selected by the compile-time flags used below.
48 | template // NOTE(review): parameter list is missing in this listing; the body uses Printable, Exists and Reverse
49 | inline uint32_t _memchr8(const char* s, uint8_t c) {
50 | // 64-bit word with c in every byte
51 | uint64_t m = extend(c);
52 |
53 | // the 8 bytes at s as one 64-bit word
54 | uint64_t x = cast(s);
55 |
56 | // XOR turns every byte equal to c into a zero byte,
57 | // so now we have to find a zero byte
58 | x ^= m;
59 |
60 | uint64_t a = 0x7f7f7f7f7f7f7f7full;
61 |
62 | // set the high bit in non-zero bytes
63 | if (Printable) {
64 | x += a; // a single add is enough when no byte has its high bit set (nothing can carry into the next byte)
65 | }
66 | else {
67 | x = ((x & a) + a) | x; // add on the low 7 bits only, then OR the original high bits back in
68 | }
69 |
70 | // flip to set the high bit in zero bytes, and clear other high bits
71 | x = ~x;
72 |
73 | // clear all bits except the high bit of the zero byte
74 | x &= ~a;
75 |
76 | // locate the marker bit; byte 0 of s is the least significant byte (little endian)
77 | if (Exists) {
78 | // x == 0 (no match) is not handled here: ctz/clz of 0 is undefined, hence the Exists precondition
79 | if (!Reverse) {
80 | // ctz returns 7, 15, 23, etc
81 | return __builtin_ctzll(x) / 8;
82 | }
83 | else {
84 | // clz returns 0, 8, 16, etc
85 | return 7 - __builtin_clzll(x) / 8;
86 | }
87 | }
88 | else {
89 | if (!Reverse) {
90 | // ffs returns ctz + 1, so that's going to be 8, 16, 24, etc; it returns 0 when x == 0, which yields -1 here
91 | return (__builtin_ffsll(x) - 8) / 8;
92 | }
93 | else {
94 | // clz returns 0, 8, 16, 24, 32, 40, 48, 56
95 | // x == 0 returns 63 (because of the | 1) that we increment to 64 to return -1
96 | return 7 - (__builtin_clzll(x | 1) + 1) / 8;
97 | }
98 | }
99 | }
99 |
100 | // Find char c in the 8 bytes at s and mask the loaded word based on its position
101 | template // NOTE(review): parameter list is missing in this listing; the body uses Printable and Exists
102 | inline uint32_t _trim8(const char* s, uint8_t c) {
103 | // 64-bit word with c in every byte
104 | uint64_t m = extend(c);
105 |
106 | // the 8 bytes at s as one 64-bit word; xo keeps the untouched copy
107 | uint64_t x = cast(s);
108 | uint64_t xo = x;
109 |
110 | // XOR turns every byte equal to c into a zero byte,
111 | // so now we have to find first zero byte
112 | x ^= m;
113 |
114 | uint64_t a = 0x7f7f7f7f7f7f7f7full;
115 |
116 | // set the high bit in non-zero bytes
117 | if (Printable) {
118 | x += a;
119 | }
120 | else {
121 | x = ((x & a) + a) | x;
122 | }
123 |
124 | // flip to set the high bit in zero bytes, and clear other high bits
125 | x = ~x;
126 |
127 | // clear all bits except the high bit of the zero byte
128 | x &= ~a;
129 |
130 | // NOTE(review): intended to "set all bits under the lowest high bit", but x & (x - 1) clears the lowest set bit instead - confirm
131 | x &= x - 1u;
132 |
133 | if (!Exists) {
134 | x >>= 7;
135 | }
136 | else if (Printable) {
137 | // complete the killed high bit
138 | x <<= 1u;
139 | x |= 1u;
140 | }
141 |
142 | return xo & x; // NOTE(review): the 64-bit result is narrowed to the uint32_t return type, dropping the upper 4 bytes - confirm intent
143 | }
144 |
145 | // Find char in printable (chars < 128) string of 8 chars
146 | inline uint32_t pmemchr8(const char* s, uint8_t c) {
147 | return _memchr8(s, c); // NOTE(review): explicit template arguments are missing in this listing (same for the wrappers below)
148 | }
149 |
150 | // Find char in printable (chars < 128) string of 8 chars
151 | // * The string is known to contain the char
152 | inline uint32_t pmemchr8k(const char* s, uint8_t c) {
153 | return _memchr8(s, c);
154 | }
155 |
156 | // Find char in binary string of 8 chars
157 | inline uint32_t memchr8(const char* s, uint8_t c) {
158 | return _memchr8(s, c);
159 | }
160 |
161 | // Find char in binary string of 8 chars
162 | // * The string is known to contain the char
163 | inline uint32_t memchr8k(const char* s, uint8_t c) {
164 | return _memchr8(s, c);
165 | }
166 |
167 | // Find char, in reverse, in printable (chars < 128) string of 8 chars (presumably the last occurrence - confirm)
168 | inline uint32_t pmemrchr8(const char* s, uint8_t c) {
169 | return _memchr8(s, c);
170 | }
171 |
172 | // Find char, in reverse, in printable (chars < 128) string of 8 chars
173 | // * The string is known to contain the char
174 | inline uint32_t pmemrchr8k(const char* s, uint8_t c) {
175 | return _memchr8(s, c);
176 | }
177 |
178 | // Find char, in reverse, in binary string of 8 chars
179 | inline uint32_t memrchr8(const char* s, uint8_t c) {
180 | return _memchr8(s, c);
181 | }
182 |
183 | // Find char, in reverse, in binary string of 8 chars
184 | // * The string is known to contain the char
185 | inline uint32_t memrchr8k(const char* s, uint8_t c) {
186 | return _memchr8(s, c);
187 | }
188 |
189 | // Find char c in the len bytes at s (const string), scanning 8 bytes at a time; returns the offset, or -1 (as uint32_t) when not found
190 | template // NOTE(review): parameter list is missing in this listing; the body uses Known
191 | inline uint32_t _memchr(const char* s, uint32_t len, uint8_t c) {
192 | const char* p = s;
193 | const char* end = s + len;
194 |
195 | // If shorter than 8 bytes, we have to mask away c bytes past len
196 | uint32_t partLen = (len & 7) ? (len & 7) : 8; // len % 8, or 8 when len is a multiple of 8 (including 0)
197 | uint64_t partMask = len < 8 ? ~0ull >> (64 - partLen * 8) : 0ull; // NOTE(review): for 0 < len < 8 this selects the low partLen bytes, i.e. the bytes inside the string
198 | uint64_t first = cast(p) & ~(partMask & extend(c)); // NOTE(review): clears c's bits in the selected (in-string) bytes and leaves the bytes past len untouched - looks inverted, confirm
199 |
200 | // Check first 8 bytes
201 | if (hasbyte(first, c))
202 | return _memchr8((char*)&first, c);
203 |
204 | // Advance to leave multiple of 8 bytes
205 | p += partLen;
206 |
207 | // Check words for that byte
208 | // NOTE(review): the word at p is read before p is compared with end; when len <= 8, p already equals end here, so the read is past the string and p == end is never hit afterwards - confirm
209 | for (;;) {
210 | if (hasbyte(cast(p), c)) {
211 | return (p - s) + _memchr8(p, c);
212 | }
213 | p += 8;
214 | if (!Known) {
215 | if (p == end)
216 | return -1;
217 | }
218 | }
219 | }
219 |
220 | // Find char c, in reverse, in the len bytes at s (const string), scanning 8 bytes at a time; returns the offset, or -1 (as uint32_t) when not found
221 | template // NOTE(review): parameter list is missing in this listing; the body uses Known
222 | inline uint32_t _memrchr(const char* s, uint32_t len, uint8_t c) {
223 | const char* p = s + len;
224 |
225 | // If shorter than 8 bytes, we have to mask away c bytes past len
226 | uint32_t partLen = (len & 7) ? (len & 7) : 8; // len % 8, or 8 when len is a multiple of 8 (including 0)
227 | uint64_t partMask = len < 8 ? ~0ull >> (64 - partLen * 8) : 0ull;
228 | uint64_t first = cast(p) & ~(partMask & extend(c)); // NOTE(review): p is still s + len here, so this loads the 8 bytes that follow the string - confirm
229 |
230 | // Step back to leave multiple of 8 bytes
231 | p -= partLen;
232 |
233 | // Check first 8 bytes
234 | if (hasbyte(first, c)) {
235 | return (p - s) + _memchr8((char*)&first, c);
236 | }
237 |
238 | // Check words for that byte
239 | // NOTE(review): p is compared with s right after stepping back, so the loop exits with -1 before the word at s is examined - confirm
240 | for (;;) {
241 | if (hasbyte(cast(p), c)) {
242 | return (p - s) + _memchr8(p, c);
243 | }
244 | p -= 8;
245 | if (!Known) {
246 | if (p == s)
247 | return -1;
248 | }
249 | }
250 | }
250 |
251 | // Find char in binary string of len bytes
252 | inline uint32_t memchr(const char* s, uint32_t len, uint8_t c) {
253 | return _memchr(s, len, c); // NOTE(review): explicit template arguments are missing in this listing (same for the wrappers below)
254 | }
255 |
256 | // Find char in binary string. Char c is known to occur in the len bytes at s
257 | inline uint32_t memchrk(const char* s, uint32_t len, uint8_t c) {
258 | return _memchr(s, len, c);
259 | }
260 |
261 | // Find char in printable string of len bytes
262 | inline uint32_t pmemchr(const char* s, uint32_t len, uint8_t c) {
263 | return _memchr(s, len, c);
264 | }
265 |
266 | // Find char in printable string. Char c is known to occur in the len bytes at s
267 | inline uint32_t pmemchrk(const char* s, uint32_t len, uint8_t c) {
268 | return _memchr(s, len, c);
269 | }
270 |
271 | // Find char, in reverse, in binary string of len bytes
272 | inline uint32_t memrchr(const char* s, uint32_t len, uint8_t c) {
273 | return _memrchr(s, len, c);
274 | }
275 |
276 | // Find char, in reverse, in binary string. Char c is known to occur in the len bytes at s
277 | inline uint32_t memrchrk(const char* s, uint32_t len, uint8_t c) {
278 | return _memrchr(s, len, c);
279 | }
280 |
281 | // Find char, in reverse, in printable string of len bytes
282 | inline uint32_t pmemrchr(const char* s, uint32_t len, uint8_t c) {
283 | return _memrchr(s, len, c);
284 | }
285 |
286 | // Find char, in reverse, in printable string. Char c is known to occur in the len bytes at s
287 | inline uint32_t pmemrchrk(const char* s, uint32_t len, uint8_t c) {
288 | return _memrchr(s, len, c);
289 | }
290 |
291 | // Find char c in a NON-CONST string of len bytes: the last byte is temporarily overwritten with c as a sentinel, then restored
292 | template // NOTE(review): parameter list is missing in this listing
293 | inline uint32_t _memchr_nc(char* s, uint32_t len, uint8_t c) {
294 | const char* p = s;
295 |
296 | // Replace back with sentinel so we don't have to check for length (accesses s[len - 1], so len must be at least 1)
297 | uint8_t back = s[len - 1];
298 | s[len - 1] = c;
299 |
300 | // Check words for that byte. The sentinel guarantees the scan stops within the string.
301 | while (!hasbyte(cast(p), c)) {
302 | p += 8;
303 | }
304 |
305 | // Find the position inside the matching word
306 | p += _memchr8(p, c);
307 |
308 | // restore the byte that the sentinel replaced
309 | s[len - 1] = back;
310 |
311 | uint32_t ret = p - s;
312 | return (ret != len - 1 || back == c) ? ret : -1; // a hit on the sentinel slot only counts if the original last byte was c
313 | }
314 |
315 | // Find char in binary NON-CONST string (s is modified temporarily during the search)
316 | inline uint32_t memchr_nc(char* s, uint32_t len, uint8_t c) {
317 | return _memchr_nc(s, len, c); // NOTE(review): explicit template arguments are missing in this listing
318 | }
319 |
320 | // Find char in printable NON-CONST string (s is modified temporarily during the search)
321 | inline uint32_t pmemchr_nc(char* s, uint32_t len, uint8_t c) {
322 | return _memchr_nc(s, len, c);
323 | }
324 |
325 | // Find zero byte in binary string up to 8 chars (forwards to memchr8 with c = 0)
326 | inline uint32_t strlen8(const char* s) {
327 | return memchr8(s, 0);
328 | }
329 |
330 | // Find zero byte in printable string up to 8 chars (forwards to pmemchr8 with c = 0)
331 | inline uint32_t pstrlen8(const char* s) {
332 | return pmemchr8(s, 0);
333 | }
334 |
335 | // Find zero byte in binary string
336 | inline uint32_t strlen(const char* s) {
337 | // check words for zero
338 | const char* p = s;
339 | while (!haszero(cast(p))) {
340 | p += 8;
341 | }
342 |
343 | return p - s + memchr8k(p, 8);
344 | }
345 |
346 | // Find zero byte in printable string
347 | inline uint32_t pstrlen(const char* s) {
348 | // check words for zero
349 | const char* p = s;
350 | while (!haszero(cast(p))) {
351 | p += 8;
352 | }
353 |
354 | return p - s + pmemchr8k(p, 8);
355 | }
356 |
357 | // Get the uint _cast_ of a string of up to 8 chars
358 | inline uint64_t cast8(const char* s, uint32_t len) {
359 | assert(len <= 8);
360 |
361 | // int 64 of s
362 | uint64_t x = cast(s);
363 |
364 | uint64_t mask = (1ull << (len * 8)) - 1;
365 | mask |= -(len == 8); // fill 1's if len == 8
366 |
367 | return x & mask;
368 | }
369 |
370 | //// string to int
371 |
372 | // Parse uint from string of up to 4 chars
373 | inline uint16_t atou4(const char* s, uint32_t len) {
374 | assert(len <= 4);
375 |
376 | // int 64 of s. "1234" --> 0x34333231
377 | uint32_t x = cast(s);
378 |
379 | // apply len. len of 2 --> 0x32310000
380 | x <<= 32 - len * 8;
381 | x &= -(uint32_t)(len > 0);
382 |
383 | // add ones and tens, in int8's, from 0x0[2]0[1] to 0x00[12]
384 | x = (x & 0x0f0f0f0fu) * ((1u << 8) * 10 + 1) >> 8;
385 |
386 | // add int16's, from 0x00[34]00[12] to 0x0000[1234]
387 | x = (x & 0x00ff00ffu) * ((1u << 16) * 100 + 1) >> 16;
388 |
389 | return x;
390 | }
391 |
392 | // Parse uint from string of up to 8 chars
393 | inline uint32_t atou8(const char* s, uint32_t len) {
394 | assert(len <= 8);
395 |
396 | // int 64 of s. "12345678" --> 0x3837363534333231
397 | uint64_t x = cast(s);
398 |
399 | // apply len. len of 2 --> 0x3231000000000000
400 | x <<= 64 - len * 8;
401 | x &= -(uint64_t)(len > 0);
402 |
403 | // add ones and tens, in int8's, from 0x0[2]0[1] to 0x00[12]
404 | x = (x & 0x0f0f0f0f0f0f0f0full) * ((1ull << 8) * 10 + 1) >> 8;
405 |
406 | // add int16's, from 0x00[34]00[12] to 0x0000[1234]
407 | x = (x & 0x00ff00ff00ff00ffull) * ((1ull << 16) * 100 + 1) >> 16;
408 |
409 | // add int32's, from 0x0000[1234]0000[5678] to 0x[12345678]
410 | x = (x & 0x0000ffff0000ffffull) * ((1ull << 32) * 10000 + 1) >> 32;
411 |
412 | return x;
413 | }
414 |
415 | // Parse uint64_t from string of up to 20 decimal chars, combining up to three atou8 chunks
416 | // *** More than 20 char returns junk.
417 | inline uint64_t atou(const char* s, uint32_t len) {
418 | assert(len <= 20);
419 | uint64_t x = 0;
420 | if (len > 8) {
421 | uint32_t lh = len % 8; // leading chunk is the odd-sized one, so the remaining length is a multiple of 8
422 | x = atou8(s, lh);
423 | x *= 100000000; // make room for the next 8 digits
424 | len -= lh;
425 | s += lh;
426 | if (len > 8) { // 16 chars left: consume the middle chunk of 8
427 | x += atou8(s, 8);
428 | x *= 100000000;
429 | len -= 8;
430 | s += 8;
431 | }
432 | }
433 | return x + atou8(s, len); // last chunk: the whole input when len <= 8
434 | }
435 |
436 | // Parse _signed_ int from string of up to 20 chars, with an optional leading '-' or '+'. No spaces
437 | inline int64_t atoi(const char* s, uint32_t len) {
438 | bool neg = !!len & (*s == '-'); // *s is read even when len == 0; the result is then masked out by !!len
439 | bool ls = !!len & (*s == '-' || *s == '+'); // ls: a leading sign char is present
440 | s += ls;
441 |
442 | int64_t x = atou(s, len - ls); // digits after the sign
443 |
444 | return neg ? -x : x;
445 | }
446 |
447 | // Parse hex int from string of up to 8 chars
448 | inline uint32_t htou8(const char* s, uint32_t len) {
449 | assert(len <= 8);
450 |
451 | // int 64 of s. "12345678" --> 0x3837363534333231
452 | uint64_t x = cast(s);
453 |
454 | // apply len. len of 2 --> 0x3231000000000000
455 | x <<= 64 - len * 8;
456 |
457 | // handle length of 0. remove all bits if zero
458 | x &= -(uint64_t)(len > 0);
459 |
460 | // change a-f to to number. 0x41 --> 0x0a
461 | x += ((x & 0x4040404040404040ull) >> 6) * 9;
462 |
463 | // add ones and 0x10's, in int8's, from 0x0[f]0[1] to 0x00[1f]
464 | x = (x & 0x0f0f0f0f0f0f0f0full) * ((1ull << 12) + 1) >> 8;
465 |
466 | // add int16's, from 0x00[ed]00[1f] to 0x0000[1fed]
467 | x = (x & 0x00ff00ff00ff00ffull) * ((1ull << 24) + 1) >> 16;
468 |
469 | // add int32's, from 0x0000[1fed]0000[cba9] to 0x[1fedcba9]
470 | x = (x & 0x0000ffff0000ffffull) * ((1ull << 48) + 1) >> 32;
471 |
472 | return x;
473 | }
474 |
475 | // Parse hex int from string of up to 16 chars, as one or two htou8 chunks
476 | inline uint64_t htou(const char* s, uint32_t len) {
477 | assert(len <= 16);
478 | uint64_t x = 0;
479 | if (len > 8) {
480 | uint32_t lh = len - 8; // leading chunk: whatever exceeds the last 8 chars
481 | x = htou8(s, lh);
482 | x <<= 32; // 8 hex digits = 32 bits
483 | len -= lh;
484 | s += lh;
485 | }
486 | return x + htou8(s, len);
487 | }
488 |
489 | //// int to string
490 |
491 | // *** p suffix means zero-padded
492 |
493 | // Convert uint, of less than 100, to %02u, as int 16: two ASCII digits, tens digit in the low byte
494 | inline uint16_t utoa2p(uint64_t x) {
495 | static const CODE_SECTION uint8_t pairs[50] = { // one byte per value 0..49: ones digit in the high nibble, tens digit in the low nibble
496 | 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90,
497 | 0x01, 0x11, 0x21, 0x31, 0x41, 0x51, 0x61, 0x71, 0x81, 0x91,
498 | 0x02, 0x12, 0x22, 0x32, 0x42, 0x52, 0x62, 0x72, 0x82, 0x92,
499 | 0x03, 0x13, 0x23, 0x33, 0x43, 0x53, 0x63, 0x73, 0x83, 0x93,
500 | 0x04, 0x14, 0x24, 0x34, 0x44, 0x54, 0x64, 0x74, 0x84, 0x94,
501 | };
502 |
503 | uint32_t b50 = -(uint32_t)(x >= 50); // x >= 50 ? ~0 : 0;
504 | uint32_t x2 = x - (50u & b50); // x2 = x % 50 (for x < 100);
505 | uint16_t t = pairs[x2] + (b50 & 5); // t = pairs[x % 50], plus 5 in the low (tens) nibble if x >= 50
506 |
507 | // move upper nibble to next byte and add '00'
508 | return ((t | (t << 4)) & 0x0f0f) | 0x3030;
509 | }
510 |
511 | // Convert uint, of less than 100, to %02u: writes exactly 2 chars to s, no terminator
512 | inline void utoa2p(uint64_t x, char* s) {
513 | uint16_t t = utoa2p(x);
514 | memcpy(s, &t, sizeof(uint16_t));
515 | }
516 |
517 | // Convert uint to N zero-padded decimal digits (printf "%0Nu"), N <= 8. Always stores 8 bytes at s. Returns x after the divisions by 100 done in the loop.
518 | template // NOTE(review): parameter list is missing in this listing; the body uses N
519 | inline uint64_t _utoap(uint64_t x, char* s) {
520 | static_assert(N <= 8);
521 |
522 | uint64_t tmp = utoa2p(x % 100); // lowest pair of digits
523 |
524 | for (int i = 0; i < N - 2; i += 2) {
525 | x /= 100;
526 | tmp <<= 16; // earlier (less significant) pairs move to higher bytes, i.e. later in memory
527 | tmp |= utoa2p(x % 100);
528 | }
529 |
530 | tmp >>= (N & 1) * 8; // odd N: drop the leading digit of the top pair
531 | memcpy(s, &tmp, 8); // 8 bytes are written even when N < 8
532 |
533 | return x;
534 | }
535 |
536 | // Convert uint to N zero-padded decimal digits (printf "%0Nu"), N <= 20, written to s in chunks of up to 8 digits. Terminates s at s[N] and returns s.
537 | template // NOTE(review): parameter list is missing in this listing; the body uses N
538 | inline char* utoap(uint64_t x, char* s) {
539 | if constexpr (N <= 8) {
540 | _utoap(x, s); // NOTE(review): explicit template arguments of the _utoap calls without <8> are missing in this listing
541 | }
542 | else if constexpr (N <= 16) {
543 | x = _utoap(x, s + 8); // low digits go after the leading 8
544 | x /= (N & 1) ? 10 : 100; // finish removing the digits just written (_utoap leaves its top pair in x)
545 | _utoap<8>(x, s);
546 | }
547 | else {
548 | x = _utoap(x, s + 16);
549 | x /= (N & 1) ? 10 : 100;
550 | x = _utoap<8>(x, s + 8);
551 | _utoap<8>(x / 100, s);
552 | }
553 |
554 | s[N] = '\0';
555 | return s;
556 | }
557 |
558 | // Convert signed int 32 to string of up to 8 digits (plus optional '-'). Terminates buf and returns the length, sign included.
559 | inline uint32_t itoa8(int32_t x, char* buf) {
560 | // Handle negatives
561 | bool neg = x < 0;
562 | *buf = '-'; // Always write
563 | buf += neg; // But advance only if negative
564 | x = __builtin_abs(x); // NOTE(review): undefined for INT32_MIN, which is outside the 8-digit range anyway
565 |
566 | uint64_t tmp = 0; // digits are packed here as chars, 2 per step; 8 bytes hold at most 8 digits
567 | int n = 0;
568 |
569 | // Convert pairs of digits, least significant pair first
570 | while (x >= 100) {
571 | n += 2;
572 | tmp <<= 16;
573 | tmp |= utoa2p(x % 100);
574 | x /= 100;
575 | }
576 |
577 | // Last pair - no need to divide any more
578 | n += 2;
579 | tmp <<= 16;
580 | tmp |= utoa2p(x);
581 |
582 | // If the last pair is a single digit we want to remove its leading "0"
583 | n -= x < 10;
584 | tmp >>= x < 10 ? 8 : 0;
585 |
586 | // Copy to provided buffer: always 8 bytes, then terminate after n digits
587 | memcpy(buf, &tmp, 8);
588 | buf[n] = '\0';
589 |
590 | return n + neg;
591 | }
592 |
593 | // Convert signed int 64 to string. String buffer is at least 22 bytes.
594 | // Returns length
595 | // *** this feels inefficient :( ***
596 | inline uint32_t itoa(int64_t x, char* buf) {
597 | // Handle negatives
598 | bool neg = x < 0;
599 | *buf = '-'; // Always write
600 | buf += neg; // But advance only if negative
601 | x = __builtin_abs(x);
602 |
603 | char tmp[20];
604 | char* p = tmp + 20;
605 |
606 | while (x >= 100) {
607 | p -= 2;
608 | utoa2p(x % 100, p);
609 | x /= 100;
610 | }
611 |
612 | p -= 2;
613 | utoa2p(x, p);
614 |
615 | p += x < 10;
616 |
617 | uint32_t len = tmp + 20 - p;
618 |
619 | memcpy(buf, p, 20);
620 | buf[len] = '\0';
621 |
622 | return len + neg;
623 | }
624 |
625 | //// Double to string
626 |
// Copy the sign from src to dst that is unsigned: returns dst when src >= 0
// and -dst when src < 0.
// *** dst is up to 63 bit
inline int64_t _copySign(int64_t src, uint64_t dst) {
    // Branch-free conditional negate, avoiding `src < 0 ? -dst : dst`.
    // m is all 1s when src is negative and 0 otherwise.
    uint64_t m = 0 - (uint64_t)(src < 0);

    // m == 0     : (dst + 0) ^ 0   ==  dst
    // m == all 1s: (dst - 1) ^ ~0  == ~(dst - 1) == -dst (two's complement)
    return (int64_t)((dst + m) ^ m);
}
634 |
635 | // Parse double from string
636 | // *** More than 20 char integer part returns junk.
637 | // *** Too much decimal char will get lost to precision
638 | inline double atod(const char* s, uint32_t len) {
639 | // Get int part
640 | int ilen = pmemchr(s, len, '.');
641 |
642 | // If no decimal dot, return the int
643 | if (ilen == -1) {
644 | return atoi(s, len);
645 | }
646 |
647 | // Do the int part
648 | int64_t ipart = atoi(s, ilen);
649 | s += ilen + 1;
650 | len -= ilen + 1;
651 |
652 | // Int of decimal part
653 | int64_t dpart = atou(s, len);
654 |
655 | // To add the two parts we need matching signs
656 | dpart = _copySign(ipart, dpart);
657 |
658 | // Array of 20 * 8 = 160 bytes
659 | static const CODE_SECTION double scales[20] = {
660 | 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9, 1e-10,
661 | 1e-11, 1e-12, 1e-13, 1e-14, 1e-15, 1e-16, 1e-17, 1e-18, 1e-19, 1e-20 };
662 |
663 | return ipart + dpart * scales[len];
664 | }
665 |
666 | } // namespace swar
667 |
--------------------------------------------------------------------------------