├── .gitignore ├── LICENSE ├── README.md ├── data └── bpe-wiki-def.txt ├── nob.c ├── src ├── bpe.c ├── bpe.h ├── bpe2bpe.c ├── bpe2dot.c ├── bpe_gen.c ├── bpe_inspect.c ├── tkn_inspect.c └── txt2bpe.c └── thirdparty ├── flag.h ├── nob.h └── stb_ds.h /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | nob 3 | *.dot 4 | *.png 5 | *.bin 6 | *.bpe -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2025 Alexey Kutepov 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BPE 2 | 3 | ## Quick Start 4 | 5 | ```console 6 | $ cc -o nob nob.c 7 | $ ./nob 8 | ``` 9 | 10 | ## References 11 | 12 | - https://en.wikipedia.org/wiki/Byte_pair_encoding 13 | -------------------------------------------------------------------------------- /data/bpe-wiki-def.txt: -------------------------------------------------------------------------------- 1 | The original BPE algorithm operates by iteratively replacing the most common contiguous sequences of characters in a target text with unused 'placeholder' bytes. The iteration ends when no sequences can be found, leaving the target text effectively compressed. Decompression can be performed by reversing this process, querying known placeholder terms against their corresponding denoted sequence, using a lookup table. In the original paper, this lookup table is encoded and stored alongside the compressed text. 
2 | -------------------------------------------------------------------------------- /nob.c: -------------------------------------------------------------------------------- 1 | #define NOB_IMPLEMENTATION 2 | #define NOB_STRIP_PREFIX 3 | #define NOB_EXPERIMENTAL_DELETE_OLD 4 | #include "./thirdparty/nob.h" 5 | 6 | #define BUILD_FOLDER "build/" 7 | #define SRC_FOLDER "src/" 8 | #define THIRDPARTY_FOLDER "thirdparty/" 9 | 10 | void build_tool_async(Cmd *cmd, Procs *procs, const char *bin_path, const char *src_path) 11 | { 12 | cmd_append(cmd, "gcc", "-Wall", "-Wextra", "-ggdb", "-I"THIRDPARTY_FOLDER, "-march=native", "-Ofast", "-o", bin_path, src_path, SRC_FOLDER"bpe.c"); 13 | da_append(procs, cmd_run_async_and_reset(cmd)); 14 | } 15 | 16 | const char *tools[] = { 17 | "txt2bpe", 18 | "bpe2dot", 19 | "bpe2bpe", 20 | "bpe_inspect", 21 | "bpe_gen", 22 | "tkn_inspect", 23 | }; 24 | 25 | int main(int argc, char **argv) 26 | { 27 | NOB_GO_REBUILD_URSELF(argc, argv); 28 | 29 | Cmd cmd = {0}; 30 | Procs procs = {0}; 31 | 32 | const char *program_name = shift(argv, argc); 33 | 34 | if (!nob_mkdir_if_not_exists(BUILD_FOLDER)) return 1; 35 | 36 | if (argc <= 0) { 37 | for (size_t i = 0; i < ARRAY_LEN(tools); ++i) { 38 | const char *bin_path = temp_sprintf(BUILD_FOLDER"%s", tools[i]); 39 | const char *src_path = temp_sprintf(SRC_FOLDER"%s.c", tools[i]); 40 | build_tool_async(&cmd, &procs, bin_path, src_path); 41 | } 42 | if (!procs_wait_and_reset(&procs)) return 1; 43 | return 0; 44 | } 45 | 46 | const char *tool_name = shift(argv, argc); 47 | 48 | for (size_t i = 0; i < ARRAY_LEN(tools); ++i) { 49 | if (strcmp(tool_name, tools[i]) == 0) { 50 | const char *bin_path = temp_sprintf(BUILD_FOLDER"%s", tools[i]); 51 | const char *src_path = temp_sprintf(SRC_FOLDER"%s.c", tools[i]); 52 | build_tool_async(&cmd, &procs, bin_path, src_path); 53 | if (!procs_wait_and_reset(&procs)) return 1; 54 | cmd_append(&cmd, bin_path); 55 | da_append_many(&cmd, argv, argc); 56 | if 
(!cmd_run_sync_and_reset(&cmd)) return 1; 57 | return 0; 58 | } 59 | } 60 | 61 | nob_log(ERROR, "Unknown tool `%s`", tool_name); 62 | nob_log(ERROR, "Available tools:"); 63 | for (size_t i = 0; i < ARRAY_LEN(tools); ++i) { 64 | nob_log(ERROR, " %s", tools[i]); 65 | } 66 | 67 | return 0; 68 | } 69 | -------------------------------------------------------------------------------- /src/bpe.c: -------------------------------------------------------------------------------- 1 | // The code that is shared between all the utilitize of the suite. 2 | #include "bpe.h" 3 | 4 | bool dump_tokens(const char *file_path, Tokens tokens) 5 | { 6 | // TODO: introduce magic and version to the format of the tokens 7 | return write_entire_file(file_path, tokens.items, tokens.count*sizeof(*tokens.items)); 8 | } 9 | 10 | bool load_tokens(const char *file_path, Tokens *tokens, String_Builder *tmp_sb) 11 | { 12 | tmp_sb->count = 0; 13 | if (!read_entire_file(file_path, tmp_sb)) return false; 14 | if (tmp_sb->count%sizeof(*tokens->items) != 0) { 15 | nob_log(ERROR, "%s: file size in bytes (%zu) must be divisible by %zu", file_path, tmp_sb->count, sizeof(*tokens->items)); 16 | return false; 17 | } 18 | uint32_t *items = (void*)tmp_sb->items; 19 | size_t items_count = tmp_sb->count/sizeof(*tokens->items); 20 | for (size_t i = 0; i < items_count; ++i){ 21 | da_append(tokens, items[i]); 22 | } 23 | return true; 24 | } 25 | 26 | bool dump_pairs(const char *file_path, Pairs pairs) 27 | { 28 | bool result = true; 29 | String_Builder sb = {0}; 30 | 31 | sb_append_cstr(&sb, "BPE"); 32 | da_append(&sb, BPE_VERSION_CURRENT); 33 | sb_append_buf(&sb, pairs.items, pairs.count*sizeof(*pairs.items)); 34 | if (!write_entire_file(file_path, sb.items, sb.count)) return_defer(false); 35 | 36 | defer: 37 | free(sb.items); 38 | return result; 39 | } 40 | 41 | typedef struct { 42 | uint32_t l, r; 43 | } Pair_V0; 44 | 45 | typedef struct { 46 | Pair_V0 *items; 47 | size_t count; 48 | size_t capacity; 49 | } 
Pairs_V0; 50 | 51 | // TODO: code duplication between parse_pairs_v0 and parse_pairs_v1 52 | 53 | bool parse_pairs_v0(const char *file_path, Pairs *pairs, String_View data) 54 | { 55 | bool result = true; 56 | Pairs_V0 pairs_v0 = {0}; 57 | 58 | if (data.count%sizeof(*pairs_v0.items) != 0) { 59 | nob_log(ERROR, "%s: file size in bytes (%zu) must be divisible by %zu", file_path, data.count, sizeof(*pairs_v0.items)); 60 | return_defer(false); 61 | } 62 | Pair_V0 *items = (void*)data.data; 63 | size_t items_count = data.count/sizeof(*pairs_v0.items); 64 | 65 | if (items_count < BPE_PRELUDE_SIZE) { 66 | nob_log(ERROR, "%s: pair count %zu is too small. It must be at least %d", file_path, items_count, BPE_PRELUDE_SIZE); 67 | return_defer(false); 68 | } 69 | 70 | for (uint32_t i = 0; i < BPE_PRELUDE_SIZE; ++i) { 71 | if (items[i].l != i) { 72 | nob_log(ERROR, "%s: pair %u: Left subtoken is equal to %u instead of itself", file_path, i, items[i].l); 73 | return_defer(false); 74 | } 75 | if (items[i].r != 0) { 76 | nob_log(ERROR, "%s: pair %u: Right subtoken is equal to %u instead of 0", file_path, i, items[i].r); 77 | return_defer(false); 78 | } 79 | da_append(&pairs_v0, items[i]); 80 | } 81 | 82 | for (uint32_t i = BPE_PRELUDE_SIZE; i < items_count; ++i) { 83 | if (items[i].l >= items_count) { 84 | nob_log(ERROR, "%s: pair %u: Left subtoken is %u >= %zu", file_path, i, items[i].l, items_count); 85 | return_defer(false); 86 | } 87 | if (items[i].r >= items_count) { 88 | nob_log(ERROR, "%s: pair %u: Right subtoken is %u >= %zu", file_path, i, items[i].r, items_count); 89 | return_defer(false); 90 | } 91 | da_append(&pairs_v0, items[i]); 92 | } 93 | 94 | da_foreach(Pair_V0, pair_v0, &pairs_v0) { 95 | da_append(pairs, ((Pair) { 96 | .l = pair_v0->l, 97 | .r = pair_v0->r, 98 | .freq = 1, 99 | })); 100 | } 101 | 102 | defer: 103 | free(pairs_v0.items); 104 | return result; 105 | } 106 | 107 | bool parse_pairs_v1(const char *file_path, Pairs *pairs, String_View data) 108 | { 109 | 
if (data.count%sizeof(*pairs->items) != 0) { 110 | nob_log(ERROR, "%s: file size in bytes (%zu) must be divisible by %zu", file_path, data.count, sizeof(*pairs->items)); 111 | return false; 112 | } 113 | Pair *items = (void*)data.data; 114 | size_t items_count = data.count/sizeof(*pairs->items); 115 | 116 | if (items_count < BPE_PRELUDE_SIZE) { 117 | nob_log(ERROR, "%s: pair count %zu is too small. It must be at least %d", file_path, items_count, BPE_PRELUDE_SIZE); 118 | return false; 119 | } 120 | 121 | for (uint32_t i = 0; i < BPE_PRELUDE_SIZE; ++i) { 122 | if (items[i].l != i) { 123 | nob_log(ERROR, "%s: pair %u: Left subtoken is equal to %u instead of itself", file_path, i, items[i].l); 124 | return false; 125 | } 126 | if (items[i].r != 0) { 127 | nob_log(ERROR, "%s: pair %u: Right subtoken is equal to %u instead of 0", file_path, i, items[i].r); 128 | return false; 129 | } 130 | da_append(pairs, items[i]); 131 | } 132 | 133 | for (uint32_t i = BPE_PRELUDE_SIZE; i < items_count; ++i) { 134 | if (items[i].l >= items_count) { 135 | nob_log(ERROR, "%s: pair %u: Left subtoken is %u >= %zu", file_path, i, items[i].l, items_count); 136 | return false; 137 | } 138 | if (items[i].r >= items_count) { 139 | nob_log(ERROR, "%s: pair %u: Right subtoken is %u >= %zu", file_path, i, items[i].r, items_count); 140 | return false; 141 | } 142 | da_append(pairs, items[i]); 143 | } 144 | 145 | return true; 146 | } 147 | 148 | bool load_pairs(const char *file_path, Pairs *pairs, String_Builder *tmp_sb, size_t *version) 149 | { 150 | tmp_sb->count = 0; 151 | if (!read_entire_file(file_path, tmp_sb)) return false; 152 | 153 | String_View data = sb_to_sv(*tmp_sb); 154 | 155 | size_t magic_count = 4; 156 | String_View magic_prefix = sv_from_cstr("BPE"); 157 | if (sv_starts_with(data, magic_prefix)) { 158 | if (data.count < magic_count) { 159 | nob_log(ERROR, "%s: missing BPE version", file_path); 160 | return false; 161 | } 162 | *version = data.data[3]; 163 | 164 | data.data += 
magic_count; 165 | data.count -= magic_count; 166 | 167 | switch (*version) { 168 | case 0: return parse_pairs_v0(file_path, pairs, data); 169 | case 1: return parse_pairs_v1(file_path, pairs, data); 170 | default: 171 | nob_log(ERROR, "%s: unsupported BPE version %zu", file_path, *version); 172 | return false; 173 | } 174 | } 175 | 176 | String_View magic_prefix_v0 = sv_from_parts("\0\0\0\0", magic_count); 177 | if (sv_starts_with(data, magic_prefix_v0)) { 178 | *version = 0; 179 | return parse_pairs_v0(file_path, pairs, data); 180 | } 181 | 182 | nob_log(ERROR, "%s: invalid magic", file_path); 183 | return false; 184 | } 185 | 186 | void render_token(Pairs pairs, uint32_t token, String_Builder *sb) 187 | { 188 | assert(token < pairs.count); 189 | 190 | // TODO: employ memoization so to not rerender the same tokens over and over again 191 | if (token == pairs.items[token].l) { 192 | da_append(sb, (char)token); 193 | return; 194 | } 195 | 196 | render_token(pairs, pairs.items[token].l, sb); 197 | render_token(pairs, pairs.items[token].r, sb); 198 | } 199 | 200 | void c_strlit_escape_bytes(const char *bytes, size_t bytes_size, String_Builder *sb_out) 201 | { 202 | for (size_t i = 0; i < bytes_size; ++i) { 203 | if (bytes[i] == '"') { 204 | sb_append_cstr(sb_out, "\\\""); 205 | } else if (bytes[i] == '\\') { 206 | sb_append_cstr(sb_out, "\\\\"); 207 | } else if (isprint(bytes[i])) { 208 | da_append(sb_out, bytes[i]); 209 | } else { 210 | sb_appendf(sb_out, "\\x%02X", (uint8_t)bytes[i]); 211 | } 212 | } 213 | } 214 | 215 | #define NOB_IMPLEMENTATION 216 | #include "nob.h" 217 | -------------------------------------------------------------------------------- /src/bpe.h: -------------------------------------------------------------------------------- 1 | // The code that is shared between all the utilitize of the suite. 
2 | #ifndef BPE_H_ 3 | #define BPE_H_ 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #define NOB_STRIP_PREFIX 10 | #include "nob.h" 11 | #undef rename 12 | 13 | #define BPE_PRELUDE_SIZE 256 14 | 15 | // TODO: support unicode 16 | // Right now we assume everything is ASCII. 17 | 18 | typedef struct { 19 | uint32_t *items; 20 | size_t count; 21 | size_t capacity; 22 | } Tokens; 23 | 24 | #define BPE_VERSION_CURRENT 1 25 | 26 | typedef struct { 27 | uint32_t l, r; 28 | uint64_t freq; 29 | } Pair; 30 | 31 | typedef struct { 32 | Pair *items; 33 | size_t count; 34 | size_t capacity; 35 | } Pairs; 36 | 37 | bool load_pairs(const char *file_path, Pairs *pairs, String_Builder *tmp_sb, size_t *version); 38 | bool dump_pairs(const char *file_path, Pairs pairs); 39 | bool dump_tokens(const char *file_path, Tokens tokens); 40 | bool load_tokens(const char *file_path, Tokens *tokens, String_Builder *tmp_sb); 41 | void render_token(Pairs pairs, uint32_t token, String_Builder *sb); 42 | void c_strlit_escape_bytes(const char *bytes, size_t bytes_size, String_Builder *sb_out); 43 | 44 | #define swap(Type, x, y) \ 45 | do { \ 46 | Type *a = &(x); \ 47 | Type *b = &(y); \ 48 | Type t = *a; \ 49 | *a = *b; \ 50 | *b = t; \ 51 | } while(0) 52 | 53 | #endif // BPE_H_ 54 | -------------------------------------------------------------------------------- /src/bpe2bpe.c: -------------------------------------------------------------------------------- 1 | #include "bpe.h" 2 | #define FLAG_IMPLEMENTATION 3 | #include "flag.h" 4 | 5 | void usage(void) 6 | { 7 | fprintf(stderr, "Usage: %s [OPTIONS]\n", flag_program_name()); 8 | flag_print_options(stderr); 9 | } 10 | 11 | int main(int argc, char **argv) 12 | { 13 | char **bpe_path = flag_str("bpe", NULL, "Path to the input BPE file (MANDATORY)"); 14 | char **out_path = flag_str("out", NULL, "Path to the output BPE file (MANDATORY)"); 15 | 16 | if (!flag_parse(argc, argv)) { 17 | usage(); 18 | flag_print_error(stderr); 19 | return 1; 20 | } 
21 | 22 | if (*bpe_path == NULL) { 23 | usage(); 24 | fprintf(stderr, "ERROR: No -%s was provided\n", flag_name(bpe_path)); 25 | return 1; 26 | } 27 | 28 | if (*out_path == NULL) { 29 | usage(); 30 | fprintf(stderr, "ERROR: No -%s was provided\n", flag_name(out_path)); 31 | return 1; 32 | } 33 | 34 | Pairs pairs = {0}; 35 | String_Builder sb = {0}; 36 | 37 | size_t version = 0; 38 | if (!load_pairs(*bpe_path, &pairs, &sb, &version)) return false; 39 | printf("INFO: Converting BPE version %zu -> %d\n", version, BPE_VERSION_CURRENT); 40 | if (!dump_pairs(*out_path, pairs)) return false; 41 | 42 | return 0; 43 | } 44 | -------------------------------------------------------------------------------- /src/bpe2dot.c: -------------------------------------------------------------------------------- 1 | // Render a graph out of bpe pairs generated by txt2bpe. 2 | // TODO: probably delete this. It never produced anything useful or interesting. 3 | #include 4 | 5 | #include "bpe.h" 6 | 7 | void render_dot(Pairs pairs, String_Builder *sb) 8 | { 9 | sb_append_cstr(sb, "digraph Pairs {\n"); 10 | for (uint32_t token = 0; token < pairs.count; ++token) { 11 | if (token != pairs.items[token].l) { 12 | sb_append_cstr(sb, temp_sprintf(" %u -> %u\n", token, pairs.items[token].l)); 13 | sb_append_cstr(sb, temp_sprintf(" %u -> %u\n", token, pairs.items[token].r)); 14 | } 15 | } 16 | sb_append_cstr(sb, "}\n"); 17 | } 18 | 19 | void usage(const char *program_name) 20 | { 21 | fprintf(stderr, "Usage: %s \n", program_name); 22 | } 23 | 24 | int main(int argc, char **argv) 25 | { 26 | const char *program_name = shift(argv, argc); 27 | 28 | if (argc <= 0) { 29 | usage(program_name); 30 | fprintf(stderr, "ERROR: no input is provided\n"); 31 | return 1; 32 | } 33 | const char *input_file_path = shift(argv, argc); 34 | 35 | if (argc <= 0) { 36 | usage(program_name); 37 | fprintf(stderr, "ERROR: no output is provided\n"); 38 | return 1; 39 | } 40 | const char *output_file_path = shift(argv, argc); 
41 | 42 | Pairs pairs = {0}; 43 | String_Builder sb = {0}; 44 | 45 | size_t version = 0; 46 | if (!load_pairs(input_file_path, &pairs, &sb, &version)) return 1; 47 | printf("INFO: loaded %s BPE version %zu\n", input_file_path, version); 48 | sb.count = 0; 49 | render_dot(pairs, &sb); 50 | if (!write_entire_file(output_file_path, sb.items, sb.count)) return 1; 51 | printf("INFO: generated %s\n", output_file_path); 52 | 53 | return 0; 54 | } 55 | -------------------------------------------------------------------------------- /src/bpe_gen.c: -------------------------------------------------------------------------------- 1 | // Generate random text based on the BPE table created by txt2bpe. 2 | #include 3 | #include "bpe.h" 4 | #define FLAG_IMPLEMENTATION 5 | #include "flag.h" 6 | 7 | void usage(void) { 8 | fprintf(stderr, "Usage: %s [OPTIONS]\n", flag_program_name()); 9 | fprintf(stderr, "OPTIONS:\n"); 10 | flag_print_options(stderr); 11 | } 12 | 13 | int main(int argc, char **argv) 14 | { 15 | srand(time(0)); 16 | 17 | char **bpe_path = flag_str("bpe", NULL, "Path to BPE file (MANDATORY)"); 18 | bool *help = flag_bool("help", false, "Print this help message"); 19 | uint64_t *limit = flag_uint64("limit", 100, "Max amount of tokens to generate"); 20 | char **delim = flag_str("delim", "", "Token delimiter"); 21 | uint64_t *seed = flag_uint64("seed", 0, "Random seed (0 to use current time)"); 22 | 23 | if (!flag_parse(argc, argv)) { 24 | usage(); 25 | flag_print_error(stderr); 26 | return 1; 27 | } 28 | 29 | if (*bpe_path == NULL) { 30 | usage(); 31 | fprintf(stderr, "ERROR: no -%s is provided\n", flag_name(bpe_path)); 32 | return 1; 33 | } 34 | 35 | if (*help) { 36 | usage(); 37 | return 0; 38 | } 39 | 40 | if (*seed != 0) { 41 | srand(*seed); 42 | } 43 | 44 | Pairs pairs = {0}; 45 | String_Builder sb = {0}; 46 | Pairs next = {0}; 47 | 48 | size_t version = 0; 49 | if (!load_pairs(*bpe_path, &pairs, &sb, &version)) return 1; 50 | printf("INFO: loaded %s BPE version 
%zu\n", *bpe_path, version); 51 | 52 | uint32_t token = rand()%pairs.count; 53 | 54 | for (uint64_t i = 0; i < *limit; ++i) { 55 | sb.count = 0; 56 | render_token(pairs, token, &sb); 57 | sb_append_null(&sb); 58 | printf("%s%s", sb.items, *delim); 59 | 60 | for (;;) { 61 | next.count = 0; 62 | for (size_t i = 0; i < pairs.count; ++i) { 63 | if (pairs.items[i].l == token) { 64 | da_append(&next, pairs.items[i]); 65 | } 66 | } 67 | if (next.count > 0) break; 68 | if (token < BPE_PRELUDE_SIZE) break; 69 | token = pairs.items[token].r; 70 | } 71 | 72 | if (next.count == 0) break; 73 | 74 | uint64_t sum = 0; 75 | for (size_t j = 0; j < next.count; ++j) { 76 | sum += next.items[j].freq; 77 | } 78 | 79 | uint64_t roll = (uint64_t)rand()*sum/RAND_MAX; 80 | uint64_t curr = 0; 81 | 82 | // TODO: make sure there is no off-by-one here 83 | bool found = false; 84 | for (size_t j = 0; j < next.count; ++j) { 85 | if (curr + next.items[j].freq > roll) { 86 | token = next.items[j].r; 87 | found = true; 88 | break; 89 | } 90 | curr += next.items[j].freq; 91 | } 92 | assert(found); 93 | } 94 | 95 | return 0; 96 | } 97 | -------------------------------------------------------------------------------- /src/bpe_inspect.c: -------------------------------------------------------------------------------- 1 | // Inspect the bpe pairs generated by txt2bpe 2 | #include 3 | 4 | #include "bpe.h" 5 | 6 | void usage(const char *program_name) 7 | { 8 | fprintf(stderr, "Usage: %s \n", program_name); 9 | } 10 | 11 | int main(int argc, char **argv) 12 | { 13 | const char *program_name = shift(argv, argc); 14 | const char *input_file_path = NULL; 15 | bool no_ids = false; 16 | 17 | while (argc > 0) { 18 | const char *arg = shift(argv, argc); 19 | if (strcmp(arg, "--no-ids") == 0) { 20 | no_ids = true; 21 | } else { 22 | if (input_file_path != NULL) { 23 | fprintf(stderr, "ERROR: %s supports inspecting only single file\n", program_name); 24 | return 1; 25 | } 26 | input_file_path = arg; 27 | } 28 | } 
29 | 30 | if (input_file_path == NULL) { 31 | usage(program_name); 32 | fprintf(stderr, "ERROR: no input is provided\n"); 33 | return 1; 34 | } 35 | 36 | Pairs pairs = {0}; 37 | String_Builder sb = {0}; 38 | String_Builder sb_tmp = {0}; 39 | 40 | size_t version = 0; 41 | if (!load_pairs(input_file_path, &pairs, &sb_tmp, &version)) return 1; 42 | printf("INFO: loaded %s BPE version %zu\n", input_file_path, version); 43 | 44 | for (uint32_t token = 1; token < pairs.count; ++token) { 45 | sb.count = 0; 46 | 47 | if (!no_ids) sb_appendf(&sb, "%u => ", token); 48 | 49 | sb_append_cstr(&sb, "\""); 50 | sb_tmp.count = 0; 51 | render_token(pairs, token, &sb_tmp); 52 | c_strlit_escape_bytes(sb_tmp.items, sb_tmp.count, &sb); 53 | sb_appendf(&sb, "\" (%zu)\n", pairs.items[token].freq); 54 | sb_append_null(&sb); 55 | 56 | printf("%s", sb.items); 57 | } 58 | 59 | return 0; 60 | } 61 | -------------------------------------------------------------------------------- /src/tkn_inspect.c: -------------------------------------------------------------------------------- 1 | // Inspect the tokens generated by txt2bpe. 
2 | #include "bpe.h" 3 | #define FLAG_IMPLEMENTATION 4 | #include "flag.h" 5 | 6 | void usage(void) 7 | { 8 | fprintf(stderr, "Usage: %s [OPTIONS]\n", flag_program_name()); 9 | fprintf(stderr, "OPTIONS:\n"); 10 | flag_print_options(stderr); 11 | } 12 | 13 | int main(int argc, char **argv) 14 | { 15 | char **bpe_path = flag_str("bpe", NULL, "Path to the .bpe file that contains table of pairs (MANDATORY)"); 16 | char **tkn_path = flag_str("tkn", NULL, "Path to the .tkn file that contains the sequence of tokens (MANDATORY)"); 17 | bool *ids = flag_bool("ids", false, "Print the ids of the tokens"); 18 | bool *render = flag_bool("render", false, "Render tokens as text back"); 19 | bool *split = flag_bool("split", false, "When render is enabled, split tokens with bars |"); 20 | bool *help = flag_bool("help", false, "Print this help"); 21 | 22 | if (!flag_parse(argc, argv)) { 23 | usage(); 24 | flag_print_error(stderr); 25 | return 1; 26 | } 27 | 28 | if (*help) { 29 | usage(); 30 | return 0; 31 | } 32 | 33 | if (*bpe_path == NULL) { 34 | usage(); 35 | fprintf(stderr, "ERROR: no %s is provided\n", flag_name(bpe_path)); 36 | return 1; 37 | } 38 | 39 | if (*tkn_path == NULL) { 40 | usage(); 41 | fprintf(stderr, "ERROR: no %s is provided\n", flag_name(tkn_path)); 42 | return 1; 43 | } 44 | 45 | Tokens tokens = {0}; 46 | Pairs pairs = {0}; 47 | String_Builder sb = {0}; 48 | if (!load_tokens(*tkn_path, &tokens, &sb)) return 1; 49 | size_t version = 0; 50 | if (!load_pairs(*bpe_path, &pairs, &sb, &version)) return 1; 51 | printf("INFO: loaded %s BPE version %zu\n", *bpe_path, version); 52 | 53 | for (size_t i = 0; i < tokens.count; ++i) { 54 | if (tokens.items[i] >= pairs.count) { 55 | fprintf(stderr, "ERROR: token %zu: value %u is out of bounds (%zu)", i, tokens.items[i], pairs.count - 1); 56 | return 1; 57 | } 58 | } 59 | 60 | printf("OK: Loaded %zu tokens from %s\n", tokens.count, *tkn_path); 61 | printf("OK: Loaded %zu pairs from %s\n", pairs.count, *bpe_path); 62 | 63 | if 
(*ids) { 64 | printf("------------------------------\n"); 65 | for (size_t i = 0; i < tokens.count; ++i) { 66 | printf("%u\n", tokens.items[i]); 67 | } 68 | } 69 | 70 | if (*render) { 71 | printf("------------------------------\n"); 72 | for (size_t i = 0; i < tokens.count; ++i) { 73 | sb.count = 0; 74 | render_token(pairs, tokens.items[i], &sb); 75 | sb_append_null(&sb); 76 | printf("%s", sb.items); 77 | if (*split) printf("|"); 78 | } 79 | printf("\n"); 80 | } 81 | 82 | return 0; 83 | } 84 | -------------------------------------------------------------------------------- /src/txt2bpe.c: -------------------------------------------------------------------------------- 1 | // Generate the BPE table by compressing a text file. 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #define STB_DS_IMPLEMENTATION 10 | #include "stb_ds.h" 11 | #define FLAG_IMPLEMENTATION 12 | #include "flag.h" 13 | 14 | #include "bpe.h" 15 | 16 | typedef struct { 17 | uint32_t l, r; 18 | } Pair_Key; 19 | 20 | typedef struct { 21 | Pair_Key key; 22 | size_t value; 23 | } Freq; 24 | 25 | int compare_freqs(const void *a, const void *b) 26 | { 27 | const Freq *af = a; 28 | const Freq *bf = b; 29 | return (bf->value > af->value) - (bf->value < af->value); // descending by value, without truncating size_t to int 30 | } 31 | 32 | typedef struct { 33 | Freq *items; 34 | size_t count; 35 | size_t capacity; 36 | } Freqs; 37 | 38 | void usage(void) 39 | { 40 | fprintf(stderr, "Usage: %s [OPTIONS]\n", flag_program_name()); 41 | fprintf(stderr, "OPTIONS:\n"); 42 | flag_print_options(stderr); 43 | } 44 | 45 | void report_progress(size_t iteration, Tokens tokens_in, Pairs pairs, double *profile_samples, size_t profile_samples_count) 46 | { 47 | double average_profile_samples = 0.0f; 48 | for (size_t i = 0; i < profile_samples_count; ++i) { 49 | average_profile_samples += profile_samples[i]; 50 | } 51 | average_profile_samples /= profile_samples_count; 52 | 53 | printf("INFO: -- ITERATION %zu --\n", iteration); 54 | printf("INFO: Token count: %zu\n",
tokens_in.count); 55 | printf("INFO: Pair count: %zu\n", pairs.count); 56 | printf("INFO: Time: %lfsecs (avg. of %zu iter.)\n", average_profile_samples, profile_samples_count); 57 | } 58 | 59 | double get_secs(void) 60 | { 61 | struct timespec tp = {0}; 62 | int ret = clock_gettime(CLOCK_MONOTONIC, &tp); 63 | assert(ret == 0); 64 | return (double)tp.tv_sec + (double)tp.tv_nsec*1e-9; 65 | } 66 | 67 | Freq *collect_freqs(Tokens tokens_in) 68 | { 69 | Freq *freq = NULL; 70 | 71 | for (size_t i = 0; i + 1 < tokens_in.count; ++i) { // `count - 1` would wrap around to SIZE_MAX when count is 0 72 | Pair_Key pair = { 73 | .l = tokens_in.items[i], 74 | .r = tokens_in.items[i + 1] 75 | }; 76 | ptrdiff_t place = hmgeti(freq, pair); 77 | if (place < 0) hmput(freq, pair, 1); 78 | else freq[place].value += 1; 79 | } 80 | 81 | return freq; 82 | } 83 | 84 | bool dump_state(size_t iteration, const char *output_dir_path, Pairs pairs, Tokens tokens) 85 | { 86 | const char *output_file_path = temp_sprintf("%s/%zu.bpe", output_dir_path, iteration); 87 | if (!dump_pairs(output_file_path, pairs)) return false; 88 | printf("INFO: generated %s\n", output_file_path); 89 | 90 | output_file_path = temp_sprintf("%s/%zu.tkn", output_dir_path, iteration); 91 | if (!dump_tokens(output_file_path, tokens)) return false; 92 | printf("INFO: generated %s\n", output_file_path); 93 | 94 | return true; 95 | } 96 | 97 | int main(int argc, char **argv) 98 | { 99 | uint64_t *report_freq = flag_uint64("report-freq", 10, "Per how many iterations report the progress"); 100 | uint64_t *dump_freq = flag_uint64("dump-freq", 10, "Per how many iterations dump the state of the progress"); 101 | uint64_t *term_freq = flag_uint64("term-freq", 1, "Termination pair frequency"); 102 | uint64_t *max_iterations = flag_uint64("max-iterations", 0, "Maximum amount of iterations.
0 means no limit"); 103 | uint64_t *threads_count = flag_uint64("threads-count", 16, "Threads count"); 104 | bool *help = flag_bool("help", false, "Print this help"); 105 | char **input_file = flag_str("input-file", NULL, "Input text file (MANDATORY)"); 106 | char **output_dir = flag_str("output-dir", NULL, "Output directory (MANDATORY)"); 107 | 108 | if (!flag_parse(argc, argv)) { 109 | usage(); 110 | flag_print_error(stderr); 111 | return 1; 112 | } 113 | 114 | if (*help) { 115 | usage(); 116 | return 0; 117 | } 118 | 119 | if (*input_file == NULL) { 120 | usage(); 121 | fprintf(stderr, "ERROR: no %s is provided\n", flag_name(input_file)); 122 | return 1; 123 | } 124 | const char *input_file_path = *input_file; 125 | 126 | if (*output_dir == NULL) { 127 | usage(); 128 | fprintf(stderr, "ERROR: no %s is provided\n", flag_name(output_dir)); 129 | } 130 | const char *output_dir_path = *output_dir; 131 | 132 | if (*threads_count <= 0) *threads_count = 1; 133 | 134 | int output_dir_exists = file_exists(output_dir_path); 135 | if (output_dir_exists < 0) return 1; 136 | if (output_dir_exists) { 137 | fprintf(stderr, "ERROR: Directory %s already exists, delete it or rename it to not lose any data in it\n", output_dir_path); 138 | return 1; 139 | } 140 | if (!mkdir_if_not_exists(output_dir_path)) return 1; 141 | 142 | String_Builder sb = {0}; 143 | Freq *freq = NULL; 144 | Pairs pairs = {0}; 145 | 146 | Tokens tokens_in = {0}; 147 | Tokens tokens_out = {0}; 148 | 149 | if (!read_entire_file(input_file_path, &sb)) return 1; 150 | 151 | // 0 => { .l = 0, .r = ??? } 152 | // 1 => { .l = 1, .r = ??? } 153 | // ... 154 | // 69 => { .l = 69, .r = ??? } 155 | // ... 156 | // 255 => { .l = 255, .r = ??? 
} 157 | for (uint32_t i = 0; i < BPE_PRELUDE_SIZE; ++i) { 158 | da_append(&pairs, ((Pair) {.l = i})); 159 | } 160 | 161 | for (size_t i = 0; i < sb.count; ++i) { 162 | da_append(&tokens_in, (uint8_t)sb.items[i]); 163 | } 164 | if (*report_freq == 0) *report_freq = 1; // both frequencies are used as modulus divisors below 165 | if (*dump_freq == 0) *dump_freq = 1; 166 | double *profile_samples = calloc(*report_freq, sizeof(*profile_samples)); // zeroed: the first report averages samples that have not been written yet 167 | assert(profile_samples != NULL); 168 | freq = collect_freqs(tokens_in); 169 | 170 | size_t iteration = 0; 171 | for (; *max_iterations == 0 || iteration < *max_iterations ; ++iteration) { 172 | if (iteration%(*report_freq) == 0) report_progress(iteration, tokens_in, pairs, profile_samples, *report_freq); 173 | if (iteration%(*dump_freq) == 0) if (!dump_state(iteration, output_dir_path, pairs, tokens_in)) return 1; 174 | 175 | double begin_secs = get_secs(); 176 | if (hmlen(freq) == 0) break; // no adjacent pairs were counted, so freq[max_index] below would be out of bounds 177 | ptrdiff_t max_index = 0; 178 | for (ptrdiff_t i = 1; i < hmlen(freq); ++i) { 179 | if (freq[i].value > freq[max_index].value || (freq[i].value == freq[max_index].value && memcmp(&freq[i].key, &freq[max_index].key, sizeof(freq[i].key)) > 0)) { 180 | max_index = i; 181 | } 182 | } 183 | 184 | if (freq[max_index].value <= (*term_freq)) break; // compression is done 185 | 186 | Pair_Key max_pair = freq[max_index].key; 187 | uint32_t max_token = pairs.count; 188 | da_append(&pairs, ((Pair) { 189 | .l = max_pair.l, 190 | .r = max_pair.r, 191 | .freq = freq[max_index].value, 192 | })); 193 | 194 | tokens_out.count = 0; 195 | 196 | for (size_t i = 0; i < tokens_in.count; ) { 197 | #if 0 198 | printf("in: "); for (size_t j = 0; j < tokens_in.count; ++j) { printf("%u ", tokens_in.items[j]); } printf("\n"); 199 | printf("out: "); for (size_t j = 0; j < tokens_out.count; ++j) { printf("%u ", tokens_out.items[j]); } printf("\n"); 200 | for (ptrdiff_t i = 0; i < hmlen(freq); ++i) printf(" (%u, %u) => %zu\n", freq[i].key.l, freq[i].key.r, freq[i].value); 201 | printf("------------------------------\n"); 202 | #endif 203 | if (i + 1 >= tokens_in.count) { 204 | da_append(&tokens_out,
tokens_in.items[i]); 205 | i += 1; 206 | } else { 207 | Pair_Key pair; 208 | pair.l = tokens_in.items[i]; 209 | pair.r = tokens_in.items[i + 1]; 210 | if (memcmp(&pair, &max_pair, sizeof(pair)) == 0) { 211 | ptrdiff_t place; 212 | if (tokens_out.count > 0) { 213 | pair.l = tokens_out.items[tokens_out.count - 1]; 214 | 215 | pair.r = tokens_in.items[i]; 216 | place = hmgeti(freq, pair); 217 | if (!(place >= 0)) { 218 | printf("%s:%d: i = %zu, pair = (%u, %u)\n", __FILE__, __LINE__, i, pair.l, pair.r); 219 | abort(); 220 | } 221 | assert(freq[place].value > 0); 222 | freq[place].value -= 1; 223 | // TODO: if (freq[place].value == 0) hmdel(freq, piar) 224 | 225 | pair.r = max_token; 226 | ptrdiff_t place = hmgeti(freq, pair); 227 | if (place < 0) hmput(freq, pair, 1); 228 | else freq[place].value += 1; 229 | } 230 | 231 | pair = max_pair; 232 | place = hmgeti(freq, pair); 233 | assert(place >= 0); 234 | assert(freq[place].value > 0); 235 | freq[place].value -= 1; 236 | 237 | da_append(&tokens_out, max_token); 238 | i += 2; 239 | 240 | // v 241 | // in: abcd 242 | // out: aZ 243 | // Z=bc 244 | if (i < tokens_in.count) { 245 | pair.r = tokens_in.items[i]; 246 | 247 | pair.l = tokens_in.items[i-1]; 248 | place = hmgeti(freq, pair); 249 | assert(place >= 0); 250 | assert(freq[place].value > 0); 251 | freq[place].value -= 1; 252 | 253 | pair.l = tokens_out.items[tokens_out.count-1]; 254 | ptrdiff_t place = hmgeti(freq, pair); 255 | if (place < 0) hmput(freq, pair, 1); 256 | else freq[place].value += 1; 257 | } 258 | } else { 259 | da_append(&tokens_out, tokens_in.items[i]); 260 | i += 1; 261 | } 262 | } 263 | } 264 | 265 | profile_samples[iteration%(*report_freq)] = get_secs() - begin_secs; 266 | 267 | swap(Tokens, tokens_in, tokens_out); 268 | } 269 | 270 | size_t remainder_iterations = iteration%(*report_freq); 271 | report_progress(iteration, tokens_in, pairs, profile_samples, remainder_iterations == 0 ? 
// flag.h -- v1.0.0 -- command-line flag parsing
//
//   Inspired by Go's flag module: https://pkg.go.dev/flag
//
// Flags are registered with flag_bool()/flag_uint64()/flag_size()/flag_str(); each returns a
// pointer to the storage that flag_parse() later fills in. Define FLAG_IMPLEMENTATION before
// including this header in the translation unit that should contain the definitions.
//
#ifndef FLAG_H_
#define FLAG_H_

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>
#include <inttypes.h>
#include <limits.h>
#include <string.h>
#include <errno.h>

// TODO: add support for -flag=x syntax
// TODO: *_var function variants
// void flag_bool_var(bool *var, const char *name, bool def, const char *desc);
// void flag_bool_uint64(uint64_t *var, const char *name, bool def, const char *desc);
// etc.
// WARNING! *_var functions may break the flag_name() functionality

// Returns the name a flag was registered with, given the value pointer returned by flag_*().
char *flag_name(void *val);
// Register a flag of the given type. `def` is the initial value, `desc` is shown by
// flag_print_options(). The returned pointer stays valid and is updated by flag_parse().
bool *flag_bool(const char *name, bool def, const char *desc);
uint64_t *flag_uint64(const char *name, uint64_t def, const char *desc);
size_t *flag_size(const char *name, uint64_t def, const char *desc);
char **flag_str(const char *name, const char *def, const char *desc);
// Parses argv. Returns false on error; call flag_print_error() to report it.
bool flag_parse(int argc, char **argv);
// Arguments left over after the flags (everything from the first non-flag, or after "--").
int flag_rest_argc(void);
char **flag_rest_argv(void);
// The program name captured by the first flag_parse() call (NULL before that).
const char *flag_program_name(void);
void flag_print_error(FILE *stream);
void flag_print_options(FILE *stream);

#endif // FLAG_H_

//////////////////////////////

#ifdef FLAG_IMPLEMENTATION

typedef enum {
    FLAG_BOOL = 0,
    FLAG_UINT64,
    FLAG_SIZE,
    FLAG_STR,
    COUNT_FLAG_TYPES,
} Flag_Type;

static_assert(COUNT_FLAG_TYPES == 4, "Exhaustive Flag_Value definition");
typedef union {
    char *as_str;
    uint64_t as_uint64;
    bool as_bool;
    size_t as_size;
} Flag_Value;

typedef enum {
    FLAG_NO_ERROR = 0,
    FLAG_ERROR_UNKNOWN,
    FLAG_ERROR_NO_VALUE,
    FLAG_ERROR_INVALID_NUMBER,
    FLAG_ERROR_INTEGER_OVERFLOW,
    FLAG_ERROR_INVALID_SIZE_SUFFIX,
    COUNT_FLAG_ERRORS,
} Flag_Error;

typedef struct {
    Flag_Type type;
    char *name;
    char *desc;
    Flag_Value val; // current value; flag_*() hand out a pointer to this member
    Flag_Value def; // default value, only used for printing
} Flag;

#ifndef FLAGS_CAP
#define FLAGS_CAP 256
#endif

typedef struct {
    Flag flags[FLAGS_CAP];
    size_t flags_count;

    Flag_Error flag_error;
    char *flag_error_name;

    const char *program_name;

    int rest_argc;
    char **rest_argv;
} Flag_Context;

static Flag_Context flag_global_context;

// Claims the next free slot of the global flag table and initializes it with the given
// type, name and description. Asserts that fewer than FLAGS_CAP flags are registered.
Flag *flag_new(Flag_Type type, const char *name, const char *desc)
{
    Flag_Context *c = &flag_global_context;

    assert(c->flags_count < FLAGS_CAP);
    Flag *flag = &c->flags[c->flags_count++];
    memset(flag, 0, sizeof(*flag));
    flag->type = type;
    // NOTE: I won't touch them I promise Kappa
    flag->name = (char*) name;
    flag->desc = (char*) desc;
    return flag;
}

// `val` must be a pointer returned by flag_bool()/flag_uint64()/flag_size()/flag_str():
// the owning Flag is recovered by stepping back from its `val` member.
char *flag_name(void *val)
{
    Flag *flag = (Flag*) ((char*) val - offsetof(Flag, val));
    return flag->name;
}

bool *flag_bool(const char *name, bool def, const char *desc)
{
    Flag *flag = flag_new(FLAG_BOOL, name, desc);
    flag->def.as_bool = def;
    flag->val.as_bool = def;
    return &flag->val.as_bool;
}

uint64_t *flag_uint64(const char *name, uint64_t def, const char *desc)
{
    Flag *flag = flag_new(FLAG_UINT64, name, desc);
    flag->val.as_uint64 = def;
    flag->def.as_uint64 = def;
    return &flag->val.as_uint64;
}

size_t *flag_size(const char *name, uint64_t def, const char *desc)
{
    Flag *flag = flag_new(FLAG_SIZE, name, desc);
    flag->val.as_size = def;
    flag->def.as_size = def;
    return &flag->val.as_size;
}

char **flag_str(const char *name, const char *def, const char *desc)
{
    Flag *flag = flag_new(FLAG_STR, name, desc);
    flag->val.as_str = (char*) def;
    flag->def.as_str = (char*) def;
    return &flag->val.as_str;
}

// Pops and returns the first element of argv, advancing argv and decrementing argc.
static char *flag_shift_args(int *argc, char ***argv)
{
    assert(*argc > 0);
    char *result = **argv;
    *argv += 1;
    *argc -= 1;
    return result;
}

int flag_rest_argc(void)
{
    return flag_global_context.rest_argc;
}

char **flag_rest_argv(void)
{
    return flag_global_context.rest_argv;
}

const char *flag_program_name(void)
{
    return flag_global_context.program_name;
}

// Records `error` for the flag called `name` and returns false, so that flag_parse()
// can `return flag__fail(...)` directly.
static bool flag__fail(Flag_Context *c, Flag_Error error, char *name)
{
    c->flag_error = error;
    c->flag_error_name = name;
    return false;
}

// Parses the decimal numeral at the start of `arg` with strtoull().
//
// Returns false when `arg` does not start with an unsigned numeral at all: either no digits
// were consumed (e.g. "" or "K") or the numeral carries a minus sign, which strtoull() would
// otherwise silently wrap around. On success `*value` holds the parsed number, `*endptr`
// points at the first unparsed character and `*overflow` tells whether the numeral did not
// fit into unsigned long long. The caller's errno is left untouched.
static bool flag__parse_number(const char *arg, char **endptr, unsigned long long int *value, bool *overflow)
{
    // strtoull() only ever sets errno, it never clears it, so a stale ERANGE from earlier
    // code would be indistinguishable from a fresh one without this reset.
    int saved_errno = errno;
    errno = 0;
    *value = strtoull(arg, endptr, 10);
    *overflow = (*value == ULLONG_MAX && errno == ERANGE);
    errno = saved_errno;

    if (*endptr == arg) return false;
    if (memchr(arg, '-', (size_t) (*endptr - arg)) != NULL) return false;
    return true;
}

// Parses the command line into the registered flags.
//
// On the first call the first element of argv is taken as the program name. Parsing stops at
// the first argument that does not start with '-' (it becomes the first "rest" argument) or
// at "--" (which is dropped). Returns false and records the error (see flag_print_error())
// on an unknown flag, a missing value, or a malformed/overflowing number.
bool flag_parse(int argc, char **argv)
{
    Flag_Context *c = &flag_global_context;

    if (c->program_name == NULL && argc > 0) {
        c->program_name = flag_shift_args(&argc, &argv);
    }

    while (argc > 0) {
        char *flag = flag_shift_args(&argc, &argv);

        if (*flag != '-') {
            // NOTE: pushing flag back into args
            c->rest_argc = argc + 1;
            c->rest_argv = argv - 1;
            return true;
        }

        if (strcmp(flag, "--") == 0) {
            // NOTE: but if it's the terminator we don't need to push it back
            c->rest_argc = argc;
            c->rest_argv = argv;
            return true;
        }

        // NOTE: remove the dash
        flag += 1;

        bool found = false;
        for (size_t i = 0; i < c->flags_count; ++i) {
            if (strcmp(c->flags[i].name, flag) == 0) {
                static_assert(COUNT_FLAG_TYPES == 4, "Exhaustive flag type parsing");
                switch (c->flags[i].type) {
                case FLAG_BOOL: {
                    c->flags[i].val.as_bool = true;
                }
                break;

                case FLAG_STR: {
                    if (argc == 0) return flag__fail(c, FLAG_ERROR_NO_VALUE, flag);
                    char *arg = flag_shift_args(&argc, &argv);
                    c->flags[i].val.as_str = arg;
                }
                break;

                case FLAG_UINT64: {
                    if (argc == 0) return flag__fail(c, FLAG_ERROR_NO_VALUE, flag);
                    char *arg = flag_shift_args(&argc, &argv);

                    static_assert(sizeof(unsigned long long int) == sizeof(uint64_t), "The original author designed this for x86_64 machine with the compiler that expects unsigned long long int and uint64_t to be the same thing, so they could use strtoull() function to parse it. Please adjust this code for your case and maybe even send the patch to upstream to make it work on a wider range of environments.");
                    // TODO: replace strtoull with a custom solution
                    // That way we can get rid of the dependency on errno and static_assert
                    char *endptr;
                    unsigned long long int result;
                    bool overflow;
                    if (!flag__parse_number(arg, &endptr, &result, &overflow) || *endptr != '\0') {
                        return flag__fail(c, FLAG_ERROR_INVALID_NUMBER, flag);
                    }
                    if (overflow) return flag__fail(c, FLAG_ERROR_INTEGER_OVERFLOW, flag);

                    c->flags[i].val.as_uint64 = result;
                }
                break;

                case FLAG_SIZE: {
                    if (argc == 0) return flag__fail(c, FLAG_ERROR_NO_VALUE, flag);
                    char *arg = flag_shift_args(&argc, &argv);

                    static_assert(sizeof(unsigned long long int) == sizeof(size_t), "The original author designed this for x86_64 machine with the compiler that expects unsigned long long int and size_t to be the same thing, so they could use strtoull() function to parse it. Please adjust this code for your case and maybe even send the patch to upstream to make it work on a wider range of environments.");
                    // TODO: replace strtoull with a custom solution
                    // That way we can get rid of the dependency on errno and static_assert
                    char *endptr;
                    unsigned long long int result;
                    bool overflow;
                    if (!flag__parse_number(arg, &endptr, &result, &overflow)) {
                        return flag__fail(c, FLAG_ERROR_INVALID_NUMBER, flag);
                    }

                    // TODO: handle more multiplicative suffixes like in dd(1). From the dd(1) man page:
                    // > N and BYTES may be followed by the following
                    // > multiplicative suffixes: c =1, w =2, b =512, kB =1000, K
                    // > =1024, MB =1000*1000, M =1024*1024, xM =M, GB
                    // > =1000*1000*1000, G =1024*1024*1024, and so on for T, P,
                    // > E, Z, Y.
                    unsigned long long int multiplier = 1;
                    if (strcmp(endptr, "K") == 0) {
                        multiplier = 1024ULL;
                    } else if (strcmp(endptr, "M") == 0) {
                        multiplier = 1024ULL*1024ULL;
                    } else if (strcmp(endptr, "G") == 0) {
                        multiplier = 1024ULL*1024ULL*1024ULL;
                    } else if (strcmp(endptr, "") != 0) {
                        // TODO: capability to report what exactly is the wrong suffix
                        return flag__fail(c, FLAG_ERROR_INVALID_SIZE_SUFFIX, flag);
                    }

                    // The range check must cover both the numeral itself and the scaled
                    // value; checking after an unchecked multiplication would miss both.
                    if (overflow || result > ULLONG_MAX/multiplier) {
                        return flag__fail(c, FLAG_ERROR_INTEGER_OVERFLOW, flag);
                    }

                    c->flags[i].val.as_size = result*multiplier;
                }
                break;

                case COUNT_FLAG_TYPES:
                default: {
                    assert(0 && "unreachable");
                    exit(69);
                }
                }

                found = true;
            }
        }

        if (!found) return flag__fail(c, FLAG_ERROR_UNKNOWN, flag);
    }

    c->rest_argc = argc;
    c->rest_argv = argv;
    return true;
}

// Prints every registered flag with its description and, where one is set, its default.
void flag_print_options(FILE *stream)
{
    Flag_Context *c = &flag_global_context;
    for (size_t i = 0; i < c->flags_count; ++i) {
        Flag *flag = &c->flags[i];

        fprintf(stream, "    -%s\n", flag->name);
        fprintf(stream, "        %s\n", flag->desc);
        static_assert(COUNT_FLAG_TYPES == 4, "Exhaustive flag type defaults printing");
        switch (c->flags[i].type) {
        case FLAG_BOOL:
            // A false default is the implicit one, so only a true default is worth printing.
            if (flag->def.as_bool) {
                fprintf(stream, "        Default: %s\n", flag->def.as_bool ? "true" : "false");
            }
            break;
        case FLAG_UINT64:
            fprintf(stream, "        Default: %" PRIu64 "\n", flag->def.as_uint64);
            break;
        case FLAG_SIZE:
            fprintf(stream, "        Default: %zu\n", flag->def.as_size);
            break;
        case FLAG_STR:
            if (flag->def.as_str) {
                fprintf(stream, "        Default: %s\n", flag->def.as_str);
            }
            break;
        default:
            assert(0 && "unreachable");
            exit(69);
        }
    }
}

// Prints a human-readable message for the error recorded by the last failed flag_parse().
void flag_print_error(FILE *stream)
{
    Flag_Context *c = &flag_global_context;
    static_assert(COUNT_FLAG_ERRORS == 6, "Exhaustive flag error printing");
    switch (c->flag_error) {
    case FLAG_NO_ERROR:
        // NOTE: don't call flag_print_error() if flag_parse() didn't return false, okay? ._.
        fprintf(stream, "Operation Failed Successfully! Please tell the developer of this software that they don't know what they are doing! :)");
        break;
    case FLAG_ERROR_UNKNOWN:
        fprintf(stream, "ERROR: -%s: unknown flag\n", c->flag_error_name);
        break;
    case FLAG_ERROR_NO_VALUE:
        fprintf(stream, "ERROR: -%s: no value provided\n", c->flag_error_name);
        break;
    case FLAG_ERROR_INVALID_NUMBER:
        fprintf(stream, "ERROR: -%s: invalid number\n", c->flag_error_name);
        break;
    case FLAG_ERROR_INTEGER_OVERFLOW:
        fprintf(stream, "ERROR: -%s: integer overflow\n", c->flag_error_name);
        break;
    case FLAG_ERROR_INVALID_SIZE_SUFFIX:
        fprintf(stream, "ERROR: -%s: invalid size suffix\n", c->flag_error_name);
        break;
    case COUNT_FLAG_ERRORS:
    default:
        assert(0 && "unreachable");
        exit(69);
    }
}

#endif // FLAG_IMPLEMENTATION

/*
   Revision history:

     1.0.0 (2025-03-03) Initial release
                        Save program_name in the context

*/

// Copyright 2021 Alexey Kutepov
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 406 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 407 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 408 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 409 | // IN THE SOFTWARE. 410 | -------------------------------------------------------------------------------- /thirdparty/nob.h: -------------------------------------------------------------------------------- 1 | /* nob - v1.18.0 - Public Domain - https://github.com/tsoding/nob.h 2 | 3 | This library is the next generation of the [NoBuild](https://github.com/tsoding/nobuild) idea. 4 | 5 | # Quick Example 6 | 7 | ```c 8 | // nob.c 9 | #define NOB_IMPLEMENTATION 10 | #include "nob.h" 11 | 12 | int main(int argc, char **argv) 13 | { 14 | NOB_GO_REBUILD_URSELF(argc, argv); 15 | Nob_Cmd cmd = {0}; 16 | nob_cmd_append(&cmd, "cc", "-Wall", "-Wextra", "-o", "main", "main.c"); 17 | if (!nob_cmd_run_sync(cmd)) return 1; 18 | return 0; 19 | } 20 | ``` 21 | 22 | ```console 23 | $ cc -o nob nob.c 24 | $ ./nob 25 | ``` 26 | 27 | The `nob` automatically rebuilds itself if `nob.c` is modified thanks to 28 | the `NOB_GO_REBUILD_URSELF` macro (don't forget to check out how it works below) 29 | 30 | # The Zoo of `nob_cmd_run_*` Functions 31 | 32 | `Nob_Cmd` is just a dynamic array of strings which represents a command and its arguments. 33 | First you append the arguments 34 | 35 | ```c 36 | Nob_Cmd cmd = {0}; 37 | nob_cmd_append(&cmd, "cc", "-Wall", "-Wextra", "-o", "main", "main.c"); 38 | ``` 39 | 40 | Then you run it 41 | 42 | ```c 43 | if (!nob_cmd_run_sync(cmd)) return 1; 44 | ``` 45 | 46 | `*_sync` at the end indicates that the function blocks until the command finishes executing 47 | and returns `true` on success and `false` on failure. 
You can run the command asynchronously 48 | but you have to explicitly wait for it afterwards: 49 | 50 | ```c 51 | Nob_Proc p = nob_cmd_run_async(cmd); 52 | if (p == NOB_INVALID_PROC) return 1; 53 | if (!nob_proc_wait(p)) return 1; 54 | ``` 55 | 56 | One of the problems with running commands like that is that `Nob_Cmd` still contains the arguments 57 | from the previously run command. If you want to reuse the same `Nob_Cmd` you have to not forget to reset 58 | it 59 | 60 | ```c 61 | Nob_Cmd cmd = {0}; 62 | 63 | nob_cmd_append(&cmd, "cc", "-Wall", "-Wextra", "-o", "main", "main.c"); 64 | if (!nob_cmd_run_sync(cmd)) return 1; 65 | cmd.count = 0; 66 | 67 | nob_cmd_append(&cmd, "./main", "foo", "bar", "baz"); 68 | if (!nob_cmd_run_sync(cmd)) return 1; 69 | cmd.count = 0; 70 | ``` 71 | 72 | Which is a bit error-prone. To make it a bit easier we have `nob_cmd_run_sync_and_reset()` which 73 | accepts `Nob_Cmd` by reference and resets it for you: 74 | 75 | ```c 76 | Nob_Cmd cmd = {0}; 77 | 78 | nob_cmd_append(&cmd, "cc", "-Wall", "-Wextra", "-o", "main", "main.c"); 79 | if (!nob_cmd_run_sync_and_reset(&cmd)) return 1; 80 | 81 | nob_cmd_append(&cmd, "./main", "foo", "bar", "baz"); 82 | if (!nob_cmd_run_sync_and_reset(&cmd)) return 1; 83 | ``` 84 | 85 | There is of course also `nob_cmd_run_async_and_reset()` to maintain the pattern.
86 | 87 | The stdin, stdout and stderr of any command can be redirected by using the `Nob_Cmd_Redirect` structure 88 | along with `nob_cmd_run_sync_redirect()` or `nob_cmd_run_async_redirect()` 89 | 90 | ```c 91 | // Opening all the necessary files 92 | Nob_Fd fdin = nob_fd_open_for_read("input.txt"); 93 | if (fdin == NOB_INVALID_FD) return 1; 94 | Nob_Fd fdout = nob_fd_open_for_write("output.txt"); 95 | if (fdout == NOB_INVALID_FD) return 1; 96 | Nob_Fd fderr = nob_fd_open_for_write("error.txt"); 97 | if (fderr == NOB_INVALID_FD) return 1; 98 | 99 | // Preparing the command 100 | Nob_Cmd cmd = {0}; 101 | nob_cmd_append(&cmd, "./main", "foo", "bar", "baz"); 102 | 103 | // Running the command synchronously redirecting the standard streams 104 | bool ok = nob_cmd_run_sync_redirect(cmd, (Nob_Cmd_Redirect) { 105 | .fdin = &fdin, 106 | .fdout = &fdout, 107 | .fderr = &fderr, 108 | }); 109 | if (!ok) return 1; 110 | 111 | // Closing all the files 112 | nob_fd_close(fdin); 113 | nob_fd_close(fdout); 114 | nob_fd_close(fderr); 115 | 116 | // Resetting the command 117 | cmd.count = 0; 118 | ``` 119 | 120 | And of course if you find closing the files and resetting the command annoying we have 121 | `nob_cmd_run_sync_redirect_and_reset()` and `nob_cmd_run_async_redirect_and_reset()` 122 | which do all of that for you automatically. 123 | 124 | All the Zoo of `nob_cmd_run_*` functions follows the same pattern: sync/async, 125 | redirect/no redirect, and_reset/no and_reset. They always come in that order. 126 | 127 | # Stripping off `nob_` Prefixes 128 | 129 | Since Pure C does not have any namespaces we prefix each name of the API with the `nob_` to avoid any 130 | potential conflicts with any other names in your code. But sometimes it is very annoying and makes 131 | the code noisy.
If you know that none of the names from nob.h conflict with anything in your code 132 | you can enable the NOB_STRIP_PREFIX macro and just drop all the prefixes: 133 | 134 | ```c 135 | // nob.c 136 | #define NOB_IMPLEMENTATION 137 | #define NOB_STRIP_PREFIX 138 | #include "nob.h" 139 | 140 | int main(int argc, char **argv) 141 | { 142 | NOB_GO_REBUILD_URSELF(argc, argv); 143 | Cmd cmd = {0}; 144 | cmd_append(&cmd, "cc", "-Wall", "-Wextra", "-o", "main", "main.c"); 145 | if (!cmd_run_sync(cmd)) return 1; 146 | return 0; 147 | } 148 | ``` 149 | 150 | Not all the names have strippable prefixes. All the redefinable names like `NOB_GO_REBUILD_URSELF` 151 | for instance will retain their prefix even if NOB_STRIP_PREFIX is enabled. A notable exception is the 152 | nob_log() function. Stripping away the prefix results in log() which was historically always referring 153 | to the natural logarithmic function that is already defined in math.h. So there is no reason to strip 154 | off the prefix for nob_log(). 155 | 156 | The prefixes are stripped off only on the level of the preprocessor. The names of the functions in the 157 | compiled object file will still retain the `nob_` prefix. Keep that in mind when you FFI with nob.h 158 | from other languages (for whatever reason). 159 | 160 | If only a few specific names create conflicts for you, you can just #undef those names after the 161 | `#include <nob.h>` since they are macros anyway.
#ifndef NOB_ASSERT
#include <assert.h>
#define NOB_ASSERT assert
#endif /* NOB_ASSERT */

#ifndef NOB_REALLOC
#include <stdlib.h>
#define NOB_REALLOC realloc
#endif /* NOB_REALLOC */

#ifndef NOB_FREE
#include <stdlib.h>
#define NOB_FREE free
#endif /* NOB_FREE */

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <errno.h>
#include <ctype.h>

#ifdef _WIN32
#    define WIN32_LEAN_AND_MEAN
#    define _WINUSER_
#    define _WINGDI_
#    define _IMM_
#    define _WINCON_
#    include <windows.h>
#    include <direct.h>
#    include <shellapi.h>
#else
#    include <sys/types.h>
#    include <sys/wait.h>
#    include <sys/stat.h>
#    include <unistd.h>
#    include <fcntl.h>
#endif

#ifdef _WIN32
#    define NOB_LINE_END "\r\n"
#else
#    define NOB_LINE_END "\n"
#endif

#if defined(__GNUC__) || defined(__clang__)
// https://gcc.gnu.org/onlinedocs/gcc-4.7.2/gcc/Function-Attributes.html
#define NOB_PRINTF_FORMAT(STRING_INDEX, FIRST_TO_CHECK) __attribute__ ((format (printf, STRING_INDEX, FIRST_TO_CHECK)))
#else
// TODO: implement NOB_PRINTF_FORMAT for MSVC
#define NOB_PRINTF_FORMAT(STRING_INDEX, FIRST_TO_CHECK)
#endif

#define NOB_UNUSED(value) (void)(value)
#define NOB_TODO(message) do { fprintf(stderr, "%s:%d: TODO: %s\n", __FILE__, __LINE__, message); abort(); } while(0)
#define NOB_UNREACHABLE(message) do { fprintf(stderr, "%s:%d: UNREACHABLE: %s\n", __FILE__, __LINE__, message); abort(); } while(0)

// Number of elements of a true array (not a pointer). The arguments are parenthesized so
// that expressions such as `cond ? a : b` or `i + 1` are not torn apart by the expansion.
#define NOB_ARRAY_LEN(array) (sizeof(array)/sizeof((array)[0]))
// Bounds-checked (via NOB_ASSERT) element access into a true array.
#define NOB_ARRAY_GET(array, index) \
    (NOB_ASSERT((size_t)(index) < NOB_ARRAY_LEN(array)), (array)[(size_t)(index)])

typedef enum {
    NOB_INFO,
    NOB_WARNING,
    NOB_ERROR,
    NOB_NO_LOGS,
} Nob_Log_Level;

// Any messages with the level below nob_minimal_log_level are going to be suppressed.
extern Nob_Log_Level nob_minimal_log_level;

void nob_log(Nob_Log_Level level, const char *fmt, ...) NOB_PRINTF_FORMAT(2, 3);

// It is an equivalent of shift command from bash. It basically pops an element from
// the beginning of a sized array.
#define nob_shift(xs, xs_sz) (NOB_ASSERT((xs_sz) > 0), (xs_sz)--, *(xs)++)
// NOTE: nob_shift_args() is an alias for an old variant of nob_shift that only worked with
// the command line arguments passed to the main() function. nob_shift() is more generic.
// So nob_shift_args() is semi-deprecated, but I don't see much reason to urgently
// remove it. This alias does not hurt anybody.
#define nob_shift_args(argc, argv) nob_shift(*argv, *argc)

typedef struct {
    const char **items;
    size_t count;
    size_t capacity;
} Nob_File_Paths;

typedef enum {
    NOB_FILE_REGULAR = 0,
    NOB_FILE_DIRECTORY,
    NOB_FILE_SYMLINK,
    NOB_FILE_OTHER,
} Nob_File_Type;

bool nob_mkdir_if_not_exists(const char *path);
bool nob_copy_file(const char *src_path, const char *dst_path);
bool nob_copy_directory_recursively(const char *src_path, const char *dst_path);
bool nob_read_entire_dir(const char *parent, Nob_File_Paths *children);
bool nob_write_entire_file(const char *path, const void *data, size_t size);
Nob_File_Type nob_get_file_type(const char *path);
bool nob_delete_file(const char *path);

// Expects a local variable `result` and a label `defer` in the enclosing function.
#define nob_return_defer(value) do { result = (value); goto defer; } while(0)

// Initial capacity of a dynamic array
#ifndef NOB_DA_INIT_CAP
#define NOB_DA_INIT_CAP 256
#endif

// Grow the dynamic array (by doubling) until it can hold at least expected_capacity items.
// Never shrinks. Asserts on allocation failure.
#define nob_da_reserve(da, expected_capacity)                                              \
    do {                                                                                   \
        if ((expected_capacity) > (da)->capacity) {                                        \
            if ((da)->capacity == 0) {                                                     \
                (da)->capacity = NOB_DA_INIT_CAP;                                          \
            }                                                                              \
            while ((expected_capacity) > (da)->capacity) {                                 \
                (da)->capacity *= 2;                                                       \
            }                                                                              \
            (da)->items = NOB_REALLOC((da)->items, (da)->capacity * sizeof(*(da)->items)); \
            NOB_ASSERT((da)->items != NULL && "Buy more RAM lol");                         \
        }                                                                                  \
    } while (0)

// Append an item to a dynamic array
#define nob_da_append(da, item)                \
    do {                                       \
        nob_da_reserve((da), (da)->count + 1); \
        (da)->items[(da)->count++] = (item);   \
    } while (0)

#define nob_da_free(da) NOB_FREE((da).items)

// Append several items to a dynamic array.
// Appending zero items is a no-op: memcpy() must not be handed a NULL pointer even for a
// zero length, and both an empty source array and a never-grown destination have NULL items.
#define nob_da_append_many(da, new_items, new_items_count)                                          \
    do {                                                                                            \
        if ((new_items_count) > 0) {                                                                \
            nob_da_reserve((da), (da)->count + (new_items_count));                                  \
            memcpy((da)->items + (da)->count, (new_items), (new_items_count)*sizeof(*(da)->items)); \
            (da)->count += (new_items_count);                                                       \
        }                                                                                           \
    } while (0)

// Set the item count to new_size, growing the storage if needed. New items are uninitialized.
#define nob_da_resize(da, new_size)         \
    do {                                    \
        nob_da_reserve((da), (new_size));   \
        (da)->count = (new_size);           \
    } while (0)

#define nob_da_last(da) (da)->items[(NOB_ASSERT((da)->count > 0), (da)->count-1)]
// Remove item i by overwriting it with the last item (does not preserve order).
// The temporary carries a nob__ prefix so it cannot shadow a caller variable used in `i`.
#define nob_da_remove_unordered(da, i)                                 \
    do {                                                               \
        size_t nob__remove_index = (i);                                \
        NOB_ASSERT(nob__remove_index < (da)->count);                   \
        (da)->items[nob__remove_index] = (da)->items[--(da)->count];   \
    } while(0)

// Foreach over Dynamic Arrays. Example:
// ```c
// typedef struct {
//     int *items;
//     size_t count;
//     size_t capacity;
// } Numbers;
//
// Numbers xs = {0};
//
// nob_da_append(&xs, 69);
// nob_da_append(&xs, 420);
// nob_da_append(&xs, 1337);
//
// nob_da_foreach(int, x, &xs) {
//     // `x` here is a pointer to the current element. You can get its index by taking a difference
//     // between `x` and the start of the array which is `xs.items`.
//     size_t index = x - xs.items;
//     nob_log(NOB_INFO, "%zu: %d", index, *x);
// }
// ```
#define nob_da_foreach(Type, it, da) for (Type *it = (da)->items; it < (da)->items + (da)->count; ++it)

typedef struct {
    char *items;
    size_t count;
    size_t capacity;
} Nob_String_Builder;

bool nob_read_entire_file(const char *path, Nob_String_Builder *sb);
int nob_sb_appendf(Nob_String_Builder *sb, const char *fmt, ...) NOB_PRINTF_FORMAT(2, 3);

// Append a sized buffer to a string builder
#define nob_sb_append_buf(sb, buf, size) nob_da_append_many(sb, buf, size)

// Append a NULL-terminated string to a string builder.
// The temporaries carry a nob__ prefix so they cannot shadow caller variables used in the arguments.
#define nob_sb_append_cstr(sb, cstr)                        \
    do {                                                    \
        const char *nob__cstr = (cstr);                     \
        size_t nob__cstr_len = strlen(nob__cstr);           \
        nob_da_append_many(sb, nob__cstr, nob__cstr_len);   \
    } while (0)

// Append a single NULL character at the end of a string builder. So then you can
// use it as a NULL-terminated C string
#define nob_sb_append_null(sb) nob_da_append_many(sb, "", 1)

// Free the memory allocated by a string builder
#define nob_sb_free(sb) NOB_FREE((sb).items)

// Process handle
#ifdef _WIN32
typedef HANDLE Nob_Proc;
#define NOB_INVALID_PROC INVALID_HANDLE_VALUE
typedef HANDLE Nob_Fd;
#define NOB_INVALID_FD INVALID_HANDLE_VALUE
#else
typedef int Nob_Proc;
#define NOB_INVALID_PROC (-1)
typedef int Nob_Fd;
#define NOB_INVALID_FD (-1)
#endif // _WIN32

Nob_Fd nob_fd_open_for_read(const char *path);
Nob_Fd nob_fd_open_for_write(const char *path);
void nob_fd_close(Nob_Fd fd);

typedef struct {
    Nob_Proc *items;
    size_t count;
    size_t capacity;
} Nob_Procs;

bool nob_procs_wait(Nob_Procs procs);
bool nob_procs_wait_and_reset(Nob_Procs *procs);
finished 401 | bool nob_proc_wait(Nob_Proc proc); 402 | 403 | // A command - the main workhorse of Nob. Nob is all about building commands and running them 404 | typedef struct { 405 | const char **items; 406 | size_t count; 407 | size_t capacity; 408 | } Nob_Cmd; 409 | 410 | // Example: 411 | // ```c 412 | // Nob_Fd fdin = nob_fd_open_for_read("input.txt"); 413 | // if (fdin == NOB_INVALID_FD) fail(); 414 | // Nob_Fd fdout = nob_fd_open_for_write("output.txt"); 415 | // if (fdout == NOB_INVALID_FD) fail(); 416 | // Nob_Cmd cmd = {0}; 417 | // nob_cmd_append(&cmd, "cat"); 418 | // if (!nob_cmd_run_sync_redirect_and_reset(&cmd, (Nob_Cmd_Redirect) { 419 | // .fdin = &fdin, 420 | // .fdout = &fdout 421 | // })) fail(); 422 | // ``` 423 | typedef struct { 424 | Nob_Fd *fdin; 425 | Nob_Fd *fdout; 426 | Nob_Fd *fderr; 427 | } Nob_Cmd_Redirect; 428 | 429 | // Render a string representation of a command into a string builder. Keep in mind that the 430 | // string builder is not NULL-terminated by default. Use nob_sb_append_null if you plan to 431 | // use it as a C string. 432 | void nob_cmd_render(Nob_Cmd cmd, Nob_String_Builder *render); 433 | 434 | #define nob_cmd_append(cmd, ...) 
\ 435 | nob_da_append_many(cmd, \ 436 | ((const char*[]){__VA_ARGS__}), \ 437 | (sizeof((const char*[]){__VA_ARGS__})/sizeof(const char*))) 438 | 439 | #define nob_cmd_extend(cmd, other_cmd) \ 440 | nob_da_append_many(cmd, (other_cmd)->items, (other_cmd)->count) 441 | 442 | // Free all the memory allocated by command arguments 443 | #define nob_cmd_free(cmd) NOB_FREE(cmd.items) 444 | 445 | // Run command asynchronously 446 | #define nob_cmd_run_async(cmd) nob_cmd_run_async_redirect(cmd, (Nob_Cmd_Redirect) {0}) 447 | // NOTE: nob_cmd_run_async_and_reset() is just like nob_cmd_run_async() except it also resets cmd.count to 0 448 | // so the Nob_Cmd instance can be seamlessly used several times in a row 449 | Nob_Proc nob_cmd_run_async_and_reset(Nob_Cmd *cmd); 450 | // Run redirected command asynchronously 451 | Nob_Proc nob_cmd_run_async_redirect(Nob_Cmd cmd, Nob_Cmd_Redirect redirect); 452 | // Run redirected command asynchronously and set cmd.count to 0 and close all the opened files 453 | Nob_Proc nob_cmd_run_async_redirect_and_reset(Nob_Cmd *cmd, Nob_Cmd_Redirect redirect); 454 | 455 | // Run command synchronously 456 | bool nob_cmd_run_sync(Nob_Cmd cmd); 457 | // NOTE: nob_cmd_run_sync_and_reset() is just like nob_cmd_run_sync() except it also resets cmd.count to 0 458 | // so the Nob_Cmd instance can be seamlessly used several times in a row 459 | bool nob_cmd_run_sync_and_reset(Nob_Cmd *cmd); 460 | // Run redirected command synchronously 461 | bool nob_cmd_run_sync_redirect(Nob_Cmd cmd, Nob_Cmd_Redirect redirect); 462 | // Run redirected command synchronously and set cmd.count to 0 and close all the opened files 463 | bool nob_cmd_run_sync_redirect_and_reset(Nob_Cmd *cmd, Nob_Cmd_Redirect redirect); 464 | 465 | #ifndef NOB_TEMP_CAPACITY 466 | #define NOB_TEMP_CAPACITY (8*1024*1024) 467 | #endif // NOB_TEMP_CAPACITY 468 | char *nob_temp_strdup(const char *cstr); 469 | void *nob_temp_alloc(size_t size); 470 | char *nob_temp_sprintf(const char *format, ...) 
// TODO: add MinGW support for Go Rebuild Urself™ Technology
//
// NOB_REBUILD_URSELF expands to the comma-separated argv of the compiler command that turns
// source_path into binary_path. Redefine it before including nob.h to customize bootstrapping.
#ifndef NOB_REBUILD_URSELF
#  if defined(_WIN32)
     // __clang__ is tested first: clang also defines __GNUC__ when targeting MinGW, so testing
     // __GNUC__ first would make the clang branch unreachable there.
#    if defined(__clang__)
#       define NOB_REBUILD_URSELF(binary_path, source_path) "clang", "-o", binary_path, source_path
#    elif defined(__GNUC__)
#       define NOB_REBUILD_URSELF(binary_path, source_path) "gcc", "-o", binary_path, source_path
#    elif defined(_MSC_VER)
#       define NOB_REBUILD_URSELF(binary_path, source_path) "cl.exe", nob_temp_sprintf("/Fe:%s", (binary_path)), source_path
#    endif
#  else
#    define NOB_REBUILD_URSELF(binary_path, source_path) "cc", "-o", binary_path, source_path
#  endif
#endif

// Go Rebuild Urself™ Technology
//
//   How to use it:
//     int main(int argc, char** argv) {
//         NOB_GO_REBUILD_URSELF(argc, argv);
//         // actual work
//         return 0;
//     }
//
//   After you add this macro, every time you run ./nob it will detect
//   that you modified its original source code and will try to rebuild itself
//   before doing any actual work. So you only need to bootstrap your build system
//   once.
//
//   The modification is detected by comparing the last modified times of the executable
//   and its source code. The same way the make utility usually does it.
//
//   The rebuilding is done by using the NOB_REBUILD_URSELF macro which you can redefine
//   if you need a special way of bootstrapping your build system. (which I personally
//   do not recommend since the whole idea of NoBuild is to keep the process of bootstrapping
//   as simple as possible and doing all of the actual work inside of ./nob)
//
// The variadic tail is a NULL-terminated list of additional source paths.
void nob__go_rebuild_urself(int argc, char **argv, const char *source_path, ...);
#define NOB_GO_REBUILD_URSELF(argc, argv) nob__go_rebuild_urself(argc, argv, __FILE__, NULL)
// Sometimes your nob.c includes additional files, so you want the Go Rebuild Urself™ Technology to check
// if they also were modified and rebuild nob.c accordingly. For that we have NOB_GO_REBUILD_URSELF_PLUS():
// ```c
// #define NOB_IMPLEMENTATION
// #include "nob.h"
//
// #include "foo.c"
// #include "bar.c"
//
// int main(int argc, char **argv)
// {
//     NOB_GO_REBUILD_URSELF_PLUS(argc, argv, "foo.c", "bar.c");
//     // ...
//     return 0;
// }
// ```
// NOTE: the expansion deliberately carries no trailing semicolon (the caller writes it), so the
// macro behaves like a plain function call and can be used as the body of an if/else.
#define NOB_GO_REBUILD_URSELF_PLUS(argc, argv, ...) nob__go_rebuild_urself(argc, argv, __FILE__, __VA_ARGS__, NULL)
conditions: 583 | // 584 | // The above copyright notice and this permission notice shall be 585 | // included in all copies or substantial portions of the Software. 586 | // 587 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 588 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 589 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 590 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 591 | // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 592 | // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 593 | // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 594 | // 595 | // ============================================================ 596 | // 597 | // minirent — 0.0.1 — A subset of dirent interface for Windows. 598 | // 599 | // https://github.com/tsoding/minirent 600 | // 601 | // ============================================================ 602 | // 603 | // ChangeLog (https://semver.org/ is implied) 604 | // 605 | // 0.0.2 Automatically include dirent.h on non-Windows 606 | // platforms 607 | // 0.0.1 First Official Release 608 | 609 | #ifndef _WIN32 610 | #include 611 | #else // _WIN32 612 | 613 | #define WIN32_LEAN_AND_MEAN 614 | #include "windows.h" 615 | 616 | struct dirent 617 | { 618 | char d_name[MAX_PATH+1]; 619 | }; 620 | 621 | typedef struct DIR DIR; 622 | 623 | static DIR *opendir(const char *dirpath); 624 | static struct dirent *readdir(DIR *dirp); 625 | static int closedir(DIR *dirp); 626 | 627 | #endif // _WIN32 628 | // minirent.h HEADER END //////////////////////////////////////// 629 | 630 | #ifdef _WIN32 631 | 632 | char *nob_win32_error_message(DWORD err); 633 | 634 | #endif // _WIN32 635 | 636 | #endif // NOB_H_ 637 | 638 | #ifdef NOB_IMPLEMENTATION 639 | 640 | // Any messages with the level below nob_minimal_log_level are going to be suppressed. 
641 | Nob_Log_Level nob_minimal_log_level = NOB_INFO; 642 | 643 | #ifdef _WIN32 644 | 645 | // Base on https://stackoverflow.com/a/75644008 646 | // > .NET Core uses 4096 * sizeof(WCHAR) buffer on stack for FormatMessageW call. And...thats it. 647 | // > 648 | // > https://github.com/dotnet/runtime/blob/3b63eb1346f1ddbc921374a5108d025662fb5ffd/src/coreclr/utilcode/posterror.cpp#L264-L265 649 | #ifndef NOB_WIN32_ERR_MSG_SIZE 650 | #define NOB_WIN32_ERR_MSG_SIZE (4 * 1024) 651 | #endif // NOB_WIN32_ERR_MSG_SIZE 652 | 653 | char *nob_win32_error_message(DWORD err) { 654 | static char win32ErrMsg[NOB_WIN32_ERR_MSG_SIZE] = {0}; 655 | DWORD errMsgSize = FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, err, LANG_USER_DEFAULT, win32ErrMsg, 656 | NOB_WIN32_ERR_MSG_SIZE, NULL); 657 | 658 | if (errMsgSize == 0) { 659 | if (GetLastError() != ERROR_MR_MID_NOT_FOUND) { 660 | if (sprintf(win32ErrMsg, "Could not get error message for 0x%lX", err) > 0) { 661 | return (char *)&win32ErrMsg; 662 | } else { 663 | return NULL; 664 | } 665 | } else { 666 | if (sprintf(win32ErrMsg, "Invalid Windows Error code (0x%lX)", err) > 0) { 667 | return (char *)&win32ErrMsg; 668 | } else { 669 | return NULL; 670 | } 671 | } 672 | } 673 | 674 | while (errMsgSize > 1 && isspace(win32ErrMsg[errMsgSize - 1])) { 675 | win32ErrMsg[--errMsgSize] = '\0'; 676 | } 677 | 678 | return win32ErrMsg; 679 | } 680 | 681 | #endif // _WIN32 682 | 683 | // The implementation idea is stolen from https://github.com/zhiayang/nabs 684 | void nob__go_rebuild_urself(int argc, char **argv, const char *source_path, ...) 685 | { 686 | const char *binary_path = nob_shift(argv, argc); 687 | #ifdef _WIN32 688 | // On Windows executables almost always invoked without extension, so 689 | // it's ./nob, not ./nob.exe. For renaming the extension is a must. 
690 | if (!nob_sv_end_with(nob_sv_from_cstr(binary_path), ".exe")) { 691 | binary_path = nob_temp_sprintf("%s.exe", binary_path); 692 | } 693 | #endif 694 | 695 | Nob_File_Paths source_paths = {0}; 696 | nob_da_append(&source_paths, source_path); 697 | va_list args; 698 | va_start(args, source_path); 699 | for (;;) { 700 | const char *path = va_arg(args, const char*); 701 | if (path == NULL) break; 702 | nob_da_append(&source_paths, path); 703 | } 704 | va_end(args); 705 | 706 | int rebuild_is_needed = nob_needs_rebuild(binary_path, source_paths.items, source_paths.count); 707 | if (rebuild_is_needed < 0) exit(1); // error 708 | if (!rebuild_is_needed) { // no rebuild is needed 709 | NOB_FREE(source_paths.items); 710 | return; 711 | } 712 | 713 | Nob_Cmd cmd = {0}; 714 | 715 | const char *old_binary_path = nob_temp_sprintf("%s.old", binary_path); 716 | 717 | if (!nob_rename(binary_path, old_binary_path)) exit(1); 718 | nob_cmd_append(&cmd, NOB_REBUILD_URSELF(binary_path, source_path)); 719 | if (!nob_cmd_run_sync_and_reset(&cmd)) { 720 | nob_rename(old_binary_path, binary_path); 721 | exit(1); 722 | } 723 | #ifdef NOB_EXPERIMENTAL_DELETE_OLD 724 | // TODO: this is an experimental behavior behind a compilation flag. 725 | // Once it is confirmed that it does not cause much problems on both POSIX and Windows 726 | // we may turn it on by default. 
727 | nob_delete_file(old_binary_path); 728 | #endif // NOB_EXPERIMENTAL_DELETE_OLD 729 | 730 | nob_cmd_append(&cmd, binary_path); 731 | nob_da_append_many(&cmd, argv, argc); 732 | if (!nob_cmd_run_sync_and_reset(&cmd)) exit(1); 733 | exit(0); 734 | } 735 | 736 | static size_t nob_temp_size = 0; 737 | static char nob_temp[NOB_TEMP_CAPACITY] = {0}; 738 | 739 | bool nob_mkdir_if_not_exists(const char *path) 740 | { 741 | #ifdef _WIN32 742 | int result = mkdir(path); 743 | #else 744 | int result = mkdir(path, 0755); 745 | #endif 746 | if (result < 0) { 747 | if (errno == EEXIST) { 748 | nob_log(NOB_INFO, "directory `%s` already exists", path); 749 | return true; 750 | } 751 | nob_log(NOB_ERROR, "could not create directory `%s`: %s", path, strerror(errno)); 752 | return false; 753 | } 754 | 755 | nob_log(NOB_INFO, "created directory `%s`", path); 756 | return true; 757 | } 758 | 759 | bool nob_copy_file(const char *src_path, const char *dst_path) 760 | { 761 | nob_log(NOB_INFO, "copying %s -> %s", src_path, dst_path); 762 | #ifdef _WIN32 763 | if (!CopyFile(src_path, dst_path, FALSE)) { 764 | nob_log(NOB_ERROR, "Could not copy file: %s", nob_win32_error_message(GetLastError())); 765 | return false; 766 | } 767 | return true; 768 | #else 769 | int src_fd = -1; 770 | int dst_fd = -1; 771 | size_t buf_size = 32*1024; 772 | char *buf = NOB_REALLOC(NULL, buf_size); 773 | NOB_ASSERT(buf != NULL && "Buy more RAM lol!!"); 774 | bool result = true; 775 | 776 | src_fd = open(src_path, O_RDONLY); 777 | if (src_fd < 0) { 778 | nob_log(NOB_ERROR, "Could not open file %s: %s", src_path, strerror(errno)); 779 | nob_return_defer(false); 780 | } 781 | 782 | struct stat src_stat; 783 | if (fstat(src_fd, &src_stat) < 0) { 784 | nob_log(NOB_ERROR, "Could not get mode of file %s: %s", src_path, strerror(errno)); 785 | nob_return_defer(false); 786 | } 787 | 788 | dst_fd = open(dst_path, O_CREAT | O_TRUNC | O_WRONLY, src_stat.st_mode); 789 | if (dst_fd < 0) { 790 | nob_log(NOB_ERROR, "Could 
not create file %s: %s", dst_path, strerror(errno)); 791 | nob_return_defer(false); 792 | } 793 | 794 | for (;;) { 795 | ssize_t n = read(src_fd, buf, buf_size); 796 | if (n == 0) break; 797 | if (n < 0) { 798 | nob_log(NOB_ERROR, "Could not read from file %s: %s", src_path, strerror(errno)); 799 | nob_return_defer(false); 800 | } 801 | char *buf2 = buf; 802 | while (n > 0) { 803 | ssize_t m = write(dst_fd, buf2, n); 804 | if (m < 0) { 805 | nob_log(NOB_ERROR, "Could not write to file %s: %s", dst_path, strerror(errno)); 806 | nob_return_defer(false); 807 | } 808 | n -= m; 809 | buf2 += m; 810 | } 811 | } 812 | 813 | defer: 814 | NOB_FREE(buf); 815 | close(src_fd); 816 | close(dst_fd); 817 | return result; 818 | #endif 819 | } 820 | 821 | void nob_cmd_render(Nob_Cmd cmd, Nob_String_Builder *render) 822 | { 823 | for (size_t i = 0; i < cmd.count; ++i) { 824 | const char *arg = cmd.items[i]; 825 | if (arg == NULL) break; 826 | if (i > 0) nob_sb_append_cstr(render, " "); 827 | if (!strchr(arg, ' ')) { 828 | nob_sb_append_cstr(render, arg); 829 | } else { 830 | nob_da_append(render, '\''); 831 | nob_sb_append_cstr(render, arg); 832 | nob_da_append(render, '\''); 833 | } 834 | } 835 | } 836 | 837 | Nob_Proc nob_cmd_run_async_redirect(Nob_Cmd cmd, Nob_Cmd_Redirect redirect) 838 | { 839 | if (cmd.count < 1) { 840 | nob_log(NOB_ERROR, "Could not run empty command"); 841 | return NOB_INVALID_PROC; 842 | } 843 | 844 | Nob_String_Builder sb = {0}; 845 | nob_cmd_render(cmd, &sb); 846 | nob_sb_append_null(&sb); 847 | nob_log(NOB_INFO, "CMD: %s", sb.items); 848 | nob_sb_free(sb); 849 | memset(&sb, 0, sizeof(sb)); 850 | 851 | #ifdef _WIN32 852 | // https://docs.microsoft.com/en-us/windows/win32/procthread/creating-a-child-process-with-redirected-input-and-output 853 | 854 | STARTUPINFO siStartInfo; 855 | ZeroMemory(&siStartInfo, sizeof(siStartInfo)); 856 | siStartInfo.cb = sizeof(STARTUPINFO); 857 | // NOTE: theoretically setting NULL to std handles should not be a problem 858 | 
// https://docs.microsoft.com/en-us/windows/console/getstdhandle?redirectedfrom=MSDN#attachdetach-behavior 859 | // TODO: check for errors in GetStdHandle 860 | siStartInfo.hStdError = redirect.fderr ? *redirect.fderr : GetStdHandle(STD_ERROR_HANDLE); 861 | siStartInfo.hStdOutput = redirect.fdout ? *redirect.fdout : GetStdHandle(STD_OUTPUT_HANDLE); 862 | siStartInfo.hStdInput = redirect.fdin ? *redirect.fdin : GetStdHandle(STD_INPUT_HANDLE); 863 | siStartInfo.dwFlags |= STARTF_USESTDHANDLES; 864 | 865 | PROCESS_INFORMATION piProcInfo; 866 | ZeroMemory(&piProcInfo, sizeof(PROCESS_INFORMATION)); 867 | 868 | // TODO: use a more reliable rendering of the command instead of cmd_render 869 | // cmd_render is for logging primarily 870 | nob_cmd_render(cmd, &sb); 871 | nob_sb_append_null(&sb); 872 | BOOL bSuccess = CreateProcessA(NULL, sb.items, NULL, NULL, TRUE, 0, NULL, NULL, &siStartInfo, &piProcInfo); 873 | nob_sb_free(sb); 874 | 875 | if (!bSuccess) { 876 | nob_log(NOB_ERROR, "Could not create child process: %s", nob_win32_error_message(GetLastError())); 877 | return NOB_INVALID_PROC; 878 | } 879 | 880 | CloseHandle(piProcInfo.hThread); 881 | 882 | return piProcInfo.hProcess; 883 | #else 884 | pid_t cpid = fork(); 885 | if (cpid < 0) { 886 | nob_log(NOB_ERROR, "Could not fork child process: %s", strerror(errno)); 887 | return NOB_INVALID_PROC; 888 | } 889 | 890 | if (cpid == 0) { 891 | if (redirect.fdin) { 892 | if (dup2(*redirect.fdin, STDIN_FILENO) < 0) { 893 | nob_log(NOB_ERROR, "Could not setup stdin for child process: %s", strerror(errno)); 894 | exit(1); 895 | } 896 | } 897 | 898 | if (redirect.fdout) { 899 | if (dup2(*redirect.fdout, STDOUT_FILENO) < 0) { 900 | nob_log(NOB_ERROR, "Could not setup stdout for child process: %s", strerror(errno)); 901 | exit(1); 902 | } 903 | } 904 | 905 | if (redirect.fderr) { 906 | if (dup2(*redirect.fderr, STDERR_FILENO) < 0) { 907 | nob_log(NOB_ERROR, "Could not setup stderr for child process: %s", strerror(errno)); 908 | 
exit(1); 909 | } 910 | } 911 | 912 | // NOTE: This leaks a bit of memory in the child process. 913 | // But do we actually care? It's a one off leak anyway... 914 | Nob_Cmd cmd_null = {0}; 915 | nob_da_append_many(&cmd_null, cmd.items, cmd.count); 916 | nob_cmd_append(&cmd_null, NULL); 917 | 918 | if (execvp(cmd.items[0], (char * const*) cmd_null.items) < 0) { 919 | nob_log(NOB_ERROR, "Could not exec child process: %s", strerror(errno)); 920 | exit(1); 921 | } 922 | NOB_UNREACHABLE("nob_cmd_run_async_redirect"); 923 | } 924 | 925 | return cpid; 926 | #endif 927 | } 928 | 929 | Nob_Proc nob_cmd_run_async_and_reset(Nob_Cmd *cmd) 930 | { 931 | Nob_Proc proc = nob_cmd_run_async(*cmd); 932 | cmd->count = 0; 933 | return proc; 934 | } 935 | 936 | Nob_Proc nob_cmd_run_async_redirect_and_reset(Nob_Cmd *cmd, Nob_Cmd_Redirect redirect) 937 | { 938 | Nob_Proc proc = nob_cmd_run_async_redirect(*cmd, redirect); 939 | cmd->count = 0; 940 | if (redirect.fdin) { 941 | nob_fd_close(*redirect.fdin); 942 | *redirect.fdin = NOB_INVALID_FD; 943 | } 944 | if (redirect.fdout) { 945 | nob_fd_close(*redirect.fdout); 946 | *redirect.fdout = NOB_INVALID_FD; 947 | } 948 | if (redirect.fderr) { 949 | nob_fd_close(*redirect.fderr); 950 | *redirect.fderr = NOB_INVALID_FD; 951 | } 952 | return proc; 953 | } 954 | 955 | Nob_Fd nob_fd_open_for_read(const char *path) 956 | { 957 | #ifndef _WIN32 958 | Nob_Fd result = open(path, O_RDONLY); 959 | if (result < 0) { 960 | nob_log(NOB_ERROR, "Could not open file %s: %s", path, strerror(errno)); 961 | return NOB_INVALID_FD; 962 | } 963 | return result; 964 | #else 965 | // https://docs.microsoft.com/en-us/windows/win32/fileio/opening-a-file-for-reading-or-writing 966 | SECURITY_ATTRIBUTES saAttr = {0}; 967 | saAttr.nLength = sizeof(SECURITY_ATTRIBUTES); 968 | saAttr.bInheritHandle = TRUE; 969 | 970 | Nob_Fd result = CreateFile( 971 | path, 972 | GENERIC_READ, 973 | 0, 974 | &saAttr, 975 | OPEN_EXISTING, 976 | FILE_ATTRIBUTE_READONLY, 977 | NULL); 978 | 
979 | if (result == INVALID_HANDLE_VALUE) { 980 | nob_log(NOB_ERROR, "Could not open file %s: %s", path, nob_win32_error_message(GetLastError())); 981 | return NOB_INVALID_FD; 982 | } 983 | 984 | return result; 985 | #endif // _WIN32 986 | } 987 | 988 | Nob_Fd nob_fd_open_for_write(const char *path) 989 | { 990 | #ifndef _WIN32 991 | Nob_Fd result = open(path, 992 | O_WRONLY | O_CREAT | O_TRUNC, 993 | S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); 994 | if (result < 0) { 995 | nob_log(NOB_ERROR, "could not open file %s: %s", path, strerror(errno)); 996 | return NOB_INVALID_FD; 997 | } 998 | return result; 999 | #else 1000 | SECURITY_ATTRIBUTES saAttr = {0}; 1001 | saAttr.nLength = sizeof(SECURITY_ATTRIBUTES); 1002 | saAttr.bInheritHandle = TRUE; 1003 | 1004 | Nob_Fd result = CreateFile( 1005 | path, // name of the write 1006 | GENERIC_WRITE, // open for writing 1007 | 0, // do not share 1008 | &saAttr, // default security 1009 | CREATE_ALWAYS, // create always 1010 | FILE_ATTRIBUTE_NORMAL, // normal file 1011 | NULL // no attr. 
template 1012 | ); 1013 | 1014 | if (result == INVALID_HANDLE_VALUE) { 1015 | nob_log(NOB_ERROR, "Could not open file %s: %s", path, nob_win32_error_message(GetLastError())); 1016 | return NOB_INVALID_FD; 1017 | } 1018 | 1019 | return result; 1020 | #endif // _WIN32 1021 | } 1022 | 1023 | void nob_fd_close(Nob_Fd fd) 1024 | { 1025 | #ifdef _WIN32 1026 | CloseHandle(fd); 1027 | #else 1028 | close(fd); 1029 | #endif // _WIN32 1030 | } 1031 | 1032 | bool nob_procs_wait(Nob_Procs procs) 1033 | { 1034 | bool success = true; 1035 | for (size_t i = 0; i < procs.count; ++i) { 1036 | success = nob_proc_wait(procs.items[i]) && success; 1037 | } 1038 | return success; 1039 | } 1040 | 1041 | bool nob_procs_wait_and_reset(Nob_Procs *procs) 1042 | { 1043 | bool success = nob_procs_wait(*procs); 1044 | procs->count = 0; 1045 | return success; 1046 | } 1047 | 1048 | bool nob_proc_wait(Nob_Proc proc) 1049 | { 1050 | if (proc == NOB_INVALID_PROC) return false; 1051 | 1052 | #ifdef _WIN32 1053 | DWORD result = WaitForSingleObject( 1054 | proc, // HANDLE hHandle, 1055 | INFINITE // DWORD dwMilliseconds 1056 | ); 1057 | 1058 | if (result == WAIT_FAILED) { 1059 | nob_log(NOB_ERROR, "could not wait on child process: %s", nob_win32_error_message(GetLastError())); 1060 | return false; 1061 | } 1062 | 1063 | DWORD exit_status; 1064 | if (!GetExitCodeProcess(proc, &exit_status)) { 1065 | nob_log(NOB_ERROR, "could not get process exit code: %s", nob_win32_error_message(GetLastError())); 1066 | return false; 1067 | } 1068 | 1069 | if (exit_status != 0) { 1070 | nob_log(NOB_ERROR, "command exited with exit code %lu", exit_status); 1071 | return false; 1072 | } 1073 | 1074 | CloseHandle(proc); 1075 | 1076 | return true; 1077 | #else 1078 | for (;;) { 1079 | int wstatus = 0; 1080 | if (waitpid(proc, &wstatus, 0) < 0) { 1081 | nob_log(NOB_ERROR, "could not wait on command (pid %d): %s", proc, strerror(errno)); 1082 | return false; 1083 | } 1084 | 1085 | if (WIFEXITED(wstatus)) { 1086 | int 
exit_status = WEXITSTATUS(wstatus); 1087 | if (exit_status != 0) { 1088 | nob_log(NOB_ERROR, "command exited with exit code %d", exit_status); 1089 | return false; 1090 | } 1091 | 1092 | break; 1093 | } 1094 | 1095 | if (WIFSIGNALED(wstatus)) { 1096 | nob_log(NOB_ERROR, "command process was terminated by signal %d", WTERMSIG(wstatus)); 1097 | return false; 1098 | } 1099 | } 1100 | 1101 | return true; 1102 | #endif 1103 | } 1104 | 1105 | bool nob_cmd_run_sync_redirect(Nob_Cmd cmd, Nob_Cmd_Redirect redirect) 1106 | { 1107 | Nob_Proc p = nob_cmd_run_async_redirect(cmd, redirect); 1108 | if (p == NOB_INVALID_PROC) return false; 1109 | return nob_proc_wait(p); 1110 | } 1111 | 1112 | bool nob_cmd_run_sync(Nob_Cmd cmd) 1113 | { 1114 | Nob_Proc p = nob_cmd_run_async(cmd); 1115 | if (p == NOB_INVALID_PROC) return false; 1116 | return nob_proc_wait(p); 1117 | } 1118 | 1119 | bool nob_cmd_run_sync_and_reset(Nob_Cmd *cmd) 1120 | { 1121 | bool p = nob_cmd_run_sync(*cmd); 1122 | cmd->count = 0; 1123 | return p; 1124 | } 1125 | 1126 | bool nob_cmd_run_sync_redirect_and_reset(Nob_Cmd *cmd, Nob_Cmd_Redirect redirect) 1127 | { 1128 | bool p = nob_cmd_run_sync_redirect(*cmd, redirect); 1129 | cmd->count = 0; 1130 | if (redirect.fdin) { 1131 | nob_fd_close(*redirect.fdin); 1132 | *redirect.fdin = NOB_INVALID_FD; 1133 | } 1134 | if (redirect.fdout) { 1135 | nob_fd_close(*redirect.fdout); 1136 | *redirect.fdout = NOB_INVALID_FD; 1137 | } 1138 | if (redirect.fderr) { 1139 | nob_fd_close(*redirect.fderr); 1140 | *redirect.fderr = NOB_INVALID_FD; 1141 | } 1142 | return p; 1143 | } 1144 | 1145 | void nob_log(Nob_Log_Level level, const char *fmt, ...) 
1146 | { 1147 | if (level < nob_minimal_log_level) return; 1148 | 1149 | switch (level) { 1150 | case NOB_INFO: 1151 | fprintf(stderr, "[INFO] "); 1152 | break; 1153 | case NOB_WARNING: 1154 | fprintf(stderr, "[WARNING] "); 1155 | break; 1156 | case NOB_ERROR: 1157 | fprintf(stderr, "[ERROR] "); 1158 | break; 1159 | case NOB_NO_LOGS: return; 1160 | default: 1161 | NOB_UNREACHABLE("nob_log"); 1162 | } 1163 | 1164 | va_list args; 1165 | va_start(args, fmt); 1166 | vfprintf(stderr, fmt, args); 1167 | va_end(args); 1168 | fprintf(stderr, "\n"); 1169 | } 1170 | 1171 | bool nob_read_entire_dir(const char *parent, Nob_File_Paths *children) 1172 | { 1173 | bool result = true; 1174 | DIR *dir = NULL; 1175 | 1176 | dir = opendir(parent); 1177 | if (dir == NULL) { 1178 | #ifdef _WIN32 1179 | nob_log(NOB_ERROR, "Could not open directory %s: %s", parent, nob_win32_error_message(GetLastError())); 1180 | #else 1181 | nob_log(NOB_ERROR, "Could not open directory %s: %s", parent, strerror(errno)); 1182 | #endif // _WIN32 1183 | nob_return_defer(false); 1184 | } 1185 | 1186 | errno = 0; 1187 | struct dirent *ent = readdir(dir); 1188 | while (ent != NULL) { 1189 | nob_da_append(children, nob_temp_strdup(ent->d_name)); 1190 | ent = readdir(dir); 1191 | } 1192 | 1193 | if (errno != 0) { 1194 | #ifdef _WIN32 1195 | nob_log(NOB_ERROR, "Could not read directory %s: %s", parent, nob_win32_error_message(GetLastError())); 1196 | #else 1197 | nob_log(NOB_ERROR, "Could not read directory %s: %s", parent, strerror(errno)); 1198 | #endif // _WIN32 1199 | nob_return_defer(false); 1200 | } 1201 | 1202 | defer: 1203 | if (dir) closedir(dir); 1204 | return result; 1205 | } 1206 | 1207 | bool nob_write_entire_file(const char *path, const void *data, size_t size) 1208 | { 1209 | bool result = true; 1210 | 1211 | FILE *f = fopen(path, "wb"); 1212 | if (f == NULL) { 1213 | nob_log(NOB_ERROR, "Could not open file %s for writing: %s\n", path, strerror(errno)); 1214 | nob_return_defer(false); 1215 | } 1216 
| 1217 | // len 1218 | // v 1219 | // aaaaaaaaaa 1220 | // ^ 1221 | // data 1222 | 1223 | const char *buf = data; 1224 | while (size > 0) { 1225 | size_t n = fwrite(buf, 1, size, f); 1226 | if (ferror(f)) { 1227 | nob_log(NOB_ERROR, "Could not write into file %s: %s\n", path, strerror(errno)); 1228 | nob_return_defer(false); 1229 | } 1230 | size -= n; 1231 | buf += n; 1232 | } 1233 | 1234 | defer: 1235 | if (f) fclose(f); 1236 | return result; 1237 | } 1238 | 1239 | Nob_File_Type nob_get_file_type(const char *path) 1240 | { 1241 | #ifdef _WIN32 1242 | DWORD attr = GetFileAttributesA(path); 1243 | if (attr == INVALID_FILE_ATTRIBUTES) { 1244 | nob_log(NOB_ERROR, "Could not get file attributes of %s: %s", path, nob_win32_error_message(GetLastError())); 1245 | return -1; 1246 | } 1247 | 1248 | if (attr & FILE_ATTRIBUTE_DIRECTORY) return NOB_FILE_DIRECTORY; 1249 | // TODO: detect symlinks on Windows (whatever that means on Windows anyway) 1250 | return NOB_FILE_REGULAR; 1251 | #else // _WIN32 1252 | struct stat statbuf; 1253 | if (stat(path, &statbuf) < 0) { 1254 | nob_log(NOB_ERROR, "Could not get stat of %s: %s", path, strerror(errno)); 1255 | return -1; 1256 | } 1257 | 1258 | if (S_ISREG(statbuf.st_mode)) return NOB_FILE_REGULAR; 1259 | if (S_ISDIR(statbuf.st_mode)) return NOB_FILE_DIRECTORY; 1260 | if (S_ISLNK(statbuf.st_mode)) return NOB_FILE_SYMLINK; 1261 | return NOB_FILE_OTHER; 1262 | #endif // _WIN32 1263 | } 1264 | 1265 | bool nob_delete_file(const char *path) 1266 | { 1267 | nob_log(NOB_INFO, "deleting %s", path); 1268 | #ifdef _WIN32 1269 | if (!DeleteFileA(path)) { 1270 | nob_log(NOB_ERROR, "Could not delete file %s: %s", path, nob_win32_error_message(GetLastError())); 1271 | return false; 1272 | } 1273 | return true; 1274 | #else 1275 | if (remove(path) < 0) { 1276 | nob_log(NOB_ERROR, "Could not delete file %s: %s", path, strerror(errno)); 1277 | return false; 1278 | } 1279 | return true; 1280 | #endif // _WIN32 1281 | } 1282 | 1283 | bool 
nob_copy_directory_recursively(const char *src_path, const char *dst_path) 1284 | { 1285 | bool result = true; 1286 | Nob_File_Paths children = {0}; 1287 | Nob_String_Builder src_sb = {0}; 1288 | Nob_String_Builder dst_sb = {0}; 1289 | size_t temp_checkpoint = nob_temp_save(); 1290 | 1291 | Nob_File_Type type = nob_get_file_type(src_path); 1292 | if (type < 0) return false; 1293 | 1294 | switch (type) { 1295 | case NOB_FILE_DIRECTORY: { 1296 | if (!nob_mkdir_if_not_exists(dst_path)) nob_return_defer(false); 1297 | if (!nob_read_entire_dir(src_path, &children)) nob_return_defer(false); 1298 | 1299 | for (size_t i = 0; i < children.count; ++i) { 1300 | if (strcmp(children.items[i], ".") == 0) continue; 1301 | if (strcmp(children.items[i], "..") == 0) continue; 1302 | 1303 | src_sb.count = 0; 1304 | nob_sb_append_cstr(&src_sb, src_path); 1305 | nob_sb_append_cstr(&src_sb, "/"); 1306 | nob_sb_append_cstr(&src_sb, children.items[i]); 1307 | nob_sb_append_null(&src_sb); 1308 | 1309 | dst_sb.count = 0; 1310 | nob_sb_append_cstr(&dst_sb, dst_path); 1311 | nob_sb_append_cstr(&dst_sb, "/"); 1312 | nob_sb_append_cstr(&dst_sb, children.items[i]); 1313 | nob_sb_append_null(&dst_sb); 1314 | 1315 | if (!nob_copy_directory_recursively(src_sb.items, dst_sb.items)) { 1316 | nob_return_defer(false); 1317 | } 1318 | } 1319 | } break; 1320 | 1321 | case NOB_FILE_REGULAR: { 1322 | if (!nob_copy_file(src_path, dst_path)) { 1323 | nob_return_defer(false); 1324 | } 1325 | } break; 1326 | 1327 | case NOB_FILE_SYMLINK: { 1328 | nob_log(NOB_WARNING, "TODO: Copying symlinks is not supported yet"); 1329 | } break; 1330 | 1331 | case NOB_FILE_OTHER: { 1332 | nob_log(NOB_ERROR, "Unsupported type of file %s", src_path); 1333 | nob_return_defer(false); 1334 | } break; 1335 | 1336 | default: NOB_UNREACHABLE("nob_copy_directory_recursively"); 1337 | } 1338 | 1339 | defer: 1340 | nob_temp_rewind(temp_checkpoint); 1341 | nob_da_free(src_sb); 1342 | nob_da_free(dst_sb); 1343 | nob_da_free(children); 
1344 | return result; 1345 | } 1346 | 1347 | char *nob_temp_strdup(const char *cstr) 1348 | { 1349 | size_t n = strlen(cstr); 1350 | char *result = nob_temp_alloc(n + 1); 1351 | NOB_ASSERT(result != NULL && "Increase NOB_TEMP_CAPACITY"); 1352 | memcpy(result, cstr, n); 1353 | result[n] = '\0'; 1354 | return result; 1355 | } 1356 | 1357 | void *nob_temp_alloc(size_t size) 1358 | { 1359 | if (nob_temp_size + size > NOB_TEMP_CAPACITY) return NULL; 1360 | void *result = &nob_temp[nob_temp_size]; 1361 | nob_temp_size += size; 1362 | return result; 1363 | } 1364 | 1365 | char *nob_temp_sprintf(const char *format, ...) 1366 | { 1367 | va_list args; 1368 | va_start(args, format); 1369 | int n = vsnprintf(NULL, 0, format, args); 1370 | va_end(args); 1371 | 1372 | NOB_ASSERT(n >= 0); 1373 | char *result = nob_temp_alloc(n + 1); 1374 | NOB_ASSERT(result != NULL && "Extend the size of the temporary allocator"); 1375 | // TODO: use proper arenas for the temporary allocator; 1376 | va_start(args, format); 1377 | vsnprintf(result, n + 1, format, args); 1378 | va_end(args); 1379 | 1380 | return result; 1381 | } 1382 | 1383 | void nob_temp_reset(void) 1384 | { 1385 | nob_temp_size = 0; 1386 | } 1387 | 1388 | size_t nob_temp_save(void) 1389 | { 1390 | return nob_temp_size; 1391 | } 1392 | 1393 | void nob_temp_rewind(size_t checkpoint) 1394 | { 1395 | nob_temp_size = checkpoint; 1396 | } 1397 | 1398 | const char *nob_temp_sv_to_cstr(Nob_String_View sv) 1399 | { 1400 | char *result = nob_temp_alloc(sv.count + 1); 1401 | NOB_ASSERT(result != NULL && "Extend the size of the temporary allocator"); 1402 | memcpy(result, sv.data, sv.count); 1403 | result[sv.count] = '\0'; 1404 | return result; 1405 | } 1406 | 1407 | int nob_needs_rebuild(const char *output_path, const char **input_paths, size_t input_paths_count) 1408 | { 1409 | #ifdef _WIN32 1410 | BOOL bSuccess; 1411 | 1412 | HANDLE output_path_fd = CreateFile(output_path, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, 
NULL); 1413 | if (output_path_fd == INVALID_HANDLE_VALUE) { 1414 | // NOTE: if output does not exist it 100% must be rebuilt 1415 | if (GetLastError() == ERROR_FILE_NOT_FOUND) return 1; 1416 | nob_log(NOB_ERROR, "Could not open file %s: %s", output_path, nob_win32_error_message(GetLastError())); 1417 | return -1; 1418 | } 1419 | FILETIME output_path_time; 1420 | bSuccess = GetFileTime(output_path_fd, NULL, NULL, &output_path_time); 1421 | CloseHandle(output_path_fd); 1422 | if (!bSuccess) { 1423 | nob_log(NOB_ERROR, "Could not get time of %s: %s", output_path, nob_win32_error_message(GetLastError())); 1424 | return -1; 1425 | } 1426 | 1427 | for (size_t i = 0; i < input_paths_count; ++i) { 1428 | const char *input_path = input_paths[i]; 1429 | HANDLE input_path_fd = CreateFile(input_path, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL); 1430 | if (input_path_fd == INVALID_HANDLE_VALUE) { 1431 | // NOTE: non-existing input is an error cause it is needed for building in the first place 1432 | nob_log(NOB_ERROR, "Could not open file %s: %s", input_path, nob_win32_error_message(GetLastError())); 1433 | return -1; 1434 | } 1435 | FILETIME input_path_time; 1436 | bSuccess = GetFileTime(input_path_fd, NULL, NULL, &input_path_time); 1437 | CloseHandle(input_path_fd); 1438 | if (!bSuccess) { 1439 | nob_log(NOB_ERROR, "Could not get time of %s: %s", input_path, nob_win32_error_message(GetLastError())); 1440 | return -1; 1441 | } 1442 | 1443 | // NOTE: if even a single input_path is fresher than output_path that's 100% rebuild 1444 | if (CompareFileTime(&input_path_time, &output_path_time) == 1) return 1; 1445 | } 1446 | 1447 | return 0; 1448 | #else 1449 | struct stat statbuf = {0}; 1450 | 1451 | if (stat(output_path, &statbuf) < 0) { 1452 | // NOTE: if output does not exist it 100% must be rebuilt 1453 | if (errno == ENOENT) return 1; 1454 | nob_log(NOB_ERROR, "could not stat %s: %s", output_path, strerror(errno)); 1455 | return -1; 1456 | } 1457 | int 
output_path_time = statbuf.st_mtime; 1458 | 1459 | for (size_t i = 0; i < input_paths_count; ++i) { 1460 | const char *input_path = input_paths[i]; 1461 | if (stat(input_path, &statbuf) < 0) { 1462 | // NOTE: non-existing input is an error cause it is needed for building in the first place 1463 | nob_log(NOB_ERROR, "could not stat %s: %s", input_path, strerror(errno)); 1464 | return -1; 1465 | } 1466 | int input_path_time = statbuf.st_mtime; 1467 | // NOTE: if even a single input_path is fresher than output_path that's 100% rebuild 1468 | if (input_path_time > output_path_time) return 1; 1469 | } 1470 | 1471 | return 0; 1472 | #endif 1473 | } 1474 | 1475 | int nob_needs_rebuild1(const char *output_path, const char *input_path) 1476 | { 1477 | return nob_needs_rebuild(output_path, &input_path, 1); 1478 | } 1479 | 1480 | const char *nob_path_name(const char *path) 1481 | { 1482 | #ifdef _WIN32 1483 | const char *p1 = strrchr(path, '/'); 1484 | const char *p2 = strrchr(path, '\\'); 1485 | const char *p = (p1 > p2)? p1 : p2; // NULL is ignored if the other search is successful 1486 | return p ? p + 1 : path; 1487 | #else 1488 | const char *p = strrchr(path, '/'); 1489 | return p ? 
p + 1 : path; 1490 | #endif // _WIN32 1491 | } 1492 | 1493 | bool nob_rename(const char *old_path, const char *new_path) 1494 | { 1495 | nob_log(NOB_INFO, "renaming %s -> %s", old_path, new_path); 1496 | #ifdef _WIN32 1497 | if (!MoveFileEx(old_path, new_path, MOVEFILE_REPLACE_EXISTING)) { 1498 | nob_log(NOB_ERROR, "could not rename %s to %s: %s", old_path, new_path, nob_win32_error_message(GetLastError())); 1499 | return false; 1500 | } 1501 | #else 1502 | if (rename(old_path, new_path) < 0) { 1503 | nob_log(NOB_ERROR, "could not rename %s to %s: %s", old_path, new_path, strerror(errno)); 1504 | return false; 1505 | } 1506 | #endif // _WIN32 1507 | return true; 1508 | } 1509 | 1510 | bool nob_read_entire_file(const char *path, Nob_String_Builder *sb) 1511 | { 1512 | bool result = true; 1513 | 1514 | FILE *f = fopen(path, "rb"); 1515 | if (f == NULL) nob_return_defer(false); 1516 | if (fseek(f, 0, SEEK_END) < 0) nob_return_defer(false); 1517 | #ifndef _WIN32 1518 | long m = ftell(f); 1519 | #else 1520 | long long m = _ftelli64(f); 1521 | #endif 1522 | if (m < 0) nob_return_defer(false); 1523 | if (fseek(f, 0, SEEK_SET) < 0) nob_return_defer(false); 1524 | 1525 | size_t new_count = sb->count + m; 1526 | if (new_count > sb->capacity) { 1527 | sb->items = NOB_REALLOC(sb->items, new_count); 1528 | NOB_ASSERT(sb->items != NULL && "Buy more RAM lool!!"); 1529 | sb->capacity = new_count; 1530 | } 1531 | 1532 | fread(sb->items + sb->count, m, 1, f); 1533 | if (ferror(f)) { 1534 | // TODO: Afaik, ferror does not set errno. So the error reporting in defer is not correct in this case. 1535 | nob_return_defer(false); 1536 | } 1537 | sb->count = new_count; 1538 | 1539 | defer: 1540 | if (!result) nob_log(NOB_ERROR, "Could not read file %s: %s", path, strerror(errno)); 1541 | if (f) fclose(f); 1542 | return result; 1543 | } 1544 | 1545 | int nob_sb_appendf(Nob_String_Builder *sb, const char *fmt, ...) 
1546 | { 1547 | va_list args; 1548 | 1549 | va_start(args, fmt); 1550 | int n = vsnprintf(NULL, 0, fmt, args); 1551 | va_end(args); 1552 | 1553 | // NOTE: the new_capacity needs to be +1 because of the null terminator. 1554 | // However, further below we increase sb->count by n, not n + 1. 1555 | // This is because we don't want the sb to include the null terminator. The user can always sb_append_null() if they want it 1556 | nob_da_reserve(sb, sb->count + n + 1); 1557 | char *dest = sb->items + sb->count; 1558 | va_start(args, fmt); 1559 | vsprintf(dest, fmt, args); 1560 | va_end(args); 1561 | 1562 | sb->count += n; 1563 | 1564 | return n; 1565 | } 1566 | 1567 | Nob_String_View nob_sv_chop_by_delim(Nob_String_View *sv, char delim) 1568 | { 1569 | size_t i = 0; 1570 | while (i < sv->count && sv->data[i] != delim) { 1571 | i += 1; 1572 | } 1573 | 1574 | Nob_String_View result = nob_sv_from_parts(sv->data, i); 1575 | 1576 | if (i < sv->count) { 1577 | sv->count -= i + 1; 1578 | sv->data += i + 1; 1579 | } else { 1580 | sv->count -= i; 1581 | sv->data += i; 1582 | } 1583 | 1584 | return result; 1585 | } 1586 | 1587 | Nob_String_View nob_sv_chop_left(Nob_String_View *sv, size_t n) 1588 | { 1589 | if (n > sv->count) { 1590 | n = sv->count; 1591 | } 1592 | 1593 | Nob_String_View result = nob_sv_from_parts(sv->data, n); 1594 | 1595 | sv->data += n; 1596 | sv->count -= n; 1597 | 1598 | return result; 1599 | } 1600 | 1601 | Nob_String_View nob_sv_from_parts(const char *data, size_t count) 1602 | { 1603 | Nob_String_View sv; 1604 | sv.count = count; 1605 | sv.data = data; 1606 | return sv; 1607 | } 1608 | 1609 | Nob_String_View nob_sv_trim_left(Nob_String_View sv) 1610 | { 1611 | size_t i = 0; 1612 | while (i < sv.count && isspace(sv.data[i])) { 1613 | i += 1; 1614 | } 1615 | 1616 | return nob_sv_from_parts(sv.data + i, sv.count - i); 1617 | } 1618 | 1619 | Nob_String_View nob_sv_trim_right(Nob_String_View sv) 1620 | { 1621 | size_t i = 0; 1622 | while (i < sv.count && 
isspace(sv.data[sv.count - 1 - i])) { 1623 | i += 1; 1624 | } 1625 | 1626 | return nob_sv_from_parts(sv.data, sv.count - i); 1627 | } 1628 | 1629 | Nob_String_View nob_sv_trim(Nob_String_View sv) 1630 | { 1631 | return nob_sv_trim_right(nob_sv_trim_left(sv)); 1632 | } 1633 | 1634 | Nob_String_View nob_sv_from_cstr(const char *cstr) 1635 | { 1636 | return nob_sv_from_parts(cstr, strlen(cstr)); 1637 | } 1638 | 1639 | bool nob_sv_eq(Nob_String_View a, Nob_String_View b) 1640 | { 1641 | if (a.count != b.count) { 1642 | return false; 1643 | } else { 1644 | return memcmp(a.data, b.data, a.count) == 0; 1645 | } 1646 | } 1647 | 1648 | bool nob_sv_end_with(Nob_String_View sv, const char *cstr) 1649 | { 1650 | size_t cstr_count = strlen(cstr); 1651 | if (sv.count >= cstr_count) { 1652 | size_t ending_start = sv.count - cstr_count; 1653 | Nob_String_View sv_ending = nob_sv_from_parts(sv.data + ending_start, cstr_count); 1654 | return nob_sv_eq(sv_ending, nob_sv_from_cstr(cstr)); 1655 | } 1656 | return false; 1657 | } 1658 | 1659 | 1660 | bool nob_sv_starts_with(Nob_String_View sv, Nob_String_View expected_prefix) 1661 | { 1662 | if (expected_prefix.count <= sv.count) { 1663 | Nob_String_View actual_prefix = nob_sv_from_parts(sv.data, expected_prefix.count); 1664 | return nob_sv_eq(expected_prefix, actual_prefix); 1665 | } 1666 | 1667 | return false; 1668 | } 1669 | 1670 | // RETURNS: 1671 | // 0 - file does not exists 1672 | // 1 - file exists 1673 | // -1 - error while checking if file exists. 
The error is logged 1674 | int nob_file_exists(const char *file_path) 1675 | { 1676 | #if _WIN32 1677 | // TODO: distinguish between "does not exists" and other errors 1678 | DWORD dwAttrib = GetFileAttributesA(file_path); 1679 | return dwAttrib != INVALID_FILE_ATTRIBUTES; 1680 | #else 1681 | struct stat statbuf; 1682 | if (stat(file_path, &statbuf) < 0) { 1683 | if (errno == ENOENT) return 0; 1684 | nob_log(NOB_ERROR, "Could not check if file %s exists: %s", file_path, strerror(errno)); 1685 | return -1; 1686 | } 1687 | return 1; 1688 | #endif 1689 | } 1690 | 1691 | const char *nob_get_current_dir_temp(void) 1692 | { 1693 | #ifdef _WIN32 1694 | DWORD nBufferLength = GetCurrentDirectory(0, NULL); 1695 | if (nBufferLength == 0) { 1696 | nob_log(NOB_ERROR, "could not get current directory: %s", nob_win32_error_message(GetLastError())); 1697 | return NULL; 1698 | } 1699 | 1700 | char *buffer = (char*) nob_temp_alloc(nBufferLength); 1701 | if (GetCurrentDirectory(nBufferLength, buffer) == 0) { 1702 | nob_log(NOB_ERROR, "could not get current directory: %s", nob_win32_error_message(GetLastError())); 1703 | return NULL; 1704 | } 1705 | 1706 | return buffer; 1707 | #else 1708 | char *buffer = (char*) nob_temp_alloc(PATH_MAX); 1709 | if (getcwd(buffer, PATH_MAX) == NULL) { 1710 | nob_log(NOB_ERROR, "could not get current directory: %s", strerror(errno)); 1711 | return NULL; 1712 | } 1713 | 1714 | return buffer; 1715 | #endif // _WIN32 1716 | } 1717 | 1718 | bool nob_set_current_dir(const char *path) 1719 | { 1720 | #ifdef _WIN32 1721 | if (!SetCurrentDirectory(path)) { 1722 | nob_log(NOB_ERROR, "could not set current directory to %s: %s", path, nob_win32_error_message(GetLastError())); 1723 | return false; 1724 | } 1725 | return true; 1726 | #else 1727 | if (chdir(path) < 0) { 1728 | nob_log(NOB_ERROR, "could not set current directory to %s: %s", path, strerror(errno)); 1729 | return false; 1730 | } 1731 | return true; 1732 | #endif // _WIN32 1733 | } 1734 | 1735 | // 
minirent.h SOURCE BEGIN //////////////////////////////////////// 1736 | #ifdef _WIN32 1737 | struct DIR 1738 | { 1739 | HANDLE hFind; 1740 | WIN32_FIND_DATA data; 1741 | struct dirent *dirent; 1742 | }; 1743 | 1744 | DIR *opendir(const char *dirpath) 1745 | { 1746 | NOB_ASSERT(dirpath); 1747 | 1748 | char buffer[MAX_PATH]; 1749 | snprintf(buffer, MAX_PATH, "%s\\*", dirpath); 1750 | 1751 | DIR *dir = (DIR*)NOB_REALLOC(NULL, sizeof(DIR)); 1752 | memset(dir, 0, sizeof(DIR)); 1753 | 1754 | dir->hFind = FindFirstFile(buffer, &dir->data); 1755 | if (dir->hFind == INVALID_HANDLE_VALUE) { 1756 | // TODO: opendir should set errno accordingly on FindFirstFile fail 1757 | // https://docs.microsoft.com/en-us/windows/win32/api/errhandlingapi/nf-errhandlingapi-getlasterror 1758 | errno = ENOSYS; 1759 | goto fail; 1760 | } 1761 | 1762 | return dir; 1763 | 1764 | fail: 1765 | if (dir) { 1766 | NOB_FREE(dir); 1767 | } 1768 | 1769 | return NULL; 1770 | } 1771 | 1772 | struct dirent *readdir(DIR *dirp) 1773 | { 1774 | NOB_ASSERT(dirp); 1775 | 1776 | if (dirp->dirent == NULL) { 1777 | dirp->dirent = (struct dirent*)NOB_REALLOC(NULL, sizeof(struct dirent)); 1778 | memset(dirp->dirent, 0, sizeof(struct dirent)); 1779 | } else { 1780 | if(!FindNextFile(dirp->hFind, &dirp->data)) { 1781 | if (GetLastError() != ERROR_NO_MORE_FILES) { 1782 | // TODO: readdir should set errno accordingly on FindNextFile fail 1783 | // https://docs.microsoft.com/en-us/windows/win32/api/errhandlingapi/nf-errhandlingapi-getlasterror 1784 | errno = ENOSYS; 1785 | } 1786 | 1787 | return NULL; 1788 | } 1789 | } 1790 | 1791 | memset(dirp->dirent->d_name, 0, sizeof(dirp->dirent->d_name)); 1792 | 1793 | strncpy( 1794 | dirp->dirent->d_name, 1795 | dirp->data.cFileName, 1796 | sizeof(dirp->dirent->d_name) - 1); 1797 | 1798 | return dirp->dirent; 1799 | } 1800 | 1801 | int closedir(DIR *dirp) 1802 | { 1803 | NOB_ASSERT(dirp); 1804 | 1805 | if(!FindClose(dirp->hFind)) { 1806 | // TODO: closedir should set errno 
accordingly on FindClose fail 1807 | // https://docs.microsoft.com/en-us/windows/win32/api/errhandlingapi/nf-errhandlingapi-getlasterror 1808 | errno = ENOSYS; 1809 | return -1; 1810 | } 1811 | 1812 | if (dirp->dirent) { 1813 | NOB_FREE(dirp->dirent); 1814 | } 1815 | NOB_FREE(dirp); 1816 | 1817 | return 0; 1818 | } 1819 | #endif // _WIN32 1820 | // minirent.h SOURCE END //////////////////////////////////////// 1821 | 1822 | #endif // NOB_IMPLEMENTATION 1823 | 1824 | #ifndef NOB_STRIP_PREFIX_GUARD_ 1825 | #define NOB_STRIP_PREFIX_GUARD_ 1826 | // NOTE: The name stripping should be part of the header so it's not accidentally included 1827 | // several times. At the same time, it should be at the end of the file so to not create any 1828 | // potential conflicts in the NOB_IMPLEMENTATION. The header obviously cannot be at the end 1829 | // of the file because NOB_IMPLEMENTATION needs the forward declarations from there. So the 1830 | // solution is to split the header into two parts where the name stripping part is at the 1831 | // end of the file after the NOB_IMPLEMENTATION. 1832 | #ifdef NOB_STRIP_PREFIX 1833 | #define TODO NOB_TODO 1834 | #define UNREACHABLE NOB_UNREACHABLE 1835 | #define UNUSED NOB_UNUSED 1836 | #define ARRAY_LEN NOB_ARRAY_LEN 1837 | #define ARRAY_GET NOB_ARRAY_GET 1838 | #define INFO NOB_INFO 1839 | #define WARNING NOB_WARNING 1840 | #define ERROR NOB_ERROR 1841 | #define NO_LOGS NOB_NO_LOGS 1842 | #define Log_Level Nob_Log_Level 1843 | #define minimal_log_level nob_minimal_log_level 1844 | // NOTE: Name log is already defined in math.h and historically always was the natural logarithmic function. 1845 | // So there should be no reason to strip the `nob_` prefix in this specific case. 
1846 | // #define log nob_log 1847 | #define shift nob_shift 1848 | #define shift_args nob_shift_args 1849 | #define File_Paths Nob_File_Paths 1850 | #define FILE_REGULAR NOB_FILE_REGULAR 1851 | #define FILE_DIRECTORY NOB_FILE_DIRECTORY 1852 | #define FILE_SYMLINK NOB_FILE_SYMLINK 1853 | #define FILE_OTHER NOB_FILE_OTHER 1854 | #define File_Type Nob_File_Type 1855 | #define mkdir_if_not_exists nob_mkdir_if_not_exists 1856 | #define copy_file nob_copy_file 1857 | #define copy_directory_recursively nob_copy_directory_recursively 1858 | #define read_entire_dir nob_read_entire_dir 1859 | #define write_entire_file nob_write_entire_file 1860 | #define get_file_type nob_get_file_type 1861 | #define delete_file nob_delete_file 1862 | #define return_defer nob_return_defer 1863 | #define da_append nob_da_append 1864 | #define da_free nob_da_free 1865 | #define da_append_many nob_da_append_many 1866 | #define da_resize nob_da_resize 1867 | #define da_reserve nob_da_reserve 1868 | #define da_last nob_da_last 1869 | #define da_remove_unordered nob_da_remove_unordered 1870 | #define da_foreach nob_da_foreach 1871 | #define String_Builder Nob_String_Builder 1872 | #define read_entire_file nob_read_entire_file 1873 | #define sb_appendf nob_sb_appendf 1874 | #define sb_append_buf nob_sb_append_buf 1875 | #define sb_append_cstr nob_sb_append_cstr 1876 | #define sb_append_null nob_sb_append_null 1877 | #define sb_free nob_sb_free 1878 | #define Proc Nob_Proc 1879 | #define INVALID_PROC NOB_INVALID_PROC 1880 | #define Fd Nob_Fd 1881 | #define INVALID_FD NOB_INVALID_FD 1882 | #define fd_open_for_read nob_fd_open_for_read 1883 | #define fd_open_for_write nob_fd_open_for_write 1884 | #define fd_close nob_fd_close 1885 | #define Procs Nob_Procs 1886 | #define procs_wait nob_procs_wait 1887 | #define procs_wait_and_reset nob_procs_wait_and_reset 1888 | #define proc_wait nob_proc_wait 1889 | #define Cmd Nob_Cmd 1890 | #define Cmd_Redirect Nob_Cmd_Redirect 1891 | #define cmd_render 
nob_cmd_render 1892 | #define cmd_append nob_cmd_append 1893 | #define cmd_extend nob_cmd_extend 1894 | #define cmd_free nob_cmd_free 1895 | #define cmd_run_async nob_cmd_run_async 1896 | #define cmd_run_async_and_reset nob_cmd_run_async_and_reset 1897 | #define cmd_run_async_redirect nob_cmd_run_async_redirect 1898 | #define cmd_run_async_redirect_and_reset nob_cmd_run_async_redirect_and_reset 1899 | #define cmd_run_sync nob_cmd_run_sync 1900 | #define cmd_run_sync_and_reset nob_cmd_run_sync_and_reset 1901 | #define cmd_run_sync_redirect nob_cmd_run_sync_redirect 1902 | #define cmd_run_sync_redirect_and_reset nob_cmd_run_sync_redirect_and_reset 1903 | #define temp_strdup nob_temp_strdup 1904 | #define temp_alloc nob_temp_alloc 1905 | #define temp_sprintf nob_temp_sprintf 1906 | #define temp_reset nob_temp_reset 1907 | #define temp_save nob_temp_save 1908 | #define temp_rewind nob_temp_rewind 1909 | #define path_name nob_path_name 1910 | #define rename nob_rename 1911 | #define needs_rebuild nob_needs_rebuild 1912 | #define needs_rebuild1 nob_needs_rebuild1 1913 | #define file_exists nob_file_exists 1914 | #define get_current_dir_temp nob_get_current_dir_temp 1915 | #define set_current_dir nob_set_current_dir 1916 | #define String_View Nob_String_View 1917 | #define temp_sv_to_cstr nob_temp_sv_to_cstr 1918 | #define sv_chop_by_delim nob_sv_chop_by_delim 1919 | #define sv_chop_left nob_sv_chop_left 1920 | #define sv_trim nob_sv_trim 1921 | #define sv_trim_left nob_sv_trim_left 1922 | #define sv_trim_right nob_sv_trim_right 1923 | #define sv_eq nob_sv_eq 1924 | #define sv_starts_with nob_sv_starts_with 1925 | #define sv_end_with nob_sv_end_with 1926 | #define sv_from_cstr nob_sv_from_cstr 1927 | #define sv_from_parts nob_sv_from_parts 1928 | #define sb_to_sv nob_sb_to_sv 1929 | #define win32_error_message nob_win32_error_message 1930 | #endif // NOB_STRIP_PREFIX 1931 | #endif // NOB_STRIP_PREFIX_GUARD_ 1932 | 1933 | /* 1934 | Revision history: 1935 | 1936 | 1.18.0 
(2025-03-24) Add nob_da_foreach() (By @rexim) 1937 | Allow file sizes greater than 2GB to be read on windows (By @satchelfrost and @KillerxDBr) 1938 | Fix nob_fd_open_for_write behaviour on windows so it truncates the opened files (By @twixuss) 1939 | 1.17.0 (2025-03-16) Factor out nob_da_reserve() (By @rexim) 1940 | Add nob_sb_appendf() (By @angelcaru) 1941 | 1.16.1 (2025-03-16) Make nob_da_resize() exponentially grow capacity similar to nob_da_append_many() 1942 | 1.16.0 (2025-03-16) Introduce NOB_PRINTF_FORMAT 1943 | 1.15.1 (2025-03-16) Make nob.h compilable in gcc/clang with -std=c99 on POSIX. This includes: 1944 | not using strsignal() 1945 | using S_IS* stat macros instead of S_IF* flags 1946 | 1.15.0 (2025-03-03) Add nob_sv_chop_left() 1947 | 1.14.1 (2025-03-02) Add NOB_EXPERIMENTAL_DELETE_OLD flag that enables deletion of nob.old in Go Rebuild Urself™ Technology 1948 | 1.14.0 (2025-02-17) Add nob_da_last() 1949 | Add nob_da_remove_unordered() 1950 | 1.13.1 (2025-02-17) Fix segfault in nob_delete_file() (By @SileNce5k) 1951 | 1.13.0 (2025-02-11) Add nob_da_resize() (By @satchelfrost) 1952 | 1.12.0 (2025-02-04) Add nob_delete_file() 1953 | Add nob_sv_starts_with() 1954 | 1.11.0 (2025-02-04) Add NOB_GO_REBUILD_URSELF_PLUS() (By @rexim) 1955 | 1.10.0 (2025-02-04) Make NOB_ASSERT, NOB_REALLOC, and NOB_FREE redefinable (By @OleksiiBulba) 1956 | 1.9.1 (2025-02-04) Fix signature of nob_get_current_dir_temp() (By @julianstoerig) 1957 | 1.9.0 (2024-11-06) Add Nob_Cmd_Redirect mechanism (By @rexim) 1958 | Add nob_path_name() (By @0dminnimda) 1959 | 1.8.0 (2024-11-03) Add nob_cmd_extend() (By @0dminnimda) 1960 | 1.7.0 (2024-11-03) Add nob_win32_error_message and NOB_WIN32_ERR_MSG_SIZE (By @KillerxDBr) 1961 | 1.6.0 (2024-10-27) Add nob_cmd_run_sync_and_reset() 1962 | Add nob_sb_to_sv() 1963 | Add nob_procs_wait_and_reset() 1964 | 1.5.1 (2024-10-25) Include limits.h for Linux musl libc (by @pgalkin) 1965 | 1.5.0 (2024-10-23) Add nob_get_current_dir_temp() 1966 | Add
nob_set_current_dir() 1967 | 1.4.0 (2024-10-21) Fix UX issues with NOB_GO_REBUILD_URSELF on Windows when you call nob without the .exe extension (By @pgalkin) 1968 | Add nob_sv_end_with (By @pgalkin) 1969 | 1.3.2 (2024-10-21) Fix unreachable error in nob_log on passing NOB_NO_LOGS 1970 | 1.3.1 (2024-10-21) Fix redeclaration error for minimal_log_level (By @KillerxDBr) 1971 | 1.3.0 (2024-10-17) Add NOB_UNREACHABLE 1972 | 1.2.2 (2024-10-16) Fix compilation of nob_cmd_run_sync_and_reset on Windows (By @KillerxDBr) 1973 | 1.2.1 (2024-10-16) Add a separate include guard for NOB_STRIP_PREFIX. 1974 | 1.2.0 (2024-10-15) Make NOB_DA_INIT_CAP redefinable 1975 | Add NOB_STRIP_PREFIX which strips off nob_* prefix from all the user facing names 1976 | Add NOB_UNUSED macro 1977 | Add NOB_TODO macro 1978 | Add nob_sv_trim_left and nob_sv_trim_right declarations to the header part 1979 | 1.1.1 (2024-10-15) Remove forward declaration for is_path1_modified_after_path2 1980 | 1.1.0 (2024-10-15) nob_minimal_log_level 1981 | nob_cmd_run_sync_and_reset 1982 | 1.0.0 (2024-10-15) first release based on https://github.com/tsoding/musializer/blob/4ac7cce9874bc19e02d8c160c8c6229de8919401/nob.h 1983 | */ 1984 | 1985 | /* 1986 | Version Conventions: 1987 | 1988 | We are following https://semver.org/ so the version has a format MAJOR.MINOR.PATCH: 1989 | - Modifying comments does not update the version. 1990 | - PATCH is incremented in case of a bug fix or refactoring without touching the API. 1991 | - MINOR is incremented when new functions and/or types are added in a way that does 1992 | not break any existing user code. We want to do this in the majority of the situation. 1993 | If we want to delete a certain function or type in favor of another one we should 1994 | just add the new function/type and deprecate the old one in a backward compatible way 1995 | and let them co-exist for a while. 
1996 | - MAJOR update should be just a periodic cleanup of the deprecated functions and types 1997 | without really modifying any existing functionality. 1998 | 1999 | Naming Conventions: 2000 | 2001 | - All the user facing names should be prefixed with `nob_` or `NOB_` depending on the case. 2002 | - The prefixes of non-redefinable names should be strippable with NOB_STRIP_PREFIX (unless 2003 | explicitly stated otherwise like in case of nob_log). 2004 | - Internal functions should be prefixed with `nob__` (double underscore). 2005 | */ 2006 | 2007 | /* 2008 | ------------------------------------------------------------------------------ 2009 | This software is available under 2 licenses -- choose whichever you prefer. 2010 | ------------------------------------------------------------------------------ 2011 | ALTERNATIVE A - MIT License 2012 | Copyright (c) 2024 Alexey Kutepov 2013 | Permission is hereby granted, free of charge, to any person obtaining a copy of 2014 | this software and associated documentation files (the "Software"), to deal in 2015 | the Software without restriction, including without limitation the rights to 2016 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 2017 | of the Software, and to permit persons to whom the Software is furnished to do 2018 | so, subject to the following conditions: 2019 | The above copyright notice and this permission notice shall be included in all 2020 | copies or substantial portions of the Software. 2021 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 2022 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 2023 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 2024 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 2025 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 2026 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 2027 | SOFTWARE. 2028 | ------------------------------------------------------------------------------ 2029 | ALTERNATIVE B - Public Domain (www.unlicense.org) 2030 | This is free and unencumbered software released into the public domain. 2031 | Anyone is free to copy, modify, publish, use, compile, sell, or distribute this 2032 | software, either in source code form or as a compiled binary, for any purpose, 2033 | commercial or non-commercial, and by any means. 2034 | In jurisdictions that recognize copyright laws, the author or authors of this 2035 | software dedicate any and all copyright interest in the software to the public 2036 | domain. We make this dedication for the benefit of the public at large and to 2037 | the detriment of our heirs and successors. We intend this dedication to be an 2038 | overt act of relinquishment in perpetuity of all present and future rights to 2039 | this software under copyright law. 2040 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 2041 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 2042 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 2043 | AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 2044 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 2045 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
2046 | ------------------------------------------------------------------------------ 2047 | */ 2048 | -------------------------------------------------------------------------------- /thirdparty/stb_ds.h: -------------------------------------------------------------------------------- 1 | /* stb_ds.h - v0.67 - public domain data structures - Sean Barrett 2019 2 | 3 | This is a single-header-file library that provides easy-to-use 4 | dynamic arrays and hash tables for C (also works in C++). 5 | 6 | For a gentle introduction: 7 | http://nothings.org/stb_ds 8 | 9 | To use this library, do this in *one* C or C++ file: 10 | #define STB_DS_IMPLEMENTATION 11 | #include "stb_ds.h" 12 | 13 | TABLE OF CONTENTS 14 | 15 | Table of Contents 16 | Compile-time options 17 | License 18 | Documentation 19 | Notes 20 | Notes - Dynamic arrays 21 | Notes - Hash maps 22 | Credits 23 | 24 | COMPILE-TIME OPTIONS 25 | 26 | #define STBDS_NO_SHORT_NAMES 27 | 28 | This flag needs to be set globally. 29 | 30 | By default stb_ds exposes shorter function names that are not qualified 31 | with the "stbds_" prefix. If these names conflict with the names in your 32 | code, define this flag. 33 | 34 | #define STBDS_SIPHASH_2_4 35 | 36 | This flag only needs to be set in the file containing #define STB_DS_IMPLEMENTATION. 37 | 38 | By default stb_ds.h hashes using a weaker variant of SipHash and a custom hash for 39 | 4- and 8-byte keys. On 64-bit platforms, you can define the above flag to force 40 | stb_ds.h to use specification-compliant SipHash-2-4 for all keys. Doing so makes 41 | hash table insertion about 20% slower on 4- and 8-byte keys, 5% slower on 42 | 64-byte keys, and 10% slower on 256-byte keys on my test computer. 43 | 44 | #define STBDS_REALLOC(context,ptr,size) better_realloc 45 | #define STBDS_FREE(context,ptr) better_free 46 | 47 | These defines only need to be set in the file containing #define STB_DS_IMPLEMENTATION. 
48 | 49 | By default stb_ds uses stdlib realloc() and free() for memory management. You can 50 | substitute your own functions instead by defining these symbols. You must either 51 | define both, or neither. Note that at the moment, 'context' will always be NULL. 52 | @TODO add an array/hash initialization function that takes a memory context pointer. 53 | 54 | #define STBDS_UNIT_TESTS 55 | 56 | Defines a function stbds_unit_tests() that checks the functioning of the data structures. 57 | 58 | Note that on older versions of gcc (e.g. 5.x.x) you may need to build with '-std=c++0x' 59 | (or equivalently '-std=c++11') when using anonymous structures as seen on the web 60 | page or in STBDS_UNIT_TESTS. 61 | 62 | LICENSE 63 | 64 | Placed in the public domain and also MIT licensed. 65 | See end of file for detailed license information. 66 | 67 | DOCUMENTATION 68 | 69 | Dynamic Arrays 70 | 71 | Non-function interface: 72 | 73 | Declare an empty dynamic array of type T 74 | T* foo = NULL; 75 | 76 | Access the i'th item of a dynamic array 'foo' of type T, T* foo: 77 | foo[i] 78 | 79 | Functions (actually macros) 80 | 81 | arrfree: 82 | void arrfree(T*); 83 | Frees the array. 84 | 85 | arrlen: 86 | ptrdiff_t arrlen(T*); 87 | Returns the number of elements in the array. 88 | 89 | arrlenu: 90 | size_t arrlenu(T*); 91 | Returns the number of elements in the array as an unsigned type. 92 | 93 | arrpop: 94 | T arrpop(T* a) 95 | Removes the final element of the array and returns it. 96 | 97 | arrput: 98 | T arrput(T* a, T b); 99 | Appends the item b to the end of array a. Returns b. 100 | 101 | arrins: 102 | T arrins(T* a, int p, T b); 103 | Inserts the item b into the middle of array a, into a[p], 104 | moving the rest of the array over. Returns b. 105 | 106 | arrinsn: 107 | void arrinsn(T* a, int p, int n); 108 | Inserts n uninitialized items into array a starting at a[p], 109 | moving the rest of the array over.
110 | 111 | arraddnptr: 112 | T* arraddnptr(T* a, int n) 113 | Appends n uninitialized items onto array at the end. 114 | Returns a pointer to the first uninitialized item added. 115 | 116 | arraddnindex: 117 | size_t arraddnindex(T* a, int n) 118 | Appends n uninitialized items onto array at the end. 119 | Returns the index of the first uninitialized item added. 120 | 121 | arrdel: 122 | void arrdel(T* a, int p); 123 | Deletes the element at a[p], moving the rest of the array over. 124 | 125 | arrdeln: 126 | void arrdeln(T* a, int p, int n); 127 | Deletes n elements starting at a[p], moving the rest of the array over. 128 | 129 | arrdelswap: 130 | void arrdelswap(T* a, int p); 131 | Deletes the element at a[p], replacing it with the element from 132 | the end of the array. O(1) performance. 133 | 134 | arrsetlen: 135 | void arrsetlen(T* a, int n); 136 | Changes the length of the array to n. Allocates uninitialized 137 | slots at the end if necessary. 138 | 139 | arrsetcap: 140 | size_t arrsetcap(T* a, int n); 141 | Sets the length of allocated storage to at least n. It will not 142 | change the length of the array. 143 | 144 | arrcap: 145 | size_t arrcap(T* a); 146 | Returns the number of total elements the array can contain without 147 | needing to be reallocated. 148 | 149 | Hash maps & String hash maps 150 | 151 | Given T is a structure type: struct { TK key; TV value; }. Note that some 152 | functions do not require TV value and can have other fields. For string 153 | hash maps, TK must be 'char *'. 154 | 155 | Special interface: 156 | 157 | stbds_rand_seed: 158 | void stbds_rand_seed(size_t seed); 159 | For security against adversarially chosen data, you should seed the 160 | library with a strong random number. Or at least seed it with time(). 161 | 162 | stbds_hash_string: 163 | size_t stbds_hash_string(char *str, size_t seed); 164 | Returns a hash value for a string. 
165 | 166 | stbds_hash_bytes: 167 | size_t stbds_hash_bytes(void *p, size_t len, size_t seed); 168 | These functions hash an arbitrary number of bytes. The function 169 | uses a custom hash for 4- and 8-byte data, and a weakened version 170 | of SipHash for everything else. On 64-bit platforms you can get 171 | specification-compliant SipHash-2-4 on all data by defining 172 | STBDS_SIPHASH_2_4, at a significant cost in speed. 173 | 174 | Non-function interface: 175 | 176 | Declare an empty hash map of type T 177 | T* foo = NULL; 178 | 179 | Access the i'th entry in a hash table T* foo: 180 | foo[i] 181 | 182 | Function interface (actually macros): 183 | 184 | hmfree 185 | shfree 186 | void hmfree(T*); 187 | void shfree(T*); 188 | Frees the hashmap and sets the pointer to NULL. 189 | 190 | hmlen 191 | shlen 192 | ptrdiff_t hmlen(T*) 193 | ptrdiff_t shlen(T*) 194 | Returns the number of elements in the hashmap. 195 | 196 | hmlenu 197 | shlenu 198 | size_t hmlenu(T*) 199 | size_t shlenu(T*) 200 | Returns the number of elements in the hashmap. 201 | 202 | hmgeti 203 | shgeti 204 | hmgeti_ts 205 | ptrdiff_t hmgeti(T*, TK key) 206 | ptrdiff_t shgeti(T*, char* key) 207 | ptrdiff_t hmgeti_ts(T*, TK key, ptrdiff_t tempvar) 208 | Returns the index in the hashmap which has the key 'key', or -1 209 | if the key is not present. 210 | 211 | hmget 212 | hmget_ts 213 | shget 214 | TV hmget(T*, TK key) 215 | TV shget(T*, char* key) 216 | TV hmget_ts(T*, TK key, ptrdiff_t tempvar) 217 | Returns the value corresponding to 'key' in the hashmap. 218 | The structure must have a 'value' field 219 | 220 | hmgets 221 | shgets 222 | T hmgets(T*, TK key) 223 | T shgets(T*, char* key) 224 | Returns the structure corresponding to 'key' in the hashmap. 
225 | 226 | hmgetp 227 | shgetp 228 | hmgetp_ts 229 | hmgetp_null 230 | shgetp_null 231 | T* hmgetp(T*, TK key) 232 | T* shgetp(T*, char* key) 233 | T* hmgetp_ts(T*, TK key, ptrdiff_t tempvar) 234 | T* hmgetp_null(T*, TK key) 235 | T* shgetp_null(T*, char *key) 236 | Returns a pointer to the structure corresponding to 'key' in 237 | the hashmap. Functions ending in "_null" return NULL if the key 238 | is not present in the hashmap; the others return a pointer to a 239 | structure holding the default value (but not the searched-for key). 240 | 241 | hmdefault 242 | shdefault 243 | TV hmdefault(T*, TV value) 244 | TV shdefault(T*, TV value) 245 | Sets the default value for the hashmap, the value which will be 246 | returned by hmget/shget if the key is not present. 247 | 248 | hmdefaults 249 | shdefaults 250 | TV hmdefaults(T*, T item) 251 | TV shdefaults(T*, T item) 252 | Sets the default struct for the hashmap, the contents which will be 253 | returned by hmgets/shgets if the key is not present. 254 | 255 | hmput 256 | shput 257 | TV hmput(T*, TK key, TV value) 258 | TV shput(T*, char* key, TV value) 259 | Inserts a pair into the hashmap. If the key is already 260 | present in the hashmap, updates its value. 261 | 262 | hmputs 263 | shputs 264 | T hmputs(T*, T item) 265 | T shputs(T*, T item) 266 | Inserts a struct with T.key into the hashmap. If the struct is already 267 | present in the hashmap, updates it. 268 | 269 | hmdel 270 | shdel 271 | int hmdel(T*, TK key) 272 | int shdel(T*, char* key) 273 | If 'key' is in the hashmap, deletes its entry and returns 1. 274 | Otherwise returns 0. 
275 | 276 | Function interface (actually macros) for strings only: 277 | 278 | sh_new_strdup 279 | void sh_new_strdup(T*); 280 | Overwrites the existing pointer with a newly allocated 281 | string hashmap which will automatically allocate and free 282 | each string key using realloc/free 283 | 284 | sh_new_arena 285 | void sh_new_arena(T*); 286 | Overwrites the existing pointer with a newly allocated 287 | string hashmap which will automatically allocate each string 288 | key to a string arena. Every string key ever used by this 289 | hash table remains in the arena until the arena is freed. 290 | Additionally, any key which is deleted and reinserted will 291 | be allocated multiple times in the string arena. 292 | 293 | NOTES 294 | 295 | * These data structures are realloc'd when they grow, and the macro 296 | "functions" write to the provided pointer. This means: (a) the pointer 297 | must be an lvalue, and (b) the pointer to the data structure is not 298 | stable, and you must maintain it the same as you would a realloc'd 299 | pointer. For example, if you pass a pointer to a dynamic array to a 300 | function which updates it, the function must return back the new 301 | pointer to the caller. This is the price of trying to do this in C. 302 | 303 | * The following are the only functions that are thread-safe on a single data 304 | structure, i.e. can be run in multiple threads simultaneously on the same 305 | data structure 306 | hmlen shlen 307 | hmlenu shlenu 308 | hmget_ts shget_ts 309 | hmgeti_ts shgeti_ts 310 | hmgets_ts shgets_ts 311 | 312 | * You iterate over the contents of a dynamic array and a hashmap in exactly 313 | the same way, using arrlen/hmlen/shlen: 314 | 315 | for (i=0; i < arrlen(foo); ++i) 316 | ... foo[i] ... 317 | 318 | * All operations except arrins/arrdel are O(1) amortized, but individual 319 | operations can be slow, so these data structures may not be suitable 320 | for real time use. 
Dynamic arrays double in capacity as needed, so 321 | elements are copied an average of once. Hash tables double/halve 322 | their size as needed, with appropriate hysteresis to maintain O(1) 323 | performance. 324 | 325 | NOTES - DYNAMIC ARRAY 326 | 327 | * If you know how long a dynamic array is going to be in advance, you can avoid 328 | extra memory allocations by using arrsetlen to allocate it to that length in 329 | advance and use foo[n] while filling it out, or arrsetcap to allocate the memory 330 | for that length and use arrput/arrpush as normal. 331 | 332 | * Unlike some other versions of the dynamic array, this version should 333 | be safe to use with strict-aliasing optimizations. 334 | 335 | NOTES - HASH MAP 336 | 337 | * For compilers other than GCC and clang (e.g. Visual Studio), for hmput/hmget/hmdel 338 | and variants, the key must be an lvalue (so the macro can take the address of it). 339 | Extensions are used that eliminate this requirement if you're using C99 and later 340 | in GCC or clang, or if you're using C++ in GCC. But note that this can make your 341 | code less portable. 342 | 343 | * To test for presence of a key in a hashmap, just do 'hmgeti(foo,key) >= 0'. 344 | 345 | * The iteration order of your data in the hashmap is determined solely by the 346 | order of insertions and deletions. In particular, if you never delete, new 347 | keys are always added at the end of the array. This will be consistent 348 | across all platforms and versions of the library. However, you should not 349 | attempt to serialize the internal hash table, as the hash is not consistent 350 | between different platforms, and may change with future versions of the library. 351 | 352 | * Use sh_new_arena() for string hashmaps that you never delete from. Initialize 353 | with NULL if you're managing the memory for your strings, or your strings are 354 | never freed (at least until the hashmap is freed). Otherwise, use sh_new_strdup(). 
355 | @TODO: make an arena variant that garbage collects the strings with a trivial 356 | copy collector into a new arena whenever the table shrinks / rebuilds. Since 357 | current arena recommendation is to only use arena if it never deletes, then 358 | this can just replace current arena implementation. 359 | 360 | * If adversarial input is a serious concern and you're on a 64-bit platform, 361 | enable STBDS_SIPHASH_2_4 (see the 'Compile-time options' section), and pass 362 | a strong random number to stbds_rand_seed. 363 | 364 | * The default value for the hash table is stored in foo[-1], so if you 365 | use code like 'hmget(T,k)->value = 5' you can accidentally overwrite 366 | the value stored by hmdefault if 'k' is not present. 367 | 368 | CREDITS 369 | 370 | Sean Barrett -- library, idea for dynamic array API/implementation 371 | Per Vognsen -- idea for hash table API/implementation 372 | Rafael Sachetto -- arrpop() 373 | github:HeroicKatora -- arraddn() reworking 374 | 375 | Bugfixes: 376 | Andy Durdin 377 | Shane Liesegang 378 | Vinh Truong 379 | Andreas Molzer 380 | github:hashitaku 381 | github:srdjanstipic 382 | Macoy Madson 383 | Andreas Vennstrom 384 | Tobias Mansfield-Williams 385 | */ 386 | 387 | #ifdef STBDS_UNIT_TESTS 388 | #define _CRT_SECURE_NO_WARNINGS 389 | #endif 390 | 391 | #ifndef INCLUDE_STB_DS_H 392 | #define INCLUDE_STB_DS_H 393 | 394 | #include 395 | #include 396 | 397 | #ifndef STBDS_NO_SHORT_NAMES 398 | #define arrlen stbds_arrlen 399 | #define arrlenu stbds_arrlenu 400 | #define arrput stbds_arrput 401 | #define arrpush stbds_arrput 402 | #define arrpop stbds_arrpop 403 | #define arrfree stbds_arrfree 404 | #define arraddn stbds_arraddn // deprecated, use one of the following instead: 405 | #define arraddnptr stbds_arraddnptr 406 | #define arraddnindex stbds_arraddnindex 407 | #define arrsetlen stbds_arrsetlen 408 | #define arrlast stbds_arrlast 409 | #define arrins stbds_arrins 410 | #define arrinsn stbds_arrinsn 411 | #define 
arrdel stbds_arrdel 412 | #define arrdeln stbds_arrdeln 413 | #define arrdelswap stbds_arrdelswap 414 | #define arrcap stbds_arrcap 415 | #define arrsetcap stbds_arrsetcap 416 | 417 | #define hmput stbds_hmput 418 | #define hmputs stbds_hmputs 419 | #define hmget stbds_hmget 420 | #define hmget_ts stbds_hmget_ts 421 | #define hmgets stbds_hmgets 422 | #define hmgetp stbds_hmgetp 423 | #define hmgetp_ts stbds_hmgetp_ts 424 | #define hmgetp_null stbds_hmgetp_null 425 | #define hmgeti stbds_hmgeti 426 | #define hmgeti_ts stbds_hmgeti_ts 427 | #define hmdel stbds_hmdel 428 | #define hmlen stbds_hmlen 429 | #define hmlenu stbds_hmlenu 430 | #define hmfree stbds_hmfree 431 | #define hmdefault stbds_hmdefault 432 | #define hmdefaults stbds_hmdefaults 433 | 434 | #define shput stbds_shput 435 | #define shputi stbds_shputi 436 | #define shputs stbds_shputs 437 | #define shget stbds_shget 438 | #define shgeti stbds_shgeti 439 | #define shgets stbds_shgets 440 | #define shgetp stbds_shgetp 441 | #define shgetp_null stbds_shgetp_null 442 | #define shdel stbds_shdel 443 | #define shlen stbds_shlen 444 | #define shlenu stbds_shlenu 445 | #define shfree stbds_shfree 446 | #define shdefault stbds_shdefault 447 | #define shdefaults stbds_shdefaults 448 | #define sh_new_arena stbds_sh_new_arena 449 | #define sh_new_strdup stbds_sh_new_strdup 450 | 451 | #define stralloc stbds_stralloc 452 | #define strreset stbds_strreset 453 | #endif 454 | 455 | #if defined(STBDS_REALLOC) && !defined(STBDS_FREE) || !defined(STBDS_REALLOC) && defined(STBDS_FREE) 456 | #error "You must define both STBDS_REALLOC and STBDS_FREE, or neither." 
457 | #endif 458 | #if !defined(STBDS_REALLOC) && !defined(STBDS_FREE) 459 | #include 460 | #define STBDS_REALLOC(c,p,s) realloc(p,s) 461 | #define STBDS_FREE(c,p) free(p) 462 | #endif 463 | 464 | #ifdef _MSC_VER 465 | #define STBDS_NOTUSED(v) (void)(v) 466 | #else 467 | #define STBDS_NOTUSED(v) (void)sizeof(v) 468 | #endif 469 | 470 | #ifdef __cplusplus 471 | extern "C" { 472 | #endif 473 | 474 | // for security against attackers, seed the library with a random number, at least time() but stronger is better 475 | extern void stbds_rand_seed(size_t seed); 476 | 477 | // these are the hash functions used internally if you want to test them or use them for other purposes 478 | extern size_t stbds_hash_bytes(void *p, size_t len, size_t seed); 479 | extern size_t stbds_hash_string(char *str, size_t seed); 480 | 481 | // this is a simple string arena allocator, initialize with e.g. 'stbds_string_arena my_arena={0}'. 482 | typedef struct stbds_string_arena stbds_string_arena; 483 | extern char * stbds_stralloc(stbds_string_arena *a, char *str); 484 | extern void stbds_strreset(stbds_string_arena *a); 485 | 486 | // have to #define STBDS_UNIT_TESTS to call this 487 | extern void stbds_unit_tests(void); 488 | 489 | /////////////// 490 | // 491 | // Everything below here is implementation details 492 | // 493 | 494 | extern void * stbds_arrgrowf(void *a, size_t elemsize, size_t addlen, size_t min_cap); 495 | extern void stbds_arrfreef(void *a); 496 | extern void stbds_hmfree_func(void *p, size_t elemsize); 497 | extern void * stbds_hmget_key(void *a, size_t elemsize, void *key, size_t keysize, int mode); 498 | extern void * stbds_hmget_key_ts(void *a, size_t elemsize, void *key, size_t keysize, ptrdiff_t *temp, int mode); 499 | extern void * stbds_hmput_default(void *a, size_t elemsize); 500 | extern void * stbds_hmput_key(void *a, size_t elemsize, void *key, size_t keysize, int mode); 501 | extern void * stbds_hmdel_key(void *a, size_t elemsize, void *key, size_t keysize, 
size_t keyoffset, int mode); 502 | extern void * stbds_shmode_func(size_t elemsize, int mode); 503 | 504 | #ifdef __cplusplus 505 | } 506 | #endif 507 | 508 | #if defined(__GNUC__) || defined(__clang__) 509 | #define STBDS_HAS_TYPEOF 510 | #ifdef __cplusplus 511 | //#define STBDS_HAS_LITERAL_ARRAY // this is currently broken for clang 512 | #endif 513 | #endif 514 | 515 | #if !defined(__cplusplus) 516 | #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L 517 | #define STBDS_HAS_LITERAL_ARRAY 518 | #endif 519 | #endif 520 | 521 | // this macro takes the address of the argument, but on gcc/clang can accept rvalues 522 | #if defined(STBDS_HAS_LITERAL_ARRAY) && defined(STBDS_HAS_TYPEOF) 523 | #if __clang__ 524 | #define STBDS_ADDRESSOF(typevar, value) ((__typeof__(typevar)[1]){value}) // literal array decays to pointer to value 525 | #else 526 | #define STBDS_ADDRESSOF(typevar, value) ((typeof(typevar)[1]){value}) // literal array decays to pointer to value 527 | #endif 528 | #else 529 | #define STBDS_ADDRESSOF(typevar, value) &(value) 530 | #endif 531 | 532 | #define STBDS_OFFSETOF(var,field) ((char *) &(var)->field - (char *) (var)) 533 | 534 | #define stbds_header(t) ((stbds_array_header *) (t) - 1) 535 | #define stbds_temp(t) stbds_header(t)->temp 536 | #define stbds_temp_key(t) (*(char **) stbds_header(t)->hash_table) 537 | 538 | #define stbds_arrsetcap(a,n) (stbds_arrgrow(a,0,n)) 539 | #define stbds_arrsetlen(a,n) ((stbds_arrcap(a) < (size_t) (n) ? stbds_arrsetcap((a),(size_t)(n)),0 : 0), (a) ? stbds_header(a)->length = (size_t) (n) : 0) 540 | #define stbds_arrcap(a) ((a) ? stbds_header(a)->capacity : 0) 541 | #define stbds_arrlen(a) ((a) ? (ptrdiff_t) stbds_header(a)->length : 0) 542 | #define stbds_arrlenu(a) ((a) ? 
stbds_header(a)->length : 0) 543 | #define stbds_arrput(a,v) (stbds_arrmaybegrow(a,1), (a)[stbds_header(a)->length++] = (v)) 544 | #define stbds_arrpush stbds_arrput // synonym 545 | #define stbds_arrpop(a) (stbds_header(a)->length--, (a)[stbds_header(a)->length]) 546 | #define stbds_arraddn(a,n) ((void)(stbds_arraddnindex(a, n))) // deprecated, use one of the following instead: 547 | #define stbds_arraddnptr(a,n) (stbds_arrmaybegrow(a,n), (n) ? (stbds_header(a)->length += (n), &(a)[stbds_header(a)->length-(n)]) : (a)) 548 | #define stbds_arraddnindex(a,n)(stbds_arrmaybegrow(a,n), (n) ? (stbds_header(a)->length += (n), stbds_header(a)->length-(n)) : stbds_arrlen(a)) 549 | #define stbds_arraddnoff stbds_arraddnindex 550 | #define stbds_arrlast(a) ((a)[stbds_header(a)->length-1]) 551 | #define stbds_arrfree(a) ((void) ((a) ? STBDS_FREE(NULL,stbds_header(a)) : (void)0), (a)=NULL) 552 | #define stbds_arrdel(a,i) stbds_arrdeln(a,i,1) 553 | #define stbds_arrdeln(a,i,n) (memmove(&(a)[i], &(a)[(i)+(n)], sizeof *(a) * (stbds_header(a)->length-(n)-(i))), stbds_header(a)->length -= (n)) 554 | #define stbds_arrdelswap(a,i) ((a)[i] = stbds_arrlast(a), stbds_header(a)->length -= 1) 555 | #define stbds_arrinsn(a,i,n) (stbds_arraddn((a),(n)), memmove(&(a)[(i)+(n)], &(a)[i], sizeof *(a) * (stbds_header(a)->length-(n)-(i)))) 556 | #define stbds_arrins(a,i,v) (stbds_arrinsn((a),(i),1), (a)[i]=(v)) 557 | 558 | #define stbds_arrmaybegrow(a,n) ((!(a) || stbds_header(a)->length + (n) > stbds_header(a)->capacity) \ 559 | ? 
(stbds_arrgrow(a,n,0),0) : 0) 560 | 561 | #define stbds_arrgrow(a,b,c) ((a) = stbds_arrgrowf_wrapper((a), sizeof *(a), (b), (c))) 562 | 563 | #define stbds_hmput(t, k, v) \ 564 | ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, 0), \ 565 | (t)[stbds_temp((t)-1)].key = (k), \ 566 | (t)[stbds_temp((t)-1)].value = (v)) 567 | 568 | #define stbds_hmputs(t, s) \ 569 | ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), &(s).key, sizeof (s).key, STBDS_HM_BINARY), \ 570 | (t)[stbds_temp((t)-1)] = (s)) 571 | 572 | #define stbds_hmgeti(t,k) \ 573 | ((t) = stbds_hmget_key_wrapper((t), sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, STBDS_HM_BINARY), \ 574 | stbds_temp((t)-1)) 575 | 576 | #define stbds_hmgeti_ts(t,k,temp) \ 577 | ((t) = stbds_hmget_key_ts_wrapper((t), sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, &(temp), STBDS_HM_BINARY), \ 578 | (temp)) 579 | 580 | #define stbds_hmgetp(t, k) \ 581 | ((void) stbds_hmgeti(t,k), &(t)[stbds_temp((t)-1)]) 582 | 583 | #define stbds_hmgetp_ts(t, k, temp) \ 584 | ((void) stbds_hmgeti_ts(t,k,temp), &(t)[temp]) 585 | 586 | #define stbds_hmdel(t,k) \ 587 | (((t) = stbds_hmdel_key_wrapper((t),sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, STBDS_OFFSETOF((t),key), STBDS_HM_BINARY)),(t)?stbds_temp((t)-1):0) 588 | 589 | #define stbds_hmdefault(t, v) \ 590 | ((t) = stbds_hmput_default_wrapper((t), sizeof *(t)), (t)[-1].value = (v)) 591 | 592 | #define stbds_hmdefaults(t, s) \ 593 | ((t) = stbds_hmput_default_wrapper((t), sizeof *(t)), (t)[-1] = (s)) 594 | 595 | #define stbds_hmfree(p) \ 596 | ((void) ((p) != NULL ? stbds_hmfree_func((p)-1,sizeof*(p)),0 : 0),(p)=NULL) 597 | 598 | #define stbds_hmgets(t, k) (*stbds_hmgetp(t,k)) 599 | #define stbds_hmget(t, k) (stbds_hmgetp(t,k)->value) 600 | #define stbds_hmget_ts(t, k, temp) (stbds_hmgetp_ts(t,k,temp)->value) 601 | #define stbds_hmlen(t) ((t) ? 
(ptrdiff_t) stbds_header((t)-1)->length-1 : 0) 602 | #define stbds_hmlenu(t) ((t) ? stbds_header((t)-1)->length-1 : 0) 603 | #define stbds_hmgetp_null(t,k) (stbds_hmgeti(t,k) == -1 ? NULL : &(t)[stbds_temp((t)-1)]) 604 | 605 | #define stbds_shput(t, k, v) \ 606 | ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_HM_STRING), \ 607 | (t)[stbds_temp((t)-1)].value = (v)) 608 | 609 | #define stbds_shputi(t, k, v) \ 610 | ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_HM_STRING), \ 611 | (t)[stbds_temp((t)-1)].value = (v), stbds_temp((t)-1)) 612 | 613 | #define stbds_shputs(t, s) \ 614 | ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (s).key, sizeof (s).key, STBDS_HM_STRING), \ 615 | (t)[stbds_temp((t)-1)] = (s), \ 616 | (t)[stbds_temp((t)-1)].key = stbds_temp_key((t)-1)) // above line overwrites whole structure, so must rewrite key here if it was allocated internally 617 | 618 | #define stbds_pshput(t, p) \ 619 | ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (p)->key, sizeof (p)->key, STBDS_HM_PTR_TO_STRING), \ 620 | (t)[stbds_temp((t)-1)] = (p)) 621 | 622 | #define stbds_shgeti(t,k) \ 623 | ((t) = stbds_hmget_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_HM_STRING), \ 624 | stbds_temp((t)-1)) 625 | 626 | #define stbds_pshgeti(t,k) \ 627 | ((t) = stbds_hmget_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (*(t))->key, STBDS_HM_PTR_TO_STRING), \ 628 | stbds_temp((t)-1)) 629 | 630 | #define stbds_shgetp(t, k) \ 631 | ((void) stbds_shgeti(t,k), &(t)[stbds_temp((t)-1)]) 632 | 633 | #define stbds_pshget(t, k) \ 634 | ((void) stbds_pshgeti(t,k), (t)[stbds_temp((t)-1)]) 635 | 636 | #define stbds_shdel(t,k) \ 637 | (((t) = stbds_hmdel_key_wrapper((t),sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_OFFSETOF((t),key), STBDS_HM_STRING)),(t)?stbds_temp((t)-1):0) 638 | #define stbds_pshdel(t,k) \ 639 | (((t) = stbds_hmdel_key_wrapper((t),sizeof *(t), (void*) (k), 
sizeof (*(t))->key, STBDS_OFFSETOF(*(t),key), STBDS_HM_PTR_TO_STRING)),(t)?stbds_temp((t)-1):0) 640 | 641 | #define stbds_sh_new_arena(t) \ 642 | ((t) = stbds_shmode_func_wrapper(t, sizeof *(t), STBDS_SH_ARENA)) 643 | #define stbds_sh_new_strdup(t) \ 644 | ((t) = stbds_shmode_func_wrapper(t, sizeof *(t), STBDS_SH_STRDUP)) 645 | 646 | #define stbds_shdefault(t, v) stbds_hmdefault(t,v) 647 | #define stbds_shdefaults(t, s) stbds_hmdefaults(t,s) 648 | 649 | #define stbds_shfree stbds_hmfree 650 | #define stbds_shlenu stbds_hmlenu 651 | 652 | #define stbds_shgets(t, k) (*stbds_shgetp(t,k)) 653 | #define stbds_shget(t, k) (stbds_shgetp(t,k)->value) 654 | #define stbds_shgetp_null(t,k) (stbds_shgeti(t,k) == -1 ? NULL : &(t)[stbds_temp((t)-1)]) 655 | #define stbds_shlen stbds_hmlen 656 | 657 | typedef struct 658 | { 659 | size_t length; 660 | size_t capacity; 661 | void * hash_table; 662 | ptrdiff_t temp; 663 | } stbds_array_header; 664 | 665 | typedef struct stbds_string_block 666 | { 667 | struct stbds_string_block *next; 668 | char storage[8]; 669 | } stbds_string_block; 670 | 671 | struct stbds_string_arena 672 | { 673 | stbds_string_block *storage; 674 | size_t remaining; 675 | unsigned char block; 676 | unsigned char mode; // this isn't used by the string arena itself 677 | }; 678 | 679 | #define STBDS_HM_BINARY 0 680 | #define STBDS_HM_STRING 1 681 | 682 | enum 683 | { 684 | STBDS_SH_NONE, 685 | STBDS_SH_DEFAULT, 686 | STBDS_SH_STRDUP, 687 | STBDS_SH_ARENA 688 | }; 689 | 690 | #ifdef __cplusplus 691 | // in C we use implicit assignment from these void*-returning functions to T*. 
692 | // in C++ these templates make the same code work 693 | template static T * stbds_arrgrowf_wrapper(T *a, size_t elemsize, size_t addlen, size_t min_cap) { 694 | return (T*)stbds_arrgrowf((void *)a, elemsize, addlen, min_cap); 695 | } 696 | template static T * stbds_hmget_key_wrapper(T *a, size_t elemsize, void *key, size_t keysize, int mode) { 697 | return (T*)stbds_hmget_key((void*)a, elemsize, key, keysize, mode); 698 | } 699 | template static T * stbds_hmget_key_ts_wrapper(T *a, size_t elemsize, void *key, size_t keysize, ptrdiff_t *temp, int mode) { 700 | return (T*)stbds_hmget_key_ts((void*)a, elemsize, key, keysize, temp, mode); 701 | } 702 | template static T * stbds_hmput_default_wrapper(T *a, size_t elemsize) { 703 | return (T*)stbds_hmput_default((void *)a, elemsize); 704 | } 705 | template static T * stbds_hmput_key_wrapper(T *a, size_t elemsize, void *key, size_t keysize, int mode) { 706 | return (T*)stbds_hmput_key((void*)a, elemsize, key, keysize, mode); 707 | } 708 | template static T * stbds_hmdel_key_wrapper(T *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode){ 709 | return (T*)stbds_hmdel_key((void*)a, elemsize, key, keysize, keyoffset, mode); 710 | } 711 | template static T * stbds_shmode_func_wrapper(T *, size_t elemsize, int mode) { 712 | return (T*)stbds_shmode_func(elemsize, mode); 713 | } 714 | #else 715 | #define stbds_arrgrowf_wrapper stbds_arrgrowf 716 | #define stbds_hmget_key_wrapper stbds_hmget_key 717 | #define stbds_hmget_key_ts_wrapper stbds_hmget_key_ts 718 | #define stbds_hmput_default_wrapper stbds_hmput_default 719 | #define stbds_hmput_key_wrapper stbds_hmput_key 720 | #define stbds_hmdel_key_wrapper stbds_hmdel_key 721 | #define stbds_shmode_func_wrapper(t,e,m) stbds_shmode_func(e,m) 722 | #endif 723 | 724 | #endif // INCLUDE_STB_DS_H 725 | 726 | 727 | ////////////////////////////////////////////////////////////////////////////// 728 | // 729 | // IMPLEMENTATION 730 | // 731 | 732 | #ifdef 
STB_DS_IMPLEMENTATION 733 | #include 734 | #include 735 | 736 | #ifndef STBDS_ASSERT 737 | #define STBDS_ASSERT_WAS_UNDEFINED 738 | #define STBDS_ASSERT(x) ((void) 0) 739 | #endif 740 | 741 | #ifdef STBDS_STATISTICS 742 | #define STBDS_STATS(x) x 743 | size_t stbds_array_grow; 744 | size_t stbds_hash_grow; 745 | size_t stbds_hash_shrink; 746 | size_t stbds_hash_rebuild; 747 | size_t stbds_hash_probes; 748 | size_t stbds_hash_alloc; 749 | size_t stbds_rehash_probes; 750 | size_t stbds_rehash_items; 751 | #else 752 | #define STBDS_STATS(x) 753 | #endif 754 | 755 | // 756 | // stbds_arr implementation 757 | // 758 | 759 | //int *prev_allocs[65536]; 760 | //int num_prev; 761 | 762 | void *stbds_arrgrowf(void *a, size_t elemsize, size_t addlen, size_t min_cap) 763 | { 764 | stbds_array_header temp={0}; // force debugging 765 | void *b; 766 | size_t min_len = stbds_arrlen(a) + addlen; 767 | (void) sizeof(temp); 768 | 769 | // compute the minimum capacity needed 770 | if (min_len > min_cap) 771 | min_cap = min_len; 772 | 773 | if (min_cap <= stbds_arrcap(a)) 774 | return a; 775 | 776 | // increase needed capacity to guarantee O(1) amortized 777 | if (min_cap < 2 * stbds_arrcap(a)) 778 | min_cap = 2 * stbds_arrcap(a); 779 | else if (min_cap < 4) 780 | min_cap = 4; 781 | 782 | //if (num_prev < 65536) if (a) prev_allocs[num_prev++] = (int *) ((char *) a+1); 783 | //if (num_prev == 2201) 784 | // num_prev = num_prev; 785 | b = STBDS_REALLOC(NULL, (a) ? 
stbds_header(a) : 0, elemsize * min_cap + sizeof(stbds_array_header)); 786 | //if (num_prev < 65536) prev_allocs[num_prev++] = (int *) (char *) b; 787 | b = (char *) b + sizeof(stbds_array_header); 788 | if (a == NULL) { 789 | stbds_header(b)->length = 0; 790 | stbds_header(b)->hash_table = 0; 791 | stbds_header(b)->temp = 0; 792 | } else { 793 | STBDS_STATS(++stbds_array_grow); 794 | } 795 | stbds_header(b)->capacity = min_cap; 796 | 797 | return b; 798 | } 799 | 800 | void stbds_arrfreef(void *a) 801 | { 802 | STBDS_FREE(NULL, stbds_header(a)); 803 | } 804 | 805 | // 806 | // stbds_hm hash table implementation 807 | // 808 | 809 | #ifdef STBDS_INTERNAL_SMALL_BUCKET 810 | #define STBDS_BUCKET_LENGTH 4 811 | #else 812 | #define STBDS_BUCKET_LENGTH 8 813 | #endif 814 | 815 | #define STBDS_BUCKET_SHIFT (STBDS_BUCKET_LENGTH == 8 ? 3 : 2) 816 | #define STBDS_BUCKET_MASK (STBDS_BUCKET_LENGTH-1) 817 | #define STBDS_CACHE_LINE_SIZE 64 818 | 819 | #define STBDS_ALIGN_FWD(n,a) (((n) + (a) - 1) & ~((a)-1)) 820 | 821 | typedef struct 822 | { 823 | size_t hash [STBDS_BUCKET_LENGTH]; 824 | ptrdiff_t index[STBDS_BUCKET_LENGTH]; 825 | } stbds_hash_bucket; // in 32-bit, this is one 64-byte cache line; in 64-bit, each array is one 64-byte cache line 826 | 827 | typedef struct 828 | { 829 | char * temp_key; // this MUST be the first field of the hash table 830 | size_t slot_count; 831 | size_t used_count; 832 | size_t used_count_threshold; 833 | size_t used_count_shrink_threshold; 834 | size_t tombstone_count; 835 | size_t tombstone_count_threshold; 836 | size_t seed; 837 | size_t slot_count_log2; 838 | stbds_string_arena string; 839 | stbds_hash_bucket *storage; // not a separate allocation, just 64-byte aligned storage after this struct 840 | } stbds_hash_index; 841 | 842 | #define STBDS_INDEX_EMPTY -1 843 | #define STBDS_INDEX_DELETED -2 844 | #define STBDS_INDEX_IN_USE(x) ((x) >= 0) 845 | 846 | #define STBDS_HASH_EMPTY 0 847 | #define STBDS_HASH_DELETED 1 848 | 849 | static 
size_t stbds_hash_seed=0x31415926; 850 | 
// Replace the library-wide seed that the next newly created hash table will start from.
// For security against attackers, pass a random number (at least time(), stronger is better).
void stbds_rand_seed(size_t seed)
{
  stbds_hash_seed = seed;
}

// Build a size_t constant that is 'v32' when size_t has 32 bits and
// (v64_hi << 32) | v64_lo when it has 64 bits, without ever shifting by the full
// width of the type: the paired 16-bit shifts make the 64-bit parts vanish on 32-bit.
#define stbds_load_32_or_64(var, temp, v32, v64_hi, v64_lo)                                          \
  temp = v64_lo ^ v32, temp <<= 16, temp <<= 16, temp >>= 16, temp >>= 16, /* discard if 32-bit */   \
  var = v64_hi, var <<= 16, var <<= 16,                                    /* discard if 32-bit */   \
  var ^= temp ^ v32

#define STBDS_SIZE_T_BITS           ((sizeof (size_t)) * 8)

// First slot to probe for 'hash' in a table of 'slot_count' slots: the hash masked
// with slot_count-1 (NOTE(review): this is only a modulo if slot_count is a power
// of two -- confirm against the callers). With STBDS_INTERNAL_BUCKET_START defined
// the position is additionally rounded down to the first slot of its bucket.
// 'slot_log2' is accepted but not used.
static size_t stbds_probe_position(size_t hash, size_t slot_count, size_t slot_log2)
{
  size_t slot_mask = slot_count-1;
  STBDS_NOTUSED(slot_log2);
#ifdef STBDS_INTERNAL_BUCKET_START
  slot_mask &= ~STBDS_BUCKET_MASK;
#endif
  return hash & slot_mask;
}

// Floor of log2(slot_count): the number of times the value can be halved before
// it drops to 1. Returns 0 for inputs 0 and 1.
static size_t stbds_log2(size_t slot_count)
{
  size_t halvings;
  for (halvings = 0; slot_count > 1; slot_count >>= 1)
    ++halvings;
  return halvings;
}
883 | 884 | static stbds_hash_index *stbds_make_hash_index(size_t slot_count, stbds_hash_index *ot) 885 | { 886 | stbds_hash_index *t; 887 | t = (stbds_hash_index *) STBDS_REALLOC(NULL,0,(slot_count >> STBDS_BUCKET_SHIFT) * sizeof(stbds_hash_bucket) + sizeof(stbds_hash_index) + STBDS_CACHE_LINE_SIZE-1); 888 | t->storage = (stbds_hash_bucket *) STBDS_ALIGN_FWD((size_t) (t+1), STBDS_CACHE_LINE_SIZE); 889 | t->slot_count = slot_count; 890 | t->slot_count_log2 = stbds_log2(slot_count); 891 | t->tombstone_count = 0; 892 | t->used_count = 0; 893 | 894 | #if 0 // A1 895 | t->used_count_threshold = slot_count*12/16; // if 12/16th of table is occupied, grow 896 | t->tombstone_count_threshold = slot_count* 2/16; // if tombstones are 2/16th of table, rebuild 897 | t->used_count_shrink_threshold = slot_count* 4/16; // if table is only 4/16th full, shrink 898 | #elif 1 // A2 899 | //t->used_count_threshold = slot_count*12/16; // if 12/16th of table is occupied, grow 900 | //t->tombstone_count_threshold = slot_count* 3/16; // if tombstones are 3/16th 
of table, rebuild 901 | //t->used_count_shrink_threshold = slot_count* 4/16; // if table is only 4/16th full, shrink 902 | 903 | // compute without overflowing 904 | t->used_count_threshold = slot_count - (slot_count>>2); 905 | t->tombstone_count_threshold = (slot_count>>3) + (slot_count>>4); 906 | t->used_count_shrink_threshold = slot_count >> 2; 907 | 908 | #elif 0 // B1 909 | t->used_count_threshold = slot_count*13/16; // if 13/16th of table is occupied, grow 910 | t->tombstone_count_threshold = slot_count* 2/16; // if tombstones are 2/16th of table, rebuild 911 | t->used_count_shrink_threshold = slot_count* 5/16; // if table is only 5/16th full, shrink 912 | #else // C1 913 | t->used_count_threshold = slot_count*14/16; // if 14/16th of table is occupied, grow 914 | t->tombstone_count_threshold = slot_count* 2/16; // if tombstones are 2/16th of table, rebuild 915 | t->used_count_shrink_threshold = slot_count* 6/16; // if table is only 6/16th full, shrink 916 | #endif 917 | // Following statistics were measured on a Core i7-6700 @ 4.00Ghz, compiled with clang 7.0.1 -O2 918 | // Note that the larger tables have high variance as they were run fewer times 919 | // A1 A2 B1 C1 920 | // 0.10ms : 0.10ms : 0.10ms : 0.11ms : 2,000 inserts creating 2K table 921 | // 0.96ms : 0.95ms : 0.97ms : 1.04ms : 20,000 inserts creating 20K table 922 | // 14.48ms : 14.46ms : 10.63ms : 11.00ms : 200,000 inserts creating 200K table 923 | // 195.74ms : 196.35ms : 203.69ms : 214.92ms : 2,000,000 inserts creating 2M table 924 | // 2193.88ms : 2209.22ms : 2285.54ms : 2437.17ms : 20,000,000 inserts creating 20M table 925 | // 65.27ms : 53.77ms : 65.33ms : 65.47ms : 500,000 inserts & deletes in 2K table 926 | // 72.78ms : 62.45ms : 71.95ms : 72.85ms : 500,000 inserts & deletes in 20K table 927 | // 89.47ms : 77.72ms : 96.49ms : 96.75ms : 500,000 inserts & deletes in 200K table 928 | // 97.58ms : 98.14ms : 97.18ms : 97.53ms : 500,000 inserts & deletes in 2M table 929 | // 118.61ms : 119.62ms 
: 120.16ms : 118.86ms : 500,000 inserts & deletes in 20M table 930 | // 192.11ms : 194.39ms : 196.38ms : 195.73ms : 500,000 inserts & deletes in 200M table 931 | 932 | if (slot_count <= STBDS_BUCKET_LENGTH) 933 | t->used_count_shrink_threshold = 0; 934 | // to avoid infinite loop, we need to guarantee that at least one slot is empty and will terminate probes 935 | STBDS_ASSERT(t->used_count_threshold + t->tombstone_count_threshold < t->slot_count); 936 | STBDS_STATS(++stbds_hash_alloc); 937 | if (ot) { 938 | t->string = ot->string; 939 | // reuse old seed so we can reuse old hashes so below "copy out old data" doesn't do any hashing 940 | t->seed = ot->seed; 941 | } else { 942 | size_t a,b,temp; 943 | memset(&t->string, 0, sizeof(t->string)); 944 | t->seed = stbds_hash_seed; 945 | // LCG 946 | // in 32-bit, a = 2147001325 b = 715136305 947 | // in 64-bit, a = 2862933555777941757 b = 3037000493 948 | stbds_load_32_or_64(a,temp, 2147001325, 0x27bb2ee6, 0x87b0b0fd); 949 | stbds_load_32_or_64(b,temp, 715136305, 0, 0xb504f32d); 950 | stbds_hash_seed = stbds_hash_seed * a + b; 951 | } 952 | 953 | { 954 | size_t i,j; 955 | for (i=0; i < slot_count >> STBDS_BUCKET_SHIFT; ++i) { 956 | stbds_hash_bucket *b = &t->storage[i]; 957 | for (j=0; j < STBDS_BUCKET_LENGTH; ++j) 958 | b->hash[j] = STBDS_HASH_EMPTY; 959 | for (j=0; j < STBDS_BUCKET_LENGTH; ++j) 960 | b->index[j] = STBDS_INDEX_EMPTY; 961 | } 962 | } 963 | 964 | // copy out the old data, if any 965 | if (ot) { 966 | size_t i,j; 967 | t->used_count = ot->used_count; 968 | for (i=0; i < ot->slot_count >> STBDS_BUCKET_SHIFT; ++i) { 969 | stbds_hash_bucket *ob = &ot->storage[i]; 970 | for (j=0; j < STBDS_BUCKET_LENGTH; ++j) { 971 | if (STBDS_INDEX_IN_USE(ob->index[j])) { 972 | size_t hash = ob->hash[j]; 973 | size_t pos = stbds_probe_position(hash, t->slot_count, t->slot_count_log2); 974 | size_t step = STBDS_BUCKET_LENGTH; 975 | STBDS_STATS(++stbds_rehash_items); 976 | for (;;) { 977 | size_t limit,z; 978 | 
stbds_hash_bucket *bucket; 979 | bucket = &t->storage[pos >> STBDS_BUCKET_SHIFT]; 980 | STBDS_STATS(++stbds_rehash_probes); 981 | 982 | for (z=pos & STBDS_BUCKET_MASK; z < STBDS_BUCKET_LENGTH; ++z) { 983 | if (bucket->hash[z] == 0) { 984 | bucket->hash[z] = hash; 985 | bucket->index[z] = ob->index[j]; 986 | goto done; 987 | } 988 | } 989 | 990 | limit = pos & STBDS_BUCKET_MASK; 991 | for (z = 0; z < limit; ++z) { 992 | if (bucket->hash[z] == 0) { 993 | bucket->hash[z] = hash; 994 | bucket->index[z] = ob->index[j]; 995 | goto done; 996 | } 997 | } 998 | 999 | pos += step; // quadratic probing 1000 | step += STBDS_BUCKET_LENGTH; 1001 | pos &= (t->slot_count-1); 1002 | } 1003 | } 1004 | done: 1005 | ; 1006 | } 1007 | } 1008 | } 1009 | 1010 | return t; 1011 | } 1012 | 1013 | #define STBDS_ROTATE_LEFT(val, n) (((val) << (n)) | ((val) >> (STBDS_SIZE_T_BITS - (n)))) 1014 | #define STBDS_ROTATE_RIGHT(val, n) (((val) >> (n)) | ((val) << (STBDS_SIZE_T_BITS - (n)))) 1015 | 1016 | size_t stbds_hash_string(char *str, size_t seed) 1017 | { 1018 | size_t hash = seed; 1019 | while (*str) 1020 | hash = STBDS_ROTATE_LEFT(hash, 9) + (unsigned char) *str++; 1021 | 1022 | // Thomas Wang 64-to-32 bit mix function, hopefully also works in 32 bits 1023 | hash ^= seed; 1024 | hash = (~hash) + (hash << 18); 1025 | hash ^= hash ^ STBDS_ROTATE_RIGHT(hash,31); 1026 | hash = hash * 21; 1027 | hash ^= hash ^ STBDS_ROTATE_RIGHT(hash,11); 1028 | hash += (hash << 6); 1029 | hash ^= STBDS_ROTATE_RIGHT(hash,22); 1030 | return hash+seed; 1031 | } 1032 | 1033 | #ifdef STBDS_SIPHASH_2_4 1034 | #define STBDS_SIPHASH_C_ROUNDS 2 1035 | #define STBDS_SIPHASH_D_ROUNDS 4 1036 | typedef int STBDS_SIPHASH_2_4_can_only_be_used_in_64_bit_builds[sizeof(size_t) == 8 ? 
1 : -1]; 1037 | #endif 1038 | 1039 | #ifndef STBDS_SIPHASH_C_ROUNDS 1040 | #define STBDS_SIPHASH_C_ROUNDS 1 1041 | #endif 1042 | #ifndef STBDS_SIPHASH_D_ROUNDS 1043 | #define STBDS_SIPHASH_D_ROUNDS 1 1044 | #endif 1045 | 1046 | #ifdef _MSC_VER 1047 | #pragma warning(push) 1048 | #pragma warning(disable:4127) // conditional expression is constant, for do..while(0) and sizeof()== 1049 | #endif 1050 | 
// Seeded hash of an arbitrary byte range, derived from SipHash but run on four
// size_t-wide state words so the same source builds for 32- and 64-bit size_t
// (the two widths produce different values).
//   p    - first byte of the data to hash
//   len  - number of bytes to hash
//   seed - seed folded into the initial state
// Returns the hash as a size_t. The input is absorbed as little-endian words of
// sizeof(size_t) bytes, followed by one final word that holds the remaining
// bytes and the length in its top byte. STBDS_SIPHASH_C_ROUNDS rounds are run
// per word and STBDS_SIPHASH_D_ROUNDS rounds at the end.
static size_t stbds_siphash_bytes(void *p, size_t len, size_t seed)
{
  unsigned char *d = (unsigned char *) p;
  size_t i,j;
  size_t v0,v1,v2,v3, data;

  // hash that works on 32- or 64-bit registers without knowing which we have
  // (computes different results on 32-bit and 64-bit platform)
  // derived from siphash, but on 32-bit platforms very different as it uses 4 32-bit state not 4 64-bit
  v0 = ((((size_t) 0x736f6d65 << 16) << 16) + 0x70736575) ^  seed;
  v1 = ((((size_t) 0x646f7261 << 16) << 16) + 0x6e646f6d) ^ ~seed;
  v2 = ((((size_t) 0x6c796765 << 16) << 16) + 0x6e657261) ^  seed;
  v3 = ((((size_t) 0x74656462 << 16) << 16) + 0x79746573) ^ ~seed;

#ifdef STBDS_TEST_SIPHASH_2_4
  // hardcoded with key material in the siphash test vectors
  v0 ^= 0x0706050403020100ull ^  seed;
  v1 ^= 0x0f0e0d0c0b0a0908ull ^ ~seed;
  v2 ^= 0x0706050403020100ull ^  seed;
  v3 ^= 0x0f0e0d0c0b0a0908ull ^ ~seed;
#endif

  #define STBDS_SIPROUND() \
    do {                   \
      v0 += v1; v1 = STBDS_ROTATE_LEFT(v1, 13);  v1 ^= v0; v0 = STBDS_ROTATE_LEFT(v0,STBDS_SIZE_T_BITS/2); \
      v2 += v3; v3 = STBDS_ROTATE_LEFT(v3, 16);  v3 ^= v2;                                                 \
      v2 += v1; v1 = STBDS_ROTATE_LEFT(v1, 17);  v1 ^= v2; v2 = STBDS_ROTATE_LEFT(v2,STBDS_SIZE_T_BITS/2); \
      v0 += v3; v3 = STBDS_ROTATE_LEFT(v3, 21);  v3 ^= v0;                                                 \
    } while (0)

  // absorb the input one size_t-sized little-endian word at a time
  for (i=0; i+sizeof(size_t) <= len; i += sizeof(size_t), d += sizeof(size_t)) {
    // Each byte is widened to size_t BEFORE it is shifted. Shifting the promoted
    // int ('d[3] << 24') overflows for bytes >= 0x80, and the negative int then
    // sign-extends when converted to a 64-bit size_t, setting the whole upper
    // half of the word so that bytes 4..7 no longer affect the hash.
    data = (size_t) d[0] | ((size_t) d[1] << 8) | ((size_t) d[2] << 16) | ((size_t) d[3] << 24);
    // Only a word wider than 4 bytes owns bytes 4..7; with a 4-byte size_t they
    // belong to the next word (or lie past the end of the input), so they must
    // not be read at all. The split '<< 16 << 16' keeps the shift legal there.
    if (sizeof(size_t) > 4)
      data |= ((size_t) d[4] | ((size_t) d[5] << 8) | ((size_t) d[6] << 16) | ((size_t) d[7] << 24)) << 16 << 16;

    v3 ^= data;
    for (j=0; j < STBDS_SIPHASH_C_ROUNDS; ++j)
      STBDS_SIPROUND();
    v0 ^= data;
  }

  // final word: total length in the top byte, leftover bytes below it
  data = len << (STBDS_SIZE_T_BITS-8);
  switch (len - i) {
    case 7: data |= ((size_t) d[6] << 24) << 24; // fall through
    case 6: data |= ((size_t) d[5] << 20) << 20; // fall through
    case 5: data |= ((size_t) d[4] << 16) << 16; // fall through
    case 4: data |= ((size_t) d[3] << 24); // fall through (widened first, see above)
    case 3: data |= ((size_t) d[2] << 16); // fall through
    case 2: data |= ((size_t) d[1] << 8); // fall through
    case 1: data |= d[0]; // fall through
    case 0: break;
  }
  v3 ^= data;
  for (j=0; j < STBDS_SIPHASH_C_ROUNDS; ++j)
    STBDS_SIPROUND();
  v0 ^= data;
  v2 ^= 0xff;
  for (j=0; j < STBDS_SIPHASH_D_ROUNDS; ++j)
    STBDS_SIPROUND();

#ifdef STBDS_SIPHASH_2_4
  return v0^v1^v2^v3;
#else
  return v1^v2^v3; // slightly stronger since v0^v3 in above cancels out final round operation? I tweeted at the authors of SipHash about this but they didn't reply
#endif
}
1115 | 1116 | size_t stbds_hash_bytes(void *p, size_t len, size_t seed) 1117 | { 1118 | #ifdef STBDS_SIPHASH_2_4 1119 | return stbds_siphash_bytes(p,len,seed); 1120 | #else 1121 | unsigned char *d = (unsigned char *) p; 1122 | 1123 | if (len == 4) { 1124 | unsigned int hash = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24); 1125 | #if 0 1126 | // HASH32-A Bob Jenkin's hash function w/o large constants 1127 | hash ^= seed; 1128 | hash -= (hash<<6); 1129 | hash ^= (hash>>17); 1130 | hash -= (hash<<9); 1131 | hash ^= seed; 1132 | hash ^= (hash<<4); 1133 | hash -= (hash<<3); 1134 | hash ^= (hash<<10); 1135 | hash ^= (hash>>15); 1136 | #elif 1 1137 | // HASH32-BB Bob Jenkin's presumably-accidental version of Thomas Wang hash with rotates turned into shifts. 1138 | // Note that converting these back to rotates makes it run a lot slower, presumably due to collisions, so I'm 1139 | // not really sure what's going on. 
1140 | hash ^= seed; 1141 | hash = (hash ^ 61) ^ (hash >> 16); 1142 | hash = hash + (hash << 3); 1143 | hash = hash ^ (hash >> 4); 1144 | hash = hash * 0x27d4eb2d; 1145 | hash ^= seed; 1146 | hash = hash ^ (hash >> 15); 1147 | #else // HASH32-C - Murmur3 1148 | hash ^= seed; 1149 | hash *= 0xcc9e2d51; 1150 | hash = (hash << 17) | (hash >> 15); 1151 | hash *= 0x1b873593; 1152 | hash ^= seed; 1153 | hash = (hash << 19) | (hash >> 13); 1154 | hash = hash*5 + 0xe6546b64; 1155 | hash ^= hash >> 16; 1156 | hash *= 0x85ebca6b; 1157 | hash ^= seed; 1158 | hash ^= hash >> 13; 1159 | hash *= 0xc2b2ae35; 1160 | hash ^= hash >> 16; 1161 | #endif 1162 | // Following statistics were measured on a Core i7-6700 @ 4.00Ghz, compiled with clang 7.0.1 -O2 1163 | // Note that the larger tables have high variance as they were run fewer times 1164 | // HASH32-A // HASH32-BB // HASH32-C 1165 | // 0.10ms // 0.10ms // 0.10ms : 2,000 inserts creating 2K table 1166 | // 0.96ms // 0.95ms // 0.99ms : 20,000 inserts creating 20K table 1167 | // 14.69ms // 14.43ms // 14.97ms : 200,000 inserts creating 200K table 1168 | // 199.99ms // 195.36ms // 202.05ms : 2,000,000 inserts creating 2M table 1169 | // 2234.84ms // 2187.74ms // 2240.38ms : 20,000,000 inserts creating 20M table 1170 | // 55.68ms // 53.72ms // 57.31ms : 500,000 inserts & deletes in 2K table 1171 | // 63.43ms // 61.99ms // 65.73ms : 500,000 inserts & deletes in 20K table 1172 | // 80.04ms // 77.96ms // 81.83ms : 500,000 inserts & deletes in 200K table 1173 | // 100.42ms // 97.40ms // 102.39ms : 500,000 inserts & deletes in 2M table 1174 | // 119.71ms // 120.59ms // 121.63ms : 500,000 inserts & deletes in 20M table 1175 | // 185.28ms // 195.15ms // 187.74ms : 500,000 inserts & deletes in 200M table 1176 | // 15.58ms // 14.79ms // 15.52ms : 200,000 inserts creating 200K table with varying key spacing 1177 | 1178 | return (((size_t) hash << 16 << 16) | hash) ^ seed; 1179 | } else if (len == 8 && sizeof(size_t) == 8) { 1180 | size_t hash 
= d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24); 1181 | hash |= (size_t) (d[4] | (d[5] << 8) | (d[6] << 16) | (d[7] << 24)) << 16 << 16; // avoid warning if size_t == 4 1182 | hash ^= seed; 1183 | hash = (~hash) + (hash << 21); 1184 | hash ^= STBDS_ROTATE_RIGHT(hash,24); 1185 | hash *= 265; 1186 | hash ^= STBDS_ROTATE_RIGHT(hash,14); 1187 | hash ^= seed; 1188 | hash *= 21; 1189 | hash ^= STBDS_ROTATE_RIGHT(hash,28); 1190 | hash += (hash << 31); 1191 | hash = (~hash) + (hash << 18); 1192 | return hash; 1193 | } else { 1194 | return stbds_siphash_bytes(p,len,seed); 1195 | } 1196 | #endif 1197 | } 1198 | #ifdef _MSC_VER 1199 | #pragma warning(pop) 1200 | #endif 1201 | 1202 | 1203 | static int stbds_is_key_equal(void *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode, size_t i) 1204 | { 1205 | if (mode >= STBDS_HM_STRING) 1206 | return 0==strcmp((char *) key, * (char **) ((char *) a + elemsize*i + keyoffset)); 1207 | else 1208 | return 0==memcmp(key, (char *) a + elemsize*i + keyoffset, keysize); 1209 | } 1210 | 1211 | #define STBDS_HASH_TO_ARR(x,elemsize) ((char*) (x) - (elemsize)) 1212 | #define STBDS_ARR_TO_HASH(x,elemsize) ((char*) (x) + (elemsize)) 1213 | 1214 | #define stbds_hash_table(a) ((stbds_hash_index *) stbds_header(a)->hash_table) 1215 | 1216 | void stbds_hmfree_func(void *a, size_t elemsize) 1217 | { 1218 | if (a == NULL) return; 1219 | if (stbds_hash_table(a) != NULL) { 1220 | if (stbds_hash_table(a)->string.mode == STBDS_SH_STRDUP) { 1221 | size_t i; 1222 | // skip 0th element, which is default 1223 | for (i=1; i < stbds_header(a)->length; ++i) 1224 | STBDS_FREE(NULL, *(char**) ((char *) a + elemsize*i)); 1225 | } 1226 | stbds_strreset(&stbds_hash_table(a)->string); 1227 | } 1228 | STBDS_FREE(NULL, stbds_header(a)->hash_table); 1229 | STBDS_FREE(NULL, stbds_header(a)); 1230 | } 1231 | 1232 | static ptrdiff_t stbds_hm_find_slot(void *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode) 1233 | { 1234 | 
void *raw_a = STBDS_HASH_TO_ARR(a,elemsize); 1235 | stbds_hash_index *table = stbds_hash_table(raw_a); 1236 | size_t hash = mode >= STBDS_HM_STRING ? stbds_hash_string((char*)key,table->seed) : stbds_hash_bytes(key, keysize,table->seed); 1237 | size_t step = STBDS_BUCKET_LENGTH; 1238 | size_t limit,i; 1239 | size_t pos; 1240 | stbds_hash_bucket *bucket; 1241 | 1242 | if (hash < 2) hash += 2; // stored hash values are forbidden from being 0, so we can detect empty slots 1243 | 1244 | pos = stbds_probe_position(hash, table->slot_count, table->slot_count_log2); 1245 | 1246 | for (;;) { 1247 | STBDS_STATS(++stbds_hash_probes); 1248 | bucket = &table->storage[pos >> STBDS_BUCKET_SHIFT]; 1249 | 1250 | // start searching from pos to end of bucket, this should help performance on small hash tables that fit in cache 1251 | for (i=pos & STBDS_BUCKET_MASK; i < STBDS_BUCKET_LENGTH; ++i) { 1252 | if (bucket->hash[i] == hash) { 1253 | if (stbds_is_key_equal(a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) { 1254 | return (pos & ~STBDS_BUCKET_MASK)+i; 1255 | } 1256 | } else if (bucket->hash[i] == STBDS_HASH_EMPTY) { 1257 | return -1; 1258 | } 1259 | } 1260 | 1261 | // search from beginning of bucket to pos 1262 | limit = pos & STBDS_BUCKET_MASK; 1263 | for (i = 0; i < limit; ++i) { 1264 | if (bucket->hash[i] == hash) { 1265 | if (stbds_is_key_equal(a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) { 1266 | return (pos & ~STBDS_BUCKET_MASK)+i; 1267 | } 1268 | } else if (bucket->hash[i] == STBDS_HASH_EMPTY) { 1269 | return -1; 1270 | } 1271 | } 1272 | 1273 | // quadratic probing 1274 | pos += step; 1275 | step += STBDS_BUCKET_LENGTH; 1276 | pos &= (table->slot_count-1); 1277 | } 1278 | /* NOTREACHED */ 1279 | } 1280 | 1281 | void * stbds_hmget_key_ts(void *a, size_t elemsize, void *key, size_t keysize, ptrdiff_t *temp, int mode) 1282 | { 1283 | size_t keyoffset = 0; 1284 | if (a == NULL) { 1285 | // make it non-empty so we can return a temp 1286 | a = 
stbds_arrgrowf(0, elemsize, 0, 1); 1287 | stbds_header(a)->length += 1; 1288 | memset(a, 0, elemsize); 1289 | *temp = STBDS_INDEX_EMPTY; 1290 | // adjust a to point after the default element 1291 | return STBDS_ARR_TO_HASH(a,elemsize); 1292 | } else { 1293 | stbds_hash_index *table; 1294 | void *raw_a = STBDS_HASH_TO_ARR(a,elemsize); 1295 | // adjust a to point to the default element 1296 | table = (stbds_hash_index *) stbds_header(raw_a)->hash_table; 1297 | if (table == 0) { 1298 | *temp = -1; 1299 | } else { 1300 | ptrdiff_t slot = stbds_hm_find_slot(a, elemsize, key, keysize, keyoffset, mode); 1301 | if (slot < 0) { 1302 | *temp = STBDS_INDEX_EMPTY; 1303 | } else { 1304 | stbds_hash_bucket *b = &table->storage[slot >> STBDS_BUCKET_SHIFT]; 1305 | *temp = b->index[slot & STBDS_BUCKET_MASK]; 1306 | } 1307 | } 1308 | return a; 1309 | } 1310 | } 1311 | 1312 | void * stbds_hmget_key(void *a, size_t elemsize, void *key, size_t keysize, int mode) 1313 | { 1314 | ptrdiff_t temp; 1315 | void *p = stbds_hmget_key_ts(a, elemsize, key, keysize, &temp, mode); 1316 | stbds_temp(STBDS_HASH_TO_ARR(p,elemsize)) = temp; 1317 | return p; 1318 | } 1319 | 1320 | void * stbds_hmput_default(void *a, size_t elemsize) 1321 | { 1322 | // three cases: 1323 | // a is NULL <- allocate 1324 | // a has a hash table but no entries, because of shmode <- grow 1325 | // a has entries <- do nothing 1326 | if (a == NULL || stbds_header(STBDS_HASH_TO_ARR(a,elemsize))->length == 0) { 1327 | a = stbds_arrgrowf(a ? 
STBDS_HASH_TO_ARR(a,elemsize) : NULL, elemsize, 0, 1); 1328 | stbds_header(a)->length += 1; 1329 | memset(a, 0, elemsize); 1330 | a=STBDS_ARR_TO_HASH(a,elemsize); 1331 | } 1332 | return a; 1333 | } 1334 | 1335 | static char *stbds_strdup(char *str); 1336 | 1337 | void *stbds_hmput_key(void *a, size_t elemsize, void *key, size_t keysize, int mode) 1338 | { 1339 | size_t keyoffset=0; 1340 | void *raw_a; 1341 | stbds_hash_index *table; 1342 | 1343 | if (a == NULL) { 1344 | a = stbds_arrgrowf(0, elemsize, 0, 1); 1345 | memset(a, 0, elemsize); 1346 | stbds_header(a)->length += 1; 1347 | // adjust a to point AFTER the default element 1348 | a = STBDS_ARR_TO_HASH(a,elemsize); 1349 | } 1350 | 1351 | // adjust a to point to the default element 1352 | raw_a = a; 1353 | a = STBDS_HASH_TO_ARR(a,elemsize); 1354 | 1355 | table = (stbds_hash_index *) stbds_header(a)->hash_table; 1356 | 1357 | if (table == NULL || table->used_count >= table->used_count_threshold) { 1358 | stbds_hash_index *nt; 1359 | size_t slot_count; 1360 | 1361 | slot_count = (table == NULL) ? STBDS_BUCKET_LENGTH : table->slot_count*2; 1362 | nt = stbds_make_hash_index(slot_count, table); 1363 | if (table) 1364 | STBDS_FREE(NULL, table); 1365 | else 1366 | nt->string.mode = mode >= STBDS_HM_STRING ? STBDS_SH_DEFAULT : 0; 1367 | stbds_header(a)->hash_table = table = nt; 1368 | STBDS_STATS(++stbds_hash_grow); 1369 | } 1370 | 1371 | // we iterate hash table explicitly because we want to track if we saw a tombstone 1372 | { 1373 | size_t hash = mode >= STBDS_HM_STRING ? 
stbds_hash_string((char*)key,table->seed) : stbds_hash_bytes(key, keysize,table->seed); 1374 | size_t step = STBDS_BUCKET_LENGTH; 1375 | size_t pos; 1376 | ptrdiff_t tombstone = -1; 1377 | stbds_hash_bucket *bucket; 1378 | 1379 | // stored hash values are forbidden from being 0, so we can detect empty slots to early out quickly 1380 | if (hash < 2) hash += 2; 1381 | 1382 | pos = stbds_probe_position(hash, table->slot_count, table->slot_count_log2); 1383 | 1384 | for (;;) { 1385 | size_t limit, i; 1386 | STBDS_STATS(++stbds_hash_probes); 1387 | bucket = &table->storage[pos >> STBDS_BUCKET_SHIFT]; 1388 | 1389 | // start searching from pos to end of bucket 1390 | for (i=pos & STBDS_BUCKET_MASK; i < STBDS_BUCKET_LENGTH; ++i) { 1391 | if (bucket->hash[i] == hash) { 1392 | if (stbds_is_key_equal(raw_a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) { 1393 | stbds_temp(a) = bucket->index[i]; 1394 | if (mode >= STBDS_HM_STRING) 1395 | stbds_temp_key(a) = * (char **) ((char *) raw_a + elemsize*bucket->index[i] + keyoffset); 1396 | return STBDS_ARR_TO_HASH(a,elemsize); 1397 | } 1398 | } else if (bucket->hash[i] == 0) { 1399 | pos = (pos & ~STBDS_BUCKET_MASK) + i; 1400 | goto found_empty_slot; 1401 | } else if (tombstone < 0) { 1402 | if (bucket->index[i] == STBDS_INDEX_DELETED) 1403 | tombstone = (ptrdiff_t) ((pos & ~STBDS_BUCKET_MASK) + i); 1404 | } 1405 | } 1406 | 1407 | // search from beginning of bucket to pos 1408 | limit = pos & STBDS_BUCKET_MASK; 1409 | for (i = 0; i < limit; ++i) { 1410 | if (bucket->hash[i] == hash) { 1411 | if (stbds_is_key_equal(raw_a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) { 1412 | stbds_temp(a) = bucket->index[i]; 1413 | return STBDS_ARR_TO_HASH(a,elemsize); 1414 | } 1415 | } else if (bucket->hash[i] == 0) { 1416 | pos = (pos & ~STBDS_BUCKET_MASK) + i; 1417 | goto found_empty_slot; 1418 | } else if (tombstone < 0) { 1419 | if (bucket->index[i] == STBDS_INDEX_DELETED) 1420 | tombstone = (ptrdiff_t) ((pos & 
~STBDS_BUCKET_MASK) + i); 1421 | } 1422 | } 1423 | 1424 | // quadratic probing 1425 | pos += step; 1426 | step += STBDS_BUCKET_LENGTH; 1427 | pos &= (table->slot_count-1); 1428 | } 1429 | found_empty_slot: 1430 | if (tombstone >= 0) { 1431 | pos = tombstone; 1432 | --table->tombstone_count; 1433 | } 1434 | ++table->used_count; 1435 | 1436 | { 1437 | ptrdiff_t i = (ptrdiff_t) stbds_arrlen(a); 1438 | // we want to do stbds_arraddn(1), but we can't use the macros since we don't have something of the right type 1439 | if ((size_t) i+1 > stbds_arrcap(a)) 1440 | *(void **) &a = stbds_arrgrowf(a, elemsize, 1, 0); 1441 | raw_a = STBDS_ARR_TO_HASH(a,elemsize); 1442 | 1443 | STBDS_ASSERT((size_t) i+1 <= stbds_arrcap(a)); 1444 | stbds_header(a)->length = i+1; 1445 | bucket = &table->storage[pos >> STBDS_BUCKET_SHIFT]; 1446 | bucket->hash[pos & STBDS_BUCKET_MASK] = hash; 1447 | bucket->index[pos & STBDS_BUCKET_MASK] = i-1; 1448 | stbds_temp(a) = i-1; 1449 | 1450 | switch (table->string.mode) { 1451 | case STBDS_SH_STRDUP: stbds_temp_key(a) = *(char **) ((char *) a + elemsize*i) = stbds_strdup((char*) key); break; 1452 | case STBDS_SH_ARENA: stbds_temp_key(a) = *(char **) ((char *) a + elemsize*i) = stbds_stralloc(&table->string, (char*)key); break; 1453 | case STBDS_SH_DEFAULT: stbds_temp_key(a) = *(char **) ((char *) a + elemsize*i) = (char *) key; break; 1454 | default: memcpy((char *) a + elemsize*i, key, keysize); break; 1455 | } 1456 | } 1457 | return STBDS_ARR_TO_HASH(a,elemsize); 1458 | } 1459 | } 1460 | 1461 | void * stbds_shmode_func(size_t elemsize, int mode) 1462 | { 1463 | void *a = stbds_arrgrowf(0, elemsize, 0, 1); 1464 | stbds_hash_index *h; 1465 | memset(a, 0, elemsize); 1466 | stbds_header(a)->length = 1; 1467 | stbds_header(a)->hash_table = h = (stbds_hash_index *) stbds_make_hash_index(STBDS_BUCKET_LENGTH, NULL); 1468 | h->string.mode = (unsigned char) mode; 1469 | return STBDS_ARR_TO_HASH(a,elemsize); 1470 | } 1471 | 1472 | void * stbds_hmdel_key(void *a, 
size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode) 1473 | { 1474 | if (a == NULL) { 1475 | return 0; 1476 | } else { 1477 | stbds_hash_index *table; 1478 | void *raw_a = STBDS_HASH_TO_ARR(a,elemsize); 1479 | table = (stbds_hash_index *) stbds_header(raw_a)->hash_table; 1480 | stbds_temp(raw_a) = 0; 1481 | if (table == 0) { 1482 | return a; 1483 | } else { 1484 | ptrdiff_t slot; 1485 | slot = stbds_hm_find_slot(a, elemsize, key, keysize, keyoffset, mode); 1486 | if (slot < 0) 1487 | return a; 1488 | else { 1489 | stbds_hash_bucket *b = &table->storage[slot >> STBDS_BUCKET_SHIFT]; 1490 | int i = slot & STBDS_BUCKET_MASK; 1491 | ptrdiff_t old_index = b->index[i]; 1492 | ptrdiff_t final_index = (ptrdiff_t) stbds_arrlen(raw_a)-1-1; // minus one for the raw_a vs a, and minus one for 'last' 1493 | STBDS_ASSERT(slot < (ptrdiff_t) table->slot_count); 1494 | --table->used_count; 1495 | ++table->tombstone_count; 1496 | stbds_temp(raw_a) = 1; 1497 | STBDS_ASSERT(table->used_count >= 0); 1498 | //STBDS_ASSERT(table->tombstone_count < table->slot_count/4); 1499 | b->hash[i] = STBDS_HASH_DELETED; 1500 | b->index[i] = STBDS_INDEX_DELETED; 1501 | 1502 | if (mode == STBDS_HM_STRING && table->string.mode == STBDS_SH_STRDUP) 1503 | STBDS_FREE(NULL, *(char**) ((char *) a+elemsize*old_index)); 1504 | 1505 | // if indices are the same, memcpy is a no-op, but back-pointer-fixup will fail, so skip 1506 | if (old_index != final_index) { 1507 | // swap delete 1508 | memmove((char*) a + elemsize*old_index, (char*) a + elemsize*final_index, elemsize); 1509 | 1510 | // now find the slot for the last element 1511 | if (mode == STBDS_HM_STRING) 1512 | slot = stbds_hm_find_slot(a, elemsize, *(char**) ((char *) a+elemsize*old_index + keyoffset), keysize, keyoffset, mode); 1513 | else 1514 | slot = stbds_hm_find_slot(a, elemsize, (char* ) a+elemsize*old_index + keyoffset, keysize, keyoffset, mode); 1515 | STBDS_ASSERT(slot >= 0); 1516 | b = &table->storage[slot >> 
STBDS_BUCKET_SHIFT]; 1517 | i = slot & STBDS_BUCKET_MASK; 1518 | STBDS_ASSERT(b->index[i] == final_index); 1519 | b->index[i] = old_index; 1520 | } 1521 | stbds_header(raw_a)->length -= 1; 1522 | 1523 | if (table->used_count < table->used_count_shrink_threshold && table->slot_count > STBDS_BUCKET_LENGTH) { 1524 | stbds_header(raw_a)->hash_table = stbds_make_hash_index(table->slot_count>>1, table); 1525 | STBDS_FREE(NULL, table); 1526 | STBDS_STATS(++stbds_hash_shrink); 1527 | } else if (table->tombstone_count > table->tombstone_count_threshold) { 1528 | stbds_header(raw_a)->hash_table = stbds_make_hash_index(table->slot_count , table); 1529 | STBDS_FREE(NULL, table); 1530 | STBDS_STATS(++stbds_hash_rebuild); 1531 | } 1532 | 1533 | return a; 1534 | } 1535 | } 1536 | } 1537 | /* NOTREACHED */ 1538 | } 1539 | 1540 | static char *stbds_strdup(char *str) 1541 | { 1542 | // to keep replaceable allocator simple, we don't want to use strdup. 1543 | // rolling our own also avoids problem of strdup vs _strdup 1544 | size_t len = strlen(str)+1; 1545 | char *p = (char*) STBDS_REALLOC(NULL, 0, len); 1546 | memmove(p, str, len); 1547 | return p; 1548 | } 1549 | 1550 | #ifndef STBDS_STRING_ARENA_BLOCKSIZE_MIN 1551 | #define STBDS_STRING_ARENA_BLOCKSIZE_MIN 512u 1552 | #endif 1553 | #ifndef STBDS_STRING_ARENA_BLOCKSIZE_MAX 1554 | #define STBDS_STRING_ARENA_BLOCKSIZE_MAX (1u<<20) 1555 | #endif 1556 | 1557 | char *stbds_stralloc(stbds_string_arena *a, char *str) 1558 | { 1559 | char *p; 1560 | size_t len = strlen(str)+1; 1561 | if (len > a->remaining) { 1562 | // compute the next blocksize 1563 | size_t blocksize = a->block; 1564 | 1565 | // size is 512, 512, 1024, 1024, 2048, 2048, 4096, 4096, etc., so that 1566 | // there are log(SIZE) allocations to free when we destroy the table 1567 | blocksize = (size_t) (STBDS_STRING_ARENA_BLOCKSIZE_MIN) << (blocksize>>1); 1568 | 1569 | // if size is under 1M, advance to next blocktype 1570 | if (blocksize < 
(size_t)(STBDS_STRING_ARENA_BLOCKSIZE_MAX)) 1571 | ++a->block; 1572 | 1573 | if (len > blocksize) { 1574 | // if string is larger than blocksize, then just allocate the full size. 1575 | // note that we still advance string_block so block size will continue 1576 | // increasing, so e.g. if somebody only calls this with 1000-long strings, 1577 | // eventually the arena will start doubling and handling those as well 1578 | stbds_string_block *sb = (stbds_string_block *) STBDS_REALLOC(NULL, 0, sizeof(*sb)-8 + len); 1579 | memmove(sb->storage, str, len); 1580 | if (a->storage) { 1581 | // insert it after the first element, so that we don't waste the space there 1582 | sb->next = a->storage->next; 1583 | a->storage->next = sb; 1584 | } else { 1585 | sb->next = 0; 1586 | a->storage = sb; 1587 | a->remaining = 0; // this is redundant, but good for clarity 1588 | } 1589 | return sb->storage; 1590 | } else { 1591 | stbds_string_block *sb = (stbds_string_block *) STBDS_REALLOC(NULL, 0, sizeof(*sb)-8 + blocksize); 1592 | sb->next = a->storage; 1593 | a->storage = sb; 1594 | a->remaining = blocksize; 1595 | } 1596 | } 1597 | 1598 | STBDS_ASSERT(len <= a->remaining); 1599 | p = a->storage->storage + a->remaining - len; 1600 | a->remaining -= len; 1601 | memmove(p, str, len); 1602 | return p; 1603 | } 1604 | 1605 | void stbds_strreset(stbds_string_arena *a) 1606 | { 1607 | stbds_string_block *x,*y; 1608 | x = a->storage; 1609 | while (x) { 1610 | y = x->next; 1611 | STBDS_FREE(NULL, x); 1612 | x = y; 1613 | } 1614 | memset(a, 0, sizeof(*a)); 1615 | } 1616 | 1617 | #endif 1618 | 1619 | ////////////////////////////////////////////////////////////////////////////// 1620 | // 1621 | // UNIT TESTS 1622 | // 1623 | 1624 | #ifdef STBDS_UNIT_TESTS 1625 | #include 1626 | #ifdef STBDS_ASSERT_WAS_UNDEFINED 1627 | #undef STBDS_ASSERT 1628 | #endif 1629 | #ifndef STBDS_ASSERT 1630 | #define STBDS_ASSERT assert 1631 | #include 1632 | #endif 1633 | 1634 | typedef struct { int key,b,c,d; } 
stbds_struct; 1635 | typedef struct { int key[2],b,c,d; } stbds_struct2; 1636 | 1637 | static char buffer[256]; 1638 | char *strkey(int n) 1639 | { 1640 | #if defined(_WIN32) && defined(__STDC_WANT_SECURE_LIB__) 1641 | sprintf_s(buffer, sizeof(buffer), "test_%d", n); 1642 | #else 1643 | sprintf(buffer, "test_%d", n); 1644 | #endif 1645 | return buffer; 1646 | } 1647 | 1648 | void stbds_unit_tests(void) 1649 | { 1650 | #if defined(_MSC_VER) && _MSC_VER <= 1200 && defined(__cplusplus) 1651 | // VC6 C++ doesn't like the template<> trick on unnamed structures, so do nothing! 1652 | STBDS_ASSERT(0); 1653 | #else 1654 | const int testsize = 100000; 1655 | const int testsize2 = testsize/20; 1656 | int *arr=NULL; 1657 | struct { int key; int value; } *intmap = NULL; 1658 | struct { char *key; int value; } *strmap = NULL, s; 1659 | struct { stbds_struct key; int value; } *map = NULL; 1660 | stbds_struct *map2 = NULL; 1661 | stbds_struct2 *map3 = NULL; 1662 | stbds_string_arena sa = { 0 }; 1663 | int key3[2] = { 1,2 }; 1664 | ptrdiff_t temp; 1665 | 1666 | int i,j; 1667 | 1668 | STBDS_ASSERT(arrlen(arr)==0); 1669 | for (i=0; i < 20000; i += 50) { 1670 | for (j=0; j < i; ++j) 1671 | arrpush(arr,j); 1672 | arrfree(arr); 1673 | } 1674 | 1675 | for (i=0; i < 4; ++i) { 1676 | arrpush(arr,1); arrpush(arr,2); arrpush(arr,3); arrpush(arr,4); 1677 | arrdel(arr,i); 1678 | arrfree(arr); 1679 | arrpush(arr,1); arrpush(arr,2); arrpush(arr,3); arrpush(arr,4); 1680 | arrdelswap(arr,i); 1681 | arrfree(arr); 1682 | } 1683 | 1684 | for (i=0; i < 5; ++i) { 1685 | arrpush(arr,1); arrpush(arr,2); arrpush(arr,3); arrpush(arr,4); 1686 | stbds_arrins(arr,i,5); 1687 | STBDS_ASSERT(arr[i] == 5); 1688 | if (i < 4) 1689 | STBDS_ASSERT(arr[4] == 4); 1690 | arrfree(arr); 1691 | } 1692 | 1693 | i = 1; 1694 | STBDS_ASSERT(hmgeti(intmap,i) == -1); 1695 | hmdefault(intmap, -2); 1696 | STBDS_ASSERT(hmgeti(intmap, i) == -1); 1697 | STBDS_ASSERT(hmget (intmap, i) == -2); 1698 | for (i=0; i < testsize; i+=2) 1699 
| hmput(intmap, i, i*5); 1700 | for (i=0; i < testsize; i+=1) { 1701 | if (i & 1) STBDS_ASSERT(hmget(intmap, i) == -2 ); 1702 | else STBDS_ASSERT(hmget(intmap, i) == i*5); 1703 | if (i & 1) STBDS_ASSERT(hmget_ts(intmap, i, temp) == -2 ); 1704 | else STBDS_ASSERT(hmget_ts(intmap, i, temp) == i*5); 1705 | } 1706 | for (i=0; i < testsize; i+=2) 1707 | hmput(intmap, i, i*3); 1708 | for (i=0; i < testsize; i+=1) 1709 | if (i & 1) STBDS_ASSERT(hmget(intmap, i) == -2 ); 1710 | else STBDS_ASSERT(hmget(intmap, i) == i*3); 1711 | for (i=2; i < testsize; i+=4) 1712 | hmdel(intmap, i); // delete half the entries 1713 | for (i=0; i < testsize; i+=1) 1714 | if (i & 3) STBDS_ASSERT(hmget(intmap, i) == -2 ); 1715 | else STBDS_ASSERT(hmget(intmap, i) == i*3); 1716 | for (i=0; i < testsize; i+=1) 1717 | hmdel(intmap, i); // delete the rest of the entries 1718 | for (i=0; i < testsize; i+=1) 1719 | STBDS_ASSERT(hmget(intmap, i) == -2 ); 1720 | hmfree(intmap); 1721 | for (i=0; i < testsize; i+=2) 1722 | hmput(intmap, i, i*3); 1723 | hmfree(intmap); 1724 | 1725 | #if defined(__clang__) || defined(__GNUC__) 1726 | #ifndef __cplusplus 1727 | intmap = NULL; 1728 | hmput(intmap, 15, 7); 1729 | hmput(intmap, 11, 3); 1730 | hmput(intmap, 9, 5); 1731 | STBDS_ASSERT(hmget(intmap, 9) == 5); 1732 | STBDS_ASSERT(hmget(intmap, 11) == 3); 1733 | STBDS_ASSERT(hmget(intmap, 15) == 7); 1734 | #endif 1735 | #endif 1736 | 1737 | for (i=0; i < testsize; ++i) 1738 | stralloc(&sa, strkey(i)); 1739 | strreset(&sa); 1740 | 1741 | { 1742 | s.key = "a", s.value = 1; 1743 | shputs(strmap, s); 1744 | STBDS_ASSERT(*strmap[0].key == 'a'); 1745 | STBDS_ASSERT(strmap[0].key == s.key); 1746 | STBDS_ASSERT(strmap[0].value == s.value); 1747 | shfree(strmap); 1748 | } 1749 | 1750 | { 1751 | s.key = "a", s.value = 1; 1752 | sh_new_strdup(strmap); 1753 | shputs(strmap, s); 1754 | STBDS_ASSERT(*strmap[0].key == 'a'); 1755 | STBDS_ASSERT(strmap[0].key != s.key); 1756 | STBDS_ASSERT(strmap[0].value == s.value); 1757 | 
shfree(strmap); 1758 | } 1759 | 1760 | { 1761 | s.key = "a", s.value = 1; 1762 | sh_new_arena(strmap); 1763 | shputs(strmap, s); 1764 | STBDS_ASSERT(*strmap[0].key == 'a'); 1765 | STBDS_ASSERT(strmap[0].key != s.key); 1766 | STBDS_ASSERT(strmap[0].value == s.value); 1767 | shfree(strmap); 1768 | } 1769 | 1770 | for (j=0; j < 2; ++j) { 1771 | STBDS_ASSERT(shgeti(strmap,"foo") == -1); 1772 | if (j == 0) 1773 | sh_new_strdup(strmap); 1774 | else 1775 | sh_new_arena(strmap); 1776 | STBDS_ASSERT(shgeti(strmap,"foo") == -1); 1777 | shdefault(strmap, -2); 1778 | STBDS_ASSERT(shgeti(strmap,"foo") == -1); 1779 | for (i=0; i < testsize; i+=2) 1780 | shput(strmap, strkey(i), i*3); 1781 | for (i=0; i < testsize; i+=1) 1782 | if (i & 1) STBDS_ASSERT(shget(strmap, strkey(i)) == -2 ); 1783 | else STBDS_ASSERT(shget(strmap, strkey(i)) == i*3); 1784 | for (i=2; i < testsize; i+=4) 1785 | shdel(strmap, strkey(i)); // delete half the entries 1786 | for (i=0; i < testsize; i+=1) 1787 | if (i & 3) STBDS_ASSERT(shget(strmap, strkey(i)) == -2 ); 1788 | else STBDS_ASSERT(shget(strmap, strkey(i)) == i*3); 1789 | for (i=0; i < testsize; i+=1) 1790 | shdel(strmap, strkey(i)); // delete the rest of the entries 1791 | for (i=0; i < testsize; i+=1) 1792 | STBDS_ASSERT(shget(strmap, strkey(i)) == -2 ); 1793 | shfree(strmap); 1794 | } 1795 | 1796 | { 1797 | struct { char *key; char value; } *hash = NULL; 1798 | char name[4] = "jen"; 1799 | shput(hash, "bob" , 'h'); 1800 | shput(hash, "sally" , 'e'); 1801 | shput(hash, "fred" , 'l'); 1802 | shput(hash, "jen" , 'x'); 1803 | shput(hash, "doug" , 'o'); 1804 | 1805 | shput(hash, name , 'l'); 1806 | shfree(hash); 1807 | } 1808 | 1809 | for (i=0; i < testsize; i += 2) { 1810 | stbds_struct s = { i,i*2,i*3,i*4 }; 1811 | hmput(map, s, i*5); 1812 | } 1813 | 1814 | for (i=0; i < testsize; i += 1) { 1815 | stbds_struct s = { i,i*2,i*3 ,i*4 }; 1816 | stbds_struct t = { i,i*2,i*3+1,i*4 }; 1817 | if (i & 1) STBDS_ASSERT(hmget(map, s) == 0); 1818 | else 
STBDS_ASSERT(hmget(map, s) == i*5); 1819 | if (i & 1) STBDS_ASSERT(hmget_ts(map, s, temp) == 0); 1820 | else STBDS_ASSERT(hmget_ts(map, s, temp) == i*5); 1821 | //STBDS_ASSERT(hmget(map, t.key) == 0); 1822 | } 1823 | 1824 | for (i=0; i < testsize; i += 2) { 1825 | stbds_struct s = { i,i*2,i*3,i*4 }; 1826 | hmputs(map2, s); 1827 | } 1828 | hmfree(map); 1829 | 1830 | for (i=0; i < testsize; i += 1) { 1831 | stbds_struct s = { i,i*2,i*3,i*4 }; 1832 | stbds_struct t = { i,i*2,i*3+1,i*4 }; 1833 | if (i & 1) STBDS_ASSERT(hmgets(map2, s.key).d == 0); 1834 | else STBDS_ASSERT(hmgets(map2, s.key).d == i*4); 1835 | //STBDS_ASSERT(hmgetp(map2, t.key) == 0); 1836 | } 1837 | hmfree(map2); 1838 | 1839 | for (i=0; i < testsize; i += 2) { 1840 | stbds_struct2 s = { { i,i*2 }, i*3,i*4, i*5 }; 1841 | hmputs(map3, s); 1842 | } 1843 | for (i=0; i < testsize; i += 1) { 1844 | stbds_struct2 s = { { i,i*2}, i*3, i*4, i*5 }; 1845 | stbds_struct2 t = { { i,i*2}, i*3+1, i*4, i*5 }; 1846 | if (i & 1) STBDS_ASSERT(hmgets(map3, s.key).d == 0); 1847 | else STBDS_ASSERT(hmgets(map3, s.key).d == i*5); 1848 | //STBDS_ASSERT(hmgetp(map3, t.key) == 0); 1849 | } 1850 | #endif 1851 | } 1852 | #endif 1853 | 1854 | 1855 | /* 1856 | ------------------------------------------------------------------------------ 1857 | This software is available under 2 licenses -- choose whichever you prefer. 
1858 | ------------------------------------------------------------------------------ 1859 | ALTERNATIVE A - MIT License 1860 | Copyright (c) 2019 Sean Barrett 1861 | Permission is hereby granted, free of charge, to any person obtaining a copy of 1862 | this software and associated documentation files (the "Software"), to deal in 1863 | the Software without restriction, including without limitation the rights to 1864 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 1865 | of the Software, and to permit persons to whom the Software is furnished to do 1866 | so, subject to the following conditions: 1867 | The above copyright notice and this permission notice shall be included in all 1868 | copies or substantial portions of the Software. 1869 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1870 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1871 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 1872 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1873 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 1874 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 1875 | SOFTWARE. 1876 | ------------------------------------------------------------------------------ 1877 | ALTERNATIVE B - Public Domain (www.unlicense.org) 1878 | This is free and unencumbered software released into the public domain. 1879 | Anyone is free to copy, modify, publish, use, compile, sell, or distribute this 1880 | software, either in source code form or as a compiled binary, for any purpose, 1881 | commercial or non-commercial, and by any means. 1882 | In jurisdictions that recognize copyright laws, the author or authors of this 1883 | software dedicate any and all copyright interest in the software to the public 1884 | domain. 
We make this dedication for the benefit of the public at large and to 1885 | the detriment of our heirs and successors. We intend this dedication to be an 1886 | overt act of relinquishment in perpetuity of all present and future rights to 1887 | this software under copyright law. 1888 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1889 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1890 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 1891 | AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 1892 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 1893 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 1894 | ------------------------------------------------------------------------------ 1895 | */ 1896 | --------------------------------------------------------------------------------