├── .gitignore ├── LICENSE.txt ├── Makefile ├── README.md ├── hcre.c ├── rules.c ├── rules.h └── uthash.h /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.exe 3 | *.rule 4 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 llamasoft 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | CC = gcc 3 | OBJECTS = rules.o hcre.o 4 | BINARIES = hcre 5 | DEBUGS = 6 | 7 | COMPILE = $(CC) -O2 -std=c99 -march=native $(CFLAGS) $(DEBUGS) -Wall -Wextra -funsigned-char -Wno-pointer-sign -Wno-sign-compare 8 | 9 | 10 | all: $(BINARIES) 11 | 12 | %.o: %.c 13 | $(COMPILE) -c $< -o $@ 14 | 15 | hcre: rules.o hcre.o 16 | $(COMPILE) $^ -o hcre 17 | 18 | debug: DEBUGS = -DDEBUG_PARSING -DDEBUG_DUPES -DDEBUG_STATS -DDEBUG_OUTPUT 19 | debug: hcre 20 | 21 | clean: 22 | $(RM) $(BINARIES) $(OBJECTS) 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | HashcatRulesEngine 2 | ==================== 3 | 4 | A stand-alone implementation of [Hashcat's](https://github.com/hashcat/hashcat) rule engine, based on John the Ripper's rule engine. 5 | Implements rule parsing separate from rule application for better performance and better detection of duplicate rules. 6 | 7 | 8 | ### Usage 9 | 10 | 11 | USAGE: ./hcre rule_file [rule_file ...] 
12 | 13 | Input words are read from STDIN, mangled words are written on STDOUT 14 | All rule files are unioned together and duplicate rules are removed 15 | For a cross product of rules, pipe this program into another instance of itself 16 | 17 | 18 | EXAMPLE: 19 | ./hcre best64.rule < words.txt 20 | some_process | ./hcre leetspeak.rule combinator.rule 21 | -------------------------------------------------------------------------------- /hcre.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "rules.h" 7 | 8 | 9 | typedef struct RuleHash { 10 | // The Rule struct itself 11 | Rule *rule; 12 | 13 | // Info about where the rule came from 14 | // This makes it easier to track down rule errors and dupes 15 | char *source_file, *source_text; 16 | unsigned int source_line; 17 | 18 | // Used by HASH_* functions 19 | UT_hash_handle hh; 20 | } RuleHash; 21 | 22 | 23 | void free_hash(RuleHash *hash) { 24 | if (hash == NULL) { return; } 25 | if (hash->rule ) { free_rule(hash->rule); } 26 | if (hash->source_file) { free(hash->source_file); } 27 | if (hash->source_text) { free(hash->source_text); } 28 | memset(hash, 0, sizeof(RuleHash)); 29 | } 30 | 31 | 32 | void usage(char *hcre) { 33 | printf("\n"); 34 | printf("USAGE: %s rule_file [rule_file ...]\n", hcre); 35 | printf("\n"); 36 | printf("Input words are read from STDIN, mangled words are written on STDOUT\n"); 37 | printf("All rule files are unioned together and duplicate rules are removed\n"); 38 | printf("For a cross product of rules, pipe this program into another instance of itself\n"); 39 | printf("\n"); 40 | printf("\n"); 41 | printf("EXAMPLE:\n"); 42 | printf(" %s best64.rule < words.txt\n", hcre); 43 | printf(" some_process | %s leetspeak.rule combinator.rule\n", hcre); 44 | printf("\n"); 45 | } 46 | 47 | 48 | int main(int argc, char **argv) { 49 | if (argc <= 1) { usage(argv[0]); return 0; } 50 
| 51 | // Allows rule upper/lower/toggle to follow locale 52 | setlocale(LC_CTYPE, ""); 53 | 54 | 55 | // Our hash structure's head node 56 | RuleHash *rules = NULL; 57 | 58 | // For iterating rules hash with HASH_ITER 59 | RuleHash *hash_temp = NULL; 60 | 61 | // Output for prase_rule() 62 | Rule *cur_rule = NULL; 63 | 64 | // Reading/writing to rules hash 65 | RuleHash *cur_hash = NULL; 66 | 67 | // getline() will read into this 68 | char *line = NULL; 69 | ssize_t line_len = 0; 70 | size_t line_malloc_size = 0; 71 | 72 | unsigned int error_count = 0; 73 | unsigned int dupe_count = 0; 74 | 75 | 76 | // Process each file of rules 77 | for (int file_num = 1; file_num < argc; file_num++) { 78 | 79 | char *file_name = argv[file_num]; 80 | FILE *rule_file = fopen(file_name, "rb"); 81 | 82 | // Make sure we successfully opened the rule file 83 | if (rule_file == NULL) { 84 | fprintf(stderr, "ERROR: Failed to open input file <%s>\n", file_name); 85 | return -1; 86 | } 87 | 88 | 89 | // Read each rule from the input file 90 | unsigned int line_num = 0; 91 | while ( !feof(rule_file) ) { 92 | line_len = getline(&line, &line_malloc_size, rule_file); 93 | 94 | // Stop processing file on error or end of input (we don't care which) 95 | if (line_len <= 0) { break; } 96 | line_num++; 97 | 98 | // Trim trailing newline by overwriting it will a NULL terminator 99 | while (line[line_len - 1] == '\n' || line[line_len - 1] == '\r') { 100 | line[--line_len] = 0; 101 | } 102 | 103 | // Skip empty lines and commented lines 104 | if (line_len == 0 ) { continue; } 105 | if (line[0] == '#') { continue; } 106 | 107 | 108 | #ifdef DEBUG_PARSING 109 | fprintf(stderr, "Before Parsing: %s (Length: %d)\n", line, line_len); 110 | #endif 111 | 112 | // Prase and validate the rule into cur_rule 113 | int rule_check = parse_rule(line, line_len, &cur_rule); 114 | if (rule_check < 0) { 115 | fprintf( 116 | stderr, "File: <%s>; Line: <%u>; Rule: <%s>; Error: %s\n", 117 | file_name, line_num, line, 
cur_rule->text 118 | ); 119 | 120 | error_count++; 121 | continue; 122 | } 123 | 124 | #ifdef DEBUG_PARSING 125 | fprintf(stderr, "After Parsing: %s (Length: %d)\n", cur_rule->text, cur_rule->length); 126 | fprintf(stderr, "\n"); 127 | #endif 128 | 129 | 130 | // Check if the cleaned rule is a dupe 131 | HASH_FIND(hh, rules, cur_rule->text, cur_rule->length, hash_temp); 132 | if (hash_temp != NULL) { 133 | #ifdef DEBUG_DUPES 134 | fprintf( 135 | stderr, "File: <%s>; Line: <%u>; Rule: <%s>; Duplicate of <%s> from <%s> line <%u>\n", 136 | file_name, line_num, line, 137 | hash_temp->source_text, hash_temp->source_file, hash_temp->source_line 138 | ); 139 | #endif 140 | 141 | dupe_count++; 142 | continue; 143 | } 144 | 145 | 146 | // Make a new hash entry from the current rule 147 | cur_hash = (RuleHash *)calloc(1, sizeof(RuleHash)); 148 | cur_hash->rule = clone_rule(cur_rule); 149 | cur_hash->source_file = strdup(file_name); 150 | cur_hash->source_line = line_num; 151 | cur_hash->source_text = strdup(line); 152 | 153 | // If clone_rule() failed, we have a major problem 154 | if (!cur_hash->rule) { 155 | fprintf(stderr, "Failed to clone rule, aborting\n"); 156 | return -1; 157 | } 158 | 159 | HASH_ADD_KEYPTR(hh, rules, cur_hash->rule->text, cur_hash->rule->length, cur_hash); 160 | } 161 | 162 | 163 | fclose(rule_file); 164 | } 165 | 166 | #ifdef DEBUG_STATS 167 | fprintf( 168 | stderr, "Loaded %u rules, skipped %u broken and %u duplicate rules\n", 169 | HASH_COUNT(rules), error_count, dupe_count 170 | ); 171 | #endif 172 | 173 | 174 | 175 | // Our mangled text ends up here 176 | char rule_output[BLOCK_SIZE]; 177 | 178 | // Statistics 179 | unsigned long int word_count = 0; 180 | unsigned long int reject_count = 0; 181 | 182 | // Increase the output buffering 183 | if (setvbuf(stdout, NULL, _IOFBF, 1024 * 1024) != 0) { 184 | fprintf(stderr, "Failed to adjust stdout buffer size\n"); 185 | } 186 | 187 | // Main processing loop, runs for each line of input 188 | while ( 
!feof(stdin) ) { 189 | line_len = getline(&line, &line_malloc_size, stdin); 190 | 191 | // Error or end of input 192 | if (line_len <= 0) { break; } 193 | 194 | // Trim trailing newline, skip blank lines 195 | line[--line_len] = 0; 196 | if (line_len == 0) { continue; } 197 | 198 | 199 | // Safely iterate the rules hash 200 | HASH_ITER(hh, rules, cur_hash, hash_temp) { 201 | cur_rule = cur_hash->rule; 202 | 203 | // Apply the rule operations 204 | int rule_rtn = apply_rule(cur_rule, line, line_len, rule_output); 205 | 206 | // Something broke? 207 | if (rule_rtn < 0) { 208 | if (rule_rtn == REJECTED) { 209 | // Rejections are expected, they're okay 210 | reject_count++; 211 | 212 | } else { 213 | // We missed something in parsing and now our rule broke 214 | // We can't "fix" the rule, so our only option is to remove it 215 | // If you ever see this message, please contact the developer 216 | fprintf(stderr, 217 | "Input word <%s> broke rule <%s> from file <%s>, line <%u> (parsed as <%s>): %s\n", 218 | line, 219 | cur_hash->source_text, cur_hash->source_file, cur_hash->source_line, cur_rule->text, 220 | rule_output 221 | ); 222 | 223 | HASH_DEL(rules, cur_hash); 224 | free_hash(cur_hash); 225 | } 226 | 227 | // Regardless if this was a rejection or error, we're not printing this word 228 | continue; 229 | } 230 | 231 | 232 | // In debug mode, include the rule itself with the output 233 | #ifdef DEBUG_OUTPUT 234 | fwrite(cur_rule->text, cur_rule->length, 1, stdout); 235 | fputc('\t', stdout); 236 | #endif 237 | 238 | // Output the mangled word and a newline 239 | rule_output[rule_rtn++] = '\n'; 240 | fwrite(rule_output, rule_rtn, 1, stdout); 241 | word_count++; 242 | } 243 | } 244 | 245 | 246 | #ifdef DEBUG_STATS 247 | fprintf(stderr, "Created %lu words, rejected %lu\n", word_count, reject_count); 248 | #endif 249 | 250 | return 0; 251 | } -------------------------------------------------------------------------------- /rules.c: 
-------------------------------------------------------------------------------- 1 | // ============================================================================= 2 | // Original Author: Jens Steube 3 | // Rewritten By: llamasoft 4 | // License: MIT 5 | // ============================================================================= 6 | 7 | #include "rules.h" 8 | 9 | // Increment rule position, return syntax error on premature end 10 | #define NEXT_RULEPOS(rule_pos) \ 11 | do { \ 12 | if ( ++(rule_pos) == rule_len ) { \ 13 | errno = PREMATURE_END_OF_RULE; \ 14 | break; \ 15 | } \ 16 | } while (0) 17 | 18 | // Read current rule position as positional value into out_var 19 | #define NEXT_RPTOI(rule, rule_pos, out_var) \ 20 | do { \ 21 | (out_var) = conv_ctoi( (rule)[(rule_pos)] ); \ 22 | if ( (out_var) == -1 ) { \ 23 | errno = INVALID_POSITIONAL; \ 24 | break; \ 25 | } \ 26 | } while (0) 27 | 28 | 29 | static inline bool class_num(char c) { return( (c >= '0') && (c <= '9') ); } 30 | static inline bool class_lower(char c) { return( (c >= 'a') && (c <= 'z') ); } 31 | static inline bool class_upper(char c) { return( (c >= 'A') && (c <= 'Z') ); } 32 | static inline bool class_alpha(char c) { return(class_lower(c) || class_upper(c)); } 33 | 34 | // Single character to integer value 35 | // 0 .. 9 => 0 .. 9 36 | // A .. Z => 10 .. 35 37 | // a .. z => 36 .. 
61 38 | // else -1 (error) 39 | static inline int conv_ctoi(char c) 40 | { 41 | if (class_num(c)) { 42 | return ((int)(c - '0')); 43 | 44 | } else if (class_upper(c)) { 45 | return ((int)(c - 'A' + (char)10)); 46 | 47 | } else if (class_lower(c)) { 48 | return ((int)(c - 'a' + (char)36)); 49 | 50 | } else { 51 | return(-1); 52 | } 53 | } 54 | 55 | // NOTE: toggle/lower/upper/switch functions used to be macros 56 | // To prevent breakage, the signatures haven't been changed 57 | // This also means that they have no return values and do no safety checks 58 | // The functions are only used internally, so it shouldn't be an issue 59 | 60 | // Toggle a character uppercase/lowercase at a given offset 61 | void mangle_toggle_at(char str[BLOCK_SIZE], int offset) { 62 | if ( class_alpha(str[offset]) ) { 63 | str[offset] ^= 0x20; 64 | } 65 | } 66 | 67 | 68 | // Convert a character at offset to lowercase 69 | void mangle_lower_at(char str[BLOCK_SIZE], int offset) { 70 | if ( class_upper(str[offset]) ) { 71 | str[offset] ^= 0x20; 72 | } 73 | } 74 | 75 | 76 | // Convert a character at offset to uppercase 77 | void mangle_upper_at(char str[BLOCK_SIZE], int offset) { 78 | if ( class_lower(str[offset]) ) { 79 | str[offset] ^= 0x20; 80 | } 81 | } 82 | 83 | 84 | // Swap the characters at offsets left and right 85 | void mangle_switch(char str[BLOCK_SIZE], int left, int right) { 86 | char temp = str[left]; 87 | str[left] = str[right]; 88 | str[right] = temp; 89 | } 90 | 91 | 92 | // Convert to lower 93 | int mangle_lower_all(char str[BLOCK_SIZE], int str_len) 94 | { 95 | for (int pos = 0; pos < str_len; pos++) { mangle_lower_at(str, pos); } 96 | 97 | return(str_len); 98 | } 99 | 100 | 101 | // Convert to upper 102 | int mangle_upper_all(char str[BLOCK_SIZE], int str_len) 103 | { 104 | for (int pos = 0; pos < str_len; pos++) { mangle_upper_at(str, pos); } 105 | 106 | return(str_len); 107 | } 108 | 109 | 110 | // Toggle case 111 | int mangle_toggle_all(char str[BLOCK_SIZE], int 
str_len) 112 | { 113 | for (int pos = 0; pos < str_len; pos++) { mangle_toggle_at(str, pos); } 114 | 115 | return(str_len); 116 | } 117 | 118 | 119 | // Reverse a string 120 | int mangle_reverse(char str[BLOCK_SIZE], int str_len) 121 | { 122 | char temp[BLOCK_SIZE]; 123 | memcpy(temp, str, str_len); 124 | 125 | for (int i = 0; i < str_len; i++) { 126 | str[i] = temp[str_len - i - 1]; 127 | } 128 | 129 | return(str_len); 130 | } 131 | 132 | 133 | // Append a string to itself 134 | int mangle_double(char str[BLOCK_SIZE], int str_len) 135 | { 136 | if ((str_len * 2) >= BLOCK_SIZE) { return(str_len); } 137 | 138 | memcpy(&str[str_len], str, (size_t)str_len); 139 | return(str_len * 2); 140 | } 141 | 142 | 143 | // Append a string to itself N times 144 | int mangle_double_times(char str[BLOCK_SIZE], int str_len, int times) 145 | { 146 | if ((str_len * times) + str_len >= BLOCK_SIZE) { return(str_len); } 147 | 148 | int orig_len = str_len; 149 | 150 | for (int i = 0; i < times; i++) { 151 | memcpy(&str[str_len], str, orig_len); 152 | 153 | str_len += orig_len; 154 | } 155 | 156 | return(str_len); 157 | } 158 | 159 | 160 | // Append a string to itself backwards 161 | int mangle_reflect(char str[BLOCK_SIZE], int str_len) 162 | { 163 | if ((str_len * 2) >= BLOCK_SIZE) { return(str_len); } 164 | 165 | mangle_double(str, str_len); 166 | mangle_reverse(&str[str_len], str_len); 167 | 168 | return(str_len * 2); 169 | } 170 | 171 | 172 | // Rotates a string left one character 173 | int mangle_rotate_left(char str[BLOCK_SIZE], int str_len) 174 | { 175 | if (str_len < 2) { return(str_len); } 176 | 177 | // Save the first character 178 | char temp = str[0]; 179 | 180 | // Shift everything left 181 | for (int str_pos = 0; str_pos < str_len - 1; str_pos++) { 182 | str[str_pos] = str[str_pos + 1]; 183 | } 184 | 185 | // Put the first character at the end 186 | str[str_len - 1] = temp; 187 | 188 | return(str_len); 189 | } 190 | 191 | 192 | // Rotates a string right one character 193 | 
int mangle_rotate_right(char str[BLOCK_SIZE], int str_len) 194 | { 195 | if (str_len < 2) { return(str_len); } 196 | 197 | // Save the last character 198 | char temp = str[str_len - 1]; 199 | 200 | // Shift everything right 201 | for (int str_pos = str_len - 1; str_pos > 0; str_pos--) { 202 | str[str_pos] = str[str_pos - 1]; 203 | } 204 | 205 | // Place the last character at the front 206 | str[0] = temp; 207 | 208 | return(str_len); 209 | } 210 | 211 | 212 | // Appends a single character to a string 213 | int mangle_append(char str[BLOCK_SIZE], int str_len, char c) 214 | { 215 | if ((str_len + 1) >= BLOCK_SIZE) { return(str_len); } 216 | 217 | str[str_len] = c; 218 | 219 | return(str_len + 1); 220 | } 221 | 222 | 223 | // Prepends a single character to a string 224 | int mangle_prepend(char str[BLOCK_SIZE], int str_len, char c) 225 | { 226 | if ((str_len + 1) >= BLOCK_SIZE) { return(str_len); } 227 | 228 | str[str_len] = c; 229 | mangle_rotate_right(str, str_len + 1); 230 | 231 | return(str_len + 1); 232 | } 233 | 234 | 235 | // Deletes a single character at offset 236 | int mangle_delete_at(char str[BLOCK_SIZE], int str_len, int offset) 237 | { 238 | if (offset >= str_len || offset < 0) { return(str_len); } 239 | 240 | for (int str_pos = offset; str_pos < str_len - 1; str_pos++) { 241 | str[str_pos] = str[str_pos + 1]; 242 | } 243 | 244 | return(str_len - 1); 245 | } 246 | 247 | 248 | // Replaces string with substr_len characters starting at offset 249 | int mangle_extract(char str[BLOCK_SIZE], int str_len, int offset, int substr_len) 250 | { 251 | if (offset >= str_len) { return(str_len); } 252 | 253 | // substr_len is too large, shorten it so it fits within this string 254 | if ((offset + substr_len) > str_len) { substr_len = str_len - offset; } 255 | 256 | for (int str_pos = 0; str_pos < substr_len; str_pos++) { 257 | str[str_pos] = str[offset + str_pos]; 258 | } 259 | 260 | return(substr_len); 261 | } 262 | 263 | 264 | // Removes substr_len characters 
starting at offset 265 | int mangle_omit(char str[BLOCK_SIZE], int str_len, int offset, int substr_len) 266 | { 267 | if (offset >= str_len) { return(str_len); } 268 | 269 | // We know offset is within the string, shorten substr_len 270 | // so that offset + substr_len stays within the string 271 | // This effectively skips the for loop and turns this into a truncate 272 | if ((offset + substr_len) > str_len) { substr_len = str_len - offset; } 273 | 274 | for (int str_pos = offset; str_pos < str_len - substr_len; str_pos++) { 275 | str[str_pos] = str[str_pos + substr_len]; 276 | } 277 | 278 | return(str_len - substr_len); 279 | } 280 | 281 | 282 | // Inserts a single character at offset, shifting the result down 283 | int mangle_insert(char str[BLOCK_SIZE], int str_len, int offset, char c) 284 | { 285 | // If offset is beyond end of string, treat as an append 286 | // offset == str_len allowed, same as appending character 287 | if (offset > str_len) { offset = str_len; } 288 | if ((str_len + 1) >= BLOCK_SIZE) { return(str_len); } 289 | 290 | for (int str_pos = str_len - 1; str_pos > offset - 1; str_pos--) { 291 | str[str_pos + 1] = str[str_pos]; 292 | } 293 | 294 | str[offset] = c; 295 | 296 | return(str_len + 1); 297 | } 298 | 299 | 300 | // Insert substr_len characters from mem starting at position mem_offset into position offset 301 | // str[0 .. offset - 1] + mem[mem_offset .. mem_offset + substr_len] + str[offset .. 
str_len] 302 | int mangle_insert_multi( 303 | char str[BLOCK_SIZE], int str_len, int str_offset, 304 | char mem[BLOCK_SIZE], int mem_len, int mem_offset, 305 | int substr_len 306 | ) 307 | { 308 | if (str_offset > str_len) { str_offset = str_len; } 309 | if ((str_len + substr_len) >= BLOCK_SIZE) { return(str_len); } 310 | 311 | if (mem_offset >= mem_len) { return(str_len); } 312 | if ((mem_offset + substr_len) > mem_len) { substr_len = mem_len - mem_offset; } 313 | if (substr_len < 1) { return(str_len); } 314 | 315 | // Shift mem down mem_offset characters 316 | // This is the substring we will add to str 317 | // mem[mem_offset .. mem_offset + substr_len] 318 | memcpy(mem, mem + mem_offset, mem_len - mem_offset); 319 | 320 | // Append the back half of str (after str_offset) to mem 321 | // This will become the back half of the result 322 | // mem[mem_offset .. mem_offset + substr_len] + str[str_offset .. str_len] 323 | memcpy(mem + substr_len, str + str_offset, str_len - str_offset); 324 | 325 | // Insert our result to the correct place in str 326 | memcpy(str + str_offset, mem, str_len - str_offset + substr_len); 327 | 328 | return(str_len + substr_len); 329 | } 330 | 331 | 332 | // Replace a single character at offset 333 | int mangle_overstrike(char str[BLOCK_SIZE], int str_len, int offset, char c) 334 | { 335 | if (offset >= str_len || offset < 0) { return(str_len); } 336 | 337 | str[offset] = c; 338 | 339 | return(str_len); 340 | } 341 | 342 | 343 | // Remove everything after position offset 344 | int mangle_truncate_at(char str[BLOCK_SIZE], int str_len, int offset) 345 | { 346 | if (offset >= str_len || offset < 0) { return(str_len); } 347 | 348 | // Not explicitly required, just messing with str to suppress a gcc warning 349 | str[offset] = 0; 350 | 351 | return(offset); 352 | } 353 | 354 | 355 | // Replace all instances of oldc with newc 356 | int mangle_replace(char str[BLOCK_SIZE], int str_len, char oldc, char newc) 357 | { 358 | for (int str_pos = 0; 
str_pos < str_len; str_pos++) { 359 | if (str[str_pos] != oldc) { continue; } 360 | 361 | str[str_pos] = newc; 362 | } 363 | 364 | return(str_len); 365 | } 366 | 367 | 368 | // Remove all instances of c 369 | int mangle_purgechar(char str[BLOCK_SIZE], int str_len, char c) 370 | { 371 | int ret_len, str_pos; 372 | for (ret_len = 0, str_pos = 0; str_pos < str_len; str_pos++) { 373 | if (str[str_pos] == c) { continue; } 374 | 375 | str[ret_len] = str[str_pos]; 376 | 377 | ret_len++; 378 | } 379 | 380 | return(ret_len); 381 | } 382 | 383 | 384 | // Duplicate the first substr_len characters, prepending them to the string 385 | // str = "Apple", substr_len = 3, result = "AppApple" 386 | int mangle_dupeblock_prepend(char str[BLOCK_SIZE], int str_len, int substr_len) 387 | { 388 | if (substr_len < 1) { return(str_len); } 389 | 390 | // If substr_len is too long, shorten it to the string's length 391 | // This effectively makes it a "dupe word" operation 392 | if (substr_len > str_len) { substr_len = str_len; } 393 | if ((str_len + substr_len) >= BLOCK_SIZE) { return(str_len); } 394 | 395 | for (int str_pos = str_len - 1; str_pos >= 0; str_pos--) { 396 | str[str_pos + substr_len] = str[str_pos]; 397 | } 398 | 399 | return(str_len + substr_len); 400 | } 401 | 402 | 403 | // Duplicate the first substr_len characters, appending them to the string 404 | // str = "Apple", substr_len = 3, result = "AppleApp" 405 | int mangle_dupeblock_append(char str[BLOCK_SIZE], int str_len, int substr_len) 406 | { 407 | if (substr_len < 1) { return(str_len); } 408 | 409 | // If substr_len is too long, shorten it to the string's length 410 | // This effectively makes it a "dupe word" operation 411 | if (substr_len > str_len) { substr_len = str_len; } 412 | if ((str_len + substr_len) >= BLOCK_SIZE) { return(str_len); } 413 | 414 | memcpy(&str[str_len], str, substr_len); 415 | 416 | return(str_len + substr_len); 417 | } 418 | 419 | 420 | // Duplicate the character at offset substr_len times 421 | 
int mangle_dupechar_at(char str[BLOCK_SIZE], int str_len, int offset, int substr_len) 422 | { 423 | if (str_len == 0) { return(str_len); } 424 | if ((str_len + substr_len) >= BLOCK_SIZE) { return(str_len); } 425 | 426 | char c = str[offset]; 427 | for (int i = 0; i < substr_len; i++) { 428 | str_len = mangle_insert(str, str_len, offset, c); 429 | } 430 | 431 | return(str_len); 432 | } 433 | 434 | 435 | // Duplicates every character 436 | int mangle_dupechar(char str[BLOCK_SIZE], int str_len) 437 | { 438 | if (str_len == 0) { return(str_len); } 439 | if ((str_len + str_len) >= BLOCK_SIZE) { return(str_len); } 440 | 441 | for (int str_pos = str_len - 1; str_pos > -1; str_pos--) { 442 | int new_pos = str_pos * 2; 443 | 444 | str[new_pos] = str[str_pos]; 445 | 446 | str[new_pos + 1] = str[str_pos]; 447 | } 448 | 449 | return(str_len * 2); 450 | } 451 | 452 | 453 | // Swap the characters at positions offset and offset2 454 | int mangle_switch_at_check(char str[BLOCK_SIZE], int str_len, int offset, int offset2) 455 | { 456 | if (offset >= str_len) { return(str_len); } 457 | if (offset2 >= str_len) { return(str_len); } 458 | 459 | mangle_switch(str, offset, offset2); 460 | 461 | return(str_len); 462 | } 463 | 464 | 465 | // Swap the characters at positions offset and offset2, no safety checks 466 | int mangle_switch_at(char str[BLOCK_SIZE], int str_len, int offset, int offset2) 467 | { 468 | mangle_switch(str, offset, offset2); 469 | 470 | return(str_len); 471 | } 472 | 473 | 474 | // Left bit-shift the character at offset 475 | int mangle_chr_shiftl(uint8_t str[BLOCK_SIZE], int str_len, int offset) 476 | { 477 | if (offset >= str_len) { return(str_len); } 478 | 479 | str[offset] <<= 1; 480 | 481 | return(str_len); 482 | } 483 | 484 | 485 | // Right bit-shift the character at offset 486 | int mangle_chr_shiftr(uint8_t str[BLOCK_SIZE], int str_len, int offset) 487 | { 488 | if (offset >= str_len) { return(str_len); } 489 | 490 | str[offset] >>= 1; 491 | 492 | 
return(str_len); 493 | } 494 | 495 | 496 | // Increment the character at offset 497 | int mangle_chr_incr(uint8_t str[BLOCK_SIZE], int str_len, int offset) 498 | { 499 | if (offset >= str_len) { return(str_len); } 500 | 501 | str[offset] += 1; 502 | 503 | return(str_len); 504 | } 505 | 506 | 507 | // Decrement the character at offset 508 | int mangle_chr_decr(uint8_t str[BLOCK_SIZE], int str_len, int offset) 509 | { 510 | if (offset >= str_len) { return(str_len); } 511 | 512 | str[offset] -= 1; 513 | 514 | return(str_len); 515 | } 516 | 517 | 518 | // Convert a string to title case 519 | int mangle_title(char str[BLOCK_SIZE], int str_len) 520 | { 521 | int upper_next = 1; 522 | 523 | for (int pos = 0; pos < str_len; pos++) { 524 | if (str[pos] == ' ') { 525 | upper_next = 1; 526 | continue; 527 | } 528 | 529 | if (upper_next) { 530 | upper_next = 0; 531 | mangle_upper_at(str, pos); 532 | 533 | } else { 534 | mangle_lower_at(str, pos); 535 | } 536 | } 537 | 538 | return(str_len); 539 | } 540 | 541 | 542 | 543 | Rule *clone_rule(Rule *source) { 544 | if (source == NULL) { return NULL; } 545 | if (source->text == NULL) { return NULL; } 546 | 547 | Rule *rtn = (Rule *)calloc(1, sizeof(Rule)); 548 | if (rtn == NULL) { 549 | fprintf(stderr, "clone_rule() failed to calloc() a Rule of size %zu\n", sizeof(Rule)); 550 | return NULL; 551 | } 552 | 553 | rtn->text = (char *)calloc(1, (source->length + 1) * sizeof(char)); 554 | if (rtn->text == NULL) { 555 | fprintf(stderr, "clone_rule() failed to calloc() a string of length %zu\n", source->length + 1); 556 | return NULL; 557 | } 558 | memcpy(rtn->text, source->text, source->length); 559 | 560 | rtn->length = source->length; 561 | return rtn; 562 | } 563 | 564 | 565 | void free_rule(Rule *rule) { 566 | if (rule == NULL) { return; } 567 | if (rule->text) { free(rule->text); } 568 | memset(rule, 0, sizeof(Rule)); 569 | } 570 | 571 | 572 | 573 | // Doesn't actually run the rule, but checks it for validity and does some 
preprocessing 574 | // e.g. removing noops, validating positionals, parameter count checking 575 | // In theory, this will make apply_rule be faster as it will require fewer validations 576 | // It also assists in detecting duplicate rules (e.g. 'lu' == 'l:u') 577 | int parse_rule(char *rule, int rule_len, Rule **output_rule) { 578 | if (rule == NULL) { return(INVALID_INPUT); } 579 | 580 | // Allocate a Rule if our user was lazy 581 | if ((*output_rule) == NULL) { 582 | (*output_rule) = (Rule *)calloc(1, sizeof(Rule)); 583 | } 584 | 585 | // Our new rule is guaranteed to be no larger than the unparsed rule 586 | // Note: realloc(NULL, size) is the same as malloc(size) 587 | char *new_rule = (char *)realloc((*output_rule)->text, (rule_len + 1) * sizeof(char)); 588 | int new_rule_len = 0; 589 | memset(new_rule, 0, rule_len + 1); 590 | 591 | int errno = 0; 592 | int mem_len = 0; 593 | int temp_int; 594 | 595 | // Our cursor positions for the rule 596 | int rule_pos = 0; 597 | for (rule_pos = 0; rule_pos < rule_len; rule_pos++) { 598 | // Always copy the operation 599 | new_rule[new_rule_len++] = rule[rule_pos]; 600 | 601 | switch (rule[rule_pos]) { 602 | // Whitespace and no-ops are skipped 603 | case ' ': 604 | case '\t': 605 | case '\r': 606 | case RULE_OP_MANGLE_NOOP: 607 | // Un-copy the operation 608 | new_rule[new_rule_len--] = 0; 609 | break; 610 | 611 | // No parameters 612 | case RULE_OP_MANGLE_LREST: 613 | case RULE_OP_MANGLE_UREST: 614 | case RULE_OP_MANGLE_LREST_UFIRST: 615 | case RULE_OP_MANGLE_UREST_LFIRST: 616 | case RULE_OP_MANGLE_TREST: 617 | case RULE_OP_MANGLE_REVERSE: 618 | case RULE_OP_MANGLE_DUPEWORD: 619 | case RULE_OP_MANGLE_REFLECT: 620 | case RULE_OP_MANGLE_ROTATE_LEFT: 621 | case RULE_OP_MANGLE_ROTATE_RIGHT: 622 | case RULE_OP_MANGLE_DELETE_FIRST: 623 | case RULE_OP_MANGLE_DELETE_LAST: 624 | case RULE_OP_MANGLE_DUPECHAR_ALL: 625 | case RULE_OP_MANGLE_SWITCH_FIRST: 626 | case RULE_OP_MANGLE_SWITCH_LAST: 627 | case RULE_OP_MANGLE_TITLE: 628 | 
// Operation already copied, nothing to do 629 | break; 630 | 631 | // Integer 632 | case RULE_OP_MANGLE_TOGGLE_AT: 633 | case RULE_OP_MANGLE_DUPEWORD_TIMES: 634 | case RULE_OP_MANGLE_DELETE_AT: 635 | case RULE_OP_MANGLE_TRUNCATE_AT: 636 | case RULE_OP_MANGLE_DUPECHAR_FIRST: 637 | case RULE_OP_MANGLE_DUPECHAR_LAST: 638 | case RULE_OP_MANGLE_DUPEBLOCK_FIRST: 639 | case RULE_OP_MANGLE_DUPEBLOCK_LAST: 640 | case RULE_OP_MANGLE_CHR_SHIFTL: 641 | case RULE_OP_MANGLE_CHR_SHIFTR: 642 | case RULE_OP_MANGLE_CHR_INCR: 643 | case RULE_OP_MANGLE_CHR_DECR: 644 | case RULE_OP_MANGLE_REPLACE_NP1: 645 | case RULE_OP_MANGLE_REPLACE_NM1: 646 | case RULE_OP_REJECT_LESS: 647 | case RULE_OP_REJECT_GREATER: 648 | NEXT_RULEPOS(rule_pos); 649 | NEXT_RPTOI(rule, rule_pos, temp_int); 650 | new_rule[new_rule_len++] = rule[rule_pos]; 651 | break; 652 | 653 | // Character 654 | case RULE_OP_MANGLE_APPEND: 655 | case RULE_OP_MANGLE_PREPEND: 656 | case RULE_OP_MANGLE_PURGECHAR: 657 | case RULE_OP_REJECT_CONTAIN: 658 | case RULE_OP_REJECT_NOT_CONTAIN: 659 | case RULE_OP_REJECT_EQUAL_FIRST: 660 | case RULE_OP_REJECT_EQUAL_LAST: 661 | NEXT_RULEPOS(rule_pos); 662 | new_rule[new_rule_len++] = rule[rule_pos]; 663 | break; 664 | 665 | // Character + Character 666 | case RULE_OP_MANGLE_REPLACE: 667 | NEXT_RULEPOS(rule_pos); 668 | new_rule[new_rule_len++] = rule[rule_pos]; 669 | 670 | NEXT_RULEPOS(rule_pos); 671 | new_rule[new_rule_len++] = rule[rule_pos]; 672 | break; 673 | 674 | // Integer + Integer 675 | case RULE_OP_MANGLE_EXTRACT: 676 | case RULE_OP_MANGLE_OMIT: 677 | case RULE_OP_MANGLE_SWITCH_AT: 678 | NEXT_RULEPOS(rule_pos); 679 | NEXT_RPTOI(rule, rule_pos, temp_int); 680 | new_rule[new_rule_len++] = rule[rule_pos]; 681 | 682 | NEXT_RULEPOS(rule_pos); 683 | NEXT_RPTOI(rule, rule_pos, temp_int); 684 | new_rule[new_rule_len++] = rule[rule_pos]; 685 | break; 686 | 687 | // Integer + Character 688 | case RULE_OP_MANGLE_INSERT: 689 | case RULE_OP_MANGLE_OVERSTRIKE: 690 | case RULE_OP_REJECT_EQUAL_AT: 
691 | case RULE_OP_REJECT_CONTAINS: 692 | NEXT_RULEPOS(rule_pos); 693 | NEXT_RPTOI(rule, rule_pos, temp_int); 694 | new_rule[new_rule_len++] = rule[rule_pos]; 695 | 696 | NEXT_RULEPOS(rule_pos); 697 | new_rule[new_rule_len++] = rule[rule_pos]; 698 | break; 699 | 700 | // Memory write 701 | case RULE_OP_MEMORIZE_WORD: 702 | mem_len = 1; 703 | break; 704 | 705 | // Memory read 706 | case RULE_OP_MANGLE_APPEND_MEMORY: 707 | case RULE_OP_MANGLE_PREPEND_MEMORY: 708 | case RULE_OP_REJECT_MEMORY: 709 | if (!mem_len) { errno = MEMORY_ERROR; } 710 | break; 711 | 712 | // Memory read + Integer + Integer + Integer 713 | case RULE_OP_MANGLE_EXTRACT_MEMORY: 714 | if (!mem_len) { errno = MEMORY_ERROR; break; } 715 | 716 | NEXT_RULEPOS(rule_pos); 717 | NEXT_RPTOI(rule, rule_pos, temp_int); 718 | new_rule[new_rule_len++] = rule[rule_pos]; 719 | 720 | NEXT_RULEPOS(rule_pos); 721 | NEXT_RPTOI(rule, rule_pos, temp_int); 722 | new_rule[new_rule_len++] = rule[rule_pos]; 723 | 724 | NEXT_RULEPOS(rule_pos); 725 | NEXT_RPTOI(rule, rule_pos, temp_int); 726 | new_rule[new_rule_len++] = rule[rule_pos]; 727 | break; 728 | 729 | default: 730 | errno = UNKNOWN_RULE_OP; 731 | break; 732 | } 733 | 734 | if (errno != 0) { break; } 735 | } 736 | 737 | 738 | // Check for processing errors and create an error message 739 | if (errno != 0) { 740 | // We use asprintf() to dynamically allocate a buffer for our error message 741 | // We don't need our old rule buffer any more because asprintf will create a new one 742 | free(new_rule); 743 | 744 | if (errno == PREMATURE_END_OF_RULE) { 745 | new_rule_len = asprintf(&new_rule, 746 | "premature end of rule, expected char or positional value" 747 | ); 748 | 749 | } else if (errno == UNKNOWN_RULE_OP) { 750 | new_rule_len = asprintf(&new_rule, 751 | "'%c' (offset %d) is not a valid operation", 752 | rule[rule_pos], rule_pos 753 | ); 754 | 755 | } else if (errno == INVALID_POSITIONAL) { 756 | new_rule_len = asprintf(&new_rule, 757 | "'%c' (offset %d) is not a 
valid position or length value", 758 | rule[rule_pos], rule_pos 759 | ); 760 | 761 | } else if (errno == MEMORY_ERROR) { 762 | new_rule_len = asprintf(&new_rule, 763 | "'%c' (offset %d) cannot be used before memorize operation", 764 | rule[rule_pos], rule_pos 765 | ); 766 | 767 | } else { 768 | new_rule_len = asprintf(&new_rule, 769 | "unknown error %d at operation '%c' (offset %d)", 770 | errno, rule[rule_pos], rule_pos 771 | ); 772 | errno = UNKNOWN_ERROR; 773 | } 774 | } 775 | 776 | 777 | new_rule[new_rule_len] = 0; 778 | (*output_rule)->text = new_rule; 779 | (*output_rule)->length = new_rule_len; 780 | return (errno < 0 ? errno : new_rule_len); 781 | } 782 | 783 | 784 | 785 | int apply_rule(Rule *input_rule, char *input_word, int input_len, char out[BLOCK_SIZE]) 786 | { 787 | if (input_rule == NULL) { return(INVALID_INPUT); } 788 | if (input_rule->text == NULL) { return(INVALID_INPUT); } 789 | // Rule length not checked, noop rules are valid but empty 790 | char *rule = input_rule->text; 791 | int rule_len = input_rule->length; 792 | 793 | if (input_word == NULL) { return(INVALID_INPUT); } 794 | if (input_len < 1) { return(INVALID_INPUT); } 795 | 796 | int mem_len = -1; 797 | char mem[BLOCK_SIZE]; 798 | 799 | int out_len = (input_len < BLOCK_SIZE ? 
input_len : BLOCK_SIZE - 1); 800 | memcpy(out, input_word, out_len); 801 | 802 | 803 | // Operation parameters 804 | int errno, rule_pos; 805 | for (rule_pos = 0, errno = 0; rule_pos < rule_len && !errno; rule_pos++) { 806 | switch (rule[rule_pos]) { 807 | case ' ': 808 | case '\t': 809 | case '\r': 810 | case RULE_OP_MANGLE_NOOP: { 811 | // After validation, noops shouldn't exist in the rule 812 | // Just in case though, we'll skip them 813 | break; 814 | } 815 | case RULE_OP_MANGLE_LREST: { 816 | mangle_lower_all(out, out_len); 817 | break; 818 | } 819 | case RULE_OP_MANGLE_UREST: { 820 | mangle_upper_all(out, out_len); 821 | break; 822 | } 823 | case RULE_OP_MANGLE_LREST_UFIRST: { 824 | mangle_lower_all(&out[1], out_len - 1); 825 | if (out_len > 0) { mangle_upper_at(out, 0); } 826 | break; 827 | } 828 | case RULE_OP_MANGLE_UREST_LFIRST: { 829 | mangle_upper_all(&out[1], out_len - 1); 830 | if (out_len > 0) { mangle_lower_at(out, 0); } 831 | break; 832 | } 833 | case RULE_OP_MANGLE_TREST: { 834 | mangle_toggle_all(out, out_len); 835 | break; 836 | } 837 | case RULE_OP_MANGLE_TOGGLE_AT: { 838 | int offset = conv_ctoi(rule[++rule_pos]); 839 | 840 | if (out_len > offset) { mangle_toggle_at(out, offset); } 841 | break; 842 | } 843 | case RULE_OP_MANGLE_REVERSE: { 844 | mangle_reverse(out, out_len); 845 | break; 846 | } 847 | case RULE_OP_MANGLE_DUPEWORD: { 848 | out_len = mangle_double(out, out_len); 849 | break; 850 | } 851 | case RULE_OP_MANGLE_DUPEWORD_TIMES: { 852 | int times = conv_ctoi(rule[++rule_pos]); 853 | 854 | out_len = mangle_double_times(out, out_len, times); 855 | break; 856 | } 857 | case RULE_OP_MANGLE_REFLECT: { 858 | out_len = mangle_reflect(out, out_len); 859 | break; 860 | } 861 | case RULE_OP_MANGLE_ROTATE_LEFT: { 862 | mangle_rotate_left(out, out_len); 863 | break; 864 | } 865 | case RULE_OP_MANGLE_ROTATE_RIGHT: { 866 | mangle_rotate_right(out, out_len); 867 | break; 868 | } 869 | case RULE_OP_MANGLE_APPEND: { 870 | char chr = rule[++rule_pos]; 
871 | 872 | out_len = mangle_append(out, out_len, chr); 873 | break; 874 | } 875 | case RULE_OP_MANGLE_PREPEND: { 876 | char chr = rule[++rule_pos]; 877 | 878 | out_len = mangle_prepend(out, out_len, chr); 879 | break; 880 | } 881 | case RULE_OP_MANGLE_DELETE_FIRST: { 882 | out_len = mangle_delete_at(out, out_len, 0); 883 | break; 884 | } 885 | case RULE_OP_MANGLE_DELETE_LAST: { 886 | out_len = mangle_delete_at(out, out_len, out_len - 1); 887 | break; 888 | } 889 | case RULE_OP_MANGLE_DELETE_AT: { 890 | int offset = conv_ctoi(rule[++rule_pos]); 891 | 892 | out_len = mangle_delete_at(out, out_len, offset); 893 | break; 894 | } 895 | case RULE_OP_MANGLE_EXTRACT: { 896 | int offset = conv_ctoi(rule[++rule_pos]); 897 | int substr_len = conv_ctoi(rule[++rule_pos]); 898 | 899 | out_len = mangle_extract(out, out_len, offset, substr_len); 900 | break; 901 | } 902 | case RULE_OP_MANGLE_OMIT: { 903 | int offset = conv_ctoi(rule[++rule_pos]); 904 | int substr_len = conv_ctoi(rule[++rule_pos]); 905 | 906 | out_len = mangle_omit(out, out_len, offset, substr_len); 907 | break; 908 | } 909 | case RULE_OP_MANGLE_INSERT: { 910 | int offset = conv_ctoi(rule[++rule_pos]); 911 | char chr = rule[++rule_pos]; 912 | 913 | out_len = mangle_insert(out, out_len, offset, chr); 914 | break; 915 | } 916 | case RULE_OP_MANGLE_OVERSTRIKE: { 917 | int offset = conv_ctoi(rule[++rule_pos]); 918 | char chr = rule[++rule_pos]; 919 | 920 | mangle_overstrike(out, out_len, offset, chr); 921 | break; 922 | } 923 | case RULE_OP_MANGLE_TRUNCATE_AT: { 924 | int offset = conv_ctoi(rule[++rule_pos]); 925 | 926 | out_len = mangle_truncate_at(out, out_len, offset); 927 | break; 928 | } 929 | case RULE_OP_MANGLE_REPLACE: { 930 | char search = rule[++rule_pos]; 931 | char replace = rule[++rule_pos]; 932 | 933 | mangle_replace(out, out_len, search, replace); 934 | break; 935 | } 936 | case RULE_OP_MANGLE_PURGECHAR: { 937 | char search = rule[++rule_pos]; 938 | 939 | out_len = mangle_purgechar(out, out_len, 
search); 940 | break; 941 | } 942 | case RULE_OP_MANGLE_DUPECHAR_FIRST: { 943 | int substr_len = conv_ctoi(rule[++rule_pos]); 944 | 945 | out_len = mangle_dupechar_at(out, out_len, 0, substr_len); 946 | break; 947 | } 948 | case RULE_OP_MANGLE_DUPECHAR_LAST: { 949 | int substr_len = conv_ctoi(rule[++rule_pos]); 950 | 951 | out_len = mangle_dupechar_at(out, out_len, out_len - 1, substr_len); 952 | break; 953 | } 954 | case RULE_OP_MANGLE_DUPECHAR_ALL: { 955 | out_len = mangle_dupechar(out, out_len); 956 | break; 957 | } 958 | case RULE_OP_MANGLE_DUPEBLOCK_FIRST: { 959 | int substr_len = conv_ctoi(rule[++rule_pos]); 960 | 961 | out_len = mangle_dupeblock_prepend(out, out_len, substr_len); 962 | break; 963 | } 964 | case RULE_OP_MANGLE_DUPEBLOCK_LAST: { 965 | int substr_len = conv_ctoi(rule[++rule_pos]); 966 | 967 | out_len = mangle_dupeblock_append(out, out_len, substr_len); 968 | break; 969 | } 970 | case RULE_OP_MANGLE_SWITCH_FIRST: { 971 | if (out_len > 2) { mangle_switch_at(out, out_len, 0, 1); } 972 | break; 973 | } 974 | case RULE_OP_MANGLE_SWITCH_LAST: { 975 | if (out_len > 2) { mangle_switch_at(out, out_len, out_len - 1, out_len - 2); } 976 | break; 977 | } 978 | case RULE_OP_MANGLE_SWITCH_AT: { 979 | int offset1 = conv_ctoi(rule[++rule_pos]); 980 | int offset2 = conv_ctoi(rule[++rule_pos]); 981 | 982 | mangle_switch_at_check(out, out_len, offset1, offset2); 983 | break; 984 | } 985 | case RULE_OP_MANGLE_CHR_SHIFTL: { 986 | int offset = conv_ctoi(rule[++rule_pos]); 987 | 988 | mangle_chr_shiftl((uint8_t *) out, out_len, offset); 989 | break; 990 | } 991 | case RULE_OP_MANGLE_CHR_SHIFTR: { 992 | int offset = conv_ctoi(rule[++rule_pos]); 993 | 994 | mangle_chr_shiftr((uint8_t *) out, out_len, offset); 995 | break; 996 | } 997 | case RULE_OP_MANGLE_CHR_INCR: { 998 | int offset = conv_ctoi(rule[++rule_pos]); 999 | 1000 | mangle_chr_incr((uint8_t *) out, out_len, offset); 1001 | break; 1002 | } 1003 | case RULE_OP_MANGLE_CHR_DECR: { 1004 | int offset = 
conv_ctoi(rule[++rule_pos]); 1005 | 1006 | mangle_chr_decr((uint8_t *) out, out_len, offset); 1007 | break; 1008 | } 1009 | case RULE_OP_MANGLE_REPLACE_NP1: { 1010 | int offset = conv_ctoi(rule[++rule_pos]); 1011 | 1012 | if ((offset + 1) < out_len) { 1013 | mangle_overstrike(out, out_len, offset, out[offset + 1]); 1014 | } 1015 | break; 1016 | } 1017 | case RULE_OP_MANGLE_REPLACE_NM1: { 1018 | int offset = conv_ctoi(rule[++rule_pos]); 1019 | 1020 | if (offset >= 1 && offset < out_len) { 1021 | mangle_overstrike(out, out_len, offset, out[offset - 1]); 1022 | } 1023 | break; 1024 | } 1025 | case RULE_OP_MANGLE_TITLE: { 1026 | mangle_title(out, out_len); 1027 | break; 1028 | } 1029 | case RULE_OP_MANGLE_EXTRACT_MEMORY: { 1030 | if (mem_len < 0) { errno = MEMORY_ERROR; break; } 1031 | 1032 | int mem_offset = conv_ctoi(rule[++rule_pos]); 1033 | int substr_len = conv_ctoi(rule[++rule_pos]); 1034 | int out_offset = conv_ctoi(rule[++rule_pos]); 1035 | 1036 | // If this results in a negative value, the memory was such that this output becomes rejected 1037 | out_len = mangle_insert_multi(out, out_len, out_offset, mem, mem_len, mem_offset, substr_len); 1038 | break; 1039 | } 1040 | case RULE_OP_MANGLE_APPEND_MEMORY: { 1041 | if (mem_len < 0) { errno = MEMORY_ERROR; break; } 1042 | if ((out_len + mem_len) >= BLOCK_SIZE) { break; } 1043 | 1044 | memcpy(out + out_len, mem, mem_len); 1045 | out_len += mem_len; 1046 | break; 1047 | } 1048 | case RULE_OP_MANGLE_PREPEND_MEMORY: { 1049 | if (mem_len < 0) { errno = MEMORY_ERROR; break; } 1050 | if ((out_len + mem_len) >= BLOCK_SIZE) { break; } 1051 | 1052 | // Use the unused portion of mem as a buffer area 1053 | // We know it fits because (mem_len + out_len <= BLOCK_SIZE) 1054 | memcpy(mem + mem_len, out, out_len); 1055 | out_len += mem_len; 1056 | memcpy(out, mem, out_len); 1057 | break; 1058 | } 1059 | case RULE_OP_MEMORIZE_WORD: { 1060 | memcpy(mem, out, out_len); 1061 | mem_len = out_len; 1062 | break; 1063 | } 1064 | case 
RULE_OP_REJECT_LESS: { 1065 | int target_len = conv_ctoi(rule[++rule_pos]); 1066 | 1067 | if ( !(out_len <= target_len) ) { errno = REJECTED; } 1068 | break; 1069 | } 1070 | case RULE_OP_REJECT_GREATER: { 1071 | int target_len = conv_ctoi(rule[++rule_pos]); 1072 | 1073 | if ( !(out_len >= target_len) ) { errno = REJECTED; } 1074 | break; 1075 | } 1076 | case RULE_OP_REJECT_CONTAIN: { 1077 | char chr = rule[++rule_pos]; 1078 | 1079 | if (memchr(out, chr, out_len) != NULL) { errno = REJECTED; } 1080 | break; 1081 | } 1082 | case RULE_OP_REJECT_NOT_CONTAIN: { 1083 | char chr = rule[++rule_pos]; 1084 | 1085 | if (memchr(out, chr, out_len) == NULL) { errno = REJECTED; } 1086 | break; 1087 | } 1088 | case RULE_OP_REJECT_EQUAL_FIRST: { 1089 | char chr = rule[++rule_pos]; 1090 | 1091 | if (out_len < 1 || out[0] != chr) { errno = REJECTED; } 1092 | break; 1093 | } 1094 | case RULE_OP_REJECT_EQUAL_LAST: { 1095 | char chr = rule[++rule_pos]; 1096 | 1097 | if (out_len < 1 || out[out_len - 1] != chr) { errno = REJECTED; } 1098 | break; 1099 | } 1100 | case RULE_OP_REJECT_EQUAL_AT: { 1101 | int offset = conv_ctoi(rule[++rule_pos]); 1102 | char chr = rule[++rule_pos]; 1103 | 1104 | if (offset >= out_len || out[offset] != chr) { errno = REJECTED; } 1105 | break; 1106 | } 1107 | case RULE_OP_REJECT_CONTAINS: { 1108 | int min_count = conv_ctoi(rule[++rule_pos]); 1109 | char chr = rule[++rule_pos]; 1110 | 1111 | if (min_count > out_len) { errno = REJECTED; } 1112 | 1113 | int count = 0; 1114 | for (int pos = 0; pos < out_len; pos++) { 1115 | if (out[pos] == chr) { count++; } 1116 | if (count >= min_count) { break; } 1117 | } 1118 | 1119 | if (count < min_count) { errno = REJECTED; } 1120 | break; 1121 | } 1122 | case RULE_OP_REJECT_MEMORY: { 1123 | if (mem_len < 0) { errno = MEMORY_ERROR; break; } 1124 | if (out_len == mem_len && memcmp(out, mem, out_len) == 0) { errno = REJECTED; } 1125 | break; 1126 | } 1127 | default: 1128 | errno = UNKNOWN_RULE_OP; 1129 | break; 1130 | } 1131 | } 
1132 | 1133 | if (errno != 0) { return(errno); } 1134 | 1135 | // Add the null terminator and null extra bytes (just in case) 1136 | memset(out + out_len, 0, BLOCK_SIZE - out_len); 1137 | return(out_len); 1138 | } -------------------------------------------------------------------------------- /rules.h: -------------------------------------------------------------------------------- 1 | // ============================================================================= 2 | // Original Author: Jens Steube 3 | // Rewritten By: llamasoft 4 | // License: MIT 5 | // ============================================================================= 6 | 7 | #ifndef RULES_H 8 | #define RULES_H 9 | 10 | #if __STDC_VERSION__ < 199901L 11 | #error C99 compatibility is required 12 | #endif 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "uthash.h" 20 | 21 | // Input is truncated to BLOCK_SIZE - 1 bytes 22 | // Rules which push the output over BLOCK_SIZE - 1 are skipped 23 | #ifndef BLOCK_SIZE 24 | #define BLOCK_SIZE 64 25 | #endif 26 | 27 | 28 | typedef struct Rule { 29 | char *text; 30 | size_t length; 31 | } Rule; 32 | 33 | 34 | // Makes a copy of a rule and returns a pointer to the new copy 35 | Rule *clone_rule(Rule *source); 36 | 37 | // Frees the members of a rule structure 38 | void free_rule(Rule *rule); 39 | 40 | // Parses a rule into a Rule struct 41 | // The rule is checked for validity and all no-ops are removed 42 | int parse_rule(char *rule_text, int rule_text_length, Rule **output_rule); 43 | 44 | // Applies a rule to an input word and saves the output to output_word 45 | // If a rule operation would cause the length to extend beyond BLOCK_SIZE, the operation is skipped 46 | // Do not pass a hand-crafted rule struct into this function, run it through parse_rule() first 47 | int apply_rule(Rule *rule_to_apply, char *input_word, int input_len, char output_word[BLOCK_SIZE]); 48 | 49 | 50 | 51 | enum RULE_RC { 52 | INVALID_INPUT = 
-99, 53 | PREMATURE_END_OF_RULE, 54 | UNKNOWN_RULE_OP, 55 | INVALID_POSITIONAL, 56 | MEMORY_ERROR, 57 | REJECTED, 58 | UNKNOWN_ERROR 59 | }; 60 | 61 | 62 | #define RULE_OP_MANGLE_NOOP ':' 63 | #define RULE_OP_MANGLE_LREST 'l' 64 | #define RULE_OP_MANGLE_UREST 'u' 65 | #define RULE_OP_MANGLE_LREST_UFIRST 'c' 66 | #define RULE_OP_MANGLE_UREST_LFIRST 'C' 67 | #define RULE_OP_MANGLE_TREST 't' 68 | #define RULE_OP_MANGLE_TOGGLE_AT 'T' 69 | #define RULE_OP_MANGLE_REVERSE 'r' 70 | #define RULE_OP_MANGLE_DUPEWORD 'd' 71 | #define RULE_OP_MANGLE_DUPEWORD_TIMES 'p' 72 | #define RULE_OP_MANGLE_REFLECT 'f' 73 | #define RULE_OP_MANGLE_ROTATE_LEFT '{' 74 | #define RULE_OP_MANGLE_ROTATE_RIGHT '}' 75 | #define RULE_OP_MANGLE_APPEND '$' 76 | #define RULE_OP_MANGLE_PREPEND '^' 77 | #define RULE_OP_MANGLE_DELETE_FIRST '[' 78 | #define RULE_OP_MANGLE_DELETE_LAST ']' 79 | #define RULE_OP_MANGLE_DELETE_AT 'D' 80 | #define RULE_OP_MANGLE_EXTRACT 'x' 81 | #define RULE_OP_MANGLE_OMIT 'O' 82 | #define RULE_OP_MANGLE_INSERT 'i' 83 | #define RULE_OP_MANGLE_OVERSTRIKE 'o' 84 | #define RULE_OP_MANGLE_TRUNCATE_AT '\'' 85 | #define RULE_OP_MANGLE_REPLACE 's' 86 | #define RULE_OP_MANGLE_PURGECHAR '@' 87 | #define RULE_OP_MANGLE_DUPECHAR_FIRST 'z' 88 | #define RULE_OP_MANGLE_DUPECHAR_LAST 'Z' 89 | #define RULE_OP_MANGLE_DUPECHAR_ALL 'q' 90 | #define RULE_OP_MANGLE_EXTRACT_MEMORY 'X' 91 | #define RULE_OP_MANGLE_APPEND_MEMORY '4' 92 | #define RULE_OP_MANGLE_PREPEND_MEMORY '6' 93 | 94 | #define RULE_OP_MEMORIZE_WORD 'M' 95 | 96 | #define RULE_OP_REJECT_LESS '<' 97 | #define RULE_OP_REJECT_GREATER '>' 98 | #define RULE_OP_REJECT_CONTAIN '!' 
99 | #define RULE_OP_REJECT_NOT_CONTAIN '/' 100 | #define RULE_OP_REJECT_EQUAL_FIRST '(' 101 | #define RULE_OP_REJECT_EQUAL_LAST ')' 102 | #define RULE_OP_REJECT_EQUAL_AT '=' 103 | #define RULE_OP_REJECT_CONTAINS '%' 104 | #define RULE_OP_REJECT_MEMORY 'Q' 105 | 106 | #define RULE_OP_MANGLE_SWITCH_FIRST 'k' 107 | #define RULE_OP_MANGLE_SWITCH_LAST 'K' 108 | #define RULE_OP_MANGLE_SWITCH_AT '*' 109 | #define RULE_OP_MANGLE_CHR_SHIFTL 'L' 110 | #define RULE_OP_MANGLE_CHR_SHIFTR 'R' 111 | #define RULE_OP_MANGLE_CHR_INCR '+' 112 | #define RULE_OP_MANGLE_CHR_DECR '-' 113 | #define RULE_OP_MANGLE_REPLACE_NP1 '.' 114 | #define RULE_OP_MANGLE_REPLACE_NM1 ',' 115 | #define RULE_OP_MANGLE_DUPEBLOCK_FIRST 'y' 116 | #define RULE_OP_MANGLE_DUPEBLOCK_LAST 'Y' 117 | #define RULE_OP_MANGLE_TITLE 'E' 118 | 119 | #endif /* RULES_H */ -------------------------------------------------------------------------------- /uthash.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2003-2014, Troy D. Hanson http://troydhanson.github.com/uthash/ 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 12 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 13 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 14 | PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER 15 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 16 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 17 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 18 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 19 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 20 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | */ 23 | 24 | #ifndef UTHASH_H 25 | #define UTHASH_H 26 | 27 | #include /* memcmp,strlen */ 28 | #include /* ptrdiff_t */ 29 | #include /* exit() */ 30 | 31 | /* These macros use decltype or the earlier __typeof GNU extension. 32 | As decltype is only available in newer compilers (VS2010 or gcc 4.3+ 33 | when compiling c++ source) this code uses whatever method is needed 34 | or, for VS2008 where neither is available, uses casting workarounds. */ 35 | #if defined(_MSC_VER) /* MS compiler */ 36 | #if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ 37 | #define DECLTYPE(x) (decltype(x)) 38 | #else /* VS2008 or older (or VS2010 in C mode) */ 39 | #define NO_DECLTYPE 40 | #define DECLTYPE(x) 41 | #endif 42 | #elif defined(__BORLANDC__) || defined(__LCC__) || defined(__WATCOMC__) 43 | #define NO_DECLTYPE 44 | #define DECLTYPE(x) 45 | #else /* GNU, Sun and other compilers */ 46 | #define DECLTYPE(x) (__typeof(x)) 47 | #endif 48 | 49 | #ifdef NO_DECLTYPE 50 | #define DECLTYPE_ASSIGN(dst,src) \ 51 | do { \ 52 | char **_da_dst = (char**)(&(dst)); \ 53 | *_da_dst = (char*)(src); \ 54 | } while(0) 55 | #else 56 | #define DECLTYPE_ASSIGN(dst,src) \ 57 | do { \ 58 | (dst) = DECLTYPE(dst)(src); \ 59 | } while(0) 60 | #endif 61 | 62 | /* a number of the hash function use uint32_t which isn't defined on Pre VS2010 */ 63 | #if defined(_WIN32) 64 | #if defined(_MSC_VER) && _MSC_VER >= 1600 65 | #include 66 | #elif 
defined(__WATCOMC__) || defined(__MINGW32__) || defined(__CYGWIN__) 67 | #include 68 | #else 69 | typedef unsigned int uint32_t; 70 | typedef unsigned char uint8_t; 71 | #endif 72 | #elif defined(__GNUC__) && !defined(__VXWORKS__) 73 | #include 74 | #else 75 | typedef unsigned int uint32_t; 76 | typedef unsigned char uint8_t; 77 | #endif 78 | 79 | #define UTHASH_VERSION 1.9.9 80 | 81 | #ifndef uthash_fatal 82 | #define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */ 83 | #endif 84 | #ifndef uthash_malloc 85 | #define uthash_malloc(sz) malloc(sz) /* malloc fcn */ 86 | #endif 87 | #ifndef uthash_free 88 | #define uthash_free(ptr,sz) free(ptr) /* free fcn */ 89 | #endif 90 | 91 | #ifndef uthash_noexpand_fyi 92 | #define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ 93 | #endif 94 | #ifndef uthash_expand_fyi 95 | #define uthash_expand_fyi(tbl) /* can be defined to log expands */ 96 | #endif 97 | 98 | /* initial number of buckets */ 99 | #define HASH_INITIAL_NUM_BUCKETS 32U /* initial number of buckets */ 100 | #define HASH_INITIAL_NUM_BUCKETS_LOG2 5U /* lg2 of initial number of buckets */ 101 | #define HASH_BKT_CAPACITY_THRESH 10U /* expand when bucket count reaches */ 102 | 103 | /* calculate the element whose hash handle address is hhe */ 104 | #define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) 105 | 106 | #define HASH_FIND(hh,head,keyptr,keylen,out) \ 107 | do { \ 108 | out=NULL; \ 109 | if (head != NULL) { \ 110 | unsigned _hf_bkt,_hf_hashv; \ 111 | HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \ 112 | if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv) != 0) { \ 113 | HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \ 114 | keyptr,keylen,out); \ 115 | } \ 116 | } \ 117 | } while (0) 118 | 119 | #ifdef HASH_BLOOM 120 | #define HASH_BLOOM_BITLEN (1UL << HASH_BLOOM) 121 | #define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8UL) + (((HASH_BLOOM_BITLEN%8UL)!=0UL) ? 
1UL : 0UL) 122 | #define HASH_BLOOM_MAKE(tbl) \ 123 | do { \ 124 | (tbl)->bloom_nbits = HASH_BLOOM; \ 125 | (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ 126 | if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \ 127 | memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \ 128 | (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ 129 | } while (0) 130 | 131 | #define HASH_BLOOM_FREE(tbl) \ 132 | do { \ 133 | uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ 134 | } while (0) 135 | 136 | #define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8U] |= (1U << ((idx)%8U))) 137 | #define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8U] & (1U << ((idx)%8U))) 138 | 139 | #define HASH_BLOOM_ADD(tbl,hashv) \ 140 | HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U))) 141 | 142 | #define HASH_BLOOM_TEST(tbl,hashv) \ 143 | HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U))) 144 | 145 | #else 146 | #define HASH_BLOOM_MAKE(tbl) 147 | #define HASH_BLOOM_FREE(tbl) 148 | #define HASH_BLOOM_ADD(tbl,hashv) 149 | #define HASH_BLOOM_TEST(tbl,hashv) (1) 150 | #define HASH_BLOOM_BYTELEN 0U 151 | #endif 152 | 153 | #define HASH_MAKE_TABLE(hh,head) \ 154 | do { \ 155 | (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \ 156 | sizeof(UT_hash_table)); \ 157 | if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \ 158 | memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \ 159 | (head)->hh.tbl->tail = &((head)->hh); \ 160 | (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ 161 | (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ 162 | (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ 163 | (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ 164 | HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ 165 | if (! 
(head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \ 166 | memset((head)->hh.tbl->buckets, 0, \ 167 | HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ 168 | HASH_BLOOM_MAKE((head)->hh.tbl); \ 169 | (head)->hh.tbl->signature = HASH_SIGNATURE; \ 170 | } while(0) 171 | 172 | #define HASH_ADD(hh,head,fieldname,keylen_in,add) \ 173 | HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add) 174 | 175 | #define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced) \ 176 | do { \ 177 | replaced=NULL; \ 178 | HASH_FIND(hh,head,&((add)->fieldname),keylen_in,replaced); \ 179 | if (replaced!=NULL) { \ 180 | HASH_DELETE(hh,head,replaced); \ 181 | } \ 182 | HASH_ADD(hh,head,fieldname,keylen_in,add); \ 183 | } while(0) 184 | 185 | #define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ 186 | do { \ 187 | unsigned _ha_bkt; \ 188 | (add)->hh.next = NULL; \ 189 | (add)->hh.key = (char*)(keyptr); \ 190 | (add)->hh.keylen = (unsigned)(keylen_in); \ 191 | if (!(head)) { \ 192 | head = (add); \ 193 | (head)->hh.prev = NULL; \ 194 | HASH_MAKE_TABLE(hh,head); \ 195 | } else { \ 196 | (head)->hh.tbl->tail->next = (add); \ 197 | (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ 198 | (head)->hh.tbl->tail = &((add)->hh); \ 199 | } \ 200 | (head)->hh.tbl->num_items++; \ 201 | (add)->hh.tbl = (head)->hh.tbl; \ 202 | HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \ 203 | (add)->hh.hashv, _ha_bkt); \ 204 | HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \ 205 | HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \ 206 | HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \ 207 | HASH_FSCK(hh,head); \ 208 | } while(0) 209 | 210 | #define HASH_TO_BKT( hashv, num_bkts, bkt ) \ 211 | do { \ 212 | bkt = ((hashv) & ((num_bkts) - 1U)); \ 213 | } while(0) 214 | 215 | /* delete "delptr" from the hash table. 216 | * "the usual" patch-up process for the app-order doubly-linked-list. 217 | * The use of _hd_hh_del below deserves special explanation. 
218 | * These used to be expressed using (delptr) but that led to a bug 219 | * if someone used the same symbol for the head and deletee, like 220 | * HASH_DELETE(hh,users,users); 221 | * We want that to work, but by changing the head (users) below 222 | * we were forfeiting our ability to further refer to the deletee (users) 223 | * in the patch-up process. Solution: use scratch space to 224 | * copy the deletee pointer, then the latter references are via that 225 | * scratch pointer rather than through the repointed (users) symbol. 226 | */ 227 | #define HASH_DELETE(hh,head,delptr) \ 228 | do { \ 229 | struct UT_hash_handle *_hd_hh_del; \ 230 | if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \ 231 | uthash_free((head)->hh.tbl->buckets, \ 232 | (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ 233 | HASH_BLOOM_FREE((head)->hh.tbl); \ 234 | uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ 235 | head = NULL; \ 236 | } else { \ 237 | unsigned _hd_bkt; \ 238 | _hd_hh_del = &((delptr)->hh); \ 239 | if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \ 240 | (head)->hh.tbl->tail = \ 241 | (UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \ 242 | (head)->hh.tbl->hho); \ 243 | } \ 244 | if ((delptr)->hh.prev != NULL) { \ 245 | ((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \ 246 | (head)->hh.tbl->hho))->next = (delptr)->hh.next; \ 247 | } else { \ 248 | DECLTYPE_ASSIGN(head,(delptr)->hh.next); \ 249 | } \ 250 | if (_hd_hh_del->next != NULL) { \ 251 | ((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next + \ 252 | (head)->hh.tbl->hho))->prev = \ 253 | _hd_hh_del->prev; \ 254 | } \ 255 | HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ 256 | HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ 257 | (head)->hh.tbl->num_items--; \ 258 | } \ 259 | HASH_FSCK(hh,head); \ 260 | } while (0) 261 | 262 | 263 | /* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ 264 | #define 
HASH_FIND_STR(head,findstr,out) \ 265 | HASH_FIND(hh,head,findstr,(unsigned)strlen(findstr),out) 266 | #define HASH_ADD_STR(head,strfield,add) \ 267 | HASH_ADD(hh,head,strfield[0],(unsigned int)strlen(add->strfield),add) 268 | #define HASH_REPLACE_STR(head,strfield,add,replaced) \ 269 | HASH_REPLACE(hh,head,strfield[0],(unsigned)strlen(add->strfield),add,replaced) 270 | #define HASH_FIND_INT(head,findint,out) \ 271 | HASH_FIND(hh,head,findint,sizeof(int),out) 272 | #define HASH_ADD_INT(head,intfield,add) \ 273 | HASH_ADD(hh,head,intfield,sizeof(int),add) 274 | #define HASH_REPLACE_INT(head,intfield,add,replaced) \ 275 | HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced) 276 | #define HASH_FIND_PTR(head,findptr,out) \ 277 | HASH_FIND(hh,head,findptr,sizeof(void *),out) 278 | #define HASH_ADD_PTR(head,ptrfield,add) \ 279 | HASH_ADD(hh,head,ptrfield,sizeof(void *),add) 280 | #define HASH_REPLACE_PTR(head,ptrfield,add,replaced) \ 281 | HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced) 282 | #define HASH_DEL(head,delptr) \ 283 | HASH_DELETE(hh,head,delptr) 284 | 285 | /* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. 286 | * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. 287 | */ 288 | #ifdef HASH_DEBUG 289 | #define HASH_OOPS(...) 
do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0) 290 | #define HASH_FSCK(hh,head) \ 291 | do { \ 292 | struct UT_hash_handle *_thh; \ 293 | if (head) { \ 294 | unsigned _bkt_i; \ 295 | unsigned _count; \ 296 | char *_prev; \ 297 | _count = 0; \ 298 | for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \ 299 | unsigned _bkt_count = 0; \ 300 | _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ 301 | _prev = NULL; \ 302 | while (_thh) { \ 303 | if (_prev != (char*)(_thh->hh_prev)) { \ 304 | HASH_OOPS("invalid hh_prev %p, actual %p\n", \ 305 | _thh->hh_prev, _prev ); \ 306 | } \ 307 | _bkt_count++; \ 308 | _prev = (char*)(_thh); \ 309 | _thh = _thh->hh_next; \ 310 | } \ 311 | _count += _bkt_count; \ 312 | if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ 313 | HASH_OOPS("invalid bucket count %u, actual %u\n", \ 314 | (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ 315 | } \ 316 | } \ 317 | if (_count != (head)->hh.tbl->num_items) { \ 318 | HASH_OOPS("invalid hh item count %u, actual %u\n", \ 319 | (head)->hh.tbl->num_items, _count ); \ 320 | } \ 321 | /* traverse hh in app order; check next/prev integrity, count */ \ 322 | _count = 0; \ 323 | _prev = NULL; \ 324 | _thh = &(head)->hh; \ 325 | while (_thh) { \ 326 | _count++; \ 327 | if (_prev !=(char*)(_thh->prev)) { \ 328 | HASH_OOPS("invalid prev %p, actual %p\n", \ 329 | _thh->prev, _prev ); \ 330 | } \ 331 | _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ 332 | _thh = ( _thh->next ? 
(UT_hash_handle*)((char*)(_thh->next) + \ 333 | (head)->hh.tbl->hho) : NULL ); \ 334 | } \ 335 | if (_count != (head)->hh.tbl->num_items) { \ 336 | HASH_OOPS("invalid app item count %u, actual %u\n", \ 337 | (head)->hh.tbl->num_items, _count ); \ 338 | } \ 339 | } \ 340 | } while (0) 341 | #else 342 | #define HASH_FSCK(hh,head) 343 | #endif 344 | 345 | /* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to 346 | * the descriptor to which this macro is defined for tuning the hash function. 347 | * The app can #include <unistd.h> to get the prototype for write(2). */ 348 | #ifdef HASH_EMIT_KEYS 349 | #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \ 350 | do { \ 351 | unsigned _klen = fieldlen; \ 352 | write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ 353 | write(HASH_EMIT_KEYS, keyptr, (unsigned long)fieldlen); \ 354 | } while (0) 355 | #else 356 | #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) 357 | #endif 358 | 359 | /* default to Jenkins's hash unless overridden, e.g. -DHASH_FUNCTION=HASH_SAX */ 360 | #ifdef HASH_FUNCTION 361 | #define HASH_FCN HASH_FUNCTION 362 | #else 363 | #define HASH_FCN HASH_JEN 364 | #endif 365 | 366 | /* The Bernstein hash function, used in Perl prior to v5.6. Note ((x<<5)+x)=x*33. 
*/ 367 | #define HASH_BER(key,keylen,num_bkts,hashv,bkt) \ 368 | do { \ 369 | unsigned _hb_keylen=(unsigned)keylen; \ 370 | const unsigned char *_hb_key=(const unsigned char*)(key); \ 371 | (hashv) = 0; \ 372 | while (_hb_keylen-- != 0U) { \ 373 | (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++; \ 374 | } \ 375 | bkt = (hashv) & (num_bkts-1U); \ 376 | } while (0) 377 | 378 | 379 | /* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at 380 | * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */ 381 | #define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \ 382 | do { \ 383 | unsigned _sx_i; \ 384 | const unsigned char *_hs_key=(const unsigned char*)(key); \ 385 | hashv = 0; \ 386 | for(_sx_i=0; _sx_i < keylen; _sx_i++) { \ 387 | hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ 388 | } \ 389 | bkt = hashv & (num_bkts-1U); \ 390 | } while (0) 391 | /* FNV-1a variation */ 392 | #define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \ 393 | do { \ 394 | unsigned _fn_i; \ 395 | const unsigned char *_hf_key=(const unsigned char*)(key); \ 396 | hashv = 2166136261U; \ 397 | for(_fn_i=0; _fn_i < keylen; _fn_i++) { \ 398 | hashv = hashv ^ _hf_key[_fn_i]; \ 399 | hashv = hashv * 16777619U; \ 400 | } \ 401 | bkt = hashv & (num_bkts-1U); \ 402 | } while(0) 403 | 404 | #define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \ 405 | do { \ 406 | unsigned _ho_i; \ 407 | const unsigned char *_ho_key=(const unsigned char*)(key); \ 408 | hashv = 0; \ 409 | for(_ho_i=0; _ho_i < keylen; _ho_i++) { \ 410 | hashv += _ho_key[_ho_i]; \ 411 | hashv += (hashv << 10); \ 412 | hashv ^= (hashv >> 6); \ 413 | } \ 414 | hashv += (hashv << 3); \ 415 | hashv ^= (hashv >> 11); \ 416 | hashv += (hashv << 15); \ 417 | bkt = hashv & (num_bkts-1U); \ 418 | } while(0) 419 | 420 | #define HASH_JEN_MIX(a,b,c) \ 421 | do { \ 422 | a -= b; a -= c; a ^= ( c >> 13 ); \ 423 | b -= c; b -= a; b ^= ( a << 8 ); \ 424 | c -= a; c -= b; c ^= ( b >> 13 ); \ 425 | a -= b; a -= c; a ^= ( c >> 12 
); \ 426 | b -= c; b -= a; b ^= ( a << 16 ); \ 427 | c -= a; c -= b; c ^= ( b >> 5 ); \ 428 | a -= b; a -= c; a ^= ( c >> 3 ); \ 429 | b -= c; b -= a; b ^= ( a << 10 ); \ 430 | c -= a; c -= b; c ^= ( b >> 15 ); \ 431 | } while (0) 432 | 433 | #define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \ 434 | do { \ 435 | unsigned _hj_i,_hj_j,_hj_k; \ 436 | unsigned const char *_hj_key=(unsigned const char*)(key); \ 437 | hashv = 0xfeedbeefu; \ 438 | _hj_i = _hj_j = 0x9e3779b9u; \ 439 | _hj_k = (unsigned)(keylen); \ 440 | while (_hj_k >= 12U) { \ 441 | _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \ 442 | + ( (unsigned)_hj_key[2] << 16 ) \ 443 | + ( (unsigned)_hj_key[3] << 24 ) ); \ 444 | _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \ 445 | + ( (unsigned)_hj_key[6] << 16 ) \ 446 | + ( (unsigned)_hj_key[7] << 24 ) ); \ 447 | hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \ 448 | + ( (unsigned)_hj_key[10] << 16 ) \ 449 | + ( (unsigned)_hj_key[11] << 24 ) ); \ 450 | \ 451 | HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ 452 | \ 453 | _hj_key += 12; \ 454 | _hj_k -= 12U; \ 455 | } \ 456 | hashv += (unsigned)(keylen); \ 457 | switch ( _hj_k ) { \ 458 | case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); /* FALLTHROUGH */ \ 459 | case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); /* FALLTHROUGH */ \ 460 | case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); /* FALLTHROUGH */ \ 461 | case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); /* FALLTHROUGH */ \ 462 | case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); /* FALLTHROUGH */ \ 463 | case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); /* FALLTHROUGH */ \ 464 | case 5: _hj_j += _hj_key[4]; /* FALLTHROUGH */ \ 465 | case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); /* FALLTHROUGH */ \ 466 | case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); /* FALLTHROUGH */ \ 467 | case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); /* FALLTHROUGH */ \ 468 | case 1: _hj_i += _hj_key[0]; \ 469 | } \ 470 | HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ 471 | bkt = hashv & 
(num_bkts-1U); \ 472 | } while(0) 473 | 474 | /* The Paul Hsieh hash function */ 475 | #undef get16bits 476 | #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ 477 | || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) 478 | #define get16bits(d) (*((const uint16_t *) (d))) 479 | #endif 480 | 481 | #if !defined (get16bits) 482 | #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ 483 | +(uint32_t)(((const uint8_t *)(d))[0]) ) 484 | #endif 485 | #define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \ 486 | do { \ 487 | unsigned const char *_sfh_key=(unsigned const char*)(key); \ 488 | uint32_t _sfh_tmp, _sfh_len = (uint32_t)keylen; \ 489 | \ 490 | unsigned _sfh_rem = _sfh_len & 3U; \ 491 | _sfh_len >>= 2; \ 492 | hashv = 0xcafebabeu; \ 493 | \ 494 | /* Main loop */ \ 495 | for (;_sfh_len > 0U; _sfh_len--) { \ 496 | hashv += get16bits (_sfh_key); \ 497 | _sfh_tmp = ((uint32_t)(get16bits (_sfh_key+2)) << 11) ^ hashv; \ 498 | hashv = (hashv << 16) ^ _sfh_tmp; \ 499 | _sfh_key += 2U*sizeof (uint16_t); \ 500 | hashv += hashv >> 11; \ 501 | } \ 502 | \ 503 | /* Handle end cases */ \ 504 | switch (_sfh_rem) { \ 505 | case 3: hashv += get16bits (_sfh_key); \ 506 | hashv ^= hashv << 16; \ 507 | hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)]) << 18; \ 508 | hashv += hashv >> 11; \ 509 | break; \ 510 | case 2: hashv += get16bits (_sfh_key); \ 511 | hashv ^= hashv << 11; \ 512 | hashv += hashv >> 17; \ 513 | break; \ 514 | case 1: hashv += *_sfh_key; \ 515 | hashv ^= hashv << 10; \ 516 | hashv += hashv >> 1; \ 517 | } \ 518 | \ 519 | /* Force "avalanching" of final 127 bits */ \ 520 | hashv ^= hashv << 3; \ 521 | hashv += hashv >> 5; \ 522 | hashv ^= hashv << 4; \ 523 | hashv += hashv >> 17; \ 524 | hashv ^= hashv << 25; \ 525 | hashv += hashv >> 6; \ 526 | bkt = hashv & (num_bkts-1U); \ 527 | } while(0) 528 | 529 | #ifdef HASH_USING_NO_STRICT_ALIASING 530 | /* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for 
unaligned reads. 531 | * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. 532 | * MurmurHash uses the faster approach only on CPU's where we know it's safe. 533 | * 534 | * Note the preprocessor built-in defines can be emitted using: 535 | * 536 | * gcc -m64 -dM -E - < /dev/null (on gcc) 537 | * cc -## a.c (where a.c is a simple test file) (Sun Studio) 538 | */ 539 | #if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86)) 540 | #define MUR_GETBLOCK(p,i) p[i] 541 | #else /* non intel */ 542 | #define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 3UL) == 0UL) 543 | #define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 3UL) == 1UL) 544 | #define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 3UL) == 2UL) 545 | #define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 3UL) == 3UL) 546 | #define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL)) 547 | #if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__)) 548 | #define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24)) 549 | #define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16)) 550 | #define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8)) 551 | #else /* assume little endian non-intel */ 552 | #define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24)) 553 | #define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16)) 554 | #define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8)) 555 | #endif 556 | #define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \ 557 | (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \ 558 | (MUR_PLUS2_ALIGNED(p) ? 
MUR_TWO_TWO(p) : \ 559 | MUR_ONE_THREE(p)))) 560 | #endif 561 | #define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) 562 | #define MUR_FMIX(_h) \ 563 | do { \ 564 | _h ^= _h >> 16; \ 565 | _h *= 0x85ebca6bu; \ 566 | _h ^= _h >> 13; \ 567 | _h *= 0xc2b2ae35u; \ 568 | _h ^= _h >> 16; \ 569 | } while(0) 570 | 571 | #define HASH_MUR(key,keylen,num_bkts,hashv,bkt) \ 572 | do { \ 573 | const uint8_t *_mur_data = (const uint8_t*)(key); \ 574 | const int _mur_nblocks = (int)(keylen) / 4; \ 575 | uint32_t _mur_h1 = 0xf88D5353u; \ 576 | uint32_t _mur_c1 = 0xcc9e2d51u; \ 577 | uint32_t _mur_c2 = 0x1b873593u; \ 578 | uint32_t _mur_k1 = 0; \ 579 | const uint8_t *_mur_tail; \ 580 | const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+(_mur_nblocks*4)); \ 581 | int _mur_i; \ 582 | for(_mur_i = -_mur_nblocks; _mur_i!=0; _mur_i++) { \ 583 | _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \ 584 | _mur_k1 *= _mur_c1; \ 585 | _mur_k1 = MUR_ROTL32(_mur_k1,15); \ 586 | _mur_k1 *= _mur_c2; \ 587 | \ 588 | _mur_h1 ^= _mur_k1; \ 589 | _mur_h1 = MUR_ROTL32(_mur_h1,13); \ 590 | _mur_h1 = (_mur_h1*5U) + 0xe6546b64u; \ 591 | } \ 592 | _mur_tail = (const uint8_t*)(_mur_data + (_mur_nblocks*4)); \ 593 | _mur_k1=0; \ 594 | switch((keylen) & 3U) { \ 595 | case 3: _mur_k1 ^= (uint32_t)_mur_tail[2] << 16; /* FALLTHROUGH */ \ 596 | case 2: _mur_k1 ^= (uint32_t)_mur_tail[1] << 8; /* FALLTHROUGH */ \ 597 | case 1: _mur_k1 ^= (uint32_t)_mur_tail[0]; \ 598 | _mur_k1 *= _mur_c1; \ 599 | _mur_k1 = MUR_ROTL32(_mur_k1,15); \ 600 | _mur_k1 *= _mur_c2; \ 601 | _mur_h1 ^= _mur_k1; \ 602 | } \ 603 | _mur_h1 ^= (uint32_t)(keylen); \ 604 | MUR_FMIX(_mur_h1); \ 605 | hashv = _mur_h1; \ 606 | bkt = hashv & (num_bkts-1U); \ 607 | } while(0) 608 | #endif /* HASH_USING_NO_STRICT_ALIASING */ 609 | 610 | /* key comparison function; return 0 if keys equal */ 611 | #define HASH_KEYCMP(a,b,len) memcmp(a,b,(unsigned long)(len)) 612 | 613 | /* iterate over items in a known bucket to find desired item */ 614 | #define 
HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \ 615 | do { \ 616 | if (head.hh_head != NULL) { DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); } \ 617 | else { out=NULL; } \ 618 | while (out != NULL) { \ 619 | if ((out)->hh.keylen == (keylen_in)) { \ 620 | if ((HASH_KEYCMP((out)->hh.key,keyptr,keylen_in)) == 0) { break; } \ 621 | } \ 622 | if ((out)->hh.hh_next != NULL) { DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(out)->hh.hh_next)); } \ 623 | else { out = NULL; } \ 624 | } \ 625 | } while(0) 626 | 627 | /* add an item to a bucket */ 628 | #define HASH_ADD_TO_BKT(head,addhh) \ 629 | do { \ 630 | head.count++; \ 631 | (addhh)->hh_next = head.hh_head; \ 632 | (addhh)->hh_prev = NULL; \ 633 | if (head.hh_head != NULL) { (head).hh_head->hh_prev = (addhh); } \ 634 | (head).hh_head=addhh; \ 635 | if ((head.count >= ((head.expand_mult+1U) * HASH_BKT_CAPACITY_THRESH)) \ 636 | && ((addhh)->tbl->noexpand != 1U)) { \ 637 | HASH_EXPAND_BUCKETS((addhh)->tbl); \ 638 | } \ 639 | } while(0) 640 | 641 | /* remove an item from a given bucket */ 642 | #define HASH_DEL_IN_BKT(hh,head,hh_del) \ 643 | (head).count--; \ 644 | if ((head).hh_head == hh_del) { \ 645 | (head).hh_head = hh_del->hh_next; \ 646 | } \ 647 | if (hh_del->hh_prev) { \ 648 | hh_del->hh_prev->hh_next = hh_del->hh_next; \ 649 | } \ 650 | if (hh_del->hh_next) { \ 651 | hh_del->hh_next->hh_prev = hh_del->hh_prev; \ 652 | } 653 | 654 | /* Bucket expansion has the effect of doubling the number of buckets 655 | * and redistributing the items into the new buckets. Ideally the 656 | * items will distribute more or less evenly into the new buckets 657 | * (the extent to which this is true is a measure of the quality of 658 | * the hash function as it applies to the key domain). 659 | * 660 | * With the items distributed into more buckets, the chain length 661 | * (item count) in each bucket is reduced. Thus by expanding buckets 662 | * the hash keeps a bound on the chain length. 
This bounded chain 663 | * length is the essence of how a hash provides constant time lookup. 664 | * 665 | * The calculation of tbl->ideal_chain_maxlen below deserves some 666 | * explanation. First, keep in mind that we're calculating the ideal 667 | * maximum chain length based on the *new* (doubled) bucket count. 668 | * In fractions this is just n/b (n=number of items,b=new num buckets). 669 | * Since the ideal chain length is an integer, we want to calculate 670 | * ceil(n/b). We don't depend on floating point arithmetic in this 671 | * hash, so to calculate ceil(n/b) with integers we could write 672 | * 673 | * ceil(n/b) = (n/b) + ((n%b)?1:0) 674 | * 675 | * and in fact a previous version of this hash did just that. 676 | * But now we have improved things a bit by recognizing that b is 677 | * always a power of two. We keep its base 2 log handy (call it lb), 678 | * so now we can write this with a bit shift and logical AND: 679 | * 680 | * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) 681 | * 682 | */ 683 | #define HASH_EXPAND_BUCKETS(tbl) \ 684 | do { \ 685 | unsigned _he_bkt; \ 686 | unsigned _he_bkt_i; \ 687 | struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ 688 | UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ 689 | _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \ 690 | 2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ 691 | if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \ 692 | memset(_he_new_buckets, 0, \ 693 | 2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ 694 | tbl->ideal_chain_maxlen = \ 695 | (tbl->num_items >> (tbl->log2_num_buckets+1U)) + \ 696 | (((tbl->num_items & ((tbl->num_buckets*2U)-1U)) != 0U) ? 
1U : 0U); \ 697 | tbl->nonideal_items = 0; \ 698 | for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \ 699 | { \ 700 | _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \ 701 | while (_he_thh != NULL) { \ 702 | _he_hh_nxt = _he_thh->hh_next; \ 703 | HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2U, _he_bkt); \ 704 | _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \ 705 | if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \ 706 | tbl->nonideal_items++; \ 707 | _he_newbkt->expand_mult = _he_newbkt->count / \ 708 | tbl->ideal_chain_maxlen; \ 709 | } \ 710 | _he_thh->hh_prev = NULL; \ 711 | _he_thh->hh_next = _he_newbkt->hh_head; \ 712 | if (_he_newbkt->hh_head != NULL) { _he_newbkt->hh_head->hh_prev = \ 713 | _he_thh; } \ 714 | _he_newbkt->hh_head = _he_thh; \ 715 | _he_thh = _he_hh_nxt; \ 716 | } \ 717 | } \ 718 | uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ 719 | tbl->num_buckets *= 2U; \ 720 | tbl->log2_num_buckets++; \ 721 | tbl->buckets = _he_new_buckets; \ 722 | tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \ 723 | (tbl->ineff_expands+1U) : 0U; \ 724 | if (tbl->ineff_expands > 1U) { \ 725 | tbl->noexpand=1; \ 726 | uthash_noexpand_fyi(tbl); \ 727 | } \ 728 | uthash_expand_fyi(tbl); \ 729 | } while(0) 730 | 731 | 732 | /* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ 733 | /* Note that HASH_SORT assumes the hash handle name to be hh. 734 | * HASH_SRT was added to allow the hash handle name to be passed in. 
*/ 735 | #define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn) 736 | #define HASH_SRT(hh,head,cmpfcn) \ 737 | do { \ 738 | unsigned _hs_i; \ 739 | unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \ 740 | struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ 741 | if (head != NULL) { \ 742 | _hs_insize = 1; \ 743 | _hs_looping = 1; \ 744 | _hs_list = &((head)->hh); \ 745 | while (_hs_looping != 0U) { \ 746 | _hs_p = _hs_list; \ 747 | _hs_list = NULL; \ 748 | _hs_tail = NULL; \ 749 | _hs_nmerges = 0; \ 750 | while (_hs_p != NULL) { \ 751 | _hs_nmerges++; \ 752 | _hs_q = _hs_p; \ 753 | _hs_psize = 0; \ 754 | for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \ 755 | _hs_psize++; \ 756 | _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \ 757 | ((void*)((char*)(_hs_q->next) + \ 758 | (head)->hh.tbl->hho)) : NULL); \ 759 | if (! (_hs_q) ) { break; } \ 760 | } \ 761 | _hs_qsize = _hs_insize; \ 762 | while ((_hs_psize > 0U) || ((_hs_qsize > 0U) && (_hs_q != NULL))) {\ 763 | if (_hs_psize == 0U) { \ 764 | _hs_e = _hs_q; \ 765 | _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \ 766 | ((void*)((char*)(_hs_q->next) + \ 767 | (head)->hh.tbl->hho)) : NULL); \ 768 | _hs_qsize--; \ 769 | } else if ( (_hs_qsize == 0U) || (_hs_q == NULL) ) { \ 770 | _hs_e = _hs_p; \ 771 | if (_hs_p != NULL){ \ 772 | _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ? \ 773 | ((void*)((char*)(_hs_p->next) + \ 774 | (head)->hh.tbl->hho)) : NULL); \ 775 | } \ 776 | _hs_psize--; \ 777 | } else if (( \ 778 | cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \ 779 | DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \ 780 | ) <= 0) { \ 781 | _hs_e = _hs_p; \ 782 | if (_hs_p != NULL){ \ 783 | _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ? \ 784 | ((void*)((char*)(_hs_p->next) + \ 785 | (head)->hh.tbl->hho)) : NULL); \ 786 | } \ 787 | _hs_psize--; \ 788 | } else { \ 789 | _hs_e = _hs_q; \ 790 | _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? 
\ 791 | ((void*)((char*)(_hs_q->next) + \ 792 | (head)->hh.tbl->hho)) : NULL); \ 793 | _hs_qsize--; \ 794 | } \ 795 | if ( _hs_tail != NULL ) { \ 796 | _hs_tail->next = ((_hs_e != NULL) ? \ 797 | ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \ 798 | } else { \ 799 | _hs_list = _hs_e; \ 800 | } \ 801 | if (_hs_e != NULL) { \ 802 | _hs_e->prev = ((_hs_tail != NULL) ? \ 803 | ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \ 804 | } \ 805 | _hs_tail = _hs_e; \ 806 | } \ 807 | _hs_p = _hs_q; \ 808 | } \ 809 | if (_hs_tail != NULL){ \ 810 | _hs_tail->next = NULL; \ 811 | } \ 812 | if ( _hs_nmerges <= 1U ) { \ 813 | _hs_looping=0; \ 814 | (head)->hh.tbl->tail = _hs_tail; \ 815 | DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ 816 | } \ 817 | _hs_insize *= 2U; \ 818 | } \ 819 | HASH_FSCK(hh,head); \ 820 | } \ 821 | } while (0) 822 | 823 | /* This function selects items from one hash into another hash. 824 | * The end result is that the selected items have dual presence 825 | * in both hashes. There is no copy of the items made; rather 826 | * they are added into the new hash through a secondary 827 | * hash handle that must be present in the structure. 
*/ 828 | #define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ 829 | do { \ 830 | unsigned _src_bkt, _dst_bkt; \ 831 | void *_last_elt=NULL, *_elt; \ 832 | UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \ 833 | ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \ 834 | if (src != NULL) { \ 835 | for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ 836 | for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ 837 | _src_hh != NULL; \ 838 | _src_hh = _src_hh->hh_next) { \ 839 | _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ 840 | if (cond(_elt)) { \ 841 | _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \ 842 | _dst_hh->key = _src_hh->key; \ 843 | _dst_hh->keylen = _src_hh->keylen; \ 844 | _dst_hh->hashv = _src_hh->hashv; \ 845 | _dst_hh->prev = _last_elt; \ 846 | _dst_hh->next = NULL; \ 847 | if (_last_elt_hh != NULL) { _last_elt_hh->next = _elt; } \ 848 | if (dst == NULL) { \ 849 | DECLTYPE_ASSIGN(dst,_elt); \ 850 | HASH_MAKE_TABLE(hh_dst,dst); \ 851 | } else { \ 852 | _dst_hh->tbl = (dst)->hh_dst.tbl; \ 853 | } \ 854 | HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ 855 | HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \ 856 | (dst)->hh_dst.tbl->num_items++; \ 857 | _last_elt = _elt; \ 858 | _last_elt_hh = _dst_hh; \ 859 | } \ 860 | } \ 861 | } \ 862 | } \ 863 | HASH_FSCK(hh_dst,dst); \ 864 | } while (0) 865 | 866 | #define HASH_CLEAR(hh,head) \ 867 | do { \ 868 | if (head != NULL) { \ 869 | uthash_free((head)->hh.tbl->buckets, \ 870 | (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \ 871 | HASH_BLOOM_FREE((head)->hh.tbl); \ 872 | uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ 873 | (head)=NULL; \ 874 | } \ 875 | } while(0) 876 | 877 | #define HASH_OVERHEAD(hh,head) \ 878 | ((head != NULL) ? 
( \ 879 | (size_t)(((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \ 880 | ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \ 881 | sizeof(UT_hash_table) + \ 882 | (HASH_BLOOM_BYTELEN))) : 0U) 883 | 884 | #ifdef NO_DECLTYPE 885 | #define HASH_ITER(hh,head,el,tmp) \ 886 | for(((el)=(head)), ((*(char**)(&(tmp)))=(char*)((head!=NULL)?(head)->hh.next:NULL)); \ 887 | (el) != NULL; ((el)=(tmp)), ((*(char**)(&(tmp)))=(char*)((tmp!=NULL)?(tmp)->hh.next:NULL))) 888 | #else 889 | #define HASH_ITER(hh,head,el,tmp) \ 890 | for(((el)=(head)), ((tmp)=DECLTYPE(el)((head!=NULL)?(head)->hh.next:NULL)); \ 891 | (el) != NULL; ((el)=(tmp)), ((tmp)=DECLTYPE(el)((tmp!=NULL)?(tmp)->hh.next:NULL))) 892 | #endif 893 | 894 | /* obtain a count of items in the hash */ 895 | #define HASH_COUNT(head) HASH_CNT(hh,head) 896 | #define HASH_CNT(hh,head) ((head != NULL)?((head)->hh.tbl->num_items):0U) 897 | 898 | typedef struct UT_hash_bucket { 899 | struct UT_hash_handle *hh_head; 900 | unsigned count; 901 | 902 | /* expand_mult is normally set to 0. In this situation, the max chain length 903 | * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If 904 | * the bucket's chain exceeds this length, bucket expansion is triggered). 905 | * However, setting expand_mult to a non-zero value delays bucket expansion 906 | * (that would be triggered by additions to this particular bucket) 907 | * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. 908 | * (The multiplier is simply expand_mult+1). The whole idea of this 909 | * multiplier is to reduce bucket expansions, since they are expensive, in 910 | * situations where we know that a particular bucket tends to be overused. 911 | * It is better to let its chain length grow to a longer yet-still-bounded 912 | * value, than to do an O(n) bucket expansion too often. 
913 | */ 914 | unsigned expand_mult; 915 | 916 | } UT_hash_bucket; 917 | 918 | /* random signature used only to find hash tables in external analysis */ 919 | #define HASH_SIGNATURE 0xa0111fe1u 920 | #define HASH_BLOOM_SIGNATURE 0xb12220f2u 921 | 922 | typedef struct UT_hash_table { 923 | UT_hash_bucket *buckets; 924 | unsigned num_buckets, log2_num_buckets; 925 | unsigned num_items; 926 | struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ 927 | ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element) */ 928 | 929 | /* in an ideal situation (all buckets used equally), no bucket would have 930 | * more than ceil(#items/#buckets) items. that's the ideal chain length. */ 931 | unsigned ideal_chain_maxlen; 932 | 933 | /* nonideal_items is the number of items in the hash whose chain position 934 | * exceeds the ideal chain maxlen. these items pay the penalty for an uneven 935 | * hash distribution; reaching them in a chain traversal takes >ideal steps */ 936 | unsigned nonideal_items; 937 | 938 | /* ineffective expands occur when a bucket doubling was performed, but 939 | * afterward, more than half the items in the hash had nonideal chain 940 | * positions. If this happens on two consecutive expansions we inhibit any 941 | * further expansion, as it's not helping; this happens when the hash 942 | * function isn't a good fit for the key domain. When expansion is inhibited 943 | * the hash will still work, albeit no longer in constant time. 
*/ 944 | unsigned ineff_expands, noexpand; 945 | 946 | uint32_t signature; /* used only to find hash tables in external analysis */ 947 | #ifdef HASH_BLOOM 948 | uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ 949 | uint8_t *bloom_bv; 950 | uint8_t bloom_nbits; 951 | #endif 952 | 953 | } UT_hash_table; 954 | 955 | typedef struct UT_hash_handle { 956 | struct UT_hash_table *tbl; 957 | void *prev; /* prev element in app order */ 958 | void *next; /* next element in app order */ 959 | struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ 960 | struct UT_hash_handle *hh_next; /* next hh in bucket order */ 961 | void *key; /* ptr to enclosing struct's key */ 962 | unsigned keylen; /* enclosing struct's key len */ 963 | unsigned hashv; /* result of hash-fcn(key) */ 964 | } UT_hash_handle; 965 | 966 | #endif /* UTHASH_H */ --------------------------------------------------------------------------------