├── .gitattributes ├── makefile ├── COPYING.txt ├── sniff.c ├── gamenotes.txt ├── compress.h ├── README.txt ├── README.md ├── .gitignore ├── exhal.c ├── memmem.c ├── inhal.c ├── compress.c └── uthash.h /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | *.sln merge=union 7 | *.csproj merge=union 8 | *.vbproj merge=union 9 | *.fsproj merge=union 10 | *.dbproj merge=union 11 | 12 | # Standard to msysgit 13 | *.doc diff=astextplain 14 | *.DOC diff=astextplain 15 | *.docx diff=astextplain 16 | *.DOCX diff=astextplain 17 | *.dot diff=astextplain 18 | *.DOT diff=astextplain 19 | *.pdf diff=astextplain 20 | *.PDF diff=astextplain 21 | *.rtf diff=astextplain 22 | *.RTF diff=astextplain 23 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | # HAL (de)compression tools 2 | # copyright 2013 Devin Acker (Revenant) 3 | # See copying.txt for legal information. 
4 | 5 | CFLAGS += -std=c99 -O3 -Wall -s 6 | 7 | # Add extension when compiling for Windows 8 | ifeq ($(OS), Windows_NT) 9 | CC = gcc 10 | EXT = .exe 11 | endif 12 | 13 | # Comment this line to suppress detailed decompression information on stdout 14 | DEFINES += -DEXTRA_OUT 15 | # Uncomment this line to enable debug output 16 | #DEFINES += -DDEBUG_OUT 17 | 18 | CFLAGS += $(DEFINES) 19 | 20 | all: inhal$(EXT) exhal$(EXT) sniff$(EXT) 21 | 22 | clean: 23 | $(RM) inhal$(EXT) exhal$(EXT) sniff$(EXT) *.o 24 | 25 | sniff$(EXT): sniff.o compress.o memmem.o 26 | $(CC) $(CFLAGS) -o $@ $^ 27 | 28 | inhal$(EXT): inhal.o compress.o memmem.o 29 | $(CC) $(CFLAGS) -o $@ $^ 30 | 31 | exhal$(EXT): exhal.o compress.o memmem.o 32 | $(CC) $(CFLAGS) -o $@ $^ 33 | -------------------------------------------------------------------------------- /COPYING.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Devin Acker 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. -------------------------------------------------------------------------------- /sniff.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include "compress.h" 6 | 7 | int main (int argc, char **argv) { 8 | printf("sniff - " __DATE__ " " __TIME__"\nby Devin Acker (Revenant)\n\n"); 9 | 10 | if (argc != 2) { 11 | fprintf(stderr, "Usage:\n%s romfile offset outfile\n" 12 | "Example: %s kirbybowl.sfc\n", 13 | argv[0], argv[0]); 14 | exit(-1); 15 | } 16 | 17 | FILE *infile; 18 | 19 | // open ROM file for input 20 | infile = fopen(argv[1], "rb"); 21 | if (!infile) { 22 | fprintf(stderr, "Error: unable to open %s\n", argv[1]); 23 | exit(-1); 24 | } 25 | 26 | size_t outputsize, filesize; 27 | uint8_t unpacked[DATA_SIZE] = {0}; 28 | unpack_stats_t stats; 29 | 30 | // decompress the file 31 | fseek(infile, 0, SEEK_END); 32 | filesize = ftell(infile); 33 | 34 | for (int i = 0; i < filesize; i++) { 35 | fseek(infile, i, SEEK_SET); 36 | outputsize = exhal_unpack_from_file(infile, i, unpacked, &stats); 37 | 38 | if (outputsize > stats.inputsize 39 | && outputsize >= 1024 /* TODO set minimum sizes/ratio/etc */) { 40 | printf("%06x: %u -> %u bytes\n", i, (unsigned)stats.inputsize, (unsigned)outputsize); 41 | } 42 | } 43 | 44 | fclose(infile); 45 | } 46 | -------------------------------------------------------------------------------- /gamenotes.txt: -------------------------------------------------------------------------------- 1 | This is an incomplete list of decompression routine addresses for games which are supported by the 2 | exhal compression tool. 
You can use these addresses to try finding compressed data by searching a 3 | disassembly (or by searching the ROM binary for the call's opcode bytes, e.g. JSR $87CA -> 20 CA 87; JSL $8087C6 -> 22 C6 87 80.) 4 | 5 | Alcahest (SNES) 6 | JP: JSR $87CA (in bank 80) or JSL $8087C6 7 | 8 | Arcana / Card Master (SNES) 9 | US/JP: JSR $8766 (in bank 00) or JSL $808762 10 | 11 | EarthBound / Mother 2 (SNES) 12 | US: JSL $C41A9E 13 | JP: JSL $C419EA 14 | 15 | HAL's Hole in One Golf / Jumbo Ozaki no Hole in One (SNES) 16 | US/EU/JP: JSR $89AA (in bank 00) or JSL $0089A6 17 | 18 | HyperZone (SNES) 19 | US/EU/JP: JSR $89E6 (in bank 00) or JSL $0089E2 20 | 21 | Itoi Shigesato no Bass Tsuri No. 1 (SNES) 22 | JP: JSL $01DD8A or JSL $01DEAA 23 | 24 | Kirby no KiraKira Kids (SNES) 25 | JP: JSR $89DF (in bank 80) or JSL $8089DB 26 | 27 | Kirby Super Star (SNES) 28 | US/EU/JP: JSL $00889A 29 | 30 | Kirby's Dream Course / Kirby Bowl (SNES) 31 | US/EU: JSL $809F18 32 | JP: JSL $809F1A 33 | 34 | Kirby's Dream Land 3 (SNES) 35 | US/JP: JSL $00AA55 or JSL $00AA63 36 | 37 | Othello World (SNES) 38 | JP: JSR $CC48 (in bank 00/80) 39 | 40 | Okamoto Ayako to Match Play Golf (SNES) 41 | JP: JSR $983B (in bank 00) 42 | 43 | SimCity (SNES) [unused?] 
44 | US: JSR $90DD 45 | EU: JSR $90D3 46 | JP: JSR $90A6 47 | 48 | SimCity 2000 (SNES) 49 | US/EU/JP: JSL $C10000 50 | 51 | Special Tee Shot (BS-X) 52 | JP: JSL $838E13 53 | 54 | Super Famicom Box BIOS (SNES) 55 | JP: JSR $88A2 (in bank 00) or JSL $00889E 56 | 57 | Vegas Stakes (SNES) 58 | US/EU: JSR $87DB (in bank 00) or JSL $0087D7 59 | JP: JSR $87F6 (in bank 00) or JSL $0087F2 60 | -------------------------------------------------------------------------------- /compress.h: -------------------------------------------------------------------------------- 1 | /* 2 | exhal / inhal (de)compression routines 3 | 4 | Copyright (c) 2013-2018 Devin Acker 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. 
23 | 24 | */ 25 | 26 | #ifndef _COMPRESS_H 27 | #define _COMPRESS_H 28 | 29 | #ifdef __cplusplus 30 | extern "C" { 31 | #endif 32 | 33 | #include 34 | #include 35 | #include 36 | 37 | #define DATA_SIZE 65536 38 | 39 | typedef struct { 40 | // Speed up compression somewhat by avoiding less common compression methods 41 | int fast; 42 | // Improve compression ratios by performing a shortest-path search 43 | int optimal; 44 | } pack_options_t; 45 | 46 | typedef struct { 47 | // Number of times each compression method occurred in the input 48 | int methoduse[7]; 49 | // Size of compressed input 50 | size_t inputsize; 51 | } unpack_stats_t; 52 | 53 | size_t exhal_pack2 (const uint8_t *unpacked, size_t inputsize, uint8_t *packed, const pack_options_t *options); 54 | size_t exhal_pack (const uint8_t *unpacked, size_t inputsize, uint8_t *packed, int fast); 55 | size_t exhal_unpack(const uint8_t *packed, uint8_t *unpacked, unpack_stats_t *stats); 56 | 57 | size_t exhal_unpack_from_file(FILE *file, size_t offset, uint8_t *unpacked, unpack_stats_t *stats); 58 | 59 | #ifdef EXHAL_OLD_NAMES 60 | #define pack(...) exhal_pack(__VA_ARGS__) 61 | #define unpack(...) exhal_unpack(__VA_ARGS__, NULL) 62 | #define unpack_from_file(...) exhal_unpack_from_file(__VA_ARGS__, NULL) 63 | #endif 64 | 65 | #ifdef __cplusplus 66 | } 67 | #endif 68 | 69 | // end include guard 70 | #endif 71 | -------------------------------------------------------------------------------- /README.txt: -------------------------------------------------------------------------------- 1 | exhal / inhal 2 | HAL Laboratory NES/SNES/GB (de)compression tools 3 | (version 1.21) 4 | by Devin Acker (Revenant), 2013-2015 5 | https://github.com/devinacker 6 | 7 | exhal and inhal are tools designed to decompress and recompress/insert data used by several NES, 8 | SNES and Game Boy games developed by HAL Laboratory. 
9 | 10 | Due to the design of the original decompression algorithm (and hardware limitations), the size of 11 | a file to be compressed is limited to 64 kilobytes (65,536 bytes). Please note that depending on 12 | which system you are working with, the actual useful limit may be much smaller. 13 | 14 | The compression routine used by inhal is very fast and capable of producing output which is smaller 15 | than that of HAL's original compressor. 16 | 17 | Source code is available at https://github.com/devinacker and is released under the terms of the 18 | MIT license. See COPYING.txt for legal info. You are welcome to use compress.c in your own projects 19 | (if you do, I'd like to hear about it!) 20 | 21 | To use exhal (the decompressor): 22 | exhal romfile offset outfile 23 | 24 | To insert compressed data into a ROM: 25 | inhal [-fast] infile romfile offset 26 | 27 | To write compressed data to a new file: 28 | inhal [-fast] -n infile outfile 29 | 30 | Offsets can be specified in either hexadecimal (recommended) or decimal. 31 | 32 | Using the -fast switch results in compression which is about 3 to 4 times faster, but with 33 | slightly larger output data. Use this if you don't care about data sizes being 100% optimal. 34 | 35 | This is a list of games which are known to use the supported compression method, or are assumed 36 | to, based on a binary search of the games' ROMs: 37 | 38 | Adventures of Lolo (NES/GB) 39 | Adventures of Lolo 2 (NES) 40 | Adventures of Lolo 3 (NES) 41 | Alcahest (SNES) 42 | Arcana / Card Master (SNES) 43 | EarthBound / Mother 2 (SNES) 44 | Ghostbusters II (GB) 45 | HAL's Hole in One Golf / Jumbo Ozaki no Hole in One (SNES) 46 | HyperZone (SNES) 47 | Itoi Shigesato no Bass Tsuri No. 
1 (SNES) 48 | Kirby no KiraKira Kids (SNES) 49 | Kirby Super Star (SNES) 50 | Kirby's Adventure (NES) 51 | Kirby's Dream Course / Kirby Bowl (SNES) 52 | Kirby's Dream Land (GB) 53 | Kirby's Dream Land 2 (GB) 54 | Kirby's Dream Land 3 (SNES) 55 | Kirby's Pinball Land (GB) 56 | Kirby's Star Stacker / KiraKira Kids (GB) 57 | NES Open Tournament Golf (NES) 58 | New Ghostbusters II (NES) 59 | Othello World (SNES) 60 | Okamoto Ayako to Match Play Golf (SNES) 61 | Revenge of the Gator / 66 Hiki no Wani Daikoushin (GB) 62 | SimCity (SNES) [unused?] 63 | SimCity 2000 (SNES) 64 | Special Tee Shot (BS-X) 65 | Super Famicom Box BIOS (SNES) 66 | Trax / Totsugeki! Ponkotsu Tank (GB) 67 | Vegas Stakes (SNES/GB) 68 | 69 | Also note, unfortunately, that exhal cannot automatically detect or locate compressed data. 70 | The included file "gamenotes.txt" contains an incomplete list of decompression routine addresses 71 | to make searching easier. 72 | 73 | These tools were originally used in the development of my Kirby's Dream Course editor. I hope you 74 | find your own exciting use for them. (I'm not the only Kirby hacker in the West, right? *sob*) 75 | 76 | Contact me: 77 | 78 | Email : d at revenant1.net 79 | IRC : "devin" on irc.badnik.net 80 | "Revenant" on irc.oftc.net 81 | "Revenant`" on irc.synirc.net 82 | and irc.dal.net 83 | Forums: http://jul.rustedlogic.net/profile.php?id=504 84 | http://www.romhacking.net/forum/index.php?action=profile;u=10455 85 | 86 | Special thanks to: 87 | 88 | - andlabs for helping me make the list of supported games 89 | - BMF54123 for naming the programs 90 | - Tiiffi and Anthony J. Bentley for misc. build fixes 91 | - You for downloading (and using?) 
my software -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # exhal / inhal 2 | **HAL Laboratory NES/SNES/GB (de)compression tools** 3 | **(version 1.21)** 4 | by Devin Acker (Revenant), 2013-2015 5 | https://github.com/devinacker 6 | 7 | exhal and inhal are tools designed to decompress and recompress/insert data used by several NES, SNES and Game Boy games developed by HAL Laboratory. 8 | 9 | Due to the design of the original decompression algorithm (and hardware limitations), the size of a file to be compressed is limited to 64 kilobytes (65,536 bytes). Please note that depending on which system you are working with, the actual useful limit may be much smaller. 10 | 11 | The compression routine used by inhal is very fast and capable of producing output which is smaller than that of HAL's original compressor. 12 | 13 | Source code is available at https://github.com/devinacker and is released under the terms of the MIT license. See COPYING.txt for legal info. You are welcome to use compress.c in your own projects (if you do, I'd like to hear about it!) 14 | 15 | **To use exhal (the decompressor):** 16 | exhal romfile offset outfile 17 | 18 | **To insert compressed data into a ROM:** 19 | inhal [-fast] infile romfile offset 20 | 21 | **To write compressed data to a new file:** 22 | inhal [-fast] -n infile outfile 23 | 24 | Offsets can be specified in either hexadecimal (recommended) or decimal. 25 | 26 | Using the -fast switch results in compression which is about 3 to 4 times faster, but with slightly larger output data. Use this if you don't care about data sizes being 100% identical to the original compressed data. 
27 | 28 | This is a list of games which are known to use the supported compression method, or are assumed to, based on a binary search of the games' ROMs: 29 | 30 | * Adventures of Lolo (NES/GB) 31 | * Adventures of Lolo 2 (NES) 32 | * Adventures of Lolo 3 (NES) 33 | * Alcahest (SNES) 34 | * Arcana / Card Master (SNES) 35 | * EarthBound / Mother 2 (SNES) 36 | * Ghostbusters II (GB) 37 | * HAL's Hole in One Golf / Jumbo Ozaki no Hole in One (SNES) 38 | * HyperZone (SNES) 39 | * Itoi Shigesato no Bass Tsuri No. 1 (SNES) 40 | * Kirby no KiraKira Kids (SNES) 41 | * Kirby Super Star (SNES) 42 | * Kirby's Adventure (NES) 43 | * Kirby's Dream Course / Kirby Bowl (SNES) 44 | * Kirby's Dream Land (GB) 45 | * Kirby's Dream Land 2 (GB) 46 | * Kirby's Dream Land 3 (SNES) 47 | * Kirby's Pinball Land (GB) 48 | * Kirby's Star Stacker / KiraKira Kids (GB) 49 | * NES Open Tournament Golf (NES) 50 | * New Ghostbusters II (NES) 51 | * Othello World (SNES) 52 | * Okamoto Ayako to Match Play Golf (SNES) 53 | * Revenge of the Gator / 66 Hiki no Wani Daikoushin (GB) 54 | * SimCity (SNES) [unused?] 55 | * SimCity 2000 (SNES) 56 | * Special Tee Shot (BS-X) 57 | * Super Famicom Box BIOS (SNES) 58 | * Trax / Totsugeki! Ponkotsu Tank (GB) 59 | * Vegas Stakes (SNES/GB) 60 | 61 | Also note, unfortunately, that exhal cannot automatically detect or locate compressed data. The included file "gamenotes.txt" contains an incomplete list of decompression routine addresses to make searching easier. 62 | 63 | These tools were originally used in the development of my Kirby's Dream Course editor. I hope you find your own exciting use for them. (I'm not the only Kirby hacker in the West, right? 
*sob*) 64 | 65 | ## Contact me 66 | 67 | * Email : d at revenant1.net 68 | * IRC : 69 | * "devin" on irc.badnik.net 70 | * "Revenant" on irc.oftc.net 71 | * "Revenant`" on irc.synirc.net and irc.dal.net 72 | * Forums: 73 | * http://jul.rustedlogic.net/profile.php?id=504 74 | * http://www.romhacking.net/forum/index.php?action=profile;u=10455 75 | 76 | ## Special thanks to 77 | 78 | * andlabs for helping me make the list of supported games 79 | * BMF54123 for naming the programs 80 | * Tiiffi and Anthony J. Bentley for misc. build fixes 81 | * You for downloading (and using?) my software -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.bin 2 | *.exe 3 | *.o 4 | *.zip 5 | *.sfc 6 | *.nes 7 | *.gb 8 | *.gbc 9 | 10 | ################# 11 | ## Eclipse 12 | ################# 13 | 14 | *.pydevproject 15 | .project 16 | .metadata 17 | bin/ 18 | tmp/ 19 | *.tmp 20 | *.bak 21 | *.swp 22 | *~.nib 23 | local.properties 24 | .classpath 25 | .settings/ 26 | .loadpath 27 | 28 | # External tool builders 29 | .externalToolBuilders/ 30 | 31 | # Locally stored "Eclipse launch configurations" 32 | *.launch 33 | 34 | # CDT-specific 35 | .cproject 36 | 37 | # PDT-specific 38 | .buildpath 39 | 40 | 41 | ################# 42 | ## Visual Studio 43 | ################# 44 | 45 | ## Ignore Visual Studio temporary files, build results, and 46 | ## files generated by popular Visual Studio add-ons. 
47 | 48 | # User-specific files 49 | *.suo 50 | *.user 51 | *.sln.docstates 52 | 53 | # Build results 54 | 55 | [Dd]ebug/ 56 | [Rr]elease/ 57 | x64/ 58 | build/ 59 | [Bb]in/ 60 | [Oo]bj/ 61 | 62 | # MSTest test Results 63 | [Tt]est[Rr]esult*/ 64 | [Bb]uild[Ll]og.* 65 | 66 | *_i.c 67 | *_p.c 68 | *.ilk 69 | *.meta 70 | *.obj 71 | *.pch 72 | *.pdb 73 | *.pgc 74 | *.pgd 75 | *.rsp 76 | *.sbr 77 | *.tlb 78 | *.tli 79 | *.tlh 80 | *.tmp 81 | *.tmp_proj 82 | *.log 83 | *.vspscc 84 | *.vssscc 85 | .builds 86 | *.pidb 87 | *.log 88 | *.scc 89 | 90 | # Visual C++ cache files 91 | ipch/ 92 | *.aps 93 | *.ncb 94 | *.opensdf 95 | *.sdf 96 | *.cachefile 97 | 98 | # Visual Studio profiler 99 | *.psess 100 | *.vsp 101 | *.vspx 102 | 103 | # Guidance Automation Toolkit 104 | *.gpState 105 | 106 | # ReSharper is a .NET coding add-in 107 | _ReSharper*/ 108 | *.[Rr]e[Ss]harper 109 | 110 | # TeamCity is a build add-in 111 | _TeamCity* 112 | 113 | # DotCover is a Code Coverage Tool 114 | *.dotCover 115 | 116 | # NCrunch 117 | *.ncrunch* 118 | .*crunch*.local.xml 119 | 120 | # Installshield output folder 121 | [Ee]xpress/ 122 | 123 | # DocProject is a documentation generator add-in 124 | DocProject/buildhelp/ 125 | DocProject/Help/*.HxT 126 | DocProject/Help/*.HxC 127 | DocProject/Help/*.hhc 128 | DocProject/Help/*.hhk 129 | DocProject/Help/*.hhp 130 | DocProject/Help/Html2 131 | DocProject/Help/html 132 | 133 | # Click-Once directory 134 | publish/ 135 | 136 | # Publish Web Output 137 | *.Publish.xml 138 | *.pubxml 139 | 140 | # NuGet Packages Directory 141 | ## TODO: If you have NuGet Package Restore enabled, uncomment the next line 142 | #packages/ 143 | 144 | # Windows Azure Build Output 145 | csx 146 | *.build.csdef 147 | 148 | # Windows Store app package directory 149 | AppPackages/ 150 | 151 | # Others 152 | sql/ 153 | *.Cache 154 | ClientBin/ 155 | [Ss]tyle[Cc]op.* 156 | ~$* 157 | *~ 158 | *.dbmdl 159 | *.[Pp]ublish.xml 160 | *.pfx 161 | *.publishsettings 162 | 163 | # 
RIA/Silverlight projects 164 | Generated_Code/ 165 | 166 | # Backup & report files from converting an old project file to a newer 167 | # Visual Studio version. Backup files are not needed, because we have git ;-) 168 | _UpgradeReport_Files/ 169 | Backup*/ 170 | UpgradeLog*.XML 171 | UpgradeLog*.htm 172 | 173 | # SQL Server files 174 | App_Data/*.mdf 175 | App_Data/*.ldf 176 | 177 | ############# 178 | ## Windows detritus 179 | ############# 180 | 181 | # Windows image file caches 182 | Thumbs.db 183 | ehthumbs.db 184 | 185 | # Folder config file 186 | Desktop.ini 187 | 188 | # Recycle Bin used on file shares 189 | $RECYCLE.BIN/ 190 | 191 | # Mac crap 192 | .DS_Store 193 | 194 | 195 | ############# 196 | ## Python 197 | ############# 198 | 199 | *.py[co] 200 | 201 | # Packages 202 | *.egg 203 | *.egg-info 204 | dist/ 205 | build/ 206 | eggs/ 207 | parts/ 208 | var/ 209 | sdist/ 210 | develop-eggs/ 211 | .installed.cfg 212 | 213 | # Installer logs 214 | pip-log.txt 215 | 216 | # Unit test / coverage reports 217 | .coverage 218 | .tox 219 | 220 | #Translations 221 | *.mo 222 | 223 | #Mr Developer 224 | .mr.developer.cfg 225 | -------------------------------------------------------------------------------- /exhal.c: -------------------------------------------------------------------------------- 1 | /* 2 | exhal - HAL Laboratory decompression tool 3 | 4 | Usage: 5 | exhal romfile offset outfile 6 | 7 | Copyright (c) 2013 Devin Acker 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be 
included in 17 | all copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | THE SOFTWARE. 26 | 27 | */ 28 | 29 | #include 30 | #include 31 | #include "compress.h" 32 | 33 | int main (int argc, char **argv) { 34 | printf("exhal - " __DATE__ " " __TIME__"\nby Devin Acker (Revenant)\n\n"); 35 | 36 | if (argc != 4) { 37 | fprintf(stderr, "Usage:\n%s romfile offset outfile\n" 38 | "Example: %s kirbybowl.sfc 0x70000 test.bin\n\n" 39 | "offset can be in either decimal or hex.\n", 40 | argv[0], argv[0]); 41 | exit(-1); 42 | } 43 | 44 | FILE *infile, *outfile; 45 | 46 | // open ROM file for input 47 | infile = fopen(argv[1], "rb"); 48 | if (!infile) { 49 | fprintf(stderr, "Error: unable to open %s\n", argv[1]); 50 | exit(-1); 51 | } 52 | 53 | // open target file for output 54 | outfile = fopen(argv[3], "wb"); 55 | if (!outfile) { 56 | fprintf(stderr, "Error: unable to open %s\n", argv[3]); 57 | exit(-1); 58 | } 59 | 60 | size_t outputsize, fileoffset; 61 | uint8_t unpacked[DATA_SIZE] = {0}; 62 | unpack_stats_t stats; 63 | 64 | fileoffset = strtol(argv[2], NULL, 0); 65 | 66 | // decompress the file 67 | fseek(infile, 0, SEEK_END); 68 | if (fileoffset < ftell(infile)) { 69 | outputsize = exhal_unpack_from_file(infile, fileoffset, unpacked, &stats); 70 | } else { 71 | fprintf(stderr, "Error: Unable to decompress %s because an invalid offset was specified\n" 72 | " (must be between zero and 0x%lX).\n", argv[1], ftell(infile)); 73 | exit(-1); 74 | } 75 | 76 | if (outputsize) { 77 | // write the uncompressed 
data to the file 78 | fseek(outfile, 0, SEEK_SET); 79 | fwrite((const void*)unpacked, 1, outputsize, outfile); 80 | if (ferror(outfile)) { 81 | perror("Error writing output file"); 82 | exit(-1); 83 | } 84 | 85 | #ifdef EXTRA_OUT 86 | printf("Method Uses\n"); 87 | printf("No compression : %i\n", stats.methoduse[0]); 88 | printf("RLE (8-bit) : %i\n", stats.methoduse[1]); 89 | printf("RLE (16-bit) : %i\n", stats.methoduse[2]); 90 | printf("RLE (sequence) : %i\n", stats.methoduse[3]); 91 | printf("Backref (normal) : %i\n", stats.methoduse[4]); 92 | printf("Backref (rotate) : %i\n", stats.methoduse[5]); 93 | printf("Backref (reverse): %i\n", stats.methoduse[6]); 94 | printf("\n"); 95 | #endif 96 | 97 | printf("Compressed size: %lu bytes\n", (unsigned long)stats.inputsize); 98 | printf("Uncompressed size: %lu bytes\n", (unsigned long)outputsize); 99 | printf("Compression ratio: %4.2f:1\n", (double)outputsize / stats.inputsize); 100 | } else { 101 | fprintf(stderr, "Error: Unable to decompress %s because the output would have been larger than\n" 102 | " 64 kb. The input at 0x%lX is likely not valid compressed data.\n", argv[1], (unsigned long)fileoffset); 103 | } 104 | 105 | fclose(infile); 106 | fclose(outfile); 107 | } 108 | -------------------------------------------------------------------------------- /memmem.c: -------------------------------------------------------------------------------- 1 | /*- 2 | * SPDX-License-Identifier: MIT 3 | * 4 | * Copyright © 2005-2020 Rich Felker, et al. 
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining 7 | * a copy of this software and associated documentation files (the 8 | * "Software"), to deal in the Software without restriction, including 9 | * without limitation the rights to use, copy, modify, merge, publish, 10 | * distribute, sublicense, and/or sell copies of the Software, and to 11 | * permit persons to whom the Software is furnished to do so, subject to 12 | * the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 21 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 | */ 25 | 26 | #include 27 | #include 28 | 29 | static char *twobyte_memmem(const unsigned char *h, size_t k, const unsigned char *n) 30 | { 31 | uint16_t nw = n[0]<<8 | n[1], hw = h[0]<<8 | h[1]; 32 | for (h+=2, k-=2; k; k--, hw = hw<<8 | *h++) 33 | if (hw == nw) return (char *)h-2; 34 | return hw == nw ? (char *)h-2 : 0; 35 | } 36 | 37 | static char *threebyte_memmem(const unsigned char *h, size_t k, const unsigned char *n) 38 | { 39 | uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8; 40 | uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8; 41 | for (h+=3, k-=3; k; k--, hw = (hw|*h++)<<8) 42 | if (hw == nw) return (char *)h-3; 43 | return hw == nw ? 
(char *)h-3 : 0; 44 | } 45 | 46 | static char *fourbyte_memmem(const unsigned char *h, size_t k, const unsigned char *n) 47 | { 48 | uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3]; 49 | uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3]; 50 | for (h+=4, k-=4; k; k--, hw = hw<<8 | *h++) 51 | if (hw == nw) return (char *)h-4; 52 | return hw == nw ? (char *)h-4 : 0; 53 | } 54 | 55 | #define MAX(a,b) ((a)>(b)?(a):(b)) 56 | #define MIN(a,b) ((a)<(b)?(a):(b)) 57 | 58 | #define BITOP(a,b,op) \ 59 | ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a)))) 60 | 61 | static char *twoway_memmem(const unsigned char *h, const unsigned char *z, const unsigned char *n, size_t l) 62 | { 63 | size_t i, ip, jp, k, p, ms, p0, mem, mem0; 64 | size_t byteset[32 / sizeof(size_t)] = { 0 }; 65 | size_t shift[256]; 66 | 67 | /* Computing length of needle and fill shift table */ 68 | for (i=0; i n[jp+k]) { 80 | jp += k; 81 | k = 1; 82 | p = jp - ip; 83 | } else { 84 | ip = jp++; 85 | k = p = 1; 86 | } 87 | } 88 | ms = ip; 89 | p0 = p; 90 | 91 | /* And with the opposite comparison */ 92 | ip = -1; jp = 0; k = p = 1; 93 | while (jp+k ms+1) ms = ip; 109 | else p = p0; 110 | 111 | /* Periodic needle? 
*/ 112 | if (memcmp(n, n+p, ms+1)) { 113 | mem0 = 0; 114 | p = MAX(ms, l-ms-1) + 1; 115 | } else mem0 = l-p; 116 | mem = 0; 117 | 118 | /* Search loop */ 119 | for (;;) { 120 | /* If remainder of haystack is shorter than needle, done */ 121 | if (z-h < l) return 0; 122 | 123 | /* Check last byte first; advance by shift on mismatch */ 124 | if (BITOP(byteset, h[l-1], &)) { 125 | k = l-shift[h[l-1]]; 126 | if (k) { 127 | if (k < mem) k = mem; 128 | h += k; 129 | mem = 0; 130 | continue; 131 | } 132 | } else { 133 | h += l; 134 | mem = 0; 135 | continue; 136 | } 137 | 138 | /* Compare right half */ 139 | for (k=MAX(ms+1,mem); kmem && n[k-1] == h[k-1]; k--); 147 | if (k <= mem) return (char *)h; 148 | h += p; 149 | mem = mem0; 150 | } 151 | } 152 | 153 | void *memmem(const void *h0, size_t k, const void *n0, size_t l) 154 | { 155 | const unsigned char *h = h0, *n = n0; 156 | 157 | /* Return immediately on empty needle */ 158 | if (!l) return (void *)h; 159 | 160 | /* Return immediately when needle is longer than haystack */ 161 | if (k 31 | #include 32 | #include 33 | #include "compress.h" 34 | 35 | int main (int argc, char **argv) { 36 | printf("inhal - " __DATE__ " " __TIME__"\nby Devin Acker (Revenant)\n\n"); 37 | 38 | if (argc < 4) { 39 | fprintf(stderr, "To insert compressed data into a ROM:\n" 40 | "%s [options] infile romfile offset\n" 41 | 42 | "To write compressed data to a new file:\n" 43 | "%s [options] -n infile outfile\n\n" 44 | 45 | "Compression options:\n" 46 | "-fast avoid less common compression methods (faster compression, but larger output)\n" 47 | "-opt perform shortest-path searching (smaller output, but slower compression)\n" 48 | "\n" 49 | "-1 fastest compression (same as -fast)\n" 50 | "-2 fast compression (default)\n" 51 | "-3 better compression (same as -fast -opt)\n" 52 | "-4 best compression (same as -opt)\n" 53 | 54 | "\nExample:\n%s -fast test.chr kirbybowl.sfc 0x70000\n" 55 | "%s -n test.chr test-packed.bin\n\n" 56 | "offset can be in 
either decimal or hex.\n", 57 | argv[0], argv[0], argv[0], argv[0]); 58 | exit(-1); 59 | } 60 | 61 | FILE *infile, *outfile; 62 | int fileoffset; 63 | int newfile = 0; 64 | pack_options_t options; 65 | 66 | for (int i = 1; i < argc; i++) { 67 | if (!strcmp(argv[i], "-n")) { 68 | newfile = 1; 69 | } else if (!strcmp(argv[i], "-fast")) { 70 | options.fast = 1; 71 | } else if (!strcmp(argv[i], "-opt")) { 72 | options.optimal = 1; 73 | } else if (!strcmp(argv[i], "-1")) { 74 | options.fast = 1; 75 | options.optimal = 0; 76 | } else if (!strcmp(argv[i], "-2")) { 77 | options.fast = 0; 78 | options.optimal = 0; 79 | } else if (!strcmp(argv[i], "-3")) { 80 | options.fast = 1; 81 | options.optimal = 1; 82 | } else if (!strcmp(argv[i], "-4")) { 83 | options.fast = 0; 84 | options.optimal = 1; 85 | } 86 | } 87 | 88 | if (options.fast) 89 | printf("Fast compression enabled.\n"); 90 | if (options.optimal) 91 | printf("Optimal compression (shortest path) enabled.\n"); 92 | 93 | // check for -n switch 94 | if (newfile) { 95 | fileoffset = 0; 96 | infile = fopen(argv[argc - 2], "rb"); 97 | outfile = fopen(argv[argc - 1], "wb"); 98 | } else { 99 | fileoffset = strtol(argv[argc - 1], NULL, 0); 100 | infile = fopen(argv[argc - 3], "rb"); 101 | outfile = fopen(argv[argc - 2], "r+b"); 102 | } 103 | 104 | if (!infile) { 105 | fprintf(stderr, "Error: unable to open input file\n"); 106 | exit(-1); 107 | } 108 | if (!outfile) { 109 | fprintf(stderr, "Error: unable to open output file\n"); 110 | exit(-1); 111 | } 112 | 113 | size_t inputsize, outputsize; 114 | uint8_t unpacked[DATA_SIZE]; 115 | uint8_t packed[DATA_SIZE] = {0}; 116 | 117 | // check size of input file 118 | fseek(infile, 0, SEEK_END); 119 | inputsize = ftell(infile); 120 | 121 | printf("Uncompressed size: %lu bytes\n", (unsigned long)inputsize); 122 | 123 | if (inputsize > DATA_SIZE) { 124 | fprintf(stderr, "Error: File must be a maximum of 65,536 bytes!\n"); 125 | exit(-1); 126 | } else if (!inputsize) { 127 | 
fprintf(stderr, "Error: Input file is empty!\n"); 128 | exit(-1); 129 | } 130 | 131 | // read the file 132 | fseek(infile, 0, SEEK_SET); 133 | fread(unpacked, sizeof(uint8_t), inputsize, infile); 134 | if (ferror(infile)) { 135 | perror("Error reading input file"); 136 | exit(-1); 137 | } 138 | 139 | // compress the file 140 | clock_t time = clock(); 141 | outputsize = exhal_pack2(unpacked, inputsize, packed, &options); 142 | time = clock() - time; 143 | 144 | if (outputsize) { 145 | // write the compressed data to the file 146 | fseek(outfile, fileoffset, SEEK_SET); 147 | fwrite((const void*)packed, 1, outputsize, outfile); 148 | if (ferror(outfile)) { 149 | perror("Error writing output file"); 150 | exit(-1); 151 | } 152 | 153 | printf("Compressed size: %lu bytes\n", (unsigned long)outputsize); 154 | printf("Compression ratio: %4.2f:1\n", (double)inputsize / outputsize); 155 | printf("Compression time: %4.3f seconds\n\n", (double)time / CLOCKS_PER_SEC); 156 | 157 | printf("Inserted at 0x%06X - 0x%06lX\n", fileoffset, ftell(outfile) - 1); 158 | } else { 159 | fprintf(stderr, "Error: File could not be compressed because the resulting compressed data would\n" 160 | " have been larger than 64 kb.\n"); 161 | } 162 | 163 | fclose(infile); 164 | fclose(outfile); 165 | } 166 | -------------------------------------------------------------------------------- /compress.c: -------------------------------------------------------------------------------- 1 | /* 2 | exhal / inhal (de)compression routines 3 | 4 | This code is released under the terms of the MIT license. 5 | See COPYING.txt for details. 
6 | 7 | Copyright (c) 2013-2018 Devin Acker 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in 17 | all copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | THE SOFTWARE. 26 | 27 | */ 28 | 29 | #include 30 | #include 31 | #include "compress.h" 32 | #define HASH_NONFATAL_OOM 1 33 | #undef uthash_nonfatal_oom 34 | #define uthash_nonfatal_oom(elt) do { \ 35 | exhal_failed_insertion_element = (elt); \ 36 | goto exhal_handle_uthash_failure; \ 37 | } while (0) 38 | #include "uthash.h" 39 | 40 | // memmem.c 41 | void *memmem(const void *h0, size_t k, const void *n0, size_t l); 42 | 43 | #ifdef DEBUG_OUT 44 | #define debug(...) printf(__VA_ARGS__) 45 | #else 46 | #define debug(...) 
47 | #endif 48 | 49 | #define RUN_SIZE 32 50 | #define LONG_RUN_SIZE 1024 51 | 52 | // compression method values for backref_t and rle_t 53 | typedef enum { 54 | rle_8 = 0, 55 | rle_16 = 1, 56 | rle_seq = 2, 57 | 58 | lz_norm = 0, 59 | lz_rot = 1, 60 | lz_rev = 2 61 | } method_e; 62 | 63 | // used to store and compare backref candidates 64 | typedef struct { 65 | uint16_t offset, size; 66 | method_e method; 67 | } backref_t; 68 | 69 | // used to store RLE candidates 70 | typedef struct { 71 | uint16_t size, data; 72 | method_e method; 73 | } rle_t; 74 | 75 | // used to hash and index byte tuples 76 | typedef struct { 77 | int bytes; 78 | uint16_t offset; 79 | UT_hash_handle hh; 80 | } tuple_t; 81 | // turn 4 bytes into a single integer for quicker hashing/searching 82 | #define COMBINE(w, x, y, z) (((unsigned)(w) << 24) | ((x) << 16) | ((y) << 8) | (z)) 83 | 84 | typedef struct { 85 | const uint8_t *unpacked; 86 | size_t inputsize; 87 | uint8_t *packed; 88 | 89 | // current input/output positions 90 | uint32_t inpos; 91 | uint32_t outpos; 92 | 93 | // used to collect data which should be written uncompressed 94 | uint8_t dontpack[LONG_RUN_SIZE]; 95 | uint16_t dontpacksize; 96 | 97 | // index of first locations of byte-tuples used to speed up LZ string search 98 | tuple_t *offsets; 99 | 100 | } pack_context_t; 101 | 102 | // ------------------------------------------------------------------------------------------------ 103 | static void pack_context_free(pack_context_t* this) { 104 | tuple_t *curr, *temp; 105 | HASH_ITER(hh, this->offsets, curr, temp) { 106 | HASH_DEL(this->offsets, curr); 107 | free(curr); 108 | } 109 | 110 | free(this); 111 | } 112 | 113 | // ------------------------------------------------------------------------------------------------ 114 | static pack_context_t* pack_context_alloc(const uint8_t *unpacked, size_t inputsize, uint8_t *packed) { 115 | pack_context_t *this; 116 | 117 | if (inputsize > DATA_SIZE) return 0; 118 | if (!(this = 
calloc(1, sizeof(*this)))) return 0; 119 | 120 | this->unpacked = unpacked; 121 | this->inputsize = inputsize; 122 | this->packed = packed; 123 | 124 | tuple_t *exhal_failed_insertion_element = 0; 125 | 126 | // index locations of all 4-byte sequences occurring in the input 127 | for (uint16_t i = 0; inputsize >= 4 && i < inputsize - 4; i++) { 128 | tuple_t *tuple; 129 | int currbytes = (int)COMBINE(unpacked[i], unpacked[i+1], unpacked[i+2], unpacked[i+3]); 130 | 131 | // has this one been indexed already 132 | HASH_FIND_INT(this->offsets, &currbytes, tuple); 133 | if (!tuple) { 134 | tuple = (tuple_t*)malloc(sizeof(tuple_t)); 135 | if (!tuple) { 136 | pack_context_free(this); 137 | return 0; 138 | } 139 | tuple->bytes = currbytes; 140 | tuple->offset = i; 141 | HASH_ADD_INT(this->offsets, bytes, tuple); 142 | } 143 | } 144 | 145 | return this; 146 | 147 | exhal_handle_uthash_failure: 148 | free(exhal_failed_insertion_element); 149 | pack_context_free(this); 150 | return 0; 151 | } 152 | 153 | // ------------------------------------------------------------------------------------------------ 154 | static inline size_t input_bytes_left(const pack_context_t* this) { 155 | return this->inputsize - this->inpos; 156 | } 157 | 158 | // ------------------------------------------------------------------------------------------------ 159 | // Reverses the order of bits in a byte. 160 | // One of the back reference methods does this. As far as game data goes, it seems to be 161 | // pretty useful for compressing graphics. 
162 | static inline uint8_t rotate (uint8_t i) { 163 | uint8_t j = 0; 164 | if (i & 0x01) j |= 0x80; 165 | if (i & 0x02) j |= 0x40; 166 | if (i & 0x04) j |= 0x20; 167 | if (i & 0x08) j |= 0x10; 168 | if (i & 0x10) j |= 0x08; 169 | if (i & 0x20) j |= 0x04; 170 | if (i & 0x40) j |= 0x02; 171 | if (i & 0x80) j |= 0x01; 172 | 173 | return j; 174 | } 175 | 176 | // ------------------------------------------------------------------------------------------------ 177 | static inline void rle_candidate(rle_t *candidate, size_t size, uint16_t data, method_e method) { 178 | // if this is better than the current candidate, use it 179 | if (method == rle_16 && size >= 2*LONG_RUN_SIZE) 180 | size = 2*LONG_RUN_SIZE; 181 | else if (size > LONG_RUN_SIZE) 182 | size = LONG_RUN_SIZE; 183 | 184 | if (size > 2 && size > candidate->size) { 185 | candidate->size = size; 186 | candidate->data = data; 187 | candidate->method = method; 188 | 189 | debug("\trle_check: found new candidate (size = %d, method = %d)\n", size, method); 190 | } 191 | } 192 | 193 | // ------------------------------------------------------------------------------------------------ 194 | // Searches for possible RLE compressed data. 195 | // start and current are positions within the uncompressed input stream. 196 | // fast enables faster compression by ignoring sequence RLE. 
197 | static void rle_check(const pack_context_t *this, rle_t *candidate, int fast) { 198 | const uint8_t *start = this->unpacked; 199 | const uint8_t *current = start + this->inpos; 200 | size_t insize = this->inputsize; 201 | size_t size; 202 | 203 | candidate->size = 0; 204 | candidate->data = 0; 205 | candidate->method = 0; 206 | 207 | // check for possible 8-bit RLE 208 | for (size = 0; size <= LONG_RUN_SIZE && current + size < start + insize; size++) { 209 | if (current[size] != current[0]) break; 210 | } 211 | rle_candidate(candidate, size, current[0], rle_8); 212 | 213 | // check for possible 16-bit RLE 214 | if (insize - this->inpos >= 2) { 215 | uint16_t first = current[0] | (current[1] << 8); 216 | for (size = 0; size <= 2*LONG_RUN_SIZE && current + size < start + insize - 1; size += 2) { 217 | uint16_t next = current[size] | (current[size + 1] << 8); 218 | if (next != first) break; 219 | } 220 | rle_candidate(candidate, size, first, rle_16); 221 | } 222 | 223 | // fast mode: don't use sequence RLE 224 | if (fast) return; 225 | 226 | // check for possible sequence RLE 227 | for (size = 0; size <= LONG_RUN_SIZE && current + size < start + insize; size++) { 228 | if (current[size] != (current[0] + size)) break; 229 | } 230 | rle_candidate(candidate, size, current[0], rle_seq); 231 | } 232 | 233 | // ------------------------------------------------------------------------------------------------ 234 | static inline void backref_candidate(backref_t *candidate, size_t offset, size_t size, method_e method) { 235 | // if this is better than the current candidate, use it 236 | if (size > LONG_RUN_SIZE) size = LONG_RUN_SIZE; 237 | if (size >= 4 && size > candidate->size) { 238 | candidate->size = size; 239 | candidate->offset = offset; 240 | candidate->method = method; 241 | 242 | debug("\tref_search: found new candidate (offset: %4x, size: %d, method = %d)\n", offset, size, method); 243 | } 244 | } 245 | 246 | // 
------------------------------------------------------------------------------------------------ 247 | // Searches for the best possible back reference. 248 | // start and current are positions within the uncompressed input stream. 249 | // fast enables fast mode which only uses regular forward references 250 | static void ref_search (const pack_context_t *this, backref_t *candidate, int fast) { 251 | const uint8_t *start = this->unpacked; 252 | const uint8_t *current = start + this->inpos; 253 | size_t insize = this->inputsize; 254 | tuple_t *offsets = this->offsets; 255 | 256 | uint16_t size; 257 | int currbytes; 258 | tuple_t *tuple; 259 | 260 | candidate->size = 0; 261 | candidate->offset = 0; 262 | candidate->method = 0; 263 | 264 | // references to previous data which goes in the same direction 265 | // see if this byte sequence exists elsewhere, then start searching. 266 | currbytes = (int)COMBINE(current[0], current[1], current[2], current[3]); 267 | size = 4; 268 | HASH_FIND_INT(offsets, &currbytes, tuple); 269 | if (tuple) for (const uint8_t *pos = start + tuple->offset; pos && pos < current;) { 270 | // see how many bytes in a row are the same between the current uncompressed data 271 | // and the data at the position being searched 272 | for (; size <= LONG_RUN_SIZE && current + size < start + insize; size++) { 273 | if (pos[size] != current[size]) break; 274 | } 275 | backref_candidate(candidate, pos - start, size, lz_norm); 276 | // find another instance of the current data to see if it can be a better candidate 277 | pos = memmem(pos + 1, insize - (pos - start) - 1, pos, size); 278 | } 279 | 280 | // fast mode: forward references only 281 | if (fast) return; 282 | 283 | // references to data where the bits are rotated 284 | currbytes = (int)COMBINE(rotate(current[0]), rotate(current[1]), rotate(current[2]), rotate(current[3])); 285 | size = 4; 286 | HASH_FIND_INT(offsets, &currbytes, tuple); 287 | if (tuple) for (const uint8_t *pos = start + 
tuple->offset; pos && pos < current;) { 288 | // now repeat the check with the bit rotation method 289 | for (; size <= LONG_RUN_SIZE && current + size < start + insize; size++) { 290 | if (pos[size] != rotate(current[size])) break; 291 | } 292 | backref_candidate(candidate, pos - start, size, lz_rot); 293 | // find another instance of the current data to see if it can be a better candidate 294 | pos = memmem(pos + 1, insize - (pos - start) - 1, pos, size); 295 | } 296 | 297 | // references to data which goes backwards 298 | currbytes = (int)COMBINE(current[3], current[2], current[1], current[0]); 299 | size = 4; 300 | HASH_FIND_INT(offsets, &currbytes, tuple); 301 | if (tuple) for (const uint8_t *pos = start + tuple->offset + 3; pos && pos < current; pos++) { 302 | // now repeat the check but go backwards 303 | // TODO: possibly use memmem to speed up this one a bit also, 304 | // though we'd then basically be searching both backwards and forwards, 305 | // which would be a bit weird to manage correctly... 306 | for (size = 0; size <= LONG_RUN_SIZE && start + size <= pos 307 | && current + size < start + insize; size++) { 308 | if (start[pos - start - size] != current[size]) break; 309 | } 310 | backref_candidate(candidate, pos - start, size, lz_rev); 311 | } 312 | } 313 | 314 | // ------------------------------------------------------------------------------------------------ 315 | static inline int write_check_size(const pack_context_t *this, size_t size) { 316 | return this->outpos + this->dontpacksize + size < DATA_SIZE; 317 | } 318 | 319 | // ------------------------------------------------------------------------------------------------ 320 | // Write uncompressed data to the output stream. 321 | // Returns number of bytes written. 
322 | static uint16_t write_raw (pack_context_t *this) { 323 | uint8_t *out = this->packed; 324 | uint16_t insize = this->dontpacksize; 325 | 326 | if (!insize) return 0; 327 | 328 | debug("%04x %04x write_raw: writing %d bytes unpacked data\n", 329 | this->inpos - insize, this->outpos, insize); 330 | 331 | uint16_t size = insize - 1; 332 | int outsize; 333 | 334 | if (size >= RUN_SIZE) { 335 | // write_check_size already accounts for size of raw data, 336 | // but also check the size of the command/size byte(s) 337 | outsize = 2; 338 | if (!write_check_size(this, outsize)) return 0; 339 | 340 | // write command byte + MSB of size 341 | out[this->outpos++] = 0xE0 + (size >> 8); 342 | // write LSB of size 343 | out[this->outpos++] = size & 0xFF; 344 | } 345 | // normal size run 346 | else { 347 | outsize = 1; 348 | if (!write_check_size(this, outsize)) return 0; 349 | 350 | // write command byte / size 351 | out[this->outpos++] = size; 352 | } 353 | 354 | // write data 355 | memcpy(&out[this->outpos], this->dontpack, insize); 356 | this->outpos += insize; 357 | this->dontpacksize = 0; 358 | // total size written is the command + size + all data 359 | return outsize + insize; 360 | } 361 | 362 | // ------------------------------------------------------------------------------------------------ 363 | static inline uint16_t backref_outsize(const backref_t *backref) { 364 | return (backref->size - 1 >= RUN_SIZE) ? 4 : 3; 365 | } 366 | 367 | // ------------------------------------------------------------------------------------------------ 368 | // Writes a back reference to the compressed output stream. 
369 | // Returns number of bytes written 370 | static uint16_t write_backref (pack_context_t *this, const backref_t *backref) { 371 | uint16_t size = backref->size - 1; 372 | uint8_t *out = this->packed; 373 | 374 | uint16_t outsize = backref_outsize(backref); 375 | if (!write_check_size(this, outsize)) return 0; 376 | 377 | // flush the raw data buffer first 378 | write_raw(this); 379 | 380 | debug("%04x %04x write_backref: writing backref to %4x, size %d (method %d)\n", 381 | this->inpos, this->outpos, backref->offset, backref->size, backref->method); 382 | 383 | // long run 384 | if (size >= RUN_SIZE) { 385 | // write command byte / MSB of size 386 | out[this->outpos++] = (0xF0 + (backref->method << 2)) | (size >> 8); 387 | // write LSB of size 388 | out[this->outpos++] = size & 0xFF; 389 | } 390 | // normal size run 391 | else { 392 | // write command byte / size 393 | out[this->outpos++] = (0x80 + (backref->method << 5)) | size; 394 | } 395 | 396 | // write MSB of offset 397 | out[this->outpos++] = backref->offset >> 8; 398 | // write LSB of offset 399 | out[this->outpos++] = backref->offset & 0xFF; 400 | 401 | this->inpos += backref->size; 402 | return outsize; 403 | } 404 | 405 | // ------------------------------------------------------------------------------------------------ 406 | static inline uint16_t rle_outsize(const rle_t *rle) { 407 | uint16_t size = (rle->size - 1 >= RUN_SIZE) ? 3 : 2; 408 | if (rle->method == rle_16) size++; // account for extra byte of value 409 | return size; 410 | } 411 | 412 | // ------------------------------------------------------------------------------------------------ 413 | // Writes RLE data to the compressed output stream. 
414 | // Returns number of bytes written 415 | static uint16_t write_rle (pack_context_t *this, const rle_t *rle) { 416 | uint16_t size; 417 | uint8_t *out = this->packed; 418 | 419 | uint16_t outsize = rle_outsize(rle); 420 | if (!write_check_size(this, outsize)) return 0; 421 | 422 | if (rle->method == rle_16) { 423 | size = (rle->size / 2) - 1; 424 | } else { 425 | size = rle->size - 1; 426 | } 427 | 428 | // flush the raw data buffer first 429 | write_raw(this); 430 | 431 | debug("%04x %04x write_rle: writing %d bytes of data 0x%02x (method %d)\n", 432 | this->inpos, this->outpos, rle->size, rle->data, rle->method); 433 | 434 | // long run 435 | if (size >= RUN_SIZE) { 436 | // write command byte / MSB of size 437 | out[this->outpos++] = (0xE4 + (rle->method << 2)) | (size >> 8); 438 | // write LSB of size 439 | out[this->outpos++] = size & 0xFF; 440 | } 441 | // normal size run 442 | else { 443 | // write command byte / size 444 | out[this->outpos++] = (0x20 + (rle->method << 5)) | size; 445 | } 446 | 447 | out[this->outpos++] = rle->data; 448 | // write upper byte of 16-bit RLE (and adjust written data size) 449 | if (rle->method == rle_16) { 450 | out[this->outpos++] = rle->data >> 8; 451 | } 452 | 453 | this->inpos += rle->size; 454 | return outsize; 455 | } 456 | 457 | // ------------------------------------------------------------------------------------------------ 458 | // Writes a single byte of raw (literal) data from the input. 
459 | // Returns number of bytes written 460 | static uint16_t write_next_byte(pack_context_t *this) { 461 | if (!write_check_size(this, 1)) return 0; 462 | 463 | this->dontpack[this->dontpacksize++] = this->unpacked[this->inpos++]; 464 | 465 | // if the raw data buffer is full, flush it 466 | if (this->dontpacksize == LONG_RUN_SIZE) { 467 | write_raw(this); 468 | } 469 | 470 | return 1; 471 | } 472 | 473 | // ------------------------------------------------------------------------------------------------ 474 | // Writes a single byte to terminate the compressed data. 475 | // Returns number of bytes written 476 | static uint16_t write_trailer(pack_context_t *this) { 477 | if (!write_check_size(this, 1)) return 0; 478 | 479 | write_raw(this); 480 | 481 | //add the terminating byte 482 | this->packed[this->outpos++] = 0xFF; 483 | 484 | return 1; 485 | } 486 | 487 | // ------------------------------------------------------------------------------------------------ 488 | static void pack_normal(pack_context_t *this, int fast) { 489 | size_t inputsize = this->inputsize; 490 | // backref and RLE compression candidates 491 | backref_t backref = {0}; 492 | rle_t rle = {0}; 493 | 494 | while (inputsize > 0) { 495 | // check for a potential RLE 496 | rle_check(this, &rle, fast); 497 | // check for a potential back reference 498 | if (rle.size < LONG_RUN_SIZE && inputsize >= 4) 499 | ref_search(this, &backref, fast); 500 | else backref.size = 0; 501 | 502 | // if the backref is a better candidate, use it 503 | if (backref.size > rle.size) { 504 | if (!write_backref(this, &backref)) break; 505 | } 506 | // or if the RLE is a better candidate, use it instead 507 | else if (rle.size >= 2) { 508 | if (!write_rle(this, &rle)) break; 509 | } 510 | // otherwise, write this byte uncompressed 511 | else { 512 | if (!write_next_byte(this)) break; 513 | } 514 | 515 | inputsize = input_bytes_left(this); 516 | } 517 | } 518 | 519 | // 
------------------------------------------------------------------------------------------------ 520 | static int pack_optimal(pack_context_t *this, int fast) { 521 | size_t inputsize = this->inputsize; 522 | // backref and RLE compression candidates 523 | backref_t backref = {0}; 524 | rle_t rle = {0}; 525 | 526 | // test - just go through entire input and score each byte 527 | typedef struct node_s { 528 | // previous and next nodes in directed graph 529 | // (populated when doing shortest-path search) 530 | struct node_s *next, *prev; 531 | // distance to second neighboring node (first is n+1) 532 | size_t neighbor; 533 | // graph edge length between this and neighbor (i.e. size of compressed data) 534 | size_t length; 535 | // distance to start of data 536 | size_t distance; 537 | // backref used for compression (else RLE if neighbor > 0) 538 | int backref; 539 | // RLE data or backref offset 540 | uint16_t data; 541 | // RLE/backref method used 542 | method_e method; 543 | } node_t; 544 | node_t *nodes = calloc(inputsize+1, sizeof(node_t)); 545 | node_t *node, *other; 546 | 547 | if (!nodes) return 1; 548 | 549 | for (this->inpos = 0; this->inpos < inputsize; this->inpos++) { 550 | node = nodes+this->inpos; 551 | node->distance = 1<<16; 552 | 553 | // check for a potential RLE 554 | rle_check(this, &rle, fast); 555 | // check for a potential back reference 556 | if (rle.size < LONG_RUN_SIZE && inputsize - this->inpos >= 4) 557 | ref_search(this, &backref, fast); 558 | else backref.size = 0; 559 | 560 | // if the backref is a better candidate, use it 561 | if (backref.size > rle.size) { 562 | node->neighbor = backref.size; 563 | node->length = backref_outsize(&backref); 564 | node->method = backref.method; 565 | node->data = backref.offset; 566 | node->backref = 1; 567 | } 568 | // or if the RLE is a better candidate, use it instead 569 | else if (rle.size >= 2) { 570 | node->neighbor = rle.size; 571 | node->length = rle_outsize(&rle); 572 | node->method = 
rle.method; 573 | node->data = rle.data; 574 | } 575 | } 576 | 577 | // find shortest path through input 578 | nodes[0].distance = 0; 579 | nodes[inputsize].distance = 1<<16; 580 | 581 | for (size_t i = 0; i < inputsize; i++) { 582 | node = nodes+i; 583 | size_t newdist; 584 | 585 | // check first neighbor (next byte) 586 | other = node+1; 587 | newdist = node->distance + 2; // at least 1 literal byte + 1 control byte 588 | if (newdist < other->distance) { 589 | other->distance = newdist; 590 | other->prev = node; 591 | } 592 | 593 | // check second neighbor (next byte after compression, if possible) 594 | if (!node->neighbor) continue; 595 | 596 | other = node+node->neighbor; 597 | newdist = node->distance + node->length; 598 | if (newdist < other->distance) { 599 | other->distance = newdist; 600 | other->prev = node; 601 | } 602 | } 603 | debug("final distance = %u prev = %04x\n", nodes[inputsize].distance, nodes[inputsize].prev); 604 | // create path back from end to start of data 605 | for (node = nodes+inputsize; node->prev; node = node->prev) { 606 | debug("node = %u prev = %u\n", node-nodes, node->prev-nodes); 607 | node->prev->next = node; 608 | } 609 | 610 | // compress data based on shortest path 611 | this->inpos = 0; 612 | for (node = nodes; node->next; node = node->next) { 613 | debug("node = %u next = %u\n", node-nodes, node->next-nodes); 614 | if (node->next == node+1) { 615 | if (!write_next_byte(this)) break; 616 | } else if (node->backref) { 617 | backref.size = node->neighbor; 618 | backref.method = node->method; 619 | backref.offset = node->data; 620 | if (!write_backref(this, &backref)) break; 621 | } else { 622 | rle.size = node->neighbor; 623 | rle.method = node->method; 624 | rle.data = node->data; 625 | if (!write_rle(this, &rle)) break; 626 | } 627 | } 628 | 629 | free(nodes); 630 | return 0; 631 | } 632 | 633 | // ------------------------------------------------------------------------------------------------ 634 | // Compresses a file of 
up to 64 kb. 635 | // unpacked/packed are 65536 byte buffers to read/from write to, 636 | // inputsize is the length of the uncompressed data. 637 | // Returns the size of the compressed data in bytes, or 0 if compression failed. 638 | size_t exhal_pack2(const uint8_t *unpacked, size_t inputsize, uint8_t *packed, const pack_options_t *options) { 639 | size_t outpos = 0; 640 | 641 | debug("inputsize = %d\n", inputsize); 642 | 643 | pack_context_t *ctx = pack_context_alloc(unpacked, inputsize, packed); 644 | if (!ctx) return 0; 645 | 646 | int failed = 0; 647 | if (inputsize > 0) { 648 | if (options && options->optimal) { 649 | failed = pack_optimal(ctx, options ? options->fast : 0); 650 | } else { 651 | pack_normal(ctx, options ? options->fast : 0); 652 | } 653 | } 654 | 655 | if (!failed && write_trailer(ctx)) { 656 | // compressed data was written successfully 657 | outpos = (size_t)ctx->outpos; 658 | } 659 | 660 | pack_context_free(ctx); 661 | return outpos; 662 | } 663 | 664 | // ------------------------------------------------------------------------------------------------ 665 | size_t exhal_pack(const uint8_t *unpacked, size_t inputsize, uint8_t *packed, int fast) { 666 | pack_options_t options = { 667 | .fast = fast, 668 | }; 669 | return exhal_pack2(unpacked, inputsize, packed, &options); 670 | } 671 | 672 | // ------------------------------------------------------------------------------------------------ 673 | // Decompresses a file of up to 64 kb. 674 | // unpacked/packed are 65536 byte buffers to read/from write to, 675 | // Returns the size of the uncompressed data in bytes or 0 if decompression failed. 
676 | size_t exhal_unpack(const uint8_t *packed, uint8_t *unpacked, unpack_stats_t *stats) { 677 | // current input/output positions 678 | uint32_t inpos = 0; 679 | uint32_t outpos = 0; 680 | 681 | uint8_t input; 682 | uint16_t command, length, offset; 683 | 684 | if (stats) memset(stats, 0, sizeof(*stats)); 685 | 686 | while (1) { 687 | int32_t insize = DATA_SIZE - inpos; 688 | 689 | // read command byte from input 690 | if (insize < 1) return 0; 691 | input = packed[inpos++]; 692 | 693 | // command 0xff = end of data 694 | if (input == 0xFF) 695 | break; 696 | 697 | // check if it is a long or regular command, get the command no. and size 698 | if ((input & 0xE0) == 0xE0) { 699 | if (insize < 1) return 0; 700 | 701 | command = (input >> 2) & 0x07; 702 | // get LSB of length from next byte 703 | length = (((input & 0x03) << 8) | packed[inpos++]) + 1; 704 | } else { 705 | command = input >> 5; 706 | length = (input & 0x1F) + 1; 707 | } 708 | 709 | // don't try to decompress > 64kb 710 | if (((command == 2) && (outpos + 2*length > DATA_SIZE)) 711 | || (outpos + length > DATA_SIZE)) { 712 | return 0; 713 | } 714 | 715 | switch (command) { 716 | // write uncompressed bytes 717 | case 0: 718 | if (insize < length) return 0; 719 | debug("%06x: writing %u raw bytes\n", inpos, length); 720 | memcpy(&unpacked[outpos], &packed[inpos], length); 721 | 722 | outpos += length; 723 | inpos += length; 724 | break; 725 | 726 | // 8-bit RLE 727 | case 1: 728 | if (insize < 1) return 0; 729 | debug("%06x: writing %u bytes RLE, value %02x\n", inpos, length, packed[inpos]); 730 | for (int i = 0; i < length; i++) 731 | unpacked[outpos++] = packed[inpos]; 732 | 733 | inpos++; 734 | break; 735 | 736 | // 16-bit RLE 737 | case 2: 738 | if (insize < 2) return 0; 739 | debug("%06x: writing %u words RLE, value %02x%02x\n", inpos, length, packed[inpos], packed[inpos+1]); 740 | for (int i = 0; i < length; i++) { 741 | unpacked[outpos++] = packed[inpos]; 742 | unpacked[outpos++] = 
packed[inpos+1]; 743 | } 744 | 745 | inpos += 2; 746 | break; 747 | 748 | // 8-bit increasing sequence 749 | case 3: 750 | if (insize < 1) return 0; 751 | debug("%06x: writing %u bytes sequence RLE, value %02x\n", inpos, length, packed[inpos]); 752 | for (int i = 0; i < length; i++) 753 | unpacked[outpos++] = packed[inpos] + i; 754 | 755 | inpos++; 756 | break; 757 | 758 | // regular backref 759 | // (offset is big-endian) 760 | case 4: 761 | case 7: 762 | // 7 isn't a real method number, but it behaves the same as 4 due to a quirk in how 763 | // the original decompression routine is programmed. (one of Parasyte's docs confirms 764 | // this for GB games as well). let's handle it anyway 765 | command = 4; 766 | 767 | if (insize < 2) return 0; 768 | 769 | offset = (packed[inpos] << 8) | packed[inpos+1]; 770 | debug("%06x: writing %u byte forward ref to %x\n", inpos, length, offset); 771 | 772 | if (offset + length > DATA_SIZE) return 0; 773 | 774 | for (int i = 0; i < length; i++) 775 | unpacked[outpos++] = unpacked[offset + i]; 776 | 777 | inpos += 2; 778 | break; 779 | 780 | // backref with bit rotation 781 | // (offset is big-endian) 782 | case 5: 783 | if (insize < 2) return 0; 784 | 785 | offset = (packed[inpos] << 8) | packed[inpos+1]; 786 | debug("%06x: writing %u byte rotated ref to %x\n", inpos, length, offset); 787 | 788 | if (offset + length > DATA_SIZE) return 0; 789 | 790 | for (int i = 0; i < length; i++) 791 | unpacked[outpos++] = rotate(unpacked[offset + i]); 792 | 793 | inpos += 2; 794 | break; 795 | 796 | // backwards backref 797 | // (offset is big-endian) 798 | case 6: 799 | if (insize < 2) return 0; 800 | 801 | offset = (packed[inpos] << 8) | packed[inpos+1]; 802 | debug("%06x: writing %u byte backward ref to %x\n", inpos, length, offset); 803 | 804 | if (offset < length - 1) return 0; 805 | 806 | for (int i = 0; i < length; i++) 807 | unpacked[outpos++] = unpacked[offset - i]; 808 | 809 | inpos += 2; 810 | } 811 | 812 | // keep track of how 
many times each compression method is used 813 | if (stats) stats->methoduse[command]++; 814 | } 815 | 816 | if (stats) stats->inputsize = (size_t)inpos; 817 | 818 | return (size_t)outpos; 819 | } 820 | 821 | // ------------------------------------------------------------------------------------------------ 822 | // Decompress data from an offset into a file 823 | size_t exhal_unpack_from_file(FILE *file, size_t offset, uint8_t *unpacked, unpack_stats_t *stats) { 824 | uint8_t packed[DATA_SIZE] = {0}; 825 | 826 | fseek(file, offset, SEEK_SET); 827 | fread((void*)packed, DATA_SIZE, 1, file); 828 | if (!ferror(file)) 829 | return exhal_unpack(packed, unpacked, stats); 830 | 831 | return 0; 832 | } 833 | -------------------------------------------------------------------------------- /uthash.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2003-2025, Troy D. Hanson https://troydhanson.github.io/uthash/ 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 12 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 13 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 14 | PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER 15 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 16 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 17 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 18 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 19 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 20 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | */ 23 | 24 | #ifndef UTHASH_H 25 | #define UTHASH_H 26 | 27 | #define UTHASH_VERSION 2.3.0 28 | 29 | #include /* memcmp, memset, strlen */ 30 | #include /* ptrdiff_t */ 31 | #include /* exit */ 32 | 33 | #if defined(HASH_NO_STDINT) && HASH_NO_STDINT 34 | /* The user doesn't have , and must figure out their own way 35 | to provide definitions for uint8_t and uint32_t. */ 36 | #else 37 | #include /* uint8_t, uint32_t */ 38 | #endif 39 | 40 | /* These macros use decltype or the earlier __typeof GNU extension. 41 | As decltype is only available in newer compilers (VS2010 or gcc 4.3+ 42 | when compiling c++ source) this code uses whatever method is needed 43 | or, for VS2008 where neither is available, uses casting workarounds. 
*/ 44 | #if !defined(DECLTYPE) && !defined(NO_DECLTYPE) 45 | #if defined(_MSC_VER) /* MS compiler */ 46 | #if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ 47 | #define DECLTYPE(x) (decltype(x)) 48 | #else /* VS2008 or older (or VS2010 in C mode) */ 49 | #define NO_DECLTYPE 50 | #endif 51 | #elif defined(__MCST__) /* Elbrus C Compiler */ 52 | #define DECLTYPE(x) (__typeof(x)) 53 | #elif defined(__BORLANDC__) || defined(__ICCARM__) || defined(__LCC__) || defined(__WATCOMC__) 54 | #define NO_DECLTYPE 55 | #else /* GNU, Sun and other compilers */ 56 | #define DECLTYPE(x) (__typeof(x)) 57 | #endif 58 | #endif 59 | 60 | #ifdef NO_DECLTYPE 61 | #define DECLTYPE(x) 62 | #define DECLTYPE_ASSIGN(dst,src) \ 63 | do { \ 64 | char **_da_dst = (char**)(&(dst)); \ 65 | *_da_dst = (char*)(src); \ 66 | } while (0) 67 | #else 68 | #define DECLTYPE_ASSIGN(dst,src) \ 69 | do { \ 70 | (dst) = DECLTYPE(dst)(src); \ 71 | } while (0) 72 | #endif 73 | 74 | #ifndef uthash_malloc 75 | #define uthash_malloc(sz) malloc(sz) /* malloc fcn */ 76 | #endif 77 | #ifndef uthash_free 78 | #define uthash_free(ptr,sz) free(ptr) /* free fcn */ 79 | #endif 80 | #ifndef uthash_bzero 81 | #define uthash_bzero(a,n) memset(a,'\0',n) 82 | #endif 83 | #ifndef uthash_strlen 84 | #define uthash_strlen(s) strlen(s) 85 | #endif 86 | 87 | #ifndef HASH_FUNCTION 88 | #define HASH_FUNCTION(keyptr,keylen,hashv) HASH_JEN(keyptr, keylen, hashv) 89 | #endif 90 | 91 | #ifndef HASH_KEYCMP 92 | #define HASH_KEYCMP(a,b,n) memcmp(a,b,n) 93 | #endif 94 | 95 | #ifndef uthash_noexpand_fyi 96 | #define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ 97 | #endif 98 | #ifndef uthash_expand_fyi 99 | #define uthash_expand_fyi(tbl) /* can be defined to log expands */ 100 | #endif 101 | 102 | #ifndef HASH_NONFATAL_OOM 103 | #define HASH_NONFATAL_OOM 0 104 | #endif 105 | 106 | #if HASH_NONFATAL_OOM 107 | /* malloc failures can be recovered from */ 108 | 109 | #ifndef uthash_nonfatal_oom 110 | 
#define uthash_nonfatal_oom(obj) do {} while (0) /* non-fatal OOM error */ 111 | #endif 112 | 113 | #define HASH_RECORD_OOM(oomed) do { (oomed) = 1; } while (0) 114 | #define IF_HASH_NONFATAL_OOM(x) x 115 | 116 | #else 117 | /* malloc failures result in lost memory, hash tables are unusable */ 118 | 119 | #ifndef uthash_fatal 120 | #define uthash_fatal(msg) exit(-1) /* fatal OOM error */ 121 | #endif 122 | 123 | #define HASH_RECORD_OOM(oomed) uthash_fatal("out of memory") 124 | #define IF_HASH_NONFATAL_OOM(x) 125 | 126 | #endif 127 | 128 | /* initial number of buckets */ 129 | #define HASH_INITIAL_NUM_BUCKETS 32U /* initial number of buckets */ 130 | #define HASH_INITIAL_NUM_BUCKETS_LOG2 5U /* lg2 of initial number of buckets */ 131 | #define HASH_BKT_CAPACITY_THRESH 10U /* expand when bucket count reaches */ 132 | 133 | /* calculate the element whose hash handle address is hhp */ 134 | #define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) 135 | /* calculate the hash handle from element address elp */ 136 | #define HH_FROM_ELMT(tbl,elp) ((UT_hash_handle*)(void*)(((char*)(elp)) + ((tbl)->hho))) 137 | 138 | #define HASH_ROLLBACK_BKT(hh, head, itemptrhh) \ 139 | do { \ 140 | struct UT_hash_handle *_hd_hh_item = (itemptrhh); \ 141 | unsigned _hd_bkt; \ 142 | HASH_TO_BKT(_hd_hh_item->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ 143 | (head)->hh.tbl->buckets[_hd_bkt].count++; \ 144 | _hd_hh_item->hh_next = NULL; \ 145 | _hd_hh_item->hh_prev = NULL; \ 146 | } while (0) 147 | 148 | #define HASH_VALUE(keyptr,keylen,hashv) \ 149 | do { \ 150 | HASH_FUNCTION(keyptr, keylen, hashv); \ 151 | } while (0) 152 | 153 | #define HASH_FIND_BYHASHVALUE(hh,head,keyptr,keylen,hashval,out) \ 154 | do { \ 155 | (out) = NULL; \ 156 | if (head) { \ 157 | unsigned _hf_bkt; \ 158 | HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _hf_bkt); \ 159 | if (HASH_BLOOM_TEST((head)->hh.tbl, hashval)) { \ 160 | HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt 
], keyptr, keylen, hashval, out); \ 161 | } \ 162 | } \ 163 | } while (0) 164 | 165 | #define HASH_FIND(hh,head,keyptr,keylen,out) \ 166 | do { \ 167 | (out) = NULL; \ 168 | if (head) { \ 169 | unsigned _hf_hashv; \ 170 | HASH_VALUE(keyptr, keylen, _hf_hashv); \ 171 | HASH_FIND_BYHASHVALUE(hh, head, keyptr, keylen, _hf_hashv, out); \ 172 | } \ 173 | } while (0) 174 | 175 | #ifdef HASH_BLOOM 176 | #define HASH_BLOOM_BITLEN (1UL << HASH_BLOOM) 177 | #define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8UL) + (((HASH_BLOOM_BITLEN%8UL)!=0UL) ? 1UL : 0UL) 178 | #define HASH_BLOOM_MAKE(tbl,oomed) \ 179 | do { \ 180 | (tbl)->bloom_nbits = HASH_BLOOM; \ 181 | (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ 182 | if (!(tbl)->bloom_bv) { \ 183 | HASH_RECORD_OOM(oomed); \ 184 | } else { \ 185 | uthash_bzero((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ 186 | (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ 187 | } \ 188 | } while (0) 189 | 190 | #define HASH_BLOOM_FREE(tbl) \ 191 | do { \ 192 | uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ 193 | } while (0) 194 | 195 | #define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8U] |= (1U << ((idx)%8U))) 196 | #define HASH_BLOOM_BITTEST(bv,idx) ((bv[(idx)/8U] & (1U << ((idx)%8U))) != 0) 197 | 198 | #define HASH_BLOOM_ADD(tbl,hashv) \ 199 | HASH_BLOOM_BITSET((tbl)->bloom_bv, ((hashv) & (uint32_t)((1UL << (tbl)->bloom_nbits) - 1U))) 200 | 201 | #define HASH_BLOOM_TEST(tbl,hashv) \ 202 | HASH_BLOOM_BITTEST((tbl)->bloom_bv, ((hashv) & (uint32_t)((1UL << (tbl)->bloom_nbits) - 1U))) 203 | 204 | #else 205 | #define HASH_BLOOM_MAKE(tbl,oomed) 206 | #define HASH_BLOOM_FREE(tbl) 207 | #define HASH_BLOOM_ADD(tbl,hashv) 208 | #define HASH_BLOOM_TEST(tbl,hashv) 1 209 | #define HASH_BLOOM_BYTELEN 0U 210 | #endif 211 | 212 | #define HASH_MAKE_TABLE(hh,head,oomed) \ 213 | do { \ 214 | (head)->hh.tbl = (UT_hash_table*)uthash_malloc(sizeof(UT_hash_table)); \ 215 | if (!(head)->hh.tbl) { \ 216 | HASH_RECORD_OOM(oomed); \ 217 | } else { \ 218 | 
uthash_bzero((head)->hh.tbl, sizeof(UT_hash_table)); \ 219 | (head)->hh.tbl->tail = &((head)->hh); \ 220 | (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ 221 | (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ 222 | (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ 223 | (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ 224 | HASH_INITIAL_NUM_BUCKETS * sizeof(struct UT_hash_bucket)); \ 225 | (head)->hh.tbl->signature = HASH_SIGNATURE; \ 226 | if (!(head)->hh.tbl->buckets) { \ 227 | HASH_RECORD_OOM(oomed); \ 228 | uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ 229 | } else { \ 230 | uthash_bzero((head)->hh.tbl->buckets, \ 231 | HASH_INITIAL_NUM_BUCKETS * sizeof(struct UT_hash_bucket)); \ 232 | HASH_BLOOM_MAKE((head)->hh.tbl, oomed); \ 233 | IF_HASH_NONFATAL_OOM( \ 234 | if (oomed) { \ 235 | uthash_free((head)->hh.tbl->buckets, \ 236 | HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ 237 | uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ 238 | } \ 239 | ) \ 240 | } \ 241 | } \ 242 | } while (0) 243 | 244 | #define HASH_REPLACE_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,replaced,cmpfcn) \ 245 | do { \ 246 | (replaced) = NULL; \ 247 | HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \ 248 | if (replaced) { \ 249 | HASH_DELETE(hh, head, replaced); \ 250 | } \ 251 | HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn); \ 252 | } while (0) 253 | 254 | #define HASH_REPLACE_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add,replaced) \ 255 | do { \ 256 | (replaced) = NULL; \ 257 | HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \ 258 | if (replaced) { \ 259 | HASH_DELETE(hh, head, replaced); \ 260 | } \ 261 | HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add); \ 262 | } while (0) 263 | 264 | #define 
HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced) \ 265 | do { \ 266 | unsigned _hr_hashv; \ 267 | HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \ 268 | HASH_REPLACE_BYHASHVALUE(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced); \ 269 | } while (0) 270 | 271 | #define HASH_REPLACE_INORDER(hh,head,fieldname,keylen_in,add,replaced,cmpfcn) \ 272 | do { \ 273 | unsigned _hr_hashv; \ 274 | HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \ 275 | HASH_REPLACE_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced, cmpfcn); \ 276 | } while (0) 277 | 278 | #define HASH_APPEND_LIST(hh, head, add) \ 279 | do { \ 280 | (add)->hh.next = NULL; \ 281 | (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ 282 | (head)->hh.tbl->tail->next = (add); \ 283 | (head)->hh.tbl->tail = &((add)->hh); \ 284 | } while (0) 285 | 286 | #define HASH_AKBI_INNER_LOOP(hh,head,add,cmpfcn) \ 287 | do { \ 288 | do { \ 289 | if (cmpfcn(DECLTYPE(head)(_hs_iter), add) > 0) { \ 290 | break; \ 291 | } \ 292 | } while ((_hs_iter = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->next)); \ 293 | } while (0) 294 | 295 | #ifdef NO_DECLTYPE 296 | #undef HASH_AKBI_INNER_LOOP 297 | #define HASH_AKBI_INNER_LOOP(hh,head,add,cmpfcn) \ 298 | do { \ 299 | char *_hs_saved_head = (char*)(head); \ 300 | do { \ 301 | DECLTYPE_ASSIGN(head, _hs_iter); \ 302 | if (cmpfcn(head, add) > 0) { \ 303 | DECLTYPE_ASSIGN(head, _hs_saved_head); \ 304 | break; \ 305 | } \ 306 | DECLTYPE_ASSIGN(head, _hs_saved_head); \ 307 | } while ((_hs_iter = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->next)); \ 308 | } while (0) 309 | #endif 310 | 311 | #if HASH_NONFATAL_OOM 312 | 313 | #define HASH_ADD_TO_TABLE(hh,head,keyptr,keylen_in,hashval,add,oomed) \ 314 | do { \ 315 | if (!(oomed)) { \ 316 | unsigned _ha_bkt; \ 317 | (head)->hh.tbl->num_items++; \ 318 | HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ 319 | HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], hh, &(add)->hh, 
oomed); \ 320 | if (oomed) { \ 321 | HASH_ROLLBACK_BKT(hh, head, &(add)->hh); \ 322 | HASH_DELETE_HH(hh, head, &(add)->hh); \ 323 | (add)->hh.tbl = NULL; \ 324 | uthash_nonfatal_oom(add); \ 325 | } else { \ 326 | HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ 327 | HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ 328 | } \ 329 | } else { \ 330 | (add)->hh.tbl = NULL; \ 331 | uthash_nonfatal_oom(add); \ 332 | } \ 333 | } while (0) 334 | 335 | #else 336 | 337 | #define HASH_ADD_TO_TABLE(hh,head,keyptr,keylen_in,hashval,add,oomed) \ 338 | do { \ 339 | unsigned _ha_bkt; \ 340 | (head)->hh.tbl->num_items++; \ 341 | HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ 342 | HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], hh, &(add)->hh, oomed); \ 343 | HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ 344 | HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ 345 | } while (0) 346 | 347 | #endif 348 | 349 | 350 | #define HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh,head,keyptr,keylen_in,hashval,add,cmpfcn) \ 351 | do { \ 352 | IF_HASH_NONFATAL_OOM( int _ha_oomed = 0; ) \ 353 | (add)->hh.hashv = (hashval); \ 354 | (add)->hh.key = (char*) (keyptr); \ 355 | (add)->hh.keylen = (unsigned) (keylen_in); \ 356 | if (!(head)) { \ 357 | (add)->hh.next = NULL; \ 358 | (add)->hh.prev = NULL; \ 359 | HASH_MAKE_TABLE(hh, add, _ha_oomed); \ 360 | IF_HASH_NONFATAL_OOM( if (!_ha_oomed) { ) \ 361 | (head) = (add); \ 362 | IF_HASH_NONFATAL_OOM( } ) \ 363 | } else { \ 364 | void *_hs_iter = (head); \ 365 | (add)->hh.tbl = (head)->hh.tbl; \ 366 | HASH_AKBI_INNER_LOOP(hh, head, add, cmpfcn); \ 367 | if (_hs_iter) { \ 368 | (add)->hh.next = _hs_iter; \ 369 | if (((add)->hh.prev = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->prev)) { \ 370 | HH_FROM_ELMT((head)->hh.tbl, (add)->hh.prev)->next = (add); \ 371 | } else { \ 372 | (head) = (add); \ 373 | } \ 374 | HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->prev = (add); \ 375 | } else { \ 376 | HASH_APPEND_LIST(hh, head, add); \ 377 | } \ 378 | } \ 379 | 
HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, _ha_oomed); \ 380 | HASH_FSCK(hh, head, "HASH_ADD_KEYPTR_BYHASHVALUE_INORDER"); \ 381 | } while (0) 382 | 383 | #define HASH_ADD_KEYPTR_INORDER(hh,head,keyptr,keylen_in,add,cmpfcn) \ 384 | do { \ 385 | unsigned _hs_hashv; \ 386 | HASH_VALUE(keyptr, keylen_in, _hs_hashv); \ 387 | HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, keyptr, keylen_in, _hs_hashv, add, cmpfcn); \ 388 | } while (0) 389 | 390 | #define HASH_ADD_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,cmpfcn) \ 391 | HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn) 392 | 393 | #define HASH_ADD_INORDER(hh,head,fieldname,keylen_in,add,cmpfcn) \ 394 | HASH_ADD_KEYPTR_INORDER(hh, head, &((add)->fieldname), keylen_in, add, cmpfcn) 395 | 396 | #define HASH_ADD_KEYPTR_BYHASHVALUE(hh,head,keyptr,keylen_in,hashval,add) \ 397 | do { \ 398 | IF_HASH_NONFATAL_OOM( int _ha_oomed = 0; ) \ 399 | (add)->hh.hashv = (hashval); \ 400 | (add)->hh.key = (const void*) (keyptr); \ 401 | (add)->hh.keylen = (unsigned) (keylen_in); \ 402 | if (!(head)) { \ 403 | (add)->hh.next = NULL; \ 404 | (add)->hh.prev = NULL; \ 405 | HASH_MAKE_TABLE(hh, add, _ha_oomed); \ 406 | IF_HASH_NONFATAL_OOM( if (!_ha_oomed) { ) \ 407 | (head) = (add); \ 408 | IF_HASH_NONFATAL_OOM( } ) \ 409 | } else { \ 410 | (add)->hh.tbl = (head)->hh.tbl; \ 411 | HASH_APPEND_LIST(hh, head, add); \ 412 | } \ 413 | HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, _ha_oomed); \ 414 | HASH_FSCK(hh, head, "HASH_ADD_KEYPTR_BYHASHVALUE"); \ 415 | } while (0) 416 | 417 | #define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ 418 | do { \ 419 | unsigned _ha_hashv; \ 420 | HASH_VALUE(keyptr, keylen_in, _ha_hashv); \ 421 | HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, keyptr, keylen_in, _ha_hashv, add); \ 422 | } while (0) 423 | 424 | #define HASH_ADD_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add) \ 425 | HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, 
&((add)->fieldname), keylen_in, hashval, add) 426 | 427 | #define HASH_ADD(hh,head,fieldname,keylen_in,add) \ 428 | HASH_ADD_KEYPTR(hh, head, &((add)->fieldname), keylen_in, add) 429 | 430 | #define HASH_TO_BKT(hashv,num_bkts,bkt) \ 431 | do { \ 432 | bkt = ((hashv) & ((num_bkts) - 1U)); \ 433 | } while (0) 434 | 435 | /* delete "delptr" from the hash table. 436 | * "the usual" patch-up process for the app-order doubly-linked-list. 437 | * The use of _hd_hh_del below deserves special explanation. 438 | * These used to be expressed using (delptr) but that led to a bug 439 | * if someone used the same symbol for the head and deletee, like 440 | * HASH_DELETE(hh,users,users); 441 | * We want that to work, but by changing the head (users) below 442 | * we were forfeiting our ability to further refer to the deletee (users) 443 | * in the patch-up process. Solution: use scratch space to 444 | * copy the deletee pointer, then the latter references are via that 445 | * scratch pointer rather than through the repointed (users) symbol. 
446 | */ 447 | #define HASH_DELETE(hh,head,delptr) \ 448 | HASH_DELETE_HH(hh, head, &(delptr)->hh) 449 | 450 | #define HASH_DELETE_HH(hh,head,delptrhh) \ 451 | do { \ 452 | const struct UT_hash_handle *_hd_hh_del = (delptrhh); \ 453 | if ((_hd_hh_del->prev == NULL) && (_hd_hh_del->next == NULL)) { \ 454 | HASH_BLOOM_FREE((head)->hh.tbl); \ 455 | uthash_free((head)->hh.tbl->buckets, \ 456 | (head)->hh.tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ 457 | uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ 458 | (head) = NULL; \ 459 | } else { \ 460 | unsigned _hd_bkt; \ 461 | if (_hd_hh_del == (head)->hh.tbl->tail) { \ 462 | (head)->hh.tbl->tail = HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->prev); \ 463 | } \ 464 | if (_hd_hh_del->prev != NULL) { \ 465 | HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->prev)->next = _hd_hh_del->next; \ 466 | } else { \ 467 | DECLTYPE_ASSIGN(head, _hd_hh_del->next); \ 468 | } \ 469 | if (_hd_hh_del->next != NULL) { \ 470 | HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->next)->prev = _hd_hh_del->prev; \ 471 | } \ 472 | HASH_TO_BKT(_hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ 473 | HASH_DEL_IN_BKT((head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ 474 | (head)->hh.tbl->num_items--; \ 475 | } \ 476 | HASH_FSCK(hh, head, "HASH_DELETE_HH"); \ 477 | } while (0) 478 | 479 | /* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ 480 | #define HASH_FIND_STR(head,findstr,out) \ 481 | do { \ 482 | unsigned _uthash_hfstr_keylen = (unsigned)uthash_strlen(findstr); \ 483 | HASH_FIND(hh, head, findstr, _uthash_hfstr_keylen, out); \ 484 | } while (0) 485 | #define HASH_ADD_STR(head,strfield,add) \ 486 | do { \ 487 | unsigned _uthash_hastr_keylen = (unsigned)uthash_strlen((add)->strfield); \ 488 | HASH_ADD(hh, head, strfield[0], _uthash_hastr_keylen, add); \ 489 | } while (0) 490 | #define HASH_REPLACE_STR(head,strfield,add,replaced) \ 491 | do { \ 492 | unsigned _uthash_hrstr_keylen = (unsigned)uthash_strlen((add)->strfield); \ 493 | 
HASH_REPLACE(hh, head, strfield[0], _uthash_hrstr_keylen, add, replaced); \ 494 | } while (0) 495 | #define HASH_FIND_INT(head,findint,out) \ 496 | HASH_FIND(hh,head,findint,sizeof(int),out) 497 | #define HASH_ADD_INT(head,intfield,add) \ 498 | HASH_ADD(hh,head,intfield,sizeof(int),add) 499 | #define HASH_REPLACE_INT(head,intfield,add,replaced) \ 500 | HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced) 501 | #define HASH_FIND_PTR(head,findptr,out) \ 502 | HASH_FIND(hh,head,findptr,sizeof(void *),out) 503 | #define HASH_ADD_PTR(head,ptrfield,add) \ 504 | HASH_ADD(hh,head,ptrfield,sizeof(void *),add) 505 | #define HASH_REPLACE_PTR(head,ptrfield,add,replaced) \ 506 | HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced) 507 | #define HASH_DEL(head,delptr) \ 508 | HASH_DELETE(hh,head,delptr) 509 | 510 | /* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. 511 | * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. 512 | */ 513 | #ifdef HASH_DEBUG 514 | #include <stdio.h> /* fprintf, stderr */ 515 | #define HASH_OOPS(...) 
do { fprintf(stderr, __VA_ARGS__); exit(-1); } while (0) 516 | #define HASH_FSCK(hh,head,where) \ 517 | do { \ 518 | struct UT_hash_handle *_thh; \ 519 | if (head) { \ 520 | unsigned _bkt_i; \ 521 | unsigned _count = 0; \ 522 | char *_prev; \ 523 | for (_bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; ++_bkt_i) { \ 524 | unsigned _bkt_count = 0; \ 525 | _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ 526 | _prev = NULL; \ 527 | while (_thh) { \ 528 | if (_prev != (char*)(_thh->hh_prev)) { \ 529 | HASH_OOPS("%s: invalid hh_prev %p, actual %p\n", \ 530 | (where), (void*)_thh->hh_prev, (void*)_prev); \ 531 | } \ 532 | _bkt_count++; \ 533 | _prev = (char*)(_thh); \ 534 | _thh = _thh->hh_next; \ 535 | } \ 536 | _count += _bkt_count; \ 537 | if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ 538 | HASH_OOPS("%s: invalid bucket count %u, actual %u\n", \ 539 | (where), (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ 540 | } \ 541 | } \ 542 | if (_count != (head)->hh.tbl->num_items) { \ 543 | HASH_OOPS("%s: invalid hh item count %u, actual %u\n", \ 544 | (where), (head)->hh.tbl->num_items, _count); \ 545 | } \ 546 | _count = 0; \ 547 | _prev = NULL; \ 548 | _thh = &(head)->hh; \ 549 | while (_thh) { \ 550 | _count++; \ 551 | if (_prev != (char*)_thh->prev) { \ 552 | HASH_OOPS("%s: invalid prev %p, actual %p\n", \ 553 | (where), (void*)_thh->prev, (void*)_prev); \ 554 | } \ 555 | _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ 556 | _thh = (_thh->next ? HH_FROM_ELMT((head)->hh.tbl, _thh->next) : NULL); \ 557 | } \ 558 | if (_count != (head)->hh.tbl->num_items) { \ 559 | HASH_OOPS("%s: invalid app item count %u, actual %u\n", \ 560 | (where), (head)->hh.tbl->num_items, _count); \ 561 | } \ 562 | } \ 563 | } while (0) 564 | #else 565 | #define HASH_FSCK(hh,head,where) 566 | #endif 567 | 568 | /* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to 569 | * the descriptor to which this macro is defined for tuning the hash function. 
570 | * The app can #include <unistd.h> to get the prototype for write(2). */ 571 | #ifdef HASH_EMIT_KEYS 572 | #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \ 573 | do { \ 574 | unsigned _klen = fieldlen; \ 575 | write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ 576 | write(HASH_EMIT_KEYS, keyptr, (unsigned long)fieldlen); \ 577 | } while (0) 578 | #else 579 | #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) 580 | #endif 581 | 582 | /* The Bernstein hash function, used in Perl prior to v5.6. Note (x<<5+x)=x*33. */ 583 | #define HASH_BER(key,keylen,hashv) \ 584 | do { \ 585 | unsigned _hb_keylen = (unsigned)keylen; \ 586 | const unsigned char *_hb_key = (const unsigned char*)(key); \ 587 | (hashv) = 0; \ 588 | while (_hb_keylen-- != 0U) { \ 589 | (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++; \ 590 | } \ 591 | } while (0) 592 | 593 | 594 | /* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at 595 | * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx 596 | * (archive link: https://archive.is/Ivcan ) 597 | */ 598 | #define HASH_SAX(key,keylen,hashv) \ 599 | do { \ 600 | unsigned _sx_i; \ 601 | const unsigned char *_hs_key = (const unsigned char*)(key); \ 602 | hashv = 0; \ 603 | for (_sx_i=0; _sx_i < keylen; _sx_i++) { \ 604 | hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ 605 | } \ 606 | } while (0) 607 | /* FNV-1a variation */ 608 | #define HASH_FNV(key,keylen,hashv) \ 609 | do { \ 610 | unsigned _fn_i; \ 611 | const unsigned char *_hf_key = (const unsigned char*)(key); \ 612 | (hashv) = 2166136261U; \ 613 | for (_fn_i=0; _fn_i < keylen; _fn_i++) { \ 614 | hashv = hashv ^ _hf_key[_fn_i]; \ 615 | hashv = hashv * 16777619U; \ 616 | } \ 617 | } while (0) 618 | 619 | #define HASH_OAT(key,keylen,hashv) \ 620 | do { \ 621 | unsigned _ho_i; \ 622 | const unsigned char *_ho_key=(const unsigned char*)(key); \ 623 | hashv = 0; \ 624 | for(_ho_i=0; _ho_i < keylen; _ho_i++) { \ 625 | hashv += _ho_key[_ho_i]; \ 626 | hashv += (hashv << 10); \
627 | hashv ^= (hashv >> 6); \ 628 | } \ 629 | hashv += (hashv << 3); \ 630 | hashv ^= (hashv >> 11); \ 631 | hashv += (hashv << 15); \ 632 | } while (0) 633 | 634 | #define HASH_JEN_MIX(a,b,c) \ 635 | do { \ 636 | a -= b; a -= c; a ^= ( c >> 13 ); \ 637 | b -= c; b -= a; b ^= ( a << 8 ); \ 638 | c -= a; c -= b; c ^= ( b >> 13 ); \ 639 | a -= b; a -= c; a ^= ( c >> 12 ); \ 640 | b -= c; b -= a; b ^= ( a << 16 ); \ 641 | c -= a; c -= b; c ^= ( b >> 5 ); \ 642 | a -= b; a -= c; a ^= ( c >> 3 ); \ 643 | b -= c; b -= a; b ^= ( a << 10 ); \ 644 | c -= a; c -= b; c ^= ( b >> 15 ); \ 645 | } while (0) 646 | 647 | #define HASH_JEN(key,keylen,hashv) \ 648 | do { \ 649 | unsigned _hj_i,_hj_j,_hj_k; \ 650 | unsigned const char *_hj_key=(unsigned const char*)(key); \ 651 | hashv = 0xfeedbeefu; \ 652 | _hj_i = _hj_j = 0x9e3779b9u; \ 653 | _hj_k = (unsigned)(keylen); \ 654 | while (_hj_k >= 12U) { \ 655 | _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \ 656 | + ( (unsigned)_hj_key[2] << 16 ) \ 657 | + ( (unsigned)_hj_key[3] << 24 ) ); \ 658 | _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \ 659 | + ( (unsigned)_hj_key[6] << 16 ) \ 660 | + ( (unsigned)_hj_key[7] << 24 ) ); \ 661 | hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \ 662 | + ( (unsigned)_hj_key[10] << 16 ) \ 663 | + ( (unsigned)_hj_key[11] << 24 ) ); \ 664 | \ 665 | HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ 666 | \ 667 | _hj_key += 12; \ 668 | _hj_k -= 12U; \ 669 | } \ 670 | hashv += (unsigned)(keylen); \ 671 | switch ( _hj_k ) { \ 672 | case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); /* FALLTHROUGH */ \ 673 | case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); /* FALLTHROUGH */ \ 674 | case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); /* FALLTHROUGH */ \ 675 | case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); /* FALLTHROUGH */ \ 676 | case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); /* FALLTHROUGH */ \ 677 | case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); /* FALLTHROUGH */ \ 678 | case 5: _hj_j += 
_hj_key[4]; /* FALLTHROUGH */ \ 679 | case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); /* FALLTHROUGH */ \ 680 | case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); /* FALLTHROUGH */ \ 681 | case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); /* FALLTHROUGH */ \ 682 | case 1: _hj_i += _hj_key[0]; /* FALLTHROUGH */ \ 683 | default: ; \ 684 | } \ 685 | HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ 686 | } while (0) 687 | 688 | /* The Paul Hsieh hash function */ 689 | #undef get16bits 690 | #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ 691 | || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) 692 | #define get16bits(d) (*((const uint16_t *) (d))) 693 | #endif 694 | 695 | #if !defined (get16bits) 696 | #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ 697 | +(uint32_t)(((const uint8_t *)(d))[0]) ) 698 | #endif 699 | #define HASH_SFH(key,keylen,hashv) \ 700 | do { \ 701 | unsigned const char *_sfh_key=(unsigned const char*)(key); \ 702 | uint32_t _sfh_tmp, _sfh_len = (uint32_t)keylen; \ 703 | \ 704 | unsigned _sfh_rem = _sfh_len & 3U; \ 705 | _sfh_len >>= 2; \ 706 | hashv = 0xcafebabeu; \ 707 | \ 708 | /* Main loop */ \ 709 | for (;_sfh_len > 0U; _sfh_len--) { \ 710 | hashv += get16bits (_sfh_key); \ 711 | _sfh_tmp = ((uint32_t)(get16bits (_sfh_key+2)) << 11) ^ hashv; \ 712 | hashv = (hashv << 16) ^ _sfh_tmp; \ 713 | _sfh_key += 2U*sizeof (uint16_t); \ 714 | hashv += hashv >> 11; \ 715 | } \ 716 | \ 717 | /* Handle end cases */ \ 718 | switch (_sfh_rem) { \ 719 | case 3: hashv += get16bits (_sfh_key); \ 720 | hashv ^= hashv << 16; \ 721 | hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)]) << 18; \ 722 | hashv += hashv >> 11; \ 723 | break; \ 724 | case 2: hashv += get16bits (_sfh_key); \ 725 | hashv ^= hashv << 11; \ 726 | hashv += hashv >> 17; \ 727 | break; \ 728 | case 1: hashv += *_sfh_key; \ 729 | hashv ^= hashv << 10; \ 730 | hashv += hashv >> 1; \ 731 | break; \ 732 | default: ; \ 733 | } \ 734 | \ 735 | /* Force 
"avalanching" of final 127 bits */ \ 736 | hashv ^= hashv << 3; \ 737 | hashv += hashv >> 5; \ 738 | hashv ^= hashv << 4; \ 739 | hashv += hashv >> 17; \ 740 | hashv ^= hashv << 25; \ 741 | hashv += hashv >> 6; \ 742 | } while (0) 743 | 744 | /* iterate over items in a known bucket to find desired item */ 745 | #define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,hashval,out) \ 746 | do { \ 747 | if ((head).hh_head != NULL) { \ 748 | DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (head).hh_head)); \ 749 | } else { \ 750 | (out) = NULL; \ 751 | } \ 752 | while ((out) != NULL) { \ 753 | if ((out)->hh.hashv == (hashval) && (out)->hh.keylen == (keylen_in)) { \ 754 | if (HASH_KEYCMP((out)->hh.key, keyptr, keylen_in) == 0) { \ 755 | break; \ 756 | } \ 757 | } \ 758 | if ((out)->hh.hh_next != NULL) { \ 759 | DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (out)->hh.hh_next)); \ 760 | } else { \ 761 | (out) = NULL; \ 762 | } \ 763 | } \ 764 | } while (0) 765 | 766 | /* add an item to a bucket */ 767 | #define HASH_ADD_TO_BKT(head,hh,addhh,oomed) \ 768 | do { \ 769 | UT_hash_bucket *_ha_head = &(head); \ 770 | _ha_head->count++; \ 771 | (addhh)->hh_next = _ha_head->hh_head; \ 772 | (addhh)->hh_prev = NULL; \ 773 | if (_ha_head->hh_head != NULL) { \ 774 | _ha_head->hh_head->hh_prev = (addhh); \ 775 | } \ 776 | _ha_head->hh_head = (addhh); \ 777 | if ((_ha_head->count >= ((_ha_head->expand_mult + 1U) * HASH_BKT_CAPACITY_THRESH)) \ 778 | && !(addhh)->tbl->noexpand) { \ 779 | HASH_EXPAND_BUCKETS(addhh,(addhh)->tbl, oomed); \ 780 | IF_HASH_NONFATAL_OOM( \ 781 | if (oomed) { \ 782 | HASH_DEL_IN_BKT(head,addhh); \ 783 | } \ 784 | ) \ 785 | } \ 786 | } while (0) 787 | 788 | /* remove an item from a given bucket */ 789 | #define HASH_DEL_IN_BKT(head,delhh) \ 790 | do { \ 791 | UT_hash_bucket *_hd_head = &(head); \ 792 | _hd_head->count--; \ 793 | if (_hd_head->hh_head == (delhh)) { \ 794 | _hd_head->hh_head = (delhh)->hh_next; \ 795 | } \ 796 | if ((delhh)->hh_prev) { \ 797 | 
(delhh)->hh_prev->hh_next = (delhh)->hh_next; \ 798 | } \ 799 | if ((delhh)->hh_next) { \ 800 | (delhh)->hh_next->hh_prev = (delhh)->hh_prev; \ 801 | } \ 802 | } while (0) 803 | 804 | /* Bucket expansion has the effect of doubling the number of buckets 805 | * and redistributing the items into the new buckets. Ideally the 806 | * items will distribute more or less evenly into the new buckets 807 | * (the extent to which this is true is a measure of the quality of 808 | * the hash function as it applies to the key domain). 809 | * 810 | * With the items distributed into more buckets, the chain length 811 | * (item count) in each bucket is reduced. Thus by expanding buckets 812 | * the hash keeps a bound on the chain length. This bounded chain 813 | * length is the essence of how a hash provides constant time lookup. 814 | * 815 | * The calculation of tbl->ideal_chain_maxlen below deserves some 816 | * explanation. First, keep in mind that we're calculating the ideal 817 | * maximum chain length based on the *new* (doubled) bucket count. 818 | * In fractions this is just n/b (n=number of items,b=new num buckets). 819 | * Since the ideal chain length is an integer, we want to calculate 820 | * ceil(n/b). We don't depend on floating point arithmetic in this 821 | * hash, so to calculate ceil(n/b) with integers we could write 822 | * 823 | * ceil(n/b) = (n/b) + ((n%b)?1:0) 824 | * 825 | * and in fact a previous version of this hash did just that. 826 | * But now we have improved things a bit by recognizing that b is 827 | * always a power of two. We keep its base 2 log handy (call it lb), 828 | * so now we can write this with a bit shift and logical AND: 829 | * 830 | * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 
1:0) 831 | * 832 | */ 833 | #define HASH_EXPAND_BUCKETS(hh,tbl,oomed) \ 834 | do { \ 835 | unsigned _he_bkt; \ 836 | unsigned _he_bkt_i; \ 837 | struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ 838 | UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ 839 | _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \ 840 | sizeof(struct UT_hash_bucket) * (tbl)->num_buckets * 2U); \ 841 | if (!_he_new_buckets) { \ 842 | HASH_RECORD_OOM(oomed); \ 843 | } else { \ 844 | uthash_bzero(_he_new_buckets, \ 845 | sizeof(struct UT_hash_bucket) * (tbl)->num_buckets * 2U); \ 846 | (tbl)->ideal_chain_maxlen = \ 847 | ((tbl)->num_items >> ((tbl)->log2_num_buckets+1U)) + \ 848 | ((((tbl)->num_items & (((tbl)->num_buckets*2U)-1U)) != 0U) ? 1U : 0U); \ 849 | (tbl)->nonideal_items = 0; \ 850 | for (_he_bkt_i = 0; _he_bkt_i < (tbl)->num_buckets; _he_bkt_i++) { \ 851 | _he_thh = (tbl)->buckets[ _he_bkt_i ].hh_head; \ 852 | while (_he_thh != NULL) { \ 853 | _he_hh_nxt = _he_thh->hh_next; \ 854 | HASH_TO_BKT(_he_thh->hashv, (tbl)->num_buckets * 2U, _he_bkt); \ 855 | _he_newbkt = &(_he_new_buckets[_he_bkt]); \ 856 | if (++(_he_newbkt->count) > (tbl)->ideal_chain_maxlen) { \ 857 | (tbl)->nonideal_items++; \ 858 | if (_he_newbkt->count > _he_newbkt->expand_mult * (tbl)->ideal_chain_maxlen) { \ 859 | _he_newbkt->expand_mult++; \ 860 | } \ 861 | } \ 862 | _he_thh->hh_prev = NULL; \ 863 | _he_thh->hh_next = _he_newbkt->hh_head; \ 864 | if (_he_newbkt->hh_head != NULL) { \ 865 | _he_newbkt->hh_head->hh_prev = _he_thh; \ 866 | } \ 867 | _he_newbkt->hh_head = _he_thh; \ 868 | _he_thh = _he_hh_nxt; \ 869 | } \ 870 | } \ 871 | uthash_free((tbl)->buckets, (tbl)->num_buckets * sizeof(struct UT_hash_bucket)); \ 872 | (tbl)->num_buckets *= 2U; \ 873 | (tbl)->log2_num_buckets++; \ 874 | (tbl)->buckets = _he_new_buckets; \ 875 | (tbl)->ineff_expands = ((tbl)->nonideal_items > ((tbl)->num_items >> 1)) ? 
\ 876 | ((tbl)->ineff_expands+1U) : 0U; \ 877 | if ((tbl)->ineff_expands > 1U) { \ 878 | (tbl)->noexpand = 1; \ 879 | uthash_noexpand_fyi(tbl); \ 880 | } \ 881 | uthash_expand_fyi(tbl); \ 882 | } \ 883 | } while (0) 884 | 885 | /* HASH_SRT sorts the items of a hash in place by relinking the app-order prev/next pointers of their hash handles; nothing in this macro touches the bucket chains (hh_prev/hh_next). cmpfcn(a,b) receives two element pointers cast to the type of head; when it returns <= 0 the item from the earlier run is emitted first. When the sort finishes, head is reassigned to the first element and the table's tail to the last handle. A NULL head is a no-op. head is expanded many times (and is not parenthesized in the NULL test), so pass a plain lvalue without side effects. */ 886 | /* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ 887 | /* Note that HASH_SORT assumes the hash handle name to be hh. 888 | * HASH_SRT was added to allow the hash handle name to be passed in. */ 889 | #define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn) 890 | #define HASH_SRT(hh,head,cmpfcn) \ 891 | do { \ 892 | unsigned _hs_i; \ 893 | unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \ 894 | struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ 895 | if (head != NULL) { \ 896 | _hs_insize = 1; /* run length for this pass; doubled at the end of each pass */ \ 897 | _hs_looping = 1; \ 898 | _hs_list = &((head)->hh); \ 899 | while (_hs_looping != 0U) { \ 900 | _hs_p = _hs_list; \ 901 | _hs_list = NULL; \ 902 | _hs_tail = NULL; \ 903 | _hs_nmerges = 0; /* number of run pairs merged during this pass */ \ 904 | while (_hs_p != NULL) { \ 905 | _hs_nmerges++; \ 906 | _hs_q = _hs_p; \ 907 | _hs_psize = 0; /* becomes the real length of the p run, which may be shorter than insize at the end of the list */ \ 908 | for (_hs_i = 0; _hs_i < _hs_insize; ++_hs_i) { \ 909 | _hs_psize++; \ 910 | _hs_q = ((_hs_q->next != NULL) ? \ 911 | HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL); \ 912 | if (_hs_q == NULL) { \ 913 | break; \ 914 | } \ 915 | } \ 916 | _hs_qsize = _hs_insize; /* upper bound only; the q run also ends when _hs_q reaches NULL */ \ 917 | while ((_hs_psize != 0U) || ((_hs_qsize != 0U) && (_hs_q != NULL))) { \ 918 | if (_hs_psize == 0U) { \ 919 | _hs_e = _hs_q; \ 920 | _hs_q = ((_hs_q->next != NULL) ? \ 921 | HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL); \ 922 | _hs_qsize--; \ 923 | } else if ((_hs_qsize == 0U) || (_hs_q == NULL)) { \ 924 | _hs_e = _hs_p; \ 925 | if (_hs_p != NULL) { \ 926 | _hs_p = ((_hs_p->next != NULL) ?
\ 927 | HH_FROM_ELMT((head)->hh.tbl, _hs_p->next) : NULL); \ 928 | } \ 929 | _hs_psize--; \ 930 | } else if ((cmpfcn( \ 931 | DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl, _hs_p)), \ 932 | DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl, _hs_q)) \ 933 | )) <= 0) { /* tie or p smaller: take from p, the earlier run */ \ 934 | _hs_e = _hs_p; \ 935 | if (_hs_p != NULL) { \ 936 | _hs_p = ((_hs_p->next != NULL) ? \ 937 | HH_FROM_ELMT((head)->hh.tbl, _hs_p->next) : NULL); \ 938 | } \ 939 | _hs_psize--; \ 940 | } else { \ 941 | _hs_e = _hs_q; \ 942 | _hs_q = ((_hs_q->next != NULL) ? \ 943 | HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL); \ 944 | _hs_qsize--; \ 945 | } /* below: append _hs_e to the merged list built so far */ \ 946 | if ( _hs_tail != NULL ) { \ 947 | _hs_tail->next = ((_hs_e != NULL) ? \ 948 | ELMT_FROM_HH((head)->hh.tbl, _hs_e) : NULL); \ 949 | } else { \ 950 | _hs_list = _hs_e; \ 951 | } \ 952 | if (_hs_e != NULL) { \ 953 | _hs_e->prev = ((_hs_tail != NULL) ? \ 954 | ELMT_FROM_HH((head)->hh.tbl, _hs_tail) : NULL); \ 955 | } \ 956 | _hs_tail = _hs_e; \ 957 | } \ 958 | _hs_p = _hs_q; \ 959 | } \ 960 | if (_hs_tail != NULL) { \ 961 | _hs_tail->next = NULL; \ 962 | } \ 963 | if (_hs_nmerges <= 1U) { /* at most one merge in this pass: stop and publish the result */ \ 964 | _hs_looping = 0; \ 965 | (head)->hh.tbl->tail = _hs_tail; \ 966 | DECLTYPE_ASSIGN(head, ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ 967 | } \ 968 | _hs_insize *= 2U; \ 969 | } \ 970 | HASH_FSCK(hh, head, "HASH_SRT"); \ 971 | } \ 972 | } while (0) 973 | 974 | /* This function selects items from one hash into another hash. 975 | * The end result is that the selected items have dual presence 976 | * in both hashes. There is no copy of the items made; rather 977 | * they are added into the new hash through a secondary 978 | * hash handle that must be present in the structure. cond is invoked as cond(elt) with a void* element pointer. src is walked bucket by bucket, so selected items are linked into dst in bucket-traversal order, reusing the key, keylen and hashv already stored in the source handle. NOTE(review): _last_elt starts as NULL and nothing in this macro writes the destination table's tail, so confirm the behaviour when dst is not NULL on entry.
*/ 979 | #define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ 980 | do { \ 981 | unsigned _src_bkt, _dst_bkt; \ 982 | void *_last_elt = NULL, *_elt; \ 983 | UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \ 984 | ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); /* byte offset of the hh_dst handle within an element */ \ 985 | if ((src) != NULL) { \ 986 | for (_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ 987 | for (_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ 988 | _src_hh != NULL; \ 989 | _src_hh = _src_hh->hh_next) { \ 990 | _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ 991 | if (cond(_elt)) { \ 992 | IF_HASH_NONFATAL_OOM( int _hs_oomed = 0; ) \ 993 | _dst_hh = (UT_hash_handle*)(void*)(((char*)_elt) + _dst_hho); /* this element's hh_dst handle */ \ 994 | _dst_hh->key = _src_hh->key; \ 995 | _dst_hh->keylen = _src_hh->keylen; \ 996 | _dst_hh->hashv = _src_hh->hashv; \ 997 | _dst_hh->prev = _last_elt; \ 998 | _dst_hh->next = NULL; \ 999 | if (_last_elt_hh != NULL) { \ 1000 | _last_elt_hh->next = _elt; \ 1001 | } \ 1002 | if ((dst) == NULL) { \ 1003 | DECLTYPE_ASSIGN(dst, _elt); /* first selected item becomes the head of dst */ \ 1004 | HASH_MAKE_TABLE(hh_dst, dst, _hs_oomed); \ 1005 | IF_HASH_NONFATAL_OOM( \ 1006 | if (_hs_oomed) { \ 1007 | uthash_nonfatal_oom(_elt); \ 1008 | (dst) = NULL; \ 1009 | continue; \ 1010 | } \ 1011 | ) \ 1012 | } else { \ 1013 | _dst_hh->tbl = (dst)->hh_dst.tbl; \ 1014 | } \ 1015 | HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ 1016 | HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt], hh_dst, _dst_hh, _hs_oomed); \ 1017 | (dst)->hh_dst.tbl->num_items++; \ 1018 | IF_HASH_NONFATAL_OOM( \ 1019 | if (_hs_oomed) { \ 1020 | HASH_ROLLBACK_BKT(hh_dst, dst, _dst_hh); \ 1021 | HASH_DELETE_HH(hh_dst, dst, _dst_hh); \ 1022 | _dst_hh->tbl = NULL; \ 1023 | uthash_nonfatal_oom(_elt); \ 1024 | continue; \ 1025 | } \ 1026 | ) \ 1027 | HASH_BLOOM_ADD(_dst_hh->tbl, _dst_hh->hashv); \ 1028 | _last_elt = _elt; /* remembered so the next selected item can be linked after this one */ \ 1029 | _last_elt_hh = _dst_hh; \ 1030 | } \ 1031 | } \ 1032 | } \ 1033 | } \ 1034 | HASH_FSCK(hh_dst,
dst, "HASH_SELECT"); \ 1035 | } while (0) 1036 | /* HASH_CLEAR releases the table bookkeeping of a hash (bloom data via HASH_BLOOM_FREE, the bucket array, the table header) and sets head to NULL. The elements themselves are neither freed nor modified here. A NULL head is a no-op. */ 1037 | #define HASH_CLEAR(hh,head) \ 1038 | do { \ 1039 | if ((head) != NULL) { \ 1040 | HASH_BLOOM_FREE((head)->hh.tbl); \ 1041 | uthash_free((head)->hh.tbl->buckets, \ 1042 | (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \ 1043 | uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ 1044 | (head) = NULL; \ 1045 | } \ 1046 | } while (0) 1047 | /* HASH_OVERHEAD evaluates to the bookkeeping size in bytes: one UT_hash_handle per item, the bucket array, the table header and HASH_BLOOM_BYTELEN, with the sum cast to size_t; 0U for a NULL head. */ 1048 | #define HASH_OVERHEAD(hh,head) \ 1049 | (((head) != NULL) ? ( \ 1050 | (size_t)(((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \ 1051 | ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \ 1052 | sizeof(UT_hash_table) + \ 1053 | (HASH_BLOOM_BYTELEN))) : 0U) 1054 | /* HASH_ITER walks the hash in app order. tmp is loaded with the successor of el before each execution of the loop body, so the loop does not read el's own next pointer again afterwards. Without DECLTYPE the successor is stored through a char** view of tmp instead of a cast to the element type. NOTE(review): head and tmp are not parenthesized inside the NULL tests, so pass simple expressions. */ 1055 | #ifdef NO_DECLTYPE 1056 | #define HASH_ITER(hh,head,el,tmp) \ 1057 | for(((el)=(head)), ((*(char**)(&(tmp)))=(char*)((head!=NULL)?(head)->hh.next:NULL)); \ 1058 | (el) != NULL; ((el)=(tmp)), ((*(char**)(&(tmp)))=(char*)((tmp!=NULL)?(tmp)->hh.next:NULL))) 1059 | #else 1060 | #define HASH_ITER(hh,head,el,tmp) \ 1061 | for(((el)=(head)), ((tmp)=DECLTYPE(el)((head!=NULL)?(head)->hh.next:NULL)); \ 1062 | (el) != NULL; ((el)=(tmp)), ((tmp)=DECLTYPE(el)((tmp!=NULL)?(tmp)->hh.next:NULL))) 1063 | #endif 1064 | 1065 | /* obtain a count of items in the hash (0U when head is NULL); HASH_COUNT assumes the hash handle is named hh */ 1066 | #define HASH_COUNT(head) HASH_CNT(hh,head) 1067 | #define HASH_CNT(hh,head) ((head != NULL)?((head)->hh.tbl->num_items):0U) 1068 | /* One hash bucket: the head of a chain of hash handles linked through hh_next. */ 1069 | typedef struct UT_hash_bucket { 1070 | struct UT_hash_handle *hh_head; /* first handle in this bucket's chain */ 1071 | unsigned count; /* presumably the current chain length; it is maintained outside this chunk - confirm */ 1072 | 1073 | /* expand_mult is normally set to 0. In this situation, the max chain length 1074 | * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If 1075 | * the bucket's chain exceeds this length, bucket expansion is triggered). 1076 | * However, setting expand_mult to a non-zero value delays bucket expansion 1077 | * (that would be triggered by additions to this particular bucket) 1078 | * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
1079 | * (The multiplier is simply expand_mult+1). The whole idea of this 1080 | * multiplier is to reduce bucket expansions, since they are expensive, in 1081 | * situations where we know that a particular bucket tends to be overused. 1082 | * It is better to let its chain length grow to a longer yet-still-bounded 1083 | * value, than to do an O(n) bucket expansion too often. 1084 | */ 1085 | unsigned expand_mult; 1086 | 1087 | } UT_hash_bucket; 1088 | 1089 | /* random signature used only to find hash tables in external analysis */ 1090 | #define HASH_SIGNATURE 0xa0111fe1u 1091 | #define HASH_BLOOM_SIGNATURE 0xb12220f2u 1092 | /* Bookkeeping for one hash; each hash handle points at its table through tbl. */ 1093 | typedef struct UT_hash_table { 1094 | UT_hash_bucket *buckets; /* array of num_buckets buckets */ 1095 | unsigned num_buckets, log2_num_buckets; /* presumably log2_num_buckets == log2(num_buckets) - confirm */ 1096 | unsigned num_items; /* number of items in the hash; the value HASH_CNT reports */ 1097 | struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ 1098 | ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element) */ 1099 | 1100 | /* in an ideal situation (all buckets used equally), no bucket would have 1101 | * more than ceil(#items/#buckets) items. that's the ideal chain length. */ 1102 | unsigned ideal_chain_maxlen; 1103 | 1104 | /* nonideal_items is the number of items in the hash whose chain position 1105 | * exceeds the ideal chain maxlen. these items pay the penalty for an uneven 1106 | * hash distribution; reaching them in a chain traversal takes >ideal steps */ 1107 | unsigned nonideal_items; 1108 | 1109 | /* ineffective expands occur when a bucket doubling was performed, but 1110 | * afterward, more than half the items in the hash had nonideal chain 1111 | * positions. If this happens on two consecutive expansions we inhibit any 1112 | * further expansion, as it's not helping; this happens when the hash 1113 | * function isn't a good fit for the key domain. When expansion is inhibited 1114 | * the hash will still work, albeit no longer in constant time.
*/ 1115 | unsigned ineff_expands, noexpand; /* ineff_expands counts consecutive ineffective expansions; noexpand is set to 1 once it exceeds 1 */ 1116 | 1117 | uint32_t signature; /* used only to find hash tables in external analysis */ 1118 | #ifdef HASH_BLOOM 1119 | uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ 1120 | uint8_t *bloom_bv; /* presumably the bloom filter storage; it is managed by the HASH_BLOOM_* macros outside this chunk - confirm */ 1121 | uint8_t bloom_nbits; 1122 | #endif 1123 | 1124 | } UT_hash_table; 1125 | /* Hash handle embedded in every user structure stored in a hash. App-order links are element pointers (void*); bucket-order links are handle pointers. */ 1126 | typedef struct UT_hash_handle { 1127 | struct UT_hash_table *tbl; /* table of the hash this handle belongs to */ 1128 | void *prev; /* prev element in app order */ 1129 | void *next; /* next element in app order */ 1130 | struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ 1131 | struct UT_hash_handle *hh_next; /* next hh in bucket order */ 1132 | const void *key; /* ptr to enclosing struct's key */ 1133 | unsigned keylen; /* enclosing struct's key len */ 1134 | unsigned hashv; /* result of hash-fcn(key) */ 1135 | } UT_hash_handle; 1136 | 1137 | #endif /* UTHASH_H */ 1138 | --------------------------------------------------------------------------------