├── GNUmakefile ├── LICENSE ├── README.md ├── mk └── luapath ├── phf.cc └── phf.h /GNUmakefile: --------------------------------------------------------------------------------
# Build description for phf: the `phf` command-line utility, the libphf
# shared library, and one loadable Lua module per Lua version (5.1/5.2/5.3).
# Requires GNU Make ($(eval), $(call), $(or), conditionals).

all: # default target

# `all` names a command, not a file; never let a stray file called `all`
# mask it.
.PHONY: all

# Remove a target whose recipe failed so a partial output cannot look
# up to date on the next run.
.DELETE_ON_ERROR:

# Settings saved by `make config`; silently skipped when absent.
-include .config

prefix ?= /usr/local
includedir ?= $(prefix)/include
libdir ?= $(prefix)/lib
bindir ?= $(prefix)/bin

# Lua C module install paths.  When left empty they are derived at install
# time by LUALIB_INSTALL below.
luacpath ?=
lua51cpath ?=
lua52cpath ?=
lua53cpath ?=

RMDIR ?= rmdir
MKDIR ?= mkdir
CP ?= cp

OSNAME ?= $(shell uname -s)
# CC is quoted (as in LUAPATH below) so that a multi-word CXX such as
# "ccache g++" reaches mk/luapath as a single value.
CXXNAME ?= $(shell env CC="$(CXX)" mk/luapath ccname)

ifeq ($(CXXNAME), sunpro)
CXXFLAGS ?= -g
else
CXXFLAGS ?= -std=c++11 -fno-rtti -fno-exceptions -g -O3 -Wall -Wextra
endif

# SOFLAGS: flags for building the shared library.
ifeq ($(CXXNAME), sunpro)
SOFLAGS ?= -xcode=pic13 -shared
else
ifeq ($(OSNAME), Darwin)
SOFLAGS ?= -fPIC -dynamiclib -undefined dynamic_lookup
else
SOFLAGS ?= -fPIC -shared
endif
endif

# LOFLAGS: flags for building a loadable module; differs from SOFLAGS only
# on Darwin (-bundle instead of -dynamiclib).
ifeq ($(CXXNAME), sunpro)
LOFLAGS ?= -xcode=pic13 -shared
else
ifeq ($(OSNAME), Darwin)
LOFLAGS ?= -fPIC -bundle -undefined dynamic_lookup
else
LOFLAGS ?= -fPIC -shared
endif
endif

ifeq ($(OSNAME), Darwin)
LIBPHF ?= libphf.dylib
else
LIBPHF ?= libphf.so
endif

.PHONY: config

# Save the current settings to .config as `NAME ?= value` lines.
# $(value ...) writes each definition unexpanded.  The text is passed to
# printf as an argument of a fixed '%s\n' format, so a `%` or `\` inside a
# value is written literally instead of being interpreted by printf.
# `>|` truncates .config even when the shell has noclobber set.
config:
	printf '%s\n' 'prefix ?= $(value prefix)' >| .config
	printf '%s\n' 'includedir ?= $(value includedir)' >> .config
	printf '%s\n' 'libdir ?= $(value libdir)' >> .config
	printf '%s\n' 'bindir ?= $(value bindir)' >> .config
	printf '%s\n' 'luacpath ?= $(value luacpath)' >> .config
	printf '%s\n' 'lua51cpath ?= $(value lua51cpath)' >> .config
	printf '%s\n' 'lua52cpath ?= $(value lua52cpath)' >> .config
	printf '%s\n' 'lua53cpath ?= $(value lua53cpath)' >> .config
	printf '%s\n' 'CXX ?= $(value CXX)' >> .config
	printf '%s\n' 'CPPFLAGS ?= $(value CPPFLAGS)' >> .config
	printf '%s\n' 'CXXFLAGS ?= $(value CXXFLAGS)' >> .config
	printf '%s\n' 'LDFLAGS ?= $(value LDFLAGS)' >> .config
	printf '%s\n' 'SOFLAGS ?= $(value SOFLAGS)' >> .config
	printf '%s\n' 'LOFLAGS ?= $(value LOFLAGS)' >> .config
	printf '%s\n' 'LIBS ?= $(value LIBS)' >> .config
	printf '%s\n' 'LIBPHF ?= $(value LIBPHF)' >> .config
	printf '%s\n' 'RM ?= $(value RM)' >> .config
	printf '%s\n' 'RMDIR ?= $(value RMDIR)' >> .config
	printf '%s\n' 'MKDIR ?= $(value MKDIR)' >> .config
	printf '%s\n' 'CP ?= $(value CP)' >> .config
	printf '%s\n' 'OSNAME ?= $(value OSNAME)' >> .config
	printf '%s\n' 'CXXNAME ?= $(value CXXNAME)' >> .config

# Command-line utility.  LDFLAGS is passed here as it is for the library
# and module links, so user-supplied linker flags apply to every link.
phf: phf.cc phf.h
	$(CXX) -o $@ $< $(CXXFLAGS) $(CPPFLAGS) -DPHF_MAIN $(LDFLAGS) $(LIBS)

$(LIBPHF): phf.cc phf.h
	$(CXX) -o $@ $< $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) $(SOFLAGS) $(LIBS)

all: phf $(LIBPHF)

# $(call LUAPATH, VERSION, MODE): run mk/luapath for Lua VERSION in MODE
# (this file uses the modes `version`, `cppflags` and `cdir`).  Must remain
# a recursively expanded variable because it is a $(call) template.
LUAPATH = $(shell env CC="$(CXX)" CPPFLAGS="$(CPPFLAGS)" LDFLAGS="$(LDFLAGS)" mk/luapath -krxm3 $(if $(includedir),$(if $(DESTDIR), -I$(DESTDIR)$(includedir)) -I$(includedir)) -I/usr/include -I/usr/local/include $(if $(DESTDIR),-P$(DESTDIR)$(bindir)) -P$(bindir) -v$(1) $(2))

# $(eval $(call LUALIB_BUILD,VERSION)): emit the rule that builds
# VERSION/phf.so plus the aliases luaVERSION and allVERSION.  The module is
# added to `all` only when `all` is among the goals (or no goal was given)
# and mk/luapath reports exactly that Lua version.
define LUALIB_BUILD
$(1)/phf.so: phf.cc phf.h
	test "$(1)" = "$$(call LUAPATH, $(1), version)"
	$$(MKDIR) -p $$(@D)
	$$(CXX) -o $$@ $$< $$(CXXFLAGS) $$(CPPFLAGS) $$(call LUAPATH, $(1), cppflags) -DPHF_LUALIB $$(LDFLAGS) $$(LOFLAGS) $(LIBS)

.SECONDARY: all$(1)

lua$(1) all$(1): $(1)/phf.so

ifeq (all, $(filter all, $(or $(MAKECMDGOALS), all)))
ifeq ($(1), $$(call LUAPATH, $(1), version))
all: $(1)/phf.so
endif
endif

endef # LUALIB_BUILD

$(eval $(call LUALIB_BUILD,5.1))

$(eval $(call LUALIB_BUILD,5.2))

$(eval $(call LUALIB_BUILD,5.3))


# $(eval $(call LUALIB_INSTALL,VERSION,CPATHVAR)): emit install/uninstall
# rules for the VERSION module, but only when an install or uninstall goal
# was requested.  When CPATHVAR is empty it is set to the first non-empty
# of: the directory reported by `mk/luapath ... cdir`, $(luacpath) with its
# version digits replaced by VERSION, and $(libdir)/lua/VERSION.
define LUALIB_INSTALL
ifneq ($(filter install install$(1) uninstall uninstall$(1), $(MAKECMDGOALS)),)
ifeq ($$($(2)),) # define lua5?cpath if empty
$(2)_dyn = $$(call LUAPATH, $(1), cdir)
$(2)_sed = $$(shell printf "$$(luacpath)" | sed -ne 's/[[:digit:]].[[:digit:]]/$(1)/p')
$(2)_lib = $$(libdir)/lua/$(1)

override $(2) = $$(or $$($(2)_dyn), $$($(2)_sed), $$($(2)_lib))
endif

$$($(2))/phf.so: $(1)/phf.so
	$$(MKDIR) -p $$(@D)
	$$(CP) -fp $$< $$@

.SECONDARY: install install$(1)

install install$(1): $$($(2))/phf.so

.PHONY: uninstall uninstall$(1)

uninstall$(1):
	$(RM) -f $$($(2))/phf.so

uninstall: uninstall$(1)

endif # if install or install$(1)
endef # LUALIB_INSTALL

$(eval $(call LUALIB_INSTALL,5.1,lua51cpath))

$(eval $(call LUALIB_INSTALL,5.2,lua52cpath))

$(eval $(call LUALIB_INSTALL,5.3,lua53cpath))


.PHONY: clean distclean clean~

# distclean additionally removes the saved configuration.
distclean: clean
	$(RM) -f .config

clean:
	$(RM) -f phf
	$(RM) -f $(LIBPHF)
	$(RM) -fr 5.?/
	$(RM) -fr *.dSYM/

# Remove editor backup files.
clean~:
	find . -name '*~' -exec rm {} +
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014-2015, 2019 William Ahern 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to 5 | deal in the Software without restriction, including without limitation the 6 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 | sell copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | IN THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Introduction # 2 | 3 | This is a simple implementation of the CHD perfect hash algorithm. CHD can 4 | generate perfect hash functions for very large key sets--on the order of 5 | millions of keys--in a very short time. On my circa 2012 desktop and using 6 | the default parameters (hash load factor of 80% and average displacement map 7 | bucket load of 4.0 keys) this implementation can generate a hash function 8 | for 1,000 keys in less than 1/100th of a second, and 1,000,000 keys in less 9 | than a second. 10 | 11 | For more information about the algorithm, see 12 | http://cmph.sourceforge.net/chd.html. 13 | 14 | # Dependencies # 15 | 16 | * No runtime dependencies. 17 | * Requires a modern C++ compiler to build. 18 | * The included build requires GNU Make. 19 | 20 | # Building # 21 | 22 | ## Make Macros ## 23 | 24 | The typical GNU macros can be used to control the build. 25 | 26 | ### Compilation ### 27 | 28 | Note that the modules for Lua 5.1, 5.2, and 5.3 can be built simultaneously. 29 | 30 | * CXX: C++ compiler path. 31 | * CXXFLAGS: C++ compiler flags. 32 | * CPPFLAGS: C preprocessor flags. Necessary if Lua API cannot be discovered 33 | automatically. You can specify multiple include paths if building more than 34 | one Lua module. 35 | * LDFLAGS: Linker flags. Not normally needed. 
36 | * SOFLAGS: Flags needed to build dynamic library. 37 | * LOFLAGS: Flags needed to build loadable module. Normally should be the 38 | same as SOFLAGS, except on OS X. 39 | * LIBS: Library dependencies. Normally empty, but see the section Avoiding 40 | C++ Dependencies. 41 | 42 | #### Avoiding C++ Dependencies 43 | 44 | Defining the preprocessor macro PHF_NO_LIBCXX to 1 will prevent usage of C++ 45 | interfaces such as std::string that would require a dependency on libc++ or 46 | libstdc++. This allows using platform-dependent flags in CXXFLAGS, LDFLAGS, 47 | and SOFLAGS to prevent a dependency on the system C++ library. 48 | 49 | For example, on OS X you can do: 50 | ```sh 51 | $ make CPPFLAGS="-DPHF_NO_LIBCXX" \ 52 | CXXFLAGS="-std=c++11 -fno-rtti -fno-exceptions -O3 -march=native" \ 53 | LDFLAGS="-nostdlib" \ 54 | LIBS="-lSystem" 55 | ``` 56 | 57 | ### Installation ### 58 | * prefix 59 | * includedir 60 | * libdir 61 | * luacpath: Lua C module install path. Can be used for one-shot installation 62 | of a particular Lua version module. 63 | * lua51cpath: Lua 5.1 C module install path. 64 | * lua52cpath: Same as above, for 5.2. 65 | * lua53cpath: Same as above, for 5.3. 66 | 67 | ## Make Targets ## 68 | 69 | * phf: Builds command-line utility (development) 70 | * libphf.so: Builds dynamic library for non-OS X 71 | * libphf.dylib: Builds dynamic library for OS X 72 | * lua5.1: Builds Lua 5.1 module at 5.1/phf.so. Lua 5.1 headers should be 73 | specified using CPPFLAGS if not in normal locations. 74 | * lua5.2: Same as above, for Lua 5.2. 75 | * lua5.3: Same as above, for Lua 5.3. 76 | 77 | # Usage # 78 | 79 | ## Lua ## 80 | 81 | ## API ## 82 | 83 | ### phf.new(keys[, lambda][, alpha][, seed][, nodiv]) ### 84 | 85 | * keys: array of keys in order from 1..#keys. They should be all 86 | numbers or all strings. 87 | 88 | * lambda: number of keys per bucket when generating the g() function mapping. 
89 | 90 | * alpha: output hash space loading factor as percentage from 91 | 1..100. 100% generates a *minimal* perfect hash function. But note that 92 | the implementation does *not* implement the necessary optimizations to 93 | ensure timely generation of minimal perfect hash functions. Normally you 94 | want a loading factor of 80% to 90% for large key sets. 95 | 96 | * seed: random integer seed. 97 | 98 | * nodiv: if true rounds r and m to powers of 2, and performs modular 99 | reduction using bitwise AND. Otherwise, r and m are rounded up to the 100 | nearest primes and modulo division used when indexing tables. Note that 101 | the rounding occurs after calculation of the intermediate and output hash 102 | table loading. 103 | 104 | This is more important when building small hash tables with the C 105 | interface. The optimization is substantial when the compiler can inline 106 | the code, but isn't substantial from Lua. 107 | 108 | Returns a callable object. 109 | 110 | ### phf:hash(key) 111 | 112 | * Returns an integer hash in the range 1..phf:m(). The returned integer will 113 | be unique for all keys in the original set. Otherwise the result is 114 | unspecified. 115 | 116 | ### Example ### 117 | 118 | ```Lua 119 | local phf = require"phf" 120 | 121 | local lambda = 4 -- how many keys per intermediate bucket 122 | local alpha = 80 -- output hash space loading in percentage. 123 | 124 | local keys = { "apple", "banana", "cherry", "date", "eggplant", "fig", 125 | "guava", "honeydew", "jackfruit", "kiwi", "lemon", "mango" } 126 | 127 | local F = phf.new(keys, lambda, alpha) 128 | 129 | for i=1,#keys do 130 | print(keys[i], F(keys[i])) 131 | end 132 | 133 | ``` 134 | 135 | ## C++ ## 136 | 137 | ## API ## 138 | 139 | ### PHF::uniq(T k[], size_t n); ### 140 | 141 | Similar to the shell command `sort | uniq`. Sorts, deduplicates, and shifts 142 | down the keys in the array k. 
Returns the number of unique keys, which will 143 | have been moved to the beginning of the array. If necessary do this before 144 | calling PHF::init, as PHF::init does not tolerate duplicate keys. 145 | 146 | ### int PHF::init(struct phf *f, const T k[], size_t n, size_t l, size_t a, phf_seed_t s); 147 | 148 | Generate a perfect hash function for the n keys in array k and store the 149 | results in f. Returns a system error number on failure, or 0 on success. f 150 | is unmodified on failure. 151 | 152 | ### void PHF::destroy(struct phf *); 153 | 154 | Deallocates internal tables, but not the struct object itself. 155 | 156 | ### void PHF::compact(struct phf *); 157 | 158 | By default the displacement map is an array of uint32_t integers. This 159 | function will select the smallest type necessary to hold the largest 160 | displacement value and update the internal state accordingly. For a loading 161 | factor of 80% (0.8) in the output hash space, and displacement map loading 162 | factor of 4 (400%), the smallest primitive type will often be uint8_t. 163 | 164 | ### phf_hash_t PHF::hash(struct phf *f, T k); 165 | 166 | Returns an integer hash value, h, where 0 <= h < f->m. h will be unique for 167 | each unique key provided when generating the function. f->m will be larger 168 | than the number of unique keys and is based on the specified loading factor 169 | (alpha), rounded up to the nearest prime or nearest power of 2, depending on 170 | the mode of modular reduction selected. For example, for 100 keys and a loading factor of 171 | 80% m will be 127: 100 is 80% of 125, and 127 is the closest prime greater 172 | than or equal to 125. With the nodiv option, m would be 128: 100 is 80% of 173 | 125, and 128 is the closest power of 2 greater than or equal to 125. 174 | 175 | ## C ## 176 | 177 | The C API is nearly identical to the C++ API, except the prefix is phf_ 178 | instead of PHF::. 
phf_uniq, phf_init, and phf_hash are macros which utilize 179 | C11's _Generic or GCC's __builtin_types_compatible_p interfaces to overload 180 | the interfaces by key type. The explicit suffixes _uint32, _uint64, and 181 | _string may be used directly. 182 | 183 | -------------------------------------------------------------------------------- /mk/luapath: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # This script is used to derive compiler flags and filesystem paths 4 | # necessary to utilize Lua, LuaJIT, and particular versions thereof in both 5 | # simple and mixed installation environments. 6 | # 7 | # For usage help information use the -h switch. 8 | # 9 | # This script attempts to adhere strictly to POSIX shell specifications. The 10 | # known non-POSIX features used are the path of the shell at the very first 11 | # line of this script, the default compiler command name of `cc' instead of 12 | # `c99', and the use of /dev/urandom for generating a random sandbox 13 | # directory suffix. All of these can be overridden. For any other issues 14 | # please contact the author. 15 | # 16 | # WARNING: When searching for a Lua interpreter this script may execute 17 | # various utilities in an attempt to deduce their fitness and release 18 | # version. By default this script will search for and execute utilities 19 | # using the glob patterns luac* and lua*. But this script CANNOT GUARANTEE 20 | # that executing such utilities, or any other utilities, either wittingly or 21 | # unwittingly, will not result in your COMPUTER EXPLODING. You have been 22 | # warned. 23 | # 24 | # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 25 | # 26 | # Changelog: 27 | # 28 | # * 2013-08-02 - Published. Derived from an earlier script, lua.path, 29 | # written for the cqueues project. 
30 | # 31 | # * 2013-08-05 - Redirect stdin from /dev/null when probing so we don't 32 | # freeze if a utility tries to read from stdin. 33 | # 34 | # chdir to a read-only directory by default to try to prevent creation 35 | # of temporary files. These features address the issues of LuaTeX 36 | # reading from stdin and creating a luatex.out file in the current 37 | # working directory. By default a directory with a random suffix 38 | # generated from /dev/urandom is placed in TMPDIR and removed on exit. 39 | # 40 | # If CPPFLAGS is empty and no -I options directly specified then set 41 | # INCDIRS to "/usr/include:/usr/local/include". 42 | # 43 | # * 2013-08-07 - Add pkg-config support and refactor header probing to delay 44 | # recursive searching. 45 | # 46 | # * 2013-09-09 - NetBSD's sh gets upset over the noclobber option and 47 | # redirection to /dev/null, so use append operator. And check $# 48 | # before iterating over a null parameter set with `do X; ... done` 49 | # when `set -u` is enabled--it complains about $@ being unset. 50 | # 51 | # * 2013-10-22 - Initial ldflags detection. 52 | # 53 | # * 2014-01-26 - Migrate CC vendor detection from external script. 54 | # 55 | # * 2014-09-29 - Add ldir and cdir modes which print install path by parsing 56 | # package.path and package.cpath. 57 | # 58 | # * 2014-12-18 - Add -e GLOB option. 59 | # 60 | # Deprecate ldir and cdir modes. 61 | # 62 | # Add package.path and package.cpath to replace ldir and cdir modes. 63 | # Optional arguments to the new modes are preferred install paths, 64 | # rather than globs for finding the lua utility path (use the new -e 65 | # option, instead). 66 | # 67 | # * 2014-12-19 - Fix pkg-config version matching. The --modversion of 68 | # the lua package might be stale. For example, it's 5.2.0 on Ubuntu 69 | # 14.04 even though the Lua release is 5.2.3. 70 | # 71 | # Use the interpreter path as a reference point when searching for 72 | # headers. 
$(dirname ${LUA_PATH})/../include is a very likely location 73 | # as bindir and includedir have the same prefix in most installations. 74 | # 75 | # * 2015-01-15 - Quote more command names and arguments. Still need to 76 | # handle space characters in code that employs command substitution. I 77 | # think we could handle all whitespace characters, including newlines, 78 | # by using a control character in IFS and using --exec printf "%s\1" {} 79 | # rather than -print with find(1). 80 | # 81 | # * 2015-01-19 - Add fix for LuaJIT's default package.cpath, which tends to 82 | # hardcode /usr/local/lib/lua/5.1, ordered before the LuaJIT 83 | # installation prefix. 84 | # 85 | # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 86 | # 87 | # Copyright (C) 2012-2015 William Ahern 88 | # 89 | # Permission is hereby granted, free of charge, to any person obtaining a 90 | # copy of this software and associated documentation files (the "Software"), 91 | # to deal in the Software without restriction, including without limitation 92 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 93 | # and/or sell copies of the Software, and to permit persons to whom the 94 | # Software is furnished to do so, subject to the following conditions: 95 | # 96 | # The above copyright notice and this permission notice shall be included in 97 | # all copies or substantial portions of the Software. 98 | # 99 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 100 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 101 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 102 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 103 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 104 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 105 | # DEALINGS IN THE SOFTWARE. 
#
set -e # strict errors
set -u # don't expand unbound variables
set -f # disable pathname expansion
set -C # noclobber
\unalias -a # no command surprises
export LC_ALL=C # no locale headaches
unset IFS # no field splitting surprises
: ${TMPDIR:=/tmp} # sane TMPDIR
: ${CC:=cc}
unset LUA_PATH || true # interferes search for module install directory
unset LUA_CPATH || true

MYVERSION=20150119
MYVENDOR="william@25thandClement.com"


DEVRANDOM=/dev/urandom
SANDBOX="${TMPDIR}/${0##*/}-"

CPPDIRS= # -I directories from CPPFLAGS
INCDIRS=
LDDIRS= # -L directories from LDFLAGS
LIBDIRS=
BINDIRS=
RECURSE=no
MAXDEPTH= # full command switch, like "-maxdepth 3", if supported
XDEV= # do not cross device boundaries; i.e. "-xdev"
SHORTEST= # continue searching until shortest pathname found
PKGCONFIG= # path to pkg-config, found by `command -v` when -k option invoked
GLOB= # -e GLOB expression for lua, luac, ldir, and cdir

GLOB_LUA="lua:lua[5-9]*:lua-[5-9]*:luajit*"
GLOB_LUAC="luac:luac[5-9]*:luac-[5-9]*"

API_MIN=500
API_MAX=999
API_VER=
API_DIR=

JIT_REQ=
JIT_MIN=20000
JIT_MAX=99999
JIT_VER=
JIT_DIR=

LIBLUA_VER=
LIBLUA_DIR=
LIBLUA_LIB=

LIBJIT_VER=
LIBJIT_DIR=
LIBJIT_LIB=

LUAC_PATH=
LUAC_VER=

LUA_PATH=
LUA_VER=


#
# xdirs OPTC FLAGS
#
# parse CPPFLAGS -I or LDFLAGS -L directories
#
#   OPTC   Option letter to look for (defaults to I)
#   FLAGS  Whitespace-separated flag string
#
# Prints the operands of every -OPTC switch, both the "-Idir" and "-I dir"
# forms, joined with ":" and without a trailing newline.
#
xdirs() {
	OPTC="${1:-I}"
	DIRS=

	set -- ${2:-}

	while [ $# -gt 0 ]; do
		case "${1}" in
		-${OPTC})
			shift

			if [ -n "${1:-}" ]; then
				DIRS="${DIRS}${DIRS:+:}${1}"
			fi

			;;
		-${OPTC}*)
			if [ "${1}" != "-${OPTC}" ]; then
				DIRS="${DIRS}${DIRS:+:}${1#-${OPTC}}"
			fi

			;;
		esac

		# A trailing bare -OPTC has already consumed the last
		# parameter above; shifting again would be an error (fatal in
		# some shells because shift is a special built-in).
		if [ $# -gt 0 ]; then
			shift
		fi
	done

	# Fixed format: a "%" or "\" inside a directory name is data.
	printf '%s' "${DIRS}"
}

# print the -I directories found in the flag string $1
idirs() {
	xdirs "I" "${1:-}"
}

# print the -L directories found in the flag string $1
ldirs() {
	xdirs "L" "${1:-}"
}

# count ":"-delimited substrings
count() {
	IFS=:
	set -- ${1:-}
	unset IFS

	printf "$#"
}

# append to ":"-delimited string variable
#
# $1 is the variable name; the remaining arguments are joined with ":" and
# appended to its current value.
append() {
	NAME=${1}
	eval VALUE="\${${NAME}}"
	shift

	IFS=:
	TMP="$*"

	# Empty IFS makes read assign the line verbatim. (IFS="\n" is a
	# backslash plus the letter n, not a newline, so read would treat
	# those two characters as field delimiters.)
	IFS=
	read -r "${NAME}" <<-EOF
	${VALUE:-}${VALUE:+:}${TMP}
	EOF
	unset IFS
}

#
# evalmacro PATH MACRO [REGEX] [SUBST]
#
#   PATH   Header identifier--#include <PATH>
#   MACRO  Macro identifier
#   REGEX  Optional regex pattern to match macro evaluation result
#   SUBST  Optional replacement expression
#
# Runs the preprocessor (${CC} ${CPPFLAGS} -E) over a two-line program that
# includes PATH and wraps MACRO in [===[ ]===] markers, then prints the
# first output line whose marked text matches REGEX (after applying SUBST).
#
evalmacro() {
	printf "#include <$1>\n[===[$2]===]\n" \
	| "${CC:-cc}" ${CPPFLAGS:-} -E - 2>>/dev/null \
	| sed -ne "
		s/^.*\\[===\\[ *\\(${3:-.*}\\) *\\]===\\].*$/${4:-\\1}/
		t Found
		d
		:Found
		p
		q
	"
}


#
# testsym PATH NAME
#
# Test whether global symbol NAME exists in object file at PATH. Exits with
# 0 (true) when found, non-0 (false) otherwise.
#
testsym() {
	# NOTE: No -P for OpenBSD nm(1), but the default output format is
	# close enough. Section types always have a leading and trailing
	# space. U section type means undefined. On AIX [VWZ] are weak
	# global symbols. Solaris and OS X have additional symbol types
	# beyond the canonical POSIX/BSD types, all of which are uppercase
	# and within [A-T].
	#
	# The fallback must name the file too; without an operand nm would
	# not examine PATH at all.
	(nm -Pg "${1}" 2>>/dev/null || nm -g "${1}" 2>>/dev/null) \
	| sed -ne '/ [A-T] /p' \
	| grep -qE "${2}"
}


#
# tryluainclude PATH
#
# Evaluate LUA_VERSION_NUM from the header PATH and, when it is a better
# candidate than the current one (newer version, or same version in a
# shorter directory, within API_MIN..API_MAX and honoring JIT_REQ), record
# it in API_VER/API_DIR (and JIT_VER/JIT_DIR). Always returns 0.
#
tryluainclude() {
	V="$(evalmacro "${1}" LUA_VERSION_NUM '[0123456789][0123456789]*')"
	: ${V:=0}

	if [ "${1%/*}" != "${1}" ]; then
		D="${1%/*}"

		# cleanup after Solaris directory prune trick
		if [ "${D##*/./}" != "${D}" ]; then
			D="${D%%/./*}/${D##*/./}"
		else
			D="${D%/.}"
		fi
	else
		D=
	fi

	[ "$V" -gt 0 -a "$V" -ge "${API_VER:-0}" ] || return 0

	[ "$V" -gt "${API_VER:-0}" -o "${#D}" -lt "${#API_DIR}" -o \( "${JIT_REQ}" = "yes" -a "${JIT_VER:-0}" -lt "${JIT_MAX}" \) ] || return 0

	[ "$V" -ge "${API_MIN}" -a "$V" -le "${API_MAX}" ] || return 0

	if [ -n "${JIT_REQ}" ]; then
		J="$(evalmacro "${1%%lua.h}luajit.h" LUAJIT_VERSION_NUM '[0123456789][0123456789]*')"
		: ${J:=0}

		if [ "${JIT_REQ}" = "skip" ]; then
			[ "${J}" -eq 0 ] || return 0
		elif [ "${JIT_REQ}" = "yes" ]; then
			[ "$J" -ge "${JIT_VER:-0}" ] || return 0
			[ "$J" -gt "${JIT_VER:-0}" -o "${#D}" -lt "${#JIT_DIR}" ] || return 0
			[ "$J" -ge ${JIT_MIN} ] || return 0
			[ "$J" -le "${JIT_MAX}" ] || return 0

			JIT_VER="$J"
			JIT_DIR="$D"
		fi
	fi

	API_VER="$V"
	API_DIR="$D"
}


#
# foundversion
#
# true if found the best (maximum) possible version, false otherwise
#
foundversion() {
	if [ "${API_VER:-0}" -lt "${API_MAX}" ]; then
		return 1
	fi

	if [ "${JIT_REQ}" = "yes" -a "${JIT_VER:-0}" -lt "${JIT_MAX}" ]; then
		return 1
	fi

	if [ "${SHORTEST}" = "yes" ]; then
		return 1
	fi

	return 0
}


#
# luapc
#
#   wrapper around `pkg-config ... 
#   LIB`, where LIB is derived by
#   searching for all libraries with "lua" in the name that have a
#   --modversion equal to the release version printed by ${LUA_PATH} -v.
#
#   Prints whatever pkg-config prints for the given arguments. Prints
#   nothing, and still returns 0, when LUA_PATH or PKGCONFIG is unset, no
#   matching package is found, or pkg-config fails.
#
LUAPC_LIB=

luapc() {
	[ -n "${LUA_PATH}" ] || return 0

	[ -n "${PKGCONFIG}" ] || return 0

	# find pkg-config library name
	if [ -z "${LUAPC_LIB}" ]; then
		# stdin comes from /dev/null so the probe cannot block on
		# input; stderr is merged because the version banner may be
		# written there.
		V="$("${LUA_PATH}" -v </dev/null 2>&1 | head -n1 | sed -ne 's/^Lua[^ ]* \([0123456789][0123456789]*\(\.[0123456789][0123456789]*\)*\).*/\1/p')"

		[ -n "${V}" ] || return 0

		V_N=$(mmp2num "${V}")

		# stdout must stay connected to the pipe: the package list is
		# what sed filters. Only stdin/stderr are redirected.
		for LIB in $("${PKGCONFIG}" --list-all </dev/null 2>>/dev/null | sed -ne 's/^\(lua[^ ]*\).*/\1/p'); do
			M="$("${PKGCONFIG}" --modversion ${LIB} || true)"

			# break immediately on exact match
			if [ "${V}" = "${M}" ]; then
				LUAPC_LIB="${LIB}"

				break
			fi

			#
			# NOTE: On Ubuntu 14.04 pkg-config --modversion
			# lua5.2 prints 5.2.0 even though the release
			# version is 5.2.3 (what lua5.2 -v prints).
			#
			# If the major.minor components match, then
			# tentatively use that package name.
			#
			M_N=$(mmp2num "${M}" 0 0 0)

			if [ "$((${V_N} / 100))" -eq "$((${M_N} / 100))" ]; then
				LUAPC_LIB="${LIB}"
			fi
		done

		[ -n "${LUAPC_LIB}" ] || return 0
	fi

	# Callers capture this output with $(luapc ...), so stdout is left
	# alone; only stdin and stderr are redirected.
	"${PKGCONFIG}" "$@" "${LUAPC_LIB}" </dev/null 2>>/dev/null || true
}


#
# findinstalldir package.path|package.cpath [preferred-path ...]
397 | # 398 | findinstalldir() { 399 | V_DIR=$((${LUA_VER} / 100 % 100)).$((${LUA_VER} % 100)) 400 | 401 | if [ "${1}" = "package.cpath" ]; then 402 | DIR="$(luapc --variable INSTALL_CMOD)" 403 | [ -n "${DIR}" ] && set -- "$@" "${DIR}" 404 | 405 | DIR="$(luapc --variable INSTALL_LIB)" 406 | [ -n "${DIR}" ] && set -- "$@" "${DIR}/lua/${V_DIR}" 407 | 408 | DIR="$(luapc --variable libdir)" 409 | [ -n "${DIR}" ] && set -- "$@" "${DIR}/lua/${V_DIR}" 410 | 411 | DIR="$(luapc --variable prefix)" 412 | [ -n "${DIR}" ] && set -- "$@" "${DIR}/lib/lua/${V_DIR}" 413 | 414 | # LuaJIT installations tend to include 415 | # /usr/local/lib/lua/5.1 as one of the first paths, ordered 416 | # before the LuaJIT installation prefix, and regardless of 417 | # whether there exists a /usr/local/lib/lua/5.1. 418 | set -- "$@" "${LUA_PATH}/../../lib/lua/${V_DIR}" 419 | set -- "$@" "${LUA_PATH}/../../lib/*/lua/${V_DIR}" # e.g. lib/x86_64-linux-gnu 420 | else 421 | DIR="$(luapc --variable INSTALL_LMOD)" 422 | [ -n "${DIR}" ] && set -- "$@" "${DIR}" 423 | 424 | DIR="$(luapc --variable prefix)" 425 | [ -n "${DIR}" ] && set -- "$@" "${DIR}/share/lua/${V_DIR}" 426 | 427 | # See above LuaJIT note. Although the built-in package.path 428 | # usually orders the LuaJIT installation prefix first. 429 | set -- "$@" "${LUA_PATH}/../../share/lua/${V_DIR}" 430 | fi 431 | 432 | ARRAY="${1}" 433 | shift 434 | 435 | if [ $# -eq 0 ]; then 436 | set -- "/nonexistent" # cannot expand empty $@ on some implementations 437 | fi 438 | 439 | "${LUA_PATH}" - "$@" <<-EOF 440 | -- 441 | -- actual pkg-config variable on Ubuntu 14.04 442 | -- 443 | -- /usr//share/lua/5.1 444 | -- 445 | local function fixpath(path) 446 | local stack = { path:match"^/" and "" or "." } 447 | 448 | for ent in path:gmatch"([^/]+)" do 449 | if ent == ".." and #stack > 1 then 450 | stack[#stack] = nil 451 | elseif ent ~= "." 
then
			stack[#stack + 1] = ent
		end
	end

	return table.concat(stack, "/")
end

local function topattern(path)
	if string.match(path, "*") then
		path = string.gsub(path, "%%", "%%")
		return string.gsub(path, "*", "[^/]+")
	end
end

local dirs = { }

for dir in ${ARRAY}:gmatch"([^;?]+)/" do
	dir = fixpath(dir)

	if dir ~= "." then
		dirs[#dirs + 1] = dir
	end
end

for _, arg in ipairs{ ... } do
	arg = fixpath(arg)

	local pat = topattern(arg)

	for _, dir in ipairs(dirs) do
		if arg == dir then
			print(dir)
			os.exit(0)
		elseif pat and string.match(dir, pat) then
			print(dir)
			os.exit(0)
		end
	end
end

if dirs[1] then
	print(dirs[1])
	os.exit(0)
else
	os.exit(1)
end
EOF
}


#
# findversion
#
# Probe candidate lua.h headers with tryluainclude (defined elsewhere in
# this script) and return 0 as soon as foundversion reports success.
# Candidates are tried in this order:
#   1. plain "lua.h"
#   2. each directory in CPPDIRS (only while API_VER is below API_MAX)
#   3. include directories reported by pkg-config (only when PKGCONFIG is set)
#   4. each directory in INCDIRS
#   5. if RECURSE is "yes": every lua.h found beneath CPPDIRS, INCDIRS, and
#      the include/ directory that is a sibling of the interpreter's
#      directory, when findlua locates an interpreter
# If no candidate satisfies foundversion, the status is that of the test
# API_VER > 0.
#
findversion() {
	tryluainclude "lua.h"

	if foundversion; then
		return 0
	fi


	# iterate through CPPFLAGS to probe different precedence
	if [ "${API_VER:-0}" -lt "${API_MAX}" ]; then
		IFS=:
		set -- ${CPPDIRS}
		unset IFS

		if [ $# -gt 0 ]; then
			for D; do
				tryluainclude "${D}/lua.h"

				if foundversion; then
					return 0
				fi
			done
		fi
	fi


	if [ -n "${PKGCONFIG}" ]; then
		# stdin comes from /dev/null; stdout must stay connected to the
		# pipe, otherwise sed never sees a package name
		PKGFLAGS="$("${PKGCONFIG}" --list-all </dev/null | sed -ne 's/^\(lua[^ ]*\).*/\1/p' | xargs -- ${PKGCONFIG} --cflags 2>>/dev/null | cat)"
		PKGDIRS="$(idirs "${PKGFLAGS}")"

		IFS=:
		set -- ${PKGDIRS}
		unset IFS

		if [ $# -gt 0 ]; then
			for D; do
				tryluainclude "${D}/lua.h"

				if foundversion; then
					return 0
				fi
			done
		fi
	fi


	IFS=:
	set -- ${INCDIRS}
	unset IFS

	if [ $# -gt 0 ]; then
		for D; do
			tryluainclude "${D}/lua.h"

			if foundversion; then
				return 0
			fi
		done
	fi


	if [ "${RECURSE}" != "yes" ]; then
		[ "${API_VER:-0}" -gt 0 ]
		return $?
	fi


	# recurse into CPPDIRS
	IFS=:
	set -- ${CPPDIRS}
	unset IFS

	if [ $# -gt 0 ]; then
		for D; do
			for F in $(find "${D}" ${MAXDEPTH} ${XDEV} -name lua.h -print 2>>/dev/null); do
				tryluainclude "${F}"

				if foundversion; then
					return 0
				fi
			done
		done
	fi


	# recurse into INCDIRS
	IFS=:
	set -- ${INCDIRS}
	unset IFS

	if [ $# -gt 0 ]; then
		for D; do
			for F in $(find "${D}/." ${MAXDEPTH} ${XDEV} -name lua.h -print 2>>/dev/null); do
				tryluainclude "${F}"

				if foundversion; then
					return 0
				fi
			done
		done
	fi


	# if we can find the lua interpreter, use it as a reference for
	# header locations.
	if findlua; then
		D="${LUA_PATH%/*}"
		D="${D%/*}/include"

		if [ -d "${D}" ]; then
			for F in $(find "${D}" ${MAXDEPTH} ${XDEV} -name lua.h -print 2>>/dev/null); do
				tryluainclude "${F}"

				if foundversion; then
					return 0
				fi
			done
		fi
	fi

	[ "${API_VER:-0}" -gt 0 ]
}


#
# Unlike API version checking, this is less likely to be accurately forward
# compatible.
#
# trylib PATH
#
# Inspect one library file.  Always returns 0; the outcome is reported
# through the LIBLUA_VER/LIBLUA_DIR/LIBLUA_LIB globals (and LIBJIT_* when
# JIT_REQ is "yes"), which are only overwritten when PATH is a better
# candidate than the one already recorded.
#
trylib() {
	if ! testsym "${1}" "lua_newstate"; then
		return 0
	fi

	V=0	# Lua version derived from exported symbols (501 or 502)
	J=0	# LuaJIT version, 0 when luaopen_jit is absent
	D=	# directory part of PATH
	F="${1##*/}"	# file name part of PATH
	L=	# name to pass to -l (file name minus "lib" and extension)

	if [ "${1%/*}" != "${1}" ]; then
		D="${1%/*}"

		# cleanup after Solaris directory prune trick
		if [ "${D##*/./}" != "${D}" ]; then
			D="${D%%/./*}/${D##*/./}"
		else
			D="${D%/.}"
		fi
	fi

	L="${F#lib}"
	L="${L%.so}"
	L="${L%.a}"
	L="${L%.dylib}"


	# FIXME: need more versioning tests
	if testsym "${1}" "lua_getfenv"; then
		V=501
	elif testsym "${1}" "lua_yieldk"; then
		V=502
	else
		return 0
	fi

	# never downgrade from the version already recorded
	[ "$V" -gt 0 -a "$V" -ge "${LIBLUA_VER:-0}" ] || return 0

	# on a version tie require a shorter directory name, unless a
	# LuaJIT library at JIT_MAX is still being looked for
	[ "$V" -gt "${LIBLUA_VER:-0}" -o "${#D}" -lt "${#LIBLUA_DIR}" -o \( "${JIT_REQ}" = "yes" -a "${LIBJIT_VER:-0}" -lt "${JIT_MAX}" \) ] || return 0

	[ "$V" -ge "${API_MIN}" -a "$V" -le "${API_MAX}" ] || return 0


	if [ -n "${JIT_REQ}" ]; then
		# FIXME: need more versioning tests
		if testsym "${1}" "luaopen_jit"; then
			J=20000
		fi

		if [ "${JIT_REQ}" = "skip" ]; then
			[ "${J}" -eq 0 ] || return 0
		elif [ "${JIT_REQ}" = "yes" ]; then
			[ "$J" -ge "${LIBJIT_VER:-0}" ] || return 0
			[ "$J" -gt "${LIBJIT_VER:-0}" -o "${#D}" -lt "${#LIBJIT_DIR}" ] || return 0
			[ "$J" -ge "${JIT_MIN}" ] || return 0
			[ "$J" -le "${JIT_MAX}" ] || return 0

			LIBJIT_VER="$J"
			LIBJIT_DIR="$D"
			LIBJIT_LIB="$L"
		fi
	fi

	LIBLUA_VER="$V"
	LIBLUA_DIR="$D"
	LIBLUA_LIB="$L"
}


#
# foundlib
#
# true if found the best (maximum) possible version, false otherwise
#
# Also false whenever SHORTEST is "yes", so that callers keep scanning the
# remaining candidates.
#
foundlib() {
	if [ "${LIBLUA_VER:-0}" -lt "${API_MAX}" ]; then
		return 1
	fi

	if [ "${JIT_REQ}" = "yes" -a "${LIBJIT_VER:-0}" -lt "${JIT_MAX}" ]; then
		return 1
	fi

	if [ "${SHORTEST}" = "yes" ]; then
		return 1
	fi

	return 0
}


#
# findlib
#
# Feed candidate library files to trylib.  When PKGCONFIG is set, every
# pkg-config library name is first looked up in every pkg-config library
# directory plus /lib, /usr/lib and /usr/local/lib.  Afterwards liblua*.*
# is looked up in LDDIRS and LIBDIRS.  Each directory is scanned
# non-recursively first and then again with RECURSE.  Returns 0 early once
# foundlib is satisfied.
#
findlib() {
	if [ -n "${PKGCONFIG}" ]; then
		# stdin comes from /dev/null; stdout must stay connected to the
		# pipe, otherwise sed never sees a package name
		PKGFLAGS="$("${PKGCONFIG}" --list-all </dev/null | sed -ne 's/^\(lua[^ ]*\).*/\1/p' | xargs -- ${PKGCONFIG} --libs 2>>/dev/null | cat)"
		PKGDIRS="$(ldirs "${PKGFLAGS}")"
		PKGDIRS="${PKGDIRS}${PKGDIRS:+:}/lib:/usr/lib:/usr/local/lib"
		NUMDIRS="$(count "${PKGDIRS}")"
		PKGLIBS="$(xdirs "l" "${PKGFLAGS}")"
		NUMLIBS="$(count "${PKGLIBS}")"
		ALLDIRS="${PKGDIRS}${PKGLIBS:+:}${PKGLIBS}"

		# positional parameters 1..NUMDIRS are directories, the
		# remainder are library names
		IFS=:
		set -- ${ALLDIRS}
		unset IFS

		I=1
		while [ $I -le ${NUMDIRS} ]; do
			K=$((1 + ${NUMDIRS}))
			while [ $K -le $# ]; do
				# indirect lookup of positional parameters $I and
				# $K; a plain assignment keeps the value intact
				# (no word splitting, no printf format parsing)
				eval "findlib_L=\${$I}"
				eval "findlib_l=\${$K}"

				#printf -- "I=$I K=$K $findlib_L/lib$findlib_l*.*\n"

				for findlib_R in no ${RECURSE}; do
					for findlib_lib in $(findpath "lib${findlib_l}*.*" ${findlib_R} "${findlib_L}"); do
						trylib "${findlib_lib}"
					done

					if foundlib; then
						return 0
					fi
				done

				K=$(($K + 1))
			done
			I=$(($I + 1))
		done
	fi

	ALLDIRS="${LDDIRS}${LDDIRS:+:}${LIBDIRS}"

	IFS=:
	set -- ${ALLDIRS}
	unset IFS

	for findlib_D; do
		for findlib_R in no ${RECURSE}; do
			for findlib_lib in $(findpath "liblua*.*" ${findlib_R} "${findlib_D}"); do
				trylib "${findlib_lib}"
			done

			if foundlib; then
				return 0
			fi
		done
	done
}


#
# findpath NAME RECURSE WHERE
#
# Print every path matching the find(1) -name pattern NAME beneath each
# directory of the colon-separated list WHERE.  When RECURSE is "no" the
# search is pruned so that subdirectories are not descended into.
#
findpath() {
	NAME="$1"
	WHERE="$3"

	PRUNE=

	if [ "${2}" = "no" ]; then
		PRUNE="-name . -o -type d -prune -o"
	fi

	[ ${#WHERE} -gt 0 ] || return 0

	IFS=:
	set -- ${WHERE}
	unset IFS

	if [ $# -gt 0 ]; then
		for findpath_D; do
			# search "DIR/." so that the "-name ." prune expression
			# matches the starting point; sed removes the first "/."
			# from each printed path again
			find "${findpath_D}/." ${MAXDEPTH} ${XDEV} ${PRUNE} -name "${NAME}" -print 2>>/dev/null | sed -e 's/\/\.//'
		done
	fi
}


# check setuid and setgid mode
safeperm() {
	[ -f "$1" -a ! -u "$1" -a ! -g "$1" ]
}


#
# findluac [GLOB ...]
#
# Locate a luac binary.  Without arguments the patterns come from GLOB,
# falling back to GLOB_LUAC.  Each pattern is looked up in PATH
# (non-recursively) and in BINDIRS.  On success LUAC_PATH and LUAC_VER are
# set and 0 is returned; otherwise 1.
#
findluac() {
	if [ $# -eq 0 ]; then
		IFS=:
		set -- ${GLOB:-${GLOB_LUAC}}
		unset IFS
	fi

	while [ $# -gt 0 ]; do
		for F in $(findpath "${1}" no "${PATH}"; findpath "${1}" "${RECURSE}" "${BINDIRS}"); do
			[ -x "$F" ] && safeperm "$F" || continue

			# the version banner may be written to stderr, so merge
			# both streams; stdin is /dev/null so the candidate can
			# never wait for input
			V="$("$F" -v </dev/null 2>&1 | head -n1 | sed -ne 's/^Lua \([0123456789][0123456789]*\.[0123456789][0123456789]*\).*/\1/p')"
			: ${V:=0}
			V="$((${V%%.*} * 100 + ${V##*.} % 100))"

			[ "${V}" -gt 0 -a "${V}" -ge "${LUAC_VER:-0}" ] || continue

			[ "${V}" -gt "${LUAC_VER:-0}" -o "${#F}" -lt "${#LUAC_PATH}" ] || continue

			[ "${V}" -ge "${API_MIN}" -a "${V}" -le "${API_MAX}" ] || continue

			# make sure the candidate can actually parse a chunk
			printf "return true" 2>>/dev/null | "${F}" -p - </dev/null >/dev/null 2>&1 || continue

			LUAC_PATH="$F"
			LUAC_VER="$V"

			# stop at the first binary of the maximum version unless
			# the shortest pathname was requested
			[ "${SHORTEST}" = "yes" -o "${LUAC_VER}" -lt "${API_MAX}" ] || break 2
		done

		shift
	done

	if [ -n "${LUAC_PATH}" -a -n "${LUAC_VER}" ]; then
		return 0
	else
		return 1
	fi
}


#
# isinteger STRING
#
# True if STRING is non-empty and consists only of decimal digits.
# NOTE: overwrites the global variable I.
#
isinteger() {
	I="${1}"

	[ "${#I}" -gt 0 ] || return 1

	# strip one leading digit per iteration; fail as soon as the first
	# character is not a digit
	while [ "${#I}" -gt 0 ]; do
		if [ "${I##[0123456789]}" = "${I}" ]; then
			return 1
		fi

		I=${I##[0123456789]}
	done

	return 0
}


#
# checkints VALUE ...
#
# Return 1 and print a diagnostic to stderr for the first VALUE that is
# not a decimal integer.
#
checkints() {
	while [ $# -gt 0 ]; do
		if ! isinteger "${1}"; then
			# pass the value as an argument, not as part of the
			# format, so "%" and "\" in it are printed literally
			printf -- '%s: %s: not a number\n' "${0##*/}" "${1}" >&2
			return 1
		fi

		shift
	done
}


# Only major.minor for matching LUA_VERSION_NUM in lua.h. Also, _VERSION
# only includes major.minor.
885 | lua2num() { 886 | M=0 887 | m="${2:-0}" 888 | 889 | IFS=. 890 | set -- ${1} 891 | unset IFS 892 | 893 | M=${1:-${M}} 894 | m=${2:-${m}} 895 | 896 | checkints $M $m 897 | 898 | printf "$((${M} * 100 + ${m}))\n" 899 | } 900 | 901 | 902 | # All major.minor.patch for matching LUAJIT_VERSION_NUM in luajit.h. 903 | jit2num() { 904 | M=0 905 | m="${2:-0}" 906 | p="${3:-0}" 907 | 908 | IFS=. 909 | set -- ${1} 910 | unset IFS 911 | 912 | M=${1:-${M}} 913 | m=${2:-${m}} 914 | p=${3:-${p}} 915 | 916 | checkints $M $m $p 917 | 918 | printf "$((${M} * 10000 + ${m} * 100 + ${p}))\n" 919 | } 920 | 921 | 922 | mmp2num() { 923 | M="${2:-0}" 924 | m="${3:-0}" 925 | p="${4:-0}" 926 | 927 | IFS=".+-_" 928 | set -- ${1} 929 | unset IFS 930 | 931 | if isinteger "${1:-}"; then 932 | M=${1} 933 | fi 934 | 935 | if isinteger "${2:-}"; then 936 | m=${2} 937 | fi 938 | 939 | if isinteger "${3:-}"; then 940 | p=${3} 941 | fi 942 | 943 | checkints $M $m $p 944 | 945 | printf "$((${M} * 10000 + ${m} * 100 + ${p}))\n" 946 | } 947 | 948 | 949 | findlua() { 950 | if [ $# -eq 0 ]; then 951 | IFS=: 952 | set -- ${GLOB:-${GLOB_LUA}} 953 | unset IFS 954 | fi 955 | 956 | while [ $# -gt 0 ]; do 957 | for F in $(findpath "${1}" no "${PATH}"; findpath "${1}" "${RECURSE}" "${BINDIRS}"); do 958 | [ -x "$F" ] && safeperm "$F" || continue 959 | 960 | V="$("$F" -e 'print(string.match(_VERSION, [[[%d.]+]]))' >/dev/null | head -n1 | sed -ne 's/^\([0123456789][0123456789]*\.[0123456789][0123456789]*\).*/\1/p')" 961 | : ${V:=0} 962 | V="$((${V%%.*} * 100 + ${V##*.} % 100))" 963 | 964 | [ "${V}" -gt 0 -a "${V}" -ge "${LUA_VER:-0}" ] || continue 965 | 966 | [ "${V}" -gt "${LUA_VER:-0}" -o "${#F}" -lt "${#LUA_PATH}" ] || continue 967 | 968 | [ "${V}" -ge "${API_MIN}" -a "${V}" -le "${API_MAX}" ] || continue 969 | 970 | if [ -n "${JIT_REQ}" ]; then 971 | J="$("$F" -v &1 | head -n1 | sed -ne 's/^LuaJIT \([0123456789][0123456789]*\.[0123456789][0123456789]*\.[0123456789][0123456789]*\).*/\1/p')" 972 | J="$(jit2num 
${J:-0})" 973 | 974 | if [ "${JIT_REQ}" = "skip" ]; then 975 | [ "${J}" -eq 0 ] || continue 976 | elif [ "${JIT_REQ}" = "yes" ]; then 977 | [ "${J}" -gt 0 ] || continue 978 | [ "${J}" -ge "${JIT_MIN}" ] || continue 979 | [ "${J}" -le "${JIT_MAX}" ] || continue 980 | fi 981 | fi 982 | 983 | LUA_PATH="$F" 984 | LUA_VER="$V" 985 | 986 | [ "${SHORTEST}" = "yes" -o "${LUA_VER}" -lt "${API_MAX}" ] || break 2 987 | done 988 | 989 | shift 990 | done 991 | 992 | if [ -n "${LUA_PATH}" -a -n "${LUA_VER}" ]; then 993 | return 0 994 | else 995 | return 1 996 | fi 997 | } 998 | 999 | 1000 | ccname() { 1001 | "${CC}" -E - <<-EOF | awk '/sunpro/||/clang/||/gcc/||/other/{ print $1; exit; }' 1002 | #if defined __SUNPRO_C 1003 | sunpro 1004 | #elif defined __clang__ 1005 | clang 1006 | #elif defined __GNUC__ 1007 | gcc 1008 | #else 1009 | other 1010 | #endif 1011 | EOF 1012 | } 1013 | 1014 | 1015 | usage() { 1016 | cat <<-EOF 1017 | usage: ${0##*/} [-I:L:P:d:De:krm:xsv:j:JVh] cppflags|version|lua|luac|... 1018 | -I PATH additional search directory for includes 1019 | -L PATH additional search directory for libraries 1020 | -P PATH additional search directory for binaries 1021 | -d PATH use PATH as sandbox directory; a random 16 byte suffix is 1022 | generated from /dev/urandom and the directory removed on exit 1023 | unless a trailing "/" is present 1024 | (default sandbox is \$TMPDIR/${0##*/}-XXXXXXXXXXXXXXXX) 1025 | -D do not create a sandbox 1026 | -e GLOB glob pattern for finding utilities (lua, luac, etc) 1027 | -k query pkg-config if available 1028 | -r recursively search directories 1029 | -m MAXDEPTH limit recursion to MAXDEPTH (only for GNU and BSD find) 1030 | -x do not cross device mounts when recursing 1031 | -s find shortest pathname, otherwise print first best match 1032 | -v VERSION require specific Lua version or range 1033 | (e.g. "5.1" or "5.1-5.2") 1034 | -j VERSION require specific LuaJIT version or range 1035 | (e.g. 
"2.0.1"; empty ranges like "-" force any LuaJIT version) 1036 | -J skip LuaJIT if encountered 1037 | -V print this script's version information 1038 | -h print this usage message 1039 | 1040 | cppflags print derived additional CPPFLAGS necessary 1041 | ldflags print derived additional LDFLAGS necessary (TODO) 1042 | version print derived Lua API version 1043 | luac print path to luac utility ($(printf "${GLOB_LUA}" | tr ':' ' ')) 1044 | lua print path to lua interpreter ($(printf "${GLOB_LUAC}" | tr ':' ' ')) 1045 | package.path print preferred module install path 1046 | package.cpath print preferred C module install path 1047 | ccname print CC name (e.g. sunpro, clang, gcc) 1048 | evalmacro run internal macro evaluator for debugging 1049 | testsym run internal library symbol reader for debugging 1050 | 1051 | This utility is used to derive compiler flags and filesystem paths 1052 | necessary to utilize Lua, LuaJIT, and particular versions thereof. 1053 | On success it prints the requested information and exits with 0, 1054 | otherwise it fails with an exit status of 1. 1055 | 1056 | Note that cppflags may not print anything if no additional flags are 1057 | required to compile against the requested API version. 1058 | 1059 | When searching, the highest Lua version is preferred. Searching 1060 | stops once the highest version in the allowable range is found 1061 | unless the -s flag is specified. 1062 | 1063 | LuaJIT is treated like any other Lua installation. If an explicit 1064 | LuaJIT version or range is specified, then only LuaJIT installations 1065 | will match. To exclude LuaJIT entirely use the -J switch. 1066 | 1067 | This utility processes the environment variables CC, CPPFLAGS, 1068 | LDFLAGS, and PATH if present. If recursion is requested, then 1069 | directories specified in CPPFLAGS, LDFLAGS, and PATH are also 1070 | recursed. 
1071 | 1072 | If the environment variable CPPFLAGS is empty and no -I options are 1073 | specified directly, then /usr/include and /usr/local/include are 1074 | used when probing for cppflags and API version. 1075 | 1076 | Report bugs to 1077 | EOF 1078 | } 1079 | 1080 | 1081 | version() { 1082 | cat <<-EOF 1083 | luapath $MYVERSION 1084 | vendor $MYVENDOR 1085 | release $MYVERSION 1086 | EOF 1087 | } 1088 | 1089 | 1090 | while getopts I:L:P:d:De:krm:xsv:j:JVh OPT; do 1091 | case "${OPT}" in 1092 | I) 1093 | INCDIRS="${INCDIRS:-}${INCDIRS:+:}${OPTARG}" 1094 | ;; 1095 | L) 1096 | LIBDIRS="${LIBDIRS:-}${LIBDIRS:+:}${OPTARG}" 1097 | ;; 1098 | P) 1099 | BINDIRS="${BINDIRS:-}${BINDIRS:+:}${OPTARG}" 1100 | ;; 1101 | d) 1102 | SANDBOX="${OPTARG}" 1103 | ;; 1104 | D) 1105 | SANDBOX= 1106 | ;; 1107 | e) 1108 | GLOB="${GLOB:-}${GLOB:+:}${OPTARG}" 1109 | ;; 1110 | k) 1111 | PKGCONFIG="$(command -v pkg-config || true)" 1112 | ;; 1113 | r) 1114 | RECURSE=yes 1115 | ;; 1116 | m) 1117 | if [ -n "${OPTARG##[0123456789]}" ]; then 1118 | printf -- "${0##*/}: ${OPTARG}: invalid maxdepth\n" >&2 1119 | exit 1 1120 | fi 1121 | 1122 | if find "${TMPDIR:-/tmp}" -maxdepth ${OPTARG} -prune >>/dev/null 2>&1; then 1123 | MAXDEPTH="-maxdepth ${OPTARG}" 1124 | else 1125 | printf -- "${0##*/}: $(command -v find): -maxdepth unsupported\n" >&2 1126 | fi 1127 | 1128 | ;; 1129 | x) 1130 | XDEV="-xdev" 1131 | ;; 1132 | s) 1133 | SHORTEST=yes 1134 | ;; 1135 | v) 1136 | MIN=${OPTARG%%[,:-]*} 1137 | MAX=${OPTARG##*[,:-]} 1138 | 1139 | API_MIN="$(lua2num ${MIN:-0} 0)" 1140 | API_MAX="$(lua2num ${MAX:-99} 99)" 1141 | 1142 | if [ "${API_MIN}" -gt "${API_MAX}" ]; then 1143 | printf -- "${0##*/}: ${OPTARG}: invalid version range\n" >&2 1144 | exit 1 1145 | fi 1146 | 1147 | ;; 1148 | j) 1149 | MIN=${OPTARG%%[,:-]*} 1150 | MAX=${OPTARG##*[,:-]} 1151 | 1152 | JIT_MIN="$(jit2num ${MIN:-0} 0 0)" 1153 | JIT_MAX="$(jit2num ${MAX:-99} 99 99)" 1154 | 1155 | if [ "${JIT_MIN}" -gt "${JIT_MAX}" ]; then 1156 | printf -- 
"${0##*/}: ${OPTARG}: invalid version range\n" >&2 1157 | exit 1 1158 | fi 1159 | 1160 | JIT_REQ=yes 1161 | ;; 1162 | J) 1163 | JIT_REQ=skip 1164 | ;; 1165 | V) 1166 | version 1167 | exit 0 1168 | ;; 1169 | h) 1170 | usage 1171 | exit 0 1172 | ;; 1173 | *) 1174 | usage >&2 1175 | exit 1 1176 | ;; 1177 | esac 1178 | done 1179 | 1180 | shift $(($OPTIND - 1)) 1181 | 1182 | 1183 | for U in "${CC:-cc}" find grep od rm rmdir sed xargs; do 1184 | if ! command -v "${U}" >>/dev/null 2>&1; then 1185 | printf -- "${0##*/}: ${U}: command not found\n" >&2 1186 | fi 1187 | done 1188 | 1189 | 1190 | if [ -n "${SANDBOX}" ]; then 1191 | if [ "${SANDBOX}" = "${SANDBOX%/}" ]; then 1192 | if [ ! -c "${DEVRANDOM}" ]; then 1193 | # TODO: expand DEVRANDOM into set of different possibilities to check 1194 | printf -- "${0##*/}: ${DEVRANDDOM}: no character random device available\n" >&2 1195 | exit 1 1196 | fi 1197 | 1198 | TMP="${SANDBOX}$(od -An -N8 -tx1 < ${DEVRANDOM} 2>>/dev/null | tr -d ' ')" 1199 | 1200 | if [ ${#TMP} -ne $((${#SANDBOX} + 16)) ]; then 1201 | printf -- "${0##*/}: ${SANDBOX}: unable to generate random suffix\n" >&2 1202 | exit 1 1203 | fi 1204 | 1205 | SANDBOX="${TMP}" 1206 | 1207 | trap "cd .. && rm -f -- ${SANDBOX}/* && rmdir -- ${SANDBOX}" EXIT 1208 | fi 1209 | 1210 | if [ ! 
-d "${SANDBOX}" ]; then 1211 | OMASK="$(umask)" 1212 | umask 0777 1213 | mkdir -m0550 -- "${SANDBOX}" || exit 1 1214 | umask ${OMASK} 1215 | fi 1216 | 1217 | cd ${SANDBOX} 1218 | fi 1219 | 1220 | 1221 | CPPDIRS="$(idirs "${CPPFLAGS:-}")" 1222 | 1223 | if [ -z "${CPPDIRS}" -a -z "${INCDIRS}" ]; then 1224 | INCDIRS="/usr/include:/usr/local/include" 1225 | fi 1226 | 1227 | 1228 | LDDIRS="$(ldirs "${LDFLAGS:-}")" 1229 | 1230 | if [ -z "${LDDIRS}" -a -z "${LIBDIRS}" ]; then 1231 | LIBDIRS="/lib:/usr/lib:/usr/local/lib" 1232 | fi 1233 | 1234 | 1235 | case "${1:-}" in 1236 | cppflags) 1237 | findversion || exit 1 1238 | 1239 | [ "${API_VER:-0}" -gt 0 ] || exit 1 1240 | 1241 | [ -z "${API_DIR:-}" ] || printf -- "-I${API_DIR}\n" 1242 | 1243 | ;; 1244 | ldflags) 1245 | findlib 1246 | 1247 | [ "${LIBLUA_VER:-0}" -gt 0 ] || exit 1 1248 | 1249 | printf -- "-L${LIBLUA_DIR} -l${LIBLUA_LIB}\n" 1250 | 1251 | ;; 1252 | version) 1253 | findversion || exit 1 1254 | 1255 | printf "$(((${API_VER} / 100) % 100)).$((($API_VER) % 100))\n" 1256 | 1257 | ;; 1258 | libv*) 1259 | findlib 1260 | 1261 | [ "${LIBLUA_VER:-0}" -gt 0 ] || exit 1 1262 | 1263 | printf "$(((${LIBLUA_VER} / 100) % 100)).$((($LIBLUA_VER) % 100))\n" 1264 | 1265 | ;; 1266 | luac) 1267 | shift 1268 | 1269 | if [ $# -gt 0 ]; then 1270 | append GLOB $* 1271 | fi 1272 | 1273 | findluac || exit 1 1274 | 1275 | printf -- "${LUAC_PATH}\n" 1276 | 1277 | ;; 1278 | lua) 1279 | shift 1280 | 1281 | if [ $# -gt 0 ]; then 1282 | append GLOB $* 1283 | fi 1284 | 1285 | findlua || exit 1 1286 | 1287 | printf -- "${LUA_PATH}\n" 1288 | 1289 | ;; 1290 | ldir|cdir) 1291 | printf -- "${0##*/}: ${1}: deprecated command\n" >&2 1292 | MODE="${1}" 1293 | shift 1294 | 1295 | if [ $# -gt 0 ]; then 1296 | append GLOB $* 1297 | fi 1298 | 1299 | findlua || exit 1 1300 | 1301 | if [ "${MODE}" = "cdir" ]; then 1302 | findinstalldir package.cpath 1303 | else 1304 | findinstalldir package.path 1305 | fi 1306 | 1307 | ;; 1308 | package.path|package.cpath) 
1309 | findlua || exit 1 1310 | 1311 | findinstalldir "$@" || exit 1 1312 | 1313 | ;; 1314 | ccname) 1315 | ccname 1316 | 1317 | ;; 1318 | evalmacro) 1319 | shift 1320 | 1321 | evalmacro $* 1322 | ;; 1323 | testsym) 1324 | shift 1325 | 1326 | if testsym $*; then 1327 | printf "found\n" 1328 | exit 0 1329 | else 1330 | printf "not found\n" 1331 | exit 1 1332 | fi 1333 | ;; 1334 | *) 1335 | if [ -n "${1:-}" ]; then 1336 | printf -- "${0##*/}: ${1}: unknown command\n" >&2 1337 | else 1338 | printf -- "${0##*/}: no command specified\n" >&2 1339 | fi 1340 | 1341 | exit 1 1342 | ;; 1343 | esac 1344 | 1345 | exit 0 1346 | -------------------------------------------------------------------------------- /phf.cc: -------------------------------------------------------------------------------- 1 | /* ========================================================================== 2 | * phf.cc - Tiny perfect hash function library. 3 | * -------------------------------------------------------------------------- 4 | * Copyright (c) 2014-2015, 2019 William Ahern 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the 8 | * "Software"), to deal in the Software without restriction, including 9 | * without limitation the rights to use, copy, modify, merge, publish, 10 | * distribute, sublicense, and/or sell copies of the Software, and to permit 11 | * persons to whom the Software is furnished to do so, subject to the 12 | * following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included 15 | * in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN 20 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 21 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 22 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 23 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 24 | * ========================================================================== 25 | */ 26 | #include /* CHAR_BIT SIZE_MAX */ 27 | #include /* PRIu32 PRIu64 PRIx64 */ 28 | #include /* UINT32_C UINT64_C uint32_t uint64_t */ 29 | #include /* abort(3) calloc(3) free(3) qsort(3) */ 30 | #include /* memset(3) */ 31 | #include /* errno */ 32 | #include /* assert(3) */ 33 | 34 | #if !PHF_NO_LIBCXX 35 | #include /* std::sort */ 36 | #include /* std::begin std::end */ 37 | #include /* std::nothrow */ 38 | #include /* std::string */ 39 | #endif 40 | #include /* std::is_standard_layout std::is_trivial */ 41 | #include /* std::move */ 42 | 43 | #include "phf.h" 44 | 45 | 46 | #ifndef PHF_HAVE_ATTRIBUTE_FALLTHROUGH 47 | #define PHF_HAVE_ATTRIBUTE_FALLTHROUGH phf_has_attribute(fallthrough) 48 | #endif 49 | 50 | #if PHF_HAVE_ATTRIBUTE_FALLTHROUGH 51 | #define PHF_FALLTHROUGH __attribute__((fallthrough)) 52 | #else 53 | #define PHF_FALLTHROUGH (void)0 /* fall through */ 54 | #endif 55 | 56 | #ifdef __clang__ 57 | #pragma clang diagnostic ignored "-Wunused-function" 58 | #pragma clang diagnostic ignored "-Wunused-label" 59 | #if __cplusplus < 201103L 60 | #pragma clang diagnostic ignored "-Wc++11-long-long" 61 | #endif 62 | #elif PHF_GNUC_PREREQ(4, 6) 63 | #pragma GCC diagnostic ignored "-Wunused-function" 64 | #pragma GCC diagnostic ignored "-Wunused-label" 65 | #if __cplusplus < 201103L 66 | #pragma GCC diagnostic ignored "-Wlong-long" 67 | #pragma GCC diagnostic ignored "-Wformat" // %zu 68 | #endif 69 | #endif 70 | 71 | 72 | /* 73 | * M A C R O R O U T I N E S 74 | * 75 | * Mostly copies of 76 | * 77 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 78 | 79 | #define 
PHF_BITS(T) (sizeof (T) * CHAR_BIT) 80 | #define PHF_HOWMANY(x, y) (((x) + ((y) - 1)) / (y)) 81 | #define PHF_MIN(a, b) (((a) < (b))? (a) : (b)) 82 | #define PHF_MAX(a, b) (((a) > (b))? (a) : (b)) 83 | #define PHF_ROTL(x, y) (((x) << (y)) | ((x) >> (PHF_BITS(x) - (y)))) 84 | #define PHF_COUNTOF(a) (sizeof (a) / sizeof *(a)) 85 | 86 | #if PHF_NO_LIBCXX 87 | #define PHF_IFELSE_LIBCXX(a, b) b 88 | #else 89 | #define PHF_IFELSE_LIBCXX(a, b) a 90 | #endif 91 | 92 | 93 | /* 94 | * A L L O C A T I O N R O U T I N E S 95 | * 96 | * C allocation semantics in a nominally safe manner for C++. 97 | * 98 | * NOTE: phf.cc wasn't written as a proper C++ library, but refactored from 99 | * C to C++ primarily to make use of templates for the convenience of the 100 | * command-line utility, and secondarily as a C++ learning exercise. 101 | * 102 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 103 | 104 | template 105 | static phf_error_t phf_calloc(T **p, size_t count) 106 | { 107 | #if !PHF_NO_LIBCXX 108 | if (!std::is_trivially_copyable::value) { 109 | if (SIZE_MAX / sizeof **p < count) 110 | return ENOMEM; 111 | 112 | if (!(*p = static_cast(malloc(count * sizeof **p)))) 113 | return errno; 114 | 115 | for (size_t i = 0; i < count; i++) 116 | new (&(*p)[i]) T; 117 | 118 | return 0; 119 | } 120 | #else 121 | static_assert(std::is_standard_layout::value && std::is_trivial::value, "compiled without C++ runtime support"); 122 | #endif 123 | if (!(*p = static_cast(calloc(count, sizeof **p)))) 124 | return errno; 125 | 126 | return 0; 127 | } /* phf_calloc() */ 128 | 129 | template 130 | static void phf_freearray(T *p, size_t count) 131 | { 132 | #if !PHF_NO_LIBCXX 133 | if (!std::is_trivially_destructible::value) { 134 | for (size_t i = 0; i < count; i++) 135 | p[i].~T(); 136 | } 137 | #else 138 | static_assert(std::is_standard_layout::value && std::is_trivial::value, "compiled without C++ runtime support"); 139 | #endif 140 | (void)count; 141 | free(p); 142 
| } /* phf_freearray() */ 143 | 144 | 145 | /* 146 | * M O D U L A R A R I T H M E T I C R O U T I N E S 147 | * 148 | * Two modular reduction schemes are supported: bitwise AND and naive 149 | * modular division. For bitwise AND we must round up the values r and m to 150 | * a power of 2. 151 | * 152 | * TODO: Implement and test Barrett reduction as alternative to naive 153 | * modular division. 154 | * 155 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 156 | 157 | /* round up to nearest power of 2 */ 158 | static inline size_t phf_powerup(size_t i) { 159 | #if defined SIZE_MAX 160 | i--; 161 | i |= i >> 1; 162 | i |= i >> 2; 163 | i |= i >> 4; 164 | i |= i >> 8; 165 | i |= i >> 16; 166 | #if SIZE_MAX != 0xffffffffu 167 | i |= i >> 32; 168 | #endif 169 | return ++i; 170 | #else 171 | #error No SIZE_MAX defined 172 | #endif 173 | } /* phf_powerup() */ 174 | 175 | static inline uint64_t phf_a_s_mod_n(uint64_t a, uint64_t s, uint64_t n) { 176 | uint64_t v; 177 | 178 | assert(n <= UINT32_MAX); 179 | 180 | v = 1; 181 | a %= n; 182 | 183 | while (s > 0) { 184 | if (s % 2 == 1) 185 | v = (v * a) % n; 186 | a = (a * a) % n; 187 | s /= 2; 188 | } 189 | 190 | return v; 191 | } /* phf_a_s_mod_n() */ 192 | 193 | /* 194 | * Rabin-Miller primality test adapted from Niels Ferguson and Bruce 195 | * Schneier, "Practical Cryptography" (Wiley, 2003), 201-204. 196 | */ 197 | static inline bool phf_witness(uint64_t n, uint64_t a, uint64_t s, uint64_t t) { 198 | uint64_t v, i; 199 | 200 | assert(a > 0 && a < n); 201 | assert(n <= UINT32_MAX); 202 | 203 | if (1 == (v = phf_a_s_mod_n(a, s, n))) 204 | return 1; 205 | 206 | for (i = 0; v != n - 1; i++) { 207 | if (i == t - 1) 208 | return 0; 209 | v = (v * v) % n; 210 | } 211 | 212 | return 1; 213 | } /* phf_witness() */ 214 | 215 | static inline bool phf_rabinmiller(uint64_t n) { 216 | /* 217 | * Witness 2 is deterministic for all n < 2047. 
Witnesses 2, 7, 61 218 | * are deterministic for all n < 4,759,123,141. 219 | */ 220 | static const int witness[] = { 2, 7, 61 }; 221 | uint64_t s, t, i; 222 | 223 | assert(n <= UINT32_MAX); 224 | 225 | if (n < 3 || n % 2 == 0) 226 | return 0; 227 | 228 | /* derive 2^t * s = n - 1 where s is odd */ 229 | s = n - 1; 230 | t = 0; 231 | while (s % 2 == 0) { 232 | s /= 2; 233 | t++; 234 | } 235 | 236 | /* NB: witness a must be 1 <= a < n */ 237 | if (n < 2047) 238 | return phf_witness(n, 2, s, t); 239 | 240 | for (i = 0; i < PHF_COUNTOF(witness); i++) { 241 | if (!phf_witness(n, witness[i], s, t)) 242 | return 0; 243 | } 244 | 245 | return 1; 246 | } /* phf_rabinmiller() */ 247 | 248 | static inline bool phf_isprime(size_t n) { 249 | static const char map[] = { 0, 0, 2, 3, 0, 5, 0, 7 }; 250 | size_t i; 251 | 252 | if (n < PHF_COUNTOF(map)) 253 | return map[n]; 254 | 255 | for (i = 2; i < PHF_COUNTOF(map); i++) { 256 | if (map[i] && (n % map[i] == 0)) 257 | return 0; 258 | } 259 | 260 | return phf_rabinmiller(n); 261 | } /* phf_isprime() */ 262 | 263 | static inline size_t phf_primeup(size_t n) { 264 | /* NB: 4294967291 is largest 32-bit prime */ 265 | if (n > 4294967291) 266 | return 0; 267 | 268 | while (n < SIZE_MAX && !phf_isprime(n)) 269 | n++; 270 | 271 | return n; 272 | } /* phf_primeup() */ 273 | 274 | 275 | /* 276 | * B I T M A P R O U T I N E S 277 | * 278 | * We use a bitmap to track output hash occupancy when searching for 279 | * displacement values. 
280 | * 281 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 282 | 283 | typedef unsigned long phf_bits_t; 284 | 285 | static inline bool phf_isset(phf_bits_t *set, size_t i) { 286 | return set[i / PHF_BITS(*set)] & ((size_t)1 << (i % PHF_BITS(*set))); 287 | } /* phf_isset() */ 288 | 289 | static inline void phf_setbit(phf_bits_t *set, size_t i) { 290 | set[i / PHF_BITS(*set)] |= ((size_t)1 << (i % PHF_BITS(*set))); 291 | } /* phf_setbit() */ 292 | 293 | static inline void phf_clrbit(phf_bits_t *set, size_t i) { 294 | set[i / PHF_BITS(*set)] &= ~((size_t)1 << (i % PHF_BITS(*set))); 295 | } /* phf_clrbit() */ 296 | 297 | static inline void phf_clrall(phf_bits_t *set, size_t n) { 298 | memset(set, '\0', PHF_HOWMANY(n, PHF_BITS(*set)) * sizeof *set); 299 | } /* phf_clrall() */ 300 | 301 | 302 | /* 303 | * K E Y D E D U P L I C A T I O N 304 | * 305 | * Auxiliary routine to ensure uniqueness of each key in array. 306 | * 307 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 308 | 309 | namespace PHF { 310 | namespace Uniq { 311 | static bool operator!=(const phf_string_t &a, const phf_string_t &b) { 312 | return a.n != b.n || 0 != memcmp(a.p, b.p, a.n); 313 | } 314 | 315 | template 316 | static int cmp(const T *a, const T *b) { 317 | if (*a > *b) 318 | return -1; 319 | if (*a < *b) 320 | return 1; 321 | return 0; 322 | } /* cmp() */ 323 | 324 | template<> 325 | int cmp(const phf_string_t *a, const phf_string_t *b) { 326 | int cmp; 327 | if ((cmp = memcmp(a->p, b->p, PHF_MIN(a->n, b->n)))) 328 | return cmp; 329 | if (a->n > b->n) 330 | return -1; 331 | if (a->n < b->n) 332 | return 1; 333 | return 0; 334 | } /* cmp() */ 335 | 336 | template 337 | static void sort(T k[], const size_t n) { 338 | 339 | #if !PHF_NO_LIBCXX 340 | if (!std::is_trivially_copyable::value) { 341 | std::sort(k, &k[n]); 342 | return; 343 | } 344 | #else 345 | static_assert(std::is_standard_layout::value && std::is_trivial::value, "compiled without 
C++ runtime support"); 346 | #endif 347 | qsort(k, n, sizeof *k, reinterpret_cast(&cmp)); 348 | } /* sort() */ 349 | } /* Uniq:: */ 350 | } /* PHF:: */ 351 | 352 | template 353 | PHF_PUBLIC size_t PHF::uniq(key_t k[], const size_t n) { 354 | using namespace PHF::Uniq; 355 | size_t i, j; 356 | 357 | sort(k, n); 358 | 359 | for (i = 1, j = 0; i < n; i++) { 360 | if (k[i] != k[j]) 361 | k[++j] = k[i]; 362 | } 363 | 364 | return (n > 0)? j + 1 : 0; 365 | } /* PHF::uniq() */ 366 | 367 | template size_t PHF::uniq(uint32_t[], const size_t); 368 | template size_t PHF::uniq(uint64_t[], const size_t); 369 | template size_t PHF::uniq(phf_string_t[], const size_t); 370 | #if !PHF_NO_LIBCXX 371 | template size_t PHF::uniq(std::string[], const size_t); 372 | #endif 373 | 374 | PHF_PUBLIC size_t phf_uniq_uint32(uint32_t k[], const size_t n) { 375 | return PHF::uniq(k, n); 376 | } /* phf_uniq_uint32() */ 377 | 378 | PHF_PUBLIC size_t phf_uniq_uint64(uint64_t k[], const size_t n) { 379 | return PHF::uniq(k, n); 380 | } /* phf_uniq_uint64() */ 381 | 382 | PHF_PUBLIC size_t phf_uniq_string(phf_string_t k[], const size_t n) { 383 | return PHF::uniq(k, n); 384 | } /* phf_uniq_string() */ 385 | 386 | 387 | /* 388 | * H A S H P R I M I T I V E S 389 | * 390 | * Universal hash based on MurmurHash3_x86_32. Variants for 32- and 64-bit 391 | * integer keys, and string keys. 392 | * 393 | * We use a random seed to address the non-cryptographic-strength collision 394 | * resistance of MurmurHash3. A stronger hash like SipHash is just too slow 395 | * and unnecessary for my particular needs. For some environments a 396 | * cryptographically stronger hash may be prudent. 
397 | * 398 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 399 | 400 | static inline uint32_t phf_round32(uint32_t k1, uint32_t h1) { 401 | k1 *= UINT32_C(0xcc9e2d51); 402 | k1 = PHF_ROTL(k1, 15); 403 | k1 *= UINT32_C(0x1b873593); 404 | 405 | h1 ^= k1; 406 | h1 = PHF_ROTL(h1, 13); 407 | h1 = h1 * 5 + UINT32_C(0xe6546b64); 408 | 409 | return h1; 410 | } /* phf_round32() */ 411 | 412 | static inline uint32_t phf_round32(const unsigned char *p, size_t n, uint32_t h1) { 413 | uint32_t k1; 414 | 415 | while (n >= 4) { 416 | k1 = (p[0] << 24) 417 | | (p[1] << 16) 418 | | (p[2] << 8) 419 | | (p[3] << 0); 420 | 421 | h1 = phf_round32(k1, h1); 422 | 423 | p += 4; 424 | n -= 4; 425 | } 426 | 427 | k1 = 0; 428 | 429 | switch (n & 3) { 430 | case 3: 431 | k1 |= p[2] << 8; 432 | PHF_FALLTHROUGH; 433 | case 2: 434 | k1 |= p[1] << 16; 435 | PHF_FALLTHROUGH; 436 | case 1: 437 | k1 |= p[0] << 24; 438 | h1 = phf_round32(k1, h1); 439 | } 440 | 441 | return h1; 442 | } /* phf_round32() */ 443 | 444 | static inline uint32_t phf_round32(phf_string_t k, uint32_t h1) { 445 | return phf_round32(reinterpret_cast(k.p), k.n, h1); 446 | } /* phf_round32() */ 447 | 448 | #if !PHF_NO_LIBCXX 449 | static inline uint32_t phf_round32(std::string k, uint32_t h1) { 450 | return phf_round32(reinterpret_cast(k.c_str()), k.length(), h1); 451 | } /* phf_round32() */ 452 | #endif 453 | 454 | static inline uint32_t phf_mix32(uint32_t h1) { 455 | h1 ^= h1 >> 16; 456 | h1 *= UINT32_C(0x85ebca6b); 457 | h1 ^= h1 >> 13; 458 | h1 *= UINT32_C(0xc2b2ae35); 459 | h1 ^= h1 >> 16; 460 | 461 | return h1; 462 | } /* phf_mix32() */ 463 | 464 | 465 | /* 466 | * g(k) & f(d, k) S P E C I A L I Z A T I O N S 467 | * 468 | * For every key we first calculate g(k). Then for every group of collisions 469 | * from g(k) we search for a displacement value d such that f(d, k) places 470 | * each key into a unique hash slot. 471 | * 472 | * g() and f() are specialized for 32-bit, 64-bit, and string keys. 
473 | * 474 | * g_mod_r() and f_mod_n() are specialized for the method of modular 475 | * reduction--modular division or bitwise AND. bitwise AND is substantially 476 | * faster than modular division, and more than makes up for any space 477 | * inefficiency, particularly for small hash tables. 478 | * 479 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 480 | 481 | /* 32-bit, phf_string_t, and std::string keys */ 482 | template 483 | static inline uint32_t phf_g(T k, uint32_t seed) { 484 | uint32_t h1 = seed; 485 | 486 | h1 = phf_round32(k, h1); 487 | 488 | return phf_mix32(h1); 489 | } /* phf_g() */ 490 | 491 | template 492 | static inline uint32_t phf_f(uint32_t d, T k, uint32_t seed) { 493 | uint32_t h1 = seed; 494 | 495 | h1 = phf_round32(d, h1); 496 | h1 = phf_round32(k, h1); 497 | 498 | return phf_mix32(h1); 499 | } /* phf_f() */ 500 | 501 | 502 | /* 64-bit keys */ 503 | static inline uint32_t phf_g(uint64_t k, uint32_t seed) { 504 | uint32_t h1 = seed; 505 | 506 | h1 = phf_round32(k, h1); 507 | h1 = phf_round32(k >> 32, h1); 508 | 509 | return phf_mix32(h1); 510 | } /* phf_g() */ 511 | 512 | static inline uint32_t phf_f(uint32_t d, uint64_t k, uint32_t seed) { 513 | uint32_t h1 = seed; 514 | 515 | h1 = phf_round32(d, h1); 516 | h1 = phf_round32(static_cast(k), h1); 517 | h1 = phf_round32(static_cast(k >> 32), h1); 518 | 519 | return phf_mix32(h1); 520 | } /* phf_f() */ 521 | 522 | 523 | /* g() and f() which parameterize modular reduction */ 524 | template 525 | static inline uint32_t phf_g_mod_r(T k, uint32_t seed, size_t r) { 526 | return (nodiv)? (phf_g(k, seed) & (r - 1)) : (phf_g(k, seed) % r); 527 | } /* phf_g_mod_r() */ 528 | 529 | template 530 | static inline uint32_t phf_f_mod_m(uint32_t d, T k, uint32_t seed, size_t m) { 531 | return (nodiv)? 
(phf_f(d, k, seed) & (m - 1)) : (phf_f(d, k, seed) % m); 532 | } /* phf_f_mod_m() */ 533 | 534 | 535 | /* 536 | * B U C K E T S O R T I N G I N T E R F A C E S 537 | * 538 | * For every key [0..n) we calculate g(k) % r, where 0 < r <= n, and 539 | * associate it with a bucket [0..r). We then sort the buckets in decreasing 540 | * order according to the number of keys. The sorting is required for both 541 | * optimal time complexity when calculating f(d, k) (less contention) and 542 | * optimal space complexity (smaller d). 543 | * 544 | * The actual sorting is done in the core routine. The buckets are organized 545 | * and sorted as a 1-dimensional array to minimize run-time memory (less 546 | * data structure overhead) and improve data locality (less pointer 547 | * indirection). The following section merely implements a templated 548 | * bucket-key structure and the comparison routine passed to qsort(3). 549 | * 550 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 551 | 552 | static bool operator==(const phf_string_t &a, const phf_string_t &b) { 553 | return a.n == b.n && 0 == memcmp(a.p, b.p, a.n); 554 | } 555 | 556 | static bool operator<(const phf_string_t &a, const phf_string_t &b) { 557 | int cmp = memcmp(a.p, b.p, PHF_MIN(a.n, b.n)); 558 | if (cmp) 559 | return cmp < 0; 560 | return a.n < b.n; 561 | } 562 | 563 | static bool operator>(const phf_string_t &a, const phf_string_t &b) { 564 | int cmp = memcmp(a.p, b.p, PHF_MIN(a.n, b.n)); 565 | if (cmp) 566 | return cmp > 0; 567 | return a.n > b.n; 568 | } 569 | 570 | template 571 | struct phf_key { 572 | T k; 573 | phf_hash_t g; /* result of g(k) % r */ 574 | size_t *n; /* number of keys in bucket g */ 575 | }; /* struct phf_key */ 576 | 577 | template 578 | static int phf_keycmp(const phf_key *a, const phf_key *b) { 579 | if (*(a->n) > *(b->n)) 580 | return -1; 581 | if (*(a->n) < *(b->n)) 582 | return 1; 583 | if (a->g > b->g) 584 | return -1; 585 | if (a->g < b->g) 586 | return 1; 
587 | 588 | /* duplicate key? */ 589 | if (a->k == b->k && a != b) { 590 | assert(!(a->k == b->k)); 591 | abort(); /* if NDEBUG defined */ 592 | } 593 | 594 | return 0; 595 | } /* phf_keycmp() */ 596 | 597 | template 598 | static bool operator>(const phf_key &a, const phf_key &b) { 599 | return phf_keycmp(&a, &b) > 0; 600 | } 601 | 602 | template 603 | static bool operator<(const phf_key &a, const phf_key &b) { 604 | return phf_keycmp(&a, &b) < 0; 605 | } 606 | 607 | template 608 | static void phf_keysort(phf_key k[], const size_t n) { 609 | #if !PHF_NO_LIBCXX 610 | if (!std::is_trivially_copyable::value) { 611 | std::sort(k, &k[n]); 612 | return; 613 | } 614 | #else 615 | static_assert(std::is_standard_layout::value && std::is_trivial::value, "compiled without C++ runtime support"); 616 | #endif 617 | qsort(k, n, sizeof *k, reinterpret_cast(&phf_keycmp)); 618 | } /* phf_keysort() */ 619 | 620 | 621 | /* 622 | * C O R E F U N C T I O N G E N E R A T O R 623 | * 624 | * The entire algorithm is contained in PHF:init. Everything else in this 625 | * source file is either a simple utility routine used by PHF:init, or an 626 | * interface to PHF:init or the generated function state. 
627 | * 628 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 629 | 630 | template 631 | PHF_PUBLIC int PHF::init(struct phf *phf, const key_t k[], const size_t n, const size_t l, const size_t a, const phf_seed_t seed) { 632 | size_t n1 = PHF_MAX(n, 1); /* for computations that require n > 0 */ 633 | size_t l1 = PHF_MAX(l, 1); 634 | size_t a1 = PHF_MAX(PHF_MIN(a, 100), 1); 635 | size_t r; /* number of buckets */ 636 | size_t m; /* size of output array */ 637 | phf_key *B_k = NULL; /* linear bucket-slot array */ 638 | size_t *B_z = NULL; /* number of slots per bucket */ 639 | phf_key *B_p, *B_pe; 640 | phf_bits_t *T = NULL; /* bitmap to track index occupancy */ 641 | phf_bits_t *T_b; /* per-bucket working bitmap */ 642 | size_t T_n; 643 | uint32_t *g = NULL; /* displacement map */ 644 | uint32_t d_max = 0; /* maximum displacement value */ 645 | int error; 646 | 647 | if ((phf->nodiv = nodiv)) { 648 | /* round to power-of-2 so we can use bit masks instead of modulo division */ 649 | r = phf_powerup(n1 / PHF_MIN(l1, n1)); 650 | m = phf_powerup((n1 * 100) / a1); 651 | } else { 652 | r = phf_primeup(PHF_HOWMANY(n1, l1)); 653 | /* XXX: should we bother rounding m to prime number for small n? */ 654 | m = phf_primeup((n1 * 100) / a1); 655 | } 656 | 657 | if (r == 0 || m == 0) 658 | return ERANGE; 659 | 660 | if ((error = phf_calloc(&B_k, n1))) 661 | goto error; 662 | if (!(B_z = static_cast(calloc(r, sizeof *B_z)))) 663 | goto syerr; 664 | 665 | for (size_t i = 0; i < n; i++) { 666 | phf_hash_t g = phf_g_mod_r(k[i], seed, r); 667 | 668 | B_k[i].k = k[i]; 669 | B_k[i].g = g; 670 | B_k[i].n = &B_z[g]; 671 | ++*B_k[i].n; 672 | } 673 | 674 | phf_keysort(B_k, n1); 675 | 676 | T_n = PHF_HOWMANY(m, PHF_BITS(*T)); 677 | if (!(T = static_cast(calloc(T_n * 2, sizeof *T)))) 678 | goto syerr; 679 | T_b = &T[T_n]; /* share single allocation */ 680 | 681 | /* 682 | * FIXME: T_b[] is unnecessary. We could clear T[] the same way we 683 | * clear T_b[]. 
In fact, at the end of generation T_b[] is identical 684 | * to T[] because we don't clear T_b[] on success. 685 | * 686 | * We just need to tweak the current reset logic to stop before the 687 | * key that failed, and then we can elide the commit to T[] at the 688 | * end of the outer loop. 689 | */ 690 | 691 | if (!(g = static_cast(calloc(r, sizeof *g)))) 692 | goto syerr; 693 | 694 | B_p = B_k; 695 | B_pe = &B_k[n]; 696 | 697 | for (; B_p < B_pe && *B_p->n > 0; B_p += *B_p->n) { 698 | phf_key *Bi_p, *Bi_pe; 699 | size_t d = 0; 700 | uint32_t f; 701 | retry: 702 | d++; 703 | Bi_p = B_p; 704 | Bi_pe = B_p + *B_p->n; 705 | 706 | for (; Bi_p < Bi_pe; Bi_p++) { 707 | f = phf_f_mod_m(d, Bi_p->k, seed, m); 708 | 709 | if (phf_isset(T, f) || phf_isset(T_b, f)) { 710 | /* reset T_b[] */ 711 | for (Bi_p = B_p; Bi_p < Bi_pe; Bi_p++) { 712 | f = phf_f_mod_m(d, Bi_p->k, seed, m); 713 | phf_clrbit(T_b, f); 714 | } 715 | 716 | goto retry; 717 | } else { 718 | phf_setbit(T_b, f); 719 | } 720 | } 721 | 722 | /* commit to T[] */ 723 | for (Bi_p = B_p; Bi_p < Bi_pe; Bi_p++) { 724 | f = phf_f_mod_m(d, Bi_p->k, seed, m); 725 | phf_setbit(T, f); 726 | } 727 | 728 | /* commit to g[] */ 729 | g[B_p->g] = d; 730 | d_max = PHF_MAX(d, d_max); 731 | } 732 | 733 | phf->seed = seed; 734 | phf->r = r; 735 | phf->m = m; 736 | 737 | phf->g = g; 738 | g = NULL; 739 | 740 | phf->d_max = d_max; 741 | phf->g_op = (nodiv)? phf::PHF_G_UINT32_BAND_R : phf::PHF_G_UINT32_MOD_R; 742 | phf->g_jmp = NULL; 743 | 744 | error = 0; 745 | 746 | goto clean; 747 | syerr: 748 | error = errno; 749 | error: 750 | (void)0; 751 | clean: 752 | free(g); 753 | free(T); 754 | free(B_z); 755 | phf_freearray(B_k, n1); 756 | 757 | return error; 758 | } /* PHF::init() */ 759 | 760 | 761 | /* 762 | * D I S P L A C E M E N T M A P C O M P A C T I O N 763 | * 764 | * By default the displacement map is an array of uint32_t. 
This routine 765 | * compacts the map by using the smallest primitive type that will fit the 766 | * largest displacement value. 767 | * 768 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 769 | 770 | template 771 | static inline void phf_memmove(dst_t *dst, src_t *src, size_t n) { 772 | for (size_t i = 0; i < n; i++) { 773 | dst_t tmp = src[i]; 774 | dst[i] = tmp; 775 | } 776 | } /* phf_memmove() */ 777 | 778 | PHF_PUBLIC void PHF::compact(struct phf *phf) { 779 | size_t size = 0; 780 | void *tmp; 781 | 782 | switch (phf->g_op) { 783 | case phf::PHF_G_UINT32_MOD_R: 784 | case phf::PHF_G_UINT32_BAND_R: 785 | break; 786 | default: 787 | return; /* already compacted */ 788 | } 789 | 790 | if (phf->d_max <= 255) { 791 | phf_memmove(reinterpret_cast(phf->g), reinterpret_cast(phf->g), phf->r); 792 | phf->g_op = (phf->nodiv)? phf::PHF_G_UINT8_BAND_R : phf::PHF_G_UINT8_MOD_R; 793 | size = sizeof (uint8_t); 794 | } else if (phf->d_max <= 65535) { 795 | phf_memmove(reinterpret_cast(phf->g), reinterpret_cast(phf->g), phf->r); 796 | phf->g_op = (phf->nodiv)? 
phf::PHF_G_UINT16_BAND_R : phf::PHF_G_UINT16_MOD_R; 797 | size = sizeof (uint16_t); 798 | } else { 799 | return; /* nothing to compact */ 800 | } 801 | 802 | /* simply keep old array if realloc fails */ 803 | if ((tmp = realloc(phf->g, phf->r * size))) 804 | phf->g = static_cast(tmp); 805 | } /* PHF::compact() */ 806 | 807 | 808 | /* 809 | * F U N C T I O N G E N E R A T O R & S T A T E I N T E R F A C E S 810 | * 811 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 812 | 813 | template int PHF::init(struct phf *, const uint32_t[], const size_t, const size_t, const size_t, const phf_seed_t); 814 | template int PHF::init(struct phf *, const uint64_t[], const size_t, const size_t, const size_t, const phf_seed_t); 815 | template int PHF::init(struct phf *, const phf_string_t[], const size_t, const size_t, const size_t, const phf_seed_t); 816 | #if !PHF_NO_LIBCXX 817 | template int PHF::init(struct phf *, const std::string[], const size_t, const size_t, const size_t, const phf_seed_t); 818 | #endif 819 | 820 | template int PHF::init(struct phf *, const uint32_t[], const size_t, const size_t, const size_t, const phf_seed_t); 821 | template int PHF::init(struct phf *, const uint64_t[], const size_t, const size_t, const size_t, const phf_seed_t); 822 | template int PHF::init(struct phf *, const phf_string_t[], const size_t, const size_t, const size_t, const phf_seed_t); 823 | #if !PHF_NO_LIBCXX 824 | template int PHF::init(struct phf *, const std::string[], const size_t, const size_t, const size_t, const phf_seed_t); 825 | #endif 826 | 827 | template 828 | static inline phf_hash_t phf_hash_(map_t *g, key_t k, uint32_t seed, size_t r, size_t m) { 829 | if (nodiv) { 830 | uint32_t d = g[phf_g(k, seed) & (r - 1)]; 831 | 832 | return phf_f(d, k, seed) & (m - 1); 833 | } else { 834 | uint32_t d = g[phf_g(k, seed) % r]; 835 | 836 | return phf_f(d, k, seed) % m; 837 | } 838 | } /* phf_hash_() */ 839 | 840 | template 841 | PHF_PUBLIC phf_hash_t 
PHF::hash(struct phf *phf, T k) { 842 | #if PHF_HAVE_COMPUTED_GOTOS && !PHF_NO_COMPUTED_GOTOS 843 | static const void *const jmp[] = { 844 | NULL, 845 | &&uint8_mod_r, &&uint8_band_r, 846 | &&uint16_mod_r, &&uint16_band_r, 847 | &&uint32_mod_r, &&uint32_band_r, 848 | }; 849 | 850 | goto *((phf->g_jmp)? phf->g_jmp : (phf->g_jmp = jmp[phf->g_op])); 851 | 852 | uint8_mod_r: 853 | return phf_hash_(reinterpret_cast(phf->g), k, phf->seed, phf->r, phf->m); 854 | uint8_band_r: 855 | return phf_hash_(reinterpret_cast(phf->g), k, phf->seed, phf->r, phf->m); 856 | uint16_mod_r: 857 | return phf_hash_(reinterpret_cast(phf->g), k, phf->seed, phf->r, phf->m); 858 | uint16_band_r: 859 | return phf_hash_(reinterpret_cast(phf->g), k, phf->seed, phf->r, phf->m); 860 | uint32_mod_r: 861 | return phf_hash_(reinterpret_cast(phf->g), k, phf->seed, phf->r, phf->m); 862 | uint32_band_r: 863 | return phf_hash_(reinterpret_cast(phf->g), k, phf->seed, phf->r, phf->m); 864 | #else 865 | switch (phf->g_op) { 866 | case phf::PHF_G_UINT8_MOD_R: 867 | return phf_hash_(reinterpret_cast(phf->g), k, phf->seed, phf->r, phf->m); 868 | case phf::PHF_G_UINT8_BAND_R: 869 | return phf_hash_(reinterpret_cast(phf->g), k, phf->seed, phf->r, phf->m); 870 | case phf::PHF_G_UINT16_MOD_R: 871 | return phf_hash_(reinterpret_cast(phf->g), k, phf->seed, phf->r, phf->m); 872 | case phf::PHF_G_UINT16_BAND_R: 873 | return phf_hash_(reinterpret_cast(phf->g), k, phf->seed, phf->r, phf->m); 874 | case phf::PHF_G_UINT32_MOD_R: 875 | return phf_hash_(reinterpret_cast(phf->g), k, phf->seed, phf->r, phf->m); 876 | case phf::PHF_G_UINT32_BAND_R: 877 | return phf_hash_(reinterpret_cast(phf->g), k, phf->seed, phf->r, phf->m); 878 | default: 879 | abort(); 880 | return 0; 881 | } 882 | #endif 883 | } /* PHF::hash() */ 884 | 885 | template phf_hash_t PHF::hash(struct phf *, uint32_t); 886 | template phf_hash_t PHF::hash(struct phf *, uint64_t); 887 | template phf_hash_t PHF::hash(struct phf *, phf_string_t); 888 | #if 
!PHF_NO_LIBCXX 889 | template phf_hash_t PHF::hash(struct phf *, std::string); 890 | #endif 891 | 892 | PHF_PUBLIC void PHF::destroy(struct phf *phf) { 893 | free(phf->g); 894 | phf->g = NULL; 895 | } /* PHF::destroy() */ 896 | 897 | PHF_PUBLIC int phf_init_uint32(struct phf *phf, const uint32_t *k, const size_t n, const size_t lambda, const size_t alpha, const phf_seed_t seed, const bool nodiv) { 898 | if (nodiv) 899 | return PHF::init(phf, k, n, lambda, alpha, seed); 900 | else 901 | return PHF::init(phf, k, n, lambda, alpha, seed); 902 | } /* phf_init_uint32() */ 903 | 904 | PHF_PUBLIC int phf_init_uint64(struct phf *phf, const uint64_t *k, const size_t n, const size_t lambda, const size_t alpha, const phf_seed_t seed, const bool nodiv) { 905 | if (nodiv) 906 | return PHF::init(phf, k, n, lambda, alpha, seed); 907 | else 908 | return PHF::init(phf, k, n, lambda, alpha, seed); 909 | } /* phf_init_uint64() */ 910 | 911 | PHF_PUBLIC int phf_init_string(struct phf *phf, const phf_string_t *k, const size_t n, const size_t lambda, const size_t alpha, const phf_seed_t seed, const bool nodiv) { 912 | if (nodiv) 913 | return PHF::init(phf, k, n, lambda, alpha, seed); 914 | else 915 | return PHF::init(phf, k, n, lambda, alpha, seed); 916 | } /* phf_init_string() */ 917 | 918 | PHF_PUBLIC void phf_compact(struct phf *phf) { 919 | PHF::compact(phf); 920 | } /* phf_compact() */ 921 | 922 | PHF_PUBLIC phf_hash_t phf_hash_uint32(struct phf *phf, const uint32_t k) { 923 | return PHF::hash(phf, k); 924 | } /* phf_hash_uint32() */ 925 | 926 | PHF_PUBLIC phf_hash_t phf_hash_uint64(struct phf *phf, const uint64_t k) { 927 | return PHF::hash(phf, k); 928 | } /* phf_hash_uint64() */ 929 | 930 | PHF_PUBLIC phf_hash_t phf_hash_string(struct phf *phf, const phf_string_t k) { 931 | return PHF::hash(phf, k); 932 | } /* phf_hash_string() */ 933 | 934 | PHF_PUBLIC void phf_destroy(struct phf *phf) { 935 | PHF::destroy(phf); 936 | } /* phf_destroy() */ 937 | 938 | 939 | #if PHF_LUALIB 940 | 
#include /* time(2) */ 941 | 942 | #include 943 | 944 | 945 | #if LUA_VERSION_NUM < 502 946 | static int lua_absindex(lua_State *L, int idx) { 947 | return (idx > 0 || idx <= LUA_REGISTRYINDEX)? idx : lua_gettop(L) + idx + 1; 948 | } /* lua_absindex() */ 949 | 950 | #define lua_rawlen(t, index) lua_objlen(t, index) 951 | #endif 952 | 953 | 954 | struct phfctx { 955 | int (*hash)(struct phf *, lua_State *, int index); 956 | struct phf ctx; 957 | }; /* struct phfctx */ 958 | 959 | 960 | static int phf_hash_uint32(struct phf *phf, lua_State *L, int index) { 961 | uint32_t k = static_cast(luaL_checkinteger(L, index)); 962 | 963 | lua_pushinteger(L, static_cast(PHF::hash(phf, k) + 1)); 964 | 965 | return 1; 966 | } /* phf_hash_uint32() */ 967 | 968 | static int phf_hash_uint64(struct phf *phf, lua_State *L, int index) { 969 | uint64_t k = static_cast(luaL_checkinteger(L, index)); 970 | 971 | lua_pushinteger(L, static_cast(PHF::hash(phf, k) + 1)); 972 | 973 | return 1; 974 | } /* phf_hash_uint64() */ 975 | 976 | static int phf_hash_string(struct phf *phf, lua_State *L, int index) { 977 | phf_string_t k; 978 | 979 | k.p = const_cast(luaL_checklstring(L, index, &k.n)); 980 | 981 | lua_pushinteger(L, static_cast(PHF::hash(phf, k) + 1)); 982 | 983 | return 1; 984 | } /* phf_hash_string() */ 985 | 986 | static phf_seed_t phf_seed(lua_State *L) { 987 | return phf_g(static_cast(reinterpret_cast(L)), static_cast(time(NULL))); 988 | } /* phf_seed() */ 989 | 990 | static phf_error_t phf_tokey(lua_State *L, int index, uint32_t *k) { 991 | if (LUA_TNUMBER != lua_type(L, index)) 992 | return EINVAL; 993 | 994 | #if LUA_VERSION_NUM > 502 995 | lua_Integer v; 996 | 997 | v = static_cast(lua_tointeger(L, index)); 998 | 999 | if (v > UINT32_MAX) 1000 | return ERANGE; 1001 | #else 1002 | lua_Number v; 1003 | 1004 | v = static_cast(lua_tonumber(L, index)); 1005 | 1006 | if (v > UINT32_MAX) 1007 | return ERANGE; 1008 | #endif 1009 | *k = static_cast(v); 1010 | 1011 | return 0; 1012 | } /* 
phf_tokey() */ 1013 | 1014 | static phf_error_t phf_tokey(lua_State *L, int index, uint64_t *k) { 1015 | if (LUA_TNUMBER != lua_type(L, index)) 1016 | return EINVAL; 1017 | 1018 | #if LUA_VERSION_NUM > 502 1019 | lua_Integer v; 1020 | 1021 | v = static_cast(lua_tointeger(L, index)); 1022 | #else 1023 | lua_Number v; 1024 | 1025 | v = static_cast(lua_tonumber(L, index)); 1026 | #endif 1027 | *k = static_cast(v); 1028 | 1029 | return 0; 1030 | } /* phf_tokey() */ 1031 | 1032 | static phf_error_t phf_tokey(lua_State *L, int index, phf_string_t *k) { 1033 | if (LUA_TSTRING != lua_type(L, index)) 1034 | return EINVAL; 1035 | 1036 | k->p = const_cast(lua_tolstring(L, index, &k->n)); 1037 | 1038 | return 0; 1039 | } /* phf_tokey() */ 1040 | 1041 | template 1042 | static phf_error_t phf_addkeys(lua_State *L, int index, T **_keys, size_t *_count) { 1043 | T *keys = *_keys, *kp; 1044 | size_t count, i; 1045 | int error; 1046 | 1047 | /* 1048 | * XXX: because of the way phf_addkeys is used in phf_new we cannot 1049 | * support non-trivial types 1050 | */ 1051 | static_assert(std::is_standard_layout::value && std::is_trivial::value, "compiled without C++ runtime support"); 1052 | 1053 | count = lua_rawlen(L, index); 1054 | if (SIZE_MAX / sizeof *keys < count) 1055 | return ENOMEM; 1056 | if (!(keys = static_cast(realloc(keys, count * sizeof *keys)))) 1057 | return errno; 1058 | 1059 | *_keys = kp = keys; 1060 | *_count = count; 1061 | 1062 | for (i = 1; i <= count; i++) { 1063 | lua_rawgeti(L, index, i); 1064 | 1065 | error = phf_tokey(L, -1, kp++); 1066 | 1067 | lua_pop(L, 1); 1068 | 1069 | if (error) 1070 | return error; 1071 | } 1072 | 1073 | *_count = PHF::uniq(keys, count); 1074 | 1075 | return 0; 1076 | } /* phf_addkeys() */ 1077 | 1078 | static int phf_new(lua_State *L) { 1079 | size_t l = static_cast(luaL_optinteger(L, 2, 4)); 1080 | size_t a = static_cast(luaL_optinteger(L, 3, 80)); 1081 | phf_seed_t seed = (lua_isnoneornil(L, 4))? 
phf_seed(L) : static_cast(luaL_checkinteger(L, 4)); 1082 | bool nodiv = static_cast(lua_toboolean(L, 5)); 1083 | void *keys = NULL; 1084 | struct phfctx *phf; 1085 | size_t n = 0; 1086 | int error; 1087 | 1088 | lua_settop(L, 5); 1089 | luaL_checktype(L, 1, LUA_TTABLE); 1090 | 1091 | phf = static_cast(lua_newuserdata(L, sizeof *phf)); 1092 | memset(phf, 0, sizeof *phf); 1093 | 1094 | luaL_getmetatable(L, "PHF*"); 1095 | lua_setmetatable(L, -2); 1096 | 1097 | switch ((error = phf_addkeys(L, 1, reinterpret_cast(&keys), &n))) { 1098 | case 0: 1099 | break; 1100 | case ERANGE: 1101 | goto uint64; 1102 | case EINVAL: 1103 | goto string; 1104 | default: 1105 | goto error; 1106 | } 1107 | 1108 | if (n == 0) 1109 | goto empty; 1110 | 1111 | if ((error = phf_init_uint32(&phf->ctx, reinterpret_cast(keys), n, l, a, seed, nodiv))) 1112 | goto error; 1113 | 1114 | phf->hash = &phf_hash_uint32; 1115 | 1116 | goto done; 1117 | uint64: 1118 | switch ((error = phf_addkeys(L, 1, reinterpret_cast(&keys), &n))) { 1119 | case 0: 1120 | break; 1121 | case EINVAL: 1122 | goto string; 1123 | default: 1124 | goto error; 1125 | } 1126 | 1127 | if (n == 0) 1128 | goto empty; 1129 | 1130 | if ((error = phf_init_uint64(&phf->ctx, reinterpret_cast(keys), n, l, a, seed, nodiv))) 1131 | goto error; 1132 | 1133 | phf->hash = &phf_hash_uint64; 1134 | 1135 | goto done; 1136 | string: 1137 | if ((error = phf_addkeys(L, 1, reinterpret_cast(&keys), &n))) 1138 | goto error; 1139 | 1140 | if (n == 0) 1141 | goto empty; 1142 | 1143 | if ((error = phf_init_string(&phf->ctx, reinterpret_cast(keys), n, l, a, seed, nodiv))) 1144 | goto error; 1145 | 1146 | phf->hash = &phf_hash_string; 1147 | 1148 | goto done; 1149 | done: 1150 | free(keys); 1151 | 1152 | PHF::compact(&phf->ctx); 1153 | 1154 | return 1; 1155 | empty: 1156 | free(keys); 1157 | 1158 | lua_pushstring(L, "empty key set"); 1159 | 1160 | return lua_error(L); 1161 | error: 1162 | free(keys); 1163 | 1164 | lua_pushstring(L, strerror(error)); 1165 | 
1166 | return lua_error(L); 1167 | } /* phf_new() */ 1168 | 1169 | static int phf_r(lua_State *L) { 1170 | struct phfctx *phf = static_cast(luaL_checkudata(L, 1, "PHF*")); 1171 | 1172 | lua_pushinteger(L, static_cast(phf->ctx.r)); 1173 | 1174 | return 1; 1175 | } /* phf_r() */ 1176 | 1177 | static int phf_m(lua_State *L) { 1178 | struct phfctx *phf = static_cast(luaL_checkudata(L, 1, "PHF*")); 1179 | 1180 | lua_pushinteger(L, static_cast(phf->ctx.m)); 1181 | 1182 | return 1; 1183 | } /* phf_m() */ 1184 | 1185 | static int (phf_hash)(lua_State *L) { 1186 | struct phfctx *phf = static_cast(luaL_checkudata(L, 1, "PHF*")); 1187 | 1188 | return phf->hash(&phf->ctx, L, 2); 1189 | } /* phf_hash() */ 1190 | 1191 | static int phf__gc(lua_State *L) { 1192 | struct phfctx *phf = (struct phfctx *)luaL_checkudata(L, 1, "PHF*"); 1193 | 1194 | phf_destroy(&phf->ctx); 1195 | 1196 | return 0; 1197 | } /* phf__gc() */ 1198 | 1199 | static const luaL_Reg phf_methods[] = { 1200 | { "hash", &(phf_hash) }, 1201 | { "r", &phf_r }, 1202 | { "m", &phf_m }, 1203 | { NULL, NULL }, 1204 | }; /* phf_methods[] */ 1205 | 1206 | static const luaL_Reg phf_metatable[] = { 1207 | { "__call", &phf_hash }, 1208 | { "__gc", &phf__gc }, 1209 | { NULL, NULL }, 1210 | }; /* phf_metatable[] */ 1211 | 1212 | static const luaL_Reg phf_globals[] = { 1213 | { "new", &phf_new }, 1214 | { NULL, NULL }, 1215 | }; /* phf_globals[] */ 1216 | 1217 | static void phf_register(lua_State *L, const luaL_Reg *l) { 1218 | #if LUA_VERSION_NUM >= 502 1219 | luaL_setfuncs(L, l, 0); 1220 | #else 1221 | luaL_register(L, NULL, l); 1222 | #endif 1223 | } /* phf_register() */ 1224 | 1225 | extern "C" int luaopen_phf(lua_State *L) { 1226 | if (luaL_newmetatable(L, "PHF*")) { 1227 | phf_register(L, phf_metatable); 1228 | lua_newtable(L); 1229 | phf_register(L, phf_methods); 1230 | lua_setfield(L, -2, "__index"); 1231 | } 1232 | 1233 | lua_pop(L, 1); 1234 | 1235 | lua_newtable(L); 1236 | phf_register(L, phf_globals); 1237 | 1238 | 
return 1; 1239 | } /* luaopen_phf() */ 1240 | 1241 | #endif /* PHF_LUALIB */ 1242 | 1243 | 1244 | #if PHF_MAIN 1245 | 1246 | #include /* arc4random(3) free(3) realloc(3) */ 1247 | #include /* fclose(3) fopen(3) fprintf(3) fread(3) freopen(3) printf(3) */ 1248 | #include /* CLOCKS_PER_SEC clock(3) */ 1249 | #include /* strcmp(3) */ 1250 | #include /* BSD */ 1251 | #include /* getopt(3) */ 1252 | #include /* ffsl(3) */ 1253 | #include /* err(3) errx(3) warnx(3) */ 1254 | 1255 | #ifndef HAVE_VALGRIND_MEMCHECK_H 1256 | #define HAVE_VALGRIND_MEMCHECK_H __has_include() 1257 | #endif 1258 | 1259 | #if HAVE_VALGRIND_MEMCHECK_H 1260 | #include 1261 | #endif 1262 | 1263 | static uint32_t randomseed(void) { 1264 | #if __APPLE__ 1265 | /* 1266 | * As of macOS 10.13.6 ccaes_vng_ctr_crypt and drbg_update in 1267 | * libcorecrypto.dylib trigger a "Conditional jump or move on 1268 | * uninitialized value(s)". As of Valgrind 3.15.0.GIT (20190214) 1269 | * even when suppressed it still taints code indirectly conditioned 1270 | * on the seed value. 
1271 | */ 1272 | uint32_t seed = arc4random(); 1273 | #ifdef VALGRIND_MAKE_MEM_DEFINED 1274 | VALGRIND_MAKE_MEM_DEFINED(&seed, sizeof seed); 1275 | #else 1276 | #warning unable to suppress CoreCrypto CSPRNG uninitialized value tainting 1277 | #endif 1278 | return seed; 1279 | #elif defined BSD /* catchall for modern BSDs, which all have arc4random */ 1280 | return arc4random(); 1281 | #else 1282 | FILE *fp; 1283 | uint32_t seed; 1284 | 1285 | if (!(fp = fopen("/dev/urandom", "r"))) 1286 | err(1, "/dev/urandom"); 1287 | 1288 | if (1 != fread(&seed, sizeof seed, 1, fp)) 1289 | err(1, "/dev/urandom"); 1290 | 1291 | fclose(fp); 1292 | 1293 | return seed; 1294 | #endif 1295 | } /* randomseed() */ 1296 | 1297 | 1298 | template 1299 | static void pushkey(T **k, size_t *n, size_t *z, T kn) { 1300 | if (!(*n < *z)) { 1301 | size_t z1 = PHF_MAX(*z, 1) * 2; 1302 | T *p; 1303 | 1304 | #if !PHF_NO_LIBCXX 1305 | if (!std::is_trivially_copyable::value) { 1306 | int error; 1307 | 1308 | if ((error = phf_calloc(&p, z1))) 1309 | errx(1, "calloc: %s", strerror(error)); 1310 | for (size_t i = 0; i < *n; i++) 1311 | p[i] = std::move((*k)[i]); 1312 | phf_freearray(*k, *z); 1313 | 1314 | goto commit; 1315 | } 1316 | #else 1317 | static_assert(std::is_standard_layout::value && std::is_trivial::value, "compiled without C++ runtime support"); 1318 | #endif 1319 | if (z1 < *z || (SIZE_MAX / sizeof **k) < z1) 1320 | errx(1, "addkey: %s", strerror(ERANGE)); 1321 | 1322 | if (!(p = (T *)realloc(*k, z1 * sizeof **k))) 1323 | err(1, "realloc"); 1324 | 1325 | commit: 1326 | *k = p; 1327 | *z = z1; 1328 | } 1329 | 1330 | (*k)[(*n)++] = std::move(kn); 1331 | } /* pushkey() */ 1332 | 1333 | 1334 | template 1335 | static void addkey(T **k, size_t *n, size_t *z, const char *src) { 1336 | pushkey(k, n, z, static_cast(strtoull(src, NULL, 0))); 1337 | } /* addkey() */ 1338 | 1339 | static void addkey(phf_string_t **k, size_t *n, size_t *z, char *src, size_t len) { 1340 | phf_string_t kn = { (void *)src, 
len }; 1341 | pushkey(k, n, z, kn); 1342 | } /* addkey() */ 1343 | 1344 | static void addkey(phf_string_t **k, size_t *n, size_t *z, char *src) { 1345 | addkey(k, n, z, src, strlen(src)); 1346 | } /* addkey() */ 1347 | 1348 | #if !PHF_NO_LIBCXX 1349 | static void addkey(std::string **k, size_t *n, size_t *z, char *src, size_t len) { 1350 | pushkey(k, n, z, std::string(src, len)); 1351 | } /* addkey() */ 1352 | 1353 | static void addkey(std::string **k, size_t *n, size_t *z, char *src) { 1354 | addkey(k, n, z, src, strlen(src)); 1355 | } /* addkey() */ 1356 | #endif 1357 | 1358 | template 1359 | static void addkeys(T **k, size_t *n, size_t *z, char **src, int count) { 1360 | for (int i = 0; i < count; i++) 1361 | addkey(k, n, z, src[i]); 1362 | } /* addkey() */ 1363 | 1364 | template 1365 | static void addkeys(T **k, size_t *n, size_t *z, FILE *fp, char **data) { 1366 | char *ln = NULL; 1367 | size_t lz = 0; 1368 | ssize_t len; 1369 | 1370 | (void)data; 1371 | 1372 | while ((len = getline(&ln, &lz, fp)) > 0) { 1373 | if (--len > 0) { 1374 | if (ln[len] == '\n') 1375 | ln[len] = '\0'; 1376 | addkey(k, n, z, ln); 1377 | } 1378 | } 1379 | 1380 | free(ln); 1381 | } /* addkeys() */ 1382 | 1383 | /* slurp file into a single string and take pointers */ 1384 | static void addkeys(phf_string_t **k, size_t *n, size_t *z, FILE *fp, char **data) { 1385 | size_t p = 0, pe = 0, tp; 1386 | char buf[BUFSIZ], *tmp; 1387 | size_t buflen; 1388 | 1389 | while ((buflen = fread(buf, 1, sizeof buf, fp))) { 1390 | if (buflen > (pe - p)) { 1391 | if (~buflen < pe || 0 == (pe = phf_powerup(buflen + pe))) 1392 | errx(1, "realloc: %s", strerror(ERANGE)); 1393 | if (!(tmp = (char *)realloc(*data, pe))) 1394 | err(1, "realloc"); 1395 | *data = tmp; 1396 | } 1397 | 1398 | memcpy(*data + p, buf, buflen); 1399 | p += buflen; 1400 | } 1401 | 1402 | for (pe = p, p = 0; p < pe; ) { 1403 | while (p < pe && (*data)[p] == '\n') 1404 | p++; 1405 | 1406 | tp = p; 1407 | 1408 | while (p < pe && (*data)[p] 
!= '\n') 1409 | p++; 1410 | 1411 | if (p > tp) 1412 | addkey(k, n, z, &(*data)[tp], (size_t)(p - tp)); 1413 | } 1414 | } /* addkeys() */ 1415 | 1416 | 1417 | static inline void printkey(phf_string_t &k, phf_hash_t hash) { 1418 | printf("%-32.*s : %" PHF_PRIuHASH "\n", (int)k.n, (char *)k.p, hash); 1419 | } /* printkey() */ 1420 | 1421 | #if !PHF_NO_LIBCXX 1422 | static inline void printkey(std::string &k, phf_hash_t hash) { 1423 | printf("%-32s : %" PHF_PRIuHASH "\n", k.c_str(), hash); 1424 | } /* printkey() */ 1425 | #endif 1426 | 1427 | template 1428 | static inline void printkey(T k, phf_hash_t hash) { 1429 | printf("%llu : %" PHF_PRIuHASH "\n", (unsigned long long)k, hash); 1430 | } /* printkey() */ 1431 | 1432 | template 1433 | static inline void exec(int argc, char **argv, size_t lambda, size_t alpha, size_t seed, bool verbose, bool noprint) { 1434 | T *k = NULL; 1435 | size_t n = 0, z = 0; 1436 | char *data = NULL; 1437 | struct phf phf; 1438 | clock_t begin, end; 1439 | 1440 | addkeys(&k, &n, &z, argv, argc); 1441 | addkeys(&k, &n, &z, stdin, &data); 1442 | 1443 | size_t m = PHF::uniq(k, n); 1444 | if (verbose) 1445 | warnx("loaded %zu keys (%zu duplicates)", m, (n - m)); 1446 | n = m; 1447 | 1448 | begin = clock(); 1449 | PHF::init(&phf, k, n, lambda, alpha, seed); 1450 | end = clock(); 1451 | 1452 | 1453 | if (verbose) { 1454 | warnx("found perfect hash for %zu keys in %fs", n, (double)(end - begin) / CLOCKS_PER_SEC); 1455 | 1456 | begin = clock(); 1457 | PHF::compact(&phf); 1458 | end = clock(); 1459 | warnx("compacted displacement map in %fs", (double)(end - begin) / CLOCKS_PER_SEC); 1460 | 1461 | int d_bits = ffsl((long)phf_powerup(phf.d_max)); 1462 | double k_bits = ((double)phf.r * d_bits) / n; 1463 | double g_load = (double)n / phf.r; 1464 | warnx("r:%zu m:%zu d_max:%zu d_bits:%d k_bits:%.2f g_load:%.2f", phf.r, phf.m, phf.d_max, d_bits, k_bits, g_load); 1465 | 1466 | size_t x = 0; 1467 | begin = clock(); 1468 | for (size_t i = 0; i < n; i++) { 1469 
| x += PHF::hash(&phf, k[i]); 1470 | } 1471 | end = clock(); 1472 | warnx("hashed %zu keys in %fs (x:%zu)", n, (double)(end - begin) / CLOCKS_PER_SEC, x); 1473 | } 1474 | 1475 | if (!noprint) { 1476 | for (size_t i = 0; i < n; i++) { 1477 | printkey(k[i], PHF::hash(&phf, k[i])); 1478 | } 1479 | } 1480 | 1481 | phf_destroy(&phf); 1482 | free(data); 1483 | phf_freearray(k, n); 1484 | } /* exec() */ 1485 | 1486 | static void printprimes(int argc, char **argv) { 1487 | intmax_t n = 0, m = UINT32_MAX; 1488 | char *end; 1489 | 1490 | if (argc > 0) { 1491 | n = strtoimax(argv[0], &end, 0); 1492 | if (end == argv[0] || *end != '\0' || n < 0 || n > UINT32_MAX) 1493 | errx(1, "%s: invalid number", argv[0]); 1494 | n = PHF_MAX(n, 2); 1495 | } 1496 | 1497 | if (argc > 1) { 1498 | m = strtoimax(argv[1], &end, 0); 1499 | if (end == argv[1] || *end != '\0' || m < n || m > UINT32_MAX) 1500 | errx(1, "%s: invalid number", argv[1]); 1501 | } 1502 | 1503 | for (; n <= m; n++) { 1504 | if (phf_isprime(n)) 1505 | printf("%" PRIdMAX "\n", n); 1506 | } 1507 | } /* printprimes() */ 1508 | 1509 | int main(int argc, char **argv) { 1510 | const char *path = "/dev/null"; 1511 | size_t lambda = 4; 1512 | size_t alpha = 80; 1513 | uint32_t seed = randomseed(); 1514 | bool verbose = 0; 1515 | bool noprint = 0; 1516 | bool nodiv = 0; 1517 | enum { 1518 | PHF_UINT32, 1519 | PHF_UINT64, 1520 | PHF_STRING, 1521 | #if !PHF_NO_LIBCXX 1522 | PHF_STD_STRING 1523 | #endif 1524 | } type = PHF_UINT32; 1525 | bool primes = 0; 1526 | extern char *optarg; 1527 | extern int optind; 1528 | int optc; 1529 | 1530 | while (-1 != (optc = getopt(argc, argv, "f:l:a:s:2t:nvph"))) { 1531 | switch (optc) { 1532 | case 'f': 1533 | path = optarg; 1534 | break; 1535 | case 'l': 1536 | lambda = strtoul(optarg, NULL, 0); 1537 | break; 1538 | case 'a': 1539 | alpha = strtoul(optarg, NULL, 0); 1540 | break; 1541 | case 's': 1542 | seed = strtoul(optarg, NULL, 0); 1543 | break; 1544 | case '2': 1545 | nodiv = 1; 1546 | break; 
1547 | case 't': 1548 | if (!strcmp(optarg, "uint32")) { 1549 | type = PHF_UINT32; 1550 | } else if (!strcmp(optarg, "uint64")) { 1551 | type = PHF_UINT64; 1552 | } else if (!strcmp(optarg, "string")) { 1553 | type = PHF_STRING; 1554 | #if !PHF_NO_LIBCXX 1555 | } else if (!strcmp(optarg, "std::string")) { 1556 | type = PHF_STD_STRING; 1557 | #endif 1558 | } else { 1559 | errx(1, "%s: invalid key type", optarg); 1560 | } 1561 | 1562 | break; 1563 | case 'n': 1564 | noprint = 1; 1565 | break; 1566 | case 'v': 1567 | verbose = 1; 1568 | break; 1569 | case 'p': 1570 | primes = 1; 1571 | break; 1572 | case 'h': 1573 | /* FALL THROUGH */ 1574 | default: 1575 | fprintf(optc == 'h'? stdout : stderr, 1576 | "%s [-f:l:a:s:t:2nvph] [key [...]]\n" 1577 | " -f PATH read keys from PATH (- for stdin)\n" 1578 | " -l NUM number of keys per displacement map bucket (reported as g_load)\n" 1579 | " -a PCT hash table load factor (1%% - 100%%)\n" 1580 | " -s SEED random seed\n" 1581 | " -t TYPE parse and hash keys as uint32, uint64, " PHF_IFELSE_LIBCXX("string, or std::string\n", "or string\n") 1582 | " -2 avoid modular division by rounding r and m to power of 2\n" 1583 | " -n do not print key-hash pairs\n" 1584 | " -v report hashing status\n" 1585 | " -p operate like primes(3) utility\n" 1586 | " -h print usage message\n" 1587 | "\n" 1588 | "Report bugs to \n", 1589 | argv[0] 1590 | ); 1591 | 1592 | return optc == 'h'? 
0 : 1; 1593 | } 1594 | } 1595 | 1596 | argc -= optind; 1597 | argv += optind; 1598 | 1599 | if (primes) 1600 | return printprimes(argc, argv), 0; 1601 | 1602 | if (strcmp(path, "-") && !freopen(path, "r", stdin)) 1603 | err(1, "%s", path); 1604 | 1605 | switch (type) { 1606 | case PHF_UINT32: 1607 | if (nodiv) 1608 | exec(argc, argv, lambda, alpha, seed, verbose, noprint); 1609 | else 1610 | exec(argc, argv, lambda, alpha, seed, verbose, noprint); 1611 | break; 1612 | case PHF_UINT64: 1613 | if (nodiv) 1614 | exec(argc, argv, lambda, alpha, seed, verbose, noprint); 1615 | else 1616 | exec(argc, argv, lambda, alpha, seed, verbose, noprint); 1617 | break; 1618 | case PHF_STRING: 1619 | if (nodiv) 1620 | exec(argc, argv, lambda, alpha, seed, verbose, noprint); 1621 | else 1622 | exec(argc, argv, lambda, alpha, seed, verbose, noprint); 1623 | break; 1624 | #if !PHF_NO_LIBCXX 1625 | case PHF_STD_STRING: 1626 | if (nodiv) 1627 | exec(argc, argv, lambda, alpha, seed, verbose, noprint); 1628 | else 1629 | exec(argc, argv, lambda, alpha, seed, verbose, noprint); 1630 | break; 1631 | #endif 1632 | } 1633 | 1634 | return 0; 1635 | } /* main() */ 1636 | 1637 | #endif /* PHF_MAIN */ 1638 | -------------------------------------------------------------------------------- /phf.h: -------------------------------------------------------------------------------- 1 | /* ========================================================================== 2 | * phf.h - Tiny perfect hash function library. 
3 | * -------------------------------------------------------------------------- 4 | * Copyright (c) 2014-2015, 2019 William Ahern 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the 8 | * "Software"), to deal in the Software without restriction, including 9 | * without limitation the rights to use, copy, modify, merge, publish, 10 | * distribute, sublicense, and/or sell copies of the Software, and to permit 11 | * persons to whom the Software is furnished to do so, subject to the 12 | * following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included 15 | * in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 20 | * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 21 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 22 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 23 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 
24 | * ========================================================================== 25 | */ 26 | #ifndef PHF_H 27 | #define PHF_H 28 | 29 | #include /* size_t */ 30 | #include /* UINT32_MAX uint32_t uint64_t */ 31 | #include /* bool */ 32 | #include /* PRIu32 PRIx32 */ 33 | 34 | 35 | /* 36 | * C O M P I L E R F E A T U R E S & D I A G N O S T I C S 37 | * 38 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 39 | 40 | #define PHF_GNUC_PREREQ(M, m) (__GNUC__ > (M) || (__GNUC__ == (M) && __GNUC_MINOR__ >= (m))) 41 | 42 | #ifdef __has_extension 43 | #define phf_has_extension(x) __has_extension(x) 44 | #else 45 | #define phf_has_extension(x) 0 46 | #endif 47 | 48 | #ifdef __has_attribute 49 | #define phf_has_attribute(x) __has_attribute(x) 50 | #else 51 | #define phf_has_attribute(x) 0 52 | #endif 53 | 54 | #ifndef PHF_HAVE_NOEXCEPT 55 | #define PHF_HAVE_NOEXCEPT \ 56 | (__cplusplus >= 201103L || \ 57 | phf_has_extension(cxx_noexcept) || \ 58 | PHF_GNUC_PREREQ(4, 6)) 59 | #endif 60 | 61 | #ifndef PHF_HAVE_GENERIC 62 | #define PHF_HAVE_GENERIC \ 63 | (__STDC_VERSION__ >= 201112L || \ 64 | phf_has_extension(c_generic_selections) || \ 65 | PHF_GNUC_PREREQ(4, 9)) 66 | #endif 67 | 68 | #ifndef PHF_HAVE_BUILTIN_TYPES_COMPATIBLE_P 69 | #define PHF_HAVE_BUILTIN_TYPES_COMPATIBLE_P (__GNUC__ > 0) 70 | #endif 71 | 72 | #ifndef PHF_HAVE_BUILTIN_CHOOSE_EXPR 73 | #define PHF_HAVE_BUILTIN_CHOOSE_EXPR (__GNUC__ > 0) 74 | #endif 75 | 76 | #ifndef PHF_HAVE_ATTRIBUTE_VISIBILITY 77 | #define PHF_HAVE_ATTRIBUTE_VISIBILITY \ 78 | (phf_has_attribute(visibility) || PHF_GNUC_PREREQ(4, 0)) 79 | #endif 80 | 81 | #ifndef PHF_HAVE_COMPUTED_GOTOS 82 | #define PHF_HAVE_COMPUTED_GOTOS (__GNUC__ > 0) 83 | #endif 84 | 85 | #ifdef __clang__ 86 | #pragma clang diagnostic push 87 | #if __cplusplus < 201103L 88 | #pragma clang diagnostic ignored "-Wc++11-extensions" 89 | #pragma clang diagnostic ignored "-Wvariadic-macros" 90 | #endif 91 | #elif PHF_GNUC_PREREQ(4, 6) 92 | #pragma GCC 
diagnostic push 93 | #if __cplusplus < 201103L 94 | #pragma GCC diagnostic ignored "-Wpedantic" 95 | #pragma GCC diagnostic ignored "-Wvariadic-macros" 96 | #endif 97 | #endif 98 | 99 | 100 | /* 101 | * C / C + + V I S I B I L I T Y 102 | * 103 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 104 | 105 | #ifndef PHF_PUBLIC 106 | #define PHF_PUBLIC 107 | #endif 108 | 109 | #ifndef PHF_LOCAL 110 | #if PHF_HAVE_ATTRIBUTE_VISIBILITY 111 | #define PHF_LOCAL __attribute__((visibility("hidden"))) 112 | #else 113 | #define PHF_LOCAL 114 | #endif 115 | #endif 116 | 117 | 118 | /* 119 | * C / C + + S H A R E D T Y P E S 120 | * 121 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 122 | 123 | #define phf_error_t int /* for documentation purposes */ 124 | 125 | #define PHF_HASH_MAX UINT32_MAX 126 | #define PHF_PRIuHASH PRIu32 127 | #define PHF_PRIxHASH PRIx32 128 | 129 | typedef uint32_t phf_hash_t; 130 | typedef uint32_t phf_seed_t; 131 | 132 | typedef struct phf_string { 133 | void *p; 134 | size_t n; 135 | } phf_string_t; 136 | 137 | struct phf { 138 | bool nodiv; 139 | 140 | phf_seed_t seed; 141 | 142 | size_t r; /* number of elements in g */ 143 | size_t m; /* number of elements in perfect hash */ 144 | uint32_t *g; /* displacement map indexed by g(k) % r */ 145 | 146 | size_t d_max; /* maximum displacement value in g */ 147 | 148 | enum { 149 | PHF_G_UINT8_MOD_R = 1, 150 | PHF_G_UINT8_BAND_R, 151 | PHF_G_UINT16_MOD_R, 152 | PHF_G_UINT16_BAND_R, 153 | PHF_G_UINT32_MOD_R, 154 | PHF_G_UINT32_BAND_R, 155 | } g_op; 156 | 157 | const void *g_jmp; 158 | }; /* struct phf */ 159 | 160 | 161 | /* 162 | * C + + I N T E R F A C E S 163 | * 164 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 165 | #ifdef __cplusplus 166 | 167 | #if !PHF_NO_LIBCXX 168 | #include /* std::string */ 169 | #endif 170 | 171 | namespace PHF { 172 | template 173 | PHF_PUBLIC size_t uniq(key_t[], const size_t); 174 | 175 | template 
176 | PHF_PUBLIC phf_error_t init(struct phf *, const key_t[], const size_t, const size_t, const size_t, const phf_seed_t); 177 | 178 | PHF_PUBLIC void compact(struct phf *); 179 | 180 | template 181 | PHF_PUBLIC phf_hash_t hash(struct phf *, key_t); 182 | 183 | PHF_PUBLIC void destroy(struct phf *); 184 | } 185 | 186 | extern template size_t PHF::uniq(uint32_t[], const size_t); 187 | extern template size_t PHF::uniq(uint64_t[], const size_t); 188 | extern template size_t PHF::uniq(phf_string_t[], const size_t); 189 | #if !PHF_NO_LIBCXX 190 | extern template size_t PHF::uniq(std::string[], const size_t); 191 | #endif 192 | 193 | extern template phf_error_t PHF::init(struct phf *, const uint32_t[], const size_t, const size_t, const size_t, const phf_seed_t); 194 | extern template phf_error_t PHF::init(struct phf *, const uint64_t[], const size_t, const size_t, const size_t, const phf_seed_t); 195 | extern template phf_error_t PHF::init(struct phf *, const phf_string_t[], const size_t, const size_t, const size_t, const phf_seed_t); 196 | #if !PHF_NO_LIBCXX 197 | extern template phf_error_t PHF::init(struct phf *, const std::string[], const size_t, const size_t, const size_t, const phf_seed_t); 198 | #endif 199 | 200 | extern template phf_error_t PHF::init(struct phf *, const uint32_t[], const size_t, const size_t, const size_t, const phf_seed_t); 201 | extern template phf_error_t PHF::init(struct phf *, const uint64_t[], const size_t, const size_t, const size_t, const phf_seed_t); 202 | extern template phf_error_t PHF::init(struct phf *, const phf_string_t[], const size_t, const size_t, const size_t, const phf_seed_t); 203 | #if !PHF_NO_LIBCXX 204 | extern template phf_error_t PHF::init(struct phf *, const std::string[], const size_t, const size_t, const size_t, const phf_seed_t); 205 | #endif 206 | 207 | extern template phf_hash_t PHF::hash(struct phf *, uint32_t); 208 | extern template phf_hash_t PHF::hash(struct phf *, uint64_t); 209 | extern template phf_hash_t 
PHF::hash(struct phf *, phf_string_t); 210 | #if !PHF_NO_LIBCXX 211 | extern template phf_hash_t PHF::hash(struct phf *, std::string); 212 | #endif 213 | 214 | #endif /* __cplusplus */ 215 | 216 | 217 | /* 218 | * C 8 9 I N T E R F A C E S 219 | * 220 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 221 | #ifdef __cplusplus 222 | extern "C" { 223 | #endif 224 | 225 | PHF_PUBLIC size_t phf_uniq_uint32(uint32_t *, const size_t); 226 | PHF_PUBLIC size_t phf_uniq_uint64(uint64_t *, const size_t); 227 | PHF_PUBLIC size_t phf_uniq_string(phf_string_t *, const size_t); 228 | 229 | PHF_PUBLIC phf_error_t phf_init_uint32(struct phf *, const uint32_t *, const size_t, const size_t, const size_t, const phf_seed_t, const bool nodiv); 230 | PHF_PUBLIC phf_error_t phf_init_uint64(struct phf *, const uint64_t *, const size_t, const size_t, const size_t, const phf_seed_t, const bool nodiv); 231 | PHF_PUBLIC phf_error_t phf_init_string(struct phf *, const phf_string_t *, const size_t, const size_t, const size_t, const phf_seed_t, const bool nodiv); 232 | 233 | PHF_PUBLIC void phf_compact(struct phf *); 234 | 235 | PHF_PUBLIC phf_hash_t phf_hash_uint32(struct phf *, const uint32_t); 236 | PHF_PUBLIC phf_hash_t phf_hash_uint64(struct phf *, const uint64_t); 237 | PHF_PUBLIC phf_hash_t phf_hash_string(struct phf *, const phf_string_t); 238 | 239 | PHF_PUBLIC void phf_destroy(struct phf *); 240 | 241 | #ifdef __cplusplus 242 | } 243 | #endif 244 | 245 | 246 | /* 247 | * C 1 1 / G N U I N T E R F A C E S 248 | * 249 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 250 | #if PHF_HAVE_GENERIC 251 | 252 | #define phf_uniq(k, n) _Generic(*(k), \ 253 | uint32_t: phf_uniq_uint32, \ 254 | uint64_t: phf_uniq_uint64, \ 255 | phf_string_t: phf_uniq_string)((k), (n)) 256 | 257 | #define phf_init(f, k, ...) 
_Generic(*(k), \ 258 | uint32_t: phf_init_uint32, \ 259 | uint64_t: phf_init_uint64, \ 260 | phf_string_t: phf_init_string)((f), (k), __VA_ARGS__) 261 | 262 | #define phf_hash(f, k) _Generic((k), \ 263 | uint32_t: phf_hash_uint32, \ 264 | uint64_t: phf_hash_uint64, \ 265 | phf_string_t: phf_hash_string)((f), (k)) 266 | 267 | #elif PHF_HAVE_BUILTIN_TYPES_COMPATIBLE_P && PHF_HAVE_BUILTIN_CHOOSE_EXPR 268 | 269 | #define phf_choose(cond, a, b) __builtin_choose_expr(cond, a, b) 270 | #define phf_istype(E, T) __builtin_types_compatible_p(__typeof__(E), T) 271 | 272 | #define phf_uniq(k, n) \ 273 | phf_choose(phf_istype(*(k), uint32_t), phf_uniq_uint32((uint32_t *)(k), (n)), \ 274 | phf_choose(phf_istype(*(k), uint64_t), phf_uniq_uint64((uint64_t *)(k), (n)), \ 275 | phf_choose(phf_istype(*(k), phf_string_t), phf_uniq_string((phf_string_t *)(k), (n)), \ 276 | (void)0))) 277 | 278 | #define phf_init(f, k, ...) \ 279 | phf_choose(phf_istype(*(k), uint32_t), phf_init_uint32((f), (const uint32_t *)(k), __VA_ARGS__), \ 280 | phf_choose(phf_istype(*(k), uint64_t), phf_init_uint64((f), (const uint64_t *)(k), __VA_ARGS__), \ 281 | phf_choose(phf_istype(*(k), phf_string_t), phf_init_string((f), (const phf_string_t *)(k), __VA_ARGS__), \ 282 | (void)0))) 283 | 284 | #define phf_hash(f, k) ((*(phf_hash_t (*)()) \ 285 | phf_choose(phf_istype((k), uint32_t), &phf_hash_uint32, \ 286 | phf_choose(phf_istype((k), uint64_t), &phf_hash_uint64, \ 287 | phf_choose(phf_istype((k), phf_string_t), &phf_hash_string, \ 288 | (void)0))))((f), (k))) 289 | 290 | #endif 291 | 292 | 293 | #ifdef __clang__ 294 | #pragma clang diagnostic pop 295 | #elif PHF_GNUC_PREREQ(4, 6) 296 | #pragma GCC diagnostic pop 297 | #endif 298 | 299 | #endif /* PHF_H */ 300 | --------------------------------------------------------------------------------