├── Makefile ├── xxhsum.sh ├── LICENSE ├── README.md └── xxhash.sh.in /Makefile: -------------------------------------------------------------------------------- 1 | xxhash.sh: xxhash.sh.in 2 | @echo "You monster." 3 | m4 xxhash.sh.in > xxhash.sh 4 | chmod +x xxhash.sh 5 | -------------------------------------------------------------------------------- /xxhsum.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | algorithm=XXH64 5 | if [ $# -gt 1 ] && [ "$1" = "-H0" ]; then 6 | algorithm=XXH32 7 | shift 8 | elif [ $# -gt 1 ] && [ "$1" = "-H1" ]; then 9 | algorithm=XXH64 10 | shift 11 | fi 12 | 13 | # get the hashes 14 | . "$(dirname $0)/xxhash.sh" 15 | 16 | if [ $# -eq 0 ]; then 17 | set -- "$(dirname $0)/xxhash.sh" 18 | fi 19 | 20 | for file in $@; do 21 | DATA=$(hexdump -e '/1 "%02X"' -v -- "$file") 22 | echo "$($algorithm "$DATA" 0) $file" 23 | done 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2012-2016, Yann Collet 4 | Copyright (c) 2019, easyaspi314 (Devin) 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 
16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # xxbash 2 | xxHash in pure Bash (with m4) 3 | 4 | As the title says, this is a pure Bash implementation of XXH32 and XXH64 5 | (m4 is used to expand macros, nothing else). 6 | 7 | **This code is a joke/proof of concept.** The performance 8 | is **terrible**, and you are better off with any other 9 | implementation. XXH64 runs about 150 times slower than 10 | the native implementation, and XXH32 runs about 250 11 | times slower than the native implementation. 12 | 13 | If you use this in production code, you are a monster. 14 | 15 | The original xxHash code can be found at https://github.com/Cyan4973/xxhash, 16 | and you are 150-250 times better off using that implementation. 17 | 18 | Requirements: A Bash with 64-bit arithmetic (preferably 19 | a recent version, although macOS's bash 3.2 works), and m4 for building. 20 | 21 | xxhsum.sh requires hexdump. 22 | 23 | **Usage:** 24 | Don't. 25 | 26 | Ok, this is how you use it: 27 | ``` 28 | ./xxhsum.sh [-H0|-H1] [FILES...] 
(defaults to xxhash.sh) 29 | -H0: XXH32, -H1: XXH64 (default) 30 | 31 | . xxhash.sh 32 | XXH32 "file in hex, no spaces or prefix" "seed" 33 | XXH64 "file in hex, no spaces or prefix" "seed" 34 | ``` 35 | 36 | # License 37 | 38 | ``` 39 | xxHash Library 40 | Copyright (c) 2012-2016, Yann Collet 41 | Copyright (c) 2019, easyaspi314 42 | All rights reserved. 43 | 44 | Redistribution and use in source and binary forms, with or without modification, 45 | are permitted provided that the following conditions are met: 46 | 47 | * Redistributions of source code must retain the above copyright notice, this 48 | list of conditions and the following disclaimer. 49 | 50 | * Redistributions in binary form must reproduce the above copyright notice, this 51 | list of conditions and the following disclaimer in the documentation and/or 52 | other materials provided with the distribution. 53 | 54 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 55 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 56 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 57 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 58 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 59 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 60 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 61 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 62 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
64 | ``` 65 | -------------------------------------------------------------------------------- /xxhash.sh.in: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ######################################################################### 3 | # xxHash - Fast Hash algorithm 4 | # Copyright (C) 2012-2016, Yann Collet 5 | # Copyright (C) 2019, easyaspi314 6 | # 7 | # BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 8 | # 9 | # Redistribution and use in source and binary forms, with or without 10 | # modification, are permitted provided that the following conditions are 11 | # met: 12 | # 13 | # * Redistributions of source code must retain the above copyright 14 | # notice, this list of conditions and the following disclaimer. 15 | # * Redistributions in binary form must reproduce the above 16 | # copyright notice, this list of conditions and the following disclaimer 17 | # in the documentation and/or other materials provided with the 18 | # distribution. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | # 32 | # You can contact the author at : 33 | # - xxHash homepage: http://www.xxhash.com 34 | # - xxHash source repository : https://github.com/Cyan4973/xxHash 35 | ######################################################################### 36 | 37 | ######################################################################### 38 | # This is XXH32 and XXH64 in pure Bash. 39 | # 40 | # This code is a joke. Don't actually use this. It is incredibly slow and 41 | # you are much better using literally any other implementation. 42 | # 43 | # If you actually use this for production code, you are a monster and do 44 | # not deserve access to a computer ever again. 45 | # 46 | # Usage: 47 | # XXH32 "data in hex with no spaces or 0x" "seed" 48 | # XXH64 "data in hex with no spaces or 0x" "seed" 49 | # 50 | # The hash will be printf'd in hex, without a 0x prefix. 51 | # 52 | # Performance: 53 | # 54 | # Default implementation: 55 | # $ time xxh64sum xxhash.sh >/dev/null 56 | # real 0m0.012s 57 | # user 0m0.003s 58 | # sys 0m0.004s 59 | # $ time xxh32sum xxhash.sh >/dev/null 60 | # real 0m0.007s 61 | # user 0m0.002s 62 | # sys 0m0.003s 63 | # 64 | # This implementation: 65 | # $ . xxhash.sh 66 | # $ XXHASH_SH=$(hexdump -e '/1 "%02X"' -v "xxhash.sh") 67 | # $ time XXH64 "$XXHASH_SH" "0" >/dev/null 68 | # real 0m16.672s 69 | # user 0m16.632s 70 | # sys 0m0.036s 71 | # $ time XXH32 "$XXHASH_SH" "0" >/dev/null 72 | # real 0m32.185s 73 | # user 0m32.099s 74 | # sys 0m0.080s 75 | # 76 | # As you can see, performance is terrible for a few reasons: 77 | # - Bash uses signed 64-bit integers. Shifting on them performs an 78 | # arithmetic shift right, so we need to emulate a logical shift 79 | # right. 80 | # - XXH32 is slow because all 32-bit arithmetic needs to be emulated with 81 | # masks. Hopefully some of these masks can be removed someday. 82 | # - It is arithmetic in a Bash script. What do you expect? 83 | # - Maybe we should use functions, idk. 
84 | ######################################################################### 85 | 86 | # This is an m4 script. This inlines everything. 87 | # Note that whitespace is left out in some loops as it slows down bash. 88 | 89 | # Good practice. 90 | set -euo pipefail 91 | 92 | # Bash uses signed arithmetic, so shifting right is wonk. 93 | define(ushr_impl,(($1>>($2))&$3)) 94 | define(ushr,ushr_impl($1,$2,(~(((1<<63)>>($2))<<1)))) 95 | 96 | # Bash reads files in big endian. 97 | define(swap,$(( \ 98 | (((tmp=$1)<<56)&(255<<56))|\ 99 | ((tmp<<40)&(255<<48))|\ 100 | ((tmp<<24)&(255<<40))|\ 101 | ((tmp<<8)&(255<<32))|\ 102 | ((ushr(tmp,8))&(255<<24))|\ 103 | ((ushr(tmp,24))&(255<<16))|\ 104 | ((ushr(tmp,40))&(255<<8))|\ 105 | ((ushr(tmp,56))&255)\ 106 | )) \ 107 | ) 108 | define(swap32,$(( \ 109 | ( ( (tmp=$1) << 24) & 0xff000000 ) | \ 110 | ( (tmp << 8) & 0x00ff0000 ) | \ 111 | ( (ushr(tmp, 8)) & 0x0000ff00 ) | \ 112 | ( (ushr(tmp, 24)) & 0x000000ff ) \ 113 | )) \ 114 | ) 115 | 116 | 117 | define(XXH64_mergeRound, 118 | zero=0 119 | XXH64_round(zero, $2) 120 | $1=$(( (($1 ^ zero) * PRIME64_1) + PRIME64_4 )) 121 | ) 122 | define(XXH_rotl64,(($1<<$2)|(ushr($1,(64-$2))))) 123 | define(XXH_rotl32,(((($1)<<($2))|(($1>>(32-$2)))))&0xFFFFFFFF) 124 | 125 | dnl calculate the right shift magic beforehand 126 | define(XXH64_round,(($1+=(($2)*PRIME64_2)));$1=$(((($1<<31)|ushr_impl($1,33,0x7fffffff) )*PRIME64_1))) 127 | define(XXH64_avalanche, 128 | (( h64 ^= ushr(h64, 33) )) 129 | (( h64 *= PRIME64_2 )) 130 | (( h64 ^= ushr(h64, 29) )) 131 | (( h64 *= PRIME64_3 )) 132 | (( h64 ^= ushr(h64, 32) )) 133 | ) 134 | 135 | # uint64_t XXH64(text_in_hex, seed = 0) 136 | XXH64() 137 | { 138 | dnl Performance is multiplied by 8-10x if we switch to C locale. 
139 | local OLD_LC_ALL="${LC_ALL:-}" 140 | export LC_ALL="C" 141 | 142 | local -i PRIME64_1=0x9E3779B185EBCA87 143 | local -i PRIME64_2=0xC2B2AE3D27D4EB4F 144 | local -i PRIME64_3=0x165667B19E3779F9 145 | local -i PRIME64_4=0x85EBCA77C2B2AE63 146 | local -i PRIME64_5=0x27D4EB2F165667C5 147 | 148 | local -i seed=0 149 | if [ $# -ge 2 ]; then 150 | seed=$2 151 | fi 152 | local inp=" " 153 | local -i tmp=0 154 | local -i zero=0 155 | local -i h64=0 156 | local input="$1" 157 | local -i len=$(( ${#input} / 2 )) 158 | local -i remaining=$len 159 | local -i offset=0 160 | local -i bEnd=$(( ${#input} - 64 )) 161 | if [ $remaining -ge 32 ]; then 162 | local -i v1=$(( seed + PRIME64_1 + PRIME64_2 )) 163 | local -i v2=$(( seed + PRIME64_2 )) 164 | local -i v3=$(( seed )) 165 | local -i v4=$(( seed - PRIME64_1 )) 166 | 167 | dnl tight loop, no whitespace 168 | while [ $offset -le $bEnd ]; do 169 | inp="${input:$offset:64}" 170 | XXH64_round(v1, swap(0x${inp:0:16})) 171 | XXH64_round(v2,swap(0x${inp:16:16})) 172 | XXH64_round(v3,swap(0x${inp:32:16})) 173 | XXH64_round(v4,swap(0x${inp:48:16})) 174 | ((offset+=64)) 175 | done 176 | 177 | h64=$(( (XXH_rotl64(v1, 1)) + (XXH_rotl64(v2, 7)) + (XXH_rotl64(v3, 12)) + (XXH_rotl64(v4, 18)) )) 178 | 179 | XXH64_mergeRound(h64, v1) 180 | XXH64_mergeRound(h64, v2) 181 | XXH64_mergeRound(h64, v3) 182 | XXH64_mergeRound(h64, v4) 183 | else 184 | h64=$((seed + PRIME64_5)); 185 | fi 186 | remaining=$(( ${#input} - offset )) 187 | ((h64 += len)) 188 | while [ $remaining -ge 16 ]; do 189 | local -i k1=0 190 | XXH64_round(k1, swap(0x${input:$offset:16})) 191 | (( offset += 16 )) 192 | h64=$(( h64 ^ k1 )); 193 | h64=$(( (XXH_rotl64(h64, 27) * PRIME64_1) + PRIME64_4 )); 194 | ((remaining -= 16 )); 195 | done 196 | if [ $remaining -ge 8 ]; then 197 | (( h64 ^= (swap32(0x00000000${input:$offset:8}) * PRIME64_1) )) 198 | (( offset += 8 )) 199 | h64=$(( (XXH_rotl64(h64, 23) * PRIME64_2) + PRIME64_3 )) 200 | ((remaining-=8)) 201 | fi 202 | while [ 
$remaining -gt 0 ]; do 203 | (( h64 ^= (0x${input:$offset:2} * PRIME64_5) )) 204 | (( offset += 2 )) 205 | h64=$(( XXH_rotl64(h64, 11) * PRIME64_1 )) 206 | ((remaining-=2)) 207 | done 208 | XXH64_avalanche 209 | 210 | # Reset LC_ALL. 211 | if [ -n "$OLD_LC_ALL"]; then 212 | export LC_ALL="$OLD_LC_ALL" 213 | else 214 | unset LC_ALL 215 | fi 216 | printf "%016x" $h64 217 | } 218 | 219 | # I would kill for 32-bit integers right now. 220 | define(XXH32_round,$1=$(((($1+(($2*PRIME32_2) & 0xFFFFFFFF)) & 0xFFFFFFFF) )); 221 | $1=$(( ((XXH_rotl32($1,13)&0xFFFFFFFF)*PRIME32_1) & 0xFFFFFFFF )) 222 | ) 223 | define(XXH32_avalanche, 224 | h32=$(( (h32 ^ (ushr(h32, 15) & 0xFFFFFFFF) )& 0xFFFFFFFF )) 225 | h32=$(( (h32 * PRIME32_2) & 0xFFFFFFFF )) 226 | h32=$(( (h32 ^ (ushr(h32, 13) & 0xFFFFFFFF) )& 0xFFFFFFFF )) 227 | h32=$(( (h32 * PRIME32_3) & 0xFFFFFFFF )) 228 | h32=$(( (h32 ^ (ushr(h32, 16) & 0xFFFFFFFF)) & 0xFFFFFFFF )) 229 | ) 230 | 231 | # uint32_t XXH32(data, seed = 0) 232 | XXH32() 233 | { 234 | # Performance is multiplied by eightfold if we switch to C locale. 
235 | local OLD_LC_ALL="${LC_ALL:-}" 236 | export LC_ALL="C" 237 | 238 | local -i PRIME32_1=2654435761 239 | local -i PRIME32_2=2246822519 240 | local -i PRIME32_3=3266489917 241 | local -i PRIME32_4=668265263 242 | local -i PRIME32_5=374761393 243 | 244 | local -i seed=0 245 | if [ $# -ge 2 ]; then 246 | seed=$2 247 | fi 248 | local -i tmp=0 249 | local -i zero=0 250 | local -i h32=0 251 | local input="$1" 252 | local -i len=$(( ${#input} / 2 )) 253 | local -i remaining=$len 254 | local -i offset=0 255 | local -i bEnd=$(( ${#input} - 32 )) 256 | if [ $remaining -ge 16 ]; then 257 | local -i v1=$(( ( ( (seed + PRIME32_1) & 0xFFFFFFFF) + PRIME32_2) & 0xFFFFFFFF )) 258 | local -i v2=$(( (seed + PRIME32_2) & 0xFFFFFFFF )) 259 | local -i v3=$(( seed )) 260 | local -i v4=$(( (seed - PRIME32_1) & 0xFFFFFFFF )) 261 | 262 | while [ $offset -le $bEnd ]; do 263 | local inp="${input:$offset:32}" 264 | XXH32_round(v1, swap32(0x${inp:0:8})) 265 | XXH32_round(v2, swap32(0x${inp:8:8})) 266 | XXH32_round(v3, swap32(0x${inp:16:8})) 267 | XXH32_round(v4, swap32(0x${inp:24:8})) 268 | ((offset+=32)) 269 | done 270 | 271 | h32=$(( XXH_rotl32(v1, 1) & 0xFFFFFFFF )) 272 | h32=$(( (h32 + (XXH_rotl32(v2, 7) )) & 0xFFFFFFFF )) 273 | h32=$(( (h32 + (XXH_rotl32(v3, 12) )) & 0xFFFFFFFF )) 274 | h32=$(( (h32 + (XXH_rotl32(v4, 18) )) & 0xFFFFFFFF )) 275 | else 276 | h32=$(( (seed + PRIME32_5) & 0xFFFFFFFF )); 277 | fi 278 | remaining=$(( ${#input} - offset )) 279 | h32=$(( (h32 + (len & 0xFFFFFFFF)) & 0xFFFFFFFF )) 280 | while [ $remaining -ge 8 ]; do 281 | h32=$(( (h32 + ( ( swap32(0x${input:$offset:8}) * PRIME32_3) & 0xFFFFFFFF)) & 0xFFFFFFFF )) 282 | (( offset += 8 )) 283 | h32=$(( ((XXH_rotl32(h32, 17) & 0xFFFFFFFF) * PRIME32_4) & 0xFFFFFFFF )) 284 | ((remaining-=8)) 285 | done 286 | 287 | while [ $remaining -gt 0 ]; do 288 | h32=$(( (h32 + ( (0x${input:$offset:2} * PRIME32_5) & 0xFFFFFFFF) ) & 0xFFFFFFFF)) 289 | (( offset += 2 )) 290 | h32=$(( ((XXH_rotl32(h32, 11) & 0xFFFFFFFF) * 
PRIME32_1) & 0xFFFFFFFF )) 291 | ((remaining-=2)) 292 | done 293 | XXH32_avalanche 294 | 295 | # Reset LC_ALL. 296 | if [ -n "$OLD_LC_ALL"]; then 297 | export LC_ALL="$OLD_LC_ALL" 298 | else 299 | unset LC_ALL 300 | fi 301 | 302 | printf "%08x" $h32 303 | } 304 | --------------------------------------------------------------------------------