├── samples ├── password.word ├── words.english └── words.nato ├── cpp ├── make.sh ├── md5.h ├── order32.h ├── gcs.h ├── main.cpp ├── gcs.cpp └── md5.c ├── test.sh ├── README ├── js ├── gcs.js ├── md5.js └── index.html └── python └── gcs.py /samples/password.word: -------------------------------------------------------------------------------- 1 | password 2 | -------------------------------------------------------------------------------- /cpp/make.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | g++ -O3 -o gcs main.cpp gcs.cpp md5.c 3 | -------------------------------------------------------------------------------- /samples/words.english: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasky/gcs/HEAD/samples/words.english -------------------------------------------------------------------------------- /samples/words.nato: -------------------------------------------------------------------------------- 1 | alpha 2 | bravo 3 | charlie 4 | delta 5 | echo 6 | foxtrot 7 | golf 8 | hotel 9 | india 10 | juliet 11 | kilo 12 | lima 13 | mike 14 | november 15 | oscar 16 | papa 17 | quebec 18 | romeo 19 | sierra 20 | tango 21 | uniform 22 | victor 23 | whiskey 24 | xray 25 | yankee 26 | zulu 27 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | echo Building GCS... 3 | python python/gcs.py build samples/words.english 4 | echo Querying GCS... 5 | python python/gcs.py query affect afect school skool circumvolution circumvollution 6 | echo Single Word Test 7 | python python/gcs.py build samples/password.word 8 | echo Querying GCS... 
9 | python python/gcs.py query password 10 | -------------------------------------------------------------------------------- /cpp/md5.h: -------------------------------------------------------------------------------- 1 | #ifndef MD5_H 2 | #define MD5_H 3 | 4 | #include "order32.h" 5 | #include 6 | 7 | 8 | #ifdef __cplusplus 9 | extern "C" { 10 | #endif 11 | 12 | typedef uint32_t uint32; 13 | 14 | struct MD5Context { 15 | uint32 buf[4]; 16 | uint32 bits[2]; 17 | unsigned char in[64]; 18 | }; 19 | 20 | 21 | void MD5Init(struct MD5Context *ctx); 22 | void MD5Update(struct MD5Context *ctx, const void *buf, unsigned len); 23 | void MD5Final(unsigned char digest[16], struct MD5Context *ctx); 24 | 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | 29 | #endif /* !MD5_H */ 30 | -------------------------------------------------------------------------------- /cpp/order32.h: -------------------------------------------------------------------------------- 1 | #ifndef ORDER32_H 2 | #define ORDER32_H 3 | 4 | #include 5 | #include 6 | 7 | #if CHAR_BIT != 8 8 | #error "unsupported char size" 9 | #endif 10 | 11 | enum 12 | { 13 | O32_LITTLE_ENDIAN = 0x03020100ul, 14 | O32_BIG_ENDIAN = 0x00010203ul, 15 | O32_PDP_ENDIAN = 0x01000302ul 16 | }; 17 | 18 | static const union { unsigned char bytes[4]; uint32_t value; } o32_host_order = 19 | { { 0, 1, 2, 3 } }; 20 | 21 | #define O32_HOST_ORDER (o32_host_order.value) 22 | #define O32_SWAP(x) __builtin_bswap32(x) 23 | 24 | #define O32_HOST_TO_BE(x) ((O32_HOST_ORDER == O32_LITTLE_ENDIAN) ? (O32_SWAP(x)) : (x)) 25 | #define O32_HOST_TO_LE(x) ((O32_HOST_ORDER == O32_BIG_ENDIAN) ? 
(O32_SWAP(x)) : (x)) 26 | 27 | #define O32_BE_TO_HOST(x) O32_HOST_TO_BE(x) 28 | #define O32_LE_TO_HOST(x) O32_HOST_TO_LE(x) 29 | 30 | 31 | #endif 32 | 33 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Simple implementation of the Golomb Compressed Sets (GCS), a statistical 2 | compressed data-structure. It is similar to Bloom filters, but it is far more 3 | compact: given N elements, and P probability of a false positive, an optimal 4 | Bloom filter requires at least N*log2(e)*log2(1/P) bits, where GCS gets the 5 | bar closer to the theoretical minimum of N*log2(1/P). With real-world data sets, 6 | GCS can be 20-30% more compact than a Bloom filter. 7 | 8 | The downside is, of course, speed: GCS is fully compressed so a query is an order of 9 | magnitude slower than Bloom filters. On the other hand, it is not required to 10 | decompress it fully in RAM, so it can be streamed. Thus, GCS makes sense in an 11 | environment where queries are performed at interactive rate and RAM is scarce 12 | compared to the dataset. 13 | 14 | A full explanation of GCS can be found in my blog post on the subject: 15 | http://giovanni.bajo.it/2011/09/golomb-coded-sets/ 16 | 17 | The provided implementations are in Python, C++ and Javascript. They are fully 18 | equivalent, but the C++ and JS implementations cache the GCS in memory, while 19 | the Python implementation streams it from disk. Examples of data sets (English 20 | and Italian dictionaries) are provided to play with them. See test.sh. 
21 | 22 | Test live Javascript implementation here: 23 | http://cybercase.github.io/gcs/ 24 | 25 | See these references for more details: 26 | http://www.imperialviolet.org/2011/04/29/filters.html 27 | http://algo2.iti.uni-karlsruhe.de/singler/publications/cacheefficientbloomfilters-wea2007.pdf 28 | -------------------------------------------------------------------------------- /cpp/gcs.h: -------------------------------------------------------------------------------- 1 | // This is free and unencumbered software released into the public domain. 2 | 3 | // Anyone is free to copy, modify, publish, use, compile, sell, or 4 | // distribute this software, either in source code form or as a compiled 5 | // binary, for any purpose, commercial or non-commercial, and by any 6 | // means. 7 | 8 | // In jurisdictions that recognize copyright laws, the author or authors 9 | // of this software dedicate any and all copyright interest in the 10 | // software to the public domain. We make this dedication for the benefit 11 | // of the public at large and to the detriment of our heirs and 12 | // successors. We intend this dedication to be an overt act of 13 | // relinquishment in perpetuity of all present and future rights to this 14 | // software under copyright law. 15 | 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | // OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | // For more information, please refer to 25 | 26 | #ifndef GCS_H 27 | #define GCS_H 28 | 29 | #include 30 | #include 31 | #include 32 | 33 | typedef uint32_t hash_t; 34 | 35 | class GCSBuilder 36 | { 37 | int N, P; 38 | std::vector values; 39 | 40 | public: 41 | GCSBuilder(int N, int P); 42 | void add(const void *data, int size); 43 | void finalize(std::ostream &f); 44 | }; 45 | 46 | class GCSQuery 47 | { 48 | int N, P; 49 | std::istream &f; 50 | uint8_t *gcs; 51 | int gcs_len; 52 | 53 | public: 54 | GCSQuery(std::istream &f); 55 | ~GCSQuery(); 56 | bool query(const void *data, int size); 57 | }; 58 | 59 | 60 | #endif /* GCS_H */ 61 | 62 | -------------------------------------------------------------------------------- /cpp/main.cpp: -------------------------------------------------------------------------------- 1 | // This is free and unencumbered software released into the public domain. 2 | 3 | // Anyone is free to copy, modify, publish, use, compile, sell, or 4 | // distribute this software, either in source code form or as a compiled 5 | // binary, for any purpose, commercial or non-commercial, and by any 6 | // means. 7 | 8 | // In jurisdictions that recognize copyright laws, the author or authors 9 | // of this software dedicate any and all copyright interest in the 10 | // software to the public domain. We make this dedication for the benefit 11 | // of the public at large and to the detriment of our heirs and 12 | // successors. We intend this dedication to be an overt act of 13 | // relinquishment in perpetuity of all present and future rights to this 14 | // software under copyright law. 15 | 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
19 | // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | // OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | // For more information, please refer to 25 | 26 | #include "gcs.h" 27 | #include 28 | #include 29 | 30 | static void usage(void) 31 | { 32 | std::cerr << "Usage:\n"; 33 | std::cerr << " gcs build /path/to/wordlist\n"; 34 | std::cerr << " gcs query word1 [word2...]\n"; 35 | } 36 | 37 | int main(int argc, char *argv[]) 38 | { 39 | if (argc < 2) 40 | { 41 | usage(); 42 | return 1; 43 | } 44 | 45 | std::string cmd(argv[1]); 46 | if (cmd == "build") 47 | { 48 | if (argc != 3) 49 | { 50 | usage(); 51 | return 1; 52 | } 53 | 54 | std::ifstream f(argv[2], std::ios::binary|std::ios::in); 55 | if (!f.is_open()) 56 | { 57 | std::cerr << "Cannot open file: " << argv[2] << "\n"; 58 | return 1; 59 | } 60 | 61 | int numwords = 0; 62 | std::string line; 63 | 64 | while (std::getline(f,line), !f.eof()) 65 | ++numwords; 66 | f.clear(); 67 | f.seekg(0); 68 | std::cout << "numwords: " << numwords << "\n"; 69 | 70 | GCSBuilder gcs(numwords, 1024); 71 | while (std::getline(f,line), !f.eof()) 72 | gcs.add(line.data(), line.size()); 73 | f.close(); 74 | 75 | std::ofstream out("table.gcs", std::ios::binary|std::ios::out); 76 | if (!out.is_open()) 77 | { 78 | std::cerr << "Cannot open output file: " << "table.gcs" << "\n"; 79 | return 1; 80 | } 81 | gcs.finalize(out); 82 | out.close(); 83 | } 84 | else if (cmd == "query") 85 | { 86 | if (argc < 3) 87 | { 88 | usage(); 89 | return 1; 90 | } 91 | 92 | std::ifstream f("table.gcs", std::ios::binary|std::ios::in); 93 | if (!f.is_open()) 94 | { 95 | std::cerr << "Cannot open table: " << "table.gcs" << "\n"; 96 | return 1; 97 | } 98 | 99 | GCSQuery q(f); 100 | for (int i=2; i < argc; ++i) 101 | { 102 | std::string s(argv[i]); 103 | bool found = q.query(s.data(), 
s.size()); 104 | 105 | std::cout << "Querying for \"" << s << "\": " << 106 | (found ? "TRUE" : "FALSE") << "\n"; 107 | } 108 | } 109 | else 110 | { 111 | std::cerr << "Invalid command: " << cmd << "\n"; 112 | usage(); 113 | return 1; 114 | } 115 | 116 | return 0; 117 | } 118 | -------------------------------------------------------------------------------- /js/gcs.js: -------------------------------------------------------------------------------- 1 | // This is free and unencumbered software released into the public domain. 2 | 3 | // Anyone is free to copy, modify, publish, use, compile, sell, or 4 | // distribute this software, either in source code form or as a compiled 5 | // binary, for any purpose, commercial or non-commercial, and by any 6 | // means. 7 | 8 | // In jurisdictions that recognize copyright laws, the author or authors 9 | // of this software dedicate any and all copyright interest in the 10 | // software to the public domain. We make this dedication for the benefit 11 | // of the public at large and to the detriment of our heirs and 12 | // successors. We intend this dedication to be an overt act of 13 | // relinquishment in perpetuity of all present and future rights to this 14 | // software under copyright law. 15 | 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | // OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | // For more information, please refer to 25 | 26 | function bitreader(arr) { 27 | var offset = 0, 28 | accum = 0, 29 | n = 0; 30 | 31 | function c (n2, v) { 32 | if (typeof v === 'undefined') v = 0; 33 | if (n2 > 8) { 34 | v = v * 256 + c(8); 35 | return c(n2-8, v); 36 | } else { 37 | n -= n2; 38 | if (n < 0) { 39 | if (offset >= arr.length) throw "End of array"; 40 | accum = (accum << 8) | arr[offset++]; 41 | n += 8; 42 | } 43 | v = v * Math.pow(2, n2) + (accum >>> n); 44 | accum &= (1 << n) - 1; 45 | return v; 46 | } 47 | } 48 | return c; 49 | } 50 | 51 | function bitwriter(arr) { 52 | var accum = 0, 53 | n = 0, 54 | tmp = 0; 55 | function c (n2, v2) { 56 | if (n2 > 8) { 57 | n2 -= 8; 58 | tmp = v2 / Math.pow(2, n2) >>> 0; 59 | c(8, tmp); 60 | c(n2, v2 - tmp * Math.pow(2, n2)); 61 | } else { 62 | accum = accum * Math.pow(2, n2) + v2; 63 | n += n2; 64 | if (n >= 8) { 65 | arr.push(accum / Math.pow(2, n-8)); 66 | n -= 8; 67 | accum = accum & ((1 << n) - 1); 68 | } 69 | } 70 | } 71 | 72 | c.close = function () { 73 | if (n !== 0) { 74 | accum = (accum << (8-n)) & 255; 75 | arr.push(accum); 76 | } 77 | }; 78 | 79 | return c; 80 | } 81 | 82 | function gcs_hash(w, N, P) { 83 | h = md5(w); 84 | h = parseInt(h.substring(24,32), 16) % (N*P); 85 | return h; 86 | } 87 | 88 | function golomb_enc(arr, P) { 89 | var logp = Math.round(Math.log(P) * Math.LOG2E); 90 | var f = bitwriter(arr); 91 | function c (v) { 92 | var q = ~~(v / P), 93 | r = v % P; 94 | f(q+1, (1 << (q+1)) - 2); 95 | f(logp, r); 96 | return; 97 | } 98 | 99 | c.close = function () { 100 | f.close(); 101 | }; 102 | 103 | c.write = f; 104 | 105 | return c; 106 | } 107 | 108 | function golomb_dec(arr, P) { 109 | var logp = Math.round(Math.log(P) * Math.LOG2E); 110 | var f = bitreader(arr); 111 | var v; 112 | return function () { 113 | while(1) { 114 | v = 0; 115 | while (f(1)) { 116 | v += P; 117 | } 118 | var tmp = f(logp); 119 | v += tmp; 120 | return v; 121 | } 122 | }; 123 | } 124 | 125 | function 
GCSBuilder(_N, _P) { 126 | var N = _N, P = _P, values = [0], words = []; 127 | 128 | this.add = function (v) { 129 | words.push(v); 130 | values.push(gcs_hash(v, N, P)); 131 | }; 132 | 133 | this.finalize = function () { 134 | var i, 135 | d, 136 | ab, 137 | res = [], 138 | header = new Array(8), 139 | f = golomb_enc(res, P); 140 | values.sort(function (a, b) { return a - b; }); 141 | for (i = 0; i < values.length - 1; i += 1) { 142 | d = values[i+1] - values[i]; 143 | if (d === 0 && i > 0) { 144 | continue; 145 | } 146 | f(d); 147 | } 148 | f.close(); 149 | res = header.concat(res); 150 | res = new Uint8Array(res); 151 | dw = new DataView(res.buffer); 152 | dw.setUint32(0, N); 153 | dw.setUint32(4, P); 154 | return res.buffer; 155 | }; 156 | } 157 | 158 | function GCSQuery(_arrBuff) { 159 | var dw = new DataView(_arrBuff), 160 | N = dw.getUint32(0), 161 | P = dw.getUint32(4), 162 | u8arr = new Uint8Array(_arrBuff, 8); 163 | this.query = function (w) { 164 | var h = gcs_hash(w, N, P), 165 | n = 0, 166 | d, 167 | f = golomb_dec(u8arr, P); 168 | 169 | while (1) { 170 | try { 171 | d = f(); 172 | n += d; 173 | if (h === n) { 174 | return true; 175 | } 176 | if (h < n) { 177 | return false; 178 | } 179 | } catch (err) { 180 | break; 181 | } 182 | } 183 | return false; 184 | }; 185 | } -------------------------------------------------------------------------------- /js/md5.js: -------------------------------------------------------------------------------- 1 | // Source code from: http://www.myersdaily.org/joseph/javascript/md5-text.html 2 | 3 | function md5cycle(x, k) { 4 | var a = x[0], b = x[1], c = x[2], d = x[3]; 5 | 6 | a = ff(a, b, c, d, k[0], 7, -680876936); 7 | d = ff(d, a, b, c, k[1], 12, -389564586); 8 | c = ff(c, d, a, b, k[2], 17, 606105819); 9 | b = ff(b, c, d, a, k[3], 22, -1044525330); 10 | a = ff(a, b, c, d, k[4], 7, -176418897); 11 | d = ff(d, a, b, c, k[5], 12, 1200080426); 12 | c = ff(c, d, a, b, k[6], 17, -1473231341); 13 | b = ff(b, c, d, a, k[7], 
22, -45705983); 14 | a = ff(a, b, c, d, k[8], 7, 1770035416); 15 | d = ff(d, a, b, c, k[9], 12, -1958414417); 16 | c = ff(c, d, a, b, k[10], 17, -42063); 17 | b = ff(b, c, d, a, k[11], 22, -1990404162); 18 | a = ff(a, b, c, d, k[12], 7, 1804603682); 19 | d = ff(d, a, b, c, k[13], 12, -40341101); 20 | c = ff(c, d, a, b, k[14], 17, -1502002290); 21 | b = ff(b, c, d, a, k[15], 22, 1236535329); 22 | 23 | a = gg(a, b, c, d, k[1], 5, -165796510); 24 | d = gg(d, a, b, c, k[6], 9, -1069501632); 25 | c = gg(c, d, a, b, k[11], 14, 643717713); 26 | b = gg(b, c, d, a, k[0], 20, -373897302); 27 | a = gg(a, b, c, d, k[5], 5, -701558691); 28 | d = gg(d, a, b, c, k[10], 9, 38016083); 29 | c = gg(c, d, a, b, k[15], 14, -660478335); 30 | b = gg(b, c, d, a, k[4], 20, -405537848); 31 | a = gg(a, b, c, d, k[9], 5, 568446438); 32 | d = gg(d, a, b, c, k[14], 9, -1019803690); 33 | c = gg(c, d, a, b, k[3], 14, -187363961); 34 | b = gg(b, c, d, a, k[8], 20, 1163531501); 35 | a = gg(a, b, c, d, k[13], 5, -1444681467); 36 | d = gg(d, a, b, c, k[2], 9, -51403784); 37 | c = gg(c, d, a, b, k[7], 14, 1735328473); 38 | b = gg(b, c, d, a, k[12], 20, -1926607734); 39 | 40 | a = hh(a, b, c, d, k[5], 4, -378558); 41 | d = hh(d, a, b, c, k[8], 11, -2022574463); 42 | c = hh(c, d, a, b, k[11], 16, 1839030562); 43 | b = hh(b, c, d, a, k[14], 23, -35309556); 44 | a = hh(a, b, c, d, k[1], 4, -1530992060); 45 | d = hh(d, a, b, c, k[4], 11, 1272893353); 46 | c = hh(c, d, a, b, k[7], 16, -155497632); 47 | b = hh(b, c, d, a, k[10], 23, -1094730640); 48 | a = hh(a, b, c, d, k[13], 4, 681279174); 49 | d = hh(d, a, b, c, k[0], 11, -358537222); 50 | c = hh(c, d, a, b, k[3], 16, -722521979); 51 | b = hh(b, c, d, a, k[6], 23, 76029189); 52 | a = hh(a, b, c, d, k[9], 4, -640364487); 53 | d = hh(d, a, b, c, k[12], 11, -421815835); 54 | c = hh(c, d, a, b, k[15], 16, 530742520); 55 | b = hh(b, c, d, a, k[2], 23, -995338651); 56 | 57 | a = ii(a, b, c, d, k[0], 6, -198630844); 58 | d = ii(d, a, b, c, k[7], 10, 1126891415); 
59 | c = ii(c, d, a, b, k[14], 15, -1416354905); 60 | b = ii(b, c, d, a, k[5], 21, -57434055); 61 | a = ii(a, b, c, d, k[12], 6, 1700485571); 62 | d = ii(d, a, b, c, k[3], 10, -1894986606); 63 | c = ii(c, d, a, b, k[10], 15, -1051523); 64 | b = ii(b, c, d, a, k[1], 21, -2054922799); 65 | a = ii(a, b, c, d, k[8], 6, 1873313359); 66 | d = ii(d, a, b, c, k[15], 10, -30611744); 67 | c = ii(c, d, a, b, k[6], 15, -1560198380); 68 | b = ii(b, c, d, a, k[13], 21, 1309151649); 69 | a = ii(a, b, c, d, k[4], 6, -145523070); 70 | d = ii(d, a, b, c, k[11], 10, -1120210379); 71 | c = ii(c, d, a, b, k[2], 15, 718787259); 72 | b = ii(b, c, d, a, k[9], 21, -343485551); 73 | 74 | x[0] = add32(a, x[0]); 75 | x[1] = add32(b, x[1]); 76 | x[2] = add32(c, x[2]); 77 | x[3] = add32(d, x[3]); 78 | 79 | } 80 | 81 | function cmn(q, a, b, x, s, t) { 82 | a = add32(add32(a, q), add32(x, t)); 83 | return add32((a << s) | (a >>> (32 - s)), b); 84 | } 85 | 86 | function ff(a, b, c, d, x, s, t) { 87 | return cmn((b & c) | ((~b) & d), a, b, x, s, t); 88 | } 89 | 90 | function gg(a, b, c, d, x, s, t) { 91 | return cmn((b & d) | (c & (~d)), a, b, x, s, t); 92 | } 93 | 94 | function hh(a, b, c, d, x, s, t) { 95 | return cmn(b ^ c ^ d, a, b, x, s, t); 96 | } 97 | 98 | function ii(a, b, c, d, x, s, t) { 99 | return cmn(c ^ (b | (~d)), a, b, x, s, t); 100 | } 101 | 102 | function md51(s) { 103 | txt = ''; 104 | var n = s.length, 105 | state = [1732584193, -271733879, -1732584194, 271733878], i; 106 | for (i=64; i<=s.length; i+=64) { 107 | md5cycle(state, md5blk(s.substring(i-64, i))); 108 | } 109 | s = s.substring(i-64); 110 | var tail = [0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0]; 111 | for (i=0; i>2] |= s.charCodeAt(i) << ((i%4) << 3); 113 | tail[i>>2] |= 0x80 << ((i%4) << 3); 114 | if (i > 55) { 115 | md5cycle(state, tail); 116 | for (i=0; i<16; i++) tail[i] = 0; 117 | } 118 | tail[14] = n*8; 119 | md5cycle(state, tail); 120 | return state; 121 | } 122 | 123 | /* there needs to be support for Unicode here, 
124 | * unless we pretend that we can redefine the MD-5 125 | * algorithm for multi-byte characters (perhaps 126 | * by adding every four 16-bit characters and 127 | * shortening the sum to 32 bits). Otherwise 128 | * I suggest performing MD-5 as if every character 129 | * was two bytes--e.g., 0040 0025 = @%--but then 130 | * how will an ordinary MD-5 sum be matched? 131 | * There is no way to standardize text to something 132 | * like UTF-8 before transformation; speed cost is 133 | * utterly prohibitive. The JavaScript standard 134 | * itself needs to look at this: it should start 135 | * providing access to strings as preformed UTF-8 136 | * 8-bit unsigned value arrays. 137 | */ 138 | function md5blk(s) { /* I figured global was faster. */ 139 | var md5blks = [], i; /* Andy King said do it this way. */ 140 | for (i=0; i<64; i+=4) { 141 | md5blks[i>>2] = s.charCodeAt(i) 142 | + (s.charCodeAt(i+1) << 8) 143 | + (s.charCodeAt(i+2) << 16) 144 | + (s.charCodeAt(i+3) << 24); 145 | } 146 | return md5blks; 147 | } 148 | 149 | var hex_chr = '0123456789abcdef'.split(''); 150 | 151 | function rhex(n) 152 | { 153 | var s='', j=0; 154 | for(; j<4; j++) 155 | s += hex_chr[(n >> (j * 8 + 4)) & 0x0F] 156 | + hex_chr[(n >> (j * 8)) & 0x0F]; 157 | return s; 158 | } 159 | 160 | function hex(x) { 161 | for (var i=0; i> 16) + (y >> 16) + (lsw >> 16); 184 | return (msw << 16) | (lsw & 0xFFFF); 185 | } 186 | } 187 | -------------------------------------------------------------------------------- /python/gcs.py: -------------------------------------------------------------------------------- 1 | # This is free and unencumbered software released into the public domain. 2 | 3 | # Anyone is free to copy, modify, publish, use, compile, sell, or 4 | # distribute this software, either in source code form or as a compiled 5 | # binary, for any purpose, commercial or non-commercial, and by any 6 | # means. 
7 | 8 | # In jurisdictions that recognize copyright laws, the author or authors 9 | # of this software dedicate any and all copyright interest in the 10 | # software to the public domain. We make this dedication for the benefit 11 | # of the public at large and to the detriment of our heirs and 12 | # successors. We intend this dedication to be an overt act of 13 | # relinquishment in perpetuity of all present and future rights to this 14 | # software under copyright law. 15 | 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | # IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | # OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | # For more information, please refer to 25 | 26 | from __future__ import division 27 | import sys, math 28 | from hashlib import md5 29 | import struct 30 | from array import array 31 | import codecs 32 | import sys 33 | 34 | def bitwriter(f): 35 | """ 36 | Coroutine to write bits into a file-like. 37 | Use .send((N, V)) to write the N least-significant 38 | bits of V into the file. 39 | Remember to call .close() to flush the internal 40 | state when done. 41 | """ 42 | v = 0 43 | n = 0 44 | try: 45 | while 1: 46 | (n2,v2) = yield None 47 | v <<= n2 48 | v |= v2 & ((1<= 8: 51 | b = (v >> (n-8)) & 255 52 | f.write(chr(b)) 53 | n -= 8 54 | v &= 255 55 | except GeneratorExit: 56 | if n != 0: 57 | v = (v << (8-n)) & 255 58 | f.write(chr(v)) 59 | raise 60 | 61 | def bitreader(f): 62 | """ 63 | Coroutine to read bits from a file-like. 64 | Use .send(N) to read N bits from the file. 
65 | """ 66 | accum = 0 67 | n = 0 68 | v = None 69 | while 1: 70 | n2 = yield v 71 | while n <= n2: 72 | accum <<= 8 73 | try: 74 | accum |= ord(f.read(1)) 75 | except TypeError: 76 | # ord(None) => eof 77 | return 78 | n += 8 79 | v = (accum >> (n-n2)) & ((1< 25 | 26 | #include "gcs.h" 27 | #include "md5.h" 28 | #include "order32.h" 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | #define BITMASK(n) ((1 << (n)) - 1) 35 | 36 | 37 | static uint32_t gcs_hash(const void *data, int size, int N, int P) 38 | { 39 | unsigned char digest[16]; 40 | MD5Context ctx; 41 | 42 | MD5Init(&ctx); 43 | MD5Update(&ctx, (unsigned char*)data, size); 44 | MD5Final(digest, &ctx); 45 | 46 | hash_t dignum = 0; 47 | for (int i = 16-sizeof(hash_t); i < 16; ++i) 48 | { 49 | dignum <<= 8; 50 | dignum += digest[i]; 51 | } 52 | 53 | return dignum % (N*P); 54 | } 55 | 56 | static int floor_log2(int v) 57 | { 58 | return sizeof(int)*8 - __builtin_clz(v-1); 59 | } 60 | 61 | /**************************************************************/ 62 | 63 | class BitWriter 64 | { 65 | private: 66 | std::ostream &f; 67 | unsigned long accum; 68 | int n; 69 | enum { ACCUM_BITS = sizeof(unsigned long)*8 }; 70 | 71 | public: 72 | BitWriter(std::ostream &_f) 73 | : f(_f), accum(0), n(0) {} 74 | 75 | void write(int nbits, unsigned value) 76 | { 77 | assert(nbits >= 0); 78 | while (nbits) 79 | { 80 | int nb = std::min(ACCUM_BITS-n, nbits); 81 | accum <<= nb; 82 | value &= BITMASK(nbits); 83 | accum |= value >> (nbits-nb); 84 | n += nb; 85 | nbits -= nb; 86 | 87 | while (n >= 8) 88 | { 89 | f.put((accum >> (n-8)) & BITMASK(8)); 90 | n -= 8; 91 | accum &= BITMASK(n); 92 | } 93 | } 94 | } 95 | 96 | void flush(void) 97 | { 98 | if (n > 0) 99 | { 100 | assert(n < 8); 101 | f.put(accum & BITMASK(8)); 102 | n = 0; 103 | accum = 0; 104 | } 105 | } 106 | }; 107 | 108 | 109 | class GolombEncoder 110 | { 111 | private: 112 | BitWriter f; 113 | int P, log2P; 114 | 115 | public: 116 | 
GolombEncoder(std::ostream &_f, int _P) 117 | : f(_f), P(_P) 118 | { 119 | log2P = floor_log2(P); 120 | assert(log2P > 0); 121 | } 122 | 123 | void encode(hash_t value) 124 | { 125 | hash_t q = value / P; 126 | hash_t r = value - q*P; 127 | 128 | f.write(q+1, BITMASK(q)<<1); 129 | f.write(log2P, r); 130 | } 131 | 132 | void flush(void) 133 | { 134 | f.flush(); 135 | } 136 | }; 137 | 138 | 139 | GCSBuilder::GCSBuilder(int _n, int _p) 140 | : N(_n), P(_p) 141 | { 142 | assert(N <= ~(hash_t)0 / P); 143 | values.reserve(N); 144 | values.push_back(0); 145 | } 146 | 147 | void GCSBuilder::add(const void *data, int size) 148 | { 149 | hash_t h = gcs_hash(data, size, N, P); 150 | values.push_back(h); 151 | } 152 | 153 | void GCSBuilder::finalize(std::ostream& f) 154 | { 155 | std::sort(values.begin(), values.end()); 156 | 157 | int32_t v = O32_HOST_TO_BE(N); 158 | f.write((char*)&v, 4); 159 | v = O32_HOST_TO_BE(P); 160 | f.write((char*)&v, 4); 161 | 162 | GolombEncoder ge(f, P); 163 | for (int i=0; i<(int)values.size()-1; ++i) 164 | { 165 | hash_t diff = values[i+1] - values[i]; 166 | if (diff != 0) 167 | ge.encode(diff); 168 | } 169 | ge.flush(); 170 | } 171 | 172 | 173 | /**************************************************************/ 174 | 175 | class BitReader 176 | { 177 | private: 178 | uint8_t *data; 179 | int len; 180 | uint32_t accum; 181 | int n; 182 | 183 | public: 184 | BitReader(uint8_t *data_, int len_) 185 | : data(data_), len(len_), accum(0), n(0) 186 | {} 187 | 188 | bool eof(void) 189 | { 190 | return (len == 0 && n == 0); 191 | } 192 | 193 | uint32_t read(int nbits) 194 | { 195 | assert(nbits < 32); 196 | 197 | uint32_t ret = 0; 198 | while (nbits) 199 | { 200 | if (!n) 201 | { 202 | if (len > 4) 203 | { 204 | accum = ((uint32_t)data[0] << 24) | 205 | ((uint32_t)data[1] << 16) | 206 | ((uint32_t)data[2] << 8) | 207 | ((uint32_t)data[3]); 208 | data += 4; 209 | len -= 4; 210 | n += 32; 211 | } 212 | else if (len > 0) 213 | { 214 | accum = *data++; 215 | 
--len; 216 | n += 8; 217 | } 218 | else 219 | return 0; 220 | } 221 | 222 | int toread = std::min(n, nbits); 223 | ret <<= toread; 224 | ret |= (accum >> (n-toread)); 225 | n -= toread; 226 | nbits -= toread; 227 | accum &= BITMASK(n); 228 | } 229 | 230 | return ret; 231 | } 232 | }; 233 | 234 | 235 | class GolombDecoder 236 | { 237 | BitReader f; 238 | int P, log2P; 239 | 240 | public: 241 | GolombDecoder(uint8_t *gcs, int len, int P_) 242 | : f(gcs, len), P(P_) 243 | { 244 | log2P = floor_log2(P); 245 | } 246 | 247 | bool eof(void) 248 | { 249 | return f.eof(); 250 | } 251 | 252 | hash_t next(void) 253 | { 254 | hash_t v = 0; 255 | while (f.read(1)) 256 | { 257 | v += P; 258 | if (f.eof()) 259 | return 0; 260 | } 261 | v += f.read(log2P); 262 | return v; 263 | } 264 | }; 265 | 266 | GCSQuery::GCSQuery(std::istream &f_) 267 | : f(f_), gcs(NULL) 268 | { 269 | int32_t v; 270 | 271 | f.read((char*)&v, 4); 272 | N = O32_BE_TO_HOST(v); 273 | 274 | f.read((char*)&v, 4); 275 | P = O32_BE_TO_HOST(v); 276 | 277 | f.seekg(0, std::ios::end); 278 | int len = f.tellg(); 279 | f.seekg(8); 280 | 281 | gcs_len = len-8; 282 | gcs = new uint8_t[gcs_len]; 283 | f.read((char*)gcs, gcs_len); 284 | } 285 | 286 | GCSQuery::~GCSQuery() 287 | { 288 | delete [] gcs; 289 | } 290 | 291 | bool GCSQuery::query(const void *data, int size) 292 | { 293 | unsigned h = gcs_hash(data, size, N, P); 294 | unsigned int value = 0; 295 | 296 | GolombDecoder gd(gcs, gcs_len, P); 297 | while (!gd.eof()) 298 | { 299 | unsigned int diff = gd.next(); 300 | value += diff; 301 | 302 | if (value == h) 303 | return true; 304 | else if (value > h) 305 | return false; 306 | } 307 | 308 | return false; 309 | } 310 | 311 | -------------------------------------------------------------------------------- /js/index.html: -------------------------------------------------------------------------------- 1 | 2 | 28 | 29 | 30 | 31 | Golomb-coded sets in Javascript 32 | 45 | 46 | 47 |

Golomb-coded sets builder

48 |

Loading...

49 |
50 |

Build the set

51 |
52 | 53 | 1 / 2 ^ = 1 / {{ ext_prob }} = {{ 1/ext_prob|number:11 }} 54 |
55 |
56 |
57 |

Add a word

58 | 59 | 60 | 61 | 62 |
63 | 64 | 65 | 66 | 67 |
68 | 69 |

Words: {{encoded_words.length}}, Set size: {{gcs.byteLength}} bytes 70 | Download the set 71 |

72 | 73 |

Wordlist

74 |

Click a word to delete

75 |
Empty Wordlist!
76 | 79 |
80 |
81 |

Query

82 | 83 | 84 | 85 |

Word found!

86 |

Word not found!

87 |
88 |
89 | 90 | 91 | 92 | 93 | 175 | -------------------------------------------------------------------------------- /cpp/md5.c: -------------------------------------------------------------------------------- 1 | /* 2 | * This code implements the MD5 message-digest algorithm. 3 | * The algorithm is due to Ron Rivest. This code was 4 | * written by Colin Plumb in 1993, no copyright is claimed. 5 | * This code is in the public domain; do with it what you wish. 6 | * 7 | * Equivalent code is available from RSA Data Security, Inc. 8 | * This code has been tested against that, and is equivalent, 9 | * except that you don't need to include two pages of legalese 10 | * with every copy. 11 | * 12 | * To compute the message digest of a chunk of bytes, declare an 13 | * MD5Context structure, pass it to MD5Init, call MD5Update as 14 | * needed on buffers full of bytes, and then call MD5Final, which 15 | * will fill a supplied 16-byte array with the digest. 16 | */ 17 | 18 | /* Brutally hacked by John Walker back from ANSI C to K&R (no 19 | prototypes) to maintain the tradition that Netfone will compile 20 | with Sun's original "cc". */ 21 | 22 | #include /* for memcpy() */ 23 | #include "md5.h" 24 | 25 | static void byteReverse(unsigned char *buf, unsigned longs) 26 | { 27 | uint32 t; 28 | do { 29 | *(uint32*)buf = O32_HOST_TO_LE(*(uint32*)buf); 30 | buf += 4; 31 | } while (--longs); 32 | } 33 | 34 | 35 | static void MD5Transform(uint32 buf[4], uint32 in[16]); 36 | 37 | /* 38 | * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious 39 | * initialization constants. 40 | */ 41 | void MD5Init(struct MD5Context *ctx) 42 | { 43 | ctx->buf[0] = 0x67452301; 44 | ctx->buf[1] = 0xefcdab89; 45 | ctx->buf[2] = 0x98badcfe; 46 | ctx->buf[3] = 0x10325476; 47 | 48 | ctx->bits[0] = 0; 49 | ctx->bits[1] = 0; 50 | } 51 | 52 | /* 53 | * Update context to reflect the concatenation of another buffer full 54 | * of bytes. 
55 | */ 56 | void MD5Update(struct MD5Context *ctx, const void *bufv, unsigned len) 57 | { 58 | uint32 t; 59 | const unsigned char* buf = (const unsigned char*)bufv; 60 | 61 | /* Update bitcount */ 62 | 63 | t = ctx->bits[0]; 64 | if ((ctx->bits[0] = t + ((uint32) len << 3)) < t) 65 | ctx->bits[1]++; /* Carry from low to high */ 66 | ctx->bits[1] += len >> 29; 67 | 68 | t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */ 69 | 70 | /* Handle any leading odd-sized chunks */ 71 | 72 | if (t) { 73 | unsigned char *p = (unsigned char *) ctx->in + t; 74 | 75 | t = 64 - t; 76 | if (len < t) { 77 | memcpy(p, buf, len); 78 | return; 79 | } 80 | memcpy(p, buf, t); 81 | byteReverse(ctx->in, 16); 82 | MD5Transform(ctx->buf, (uint32 *) ctx->in); 83 | buf += t; 84 | len -= t; 85 | } 86 | /* Process data in 64-byte chunks */ 87 | 88 | while (len >= 64) { 89 | memcpy(ctx->in, buf, 64); 90 | byteReverse(ctx->in, 16); 91 | MD5Transform(ctx->buf, (uint32 *) ctx->in); 92 | buf += 64; 93 | len -= 64; 94 | } 95 | 96 | /* Handle any remaining bytes of data. */ 97 | 98 | memcpy(ctx->in, buf, len); 99 | } 100 | 101 | /* 102 | * Final wrapup - pad to 64-byte boundary with the bit pattern 103 | * 1 0* (64-bit count of bits processed, MSB-first) 104 | */ 105 | void MD5Final(unsigned char digest[16], struct MD5Context *ctx) 106 | { 107 | unsigned count; 108 | unsigned char *p; 109 | 110 | /* Compute number of bytes mod 64 */ 111 | count = (ctx->bits[0] >> 3) & 0x3F; 112 | 113 | /* Set the first char of padding to 0x80. 
This is safe since there is 114 | always at least one byte free */ 115 | p = ctx->in + count; 116 | *p++ = 0x80; 117 | 118 | /* Bytes of padding needed to make 64 bytes */ 119 | count = 64 - 1 - count; 120 | 121 | /* Pad out to 56 mod 64 */ 122 | if (count < 8) { 123 | /* Two lots of padding: Pad the first block to 64 bytes */ 124 | memset(p, 0, count); 125 | byteReverse(ctx->in, 16); 126 | MD5Transform(ctx->buf, (uint32 *) ctx->in); 127 | 128 | /* Now fill the next block with 56 bytes */ 129 | memset(ctx->in, 0, 56); 130 | } else { 131 | /* Pad block to 56 bytes */ 132 | memset(p, 0, count - 8); 133 | } 134 | byteReverse(ctx->in, 14); 135 | 136 | /* Append length in bits and transform */ 137 | ((uint32 *) ctx->in)[14] = ctx->bits[0]; 138 | ((uint32 *) ctx->in)[15] = ctx->bits[1]; 139 | 140 | MD5Transform(ctx->buf, (uint32 *) ctx->in); 141 | byteReverse((unsigned char *) ctx->buf, 4); 142 | memcpy(digest, ctx->buf, 16); 143 | memset(ctx, 0, sizeof(*ctx)); /* In case it's sensitive */ 144 | } 145 | 146 | 147 | /* The four core functions - F1 is optimized somewhat */ 148 | 149 | /* #define F1(x, y, z) (x & y | ~x & z) */ 150 | #define F1(x, y, z) (z ^ (x & (y ^ z))) 151 | #define F2(x, y, z) F1(z, x, y) 152 | #define F3(x, y, z) (x ^ y ^ z) 153 | #define F4(x, y, z) (y ^ (x | ~z)) 154 | 155 | /* This is the central step in the MD5 algorithm. */ 156 | #define MD5STEP(f, w, x, y, z, data, s) \ 157 | ( w += f(x, y, z) + data, w = w<>(32-s), w += x ) 158 | 159 | /* 160 | * The core of the MD5 algorithm, this alters an existing MD5 hash to 161 | * reflect the addition of 16 longwords of new data. MD5Update blocks 162 | * the data and converts bytes into longwords for this routine. 
163 | */ 164 | static void MD5Transform(uint32 buf[4], uint32 in[16]) 165 | { 166 | register uint32 a, b, c, d; 167 | 168 | a = buf[0]; 169 | b = buf[1]; 170 | c = buf[2]; 171 | d = buf[3]; 172 | 173 | MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7); 174 | MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12); 175 | MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17); 176 | MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22); 177 | MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7); 178 | MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12); 179 | MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17); 180 | MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22); 181 | MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7); 182 | MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12); 183 | MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17); 184 | MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); 185 | MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); 186 | MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); 187 | MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); 188 | MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); 189 | 190 | MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5); 191 | MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9); 192 | MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); 193 | MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20); 194 | MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5); 195 | MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); 196 | MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); 197 | MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20); 198 | MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5); 199 | MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9); 200 | MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14); 201 | MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20); 202 | MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5); 203 | MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9); 204 | MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14); 205 | MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); 206 | 207 | MD5STEP(F3, a, b, c, d, 
in[5] + 0xfffa3942, 4); 208 | MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11); 209 | MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); 210 | MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23); 211 | MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4); 212 | MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11); 213 | MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16); 214 | MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); 215 | MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); 216 | MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11); 217 | MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16); 218 | MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23); 219 | MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4); 220 | MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); 221 | MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); 222 | MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23); 223 | 224 | MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6); 225 | MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10); 226 | MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15); 227 | MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21); 228 | MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); 229 | MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10); 230 | MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); 231 | MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21); 232 | MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6); 233 | MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); 234 | MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15); 235 | MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); 236 | MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6); 237 | MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); 238 | MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15); 239 | MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21); 240 | 241 | buf[0] += a; 242 | buf[1] += b; 243 | buf[2] += c; 244 | buf[3] += d; 245 | } 246 | --------------------------------------------------------------------------------