├── .gitignore ├── Makefile ├── MurmurHash3.c ├── MurmurHash3.h ├── README.md ├── arraylist.c ├── arraylist.h ├── bitvector-ops.c ├── bitvector.c ├── bitvector.h ├── dirpath.c ├── dirpath.h ├── dtypes.h ├── dump.c ├── extra ├── bswap.c ├── cplxprint.c ├── memalign.c └── swapreverse.c ├── hashing.c ├── hashing.h ├── htable.c ├── htable.h ├── htable.inc ├── htableh.inc ├── ieee754.h ├── int2str.c ├── ios.c ├── ios.h ├── libsupport.h ├── libsupportinit.c ├── ptrhash.c ├── ptrhash.h ├── socket.c ├── socket.h ├── timefuncs.c ├── timefuncs.h ├── utf8.c ├── utf8.h └── utils.h /.gitignore: -------------------------------------------------------------------------------- 1 | /*.o 2 | /*.do 3 | /*.a 4 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SRCS = hashing.c timefuncs.c ptrhash.c socket.c \ 2 | utf8.c ios.c dirpath.c htable.c bitvector.c bitvector-ops.c \ 3 | int2str.c dump.c libsupportinit.c arraylist.c 4 | 5 | OBJS = $(SRCS:%.c=%.o) 6 | DOBJS = $(SRCS:%.c=%.do) 7 | 8 | ifneq ($(MAKECMDGOALS),debug) 9 | XOBJS = $(OBJS) 10 | else 11 | XOBJS = $(DOBJS) 12 | endif 13 | 14 | FLAGS = -std=gnu99 -fPIC -Wall -Wno-strict-aliasing $(CFLAGS) 15 | 16 | DEBUGFLAGS = -ggdb3 -DDEBUG 17 | SHIPFLAGS = -O3 -DNDEBUG -falign-functions -momit-leaf-frame-pointer 18 | 19 | DEBUGFLAGS += $(FLAGS) 20 | SHIPFLAGS += $(FLAGS) 21 | 22 | default: release 23 | 24 | %.o: %.c 25 | $(CC) $(SHIPFLAGS) -c $< -o $@ 26 | %.do: %.c 27 | $(CC) $(DEBUGFLAGS) -c $< -o $@ 28 | 29 | release debug: libsupport.a 30 | 31 | libsupport.a: $(XOBJS) 32 | rm -rf $@ 33 | ar -rcs $@ $^ 34 | 35 | clean: 36 | rm -f *.o 37 | rm -f *.do 38 | rm -f *.a 39 | rm -f *~ *# 40 | rm -f core* 41 | rm -f libsupport.a 42 | -------------------------------------------------------------------------------- /MurmurHash3.c: -------------------------------------------------------------------------------- 1 | 
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.

// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.
//
// The public prototypes for the three MurmurHash3_* entry points live in
// MurmurHash3.h; this translation unit only needs the standard headers below.

#include <stdint.h>
#include <string.h>

//-----------------------------------------------------------------------------
// Platform-specific functions and macros
//
// All helpers are file-local (static).  A plain C99 `inline` definition does
// not emit an out-of-line copy, so unoptimized builds could fail to link.

// Microsoft Visual Studio

#if defined(_MSC_VER)

#include <stdlib.h>

#define FORCE_INLINE static __forceinline

#define ROTL32(x,y) _rotl(x,y)
#define ROTL64(x,y) _rotl64(x,y)

#define BIG_CONSTANT(x) (x)

// Other compilers

#else // defined(_MSC_VER)

#define FORCE_INLINE static inline __attribute__((always_inline))

// Rotate x left by r bits; r must be in 1..31.
FORCE_INLINE uint32_t rotl32 ( uint32_t x, int8_t r )
{
    return (x << r) | (x >> (32 - r));
}

// Rotate x left by r bits; r must be in 1..63.
FORCE_INLINE uint64_t rotl64 ( uint64_t x, int8_t r )
{
    return (x << r) | (x >> (64 - r));
}

#define ROTL32(x,y) rotl32(x,y)
#define ROTL64(x,y) rotl64(x,y)

#define BIG_CONSTANT(x) (x##LLU)

#endif // !defined(_MSC_VER)

//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping, do the
// conversion here.
//
// The key is an arbitrary byte buffer, so blocks are fetched with memcpy:
// this is valid for any alignment and does not violate strict aliasing.
// The value is read in native byte order.

// Return the i-th 32-bit block (i >= 0) of the byte buffer p.
FORCE_INLINE uint32_t getblock32 ( const uint8_t * p, int i )
{
    uint32_t v;
    memcpy(&v, p + (size_t)i * sizeof v, sizeof v);
    return v;
}

// Return the i-th 64-bit block (i >= 0) of the byte buffer p.
FORCE_INLINE uint64_t getblock64 ( const uint8_t * p, int i )
{
    uint64_t v;
    memcpy(&v, p + (size_t)i * sizeof v, sizeof v);
    return v;
}

//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche

FORCE_INLINE uint32_t fmix32 ( uint32_t h )
{
    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;

    return h;
}

//----------

FORCE_INLINE uint64_t fmix64 ( uint64_t k )
{
    k ^= k >> 33;
    k *= BIG_CONSTANT(0xff51afd7ed558ccd);
    k ^= k >> 33;
    k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
    k ^= k >> 33;

    return k;
}

//-----------------------------------------------------------------------------
// 32-bit hash of the `len` bytes at `key` (len must be >= 0).
// Writes 4 bytes to `out`, which may have any alignment.

void MurmurHash3_x86_32 ( const void * key, int len,
                          uint32_t seed, void * out )
{
    const uint8_t * data = (const uint8_t*)key;
    const int nblocks = len / 4;

    uint32_t h1 = seed;

    const uint32_t c1 = 0xcc9e2d51;
    const uint32_t c2 = 0x1b873593;

    //----------
    // body

    for(int i = 0; i < nblocks; i++)
    {
        uint32_t k1 = getblock32(data,i);

        k1 *= c1;
        k1 = ROTL32(k1,15);
        k1 *= c2;

        h1 ^= k1;
        h1 = ROTL32(h1,13);
        h1 = h1*5+0xe6546b64;
    }

    //----------
    // tail

    const uint8_t * tail = data + (size_t)nblocks*4;

    uint32_t k1 = 0;

    switch(len & 3)
    {
    case 3: k1 ^= (uint32_t)tail[2] << 16; /* fallthrough */
    case 2: k1 ^= (uint32_t)tail[1] << 8;  /* fallthrough */
    case 1: k1 ^= (uint32_t)tail[0];
            k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
            break;
    default:
            break;
    }

    //----------
    // finalization

    h1 ^= (uint32_t)len;

    h1 = fmix32(h1);

    memcpy(out, &h1, sizeof h1);
}

//-----------------------------------------------------------------------------
// 128-bit hash built from 32-bit operations (len must be >= 0).
// Writes 16 bytes (four native-endian uint32_t) to `out`, any alignment.

void MurmurHash3_x86_128 ( const void * key, const int len,
                           uint32_t seed, void * out )
{
    const uint8_t * data = (const uint8_t*)key;
    const int nblocks = len / 16;

    uint32_t h1 = seed;
    uint32_t h2 = seed;
    uint32_t h3 = seed;
    uint32_t h4 = seed;

    const uint32_t c1 = 0x239b961b;
    const uint32_t c2 = 0xab0e9789;
    const uint32_t c3 = 0x38b34ae5;
    const uint32_t c4 = 0xa1e38b93;

    //----------
    // body

    for(int i = 0; i < nblocks; i++)
    {
        uint32_t k1 = getblock32(data,i*4+0);
        uint32_t k2 = getblock32(data,i*4+1);
        uint32_t k3 = getblock32(data,i*4+2);
        uint32_t k4 = getblock32(data,i*4+3);

        k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;

        h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;

        k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;

        h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;

        k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;

        h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;

        k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;

        h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
    }

    //----------
    // tail
    //
    // Bytes are widened to uint32_t before shifting: shifting a promoted
    // (signed) int by 24 overflows for bytes >= 0x80.

    const uint8_t * tail = data + (size_t)nblocks*16;

    uint32_t k1 = 0;
    uint32_t k2 = 0;
    uint32_t k3 = 0;
    uint32_t k4 = 0;

    switch(len & 15)
    {
    case 15: k4 ^= (uint32_t)tail[14] << 16; /* fallthrough */
    case 14: k4 ^= (uint32_t)tail[13] << 8;  /* fallthrough */
    case 13: k4 ^= (uint32_t)tail[12] << 0;
             k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
             /* fallthrough */
    case 12: k3 ^= (uint32_t)tail[11] << 24; /* fallthrough */
    case 11: k3 ^= (uint32_t)tail[10] << 16; /* fallthrough */
    case 10: k3 ^= (uint32_t)tail[ 9] << 8;  /* fallthrough */
    case  9: k3 ^= (uint32_t)tail[ 8] << 0;
             k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
             /* fallthrough */
    case  8: k2 ^= (uint32_t)tail[ 7] << 24; /* fallthrough */
    case  7: k2 ^= (uint32_t)tail[ 6] << 16; /* fallthrough */
    case  6: k2 ^= (uint32_t)tail[ 5] << 8;  /* fallthrough */
    case  5: k2 ^= (uint32_t)tail[ 4] << 0;
             k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
             /* fallthrough */
    case  4: k1 ^= (uint32_t)tail[ 3] << 24; /* fallthrough */
    case  3: k1 ^= (uint32_t)tail[ 2] << 16; /* fallthrough */
    case  2: k1 ^= (uint32_t)tail[ 1] << 8;  /* fallthrough */
    case  1: k1 ^= (uint32_t)tail[ 0] << 0;
             k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
             break;
    default:
             break;
    }

    //----------
    // finalization

    h1 ^= (uint32_t)len; h2 ^= (uint32_t)len;
    h3 ^= (uint32_t)len; h4 ^= (uint32_t)len;

    h1 += h2; h1 += h3; h1 += h4;
    h2 += h1; h3 += h1; h4 += h1;

    h1 = fmix32(h1);
    h2 = fmix32(h2);
    h3 = fmix32(h3);
    h4 = fmix32(h4);

    h1 += h2; h1 += h3; h1 += h4;
    h2 += h1; h3 += h1; h4 += h1;

    const uint32_t result[4] = { h1, h2, h3, h4 };
    memcpy(out, result, sizeof result);
}

//-----------------------------------------------------------------------------
// 128-bit hash built from 64-bit operations (len must be >= 0).
// Writes 16 bytes (two native-endian uint64_t) to `out`, any alignment.

void MurmurHash3_x64_128 ( const void * key, const int len,
                           const uint32_t seed, void * out )
{
    const uint8_t * data = (const uint8_t*)key;
    const int nblocks = len / 16;

    uint64_t h1 = seed;
    uint64_t h2 = seed;

    const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
    const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);

    //----------
    // body

    for(int i = 0; i < nblocks; i++)
    {
        uint64_t k1 = getblock64(data,i*2+0);
        uint64_t k2 = getblock64(data,i*2+1);

        k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;

        h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;

        k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;

        h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
    }

    //----------
    // tail

    const uint8_t * tail = data + (size_t)nblocks*16;

    uint64_t k1 = 0;
    uint64_t k2 = 0;

    switch(len & 15)
    {
    case 15: k2 ^= ((uint64_t)(tail[14])) << 48; /* fallthrough */
    case 14: k2 ^= ((uint64_t)(tail[13])) << 40; /* fallthrough */
    case 13: k2 ^= ((uint64_t)(tail[12])) << 32; /* fallthrough */
    case 12: k2 ^= ((uint64_t)(tail[11])) << 24; /* fallthrough */
    case 11: k2 ^= ((uint64_t)(tail[10])) << 16; /* fallthrough */
    case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8;  /* fallthrough */
    case  9: k2 ^= ((uint64_t)(tail[ 8])) << 0;
             k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
             /* fallthrough */
    case  8: k1 ^= ((uint64_t)(tail[ 7])) << 56; /* fallthrough */
    case  7: k1 ^= ((uint64_t)(tail[ 6])) << 48; /* fallthrough */
    case  6: k1 ^= ((uint64_t)(tail[ 5])) << 40; /* fallthrough */
    case  5: k1 ^= ((uint64_t)(tail[ 4])) << 32; /* fallthrough */
    case  4: k1 ^= ((uint64_t)(tail[ 3])) << 24; /* fallthrough */
    case  3: k1 ^= ((uint64_t)(tail[ 2])) << 16; /* fallthrough */
    case  2: k1 ^= ((uint64_t)(tail[ 1])) << 8;  /* fallthrough */
    case  1: k1 ^= ((uint64_t)(tail[ 0])) << 0;
             k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
             break;
    default:
             break;
    }

    //----------
    // finalization

    h1 ^= (uint64_t)len; h2 ^= (uint64_t)len;

    h1 += h2;
    h2 += h1;

    h1 = fmix64(h1);
    h2 = fmix64(h2);

    h1 += h2;
    h2 += h1;

    const uint64_t result[2] = { h1, h2 };
    memcpy(out, result, sizeof result);
}

//-----------------------------------------------------------------------------
// /MurmurHash3.h:
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
4 | 5 | #ifndef MURMURHASH3_H 6 | #define MURMURHASH3_H 7 | 8 | //----------------------------------------------------------------------------- 9 | // Platform-specific functions and macros 10 | 11 | // Microsoft Visual Studio 12 | 13 | #if defined(_MSC_VER) 14 | 15 | typedef unsigned char uint8_t; 16 | typedef unsigned long uint32_t; 17 | typedef unsigned __int64 uint64_t; 18 | 19 | // Other compilers 20 | 21 | #else // defined(_MSC_VER) 22 | 23 | #include 24 | 25 | #endif // !defined(_MSC_VER) 26 | 27 | //----------------------------------------------------------------------------- 28 | 29 | void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out ); 30 | 31 | void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out ); 32 | 33 | void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out ); 34 | 35 | //----------------------------------------------------------------------------- 36 | 37 | #endif // MURMURHASH3_H 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## What's here 2 | 3 | This is a collection of useful public-domain C code by myself and others. 4 | The best way to use it is to pick and choose the bits you need and 5 | statically compile them in. 6 | Take a look at `libsupportinit.c` to see if there are any initialization 7 | functions you might need to call. 8 | A summary of the available functionality follows. 9 | 10 | ### IOS 11 | 12 | A replacement for parts of C's stdio library, supporting file-descriptor-backed 13 | and memory-backed I/O streams with a uniform interface. Also has special 14 | support for UTF-8, and slightly relaxed semantics that allow keeping data 15 | buffered longer. 16 | 17 | ### Bit vectors 18 | 19 | This is a pretty thorough bit vector library. 
The most interesting thing about 20 | it is that it supports many operations on contiguous sub-vectors of bits. 21 | This is helpful when implementing strided multi-dimensional arrays of bits. 22 | 23 | ### Arraylist 24 | 25 | A very simple growable array. 26 | 27 | ### dirpath.c 28 | 29 | Contains the function `get_exename`, which can tell you the path to your 30 | running executable on Linux, FreeBSD, Windows, and Mac OS X. Allows you to locate 31 | application files without "installing" anything. 32 | 33 | ### dump.c 34 | 35 | A routine for printing hex dumps. 36 | 37 | ### Hashing 38 | 39 | High-quality hash functions for strings and integers. 40 | 41 | ### Hash table 42 | 43 | A fast linear-probing hash table. Hardly ever allocates memory when 44 | inserting a key. It generally avoids collisions by using good hash functions. 45 | Use it by defining your hash function and equality predicate, then 46 | invoking a macro. See `ptrhash.c` for an example. 47 | 48 | ### Other 49 | 50 | Some convenient wrapper functions for using sockets and time info on 51 | multiple platforms. 
52 | -------------------------------------------------------------------------------- /arraylist.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "dtypes.h" 8 | #include "arraylist.h" 9 | 10 | arraylist_t *arraylist_new(arraylist_t *a, size_t size) 11 | { 12 | a->len = 0; 13 | if (size <= AL_N_INLINE) { 14 | a->items = &a->_space[0]; 15 | a->max = AL_N_INLINE; 16 | } 17 | else { 18 | a->items = (void**)LLT_ALLOC(size*sizeof(void*)); 19 | a->max = size; 20 | } 21 | if (a->items == NULL) return NULL; 22 | return a; 23 | } 24 | 25 | void arraylist_free(arraylist_t *a) 26 | { 27 | if (a->items != &a->_space[0]) 28 | LLT_FREE(a->items); 29 | a->len = 0; 30 | a->max = AL_N_INLINE; 31 | a->items = &a->_space[0]; 32 | } 33 | 34 | static void al_grow(arraylist_t *a, size_t n) 35 | { 36 | if (a->len+n > a->max) { 37 | if (a->items == &a->_space[0]) { 38 | void **p = LLT_ALLOC((a->len+n)*sizeof(void*)); 39 | if (p == NULL) return; 40 | memcpy(p, a->items, a->len*sizeof(void*)); 41 | a->items = p; 42 | a->max = a->len+n; 43 | } 44 | else { 45 | size_t nm = a->max*2; 46 | if (nm == 0) nm = 1; 47 | while (a->len+n > nm) nm*=2; 48 | void **p = LLT_REALLOC(a->items, nm*sizeof(void*)); 49 | if (p == NULL) return; 50 | a->items = p; 51 | a->max = nm; 52 | } 53 | } 54 | a->len += n; 55 | } 56 | 57 | void arraylist_push(arraylist_t *a, void *elt) 58 | { 59 | al_grow(a, 1); 60 | a->items[a->len-1] = elt; 61 | } 62 | 63 | void *arraylist_pop(arraylist_t *a) 64 | { 65 | if (a->len == 0) return NULL; 66 | void *p = a->items[--a->len]; 67 | a->items[a->len] = NULL; 68 | return p; 69 | } 70 | -------------------------------------------------------------------------------- /arraylist.h: -------------------------------------------------------------------------------- 1 | #ifndef ARRAYLIST_H 2 | #define ARRAYLIST_H 3 | 4 | #define AL_N_INLINE 29 5 | 6 | typedef struct { 7 | 
size_t len; 8 | size_t max; 9 | void **items; 10 | void *_space[AL_N_INLINE]; 11 | } arraylist_t; 12 | 13 | arraylist_t *arraylist_new(arraylist_t *a, size_t size); 14 | void arraylist_free(arraylist_t *a); 15 | 16 | void arraylist_push(arraylist_t *a, void *elt); 17 | void *arraylist_pop(arraylist_t *a); 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /bitvector-ops.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "dtypes.h" 6 | #include "bitvector.h" 7 | 8 | #ifdef WIN32 9 | #include 10 | #define alloca _alloca 11 | #endif 12 | 13 | // greater than this # of words we use malloc instead of alloca 14 | #define MALLOC_CUTOFF 2000 15 | 16 | u_int32_t bitreverse(u_int32_t x) 17 | { 18 | u_int32_t m; 19 | 20 | #ifdef __INTEL_COMPILER 21 | x = _bswap(x); 22 | #else 23 | x = (x >> 16) | (x << 16); m = 0xff00ff00; 24 | x = ((x & m) >> 8) | ((x & ~m) << 8); 25 | #endif 26 | m = 0xf0f0f0f0; 27 | x = ((x & m) >> 4) | ((x & ~m) << 4); m = 0xcccccccc; 28 | x = ((x & m) >> 2) | ((x & ~m) << 2); m = 0xaaaaaaaa; 29 | x = ((x & m) >> 1) | ((x & ~m) << 1); 30 | 31 | return x; 32 | } 33 | 34 | // shift all bits in a long bit vector 35 | // n is # of int32s to consider, s is shift distance 36 | // lowest bit-index is bit 0 of word 0 37 | // TODO: handle boundary case of shift distance >= data size? 
38 | void bitvector_shr(u_int32_t *b, size_t n, u_int32_t s) 39 | { 40 | u_int32_t i; 41 | if (s == 0 || n == 0) return; 42 | i = (s>>5); 43 | if (i) { 44 | n -= i; 45 | memmove(b, &b[i], n*4); 46 | memset(&b[n], 0, i*4); 47 | s &= 31; 48 | } 49 | for(i=0; i < n-1; i++) { 50 | b[i] = (b[i]>>s) | (b[i+1]<<(32-s)); 51 | } 52 | b[i]>>=s; 53 | } 54 | 55 | // out-of-place version, good for re-aligning a strided submatrix to 56 | // linear representation when a copy is needed 57 | // assumes that dest has the same amount of space as source, even if it 58 | // wouldn't have been necessary to hold the shifted bits 59 | void bitvector_shr_to(u_int32_t *dest, u_int32_t *b, size_t n, u_int32_t s) 60 | { 61 | u_int32_t i, j; 62 | if (n == 0) return; 63 | if (s == 0) { 64 | memcpy(dest, b, n*4); 65 | return; 66 | } 67 | j = (s>>5); 68 | if (j) { 69 | n -= j; 70 | memset(&dest[n], 0, j*4); 71 | s &= 31; 72 | b = &b[j]; 73 | } 74 | for(i=0; i < n-1; i++) { 75 | dest[i] = (b[i]>>s) | (b[i+1]<<(32-s)); 76 | } 77 | dest[i] = b[i]>>s; 78 | } 79 | 80 | void bitvector_shl(u_int32_t *b, size_t n, u_int32_t s) 81 | { 82 | u_int32_t i, scrap=0, temp; 83 | if (s == 0 || n == 0) return; 84 | i = (s>>5); 85 | if (i) { 86 | n -= i; 87 | memmove(&b[i], b, n*4); 88 | memset(b, 0, i*4); 89 | s &= 31; 90 | b = &b[i]; 91 | } 92 | for(i=0; i < n; i++) { 93 | temp = (b[i]<>(32-s); 95 | b[i] = temp; 96 | } 97 | } 98 | 99 | // if dest has more space than source, set scrap to true to keep the 100 | // top bits that would otherwise be shifted out 101 | void bitvector_shl_to(u_int32_t *dest, u_int32_t *b, size_t n, u_int32_t s, 102 | bool_t scrap) 103 | { 104 | u_int32_t i, j, sc=0; 105 | if (n == 0) return; 106 | if (s == 0) { 107 | memcpy(dest, b, n*4); 108 | return; 109 | } 110 | j = (s>>5); 111 | if (j) { 112 | n -= j; 113 | memset(dest, 0, j*4); 114 | s &= 31; 115 | dest = &dest[j]; 116 | } 117 | for(i=0; i < n; i++) { 118 | dest[i] = (b[i]<>(32-s); 120 | } 121 | if (scrap) 122 | dest[i] = sc; 123 | 
} 124 | 125 | // set nbits to c, starting at given bit offset 126 | // assumes offs < 32 127 | void bitvector_fill(u_int32_t *b, u_int32_t offs, u_int32_t c, u_int32_t nbits) 128 | { 129 | index_t i; 130 | u_int32_t nw, tail; 131 | u_int32_t mask; 132 | 133 | if (nbits == 0) return; 134 | nw = (offs+nbits+31)>>5; 135 | 136 | if (nw == 1) { 137 | mask = (lomask(nbits)<>5; 167 | 168 | if (nw == 1) { 169 | mask = (lomask(nbits)<>5; \ 202 | \ 203 | if (soffs == doffs) { \ 204 | if (nw == 1) { \ 205 | mask = (lomask(nbits)<>5; \ 220 | if (soffs < doffs) { \ 221 | s = doffs-soffs; \ 222 | if (nw == 1) { \ 223 | mask = (lomask(nbits)<>(32-s); \ 230 | for(i=1; i < snw-1; i++) { \ 231 | dest[i] = (OP(src[i])<>(32-s); \ 233 | } \ 234 | tail = (doffs+nbits)&31; \ 235 | if (tail==0) { mask=ONES32; } else { mask = lomask(tail); } \ 236 | if (snw == nw) { \ 237 | dest[i] = (dest[i] & ~mask) | (((OP(src[i])<>(32-s); \ 247 | i++; \ 248 | dest[i] = (dest[i] & ~mask) | (scrap & mask); \ 249 | } \ 250 | } \ 251 | } \ 252 | else { \ 253 | s = soffs-doffs; \ 254 | if (snw == 1) { \ 255 | mask = (lomask(nbits)<>s) & mask); \ 257 | return; \ 258 | } \ 259 | if (nw == 1) { \ 260 | mask = (lomask(nbits)<>s)|(OP(src[1])<<(32-s))) & mask); \ 263 | return; \ 264 | } \ 265 | mask = ~lomask(doffs); \ 266 | dest[0] = (dest[0] & ~mask) | \ 267 | (((OP(src[0])>>s)|(OP(src[1])<<(32-s))) & mask); \ 268 | for(i=1; i < nw-1; i++) { \ 269 | dest[i] = (OP(src[i])>>s) | (OP(src[i+1])<<(32-s)); \ 270 | } \ 271 | tail = (doffs+nbits)&31; \ 272 | if (tail==0) { mask=ONES32; } else { mask = lomask(tail); } \ 273 | if (snw == nw) { \ 274 | dest[i] = (dest[i] & ~mask) | ((OP(src[i])>>s) & mask); \ 275 | } \ 276 | else /* snw > nw */ { \ 277 | dest[i] = (dest[i] & ~mask) | \ 278 | (((OP(src[i])>>s)|(OP(src[i+1])<<(32-s))) & mask); \ 279 | } \ 280 | } \ 281 | } 282 | 283 | #define BV_COPY(a) (a) 284 | #define BV_NOT(a) (~(a)) 285 | BITVECTOR_COPY_OP(copy, BV_COPY) 286 | BITVECTOR_COPY_OP(not_to, BV_NOT) 287 | 
288 | // right-shift the bits in one logical "row" of a long 2d bit vector 289 | /* 290 | void bitvector_shr_row(u_int32_t *b, u_int32_t offs, size_t nbits, u_int32_t s) 291 | { 292 | } 293 | */ 294 | 295 | // copy from source to dest while reversing bit-order 296 | // assumes dest offset == 0 297 | // assumes source and dest don't overlap 298 | // assumes offset < 32 299 | void bitvector_reverse_to(u_int32_t *dest, u_int32_t *src, u_int32_t soffs, 300 | u_int32_t nbits) 301 | { 302 | index_t i; 303 | u_int32_t nw, tail; 304 | 305 | if (nbits == 0) return; 306 | 307 | nw = (soffs+nbits+31)>>5; 308 | // first, reverse the words while reversing bit order within each word 309 | for(i=0; i < nw/2; i++) { 310 | dest[i] = bitreverse(src[nw-i-1]); 311 | dest[nw-i-1] = bitreverse(src[i]); 312 | } 313 | if (nw&0x1) 314 | dest[i] = bitreverse(src[i]); 315 | 316 | tail = (soffs+nbits)&31; 317 | if (tail) 318 | bitvector_shr(dest, nw, 32-tail); 319 | } 320 | 321 | void bitvector_reverse(u_int32_t *b, u_int32_t offs, u_int32_t nbits) 322 | { 323 | index_t i; 324 | u_int32_t nw, tail; 325 | u_int32_t *temp; 326 | 327 | if (nbits == 0) return; 328 | 329 | nw = (offs+nbits+31)>>5; 330 | temp = (nw > MALLOC_CUTOFF) ? 
malloc(nw*4) : alloca(nw*4); 331 | for(i=0; i < nw/2; i++) { 332 | temp[i] = bitreverse(b[nw-i-1]); 333 | temp[nw-i-1] = bitreverse(b[i]); 334 | } 335 | if (nw&0x1) 336 | temp[i] = bitreverse(b[i]); 337 | 338 | tail = (offs+nbits)&31; 339 | bitvector_copy(b, offs, temp, (32-tail)&31, nbits); 340 | if (nw > MALLOC_CUTOFF) free(temp); 341 | } 342 | 343 | u_int32_t bitvector_any0(u_int32_t *b, u_int32_t offs, u_int32_t nbits) 344 | { 345 | index_t i; 346 | u_int32_t nw, tail; 347 | u_int32_t mask; 348 | 349 | if (nbits == 0) return 0; 350 | nw = (offs+nbits+31)>>5; 351 | 352 | if (nw == 1) { 353 | mask = (lomask(nbits)< soffs) 380 | bitvector_shl_to(dest, src, nw, newoffs-soffs, 1); 381 | else 382 | bitvector_shr_to(dest, src, nw, soffs-newoffs); 383 | } 384 | 385 | #define BITVECTOR_BINARY_OP_TO(opname, OP) \ 386 | void bitvector_##opname##_to(u_int32_t *dest, u_int32_t doffs, \ 387 | u_int32_t *a, u_int32_t aoffs, \ 388 | u_int32_t *b, u_int32_t boffs, u_int32_t nbits) \ 389 | { \ 390 | u_int32_t nw = (doffs+nbits+31)>>5; \ 391 | u_int32_t *temp = nw>MALLOC_CUTOFF ? 
malloc((nw+1)*4) : alloca((nw+1)*4);\ 392 | u_int32_t i, anw, bnw; \ 393 | if (aoffs == boffs) { \ 394 | anw = (aoffs+nbits+31)>>5; \ 395 | } \ 396 | else if (aoffs == doffs) { \ 397 | bnw = (boffs+nbits+31)>>5; \ 398 | adjust_offset_to(temp, b, bnw, boffs, aoffs); \ 399 | b = temp; anw = nw; \ 400 | } \ 401 | else { \ 402 | anw = (aoffs+nbits+31)>>5; \ 403 | bnw = (boffs+nbits+31)>>5; \ 404 | adjust_offset_to(temp, a, anw, aoffs, boffs); \ 405 | a = temp; aoffs = boffs; anw = bnw; \ 406 | } \ 407 | for(i=0; i < anw; i++) temp[i] = OP(a[i], b[i]); \ 408 | bitvector_copy(dest, doffs, temp, aoffs, nbits); \ 409 | if (nw>MALLOC_CUTOFF) free(temp); \ 410 | } 411 | 412 | #define BV_AND(a,b) ((a)&(b)) 413 | #define BV_OR(a,b) ((a)|(b)) 414 | #define BV_XOR(a,b) ((a)^(b)) 415 | BITVECTOR_BINARY_OP_TO(and, BV_AND) 416 | BITVECTOR_BINARY_OP_TO(or, BV_OR) 417 | BITVECTOR_BINARY_OP_TO(xor, BV_XOR) 418 | -------------------------------------------------------------------------------- /bitvector.c: -------------------------------------------------------------------------------- 1 | /* 2 | bit vector primitives 3 | 4 | todo: 5 | * reverse 6 | * nreverse 7 | (- rotate left/right) 8 | * shl_to 9 | * not 10 | - shr_row, shl_row 11 | 12 | These routines are the back end supporting bit matrices. Many operations 13 | on bit matrices are slow (such as accessing or setting a single element!) 14 | but certain operations are privileged and lend themselves to extremely 15 | efficient implementation due to the bit-vector nature of machine integers. 16 | These are: 17 | done: 18 | & | $ ~ copy reverse fill sum prod 19 | todo: 20 | shift trans rowswap 21 | would be nice: 22 | channel interleave 23 | 24 | Important note: 25 | Out-of-place functions always assume dest and source have the same amount 26 | of space available. 27 | 28 | shr_to, shl_to, not_to, and reverse_to assume source and dest don't overlap 29 | and_to, or_to, and xor_to allow overlap. 
30 | */ 31 | 32 | #include 33 | #include 34 | #include 35 | 36 | #include "dtypes.h" 37 | #include "bitvector.h" 38 | 39 | #ifdef WIN32 40 | #include 41 | #endif 42 | 43 | u_int32_t *bitvector_resize(u_int32_t *b, uint64_t oldsz, uint64_t newsz, 44 | int initzero) 45 | { 46 | u_int32_t *p; 47 | size_t sz = ((newsz+31)>>5) * sizeof(uint32_t); 48 | p = LLT_REALLOC(b, sz); 49 | if (p == NULL) return NULL; 50 | if (initzero && newsz>oldsz) { 51 | size_t osz = ((oldsz+31)>>5) * sizeof(uint32_t); 52 | memset(&p[osz/sizeof(uint32_t)], 0, sz-osz); 53 | } 54 | return p; 55 | } 56 | 57 | u_int32_t *bitvector_new(u_int64_t n, int initzero) 58 | { 59 | return bitvector_resize(NULL, 0, n, initzero); 60 | } 61 | 62 | size_t bitvector_nwords(u_int64_t nbits) 63 | { 64 | return ((nbits+31)>>5); 65 | } 66 | 67 | void bitvector_set(u_int32_t *b, u_int64_t n, u_int32_t c) 68 | { 69 | if (c) 70 | b[n>>5] |= (1<<(n&31)); 71 | else 72 | b[n>>5] &= ~(1<<(n&31)); 73 | } 74 | 75 | u_int32_t bitvector_get(u_int32_t *b, u_int64_t n) 76 | { 77 | return b[n>>5] & (1<<(n&31)); 78 | } 79 | 80 | static int ntz(uint32_t x) 81 | { 82 | int n; 83 | 84 | if (x == 0) return 32; 85 | n = 1; 86 | if ((x & 0x0000FFFF) == 0) {n = n +16; x = x >>16;} 87 | if ((x & 0x000000FF) == 0) {n = n + 8; x = x >> 8;} 88 | if ((x & 0x0000000F) == 0) {n = n + 4; x = x >> 4;} 89 | if ((x & 0x00000003) == 0) {n = n + 2; x = x >> 2;} 90 | return n - (x & 1); 91 | } 92 | 93 | // given a bitvector of n bits, starting at bit n0 find the next 94 | // set bit, including n0. 95 | // returns n if no set bits. 
96 | uint64_t bitvector_next(uint32_t *b, uint64_t n0, uint64_t n) 97 | { 98 | if (n0 >= n) return n; 99 | 100 | uint32_t i = n0>>5; 101 | uint32_t nb = n0&31; 102 | uint32_t nw = (n+31)>>5; 103 | uint32_t w; 104 | 105 | if (i < nw-1 || (n&31)==0) 106 | w = b[i]>>nb; 107 | else 108 | w = (b[i]&lomask(n&31))>>nb; 109 | if (w != 0) 110 | return ntz(w)+n0; 111 | if (i == nw-1) 112 | return n; 113 | i++; 114 | while (i < nw-1) { 115 | w = b[i]; 116 | if (w != 0) { 117 | return ntz(w) + (i<<5); 118 | } 119 | i++; 120 | } 121 | w = b[i]; 122 | nb = n&31; 123 | i = ntz(w); 124 | if (nb == 0) 125 | return i + (n-32); 126 | if (i >= nb) 127 | return n; 128 | return i + (n-nb); 129 | } 130 | 131 | u_int64_t bitvector_count(u_int32_t *b, u_int64_t offs, u_int64_t nbits) 132 | { 133 | size_t i, nw; 134 | u_int32_t ntail; 135 | u_int64_t ans; 136 | 137 | if (nbits == 0) return 0; 138 | nw = (offs+nbits+31)>>5; 139 | 140 | if (nw == 1) { 141 | if (nbits == 32) 142 | return count_bits(b[0] & (ONES32<>offs); // first end cap 147 | 148 | for(i=1; i < nw-1; i++) { 149 | ans += count_bits(b[i]); 150 | } 151 | 152 | ntail = (offs+nbits)&31; 153 | ans += count_bits(b[i]&(ntail>0?lomask(ntail):ONES32)); // last end cap 154 | 155 | return ans; 156 | } 157 | 158 | u_int32_t bitvector_any1(u_int32_t *b, u_int64_t offs, u_int64_t nbits) 159 | { 160 | index_t i; 161 | u_int32_t nw, tail; 162 | u_int32_t mask; 163 | 164 | if (nbits == 0) return 0; 165 | nw = (offs+nbits+31)>>5; 166 | 167 | if (nw == 1) { 168 | if (nbits == 32) 169 | mask = (ONES32<>1)&0x55555555); 15 | b = ((b>>2)&0x33333333) + (b&0x33333333); 16 | b = ((b>>4)+b)&0x0f0f0f0f; 17 | b += (b>>8); 18 | b += (b>>16); 19 | return b & 0x3f; 20 | // here is the non-optimized version, for clarity: 21 | /* 22 | b = ((b>> 1)&0x55555555) + (b&0x55555555); 23 | b = ((b>> 2)&0x33333333) + (b&0x33333333); 24 | b = ((b>> 4)&0x0f0f0f0f) + (b&0x0f0f0f0f); 25 | b = ((b>> 8)&0x00ff00ff) + (b&0x00ff00ff); 26 | b = ((b>>16)&0x0000ffff) + 
(b&0x0000ffff); 27 | return b & 0x3f; 28 | */ 29 | } 30 | #endif 31 | 32 | u_int32_t bitreverse(u_int32_t x); 33 | 34 | DLLEXPORT u_int32_t *bitvector_new(u_int64_t n, int initzero); 35 | DLLEXPORT 36 | u_int32_t *bitvector_resize(u_int32_t *b, uint64_t oldsz, uint64_t newsz, 37 | int initzero); 38 | size_t bitvector_nwords(u_int64_t nbits); 39 | DLLEXPORT void bitvector_set(u_int32_t *b, u_int64_t n, u_int32_t c); 40 | DLLEXPORT u_int32_t bitvector_get(u_int32_t *b, u_int64_t n); 41 | 42 | DLLEXPORT uint64_t bitvector_next(uint32_t *b, uint64_t n0, uint64_t n); 43 | 44 | void bitvector_shr(u_int32_t *b, size_t n, u_int32_t s); 45 | void bitvector_shr_to(u_int32_t *dest, u_int32_t *b, size_t n, u_int32_t s); 46 | void bitvector_shl(u_int32_t *b, size_t n, u_int32_t s); 47 | void bitvector_shl_to(u_int32_t *dest, u_int32_t *b, size_t n, u_int32_t s, 48 | bool_t scrap); 49 | void bitvector_fill(u_int32_t *b,u_int32_t offs, u_int32_t c, u_int32_t nbits); 50 | void bitvector_copy(u_int32_t *dest, u_int32_t doffs, 51 | u_int32_t *a, u_int32_t aoffs, u_int32_t nbits); 52 | void bitvector_not(u_int32_t *b, u_int32_t offs, u_int32_t nbits); 53 | void bitvector_not_to(u_int32_t *dest, u_int32_t doffs, 54 | u_int32_t *a, u_int32_t aoffs, u_int32_t nbits); 55 | void bitvector_reverse(u_int32_t *b, u_int32_t offs, u_int32_t nbits); 56 | void bitvector_reverse_to(u_int32_t *dest, u_int32_t *src, u_int32_t soffs, 57 | u_int32_t nbits); 58 | void bitvector_and_to(u_int32_t *dest, u_int32_t doffs, 59 | u_int32_t *a, u_int32_t aoffs, 60 | u_int32_t *b, u_int32_t boffs, u_int32_t nbits); 61 | void bitvector_or_to(u_int32_t *dest, u_int32_t doffs, 62 | u_int32_t *a, u_int32_t aoffs, 63 | u_int32_t *b, u_int32_t boffs, u_int32_t nbits); 64 | void bitvector_xor_to(u_int32_t *dest, u_int32_t doffs, 65 | u_int32_t *a, u_int32_t aoffs, 66 | u_int32_t *b, u_int32_t boffs, u_int32_t nbits); 67 | DLLEXPORT 68 | u_int64_t bitvector_count(u_int32_t *b, u_int64_t offs, u_int64_t nbits); 69 | 
u_int32_t bitvector_any0(u_int32_t *b, u_int32_t offs, u_int32_t nbits); 70 | DLLEXPORT 71 | u_int32_t bitvector_any1(u_int32_t *b, u_int64_t offs, u_int64_t nbits); 72 | 73 | #endif 74 | -------------------------------------------------------------------------------- /dirpath.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "dtypes.h" 12 | 13 | #ifdef WIN32 14 | #include 15 | #include 16 | #include 17 | #undef NO_ERROR 18 | #undef MOD_SHIFT 19 | #undef TRUE 20 | #undef FALSE 21 | #undef VOID 22 | #else 23 | #include 24 | #include 25 | #include 26 | #endif 27 | 28 | #include "dirpath.h" 29 | 30 | void get_cwd(char *buf, size_t size) 31 | { 32 | #ifndef WIN32 33 | // TODO: handle error more gracefully. 34 | if (getcwd(buf, size) == NULL) 35 | perror("getcwd error"); 36 | #else 37 | GetCurrentDirectory(size, buf); 38 | #endif 39 | } 40 | 41 | int set_cwd(char *buf) 42 | { 43 | #ifndef WIN32 44 | if (chdir(buf) == -1) 45 | return 1; 46 | #else 47 | if (SetCurrentDirectory(buf) == 0) 48 | return 1; 49 | #endif 50 | return 0; 51 | } 52 | 53 | #ifdef __linux 54 | char *get_exename(char *buf, size_t size) 55 | { 56 | char linkname[64]; /* /proc//exe */ 57 | pid_t pid; 58 | ssize_t ret; 59 | 60 | /* Get our PID and build the name of the link in /proc */ 61 | pid = getpid(); 62 | 63 | if (snprintf(linkname, sizeof(linkname), "/proc/%i/exe", pid) < 0) 64 | return NULL; 65 | 66 | /* Now read the symbolic link */ 67 | ret = readlink(linkname, buf, size); 68 | 69 | /* In case of an error, leave the handling up to the caller */ 70 | if (ret == -1) 71 | return NULL; 72 | 73 | /* Report insufficient buffer size */ 74 | if ((size_t)ret >= size) 75 | return NULL; 76 | 77 | /* Ensure proper NUL termination */ 78 | buf[ret] = 0; 79 | 80 | return buf; 81 | } 82 | #elif defined(__FreeBSD__) 83 | #include 84 | #include 
85 | 86 | char *get_exename(char *buf, size_t size) 87 | { 88 | int mib[4]; 89 | mib[0] = CTL_KERN; 90 | mib[1] = KERN_PROC; 91 | mib[2] = KERN_PROC_PATHNAME; 92 | mib[3] = -1; 93 | sysctl(mib, 4, buf, &size, NULL, 0); 94 | 95 | return buf; 96 | } 97 | #elif defined(WIN32) 98 | char *get_exename(char *buf, size_t size) 99 | { 100 | if (GetModuleFileName(NULL, buf, size) == 0) 101 | return NULL; 102 | 103 | return buf; 104 | } 105 | #elif defined(__APPLE__) 106 | #include "/Developer/Headers/FlatCarbon/Processes.h" 107 | #include "/Developer/Headers/FlatCarbon/Files.h" 108 | char *get_exename(char *buf, size_t size) 109 | { 110 | ProcessSerialNumber PSN; 111 | FSRef ref; 112 | 113 | if (GetCurrentProcess(&PSN) < 0 || 114 | GetProcessBundleLocation(&PSN, &ref) < 0 || 115 | FSRefMakePath(&ref, (uint8_t*)buf, size) < 0) 116 | return NULL; 117 | 118 | return buf; 119 | } 120 | #endif 121 | -------------------------------------------------------------------------------- /dirpath.h: -------------------------------------------------------------------------------- 1 | #ifndef DIRPATH_H 2 | #define DIRPATH_H 3 | 4 | #ifdef WIN32 5 | #define PATHSEP '\\' 6 | #define PATHSEPSTRING "\\" 7 | #define PATHLISTSEP ';' 8 | #define PATHLISTSEPSTRING ";" 9 | #define ISPATHSEP(c) ((c)=='/' || (c)=='\\') 10 | #define MAXPATHLEN 1024 11 | #else 12 | #define PATHSEP '/' 13 | #define PATHSEPSTRING "/" 14 | #define PATHLISTSEP ':' 15 | #define PATHLISTSEPSTRING ":" 16 | #define ISPATHSEP(c) ((c)=='/') 17 | #endif 18 | 19 | void get_cwd(char *buf, size_t size); 20 | int set_cwd(char *buf); 21 | DLLEXPORT char *get_exename(char *buf, size_t size); 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /dtypes.h: -------------------------------------------------------------------------------- 1 | #ifndef DTYPES_H 2 | #define DTYPES_H 3 | 4 | /* 5 | This file defines sane integer types for our target platforms. 
This 6 | library only runs on machines with the following characteristics: 7 | 8 | - supports integer word sizes of 8, 16, 32, and 64 bits 9 | - uses unsigned and signed 2's complement representations 10 | - all pointer types are the same size 11 | - there is an integer type with the same size as a pointer 12 | 13 | Some features require: 14 | - IEEE 754 single- and double-precision floating point 15 | 16 | We assume the LP64 convention for 64-bit platforms. 17 | */ 18 | 19 | #ifdef WIN32 20 | #define STDCALL __stdcall 21 | # ifdef IMPORT_EXPORTS 22 | # define DLLEXPORT __declspec(dllimport) 23 | # else 24 | # define DLLEXPORT __declspec(dllexport) 25 | # endif 26 | #else 27 | #define STDCALL 28 | #define DLLEXPORT __attribute__ ((visibility("default"))) 29 | #endif 30 | 31 | #ifdef __linux 32 | #include 33 | #include 34 | #define LITTLE_ENDIAN __LITTLE_ENDIAN 35 | #define BIG_ENDIAN __BIG_ENDIAN 36 | #define PDP_ENDIAN __PDP_ENDIAN 37 | #define BYTE_ORDER __BYTE_ORDER 38 | #endif 39 | 40 | #ifdef __APPLE__ 41 | #include 42 | #define __LITTLE_ENDIAN LITTLE_ENDIAN 43 | #define __BIG_ENDIAN BIG_ENDIAN 44 | #define __PDP_ENDIAN PDP_ENDIAN 45 | #define __BYTE_ORDER BYTE_ORDER 46 | #endif 47 | 48 | #ifdef WIN32 49 | #define __LITTLE_ENDIAN 1234 50 | #define __BIG_ENDIAN 4321 51 | #define __PDP_ENDIAN 3412 52 | #define __BYTE_ORDER __LITTLE_ENDIAN 53 | #define __FLOAT_WORD_ORDER __LITTLE_ENDIAN 54 | #define LITTLE_ENDIAN __LITTLE_ENDIAN 55 | #define BIG_ENDIAN __BIG_ENDIAN 56 | #define PDP_ENDIAN __PDP_ENDIAN 57 | #define BYTE_ORDER __BYTE_ORDER 58 | #endif 59 | 60 | #if (__STDC_VERSION__ >= 199901L) || defined(__GNUG__) 61 | // argument counting macros for C99 62 | #define VA_ARG_N(_1, _2, _3, _4, _5, _6, _7, _8, _9,_10, \ 63 | _11,_12,_13,_14,_15,_16,_17,_18,_19,_20, \ 64 | _21,_22,_23,_24,_25,_26,_27,_28,_29,_30, \ 65 | _31,_32,_33,_34,_35,_36,_37,_38,_39,_40, \ 66 | _41,_42,_43,_44,_45,_46,_47,_48,_49,_50, \ 67 | _51,_52,_53,_54,_55,_56,_57,_58,_59,_60, \ 68 | 
_61,_62,_63,N,...) N 69 | #define VA_RSEQ_N() 63,62,61,60,59,58,57,56,55,54,53,52,51,50, \ 70 | 49,48,47,46,45,44,43,42,41,40,39,38,37,36, \ 71 | 35,34,33,32,31,30,29,28,27,26,25,24,23,22, \ 72 | 21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 73 | #define VA_NARG_(...) VA_ARG_N(__VA_ARGS__) 74 | #define VA_NARG(...) VA_NARG_(__VA_ARGS__,VA_RSEQ_N()) 75 | #endif 76 | 77 | #define LLT_ALLOC(n) malloc(n) 78 | #define LLT_REALLOC(p,n) realloc((p),(n)) 79 | #define LLT_FREE(x) free(x) 80 | 81 | typedef int bool_t; 82 | 83 | #if defined(__INTEL_COMPILER) && defined(WIN32) 84 | # define STATIC_INLINE static 85 | # define INLINE 86 | # ifdef __LP64__ 87 | typedef unsigned long size_t; 88 | # else 89 | typedef unsigned int size_t; 90 | # endif 91 | #else 92 | # define STATIC_INLINE static inline 93 | # define INLINE inline 94 | #endif 95 | 96 | typedef unsigned char byte_t; /* 1 byte */ 97 | #if defined(WIN32) 98 | typedef short int16_t; 99 | typedef int int32_t; 100 | typedef long long int64_t; 101 | typedef unsigned char u_int8_t; 102 | typedef unsigned short u_int16_t; 103 | typedef unsigned int u_int32_t; 104 | #ifdef __LP64__ 105 | typedef unsigned long u_int64_t; 106 | #else 107 | typedef unsigned long long u_int64_t; 108 | #endif 109 | #ifdef __INTEL_COMPILER 110 | typedef signed char int8_t; 111 | typedef short int16_t; 112 | typedef int int32_t; 113 | #endif 114 | #else 115 | #include 116 | #endif 117 | 118 | #ifdef __LP64__ 119 | #define TOP_BIT 0x8000000000000000 120 | #define NBITS 64 121 | typedef unsigned long uint_t; // preferred int type on platform 122 | typedef long int_t; 123 | typedef int64_t offset_t; 124 | typedef u_int64_t index_t; 125 | typedef int64_t ptrint_t; // pointer-size int 126 | typedef u_int64_t u_ptrint_t; 127 | #else 128 | #define TOP_BIT 0x80000000 129 | #define NBITS 32 130 | typedef unsigned long uint_t; 131 | typedef long int_t; 132 | typedef int32_t offset_t; 133 | typedef u_int32_t index_t; 134 | typedef int32_t ptrint_t; 
// round x up to the next multiple of sz. sz must be a power of two.
// both arguments are fully parenthesized so that expression arguments
// such as LLT_ALIGN(n, a+b) expand correctly. note sz is evaluated twice.
#define LLT_ALIGN(x, sz) (((x) + ((sz)-1)) & -(sz))
-------------------------------------------------------------------------------- 1 | #include 2 | #include "dtypes.h" 3 | #include "ios.h" 4 | #include "utils.h" 5 | 6 | static char hexdig[] = "0123456789abcdef"; 7 | 8 | /* 9 | display a given number of bytes from a buffer, with the first 10 | address label being startoffs 11 | */ 12 | void hexdump(ios_t *dest, const char *buffer, size_t len, size_t startoffs) 13 | { 14 | size_t offs=0; 15 | size_t i, pos; 16 | char ch, linebuffer[16]; 17 | char hexc[4]; 18 | static char *spc50 = " "; 19 | 20 | hexc[2] = hexc[3] = ' '; 21 | do { 22 | ios_printf(dest, "%.8x ", offs+startoffs); 23 | pos = 10; 24 | for(i=0; i < 16 && offs < len; i++, offs++) { 25 | ch = buffer[offs]; 26 | linebuffer[i] = (ch<32 || ch>=0x7f) ? '.' : ch; 27 | hexc[0] = hexdig[((unsigned char)ch)>>4]; 28 | hexc[1] = hexdig[ch&0x0f]; 29 | pos += ios_write(dest, hexc, (i==7 || i==15) ? 4 : 3); 30 | } 31 | for(; i < 16; i++) 32 | linebuffer[i] = ' '; 33 | ios_write(dest, spc50, 60-pos); 34 | ios_putc('|', dest); 35 | ios_write(dest, linebuffer, 16); 36 | ios_write(dest, "|\n", 2); 37 | } while (offs < len); 38 | } 39 | -------------------------------------------------------------------------------- /extra/bswap.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "dtypes.h" 6 | #include "utils.h" 7 | 8 | void bswap_buffer(byte_t *data, size_t sz, size_t npts) 9 | { 10 | size_t i, b; 11 | byte_t *el; 12 | byte_t temp; 13 | 14 | if (sz <= 1) 15 | return; 16 | 17 | switch (sz) { 18 | case 8: 19 | for(i=0; i < npts; i++) { 20 | ((u_int64_t*)data)[i] = bswap_64(((u_int64_t*)data)[i]); 21 | } 22 | break; 23 | case 4: 24 | for(i=0; i < npts; i++) { 25 | ((u_int32_t*)data)[i] = bswap_32(((u_int32_t*)data)[i]); 26 | } 27 | break; 28 | case 2: 29 | for(i=0; i < npts; i++) { 30 | ((u_int16_t*)data)[i] = bswap_16(((u_int16_t*)data)[i]); 31 | } 32 | break; 33 | default: 34 | 
for(i=0; i < sz * npts; i += sz) { 35 | el = data + i; 36 | for(b=0; b < sz/2; b++) { 37 | temp = el[b]; 38 | el[b] = el[sz-b-1]; 39 | el[sz-b-1] = temp; 40 | } 41 | } 42 | } 43 | } 44 | 45 | void bswap(byte_t *s, size_t n) 46 | { 47 | unsigned int i; 48 | char temp; 49 | 50 | switch (n) { 51 | case 8: 52 | *(u_int64_t*)s = bswap_64(*(u_int64_t*)s); break; 53 | case 4: 54 | *(u_int32_t*)s = bswap_32(*(u_int32_t*)s); break; 55 | case 2: 56 | *(u_int16_t*)s = bswap_16(*(u_int16_t*)s); break; 57 | case 1: 58 | break; 59 | default: 60 | for(i=0; i < n/2; i++) { 61 | temp = s[i]; 62 | s[i] = s[n-i-1]; 63 | s[n-i-1] = temp; 64 | } 65 | } 66 | } 67 | 68 | void bswap_to(byte_t *dest, byte_t *src, size_t n) 69 | { 70 | unsigned int i; 71 | 72 | switch (n) { 73 | case 8: 74 | *(u_int64_t*)dest = bswap_64(*(u_int64_t*)src); break; 75 | case 4: 76 | *(u_int32_t*)dest = bswap_32(*(u_int32_t*)src); break; 77 | case 2: 78 | *(u_int16_t*)dest = bswap_16(*(u_int16_t*)src); break; 79 | case 1: 80 | break; 81 | default: 82 | for(i=0; i < n; i++) { 83 | dest[i] = src[n-i-1]; 84 | } 85 | } 86 | } 87 | 88 | -------------------------------------------------------------------------------- /extra/cplxprint.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "dtypes.h" 5 | #include "utils.h" 6 | #include "ieee754.h" 7 | 8 | int double_exponent(double d) 9 | { 10 | union ieee754_double dl; 11 | 12 | dl.d = d; 13 | return dl.ieee.exponent - IEEE754_DOUBLE_BIAS; 14 | } 15 | 16 | void snprint_real(char *s, size_t cnt, double r, 17 | int width, // printf field width, or 0 18 | int dec, // # decimal digits desired, recommend 16 19 | // # of zeros in .00...0x before using scientific notation 20 | // recommend 3-4 or so 21 | int max_digs_rt, 22 | // # of digits left of decimal before scientific notation 23 | // recommend 10 24 | int max_digs_lf) 25 | { 26 | int mag; 27 | double fpart, temp; 28 | char format[8]; 29 | 
char num_format[3]; 30 | int sz, keepz=0; 31 | 32 | s[0] = '\0'; 33 | if (width == -1) { 34 | width = 0; 35 | keepz=1; 36 | } 37 | if (isnan(r)) { 38 | if (sign_bit(r)) 39 | strncpy(s, "-nan", cnt); 40 | else 41 | strncpy(s, "nan", cnt); 42 | return; 43 | } 44 | if (r == 0) { 45 | strncpy(s, "0", cnt); 46 | return; 47 | } 48 | 49 | num_format[0] = 'l'; 50 | num_format[2] = '\0'; 51 | 52 | mag = double_exponent(r); 53 | 54 | mag = (int)(((double)mag)/LOG2_10 + 0.5); 55 | if (r == 0) 56 | mag = 0; 57 | if ((mag > max_digs_lf-1) || (mag < -max_digs_rt)) { 58 | num_format[1] = 'e'; 59 | temp = r/pow(10, mag); /* see if number will have a decimal */ 60 | fpart = temp - floor(temp); /* when written in scientific notation */ 61 | } 62 | else { 63 | num_format[1] = 'f'; 64 | fpart = r - floor(r); 65 | } 66 | if (fpart == 0) 67 | dec = 0; 68 | if (width == 0) { 69 | snprintf(format, 8, "%%.%d%s", dec, num_format); 70 | } 71 | else { 72 | snprintf(format, 8, "%%%d.%d%s", width, dec, num_format); 73 | } 74 | sz = snprintf(s, cnt, format, r); 75 | /* trim trailing zeros from fractions. not when using scientific 76 | notation, since we might have e.g. 1.2000e+100. also not when we 77 | need a specific output width */ 78 | if (width == 0 && !keepz) { 79 | if (sz > 2 && fpart && num_format[1]!='e') { 80 | while (s[sz-1] == '0') { 81 | s[sz-1]='\0'; 82 | sz--; 83 | } 84 | // don't need trailing . 85 | if (s[sz-1] == '.') { 86 | s[sz-1] = '\0'; 87 | sz--; 88 | } 89 | } 90 | } 91 | // TODO. currently 1.1e20 prints as 1.1000000000000000e+20; be able to 92 | // get rid of all those zeros. 
/*
  format the complex number re + im*i into s, which holds cnt bytes.

  width, dec, max_digs_rt and max_digs_lf are passed on to snprint_real
  for each component. spflag selects " + " / " - " instead of "+" / "-"
  between the parts. a component that is zero, or negligible relative to
  the other one (rel_zero), is omitted; a unit imaginary part prints as
  "i" or "-i".

  if a part does not fit in the remaining space, output stops there and
  s keeps whatever was completed so far. nothing is written when cnt
  is 0.
*/
void snprint_cplx(char *s, size_t cnt, double re, double im,
                  // args to pass on to snprint_real
                  int width, int dec,
                  int max_digs_rt, int max_digs_lf,
                  // print spaces around sign in a+bi
                  int spflag)
{
    int fzr = (re==0) || rel_zero(re,im);
    int fzi = (im==0) || rel_zero(im,re);
    size_t len, sl;
    size_t space = cnt;

    if (space == 0)
        return;    // no room even for the terminator
    s[0] = '\0';
    if (isnan(im) && fzr) {
        if (space < 2) return;
        snprint_real(s, space-2, im, width, dec, max_digs_rt, max_digs_lf);
        strcat(s, "i");
        return;
    }
    if (!fzr || (fzr && fzi)) {
        // reserve room for the widest separator (" + ") and terminator
        if (space < 4) return;
        snprint_real(s, space-4, re, width, dec, max_digs_rt, max_digs_lf);
        if ((im >= 0 || (isnan(im)&&!sign_bit(im))) && !fzi) {
            if (spflag) {
                strcat(s, " + ");
            }
            else {
                strcat(s, "+");
            }
        }
        else if (!fzi) {
            im = -im;
            if (spflag)
                strcat(s, " - ");
            else
                strcat(s, "-");
        }
    }
    if (!fzi) {
        len = sl = strlen(s);
        if (im == -1) {
            // need "-i" plus terminator; also keeps space-3 from wrapping
            if (space < len+3) return;
            while ((long)(len-sl) < (long)(width-2) && len < (space-3))
                s[len++] = ' ';
            s[len] = '-';
            s[len+1] = 'i';
            s[len+2] = '\0';
        }
        else if (im == 1) {
            // need "i" plus terminator; also keeps space-2 from wrapping
            if (space < len+2) return;
            while ((long)(len-sl) < (long)(width-1) && len < (space-2))
                s[len++] = ' ';
            s[len] = 'i';
            s[len+1] = '\0';
        }
        else {
            // need at least one byte for the number, then "i" and the
            // terminator; also keeps space-len-2 from wrapping
            if (space < len+3) return;
            snprint_real(s+len, space-len-2, im, width, dec,
                         max_digs_rt, max_digs_lf);
            strcat(s, "i");
        }
    }
}
/*
  exchange the contents of the sz-byte regions at a and b.

  the regions may have any alignment but must not overlap. data is
  moved through a small stack buffer with memcpy instead of casting
  the char pointers to wider integer types, which would be undefined
  for unaligned addresses.
*/
void memswap(char *a, char *b, size_t sz)
{
    char tmp[64];
    size_t chunk;

    while (sz > 0) {
        chunk = (sz < sizeof(tmp)) ? sz : sizeof(tmp);
        memcpy(tmp, a, chunk);
        // memmove so that the degenerate call a == b stays well defined
        memmove(a, b, chunk);
        memcpy(b, tmp, chunk);
        a += chunk;
        b += chunk;
        sz -= chunk;
    }
}
/*
  copy the n elements of elsz bytes each from a into dest in reverse
  element order: element i of dest is element n-1-i of a. the bytes
  inside each element keep their order.

  dest and a must not overlap. nothing is written when n or elsz is 0.
  elements are moved with memcpy, so neither buffer needs any particular
  alignment.
*/
void memreverse_to(char *dest, char *a, size_t n, size_t elsz)
{
    size_t i;

    if (n==0 || elsz==0) return;
    for(i=0; i < n; i++)
        memcpy(&dest[i*elsz], &a[(n-i-1)*elsz], elsz);
}
// mix a 64-bit key down to a 32-bit hash value.
// the result is the low 32 bits of the mixed 64-bit state.
u_int32_t int64to32hash(u_int64_t key)
{
    u_int64_t x = key;

    x = (x << 18) + ~x;    // x = (x << 18) - x - 1
    x ^= x >> 31;
    x *= 21;               // x = (x + (x << 2)) + (x << 4)
    x ^= x >> 11;
    x += x << 6;
    x ^= x >> 22;

    return (u_int32_t)x;
}
table instantiations 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "dtypes.h" 12 | #include "htable.h" 13 | #include "hashing.h" 14 | 15 | htable_t *htable_new(htable_t *h, size_t size) 16 | { 17 | if (size <= HT_N_INLINE/2) { 18 | h->size = size = HT_N_INLINE; 19 | h->table = &h->_space[0]; 20 | } 21 | else { 22 | size = nextipow2(size); 23 | size *= 2; // 2 pointers per key/value pair 24 | size *= 2; // aim for 50% occupancy 25 | h->size = size; 26 | h->table = (void**)LLT_ALLOC(size*sizeof(void*)); 27 | } 28 | if (h->table == NULL) return NULL; 29 | size_t i; 30 | for(i=0; i < size; i++) 31 | h->table[i] = HT_NOTFOUND; 32 | return h; 33 | } 34 | 35 | void htable_free(htable_t *h) 36 | { 37 | if (h->table != &h->_space[0]) 38 | LLT_FREE(h->table); 39 | } 40 | 41 | // empty and reduce size 42 | void htable_reset(htable_t *h, size_t sz) 43 | { 44 | sz = nextipow2(sz); 45 | if (h->size > sz*4 && h->size > HT_N_INLINE) { 46 | size_t newsz = sz*4; 47 | void **newtab = (void**)LLT_REALLOC(h->table, newsz*sizeof(void*)); 48 | h->size = newsz; 49 | h->table = newtab; 50 | } 51 | size_t i, hsz=h->size; 52 | for(i=0; i < hsz; i++) 53 | h->table[i] = HT_NOTFOUND; 54 | } 55 | -------------------------------------------------------------------------------- /htable.h: -------------------------------------------------------------------------------- 1 | #ifndef HTABLE_H 2 | #define HTABLE_H 3 | 4 | #define HT_N_INLINE 32 5 | 6 | typedef struct { 7 | size_t size; 8 | void **table; 9 | void *_space[HT_N_INLINE]; 10 | } htable_t; 11 | 12 | // define this to be an invalid key/value 13 | #define HT_NOTFOUND ((void*)1) 14 | 15 | // initialize and free 16 | htable_t *htable_new(htable_t *h, size_t size); 17 | void htable_free(htable_t *h); 18 | 19 | // clear and (possibly) change size 20 | void htable_reset(htable_t *h, size_t sz); 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- 
/*
  HTIMPL(HTNAME, HFUNC, EQFUNC) generates an open-addressing hash table
  implementation on top of htable_t. the table is a flat array of
  key/value pointer pairs: even slots hold keys, odd slots hold values.
  HFUNC hashes a key (passed as a uptrint_t) and EQFUNC compares two keys.

  generated functions:
    HTNAME_put     add or replace a binding
    HTNAME_adjoin  add a binding only if the key is unbound
    HTNAME_get     return the value, or HT_NOTFOUND
    HTNAME_has     nonzero if the key is bound
    HTNAME_remove  logically remove a key; returns 1 if it was present
    HTNAME_bp      pointer to the value slot, creating it if needed;
                   NULL only if memory allocation fails

  when growing the table fails for lack of memory the table is left
  exactly as it was: _bp returns NULL, and _put/_adjoin do nothing.
*/
#define HTIMPL(HTNAME, HFUNC, EQFUNC)                                   \
static void **HTNAME##_lookup_bp(htable_t *h, void *key)                \
{                                                                       \
    uint_t hv;                                                          \
    size_t i, orig, index, iter;                                        \
    size_t newsz, sz = hash_size(h);                                    \
    size_t maxprobe = max_probe(sz);                                    \
    void **tab = h->table;                                              \
    void **ol;                                                          \
    void **slot;                                                        \
                                                                        \
    hv = HFUNC((uptrint_t)key);                                         \
 retry_bp:                                                              \
    iter = 0;                                                           \
    index = (index_t)(hv & (sz-1)) * 2;                                 \
    sz *= 2;                                                            \
    orig = index;                                                       \
                                                                        \
    do {                                                                \
        if (tab[index+1] == HT_NOTFOUND) {                              \
            tab[index] = key;                                           \
            return &tab[index+1];                                       \
        }                                                               \
                                                                        \
        if (EQFUNC(key, tab[index]))                                    \
            return &tab[index+1];                                       \
                                                                        \
        index = (index+2) & (sz-1);                                     \
        iter++;                                                         \
        if (iter > maxprobe)                                            \
            break;                                                      \
    } while (index != orig);                                            \
                                                                        \
    /* table full */                                                    \
    /* quadruple size, rehash, retry the insert */                      \
    /* it's important to grow the table really fast; otherwise we waste */ \
    /* lots of time rehashing all the keys over and over. */            \
    sz = h->size;                                                       \
    ol = h->table;                                                      \
    if (sz >= (1<<19) || (sz <= (1<<8)))                                \
        newsz = sz<<1;                                                  \
    else                                                                \
        newsz = sz<<2;                                                  \
    tab = (void**)LLT_ALLOC(newsz*sizeof(void*));                       \
    if (tab == NULL)                                                    \
        return NULL;                                                    \
    for(i=0; i < newsz; i++)                                            \
        tab[i] = HT_NOTFOUND;                                           \
    h->table = tab;                                                     \
    h->size = newsz;                                                    \
    for(i=0; i < sz; i+=2) {                                            \
        if (ol[i+1] != HT_NOTFOUND) {                                   \
            slot = HTNAME##_lookup_bp(h, ol[i]);                        \
            if (slot == NULL) {                                         \
                /* out of memory while rehashing. the nested call may */ \
                /* already have replaced our new table, so discard */   \
                /* whatever is installed now and restore the old one, */ \
                /* which has not been modified. */                      \
                if (h->table != &h->_space[0])                          \
                    LLT_FREE(h->table);                                 \
                h->table = ol;                                          \
                h->size = sz;                                           \
                return NULL;                                            \
            }                                                           \
            *slot = ol[i+1];                                            \
        }                                                               \
    }                                                                   \
    if (ol != &h->_space[0])                                            \
        LLT_FREE(ol);                                                   \
                                                                        \
    sz = hash_size(h);                                                  \
    maxprobe = max_probe(sz);                                           \
    tab = h->table;                                                     \
                                                                        \
    goto retry_bp;                                                      \
}                                                                       \
                                                                        \
void HTNAME##_put(htable_t *h, void *key, void *val)                    \
{                                                                       \
    void **bp = HTNAME##_lookup_bp(h, key);                             \
                                                                        \
    /* bp is NULL only when the table could not be grown */             \
    if (bp != NULL)                                                     \
        *bp = val;                                                      \
}                                                                       \
                                                                        \
void **HTNAME##_bp(htable_t *h, void *key)                              \
{                                                                       \
    return HTNAME##_lookup_bp(h, key);                                  \
}                                                                       \
                                                                        \
/* returns bp if key is in hash, otherwise NULL */                      \
/* if return is non-NULL and *bp == HT_NOTFOUND then key was deleted */ \
static void **HTNAME##_peek_bp(htable_t *h, void *key)                  \
{                                                                       \
    size_t sz = hash_size(h);                                           \
    size_t maxprobe = max_probe(sz);                                    \
    void **tab = h->table;                                              \
    size_t index = (index_t)(HFUNC((uptrint_t)key) & (sz-1)) * 2;       \
    sz *= 2;                                                            \
    size_t orig = index;                                                \
    size_t iter = 0;                                                    \
                                                                        \
    do {                                                                \
        if (tab[index] == HT_NOTFOUND)                                  \
            return NULL;                                                \
        if (EQFUNC(key, tab[index]))                                    \
            return &tab[index+1];                                       \
                                                                        \
        index = (index+2) & (sz-1);                                     \
        iter++;                                                         \
        if (iter > maxprobe)                                            \
            break;                                                      \
    } while (index != orig);                                            \
                                                                        \
    return NULL;                                                        \
}                                                                       \
                                                                        \
void *HTNAME##_get(htable_t *h, void *key)                              \
{                                                                       \
    void **bp = HTNAME##_peek_bp(h, key);                               \
    if (bp == NULL)                                                     \
        return HT_NOTFOUND;                                             \
    return *bp;                                                         \
}                                                                       \
                                                                        \
int HTNAME##_has(htable_t *h, void *key)                                \
{                                                                       \
    return (HTNAME##_get(h,key) != HT_NOTFOUND);                        \
}                                                                       \
                                                                        \
int HTNAME##_remove(htable_t *h, void *key)                             \
{                                                                       \
    void **bp = HTNAME##_peek_bp(h, key);                               \
    if (bp != NULL) {                                                   \
        *bp = HT_NOTFOUND;                                              \
        return 1;                                                       \
    }                                                                   \
    return 0;                                                           \
}                                                                       \
                                                                        \
void HTNAME##_adjoin(htable_t *h, void *key, void *val)                 \
{                                                                       \
    void **bp = HTNAME##_lookup_bp(h, key);                             \
    /* bp is NULL only when the table could not be grown */             \
    if (bp != NULL && *bp == HT_NOTFOUND)                               \
        *bp = val;                                                      \
}
/*
  convert num to text in the given base, using digits 0-9 then a-z.

  the digits are written right-aligned at the end of dest (len bytes,
  always NUL-terminated) and a pointer to the first digit inside dest
  is returned, so the result generally does not start at dest itself.
  if the buffer is too small only the least significant digits that fit
  are kept.

  returns NULL, without writing anything, when dest is NULL, len is 0,
  or base is less than 2.
*/
char *uint2str(char *dest, size_t len, uint64_t num, uint32_t base)
{
    size_t i;
    uint64_t b = (uint64_t)base;
    char ch;

    // len == 0 leaves no room for the terminator, and base 0 would
    // divide by zero
    if (dest == NULL || len == 0 || base < 2)
        return NULL;

    i = len-1;
    dest[i] = '\0';
    // fill in digits from the least significant end, moving left
    while (i > 0) {
        ch = (char)(num % b);
        if (ch < 10)
            ch += '0';
        else
            ch = ch-10+'a';
        dest[--i] = ch;
        num /= b;
        if (num == 0)
            break;
    }
    return &dest[i];
}
// read until n bytes have been received, EOF is hit, or a fatal error occurs.
// returns the error code from _os_read (0 on success or EOF); the number of
// bytes actually stored in buf is returned in *nread.
static int _os_read_all(long fd, void *buf, size_t n, size_t *nread)
{
    // byte cursor: arithmetic on void* is a GNU extension, not ISO C
    char *dst = (char *)buf;
    size_t got;

    *nread = 0;

    while (n>0) {
        int err = _os_read(fd, dst, n, &got);
        n -= got;
        *nread += got;
        dst += got;
        // got==0 with no error means end of file
        if (err || got==0)
            return err;
    }
    return 0;
}
133 | *nwritten = 0; 134 | 135 | while (n>0) { 136 | int err = _os_write(fd, buf, n, &wrote); 137 | n -= wrote; 138 | *nwritten += wrote; 139 | buf += wrote; 140 | if (err) 141 | return err; 142 | } 143 | return 0; 144 | } 145 | 146 | 147 | /* internal utility functions */ 148 | 149 | static char *_buf_realloc(ios_t *s, size_t sz) 150 | { 151 | char *temp; 152 | 153 | if ((s->buf==NULL || s->buf==&s->local[0]) && (sz <= IOS_INLSIZE)) { 154 | /* TODO: if we want to allow shrinking, see if the buffer shrank 155 | down to this size, in which case we need to copy. */ 156 | s->buf = &s->local[0]; 157 | s->maxsize = IOS_INLSIZE; 158 | s->ownbuf = 1; 159 | return s->buf; 160 | } 161 | 162 | if (sz <= s->maxsize) return s->buf; 163 | 164 | if (s->ownbuf && s->buf != &s->local[0]) { 165 | // if we own the buffer we're free to resize it 166 | // always allocate 1 bigger in case user wants to add a NUL 167 | // terminator after taking over the buffer 168 | temp = LLT_REALLOC(s->buf, sz+1); 169 | if (temp == NULL) 170 | return NULL; 171 | } 172 | else { 173 | temp = LLT_ALLOC(sz+1); 174 | if (temp == NULL) 175 | return NULL; 176 | s->ownbuf = 1; 177 | if (s->size > 0) 178 | memcpy(temp, s->buf, s->size); 179 | } 180 | 181 | s->buf = temp; 182 | s->maxsize = sz; 183 | return s->buf; 184 | } 185 | 186 | // write a block of data into the buffer at the current position, resizing 187 | // if necessary. returns # written. 188 | static size_t _write_grow(ios_t *s, char *data, size_t n) 189 | { 190 | size_t amt; 191 | size_t newsize; 192 | 193 | if (n == 0) 194 | return 0; 195 | 196 | if (s->bpos + n > s->size) { 197 | if (s->bpos + n > s->maxsize) { 198 | /* TODO: here you might want to add a mechanism for limiting 199 | the growth of the stream. */ 200 | newsize = s->maxsize ? 
s->maxsize * 2 : 8; 201 | while (s->bpos + n > newsize) 202 | newsize *= 2; 203 | if (_buf_realloc(s, newsize) == NULL) { 204 | /* no more space; write as much as we can */ 205 | amt = s->maxsize - s->bpos; 206 | if (amt > 0) { 207 | memcpy(&s->buf[s->bpos], data, amt); 208 | } 209 | s->bpos += amt; 210 | s->size = s->maxsize; 211 | return amt; 212 | } 213 | } 214 | s->size = s->bpos + n; 215 | } 216 | memcpy(s->buf + s->bpos, data, n); 217 | s->bpos += n; 218 | 219 | return n; 220 | } 221 | 222 | 223 | /* interface functions, low level */ 224 | 225 | static size_t _ios_read(ios_t *s, char *dest, size_t n, int all) 226 | { 227 | size_t tot = 0; 228 | size_t got, avail; 229 | //int result; 230 | 231 | while (n > 0) { 232 | avail = s->size - s->bpos; 233 | 234 | if (avail > 0) { 235 | size_t ncopy = (avail >= n) ? n : avail; 236 | memcpy(dest, s->buf + s->bpos, ncopy); 237 | s->bpos += ncopy; 238 | if (ncopy >= n) { 239 | s->state = bst_rd; 240 | return tot+ncopy; 241 | } 242 | } 243 | if (s->bm == bm_mem || s->fd == -1) { 244 | // can't get any more data 245 | s->state = bst_rd; 246 | if (avail == 0) 247 | s->_eof = 1; 248 | return avail; 249 | } 250 | 251 | dest += avail; 252 | n -= avail; 253 | tot += avail; 254 | 255 | ios_flush(s); 256 | s->bpos = s->size = 0; 257 | s->state = bst_rd; 258 | 259 | s->fpos = -1; 260 | if (n > MOST_OF(s->maxsize)) { 261 | // doesn't fit comfortably in buffer; go direct 262 | if (all) { 263 | //result = _os_read_all(s->fd, dest, n, &got); 264 | _os_read_all(s->fd, dest, n, &got); 265 | } else { 266 | //result = _os_read(s->fd, dest, n, &got); 267 | _os_read(s->fd, dest, n, &got); 268 | } 269 | tot += got; 270 | if (got == 0) 271 | s->_eof = 1; 272 | return tot; 273 | } 274 | else { 275 | // refill buffer 276 | if (_os_read(s->fd, s->buf, s->maxsize, &got)) { 277 | s->_eof = 1; 278 | return tot; 279 | } 280 | if (got == 0) { 281 | s->_eof = 1; 282 | return tot; 283 | } 284 | s->size = got; 285 | } 286 | } 287 | 288 | return tot; 289 
| } 290 | 291 | size_t ios_read(ios_t *s, char *dest, size_t n) 292 | { 293 | return _ios_read(s, dest, n, 0); 294 | } 295 | 296 | size_t ios_readall(ios_t *s, char *dest, size_t n) 297 | { 298 | return _ios_read(s, dest, n, 1); 299 | } 300 | 301 | size_t ios_readprep(ios_t *s, size_t n) 302 | { 303 | if (s->state == bst_wr && s->bm != bm_mem) { 304 | ios_flush(s); 305 | s->bpos = s->size = 0; 306 | } 307 | size_t space = s->size - s->bpos; 308 | s->state = bst_rd; 309 | if (space >= n || s->bm == bm_mem || s->fd == -1) 310 | return space; 311 | if (s->maxsize < s->bpos+n) { 312 | // it won't fit. grow buffer or move data back. 313 | if (n <= s->maxsize && space <= ((s->maxsize)>>2)) { 314 | if (space) 315 | memmove(s->buf, s->buf+s->bpos, space); 316 | s->size -= s->bpos; 317 | s->bpos = 0; 318 | } 319 | else { 320 | if (_buf_realloc(s, s->bpos + n)==NULL) 321 | return space; 322 | } 323 | } 324 | size_t got; 325 | int result = _os_read(s->fd, s->buf+s->size, s->maxsize - s->size, &got); 326 | if (result) 327 | return space; 328 | s->size += got; 329 | return s->size - s->bpos; 330 | } 331 | 332 | static void _write_update_pos(ios_t *s) 333 | { 334 | if (s->bpos > s->ndirty) s->ndirty = s->bpos; 335 | if (s->bpos > s->size) s->size = s->bpos; 336 | } 337 | 338 | // directly copy a buffer to a descriptor 339 | DLLEXPORT size_t ios_write_direct(ios_t *dest, ios_t *src) 340 | { 341 | char *data = src->buf; 342 | size_t n = src->size; 343 | size_t nwr; 344 | dest->fpos = -1; 345 | _os_write_all(dest->fd, data, n, &nwr); 346 | return nwr; 347 | } 348 | 349 | size_t ios_write(ios_t *s, char *data, size_t n) 350 | { 351 | if (s->readonly) return 0; 352 | if (n == 0) return 0; 353 | size_t space; 354 | size_t wrote = 0; 355 | 356 | if (s->state == bst_none) s->state = bst_wr; 357 | if (s->state == bst_rd) { 358 | if (!s->rereadable) { 359 | s->size = 0; 360 | s->bpos = 0; 361 | } 362 | space = s->size - s->bpos; 363 | } 364 | else { 365 | space = s->maxsize - s->bpos; 366 
| } 367 | 368 | if (s->bm == bm_mem) { 369 | wrote = _write_grow(s, data, n); 370 | } 371 | else if (s->bm == bm_none) { 372 | s->fpos = -1; 373 | _os_write_all(s->fd, data, n, &wrote); 374 | return wrote; 375 | } 376 | else if (n <= space) { 377 | if (s->bm == bm_line) { 378 | char *nl; 379 | if ((nl=(char*)memrchr(data, '\n', n)) != NULL) { 380 | size_t linesz = nl-data+1; 381 | s->bm = bm_block; 382 | wrote += ios_write(s, data, linesz); 383 | ios_flush(s); 384 | s->bm = bm_line; 385 | n -= linesz; 386 | data += linesz; 387 | } 388 | } 389 | memcpy(s->buf + s->bpos, data, n); 390 | s->bpos += n; 391 | wrote += n; 392 | } 393 | else { 394 | s->state = bst_wr; 395 | ios_flush(s); 396 | if (n > MOST_OF(s->maxsize)) { 397 | _os_write_all(s->fd, data, n, &wrote); 398 | return wrote; 399 | } 400 | return ios_write(s, data, n); 401 | } 402 | _write_update_pos(s); 403 | return wrote; 404 | } 405 | 406 | off_t ios_seek(ios_t *s, off_t pos) 407 | { 408 | s->_eof = 0; 409 | if (s->bm == bm_mem) { 410 | if ((size_t)pos > s->size) 411 | return -1; 412 | s->bpos = pos; 413 | } 414 | else { 415 | ios_flush(s); 416 | off_t fdpos = lseek(s->fd, pos, SEEK_SET); 417 | if (fdpos == (off_t)-1) 418 | return fdpos; 419 | s->bpos = s->size = 0; 420 | } 421 | return 0; 422 | } 423 | 424 | off_t ios_seek_end(ios_t *s) 425 | { 426 | s->_eof = 1; 427 | if (s->bm == bm_mem) { 428 | s->bpos = s->size; 429 | } 430 | else { 431 | ios_flush(s); 432 | off_t fdpos = lseek(s->fd, 0, SEEK_END); 433 | if (fdpos == (off_t)-1) 434 | return fdpos; 435 | s->bpos = s->size = 0; 436 | } 437 | return 0; 438 | } 439 | 440 | off_t ios_skip(ios_t *s, off_t offs) 441 | { 442 | if (offs != 0) { 443 | if (offs > 0) { 444 | if (offs <= (off_t)(s->size-s->bpos)) { 445 | s->bpos += offs; 446 | return 0; 447 | } 448 | else if (s->bm == bm_mem) { 449 | // TODO: maybe grow buffer 450 | return -1; 451 | } 452 | } 453 | else if (offs < 0) { 454 | if (-offs <= (off_t)s->bpos) { 455 | s->bpos += offs; 456 | s->_eof = 0; 
457 | return 0; 458 | } 459 | else if (s->bm == bm_mem) { 460 | return -1; 461 | } 462 | } 463 | ios_flush(s); 464 | if (s->state == bst_wr) 465 | offs += s->bpos; 466 | else if (s->state == bst_rd) 467 | offs -= (s->size - s->bpos); 468 | off_t fdpos = lseek(s->fd, offs, SEEK_CUR); 469 | if (fdpos == (off_t)-1) 470 | return fdpos; 471 | s->bpos = s->size = 0; 472 | s->_eof = 0; 473 | } 474 | return 0; 475 | } 476 | 477 | off_t ios_pos(ios_t *s) 478 | { 479 | if (s->bm == bm_mem) 480 | return (off_t)s->bpos; 481 | 482 | off_t fdpos = s->fpos; 483 | if (fdpos == (off_t)-1) { 484 | fdpos = lseek(s->fd, 0, SEEK_CUR); 485 | if (fdpos == (off_t)-1) 486 | return fdpos; 487 | s->fpos = fdpos; 488 | } 489 | 490 | if (s->state == bst_wr) 491 | fdpos += s->bpos; 492 | else if (s->state == bst_rd) 493 | fdpos -= (s->size - s->bpos); 494 | return fdpos; 495 | } 496 | 497 | size_t ios_trunc(ios_t *s, size_t size) 498 | { 499 | if (s->bm == bm_mem) { 500 | if (size == s->size) 501 | return s->size; 502 | if (size < s->size) { 503 | if (s->bpos > size) 504 | s->bpos = size; 505 | } 506 | else { 507 | if (_buf_realloc(s, size)==NULL) 508 | return s->size; 509 | } 510 | s->size = size; 511 | return size; 512 | } 513 | //todo 514 | return 0; 515 | } 516 | 517 | int ios_eof(ios_t *s) 518 | { 519 | if (s->bm == bm_mem) 520 | return (s->_eof ? 
1 : 0); 521 | if (s->fd == -1) 522 | return 1; 523 | if (s->_eof) 524 | return 1; 525 | return 0; 526 | /* 527 | if (_fd_available(s->fd)) 528 | return 0; 529 | s->_eof = 1; 530 | return 1; 531 | */ 532 | } 533 | 534 | int ios_flush(ios_t *s) 535 | { 536 | if (s->ndirty == 0 || s->bm == bm_mem || s->buf == NULL) 537 | return 0; 538 | if (s->fd == -1) 539 | return -1; 540 | 541 | if (s->state == bst_rd) { 542 | if (lseek(s->fd, -(off_t)s->size, SEEK_CUR) == (off_t)-1) { 543 | } 544 | } 545 | 546 | size_t nw, ntowrite=s->ndirty; 547 | s->fpos = -1; 548 | int err = _os_write_all(s->fd, s->buf, ntowrite, &nw); 549 | // todo: try recovering from some kinds of errors (e.g. retry) 550 | 551 | if (s->state == bst_rd) { 552 | if (lseek(s->fd, s->size - nw, SEEK_CUR) == (off_t)-1) { 553 | } 554 | } 555 | else if (s->state == bst_wr) { 556 | if (s->bpos != nw && 557 | lseek(s->fd, (off_t)s->bpos - (off_t)nw, SEEK_CUR) == (off_t)-1) { 558 | } 559 | // now preserve the invariant that data to write 560 | // begins at the beginning of the buffer, and s->size refers 561 | // to how much valid file data is stored in the buffer. 
562 | if (s->size > s->ndirty) { 563 | size_t delta = s->size - s->ndirty; 564 | memmove(s->buf, s->buf + s->ndirty, delta); 565 | } 566 | s->size -= s->ndirty; 567 | s->bpos = 0; 568 | } 569 | 570 | s->ndirty = 0; 571 | 572 | if (err) 573 | return err; 574 | if (nw < ntowrite) 575 | return -1; 576 | return 0; 577 | } 578 | 579 | void ios_close(ios_t *s) 580 | { 581 | ios_flush(s); 582 | if (s->fd != -1 && s->ownfd) 583 | close(s->fd); 584 | s->fd = -1; 585 | if (s->buf!=NULL && s->ownbuf && s->buf!=&s->local[0]) { 586 | LLT_FREE(s->buf); 587 | } 588 | s->buf = NULL; 589 | s->size = s->maxsize = s->bpos = 0; 590 | } 591 | 592 | static void _buf_init(ios_t *s, bufmode_t bm) 593 | { 594 | s->bm = bm; 595 | if (s->bm == bm_mem || s->bm == bm_none) { 596 | s->buf = &s->local[0]; 597 | s->maxsize = IOS_INLSIZE; 598 | } 599 | else { 600 | s->buf = NULL; 601 | _buf_realloc(s, IOS_BUFSIZE); 602 | } 603 | s->size = s->bpos = 0; 604 | } 605 | 606 | char *ios_takebuf(ios_t *s, size_t *psize) 607 | { 608 | char *buf; 609 | 610 | ios_flush(s); 611 | 612 | if (s->buf == &s->local[0]) { 613 | buf = LLT_ALLOC(s->size+1); 614 | if (buf == NULL) 615 | return NULL; 616 | if (s->size) 617 | memcpy(buf, s->buf, s->size); 618 | } 619 | else { 620 | buf = s->buf; 621 | } 622 | buf[s->size] = '\0'; 623 | 624 | *psize = s->size+1; // buffer is actually 1 bigger for terminating NUL 625 | 626 | /* empty stream and reinitialize */ 627 | _buf_init(s, s->bm); 628 | 629 | return buf; 630 | } 631 | 632 | int ios_setbuf(ios_t *s, char *buf, size_t size, int own) 633 | { 634 | ios_flush(s); 635 | size_t nvalid=0; 636 | 637 | nvalid = (size < s->size) ? 
size : s->size; 638 | if (nvalid > 0) 639 | memcpy(buf, s->buf, nvalid); 640 | if (s->bpos > nvalid) { 641 | // truncated 642 | s->bpos = nvalid; 643 | } 644 | s->size = nvalid; 645 | 646 | if (s->buf!=NULL && s->ownbuf && s->buf!=&s->local[0]) { 647 | LLT_FREE(s->buf); 648 | } 649 | s->buf = buf; 650 | s->maxsize = size; 651 | s->ownbuf = own; 652 | return 0; 653 | } 654 | 655 | int ios_bufmode(ios_t *s, bufmode_t mode) 656 | { 657 | // no fd; can only do mem-only buffering 658 | if (s->fd == -1 && mode != bm_mem) 659 | return -1; 660 | s->bm = mode; 661 | return 0; 662 | } 663 | 664 | void ios_set_readonly(ios_t *s) 665 | { 666 | if (s->readonly) return; 667 | ios_flush(s); 668 | s->state = bst_none; 669 | s->readonly = 1; 670 | } 671 | 672 | static size_t ios_copy_(ios_t *to, ios_t *from, size_t nbytes, bool_t all) 673 | { 674 | size_t total = 0, avail; 675 | if (!ios_eof(from)) { 676 | do { 677 | avail = ios_readprep(from, IOS_BUFSIZE/2); 678 | if (avail == 0) { 679 | from->_eof = 1; 680 | break; 681 | } 682 | size_t written, ntowrite; 683 | ntowrite = (avail <= nbytes || all) ? avail : nbytes; 684 | written = ios_write(to, from->buf+from->bpos, ntowrite); 685 | // TODO: should this be +=written instead? 
686 | from->bpos += ntowrite; 687 | total += written; 688 | if (!all) { 689 | nbytes -= written; 690 | if (nbytes == 0) 691 | break; 692 | } 693 | if (written < ntowrite) 694 | break; 695 | } while (!ios_eof(from)); 696 | } 697 | return total; 698 | } 699 | 700 | size_t ios_copy(ios_t *to, ios_t *from, size_t nbytes) 701 | { 702 | return ios_copy_(to, from, nbytes, 0); 703 | } 704 | 705 | size_t ios_copyall(ios_t *to, ios_t *from) 706 | { 707 | return ios_copy_(to, from, 0, 1); 708 | } 709 | 710 | #define LINE_CHUNK_SIZE 160 711 | 712 | size_t ios_copyuntil(ios_t *to, ios_t *from, char delim) 713 | { 714 | size_t total = 0, avail=from->size - from->bpos; 715 | int first = 1; 716 | if (!ios_eof(from)) { 717 | do { 718 | if (avail == 0) { 719 | first = 0; 720 | avail = ios_readprep(from, LINE_CHUNK_SIZE); 721 | } 722 | size_t written; 723 | char *pd = (char*)memchr(from->buf+from->bpos, delim, avail); 724 | if (pd == NULL) { 725 | written = ios_write(to, from->buf+from->bpos, avail); 726 | from->bpos += avail; 727 | total += written; 728 | avail = 0; 729 | } 730 | else { 731 | size_t ntowrite = pd - (from->buf+from->bpos) + 1; 732 | written = ios_write(to, from->buf+from->bpos, ntowrite); 733 | from->bpos += ntowrite; 734 | total += written; 735 | return total; 736 | } 737 | } while (!ios_eof(from) && (first || avail >= LINE_CHUNK_SIZE)); 738 | } 739 | from->_eof = 1; 740 | return total; 741 | } 742 | 743 | static void _ios_init(ios_t *s) 744 | { 745 | // put all fields in a sane initial state 746 | s->bm = bm_block; 747 | s->state = bst_none; 748 | s->errcode = 0; 749 | s->buf = NULL; 750 | s->maxsize = 0; 751 | s->size = 0; 752 | s->bpos = 0; 753 | s->ndirty = 0; 754 | s->fpos = -1; 755 | s->lineno = 1; 756 | s->fd = -1; 757 | s->ownbuf = 1; 758 | s->ownfd = 0; 759 | s->_eof = 0; 760 | s->rereadable = 0; 761 | s->readonly = 0; 762 | s->mutex_initialized = 0; 763 | } 764 | 765 | /* stream object initializers. we do no allocation. 
*/ 766 | 767 | ios_t *ios_file(ios_t *s, char *fname, int rd, int wr, int create, int trunc) 768 | { 769 | int fd; 770 | if (!(rd || wr)) 771 | // must specify read and/or write 772 | goto open_file_err; 773 | int flags = wr ? (rd ? O_RDWR : O_WRONLY) : O_RDONLY; 774 | if (create) flags |= O_CREAT; 775 | if (trunc) flags |= O_TRUNC; 776 | fd = open(fname, flags, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH/*644*/); 777 | if (fd == -1) 778 | goto open_file_err; 779 | s = ios_fd(s, fd, 1, 1); 780 | if (!wr) 781 | s->readonly = 1; 782 | return s; 783 | open_file_err: 784 | s->fd = -1; 785 | return NULL; 786 | } 787 | 788 | ios_t *ios_mem(ios_t *s, size_t initsize) 789 | { 790 | _ios_init(s); 791 | s->bm = bm_mem; 792 | _buf_realloc(s, initsize); 793 | return s; 794 | } 795 | 796 | ios_t *ios_str(ios_t *s, char *str) 797 | { 798 | size_t n = strlen(str); 799 | if (ios_mem(s, n+1)==NULL) return NULL; 800 | ios_write(s, str, n+1); 801 | ios_seek(s, 0); 802 | return s; 803 | } 804 | 805 | ios_t *ios_static_buffer(ios_t *s, char *buf, size_t sz) 806 | { 807 | ios_mem(s, 0); 808 | ios_setbuf(s, buf, sz, 0); 809 | s->size = sz; 810 | ios_set_readonly(s); 811 | return s; 812 | } 813 | 814 | ios_t *ios_fd(ios_t *s, long fd, int isfile, int own) 815 | { 816 | _ios_init(s); 817 | s->fd = fd; 818 | if (isfile) s->rereadable = 1; 819 | _buf_init(s, bm_block); 820 | s->ownfd = own; 821 | if (fd == STDERR_FILENO) 822 | s->bm = bm_none; 823 | return s; 824 | } 825 | 826 | ios_t *ios_stdin = NULL; 827 | ios_t *ios_stdout = NULL; 828 | ios_t *ios_stderr = NULL; 829 | 830 | void ios_init_stdstreams(void) 831 | { 832 | ios_stdin = malloc(sizeof(ios_t)); 833 | ios_fd(ios_stdin, STDIN_FILENO, 0, 0); 834 | 835 | ios_stdout = malloc(sizeof(ios_t)); 836 | ios_fd(ios_stdout, STDOUT_FILENO, 0, 0); 837 | ios_stdout->bm = bm_line; 838 | 839 | ios_stderr = malloc(sizeof(ios_t)); 840 | ios_fd(ios_stderr, STDERR_FILENO, 0, 0); 841 | ios_stderr->bm = bm_none; 842 | } 843 | 844 | /* higher level interface */ 845 | 
846 | int ios_putc(int c, ios_t *s) 847 | { 848 | char ch = (char)c; 849 | 850 | if (s->state == bst_wr && s->bpos < s->maxsize && s->bm != bm_none) { 851 | s->buf[s->bpos++] = ch; 852 | _write_update_pos(s); 853 | if (s->bm == bm_line && ch == '\n') 854 | ios_flush(s); 855 | return 1; 856 | } 857 | return (int)ios_write(s, &ch, 1); 858 | } 859 | 860 | int ios_getc(ios_t *s) 861 | { 862 | char ch; 863 | if (s->state == bst_rd && s->bpos < s->size) { 864 | ch = s->buf[s->bpos++]; 865 | } 866 | else { 867 | if (s->_eof) return IOS_EOF; 868 | if (ios_read(s, &ch, 1) < 1) 869 | return IOS_EOF; 870 | } 871 | if (ch == '\n') s->lineno++; 872 | return (unsigned char)ch; 873 | } 874 | 875 | int ios_peekc(ios_t *s) 876 | { 877 | if (s->bpos < s->size) 878 | return (unsigned char)s->buf[s->bpos]; 879 | if (s->_eof) return IOS_EOF; 880 | size_t n = ios_readprep(s, 1); 881 | if (n == 0) return IOS_EOF; 882 | return (unsigned char)s->buf[s->bpos]; 883 | } 884 | 885 | int ios_ungetc(int c, ios_t *s) 886 | { 887 | if (s->state == bst_wr) 888 | return IOS_EOF; 889 | if (s->bpos > 0) { 890 | s->bpos--; 891 | s->buf[s->bpos] = (char)c; 892 | s->_eof = 0; 893 | return c; 894 | } 895 | if (s->size == s->maxsize) { 896 | if (_buf_realloc(s, s->maxsize*2) == NULL) 897 | return IOS_EOF; 898 | } 899 | memmove(s->buf + 1, s->buf, s->size); 900 | s->buf[0] = (char)c; 901 | s->size++; 902 | s->_eof = 0; 903 | return c; 904 | } 905 | 906 | int ios_getutf8(ios_t *s, uint32_t *pwc) 907 | { 908 | int c; 909 | size_t sz; 910 | char c0; 911 | char buf[8]; 912 | 913 | c = ios_getc(s); 914 | if (c == IOS_EOF) 915 | return IOS_EOF; 916 | c0 = (char)c; 917 | if ((unsigned char)c0 < 0x80) { 918 | *pwc = (uint32_t)(unsigned char)c0; 919 | return 1; 920 | } 921 | sz = u8_seqlen(&c0)-1; 922 | if (ios_ungetc(c, s) == IOS_EOF) 923 | return IOS_EOF; 924 | if (ios_readprep(s, sz) < sz) 925 | // NOTE: this can return EOF even if some bytes are available 926 | return IOS_EOF; 927 | size_t i = s->bpos; 928 | 
*pwc = u8_nextchar(s->buf, &i); 929 | ios_read(s, buf, sz+1); 930 | return 1; 931 | } 932 | 933 | int ios_peekutf8(ios_t *s, uint32_t *pwc) 934 | { 935 | int c; 936 | size_t sz; 937 | char c0; 938 | 939 | c = ios_peekc(s); 940 | if (c == IOS_EOF) 941 | return IOS_EOF; 942 | c0 = (char)c; 943 | if ((unsigned char)c0 < 0x80) { 944 | *pwc = (uint32_t)(unsigned char)c0; 945 | return 1; 946 | } 947 | sz = u8_seqlen(&c0)-1; 948 | if (ios_readprep(s, sz) < sz) 949 | return IOS_EOF; 950 | size_t i = s->bpos; 951 | *pwc = u8_nextchar(s->buf, &i); 952 | return 1; 953 | } 954 | 955 | int ios_pututf8(ios_t *s, uint32_t wc) 956 | { 957 | char buf[8]; 958 | if (wc < 0x80) 959 | return ios_putc((int)wc, s); 960 | size_t n = u8_toutf8(buf, 8, &wc, 1); 961 | return ios_write(s, buf, n); 962 | } 963 | 964 | void ios_purge(ios_t *s) 965 | { 966 | if (s->state == bst_rd) { 967 | s->bpos = s->size; 968 | } 969 | } 970 | 971 | char *ios_readline(ios_t *s) 972 | { 973 | ios_t dest; 974 | ios_mem(&dest, 0); 975 | ios_copyuntil(&dest, s, '\n'); 976 | size_t n; 977 | return ios_takebuf(&dest, &n); 978 | } 979 | 980 | int vasprintf(char **strp, const char *fmt, va_list ap); 981 | 982 | int ios_vprintf(ios_t *s, const char *format, va_list args) 983 | { 984 | char *str=NULL; 985 | int c; 986 | va_list al; 987 | va_copy(al, args); 988 | 989 | if (s->state == bst_wr && s->bpos < s->maxsize && s->bm != bm_none) { 990 | size_t avail = s->maxsize - s->bpos; 991 | char *start = s->buf + s->bpos; 992 | c = vsnprintf(start, avail, format, args); 993 | if (c < 0) { 994 | va_end(al); 995 | return c; 996 | } 997 | if (c < avail) { 998 | s->bpos += (size_t)c; 999 | _write_update_pos(s); 1000 | // TODO: only works right if newline is at end 1001 | if (s->bm == bm_line && memrchr(start, '\n', (size_t)c)) 1002 | ios_flush(s); 1003 | va_end(al); 1004 | return c; 1005 | } 1006 | } 1007 | c = vasprintf(&str, format, al); 1008 | 1009 | if (c >= 0) { 1010 | ios_write(s, str, c); 1011 | 1012 | LLT_FREE(str); 1013 
| } 1014 | va_end(al); 1015 | return c; 1016 | } 1017 | 1018 | int ios_printf(ios_t *s, const char *format, ...) 1019 | { 1020 | va_list args; 1021 | int c; 1022 | 1023 | va_start(args, format); 1024 | c = ios_vprintf(s, format, args); 1025 | va_end(args); 1026 | return c; 1027 | } 1028 | -------------------------------------------------------------------------------- /ios.h: -------------------------------------------------------------------------------- 1 | #ifndef IOS_H 2 | #define IOS_H 3 | 4 | #include 5 | #include 6 | 7 | // this flag controls when data actually moves out to the underlying I/O 8 | // channel. memory streams are a special case of this where the data 9 | // never moves out. 10 | typedef enum { bm_none, bm_line, bm_block, bm_mem } bufmode_t; 11 | 12 | typedef enum { bst_none, bst_rd, bst_wr } bufstate_t; 13 | 14 | #define IOS_INLSIZE 54 15 | #define IOS_BUFSIZE 131072 16 | 17 | typedef struct { 18 | bufmode_t bm; 19 | 20 | // the state only indicates where the underlying file position is relative 21 | // to the buffer. reading: at the end. writing: at the beginning. 22 | // in general, you can do any operation in any state. 23 | bufstate_t state; 24 | 25 | int errcode; 26 | 27 | char *buf; // start of buffer 28 | size_t maxsize; // space allocated to buffer 29 | size_t size; // length of valid data in buf, >=ndirty 30 | size_t bpos; // current position in buffer 31 | size_t ndirty; // # bytes at &buf[0] that need to be written 32 | 33 | off_t fpos; // cached file pos 34 | size_t lineno; // current line number 35 | 36 | // pointer-size integer to support platforms where it might have 37 | // to be a pointer 38 | long fd; 39 | 40 | unsigned char readonly:1; 41 | unsigned char ownbuf:1; 42 | unsigned char ownfd:1; 43 | unsigned char _eof:1; 44 | 45 | // this means you can read, seek back, then read the same data 46 | // again any number of times. usually only true for files and strings. 
47 | unsigned char rereadable:1; 48 | 49 | // this enables "stenciled writes". you can alternately write and 50 | // seek without flushing in between. this performs read-before-write 51 | // to populate the buffer, so "rereadable" capability is required. 52 | // this is off by default. 53 | //unsigned char stenciled:1; 54 | 55 | // request durable writes (fsync) 56 | // unsigned char durable:1; 57 | 58 | unsigned char mutex_initialized:1; 59 | 60 | int64_t userdata; 61 | pthread_mutex_t mutex; 62 | 63 | char local[IOS_INLSIZE]; 64 | } ios_t; 65 | 66 | /* low-level interface functions */ 67 | DLLEXPORT size_t ios_read(ios_t *s, char *dest, size_t n); 68 | DLLEXPORT size_t ios_readall(ios_t *s, char *dest, size_t n); 69 | DLLEXPORT size_t ios_write(ios_t *s, char *data, size_t n); 70 | DLLEXPORT off_t ios_seek(ios_t *s, off_t pos); // absolute seek 71 | DLLEXPORT off_t ios_seek_end(ios_t *s); 72 | DLLEXPORT off_t ios_skip(ios_t *s, off_t offs); // relative seek 73 | DLLEXPORT off_t ios_pos(ios_t *s); // get current position 74 | DLLEXPORT size_t ios_trunc(ios_t *s, size_t size); 75 | DLLEXPORT int ios_eof(ios_t *s); 76 | DLLEXPORT int ios_flush(ios_t *s); 77 | DLLEXPORT void ios_close(ios_t *s); 78 | DLLEXPORT char *ios_takebuf(ios_t *s, size_t *psize); // release buffer to caller 79 | // set buffer space to use 80 | DLLEXPORT int ios_setbuf(ios_t *s, char *buf, size_t size, int own); 81 | DLLEXPORT int ios_bufmode(ios_t *s, bufmode_t mode); 82 | DLLEXPORT void ios_set_readonly(ios_t *s); 83 | DLLEXPORT size_t ios_copy(ios_t *to, ios_t *from, size_t nbytes); 84 | DLLEXPORT size_t ios_copyall(ios_t *to, ios_t *from); 85 | DLLEXPORT size_t ios_copyuntil(ios_t *to, ios_t *from, char delim); 86 | // ensure at least n bytes are buffered if possible. returns # available. 
87 | DLLEXPORT size_t ios_readprep(ios_t *from, size_t n); 88 | //void ios_lock(ios_t *s); 89 | //int ios_trylock(ios_t *s); 90 | //int ios_unlock(ios_t *s); 91 | 92 | /* stream creation */ 93 | DLLEXPORT 94 | ios_t *ios_file(ios_t *s, char *fname, int rd, int wr, int create, int trunc); 95 | DLLEXPORT ios_t *ios_mem(ios_t *s, size_t initsize); 96 | ios_t *ios_str(ios_t *s, char *str); 97 | ios_t *ios_static_buffer(ios_t *s, char *buf, size_t sz); 98 | DLLEXPORT ios_t *ios_fd(ios_t *s, long fd, int isfile, int own); 99 | // todo: ios_socket 100 | extern DLLEXPORT ios_t *ios_stdin; 101 | extern DLLEXPORT ios_t *ios_stdout; 102 | extern DLLEXPORT ios_t *ios_stderr; 103 | void ios_init_stdstreams(void); 104 | 105 | /* high-level functions - output */ 106 | int ios_putnum(ios_t *s, char *data, uint32_t type); 107 | int ios_putint(ios_t *s, int n); 108 | DLLEXPORT int ios_pututf8(ios_t *s, uint32_t wc); 109 | int ios_putstringz(ios_t *s, char *str, bool_t do_write_nulterm); 110 | DLLEXPORT int ios_printf(ios_t *s, const char *format, ...); 111 | DLLEXPORT int ios_vprintf(ios_t *s, const char *format, va_list args); 112 | 113 | void hexdump(ios_t *dest, const char *buffer, size_t len, size_t startoffs); 114 | 115 | /* high-level stream functions - input */ 116 | int ios_getnum(ios_t *s, char *data, uint32_t type); 117 | DLLEXPORT int ios_getutf8(ios_t *s, uint32_t *pwc); 118 | int ios_peekutf8(ios_t *s, uint32_t *pwc); 119 | int ios_ungetutf8(ios_t *s, uint32_t wc); 120 | //int ios_getstringz(ios_t *dest, ios_t *src); 121 | //int ios_getstringn(ios_t *dest, ios_t *src, size_t nchars); 122 | //int ios_getline(ios_t *s, char **pbuf, size_t *psz); 123 | DLLEXPORT char *ios_readline(ios_t *s); 124 | 125 | // discard data buffered for reading 126 | DLLEXPORT void ios_purge(ios_t *s); 127 | 128 | // seek by utf8 sequence increments 129 | int ios_nextutf8(ios_t *s); 130 | int ios_prevutf8(ios_t *s); 131 | 132 | /* stdio-style functions */ 133 | #define IOS_EOF (-1) 134 | 
DLLEXPORT int ios_putc(int c, ios_t *s); 135 | //wint_t ios_putwc(ios_t *s, wchar_t wc); 136 | DLLEXPORT int ios_getc(ios_t *s); 137 | int ios_peekc(ios_t *s); 138 | //wint_t ios_getwc(ios_t *s); 139 | int ios_ungetc(int c, ios_t *s); 140 | //wint_t ios_ungetwc(ios_t *s, wint_t wc); 141 | #define ios_puts(str, s) ios_write(s, str, strlen(str)) 142 | 143 | /* 144 | With memory streams, mixed reads and writes are equivalent to performing 145 | sequences of *p++, as either an lvalue or rvalue. File streams behave 146 | similarly, but other streams might not support this. Using unbuffered 147 | mode makes this more predictable. 148 | 149 | Note on "unget" functions: 150 | There are two kinds of functions here: those that operate on sized 151 | blocks of bytes and those that operate on logical units like "character" 152 | or "integer". The "unget" functions only work on logical units. There 153 | is no "unget n bytes". You can only do an unget after a matching get. 154 | However, data pushed back by an unget is available to all read operations. 155 | The reason for this is that unget is defined in terms of its effect on 156 | the underlying buffer (namely, it rebuffers data as if it had been 157 | buffered but not read yet). IOS reserves the right to perform large block 158 | operations directly, bypassing the buffer. In such a case data was 159 | never buffered, so "rebuffering" has no meaning (i.e. there is no 160 | correspondence between the buffer and the physical stream). 161 | 162 | Single-bit I/O is able to write partial bytes ONLY IF the stream supports 163 | seeking. Also, line buffering is not well-defined in the context of 164 | single-bit I/O, so it might not do what you expect. 165 | 166 | implementation notes: 167 | in order to know where we are in a file, we must ensure the buffer 168 | is only populated from the underlying stream starting with p==buf. 
169 | 170 | to switch from writing to reading: flush, set p=buf, cnt=0 171 | to switch from reading to writing: seek backwards cnt bytes, p=buf, cnt=0 172 | 173 | when writing: buf starts at curr. physical stream pos, p - buf is how 174 | many bytes we've written logically. cnt==0 175 | 176 | dirty == (bitpos>0 && state==iost_wr), EXCEPT right after switching from 177 | reading to writing, where we might be in the middle of a byte without 178 | having changed it. 179 | 180 | to write a bit: if !dirty, read up to maxsize-(p-buf) into buffer, then 181 | seek back by the same amount (undo it). write onto those bits. now set 182 | the dirty bit. in this state, we can bit-read up to the end of the byte, 183 | then formally switch to the read state using flush. 184 | 185 | design points: 186 | - data-source independence, including memory streams 187 | - expose buffer to user, allow user-owned buffers 188 | - allow direct I/O, don't always go through buffer 189 | - buffer-internal seeking. makes seeking back 1-2 bytes very fast, 190 | and makes it possible for sockets where it otherwise wouldn't be 191 | - tries to allow switching between reading and writing 192 | - support 64-bit and large files 193 | - efficient, low-latency buffering 194 | - special support for utf8 195 | - type-aware functions with byte-order swapping service 196 | - position counter for meaningful data offsets with sockets 197 | 198 | theory of operation: 199 | 200 | the buffer is a view of part of a file/stream. you can seek, read, and 201 | write around in it as much as you like, as if it were just a string. 202 | 203 | we keep track of the part of the buffer that's invalid (written to). 204 | we remember whether the position of the underlying stream is aligned 205 | with the end of the buffer (reading mode) or the beginning (writing mode). 206 | 207 | based on this info, we might have to seek back before doing a flush. 
208 | 209 | as optimizations, we do no writing if the buffer isn't "dirty", and we 210 | do no reading if the data will only be overwritten. 211 | */ 212 | 213 | #endif 214 | -------------------------------------------------------------------------------- /libsupport.h: -------------------------------------------------------------------------------- 1 | #ifndef LIBSUPPORT_H 2 | #define LIBSUPPORT_H 3 | 4 | #include 5 | #include "dtypes.h" 6 | #include "utils.h" 7 | #include "utf8.h" 8 | #include "ios.h" 9 | #include "socket.h" 10 | #include "timefuncs.h" 11 | #include "hashing.h" 12 | #include "ptrhash.h" 13 | #include "bitvector.h" 14 | #include "dirpath.h" 15 | 16 | DLLEXPORT void libsupport_init(void); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /libsupportinit.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "libsupport.h" 7 | 8 | double D_PNAN; 9 | double D_NNAN; 10 | double D_PINF; 11 | double D_NINF; 12 | float F_PNAN; 13 | float F_NNAN; 14 | float F_PINF; 15 | float F_NINF; 16 | 17 | int locale_is_utf8; 18 | 19 | void libsupport_init(void) 20 | { 21 | locale_is_utf8 = u8_is_locale_utf8(setlocale(LC_ALL, "")); 22 | 23 | ios_init_stdstreams(); 24 | 25 | D_PNAN = strtod("+NaN",NULL); 26 | D_NNAN = -strtod("+NaN",NULL); 27 | D_PINF = strtod("+Inf",NULL); 28 | D_NINF = strtod("-Inf",NULL); 29 | F_PNAN = strtof("+NaN",NULL); 30 | F_NNAN = -strtof("+NaN",NULL); 31 | F_PINF = strtof("+Inf",NULL); 32 | F_NINF = strtof("-Inf",NULL); 33 | } 34 | -------------------------------------------------------------------------------- /ptrhash.c: -------------------------------------------------------------------------------- 1 | /* 2 | pointer hash table 3 | optimized for storing info about particular values 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 
"dtypes.h" 13 | #include "ptrhash.h" 14 | 15 | #define OP_EQ(x,y) ((x)==(y)) 16 | 17 | #ifdef __LP64__ 18 | static u_int64_t _pinthash(u_int64_t key) 19 | { 20 | key = (~key) + (key << 21); // key = (key << 21) - key - 1; 21 | key = key ^ (key >> 24); 22 | key = (key + (key << 3)) + (key << 8); // key * 265 23 | key = key ^ (key >> 14); 24 | key = (key + (key << 2)) + (key << 4); // key * 21 25 | key = key ^ (key >> 28); 26 | key = key + (key << 31); 27 | return key; 28 | } 29 | #else 30 | static u_int32_t _pinthash(u_int32_t a) 31 | { 32 | a = (a+0x7ed55d16) + (a<<12); 33 | a = (a^0xc761c23c) ^ (a>>19); 34 | a = (a+0x165667b1) + (a<<5); 35 | a = (a+0xd3a2646c) ^ (a<<9); 36 | a = (a+0xfd7046c5) + (a<<3); 37 | a = (a^0xb55a4f09) ^ (a>>16); 38 | return a; 39 | } 40 | #endif 41 | 42 | #include "htable.inc" 43 | 44 | HTIMPL(ptrhash, _pinthash, OP_EQ) 45 | -------------------------------------------------------------------------------- /ptrhash.h: -------------------------------------------------------------------------------- 1 | #ifndef PTRHASH_H 2 | #define PTRHASH_H 3 | 4 | #include "htableh.inc" 5 | 6 | HTPROT(ptrhash) 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /socket.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "dtypes.h" 9 | 10 | #if defined(__APPLE__) 11 | #include 12 | #include 13 | #include 14 | #endif 15 | 16 | #include "socket.h" 17 | 18 | 19 | int mysocket(int domain, int type, int protocol) 20 | { 21 | int val; 22 | int s = socket(domain, type, protocol); 23 | if (s < 0) 24 | return s; 25 | val = 131072; 26 | setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char*)&val, sizeof(int)); 27 | val = 131072; 28 | setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char*)&val, sizeof(int)); 29 | return s; 30 | } 31 | 32 | #ifdef WIN32 33 | void bzero(void *s, size_t n) 34 | { 35 | memset(s, 0, n); 36 
/* Open a TCP socket bound to `portno` on all local IPv4 addresses and put
 * it in the listening state (backlog 4).
 *
 * Returns the listening socket descriptor on success, or -1 if the socket
 * could not be created, bound, or switched to listening.  On failure the
 * descriptor is closed, so nothing is leaked to the caller.
 */
int open_tcp_port(short portno)
{
    int sockfd;
    struct sockaddr_in serv_addr;

    sockfd = mysocket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (sockfd < 0)
        return -1;

    bzero(&serv_addr, sizeof(serv_addr));
    serv_addr.sin_family = AF_INET;
    serv_addr.sin_addr.s_addr = htonl(INADDR_ANY);
    serv_addr.sin_port = htons(portno);

    if (bind(sockfd, (struct sockaddr*)&serv_addr, sizeof(serv_addr)) < 0 ||
        listen(sockfd, 4) < 0) {
        /* previously the descriptor was abandoned here */
        closesocket(sockfd);
        return -1;
    }

    return sockfd;
}
serv_addr.sin_port = htons(*portno); 104 | while (bind(sockfd, (struct sockaddr*)&serv_addr, sizeof(serv_addr)) < 0) { 105 | (*portno)++; 106 | serv_addr.sin_port = htons(*portno); 107 | } 108 | 109 | return sockfd; 110 | } 111 | 112 | #ifndef WIN32 113 | void closesocket(int fd) 114 | { 115 | close(fd); 116 | } 117 | #endif 118 | 119 | /* returns a socket to use to send data to the given address */ 120 | int connect_to_host(char *hostname, short portno) 121 | { 122 | struct hostent *host_info; 123 | int sockfd, yes=1; 124 | struct sockaddr_in host_addr; 125 | 126 | host_info = gethostbyname(hostname); 127 | if (host_info == NULL) { 128 | return -1; 129 | } 130 | 131 | sockfd = mysocket(AF_INET, SOCK_STREAM, IPPROTO_TCP); 132 | if (sockfd < 0) { 133 | return -1; 134 | } 135 | (void)setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(int)); 136 | memset((char*)&host_addr, 0, sizeof(host_addr)); 137 | host_addr.sin_family = host_info->h_addrtype; 138 | memcpy((char*)&host_addr.sin_addr, host_info->h_addr, 139 | host_info->h_length); 140 | 141 | host_addr.sin_port = htons(portno); 142 | 143 | if (connect(sockfd, (struct sockaddr*)&host_addr, 144 | sizeof(struct sockaddr_in)) != 0) { 145 | closesocket(sockfd); 146 | return -1; 147 | } 148 | 149 | return sockfd; 150 | } 151 | 152 | int connect_to_addr(struct sockaddr_in *host_addr) 153 | { 154 | int sockfd, yes=1; 155 | 156 | sockfd = mysocket(AF_INET, SOCK_STREAM, IPPROTO_TCP); 157 | if (sockfd < 0) { 158 | return -1; 159 | } 160 | (void)setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(int)); 161 | 162 | if (connect(sockfd, (struct sockaddr*)host_addr, 163 | sizeof(struct sockaddr_in)) != 0) { 164 | closesocket(sockfd); 165 | return -1; 166 | } 167 | 168 | return sockfd; 169 | } 170 | 171 | DLLEXPORT 172 | void getlocalip(char *buf, size_t len) 173 | { 174 | struct ifaddrs * ifAddrStruct=NULL; 175 | struct ifaddrs * ifa=NULL; 176 | void * tmpAddrPtr=NULL; 177 | buf[0] = '\0'; 178 | 179 | 
getifaddrs(&ifAddrStruct); 180 | 181 | for (ifa = ifAddrStruct; ifa != NULL; ifa = ifa->ifa_next) { 182 | if (ifa ->ifa_addr->sa_family==AF_INET) { // check it is IP4 183 | // is a valid IP4 Address 184 | tmpAddrPtr=&((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; 185 | inet_ntop(AF_INET, tmpAddrPtr, buf, len); 186 | if (strcmp(buf,"127.0.0.1")) 187 | break; 188 | //printf("%s IP Address %s\n", ifa->ifa_name, addressBuffer); 189 | } 190 | /* 191 | else if (ifa->ifa_addr->sa_family==AF_INET6) { // check it is IP6 192 | // is a valid IP6 Address 193 | tmpAddrPtr=&((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; 194 | char addressBuffer[INET6_ADDRSTRLEN]; 195 | inet_ntop(AF_INET6, tmpAddrPtr, addressBuffer, INET6_ADDRSTRLEN); 196 | printf("%s IP Address %s\n", ifa->ifa_name, addressBuffer); 197 | } 198 | */ 199 | } 200 | if (ifAddrStruct!=NULL) freeifaddrs(ifAddrStruct); 201 | } 202 | -------------------------------------------------------------------------------- /socket.h: -------------------------------------------------------------------------------- 1 | #ifndef LLTSOCKET_H 2 | #define LLTSOCKET_H 3 | 4 | #ifdef WIN32 5 | #include 6 | #else 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #endif 16 | 17 | int open_tcp_port(short portno); 18 | DLLEXPORT int open_any_tcp_port(short *portno); 19 | DLLEXPORT int open_any_udp_port(short *portno); 20 | DLLEXPORT int connect_to_host(char *hostname, short portno); 21 | int connect_to_addr(struct sockaddr_in *host_addr); 22 | 23 | #ifdef WIN32 24 | void bzero(void *s, size_t n); 25 | #endif 26 | #ifndef WIN32 27 | void closesocket(int fd); 28 | #endif 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /timefuncs.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 
/* Suspend the calling thread for roughly `ms` milliseconds.
 *
 * Zero and negative durations return immediately.  (A negative value used
 * to be converted to a huge unsigned delay by Sleep() on Windows and to an
 * invalid timeout for select() elsewhere.)
 *
 * On non-Windows systems the wait is implemented with select() and no
 * descriptors; its return value is not examined.
 */
void sleep_ms(int ms)
{
    if (ms <= 0)
        return;

#ifdef WIN32
    Sleep(ms);
#else
    struct timeval timeout;

    timeout.tv_sec = ms/1000;
    timeout.tv_usec = (ms % 1000) * 1000;
    select(0, NULL, NULL, NULL, &timeout);
#endif
}
-------------------------------------------------------------------------------- /utf8.c: -------------------------------------------------------------------------------- 1 | /* 2 | Basic UTF-8 manipulation routines 3 | by Jeff Bezanson 4 | placed in the public domain Fall 2005 5 | 6 | This code is designed to provide the utilities you need to manipulate 7 | UTF-8 as an internal string encoding. These functions do not perform the 8 | error checking normally needed when handling UTF-8 data, so if you happen 9 | to be from the Unicode Consortium you will want to flay me alive. 10 | I do this because error checking can be performed at the boundaries (I/O), 11 | with these routines reserved for higher performance on data known to be 12 | valid. 13 | A UTF-8 validation routine is included. 14 | */ 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #ifdef WIN32 24 | #include 25 | #define snprintf _snprintf 26 | #else 27 | #ifndef __FreeBSD__ 28 | #include 29 | #endif /* __FreeBSD__ */ 30 | #endif 31 | #include 32 | 33 | #include "utf8.h" 34 | 35 | static const uint32_t offsetsFromUTF8[6] = { 36 | 0x00000000UL, 0x00003080UL, 0x000E2080UL, 37 | 0x03C82080UL, 0xFA082080UL, 0x82082080UL 38 | }; 39 | 40 | static const char trailingBytesForUTF8[256] = { 41 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 42 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 43 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 44 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 45 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 46 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 47 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 48 | 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 49 | }; 50 | 51 | /* returns length of next utf-8 sequence */ 52 | size_t u8_seqlen(const char *s) 53 | { 54 | 
return trailingBytesForUTF8[(unsigned int)(unsigned char)s[0]] + 1; 55 | } 56 | 57 | /* returns the # of bytes needed to encode a certain character 58 | 0 means the character cannot (or should not) be encoded. */ 59 | size_t u8_charlen(uint32_t ch) 60 | { 61 | if (ch < 0x80) 62 | return 1; 63 | else if (ch < 0x800) 64 | return 2; 65 | else if (ch < 0x10000) 66 | return 3; 67 | else if (ch < 0x110000) 68 | return 4; 69 | return 0; 70 | } 71 | 72 | size_t u8_codingsize(uint32_t *wcstr, size_t n) 73 | { 74 | size_t i, c=0; 75 | 76 | for(i=0; i < n; i++) 77 | c += u8_charlen(wcstr[i]); 78 | return c; 79 | } 80 | 81 | /* conversions without error checking 82 | only works for valid UTF-8, i.e. no 5- or 6-byte sequences 83 | srcsz = source size in bytes 84 | sz = dest size in # of wide characters 85 | 86 | returns # characters converted 87 | if sz == srcsz+1 (i.e. 4*srcsz+4 bytes), there will always be enough space. 88 | */ 89 | size_t u8_toucs(uint32_t *dest, size_t sz, const char *src, size_t srcsz) 90 | { 91 | uint32_t ch; 92 | const char *src_end = src + srcsz; 93 | size_t nb; 94 | size_t i=0; 95 | 96 | if (sz == 0 || srcsz == 0) 97 | return 0; 98 | 99 | while (i < sz) { 100 | if (!isutf(*src)) { // invalid sequence 101 | dest[i++] = 0xFFFD; 102 | src++; 103 | if (src >= src_end) break; 104 | continue; 105 | } 106 | nb = trailingBytesForUTF8[(unsigned char)*src]; 107 | if (src + nb >= src_end) 108 | break; 109 | ch = 0; 110 | switch (nb) { 111 | /* these fall through deliberately */ 112 | case 5: ch += (unsigned char)*src++; ch <<= 6; 113 | case 4: ch += (unsigned char)*src++; ch <<= 6; 114 | case 3: ch += (unsigned char)*src++; ch <<= 6; 115 | case 2: ch += (unsigned char)*src++; ch <<= 6; 116 | case 1: ch += (unsigned char)*src++; ch <<= 6; 117 | case 0: ch += (unsigned char)*src++; 118 | } 119 | ch -= offsetsFromUTF8[nb]; 120 | dest[i++] = ch; 121 | } 122 | return i; 123 | } 124 | 125 | /* srcsz = number of source characters 126 | sz = size of dest buffer in bytes 
/* Encode an array of code points as UTF-8.
 *
 * dest   output byte buffer
 * sz     capacity of dest in bytes
 * src    input code points
 * srcsz  number of input code points
 *
 * Returns the number of bytes stored in dest.  No terminator is appended.
 * Encoding stops at the first code point whose complete encoding does not
 * fit in the remaining space.  Values of 0x110000 and above produce no
 * output and are skipped.
 */
size_t u8_toutf8(char *dest, size_t sz, const uint32_t *src, size_t srcsz)
{
    char *out = dest;
    char *const limit = dest + sz;
    size_t k;

    for (k = 0; k < srcsz; k++) {
        const uint32_t cp = src[k];
        const size_t room = (size_t)(limit - out);

        if (cp < 0x80) {
            if (room < 1)
                break;
            *out++ = (char)cp;
        }
        else if (cp < 0x800) {
            if (room < 2)
                break;
            *out++ = (char)((cp >> 6) | 0xC0);
            *out++ = (char)((cp & 0x3F) | 0x80);
        }
        else if (cp < 0x10000) {
            if (room < 3)
                break;
            *out++ = (char)((cp >> 12) | 0xE0);
            *out++ = (char)(((cp >> 6) & 0x3F) | 0x80);
            *out++ = (char)((cp & 0x3F) | 0x80);
        }
        else if (cp < 0x110000) {
            if (room < 4)
                break;
            *out++ = (char)((cp >> 18) | 0xF0);
            *out++ = (char)(((cp >> 12) & 0x3F) | 0x80);
            *out++ = (char)(((cp >> 6) & 0x3F) | 0x80);
            *out++ = (char)((cp & 0x3F) | 0x80);
        }
    }
    return (size_t)(out - dest);
}
/* Number of characters in a NUL-terminated UTF-8 string.
 *
 * Counts every byte that starts a sequence, i.e. every byte that is not a
 * 10xxxxxx continuation byte (the same test isutf() performs).  For valid
 * UTF-8 this is the number of code points.
 *
 * Each byte is inspected as unsigned char, so the result does not depend
 * on whether plain char is signed.  The scan stops at the first NUL, so a
 * string that ends in the middle of a multi-byte sequence is never read
 * past its terminator.  Stray continuation bytes in malformed input are
 * not counted.
 */
size_t u8_strlen(const char *s)
{
    const unsigned char *p = (const unsigned char *)s;
    size_t count = 0;

    for (; *p != 0; p++) {
        if ((*p & 0xC0) != 0x80)
            count++;
    }
    return count;
}
/* Map the letter that follows a backslash to the control character it
 * denotes: n, t, r, e, b, f, v, a.  Any other character is returned
 * unchanged.
 *
 * ESC is written as '\033' because '\e' is a compiler extension rather
 * than a standard C escape.
 */
char read_escape_control_char(char c)
{
    switch (c) {
    case 'n': return '\n';
    case 't': return '\t';
    case 'r': return '\r';
    case 'e': return '\033';
    case 'b': return '\b';
    case 'f': return '\f';
    case 'v': return '\v';
    case 'a': return '\a';
    default:  return c;
    }
}
/* Write an ASCII escape sequence for the character `ch` into `buf`.
 *
 * buf  destination, always NUL-terminated on return
 * sz   size of buf in bytes; must be at least 3
 * ch   character to escape
 *
 * Output forms:
 *   newline, tab, CR, ESC, backspace, FF, VT, BEL, backslash
 *                                  -> backslash + letter  (e.g. \n, \\)
 *   other values < 32, and 0x7f    -> \xNN
 *   values above 0xFFFF            -> \UNNNNNNNN
 *   0x80 .. 0xFFFF                 -> \uNNNN
 *   anything else                  -> the character itself
 *
 * Returns the number of characters actually stored in buf, not counting
 * the terminator.  If buf is too small for a numeric escape the output is
 * truncated and the truncated length is returned (previously the
 * untruncated length reported by snprintf leaked through).
 */
int u8_escape_wchar(char *buf, size_t sz, uint32_t ch)
{
    char letter = '\0';
    int n;

    assert(sz > 2);

    switch (ch) {
    case '\n': letter = 'n';  break;
    case '\t': letter = 't';  break;
    case '\r': letter = 'r';  break;
    case 0x1B: letter = 'e';  break;   /* ESC; '\e' is not standard C */
    case '\b': letter = 'b';  break;
    case '\f': letter = 'f';  break;
    case '\v': letter = 'v';  break;
    case '\a': letter = 'a';  break;
    case '\\': letter = '\\'; break;
    default: break;
    }
    if (letter != '\0') {
        buf[0] = '\\';
        buf[1] = letter;
        buf[2] = '\0';
        return 2;
    }

    if (ch < 32 || ch == 0x7f) {
        n = snprintf(buf, sz, "\\x%.2hhx", (unsigned char)ch);
    }
    else if (ch > 0xFFFF) {
        n = snprintf(buf, sz, "\\U%.8x", (unsigned int)ch);
    }
    else if (ch >= 0x80) {
        n = snprintf(buf, sz, "\\u%.4hx", (unsigned short)ch);
    }
    else {
        buf[0] = (char)ch;
        buf[1] = '\0';
        return 1;
    }

    /* Truncated (or failed) formatting: report what is really in buf. */
    if (n < 0 || (size_t)n >= sz) {
        buf[sz-1] = '\0';
        n = (int)strlen(buf);
    }
    return n;
}
/* printf for UTF-8 format strings and arguments: formats into a byte
 * buffer, converts the result to wide characters with u8_toucs(), and
 * prints it with printf("%ls").
 *
 * fmt  printf-style format string
 * ap   argument list; the caller remains responsible for va_end(ap)
 *
 * Returns the number of wide characters handed to printf, or 0 if
 * formatting or memory allocation failed.
 *
 * Output up to 511 bytes is handled in stack buffers; longer output uses
 * heap buffers.  The argument list is duplicated with va_copy before the
 * first formatting pass, because a va_list may not be reused after
 * vsnprintf has consumed it.
 *
 * NOTE(review): the uint32_t buffer is passed to printf as wchar_t*, which
 * presumably relies on wchar_t being 32 bits wide -- confirm for targets
 * where it is not.
 */
size_t u8_vprintf(const char *fmt, va_list ap)
{
    char stackbuf[512];
    uint32_t stackwcs[512];
    char *heapbuf = NULL;
    uint32_t *heapwcs = NULL;
    char *buf = stackbuf;
    uint32_t *wcs = stackwcs;
    size_t nc = 0;
    size_t nbytes;
    int cnt;
    va_list ap2;

    va_copy(ap2, ap);

    cnt = vsnprintf(stackbuf, sizeof(stackbuf), fmt, ap);
    if (cnt < 0)
        goto done;
    nbytes = (size_t)cnt;

    if (nbytes >= sizeof(stackbuf)) {
        /* did not fit: format again into buffers of the exact size */
        heapbuf = (char*)malloc(nbytes + 1);
        heapwcs = (uint32_t*)malloc((nbytes + 1) * sizeof(uint32_t));
        if (heapbuf == NULL || heapwcs == NULL)
            goto done;
        buf = heapbuf;
        wcs = heapwcs;
        vsnprintf(buf, nbytes + 1, fmt, ap2);
    }

    /* at most one wide character per input byte, plus the terminator */
    nc = u8_toucs(wcs, nbytes + 1, buf, nbytes);
    wcs[nc] = 0;
    printf("%ls", (wchar_t*)wcs);

done:
    va_end(ap2);
    free(heapbuf);
    free(heapwcs);
    return nc;
}
/* based on the valid_utf8 routine from the PCRE library by Philip Hazel

   Check whether `length` bytes starting at `str` are well-formed UTF-8.
   length is in bytes, since without knowing whether the string is valid
   it's hard to know how many characters there are!

   Returns 0 if the data is not valid, 1 if it is pure ASCII, and 2 if it
   is valid and contains at least one multi-byte sequence.

   Checks performed: the lead byte must be 11xxxxxx, every following byte
   of the sequence must be 10xxxxxx, overlong encodings are rejected, and
   a sequence must lie entirely inside the `length` bytes.  (The previous
   length bookkeeping did not track the current position, so a sequence
   starting near the end could be read beyond the buffer.)  As before,
   5- and 6-byte forms with lead bytes 0xF8..0xFD are accepted.
*/
int u8_isvalid(const char *str, size_t length)
{
    const unsigned char *p = (const unsigned char*)str;
    const unsigned char *pend = p + length;
    int ret = 1; /* ASCII */

    for (; p < pend; p++) {
        unsigned char c = *p;
        int ab;

        if (c < 128)
            continue;
        ret = 2; /* non-ASCII UTF-8 */
        if ((c & 0xc0) != 0xc0)
            return 0;

        /* number of bytes that must follow this lead byte; same values as
           trailingBytesForUTF8[c] for lead bytes 0xC0..0xFF */
        ab = (c < 0xe0) ? 1 : (c < 0xf0) ? 2 : (c < 0xf8) ? 3 :
             (c < 0xfc) ? 4 : 5;

        /* the lead byte plus ab trailing bytes must fit before pend */
        if ((pend - p) <= ab)
            return 0;

        p++;
        /* Check top bits in the second byte */
        if ((*p & 0xc0) != 0x80)
            return 0;

        /* Check for overlong sequences for each different length */
        switch (ab) {
            /* Check for xx00 000x */
        case 1:
            if ((c & 0x3e) == 0) return 0;
            continue;   /* We know there aren't any more bytes to check */

            /* Check for 1110 0000, xx0x xxxx */
        case 2:
            if (c == 0xe0 && (*p & 0x20) == 0) return 0;
            break;

            /* Check for 1111 0000, xx00 xxxx */
        case 3:
            if (c == 0xf0 && (*p & 0x30) == 0) return 0;
            break;

            /* Check for 1111 1000, xx00 0xxx */
        case 4:
            if (c == 0xf8 && (*p & 0x38) == 0) return 0;
            break;

            /* Check for leading 0xfe or 0xff,
               and then for 1111 1100, xx00 00xx */
        case 5:
            if (c == 0xfe || c == 0xff ||
                (c == 0xfc && (*p & 0x3c) == 0)) return 0;
            break;
        }

        /* Check for valid bytes after the 2nd, if any; all must start 10 */
        while (--ab > 0) {
            if ((*(++p) & 0xc0) != 0x80) return 0;
        }
    }

    return ret;
}

/* Reverse a UTF-8 string character by character.

   dest  output buffer of at least len+1 bytes; must not overlap src
   src   input bytes
   len   length of src in bytes

   Multi-byte sequences are copied as units so each character keeps its
   byte order.  dest[len] is set to NUL.

   Returns 0 on success and 1 on error.  Errors are: a byte that cannot
   start a sequence where a lead byte is expected, or a sequence that
   would run past `len` (previously that case moved the write position
   before the start of dest).  On error the contents of dest are
   unspecified.  Continuation bytes themselves are not validated.
*/
int u8_reverse(char *dest, char * src, size_t len)
{
    size_t si=0, di=len;

    dest[di] = '\0';
    while (si < len) {
        unsigned char c = (unsigned char)src[si];
        size_t n;

        if ((c & 0x80) == 0) {
            n = 1;
        }
        else {
            switch (c>>4) {
            case 0xC:
            case 0xD:
                n = 2;
                break;
            case 0xE:
                n = 3;
                break;
            case 0xF:
                n = 4;
                break;
            default:
                return 1;
            }
        }

        /* di == len - si here, so this also guarantees di >= n */
        if (n > len - si)
            return 1;

        di -= n;
        /* byte copy: no alignment or aliasing assumptions */
        memcpy(&dest[di], &src[si], n);
        si += n;
    }
    return 0;
}
computes the # of bytes needed to encode a WC string as UTF-8 */ 45 | size_t u8_codingsize(uint32_t *wcstr, size_t n); 46 | 47 | char read_escape_control_char(char c); 48 | 49 | /* assuming src points to the character after a backslash, read an 50 | escape sequence, storing the result in dest and returning the number of 51 | input characters processed */ 52 | size_t u8_read_escape_sequence(const char *src, size_t ssz, uint32_t *dest); 53 | 54 | /* given a wide character, convert it to an ASCII escape sequence stored in 55 | buf, where buf is "sz" bytes. returns the number of characters output. 56 | sz must be at least 3. */ 57 | int u8_escape_wchar(char *buf, size_t sz, uint32_t ch); 58 | 59 | /* convert a string "src" containing escape sequences to UTF-8 */ 60 | size_t u8_unescape(char *buf, size_t sz, const char *src); 61 | 62 | /* convert UTF-8 "src" to escape sequences. 63 | 64 | sz is buf size in bytes. must be at least 12. 65 | 66 | if escape_quotes is nonzero, quote characters will be escaped. 67 | 68 | if ascii is nonzero, the output is 7-bit ASCII, no UTF-8 survives. 69 | 70 | starts at src[*pi], updates *pi to point to the first unprocessed 71 | byte of the input. 72 | 73 | end is one more than the last allowable value of *pi. 74 | 75 | returns number of bytes placed in buf, including a NUL terminator. 76 | */ 77 | size_t u8_escape(char *buf, size_t sz, const char *src, size_t *pi, size_t end, 78 | int escape_quotes, int ascii); 79 | 80 | /* utility predicates used by the above */ 81 | int octal_digit(char c); 82 | int hex_digit(char c); 83 | 84 | /* return a pointer to the first occurrence of ch in s, or NULL if not 85 | found. character index of found character returned in *charn. */ 86 | char *u8_strchr(const char *s, uint32_t ch, size_t *charn); 87 | 88 | /* same as the above, but searches a buffer of a given size instead of 89 | a NUL-terminated string. 
*/ 90 | char *u8_memchr(const char *s, uint32_t ch, size_t sz, size_t *charn); 91 | 92 | char *u8_memrchr(const char *s, uint32_t ch, size_t sz); 93 | 94 | /* count the number of characters in a UTF-8 string */ 95 | size_t u8_strlen(const char *s); 96 | 97 | /* number of columns occupied by a string */ 98 | size_t u8_strwidth(const char *s); 99 | 100 | int u8_is_locale_utf8(const char *locale); 101 | 102 | /* printf where the format string and arguments may be in UTF-8. 103 | you can avoid this function and just use ordinary printf() if the current 104 | locale is UTF-8. */ 105 | size_t u8_vprintf(const char *fmt, va_list ap); 106 | size_t u8_printf(const char *fmt, ...); 107 | 108 | /* determine whether a sequence of bytes is valid UTF-8. length is in bytes */ 109 | int u8_isvalid(const char *str, size_t length); 110 | 111 | /* reverse a UTF-8 string. len is length in bytes. dest and src must both 112 | be allocated to at least len+1 bytes. returns 1 for error, 0 otherwise */ 113 | int u8_reverse(char *dest, char *src, size_t len); 114 | 115 | #endif 116 | -------------------------------------------------------------------------------- /utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H 2 | #define UTILS_H 3 | 4 | char *uint2str(char *dest, size_t len, uint64_t num, uint32_t base); 5 | int str2int(char *str, size_t len, int64_t *res, uint32_t base); 6 | int isdigit_base(char c, int base); 7 | 8 | #ifdef __x86_64__ 9 | # define LEGACY_REGS "=Q" 10 | #else 11 | # define LEGACY_REGS "=q" 12 | #endif 13 | 14 | #if !defined(__INTEL_COMPILER) && (defined(__i386__) || defined(__x86_64__)) 15 | STATIC_INLINE u_int16_t ByteSwap16(u_int16_t x) 16 | { 17 | __asm("xchgb %b0,%h0" : 18 | LEGACY_REGS (x) : 19 | "0" (x)); 20 | return x; 21 | } 22 | #define bswap_16(x) ByteSwap16(x) 23 | 24 | STATIC_INLINE u_int32_t ByteSwap32(u_int32_t x) 25 | { 26 | __asm("bswap %0": 27 | "=r" (x) : 28 | "0" (x)); 29 | return x; 30 | } 
31 | 32 | #define bswap_32(x) ByteSwap32(x) 33 | 34 | STATIC_INLINE u_int64_t ByteSwap64(u_int64_t x) 35 | { 36 | #ifdef __x86_64__ 37 | __asm("bswap %0": 38 | "=r" (x) : 39 | "0" (x)); 40 | return x; 41 | #else 42 | register union { __extension__ u_int64_t __ll; 43 | u_int32_t __l[2]; } __x; 44 | asm("xchgl %0,%1": 45 | "=r"(__x.__l[0]),"=r"(__x.__l[1]): 46 | "0"(bswap_32((unsigned long)x)),"1"(bswap_32((unsigned long)(x>>32)))); 47 | return __x.__ll; 48 | #endif 49 | } 50 | #define bswap_64(x) ByteSwap64(x) 51 | 52 | #else 53 | 54 | #define bswap_16(x) (((x) & 0x00ff) << 8 | ((x) & 0xff00) >> 8) 55 | 56 | #ifdef __INTEL_COMPILER 57 | #define bswap_32(x) _bswap(x) 58 | #else 59 | #define bswap_32(x) \ 60 | ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \ 61 | (((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24)) 62 | #endif 63 | 64 | STATIC_INLINE u_int64_t ByteSwap64(u_int64_t x) 65 | { 66 | union { 67 | u_int64_t ll; 68 | u_int32_t l[2]; 69 | } w, r; 70 | w.ll = x; 71 | r.l[0] = bswap_32 (w.l[1]); 72 | r.l[1] = bswap_32 (w.l[0]); 73 | return r.ll; 74 | } 75 | #define bswap_64(x) ByteSwap64(x) 76 | 77 | #endif 78 | 79 | #endif 80 | --------------------------------------------------------------------------------