├── Makefile ├── test.c ├── LICENSE ├── README.md ├── cstring.h └── cstring.c /Makefile: -------------------------------------------------------------------------------- 1 | test : test.c cstring.c 2 | gcc -g -Wall -o $@ $^ 3 | -------------------------------------------------------------------------------- /test.c: -------------------------------------------------------------------------------- 1 | #include "cstring.h" 2 | 3 | #include 4 | 5 | static cstring 6 | foo(cstring t) { 7 | CSTRING_LITERAL(hello, "hello"); 8 | CSTRING_BUFFER(ret); 9 | if (cstring_equal(hello,t)) { 10 | cstring_cat(ret, "equal"); 11 | } else { 12 | cstring_cat(ret, "not equal"); 13 | } 14 | return cstring_grab(CSTRING(ret)); 15 | } 16 | 17 | static void 18 | test() { 19 | CSTRING_BUFFER(a); 20 | cstring_printf(a, "%s", "hello"); 21 | cstring b = foo(CSTRING(a)); 22 | printf("%s\n", b->cstr); 23 | cstring_printf(a, "very long string %01024d",0); 24 | printf("%s\n", CSTRING(a)->cstr); 25 | CSTRING_CLOSE(a); 26 | cstring_release(b); 27 | } 28 | 29 | int 30 | main() { 31 | test(); 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 codingnow.com 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# cstring 库

C 语言没有原生的 string 类型,这使得 string 的管理非常麻烦。cstring 是一个简单的 string 库,它主要解决以下几个问题:

* 对于短字符串(小于 32 字节),进行 string interning 。这可以在文本处理时节约不少内存。短 string 相当于 symbol 类型,对它做比较操作的代价可以减少到 O(1) 。
* 对于临时字符串,如果长度不大(小于 128 字节),尽可能的放在 stack 上,避免动态内存分配。
* 支持常量字符串,对于常量短字符串只做一次 string interning 操作。
* 使用引用计数管理相同的字符串,减少字符串的拷贝。
* 短字符串,常量字符串,以及引用次数非常多(大于 64K 次)的字符串可以不动态释放,简化生命期管理。
* 惰性计算,以及缓存字符串的 hash 值,以方便实现 hashmap 。
* 这个库是线程安全的。

# cstring_buffer

不要直接定义 cstring_buffer 类型,而应该用 CSTRING_BUFFER(var) 声明,它相当于声明了一个名为 var 的 cstring_buffer 对象。

cstring_buffer 位于栈上,通常不需要回收。但是在函数结束时,应该使用 CSTRING_CLOSE(var) 关闭它。

新声明的 cstring_buffer 对象是一个空字符串,可以用下面两个 api 修改它。

```C
cstring cstring_cat(cstring_buffer sb, const char * str);
cstring cstring_printf(cstring_buffer sb, const char * format, ...);
```

# cstring

如果需要把字符串做参数传递,就应该使用 cstring 类型,而不是 cstring_buffer 类型。CSTRING(var) 可以把 var 这个 cstring_buffer 对象,转换为 cstring 类型。

但是,在对 cstring_buffer 对象做新的操作后,这个 cstring 可能无效。所以每次传递 cstring_buffer 内的值,最好都重新用 CSTRING 宏取一次。

函数调用的参数以及返回值,都应该使用 cstring 类型。如果 cstring 是由外部传入的,无法确定它的数据在栈上还是堆上,所以不能长期持有。如果需要把 cstring 保存在数据结构中,可以使用这对 API :
```C
cstring cstring_grab(cstring s);
void cstring_release(cstring s);
```

把
cstring 转化为标准的 const char * ,只需要用 s->cstr 即可。 39 | 40 | cstring 的比较操作以及 hash 操作都比 const char * 廉价,所以,请使用以下 API : 41 | ```C 42 | int cstring_equal(cstring a, cstring b); 43 | uint32_t cstring_hash(cstring s); 44 | ``` 45 | 46 | # literal 47 | 48 | CSTRING_LITERAL(var, literal) 可以声明一个常量 cstring 。这里 literal 必须是一个 " 引起的字符串常量。 49 | 50 | 51 | -------------------------------------------------------------------------------- /cstring.h: -------------------------------------------------------------------------------- 1 | #ifndef cstring_h 2 | #define cstring_h 3 | 4 | #include 5 | #include 6 | 7 | #define CSTRING_PERMANENT 1 8 | #define CSTRING_INTERNING 2 9 | #define CSTRING_ONSTACK 4 10 | 11 | #define CSTRING_INTERNING_SIZE 32 12 | #define CSTRING_STACK_SIZE 128 13 | 14 | struct cstring_data { 15 | char * cstr; 16 | uint32_t hash_size; 17 | uint16_t type; 18 | uint16_t ref; 19 | }; 20 | 21 | typedef struct _cstring_buffer { 22 | struct cstring_data * str; 23 | } cstring_buffer[1]; 24 | 25 | typedef struct cstring_data * cstring; 26 | 27 | #define CSTRING_BUFFER(var) \ 28 | char var##_cstring [CSTRING_STACK_SIZE] = { '\0' }; \ 29 | struct cstring_data var##_cstring_data = { var##_cstring , 0, CSTRING_ONSTACK, 0 }; \ 30 | cstring_buffer var; \ 31 | var->str = &var##_cstring_data; 32 | 33 | #define CSTRING_LITERAL(var, cstr) \ 34 | static cstring var = NULL; \ 35 | if (var) {} else { \ 36 | cstring tmp = cstring_persist(""cstr, (sizeof(cstr)/sizeof(char))-1); \ 37 | if (!__sync_bool_compare_and_swap(&var, NULL, tmp)) { \ 38 | cstring_free_persist(tmp); \ 39 | } \ 40 | } 41 | 42 | #define CSTRING(s) ((s)->str) 43 | 44 | #define CSTRING_CLOSE(var) \ 45 | if ((var)->str->type != 0) {} else \ 46 | cstring_release((var)->str); 47 | 48 | /* low level api, don't use directly */ 49 | cstring cstring_persist(const char * cstr, size_t sz); 50 | void cstring_free_persist(cstring s); 51 | 52 | /* public api */ 53 | cstring cstring_grab(cstring s); 54 | void cstring_release(cstring s); 55 
| cstring cstring_cat(cstring_buffer sb, const char * str); 56 | cstring cstring_printf(cstring_buffer sb, const char * format, ...) 57 | #ifdef __GNUC__ 58 | __attribute__((format(printf, 2, 3))) 59 | #endif 60 | ; 61 | int cstring_equal(cstring a, cstring b); 62 | uint32_t cstring_hash(cstring s); 63 | 64 | #endif 65 | 66 | 67 | -------------------------------------------------------------------------------- /cstring.c: -------------------------------------------------------------------------------- 1 | #include "cstring.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define FORMAT_TEMP_SIZE 1024 10 | 11 | #define INTERNING_POOL_SIZE 1024 12 | // HASH_START_SIZE must be 2 pow 13 | #define HASH_START_SIZE 16 14 | 15 | struct string_node { 16 | struct cstring_data str; 17 | char buffer[CSTRING_INTERNING_SIZE]; 18 | struct string_node * next; 19 | }; 20 | 21 | struct string_pool { 22 | struct string_node node[INTERNING_POOL_SIZE]; 23 | }; 24 | 25 | struct string_interning { 26 | int lock; 27 | int size; 28 | struct string_node ** hash; 29 | struct string_pool * pool; 30 | int index; 31 | int total; 32 | }; 33 | 34 | static struct string_interning S; 35 | 36 | static inline void 37 | LOCK() { 38 | while (__sync_lock_test_and_set(&(S.lock),1)) {} 39 | } 40 | 41 | static inline void 42 | UNLOCK() { 43 | __sync_lock_release(&(S.lock)); 44 | } 45 | 46 | static void 47 | insert_node(struct string_node ** hash, int sz, struct string_node *n) { 48 | uint32_t h = n->str.hash_size; 49 | int index = h & (sz-1); 50 | n->next = hash[index]; 51 | hash[index] = n; 52 | } 53 | 54 | static void 55 | expand(struct string_interning * si) { 56 | int new_size = si->size * 2; 57 | if (new_size < HASH_START_SIZE) { 58 | new_size = HASH_START_SIZE; 59 | } 60 | assert(new_size > si->total); 61 | struct string_node ** new_hash = malloc(sizeof(struct string_node *) * new_size); 62 | memset(new_hash, 0, sizeof(struct string_node *) * new_size); 63 | int i; 64 | 
for (i=0;isize;i++) { 65 | struct string_node *node = si->hash[i]; 66 | while (node) { 67 | struct string_node * tmp = node->next; 68 | insert_node(new_hash, new_size, node); 69 | node = tmp; 70 | } 71 | } 72 | free(si->hash); 73 | si->hash = new_hash; 74 | si->size = new_size; 75 | } 76 | 77 | static cstring 78 | interning(struct string_interning * si, const char * cstr, size_t sz, uint32_t hash) { 79 | if (si->hash == NULL) { 80 | return NULL; 81 | } 82 | int index = (int)(hash & (si->size-1)); 83 | struct string_node * n = si->hash[index]; 84 | while(n) { 85 | if (n->str.hash_size == hash) { 86 | if (strcmp(n->str.cstr, cstr) == 0) { 87 | return &n->str; 88 | } 89 | } 90 | n = n->next; 91 | } 92 | // 80% (4/5) threshold 93 | if (si->total * 5 >= si->size * 4) { 94 | return NULL; 95 | } 96 | if (si->pool == NULL) { 97 | // need not free pool 98 | // todo: check memory alloc error 99 | si->pool = malloc(sizeof(struct string_pool)); 100 | assert(si->pool); 101 | si->index = 0; 102 | } 103 | n = &si->pool->node[si->index++]; 104 | memcpy(n->buffer, cstr, sz); 105 | n->buffer[sz] = '\0'; 106 | 107 | cstring cs = &n->str; 108 | cs->cstr = n->buffer; 109 | cs->hash_size = hash; 110 | cs->type = CSTRING_INTERNING; 111 | cs->ref = 0; 112 | 113 | n->next = si->hash[index]; 114 | si->hash[index] = n; 115 | 116 | return cs; 117 | } 118 | 119 | static cstring 120 | cstring_interning(const char * cstr, size_t sz, uint32_t hash) { 121 | cstring ret; 122 | LOCK(); 123 | ret = interning(&S, cstr, sz, hash); 124 | if (ret == NULL) { 125 | expand(&S); 126 | ret = interning(&S, cstr, sz, hash); 127 | } 128 | ++S.total; 129 | UNLOCK(); 130 | assert(ret); 131 | return ret; 132 | } 133 | 134 | 135 | static uint32_t 136 | hash_blob(const char * buffer, size_t len) { 137 | const uint8_t * ptr = (const uint8_t *) buffer; 138 | size_t h = len; 139 | size_t step = (len>>5)+1; 140 | size_t i; 141 | for (i=len; i>=step; i-=step) 142 | h = h ^ ((h<<5)+(h>>2)+ptr[i-1]); 143 | if (h == 0) 144 | 
return 1; 145 | else 146 | return h; 147 | } 148 | 149 | void 150 | cstring_free_persist(cstring s) { 151 | if (s->type == CSTRING_PERMANENT) { 152 | free(s); 153 | } 154 | } 155 | 156 | static cstring 157 | cstring_clone(const char * cstr, size_t sz) { 158 | if (sz < CSTRING_INTERNING_SIZE) { 159 | return cstring_interning(cstr, sz, hash_blob(cstr,sz)); 160 | } 161 | struct cstring_data * p = malloc(sizeof(struct cstring_data) + sz + 1); 162 | // todo: memory alloc error 163 | assert(p); 164 | void * ptr = (void *)(p + 1); 165 | p->cstr = ptr; 166 | p->type = 0; 167 | p->ref = 1; 168 | memcpy(ptr, cstr, sz); 169 | ((char *)ptr)[sz] = '\0'; 170 | p->hash_size = 0; 171 | return p; 172 | } 173 | 174 | cstring 175 | cstring_persist(const char * cstr, size_t sz) { 176 | cstring s = cstring_clone(cstr, sz); 177 | if (s->type == 0) { 178 | s->type = CSTRING_PERMANENT; 179 | s->ref = 0; 180 | } 181 | return s; 182 | } 183 | 184 | cstring 185 | cstring_grab(cstring s) { 186 | if (s->type & (CSTRING_PERMANENT | CSTRING_INTERNING)) { 187 | return s; 188 | } 189 | if (s->type == CSTRING_ONSTACK) { 190 | cstring tmp = cstring_clone(s->cstr, s->hash_size); 191 | return tmp; 192 | } else { 193 | if (s->ref == 0) { 194 | s->type = CSTRING_PERMANENT; 195 | } else { 196 | __sync_add_and_fetch(&s->ref,1); 197 | } 198 | return s; 199 | } 200 | } 201 | 202 | void 203 | cstring_release(cstring s) { 204 | if (s->type != 0) { 205 | return; 206 | } 207 | if (s->ref == 0) { 208 | return; 209 | } 210 | if (__sync_sub_and_fetch(&s->ref,1) == 0) { 211 | free(s); 212 | } 213 | } 214 | 215 | uint32_t 216 | cstring_hash(cstring s) { 217 | if (s->type == CSTRING_ONSTACK) 218 | return hash_blob(s->cstr, s->hash_size); 219 | if (s->hash_size == 0) { 220 | s->hash_size = hash_blob(s->cstr, strlen(s->cstr)); 221 | } 222 | return s->hash_size; 223 | } 224 | 225 | int 226 | cstring_equal(cstring a, cstring b) { 227 | if (a == b) 228 | return 1; 229 | if ((a->type == CSTRING_INTERNING) && 230 | (b->type 
== CSTRING_INTERNING)) { 231 | return 0; 232 | } 233 | if ((a->type == CSTRING_ONSTACK) && 234 | (b->type == CSTRING_ONSTACK)) { 235 | if (a->hash_size != b->hash_size) { 236 | return 0; 237 | } 238 | return memcmp(a->cstr, b->cstr, a->hash_size) == 0; 239 | } 240 | uint32_t hasha = cstring_hash(a); 241 | uint32_t hashb = cstring_hash(b); 242 | if (hasha != hashb) { 243 | return 0; 244 | } 245 | return strcmp(a->cstr, b->cstr) == 0; 246 | } 247 | 248 | static cstring 249 | cstring_cat2(const char * a, const char * b) { 250 | size_t sa = strlen(a); 251 | size_t sb = strlen(b); 252 | if (sa + sb < CSTRING_INTERNING_SIZE) { 253 | char tmp[CSTRING_INTERNING_SIZE]; 254 | memcpy(tmp, a, sa); 255 | memcpy(tmp+sa, b, sb); 256 | tmp[sa+sb] = '\0'; 257 | return cstring_interning(tmp, sa+sb, hash_blob(tmp,sa+sb)); 258 | } 259 | struct cstring_data * p = malloc(sizeof(struct cstring_data) + sa + sb + 1); 260 | // todo: memory alloc error 261 | assert(p); 262 | char * ptr = (char *)(p + 1); 263 | p->cstr = ptr; 264 | p->type = 0; 265 | p->ref = 1; 266 | memcpy(ptr, a, sa); 267 | memcpy(ptr+sa, b, sb); 268 | ptr[sa+sb] = '\0'; 269 | p->hash_size = 0; 270 | return p; 271 | } 272 | 273 | cstring 274 | cstring_cat(cstring_buffer sb, const char * str) { 275 | cstring s = sb->str; 276 | if (s->type == CSTRING_ONSTACK) { 277 | int i = (int)s->hash_size; 278 | while (i < CSTRING_STACK_SIZE-1) { 279 | s->cstr[i] = *str; 280 | if (*str == '\0') { 281 | return s; 282 | } 283 | ++s->hash_size; 284 | ++str; 285 | ++i; 286 | } 287 | s->cstr[i] = '\0'; 288 | } 289 | cstring tmp = s; 290 | sb->str = cstring_cat2(tmp->cstr, str); 291 | cstring_release(tmp); 292 | return sb->str; 293 | } 294 | 295 | static cstring 296 | cstring_format(const char * format, va_list ap) { 297 | static char * cache = NULL; 298 | char * result; 299 | char * temp = cache; 300 | // read cache buffer atomic 301 | if (temp) { 302 | temp = __sync_val_compare_and_swap(&cache, temp, NULL); 303 | } 304 | if (temp == NULL) { 
305 | temp = (char *)malloc(FORMAT_TEMP_SIZE); 306 | // todo : check malloc 307 | assert(temp); 308 | } 309 | va_list ap2; 310 | va_copy(ap2, ap); 311 | int n = vsnprintf(temp, FORMAT_TEMP_SIZE, format, ap2); 312 | if (n >= FORMAT_TEMP_SIZE) { 313 | int sz = FORMAT_TEMP_SIZE * 2; 314 | for (;;) { 315 | result = malloc(sz); 316 | // todo : check malloc 317 | assert(result); 318 | va_copy(ap2, ap); 319 | n = vsnprintf(result, sz, format, ap2); 320 | if (n >= sz) { 321 | free(result); 322 | sz *= 2; 323 | } else { 324 | break; 325 | } 326 | } 327 | } else { 328 | result = temp; 329 | } 330 | cstring r = (cstring)malloc(sizeof(struct cstring_data) + n + 1); 331 | // todo : check malloc 332 | assert(r); 333 | r->cstr = (char *)(r+1); 334 | r->type = 0; 335 | r->ref = 1; 336 | r->hash_size = 0; 337 | memcpy(r->cstr, result, n+1); 338 | if (temp != result) { 339 | free(result); 340 | } 341 | // save temp atomic 342 | if (!__sync_bool_compare_and_swap(&cache, NULL, temp)) { 343 | free(temp); 344 | } else { 345 | } 346 | 347 | return r; 348 | } 349 | 350 | cstring 351 | cstring_printf(cstring_buffer sb, const char * format, ...) { 352 | cstring s = sb->str; 353 | va_list ap; 354 | va_start(ap, format); 355 | if (s->type == CSTRING_ONSTACK) { 356 | int n = vsnprintf(s->cstr, CSTRING_STACK_SIZE, format, ap); 357 | if (n >= CSTRING_STACK_SIZE) { 358 | va_end(ap); 359 | va_start(ap, format); 360 | s = cstring_format(format, ap); 361 | sb->str = s; 362 | } else { 363 | s->hash_size = n; 364 | } 365 | } else { 366 | cstring_release(sb->str); 367 | s = cstring_format(format, ap); 368 | sb->str = s; 369 | } 370 | va_end(ap); 371 | return s; 372 | } 373 | --------------------------------------------------------------------------------