├── .package ├── LICENSE ├── tstr.h ├── test.c ├── README.md └── tstr.c /.package: -------------------------------------------------------------------------------- 1 | file tstr.c 2 | file tstr.h -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2023 Josh Baker 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /tstr.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Joshua J Baker. All rights reserved. 2 | // Use of this source code is governed by an MIT-style 3 | // license that can be found in the LICENSE file. 
4 | // 5 | // https://github.com/tidwall/tstr 6 | 7 | #ifndef TSTR_H 8 | #define TSTR_H 9 | 10 | #include 11 | #include 12 | 13 | // tstr is a string that tracks its length, is null-terminated, is compatible 14 | // with C strings, and can optionally store binary. 15 | typedef const char tstr; 16 | 17 | tstr *tstr_from_bytes(const void *bytes, size_t nbytes); 18 | tstr *tstr_from_cstr(const char *cstr); 19 | tstr *tstr_from_format(const char *format, ...); 20 | tstr *tstr_clone(const tstr *); 21 | void tstr_free(tstr *); 22 | size_t tstr_len(const tstr *); 23 | const char *tstr_cstr(tstr *); 24 | const void *tstr_bytes(tstr *); 25 | bool tstr_equal(tstr *a, tstr *b); 26 | int tstr_cmp(tstr *a, tstr *b); 27 | int tstr_casecmp(tstr *a, tstr *b); 28 | int tstr_ncmp(tstr *a, tstr *b, size_t n); 29 | int tstr_ncasecmp(tstr *a, tstr *b, size_t n); 30 | int tstr_cmp_cstr(tstr *str, const char *cstr); 31 | int tstr_casecmp_cstr(tstr *str, const char *cstr); 32 | void tstr_set_allocator(void *(*malloc)(size_t), void (*free)(void*)); 33 | 34 | // DEPRECATED 35 | tstr *tstr_from_zeros(size_t nbytes); 36 | int tstr_compare(tstr *a, tstr *b); 37 | 38 | #endif // TSTR_H 39 | -------------------------------------------------------------------------------- /test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "tstr.h" 5 | 6 | tstr *tstr_alloc(size_t nbytes); 7 | tstr *tstr_from_zeros(size_t nbytes); 8 | int tstr_compare_cstr(tstr *str, const char *cstr); 9 | /* Smoke test: builds strings with each constructor, checks length, equality and ordering, then frees every handle. */ 10 | int main(void) { 11 | tstr *str1 = tstr_from_bytes("HELLO", 3); 12 | assert(tstr_len(str1) == 3); 13 | assert(strcmp(str1, "HEL") == 0); 14 | 15 | tstr *str2 = tstr_clone(str1); 16 | tstr *str3 = tstr_from_format("%s %d", "HI", 76); 17 | tstr *str4 = tstr_from_format("%s %d", "HI", 77); 18 | tstr *str5 = tstr_from_format("%s %d", "HI", 78); 19 | tstr *str6 = tstr_from_zeros(10); 20 | 21 | assert(tstr_equal(str1, str2)); 22 | 
assert(!tstr_equal(str1, str3)); 23 | assert(!tstr_equal(str4, str3)); 24 | 25 | assert(tstr_cmp(str4, str3) == 1); 26 | assert(tstr_cmp(str4, str4) == 0); 27 | assert(tstr_cmp(str4, str5) == -1); 28 | 29 | assert(tstr_cmp_cstr(str4, "HI 76") == 1); 30 | assert(tstr_cmp_cstr(str4, "HI 77") == 0); 31 | assert(tstr_cmp_cstr(str4, "HI 78") == -1); 32 | 33 | assert(tstr_casecmp_cstr(str4, "Hi 76") == 1); 34 | assert(tstr_casecmp_cstr(str4, "Hi 77") == 0); 35 | assert(tstr_casecmp_cstr(str4, "Hi 78") == -1); 36 | 37 | assert(tstr_len(str6) == 10); 38 | 39 | for (size_t i = 0; i < tstr_len(str6); i++) { 40 | assert(str6[i] == 0); 41 | } 42 | tstr_free(str6); 43 | tstr_free(str5); 44 | tstr_free(str4); 45 | tstr_free(str3); 46 | tstr_free(str2); /* str2 is a clone of str1; the next two asserts check str1 is still readable after the clone is freed */ 47 | assert(tstr_len(str1) == 3); 48 | assert(strcmp(str1, "HEL") == 0); 49 | tstr_free(str1); 50 | 51 | printf("PASSED\n"); 52 | return 0; 53 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tstr 2 | 3 | A safe immutable string format for C. 4 | 5 | - Supports binary as well as null-terminated cstr. 6 | - Backwards compatible with the "char *" C-string. 7 | - Provides a clone method for sharing the same data without new allocations. 8 | 9 | ## Example 10 | 11 | ```C 12 | #include "tstr.h" 13 | 14 | // Create a tstr. 15 | tstr *str = tstr_from_cstr("fantastic words"); 16 | if (!str) ... // check for out of memory 17 | 18 | // Make a clone of str. 19 | // The result shares the same memory as the original. 20 | tstr *str2 = tstr_clone(str); 21 | 22 | // Print the string "fantastic words". 
23 | printf("%s\n", str2); 24 | 25 | // Free both the clone and original 26 | tstr_free(str); 27 | tstr_free(str2); 28 | ``` 29 | 30 | ## API 31 | 32 | ```C 33 | tstr *tstr_from_bytes(const void *bytes, size_t nbytes); 34 | tstr *tstr_from_cstr(const char *cstr); 35 | tstr *tstr_from_format(const char *format, ...); 36 | tstr *tstr_clone(const tstr *); 37 | void tstr_free(tstr *); 38 | size_t tstr_len(const tstr *); 39 | const char *tstr_cstr(tstr *); 40 | const void *tstr_bytes(tstr *); 41 | bool tstr_equal(tstr *a, tstr *b); 42 | int tstr_cmp(tstr *a, tstr *b); 43 | int tstr_casecmp(tstr *a, tstr *b); 44 | int tstr_ncmp(tstr *a, tstr *b, size_t n); 45 | int tstr_ncasecmp(tstr *a, tstr *b, size_t n); 46 | int tstr_cmp_cstr(tstr *str, const char *cstr); 47 | int tstr_casecmp_cstr(tstr *str, const char *cstr); 48 | ``` 49 | 50 | ## Structure 51 | 52 | The internal structure is: 53 | 54 | ```C 55 | struct { 56 | atomic_int rc; // reference counter for cloning 57 | uint32_t len; // length of data. Does not include the null character. 58 | char data[]; // raw string binary. Always null-terminated. 59 | }; 60 | ``` 61 | 62 | A complete tstr allocation will always be a little larger than the 63 | original data in order to include the reference counter, length, and the 64 | null-terminator character. 65 | 66 | The actual tstr pointer (`tstr *`) starts at the first byte of the `data` 67 | field, which ensures that the tstr works like a C-string and can be used 68 | by all `string.h` functions as well as `tstr.h` functions. 69 | -------------------------------------------------------------------------------- /tstr.c: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Joshua J Baker. All rights reserved. 2 | // Use of this source code is governed by an MIT-style 3 | // license that can be found in the LICENSE file. 
4 | // 5 | // https://github.com/tidwall/tstr 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "tstr.h" 18 | 19 | /* 20 | The relaxed/release/acquire pattern is based on: 21 | http://boost.org/doc/libs/1_87_0/libs/atomic/doc/html/atomic/usage_examples.html 22 | */ 23 | 24 | typedef atomic_int rc_t; 25 | 26 | static void rc_init(rc_t *rc) { 27 | atomic_init(rc, 0); 28 | } 29 | 30 | static void rc_retain(rc_t *rc) { 31 | atomic_fetch_add_explicit(rc, 1, __ATOMIC_RELAXED); 32 | } 33 | 34 | static bool rc_release(rc_t *rc) { 35 | if (atomic_fetch_sub_explicit(rc, 1, __ATOMIC_RELEASE) == 1) { 36 | atomic_thread_fence(__ATOMIC_ACQUIRE); 37 | return true; 38 | } 39 | return false; 40 | } 41 | 42 | struct tstr_internal { 43 | rc_t rc; 44 | uint32_t len; 45 | char data[]; 46 | }; 47 | 48 | static void *(*_tstr_malloc)(size_t); 49 | static void (*_tstr_free)(void*); 50 | 51 | void tstr_set_allocator(void *(*malloc)(size_t), void (*free)(void*)) { 52 | _tstr_malloc = malloc; 53 | _tstr_free = free; 54 | } 55 | 56 | static struct tstr_internal *tstr_toistr(const tstr *str) { 57 | size_t dataoff = offsetof(struct tstr_internal, data); 58 | return (struct tstr_internal *)(((char*)str)-dataoff); 59 | } 60 | 61 | /// Return a newly allocated tstr that _is not_ initialized or null-terminated. 62 | /// This is an intentionally unsafe and undocumented function used primarly to 63 | /// to allocate a new tstr before initialization. 
64 | tstr *tstr_alloc(size_t nbytes) { 65 | size_t memsize = sizeof(struct tstr_internal)+nbytes+1; 66 | if (memsize > UINT32_MAX) { 67 | return 0; 68 | } 69 | struct tstr_internal *istr = (_tstr_malloc?_tstr_malloc:malloc)(memsize); 70 | if (!istr) { 71 | return 0; 72 | } 73 | rc_init(&istr->rc); 74 | rc_retain(&istr->rc); 75 | istr->len = nbytes; 76 | return (tstr*)(&istr->data[0]); 77 | } 78 | 79 | /// DEPRECATED 80 | /// Use tstr_alloc(size) & memset(str, 0, size) instead. 81 | tstr *tstr_from_zeros(size_t nbytes) { 82 | tstr *str = tstr_alloc(nbytes); 83 | if (!str) { 84 | return 0; 85 | } 86 | memset((char*)str, 0, nbytes+1); 87 | return str; 88 | } 89 | 90 | tstr *tstr_from_format(const char *format, ...) { 91 | va_list args; 92 | va_start(args, format); 93 | int nbytes = vsnprintf(0, 0, format, args); 94 | va_end(args); 95 | assert(nbytes >= 0); 96 | tstr *str = tstr_alloc(nbytes); 97 | if (!str) { 98 | return 0; 99 | } 100 | va_start(args, format); 101 | nbytes = vsnprintf((char*)str, nbytes+1, format, args); 102 | va_end(args); 103 | assert(nbytes >= 0); 104 | return str; 105 | } 106 | 107 | tstr *tstr_from_bytes(const void *bytes, size_t nbytes) { 108 | tstr *str = tstr_alloc(nbytes); 109 | if (!str) { 110 | return 0; 111 | } 112 | memcpy((char*)str, bytes, nbytes); 113 | ((char*)str)[nbytes] = '\0'; 114 | return str; 115 | } 116 | 117 | tstr *tstr_from_cstr(const char *cstr) { 118 | if (!cstr) { 119 | return 0; 120 | } 121 | return tstr_from_bytes(cstr, strlen(cstr)); 122 | } 123 | 124 | tstr *tstr_clone(tstr *str) { 125 | if (!str) { 126 | return 0; 127 | } 128 | struct tstr_internal *istr = tstr_toistr(str); 129 | rc_retain(&istr->rc); 130 | return (tstr*)str; 131 | } 132 | 133 | void tstr_free(tstr *str) { 134 | if (!str) { 135 | return; 136 | } 137 | struct tstr_internal *istr = tstr_toistr(str); 138 | if (!rc_release(&istr->rc)) { 139 | return; 140 | } 141 | (_tstr_free?_tstr_free:free)(istr); 142 | } 143 | 144 | size_t tstr_len(tstr *str) { 145 | 
if (!str) { 146 | return 0; 147 | } 148 | struct tstr_internal *istr = tstr_toistr(str); 149 | return istr->len; 150 | } 151 | 152 | const char *tstr_cstr(tstr *str) { 153 | if (!str) { 154 | return 0; 155 | } 156 | struct tstr_internal *istr = tstr_toistr(str); 157 | return istr->data; 158 | } 159 | 160 | const void *tstr_bytes(tstr *str) { 161 | return tstr_cstr(str); 162 | } 163 | 164 | bool tstr_equal(tstr *a, tstr *b) { 165 | size_t alen = tstr_len(a); 166 | size_t blen = tstr_len(b); 167 | if (alen != blen) { 168 | return false; 169 | } 170 | if (alen == 0) { 171 | return true; 172 | } 173 | return memcmp(a, b, alen) == 0; 174 | } 175 | 176 | // memcmpz compares A to B. 177 | // Works much like strcmp but for two memory segments that are not 178 | // null-terminated, and of varying sizes. 179 | static int memcmpz(const void *a, size_t asize, const void *b, size_t bsize, 180 | bool insenstive) 181 | { 182 | size_t size = asize < bsize ? asize : bsize; 183 | int cmp; 184 | if (insenstive) { 185 | cmp = 0; 186 | for (size_t i = 0; i < size && cmp == 0; i++) { 187 | int ca = tolower(((unsigned char*)a)[i]); 188 | int cb = tolower(((unsigned char*)b)[i]); 189 | cmp = ca < cb ? -1 : ca > cb; 190 | } 191 | } else { 192 | cmp = memcmp(a, b, size); 193 | } 194 | return cmp == 0 ? asize < bsize ? 
-1 : asize > bsize : cmp; 195 | } 196 | 197 | int tstr_cmp(tstr *a, tstr *b) { 198 | return memcmpz(tstr_bytes(a), tstr_len(a), tstr_bytes(b), tstr_len(b), 0); 199 | } 200 | 201 | int tstr_casecmp(tstr *a, tstr *b) { 202 | return memcmpz(tstr_bytes(a), tstr_len(a), tstr_bytes(b), tstr_len(b), 1); 203 | } 204 | 205 | int tstr_cmp_cstr(tstr *str, const char *cstr) { 206 | return memcmpz(tstr_bytes(str), tstr_len(str), cstr, strlen(cstr), 0); 207 | } 208 | 209 | int tstr_casecmp_cstr(tstr *str, const char *cstr) { 210 | return memcmpz(tstr_bytes(str), tstr_len(str), cstr, strlen(cstr), 1); 211 | } 212 | 213 | static int tstr_ncmp0(tstr *a, tstr *b, size_t n, bool ci) { 214 | size_t na = tstr_len(a); 215 | size_t nb = tstr_len(b); 216 | na = na < n ? na : n; 217 | nb = nb < n ? nb : n; 218 | return memcmpz(tstr_bytes(a), na, tstr_bytes(b), nb, ci); 219 | } 220 | 221 | int tstr_ncmp(tstr *a, tstr *b, size_t n) { 222 | return tstr_ncmp0(a, b, n, 0); 223 | } 224 | 225 | int tstr_ncasecmp(tstr *a, tstr *b, size_t n) { 226 | return tstr_ncmp0(a, b, n, 1); 227 | } 228 | 229 | // DEPRECATED 230 | int tstr_compare(tstr *a, tstr *b) { 231 | return tstr_cmp(a, b); 232 | } 233 | 234 | --------------------------------------------------------------------------------