├── Debug
    ├── makefile
    ├── objects.mk
    ├── sources.mk
    └── src
    │   └── subdir.mk
├── README
├── Release
    ├── makefile
    ├── objects.mk
    ├── sources.mk
    └── src
    │   └── subdir.mk
└── src
    ├── main.c
    ├── tnyDB_list.c
    ├── tnyDB_list.h
    ├── tnyDB_mem.c
    ├── tnyDB_mem.h
    ├── tnyDB_tarray.c
    ├── tnyDB_tarray.h
    ├── tnyDB_tmap.c
    ├── tnyDB_tmap.h
    ├── tnyDB_tword.c
    ├── tnyDB_tword.h
    ├── tnyDB_wtree.c
    └── tnyDB_wtree.h


/Debug/makefile:
--------------------------------------------------------------------------------
################################################################################
# Automatically-generated file. Do not edit!
################################################################################

-include ../makefile.init

RM := rm -rf

# All of the sources participating in the build are defined here
-include sources.mk
-include subdir.mk
-include src/subdir.mk
-include objects.mk

# Compiler-generated header dependencies are only pulled in when something is
# actually being built; "make clean" must not fail on stale .d files.
ifneq ($(MAKECMDGOALS),clean)
ifneq ($(strip $(C_DEPS)),)
-include $(C_DEPS)
endif
endif

-include ../makefile.defs

# Add inputs and outputs from these tool invocations to the build variables

# All Target
all: tny

# Tool invocations
# Links every object collected by src/subdir.mk (OBJS) plus any user-supplied
# objects and libraries from objects.mk into the "tny" executable.
tny: $(OBJS) $(USER_OBJS)
	@echo 'Building target: $@'
	@echo 'Invoking: GCC C Linker'
	gcc -o "tny" $(OBJS) $(USER_OBJS) $(LIBS)
	@echo 'Finished building target: $@'
	@echo ' '

# Other Targets
# The variable references must be separated by spaces: written back to back,
# the last word of one list is glued to the first word of the next
# (e.g. "./src/tnyDB_wtree.o./src/main.d") and neither file gets removed.
clean:
	-$(RM) $(OBJS) $(C_DEPS) $(EXECUTABLES) tny
	-@echo ' '

.PHONY: all clean dependents
.SECONDARY:

-include ../makefile.targets



--------------------------------------------------------------------------------
/Debug/objects.mk:
--------------------------------------------------------------------------------
################################################################################
# Automatically-generated file. Do not edit!
################################################################################

# Extra object files the user wants linked into the target (none by default);
# the top-level makefile lists them as prerequisites of "tny".
USER_OBJS :=

# Libraries appended to the link command of the top-level makefile.
LIBS :=



--------------------------------------------------------------------------------
/Debug/sources.mk:
--------------------------------------------------------------------------------
################################################################################
# Automatically-generated file. Do not edit!
################################################################################

# Every build variable starts out empty here; src/subdir.mk appends to
# C_SRCS, OBJS and C_DEPS with "+=".
O_SRCS :=
C_SRCS :=
S_UPPER_SRCS :=
OBJ_SRCS :=
ASM_SRCS :=
OBJS :=
C_DEPS :=
EXECUTABLES :=

# Every subdirectory with source files must be described here
SUBDIRS := \
src \



--------------------------------------------------------------------------------
/Debug/src/subdir.mk:
--------------------------------------------------------------------------------
################################################################################
# Automatically-generated file. Do not edit!
################################################################################

# Add inputs and outputs from these tool invocations to the build variables
C_SRCS += \
../src/main.c \
../src/tnyDB_list.c \
../src/tnyDB_mem.c \
../src/tnyDB_tarray.c \
../src/tnyDB_tmap.c \
../src/tnyDB_tword.c \
../src/tnyDB_wtree.c

OBJS += \
./src/main.o \
./src/tnyDB_list.o \
./src/tnyDB_mem.o \
./src/tnyDB_tarray.o \
./src/tnyDB_tmap.o \
./src/tnyDB_tword.o \
./src/tnyDB_wtree.o

C_DEPS += \
./src/main.d \
./src/tnyDB_list.d \
./src/tnyDB_mem.d \
./src/tnyDB_tarray.d \
./src/tnyDB_tmap.d \
./src/tnyDB_tword.d \
./src/tnyDB_wtree.d


# Each subdirectory must supply rules for building sources it contributes
# Debug flavour: no optimisation, full debug info.
# -MMD/-MP/-MF write a src/<name>.d dependency file next to each object.
# -MT must name the object ($@): when it named the .d file instead, the
# generated rule made the headers prerequisites of the .d file only, so
# editing a header never triggered a recompile.
src/%.o: ../src/%.c
	@echo 'Building file: $<'
	@echo 'Invoking: GCC C Compiler'
	gcc -O0 -g3 -Wall -c -fmessage-length=0 -std=c99 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$@" -o "$@" "$<"
	@echo 'Finished building: $<'
	@echo ' '




--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
========================
Introduction
========================

Tny is a project that seeks to find and develop high performance data
structures that have very low memory footprints. The hope is that these
structures may form the basis of a high performance in-memory column oriented
database for analyzing genomic information.

In developing software for large data sets (billions of records, terabytes in size)
the way you store your data in memory is critical – and you want your data in memory
if you want to be able to analyse it quickly (e.g. minutes not days).

Any data structure that relies on pointers for each data element quickly becomes
unworkable due to the overhead of pointers. On a 64 bit system, with one pointer
for each data element across a billion records you have just blown near 8GB of
memory just in pointers.

Thus there is a need for compact data structures that still have fast access characteristics.

========================
The Challenge
========================

The challenge is to come up with the fastest data structure that meets the following requirements:
• Use less memory than an array in all circumstances
• Fast Seek is more important than Fast Access
• Seek and Access must be better than O(N).

Where Seek and Access are defined as:

Access (int index): Return the value at the specified index ( like array[idx] ).

Seek (int value): Return all the indexes that match the value.


(The actual return type of Seek is a little different, but logically the same. What we need to return is a bitmap where a bit set to 1 at position X means that value was found at index X. This allows us to combine results using logical ANDs rather than intersections as detailed here)


For more information please check my blog at:

http://siganakis.com

This project is released under the GPL.

Contact me at terence@siganakis.com.





--------------------------------------------------------------------------------
/Release/makefile:
--------------------------------------------------------------------------------
################################################################################
# Automatically-generated file. Do not edit!
################################################################################

-include ../makefile.init

RM := rm -rf

# All of the sources participating in the build are defined here
-include sources.mk
-include subdir.mk
-include src/subdir.mk
-include objects.mk

# Compiler-generated header dependencies are only pulled in when something is
# actually being built; "make clean" must not fail on stale .d files.
ifneq ($(MAKECMDGOALS),clean)
ifneq ($(strip $(C_DEPS)),)
-include $(C_DEPS)
endif
endif

-include ../makefile.defs

# Add inputs and outputs from these tool invocations to the build variables

# All Target
all: tny

# Tool invocations
# Links every object collected by src/subdir.mk (OBJS) plus any user-supplied
# objects and libraries from objects.mk into the "tny" executable.
tny: $(OBJS) $(USER_OBJS)
	@echo 'Building target: $@'
	@echo 'Invoking: GCC C Linker'
	gcc -o "tny" $(OBJS) $(USER_OBJS) $(LIBS)
	@echo 'Finished building target: $@'
	@echo ' '

# Other Targets
# The variable references must be separated by spaces: written back to back,
# the last word of one list is glued to the first word of the next
# (e.g. "./src/tnyDB_wtree.o./src/main.d") and neither file gets removed.
clean:
	-$(RM) $(OBJS) $(C_DEPS) $(EXECUTABLES) tny
	-@echo ' '

.PHONY: all clean dependents
.SECONDARY:

-include ../makefile.targets



--------------------------------------------------------------------------------
/Release/objects.mk:
--------------------------------------------------------------------------------
################################################################################
# Automatically-generated file. Do not edit!
################################################################################

USER_OBJS :=

LIBS :=



--------------------------------------------------------------------------------
/Release/sources.mk:
--------------------------------------------------------------------------------
################################################################################
# Automatically-generated file. Do not edit!
################################################################################

O_SRCS :=
C_SRCS :=
S_UPPER_SRCS :=
OBJ_SRCS :=
ASM_SRCS :=
OBJS :=
C_DEPS :=
EXECUTABLES :=

# Every subdirectory with source files must be described here
SUBDIRS := \
src \



--------------------------------------------------------------------------------
/Release/src/subdir.mk:
--------------------------------------------------------------------------------
################################################################################
# Automatically-generated file. Do not edit!
################################################################################

# Add inputs and outputs from these tool invocations to the build variables
C_SRCS += \
../src/main.c \
../src/tnyDB_list.c \
../src/tnyDB_mem.c \
../src/tnyDB_tarray.c \
../src/tnyDB_tmap.c \
../src/tnyDB_tword.c \
../src/tnyDB_wtree.c

OBJS += \
./src/main.o \
./src/tnyDB_list.o \
./src/tnyDB_mem.o \
./src/tnyDB_tarray.o \
./src/tnyDB_tmap.o \
./src/tnyDB_tword.o \
./src/tnyDB_wtree.o

C_DEPS += \
./src/main.d \
./src/tnyDB_list.d \
./src/tnyDB_mem.d \
./src/tnyDB_tarray.d \
./src/tnyDB_tmap.d \
./src/tnyDB_tword.d \
./src/tnyDB_wtree.d


# Each subdirectory must supply rules for building sources it contributes
# Release flavour: -O3 tuned for the build machine (-march=native).
# -MMD/-MP/-MF write a src/<name>.d dependency file next to each object.
# -MT must name the object ($@): when it named the .d file instead, the
# generated rule made the headers prerequisites of the .d file only, so
# editing a header never triggered a recompile.
src/%.o: ../src/%.c
	@echo 'Building file: $<'
	@echo 'Invoking: GCC C Compiler'
	gcc -O3 -march=native -Wall -c -fmessage-length=0 -std=c99 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$@" -o "$@" "$<"
	@echo 'Finished building: $<'
	@echo ' '




--------------------------------------------------------------------------------
/src/main.c:
--------------------------------------------------------------------------------
/* Copyright (c) 2011 Terence Siganakis.
2 | 3 | This file is part of TnyDB. 4 | 5 | TnyDB is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | TnyDB is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with TnyDB. If not, see . 17 | */ 18 | 19 | /* Contact: Terence Siganakis */ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include "tnyDB_tmap.h" 27 | #include "tnyDB_wtree.h" 28 | #include "tnyDB_tarray.h" 29 | #include "tnyDB_tword.h" 30 | 31 | void tnyDB_tmap_access_test(int * values, int length) { 32 | tnyDB_mem_init(); 33 | 34 | double elapsed; // in milliseconds 35 | clock_t start, end; 36 | start = clock(); 37 | 38 | printf("Building TMAP... 
"); 39 | tnyDB_tmap *tree = tnyDB_tmap_create(values, length); 40 | 41 | end = clock(); 42 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC; 43 | printf("Done (took %fms)\n", elapsed); 44 | 45 | tnyDB_mem_statistics memStats = tnyDB_mem_get_statistics(); 46 | 47 | // printf("\n====== TMAP Mem Stats ======\n"); 48 | printf("Bytes current: %i\n", memStats.bytes_current); 49 | // printf("Bytes total: %i\n", memStats.bytes_total); 50 | // printf("Allocations: %i\n", memStats.allocations); 51 | // printf("Frees: %i\n", memStats.frees); 52 | // printf("====== TMAP Mem Stats ======\n\n"); 53 | 54 | printf("Testing TMAP access performance..."); 55 | start = clock(); 56 | 57 | for (int i = 0; i < length; i++) { // 58 | if (values[i] != tnyDB_tmap_access(tree, i)) { 59 | end = clock(); 60 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC; 61 | fprintf(stderr, "\nmissmatch at: %i, found %i, expected %i (%f)\n", i, tnyDB_tmap_access(tree, i), 62 | values[i], elapsed); 63 | 64 | exit(-1); 65 | } 66 | } 67 | 68 | end = clock(); 69 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC; 70 | printf("TMAP Passed test in %fms\n\n", elapsed); 71 | } 72 | 73 | 74 | void tnyDB_wtree_access_test(int * values, int length) { 75 | tnyDB_mem_init(); 76 | 77 | double elapsed; // in milliseconds 78 | clock_t start, end; 79 | start = clock(); 80 | 81 | printf("Building WTREE... 
"); 82 | tnyDB_wtree *tree = tnyDB_wtree_create(values, length); 83 | 84 | end = clock(); 85 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC; 86 | printf("Done (took %fms)\n", elapsed); 87 | 88 | tnyDB_mem_statistics memStats = tnyDB_mem_get_statistics(); 89 | 90 | // printf("\n====== WTREE Mem Stats ======\n"); 91 | printf("Bytes current: %i\n", memStats.bytes_current); 92 | // printf("Bytes total: %i\n", memStats.bytes_total); 93 | // printf("Allocations: %i\n", memStats.allocations); 94 | // printf("Frees: %i\n", memStats.frees); 95 | // printf("====== WTREE Mem Stats ======\n\n"); 96 | 97 | printf("Testing wTree access performance..."); 98 | start = clock(); 99 | 100 | for (int i = 0; i < length; i++) { // 101 | if (values[i] != tnyDB_wtree_access(tree, i)) { 102 | end = clock(); 103 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC; 104 | fprintf(stderr, "\nmissmatch at: %i, found %i, expected %i (%f)\n", i, tnyDB_wtree_access(tree, i), 105 | values[i], elapsed); 106 | 107 | exit(-1); 108 | } 109 | } 110 | 111 | end = clock(); 112 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC; 113 | printf("wtree Passed test in %fms\n\n", elapsed); 114 | } 115 | 116 | void tnyDB_tarray_access_test(int * values, int length) { 117 | tnyDB_mem_init(); 118 | 119 | double elapsed; // in milliseconds 120 | clock_t start, end; 121 | start = clock(); 122 | 123 | printf("Building ARRAY... 
"); 124 | tnyDB_tarray *tree = tnyDB_tarray_create(values, length); 125 | 126 | end = clock(); 127 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC; 128 | printf("Done (took %fms)\n", elapsed); 129 | 130 | tnyDB_mem_statistics memStats = tnyDB_mem_get_statistics(); 131 | 132 | // printf("\n====== array Mem Stats ======\n"); 133 | printf("Bytes current: %i\n", memStats.bytes_current); 134 | // printf("Bytes total: %i\n", memStats.bytes_total); 135 | // printf("Allocations: %i\n", memStats.allocations); 136 | // printf("Frees: %i\n", memStats.frees); 137 | // printf("====== array Mem Stats ======\n\n"); 138 | 139 | printf("Testing ARRAY access performance..."); 140 | start = clock(); 141 | 142 | for (int i = 0; i < length; i++) { // 143 | if (values[i] != tnyDB_tarray_access(tree, i)) { 144 | end = clock(); 145 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC; 146 | fprintf(stderr, "\nmissmatch at: %i, found %i, expected %i (%f)\n", i, tnyDB_tarray_access(tree, i), 147 | values[i], elapsed); 148 | 149 | exit(-1); 150 | } 151 | } 152 | 153 | end = clock(); 154 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC; 155 | printf("ARRAY Passed test in %fms\n\n", elapsed); 156 | } 157 | 158 | int tnyDB_access_test(int keys, int length) { 159 | srand(time(NULL)); 160 | 161 | int *values = malloc(length * sizeof(int)); 162 | 163 | printf("Generating data..."); 164 | for (int i = 0; i < length; i++) { 165 | values[i] = rand() % keys; 166 | } 167 | printf("Done\nh"); 168 | 169 | tnyDB_tarray_access_test(values, length); 170 | tnyDB_tmap_access_test(values, length); 171 | tnyDB_wtree_access_test(values, length); 172 | 173 | free(values); 174 | 175 | return 1; 176 | } 177 | 178 | void tnyDB_tarray_seek_test(int * values, int length, int seeks) { 179 | tnyDB_mem_init(); 180 | 181 | double elapsed; // in milliseconds 182 | clock_t start, end; 183 | start = clock(); 184 | 185 | printf("Building ARRAY... 
"); 186 | tnyDB_tarray *tree = tnyDB_tarray_create(values, length); 187 | 188 | end = clock(); 189 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC; 190 | printf("Done (took %fms)\n", elapsed); 191 | 192 | tnyDB_mem_statistics memStats = tnyDB_mem_get_statistics(); 193 | 194 | // printf("\n====== ARRAY Mem Stats ======\n"); 195 | printf("Bytes current: %i\n", memStats.bytes_current); 196 | // printf("Bytes total: %i\n", memStats.bytes_total); 197 | // printf("Allocations: %i\n", memStats.allocations); 198 | // printf("Frees: %i\n", memStats.frees); 199 | 200 | printf("Testing ARRAY SEEK performance... "); 201 | start = clock(); 202 | 203 | for (int i = 0; i < seeks; i++) { // 204 | int seeking = values[i]; 205 | TWORD * results = tnyDB_tarray_seek(tree, seeking); 206 | 207 | // Now in the results, a bit at "i" should be set 208 | if (tnyDB_tword_bit_is_set(results, i) == 0) { 209 | end = clock(); 210 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC; 211 | fprintf(stderr, "\n ARRAY SEEK: missmatch at: %i, BIT NOT SET\n", i); 212 | exit(-1); 213 | } 214 | 215 | } 216 | 217 | end = clock(); 218 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC; 219 | printf("Passed test in %fms\n\n", elapsed); 220 | } 221 | 222 | void tnyDB_tmap_seek_test(int * values, int length, int seeks) { 223 | tnyDB_mem_init(); 224 | 225 | double elapsed; // in milliseconds 226 | clock_t start, end; 227 | start = clock(); 228 | 229 | printf("Building TMAP... 
"); 230 | tnyDB_tmap *tree = tnyDB_tmap_create(values, length); 231 | 232 | end = clock(); 233 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC; 234 | printf("Done (took %fms)\n", elapsed); 235 | 236 | tnyDB_mem_statistics memStats = tnyDB_mem_get_statistics(); 237 | 238 | // printf("\n====== TMAP Mem Stats ======\n"); 239 | printf("Bytes current: %i\n", memStats.bytes_current); 240 | // printf("Bytes total: %i\n", memStats.bytes_total); 241 | // printf("Allocations: %i\n", memStats.allocations); 242 | // printf("Frees: %i\n", memStats.frees); 243 | // printf("====== TMAP Mem Stats ======\n\n"); 244 | 245 | printf("Testing TMAP SEEK performance... "); 246 | start = clock(); 247 | 248 | for (int i = 0; i < seeks; i++) { // 249 | int seeking = values[i]; 250 | TWORD * results = tnyDB_tmap_seek(tree, seeking); 251 | 252 | // Now in the results, a bit at "i" should be set 253 | if (tnyDB_tword_bit_is_set(results, i) == 0) { 254 | end = clock(); 255 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC; 256 | fprintf(stderr, "\n TMAP Seek: missmatch at: %i, BIT NOT SET\n", i); 257 | exit(-1); 258 | } 259 | 260 | } 261 | 262 | end = clock(); 263 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC; 264 | printf(" Passed test in %fms\n\n", elapsed); 265 | } 266 | 267 | 268 | void tnyDB_wtree_seek_test(int * values, int length, int seeks) { 269 | tnyDB_mem_init(); 270 | 271 | double elapsed; // in milliseconds 272 | clock_t start, end; 273 | start = clock(); 274 | 275 | printf("Building WTREE... 
"); 276 | tnyDB_wtree *tree = tnyDB_wtree_create(values, length); 277 | 278 | end = clock(); 279 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC; 280 | printf("Done (took %fms)\n", elapsed); 281 | 282 | tnyDB_mem_statistics memStats = tnyDB_mem_get_statistics(); 283 | 284 | // printf("\n====== TMAP Mem Stats ======\n"); 285 | printf("Bytes current: %i\n", memStats.bytes_current); 286 | // printf("Bytes total: %i\n", memStats.bytes_total); 287 | // printf("Allocations: %i\n", memStats.allocations); 288 | // printf("Frees: %i\n", memStats.frees); 289 | // printf("====== TMAP Mem Stats ======\n\n"); 290 | 291 | printf("Testing WTREE SEEK performance... "); 292 | start = clock(); 293 | 294 | for (int i = 0; i < seeks; i++) { // 295 | int seeking = values[i]; 296 | TWORD * results = tnyDB_wtree_seek(tree, seeking); 297 | 298 | // Now in the results, a bit at "i" should be set 299 | if (tnyDB_tword_bit_is_set(results, i) == 0) { 300 | end = clock(); 301 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC; 302 | fprintf(stderr, "\n WTREE Seek: missmatch at: %i, BIT NOT SET\n", i); 303 | exit(-1); 304 | } 305 | 306 | } 307 | 308 | end = clock(); 309 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC; 310 | printf(" Passed test in %fms\n\n", elapsed); 311 | } 312 | 313 | 314 | int tnyDB_seek_test(int keys, int length, int seeks) { 315 | srand(time(NULL)); 316 | 317 | int *values = malloc(length * sizeof(int)); 318 | 319 | printf("Generating data..."); 320 | for (int i = 0; i < length; i++) { 321 | values[i] = rand() % keys; 322 | } 323 | printf("Done\n\n"); 324 | 325 | tnyDB_tarray_seek_test(values, length, seeks); 326 | tnyDB_tmap_seek_test(values, length, seeks); 327 | tnyDB_wtree_seek_test(values, length, seeks); 328 | 329 | free(values); 330 | 331 | return 1; 332 | } 333 | 334 | int main(int argc, char *argv[]) { 335 | 336 | if (argc < 4 || argc > 4){ 337 | printf("Please supply 3 arguments:\n"); 338 | printf("\t1.\t The number of values 
to generate\n"); 339 | printf("\t2.\t The number of distinct values\n"); 340 | printf("\t3.\t The number of times the structures should be seeked\n"); 341 | printf("You supplied %i arguments\n", argc); 342 | return EXIT_SUCCESS; 343 | } 344 | int values= atoi(argv[1]); 345 | int keys = atoi(argv[2]); 346 | int seeks = atoi(argv[3]); 347 | 348 | tnyDB_access_test(keys, values); 349 | tnyDB_seek_test(keys, values, seeks); 350 | 351 | // int values[10] = {1, 4, 7, 4, 5, 6, 7, 9, 1, 2}; 352 | // tnyDB_tmap_seek_test(values, 10); 353 | 354 | return EXIT_SUCCESS; 355 | } 356 | -------------------------------------------------------------------------------- /src/tnyDB_list.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2011 Terence Siganakis. 2 | 3 | This file is part of TnyDB. 4 | 5 | TnyDB is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | TnyDB is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with TnyDB. If not, see . 
17 | */ 18 | 19 | /* Contact: Terence Siganakis */ 20 | 21 | #include 22 | #include 23 | #include 24 | #include "tnyDB_list.h" 25 | 26 | int tnyDB_list_check_resize(tnyDB_list *list, int newLength) { 27 | 28 | if (newLength >= list->allocated_length) { 29 | int oldLength = list->allocated_length; 30 | int increaseBy = newLength * 0.2; 31 | if (increaseBy < 3) { 32 | increaseBy = 3; 33 | } 34 | int oldAllocated = list->allocated_length; 35 | list->allocated_length = newLength + increaseBy; 36 | 37 | if (oldLength != list->allocated_length) { 38 | // We need to create a new array I reckon... 39 | int * new = tnyDB_malloc_data(list->allocated_length * sizeof(int), "tnyDB_list_check_resize.new"); 40 | for (int i = 0; i < oldAllocated; i++) { 41 | new[i] = list->values[i]; 42 | } 43 | 44 | if (list->values != NULL) { 45 | tnyDB_free_data(list->values, oldAllocated * sizeof(int), "tnyDB_list_check_resize"); 46 | } 47 | list->values = new; 48 | 49 | // printf("tnyDB_list resized to: %i\n", list->allocated_length); 50 | } 51 | } 52 | 53 | return 0; 54 | } 55 | 56 | void tnyDB_list_push(tnyDB_list *list, int value) { 57 | tnyDB_list_check_resize(list, list->length + 1); 58 | 59 | if (list->length >= list->allocated_length) { 60 | printf("Buffer overrun in tnyDB_list_push: list->length >= list->allocated_length (%i >= %i)\nExiting!\n", 61 | list->length, list->allocated_length); 62 | exit(-1); 63 | } 64 | 65 | list->values[list->length] = value; 66 | list->length++; 67 | } 68 | 69 | void tnyDB_list_insert(tnyDB_list *list, int index, int value) { 70 | 71 | int biggest = list->length + 1 > index + 1 ? 
list->length + 1 : index + 1; 72 | 73 | tnyDB_list_check_resize(list, biggest); 74 | 75 | if (index >= list->allocated_length) { 76 | printf("Buffer overrun in tnyDB_list_push: list->length >= list->allocated_length (%i >= %i)\nExiting!\n", 77 | list->length, list->allocated_length); 78 | exit(-1); 79 | } 80 | 81 | for (int i = list->length - 1; i >= index; i--) { 82 | list->values[i + 1] = list->values[i]; 83 | } 84 | 85 | list->values[index] = value; 86 | 87 | list->length++; 88 | 89 | } 90 | 91 | void tnyDB_list_set(tnyDB_list *list, int index, int value) { 92 | 93 | int biggest = list->length + 1 > index + 1 ? list->length + 1 : index + 1; 94 | 95 | tnyDB_list_check_resize(list, biggest); 96 | 97 | if (index >= list->allocated_length) { 98 | printf("Buffer overrun in tnyDB_list_push: list->length >= list->allocated_length (%i >= %i)\nExiting!\n", 99 | list->length, list->allocated_length); 100 | exit(-1); 101 | } 102 | 103 | list->values[index] = value; 104 | 105 | if (index < list->length) { 106 | list->length++; 107 | } else { 108 | list->length = index + 1; 109 | } 110 | 111 | } 112 | 113 | int tnyDB_list_get(tnyDB_list *list, int index) { 114 | if (index > list->length) { 115 | fprintf(stderr, "ERROR: Couldn't realloc memory!\n"); 116 | return (-1); 117 | } 118 | 119 | return list->values[index]; 120 | } 121 | 122 | int tnyDB_list_binary_find(tnyDB_list *list, int seeking) { 123 | int l = 0; 124 | int r = list->length; 125 | int m = 0; 126 | 127 | if (r == 0) { 128 | return ~0; 129 | } 130 | 131 | while (seeking != list->values[m] && l <= r) { 132 | m = (l + r) / 2; 133 | 134 | if (m >= list->length) 135 | break; 136 | 137 | if (seeking < list->values[m]) 138 | r = m - 1; 139 | if (seeking > list->values[m]) 140 | l = m + 1; 141 | } 142 | 143 | if (l <= r && m < list->length) { 144 | //printf("tnyDB_list_binary_find: Found (Seeking: %i, l: %i, m: %i,r: %i, Length: %i)\n", seeking, l, m, r, list->length); 145 | return m; 146 | } else { 147 | 
//printf("tnyDB_list_binary_find: Not found (Seeking: %i, l: %i, m: %i,r: %i, Length: %i)\n", seeking, l, m, r, list->length); 148 | return ~l; 149 | } 150 | 151 | } 152 | 153 | int tnyDB_list_find(tnyDB_list *list, int seeking) { 154 | return tnyDB_list_binary_find(list, seeking); 155 | } 156 | tnyDB_list *tnyDB_list_create_allocated(int size) { 157 | 158 | tnyDB_list *list = tnyDB_malloc_data(sizeof(tnyDB_list), "tnyDB_list_create_allocated.list"); 159 | list->values = tnyDB_malloc_data(sizeof(int), "tnyDB_list_create_allocated.list->values"); 160 | list->allocated_length = size; 161 | list->length = 0; 162 | 163 | return list; 164 | } 165 | tnyDB_list *tnyDB_list_create() { 166 | tnyDB_list *list = tnyDB_malloc_data(sizeof(tnyDB_list), "tnyDB_list_create"); 167 | list->allocated_length = 0; 168 | list->length = 0; 169 | list->values = NULL; 170 | return list; 171 | } 172 | 173 | /* 174 | void tnyDB_quick_sort(int *arr, int elements) { 175 | 176 | #define MAX_LEVELS 300 177 | 178 | int piv, beg[MAX_LEVELS], end[MAX_LEVELS], i=0, L, R, swap ; 179 | 180 | beg[0]=0; end[0]=elements; 181 | while (i>=0) { 182 | L=beg[i]; R=end[i]-1; 183 | if (L=piv && Lend[i-1]-beg[i-1]) { 190 | swap=beg[i]; beg[i]=beg[i-1]; beg[i-1]=swap; 191 | swap=end[i]; end[i]=end[i-1]; end[i-1]=swap; 192 | } 193 | } 194 | else { 195 | i--; 196 | } 197 | } 198 | } 199 | */ 200 | 201 | #define MIN_MERGESORT_LIST_SIZE 32 202 | void mergesort_array(int a[], int size, int temp[]) { 203 | int i1, i2, tempi; 204 | if (size < MIN_MERGESORT_LIST_SIZE) { 205 | /* Use insertion sort */ 206 | int i; 207 | for (i=0; i < size; i++) { 208 | int j, v = a[i]; 209 | for (j = i - 1; j >= 0; j--) { 210 | if (a[j] <= v) break; 211 | a[j + 1] = a[j]; 212 | } 213 | a[j + 1] = v; 214 | } 215 | return; 216 | } 217 | 218 | mergesort_array(a, size/2, temp); 219 | mergesort_array(a + size/2, size - size/2, temp); 220 | i1 = 0; 221 | i2 = size/2; 222 | tempi = 0; 223 | while (i1 < size/2 && i2 < size) { 224 | if (a[i1] <= 
a[i2]) { 225 | temp[tempi] = a[i1]; 226 | i1++; 227 | } else { 228 | temp[tempi] = a[i2]; 229 | i2++; 230 | } 231 | tempi++; 232 | } 233 | 234 | while (i1 < size/2) { 235 | temp[tempi] = a[i1]; 236 | i1++; 237 | tempi++; 238 | } 239 | while (i2 < size) { 240 | temp[tempi] = a[i2]; 241 | i2++; 242 | tempi++; 243 | } 244 | 245 | memcpy(a, temp, size*sizeof(int)); 246 | } 247 | 248 | void tnyDB_swap(int *a, int *b) 249 | { 250 | int t=*a; *a=*b; *b=t; 251 | } 252 | void tnyDB_quick_sort(int arr[], int beg, int end) 253 | { 254 | if (end > beg + 1) 255 | { 256 | int piv = arr[beg], l = beg + 1, r = end; 257 | while (l < r) 258 | { 259 | if (arr[l] <= piv) 260 | l++; 261 | else 262 | tnyDB_swap(&arr[l], &arr[--r]); 263 | } 264 | tnyDB_swap(&arr[--l], &arr[beg]); 265 | tnyDB_quick_sort(arr, beg, l); 266 | tnyDB_quick_sort(arr, r, end); 267 | } 268 | } 269 | 270 | 271 | tnyDB_list *tnyDB_list_sorted_distinct(int *list, int length){ 272 | int *sorted = tnyDB_malloc_data(sizeof(int) * length, "tnyDB_list_sorted_distinct.sorted"); 273 | int *tmpSorted = tnyDB_malloc_data(sizeof(int) * length, "tnyDB_list_sorted_distinct.tmpSorted"); 274 | memcpy(sorted, list, length* sizeof(int)); 275 | 276 | 277 | // Sort it 278 | // tnyDB_quick_sort(sorted, 0, length); 279 | mergesort_array(sorted, length, tmpSorted); 280 | tnyDB_free_data(tmpSorted, sizeof(int) * length, "tnyDB_list_sorted_distinct.tmpSorted"); 281 | 282 | // Extract only the unique values 283 | int ci=1, last=sorted[0]; 284 | 285 | 286 | for (int i =1; i < length; i++){ 287 | if (sorted[i] != last){ 288 | last = sorted[i]; 289 | sorted[ci] = sorted[i]; 290 | ci++; 291 | } 292 | } 293 | 294 | // Resize the array 295 | sorted = tnyDB_realloc_data(sorted, sizeof(int) * (ci), sizeof(int) * length, "tnyDB_list_sorted_distinct.shrink"); 296 | 297 | 298 | tnyDB_list *result = tnyDB_malloc_data(sizeof(tnyDB_list), "tnyDB_list_sorted_distinct.list"); 299 | result->values = sorted; 300 | result->allocated_length = ci+1; 301 | 
result->length = ci; 302 | 303 | return result; 304 | } 305 | 306 | void tnyDB_list_free(tnyDB_list * toFree) { 307 | tnyDB_free_data(toFree->values, toFree->allocated_length * sizeof(int), "tnyDB_list_free (values)"); 308 | tnyDB_free_data(toFree, sizeof(tnyDB_list), "tnyDB_list_free"); 309 | } 310 | 311 | -------------------------------------------------------------------------------- /src/tnyDB_list.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2011 Terence Siganakis. 2 | 3 | This file is part of TnyDB. 4 | 5 | TnyDB is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | TnyDB is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with TnyDB. If not, see . 
17 | */ 18 | 19 | /* Contact: Terence Siganakis */ 20 | 21 | #ifndef TNYDB_LIST_H_ 22 | #define TNYDB_LIST_H_ 23 | 24 | #include "tnyDB_mem.h" 25 | 26 | typedef struct { 27 | int length; 28 | int allocated_length; 29 | int *values; 30 | } tnyDB_list; 31 | 32 | 33 | // Appends a value to the end of the list 34 | void tnyDB_list_push(tnyDB_list *list, int value); 35 | 36 | // Inserts a value into the list at the specified index, 37 | // moving items that occur after the index to their index+i 38 | void tnyDB_list_insert(tnyDB_list *list, int index, int value); 39 | 40 | // Sets the value at index specified to the value specified 41 | void tnyDB_list_set(tnyDB_list *list, int index, int value); 42 | 43 | // Gets the value at the specified index 44 | int tnyDB_list_get(tnyDB_list *list, int index); 45 | 46 | // Located the first occurence of the specified value 47 | // by doing a binary search. If no item is found, the not 48 | // value (~) of where it would be found is returned. 49 | // This functions reauires that the list is already sorted 50 | int tnyDB_list_find(tnyDB_list *list, int value); 51 | 52 | 53 | // Creates a list and returns its reference 54 | tnyDB_list *tnyDB_list_create(); 55 | 56 | 57 | // Creates a list that is pre-allocated to the size specified 58 | tnyDB_list *tnyDB_list_create_allocated(int size); 59 | 60 | 61 | // Creates a list of the disctinct values from *list, sorted ascending 62 | tnyDB_list *tnyDB_list_sorted_distinct(int *list, int length); 63 | 64 | 65 | // Frees memory used by this list 66 | void tnyDB_list_free(tnyDB_list * toFree) ; 67 | 68 | 69 | #endif /* TNYDB_LIST_H_ */ 70 | -------------------------------------------------------------------------------- /src/tnyDB_mem.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2011 Terence Siganakis. 2 | 3 | This file is part of TnyDB. 
4 | 5 | TnyDB is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | TnyDB is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with TnyDB. If not, see . 17 | */ 18 | 19 | /* Contact: Terence Siganakis */ 20 | 21 | #include "tnyDB_mem.h" 22 | 23 | tnyDB_mem_statistics tnyDB_mem_stats; 24 | 25 | void tnyDB_mem_init() { 26 | 27 | tnyDB_mem_stats.allocations = 0; 28 | tnyDB_mem_stats.frees = 0; 29 | tnyDB_mem_stats.bytes_total = 0; 30 | tnyDB_mem_stats.bytes_current = 0; 31 | } 32 | 33 | void *tnyDB_malloc(int amount, char* tag) { 34 | return malloc(amount); 35 | } 36 | 37 | void tnyDB_free(void *ptr, char* tag) { 38 | free(ptr); 39 | 40 | } 41 | 42 | tnyDB_mem_statistics tnyDB_mem_get_statistics() { 43 | return tnyDB_mem_stats; 44 | } 45 | 46 | void tnyDB_free_data(void *ptr, int bytes, char* tag) { 47 | 48 | tnyDB_mem_stats.frees++; 49 | tnyDB_mem_stats.bytes_current -= bytes; 50 | 51 | free(ptr); 52 | } 53 | 54 | void *tnyDB_realloc_data(void *ptr, int new_bytes, int old_bytes, char* tag) { 55 | int difference = new_bytes - old_bytes; 56 | tnyDB_mem_stats.frees++; 57 | tnyDB_mem_stats.allocations++; 58 | tnyDB_mem_stats.bytes_current += difference; 59 | 60 | void* newptr = realloc(ptr, new_bytes); 61 | if (newptr != NULL) { 62 | return newptr; 63 | } else { 64 | fprintf(stderr, "REALLOC Failed! 
{new_bytes: %i, old_bytes: %i}", new_bytes, old_bytes); 65 | exit(-1); 66 | } 67 | } 68 | void *tnyDB_calloc_data(int item_size, int item_count, char* tag) { 69 | tnyDB_mem_stats.allocations++; 70 | tnyDB_mem_stats.bytes_current += (item_size * item_count); 71 | tnyDB_mem_stats.bytes_total += (item_size * item_count); 72 | 73 | 74 | 75 | void *ptr = calloc(item_count, item_size); 76 | 77 | return ptr; 78 | } 79 | 80 | void *tnyDB_malloc_data(int bytes, char* tag) { 81 | tnyDB_mem_stats.allocations++; 82 | tnyDB_mem_stats.bytes_current += bytes; 83 | tnyDB_mem_stats.bytes_total += bytes; 84 | 85 | void * ptr = malloc(bytes); 86 | 87 | return ptr; 88 | } 89 | -------------------------------------------------------------------------------- /src/tnyDB_mem.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2011 Terence Siganakis. 2 | 3 | This file is part of TnyDB. 4 | 5 | TnyDB is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | TnyDB is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with TnyDB. If not, see . 
17 | */ 18 | 19 | /* Contact: Terence Siganakis */ 20 | 21 | #ifndef TNYDB_MEM_H_ 22 | #define TNYDB_MEM_H_ 23 | 24 | 25 | #include 26 | #include 27 | 28 | 29 | typedef struct { 30 | int allocations; 31 | int frees; 32 | 33 | int bytes_total; 34 | int bytes_current; 35 | 36 | } tnyDB_mem_statistics; 37 | 38 | 39 | 40 | 41 | void *tnyDB_malloc(int amount, char *tag); 42 | void tnyDB_free(void *ptr, char *tag); 43 | 44 | 45 | void tnyDB_free_data(void *ptr, int bytes, char* tag); 46 | void *tnyDB_malloc_data(int bytes, char* tag); 47 | void *tnyDB_realloc_data(void *ptr, int new_bytes, int old_bytes, char* tag); 48 | void *tnyDB_calloc_data(int item_size, int item_count, char* tag); 49 | 50 | void tnyDB_mem_init(); 51 | tnyDB_mem_statistics tnyDB_mem_get_statistics(); 52 | 53 | #endif /* SPRDB_MEM_H_ */ 54 | -------------------------------------------------------------------------------- /src/tnyDB_tarray.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2011 Terence Siganakis. 2 | 3 | This file is part of TnyDB. 4 | 5 | TnyDB is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | TnyDB is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with TnyDB. If not, see . 
17 | */ 18 | 19 | /* Contact: Terence Siganakis */ 20 | 21 | #include "tnyDB_tarray.h" 22 | 23 | tnyDB_tarray *tnyDB_tarray_create(int *data, int dataLength) { 24 | tnyDB_tarray * result = tnyDB_malloc_data(sizeof(tnyDB_tarray), "tnyDB_wtree_create.result"); 25 | result->length = dataLength; 26 | result->data = tnyDB_calloc_data(sizeof(int), dataLength, "tnyDB_wtree_create.result->data"); 27 | 28 | for (int i =0; i < dataLength; i++){ 29 | result->data[i] = data[i]; 30 | } 31 | 32 | return result; 33 | } 34 | 35 | TWORD * tnyDB_tarray_seek(tnyDB_tarray *tree, int value){ 36 | TWORD *result = calloc(sizeof(TWORD), (tree->length/sizeof(TWORD))+1 ); 37 | 38 | for (int i =0; i < tree->length; i++){ 39 | if (tree->data[i] == value){ 40 | tnyDB_tword_set(result, i); 41 | } 42 | } 43 | 44 | return result; 45 | } 46 | 47 | int tnyDB_tarray_access(tnyDB_tarray *tree, int rowIndex){ 48 | return tree->data[rowIndex]; 49 | } 50 | -------------------------------------------------------------------------------- /src/tnyDB_tarray.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2011 Terence Siganakis. 2 | 3 | This file is part of TnyDB. 4 | 5 | TnyDB is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | TnyDB is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with TnyDB. If not, see . 
17 | */ 18 | 19 | /* Contact: Terence Siganakis */ 20 | 21 | #ifndef TNYDB_TARRAY_H_ 22 | #define TNYDB_TARRAY_H_ 23 | 24 | #include "tnyDB_tword.h" 25 | /* Plain (uncompressed) int column: `data` holds `length` values copied from the caller's array by tnyDB_tarray_create. */ 26 | typedef struct { 27 | int* data; 28 | int length; 29 | } tnyDB_tarray; 30 | /* seek: returns a newly allocated bit vector with bit i set where data[i] == value. access: returns data[rowIndex] (no range check). create: copies dataLength ints from data. */ 31 | TWORD * tnyDB_tarray_seek(tnyDB_tarray *tree, int value); 32 | int tnyDB_tarray_access(tnyDB_tarray *wtree, int rowIndex); 33 | tnyDB_tarray *tnyDB_tarray_create(int *data, int dataLength); 34 | 35 | #endif /* TNYDB_TARRAY_H_ */ 36 | -------------------------------------------------------------------------------- /src/tnyDB_tmap.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2011 Terence Siganakis. 2 | 3 | This file is part of TnyDB. 4 | 5 | TnyDB is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | TnyDB is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with TnyDB. If not, see . 
17 | */ 18 | 19 | /* Contact: Terence Siganakis */ 20 | 21 | #include "tnyDB_tmap.h" 22 | 23 | tnyDB_list * _tmap_get_keys(int *data, int dataLength) { 24 | tnyDB_list *keys = tnyDB_list_create(); 25 | for (int i = 0; i < dataLength; i++) { 26 | int m = tnyDB_list_find(keys, data[i]); 27 | if (m < 0) { 28 | tnyDB_list_insert(keys, ~m, data[i]); 29 | } 30 | } 31 | return keys; 32 | } 33 | 34 | static inline int _tmap_log2(unsigned int v) { 35 | const unsigned int b[] = { 0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000 }; 36 | const unsigned int S[] = { 1, 2, 4, 8, 16 }; 37 | int i; 38 | unsigned int r = 0; // result of log2(v) will go here 39 | for (i = 4; i >= 0; i--) // unroll for speed... 40 | { 41 | if (v & b[i]) { 42 | v >>= S[i]; 43 | r |= S[i]; 44 | } 45 | } 46 | return r; 47 | } 48 | 49 | static inline int _tmap_row_word_length(tnyDB_tmap *map) { 50 | return (map->length / 64) + 1; 51 | 52 | } 53 | 54 | 55 | tnyDB_tmap *tnyDB_tmap_create(int *data, int dataLength) { 56 | tnyDB_tmap *map = tnyDB_malloc_data(sizeof(tnyDB_tmap), "tnyDB_tmap_create.tmap"); 57 | 58 | map->length = dataLength; 59 | // map->keys = _tmap_get_keys(data, dataLength); 60 | map->keys = tnyDB_list_sorted_distinct(data, dataLength); 61 | 62 | 63 | // tnyDB_list * list1 = _tmap_get_keys(data, dataLength); 64 | // tnyDB_list * list2 = tnyDB_list_sorted_distinct(data, dataLength); 65 | 66 | // for (int i =0; i < list1->length || i < list2->length; i++){ 67 | // printf("%i %i\n", list1->values[i], list2->values[i]); 68 | // } 69 | // exit(0); 70 | 71 | 72 | map->depth = _tmap_log2(map->keys->length) + 1; 73 | map->data = tnyDB_calloc_data(sizeof(TWORD*), map->depth, "tnyDB_tmap_create.tmap->data[]"); 74 | 75 | int wordLength = _tmap_row_word_length(map); 76 | for (int d =0; d < map->depth; d++){ 77 | map->data[d] = tnyDB_calloc_data(sizeof(TWORD), wordLength, "tnyDB_tmap_create.tmap->data[i]"); 78 | } 79 | 80 | // Build a new list "translated" which will contain all the 81 | // distinct values within the 
data array. We then use the 82 | // bit vector stored in Data to handle references to this array 83 | 84 | int *translated = malloc(sizeof(int) * dataLength); 85 | for (int i = 0; i < dataLength; i++) { 86 | translated[i] = tnyDB_list_find(map->keys, data[i]); 87 | } 88 | 89 | 90 | for (int d = 0; d < map->depth; d++) { 91 | for (int w = 0; w < wordLength; w++){ 92 | TWORD tmp=0; 93 | for (int i = 0; i < 64; i++){ 94 | if ((translated[(w*64) + i] & (1 << d)) != 0) { 95 | tmp |= 1ull << i; 96 | } 97 | } 98 | map->data[d][w] = tmp; 99 | 100 | } 101 | 102 | } 103 | free(translated); 104 | 105 | return map; 106 | } 107 | 108 | int tnyDB_tmap_access(tnyDB_tmap *map, int rowIndex) { 109 | int keyIndex = 0; 110 | for (int d = 0; d < map->depth; d++) { 111 | if (tnyDB_tword_bit_is_set(map->data[d], rowIndex)) { 112 | keyIndex |= 1 << d; 113 | } 114 | } 115 | return map->keys->values[keyIndex]; 116 | 117 | } 118 | 119 | TWORD * tnyDB_tmap_seek(tnyDB_tmap *map, int value) { 120 | TWORD * result = tnyDB_calloc_data(sizeof(TWORD), _tmap_row_word_length(map), "tnyDB_tmap_seek.result"); 121 | int keyIndex = tnyDB_list_find(map->keys, value); 122 | 123 | // Make a little cache... 124 | int keyBitMask[map->depth]; 125 | for (int j = 0; j < map->depth; j++) { 126 | keyBitMask[j] = (keyIndex & (1 << j)); 127 | } 128 | 129 | int rowWordLength = _tmap_row_word_length(map); 130 | 131 | // Do we have it in our key index? 132 | if (keyIndex >= 0) { 133 | // Go through the data one word at a time... 134 | for (int w = 0; w < rowWordLength; w++) { 135 | 136 | TWORD wordMatches = keyBitMask[0] == 0 ? 
~map->data[0][w] : map->data[0][w]; 137 | if (wordMatches != 0) { 138 | for (int d = 1; d < map->depth; d++) { 139 | if (keyBitMask[d] == 0) { 140 | wordMatches &= ~(map->data[d][w]); 141 | } else { 142 | wordMatches &= (map->data[d][w]); 143 | } 144 | if (wordMatches == 0) 145 | break; 146 | } 147 | result[w] = wordMatches; 148 | } 149 | } 150 | 151 | } 152 | 153 | return result; 154 | } 155 | 156 | -------------------------------------------------------------------------------- /src/tnyDB_tmap.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2011 Terence Siganakis. 2 | 3 | This file is part of TnyDB. 4 | 5 | TnyDB is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | TnyDB is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with TnyDB. If not, see . 
17 | */ 18 | 19 | /* Contact: Terence Siganakis */ 20 | 21 | #ifndef TNYDB_TMAP_H_ 22 | #define TNYDB_TMAP_H_ 23 | 24 | #include "tnyDB_tword.h" 25 | #include "tnyDB_list.h" 26 | /* Bit-sliced column. keys: sorted distinct values; data[d]: bit vector holding bit d of every row's key index; length: number of rows; depth: number of bit vectors in data. */ 27 | typedef struct { 28 | TWORD** data; 29 | int length; 30 | int depth; 31 | tnyDB_list *keys; 32 | } tnyDB_tmap; 33 | 34 | /* seek: newly allocated bit vector marking the rows equal to value (all zero when value is not a key). access: value stored at rowIndex. create: builds the map from dataLength ints. */ 35 | TWORD * tnyDB_tmap_seek(tnyDB_tmap *tree, int value); 36 | int tnyDB_tmap_access(tnyDB_tmap *wtree, int rowIndex); 37 | tnyDB_tmap *tnyDB_tmap_create(int *data, int dataLength); 38 | 39 | 40 | #endif /* TNYDB_TMAP_H_ */ 41 | -------------------------------------------------------------------------------- /src/tnyDB_tword.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2011 Terence Siganakis. 2 | 3 | This file is part of TnyDB. 4 | 5 | TnyDB is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | TnyDB is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with TnyDB. If not, see . 
17 | */ 18 | 19 | /* Contact: Terence Siganakis */ 20 | 21 | 22 | #include 23 | #include 24 | #include "tnyDB_tword.h" 25 | #include "tnyDB_mem.h" 26 | 27 | static const TWORD k1 = 0x5555555555555555; /* -1/3 */ 28 | static const TWORD k2 = 0x3333333333333333; /* -1/5 */ 29 | static const TWORD k4 = 0x0f0f0f0f0f0f0f0f; /* -1/17 */ 30 | static const TWORD kf = 0x0101010101010101; /* -1/255 */ 31 | static const int TWORD_SIZE_BITS = 64; 32 | static const TWORD ONE_64BIT = 1; 33 | 34 | void tnyDB_tword_print(TWORD *r, int length) { 35 | printf(" (%i) ", length); 36 | 37 | if (r == NULL) { 38 | printf("NULL"); 39 | } else { 40 | for (int i = 0; i < length; i++) { 41 | if (i % 8 == 0 && i != 0) { 42 | printf(" "); 43 | 44 | } 45 | if (tnyDB_tword_bit_is_set(r, i)) { 46 | printf("1"); 47 | } else { 48 | printf("0"); 49 | } 50 | } 51 | 52 | } 53 | } 54 | 55 | void tnyDB_tword_print_offset(TWORD *r, int start, int length) { 56 | 57 | 58 | if (r == NULL) { 59 | printf("NULL"); 60 | } else { 61 | for (int i = start; i < start+length; i++) { 62 | if (i % 8 == 0 && i != 0) { 63 | printf(" "); 64 | 65 | } 66 | if (tnyDB_tword_bit_is_set(r, i)) { 67 | printf("1"); 68 | } else { 69 | printf("0"); 70 | } 71 | } 72 | 73 | 74 | } 75 | 76 | printf(" (s:%i, l:%i)", start, length); 77 | //printf("\n"); 78 | 79 | } 80 | 81 | int tnyDB_tword_bit_is_set(TWORD *r, int position) { 82 | 83 | int wordNumber = position / TWORD_SIZE_BITS; 84 | int bitPosition = position % TWORD_SIZE_BITS; 85 | 86 | if ((r[wordNumber] & (ONE_64BIT << bitPosition)) != 0) { 87 | return 1; 88 | } else { 89 | return 0; 90 | } 91 | 92 | } 93 | 94 | void tnyDB_tword_set(TWORD *r, int position) { 95 | int wordNumber = position / TWORD_SIZE_BITS; 96 | int bitPosition = position % TWORD_SIZE_BITS; 97 | 98 | r[wordNumber] = r[wordNumber] | (ONE_64BIT << bitPosition); 99 | } 100 | 101 | int tnyDB_tword_last_set_index(TWORD val) { 102 | int result = 0; 103 | if (val >= 0x100000000) { 104 | result += 32; 105 | val >>= 32; 
106 | } 107 | if (val >= 0x10000) { 108 | result += 16; 109 | val >>= 16; 110 | } 111 | if (val >= 0x100) { 112 | result += 8; 113 | val >>= 8; 114 | } 115 | if (val >= 0x10) { 116 | result += 4; 117 | val >>= 4; 118 | } 119 | if (val >= 0x4) { 120 | result += 2; 121 | val >>= 2; 122 | } 123 | if (val >= 0x2) { 124 | result += 1; 125 | val >>= 1; 126 | } 127 | return result + (int) val; 128 | 129 | } 130 | 131 | TWORD *tnyDB_tword_copy_words(TWORD *old, int bit_length) { 132 | 133 | int wordLength = (bit_length / TWORD_SIZE_BITS) + 1; 134 | TWORD *new = tnyDB_malloc_data(sizeof(TWORD) * wordLength, "tnyDB_vector_copy->data"); 135 | 136 | for (int i = 0; i < wordLength; i++) { 137 | new[i] = old[i]; 138 | } 139 | 140 | return new; 141 | } 142 | 143 | void tnyDB_tword_and(TWORD *a, TWORD *b, int bit_length) { 144 | int wordLength = (bit_length / TWORD_SIZE_BITS) + 1; 145 | for (int i = 0; i < wordLength; i++) { 146 | a[i] &= b[i]; 147 | } 148 | } 149 | 150 | void tnyDB_tword_and_not(TWORD *a, TWORD *b, int bit_length) { 151 | 152 | int wordLength = (bit_length / TWORD_SIZE_BITS) + 1; 153 | for (int i = 0; i < wordLength; i++) { 154 | a[i] &= ~(b[i]); 155 | } 156 | 157 | } 158 | 159 | void tnyDB_tword_nand(TWORD *a, TWORD *b, int bit_length) { 160 | 161 | int wordLength = (bit_length / TWORD_SIZE_BITS) + 1; 162 | for (int i = 0; i < wordLength; i++) { 163 | a[i] = (~(a[i])) & b[i]; 164 | } 165 | 166 | } 167 | 168 | void tnyDB_tword_inverse(TWORD *a, int bit_length) { 169 | int wordLength = (bit_length / TWORD_SIZE_BITS) + 1; 170 | for (int i = 0; i < wordLength; i++) { 171 | a[i] = ~(a[i]); 172 | } 173 | 174 | } 175 | 176 | 177 | void tnyDB_tword_refine(TWORD *m, int mBitLen, TWORD *c, int cOffset, int cBitLen, int isLeft) { 178 | 179 | 180 | 181 | 182 | // Move through the compressed word to the correct word (word position) 183 | c += cOffset/64; 184 | 185 | // The bit position in the current Compressed WORD 186 | int cPosInWord = cOffset % 64; 187 | 188 | // The bit 
position in the current MASK word 189 | int mPosInWord = 0; 190 | 191 | // How many bits in M we have visited 192 | int mCount = 0; 193 | 194 | for (int ci = 0; ci < cBitLen; ci++) { 195 | 196 | TWORD isSet = ( (*c) & (1ul << cPosInWord)); 197 | if (isLeft == 1){ 198 | isSet = isSet == 0 ? 1 : 0; 199 | } 200 | 201 | // Go through the MASK until we find the next 1 bit 202 | while ((*m & (1ul << mPosInWord)) == 0) { 203 | mPosInWord++; 204 | mCount++; 205 | if (mPosInWord == 64) { 206 | mPosInWord = 0; 207 | m++; 208 | } 209 | 210 | if (mCount > mBitLen) { 211 | printf("Error in tnyDB_tword_refine: mCount > mBitLen (%i > %i)\n", mCount, mBitLen); 212 | 213 | printf("Mask:\t"); 214 | tnyDB_tword_print_offset(m, 0, mBitLen); 215 | printf("\nModifier:\t"); 216 | tnyDB_tword_print_offset(c, cOffset, cBitLen); 217 | printf("\n"); 218 | exit(-1); 219 | } 220 | } 221 | if (isSet == 0) { 222 | // Reset the bit in M corresponding to mPosInWord... 223 | *m &= ~(1ul << mPosInWord); 224 | } 225 | 226 | mPosInWord++; 227 | cPosInWord++; 228 | 229 | mCount++; 230 | 231 | if (cPosInWord == 64) { 232 | cPosInWord = 0; 233 | c++; 234 | } 235 | if (mPosInWord == 64) { 236 | mPosInWord = 0; 237 | m++; 238 | } 239 | } 240 | } 241 | 242 | // Expand 243 | // c = "0101" using 244 | // m = "0100110100", to come up with 245 | // r = "0*001*0100" (where a * is a 1 reset to a 0) 246 | void tnyDB_tword_combine(TWORD *m, int mOffset, int mBitLen, TWORD *c, int cOffset, int cBitLen) { 247 | 248 | int mCount = 0; 249 | int cPosInWord = cOffset; // The position in the current Compressed WORD 250 | int mPosInWord = mOffset; // The position in the current MASK word 251 | TWORD mStart = *m; 252 | TWORD * cStart = c; 253 | 254 | printf("tnyDB_tword_combine\n---------------------\nM: "); 255 | tnyDB_tword_print_offset(m, mOffset, mBitLen); 256 | 257 | printf("\nC: "); 258 | tnyDB_tword_print_offset(c, cOffset, cBitLen); 259 | printf("\n"); 260 | 261 | for (int ci = 0; ci < cBitLen; ci++) { 262 | 263 | 
TWORD isSet = (*c & (1ul << cPosInWord)); 264 | 265 | // Go through the MASK until we find the next 1 bit 266 | while ((*m & (1ul << mPosInWord)) == 0) { 267 | mPosInWord++; 268 | mCount++; 269 | if (mPosInWord == 64) { 270 | mPosInWord = 0; 271 | m++; 272 | } 273 | if (mCount > mBitLen) { 274 | printf("Error in tnyDB_tword_combine: mCount > mBitLen (%i > %i)\n M:", mCount, mBitLen); 275 | 276 | tnyDB_tword_print(&mStart, mBitLen + mOffset); 277 | printf("\nC:"); 278 | tnyDB_tword_print(cStart, cBitLen); 279 | printf("\n"); 280 | exit(-1); 281 | } 282 | } 283 | 284 | if (isSet == 0) { 285 | // Reset it... 286 | *m &= ~(1ul << mPosInWord); 287 | } 288 | 289 | mPosInWord++; 290 | cPosInWord++; 291 | 292 | mCount++; 293 | 294 | 295 | 296 | 297 | if (ci == 64) { 298 | cPosInWord = 0; 299 | c++; 300 | } 301 | } 302 | 303 | } 304 | 305 | void tnyDB_tword_combine_test() { 306 | 307 | TWORD mask = 0; 308 | 309 | mask |= 1ul << 0; 310 | mask |= 1ul << 2; 311 | mask |= 1ul << 4; 312 | mask |= 1ul << 6; 313 | mask |= 1ul << 8; 314 | mask |= 1ul << 10; 315 | 316 | TWORD compressed = 0; 317 | compressed |= 1ul << 0; 318 | compressed |= 1ul << 1; 319 | compressed |= 1ul << 2; 320 | 321 | printf("M: "); 322 | tnyDB_tword_print(&mask, 10); 323 | printf("\nC: "); 324 | tnyDB_tword_print(&compressed, 10); 325 | printf("\nR: "); 326 | tnyDB_tword_combine(&mask, 0, 10, &compressed, 0, 7); 327 | tnyDB_tword_print(&mask, 10); 328 | 329 | printf("\n"); 330 | 331 | } 332 | 333 | int tnyDB_tword_population(TWORD *mask, int start, int end) { 334 | int startWord = start / TWORD_SIZE_BITS; 335 | int startIndex = start % TWORD_SIZE_BITS; 336 | 337 | int endWord = end / TWORD_SIZE_BITS; 338 | int endIndex = end % TWORD_SIZE_BITS; 339 | 340 | int startEndIndex = startWord == endWord ? endIndex : TWORD_SIZE_BITS - 1; 341 | 342 | int pop = 0; 343 | 344 | TWORD first = *(mask + startWord); 345 | // Lets do this the slow and steady way.... 
346 | for (int i = startIndex; i <= startEndIndex; i++) { 347 | if ((first & (ONE_64BIT << i)) != 0) { 348 | pop++; 349 | } 350 | } 351 | 352 | // Now we can do it Word at a time... 353 | for (int j = startWord + 1; j < endWord; j++) { 354 | TWORD x = *(mask + j); 355 | x = x - ((x >> 1) & k1); /* put count of each 2 bits into those 2 bits */ 356 | x = (x & k2) + ((x >> 2) & k2); /* put count of each 4 bits into those 4 bits */ 357 | x = (x + (x >> 4)) & k4; /* put count of each 8 bits into those 8 bits */ 358 | x = (x * kf) >> 56; /* returns 8 most significant bits of x + (x<<8) + (x<<16) + (x<<24) + ... */ 359 | 360 | pop += (int) x; 361 | } 362 | 363 | if (startWord != endWord) { 364 | TWORD last = *(mask + endWord); 365 | // Lets do this the slow and steady way.... 366 | for (int k = 0; k <= endIndex; k++) { 367 | if ((last & (ONE_64BIT << k)) != 0) { 368 | pop++; 369 | } 370 | } 371 | } 372 | return pop; 373 | 374 | } 375 | -------------------------------------------------------------------------------- /src/tnyDB_tword.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2011 Terence Siganakis. 2 | 3 | This file is part of TnyDB. 4 | 5 | TnyDB is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | TnyDB is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with TnyDB. If not, see . 
17 | */ 18 | 19 | /* Contact: Terence Siganakis */ 20 | 21 | #ifndef TNYDB_TWORD_H_ 22 | #define TNYDB_TWORD_H_ 23 | 24 | #include "tnyDB_mem.h" 25 | 26 | 27 | /* Word type of the bit vectors. tnyDB_tword.c treats it as 64 bits wide: bit `position` lives in word position / 64 at bit position % 64. */ 28 | typedef unsigned long long TWORD; 29 | 30 | /* Single-bit helpers. last_set_index returns the 1-based position of the highest set bit, 0 for val == 0. */ 31 | void tnyDB_tword_print(TWORD *r, int length); 32 | int tnyDB_tword_bit_is_set(TWORD *r, int position) ; 33 | void tnyDB_tword_set(TWORD *r, int position); 34 | int tnyDB_tword_last_set_index(TWORD val) ; 35 | /* copy_words returns a newly allocated copy of (bit_length / 64) + 1 words. */ 36 | TWORD *tnyDB_tword_copy_words(TWORD *old, int bit_length) ; 37 | /* The in-place operations below modify `a` over (bit_length / 64) + 1 words: nand is a = ~a & b, and is a &= b, and_not is a &= ~b, inverse is a = ~a. */ 38 | void tnyDB_tword_nand(TWORD *a, TWORD *b, int bit_length) ; 39 | void tnyDB_tword_and(TWORD *a, TWORD *b, int bit_length); 40 | void tnyDB_tword_and_not(TWORD *a, TWORD *b, int bit_length); 41 | void tnyDB_tword_inverse(TWORD *a, int bit_length); 42 | void tnyDB_tword_combine(TWORD *m, int mOffset, int mBitLen, TWORD *c, int cOffset, int cBitLen) ; 43 | int tnyDB_tword_population(TWORD *mask, int start, int end); 44 | 45 | /* refine clears set bits of mask m according to cBitLen bits of c read from cOffset; isLeft == 1 inverts the sense of the c bits. population counts set bits in start..end inclusive. */ 46 | void tnyDB_tword_refine(TWORD *m, int mBitLen, TWORD *c, int cOffset, int cBitLen, int isLeft); 47 | void tnyDB_tword_print_offset(TWORD *r, int start, int length) ; 48 | 49 | 50 | void tnyDB_tword_combine_test(); 51 | 52 | #endif /* TNYDB_TWORD_H_ */ 53 | -------------------------------------------------------------------------------- /src/tnyDB_wtree.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2011 Terence Siganakis. 2 | 3 | This file is part of TnyDB. 4 | 5 | TnyDB is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | TnyDB is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 
14 | 15 | You should have received a copy of the GNU General Public License 16 | along with TnyDB. If not, see . 17 | */ 18 | 19 | /* Contact: Terence Siganakis */ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include "tnyDB_list.h" 27 | #include "tnyDB_wtree.h" 28 | 29 | tnyDB_list * _get_keys(int *data, int dataLength) { 30 | tnyDB_list *keys = tnyDB_list_create(); 31 | 32 | for (int i = 0; i < dataLength; i++) { 33 | int m = tnyDB_list_find(keys, data[i]); 34 | if (m < 0) { 35 | tnyDB_list_insert(keys, ~m, data[i]); 36 | } 37 | } 38 | return keys; 39 | } 40 | 41 | static inline int _log2(unsigned int v) { 42 | const unsigned int b[] = { 0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000 }; 43 | const unsigned int S[] = { 1, 2, 4, 8, 16 }; 44 | int i; 45 | register unsigned int r = 0; // result of log2(v) will go here 46 | for (i = 4; i >= 0; i--) // unroll for speed... 47 | { 48 | if (v & b[i]) { 49 | v >>= S[i]; 50 | r |= S[i]; 51 | } 52 | } 53 | return r; 54 | } 55 | 56 | static inline int _row_word_length(tnyDB_wtree *tree) { 57 | return (tree->value_count / 64) + 1; 58 | } 59 | 60 | 61 | void _write_tree(tnyDB_wtree * tree, int *data, int dataLen, int rankIndex, int dataLeft, int keyLeft, int keyRight, 62 | int depth) { 63 | 64 | int keyMiddle = (keyLeft + keyRight) / 2; 65 | 66 | TWORD *row = tree->data + (depth * tree->row_length); 67 | // printf("Working at depth: %i (%i)\n", depth, depth * tree->row_length); 68 | 69 | int* leftValues = tnyDB_calloc_data(sizeof(int), dataLen, "_write_tree Left"); 70 | int* rightValues = tnyDB_calloc_data(sizeof(int), dataLen, "_write_tree Right"); 71 | int lCount = 0, rCount = 0; 72 | 73 | int i = 0; 74 | while (i < dataLen) { 75 | 76 | if (data[i] <= keyMiddle) { 77 | leftValues[lCount] = data[i]; 78 | lCount++; 79 | } else { 80 | tnyDB_tword_set(row, dataLeft + i); 81 | rightValues[rCount] = data[i]; 82 | rCount++; 83 | } 84 | i++; 85 | } 86 | 87 | tnyDB_list_set(tree->ranks, rankIndex - 1, lCount); 88 | 89 | 
// Go LEFT 90 | if (keyLeft < keyMiddle) { 91 | _write_tree(tree, leftValues, lCount, rankIndex * 2, dataLeft, keyLeft, keyMiddle, depth + 1); 92 | } 93 | 94 | tnyDB_free_data(leftValues, sizeof(int) * dataLen, "_write_tree Left"); 95 | 96 | // Go RIGHT 97 | if (keyMiddle + 1 < keyRight) { 98 | _write_tree(tree, rightValues, rCount, (rankIndex * 2) + 1, dataLeft + lCount, keyMiddle + 1, keyRight, 99 | depth + 1); 100 | 101 | } 102 | tnyDB_free_data(rightValues, sizeof(int) * dataLen, "_write_tree Right"); 103 | } 104 | 105 | 106 | // Given a bitmap vector, return the actual values at each set bits index 107 | tnyDB_list* tnyDB_wtree_scan(tnyDB_wtree *tree, TWORD *scan_list, int scan_list_length) { 108 | tnyDB_list *list = tnyDB_list_create(); 109 | 110 | // Now we are going to grab these ints in reverse order, 111 | // as its faster to read the TWORDs that way 112 | 113 | int words = (scan_list_length / 64) + 1; 114 | // int finalWordBitLength = vector->bit_length%64; 115 | int idx; 116 | for (int i = words - 1; i >= 0; i--) { 117 | 118 | TWORD v = scan_list[i]; 119 | while (v > 0) { 120 | TWORD t = v; 121 | 122 | v &= v - 1; 123 | 124 | idx = i * 64 + tnyDB_tword_last_set_index(t - v) - 1; 125 | 126 | int val = tnyDB_wtree_access(tree, idx); 127 | tnyDB_list_push(list, val); 128 | } 129 | 130 | } 131 | 132 | return list; 133 | 134 | } 135 | 136 | TWORD * tnyDB_wtree_seek(tnyDB_wtree *tree, int value) { 137 | int rowBitLength = tree->value_count; 138 | int rowWordLength = (rowBitLength / 64) + 1; 139 | 140 | int l = 0; 141 | int r = tree->keys->length - 1; 142 | int m = (l + r) / 2; 143 | 144 | int rankIndex = 1; 145 | int vectorStart = 0, vectorLength = rowBitLength; 146 | TWORD *results = tnyDB_tword_copy_words(tree->data, rowBitLength); 147 | TWORD *row = tree->data; 148 | 149 | // printf("Seek: %i:\t", value); 150 | for (int i = 0; i < tree->depth; i++) { 151 | int zeroCount = tree->ranks->values[rankIndex - 1]; 152 | 153 | // 
printf("\n----------------------\n(l:%i, m:%i, r:%i)\n:\t", l, m, r); 154 | // tnyDB_tword_print_offset(results, 0, rowBitLength); 155 | 156 | if (value > tree->keys->values[m]) { 157 | // BIGGER / RIGHT 158 | if (i == 0) { 159 | 160 | } else { 161 | tnyDB_tword_refine(results, rowBitLength, row, vectorStart, vectorLength, 0); 162 | } 163 | // printf("\n>R:\t"); 164 | // tnyDB_tword_print_offset(row, vectorStart, vectorLength); 165 | // printf("\n"); 166 | 167 | vectorStart += zeroCount; 168 | vectorLength = vectorLength - zeroCount; 169 | rankIndex = (rankIndex * 2) + 1; 170 | l = m + 1; 171 | 172 | } else { 173 | // SMALLER / LEFT 174 | if (i == 0) { 175 | tnyDB_tword_inverse(results, rowBitLength); 176 | } else { 177 | tnyDB_tword_refine(results, rowBitLength, row, vectorStart, vectorLength, 1); 178 | } 179 | // printf("\n> L:\t"); 180 | // tnyDB_tword_print_offset(row, vectorStart, vectorLength); 181 | // printf("\n"); 182 | 183 | vectorLength = zeroCount; 184 | rankIndex = (rankIndex * 2); 185 | r = m; 186 | } 187 | 188 | if (l == r) { 189 | break; 190 | 191 | } 192 | m = (l + r) / 2; 193 | // Next row please 194 | row += rowWordLength; 195 | zeroCount = tree->ranks->values[rankIndex - 1]; 196 | 197 | } 198 | 199 | // printf("\t"); 200 | // tnyDB_tword_print_offset(results, 0, rowBitLength); 201 | // printf("\n"); 202 | return results; 203 | 204 | } 205 | 206 | // Finds the value at the specified index 207 | int tnyDB_wtree_access(tnyDB_wtree *wtree, int rowIndex) { 208 | // printf("Access: %i ", rowIndex); 209 | int vectorStart = 0; 210 | 211 | int l = 0, r = wtree->keys->length - 1, m; 212 | 213 | int index = rowIndex; 214 | int rankIndex = 1; 215 | 216 | TWORD *ptr = wtree->data; 217 | 218 | for (int i = 0; i < wtree->depth; i++) { 219 | if (l == r) { 220 | 221 | break; 222 | } 223 | m = (l + r) / 2; 224 | 225 | // zeroCount tells us where the "fold" for the next row is (e.g. 
where 0's end and 1's begin) 226 | int zeroCount = wtree->ranks->values[rankIndex - 1]; 227 | if (zeroCount < 0) { 228 | printf("Negative zeroCount... What the fuck?\n"); 229 | } 230 | int pop = tnyDB_tword_population(ptr, vectorStart, vectorStart + index); 231 | 232 | if (!tnyDB_tword_bit_is_set(ptr, vectorStart + index)) { 233 | // printf("L"); 234 | r = m; 235 | 236 | // Any "1" we see up to here can be removed from my index 237 | // as it will be placed after the "fold" 238 | index -= (pop); 239 | rankIndex = rankIndex * 2; 240 | 241 | } else { 242 | // printf("R"); 243 | l = m + 1; 244 | 245 | // Move the start of the vector up to the 246 | vectorStart += zeroCount; 247 | // The index is after all the 0's (zeroCount) PLUS the number of 1's before my index 248 | index = pop - 1; 249 | 250 | rankIndex = (rankIndex * 2) + 1; 251 | } 252 | 253 | // Move the pointer to the next row down... 254 | ptr += wtree->row_length; 255 | } 256 | 257 | m = (l + r) / 2; 258 | // printf(" m=%i \n", m); 259 | return wtree->keys->values[m]; 260 | 261 | } 262 | 263 | // Creates a new wtree from the integer array passed in 264 | tnyDB_wtree *tnyDB_wtree_create(int *data, int dataLength) { 265 | 266 | tnyDB_wtree *tree = tnyDB_malloc_data(sizeof(tnyDB_wtree), "tnyDB_tree_create.wtree"); 267 | 268 | tree->value_count = dataLength; 269 | tree->keys = _get_keys(data, dataLength); 270 | tree->depth = _log2(tree->keys->length) + 1; 271 | tree->row_length = _row_word_length(tree); 272 | 273 | // Build a new list "translated" which will contain all the 274 | // distinct values within the data array. We then use the 275 | // bit vector stored in Data to handle references to this array 276 | int *translated = malloc(sizeof(int) * dataLength); 277 | for (int i = 0; i < dataLength; i++) { 278 | translated[i] = tnyDB_list_find(tree->keys, data[i]); 279 | } 280 | 281 | // Allocate enough words for each "row" times "depth"... 
282 | tree->data = tnyDB_calloc_data(sizeof(TWORD), _row_word_length(tree) * tree->depth, "tnyDB_wtree_create.data"); 283 | 284 | tree->ranks = tnyDB_list_create(); 285 | _write_tree(tree, translated, dataLength, 1, 0, 0, tree->keys->length - 1, 0); 286 | 287 | free(translated); 288 | 289 | // for (int i = 0; i < tree->depth; i++) { 290 | // tnyDB_tword_print(tree->data + (i * tree->row_length), tree->value_count); 291 | // printf("\n"); 292 | // } 293 | 294 | return tree; 295 | } 296 | 297 | -------------------------------------------------------------------------------- /src/tnyDB_wtree.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2011 Terence Siganakis. 2 | 3 | This file is part of TnyDB. 4 | 5 | TnyDB is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | TnyDB is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with TnyDB. If not, see . 
17 | */ 18 | 19 | /* Contact: Terence Siganakis */ 20 | 21 | 22 | #ifndef SPRDB_WTREE_H_ 23 | #define SPRDB_WTREE_H_ 24 | 25 | #include "tnyDB_list.h" 26 | #include "tnyDB_tword.h" 27 | #include "tnyDB_mem.h" 28 | 29 | typedef struct TWORD* wtree_row; 30 | 31 | typedef struct{ 32 | int value_count; 33 | int depth; 34 | int row_length; 35 | 36 | tnyDB_list *keys; 37 | TWORD *data; 38 | tnyDB_list *ranks; 39 | 40 | int vector_length; 41 | 42 | } tnyDB_wtree; 43 | 44 | 45 | 46 | tnyDB_wtree *tnyDB_wtree_create(int *data, int dataLength); 47 | TWORD * tnyDB_wtree_seek(tnyDB_wtree *tree, int value); 48 | int tnyDB_wtree_access(tnyDB_wtree *wtree, int rowIndex); 49 | tnyDB_list* tnyDB_wtree_scan(tnyDB_wtree *tree, TWORD *scan_list, int scan_list_length); 50 | 51 | void tnyDB_wtree_free(tnyDB_wtree *wtree); 52 | 53 | 54 | 55 | #endif /* SPRDB_TREE_H_ */ 56 | --------------------------------------------------------------------------------