├── Debug
├── makefile
├── objects.mk
├── sources.mk
└── src
│ └── subdir.mk
├── README
├── Release
├── makefile
├── objects.mk
├── sources.mk
└── src
│ └── subdir.mk
└── src
├── main.c
├── tnyDB_list.c
├── tnyDB_list.h
├── tnyDB_mem.c
├── tnyDB_mem.h
├── tnyDB_tarray.c
├── tnyDB_tarray.h
├── tnyDB_tmap.c
├── tnyDB_tmap.h
├── tnyDB_tword.c
├── tnyDB_tword.h
├── tnyDB_wtree.c
└── tnyDB_wtree.h
/Debug/makefile:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Automatically-generated file. Do not edit!
3 | ################################################################################
4 |
# Optional project-wide hook; the leading '-' makes a missing file non-fatal.
-include ../makefile.init

RM := rm -rf

# All of the sources participating in the build are defined here
-include sources.mk
-include subdir.mk
-include src/subdir.mk
-include objects.mk

# Load the compiler-generated dependency files unless the only goal is
# 'clean', which needs no dependency information.
ifneq ($(MAKECMDGOALS),clean)
ifneq ($(strip $(C_DEPS)),)
-include $(C_DEPS)
endif
endif

-include ../makefile.defs

# Add inputs and outputs from these tool invocations to the build variables

# All Target
all: tny

# Tool invocations
tny: $(OBJS) $(USER_OBJS)
	@echo 'Building target: $@'
	@echo 'Invoking: GCC C Linker'
	gcc -o "tny" $(OBJS) $(USER_OBJS) $(LIBS)
	@echo 'Finished building target: $@'
	@echo ' '

# Other Targets
# The lists are separated by spaces so the last word of one variable cannot
# fuse with the first word of the next (e.g. "x.o./src/main.d"), which would
# leave both files behind.
clean:
	-$(RM) $(OBJS) $(C_DEPS) $(EXECUTABLES) tny
	-@echo ' '

.PHONY: all clean dependents
.SECONDARY:

-include ../makefile.targets
45 |
--------------------------------------------------------------------------------
/Debug/objects.mk:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Automatically-generated file. Do not edit!
3 | ################################################################################
4 |
# Libraries appended to the end of the link command (none by default).
LIBS :=

# Additional object files linked into the target after $(OBJS) (none by default).
USER_OBJS :=
8 |
9 |
--------------------------------------------------------------------------------
/Debug/sources.mk:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Automatically-generated file. Do not edit!
3 | ################################################################################
4 |
# Source lists, one per input kind. They start empty and are appended to by
# the per-directory subdir.mk fragments.
C_SRCS :=
O_SRCS :=
OBJ_SRCS :=
ASM_SRCS :=
S_UPPER_SRCS :=

# Build products, likewise filled in by the subdir.mk fragments.
OBJS :=
C_DEPS :=
EXECUTABLES :=

# Every subdirectory with source files must be described here
SUBDIRS := src
18 |
--------------------------------------------------------------------------------
/Debug/src/subdir.mk:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Automatically-generated file. Do not edit!
3 | ################################################################################
4 |
# Add inputs and outputs from these tool invocations to the build variables
C_SRCS += \
../src/main.c \
../src/tnyDB_list.c \
../src/tnyDB_mem.c \
../src/tnyDB_tarray.c \
../src/tnyDB_tmap.c \
../src/tnyDB_tword.c \
../src/tnyDB_wtree.c

OBJS += \
./src/main.o \
./src/tnyDB_list.o \
./src/tnyDB_mem.o \
./src/tnyDB_tarray.o \
./src/tnyDB_tmap.o \
./src/tnyDB_tword.o \
./src/tnyDB_wtree.o

C_DEPS += \
./src/main.d \
./src/tnyDB_list.d \
./src/tnyDB_mem.d \
./src/tnyDB_tarray.d \
./src/tnyDB_tmap.d \
./src/tnyDB_tword.d \
./src/tnyDB_wtree.d


# Each subdirectory must supply rules for building sources it contributes
# -MT names the object file as the target of the generated dependency rule, so
# that a changed header makes the object out of date. Naming the .d file there
# instead would only ever "rebuild" the .d file and leave stale objects.
src/%.o: ../src/%.c
	@echo 'Building file: $<'
	@echo 'Invoking: GCC C Compiler'
	gcc -O0 -g3 -Wall -c -fmessage-length=0 -std=c99 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$@" -o "$@" "$<"
	@echo 'Finished building: $<'
	@echo ' '
41 |
42 |
43 |
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | ========================
2 | Introduction
3 | ========================
4 |
5 | Tny is a project that seeks to find and develop high performance data
6 | structures that have very low memory footprints. The hope is that these
7 | structures may form the basis of a high performance in-memory column oriented
8 | database for analyzing genomic information.
9 |
10 | In developing software for large data sets (billions of records, terabytes in size)
11 | the way you store your data in memory is critical – and you want your data in memory
12 | if you want to be able to analyse it quickly (e.g. minutes not days).
13 |
14 | Any data structure that relies on pointers for each data element quickly becomes
15 | unworkable due to the overhead of pointers. On a 64 bit system, with one pointer
16 | for each data element across a billion records you have just blown near 8GB of
17 | memory just in pointers.
18 |
19 | Thus there is a need for compact data structures that still have fast access characteristics.
20 |
21 | ========================
22 | The Challenge
23 | ========================
24 |
25 | The challenge is to come up with the fastest data structure that meets the following requirements:
26 | • Use less memory than an array in all circumstances
27 | • Fast Seek is more important than Fast Access
28 | • Seek and Access must be better than O(N).
29 |
30 | Where Seek and Access are defined as:
31 |
32 | Access (int index): Return me the value at the specified index ( like array[idx] ).
33 |
34 | Seek (int value): Return me all the Indexes that match value.
35 |
36 |
37 | (The actual return type of Seek is a little different, but logically the same. What we need to return is a bitmap where a bit set to 1 at position X means that value was found at index X. This allows us to combine results using logical ANDs rather than intersections as detailed here)
38 |
39 |
40 | For more information please check my blog at:
41 |
42 | http://siganakis.com
43 |
44 | This project is released under the GPL.
45 |
46 | Contact me at terence@siganakis.com.
47 |
48 |
49 |
50 |
--------------------------------------------------------------------------------
/Release/makefile:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Automatically-generated file. Do not edit!
3 | ################################################################################
4 |
# Optional project-wide hook; the leading '-' makes a missing file non-fatal.
-include ../makefile.init

RM := rm -rf

# All of the sources participating in the build are defined here
-include sources.mk
-include subdir.mk
-include src/subdir.mk
-include objects.mk

# Load the compiler-generated dependency files unless the only goal is
# 'clean', which needs no dependency information.
ifneq ($(MAKECMDGOALS),clean)
ifneq ($(strip $(C_DEPS)),)
-include $(C_DEPS)
endif
endif

-include ../makefile.defs

# Add inputs and outputs from these tool invocations to the build variables

# All Target
all: tny

# Tool invocations
tny: $(OBJS) $(USER_OBJS)
	@echo 'Building target: $@'
	@echo 'Invoking: GCC C Linker'
	gcc -o "tny" $(OBJS) $(USER_OBJS) $(LIBS)
	@echo 'Finished building target: $@'
	@echo ' '

# Other Targets
# The lists are separated by spaces so the last word of one variable cannot
# fuse with the first word of the next (e.g. "x.o./src/main.d"), which would
# leave both files behind.
clean:
	-$(RM) $(OBJS) $(C_DEPS) $(EXECUTABLES) tny
	-@echo ' '

.PHONY: all clean dependents
.SECONDARY:

-include ../makefile.targets
45 |
--------------------------------------------------------------------------------
/Release/objects.mk:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Automatically-generated file. Do not edit!
3 | ################################################################################
4 |
# Libraries appended to the end of the link command (none by default).
LIBS :=

# Additional object files linked into the target after $(OBJS) (none by default).
USER_OBJS :=
8 |
9 |
--------------------------------------------------------------------------------
/Release/sources.mk:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Automatically-generated file. Do not edit!
3 | ################################################################################
4 |
# Source lists, one per input kind. They start empty and are appended to by
# the per-directory subdir.mk fragments.
C_SRCS :=
O_SRCS :=
OBJ_SRCS :=
ASM_SRCS :=
S_UPPER_SRCS :=

# Build products, likewise filled in by the subdir.mk fragments.
OBJS :=
C_DEPS :=
EXECUTABLES :=

# Every subdirectory with source files must be described here
SUBDIRS := src
18 |
--------------------------------------------------------------------------------
/Release/src/subdir.mk:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Automatically-generated file. Do not edit!
3 | ################################################################################
4 |
# Add inputs and outputs from these tool invocations to the build variables
C_SRCS += \
../src/main.c \
../src/tnyDB_list.c \
../src/tnyDB_mem.c \
../src/tnyDB_tarray.c \
../src/tnyDB_tmap.c \
../src/tnyDB_tword.c \
../src/tnyDB_wtree.c

OBJS += \
./src/main.o \
./src/tnyDB_list.o \
./src/tnyDB_mem.o \
./src/tnyDB_tarray.o \
./src/tnyDB_tmap.o \
./src/tnyDB_tword.o \
./src/tnyDB_wtree.o

C_DEPS += \
./src/main.d \
./src/tnyDB_list.d \
./src/tnyDB_mem.d \
./src/tnyDB_tarray.d \
./src/tnyDB_tmap.d \
./src/tnyDB_tword.d \
./src/tnyDB_wtree.d


# Each subdirectory must supply rules for building sources it contributes
# -MT names the object file as the target of the generated dependency rule, so
# that a changed header makes the object out of date. Naming the .d file there
# instead would only ever "rebuild" the .d file and leave stale objects.
src/%.o: ../src/%.c
	@echo 'Building file: $<'
	@echo 'Invoking: GCC C Compiler'
	gcc -O3 -march=native -Wall -c -fmessage-length=0 -std=c99 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$@" -o "$@" "$<"
	@echo 'Finished building: $<'
	@echo ' '
41 |
42 |
43 |
--------------------------------------------------------------------------------
/src/main.c:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2011 Terence Siganakis.
2 |
3 | This file is part of TnyDB.
4 |
5 | TnyDB is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | TnyDB is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with TnyDB. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 |
19 | /* Contact: Terence Siganakis */
20 |
21 | #include
22 | #include
23 | #include
24 | #include
25 |
26 | #include "tnyDB_tmap.h"
27 | #include "tnyDB_wtree.h"
28 | #include "tnyDB_tarray.h"
29 | #include "tnyDB_tword.h"
30 |
31 | void tnyDB_tmap_access_test(int * values, int length) {
32 | tnyDB_mem_init();
33 |
34 | double elapsed; // in milliseconds
35 | clock_t start, end;
36 | start = clock();
37 |
38 | printf("Building TMAP... ");
39 | tnyDB_tmap *tree = tnyDB_tmap_create(values, length);
40 |
41 | end = clock();
42 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC;
43 | printf("Done (took %fms)\n", elapsed);
44 |
45 | tnyDB_mem_statistics memStats = tnyDB_mem_get_statistics();
46 |
47 | // printf("\n====== TMAP Mem Stats ======\n");
48 | printf("Bytes current: %i\n", memStats.bytes_current);
49 | // printf("Bytes total: %i\n", memStats.bytes_total);
50 | // printf("Allocations: %i\n", memStats.allocations);
51 | // printf("Frees: %i\n", memStats.frees);
52 | // printf("====== TMAP Mem Stats ======\n\n");
53 |
54 | printf("Testing TMAP access performance...");
55 | start = clock();
56 |
57 | for (int i = 0; i < length; i++) { //
58 | if (values[i] != tnyDB_tmap_access(tree, i)) {
59 | end = clock();
60 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC;
61 | fprintf(stderr, "\nmissmatch at: %i, found %i, expected %i (%f)\n", i, tnyDB_tmap_access(tree, i),
62 | values[i], elapsed);
63 |
64 | exit(-1);
65 | }
66 | }
67 |
68 | end = clock();
69 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC;
70 | printf("TMAP Passed test in %fms\n\n", elapsed);
71 | }
72 |
73 |
74 | void tnyDB_wtree_access_test(int * values, int length) {
75 | tnyDB_mem_init();
76 |
77 | double elapsed; // in milliseconds
78 | clock_t start, end;
79 | start = clock();
80 |
81 | printf("Building WTREE... ");
82 | tnyDB_wtree *tree = tnyDB_wtree_create(values, length);
83 |
84 | end = clock();
85 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC;
86 | printf("Done (took %fms)\n", elapsed);
87 |
88 | tnyDB_mem_statistics memStats = tnyDB_mem_get_statistics();
89 |
90 | // printf("\n====== WTREE Mem Stats ======\n");
91 | printf("Bytes current: %i\n", memStats.bytes_current);
92 | // printf("Bytes total: %i\n", memStats.bytes_total);
93 | // printf("Allocations: %i\n", memStats.allocations);
94 | // printf("Frees: %i\n", memStats.frees);
95 | // printf("====== WTREE Mem Stats ======\n\n");
96 |
97 | printf("Testing wTree access performance...");
98 | start = clock();
99 |
100 | for (int i = 0; i < length; i++) { //
101 | if (values[i] != tnyDB_wtree_access(tree, i)) {
102 | end = clock();
103 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC;
104 | fprintf(stderr, "\nmissmatch at: %i, found %i, expected %i (%f)\n", i, tnyDB_wtree_access(tree, i),
105 | values[i], elapsed);
106 |
107 | exit(-1);
108 | }
109 | }
110 |
111 | end = clock();
112 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC;
113 | printf("wtree Passed test in %fms\n\n", elapsed);
114 | }
115 |
116 | void tnyDB_tarray_access_test(int * values, int length) {
117 | tnyDB_mem_init();
118 |
119 | double elapsed; // in milliseconds
120 | clock_t start, end;
121 | start = clock();
122 |
123 | printf("Building ARRAY... ");
124 | tnyDB_tarray *tree = tnyDB_tarray_create(values, length);
125 |
126 | end = clock();
127 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC;
128 | printf("Done (took %fms)\n", elapsed);
129 |
130 | tnyDB_mem_statistics memStats = tnyDB_mem_get_statistics();
131 |
132 | // printf("\n====== array Mem Stats ======\n");
133 | printf("Bytes current: %i\n", memStats.bytes_current);
134 | // printf("Bytes total: %i\n", memStats.bytes_total);
135 | // printf("Allocations: %i\n", memStats.allocations);
136 | // printf("Frees: %i\n", memStats.frees);
137 | // printf("====== array Mem Stats ======\n\n");
138 |
139 | printf("Testing ARRAY access performance...");
140 | start = clock();
141 |
142 | for (int i = 0; i < length; i++) { //
143 | if (values[i] != tnyDB_tarray_access(tree, i)) {
144 | end = clock();
145 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC;
146 | fprintf(stderr, "\nmissmatch at: %i, found %i, expected %i (%f)\n", i, tnyDB_tarray_access(tree, i),
147 | values[i], elapsed);
148 |
149 | exit(-1);
150 | }
151 | }
152 |
153 | end = clock();
154 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC;
155 | printf("ARRAY Passed test in %fms\n\n", elapsed);
156 | }
157 |
/*
 * Generates `length` pseudo-random values in the range [0, keys) and runs the
 * access test of each structure (tarray, tmap, wtree) over them.
 *
 * keys:   number of distinct values to draw from; must be positive because it
 *         is used as the modulus for rand().
 * length: number of values to generate; must be positive.
 *
 * Returns 1 when the tests were run, 0 when the arguments were rejected or the
 * input buffer could not be allocated.
 */
int tnyDB_access_test(int keys, int length) {
    if (keys <= 0 || length <= 0) {
        fprintf(stderr, "tnyDB_access_test: keys and length must be positive (got %i and %i)\n", keys, length);
        return 0;
    }

    srand(time(NULL));

    int *values = malloc((size_t) length * sizeof(int));
    if (values == NULL) {
        fprintf(stderr, "tnyDB_access_test: could not allocate %i values\n", length);
        return 0;
    }

    printf("Generating data...");
    for (int i = 0; i < length; i++) {
        values[i] = rand() % keys;
    }
    printf("Done\n\n");

    tnyDB_tarray_access_test(values, length);
    tnyDB_tmap_access_test(values, length);
    tnyDB_wtree_access_test(values, length);

    free(values);

    return 1;
}
177 |
178 | void tnyDB_tarray_seek_test(int * values, int length, int seeks) {
179 | tnyDB_mem_init();
180 |
181 | double elapsed; // in milliseconds
182 | clock_t start, end;
183 | start = clock();
184 |
185 | printf("Building ARRAY... ");
186 | tnyDB_tarray *tree = tnyDB_tarray_create(values, length);
187 |
188 | end = clock();
189 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC;
190 | printf("Done (took %fms)\n", elapsed);
191 |
192 | tnyDB_mem_statistics memStats = tnyDB_mem_get_statistics();
193 |
194 | // printf("\n====== ARRAY Mem Stats ======\n");
195 | printf("Bytes current: %i\n", memStats.bytes_current);
196 | // printf("Bytes total: %i\n", memStats.bytes_total);
197 | // printf("Allocations: %i\n", memStats.allocations);
198 | // printf("Frees: %i\n", memStats.frees);
199 |
200 | printf("Testing ARRAY SEEK performance... ");
201 | start = clock();
202 |
203 | for (int i = 0; i < seeks; i++) { //
204 | int seeking = values[i];
205 | TWORD * results = tnyDB_tarray_seek(tree, seeking);
206 |
207 | // Now in the results, a bit at "i" should be set
208 | if (tnyDB_tword_bit_is_set(results, i) == 0) {
209 | end = clock();
210 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC;
211 | fprintf(stderr, "\n ARRAY SEEK: missmatch at: %i, BIT NOT SET\n", i);
212 | exit(-1);
213 | }
214 |
215 | }
216 |
217 | end = clock();
218 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC;
219 | printf("Passed test in %fms\n\n", elapsed);
220 | }
221 |
222 | void tnyDB_tmap_seek_test(int * values, int length, int seeks) {
223 | tnyDB_mem_init();
224 |
225 | double elapsed; // in milliseconds
226 | clock_t start, end;
227 | start = clock();
228 |
229 | printf("Building TMAP... ");
230 | tnyDB_tmap *tree = tnyDB_tmap_create(values, length);
231 |
232 | end = clock();
233 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC;
234 | printf("Done (took %fms)\n", elapsed);
235 |
236 | tnyDB_mem_statistics memStats = tnyDB_mem_get_statistics();
237 |
238 | // printf("\n====== TMAP Mem Stats ======\n");
239 | printf("Bytes current: %i\n", memStats.bytes_current);
240 | // printf("Bytes total: %i\n", memStats.bytes_total);
241 | // printf("Allocations: %i\n", memStats.allocations);
242 | // printf("Frees: %i\n", memStats.frees);
243 | // printf("====== TMAP Mem Stats ======\n\n");
244 |
245 | printf("Testing TMAP SEEK performance... ");
246 | start = clock();
247 |
248 | for (int i = 0; i < seeks; i++) { //
249 | int seeking = values[i];
250 | TWORD * results = tnyDB_tmap_seek(tree, seeking);
251 |
252 | // Now in the results, a bit at "i" should be set
253 | if (tnyDB_tword_bit_is_set(results, i) == 0) {
254 | end = clock();
255 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC;
256 | fprintf(stderr, "\n TMAP Seek: missmatch at: %i, BIT NOT SET\n", i);
257 | exit(-1);
258 | }
259 |
260 | }
261 |
262 | end = clock();
263 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC;
264 | printf(" Passed test in %fms\n\n", elapsed);
265 | }
266 |
267 |
268 | void tnyDB_wtree_seek_test(int * values, int length, int seeks) {
269 | tnyDB_mem_init();
270 |
271 | double elapsed; // in milliseconds
272 | clock_t start, end;
273 | start = clock();
274 |
275 | printf("Building WTREE... ");
276 | tnyDB_wtree *tree = tnyDB_wtree_create(values, length);
277 |
278 | end = clock();
279 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC;
280 | printf("Done (took %fms)\n", elapsed);
281 |
282 | tnyDB_mem_statistics memStats = tnyDB_mem_get_statistics();
283 |
284 | // printf("\n====== TMAP Mem Stats ======\n");
285 | printf("Bytes current: %i\n", memStats.bytes_current);
286 | // printf("Bytes total: %i\n", memStats.bytes_total);
287 | // printf("Allocations: %i\n", memStats.allocations);
288 | // printf("Frees: %i\n", memStats.frees);
289 | // printf("====== TMAP Mem Stats ======\n\n");
290 |
291 | printf("Testing WTREE SEEK performance... ");
292 | start = clock();
293 |
294 | for (int i = 0; i < seeks; i++) { //
295 | int seeking = values[i];
296 | TWORD * results = tnyDB_wtree_seek(tree, seeking);
297 |
298 | // Now in the results, a bit at "i" should be set
299 | if (tnyDB_tword_bit_is_set(results, i) == 0) {
300 | end = clock();
301 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC;
302 | fprintf(stderr, "\n WTREE Seek: missmatch at: %i, BIT NOT SET\n", i);
303 | exit(-1);
304 | }
305 |
306 | }
307 |
308 | end = clock();
309 | elapsed = ((double) (end - start) * 1000) / CLOCKS_PER_SEC;
310 | printf(" Passed test in %fms\n\n", elapsed);
311 | }
312 |
313 |
/*
 * Generates `length` pseudo-random values in the range [0, keys) and runs the
 * seek test of each structure (tarray, tmap, wtree) over them.
 *
 * keys:   number of distinct values to draw from; must be positive because it
 *         is used as the modulus for rand().
 * length: number of values to generate; must be positive.
 * seeks:  number of seeks each structure test is asked to perform.
 *
 * Returns 1 when the tests were run, 0 when the arguments were rejected or the
 * input buffer could not be allocated.
 */
int tnyDB_seek_test(int keys, int length, int seeks) {
    if (keys <= 0 || length <= 0) {
        fprintf(stderr, "tnyDB_seek_test: keys and length must be positive (got %i and %i)\n", keys, length);
        return 0;
    }

    srand(time(NULL));

    int *values = malloc((size_t) length * sizeof(int));
    if (values == NULL) {
        fprintf(stderr, "tnyDB_seek_test: could not allocate %i values\n", length);
        return 0;
    }

    printf("Generating data...");
    for (int i = 0; i < length; i++) {
        values[i] = rand() % keys;
    }
    printf("Done\n\n");

    tnyDB_tarray_seek_test(values, length, seeks);
    tnyDB_tmap_seek_test(values, length, seeks);
    tnyDB_wtree_seek_test(values, length, seeks);

    free(values);

    return 1;
}
333 |
/*
 * Entry point: parses the three numeric arguments and runs the access and seek
 * tests.
 *
 * argv[1]: number of values to generate (must be positive)
 * argv[2]: number of distinct values (must be positive)
 * argv[3]: number of seeks to perform (0 .. number of values)
 *
 * Returns EXIT_SUCCESS after the tests ran, EXIT_FAILURE on a usage error.
 */
int main(int argc, char *argv[]) {

    if (argc != 4) {
        fprintf(stderr, "Please supply 3 arguments:\n");
        fprintf(stderr, "\t1.\t The number of values to generate\n");
        fprintf(stderr, "\t2.\t The number of distinct values\n");
        fprintf(stderr, "\t3.\t The number of times the structures should be seeked\n");
        // argc counts the program name as well, so report argc - 1.
        fprintf(stderr, "You supplied %i arguments\n", argc - 1);
        return EXIT_FAILURE;
    }

    int values = atoi(argv[1]);
    int keys = atoi(argv[2]);
    int seeks = atoi(argv[3]);

    // atoi yields 0 for non-numeric text; a key count of 0 would be used as a
    // modulus and seek i reads generated value i, so reject bad values here.
    if (values <= 0 || keys <= 0 || seeks < 0 || seeks > values) {
        fprintf(stderr, "Invalid arguments: values and distinct values must be positive, "
                "and seeks must be between 0 and the number of values\n");
        return EXIT_FAILURE;
    }

    tnyDB_access_test(keys, values);
    tnyDB_seek_test(keys, values, seeks);

    return EXIT_SUCCESS;
}
356 |
--------------------------------------------------------------------------------
/src/tnyDB_list.c:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2011 Terence Siganakis.
2 |
3 | This file is part of TnyDB.
4 |
5 | TnyDB is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | TnyDB is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with TnyDB. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 |
19 | /* Contact: Terence Siganakis */
20 |
21 | #include
22 | #include
23 | #include
24 | #include "tnyDB_list.h"
25 |
26 | int tnyDB_list_check_resize(tnyDB_list *list, int newLength) {
27 |
28 | if (newLength >= list->allocated_length) {
29 | int oldLength = list->allocated_length;
30 | int increaseBy = newLength * 0.2;
31 | if (increaseBy < 3) {
32 | increaseBy = 3;
33 | }
34 | int oldAllocated = list->allocated_length;
35 | list->allocated_length = newLength + increaseBy;
36 |
37 | if (oldLength != list->allocated_length) {
38 | // We need to create a new array I reckon...
39 | int * new = tnyDB_malloc_data(list->allocated_length * sizeof(int), "tnyDB_list_check_resize.new");
40 | for (int i = 0; i < oldAllocated; i++) {
41 | new[i] = list->values[i];
42 | }
43 |
44 | if (list->values != NULL) {
45 | tnyDB_free_data(list->values, oldAllocated * sizeof(int), "tnyDB_list_check_resize");
46 | }
47 | list->values = new;
48 |
49 | // printf("tnyDB_list resized to: %i\n", list->allocated_length);
50 | }
51 | }
52 |
53 | return 0;
54 | }
55 |
56 | void tnyDB_list_push(tnyDB_list *list, int value) {
57 | tnyDB_list_check_resize(list, list->length + 1);
58 |
59 | if (list->length >= list->allocated_length) {
60 | printf("Buffer overrun in tnyDB_list_push: list->length >= list->allocated_length (%i >= %i)\nExiting!\n",
61 | list->length, list->allocated_length);
62 | exit(-1);
63 | }
64 |
65 | list->values[list->length] = value;
66 | list->length++;
67 | }
68 |
69 | void tnyDB_list_insert(tnyDB_list *list, int index, int value) {
70 |
71 | int biggest = list->length + 1 > index + 1 ? list->length + 1 : index + 1;
72 |
73 | tnyDB_list_check_resize(list, biggest);
74 |
75 | if (index >= list->allocated_length) {
76 | printf("Buffer overrun in tnyDB_list_push: list->length >= list->allocated_length (%i >= %i)\nExiting!\n",
77 | list->length, list->allocated_length);
78 | exit(-1);
79 | }
80 |
81 | for (int i = list->length - 1; i >= index; i--) {
82 | list->values[i + 1] = list->values[i];
83 | }
84 |
85 | list->values[index] = value;
86 |
87 | list->length++;
88 |
89 | }
90 |
91 | void tnyDB_list_set(tnyDB_list *list, int index, int value) {
92 |
93 | int biggest = list->length + 1 > index + 1 ? list->length + 1 : index + 1;
94 |
95 | tnyDB_list_check_resize(list, biggest);
96 |
97 | if (index >= list->allocated_length) {
98 | printf("Buffer overrun in tnyDB_list_push: list->length >= list->allocated_length (%i >= %i)\nExiting!\n",
99 | list->length, list->allocated_length);
100 | exit(-1);
101 | }
102 |
103 | list->values[index] = value;
104 |
105 | if (index < list->length) {
106 | list->length++;
107 | } else {
108 | list->length = index + 1;
109 | }
110 |
111 | }
112 |
113 | int tnyDB_list_get(tnyDB_list *list, int index) {
114 | if (index > list->length) {
115 | fprintf(stderr, "ERROR: Couldn't realloc memory!\n");
116 | return (-1);
117 | }
118 |
119 | return list->values[index];
120 | }
121 |
122 | int tnyDB_list_binary_find(tnyDB_list *list, int seeking) {
123 | int l = 0;
124 | int r = list->length;
125 | int m = 0;
126 |
127 | if (r == 0) {
128 | return ~0;
129 | }
130 |
131 | while (seeking != list->values[m] && l <= r) {
132 | m = (l + r) / 2;
133 |
134 | if (m >= list->length)
135 | break;
136 |
137 | if (seeking < list->values[m])
138 | r = m - 1;
139 | if (seeking > list->values[m])
140 | l = m + 1;
141 | }
142 |
143 | if (l <= r && m < list->length) {
144 | //printf("tnyDB_list_binary_find: Found (Seeking: %i, l: %i, m: %i,r: %i, Length: %i)\n", seeking, l, m, r, list->length);
145 | return m;
146 | } else {
147 | //printf("tnyDB_list_binary_find: Not found (Seeking: %i, l: %i, m: %i,r: %i, Length: %i)\n", seeking, l, m, r, list->length);
148 | return ~l;
149 | }
150 |
151 | }
152 |
153 | int tnyDB_list_find(tnyDB_list *list, int seeking) {
154 | return tnyDB_list_binary_find(list, seeking);
155 | }
156 | tnyDB_list *tnyDB_list_create_allocated(int size) {
157 |
158 | tnyDB_list *list = tnyDB_malloc_data(sizeof(tnyDB_list), "tnyDB_list_create_allocated.list");
159 | list->values = tnyDB_malloc_data(sizeof(int), "tnyDB_list_create_allocated.list->values");
160 | list->allocated_length = size;
161 | list->length = 0;
162 |
163 | return list;
164 | }
165 | tnyDB_list *tnyDB_list_create() {
166 | tnyDB_list *list = tnyDB_malloc_data(sizeof(tnyDB_list), "tnyDB_list_create");
167 | list->allocated_length = 0;
168 | list->length = 0;
169 | list->values = NULL;
170 | return list;
171 | }
172 |
173 | /*
174 | void tnyDB_quick_sort(int *arr, int elements) {
175 |
176 | #define MAX_LEVELS 300
177 |
178 | int piv, beg[MAX_LEVELS], end[MAX_LEVELS], i=0, L, R, swap ;
179 |
180 | beg[0]=0; end[0]=elements;
181 | while (i>=0) {
182 | L=beg[i]; R=end[i]-1;
183 | if (L=piv && Lend[i-1]-beg[i-1]) {
190 | swap=beg[i]; beg[i]=beg[i-1]; beg[i-1]=swap;
191 | swap=end[i]; end[i]=end[i-1]; end[i-1]=swap;
192 | }
193 | }
194 | else {
195 | i--;
196 | }
197 | }
198 | }
199 | */
200 |
/* Runs shorter than this are sorted with insertion sort instead of merging. */
#define MIN_MERGESORT_LIST_SIZE 32

/*
 * Sorts a[0 .. size-1] ascending with a top-down merge sort.
 *
 * a:    array to sort in place.
 * size: number of elements in `a`.
 * temp: scratch buffer with room for at least `size` ints.
 */
void mergesort_array(int a[], int size, int temp[]) {
    if (size < MIN_MERGESORT_LIST_SIZE) {
        /* Short run: straight insertion sort */
        for (int pos = 1; pos < size; pos++) {
            int key = a[pos];
            int hole = pos;
            while (hole > 0 && a[hole - 1] > key) {
                a[hole] = a[hole - 1];
                hole--;
            }
            a[hole] = key;
        }
        return;
    }

    const int half = size / 2;

    /* Sort both halves, then merge them into temp */
    mergesort_array(a, half, temp);
    mergesort_array(a + half, size - half, temp);

    int left = 0;
    int right = half;
    int out = 0;
    for (; left < half && right < size; out++) {
        /* <= keeps equal elements in their original order */
        temp[out] = (a[left] <= a[right]) ? a[left++] : a[right++];
    }

    /* At most one of the halves still has elements left over */
    if (left < half) {
        memcpy(temp + out, a + left, (half - left) * sizeof(int));
    } else if (right < size) {
        memcpy(temp + out, a + right, (size - right) * sizeof(int));
    }

    memcpy(a, temp, size * sizeof(int));
}
247 |
/* Exchanges the two ints that `a` and `b` point to. */
void tnyDB_swap(int *a, int *b)
{
    const int held = *b;
    *b = *a;
    *a = held;
}
/*
 * Recursive quicksort of arr[beg .. end-1] (end is exclusive), using the first
 * element of the range as the pivot.
 */
void tnyDB_quick_sort(int arr[], int beg, int end)
{
    /* Ranges of fewer than two elements are already sorted */
    if (end <= beg + 1)
        return;

    const int pivot = arr[beg];
    int lo = beg + 1;
    int hi = end;

    /* Partition: values <= pivot stay on the left, the rest go to the right */
    while (lo < hi)
    {
        if (arr[lo] > pivot)
        {
            hi--;
            tnyDB_swap(&arr[lo], &arr[hi]);
        }
        else
        {
            lo++;
        }
    }

    /* Put the pivot between the two partitions */
    lo--;
    tnyDB_swap(&arr[lo], &arr[beg]);

    tnyDB_quick_sort(arr, beg, lo);
    tnyDB_quick_sort(arr, hi, end);
}
269 |
270 |
271 | tnyDB_list *tnyDB_list_sorted_distinct(int *list, int length){
272 | int *sorted = tnyDB_malloc_data(sizeof(int) * length, "tnyDB_list_sorted_distinct.sorted");
273 | int *tmpSorted = tnyDB_malloc_data(sizeof(int) * length, "tnyDB_list_sorted_distinct.tmpSorted");
274 | memcpy(sorted, list, length* sizeof(int));
275 |
276 |
277 | // Sort it
278 | // tnyDB_quick_sort(sorted, 0, length);
279 | mergesort_array(sorted, length, tmpSorted);
280 | tnyDB_free_data(tmpSorted, sizeof(int) * length, "tnyDB_list_sorted_distinct.tmpSorted");
281 |
282 | // Extract only the unique values
283 | int ci=1, last=sorted[0];
284 |
285 |
286 | for (int i =1; i < length; i++){
287 | if (sorted[i] != last){
288 | last = sorted[i];
289 | sorted[ci] = sorted[i];
290 | ci++;
291 | }
292 | }
293 |
294 | // Resize the array
295 | sorted = tnyDB_realloc_data(sorted, sizeof(int) * (ci), sizeof(int) * length, "tnyDB_list_sorted_distinct.shrink");
296 |
297 |
298 | tnyDB_list *result = tnyDB_malloc_data(sizeof(tnyDB_list), "tnyDB_list_sorted_distinct.list");
299 | result->values = sorted;
300 | result->allocated_length = ci+1;
301 | result->length = ci;
302 |
303 | return result;
304 | }
305 |
306 | void tnyDB_list_free(tnyDB_list * toFree) {
307 | tnyDB_free_data(toFree->values, toFree->allocated_length * sizeof(int), "tnyDB_list_free (values)");
308 | tnyDB_free_data(toFree, sizeof(tnyDB_list), "tnyDB_list_free");
309 | }
310 |
311 |
--------------------------------------------------------------------------------
/src/tnyDB_list.h:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2011 Terence Siganakis.
2 |
3 | This file is part of TnyDB.
4 |
5 | TnyDB is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | TnyDB is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with TnyDB. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 |
19 | /* Contact: Terence Siganakis */
20 |
21 | #ifndef TNYDB_LIST_H_
22 | #define TNYDB_LIST_H_
23 |
24 | #include "tnyDB_mem.h"
25 |
// A growable array of ints.
typedef struct {
    int length;            // number of values currently stored
    int allocated_length;  // capacity of the values buffer, counted in ints
    int *values;           // the stored values
} tnyDB_list;


// Appends a value to the end of the list
void tnyDB_list_push(tnyDB_list *list, int value);

// Inserts a value into the list at the specified index,
// moving items that occur after the index to their index+1
void tnyDB_list_insert(tnyDB_list *list, int index, int value);

// Sets the value at the specified index to the specified value
void tnyDB_list_set(tnyDB_list *list, int index, int value);

// Gets the value at the specified index
int tnyDB_list_get(tnyDB_list *list, int index);

// Locates the first occurrence of the specified value by doing a
// binary search. If no item is found, the bitwise NOT (~) of the
// index where it would be found is returned (a negative number).
// This function requires that the list is already sorted.
int tnyDB_list_find(tnyDB_list *list, int value);


// Creates a list and returns its reference
tnyDB_list *tnyDB_list_create(void);


// Creates a list that is pre-allocated to the size specified
tnyDB_list *tnyDB_list_create_allocated(int size);


// Creates a list of the distinct values from *list, sorted ascending
tnyDB_list *tnyDB_list_sorted_distinct(int *list, int length);


// Frees memory used by this list
void tnyDB_list_free(tnyDB_list * toFree) ;
67 |
68 |
69 | #endif /* TNYDB_LIST_H_ */
70 |
--------------------------------------------------------------------------------
/src/tnyDB_mem.c:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2011 Terence Siganakis.
2 |
3 | This file is part of TnyDB.
4 |
5 | TnyDB is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | TnyDB is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with TnyDB. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 |
19 | /* Contact: Terence Siganakis */
20 |
21 | #include "tnyDB_mem.h"
22 |
23 | tnyDB_mem_statistics tnyDB_mem_stats;
24 |
25 | void tnyDB_mem_init() {
26 |
27 | tnyDB_mem_stats.allocations = 0;
28 | tnyDB_mem_stats.frees = 0;
29 | tnyDB_mem_stats.bytes_total = 0;
30 | tnyDB_mem_stats.bytes_current = 0;
31 | }
32 |
// Plain malloc() wrapper; does not touch the statistics.
// The tag parameter is accepted but not used.
void *tnyDB_malloc(int amount, char* tag) {
    void *block = malloc(amount);
    return block;
}
36 |
// Plain free() wrapper; does not touch the statistics.
// The tag parameter is accepted but not used.
void tnyDB_free(void *ptr, char* tag) {
    free(ptr);
}
41 |
42 | tnyDB_mem_statistics tnyDB_mem_get_statistics() {
43 | return tnyDB_mem_stats;
44 | }
45 |
46 | void tnyDB_free_data(void *ptr, int bytes, char* tag) {
47 |
48 | tnyDB_mem_stats.frees++;
49 | tnyDB_mem_stats.bytes_current -= bytes;
50 |
51 | free(ptr);
52 | }
53 |
54 | void *tnyDB_realloc_data(void *ptr, int new_bytes, int old_bytes, char* tag) {
55 | int difference = new_bytes - old_bytes;
56 | tnyDB_mem_stats.frees++;
57 | tnyDB_mem_stats.allocations++;
58 | tnyDB_mem_stats.bytes_current += difference;
59 |
60 | void* newptr = realloc(ptr, new_bytes);
61 | if (newptr != NULL) {
62 | return newptr;
63 | } else {
64 | fprintf(stderr, "REALLOC Failed! {new_bytes: %i, old_bytes: %i}", new_bytes, old_bytes);
65 | exit(-1);
66 | }
67 | }
68 | void *tnyDB_calloc_data(int item_size, int item_count, char* tag) {
69 | tnyDB_mem_stats.allocations++;
70 | tnyDB_mem_stats.bytes_current += (item_size * item_count);
71 | tnyDB_mem_stats.bytes_total += (item_size * item_count);
72 |
73 |
74 |
75 | void *ptr = calloc(item_count, item_size);
76 |
77 | return ptr;
78 | }
79 |
80 | void *tnyDB_malloc_data(int bytes, char* tag) {
81 | tnyDB_mem_stats.allocations++;
82 | tnyDB_mem_stats.bytes_current += bytes;
83 | tnyDB_mem_stats.bytes_total += bytes;
84 |
85 | void * ptr = malloc(bytes);
86 |
87 | return ptr;
88 | }
89 |
--------------------------------------------------------------------------------
/src/tnyDB_mem.h:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2011 Terence Siganakis.
2 |
3 | This file is part of TnyDB.
4 |
5 | TnyDB is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | TnyDB is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with TnyDB. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 |
19 | /* Contact: Terence Siganakis */
20 |
21 | #ifndef TNYDB_MEM_H_
22 | #define TNYDB_MEM_H_
23 |
24 |
25 | #include <stdlib.h>
26 | #include <stdio.h>
27 |
28 |
// Counters maintained by the tnyDB_*_data allocation wrappers.
typedef struct {
    int allocations;    // incremented by malloc_data, calloc_data and realloc_data
    int frees;          // incremented by free_data and realloc_data

    int bytes_total;    // bytes requested through malloc_data and calloc_data
    int bytes_current;  // bytes currently accounted as live
} tnyDB_mem_statistics;


// Untracked wrappers around malloc() / free().
void *tnyDB_malloc(int amount, char *tag);
void tnyDB_free(void *ptr, char *tag);


// Tracked wrappers: each call updates the statistics above.
void tnyDB_free_data(void *ptr, int bytes, char* tag);
void *tnyDB_malloc_data(int bytes, char* tag);
void *tnyDB_realloc_data(void *ptr, int new_bytes, int old_bytes, char* tag);
void *tnyDB_calloc_data(int item_size, int item_count, char* tag);

// Statistics control: reset the counters / read a snapshot of them.
void tnyDB_mem_init();
tnyDB_mem_statistics tnyDB_mem_get_statistics();
52 |
53 | #endif /* TNYDB_MEM_H_ */
54 |
--------------------------------------------------------------------------------
/src/tnyDB_tarray.c:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2011 Terence Siganakis.
2 |
3 | This file is part of TnyDB.
4 |
5 | TnyDB is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | TnyDB is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with TnyDB. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 |
19 | /* Contact: Terence Siganakis */
20 |
21 | #include "tnyDB_tarray.h"
22 |
23 | tnyDB_tarray *tnyDB_tarray_create(int *data, int dataLength) {
24 | tnyDB_tarray * result = tnyDB_malloc_data(sizeof(tnyDB_tarray), "tnyDB_wtree_create.result");
25 | result->length = dataLength;
26 | result->data = tnyDB_calloc_data(sizeof(int), dataLength, "tnyDB_wtree_create.result->data");
27 |
28 | for (int i =0; i < dataLength; i++){
29 | result->data[i] = data[i];
30 | }
31 |
32 | return result;
33 | }
34 |
35 | TWORD * tnyDB_tarray_seek(tnyDB_tarray *tree, int value){
36 | TWORD *result = calloc(sizeof(TWORD), (tree->length/sizeof(TWORD))+1 );
37 |
38 | for (int i =0; i < tree->length; i++){
39 | if (tree->data[i] == value){
40 | tnyDB_tword_set(result, i);
41 | }
42 | }
43 |
44 | return result;
45 | }
46 |
47 | int tnyDB_tarray_access(tnyDB_tarray *tree, int rowIndex){
48 | return tree->data[rowIndex];
49 | }
50 |
--------------------------------------------------------------------------------
/src/tnyDB_tarray.h:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2011 Terence Siganakis.
2 |
3 | This file is part of TnyDB.
4 |
5 | TnyDB is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | TnyDB is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with TnyDB. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 |
19 | /* Contact: Terence Siganakis */
20 |
21 | #ifndef TNYDB_TARRAY_H_
22 | #define TNYDB_TARRAY_H_
23 |
24 | #include "tnyDB_tword.h"
25 |
26 | typedef struct {
27 | int* data;
28 | int length;
29 | } tnyDB_tarray;
30 |
31 | TWORD * tnyDB_tarray_seek(tnyDB_tarray *tree, int value);
32 | int tnyDB_tarray_access(tnyDB_tarray *wtree, int rowIndex);
33 | tnyDB_tarray *tnyDB_tarray_create(int *data, int dataLength);
34 |
35 | #endif /* TNYDB_TARRAY_H_ */
36 |
--------------------------------------------------------------------------------
/src/tnyDB_tmap.c:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2011 Terence Siganakis.
2 |
3 | This file is part of TnyDB.
4 |
5 | TnyDB is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | TnyDB is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with TnyDB. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 |
19 | /* Contact: Terence Siganakis */
20 |
21 | #include "tnyDB_tmap.h"
22 |
23 | tnyDB_list * _tmap_get_keys(int *data, int dataLength) {
24 | tnyDB_list *keys = tnyDB_list_create();
25 | for (int i = 0; i < dataLength; i++) {
26 | int m = tnyDB_list_find(keys, data[i]);
27 | if (m < 0) {
28 | tnyDB_list_insert(keys, ~m, data[i]);
29 | }
30 | }
31 | return keys;
32 | }
33 |
// Integer base-2 logarithm: the index of the highest set bit of v.
// Returns 0 for v == 0 as well as for v == 1.
static inline int _tmap_log2(unsigned int v) {
    unsigned int exponent = 0;

    // Unrolled binary search: test the upper half of the remaining bit
    // range and, when anything is set there, shift it down and record it.
    if (v & 0xFFFF0000) {
        v >>= 16;
        exponent |= 16;
    }
    if (v & 0xFF00) {
        v >>= 8;
        exponent |= 8;
    }
    if (v & 0xF0) {
        v >>= 4;
        exponent |= 4;
    }
    if (v & 0xC) {
        v >>= 2;
        exponent |= 2;
    }
    if (v & 0x2) {
        v >>= 1;
        exponent |= 1;
    }

    return exponent;
}
48 |
49 | static inline int _tmap_row_word_length(tnyDB_tmap *map) {
50 | return (map->length / 64) + 1;
51 |
52 | }
53 |
54 |
55 | tnyDB_tmap *tnyDB_tmap_create(int *data, int dataLength) {
56 | tnyDB_tmap *map = tnyDB_malloc_data(sizeof(tnyDB_tmap), "tnyDB_tmap_create.tmap");
57 |
58 | map->length = dataLength;
59 | // map->keys = _tmap_get_keys(data, dataLength);
60 | map->keys = tnyDB_list_sorted_distinct(data, dataLength);
61 |
62 |
63 | // tnyDB_list * list1 = _tmap_get_keys(data, dataLength);
64 | // tnyDB_list * list2 = tnyDB_list_sorted_distinct(data, dataLength);
65 |
66 | // for (int i =0; i < list1->length || i < list2->length; i++){
67 | // printf("%i %i\n", list1->values[i], list2->values[i]);
68 | // }
69 | // exit(0);
70 |
71 |
72 | map->depth = _tmap_log2(map->keys->length) + 1;
73 | map->data = tnyDB_calloc_data(sizeof(TWORD*), map->depth, "tnyDB_tmap_create.tmap->data[]");
74 |
75 | int wordLength = _tmap_row_word_length(map);
76 | for (int d =0; d < map->depth; d++){
77 | map->data[d] = tnyDB_calloc_data(sizeof(TWORD), wordLength, "tnyDB_tmap_create.tmap->data[i]");
78 | }
79 |
80 | // Build a new list "translated" which will contain all the
81 | // distinct values within the data array. We then use the
82 | // bit vector stored in Data to handle references to this array
83 |
84 | int *translated = malloc(sizeof(int) * dataLength);
85 | for (int i = 0; i < dataLength; i++) {
86 | translated[i] = tnyDB_list_find(map->keys, data[i]);
87 | }
88 |
89 |
90 | for (int d = 0; d < map->depth; d++) {
91 | for (int w = 0; w < wordLength; w++){
92 | TWORD tmp=0;
93 | for (int i = 0; i < 64; i++){
94 | if ((translated[(w*64) + i] & (1 << d)) != 0) {
95 | tmp |= 1ull << i;
96 | }
97 | }
98 | map->data[d][w] = tmp;
99 |
100 | }
101 |
102 | }
103 | free(translated);
104 |
105 | return map;
106 | }
107 |
108 | int tnyDB_tmap_access(tnyDB_tmap *map, int rowIndex) {
109 | int keyIndex = 0;
110 | for (int d = 0; d < map->depth; d++) {
111 | if (tnyDB_tword_bit_is_set(map->data[d], rowIndex)) {
112 | keyIndex |= 1 << d;
113 | }
114 | }
115 | return map->keys->values[keyIndex];
116 |
117 | }
118 |
119 | TWORD * tnyDB_tmap_seek(tnyDB_tmap *map, int value) {
120 | TWORD * result = tnyDB_calloc_data(sizeof(TWORD), _tmap_row_word_length(map), "tnyDB_tmap_seek.result");
121 | int keyIndex = tnyDB_list_find(map->keys, value);
122 |
123 | // Make a little cache...
124 | int keyBitMask[map->depth];
125 | for (int j = 0; j < map->depth; j++) {
126 | keyBitMask[j] = (keyIndex & (1 << j));
127 | }
128 |
129 | int rowWordLength = _tmap_row_word_length(map);
130 |
131 | // Do we have it in our key index?
132 | if (keyIndex >= 0) {
133 | // Go through the data one word at a time...
134 | for (int w = 0; w < rowWordLength; w++) {
135 |
136 | TWORD wordMatches = keyBitMask[0] == 0 ? ~map->data[0][w] : map->data[0][w];
137 | if (wordMatches != 0) {
138 | for (int d = 1; d < map->depth; d++) {
139 | if (keyBitMask[d] == 0) {
140 | wordMatches &= ~(map->data[d][w]);
141 | } else {
142 | wordMatches &= (map->data[d][w]);
143 | }
144 | if (wordMatches == 0)
145 | break;
146 | }
147 | result[w] = wordMatches;
148 | }
149 | }
150 |
151 | }
152 |
153 | return result;
154 | }
155 |
156 |
--------------------------------------------------------------------------------
/src/tnyDB_tmap.h:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2011 Terence Siganakis.
2 |
3 | This file is part of TnyDB.
4 |
5 | TnyDB is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | TnyDB is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with TnyDB. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 |
19 | /* Contact: Terence Siganakis */
20 |
21 | #ifndef TNYDB_TMAP_H_
22 | #define TNYDB_TMAP_H_
23 |
24 | #include "tnyDB_tword.h"
25 | #include "tnyDB_list.h"
26 |
27 | typedef struct {
28 | TWORD** data;
29 | int length;
30 | int depth;
31 | tnyDB_list *keys;
32 | } tnyDB_tmap;
33 |
34 |
35 | TWORD * tnyDB_tmap_seek(tnyDB_tmap *tree, int value);
36 | int tnyDB_tmap_access(tnyDB_tmap *wtree, int rowIndex);
37 | tnyDB_tmap *tnyDB_tmap_create(int *data, int dataLength);
38 |
39 |
40 | #endif /* TNYDB_TMAP_H_ */
41 |
--------------------------------------------------------------------------------
/src/tnyDB_tword.c:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2011 Terence Siganakis.
2 |
3 | This file is part of TnyDB.
4 |
5 | TnyDB is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | TnyDB is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with TnyDB. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 |
19 | /* Contact: Terence Siganakis */
20 |
21 |
22 | #include
23 | #include
24 | #include "tnyDB_tword.h"
25 | #include "tnyDB_mem.h"
26 |
27 | static const TWORD k1 = 0x5555555555555555; /* -1/3 */
28 | static const TWORD k2 = 0x3333333333333333; /* -1/5 */
29 | static const TWORD k4 = 0x0f0f0f0f0f0f0f0f; /* -1/17 */
30 | static const TWORD kf = 0x0101010101010101; /* -1/255 */
31 | static const int TWORD_SIZE_BITS = 64;
32 | static const TWORD ONE_64BIT = 1;
33 |
34 | void tnyDB_tword_print(TWORD *r, int length) {
35 | printf(" (%i) ", length);
36 |
37 | if (r == NULL) {
38 | printf("NULL");
39 | } else {
40 | for (int i = 0; i < length; i++) {
41 | if (i % 8 == 0 && i != 0) {
42 | printf(" ");
43 |
44 | }
45 | if (tnyDB_tword_bit_is_set(r, i)) {
46 | printf("1");
47 | } else {
48 | printf("0");
49 | }
50 | }
51 |
52 | }
53 | }
54 |
55 | void tnyDB_tword_print_offset(TWORD *r, int start, int length) {
56 |
57 |
58 | if (r == NULL) {
59 | printf("NULL");
60 | } else {
61 | for (int i = start; i < start+length; i++) {
62 | if (i % 8 == 0 && i != 0) {
63 | printf(" ");
64 |
65 | }
66 | if (tnyDB_tword_bit_is_set(r, i)) {
67 | printf("1");
68 | } else {
69 | printf("0");
70 | }
71 | }
72 |
73 |
74 | }
75 |
76 | printf(" (s:%i, l:%i)", start, length);
77 | //printf("\n");
78 |
79 | }
80 |
81 | int tnyDB_tword_bit_is_set(TWORD *r, int position) {
82 |
83 | int wordNumber = position / TWORD_SIZE_BITS;
84 | int bitPosition = position % TWORD_SIZE_BITS;
85 |
86 | if ((r[wordNumber] & (ONE_64BIT << bitPosition)) != 0) {
87 | return 1;
88 | } else {
89 | return 0;
90 | }
91 |
92 | }
93 |
94 | void tnyDB_tword_set(TWORD *r, int position) {
95 | int wordNumber = position / TWORD_SIZE_BITS;
96 | int bitPosition = position % TWORD_SIZE_BITS;
97 |
98 | r[wordNumber] = r[wordNumber] | (ONE_64BIT << bitPosition);
99 | }
100 |
101 | int tnyDB_tword_last_set_index(TWORD val) {
102 | int result = 0;
103 | if (val >= 0x100000000) {
104 | result += 32;
105 | val >>= 32;
106 | }
107 | if (val >= 0x10000) {
108 | result += 16;
109 | val >>= 16;
110 | }
111 | if (val >= 0x100) {
112 | result += 8;
113 | val >>= 8;
114 | }
115 | if (val >= 0x10) {
116 | result += 4;
117 | val >>= 4;
118 | }
119 | if (val >= 0x4) {
120 | result += 2;
121 | val >>= 2;
122 | }
123 | if (val >= 0x2) {
124 | result += 1;
125 | val >>= 1;
126 | }
127 | return result + (int) val;
128 |
129 | }
130 |
131 | TWORD *tnyDB_tword_copy_words(TWORD *old, int bit_length) {
132 |
133 | int wordLength = (bit_length / TWORD_SIZE_BITS) + 1;
134 | TWORD *new = tnyDB_malloc_data(sizeof(TWORD) * wordLength, "tnyDB_vector_copy->data");
135 |
136 | for (int i = 0; i < wordLength; i++) {
137 | new[i] = old[i];
138 | }
139 |
140 | return new;
141 | }
142 |
143 | void tnyDB_tword_and(TWORD *a, TWORD *b, int bit_length) {
144 | int wordLength = (bit_length / TWORD_SIZE_BITS) + 1;
145 | for (int i = 0; i < wordLength; i++) {
146 | a[i] &= b[i];
147 | }
148 | }
149 |
150 | void tnyDB_tword_and_not(TWORD *a, TWORD *b, int bit_length) {
151 |
152 | int wordLength = (bit_length / TWORD_SIZE_BITS) + 1;
153 | for (int i = 0; i < wordLength; i++) {
154 | a[i] &= ~(b[i]);
155 | }
156 |
157 | }
158 |
159 | void tnyDB_tword_nand(TWORD *a, TWORD *b, int bit_length) {
160 |
161 | int wordLength = (bit_length / TWORD_SIZE_BITS) + 1;
162 | for (int i = 0; i < wordLength; i++) {
163 | a[i] = (~(a[i])) & b[i];
164 | }
165 |
166 | }
167 |
168 | void tnyDB_tword_inverse(TWORD *a, int bit_length) {
169 | int wordLength = (bit_length / TWORD_SIZE_BITS) + 1;
170 | for (int i = 0; i < wordLength; i++) {
171 | a[i] = ~(a[i]);
172 | }
173 |
174 | }
175 |
176 |
177 | void tnyDB_tword_refine(TWORD *m, int mBitLen, TWORD *c, int cOffset, int cBitLen, int isLeft) {
178 |
179 |
180 |
181 |
182 | // Move through the compressed word to the correct word (word position)
183 | c += cOffset/64;
184 |
185 | // The bit position in the current Compressed WORD
186 | int cPosInWord = cOffset % 64;
187 |
188 | // The bit position in the current MASK word
189 | int mPosInWord = 0;
190 |
191 | // How many bits in M we have visited
192 | int mCount = 0;
193 |
194 | for (int ci = 0; ci < cBitLen; ci++) {
195 |
196 | TWORD isSet = ( (*c) & (1ul << cPosInWord));
197 | if (isLeft == 1){
198 | isSet = isSet == 0 ? 1 : 0;
199 | }
200 |
201 | // Go through the MASK until we find the next 1 bit
202 | while ((*m & (1ul << mPosInWord)) == 0) {
203 | mPosInWord++;
204 | mCount++;
205 | if (mPosInWord == 64) {
206 | mPosInWord = 0;
207 | m++;
208 | }
209 |
210 | if (mCount > mBitLen) {
211 | printf("Error in tnyDB_tword_refine: mCount > mBitLen (%i > %i)\n", mCount, mBitLen);
212 |
213 | printf("Mask:\t");
214 | tnyDB_tword_print_offset(m, 0, mBitLen);
215 | printf("\nModifier:\t");
216 | tnyDB_tword_print_offset(c, cOffset, cBitLen);
217 | printf("\n");
218 | exit(-1);
219 | }
220 | }
221 | if (isSet == 0) {
222 | // Reset the bit in M corresponding to mPosInWord...
223 | *m &= ~(1ul << mPosInWord);
224 | }
225 |
226 | mPosInWord++;
227 | cPosInWord++;
228 |
229 | mCount++;
230 |
231 | if (cPosInWord == 64) {
232 | cPosInWord = 0;
233 | c++;
234 | }
235 | if (mPosInWord == 64) {
236 | mPosInWord = 0;
237 | m++;
238 | }
239 | }
240 | }
241 |
242 | // Expand
243 | // c = "0101" using
244 | // m = "0100110100", to come up with
245 | // r = "0*001*0100" (where a * is a 1 reset to a 0)
246 | void tnyDB_tword_combine(TWORD *m, int mOffset, int mBitLen, TWORD *c, int cOffset, int cBitLen) {
247 |
248 | int mCount = 0;
249 | int cPosInWord = cOffset; // The position in the current Compressed WORD
250 | int mPosInWord = mOffset; // The position in the current MASK word
251 | TWORD mStart = *m;
252 | TWORD * cStart = c;
253 |
254 | printf("tnyDB_tword_combine\n---------------------\nM: ");
255 | tnyDB_tword_print_offset(m, mOffset, mBitLen);
256 |
257 | printf("\nC: ");
258 | tnyDB_tword_print_offset(c, cOffset, cBitLen);
259 | printf("\n");
260 |
261 | for (int ci = 0; ci < cBitLen; ci++) {
262 |
263 | TWORD isSet = (*c & (1ul << cPosInWord));
264 |
265 | // Go through the MASK until we find the next 1 bit
266 | while ((*m & (1ul << mPosInWord)) == 0) {
267 | mPosInWord++;
268 | mCount++;
269 | if (mPosInWord == 64) {
270 | mPosInWord = 0;
271 | m++;
272 | }
273 | if (mCount > mBitLen) {
274 | printf("Error in tnyDB_tword_combine: mCount > mBitLen (%i > %i)\n M:", mCount, mBitLen);
275 |
276 | tnyDB_tword_print(&mStart, mBitLen + mOffset);
277 | printf("\nC:");
278 | tnyDB_tword_print(cStart, cBitLen);
279 | printf("\n");
280 | exit(-1);
281 | }
282 | }
283 |
284 | if (isSet == 0) {
285 | // Reset it...
286 | *m &= ~(1ul << mPosInWord);
287 | }
288 |
289 | mPosInWord++;
290 | cPosInWord++;
291 |
292 | mCount++;
293 |
294 |
295 |
296 |
297 | if (ci == 64) {
298 | cPosInWord = 0;
299 | c++;
300 | }
301 | }
302 |
303 | }
304 |
305 | void tnyDB_tword_combine_test() {
306 |
307 | TWORD mask = 0;
308 |
309 | mask |= 1ul << 0;
310 | mask |= 1ul << 2;
311 | mask |= 1ul << 4;
312 | mask |= 1ul << 6;
313 | mask |= 1ul << 8;
314 | mask |= 1ul << 10;
315 |
316 | TWORD compressed = 0;
317 | compressed |= 1ul << 0;
318 | compressed |= 1ul << 1;
319 | compressed |= 1ul << 2;
320 |
321 | printf("M: ");
322 | tnyDB_tword_print(&mask, 10);
323 | printf("\nC: ");
324 | tnyDB_tword_print(&compressed, 10);
325 | printf("\nR: ");
326 | tnyDB_tword_combine(&mask, 0, 10, &compressed, 0, 7);
327 | tnyDB_tword_print(&mask, 10);
328 |
329 | printf("\n");
330 |
331 | }
332 |
333 | int tnyDB_tword_population(TWORD *mask, int start, int end) {
334 | int startWord = start / TWORD_SIZE_BITS;
335 | int startIndex = start % TWORD_SIZE_BITS;
336 |
337 | int endWord = end / TWORD_SIZE_BITS;
338 | int endIndex = end % TWORD_SIZE_BITS;
339 |
340 | int startEndIndex = startWord == endWord ? endIndex : TWORD_SIZE_BITS - 1;
341 |
342 | int pop = 0;
343 |
344 | TWORD first = *(mask + startWord);
345 | // Lets do this the slow and steady way....
346 | for (int i = startIndex; i <= startEndIndex; i++) {
347 | if ((first & (ONE_64BIT << i)) != 0) {
348 | pop++;
349 | }
350 | }
351 |
352 | // Now we can do it Word at a time...
353 | for (int j = startWord + 1; j < endWord; j++) {
354 | TWORD x = *(mask + j);
355 | x = x - ((x >> 1) & k1); /* put count of each 2 bits into those 2 bits */
356 | x = (x & k2) + ((x >> 2) & k2); /* put count of each 4 bits into those 4 bits */
357 | x = (x + (x >> 4)) & k4; /* put count of each 8 bits into those 8 bits */
358 | x = (x * kf) >> 56; /* returns 8 most significant bits of x + (x<<8) + (x<<16) + (x<<24) + ... */
359 |
360 | pop += (int) x;
361 | }
362 |
363 | if (startWord != endWord) {
364 | TWORD last = *(mask + endWord);
365 | // Lets do this the slow and steady way....
366 | for (int k = 0; k <= endIndex; k++) {
367 | if ((last & (ONE_64BIT << k)) != 0) {
368 | pop++;
369 | }
370 | }
371 | }
372 | return pop;
373 |
374 | }
375 |
--------------------------------------------------------------------------------
/src/tnyDB_tword.h:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2011 Terence Siganakis.
2 |
3 | This file is part of TnyDB.
4 |
5 | TnyDB is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | TnyDB is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with TnyDB. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 |
19 | /* Contact: Terence Siganakis */
20 |
21 | #ifndef TNYDB_TWORD_H_
22 | #define TNYDB_TWORD_H_
23 |
24 | #include "tnyDB_mem.h"
25 |
26 |
27 |
// One word of a bitmap. The implementation treats it as 64 bits wide.
typedef unsigned long long TWORD;


// Single-bit access and inspection.
void tnyDB_tword_print(TWORD *r, int length);
int tnyDB_tword_bit_is_set(TWORD *r, int position) ;
void tnyDB_tword_set(TWORD *r, int position);
int tnyDB_tword_last_set_index(TWORD val) ;

// Returns a newly allocated copy of the bitmap.
TWORD *tnyDB_tword_copy_words(TWORD *old, int bit_length) ;

// Word-wise operations; the result is written into a (or m).
void tnyDB_tword_nand(TWORD *a, TWORD *b, int bit_length) ;
void tnyDB_tword_and(TWORD *a, TWORD *b, int bit_length);
void tnyDB_tword_and_not(TWORD *a, TWORD *b, int bit_length);
void tnyDB_tword_inverse(TWORD *a, int bit_length);
void tnyDB_tword_combine(TWORD *m, int mOffset, int mBitLen, TWORD *c, int cOffset, int cBitLen) ;
// Counts the set bits from bit start to bit end, inclusive.
int tnyDB_tword_population(TWORD *mask, int start, int end);


void tnyDB_tword_refine(TWORD *m, int mBitLen, TWORD *c, int cOffset, int cBitLen, int isLeft);
void tnyDB_tword_print_offset(TWORD *r, int start, int length) ;


// Manual demo of tnyDB_tword_combine; prints to stdout.
void tnyDB_tword_combine_test();
51 |
52 | #endif /* TNYDB_TWORD_H_ */
53 |
--------------------------------------------------------------------------------
/src/tnyDB_wtree.c:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2011 Terence Siganakis.
2 |
3 | This file is part of TnyDB.
4 |
5 | TnyDB is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | TnyDB is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with TnyDB. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 |
19 | /* Contact: Terence Siganakis */
20 |
21 | #include
22 | #include
23 | #include
24 | #include
25 |
26 | #include "tnyDB_list.h"
27 | #include "tnyDB_wtree.h"
28 |
29 | tnyDB_list * _get_keys(int *data, int dataLength) {
30 | tnyDB_list *keys = tnyDB_list_create();
31 |
32 | for (int i = 0; i < dataLength; i++) {
33 | int m = tnyDB_list_find(keys, data[i]);
34 | if (m < 0) {
35 | tnyDB_list_insert(keys, ~m, data[i]);
36 | }
37 | }
38 | return keys;
39 | }
40 |
// Integer base-2 logarithm: the index of the highest set bit of v.
// Returns 0 for v == 0 as well as for v == 1.
static inline int _log2(unsigned int v) {
    unsigned int exponent = 0;

    // Unrolled binary search: test the upper half of the remaining bit
    // range and, when anything is set there, shift it down and record it.
    if (v & 0xFFFF0000) {
        v >>= 16;
        exponent |= 16;
    }
    if (v & 0xFF00) {
        v >>= 8;
        exponent |= 8;
    }
    if (v & 0xF0) {
        v >>= 4;
        exponent |= 4;
    }
    if (v & 0xC) {
        v >>= 2;
        exponent |= 2;
    }
    if (v & 0x2) {
        v >>= 1;
        exponent |= 1;
    }

    return exponent;
}
55 |
56 | static inline int _row_word_length(tnyDB_wtree *tree) {
57 | return (tree->value_count / 64) + 1;
58 | }
59 |
60 |
61 | void _write_tree(tnyDB_wtree * tree, int *data, int dataLen, int rankIndex, int dataLeft, int keyLeft, int keyRight,
62 | int depth) {
63 |
64 | int keyMiddle = (keyLeft + keyRight) / 2;
65 |
66 | TWORD *row = tree->data + (depth * tree->row_length);
67 | // printf("Working at depth: %i (%i)\n", depth, depth * tree->row_length);
68 |
69 | int* leftValues = tnyDB_calloc_data(sizeof(int), dataLen, "_write_tree Left");
70 | int* rightValues = tnyDB_calloc_data(sizeof(int), dataLen, "_write_tree Right");
71 | int lCount = 0, rCount = 0;
72 |
73 | int i = 0;
74 | while (i < dataLen) {
75 |
76 | if (data[i] <= keyMiddle) {
77 | leftValues[lCount] = data[i];
78 | lCount++;
79 | } else {
80 | tnyDB_tword_set(row, dataLeft + i);
81 | rightValues[rCount] = data[i];
82 | rCount++;
83 | }
84 | i++;
85 | }
86 |
87 | tnyDB_list_set(tree->ranks, rankIndex - 1, lCount);
88 |
89 | // Go LEFT
90 | if (keyLeft < keyMiddle) {
91 | _write_tree(tree, leftValues, lCount, rankIndex * 2, dataLeft, keyLeft, keyMiddle, depth + 1);
92 | }
93 |
94 | tnyDB_free_data(leftValues, sizeof(int) * dataLen, "_write_tree Left");
95 |
96 | // Go RIGHT
97 | if (keyMiddle + 1 < keyRight) {
98 | _write_tree(tree, rightValues, rCount, (rankIndex * 2) + 1, dataLeft + lCount, keyMiddle + 1, keyRight,
99 | depth + 1);
100 |
101 | }
102 | tnyDB_free_data(rightValues, sizeof(int) * dataLen, "_write_tree Right");
103 | }
104 |
105 |
106 | // Given a bitmap vector, return the actual values at each set bits index
107 | tnyDB_list* tnyDB_wtree_scan(tnyDB_wtree *tree, TWORD *scan_list, int scan_list_length) {
108 | tnyDB_list *list = tnyDB_list_create();
109 |
110 | // Now we are going to grab these ints in reverse order,
111 | // as its faster to read the TWORDs that way
112 |
113 | int words = (scan_list_length / 64) + 1;
114 | // int finalWordBitLength = vector->bit_length%64;
115 | int idx;
116 | for (int i = words - 1; i >= 0; i--) {
117 |
118 | TWORD v = scan_list[i];
119 | while (v > 0) {
120 | TWORD t = v;
121 |
122 | v &= v - 1;
123 |
124 | idx = i * 64 + tnyDB_tword_last_set_index(t - v) - 1;
125 |
126 | int val = tnyDB_wtree_access(tree, idx);
127 | tnyDB_list_push(list, val);
128 | }
129 |
130 | }
131 |
132 | return list;
133 |
134 | }
135 |
136 | // Builds a bitmap over the rows of the tree by descending from the top row
137 | // towards the key that a binary search for `value` over tree->keys selects.
138 | //   tree  - tree to search
139 | //   value - value to look for
140 | // Returns the TWORD buffer obtained from tnyDB_tword_copy_words(); this function
141 | // does not release it.
142 | TWORD * tnyDB_wtree_seek(tnyDB_wtree *tree, int value) {
143 |     int rowBitLength = tree->value_count;
144 |     // Row stride in words. Use the length stored on the tree, exactly as
145 |     // tnyDB_wtree_access() does, instead of re-deriving it locally.
146 |     int rowWordLength = tree->row_length;
147 |
148 |     // Inclusive bounds (and midpoint) of the key range still under consideration
149 |     int l = 0;
150 |     int r = tree->keys->length - 1;
151 |     int m = (l + r) / 2;
152 |
153 |     // 1-based position of the current node in tree->ranks
154 |     int rankIndex = 1;
155 |     // Bit range of the current node inside its row
156 |     int vectorStart = 0, vectorLength = rowBitLength;
157 |
158 |     // Start from a copy of the top row; it is narrowed down level by level
159 |     TWORD *results = tnyDB_tword_copy_words(tree->data, rowBitLength);
160 |     TWORD *row = tree->data;
161 |
162 |     for (int i = 0; i < tree->depth; i++) {
163 |         // Where the "fold" of the current node is (where 0's end and 1's begin)
164 |         int zeroCount = tree->ranks->values[rankIndex - 1];
165 |
166 |         if (value > tree->keys->values[m]) {
167 |             // BIGGER / RIGHT
168 |             // On the top row the copied bits are used as they are, so only
169 |             // deeper rows need refining.
170 |             if (i != 0) {
171 |                 tnyDB_tword_refine(results, rowBitLength, row, vectorStart, vectorLength, 0);
172 |             }
173 |
174 |             // The right child starts after the zeros of this node
175 |             vectorStart += zeroCount;
176 |             vectorLength = vectorLength - zeroCount;
177 |             rankIndex = (rankIndex * 2) + 1;
178 |             l = m + 1;
179 |         } else {
180 |             // SMALLER / LEFT
181 |             if (i == 0) {
182 |                 tnyDB_tword_inverse(results, rowBitLength);
183 |             } else {
184 |                 tnyDB_tword_refine(results, rowBitLength, row, vectorStart, vectorLength, 1);
185 |             }
186 |
187 |             // The left child keeps the start and spans only the zeros
188 |             vectorLength = zeroCount;
189 |             rankIndex = (rankIndex * 2);
190 |             r = m;
191 |         }
192 |
193 |         // Key range narrowed to a single key: nothing left to split
194 |         if (l == r) {
195 |             break;
196 |         }
197 |         m = (l + r) / 2;
198 |
199 |         // Next row please. The rank of the next node is read at the top of the
200 |         // next iteration only, so no entry is fetched for a level never visited.
201 |         row += rowWordLength;
202 |     }
203 |     return results;
204 | }
205 |
206 | // Finds the value at the specified index by walking down the rows, following
207 | // the bit found at the current position on every level.
208 | //   wtree    - tree to read from
209 | //   rowIndex - bit position in the top row to start from
210 | // Returns the entry of wtree->keys the walk ends on.
211 | int tnyDB_wtree_access(tnyDB_wtree *wtree, int rowIndex) {
212 |     // Inclusive range of candidate keys
213 |     int lo = 0;
214 |     int hi = wtree->keys->length - 1;
215 |
216 |     // Where the current node begins inside its row, and our offset within it
217 |     int nodeStart = 0;
218 |     int offset = rowIndex;
219 |
220 |     // 1-based position of the current node in wtree->ranks
221 |     int node = 1;
222 |     TWORD *rowWords = wtree->data;
223 |
224 |     for (int level = 0; level < wtree->depth && lo != hi; level++) {
225 |         int mid = (lo + hi) / 2;
226 |
227 |         // zeros tells us where the "fold" for the next row is
228 |         // (e.g. where 0's end and 1's begin)
229 |         int zeros = wtree->ranks->values[node - 1];
230 |         if (zeros < 0) {
231 |             printf("Negative zeroCount... What the fuck?\n");
232 |         }
233 |
234 |         int bitPos = nodeStart + offset;
235 |         int ones = tnyDB_tword_population(rowWords, nodeStart, bitPos);
236 |
237 |         if (tnyDB_tword_bit_is_set(rowWords, bitPos)) {
238 |             // Right: continue with the upper half of the keys
239 |             lo = mid + 1;
240 |
241 |             // The 1's are placed after the fold, so the node start moves past the 0's
242 |             nodeStart += zeros;
243 |             offset = ones - 1;
244 |             node = (node * 2) + 1;
245 |         } else {
246 |             // Left: continue with the lower half of the keys
247 |             hi = mid;
248 |
249 |             // Any "1" seen up to here can be removed from the offset,
250 |             // as it will be placed after the fold
251 |             offset -= ones;
252 |             node = node * 2;
253 |         }
254 |
255 |         // Move the pointer to the next row down
256 |         rowWords += wtree->row_length;
257 |     }
258 |
259 |     // The key in the middle of whatever range is left
260 |     return wtree->keys->values[(lo + hi) / 2];
261 | }
262 |
263 | // Creates a new wtree from the integer array passed in
264 | //   data       - values to build the tree from
265 | //   dataLength - number of entries in data
266 | // Returns the newly allocated tree (its vector_length field is not assigned here).
267 | tnyDB_wtree *tnyDB_wtree_create(int *data, int dataLength) {
268 |     tnyDB_wtree *wt = tnyDB_malloc_data(sizeof(tnyDB_wtree), "tnyDB_tree_create.wtree");
269 |
270 |     // Shape of the tree: value count, key list, depth from the key count, row length
271 |     wt->value_count = dataLength;
272 |     wt->keys = _get_keys(data, dataLength);
273 |     wt->depth = _log2(wt->keys->length) + 1;
274 |     wt->row_length = _row_word_length(wt);
275 |
276 |     // Replace every input value by what tnyDB_list_find() returns for it in the
277 |     // key list (presumably its position there - confirm); the tree is built on these
278 |     int *keyRefs = malloc(sizeof(int) * dataLength);
279 |     int n = 0;
280 |     while (n < dataLength) {
281 |         keyRefs[n] = tnyDB_list_find(wt->keys, data[n]);
282 |         n++;
283 |     }
284 |
285 |     // Allocate enough words for each "row" times "depth"
286 |     wt->data = tnyDB_calloc_data(sizeof(TWORD), _row_word_length(wt) * wt->depth, "tnyDB_wtree_create.data");
287 |
288 |     // Ranks start as a fresh list; _write_tree is handed the tree and the translated values
289 |     wt->ranks = tnyDB_list_create();
290 |     int lastKey = wt->keys->length - 1;
291 |     _write_tree(wt, keyRefs, dataLength, 1, 0, 0, lastKey, 0);
292 |
293 |     free(keyRefs);
294 |     return wt;
295 | }
296 |
297 |
--------------------------------------------------------------------------------
/src/tnyDB_wtree.h:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2011 Terence Siganakis.
2 |
3 | This file is part of TnyDB.
4 |
5 | TnyDB is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | TnyDB is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with TnyDB. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 |
19 | /* Contact: Terence Siganakis */
20 |
21 |
22 | #ifndef SPRDB_WTREE_H_
23 | #define SPRDB_WTREE_H_
24 |
25 | #include "tnyDB_list.h"
26 | #include "tnyDB_tword.h"
27 | #include "tnyDB_mem.h"
28 | // NOTE(review): points at `struct TWORD`, while the rest of this header uses plain `TWORD` - confirm intent
29 | typedef struct TWORD* wtree_row;
30 | // A wtree as built by tnyDB_wtree_create() from an array of ints
31 | typedef struct{
32 | int value_count; // Number of input values (the dataLength given to tnyDB_wtree_create)
33 | int depth; // Number of rows allocated in `data`
34 | int row_length; // Length of one row in TWORDs; added to a `data` pointer to reach the next row
35 | // Key list derived from the input values, the row storage, and the per-node counts read as "zeroCount"
36 | tnyDB_list *keys;
37 | TWORD *data;
38 | tnyDB_list *ranks;
39 | // NOTE(review): not referenced by create/seek/access/scan - confirm whether it is still needed
40 | int vector_length;
41 |
42 | } tnyDB_wtree;
43 | // Public wtree API:
44 | //   create builds a tree from an int array, access returns the value found for a row index,
45 | //   seek returns a bitmap for a value, scan returns the values looked up for a bitmap's set bits.
46 | tnyDB_wtree *tnyDB_wtree_create(int *data, int dataLength);
47 | TWORD * tnyDB_wtree_seek(tnyDB_wtree *tree, int value);
48 | int tnyDB_wtree_access(tnyDB_wtree *wtree, int rowIndex);
49 | tnyDB_list* tnyDB_wtree_scan(tnyDB_wtree *tree, TWORD *scan_list, int scan_list_length);
50 | // Presumably releases a tree made by tnyDB_wtree_create - definition not shown here; confirm
51 | void tnyDB_wtree_free(tnyDB_wtree *wtree);
52 |
53 |
54 |
55 | #endif /* SPRDB_WTREE_H_ */
56 |
--------------------------------------------------------------------------------