├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── EXAMPLES.md
├── EXAMPLES.objc.md
├── LICENSE
├── README.md
├── build-mac
    ├── kvdb.xcodeproj
    │   └── project.pbxproj
    └── kvdbtest
    │   └── main.c
├── objc
    ├── KVDatabase.h
    ├── KVDatabase.m
    ├── KVIndexer.h
    ├── KVIndexer.m
    ├── KVOrderedDatabase.h
    └── KVOrderedDatabase.m
└── src
    ├── CMakeLists.txt
    ├── ConvertUTF.c
    ├── ConvertUTF.h
    ├── ConvertUTFNamespace.h
    ├── kvassert.c
    ├── kvassert.h
    ├── kvblock.c
    ├── kvblock.h
    ├── kvbloom.h
    ├── kvdb.c
    ├── kvdb.h
    ├── kvdbo.cpp
    ├── kvdbo.h
    ├── kvendian.h
    ├── kvmurmurhash.h
    ├── kvpaddingutils.h
    ├── kvprime.c
    ├── kvprime.h
    ├── kvserialization.cpp
    ├── kvserialization.h
    ├── kvtable.c
    ├── kvtable.h
    ├── kvtypes.h
    ├── kvunicode.c
    ├── kvunicode.h
    ├── sfts.cpp
    └── sfts.h


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Object files
 2 | *.o
 3 | 
 4 | # Libraries
 5 | *.lib
 6 | *.a
 7 | 
 8 | # Shared objects (inc. Windows DLLs)
 9 | *.dll
10 | *.so
11 | *.so.*
12 | *.dylib
13 | 
14 | # Executables
15 | *.exe
16 | *.out
17 | *.app
18 | 
19 | .DS_Store
20 | xcuserdata
21 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "third-party/lz4"]
2 | 	path = third-party/lz4
3 | 	url = https://github.com/Cyan4973/lz4
4 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required (VERSION 2.6)
2 | project (kvdb) 
3 | 
4 | add_subdirectory (src)
5 | 


--------------------------------------------------------------------------------
/EXAMPLES.md:
--------------------------------------------------------------------------------
  1 | kvdb
  2 | ====
  3 | 
  4 | A Lightweight Key-Value Database.
  5 | 
  6 | - Use only one file
  7 | - Low memory usage
  8 | - Good performance
  9 | 
 10 | Example:
 11 | 
 12 | ```c
 13 | #include <stdio.h>
 14 | #include <kvdb/kvdb.h>
 15 | 
 16 | int main(int argc, char ** argv)
 17 | {
 18 |   struct kvdb * db;
 19 |   db = kvdb_new("kvdb-test.kvdb");
 20 |   kvdb_open(db);
 21 | 
 22 |   int r;
 23 | 
 24 |   char * key = "some key";
 25 |   char * value = "some value";
 26 |   r = kvdb_set(db, key, strlen(key), value, strlen(value));
 27 |   switch (r) {
 28 |     case 0:
 29 |       fprintf(stderr, "value stored\n");
 30 |       break;
 31 |       
 32 |     case -2:
 33 |       fprintf(stderr, "I/O error\n");
 34 |       break;
 35 |   }
 36 | 
 37 |   key = "some other key";
 38 |   char * read_value = NULL;
 39 |   size_t read_value_size = 0;
 40 |   r = kvdb_get(db, key, strlen(key), &read_value, &read_value_size);
 41 |   switch (r) {
 42 |     case 0:
 43 |       fprintf(stderr, "value: %.*s\n", (int) read_value_size, read_value);
 44 |       free(read_value);  
 45 |       break;
 46 |       
 47 |     case -1:
 48 |       fprintf(stderr, "not found\n");
 49 |       break;
 50 |       
 51 |     case -2:
 52 |       fprintf(stderr, "I/O error\n");
 53 |       break;
 54 |   }
 55 | 
 56 |   key = "yet another key";
 57 |   r = kvdb_delete(db, key, strlen(key));
 58 |   switch (r) {
 59 |     case 0:
 60 |       fprintf(stderr, "value removed\n");
 61 |       break;
 62 |       
 63 |     case -2:
 64 |       fprintf(stderr, "I/O error\n");
 65 |       break;
 66 |   }
 67 |   
 68 |   kvdb_close(db);
 69 |   kvdb_free(db);
 70 |   exit(EXIT_SUCCESS);
 71 | }
 72 | ```
 73 | 
 74 | kvdbo
 75 | =====
 76 | 
 77 | A Lightweight ordered Key-Value Database.
 78 | 
 79 | - Use only one file
 80 | - Low memory usage
 81 | - Good performance
 82 | - Iterable
 83 | 
 84 | Example:
 85 | 
 86 | ```c
 87 | #include <stdio.h>
 88 | #include <kvdb/kvdbo.h>
 89 | 
 90 | int main(int argc, char ** argv)
 91 | {
 92 |   struct kvdbo * db;
 93 |   db = kvdbo_new("kvdb-test.kvdbo");
 94 |   kvdbo_open(db);
 95 | 
 96 |   int r;
 97 | 
 98 |   char * key = "some key";
 99 |   char * value = "some value";
100 |   r = kvdbo_set(db, key, strlen(key), value, strlen(value));
101 |   switch (r) {
102 |     case 0:
103 |       fprintf(stderr, "value stored\n");
104 |       break;
105 |       
106 |     case -2:
107 |       fprintf(stderr, "I/O error\n");
108 |       break;
109 |   }
110 | 
111 |   key = "some other key";
112 |   char * read_value = NULL;
113 |   size_t read_value_size = 0;
114 |   r = kvdbo_get(db, key, strlen(key), &read_value, &read_value_size);
115 |   switch (r) {
116 |     case 0:
117 |       fprintf(stderr, "value: %.*s\n", (int) read_value_size, read_value);
118 |       free(read_value);  
119 |       break;
120 |       
121 |     case -1:
122 |       fprintf(stderr, "not found\n");
123 |       break;
124 |       
125 |     case -2:
126 |       fprintf(stderr, "I/O error\n");
127 |       break;
128 |   }
129 | 
130 |   key = "yet another key";
131 |   r = kvdbo_delete(db, key, strlen(key));
132 |   switch (r) {
133 |     case 0:
134 |       fprintf(stderr, "value removed\n");
135 |       break;
136 |       
137 |     case -2:
138 |       fprintf(stderr, "I/O error\n");
139 |       break;
140 |   }
141 |   
142 |   struct kvdbo_iterator * iterator = kvdbo_iterator_new(db);
143 |   kvdbo_iterator_seek_first(iterator);
144 |   while (kvdbo_iterator_is_valid(iterator)) {
145 |     const char * key;
146 |     size_t size;
147 |     kvdbo_iterator_get_key(iterator, &key, &size);
148 |     printf("key: %.*s\n", (int) size, key);
149 |     kvdbo_iterator_next(iterator);
150 |   }
151 |   kvdbo_iterator_free(iterator);
152 |   kvdbo_close(db);
153 |   kvdbo_free(db);
154 |   exit(EXIT_SUCCESS);
155 | }
156 | ```
157 | 
158 | sfts
159 | ====
160 | 
161 | A Simple Full Text Search.
162 | 
163 | - Use only one file
164 | - Low memory usage
165 | - Good performance
166 | - Unicode support
167 | 
168 | Example:
169 | 
170 | ```c
171 | #include <stdio.h>
172 | #include <kvdb/sfts.h>
173 | 
174 | int main(int argc, char ** argv)
175 | {
176 |   sfts * indexer;
177 |   int r;
178 |   uint64_t * result;
179 |   size_t result_count;
180 | 
181 |   // Opens the index.
182 |   indexer = sfts_new();
183 |   sfts_open(indexer, "index.sfts");
184 | 
185 |   // Adds data to the index.
186 |   sfts_set(indexer, 0, "George Washington");
187 |   sfts_set(indexer, 1, "John Adams");
188 |   sfts_set(indexer, 2, "Thomas Jefferson");
189 |   sfts_set(indexer, 3, "George Michael");
190 |   sfts_set(indexer, 4, "George Méliès");
191 | 
192 |   // Search "geor".
193 |   printf("searching geor\n");
194 |   sfts_search(indexer, "geor", sfts_search_kind_prefix, &result, &result_count);
195 |   for(size_t i = 0 ; i < result_count ; i ++) {
196 |     printf("found: %llu\n", (unsigned long long) result[i]);
197 |   }
198 |   // returns 0, 3 and 4.
199 |   free(result);
200 | 
201 |   // Search "mel".
202 |   printf("searching mel\n");
203 |   sfts_search(indexer, "mel", sfts_search_kind_prefix, &result, &result_count);
204 |   for(size_t i = 0 ; i < result_count ; i ++) {
205 |     printf("found: %llu\n", (unsigned long long) result[i]);
206 |   }
207 |   // returns 4.
208 |   free(result);
209 | 
210 |   sfts_close(indexer);
211 |   sfts_free(indexer);
212 | }
213 | ```
214 | 


--------------------------------------------------------------------------------
/EXAMPLES.objc.md:
--------------------------------------------------------------------------------
  1 | KVDatabase
  2 | ==========
  3 | 
  4 | A Lightweight Key-Value Database.
  5 | 
  6 | - Use only one file
  7 | - Low memory usage
  8 | - Good performance
  9 | 
 10 | Example:
 11 | 
 12 | ```objc
 13 | #include <stdio.h>
 14 | #include <kvdb/KVDatabase.h>
 15 | 
 16 | int main(int argc, char ** argv)
 17 | {
 18 |   KVDatabase * db;
 19 |   db = [[KVDatabase alloc] initWithPath:@"kvdb-test.kvdb"];
 20 |   [db open];
 21 | 
 22 |   [db setData:[NSData dataWithBytes:"some value" length:10] forKey:@"some key"];
 23 |   NSData * data = [db dataForKey:@"some other key"];
 24 |   NSLog(@"value: %@", data);
 25 | 
 26 |   [db removeDataForKey:@"yet another key"];
 27 |   
 28 |   [db close];
 29 |   exit(EXIT_SUCCESS);
 30 | }
 31 | ```
 32 | 
 33 | KVOrderedDatabase
 34 | =================
 35 | 
 36 | A Lightweight ordered Key-Value Database.
 37 | 
 38 | - Use only one file
 39 | - Low memory usage
 40 | - Good performance
 41 | - Iterable
 42 | 
 43 | Example:
 44 | 
 45 | ```objc
 46 | #include <stdio.h>
 47 | #include <kvdb/KVOrderedDatabase.h>
 48 | 
 49 | int main(int argc, char ** argv)
 50 | {
 51 |   KVOrderedDatabase * db;
 52 |   db = [[KVOrderedDatabase alloc] initWithPath:@"kvdb-test.kvdb"];
 53 |   [db open];
 54 | 
 55 |   [db setData:[NSData dataWithBytes:"some value" length:10] forKey:@"some key"];
 56 |   NSData * data = [db dataForKey:@"some other key"];
 57 |   NSLog(@"value: %@", data);
 58 | 
 59 |   [db removeDataForKey:@"yet another key"];
 60 |   
 61 |   KVOrderedDatabaseIterator * iterator = [db keyIterator];
 62 |   [iterator seekToFirstKey];
 63 |   while ([iterator isValid]) {
 64 |     NSLog(@"key: %@", [iterator currentKey]);
 65 |     [iterator next];
 66 |   }
 67 |   
 68 |   [db close];
 69 |   exit(EXIT_SUCCESS);
 70 | }
 71 | ```
 72 | 
 73 | KVIndexer
 74 | =========
 75 | 
 76 | A Simple Full Text Search.
 77 | 
 78 | - Use only one file
 79 | - Low memory usage
 80 | - Good performance
 81 | - Unicode support
 82 | 
 83 | Example:
 84 | 
 85 | ```objc
 86 | #include <stdio.h>
 87 | #include <kvdb/KVIndexer.h>
 88 | 
 89 | int main(int argc, char ** argv)
 90 | {
 91 |   KVIndexer * indexer;
 92 | 
 93 |   // Opens the index.
 94 |   indexer = [[KVIndexer alloc] initWithPath:@"index.sfts"];
 95 | 
 96 |   // Adds data to the index.
 97 |   [indexer setString:@"George Washington" forDocID:0];
 98 |   [indexer setString:@"John Adams" forDocID:1];
 99 |   [indexer setString:@"Thomas Jefferson" forDocID:2];
100 |   [indexer setString:@"George Michael" forDocID:3];
101 |   [indexer setString:@"George Méliès" forDocID:4];
102 | 
103 |   // Search "geor".
104 |   NSLog(@"searching geor");
105 |   NSArray * result = [indexer search:@"geor" kind:KVIndexerSearchKindPrefix];
106 |   NSLog(@"found: %@", result);
107 | 
108 |   // Search "mel".
109 |   NSLog(@"searching mel");
110 |   result = [indexer search:@"mel" kind:KVIndexerSearchKindPrefix];
111 |   NSLog(@"found: %@", result);
112 | 
113 |   [indexer close];
114 | }
115 | ```
116 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | KVDB
 2 | 
 3 | Copyright (C) 2001 - 2013 - Hoà V. Dinh
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions
 8 | are met:
 9 | 1. Redistributions of source code must retain the above copyright
10 |    notice, this list of conditions and the following disclaimer.
11 | 2. Redistributions in binary form must reproduce the above copyright
12 |    notice, this list of conditions and the following disclaimer in the
13 |    documentation and/or other materials provided with the distribution.
14 | 3. Neither the name of the KVDB project nor the names of its
15 |    contributors may be used to endorse or promote products derived
16 |    from this software without specific prior written permission.
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 | ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 | SUCH DAMAGE.
29 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | kvdb
 2 | ====
 3 | 
 4 | This library implements:
 5 | - a Key Value Store,
 6 | - an ordered Key Value Store,
 7 | - a Full Text Search Index.
 8 | 
 9 | It targets embedded platforms where memory, disk space and file descriptors are constrained.
10 | The API is available in C and Objective-C.
11 | 
12 | kvdb
13 | ====
14 | 
15 | A Key-Value Database.
16 | 
17 | - Use only one file
18 | - Low memory usage
19 | - Good performance
20 | 
21 | kvdbo
22 | =====
23 | 
24 | An ordered Key-Value Database.
25 | 
26 | - Use only one file
27 | - Low memory usage
28 | - Good performance
29 | - Keys can be iterated in lexicographical order
30 | 
31 | sfts
32 | ====
33 | 
34 | A Simple Full Text Search.
35 | 
36 | - Use only one file
37 | - Low memory usage
38 | - Good performance
39 | - Unicode support
40 | 
41 | Examples
42 | ========
43 | 
44 | - [Examples for C](EXAMPLES.md)
45 | - [Examples for Objective-C](EXAMPLES.objc.md)
46 | 


--------------------------------------------------------------------------------
/build-mac/kvdb.xcodeproj/project.pbxproj:
--------------------------------------------------------------------------------
  1 | // !$*UTF8*$!
  2 | {
  3 | 	archiveVersion = 1;
  4 | 	classes = {
  5 | 	};
  6 | 	objectVersion = 46;
  7 | 	objects = {
  8 | 
  9 | /* Begin PBXBuildFile section */
 10 | 		BD1E7C841AAA47DD0030673D /* kvdbo.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BD1E7C821AAA47DD0030673D /* kvdbo.cpp */; };
 11 | 		BD520F301ABAB24E00681B8B /* kvdbo.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = BD1E7C831AAA47DD0030673D /* kvdbo.h */; };
 12 | 		BD520F391ABB548D00681B8B /* ConvertUTF.c in Sources */ = {isa = PBXBuildFile; fileRef = BD520F311ABB548D00681B8B /* ConvertUTF.c */; };
 13 | 		BD520F3A1ABB548D00681B8B /* kvserialization.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BD520F331ABB548D00681B8B /* kvserialization.cpp */; };
 14 | 		BD520F3B1ABB548D00681B8B /* kvunicode.c in Sources */ = {isa = PBXBuildFile; fileRef = BD520F351ABB548D00681B8B /* kvunicode.c */; };
 15 | 		BD520F3C1ABB548D00681B8B /* sfts.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BD520F371ABB548D00681B8B /* sfts.cpp */; };
 16 | 		BDB104621ABE82B000FD6FF6 /* KVIndexer.m in Sources */ = {isa = PBXBuildFile; fileRef = BDB104611ABE82B000FD6FF6 /* KVIndexer.m */; };
 17 | 		BDB104691ABE82CB00FD6FF6 /* KVDatabase.m in Sources */ = {isa = PBXBuildFile; fileRef = BDB104681ABE82CB00FD6FF6 /* KVDatabase.m */; };
 18 | 		BDB1046C1ABE82D900FD6FF6 /* KVOrderedDatabase.m in Sources */ = {isa = PBXBuildFile; fileRef = BDB1046B1ABE82D900FD6FF6 /* KVOrderedDatabase.m */; };
 19 | 		BDB104841AC4D55E00FD6FF6 /* lz4.c in Sources */ = {isa = PBXBuildFile; fileRef = BDB104791AC4D55E00FD6FF6 /* lz4.c */; };
 20 | 		BDB104851AC4D55E00FD6FF6 /* lz4frame.c in Sources */ = {isa = PBXBuildFile; fileRef = BDB1047B1AC4D55E00FD6FF6 /* lz4frame.c */; };
 21 | 		BDB104861AC4D55E00FD6FF6 /* lz4hc.c in Sources */ = {isa = PBXBuildFile; fileRef = BDB1047E1AC4D55E00FD6FF6 /* lz4hc.c */; };
 22 | 		BDB104891AC4D55E00FD6FF6 /* xxhash.c in Sources */ = {isa = PBXBuildFile; fileRef = BDB104821AC4D55E00FD6FF6 /* xxhash.c */; };
 23 | 		C618377C1763F6B8009E00E4 /* kvdb.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = C66823611763C472000C603C /* kvdb.h */; };
 24 | 		C618377F1763F6CC009E00E4 /* kvdb.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = C66823611763C472000C603C /* kvdb.h */; };
 25 | 		C668236A1763C472000C603C /* kvassert.c in Sources */ = {isa = PBXBuildFile; fileRef = C668235B1763C472000C603C /* kvassert.c */; };
 26 | 		C668236C1763C472000C603C /* kvblock.c in Sources */ = {isa = PBXBuildFile; fileRef = C668235D1763C472000C603C /* kvblock.c */; };
 27 | 		C668236F1763C472000C603C /* kvdb.c in Sources */ = {isa = PBXBuildFile; fileRef = C66823601763C472000C603C /* kvdb.c */; };
 28 | 		C66823741763C472000C603C /* kvprime.c in Sources */ = {isa = PBXBuildFile; fileRef = C66823651763C472000C603C /* kvprime.c */; };
 29 | 		C66823761763C472000C603C /* kvtable.c in Sources */ = {isa = PBXBuildFile; fileRef = C66823671763C472000C603C /* kvtable.c */; };
 30 | 		C66823821763C48F000C603C /* main.c in Sources */ = {isa = PBXBuildFile; fileRef = C66823811763C48F000C603C /* main.c */; };
 31 | 		C66823881763C4D6000C603C /* libkvdb.a in Frameworks */ = {isa = PBXBuildFile; fileRef = C66823531763C246000C603C /* libkvdb.a */; };
 32 | 		C668239B1763EA77000C603C /* kvassert.c in Sources */ = {isa = PBXBuildFile; fileRef = C668235B1763C472000C603C /* kvassert.c */; };
 33 | 		C668239D1763EA77000C603C /* kvblock.c in Sources */ = {isa = PBXBuildFile; fileRef = C668235D1763C472000C603C /* kvblock.c */; };
 34 | 		C66823A01763EA77000C603C /* kvdb.c in Sources */ = {isa = PBXBuildFile; fileRef = C66823601763C472000C603C /* kvdb.c */; };
 35 | 		C66823A51763EA77000C603C /* kvprime.c in Sources */ = {isa = PBXBuildFile; fileRef = C66823651763C472000C603C /* kvprime.c */; };
 36 | 		C66823A71763EA77000C603C /* kvtable.c in Sources */ = {isa = PBXBuildFile; fileRef = C66823671763C472000C603C /* kvtable.c */; };
 37 | 		C698FAF01AC66D2F00501892 /* KVIndexer.m in Sources */ = {isa = PBXBuildFile; fileRef = BDB104611ABE82B000FD6FF6 /* KVIndexer.m */; };
 38 | 		C698FAF11AC66D3300501892 /* KVDatabase.m in Sources */ = {isa = PBXBuildFile; fileRef = BDB104681ABE82CB00FD6FF6 /* KVDatabase.m */; };
 39 | 		C698FAF21AC66D3600501892 /* KVOrderedDatabase.m in Sources */ = {isa = PBXBuildFile; fileRef = BDB1046B1ABE82D900FD6FF6 /* KVOrderedDatabase.m */; };
 40 | 		C698FAF31AC66D4200501892 /* lz4.c in Sources */ = {isa = PBXBuildFile; fileRef = BDB104791AC4D55E00FD6FF6 /* lz4.c */; };
 41 | 		C698FAF41AC66D4500501892 /* lz4frame.c in Sources */ = {isa = PBXBuildFile; fileRef = BDB1047B1AC4D55E00FD6FF6 /* lz4frame.c */; };
 42 | 		C698FAF51AC66D4D00501892 /* lz4hc.c in Sources */ = {isa = PBXBuildFile; fileRef = BDB1047E1AC4D55E00FD6FF6 /* lz4hc.c */; };
 43 | 		C698FAF61AC66D5100501892 /* xxhash.c in Sources */ = {isa = PBXBuildFile; fileRef = BDB104821AC4D55E00FD6FF6 /* xxhash.c */; };
 44 | 		C698FAF71AC66D5900501892 /* ConvertUTF.c in Sources */ = {isa = PBXBuildFile; fileRef = BD520F311ABB548D00681B8B /* ConvertUTF.c */; };
 45 | 		C698FAF81AC66D6200501892 /* kvserialization.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BD520F331ABB548D00681B8B /* kvserialization.cpp */; };
 46 | 		C698FAF91AC66D6A00501892 /* kvunicode.c in Sources */ = {isa = PBXBuildFile; fileRef = BD520F351ABB548D00681B8B /* kvunicode.c */; };
 47 | 		C698FAFA1AC66D6D00501892 /* sfts.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BD520F371ABB548D00681B8B /* sfts.cpp */; };
 48 | 		C698FAFB1AC66D7200501892 /* kvdbo.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BD1E7C821AAA47DD0030673D /* kvdbo.cpp */; };
 49 | 		C6B7F6671AC66DD700444CFB /* sfts.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = BD520F381ABB548D00681B8B /* sfts.h */; };
 50 | 		C6B7F6681AC66DDF00444CFB /* KVIndexer.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = BDB104601ABE82B000FD6FF6 /* KVIndexer.h */; };
 51 | 		C6B7F66A1AC66DE900444CFB /* KVOrderedDatabase.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = BDB1046A1ABE82D900FD6FF6 /* KVOrderedDatabase.h */; };
 52 | 		C6B7F66B1AC66DEF00444CFB /* KVDatabase.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = BDB104671ABE82CB00FD6FF6 /* KVDatabase.h */; };
 53 | 		C6B7F66C1AC66E0700444CFB /* KVIndexer.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = BDB104601ABE82B000FD6FF6 /* KVIndexer.h */; };
 54 | 		C6B7F66D1AC66E0900444CFB /* KVDatabase.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = BDB104671ABE82CB00FD6FF6 /* KVDatabase.h */; };
 55 | 		C6B7F66E1AC66E0C00444CFB /* KVOrderedDatabase.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = BDB1046A1ABE82D900FD6FF6 /* KVOrderedDatabase.h */; };
 56 | 		C6B7F66F1AC66E1100444CFB /* sfts.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = BD520F381ABB548D00681B8B /* sfts.h */; };
 57 | 		C6B7F6701AC66E1500444CFB /* kvdbo.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = BD1E7C831AAA47DD0030673D /* kvdbo.h */; };
 58 | /* End PBXBuildFile section */
 59 | 
 60 | /* Begin PBXCopyFilesBuildPhase section */
 61 | 		C618377B1763F6AE009E00E4 /* CopyFiles */ = {
 62 | 			isa = PBXCopyFilesBuildPhase;
 63 | 			buildActionMask = 2147483647;
 64 | 			dstPath = include/kvdb;
 65 | 			dstSubfolderSpec = 16;
 66 | 			files = (
 67 | 				C6B7F66C1AC66E0700444CFB /* KVIndexer.h in CopyFiles */,
 68 | 				C6B7F66D1AC66E0900444CFB /* KVDatabase.h in CopyFiles */,
 69 | 				C6B7F66E1AC66E0C00444CFB /* KVOrderedDatabase.h in CopyFiles */,
 70 | 				C618377C1763F6B8009E00E4 /* kvdb.h in CopyFiles */,
 71 | 				C6B7F66F1AC66E1100444CFB /* sfts.h in CopyFiles */,
 72 | 				C6B7F6701AC66E1500444CFB /* kvdbo.h in CopyFiles */,
 73 | 			);
 74 | 			runOnlyForDeploymentPostprocessing = 0;
 75 | 		};
 76 | 		C618377E1763F6C3009E00E4 /* CopyFiles */ = {
 77 | 			isa = PBXCopyFilesBuildPhase;
 78 | 			buildActionMask = 2147483647;
 79 | 			dstPath = include/kvdb;
 80 | 			dstSubfolderSpec = 16;
 81 | 			files = (
 82 | 				C618377F1763F6CC009E00E4 /* kvdb.h in CopyFiles */,
 83 | 				BD520F301ABAB24E00681B8B /* kvdbo.h in CopyFiles */,
 84 | 				C6B7F6671AC66DD700444CFB /* sfts.h in CopyFiles */,
 85 | 				C6B7F6681AC66DDF00444CFB /* KVIndexer.h in CopyFiles */,
 86 | 				C6B7F66A1AC66DE900444CFB /* KVOrderedDatabase.h in CopyFiles */,
 87 | 				C6B7F66B1AC66DEF00444CFB /* KVDatabase.h in CopyFiles */,
 88 | 			);
 89 | 			runOnlyForDeploymentPostprocessing = 0;
 90 | 		};
 91 | 		C668237D1763C48F000C603C /* CopyFiles */ = {
 92 | 			isa = PBXCopyFilesBuildPhase;
 93 | 			buildActionMask = 2147483647;
 94 | 			dstPath = /usr/share/man/man1/;
 95 | 			dstSubfolderSpec = 0;
 96 | 			files = (
 97 | 			);
 98 | 			runOnlyForDeploymentPostprocessing = 1;
 99 | 		};
100 | /* End PBXCopyFilesBuildPhase section */
101 | 
102 | /* Begin PBXFileReference section */
103 | 		BD1E7C821AAA47DD0030673D /* kvdbo.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = kvdbo.cpp; sourceTree = "<group>"; };
104 | 		BD1E7C831AAA47DD0030673D /* kvdbo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kvdbo.h; sourceTree = "<group>"; };
105 | 		BD520F311ABB548D00681B8B /* ConvertUTF.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = ConvertUTF.c; sourceTree = "<group>"; };
106 | 		BD520F321ABB548D00681B8B /* ConvertUTF.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ConvertUTF.h; sourceTree = "<group>"; };
107 | 		BD520F331ABB548D00681B8B /* kvserialization.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = kvserialization.cpp; sourceTree = "<group>"; };
108 | 		BD520F341ABB548D00681B8B /* kvserialization.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kvserialization.h; sourceTree = "<group>"; };
109 | 		BD520F351ABB548D00681B8B /* kvunicode.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = kvunicode.c; sourceTree = "<group>"; };
110 | 		BD520F361ABB548D00681B8B /* kvunicode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kvunicode.h; sourceTree = "<group>"; };
111 | 		BD520F371ABB548D00681B8B /* sfts.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sfts.cpp; sourceTree = "<group>"; };
112 | 		BD520F381ABB548D00681B8B /* sfts.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sfts.h; sourceTree = "<group>"; };
113 | 		BD520F3D1ABBE79C00681B8B /* ConvertUTFNamespace.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = ConvertUTFNamespace.h; sourceTree = "<group>"; };
114 | 		BDB104601ABE82B000FD6FF6 /* KVIndexer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = KVIndexer.h; sourceTree = "<group>"; };
115 | 		BDB104611ABE82B000FD6FF6 /* KVIndexer.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = KVIndexer.m; sourceTree = "<group>"; };
116 | 		BDB104671ABE82CB00FD6FF6 /* KVDatabase.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = KVDatabase.h; sourceTree = "<group>"; };
117 | 		BDB104681ABE82CB00FD6FF6 /* KVDatabase.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = KVDatabase.m; sourceTree = "<group>"; };
118 | 		BDB1046A1ABE82D900FD6FF6 /* KVOrderedDatabase.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = KVOrderedDatabase.h; sourceTree = "<group>"; };
119 | 		BDB1046B1ABE82D900FD6FF6 /* KVOrderedDatabase.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = KVOrderedDatabase.m; sourceTree = "<group>"; };
120 | 		BDB104791AC4D55E00FD6FF6 /* lz4.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = lz4.c; sourceTree = "<group>"; };
121 | 		BDB1047A1AC4D55E00FD6FF6 /* lz4.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = lz4.h; sourceTree = "<group>"; };
122 | 		BDB1047B1AC4D55E00FD6FF6 /* lz4frame.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = lz4frame.c; sourceTree = "<group>"; };
123 | 		BDB1047C1AC4D55E00FD6FF6 /* lz4frame.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = lz4frame.h; sourceTree = "<group>"; };
124 | 		BDB1047D1AC4D55E00FD6FF6 /* lz4frame_static.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = lz4frame_static.h; sourceTree = "<group>"; };
125 | 		BDB1047E1AC4D55E00FD6FF6 /* lz4hc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = lz4hc.c; sourceTree = "<group>"; };
126 | 		BDB1047F1AC4D55E00FD6FF6 /* lz4hc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = lz4hc.h; sourceTree = "<group>"; };
127 | 		BDB104821AC4D55E00FD6FF6 /* xxhash.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = xxhash.c; sourceTree = "<group>"; };
128 | 		BDB104831AC4D55E00FD6FF6 /* xxhash.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = xxhash.h; sourceTree = "<group>"; };
129 | 		C66823531763C246000C603C /* libkvdb.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libkvdb.a; sourceTree = BUILT_PRODUCTS_DIR; };
130 | 		C668235B1763C472000C603C /* kvassert.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = kvassert.c; sourceTree = "<group>"; };
131 | 		C668235C1763C472000C603C /* kvassert.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kvassert.h; sourceTree = "<group>"; };
132 | 		C668235D1763C472000C603C /* kvblock.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = kvblock.c; sourceTree = "<group>"; };
133 | 		C668235E1763C472000C603C /* kvblock.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kvblock.h; sourceTree = "<group>"; };
134 | 		C668235F1763C472000C603C /* kvbloom.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kvbloom.h; sourceTree = "<group>"; };
135 | 		C66823601763C472000C603C /* kvdb.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = kvdb.c; sourceTree = "<group>"; };
136 | 		C66823611763C472000C603C /* kvdb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kvdb.h; sourceTree = "<group>"; };
137 | 		C66823621763C472000C603C /* kvendian.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kvendian.h; sourceTree = "<group>"; };
138 | 		C66823631763C472000C603C /* kvmurmurhash.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kvmurmurhash.h; sourceTree = "<group>"; };
139 | 		C66823641763C472000C603C /* kvpaddingutils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kvpaddingutils.h; sourceTree = "<group>"; };
140 | 		C66823651763C472000C603C /* kvprime.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = kvprime.c; sourceTree = "<group>"; };
141 | 		C66823661763C472000C603C /* kvprime.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kvprime.h; sourceTree = "<group>"; };
142 | 		C66823671763C472000C603C /* kvtable.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = kvtable.c; sourceTree = "<group>"; };
143 | 		C66823681763C472000C603C /* kvtable.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kvtable.h; sourceTree = "<group>"; };
144 | 		C66823691763C472000C603C /* kvtypes.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kvtypes.h; sourceTree = "<group>"; };
145 | 		C668237F1763C48F000C603C /* kvdbtest */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = kvdbtest; sourceTree = BUILT_PRODUCTS_DIR; };
146 | 		C66823811763C48F000C603C /* main.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = main.c; sourceTree = "<group>"; };
147 | 		C668238D1763EA47000C603C /* libkvdb-ios.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = "libkvdb-ios.a"; sourceTree = BUILT_PRODUCTS_DIR; };
148 | 		C668238F1763EA47000C603C /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; };
149 | /* End PBXFileReference section */
150 | 
151 | /* Begin PBXFrameworksBuildPhase section */
152 | 		C66823501763C246000C603C /* Frameworks */ = {
153 | 			isa = PBXFrameworksBuildPhase;
154 | 			buildActionMask = 2147483647;
155 | 			files = (
156 | 			);
157 | 			runOnlyForDeploymentPostprocessing = 0;
158 | 		};
159 | 		C668237C1763C48F000C603C /* Frameworks */ = {
160 | 			isa = PBXFrameworksBuildPhase;
161 | 			buildActionMask = 2147483647;
162 | 			files = (
163 | 				C66823881763C4D6000C603C /* libkvdb.a in Frameworks */,
164 | 			);
165 | 			runOnlyForDeploymentPostprocessing = 0;
166 | 		};
167 | 		C668238A1763EA47000C603C /* Frameworks */ = {
168 | 			isa = PBXFrameworksBuildPhase;
169 | 			buildActionMask = 2147483647;
170 | 			files = (
171 | 			);
172 | 			runOnlyForDeploymentPostprocessing = 0;
173 | 		};
174 | /* End PBXFrameworksBuildPhase section */
175 | 
176 | /* Begin PBXGroup section */
177 | 		BDB1045D1ABE825800FD6FF6 /* objc */ = {
178 | 			isa = PBXGroup;
179 | 			children = (
180 | 				BDB104601ABE82B000FD6FF6 /* KVIndexer.h */,
181 | 				BDB104611ABE82B000FD6FF6 /* KVIndexer.m */,
182 | 				BDB104671ABE82CB00FD6FF6 /* KVDatabase.h */,
183 | 				BDB104681ABE82CB00FD6FF6 /* KVDatabase.m */,
184 | 				BDB1046A1ABE82D900FD6FF6 /* KVOrderedDatabase.h */,
185 | 				BDB1046B1ABE82D900FD6FF6 /* KVOrderedDatabase.m */,
186 | 			);
187 | 			name = objc;
188 | 			path = ../objc;
189 | 			sourceTree = "<group>";
190 | 		};
191 | 		BDB104761AC4D55E00FD6FF6 /* lz4 */ = {
192 | 			isa = PBXGroup;
193 | 			children = (
194 | 				BDB104791AC4D55E00FD6FF6 /* lz4.c */,
195 | 				BDB1047A1AC4D55E00FD6FF6 /* lz4.h */,
196 | 				BDB1047B1AC4D55E00FD6FF6 /* lz4frame.c */,
197 | 				BDB1047C1AC4D55E00FD6FF6 /* lz4frame.h */,
198 | 				BDB1047D1AC4D55E00FD6FF6 /* lz4frame_static.h */,
199 | 				BDB1047E1AC4D55E00FD6FF6 /* lz4hc.c */,
200 | 				BDB1047F1AC4D55E00FD6FF6 /* lz4hc.h */,
201 | 				BDB104821AC4D55E00FD6FF6 /* xxhash.c */,
202 | 				BDB104831AC4D55E00FD6FF6 /* xxhash.h */,
203 | 			);
204 | 			name = lz4;
205 | 			path = "../third-party/lz4/lib";
206 | 			sourceTree = "<group>";
207 | 		};
208 | 		C668234A1763C246000C603C = {
209 | 			isa = PBXGroup;
210 | 			children = (
211 | 				BDB104761AC4D55E00FD6FF6 /* lz4 */,
212 | 				BDB1045D1ABE825800FD6FF6 /* objc */,
213 | 				C668235A1763C472000C603C /* src */,
214 | 				C66823801763C48F000C603C /* kvdbtest */,
215 | 				C668238E1763EA47000C603C /* Frameworks */,
216 | 				C66823541763C246000C603C /* Products */,
217 | 			);
218 | 			sourceTree = "<group>";
219 | 		};
220 | 		C66823541763C246000C603C /* Products */ = {
221 | 			isa = PBXGroup;
222 | 			children = (
223 | 				C66823531763C246000C603C /* libkvdb.a */,
224 | 				C668237F1763C48F000C603C /* kvdbtest */,
225 | 				C668238D1763EA47000C603C /* libkvdb-ios.a */,
226 | 			);
227 | 			name = Products;
228 | 			sourceTree = "<group>";
229 | 		};
230 | 		C668235A1763C472000C603C /* src */ = {
231 | 			isa = PBXGroup;
232 | 			children = (
233 | 				BD520F311ABB548D00681B8B /* ConvertUTF.c */,
234 | 				BD520F321ABB548D00681B8B /* ConvertUTF.h */,
235 | 				BD520F3D1ABBE79C00681B8B /* ConvertUTFNamespace.h */,
236 | 				BD520F331ABB548D00681B8B /* kvserialization.cpp */,
237 | 				BD520F341ABB548D00681B8B /* kvserialization.h */,
238 | 				BD520F351ABB548D00681B8B /* kvunicode.c */,
239 | 				BD520F361ABB548D00681B8B /* kvunicode.h */,
240 | 				BD520F371ABB548D00681B8B /* sfts.cpp */,
241 | 				BD520F381ABB548D00681B8B /* sfts.h */,
242 | 				BD1E7C821AAA47DD0030673D /* kvdbo.cpp */,
243 | 				BD1E7C831AAA47DD0030673D /* kvdbo.h */,
244 | 				C668235B1763C472000C603C /* kvassert.c */,
245 | 				C668235C1763C472000C603C /* kvassert.h */,
246 | 				C668235D1763C472000C603C /* kvblock.c */,
247 | 				C668235E1763C472000C603C /* kvblock.h */,
248 | 				C668235F1763C472000C603C /* kvbloom.h */,
249 | 				C66823601763C472000C603C /* kvdb.c */,
250 | 				C66823611763C472000C603C /* kvdb.h */,
251 | 				C66823621763C472000C603C /* kvendian.h */,
252 | 				C66823631763C472000C603C /* kvmurmurhash.h */,
253 | 				C66823641763C472000C603C /* kvpaddingutils.h */,
254 | 				C66823651763C472000C603C /* kvprime.c */,
255 | 				C66823661763C472000C603C /* kvprime.h */,
256 | 				C66823671763C472000C603C /* kvtable.c */,
257 | 				C66823681763C472000C603C /* kvtable.h */,
258 | 				C66823691763C472000C603C /* kvtypes.h */,
259 | 			);
260 | 			name = src;
261 | 			path = ../src;
262 | 			sourceTree = "<group>";
263 | 		};
264 | 		C66823801763C48F000C603C /* kvdbtest */ = {
265 | 			isa = PBXGroup;
266 | 			children = (
267 | 				C66823811763C48F000C603C /* main.c */,
268 | 			);
269 | 			path = kvdbtest;
270 | 			sourceTree = "<group>";
271 | 		};
272 | 		C668238E1763EA47000C603C /* Frameworks */ = {
273 | 			isa = PBXGroup;
274 | 			children = (
275 | 				C668238F1763EA47000C603C /* Foundation.framework */,
276 | 			);
277 | 			name = Frameworks;
278 | 			sourceTree = "<group>";
279 | 		};
280 | /* End PBXGroup section */
281 | 
282 | /* Begin PBXNativeTarget section */
283 | 		C66823521763C246000C603C /* kvdb */ = {
284 | 			isa = PBXNativeTarget;
285 | 			buildConfigurationList = C66823571763C246000C603C /* Build configuration list for PBXNativeTarget "kvdb" */;
286 | 			buildPhases = (
287 | 				C618377E1763F6C3009E00E4 /* CopyFiles */,
288 | 				C668234F1763C246000C603C /* Sources */,
289 | 				C66823501763C246000C603C /* Frameworks */,
290 | 			);
291 | 			buildRules = (
292 | 			);
293 | 			dependencies = (
294 | 			);
295 | 			name = kvdb;
296 | 			productName = kvdb;
297 | 			productReference = C66823531763C246000C603C /* libkvdb.a */;
298 | 			productType = "com.apple.product-type.library.static";
299 | 		};
300 | 		C668237E1763C48F000C603C /* kvdbtest */ = {
301 | 			isa = PBXNativeTarget;
302 | 			buildConfigurationList = C66823851763C48F000C603C /* Build configuration list for PBXNativeTarget "kvdbtest" */;
303 | 			buildPhases = (
304 | 				C668237B1763C48F000C603C /* Sources */,
305 | 				C668237C1763C48F000C603C /* Frameworks */,
306 | 				C668237D1763C48F000C603C /* CopyFiles */,
307 | 			);
308 | 			buildRules = (
309 | 			);
310 | 			dependencies = (
311 | 			);
312 | 			name = kvdbtest;
313 | 			productName = kvdbtest;
314 | 			productReference = C668237F1763C48F000C603C /* kvdbtest */;
315 | 			productType = "com.apple.product-type.tool";
316 | 		};
317 | 		C668238C1763EA47000C603C /* kvdb-ios */ = {
318 | 			isa = PBXNativeTarget;
319 | 			buildConfigurationList = C66823981763EA47000C603C /* Build configuration list for PBXNativeTarget "kvdb-ios" */;
320 | 			buildPhases = (
321 | 				C618377B1763F6AE009E00E4 /* CopyFiles */,
322 | 				C66823891763EA47000C603C /* Sources */,
323 | 				C668238A1763EA47000C603C /* Frameworks */,
324 | 			);
325 | 			buildRules = (
326 | 			);
327 | 			dependencies = (
328 | 			);
329 | 			name = "kvdb-ios";
330 | 			productName = "kvdb-ios";
331 | 			productReference = C668238D1763EA47000C603C /* libkvdb-ios.a */;
332 | 			productType = "com.apple.product-type.library.static";
333 | 		};
334 | /* End PBXNativeTarget section */
335 | 
336 | /* Begin PBXProject section */
337 | 		C668234B1763C246000C603C /* Project object */ = {
338 | 			isa = PBXProject;
339 | 			attributes = {
340 | 				LastUpgradeCheck = 0460;
341 | 				ORGANIZATIONNAME = etpan;
342 | 			};
343 | 			buildConfigurationList = C668234E1763C246000C603C /* Build configuration list for PBXProject "kvdb" */;
344 | 			compatibilityVersion = "Xcode 3.2";
345 | 			developmentRegion = English;
346 | 			hasScannedForEncodings = 0;
347 | 			knownRegions = (
348 | 				en,
349 | 			);
350 | 			mainGroup = C668234A1763C246000C603C;
351 | 			productRefGroup = C66823541763C246000C603C /* Products */;
352 | 			projectDirPath = "";
353 | 			projectRoot = "";
354 | 			targets = (
355 | 				C66823521763C246000C603C /* kvdb */,
356 | 				C668237E1763C48F000C603C /* kvdbtest */,
357 | 				C668238C1763EA47000C603C /* kvdb-ios */,
358 | 			);
359 | 		};
360 | /* End PBXProject section */
361 | 
362 | /* Begin PBXSourcesBuildPhase section */
363 | 		C668234F1763C246000C603C /* Sources */ = {
364 | 			isa = PBXSourcesBuildPhase;
365 | 			buildActionMask = 2147483647;
366 | 			files = (
367 | 				BDB104841AC4D55E00FD6FF6 /* lz4.c in Sources */,
368 | 				BDB104691ABE82CB00FD6FF6 /* KVDatabase.m in Sources */,
369 | 				BD1E7C841AAA47DD0030673D /* kvdbo.cpp in Sources */,
370 | 				BD520F3B1ABB548D00681B8B /* kvunicode.c in Sources */,
371 | 				C668236A1763C472000C603C /* kvassert.c in Sources */,
372 | 				BD520F391ABB548D00681B8B /* ConvertUTF.c in Sources */,
373 | 				BDB104891AC4D55E00FD6FF6 /* xxhash.c in Sources */,
374 | 				BDB104851AC4D55E00FD6FF6 /* lz4frame.c in Sources */,
375 | 				BD520F3A1ABB548D00681B8B /* kvserialization.cpp in Sources */,
376 | 				BDB1046C1ABE82D900FD6FF6 /* KVOrderedDatabase.m in Sources */,
377 | 				BDB104621ABE82B000FD6FF6 /* KVIndexer.m in Sources */,
378 | 				C668236C1763C472000C603C /* kvblock.c in Sources */,
379 | 				C668236F1763C472000C603C /* kvdb.c in Sources */,
380 | 				C66823741763C472000C603C /* kvprime.c in Sources */,
381 | 				BDB104861AC4D55E00FD6FF6 /* lz4hc.c in Sources */,
382 | 				BD520F3C1ABB548D00681B8B /* sfts.cpp in Sources */,
383 | 				C66823761763C472000C603C /* kvtable.c in Sources */,
384 | 			);
385 | 			runOnlyForDeploymentPostprocessing = 0;
386 | 		};
387 | 		C668237B1763C48F000C603C /* Sources */ = {
388 | 			isa = PBXSourcesBuildPhase;
389 | 			buildActionMask = 2147483647;
390 | 			files = (
391 | 				C66823821763C48F000C603C /* main.c in Sources */,
392 | 			);
393 | 			runOnlyForDeploymentPostprocessing = 0;
394 | 		};
395 | 		C66823891763EA47000C603C /* Sources */ = {
396 | 			isa = PBXSourcesBuildPhase;
397 | 			buildActionMask = 2147483647;
398 | 			files = (
399 | 				C698FAF71AC66D5900501892 /* ConvertUTF.c in Sources */,
400 | 				C698FAF81AC66D6200501892 /* kvserialization.cpp in Sources */,
401 | 				C698FAF91AC66D6A00501892 /* kvunicode.c in Sources */,
402 | 				C698FAFA1AC66D6D00501892 /* sfts.cpp in Sources */,
403 | 				C698FAFB1AC66D7200501892 /* kvdbo.cpp in Sources */,
404 | 				C668239B1763EA77000C603C /* kvassert.c in Sources */,
405 | 				C668239D1763EA77000C603C /* kvblock.c in Sources */,
406 | 				C66823A01763EA77000C603C /* kvdb.c in Sources */,
407 | 				C66823A51763EA77000C603C /* kvprime.c in Sources */,
408 | 				C66823A71763EA77000C603C /* kvtable.c in Sources */,
409 | 				C698FAF01AC66D2F00501892 /* KVIndexer.m in Sources */,
410 | 				C698FAF11AC66D3300501892 /* KVDatabase.m in Sources */,
411 | 				C698FAF21AC66D3600501892 /* KVOrderedDatabase.m in Sources */,
412 | 				C698FAF31AC66D4200501892 /* lz4.c in Sources */,
413 | 				C698FAF41AC66D4500501892 /* lz4frame.c in Sources */,
414 | 				C698FAF51AC66D4D00501892 /* lz4hc.c in Sources */,
415 | 				C698FAF61AC66D5100501892 /* xxhash.c in Sources */,
416 | 			);
417 | 			runOnlyForDeploymentPostprocessing = 0;
418 | 		};
419 | /* End PBXSourcesBuildPhase section */
420 | 
421 | /* Begin XCBuildConfiguration section */
422 | 		C66823551763C246000C603C /* Debug */ = {
423 | 			isa = XCBuildConfiguration;
424 | 			buildSettings = {
425 | 				ALWAYS_SEARCH_USER_PATHS = NO;
426 | 				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
427 | 				CLANG_CXX_LIBRARY = "libc++";
428 | 				CLANG_ENABLE_OBJC_ARC = YES;
429 | 				CLANG_WARN_CONSTANT_CONVERSION = YES;
430 | 				CLANG_WARN_EMPTY_BODY = YES;
431 | 				CLANG_WARN_ENUM_CONVERSION = YES;
432 | 				CLANG_WARN_INT_CONVERSION = YES;
433 | 				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
434 | 				COPY_PHASE_STRIP = NO;
435 | 				GCC_C_LANGUAGE_STANDARD = gnu99;
436 | 				GCC_DYNAMIC_NO_PIC = NO;
437 | 				GCC_ENABLE_OBJC_EXCEPTIONS = YES;
438 | 				GCC_OPTIMIZATION_LEVEL = 0;
439 | 				GCC_PREPROCESSOR_DEFINITIONS = (
440 | 					"DEBUG=1",
441 | 					"$(inherited)",
442 | 				);
443 | 				GCC_SYMBOLS_PRIVATE_EXTERN = NO;
444 | 				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
445 | 				GCC_WARN_ABOUT_RETURN_TYPE = YES;
446 | 				GCC_WARN_UNINITIALIZED_AUTOS = YES;
447 | 				GCC_WARN_UNUSED_VARIABLE = YES;
448 | 				HEADER_SEARCH_PATHS = (
449 | 					"$(inherited)",
450 | 					/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include,
451 | 					"$(SRCROOT)/../third-party/lz4/lib",
452 | 				);
453 | 				ONLY_ACTIVE_ARCH = YES;
454 | 			};
455 | 			name = Debug;
456 | 		};
457 | 		C66823561763C246000C603C /* Release */ = {
458 | 			isa = XCBuildConfiguration;
459 | 			buildSettings = {
460 | 				ALWAYS_SEARCH_USER_PATHS = NO;
461 | 				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
462 | 				CLANG_CXX_LIBRARY = "libc++";
463 | 				CLANG_ENABLE_OBJC_ARC = YES;
464 | 				CLANG_WARN_CONSTANT_CONVERSION = YES;
465 | 				CLANG_WARN_EMPTY_BODY = YES;
466 | 				CLANG_WARN_ENUM_CONVERSION = YES;
467 | 				CLANG_WARN_INT_CONVERSION = YES;
468 | 				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
469 | 				COPY_PHASE_STRIP = YES;
470 | 				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
471 | 				GCC_C_LANGUAGE_STANDARD = gnu99;
472 | 				GCC_ENABLE_OBJC_EXCEPTIONS = YES;
473 | 				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
474 | 				GCC_WARN_ABOUT_RETURN_TYPE = YES;
475 | 				GCC_WARN_UNINITIALIZED_AUTOS = YES;
476 | 				GCC_WARN_UNUSED_VARIABLE = YES;
477 | 				HEADER_SEARCH_PATHS = (
478 | 					"$(inherited)",
479 | 					/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include,
480 | 					"$(SRCROOT)/../third-party/lz4/lib",
481 | 				);
482 | 			};
483 | 			name = Release;
484 | 		};
485 | 		C66823581763C246000C603C /* Debug */ = {
486 | 			isa = XCBuildConfiguration;
487 | 			buildSettings = {
488 | 				ARCHS = "$(ARCHS_STANDARD_64_BIT)";
489 | 				EXECUTABLE_PREFIX = lib;
490 | 				MACOSX_DEPLOYMENT_TARGET = 10.8;
491 | 				PRODUCT_NAME = "$(TARGET_NAME)";
492 | 				SDKROOT = macosx;
493 | 			};
494 | 			name = Debug;
495 | 		};
496 | 		C66823591763C246000C603C /* Release */ = {
497 | 			isa = XCBuildConfiguration;
498 | 			buildSettings = {
499 | 				ARCHS = "$(ARCHS_STANDARD_64_BIT)";
500 | 				EXECUTABLE_PREFIX = lib;
501 | 				MACOSX_DEPLOYMENT_TARGET = 10.8;
502 | 				PRODUCT_NAME = "$(TARGET_NAME)";
503 | 				SDKROOT = macosx;
504 | 			};
505 | 			name = Release;
506 | 		};
507 | 		C66823861763C48F000C603C /* Debug */ = {
508 | 			isa = XCBuildConfiguration;
509 | 			buildSettings = {
510 | 				PRODUCT_NAME = "$(TARGET_NAME)";
511 | 			};
512 | 			name = Debug;
513 | 		};
514 | 		C66823871763C48F000C603C /* Release */ = {
515 | 			isa = XCBuildConfiguration;
516 | 			buildSettings = {
517 | 				PRODUCT_NAME = "$(TARGET_NAME)";
518 | 			};
519 | 			name = Release;
520 | 		};
521 | 		C66823991763EA47000C603C /* Debug */ = {
522 | 			isa = XCBuildConfiguration;
523 | 			buildSettings = {
524 | 				DSTROOT = /tmp/kvdb_ios.dst;
525 | 				IPHONEOS_DEPLOYMENT_TARGET = 6.1;
526 | 				PRODUCT_NAME = "$(TARGET_NAME)";
527 | 				SDKROOT = iphoneos;
528 | 				SKIP_INSTALL = YES;
529 | 			};
530 | 			name = Debug;
531 | 		};
532 | 		C668239A1763EA47000C603C /* Release */ = {
533 | 			isa = XCBuildConfiguration;
534 | 			buildSettings = {
535 | 				DSTROOT = /tmp/kvdb_ios.dst;
536 | 				IPHONEOS_DEPLOYMENT_TARGET = 6.1;
537 | 				PRODUCT_NAME = "$(TARGET_NAME)";
538 | 				SDKROOT = iphoneos;
539 | 				SKIP_INSTALL = YES;
540 | 				VALIDATE_PRODUCT = YES;
541 | 			};
542 | 			name = Release;
543 | 		};
544 | /* End XCBuildConfiguration section */
545 | 
546 | /* Begin XCConfigurationList section */
547 | 		C668234E1763C246000C603C /* Build configuration list for PBXProject "kvdb" */ = {
548 | 			isa = XCConfigurationList;
549 | 			buildConfigurations = (
550 | 				C66823551763C246000C603C /* Debug */,
551 | 				C66823561763C246000C603C /* Release */,
552 | 			);
553 | 			defaultConfigurationIsVisible = 0;
554 | 			defaultConfigurationName = Release;
555 | 		};
556 | 		C66823571763C246000C603C /* Build configuration list for PBXNativeTarget "kvdb" */ = {
557 | 			isa = XCConfigurationList;
558 | 			buildConfigurations = (
559 | 				C66823581763C246000C603C /* Debug */,
560 | 				C66823591763C246000C603C /* Release */,
561 | 			);
562 | 			defaultConfigurationIsVisible = 0;
563 | 			defaultConfigurationName = Release;
564 | 		};
565 | 		C66823851763C48F000C603C /* Build configuration list for PBXNativeTarget "kvdbtest" */ = {
566 | 			isa = XCConfigurationList;
567 | 			buildConfigurations = (
568 | 				C66823861763C48F000C603C /* Debug */,
569 | 				C66823871763C48F000C603C /* Release */,
570 | 			);
571 | 			defaultConfigurationIsVisible = 0;
572 | 			defaultConfigurationName = Release;
573 | 		};
574 | 		C66823981763EA47000C603C /* Build configuration list for PBXNativeTarget "kvdb-ios" */ = {
575 | 			isa = XCConfigurationList;
576 | 			buildConfigurations = (
577 | 				C66823991763EA47000C603C /* Debug */,
578 | 				C668239A1763EA47000C603C /* Release */,
579 | 			);
580 | 			defaultConfigurationIsVisible = 0;
581 | 			defaultConfigurationName = Release;
582 | 		};
583 | /* End XCConfigurationList section */
584 | 	};
585 | 	rootObject = C668234B1763C246000C603C /* Project object */;
586 | }
587 | 


--------------------------------------------------------------------------------
/build-mac/kvdbtest/main.c:
--------------------------------------------------------------------------------
  1 | //
  2 | //  main.c
  3 | //  kvdbtest
  4 | //
  5 | //  Created by DINH Viêt Hoà on 6/8/13.
  6 | //  Copyright (c) 2013 etpan. All rights reserved.
  7 | //
  8 | 
  9 | #include <stdio.h>
 10 | #include "kvdb.h"
 11 | #include <uuid/uuid.h>
 12 | #include <stdlib.h>
 13 | #include <string.h>
 14 | 
 15 | static void enumerate_keys_callback(kvdb * db, struct kvdb_enumerate_cb_params * params, void * data, int * stop) {
 16 | 	printf("key = %.*s\n", (int) params->key_size, params->key);
 17 | }
 18 | 
 19 | int main(void)
 20 | {
 21 |     uuid_t key;
 22 |     uuid_string_t keyString;
 23 |     uuid_t value;
 24 |     uuid_string_t valueString;
 25 |     
 26 |     struct kvdb * db;
 27 |     db = kvdb_new("kvdb-test.kvdb");
 28 |     kvdb_open(db);
 29 |     
 30 |     int r;
 31 |     
 32 |     char * data;
 33 |     size_t data_size;
 34 |     r = kvdb_get(db, "hoa", 3, &data, &data_size);
 35 |     fprintf(stderr, "1: ");
 36 |     if (r == 0) {
 37 |         fprintf(stderr, "found\n");
 38 |         free(data);
 39 |     }
 40 |     else {
 41 |         fprintf(stderr, "not found\n");
 42 |     }
 43 |     
 44 |     kvdb_set(db, "hoa", 3, "test", 4);
 45 |     r = kvdb_get(db, "hoa", 3, &data, &data_size);
 46 |     fprintf(stderr, "2: ");
 47 |     if (r == 0) {
 48 |         fprintf(stderr, "found\n");
 49 |         free(data);
 50 |     }
 51 |     else {
 52 |         fprintf(stderr, "not found\n");
 53 |     }
 54 | 	
 55 | 	r = kvdb_enumerate_keys(db, enumerate_keys_callback, NULL);
 56 |     
 57 |     kvdb_delete(db, "hoa", 3);
 58 |     r = kvdb_get(db, "hoa", 3, &data, &data_size);
 59 |     fprintf(stderr, "3: ");
 60 |     if (r == 0) {
 61 |         fprintf(stderr, "found\n");
 62 |         free(data);
 63 |     }
 64 |     else {
 65 |         fprintf(stderr, "not found\n");
 66 |     }
 67 |     
 68 |     kvdb_set(db, "hoa", 3, "test", 4);
 69 |     r = kvdb_get(db, "hoa", 3, &data, &data_size);
 70 |     fprintf(stderr, "4: ");
 71 |     if (r == 0) {
 72 |         fprintf(stderr, "found\n");
 73 |         free(data);
 74 |     }
 75 |     else {
 76 |         fprintf(stderr, "not found\n");
 77 |     }
 78 |     kvdb_delete(db, "hoa", 3);
 79 |     
 80 | #define COUNT 1000
 81 |     char * keys[COUNT];
 82 |     for(unsigned int i = 0 ; i < COUNT ; i ++) {
 83 |         //fprintf(stderr, "add %i\n", i);
 84 |         uuid_generate(key);
 85 |         uuid_unparse_lower(key, keyString);
 86 |         uuid_generate(value);
 87 |         uuid_unparse_lower(value, valueString);
 88 |         char * dupKey = malloc(37);
 89 |         memcpy(dupKey, keyString, 36);
 90 |         dupKey[36] = 0;
 91 |         keys[i] = dupKey;
 92 |         kvdb_set(db, keyString, 36, valueString, 36);
 93 |     }
 94 |     
 95 |     kvdb_close(db);
 96 |     kvdb_free(db);
 97 |     
 98 |     db = kvdb_new("kvdb-test.kvdb");
 99 |     kvdb_open(db);
100 |     
101 |     for(unsigned int i = 0 ; i < COUNT / 2 ; i ++) {
102 |         char * key = keys[i];
103 |         kvdb_delete(db, key, 36);
104 |     }
105 |     
106 |     for(unsigned int i = 0 ; i < COUNT / 2 ; i ++) {
107 |         char * value;
108 |         size_t value_size;
109 |         char * key = keys[i];
110 |         int r = kvdb_get(db, key, 36, &value, &value_size);
111 |         if (r == 0) {
112 |             fprintf(stderr, "still exists %s\n", key);
113 |             free(value);
114 |         }
115 |     }
116 | 
117 |     for(unsigned int i = COUNT / 2 ; i < COUNT ; i ++) {
118 |         char * value;
119 |         size_t value_size;
120 |         char * key = keys[i];
121 |         int r = kvdb_get(db, key, 36, &value, &value_size);
122 |         if (r < 0) {
123 |             fprintf(stderr, "could not get key %s %i\n", key, i);
124 |         }
125 |         else {
126 |             free(value);
127 |         }
128 |     }
129 |     
130 |     kvdb_close(db);
131 |     kvdb_free(db);
132 | }
133 | 


--------------------------------------------------------------------------------
/objc/KVDatabase.h:
--------------------------------------------------------------------------------
 1 | #import <Foundation/Foundation.h>
 2 | 
// Objective-C wrapper around a kvdb key-value store stored at a file path.
@interface KVDatabase : NSObject

// Path given to -initWithPath:.
@property (nonatomic, copy, readonly) NSString *path;

// Creates a key-value store backed by the file at the given path.
// The database still has to be opened with -open before use.
- (id) initWithPath:(NSString *)path;

// Opens the database. Returns NO if the underlying store could not be opened.
- (BOOL) open;

// Closes the database.
- (void) close;

// Returns the data associated with the key, or nil if the lookup failed
// (I/O errors are additionally logged).
- (NSData *) dataForKey:(NSString *)key;

// Associates the data with the key. Returns NO on failure
// (I/O errors are additionally logged).
- (BOOL) setData:(NSData *)data forKey:(NSString *)key;

// Removes the given key. I/O errors are logged but not reported to the caller.
- (void) removeDataForKey:(NSString *)key;

// Enumerates all keys of the database. Despite the method name, the block
// only receives the key, not its value. Set *stop to YES to request the end
// of the enumeration.
// Be careful: this method iterates over the whole on-disk database, so it
// is slow.
- (void)enumerateKeysAndValuesUsingBlock:(void(^)(NSString *key, BOOL *stop))block;

@end
31 | 


--------------------------------------------------------------------------------
/objc/KVDatabase.m:
--------------------------------------------------------------------------------
  1 | #import "KVDatabase.h"
  2 | 
  3 | #include "kvdb.h"
  4 | 
// Result codes of the kvdb C functions that this wrapper distinguishes.
// NOTE(review): presumably these mirror the error constants declared in
// kvdb.h -- confirm and consider using those constants directly.
enum {
    KVDBIOErrorCode = -2,       // logged as an I/O error by the methods below
    KVDBNotFoundErrorCode = -1,
};
  9 | 
// Private state of KVDatabase.
@interface KVDatabase ()

// Block of the current key enumeration: stored by
// -enumerateKeysAndValuesUsingBlock: and invoked from the C enumeration
// callback.
@property (nonatomic, copy) void(^enumerationBlock)(NSString *, BOOL *);

@end
 15 | 
 16 | @implementation KVDatabase {
 17 |     kvdb * _db;
 18 | }
 19 | 
 20 | - (id) initWithPath:(NSString *)path
 21 | {
 22 |     self = [super init];
 23 |     _path = [path copy];
 24 |     _db = kvdb_new([path fileSystemRepresentation]);
 25 |     return self;
 26 | }
 27 | 
 28 | - (BOOL) open
 29 | {
 30 |     int r = kvdb_open(_db);
 31 |     if (r < 0) {
 32 |         return NO;
 33 |     }
 34 |     return YES;
 35 | }
 36 | 
 37 | - (void) close
 38 | {
 39 |     kvdb_close(_db);
 40 | }
 41 | 
 42 | - (NSData *) dataForKey:(NSString *)key
 43 | {
 44 |     const char * cKey = [key UTF8String];
 45 |     char * value = NULL;
 46 |     size_t value_size;
 47 |     int code = kvdb_get(_db, cKey, strlen(cKey), &value, &value_size);
 48 |     if (code == KVDBIOErrorCode) {
 49 |         NSLog(@"[%@]: I/O error reading key \"%@\"", self, key);
 50 |         return nil;
 51 |     }
 52 |     else if (code < 0) {
 53 |         return nil;
 54 |     }
 55 |     else {
 56 |         return [NSData dataWithBytesNoCopy:value length:value_size freeWhenDone:YES];
 57 |     }
 58 | }
 59 | 
 60 | - (BOOL) setData:(NSData *)data forKey:(NSString *)key
 61 | {
 62 |     const char * cKey = [key UTF8String];
 63 |     int code = kvdb_set(_db, cKey, strlen(cKey), [data bytes], [data length]);
 64 |     if (code == KVDBIOErrorCode) {
 65 |         NSLog(@"[%@]: I/O error writing key \"%@\"", self, key);
 66 |         return NO;
 67 |     }
 68 |     else if (code < 0) {
 69 |         return NO;
 70 |     }
 71 |     else {
 72 |         return YES;
 73 |     }
 74 | }
 75 | 
 76 | - (void) removeDataForKey:(NSString *)key
 77 | {
 78 |     const char * cKey = [key UTF8String];
 79 |     int code = kvdb_delete(_db, cKey, strlen(cKey));
 80 |     if (code == KVDBIOErrorCode) {
 81 |         NSLog(@"[%@]: I/O error removing key \"%@\"", self, key);
 82 |     }
 83 | }
 84 | 
 85 | - (void)enumerateKeysAndValuesUsingBlock:(void(^)(NSString *key, BOOL * stop))block
 86 | {
 87 |     if (block == nil) {
 88 |         return;
 89 |     }
 90 |     self.enumerationBlock = block;
 91 |     kvdb_enumerate_keys(_db, enumeration_callback, (__bridge void *)self);
 92 | }
 93 | 
 94 | static void enumeration_callback(kvdb * db, struct kvdb_enumerate_cb_params * params,
 95 |                                  void * data, int * stop)
 96 | {
 97 |     KVDatabase * database = (__bridge id) data;
 98 |     NSString * key = [[NSString alloc] initWithBytes:params->key length:params->key_size encoding:NSUTF8StringEncoding];
 99 |     database.enumerationBlock(key, (BOOL *) stop);
100 | }
101 | 
102 | @end
103 | 


--------------------------------------------------------------------------------
/objc/KVIndexer.h:
--------------------------------------------------------------------------------
 1 | #import <Foundation/Foundation.h>
 2 | 
// How a search token is matched against the indexed strings.
typedef enum {
    KVIndexerSearchKindPrefix, // Search documents that have strings that start with the given token.
    KVIndexerSearchKindSubstr, // Search documents that have strings that contain the given token.
    KVIndexerSearchKindSuffix, // Search documents that have strings that end with the given token.
} KVIndexerSearchKind;
// KVIndexerSearchKindPrefix provides the best performance.
 9 | 
// Objective-C wrapper around the sfts full-text indexer.
@interface KVIndexer : NSObject

// Path given to -initWithPath:.
@property (nonatomic, copy, readonly) NSString *path;

// Creates a full-text indexer stored at the given path.
- (id) initWithPath:(NSString *)path;

// Opens the indexer. Returns NO on failure.
- (BOOL) open;

// Closes the indexer.
- (void) close;

// Writes pending changes to disk. Returns NO on failure.
- (BOOL) flush;

// Adds a document to the indexer. string is the content to index;
// the string will be tokenized.
// The document is designated by an identifier docID.
// Returns NO on failure (I/O errors are additionally logged).
- (BOOL) setString:(NSString *)string forDocID:(uint64_t)docID;

// Adds a document to the indexer. strings is the result of a custom tokenizer:
// it is the list of tokens to index.
// The document is designated by an identifier docID.
// Returns NO on failure (I/O errors are additionally logged).
- (BOOL) setStrings:(NSArray * /* NSString */)strings forDocID:(uint64_t)docID;

// Removes a document from the indexer. I/O errors are logged only.
- (void) removeDocID:(uint64_t)docID;

// Searches for a token. Returns the matching document IDs as NSNumber objects
// (unsigned long long), or nil if the search failed.
- (NSArray *) search:(NSString *)token kind:(KVIndexerSearchKind)kind;

@end
43 | 


--------------------------------------------------------------------------------
/objc/KVIndexer.m:
--------------------------------------------------------------------------------
  1 | #import "KVIndexer.h"
  2 | 
  3 | #include "sfts.h"
  4 | 
// Result codes of the sfts C functions that this wrapper distinguishes.
// NOTE(review): presumably these mirror error constants of the C library --
// confirm against its headers.
enum {
    KVDBIOErrorCode = -2,       // logged as an I/O error by the methods below
    KVDBNotFoundErrorCode = -1,
};
  9 | 
 10 | @implementation KVIndexer {
 11 |     sfts * _db;
 12 | }
 13 | 
 14 | - (id) initWithPath:(NSString *)path
 15 | {
 16 |     self = [super init];
 17 |     _path = [path copy];
 18 |     _db = sfts_new([_path fileSystemRepresentation]);
 19 |     return self;
 20 | }
 21 | 
 22 | - (BOOL) open
 23 | {
 24 |     int r = sfts_open(_db);
 25 |     if (r < 0) {
 26 |         return NO;
 27 |     }
 28 |     return YES;
 29 | }
 30 | 
 31 | - (void) close
 32 | {
 33 |     sfts_close(_db);
 34 | }
 35 | 
 36 | - (BOOL) flush
 37 | {
 38 |     int r = sfts_flush(_db);
 39 |     if (r < 0) {
 40 |         return NO;
 41 |     }
 42 |     return YES;
 43 | }
 44 | 
 45 | - (BOOL) setString:(NSString *)string forDocID:(uint64_t)docID
 46 | {
 47 |     unichar * buffer = malloc(sizeof(* buffer) * ([string length] + 1));
 48 |     [string getCharacters:buffer range:NSMakeRange(0, [string length])];
 49 |     buffer[[string length]] = 0;
 50 |     int r = sfts_u_set(_db, docID, buffer);
 51 |     free(buffer);
 52 |     if (r == KVDBIOErrorCode) {
 53 |         NSLog(@"[%@]: I/O error indexing document \"%llu\"", self, (unsigned long long) docID);
 54 |         return NO;
 55 |     }
 56 |     else if (r < 0) {
 57 |         return NO;
 58 |     }
 59 |     else {
 60 |         return YES;
 61 |     }
 62 | }
 63 | 
 64 | - (BOOL) setStrings:(NSArray * /* NSString */)strings forDocID:(uint64_t)docID
 65 | {
 66 |     UChar ** table = malloc(sizeof(* table) * [strings count]);
 67 |     for(unsigned int i = 0 ; i < [strings count] ; i ++) {
 68 |         unichar * buffer = malloc(sizeof(* buffer) * ([strings[i] length] + 1));
 69 |         table[i] = buffer;
 70 |         [strings[i] getCharacters:buffer range:NSMakeRange(0, [strings[i] length])];
 71 |         buffer[[strings[i] length]] = 0;
 72 |     }
 73 |     int r = sfts_u_set2(_db, docID, (const UChar **) table, (int) [strings count]);
 74 |     for(unsigned int i = 0 ; i < [strings count] ; i ++) {
 75 |         free(table[i]);
 76 |     }
 77 |     free(table);
 78 |     if (r == KVDBIOErrorCode) {
 79 |         NSLog(@"[%@]: I/O error indexing document \"%llu\"", self, (unsigned long long) docID);
 80 |         return NO;
 81 |     }
 82 |     else if (r < 0) {
 83 |         return NO;
 84 |     }
 85 |     else {
 86 |         return YES;
 87 |     }
 88 | }
 89 | 
 90 | - (void) removeDocID:(uint64_t)docID
 91 | {
 92 |     int r = sfts_remove(_db, docID);
 93 |     if (r == KVDBIOErrorCode) {
 94 |         NSLog(@"[%@]: I/O error removing indexed document \"%llu\"", self, (unsigned long long) docID);
 95 |     }
 96 | }
 97 | 
 98 | - (NSArray *) search:(NSString *)token kind:(KVIndexerSearchKind)kind
 99 | {
100 |     uint64_t * docids = NULL;
101 |     size_t count = 0;
102 |     UChar * buffer = malloc(sizeof(* buffer) * ([token length] + 1));
103 |     [token getCharacters:buffer range:NSMakeRange(0, [token length])];
104 |     int r = sfts_u_search(_db, buffer, (sfts_search_kind) kind, &docids, &count);
105 |     free(buffer);
106 |     if (r == KVDBIOErrorCode) {
107 |         NSLog(@"[%@]: I/O error searching for token \"%@\"", self, token);
108 |         return nil;
109 |     }
110 |     else if (r < 0) {
111 |         return nil;
112 |     }
113 |     
114 |     NSMutableArray * result = [NSMutableArray array];
115 |     for(size_t i = 0 ; i < count ; i ++) {
116 |         [result addObject:[NSNumber numberWithUnsignedLongLong:docids[i]]];
117 |     }
118 |     free(docids);
119 |     
120 |     return result;
121 | }
122 | 
123 | 
124 | @end
125 | 


--------------------------------------------------------------------------------
/objc/KVOrderedDatabase.h:
--------------------------------------------------------------------------------
 1 | #import <Foundation/Foundation.h>
 2 | 
 3 | @class KVOrderedDatabaseIterator;
 4 | 
// Objective-C wrapper around a kvdbo ordered key-value store.
@interface KVOrderedDatabase : NSObject

// Path given to -initWithPath:.
@property (nonatomic, copy, readonly) NSString *path;

// Creates an ordered key-value store backed by the file at the given path.
- (id) initWithPath:(NSString *)path;

// Opens the database. Returns NO on failure.
- (BOOL) open;

// Closes the database.
- (void) close;

// Writes pending changes to disk. Returns NO on failure.
- (BOOL) flush;

// Returns the data associated with the key, or nil if the lookup failed
// (I/O errors are additionally logged).
- (NSData *) dataForKey:(NSString *)key;

// Sets the data to associate with a key.
- (BOOL) setData:(NSData *)data forKey:(NSString *)key;

// Removes the given key.
- (void) removeDataForKey:(NSString *)key;

// Returns an efficient ordered iterator.
// The order is lexicographical.
- (KVOrderedDatabaseIterator *) keyIterator;

@end
35 | 
// Iterator over the keys of a KVOrderedDatabase, obtained from -keyIterator.
@interface KVOrderedDatabaseIterator : NSObject

// Seeks to the first key.
- (void) seekToFirstKey;

// Seeks to the last key.
- (void) seekToLastKey;

// Seeks to the first key that is larger than or equal to the given key.
- (void) seekAfterKey:(NSString *)key;

// Moves to the next key.
- (void) next;

// Moves to the previous key.
- (void) previous;

// Returns the current key.
- (NSString *) currentKey;

// Returns whether the iterator is at a valid location.
- (BOOL) isValid;

@end
60 | 
61 | 


--------------------------------------------------------------------------------
/objc/KVOrderedDatabase.m:
--------------------------------------------------------------------------------
  1 | #import "KVOrderedDatabase.h"
  2 | 
  3 | #include "kvdbo.h"
  4 | 
  5 | enum { // Negative result codes compared against kvdbo_* return values in this file.
  6 |     KVDBIOErrorCode = -2, // reported through NSLog as an I/O error by the key accessors
  7 |     KVDBNotFoundErrorCode = -1, // presumably "key not found"; not referenced in this file - confirm
  8 | };
  9 | 
 10 | @interface KVOrderedDatabaseIterator ()
 11 | // Private initializer; -[KVOrderedDatabase keyIterator] uses it to build iterators.
 12 | - (id) initWithDatabase:(KVOrderedDatabase *)database;
 13 | 
 14 | @end
 15 | 
 16 | @implementation KVOrderedDatabase {
 17 |     kvdbo * _db; // C-level handle: created in -initWithPath:, released in -dealloc.
 18 | }
 19 | // Copies the path and creates the kvdbo handle for it; opening is done separately by -open.
 20 | - (id) initWithPath:(NSString *)path
 21 | {
 22 |     self = [super init];
 23 |     _path = [path copy];
 24 |     _db = kvdbo_new([path fileSystemRepresentation]);
 25 |     return self;
 26 | }
 27 | // Releases the kvdbo handle.
 28 | - (void) dealloc
 29 | {
 30 |     kvdbo_free(_db);
 31 | }
 32 | // Opens the store. Returns NO when kvdbo_open reports a negative code.
 33 | - (BOOL) open
 34 | {
 35 |     int r = kvdbo_open(_db);
 36 |     if (r < 0) {
 37 |         return NO;
 38 |     }
 39 |     return YES;
 40 | }
 41 | // Closes the store.
 42 | - (void) close
 43 | {
 44 |     kvdbo_close(_db);
 45 | }
 46 | // Flushes the store through kvdbo_flush. Returns NO when it reports a negative code.
 47 | - (BOOL) flush
 48 | {
 49 |     int r = kvdbo_flush(_db);
 50 |     if (r < 0) {
 51 |         return NO;
 52 |     }
 53 |     return YES;
 54 | }
 55 | // Returns the value stored under key, or nil if key is nil or kvdbo_get reports a negative code.
 56 | - (NSData *) dataForKey:(NSString *)key
 57 | {
 58 |     const char * cKey = [key UTF8String];
 59 |     if (cKey == NULL) {
 60 |         return nil; // nil key: strlen(NULL) below would be undefined behavior
 61 |     }
 62 |     char * value = NULL;
 63 |     size_t value_size = 0;
 64 |     int code = kvdbo_get(_db, cKey, strlen(cKey), &value, &value_size);
 65 |     if (code == KVDBIOErrorCode) {
 66 |         NSLog(@"[%@]: I/O error reading key \"%@\"", self, key);
 67 |     }
 68 |     if (code < 0) {
 69 |         return nil;
 70 |     }
 71 |     return [NSData dataWithBytesNoCopy:value length:value_size freeWhenDone:YES]; // NSData takes over the buffer
 72 | }
 73 | // Stores data under key. Returns NO if key is nil or kvdbo_set reports a negative code.
 74 | - (BOOL) setData:(NSData *)data forKey:(NSString *)key
 75 | {
 76 |     const char * cKey = [key UTF8String];
 77 |     if (cKey == NULL) {
 78 |         return NO; // nil key: strlen(NULL) below would be undefined behavior
 79 |     }
 80 |     int code = kvdbo_set(_db, cKey, strlen(cKey), [data bytes], [data length]);
 81 |     if (code == KVDBIOErrorCode) {
 82 |         NSLog(@"[%@]: I/O error writing key \"%@\"", self, key);
 83 |     }
 84 |     if (code < 0) {
 85 |         return NO;
 86 |     }
 87 |     return YES;
 88 | }
 89 | // Deletes key from the store; a nil key is ignored. I/O errors are logged, not returned.
 90 | - (void) removeDataForKey:(NSString *)key
 91 | {
 92 |     const char * cKey = [key UTF8String];
 93 |     if (cKey == NULL) { return; } // nil key: strlen(NULL) below would be undefined behavior
 94 |     if (kvdbo_delete(_db, cKey, strlen(cKey)) == KVDBIOErrorCode) {
 95 |         NSLog(@"[%@]: I/O error removing key \"%@\"", self, key);
 96 |     }
 97 | }
 98 | // Exposes the raw handle; KVOrderedDatabaseIterator passes it to kvdbo_iterator_new.
 99 | - (kvdbo *) _db
100 | {
101 |     return _db;
102 | }
103 | // Returns a new key iterator bound to this database.
104 | - (KVOrderedDatabaseIterator *) keyIterator
105 | {
106 |     return [[KVOrderedDatabaseIterator alloc] initWithDatabase:self];
107 | }
108 | 
109 | @end
110 | 
111 | 
112 | @implementation KVOrderedDatabaseIterator {
113 |     kvdbo_iterator * _iterator; // C-level iterator: created in the initializer, released in -dealloc.
114 | }
115 | // Creates a kvdbo iterator on the handle of the given database.
116 | - (id) initWithDatabase:(KVOrderedDatabase *)database
117 | {
118 |     self = [super init];
119 |     _iterator = kvdbo_iterator_new([database _db]);
120 |     return self;
121 | }
122 | // Releases the kvdbo iterator.
123 | - (void) dealloc
124 | {
125 |     kvdbo_iterator_free(_iterator);
126 | }
127 | // Positions the iterator on the first key.
128 | - (void) seekToFirstKey
129 | {
130 |     kvdbo_iterator_seek_first(_iterator);
131 | }
132 | // Positions the iterator on the last key.
133 | - (void) seekToLastKey
134 | {
135 |     kvdbo_iterator_seek_last(_iterator);
136 | }
137 | // Seeks via kvdbo_iterator_seek_after using the UTF-8 bytes of key; a nil key leaves the position unchanged.
138 | - (void) seekAfterKey:(NSString *)key
139 | {
140 |     const char * cKey = [key UTF8String];
141 |     if (cKey == NULL) { return; } // nil key: strlen(NULL) below would be undefined behavior
142 |     kvdbo_iterator_seek_after(_iterator, cKey, strlen(cKey));
143 | }
144 | - (void) next // Advances to the next key.
145 | {
146 |     kvdbo_iterator_next(_iterator);
147 | }
148 | // Steps back to the previous key.
149 | - (void) previous
150 | {
151 |     kvdbo_iterator_previous(_iterator);
152 | }
153 | // Returns the key at the current position, decoded as UTF-8.
154 | - (NSString *) currentKey
155 | {
156 |     const char * key = NULL; // initialized so nothing indeterminate is read if get_key sets nothing
157 |     size_t size = 0;
158 |     kvdbo_iterator_get_key(_iterator, &key, &size);
159 |     return [[NSString alloc] initWithBytes:key length:size encoding:NSUTF8StringEncoding];
160 | }
161 | // Reports whether the iterator currently points at a key.
162 | - (BOOL) isValid
163 | {
164 |     return kvdbo_iterator_is_valid(_iterator);
165 | }
166 | 
167 | @end
168 | 
169 | 


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | #set(CMAKE_INCLUDE_CURRENT_DIR ON)
 2 | set(CMAKE_C_FLAGS "-g -O2")  # replaces (does not append to) any previously set C flags
 3 | IF(APPLE)
 4 |   set(CMAKE_CXX_FLAGS "-std=c++11 -stdlib=libc++ -g -O2")
 5 | ELSE()
 6 |   set(CMAKE_CXX_FLAGS "-std=gnu++0x")  # NOTE(review): no -g/-O2 here, unlike the APPLE branch - confirm intended
 7 | ENDIF()
 8 | # Collect every .h/.m/.c file below this directory and mark it as C.
 9 | file(GLOB_RECURSE
10 |     source_files
11 |     *.h
12 |     *.m
13 |     *.c
14 | )
15 | # NOTE(review): source_files is not passed to add_library below, which lists its sources explicitly.
16 | SET_SOURCE_FILES_PROPERTIES(
17 | 	${source_files}
18 |     PROPERTIES LANGUAGE C
19 | )
20 | # Copy kvdb.h, kvdbo.h and sfts.h into include/kvdb inside the build directory.
21 | file(COPY
22 | kvdb.h
23 | kvdbo.h
24 | sfts.h
25 | DESTINATION
26 | ${CMAKE_CURRENT_BINARY_DIR}/include/kvdb
27 | )
28 | # Location of the LZ4 sources, relative to this directory.
29 | set(LZ4_DIR "../third-party/lz4/lib")
30 | 
31 | include_directories(${LZ4_DIR})
32 | # The kvdb library: the project's C/C++ sources plus the LZ4 sources compiled in directly.
33 | add_library (kvdb
34 |     kvassert.c
35 |     kvblock.c
36 |     kvdb.c
37 |     kvprime.c
38 |     kvtable.c
39 |     kvdbo.cpp
40 |     sfts.cpp
41 |     kvunicode.c
42 |     kvserialization.cpp
43 |     ConvertUTF.c
44 |     ${LZ4_DIR}/lz4.c
45 |     ${LZ4_DIR}/lz4hc.c
46 |     ${LZ4_DIR}/lz4frame.c
47 |     ${LZ4_DIR}/xxhash.c
48 | )
49 | 


--------------------------------------------------------------------------------
/src/ConvertUTF.c:
--------------------------------------------------------------------------------
  1 | /*===--- ConvertUTF.c - Universal Character Names conversions ---------------===
  2 |  *
  3 |  *                     The LLVM Compiler Infrastructure
  4 |  *
  5 |  * This file is distributed under the University of Illinois Open Source
  6 |  * License. See LICENSE.TXT for details.
  7 |  *
  8 |  *===------------------------------------------------------------------------=*/
  9 | /*
 10 |  * Copyright 2001-2004 Unicode, Inc.
 11 |  * 
 12 |  * Disclaimer
 13 |  * 
 14 |  * This source code is provided as is by Unicode, Inc. No claims are
 15 |  * made as to fitness for any particular purpose. No warranties of any
 16 |  * kind are expressed or implied. The recipient agrees to determine
 17 |  * applicability of information provided. If this file has been
 18 |  * purchased on magnetic or optical media from Unicode, Inc., the
 19 |  * sole remedy for any claim will be exchange of defective media
 20 |  * within 90 days of receipt.
 21 |  * 
 22 |  * Limitations on Rights to Redistribute This Code
 23 |  * 
 24 |  * Unicode, Inc. hereby grants the right to freely use the information
 25 |  * supplied in this file in the creation of products supporting the
 26 |  * Unicode Standard, and to make copies of this file in any form
 27 |  * for internal or external distribution as long as this notice
 28 |  * remains attached.
 29 |  */
 30 | 
 31 | /* ---------------------------------------------------------------------
 32 | 
 33 |     Conversions between UTF32, UTF-16, and UTF-8. Source code file.
 34 |     Author: Mark E. Davis, 1994.
 35 |     Rev History: Rick McGowan, fixes & updates May 2001.
 36 |     Sept 2001: fixed const & error conditions per
 37 |         mods suggested by S. Parent & A. Lillich.
 38 |     June 2002: Tim Dodd added detection and handling of incomplete
 39 |         source sequences, enhanced error detection, added casts
 40 |         to eliminate compiler warnings.
 41 |     July 2003: slight mods to back out aggressive FFFE detection.
 42 |     Jan 2004: updated switches in from-UTF8 conversions.
 43 |     Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.
 44 | 
 45 |     See the header file "ConvertUTF.h" for complete documentation.
 46 | 
 47 | ------------------------------------------------------------------------ */
 48 | 
 49 | 
 50 | #include "ConvertUTF.h"
 51 | #ifdef CVTUTF_DEBUG
 52 | #include <stdio.h>
 53 | #endif
 54 | #include <assert.h>
 55 | 
 56 | static const int halfShift  = 10; /* used for shifting by 10 bits */
 57 | /* halfBase is removed before a code point is split into surrogates and added back when a pair is joined. */
 58 | static const UTF32 halfBase = 0x0010000UL;
 59 | static const UTF32 halfMask = 0x3FFUL; /* keeps the low 10 bits, which go into the low surrogate */
 60 | /* Bounds of the high and low surrogate ranges. */
 61 | #define UNI_SUR_HIGH_START  (UTF32)0xD800
 62 | #define UNI_SUR_HIGH_END    (UTF32)0xDBFF
 63 | #define UNI_SUR_LOW_START   (UTF32)0xDC00
 64 | #define UNI_SUR_LOW_END     (UTF32)0xDFFF
 65 | #define false      0
 66 | #define true        1
 67 | 
 68 | /* --------------------------------------------------------------------- */
 69 | 
 70 | /*
 71 |  * Index into the table below with the first byte of a UTF-8 sequence to
 72 |  * get the number of trailing bytes that are supposed to follow it.
 73 |  * Note that *legal* UTF-8 sequences can't have 4 or 5 trailing bytes. Those
 74 |  * entries are left as-is for anyone who may want to do such conversion,
 75 |  * which was allowed in earlier algorithms.
 76 |  */
 77 | static const char trailingBytesForUTF8[256] = {
 78 |     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 79 |     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 80 |     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 81 |     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 82 |     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 83 |     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 84 |     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 85 |     2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
 86 | };
 87 | 
 88 | /*
 89 |  * Magic values subtracted from the accumulated bytes of a sequence during
 90 |  * UTF-8 decoding. Indexed by the number of trailing bytes, so there is
 91 |  * one entry for each possible trailing-byte count (0 through 5).
 92 |  */
 93 | static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 
 94 |                      0x03C82080UL, 0xFA082080UL, 0x82082080UL };
 95 | 
 96 | /*
 97 |  * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
 98 |  * into the first byte, depending on how many bytes the sequence has. The
 99 |  * table is indexed by the total byte count of the sequence (one-byte
100 |  * sequence, two-byte... etc.). Remember that sequences
101 |  * for *legal* UTF-8 will be 4 or fewer bytes total.
102 |  */
103 | static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
104 | 
105 | /* --------------------------------------------------------------------- */
106 | 
107 | /* The interface converts a whole buffer to avoid function-call overhead.
108 |  * Constants have been gathered. Loops & conditionals have been removed as
109 |  * much as possible for efficiency, in favor of drop-through switches.
110 |  * (See "Note A" at the bottom of the file for equivalent code.)
111 |  * If your compiler supports it, the "isLegalUTF8" call can be turned
112 |  * into an inline function.
113 |  */
114 | 
115 | 
116 | /* --------------------------------------------------------------------- */
117 | /* Converts UTF-32 from *sourceStart into UTF-16 at *targetStart; both cursors are advanced on return. */
118 | ConversionResult ConvertUTF32toUTF16 (
119 |         const UTF32** sourceStart, const UTF32* sourceEnd, 
120 |         UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
121 |     ConversionResult result = conversionOK;
122 |     const UTF32* source = *sourceStart;
123 |     UTF16* target = *targetStart;
124 |     while (source < sourceEnd) {
125 |         UTF32 ch;
126 |         if (target >= targetEnd) {
127 |             result = targetExhausted; break;
128 |         }
129 |         ch = *source++;
130 |         if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
131 |             /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */
132 |             if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
133 |                 if (flags == strictConversion) {
134 |                     --source; /* return to the illegal value itself */
135 |                     result = sourceIllegal;
136 |                     break;
137 |                 } else {
138 |                     *target++ = UNI_REPLACEMENT_CHAR;
139 |                 }
140 |             } else {
141 |                 *target++ = (UTF16)ch; /* normal case */
142 |             }
143 |         } else if (ch > UNI_MAX_LEGAL_UTF32) {
144 |             if (flags == strictConversion) {
145 |                 --source; /* return to the illegal value itself */
146 |                 result = sourceIllegal; break; /* stop here, exactly as for an illegal surrogate */
147 |             }
148 |             *target++ = UNI_REPLACEMENT_CHAR; /* lenient: substitute and keep converting */
149 |         } else {
150 |             /* target is a character in range 0xFFFF - 0x10FFFF. */
151 |             if (target + 1 >= targetEnd) {
152 |                 --source; /* Back up source pointer! */
153 |                 result = targetExhausted; break;
154 |             }
155 |             ch -= halfBase;
156 |             *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
157 |             *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
158 |         }
159 |     }
160 |     *sourceStart = source; /* report how far the input was consumed */
161 |     *targetStart = target; /* report how far the output was written */
162 |     return result;
163 | }
164 | 
165 | /* --------------------------------------------------------------------- */
166 | /* Converts UTF-16 from *sourceStart into UTF-32 at *targetStart; both cursors are advanced on return. */
167 | ConversionResult ConvertUTF16toUTF32 (
168 |         const UTF16** sourceStart, const UTF16* sourceEnd, 
169 |         UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
170 |     ConversionResult result = conversionOK;
171 |     const UTF16* source = *sourceStart;
172 |     UTF32* target = *targetStart;
173 |     UTF32 ch = 0, ch2 = 0; /* zeroed so the CVTUTF_DEBUG report below never reads an unset value */
174 |     while (source < sourceEnd) {
175 |         const UTF16* oldSource = source; /*  In case we have to back up because of target overflow. */
176 |         ch = *source++;
177 |         /* If we have a surrogate pair, convert to UTF32 first. */
178 |         if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
179 |             /* If the 16 bits following the high surrogate are in the source buffer... */
180 |             if (source < sourceEnd) {
181 |                 ch2 = *source;
182 |                 /* If it's a low surrogate, convert to UTF32. */
183 |                 if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
184 |                     ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
185 |                         + (ch2 - UNI_SUR_LOW_START) + halfBase;
186 |                     ++source;
187 |                 } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
188 |                     --source; /* return to the illegal value itself */
189 |                     result = sourceIllegal;
190 |                     break;
191 |                 } /* lenient: the unpaired high surrogate is stored unchanged below */
192 |             } else { /* We don't have the 16 bits following the high surrogate. */
193 |                 --source; /* return to the high surrogate */
194 |                 result = sourceExhausted;
195 |                 break;
196 |             }
197 |         } else if (flags == strictConversion) {
198 |             /* UTF-16 surrogate values are illegal in UTF-32 */
199 |             if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
200 |                 --source; /* return to the illegal value itself */
201 |                 result = sourceIllegal;
202 |                 break;
203 |             }
204 |         }
205 |         if (target >= targetEnd) {
206 |             source = oldSource; /* Back up source pointer! */
207 |             result = targetExhausted; break;
208 |         }
209 |         *target++ = ch;
210 |     }
211 |     *sourceStart = source;
212 |     *targetStart = target;
213 | #ifdef CVTUTF_DEBUG
214 | if (result == sourceIllegal) {
215 |     fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2);
216 |     fflush(stderr);
217 | }
218 | #endif
219 |     return result;
220 | }
221 | ConversionResult ConvertUTF16toUTF8 ( /* UTF-16 to UTF-8; *sourceStart and *targetStart are advanced on return */
222 |         const UTF16** sourceStart, const UTF16* sourceEnd, 
223 |         UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
224 |     ConversionResult result = conversionOK;
225 |     const UTF16* source = *sourceStart;
226 |     UTF8* target = *targetStart;
227 |     while (source < sourceEnd) {
228 |         UTF32 ch;
229 |         unsigned short bytesToWrite = 0;
230 |         const UTF32 byteMask = 0xBF; /* applied after byteMark: clears bit 6 so the byte reads 10xxxxxx */
231 |         const UTF32 byteMark = 0x80; /* sets the top bit of every trailing byte */
232 |         const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
233 |         ch = *source++;
234 |         /* If we have a surrogate pair, convert to UTF32 first. */
235 |         if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
236 |             /* If the 16 bits following the high surrogate are in the source buffer... */
237 |             if (source < sourceEnd) {
238 |                 UTF32 ch2 = *source;
239 |                 /* If it's a low surrogate, convert to UTF32. */
240 |                 if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
241 |                     ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
242 |                         + (ch2 - UNI_SUR_LOW_START) + halfBase;
243 |                     ++source;
244 |                 } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
245 |                     --source; /* return to the illegal value itself */
246 |                     result = sourceIllegal;
247 |                     break;
248 |                 }
249 |             } else { /* We don't have the 16 bits following the high surrogate. */
250 |                 --source; /* return to the high surrogate */
251 |                 result = sourceExhausted;
252 |                 break;
253 |             }
254 |         } else if (flags == strictConversion) {
255 |             /* UTF-16 surrogate values are illegal in UTF-32 */
256 |             if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
257 |                 --source; /* return to the illegal value itself */
258 |                 result = sourceIllegal;
259 |                 break;
260 |             }
261 |         }
262 |         /* Figure out how many bytes the result will require */
263 |         if (ch < (UTF32)0x80) {      bytesToWrite = 1;
264 |         } else if (ch < (UTF32)0x800) {     bytesToWrite = 2;
265 |         } else if (ch < (UTF32)0x10000) {   bytesToWrite = 3;
266 |         } else if (ch < (UTF32)0x110000) {  bytesToWrite = 4;
267 |         } else {                            bytesToWrite = 3; /* out of range: emit the replacement character */
268 |                                             ch = UNI_REPLACEMENT_CHAR;
269 |         }
270 |         /* Advance past the whole sequence first; the switch below stores its bytes last-to-first. */
271 |         target += bytesToWrite;
272 |         if (target > targetEnd) {
273 |             source = oldSource; /* Back up source pointer! */
274 |             target -= bytesToWrite; result = targetExhausted; break;
275 |         }
276 |         switch (bytesToWrite) { /* note: everything falls through. */
277 |             case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
278 |             case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
279 |             case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
280 |             case 1: *--target =  (UTF8)(ch | firstByteMark[bytesToWrite]);
281 |         }
282 |         target += bytesToWrite; /* the switch left target on the first byte; move past the sequence */
283 |     }
284 |     *sourceStart = source;
285 |     *targetStart = target;
286 |     return result;
287 | }
288 | 
289 | /* --------------------------------------------------------------------- */
290 | /* Converts UTF-32 from *sourceStart into UTF-8 at *targetStart; both cursors are advanced on return. */
291 | ConversionResult ConvertUTF32toUTF8 (
292 |         const UTF32** sourceStart, const UTF32* sourceEnd, 
293 |         UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
294 |     ConversionResult result = conversionOK;
295 |     const UTF32* source = *sourceStart;
296 |     UTF8* target = *targetStart;
297 |     while (source < sourceEnd) {
298 |         UTF32 ch;
299 |         unsigned short bytesToWrite = 0;
300 |         const UTF32 byteMask = 0xBF; /* applied after byteMark: clears bit 6 so the byte reads 10xxxxxx */
301 |         const UTF32 byteMark = 0x80; /* sets the top bit of every trailing byte */
302 |         ch = *source++;
303 |         if (flags == strictConversion ) {
304 |             /* UTF-16 surrogate values are illegal in UTF-32 */
305 |             if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
306 |                 --source; /* return to the illegal value itself */
307 |                 result = sourceIllegal;
308 |                 break;
309 |             }
310 |         }
311 |         /*
312 |          * Figure out how many bytes the result will require. Turn any
313 |          * illegally large UTF32 things (> Plane 17) into replacement chars.
314 |          */
315 |         if (ch < (UTF32)0x80) {      bytesToWrite = 1;
316 |         } else if (ch < (UTF32)0x800) {     bytesToWrite = 2;
317 |         } else if (ch < (UTF32)0x10000) {   bytesToWrite = 3;
318 |         } else if (ch <= UNI_MAX_LEGAL_UTF32) {  bytesToWrite = 4;
319 |         } else {                            bytesToWrite = 3;
320 |                                             ch = UNI_REPLACEMENT_CHAR;
321 |                                             result = sourceIllegal; /* recorded in strict and lenient mode alike; the loop continues */
322 |         }
323 |         /* Advance past the whole sequence first; the switch below stores its bytes last-to-first. */
324 |         target += bytesToWrite;
325 |         if (target > targetEnd) {
326 |             --source; /* Back up source pointer! */
327 |             target -= bytesToWrite; result = targetExhausted; break;
328 |         }
329 |         switch (bytesToWrite) { /* note: everything falls through. */
330 |             case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
331 |             case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
332 |             case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
333 |             case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]);
334 |         }
335 |         target += bytesToWrite; /* the switch left target on the first byte; move past the sequence */
336 |     }
337 |     *sourceStart = source;
338 |     *targetStart = target;
339 |     return result;
340 | }
341 | 
342 | /* --------------------------------------------------------------------- */
343 | 
344 | /*
345 |  * Utility routine to tell whether a sequence of bytes is legal UTF-8.
346 |  * This must be called with the length pre-determined by the first byte.
347 |  * If not calling this from ConvertUTF8to*, then the length can be set by:
348 |  *  length = trailingBytesForUTF8[*source]+1;
349 |  * and the sequence is illegal right away if there aren't that many bytes
350 |  * available.
351 |  * If presented with a length outside 1..4, this returns false.  The Unicode
352 |  * definition of UTF-8 goes up to 4-byte sequences.
353 |  */
354 | 
355 | static Boolean isLegalUTF8(const UTF8 *source, int length) {
356 |     UTF8 a;
357 |     const UTF8 *srcptr = source+length; /* one past the last byte; bytes are checked back to front */
358 |     switch (length) {
359 |     default: return false;
360 |         /* Everything else falls through when "true"... */
361 |     case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
362 |     case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
363 |     case 2: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
364 |         /* a now holds the second byte; its permitted range depends on the first byte */
365 |         switch (*source) {
366 |             /* no fall-through in this inner switch */
367 |             case 0xE0: if (a < 0xA0) return false; break;
368 |             case 0xED: if (a > 0x9F) return false; break;
369 |             case 0xF0: if (a < 0x90) return false; break;
370 |             case 0xF4: if (a > 0x8F) return false; break;
371 |             default:   if (a < 0x80) return false;
372 |         }
373 |         /* the first byte must not be a continuation byte (0x80-0xBF) nor 0xC0/0xC1 */
374 |     case 1: if (*source >= 0x80 && *source < 0xC2) return false;
375 |     }
376 |     if (*source > 0xF4) return false; /* no legal sequence starts with a byte above 0xF4 */
377 |     return true;
378 | }
379 | 
380 | /* --------------------------------------------------------------------- */
381 | 
382 | /*
383 |  * Exported function to return whether a UTF-8 sequence is legal or not.
384 |  * Returns false when the range is empty or ends before the sequence does.
385 |  */
386 | Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
387 |     int length;
388 |     if (source >= sourceEnd) return false; /* empty range: *source must not be read */
389 |     length = trailingBytesForUTF8[*source]+1;
390 |     if (length > sourceEnd - source) return false; /* sequence is cut off by sourceEnd */
391 |     return isLegalUTF8(source, length);
392 | }
393 | 
394 | /* --------------------------------------------------------------------- */
395 | /* Returns the length in bytes of the maximal subpart of the ill-formed sequence at source (0 for an empty range). */
396 | static unsigned
397 | findMaximalSubpartOfIllFormedUTF8Sequence(const UTF8 *source,
398 |                                           const UTF8 *sourceEnd) {
399 |   UTF8 b1, b2, b3;
400 | 
401 |   assert(!isLegalUTF8Sequence(source, sourceEnd));
402 | 
403 |   /*
404 |    * Unicode 6.3.0, D93b:
405 |    *
406 |    *   Maximal subpart of an ill-formed subsequence: The longest code unit
407 |    *   subsequence starting at an unconvertible offset that is either:
408 |    *   a. the initial subsequence of a well-formed code unit sequence, or
409 |    *   b. a subsequence of length one.
410 |    */
411 | 
412 |   if (source == sourceEnd)
413 |     return 0;
414 | 
415 |   /*
416 |    * Perform case analysis.  See Unicode 6.3.0, Table 3-7. Well-Formed UTF-8
417 |    * Byte Sequences.
418 |    */
419 | 
420 |   b1 = *source;
421 |   ++source;
422 |   if (b1 >= 0xC2 && b1 <= 0xDF) {
423 |     /*
424 |      * First byte is valid, but we know that this code unit sequence is
425 |      * invalid, so the maximal subpart has to end after the first byte.
426 |      */
427 |     return 1;
428 |   }
429 | 
430 |   if (source == sourceEnd)
431 |     return 1;
432 | 
433 |   b2 = *source;
434 |   ++source;
435 |   /* Three-byte lead bytes: the subpart is 2 bytes when the second byte is in range, otherwise 1. */
436 |   if (b1 == 0xE0) {
437 |     return (b2 >= 0xA0 && b2 <= 0xBF) ? 2 : 1;
438 |   }
439 |   if (b1 >= 0xE1 && b1 <= 0xEC) {
440 |     return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1;
441 |   }
442 |   if (b1 == 0xED) {
443 |     return (b2 >= 0x80 && b2 <= 0x9F) ? 2 : 1;
444 |   }
445 |   if (b1 >= 0xEE && b1 <= 0xEF) {
446 |     return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1;
447 |   }
448 |   if (b1 == 0xF0) { /* four-byte lead bytes: a valid third byte extends the subpart to 3 */
449 |     if (b2 >= 0x90 && b2 <= 0xBF) {
450 |       if (source == sourceEnd)
451 |         return 2;
452 | 
453 |       b3 = *source;
454 |       return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
455 |     }
456 |     return 1;
457 |   }
458 |   if (b1 >= 0xF1 && b1 <= 0xF3) {
459 |     if (b2 >= 0x80 && b2 <= 0xBF) {
460 |       if (source == sourceEnd)
461 |         return 2;
462 | 
463 |       b3 = *source;
464 |       return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
465 |     }
466 |     return 1;
467 |   }
468 |   if (b1 == 0xF4) {
469 |     if (b2 >= 0x80 && b2 <= 0x8F) {
470 |       if (source == sourceEnd)
471 |         return 2;
472 | 
473 |       b3 = *source;
474 |       return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
475 |     }
476 |     return 1;
477 |   }
478 | 
479 |   assert((b1 >= 0x80 && b1 <= 0xC1) || b1 >= 0xF5);
480 |   /*
481 |    * There are no valid sequences that start with these bytes.  Maximal subpart
482 |    * is defined to have length 1 in these cases.
483 |    */
484 |   return 1;
485 | }
486 | 
487 | /* --------------------------------------------------------------------- */
488 | 
489 | /* Exported helper: given the first byte of a UTF-8 sequence, reports
490 |  * the total size of that sequence in bytes, i.e. the first byte itself
491 |  * plus the trailing bytes the table assigns to it. */
492 | unsigned getNumBytesForUTF8(UTF8 first) {
493 |   const int trailingCount = trailingBytesForUTF8[first];
494 |   return trailingCount + 1;
495 | }
496 | 
497 | /* --------------------------------------------------------------------- */
498 | 
499 | /*
500 |  * Exported helper: validates every UTF-8 sequence from *source up to sourceEnd.
501 |  * On return *source is at the first sequence that failed, or equals sourceEnd on success.
502 |  */
503 | Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) {
504 |     const UTF8 *cursor;
505 |     for (cursor = *source; cursor != sourceEnd; *source = cursor) {
506 |         const int seqLen = 1 + trailingBytesForUTF8[*cursor];
507 |         if (sourceEnd - cursor < seqLen || !isLegalUTF8(cursor, seqLen)) return false;
508 |         cursor += seqLen; /* published to *source by the loop's update expression */
509 |     }
510 |     return true;
511 | }
512 | 
513 | /* --------------------------------------------------------------------- */
514 | /* Converts UTF-8 from *sourceStart into UTF-16 at *targetStart; both cursors are advanced on return. */
515 | ConversionResult ConvertUTF8toUTF16 (
516 |         const UTF8** sourceStart, const UTF8* sourceEnd, 
517 |         UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
518 |     ConversionResult result = conversionOK;
519 |     const UTF8* source = *sourceStart;
520 |     UTF16* target = *targetStart;
521 |     while (source < sourceEnd) {
522 |         UTF32 ch = 0;
523 |         unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
524 |         if (extraBytesToRead >= sourceEnd - source) {
525 |             result = sourceExhausted; break;
526 |         }
527 |         /* Do this check whether lenient or strict */
528 |         if (!isLegalUTF8(source, extraBytesToRead+1)) {
529 |             result = sourceIllegal;
530 |             if (flags == strictConversion) {
531 |                 /* Abort conversion. */
532 |                 break;
533 |             } else {
534 |                 /* Replace the maximal subpart of the ill-formed sequence with a
535 |                  * replacement character, but never write past targetEnd. */
536 |                 if (target >= targetEnd) {
537 |                     result = targetExhausted; break;
538 |                 }
539 |                 source += findMaximalSubpartOfIllFormedUTF8Sequence(source, sourceEnd);
540 |                 *target++ = UNI_REPLACEMENT_CHAR;
541 |                 continue;
542 |             }
543 |         }
544 |         /*
545 |          * The cases all fall through. See "Note A" below.
546 |          */
547 |         switch (extraBytesToRead) {
548 |             case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
549 |             case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
550 |             case 3: ch += *source++; ch <<= 6;
551 |             case 2: ch += *source++; ch <<= 6;
552 |             case 1: ch += *source++; ch <<= 6;
553 |             case 0: ch += *source++;
554 |         }
555 |         ch -= offsetsFromUTF8[extraBytesToRead]; /* removes the marker bits accumulated with the raw bytes */
556 | 
557 |         if (target >= targetEnd) {
558 |             source -= (extraBytesToRead+1); /* Back up source pointer! */
559 |             result = targetExhausted; break;
560 |         }
561 |         if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
562 |             /* UTF-16 surrogate values are illegal in UTF-32 */
563 |             if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
564 |                 if (flags == strictConversion) {
565 |                     source -= (extraBytesToRead+1); /* return to the illegal value itself */
566 |                     result = sourceIllegal;
567 |                     break;
568 |                 } else {
569 |                     *target++ = UNI_REPLACEMENT_CHAR;
570 |                 }
571 |             } else {
572 |                 *target++ = (UTF16)ch; /* normal case */
573 |             }
574 |         } else if (ch > UNI_MAX_UTF16) {
575 |             if (flags == strictConversion) {
576 |                 result = sourceIllegal;
577 |                 source -= (extraBytesToRead+1); /* return to the start */
578 |                 break; /* Bail out; shouldn't continue */
579 |             } else {
580 |                 *target++ = UNI_REPLACEMENT_CHAR;
581 |             }
582 |         } else {
583 |             /* target is a character in range 0xFFFF - 0x10FFFF. */
584 |             if (target + 1 >= targetEnd) {
585 |                 source -= (extraBytesToRead+1); /* Back up source pointer! */
586 |                 result = targetExhausted; break;
587 |             }
588 |             ch -= halfBase;
589 |             *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
590 |             *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
591 |         }
592 |     }
593 |     *sourceStart = source;
594 |     *targetStart = target;
595 |     return result;
596 | }
597 | 
598 | /* --------------------------------------------------------------------- */
599 | 
600 | static ConversionResult ConvertUTF8toUTF32Impl(
601 |         const UTF8** sourceStart, const UTF8* sourceEnd, 
602 |         UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags,
603 |         Boolean InputIsPartial) {
604 |     ConversionResult result = conversionOK;
605 |     const UTF8* source = *sourceStart;
606 |     UTF32* target = *targetStart;
607 |     while (source < sourceEnd) {
608 |         UTF32 ch = 0;
609 |         unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
610 |         if (extraBytesToRead >= sourceEnd - source) {
611 |             if (flags == strictConversion || InputIsPartial) {
612 |                 result = sourceExhausted;
613 |                 break;
614 |             } else {
615 |                 result = sourceIllegal;
616 | 
617 |                 /*
618 |                  * Replace the maximal subpart of ill-formed sequence with
619 |                  * replacement character.
620 |                  */
621 |                 source += findMaximalSubpartOfIllFormedUTF8Sequence(source,
622 |                                                                     sourceEnd);
623 |                 *target++ = UNI_REPLACEMENT_CHAR;
624 |                 continue;
625 |             }
626 |         }
627 |         if (target >= targetEnd) {
628 |             result = targetExhausted; break;
629 |         }
630 | 
631 |         /* Do this check whether lenient or strict */
632 |         if (!isLegalUTF8(source, extraBytesToRead+1)) {
633 |             result = sourceIllegal;
634 |             if (flags == strictConversion) {
635 |                 /* Abort conversion. */
636 |                 break;
637 |             } else {
638 |                 /*
639 |                  * Replace the maximal subpart of ill-formed sequence with
640 |                  * replacement character.
641 |                  */
642 |                 source += findMaximalSubpartOfIllFormedUTF8Sequence(source,
643 |                                                                     sourceEnd);
644 |                 *target++ = UNI_REPLACEMENT_CHAR;
645 |                 continue;
646 |             }
647 |         }
648 |         /*
649 |          * The cases all fall through. See "Note A" below.
650 |          */
651 |         switch (extraBytesToRead) {
652 |             case 5: ch += *source++; ch <<= 6;
653 |             case 4: ch += *source++; ch <<= 6;
654 |             case 3: ch += *source++; ch <<= 6;
655 |             case 2: ch += *source++; ch <<= 6;
656 |             case 1: ch += *source++; ch <<= 6;
657 |             case 0: ch += *source++;
658 |         }
659 |         ch -= offsetsFromUTF8[extraBytesToRead];
660 | 
661 |         if (ch <= UNI_MAX_LEGAL_UTF32) {
662 |             /*
663 |              * UTF-16 surrogate values are illegal in UTF-32, and anything
664 |              * over Plane 17 (> 0x10FFFF) is illegal.
665 |              */
666 |             if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
667 |                 if (flags == strictConversion) {
668 |                     source -= (extraBytesToRead+1); /* return to the illegal value itself */
669 |                     result = sourceIllegal;
670 |                     break;
671 |                 } else {
672 |                     *target++ = UNI_REPLACEMENT_CHAR;
673 |                 }
674 |             } else {
675 |                 *target++ = ch;
676 |             }
677 |         } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
678 |             result = sourceIllegal;
679 |             *target++ = UNI_REPLACEMENT_CHAR;
680 |         }
681 |     }
682 |     *sourceStart = source;
683 |     *targetStart = target;
684 |     return result;
685 | }
686 | 
687 | ConversionResult ConvertUTF8toUTF32Partial(const UTF8 **sourceStart,
688 |                                            const UTF8 *sourceEnd,
689 |                                            UTF32 **targetStart,
690 |                                            UTF32 *targetEnd,
691 |                                            ConversionFlags flags) {
692 |   return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd, targetStart, targetEnd,
693 |                                 flags, /*InputIsPartial=*/true);
694 | }
695 | 
696 | ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart,
697 |                                     const UTF8 *sourceEnd, UTF32 **targetStart,
698 |                                     UTF32 *targetEnd, ConversionFlags flags) {
699 |   return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd, targetStart, targetEnd,
700 |                                 flags, /*InputIsPartial=*/false);
701 | }
702 | 
703 | /* ---------------------------------------------------------------------
704 | 
705 |     Note A.
706 |     The fall-through switches in UTF-8 reading code save a
707 |     temp variable, some decrements & conditionals.  The switches
708 |     are equivalent to the following loop:
709 |         {
710 |             int tmpBytesToRead = extraBytesToRead+1;
711 |             do {
712 |                 ch += *source++;
713 |                 --tmpBytesToRead;
714 |                 if (tmpBytesToRead) ch <<= 6;
715 |             } while (tmpBytesToRead > 0);
716 |         }
717 |     In UTF-8 writing code, the switches on "bytesToWrite" are
718 |     similarly unrolled loops.
719 | 
720 |    --------------------------------------------------------------------- */
721 | 


--------------------------------------------------------------------------------
/src/ConvertUTF.h:
--------------------------------------------------------------------------------
  1 | /*===--- ConvertUTF.h - Universal Character Names conversions ---------------===
  2 |  *
  3 |  *                     The LLVM Compiler Infrastructure
  4 |  *
  5 |  * This file is distributed under the University of Illinois Open Source
  6 |  * License. See LICENSE.TXT for details.
  7 |  *
  8 |  *==------------------------------------------------------------------------==*/
  9 | /*
 10 |  * Copyright 2001-2004 Unicode, Inc.
 11 |  *
 12 |  * Disclaimer
 13 |  *
 14 |  * This source code is provided as is by Unicode, Inc. No claims are
 15 |  * made as to fitness for any particular purpose. No warranties of any
 16 |  * kind are expressed or implied. The recipient agrees to determine
 17 |  * applicability of information provided. If this file has been
 18 |  * purchased on magnetic or optical media from Unicode, Inc., the
 19 |  * sole remedy for any claim will be exchange of defective media
 20 |  * within 90 days of receipt.
 21 |  *
 22 |  * Limitations on Rights to Redistribute This Code
 23 |  *
 24 |  * Unicode, Inc. hereby grants the right to freely use the information
 25 |  * supplied in this file in the creation of products supporting the
 26 |  * Unicode Standard, and to make copies of this file in any form
 27 |  * for internal or external distribution as long as this notice
 28 |  * remains attached.
 29 |  */
 30 | 
 31 | /* ---------------------------------------------------------------------
 32 | 
 33 |     Conversions between UTF32, UTF-16, and UTF-8.  Header file.
 34 | 
 35 |     Several functions are included here, forming a complete set of
 36 |     conversions between the three formats.  UTF-7 is not included
 37 |     here, but is handled in a separate source file.
 38 | 
 39 |     Each of these routines takes pointers to input buffers and output
 40 |     buffers.  The input buffers are const.
 41 | 
 42 |     Each routine converts the text between *sourceStart and sourceEnd,
 43 |     putting the result into the buffer between *targetStart and
 44 |     targetEnd. Note: the end pointers are *after* the last item: e.g.
 45 |     *(sourceEnd - 1) is the last item.
 46 | 
 47 |     The return result indicates whether the conversion was successful,
 48 |     and if not, whether the problem was in the source or target buffers.
 49 |     (Only the first encountered problem is indicated.)
 50 | 
 51 |     After the conversion, *sourceStart and *targetStart are both
 52 |     updated to point to the end of last text successfully converted in
 53 |     the respective buffers.
 54 | 
 55 |     Input parameters:
 56 |         sourceStart - pointer to a pointer to the source buffer.
 57 |                 The contents of this are modified on return so that
 58 |                 it points at the next thing to be converted.
 59 |         targetStart - similarly, pointer to pointer to the target buffer.
 60 |         sourceEnd, targetEnd - respectively pointers to the ends of the
 61 |                 two buffers, for overflow checking only.
 62 | 
 63 |     These conversion functions take a ConversionFlags argument. When this
 64 |     flag is set to strict, both irregular sequences and isolated surrogates
 65 |     will cause an error.  When the flag is set to lenient, both irregular
 66 |     sequences and isolated surrogates are converted.
 67 | 
 68 |     Whether the flag is strict or lenient, all illegal sequences will cause
 69 |     an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>,
 70 |     or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code
 71 |     must check for illegal sequences.
 72 | 
 73 |     When the flag is set to lenient, characters over 0x10FFFF are converted
 74 |     to the replacement character; otherwise (when the flag is set to strict)
 75 |     they constitute an error.
 76 | 
 77 |     Output parameters:
 78 |         The value "sourceIllegal" is returned from some routines if the input
 79 |         sequence is malformed.  When "sourceIllegal" is returned, the source
 80 |         value will point to the illegal value that caused the problem. E.g.,
 81 |         in UTF-8 when a sequence is malformed, it points to the start of the
 82 |         malformed sequence.
 83 | 
 84 |     Author: Mark E. Davis, 1994.
 85 |     Rev History: Rick McGowan, fixes & updates May 2001.
 86 |          Fixes & updates, Sept 2001.
 87 | 
 88 | ------------------------------------------------------------------------ */
 89 | 
 90 | #ifndef LLVM_SUPPORT_CONVERTUTF_H
 91 | #define LLVM_SUPPORT_CONVERTUTF_H
 92 | 
 93 | /* ---------------------------------------------------------------------
 94 |     The following 4 definitions are compiler-specific.
 95 |     The C standard does not guarantee that wchar_t has at least
 96 |     16 bits, so wchar_t is no less portable than unsigned short!
 97 |     All should be unsigned values to avoid sign extension during
 98 |     bit mask & shift operations.
 99 | ------------------------------------------------------------------------ */
100 | 
typedef unsigned int    UTF32;  /* at least 32 bits */
typedef unsigned short  UTF16;  /* at least 16 bits */
typedef unsigned char   UTF8;   /* typically 8 bits */
typedef unsigned char   Boolean; /* 0 or 1 */

/*
 * Some fundamental constants.  Each cast is wrapped in an outer pair of
 * parentheses so the macro expands to a single primary expression and stays
 * intact next to operators such as sizeof or a postfix operator.
 */
#define UNI_REPLACEMENT_CHAR ((UTF32)0x0000FFFD) /* substituted for ill-formed input */
#define UNI_MAX_BMP ((UTF32)0x0000FFFF)          /* highest value that fits one UTF-16 unit */
#define UNI_MAX_UTF16 ((UTF32)0x0010FFFF)        /* highest value the UTF-16 converters accept */
#define UNI_MAX_UTF32 ((UTF32)0x7FFFFFFF)
#define UNI_MAX_LEGAL_UTF32 ((UTF32)0x0010FFFF)  /* highest value the UTF-32 converters accept */

#define UNI_MAX_UTF8_BYTES_PER_CODE_POINT 4

#define UNI_UTF16_BYTE_ORDER_MARK_NATIVE  0xFEFF
#define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED 0xFFFE
117 | 
/* Status code returned by every conversion routine declared in this header. */
typedef enum {
  conversionOK    = 0,    /* conversion successful */
  sourceExhausted = 1,    /* partial character in source, but hit end */
  targetExhausted = 2,    /* insuff. room in target for conversion */
  sourceIllegal   = 3     /* source sequence is illegal/malformed */
} ConversionResult;
124 | 
/* Selects how the conversion routines react to ill-formed input. */
typedef enum {
  strictConversion  = 0,  /* stop and report an error */
  lenientConversion = 1   /* substitute and keep converting */
} ConversionFlags;
129 | 
130 | /* This is for C++ and does no harm in C */
131 | #ifdef __cplusplus
132 | extern "C" {
133 | #endif
134 | 
135 | #include "ConvertUTFNamespace.h"
136 | 
/**
 * Convert UTF-8 text to UTF-16, following the buffer-pointer and flag
 * conventions described at the top of this header.  A code point above
 * 0xFFFF is written as a surrogate pair and therefore needs two free UTF16
 * slots in the target.
 */
ConversionResult ConvertUTF8toUTF16 (
  const UTF8** sourceStart, const UTF8* sourceEnd,
  UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);

/**
 * Convert a possibly incomplete chunk of UTF8 to UTF32.  If the chunk ends
 * in an incomplete code unit sequence, conversion stops in front of that
 * sequence and \c sourceExhausted is returned, whatever \p flags says.
 */
ConversionResult ConvertUTF8toUTF32Partial(
  const UTF8** sourceStart, const UTF8* sourceEnd,
  UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
148 | 
149 | /**
150 |  * Convert a complete UTF8 buffer to UTF32.  An incomplete trailing sequence
151 |  * returns \c sourceExhausted if strict, else U+FFFD and \c sourceIllegal.
152 |  */
153 | ConversionResult ConvertUTF8toUTF32(
154 |   const UTF8** sourceStart, const UTF8* sourceEnd,
155 |   UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
156 | 
/*
 * The four converters below use the same calling convention as the ones
 * above: see the description of sourceStart/sourceEnd, targetStart/targetEnd
 * and flags at the top of this header.
 */

/** Convert UTF-16 text to UTF-8. */
ConversionResult ConvertUTF16toUTF8 (
  const UTF16** sourceStart, const UTF16* sourceEnd,
  UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);

/** Convert UTF-32 text to UTF-8. */
ConversionResult ConvertUTF32toUTF8 (
  const UTF32** sourceStart, const UTF32* sourceEnd,
  UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);

/** Convert UTF-16 text to UTF-32. */
ConversionResult ConvertUTF16toUTF32 (
  const UTF16** sourceStart, const UTF16* sourceEnd,
  UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);

/** Convert UTF-32 text to UTF-16. */
ConversionResult ConvertUTF32toUTF16 (
  const UTF32** sourceStart, const UTF32* sourceEnd,
  UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);

/*
 * NOTE(review): the implementations of the next two functions are not part
 * of the code reviewed here.  Presumably the first validates the single
 * sequence starting at source and the second validates a whole buffer,
 * updating *source as it goes -- confirm against ConvertUTF.c.
 */
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);

Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd);

/*
 * convertUTF8Sequence() further down uses the result as the total byte
 * length of the sequence introduced by firstByte.
 */
unsigned getNumBytesForUTF8(UTF8 firstByte);
178 | 
179 | #ifdef __cplusplus
180 | }
181 | 
182 | #if 0 // ignored in mailcore2
183 | /*************************************************************************/
184 | /* Below are LLVM-specific wrappers of the functions above. */
185 | 
186 | #include "llvm/ADT/ArrayRef.h"
187 | #include "llvm/ADT/StringRef.h"
188 | 
189 | namespace llvm {
190 | 
191 | /**
192 |  * Convert an UTF8 StringRef to UTF8, UTF16, or UTF32 depending on
193 |  * WideCharWidth. The converted data is written to ResultPtr, which needs to
194 |  * point to at least WideCharWidth * (Source.Size() + 1) bytes. On success,
195 |  * ResultPtr will point one after the end of the copied string. On failure,
196 |  * ResultPtr will not be changed, and ErrorPtr will be set to the location of
197 |  * the first character which could not be converted.
198 |  * \return true on success.
199 |  */
200 | bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source,
201 |                        char *&ResultPtr, const UTF8 *&ErrorPtr);
202 | 
203 | /**
204 |  * Convert an Unicode code point to UTF8 sequence.
205 |  *
206 |  * \param Source a Unicode code point.
207 |  * \param [in,out] ResultPtr pointer to the output buffer, needs to be at least
208 |  * \c UNI_MAX_UTF8_BYTES_PER_CODE_POINT bytes.  On success \c ResultPtr is
209 |  * updated one past end of the converted sequence.
210 |  *
211 |  * \returns true on success.
212 |  */
213 | bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr);
214 | 
215 | /**
216 |  * Convert the first UTF8 sequence in the given source buffer to a UTF32
217 |  * code point.
218 |  *
219 |  * \param [in,out] source A pointer to the source buffer. If the conversion
220 |  * succeeds, this pointer will be updated to point to the byte just past the
221 |  * end of the converted sequence.
222 |  * \param sourceEnd A pointer just past the end of the source buffer.
223 |  * \param [out] target The converted code
224 |  * \param flags Whether the conversion is strict or lenient.
225 |  *
226 |  * \returns conversionOK on success
227 |  *
228 |  * \sa ConvertUTF8toUTF32
229 |  */
230 | static inline ConversionResult convertUTF8Sequence(const UTF8 **source,
231 |                                                    const UTF8 *sourceEnd,
232 |                                                    UTF32 *target,
233 |                                                    ConversionFlags flags) {
234 |   if (*source == sourceEnd)
235 |     return sourceExhausted;
236 |   unsigned size = getNumBytesForUTF8(**source);
237 |   if ((ptrdiff_t)size > sourceEnd - *source)
238 |     return sourceExhausted;
239 |   return ConvertUTF8toUTF32(source, *source + size, &target, target + 1, flags);
240 | }
241 | 
242 | /**
243 |  * Returns true if a blob of text starts with a UTF-16 big or little endian byte
244 |  * order mark.
245 |  */
246 | bool hasUTF16ByteOrderMark(ArrayRef<char> SrcBytes);
247 | 
248 | /**
249 |  * Converts a stream of raw bytes assumed to be UTF16 into a UTF8 std::string.
250 |  *
251 |  * \param [in] SrcBytes A buffer of what is assumed to be UTF-16 encoded text.
252 |  * \param [out] Out Converted UTF-8 is stored here on success.
253 |  * \returns true on success
254 |  */
255 | bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out);
256 | 
257 | } /* end namespace llvm */
258 | #endif // ignored in mailcore2
259 | 
260 | #endif
261 | 
262 | /* --------------------------------------------------------------------- */
263 | 
264 | #endif
265 | 


--------------------------------------------------------------------------------
/src/ConvertUTFNamespace.h:
--------------------------------------------------------------------------------
 1 | //
 2 | //  ConvertUTFNamespace.h
 3 | //  kvdb
 4 | //
 5 | //  Created by Hoa Dinh on 3/19/15.
 6 | //  Copyright (c) 2015 etpan. All rights reserved.
 7 | //
 8 | 
#ifndef kvdb_ConvertUTFNamespace_h
#define kvdb_ConvertUTFNamespace_h

/*
 * Renames every public ConvertUTF entry point to a kv_-prefixed symbol.
 * ConvertUTF.h includes this header before declaring the functions, so the
 * declarations and every call site that goes through that header use the
 * prefixed names.
 * NOTE(review): presumably this keeps the bundled copy from clashing at link
 * time with another copy of ConvertUTF in the same program -- confirm.
 */
#define ConvertUTF16toUTF32 kv_ConvertUTF16toUTF32
#define ConvertUTF16toUTF8 kv_ConvertUTF16toUTF8
#define ConvertUTF32toUTF16 kv_ConvertUTF32toUTF16
#define ConvertUTF32toUTF8 kv_ConvertUTF32toUTF8
#define ConvertUTF8toUTF16 kv_ConvertUTF8toUTF16
#define ConvertUTF8toUTF32 kv_ConvertUTF8toUTF32
#define ConvertUTF8toUTF32Partial kv_ConvertUTF8toUTF32Partial
#define getNumBytesForUTF8 kv_getNumBytesForUTF8
#define isLegalUTF8Sequence kv_isLegalUTF8Sequence
#define isLegalUTF8String kv_isLegalUTF8String

#endif
24 | 


--------------------------------------------------------------------------------
/src/kvassert.c:
--------------------------------------------------------------------------------
 1 | //
 2 | //  kvassert.c
 3 | //  kvdb
 4 | //
 5 | //  Created by DINH Viêt Hoà on 6/1/13.
 6 | //  Copyright (c) 2013 etpan. All rights reserved.
 7 | //
 8 | 
 9 | #include <stdio.h>
10 | #include <stdlib.h>
11 | 
/*
 * Runtime half of the KVDBAssert() macro.
 *
 * filename, line  source location of the assertion
 * cond            the asserted condition; non-zero means it holds
 * condString      text of the asserted expression, printed on failure
 *
 * Returns normally when cond is non-zero.  Otherwise prints
 * "<filename>:<line>: assert <condString>" to stderr and calls abort().
 */
void assertInternal(const char * filename, unsigned int line, int cond, const char * condString)
{
    if (cond) {
        return;
    }
    
    /* line is unsigned, so it is printed with %u rather than %i. */
    fprintf(stderr, "%s:%u: assert %s\n", filename, line, condString);
    abort();
}
21 | 


--------------------------------------------------------------------------------
/src/kvassert.h:
--------------------------------------------------------------------------------
 1 | //
 2 | //  kvassert.h
 3 | //  kvdb
 4 | //
 5 | //  Created by DINH Viêt Hoà on 6/1/13.
 6 | //  Copyright (c) 2013 etpan. All rights reserved.
 7 | //
 8 | 
 9 | #ifndef KV_ASSERT_H
10 | #define KV_ASSERT_H
11 | 
12 | #ifdef __cplusplus
13 | extern "C" {
14 | #endif
15 | 
/*
 * Abort with a diagnostic naming the file, line and expression when cond is
 * false.  cond is reduced to 0 or 1 with !! before it is handed over as an
 * int, so pointers and integers wider than int are tested for "non-zero"
 * instead of being converted (and possibly truncated to 0) on the way in.
 */
#define KVDBAssert(cond) assertInternal(__FILE__, __LINE__, !!(cond), #cond)

/* Implementation of KVDBAssert(); returns only when cond is non-zero. */
void assertInternal(const char * filename, unsigned int line, int cond, const char * condString);
19 | 
20 | #ifdef __cplusplus
21 | }
22 | #endif
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/src/kvblock.c:
--------------------------------------------------------------------------------
  1 | //
  2 | //  kvblock.c
  3 | //  kvdb
  4 | //
  5 | //  Created by DINH Viêt Hoà on 6/2/13.
  6 | //  Copyright (c) 2013 etpan. All rights reserved.
  7 | //
  8 | 
  9 | #include "kvblock.h"
 10 | 
 11 | #include <sys/types.h>
 12 | #include <sys/uio.h>
 13 | #include <unistd.h>
 14 | #include <string.h>
 15 | #include <stdlib.h>
 16 | 
 17 | #include "kvdb.h"
 18 | #include "kvtypes.h"
 19 | #include "kvendian.h"
 20 | #include "kvpaddingutils.h"
 21 | 
 22 | int kv_block_recycle(kvdb * db, uint64_t offset)
 23 | {
 24 |     uint8_t log2_size;
 25 |     ssize_t count;
 26 |     
 27 |     count = pread(db->kv_fd, &log2_size, 1, offset + 8 + 4);
 28 |     if (count < 0)
 29 |         return -1;
 30 |     uint64_t next_free_offset = db->kv_free_blocks[log2_size];
 31 |     // keep it in network order.
 32 |     count = pwrite(db->kv_fd, &next_free_offset, sizeof(next_free_offset), offset);
 33 |     if (count < 0)
 34 |         return -1;
 35 |     db->kv_free_blocks[log2_size] = hton64(offset);
 36 |     
 37 |     return 0;
 38 | }
 39 | 
 40 | uint64_t kv_block_create(kvdb * db, uint64_t next_block_offset, uint32_t hash_value,
 41 |                          const char * key, size_t key_size,
 42 |                          const char * value, size_t value_size)
 43 | {
 44 |     uint64_t block_size = block_size_round_up(key_size + value_size);
 45 |     uint8_t log2_size = log2_round_up(block_size);
 46 |     uint64_t offset = ntoh64(db->kv_free_blocks[log2_size]);
 47 |     int use_new_block = 0;
 48 |     //fprintf(stderr, "key, value: %i %i\n", (int) key_size, (int) value_size);
 49 |     if (offset != 0) {
 50 |         // Use free block.
 51 |         uint64_t next_free_offset;
 52 |         //fprintf(stderr, "Use free block %i %i %i\n", (int) offset, (int) log2_size, (int)block_size);
 53 |         // keep it in network order.
 54 |         pread(db->kv_fd, &next_free_offset, sizeof(next_free_offset), offset);
 55 |         db->kv_free_blocks[log2_size] = next_free_offset;
 56 |     }
 57 |     else {
 58 |         // Use new block.
 59 |         offset = ntoh64(* db->kv_filesize);
 60 |         use_new_block = 1;
 61 |     }
 62 |     
 63 |     uint64_t current_key_size = key_size;
 64 |     uint64_t current_value_size = value_size;
 65 |     char * data;
 66 |     char * allocated = NULL;
 67 |     if (8 + 4 + 1 + 8 + 8 + block_size > 4096) {
 68 |         allocated = calloc(1, 8 + 4 + 1 + 8 + 8 + (size_t) block_size);
 69 |         data = allocated;
 70 |     }
 71 |     else {
 72 |         data = alloca(8 + 4 + 1 + 8 + 8 + (size_t) block_size);
 73 |         bzero(data, 8 + 4 + 1 + 8 + 8 + (size_t) block_size);
 74 |     }
 75 |     char * p = data;
 76 |     next_block_offset = hton64(next_block_offset);
 77 |     memcpy(p, &next_block_offset, sizeof(next_block_offset));
 78 |     p += sizeof(next_block_offset);
 79 |     hash_value = htonl(hash_value);
 80 |     memcpy(p, &hash_value, sizeof(hash_value));
 81 |     p += sizeof(hash_value);
 82 |     memcpy(p, &log2_size, sizeof(log2_size));
 83 |     p += sizeof(log2_size);
 84 |     current_key_size = hton64(current_key_size);
 85 |     memcpy(p, &current_key_size, sizeof(current_key_size));
 86 |     p += sizeof(current_key_size);
 87 |     memcpy(p, key, key_size);
 88 |     p += key_size;
 89 |     current_value_size = hton64(current_value_size);
 90 |     memcpy(p, &current_value_size, sizeof(current_value_size));
 91 |     p += sizeof(current_value_size);
 92 |     memcpy(p, value, value_size);
 93 |     p += value_size;
 94 |     size_t remaining = (8 + 4 + 1 + 8 + 8 + block_size);
 95 |     size_t write_offset = offset;
 96 |     char * remaining_data = data;
 97 |     while (remaining > 0) {
 98 |         ssize_t count = pwrite(db->kv_fd, remaining_data, remaining, write_offset);
 99 |         if (count < 0) {
100 |             if (allocated != NULL) {
101 |                 free(allocated);
102 |             }
103 |             return 0;
104 |         }
105 |         write_offset += count;
106 |         remaining_data += count;
107 |         remaining -= count;
108 |     }
109 |     if (allocated != NULL) {
110 |         free(allocated);
111 |     }
112 |     if (use_new_block) {
113 |         uint64_t filesize = ntoh64(* db->kv_filesize);
114 |         filesize += 8 + 4 + 1 + 8 + 8 + block_size;
115 |         (* db->kv_filesize) = hton64(filesize);
116 |     }
117 |     
118 |     return offset;
119 | }
120 | 


--------------------------------------------------------------------------------
/src/kvblock.h:
--------------------------------------------------------------------------------
 1 | //
 2 | //  kvblock.h
 3 | //  kvdb
 4 | //
 5 | //  Created by DINH Viêt Hoà on 6/2/13.
 6 | //  Copyright (c) 2013 etpan. All rights reserved.
 7 | //
 8 | 
 9 | #ifndef kvdb_kvblock_h
10 | #define kvdb_kvblock_h
11 | 
12 | #include "kvtypes.h"
13 | 
/*
 * Write a block holding the given key and value (plus next_block_offset and
 * hash_value in its header) to the database file, reusing a free block of the
 * right size class when there is one.  Returns the file offset of the block,
 * or 0 on failure.
 */
uint64_t kv_block_create(kvdb * db, uint64_t next_block_offset, uint32_t hash_value,
                         const char * key, size_t key_size,
                         const char * value, size_t value_size);

/*
 * Put the block at the given file offset on the free list of its size class.
 * Returns 0 on success and -1 on an I/O error.
 */
int kv_block_recycle(kvdb * db, uint64_t offset);
19 | 
20 | #endif
21 | 


--------------------------------------------------------------------------------
/src/kvbloom.h:
--------------------------------------------------------------------------------
 1 | //
 2 | //  kvbloom.h
 3 | //  kvdb
 4 | //
 5 | //  Created by DINH Viêt Hoà on 6/2/13.
 6 | //  Copyright (c) 2013 etpan. All rights reserved.
 7 | //
 8 | 
 9 | #ifndef KVBLOOM_H
10 | #define KVBLOOM_H
11 | 
12 | #include "kvmurmurhash.h"
13 | 
14 | static inline void table_bloom_filter_set(struct kvdb_table * table, uint32_t * hash_values,
15 |                                           int hash_count)
16 | {
17 |     //fprintf(stderr, "----set\n");
18 |     for(unsigned int i = 0 ; i < hash_count ; i ++) {
19 |         uint64_t idx = hash_values[i] % ntoh64(* table->kv_bloom_filter_size);
20 |         //fprintf(stderr, "%u\n", (unsigned int) idx);
21 |         table->kv_bloom_filter[idx / 8] |= 1 << (idx % 8);
22 |     }
23 | }
24 | 
25 | static inline int table_bloom_filter_might_contain(struct kvdb_table * table, uint32_t * hash_values,
26 |                                                    int hash_count)
27 | {
28 |     //fprintf(stderr, "----get\n");
29 |     for(unsigned int i = 0 ; i < hash_count ; i ++) {
30 |         uint64_t idx = hash_values[i] % ntoh64(* table->kv_bloom_filter_size);
31 |         //fprintf(stderr, "%u\n", (unsigned int) idx);
32 |         if ((table->kv_bloom_filter[idx / 8] & (1 << (idx % 8))) == 0) {
33 |             //fprintf(stderr, "----not found\n");
34 |             return 0;
35 |         }
36 |     }
37 |     //fprintf(stderr, "----found\n");
38 |     return 1;
39 | }
40 | 
/*
 * Fill hash_values[0 .. hash_count-1] with a chain of hashes of the key:
 * the first call to kv_murmur_hash() receives 0 as its third argument and
 * every later call receives the hash produced just before it.
 */
static inline void table_bloom_filter_compute_hash(uint32_t * hash_values, unsigned int hash_count,
                                                   const char * key, size_t key_size)
{
    uint32_t chained = 0;
    uint32_t * out = hash_values;
    uint32_t * const end = hash_values + hash_count;

    while (out != end) {
        chained = kv_murmur_hash(key, key_size, chained);
        * out = chained;
        out ++;
    }
}
50 | 
51 | #endif
52 | 


--------------------------------------------------------------------------------
/src/kvdb.c:
--------------------------------------------------------------------------------
  1 | #include "kvdb.h"
  2 | 
  3 | #include <inttypes.h>
  4 | #include <sys/types.h>
  5 | #include <sys/stat.h>
  6 | #include <unistd.h>
  7 | #include <stdlib.h>
  8 | #include <stdio.h>
  9 | #include <string.h>
 10 | #include <fcntl.h>
 11 | #include <sys/mman.h>
 12 | #include <sys/types.h>
 13 | #include <sys/uio.h>
 14 | #include <unistd.h>
 15 | 
 16 | #include <lz4.h>
 17 | 
 18 | #include "kvassert.h"
 19 | #include "kvendian.h"
 20 | #include "kvtypes.h"
 21 | #include "kvprime.h"
 22 | #include "kvpaddingutils.h"
 23 | #include "kvbloom.h"
 24 | #include "kvmurmurhash.h"
 25 | #include "kvtable.h"
 26 | #include "kvblock.h"
 27 | 
/* Four-byte magic that kvdb_open() writes at offset 0 of a new file and checks on every open. */
#define MARKER "KVDB"
/* Format version stored as a 32-bit value right after the marker; kvdb_open() rejects any other value. */
#define VERSION 5

/* NOTE(review): no use of this flag appears in the code reviewed here -- confirm it is still needed. */
static int kvdb_debug = 0;
 32 | 
 33 | static int internal_kvdb_set(kvdb * db, const char * key, size_t key_size, const char * value, size_t value_size);
 34 | static int internal_kvdb_get2(kvdb * db, const char * key, size_t key_size,
 35 |               char ** p_value, size_t * p_value_size, size_t * p_free_size);
 36 | static int kvdb_get2(kvdb * db, const char * key, size_t key_size,
 37 |                      char ** p_value, size_t * p_value_size, size_t * p_free_size);
 38 | 
 39 | kvdb * kvdb_new(const char * filename)
 40 | {
 41 |     kvdb * db = malloc(sizeof(* db));
 42 |     KVDBAssert(filename != NULL);
 43 |     db->kv_filename = strdup(filename);
 44 |     KVDBAssert(db->kv_filename != NULL);
 45 |     db->kv_fd = -1;
 46 |     db->kv_opened = 0;
 47 |     db->kv_firstmaxcount = kv_getnextprime(KV_FIRST_TABLE_MAX_COUNT);
 48 |     db->kv_compression_type = KVDB_COMPRESSION_TYPE_LZ4;
 49 |     db->kv_filesize = NULL;
 50 |     db->kv_free_blocks = NULL;
 51 |     db->kv_first_table = NULL;
 52 |     db->kv_current_table = NULL;
 53 |     
 54 |     return db;
 55 | }
 56 | 
 57 | void kvdb_free(kvdb * db)
 58 | {
 59 |     if (db->kv_opened) {
 60 |         fprintf(stderr, "should be closed before freeing - %s\n", db->kv_filename);
 61 |     }
 62 |     free(db->kv_filename);
 63 |     free(db);
 64 | }
 65 | 
 66 | void kvdb_set_compression_type(kvdb * db, int compression_type)
 67 | {
 68 |     if (db->kv_opened) {
 69 |         return;
 70 |     }
 71 |     db->kv_compression_type = compression_type;
 72 | }
 73 | 
 74 | int kvdb_get_compression_type(kvdb * db)
 75 | {
 76 |     return db->kv_compression_type;
 77 | }
 78 | 
 79 | int kvdb_open(kvdb * db)
 80 | {
 81 |     int r;
 82 |     struct stat stat_buf;
 83 |     int create_file = 0;
 84 |     
 85 |     if (db->kv_opened)
 86 |         return -1;
 87 |     
 88 |     db->kv_pagesize = getpagesize();
 89 |     
 90 |     db->kv_fd = open(db->kv_filename, O_RDWR | O_CREAT, 0600);
 91 |     if (db->kv_fd == -1) {
 92 |         fprintf(stderr, "open failed\n");
 93 |         return -1;
 94 |     }
 95 |     
 96 |     r = fstat(db->kv_fd, &stat_buf);
 97 |     if (r < 0) {
 98 |         close(db->kv_fd);
 99 |         // close file.
100 |         fprintf(stderr, "fstat failed\n");
101 |         return -1;
102 |     }
103 |     
104 |     uint64_t firstmaxcount = kv_getnextprime(KV_FIRST_TABLE_MAX_COUNT);
105 |     uint64_t first_mapping_size = KV_HEADER_SIZE + KV_TABLE_SIZE(firstmaxcount);
106 |     
107 |     char data[4 + 4 + 8 + 1];
108 |     
109 |     if (stat_buf.st_size == 0) {
110 |         create_file = 1;
111 |         r = ftruncate(db->kv_fd, KV_PAGE_ROUND_UP(db, first_mapping_size));
112 |         if (r < 0) {
113 |             close(db->kv_fd);
114 |             // close file.
115 |             fprintf(stderr, "truncate failed\n");
116 |             return -1;
117 |         }
118 |         memcpy(data, MARKER, 4);
119 |         h32_to_bytes(&data[4], VERSION);
120 |         h64_to_bytes(&data[4 + 4], firstmaxcount);
121 |         data[4 + 4 + 8] = db->kv_compression_type;
122 |         write(db->kv_fd, data, sizeof(data));
123 |         
124 |         kv_table_header_write(db, KV_HEADER_SIZE, firstmaxcount);
125 |     }
126 |     
127 |     char marker[4];
128 |     uint32_t version;
129 |     int compression_type;
130 |     pread(db->kv_fd, data, sizeof(data), 0);
131 |     memcpy(marker, data, 4);
132 |     version = bytes_to_h32(&data[4]);
133 |     firstmaxcount = bytes_to_h64(&data[4 + 4]);
134 |     compression_type = data[4 + 4 + 8];
135 |     
136 |     r = memcmp(marker, MARKER, 4);
137 |     if (r != 0) {
138 |         fprintf(stderr, "file corrupted\n");
139 |         return -1;
140 |     }
141 |     if (version != VERSION) {
142 |         fprintf(stderr, "bad file version\n");
143 |         return -1;
144 |     }
145 |     
146 |     db->kv_firstmaxcount = firstmaxcount;
147 |     db->kv_compression_type = compression_type;
148 |     db->kv_opened = 1;
149 |     
150 |     r = kv_tables_setup(db);
151 |     if (r < 0) {
152 |         fprintf(stderr, "can't map files\n");
153 |         return -1;
154 |     }
155 |     
156 |     char * first_mapping = db->kv_first_table->kv_mapping.kv_bytes;
157 |     db->kv_filesize = (uint64_t *) (first_mapping + KV_HEADER_FILESIZE_OFFSET);
158 |     db->kv_free_blocks = (uint64_t *) (first_mapping + KV_HEADER_FREELIST_OFFSET);
159 |     if (create_file) {
160 |         * db->kv_filesize = hton64(first_mapping_size);
161 |     }
162 |     
163 |     return 0;
164 | }
165 | 
166 | void kvdb_close(kvdb * db)
167 | {
168 |     if (!db->kv_opened) {
169 |         return;
170 |     }
171 |     
172 |     kv_tables_unsetup(db);
173 |     close(db->kv_fd);
174 |     db->kv_opened = 0;
175 | }
176 | 
177 | int kvdb_set(kvdb * db, const char * key, size_t key_size, const char * value, size_t value_size)
178 | {
179 |     if (db->kv_compression_type == KVDB_COMPRESSION_TYPE_RAW) {
180 |         return internal_kvdb_set(db, key, key_size, value, value_size);
181 |     }
182 |     else if (db->kv_compression_type == KVDB_COMPRESSION_TYPE_LZ4) {
183 |         if (value_size == 0) {
184 |             return internal_kvdb_set(db, key, key_size, value, value_size);
185 |         }
186 |         else {
187 |             int max_compressed_size = LZ4_compressBound((int) value_size);
188 |             char * compressed_value = NULL;
189 |             int allocated = 0;
190 |             if (max_compressed_size < 4096) {
191 |                 compressed_value = alloca(sizeof(uint32_t) + max_compressed_size);
192 |             }
193 |             else {
194 |                 allocated = 1;
195 |                 compressed_value = malloc(sizeof(uint32_t) + max_compressed_size);
196 |             }
197 |             * (uint32_t *) compressed_value = htonl(value_size);
198 |             int compressed_value_size = LZ4_compress(value, compressed_value + sizeof(uint32_t), (int) value_size);
199 |             int r = internal_kvdb_set(db, key, key_size, compressed_value, sizeof(uint32_t) + compressed_value_size);
200 |             if (allocated) {
201 |                 free(compressed_value);
202 |             }
203 |             return r;
204 |         }
205 |     }
206 |     else {
207 |         KVDBAssert(0);
208 |         return 0;
209 |     }
210 | }
211 | 
212 | static int internal_kvdb_set(kvdb * db, const char * key, size_t key_size, const char * value, size_t value_size)
213 | {
214 |     uint32_t hash_value[KV_BLOOM_FILTER_HASH_COUNT];
215 |     table_bloom_filter_compute_hash(hash_value, KV_BLOOM_FILTER_HASH_COUNT, key, key_size);
216 |     
217 |     int r;
218 |     r = kvdb_delete(db, key, key_size);
219 |     if (r == -1) {
220 |         // Not found: ignore.
221 |     }
222 |     else if (r == -2) {
223 |         return -2;
224 |     }
225 |     
226 |     r = kv_select_table(db);
227 |     if (r < 0) {
228 |         return -2;
229 |     }
230 |     struct kvdb_table * table = db->kv_current_table;
231 |     
232 |     uint32_t idx = hash_value[0] % ntoh64(* table->kv_maxcount);
233 |     struct kvdb_item * item = &table->kv_items[idx];
234 |     uint64_t offset = kv_block_create(db, ntoh64(item->kv_offset), hash_value[0], key, key_size, value, value_size);
235 |     if (offset == 0) {
236 |         return -2;
237 |     }
238 |     item->kv_offset = hton64(offset);
239 |     table_bloom_filter_set(table, hash_value + 1, KV_BLOOM_FILTER_HASH_COUNT - 1);
240 |     
241 |     uint64_t count;
242 |     count = ntoh64(* table->kv_count);
243 |     count ++;
244 |     * table->kv_count = hton64(count);
245 |     
246 |     return 0;
247 | }
248 | 
249 | #define PRE_READ_KEY_SIZE 128
250 | #define MAX_ALLOCA_SIZE 4096
251 | 
252 | static void show_bucket(kvdb * db, uint32_t idx)
253 | {
254 |     struct kvdb_table * table = db->kv_first_table;
255 |     struct kvdb_item * item = &table->kv_items[idx];
256 |     uint64_t next_offset = ntoh64(item->kv_offset);
257 |     
258 |     fprintf(stderr, "bucket: %llu\n", (unsigned long long) idx);
259 |     
260 |     uint64_t previous_offset = 0;
261 |     
262 |     // Run through all chained blocks in the bucket.
263 |     while (next_offset != 0) {
264 |         uint32_t current_hash_value;
265 |         uint64_t current_offset;
266 |         uint8_t log2_size;
267 |         uint64_t current_key_size;
268 |         char * current_key;
269 |         ssize_t r;
270 |         
271 |         current_offset = next_offset;
272 |         char block_header_data[KV_BLOCK_KEY_BYTES_OFFSET + PRE_READ_KEY_SIZE];
273 |         
274 |         r = pread(db->kv_fd, block_header_data, sizeof(block_header_data), (off_t) next_offset);
275 |         if (r < 0)
276 |             return;
277 |         char * p = block_header_data;
278 |         next_offset = bytes_to_h64(p);
279 |         p += 8;
280 |         current_hash_value = bytes_to_h32(p);
281 |         p += 4;
282 |         log2_size = bytes_to_h8(p);
283 |         p += 1;
284 |         current_key_size = bytes_to_h64(p);
285 |         p += 8;
286 |         current_key = block_header_data + KV_BLOCK_KEY_BYTES_OFFSET;
287 |         
288 |         fprintf(stderr, "previous, current, next: %llu, %llu , %llu\n", (unsigned long long) previous_offset, (unsigned long long) current_offset, (unsigned long long) next_offset);
289 |         fprintf(stderr, "hash: %llu\n", (unsigned long long) current_hash_value);
290 |         
291 |         char * allocated = NULL;
292 |         if (current_key_size > PRE_READ_KEY_SIZE) {
293 |             if (current_key_size <= MAX_ALLOCA_SIZE) {
294 |                 current_key = alloca(current_key_size);
295 |             }
296 |             else {
297 |                 allocated = malloc((size_t) current_key_size);
298 |                 current_key = allocated;
299 |             }
300 |             r = pread(db->kv_fd, current_key, (size_t) current_key_size, (off_t) (current_offset + KV_BLOCK_KEY_BYTES_OFFSET));
301 |             if (r < 0) {
302 |                 if (allocated != NULL) {
303 |                     free(allocated);
304 |                 }
305 |                 return;
306 |             }
307 |         }
308 |         fprintf(stderr, "key: %.*s\n", (int) current_key_size, current_key);
309 |         if (allocated != NULL) {
310 |             free(allocated);
311 |         }
312 |         previous_offset = current_offset;
313 |     }
314 |     fprintf(stderr, "-----\n");
315 | }
316 | 
317 | static int find_key(kvdb * db, const char * key, size_t key_size,
318 |                     findkey_callback callback, void * cb_data)
319 | {
320 |     uint32_t hash_values[KV_BLOOM_FILTER_HASH_COUNT];
321 |     table_bloom_filter_compute_hash(hash_values, KV_BLOOM_FILTER_HASH_COUNT, key, key_size);
322 |     
323 |     struct find_key_cb_params params;
324 |     params.key = key;
325 |     params.key_size = key_size;
326 |     
327 |     // Run through all tables.
328 |     struct kvdb_table * table = db->kv_first_table;
329 |     while (table != NULL) {
330 |         // Is the key likely to be in this table?
331 |         // Use a bloom filter to guess.
332 |         if (!table_bloom_filter_might_contain(table, hash_values + 1, KV_BLOOM_FILTER_HASH_COUNT - 1)) {
333 |             table = table->kv_next_table;
334 |             continue;
335 |         }
336 |         
337 |         // Find a bucket.
338 |         uint64_t previous_offset = 0;
339 |         uint32_t idx = hash_values[0] % ntoh64(* table->kv_maxcount);
340 |         struct kvdb_item * item = &table->kv_items[idx];
341 |         uint64_t next_offset = ntoh64(item->kv_offset);
342 |         if (kvdb_debug) {
343 |             fprintf(stderr, "before\n");
344 |             show_bucket(db, idx);
345 |         }
346 |         
347 |         // Run through all chained blocks in the bucket.
348 |         while (next_offset != 0) {
349 |             uint32_t current_hash_value;
350 |             uint64_t current_offset;
351 |             uint8_t log2_size;
352 |             uint64_t current_key_size;
353 |             char * current_key;
354 |             ssize_t r;
355 |             
356 |             current_offset = next_offset;
357 |             char block_header_data[KV_BLOCK_KEY_BYTES_OFFSET + PRE_READ_KEY_SIZE];
358 |             
359 |             r = pread(db->kv_fd, block_header_data, sizeof(block_header_data), (off_t) next_offset);
360 |             if (r < 0)
361 |                 return -1;
362 |             char * p = block_header_data;
363 |             next_offset = bytes_to_h64(p);
364 |             p += 8;
365 |             current_hash_value = bytes_to_h32(p);
366 |             p += 4;
367 |             log2_size = bytes_to_h8(p);
368 |             p += 1;
369 |             current_key_size = bytes_to_h64(p);
370 |             p += 8;
371 |             current_key = block_header_data + KV_BLOCK_KEY_BYTES_OFFSET;
372 |             
373 |             if (current_hash_value != hash_values[0]) {
374 |                 previous_offset = current_offset;
375 |                 continue;
376 |             }
377 |             
378 |             int cmp_result;
379 |             
380 |             if (current_key_size != key_size) {
381 |                 previous_offset = current_offset;
382 |                 continue;
383 |             }
384 |             char * allocated = NULL;
385 |             if (current_key_size > PRE_READ_KEY_SIZE) {
386 |                 if (current_key_size <= MAX_ALLOCA_SIZE) {
387 |                     current_key = alloca(current_key_size);
388 |                 }
389 |                 else {
390 |                     allocated = malloc((size_t) current_key_size);
391 |                     current_key = allocated;
392 |                 }
393 |                 r = pread(db->kv_fd, current_key, (size_t) current_key_size, (off_t) (current_offset + KV_BLOCK_KEY_BYTES_OFFSET));
394 |                 if (r < 0) {
395 |                     if (allocated != NULL) {
396 |                         free(allocated);
397 |                     }
398 |                     return -1;
399 |                 }
400 |             }
401 |             cmp_result = memcmp(key, current_key, key_size) != 0;
402 |             if (allocated != NULL) {
403 |                 free(allocated);
404 |             }
405 |             if (cmp_result != 0) {
406 |                 previous_offset = current_offset;
407 |                 continue;
408 |             }
409 |             
410 |             params.previous_offset = previous_offset;
411 |             params.current_offset = current_offset;
412 |             params.next_offset = next_offset;
413 |             params.item = item;
414 |             params.table_count = table->kv_count;
415 |             params.log2_size = log2_size;
416 |             
417 |             callback(db, &params, cb_data);
418 |             
419 |             if (kvdb_debug) {
420 |                 fprintf(stderr, "after\n");
421 |                 show_bucket(db, idx);
422 |             }
423 |             
424 |             return 0;
425 |         }
426 |         table = table->kv_next_table;
427 |     }
428 | 
429 |     return 0;
430 | }
431 | 
// Outcome of a deletion, filled in by delete_key_callback().
struct delete_key_params {
    // 0 when the block was removed, -2 when unlinking or recycling failed.
    // kvdb_delete() sets it to -1 before starting the lookup.
    int result;
    // Set to 1 once the block has been removed; kvdb_delete() starts at 0.
    int found;
};
436 | 
437 | static void delete_key_callback(kvdb * db, struct find_key_cb_params * params,
438 |                                 void * data) {
439 |     struct delete_key_params * deletekeyparams = data;
440 |     ssize_t write_count;
441 |     int r;
442 |     
443 |     if (params->previous_offset == 0) {
444 |         params->item->kv_offset = hton64(params->next_offset);
445 |     }
446 |     else {
447 |         uint64_t offset_to_write = hton64(params->next_offset);
448 |         write_count = pwrite(db->kv_fd, &offset_to_write, sizeof(offset_to_write), params->previous_offset);
449 |         if (write_count < 0) {
450 |             deletekeyparams->result = -2;
451 |             return;
452 |         }
453 |     }
454 |     r = kv_block_recycle(db, params->current_offset);
455 |     if (r < 0) {
456 |         deletekeyparams->result = -2;
457 |         return;
458 |     }
459 |     
460 |     * params->table_count = hton64(ntoh64(* params->table_count) - 1);
461 |     deletekeyparams->result = 0;
462 |     deletekeyparams->found = 1;
463 | }
464 | 
465 | int kvdb_delete(kvdb * db, const char * key, size_t key_size)
466 | {
467 |     int r;
468 |     struct delete_key_params data;
469 |     
470 |     data.found = 0;
471 |     data.result = -1;
472 |     
473 |     r = find_key(db, key, key_size, delete_key_callback, &data);
474 |     if (r < 0) {
475 |         return -2;
476 |     }
477 |     if (data.result < 0) {
478 |         return data.result;
479 |     }
480 |     if (!data.found) {
481 |         return -1;
482 |     }
483 |     
484 |     return 0;
485 | }
486 | 
// Outcome of a value lookup, filled in by read_value_callback().
struct read_value_params {
    // Size in bytes of the stored value.
    uint64_t value_size;
    // Buffer allocated with malloc() holding the value; NULL when the read failed.
    char * value;
    // 0 on success, -2 on read failure. internal_kvdb_get2() sets it to -1
    // before starting the lookup.
    int result;
    // Set to 1 once the value has been read.
    int found;
    // (1 << log2_size of the block) minus the value and key sizes.
    size_t free_size;
};
494 | 
495 | static void read_value_callback(kvdb * db, struct find_key_cb_params * params,
496 |                                 void * data)
497 | {
498 |     struct read_value_params * readparams = data;
499 |     ssize_t r;
500 |     
501 |     uint64_t value_size;
502 |     r = pread(db->kv_fd, &value_size, sizeof(value_size),
503 |               params->current_offset + 8 + 4 + 1 + 8 + params->key_size);
504 |     if (r < 0) {
505 |         readparams->result = -2;
506 |         return;
507 |     }
508 |     
509 |     value_size = ntoh64(value_size);
510 |     readparams->value_size = value_size;
511 |     readparams->value = malloc((size_t) value_size);
512 |     
513 |     uint64_t remaining = value_size;
514 |     char * value_p = readparams->value;
515 |     while (remaining > 0) {
516 |         ssize_t count = pread(db->kv_fd, value_p, (size_t) remaining,
517 |                               params->current_offset + 8 + 4 + 1 + 8 + params->key_size + 8);
518 |         if (count < 0) {
519 |             readparams->result = -2;
520 |             free(readparams->value);
521 |             readparams->value = NULL;
522 |             return;
523 |         }
524 |         remaining -= count;
525 |         value_p += count;
526 |     }
527 |     
528 |     readparams->result = 0;
529 |     readparams->found = 1;
530 |     readparams->free_size = (1 << params->log2_size) - (value_size + params->key_size);
531 | }
532 | 
533 | int kvdb_get(kvdb * db, const char * key, size_t key_size,
534 |              char ** p_value, size_t * p_value_size)
535 | {
536 |     return kvdb_get2(db, key, key_size, p_value, p_value_size, NULL);
537 | }
538 | 
539 | static int kvdb_get2(kvdb * db, const char * key, size_t key_size,
540 |                      char ** p_value, size_t * p_value_size, size_t * p_free_size)
541 | {
542 |     if (db->kv_compression_type == KVDB_COMPRESSION_TYPE_RAW) {
543 |         return internal_kvdb_get2(db, key, key_size, p_value, p_value_size, p_free_size);
544 |     }
545 |     else if (db->kv_compression_type == KVDB_COMPRESSION_TYPE_LZ4) {
546 |         char * compressed_value;
547 |         size_t compressed_value_size;
548 |         int r = internal_kvdb_get2(db, key, key_size, &compressed_value, &compressed_value_size, p_free_size);
549 |         if (r < 0) {
550 |             return r;
551 |         }
552 |         if (compressed_value_size == 0) {
553 |             * p_value = NULL;
554 |             * p_value_size = 0;
555 |             return 0;
556 |         }
557 |     
558 |         size_t value_size = ntohl(* (uint32_t *) compressed_value);
559 |         char * value = malloc(value_size);
560 |         LZ4_decompress_fast(compressed_value + sizeof(uint32_t), value, (int) value_size);
561 |         free(compressed_value);
562 |         if (p_free_size != NULL) {
563 |             * p_free_size = 0;
564 |         }
565 |         * p_value_size = value_size;
566 |         * p_value = value;
567 |         return 0;
568 |     }
569 |     else {
570 |         KVDBAssert(0);
571 |         return 0;
572 |     }
573 | }
574 | 
575 | static int internal_kvdb_get2(kvdb * db, const char * key, size_t key_size,
576 |               char ** p_value, size_t * p_value_size, size_t * p_free_size)
577 | {
578 |     int r;
579 |     struct read_value_params data;
580 |     
581 |     data.value_size = 0;
582 |     data.value = NULL;
583 |     data.result = -1;
584 |     data.found = 0;
585 |     data.free_size = 0;
586 | 
587 |     r = find_key(db, key, key_size, read_value_callback, &data);
588 |     if (r < 0) {
589 |         return -2;
590 |     }
591 |     if (data.result < 0) {
592 |         return data.result;
593 |     }
594 |     if (!data.found) {
595 |         return -1;
596 |     }
597 |     
598 |     if (p_free_size != NULL) {
599 |         * p_free_size = data.free_size;
600 |     }
601 |     
602 |     * p_value = data.value;
603 |     * p_value_size = (size_t) data.value_size;
604 |     
605 |     return 0;
606 | }
607 | 
608 | int kvdb_enumerate_keys(kvdb * db, kvdb_enumerate_callback callback, void * cb_data)
609 | {
610 |     struct kvdb_table * table = db->kv_first_table;
611 | 	struct kvdb_enumerate_cb_params cb_params;
612 | 	int stop = 0;
613 | 	
614 |     // Run through all tables.
615 |     while (table != NULL) {
616 | 		struct kvdb_item * item = table->kv_items;
617 | 		// Run through all buckets.
618 | 		uint64_t count = ntoh64(*table->kv_maxcount);
619 | 		while (count) {
620 | 			uint64_t current_offset = ntoh64(item->kv_offset);
621 | 			// Run through all chained blocks in the bucket.
622 | 			while (current_offset != 0) {
623 | 				char block_header_data[KV_BLOCK_KEY_BYTES_OFFSET + PRE_READ_KEY_SIZE];
624 | 				ssize_t r = pread(db->kv_fd, block_header_data, sizeof(block_header_data), (off_t) current_offset);
625 | 				if (r < 0) {
626 | 					return -2;
627 | 				}
628 | 				char * p = block_header_data;
629 | 				uint64_t next_offset = bytes_to_h64(p);
630 | 				p += 8+4+1; // ignore hash_value and log2_size
631 | 				size_t current_key_size = (size_t) bytes_to_h64(p);
632 | 				p += 8;
633 | 				char * current_key = block_header_data + KV_BLOCK_KEY_BYTES_OFFSET;
634 | 				char * allocated = NULL;
635 | 				if (current_key_size > PRE_READ_KEY_SIZE) {
636 | 					if (current_key_size <= MAX_ALLOCA_SIZE) {
637 | 						current_key = alloca(current_key_size);
638 | 					}
639 | 					else {
640 | 						allocated = malloc(current_key_size);
641 | 						current_key = allocated;
642 | 					}
643 | 					r = pread(db->kv_fd, current_key, current_key_size, (off_t) (current_offset + KV_BLOCK_KEY_BYTES_OFFSET));
644 | 					if (r < 0) {
645 | 						if (allocated != NULL) {
646 | 							free(allocated);
647 | 						}
648 | 						return -2;
649 | 					}
650 | 				}
651 | 				cb_params.key = current_key;
652 | 				cb_params.key_size = current_key_size;
653 | 				callback(db, &cb_params, cb_data, &stop);
654 | 				if (allocated != NULL) {
655 | 					free(allocated);
656 | 				}
657 | 				if (stop) {
658 | 					return 0;
659 | 				}
660 |                 current_offset = next_offset;
661 | 			}
662 | 			item ++;
663 | 			count --;
664 | 		}
665 | 		table = table->kv_next_table;
666 | 	}
667 | 	return 0;
668 | }
669 | 


--------------------------------------------------------------------------------
/src/kvdb.h:
--------------------------------------------------------------------------------
#ifndef KVDB_H

#define KVDB_H

#include <sys/types.h>

#ifdef __cplusplus
extern "C" {
#endif

// kvdb is a key-value database.

// Opaque database handle.
typedef struct kvdb kvdb;

// Compression applied to the stored values.
enum {
    // Values are stored as is.
    KVDB_COMPRESSION_TYPE_RAW,
    // Non-empty values are stored LZ4-compressed.
    KVDB_COMPRESSION_TYPE_LZ4,
};

// Creates a kvdb.
kvdb * kvdb_new(const char * filename);

// Sets / gets the compression type (one of KVDB_COMPRESSION_TYPE_*).
// The type is written in the file when kvdb_open() creates it; when an
// existing file is opened, the type recorded in the file is used instead.
void kvdb_set_compression_type(kvdb * db, int compression_type);
int kvdb_get_compression_type(kvdb * db);

// Destroys a kvdb.
void kvdb_free(kvdb * db);

// Opens a kvdb.
// Returns 0 on success and a negative value on failure.
int kvdb_open(kvdb * db);

// Closes a kvdb. Does nothing if the database is not opened.
void kvdb_close(kvdb * db);

// Inserts a key / value in the database. An existing value for the key is
// replaced.
// Returns 0 on success.
// Returns -2 if there's an I/O error.
int kvdb_set(kvdb * db, const char * key, size_t key_size,
             const char * value, size_t value_size);

// Retrieves the value of a key.
// The result stored in p_value should be released using free().
// Returns 0 on success.
// Returns -1 if the item is not found.
// Returns -2 if there's an I/O error.
int kvdb_get(kvdb * db, const char * key, size_t key_size,
             char ** p_value, size_t * p_value_size);

// Removes a key.
// Returns 0 on success.
// Returns -1 if the item is not found.
// Returns -2 if there's an I/O error.
int kvdb_delete(kvdb * db, const char * key, size_t key_size);

// Key handed to the enumeration callback. The key bytes are only valid
// during the callback.
struct kvdb_enumerate_cb_params {
	const char * key;
	size_t key_size;
};

// Callback of kvdb_enumerate_keys(). `data` is the cb_data given to
// kvdb_enumerate_keys(); set * stop to a non-zero value to end the enumeration.
typedef void kvdb_enumerate_callback(kvdb * db,
	                                 struct kvdb_enumerate_cb_params * params,
                                     void * data, int * stop);

// Calls the callback for each key of the database.
// Returns 0 when the enumeration completed or was stopped by the callback.
// Returns -2 if there's an I/O error.
int kvdb_enumerate_keys(kvdb * db, kvdb_enumerate_callback callback, void * cb_data);

#ifdef __cplusplus
}
#endif

#endif
67 | 


--------------------------------------------------------------------------------
/src/kvdbo.cpp:
--------------------------------------------------------------------------------
  1 | #include "kvdbo.h"
  2 | 
  3 | #include "kvdb.h"
  4 | #include "kvendian.h"
  5 | #include "kvassert.h"
  6 | #include "kvserialization.h"
  7 | 
  8 | #include <set>
  9 | #include <string>
 10 | #include <vector>
 11 | #include <stdlib.h>
 12 | 
// Ordered key-value database built on top of a kvdb.
struct kvdbo {
    // underlying kvdb.
    kvdb * db;
    
    // in memory buffers for operations.
    // keys set through kvdbo_set() (and not deleted since).
    std::set<std::string> pending_keys;
    // keys deleted through kvdbo_delete() (and not set again since).
    std::set<std::string> pending_keys_delete;
    // node identifier allocation. kvdbo_new() starts it at 1.
    uint64_t next_node_id;
    
    // master node.
    // identifiers of the nodes.
    std::vector<uint64_t> nodes_ids;
    // first keys of the nodes.
    std::vector<std::string> nodes_first_keys;
    // number of keys in each node.
    std::vector<uint32_t> nodes_keys_count;
};
 31 | 
// iterator over kvdbo.
// it will also cache the keys of the current node.
struct kvdbo_iterator {
    // database being iterated.
    kvdbo * db;
    // identifier of the node.
    uint64_t node_id;
    // index of the node (position in the nodes_ids vector of the database).
    unsigned int node_index;
    // keys in the node.
    std::vector<std::string> keys;
    // current key index in the node.
    // -1 on a new iterator; the iterator is valid only when this is a valid
    // index in keys.
    int key_index;
};
 45 | 
 46 | #define NODE_PREFIX "n"
 47 | 
// Forward declarations of the file-local helpers.

// pending keys and master node.
static int flush_pending_keys(kvdbo * db);
static int write_master_node(kvdbo * db);
static int read_master_node(kvdbo * db);
// lookups.
static unsigned int find_node(kvdbo * db, const std::string key);
static unsigned int find_key(kvdbo_iterator * iterator, const std::string key);
// (un)serialization of key lists.
static void unserialize_words_list(std::vector<std::string> & word_list, char * value, size_t size);
static void unserialize_words_set(std::set<std::string> & word_set, char * value, size_t size, bool clear_words_set);
static void serialize_words_set(std::string & value, std::set<std::string> & word_set);
// node loading / writing / restructuring.
static int iterator_load_node(kvdbo_iterator * iterator, uint64_t node_id);
static int add_first_node(kvdbo * db);
static int load_node(struct modified_node * node, unsigned int node_index);
static int load_from_node_id(struct modified_node * node, uint64_t node_id);
static int write_loaded_node(struct modified_node * node);
static int write_single_loaded_node(struct modified_node * node);
static int try_merge(kvdbo * db, unsigned int node_index, bool * pDidMerge);
static int remove_node_id(kvdbo * db, uint64_t node_id);
static int remove_node(kvdbo * db, unsigned int node_index);
static int split_node(kvdbo * db, unsigned int node_index, unsigned int count,
                      std::set<std::string> & keys);

// debugging.
static void show_nodes(kvdbo * db);
 69 | 
 70 | #pragma mark kvdbo data structure management.
 71 | 
 72 | kvdbo * kvdbo_new(const char* filename)
 73 | {
 74 |     kvdbo * db;
 75 |     db = new kvdbo;
 76 |     db->db = kvdb_new(filename);
 77 |     db->next_node_id = 1;
 78 |     return db;
 79 | }
 80 | 
 81 | void kvdbo_free(kvdbo * db)
 82 | {
 83 |     kvdb_free(db->db);
 84 |     delete db;
 85 | }
 86 | 
 87 | #pragma mark opening / closing the database.
 88 | 
 89 | int kvdbo_open(kvdbo * db)
 90 | {
 91 |     int r = kvdb_open(db->db);
 92 |     if (r < 0) {
 93 |         return r;
 94 |     }
 95 |     r = read_master_node(db);
 96 |     if (r < 0) {
 97 |         kvdbo_close(db);
 98 |         return r;
 99 |     }
100 |     return 0;
101 | }
102 | 
103 | void kvdbo_close(kvdbo * db)
104 | {
105 |     db->nodes_keys_count.clear();
106 |     db->nodes_first_keys.clear();
107 |     db->nodes_ids.clear();
108 |     flush_pending_keys(db);
109 |     kvdb_close(db->db);
110 | }
111 | 
112 | int kvdbo_flush(kvdbo * db)
113 | {
114 |     return flush_pending_keys(db);
115 | }
116 | 
117 | #pragma mark key insertion / deletion / retrieval.
118 | 
// Prefix of the keys used internally by kvdbo (master node, nodes).
// It starts with a NUL byte; kvdbo_set() rejects user keys starting with it.
const char METAKEY_PREFIX[7] = "\0kvdbo";
// Length of the prefix, without the terminating NUL of the literal.
#define METAKEY_PREFIX_SIZE (sizeof(METAKEY_PREFIX) - 1)
121 | 
122 | int kvdbo_set(kvdbo * db,
123 |               const char * key,
124 |               size_t key_size,
125 |               const char * value,
126 |               size_t value_size)
127 | {
128 |     int r;
129 |     
130 |     std::string key_str(key, key_size);
131 |     if (key_str.find(std::string(METAKEY_PREFIX, METAKEY_PREFIX_SIZE)) == 0) {
132 |         // invalid key.
133 |         return -3;
134 |     }
135 |     db->pending_keys_delete.erase(key_str);
136 |     db->pending_keys.insert(key_str);
137 |     r = kvdb_set(db->db, key, key_size, value, value_size);
138 |     if (r != 0) {
139 |         return r;
140 |     }
141 |     return 0;
142 | }
143 | 
144 | int kvdbo_get(kvdbo * db,
145 |               const char * key,
146 |               size_t key_size,
147 |               char ** p_value,
148 |               size_t * p_value_size)
149 | {
150 |     if (db->pending_keys_delete.find(std::string(key, key_size)) != db->pending_keys_delete.end()) {
151 |         return -1;
152 |     }
153 |     return kvdb_get(db->db, key, key_size, p_value, p_value_size);
154 | }
155 | 
156 | int kvdbo_delete(kvdbo * db, const char* key, size_t key_size)
157 | {
158 |     std::string key_str(key, key_size);
159 |     db->pending_keys.erase(key_str);
160 |     db->pending_keys_delete.insert(key_str);
161 |     return kvdb_delete(db->db, key, key_size);
162 | }
163 | 
164 | #pragma mark iterator management.
165 | 
166 | kvdbo_iterator * kvdbo_iterator_new(kvdbo * db)
167 | {
168 |     kvdbo_iterator * iterator = new kvdbo_iterator;
169 |     iterator->key_index = -1;
170 |     iterator->db = db;
171 |     return iterator;
172 | }
173 | 
174 | void kvdbo_iterator_free(kvdbo_iterator * iterator)
175 | {
176 |     delete iterator;
177 | }
178 | 
179 | void kvdbo_iterator_seek_first(kvdbo_iterator * iterator)
180 | {
181 |     if (iterator->db->nodes_ids.size() == 0) {
182 |         return;
183 |     }
184 |     uint64_t node_id = iterator->db->nodes_ids[0];
185 |     int r = iterator_load_node(iterator, node_id);
186 |     KVDBAssert(r == 0);
187 |     iterator->node_index = 0;
188 |     iterator->key_index = 0;
189 | }
190 | 
191 | void kvdbo_iterator_seek_last(kvdbo_iterator * iterator)
192 | {
193 |     if (iterator->db->nodes_ids.size() == 0) {
194 |         return;
195 |     }
196 |     uint64_t node_id = iterator->db->nodes_ids[iterator->db->nodes_ids.size() - 1];
197 |     int r = iterator_load_node(iterator, node_id);
198 |     KVDBAssert(r == 0);
199 |     iterator->node_index = (unsigned int) (iterator->db->nodes_ids.size() - 1);
200 |     iterator->key_index = (unsigned int) (iterator->keys.size() - 1);
201 | }
202 | 
203 | void kvdbo_iterator_seek_after(kvdbo_iterator * iterator,
204 |                                const char * key,
205 |                                size_t key_size)
206 | {
207 |     if (iterator->db->nodes_ids.size() == 0) {
208 |         return;
209 |     }
210 |     std::string key_string(key, key_size);
211 |     unsigned int idx = find_node(iterator->db, key_string);
212 |     uint64_t node_id = iterator->db->nodes_ids[idx];
213 |     int r = iterator_load_node(iterator, node_id);
214 |     KVDBAssert(r == 0);
215 |     iterator->node_index = idx;
216 |     iterator->key_index = find_key(iterator, key_string);
217 |     while (kvdbo_iterator_is_valid(iterator)) {
218 |         const char * current_key;
219 |         size_t current_key_len;
220 |         kvdbo_iterator_get_key(iterator, &current_key, &current_key_len);
221 |         if (std::string(current_key, current_key_len) >= key_string) {
222 |             break;
223 |         }
224 |         kvdbo_iterator_next(iterator);
225 |     }
226 | }
227 | 
228 | void kvdbo_iterator_next(kvdbo_iterator * iterator)
229 | {
230 |     iterator->key_index ++;
231 |     if (iterator->key_index < iterator->keys.size()) {
232 |         return;
233 |     }
234 |     
235 |     // reached end of the node.
236 |     if (iterator->node_index == iterator->db->nodes_ids.size() - 1) {
237 |         // was in the last node.
238 |         return;
239 |     }
240 |     iterator->node_index ++;
241 |     
242 |     uint64_t node_id = iterator->db->nodes_ids[iterator->node_index];
243 |     int r = iterator_load_node(iterator, node_id);
244 |     KVDBAssert(r == 0);
245 |     iterator->key_index = 0;
246 | }
247 | 
248 | void kvdbo_iterator_previous(kvdbo_iterator * iterator)
249 | {
250 |     iterator->key_index --;
251 |     if (iterator->key_index >= 0) {
252 |         return;
253 |     }
254 |     
255 |     // reached beginning of the node.
256 |     if (iterator->node_index == 0) {
257 |         // was in the first node.
258 |         return;
259 |     }
260 |     iterator->node_index --;
261 |     
262 |     uint64_t node_id = iterator->db->nodes_ids[iterator->node_index];
263 |     int r= iterator_load_node(iterator, node_id);
264 |     KVDBAssert(r == 0);
265 |     iterator->key_index = (unsigned int) (iterator->keys.size() - 1);
266 | }
267 | 
268 | void kvdbo_iterator_get_key(kvdbo_iterator * iterator, const char ** p_key, size_t * p_key_size)
269 | {
270 |     if (!kvdbo_iterator_is_valid(iterator)) {
271 |         * p_key = NULL;
272 |         * p_key_size = 0;
273 |         return;
274 |     }
275 |     
276 |     std::string & key = iterator->keys[iterator->key_index];
277 |     * p_key = key.c_str();
278 |     * p_key_size = key.length();
279 | }
280 | 
281 | int kvdbo_iterator_is_valid(kvdbo_iterator * iterator)
282 | {
283 |     return (iterator->key_index != -1) && (iterator->key_index < iterator->keys.size());
284 | }
285 | 
286 | static int iterator_load_node(kvdbo_iterator * iterator, uint64_t node_id)
287 | {
288 |     iterator->node_id = node_id;
289 |     
290 |     // load all keys of the node in memory.
291 |     std::string node_key;
292 |     node_key.append(METAKEY_PREFIX, METAKEY_PREFIX_SIZE);
293 |     node_key.append(NODE_PREFIX, strlen(NODE_PREFIX));
294 |     uint64_t identifier = hton64(node_id);
295 |     node_key.append((const char *) &identifier, sizeof(identifier));
296 |     char * value = NULL;
297 |     size_t size = 0;
298 |     int r = kvdb_get(iterator->db->db, node_key.c_str(), node_key.length(), &value, &size);
299 |     if (r == -1) {
300 |         return 0;
301 |     }
302 |     if (r == -2) {
303 |         return -2;
304 |     }
305 |     // load all nodes in a vector.
306 |     unserialize_words_list(iterator->keys, value, size);
307 |     free(value);
308 |     return 0;
309 | }
310 | 
311 | #pragma mark master node reading / writing.
312 | 
313 | #define MASTER_NODE_KEY "m"
314 | 
315 | static int write_master_node(kvdbo * db)
316 | {
317 |     std::string buffer;
318 |     kv_encode_uint64(buffer, db->nodes_ids.size());
319 |     for(uint64_t i = 0 ; i < db->nodes_ids.size() ; i ++) {
320 |         kv_encode_uint64(buffer, db->nodes_ids[i]);
321 |     }
322 |     for(uint64_t i = 0 ; i < db->nodes_keys_count.size() ; i ++) {
323 |         kv_encode_uint64(buffer, db->nodes_keys_count[i]);
324 |     }
325 |     for(uint64_t i = 0 ; i < db->nodes_first_keys.size() ; i ++) {
326 |         // write first key of the node.
327 |         std::string key = db->nodes_first_keys[i];
328 |         buffer.append(key.c_str(), key.length());
329 |         buffer.push_back(0);
330 |     }
331 |     std::string master_node_key;
332 |     master_node_key.append(METAKEY_PREFIX, METAKEY_PREFIX_SIZE);
333 |     master_node_key.append(MASTER_NODE_KEY, strlen(MASTER_NODE_KEY));
334 |     int r = kvdb_set(db->db, master_node_key.c_str(), master_node_key.length(),
335 |                      buffer.c_str(), buffer.length());
336 |     return r;
337 | }
338 | 
339 | static int read_master_node(kvdbo * db)
340 | {
341 |     char * value = NULL;
342 |     size_t size = 0;
343 |     uint64_t max_node_id = 0;
344 |     
345 |     std::string master_node_key;
346 |     master_node_key.append(METAKEY_PREFIX, METAKEY_PREFIX_SIZE);
347 |     master_node_key.append(MASTER_NODE_KEY, strlen(MASTER_NODE_KEY));
348 |     int r = kvdb_get(db->db, master_node_key.c_str(), master_node_key.length(),
349 |                      &value, &size);
350 |     if (r == -1) {
351 |         return 0;
352 |     }
353 |     if (r == -2) {
354 |         return -2;
355 |     }
356 |     std::string buffer(value, size);
357 |     db->nodes_ids.clear();
358 |     uint64_t count = 0;
359 |     size_t position = 0;
360 |     position = kv_decode_uint64(buffer, position, &count);
361 |     for(uint64_t i = 0 ; i < count ; i ++) {
362 |         uint64_t node_id = 0;
363 |         position = kv_decode_uint64(buffer, position, &node_id);
364 |         db->nodes_ids.push_back(node_id);
365 |         if (node_id > max_node_id) {
366 |             max_node_id = node_id;
367 |         }
368 |     }
369 |     for(uint64_t i = 0 ; i < count ; i ++) {
370 |         uint64_t keys_count = 0;
371 |         position = kv_decode_uint64(buffer, position, &keys_count);
372 |         db->nodes_keys_count.push_back((uint32_t) keys_count);
373 |     }
374 |     //size_t remaining = size - (p - value);
375 |     size_t remaining = size - position;
376 |     unserialize_words_list(db->nodes_first_keys, value + position, remaining);
377 |     return 0;
378 | }
379 | 
380 | // binary search of a node that should contain the given key.
381 | // returns the index of the node within the given boundaries.
382 | // used by find_node() below.
383 | static unsigned int find_node_with_boundaries(kvdbo * db, const std::string key,
384 |                                               unsigned int left, unsigned int right)
385 | {
386 |     unsigned int middle = (left + right) / 2;
387 |     if (key >= db->nodes_first_keys[right]) {
388 |         return right;
389 |     }
390 |     if (left == middle) {
391 |         return left;
392 |     }
393 |     
394 |     if (key >= db->nodes_first_keys[middle]) {
395 |         return find_node_with_boundaries(db, key, middle, right);
396 |     }
397 |     else {
398 |         return find_node_with_boundaries(db, key, left, middle - 1);
399 |     }
400 | }
401 | 
402 | // binary search of a node that should contain the given key.
403 | // returns the index of the node.
404 | static unsigned int find_node(kvdbo * db, const std::string key)
405 | {
406 |     return find_node_with_boundaries(db, key, 0, (unsigned int) db->nodes_first_keys.size() - 1);
407 | }
408 | 
409 | // binary search of a key in the node loaded by the iterator.
410 | // returns the index of the key within the node, in the given range.
411 | // used by find_key() below.
412 | static unsigned int find_key_with_boundaries(kvdbo_iterator * iterator, const std::string key,
413 |                                              unsigned int left, unsigned int right)
414 | {
415 |     unsigned int middle = (left + right) / 2;
416 |     if (key >= iterator->keys[right]) {
417 |         return right;
418 |     }
419 |     if (left == middle) {
420 |         return left;
421 |     }
422 |     
423 |     if (key >= iterator->keys[middle]) {
424 |         return find_key_with_boundaries(iterator, key, middle, right);
425 |     }
426 |     else {
427 |         return find_key_with_boundaries(iterator, key, left, middle - 1);
428 |     }
429 | }
430 | 
431 | // binary search of a key in the node loaded by the iterator.
432 | // returns the index of the key within the node.
433 | static unsigned int find_key(kvdbo_iterator * iterator, const std::string key)
434 | {
435 |     return find_key_with_boundaries(iterator, key, 0, (unsigned int) (iterator->keys.size() - 1));
436 | }
437 | 
// unserialize a list of 0-terminated words to a vector.
// the vector is emptied first; bytes after the last 0 byte are ignored.
static void unserialize_words_list(std::vector<std::string> & word_list, char * value, size_t size)
{
    word_list.clear();
    size_t word_start = 0;
    for(size_t offset = 0 ; offset < size ; offset ++) {
        if (value[offset] != 0) {
            continue;
        }
        // end of a word: add it.
        word_list.push_back(std::string(value + word_start, offset - word_start));
        word_start = offset + 1;
    }
}
455 | 
// unserialize a list of 0-terminated words to a set.
// the set is emptied first only when clear_words_set is true; bytes after the
// last 0 byte are ignored.
static void unserialize_words_set(std::set<std::string> & word_set, char * value, size_t size, bool clear_words_set)
{
    if (clear_words_set) {
        word_set.clear();
    }
    size_t word_start = 0;
    for(size_t offset = 0 ; offset < size ; offset ++) {
        if (value[offset] != 0) {
            continue;
        }
        // end of a word: add it.
        word_set.insert(std::string(value + word_start, offset - word_start));
        word_start = offset + 1;
    }
}
475 | 
// serialize the words stored in a set.
// every word, followed by a 0 byte, is appended to the variable value in the
// iteration order of the set.
static void serialize_words_set(std::string & value, std::set<std::string> & word_set)
{
    for(std::set<std::string>::const_iterator word = word_set.begin() ; word != word_set.end() ; ++ word) {
        value += * word;
        value += '\0';
    }
}
487 | 
// pending modification to a node: the keys of one node loaded in memory so
// that they can be changed and written back.
struct modified_node {
    // database the node belongs to.
    kvdbo * db;
    // identifier used to build the storage key of the node.
    uint64_t node_id;
    // index of the node in the master node tables.
    // -1 marks a node that is not loaded (see write_loaded_node()).
    unsigned int node_index;
    // keys of the node, kept sorted by the set.
    std::set<std::string> keys;
};
495 | 
496 | // flush the pending changes of the keys list in memory.
497 | static int flush_pending_keys(kvdbo * db)
498 | {
499 |     if ((db->pending_keys.size() > 0) && (db->nodes_ids.size() == 0)) {
500 |         add_first_node(db);
501 |     }
502 |     
503 |     struct modified_node current_node;
504 |     current_node.db = db;
505 |     current_node.node_id = 0;
506 |     current_node.node_index = -1;
507 |     
508 |     std::set<std::string>::iterator addition_it = db->pending_keys.begin();
509 |     std::set<std::string>::iterator deletion_it = db->pending_keys_delete.begin();
510 |     for(unsigned int node_index = 0 ; node_index < db->nodes_ids.size() ; node_index ++) {
511 |         // if it's the last node.
512 |         if (node_index == db->nodes_ids.size() - 1) {
513 |             // also applies when nodes_ids->size() == 1, node_index == 0
514 |             while (deletion_it != db->pending_keys_delete.end()) {
515 |                 if (current_node.node_index != node_index) {
516 |                     load_node(&current_node, node_index);
517 |                 }
518 |                 current_node.keys.erase(* deletion_it);
519 |                 deletion_it ++;
520 |             }
521 |             while (addition_it != db->pending_keys.end()) {
522 |                 if (current_node.node_index != node_index) {
523 |                     load_node(&current_node, node_index);
524 |                 }
525 |                 current_node.keys.insert(* addition_it);
526 |                 addition_it ++;
527 |             }
528 |         }
529 |         else {
530 |             // applies when nodes_ids->size() >= 2
531 |             while (deletion_it != db->pending_keys_delete.end()) {
532 |                 // make sure that we don't reach the boundary of the next node.
533 |                 if (* deletion_it >= db->nodes_first_keys[node_index + 1]) {
534 |                     // stop here.
535 |                     break;
536 |                 }
537 |                 if (current_node.node_index != node_index) {
538 |                     load_node(&current_node, node_index);
539 |                 }
540 |                 current_node.keys.erase(* deletion_it);
541 |                 deletion_it ++;
542 |             }
543 |             while (addition_it != db->pending_keys.end()) {
544 |                 // make sure that we don't reach the boundary of the next node.
545 |                 if (* addition_it >= db->nodes_first_keys[node_index + 1]) {
546 |                     // stop here.
547 |                     break;
548 |                 }
549 |                 if (current_node.node_index != node_index) {
550 |                     load_node(&current_node, node_index);
551 |                 }
552 |                 current_node.keys.insert(* addition_it);
553 |                 addition_it ++;
554 |             }
555 |         }
556 |     }
557 |     // write the last node.
558 |     write_loaded_node(&current_node);
559 |     db->pending_keys.clear();
560 |     db->pending_keys_delete.clear();
561 |     
562 |     return 0;
563 | }
564 | 
565 | // load the given node in memory.
566 | static int load_node(struct modified_node * node, unsigned int node_index)
567 | {
568 |     write_loaded_node(node);
569 |     
570 |     uint64_t node_id = node->db->nodes_ids[node_index];
571 |     node->node_index = node_index;
572 |     node->node_id = node_id;
573 |     node->keys.clear();
574 |     
575 |     int r = load_from_node_id(node, node_id);
576 |     if (r != 0) {
577 |         return r;
578 |     }
579 |     
580 |     return r;
581 | }
582 | 
583 | // add the keys from the given node to the data structure.
584 | static int load_from_node_id(struct modified_node * node, uint64_t node_id)
585 | {
586 |     std::string node_key;
587 |     node_key.append(METAKEY_PREFIX, METAKEY_PREFIX_SIZE);
588 |     node_key.append(NODE_PREFIX, strlen(NODE_PREFIX));
589 |     uint64_t identifier = hton64(node_id);
590 |     node_key.append((const char *) &identifier, sizeof(identifier));
591 |     char * value;
592 |     size_t value_size;
593 |     int r = kvdb_get(node->db->db, node_key.c_str(), node_key.length(), &value, &value_size);
594 |     if (r == -2) {
595 |         node->node_index = -1;
596 |         return -2;
597 |     }
598 |     if (r == 0) {
599 |         unserialize_words_set(node->keys, value, value_size, false);
600 |         free(value);
601 |     }
602 |     
603 |     return 0;
604 | }
605 | 
606 | static int remove_node_id(kvdbo * db, uint64_t node_id)
607 | {
608 |     std::string node_key;
609 |     node_key.append(METAKEY_PREFIX, METAKEY_PREFIX_SIZE);
610 |     node_key.append(NODE_PREFIX, strlen(NODE_PREFIX));
611 |     uint64_t identifier = hton64(node_id);
612 |     node_key.append((const char *) &identifier, sizeof(identifier));
613 |     int r = kvdb_delete(db->db, node_key.c_str(), node_key.length());
614 |     if (r == -1) {
615 |         return 0;
616 |     }
617 |     if (r != 0) {
618 |         return r;
619 |     }
620 |     return 0;
621 | }
622 | 
623 | // returns the next usable node identifier.
624 | static uint64_t allocate_node_id(kvdbo * db)
625 | {
626 |     uint64_t node_id = db->next_node_id;
627 |     db->next_node_id ++;
628 |     return node_id;
629 | }
630 | 
631 | // create the first node.
632 | static int add_first_node(kvdbo * db)
633 | {
634 |     uint64_t node_id = allocate_node_id(db);
635 |     db->nodes_ids.push_back(node_id);
636 |     db->nodes_first_keys.push_back("");
637 |     db->nodes_keys_count.push_back(0);
638 |     int r = write_master_node(db);
639 |     if (r != 0) {
640 |         return r;
641 |     }
642 |     return 0;
643 | }
644 | 
645 | #define MAX_KEYS_PER_NODE 16384
646 | #define KEYS_PER_NODE_MERGE_THRESHOLD_FACTOR 4
647 | #define KEYS_PER_NODE_MERGE_THRESHOLD (MAX_KEYS_PER_NODE / KEYS_PER_NODE_MERGE_THRESHOLD_FACTOR)
648 | #define MEAN_KEYS_PER_NODE_FACTOR 2
649 | #define MEAN_KEYS_PER_NODE (MAX_KEYS_PER_NODE / MEAN_KEYS_PER_NODE_FACTOR)
650 | 
651 | // write the node to disk.
652 | static int write_loaded_node(struct modified_node * node)
653 | {
654 |     // not valid.
655 |     if (node->node_index == -1) {
656 |         return 0;
657 |     }
658 |     
659 |     if (node->keys.size() == 0) {
660 |         // if there's no keys.
661 |         int r = remove_node(node->db, node->node_index);
662 |         // invalidate.
663 |         node->node_index = -1;
664 |         return r;
665 |     }
666 |     else if (node->keys.size() > MAX_KEYS_PER_NODE) {
667 |         // if there's more keys than the limit, split node.
668 |         unsigned int node_index = node->node_index;
669 |         // compute the number of nodes to create to replace this one.
670 |         unsigned int count = (unsigned int) ((node->keys.size() + MEAN_KEYS_PER_NODE - 1) / MEAN_KEYS_PER_NODE);
671 |         int r = split_node(node->db, node_index, count, node->keys);
672 |         if (r != 0) {
673 |             return r;
674 |         }
675 |         bool didMerge = false;
676 |         // try to merge the last one with the next one.
677 |         r = try_merge(node->db, node_index + count - 1, &didMerge);
678 |         if (r != 0) {
679 |             return r;
680 |         }
681 |         // invalidate.
682 |         node->node_index = -1;
683 |         return 0;
684 |     }
685 |     else if (node->keys.size() < KEYS_PER_NODE_MERGE_THRESHOLD) {
686 |         // if there's a low number of keys.
687 |         int r = write_single_loaded_node(node);
688 |         if (r != 0) {
689 |             return r;
690 |         }
691 |         
692 |         // try to merge node with previous...
693 |         unsigned int node_index = node->node_index;
694 |         bool didMerge = false;
695 |         if (node_index > 0) {
696 |             r = try_merge(node->db, node_index - 1, &didMerge);
697 |             if (r != 0) {
698 |                 return r;
699 |             }
700 |             if (didMerge) {
701 |                 node_index --;
702 |             }
703 |         }
704 |         // then, with next.
705 |         r = try_merge(node->db, node_index, &didMerge);
706 |         if (r != 0) {
707 |             return r;
708 |         }
709 |         // invalidate.
710 |         node->node_index = -1;
711 |         return 0;
712 |     }
713 |     else {
714 |         // in other cases.
715 |         int r = write_single_loaded_node(node);
716 |         // invalidate.
717 |         node->node_index = -1;
718 |         return r;
719 |     }
720 |     return 0;
721 | }
722 | 
723 | static int write_single_loaded_node(struct modified_node * node)
724 | {
725 |     // write the node.
726 |     std::string value;
727 |     serialize_words_set(value, node->keys);
728 |     std::string node_key;
729 |     node_key.append(METAKEY_PREFIX, METAKEY_PREFIX_SIZE);
730 |     node_key.append(NODE_PREFIX, strlen(NODE_PREFIX));
731 |     uint64_t identifier = hton64(node->node_id);
732 |     node_key.append((const char *) &identifier, sizeof(identifier));
733 |     int r = kvdb_set(node->db->db, node_key.c_str(), node_key.length(), value.c_str(), value.length());
734 |     if (r != 0) {
735 |         return r;
736 |     }
737 |     // update the master node.
738 |     bool changed = false;
739 |     if (node->node_id != node->db->nodes_ids[node->node_index]) {
740 |         node->db->nodes_ids[node->node_index] = node->node_id;
741 |         changed = true;
742 |     }
743 |     if (node->db->nodes_keys_count[node->node_index] != node->keys.size()) {
744 |         node->db->nodes_keys_count[node->node_index] = (uint32_t) node->keys.size();
745 |         changed = true;
746 |     }
747 |     std::string first_key;
748 |     if (node->keys.begin() != node->keys.end()) {
749 |         first_key = * node->keys.begin();
750 |     }
751 |     if (node->db->nodes_first_keys[node->node_index] != first_key) {
752 |         node->db->nodes_first_keys[node->node_index] = first_key;
753 |         changed = true;
754 |     }
755 |     if (changed) {
756 |         r = write_master_node(node->db);
757 |         if (r != 0) {
758 |             return r;
759 |         }
760 |     }
761 |     
762 |     return 0;
763 | }
764 | 
765 | // try to merge with the next node.
766 | static int try_merge(kvdbo * db, unsigned int node_index, bool * pDidMerge)
767 | {
768 |     // there's no next node.
769 |     if (node_index + 1 >= db->nodes_ids.size()) {
770 |         * pDidMerge = false;
771 |         return 0;
772 |     }
773 |     
774 |     // would it make the number of keys larger than the threshold?
775 |     if (db->nodes_keys_count[node_index] + db->nodes_keys_count[node_index + 1] > MEAN_KEYS_PER_NODE) {
776 |         * pDidMerge = false;
777 |         return 0;
778 |     }
779 |     
780 |     struct modified_node current_node;
781 |     current_node.db = db;
782 |     current_node.node_id = db->nodes_ids[node_index];
783 |     current_node.node_index = node_index;
784 |     
785 |     // add keys of node at node_index into memory.
786 |     int r = load_from_node_id(&current_node, db->nodes_ids[node_index]);
787 |     if (r != 0) {
788 |         return r;
789 |     }
790 |     // add keys of node at (node_index + 1) into memory.
791 |     r = load_from_node_id(&current_node, db->nodes_ids[node_index + 1]);
792 |     if (r != 0) {
793 |         return r;
794 |     }
795 |     
796 |     // write the result.
797 |     r = write_single_loaded_node(&current_node);
798 |     if (r != 0) {
799 |         return r;
800 |     }
801 |     
802 |     //delete current_node.keys;
803 |     
804 |     // remove the node at (node_index + 1).
805 |     r = remove_node(db, node_index + 1);
806 |     if (r != 0) {
807 |         return r;
808 |     }
809 |     
810 |     * pDidMerge = true;
811 |     
812 |     return 0;
813 | }
814 | 
815 | // remove node at the given index.
816 | static int remove_node(kvdbo * db, unsigned int node_index)
817 | {
818 |     int r = remove_node_id(db, db->nodes_ids[node_index]);
819 |     if (r != 0) {
820 |         return r;
821 |     }
822 |     db->nodes_ids.erase(db->nodes_ids.begin() + node_index);
823 |     db->nodes_first_keys.erase(db->nodes_first_keys.begin() + node_index);
824 |     db->nodes_keys_count.erase(db->nodes_keys_count.begin() + node_index);
825 |     if (r != 0) {
826 |         return r;
827 |     }
828 |     r = write_master_node(db);
829 |     if (r != 0) {
830 |         return r;
831 |     }
832 |     
833 |     return 0;
834 | }
835 | 
836 | // create 'count' new nodes to replace the given node at node_index.
837 | // the given keys will be used to fill the new nodes.
838 | static int split_node(kvdbo * db, unsigned int node_index, unsigned int count,
839 |                       std::set<std::string> & keys)
840 | {
841 |     // creates as many nodes as needed for the split.
842 |     struct modified_node * nodes = new modified_node[count];
843 |     for(unsigned int i = 0 ; i < count ; i ++) {
844 |         nodes[i].db = db;
845 |         nodes[i].node_id = allocate_node_id(db);
846 |         nodes[i].node_index = node_index + i;
847 |         //nodes[i].keys = new std::set<std::string>();
848 |     }
849 |     
850 |     // fill the new nodes with keys.
851 |     struct modified_node * current_node = &nodes[0];
852 |     unsigned int added_count = 0;
853 |     std::set<std::string>::iterator it = keys.begin();
854 |     while (it != keys.end()) {
855 |         if (added_count >= MAX_KEYS_PER_NODE / MEAN_KEYS_PER_NODE_FACTOR) {
856 |             current_node ++;
857 |             added_count = 0;
858 |         }
859 |         current_node->keys.insert(* it);
860 |         added_count ++;
861 |         it ++;
862 |     }
863 |     
864 |     // adjust the master node information.
865 |     int r;
866 |     remove_node_id(db, db->nodes_ids[node_index]);
867 |     db->nodes_ids.erase(db->nodes_ids.begin() + node_index);
868 |     db->nodes_first_keys.erase(db->nodes_first_keys.begin() + node_index);
869 |     db->nodes_keys_count.erase(db->nodes_keys_count.begin() + node_index);
870 |     db->nodes_ids.insert(db->nodes_ids.begin() + node_index, count, 0);
871 |     db->nodes_first_keys.insert(db->nodes_first_keys.begin() + node_index, count, "");
872 |     db->nodes_keys_count.insert(db->nodes_keys_count.begin() + node_index, count, 0);
873 |     // write the nodes.
874 |     for(unsigned int i = 0 ; i < count ; i ++) {
875 |         r = write_single_loaded_node(&nodes[i]);
876 |         if (r != 0) {
877 |             return r;
878 |         }
879 |     }
880 |     delete [] nodes;
881 |     
882 |     return 0;
883 | }
884 | 
885 | // for debug purpose.
886 | static void show_nodes(kvdbo * db)
887 | {
888 |     printf("*******\n");
889 |     printf("node_ids: ");
890 |     for(unsigned int i = 0 ; i < db->nodes_ids.size() ; i ++) {
891 |         printf("%i ", (int) db->nodes_ids[i]);
892 |     }
893 |     printf("\n");
894 |     printf("keys: ");
895 |     for(unsigned int i = 0 ; i < db->nodes_first_keys.size() ; i ++) {
896 |         printf("%s ", db->nodes_first_keys[i].c_str());
897 |     }
898 |     printf("\n");
899 |     printf("count: ");
900 |     for(unsigned int i = 0 ; i < db->nodes_keys_count.size() ; i ++) {
901 |         printf("%i ", (int) db->nodes_keys_count[i]);
902 |     }
903 |     printf("\n");
904 |     printf("*******\n");
905 | }


--------------------------------------------------------------------------------
/src/kvdbo.h:
--------------------------------------------------------------------------------
#ifndef KVDBO_H

#define KVDBO_H

#include <sys/types.h>

#ifdef __cplusplus
extern "C" {
#endif

// kvdbo is like kvdb, except that it also maintains an efficient ordered list
// of the keys, which makes it possible to iterate over them.

typedef struct kvdbo kvdbo;

// creates a kvdbo.
kvdbo * kvdbo_new(const char * filename);
// destroys a kvdbo.
void kvdbo_free(kvdbo * db);

// opens a kvdbo.
int kvdbo_open(kvdbo * db);
// closes a kvdbo.
void kvdbo_close(kvdbo * db);

// writes pending changes.
int kvdbo_flush(kvdbo * db);

// inserts a key / value pair. If the key already exists, its value is replaced.
// Returns -2 if there's an I/O error.
// Returns -3 if the key is invalid (starting with \0kvdbo).
// kvdbo_flush() must be called to write all pending changes to disk.
int kvdbo_set(kvdbo * db, const char * key, size_t key_size,
              const char * value, size_t value_size);

// retrieves the value for the given key.
// The result stored in p_value should be released using free().
// Returns -1 if the item is not found.
// Returns -2 if there's an I/O error.
// kvdbo_flush() must be called to write all pending changes to disk.
int kvdbo_get(kvdbo * db, const char * key, size_t key_size,
              char ** p_value, size_t * p_value_size);

// removes the given key.
// Returns -1 if the item is not found.
// Returns -2 if there's an I/O error.
// kvdbo_flush() must be called to write all pending changes to disk.
int kvdbo_delete(kvdbo * db, const char * key, size_t key_size);

typedef struct kvdbo_iterator kvdbo_iterator;

// creates an iterator on the given kvdbo (order is lexicographical).
kvdbo_iterator * kvdbo_iterator_new(kvdbo * db);

// destroys an iterator.
void kvdbo_iterator_free(kvdbo_iterator * iterator);

// seeks to the first key.
void kvdbo_iterator_seek_first(kvdbo_iterator * iterator);

// seeks to the position of the given key or after.
void kvdbo_iterator_seek_after(kvdbo_iterator * iterator, const char * key, size_t key_size);

// seeks to the last key.
void kvdbo_iterator_seek_last(kvdbo_iterator * iterator);

// seeks to the next key.
void kvdbo_iterator_next(kvdbo_iterator * iterator);

// seeks to the previous key.
void kvdbo_iterator_previous(kvdbo_iterator * iterator);

// returns the key at the position of the iterator.
// The result is valid until the next call to any iterator function.
void kvdbo_iterator_get_key(kvdbo_iterator * iterator, const char ** p_key, size_t * p_key_size);

// returns whether the iterator is valid, i.e. whether it points at a key.
int kvdbo_iterator_is_valid(kvdbo_iterator * iterator);

#ifdef __cplusplus
}
#endif

#endif
85 | 


--------------------------------------------------------------------------------
/src/kvendian.h:
--------------------------------------------------------------------------------
 1 | //
 2 | //  kvendian.h
 3 | //  kvdb
 4 | //
 5 | //  Created by DINH Viêt Hoà on 6/1/13.
 6 | //  Copyright (c) 2013 etpan. All rights reserved.
 7 | //
 8 | 
 9 | #ifndef kvdb_kvendian_h
10 | #define kvdb_kvendian_h
11 | 
12 | #include <inttypes.h>
13 | 
// Convert a 64 bit value from host to network byte order.
static inline uint64_t hton64(uint64_t val)
{
    // platform already in network byte order?
    if (htonl(1) == 1L)
        return val;
    
    // swap each 32 bit half and exchange the two halves.
    uint32_t low = (uint32_t) val;
    uint32_t high = (uint32_t) (val >> 32);
    return ((uint64_t) htonl(low) << 32) | htonl(high);
}
29 | 
// Convert a 64 bit value from network to host byte order.
static inline uint64_t ntoh64(uint64_t val)
{
    // platform already in network byte order?
    if (htonl(1) == 1L)
        return val;
    
    // swap each 32 bit half and exchange the two halves.
    uint32_t low = (uint32_t) val;
    uint32_t high = (uint32_t) (val >> 32);
    return ((uint64_t) ntohl(low) << 32) | ntohl(high);
}
45 | 
// Read a 64 bit value stored in network byte order (most significant byte
// first) at the given address.
// The bytes are read one at a time, so the address does not need to be
// aligned and no object is accessed through an incompatible pointer type.
static inline uint64_t bytes_to_h64(char * bytes)
{
    const unsigned char * p = (const unsigned char *) bytes;
    uint64_t result = 0;
    for (unsigned int i = 0 ; i < sizeof(result) ; i ++) {
        result = (result << 8) | p[i];
    }
    return result;
}
51 | 
// Store a 64 bit value in network byte order (most significant byte first) at
// the given address.
// The bytes are written one at a time, so the address does not need to be
// aligned and no object is accessed through an incompatible pointer type.
static inline void h64_to_bytes(char * bytes, uint64_t value)
{
    unsigned char * p = (unsigned char *) bytes;
    for (unsigned int i = sizeof(value) ; i > 0 ; i --) {
        p[i - 1] = (unsigned char) (value & 0xff);
        value >>= 8;
    }
}
57 | 
// Read a 32 bit value stored in network byte order (most significant byte
// first) at the given address.
// The bytes are read one at a time, so the address does not need to be
// aligned and no object is accessed through an incompatible pointer type.
static inline uint32_t bytes_to_h32(char * bytes)
{
    const unsigned char * p = (const unsigned char *) bytes;
    uint32_t result = 0;
    for (unsigned int i = 0 ; i < sizeof(result) ; i ++) {
        result = (result << 8) | p[i];
    }
    return result;
}
63 | 
// Store a 32 bit value in network byte order (most significant byte first) at
// the given address.
// The bytes are written one at a time, so the address does not need to be
// aligned and no object is accessed through an incompatible pointer type.
static inline void h32_to_bytes(char * bytes, uint32_t value)
{
    unsigned char * p = (unsigned char *) bytes;
    for (unsigned int i = sizeof(value) ; i > 0 ; i --) {
        p[i - 1] = (unsigned char) (value & 0xff);
        value >>= 8;
    }
}
69 | 
// Read the 8 bit value stored at the given address.
static inline uint8_t bytes_to_h8(char * bytes)
{
    const uint8_t * source = (const uint8_t *) bytes;
    return source[0];
}
75 | 
// Store the 8 bit value at the given address.
static inline void h8_to_bytes(char * bytes, uint8_t value)
{
    uint8_t * destination = (uint8_t *) bytes;
    destination[0] = value;
}
80 | 
81 | #endif
82 | 


--------------------------------------------------------------------------------
/src/kvmurmurhash.h:
--------------------------------------------------------------------------------
 1 | //
 2 | //  murmurhash.h
 3 | //  kvdb
 4 | //
 5 | //  Created by DINH Viêt Hoà on 6/2/13.
 6 | //  Copyright (c) 2013 etpan. All rights reserved.
 7 | //
 8 | 
 9 | #ifndef KVMURMURHASH_H
10 | #define KVMURMURHASH_H
11 | 
// Compute a 32 bit hash of 'length' bytes of 'data' with the given seed, using
// MurmurHash2-style mixing.
// The block index is a size_t, so inputs of any size are walked without
// overflowing a signed index.
// The computed values are unchanged: in particular the trailing 1 to 3 bytes
// are still read through 'data' (plain char), as before, so hashes already
// stored elsewhere stay comparable.
static inline uint32_t kv_murmur_hash(const char * data, size_t length, uint32_t seed)
{
    const uint32_t m = 0x5bd1e995;
    const uint32_t r = 24;
    const unsigned char * bytes = (const unsigned char *) data;
    
    uint32_t h = seed ^ (uint32_t) length;
    
    // number of complete 4 byte blocks.
    const size_t len_4 = length >> 2;
    
    for (size_t i = 0; i < len_4; i++) {
        const unsigned char * block = bytes + (i << 2);
        // assemble the block as a little-endian 32 bit word.
        uint32_t k = block[3];
        k = k << 8;
        k = k | block[2];
        k = k << 8;
        k = k | block[1];
        k = k << 8;
        k = k | block[0];
        k *= m;
        k ^= k >> r;
        k *= m;
        h *= m;
        h ^= k;
    }
    
    // avoid calculating modulo
    const size_t len_m = len_4 << 2;
    const size_t left = length - len_m;
    
    if (left != 0) {
        if (left >= 3) {
            h ^= (uint32_t) data[length - 3] << 16;
        }
        if (left >= 2) {
            h ^= (uint32_t) data[length - 2] << 8;
        }
        if (left >= 1) {
            h ^= (uint32_t) data[length - 1];
        }
        
        h *= m;
    }
    
    // final avalanche.
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;
    
    return h;
}
62 | 
63 | #endif
64 | 


--------------------------------------------------------------------------------
/src/kvpaddingutils.h:
--------------------------------------------------------------------------------
 1 | //
 2 | //  kvpaddingutils.h
 3 | //  kvdb
 4 | //
 5 | //  Created by DINH Viêt Hoà on 6/2/13.
 6 | //  Copyright (c) 2013 etpan. All rights reserved.
 7 | //
 8 | 
 9 | #ifndef KVPADDINGUTILS_H
10 | #define KVPADDINGUTILS_H
11 | 
12 | #include <sys/mman.h>
13 | 
// Return the smallest power of two that is greater than or equal to value
// (1 for a value of 0).
// Return 0 when no such power fits in 64 bits, i.e. when value is larger than
// 2^63, instead of looping forever.
static inline uint64_t power2_round_up(uint64_t value)
{
    uint64_t power = 1;
    // power wraps to 0 once 2^63 has been shifted out.
    while (power != 0 && power < value)
        power <<= 1;
    return power;
}
21 | 
// Round a block size up to a power of two, with a minimum of 16.
static inline uint64_t block_size_round_up(uint64_t value)
{
    const uint64_t minimum = 16;
    return power2_round_up(value < minimum ? minimum : value);
}
29 | 
// Return the smallest exponent e such that 2^e is greater than or equal to
// value (0 for a value of 0 or 1).
// The exponent is capped at 64, which is the answer for any value larger than
// 2^63; the loop used to never end for those values.
static inline unsigned int log2_round_up(uint64_t value)
{
    uint64_t power = 1;
    unsigned int log2_value = 0;
    while (log2_value < 64 && power < value) {
        power <<= 1;
        log2_value ++;
    }
    return log2_value;
}
40 | 
// Integer type used to do arithmetic on addresses and sizes.
#define KV_ULONG_PTR unsigned long
// Round x up / down to a multiple of the page size held by db (kv_pagesize is
// expected to be a power of two). db may be any expression yielding a pointer.
#define KV_PAGE_ROUND_UP(db, x) ( (((KV_ULONG_PTR)(x)) + (db)->kv_pagesize-1)  & (~((db)->kv_pagesize-1)) )
#define KV_PAGE_ROUND_DOWN(db, x) ( ((KV_ULONG_PTR)(x)) & (~((db)->kv_pagesize-1)) )
// Round x up to a multiple of 8.
#define KV_BYTE_ROUND_UP(x) ( (((KV_ULONG_PTR)(x)) + 8-1)  & (~(8-1)) )
45 | 
46 | #endif
47 | 


--------------------------------------------------------------------------------
/src/kvprime.c:
--------------------------------------------------------------------------------
 1 | //
 2 | //  kvprime.c
 3 | //  kvdb
 4 | //
 5 | //  Created by DINH Viêt Hoà on 6/2/13.
 6 | //  Copyright (c) 2013 etpan. All rights reserved.
 7 | //
 8 | 
 9 | #include "kvprime.h"
10 | 
// Return the first entry of the table below that is greater than or equal to
// num. When num is larger than every entry, the largest entry is returned
// (the former trailing 0 marker used to be returned in that case).
// The table is shared by all calls instead of being rebuilt on each call.
uint64_t kv_getnextprime(uint64_t num)
{
    static const uint64_t prime_numbers[] = {
        1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 43, 47, 53, 59, 61, 71, 79, 83,
        89, 103, 109, 113, 127, 139, 157, 173, 191, 199, 223, 239, 251, 283, 317, 349,
        383, 409, 443, 479, 509, 571, 631, 701, 761, 829, 887, 953, 1021, 1151, 1279,
        1399, 1531, 1663, 1789, 1913, 2039, 2297, 2557, 2803, 3067, 3323, 3583, 3833,
        4093, 4603, 5119, 5623, 6143, 6653, 7159, 7673, 8191, 9209, 10223, 11261,
        12281, 13309, 14327, 15359, 16381, 18427, 20479, 22511, 24571, 26597, 28669,
        30713, 32749, 36857, 40949, 45053, 49139, 53239, 57331, 61417, 65521, 73727,
        81919, 90107, 98299, 106487, 114679, 122869, 131071, 147451, 163819, 180221,
        196597, 212987, 229373, 245759, 262139, 294911, 327673, 360439, 393209, 425977,
        458747, 491503, 524287, 589811, 655357, 720887, 786431, 851957, 917503, 982981,
        1048573, 1179641, 1310719, 1441771, 1572853, 1703903, 1835003, 1966079,
        2097143, 2359267, 2621431, 2883577, 3145721, 3407857, 3670013, 3932153,
        4194301, 4718579, 5242877, 5767129, 6291449, 6815741, 7340009, 7864301,
        8388593, 9437179, 10485751, 11534329, 12582893, 13631477, 14680063, 15728611,
        16777213, 18874367, 20971507, 23068667, 25165813, 27262931, 29360087, 31457269,
        33554393, 37748717, 41943023, 46137319, 50331599, 54525917, 58720253, 62914549,
        67108859, 75497467, 83886053, 92274671, 100663291, 109051903, 117440509,
        125829103, 134217689, 150994939, 167772107, 184549373, 201326557, 218103799,
        234881011, 251658227, 268435399, 301989881, 335544301, 369098707, 402653171,
        436207613, 469762043, 503316469, 536870909, 603979769, 671088637, 738197503,
        805306357, 872415211, 939524087, 1006632947, 1073741789, 1207959503,
        1342177237, 1476394991, 1610612711, 1744830457, 1879048183, 2013265907,
        2576980349, 3092376431, 3710851741, 4718021527, 6133428047, 7973456459,
        10365493393, 13475141413, 17517683831, 22772988923, 29604885677, 38486351381,
        50032256819, 65041933867, 84554514043, 109920868241, 153889215497
    };
    static const int count = sizeof(prime_numbers) / sizeof(prime_numbers[0]);
    
    for(int i = 0 ; i < count ; i ++){
        if (num <= prime_numbers[i])
            return prime_numbers[i];
    }
    // num is beyond the table: fall back to its largest entry.
    return prime_numbers[count - 1];
}
48 | 


--------------------------------------------------------------------------------
/src/kvprime.h:
--------------------------------------------------------------------------------
 1 | //
 2 | //  kvprime.h
 3 | //  kvdb
 4 | //
 5 | //  Created by DINH Viêt Hoà on 6/2/13.
 6 | //  Copyright (c) 2013 etpan. All rights reserved.
 7 | //
 8 | 
 9 | #ifndef KVPRIME_H
10 | #define KVPRIME_H
11 | 
12 | #include <inttypes.h>
13 | 
/* Looks num up in the size table of kvprime.c and returns the first entry that is >= num. */
uint64_t kv_getnextprime(uint64_t num);
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/src/kvserialization.cpp:
--------------------------------------------------------------------------------
 1 | #include "kvserialization.h"
 2 | 
 3 | #include <stdlib.h>
 4 | 
 5 | void kv_encode_uint64(std::string & buffer, uint64_t value)
 6 | {
 7 |     char valuestr[10];
 8 |     int len = 0;
 9 |     while (1) {
10 |         unsigned char remainder = value & 0x7f;
11 |         value = value >> 7;
12 |         if (value == 0) {
13 |             // last item to write.
14 |             valuestr[len] = remainder;
15 |             len ++;
16 |             break;
17 |         }
18 |         else {
19 |             valuestr[len] = remainder | 0x80;
20 |             len ++;
21 |         }
22 |     }
23 |     buffer.append(valuestr, len);
24 | }
25 | 
// Decodes one variable-length integer (the format written by
// kv_encode_uint64) starting at buffer[position].
//
// The decoded value is stored in *p_value and the position just past the last
// consumed byte is returned.
//
// Decoding stops early, keeping the bits read so far, when the buffer ends
// before a terminating byte is seen or when more than 10 bytes carry the
// continuation bit. This prevents reads past the end of the string and
// shifts of 64 bits or more on truncated or corrupt data. When position is
// already at or past the end, nothing is consumed and *p_value is 0.
size_t kv_decode_uint64(std::string & buffer, size_t position, uint64_t * p_value)
{
    const size_t size = buffer.size();
    uint64_t value = 0;
    int shift = 0;
    
    while (position < size && shift < 64) {
        unsigned char byte = buffer[position];
        position ++;
        value += ((uint64_t) byte & 0x7f) << shift;
        if ((byte & 0x80) == 0) {
            break;
        }
        shift += 7;
    }
    
    * p_value = value;
    
    return position;
}
45 | 


--------------------------------------------------------------------------------
/src/kvserialization.h:
--------------------------------------------------------------------------------
 1 | #ifndef KVSERIALIZATION_H
 2 | 
 3 | #define KVSERIALIZATION_H
 4 | 
 5 | #include <string>
 6 | 
 7 | void kv_encode_uint64(std::string & buffer, uint64_t value);
 8 | size_t kv_decode_uint64(std::string & buffer, size_t position, uint64_t * p_value);
 9 | 
10 | #endif
11 | 
12 | 


--------------------------------------------------------------------------------
/src/kvtable.c:
--------------------------------------------------------------------------------
  1 | //
  2 | //  table.c
  3 | //  kvdb
  4 | //
  5 | //  Created by DINH Viêt Hoà on 6/2/13.
  6 | //  Copyright (c) 2013 etpan. All rights reserved.
  7 | //
  8 | 
  9 | #include "kvtable.h"
 10 | 
 11 | #include <strings.h>
 12 | #include <unistd.h>
 13 | #include <sys/mman.h>
 14 | #include <stdio.h>
 15 | 
 16 | #include "kvtypes.h"
 17 | #include "kvprime.h"
 18 | #include "kvpaddingutils.h"
 19 | 
 20 | static int map_table(kvdb * db, struct kvdb_table ** result, uint64_t offset, int is_first);
 21 | static int mapping_setup(struct kvdb_mapping * mapping, int fd, off_t offset, size_t size);
 22 | static void mapping_unsetup(struct kvdb_mapping * mapping);
 23 | static void unmap_table(struct kvdb_table * table);
 24 | 
 25 | int kv_table_header_write(kvdb * db, uint64_t table_start, uint64_t maxcount)
 26 | {
 27 |     uint64_t bloomsize = kv_getnextprime(maxcount * KV_TABLE_BITS_FOR_BLOOM_FILTER);
 28 |     char data[KV_TABLE_HEADER_SIZE];
 29 |     bzero(data, KV_TABLE_HEADER_SIZE);
 30 |     h64_to_bytes(&data[KV_TABLE_BLOOM_SIZE_OFFSET], bloomsize);
 31 |     h64_to_bytes(&data[KV_TABLE_MAX_COUNT_OFFSET], maxcount);
 32 |     ssize_t r;
 33 |     r = pwrite(db->kv_fd, data, KV_TABLE_HEADER_SIZE, table_start);
 34 |     if (r < 0)
 35 |         return -1;
 36 |     return 0;
 37 | }
 38 | 
 39 | int kv_tables_setup(kvdb * db)
 40 | {
 41 |     map_table(db, &db->kv_first_table, KV_HEADER_SIZE, 1);
 42 |     return 0;
 43 | }
 44 | 
 45 | void kv_tables_unsetup(kvdb * db)
 46 | {
 47 |     unmap_table(db->kv_first_table);
 48 | }
 49 | 
 50 | uint64_t kv_table_create(kvdb * db, uint64_t size, struct kvdb_table ** result)
 51 | {
 52 |     //fprintf(stderr, "create table %llu", (unsigned long long) size);
 53 |     uint64_t mapping_size = KV_TABLE_SIZE(size);
 54 |     uint64_t offset = ntoh64(* db->kv_filesize);
 55 |     int r;
 56 |     r = ftruncate(db->kv_fd, offset + mapping_size);
 57 |     if (r < 0)
 58 |         return 0;
 59 |     uint64_t filesize = ntoh64(* db->kv_filesize);
 60 |     filesize += mapping_size;
 61 |     r = kv_table_header_write(db, offset, size);
 62 |     if (r < 0)
 63 |         return 0;
 64 |     r = map_table(db, result, offset, 0);
 65 |     if (r < 0)
 66 |         return 0;
 67 |     
 68 |     // When everything succeeded, update file size
 69 |     * db->kv_filesize = hton64(filesize);
 70 |     
 71 |     return offset;
 72 | }
 73 | 
 74 | static int map_table(kvdb * db, struct kvdb_table ** result, uint64_t offset, int is_first)
 75 | {
 76 |     struct kvdb_table * table;
 77 |     uint64_t maxcount;
 78 |     ssize_t read_result;
 79 |     char data[8];
 80 |     int r;
 81 |     off_t pre_page_align_size;
 82 |     
 83 |     table = calloc(1, sizeof(* table));
 84 |     if (is_first) {
 85 |         pre_page_align_size = KV_HEADER_SIZE;
 86 |     }
 87 |     else {
 88 |         off_t mapping_offset = KV_PAGE_ROUND_DOWN(db, offset);
 89 |         pre_page_align_size = offset - mapping_offset;
 90 |     }
 91 |     
 92 |     read_result = pread(db->kv_fd, data, 8, offset + KV_TABLE_MAX_COUNT_OFFSET);
 93 |     if (read_result < 0) {
 94 |         return -1;
 95 |     }
 96 |     maxcount = bytes_to_h64(data);
 97 |     uint64_t mapping_size = pre_page_align_size + KV_TABLE_SIZE(maxcount);
 98 |     r = mapping_setup(&table->kv_mapping, db->kv_fd, offset - pre_page_align_size, (size_t) mapping_size);
 99 |     if (r < 0) {
100 |         return -1;
101 |     }
102 |     table->kv_table_start = table->kv_mapping.kv_bytes + pre_page_align_size;
103 |     
104 |     table->kv_items = (struct kvdb_item *) (table->kv_table_start + KV_TABLE_ITEMS_OFFSET_OFFSET(maxcount));
105 |     table->kv_next_table_offset = (uint64_t *) (table->kv_table_start + KV_TABLE_NEXT_TABLE_OFFSET_OFFSET);
106 |     table->kv_count = (uint64_t *) (table->kv_table_start + KV_TABLE_COUNT_OFFSET);
107 |     table->kv_bloom_filter_size = (uint64_t *) (table->kv_table_start + KV_TABLE_BLOOM_SIZE_OFFSET);
108 |     table->kv_maxcount = (uint64_t *) (table->kv_table_start + KV_TABLE_MAX_COUNT_OFFSET);
109 |     table->kv_bloom_filter = (uint8_t *) (table->kv_table_start + KV_TABLE_BLOOM_FILTER_OFFSET);
110 |     
111 |     * result = table;
112 |     
113 |     if (* table->kv_next_table_offset != 0) {
114 |         r = map_table(db, &table->kv_next_table, ntoh64(* table->kv_next_table_offset), 0);
115 |         if (r < 0) {
116 |             return -1;
117 |         }
118 |     }
119 |     else {
120 |         table->kv_next_table = NULL;
121 |     }
122 |     
123 |     return 0;
124 | }
125 | 
126 | static void unmap_table(struct kvdb_table * table)
127 | {
128 |     if (table == NULL)
129 |         return;
130 |     
131 |     struct kvdb_table * next_table = table->kv_next_table;
132 |     mapping_unsetup(&table->kv_mapping);
133 |     free(table);
134 |     
135 |     unmap_table(next_table);
136 | }
137 | 
138 | static int mapping_setup(struct kvdb_mapping * mapping, int fd, off_t offset, size_t size)
139 | {
140 |     mapping->kv_bytes = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset);
141 |     if (mapping->kv_bytes == MAP_FAILED) {
142 |         return -1;
143 |     }
144 |     mapping->kv_size = size;
145 |     
146 |     return 0;
147 | }
148 | 
149 | static void mapping_unsetup(struct kvdb_mapping * mapping)
150 | {
151 |     if (mapping->kv_bytes == NULL) {
152 |         return;
153 |     }
154 |     
155 |     int r;
156 |     r = munmap(mapping->kv_bytes, mapping->kv_size);
157 |     if (r < 0) {
158 |         fprintf(stderr, "Could not unmap memory\n");
159 |     }
160 |     mapping->kv_bytes = NULL;
161 |     mapping->kv_size = 0;
162 | }
163 | 


--------------------------------------------------------------------------------
/src/kvtable.h:
--------------------------------------------------------------------------------
 1 | //
 2 | //  kvtable.h
 3 | //  kvdb
 4 | //
 5 | //  Created by DINH Viêt Hoà on 6/2/13.
 6 | //  Copyright (c) 2013 etpan. All rights reserved.
 7 | //
 8 | 
 9 | #ifndef kvdb_kvtable_h
10 | #define kvdb_kvtable_h
11 | 
12 | #include <stdlib.h>
13 | 
14 | #include "kvtypes.h"
15 | #include "kvendian.h"
16 | #include "kvprime.h"
17 | 
/* Writes a table header at file offset table_start; returns 0, or -1 on failure. */
int kv_table_header_write(kvdb * db, uint64_t table_start, uint64_t maxcount);
/* Appends and maps a table for `size` items; returns its file offset, or 0 on failure. */
uint64_t kv_table_create(kvdb * db, uint64_t size, struct kvdb_table ** result);

/* Map / unmap the table chain that starts at db->kv_first_table. */
int kv_tables_setup(kvdb * db);
void kv_tables_unsetup(kvdb * db);
23 | 
24 | static inline int kv_select_table(kvdb * db)
25 | {
26 |     if (db->kv_current_table == NULL) {
27 |         db->kv_current_table = db->kv_first_table;
28 |     }
29 |     
30 |     //fprintf(stderr, "count %i\n", (int) (* db->kv_current_table->kv_count));
31 |     while (ntoh64(* db->kv_current_table->kv_count) > ntoh64(* db->kv_current_table->kv_maxcount) * KV_MAX_MEAN_COLLISION) {
32 |         if (db->kv_current_table->kv_next_table == NULL) {
33 |             uint64_t nextsize = kv_getnextprime(ntoh64(* db->kv_current_table->kv_maxcount) * 2);
34 |             uint64_t offset = kv_table_create(db, nextsize, &db->kv_current_table->kv_next_table);
35 |             if (offset == 0) {
36 |                 return -1;
37 |             }
38 |             * db->kv_current_table->kv_next_table_offset = hton64(offset);
39 |         }
40 |         
41 |         db->kv_current_table = db->kv_current_table->kv_next_table;
42 |     }
43 |     
44 |     return 0;
45 | }
46 | 
47 | 
48 | #endif
49 | 


--------------------------------------------------------------------------------
/src/kvtypes.h:
--------------------------------------------------------------------------------
  1 | //
  2 | //  kvtypes.h
  3 | //  kvdb
  4 | //
  5 | //  Created by DINH Viêt Hoà on 6/1/13.
  6 | //  Copyright (c) 2013 etpan. All rights reserved.
  7 | //
  8 | 
  9 | #ifndef KVTYPES_H
 10 | #define KVTYPES_H
 11 | 
 12 | #include <inttypes.h>
 13 | #include <sys/types.h>
 14 | 
 15 | #include "kvdb.h"
 16 | 
#define KV_HEADER_SIZE (4 + 4 + 8 + 1 + 8 + 64 * 8)
#define KV_HEADER_MARKER_OFFSET 0
#define KV_HEADER_VERSION_OFFSET 4
#define KV_HEADER_FIRSTMAXCOUNT_OFFSET (4 + 4)
#define KV_HEADER_FILESIZE_OFFSET (4 + 4 + 8 + 1)
#define KV_HEADER_FREELIST_OFFSET (4 + 4 + 8 + 1 + 8)

// File header layout, in order (the sizes add up to KV_HEADER_SIZE):
// 1. marker                                  4 bytes
// 2. version                                 4 bytes
// 3. first table max count                   8 bytes
// 4. storage type                            1 byte
// 5. file size                               8 bytes
// 6. recycled blocks offset (for each size)  64 * 8 bytes
 29 | 
 30 | /*
 31 |  table:
 32 |  1. next offset:                         8 bytes
 33 |  2. count:                               8 bytes
 34 |  3. bloom_size:                          8 bytes
 35 |  4. maxcount                             8 bytes
 36 |  5. bloom filter table                   BLOOM_FILTER_SIZE(size) bytes
 37 |  6. offset to items (actual hash table)  maxcount items of 8 bytes
 38 |  
 39 |  table mapping size: 8 + 8 + 8 + 8 + BLOOM_FILTER_SIZE(maxcount) + (maxcount * 8)
 40 | */
 41 | 
 42 | #define KV_TABLE_NEXT_TABLE_OFFSET_OFFSET 0
 43 | #define KV_TABLE_COUNT_OFFSET 8
 44 | #define KV_TABLE_BLOOM_SIZE_OFFSET 16
 45 | #define KV_TABLE_MAX_COUNT_OFFSET 24
 46 | #define KV_TABLE_BLOOM_FILTER_OFFSET 32
 47 | #define KV_TABLE_ITEMS_OFFSET_OFFSET(maxcount) (KV_TABLE_HEADER_SIZE + KV_TABLE_BLOOM_FILTER_SIZE(maxcount))
 48 | 
 49 | #define KV_TABLE_HEADER_SIZE (8 + 8 + 8 + 8)
 50 | 
 51 | #define KV_TABLE_SIZE(maxcount) (KV_TABLE_HEADER_SIZE + KV_TABLE_BLOOM_FILTER_SIZE(maxcount) + maxcount * 8)
 52 | #define KV_FIRST_TABLE_MAX_COUNT (1 << 17)
 53 | 
 54 | #define KV_TABLE_BITS_FOR_BLOOM_FILTER 5
 55 | #define KV_TABLE_BLOOM_FILTER_SIZE(maxcount) (KV_BYTE_ROUND_UP(kv_getnextprime(maxcount * KV_TABLE_BITS_FOR_BLOOM_FILTER)) / 8)
 56 | #define KV_BLOOM_FILTER_HASH_COUNT 3
 57 | 
 58 | #define KV_MAX_MEAN_COLLISION 3
 59 | 
/*
 block:
 1. next offset  8 bytes
 2. hash_value   4 bytes
 3. key size     8 bytes
 4. key bytes    variable length
 5. data size    8 bytes
 6. data bytes   variable length

 NOTE(review): this list does not match the offsets below. The offsets also
 define a log2 size field, and KV_BLOCK_LOG2SIZE_OFFSET (9) lies only one
 byte after KV_BLOCK_HASH_VALUE_OFFSET (8) although the hash is listed as
 4 bytes. Confirm the real layout against the block reading/writing code
 before relying on either.
 */

#define KV_BLOCK_NEXT_OFFSET_OFFSET 0
#define KV_BLOCK_HASH_VALUE_OFFSET 8
#define KV_BLOCK_LOG2SIZE_OFFSET 9
#define KV_BLOCK_KEY_SIZE_OFFSET 13
#define KV_BLOCK_KEY_BYTES_OFFSET 21
 75 | 
/* One mmap()ed region: its address and length as passed to mmap()/munmap(). */
struct kvdb_mapping {
    char * kv_bytes; // start of the mapping, NULL once unmapped
    size_t kv_size;  // length of the mapping in bytes
};
 80 | 
/* State of one database. */
struct kvdb {
    char * kv_filename;
    int kv_pagesize; // used as the alignment unit by KV_PAGE_ROUND_UP/DOWN
    int kv_fd;       // descriptor used for pread/pwrite/ftruncate/mmap
    int kv_opened;
    uint64_t kv_firstmaxcount;
    int kv_compression_type;
    // Pointer to the stored file size. kvtable.c reads it with ntoh64() and
    // writes it with hton64(), i.e. the pointed-to value is in network order.
    uint64_t * kv_filesize;
    // Originally commented as "host order"; not verifiable from the table
    // code - TODO confirm against the code that uses the free list.
    uint64_t * kv_free_blocks;
    struct kvdb_table * kv_first_table;   // head of the table chain
    struct kvdb_table * kv_current_table; // table picked by kv_select_table(), or NULL
};
 93 | 
/* One 8-byte slot of a table's item array. */
struct kvdb_item {
    // Originally commented as "host order" - TODO confirm: the other mapped
    // 64-bit fields are accessed through ntoh64()/hton64().
    uint64_t kv_offset;
};
 98 | 
/*
 * One mapped table. Apart from kv_mapping and kv_next_table, the pointers
 * point into the mapping (they are set up by map_table() in kvtable.c).
 */
struct kvdb_table {
    struct kvdb_mapping kv_mapping;
    char * kv_table_start;           // first byte of the table inside the mapping
    struct kvdb_item * kv_items;     // item array, located after the bloom filter
    // Header field written by kv_table_header_write() through h64_to_bytes();
    // byte order of the stored value not visible here - TODO confirm.
    uint64_t * kv_bloom_filter_size;
    uint8_t * kv_bloom_filter;
    // The three fields below are read with ntoh64() / written with hton64()
    // by the table code, i.e. the pointed-to values are in network order.
    uint64_t * kv_next_table_offset; // 0 when there is no following table
    uint64_t * kv_count;
    uint64_t * kv_maxcount;
    struct kvdb_table * kv_next_table; // next table in the chain, or NULL
};
110 | 
/*
 * Parameters handed to a findkey_callback.
 * NOTE(review): the code filling this structure is not part of this file;
 * the field meanings are presumably those suggested by their names - confirm
 * against the key lookup code.
 */
struct find_key_cb_params {
    const char * key;
    size_t key_size;
    uint64_t previous_offset;
    uint64_t current_offset;
    uint64_t next_offset;
    struct kvdb_item * item;
    uint64_t * table_count;
    size_t log2_size;
};
121 | 
/* Callback type receiving the database, a find_key_cb_params and a caller-supplied data pointer. */
typedef void findkey_callback(kvdb * db, struct find_key_cb_params * params,
                              void * data);
124 | 
125 | #endif
126 | 


--------------------------------------------------------------------------------
/src/kvunicode.c:
--------------------------------------------------------------------------------
  1 | #include "kvunicode.h"
  2 | 
  3 | #include <stdlib.h>
  4 | #include <string.h>
  5 | #include <pthread.h>
  6 | 
  7 | #if __APPLE__
  8 | #include <CoreFoundation/CoreFoundation.h>
  9 | #endif
 10 | 
 11 | #include "ConvertUTF.h"
 12 | 
 13 | #if !__APPLE__
 14 | // Transliteration helpers.
 15 | 
/* Text buffer handed to the transliterator through the UReplaceable callbacks below. */
typedef struct XReplaceable {
    UChar* text;    /* MUST BE null-terminated */
} XReplaceable;
 19 | 
 20 | static void InitXReplaceable(XReplaceable* rep, const UChar* str, int length)
 21 | {
 22 |     if (length == 0) {
 23 |         length = u_strlen(str);
 24 |     }
 25 |     rep->text = (UChar*) malloc(sizeof(* rep->text) * (length + 1));
 26 |     rep->text[length] = 0;
 27 |     u_strncpy(rep->text, str, length);
 28 | }
 29 | 
 30 | static void FreeXReplaceable(XReplaceable* rep)
 31 | {
 32 |     if (rep->text != NULL) {
 33 |         free(rep->text);
 34 |         rep->text = NULL;
 35 |     }
 36 | }
 37 | 
/* UReplaceableCallbacks callback: length of the text in UChar units (u_strlen). */
static int32_t Xlength(const UReplaceable* rep)
{
    const XReplaceable* x = (const XReplaceable*)rep;
    return u_strlen(x->text);
}
 44 | 
/* UReplaceableCallbacks callback: the UChar unit stored at `offset` (no bounds check). */
static UChar XcharAt(const UReplaceable* rep, int32_t offset)
{
    const XReplaceable* x = (const XReplaceable*)rep;
    return x->text[offset];
}
 51 | 
/*
 * UReplaceableCallbacks callback: returns the single UChar unit stored at
 * `offset`, widened to UChar32.
 * NOTE(review): a surrogate pair is not combined into one code point here.
 */
static UChar32 Xchar32At(const UReplaceable* rep, int32_t offset)
{
    const XReplaceable* x = (const XReplaceable*)rep;
    return x->text[offset];
}
 58 | 
 59 | /* UReplaceableCallbacks callback */
 60 | static void Xreplace(UReplaceable* rep, int32_t start, int32_t limit,
 61 |                      const UChar* text, int32_t textLength)
 62 | {
 63 |     XReplaceable* x = (XReplaceable*)rep;
 64 |     int32_t newLen = Xlength(rep) + limit - start + textLength;
 65 |     UChar* newText = (UChar*) malloc(sizeof(UChar) * (newLen+1));
 66 |     u_strncpy(newText, x->text, start);
 67 |     u_strncpy(newText + start, text, textLength);
 68 |     u_strcpy(newText + start + textLength, x->text + limit);
 69 |     free(x->text);
 70 |     x->text = newText;
 71 | }
 72 | 
/*
 * UReplaceableCallbacks callback: inserts a copy of the units in
 * [start, limit) at position `dest`. The result is built in a new buffer
 * (old length + limit - start units, plus terminator) that replaces x->text.
 */
static void Xcopy(UReplaceable* rep, int32_t start, int32_t limit, int32_t dest)
{
    XReplaceable* x = (XReplaceable*)rep;
    int32_t newLen = Xlength(rep) + limit - start;
    UChar* newText = (UChar*) malloc(sizeof(UChar) * (newLen+1));
    // text before the insertion point
    u_strncpy(newText, x->text, dest);
    // the copied span
    u_strncpy(newText + dest, x->text + start, limit - start);
    // the rest of the original text, including its terminator
    u_strcpy(newText + dest + limit - start, x->text + dest);
    free(x->text);
    x->text = newText;
}
 85 | 
 86 | /* UReplaceableCallbacks callback */
 87 | static void Xextract(UReplaceable* rep, int32_t start, int32_t limit, UChar* dst)
 88 | {
 89 |     XReplaceable* x = (XReplaceable*)rep;
 90 |     int32_t len = limit - start;
 91 |     u_strncpy(dst, x->text, len);
 92 | }
 93 | 
 94 | static void InitXReplaceableCallbacks(UReplaceableCallbacks* callbacks)
 95 | {
 96 |     callbacks->length = Xlength;
 97 |     callbacks->charAt = XcharAt;
 98 |     callbacks->char32At = Xchar32At;
 99 |     callbacks->replace = Xreplace;
100 |     callbacks->extract = Xextract;
101 |     callbacks->copy = Xcopy;
102 | }
103 | 
104 | 
105 | // init and deinit.
106 | 
107 | static UReplaceableCallbacks s_xrepVtable;
108 | static UTransliterator * s_trans = NULL;
109 | static pthread_mutex_t s_lock = PTHREAD_MUTEX_INITIALIZER;
110 | static int s_initialized = 0;
111 | static int pthread_once_t s_once = PTHREAD_ONCE_INIT;
112 | 
113 | static void kv_unicode_init(void)
114 | {
115 |     pthread_mutex_lock(&s_lock);
116 |     if (!s_initialized) {
117 |         UChar urules[1024];
118 |         UErrorCode status = U_ZERO_ERROR;
119 |         u_strFromUTF8(urules, sizeof(urules), NULL, "Any-Latin; NFD; Lower; [:nonspacing mark:] remove; nfc", -1, &status);
120 |         LIDX_ASSERT(status == U_ZERO_ERROR);
121 |         
122 |         UParseError parseError;
123 |         s_trans = utrans_openU(urules, -1, UTRANS_FORWARD,
124 |                                NULL, -1, &parseError, &status);
125 |         LIDX_ASSERT(status == U_ZERO_ERROR);
126 |         
127 |         InitXReplaceableCallbacks(&s_xrepVtable);
128 |         s_initialized = 1;
129 |     }
130 |     pthread_mutex_unlock(&s_lock);
131 | }
132 | 
/*
 * Closes the shared transliterator. s_trans and s_initialized are left
 * unchanged, and nothing in this file calls this function.
 */
static void kv_unicode_deinit(void)
{
    utrans_close(s_trans);
}
137 | #endif
138 | 
139 | unsigned int kv_u_get_length(const UChar * word)
140 | {
141 |     unsigned int length = 0;
142 |     while (* word != 0) {
143 |         word ++;
144 |         length ++;
145 |     }
146 |     return length;
147 | }
148 | 
149 | // UTF <-> UTF16
150 | 
151 | UChar * kv_from_utf8(const char * word)
152 | {
153 |     size_t len = strlen(word);
154 |     const UTF8 * source = (const UTF8 *) word;
155 |     UTF16 * target = (UTF16 *) malloc((len + 1) * sizeof(* target));
156 |     UTF16 * targetStart = target;
157 |     ConvertUTF8toUTF16(&source, source + len,
158 |                        &targetStart, targetStart + len, lenientConversion);
159 |     unsigned int utf16length = (unsigned int) (targetStart - target);
160 |     target[utf16length] = 0;
161 |     return (UChar *) target;
162 | }
163 | 
164 | char * kv_to_utf8(const UChar * word)
165 | {
166 |     unsigned int len = kv_u_get_length(word);
167 |     const UTF16 * source = (const UTF16 *) word;
168 |     UTF8 * target = (UTF8 *) malloc(len * 6 + 1);
169 |     UTF8 * targetStart = target;
170 |     ConvertUTF16toUTF8(&source, source + len,
171 |                        &targetStart, targetStart + len * 6 + 1, lenientConversion);
172 |     unsigned int utf8length = (unsigned int) (targetStart - target);
173 |     target[utf8length] = 0;
174 |     return (char *) target;
175 | }
176 | 
177 | // transliterate to ASCII
178 | 
179 | char * kv_transliterate(const UChar * text, int length)
180 | {
181 | #if __APPLE__
182 |     if (length == -1) {
183 |         length = kv_u_get_length(text);
184 |     }
185 |     
186 |     int is_ascii = 1;
187 |     const UChar * p = text;
188 |     for(int i = 0 ; i < length ; i ++) {
189 |         if ((* p < 32) || (* p >= 127)) {
190 |         //if (!isalnum(* p)) {
191 |             is_ascii = 0;
192 |             break;
193 |         }
194 |         p ++;
195 |     }
196 |     
197 |     if (is_ascii) {
198 |         char * result = malloc(length + 1);
199 |         char * q = result;
200 |         for(int i = 0 ; i < length ; i ++) {
201 |             * q = tolower(text[i]);
202 |             q ++;
203 |         }
204 |         * q = 0;
205 |         return result;
206 |     }
207 |     
208 |     CFMutableStringRef cfStr = CFStringCreateMutable(NULL, 0);
209 |     CFStringAppendCharacters(cfStr, (const UniChar *) text, length);
210 |     CFStringTransform(cfStr, NULL, CFSTR("Any-Latin; NFD; Lower; [:nonspacing mark:] remove; nfc"), false);
211 |     CFIndex resultLength = CFStringGetLength(cfStr);
212 |     char * buffer = (char *) malloc(resultLength + 1);
213 |     buffer[resultLength] = 0;
214 |     CFStringGetCString(cfStr, buffer, resultLength + 1, kCFStringEncodingUTF8);
215 |     CFRelease(cfStr);
216 |     return buffer;
217 | #else
218 |     if (length == -1) {
219 |         length = u_strlen(text);
220 |     }
221 |     
222 |     pthread_once(&s_once, kv_unicode_init);
223 |     
224 |     XReplaceable xrep;
225 |     InitXReplaceable(&xrep, text, length);
226 |     UErrorCode status = U_ZERO_ERROR;
227 |     
228 |     int32_t limit = length;
229 |     utrans_trans(s_trans, (UReplaceable *) &xrep, &s_xrepVtable, 0, &limit, &status);
230 |     if (status != U_ZERO_ERROR) {
231 |         goto free_xrep;
232 |     }
233 |     
234 |     char * result = lidx_to_utf8(xrep.text);
235 |     FreeXReplaceable(&xrep);
236 |     
237 |     return result;
238 |     
239 | free_xrep:
240 |     FreeXReplaceable(&xrep);
241 | err:
242 |     return NULL;
243 | #endif
244 | }
245 | 


--------------------------------------------------------------------------------
/src/kvunicode.h:
--------------------------------------------------------------------------------
 1 | #ifndef KVUNICODE_H
 2 | 
 3 | #define KVUNICODE_H
 4 | 
 5 | #if !__APPLE__
 6 | #include "unicode/utypes.h"
 7 | #include "unicode/uloc.h"
 8 | #include "unicode/utext.h"
 9 | #include "unicode/localpointer.h"
10 | #include "unicode/parseerr.h"
11 | #include "unicode/ubrk.h"
12 | #include "unicode/urep.h"
13 | #include "unicode/utrans.h"
14 | #include "unicode/parseerr.h"
15 | #include "unicode/uenum.h"
16 | #include "unicode/uset.h"
17 | #include "unicode/putil.h"
18 | #include "unicode/uiter.h"
19 | #include "unicode/ustring.h"
20 | #else
21 | #if defined(__CHAR16_TYPE__)
22 | typedef __CHAR16_TYPE__ UChar;
23 | #else
24 | typedef uint16_t UChar;
25 | #endif
26 | #endif
27 | 
28 | #ifdef __cplusplus
29 | extern "C" {
30 | #endif
31 | 
32 | unsigned int kv_u_get_length(const UChar * word);
33 | UChar * kv_from_utf8(const char * word);
34 | char * kv_to_utf8(const UChar * word);
35 | char * kv_transliterate(const UChar * text, int length);
36 | 
37 | #ifdef __cplusplus
38 | }
39 | #endif
40 | 
41 | #endif
42 | 


--------------------------------------------------------------------------------
/src/sfts.cpp:
--------------------------------------------------------------------------------
  1 | #include "sfts.h"
  2 | 
  3 | #include <stdlib.h>
  4 | 
  5 | #include "kvdbo.h"
  6 | 
  7 | #include "kvunicode.h"
  8 | #include "kvserialization.h"
  9 | 
 10 | #include <set>
 11 | #include <map>
 12 | #include <unordered_set>
 13 | #include <unordered_map>
 14 | 
 15 | #if __APPLE__
 16 | #include <CoreFoundation/CoreFoundation.h>
 17 | #endif
 18 | 
 19 | static int db_put(sfts * index, std::string & key, std::string & value);
 20 | static int db_get(sfts * index, std::string & key, std::string * p_value);
 21 | static int db_delete(sfts * index, std::string & key);
 22 | static int db_flush(sfts * index);
 23 | static int tokenize(sfts * index, uint64_t doc, const UChar * text);
 24 | static int add_to_indexer(sfts * index, uint64_t doc, const char * word,
 25 |                           std::set<uint64_t> & wordsids_set);
 26 | 
 27 | // . -> next word id
 28 | // ,[docid] -> [words ids]
 29 | // /[word id] -> word
 30 | // word -> [word id], [docs ids]
 31 | 
// State of one full text index.
struct sfts {
    // underlying database handle, created by kvdbo_new() in sfts_new()
    kvdbo * sfts_db;
    // NOTE(review): presumably an in-memory write cache (pending values, keys
    // modified since the last flush, keys deleted since the last flush) - the
    // code using these containers is outside this part of the file; confirm.
    std::unordered_map<std::string, std::string> sfts_buffer;
    std::unordered_set<std::string> sfts_buffer_dirty;
    std::unordered_set<std::string> sfts_deleted;
};
 38 | 
 39 | sfts * sfts_new(const char * filename)
 40 | {
 41 |     sfts * result = new sfts;
 42 |     result->sfts_db = kvdbo_new(filename);
 43 |     return result;
 44 | }
 45 | 
 46 | void sfts_free(sfts * index)
 47 | {
 48 |     kvdbo_free(index->sfts_db);
 49 |     free(index);
 50 | }
 51 | 
 52 | int sfts_open(sfts * index)
 53 | {
 54 |     kvdbo_open(index->sfts_db);
 55 |     
 56 |     return 0;
 57 | }
 58 | 
// Flushes pending changes with db_flush(), then closes the underlying
// database. The result of the flush is not reported to the caller.
void sfts_close(sfts * index)
{
    db_flush(index);
    kvdbo_close(index->sfts_db);
}
 64 | 
// Flushes pending changes; returns the result of db_flush().
int sfts_flush(sfts * index)
{
    return db_flush(index);
}
 69 | 
 70 | //int lidx_set(lidx * index, uint64_t doc, const char * text);
 71 | // text -> wordboundaries -> transliterated word -> store word with new word id
 72 | // word -> append doc id to docs ids
 73 | // store doc id -> words ids
 74 | 
// Indexes the UTF-8 string `text` for document `doc`: the text is converted
// to UTF-16 with kv_from_utf8() and handed to sfts_u_set(), whose result is
// returned. The temporary UTF-16 copy is freed before returning.
int sfts_set(sfts * index, uint64_t doc, const char * text)
{
    UChar * utext = kv_from_utf8(text);
    int r = sfts_u_set(index, doc, utext);
    free(utext);
    return r;
}
 82 | 
 83 | int sfts_set2(sfts * index, uint64_t doc, const char ** text, int count)
 84 | {
 85 |     UChar ** utext = (UChar **) malloc(count * sizeof(* utext));
 86 |     for(int i = 0 ; i < count ; i ++) {
 87 |         utext[i] = kv_from_utf8(text[i]);
 88 |     }
 89 |     int result = sfts_u_set2(index, doc, (const UChar **) utext, count);
 90 |     for(int i = 0 ; i < count ; i ++) {
 91 |         free((void *) utext[i]);
 92 |     }
 93 |     free((void *) utext);
 94 |     return result;
 95 | }
 96 | 
 97 | int sfts_u_set(sfts * index, uint64_t doc, const UChar * utext)
 98 | {
 99 |     int r = sfts_remove(index, doc);
100 |     if (r < 0) {
101 |         return r;
102 |     }
103 |     r = tokenize(index, doc, utext);
104 |     if (r < 0) {
105 |         return r;
106 |     }
107 |     return 0;
108 | }
109 | 
110 | int sfts_u_set2(sfts * index, uint64_t doc, const UChar ** utext, int count)
111 | {
112 |     int r = sfts_remove(index, doc);
113 |     if (r < 0) {
114 |         return r;
115 |     }
116 |     int result = 0;
117 |     std::set<uint64_t> wordsids_set;
118 |     for(unsigned int i = 0 ; i < count ; i ++) {
119 |         if (utext[i] == NULL) {
120 |             continue;
121 |         }
122 |         char * transliterated = kv_transliterate(utext[i], kv_u_get_length(utext[i]));
123 |         if (transliterated == NULL) {
124 |             continue;
125 |         }
126 |         int r = add_to_indexer(index, doc, transliterated, wordsids_set);
127 |         if (r < 0) {
128 |             result = r;
129 |             break;
130 |         }
131 |         free(transliterated);
132 |     }
133 |     if (result != 0) {
134 |         return result;
135 |     }
136 |     
137 |     std::string key(",");
138 |     kv_encode_uint64(key, doc);
139 |     
140 |     std::string value_str;
141 |     for(std::set<uint64_t>::iterator wordsids_set_iterator = wordsids_set.begin() ; wordsids_set_iterator != wordsids_set.end() ; ++ wordsids_set_iterator) {
142 |         kv_encode_uint64(value_str, * wordsids_set_iterator);
143 |     }
144 |     r = db_put(index, key, value_str);
145 |     if (r < 0) {
146 |         return r;
147 |     }
148 |     
149 |     return 0;
150 | }
151 | 
152 | // Splits `text` into words and indexes each of them for document `doc`.
153 | //
154 | // Words are located with CFStringTokenizer on Apple platforms and with an ICU
155 | // word break iterator elsewhere. Every word is transliterated and passed to
156 | // add_to_indexer(); words whose transliteration is NULL are skipped. When the
157 | // whole text has been processed without error, the ids of the words found are
158 | // stored under the key "," followed by the encoded document id.
159 | //
160 | // Returns 0 on success and a negative value on failure.
161 | static int tokenize(sfts * index, uint64_t doc, const UChar * text)
162 | {
163 |     int result = 0;
164 |     std::set<uint64_t> wordsids_set;
165 | #if __APPLE__
166 |     unsigned int len = kv_u_get_length(text);
167 |     CFStringRef str = CFStringCreateWithBytes(NULL, (const UInt8 *) text, len * sizeof(* text), kCFStringEncodingUTF16LE, false);
168 |     if (str == NULL) {
169 |         // Neither CFStringTokenizerCreate() nor CFRelease() accepts NULL.
170 |         return -1;
171 |     }
172 |     CFStringTokenizerRef tokenizer = CFStringTokenizerCreate(NULL, str, CFRangeMake(0, len), kCFStringTokenizerUnitWord, NULL);
173 |     if (tokenizer == NULL) {
174 |         CFRelease(str);
175 |         return -1;
176 |     }
177 |     while (1) {
178 |         CFStringTokenizerTokenType wordKind = CFStringTokenizerAdvanceToNextToken(tokenizer);
179 |         if (wordKind == kCFStringTokenizerTokenNone) {
180 |             break;
181 |         }
182 |         if (wordKind == kCFStringTokenizerTokenHasNonLettersMask) {
183 |             continue;
184 |         }
185 |         CFRange range = CFStringTokenizerGetCurrentTokenRange(tokenizer);
186 |         char * transliterated = kv_transliterate(&text[range.location], (int) range.length);
187 |         if (transliterated == NULL) {
188 |             continue;
189 |         }
190 |         int r = add_to_indexer(index, doc, transliterated, wordsids_set);
191 |         // Free the word before testing the result so that it is not leaked
192 |         // when indexing fails.
193 |         free(transliterated);
194 |         if (r < 0) {
195 |             result = r;
196 |             break;
197 |         }
198 |     }
199 |     CFRelease(str);
200 |     CFRelease(tokenizer);
201 | #else
202 |     UErrorCode status;
203 |     status = U_ZERO_ERROR;
204 |     UBreakIterator * iterator = ubrk_open(UBRK_WORD, NULL, text, u_strlen(text), &status);
205 |     LIDX_ASSERT(status <= U_ZERO_ERROR);
206 |     
207 |     int32_t left = 0;
208 |     int32_t right = 0;
209 |     int word_kind = 0;
210 |     ubrk_first(iterator);
211 |     
212 |     while (1) {
213 |         left = right;
214 |         right = ubrk_next(iterator);
215 |         if (right == UBRK_DONE) {
216 |             break;
217 |         }
218 |         
219 |         word_kind = ubrk_getRuleStatus(iterator);
220 |         if (word_kind == 0) {
221 |             // skip punctuation and space.
222 |             continue;
223 |         }
224 |         
225 |         // Same transliteration helper as the Apple branch and sfts_u_search().
226 |         char * transliterated = kv_transliterate(&text[left], right - left);
227 |         if (transliterated == NULL) {
228 |             continue;
229 |         }
230 |         int r = add_to_indexer(index, doc, transliterated, wordsids_set);
231 |         // Free the word before testing the result (no leak on failure).
232 |         free(transliterated);
233 |         if (r < 0) {
234 |             result = r;
235 |             break;
236 |         }
237 |     }
238 |     ubrk_close(iterator);
239 | #endif
240 |     if (result != 0) {
241 |         return result;
242 |     }
243 |     
244 |     // Record the ids of all the words of the document: "," + doc id -> word ids.
245 |     std::string key(",");
246 |     kv_encode_uint64(key, doc);
247 |     
248 |     std::string value_str;
249 |     for(std::set<uint64_t>::iterator wordsids_set_iterator = wordsids_set.begin() ; wordsids_set_iterator != wordsids_set.end() ; ++ wordsids_set_iterator) {
250 |         kv_encode_uint64(value_str, * wordsids_set_iterator);
251 |     }
252 |     int r = db_put(index, key, value_str);
253 |     if (r < 0) {
254 |         return r;
255 |     }
256 |     
257 |     return 0;
258 | }
239 | 
240 | // Records that `word` occurs in document `doc`.
241 | //
242 | // The entry stored under the word itself holds the encoded word id followed
243 | // by the encoded ids of the documents containing the word. For a word that
244 | // has no entry yet, a new id is read from the counter stored under ".", the
245 | // incremented counter is written back, and the reverse mapping from "/" plus
246 | // the encoded id to the word is stored as well.
247 | //
248 | // The word id is inserted into `wordsids_set`. An id that is already in the
249 | // set means this function has added `doc` to the word's entry before
250 | // (tokenize() starts with an empty set for each document), so the document
251 | // id is not appended a second time when a word is repeated in the text.
252 | //
253 | // Returns 0 on success and a negative value on failure.
254 | static int add_to_indexer(sfts * index, uint64_t doc, const char * word,
255 |                           std::set<uint64_t> & wordsids_set)
256 | {
257 |     std::string word_str(word);
258 |     std::string value;
259 |     uint64_t wordid;
260 |     
261 |     int r = db_get(index, word_str, &value);
262 |     if (r < -1) {
263 |         return -1;
264 |     }
265 |     if (r == 0) {
266 |         // Existing entry: it starts with the word id.
267 |         kv_decode_uint64(value, 0, &wordid);
268 |         if (wordsids_set.find(wordid) != wordsids_set.end()) {
269 |             // Repeated word in the same document: `doc` is already listed.
270 |             return 0;
271 |         }
272 |         kv_encode_uint64(value, doc);
273 |         r = db_put(index, word_str, value);
274 |         if (r < 0) {
275 |             return r;
276 |         }
277 |     }
278 |     else /* r == -1 */ {
279 |         // Not found: create an entry with a new word id.
280 |         
281 |         // Read the next word id; the counter starts at 0 when it is missing.
282 |         std::string counter_str;
283 |         std::string nextwordidkey(".");
284 |         r = db_get(index, nextwordidkey, &counter_str);
285 |         if (r == -1) {
286 |             wordid = 0;
287 |         }
288 |         else if (r < 0) {
289 |             return -1;
290 |         }
291 |         else {
292 |             kv_decode_uint64(counter_str, 0, &wordid);
293 |         }
294 |         
295 |         // Write back the next word id.
296 |         std::string next_wordid_str;
297 |         kv_encode_uint64(next_wordid_str, wordid + 1);
298 |         r = db_put(index, nextwordidkey, next_wordid_str);
299 |         if (r < 0) {
300 |             return r;
301 |         }
302 |         
303 |         // word -> word id, doc id
304 |         std::string entry;
305 |         kv_encode_uint64(entry, wordid);
306 |         kv_encode_uint64(entry, doc);
307 |         r = db_put(index, word_str, entry);
308 |         if (r < 0) {
309 |             return r;
310 |         }
311 |         
312 |         // "/" + word id -> word
313 |         std::string wordidkey("/");
314 |         kv_encode_uint64(wordidkey, wordid);
315 |         r = db_put(index, wordidkey, word_str);
316 |         if (r < 0) {
317 |             return r;
318 |         }
319 |     }
320 |     
321 |     wordsids_set.insert(wordid);
322 |     
323 |     return 0;
324 | }
312 | 
313 | // int sfts_remove(sfts * index, uint64_t doc);
314 | // doc id -> word ids -> remove the doc id from each word
315 | // If no doc id is left for a word, the word itself is removed.
316 | 
317 | static std::string get_word_for_wordid(sfts * index, uint64_t wordid);
318 | static int remove_docid_in_word(sfts * index, std::string word, uint64_t doc);
319 | static int remove_word(sfts * index, std::string word, uint64_t wordid);
320 | 
321 | // Removes document `doc` from the index.
322 | //
323 | // The record stored under "," plus the encoded document id lists the ids of
324 | // the words of the document. That record is deleted, then the document id is
325 | // removed from the entry of every listed word that can still be resolved.
326 | // A document without such a record is not an error.
327 | //
328 | // Returns 0 on success and -1 on failure.
329 | int sfts_remove(sfts * index, uint64_t doc)
330 | {
331 |     std::string doc_key(",");
332 |     kv_encode_uint64(doc_key, doc);
333 |     
334 |     std::string wordids;
335 |     // -1 ("not found") is tolerated; any lower value is an error.
336 |     if (db_get(index, doc_key, &wordids) < -1) {
337 |         return -1;
338 |     }
339 |     db_delete(index, doc_key);
340 |     
341 |     for (size_t position = 0 ; position < wordids.size() ; ) {
342 |         uint64_t wordid;
343 |         position = kv_decode_uint64(wordids, position, &wordid);
344 |         std::string word = get_word_for_wordid(index, wordid);
345 |         if (word.empty()) {
346 |             // The word id cannot be resolved: nothing to update.
347 |             continue;
348 |         }
349 |         if (remove_docid_in_word(index, word, doc) < 0) {
350 |             return -1;
351 |         }
352 |     }
353 |     
354 |     return 0;
355 | }
351 | 
352 | // Returns the word stored under "/" plus the encoded `wordid`, or an empty
353 | // string when db_get() reports a failure (including "not found").
354 | static std::string get_word_for_wordid(sfts * index, uint64_t wordid)
355 | {
356 |     std::string lookup_key("/");
357 |     kv_encode_uint64(lookup_key, wordid);
358 |     
359 |     std::string word;
360 |     if (db_get(index, lookup_key, &word) < 0) {
361 |         return std::string();
362 |     }
363 |     return word;
364 | }
363 | 
364 | // Removes document `doc` from the entry of `word`.
365 | //
366 | // A word entry is the encoded word id followed by the encoded ids of the
367 | // documents containing the word. The entry is rewritten without `doc`; the
368 | // word id is kept at the front so that the entry stays decodable. When no
369 | // document is left, the word is removed altogether with remove_word().
370 | //
371 | // Returns 0 on success (also when the word has no entry) and a negative
372 | // value on failure.
373 | static int remove_docid_in_word(sfts * index, std::string word, uint64_t doc)
374 | {
375 |     std::string str;
376 |     int r = db_get(index, word, &str);
377 |     if (r == -1) {
378 |         return 0;
379 |     }
380 |     else if (r < 0) {
381 |         return -1;
382 |     }
383 |     
384 |     uint64_t wordid;
385 |     size_t position = kv_decode_uint64(str, 0, &wordid);
386 |     
387 |     // The rewritten entry must start with the word id, like the original.
388 |     std::string buffer;
389 |     kv_encode_uint64(buffer, wordid);
390 |     bool has_remaining_docs = false;
391 |     while (position < str.size()) {
392 |         uint64_t current_docid;
393 |         position = kv_decode_uint64(str, position, &current_docid);
394 |         if (current_docid != doc) {
395 |             kv_encode_uint64(buffer, current_docid);
396 |             has_remaining_docs = true;
397 |         }
398 |     }
399 |     
400 |     if (!has_remaining_docs) {
401 |         // remove word entry
402 |         r = remove_word(index, word, wordid);
403 |         if (r < 0) {
404 |             return -1;
405 |         }
406 |     }
407 |     else {
408 |         // update word entry
409 |         r = db_put(index, word, buffer);
410 |         if (r < 0) {
411 |             return r;
412 |         }
413 |     }
414 |     
415 |     return 0;
416 | }
403 | 
404 | // Deletes both records of a word: the reverse mapping stored under "/" plus
405 | // the encoded `wordid`, then the entry stored under the word itself.
406 | // Returns 0 on success and -1 as soon as one deletion fails.
407 | static int remove_word(sfts * index, std::string word, uint64_t wordid)
408 | {
409 |     std::string reverse_key("/");
410 |     kv_encode_uint64(reverse_key, wordid);
411 |     
412 |     if (db_delete(index, reverse_key) < 0) {
413 |         return -1;
414 |     }
415 |     if (db_delete(index, word) < 0) {
416 |         return -1;
417 |     }
418 |     return 0;
419 | }
420 | 
421 | // int sfts_search(sfts * index, const char * token, ...);
422 | // token -> transliterated token -> doc ids
423 | 
424 | // UTF-8 front end of sfts_u_search(): converts `token` with kv_from_utf8(),
425 | // runs the search with the converted token, frees the converted copy and
426 | // returns the result of sfts_u_search().
427 | int sfts_search(sfts * index, const char * token, sfts_search_kind kind, uint64_t ** p_docsids, size_t * p_count)
428 | {
429 |     UChar * converted = kv_from_utf8(token);
430 |     int status = sfts_u_search(index, converted, kind, p_docsids, p_count);
431 |     free((void *) converted);
432 |     return status;
433 | }
432 | 
433 | // Searches the index for the documents containing a word that matches `utoken`.
434 | //
435 | // Pending changes are flushed first, since words are enumerated and read
436 | // straight from the underlying database. The token is transliterated and
437 | // compared with every word key according to `kind` (prefix, substring or
438 | // suffix). Keys starting with ".", "," or "/" are skipped: they are the
439 | // prefixes used for the word id counter, the per-document records and the
440 | // word id -> word records.
441 | //
442 | // On success, returns 0, stores in `*p_docsids` a calloc()'ed array of unique
443 | // document ids in increasing order (to be released with free()) and stores
444 | // their number in `*p_count`. A token whose transliteration is NULL gives an
445 | // empty result (`*p_docsids` is NULL, `*p_count` is 0). Returns -1, with the
446 | // same empty outputs, if the result array cannot be allocated.
447 | int sfts_u_search(sfts * index, const UChar * utoken, sfts_search_kind kind,
448 |                   uint64_t ** p_docsids, size_t * p_count)
449 | {
450 |     // Give the outputs a defined value on every exit path.
451 |     * p_docsids = NULL;
452 |     * p_count = 0;
453 |     
454 |     db_flush(index);
455 |     
456 |     char * transliterated = kv_transliterate(utoken, -1);
457 |     if (transliterated == NULL) {
458 |         // Words that cannot be transliterated are never indexed, so nothing
459 |         // can match.
460 |         return 0;
461 |     }
462 |     size_t transliterated_length = strlen(transliterated);
463 |     std::set<uint64_t> result_set;
464 |     
465 |     kvdbo_iterator * iterator = kvdbo_iterator_new(index->sfts_db);
466 |     if (kind == sfts_search_kind_prefix) {
467 |         kvdbo_iterator_seek_after(iterator, transliterated, transliterated_length);
468 |     }
469 |     else {
470 |         kvdbo_iterator_seek_first(iterator);
471 |     }
472 |     for ( ; kvdbo_iterator_is_valid(iterator) ; kvdbo_iterator_next(iterator)) {
473 |         const char * key;
474 |         size_t key_size;
475 |         kvdbo_iterator_get_key(iterator, &key, &key_size);
476 |         std::string key_str(key, key_size);
477 |         if (!key_str.empty() && (key_str[0] == '.' || key_str[0] == ',' || key_str[0] == '/')) {
478 |             // Bookkeeping record, not a word.
479 |             continue;
480 |         }
481 |         
482 |         bool add_to_result = false;
483 |         if (kind == sfts_search_kind_prefix) {
484 |             if (key_str.compare(0, transliterated_length, transliterated) != 0) {
485 |                 // First word that does not start with the token: stop.
486 |                 break;
487 |             }
488 |             add_to_result = true;
489 |         }
490 |         else if (kind == sfts_search_kind_substr) {
491 |             add_to_result = (key_str.find(transliterated) != std::string::npos);
492 |         }
493 |         else if (kind == sfts_search_kind_suffix) {
494 |             add_to_result = (key_str.length() >= transliterated_length) &&
495 |                 (key_str.compare(key_str.length() - transliterated_length, transliterated_length, transliterated) == 0);
496 |         }
497 |         if (!add_to_result) {
498 |             continue;
499 |         }
500 |         
501 |         char * value = NULL;
502 |         size_t value_size = 0;
503 |         int r = kvdbo_get(index->sfts_db, key_str.c_str(), key_str.length(), &value, &value_size);
504 |         if (r != 0) {
505 |             // `value` is not usable after a failed lookup: skip this word.
506 |             fprintf(stderr, "VALUE NOT FOUND for key %s\n", key_str.c_str());
507 |             continue;
508 |         }
509 |         std::string value_str(value, value_size);
510 |         free(value);
511 |         
512 |         // The entry is the word id followed by the ids of the documents.
513 |         uint64_t wordid;
514 |         size_t position = kv_decode_uint64(value_str, 0, &wordid);
515 |         while (position < value_str.size()) {
516 |             uint64_t docid;
517 |             position = kv_decode_uint64(value_str, position, &docid);
518 |             result_set.insert(docid);
519 |         }
520 |     }
521 |     kvdbo_iterator_free(iterator);
522 |     
523 |     free(transliterated);
524 |     
525 |     size_t count = result_set.size();
526 |     uint64_t * result = (uint64_t *) calloc(count, sizeof(* result));
527 |     if ((result == NULL) && (count > 0)) {
528 |         return -1;
529 |     }
530 |     size_t i = 0;
531 |     for(std::set<uint64_t>::iterator set_iterator = result_set.begin() ; set_iterator != result_set.end() ; ++ set_iterator) {
532 |         result[i] = * set_iterator;
533 |         i ++;
534 |     }
535 |     
536 |     * p_docsids = result;
537 |     * p_count = count;
538 |     
539 |     return 0;
540 | }
515 | 
516 | // Buffers a write of `value` under `key`: the value is kept in the in-memory
517 | // buffer, the key is marked dirty and any pending deletion of the key is
518 | // cancelled. Nothing is written to the database here. Always returns 0.
519 | static int db_put(sfts * index, std::string & key, std::string & value)
520 | {
521 |     index->sfts_buffer[key] = value;
522 |     index->sfts_buffer_dirty.insert(key);
523 |     index->sfts_deleted.erase(key);
524 |     return 0;
525 | }
524 | 
525 | // Reads the value of `key` into `*p_value`.
526 | //
527 | // A key with a pending deletion is reported as missing (-1). Otherwise the
528 | // in-memory buffer is consulted first; on a miss the value is read with
529 | // kvdbo_get() and copied into the buffer for later lookups.
530 | //
531 | // Returns 0 on success, otherwise -1 or the non-zero result of kvdbo_get();
532 | // `*p_value` is left untouched in that case.
533 | static int db_get(sfts * index, std::string & key, std::string * p_value)
534 | {
535 |     if (index->sfts_deleted.count(key) > 0) {
536 |         return -1;
537 |     }
538 |     
539 |     auto buffered = index->sfts_buffer.find(key);
540 |     if (buffered != index->sfts_buffer.end()) {
541 |         * p_value = buffered->second;
542 |         return 0;
543 |     }
544 |     
545 |     char * data;
546 |     size_t data_size;
547 |     int r = kvdbo_get(index->sfts_db, key.c_str(), key.length(), &data, &data_size);
548 |     if (r != 0) {
549 |         return r;
550 |     }
551 |     std::string fetched(data, data_size);
552 |     free(data);
553 |     * p_value = fetched;
554 |     index->sfts_buffer[key] = fetched;
555 |     return 0;
556 | }
547 | 
548 | // Buffers the deletion of `key`: the key is dropped from the in-memory buffer
549 | // and from the dirty set, and recorded in the set of pending deletions.
550 | // Nothing is removed from the database here. Always returns 0.
551 | static int db_delete(sfts * index, std::string & key)
552 | {
553 |     index->sfts_buffer.erase(key);
554 |     index->sfts_buffer_dirty.erase(key);
555 |     index->sfts_deleted.insert(key);
556 |     return 0;
557 | }
555 | 
556 | // Applies the buffered changes to the database.
557 | //
558 | // Every dirty key is written with kvdbo_set() using its buffered value, every
559 | // pending deletion is applied with kvdbo_delete(), kvdbo_flush() is called,
560 | // and the buffer, the dirty set and the deletion set are emptied. Nothing is
561 | // done when there is no dirty key and no pending deletion.
562 | // Always returns 0.
563 | static int db_flush(sfts * index)
564 | {
565 |     if (index->sfts_buffer_dirty.empty() && index->sfts_deleted.empty()) {
566 |         return 0;
567 |     }
568 |     
569 |     for (const std::string & dirty_key : index->sfts_buffer_dirty) {
570 |         const std::string & dirty_value = index->sfts_buffer[dirty_key];
571 |         kvdbo_set(index->sfts_db, dirty_key.c_str(), dirty_key.length(), dirty_value.c_str(), dirty_value.length());
572 |     }
573 |     for (const std::string & deleted_key : index->sfts_deleted) {
574 |         kvdbo_delete(index->sfts_db, deleted_key.c_str(), deleted_key.length());
575 |     }
576 |     kvdbo_flush(index->sfts_db);
577 |     
578 |     index->sfts_buffer.clear();
579 |     index->sfts_buffer_dirty.clear();
580 |     index->sfts_deleted.clear();
581 |     return 0;
582 | }
576 | 


--------------------------------------------------------------------------------
/src/sfts.h:
--------------------------------------------------------------------------------
 1 | #ifndef LIDX_H
 2 | 
 3 | #define LIDX_H
 4 | 
 5 | #ifdef __cplusplus
 6 | extern "C" {
 7 | #endif
 8 | 
 9 | #include <inttypes.h>
10 | #include <stdlib.h>
11 | 
12 | // We're using the same UChar as the ICU library.
13 | #if defined(__CHAR16_TYPE__)
14 | typedef __CHAR16_TYPE__ UChar;
15 | #else
16 | typedef uint16_t UChar;
17 | #endif
18 | 
19 | typedef struct sfts sfts;
20 | 
21 | // Kind of matching performed by a search. Prefix matching provides the best
22 | // performance; the two other kinds have poor performance.
23 | typedef enum sfts_search_kind {
24 |   sfts_search_kind_prefix, // Search documents that have strings that start with the given token.
25 |   sfts_search_kind_substr, // Search documents that have strings that contain the given token.
26 |   sfts_search_kind_suffix, // Search documents that have strings that end with the given token.
27 | } sfts_search_kind;
28 | 
29 | // Create a new indexer.
30 | sfts * sfts_new(const char * filename);
31 | 
32 | // Releases the resources of the indexer.
33 | void sfts_free(sfts * index);
34 | 
35 | // Open the indexer.
36 | int sfts_open(sfts * index);
37 | 
38 | // Close the indexer.
39 | void sfts_close(sfts * index);
40 | 
41 | // Adds a UTF-8 document to the indexer.
42 | // `doc`: document identifier (numerical identifier in a 64-bits range)
43 | // `text`: content of the document in UTF-8 encoding.
44 | int sfts_set(sfts * index, uint64_t doc, const char * text);
45 | 
46 | // Adds a Unicode document to the indexer.
47 | // `utext`: content of the document in UTF-16 encoding.
48 | int sfts_u_set(sfts * index, uint64_t doc, const UChar * utext);
49 | 
50 | // Adds a UTF-8 document to the indexer.
51 | // `doc`: document identifier (numerical identifier in a 64-bits range)
52 | int sfts_set2(sfts * index, uint64_t doc, const char ** text, int count);
53 | 
54 | // Adds a Unicode document to the indexer.
55 | int sfts_u_set2(sfts * index, uint64_t doc, const UChar ** utext, int count);
56 | 
57 | // Removes a document from the indexer.
58 | int sfts_remove(sfts * index, uint64_t doc);
59 | 
60 | // Searches a UTF-8 token in the indexer.
61 | // `token`: string to search in UTF-8 encoding.
62 | // `kind`: kind of matching to perform. See `sfts_search_kind`.
63 | // The result is an array of documents IDs. The array is stored in `*p_docsids`.
64 | // The number of items in the result array is stored in `*p_count`.
65 | //
66 | // The result array has to be freed using `free()`.
67 | int sfts_search(sfts * index, const char * token, sfts_search_kind kind,
68 |     uint64_t ** p_docsids, size_t * p_count);
69 | 
70 | // Searches a unicode token in the indexer.
71 | // `utoken`: string to search in UTF-16 encoding.
72 | int sfts_u_search(sfts * index, const UChar * utoken, sfts_search_kind kind,
73 |     uint64_t ** p_docsids, size_t * p_count);
74 | 
75 | // Writes changes to disk if they are still pending in memory.
76 | int sfts_flush(sfts * index);
77 | 
78 | #ifdef __cplusplus
79 | }
80 | #endif
81 | 
82 | #endif
83 | 


--------------------------------------------------------------------------------