├── .gitignore
├── FAQ
├── INSTALL
├── README.md
├── api
    ├── load.py
    ├── python
    │   ├── .gitignore
    │   ├── test.py
    │   ├── test_search.py
    │   └── zsearch.py
    └── readme.txt
├── buildall.sh
├── data
    ├── .gitignore
    ├── document01.xml
    ├── document02.xml
    ├── document03.xml
    ├── document04.xml
    ├── input01.txt
    ├── input02.txt
    ├── load
    │   ├── badinput.txt
    │   ├── document_10035.txt
    │   ├── document_10161.txt
    │   ├── document_10303.txt
    │   ├── document_11629.txt
    │   ├── document_11674.txt
    │   ├── document_12090.txt
    │   ├── document_13116.txt
    │   ├── document_14043.txt
    │   ├── document_14919.txt
    │   ├── document_1641.txt
    │   ├── document_17756.txt
    │   ├── document_19019.txt
    │   ├── document_19029.txt
    │   ├── document_20482.txt
    │   ├── document_21.txt
    │   ├── document_21505.txt
    │   ├── document_21675.txt
    │   ├── document_21676.txt
    │   ├── document_21699.txt
    │   ├── document_21726.txt
    │   ├── document_22737.txt
    │   ├── document_22813.txt
    │   ├── document_22920.txt
    │   ├── document_22983.txt
    │   ├── document_23228.txt
    │   ├── document_23513.txt
    │   ├── document_24307.txt
    │   ├── document_24467.txt
    │   ├── document_24862.txt
    │   ├── document_25842.txt
    │   ├── document_26056.txt
    │   ├── document_26734.txt
    │   ├── document_27501.txt
    │   ├── document_27927.txt
    │   ├── document_27949.txt
    │   ├── document_2807.txt
    │   ├── document_28140.txt
    │   ├── document_28891.txt
    │   ├── document_28936.txt
    │   ├── document_30201.txt
    │   ├── document_30388.txt
    │   ├── document_31127.txt
    │   ├── document_31288.txt
    │   ├── document_3142.txt
    │   ├── document_31971.txt
    │   ├── document_32395.txt
    │   ├── document_32427.txt
    │   ├── document_33192.txt
    │   ├── document_33417.txt
    │   ├── document_33870.txt
    │   ├── document_34257.txt
    │   ├── document_35761.txt
    │   ├── document_3605.txt
    │   ├── document_36521.txt
    │   ├── document_36750.txt
    │   ├── document_37773.txt
    │   ├── document_37824.txt
    │   ├── document_38116.txt
    │   ├── document_38164.txt
    │   ├── document_38169.txt
    │   ├── document_3890.txt
    │   ├── document_39592.txt
    │   ├── document_39998.txt
    │   ├── document_4010.txt
    │   ├── document_42772.txt
    │   ├── document_42845.txt
    │   ├── document_44040.txt
    │   ├── document_4481.txt
    │   ├── document_4487.txt
    │   ├── document_44885.txt
    │   ├── document_45024.txt
    │   ├── document_45415.txt
    │   ├── document_45516.txt
    │   ├── document_46330.txt
    │   ├── document_46648.txt
    │   ├── document_4709.txt
    │   ├── document_47156.txt
    │   ├── document_47474.txt
    │   ├── document_4802.txt
    │   ├── document_48177.txt
    │   ├── document_48329.txt
    │   ├── document_49683.txt
    │   ├── document_49819.txt
    │   ├── document_50736.txt
    │   ├── document_51033.txt
    │   ├── document_51679.txt
    │   ├── document_51756.txt
    │   ├── document_53100.txt
    │   ├── document_5454.txt
    │   ├── document_5861.txt
    │   ├── document_5872.txt
    │   ├── document_591.txt
    │   ├── document_6355.txt
    │   ├── document_7403.txt
    │   ├── document_750.txt
    │   ├── document_8215.txt
    │   ├── document_8495.txt
    │   ├── document_9403.txt
    │   ├── document_9566.txt
    │   └── document_9802.txt
    ├── lorem_ipsum.txt
    └── readme.txt
├── docroot
    └── post.htm
├── leveldb
    ├── AUTHORS
    ├── LICENSE
    ├── Makefile
    ├── NEWS
    ├── README
    ├── TODO
    ├── build_detect_platform
    ├── db
    │   ├── autocompact_test.cc
    │   ├── builder.cc
    │   ├── builder.h
    │   ├── c.cc
    │   ├── c_test.c
    │   ├── corruption_test.cc
    │   ├── db_bench.cc
    │   ├── db_impl.cc
    │   ├── db_impl.h
    │   ├── db_iter.cc
    │   ├── db_iter.h
    │   ├── db_test.cc
    │   ├── dbformat.cc
    │   ├── dbformat.h
    │   ├── dbformat_test.cc
    │   ├── filename.cc
    │   ├── filename.h
    │   ├── filename_test.cc
    │   ├── leveldb_main.cc
    │   ├── log_format.h
    │   ├── log_reader.cc
    │   ├── log_reader.h
    │   ├── log_test.cc
    │   ├── log_writer.cc
    │   ├── log_writer.h
    │   ├── memtable.cc
    │   ├── memtable.h
    │   ├── repair.cc
    │   ├── skiplist.h
    │   ├── skiplist_test.cc
    │   ├── snapshot.h
    │   ├── table_cache.cc
    │   ├── table_cache.h
    │   ├── version_edit.cc
    │   ├── version_edit.h
    │   ├── version_edit_test.cc
    │   ├── version_set.cc
    │   ├── version_set.h
    │   ├── version_set_test.cc
    │   ├── write_batch.cc
    │   ├── write_batch_internal.h
    │   └── write_batch_test.cc
    ├── doc
    │   ├── bench
    │   │   ├── db_bench_sqlite3.cc
    │   │   └── db_bench_tree_db.cc
    │   ├── benchmark.html
    │   ├── doc.css
    │   ├── impl.html
    │   ├── index.html
    │   ├── log_format.txt
    │   └── table_format.txt
    ├── helpers
    │   └── memenv
    │   │   ├── memenv.cc
    │   │   ├── memenv.h
    │   │   └── memenv_test.cc
    ├── include
    │   └── leveldb
    │   │   ├── c.h
    │   │   ├── cache.h
    │   │   ├── comparator.h
    │   │   ├── db.h
    │   │   ├── env.h
    │   │   ├── filter_policy.h
    │   │   ├── iterator.h
    │   │   ├── options.h
    │   │   ├── slice.h
    │   │   ├── status.h
    │   │   ├── table.h
    │   │   ├── table_builder.h
    │   │   └── write_batch.h
    ├── issues
    │   ├── issue178_test.cc
    │   └── issue200_test.cc
    ├── port
    │   ├── README
    │   ├── atomic_pointer.h
    │   ├── port.h
    │   ├── port_example.h
    │   ├── port_posix.cc
    │   ├── port_posix.h
    │   ├── thread_annotations.h
    │   └── win
    │   │   └── stdint.h
    ├── table
    │   ├── block.cc
    │   ├── block.h
    │   ├── block_builder.cc
    │   ├── block_builder.h
    │   ├── filter_block.cc
    │   ├── filter_block.h
    │   ├── filter_block_test.cc
    │   ├── format.cc
    │   ├── format.h
    │   ├── iterator.cc
    │   ├── iterator_wrapper.h
    │   ├── merger.cc
    │   ├── merger.h
    │   ├── table.cc
    │   ├── table_builder.cc
    │   ├── table_test.cc
    │   ├── two_level_iterator.cc
    │   └── two_level_iterator.h
    └── util
    │   ├── arena.cc
    │   ├── arena.h
    │   ├── arena_test.cc
    │   ├── bloom.cc
    │   ├── bloom_test.cc
    │   ├── cache.cc
    │   ├── cache_test.cc
    │   ├── coding.cc
    │   ├── coding.h
    │   ├── coding_test.cc
    │   ├── comparator.cc
    │   ├── crc32c.cc
    │   ├── crc32c.h
    │   ├── crc32c_test.cc
    │   ├── env.cc
    │   ├── env_posix.cc
    │   ├── env_test.cc
    │   ├── filter_policy.cc
    │   ├── hash.cc
    │   ├── hash.h
    │   ├── histogram.cc
    │   ├── histogram.h
    │   ├── logging.cc
    │   ├── logging.h
    │   ├── mutexlock.h
    │   ├── options.cc
    │   ├── posix_logger.h
    │   ├── random.h
    │   ├── status.cc
    │   ├── testharness.cc
    │   ├── testharness.h
    │   ├── testutil.cc
    │   └── testutil.h
├── lib
    ├── EWAHBoolArray
    │   ├── .DS_Store
    │   ├── CHANGELOG
    │   ├── README
    │   ├── doxyconfig.txt
    │   ├── example.cpp
    │   ├── headers
    │   │   ├── boolarray.h
    │   │   ├── ewah.h
    │   │   ├── ewahutil.h
    │   │   └── runninglengthword.h
    │   ├── makefile
    │   ├── src
    │   │   ├── benchmark.cpp
    │   │   └── unit.cpp
    │   └── unit
    ├── rapidxml-1.13
    │   ├── document01.xml
    │   ├── license.txt
    │   ├── manual.html
    │   ├── rapidxml.hpp
    │   ├── rapidxml_iterators.hpp
    │   ├── rapidxml_print.hpp
    │   ├── rapidxml_utils.hpp
    │   └── test_rapidxml.cpp
    └── tpunit++.hpp
├── misc
    ├── Makefile
    ├── example_compressedset.cpp
    └── example_leveldb.cpp
├── src
    ├── .gitignore
    ├── Constants.hpp
    ├── DocumentImpl.hpp
    ├── DocumentKVStore.hpp
    ├── Engine.hpp
    ├── EngineDataKVStore.hpp
    ├── Field.cpp
    ├── Field.h
    ├── FieldKVStore.hpp
    ├── IDocument.h
    ├── IInvertedIndex.h
    ├── IKVStore.h
    ├── ITokenizer.h
    ├── InvertedIndexBatch.hpp
    ├── InvertedIndexImpl.hpp
    ├── InvertedIndexSimpleBatch.hpp
    ├── KVStoreInMemory.hpp
    ├── KVStoreLevelDb.hpp
    ├── LRUCache.hpp
    ├── Makefile
    ├── NameSpaceKVStore.hpp
    ├── SparseSet.hpp
    ├── Statistics.hpp
    ├── TokenizerImpl.cpp
    ├── TokenizerImpl.h
    ├── TokenizerImpl.hpp
    ├── WordIndexKVStore.hpp
    ├── ZException.hpp
    ├── ZUtil.hpp
    ├── atomic_pointer.h
    ├── bloom_filter.hpp
    ├── cedarpp.h
    ├── engine_simple_main.cpp
    ├── server.cpp
    └── trie
    │   ├── EntropyTrie.hpp
    │   ├── bit_access.hpp
    │   ├── bit_vector.cpp
    │   ├── bit_vector.hpp
    │   ├── block_info.hpp
    │   ├── exp_golomb.hpp
    │   ├── huffman.hpp
    │   ├── main.cpp
    │   ├── sign_interleave.hpp
    │   └── trie.hpp
├── tests
    ├── BasicSetTest.cpp
    ├── CompressedSetTests.cpp
    ├── CompressedSet_test.cpp
    ├── DocumentImplTest.hpp
    ├── InvertedIndexImplTest.hpp
    ├── InvertedIndexSimpleBatchTest.hpp
    ├── Makefile
    ├── SparseSet_test.cpp
    ├── StatisticsTest.hpp
    ├── TestUtils.hpp
    ├── TokenizerTest.hpp
    ├── XmlTest.hpp
    ├── document_test.cpp
    ├── inverted_index_test.cpp
    ├── memory_leak_test.cpp
    ├── runTests.sh
    ├── statistics_test.cpp
    ├── thread_test.cpp
    ├── tokenizer_test.cpp
    └── xml_test.cpp
├── unicode
    ├── UTF8Stream.cpp
    ├── UnicodeUtils.cpp
    └── unicode
    │   ├── gunichartables.h
    │   ├── guniprop.cpp
    │   └── guniprop.h
└── varint
    ├── BasicSet.cpp
    ├── BasicSet.h
    ├── BasicSetFactory.h
    ├── Codec.h
    ├── CollectionHelper.h
    ├── Common.h
    ├── CompressedDeltaChunk.cpp
    ├── CompressedDeltaChunk.h
    ├── CompressedSet.cpp
    ├── CompressedSet.h
    ├── ConsiseSet.hpp
    ├── DeltaChunkStore.h
    ├── ISetFactory.h
    ├── LazyAndNotSet.h
    ├── LazyAndSet.cpp
    ├── LazyAndSet.h
    ├── LazyOrSet.cpp
    ├── LazyOrSet.h
    ├── Makefile
    ├── README
    ├── Set.h
    ├── SetFactory.h
    ├── Sink.h
    ├── SliceInput.h
    ├── SliceOutput.h
    ├── Source.h
    ├── bitpacking
        ├── bitpacksimd.cpp
        ├── bitpacksimd.h
        ├── codecs.h
        ├── common.h
        ├── compositecodec.h
        ├── memutil.h
        ├── simdbinarypacking.h
        ├── simdcomputil.c
        ├── simdcomputil.h
        ├── simdintegratedbitpacking.c
        ├── simdintegratedbitpacking.h
        ├── util.cpp
        ├── util.h
        └── variablebyte.h
    └── slice.h


/.gitignore:
--------------------------------------------------------------------------------
 1 | a.out
 2 | 
 3 | simplehttpserver
 4 | 
 5 | build
 6 | bin
 7 | 
 8 | # Compiled Object files
 9 | *.slo
10 | *.lo
11 | *.o
12 | *.obj
13 | 
14 | # Compiled Dynamic libraries
15 | *.so
16 | *.so.*
17 | *.dylib
18 | *.dll
19 | 
20 | # Compiled Static libraries
21 | *.lai
22 | *.la
23 | *.a
24 | *.lib


--------------------------------------------------------------------------------
/FAQ:
--------------------------------------------------------------------------------
1 | Plain-text Frequently-Asked-Questions document for the project.
2 | ---------------------------------------------------------------
3 | 


--------------------------------------------------------------------------------
/INSTALL:
--------------------------------------------------------------------------------
1 | Configuration, build, and installation instructions.
2 | ----------------------------------------------------
3 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # **zsearch** A high performance search engine
 2 | 
 3 | zsearch keeps data fragmentation low and random-write performance high by storing its data in LevelDB, a log-structured merge-tree key-value store. Queries are fast because the inverted index stores document IDs in compressed bitmaps. The engine is served over HTTP by a simple libevent2 server.
 4 | 
 5 | ## Project Design
 6 | 
 7 | 	Engine 
 8 | 		-> tokenizer	
 9 | 		-> documentStore
10 | 		-> invertedIndex
11 | 			-> KVStore
12 | 				-> InMemory
13 | 				-> LevelDb
14 | 			-> setFactory
15 | 		-> setFactory
16 | 		-> wordIndex
17 | 
18 | ## Project Organization
19 | 
20 | TODO
21 | 
22 | ## Contact
23 | 
24 | -   [Homepage](http://victorparmar.github.com/zsearch/)
25 | -   Victor at victorparmar@gmail.com
26 | -   Maxime at maximecaron@gmail.com
27 | 
28 | ## Dependencies
29 | 
30 | All of the following:
31 | 
32 | -	[g++](http://gcc.gnu.org/) >= 4.7.2
33 | -	[libevent2](http://libevent.org/) >= 2.0.19
34 | -   [Python](http://python.org/) >= 2.7
35 | 
36 | ## Cloning and Running
37 | 
38 | You can clone this repo and simply execute:
39 | 
40 |     git clone git://github.com/victorparmar/zsearch.git
41 |     cd zsearch
42 |     ./buildall.sh
43 | 	./build/server ./docroot
44 | 
45 | ## License
46 | 
47 | -   Mozilla
48 | 
49 | 
50 | 


--------------------------------------------------------------------------------
/api/load.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import codecs
 4 | import httplib, urllib
 5 | import os, shutil
 6 | 
 7 | from xml.sax.saxutils import escape
 8 | 
 9 | def printText(txt):
10 |     lines = txt.split('\n')
11 |     for line in lines:
12 |         print line.strip()
13 | 
14 | 
15 | inputs = list()
16 | 
17 | httpServ = httplib.HTTPConnection("localhost", 8080)
18 | httpServ.connect()
19 | 
20 | path = '../data/load/'
21 | 
22 | listing = os.listdir(path)
23 | 
24 | for file in listing:
25 | 
26 | 	filename = os.path.join(path, file)
27 | 
28 | 	if (os.path.isfile(filename)):
29 | 
30 | 		#if (filename != '../data/load/document_42772.txt'):
31 | 		#	continue
32 | 
33 | 		print filename
34 | 		# f = codecs.open(filename, "r", "utf-8")
35 | 		linestring = open(filename, "r").read()
36 | 
37 | 		# print linestring
38 | 
39 | 		# params = urllib.urlencode({'data': escape(linestring)})
40 | 		params = urllib.urlencode({'data' : linestring})
41 | 		params = params.encode('utf-8')
42 | 		# params = {'data' : linestring}
43 | 		httpServ.request('POST', '/index', params)
44 | 		# inputs.append(linestring)
45 | 
46 | 		response = httpServ.getresponse()
47 | 
48 | 		# if response.status == httplib.OK:
49 | 		print "Output from POST request"
50 | 		printText (response.read())
51 | 
52 | 
53 | # print "loaded " + str(len(inputs)) + " files"
54 | 
55 | httpServ.close()
56 | 
57 | 


--------------------------------------------------------------------------------
/api/python/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | 
3 | 


--------------------------------------------------------------------------------
/api/python/test.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | import os, shutil
 5 | from zsearch import zsearch
 6 | 
 7 | z = zsearch("http://localhost:8080")
 8 | print z
 9 | 
10 | try:
11 | 
12 | 	print "<test.py> get document id 1"
13 | 	d = z.getDocument(1)
14 | 
15 | 	print "<test.py> print document contents"
16 | 	for key, value in d.iteritems():
17 | 		print key
18 | 		print value
19 | 
20 | except Exception as err:
21 | 	print err
22 | 
23 | data = {'input' : 'victor &amp; parmar &Eacute;'}
24 | print "<test.py> add document", data
25 | docId = z.addDocument(data)
26 | print "<test.py> returned docId:" + docId
27 | 
28 | print "<test.py> retrieve inserted document"
29 | d = z.getDocument(docId)
30 | 
31 | print "<test.py> print document contents"
32 | for key, value in d.iteritems():
33 | 	print key
34 | 	print value
35 | 
36 | 
37 | data = {'input' : 'snoop < dawg', 'complex' : '72.8/km²'}
38 | print "<test.py> add document", data
39 | docId = z.addDocument(data)
40 | print "<test.py> returned docId:" + docId
41 | 
42 | print "<test.py> retrieve inserted document"
43 | d = z.getDocument(docId)
44 | 
45 | print "<test.py> print document contents"
46 | for key, value in d.iteritems():
47 | 	print key
48 | 	print value
49 | 
50 | 
51 | 
52 | print "<test.py> search for 'some'"
53 | docIds = z.search("some")
54 | 
55 | print "<test.py> returned docIds:"
56 | for docId in docIds:
57 | 	print docId
58 | 
59 | try:
60 | 	print "<test.py> try getting invalid documentId (-1)"
61 | 	d = z.getDocument(-1)
62 | except Exception as err:
63 | 	print err
64 | 


--------------------------------------------------------------------------------
/api/python/test_search.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | import os, shutil
 5 | from zsearch import zsearch
 6 | 
 7 | z = zsearch("http://localhost:8080")
 8 | print z
 9 | 
10 | print "<test.py> search for 'some'"
11 | docIds = z.search("some")
12 | 
13 | print "<test.py> returned docIds:"
14 | for docId in docIds:
15 | 	print docId
16 | 
17 | print "<test.py> search for 'some' with start and offset"
18 | docIds = z.search("some", 0, 3)
19 | 
20 | print "<test.py> returned docIds:"
21 | for docId in docIds:
22 | 	print docId
23 | 
24 | print "<test.py> bad start"
25 | try:
26 | 	z.search("some", -1, 5)
27 | except Exception, e:
28 | 	print e
29 | 
30 | 
31 | print "<test.py> bad offset"
32 | try:
33 | 	z.search("some", 0, 'snoop')
34 | except Exception, e:
35 | 	print e
36 | 
37 | print "<test.py search for 'snoop'"
38 | docIds = z.search("snoop")
39 | for docId in docIds:
40 | 	print docId
41 | 


--------------------------------------------------------------------------------
/api/readme.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | Input simple text / key value pairs
 3 | 	-> xml escape (api)
 4 | 	-> wrap within xml frame (api)
 5 | 	-> html escape (api)
 6 | 	-> post (api)
 7 | 	-> html unescape (server)
 8 | 	-> build document and unescape (DocumentImpl / rapidxml)
 9 | 	-> store
10 | 	-> query
11 | 	-> xml escape (DocumentImpl)
12 | 	-> return 	
13 | 
14 | 


--------------------------------------------------------------------------------
/buildall.sh:
--------------------------------------------------------------------------------
1 | rm -rf ./build
2 | make -C ./leveldb/ clean
3 | 
4 | mkdir build
5 | mkdir build/tests
6 | make -C ./varint/ all
7 | make -C ./leveldb/ all
8 | make -C ./src/ all
9 | 


--------------------------------------------------------------------------------
/data/.gitignore:
--------------------------------------------------------------------------------
1 | big.txt
2 | tweet.txt
3 | tweet.txt.bz2
4 | 
5 | 


--------------------------------------------------------------------------------
/data/document01.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <document>
3 | 	<title>Input document</title>
4 | 	<input1> some text</input1>
5 | 	<input1> some more text</input1>
6 | </document>
7 | 


--------------------------------------------------------------------------------
/data/document02.xml:
--------------------------------------------------------------------------------
 1 | <document>
 2 | 	<title>document02</title>
 3 | 	<field1>some text</field1>
 4 | 	<field2><![CDATA[
 5 | function matchwo(a,b)
 6 | {
 7 | if (a < b && a < 0) then
 8 |   {
 9 |   return 1;
10 |   }
11 | else
12 |   {
13 |   return 0;
14 |   }
15 | }
16 | ]]></field2>
17 | </document>
18 | 


--------------------------------------------------------------------------------
/data/document03.xml:
--------------------------------------------------------------------------------
1 | <document>
2 | 	<title>document03</title>
3 | 	<field1>some text</field1>
4 | 	<field2>Kingdom Hearts (キングダム ハーツ, Kingudamu Hātsu?) est une série de jeux vidéo d'action-RPG développée et éditée par Square Enix, qui marque l'association entre Disney Interactive Studios et l'univers des jeux de Square sous la direction de Tetsuya Nomura. Kingdom Hearts est donc un cross-over entre plusieurs personnages de Square et l'univers Disney qui a lieu dans un monde parallèle créé spécialement pour la série. Dans chacun des opus, les voix des personnages de Disney sont interprétées par les mêmes célébrités que dans leur œuvre d'origine.
5 | area1 Les personnages de la franchise de Square Enix Final Fantasy font eux aussi des apparitions et interagissent avec le joueur et avec les personnages de Disney. La série tourne autour de la recherche des amis de Sora, le personnage principal, et de ses rencontres avec les personnages de Disney et de Final Fantasy au travers de différents mondes.</field2>
6 | </document>
7 | 


--------------------------------------------------------------------------------
/data/document04.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?><document><input1> some &lt; text</input1><input2>72.8/km²</input2><title>Input document</title></document>
2 | 


--------------------------------------------------------------------------------
/data/input01.txt:
--------------------------------------------------------------------------------
1 | input1 some text
2 | input2 some more text
3 | input3 xxx
4 | 


--------------------------------------------------------------------------------
/data/input02.txt:
--------------------------------------------------------------------------------
 1 | each line corresponds to a document
 2 | with the first word of each line corresponding to the field
 3 | please ensure that there is no newline at the end of this file, thank you
 4 | input1 some text
 5 | input1 some more text
 6 | area1 Kingdom Hearts (キングダム ハーツ, Kingudamu Hātsu?) est une série de jeux vidéo d'action-RPG développée et éditée par Square Enix, qui marque l'association entre Disney Interactive Studios et l'univers des jeux de Square sous la direction de Tetsuya Nomura. Kingdom Hearts est donc un cross-over entre plusieurs personnages de Square et l'univers Disney qui a lieu dans un monde parallèle créé spécialement pour la série. Dans chacun des opus, les voix des personnages de Disney sont interprétées par les mêmes célébrités que dans leur œuvre d'origine.
 7 | area1 Les personnages de la franchise de Square Enix Final Fantasy font eux aussi des apparitions et interagissent avec le joueur et avec les personnages de Disney. La série tourne autour de la recherche des amis de Sora, le personnage principal, et de ses rencontres avec les personnages de Disney et de Final Fantasy au travers de différents mondes.
 8 | area1 En 2012, la série compte sept épisodes sortis sur différentes consoles de jeux vidéo, mais de futurs opus sont programmés. Même si chaque titre ne reçoit pas le même succès, la plupart des jeux de la série sont acclamés par les critiques et reçoivent un important succès commercial. En mars 2011, la série Kingdom Hearts s'est vendue à plus de 17 millions d'exemplaires à travers le monde. Une grande variété de produits dérivés est sortie, comprenant les bandes originales, des figurines, guides de jeux, romans et une série en manga.
 9 | area1 de
10 | area1 de
11 | area1 de
12 | area1 de
13 | area1 de
14 | area1 de
15 | area1 de
16 | area1 de
17 | area1 de
18 | area1 de
19 | area1 de
20 | area1 de
21 | area1 de
22 | area1 de
23 | area1 de
24 | area1 de
25 | area1 de
26 | area1 de
27 | area1 de
28 | area1 de
29 | area1 de
30 | area1 de
31 | area1 de
32 | area1 de
33 | area1 de
34 | area1 de
35 | area1 de
36 | 


--------------------------------------------------------------------------------
/data/load/badinput.txt:
--------------------------------------------------------------------------------
1 | blahblah
2 | 
3 | 


--------------------------------------------------------------------------------
/data/load/document_17756.txt:
--------------------------------------------------------------------------------
 1 | <document>
 2 | <title>Lower_Canada</title>
 3 | <text>
 4 | 
 5 | The Province of Lower Canada (French: Province du Bas-Canada) was a [[British colonization of the Americas|British colony]] on the lower [[Saint Lawrence River]] and the shores of the [[Gulf of Saint Lawrence]] (1791–1841). It covered the southern portion of the modern-day Province of [[Quebec]], [[Canada]], and the [[Labrador]] region of the modern-day Province of [[Newfoundland and Labrador]].
 6 |  
 7 | 
 8 | The Province of Lower Canada was created by the [[Constitutional Act of 1791]] from the partition of the British colony of the [[Province of Quebec (1763–1791)|Province of Quebec]] (1763–1791) into the Province of Lower Canada and the Province of [[Upper Canada]].
 9 | 
10 | Lower Canada consisted of part of  former [[French colonization of the Americas|French colony]] of [[New France]], populated mainly by French Canadians, which was ceded to Great Britain after that empire&apos;s victory in the [[Seven Years&apos; War]], also called the [[French and Indian Wars]] in the United States. Other parts of New France ceded to Britain became the Colonies of [[Nova Scotia]], [[New Brunswick]] and [[Prince Edward Island]].
11 | 
12 | Like Upper Canada, there was political unrest and a rebellion challenged the [[United Kingdom|British]] rule of the predominantly [[French people|French]] population. After the [[Lower Canada Rebellion|Patriote Rebellion]] was crushed by the British army and [[Loyal (Lower Canada)|Loyal]] volunteers, the 1791 Constitution was suspended on March 27, 1838 and a [[Special Council of Lower Canada|special council]] was appointed to administer the colony.
13 | 
14 | The provinces of Lower Canada and Upper Canada were combined as the United Province of Canada in 1841, when the [[Act of Union (1840)|The Union Act]] came into force. Their separate legislatures were combined into a single parliament with equal representation for both constituent parts, even if Lower Canada had more population.
15 | 
16 | [[File:Constitution-of-lower-canada.png|thumb|left|Constitution of Lower Canada in 1791]]The Province of Lower Canada inherited the mixed set of French and English institutions that existed in the Province of Quebec during the 1763–1791 period and which continued to exist later in Canada-East (1841–1867) and ultimately in the current Province of Quebec (1867–).
17 | 
18 | </text>
19 | </document>
20 | 


--------------------------------------------------------------------------------
/data/load/document_35761.txt:
--------------------------------------------------------------------------------
 1 | <document>
 2 | <title>Blue_Ridge,_Alabama</title>
 3 | <text>
 4 | Blue Ridge is a [[census-designated place]] (CDP) in [[Elmore County, Alabama|Elmore County]], [[Alabama]], [[United States]]. At the 2000 census the population was 1,331. It is part of the [[Montgomery, Alabama|Montgomery]] [[Montgomery Metropolitan Area|Metropolitan Statistical Area]].
 5 | 
 6 | Blue Ridge is located at  (32.486744, -86.190823).
 7 | 
 8 | According to the [[U.S. Census Bureau]], the CDP has a total area of 7.9 square miles (20.5 km²), of which, 7.9 square miles (20.4 km²) of it is land and 0.1 square miles (0.2 km²) of it (0.88%) is water.
 9 | 
10 | As of the [[census]] of 2000, there were 1,331 people, 511 households, and 436 families residing in the CDP. The [[population density]] was 169.3 people per square mile (65.4/km²). There were 526 housing units at an average density of 66.9/sq mi (25.8/km²). The racial makeup of the CDP was 95.79% [[Race (United States Census)|White]], 3.08% [[Race (United States Census)|Black]] or [[Race (United States Census)|African American]], 0.38% [[Race (United States Census)|Native American]], 0.30% [[Race (United States Census)|Asian]], 0.08% [[Race (United States Census)|Pacific Islander]], and 0.38% from two or more races. 0.53% of the population were [[Race (United States Census)|Hispanic]] or [[Race (United States Census)|Latino]] of any race.
11 | 
12 | There were 511 households out of which 30.7% had children under the age of 18 living with them, 0.0% were [[Marriage|married couples]] living together, 6.5% had a female householder with no husband present, and 14.5% were non-families. 13.1% of all households were made up of individuals and 5.3% had someone living alone who was 65 years of age or older. The average household size was 2.60 and the average family size was 2.84.
13 | 
14 | In the CDP the population was spread out with 22.2% under the age of 18, 4.7% from 18 to 24, 21.0% from 25 to 44, 37.6% from 45 to 64, and 14.6% who were 65 years of age or older. The median age was 46 years. For every 100 females there were 101.4 males. For every 100 females age 18 and over, there were 94.4 males.
15 | 
16 | The median income for a household in the CDP was $73,162, and the median income for a family was $83,320. Males had a median income of $60,625 versus $37,875 for females. The [[per capita income]] for the CDP was $32,774. None of the families and 1.1% of the population were living below the [[poverty line]], including no under eighteens and none of those over 64.
17 | 
18 | </text>
19 | </document>
20 | 


--------------------------------------------------------------------------------
/data/load/document_39592.txt:
--------------------------------------------------------------------------------
 1 | <document>
 2 | <title>Cypress_Quarters,_Florida</title>
 3 | <text>
 4 | 
 5 | Cypress Quarters is a [[census-designated place]] (CDP) in [[Okeechobee County, Florida|Okeechobee County]], [[Florida]], [[United States]].  The population was 1,150 at the 2000 census.
 6 | 
 7 | Cypress Quarters is located at .
 8 | 
 9 | According to the [[United States Census Bureau]], the CDP has a total area of 2.7 square miles (7.0 km²), all of it land.
10 | 
11 | As of the [[census]] of 2000, there were 1,150 people, 420 households, and 291 families residing in the CDP.  The [[population density]] was 425.0 people per square mile (163.8/km²).  There were 469 housing units at an average density of 173.3/sq mi (66.8/km²).  The racial makeup of the CDP was 29.22% [[White (U.S. Census)|White]], 65.83% [[African American (U.S. Census)|African American]], 0.61% [[Native American (U.S. Census)|Native American]], 0.43% [[Asian (U.S. Census)|Asian]], 3.04% from [[Race (United States Census)|other races]], and 0.87% from two or more races. [[Hispanic (U.S. Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 5.48% of the population.
12 | 
13 | There were 420 households out of which 29.3% had children under the age of 18 living with them, 40.2% were [[Marriage|married couples]] living together, 24.5% had a female householder with no husband present, and 30.7% were non-families. 25.5% of all households were made up of individuals and 9.5% had someone living alone who was 65 years of age or older.  The average household size was 2.72 and the average family size was 3.26.
14 | 
15 | In the CDP the population was spread out with 30.7% under the age of 18, 9.6% from 18 to 24, 22.6% from 25 to 44, 24.1% from 45 to 64, and 13.0% who were 65 years of age or older.  The median age was 33 years. For every 100 females there were 94.3 males.  For every 100 females age 18 and over, there were 89.3 males.
16 | 
17 | The median income for a household in the CDP was $29,565, and the median income for a family was $38,125. Males had a median income of $31,103 versus $17,411 for females. The [[per capita income]] for the CDP was $13,046.  About 25.4% of families and 30.5% of the population were below the [[poverty line]], including 32.1% of those under age 18 and 16.7% of those age 65 or over.
18 | 
19 | </text>
20 | </document>
21 | 


--------------------------------------------------------------------------------
/data/load/document_42772.txt:
--------------------------------------------------------------------------------
 1 | <document>
 2 | <title>Somers,_Iowa</title>
 3 | <text>
 4 | Somers is a city in [[Calhoun County, Iowa|Calhoun County]], [[Iowa]], [[United States]]. The population was 165 at the 2000 census.
 5 | 
 6 | Somers is located at  (42.377902, -94.431013).
 7 | 
 8 | According to the [[United States Census Bureau]], the city has a total area of 0.3 [[square mile]]s (0.9 [[km²]]), all of it land.
 9 | 
10 | As of the [[census]] of 2000, there were 165 people, 61 households, and 51 families residing in the city. The [[population density]] was 473.1 people per square mile (182.0/km²). There were 66 housing units at an average density of 189.2/sq mi (72.8/km²). The racial makeup of the city was 97.58% [[White (U.S. Census)|White]], 0.61% [[African American (U.S. Census)|African American]], and 1.82% from [[Race (United States Census)|other races]]. [[Hispanic (U.S. Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 3.03% of the population.
11 | 
12 | There were 61 households out of which 27.9% had children under the age of 18 living with them, 67.2% were [[Marriage|married couples]] living together, 13.1% had a female householder with no husband present, and 14.8% were non-families. 9.8% of all households were made up of individuals and 4.9% had someone living alone who was 65 years of age or older. The average household size was 2.70 and the average family size was 2.92.
13 | 
14 | In the city the population was spread out with 24.8% under the age of 18, 8.5% from 18 to 24, 24.8% from 25 to 44, 22.4% from 45 to 64, and 19.4% who were 65 years of age or older. The median age was 40 years. For every 100 females there were 103.7 males. For every 100 females age 18 and over, there were 96.8 males.
15 | 
16 | The median income for a household in the city was $31,250, and the median income for a family was $43,750. Males had a median income of $32,083 versus $21,250 for females. The [[per capita income]] for the city was $15,777. About 11.5% of families and 15.9% of the population were below the [[poverty line]], including 33.3% of those under the age of eighteen and 16.0% of those sixty five or over.
17 | 
18 | </text>
19 | </document>
20 | 


--------------------------------------------------------------------------------
/data/load/document_42845.txt:
--------------------------------------------------------------------------------
 1 | <document>
 2 | <title>Webb,_Iowa</title>
 3 | <text>
 4 | Webb is a city in [[Clay County, Iowa|Clay County]], [[Iowa]], [[United States]]. The population was 165 at the [[United States Census, 2000|2000 census]].
 5 | 
 6 | Webb is located at  (42.948134, -95.012281).
 7 | 
 8 | According to the [[United States Census Bureau]], the city has a total area of 0.5 square miles (1.3 km²), all of it land.
 9 | 
10 | As of the [[census]] of 2000, there were 165 people, 73 households, and 44 families residing in the city. The [[population density]] was 328.0 people per square mile (127.4/km²). There were 83 housing units at an average density of 165.0/sq mi (64.1/km²). The racial makeup of the city was 99.39% [[White (U.S. Census)|White]] and 0.61% [[Native American (U.S. Census)|Native American]]. [[Hispanic (U.S. Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 0.61% of the population.
11 | 
12 | There were 73 households out of which 26.0% had children under the age of 18 living with them, 50.7% were [[Marriage|married couples]] living together, 4.1% had a female householder with no husband present, and 38.4% were non-families. 34.2% of all households were made up of individuals and 20.5% had someone living alone who was 65 years of age or older. The average household size was 2.26 and the average family size was 2.84.
13 | 
14 | In the city the population was spread out with 25.5% under the age of 18, 4.2% from 18 to 24, 24.2% from 25 to 44, 23.6% from 45 to 64, and 22.4% who were 65 years of age or older. The median age was 42 years. For every 100 females there were 96.4 males. For every 100 females age 18 and over, there were 86.4 males.
15 | 
16 | The median income for a household in the city was $27,500, and the median income for a family was $31,500. Males had a median income of $30,000 versus $21,528 for females. The [[per capita income]] for the city was $13,087. About 10.4% of families and 9.8% of the population were below the [[poverty line]], including 18.8% of those under the age of eighteen and none of those sixty five or over.
17 | 
18 | </text>
19 | </document>
20 | 


--------------------------------------------------------------------------------
/data/load/document_45024.txt:
--------------------------------------------------------------------------------
 1 | <document>
 2 | <title>Simsboro,_Louisiana</title>
 3 | <text>
 4 | 
 5 | Simsboro is a village in [[Lincoln Parish, Louisiana|Lincoln Parish]], [[Louisiana]], [[United States]]. The population was 684 at the [[2000 United States Census|2000 census]]. It is part of the [[Ruston, Louisiana|Ruston]] [[Ruston micropolitan area|Micropolitan Statistical Area]].
 6 | 
 7 | Simsboro is located at  (32.534169, -92.786417).
 8 | 
 9 | According to the [[United States Census Bureau]], the village has a total area of 9.0 km² (3.5 mi²), all land.
10 | 
11 | As of the [[census]] of 2000, there were 684 people, 273 households, and 193 families residing in the village. The [[population density]] was 76.1/km² (197.2/mi²). There were 338 housing units at an average density of 37.6/km² (97.4/mi²). The racial makeup of the village was 77.05% [[White (U.S. Census)|White]], 21.49% [[African American (U.S. Census)|African American]], 0.73% [[Native American (U.S. Census)|Native American]], 0.44% from [[Race (United States Census)|other races]], and 0.29% from two or more races. [[Hispanic (U.S. Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 0.73% of the population.
12 | 
13 | There were 273 households out of which 32.2% had children under the age of 18 living with them, 52.0% were [[Marriage|married couples]] living together, 14.7% had a female householder with no husband present, and 29.3% were non-families. 23.4% of all households were made up of individuals and 8.1% had someone living alone who was 65 years of age or older. The average household size was 2.48 and the average family size was 2.93.
14 | 
15 | In the village the population was spread out with 24.3% under the age of 18, 13.6% from 18 to 24, 28.4% from 25 to 44, 21.6% from 45 to 64, and 12.1% who were 65 years of age or older. The median age was 34 years. For every 100 females there were 93.8 males. For every 100 females age 18 and over, there were 98.5 males.
16 | 
17 | The median income for a household in the village was $31,324, and the median income for a family was $35,417. Males had a median income of $31,111 versus $18,750 for females. The [[per capita income]] for the village was $14,465. About 14.4% of families and 18.1% of the population were below the [[poverty line]], including 23.0% of those under age 18 and none of those age 65 or over.
18 | 
19 | </text>
20 | </document>
21 | 


--------------------------------------------------------------------------------
/data/load/document_45516.txt:
--------------------------------------------------------------------------------
 1 | <document>
 2 | <title>Cornville,_Maine</title>
 3 | <text>
 4 | Cornville is a [[New England town|town]] in [[Somerset County, Maine|Somerset County]], [[Maine]], [[United States]]. The population was 1,208 at the [[2000 United States Census|2000 census]].
 5 | 
 6 | Settled in 1794, Cornville was incorporated on 24 February 1798 as the 116th town in Maine. 
 7 | 
 8 | According to the [[United States Census Bureau]], the town has a total area of 40.7 square miles (105.3 km²), of which, 40.5 square miles (104.8 km²) of it is land and 0.2 square miles (0.5 km²) of it (0.52%) is water.
 9 | 
10 | As of the [[census]] of 2000, there were 1,208 people, 449 households, and 352 families residing in the town.  The [[population density]] was 29.9 people per square mile (11.5/km²).  There were 515 housing units at an average density of 12.7/sq mi (4.9/km²).  The racial makeup of the town was 99.01% [[White (U.S. Census)|White]], 0.08% [[African American (U.S. Census)|African American]], 0.17% [[Native American (U.S. Census)|Native American]], 0.25% [[Asian (U.S. Census)|Asian]], and 0.50% from two or more races. [[Hispanic (U.S. Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 0.33% of the population.
11 | 
12 | There were 449 households out of which 34.3% had children under the age of 18 living with them, 67.5% were [[Marriage|married couples]] living together, 6.7% had a female householder with no husband present, and 21.6% were non-families. 15.1% of all households were made up of individuals and 6.9% had someone living alone who was 65 years of age or older.  The average household size was 2.69 and the average family size was 2.97.
13 | 
14 | In the town the population was spread out with 25.2% under the age of 18, 7.6% from 18 to 24, 28.0% from 25 to 44, 27.8% from 45 to 64, and 11.3% who were 65 years of age or older.  The median age was 39 years. For every 100 females there were 97.4 males.  For every 100 females age 18 and over, there were 97.6 males.
15 | 
16 | The median income for a household in the town was $38,015, and the median income for a family was $41,875. Males had a median income of $30,543 versus $22,083 for females. The [[per capita income]] for the town was $16,184.  About 8.4% of families and 11.5% of the population were below the [[poverty line]], including 17.5% of those under age 18 and 11.3% of those age 65 or over.
17 | 
18 | </text>
19 | </document>
20 | 


--------------------------------------------------------------------------------
/data/load/document_46648.txt:
--------------------------------------------------------------------------------
 1 | <document>
 2 | <title>Lincoln_Township,_Clare_County,_Michigan</title>
 3 | <text>
 4 | 
 5 | Lincoln Township is a [[civil township]] of [[Clare County, Michigan|Clare County]] in the [[U.S. state]] of [[Michigan]].  The population was 1,758 at the [[United States Census, 2000|2000 census]].
 6 | 
 7 | [[Lake George, Michigan|Lake George]] is an [[unincorporated community]] in the northwest part of the township.
 8 | 
 9 | According to the [[United States Census Bureau]], the township has a total area of 35.9 [[square mile]]s (93.1 [[km²]]), of which, 35.2 square miles (91.2 km²) of it is land and 0.7 square miles (1.9 km²) of it (2.03%) is water.
10 | 
11 | As of the [[census]] of 2000, there were 1,758 people, 766 households, and 537 families residing in the township.  The [[population density]] was 49.9 per square mile (19.3/km²).  There were 1,949 housing units at an average density of 55.4/sq mi (21.4/km²).  The racial makeup of the township was 97.50% [[White (U.S. Census)|White]], 0.28% [[African American (U.S. Census)|African American]], 0.68% [[Native American (U.S. Census)|Native American]], 0.28% [[Asian (U.S. Census)|Asian]], 0.11% from [[Race (United States Census)|other races]], and 1.14% from two or more races. [[Hispanic (U.S. Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 1.19% of the population.
12 | 
13 | There were 766 households out of which 22.7% had children under the age of 18 living with them, 57.8% were [[Marriage|married couples]] living together, 8.4% had a female householder with no husband present, and 29.8% were non-families. 25.8% of all households were made up of individuals and 11.6% had someone living alone who was 65 years of age or older.  The average household size was 2.30 and the average family size was 2.71.
14 | 
15 | In the township the population was spread out with 20.4% under the age of 18, 5.1% from 18 to 24, 23.0% from 25 to 44, 31.6% from 45 to 64, and 19.9% who were 65 years of age or older.  The median age was 46 years. For every 100 females there were 98.9 males.  For every 100 females age 18 and over, there were 97.7 males.
16 | 
17 | The median income for a household in the township was $32,279, and the median income for a family was $38,304. Males had a median income of $31,066 versus $22,000 for females. The [[per capita income]] for the township was $18,146.  About 9.2% of families and 11.1% of the population were below the [[poverty line]], including 16.1% of those under age 18 and 5.5% of those age 65 or over.
18 | 
19 | </text>
20 | </document>
21 | 


--------------------------------------------------------------------------------
/data/load/document_4709.txt:
--------------------------------------------------------------------------------
 1 | <document>
 2 | <title>Foreign_relations_of_Georgia</title>
 3 | <text>
 4 | 
 5 | [[Georgia (country)|Georgia]]&apos;s location, nestled between the [[Black Sea]], [[Russia]], and [[Turkey]], gives it strategic importance far beyond its size. It is developing as the gateway from the Black Sea to the [[Caucasus]] and the larger Caspian region, but also serves as a buffer between Russia and Turkey. Georgia has a long and close relationship with Russia, but it is reaching out to its other neighbors and looking to the West in search of alternatives and opportunities. It signed a partnership and cooperation agreement with the [[European Union]], participates in the [[Partnership for Peace]], and encourages foreign investment. [[France]], [[Germany]], the [[United Kingdom]], and the [[United States]] all have embassies in [[Tbilisi]].
 6 | 
 7 | Georgia is a member of the [[United Nations]], the [[Council of Europe]], and the [[Organization for Security and Co-operation in Europe|OSCE]].
 8 | 
 9 | Because of its strategic location it is in both the [[Russia]]n and [[United States|American]] spheres of influence. In common with many ex-Soviet republics it is both influenced by and fearful of its larger neighbour. The invitation of US troops into the country caused tension with [[Moscow]]. The [[Russian government]] also believes that Georgia is being used by [[Chechnya|Chechen]] separatists. The American government has interests in an oil pipeline passing through the country. Former president [[Eduard Shevardnadze]] attempted to balance these competing demands. The new leadership is much closer to the United States.
10 | 
11 | Disputes - international:
12 | Georgia&apos;s relations with Russia are at their lowest point in modern history due to the [[2006 Georgian-Russian espionage controversy|Georgian-Russian espionage controversy]] and the [[2008 South Ossetia war]]. Georgia broke off diplomatic relations with Russia and has left the [[Commonwealth of Independent States]].
13 | 
14 | </text>
15 | </document>
16 | 


--------------------------------------------------------------------------------
/data/load/document_47156.txt:
--------------------------------------------------------------------------------
 1 | <document>
 2 | <title>Newkirk_Township,_Michigan</title>
 3 | <text>
 4 | Newkirk Township is a [[civil township]] of [[Lake County, Michigan|Lake County]] in the [[U.S. state]] of [[Michigan]]. As of the [[United States Census, 2000|2000 census]], the population was 719.
 5 | 
 6 | According to the [[United States Census Bureau]], the township has a total area of 72.8 square miles (188.6 km²), of which, 72.8 square miles (188.4 km²) of it is land and 0.1 square miles (0.2 km²) of it (0.11%) is water.
 7 | 
 8 | As of the [[census]] of 2000, there were 719 people, 302 households, and 199 families residing in the township.  The [[population density]] was 9.9 per square mile (3.8/km²).  There were 767 housing units at an average density of 10.5/sq mi (4.1/km²).  The racial makeup of the township was 93.74% [[White (U.S. Census)|White]], 0.14% [[African American (U.S. Census)|African American]], 2.64% [[Native American (U.S. Census)|Native American]], 0.14% [[Asian (U.S. Census)|Asian]], 0.42% from [[Race (United States Census)|other races]], and 2.92% from two or more races. [[Hispanic (U.S. Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 2.09% of the population.
 9 | 
10 | There were 302 households out of which 25.8% had children under the age of 18 living with them, 53.0% were [[Marriage|married couples]] living together, 9.6% had a female householder with no husband present, and 34.1% were non-families. 30.1% of all households were made up of individuals and 14.2% had someone living alone who was 65 years of age or older.  The average household size was 2.38 and the average family size was 2.90.
11 | 
12 | In the township the population was spread out with 24.8% under the age of 18, 6.0% from 18 to 24, 24.1% from 25 to 44, 24.9% from 45 to 64, and 20.3% who were 65 years of age or older.  The median age was 43 years. For every 100 females there were 99.2 males.  For every 100 females age 18 and over, there were 105.7 males.
13 | 
14 | The median income for a household in the township was $23,636, and the median income for a family was $26,932. Males had a median income of $25,893 versus $19,464 for females. The [[per capita income]] for the township was $11,432.  Below the [[poverty line]] were 25.1% of people, 19.1% of families, 37.2% of those under 18 and 14.1% of those over 64.
15 | 
16 | </text>
17 | </document>
18 | 


--------------------------------------------------------------------------------
/data/load/document_47474.txt:
--------------------------------------------------------------------------------
 1 | <document>
 2 | <title>Avery_Township,_Michigan</title>
 3 | <text>
 4 | Avery Township is a [[civil township]] of [[Montmorency County, Michigan|Montmorency County]] in the [[U.S. state]] of [[Michigan]].  As of the [[United States Census, 2000|2000 census]], the township population was 717.
 5 | 
 6 | According to the [[United States Census Bureau]], the township has a total area of 35.3 square miles (91.4 km²), of which, 35.0 square miles (90.7 km²) of it is land and 0.3 square miles (0.7 km²) of it (0.74%) is water.
 7 | 
 8 | As of the [[census]] of 2000, there were 717 people, 309 households, and 218 families residing in the township.  The [[population density]] was 20.5 per square mile (7.9/km²).  There were 646 housing units at an average density of 18.4/sq mi (7.1/km²).  The racial makeup of the township was 97.07% [[White (U.S. Census)|White]], 1.53% [[African American (U.S. Census)|African American]], 0.84% [[Native American (U.S. Census)|Native American]], and 0.56% from two or more races. [[Hispanic (U.S. Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 0.42% of the population.
 9 | 
10 | There were 309 households out of which 22.0% had children under the age of 18 living with them, 59.5% were [[Marriage|married couples]] living together, 7.1% had a female householder with no husband present, and 29.4% were non-families. 23.9% of all households were made up of individuals and 14.6% had someone living alone who was 65 years of age or older.  The average household size was 2.32 and the average family size was 2.73.
11 | 
12 | In the township the population was spread out with 20.6% under the age of 18, 4.5% from 18 to 24, 20.9% from 25 to 44, 33.2% from 45 to 64, and 20.8% who were 65 years of age or older.  The median age was 48 years. For every 100 females there were 97.0 males.  For every 100 females age 18 and over, there were 93.5 males.
13 | 
14 | The median income for a household in the township was $27,723, and the median income for a family was $28,261. Males had a median income of $24,464 versus $15,833 for females. The [[per capita income]] for the township was $14,677.  About 11.0% of families and 17.4% of the population were below the [[poverty line]], including 33.1% of those under age 18 and 10.9% of those age 65 or over.
15 | 
16 | </text>
17 | </document>
18 | 


--------------------------------------------------------------------------------
/data/load/document_48177.txt:
--------------------------------------------------------------------------------
 1 | <document>
 2 | <title>Seavey_Township,_Aitkin_County,_Minnesota</title>
 3 | <text>
 4 | Seavey Township is a township in [[Aitkin County, Minnesota|Aitkin County]], [[Minnesota]], [[United States]]. The population was 64 as of the [[2000 United States Census|2000 census]].
 5 | 
 6 | According to the [[United States Census Bureau]], the township has a total area of 36.3 square miles (94.1 km²), all of it land.
 7 | 
 8 | [[White Pine Township, Aitkin County, Minnesota|White Pine Township]] (northeast)
 9 | [[Pliny Township, Aitkin County, Minnesota|Pliny Township]] (east)
10 | [[Williams Township, Aitkin County, Minnesota|Williams Township]] (southeast)
11 | [[Idun Township, Aitkin County, Minnesota|Idun Township]] (south)
12 | [[East Side Township, Mille Lacs County, Minnesota|East Side Township, Mille Lacs County]] (southwest)
13 | [[Lakeside Township, Aitkin County, Minnesota|Lakeside Township]] (west)
14 | [[Malmo Township, Aitkin County, Minnesota|Malmo Township]] (northwest)
15 | 
16 | The township contains these two cemeteries: Holden and Ostlund.
17 | 
18 | As of the [[census]] of 2000, there were 64 people, 31 households, and 16 families residing in the township. The [[population density]] was 1.8 people per square mile (0.7/km²). There were 79 housing units at an average density of 2.2/sq mi (0.8/km²). The racial makeup of the township was 100.00% [[White (U.S. Census)|White]].
19 | 
20 | There were 31 households out of which 16.1% had children under the age of 18 living with them, 41.9% were [[Marriage|married couples]] living together, and 45.2% were non-families. 45.2% of all households were made up of individuals and 12.9% had someone living alone who was 65 years of age or older. The average household size was 2.06 and the average family size was 2.88.
21 | 
22 | In the township the population was spread out with 18.8% under the age of 18, 1.6% from 18 to 24, 29.7% from 25 to 44, 31.3% from 45 to 64, and 18.8% who were 65 years of age or older. The median age was 45 years. For every 100 females there were 137.0 males. For every 100 females age 18 and over, there were 136.4 males.
23 | 
24 | The median income for a household in the township was $30,000, and the median income for a family was $31,250. Males had a median income of $44,375 versus $11,250 for females. The [[per capita income]] for the township was $24,582. There were 15.0% of families and 21.1% of the population living below the [[poverty line]], including 28.6% of under eighteens and 23.8% of those over 64.
25 | 
26 | </text>
27 | </document>
28 | 


--------------------------------------------------------------------------------
/data/load/document_49683.txt:
--------------------------------------------------------------------------------
 1 | <document>
 2 | <title>Bluffton_Township,_Otter_Tail_County,_Minnesota</title>
 3 | <text>
 4 | Bluffton Township is a township in [[Otter Tail County, Minnesota|Otter Tail County]], [[Minnesota]], [[United States]]. The population was 474 at the 2000 census.
 5 | 
 6 | According to the [[United States Census Bureau]], the township has a total area of 33.0 square miles (85.4 km²), of which, 33.0 square miles (85.4 km²) of it is land and 0.04 square miles (0.1 km²) of it (0.09%) is water.
 7 | 
 8 | As of the [[census]] of 2000, there were 474 people, 168 households, and 131 families residing in the township.  The [[population density]] was 14.4 people per square mile (5.6/km²).  There were 180 housing units at an average density of 5.5/sq mi (2.1/km²).  The racial makeup of the township was 99.16% [[White (U.S. Census)|White]], 0.21% [[Asian (U.S. Census)|Asian]], 0.21% from [[Race (United States Census)|other races]], and 0.42% from two or more races. [[Hispanic (U.S. Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 0.42% of the population.
 9 | 
10 | There were 168 households out of which 39.3% had children under the age of 18 living with them, 67.3% were [[Marriage|married couples]] living together, 5.4% had a female householder with no husband present, and 22.0% were non-families. 16.1% of all households were made up of individuals and 7.7% had someone living alone who was 65 years of age or older.  The average household size was 2.82 and the average family size was 3.19.
11 | 
12 | In the township the population was spread out with 28.9% under the age of 18, 9.5% from 18 to 24, 24.7% from 25 to 44, 26.2% from 45 to 64, and 10.8% who were 65 years of age or older.  The median age was 38 years. For every 100 females there were 111.6 males.  For every 100 females age 18 and over, there were 114.6 males.
13 | 
14 | The median income for a household in the township was $45,179, and the median income for a family was $48,611. Males had a median income of $28,897 versus $17,143 for females. The [[per capita income]] for the township was $18,379.  About 5.8% of families and 8.3% of the population were below the [[poverty line]], including 13.3% of those under age 18 and 12.2% of those age 65 or over.
15 | 
16 | </text>
17 | </document>
18 | 


--------------------------------------------------------------------------------
/data/load/document_49819.txt:
--------------------------------------------------------------------------------
 1 | <document>
 2 | <title>Munch_Township,_Pine_County,_Minnesota</title>
 3 | <text>
 4 | Munch Township is a township in [[Pine County, Minnesota|Pine County]], [[Minnesota]], [[United States]]. The population was 222 at the 2000 census.
 5 | 
 6 | According to the [[United States Census Bureau]], the township has a total area of 36.1 square miles (93.4 km²), of which, 35.5 square miles (91.9 km²) of it is land and 0.6 square miles (1.5 km²) of it (1.58%) is water.
 7 | 
 8 | As of the [[census]] of 2000, there were 222 people, 97 households, and 65 families residing in the township.  The [[population density]] was 6.3 people per square mile (2.4/km²).  There were 168 housing units at an average density of 4.7/sq mi (1.8/km²).  The racial makeup of the township was 96.85% [[White (U.S. Census)|White]], 1.80% [[Native American (U.S. Census)|Native American]], and 1.35% from two or more races. [[Hispanic (U.S. Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 0.45% of the population.
 9 | 
10 | There were 97 households out of which 20.6% had children under the age of 18 living with them, 55.7% were [[Marriage|married couples]] living together, 9.3% had a female householder with no husband present, and 32.0% were non-families. 26.8% of all households were made up of individuals and 7.2% had someone living alone who was 65 years of age or older.  The average household size was 2.29 and the average family size was 2.73.
11 | 
12 | In the township the population was spread out with 18.9% under the age of 18, 5.0% from 18 to 24, 32.0% from 25 to 44, 28.8% from 45 to 64, and 15.3% who were 65 years of age or older.  The median age was 41 years. For every 100 females there were 101.8 males.  For every 100 females age 18 and over, there were 104.5 males.
13 | 
14 | The median income for a household in the township was $34,375, and the median income for a family was $35,938. Males had a median income of $23,125 versus $22,917 for females. The [[per capita income]] for the township was $19,588.  About 7.2% of families and 6.7% of the population were below the [[poverty line]], including 3.8% of those under the age of eighteen and none of those sixty five or over.
15 | 
16 | </text>
17 | </document>
18 | 


--------------------------------------------------------------------------------
/data/load/document_51033.txt:
--------------------------------------------------------------------------------
 1 | <document>
 2 | <title>Bosworth,_Missouri</title>
 3 | <text>
 4 | Bosworth is a city in [[Carroll County, Missouri|Carroll County]], [[Missouri]], [[United States]]. The population was 382 at the 2000 census.
 5 | 
 6 | Bosworth is located at  (39.470448, -93.334797).
 7 | 
 8 | According to the [[United States Census Bureau]], the city has a total area of 0.6 square miles (1.4 km²), all of it land.
 9 | 
10 | As of the [[census]] of 2000, there were 382 people, 153 households, and 102 families residing in the city. The [[population density]] was 691.0 people per square mile (268.2/km²). There were 195 housing units at an average density of 352.7/sq mi (136.9/km²). The racial makeup of the city was 100.00% [[White (U.S. Census)|White]].
11 | 
12 | There were 153 households out of which 30.7% had children under the age of 18 living with them, 56.9% were [[Marriage|married couples]] living together, 6.5% had a female householder with no husband present, and 32.7% were non-families. 26.8% of all households were made up of individuals and 16.3% had someone living alone who was 65 years of age or older. The average household size was 2.50 and the average family size was 3.09.
13 | 
14 | In the city the population was spread out with 29.1% under the age of 18, 6.8% from 18 to 24, 24.6% from 25 to 44, 20.2% from 45 to 64, and 19.4% who were 65 years of age or older. The median age was 37 years. For every 100 females there were 101.1 males. For every 100 females age 18 and over, there were 97.8 males.
15 | 
16 | The median income for a household in the city was $25,357, and the median income for a family was $28,750. Males had a median income of $23,250 versus $17,292 for females. The [[per capita income]] for the city was $11,526. About 11.7% of families and 15.4% of the population were below the [[poverty line]], including 23.5% of those under age 18 and 6.3% of those age 65 or over.
17 | 
18 | [[Barbara Marx]], who was first married to [[Zeppo Marx]], one of the [[Marx Brothers]] (from 1959 until their divorce in 1973), then to [[Frank Sinatra]] (from 1976 until his death in 1998), was born in Bosworth.
19 | 
20 | </text>
21 | </document>
22 | 


--------------------------------------------------------------------------------
/data/load/document_51679.txt:
--------------------------------------------------------------------------------
 1 | <document>
 2 | <title>Velda_City,_Missouri</title>
 3 | <text>
 4 | Velda City is a city in [[St. Louis County, Missouri|St. Louis County]], [[Missouri]], [[United States]]. The population was 1,616 at the 2000 census.
 5 | 
 6 | Velda City is located at  (38.694051, -90.294163).
 7 | 
 8 | According to the [[United States Census Bureau]], the city has a total area of 0.2 square miles (0.4 km²), all of it land.
 9 | 
10 | Velda City is the most densely populated incorporated area in [[St. Louis County, Missouri|St. Louis County]] with 9,872.8 people per square mile.
11 | 
12 | As of the [[census]] of 2000, there were 1,616 people, 610 households, and 428 families residing in the city. The [[population density]] was 9,872.8 people per square mile (3,899.6/km²). There were 670 housing units at an average density of 4,093.3/sq mi (1,616.8/km²). The racial makeup of the city was 3.09% [[White (U.S. Census)|White]], 96.04% [[African American (U.S. Census)|African American]], and 0.87% from two or more races. [[Hispanic (U.S. Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 0.87% of the population.
13 | 
14 | There were 610 households out of which 28.4% had children under the age of 18 living with them, 29.5% were [[Marriage|married couples]] living together, 35.9% had a female householder with no husband present, and 29.7% were non-families. 27.5% of all households were made up of individuals and 9.0% had someone living alone who was 65 years of age or older. The average household size was 2.65 and the average family size was 3.20.
15 | 
16 | In the city the population was spread out with 27.0% under the age of 18, 9.4% from 18 to 24, 26.5% from 25 to 44, 24.6% from 45 to 64, and 12.5% who were 65 years of age or older. The median age was 36 years. For every 100 females there were 76.8 males. For every 100 females age 18 and over, there were 68.7 males.
17 | 
18 | The median income for a household in the city was $30,000, and the median income for a family was $31,652. Males had a median income of $27,768 versus $26,083 for females. The [[per capita income]] for the city was $15,009. About 15.9% of families and 17.5% of the population were below the [[poverty line]], including 19.8% of those under age 18 and 23.3% of those age 65 or over.
19 | 
20 | </text>
21 | </document>
22 | 


--------------------------------------------------------------------------------
/data/load/document_51756.txt:
--------------------------------------------------------------------------------
 1 | <document>
 2 | <title>Sheridan,_Missouri</title>
 3 | <text>
 4 | Sheridan is a city in [[Worth County, Missouri|Worth County]], [[Missouri]], [[United States]], near the [[Platte River (Missouri)|Platte River]]. The population was 185 at the 2000 census, at which time it was a town.
 5 | 
 6 | Sheridan is located at  (40.517355, -94.614890).
 7 | 
 8 | According to the [[United States Census Bureau]], the town has a total area of 0.2 square miles (0.5 km²), all of it land.
 9 | 
10 | As of the [[census]] of 2000, there were 185 people, 97 households, and 51 families residing in the town. The [[population density]] was 954.9 people per square mile (375.9/km²). There were 120 housing units at an average density of 619.4/sq mi (243.9/km²). The racial makeup of the town was 98.92% [[White (U.S. Census)|White]], and 1.08% from two or more races.
11 | 
12 | There were 97 households out of which 18.6% had children under the age of 18 living with them, 43.3% were [[Marriage|married couples]] living together, 9.3% had a female householder with no husband present, and 46.4% were non-families. 43.3% of all households were made up of individuals and 28.9% had someone living alone who was 65 years of age or older. The average household size was 1.91 and the average family size was 2.54.
13 | 
14 | In the town the population was spread out with 18.4% under the age of 18, 5.4% from 18 to 24, 22.2% from 25 to 44, 23.8% from 45 to 64, and 30.3% who were 65 years of age or older. The median age was 49 years. For every 100 females there were 72.9 males. For every 100 females age 18 and over, there were 71.6 males.
15 | 
16 | The median income for a household in the town was $20,357, and the median income for a family was $25,750. Males had a median income of $24,583 versus $14,375 for females. The [[per capita income]] for the town was $12,162. About 4.4% of families and 13.5% of the population were below the [[poverty line]], including 6.1% of those under the age of eighteen and 14.5% of those sixty five or over.
17 | 
18 | </text>
19 | </document>
20 | 


--------------------------------------------------------------------------------
/data/load/document_53100.txt:
--------------------------------------------------------------------------------
 1 | <document>
 2 | <title>Newtown,_Missouri</title>
 3 | <text>
 4 | Newtown is a city in [[Sullivan County, Missouri|Sullivan County]], [[Missouri]], [[United States]]. The population was 209 at the [[United States Census, 2000|2000 census]], at which time it was a town.
 5 | 
 6 | Newtown is located at  (40.375879, -93.334578).
 7 | 
 8 | According to the [[United States Census Bureau]], the town has a total area of 0.2 square miles (0.6 km²), all of it land.
 9 | 
10 | As of the [[census]] of 2000, there were 209 people, 86 households, and 55 families residing in the town. The [[population density]] was 826.0 people per square mile (322.8/km²). There were 93 housing units at an average density of 367.5/sq mi (143.6/km²). The racial makeup of the town was 98.56% [[White (U.S. Census)|White]], 0.48% [[Native American (U.S. Census)|Native American]], 0.48% from [[Race (United States Census)|other races]], and 0.48% from two or more races. [[Hispanic (U.S. Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 4.31% of the population.
11 | 
12 | There were 86 households out of which 36.0% had children under the age of 18 living with them, 50.0% were [[Marriage|married couples]] living together, 9.3% had a female householder with no husband present, and 36.0% were non-families. 33.7% of all households were made up of individuals and 14.0% had someone living alone who was 65 years of age or older. The average household size was 2.43 and the average family size was 3.16.
13 | 
14 | In the town the population was spread out with 32.1% under the age of 18, 9.1% from 18 to 24, 26.3% from 25 to 44, 17.7% from 45 to 64, and 14.8% who were 65 years of age or older. The median age was 32 years. For every 100 females there were 91.7 males. For every 100 females age 18 and over, there were 86.8 males.
15 | 
16 | The median income for a household in the town was $21,250, and the median income for a family was $43,125. Males had a median income of $23,000 versus $19,625 for females. The [[per capita income]] for the town was $13,561. About 3.2% of families and 10.6% of the population were below the [[poverty line]], including 7.8% of those under the age of eighteen and 25.5% of those sixty five or over.
17 | 
18 | </text>
19 | </document>
20 | 


--------------------------------------------------------------------------------
/data/load/document_9566.txt:
--------------------------------------------------------------------------------
 1 | <document>
 2 | <title>Timeline_of_programming_languages</title>
 3 | <text>
 4 | This is a [[timeline]] of historically important [[programming language]]s.
 5 | 
 6 | Legend
 7 |  ( Entry ) means a non-universal programming language
 8 |  * means a unique language (no direct predecessor)
 9 | 
10 | </text>
11 | </document>
12 | 


--------------------------------------------------------------------------------
/data/lorem_ipsum.txt:
--------------------------------------------------------------------------------
1 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque eget sem augue, non auctor nunc. Nam tincidunt tellus eu lorem mollis non tempus felis vestibulum. Sed diam tortor, euismod id ultricies eget, lacinia sit amet velit. Curabitur volutpat felis ligula. Maecenas ac est ipsum, in mollis lorem. Etiam nunc nunc, viverra nec imperdiet id, lacinia at nunc. Donec molestie ornare adipiscing. Nam elit tortor, lobortis vitae aliquam id, faucibus eu lacus. Cras arcu sapien, lacinia id hendrerit vel, viverra in elit. Cras sed ipsum lacus, non mollis sem. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Curabitur vel bibendum massa. Vivamus adipiscing condimentum vulputate.
2 | 


--------------------------------------------------------------------------------
/data/readme.txt:
--------------------------------------------------------------------------------
1 | Generating a big file :)
2 | 
3 | for i in {1..10000}; do cat lorem_ipsum.txt >> big.txt; done
4 | 
5 | 


--------------------------------------------------------------------------------
/docroot/post.htm:
--------------------------------------------------------------------------------
 1 | <html>
 2 | <body>
 3 | 
 4 | 
 5 | Post data internally
 6 | <form method="POST" action="/index">
 7 | 	<textarea name="data" rows="20" cols="100">
 8 | 	</textarea>
 9 | 	<br />
10 | 	<input type="submit" />
11 | </form>
12 | 
13 | 
14 | Post data externally
15 | <form method="POST" action="http://www.hashemian.com/tools/form-post-tester.php">
16 | 	<textarea name="data" rows="10" cols="100">
17 | 	</textarea>
18 | 	<br />
19 | 	<input type="submit" />
20 | </form>
21 | 
22 | </body>
23 | </html>
24 | 


--------------------------------------------------------------------------------
/leveldb/AUTHORS:
--------------------------------------------------------------------------------
 1 | # Names should be added to this file like so:
 2 | # Name or Organization <email address>
 3 | 
 4 | Google Inc.
 5 | 
 6 | # Initial version authors:
 7 | Jeffrey Dean <jeff@google.com>
 8 | Sanjay Ghemawat <sanjay@google.com>
 9 | 
10 | # Partial list of contributors:
11 | Kevin Regan <kevin.d.regan@gmail.com>
12 | Johan Bilien <jobi@litl.com>
13 | 


--------------------------------------------------------------------------------
/leveldb/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | 
 3 | Redistribution and use in source and binary forms, with or without
 4 | modification, are permitted provided that the following conditions are
 5 | met:
 6 | 
 7 |    * Redistributions of source code must retain the above copyright
 8 | notice, this list of conditions and the following disclaimer.
 9 |    * Redistributions in binary form must reproduce the above
10 | copyright notice, this list of conditions and the following disclaimer
11 | in the documentation and/or other materials provided with the
12 | distribution.
13 |    * Neither the name of Google Inc. nor the names of its
14 | contributors may be used to endorse or promote products derived from
15 | this software without specific prior written permission.
16 | 
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | 


--------------------------------------------------------------------------------
/leveldb/NEWS:
--------------------------------------------------------------------------------
 1 | Release 1.2 2011-05-16
 2 | ----------------------
 3 | 
 4 | Fixes for larger databases (tested up to one billion 100-byte entries,
 5 | i.e., ~100GB).
 6 | 
 7 | (1) Place hard limit on number of level-0 files.  This fixes errors
 8 | of the form "too many open files".
 9 | 
10 | (2) Fixed memtable management.  Before the fix, a heavy write burst
11 | could cause unbounded memory usage.
12 | 
13 | A fix for a logging bug where the reader would incorrectly complain
14 | about corruption.
15 | 
16 | Allow public access to WriteBatch contents so that users can easily
17 | wrap a DB.
18 | 


--------------------------------------------------------------------------------
/leveldb/README:
--------------------------------------------------------------------------------
 1 | leveldb: A key-value store
 2 | Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com)
 3 | 
 4 | The code under this directory implements a system for maintaining a
 5 | persistent key/value store.
 6 | 
 7 | See doc/index.html for more explanation.
 8 | See doc/impl.html for a brief overview of the implementation.
 9 | 
10 | The public interface is in include/*.h.  Callers should not include or
11 | rely on the details of any other header files in this package.  Those
12 | internal APIs may be changed without warning.
13 | 
14 | Guide to header files:
15 | 
16 | include/db.h
17 |     Main interface to the DB: Start here
18 | 
19 | include/options.h
20 |     Control over the behavior of an entire database, and also
21 |     control over the behavior of individual reads and writes.
22 | 
23 | include/comparator.h
24 |     Abstraction for user-specified comparison function.  If you want
25 |     just bytewise comparison of keys, you can use the default comparator,
26 |     but clients can write their own comparator implementations if they
27 |     want custom ordering (e.g. to handle different character
28 |     encodings, etc.)
29 | 
30 | include/iterator.h
31 |     Interface for iterating over data. You can get an iterator
32 |     from a DB object.
33 | 
34 | include/write_batch.h
35 |     Interface for atomically applying multiple updates to a database.
36 | 
37 | include/slice.h
38 |     A simple module for maintaining a pointer and a length into some
39 |     other byte array.
40 | 
41 | include/status.h
42 |     Status is returned from many of the public interfaces and is used
43 |     to report success and various kinds of errors.
44 | 
45 | include/env.h
46 |     Abstraction of the OS environment.  A posix implementation of
47 |     this interface is in util/env_posix.cc
48 | 
49 | include/table.h
50 | include/table_builder.h
51 |     Lower-level modules that most clients probably won't use directly
52 | 


--------------------------------------------------------------------------------
/leveldb/TODO:
--------------------------------------------------------------------------------
 1 | ss
 2 | - Stats
 3 | 
 4 | db
 5 | - Maybe implement DB::BulkDeleteForRange(start_key, end_key)
 6 |   that would blow away files whose ranges are entirely contained
 7 |   within [start_key..end_key]?  For Chrome, deletion of obsolete
 8 |   object stores, etc. can be done in the background anyway, so
 9 |   probably not that important.
10 | - There have been requests for MultiGet.
11 | 
12 | After a range is completely deleted, what gets rid of the
13 | corresponding files if we make no future changes to that range?  Make
14 | the conditions for triggering compactions fire in more situations?
15 | 


--------------------------------------------------------------------------------
/leveldb/db/builder.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #ifndef STORAGE_LEVELDB_DB_BUILDER_H_
 6 | #define STORAGE_LEVELDB_DB_BUILDER_H_
 7 | 
 8 | #include "leveldb/status.h"
 9 | 
10 | namespace leveldb {
11 | 
12 | struct Options;
13 | struct FileMetaData;
14 | 
15 | class Env;
16 | class Iterator;
17 | class TableCache;
18 | class VersionEdit;
19 | 
20 | // Build a Table file from the contents of *iter.  The generated file
21 | // will be named according to meta->number.  On success, the rest of
22 | // *meta will be filled with metadata about the generated table.
23 | // If no data is present in *iter, meta->file_size will be set to
24 | // zero, and no Table file will be produced.
25 | extern Status BuildTable(const std::string& dbname,
26 |                          Env* env,
27 |                          const Options& options,
28 |                          TableCache* table_cache,
29 |                          Iterator* iter,
30 |                          FileMetaData* meta);
31 | 
32 | }  // namespace leveldb
33 | 
34 | #endif  // STORAGE_LEVELDB_DB_BUILDER_H_
35 | 


--------------------------------------------------------------------------------
/leveldb/db/db_iter.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #ifndef STORAGE_LEVELDB_DB_DB_ITER_H_
 6 | #define STORAGE_LEVELDB_DB_DB_ITER_H_
 7 | 
 8 | #include <stdint.h>
 9 | #include "leveldb/db.h"
10 | #include "db/dbformat.h"
11 | 
12 | namespace leveldb {
13 | 
14 | class DBImpl;
15 | 
16 | // Return a new iterator that converts internal keys (yielded by
17 | // "*internal_iter") that were live at the specified "sequence" number
18 | // into appropriate user keys.
19 | extern Iterator* NewDBIterator(
20 |     DBImpl* db,
21 |     const Comparator* user_key_comparator,
22 |     Iterator* internal_iter,
23 |     SequenceNumber sequence,
24 |     uint32_t seed);
25 | 
26 | }  // namespace leveldb
27 | 
28 | #endif  // STORAGE_LEVELDB_DB_DB_ITER_H_
29 | 


--------------------------------------------------------------------------------
/leveldb/db/log_format.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | //
 5 | // Log format information shared by reader and writer.
 6 | // See ../doc/log_format.txt for more detail.
 7 | 
 8 | #ifndef STORAGE_LEVELDB_DB_LOG_FORMAT_H_
 9 | #define STORAGE_LEVELDB_DB_LOG_FORMAT_H_
10 | 
11 | namespace leveldb {
12 | namespace log {
13 | 
14 | enum RecordType {
15 |   // Zero is reserved for preallocated files
16 |   kZeroType = 0,
17 | 
18 |   kFullType = 1,
19 | 
20 |   // For fragments
21 |   kFirstType = 2,
22 |   kMiddleType = 3,
23 |   kLastType = 4
24 | };
25 | static const int kMaxRecordType = kLastType;
26 | 
27 | static const int kBlockSize = 32768;
28 | 
29 | // Header is checksum (4 bytes), type (1 byte), length (2 bytes).
30 | static const int kHeaderSize = 4 + 1 + 2;
31 | 
32 | }  // namespace log
33 | }  // namespace leveldb
34 | 
35 | #endif  // STORAGE_LEVELDB_DB_LOG_FORMAT_H_
36 | 


--------------------------------------------------------------------------------
/leveldb/db/log_writer.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #ifndef STORAGE_LEVELDB_DB_LOG_WRITER_H_
 6 | #define STORAGE_LEVELDB_DB_LOG_WRITER_H_
 7 | 
 8 | #include <stdint.h>
 9 | #include "db/log_format.h"
10 | #include "leveldb/slice.h"
11 | #include "leveldb/status.h"
12 | 
13 | namespace leveldb {
14 | 
15 | class WritableFile;
16 | 
17 | namespace log {
18 | 
19 | class Writer {
20 |  public:
21 |   // Create a writer that will append data to "*dest".
22 |   // "*dest" must be initially empty.
23 |   // "*dest" must remain live while this Writer is in use.
24 |   explicit Writer(WritableFile* dest);
25 |   ~Writer();
26 | 
27 |   Status AddRecord(const Slice& slice);
28 | 
29 |  private:
30 |   WritableFile* dest_;
31 |   int block_offset_;       // Current offset in block
32 | 
33 |   // crc32c values for all supported record types.  These are
34 |   // pre-computed to reduce the overhead of computing the crc of the
35 |   // record type stored in the header.
36 |   uint32_t type_crc_[kMaxRecordType + 1];
37 | 
38 |   Status EmitPhysicalRecord(RecordType type, const char* ptr, size_t length);
39 | 
40 |   // No copying allowed
41 |   Writer(const Writer&);
42 |   void operator=(const Writer&);
43 | };
44 | 
45 | }  // namespace log
46 | }  // namespace leveldb
47 | 
48 | #endif  // STORAGE_LEVELDB_DB_LOG_WRITER_H_
49 | 


--------------------------------------------------------------------------------
/leveldb/db/snapshot.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #ifndef STORAGE_LEVELDB_DB_SNAPSHOT_H_
 6 | #define STORAGE_LEVELDB_DB_SNAPSHOT_H_
 7 | 
 8 | #include "leveldb/db.h"
 9 | 
10 | namespace leveldb {
11 | 
12 | class SnapshotList;
13 | 
14 | // Snapshots are kept in a doubly-linked list in the DB.
15 | // Each SnapshotImpl corresponds to a particular sequence number.
16 | class SnapshotImpl : public Snapshot {
17 |  public:
18 |   SequenceNumber number_;  // const after creation
19 | 
20 |  private:
21 |   friend class SnapshotList;
22 | 
23 |   // SnapshotImpl is kept in a doubly-linked circular list
24 |   SnapshotImpl* prev_;
25 |   SnapshotImpl* next_;
26 | 
27 |   SnapshotList* list_;                 // just for sanity checks
28 | };
29 | 
30 | class SnapshotList {
31 |  public:
32 |   SnapshotList() {  // Start empty: the dummy head points at itself both ways.
33 |     list_.prev_ = &list_;
34 |     list_.next_ = &list_;
35 |   }
36 | 
37 |   bool empty() const { return list_.next_ == &list_; }  // Only the dummy head is linked.
38 |   SnapshotImpl* oldest() const { assert(!empty()); return list_.next_; }  // Node right after the head.
39 |   SnapshotImpl* newest() const { assert(!empty()); return list_.prev_; }  // Node right before the head.
40 | 
41 |   const SnapshotImpl* New(SequenceNumber seq) {  // Allocate a snapshot for "seq"; newest() returns it afterwards.
42 |     SnapshotImpl* s = new SnapshotImpl;
43 |     s->number_ = seq;
44 |     s->list_ = this;
45 |     s->next_ = &list_;  // Link in just before the dummy head, i.e. at the tail.
46 |     s->prev_ = list_.prev_;
47 |     s->prev_->next_ = s;
48 |     s->next_->prev_ = s;
49 |     return s;
50 |   }
51 | 
52 |   void Delete(const SnapshotImpl* s) {  // Unlink "s" and free it; "s" must belong to this list (asserted).
53 |     assert(s->list_ == this);
54 |     s->prev_->next_ = s->next_;
55 |     s->next_->prev_ = s->prev_;
56 |     delete s;
57 |   }
58 | 
59 |  private:
60 |   // Dummy head of doubly-linked list of snapshots
61 |   SnapshotImpl list_;
62 | };
63 | 
64 | }  // namespace leveldb
65 | 
66 | #endif  // STORAGE_LEVELDB_DB_SNAPSHOT_H_
67 | 


--------------------------------------------------------------------------------
/leveldb/db/table_cache.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | //
 5 | // Thread-safe (provides internal synchronization)
 6 | 
 7 | #ifndef STORAGE_LEVELDB_DB_TABLE_CACHE_H_
 8 | #define STORAGE_LEVELDB_DB_TABLE_CACHE_H_
 9 | 
10 | #include <string>
11 | #include <stdint.h>
12 | #include "db/dbformat.h"
13 | #include "leveldb/cache.h"
14 | #include "leveldb/table.h"
15 | #include "port/port.h"
16 | 
17 | namespace leveldb {
18 | 
19 | class Env;
20 | 
21 | class TableCache {
22 |  public:
23 |   TableCache(const std::string& dbname, const Options* options, int entries);
24 |   ~TableCache();
25 | 
26 |   // Return an iterator for the specified file number (the corresponding
27 |   // file length must be exactly "file_size" bytes).  If "tableptr" is
28 |   // non-NULL, also sets "*tableptr" to point to the Table object
29 |   // underlying the returned iterator, or NULL if no Table object underlies
30 |   // the returned iterator.  The returned "*tableptr" object is owned by
31 |   // the cache and should not be deleted, and is valid for as long as the
32 |   // returned iterator is live.
33 |   Iterator* NewIterator(const ReadOptions& options,
34 |                         uint64_t file_number,
35 |                         uint64_t file_size,
36 |                         Table** tableptr = NULL);
37 | 
38 |   // If a seek to internal key "k" in specified file finds an entry,
39 |   // call (*handle_result)(arg, found_key, found_value).
40 |   Status Get(const ReadOptions& options,
41 |              uint64_t file_number,
42 |              uint64_t file_size,
43 |              const Slice& k,
44 |              void* arg,
45 |              void (*handle_result)(void*, const Slice&, const Slice&));
46 | 
47 |   // Evict any entry for the specified file number
48 |   void Evict(uint64_t file_number);
49 | 
50 |  private:
51 |   Env* const env_;
52 |   const std::string dbname_;
53 |   const Options* options_;
54 |   Cache* cache_;
55 | 
56 |   Status FindTable(uint64_t file_number, uint64_t file_size, Cache::Handle**);
57 | };
58 | 
59 | }  // namespace leveldb
60 | 
61 | #endif  // STORAGE_LEVELDB_DB_TABLE_CACHE_H_
62 | 


--------------------------------------------------------------------------------
/leveldb/db/version_edit_test.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #include "db/version_edit.h"
 6 | #include "util/testharness.h"
 7 | 
 8 | namespace leveldb {
 9 | 
10 | static void TestEncodeDecode(const VersionEdit& edit) {  // Round-trip check: encode, decode, re-encode.
11 |   std::string encoded, encoded2;
12 |   edit.EncodeTo(&encoded);
13 |   VersionEdit parsed;
14 |   Status s = parsed.DecodeFrom(encoded);
15 |   ASSERT_TRUE(s.ok()) << s.ToString();  // Decoding our own encoding must succeed.
16 |   parsed.EncodeTo(&encoded2);
17 |   ASSERT_EQ(encoded, encoded2);  // Re-encoding the parsed edit must reproduce the same bytes.
18 | }
19 | 
20 | class VersionEditTest { };
21 | 
22 | TEST(VersionEditTest, EncodeDecode) {
23 |   static const uint64_t kBig = 1ull << 50;
24 | 
25 |   VersionEdit edit;
26 |   for (int i = 0; i < 4; i++) {
27 |     TestEncodeDecode(edit);
28 |     edit.AddFile(3, kBig + 300 + i, kBig + 400 + i,
29 |                  InternalKey("foo", kBig + 500 + i, kTypeValue),
30 |                  InternalKey("zoo", kBig + 600 + i, kTypeDeletion));
31 |     edit.DeleteFile(4, kBig + 700 + i);
32 |     edit.SetCompactPointer(i, InternalKey("x", kBig + 900 + i, kTypeValue));
33 |   }
34 | 
35 |   edit.SetComparatorName("foo");
36 |   edit.SetLogNumber(kBig + 100);
37 |   edit.SetNextFile(kBig + 200);
38 |   edit.SetLastSequence(kBig + 1000);
39 |   TestEncodeDecode(edit);
40 | }
41 | 
42 | }  // namespace leveldb
43 | 
44 | int main(int argc, char** argv) {
45 |   return leveldb::test::RunAllTests();
46 | }
47 | 


--------------------------------------------------------------------------------
/leveldb/db/write_batch_internal.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #ifndef STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_
 6 | #define STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_
 7 | 
 8 | #include "leveldb/write_batch.h"
 9 | 
10 | namespace leveldb {
11 | 
12 | class MemTable;
13 | 
14 | // WriteBatchInternal provides static methods for manipulating a
15 | // WriteBatch that we don't want in the public WriteBatch interface.
16 | class WriteBatchInternal {
17 |  public:
18 |   // Return the number of entries in the batch.
19 |   static int Count(const WriteBatch* batch);
20 | 
21 |   // Set the count for the number of entries in the batch.
22 |   static void SetCount(WriteBatch* batch, int n);
23 | 
24 |   // Return the sequence number for the start of this batch.
25 |   static SequenceNumber Sequence(const WriteBatch* batch);
26 | 
27 |   // Store the specified number as the sequence number for the start of
28 |   // this batch.
29 |   static void SetSequence(WriteBatch* batch, SequenceNumber seq);
30 | 
31 |   static Slice Contents(const WriteBatch* batch) {  // Slice built from the batch's rep_ string.
32 |     return Slice(batch->rep_);
33 |   }
34 | 
35 |   static size_t ByteSize(const WriteBatch* batch) {  // Size of the batch's rep_ string.
36 |     return batch->rep_.size();
37 |   }
38 | 
39 |   static void SetContents(WriteBatch* batch, const Slice& contents);
40 | 
41 |   static Status InsertInto(const WriteBatch* batch, MemTable* memtable);
42 | 
43 |   static void Append(WriteBatch* dst, const WriteBatch* src);
44 | };
45 | 
46 | }  // namespace leveldb
47 | 
48 | 
49 | #endif  // STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_
50 | 


--------------------------------------------------------------------------------
/leveldb/doc/doc.css:
--------------------------------------------------------------------------------
 1 | body {
 2 |   margin-left: 0.5in;
 3 |   margin-right: 0.5in;
 4 |   background: white;
 5 |   color: black;
 6 | }
 7 | 
 8 | h1 {
 9 |   margin-left: -0.2in;
10 |   font-size: 14pt;
11 | }
12 | h2 {
13 |   margin-left: -0in;
14 |   font-size: 12pt;
15 | }
16 | h3 {
17 |   margin-left: -0in;
18 | }
19 | h4 {
20 |   margin-left: -0in;
21 | }
22 | hr {
23 |   margin-left: -0in;
24 | }
25 | 
26 | /* Definition lists: definition term bold */
27 | dt {
28 |   font-weight: bold;
29 | }
30 | 
31 | address {
32 |   text-align: center;
33 | }
34 | code,samp,var {
35 |   color: blue;
36 | }
37 | kbd {
38 |   color: #600000;
39 | }
40 | div.note p {
41 |   float: right;
42 |   width: 3in;
43 |   margin-right: 0%;
44 |   padding: 1px;
45 |   border: 2px solid #6060a0;
46 |   background-color: #fffff0;
47 | }
48 | 
49 | ul {
50 |   margin-top: -0em;
51 |   margin-bottom: -0em;
52 | }
53 | 
54 | ol {
55 |   margin-top: -0em;
56 |   margin-bottom: -0em;
57 | }
58 | 
59 | UL.nobullets {
60 |   list-style-type: none;
61 |   list-style-image: none;
62 |   margin-left: -1em;
63 | }
64 | 
65 | p {
66 |   margin: 1em 0 1em 0;
67 |   padding: 0 0 0 0;
68 | }
69 | 
70 | pre {
71 |   line-height: 1.3em;
72 |   padding: 0.4em 0 0.8em 0;
73 |   margin:  0 0 0 0;
74 |   border:  0;  /* border shorthand takes a single width, not four */
75 |   color: blue;
76 | }
77 | 
78 | .datatable {
79 |   margin-left: auto;
80 |   margin-right: auto;
81 |   margin-top: 2em;
82 |   margin-bottom: 2em;
83 |   border: 1px solid;
84 | }
85 | 
86 | .datatable td, .datatable th {  /* scope both cell types to .datatable */
87 |   padding: 0 0.5em 0 0.5em;
88 |   text-align: right;
89 | }
90 | 


--------------------------------------------------------------------------------
/leveldb/helpers/memenv/memenv.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #ifndef STORAGE_LEVELDB_HELPERS_MEMENV_MEMENV_H_
 6 | #define STORAGE_LEVELDB_HELPERS_MEMENV_MEMENV_H_
 7 | 
 8 | namespace leveldb {
 9 | 
10 | class Env;
11 | 
12 | // Returns a new environment that stores its data in memory and delegates
13 | // all non-file-storage tasks to base_env. The caller must delete the result
14 | // when it is no longer needed.
15 | // *base_env must remain live while the result is in use.
16 | Env* NewMemEnv(Env* base_env);
17 | 
18 | }  // namespace leveldb
19 | 
20 | #endif  // STORAGE_LEVELDB_HELPERS_MEMENV_MEMENV_H_
21 | 


--------------------------------------------------------------------------------
/leveldb/include/leveldb/write_batch.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | //
 5 | // WriteBatch holds a collection of updates to apply atomically to a DB.
 6 | //
 7 | // The updates are applied in the order in which they are added
 8 | // to the WriteBatch.  For example, the value of "key" will be "v3"
 9 | // after the following batch is written:
10 | //
11 | //    batch.Put("key", "v1");
12 | //    batch.Delete("key");
13 | //    batch.Put("key", "v2");
14 | //    batch.Put("key", "v3");
15 | //
16 | // Multiple threads can invoke const methods on a WriteBatch without
17 | // external synchronization, but if any of the threads may call a
18 | // non-const method, all threads accessing the same WriteBatch must use
19 | // external synchronization.
20 | 
21 | #ifndef STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_
22 | #define STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_
23 | 
24 | #include <string>
25 | #include "leveldb/status.h"
26 | 
27 | namespace leveldb {
28 | 
29 | class Slice;
30 | 
31 | class WriteBatch {
32 |  public:
33 |   WriteBatch();
34 |   ~WriteBatch();
35 | 
36 |   // Store the mapping "key->value" in the database.
37 |   void Put(const Slice& key, const Slice& value);
38 | 
39 |   // If the database contains a mapping for "key", erase it.  Else do nothing.
40 |   void Delete(const Slice& key);
41 | 
42 |   // Clear all updates buffered in this batch.
43 |   void Clear();
44 | 
45 |   // Support for iterating over the contents of a batch.
46 |   class Handler {
47 |    public:
48 |     virtual ~Handler();
49 |     virtual void Put(const Slice& key, const Slice& value) = 0;
50 |     virtual void Delete(const Slice& key) = 0;
51 |   };
52 |   Status Iterate(Handler* handler) const;
53 | 
54 |  private:
55 |   friend class WriteBatchInternal;
56 | 
57 |   std::string rep_;  // See comment in write_batch.cc for the format of rep_
58 | 
59 |   // Intentionally copyable
60 | };
61 | 
62 | }  // namespace leveldb
63 | 
64 | #endif  // STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_
65 | 


--------------------------------------------------------------------------------
/leveldb/issues/issue200_test.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2013 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | // Test for issue 200: when iterator switches direction from backward
 6 | // to forward, the current key can be yielded unexpectedly if a new
 7 | // mutation has been added just before the current key.
 8 | 
 9 | #include "leveldb/db.h"
10 | #include "util/testharness.h"
11 | 
12 | namespace leveldb {
13 | 
14 | class Issue200 { };
15 | 
16 | TEST(Issue200, Test) {
17 |   // Get rid of any state from an old run.
18 |   std::string dbpath = test::TmpDir() + "/leveldb_issue200_test";
19 |   DestroyDB(dbpath, Options());
20 | 
21 |   DB *db;
22 |   Options options;
23 |   options.create_if_missing = true;
24 |   ASSERT_OK(DB::Open(options, dbpath, &db));
25 | 
26 |   WriteOptions write_options;
27 |   ASSERT_OK(db->Put(write_options, "1", "b"));
28 |   ASSERT_OK(db->Put(write_options, "2", "c"));
29 |   ASSERT_OK(db->Put(write_options, "3", "d"));
30 |   ASSERT_OK(db->Put(write_options, "4", "e"));
31 |   ASSERT_OK(db->Put(write_options, "5", "f"));
32 | 
33 |   ReadOptions read_options;
34 |   Iterator *iter = db->NewIterator(read_options);
35 | 
36 |   // Add an element that should not be reflected in the iterator.
37 |   ASSERT_OK(db->Put(write_options, "25", "cd"));
38 | 
39 |   iter->Seek("5");
40 |   ASSERT_EQ(iter->key().ToString(), "5");
41 |   iter->Prev();
42 |   ASSERT_EQ(iter->key().ToString(), "4");
43 |   iter->Prev();
44 |   ASSERT_EQ(iter->key().ToString(), "3");
45 |   iter->Next();
46 |   ASSERT_EQ(iter->key().ToString(), "4");
47 |   iter->Next();
48 |   ASSERT_EQ(iter->key().ToString(), "5");
49 | 
50 |   delete iter;
51 |   delete db;
52 |   DestroyDB(dbpath, options);
53 | }
54 | 
55 | }  // namespace leveldb
56 | 
57 | int main(int argc, char** argv) {
58 |   return leveldb::test::RunAllTests();
59 | }
60 | 


--------------------------------------------------------------------------------
/leveldb/port/README:
--------------------------------------------------------------------------------
 1 | This directory contains interfaces and implementations that isolate the
 2 | rest of the package from platform details.
 3 | 
 4 | Code in the rest of the package includes "port.h" from this directory.
 5 | "port.h" in turn includes a platform specific "port_<platform>.h" file
 6 | that provides the platform specific implementation.
 7 | 
 8 | See port_posix.h for an example of what must be provided in a platform
 9 | specific header file.
10 | 
11 | 


--------------------------------------------------------------------------------
/leveldb/port/port.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #ifndef STORAGE_LEVELDB_PORT_PORT_H_
 6 | #define STORAGE_LEVELDB_PORT_PORT_H_
 7 | 
 8 | #include <string.h>
 9 | 
10 | // Include the appropriate platform specific file below.  If you are
11 | // porting to a new platform, see "port_example.h" for documentation
12 | // of what the new port_<platform>.h file must provide.
13 | #if defined(LEVELDB_PLATFORM_POSIX)
14 | #  include "port/port_posix.h"
15 | #elif defined(LEVELDB_PLATFORM_CHROMIUM)
16 | #  include "port/port_chromium.h"
17 | #endif
18 | 
19 | #endif  // STORAGE_LEVELDB_PORT_PORT_H_
20 | 


--------------------------------------------------------------------------------
/leveldb/port/port_posix.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #include "port/port_posix.h"
 6 | 
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/time.h>
#include <cstdlib>
#include "util/logging.h"
12 | namespace leveldb {
13 | namespace port {
14 | 
// Checks the result of a pthread call.  A zero "result" is a no-op; any
// other value is printed to stderr as "pthread <label>: <strerror text>"
// and the process is aborted.
static void PthreadCall(const char* label, int result) {
  if (result == 0) {
    return;
  }
  fprintf(stderr, "pthread %s: %s\n", label, strerror(result));
  abort();
}
21 | 
22 | Mutex::Mutex() { PthreadCall("init mutex", pthread_mutex_init(&mu_, NULL)); }
23 | 
24 | Mutex::~Mutex() { PthreadCall("destroy mutex", pthread_mutex_destroy(&mu_)); }
25 | 
26 | void Mutex::Lock() { PthreadCall("lock", pthread_mutex_lock(&mu_)); }
27 | 
28 | void Mutex::Unlock() { PthreadCall("unlock", pthread_mutex_unlock(&mu_)); }
29 | 
30 | CondVar::CondVar(Mutex* mu)
31 |     : mu_(mu) {
32 |     PthreadCall("init cv", pthread_cond_init(&cv_, NULL));
33 | }
34 | 
35 | CondVar::~CondVar() { PthreadCall("destroy cv", pthread_cond_destroy(&cv_)); }
36 | 
37 | void CondVar::Wait() {
38 |   PthreadCall("wait", pthread_cond_wait(&cv_, &mu_->mu_));
39 | }
40 | 
41 | void CondVar::Wait(int64_t reltime) {
42 |     struct timespec ts;
43 | #if defined(HAVE_POSIX_CLOCKS)
44 |     clock_gettime(CLOCK_REALTIME, &ts);
45 | #else // HAVE_POSIX_CLOCKS
46 |     // we don't support the clocks here.
47 |     struct timeval t;
48 |     gettimeofday(&t, NULL);
49 |     ts.tv_sec = t.tv_sec;
50 |     ts.tv_nsec= t.tv_usec*1000;
51 | #endif // HAVE_POSIX_CLOCKS
52 |     ts.tv_sec += reltime/1000000000;
53 |     ts.tv_nsec+= reltime%1000000000;
54 |     if (ts.tv_nsec >= 1000000000) {
55 |         ts.tv_nsec -= 1000000000;
56 |         ts.tv_sec  += 1;
57 |     }
58 |     pthread_cond_timedwait(&cv_, &mu_->mu_,&ts);
59 | }
60 | 
61 | void CondVar::Signal() {
62 |   PthreadCall("signal", pthread_cond_signal(&cv_));
63 | }
64 | 
65 | void CondVar::SignalAll() {
66 |   PthreadCall("broadcast", pthread_cond_broadcast(&cv_));
67 | }
68 | 
69 | void InitOnce(OnceType* once, void (*initializer)()) {
70 |   PthreadCall("once", pthread_once(once, initializer));
71 | }
72 | 
73 | }  // namespace port
74 | }  // namespace leveldb
75 | 


--------------------------------------------------------------------------------
/leveldb/port/thread_annotations.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #ifndef STORAGE_LEVELDB_PORT_THREAD_ANNOTATIONS_H
 6 | 
 7 | // Some environments provide custom macros to aid in static thread-safety
 8 | // analysis.  Provide empty definitions of such macros unless they are already
 9 | // defined.
10 | 
11 | #ifndef EXCLUSIVE_LOCKS_REQUIRED
12 | #define EXCLUSIVE_LOCKS_REQUIRED(...)
13 | #endif
14 | 
15 | #ifndef SHARED_LOCKS_REQUIRED
16 | #define SHARED_LOCKS_REQUIRED(...)
17 | #endif
18 | 
19 | #ifndef LOCKS_EXCLUDED
20 | #define LOCKS_EXCLUDED(...)
21 | #endif
22 | 
23 | #ifndef LOCK_RETURNED
24 | #define LOCK_RETURNED(x)
25 | #endif
26 | 
27 | #ifndef LOCKABLE
28 | #define LOCKABLE
29 | #endif
30 | 
31 | #ifndef SCOPED_LOCKABLE
32 | #define SCOPED_LOCKABLE
33 | #endif
34 | 
35 | #ifndef EXCLUSIVE_LOCK_FUNCTION
36 | #define EXCLUSIVE_LOCK_FUNCTION(...)
37 | #endif
38 | 
39 | #ifndef SHARED_LOCK_FUNCTION
40 | #define SHARED_LOCK_FUNCTION(...)
41 | #endif
42 | 
43 | #ifndef EXCLUSIVE_TRYLOCK_FUNCTION
44 | #define EXCLUSIVE_TRYLOCK_FUNCTION(...)
45 | #endif
46 | 
47 | #ifndef SHARED_TRYLOCK_FUNCTION
48 | #define SHARED_TRYLOCK_FUNCTION(...)
49 | #endif
50 | 
51 | #ifndef UNLOCK_FUNCTION
52 | #define UNLOCK_FUNCTION(...)
53 | #endif
54 | 
55 | #ifndef NO_THREAD_SAFETY_ANALYSIS
56 | #define NO_THREAD_SAFETY_ANALYSIS
57 | #endif
58 | 
59 | #endif  // STORAGE_LEVELDB_PORT_THREAD_ANNOTATIONS_H
60 | 


--------------------------------------------------------------------------------
/leveldb/port/win/stdint.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | // MSVC didn't ship with this file until the 2010 version.
 6 | 
 7 | #ifndef STORAGE_LEVELDB_PORT_WIN_STDINT_H_
 8 | #define STORAGE_LEVELDB_PORT_WIN_STDINT_H_
 9 | 
10 | #if !defined(_MSC_VER)
11 | #error This file should only be included when compiling with MSVC.
12 | #endif
13 | 
14 | // Define C99 equivalent types.
15 | typedef signed char           int8_t;
16 | typedef signed short          int16_t;
17 | typedef signed int            int32_t;
18 | typedef signed long long      int64_t;
19 | typedef unsigned char         uint8_t;
20 | typedef unsigned short        uint16_t;
21 | typedef unsigned int          uint32_t;
22 | typedef unsigned long long    uint64_t;
23 | 
24 | #endif  // STORAGE_LEVELDB_PORT_WIN_STDINT_H_
25 | 


--------------------------------------------------------------------------------
/leveldb/table/block.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #ifndef STORAGE_LEVELDB_TABLE_BLOCK_H_
 6 | #define STORAGE_LEVELDB_TABLE_BLOCK_H_
 7 | 
 8 | #include <stddef.h>
 9 | #include <stdint.h>
10 | #include "leveldb/iterator.h"
11 | 
12 | namespace leveldb {
13 | 
14 | struct BlockContents;
15 | class Comparator;
16 | 
17 | class Block {
18 |  public:
19 |   // Initialize the block with the specified contents.
20 |   explicit Block(const BlockContents& contents);
21 | 
22 |   ~Block();
23 | 
24 |   size_t size() const { return size_; }
25 |   Iterator* NewIterator(const Comparator* comparator);
26 | 
27 |  private:
28 |   uint32_t NumRestarts() const;
29 | 
30 |   const char* data_;
31 |   size_t size_;
32 |   uint32_t restart_offset_;     // Offset in data_ of restart array
33 |   bool owned_;                  // Block owns data_[]
34 | 
35 |   // No copying allowed
36 |   Block(const Block&);
37 |   void operator=(const Block&);
38 | 
39 |   class Iter;
40 | };
41 | 
42 | }  // namespace leveldb
43 | 
44 | #endif  // STORAGE_LEVELDB_TABLE_BLOCK_H_
45 | 


--------------------------------------------------------------------------------
/leveldb/table/block_builder.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #ifndef STORAGE_LEVELDB_TABLE_BLOCK_BUILDER_H_
 6 | #define STORAGE_LEVELDB_TABLE_BLOCK_BUILDER_H_
 7 | 
 8 | #include <vector>
 9 | 
10 | #include <stdint.h>
11 | #include "leveldb/slice.h"
12 | 
13 | namespace leveldb {
14 | 
15 | struct Options;
16 | 
17 | class BlockBuilder {
18 |  public:
19 |   explicit BlockBuilder(const Options* options);
20 | 
21 |   // Reset the contents as if the BlockBuilder was just constructed.
22 |   void Reset();
23 | 
24 |   // REQUIRES: Finish() has not been callled since the last call to Reset().
25 |   // REQUIRES: key is larger than any previously added key
26 |   void Add(const Slice& key, const Slice& value);
27 | 
28 |   // Finish building the block and return a slice that refers to the
29 |   // block contents.  The returned slice will remain valid for the
30 |   // lifetime of this builder or until Reset() is called.
31 |   Slice Finish();
32 | 
33 |   // Returns an estimate of the current (uncompressed) size of the block
34 |   // we are building.
35 |   size_t CurrentSizeEstimate() const;
36 | 
37 |   // Return true iff no entries have been added since the last Reset()
38 |   bool empty() const {
39 |     return buffer_.empty();
40 |   }
41 | 
42 |  private:
43 |   const Options*        options_;
44 |   std::string           buffer_;      // Destination buffer
45 |   std::vector<uint32_t> restarts_;    // Restart points
46 |   int                   counter_;     // Number of entries emitted since restart
47 |   bool                  finished_;    // Has Finish() been called?
48 |   std::string           last_key_;
49 | 
50 |   // No copying allowed
51 |   BlockBuilder(const BlockBuilder&);
52 |   void operator=(const BlockBuilder&);
53 | };
54 | 
55 | }  // namespace leveldb
56 | 
57 | #endif  // STORAGE_LEVELDB_TABLE_BLOCK_BUILDER_H_
58 | 


--------------------------------------------------------------------------------
/leveldb/table/filter_block.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | //
 5 | // A filter block is stored near the end of a Table file.  It contains
 6 | // filters (e.g., bloom filters) for all data blocks in the table combined
 7 | // into a single filter block.
 8 | 
 9 | #ifndef STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_
10 | #define STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_
11 | 
12 | #include <stddef.h>
13 | #include <stdint.h>
14 | #include <string>
15 | #include <vector>
16 | #include "leveldb/slice.h"
17 | #include "util/hash.h"
18 | 
19 | namespace leveldb {
20 | 
21 | class FilterPolicy;
22 | 
23 | // A FilterBlockBuilder is used to construct all of the filters for a
24 | // particular Table.  It generates a single string which is stored as
25 | // a special block in the Table.
26 | //
27 | // The sequence of calls to FilterBlockBuilder must match the regexp:
28 | //      (StartBlock AddKey*)* Finish
29 | class FilterBlockBuilder {
30 |  public:
31 |   explicit FilterBlockBuilder(const FilterPolicy*);
32 | 
33 |   void StartBlock(uint64_t block_offset);
34 |   void AddKey(const Slice& key);
35 |   Slice Finish();
36 | 
37 |  private:
38 |   void GenerateFilter();
39 | 
40 |   const FilterPolicy* policy_;
41 |   std::string keys_;              // Flattened key contents
42 |   std::vector<size_t> start_;     // Starting index in keys_ of each key
43 |   std::string result_;            // Filter data computed so far
44 |   std::vector<Slice> tmp_keys_;   // policy_->CreateFilter() argument
45 |   std::vector<uint32_t> filter_offsets_;
46 | 
47 |   // No copying allowed
48 |   FilterBlockBuilder(const FilterBlockBuilder&);
49 |   void operator=(const FilterBlockBuilder&);
50 | };
51 | 
52 | class FilterBlockReader {
53 |  public:
54 |  // REQUIRES: "contents" and *policy must stay live while *this is live.
55 |   FilterBlockReader(const FilterPolicy* policy, const Slice& contents);
56 |   bool KeyMayMatch(uint64_t block_offset, const Slice& key);
57 | 
58 |  private:
59 |   const FilterPolicy* policy_;
60 |   const char* data_;    // Pointer to filter data (at block-start)
61 |   const char* offset_;  // Pointer to beginning of offset array (at block-end)
62 |   size_t num_;          // Number of entries in offset array
63 |   size_t base_lg_;      // Encoding parameter (see kFilterBaseLg in .cc file)
64 | };
65 | 
66 | }
67 | 
68 | #endif  // STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_
69 | 


--------------------------------------------------------------------------------
/leveldb/table/iterator.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #include "leveldb/iterator.h"
 6 | 
 7 | namespace leveldb {
 8 | 
 9 | Iterator::Iterator() {
10 |   cleanup_.function = NULL;
11 |   cleanup_.next = NULL;
12 | }
13 | 
14 | Iterator::~Iterator() {
15 |   if (cleanup_.function != NULL) {
16 |     (*cleanup_.function)(cleanup_.arg1, cleanup_.arg2);
17 |     for (Cleanup* c = cleanup_.next; c != NULL; ) {
18 |       (*c->function)(c->arg1, c->arg2);
19 |       Cleanup* next = c->next;
20 |       delete c;
21 |       c = next;
22 |     }
23 |   }
24 | }
25 | 
26 | void Iterator::RegisterCleanup(CleanupFunction func, void* arg1, void* arg2) {
27 |   assert(func != NULL);
28 |   Cleanup* c;
29 |   if (cleanup_.function == NULL) {
30 |     c = &cleanup_;
31 |   } else {
32 |     c = new Cleanup;
33 |     c->next = cleanup_.next;
34 |     cleanup_.next = c;
35 |   }
36 |   c->function = func;
37 |   c->arg1 = arg1;
38 |   c->arg2 = arg2;
39 | }
40 | 
41 | namespace {
42 | class EmptyIterator : public Iterator {
43 |  public:
44 |   EmptyIterator(const Status& s) : status_(s) { }
45 |   virtual bool Valid() const { return false; }
46 |   virtual void Seek(const Slice& target) { }
47 |   virtual void SeekToFirst() { }
48 |   virtual void SeekToLast() { }
49 |   virtual void Next() { assert(false); }
50 |   virtual void Prev() { assert(false); }
51 |   Slice key() const { assert(false); return Slice(); }
52 |   Slice value() const { assert(false); return Slice(); }
53 |   virtual Status status() const { return status_; }
54 |  private:
55 |   Status status_;
56 | };
57 | }  // namespace
58 | 
59 | Iterator* NewEmptyIterator() {
60 |   return new EmptyIterator(Status::OK());
61 | }
62 | 
63 | Iterator* NewErrorIterator(const Status& status) {
64 |   return new EmptyIterator(status);
65 | }
66 | 
67 | }  // namespace leveldb
68 | 


--------------------------------------------------------------------------------
/leveldb/table/iterator_wrapper.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #ifndef STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_
 6 | #define STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_
 7 | 
 8 | namespace leveldb {
 9 | 
10 | // A internal wrapper class with an interface similar to Iterator that
11 | // caches the valid() and key() results for an underlying iterator.
12 | // This can help avoid virtual function calls and also gives better
13 | // cache locality.
14 | class IteratorWrapper {
15 |  public:
16 |   IteratorWrapper(): iter_(NULL), valid_(false) { }
17 |   explicit IteratorWrapper(Iterator* iter): iter_(NULL) {
18 |     Set(iter);
19 |   }
20 |   ~IteratorWrapper() { delete iter_; }
21 |   Iterator* iter() const { return iter_; }
22 | 
23 |   // Takes ownership of "iter" and will delete it when destroyed, or
24 |   // when Set() is invoked again.
25 |   void Set(Iterator* iter) {
26 |     delete iter_;
27 |     iter_ = iter;
28 |     if (iter_ == NULL) {
29 |       valid_ = false;
30 |     } else {
31 |       Update();
32 |     }
33 |   }
34 | 
35 | 
36 |   // Iterator interface methods
37 |   bool Valid() const        { return valid_; }
38 |   Slice key() const         { assert(Valid()); return key_; }
39 |   Slice value() const       { assert(Valid()); return iter_->value(); }
40 |   // Methods below require iter() != NULL
41 |   Status status() const     { assert(iter_); return iter_->status(); }
42 |   void Next()               { assert(iter_); iter_->Next();        Update(); }
43 |   void Prev()               { assert(iter_); iter_->Prev();        Update(); }
44 |   void Seek(const Slice& k) { assert(iter_); iter_->Seek(k);       Update(); }
45 |   void SeekToFirst()        { assert(iter_); iter_->SeekToFirst(); Update(); }
46 |   void SeekToLast()         { assert(iter_); iter_->SeekToLast();  Update(); }
47 | 
48 |  private:
49 |   void Update() {
50 |     valid_ = iter_->Valid();
51 |     if (valid_) {
52 |       key_ = iter_->key();
53 |     }
54 |   }
55 | 
56 |   Iterator* iter_;
57 |   bool valid_;
58 |   Slice key_;
59 | };
60 | 
61 | }  // namespace leveldb
62 | 
63 | #endif  // STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_
64 | 


--------------------------------------------------------------------------------
/leveldb/table/merger.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #ifndef STORAGE_LEVELDB_TABLE_MERGER_H_
 6 | #define STORAGE_LEVELDB_TABLE_MERGER_H_
 7 | 
 8 | namespace leveldb {
 9 | 
10 | class Comparator;
11 | class Iterator;
12 | 
13 | // Return an iterator that provided the union of the data in
14 | // children[0,n-1].  Takes ownership of the child iterators and
15 | // will delete them when the result iterator is deleted.
16 | //
17 | // The result does no duplicate suppression.  I.e., if a particular
18 | // key is present in K child iterators, it will be yielded K times.
19 | //
20 | // REQUIRES: n >= 0
21 | extern Iterator* NewMergingIterator(
22 |     const Comparator* comparator, Iterator** children, int n);
23 | 
24 | }  // namespace leveldb
25 | 
26 | #endif  // STORAGE_LEVELDB_TABLE_MERGER_H_
27 | 


--------------------------------------------------------------------------------
/leveldb/table/two_level_iterator.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #ifndef STORAGE_LEVELDB_TABLE_TWO_LEVEL_ITERATOR_H_
 6 | #define STORAGE_LEVELDB_TABLE_TWO_LEVEL_ITERATOR_H_
 7 | 
 8 | #include "leveldb/iterator.h"
 9 | 
10 | namespace leveldb {
11 | 
12 | struct ReadOptions;
13 | 
14 | // Return a new two level iterator.  A two-level iterator contains an
15 | // index iterator whose values point to a sequence of blocks where
16 | // each block is itself a sequence of key,value pairs.  The returned
17 | // two-level iterator yields the concatenation of all key/value pairs
18 | // in the sequence of blocks.  Takes ownership of "index_iter" and
19 | // will delete it when no longer needed.
20 | //
21 | // Uses a supplied function to convert an index_iter value into
22 | // an iterator over the contents of the corresponding block.
23 | extern Iterator* NewTwoLevelIterator(
24 |     Iterator* index_iter,
25 |     Iterator* (*block_function)(
26 |         void* arg,
27 |         const ReadOptions& options,
28 |         const Slice& index_value),
29 |     void* arg,
30 |     const ReadOptions& options);
31 | 
32 | }  // namespace leveldb
33 | 
34 | #endif  // STORAGE_LEVELDB_TABLE_TWO_LEVEL_ITERATOR_H_
35 | 


--------------------------------------------------------------------------------
/leveldb/util/arena.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #include "util/arena.h"
 6 | #include <assert.h>
 7 | 
 8 | namespace leveldb {
 9 | 
10 | static const int kBlockSize = 4096;
11 | 
12 | Arena::Arena() {
13 |   blocks_memory_ = 0;
14 |   alloc_ptr_ = NULL;  // First allocation will allocate a block
15 |   alloc_bytes_remaining_ = 0;
16 | }
17 | 
18 | Arena::~Arena() {
19 |   for (size_t i = 0; i < blocks_.size(); i++) {
20 |     delete[] blocks_[i];
21 |   }
22 | }
23 | 
24 | char* Arena::AllocateFallback(size_t bytes) {
25 |   if (bytes > kBlockSize / 4) {
26 |     // Object is more than a quarter of our block size.  Allocate it separately
27 |     // to avoid wasting too much space in leftover bytes.
28 |     char* result = AllocateNewBlock(bytes);
29 |     return result;
30 |   }
31 | 
32 |   // We waste the remaining space in the current block.
33 |   alloc_ptr_ = AllocateNewBlock(kBlockSize);
34 |   alloc_bytes_remaining_ = kBlockSize;
35 | 
36 |   char* result = alloc_ptr_;
37 |   alloc_ptr_ += bytes;
38 |   alloc_bytes_remaining_ -= bytes;
39 |   return result;
40 | }
41 | 
42 | char* Arena::AllocateAligned(size_t bytes) {
43 |   const int align = (sizeof(void*) > 8) ? sizeof(void*) : 8;
44 |   assert((align & (align-1)) == 0);   // Pointer size should be a power of 2
45 |   size_t current_mod = reinterpret_cast<uintptr_t>(alloc_ptr_) & (align-1);
46 |   size_t slop = (current_mod == 0 ? 0 : align - current_mod);
47 |   size_t needed = bytes + slop;
48 |   char* result;
49 |   if (needed <= alloc_bytes_remaining_) {
50 |     result = alloc_ptr_ + slop;
51 |     alloc_ptr_ += needed;
52 |     alloc_bytes_remaining_ -= needed;
53 |   } else {
54 |     // AllocateFallback always returned aligned memory
55 |     result = AllocateFallback(bytes);
56 |   }
57 |   assert((reinterpret_cast<uintptr_t>(result) & (align-1)) == 0);
58 |   return result;
59 | }
60 | 
61 | char* Arena::AllocateNewBlock(size_t block_bytes) {
62 |   char* result = new char[block_bytes];
63 |   blocks_memory_ += block_bytes;
64 |   blocks_.push_back(result);
65 |   return result;
66 | }
67 | 
68 | }  // namespace leveldb
69 | 


--------------------------------------------------------------------------------
/leveldb/util/arena.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #ifndef STORAGE_LEVELDB_UTIL_ARENA_H_
 6 | #define STORAGE_LEVELDB_UTIL_ARENA_H_
 7 | 
 8 | #include <vector>
 9 | #include <assert.h>
10 | #include <stddef.h>
11 | #include <stdint.h>
12 | 
13 | namespace leveldb {
14 | 
// Hands out memory carved from blocks obtained with new[]; all blocks are
// kept in blocks_.
class Arena {
 public:
  Arena();
  ~Arena();

  // Return a pointer to a newly allocated memory block of "bytes" bytes.
  char* Allocate(size_t bytes);

  // Allocate memory with the normal alignment guarantees provided by malloc
  char* AllocateAligned(size_t bytes);

  // Estimate of the total memory used by data allocated by the arena,
  // including space allocated but not yet used for user allocations.  It is
  // the bytes held in blocks plus the capacity of the block-pointer vector.
  size_t MemoryUsage() const {
    const size_t bookkeeping = blocks_.capacity() * sizeof(char*);
    return blocks_memory_ + bookkeeping;
  }

 private:
  char* AllocateFallback(size_t bytes);
  char* AllocateNewBlock(size_t block_bytes);

  // Allocation state
  char* alloc_ptr_;
  size_t alloc_bytes_remaining_;

  // Array of new[] allocated memory blocks
  std::vector<char*> blocks_;

  // Bytes of memory in blocks allocated so far
  size_t blocks_memory_;

  // Copying is disallowed: these are declared private.
  Arena(const Arena&);
  void operator=(const Arena&);
};
51 | 
52 | inline char* Arena::Allocate(size_t bytes) {
53 |   // The semantics of what to return are a bit messy if we allow
54 |   // 0-byte allocations, so we disallow them here (we don't need
55 |   // them for our internal use).
56 |   assert(bytes > 0);
57 |   if (bytes <= alloc_bytes_remaining_) {
58 |     char* result = alloc_ptr_;
59 |     alloc_ptr_ += bytes;
60 |     alloc_bytes_remaining_ -= bytes;
61 |     return result;
62 |   }
63 |   return AllocateFallback(bytes);
64 | }
65 | 
66 | }  // namespace leveldb
67 | 
68 | #endif  // STORAGE_LEVELDB_UTIL_ARENA_H_
69 | 


--------------------------------------------------------------------------------
/leveldb/util/arena_test.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #include "util/arena.h"
 6 | 
 7 | #include "util/random.h"
 8 | #include "util/testharness.h"
 9 | 
10 | namespace leveldb {
11 | 
12 | class ArenaTest { };
13 | 
14 | TEST(ArenaTest, Empty) {
15 |   Arena arena;
16 | }
17 | 
18 | TEST(ArenaTest, Simple) {
19 |   std::vector<std::pair<size_t, char*> > allocated;
20 |   Arena arena;
21 |   const int N = 100000;
22 |   size_t bytes = 0;
23 |   Random rnd(301);
24 |   for (int i = 0; i < N; i++) {
25 |     size_t s;
26 |     if (i % (N / 10) == 0) {
27 |       s = i;
28 |     } else {
29 |       s = rnd.OneIn(4000) ? rnd.Uniform(6000) :
30 |           (rnd.OneIn(10) ? rnd.Uniform(100) : rnd.Uniform(20));
31 |     }
32 |     if (s == 0) {
33 |       // Our arena disallows size 0 allocations.
34 |       s = 1;
35 |     }
36 |     char* r;
37 |     if (rnd.OneIn(10)) {
38 |       r = arena.AllocateAligned(s);
39 |     } else {
40 |       r = arena.Allocate(s);
41 |     }
42 | 
43 |     for (size_t b = 0; b < s; b++) {
44 |       // Fill the "i"th allocation with a known bit pattern
45 |       r[b] = i % 256;
46 |     }
47 |     bytes += s;
48 |     allocated.push_back(std::make_pair(s, r));
49 |     ASSERT_GE(arena.MemoryUsage(), bytes);
50 |     if (i > N/10) {
51 |       ASSERT_LE(arena.MemoryUsage(), bytes * 1.10);
52 |     }
53 |   }
54 |   for (size_t i = 0; i < allocated.size(); i++) {
55 |     size_t num_bytes = allocated[i].first;
56 |     const char* p = allocated[i].second;
57 |     for (size_t b = 0; b < num_bytes; b++) {
58 |       // Check the "i"th allocation for the known bit pattern
59 |       ASSERT_EQ(int(p[b]) & 0xff, i % 256);
60 |     }
61 |   }
62 | }
63 | 
64 | }  // namespace leveldb
65 | 
66 | int main(int argc, char** argv) {
67 |   return leveldb::test::RunAllTests();
68 | }
69 | 


--------------------------------------------------------------------------------
/leveldb/util/comparator.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #include <algorithm>
 6 | #include <stdint.h>
 7 | #include "leveldb/comparator.h"
 8 | #include "leveldb/slice.h"
 9 | #include "port/port.h"
10 | #include "util/logging.h"
11 | 
12 | namespace leveldb {
13 | 
// Out-of-line definition of the Comparator destructor; it has nothing to
// release.
Comparator::~Comparator() { }
15 | 
16 | namespace {
17 | class BytewiseComparatorImpl : public Comparator {
18 |  public:
19 |   BytewiseComparatorImpl() { }
20 | 
21 |   virtual const char* Name() const {
22 |     return "leveldb.BytewiseComparator";
23 |   }
24 | 
25 |   virtual int Compare(const Slice& a, const Slice& b) const {
26 |     return a.compare(b);
27 |   }
28 | 
29 |   virtual void FindShortestSeparator(
30 |       std::string* start,
31 |       const Slice& limit) const {
32 |     // Find length of common prefix
33 |     size_t min_length = std::min(start->size(), limit.size());
34 |     size_t diff_index = 0;
35 |     while ((diff_index < min_length) &&
36 |            ((*start)[diff_index] == limit[diff_index])) {
37 |       diff_index++;
38 |     }
39 | 
40 |     if (diff_index >= min_length) {
41 |       // Do not shorten if one string is a prefix of the other
42 |     } else {
43 |       uint8_t diff_byte = static_cast<uint8_t>((*start)[diff_index]);
44 |       if (diff_byte < static_cast<uint8_t>(0xff) &&
45 |           diff_byte + 1 < static_cast<uint8_t>(limit[diff_index])) {
46 |         (*start)[diff_index]++;
47 |         start->resize(diff_index + 1);
48 |         assert(Compare(*start, limit) < 0);
49 |       }
50 |     }
51 |   }
52 | 
53 |   virtual void FindShortSuccessor(std::string* key) const {
54 |     // Find first character that can be incremented
55 |     size_t n = key->size();
56 |     for (size_t i = 0; i < n; i++) {
57 |       const uint8_t byte = (*key)[i];
58 |       if (byte != static_cast<uint8_t>(0xff)) {
59 |         (*key)[i] = byte + 1;
60 |         key->resize(i+1);
61 |         return;
62 |       }
63 |     }
64 |     // *key is a run of 0xffs.  Leave it alone.
65 |   }
66 | };
67 | }  // namespace
68 | 
69 | static port::OnceType once = LEVELDB_ONCE_INIT;
70 | static const Comparator* bytewise;
71 | 
72 | static void InitModule() {
73 |   bytewise = new BytewiseComparatorImpl;
74 | }
75 | 
76 | const Comparator* BytewiseComparator() {
77 |   port::InitOnce(&once, InitModule);
78 |   return bytewise;
79 | }
80 | 
81 | }  // namespace leveldb
82 | 


--------------------------------------------------------------------------------
/leveldb/util/crc32c.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #ifndef STORAGE_LEVELDB_UTIL_CRC32C_H_
 6 | #define STORAGE_LEVELDB_UTIL_CRC32C_H_
 7 | 
 8 | #include <stddef.h>
 9 | #include <stdint.h>
10 | 
11 | namespace leveldb {
12 | namespace crc32c {
13 | 
// Return the crc32c of concat(A, data[0,n-1]) where init_crc is the
// crc32c of some string A.  Extend() is often used to maintain the
// crc32c of a stream of data.
extern uint32_t Extend(uint32_t init_crc, const char* data, size_t n);

// Return the crc32c of data[0,n-1].  This is Extend() started from an
// initial crc of zero.
inline uint32_t Value(const char* data, size_t n) {
  const uint32_t initial_crc = 0;
  return Extend(initial_crc, data, n);
}
23 | 
// Constant added to the rotated crc by Mask() and subtracted again by
// Unmask().
static const uint32_t kMaskDelta = 0xa282ead8ul;

// Return a masked representation of crc.
//
// Motivation: it is problematic to compute the CRC of a string that
// contains embedded CRCs.  Therefore we recommend that CRCs stored
// somewhere (e.g., in files) should be masked before being stored.
inline uint32_t Mask(uint32_t crc) {
  // Rotate right by 15 bits, then add the mask constant.
  const uint32_t rotated = (crc << 17) | (crc >> 15);
  return rotated + kMaskDelta;
}

// Return the crc whose masked representation is masked_crc.
inline uint32_t Unmask(uint32_t masked_crc) {
  // Undo the addition first, then rotate left by 15 bits.
  const uint32_t rotated = masked_crc - kMaskDelta;
  return (rotated << 15) | (rotated >> 17);
}
41 | 
42 | }  // namespace crc32c
43 | }  // namespace leveldb
44 | 
45 | #endif  // STORAGE_LEVELDB_UTIL_CRC32C_H_
46 | 


--------------------------------------------------------------------------------
/leveldb/util/crc32c_test.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #include "util/crc32c.h"
 6 | #include "util/testharness.h"
 7 | 
 8 | namespace leveldb {
 9 | namespace crc32c {
10 | 
// Empty type named as the first argument of the TEST(CRC, ...) cases below.
// NOTE(review): presumably the test harness uses it as the test group /
// base class -- confirm against util/testharness.h.
class CRC { };
12 | 
13 | TEST(CRC, StandardResults) {
14 |   // From rfc3720 section B.4.
15 |   char buf[32];
16 | 
17 |   memset(buf, 0, sizeof(buf));
18 |   ASSERT_EQ(0x8a9136aa, Value(buf, sizeof(buf)));
19 | 
20 |   memset(buf, 0xff, sizeof(buf));
21 |   ASSERT_EQ(0x62a8ab43, Value(buf, sizeof(buf)));
22 | 
23 |   for (int i = 0; i < 32; i++) {
24 |     buf[i] = i;
25 |   }
26 |   ASSERT_EQ(0x46dd794e, Value(buf, sizeof(buf)));
27 | 
28 |   for (int i = 0; i < 32; i++) {
29 |     buf[i] = 31 - i;
30 |   }
31 |   ASSERT_EQ(0x113fdb5c, Value(buf, sizeof(buf)));
32 | 
33 |   unsigned char data[48] = {
34 |     0x01, 0xc0, 0x00, 0x00,
35 |     0x00, 0x00, 0x00, 0x00,
36 |     0x00, 0x00, 0x00, 0x00,
37 |     0x00, 0x00, 0x00, 0x00,
38 |     0x14, 0x00, 0x00, 0x00,
39 |     0x00, 0x00, 0x04, 0x00,
40 |     0x00, 0x00, 0x00, 0x14,
41 |     0x00, 0x00, 0x00, 0x18,
42 |     0x28, 0x00, 0x00, 0x00,
43 |     0x00, 0x00, 0x00, 0x00,
44 |     0x02, 0x00, 0x00, 0x00,
45 |     0x00, 0x00, 0x00, 0x00,
46 |   };
47 |   ASSERT_EQ(0xd9963a56, Value(reinterpret_cast<char*>(data), sizeof(data)));
48 | }
49 | 
50 | TEST(CRC, Values) {
51 |   ASSERT_NE(Value("a", 1), Value("foo", 3));
52 | }
53 | 
54 | TEST(CRC, Extend) {
55 |   ASSERT_EQ(Value("hello world", 11),
56 |             Extend(Value("hello ", 6), "world", 5));
57 | }
58 | 
59 | TEST(CRC, Mask) {
60 |   uint32_t crc = Value("foo", 3);
61 |   ASSERT_NE(crc, Mask(crc));
62 |   ASSERT_NE(crc, Mask(Mask(crc)));
63 |   ASSERT_EQ(crc, Unmask(Mask(crc)));
64 |   ASSERT_EQ(crc, Unmask(Unmask(Mask(Mask(crc)))));
65 | }
66 | 
67 | }  // namespace crc32c
68 | }  // namespace leveldb
69 | 
70 | int main(int argc, char** argv) {
71 |   return leveldb::test::RunAllTests();
72 | }
73 | 


--------------------------------------------------------------------------------
/leveldb/util/filter_policy.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #include "leveldb/filter_policy.h"
 6 | 
 7 | namespace leveldb {
 8 | 
// Out-of-line definition of the FilterPolicy destructor; it has nothing to
// release.
FilterPolicy::~FilterPolicy() { }
10 | 
11 | }  // namespace leveldb
12 | 


--------------------------------------------------------------------------------
/leveldb/util/hash.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #include <string.h>
 6 | #include "util/coding.h"
 7 | #include "util/hash.h"
 8 | 
 9 | // The FALLTHROUGH_INTENDED macro can be used to annotate implicit fall-through
10 | // between switch labels. The real definition should be provided externally.
11 | // This one is a fallback version for unsupported compilers.
12 | #ifndef FALLTHROUGH_INTENDED
13 | #define FALLTHROUGH_INTENDED do { } while (0)
14 | #endif
15 | 
16 | namespace leveldb {
17 | 
18 | uint32_t Hash(const char* data, size_t n, uint32_t seed) {
19 |   // Similar to murmur hash
20 |   const uint32_t m = 0xc6a4a793;
21 |   const uint32_t r = 24;
22 |   const char* limit = data + n;
23 |   uint32_t h = seed ^ (n * m);
24 | 
25 |   // Pick up four bytes at a time
26 |   while (data + 4 <= limit) {
27 |     uint32_t w = DecodeFixed32(data);
28 |     data += 4;
29 |     h += w;
30 |     h *= m;
31 |     h ^= (h >> 16);
32 |   }
33 | 
34 |   // Pick up remaining bytes
35 |   switch (limit - data) {
36 |     case 3:
37 |       h += data[2] << 16;
38 |       FALLTHROUGH_INTENDED;
39 |     case 2:
40 |       h += data[1] << 8;
41 |       FALLTHROUGH_INTENDED;
42 |     case 1:
43 |       h += data[0];
44 |       h *= m;
45 |       h ^= (h >> r);
46 |       break;
47 |   }
48 |   return h;
49 | }
50 | 
51 | 
52 | }  // namespace leveldb
53 | 


--------------------------------------------------------------------------------
/leveldb/util/hash.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | //
 5 | // Simple hash function used for internal data structures
 6 | 
 7 | #ifndef STORAGE_LEVELDB_UTIL_HASH_H_
 8 | #define STORAGE_LEVELDB_UTIL_HASH_H_
 9 | 
10 | #include <stddef.h>
11 | #include <stdint.h>
12 | 
13 | namespace leveldb {
14 | 
// Return a 32-bit hash of data[0,n-1], mixed with "seed".
extern uint32_t Hash(const char* data, size_t n, uint32_t seed);
16 | 
17 | }
18 | 
19 | #endif  // STORAGE_LEVELDB_UTIL_HASH_H_
20 | 


--------------------------------------------------------------------------------
/leveldb/util/histogram.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #ifndef STORAGE_LEVELDB_UTIL_HISTOGRAM_H_
 6 | #define STORAGE_LEVELDB_UTIL_HISTOGRAM_H_
 7 | 
 8 | #include <string>
 9 | 
10 | namespace leveldb {
11 | 
// Accumulates double-valued samples into a fixed number of buckets and
// renders a textual summary via ToString().  Only the interface is
// declared here; the member functions are defined out of line.
class Histogram {
 public:
  // The constructor body is empty and the data members have no
  // initializers, so a freshly constructed object holds indeterminate
  // values.
  // NOTE(review): presumably callers must invoke Clear() before the first
  // Add() -- confirm against the implementation file.
  Histogram() { }
  ~Histogram() { }

  void Clear();
  void Add(double value);
  void Merge(const Histogram& other);

  std::string ToString() const;

 private:
  // Running statistics.  NOTE(review): meanings inferred from the member
  // names (smallest/largest sample, sample count, sum, sum of squares) --
  // confirm against the implementation file.
  double min_;
  double max_;
  double num_;
  double sum_;
  double sum_squares_;

  // Number of buckets; sizes both the static limit table and the
  // per-instance bucket array below.
  enum { kNumBuckets = 154 };
  static const double kBucketLimit[kNumBuckets];
  double buckets_[kNumBuckets];

  // Private statistics helpers.
  double Median() const;
  double Percentile(double p) const;
  double Average() const;
  double StandardDeviation() const;
};
39 | 
40 | }  // namespace leveldb
41 | 
42 | #endif  // STORAGE_LEVELDB_UTIL_HISTOGRAM_H_
43 | 


--------------------------------------------------------------------------------
/leveldb/util/logging.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #include "util/logging.h"
 6 | 
 7 | #include <errno.h>
 8 | #include <stdarg.h>
 9 | #include <stdio.h>
10 | #include <stdlib.h>
11 | #include "leveldb/env.h"
12 | #include "leveldb/slice.h"
13 | 
14 | namespace leveldb {
15 | 
// Appends the decimal representation of "num" to *str.
void AppendNumberTo(std::string* str, uint64_t num) {
  // A uint64_t has at most 20 decimal digits, so 30 bytes always suffice.
  char digits[30];
  const unsigned long long printable = static_cast<unsigned long long>(num);
  snprintf(digits, sizeof(digits), "%llu", printable);
  str->append(digits);
}
21 | 
22 | void AppendEscapedStringTo(std::string* str, const Slice& value) {
23 |   for (size_t i = 0; i < value.size(); i++) {
24 |     char c = value[i];
25 |     if (c >= ' ' && c <= '~') {
26 |       str->push_back(c);
27 |     } else {
28 |       char buf[10];
29 |       snprintf(buf, sizeof(buf), "\\x%02x",
30 |                static_cast<unsigned int>(c) & 0xff);
31 |       str->append(buf);
32 |     }
33 |   }
34 | }
35 | 
36 | std::string NumberToString(uint64_t num) {
37 |   std::string r;
38 |   AppendNumberTo(&r, num);
39 |   return r;
40 | }
41 | 
42 | std::string EscapeString(const Slice& value) {
43 |   std::string r;
44 |   AppendEscapedStringTo(&r, value);
45 |   return r;
46 | }
47 | 
48 | bool ConsumeChar(Slice* in, char c) {
49 |   if (!in->empty() && (*in)[0] == c) {
50 |     in->remove_prefix(1);
51 |     return true;
52 |   } else {
53 |     return false;
54 |   }
55 | }
56 | 
57 | bool ConsumeDecimalNumber(Slice* in, uint64_t* val) {
58 |   uint64_t v = 0;
59 |   int digits = 0;
60 |   while (!in->empty()) {
61 |     char c = (*in)[0];
62 |     if (c >= '0' && c <= '9') {
63 |       ++digits;
64 |       const int delta = (c - '0');
65 |       static const uint64_t kMaxUint64 = ~static_cast<uint64_t>(0);
66 |       if (v > kMaxUint64/10 ||
67 |           (v == kMaxUint64/10 && delta > kMaxUint64%10)) {
68 |         // Overflow
69 |         return false;
70 |       }
71 |       v = (v * 10) + delta;
72 |       in->remove_prefix(1);
73 |     } else {
74 |       break;
75 |     }
76 |   }
77 |   *val = v;
78 |   return (digits > 0);
79 | }
80 | 
81 | }  // namespace leveldb
82 | 


--------------------------------------------------------------------------------
/leveldb/util/logging.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | //
 5 | // Must not be included from any .h files to avoid polluting the namespace
 6 | // with macros.
 7 | 
 8 | #ifndef STORAGE_LEVELDB_UTIL_LOGGING_H_
 9 | #define STORAGE_LEVELDB_UTIL_LOGGING_H_
10 | 
11 | #include <stdio.h>
12 | #include <stdint.h>
13 | #include <string>
14 | #include "port/port.h"
15 | 
16 | namespace leveldb {
17 | 
18 | class Slice;
19 | class WritableFile;
20 | 
// Append a human-readable (decimal) printout of "num" to *str
extern void AppendNumberTo(std::string* str, uint64_t num);

// Append a human-readable printout of "value" to *str.
// Escapes any non-printable characters found in "value".
extern void AppendEscapedStringTo(std::string* str, const Slice& value);

// Return a human-readable (decimal) printout of "num"
extern std::string NumberToString(uint64_t num);

// Return a human-readable version of "value".
// Escapes any non-printable characters found in "value".
extern std::string EscapeString(const Slice& value);

// If *in starts with "c", advances *in past the first character and
// returns true.  Otherwise, returns false.
extern bool ConsumeChar(Slice* in, char c);

// Parse a human-readable (decimal) number from "*in" into "*val".  On
// success, advances "*in" past the consumed number and sets "*val" to the
// numeric value.  Otherwise, returns false and leaves *in in an
// unspecified state.
extern bool ConsumeDecimalNumber(Slice* in, uint64_t* val);
44 | 
45 | }  // namespace leveldb
46 | 
47 | #endif  // STORAGE_LEVELDB_UTIL_LOGGING_H_
48 | 


--------------------------------------------------------------------------------
/leveldb/util/mutexlock.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #ifndef STORAGE_LEVELDB_UTIL_MUTEXLOCK_H_
 6 | #define STORAGE_LEVELDB_UTIL_MUTEXLOCK_H_
 7 | 
 8 | #include "port/port.h"
 9 | #include "port/thread_annotations.h"
10 | 
11 | namespace leveldb {
12 | 
13 | // Helper class that locks a mutex on construction and unlocks the mutex when
14 | // the destructor of the MutexLock object is invoked.
15 | //
16 | // Typical usage:
17 | //
18 | //   void MyClass::MyMethod() {
19 | //     MutexLock l(&mu_);       // mu_ is an instance variable
20 | //     ... some complex code, possibly with multiple return paths ...
21 | //   }
22 | 
23 | class SCOPED_LOCKABLE MutexLock {
24 |  public:
25 |   explicit MutexLock(port::Mutex *mu) EXCLUSIVE_LOCK_FUNCTION(mu)
26 |       : mu_(mu)  {
27 |     this->mu_->Lock();
28 |   }
29 |   ~MutexLock() UNLOCK_FUNCTION() { this->mu_->Unlock(); }
30 | 
31 |  private:
32 |   port::Mutex *const mu_;
33 |   // No copying allowed
34 |   MutexLock(const MutexLock&);
35 |   void operator=(const MutexLock&);
36 | };
37 | 
38 | }  // namespace leveldb
39 | 
40 | 
41 | #endif  // STORAGE_LEVELDB_UTIL_MUTEXLOCK_H_
42 | 


--------------------------------------------------------------------------------
/leveldb/util/options.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #include "leveldb/options.h"
 6 | 
 7 | #include "leveldb/comparator.h"
 8 | #include "leveldb/env.h"
 9 | 
10 | namespace leveldb {
11 | 
// Default-constructs an Options object: the comparator is
// BytewiseComparator(), the environment is Env::Default(), every boolean
// flag is false, every other pointer-valued option is NULL, and the
// numeric options receive the literal defaults listed below.
Options::Options()
    : comparator(BytewiseComparator()),
      create_if_missing(false),
      error_if_exists(false),
      paranoid_checks(false),
      env(Env::Default()),
      info_log(NULL),
      write_buffer_size(4<<20),  // 4 * 2^20 = 4194304
      max_open_files(1000),
      block_cache(NULL),
      block_size(4096),
      block_restart_interval(16),
      compression(kSnappyCompression),
      filter_policy(NULL) {
}
27 | 
28 | 
29 | }  // namespace leveldb
30 | 


--------------------------------------------------------------------------------
/leveldb/util/random.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #ifndef STORAGE_LEVELDB_UTIL_RANDOM_H_
 6 | #define STORAGE_LEVELDB_UTIL_RANDOM_H_
 7 | 
 8 | #include <stdint.h>
 9 | 
10 | namespace leveldb {
11 | 
// A very simple random number generator.  Not especially good at
// generating truly random bits, but good enough for our needs in this
// package.
class Random {
 private:
  // Current generator state.  After construction it is never 0 and never
  // 2^31-1 (see the constructor).
  uint32_t seed_;

 public:
  // Seeds the generator with the low 31 bits of "s".
  explicit Random(uint32_t s) : seed_(s & 0x7fffffffu) {
    // Avoid bad seeds: 0 and 2^31-1 would make Next() return the same
    // value forever.
    const bool bad_seed = (seed_ == 0 || seed_ == 2147483647L);
    if (bad_seed) {
      seed_ = 1;
    }
  }

  // Advances the generator and returns the new state.
  uint32_t Next() {
    static const uint32_t kModulus = 2147483647L;   // 2^31-1
    static const uint64_t kMultiplier = 16807;  // bits 14, 8, 7, 5, 2, 1, 0
    // We are computing
    //       seed_ = (seed_ * kMultiplier) % kModulus
    //
    // seed_ must not be zero or kModulus, or else all subsequent computed
    // values will be zero or kModulus respectively.  For all other values,
    // seed_ will end up cycling through every number in [1,kModulus-1].
    const uint64_t product = seed_ * kMultiplier;

    // Compute (product % kModulus) using the fact that
    // ((x << 31) % kModulus) == x.
    uint32_t reduced =
        static_cast<uint32_t>((product >> 31) + (product & kModulus));
    // The first reduction may overflow by 1 bit, so we may need to
    // repeat.  reduced == kModulus is not possible; using > allows the
    // faster sign-bit-based test.
    if (reduced > kModulus) {
      reduced -= kModulus;
    }
    seed_ = reduced;
    return seed_;
  }

  // Returns a uniformly distributed value in the range [0..n-1]
  // REQUIRES: n > 0
  uint32_t Uniform(int n) {
    const uint32_t raw = Next();
    return raw % n;
  }

  // Randomly returns true ~"1/n" of the time, and false otherwise.
  // REQUIRES: n > 0
  bool OneIn(int n) {
    const uint32_t remainder = Next() % n;
    return remainder == 0;
  }

  // Skewed: pick "base" uniformly from range [0,max_log] and then
  // return "base" random bits.  The effect is to pick a number in the
  // range [0,2^max_log-1] with exponential bias towards smaller numbers.
  uint32_t Skewed(int max_log) {
    const uint32_t base = Uniform(max_log + 1);
    return Uniform(1 << base);
  }
};
61 | 
62 | }  // namespace leveldb
63 | 
64 | #endif  // STORAGE_LEVELDB_UTIL_RANDOM_H_
65 | 


--------------------------------------------------------------------------------
/leveldb/util/status.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #include <stdio.h>
 6 | #include "port/port.h"
 7 | #include "leveldb/status.h"
 8 | 
 9 | namespace leveldb {
10 | 
11 | const char* Status::CopyState(const char* state) {
12 |   uint32_t size;
13 |   memcpy(&size, state, sizeof(size));
14 |   char* result = new char[size + 5];
15 |   memcpy(result, state, size + 5);
16 |   return result;
17 | }
18 | 
19 | Status::Status(Code code, const Slice& msg, const Slice& msg2) {
20 |   assert(code != kOk);
21 |   const uint32_t len1 = msg.size();
22 |   const uint32_t len2 = msg2.size();
23 |   const uint32_t size = len1 + (len2 ? (2 + len2) : 0);
24 |   char* result = new char[size + 5];
25 |   memcpy(result, &size, sizeof(size));
26 |   result[4] = static_cast<char>(code);
27 |   memcpy(result + 5, msg.data(), len1);
28 |   if (len2) {
29 |     result[5 + len1] = ':';
30 |     result[6 + len1] = ' ';
31 |     memcpy(result + 7 + len1, msg2.data(), len2);
32 |   }
33 |   state_ = result;
34 | }
35 | 
36 | std::string Status::ToString() const {
37 |   if (state_ == NULL) {
38 |     return "OK";
39 |   } else {
40 |     char tmp[30];
41 |     const char* type;
42 |     switch (code()) {
43 |       case kOk:
44 |         type = "OK";
45 |         break;
46 |       case kNotFound:
47 |         type = "NotFound: ";
48 |         break;
49 |       case kCorruption:
50 |         type = "Corruption: ";
51 |         break;
52 |       case kNotSupported:
53 |         type = "Not implemented: ";
54 |         break;
55 |       case kInvalidArgument:
56 |         type = "Invalid argument: ";
57 |         break;
58 |       case kIOError:
59 |         type = "IO error: ";
60 |         break;
61 |       default:
62 |         snprintf(tmp, sizeof(tmp), "Unknown code(%d): ",
63 |                  static_cast<int>(code()));
64 |         type = tmp;
65 |         break;
66 |     }
67 |     std::string result(type);
68 |     uint32_t length;
69 |     memcpy(&length, state_, sizeof(length));
70 |     result.append(state_ + 5, length);
71 |     return result;
72 |   }
73 | }
74 | 
75 | }  // namespace leveldb
76 | 


--------------------------------------------------------------------------------
/leveldb/util/testharness.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #include "util/testharness.h"
 6 | 
 7 | #include <string>
 8 | #include <stdlib.h>
 9 | #include <sys/stat.h>
10 | #include <sys/types.h>
11 | 
12 | namespace leveldb {
13 | namespace test {
14 | 
namespace {
// One registered test case: the two parts of its name and the function
// that runs it.
struct Test {
  const char* base;
  const char* name;
  void (*func)();
};
// Registry of all tests, in registration order.  Allocated on the first
// call to RegisterTest() and never freed by this file.
std::vector<Test>* tests;
}

// Appends a test to the registry, creating the registry on first use.
// Always returns true.
bool RegisterTest(const char* base, const char* name, void (*func)()) {
  if (!tests) {
    tests = new std::vector<Test>;
  }
  const Test entry = { base, name, func };
  tests->push_back(entry);
  return true;
}
35 | 
36 | int RunAllTests() {
37 |   const char* matcher = getenv("LEVELDB_TESTS");
38 | 
39 |   int num = 0;
40 |   if (tests != NULL) {
41 |     for (size_t i = 0; i < tests->size(); i++) {
42 |       const Test& t = (*tests)[i];
43 |       if (matcher != NULL) {
44 |         std::string name = t.base;
45 |         name.push_back('.');
46 |         name.append(t.name);
47 |         if (strstr(name.c_str(), matcher) == NULL) {
48 |           continue;
49 |         }
50 |       }
51 |       fprintf(stderr, "==== Test %s.%s\n", t.base, t.name);
52 |       (*t.func)();
53 |       ++num;
54 |     }
55 |   }
56 |   fprintf(stderr, "==== PASSED %d tests\n", num);
57 |   return 0;
58 | }
59 | 
60 | std::string TmpDir() {
61 |   std::string dir;
62 |   Status s = Env::Default()->GetTestDirectory(&dir);
63 |   ASSERT_TRUE(s.ok()) << s.ToString();
64 |   return dir;
65 | }
66 | 
// Returns the seed given by the TEST_RANDOM_SEED environment variable
// when it parses (via atoi) to a positive number, and 301 otherwise.
int RandomSeed() {
  static const int kDefaultSeed = 301;

  const char* env = getenv("TEST_RANDOM_SEED");
  if (env == NULL) {
    return kDefaultSeed;
  }
  const int parsed = atoi(env);
  return (parsed > 0) ? parsed : kDefaultSeed;
}
75 | 
76 | }  // namespace test
77 | }  // namespace leveldb
78 | 


--------------------------------------------------------------------------------
/leveldb/util/testutil.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #include "util/testutil.h"
 6 | 
 7 | #include "util/random.h"
 8 | 
 9 | namespace leveldb {
10 | namespace test {
11 | 
12 | Slice RandomString(Random* rnd, int len, std::string* dst) {
13 |   dst->resize(len);
14 |   for (int i = 0; i < len; i++) {
15 |     (*dst)[i] = static_cast<char>(' ' + rnd->Uniform(95));   // ' ' .. '~'
16 |   }
17 |   return Slice(*dst);
18 | }
19 | 
20 | std::string RandomKey(Random* rnd, int len) {
21 |   // Make sure to generate a wide variety of characters so we
22 |   // test the boundary conditions for short-key optimizations.
23 |   static const char kTestChars[] = {
24 |     '\0', '\1', 'a', 'b', 'c', 'd', 'e', '\xfd', '\xfe', '\xff'
25 |   };
26 |   std::string result;
27 |   for (int i = 0; i < len; i++) {
28 |     result += kTestChars[rnd->Uniform(sizeof(kTestChars))];
29 |   }
30 |   return result;
31 | }
32 | 
33 | 
34 | extern Slice CompressibleString(Random* rnd, double compressed_fraction,
35 |                                 size_t len, std::string* dst) {
36 |   int raw = static_cast<int>(len * compressed_fraction);
37 |   if (raw < 1) raw = 1;
38 |   std::string raw_data;
39 |   RandomString(rnd, raw, &raw_data);
40 | 
41 |   // Duplicate the random data until we have filled "len" bytes
42 |   dst->clear();
43 |   while (dst->size() < len) {
44 |     dst->append(raw_data);
45 |   }
46 |   dst->resize(len);
47 |   return Slice(*dst);
48 | }
49 | 
50 | }  // namespace test
51 | }  // namespace leveldb
52 | 


--------------------------------------------------------------------------------
/leveldb/util/testutil.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4 | 
 5 | #ifndef STORAGE_LEVELDB_UTIL_TESTUTIL_H_
 6 | #define STORAGE_LEVELDB_UTIL_TESTUTIL_H_
 7 | 
 8 | #include "leveldb/env.h"
 9 | #include "leveldb/slice.h"
10 | #include "util/random.h"
11 | 
12 | namespace leveldb {
13 | namespace test {
14 | 
// Store in *dst a random string of length "len" and return a Slice that
// references the generated data.
extern Slice RandomString(Random* rnd, int len, std::string* dst);

// Return a random key with the specified length that may contain interesting
// characters (e.g. \x00, \xff, etc.).
extern std::string RandomKey(Random* rnd, int len);

// Store in *dst a string of length "len" that will compress to
// about "len*compressed_fraction" bytes and return a Slice that references
// the generated data.
extern Slice CompressibleString(Random* rnd, double compressed_fraction,
                                size_t len, std::string* dst);
28 | 
29 | // A wrapper that allows injection of errors.
30 | class ErrorEnv : public EnvWrapper {
31 |  public:
32 |   bool writable_file_error_;
33 |   int num_writable_file_errors_;
34 | 
35 |   ErrorEnv() : EnvWrapper(Env::Default()),
36 |                writable_file_error_(false),
37 |                num_writable_file_errors_(0) { }
38 | 
39 |   virtual Status NewWritableFile(const std::string& fname,
40 |                                  WritableFile** result) {
41 |     if (writable_file_error_) {
42 |       ++num_writable_file_errors_;
43 |       *result = NULL;
44 |       return Status::IOError(fname, "fake error");
45 |     }
46 |     return target()->NewWritableFile(fname, result);
47 |   }
48 | };
49 | 
50 | }  // namespace test
51 | }  // namespace leveldb
52 | 
53 | #endif  // STORAGE_LEVELDB_UTIL_TESTUTIL_H_
54 | 


--------------------------------------------------------------------------------
/lib/EWAHBoolArray/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wheresvic/zsearch/72fe592a04c5fc90075475c2590bc3b8db03ed5d/lib/EWAHBoolArray/.DS_Store


--------------------------------------------------------------------------------
/lib/EWAHBoolArray/CHANGELOG:
--------------------------------------------------------------------------------
 1 | 0.3.2, 0.3.3, 0.3.4 (June 12 to 15th 2012)
 2 |   Minor releases trying to fix compatibility issues with Microsoft compilers.
 3 | 
 4 | 0.3.1 (May 28th, 2012)
 5 |   Fixed a perf. regression bug.
 6 | 
 7 | 0.3.0  (May 24th, 2012)
 8 |   Fix a bug with the computation of the cardinality.
 9 |   Improved compatibility with Microsoft compilers.
10 |   Introduced a toArray method to retrieve the set bits quickly.
11 | 
12 | 0.2.2 (May 24th, 2012)
13 |   Fix a compilation problem under GCC 4.5.
14 |   Added a benchmarking utility. 
15 |   Minor improvements.
16 | 
17 | 0.2.1 (May 21st, 2012)
18 |   Performance boost when decoding (uses GCC intrinsics, please contribute MS-equivalent)
19 |   
20 | 0.2.0 (April 27th 2012)
21 |   Minor refactor, cleaned the code somewhat. Removed less useful methods.
22 | 
23 | 0.1.0 (April 19th 2012)
24 |   Added a unit test related to how we compute the cardinality
25 |   Accelerated the computation of the cardinality
26 | 


--------------------------------------------------------------------------------
/lib/EWAHBoolArray/README:
--------------------------------------------------------------------------------
 1 | Compressed bitset in C++
 2 | Daniel Lemire
 3 | 
 4 | 
 5 | 
 6 | == What is this? ==
 7 | 
 8 | The class EWAHBoolArray is a compressed bitset data structure.
 9 | 
10 | == Licensing ==
11 | 
12 | Apache License 2.0. (Other licenses are possible.)
13 | 
14 | == Limitations ==
15 | 
16 | Because of the compression type being used, you must set the bits
17 | in increasing order (no random access).
18 | 
19 | 
20 | == STL and copy constructors ==
21 | 
22 | I expect most people to construct rather large bitmaps. For this
23 | reason, you should avoid copies.
24 | 
25 | Thus, the code will warn you against doing this:
26 | 
27 |     EWAHBoolArray<uword> bitset1;
28 |     bitset1.set(1);
29 |     bitset1.set(2);
30 |     bitset1.set(1000);
31 |     bitset1.set(1001);
32 |     vector< EWAHBoolArray<uword> > testVec;
33 |     testVec.push_back(bitset1);
34 | 
35 | Instead, do this:
36 | 
37 |     vector< EWAHBoolArray<uword> > testVec(1);
38 |     testVec[0].set(1);
39 |     testVec[0].set(2);
40 |     testVec[0].set(1000);
41 |     testVec[0].set(1001);
42 | 
43 | 
44 | Or you can use the "swap" method.
45 | 
46 |     EWAHBoolArray<uword> bitset1;
47 |     bitset1.set(1);
48 |     bitset1.set(2);
49 |     bitset1.set(1000);
50 |     bitset1.set(1001);
51 |     vector< EWAHBoolArray<uword> > testVec(1);
52 |     testVec[0].swap(bitset1);
53 | 
54 | 
55 | == Dependencies ==
56 | 
57 | None. (Will work under MacOS or Linux easily.)
58 | 
59 | == Usage ==
60 | 
61 | make
62 | ./unit
63 | make example
64 | ./example
65 | 
66 | == Example ==
67 | 
68 | Please see example.cpp
69 | 
70 | == Ruby wrapper ==
71 | 
72 | Josh Ferguson wrote a wrapper for Ruby. 
73 | The implementation is packaged and installable as a ruby gem.
74 | 
75 | You can install it by typing:
76 | 
77 | gem install ewah-bitset
78 | 
79 | == Further reading ==
80 | 
81 | Please see
82 | 
83 | Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves
84 | word-aligned bitmap indexes. Data & Knowledge Engineering 69 (1),
85 | pages 3-28, 2010.
86 | http://arxiv.org/abs/0901.3751
87 | 
88 | == Warning ==
89 | 
90 | Please don't trust this software. Run your own unit tests. Report bugs.
91 | 
92 | 
93 | 
94 | 


--------------------------------------------------------------------------------
/lib/EWAHBoolArray/example.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * This code is released under the
 3 |  * Apache License Version 2.0 http://www.apache.org/licenses/.
 4 |  *
 5 |  * (c) Daniel Lemire, http://lemire.me/en/
 6 |  */
 7 | #include <stdlib.h>
 8 | #include "headers/ewah.h"
 9 | #include <set>
10 | #include <cstdlib> 
11 | #include <ctime>
12 | #include <vector>
13 | //EWAHBoolArray<uint64_t>
14 | // EWAHBoolArray<uint32_t> bitset1;
15 | using namespace std;
16 | 
/**
 * Builds the increasing list of bit positions 1, 2, ..., length / density.
 *
 * @param density  divisor applied to length; the result holds length / density values
 * @param length   dividend; together with density it fixes how many positions are produced
 * @return the positions in increasing order; empty when density is 0 or the
 *         quotient is not positive
 */
std::vector<unsigned int> getvec(int density,int length){
	std::vector<unsigned int> positions;

	// Dividing by zero is undefined behaviour; treat it as "nothing to set".
	if (density == 0) {
		return positions;
	}

	const int qty = length / density;
	if (qty <= 0) {
		return positions;
	}

	// The values are produced unique and already sorted, so they can be
	// appended directly instead of being round-tripped through a std::set.
	positions.reserve(static_cast<std::size_t>(qty));
	for (int i = 1; i <= qty; ++i) {
		positions.push_back(static_cast<unsigned int>(i));
	}

	return positions;
}
35 | 
36 | double getSize(int density,int length){
37 | 	int iterCount = 500;
38 | 	unsigned int total = 0;
39 | 	int iter = 0;
40 | 	for (int i = 0; i<iterCount; i++){
41 | 	 stringstream ss;
42 | 	 EWAHBoolArray<uint32_t> set;
43 |      vector<unsigned int> v =getvec(density,length);
44 | 	 for (auto value :  v) {
45 | 		set.set(value);
46 | 	 }
47 | 	 set.write(ss);
48 | 	 total += ss.str().size();
49 | 	 iter++;
50 | 	}
51 | 	return ( ( (total/iter) * 8 ) * 1.0 )/ ((length/density) * 1.0) ;
52 | }
53 | 
54 | void f(){
55 | 	for (int i=1; i<=32; i++){
56 | 		cout << "Density = 1/" << i << endl;
57 | 		cout << "size = " << getSize(i,5000) << endl;
58 | 	}
59 | 
60 | }
61 | 
// Entry point: seeds rand() from the wall clock, then prints the size report via f().
int main() {
	srand((unsigned)time(0)); 
	f();
}
66 | 


--------------------------------------------------------------------------------
/lib/EWAHBoolArray/makefile:
--------------------------------------------------------------------------------
 1 | VPATH = src:headers
 2 | CXXFLAGS=-Iheaders -O3 
 3 | HEADERS=ewah.h ewahutil.h boolarray.h runninglengthword.h
 4 | 
 5 | all: unit unit32bits example benchmark
 6 | 
 7 | unit32bits: $(HEADERS) unit.cpp
 8 | 	$(CXX) $(CXXFLAGS) -m32 -o unit32bits src/unit.cpp 
 9 | 	
10 | unit: $(HEADERS) unit.cpp 
11 | 	$(CXX) $(CXXFLAGS) -o unit src/unit.cpp 
12 | 		
13 | example: $(HEADERS) example.cpp
14 | 	$(CXX) $(CXXFLAGS) -o example example.cpp
15 | 
16 | cppcheck: 
17 | 	cppcheck --enable=all headers/*.h src/*.cpp *.cpp
18 | 
19 | benchmark: $(HEADERS) ./src/benchmark.cpp
20 | 	$(CXX) $(CXXFLAGS) -o benchmark ./src/benchmark.cpp
21 | 
22 | 
23 | doxygen: 
24 | 	doxygen doxyconfig.txt
25 | 
26 | package: 
27 | 	zip -9 EWAHBoolArray_`date +%Y-%m-%d`.zip README CHANGELOG makefile example.cpp headers/*.h src/*.cpp
28 | 	cd ..;zip -9 ./EWAHBoolArray/EWAHBoolArray.0.3.4-src.zip ./EWAHBoolArray/README ./EWAHBoolArray/CHANGELOG ./EWAHBoolArray/makefile ./EWAHBoolArray/example.cpp ./EWAHBoolArray/headers/*.h ./EWAHBoolArray/src/*.cpp
29 | clean: 
30 | 	rm -f *.o unit example unit32bits benchmark
31 | 


--------------------------------------------------------------------------------
/lib/EWAHBoolArray/unit:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wheresvic/zsearch/72fe592a04c5fc90075475c2590bc3b8db03ed5d/lib/EWAHBoolArray/unit


--------------------------------------------------------------------------------
/lib/rapidxml-1.13/document01.xml:
--------------------------------------------------------------------------------
1 | <document>
2 | 	<title>First document</title>
3 | 	<field1>Value of field1</field1>
4 | 	<field2>Value of field2</field2>
5 | </document>
6 | 


--------------------------------------------------------------------------------
/lib/rapidxml-1.13/test_rapidxml.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <iostream>
 3 | #include <string>
 4 | #include <vector>
 5 | #include "rapidxml.hpp"
 6 | #include "rapidxml_print.hpp"
 7 | 
 8 | using namespace std;
 9 | using namespace rapidxml;
10 | 
11 | int main()
12 | {
13 | 	vector<char> xmlText;
14 | 	string input;
15 | 
16 | 	while (getline(cin, input))
17 | 	{
18 | 		xmlText.insert(xmlText.end(), input.begin(), input.end());
19 | 	}
20 | 
21 | 	xmlText.push_back('\0');
22 | 
23 | 	xml_document<> doc;    					// character type defaults to char
24 | 	doc.parse<parse_full>(&xmlText[0]);    	// 0 means default parse flags
25 | 
26 | 	// xml_node<>* root = doc.first_node("document");
27 | 
28 | 	cout << "The first node is '" << doc.first_node()->name() << "'\n";
29 | 
30 | 	for (xml_node<>* n = doc.first_node("document")->first_node(); n; n = n->next_sibling())
31 | 	{
32 | 		char* v = n->value();
33 | 		// if (!v || !*v) v = "(empty)";
34 | 		cout << n->name() << " : " << v << '\n';
35 | 	}
36 | 
37 | 	string document;
38 | 	rapidxml::print(std::back_inserter(document), doc,0);
39 | 	cout << document;
40 | 
41 | 	return 0;
42 | }
43 | 


--------------------------------------------------------------------------------
/misc/Makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | # $@ name of the target
 3 | # $^ name of all prerequisites with duplicates removed
 4 | # $< name of the first prerequisite 
 5 | 
 6 | CC=g++
 7 | CFLAGS = -std=c++0x -Wall -O0 -pedantic -msse3 -I/usr/local/include
 8 | LIBS = -L/usr/local/lib
 9 | OBJECTS =
10 | 
11 | clean :
12 | 	rm -f *.o \
13 | 		test_compressedset
14 | 
15 | all: test_compressedset
16 | 	
17 | test_compressedset: test_compressedset.cpp
18 | 	$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
19 | 
20 | 


--------------------------------------------------------------------------------
/misc/example_compressedset.cpp:
--------------------------------------------------------------------------------
 1 | // from  http://www.stepanovpapers.com/CIKM_2011.pdf 
 2 | // compile with g++  -std=gnu++0x -O3 main.cpp  -o result.bin
 3 | using namespace std;
 4 | 
 5 | #include <iostream>
 6 | #include <stdio.h>
 7 | #include <assert.h>
 8 | #include "varint/CompressedSet.h"
 9 | #include <time.h>
10 | #include "varint/LazyAndSet.h"
11 | #include <vector>
12 | 
/** Returns clock1 - clock2 converted from clock ticks to milliseconds. */
double diffclock(clock_t clock1,clock_t clock2)
{
	const double elapsedTicks = clock1 - clock2;
	return (elapsedTicks * 1000) / CLOCKS_PER_SEC;
}
19 | void benchmark(){
20 | 	int test;
21 | 		CompressedSet myset1;
22 | 		for (unsigned int i = 1; i<=384000000; ++i){
23 | 		    myset1.addDoc(i);	
24 | 		}
25 | 		std::cout << "compressed!" << endl;
26 |         
27 | 		myset1.flush();
28 | 		myset1.compact();
29 | 
30 | 		CompressedSet::Iterator it(&myset1);
31 | 		// Sequential scanning 384 000 000 docs per second
32 | 		clock_t begin=clock();
33 | 		for (; it.docID() != NO_MORE_DOCS;it.nextDoc()){
34 | 			unsigned int temp = it.docID();
35 | 		}
36 | 		clock_t end=clock();
37 | 	    std::cout << "Iteration Time: " << double(diffclock(end,begin)) << " ms"<< endl;
38 | }
39 | 
40 | 
41 | 
// Entry point: runs the single iteration benchmark.
int main() {
	benchmark();
}
45 | 
46 | 


--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | server
3 | engine
4 | tests
5 | 
6 | 


--------------------------------------------------------------------------------
/src/Constants.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef CONSTANTS_HPP
 3 | #define CONSTANTS_HPP
 4 | 
 5 | #include <string>
 6 | 
// Constants shared across zsearch: file locations, store keys and
// server-related strings.
namespace zsearch
{
	// presumably the daemon's pid/lock file — confirm against the server code
	const std::string LOCK_FILE = "/var/tmp/zsearch.pid";

	// NOTE(review): names suggest on-disk LevelDB locations (regular and test) — confirm
	const std::string LEVELDB_STORE = "/var/tmp/store";
	const std::string LEVELDB_TEST_STORE = "/var/tmp/test_store";

	// Keys under which FieldKVStore and EngineDataKVStore persist their state
	const std::string FIELDS_KEY = "fields";
	const std::string DOC_ID_KEY = "docId";
	const std::string WORD_ID_KEY = "wordId";

	// const std::string QUERY_PARSER_DELIMITERS = " \t\n\r.,;";

	// NOTE(review): names suggest HTTP paths and request parameter keys — confirm in the server code
	namespace server
	{
		const unsigned short PORT = 8080;

		const std::string POST_HTM = "/post.htm";
		const std::string SEARCH_PATH = "/search";
		const std::string POST_PATH = "/post";
		const std::string DOC_PATH = "/doc";
		const std::string INDEX_PATH = "/index";
		const std::string ROOT = "/";

		const std::string POST_DATA_KEY = "data";
		const std::string GET_SEARCH_QUERY_KEY = "q";
		const std::string GET_SEARCH_START_KEY = "s";
		const std::string GET_SEARCH_OFFSET_KEY = "o";
		// Hides zsearch::DOC_ID_KEY ("docId") for unqualified lookups inside this namespace
		const std::string DOC_ID_KEY = "id";
	}

	const unsigned int MAX_BATCH_SIZE = 200;

	// NOTE(review): "document" matches the root element of the sample XML files — confirm usage
	const std::string DOCUMENT_ROOT = "document";
	const std::string DOCUMENT_TITLE = "title";
}
43 | 
44 | 
45 | #endif
46 | 


--------------------------------------------------------------------------------
/src/DocumentKVStore.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef DOCUMENTKVSTORE_H
 3 | #define DOCUMENTKVSTORE_H
 4 | 
 5 | #include <iostream>
 6 | #include <map>
 7 | #include <memory>
 8 | #include <string>
 9 | #include <sstream>
10 | #include "IKVStore.h"
11 | #include "IDocument.h"
12 | #include "ZException.hpp"
13 | 
14 | 
15 | using namespace std;
16 | 
17 | class DocumentKVStore
18 | {
19 | 	private:
20 | 
21 | 		std::shared_ptr<KVStore::IKVStore> store;
22 | 
23 | 	public:
24 | 
25 | 		DocumentKVStore(std::shared_ptr<KVStore::IKVStore> store) : store(store)
26 | 		{
27 | 			store->Open();
28 | 		}
29 | 
30 | 		~DocumentKVStore()
31 | 		{
32 | 			std::cerr << "Destroyed DocumentKVStore" << std::endl;
33 | 		}
34 | 
35 | 		void addDoc(unsigned int docId, const shared_ptr<IDocument>& doc)
36 | 		{
37 | 			stringstream ss;
38 | 			doc->writeMini(ss);
39 | 			string d = ss.str();
40 | 
41 | 			if (!(store->Put(docId, d).ok()))
42 | 			{
43 | 				std::cerr << "Could not put document into LevelDb " << d << std::endl;
44 | 				throw ZException("Could not put document into LevelDb ");
45 | 			}
46 | 		}
47 | 
48 | 		void removeDoc(unsigned int docId)
49 | 		{
50 | 			store->Delete(docId);	// status could really only be Ok or NotFound
51 | 		}
52 | 
53 | 		int Get(unsigned int docId, shared_ptr<IDocument>& doc) const
54 | 		{
55 | 			string d;
56 | 
57 | 			if (store->Get(docId, d).ok())
58 | 			{
59 | 				doc->readMini(d);
60 | 
61 | 				/*
62 | 				cout << "got docId " << docId << endl << d << endl;
63 | 
64 | 				try
65 | 				{
66 | 					doc->construct(d);
67 | 				}
68 | 				catch (const string& ex)
69 | 				{
70 | 					cerr << ex << endl;
71 | 				}
72 | 				catch (const exception& ex)
73 | 				{
74 | 					cerr << ex.what() << endl;
75 | 				}
76 | 				catch (...)
77 | 				{
78 | 					cerr << "wtf" << endl;
79 | 				}
80 | 				*/
81 | 
82 | 				return 1;
83 | 			}
84 | 
85 | 			return 0;
86 | 		}
87 | 
88 | };
89 | 
90 | #endif
91 | 


--------------------------------------------------------------------------------
/src/EngineDataKVStore.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef ENGINEDATAKVSTORE_H
 2 | #define ENGINEDATAKVSTORE_H
 3 | 
 4 | #include <iostream>
 5 | #include <memory>
 6 | #include <string>
 7 | #include <sstream>
 8 | #include <unordered_set>
 9 | 
10 | #include "IKVStore.h"
11 | #include "ZException.hpp"
12 | #include "ZUtil.hpp"
13 | #include "Constants.hpp"
14 | 
15 | using namespace std;
16 | 
/**
 * Holds the docId and wordId counters in memory: they are loaded from the
 * store in the constructor and written back in the destructor.
 */
class EngineDataKVStore
{
	private:

		unsigned long docId;
		unsigned long long wordId;

		shared_ptr<KVStore::IKVStore> store;

	public:

		/**
		 * Starts both counters at 1, then overrides each with the stored value
		 * when the store has one.
		 * Throws ZException when exactly one of the two counters is above 1.
		 */
		EngineDataKVStore(shared_ptr<KVStore::IKVStore> store) : docId(1), wordId(1), store(store)
		{
			string strId;

			cerr << "Populating docId from store" << endl;

			// NOTE(review): values are read back with ZUtil::atoi but written with
			// ZUtil::PutUint64 in the destructor — confirm the two encodings round-trip.
			if (store->Get(zsearch::DOC_ID_KEY, strId).ok())
			{
				cerr << "got " << strId << endl;
				docId = ZUtil::atoi(strId.c_str());
			}

			cerr << "current docId is " << docId << endl;

			cerr << "Populating wordId from store" << endl;

			if (store->Get(zsearch::WORD_ID_KEY, strId).ok())
			{
				cerr << "got " << strId << endl;
				wordId = ZUtil::atoi(strId.c_str());
			}

			cerr << "current wordId is " << wordId << endl;

			// Both counters must be at their initial value or both advanced.
			if ((docId <= 1 && wordId > 1) || (docId > 1 && wordId <= 1))
			{
				throw ZException("docId and wordId mismatched! Most likely db is corrupt.");
			}

		}

		/** Writes both counters back to the store; failures are only logged. */
		~EngineDataKVStore()
		{
			cerr << "Persisting docId " << docId << " to store" << endl;

			string strDocId;
			ZUtil::PutUint64(strDocId, docId);

			if (!store->Put(zsearch::DOC_ID_KEY, strDocId).ok())
			{
				cerr << "Error persisting docId to storage, db might be corrupt or invalid at startup" << endl;
			}

			cerr << "Persisting wordId " << wordId << " to store" << endl;

			string strWordId;
			ZUtil::PutUint64(strWordId, wordId);

			if (!store->Put(zsearch::WORD_ID_KEY, strWordId).ok())
			{
				cerr << "Error persisting wordId to storage, db might be corrupt or invalid at startup" << endl;
			}

			cerr << "Destroyed EngineDataKVStore" << endl;
		}

		// Returns a mutable reference, so callers change the counter in place.
		unsigned long& getDocId()
		{
			return docId;
		}

		// Returns a mutable reference, so callers change the counter in place.
		unsigned long long& getWordId()
		{
			return wordId;
		}

};
95 | 
96 | #endif
97 | 


--------------------------------------------------------------------------------
/src/Field.cpp:
--------------------------------------------------------------------------------
 1 | #include "Field.h"
 2 | 
// Stores the name and value pointers as given (no copy is made, so the caller
// must keep both strings alive) and normalizes the flags through setConfig().
Field::Field(const char* name, const char* value,int _config){
	//assert(name != NULL);
    _name = name;
	fieldsData = (void*)value;
	setConfig(_config);
}
 9 | 
10 | void Field::setConfig(const uint32_t x){
11 | 	uint32_t newConfig=0;
12 | 
13 |     if ( x & STORE_YES ){
14 | 	  newConfig |= STORE_YES;
15 |     } else {
16 | 	  newConfig |= STORE_NO;
17 |     }
18 | 
19 |     if ( (x & INDEX_NO)==0 ){
20 |       bool index=false;	  
21 |       if ( x & INDEX_TOKENIZED ){
22 | 	    newConfig |= INDEX_TOKENIZED;
23 | 		index = true;
24 |       } else if ( x & INDEX_UNTOKENIZED ){
25 | 	    newConfig |= INDEX_UNTOKENIZED;
26 | 	    index = true;
27 | 	  }
28 | 	  if ( !index )
29 | 	    newConfig |= INDEX_NO;
30 |     } else {
31 | 	  newConfig |= INDEX_NO;
32 |     }
33 | 
34 |     if ( newConfig & INDEX_NO && newConfig & STORE_NO ){
35 | 	    // it doesn't make sense to have a field that is neither indexed nor stored
36 |     }
37 |     config = newConfig;
38 | }
39 | 
// Returns the name pointer handed to the constructor.
const char* Field::name() const {
	return _name; 
}
43 | 
// Returns the value pointer handed to the constructor (kept as void* in fieldsData).
const char* Field::value() const {
	return static_cast<char*>(fieldsData);
}
47 | 
// True when the normalized config carries STORE_YES.
bool Field::isStored() const {
  return (config & STORE_YES) != 0;
}
51 | 
52 | bool Field::isIndexed() const {
53 |   return (config & INDEX_TOKENIZED)!=0 ; 
54 | }
55 | 
// True when the normalized config carries INDEX_TOKENIZED.
bool Field::isTokenized() const {
  return (config & INDEX_TOKENIZED) != 0;
}
59 | 
// Returns the same pointer as value(); the name is not part of the result.
const char* Field::toString(){
	return static_cast<char*>(fieldsData);	
}
63 | 
// Releases nothing: the destructor body is empty.
Field::~Field() {
	//TODO
}
67 | 


--------------------------------------------------------------------------------
/src/Field.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | A field is a section of a Document. 
 3 | Each field has two parts, a name and a value. 
 4 | Values may be free text or atomic keywords, which are not further processed.
 5 | Such keywords may be used to represent dates, urls, etc.
 6 | Fields are optionally stored so that they may be returned with the document.
 7 | */
 8 | #include <cstddef>
 9 | #include <stdint.h> 
10 | 
// Include guard: the header had none, so including it twice in one
// translation unit redefined the class.
#ifndef FIELD_H
#define FIELD_H

/**
 * A named value attached to a document, together with flags that say
 * whether the value is stored and how it is indexed.
 */
class Field {
public:
	enum Store{ 
		/** Store the original field value. 
		* This is useful for short texts like doc title
		* which should be displayed with the results. 
		*/
		STORE_YES=1,
		
	    /** Do not store the field value in the index. */
		STORE_NO=2
	};
	
	enum Index{ 
		/** Do not index the field value.
		* This field can thus not be searched,
		* but one can still access its contents provided it is stored
		*/
		INDEX_NO=16,
		
		/** Index the field's value so it can be searched.
		 * An Analyzer will be used to tokenize the text before its
		 * terms will be stored in the index.
		 */
		INDEX_TOKENIZED=32,
		
		/** Index the field's value without using an Analyzer,
		* so it can be searched.
		* The value will be stored as a single term. 
		* This is useful for unique Ids like product numbers.
		*/
		INDEX_UNTOKENIZED=64 
	};
	
	/**
	 * @param name    field name
	 * @param value   field value
	 * @param _config bitwise OR of one Store flag and one Index flag
	 */
	Field(const char* name, const char* value,int _config);
	
	virtual ~Field();
	
	virtual const char* name() const;
	virtual const char* value() const;
	virtual bool isStored() const;
	virtual bool isIndexed() const;
	virtual bool isTokenized() const;
	/** Prints a Field for human consumption. */
	virtual const char* toString();
	
protected:	
	/** Normalizes the flag mask and saves it in config. */
	void setConfig(const uint32_t _config);
	const char* _name;
	void* fieldsData;
	uint32_t config;
};

#endif // FIELD_H


--------------------------------------------------------------------------------
/src/FieldKVStore.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef FIELDKVSTORE_H
 2 | #define FIELDKVSTORE_H
 3 | 
 4 | #include <iostream>
 5 | #include <memory>
 6 | #include <string>
 7 | #include <sstream>
 8 | #include <unordered_set>
 9 | 
10 | #include "IKVStore.h"
11 | #include "ZException.hpp"
12 | #include "ZUtil.hpp"
13 | #include "Constants.hpp"
14 | 
15 | using namespace std;
16 | 
17 | class FieldKVStore
18 | {
19 | 	private:
20 | 
21 | 		unordered_set<string> fields;
22 | 		shared_ptr<KVStore::IKVStore> store;
23 | 
24 | 	public:
25 | 
26 | 		FieldKVStore(shared_ptr<KVStore::IKVStore> store) : store(store)
27 | 		{
28 | 			cerr << "Populating fields from store" << endl;
29 | 
30 | 			string strFields;
31 | 
32 | 			if (store->Get(zsearch::FIELDS_KEY, strFields).ok())
33 | 			{
34 | 				istringstream iss(strFields);
35 | 				string field;
36 | 
37 | 				while (iss >> field)
38 | 				{
39 | 					// cerr << field << " ";
40 | 					fields.insert(field);
41 | 				}
42 | 
43 | 				cerr << endl;
44 | 			}
45 | 
46 | 		}
47 | 
48 | 		~FieldKVStore()
49 | 		{
50 | 			cerr << "Persisting fields to store" << endl;
51 | 
52 | 			ostringstream oss;
53 | 
54 | 			for (auto field : fields)
55 | 			{
56 | 				oss << field << " ";
57 | 			}
58 | 
59 | 			string strFields = oss.str();
60 | 
61 | 			if (!store->Put(zsearch::FIELDS_KEY, strFields).ok())
62 | 			{
63 | 				cerr << "Error persisting fields to storage, db might be corrupt or invalid at startup" << endl;
64 | 			}
65 | 
66 | 			cerr << "Destroyed FieldKVStore" << endl;
67 | 		}
68 | 
69 | 		void put(const string& field)
70 | 		{
71 | 			fields.insert(field);
72 | 		}
73 | 
74 | 		const unordered_set<string>& getFields() const
75 | 		{
76 | 			return fields;
77 | 		}
78 | 
79 | };
80 | 
81 | #endif
82 | 


--------------------------------------------------------------------------------
/src/IDocument.h:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #ifndef IDOCUMENT_H
 4 | #define IDOCUMENT_H
 5 | 
 6 | 
 7 | #include <map>
 8 | #include <exception>
 9 | #include <string>
10 | #include <memory>
11 | #include <iostream>
12 | #include <iterator>
13 | #include <vector>
14 | using namespace std;
15 | 
// Abstract document: an ordered sequence of (key, value) string pairs that
// can be built from a string and written to a stream.
class IDocument
{
	public:

		// Builds the document from the given string; the parameter name suggests XML.
		virtual void construct(const string& xml) = 0;
		
		// Writes the document to out.
		virtual void write(ostream& out) = 0;
		
		// readMini / writeMini: the form DocumentKVStore uses to store and load
		// documents. NOTE(review): the exact format is defined by the implementation.
		virtual void readMini(const string& src) = 0;
		
		virtual void writeMini(ostream& out) = 0;
	
		virtual void addEntry(const string& key, const string& value) = 0;

		// Iteration over the (key, value) entries.
		typedef std::vector<pair<string,string>>::const_iterator const_iterator;
		virtual const_iterator  begin() const = 0;
		virtual const_iterator  end()   const = 0;
		
		// Looks up key and returns the result through value.
		// NOTE(review): behaviour for a missing key is implementation-defined — confirm.
		virtual void getEntry(const string& key, string& value) = 0;
		
		virtual ~IDocument() { }
};
38 | 
39 | #endif
40 | 


--------------------------------------------------------------------------------
/src/IInvertedIndex.h:
--------------------------------------------------------------------------------
 1 | #ifndef IINVERTED_INDEX_H
 2 | #define IINVERTED_INDEX_H
 3 | 
 4 | #include <memory>
 5 | #include "varint/Set.h"
// Abstract inverted index mapping a word id to a Set of document ids.
// NOTE(review): the meaning of the int return values is not visible here — confirm
// against the implementations.
class IInvertedIndex
{
	public:

		// Looks up wordId and hands the result back through outset.
		virtual int get(unsigned int wordId, std::shared_ptr<Set>& outset) const = 0;
		// Records that docid is associated with wordId.
		virtual int add(unsigned int wordId, unsigned int docid) = 0;
		// Removes the association between wordId and docId.
		virtual int remove(unsigned int wordId, unsigned int docId) = 0;

		// Virtual destructor
		virtual ~IInvertedIndex() { }
};
17 | #endif
18 | 


--------------------------------------------------------------------------------
/src/IKVStore.h:
--------------------------------------------------------------------------------
 1 | #ifndef IKVSTORE_H
 2 | #define IKVSTORE_H
 3 | 
#include <stdint.h>

#include <memory>
#include <string>
#include <utility>
#include <vector>
 7 | 
 8 | namespace KVStore
 9 | {
10 | 
11 | 	class Status
12 | 	{
13 | 		enum Code
14 | 		{
15 | 			kOk = 0,
16 | 			kNotFound = 1,
17 | 			kCorruption = 2,
18 | 			kNotSupported = 3,
19 | 			kInvalidArgument = 4,
20 | 			kIOError = 5
21 | 		};
22 | 
23 | 		Code _code;
24 | 
25 | 		public:
26 | 
27 | 			Code code() const
28 | 			{
29 | 				return _code;
30 | 			}
31 | 
32 | 		Status(Code code)
33 | 		{
34 | 			_code = code;
35 | 		}
36 | 
37 | 		// Return a success status.
38 | 		static Status OK() { return Status(kOk); }
39 | 
40 | 		// Return error status of an appropriate type.
41 | 		static Status NotFound() { return Status(kNotFound); }
42 | 
43 | 		static Status Corruption() { return Status(kCorruption); }
44 | 
45 | 		static Status NotSupported() { return Status(kNotSupported); }
46 | 
47 | 		static Status InvalidArgument() { return Status(kInvalidArgument); }
48 | 
49 | 		static Status IOError() { return Status(kIOError); }
50 | 
51 | 		// Returns true iff the status indicates success.
52 | 		bool ok() const { return (code() == kOk); }
53 | 
54 | 		// Returns true iff the status indicates a NotFound error.
55 | 		bool IsNotFound() const { return code() == kNotFound; }
56 | 
57 | 		// Returns true iff the status indicates a Corruption error.
58 | 		bool IsCorruption() const { return code() == kCorruption; }
59 | 
60 | 		// Returns true iff the status indicates an IOError.
61 | 		bool IsIOError() const { return code() == kIOError; }
62 | 	};
63 | 
64 | 
65 | 	class IKVStore
66 | 	{
67 | 		public:
68 | 
69 | 			virtual Status Open() = 0;
70 | 			virtual Status Put(const std::string& key,const std::string& value) = 0;
71 | 			virtual Status Put(uint64_t key,const std::string& value) = 0;
72 | 			virtual Status Get(const std::string& key, std::string* value) = 0;
73 | 			virtual Status Get(const std::string& key, std::string& value) = 0;
74 | 			virtual Status Get(uint64_t key, std::string& value) = 0;
75 | 			virtual Status Delete(const std::string& key) = 0;
76 | 			virtual Status Delete(uint64_t key) = 0;
77 | 		    virtual	void Compact() = 0;
78 | 		    virtual Status Put(const std::vector<std::pair<unsigned int, std::string>>& writes) = 0;
79 | 			virtual Status Put(const std::vector<std::pair<std::string, std::string>>& writes) = 0;
80 | 			virtual ~IKVStore() { }
81 | 
82 | 
83 | 	};
84 | 
85 | }
86 | 
87 | #endif
88 | 


--------------------------------------------------------------------------------
/src/ITokenizer.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef ITOKENIZER_H
 3 | #define ITOKENIZER_H
 4 | 
 5 | #include <string>
 6 | 
// Abstract tokenizer: load a string with setString(), then call nextToken()
// repeatedly and read each result with getToken().
class ITokenizer
{
    public:

        // Advances to the next token; returns false when none is left.
        virtual bool nextToken()  = 0;

        // The token produced by the last nextToken() call.
        virtual const std::string& getToken() const = 0;

		// Sets the text to tokenize; field names the field the text belongs to.
		virtual void setString(const std::string& str, const std::string& field) = 0;

		virtual ~ITokenizer() { }
};
19 | 
20 | #endif
21 | 


--------------------------------------------------------------------------------
/src/Makefile:
--------------------------------------------------------------------------------
 1 | # $@ name of the target
 2 | # $^ name of all prerequisites with duplicates removed
 3 | # $< name of the first prerequisite
 4 | ifneq (,$(findstring /cygdrive/,$(PATH)))
 5 |     OSNAME := Cygwin
 6 | else ifneq (,$(findstring WINDOWS,$(PATH)))
 7 |     OSNAME := Windows
 8 | else
 9 |     OSNAME := $(shell uname -s)
10 | endif
11 | ARCH := $(shell uname -m)
12 | 
13 | CC = g++
14 | BUILD_DIR = ../build
15 | LEVELDB_DIR = ../leveldb
16 | 
17 | 
18 | BUILD_OBJECTS =
19 | 
20 | OBJECTS = $(BUILD_OBJECTS) $(BUILD_DIR)/CompressedSet.a $(LEVELDB_DIR)/libleveldb.a 
21 | 
22 | HEADERS = TokenizerImpl.hpp Constants.hpp ZUtil.hpp Statistics.hpp Engine.hpp InvertedIndexImpl.hpp InvertedIndexBatch.hpp DocumentImpl.hpp InvertedIndexSimpleBatch.hpp DocumentKVStore.hpp ZException.hpp ZUtil.hpp WordIndexKVStore.hpp KVStoreLevelDb.hpp IDocument.h IInvertedIndex.h IKVStore.h KVStoreInMemory.hpp NameSpaceKVStore.hpp FieldKVStore.hpp EngineDataKVStore.hpp
23 | 
24 | CFLAGS_PLAIN =  -flto -std=gnu++0x -Wall -g0 -Ofast -funroll-loops -msse2 -I$(LEVELDB_DIR)/include -I../lib -I../lib/rapidxml-1.13 -I.. -D_GLIBCXX_USE_NANOSLEEP
25 | 
26 | ifeq ($(OSNAME), Darwin)
27 | CFLAGS = $(CFLAGS_PLAIN) -I. -I/usr/local/include -I/opt/local/include -DOS_MACOSX -DINTEL64
28 | LIBS = -L/usr/local/lib -L/opt/local/lib -L$(LEVELDB_DIR) -lleveldb -lm -levent
29 | endif
30 | 
31 | 
32 | ifeq ($(OSNAME), Linux)
33 | CFLAGS = $(CFLAGS_PLAIN) -I. -I/usr/include -I/usr/local/include -I/ms/dist/fsf/PROJ/libevent/2.0.19/include
34 | LIBS = -L/usr/local/lib -L/usr/lib/i386-linux-gnu -L/ms/dist/fsf/PROJ/libevent/2.0.19/lib -L$(LEVELDB_DIR) -lm -lpthread -levent 
35 | endif
36 | 
37 | 
38 | ifeq ($(OSNAME), Cygwin)
39 | CFLAGS = $(CFLAGS_PLAIN)
40 | LIBS = -L/usr/lib -L$(LEVELDB_DIR) -lm -lpthread -levent 
41 | endif
42 | 
43 | clean :
44 | 	rm -f $(BUILD_OBJECTS) $(BUILD_DIR)/server $(BUILD_DIR)/engine
45 | 
46 | all: server engine $(BUILD_DIR)/TokenizerImpl.o
47 | 
48 | server: server.cpp $(OBJECTS)
49 | 	$(CXX) $(CFLAGS) -o $(BUILD_DIR)/$@ $^ $(LIBS)
50 | 
51 | engine: engine_simple_main.cpp $(OBJECTS)
52 | 	$(CXX) $(CFLAGS) -o $(BUILD_DIR)/$@ $^ $(LIBS) 
53 | 
54 | 
55 | ##
56 | # Classes
57 | ##
58 | 
59 | $(BUILD_DIR)/TokenizerImpl.o : TokenizerImpl.cpp TokenizerImpl.h ITokenizer.h Constants.hpp
60 | 	$(CXX) -c $(CFLAGS) $< -o $@
61 | 
62 | 
63 | 


--------------------------------------------------------------------------------
/src/TokenizerImpl.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <iostream>
 3 | #include <string>
 4 | #include <algorithm>
 5 | #include <locale>
 6 | #include "TokenizerImpl.h"
 7 | 
 8 | using namespace std;
 9 | 
// Starts with an empty input string and the read offset at 0.
TokenizerImpl::TokenizerImpl() : m_offset(0)
{ }
12 | 
// Only logs the destruction to stderr.
TokenizerImpl::~TokenizerImpl()
{ 
	std::cerr << "Destroyed TokenizerImpl" << std::endl;
}
17 | 
18 | 
// Copies str as the new input and rewinds the tokenizer.
// The field argument is accepted for the interface but not used here.
void TokenizerImpl::setString(const std::string& str, const std::string& field)
{
	m_string = str;
	m_token.clear();
	m_offset = 0;
}
25 | 
26 | 
27 | bool TokenizerImpl::nextToken()
28 | {
29 | 	m_token.resize(0);
30 | 	for (;;){
31 | 		if (m_offset == m_string.size()) break;
32 | 		char c = m_string[m_offset++];
33 | 		c = c | 0x20;	// lowercase
34 | 		if (c >= 'a' ? c <= 'z' : (c >= '0' && c <= '9'))
35 | 		{
36 | 			m_token.push_back(c);
37 | 		} else {
38 | 			if (m_token.size()>0)
39 | 			  return true;
40 | 		}
41 | 	}
42 | 	return m_token.size()>0;
43 | }
44 | 
45 | 
46 | 
// Returns the token built by the last nextToken() call (empty before the first call).
const std::string& TokenizerImpl::getToken() const
{	
	return m_token;
}
51 | 


--------------------------------------------------------------------------------
/src/TokenizerImpl.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef TOKENIZERIMPL_H
 3 | #define TOKENIZERIMPL_H
 4 | 
 5 | #include <string>
 6 | #include "ITokenizer.h"
 7 | 
// Declaration of the ITokenizer implementation defined in TokenizerImpl.cpp.
// NOTE(review): TokenizerImpl.hpp defines a class with the same name under the
// same TOKENIZERIMPL_H guard — the two headers cannot be combined in one translation unit.
class TokenizerImpl : public ITokenizer
{
    public:

		TokenizerImpl();

		~TokenizerImpl();
		
		// Sets the text to tokenize and rewinds to its start.
		void setString(const std::string& str,const std::string& field);

        // Advances to the next token; false when the input is exhausted.
        bool nextToken();
		
        // The token produced by the last nextToken() call.
        const std::string& getToken() const;

	protected:

        std::string m_string;	// text being tokenized
		size_t m_offset;		// index of the next unread character in m_string
        std::string m_token;	// current token


};
30 | 
31 | #endif
32 | 


--------------------------------------------------------------------------------
/src/TokenizerImpl.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef TOKENIZERIMPL_H
 3 | #define TOKENIZERIMPL_H
 4 | 
 5 | #include <string>
 6 | #include "ITokenizer.h"
 7 | #include <iostream>
 8 | #include <cctype>
 9 | #include <climits>
10 | using namespace std;
11 | class TokenizerImpl : public ITokenizer
12 | {
13 | 	protected:
14 |         bool table[UCHAR_MAX+1];
15 |     public:
16 | 
17 | 		TokenizerImpl(): m_offset(0)
18 | 		{
19 | 			for (int i = 0; i <= UCHAR_MAX; ++i)
20 | 			    table[i] = std::isalnum(i);
21 | 		}
22 | 		
23 | 		~TokenizerImpl(){ 
24 | 			std::cerr << "Destroyed TokenizerImpl" << std::endl;
25 | 		}
26 | 		
27 | 		void setString(const std::string& str, const std::string& field)
28 | 		{
29 | 			m_string = str;
30 | 			stringsize = m_string.size();
31 | 			m_token.clear();
32 | 			m_offset = 0;
33 | 		}
34 | 
35 |         inline bool nextToken()
36 | 		{
37 | 			m_token.resize(0);
38 | 			size_t tokensize = 0;
39 | 			for (;;){
40 | 				if (m_offset == stringsize) break; //22%
41 | 				unsigned char c = m_string[m_offset++]; 
42 | 				c = c | 0x20;
43 | 				if (table[c]) // 39%
44 | 				{
45 | 					tokensize++;
46 | 					m_token.push_back(c);
47 | 				} else {
48 | 					if (tokensize>2){
49 | 					  return true;
50 | 					} else {
51 | 					  m_token.resize(0);	
52 | 					}
53 | 				}
54 | 			}
55 | 			return tokensize>2;
56 | 		}
57 | 		
58 |         
59 | 		inline const std::string& getToken() const
60 | 		{	
61 | 			return m_token;
62 | 		}
63 | 
64 | 	protected:
65 | 
66 |         std::string m_string;
67 | 		size_t stringsize;
68 | 		size_t m_offset;
69 |         std::string m_token;
70 | 
71 | 
72 | };
73 | 
74 | #endif
75 | 


--------------------------------------------------------------------------------
/src/ZException.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | /**
 3 |  * http://stackoverflow.com/questions/134569/c-exception-throwing-stdstring
 4 |  */
 5 |  
 6 | #ifndef ZEXCEPTION_H
 7 | #define ZEXCEPTION_H
 8 | 
 9 | #include <string>
10 | 
/**
 * Exception type that carries an arbitrary message string.
 */
struct ZException : public std::exception
{
	std::string s;   // message returned by what()

	// Takes the message by const reference so it is copied only once.
	ZException(const std::string& ss) : s(ss) { }

	// Explicit non-throwing destructor: std::exception's destructor is declared
	// throw(), and without this the implicitly generated one can be rejected as
	// having a looser exception specification by pre-C++11 compilers.
	~ZException() throw() { }

	// The returned pointer stays valid for the lifetime of this object.
	const char* what() const throw()
	{
		return s.c_str();
	}
};
22 | 
23 | #endif
24 | 


--------------------------------------------------------------------------------
/src/trie/EntropyTrie.hpp:
--------------------------------------------------------------------------------
 1 | #include "bit_vector.hpp"
 2 | #include "trie.hpp"
 3 | #include <vector>
 4 | #include <string>
 5 | class EntropyTrie
 6 | {
 7 | private:
 8 | 	typedef cindex::bit_vector<> vector_type;
 9 | 	vector_type bitvector_;
10 | 	cindex::trie<> trie_;
11 | 	std::size_t key_len_;
12 | 	std::vector<const uint8_t*> pending_keys_;
13 | 	std::size_t pending_key_count_;
14 | 	std::size_t n_;
15 | public:
16 | 	/**
17 | 	 * key_len = length of each key in number of bytes
18 | 	 */
19 | 	EntropyTrie(std::size_t key_len): key_len_(key_len),pending_key_count_(0){
20 | 		
21 | 	}
22 | 	
23 | 	bool insert(const uint8_t* key){
24 | 		uint8_t* key_c = new uint8_t[key_len_];
25 | 		memcpy(key_c, key, key_len_);
26 | 		pending_keys_.push_back(key_c);
27 | 		pending_key_count_++;
28 | 		return true;
29 | 	}
30 | 	
31 | 	bool insert(std::string str){
32 | 		const uint8_t* key_c = reinterpret_cast<const uint8_t*>(str.c_str());
33 | 		return insert(key_c);
34 | 	}
35 | 	
36 | 	void flush(){
37 | 		trie_.encode(
38 | 				bitvector_,
39 | 				pending_keys_, key_len_,
40 | 				0,
41 | 				pending_key_count_
42 | 			);
43 | 		for (std::size_t i = 0; i < pending_key_count_; i++)
44 | 			delete [] pending_keys_[i];
45 | 		pending_keys_.clear();
46 | 		n_ = pending_key_count_;
47 | 		pending_key_count_ = 0;
48 | 		bitvector_.compact();
49 | 	}
50 | 	
51 | 	std::size_t locate(const uint8_t* key){
52 | 		std::size_t iter = 0;
53 | 		std::size_t key_index = trie_.locate(bitvector_,
54 | 			iter,key,key_len_,0 ,n_);
55 | 		return key_index;
56 | 	}
57 | 	
58 | 	std::size_t locate(std::string str){
59 | 		const uint8_t* key_c  = reinterpret_cast<const uint8_t*>(str.c_str());
60 | 		return locate(key_c);
61 | 	}
62 | 	
63 | 	virtual ~EntropyTrie(){
64 | 		for (std::size_t i = 0; i < pending_key_count_; i++)
65 | 			delete [] pending_keys_[i];
66 | 	}
67 | 
68 | };


--------------------------------------------------------------------------------
/src/trie/bit_vector.cpp:
--------------------------------------------------------------------------------
#include "bit_vector.hpp"
#include <cstdlib>
#include <cstring>
#include <new>
 3 | 
 4 | namespace cindex
 5 | {
 6 | 	template<typename BlockType>
 7 | 	bit_vector<BlockType>::bit_vector()
 8 | 		: buf_(NULL), size_(0), capacity_(8)
 9 | 	{
10 | 		resize();
11 | 	}
12 | 
13 | 	template<typename BlockType>
14 | 	bit_vector<BlockType>::~bit_vector()
15 | 	{
16 | 		clear();
17 | 	}
18 | 
19 | 	template<typename BlockType>
20 | 	void
21 | 	bit_vector<BlockType>::clear()
22 | 	{
23 | 		free(buf_);
24 | 		buf_ = NULL;
25 | 
26 | 		size_ = 0;
27 | 		capacity_ = 0;
28 | 	}
29 | 
30 | 	template<typename BlockType>
31 | 	void
32 | 	bit_vector<BlockType>::compact()
33 | 	{
34 | 		capacity_ = size_;
35 | 		resize();
36 | 	}
37 | 
38 | 	template<typename BlockType>
39 | 	void
40 | 	bit_vector<BlockType>::resize()
41 | 	{
42 | 		std::size_t old_byte_size = block_info<block_type>::size(size_);
43 | 		std::size_t new_byte_size = block_info<block_type>::size(capacity_);
44 | 
45 | 		block_type* new_buf = reinterpret_cast<block_type*>(realloc(reinterpret_cast<void*>(buf_), new_byte_size));
46 | 
47 | 		if (!new_buf)
48 | 		{
49 | 			//assert(buf_);
50 | 			//assert(new_byte_size > old_byte_size);
51 | 
52 | 			new_buf = reinterpret_cast<block_type*>(malloc(new_byte_size));
53 | 			//assert(new_buf);
54 | 
55 | 			memcpy(new_buf, buf_, old_byte_size);
56 | 
57 | 			free(buf_);
58 | 		}
59 | 
60 | 		buf_ = new_buf;
61 | 
62 | 		if (new_byte_size > old_byte_size)
63 | 			memset(reinterpret_cast<uint8_t*>(new_buf) + old_byte_size, 0, new_byte_size - old_byte_size);
64 | 	}
65 | 
66 | 	template class bit_vector<uint8_t>;
67 | 	template class bit_vector<uint16_t>;
68 | 	template class bit_vector<uint32_t>;
69 | 	template class bit_vector<uint64_t>;
70 | }
71 | 
72 | 


--------------------------------------------------------------------------------
/src/trie/block_info.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once 
 2 | #include <tr1/cstdint>
 3 | 
 4 | //#include <boost/integer/static_log2.hpp>
// Compile-time helper that picks the starting shift width for static_log2.
// Starting from n, n is doubled for as long as shifting 1 left by 2*n still
// gives a non-zero std::size_t; the first n for which that shift overflows to
// zero becomes the result.
template <std::size_t n>
struct choose_initial_n {
	// true while 1 << (2*n) still fits; written as two shifts by n so that no
	// single shift count reaches the width of std::size_t.
	static const bool c = (std::size_t(1) << n << n) != 0;
	// c == true : contribute 0 and recurse with 2*n
	// c == false: contribute n and recurse into the <0> terminator
	static const std::size_t value = !c*n + choose_initial_n<2*c*n>::value;

};
// Recursion terminator.
template <>
struct choose_initial_n<0> {
		static const std::size_t value = 0;
};
// With the seed 16 this evaluates to 16 for a 32-bit std::size_t and to 32 for
// a 64-bit std::size_t, i.e. half the width in bits.
const std::size_t initial_n = choose_initial_n<16>::value;
16 | 
// Compile-time floor(log2(x)) computed by binary search over the bit
// positions: if x has a bit set at position n or above, n is added to the
// result and x is shifted right by n; then n is halved and the search
// continues.
// NOTE(review): x == 0 never reaches the <1, 0> terminator below, so x must be
// non-zero. Presumably a stand-in for the commented-out
// boost/integer/static_log2.hpp include - confirm.
template <std::size_t x,std::size_t n = initial_n>
struct static_log2 {
	// true when x has at least one bit set at position n or higher
	static const bool c = (x >> n) > 0;
	// note the precedence: x>>c*n is x >> (c*n)
	static const std::size_t value =  c*n + (static_log2< (x>>c*n), n/2 >::value);
};
// Recursion terminator: log2(1) == 0.
template <>
struct static_log2<1, 0> { 
	static const std::size_t value = 0;
};
26 | 
27 | namespace cindex
28 | {
29 | 	template<typename BlockType>
30 | 	class block_info
31 | 	{
32 | 	public:
33 | 		typedef BlockType block_type;
34 | 
35 | 		static const std::size_t bytes_per_block = sizeof(block_type);
36 | 		static const std::size_t bits_per_block = bytes_per_block * 8;
37 | 		static const std::size_t bit_mask = bits_per_block - 1;
38 | 		static const std::size_t log_bits_per_block = static_log2<bits_per_block>::value;
39 | 
40 | 		static std::size_t
41 | 		block_count(std::size_t n) 
42 | 		{
43 | 			return (n + bits_per_block - 1) / bits_per_block;
44 | 		}
45 | 
46 | 		static std::size_t
47 | 		size(std::size_t n) 
48 | 		{
49 | 			return block_count(n) * bytes_per_block;
50 | 		}
51 | 	};
52 | }
53 | 
54 | 


--------------------------------------------------------------------------------
/src/trie/main.cpp:
--------------------------------------------------------------------------------
 1 | using namespace std;
 2 | #include <vector>
 3 | #include <iostream>
 4 | #include <sstream>
 5 | #include <string>
 6 | #include <stdint.h>
 7 | #include "EntropyTrie.hpp"
 8 | 
 9 | int main() {
10 | 	EntropyTrie trie(5);
11 | 	vector<string> str;
12 | 	str.push_back("aaaaa");
13 | 	str.push_back("aaaab");
14 | 	str.push_back("azaac");
15 | 	str.push_back("azzad");
16 | 	str.push_back("azzze");
17 | 	str.push_back("zaaaf");
18 | 	str.push_back("zazzg");
19 | 	str.push_back("zzaah");
20 | 	str.push_back("zzzai");
21 | 	str.push_back("zzzzj");
22 | 	for (string s : str){
23 | 		trie.insert(s);
24 | 	}
25 | 	trie.flush();
26 | 	size_t pos = trie.locate("aaaab");
27 | 	std::cout << pos << std::endl;
28 | 	
29 | 
30 | 	return 0;
31 | }
32 | 


--------------------------------------------------------------------------------
/src/trie/sign_interleave.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | 
 4 | 
 5 | namespace cindex
 6 | {
 7 | 	class sign_interleave
 8 | 	{
 9 | 	public:
10 | 		template<typename T>
11 | 		static T encode(const T& v)
12 | 		{
13 | 			// 0, 1, 2, 3, ... => 0, 2, 4, 8, ...
14 | 			// -1, -2, -3 -4, ... => 1, 3, 5, 7, ...
15 | 			if ((v & (static_cast<T>(1) << (sizeof(T) * 8 - 1))) == 0)
16 | 				return static_cast<T>(v << 1);
17 | 			else
18 | 				return static_cast<T>(((~v) << 1) | 1);
19 | 		}
20 | 
21 | 		template<typename T>
22 | 		static T decode(const T& v)
23 | 		{
24 | 			// 0, 2, 4, 8, ... => 0, 1, 2, 3, ...
25 | 			// 1, 3, 5, 7, ... => -1, -2, -3, -4, ...
26 | 			if ((v & 1) == 0)
27 | 				return static_cast<T>((v >> 1) & ~(static_cast<T>(1) << (sizeof(T) * 8 - 1)));
28 | 			else
29 | 				return static_cast<T>(~(v >> 1) | (static_cast<T>(1) << (sizeof(T) * 8 - 1)));
30 | 		}
31 | 	};
32 | }
33 | 
34 | 


--------------------------------------------------------------------------------
/tests/BasicSetTest.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "varint/BasicSet.h"
  3 | #include <vector>
  4 | #include <set>
  5 | #include <iostream>
  6 | #include <sstream>
  7 | #include <assert.h>
  8 | #include <algorithm>
  9 | 
 10 | using namespace std;
 11 | 
 12 | bool testvec(set<unsigned int>& data)
 13 | {
 14 | 	stringstream ss;
 15 | 	{
 16 |       BasicSet myset2;
 17 |       for (auto i : data)
 18 | 	  {
 19 | 	    myset2.addDoc(i);
 20 |       }
 21 | 	  // cout << "added " << data.size() << endl;
 22 | 	  // myset2.flush();
 23 |       myset2.compact();
 24 |       myset2.write(ss);
 25 | 	}
 26 | 
 27 | 	BasicSet myset1;
 28 | 	myset1.read(ss);
 29 | 
 30 | 	assert(data.size() == myset1.size());
 31 | 	BasicSet::Iterator it2(&myset1);
 32 | 	for (auto idx : data)
 33 | 	{
 34 | 		assert(it2.nextDoc()!=NO_MORE_DOCS );
 35 | 		assert(it2.docID() == idx);
 36 | 	}
 37 | 
 38 | 	assert(it2.nextDoc() == NO_MORE_DOCS);
 39 | 
 40 | 	return true;
 41 | }
 42 | 
 43 | void test(){
 44 | 
 45 | 	for (uint32_t b = 0; b <= 28; ++b) {
 46 |         cout << "testing1... b = " << b << endl;
 47 |         for (size_t length = 128; length < (1U << 12); length += 128) {
 48 | 	        //cout << "   length = " << length << endl;
 49 | 	        set<unsigned int> data;
 50 | 	        for (size_t i = 0; i < length; ++i) {
 51 |                 unsigned int x = (i + (24 - i) * (12 - i)) % (1U << b);
 52 | 				data.insert(x);
 53 |             }
 54 | 			if (!testvec(data)) {
 55 | 				return;
 56 | 			}
 57 |         }
 58 |         cout << "testing2... b = " << b << endl;
 59 |        	for (size_t length = 1; length < (1U << 9); ++length) {
 60 | 	      //  cout << "   length = " << length << endl;
 61 | 		    set<unsigned int> data;
 62 | 	        for (size_t i = 0; i < length; ++i) {
 63 | 	           data.insert((33231 - i + i * i) % (1U << b));
 64 | 	        }
 65 | 	        if (!testvec(data)) {
 66 | 				return;
 67 | 			}
 68 | 		}
 69 | 	}
 70 | 	cout << "All test passed succesfully!!" << endl;
 71 | }
 72 | 
// Scratch experiment that is not called from main(): it only fills a set with
// the values 1..10. The upper_bound lookup that was meant to follow is
// commented out.
void binarySearch()
{
	set<int> basic;

	for (int i = 1; i <= 10; ++i)
		basic.insert(i);

	/*
	int search = 11;

	set<int>::iterator upper;

	if (basic.find(search) == basic.end())
	{
		upper = upper_bound(basic.begin(), basic.end(), search);
	}

	cout << "upper " << upper - basic.begin() << endl;
	*/
}
 93 | 
 94 | 
// Entry point: runs the BasicSet round-trip tests. Failures surface through
// assert() inside testvec().
int main()
{
	test();
	// binarySearch();
}
100 | 


--------------------------------------------------------------------------------
/tests/CompressedSetTests.cpp:
--------------------------------------------------------------------------------
 1 | // g++ --std=gnu++0x CompressedSet.cpp
 2 | 
 3 | #include "varint/CompressedSet.h"
 4 | #include "varint/bitpacking/memutil.h"
 5 | #include <vector>
 6 | #include <set>
 7 | #include <assert.h>
 8 | #include <iostream>
 9 | 
10 | using namespace std;
11 | 
12 | bool testvec(set<unsigned int>& data)
13 | {
14 | 	stringstream ss;
15 | 	{
16 |       CompressedSet myset2;
17 |       for (auto i : data)
18 | 	  {
19 | 	    myset2.addDoc(i);
20 |       }
21 | 	  // cout << "added " << data.size() << endl;
22 | 	  // myset2.flush();
23 |       myset2.compact();
24 |       myset2.write(ss);
25 | 	}
26 | 
27 | 	CompressedSet myset1;
28 | 	myset1.read(ss);
29 | 
30 | 	assert(data.size() == myset1.size());
31 | 	CompressedSet::Iterator it2(&myset1);
32 | 	for (auto idx : data)
33 | 	{
34 | 		assert(it2.nextDoc()!=NO_MORE_DOCS );
35 | 		assert(it2.docID() == idx);
36 | 	}
37 | 	assert(it2.nextDoc() == NO_MORE_DOCS);
38 | 
39 | 	return true;
40 | }
41 | 
42 | void test(){
43 | 
44 | 	for (uint32_t b = 0; b <= 28; ++b) {
45 |         cout << "testing1... b = " << b << endl;
46 |         for (size_t length = 128; length < (1U << 12); length += 128) {
47 | 	        //cout << "   length = " << length << endl;
48 | 	        set<unsigned int> data;
49 | 	        for (size_t i = 0; i < length; ++i) {
50 |                 unsigned int x = (i + (24 - i) * (12 - i)) % (1U << b);
51 | 				data.insert(x);
52 |             }
53 | 			if (!testvec(data)) {
54 | 				return;
55 | 			}
56 |         }
57 |         cout << "testing2... b = " << b << endl;
58 |        	for (size_t length = 1; length < (1U << 9); ++length) {
59 | 	      //  cout << "   length = " << length << endl;
60 | 		    set<unsigned int> data;
61 | 	        for (size_t i = 0; i < length; ++i) {
62 | 	           data.insert((33231 - i + i * i) % (1U << b));
63 | 	        }
64 | 	        if (!testvec(data)) {
65 | 				return;
66 | 			}
67 | 		}
68 | 	}
69 | 	cout << "All test passed succesfully!!" << endl;
70 | }
71 | 
72 | 
// Entry point: runs the CompressedSet round-trip tests. Failures surface
// through assert() inside testvec().
int main() {
	test();
}
76 | 


--------------------------------------------------------------------------------
/tests/InvertedIndexImplTest.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "src/InvertedIndexImpl.hpp"
 3 | #include "lib/tpunit++.hpp"
 4 | #include "src/KVStoreInMemory.hpp"
 5 | #include "varint/SetFactory.h"
 6 | #include "varint/BasicSetFactory.h"
 7 | #include <memory>
 8 | 
 9 | using namespace std;
10 | 
11 | /**
12 |  * Test InvertedIndex
13 |  */
14 | struct InvertedIndexImplTest : tpunit::TestFixture
15 | {
16 | 	InvertedIndexImplTest() : tpunit::TestFixture
17 | 	(
18 | 		TEST(InvertedIndexImplTest::testBasicSet),
19 | 		TEST(InvertedIndexImplTest::testCompressedSet)
20 | 	)
21 | 	{ }
22 | 
23 | 	void testBasicSet()
24 | 	{
25 | 		shared_ptr<ISetFactory> setFactory = make_shared<BasicSetFactory>();
26 | 		shared_ptr<KVStore::IKVStore> invertedIndexStore = make_shared<KVStore::KVStoreInMemory>("/tmp/TestInvertedIndexBasicSet");
27 | 		InvertedIndexImpl invertedIndex(invertedIndexStore, setFactory);
28 | 		invertedIndex.add(1, 44);
29 | 		ASSERT_TRUE(invertedIndex.exist(1));
30 | 		ASSERT_TRUE(1 == invertedIndex.remove(1, 44));
31 | 	}
32 | 
33 | 	void testCompressedSet()
34 | 	{
35 | 		shared_ptr<ISetFactory> setFactory = make_shared<SetFactory>();
36 | 		shared_ptr<KVStore::IKVStore> invertedIndexStore = make_shared<KVStore::KVStoreInMemory>("/tmp/TestInvertedIndexCompressedSet");
37 | 		InvertedIndexImpl invertedIndex(invertedIndexStore, setFactory);
38 | 		invertedIndex.add(1, 44);
39 | 		ASSERT_TRUE(invertedIndex.exist(1));
40 | 	}
41 | 
42 | };
43 | 


--------------------------------------------------------------------------------
/tests/Makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | # $@ name of the target
 3 | # $^ name of all prerequisites with duplicates removed
 4 | # $< name of the first prerequisite
 5 | 
# Host detection; OSNAME selects the platform-specific flags below.
OSNAME  := $(shell uname -s)
ARCH := $(shell uname -m)

# Output directory for the test binaries, the main build directory and the
# LevelDB checkout, all relative to this tests/ directory.
BUILD_DIR_TEST = ../build/tests
BUILD_DIR = ../build
LEVELDB_DIR = ../leveldb

# Prebuilt objects/archives that every test binary is linked against.
OBJECTS =  $(BUILD_DIR)/CompressedSet.a $(BUILD_DIR)/TokenizerImpl.o $(LEVELDB_DIR)/libleveldb.a

# Flags shared by all platforms.
CFLAGS_PLAIN = -std=gnu++0x -Wall -g  -pedantic -msse3 -I$(LEVELDB_DIR)/include -I../lib -I../lib/rapidxml-1.13 -I..

# NOTE: CFLAGS and LIBS are only defined for Darwin and Linux; on any other
# host they stay empty.
ifeq ($(OSNAME), Darwin)
CFLAGS = $(CFLAGS_PLAIN) -I/opt/local/include
LIBS = -L/opt/local/lib -L/usr/local/lib -lleveldb -lm -levent
endif

ifeq ($(OSNAME), Linux)
CFLAGS = $(CFLAGS_PLAIN) -I/usr/local/include -I/ms/dist/fsf/PROJ/libevent/2.0.19/include -D_GLIBCXX_USE_NANOSLEEP
LIBS = -L/usr/local/lib -L/ms/dist/fsf/PROJ/libevent/2.0.19/lib -L../leveldb-1.7.0 -lm -levent -lpthread
endif
26 | 
27 | 
# Neither target produces a file of its own name; declaring them phony keeps a
# stray file called "all" or "clean" from masking them.
.PHONY: all clean

# "all" comes first so that a bare `make` builds the tests. With "clean" as the
# first rule, a bare `make` used to wipe the test build directory instead.
all: memory_leak_test thread_test document_test tokenizer_test xml_test inverted_index_test compressed_set_test basic_set_test statistics_test

clean:
	rm -f $(BUILD_DIR_TEST)/*
32 | 
# One rule per test binary. Every rule compiles its single source file together
# with $(OBJECTS) ($^ = all prerequisites) and writes the executable to
# $(BUILD_DIR_TEST)/<target name> ($@). The target names themselves are never
# created as files in this directory, so each rule is re-run on every
# invocation. compressed_set_test2 is not part of "all".
memory_leak_test: memory_leak_test.cpp $(OBJECTS)
	$(CXX) $(CFLAGS) -o $(BUILD_DIR_TEST)/$@ $^ $(LIBS)

thread_test: thread_test.cpp $(OBJECTS)
	$(CXX) $(CFLAGS) -o $(BUILD_DIR_TEST)/$@ $^ $(LIBS)

document_test: document_test.cpp $(OBJECTS)
	$(CXX) $(CFLAGS) -o $(BUILD_DIR_TEST)/$@ $^ $(LIBS)

tokenizer_test: tokenizer_test.cpp $(OBJECTS)
	$(CXX) $(CFLAGS) -o $(BUILD_DIR_TEST)/$@ $^ $(LIBS)

xml_test: xml_test.cpp $(OBJECTS)
	$(CXX) $(CFLAGS) -o $(BUILD_DIR_TEST)/$@ $^ $(LIBS)

inverted_index_test: inverted_index_test.cpp $(OBJECTS)
	$(CXX) $(CFLAGS) -o $(BUILD_DIR_TEST)/$@ $^ $(LIBS)

compressed_set_test: CompressedSetTests.cpp $(OBJECTS)
	$(CXX) $(CFLAGS) -o $(BUILD_DIR_TEST)/$@ $^ $(LIBS)
	
compressed_set_test2: CompressedSet_test.cpp $(OBJECTS)
	$(CXX) $(CFLAGS) -o $(BUILD_DIR_TEST)/$@ $^ $(LIBS)

basic_set_test: BasicSetTest.cpp $(OBJECTS)
	$(CXX) $(CFLAGS) -o $(BUILD_DIR_TEST)/$@ $^ $(LIBS)

statistics_test: statistics_test.cpp $(OBJECTS)
	$(CXX) $(CFLAGS) -o $(BUILD_DIR_TEST)/$@ $^ $(LIBS)
62 | 	
63 | 


--------------------------------------------------------------------------------
/tests/SparseSet_test.cpp:
--------------------------------------------------------------------------------
 1 | #include <string>
 2 | #include <vector>
 3 | #include <map>
 4 | #include <fstream>
 5 | #include <memory>
 6 | #include <exception>
 7 | #include <iostream>
 8 | #include <iterator>
 9 | 
10 | #include "lib/tpunit++.hpp"
11 | #include "TestUtils.hpp"
12 | #include "src/SparseSet.hpp"
13 | using namespace std;
14 | 
/**
 * tpunit++ fixture for SparseSet: membership queries and iteration.
 */
struct SparseSetTest : tpunit::TestFixture
{
	SparseSetTest() : tpunit::TestFixture
	(
		TEST(SparseSetTest::testSet)
	)
	{}
	
	// Inserts 65535 and 512, checks membership of 65535 and of its two
	// neighbours, then walks begin()..end() and expects the values back in
	// insertion order.
	void testSet()
	{
		SparseSet set;
		
		set.insert(65535);
		set.insert(512);
		ASSERT_EQUAL(set.ismember(65534),false);
		ASSERT_EQUAL(set.ismember(65535),true);
		ASSERT_EQUAL(set.ismember(65536),false);
		
		// The asserted order is 65535 then 512, i.e. insertion order rather
		// than sorted order.
		const unsigned int* iter = set.begin();
		ASSERT_EQUAL(*iter,65535);
		iter++;
		ASSERT_EQUAL(*iter,512);
		iter++;
		ASSERT_EQUAL(iter,set.end());
	}
};
41 | 
// Entry point: constructing the fixture registers its tests with tpunit++
// before they are run.
int main()
{
	SparseSetTest test;

	/**
	 * Run all of the registered tpunit++ tests. Returns 0 if
	 * all tests are successful, otherwise returns the number
	 * of failing assertions.
	 */
	return tpunit::Tests::Run();
}


--------------------------------------------------------------------------------
/tests/StatisticsTest.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "src/Statistics.hpp"
 3 | #include "lib/tpunit++.hpp"
 4 | #include <memory>
 5 | #include <iostream>
 6 | #include <chrono>
 7 | #include <unordered_map>
 8 | 
 9 | using namespace std;
10 | 
11 | /**
12 |  * Test Statistics
13 |  */
14 | struct StatisticsTest : tpunit::TestFixture
15 | {
16 | 	StatisticsTest() : tpunit::TestFixture
17 | 	(
18 | 		TEST(StatisticsTest::testQueries),
19 | 		TEST(StatisticsTest::testRequestTimes)
20 | 	)
21 | 	{ }
22 | 
23 | 	void printTopQueries(const map<unsigned int, set<string>, classcomp>& topQueries)
24 | 	{
25 | 		for (auto it = topQueries.begin(); it != topQueries.end(); ++it)
26 | 		{
27 | 			cout << it->first << " ";
28 | 			for (auto qs : it->second)
29 | 			{
30 | 				cout << qs << " ";
31 | 			}
32 | 			cout << endl;
33 | 		}
34 | 	}
35 | 	
36 | 	void testQueries()
37 | 	{
38 | 		Statistics s;
39 | 		
40 | 		s.addQuery("snoop");
41 | 		s.addQuery("dawg");
42 | 		s.addQuery("werd");
43 | 		s.addQuery("snoop");
44 | 		s.addQuery("snoop");
45 | 		s.addQuery("snoop");
46 | 		s.addQuery("dawg");
47 | 		s.addQuery("werd");
48 | 		s.addQuery("yo");
49 | 		
50 | 		auto topQueries = s.getTopQueries(3);
51 | 		printTopQueries(topQueries);
52 | 		
53 | 		ASSERT_EQUAL(topQueries.size(), 3);
54 | 		
55 | 		topQueries = s.getTopQueries(2);
56 | 		printTopQueries(topQueries);
57 | 		
58 | 		auto it = topQueries.begin();
59 | 		ASSERT_EQUAL(topQueries.size(), 2);
60 | 		
61 | 		ASSERT_EQUAL(it->first, 4);
62 | 		ASSERT_EQUAL((it->second).size(), 1);
63 | 		
64 | 		++it;
65 | 		ASSERT_EQUAL(it->first, 2);
66 | 		ASSERT_EQUAL((it->second).size(), 2);
67 | 
68 | 	}
69 | 
70 | 	void testRequestTimes()
71 | 	{
72 | 		chrono::high_resolution_clock::time_point t0 = chrono::high_resolution_clock::now();
73 | 		Statistics s;
74 | 		chrono::high_resolution_clock::time_point t1 = chrono::high_resolution_clock::now();
75 | 		
76 | 		chrono::nanoseconds timeTaken = chrono::duration_cast<chrono::nanoseconds>(t1 - t0);
77 | 		
78 | 		string req = "creating statistics";
79 | 		s.logRequestTime(req, timeTaken);
80 | 		
81 | 		auto requestTimes = s.getRequestTimes(req);
82 | 		cout << req << " took ";
83 | 		for (auto ms : requestTimes)
84 | 		{
85 | 			cout << ms.count() << "ns ";
86 | 		}
87 | 		cout << endl;
88 | 	}
89 | 	
90 | };
91 | 


--------------------------------------------------------------------------------
/tests/TestUtils.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef TESTUTILS_HPP
 3 | #define TESTUTILS_HPP
 4 | 
 5 | #include <string>
 6 | #include <exception>
 7 | #include <fstream>
 8 | 
 9 | using namespace std;
10 | 
/**
 * Reads a whole file into a string.
 *
 * Declared inline because this header defines the function; without it,
 * including the header from two translation units of one program gives a
 * multiple-definition link error.
 *
 * @param fileName path of the file to read
 * @return the complete file contents (embedded NUL bytes are preserved)
 * @throws std::string with the text "Could not open <fileName>!" when the
 *         file cannot be opened
 */
inline std::string readFile(const std::string& fileName)
{
	std::ifstream fin(fileName.c_str());

	if (fin.fail())
		throw "Could not open " + fileName + "!";

	// Read in fixed-size chunks and append exactly the number of bytes each
	// read delivered. This needs no seek/tell (whose failure value was never
	// checked), never exposes uninitialised buffer bytes after a short read,
	// and cannot leak a heap buffer.
	std::string contents;
	char chunk[4096];
	while (fin.read(chunk, sizeof(chunk)) || fin.gcount() > 0)
		contents.append(chunk, static_cast<std::size_t>(fin.gcount()));

	return contents;
}
32 | 
/**
 * Returns a copy of `input` with every newline ('\n') and tab ('\t') removed;
 * all other characters are kept in their original order.
 *
 * Declared inline because this header defines the function; without it,
 * including the header from two translation units of one program gives a
 * multiple-definition link error.
 */
inline std::string stripSpecialCharacters(const std::string& input)
{
	std::string clean;
	clean.reserve(input.size());   // the result is never longer than the input

	for (char c : input)
	{
		if (c == '\n' || c == '\t')
			continue;

		clean += c;
	}

	return clean;
}
47 | 
48 | #endif
49 | 


--------------------------------------------------------------------------------
/tests/TokenizerTest.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <string>
 3 | #include <vector>
 4 | #include <map>
 5 | #include <fstream>
 6 | #include <memory>
 7 | #include <exception>
 8 | #include <iostream>
 9 | #include <iterator>
10 | 
11 | #include "src/ITokenizer.h"
12 | #include "src/TokenizerImpl.h"
13 | #include "src/Constants.hpp"
14 | #include "lib/tpunit++.hpp"
15 | #include "TestUtils.hpp"
16 | 
17 | using namespace std;
18 | 
19 | 
20 | /**
21 |  * Test the tokenizer
22 |  */
23 | struct TokenizerTest : tpunit::TestFixture
24 | {
25 | 	TokenizerTest() : tpunit::TestFixture
26 | 	(
27 | 		TEST(TokenizerTest::testTokenizing)
28 | 	)
29 | 	{ }
30 | 
31 | 	void testTokenizing()
32 | 	{
33 | 		string text(" snoop  doggy dawg");
34 | 
35 | 		shared_ptr<ITokenizer> tokenizer = make_shared<TokenizerImpl>();
36 | 
37 | 		tokenizer->setString(text, "field1");
38 | 		
39 | 		while (tokenizer->nextToken())
40 | 		{
41 | 			string token = tokenizer->getToken();
42 | 			cout << token << endl;			
43 | 		}
44 | 		
45 | 		/*
46 | 		vector<string> words = qp.getTokens();
47 | 
48 | 		ASSERT_EQUAL(words.size(), 3);
49 | 		ASSERT_EQUAL(words[0].compare("snoop"), 0);
50 | 		ASSERT_EQUAL(words[1].compare("doggy"), 0);
51 | 		ASSERT_EQUAL(words[2].compare("dawg"), 0);
52 | 		*/
53 | 	}
54 | 
55 | };


--------------------------------------------------------------------------------
/tests/XmlTest.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <string>
 3 | #include <vector>
 4 | #include <map>
 5 | #include <fstream>
 6 | #include <memory>
 7 | #include <exception>
 8 | #include <iostream>
 9 | #include <iterator>
10 | 
11 | 
12 | #include "lib/rapidxml-1.13/rapidxml.hpp"
13 | #include "lib/tpunit++.hpp"
14 | #include "TestUtils.hpp"
15 | 
16 | using namespace std;
17 | 
18 | /**
19 |  * Test rapid xml
20 |  */
21 | struct XmlTest : tpunit::TestFixture
22 | {
23 | 	XmlTest() : tpunit::TestFixture
24 | 	(
25 | 		TEST(XmlTest::testParsingDocument)
26 | 	)
27 | 	{ }
28 | 
29 | 	void testParsingDocument()
30 | 	{
31 | 		string fileStr;
32 | 
33 | 		try
34 | 		{
35 | 			fileStr = readFile("../data/document01.xml");
36 | 		}
37 | 		catch(const string& e)
38 | 		{
39 | 			TRACE(e.c_str());
40 | 			ABORT();
41 | 		}
42 | 
43 | 		// std::cout << fileStr << std::endl;
44 | 
45 | 		vector<char> xmlVec;
46 | 		copy(fileStr.begin(), fileStr.end(), back_inserter(xmlVec));
47 | 		xmlVec.push_back('\n');
48 | 
49 | 		rapidxml::xml_document<> doc;																// character type defaults to char
50 | 		doc.parse<rapidxml::parse_default>(&xmlVec[0]);		// 0 means default parse flags
51 | 
52 | 		string root(doc.first_node()->name());
53 | 		ASSERT_EQUAL(root.compare("document"), 0);
54 | 
55 | 		vector<string> fields;
56 | 
57 | 		for (rapidxml::xml_node<>* n = doc.first_node()->first_node(); n; n = n->next_sibling())
58 | 		{
59 | 			string name(n->name());
60 | 			// char* v = n->value();
61 | 			// if (!v || !*v) v = "(empty)";
62 | 			string value(n->value());
63 | 			fields.push_back(name + ":" + value);
64 | 		}
65 | 
66 | 		ASSERT_EQUAL(fields.size(), 3);
67 | 		ASSERT_EQUAL(fields[0].compare("title:Input document"), 0);
68 | 		ASSERT_EQUAL(fields[1].compare("input1: some text"), 0);
69 | 		ASSERT_EQUAL(fields[2].compare("input1: some more text"), 0);
70 | 	}
71 | 
72 | };
73 | 
74 | 


--------------------------------------------------------------------------------
/tests/document_test.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <string>
 3 | #include <vector>
 4 | #include <map>
 5 | #include <fstream>
 6 | #include <memory>
 7 | #include <exception>
 8 | #include <iostream>
 9 | #include <iterator>
10 | 
11 | #include "DocumentImplTest.hpp"
12 | 
13 | #include "lib/tpunit++.hpp"
14 | 
15 | using namespace std;
16 | 
17 | int main()
18 | {
19 | 	// QueryParserTest __QueryParserTest;
20 | 	// XmlTest __XmlTest;
21 | 	DocumentImplTest __DocumentImplTest;
22 | 	// InvertedIndexSimpleTest __InvertedIndexSimpleTest;
23 | 
24 | 	/**
25 | 	 * Run all of the registered tpunit++ tests. Returns 0 if
26 | 	 * all tests are successful, otherwise returns the number
27 | 	 * of failing assertions.
28 | 	 */
29 | 	return tpunit::Tests::Run();
30 | 
31 | }
32 | 


--------------------------------------------------------------------------------
/tests/inverted_index_test.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "InvertedIndexImplTest.hpp"
 3 | // #include "InvertedIndexSimpleBatchTest.hpp"
 4 | #include "lib/tpunit++.hpp"
 5 | 
 6 | using namespace std;
 7 | 
 8 | int main()
 9 | {
10 | 	InvertedIndexImplTest __InvertedIndexImplTest;
11 | 	// InvertedIndexSimpleBatchTest __InvertedIndexSimpleBatchTest;
12 | 
13 | 	/**
14 | 	 * Run all of the registered tpunit++ tests. Returns 0 if
15 | 	 * all tests are successful, otherwise returns the number
16 | 	 * of failing assertions.
17 | 	 */
18 | 	return tpunit::Tests::Run();
19 | 
20 | }
21 | 
22 | 


--------------------------------------------------------------------------------
/tests/memory_leak_test.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include <sstream>
 3 | #include <vector>
 4 | #include <stack>
 5 | #include <thread>
 6 | #include <mutex>
 7 | #include <atomic>
 8 | #include <condition_variable>
 9 | #include <chrono>
10 | #include <random>
11 | #include <cstdlib>
12 | 
13 | using namespace std;
14 | 
// Allocates 1000 bytes and returns without releasing them.
// NOTE(review): given the file name (memory_leak_test.cpp) the leak looks
// deliberate, presumably to give a leak checker something to report - do not
// "fix" it without confirming.
void work()
{
	char *c = new char[1000];
}
19 | 
// Entry point: runs the allocation in work() once and exits with status 0.
int main()
{
	work();

	return 0;
}
26 | 


--------------------------------------------------------------------------------
/tests/runTests.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | TEST_DIR="../build/tests"
 3 | 
 4 | for file in `find $TEST_DIR -perm -g=x -type f`
 5 | do
 6 | 	echo "running $file"
 7 | 	$file
 8 | done
 9 | 
10 | 


--------------------------------------------------------------------------------
/tests/statistics_test.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "StatisticsTest.hpp"
 3 | #include "lib/tpunit++.hpp"
 4 | 
 5 | using namespace std;
 6 | 
 7 | int main()
 8 | {
 9 | 	StatisticsTest __StatisticsTestTest;
10 | 
11 | 	/**
12 | 	 * Run all of the registered tpunit++ tests. Returns 0 if
13 | 	 * all tests are successful, otherwise returns the number
14 | 	 * of failing assertions.
15 | 	 */
16 | 	return tpunit::Tests::Run();
17 | 
18 | }
19 | 
20 | 


--------------------------------------------------------------------------------
/tests/tokenizer_test.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "TokenizerTest.hpp"
 3 | #include "lib/tpunit++.hpp"
 4 | 
 5 | using namespace std;
 6 | 
 7 | int main()
 8 | {
 9 | 	TokenizerTest __TokenizerTest;
10 | 	
11 | 	/**
12 | 	 * Run all of the registered tpunit++ tests. Returns 0 if
13 | 	 * all tests are successful, otherwise returns the number
14 | 	 * of failing assertions.
15 | 	 */
16 | 	return tpunit::Tests::Run();
17 | 
18 | }
19 | 


--------------------------------------------------------------------------------
/tests/xml_test.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "XmlTest.hpp"
 3 | #include "lib/tpunit++.hpp"
 4 | 
 5 | using namespace std;
 6 | 
 7 | int main()
 8 | {
 9 | 	XmlTest __XmlTest;
10 | 
11 | 	/**
12 | 	 * Run all of the registered tpunit++ tests. Returns 0 if
13 | 	 * all tests are successful, otherwise returns the number
14 | 	 * of failing assertions.
15 | 	 */
16 | 	return tpunit::Tests::Run();
17 | 
18 | }
19 | 


--------------------------------------------------------------------------------
/unicode/UnicodeUtils.cpp:
--------------------------------------------------------------------------------
 1 | /////////////////////////////////////////////////////////////////////////////
 2 | // Copyright (c) 2009-2011 Alan Wright. All rights reserved.
 3 | // Distributable under the terms of either the Apache License (Version 2.0)
 4 | // or the GNU Lesser General Public License.
 5 | /////////////////////////////////////////////////////////////////////////////
 6 | 
 7 | #include "LuceneInc.h"
 8 | #include "MiscUtils.h"
 9 | #include "UnicodeUtils.h"
10 | #include "unicode/guniprop.h"
11 | 
12 | namespace Lucene
13 | {
14 |     UnicodeUtil::~UnicodeUtil()
15 |     {
16 |     }
17 |     // The classification helpers below forward to the g_unichar_* functions of guniprop.h.
18 |     bool UnicodeUtil::isAlnum(wchar_t c)
19 |     {
20 |         return g_unichar_isalnum(c) != 0;
21 |     }
22 |     // Alphabetic test, delegated to g_unichar_isalpha.
23 |     bool UnicodeUtil::isAlpha(wchar_t c)
24 |     {
25 |         return g_unichar_isalpha(c) != 0;
26 |     }
27 |     // Digit test, delegated to g_unichar_isdigit.
28 |     bool UnicodeUtil::isDigit(wchar_t c)
29 |     {
30 |         return g_unichar_isdigit(c) != 0;
31 |     }
32 |     // Whitespace test, delegated to g_unichar_isspace.
33 |     bool UnicodeUtil::isSpace(wchar_t c)
34 |     {
35 |         return g_unichar_isspace(c) != 0;
36 |     }
37 |     // Upper-case test, delegated to g_unichar_isupper.
38 |     bool UnicodeUtil::isUpper(wchar_t c)
39 |     {
40 |         return g_unichar_isupper(c) != 0;
41 |     }
42 |     // Lower-case test, delegated to g_unichar_islower.
43 |     bool UnicodeUtil::isLower(wchar_t c)
44 |     {
45 |         return g_unichar_islower(c) != 0;
46 |     }
47 |     // True when the character's type is G_UNICODE_OTHER_LETTER.
48 |     bool UnicodeUtil::isOther(wchar_t c)
49 |     {
50 |         return G_UNICODE_OTHER_LETTER == g_unichar_type(c);
51 |     }
52 |     // True when the character's type is G_UNICODE_NON_SPACING_MARK.
53 |     bool UnicodeUtil::isNonSpacing(wchar_t c)
54 |     {
55 |         return G_UNICODE_NON_SPACING_MARK == g_unichar_type(c);
56 |     }
57 |     // Upper-case mapping via g_unichar_toupper, converted back to wchar_t.
58 |     wchar_t UnicodeUtil::toUpper(wchar_t c)
59 |     {
60 |         return static_cast<wchar_t>(g_unichar_toupper(c));
61 |     }
62 |     // Lower-case mapping via g_unichar_tolower, converted back to wchar_t.
63 |     wchar_t UnicodeUtil::toLower(wchar_t c)
64 |     {
65 |         return static_cast<wchar_t>(g_unichar_tolower(c));
66 |     }
67 |     // Out-of-line (empty) destructors for the result holder types.
68 |     UTF8Result::~UTF8Result()
69 |     {
70 |     }
71 |     
72 |     UnicodeResult::~UnicodeResult()
73 |     {
74 |     }
75 | }
76 | 


--------------------------------------------------------------------------------
/varint/BasicSet.h:
--------------------------------------------------------------------------------
  1 | #ifndef BASIC_SET_H__
  2 | #define BASIC_SET_H__
  3 | 
  4 | #include "Set.h"
  5 | #include <set>
  6 | #include <limits>
  7 | 
  8 | using namespace std;
  9 | 
 10 | class BasicSet;
 11 | 
 12 | class BasicSet : public Set
 13 | {
 14 | 	// Set implementation that keeps its document ids in a std::set<unsigned int> (member `docs`).
 15 | 	public:
 16 | 
 17 | 		class Iterator : public Set::Iterator
 18 | 		{
 19 | 			private:
 20 | 				set<unsigned int>::iterator cursor;
 21 | 				// Parent set this iterator was created from.
 22 | 				const BasicSet* set;
 23 | 				// NOTE(review): presumably records whether `cursor` has been positioned yet - confirm in BasicSet.cpp.
 24 | 				bool init = false;
 25 | 
 26 | 			public:
 27 | 
 28 | 				Iterator(const BasicSet* parentSet);
 29 | 				Iterator(const BasicSet::Iterator& other);
 30 | 
 31 | 				BasicSet::Iterator& operator=(const BasicSet::Iterator& rhs);
 32 | 				~Iterator();
 33 | 				// Set::Iterator interface.
 34 | 			    unsigned int docID();
 35 | 			    unsigned int nextDoc();
 36 | 				unsigned int Advance(unsigned int target);
 37 | 
 38 | 		};
 39 | 
 40 | 	private:
 41 | 		// Assignment is declared private, so code outside the class cannot assign BasicSet objects.
 42 | 		const BasicSet& operator=(const BasicSet& other);
 43 | 
 44 | 		set<unsigned int> docs;
 45 | 
 46 | 	public:
 47 | 
 48 | 		BasicSet(const BasicSet& other);
 49 | 
 50 | 		/**
 51 | 		 * Swap the content of this set with another set.
 52 | 		 * No copying is done. (Running time complexity is constant.)
 53 | 		 */
 54 | 		void swap(BasicSet & x);
 55 | 
 56 | 
 57 | 		BasicSet();
 58 | 
 59 | 		~BasicSet();
 60 | 		// Stream (de)serialization; the byte format is defined in BasicSet.cpp.
 61 | 		void write(ostream & out);
 62 | 
 63 | 		void read(istream & in);
 64 | 
 65 | 		shared_ptr<Set::Iterator> iterator() const;
 66 | 
 67 | 
 68 | 		/**
 69 | 		 * Add document to this set
 70 | 		 * Note that you must set the bits in increasing order:
 71 | 		 * addDoc(1), addDoc(2) is ok;
 72 | 		 * addDoc(2), addDoc(1) is not ok.
 73 | 		 */
 74 | 		void addDoc(unsigned int docId);
 75 | 		// NOTE(review): a std::set does not itself need ordered insertion; confirm whether the rule above still applies here.
 76 | 		void addDocs(unsigned int* docids,size_t start,size_t len);
 77 | 
 78 | 		BasicSet unorderedAdd(unsigned int docId);
 79 | 
 80 | 		void removeDocId(unsigned int docId);
 81 | 
 82 | 		BasicSet removeDoc(unsigned int docId);
 83 | 
 84 | 		void compact();
 85 | 
 86 | 		void initSet();
 87 | 
 88 | 		/**
 89 | 		 * Gets the number of ids in the set
 90 | 		 * @return docset size
 91 | 		 */
 92 | 		unsigned int size() const;
 93 | 
 94 | 		// NOTE(review): the original remark said find() stops working after flush(), but no flush() is declared in this class - confirm.
 95 | 		bool find(unsigned int target) const;
 96 | 
 97 | };
 98 | 
 99 | #endif
100 | 


--------------------------------------------------------------------------------
/varint/BasicSetFactory.h:
--------------------------------------------------------------------------------
 1 | #ifndef BASIC_SET_FACTORY_H__
 2 | #define BASIC_SET_FACTORY_H__
 3 | 
 4 | #include "Set.h"
 5 | #include "BasicSet.h"
 6 | #include "ISetFactory.h"
 7 | #include <memory>
 8 | 
 9 | // Factory that hands out BasicSet instances through the ISetFactory interface.
10 | class BasicSetFactory : public ISetFactory
11 | {
12 | 	public:
13 | 		// Builds a default-constructed BasicSet and returns it as a shared Set.
14 | 		virtual const shared_ptr<Set> createSparseSet()
15 | 		{
16 | 			shared_ptr<Set> freshSet = make_shared<BasicSet>();
17 | 			return freshSet;
18 | 		}
19 | };
20 | 
21 | #endif 
22 | 


--------------------------------------------------------------------------------
/varint/Codec.h:
--------------------------------------------------------------------------------
 1 | #ifndef CODEC_H__
 2 | #define CODEC_H__
 3 | #include "bitpacking/compositecodec.h"
 4 | #include "bitpacking/simdbinarypacking.h"
 5 | #include "bitpacking/variablebyte.h"
 6 | #include "Source.h"
 7 | #include "Sink.h"
 8 | #include "CompressedDeltaChunk.h"
 9 | #include "bitpacking/util.h"
10 | using namespace std;
11 | 
12 | // Wraps CompositeCodec<SIMDBinaryPacking,VariableByte> to (de)compress blocks of 32-bit ids.
13 | class Codec {
14 | private:
15 | 	CompositeCodec<SIMDBinaryPacking,VariableByte> codec;
16 | 
17 |  public:
18 |     Codec(){}
19 |     ~Codec(){}
20 | 
21 |     /**
22 |      * Encodes srcSize elements read from src into a newly allocated chunk.
23 |      * src must be 16-byte aligned (asserted in debug builds).
24 |      * @return the chunk whose vector received the encoded data
25 |      */
26 |     template<typename srctype>
27 |     __inline__ shared_ptr<CompressedDeltaChunk> Compress(const srctype src, size_t srcSize) const {
28 |         assert(!needPaddingTo128Bits(src));
29 |         const size_t wordCount = (sizeof(*src)*srcSize) / 4; // input length in 32-bit words
30 |         size_t nbyte = codec.compressedSize((const uint32_t *)src, wordCount);
31 |         shared_ptr<CompressedDeltaChunk> compblock(new CompressedDeltaChunk(nbyte));
32 |         vector<uint8,cacheallocator>& v = compblock->getVector();
33 |         // data() instead of &v[0]: indexing an empty vector is undefined behaviour.
34 |         assert(!needPaddingTo128Bits(v.data()));
35 |         size_t memavailable = v.size()/4;
36 |         codec.encodeArray((const uint32_t *)src, wordCount, (uint32_t *)v.data(), memavailable);
37 |         return compblock;
38 |     }
39 | 
40 |     /**
41 |      * Decodes the bytes exposed by src.Peek() into dst; size is passed to decodeArray as the room available.
42 |      * @return memavailable * 4 after decodeArray ran (presumably the decoded byte count - confirm)
43 |      */
44 |     size_t Uncompress(Source& src, unsigned int* dst,size_t size) const  {
45 |        assert(!needPaddingTo128Bits(dst));
46 |        size_t sourceSize;
47 |        const uint8* srcptr = src.Peek(&sourceSize);
48 |        const uint32_t* srcptr2= (const uint32_t*)srcptr;
49 |        assert(!needPaddingTo128Bits(srcptr2));
50 |        size_t memavailable = size;
51 |        codec.decodeArray(srcptr2, sourceSize/4,dst,memavailable);
52 |        return memavailable*4;
53 |     }
54 | 
55 |     // Linear scan: true iff the first element >= target equals target (only meaningful for ascending input).
56 |     bool findInArray(const unsigned int* array, size_t size,unsigned int target) const {
57 |        for(size_t idx = 0; idx<size; ++idx){ // size_t index: an unsigned int could wrap before reaching size
58 |          unsigned int lastId = array[idx];
59 |          if (lastId >= target)
60 |             return (lastId == target);
61 |        }
62 |        return false;
63 |     }
64 | };
65 | 
66 | #endif  // CODEC_H__
67 | 


--------------------------------------------------------------------------------
/varint/Common.h:
--------------------------------------------------------------------------------
 1 | #ifndef COMMON_H__
 2 | #define COMMON_H__
 3 | #include <stdint.h>
 4 | #include <memory>
 5 | #include <limits>
 6 | // Branch-prediction hints. Every expansion is parenthesized and (for the builtin) normalized with !!.
 7 | #ifdef HAVE_BUILTIN_EXPECT
 8 | #define PREDICT_FALSE(x) (__builtin_expect(!!(x), 0))
 9 | #define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
10 | #else
11 | #define PREDICT_FALSE(x) (x)
12 | #define PREDICT_TRUE(x) (x)
13 | #endif
14 | 
15 | #ifdef _MSC_VER
16 | typedef unsigned __int8  uint8;
17 | #else
18 | typedef uint8_t  uint8;
19 | #endif
20 | typedef char v16qi __attribute__ ((vector_size (16)));
21 | 
22 | #endif  // COMMON_H__
23 | 


--------------------------------------------------------------------------------
/varint/CompressedDeltaChunk.cpp:
--------------------------------------------------------------------------------
 1 | #include "CompressedDeltaChunk.h"
 2 | #include <vector>
 3 | #include "Common.h"
 4 | #include "Sink.h"
 5 | #include "Source.h"
 6 | #include "bitpacking/memutil.h"
 7 |  // True when inbyte is not on a 16-byte boundary (address not a multiple of 16).
 8 |  template <class T>
 9 |  __attribute__ ((const))
10 |  bool needPaddingTo128Bits(const T * inbyte) {
11 |      return (reinterpret_cast<uintptr_t> (inbyte) % 16) != 0;
12 |  }
13 |     CompressedDeltaChunk::CompressedDeltaChunk(){
14 | 		assert(!needPaddingTo128Bits(&data_[0]));
15 |         compressedSize_ = 0; 
16 |     }
17 |     // Creates a chunk whose vector holds compressedSize bytes.
18 |     CompressedDeltaChunk::CompressedDeltaChunk(size_t compressedSize):data_(compressedSize){
19 |         compressedSize_ = compressedSize;
20 |         assert(!needPaddingTo128Bits(data_.data())); // data(): safe even when compressedSize == 0
21 |     }
22 |     // Deserializes a chunk: a 4-byte native-endian length prefix followed by that many payload bytes.
23 |     CompressedDeltaChunk::CompressedDeltaChunk(istream & in) :compressedSize_(0) {
24 |         uint32_t storedSize = 0; // 4-byte temp: size_t may be wider than the on-disk prefix
25 |         if (in.read(reinterpret_cast<char*>(&storedSize), sizeof(storedSize))) compressedSize_ = storedSize;
26 |         data_.resize(compressedSize_);
27 |         if (compressedSize_ != 0) in.read(reinterpret_cast<char*>(data_.data()), compressedSize_);
28 |         assert(!needPaddingTo128Bits(data_.data())); // data(): valid even for an empty payload
29 |     }
30 |     // Resizes the payload to newsize bytes, then copies into a fresh vector and swaps it in.
31 |     void CompressedDeltaChunk::resize(size_t newsize){
32 | 		compressedSize_ = newsize;
33 | 		data_.resize(newsize);
34 | 		vector<uint8,cacheallocator>(data_).swap(data_);
35 | 		assert(!needPaddingTo128Bits(data_.data())); // data(): valid even when newsize == 0
36 |     }
37 |     // Gives mutable access to the underlying byte vector (compressedSize_ is not updated by this call).
38 |     vector<uint8,cacheallocator>& CompressedDeltaChunk::getVector(){
39 | 		return data_;
40 |     }
41 |     // Nothing to release explicitly: the vector member cleans up after itself.
42 |     CompressedDeltaChunk::~CompressedDeltaChunk(){
43 |     }
44 |     
45 |     // Returns the recorded payload size (compressedSize_).
46 |     size_t CompressedDeltaChunk::getCompressedSize(){
47 |         return compressedSize_;
48 |     }
49 |     // Builds a Sink from the payload pointer and compressedSize_.
50 |     Sink CompressedDeltaChunk::getSink(){
51 |         return Sink((char*)data_.data(),compressedSize_); // data(): no UB when the chunk is empty
52 |     }
53 |     // Builds a Source from the payload pointer and compressedSize_.
54 |     Source CompressedDeltaChunk::getSource() const {
55 |         return Source((char*)data_.data(),compressedSize_); // data(): no UB when the chunk is empty
56 |     }
57 |     // Writes a 4-byte native-endian length prefix followed by compressedSize_ payload bytes.
58 |     void CompressedDeltaChunk::write(ostream & out) const{
59 |         // Copy into a 4-byte temp: compressedSize_ is a size_t and may be wider than the prefix.
60 |         const uint32_t storedSize = static_cast<uint32_t>(compressedSize_);
61 |         out.write(reinterpret_cast<const char*>(&storedSize), sizeof(storedSize));
62 |         out.write(reinterpret_cast<const char*>(data_.data()), compressedSize_); // data(): safe when empty
63 |     }


--------------------------------------------------------------------------------
/varint/CompressedDeltaChunk.h:
--------------------------------------------------------------------------------
 1 | #ifndef  COMPRESSED_DELTA_CHUNK_H__
 2 | #define  COMPRESSED_DELTA_CHUNK_H__
 3 | #include <vector>
 4 | #include "Common.h"
 5 | #include "Sink.h"
 6 | #include "Source.h"
 7 | #include "bitpacking/memutil.h"
 8 | // Owning byte buffer for one compressed block plus its recorded size; non-copyable, swappable.
 9 | class CompressedDeltaChunk {
10 | private:
11 |     vector<uint8,cacheallocator> data_;
12 |     size_t compressedSize_;
13 |     // Copying is disabled: both operations are declared private.
14 |     CompressedDeltaChunk(const CompressedDeltaChunk& other);
15 |     CompressedDeltaChunk& operator=(const CompressedDeltaChunk& other);
16 | public: 
17 | 	CompressedDeltaChunk();
18 | 	CompressedDeltaChunk(size_t compressedSize);
19 | 	CompressedDeltaChunk(istream & in);
20 | 	void resize(size_t newsize);
21 | 	vector<uint8,cacheallocator>& getVector();
22 | 	~CompressedDeltaChunk();
23 | 	size_t getCompressedSize();
24 | 	Sink getSink();
25 | 	Source getSource() const;
26 | 	void write(ostream & out) const;
27 |     inline void swap(CompressedDeltaChunk & x) noexcept { // noexcept replaces the deprecated throw()
28 | 	  using std::swap;
29 |       swap(this->data_, x.data_);
30 |       swap(this->compressedSize_, x.compressedSize_);
31 |     }
32 | 	friend void swap(CompressedDeltaChunk& lhs, CompressedDeltaChunk& rhs) noexcept
33 |     {
34 |        lhs.swap(rhs);
35 |     }
36 | }__attribute__ ((aligned (256)));
37 | #endif // COMPRESSED_DELTA_CHUNK_H__


--------------------------------------------------------------------------------
/varint/DeltaChunkStore.h:
--------------------------------------------------------------------------------
 1 | #ifndef  DELTA_CHUNK_STORE_H__
 2 | #define  DELTA_CHUNK_STORE_H__
 3 | #include <vector>
 4 | #include <iostream>
 5 | #include <unordered_map>
 6 | #include <memory>
 7 | #include "CompressedDeltaChunk.h"
 8 | using namespace std;
 9 | 
10 | // Ordered collection of shared CompressedDeltaChunk blocks with simple binary (de)serialization.
11 | class DeltaChunkStore {
12 |   vector<shared_ptr<CompressedDeltaChunk> > data2;
13 | public:  
14 |   DeltaChunkStore(){}
15 |   // Allocates a chunk of compressedSize bytes; the chunk is NOT added to the store.
16 |   shared_ptr<CompressedDeltaChunk> allocateBlock(size_t compressedSize){
17 |     shared_ptr<CompressedDeltaChunk> compblock(new CompressedDeltaChunk(compressedSize));
18 |     return compblock;
19 |   }
20 |   // Appends val to the store (ownership is shared; the chunk itself is not copied).
21 |   void add(const shared_ptr<CompressedDeltaChunk>& val) {
22 |     data2.push_back(val);
23 |   }
24 |   // Returns the chunk at index; no bounds checking is performed.
25 |   const CompressedDeltaChunk& get(int index) const  {
26 |     return *data2[index];
27 |   }
28 |   // When size and capacity differ, copies into a fresh vector and swaps it in.
29 |   void compact(){
30 | 	if (data2.size() != data2.capacity()) {
31 |         vector<shared_ptr<CompressedDeltaChunk> > tmp = data2;
32 |         std::swap(data2, tmp);
33 |     }
34 |   }
35 |   // Number of chunks held.
36 |   size_t size() const {
37 |     return  data2.size();
38 |   }
39 |   // Returns 1 + the sum over all chunks of (1 + getCompressedSize()).
40 |   int getSerialIntNum() const {
41 |     int num = 1; // _len
42 |     for(size_t i=0; i<data2.size(); i++)
43 |     {
44 |         num += 1 + (*data2[i]).getCompressedSize(); // 1 is the int to record the length of the array
45 |     }
46 |     return num;
47 |   }
48 |   // Writes a 4-byte chunk count followed by every chunk's own serialization.
49 |   void write(ostream & out) const{
50 |     int size = static_cast<int>(data2.size()); // explicit narrowing: the on-disk count is 4 bytes
51 |     out.write((char*)&size,4);
52 | 
53 |     for(size_t i=0; i<data2.size(); i++)
54 |     {
55 |         (*data2[i]).write(out);
56 |     }
57 |   }
58 |   // Replaces the contents with what write() produced: a 4-byte count, then that many chunks.
59 |   void read(istream & in){
60 |     int size = 0;
61 |     in.read((char*)&size,4);
62 |     data2.clear();
63 |     for(int i = 0; i<size; i++){
64 |       shared_ptr<CompressedDeltaChunk> compblock(new CompressedDeltaChunk(in));
65 |       data2.push_back(compblock);
66 |     }
67 |   }
68 |   // Exchanges contents with x without copying any chunk.
69 |   inline void swap(DeltaChunkStore & x) noexcept { // noexcept replaces the deprecated throw()
70 |       using std::swap;
71 |       swap(this->data2, x.data2);
72 |   }
73 | 
74 |   friend void swap(DeltaChunkStore& lhs, DeltaChunkStore& rhs) noexcept
75 |   {
76 |      lhs.swap(rhs);
77 |   }
78 | 
79 | };
80 | #endif // DELTA_CHUNK_STORE_H__


--------------------------------------------------------------------------------
/varint/ISetFactory.h:
--------------------------------------------------------------------------------
 1 | #ifndef ISET_FACTORY_H__
 2 | #define ISET_FACTORY_H__
 3 | 
 4 | #include "Set.h"
 5 | #include <memory>
 6 | 
 7 | // Abstract factory interface for Set implementations.
 8 | class ISetFactory
 9 | {
10 | 	public:
11 | 
12 | 		// Virtual so that concrete factories are destroyed correctly through this interface.
13 | 		virtual ~ISetFactory() {}
14 | 
15 | 		// Implemented by subclasses, which decide the concrete Set type to hand back.
16 | 		virtual const std::shared_ptr<Set> createSparseSet() = 0;
17 | 
18 | 
19 | };
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/varint/LazyAndSet.h:
--------------------------------------------------------------------------------
 1 | #ifndef LAZY_AND_SET_H__
 2 | #define LAZY_AND_SET_H__
 3 | #include "CompressedSet.h"
 4 | 
 5 | class LazyAndSet;
 6 | // Iterator type handed out for a LazyAndSet; it holds one child iterator per entry of `iterators`.
 7 | class LazyAndSetIterator : public Set::Iterator {
 8 | private:
 9 | 	unsigned lastReturn; // NOTE(review): presumably the id most recently returned - confirm in LazyAndSet.cpp
10 | 	vector<shared_ptr<Set::Iterator> > iterators;
11 | 	const LazyAndSet& set; // parent set (stored as a reference although the constructor takes a pointer)
12 | public:
13 | 	LazyAndSetIterator(const LazyAndSet* parent);
14 | 	unsigned int docID();
15 | 	unsigned int nextDoc();
16 | 	unsigned int Advance(unsigned int target);
17 | };
18 | // Set built from several other sets; by its name an intersection - NOTE(review): confirm semantics in LazyAndSet.cpp.
19 | class LazyAndSet : public Set {
20 |  public:
21 | 	vector<shared_ptr<Set> > sets_;
22 | 	int nonNullSize;
23 | 	mutable unsigned int setSize;
24 | 	mutable bool init = false;
25 | 	LazyAndSet();
26 | 	// Construct from an arbitrary list of operand sets.
27 | 	LazyAndSet(vector<shared_ptr<Set> >& sets);
28 | 	// Convenience form for exactly two operands.
29 | 	LazyAndSet(shared_ptr<Set> & left,shared_ptr<Set> & right);
30 | 	// NOTE(review): declared inline but not defined in this header; the definition must be visible wherever it is called.
31 | 	inline bool find(unsigned int val) const;
32 | 	// NOTE(review): setSize/init are mutable, so size() presumably caches its result - confirm.
33 | 	unsigned int size() const ;
34 | 
35 |     shared_ptr<Set::Iterator> iterator() const;
36 | 	
37 | };
38 | 
39 | #endif  // LAZY_AND_SET_H__
40 | 


--------------------------------------------------------------------------------
/varint/LazyOrSet.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Implementation of the union set of multiple DocIdSets (which essentially is a merged set of these DocIdSets). 
 3 |  */
 4 | #ifndef LAZY_OR_SET_H__
 5 | #define LAZY_OR_SET_H__
 6 | 
 7 | #include "CompressedSet.h"
 8 | // Iterator over a LazyOrSet; keeps one Item (child iterator + current doc) per operand in `_heap`.
 9 | class LazyOrSetIterator : public Set::Iterator {
10 | 	private:
11 | 		class Item { // pairs a child iterator with a doc id
12 | 		  public:
13 | 			shared_ptr<Set::Iterator> iter;
14 | 			unsigned int doc;
15 | 			Item(shared_ptr<Set::Iterator> it){
16 | 				iter = it;
17 | 				doc = 0; // starts at 0 until the owner updates it
18 | 			}	
19 | 		};
20 | 		unsigned _curDoc;
21 | 		vector<shared_ptr<Item>> _heap; // NOTE(review): names suggest a binary heap keyed on Item::doc - confirm in LazyOrSet.cpp
22 | 		int _size;
23 | 		void heapRemoveRoot();
24 | 		void heapAdjust();
25 | 	public:
26 | 		LazyOrSetIterator(vector<shared_ptr<Set>> sets);
27 | 		unsigned int docID();
28 | 		unsigned int nextDoc();
29 | 		unsigned int Advance(unsigned int target);
30 | };
31 | // Union of several Sets (see the file header comment); holds the operand sets in `sets`.
32 | class LazyOrSet : public Set 
33 | {
34 | 	private:
35 | 		const int INVALID = -1;
36 | 		vector<shared_ptr<Set>> sets;
37 | 		mutable int _size = INVALID; // NOTE(review): presumably a lazily computed size, INVALID meaning "not computed yet" - confirm
38 | 
39 | 	public:
40 | 	
41 | 		LazyOrSet(vector<shared_ptr<Set>> docSets);
42 | 		LazyOrSet(shared_ptr<Set> & left,shared_ptr<Set> & right);
43 | 		shared_ptr<Set::Iterator>  iterator()  const;
44 | 	
45 | 		//Override
46 | 		unsigned int size() const;
47 | 		
48 | 		bool find(unsigned int val) const;
49 | };
50 | 	
51 | 
52 | #endif  //LAZY_OR_SET_H__


--------------------------------------------------------------------------------
/varint/Makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | # $@ name of the target
 3 | # $^ name of all prerequisites with duplicates removed
 4 | # $< name of the first prerequisite
 5 | 
 6 | OSNAME  := $(shell uname -s)
 7 | CFLAGS_SIMPLE =  -std=gnu++0x -g -Wall -pedantic
 8 | #CFLAGS_SIMPLE = -flto -std=gnu++0x -g0 -Wall -Ofast -funroll-loops -pedantic
 9 | BUILD_DIR = ../build
10 | ARCHIVE = $(BUILD_DIR)/CompressedSet.a
11 | 
12 | LIBOBJECTS = $(BUILD_DIR)/varint_CompressedSet.o $(BUILD_DIR)/varint_CompressedDeltaChunk.o  $(BUILD_DIR)/varint_bitpacksimd.o $(BUILD_DIR)/varint_util.o $(BUILD_DIR)/varint_LazyAndSet.o $(BUILD_DIR)/varint_LazyOrSet.o $(BUILD_DIR)/varint_BasicSet.o
13 | 
14 | HEADERS = 
15 | 
16 | # Fallback so that platforms matched by none of the blocks below still get the base flags.
17 | CFLAGS = $(CFLAGS_SIMPLE)
18 | LIBS = 
19 | 
20 | ifeq ($(OSNAME), Darwin)
21 | CFLAGS = $(CFLAGS_SIMPLE)
22 | LIBS = 
23 | endif
24 | 
25 | ifeq ($(OSNAME), Linux)
26 | CFLAGS = $(CFLAGS_SIMPLE) -msse2
27 | LIBS = 
28 | endif
29 | 
30 | # MINGW
31 | ifneq (,$(findstring MINGW,$(OSNAME)))
32 | CFLAGS = $(CFLAGS_SIMPLE)
33 | LIBS = 
34 | endif 
35 | 
36 | # Cygwin
37 | ifneq (,$(findstring /cygdrive/,$(PATH)))
38 | CFLAGS = $(CFLAGS_SIMPLE) -msse2
39 | LIBS = 
40 | endif
41 | 
42 | 
43 | .PHONY: all clean CompressedSet.a
44 | 
45 | # `all` is listed first so that a bare `make` builds the library (it used to run `clean`).
46 | all: $(ARCHIVE)
47 | 
48 | # Alias kept so that existing `make CompressedSet.a` invocations keep working.
49 | CompressedSet.a : $(ARCHIVE)
50 | 
51 | # Real file target: the archive is only rebuilt when one of its objects changed.
52 | $(ARCHIVE) : $(LIBOBJECTS)
53 | 	$(AR) -rs $@ $(LIBOBJECTS)
54 | 
55 | clean :
56 | 	rm -f $(LIBOBJECTS) *.o $(ARCHIVE)
57 | 
58 | 
59 | ##
60 | # Classes
61 | ##
62 | 
63 | $(BUILD_DIR)/varint_CompressedSet.o : CompressedSet.cpp 
64 | 	$(CXX) -c $(CFLAGS) $< -o $@
65 | 
66 | $(BUILD_DIR)/varint_BasicSet.o : BasicSet.cpp 
67 | 	$(CXX) -c $(CFLAGS) $< -o $@
68 | 	
69 | $(BUILD_DIR)/varint_CompressedDeltaChunk.o : CompressedDeltaChunk.cpp 
70 | 	$(CXX) -c $(CFLAGS) $< -o $@
71 | 	
72 | # NOTE: this object used to have two rules (bitpacking/bitpacksimd.cpp and
73 | # bitpacking/simdintegratedbitpacking.c). make keeps only the last recipe, so
74 | # the object was effectively built from simdintegratedbitpacking.c. The shadowed
75 | # rule is dropped to silence the "overriding recipe" warning without changing
76 | # the archive contents. TODO(review): decide whether bitpacksimd.cpp must be built too.
77 | $(BUILD_DIR)/varint_bitpacksimd.o : bitpacking/simdintegratedbitpacking.c
78 | 	$(CXX) -c $(CFLAGS) $< -o $@
79 | 			
80 | $(BUILD_DIR)/varint_util.o : bitpacking/util.cpp 
81 | 	$(CXX) -c $(CFLAGS) $< -o $@	
82 | 
83 | $(BUILD_DIR)/varint_LazyAndSet.o : LazyAndSet.cpp
84 | 	$(CXX) -c $(CFLAGS) $< -o $@	
85 | 
86 | $(BUILD_DIR)/varint_LazyOrSet.o : LazyOrSet.cpp
87 | 	$(CXX) -c $(CFLAGS) $< -o $@	
88 | 


--------------------------------------------------------------------------------
/varint/README:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wheresvic/zsearch/72fe592a04c5fc90075475c2590bc3b8db03ed5d/varint/README


--------------------------------------------------------------------------------
/varint/Set.h:
--------------------------------------------------------------------------------
 1 | #ifndef ABSTRACT_SET_H__
 2 | #define ABSTRACT_SET_H__
 3 | 
 4 | #include <memory>
 5 | #include <limits>
 6 | #include <iostream>
 7 | const unsigned int NO_MORE_DOCS = std::numeric_limits<unsigned int>::max(); // end-of-iteration sentinel
 8 | // Abstract set of unsigned document ids; concrete sets override the pure virtuals below.
 9 | class Set
10 | {
11 | public:
12 | 	// Cursor over a Set's ids; equals() treats a nextDoc() result of NO_MORE_DOCS as the end.
13 | 	class Iterator {
14 | 		public:
15 | 		virtual unsigned int docID() = 0;
16 | 		// Calling nextDoc past end should have no effect
17 | 		virtual unsigned int nextDoc() = 0;
18 | 		// NOTE(review): the original remark promised an efficient skip-list based Advance - confirm per implementation
19 | 		virtual unsigned int Advance(unsigned int target) = 0;
20 | 		virtual ~Iterator() {}
21 | 	};
22 | 	virtual std::shared_ptr<Iterator> iterator()  const = 0;
23 | 
24 | 	// Optional operations: each default throws a distinct int code (-101 .. -106).
25 | 	virtual void addDocs(unsigned int* docids,size_t start,size_t len) { throw -101; }
26 | 	virtual void addDoc(unsigned int docId) { throw -102; }
27 | 	virtual void removeDocId(unsigned int docId) { throw -103; }
28 | 	// Free up unused memory in dynamic collection
29 | 	virtual void compact() { throw -104; }
30 | 
31 | 	virtual bool find(unsigned int target) const = 0;
32 | 
33 | 	virtual void write(std::ostream & out) { throw -105; }
34 | 	virtual void read(std::istream & in) { throw -106; }
35 | 
36 | 	virtual unsigned int size() const = 0;
37 | 
38 | 	virtual ~Set() {}
39 | 
40 | 	// True when both sets yield the same id sequence; a null `other` compares unequal.
41 | 	bool equals(std::shared_ptr<Set> other){
42 | 		if (!other) return false; // a null other used to be dereferenced
43 | 		std::shared_ptr<Set::Iterator> it1 = this->iterator();
44 | 		std::shared_ptr<Set::Iterator> it2 = other->iterator();
45 | 		while( it1->nextDoc() != NO_MORE_DOCS ){
46 | 			if (it2->nextDoc() == NO_MORE_DOCS || it1->docID() != it2->docID())
47 | 				return false;
48 | 		}
49 | 		return NO_MORE_DOCS == it2->nextDoc();
50 | 	}
51 | };
52 | #endif  // ABSTRACT_SET_H__
53 | 


--------------------------------------------------------------------------------
/varint/SetFactory.h:
--------------------------------------------------------------------------------
 1 | #ifndef SET_FACTORY_H__
 2 | #define SET_FACTORY_H__
 3 | 
 4 | #include "Set.h"
 5 | #include "CompressedSet.h"
 6 | #include "ISetFactory.h"
 7 | #include <memory>
 8 | 
 9 | // Factory that hands out CompressedSet instances through the ISetFactory interface.
10 | class SetFactory : public ISetFactory
11 | {
12 | public:
13 | 	// Builds a default-constructed CompressedSet and returns it as a shared Set.
14 | 	virtual const shared_ptr<Set> createSparseSet()
15 | 	{
16 | 		shared_ptr<Set> freshSet = make_shared<CompressedSet>();
17 | 		return freshSet;
18 | 	}
19 | };
20 | 
21 | #endif //SET_FACTORY_H__
22 | 


--------------------------------------------------------------------------------
/varint/SliceInput.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wheresvic/zsearch/72fe592a04c5fc90075475c2590bc3b8db03ed5d/varint/SliceInput.h


--------------------------------------------------------------------------------
/varint/SliceOutput.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wheresvic/zsearch/72fe592a04c5fc90075475c2590bc3b8db03ed5d/varint/SliceOutput.h


--------------------------------------------------------------------------------
/varint/bitpacking/bitpacksimd.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * This is code is released under the
 3 |  * Apache License Version 2.0 http://www.apache.org/licenses/.
 4 |  *
 5 |  * (c) Leonid Boytsov
 6 |  */
 7 | 
 8 | #ifndef BITPACKSIMD_H
 9 | #define BITPACKSIMD_H
10 | 
11 | #include "common.h"
12 | 
13 | void simdpack(const uint32_t * __restrict__ in,__m128i * __restrict__ out, uint32_t bit);
14 | void simdpackwithoutmask(const uint32_t * __restrict__ in,__m128i * __restrict__ out, uint32_t bit);
15 | void simdunpack(const __m128i * __restrict__ in,uint32_t * __restrict__ out, uint32_t bit);
16 | 
17 | void SIMD_fastunpack_32(const __m128i *  __restrict__ in, uint32_t *  __restrict__  out, const uint32_t bit) ;
18 | void SIMD_fastpackwithoutmask_32(const uint32_t *  __restrict__ in, __m128i *  __restrict__  out, const uint32_t bit);
19 | void SIMD_fastpack_32(const uint32_t *  __restrict__ in, __m128i *  __restrict__  out, const uint32_t bit) ;
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/varint/bitpacking/common.h:
--------------------------------------------------------------------------------
 1 | #ifndef BITPACK_COMMON_H__
 2 | #define BITPACK_COMMON_H__
 3 | #include <sys/types.h>
 4 | #include <errno.h>
 5 | #include <string.h>
 6 | #include <cstdlib> 
 7 | #include <stdexcept>
 8 | #include <vector>
 9 | #include <iostream>
10 | #include <sstream>
11 | #include <cassert>
12 | #include <immintrin.h>
13 | #endif  // BITPACK_COMMON_H__
14 | 


--------------------------------------------------------------------------------
/varint/bitpacking/simdcomputil.c:
--------------------------------------------------------------------------------
 1 | #include "simdcomputil.h"
 2 | // Returns curr minus [prev lane 3, curr lane 0, curr lane 1, curr lane 2]: each 32-bit lane minus its predecessor.
 3 | __attribute__((always_inline))
 4 | static inline __m128i Delta(__m128i curr, __m128i prev) {
 5 |     return _mm_sub_epi32(curr,
 6 |             _mm_or_si128(_mm_slli_si128(curr, 4), _mm_srli_si128(prev, 12)));
 7 | }
 8 | 
 9 | 
10 | // Bit width of v: index of its highest set bit counted from 1, and 0 when v is 0.
11 | uint32_t bits(const uint32_t v) {
12 |     if (v == 0)
13 |         return 0; // handled up front: neither intrinsic below is defined for 0
14 | #ifdef _MSC_VER
15 |     unsigned long highestSetBit;
16 |     _BitScanReverse(&highestSetBit, v);
17 |     return highestSetBit + 1;
18 | #else
19 |     // assume GCC-like compiler if not microsoft
20 |     return 32 - __builtin_clz(v);
21 | #endif
22 | }
23 | // OR-folds the SIMDBlockSize values starting at begin and returns the bit width of the result.
24 | __attribute__ ((pure))
25 | uint32_t maxbits(const uint32_t * begin) {
26 |     uint32_t orOfAll = 0;
27 |     for (int i = 0; i < SIMDBlockSize; ++i) {
28 |         orOfAll |= begin[i];
29 |     }
30 |     return bits(orOfAll);
31 | }
32 | // Bit width of the OR of the four 32-bit lanes of accumulator.
33 | static uint32_t maxbitas32int(const __m128i accumulator) {
34 |     uint32_t lanes[4];
35 |     _mm_storeu_si128((__m128i *) lanes, accumulator);
36 |     return bits((lanes[0] | lanes[1]) | (lanes[2] | lanes[3]));
37 | }
38 | 
39 | 
40 | // Bit width needed for the deltas of the SIMDBlockSize (128) integers at `in`; the first delta
41 | // is taken against initvalue. `in` must be 16-byte aligned because _mm_load_si128 is used.
42 | uint32_t simdmaxbitsd1(uint32_t initvalue, const uint32_t * in) {
43 |     const __m128i* pin = (const __m128i*)(in);
44 |     __m128i newvec = _mm_load_si128(pin);
45 |     __m128i accumulator = Delta(newvec , _mm_set1_epi32 (initvalue));
46 |     __m128i oldvec = newvec;
47 |     for(uint32_t k = 1; 4*k < SIMDBlockSize; ++k) {
48 |         newvec = _mm_load_si128(pin+k);
49 |         accumulator = _mm_or_si128(accumulator,Delta(newvec , oldvec));
50 |         oldvec = newvec;
51 |     }
52 |     // The former trailing store into initoffset was never read and has been removed.
53 |     return maxbitas32int(accumulator);
54 | }
55 | 
56 | 


--------------------------------------------------------------------------------
/varint/bitpacking/simdcomputil.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * This code is released under a BSD License.
 3 |  */
 4 | 
 5 | #ifndef SIMDCOMPUTIL_H_
 6 | #define SIMDCOMPUTIL_H_
 7 | 
 8 | #include <emmintrin.h>// SSE2 is required
 9 | #include <stdint.h> // use a C99-compliant compiler, please
10 | 
11 | 
12 | 
13 | 
14 | // returns the integer logarithm of v (bit width)
15 | uint32_t bits(const uint32_t v);
16 | 
17 | // max integer logarithm over a range
18 | uint32_t maxbits(const uint32_t * begin);
19 | 
20 | enum{ SIMDBlockSize = 128};
21 | 
22 | // like maxbit over 128 integers (SIMDBlockSize) with provided initial value
23 | // and using differential coding
24 | uint32_t simdmaxbitsd1(uint32_t initvalue, const uint32_t * in);
25 | 
26 | 
27 | 
28 | 
29 | #endif /* SIMDCOMPUTIL_H_ */
30 | 


--------------------------------------------------------------------------------
/varint/bitpacking/simdintegratedbitpacking.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * This code is released under a BSD License.
 3 |  */
 4 | 
 5 | #ifndef SIMD_INTEGRATED_BITPACKING_H
 6 | #define SIMD_INTEGRATED_BITPACKING_H
 7 | 
 8 | #include <emmintrin.h>// SSE2 is required
 9 | #include <stdint.h> // use a C99-compliant compiler, please
10 | 
11 | #include "simdcomputil.h"
12 | // reads 128 integers from in, writes them to out packed at `bit` bits each
13 | void simdpackd1(uint32_t initvalue, const uint32_t *  in,__m128i *  out, uint32_t bit);
14 | // reads 128 integers from in, writes them to out packed at `bit` bits each
15 | void simdpackwithoutmaskd1(uint32_t initvalue, const uint32_t *  in,__m128i *  out, uint32_t bit);
16 | // reads values packed at `bit` bits each from in, writes 128 integers to out
17 | void simdunpackd1(uint32_t initvalue, const __m128i *  in,uint32_t *  out, uint32_t bit);
18 | 
19 | 
20 | #endif
21 | 


--------------------------------------------------------------------------------
/varint/bitpacking/util.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * This is code is released under the
 3 |  * Apache License Version 2.0 http://www.apache.org/licenses/.
 4 |  *
 5 |  * (c) Daniel Lemire, http://lemire.me/en/
 6 |  * and Owen Kaser
 7 |  */
 8 | 
 9 | #ifndef BITPACK_UTIL
10 | #define BITPACK_UTIL
11 | #include "common.h"
12 | #include <iostream> // for std::cerr 
13 | 
14 | using namespace std;
15 | 
16 | // taken from stackoverflow
17 | #ifndef NDEBUG
18 | #   define ASSERT(condition, message) \
19 |     do { \
20 |         if (! (condition)) { \
21 |             std::cerr << "Assertion `" #condition "` failed in " << __FILE__ \
22 |                       << " line " << __LINE__ << ": " << message << std::endl; \
23 |             std::exit(EXIT_FAILURE); \
24 |         } \
25 |     } while (false)
26 | #else
27 | #   define ASSERT(condition, message) do { } while (false)
28 | #endif
29 |  // True when a is a multiple of x; x == 0 yields false instead of dividing by zero.
30 |  __attribute__ ((const))
31 |  bool divisibleby(size_t a, uint32_t x) {
32 |      return x != 0 && (a % x == 0);
33 |  }
34 |  // Throws logic_error("<a> not divisible by <x>") unless divisibleby(a, x) holds.
35 |  void checkifdivisibleby(size_t a, uint32_t x) {
36 |      if (divisibleby(a, x))
37 |          return;
38 |      ostringstream message;
39 |      message << a << " not divisible by " << x;
40 |      throw logic_error(message.str());
41 |  }
42 | 
43 | 
44 | 
45 | 
46 | 
47 | 
48 | #endif
49 | 


--------------------------------------------------------------------------------
/varint/bitpacking/util.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * This is code is released under the
 3 |  * Apache License Version 2.0 http://www.apache.org/licenses/.
 4 |  *
 5 |  * (c) Daniel Lemire, http://lemire.me/en/
 6 |  * and Owen Kaser
 7 |  */
 8 | 
 9 | #ifndef BITPACK_UTIL
10 | #define BITPACK_UTIL
11 | #include "common.h"
12 | #include <iostream> // for std::cerr 
13 | 
14 | using namespace std;
15 | 
16 | // taken from stackoverflow
17 | #ifndef NDEBUG
18 | #   define ASSERT(condition, message) \
19 |     do { \
20 |         if (! (condition)) { \
21 |             std::cerr << "Assertion `" #condition "` failed in " << __FILE__ \
22 |                       << " line " << __LINE__ << ": " << message << std::endl; \
23 |             std::exit(EXIT_FAILURE); \
24 |         } \
25 |     } while (false)
26 | #else
27 | #   define ASSERT(condition, message) do { } while (false)
28 | #endif
29 | 
30 |  __attribute__ ((const))
31 | bool divisibleby(size_t a, uint32_t x);
32 | 
33 | void checkifdivisibleby(size_t a, uint32_t x);
34 |  // True when inbyte is not on a 16-byte boundary (address not a multiple of 16).
35 |  template <class T>
36 |  __attribute__ ((const))
37 |  bool needPaddingTo128Bits(const T * inbyte) {
38 |      return (reinterpret_cast<uintptr_t> (inbyte) % 16) != 0;
39 |  }
40 |  // Bit width of v: 0 for v == 0, otherwise 32 minus the number of leading zero bits.
41 |  __attribute__ ((const))
42 |  constexpr uint32_t gccbits(const uint32_t v) {
43 |      return v != 0 ? 32 - __builtin_clz(v) : 0;
44 |  }
45 |  // OR-folds every element of [begin, end) and returns the bit width (gccbits) of the result.
46 |  template<class iterator>
47 |  __attribute__ ((pure))
48 |  uint32_t maxbits(const iterator & begin, const iterator & end) {
49 |      uint32_t orOfAll = 0;
50 |      for (iterator it = begin; it != end; ++it) {
51 |          orOfAll = orOfAll | *it;
52 |      }
53 |      return gccbits(orOfAll);
54 |  }
55 | 
56 |  // For VariableByte codec: true when inbyte is not on a 4-byte boundary.
57 |  template <class T>
58 |  __attribute__ ((const))
59 |  bool needPaddingTo32Bits(const T * inbyte) {
60 |      return (reinterpret_cast<uintptr_t> (inbyte) % 4) != 0;
61 |  }
62 |  // Rounds inbyte up to the next 4-byte boundary (unchanged when already aligned).
63 |  template <class T>
64 |  __attribute__ ((const))
65 |  T * padTo32bits(T * inbyte) {
66 |      const uintptr_t address = reinterpret_cast<uintptr_t> (inbyte);
67 |      return reinterpret_cast< T *> ((address + 3) & ~static_cast<uintptr_t>(3));
68 |  }
69 | #endif
70 | 


--------------------------------------------------------------------------------