├── .gitignore ├── FAQ ├── INSTALL ├── README.md ├── api ├── load.py ├── python │ ├── .gitignore │ ├── test.py │ ├── test_search.py │ └── zsearch.py └── readme.txt ├── buildall.sh ├── data ├── .gitignore ├── document01.xml ├── document02.xml ├── document03.xml ├── document04.xml ├── input01.txt ├── input02.txt ├── load │ ├── badinput.txt │ ├── document_10035.txt │ ├── document_10161.txt │ ├── document_10303.txt │ ├── document_11629.txt │ ├── document_11674.txt │ ├── document_12090.txt │ ├── document_13116.txt │ ├── document_14043.txt │ ├── document_14919.txt │ ├── document_1641.txt │ ├── document_17756.txt │ ├── document_19019.txt │ ├── document_19029.txt │ ├── document_20482.txt │ ├── document_21.txt │ ├── document_21505.txt │ ├── document_21675.txt │ ├── document_21676.txt │ ├── document_21699.txt │ ├── document_21726.txt │ ├── document_22737.txt │ ├── document_22813.txt │ ├── document_22920.txt │ ├── document_22983.txt │ ├── document_23228.txt │ ├── document_23513.txt │ ├── document_24307.txt │ ├── document_24467.txt │ ├── document_24862.txt │ ├── document_25842.txt │ ├── document_26056.txt │ ├── document_26734.txt │ ├── document_27501.txt │ ├── document_27927.txt │ ├── document_27949.txt │ ├── document_2807.txt │ ├── document_28140.txt │ ├── document_28891.txt │ ├── document_28936.txt │ ├── document_30201.txt │ ├── document_30388.txt │ ├── document_31127.txt │ ├── document_31288.txt │ ├── document_3142.txt │ ├── document_31971.txt │ ├── document_32395.txt │ ├── document_32427.txt │ ├── document_33192.txt │ ├── document_33417.txt │ ├── document_33870.txt │ ├── document_34257.txt │ ├── document_35761.txt │ ├── document_3605.txt │ ├── document_36521.txt │ ├── document_36750.txt │ ├── document_37773.txt │ ├── document_37824.txt │ ├── document_38116.txt │ ├── document_38164.txt │ ├── document_38169.txt │ ├── document_3890.txt │ ├── document_39592.txt │ ├── document_39998.txt │ ├── document_4010.txt │ ├── document_42772.txt │ ├── document_42845.txt │ ├── 
document_44040.txt │ ├── document_4481.txt │ ├── document_4487.txt │ ├── document_44885.txt │ ├── document_45024.txt │ ├── document_45415.txt │ ├── document_45516.txt │ ├── document_46330.txt │ ├── document_46648.txt │ ├── document_4709.txt │ ├── document_47156.txt │ ├── document_47474.txt │ ├── document_4802.txt │ ├── document_48177.txt │ ├── document_48329.txt │ ├── document_49683.txt │ ├── document_49819.txt │ ├── document_50736.txt │ ├── document_51033.txt │ ├── document_51679.txt │ ├── document_51756.txt │ ├── document_53100.txt │ ├── document_5454.txt │ ├── document_5861.txt │ ├── document_5872.txt │ ├── document_591.txt │ ├── document_6355.txt │ ├── document_7403.txt │ ├── document_750.txt │ ├── document_8215.txt │ ├── document_8495.txt │ ├── document_9403.txt │ ├── document_9566.txt │ └── document_9802.txt ├── lorem_ipsum.txt └── readme.txt ├── docroot └── post.htm ├── leveldb ├── AUTHORS ├── LICENSE ├── Makefile ├── NEWS ├── README ├── TODO ├── build_detect_platform ├── db │ ├── autocompact_test.cc │ ├── builder.cc │ ├── builder.h │ ├── c.cc │ ├── c_test.c │ ├── corruption_test.cc │ ├── db_bench.cc │ ├── db_impl.cc │ ├── db_impl.h │ ├── db_iter.cc │ ├── db_iter.h │ ├── db_test.cc │ ├── dbformat.cc │ ├── dbformat.h │ ├── dbformat_test.cc │ ├── filename.cc │ ├── filename.h │ ├── filename_test.cc │ ├── leveldb_main.cc │ ├── log_format.h │ ├── log_reader.cc │ ├── log_reader.h │ ├── log_test.cc │ ├── log_writer.cc │ ├── log_writer.h │ ├── memtable.cc │ ├── memtable.h │ ├── repair.cc │ ├── skiplist.h │ ├── skiplist_test.cc │ ├── snapshot.h │ ├── table_cache.cc │ ├── table_cache.h │ ├── version_edit.cc │ ├── version_edit.h │ ├── version_edit_test.cc │ ├── version_set.cc │ ├── version_set.h │ ├── version_set_test.cc │ ├── write_batch.cc │ ├── write_batch_internal.h │ └── write_batch_test.cc ├── doc │ ├── bench │ │ ├── db_bench_sqlite3.cc │ │ └── db_bench_tree_db.cc │ ├── benchmark.html │ ├── doc.css │ ├── impl.html │ ├── index.html │ ├── log_format.txt │ └── 
table_format.txt ├── helpers │ └── memenv │ │ ├── memenv.cc │ │ ├── memenv.h │ │ └── memenv_test.cc ├── include │ └── leveldb │ │ ├── c.h │ │ ├── cache.h │ │ ├── comparator.h │ │ ├── db.h │ │ ├── env.h │ │ ├── filter_policy.h │ │ ├── iterator.h │ │ ├── options.h │ │ ├── slice.h │ │ ├── status.h │ │ ├── table.h │ │ ├── table_builder.h │ │ └── write_batch.h ├── issues │ ├── issue178_test.cc │ └── issue200_test.cc ├── port │ ├── README │ ├── atomic_pointer.h │ ├── port.h │ ├── port_example.h │ ├── port_posix.cc │ ├── port_posix.h │ ├── thread_annotations.h │ └── win │ │ └── stdint.h ├── table │ ├── block.cc │ ├── block.h │ ├── block_builder.cc │ ├── block_builder.h │ ├── filter_block.cc │ ├── filter_block.h │ ├── filter_block_test.cc │ ├── format.cc │ ├── format.h │ ├── iterator.cc │ ├── iterator_wrapper.h │ ├── merger.cc │ ├── merger.h │ ├── table.cc │ ├── table_builder.cc │ ├── table_test.cc │ ├── two_level_iterator.cc │ └── two_level_iterator.h └── util │ ├── arena.cc │ ├── arena.h │ ├── arena_test.cc │ ├── bloom.cc │ ├── bloom_test.cc │ ├── cache.cc │ ├── cache_test.cc │ ├── coding.cc │ ├── coding.h │ ├── coding_test.cc │ ├── comparator.cc │ ├── crc32c.cc │ ├── crc32c.h │ ├── crc32c_test.cc │ ├── env.cc │ ├── env_posix.cc │ ├── env_test.cc │ ├── filter_policy.cc │ ├── hash.cc │ ├── hash.h │ ├── histogram.cc │ ├── histogram.h │ ├── logging.cc │ ├── logging.h │ ├── mutexlock.h │ ├── options.cc │ ├── posix_logger.h │ ├── random.h │ ├── status.cc │ ├── testharness.cc │ ├── testharness.h │ ├── testutil.cc │ └── testutil.h ├── lib ├── EWAHBoolArray │ ├── .DS_Store │ ├── CHANGELOG │ ├── README │ ├── doxyconfig.txt │ ├── example.cpp │ ├── headers │ │ ├── boolarray.h │ │ ├── ewah.h │ │ ├── ewahutil.h │ │ └── runninglengthword.h │ ├── makefile │ ├── src │ │ ├── benchmark.cpp │ │ └── unit.cpp │ └── unit ├── rapidxml-1.13 │ ├── document01.xml │ ├── license.txt │ ├── manual.html │ ├── rapidxml.hpp │ ├── rapidxml_iterators.hpp │ ├── rapidxml_print.hpp │ ├── rapidxml_utils.hpp │ 
└── test_rapidxml.cpp └── tpunit++.hpp ├── misc ├── Makefile ├── example_compressedset.cpp └── example_leveldb.cpp ├── src ├── .gitignore ├── Constants.hpp ├── DocumentImpl.hpp ├── DocumentKVStore.hpp ├── Engine.hpp ├── EngineDataKVStore.hpp ├── Field.cpp ├── Field.h ├── FieldKVStore.hpp ├── IDocument.h ├── IInvertedIndex.h ├── IKVStore.h ├── ITokenizer.h ├── InvertedIndexBatch.hpp ├── InvertedIndexImpl.hpp ├── InvertedIndexSimpleBatch.hpp ├── KVStoreInMemory.hpp ├── KVStoreLevelDb.hpp ├── LRUCache.hpp ├── Makefile ├── NameSpaceKVStore.hpp ├── SparseSet.hpp ├── Statistics.hpp ├── TokenizerImpl.cpp ├── TokenizerImpl.h ├── TokenizerImpl.hpp ├── WordIndexKVStore.hpp ├── ZException.hpp ├── ZUtil.hpp ├── atomic_pointer.h ├── bloom_filter.hpp ├── cedarpp.h ├── engine_simple_main.cpp ├── server.cpp └── trie │ ├── EntropyTrie.hpp │ ├── bit_access.hpp │ ├── bit_vector.cpp │ ├── bit_vector.hpp │ ├── block_info.hpp │ ├── exp_golomb.hpp │ ├── huffman.hpp │ ├── main.cpp │ ├── sign_interleave.hpp │ └── trie.hpp ├── tests ├── BasicSetTest.cpp ├── CompressedSetTests.cpp ├── CompressedSet_test.cpp ├── DocumentImplTest.hpp ├── InvertedIndexImplTest.hpp ├── InvertedIndexSimpleBatchTest.hpp ├── Makefile ├── SparseSet_test.cpp ├── StatisticsTest.hpp ├── TestUtils.hpp ├── TokenizerTest.hpp ├── XmlTest.hpp ├── document_test.cpp ├── inverted_index_test.cpp ├── memory_leak_test.cpp ├── runTests.sh ├── statistics_test.cpp ├── thread_test.cpp ├── tokenizer_test.cpp └── xml_test.cpp ├── unicode ├── UTF8Stream.cpp ├── UnicodeUtils.cpp └── unicode │ ├── gunichartables.h │ ├── guniprop.cpp │ └── guniprop.h └── varint ├── BasicSet.cpp ├── BasicSet.h ├── BasicSetFactory.h ├── Codec.h ├── CollectionHelper.h ├── Common.h ├── CompressedDeltaChunk.cpp ├── CompressedDeltaChunk.h ├── CompressedSet.cpp ├── CompressedSet.h ├── ConsiseSet.hpp ├── DeltaChunkStore.h ├── ISetFactory.h ├── LazyAndNotSet.h ├── LazyAndSet.cpp ├── LazyAndSet.h ├── LazyOrSet.cpp ├── LazyOrSet.h ├── Makefile ├── README ├── Set.h 
├── SetFactory.h ├── Sink.h ├── SliceInput.h ├── SliceOutput.h ├── Source.h ├── bitpacking ├── bitpacksimd.cpp ├── bitpacksimd.h ├── codecs.h ├── common.h ├── compositecodec.h ├── memutil.h ├── simdbinarypacking.h ├── simdcomputil.c ├── simdcomputil.h ├── simdintegratedbitpacking.c ├── simdintegratedbitpacking.h ├── util.cpp ├── util.h └── variablebyte.h └── slice.h /.gitignore: -------------------------------------------------------------------------------- 1 | a.out 2 | 3 | simplehttpserver 4 | 5 | build 6 | bin 7 | 8 | # Compiled Object files 9 | *.slo 10 | *.lo 11 | *.o 12 | *.obj 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.so.* 17 | *.dylib 18 | *.dll 19 | 20 | # Compiled Static libraries 21 | *.lai 22 | *.la 23 | *.a 24 | *.lib -------------------------------------------------------------------------------- /FAQ: -------------------------------------------------------------------------------- 1 | Plain-text Frequently-Asked-Questions document for the project. 2 | --------------------------------------------------------------- 3 | -------------------------------------------------------------------------------- /INSTALL: -------------------------------------------------------------------------------- 1 | Configuration, build, and installation instructions. 2 | ---------------------------------------------------- 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # **zsearch** A high performance search engine 2 | 3 | zsearch achieves low data fragmentation and good random-write performance by storing its data in LevelDB, which is built on log-structured merge trees. It achieves high query speed by storing document IDs as compressed bitmaps in an inverted index, which is exposed through a simple libevent2 HTTP server. 
4 | 5 | ## Project Design 6 | 7 | Engine 8 | -> tokenizer 9 | -> documentStore 10 | -> invertedIndex 11 | -> KVStore 12 | -> InMemory 13 | -> LevelDb 14 | -> setFactory 15 | -> setFactory 16 | -> wordIndex 17 | 18 | ## Project Organization 19 | 20 | TODO 21 | 22 | ## Contact 23 | 24 | - [Homepage] (http://victorparmar.github.com/zsearch/) 25 | - Victor at victorparmar@gmail.com 26 | - Maxime at maximecaron@gmail.com 27 | 28 | ## Dependencies 29 | 30 | All of the following: 31 | 32 | - [g++] (http://gcc.gnu.org/) >= 4.7.2 33 | - [libevent2] (http://libevent.org/) >= 2.0.19 34 | - [Python](http://python.org/) >= 2.7 35 | 36 | ## Cloning and Running 37 | 38 | You can clone this repo and simply execute: 39 | 40 | git clone git://github.com/victorparmar/zsearch.git 41 | cd zsearch 42 | ./buildall.sh 43 | ./build/server ./docroot 44 | 45 | ## License 46 | 47 | - Mozilla 48 | 49 | 50 | -------------------------------------------------------------------------------- /api/load.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import codecs 4 | import httplib, urllib 5 | import os, shutil 6 | 7 | from xml.sax.saxutils import escape 8 | 9 | def printText(txt): 10 | lines = txt.split('\n') 11 | for line in lines: 12 | print line.strip() 13 | 14 | 15 | inputs = list() 16 | 17 | httpServ = httplib.HTTPConnection("localhost", 8080) 18 | httpServ.connect() 19 | 20 | path = '../data/load/' 21 | 22 | listing = os.listdir(path) 23 | 24 | for file in listing: 25 | 26 | filename = os.path.join(path, file) 27 | 28 | if (os.path.isfile(filename)): 29 | 30 | #if (filename != '../data/load/document_42772.txt'): 31 | # continue 32 | 33 | print filename 34 | # f = codecs.open(filename, "r", "utf-8") 35 | linestring = open(filename, "r").read() 36 | 37 | # print linestring 38 | 39 | # params = urllib.urlencode({'data': escape(linestring)}) 40 | params = urllib.urlencode({'data' : linestring}) 41 | params = params.encode('utf-8') 
42 | # params = {'data' : linestring} 43 | httpServ.request('POST', '/index', params) 44 | # inputs.append(linestring) 45 | 46 | response = httpServ.getresponse() 47 | 48 | # if response.status == httplib.OK: 49 | print "Output from POST request" 50 | printText (response.read()) 51 | 52 | 53 | # print "loaded " + str(len(inputs)) + " files" 54 | 55 | httpServ.close() 56 | 57 | -------------------------------------------------------------------------------- /api/python/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | 3 | -------------------------------------------------------------------------------- /api/python/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | import os, shutil 5 | from zsearch import zsearch 6 | 7 | z = zsearch("http://localhost:8080") 8 | print z 9 | 10 | try: 11 | 12 | print " get document id 1" 13 | d = z.getDocument(1) 14 | 15 | print " print document contents" 16 | for key, value in d.iteritems(): 17 | print key 18 | print value 19 | 20 | except Exception as err: 21 | print err 22 | 23 | data = {'input' : 'victor & parmar É'} 24 | print " add document", data 25 | docId = z.addDocument(data) 26 | print " returned docId:" + docId 27 | 28 | print " retrieve inserted document" 29 | d = z.getDocument(docId) 30 | 31 | print " print document contents" 32 | for key, value in d.iteritems(): 33 | print key 34 | print value 35 | 36 | 37 | data = {'input' : 'snoop < dawg', 'complex' : '72.8/km²'} 38 | print " add document", data 39 | docId = z.addDocument(data) 40 | print " returned docId:" + docId 41 | 42 | print " retrieve inserted document" 43 | d = z.getDocument(docId) 44 | 45 | print " print document contents" 46 | for key, value in d.iteritems(): 47 | print key 48 | print value 49 | 50 | 51 | 52 | print " search for 'some'" 53 | docIds = z.search("some") 54 | 55 | print " returned docIds:" 56 | 
for docId in docIds: 57 | print docId 58 | 59 | try: 60 | print " try getting invalid documentId (-1)" 61 | d = z.getDocument(-1) 62 | except Exception as err: 63 | print err 64 | -------------------------------------------------------------------------------- /api/python/test_search.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | import os, shutil 5 | from zsearch import zsearch 6 | 7 | z = zsearch("http://localhost:8080") 8 | print z 9 | 10 | print " search for 'some'" 11 | docIds = z.search("some") 12 | 13 | print " returned docIds:" 14 | for docId in docIds: 15 | print docId 16 | 17 | print " search for 'some' with start and offset" 18 | docIds = z.search("some", 0, 3) 19 | 20 | print " returned docIds:" 21 | for docId in docIds: 22 | print docId 23 | 24 | print " bad start" 25 | try: 26 | z.search("some", -1, 5) 27 | except Exception, e: 28 | print e 29 | 30 | 31 | print " bad offset" 32 | try: 33 | z.search("some", 0, 'snoop') 34 | except Exception, e: 35 | print e 36 | 37 | print " xml escape (api) 4 | -> wrap within xml frame (api) 5 | -> html escape (api) 6 | -> post (api) 7 | -> html unescape (server) 8 | -> build document and unescape (DocumentImpl / rapidxml) 9 | -> store 10 | -> query 11 | -> xml escape (DocumentImpl) 12 | -> return 13 | 14 | -------------------------------------------------------------------------------- /buildall.sh: -------------------------------------------------------------------------------- 1 | rm -rf ./build 2 | make -C ./leveldb/ clean 3 | 4 | mkdir build 5 | mkdir build/tests 6 | make -C ./varint/ all 7 | make -C ./leveldb/ all 8 | make -C ./src/ all 9 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | big.txt 2 | tweet.txt 3 | tweet.txt.bz2 4 | 5 | 
-------------------------------------------------------------------------------- /data/document01.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | Input document 4 | some text 5 | some more text 6 | 7 | -------------------------------------------------------------------------------- /data/document02.xml: -------------------------------------------------------------------------------- 1 | 2 | document02 3 | some text 4 | 17 | 18 | -------------------------------------------------------------------------------- /data/document03.xml: -------------------------------------------------------------------------------- 1 | 2 | document03 3 | some text 4 | Kingdom Hearts (キングダム ハーツ, Kingudamu Hātsu?) est une série de jeux vidéo d'action-RPG développée et éditée par Square Enix, qui marque l'association entre Disney Interactive Studios et l'univers des jeux de Square sous la direction de Tetsuya Nomura. Kingdom Hearts est donc un cross-over entre plusieurs personnages de Square et l'univers Disney qui a lieu dans un monde parallèle créé spécialement pour la série. Dans chacun des opus, les voix des personnages de Disney sont interprétées par les mêmes célébrités que dans leur œuvre d'origine. 5 | area1 Les personnages de la franchise de Square Enix Final Fantasy font eux aussi des apparitions et interagissent avec le joueur et avec les personnages de Disney. La série tourne autour de la recherche des amis de Sora, le personnage principal, et de ses rencontres avec les personnages de Disney et de Final Fantasy au travers de différents mondes. 
6 | 7 | -------------------------------------------------------------------------------- /data/document04.xml: -------------------------------------------------------------------------------- 1 | some < text72.8/km²Input document 2 | -------------------------------------------------------------------------------- /data/input01.txt: -------------------------------------------------------------------------------- 1 | input1 some text 2 | input2 some more text 3 | input3 xxx 4 | -------------------------------------------------------------------------------- /data/input02.txt: -------------------------------------------------------------------------------- 1 | each line corresponds to a document 2 | with the first word of each line corresponding to the field 3 | please ensure that there is no newline at the end of this file, thank you 4 | input1 some text 5 | input1 some more text 6 | area1 Kingdom Hearts (キングダム ハーツ, Kingudamu Hātsu?) est une série de jeux vidéo d'action-RPG développée et éditée par Square Enix, qui marque l'association entre Disney Interactive Studios et l'univers des jeux de Square sous la direction de Tetsuya Nomura. Kingdom Hearts est donc un cross-over entre plusieurs personnages de Square et l'univers Disney qui a lieu dans un monde parallèle créé spécialement pour la série. Dans chacun des opus, les voix des personnages de Disney sont interprétées par les mêmes célébrités que dans leur œuvre d'origine. 7 | area1 Les personnages de la franchise de Square Enix Final Fantasy font eux aussi des apparitions et interagissent avec le joueur et avec les personnages de Disney. La série tourne autour de la recherche des amis de Sora, le personnage principal, et de ses rencontres avec les personnages de Disney et de Final Fantasy au travers de différents mondes. 8 | area1 En 2012, la série compte sept épisodes sortis sur différentes consoles de jeux vidéo, mais de futurs opus sont programmés. 
Même si chaque titre ne reçoit pas le même succès, la plupart des jeux de la série sont acclamés par les critiques et reçoivent un important succès commercial. En mars 2011, la série Kingdom Hearts s'est vendue à plus de 17 millions d'exemplaires à travers le monde. Une grande variété de produits dérivés est sortie, comprenant les bandes originales, des figurines, guides de jeux, romans et une série en manga. 9 | area1 de 10 | area1 de 11 | area1 de 12 | area1 de 13 | area1 de 14 | area1 de 15 | area1 de 16 | area1 de 17 | area1 de 18 | area1 de 19 | area1 de 20 | area1 de 21 | area1 de 22 | area1 de 23 | area1 de 24 | area1 de 25 | area1 de 26 | area1 de 27 | area1 de 28 | area1 de 29 | area1 de 30 | area1 de 31 | area1 de 32 | area1 de 33 | area1 de 34 | area1 de 35 | area1 de 36 | -------------------------------------------------------------------------------- /data/load/badinput.txt: -------------------------------------------------------------------------------- 1 | blahblah 2 | 3 | -------------------------------------------------------------------------------- /data/load/document_17756.txt: -------------------------------------------------------------------------------- 1 | 2 | Lower_Canada 3 | 4 | 5 | The Province of Lower Canada (French: Province du Bas-Canada) was a [[British colonization of the Americas|British colony]] on the lower [[Saint Lawrence River]] and the shores of the [[Gulf of Saint Lawrence]] (1791–1841). It covered the southern portion of the modern-day Province of [[Quebec]], [[Canada]], and the [[Labrador]] region of the modern-day Province of [[Newfoundland and Labrador]]. 6 | 7 | 8 | The Province of Lower Canada was created by the [[Constitutional Act of 1791]] from the partition of the British colony of the [[Province of Quebec (1763–1791)|Province of Quebec]] (1763–1791) into the Province of Lower Canada and the Province of [[Upper Canada]]. 
9 | 10 | Lower Canada consisted of part of former [[French colonization of the Americas|French colony]] of [[New France]], populated mainly by French Canadians, which was ceded to Great Britain after that empire's victory in the [[Seven Years' War]], also called the [[French and Indian Wars]] in the United States. Other parts of New France ceded to Britain became the Colonies of [[Nova Scotia]], [[New Brunswick]] and [[Prince Edward Island]]. 11 | 12 | Like Upper Canada, there was political unrest and a rebellion challenged the [[United Kingdom|British]] rule of the predominantly [[French people|French]] population. After the [[Lower Canada Rebellion|Patriote Rebellion]] was crushed by the British army and [[Loyal (Lower Canada)|Loyal]] volunteers, the 1791 Constitution was suspended on March 27, 1838 and a [[Special Council of Lower Canada|special council]] was appointed to administer the colony. 13 | 14 | The provinces of Lower Canada and Upper Canada were combined as the United Province of Canada in 1841, when the [[Act of Union (1840)|The Union Act]] came into force. Their separate legislatures were combined into a single parliament with equal representation for both constituent parts, even if Lower Canada had more population. 15 | 16 | [[File:Constitution-of-lower-canada.png|thumb|left|Constitution of Lower Canada in 1791]]The Province of Lower Canada inherited the mixed set of French and English institutions that existed in the Province of Quebec during the 1763–1791 period and which continued to exist later in Canada-East (1841–1867) and ultimately in the current Province of Quebec (1867–). 
17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /data/load/document_35761.txt: -------------------------------------------------------------------------------- 1 | 2 | Blue_Ridge,_Alabama 3 | 4 | Blue Ridge is a [[census-designated place]] (CDP) in [[Elmore County, Alabama|Elmore County]], [[Alabama]], [[United States]]. At the 2000 census the population was 1,331. It is part of the [[Montgomery, Alabama|Montgomery]] [[Montgomery Metropolitan Area|Metropolitan Statistical Area]]. 5 | 6 | Blue Ridge is located at (32.486744, -86.190823). 7 | 8 | According to the [[U.S. Census Bureau]], the CDP has a total area of 7.9 square miles (20.5 km²), of which, 7.9 square miles (20.4 km²) of it is land and 0.1 square miles (0.2 km²) of it (0.88%) is water. 9 | 10 | As of the [[census]] of 2000, there were 1,331 people, 511 households, and 436 families residing in the CDP. The [[population density]] was 169.3 people per square mile (65.4/km²). There were 526 housing units at an average density of 66.9/sq mi (25.8/km²). The racial makeup of the CDP was 95.79% [[Race (United States Census)|White]], 3.08% [[Race (United States Census)|Black]] or [[Race (United States Census)|African American]], 0.38% [[Race (United States Census)|Native American]], 0.30% [[Race (United States Census)|Asian]], 0.08% [[Race (United States Census)|Pacific Islander]], and 0.38% from two or more races. 0.53% of the population were [[Race (United States Census)|Hispanic]] or [[Race (United States Census)|Latino]] of any race. 11 | 12 | There were 511 households out of which 30.7% had children under the age of 18 living with them, 0.0% were [[Marriage|married couples]] living together, 6.5% had a female householder with no husband present, and 14.5% were non-families. 13.1% of all households were made up of individuals and 5.3% had someone living alone who was 65 years of age or older. 
The average household size was 2.60 and the average family size was 2.84. 13 | 14 | In the CDP the population was spread out with 22.2% under the age of 18, 4.7% from 18 to 24, 21.0% from 25 to 44, 37.6% from 45 to 64, and 14.6% who were 65 years of age or older. The median age was 46 years. For every 100 females there were 101.4 males. For every 100 females age 18 and over, there were 94.4 males. 15 | 16 | The median income for a household in the CDP was $73,162, and the median income for a family was $83,320. Males had a median income of $60,625 versus $37,875 for females. The [[per capita income]] for the CDP was $32,774. None of the families and 1.1% of the population were living below the [[poverty line]], including no under eighteens and none of those over 64. 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /data/load/document_39592.txt: -------------------------------------------------------------------------------- 1 | 2 | Cypress_Quarters,_Florida 3 | 4 | 5 | Cypress Quarters is a [[census-designated place]] (CDP) in [[Okeechobee County, Florida|Okeechobee County]], [[Florida]], [[United States]]. The population was 1,150 at the 2000 census. 6 | 7 | Cypress Quarters is located at . 8 | 9 | According to the [[United States Census Bureau]], the CDP has a total area of 2.7 square miles (7.0 km²), all of it land. 10 | 11 | As of the [[census]] of 2000, there were 1,150 people, 420 households, and 291 families residing in the CDP. The [[population density]] was 425.0 people per square mile (163.8/km²). There were 469 housing units at an average density of 173.3/sq mi (66.8/km²). The racial makeup of the CDP was 29.22% [[White (U.S. Census)|White]], 65.83% [[African American (U.S. Census)|African American]], 0.61% [[Native American (U.S. Census)|Native American]], 0.43% [[Asian (U.S. Census)|Asian]], 3.04% from [[Race (United States Census)|other races]], and 0.87% from two or more races. [[Hispanic (U.S. 
Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 5.48% of the population. 12 | 13 | There were 420 households out of which 29.3% had children under the age of 18 living with them, 40.2% were [[Marriage|married couples]] living together, 24.5% had a female householder with no husband present, and 30.7% were non-families. 25.5% of all households were made up of individuals and 9.5% had someone living alone who was 65 years of age or older. The average household size was 2.72 and the average family size was 3.26. 14 | 15 | In the CDP the population was spread out with 30.7% under the age of 18, 9.6% from 18 to 24, 22.6% from 25 to 44, 24.1% from 45 to 64, and 13.0% who were 65 years of age or older. The median age was 33 years. For every 100 females there were 94.3 males. For every 100 females age 18 and over, there were 89.3 males. 16 | 17 | The median income for a household in the CDP was $29,565, and the median income for a family was $38,125. Males had a median income of $31,103 versus $17,411 for females. The [[per capita income]] for the CDP was $13,046. About 25.4% of families and 30.5% of the population were below the [[poverty line]], including 32.1% of those under age 18 and 16.7% of those age 65 or over. 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /data/load/document_42772.txt: -------------------------------------------------------------------------------- 1 | 2 | Somers,_Iowa 3 | 4 | Somers is a city in [[Calhoun County, Iowa|Calhoun County]], [[Iowa]], [[United States]]. The population was 165 at the 2000 census. 5 | 6 | Somers is located at (42.377902, -94.431013). 7 | 8 | According to the [[United States Census Bureau]], the city has a total area of 0.3 [[square mile]]s (0.9 [[km²]]), all of it land. 9 | 10 | As of the [[census]] of 2000, there were 165 people, 61 households, and 51 families residing in the city. 
The [[population density]] was 473.1 people per square mile (182.0/km²). There were 66 housing units at an average density of 189.2/sq mi (72.8/km²). The racial makeup of the city was 97.58% [[White (U.S. Census)|White]], 0.61% [[African American (U.S. Census)|African American]], 1.82% from [[Race (United States Census)|other races]]. [[Hispanic (U.S. Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 3.03% of the population. 11 | 12 | There were 61 households out of which 27.9% had children under the age of 18 living with them, 67.2% were [[Marriage|married couples]] living together, 13.1% had a female householder with no husband present, and 14.8% were non-families. 9.8% of all households were made up of individuals and 4.9% had someone living alone who was 65 years of age or older. The average household size was 2.70 and the average family size was 2.92. 13 | 14 | In the city the population was spread out with 24.8% under the age of 18, 8.5% from 18 to 24, 24.8% from 25 to 44, 22.4% from 45 to 64, and 19.4% who were 65 years of age or older. The median age was 40 years. For every 100 females there were 103.7 males. For every 100 females age 18 and over, there were 96.8 males. 15 | 16 | The median income for a household in the city was $31,250, and the median income for a family was $43,750. Males had a median income of $32,083 versus $21,250 for females. The [[per capita income]] for the city was $15,777. About 11.5% of families and 15.9% of the population were below the [[poverty line]], including 33.3% of those under the age of eighteen and 16.0% of those sixty five or over. 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /data/load/document_42845.txt: -------------------------------------------------------------------------------- 1 | 2 | Webb,_Iowa 3 | 4 | Webb is a city in [[Clay County, Iowa|Clay County]], [[Iowa]], [[United States]]. 
The population was 165 at the [[United States Census, 2000|2000 census]]. 5 | 6 | Webb is located at (42.948134, -95.012281). 7 | 8 | According to the [[United States Census Bureau]], the city has a total area of 0.5 square miles (1.3 km²), all of it land. 9 | 10 | As of the [[census]] of 2000, there were 165 people, 73 households, and 44 families residing in the city. The [[population density]] was 328.0 people per square mile (127.4/km²). There were 83 housing units at an average density of 165.0/sq mi (64.1/km²). The racial makeup of the city was 99.39% [[White (U.S. Census)|White]] and 0.61% [[Native American (U.S. Census)|Native American]]. [[Hispanic (U.S. Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 0.61% of the population. 11 | 12 | There were 73 households out of which 26.0% had children under the age of 18 living with them, 50.7% were [[Marriage|married couples]] living together, 4.1% had a female householder with no husband present, and 38.4% were non-families. 34.2% of all households were made up of individuals and 20.5% had someone living alone who was 65 years of age or older. The average household size was 2.26 and the average family size was 2.84. 13 | 14 | In the city the population was spread out with 25.5% under the age of 18, 4.2% from 18 to 24, 24.2% from 25 to 44, 23.6% from 45 to 64, and 22.4% who were 65 years of age or older. The median age was 42 years. For every 100 females there were 96.4 males. For every 100 females age 18 and over, there were 86.4 males. 15 | 16 | The median income for a household in the city was $27,500, and the median income for a family was $31,500. Males had a median income of $30,000 versus $21,528 for females. The [[per capita income]] for the city was $13,087. About 10.4% of families and 9.8% of the population were below the [[poverty line]], including 18.8% of those under the age of eighteen and none of those sixty five or over. 
17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /data/load/document_45024.txt: -------------------------------------------------------------------------------- 1 | 2 | Simsboro,_Louisiana 3 | 4 | 5 | Simsboro is a village in [[Lincoln Parish, Louisiana|Lincoln Parish]], [[Louisiana]], [[United States]]. The population was 684 at the [[2000 United States Census|2000 census]]. It is part of the [[Ruston, Louisiana|Ruston]] [[Ruston micropolitan area|Micropolitan Statistical Area]]. 6 | 7 | Simsboro is located at (32.534169, -92.786417). 8 | 9 | According to the [[United States Census Bureau]], the village has a total area of 9.0 km² (3.5 mi²), all land. 10 | 11 | As of the [[census]] of 2000, there were 684 people, 273 households, and 193 families residing in the village. The [[population density]] was 76.1/km² (197.2/mi²). There were 338 housing units at an average density of 37.6/km² (97.4/mi²). The racial makeup of the village was 77.05% [[White (U.S. Census)|White]], 21.49% [[African American (U.S. Census)|African American]], 0.73% [[Native American (U.S. Census)|Native American]], 0.44% from [[Race (United States Census)|other races]], and 0.29% from two or more races. [[Hispanic (U.S. Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 0.73% of the population. 12 | 13 | There were 273 households out of which 32.2% had children under the age of 18 living with them, 52.0% were [[Marriage|married couples]] living together, 14.7% had a female householder with no husband present, and 29.3% were non-families. 23.4% of all households were made up of individuals and 8.1% had someone living alone who was 65 years of age or older. The average household size was 2.48 and the average family size was 2.93. 14 | 15 | In the village the population was spread out with 24.3% under the age of 18, 13.6% from 18 to 24, 28.4% from 25 to 44, 21.6% from 45 to 64, and 12.1% who were 65 years of age or older. 
The median age was 34 years. For every 100 females there were 93.8 males. For every 100 females age 18 and over, there were 98.5 males. 16 | 17 | The median income for a household in the village was $31,324, and the median income for a family was $35,417. Males had a median income of $31,111 versus $18,750 for females. The [[per capita income]] for the village was $14,465. About 14.4% of families and 18.1% of the population were below the [[poverty line]], including 23.0% of those under age 18 and none of those age 65 or over. 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /data/load/document_45516.txt: -------------------------------------------------------------------------------- 1 | 2 | Cornville,_Maine 3 | 4 | Cornville is a [[New England town|town]] in [[Somerset County, Maine|Somerset County]], [[Maine]], [[United States]]. The population was 1,208 at the [[2000 United States Census|2000 census]]. 5 | 6 | Settled in 1794, Cornville was incorporated on 24 February 1798 as the 116th town in Maine. 7 | 8 | According to the [[United States Census Bureau]], the town has a total area of 40.7 square miles (105.3 km²), of which, 40.5 square miles (104.8 km²) of it is land and 0.2 square miles (0.5 km²) of it (0.52%) is water. 9 | 10 | As of the [[census]] of 2000, there were 1,208 people, 449 households, and 352 families residing in the town. The [[population density]] was 29.9 people per square mile (11.5/km²). There were 515 housing units at an average density of 12.7/sq mi (4.9/km²). The racial makeup of the town was 99.01% [[White (U.S. Census)|White]], 0.08% [[African American (U.S. Census)|African American]], 0.17% [[Native American (U.S. Census)|Native American]], 0.25% [[Asian (U.S. Census)|Asian]], and 0.50% from two or more races. [[Hispanic (U.S. Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 0.33% of the population. 
11 | 12 | There were 449 households out of which 34.3% had children under the age of 18 living with them, 67.5% were [[Marriage|married couples]] living together, 6.7% had a female householder with no husband present, and 21.6% were non-families. 15.1% of all households were made up of individuals and 6.9% had someone living alone who was 65 years of age or older. The average household size was 2.69 and the average family size was 2.97. 13 | 14 | In the town the population was spread out with 25.2% under the age of 18, 7.6% from 18 to 24, 28.0% from 25 to 44, 27.8% from 45 to 64, and 11.3% who were 65 years of age or older. The median age was 39 years. For every 100 females there were 97.4 males. For every 100 females age 18 and over, there were 97.6 males. 15 | 16 | The median income for a household in the town was $38,015, and the median income for a family was $41,875. Males had a median income of $30,543 versus $22,083 for females. The [[per capita income]] for the town was $16,184. About 8.4% of families and 11.5% of the population were below the [[poverty line]], including 17.5% of those under age 18 and 11.3% of those age 65 or over. 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /data/load/document_46648.txt: -------------------------------------------------------------------------------- 1 | 2 | Lincoln_Township,_Clare_County,_Michigan 3 | 4 | 5 | Lincoln Township is a [[civil township]] of [[Clare County, Michigan|Clare County]] in the [[U.S. state]] of [[Michigan]]. The population was 1,758 at the [[United States Census, 2000|2000 census]]. 6 | 7 | [[Lake George, Michigan|Lake George]] is an [[unincorporated community]] in the northwest part of the township. 8 | 9 | According to the [[United States Census Bureau]], the township has a total area of 35.9 [[square mile]]s (93.1 [[km²]]), of which, 35.2 square miles (91.2 km²) of it is land and 0.7 square miles (1.9 km²) of it (2.03%) is water.
10 | 11 | As of the [[census]] of 2000, there were 1,758 people, 766 households, and 537 families residing in the township. The [[population density]] was 49.9 per square mile (19.3/km²). There were 1,949 housing units at an average density of 55.4/sq mi (21.4/km²). The racial makeup of the township was 97.50% [[White (U.S. Census)|White]], 0.28% [[African American (U.S. Census)|African American]], 0.68% [[Native American (U.S. Census)|Native American]], 0.28% [[Asian (U.S. Census)|Asian]], 0.11% from [[Race (United States Census)|other races]], and 1.14% from two or more races. [[Hispanic (U.S. Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 1.19% of the population. 12 | 13 | There were 766 households out of which 22.7% had children under the age of 18 living with them, 57.8% were [[Marriage|married couples]] living together, 8.4% had a female householder with no husband present, and 29.8% were non-families. 25.8% of all households were made up of individuals and 11.6% had someone living alone who was 65 years of age or older. The average household size was 2.30 and the average family size was 2.71. 14 | 15 | In the township the population was spread out with 20.4% under the age of 18, 5.1% from 18 to 24, 23.0% from 25 to 44, 31.6% from 45 to 64, and 19.9% who were 65 years of age or older. The median age was 46 years. For every 100 females there were 98.9 males. For every 100 females age 18 and over, there were 97.7 males. 16 | 17 | The median income for a household in the township was $32,279, and the median income for a family was $38,304. Males had a median income of $31,066 versus $22,000 for females. The [[per capita income]] for the township was $18,146. About 9.2% of families and 11.1% of the population were below the [[poverty line]], including 16.1% of those under age 18 and 5.5% of those age 65 or over. 
18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /data/load/document_4709.txt: -------------------------------------------------------------------------------- 1 | 2 | Foreign_relations_of_Georgia 3 | 4 | 5 | [[Georgia (country)|Georgia]]'s location, nestled between the [[Black Sea]], [[Russia]], and [[Turkey]], gives it strategic importance far beyond its size. It is developing as the gateway from the Black Sea to the [[Caucasus]] and the larger Caspian region, but also serves as a buffer between Russia and Turkey. Georgia has a long and close relationship with Russia, but it is reaching out to its other neighbors and looking to the West in search of alternatives and opportunities. It signed a partnership and cooperation agreement with the [[European Union]], participates in the [[Partnership for Peace]], and encourages foreign investment. [[France]], [[Germany]], the [[United Kingdom]], and the [[United States]] all have embassies in [[Tbilisi]]. 6 | 7 | Georgia is a member of the [[United Nations]], the [[Council of Europe]], and the [[Organization for Security and Co-operation in Europe|OSCE]]. 8 | 9 | Because of its strategic location it is in both the [[Russia]]n and [[United States|American]] spheres of influence. In common with many ex-Soviet republics it is both influenced by and fearful of its larger neighbour. The invitation of US troops into the country caused tension with [[Moscow]]. The [[Russian government]] also believes that Georgia is being used by [[Chechnya|Chechen]] separatists. The American government has interests in an oil pipeline passing through the country. Former president [[Eduard Shevardnadze]] attempted to balance these competing demands. The new leadership is much closer to the United States.
10 | 11 | Disputes - international: 12 | Georgia's relations with Russia are at their lowest point in modern history due to the [[2006 Georgian-Russian espionage controversy|Georgian-Russian espionage controversy]] and the [[2008 South Ossetia war]]. Georgia broke off diplomatic relations with Russia and has left the [[Commonwealth of Independent States]]. 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /data/load/document_47156.txt: -------------------------------------------------------------------------------- 1 | 2 | Newkirk_Township,_Michigan 3 | 4 | Newkirk Township is a [[civil township]] of [[Lake County, Michigan|Lake County]] in the [[U.S. state]] of [[Michigan]]. As of the [[United States Census, 2000|2000 census]], the population was 719. 5 | 6 | According to the [[United States Census Bureau]], the township has a total area of 72.8 square miles (188.6 km²), of which, 72.8 square miles (188.4 km²) of it is land and 0.1 square miles (0.2 km²) of it (0.11%) is water. 7 | 8 | As of the [[census]] of 2000, there were 719 people, 302 households, and 199 families residing in the township. The [[population density]] was 9.9 per square mile (3.8/km²). There were 767 housing units at an average density of 10.5/sq mi (4.1/km²). The racial makeup of the township was 93.74% [[White (U.S. Census)|White]], 0.14% [[African American (U.S. Census)|African American]], 2.64% [[Native American (U.S. Census)|Native American]], 0.14% [[Asian (U.S. Census)|Asian]], 0.42% from [[Race (United States Census)|other races]], and 2.92% from two or more races. [[Hispanic (U.S. Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 2.09% of the population. 9 | 10 | There were 302 households out of which 25.8% had children under the age of 18 living with them, 53.0% were [[Marriage|married couples]] living together, 9.6% had a female householder with no husband present, and 34.1% were non-families.
30.1% of all households were made up of individuals and 14.2% had someone living alone who was 65 years of age or older. The average household size was 2.38 and the average family size was 2.90. 11 | 12 | In the township the population was spread out with 24.8% under the age of 18, 6.0% from 18 to 24, 24.1% from 25 to 44, 24.9% from 45 to 64, and 20.3% who were 65 years of age or older. The median age was 43 years. For every 100 females there were 99.2 males. For every 100 females age 18 and over, there were 105.7 males. 13 | 14 | The median income for a household in the township was $23,636, and the median income for a family was $26,932. Males had a median income of $25,893 versus $19,464 for females. The [[per capita income]] for the township was $11,432. Below the [[poverty line]] were 25.1% of people, 19.1% of families, 37.2% of those under 18 and 14.1% of those over 64. 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /data/load/document_47474.txt: -------------------------------------------------------------------------------- 1 | 2 | Avery_Township,_Michigan 3 | 4 | Avery Township is a [[civil township]] of [[Montmorency County, Michigan|Montmorency County]] in the [[U.S. state]] of [[Michigan]]. As of the [[United States Census, 2000|2000 census]], the township population was 717. 5 | 6 | According to the [[United States Census Bureau]], the township has a total area of 35.3 square miles (91.4 km²), of which, 35.0 square miles (90.7 km²) of it is land and 0.3 square miles (0.7 km²) of it (0.74%) is water. 7 | 8 | As of the [[census]] of 2000, there were 717 people, 309 households, and 218 families residing in the township. The [[population density]] was 20.5 per square mile (7.9/km²). There were 646 housing units at an average density of 18.4/sq mi (7.1/km²). The racial makeup of the township was 97.07% [[White (U.S. Census)|White]], 1.53% [[African American (U.S. 
Census)|African American]], 0.84% [[Native American (U.S. Census)|Native American]], and 0.56% from two or more races. [[Hispanic (U.S. Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 0.42% of the population. 9 | 10 | There were 309 households out of which 22.0% had children under the age of 18 living with them, 59.5% were [[Marriage|married couples]] living together, 7.1% had a female householder with no husband present, and 29.4% were non-families. 23.9% of all households were made up of individuals and 14.6% had someone living alone who was 65 years of age or older. The average household size was 2.32 and the average family size was 2.73. 11 | 12 | In the township the population was spread out with 20.6% under the age of 18, 4.5% from 18 to 24, 20.9% from 25 to 44, 33.2% from 45 to 64, and 20.8% who were 65 years of age or older. The median age was 48 years. For every 100 females there were 97.0 males. For every 100 females age 18 and over, there were 93.5 males. 13 | 14 | The median income for a household in the township was $27,723, and the median income for a family was $28,261. Males had a median income of $24,464 versus $15,833 for females. The [[per capita income]] for the township was $14,677. About 11.0% of families and 17.4% of the population were below the [[poverty line]], including 33.1% of those under age 18 and 10.9% of those age 65 or over. 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /data/load/document_48177.txt: -------------------------------------------------------------------------------- 1 | 2 | Seavey_Township,_Aitkin_County,_Minnesota 3 | 4 | Seavey Township is a township in [[Aitkin County, Minnesota|Aitkin County]], [[Minnesota]], [[United States]]. The population was 64 as of the [[2000 United States Census|2000 census]]. 5 | 6 | According to the [[United States Census Bureau]], the township has a total area of 36.3 square miles (94.1 km²), all of it land. 
7 | 8 | [[White Pine Township, Aitkin County, Minnesota|White Pine Township]] (northeast) 9 | [[Pliny Township, Aitkin County, Minnesota|Pliny Township]] (east) 10 | [[Williams Township, Aitkin County, Minnesota|Williams Township]] (southeast) 11 | [[Idun Township, Aitkin County, Minnesota|Idun Township]] (south) 12 | [[East Side Township, Mille Lacs County, Minnesota|East Side Township, Mille Lacs County]] (southwest) 13 | [[Lakeside Township, Aitkin County, Minnesota|Lakeside Township]] (west) 14 | [[Malmo Township, Aitkin County, Minnesota|Malmo Township]] (northwest) 15 | 16 | The township contains these two cemeteries: Holden and Ostlund. 17 | 18 | As of the [[census]] of 2000, there were 64 people, 31 households, and 16 families residing in the township. The [[population density]] was 1.8 people per square mile (0.7/km²). There were 79 housing units at an average density of 2.2/sq mi (0.8/km²). The racial makeup of the township was 100.00% [[White (U.S. Census)|White]]. 19 | 20 | There were 31 households out of which 16.1% had children under the age of 18 living with them, 41.9% were [[Marriage|married couples]] living together, and 45.2% were non-families. 45.2% of all households were made up of individuals and 12.9% had someone living alone who was 65 years of age or older. The average household size was 2.06 and the average family size was 2.88. 21 | 22 | In the township the population was spread out with 18.8% under the age of 18, 1.6% from 18 to 24, 29.7% from 25 to 44, 31.3% from 45 to 64, and 18.8% who were 65 years of age or older. The median age was 45 years. For every 100 females there were 137.0 males. For every 100 females age 18 and over, there were 136.4 males. 23 | 24 | The median income for a household in the township was $30,000, and the median income for a family was $31,250. Males had a median income of $44,375 versus $11,250 for females. The [[per capita income]] for the township was $24,582.
There were 15.0% of families and 21.1% of the population living below the [[poverty line]], including 28.6% of under eighteens and 23.8% of those over 64. 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /data/load/document_49683.txt: -------------------------------------------------------------------------------- 1 | 2 | Bluffton_Township,_Otter_Tail_County,_Minnesota 3 | 4 | Bluffton Township is a township in [[Otter Tail County, Minnesota|Otter Tail County]], [[Minnesota]], [[United States]]. The population was 474 at the 2000 census. 5 | 6 | According to the [[United States Census Bureau]], the township has a total area of 33.0 square miles (85.4 km²), of which, 33.0 square miles (85.4 km²) of it is land and 0.04 square miles (0.1 km²) of it (0.09%) is water. 7 | 8 | As of the [[census]] of 2000, there were 474 people, 168 households, and 131 families residing in the township. The [[population density]] was 14.4 people per square mile (5.6/km²). There were 180 housing units at an average density of 5.5/sq mi (2.1/km²). The racial makeup of the township was 99.16% [[White (U.S. Census)|White]], 0.21% [[Asian (U.S. Census)|Asian]], 0.21% from [[Race (United States Census)|other races]], and 0.42% from two or more races. [[Hispanic (U.S. Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 0.42% of the population. 9 | 10 | There were 168 households out of which 39.3% had children under the age of 18 living with them, 67.3% were [[Marriage|married couples]] living together, 5.4% had a female householder with no husband present, and 22.0% were non-families. 16.1% of all households were made up of individuals and 7.7% had someone living alone who was 65 years of age or older. The average household size was 2.82 and the average family size was 3.19. 
11 | 12 | In the township the population was spread out with 28.9% under the age of 18, 9.5% from 18 to 24, 24.7% from 25 to 44, 26.2% from 45 to 64, and 10.8% who were 65 years of age or older. The median age was 38 years. For every 100 females there were 111.6 males. For every 100 females age 18 and over, there were 114.6 males. 13 | 14 | The median income for a household in the township was $45,179, and the median income for a family was $48,611. Males had a median income of $28,897 versus $17,143 for females. The [[per capita income]] for the township was $18,379. About 5.8% of families and 8.3% of the population were below the [[poverty line]], including 13.3% of those under age 18 and 12.2% of those age 65 or over. 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /data/load/document_49819.txt: -------------------------------------------------------------------------------- 1 | 2 | Munch_Township,_Pine_County,_Minnesota 3 | 4 | Munch Township is a township in [[Pine County, Minnesota|Pine County]], [[Minnesota]], [[United States]]. The population was 222 at the 2000 census. 5 | 6 | According to the [[United States Census Bureau]], the township has a total area of 36.1 square miles (93.4 km²), of which, 35.5 square miles (91.9 km²) of it is land and 0.6 square miles (1.5 km²) of it (1.58%) is water. 7 | 8 | As of the [[census]] of 2000, there were 222 people, 97 households, and 65 families residing in the township. The [[population density]] was 6.3 people per square mile (2.4/km²). There were 168 housing units at an average density of 4.7/sq mi (1.8/km²). The racial makeup of the township was 96.85% [[White (U.S. Census)|White]], 1.80% [[Native American (U.S. Census)|Native American]], and 1.35% from two or more races. [[Hispanic (U.S. Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 0.45% of the population. 
9 | 10 | There were 97 households out of which 20.6% had children under the age of 18 living with them, 55.7% were [[Marriage|married couples]] living together, 9.3% had a female householder with no husband present, and 32.0% were non-families. 26.8% of all households were made up of individuals and 7.2% had someone living alone who was 65 years of age or older. The average household size was 2.29 and the average family size was 2.73. 11 | 12 | In the township the population was spread out with 18.9% under the age of 18, 5.0% from 18 to 24, 32.0% from 25 to 44, 28.8% from 45 to 64, and 15.3% who were 65 years of age or older. The median age was 41 years. For every 100 females there were 101.8 males. For every 100 females age 18 and over, there were 104.5 males. 13 | 14 | The median income for a household in the township was $34,375, and the median income for a family was $35,938. Males had a median income of $23,125 versus $22,917 for females. The [[per capita income]] for the township was $19,588. About 7.2% of families and 6.7% of the population were below the [[poverty line]], including 3.8% of those under the age of eighteen and none of those sixty five or over. 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /data/load/document_51033.txt: -------------------------------------------------------------------------------- 1 | 2 | Bosworth,_Missouri 3 | 4 | Bosworth is a city in [[Carroll County, Missouri|Carroll County]], [[Missouri]], [[United States]]. The population was 382 at the 2000 census. 5 | 6 | Bosworth is located at (39.470448, -93.334797). 7 | 8 | According to the [[United States Census Bureau]], the city has a total area of 0.6 square miles (1.4 km²), all of it land. 9 | 10 | As of the [[census]] of 2000, there were 382 people, 153 households, and 102 families residing in the city. The [[population density]] was 691.0 people per square mile (268.2/km²). 
There were 195 housing units at an average density of 352.7/sq mi (136.9/km²). The racial makeup of the city was 100.00% [[White (U.S. Census)|White]]. 11 | 12 | There were 153 households out of which 30.7% had children under the age of 18 living with them, 56.9% were [[Marriage|married couples]] living together, 6.5% had a female householder with no husband present, and 32.7% were non-families. 26.8% of all households were made up of individuals and 16.3% had someone living alone who was 65 years of age or older. The average household size was 2.50 and the average family size was 3.09. 13 | 14 | In the city the population was spread out with 29.1% under the age of 18, 6.8% from 18 to 24, 24.6% from 25 to 44, 20.2% from 45 to 64, and 19.4% who were 65 years of age or older. The median age was 37 years. For every 100 females there were 101.1 males. For every 100 females age 18 and over, there were 97.8 males. 15 | 16 | The median income for a household in the city was $25,357, and the median income for a family was $28,750. Males had a median income of $23,250 versus $17,292 for females. The [[per capita income]] for the city was $11,526. About 11.7% of families and 15.4% of the population were below the [[poverty line]], including 23.5% of those under age 18 and 6.3% of those age 65 or over. 17 | 18 | [[Barbara Marx]], who was first married to [[Zeppo Marx]], one of the [[Marx Brothers]] (from 1959 until their divorce in 1973), then to [[Frank Sinatra]] (from 1976 until his death in 1998), was born in Bosworth. 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /data/load/document_51679.txt: -------------------------------------------------------------------------------- 1 | 2 | Velda_City,_Missouri 3 | 4 | Velda City is a city in [[St. Louis County, Missouri|St. Louis County]], [[Missouri]], [[United States]]. The population was 1,616 at the 2000 census. 5 | 6 | Velda City is located at (38.694051, -90.294163). 
7 | 8 | According to the [[United States Census Bureau]], the city has a total area of 0.2 square miles (0.4 km²), all of it land. 9 | 10 | Velda City is the most densely populated incorporated area in [[St. Louis County, Missouri|St. Louis County]] with 9,872.8 people per square mile. 11 | 12 | As of the [[census]] of 2000, there were 1,616 people, 610 households, and 428 families residing in the city. The [[population density]] was 9,872.8 people per square mile (3,899.6/km²). There were 670 housing units at an average density of 4,093.3/sq mi (1,616.8/km²). The racial makeup of the city was 3.09% [[White (U.S. Census)|White]], 96.04% [[African American (U.S. Census)|African American]], and 0.87% from two or more races. [[Hispanic (U.S. Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 0.87% of the population. 13 | 14 | There were 610 households out of which 28.4% had children under the age of 18 living with them, 29.5% were [[Marriage|married couples]] living together, 35.9% had a female householder with no husband present, and 29.7% were non-families. 27.5% of all households were made up of individuals and 9.0% had someone living alone who was 65 years of age or older. The average household size was 2.65 and the average family size was 3.20. 15 | 16 | In the city the population was spread out with 27.0% under the age of 18, 9.4% from 18 to 24, 26.5% from 25 to 44, 24.6% from 45 to 64, and 12.5% who were 65 years of age or older. The median age was 36 years. For every 100 females there were 76.8 males. For every 100 females age 18 and over, there were 68.7 males. 17 | 18 | The median income for a household in the city was $30,000, and the median income for a family was $31,652. Males had a median income of $27,768 versus $26,083 for females. The [[per capita income]] for the city was $15,009. 
About 15.9% of families and 17.5% of the population were below the [[poverty line]], including 19.8% of those under age 18 and 23.3% of those age 65 or over. 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /data/load/document_51756.txt: -------------------------------------------------------------------------------- 1 | 2 | Sheridan,_Missouri 3 | 4 | Sheridan is a city in [[Worth County, Missouri|Worth County]], [[Missouri]], [[United States]], near the [[Platte River (Missouri)|Platte River]]. The population was 185 at the 2000 census, at which time it was a town. 5 | 6 | Sheridan is located at (40.517355, -94.614890). 7 | 8 | According to the [[United States Census Bureau]], the town has a total area of 0.2 square miles (0.5 km²), all of it land. 9 | 10 | As of the [[census]] of 2000, there were 185 people, 97 households, and 51 families residing in the town. The [[population density]] was 954.9 people per square mile (375.9/km²). There were 120 housing units at an average density of 619.4/sq mi (243.9/km²). The racial makeup of the town was 98.92% [[White (U.S. Census)|White]], and 1.08% from two or more races. 11 | 12 | There were 97 households out of which 18.6% had children under the age of 18 living with them, 43.3% were [[Marriage|married couples]] living together, 9.3% had a female householder with no husband present, and 46.4% were non-families. 43.3% of all households were made up of individuals and 28.9% had someone living alone who was 65 years of age or older. The average household size was 1.91 and the average family size was 2.54. 13 | 14 | In the town the population was spread out with 18.4% under the age of 18, 5.4% from 18 to 24, 22.2% from 25 to 44, 23.8% from 45 to 64, and 30.3% who were 65 years of age or older. The median age was 49 years. For every 100 females there were 72.9 males. For every 100 females age 18 and over, there were 71.6 males. 
15 | 16 | The median income for a household in the town was $20,357, and the median income for a family was $25,750. Males had a median income of $24,583 versus $14,375 for females. The [[per capita income]] for the town was $12,162. About 4.4% of families and 13.5% of the population were below the [[poverty line]], including 6.1% of those under the age of eighteen and 14.5% of those sixty five or over. 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /data/load/document_53100.txt: -------------------------------------------------------------------------------- 1 | 2 | Newtown,_Missouri 3 | 4 | Newtown is a city in [[Sullivan County, Missouri|Sullivan County]], [[Missouri]], [[United States]]. The population was 209 at the [[United States Census, 2000|2000 census]], at which time it was a town. 5 | 6 | Newtown is located at (40.375879, -93.334578). 7 | 8 | According to the [[United States Census Bureau]], the town has a total area of 0.2 square miles (0.6 km²), all of it land. 9 | 10 | As of the [[census]] of 2000, there were 209 people, 86 households, and 55 families residing in the town. The [[population density]] was 826.0 people per square mile (322.8/km²). There were 93 housing units at an average density of 367.5/sq mi (143.6/km²). The racial makeup of the town was 98.56% [[White (U.S. Census)|White]], 0.48% [[Native American (U.S. Census)|Native American]], 0.48% from [[Race (United States Census)|other races]], and 0.48% from two or more races. [[Hispanic (U.S. Census)|Hispanic]] or [[Latino (U.S. Census)|Latino]] of any race were 4.31% of the population. 11 | 12 | There were 86 households out of which 36.0% had children under the age of 18 living with them, 50.0% were [[Marriage|married couples]] living together, 9.3% had a female householder with no husband present, and 36.0% were non-families. 
33.7% of all households were made up of individuals and 14.0% had someone living alone who was 65 years of age or older. The average household size was 2.43 and the average family size was 3.16. 13 | 14 | In the town the population was spread out with 32.1% under the age of 18, 9.1% from 18 to 24, 26.3% from 25 to 44, 17.7% from 45 to 64, and 14.8% who were 65 years of age or older. The median age was 32 years. For every 100 females there were 91.7 males. For every 100 females age 18 and over, there were 86.8 males. 15 | 16 | The median income for a household in the town was $21,250, and the median income for a family was $43,125. Males had a median income of $23,000 versus $19,625 for females. The [[per capita income]] for the town was $13,561. About 3.2% of families and 10.6% of the population were below the [[poverty line]], including 7.8% of those under the age of eighteen and 25.5% of those sixty five or over. 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /data/load/document_9566.txt: -------------------------------------------------------------------------------- 1 | 2 | Timeline_of_programming_languages 3 | 4 | This is a [[timeline]] of historically important [[programming language]]s. 5 | 6 | Legend 7 | ( Entry ) means a non-universal programming language 8 | * means a unique language (no direct predecessor) 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /data/lorem_ipsum.txt: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque eget sem augue, non auctor nunc. Nam tincidunt tellus eu lorem mollis non tempus felis vestibulum. Sed diam tortor, euismod id ultricies eget, lacinia sit amet velit. Curabitur volutpat felis ligula. Maecenas ac est ipsum, in mollis lorem. Etiam nunc nunc, viverra nec imperdiet id, lacinia at nunc. 
Donec molestie ornare adipiscing. Nam elit tortor, lobortis vitae aliquam id, faucibus eu lacus. Cras arcu sapien, lacinia id hendrerit vel, viverra in elit. Cras sed ipsum lacus, non mollis sem. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Curabitur vel bibendum massa. Vivamus adipiscing condimentum vulputate. 2 | -------------------------------------------------------------------------------- /data/readme.txt: -------------------------------------------------------------------------------- 1 | Generating a big file :) 2 | 3 | for i in {1..10000}; do cat lorem_ipsum.txt >> big.txt; done 4 | 5 | -------------------------------------------------------------------------------- /docroot/post.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Post data internally 6 |
7 | 9 |
10 | 11 |
12 | 13 | 14 | Post data externally 15 |
16 | 18 |
19 | 20 |
21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /leveldb/AUTHORS: -------------------------------------------------------------------------------- 1 | # Names should be added to this file like so: 2 | # Name or Organization 3 | 4 | Google Inc. 5 | 6 | # Initial version authors: 7 | Jeffrey Dean 8 | Sanjay Ghemawat 9 | 10 | # Partial list of contributors: 11 | Kevin Regan 12 | Johan Bilien 13 | -------------------------------------------------------------------------------- /leveldb/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Google Inc. nor the names of its 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /leveldb/NEWS: -------------------------------------------------------------------------------- 1 | Release 1.2 2011-05-16 2 | ---------------------- 3 | 4 | Fixes for larger databases (tested up to one billion 100-byte entries, 5 | i.e., ~100GB). 6 | 7 | (1) Place hard limit on number of level-0 files. This fixes errors 8 | of the form "too many open files". 9 | 10 | (2) Fixed memtable management. Before the fix, a heavy write burst 11 | could cause unbounded memory usage. 12 | 13 | A fix for a logging bug where the reader would incorrectly complain 14 | about corruption. 15 | 16 | Allow public access to WriteBatch contents so that users can easily 17 | wrap a DB. 18 | -------------------------------------------------------------------------------- /leveldb/README: -------------------------------------------------------------------------------- 1 | leveldb: A key-value store 2 | Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com) 3 | 4 | The code under this directory implements a system for maintaining a 5 | persistent key/value store. 6 | 7 | See doc/index.html for more explanation. 8 | See doc/impl.html for a brief overview of the implementation. 9 | 10 | The public interface is in include/*.h. Callers should not include or 11 | rely on the details of any other header files in this package. 
Those 12 | internal APIs may be changed without warning. 13 | 14 | Guide to header files: 15 | 16 | include/db.h 17 | Main interface to the DB: Start here 18 | 19 | include/options.h 20 | Control over the behavior of an entire database, and also 21 | control over the behavior of individual reads and writes. 22 | 23 | include/comparator.h 24 | Abstraction for user-specified comparison function. If you want 25 | just bytewise comparison of keys, you can use the default comparator, 26 | but clients can write their own comparator implementations if they 27 | want custom ordering (e.g. to handle different character 28 | encodings, etc.) 29 | 30 | include/iterator.h 31 | Interface for iterating over data. You can get an iterator 32 | from a DB object. 33 | 34 | include/write_batch.h 35 | Interface for atomically applying multiple updates to a database. 36 | 37 | include/slice.h 38 | A simple module for maintaining a pointer and a length into some 39 | other byte array. 40 | 41 | include/status.h 42 | Status is returned from many of the public interfaces and is used 43 | to report success and various kinds of errors. 44 | 45 | include/env.h 46 | Abstraction of the OS environment. A posix implementation of 47 | this interface is in util/env_posix.cc 48 | 49 | include/table.h 50 | include/table_builder.h 51 | Lower-level modules that most clients probably won't use directly 52 | -------------------------------------------------------------------------------- /leveldb/TODO: -------------------------------------------------------------------------------- 1 | ss 2 | - Stats 3 | 4 | db 5 | - Maybe implement DB::BulkDeleteForRange(start_key, end_key) 6 | that would blow away files whose ranges are entirely contained 7 | within [start_key..end_key]? For Chrome, deletion of obsolete 8 | object stores, etc. can be done in the background anyway, so 9 | probably not that important. 10 | - There have been requests for MultiGet. 
11 | 12 | After a range is completely deleted, what gets rid of the 13 | corresponding files if we do no future changes to that range. Make 14 | the conditions for triggering compactions fire in more situations? 15 | -------------------------------------------------------------------------------- /leveldb/db/builder.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_DB_BUILDER_H_ 6 | #define STORAGE_LEVELDB_DB_BUILDER_H_ 7 | 8 | #include "leveldb/status.h" 9 | 10 | namespace leveldb { 11 | 12 | struct Options; 13 | struct FileMetaData; 14 | 15 | class Env; 16 | class Iterator; 17 | class TableCache; 18 | class VersionEdit; 19 | 20 | // Build a Table file from the contents of *iter. The generated file 21 | // will be named according to meta->number. On success, the rest of 22 | // *meta will be filled with metadata about the generated table. 23 | // If no data is present in *iter, meta->file_size will be set to 24 | // zero, and no Table file will be produced. 25 | extern Status BuildTable(const std::string& dbname, 26 | Env* env, 27 | const Options& options, 28 | TableCache* table_cache, 29 | Iterator* iter, 30 | FileMetaData* meta); 31 | 32 | } // namespace leveldb 33 | 34 | #endif // STORAGE_LEVELDB_DB_BUILDER_H_ 35 | -------------------------------------------------------------------------------- /leveldb/db/db_iter.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | 5 | #ifndef STORAGE_LEVELDB_DB_DB_ITER_H_ 6 | #define STORAGE_LEVELDB_DB_DB_ITER_H_ 7 | 8 | #include 9 | #include "leveldb/db.h" 10 | #include "db/dbformat.h" 11 | 12 | namespace leveldb { 13 | 14 | class DBImpl; 15 | 16 | // Return a new iterator that converts internal keys (yielded by 17 | // "*internal_iter") that were live at the specified "sequence" number 18 | // into appropriate user keys. 19 | extern Iterator* NewDBIterator( 20 | DBImpl* db, 21 | const Comparator* user_key_comparator, 22 | Iterator* internal_iter, 23 | SequenceNumber sequence, 24 | uint32_t seed); 25 | 26 | } // namespace leveldb 27 | 28 | #endif // STORAGE_LEVELDB_DB_DB_ITER_H_ 29 | -------------------------------------------------------------------------------- /leveldb/db/log_format.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | // 5 | // Log format information shared by reader and writer. 6 | // See ../doc/log_format.txt for more detail. 7 | 8 | #ifndef STORAGE_LEVELDB_DB_LOG_FORMAT_H_ 9 | #define STORAGE_LEVELDB_DB_LOG_FORMAT_H_ 10 | 11 | namespace leveldb { 12 | namespace log { 13 | 14 | enum RecordType { 15 | // Zero is reserved for preallocated files 16 | kZeroType = 0, 17 | 18 | kFullType = 1, 19 | 20 | // For fragments 21 | kFirstType = 2, 22 | kMiddleType = 3, 23 | kLastType = 4 24 | }; 25 | static const int kMaxRecordType = kLastType; 26 | 27 | static const int kBlockSize = 32768; 28 | 29 | // Header is checksum (4 bytes), type (1 byte), length (2 bytes). 
30 | static const int kHeaderSize = 4 + 1 + 2; 31 | 32 | } // namespace log 33 | } // namespace leveldb 34 | 35 | #endif // STORAGE_LEVELDB_DB_LOG_FORMAT_H_ 36 | -------------------------------------------------------------------------------- /leveldb/db/log_writer.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_DB_LOG_WRITER_H_ 6 | #define STORAGE_LEVELDB_DB_LOG_WRITER_H_ 7 | 8 | #include 9 | #include "db/log_format.h" 10 | #include "leveldb/slice.h" 11 | #include "leveldb/status.h" 12 | 13 | namespace leveldb { 14 | 15 | class WritableFile; 16 | 17 | namespace log { 18 | 19 | class Writer { 20 | public: 21 | // Create a writer that will append data to "*dest". 22 | // "*dest" must be initially empty. 23 | // "*dest" must remain live while this Writer is in use. 24 | explicit Writer(WritableFile* dest); 25 | ~Writer(); 26 | 27 | Status AddRecord(const Slice& slice); 28 | 29 | private: 30 | WritableFile* dest_; 31 | int block_offset_; // Current offset in block 32 | 33 | // crc32c values for all supported record types. These are 34 | // pre-computed to reduce the overhead of computing the crc of the 35 | // record type stored in the header. 
36 | uint32_t type_crc_[kMaxRecordType + 1]; 37 | 38 | Status EmitPhysicalRecord(RecordType type, const char* ptr, size_t length); 39 | 40 | // No copying allowed 41 | Writer(const Writer&); 42 | void operator=(const Writer&); 43 | }; 44 | 45 | } // namespace log 46 | } // namespace leveldb 47 | 48 | #endif // STORAGE_LEVELDB_DB_LOG_WRITER_H_ 49 | -------------------------------------------------------------------------------- /leveldb/db/snapshot.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_DB_SNAPSHOT_H_ 6 | #define STORAGE_LEVELDB_DB_SNAPSHOT_H_ 7 | 8 | #include "leveldb/db.h" 9 | 10 | namespace leveldb { 11 | 12 | class SnapshotList; 13 | 14 | // Snapshots are kept in a doubly-linked list in the DB. 15 | // Each SnapshotImpl corresponds to a particular sequence number. 
16 | class SnapshotImpl : public Snapshot { 17 | public: 18 | SequenceNumber number_; // const after creation 19 | 20 | private: 21 | friend class SnapshotList; 22 | 23 | // SnapshotImpl is kept in a doubly-linked circular list 24 | SnapshotImpl* prev_; 25 | SnapshotImpl* next_; 26 | 27 | SnapshotList* list_; // just for sanity checks 28 | }; 29 | 30 | class SnapshotList { 31 | public: 32 | SnapshotList() { 33 | list_.prev_ = &list_; 34 | list_.next_ = &list_; 35 | } 36 | 37 | bool empty() const { return list_.next_ == &list_; } 38 | SnapshotImpl* oldest() const { assert(!empty()); return list_.next_; } 39 | SnapshotImpl* newest() const { assert(!empty()); return list_.prev_; } 40 | 41 | const SnapshotImpl* New(SequenceNumber seq) { 42 | SnapshotImpl* s = new SnapshotImpl; 43 | s->number_ = seq; 44 | s->list_ = this; 45 | s->next_ = &list_; 46 | s->prev_ = list_.prev_; 47 | s->prev_->next_ = s; 48 | s->next_->prev_ = s; 49 | return s; 50 | } 51 | 52 | void Delete(const SnapshotImpl* s) { 53 | assert(s->list_ == this); 54 | s->prev_->next_ = s->next_; 55 | s->next_->prev_ = s->prev_; 56 | delete s; 57 | } 58 | 59 | private: 60 | // Dummy head of doubly-linked list of snapshots 61 | SnapshotImpl list_; 62 | }; 63 | 64 | } // namespace leveldb 65 | 66 | #endif // STORAGE_LEVELDB_DB_SNAPSHOT_H_ 67 | -------------------------------------------------------------------------------- /leveldb/db/table_cache.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | // 5 | // Thread-safe (provides internal synchronization) 6 | 7 | #ifndef STORAGE_LEVELDB_DB_TABLE_CACHE_H_ 8 | #define STORAGE_LEVELDB_DB_TABLE_CACHE_H_ 9 | 10 | #include 11 | #include 12 | #include "db/dbformat.h" 13 | #include "leveldb/cache.h" 14 | #include "leveldb/table.h" 15 | #include "port/port.h" 16 | 17 | namespace leveldb { 18 | 19 | class Env; 20 | 21 | class TableCache { 22 | public: 23 | TableCache(const std::string& dbname, const Options* options, int entries); 24 | ~TableCache(); 25 | 26 | // Return an iterator for the specified file number (the corresponding 27 | // file length must be exactly "file_size" bytes). If "tableptr" is 28 | // non-NULL, also sets "*tableptr" to point to the Table object 29 | // underlying the returned iterator, or NULL if no Table object underlies 30 | // the returned iterator. The returned "*tableptr" object is owned by 31 | // the cache and should not be deleted, and is valid for as long as the 32 | // returned iterator is live. 33 | Iterator* NewIterator(const ReadOptions& options, 34 | uint64_t file_number, 35 | uint64_t file_size, 36 | Table** tableptr = NULL); 37 | 38 | // If a seek to internal key "k" in specified file finds an entry, 39 | // call (*handle_result)(arg, found_key, found_value). 
40 | Status Get(const ReadOptions& options, 41 | uint64_t file_number, 42 | uint64_t file_size, 43 | const Slice& k, 44 | void* arg, 45 | void (*handle_result)(void*, const Slice&, const Slice&)); 46 | 47 | // Evict any entry for the specified file number 48 | void Evict(uint64_t file_number); 49 | 50 | private: 51 | Env* const env_; 52 | const std::string dbname_; 53 | const Options* options_; 54 | Cache* cache_; 55 | 56 | Status FindTable(uint64_t file_number, uint64_t file_size, Cache::Handle**); 57 | }; 58 | 59 | } // namespace leveldb 60 | 61 | #endif // STORAGE_LEVELDB_DB_TABLE_CACHE_H_ 62 | -------------------------------------------------------------------------------- /leveldb/db/version_edit_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | 5 | #include "db/version_edit.h" 6 | #include "util/testharness.h" 7 | 8 | namespace leveldb { 9 | 10 | static void TestEncodeDecode(const VersionEdit& edit) { 11 | std::string encoded, encoded2; 12 | edit.EncodeTo(&encoded); 13 | VersionEdit parsed; 14 | Status s = parsed.DecodeFrom(encoded); 15 | ASSERT_TRUE(s.ok()) << s.ToString(); 16 | parsed.EncodeTo(&encoded2); 17 | ASSERT_EQ(encoded, encoded2); 18 | } 19 | 20 | class VersionEditTest { }; 21 | 22 | TEST(VersionEditTest, EncodeDecode) { 23 | static const uint64_t kBig = 1ull << 50; 24 | 25 | VersionEdit edit; 26 | for (int i = 0; i < 4; i++) { 27 | TestEncodeDecode(edit); 28 | edit.AddFile(3, kBig + 300 + i, kBig + 400 + i, 29 | InternalKey("foo", kBig + 500 + i, kTypeValue), 30 | InternalKey("zoo", kBig + 600 + i, kTypeDeletion)); 31 | edit.DeleteFile(4, kBig + 700 + i); 32 | edit.SetCompactPointer(i, InternalKey("x", kBig + 900 + i, kTypeValue)); 33 | } 34 | 35 | edit.SetComparatorName("foo"); 36 | edit.SetLogNumber(kBig + 100); 37 | edit.SetNextFile(kBig + 200); 38 | edit.SetLastSequence(kBig + 1000); 39 | TestEncodeDecode(edit); 40 | } 41 | 42 | } // namespace leveldb 43 | 44 | int main(int argc, char** argv) { 45 | return leveldb::test::RunAllTests(); 46 | } 47 | -------------------------------------------------------------------------------- /leveldb/db/write_batch_internal.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | 5 | #ifndef STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_ 6 | #define STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_ 7 | 8 | #include "leveldb/write_batch.h" 9 | 10 | namespace leveldb { 11 | 12 | class MemTable; 13 | 14 | // WriteBatchInternal provides static methods for manipulating a 15 | // WriteBatch that we don't want in the public WriteBatch interface. 16 | class WriteBatchInternal { 17 | public: 18 | // Return the number of entries in the batch. 19 | static int Count(const WriteBatch* batch); 20 | 21 | // Set the count for the number of entries in the batch. 22 | static void SetCount(WriteBatch* batch, int n); 23 | 24 | // Return the sequence number for the start of this batch. 25 | static SequenceNumber Sequence(const WriteBatch* batch); 26 | 27 | // Store the specified number as the sequence number for the start of 28 | // this batch. 29 | static void SetSequence(WriteBatch* batch, SequenceNumber seq); 30 | 31 | static Slice Contents(const WriteBatch* batch) { 32 | return Slice(batch->rep_); 33 | } 34 | 35 | static size_t ByteSize(const WriteBatch* batch) { 36 | return batch->rep_.size(); 37 | } 38 | 39 | static void SetContents(WriteBatch* batch, const Slice& contents); 40 | 41 | static Status InsertInto(const WriteBatch* batch, MemTable* memtable); 42 | 43 | static void Append(WriteBatch* dst, const WriteBatch* src); 44 | }; 45 | 46 | } // namespace leveldb 47 | 48 | 49 | #endif // STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_ 50 | -------------------------------------------------------------------------------- /leveldb/doc/doc.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin-left: 0.5in; 3 | margin-right: 0.5in; 4 | background: white; 5 | color: black; 6 | } 7 | 8 | h1 { 9 | margin-left: -0.2in; 10 | font-size: 14pt; 11 | } 12 | h2 { 13 | margin-left: -0in; 14 | font-size: 12pt; 15 | } 16 | h3 { 17 | margin-left: -0in; 18 | } 19 | h4 { 20 | margin-left: -0in; 21 | } 22 | hr { 23 | margin-left: -0in;
24 | } 25 | 26 | /* Definition lists: definition term bold */ 27 | dt { 28 | font-weight: bold; 29 | } 30 | 31 | address { 32 | text-align: center; 33 | } 34 | code,samp,var { 35 | color: blue; 36 | } 37 | kbd { 38 | color: #600000; 39 | } 40 | div.note p { 41 | float: right; 42 | width: 3in; 43 | margin-right: 0%; 44 | padding: 1px; 45 | border: 2px solid #6060a0; 46 | background-color: #fffff0; 47 | } 48 | 49 | ul { 50 | margin-top: -0em; 51 | margin-bottom: -0em; 52 | } 53 | 54 | ol { 55 | margin-top: -0em; 56 | margin-bottom: -0em; 57 | } 58 | 59 | UL.nobullets { 60 | list-style-type: none; 61 | list-style-image: none; 62 | margin-left: -1em; 63 | } 64 | 65 | p { 66 | margin: 1em 0 1em 0; 67 | padding: 0 0 0 0; 68 | } 69 | 70 | pre { 71 | line-height: 1.3em; 72 | padding: 0.4em 0 0.8em 0; 73 | margin: 0 0 0 0; 74 | border: 0 0 0 0; 75 | color: blue; 76 | } 77 | 78 | .datatable { 79 | margin-left: auto; 80 | margin-right: auto; 81 | margin-top: 2em; 82 | margin-bottom: 2em; 83 | border: 1px solid; 84 | } 85 | 86 | .datatable td,th { 87 | padding: 0 0.5em 0 0.5em; 88 | text-align: right; 89 | } 90 | -------------------------------------------------------------------------------- /leveldb/helpers/memenv/memenv.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_HELPERS_MEMENV_MEMENV_H_ 6 | #define STORAGE_LEVELDB_HELPERS_MEMENV_MEMENV_H_ 7 | 8 | namespace leveldb { 9 | 10 | class Env; 11 | 12 | // Returns a new environment that stores its data in memory and delegates 13 | // all non-file-storage tasks to base_env. The caller must delete the result 14 | // when it is no longer needed. 15 | // *base_env must remain live while the result is in use. 
16 | Env* NewMemEnv(Env* base_env); 17 | 18 | } // namespace leveldb 19 | 20 | #endif // STORAGE_LEVELDB_HELPERS_MEMENV_MEMENV_H_ 21 | -------------------------------------------------------------------------------- /leveldb/include/leveldb/write_batch.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | // 5 | // WriteBatch holds a collection of updates to apply atomically to a DB. 6 | // 7 | // The updates are applied in the order in which they are added 8 | // to the WriteBatch. For example, the value of "key" will be "v3" 9 | // after the following batch is written: 10 | // 11 | // batch.Put("key", "v1"); 12 | // batch.Delete("key"); 13 | // batch.Put("key", "v2"); 14 | // batch.Put("key", "v3"); 15 | // 16 | // Multiple threads can invoke const methods on a WriteBatch without 17 | // external synchronization, but if any of the threads may call a 18 | // non-const method, all threads accessing the same WriteBatch must use 19 | // external synchronization. 20 | 21 | #ifndef STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_ 22 | #define STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_ 23 | 24 | #include 25 | #include "leveldb/status.h" 26 | 27 | namespace leveldb { 28 | 29 | class Slice; 30 | 31 | class WriteBatch { 32 | public: 33 | WriteBatch(); 34 | ~WriteBatch(); 35 | 36 | // Store the mapping "key->value" in the database. 37 | void Put(const Slice& key, const Slice& value); 38 | 39 | // If the database contains a mapping for "key", erase it. Else do nothing. 40 | void Delete(const Slice& key); 41 | 42 | // Clear all updates buffered in this batch. 43 | void Clear(); 44 | 45 | // Support for iterating over the contents of a batch. 
46 | class Handler { 47 | public: 48 | virtual ~Handler(); 49 | virtual void Put(const Slice& key, const Slice& value) = 0; 50 | virtual void Delete(const Slice& key) = 0; 51 | }; 52 | Status Iterate(Handler* handler) const; 53 | 54 | private: 55 | friend class WriteBatchInternal; 56 | 57 | std::string rep_; // See comment in write_batch.cc for the format of rep_ 58 | 59 | // Intentionally copyable 60 | }; 61 | 62 | } // namespace leveldb 63 | 64 | #endif // STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_ 65 | -------------------------------------------------------------------------------- /leveldb/issues/issue200_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | // Test for issue 200: when iterator switches direction from backward 6 | // to forward, the current key can be yielded unexpectedly if a new 7 | // mutation has been added just before the current key. 8 | 9 | #include "leveldb/db.h" 10 | #include "util/testharness.h" 11 | 12 | namespace leveldb { 13 | 14 | class Issue200 { }; 15 | 16 | TEST(Issue200, Test) { 17 | // Get rid of any state from an old run. 
18 | std::string dbpath = test::TmpDir() + "/leveldb_issue200_test"; 19 | DestroyDB(dbpath, Options()); 20 | 21 | DB *db; 22 | Options options; 23 | options.create_if_missing = true; 24 | ASSERT_OK(DB::Open(options, dbpath, &db)); 25 | 26 | WriteOptions write_options; 27 | ASSERT_OK(db->Put(write_options, "1", "b")); 28 | ASSERT_OK(db->Put(write_options, "2", "c")); 29 | ASSERT_OK(db->Put(write_options, "3", "d")); 30 | ASSERT_OK(db->Put(write_options, "4", "e")); 31 | ASSERT_OK(db->Put(write_options, "5", "f")); 32 | 33 | ReadOptions read_options; 34 | Iterator *iter = db->NewIterator(read_options); 35 | 36 | // Add an element that should not be reflected in the iterator. 37 | ASSERT_OK(db->Put(write_options, "25", "cd")); 38 | 39 | iter->Seek("5"); 40 | ASSERT_EQ(iter->key().ToString(), "5"); 41 | iter->Prev(); 42 | ASSERT_EQ(iter->key().ToString(), "4"); 43 | iter->Prev(); 44 | ASSERT_EQ(iter->key().ToString(), "3"); 45 | iter->Next(); 46 | ASSERT_EQ(iter->key().ToString(), "4"); 47 | iter->Next(); 48 | ASSERT_EQ(iter->key().ToString(), "5"); 49 | 50 | delete iter; 51 | delete db; 52 | DestroyDB(dbpath, options); 53 | } 54 | 55 | } // namespace leveldb 56 | 57 | int main(int argc, char** argv) { 58 | return leveldb::test::RunAllTests(); 59 | } 60 | -------------------------------------------------------------------------------- /leveldb/port/README: -------------------------------------------------------------------------------- 1 | This directory contains interfaces and implementations that isolate the 2 | rest of the package from platform details. 3 | 4 | Code in the rest of the package includes "port.h" from this directory. 5 | "port.h" in turn includes a platform specific "port_<platform>.h" file 6 | that provides the platform specific implementation. 7 | 8 | See port_posix.h for an example of what must be provided in a platform 9 | specific header file.
10 | 11 | -------------------------------------------------------------------------------- /leveldb/port/port.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_PORT_PORT_H_ 6 | #define STORAGE_LEVELDB_PORT_PORT_H_ 7 | 8 | #include 9 | 10 | // Include the appropriate platform specific file below. If you are 11 | // porting to a new platform, see "port_example.h" for documentation 12 | // of what the new port_<platform>.h file must provide. 13 | #if defined(LEVELDB_PLATFORM_POSIX) 14 | # include "port/port_posix.h" 15 | #elif defined(LEVELDB_PLATFORM_CHROMIUM) 16 | # include "port/port_chromium.h" 17 | #endif 18 | 19 | #endif // STORAGE_LEVELDB_PORT_PORT_H_ 20 | -------------------------------------------------------------------------------- /leveldb/port/port_posix.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors.
4 | 5 | #include "port/port_posix.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include "util/logging.h" 11 | #include 12 | namespace leveldb { 13 | namespace port { 14 | 15 | static void PthreadCall(const char* label, int result) { 16 | if (result != 0) { 17 | fprintf(stderr, "pthread %s: %s\n", label, strerror(result)); 18 | abort(); 19 | } 20 | } 21 | 22 | Mutex::Mutex() { PthreadCall("init mutex", pthread_mutex_init(&mu_, NULL)); } 23 | 24 | Mutex::~Mutex() { PthreadCall("destroy mutex", pthread_mutex_destroy(&mu_)); } 25 | 26 | void Mutex::Lock() { PthreadCall("lock", pthread_mutex_lock(&mu_)); } 27 | 28 | void Mutex::Unlock() { PthreadCall("unlock", pthread_mutex_unlock(&mu_)); } 29 | 30 | CondVar::CondVar(Mutex* mu) 31 | : mu_(mu) { 32 | PthreadCall("init cv", pthread_cond_init(&cv_, NULL)); 33 | } 34 | 35 | CondVar::~CondVar() { PthreadCall("destroy cv", pthread_cond_destroy(&cv_)); } 36 | 37 | void CondVar::Wait() { 38 | PthreadCall("wait", pthread_cond_wait(&cv_, &mu_->mu_)); 39 | } 40 | 41 | void CondVar::Wait(int64_t reltime) { 42 | struct timespec ts; 43 | #if defined(HAVE_POSIX_CLOCKS) 44 | clock_gettime(CLOCK_REALTIME, &ts); 45 | #else // HAVE_POSIX_CLOCKS 46 | // we don't support the clocks here. 
47 | struct timeval t; 48 | gettimeofday(&t, NULL); 49 | ts.tv_sec = t.tv_sec; 50 | ts.tv_nsec= t.tv_usec*1000; 51 | #endif // HAVE_POSIX_CLOCKS 52 | ts.tv_sec += reltime/1000000000; 53 | ts.tv_nsec+= reltime%1000000000; 54 | if (ts.tv_nsec >= 1000000000) { 55 | ts.tv_nsec -= 1000000000; 56 | ts.tv_sec += 1; 57 | } 58 | pthread_cond_timedwait(&cv_, &mu_->mu_,&ts); 59 | } 60 | 61 | void CondVar::Signal() { 62 | PthreadCall("signal", pthread_cond_signal(&cv_)); 63 | } 64 | 65 | void CondVar::SignalAll() { 66 | PthreadCall("broadcast", pthread_cond_broadcast(&cv_)); 67 | } 68 | 69 | void InitOnce(OnceType* once, void (*initializer)()) { 70 | PthreadCall("once", pthread_once(once, initializer)); 71 | } 72 | 73 | } // namespace port 74 | } // namespace leveldb 75 | -------------------------------------------------------------------------------- /leveldb/port/thread_annotations.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_PORT_THREAD_ANNOTATIONS_H 6 | #define STORAGE_LEVELDB_PORT_THREAD_ANNOTATIONS_H 7 | // Some environments provide custom macros to aid in static thread-safety 8 | // analysis. Provide empty definitions of such macros unless they are already 9 | // defined. 10 | 11 | #ifndef EXCLUSIVE_LOCKS_REQUIRED 12 | #define EXCLUSIVE_LOCKS_REQUIRED(...) 13 | #endif 14 | 15 | #ifndef SHARED_LOCKS_REQUIRED 16 | #define SHARED_LOCKS_REQUIRED(...) 17 | #endif 18 | 19 | #ifndef LOCKS_EXCLUDED 20 | #define LOCKS_EXCLUDED(...) 21 | #endif 22 | 23 | #ifndef LOCK_RETURNED 24 | #define LOCK_RETURNED(x) 25 | #endif 26 | 27 | #ifndef LOCKABLE 28 | #define LOCKABLE 29 | #endif 30 | 31 | #ifndef SCOPED_LOCKABLE 32 | #define SCOPED_LOCKABLE 33 | #endif 34 | 35 | #ifndef EXCLUSIVE_LOCK_FUNCTION 36 | #define EXCLUSIVE_LOCK_FUNCTION(...)
37 | #endif 38 | 39 | #ifndef SHARED_LOCK_FUNCTION 40 | #define SHARED_LOCK_FUNCTION(...) 41 | #endif 42 | 43 | #ifndef EXCLUSIVE_TRYLOCK_FUNCTION 44 | #define EXCLUSIVE_TRYLOCK_FUNCTION(...) 45 | #endif 46 | 47 | #ifndef SHARED_TRYLOCK_FUNCTION 48 | #define SHARED_TRYLOCK_FUNCTION(...) 49 | #endif 50 | 51 | #ifndef UNLOCK_FUNCTION 52 | #define UNLOCK_FUNCTION(...) 53 | #endif 54 | 55 | #ifndef NO_THREAD_SAFETY_ANALYSIS 56 | #define NO_THREAD_SAFETY_ANALYSIS 57 | #endif 58 | 59 | #endif // STORAGE_LEVELDB_PORT_THREAD_ANNOTATIONS_H 60 | -------------------------------------------------------------------------------- /leveldb/port/win/stdint.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | // MSVC didn't ship with this file until the 2010 version. 6 | 7 | #ifndef STORAGE_LEVELDB_PORT_WIN_STDINT_H_ 8 | #define STORAGE_LEVELDB_PORT_WIN_STDINT_H_ 9 | 10 | #if !defined(_MSC_VER) 11 | #error This file should only be included when compiling with MSVC. 12 | #endif 13 | 14 | // Define C99 equivalent types. 15 | typedef signed char int8_t; 16 | typedef signed short int16_t; 17 | typedef signed int int32_t; 18 | typedef signed long long int64_t; 19 | typedef unsigned char uint8_t; 20 | typedef unsigned short uint16_t; 21 | typedef unsigned int uint32_t; 22 | typedef unsigned long long uint64_t; 23 | 24 | #endif // STORAGE_LEVELDB_PORT_WIN_STDINT_H_ 25 | -------------------------------------------------------------------------------- /leveldb/table/block.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_TABLE_BLOCK_H_ 6 | #define STORAGE_LEVELDB_TABLE_BLOCK_H_ 7 | 8 | #include 9 | #include 10 | #include "leveldb/iterator.h" 11 | 12 | namespace leveldb { 13 | 14 | struct BlockContents; 15 | class Comparator; 16 | 17 | class Block { 18 | public: 19 | // Initialize the block with the specified contents. 20 | explicit Block(const BlockContents& contents); 21 | 22 | ~Block(); 23 | 24 | size_t size() const { return size_; } 25 | Iterator* NewIterator(const Comparator* comparator); 26 | 27 | private: 28 | uint32_t NumRestarts() const; 29 | 30 | const char* data_; 31 | size_t size_; 32 | uint32_t restart_offset_; // Offset in data_ of restart array 33 | bool owned_; // Block owns data_[] 34 | 35 | // No copying allowed 36 | Block(const Block&); 37 | void operator=(const Block&); 38 | 39 | class Iter; 40 | }; 41 | 42 | } // namespace leveldb 43 | 44 | #endif // STORAGE_LEVELDB_TABLE_BLOCK_H_ 45 | -------------------------------------------------------------------------------- /leveldb/table/block_builder.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_TABLE_BLOCK_BUILDER_H_ 6 | #define STORAGE_LEVELDB_TABLE_BLOCK_BUILDER_H_ 7 | 8 | #include 9 | 10 | #include 11 | #include "leveldb/slice.h" 12 | 13 | namespace leveldb { 14 | 15 | struct Options; 16 | 17 | class BlockBuilder { 18 | public: 19 | explicit BlockBuilder(const Options* options); 20 | 21 | // Reset the contents as if the BlockBuilder was just constructed. 
22 | void Reset(); 23 | 24 | // REQUIRES: Finish() has not been called since the last call to Reset(). 25 | // REQUIRES: key is larger than any previously added key 26 | void Add(const Slice& key, const Slice& value); 27 | 28 | // Finish building the block and return a slice that refers to the 29 | // block contents. The returned slice will remain valid for the 30 | // lifetime of this builder or until Reset() is called. 31 | Slice Finish(); 32 | 33 | // Returns an estimate of the current (uncompressed) size of the block 34 | // we are building. 35 | size_t CurrentSizeEstimate() const; 36 | 37 | // Return true iff no entries have been added since the last Reset() 38 | bool empty() const { 39 | return buffer_.empty(); 40 | } 41 | 42 | private: 43 | const Options* options_; 44 | std::string buffer_; // Destination buffer 45 | std::vector restarts_; // Restart points 46 | int counter_; // Number of entries emitted since restart 47 | bool finished_; // Has Finish() been called? 48 | std::string last_key_; 49 | 50 | // No copying allowed 51 | BlockBuilder(const BlockBuilder&); 52 | void operator=(const BlockBuilder&); 53 | }; 54 | 55 | } // namespace leveldb 56 | 57 | #endif // STORAGE_LEVELDB_TABLE_BLOCK_BUILDER_H_ 58 | -------------------------------------------------------------------------------- /leveldb/table/filter_block.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | // 5 | // A filter block is stored near the end of a Table file. It contains 6 | // filters (e.g., bloom filters) for all data blocks in the table combined 7 | // into a single filter block. 
8 | 9 | #ifndef STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_ 10 | #define STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include "leveldb/slice.h" 17 | #include "util/hash.h" 18 | 19 | namespace leveldb { 20 | 21 | class FilterPolicy; 22 | 23 | // A FilterBlockBuilder is used to construct all of the filters for a 24 | // particular Table. It generates a single string which is stored as 25 | // a special block in the Table. 26 | // 27 | // The sequence of calls to FilterBlockBuilder must match the regexp: 28 | // (StartBlock AddKey*)* Finish 29 | class FilterBlockBuilder { 30 | public: 31 | explicit FilterBlockBuilder(const FilterPolicy*); 32 | 33 | void StartBlock(uint64_t block_offset); 34 | void AddKey(const Slice& key); 35 | Slice Finish(); 36 | 37 | private: 38 | void GenerateFilter(); 39 | 40 | const FilterPolicy* policy_; 41 | std::string keys_; // Flattened key contents 42 | std::vector start_; // Starting index in keys_ of each key 43 | std::string result_; // Filter data computed so far 44 | std::vector tmp_keys_; // policy_->CreateFilter() argument 45 | std::vector filter_offsets_; 46 | 47 | // No copying allowed 48 | FilterBlockBuilder(const FilterBlockBuilder&); 49 | void operator=(const FilterBlockBuilder&); 50 | }; 51 | 52 | class FilterBlockReader { 53 | public: 54 | // REQUIRES: "contents" and *policy must stay live while *this is live. 
55 | FilterBlockReader(const FilterPolicy* policy, const Slice& contents); 56 | bool KeyMayMatch(uint64_t block_offset, const Slice& key); 57 | 58 | private: 59 | const FilterPolicy* policy_; 60 | const char* data_; // Pointer to filter data (at block-start) 61 | const char* offset_; // Pointer to beginning of offset array (at block-end) 62 | size_t num_; // Number of entries in offset array 63 | size_t base_lg_; // Encoding parameter (see kFilterBaseLg in .cc file) 64 | }; 65 | 66 | } // namespace leveldb 67 | 68 | #endif // STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_ 69 | -------------------------------------------------------------------------------- /leveldb/table/iterator.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include "leveldb/iterator.h" 6 | 7 | namespace leveldb { 8 | // Start with an empty cleanup list; the head node (cleanup_) is embedded in the Iterator itself. 9 | Iterator::Iterator() { 10 | cleanup_.function = NULL; 11 | cleanup_.next = NULL; 12 | } 13 | // Run the embedded head cleanup (if one was registered), then run and delete each heap-allocated node chained after it. 14 | Iterator::~Iterator() { 15 | if (cleanup_.function != NULL) { 16 | (*cleanup_.function)(cleanup_.arg1, cleanup_.arg2); 17 | for (Cleanup* c = cleanup_.next; c != NULL; ) { 18 | (*c->function)(c->arg1, c->arg2); 19 | Cleanup* next = c->next; 20 | delete c; 21 | c = next; 22 | } 23 | } 24 | } 25 | // Register func(arg1, arg2) to be called by the destructor. The first registration fills the embedded head; later ones allocate a node linked in directly after the head. 26 | void Iterator::RegisterCleanup(CleanupFunction func, void* arg1, void* arg2) { 27 | assert(func != NULL); 28 | Cleanup* c; 29 | if (cleanup_.function == NULL) { 30 | c = &cleanup_; 31 | } else { 32 | c = new Cleanup; 33 | c->next = cleanup_.next; 34 | cleanup_.next = c; 35 | } 36 | c->function = func; 37 | c->arg1 = arg1; 38 | c->arg2 = arg2; 39 | } 40 | 41 | namespace { 42 | class EmptyIterator : public Iterator { 43 | public: 44 | EmptyIterator(const Status& s) : status_(s) { } 45 | virtual bool Valid() const { return false; } 46 | virtual void 
Seek(const Slice& target) { } 47 | virtual void SeekToFirst() { } 48 | virtual void SeekToLast() { } 49 | virtual void Next() { assert(false); } 50 | virtual void Prev() { assert(false); } 51 | Slice key() const { assert(false); return Slice(); } 52 | Slice value() const { assert(false); return Slice(); } 53 | virtual Status status() const { return status_; } 54 | private: 55 | Status status_; 56 | }; 57 | } // namespace 58 | // Returns a heap-allocated iterator that is never Valid() and whose status() is OK. 59 | Iterator* NewEmptyIterator() { 60 | return new EmptyIterator(Status::OK()); 61 | } 62 | // Returns a heap-allocated iterator that is never Valid() and whose status() is the supplied status. 63 | Iterator* NewErrorIterator(const Status& status) { 64 | return new EmptyIterator(status); 65 | } 66 | 67 | } // namespace leveldb 68 | -------------------------------------------------------------------------------- /leveldb/table/iterator_wrapper.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_ 6 | #define STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_ 7 | 8 | namespace leveldb { 9 | 10 | // An internal wrapper class with an interface similar to Iterator that 11 | // caches the Valid() and key() results for an underlying iterator. 12 | // This can help avoid virtual function calls and also gives better 13 | // cache locality. 14 | class IteratorWrapper { 15 | public: 16 | IteratorWrapper(): iter_(NULL), valid_(false) { } 17 | explicit IteratorWrapper(Iterator* iter): iter_(NULL) { 18 | Set(iter); 19 | } 20 | ~IteratorWrapper() { delete iter_; } 21 | Iterator* iter() const { return iter_; } 22 | 23 | // Takes ownership of "iter" and will delete it when destroyed, or 24 | // when Set() is invoked again. 
25 | void Set(Iterator* iter) { 26 | delete iter_; 27 | iter_ = iter; 28 | if (iter_ == NULL) { 29 | valid_ = false; 30 | } else { 31 | Update(); 32 | } 33 | } 34 | 35 | 36 | // Iterator interface methods 37 | bool Valid() const { return valid_; } 38 | Slice key() const { assert(Valid()); return key_; } 39 | Slice value() const { assert(Valid()); return iter_->value(); } 40 | // Methods below require iter() != NULL 41 | Status status() const { assert(iter_); return iter_->status(); } 42 | void Next() { assert(iter_); iter_->Next(); Update(); } 43 | void Prev() { assert(iter_); iter_->Prev(); Update(); } 44 | void Seek(const Slice& k) { assert(iter_); iter_->Seek(k); Update(); } 45 | void SeekToFirst() { assert(iter_); iter_->SeekToFirst(); Update(); } 46 | void SeekToLast() { assert(iter_); iter_->SeekToLast(); Update(); } 47 | 48 | private: 49 | void Update() { // Refresh the cached Valid() result and, when valid, the cached key() 50 | valid_ = iter_->Valid(); 51 | if (valid_) { 52 | key_ = iter_->key(); 53 | } 54 | } 55 | 56 | Iterator* iter_; 57 | bool valid_; 58 | Slice key_; 59 | }; 60 | 61 | } // namespace leveldb 62 | 63 | #endif // STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_ 64 | -------------------------------------------------------------------------------- /leveldb/table/merger.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_TABLE_MERGER_H_ 6 | #define STORAGE_LEVELDB_TABLE_MERGER_H_ 7 | 8 | namespace leveldb { 9 | 10 | class Comparator; 11 | class Iterator; 12 | 13 | // Return an iterator that provides the union of the data in 14 | // children[0,n-1]. Takes ownership of the child iterators and 15 | // will delete them when the result iterator is deleted. 16 | // 17 | // The result does no duplicate suppression. 
I.e., if a particular 18 | // key is present in K child iterators, it will be yielded K times. 19 | // 20 | // REQUIRES: n >= 0 21 | extern Iterator* NewMergingIterator( 22 | const Comparator* comparator, Iterator** children, int n); 23 | 24 | } // namespace leveldb 25 | 26 | #endif // STORAGE_LEVELDB_TABLE_MERGER_H_ 27 | -------------------------------------------------------------------------------- /leveldb/table/two_level_iterator.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_TABLE_TWO_LEVEL_ITERATOR_H_ 6 | #define STORAGE_LEVELDB_TABLE_TWO_LEVEL_ITERATOR_H_ 7 | 8 | #include "leveldb/iterator.h" 9 | 10 | namespace leveldb { 11 | 12 | struct ReadOptions; 13 | 14 | // Return a new two level iterator. A two-level iterator contains an 15 | // index iterator whose values point to a sequence of blocks where 16 | // each block is itself a sequence of key,value pairs. The returned 17 | // two-level iterator yields the concatenation of all key/value pairs 18 | // in the sequence of blocks. Takes ownership of "index_iter" and 19 | // will delete it when no longer needed. 20 | // 21 | // Uses a supplied function to convert an index_iter value into 22 | // an iterator over the contents of the corresponding block. 
23 | extern Iterator* NewTwoLevelIterator( 24 | Iterator* index_iter, 25 | Iterator* (*block_function)( 26 | void* arg, 27 | const ReadOptions& options, 28 | const Slice& index_value), 29 | void* arg, 30 | const ReadOptions& options); 31 | 32 | } // namespace leveldb 33 | 34 | #endif // STORAGE_LEVELDB_TABLE_TWO_LEVEL_ITERATOR_H_ 35 | -------------------------------------------------------------------------------- /leveldb/util/arena.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include "util/arena.h" 6 | #include 7 | 8 | namespace leveldb { 9 | 10 | static const int kBlockSize = 4096; 11 | 12 | Arena::Arena() { 13 | blocks_memory_ = 0; 14 | alloc_ptr_ = NULL; // First allocation will allocate a block 15 | alloc_bytes_remaining_ = 0; 16 | } 17 | 18 | Arena::~Arena() { 19 | for (size_t i = 0; i < blocks_.size(); i++) { 20 | delete[] blocks_[i]; 21 | } 22 | } 23 | 24 | char* Arena::AllocateFallback(size_t bytes) { 25 | if (bytes > kBlockSize / 4) { 26 | // Object is more than a quarter of our block size. Allocate it separately 27 | // to avoid wasting too much space in leftover bytes. 28 | char* result = AllocateNewBlock(bytes); 29 | return result; 30 | } 31 | 32 | // We waste the remaining space in the current block. 33 | alloc_ptr_ = AllocateNewBlock(kBlockSize); 34 | alloc_bytes_remaining_ = kBlockSize; 35 | 36 | char* result = alloc_ptr_; 37 | alloc_ptr_ += bytes; 38 | alloc_bytes_remaining_ -= bytes; 39 | return result; 40 | } 41 | 42 | char* Arena::AllocateAligned(size_t bytes) { 43 | const int align = (sizeof(void*) > 8) ? 
sizeof(void*) : 8; 44 | assert((align & (align-1)) == 0); // Pointer size should be a power of 2 45 | size_t current_mod = reinterpret_cast(alloc_ptr_) & (align-1); 46 | size_t slop = (current_mod == 0 ? 0 : align - current_mod); 47 | size_t needed = bytes + slop; 48 | char* result; 49 | if (needed <= alloc_bytes_remaining_) { 50 | result = alloc_ptr_ + slop; 51 | alloc_ptr_ += needed; 52 | alloc_bytes_remaining_ -= needed; 53 | } else { 54 | // AllocateFallback always returns aligned memory 55 | result = AllocateFallback(bytes); 56 | } 57 | assert((reinterpret_cast(result) & (align-1)) == 0); 58 | return result; 59 | } 60 | // Allocates a fresh block with new[], records it in blocks_ (released in ~Arena) and adds its size to blocks_memory_. 61 | char* Arena::AllocateNewBlock(size_t block_bytes) { 62 | char* result = new char[block_bytes]; 63 | blocks_memory_ += block_bytes; 64 | blocks_.push_back(result); 65 | return result; 66 | } 67 | 68 | } // namespace leveldb 69 | -------------------------------------------------------------------------------- /leveldb/util/arena.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_UTIL_ARENA_H_ 6 | #define STORAGE_LEVELDB_UTIL_ARENA_H_ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | namespace leveldb { 14 | 15 | class Arena { 16 | public: 17 | Arena(); 18 | ~Arena(); 19 | 20 | // Return a pointer to a newly allocated memory block of "bytes" bytes. 21 | char* Allocate(size_t bytes); 22 | 23 | // Allocate memory with the normal alignment guarantees provided by malloc 24 | char* AllocateAligned(size_t bytes); 25 | 26 | // Returns an estimate of the total memory usage of data allocated 27 | // by the arena (including space allocated but not yet used for user 28 | // allocations). 
29 | size_t MemoryUsage() const { 30 | return blocks_memory_ + blocks_.capacity() * sizeof(char*); 31 | } 32 | 33 | private: 34 | char* AllocateFallback(size_t bytes); 35 | char* AllocateNewBlock(size_t block_bytes); 36 | 37 | // Allocation state 38 | char* alloc_ptr_; 39 | size_t alloc_bytes_remaining_; 40 | 41 | // Array of new[] allocated memory blocks 42 | std::vector blocks_; 43 | 44 | // Bytes of memory in blocks allocated so far 45 | size_t blocks_memory_; 46 | 47 | // No copying allowed 48 | Arena(const Arena&); 49 | void operator=(const Arena&); 50 | }; 51 | 52 | inline char* Arena::Allocate(size_t bytes) { 53 | // The semantics of what to return are a bit messy if we allow 54 | // 0-byte allocations, so we disallow them here (we don't need 55 | // them for our internal use). 56 | assert(bytes > 0); 57 | if (bytes <= alloc_bytes_remaining_) { 58 | char* result = alloc_ptr_; 59 | alloc_ptr_ += bytes; 60 | alloc_bytes_remaining_ -= bytes; 61 | return result; 62 | } 63 | return AllocateFallback(bytes); 64 | } 65 | 66 | } // namespace leveldb 67 | 68 | #endif // STORAGE_LEVELDB_UTIL_ARENA_H_ 69 | -------------------------------------------------------------------------------- /leveldb/util/arena_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | 5 | #include "util/arena.h" 6 | 7 | #include "util/random.h" 8 | #include "util/testharness.h" 9 | 10 | namespace leveldb { 11 | 12 | class ArenaTest { }; 13 | 14 | TEST(ArenaTest, Empty) { 15 | Arena arena; 16 | } 17 | 18 | TEST(ArenaTest, Simple) { 19 | std::vector > allocated; 20 | Arena arena; 21 | const int N = 100000; 22 | size_t bytes = 0; 23 | Random rnd(301); 24 | for (int i = 0; i < N; i++) { 25 | size_t s; 26 | if (i % (N / 10) == 0) { 27 | s = i; 28 | } else { 29 | s = rnd.OneIn(4000) ? rnd.Uniform(6000) : 30 | (rnd.OneIn(10) ? rnd.Uniform(100) : rnd.Uniform(20)); 31 | } 32 | if (s == 0) { 33 | // Our arena disallows size 0 allocations. 34 | s = 1; 35 | } 36 | char* r; 37 | if (rnd.OneIn(10)) { 38 | r = arena.AllocateAligned(s); 39 | } else { 40 | r = arena.Allocate(s); 41 | } 42 | 43 | for (size_t b = 0; b < s; b++) { 44 | // Fill the "i"th allocation with a known bit pattern 45 | r[b] = i % 256; 46 | } 47 | bytes += s; 48 | allocated.push_back(std::make_pair(s, r)); 49 | ASSERT_GE(arena.MemoryUsage(), bytes); 50 | if (i > N/10) { 51 | ASSERT_LE(arena.MemoryUsage(), bytes * 1.10); 52 | } 53 | } 54 | for (size_t i = 0; i < allocated.size(); i++) { 55 | size_t num_bytes = allocated[i].first; 56 | const char* p = allocated[i].second; 57 | for (size_t b = 0; b < num_bytes; b++) { 58 | // Check the "i"th allocation for the known bit pattern 59 | ASSERT_EQ(int(p[b]) & 0xff, i % 256); 60 | } 61 | } 62 | } 63 | 64 | } // namespace leveldb 65 | 66 | int main(int argc, char** argv) { 67 | return leveldb::test::RunAllTests(); 68 | } 69 | -------------------------------------------------------------------------------- /leveldb/util/comparator.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | 5 | #include 6 | #include 7 | #include "leveldb/comparator.h" 8 | #include "leveldb/slice.h" 9 | #include "port/port.h" 10 | #include "util/logging.h" 11 | 12 | namespace leveldb { 13 | 14 | Comparator::~Comparator() { } 15 | 16 | namespace { 17 | class BytewiseComparatorImpl : public Comparator { 18 | public: 19 | BytewiseComparatorImpl() { } 20 | 21 | virtual const char* Name() const { 22 | return "leveldb.BytewiseComparator"; 23 | } 24 | 25 | virtual int Compare(const Slice& a, const Slice& b) const { 26 | return a.compare(b); 27 | } 28 | 29 | virtual void FindShortestSeparator( 30 | std::string* start, 31 | const Slice& limit) const { 32 | // Find length of common prefix 33 | size_t min_length = std::min(start->size(), limit.size()); 34 | size_t diff_index = 0; 35 | while ((diff_index < min_length) && 36 | ((*start)[diff_index] == limit[diff_index])) { 37 | diff_index++; 38 | } 39 | 40 | if (diff_index >= min_length) { 41 | // Do not shorten if one string is a prefix of the other 42 | } else { 43 | uint8_t diff_byte = static_cast((*start)[diff_index]); 44 | if (diff_byte < static_cast(0xff) && 45 | diff_byte + 1 < static_cast(limit[diff_index])) { 46 | (*start)[diff_index]++; 47 | start->resize(diff_index + 1); 48 | assert(Compare(*start, limit) < 0); 49 | } 50 | } 51 | } 52 | 53 | virtual void FindShortSuccessor(std::string* key) const { 54 | // Find first character that can be incremented 55 | size_t n = key->size(); 56 | for (size_t i = 0; i < n; i++) { 57 | const uint8_t byte = (*key)[i]; 58 | if (byte != static_cast(0xff)) { 59 | (*key)[i] = byte + 1; 60 | key->resize(i+1); 61 | return; 62 | } 63 | } 64 | // *key is a run of 0xffs. Leave it alone. 
65 | } 66 | }; 67 | } // namespace 68 | 69 | static port::OnceType once = LEVELDB_ONCE_INIT; 70 | static const Comparator* bytewise; 71 | 72 | static void InitModule() { 73 | bytewise = new BytewiseComparatorImpl; 74 | } 75 | 76 | const Comparator* BytewiseComparator() { 77 | port::InitOnce(&once, InitModule); 78 | return bytewise; 79 | } 80 | 81 | } // namespace leveldb 82 | -------------------------------------------------------------------------------- /leveldb/util/crc32c.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_UTIL_CRC32C_H_ 6 | #define STORAGE_LEVELDB_UTIL_CRC32C_H_ 7 | 8 | #include 9 | #include 10 | 11 | namespace leveldb { 12 | namespace crc32c { 13 | 14 | // Return the crc32c of concat(A, data[0,n-1]) where init_crc is the 15 | // crc32c of some string A. Extend() is often used to maintain the 16 | // crc32c of a stream of data. 17 | extern uint32_t Extend(uint32_t init_crc, const char* data, size_t n); 18 | 19 | // Return the crc32c of data[0,n-1] 20 | inline uint32_t Value(const char* data, size_t n) { 21 | return Extend(0, data, n); 22 | } 23 | 24 | static const uint32_t kMaskDelta = 0xa282ead8ul; 25 | 26 | // Return a masked representation of crc. 27 | // 28 | // Motivation: it is problematic to compute the CRC of a string that 29 | // contains embedded CRCs. Therefore we recommend that CRCs stored 30 | // somewhere (e.g., in files) should be masked before being stored. 31 | inline uint32_t Mask(uint32_t crc) { 32 | // Rotate right by 15 bits and add a constant. 33 | return ((crc >> 15) | (crc << 17)) + kMaskDelta; 34 | } 35 | 36 | // Return the crc whose masked representation is masked_crc. 
37 | inline uint32_t Unmask(uint32_t masked_crc) { 38 | uint32_t rot = masked_crc - kMaskDelta; 39 | return ((rot >> 17) | (rot << 15)); 40 | } 41 | 42 | } // namespace crc32c 43 | } // namespace leveldb 44 | 45 | #endif // STORAGE_LEVELDB_UTIL_CRC32C_H_ 46 | -------------------------------------------------------------------------------- /leveldb/util/crc32c_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include "util/crc32c.h" 6 | #include "util/testharness.h" 7 | 8 | namespace leveldb { 9 | namespace crc32c { 10 | 11 | class CRC { }; 12 | 13 | TEST(CRC, StandardResults) { 14 | // From rfc3720 section B.4. 15 | char buf[32]; 16 | 17 | memset(buf, 0, sizeof(buf)); 18 | ASSERT_EQ(0x8a9136aa, Value(buf, sizeof(buf))); 19 | 20 | memset(buf, 0xff, sizeof(buf)); 21 | ASSERT_EQ(0x62a8ab43, Value(buf, sizeof(buf))); 22 | 23 | for (int i = 0; i < 32; i++) { 24 | buf[i] = i; 25 | } 26 | ASSERT_EQ(0x46dd794e, Value(buf, sizeof(buf))); 27 | 28 | for (int i = 0; i < 32; i++) { 29 | buf[i] = 31 - i; 30 | } 31 | ASSERT_EQ(0x113fdb5c, Value(buf, sizeof(buf))); 32 | 33 | unsigned char data[48] = { 34 | 0x01, 0xc0, 0x00, 0x00, 35 | 0x00, 0x00, 0x00, 0x00, 36 | 0x00, 0x00, 0x00, 0x00, 37 | 0x00, 0x00, 0x00, 0x00, 38 | 0x14, 0x00, 0x00, 0x00, 39 | 0x00, 0x00, 0x04, 0x00, 40 | 0x00, 0x00, 0x00, 0x14, 41 | 0x00, 0x00, 0x00, 0x18, 42 | 0x28, 0x00, 0x00, 0x00, 43 | 0x00, 0x00, 0x00, 0x00, 44 | 0x02, 0x00, 0x00, 0x00, 45 | 0x00, 0x00, 0x00, 0x00, 46 | }; 47 | ASSERT_EQ(0xd9963a56, Value(reinterpret_cast(data), sizeof(data))); 48 | } 49 | 50 | TEST(CRC, Values) { 51 | ASSERT_NE(Value("a", 1), Value("foo", 3)); 52 | } 53 | 54 | TEST(CRC, Extend) { 55 | ASSERT_EQ(Value("hello world", 11), 56 | Extend(Value("hello ", 
6), "world", 5)); 57 | } 58 | 59 | TEST(CRC, Mask) { 60 | uint32_t crc = Value("foo", 3); 61 | ASSERT_NE(crc, Mask(crc)); 62 | ASSERT_NE(crc, Mask(Mask(crc))); 63 | ASSERT_EQ(crc, Unmask(Mask(crc))); 64 | ASSERT_EQ(crc, Unmask(Unmask(Mask(Mask(crc))))); 65 | } 66 | 67 | } // namespace crc32c 68 | } // namespace leveldb 69 | 70 | int main(int argc, char** argv) { 71 | return leveldb::test::RunAllTests(); 72 | } 73 | -------------------------------------------------------------------------------- /leveldb/util/filter_policy.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include "leveldb/filter_policy.h" 6 | 7 | namespace leveldb { 8 | 9 | FilterPolicy::~FilterPolicy() { } 10 | 11 | } // namespace leveldb 12 | -------------------------------------------------------------------------------- /leveldb/util/hash.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include 6 | #include "util/coding.h" 7 | #include "util/hash.h" 8 | 9 | // The FALLTHROUGH_INTENDED macro can be used to annotate implicit fall-through 10 | // between switch labels. The real definition should be provided externally. 11 | // This one is a fallback version for unsupported compilers. 
12 | #ifndef FALLTHROUGH_INTENDED 13 | #define FALLTHROUGH_INTENDED do { } while (0) 14 | #endif 15 | 16 | namespace leveldb { 17 | // Hashes the n bytes at data, starting from seed: four bytes at a time via DecodeFixed32, then the 1-3 trailing bytes. 18 | uint32_t Hash(const char* data, size_t n, uint32_t seed) { 19 | // Similar to murmur hash 20 | const uint32_t m = 0xc6a4a793; 21 | const uint32_t r = 24; 22 | const char* limit = data + n; 23 | uint32_t h = seed ^ (n * m); 24 | 25 | // Pick up four bytes at a time 26 | while (data + 4 <= limit) { 27 | uint32_t w = DecodeFixed32(data); 28 | data += 4; 29 | h += w; 30 | h *= m; 31 | h ^= (h >> 16); 32 | } 33 | 34 | // Pick up remaining bytes 35 | switch (limit - data) { 36 | case 3: 37 | h += data[2] << 16; // NOTE(review): data is plain char, so where char is signed a byte >= 0x80 is sign-extended before the shift (same for data[1] and data[0] below) - confirm this is intended 38 | FALLTHROUGH_INTENDED; 39 | case 2: 40 | h += data[1] << 8; 41 | FALLTHROUGH_INTENDED; 42 | case 1: 43 | h += data[0]; 44 | h *= m; 45 | h ^= (h >> r); 46 | break; 47 | } 48 | return h; 49 | } 50 | 51 | 52 | } // namespace leveldb 53 | -------------------------------------------------------------------------------- /leveldb/util/hash.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | // 5 | // Simple hash function used for internal data structures 6 | 7 | #ifndef STORAGE_LEVELDB_UTIL_HASH_H_ 8 | #define STORAGE_LEVELDB_UTIL_HASH_H_ 9 | 10 | #include 11 | #include 12 | 13 | namespace leveldb { 14 | 15 | extern uint32_t Hash(const char* data, size_t n, uint32_t seed); 16 | 17 | } // namespace leveldb 18 | 19 | #endif // STORAGE_LEVELDB_UTIL_HASH_H_ 20 | -------------------------------------------------------------------------------- /leveldb/util/histogram.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_UTIL_HISTOGRAM_H_ 6 | #define STORAGE_LEVELDB_UTIL_HISTOGRAM_H_ 7 | 8 | #include 9 | 10 | namespace leveldb { 11 | 12 | class Histogram { 13 | public: 14 | Histogram() { } 15 | ~Histogram() { } 16 | 17 | void Clear(); 18 | void Add(double value); 19 | void Merge(const Histogram& other); 20 | 21 | std::string ToString() const; 22 | 23 | private: 24 | double min_; 25 | double max_; 26 | double num_; 27 | double sum_; 28 | double sum_squares_; 29 | 30 | enum { kNumBuckets = 154 }; 31 | static const double kBucketLimit[kNumBuckets]; 32 | double buckets_[kNumBuckets]; 33 | 34 | double Median() const; 35 | double Percentile(double p) const; 36 | double Average() const; 37 | double StandardDeviation() const; 38 | }; 39 | 40 | } // namespace leveldb 41 | 42 | #endif // STORAGE_LEVELDB_UTIL_HISTOGRAM_H_ 43 | -------------------------------------------------------------------------------- /leveldb/util/logging.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | 5 | #include "util/logging.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "leveldb/env.h" 12 | #include "leveldb/slice.h" 13 | 14 | namespace leveldb { 15 | 16 | void AppendNumberTo(std::string* str, uint64_t num) { 17 | char buf[30]; 18 | snprintf(buf, sizeof(buf), "%llu", (unsigned long long) num); 19 | str->append(buf); 20 | } 21 | 22 | void AppendEscapedStringTo(std::string* str, const Slice& value) { 23 | for (size_t i = 0; i < value.size(); i++) { 24 | char c = value[i]; 25 | if (c >= ' ' && c <= '~') { 26 | str->push_back(c); 27 | } else { 28 | char buf[10]; 29 | snprintf(buf, sizeof(buf), "\\x%02x", 30 | static_cast(c) & 0xff); 31 | str->append(buf); 32 | } 33 | } 34 | } 35 | 36 | std::string NumberToString(uint64_t num) { 37 | std::string r; 38 | AppendNumberTo(&r, num); 39 | return r; 40 | } 41 | 42 | std::string EscapeString(const Slice& value) { 43 | std::string r; 44 | AppendEscapedStringTo(&r, value); 45 | return r; 46 | } 47 | 48 | bool ConsumeChar(Slice* in, char c) { 49 | if (!in->empty() && (*in)[0] == c) { 50 | in->remove_prefix(1); 51 | return true; 52 | } else { 53 | return false; 54 | } 55 | } 56 | 57 | bool ConsumeDecimalNumber(Slice* in, uint64_t* val) { 58 | uint64_t v = 0; 59 | int digits = 0; 60 | while (!in->empty()) { 61 | char c = (*in)[0]; 62 | if (c >= '0' && c <= '9') { 63 | ++digits; 64 | const int delta = (c - '0'); 65 | static const uint64_t kMaxUint64 = ~static_cast(0); 66 | if (v > kMaxUint64/10 || 67 | (v == kMaxUint64/10 && delta > kMaxUint64%10)) { 68 | // Overflow 69 | return false; 70 | } 71 | v = (v * 10) + delta; 72 | in->remove_prefix(1); 73 | } else { 74 | break; 75 | } 76 | } 77 | *val = v; 78 | return (digits > 0); 79 | } 80 | 81 | } // namespace leveldb 82 | -------------------------------------------------------------------------------- /leveldb/util/logging.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB 
Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | // 5 | // Must not be included from any .h files to avoid polluting the namespace 6 | // with macros. 7 | 8 | #ifndef STORAGE_LEVELDB_UTIL_LOGGING_H_ 9 | #define STORAGE_LEVELDB_UTIL_LOGGING_H_ 10 | 11 | #include 12 | #include 13 | #include 14 | #include "port/port.h" 15 | 16 | namespace leveldb { 17 | 18 | class Slice; 19 | class WritableFile; 20 | 21 | // Append a human-readable printout of "num" to *str 22 | extern void AppendNumberTo(std::string* str, uint64_t num); 23 | 24 | // Append a human-readable printout of "value" to *str. 25 | // Escapes any non-printable characters found in "value". 26 | extern void AppendEscapedStringTo(std::string* str, const Slice& value); 27 | 28 | // Return a human-readable printout of "num" 29 | extern std::string NumberToString(uint64_t num); 30 | 31 | // Return a human-readable version of "value". 32 | // Escapes any non-printable characters found in "value". 33 | extern std::string EscapeString(const Slice& value); 34 | 35 | // If *in starts with "c", advances *in past the first character and 36 | // returns true. Otherwise, returns false. 37 | extern bool ConsumeChar(Slice* in, char c); 38 | 39 | // Parse a human-readable number from "*in" into *value. On success, 40 | // advances "*in" past the consumed number and sets "*val" to the 41 | // numeric value. Otherwise, returns false and leaves *in in an 42 | // unspecified state. 43 | extern bool ConsumeDecimalNumber(Slice* in, uint64_t* val); 44 | 45 | } // namespace leveldb 46 | 47 | #endif // STORAGE_LEVELDB_UTIL_LOGGING_H_ 48 | -------------------------------------------------------------------------------- /leveldb/util/mutexlock.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_UTIL_MUTEXLOCK_H_ 6 | #define STORAGE_LEVELDB_UTIL_MUTEXLOCK_H_ 7 | 8 | #include "port/port.h" 9 | #include "port/thread_annotations.h" 10 | 11 | namespace leveldb { 12 | 13 | // Helper class that locks a mutex on construction and unlocks the mutex when 14 | // the destructor of the MutexLock object is invoked. 15 | // 16 | // Typical usage: 17 | // 18 | // void MyClass::MyMethod() { 19 | // MutexLock l(&mu_); // mu_ is an instance variable 20 | // ... some complex code, possibly with multiple return paths ... 21 | // } 22 | 23 | class SCOPED_LOCKABLE MutexLock { 24 | public: 25 | explicit MutexLock(port::Mutex *mu) EXCLUSIVE_LOCK_FUNCTION(mu) 26 | : mu_(mu) { 27 | this->mu_->Lock(); 28 | } 29 | ~MutexLock() UNLOCK_FUNCTION() { this->mu_->Unlock(); } 30 | 31 | private: 32 | port::Mutex *const mu_; 33 | // No copying allowed 34 | MutexLock(const MutexLock&); 35 | void operator=(const MutexLock&); 36 | }; 37 | 38 | } // namespace leveldb 39 | 40 | 41 | #endif // STORAGE_LEVELDB_UTIL_MUTEXLOCK_H_ 42 | -------------------------------------------------------------------------------- /leveldb/util/options.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
namespace leveldb {

// A very simple random number generator.  Not especially good at
// generating truly random bits, but good enough for our needs in this
// package.
//
// The state advances as seed_ = (seed_ * 16807) % (2^31 - 1).
class Random {
 private:
  uint32_t seed_;
 public:
  // Seeds the generator with the low 31 bits of "s". The two seeds that
  // would make the sequence constant (0 and 2^31-1) are replaced by 1.
  explicit Random(uint32_t s) : seed_(s & 0x7fffffffu) {
    // Avoid bad seeds.
    if (seed_ == 0 || seed_ == 2147483647L) {
      seed_ = 1;
    }
  }

  // Advances the generator and returns the new state, a value in [1, M-1].
  uint32_t Next() {
    static const uint32_t M = 2147483647L;   // 2^31-1
    static const uint64_t A = 16807;  // bits 14, 8, 7, 5, 2, 1, 0
    // We are computing
    //       seed_ = (seed_ * A) % M,    where M = 2^31-1
    //
    // seed_ must not be zero or M, or else all subsequent computed values
    // will be zero or M respectively.  For all other values, seed_ will end
    // up cycling through every number in [1,M-1]
    uint64_t product = seed_ * A;

    // Compute (product % M) using the fact that ((x << 31) % M) == x.
    seed_ = static_cast<uint32_t>((product >> 31) + (product & M));
    // The first reduction may overflow by 1 bit, so we may need to
    // repeat.  mod == M is not possible; using > allows the faster
    // sign-bit-based test.
    if (seed_ > M) {
      seed_ -= M;
    }
    return seed_;
  }

  // Returns a uniformly distributed value in the range [0..n-1]
  // REQUIRES: n > 0
  uint32_t Uniform(int n) { return Next() % n; }

  // Randomly returns true ~"1/n" of the time, and false otherwise.
  // REQUIRES: n > 0
  bool OneIn(int n) { return (Next() % n) == 0; }

  // Skewed: pick "base" uniformly from range [0,max_log] and then
  // return "base" random bits.  The effect is to pick a number in the
  // range [0,2^max_log-1] with exponential bias towards smaller numbers.
  uint32_t Skewed(int max_log) {
    return Uniform(1 << Uniform(max_log + 1));
  }
};

}  // namespace leveldb
4 | 5 | #include 6 | #include "port/port.h" 7 | #include "leveldb/status.h" 8 | 9 | namespace leveldb { 10 | 11 | const char* Status::CopyState(const char* state) { 12 | uint32_t size; 13 | memcpy(&size, state, sizeof(size)); 14 | char* result = new char[size + 5]; 15 | memcpy(result, state, size + 5); 16 | return result; 17 | } 18 | 19 | Status::Status(Code code, const Slice& msg, const Slice& msg2) { 20 | assert(code != kOk); 21 | const uint32_t len1 = msg.size(); 22 | const uint32_t len2 = msg2.size(); 23 | const uint32_t size = len1 + (len2 ? (2 + len2) : 0); 24 | char* result = new char[size + 5]; 25 | memcpy(result, &size, sizeof(size)); 26 | result[4] = static_cast(code); 27 | memcpy(result + 5, msg.data(), len1); 28 | if (len2) { 29 | result[5 + len1] = ':'; 30 | result[6 + len1] = ' '; 31 | memcpy(result + 7 + len1, msg2.data(), len2); 32 | } 33 | state_ = result; 34 | } 35 | 36 | std::string Status::ToString() const { 37 | if (state_ == NULL) { 38 | return "OK"; 39 | } else { 40 | char tmp[30]; 41 | const char* type; 42 | switch (code()) { 43 | case kOk: 44 | type = "OK"; 45 | break; 46 | case kNotFound: 47 | type = "NotFound: "; 48 | break; 49 | case kCorruption: 50 | type = "Corruption: "; 51 | break; 52 | case kNotSupported: 53 | type = "Not implemented: "; 54 | break; 55 | case kInvalidArgument: 56 | type = "Invalid argument: "; 57 | break; 58 | case kIOError: 59 | type = "IO error: "; 60 | break; 61 | default: 62 | snprintf(tmp, sizeof(tmp), "Unknown code(%d): ", 63 | static_cast(code())); 64 | type = tmp; 65 | break; 66 | } 67 | std::string result(type); 68 | uint32_t length; 69 | memcpy(&length, state_, sizeof(length)); 70 | result.append(state_ + 5, length); 71 | return result; 72 | } 73 | } 74 | 75 | } // namespace leveldb 76 | -------------------------------------------------------------------------------- /leveldb/util/testharness.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The 
LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include "util/testharness.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace leveldb { 13 | namespace test { 14 | 15 | namespace { 16 | struct Test { 17 | const char* base; 18 | const char* name; 19 | void (*func)(); 20 | }; 21 | std::vector* tests; 22 | } 23 | 24 | bool RegisterTest(const char* base, const char* name, void (*func)()) { 25 | if (tests == NULL) { 26 | tests = new std::vector; 27 | } 28 | Test t; 29 | t.base = base; 30 | t.name = name; 31 | t.func = func; 32 | tests->push_back(t); 33 | return true; 34 | } 35 | 36 | int RunAllTests() { 37 | const char* matcher = getenv("LEVELDB_TESTS"); 38 | 39 | int num = 0; 40 | if (tests != NULL) { 41 | for (size_t i = 0; i < tests->size(); i++) { 42 | const Test& t = (*tests)[i]; 43 | if (matcher != NULL) { 44 | std::string name = t.base; 45 | name.push_back('.'); 46 | name.append(t.name); 47 | if (strstr(name.c_str(), matcher) == NULL) { 48 | continue; 49 | } 50 | } 51 | fprintf(stderr, "==== Test %s.%s\n", t.base, t.name); 52 | (*t.func)(); 53 | ++num; 54 | } 55 | } 56 | fprintf(stderr, "==== PASSED %d tests\n", num); 57 | return 0; 58 | } 59 | 60 | std::string TmpDir() { 61 | std::string dir; 62 | Status s = Env::Default()->GetTestDirectory(&dir); 63 | ASSERT_TRUE(s.ok()) << s.ToString(); 64 | return dir; 65 | } 66 | 67 | int RandomSeed() { 68 | const char* env = getenv("TEST_RANDOM_SEED"); 69 | int result = (env != NULL ? 
atoi(env) : 301); 70 | if (result <= 0) { 71 | result = 301; 72 | } 73 | return result; 74 | } 75 | 76 | } // namespace test 77 | } // namespace leveldb 78 | -------------------------------------------------------------------------------- /leveldb/util/testutil.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include "util/testutil.h" 6 | 7 | #include "util/random.h" 8 | 9 | namespace leveldb { 10 | namespace test { 11 | 12 | Slice RandomString(Random* rnd, int len, std::string* dst) { 13 | dst->resize(len); 14 | for (int i = 0; i < len; i++) { 15 | (*dst)[i] = static_cast(' ' + rnd->Uniform(95)); // ' ' .. '~' 16 | } 17 | return Slice(*dst); 18 | } 19 | 20 | std::string RandomKey(Random* rnd, int len) { 21 | // Make sure to generate a wide variety of characters so we 22 | // test the boundary conditions for short-key optimizations. 
23 | static const char kTestChars[] = { 24 | '\0', '\1', 'a', 'b', 'c', 'd', 'e', '\xfd', '\xfe', '\xff' 25 | }; 26 | std::string result; 27 | for (int i = 0; i < len; i++) { 28 | result += kTestChars[rnd->Uniform(sizeof(kTestChars))]; 29 | } 30 | return result; 31 | } 32 | 33 | 34 | extern Slice CompressibleString(Random* rnd, double compressed_fraction, 35 | size_t len, std::string* dst) { 36 | int raw = static_cast(len * compressed_fraction); 37 | if (raw < 1) raw = 1; 38 | std::string raw_data; 39 | RandomString(rnd, raw, &raw_data); 40 | 41 | // Duplicate the random data until we have filled "len" bytes 42 | dst->clear(); 43 | while (dst->size() < len) { 44 | dst->append(raw_data); 45 | } 46 | dst->resize(len); 47 | return Slice(*dst); 48 | } 49 | 50 | } // namespace test 51 | } // namespace leveldb 52 | -------------------------------------------------------------------------------- /leveldb/util/testutil.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_UTIL_TESTUTIL_H_ 6 | #define STORAGE_LEVELDB_UTIL_TESTUTIL_H_ 7 | 8 | #include "leveldb/env.h" 9 | #include "leveldb/slice.h" 10 | #include "util/random.h" 11 | 12 | namespace leveldb { 13 | namespace test { 14 | 15 | // Store in *dst a random string of length "len" and return a Slice that 16 | // references the generated data. 17 | extern Slice RandomString(Random* rnd, int len, std::string* dst); 18 | 19 | // Return a random key with the specified length that may contain interesting 20 | // characters (e.g. \x00, \xff, etc.). 
21 | extern std::string RandomKey(Random* rnd, int len); 22 | 23 | // Store in *dst a string of length "len" that will compress to 24 | // "N*compressed_fraction" bytes and return a Slice that references 25 | // the generated data. 26 | extern Slice CompressibleString(Random* rnd, double compressed_fraction, 27 | size_t len, std::string* dst); 28 | 29 | // A wrapper that allows injection of errors. 30 | class ErrorEnv : public EnvWrapper { 31 | public: 32 | bool writable_file_error_; 33 | int num_writable_file_errors_; 34 | 35 | ErrorEnv() : EnvWrapper(Env::Default()), 36 | writable_file_error_(false), 37 | num_writable_file_errors_(0) { } 38 | 39 | virtual Status NewWritableFile(const std::string& fname, 40 | WritableFile** result) { 41 | if (writable_file_error_) { 42 | ++num_writable_file_errors_; 43 | *result = NULL; 44 | return Status::IOError(fname, "fake error"); 45 | } 46 | return target()->NewWritableFile(fname, result); 47 | } 48 | }; 49 | 50 | } // namespace test 51 | } // namespace leveldb 52 | 53 | #endif // STORAGE_LEVELDB_UTIL_TESTUTIL_H_ 54 | -------------------------------------------------------------------------------- /lib/EWAHBoolArray/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wheresvic/zsearch/72fe592a04c5fc90075475c2590bc3b8db03ed5d/lib/EWAHBoolArray/.DS_Store -------------------------------------------------------------------------------- /lib/EWAHBoolArray/CHANGELOG: -------------------------------------------------------------------------------- 1 | 0.3.2, 0.3.3, 0.3.4 (June 12 to 15th 2012) 2 | Minor releases trying to fix compatibility issues with Microsoft compilers. 3 | 4 | 0.3.1 (May 28th, 2012) 5 | Fixed a perf. regression bug. 6 | 7 | 0.3.0 (May 24th, 2012) 8 | Fix a bug with the computation of the cardinality. 9 | Improved compatibility with microsoft compilers. 10 | Introduced a toArray method to retrieve the set bits quickly. 
11 | 12 | 0.2.2 (May 24th, 2012) 13 | Fix a compilation problem under GCC 4.5. 14 | Added a benchmarking utility. 15 | Minor improvements. 16 | 17 | 0.2.1 (May 21st, 2012) 18 | Performance boost when decoding (uses GCC intrinsics, please contribute MS-equivalent) 19 | 20 | 0.2.0 (April 27th 2012) 21 | Minor refactor, cleaned the code somewhat. Removed less useful methods. 22 | 23 | 0.1.0 (April 19th 2012) 24 | Added a unit test related to how we compute the cardinality 25 | Accelerated the computation of the cardinality 26 | -------------------------------------------------------------------------------- /lib/EWAHBoolArray/README: -------------------------------------------------------------------------------- 1 | Compressed bitset in C++ 2 | Daniel Lemire 3 | 4 | 5 | 6 | == What is this? == 7 | 8 | The class EWAHBoolArray is a compressed bitset data structure. 9 | 10 | == Licensing == 11 | 12 | Apache License 2.0. (Other licenses are possible.) 13 | 14 | == Limitations == 15 | 16 | Because of the compression type being used, you must set the bits 17 | in increasing order (no random access). 18 | 19 | 20 | == STL and copy constructors == 21 | 22 | I expect most people to construct rather large bitmaps. For this 23 | reason, you should avoid copies. 24 | 25 | Thus, the code will warn you against doing this: 26 | 27 | EWAHBoolArray bitset1; 28 | bitset1.set(1); 29 | bitset1.set(2); 30 | bitset1.set(1000); 31 | bitset1.set(1001); 32 | vector< EWAHBoolArray > testVec; 33 | testVec.push_back(bitset1); 34 | 35 | Instead, do this: 36 | 37 | vector< EWAHBoolArray > testVec(1); 38 | testVec[0].set(1); 39 | testVec[0].set(2); 40 | testVec[0].set(1000); 41 | testVec[0].set(1001); 42 | 43 | 44 | Or you can use the "swap" method. 45 | 46 | EWAHBoolArray bitset1; 47 | bitset1.set(1); 48 | bitset1.set(2); 49 | bitset1.set(1000); 50 | bitset1.set(1001); 51 | vector< EWAHBoolArray > testVec(1); 52 | testVec[0].swap(bitset1); 53 | 54 | 55 | == Dependencies == 56 | 57 | None.
(Will work under MacOS or Linux easily.) 58 | 59 | == Usage == 60 | 61 | make 62 | ./unit 63 | make example 64 | ./example 65 | 66 | == Example == 67 | 68 | Please see example.cpp 69 | 70 | == Ruby wrapper == 71 | 72 | Josh Ferguson wrote a wrapper for Ruby. 73 | The implementation is packaged and installable as a ruby gem. 74 | 75 | You can install it by typing: 76 | 77 | gem install ewah-bitset 78 | 79 | == Further reading == 80 | 81 | Please see 82 | 83 | Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves 84 | word-aligned bitmap indexes. Data & Knowledge Engineering 69 (1), 85 | pages 3-28, 2010. 86 | http://arxiv.org/abs/0901.3751 87 | 88 | == Warning == 89 | 90 | Please don't trust this software. Run your own unit tests. Report bugs. 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /lib/EWAHBoolArray/example.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * This is code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 
4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | #include 8 | #include "headers/ewah.h" 9 | #include 10 | #include 11 | #include 12 | #include 13 | //EWAHBoolArray 14 | // EWAHBoolArray bitset1; 15 | using namespace std; 16 | 17 | vector getvec(int density,int length){ 18 | 19 | vector temp; 20 | int qty = length / density; 21 | set myset; 22 | //while (myset.size() < qty) { 23 | // int pos = (rand()%length)+1; 24 | // myset.insert(pos); 25 | // } 26 | for (int i = 1; i<= qty; i++){ 27 | myset.insert(i); 28 | } 29 | for (auto it=myset.begin(); it!=myset.end(); it++){ 30 | temp.push_back(*it); 31 | } 32 | 33 | return temp; 34 | } 35 | 36 | double getSize(int density,int length){ 37 | int iterCount = 500; 38 | unsigned int total = 0; 39 | int iter = 0; 40 | for (int i = 0; i set; 43 | vector v =getvec(density,length); 44 | for (auto value : v) { 45 | set.set(value); 46 | } 47 | set.write(ss); 48 | total += ss.str().size(); 49 | iter++; 50 | } 51 | return ( ( (total/iter) * 8 ) * 1.0 )/ ((length/density) * 1.0) ; 52 | } 53 | 54 | void f(){ 55 | for (int i=1; i<=32; i++){ 56 | cout << "Density = 1/" << i << endl; 57 | cout << "size = " << getSize(i,5000) << endl; 58 | } 59 | 60 | } 61 | 62 | int main() { 63 | srand((unsigned)time(0)); 64 | f(); 65 | } 66 | -------------------------------------------------------------------------------- /lib/EWAHBoolArray/makefile: -------------------------------------------------------------------------------- 1 | VPATH = src:headers 2 | CXXFLAGS=-Iheaders -O3 3 | HEADERS=ewah.h ewahutil.h boolarray.h runninglengthword.h 4 | 5 | all: unit unit32bits example benchmark 6 | 7 | unit32bits: $(HEADERS) unit.cpp 8 | $(CXX) $(CXXFLAGS) -m32 -o unit32bits src/unit.cpp 9 | 10 | unit: $(HEADERS) unit.cpp 11 | $(CXX) $(CXXFLAGS) -o unit src/unit.cpp 12 | 13 | example: $(HEADERS) example.cpp 14 | $(CXX) $(CXXFLAGS) -o example example.cpp 15 | 16 | cppcheck: 17 | cppcheck --enable=all headers/*.h src/*.cpp *.cpp 18 | 19 | benchmark: 
$(HEADERS) ./src/benchmark.cpp 20 | $(CXX) $(CXXFLAGS) -o benchmark ./src/benchmark.cpp 21 | 22 | 23 | doxygen: 24 | doxygen doxyconfig.txt 25 | 26 | package: 27 | zip -9 EWAHBoolArray_`date +%Y-%m-%d`.zip README CHANGELOG makefile example.cpp headers/*.h src/*.cpp 28 | cd ..;zip -9 ./EWAHBoolArray/EWAHBoolArray.0.3.4-src.zip ./EWAHBoolArray/README ./EWAHBoolArray/CHANGELOG ./EWAHBoolArray/makefile ./EWAHBoolArray/example.cpp ./EWAHBoolArray/headers/*.h ./EWAHBoolArray/src/*.cpp 29 | clean: 30 | rm -f *.o unit example unit32bits benchmark 31 | -------------------------------------------------------------------------------- /lib/EWAHBoolArray/unit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wheresvic/zsearch/72fe592a04c5fc90075475c2590bc3b8db03ed5d/lib/EWAHBoolArray/unit -------------------------------------------------------------------------------- /lib/rapidxml-1.13/document01.xml: -------------------------------------------------------------------------------- 1 | 2 | First document 3 | Value of field1 4 | Value of field2 5 | 6 | -------------------------------------------------------------------------------- /lib/rapidxml-1.13/test_rapidxml.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include "rapidxml.hpp" 6 | #include "rapidxml_print.hpp" 7 | 8 | using namespace std; 9 | using namespace rapidxml; 10 | 11 | int main() 12 | { 13 | vector xmlText; 14 | string input; 15 | 16 | while (getline(cin, input)) 17 | { 18 | xmlText.insert(xmlText.end(), input.begin(), input.end()); 19 | } 20 | 21 | xmlText.push_back('\0'); 22 | 23 | xml_document<> doc; // character type defaults to char 24 | doc.parse(&xmlText[0]); // 0 means default parse flags 25 | 26 | // xml_node<>* root = doc.first_node("document"); 27 | 28 | cout << "The first node is '" << doc.first_node()->name() << "'\n"; 29 | 30 | for (xml_node<>* n = 
doc.first_node("document")->first_node(); n; n = n->next_sibling()) 31 | { 32 | char* v = n->value(); 33 | // if (!v || !*v) v = "(empty)"; 34 | cout << n->name() << " : " << v << '\n'; 35 | } 36 | 37 | string document; 38 | rapidxml::print(std::back_inserter(document), doc,0); 39 | cout << document; 40 | 41 | return 0; 42 | } 43 | -------------------------------------------------------------------------------- /misc/Makefile: -------------------------------------------------------------------------------- 1 | 2 | # $@ name of the target 3 | # $^ name of all prerequisites with duplicates removed 4 | # $< name of the first prerequisite 5 | 6 | CC=g++ 7 | CFLAGS = -std=c++0x -Wall -O0 -pedantic -msse3 -I/usr/local/include 8 | LIBS = -L/usr/local/lib 9 | OBJECTS = 10 | 11 | clean : 12 | rm -f *.o \ 13 | test_compressedset 14 | 15 | all: test_compressedset 16 | 17 | test_compressedset: test_compressedset.cpp 18 | $(CC) $(CFLAGS) -o $@ $^ $(LIBS) 19 | 20 | -------------------------------------------------------------------------------- /misc/example_compressedset.cpp: -------------------------------------------------------------------------------- 1 | // from http://www.stepanovpapers.com/CIKM_2011.pdf 2 | // compile with g++ -std=gnu++0x -O3 main.cpp -o result.bin 3 | using namespace std; 4 | 5 | #include 6 | #include 7 | #include 8 | #include "varint/CompressedSet.h" 9 | #include 10 | #include "varint/LazyAndSet.h" 11 | #include 12 | 13 | double diffclock(clock_t clock1,clock_t clock2) 14 | { 15 | double diffticks=clock1-clock2; 16 | double diffms=(diffticks*1000)/CLOCKS_PER_SEC; 17 | return diffms; 18 | } 19 | void benchmark(){ 20 | int test; 21 | CompressedSet myset1; 22 | for (unsigned int i = 1; i<=384000000; ++i){ 23 | myset1.addDoc(i); 24 | } 25 | std::cout << "compressed!" 
<< endl; 26 | 27 | myset1.flush(); 28 | myset1.compact(); 29 | 30 | CompressedSet::Iterator it(&myset1); 31 | // Sequential scanning 384 000 000 docs per second 32 | clock_t begin=clock(); 33 | for (; it.docID() != NO_MORE_DOCS;it.nextDoc()){ 34 | unsigned int temp = it.docID(); 35 | } 36 | clock_t end=clock(); 37 | std::cout << "Iteration Time: " << double(diffclock(end,begin)) << " ms"<< endl; 38 | } 39 | 40 | 41 | 42 | int main() { 43 | benchmark(); 44 | } 45 | 46 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | server 3 | engine 4 | tests 5 | 6 | -------------------------------------------------------------------------------- /src/Constants.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef CONSTANTS_HPP 3 | #define CONSTANTS_HPP 4 | 5 | #include 6 | 7 | namespace zsearch 8 | { 9 | const std::string LOCK_FILE = "/var/tmp/zsearch.pid"; 10 | 11 | const std::string LEVELDB_STORE = "/var/tmp/store"; 12 | const std::string LEVELDB_TEST_STORE = "/var/tmp/test_store"; 13 | 14 | const std::string FIELDS_KEY = "fields"; 15 | const std::string DOC_ID_KEY = "docId"; 16 | const std::string WORD_ID_KEY = "wordId"; 17 | 18 | // const std::string QUERY_PARSER_DELIMITERS = " \t\n\r.,;"; 19 | 20 | namespace server 21 | { 22 | const unsigned short PORT = 8080; 23 | 24 | const std::string POST_HTM = "/post.htm"; 25 | const std::string SEARCH_PATH = "/search"; 26 | const std::string POST_PATH = "/post"; 27 | const std::string DOC_PATH = "/doc"; 28 | const std::string INDEX_PATH = "/index"; 29 | const std::string ROOT = "/"; 30 | 31 | const std::string POST_DATA_KEY = "data"; 32 | const std::string GET_SEARCH_QUERY_KEY = "q"; 33 | const std::string GET_SEARCH_START_KEY = "s"; 34 | const std::string GET_SEARCH_OFFSET_KEY = "o"; 35 | const std::string DOC_ID_KEY = "id"; 36 | } 37 | 38 | const 
unsigned int MAX_BATCH_SIZE = 200; 39 | 40 | const std::string DOCUMENT_ROOT = "document"; 41 | const std::string DOCUMENT_TITLE = "title"; 42 | } 43 | 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /src/DocumentKVStore.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef DOCUMENTKVSTORE_H 3 | #define DOCUMENTKVSTORE_H 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "IKVStore.h" 11 | #include "IDocument.h" 12 | #include "ZException.hpp" 13 | 14 | 15 | using namespace std; 16 | 17 | class DocumentKVStore 18 | { 19 | private: 20 | 21 | std::shared_ptr store; 22 | 23 | public: 24 | 25 | DocumentKVStore(std::shared_ptr store) : store(store) 26 | { 27 | store->Open(); 28 | } 29 | 30 | ~DocumentKVStore() 31 | { 32 | std::cerr << "Destroyed DocumentKVStore" << std::endl; 33 | } 34 | 35 | void addDoc(unsigned int docId, const shared_ptr& doc) 36 | { 37 | stringstream ss; 38 | doc->writeMini(ss); 39 | string d = ss.str(); 40 | 41 | if (!(store->Put(docId, d).ok())) 42 | { 43 | std::cerr << "Could not put document into LevelDb " << d << std::endl; 44 | throw ZException("Could not put document into LevelDb "); 45 | } 46 | } 47 | 48 | void removeDoc(unsigned int docId) 49 | { 50 | store->Delete(docId); // status could really only be Ok or NotFound 51 | } 52 | 53 | int Get(unsigned int docId, shared_ptr& doc) const 54 | { 55 | string d; 56 | 57 | if (store->Get(docId, d).ok()) 58 | { 59 | doc->readMini(d); 60 | 61 | /* 62 | cout << "got docId " << docId << endl << d << endl; 63 | 64 | try 65 | { 66 | doc->construct(d); 67 | } 68 | catch (const string& ex) 69 | { 70 | cerr << ex << endl; 71 | } 72 | catch (const exception& ex) 73 | { 74 | cerr << ex.what() << endl; 75 | } 76 | catch (...) 
77 | { 78 | cerr << "wtf" << endl; 79 | } 80 | */ 81 | 82 | return 1; 83 | } 84 | 85 | return 0; 86 | } 87 | 88 | }; 89 | 90 | #endif 91 | -------------------------------------------------------------------------------- /src/EngineDataKVStore.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ENGINEDATAKVSTORE_H 2 | #define ENGINEDATAKVSTORE_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "IKVStore.h" 11 | #include "ZException.hpp" 12 | #include "ZUtil.hpp" 13 | #include "Constants.hpp" 14 | 15 | using namespace std; 16 | 17 | class EngineDataKVStore 18 | { 19 | private: 20 | 21 | unsigned long docId; 22 | unsigned long long wordId; 23 | 24 | shared_ptr store; 25 | 26 | public: 27 | 28 | EngineDataKVStore(shared_ptr store) : docId(1), wordId(1), store(store) 29 | { 30 | string strId; 31 | 32 | cerr << "Populating docId from store" << endl; 33 | 34 | if (store->Get(zsearch::DOC_ID_KEY, strId).ok()) 35 | { 36 | cerr << "got " << strId << endl; 37 | docId = ZUtil::atoi(strId.c_str()); 38 | } 39 | 40 | cerr << "current docId is " << docId << endl; 41 | 42 | cerr << "Populating wordId from store" << endl; 43 | 44 | if (store->Get(zsearch::WORD_ID_KEY, strId).ok()) 45 | { 46 | cerr << "got " << strId << endl; 47 | wordId = ZUtil::atoi(strId.c_str()); 48 | } 49 | 50 | cerr << "current wordId is " << wordId << endl; 51 | 52 | if ((docId <= 1 && wordId > 1) || (docId > 1 && wordId <= 1)) 53 | { 54 | throw ZException("docId and wordId mismatched! 
Most likely db is corrupt."); 55 | } 56 | 57 | } 58 | 59 | ~EngineDataKVStore() 60 | { 61 | cerr << "Persisting docId " << docId << " to store" << endl; 62 | 63 | string strDocId; 64 | ZUtil::PutUint64(strDocId, docId); 65 | 66 | if (!store->Put(zsearch::DOC_ID_KEY, strDocId).ok()) 67 | { 68 | cerr << "Error persisting docId to storage, db might be corrupt or invalid at startup" << endl; 69 | } 70 | 71 | cerr << "Persisting wordId " << wordId << " to store" << endl; 72 | 73 | string strWordId; 74 | ZUtil::PutUint64(strWordId, wordId); 75 | 76 | if (!store->Put(zsearch::WORD_ID_KEY, strWordId).ok()) 77 | { 78 | cerr << "Error persisting wordId to storage, db might be corrupt or invalid at startup" << endl; 79 | } 80 | 81 | cerr << "Destroyed EngineDataKVStore" << endl; 82 | } 83 | 84 | unsigned long& getDocId() 85 | { 86 | return docId; 87 | } 88 | 89 | unsigned long long& getWordId() 90 | { 91 | return wordId; 92 | } 93 | 94 | }; 95 | 96 | #endif 97 | -------------------------------------------------------------------------------- /src/Field.cpp: -------------------------------------------------------------------------------- 1 | #include "Field.h" 2 | 3 | Field::Field(const char* name, const char* value,int _config){ 4 | //assert(name != NULL); 5 | _name = name; 6 | fieldsData = (void*)value; 7 | setConfig(_config); 8 | } 9 | 10 | void Field::setConfig(const uint32_t x){ 11 | uint32_t newConfig=0; 12 | 13 | if ( x & STORE_YES ){ 14 | newConfig |= STORE_YES; 15 | } else { 16 | newConfig |= STORE_NO; 17 | } 18 | 19 | if ( (x & INDEX_NO)==0 ){ 20 | bool index=false; 21 | if ( x & INDEX_TOKENIZED ){ 22 | newConfig |= INDEX_TOKENIZED; 23 | index = true; 24 | } else if ( x & INDEX_UNTOKENIZED ){ 25 | newConfig |= INDEX_UNTOKENIZED; 26 | index = true; 27 | } 28 | if ( !index ) 29 | newConfig |= INDEX_NO; 30 | } else { 31 | newConfig |= INDEX_NO; 32 | } 33 | 34 | if ( newConfig & INDEX_NO && newConfig & STORE_NO ){ 35 | // it doesn't make sense to have a field that is 
neither indexed nor stored 36 | } 37 | config = newConfig; 38 | } 39 | 40 | const char* Field::name() const { 41 | return _name; 42 | } 43 | 44 | const char* Field::value() const { 45 | return static_cast(fieldsData); 46 | } 47 | 48 | bool Field::isStored() const { 49 | return (config & STORE_YES) != 0; 50 | } 51 | 52 | bool Field::isIndexed() const { 53 | return (config & (INDEX_TOKENIZED | INDEX_UNTOKENIZED)) != 0; // indexed means tokenized OR untokenized; setConfig() records either flag 54 | } 55 | 56 | bool Field::isTokenized() const { 57 | return (config & INDEX_TOKENIZED) != 0; 58 | } 59 | 60 | const char* Field::toString(){ 61 | return static_cast(fieldsData); 62 | } 63 | 64 | Field::~Field() { 65 | //TODO 66 | } 67 | -------------------------------------------------------------------------------- /src/Field.h: -------------------------------------------------------------------------------- 1 | /** 2 | A field is a section of a Document. 3 | Each field has two parts, a name and a value. 4 | Values may be free text, or atomic keywords, which are not further processed. 5 | Such keywords may be used to represent dates, urls, etc. 6 | Fields are optionally stored so that they may be returned with the document. 7 | */ 8 | #include 9 | #include 10 | 11 | class Field { 12 | public: 13 | enum Store{ 14 | /** Store the original field value. 15 | * This is useful for short texts like doc title 16 | * which should be displayed with the results. 17 | */ 18 | STORE_YES=1, 19 | 20 | /** Do not store the field value in the index. */ 21 | STORE_NO=2 22 | }; 23 | 24 | enum Index{ 25 | /** Do not index the field value. 26 | * This field can thus not be searched, 27 | * but one can still access its contents provided it is stored 28 | */ 29 | INDEX_NO=16, 30 | 31 | /** Index the field's value so it can be searched. 32 | * An Analyzer will be used to tokenize the text before its 33 | * terms will be stored in the index. 34 | */ 35 | INDEX_TOKENIZED=32, 36 | 37 | /** Index the field's value without using an Analyzer, 38 | * so it can be searched.
39 | * The value will be stored as a single term. 40 | * This is useful for unique Ids like product numbers. 41 | */ 42 | INDEX_UNTOKENIZED=64 43 | }; 44 | 45 | Field(const char* name, const char* value,int _config); 46 | 47 | virtual ~Field(); 48 | 49 | virtual const char* name() const; 50 | virtual const char* value() const; 51 | virtual bool isStored() const; 52 | virtual bool isIndexed() const; 53 | virtual bool isTokenized() const; 54 | /** Prints a Field for human consumption. */ 55 | virtual const char* toString(); 56 | 57 | protected: 58 | void setConfig(const uint32_t _config); 59 | const char* _name; 60 | void* fieldsData; 61 | uint32_t config; 62 | }; -------------------------------------------------------------------------------- /src/FieldKVStore.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FIELDKVSTORE_H 2 | #define FIELDKVSTORE_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "IKVStore.h" 11 | #include "ZException.hpp" 12 | #include "ZUtil.hpp" 13 | #include "Constants.hpp" 14 | 15 | using namespace std; 16 | 17 | class FieldKVStore 18 | { 19 | private: 20 | 21 | unordered_set fields; 22 | shared_ptr store; 23 | 24 | public: 25 | 26 | FieldKVStore(shared_ptr store) : store(store) 27 | { 28 | cerr << "Populating fields from store" << endl; 29 | 30 | string strFields; 31 | 32 | if (store->Get(zsearch::FIELDS_KEY, strFields).ok()) 33 | { 34 | istringstream iss(strFields); 35 | string field; 36 | 37 | while (iss >> field) 38 | { 39 | // cerr << field << " "; 40 | fields.insert(field); 41 | } 42 | 43 | cerr << endl; 44 | } 45 | 46 | } 47 | 48 | ~FieldKVStore() 49 | { 50 | cerr << "Persisting fields to store" << endl; 51 | 52 | ostringstream oss; 53 | 54 | for (auto field : fields) 55 | { 56 | oss << field << " "; 57 | } 58 | 59 | string strFields = oss.str(); 60 | 61 | if (!store->Put(zsearch::FIELDS_KEY, strFields).ok()) 62 | { 63 | cerr << "Error persisting 
fields to storage, db might be corrupt or invalid at startup" << endl; 64 | } 65 | 66 | cerr << "Destroyed FieldKVStore" << endl; 67 | } 68 | 69 | void put(const string& field) 70 | { 71 | fields.insert(field); 72 | } 73 | 74 | const unordered_set& getFields() const 75 | { 76 | return fields; 77 | } 78 | 79 | }; 80 | 81 | #endif 82 | -------------------------------------------------------------------------------- /src/IDocument.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | #ifndef IDOCUMENT_H 4 | #define IDOCUMENT_H 5 | 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | using namespace std; 15 | 16 | class IDocument 17 | { 18 | public: 19 | 20 | virtual void construct(const string& xml) = 0; 21 | 22 | virtual void write(ostream& out) = 0; 23 | 24 | virtual void readMini(const string& src) = 0; 25 | 26 | virtual void writeMini(ostream& out) = 0; 27 | 28 | virtual void addEntry(const string& key, const string& value) = 0; 29 | 30 | typedef std::vector>::const_iterator const_iterator; 31 | virtual const_iterator begin() const = 0; 32 | virtual const_iterator end() const = 0; 33 | 34 | virtual void getEntry(const string& key, string& value) = 0; 35 | 36 | virtual ~IDocument() { } 37 | }; 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /src/IInvertedIndex.h: -------------------------------------------------------------------------------- 1 | #ifndef IINVERTED_INDEX_H 2 | #define IINVERTED_INDEX_H 3 | 4 | #include 5 | #include "varint/Set.h" 6 | class IInvertedIndex 7 | { 8 | public: 9 | 10 | virtual int get(unsigned int wordId, std::shared_ptr& outset) const = 0; 11 | virtual int add(unsigned int wordId, unsigned int docid) = 0; 12 | virtual int remove(unsigned int wordId, unsigned int docId) = 0; 13 | 14 | // Virtual destructor 15 | virtual ~IInvertedIndex() { } 16 | }; 17 | #endif 18 | 
-------------------------------------------------------------------------------- /src/IKVStore.h: -------------------------------------------------------------------------------- 1 | #ifndef IKVSTORE_H 2 | #define IKVSTORE_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace KVStore 9 | { 10 | 11 | class Status 12 | { 13 | enum Code 14 | { 15 | kOk = 0, 16 | kNotFound = 1, 17 | kCorruption = 2, 18 | kNotSupported = 3, 19 | kInvalidArgument = 4, 20 | kIOError = 5 21 | }; 22 | 23 | Code _code; 24 | 25 | public: 26 | 27 | Code code() const 28 | { 29 | return _code; 30 | } 31 | 32 | Status(Code code) 33 | { 34 | _code = code; 35 | } 36 | 37 | // Return a success status. 38 | static Status OK() { return Status(kOk); } 39 | 40 | // Return error status of an appropriate type. 41 | static Status NotFound() { return Status(kNotFound); } 42 | 43 | static Status Corruption() { return Status(kCorruption); } 44 | 45 | static Status NotSupported() { return Status(kNotSupported); } 46 | 47 | static Status InvalidArgument() { return Status(kInvalidArgument); } 48 | 49 | static Status IOError() { return Status(kIOError); } 50 | 51 | // Returns true iff the status indicates success. 52 | bool ok() const { return (code() == kOk); } 53 | 54 | // Returns true iff the status indicates a NotFound error. 55 | bool IsNotFound() const { return code() == kNotFound; } 56 | 57 | // Returns true iff the status indicates a Corruption error. 58 | bool IsCorruption() const { return code() == kCorruption; } 59 | 60 | // Returns true iff the status indicates an IOError. 
61 | bool IsIOError() const { return code() == kIOError; } 62 | }; 63 | 64 | 65 | class IKVStore 66 | { 67 | public: 68 | 69 | virtual Status Open() = 0; 70 | virtual Status Put(const std::string& key,const std::string& value) = 0; 71 | virtual Status Put(uint64_t key,const std::string& value) = 0; 72 | virtual Status Get(const std::string& key, std::string* value) = 0; 73 | virtual Status Get(const std::string& key, std::string& value) = 0; 74 | virtual Status Get(uint64_t key, std::string& value) = 0; 75 | virtual Status Delete(const std::string& key) = 0; 76 | virtual Status Delete(uint64_t key) = 0; 77 | virtual void Compact() = 0; 78 | virtual Status Put(const std::vector>& writes) = 0; 79 | virtual Status Put(const std::vector>& writes) = 0; 80 | virtual ~IKVStore() { } 81 | 82 | 83 | }; 84 | 85 | } 86 | 87 | #endif 88 | -------------------------------------------------------------------------------- /src/ITokenizer.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef ITOKENIZER_H 3 | #define ITOKENIZER_H 4 | 5 | #include 6 | 7 | class ITokenizer 8 | { 9 | public: 10 | 11 | virtual bool nextToken() = 0; 12 | 13 | virtual const std::string& getToken() const = 0; 14 | 15 | virtual void setString(const std::string& str, const std::string& field) = 0; 16 | 17 | virtual ~ITokenizer() { } 18 | }; 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | # $@ name of the target 2 | # $^ name of all prerequisites with duplicates removed 3 | # $< name of the first prerequisite 4 | ifneq (,$(findstring /cygdrive/,$(PATH))) 5 | OSNAME := Cygwin 6 | else ifneq (,$(findstring WINDOWS,$(PATH))) 7 | OSNAME := Windows 8 | else 9 | OSNAME := $(shell uname -s) 10 | endif 11 | ARCH := $(shell uname -m) 12 | 13 | CC = g++ 14 | BUILD_DIR = ../build 15 | LEVELDB_DIR = ../leveldb 16 | 17 | 18 
| BUILD_OBJECTS = 19 | 20 | OBJECTS = $(BUILD_OBJECTS) $(BUILD_DIR)/CompressedSet.a $(LEVELDB_DIR)/libleveldb.a 21 | 22 | HEADERS = TokenizerImpl.hpp Constants.hpp ZUtil.hpp Statistics.hpp Engine.hpp InvertedIndexImpl.hpp InvertedIndexBatch.hpp DocumentImpl.hpp InvertedIndexSimpleBatch.hpp DocumentKVStore.hpp ZException.hpp ZUtil.hpp WordIndexKVStore.hpp KVStoreLevelDb.hpp IDocument.h IInvertedIndex.h IKVStore.h KVStoreInMemory.hpp NameSpaceKVStore.hpp FieldKVStore.hpp EngineDataKVStore.hpp 23 | 24 | CFLAGS_PLAIN = -flto -std=gnu++0x -Wall -g0 -Ofast -funroll-loops -msse2 -I$(LEVELDB_DIR)/include -I../lib -I../lib/rapidxml-1.13 -I.. -D_GLIBCXX_USE_NANOSLEEP 25 | 26 | ifeq ($(OSNAME), Darwin) 27 | CFLAGS = $(CFLAGS_PLAIN) -I. -I/usr/local/include -I/opt/local/include -DOS_MACOSX -DINTEL64 28 | LIBS = -L/usr/local/lib -L/opt/local/lib -L$(LEVELDB_DIR) -lleveldb -lm -levent 29 | endif 30 | 31 | 32 | ifeq ($(OSNAME), Linux) 33 | CFLAGS = $(CFLAGS_PLAIN) -I. -I/usr/include -I/usr/local/include -I/ms/dist/fsf/PROJ/libevent/2.0.19/include 34 | LIBS = -L/usr/local/lib -L/usr/lib/i386-linux-gnu -L/ms/dist/fsf/PROJ/libevent/2.0.19/lib -L$(LEVELDB_DIR) -lm -lpthread -levent 35 | endif 36 | 37 | 38 | ifeq ($(OSNAME), Cygwin) 39 | CFLAGS = $(CFLAGS_PLAIN) 40 | LIBS = -L/usr/lib -L$(LEVELDB_DIR) -lm -lpthread -levent 41 | endif 42 | 43 | clean : 44 | rm -f $(BUILD_OBJECTS) $(BUILD_DIR)/server $(BUILD_DIR)/engine 45 | 46 | all: server engine $(BUILD_DIR)/TokenizerImpl.o 47 | 48 | server: server.cpp $(OBJECTS) 49 | $(CXX) $(CFLAGS) -o $(BUILD_DIR)/$@ $^ $(LIBS) 50 | 51 | engine: engine_simple_main.cpp $(OBJECTS) 52 | $(CXX) $(CFLAGS) -o $(BUILD_DIR)/$@ $^ $(LIBS) 53 | 54 | 55 | ## 56 | # Classes 57 | ## 58 | 59 | $(BUILD_DIR)/TokenizerImpl.o : TokenizerImpl.cpp TokenizerImpl.h ITokenizer.h Constants.hpp 60 | $(CXX) -c $(CFLAGS) $< -o $@ 61 | 62 | 63 | -------------------------------------------------------------------------------- /src/TokenizerImpl.cpp: 
-------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "TokenizerImpl.h" 7 | 8 | using namespace std; 9 | 10 | TokenizerImpl::TokenizerImpl() : m_offset(0) 11 | { } 12 | 13 | TokenizerImpl::~TokenizerImpl() 14 | { 15 | std::cerr << "Destroyed TokenizerImpl" << std::endl; 16 | } 17 | 18 | 19 | void TokenizerImpl::setString(const std::string& str, const std::string& field) 20 | { 21 | m_string = str; 22 | m_token.clear(); 23 | m_offset = 0; 24 | } 25 | 26 | 27 | bool TokenizerImpl::nextToken() // advances to the next run of ASCII letters/digits (letters lowercased); returns false once the input is exhausted 28 | { 29 | m_token.resize(0); 30 | for (;;){ 31 | if (m_offset == m_string.size()) break; 32 | char c = m_string[m_offset++]; 33 | const bool isDigit = (c >= '0' && c <= '9'); c = c | 0x20; // test for a digit on the raw byte first (OR-ing 0x20 maps control bytes 0x10-0x19 onto '0'-'9'), then lowercase 34 | if (c >= 'a' ? c <= 'z' : isDigit) 35 | { 36 | m_token.push_back(c); 37 | } else { 38 | if (m_token.size()>0) 39 | return true; 40 | } 41 | } 42 | return m_token.size()>0; 43 | } 44 | 45 | 46 | 47 | const std::string& TokenizerImpl::getToken() const 48 | { 49 | return m_token; 50 | } 51 | -------------------------------------------------------------------------------- /src/TokenizerImpl.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef TOKENIZERIMPL_H 3 | #define TOKENIZERIMPL_H 4 | 5 | #include 6 | #include "ITokenizer.h" 7 | 8 | class TokenizerImpl : public ITokenizer 9 | { 10 | public: 11 | 12 | TokenizerImpl(); 13 | 14 | ~TokenizerImpl(); 15 | 16 | void setString(const std::string& str,const std::string& field); 17 | 18 | bool nextToken(); 19 | 20 | const std::string& getToken() const; 21 | 22 | protected: 23 | 24 | std::string m_string; 25 | size_t m_offset; 26 | std::string m_token; 27 | 28 | 29 | }; 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /src/TokenizerImpl.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef TOKENIZERIMPL_H 3 | #define
TOKENIZERIMPL_H 4 | 5 | #include 6 | #include "ITokenizer.h" 7 | #include 8 | #include 9 | #include 10 | using namespace std; 11 | class TokenizerImpl : public ITokenizer 12 | { 13 | protected: 14 | bool table[UCHAR_MAX+1]; 15 | public: 16 | 17 | TokenizerImpl(): stringsize(0), m_offset(0) // stringsize starts at 0 so nextToken() before setString() sees an empty input 18 | { 19 | for (int i = 0; i <= UCHAR_MAX; ++i) 20 | table[i] = std::isalnum(i); 21 | } 22 | 23 | ~TokenizerImpl(){ 24 | std::cerr << "Destroyed TokenizerImpl" << std::endl; 25 | } 26 | 27 | void setString(const std::string& str, const std::string& field) 28 | { 29 | m_string = str; 30 | stringsize = m_string.size(); 31 | m_token.clear(); 32 | m_offset = 0; 33 | } 34 | 35 | inline bool nextToken() // advances to the next alphanumeric token longer than two characters (bytes OR-ed with 0x20); returns false at end of input 36 | { 37 | m_token.resize(0); 38 | size_t tokensize = 0; 39 | for (;;){ 40 | if (m_offset == stringsize) break; //22% 41 | unsigned char c = m_string[m_offset++]; 42 | const bool wordChar = table[c]; c = c | 0x20; // classify the raw byte before lowercasing: OR-ing 0x20 first maps control bytes 0x10-0x19 onto '0'-'9' 43 | if (wordChar) // 39% 44 | { 45 | tokensize++; 46 | m_token.push_back(c); 47 | } else { 48 | if (tokensize>2){ 49 | return true; 50 | } else { 51 | m_token.resize(0); tokensize = 0; // drop the short token AND its length, otherwise later 1-2 character tokens slip past the >2 filter 52 | } 53 | } 54 | } 55 | return tokensize>2; 56 | } 57 | 58 | 59 | inline const std::string& getToken() const 60 | { 61 | return m_token; 62 | } 63 | 64 | protected: 65 | 66 | std::string m_string; 67 | size_t stringsize; 68 | size_t m_offset; 69 | std::string m_token; 70 | 71 | 72 | }; 73 | 74 | #endif 75 | -------------------------------------------------------------------------------- /src/ZException.hpp: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | * http://stackoverflow.com/questions/134569/c-exception-throwing-stdstring 4 | */ 5 | 6 | #ifndef ZEXCEPTION_H 7 | #define ZEXCEPTION_H 8 | 9 | #include 10 | 11 | struct ZException : public std::exception 12 | { 13 | std::string s; 14 | 15 | ZException(std::string ss) : s(ss) { } 16 | 17 | const char* what() const throw() 18 | { 19 | return s.c_str(); 20 | } 21 | }; 22 | 23 | #endif 24 |
-------------------------------------------------------------------------------- /src/trie/EntropyTrie.hpp: -------------------------------------------------------------------------------- 1 | #include "bit_vector.hpp" 2 | #include "trie.hpp" 3 | #include 4 | #include 5 | class EntropyTrie 6 | { 7 | private: 8 | typedef cindex::bit_vector<> vector_type; 9 | vector_type bitvector_; 10 | cindex::trie<> trie_; 11 | std::size_t key_len_; 12 | std::vector pending_keys_; 13 | std::size_t pending_key_count_; 14 | std::size_t n_; 15 | public: 16 | /** 17 | * key_len = length of each key in number of bytes 18 | */ 19 | EntropyTrie(std::size_t key_len): key_len_(key_len),pending_key_count_(0),n_(0){ // n_ starts at 0 so locate() before the first flush() does not read an indeterminate key count 20 | 21 | } 22 | 23 | bool insert(const uint8_t* key){ 24 | uint8_t* key_c = new uint8_t[key_len_]; 25 | memcpy(key_c, key, key_len_); 26 | pending_keys_.push_back(key_c); 27 | pending_key_count_++; 28 | return true; 29 | } 30 | 31 | bool insert(std::string str){ if (str.size() < key_len_) return false; // insert() copies key_len_ bytes, so a shorter string would be read past its end 32 | const uint8_t* key_c = reinterpret_cast(str.c_str()); 33 | return insert(key_c); 34 | } 35 | 36 | void flush(){ 37 | trie_.encode( 38 | bitvector_, 39 | pending_keys_, key_len_, 40 | 0, 41 | pending_key_count_ 42 | ); 43 | for (std::size_t i = 0; i < pending_key_count_; i++) 44 | delete [] pending_keys_[i]; 45 | pending_keys_.clear(); 46 | n_ = pending_key_count_; 47 | pending_key_count_ = 0; 48 | bitvector_.compact(); 49 | } 50 | 51 | std::size_t locate(const uint8_t* key){ 52 | std::size_t iter = 0; 53 | std::size_t key_index = trie_.locate(bitvector_, 54 | iter,key,key_len_,0 ,n_); 55 | return key_index; 56 | } 57 | 58 | std::size_t locate(std::string str){ 59 | const uint8_t* key_c = reinterpret_cast(str.c_str()); 60 | return locate(key_c); 61 | } 62 | 63 | virtual ~EntropyTrie(){ 64 | for (std::size_t i = 0; i < pending_key_count_; i++) 65 | delete [] pending_keys_[i]; 66 | } 67 | 68 | }; -------------------------------------------------------------------------------- /src/trie/bit_vector.cpp: 
-------------------------------------------------------------------------------- 1 | #include "bit_vector.hpp" 2 | #include 3 | 4 | namespace cindex 5 | { 6 | template 7 | bit_vector::bit_vector() 8 | : buf_(NULL), size_(0), capacity_(8) 9 | { 10 | resize(); 11 | } 12 | 13 | template 14 | bit_vector::~bit_vector() 15 | { 16 | clear(); 17 | } 18 | 19 | template 20 | void 21 | bit_vector::clear() 22 | { 23 | free(buf_); 24 | buf_ = NULL; 25 | 26 | size_ = 0; 27 | capacity_ = 0; 28 | } 29 | 30 | template 31 | void 32 | bit_vector::compact() 33 | { 34 | capacity_ = size_; 35 | resize(); 36 | } 37 | 38 | template 39 | void 40 | bit_vector::resize() // reallocates buf_ to hold capacity_ bits and zero-fills any bytes beyond those covering size_ bits 41 | { 42 | std::size_t old_byte_size = block_info::size(size_); 43 | std::size_t new_byte_size = block_info::size(capacity_); 44 | if (new_byte_size == 0) { free(buf_); buf_ = NULL; return; } // realloc(p, 0) may free p and return NULL; the fallback below would then free buf_ a second time 45 | block_type* new_buf = reinterpret_cast(realloc(reinterpret_cast(buf_), new_byte_size)); 46 | 47 | if (!new_buf) 48 | { 49 | //assert(buf_); 50 | //assert(new_byte_size > old_byte_size); 51 | 52 | new_buf = reinterpret_cast(malloc(new_byte_size)); 53 | //assert(new_buf); 54 | 55 | memcpy(new_buf, buf_, old_byte_size); 56 | 57 | free(buf_); 58 | } 59 | 60 | buf_ = new_buf; 61 | 62 | if (new_byte_size > old_byte_size) 63 | memset(reinterpret_cast(new_buf) + old_byte_size, 0, new_byte_size - old_byte_size); 64 | } 65 | 66 | template class bit_vector; 67 | template class bit_vector; 68 | template class bit_vector; 69 | template class bit_vector; 70 | } 71 | 72 | -------------------------------------------------------------------------------- /src/trie/block_info.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | //#include 5 | template 6 | struct choose_initial_n { 7 | static const bool c = (std::size_t(1) << n << n) != 0; 8 | static const std::size_t value = !c*n + choose_initial_n<2*c*n>::value; 9 | 10 | }; 11 | template <> 12 | struct choose_initial_n<0> { 13 | static const std::size_t value = 0; 14 | }; 15 | const
std::size_t initial_n = choose_initial_n<16>::value; 16 | 17 | template 18 | struct static_log2 { 19 | static const bool c = (x >> n) > 0; 20 | static const std::size_t value = c*n + (static_log2< (x>>c*n), n/2 >::value); 21 | }; 22 | template <> 23 | struct static_log2<1, 0> { 24 | static const std::size_t value = 0; 25 | }; 26 | 27 | namespace cindex 28 | { 29 | template 30 | class block_info 31 | { 32 | public: 33 | typedef BlockType block_type; 34 | 35 | static const std::size_t bytes_per_block = sizeof(block_type); 36 | static const std::size_t bits_per_block = bytes_per_block * 8; 37 | static const std::size_t bit_mask = bits_per_block - 1; 38 | static const std::size_t log_bits_per_block = static_log2::value; 39 | 40 | static std::size_t 41 | block_count(std::size_t n) 42 | { 43 | return (n + bits_per_block - 1) / bits_per_block; 44 | } 45 | 46 | static std::size_t 47 | size(std::size_t n) 48 | { 49 | return block_count(n) * bytes_per_block; 50 | } 51 | }; 52 | } 53 | 54 | -------------------------------------------------------------------------------- /src/trie/main.cpp: -------------------------------------------------------------------------------- 1 | using namespace std; 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "EntropyTrie.hpp" 8 | 9 | int main() { 10 | EntropyTrie trie(5); 11 | vector str; 12 | str.push_back("aaaaa"); 13 | str.push_back("aaaab"); 14 | str.push_back("azaac"); 15 | str.push_back("azzad"); 16 | str.push_back("azzze"); 17 | str.push_back("zaaaf"); 18 | str.push_back("zazzg"); 19 | str.push_back("zzaah"); 20 | str.push_back("zzzai"); 21 | str.push_back("zzzzj"); 22 | for (string s : str){ 23 | trie.insert(s); 24 | } 25 | trie.flush(); 26 | size_t pos = trie.locate("aaaab"); 27 | std::cout << pos << std::endl; 28 | 29 | 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /src/trie/sign_interleave.hpp: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | 5 | namespace cindex 6 | { 7 | class sign_interleave 8 | { 9 | public: 10 | template 11 | static T encode(const T& v) 12 | { 13 | // 0, 1, 2, 3, ... => 0, 2, 4, 6, ... 14 | // -1, -2, -3, -4, ... => 1, 3, 5, 7, ... 15 | if ((v & (static_cast(1) << (sizeof(T) * 8 - 1))) == 0) 16 | return static_cast(v << 1); 17 | else 18 | return static_cast(((~v) << 1) | 1); 19 | } 20 | 21 | template 22 | static T decode(const T& v) 23 | { 24 | // 0, 2, 4, 6, ... => 0, 1, 2, 3, ... 25 | // 1, 3, 5, 7, ... => -1, -2, -3, -4, ... 26 | if ((v & 1) == 0) 27 | return static_cast((v >> 1) & ~(static_cast(1) << (sizeof(T) * 8 - 1))); 28 | else 29 | return static_cast(~(v >> 1) | (static_cast(1) << (sizeof(T) * 8 - 1))); 30 | } 31 | }; 32 | } 33 | 34 | -------------------------------------------------------------------------------- /tests/BasicSetTest.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "varint/BasicSet.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | 12 | bool testvec(set& data) 13 | { 14 | stringstream ss; 15 | { 16 | BasicSet myset2; 17 | for (auto i : data) 18 | { 19 | myset2.addDoc(i); 20 | } 21 | // cout << "added " << data.size() << endl; 22 | // myset2.flush(); 23 | myset2.compact(); 24 | myset2.write(ss); 25 | } 26 | 27 | BasicSet myset1; 28 | myset1.read(ss); 29 | 30 | assert(data.size() == myset1.size()); 31 | BasicSet::Iterator it2(&myset1); 32 | for (auto idx : data) 33 | { 34 | assert(it2.nextDoc()!=NO_MORE_DOCS ); 35 | assert(it2.docID() == idx); 36 | } 37 | 38 | assert(it2.nextDoc() == NO_MORE_DOCS); 39 | 40 | return true; 41 | } 42 | 43 | void test(){ 44 | 45 | for (uint32_t b = 0; b <= 28; ++b) { 46 | cout << "testing1... 
b = " << b << endl; 47 | for (size_t length = 128; length < (1U << 12); length += 128) { 48 | //cout << " length = " << length << endl; 49 | set data; 50 | for (size_t i = 0; i < length; ++i) { 51 | unsigned int x = (i + (24 - i) * (12 - i)) % (1U << b); 52 | data.insert(x); 53 | } 54 | if (!testvec(data)) { 55 | return; 56 | } 57 | } 58 | cout << "testing2... b = " << b << endl; 59 | for (size_t length = 1; length < (1U << 9); ++length) { 60 | // cout << " length = " << length << endl; 61 | set data; 62 | for (size_t i = 0; i < length; ++i) { 63 | data.insert((33231 - i + i * i) % (1U << b)); 64 | } 65 | if (!testvec(data)) { 66 | return; 67 | } 68 | } 69 | } 70 | cout << "All test passed succesfully!!" << endl; 71 | } 72 | 73 | void binarySearch() 74 | { 75 | set basic; 76 | 77 | for (int i = 1; i <= 10; ++i) 78 | basic.insert(i); 79 | 80 | /* 81 | int search = 11; 82 | 83 | set::iterator upper; 84 | 85 | if (basic.find(search) == basic.end()) 86 | { 87 | upper = upper_bound(basic.begin(), basic.end(), search); 88 | } 89 | 90 | cout << "upper " << upper - basic.begin() << endl; 91 | */ 92 | } 93 | 94 | 95 | int main() 96 | { 97 | test(); 98 | // binarySearch(); 99 | } 100 | -------------------------------------------------------------------------------- /tests/CompressedSetTests.cpp: -------------------------------------------------------------------------------- 1 | // g++ --std=gnu++0x CompressedSet.cpp 2 | 3 | #include "varint/CompressedSet.h" 4 | #include "varint/bitpacking/memutil.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | 12 | bool testvec(set& data) 13 | { 14 | stringstream ss; 15 | { 16 | CompressedSet myset2; 17 | for (auto i : data) 18 | { 19 | myset2.addDoc(i); 20 | } 21 | // cout << "added " << data.size() << endl; 22 | // myset2.flush(); 23 | myset2.compact(); 24 | myset2.write(ss); 25 | } 26 | 27 | CompressedSet myset1; 28 | myset1.read(ss); 29 | 30 | assert(data.size() == myset1.size()); 31 | 
CompressedSet::Iterator it2(&myset1); 32 | for (auto idx : data) 33 | { 34 | assert(it2.nextDoc()!=NO_MORE_DOCS ); 35 | assert(it2.docID() == idx); 36 | } 37 | assert(it2.nextDoc() == NO_MORE_DOCS); 38 | 39 | return true; 40 | } 41 | 42 | void test(){ 43 | 44 | for (uint32_t b = 0; b <= 28; ++b) { 45 | cout << "testing1... b = " << b << endl; 46 | for (size_t length = 128; length < (1U << 12); length += 128) { 47 | //cout << " length = " << length << endl; 48 | set data; 49 | for (size_t i = 0; i < length; ++i) { 50 | unsigned int x = (i + (24 - i) * (12 - i)) % (1U << b); 51 | data.insert(x); 52 | } 53 | if (!testvec(data)) { 54 | return; 55 | } 56 | } 57 | cout << "testing2... b = " << b << endl; 58 | for (size_t length = 1; length < (1U << 9); ++length) { 59 | // cout << " length = " << length << endl; 60 | set data; 61 | for (size_t i = 0; i < length; ++i) { 62 | data.insert((33231 - i + i * i) % (1U << b)); 63 | } 64 | if (!testvec(data)) { 65 | return; 66 | } 67 | } 68 | } 69 | cout << "All test passed succesfully!!" 
<< endl; 70 | } 71 | 72 | 73 | int main() { 74 | test(); 75 | } 76 | -------------------------------------------------------------------------------- /tests/InvertedIndexImplTest.hpp: -------------------------------------------------------------------------------- 1 | 2 | #include "src/InvertedIndexImpl.hpp" 3 | #include "lib/tpunit++.hpp" 4 | #include "src/KVStoreInMemory.hpp" 5 | #include "varint/SetFactory.h" 6 | #include "varint/BasicSetFactory.h" 7 | #include 8 | 9 | using namespace std; 10 | 11 | /** 12 | * Test InvertedIndex 13 | */ 14 | struct InvertedIndexImplTest : tpunit::TestFixture 15 | { 16 | InvertedIndexImplTest() : tpunit::TestFixture 17 | ( 18 | TEST(InvertedIndexImplTest::testBasicSet), 19 | TEST(InvertedIndexImplTest::testCompressedSet) 20 | ) 21 | { } 22 | 23 | void testBasicSet() 24 | { 25 | shared_ptr setFactory = make_shared(); 26 | shared_ptr invertedIndexStore = make_shared("/tmp/TestInvertedIndexBasicSet"); 27 | InvertedIndexImpl invertedIndex(invertedIndexStore, setFactory); 28 | invertedIndex.add(1, 44); 29 | ASSERT_TRUE(invertedIndex.exist(1)); 30 | ASSERT_TRUE(1 == invertedIndex.remove(1, 44)); 31 | } 32 | 33 | void testCompressedSet() 34 | { 35 | shared_ptr setFactory = make_shared(); 36 | shared_ptr invertedIndexStore = make_shared("/tmp/TestInvertedIndexCompressedSet"); 37 | InvertedIndexImpl invertedIndex(invertedIndexStore, setFactory); 38 | invertedIndex.add(1, 44); 39 | ASSERT_TRUE(invertedIndex.exist(1)); 40 | } 41 | 42 | }; 43 | -------------------------------------------------------------------------------- /tests/Makefile: -------------------------------------------------------------------------------- 1 | 2 | # $@ name of the target 3 | # $^ name of all prerequisites with duplicates removed 4 | # $< name of the first prerequisite 5 | 6 | OSNAME := $(shell uname -s) 7 | ARCH := $(shell uname -m) 8 | 9 | BUILD_DIR_TEST = ../build/tests 10 | BUILD_DIR = ../build 11 | LEVELDB_DIR = ../leveldb 12 | 13 | OBJECTS = 
$(BUILD_DIR)/CompressedSet.a $(BUILD_DIR)/TokenizerImpl.o $(LEVELDB_DIR)/libleveldb.a 14 | 15 | CFLAGS_PLAIN = -std=gnu++0x -Wall -g -pedantic -msse3 -I$(LEVELDB_DIR)/include -I../lib -I../lib/rapidxml-1.13 -I.. 16 | 17 | ifeq ($(OSNAME), Darwin) 18 | CFLAGS = $(CFLAGS_PLAIN) -I/opt/local/include 19 | LIBS = -L/opt/local/lib -L/usr/local/lib -lleveldb -lm -levent 20 | endif 21 | 22 | ifeq ($(OSNAME), Linux) 23 | CFLAGS = $(CFLAGS_PLAIN) -I/usr/local/include -I/ms/dist/fsf/PROJ/libevent/2.0.19/include -D_GLIBCXX_USE_NANOSLEEP 24 | LIBS = -L/usr/local/lib -L/ms/dist/fsf/PROJ/libevent/2.0.19/lib -L../leveldb-1.7.0 -lm -levent -lpthread 25 | endif 26 | 27 | 28 | clean: 29 | rm -f $(BUILD_DIR_TEST)/* 30 | 31 | all: memory_leak_test thread_test document_test tokenizer_test xml_test inverted_index_test compressed_set_test basic_set_test statistics_test 32 | 33 | memory_leak_test: memory_leak_test.cpp $(OBJECTS) 34 | $(CXX) $(CFLAGS) -o $(BUILD_DIR_TEST)/$@ $^ $(LIBS) 35 | 36 | thread_test: thread_test.cpp $(OBJECTS) 37 | $(CXX) $(CFLAGS) -o $(BUILD_DIR_TEST)/$@ $^ $(LIBS) 38 | 39 | document_test: document_test.cpp $(OBJECTS) 40 | $(CXX) $(CFLAGS) -o $(BUILD_DIR_TEST)/$@ $^ $(LIBS) 41 | 42 | tokenizer_test: tokenizer_test.cpp $(OBJECTS) 43 | $(CXX) $(CFLAGS) -o $(BUILD_DIR_TEST)/$@ $^ $(LIBS) 44 | 45 | xml_test: xml_test.cpp $(OBJECTS) 46 | $(CXX) $(CFLAGS) -o $(BUILD_DIR_TEST)/$@ $^ $(LIBS) 47 | 48 | inverted_index_test: inverted_index_test.cpp $(OBJECTS) 49 | $(CXX) $(CFLAGS) -o $(BUILD_DIR_TEST)/$@ $^ $(LIBS) 50 | 51 | compressed_set_test: CompressedSetTests.cpp $(OBJECTS) 52 | $(CXX) $(CFLAGS) -o $(BUILD_DIR_TEST)/$@ $^ $(LIBS) 53 | 54 | compressed_set_test2: CompressedSet_test.cpp $(OBJECTS) 55 | $(CXX) $(CFLAGS) -o $(BUILD_DIR_TEST)/$@ $^ $(LIBS) 56 | 57 | basic_set_test: BasicSetTest.cpp $(OBJECTS) 58 | $(CXX) $(CFLAGS) -o $(BUILD_DIR_TEST)/$@ $^ $(LIBS) 59 | 60 | statistics_test: statistics_test.cpp $(OBJECTS) 61 | $(CXX) $(CFLAGS) -o $(BUILD_DIR_TEST)/$@ $^ 
$(LIBS) 62 | 63 | -------------------------------------------------------------------------------- /tests/SparseSet_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "lib/tpunit++.hpp" 11 | #include "TestUtils.hpp" 12 | #include "src/SparseSet.hpp" 13 | using namespace std; 14 | 15 | struct SparseSetTest : tpunit::TestFixture 16 | { 17 | SparseSetTest() : tpunit::TestFixture 18 | ( 19 | TEST(SparseSetTest::testSet) 20 | ) 21 | {} 22 | 23 | void testSet() 24 | { 25 | SparseSet set; 26 | 27 | set.insert(65535); 28 | set.insert(512); 29 | ASSERT_EQUAL(set.ismember(65534),false); 30 | ASSERT_EQUAL(set.ismember(65535),true); 31 | ASSERT_EQUAL(set.ismember(65536),false); 32 | 33 | const unsigned int* iter = set.begin(); 34 | ASSERT_EQUAL(*iter,65535); 35 | iter++; 36 | ASSERT_EQUAL(*iter,512); 37 | iter++; 38 | ASSERT_EQUAL(iter,set.end()); 39 | } 40 | }; 41 | 42 | int main() 43 | { 44 | SparseSetTest test; 45 | 46 | /** 47 | * Run all of the registered tpunit++ tests. Returns 0 if 48 | * all tests are successful, otherwise returns the number 49 | * of failing assertions. 
50 | */ 51 | return tpunit::Tests::Run(); 52 | } -------------------------------------------------------------------------------- /tests/StatisticsTest.hpp: -------------------------------------------------------------------------------- 1 | 2 | #include "src/Statistics.hpp" 3 | #include "lib/tpunit++.hpp" 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace std; 10 | 11 | /** 12 | * Test Statistics 13 | */ 14 | struct StatisticsTest : tpunit::TestFixture 15 | { 16 | StatisticsTest() : tpunit::TestFixture 17 | ( 18 | TEST(StatisticsTest::testQueries), 19 | TEST(StatisticsTest::testRequestTimes) 20 | ) 21 | { } 22 | 23 | void printTopQueries(const map, classcomp>& topQueries) 24 | { 25 | for (auto it = topQueries.begin(); it != topQueries.end(); ++it) 26 | { 27 | cout << it->first << " "; 28 | for (auto qs : it->second) 29 | { 30 | cout << qs << " "; 31 | } 32 | cout << endl; 33 | } 34 | } 35 | 36 | void testQueries() 37 | { 38 | Statistics s; 39 | 40 | s.addQuery("snoop"); 41 | s.addQuery("dawg"); 42 | s.addQuery("werd"); 43 | s.addQuery("snoop"); 44 | s.addQuery("snoop"); 45 | s.addQuery("snoop"); 46 | s.addQuery("dawg"); 47 | s.addQuery("werd"); 48 | s.addQuery("yo"); 49 | 50 | auto topQueries = s.getTopQueries(3); 51 | printTopQueries(topQueries); 52 | 53 | ASSERT_EQUAL(topQueries.size(), 3); 54 | 55 | topQueries = s.getTopQueries(2); 56 | printTopQueries(topQueries); 57 | 58 | auto it = topQueries.begin(); 59 | ASSERT_EQUAL(topQueries.size(), 2); 60 | 61 | ASSERT_EQUAL(it->first, 4); 62 | ASSERT_EQUAL((it->second).size(), 1); 63 | 64 | ++it; 65 | ASSERT_EQUAL(it->first, 2); 66 | ASSERT_EQUAL((it->second).size(), 2); 67 | 68 | } 69 | 70 | void testRequestTimes() 71 | { 72 | chrono::high_resolution_clock::time_point t0 = chrono::high_resolution_clock::now(); 73 | Statistics s; 74 | chrono::high_resolution_clock::time_point t1 = chrono::high_resolution_clock::now(); 75 | 76 | chrono::nanoseconds timeTaken = chrono::duration_cast(t1 - t0); 77 
| 78 | string req = "creating statistics"; 79 | s.logRequestTime(req, timeTaken); 80 | 81 | auto requestTimes = s.getRequestTimes(req); 82 | cout << req << " took "; 83 | for (auto ms : requestTimes) 84 | { 85 | cout << ms.count() << "ns "; 86 | } 87 | cout << endl; 88 | } 89 | 90 | }; 91 | -------------------------------------------------------------------------------- /tests/TestUtils.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef TESTUTILS_HPP 3 | #define TESTUTILS_HPP 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace std; 10 | 11 | string readFile(const string& fileName) 12 | { 13 | ifstream fin(fileName.c_str()); 14 | 15 | if (fin.fail()) 16 | throw "Could not open " + fileName + "!"; 17 | 18 | fin.seekg(0, ios::end); 19 | size_t length = fin.tellg(); 20 | fin.seekg(0, ios::beg); 21 | char* buffer = new char[length + 1]; 22 | fin.read(buffer, length); 23 | buffer[length] = '\0'; 24 | 25 | fin.close(); 26 | 27 | string fileStr(buffer); 28 | delete [] buffer; 29 | 30 | return fileStr; 31 | } 32 | 33 | string stripSpecialCharacters(const string& input) 34 | { 35 | string clean; 36 | 37 | for (char c : input) 38 | { 39 | if (c == '\n' || c == '\t') 40 | continue; 41 | 42 | clean += c; 43 | } 44 | 45 | return clean; 46 | } 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /tests/TokenizerTest.hpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "src/ITokenizer.h" 12 | #include "src/TokenizerImpl.h" 13 | #include "src/Constants.hpp" 14 | #include "lib/tpunit++.hpp" 15 | #include "TestUtils.hpp" 16 | 17 | using namespace std; 18 | 19 | 20 | /** 21 | * Test the tokenizer 22 | */ 23 | struct TokenizerTest : tpunit::TestFixture 24 | { 25 | TokenizerTest() : tpunit::TestFixture 26 | ( 27 | 
TEST(TokenizerTest::testTokenizing) 28 | ) 29 | { } 30 | 31 | void testTokenizing() 32 | { 33 | string text(" snoop doggy dawg"); 34 | 35 | shared_ptr tokenizer = make_shared(); 36 | 37 | tokenizer->setString(text, "field1"); 38 | 39 | while (tokenizer->nextToken()) 40 | { 41 | string token = tokenizer->getToken(); 42 | cout << token << endl; 43 | } 44 | 45 | /* 46 | vector words = qp.getTokens(); 47 | 48 | ASSERT_EQUAL(words.size(), 3); 49 | ASSERT_EQUAL(words[0].compare("snoop"), 0); 50 | ASSERT_EQUAL(words[1].compare("doggy"), 0); 51 | ASSERT_EQUAL(words[2].compare("dawg"), 0); 52 | */ 53 | } 54 | 55 | }; -------------------------------------------------------------------------------- /tests/XmlTest.hpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | #include "lib/rapidxml-1.13/rapidxml.hpp" 13 | #include "lib/tpunit++.hpp" 14 | #include "TestUtils.hpp" 15 | 16 | using namespace std; 17 | 18 | /** 19 | * Test rapid xml 20 | */ 21 | struct XmlTest : tpunit::TestFixture 22 | { 23 | XmlTest() : tpunit::TestFixture 24 | ( 25 | TEST(XmlTest::testParsingDocument) 26 | ) 27 | { } 28 | 29 | void testParsingDocument() 30 | { 31 | string fileStr; 32 | 33 | try 34 | { 35 | fileStr = readFile("../data/document01.xml"); 36 | } 37 | catch(const string& e) 38 | { 39 | TRACE(e.c_str()); 40 | ABORT(); 41 | } 42 | 43 | // std::cout << fileStr << std::endl; 44 | 45 | vector xmlVec; 46 | copy(fileStr.begin(), fileStr.end(), back_inserter(xmlVec)); 47 | xmlVec.push_back('\n'); 48 | 49 | rapidxml::xml_document<> doc; // character type defaults to char 50 | doc.parse(&xmlVec[0]); // 0 means default parse flags 51 | 52 | string root(doc.first_node()->name()); 53 | ASSERT_EQUAL(root.compare("document"), 0); 54 | 55 | vector fields; 56 | 57 | for (rapidxml::xml_node<>* n = doc.first_node()->first_node(); n; n = n->next_sibling()) 58 
| { 59 | string name(n->name()); 60 | // char* v = n->value(); 61 | // if (!v || !*v) v = "(empty)"; 62 | string value(n->value()); 63 | fields.push_back(name + ":" + value); 64 | } 65 | 66 | ASSERT_EQUAL(fields.size(), 3); 67 | ASSERT_EQUAL(fields[0].compare("title:Input document"), 0); 68 | ASSERT_EQUAL(fields[1].compare("input1: some text"), 0); 69 | ASSERT_EQUAL(fields[2].compare("input1: some more text"), 0); 70 | } 71 | 72 | }; 73 | 74 | -------------------------------------------------------------------------------- /tests/document_test.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "DocumentImplTest.hpp" 12 | 13 | #include "lib/tpunit++.hpp" 14 | 15 | using namespace std; 16 | 17 | int main() 18 | { 19 | // QueryParserTest __QueryParserTest; 20 | // XmlTest __XmlTest; 21 | DocumentImplTest __DocumentImplTest; 22 | // InvertedIndexSimpleTest __InvertedIndexSimpleTest; 23 | 24 | /** 25 | * Run all of the registered tpunit++ tests. Returns 0 if 26 | * all tests are successful, otherwise returns the number 27 | * of failing assertions. 28 | */ 29 | return tpunit::Tests::Run(); 30 | 31 | } 32 | -------------------------------------------------------------------------------- /tests/inverted_index_test.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "InvertedIndexImplTest.hpp" 3 | // #include "InvertedIndexSimpleBatchTest.hpp" 4 | #include "lib/tpunit++.hpp" 5 | 6 | using namespace std; 7 | 8 | int main() 9 | { 10 | InvertedIndexImplTest __InvertedIndexImplTest; 11 | // InvertedIndexSimpleBatchTest __InvertedIndexSimpleBatchTest; 12 | 13 | /** 14 | * Run all of the registered tpunit++ tests. Returns 0 if 15 | * all tests are successful, otherwise returns the number 16 | * of failing assertions. 
17 | */ 18 | return tpunit::Tests::Run(); 19 | 20 | } 21 | 22 | -------------------------------------------------------------------------------- /tests/memory_leak_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | using namespace std; 14 | 15 | void work() 16 | { 17 | char *c = new char[1000]; 18 | } 19 | 20 | int main() 21 | { 22 | work(); 23 | 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /tests/runTests.sh: -------------------------------------------------------------------------------- 1 | 2 | TEST_DIR="../build/tests" 3 | 4 | for file in `find $TEST_DIR -perm -g=x -type f` 5 | do 6 | echo "running $file" 7 | $file 8 | done 9 | 10 | -------------------------------------------------------------------------------- /tests/statistics_test.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "StatisticsTest.hpp" 3 | #include "lib/tpunit++.hpp" 4 | 5 | using namespace std; 6 | 7 | int main() 8 | { 9 | StatisticsTest __StatisticsTestTest; 10 | 11 | /** 12 | * Run all of the registered tpunit++ tests. Returns 0 if 13 | * all tests are successful, otherwise returns the number 14 | * of failing assertions. 15 | */ 16 | return tpunit::Tests::Run(); 17 | 18 | } 19 | 20 | -------------------------------------------------------------------------------- /tests/tokenizer_test.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "TokenizerTest.hpp" 3 | #include "lib/tpunit++.hpp" 4 | 5 | using namespace std; 6 | 7 | int main() 8 | { 9 | TokenizerTest __TokenizerTest; 10 | 11 | /** 12 | * Run all of the registered tpunit++ tests. 
Returns 0 if 13 | * all tests are successful, otherwise returns the number 14 | * of failing assertions. 15 | */ 16 | return tpunit::Tests::Run(); 17 | 18 | } 19 | -------------------------------------------------------------------------------- /tests/xml_test.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "XmlTest.hpp" 3 | #include "lib/tpunit++.hpp" 4 | 5 | using namespace std; 6 | 7 | int main() 8 | { 9 | XmlTest __XmlTest; 10 | 11 | /** 12 | * Run all of the registered tpunit++ tests. Returns 0 if 13 | * all tests are successful, otherwise returns the number 14 | * of failing assertions. 15 | */ 16 | return tpunit::Tests::Run(); 17 | 18 | } 19 | -------------------------------------------------------------------------------- /unicode/UnicodeUtils.cpp: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////////// 2 | // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 3 | // Distributable under the terms of either the Apache License (Version 2.0) 4 | // or the GNU Lesser General Public License. 
5 | ///////////////////////////////////////////////////////////////////////////// 6 | 7 | #include "LuceneInc.h" 8 | #include "MiscUtils.h" 9 | #include "UnicodeUtils.h" 10 | #include "unicode/guniprop.h" 11 | 12 | namespace Lucene 13 | { 14 | UnicodeUtil::~UnicodeUtil() 15 | { 16 | } 17 | 18 | bool UnicodeUtil::isAlnum(wchar_t c) 19 | { 20 | return g_unichar_isalnum(c); 21 | } 22 | 23 | bool UnicodeUtil::isAlpha(wchar_t c) 24 | { 25 | return g_unichar_isalpha(c); 26 | } 27 | 28 | bool UnicodeUtil::isDigit(wchar_t c) 29 | { 30 | return g_unichar_isdigit(c); 31 | } 32 | 33 | bool UnicodeUtil::isSpace(wchar_t c) 34 | { 35 | return g_unichar_isspace(c); 36 | } 37 | 38 | bool UnicodeUtil::isUpper(wchar_t c) 39 | { 40 | return g_unichar_isupper(c); 41 | } 42 | 43 | bool UnicodeUtil::isLower(wchar_t c) 44 | { 45 | return g_unichar_islower(c); 46 | } 47 | 48 | bool UnicodeUtil::isOther(wchar_t c) 49 | { 50 | return (g_unichar_type(c) == G_UNICODE_OTHER_LETTER); 51 | } 52 | 53 | bool UnicodeUtil::isNonSpacing(wchar_t c) 54 | { 55 | return (g_unichar_type(c) == G_UNICODE_NON_SPACING_MARK); 56 | } 57 | 58 | wchar_t UnicodeUtil::toUpper(wchar_t c) 59 | { 60 | return (wchar_t)g_unichar_toupper(c); 61 | } 62 | 63 | wchar_t UnicodeUtil::toLower(wchar_t c) 64 | { 65 | return (wchar_t)g_unichar_tolower(c); 66 | } 67 | 68 | UTF8Result::~UTF8Result() 69 | { 70 | } 71 | 72 | UnicodeResult::~UnicodeResult() 73 | { 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /varint/BasicSet.h: -------------------------------------------------------------------------------- 1 | #ifndef BASIC_SET_H__ 2 | #define BASIC_SET_H__ 3 | 4 | #include "Set.h" 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | 10 | class BasicSet; 11 | 12 | class BasicSet : public Set 13 | { 14 | 15 | public: 16 | 17 | class Iterator : public Set::Iterator 18 | { 19 | private: 20 | set::iterator cursor; 21 | // parent 22 | const BasicSet* set; 23 | 24 | bool init = 
false; 25 | 26 | public: 27 | 28 | Iterator(const BasicSet* parentSet); 29 | Iterator(const BasicSet::Iterator& other); 30 | 31 | BasicSet::Iterator& operator=(const BasicSet::Iterator& rhs); 32 | ~Iterator(); 33 | 34 | unsigned int docID(); 35 | unsigned int nextDoc(); 36 | unsigned int Advance(unsigned int target); 37 | 38 | }; 39 | 40 | private: 41 | 42 | const BasicSet& operator=(const BasicSet& other); 43 | 44 | set docs; 45 | 46 | public: 47 | 48 | BasicSet(const BasicSet& other); 49 | 50 | /** 51 | * Swap the content of this bitmap with another bitmap. 52 | * No copying is done. (Running time complexity is constant.) 53 | */ 54 | void swap(BasicSet & x); 55 | 56 | 57 | BasicSet(); 58 | 59 | ~BasicSet(); 60 | 61 | void write(ostream & out); 62 | 63 | void read(istream & in); 64 | 65 | shared_ptr iterator() const; 66 | 67 | 68 | /** 69 | * Add document to this set 70 | * Note that you must set the bits in increasing order: 71 | * addDoc(1), addDoc(2) is ok; 72 | * addDoc(2), addDoc(1) is not ok. 
73 | */ 74 | void addDoc(unsigned int docId); 75 | 76 | void addDocs(unsigned int* docids,size_t start,size_t len); 77 | 78 | BasicSet unorderedAdd(unsigned int docId); 79 | 80 | void removeDocId(unsigned int docId); 81 | 82 | BasicSet removeDoc(unsigned int docId); 83 | 84 | void compact(); 85 | 86 | void initSet(); 87 | 88 | /** 89 | * Gets the number of ids in the set 90 | * @return docset size 91 | */ 92 | unsigned int size() const; 93 | 94 | //This method will not work after a call to flush() 95 | bool find(unsigned int target) const; 96 | 97 | }; 98 | 99 | #endif 100 | -------------------------------------------------------------------------------- /varint/BasicSetFactory.h: -------------------------------------------------------------------------------- 1 | #ifndef BASIC_SET_FACTORY_H__ 2 | #define BASIC_SET_FACTORY_H__ 3 | 4 | #include "Set.h" 5 | #include "BasicSet.h" 6 | #include "ISetFactory.h" 7 | #include 8 | 9 | class BasicSetFactory : public ISetFactory 10 | { 11 | 12 | public: 13 | 14 | virtual const shared_ptr createSparseSet() 15 | { 16 | return make_shared(); 17 | } 18 | 19 | }; 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /varint/Codec.h: -------------------------------------------------------------------------------- 1 | #ifndef CODEC_H__ 2 | #define CODEC_H__ 3 | #include "bitpacking/compositecodec.h" 4 | #include "bitpacking/simdbinarypacking.h" 5 | #include "bitpacking/variablebyte.h" 6 | #include "Source.h" 7 | #include "Sink.h" 8 | #include "CompressedDeltaChunk.h" 9 | #include "bitpacking/util.h" 10 | using namespace std; 11 | 12 | class Codec { 13 | private: 14 | CompositeCodec codec; 15 | 16 | public: 17 | Codec(){ 18 | } 19 | 20 | 21 | ~Codec(){ 22 | } 23 | 24 | /** 25 | * @return the compressed size in bytes 26 | */ 27 | template 28 | __inline__ shared_ptr Compress(const srctype src, size_t srcSize) const { 29 | assert(!needPaddingTo128Bits(src)); 30 | size_t nbyte = 
codec.compressedSize((const uint32_t *)src, (sizeof(*src)*srcSize) / 4); 31 | //shared_ptr compblock(new CompressedDeltaChunk((sizeof(*src)*(srcSize+ 2048)))); 32 | shared_ptr compblock(new CompressedDeltaChunk(nbyte)); 33 | 34 | vector& v = compblock->getVector(); 35 | assert(!needPaddingTo128Bits(&v[0])); 36 | size_t memavailable = v.size()/4; 37 | codec.encodeArray((const uint32_t *)src, (sizeof(*src)*srcSize) / 4,(uint32_t *)&v[0], memavailable); 38 | //compblock->resize(memavailable*4); 39 | return compblock; 40 | } 41 | 42 | size_t Uncompress(Source& src, unsigned int* dst,size_t size) const { 43 | assert(!needPaddingTo128Bits(dst)); 44 | 45 | 46 | size_t sourceSize; 47 | const uint8* srcptr = src.Peek(&sourceSize); 48 | const uint32_t* srcptr2= (const uint32_t*)srcptr; 49 | assert(!needPaddingTo128Bits(srcptr2)); 50 | size_t memavailable = size; 51 | codec.decodeArray(srcptr2, sourceSize/4,dst,memavailable); 52 | return memavailable*4; 53 | } 54 | 55 | bool findInArray(const unsigned int* array, size_t size,unsigned int target) const { 56 | for(unsigned int idx = 0; idx= target) 59 | return (lastId == target); 60 | } 61 | return false; 62 | } 63 | 64 | }; 65 | 66 | #endif // CODEC_H__ 67 | -------------------------------------------------------------------------------- /varint/Common.h: -------------------------------------------------------------------------------- 1 | #ifndef COMMON_H__ 2 | #define COMMON_H__ 3 | #include 4 | #include 5 | #include 6 | 7 | #ifdef HAVE_BUILTIN_EXPECT 8 | #define PREDICT_FALSE(x) (__builtin_expect(x, 0)) 9 | #define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) 10 | #else 11 | #define PREDICT_FALSE(x) x 12 | #define PREDICT_TRUE(x) x 13 | #endif 14 | 15 | #ifdef _MSC_VER 16 | typedef unsigned __int8 uint8; 17 | #else 18 | typedef uint8_t uint8; 19 | #endif 20 | typedef char v16qi __attribute__ ((vector_size (16))); 21 | 22 | #endif // COMMON_H__ 23 | 
-------------------------------------------------------------------------------- /varint/CompressedDeltaChunk.cpp: -------------------------------------------------------------------------------- 1 | #include "CompressedDeltaChunk.h" 2 | #include 3 | #include "Common.h" 4 | #include "Sink.h" 5 | #include "Source.h" 6 | #include "bitpacking/memutil.h" 7 | /* Alignment check used by the asserts below: returns true when any of the low four bits of the address inbyte are set (the & 15 test), i.e. the pointer is not on a 16-byte (128-bit) boundary. NOTE(review): the template parameter list and the reinterpret_cast target type appear to be missing from this listing - restore them from the original file before compiling. */ 8 | template 9 | __attribute__ ((const)) 10 | bool needPaddingTo128Bits(const T * inbyte) { 11 | return reinterpret_cast (inbyte) & 15; 12 | } 13 | /* Default constructor: leaves data_ without an explicit size and sets compressedSize_ to 0. NOTE(review): the assert takes the address of data_[0] while data_ looks empty at this point - confirm that indexing element 0 is valid for the container type declared in the header. */ CompressedDeltaChunk::CompressedDeltaChunk(){ 14 | assert(!needPaddingTo128Bits(&data_[0])); 15 | compressedSize_ = 0; 16 | } 17 | /* Creates a chunk whose buffer data_ is constructed with compressedSize elements, records that size in compressedSize_, and asserts that the start of the buffer is 16-byte aligned. NOTE(review): a compressedSize of 0 would make data_[0] index an empty buffer - confirm callers never pass 0. */ 18 | CompressedDeltaChunk::CompressedDeltaChunk(size_t compressedSize):data_(compressedSize){ 19 | compressedSize_ = compressedSize; 20 | assert(!needPaddingTo128Bits(&data_[0])); 21 | } 22 | /* Deserializing constructor: reads a 4-byte length from in into compressedSize_, resizes data_ to that length, then reads that many bytes into the buffer. This mirrors the layout emitted by write(). NOTE(review): only the first 4 bytes of compressedSize_ are filled (it is zero-initialized first), which presumably relies on a little-endian host when the member is wider than 4 bytes - confirm. The stream state is not checked after either read, and a stored length of 0 leads to data_[0] on an empty buffer. */ 23 | CompressedDeltaChunk::CompressedDeltaChunk(istream & in) :compressedSize_(0) { 24 | in.read((char*)&compressedSize_,4); 25 | data_.resize(compressedSize_); 26 | in.read((char*)&(data_[0]),compressedSize_); 27 | assert(!needPaddingTo128Bits(&data_[0])); 28 | } 29 | /* Resizes the buffer to newsize and records newsize as the compressed size. The copy into tmp followed by std::swap replaces data_ with a fresh copy of itself, presumably to release spare capacity - TODO confirm. The final assert re-checks 16-byte alignment of the new buffer. */ 30 | void CompressedDeltaChunk::resize(size_t newsize){ 31 | compressedSize_ = newsize; 32 | data_.resize(newsize); 33 | vector tmp(data_); 34 | std::swap(data_, tmp); 35 | assert(!needPaddingTo128Bits(&data_[0])); 36 | } 37 | /* Returns a mutable reference to the underlying buffer data_, so callers can write into it directly. */ 38 | vector& CompressedDeltaChunk::getVector(){ 39 | return data_; 40 | } 41 | /* Destructor: performs no explicit cleanup in its body. */ 42 | CompressedDeltaChunk::~CompressedDeltaChunk(){ 43 | } 44 | 45 | /* Returns the value currently stored in compressedSize_. */ 46 | size_t CompressedDeltaChunk::getCompressedSize(){ 47 | return compressedSize_; 48 | } 49 | /* Builds a Sink from a pointer to the first element of data_ and the length compressedSize_. */ 50 | Sink CompressedDeltaChunk::getSink(){ 51 | return Sink((char*)&(data_[0]),compressedSize_); 52 | } 53 | /* Builds a Source from a pointer to the first element of data_ and the length compressedSize_. NOTE(review): this is a const method, so the C-style cast to char* drops const from the buffer pointer - confirm Source only reads through it. */ 54 | Source CompressedDeltaChunk::getSource() const { 55 | return Source((char*)&(data_[0]),compressedSize_); 56 | } 57 | /* Serializes the chunk to out: the first 4 bytes of compressedSize_, followed by compressedSize_ bytes taken from the start of data_. This is the format consumed by the istream constructor. NOTE(review): writing only 4 raw bytes of the size carries the same width and byte-order assumption as the reading side, and the stream state is not checked. */ 58 | void CompressedDeltaChunk::write(ostream & out) const{ 59 | out.write((char*)&compressedSize_,4); 60 | out.write((char*)&(data_[0]),compressedSize_); 61 | }
-------------------------------------------------------------------------------- /varint/CompressedDeltaChunk.h: -------------------------------------------------------------------------------- 1 | #ifndef COMPRESSED_DELTA_CHUNK_H__ 2 | #define COMPRESSED_DELTA_CHUNK_H__ 3 | #include 4 | #include "Common.h" 5 | #include "Sink.h" 6 | #include "Source.h" 7 | #include "bitpacking/memutil.h" 8 | class CompressedDeltaChunk { 9 | private: 10 | vector data_; 11 | size_t compressedSize_; 12 | //disable copy constructor 13 | CompressedDeltaChunk(const CompressedDeltaChunk& other); 14 | CompressedDeltaChunk& operator=(const CompressedDeltaChunk& other); 15 | public: 16 | CompressedDeltaChunk(); 17 | CompressedDeltaChunk(size_t compressedSize); 18 | CompressedDeltaChunk(istream & in); 19 | void resize(size_t newsize); 20 | vector& getVector(); 21 | ~CompressedDeltaChunk(); 22 | size_t getCompressedSize(); 23 | Sink getSink(); 24 | Source getSource() const; 25 | void write(ostream & out) const; 26 | 27 | inline void swap(CompressedDeltaChunk & x) throw (){ 28 | using std::swap; 29 | swap(this->data_, x.data_); 30 | swap(this->compressedSize_, x.compressedSize_); 31 | } 32 | friend void swap(CompressedDeltaChunk& lhs, CompressedDeltaChunk& rhs) noexcept 33 | { 34 | lhs.swap(rhs); 35 | } 36 | }__attribute__ ((aligned (256))); 37 | #endif // COMPRESSED_DELTA_CHUNK_H__ -------------------------------------------------------------------------------- /varint/DeltaChunkStore.h: -------------------------------------------------------------------------------- 1 | #ifndef DELTA_CHUNK_STORE_H__ 2 | #define DELTA_CHUNK_STORE_H__ 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "CompressedDeltaChunk.h" 8 | using namespace std; 9 | 10 | class DeltaChunkStore { 11 | vector > data2; 12 | public: 13 | DeltaChunkStore(){ 14 | } 15 | 16 | shared_ptr allocateBlock(size_t compressedSize){ 17 | shared_ptr compblock(new CompressedDeltaChunk(compressedSize)); 18 | return compblock; 19 
| } 20 | 21 | void add(const shared_ptr& val) { 22 | data2.push_back(val); 23 | } 24 | 25 | const CompressedDeltaChunk& get(int index) const { 26 | return *data2[index]; 27 | } 28 | 29 | void compact(){ 30 | if (data2.size() != data2.capacity()) { 31 | vector > tmp = data2; 32 | std::swap(data2, tmp); 33 | } 34 | } 35 | 36 | size_t size() const { 37 | return data2.size(); 38 | } 39 | 40 | int getSerialIntNum() const { 41 | int num = 1; // _len 42 | for(size_t i=0; i compblock(new CompressedDeltaChunk(in)); 65 | data2.push_back(compblock); 66 | } 67 | } 68 | 69 | inline void swap(DeltaChunkStore & x)throw (){ // No throw exception guarantee 70 | using std::swap; 71 | swap(this->data2, x.data2); 72 | } 73 | 74 | friend void swap(DeltaChunkStore& lhs, DeltaChunkStore& rhs) noexcept 75 | { 76 | lhs.swap(rhs); 77 | } 78 | 79 | }; 80 | #endif // DELTA_CHUNK_STORE_H__ -------------------------------------------------------------------------------- /varint/ISetFactory.h: -------------------------------------------------------------------------------- 1 | #ifndef ISET_FACTORY_H__ 2 | #define ISET_FACTORY_H__ 3 | 4 | #include "Set.h" 5 | #include 6 | 7 | class ISetFactory 8 | { 9 | 10 | public: 11 | 12 | virtual ~ISetFactory() 13 | { 14 | 15 | } 16 | 17 | virtual const std::shared_ptr createSparseSet() = 0; 18 | 19 | }; 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /varint/LazyAndSet.h: -------------------------------------------------------------------------------- 1 | #ifndef LAZY_AND_SET_H__ 2 | #define LAZY_AND_SET_H__ 3 | #include "CompressedSet.h" 4 | 5 | class LazyAndSet; 6 | 7 | class LazyAndSetIterator : public Set::Iterator { 8 | private: 9 | unsigned lastReturn; 10 | vector > iterators; 11 | const LazyAndSet& set; 12 | public: 13 | LazyAndSetIterator(const LazyAndSet* parent); 14 | unsigned int docID(); 15 | unsigned int nextDoc(); 16 | unsigned int Advance(unsigned int target); 17 | }; 18 | 19 | class 
LazyAndSet : public Set { 20 | public: 21 | vector > sets_; 22 | int nonNullSize; 23 | mutable unsigned int setSize; 24 | mutable bool init = false; 25 | LazyAndSet(); 26 | 27 | LazyAndSet(vector >& sets); 28 | 29 | LazyAndSet(shared_ptr & left,shared_ptr & right); 30 | 31 | inline bool find(unsigned int val) const; 32 | 33 | unsigned int size() const ; 34 | 35 | shared_ptr iterator() const; 36 | 37 | }; 38 | 39 | #endif // LAZY_AND_SET_H__ 40 | -------------------------------------------------------------------------------- /varint/LazyOrSet.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Implementation of the union set of multiple DocIdSets (which essentially is a merged set of thes DocIdSets). 3 | */ 4 | #ifndef LAZY_OR_SET_H__ 5 | #define LAZY_OR_SET_H__ 6 | 7 | #include "CompressedSet.h" 8 | 9 | class LazyOrSetIterator : public Set::Iterator { 10 | private: 11 | class Item { 12 | public: 13 | shared_ptr iter; 14 | unsigned int doc; 15 | Item(shared_ptr it){ 16 | iter = it; 17 | doc = 0; 18 | } 19 | }; 20 | unsigned _curDoc; 21 | vector> _heap; 22 | int _size; 23 | void heapRemoveRoot(); 24 | void heapAdjust(); 25 | public: 26 | LazyOrSetIterator(vector> sets); 27 | unsigned int docID(); 28 | unsigned int nextDoc(); 29 | unsigned int Advance(unsigned int target); 30 | }; 31 | 32 | class LazyOrSet : public Set 33 | { 34 | private: 35 | const int INVALID = -1; 36 | vector> sets; 37 | mutable int _size = INVALID; 38 | 39 | public: 40 | 41 | LazyOrSet(vector> docSets); 42 | LazyOrSet(shared_ptr & left,shared_ptr & right); 43 | shared_ptr iterator() const; 44 | 45 | //Override 46 | unsigned int size() const; 47 | 48 | bool find(unsigned int val) const; 49 | }; 50 | 51 | 52 | #endif //LAZY_OR_SET_H__ -------------------------------------------------------------------------------- /varint/Makefile: -------------------------------------------------------------------------------- 1 | 2 | # $@ name of the target 3 | # $^ 
name of all prerequisites with duplicates removed 4 | # $< name of the first prerequisite 5 | 6 | OSNAME := $(shell uname -s) 7 | CFLAGS_SIMPLE = -std=gnu++0x -g -Wall -pedantic 8 | #CFLAGS_SIMPLE = -flto -std=gnu++0x -g0 -Wall -Ofast -funroll-loops -pedantic 9 | BUILD_DIR = ../build 10 | 11 | LIBOBJECTS = $(BUILD_DIR)/varint_CompressedSet.o $(BUILD_DIR)/varint_CompressedDeltaChunk.o $(BUILD_DIR)/varint_bitpacksimd.o $(BUILD_DIR)/varint_util.o $(BUILD_DIR)/varint_LazyAndSet.o $(BUILD_DIR)/varint_LazyOrSet.o $(BUILD_DIR)/varint_BasicSet.o 12 | 13 | HEADERS = 14 | 15 | 16 | ifeq ($(OSNAME), Darwin) 17 | CFLAGS = $(CFLAGS_SIMPLE) 18 | LIBS = 19 | endif 20 | 21 | ifeq ($(OSNAME), Linux) 22 | CFLAGS = $(CFLAGS_SIMPLE) -msse2 23 | LIBS = 24 | endif 25 | 26 | # MINGW 27 | ifneq (,$(findstring MINGW,$(OSNAME))) 28 | CFLAGS = $(CFLAGS_SIMPLE) 29 | LIBS = 30 | endif 31 | 32 | # Cygwin 33 | ifneq (,$(findstring /cygdrive/,$(PATH))) 34 | CFLAGS = $(CFLAGS_SIMPLE) -msse2 35 | LIBS = 36 | endif 37 | 38 | 39 | clean : 40 | rm -f $(LIBOBJECTS) *.o $(BUILD_DIR)/CompressedSet.a 41 | 42 | all: CompressedSet.a 43 | 44 | CompressedSet.a : $(LIBOBJECTS) 45 | $(AR) -rs $(BUILD_DIR)/$@ $(LIBOBJECTS) 46 | 47 | 48 | ## 49 | # Classes 50 | ## 51 | 52 | $(BUILD_DIR)/varint_CompressedSet.o : CompressedSet.cpp 53 | $(CXX) -c $(CFLAGS) $< -o $@ 54 | 55 | $(BUILD_DIR)/varint_BasicSet.o : BasicSet.cpp 56 | $(CXX) -c $(CFLAGS) $< -o $@ 57 | 58 | $(BUILD_DIR)/varint_CompressedDeltaChunk.o : CompressedDeltaChunk.cpp 59 | $(CXX) -c $(CFLAGS) $< -o $@ 60 | 61 | $(BUILD_DIR)/varint_bitpacksimd.o : bitpacking/bitpacksimd.cpp 62 | $(CXX) -c $(CFLAGS) $< -o $@ 63 | 64 | $(BUILD_DIR)/varint_bitpacksimd.o : bitpacking/simdintegratedbitpacking.c 65 | $(CXX) -c $(CFLAGS) $< -o $@ 66 | 67 | $(BUILD_DIR)/varint_util.o : bitpacking/util.cpp 68 | $(CXX) -c $(CFLAGS) $< -o $@ 69 | 70 | $(BUILD_DIR)/varint_LazyAndSet.o : LazyAndSet.cpp 71 | $(CXX) -c $(CFLAGS) $< -o $@ 72 | 73 | $(BUILD_DIR)/varint_LazyOrSet.o : 
LazyOrSet.cpp 74 | $(CXX) -c $(CFLAGS) $< -o $@ 75 | -------------------------------------------------------------------------------- /varint/README: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wheresvic/zsearch/72fe592a04c5fc90075475c2590bc3b8db03ed5d/varint/README -------------------------------------------------------------------------------- /varint/Set.h: -------------------------------------------------------------------------------- 1 | #ifndef ABSTRACT_SET_H__ 2 | #define ABSTRACT_SET_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | const unsigned int NO_MORE_DOCS = std::numeric_limits::max(); 8 | 9 | class Set 10 | { 11 | public: 12 | class Iterator { 13 | public: 14 | virtual unsigned int docID() = 0; 15 | //Calling nextDoc past end should have no effect 16 | virtual unsigned int nextDoc() = 0; 17 | // very efficient Advance method implemented with Skip list 18 | virtual unsigned int Advance(unsigned int target) = 0; 19 | virtual ~Iterator() {}; 20 | }; 21 | virtual std::shared_ptr iterator() const = 0; 22 | 23 | virtual void addDocs(unsigned int* docids,size_t start,size_t len) { throw -101; }; 24 | virtual void addDoc(unsigned int docId) { throw -102; }; 25 | virtual void removeDocId(unsigned int docId) { throw -103; }; 26 | // Free up unused memory in dynamic collection 27 | virtual void compact() { throw -104; }; 28 | 29 | virtual bool find(unsigned int target) const = 0; 30 | 31 | virtual void write(std::ostream & out) { throw -105; }; 32 | virtual void read(std::istream & in) { throw -106; }; 33 | // virtual void size(std::istream & in) = 0; 34 | 35 | virtual unsigned int size() const = 0; 36 | 37 | virtual ~Set() {} 38 | 39 | bool equals(std::shared_ptr other){ 40 | std::shared_ptr it1 = this->iterator(); 41 | std::shared_ptr it2 = other->iterator(); 42 | while( it1->nextDoc() != NO_MORE_DOCS ){ 43 | if (it2->nextDoc() == NO_MORE_DOCS){ 44 | return false; 45 | } 46 | if 
(it1->docID() != it2->docID()) 47 | return false; 48 | } 49 | return NO_MORE_DOCS == it2->nextDoc(); 50 | } 51 | }; 52 | #endif // ABSTRACT_SET_H__ 53 | -------------------------------------------------------------------------------- /varint/SetFactory.h: -------------------------------------------------------------------------------- 1 | #ifndef SET_FACTORY_H__ 2 | #define SET_FACTORY_H__ 3 | 4 | #include "Set.h" 5 | #include "CompressedSet.h" 6 | #include "ISetFactory.h" 7 | #include 8 | 9 | class SetFactory : public ISetFactory 10 | { 11 | 12 | public: 13 | 14 | virtual const shared_ptr createSparseSet() 15 | { 16 | return make_shared(); 17 | } 18 | 19 | }; 20 | 21 | #endif //SET_FACTORY_H__ 22 | -------------------------------------------------------------------------------- /varint/SliceInput.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wheresvic/zsearch/72fe592a04c5fc90075475c2590bc3b8db03ed5d/varint/SliceInput.h -------------------------------------------------------------------------------- /varint/SliceOutput.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wheresvic/zsearch/72fe592a04c5fc90075475c2590bc3b8db03ed5d/varint/SliceOutput.h -------------------------------------------------------------------------------- /varint/bitpacking/bitpacksimd.h: -------------------------------------------------------------------------------- 1 | /** 2 | * This is code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 
4 | * 5 | * (c) Leonid Boytsov 6 | */ 7 | 8 | #ifndef BITPACKSIMD_H 9 | #define BITPACKSIMD_H 10 | 11 | #include "common.h" 12 | 13 | void simdpack(const uint32_t * __restrict__ in,__m128i * __restrict__ out, uint32_t bit); 14 | void simdpackwithoutmask(const uint32_t * __restrict__ in,__m128i * __restrict__ out, uint32_t bit); 15 | void simdunpack(const __m128i * __restrict__ in,uint32_t * __restrict__ out, uint32_t bit); 16 | 17 | void SIMD_fastunpack_32(const __m128i * __restrict__ in, uint32_t * __restrict__ out, const uint32_t bit) ; 18 | void SIMD_fastpackwithoutmask_32(const uint32_t * __restrict__ in, __m128i * __restrict__ out, const uint32_t bit); 19 | void SIMD_fastpack_32(const uint32_t * __restrict__ in, __m128i * __restrict__ out, const uint32_t bit) ; 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /varint/bitpacking/common.h: -------------------------------------------------------------------------------- 1 | #ifndef BITPACK_COMMON_H__ 2 | #define BITPACK_COMMON_H__ 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #endif // PACKING_COMMON_H__ 14 | -------------------------------------------------------------------------------- /varint/bitpacking/simdcomputil.c: -------------------------------------------------------------------------------- 1 | #include "simdcomputil.h" 2 | 3 | __attribute__((always_inline)) 4 | static inline __m128i Delta(__m128i curr, __m128i prev) { 5 | return _mm_sub_epi32(curr, 6 | _mm_or_si128(_mm_slli_si128(curr, 4), _mm_srli_si128(prev, 12))); 7 | } 8 | 9 | 10 | // returns the integer logarithm of v (bit width) 11 | uint32_t bits(const uint32_t v) { 12 | #ifdef _MSC_VER 13 | if (v == 0) { 14 | return 0; 15 | } 16 | unsigned long answer; 17 | _BitScanReverse(&answer, v); 18 | return answer + 1; 19 | #else 20 | return v == 0 ? 
0 : 32 - __builtin_clz(v); // assume GCC-like compiler if not microsoft 21 | #endif 22 | } 23 | 24 | __attribute__ ((pure)) 25 | uint32_t maxbits(const uint32_t * begin) { 26 | uint32_t accumulator = 0; 27 | for (const uint32_t * k = begin; k != begin + SIMDBlockSize; ++k) { 28 | accumulator |= *k; 29 | } 30 | return bits(accumulator); 31 | } 32 | 33 | static uint32_t maxbitas32int(const __m128i accumulator) { 34 | uint32_t tmparray[4]; 35 | _mm_storeu_si128((__m128i *) (tmparray), accumulator); 36 | return bits(tmparray[0] | tmparray[1] | tmparray[2] | tmparray[3]); 37 | } 38 | 39 | 40 | // maxbit over 128 integers (SIMDBlockSize) with provided initial value 41 | uint32_t simdmaxbitsd1(uint32_t initvalue, const uint32_t * in) { 42 | __m128i initoffset = _mm_set1_epi32 (initvalue); 43 | const __m128i* pin = (const __m128i*)(in); 44 | __m128i newvec = _mm_load_si128(pin); 45 | __m128i accumulator = Delta(newvec , initoffset); 46 | __m128i oldvec = newvec; 47 | for(uint32_t k = 1; 4*k < SIMDBlockSize; ++k) { 48 | newvec = _mm_load_si128(pin+k); 49 | accumulator = _mm_or_si128(accumulator,Delta(newvec , oldvec)); 50 | oldvec = newvec; 51 | } 52 | initoffset = oldvec; 53 | return maxbitas32int(accumulator); 54 | } 55 | 56 | -------------------------------------------------------------------------------- /varint/bitpacking/simdcomputil.h: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under a BSD License. 
3 | */ 4 | 5 | #ifndef SIMDCOMPUTIL_H_ 6 | #define SIMDCOMPUTIL_H_ 7 | 8 | #include // SSE2 is required 9 | #include // use a C99-compliant compiler, please 10 | 11 | 12 | 13 | 14 | // returns the integer logarithm of v (bit width) 15 | uint32_t bits(const uint32_t v); 16 | 17 | // max integer logarithm over a range 18 | uint32_t maxbits(const uint32_t * begin); 19 | 20 | enum{ SIMDBlockSize = 128}; 21 | 22 | // like maxbit over 128 integers (SIMDBlockSize) with provided initial value 23 | // and using differential coding 24 | uint32_t simdmaxbitsd1(uint32_t initvalue, const uint32_t * in); 25 | 26 | 27 | 28 | 29 | #endif /* SIMDCOMPUTIL_H_ */ 30 | -------------------------------------------------------------------------------- /varint/bitpacking/simdintegratedbitpacking.h: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under a BSD License. 3 | */ 4 | 5 | #ifndef SIMD_INTEGRATED_BITPACKING_H 6 | #define SIMD_INTEGRATED_BITPACKING_H 7 | 8 | #include // SSE2 is required 9 | #include // use a C99-compliant compiler, please 10 | 11 | #include "simdcomputil.h" 12 | //reads 128 values in in, writes bit values from out 13 | void simdpackd1(uint32_t initvalue, const uint32_t * in,__m128i * out, uint32_t bit); 14 | //reads 128 values in in, writes bit values from out 15 | void simdpackwithoutmaskd1(uint32_t initvalue, const uint32_t * in,__m128i * out, uint32_t bit); 16 | //reads bit values in in, writes 128 values to out 17 | void simdunpackd1(uint32_t initvalue, const __m128i * in,uint32_t * out, uint32_t bit); 18 | 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /varint/bitpacking/util.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * This is code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 
4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | * and Owen Kaser 7 | */ 8 | 9 | #ifndef BITPACK_UTIL 10 | #define BITPACK_UTIL 11 | #include "common.h" 12 | #include // for std::cerr 13 | 14 | using namespace std; 15 | 16 | // taken from stackoverflow 17 | #ifndef NDEBUG 18 | # define ASSERT(condition, message) \ 19 | do { \ 20 | if (! (condition)) { \ 21 | std::cerr << "Assertion `" #condition "` failed in " << __FILE__ \ 22 | << " line " << __LINE__ << ": " << message << std::endl; \ 23 | std::exit(EXIT_FAILURE); \ 24 | } \ 25 | } while (false) 26 | #else 27 | # define ASSERT(condition, message) do { } while (false) 28 | #endif 29 | 30 | __attribute__ ((const)) 31 | bool divisibleby(size_t a, uint32_t x) { 32 | return (a % x == 0); 33 | } 34 | 35 | void checkifdivisibleby(size_t a, uint32_t x) { 36 | if (!divisibleby(a, x)) { 37 | ostringstream convert; 38 | convert << a << " not divisible by " << x; 39 | throw logic_error(convert.str()); 40 | } 41 | } 42 | 43 | 44 | 45 | 46 | 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /varint/bitpacking/util.h: -------------------------------------------------------------------------------- 1 | /** 2 | * This is code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | * and Owen Kaser 7 | */ 8 | 9 | #ifndef BITPACK_UTIL 10 | #define BITPACK_UTIL 11 | #include "common.h" 12 | #include // for std::cerr 13 | 14 | using namespace std; 15 | 16 | // taken from stackoverflow 17 | #ifndef NDEBUG 18 | # define ASSERT(condition, message) \ 19 | do { \ 20 | if (! 
(condition)) { \ 21 | std::cerr << "Assertion `" #condition "` failed in " << __FILE__ \ 22 | << " line " << __LINE__ << ": " << message << std::endl; \ 23 | std::exit(EXIT_FAILURE); \ 24 | } \ 25 | } while (false) 26 | #else 27 | # define ASSERT(condition, message) do { } while (false) 28 | #endif 29 | 30 | __attribute__ ((const)) 31 | bool divisibleby(size_t a, uint32_t x); 32 | 33 | void checkifdivisibleby(size_t a, uint32_t x); 34 | 35 | template 36 | __attribute__ ((const)) 37 | bool needPaddingTo128Bits(const T * inbyte) { 38 | return reinterpret_cast (inbyte) & 15; 39 | } 40 | 41 | __attribute__ ((const)) 42 | constexpr uint32_t gccbits(const uint32_t v) { 43 | return v == 0 ? 0 : 32 - __builtin_clz(v); 44 | } 45 | 46 | template 47 | __attribute__ ((pure)) 48 | uint32_t maxbits(const iterator & begin, const iterator & end) { 49 | uint32_t accumulator = 0; 50 | for (iterator k = begin; k != end; ++k) { 51 | accumulator |= *k; 52 | } 53 | return gccbits(accumulator); 54 | } 55 | 56 | // For VariableByte codec 57 | template 58 | __attribute__ ((const)) 59 | bool needPaddingTo32Bits(const T * inbyte) { 60 | return reinterpret_cast (inbyte) & 3; 61 | } 62 | 63 | template 64 | __attribute__ ((const)) 65 | T * padTo32bits(T * inbyte) { 66 | return reinterpret_cast< T *> ((reinterpret_cast (inbyte) 67 | + 3) & ~3); 68 | } 69 | #endif 70 | --------------------------------------------------------------------------------