├── .gitignore ├── .travis.yml ├── AUTHORS ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── NEWS ├── README.md ├── README.md.old ├── TODO ├── build_detect_platform ├── db ├── autocompact_test.cc ├── builder.cc ├── builder.h ├── c.cc ├── c_test.c ├── corruption_test.cc ├── db_bench.cc ├── db_impl.cc ├── db_impl.h ├── db_iter.cc ├── db_iter.h ├── db_test.cc ├── dbformat.cc ├── dbformat.h ├── dbformat_test.cc ├── dumpfile.cc ├── fault_injection_test.cc ├── filename.cc ├── filename.h ├── filename_test.cc ├── leveldbutil.cc ├── log_format.h ├── log_reader.cc ├── log_reader.h ├── log_test.cc ├── log_writer.cc ├── log_writer.h ├── memtable.cc ├── memtable.h ├── recovery_test.cc ├── repair.cc ├── skiplist.h ├── skiplist_test.cc ├── snapshot.h ├── table_cache.cc ├── table_cache.h ├── version_edit.cc ├── version_edit.h ├── version_edit_test.cc ├── version_set.cc ├── version_set.h ├── version_set_test.cc ├── write_batch.cc ├── write_batch_internal.h └── write_batch_test.cc ├── doc ├── LevelDB日知录.pdf ├── bench │ ├── db_bench_sqlite3.cc │ └── db_bench_tree_db.cc ├── benchmark.html ├── doc.css ├── impl.html ├── index.html ├── leveldb实现解析.pdf ├── log_format.txt └── table_format.txt ├── helpers └── memenv │ ├── memenv.cc │ ├── memenv.h │ └── memenv_test.cc ├── include └── leveldb │ ├── c.h │ ├── cache.h │ ├── comparator.h │ ├── db.h │ ├── dumpfile.h │ ├── env.h │ ├── filter_policy.h │ ├── iterator.h │ ├── options.h │ ├── slice.h │ ├── status.h │ ├── table.h │ ├── table_builder.h │ └── write_batch.h ├── issues ├── issue178_test.cc └── issue200_test.cc ├── port ├── README ├── atomic_pointer.h ├── port.h ├── port_example.h ├── port_posix.cc ├── port_posix.h ├── thread_annotations.h └── win │ └── stdint.h ├── table ├── block.cc ├── block.h ├── block_builder.cc ├── block_builder.h ├── filter_block.cc ├── filter_block.h ├── filter_block_test.cc ├── format.cc ├── format.h ├── iterator.cc ├── iterator_wrapper.h ├── merger.cc ├── merger.h ├── table.cc ├── table_builder.cc ├── table_test.cc 
├── two_level_iterator.cc └── two_level_iterator.h └── util ├── arena.cc ├── arena.h ├── arena_test.cc ├── bloom.cc ├── bloom_test.cc ├── cache.cc ├── cache_test.cc ├── coding.cc ├── coding.h ├── coding_test.cc ├── comparator.cc ├── crc32c.cc ├── crc32c.h ├── crc32c_test.cc ├── env.cc ├── env_posix.cc ├── env_test.cc ├── filter_policy.cc ├── hash.cc ├── hash.h ├── hash_test.cc ├── histogram.cc ├── histogram.h ├── logging.cc ├── logging.h ├── mutexlock.h ├── options.cc ├── posix_logger.h ├── random.h ├── status.cc ├── testharness.cc ├── testharness.h ├── testutil.cc └── testutil.h /.gitignore: -------------------------------------------------------------------------------- 1 | build_config.mk 2 | *.a 3 | *.o 4 | *.dylib* 5 | *.so 6 | *.so.* 7 | *_test 8 | *.swp 9 | db_bench 10 | leveldbutil 11 | tags 12 | test 13 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | compiler: 3 | - clang 4 | - gcc 5 | os: 6 | - linux 7 | - osx 8 | sudo: false 9 | before_install: 10 | - echo $LANG 11 | - echo $LC_ALL 12 | script: 13 | - make -j 4 check 14 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | # Names should be added to this file like so: 2 | # Name or Organization 3 | 4 | Google Inc. 5 | 6 | # Initial version authors: 7 | Jeffrey Dean 8 | Sanjay Ghemawat 9 | 10 | # Partial list of contributors: 11 | Kevin Regan 12 | Johan Bilien 13 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | We'd love to accept your code patches! However, before we can take them, we 4 | have to jump a couple of legal hurdles. 
5 | 6 | ## Contributor License Agreements 7 | 8 | Please fill out either the individual or corporate Contributor License 9 | Agreement as appropriate. 10 | 11 | * If you are an individual writing original source code and you're sure you 12 | own the intellectual property, then sign an [individual CLA](https://developers.google.com/open-source/cla/individual). 13 | * If you work for a company that wants to allow you to contribute your work, 14 | then sign a [corporate CLA](https://developers.google.com/open-source/cla/corporate). 15 | 16 | Follow either of the two links above to access the appropriate CLA and 17 | instructions for how to sign and return it. 18 | 19 | ## Submitting a Patch 20 | 21 | 1. Sign the contributors license agreement above. 22 | 2. Decide which code you want to submit. A submission should be a set of changes 23 | that addresses one issue in the [issue tracker](https://github.com/google/leveldb/issues). 24 | Please don't mix more than one logical change per submission, because it makes 25 | the history hard to follow. If you want to make a change 26 | (e.g. add a sample or feature) that doesn't have a corresponding issue in the 27 | issue tracker, please create one. 28 | 3. **Submitting**: When you are ready to submit, send us a Pull Request. Be 29 | sure to include the issue number you fixed and the name you used to sign 30 | the CLA. 31 | 32 | ## Writing Code ## 33 | 34 | If your contribution contains code, please make sure that it follows 35 | [the style guide](http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml). 36 | Otherwise we will have to ask you to make changes, and that's no fun for anyone. 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Google Inc. nor the names of its 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | Release 1.2 2011-05-16 2 | ---------------------- 3 | 4 | Fixes for larger databases (tested up to one billion 100-byte entries, 5 | i.e., ~100GB). 6 | 7 | (1) Place hard limit on number of level-0 files. This fixes errors 8 | of the form "too many open files". 
9 | 10 | (2) Fixed memtable management. Before the fix, a heavy write burst 11 | could cause unbounded memory usage. 12 | 13 | A fix for a logging bug where the reader would incorrectly complain 14 | about corruption. 15 | 16 | Allow public access to WriteBatch contents so that users can easily 17 | wrap a DB. 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | #带注释的leveldb v1.18 代码 2 | 3 | 原始代码地址: https://github.com/google/leveldb 4 | 5 | 源码阅读笔记: 6 | 7 | - 国内:http://blog.1feng.me/tags/leveldb/ 8 | - 源站:https://1feng.github.io/tags/leveldb/ 9 | 10 | 参考资料: 11 | - 源码阅读前: [《leveldb日知录》-- 朗格科技](https://github.com/1Feng/decode-leveldb/blob/master/doc/LevelDB%E6%97%A5%E7%9F%A5%E5%BD%95.pdf) 12 | - 源码阅读中: [《leveldb实现解析》-- 那岩](https://github.com/1Feng/decode-leveldb/blob/master/doc/leveldb%E5%AE%9E%E7%8E%B0%E8%A7%A3%E6%9E%90.pdf) 13 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | ss 2 | - Stats 3 | 4 | db 5 | - Maybe implement DB::BulkDeleteForRange(start_key, end_key) 6 | that would blow away files whose ranges are entirely contained 7 | within [start_key..end_key]? For Chrome, deletion of obsolete 8 | object stores, etc. can be done in the background anyway, so 9 | probably not that important. 10 | - There have been requests for MultiGet. 11 | 12 | After a range is completely deleted, what gets rid of the 13 | corresponding files if we do no future changes to that range. Make 14 | the conditions for triggering compactions fire in more situations? 15 | -------------------------------------------------------------------------------- /db/autocompact_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 The LevelDB Authors. All rights reserved. 
2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include "leveldb/db.h" 6 | #include "db/db_impl.h" 7 | #include "leveldb/cache.h" 8 | #include "util/testharness.h" 9 | #include "util/testutil.h" 10 | 11 | namespace leveldb { 12 | 13 | class AutoCompactTest { 14 | public: 15 | std::string dbname_; 16 | Cache* tiny_cache_; 17 | Options options_; 18 | DB* db_; 19 | 20 | AutoCompactTest() { 21 | dbname_ = test::TmpDir() + "/autocompact_test"; 22 | tiny_cache_ = NewLRUCache(100); 23 | options_.block_cache = tiny_cache_; 24 | DestroyDB(dbname_, options_); 25 | options_.create_if_missing = true; 26 | options_.compression = kNoCompression; 27 | ASSERT_OK(DB::Open(options_, dbname_, &db_)); 28 | } 29 | 30 | ~AutoCompactTest() { 31 | delete db_; 32 | DestroyDB(dbname_, Options()); 33 | delete tiny_cache_; 34 | } 35 | 36 | std::string Key(int i) { 37 | char buf[100]; 38 | snprintf(buf, sizeof(buf), "key%06d", i); 39 | return std::string(buf); 40 | } 41 | 42 | uint64_t Size(const Slice& start, const Slice& limit) { 43 | Range r(start, limit); 44 | uint64_t size; 45 | db_->GetApproximateSizes(&r, 1, &size); 46 | return size; 47 | } 48 | 49 | void DoReads(int n); 50 | }; 51 | 52 | static const int kValueSize = 200 * 1024; 53 | static const int kTotalSize = 100 * 1024 * 1024; 54 | static const int kCount = kTotalSize / kValueSize; 55 | 56 | // Read through the first n keys repeatedly and check that they get 57 | // compacted (verified by checking the size of the key space). 
58 | void AutoCompactTest::DoReads(int n) { 59 | std::string value(kValueSize, 'x'); 60 | DBImpl* dbi = reinterpret_cast(db_); 61 | 62 | // Fill database 63 | for (int i = 0; i < kCount; i++) { 64 | ASSERT_OK(db_->Put(WriteOptions(), Key(i), value)); 65 | } 66 | ASSERT_OK(dbi->TEST_CompactMemTable()); 67 | 68 | // Delete everything 69 | for (int i = 0; i < kCount; i++) { 70 | ASSERT_OK(db_->Delete(WriteOptions(), Key(i))); 71 | } 72 | ASSERT_OK(dbi->TEST_CompactMemTable()); 73 | 74 | // Get initial measurement of the space we will be reading. 75 | const int64_t initial_size = Size(Key(0), Key(n)); 76 | const int64_t initial_other_size = Size(Key(n), Key(kCount)); 77 | 78 | // Read until size drops significantly. 79 | std::string limit_key = Key(n); 80 | for (int read = 0; true; read++) { 81 | ASSERT_LT(read, 100) << "Taking too long to compact"; 82 | Iterator* iter = db_->NewIterator(ReadOptions()); 83 | for (iter->SeekToFirst(); 84 | iter->Valid() && iter->key().ToString() < limit_key; 85 | iter->Next()) { 86 | // Drop data 87 | } 88 | delete iter; 89 | // Wait a little bit to allow any triggered compactions to complete. 90 | Env::Default()->SleepForMicroseconds(1000000); 91 | uint64_t size = Size(Key(0), Key(n)); 92 | fprintf(stderr, "iter %3d => %7.3f MB [other %7.3f MB]\n", 93 | read+1, size/1048576.0, Size(Key(n), Key(kCount))/1048576.0); 94 | if (size <= initial_size/10) { 95 | break; 96 | } 97 | } 98 | 99 | // Verify that the size of the key space not touched by the reads 100 | // is pretty much unchanged. 
101 | const int64_t final_other_size = Size(Key(n), Key(kCount)); 102 | ASSERT_LE(final_other_size, initial_other_size + 1048576); 103 | ASSERT_GE(final_other_size, initial_other_size/5 - 1048576); 104 | } 105 | 106 | TEST(AutoCompactTest, ReadAll) { 107 | DoReads(kCount); 108 | } 109 | 110 | TEST(AutoCompactTest, ReadHalf) { 111 | DoReads(kCount/2); 112 | } 113 | 114 | } // namespace leveldb 115 | 116 | int main(int argc, char** argv) { 117 | return leveldb::test::RunAllTests(); 118 | } 119 | -------------------------------------------------------------------------------- /db/builder.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include "db/builder.h" 6 | 7 | #include "db/filename.h" 8 | #include "db/dbformat.h" 9 | #include "db/table_cache.h" 10 | #include "db/version_edit.h" 11 | #include "leveldb/db.h" 12 | #include "leveldb/env.h" 13 | #include "leveldb/iterator.h" 14 | 15 | namespace leveldb { 16 | 17 | Status BuildTable(const std::string& dbname, 18 | Env* env, 19 | const Options& options, 20 | TableCache* table_cache, 21 | Iterator* iter, 22 | FileMetaData* meta) { 23 | Status s; 24 | meta->file_size = 0; 25 | iter->SeekToFirst(); 26 | 27 | // 生成sstable的filename 28 | std::string fname = TableFileName(dbname, meta->number); 29 | if (iter->Valid()) { 30 | WritableFile* file; 31 | // 打开文件 32 | s = env->NewWritableFile(fname, &file); 33 | if (!s.ok()) { 34 | return s; 35 | } 36 | 37 | TableBuilder* builder = new TableBuilder(options, file); 38 | meta->smallest.DecodeFrom(iter->key()); 39 | // 利用iter遍历memtable 40 | for (; iter->Valid(); iter->Next()) { 41 | Slice key = iter->key(); 42 | meta->largest.DecodeFrom(key); 43 | builder->Add(key, iter->value()); 44 | } 45 | 46 | // Finish and check 
for builder errors 47 | if (s.ok()) { 48 | s = builder->Finish(); 49 | if (s.ok()) { 50 | meta->file_size = builder->FileSize(); 51 | assert(meta->file_size > 0); 52 | } 53 | } else { 54 | builder->Abandon(); 55 | } 56 | delete builder; 57 | 58 | // Finish and check for file errors 59 | if (s.ok()) { 60 | s = file->Sync(); 61 | } 62 | if (s.ok()) { 63 | s = file->Close(); 64 | } 65 | delete file; 66 | file = NULL; 67 | 68 | if (s.ok()) { 69 | // Verify that the table is usable 70 | Iterator* it = table_cache->NewIterator(ReadOptions(), 71 | meta->number, 72 | meta->file_size); 73 | s = it->status(); 74 | delete it; 75 | } 76 | } 77 | 78 | // Check for input iterator errors 79 | if (!iter->status().ok()) { 80 | s = iter->status(); 81 | } 82 | 83 | if (s.ok() && meta->file_size > 0) { 84 | // Keep it 85 | } else { 86 | env->DeleteFile(fname); 87 | } 88 | return s; 89 | } 90 | 91 | } // namespace leveldb 92 | -------------------------------------------------------------------------------- /db/builder.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_DB_BUILDER_H_ 6 | #define STORAGE_LEVELDB_DB_BUILDER_H_ 7 | 8 | #include "leveldb/status.h" 9 | 10 | namespace leveldb { 11 | 12 | struct Options; 13 | struct FileMetaData; 14 | 15 | class Env; 16 | class Iterator; 17 | class TableCache; 18 | class VersionEdit; 19 | 20 | // Build a Table file from the contents of *iter. The generated file 21 | // will be named according to meta->number. On success, the rest of 22 | // *meta will be filled with metadata about the generated table. 23 | // If no data is present in *iter, meta->file_size will be set to 24 | // zero, and no Table file will be produced. 
25 | extern Status BuildTable(const std::string& dbname, 26 | Env* env, 27 | const Options& options, 28 | TableCache* table_cache, 29 | Iterator* iter, 30 | FileMetaData* meta); 31 | 32 | } // namespace leveldb 33 | 34 | #endif // STORAGE_LEVELDB_DB_BUILDER_H_ 35 | -------------------------------------------------------------------------------- /db/db_iter.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_DB_DB_ITER_H_ 6 | #define STORAGE_LEVELDB_DB_DB_ITER_H_ 7 | 8 | #include 9 | #include "leveldb/db.h" 10 | #include "db/dbformat.h" 11 | 12 | namespace leveldb { 13 | 14 | class DBImpl; 15 | 16 | // Return a new iterator that converts internal keys (yielded by 17 | // "*internal_iter") that were live at the specified "sequence" number 18 | // into appropriate user keys. 19 | extern Iterator* NewDBIterator( 20 | DBImpl* db, 21 | const Comparator* user_key_comparator, 22 | Iterator* internal_iter, 23 | SequenceNumber sequence, 24 | uint32_t seed); 25 | 26 | } // namespace leveldb 27 | 28 | #endif // STORAGE_LEVELDB_DB_DB_ITER_H_ 29 | -------------------------------------------------------------------------------- /db/dbformat.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | 5 | #include 6 | #include "db/dbformat.h" 7 | #include "port/port.h" 8 | #include "util/coding.h" 9 | 10 | namespace leveldb { 11 | 12 | static uint64_t PackSequenceAndType(uint64_t seq, ValueType t) { 13 | assert(seq <= kMaxSequenceNumber); 14 | assert(t <= kValueTypeForSeek); 15 | return (seq << 8) | t; 16 | } 17 | 18 | void AppendInternalKey(std::string* result, const ParsedInternalKey& key) { 19 | result->append(key.user_key.data(), key.user_key.size()); 20 | PutFixed64(result, PackSequenceAndType(key.sequence, key.type)); 21 | } 22 | 23 | std::string ParsedInternalKey::DebugString() const { 24 | char buf[50]; 25 | snprintf(buf, sizeof(buf), "' @ %llu : %d", 26 | (unsigned long long) sequence, 27 | int(type)); 28 | std::string result = "'"; 29 | result += EscapeString(user_key.ToString()); 30 | result += buf; 31 | return result; 32 | } 33 | 34 | std::string InternalKey::DebugString() const { 35 | std::string result; 36 | ParsedInternalKey parsed; 37 | if (ParseInternalKey(rep_, &parsed)) { 38 | result = parsed.DebugString(); 39 | } else { 40 | result = "(bad)"; 41 | result.append(EscapeString(rep_)); 42 | } 43 | return result; 44 | } 45 | 46 | const char* InternalKeyComparator::Name() const { 47 | return "leveldb.InternalKeyComparator"; 48 | } 49 | 50 | int InternalKeyComparator::Compare(const Slice& akey, const Slice& bkey) const { 51 | // Order by: 52 | // increasing user key (according to user-supplied comparator) 53 | // decreasing sequence number 54 | // decreasing type (though sequence# should be enough to disambiguate) 55 | int r = user_comparator_->Compare(ExtractUserKey(akey), ExtractUserKey(bkey)); 56 | if (r == 0) { 57 | const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8); 58 | const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8); 59 | if (anum > bnum) { 60 | r = -1; 61 | } else if (anum < bnum) { 62 | r = +1; 63 | } 64 | } 65 | return r; 66 | } 67 | 68 | void InternalKeyComparator::FindShortestSeparator( 69 | 
std::string* start, 70 | const Slice& limit) const { 71 | // Attempt to shorten the user portion of the key 72 | Slice user_start = ExtractUserKey(*start); 73 | Slice user_limit = ExtractUserKey(limit); 74 | std::string tmp(user_start.data(), user_start.size()); 75 | user_comparator_->FindShortestSeparator(&tmp, user_limit); 76 | if (tmp.size() < user_start.size() && 77 | // tmp[tmp.size() - 1]在FindShortestSeprator里做了+1操作 78 | // 所以字节序上,tmp大于user_start了 79 | user_comparator_->Compare(user_start, tmp) < 0) { 80 | // User key has become shorter physically, but larger logically. 81 | // Tack on the earliest possible number to the shortened user key. 82 | PutFixed64(&tmp, PackSequenceAndType(kMaxSequenceNumber,kValueTypeForSeek)); 83 | assert(this->Compare(*start, tmp) < 0); 84 | assert(this->Compare(tmp, limit) < 0); 85 | start->swap(tmp); 86 | } 87 | } 88 | 89 | void InternalKeyComparator::FindShortSuccessor(std::string* key) const { 90 | Slice user_key = ExtractUserKey(*key); 91 | std::string tmp(user_key.data(), user_key.size()); 92 | user_comparator_->FindShortSuccessor(&tmp); 93 | if (tmp.size() < user_key.size() && 94 | user_comparator_->Compare(user_key, tmp) < 0) { 95 | // User key has become shorter physically, but larger logically. 96 | // Tack on the earliest possible number to the shortened user key. 97 | PutFixed64(&tmp, PackSequenceAndType(kMaxSequenceNumber,kValueTypeForSeek)); 98 | assert(this->Compare(*key, tmp) < 0); 99 | key->swap(tmp); 100 | } 101 | } 102 | 103 | const char* InternalFilterPolicy::Name() const { 104 | return user_policy_->Name(); 105 | } 106 | 107 | void InternalFilterPolicy::CreateFilter(const Slice* keys, int n, 108 | std::string* dst) const { 109 | // We rely on the fact that the code in table.cc does not mind us 110 | // adjusting keys[]. 111 | Slice* mkey = const_cast(keys); 112 | for (int i = 0; i < n; i++) { 113 | mkey[i] = ExtractUserKey(keys[i]); 114 | // TODO(sanjay): Suppress dups? 
115 | } 116 | user_policy_->CreateFilter(keys, n, dst); 117 | } 118 | 119 | bool InternalFilterPolicy::KeyMayMatch(const Slice& key, const Slice& f) const { 120 | return user_policy_->KeyMayMatch(ExtractUserKey(key), f); 121 | } 122 | 123 | LookupKey::LookupKey(const Slice& user_key, SequenceNumber s) { 124 | size_t usize = user_key.size(); 125 | size_t needed = usize + 13; // A conservative estimate 126 | char* dst; 127 | if (needed <= sizeof(space_)) { 128 | dst = space_; 129 | } else { 130 | dst = new char[needed]; 131 | } 132 | start_ = dst; 133 | dst = EncodeVarint32(dst, usize + 8); 134 | kstart_ = dst; 135 | memcpy(dst, user_key.data(), usize); 136 | dst += usize; 137 | EncodeFixed64(dst, PackSequenceAndType(s, kValueTypeForSeek)); 138 | dst += 8; 139 | end_ = dst; 140 | } 141 | 142 | } // namespace leveldb 143 | -------------------------------------------------------------------------------- /db/dbformat_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | 5 | #include "db/dbformat.h" 6 | #include "util/logging.h" 7 | #include "util/testharness.h" 8 | 9 | namespace leveldb { 10 | 11 | static std::string IKey(const std::string& user_key, 12 | uint64_t seq, 13 | ValueType vt) { 14 | std::string encoded; 15 | AppendInternalKey(&encoded, ParsedInternalKey(user_key, seq, vt)); 16 | return encoded; 17 | } 18 | 19 | static std::string Shorten(const std::string& s, const std::string& l) { 20 | std::string result = s; 21 | InternalKeyComparator(BytewiseComparator()).FindShortestSeparator(&result, l); 22 | return result; 23 | } 24 | 25 | static std::string ShortSuccessor(const std::string& s) { 26 | std::string result = s; 27 | InternalKeyComparator(BytewiseComparator()).FindShortSuccessor(&result); 28 | return result; 29 | } 30 | 31 | static void TestKey(const std::string& key, 32 | uint64_t seq, 33 | ValueType vt) { 34 | std::string encoded = IKey(key, seq, vt); 35 | 36 | Slice in(encoded); 37 | ParsedInternalKey decoded("", 0, kTypeValue); 38 | 39 | ASSERT_TRUE(ParseInternalKey(in, &decoded)); 40 | ASSERT_EQ(key, decoded.user_key.ToString()); 41 | ASSERT_EQ(seq, decoded.sequence); 42 | ASSERT_EQ(vt, decoded.type); 43 | 44 | ASSERT_TRUE(!ParseInternalKey(Slice("bar"), &decoded)); 45 | } 46 | 47 | class FormatTest { }; 48 | 49 | TEST(FormatTest, InternalKey_EncodeDecode) { 50 | const char* keys[] = { "", "k", "hello", "longggggggggggggggggggggg" }; 51 | const uint64_t seq[] = { 52 | 1, 2, 3, 53 | (1ull << 8) - 1, 1ull << 8, (1ull << 8) + 1, 54 | (1ull << 16) - 1, 1ull << 16, (1ull << 16) + 1, 55 | (1ull << 32) - 1, 1ull << 32, (1ull << 32) + 1 56 | }; 57 | for (int k = 0; k < sizeof(keys) / sizeof(keys[0]); k++) { 58 | for (int s = 0; s < sizeof(seq) / sizeof(seq[0]); s++) { 59 | TestKey(keys[k], seq[s], kTypeValue); 60 | TestKey("hello", 1, kTypeDeletion); 61 | } 62 | } 63 | } 64 | 65 | TEST(FormatTest, InternalKeyShortSeparator) { 66 | // When user keys are same 67 | ASSERT_EQ(IKey("foo", 100, kTypeValue), 68 | 
// Checks FindShortSuccessor on internal keys via the ShortSuccessor
// helper.
TEST(FormatTest, InternalKeyShortestSuccessor) {
  // User key "foo" is expected to shrink to "g", tagged with the maximum
  // sequence number and the seek value type.
  const std::string successor_of_foo =
      ShortSuccessor(IKey("foo", 100, kTypeValue));
  ASSERT_EQ(IKey("g", kMaxSequenceNumber, kValueTypeForSeek),
            successor_of_foo);

  // A user key made only of 0xff bytes is expected to come back as is.
  const std::string all_ff_key = IKey("\xff\xff", 100, kTypeValue);
  ASSERT_EQ(all_ff_key, ShortSuccessor(all_ff_key));
}
// Returns "<name>/<number>.<suffix>", with the number zero-padded to at
// least six digits.  The cast to unsigned long long matches the %llu
// conversion regardless of how uint64_t is defined on the platform.
static std::string MakeFileName(const std::string& name, uint64_t number,
                                const char* suffix) {
  char buf[100];
  snprintf(buf, sizeof(buf), "/%06llu.%s",
           static_cast<unsigned long long>(number),
           suffix);
  return name + buf;
}

// Returns "<name>/<number>.log".  "number" must be positive.
std::string LogFileName(const std::string& name, uint64_t number) {
  assert(number > 0);
  return MakeFileName(name, number, "log");
}

// Returns "<name>/<number>.ldb".  "number" must be positive.
std::string TableFileName(const std::string& name, uint64_t number) {
  assert(number > 0);
  return MakeFileName(name, number, "ldb");
}

// Returns "<name>/<number>.sst".  "number" must be positive.
std::string SSTTableFileName(const std::string& name, uint64_t number) {
  assert(number > 0);
  return MakeFileName(name, number, "sst");
}

// Returns "<dbname>/MANIFEST-<number>", with the number zero-padded to
// at least six digits.  "number" must be positive.
std::string DescriptorFileName(const std::string& dbname, uint64_t number) {
  assert(number > 0);
  char buf[100];
  snprintf(buf, sizeof(buf), "/MANIFEST-%06llu",
           static_cast<unsigned long long>(number));
  return dbname + buf;
}

// Returns "<dbname>/CURRENT".
std::string CurrentFileName(const std::string& dbname) {
  return dbname + "/CURRENT";
}

// Returns "<dbname>/LOCK".
std::string LockFileName(const std::string& dbname) {
  return dbname + "/LOCK";
}

// Returns "<dbname>/<number>.dbtmp".  "number" must be positive.
std::string TempFileName(const std::string& dbname, uint64_t number) {
  assert(number > 0);
  return MakeFileName(dbname, number, "dbtmp");
}

// Returns "<dbname>/LOG".
std::string InfoLogFileName(const std::string& dbname) {
  return dbname + "/LOG";
}
old info log file for "dbname". 68 | std::string OldInfoLogFileName(const std::string& dbname) { 69 | return dbname + "/LOG.old"; 70 | } 71 | 72 | 73 | // Owned filenames have the form: 74 | // dbname/CURRENT 75 | // dbname/LOCK 76 | // dbname/LOG 77 | // dbname/LOG.old 78 | // dbname/MANIFEST-[0-9]+ 79 | // dbname/[0-9]+.(log|sst|ldb) 80 | bool ParseFileName(const std::string& fname, 81 | uint64_t* number, 82 | FileType* type) { 83 | Slice rest(fname); 84 | if (rest == "CURRENT") { 85 | *number = 0; 86 | *type = kCurrentFile; 87 | } else if (rest == "LOCK") { 88 | *number = 0; 89 | *type = kDBLockFile; 90 | } else if (rest == "LOG" || rest == "LOG.old") { 91 | *number = 0; 92 | *type = kInfoLogFile; 93 | } else if (rest.starts_with("MANIFEST-")) { 94 | rest.remove_prefix(strlen("MANIFEST-")); 95 | uint64_t num; 96 | if (!ConsumeDecimalNumber(&rest, &num)) { 97 | return false; 98 | } 99 | if (!rest.empty()) { 100 | return false; 101 | } 102 | *type = kDescriptorFile; 103 | *number = num; 104 | } else { 105 | // Avoid strtoull() to keep filename format independent of the 106 | // current locale 107 | uint64_t num; 108 | if (!ConsumeDecimalNumber(&rest, &num)) { 109 | return false; 110 | } 111 | Slice suffix = rest; 112 | if (suffix == Slice(".log")) { 113 | *type = kLogFile; 114 | } else if (suffix == Slice(".sst") || suffix == Slice(".ldb")) { 115 | *type = kTableFile; 116 | } else if (suffix == Slice(".dbtmp")) { 117 | *type = kTempFile; 118 | } else { 119 | return false; 120 | } 121 | *number = num; 122 | } 123 | return true; 124 | } 125 | 126 | Status SetCurrentFile(Env* env, const std::string& dbname, 127 | uint64_t descriptor_number) { 128 | // Remove leading "dbname/" and add newline to manifest file name 129 | std::string manifest = DescriptorFileName(dbname, descriptor_number); 130 | Slice contents = manifest; 131 | assert(contents.starts_with(dbname + "/")); 132 | contents.remove_prefix(dbname.size() + 1); 133 | std::string tmp = TempFileName(dbname, 
// Kinds of files found in a database directory, as reported by
// ParseFileName().
enum FileType {
  kLogFile,         // "<number>.log"
  kDBLockFile,      // "LOCK"
  kTableFile,       // "<number>.sst" or "<number>.ldb"
  kDescriptorFile,  // "MANIFEST-<number>"
  kCurrentFile,     // "CURRENT"
  kTempFile,        // "<number>.dbtmp"
  kInfoLogFile  // Either the current one, or an old one
};
43 | extern std::string SSTTableFileName(const std::string& dbname, uint64_t number); 44 | 45 | // Return the name of the descriptor file for the db named by 46 | // "dbname" and the specified incarnation number. The result will be 47 | // prefixed with "dbname". 48 | extern std::string DescriptorFileName(const std::string& dbname, 49 | uint64_t number); 50 | 51 | // Return the name of the current file. This file contains the name 52 | // of the current manifest file. The result will be prefixed with 53 | // "dbname". 54 | extern std::string CurrentFileName(const std::string& dbname); 55 | 56 | // Return the name of the lock file for the db named by 57 | // "dbname". The result will be prefixed with "dbname". 58 | extern std::string LockFileName(const std::string& dbname); 59 | 60 | // Return the name of a temporary file owned by the db named "dbname". 61 | // The result will be prefixed with "dbname". 62 | extern std::string TempFileName(const std::string& dbname, uint64_t number); 63 | 64 | // Return the name of the info log file for "dbname". 65 | extern std::string InfoLogFileName(const std::string& dbname); 66 | 67 | // Return the name of the old info log file for "dbname". 68 | extern std::string OldInfoLogFileName(const std::string& dbname); 69 | 70 | // If filename is a leveldb file, store the type of the file in *type. 71 | // The number encoded in the filename is stored in *number. If the 72 | // filename was successfully parsed, returns true. Else return false. 73 | extern bool ParseFileName(const std::string& filename, 74 | uint64_t* number, 75 | FileType* type); 76 | 77 | // Make the CURRENT file point to the descriptor file with the 78 | // specified number. 
79 | extern Status SetCurrentFile(Env* env, const std::string& dbname, 80 | uint64_t descriptor_number); 81 | 82 | 83 | } // namespace leveldb 84 | 85 | #endif // STORAGE_LEVELDB_DB_FILENAME_H_ 86 | -------------------------------------------------------------------------------- /db/filename_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include "db/filename.h" 6 | 7 | #include "db/dbformat.h" 8 | #include "port/port.h" 9 | #include "util/logging.h" 10 | #include "util/testharness.h" 11 | 12 | namespace leveldb { 13 | 14 | class FileNameTest { }; 15 | 16 | TEST(FileNameTest, Parse) { 17 | Slice db; 18 | FileType type; 19 | uint64_t number; 20 | 21 | // Successful parses 22 | static struct { 23 | const char* fname; 24 | uint64_t number; 25 | FileType type; 26 | } cases[] = { 27 | { "100.log", 100, kLogFile }, 28 | { "0.log", 0, kLogFile }, 29 | { "0.sst", 0, kTableFile }, 30 | { "0.ldb", 0, kTableFile }, 31 | { "CURRENT", 0, kCurrentFile }, 32 | { "LOCK", 0, kDBLockFile }, 33 | { "MANIFEST-2", 2, kDescriptorFile }, 34 | { "MANIFEST-7", 7, kDescriptorFile }, 35 | { "LOG", 0, kInfoLogFile }, 36 | { "LOG.old", 0, kInfoLogFile }, 37 | { "18446744073709551615.log", 18446744073709551615ull, kLogFile }, 38 | }; 39 | for (int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { 40 | std::string f = cases[i].fname; 41 | ASSERT_TRUE(ParseFileName(f, &number, &type)) << f; 42 | ASSERT_EQ(cases[i].type, type) << f; 43 | ASSERT_EQ(cases[i].number, number) << f; 44 | } 45 | 46 | // Errors 47 | static const char* errors[] = { 48 | "", 49 | "foo", 50 | "foo-dx-100.log", 51 | ".log", 52 | "", 53 | "manifest", 54 | "CURREN", 55 | "CURRENTX", 56 | "MANIFES", 57 | "MANIFEST", 58 | "MANIFEST-", 59 | 
"XMANIFEST-3", 60 | "MANIFEST-3x", 61 | "LOC", 62 | "LOCKx", 63 | "LO", 64 | "LOGx", 65 | "18446744073709551616.log", 66 | "184467440737095516150.log", 67 | "100", 68 | "100.", 69 | "100.lop" 70 | }; 71 | for (int i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) { 72 | std::string f = errors[i]; 73 | ASSERT_TRUE(!ParseFileName(f, &number, &type)) << f; 74 | } 75 | } 76 | 77 | TEST(FileNameTest, Construction) { 78 | uint64_t number; 79 | FileType type; 80 | std::string fname; 81 | 82 | fname = CurrentFileName("foo"); 83 | ASSERT_EQ("foo/", std::string(fname.data(), 4)); 84 | ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); 85 | ASSERT_EQ(0, number); 86 | ASSERT_EQ(kCurrentFile, type); 87 | 88 | fname = LockFileName("foo"); 89 | ASSERT_EQ("foo/", std::string(fname.data(), 4)); 90 | ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); 91 | ASSERT_EQ(0, number); 92 | ASSERT_EQ(kDBLockFile, type); 93 | 94 | fname = LogFileName("foo", 192); 95 | ASSERT_EQ("foo/", std::string(fname.data(), 4)); 96 | ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); 97 | ASSERT_EQ(192, number); 98 | ASSERT_EQ(kLogFile, type); 99 | 100 | fname = TableFileName("bar", 200); 101 | ASSERT_EQ("bar/", std::string(fname.data(), 4)); 102 | ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); 103 | ASSERT_EQ(200, number); 104 | ASSERT_EQ(kTableFile, type); 105 | 106 | fname = DescriptorFileName("bar", 100); 107 | ASSERT_EQ("bar/", std::string(fname.data(), 4)); 108 | ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); 109 | ASSERT_EQ(100, number); 110 | ASSERT_EQ(kDescriptorFile, type); 111 | 112 | fname = TempFileName("tmp", 999); 113 | ASSERT_EQ("tmp/", std::string(fname.data(), 4)); 114 | ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); 115 | ASSERT_EQ(999, number); 116 | ASSERT_EQ(kTempFile, type); 117 | } 118 | 119 | } // namespace leveldb 120 | 121 | int main(int argc, char** argv) { 122 | return 
leveldb::test::RunAllTests(); 123 | } 124 | -------------------------------------------------------------------------------- /db/leveldbutil.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include 6 | #include "leveldb/dumpfile.h" 7 | #include "leveldb/env.h" 8 | #include "leveldb/status.h" 9 | 10 | namespace leveldb { 11 | namespace { 12 | 13 | class StdoutPrinter : public WritableFile { 14 | public: 15 | virtual Status Append(const Slice& data) { 16 | fwrite(data.data(), 1, data.size(), stdout); 17 | return Status::OK(); 18 | } 19 | virtual Status Close() { return Status::OK(); } 20 | virtual Status Flush() { return Status::OK(); } 21 | virtual Status Sync() { return Status::OK(); } 22 | }; 23 | 24 | bool HandleDumpCommand(Env* env, char** files, int num) { 25 | StdoutPrinter printer; 26 | bool ok = true; 27 | for (int i = 0; i < num; i++) { 28 | Status s = DumpFile(env, files[i], &printer); 29 | if (!s.ok()) { 30 | fprintf(stderr, "%s\n", s.ToString().c_str()); 31 | ok = false; 32 | } 33 | } 34 | return ok; 35 | } 36 | 37 | } // namespace 38 | } // namespace leveldb 39 | 40 | static void Usage() { 41 | fprintf( 42 | stderr, 43 | "Usage: leveldbutil command...\n" 44 | " dump files... -- dump contents of specified files\n" 45 | ); 46 | } 47 | 48 | int main(int argc, char** argv) { 49 | leveldb::Env* env = leveldb::Env::Default(); 50 | bool ok = true; 51 | if (argc < 2) { 52 | Usage(); 53 | ok = false; 54 | } else { 55 | std::string command = argv[1]; 56 | if (command == "dump") { 57 | ok = leveldb::HandleDumpCommand(env, argv+2, argc-2); 58 | } else { 59 | Usage(); 60 | ok = false; 61 | } 62 | } 63 | return (ok ? 
0 : 1); 64 | } 65 | -------------------------------------------------------------------------------- /db/log_format.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | // 5 | // Log format information shared by reader and writer. 6 | // See ../doc/log_format.txt for more detail. 7 | 8 | #ifndef STORAGE_LEVELDB_DB_LOG_FORMAT_H_ 9 | #define STORAGE_LEVELDB_DB_LOG_FORMAT_H_ 10 | 11 | namespace leveldb { 12 | namespace log { 13 | 14 | enum RecordType { 15 | // Zero is reserved for preallocated files 16 | kZeroType = 0, 17 | 18 | kFullType = 1, // record在某个block中完整存放 19 | 20 | // For fragments 21 | // 目测一条完整记录最多会被拆分在三个block中 22 | kFirstType = 2, // record被切分了,第一块位于当前block 23 | kMiddleType = 3, // record被切分了,第二块位于当前block 24 | kLastType = 4 // 最后一块位于当前blcok 25 | }; 26 | static const int kMaxRecordType = kLastType; 27 | 28 | // 固定32kb大小的block 29 | static const int kBlockSize = 32768; 30 | 31 | // Header is checksum (4 bytes), length (2 bytes), type (1 byte). 32 | static const int kHeaderSize = 4 + 2 + 1; 33 | 34 | } // namespace log 35 | } // namespace leveldb 36 | 37 | #endif // STORAGE_LEVELDB_DB_LOG_FORMAT_H_ 38 | -------------------------------------------------------------------------------- /db/log_reader.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | 5 | #ifndef STORAGE_LEVELDB_DB_LOG_READER_H_ 6 | #define STORAGE_LEVELDB_DB_LOG_READER_H_ 7 | 8 | #include 9 | 10 | #include "db/log_format.h" 11 | #include "leveldb/slice.h" 12 | #include "leveldb/status.h" 13 | 14 | namespace leveldb { 15 | 16 | class SequentialFile; 17 | 18 | namespace log { 19 | 20 | class Reader { 21 | public: 22 | // Interface for reporting errors. 23 | class Reporter { 24 | public: 25 | virtual ~Reporter(); 26 | 27 | // Some corruption was detected. "size" is the approximate number 28 | // of bytes dropped due to the corruption. 29 | virtual void Corruption(size_t bytes, const Status& status) = 0; 30 | }; 31 | 32 | // Create a reader that will return log records from "*file". 33 | // "*file" must remain live while this Reader is in use. 34 | // 35 | // If "reporter" is non-NULL, it is notified whenever some data is 36 | // dropped due to a detected corruption. "*reporter" must remain 37 | // live while this Reader is in use. 38 | // 39 | // If "checksum" is true, verify checksums if available. 40 | // 41 | // The Reader will start reading at the first record located at physical 42 | // position >= initial_offset within the file. 43 | Reader(SequentialFile* file, Reporter* reporter, bool checksum, 44 | uint64_t initial_offset); 45 | 46 | ~Reader(); 47 | 48 | // Read the next record into *record. Returns true if read 49 | // successfully, false if we hit end of the input. May use 50 | // "*scratch" as temporary storage. The contents filled in *record 51 | // will only be valid until the next mutating operation on this 52 | // reader or the next mutation to *scratch. 53 | bool ReadRecord(Slice* record, std::string* scratch); 54 | 55 | // Returns the physical offset of the last record returned by ReadRecord. 56 | // 57 | // Undefined before the first call to ReadRecord. 
58 | uint64_t LastRecordOffset(); 59 | 60 | private: 61 | SequentialFile* const file_; 62 | Reporter* const reporter_; 63 | bool const checksum_; 64 | char* const backing_store_; 65 | Slice buffer_; 66 | bool eof_; // Last Read() indicated EOF by returning < kBlockSize 67 | 68 | // Offset of the last record returned by ReadRecord. 69 | uint64_t last_record_offset_; 70 | // Offset of the first location past the end of buffer_. 71 | uint64_t end_of_buffer_offset_; 72 | 73 | // Offset at which to start looking for the first record to return 74 | uint64_t const initial_offset_; 75 | 76 | // True if we are resynchronizing after a seek (initial_offset_ > 0). In 77 | // particular, a run of kMiddleType and kLastType records can be silently 78 | // skipped in this mode 79 | bool resyncing_; 80 | 81 | // Extend record types with the following special values 82 | enum { 83 | kEof = kMaxRecordType + 1, 84 | // Returned whenever we find an invalid physical record. 85 | // Currently there are three situations in which this happens: 86 | // * The record has an invalid CRC (ReadPhysicalRecord reports a drop) 87 | // * The record is a 0-length record (No drop is reported) 88 | // * The record is below constructor's initial_offset (No drop is reported) 89 | kBadRecord = kMaxRecordType + 2 90 | }; 91 | 92 | // Skips all blocks that are completely before "initial_offset_". 93 | // 94 | // Returns true on success. Handles reporting. 95 | bool SkipToInitialBlock(); 96 | 97 | // Return type, or one of the preceding special values 98 | unsigned int ReadPhysicalRecord(Slice* result); 99 | 100 | // Reports dropped bytes to the reporter. 101 | // buffer_ must be updated to remove the dropped bytes prior to invocation. 
102 | void ReportCorruption(uint64_t bytes, const char* reason); 103 | void ReportDrop(uint64_t bytes, const Status& reason); 104 | 105 | // No copying allowed 106 | Reader(const Reader&); 107 | void operator=(const Reader&); 108 | }; 109 | 110 | } // namespace log 111 | } // namespace leveldb 112 | 113 | #endif // STORAGE_LEVELDB_DB_LOG_READER_H_ 114 | -------------------------------------------------------------------------------- /db/log_writer.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include "db/log_writer.h" 6 | 7 | #include 8 | #include "leveldb/env.h" 9 | #include "util/coding.h" 10 | #include "util/crc32c.h" 11 | 12 | namespace leveldb { 13 | namespace log { 14 | 15 | static void InitTypeCrc(uint32_t* type_crc) { 16 | for (int i = 0; i <= kMaxRecordType; i++) { 17 | char t = static_cast(i); 18 | type_crc[i] = crc32c::Value(&t, 1); 19 | } 20 | } 21 | 22 | Writer::Writer(WritableFile* dest) 23 | : dest_(dest), 24 | block_offset_(0) { 25 | InitTypeCrc(type_crc_); 26 | } 27 | 28 | Writer::Writer(WritableFile* dest, uint64_t dest_length) 29 | : dest_(dest), block_offset_(dest_length % kBlockSize) { 30 | InitTypeCrc(type_crc_); 31 | } 32 | 33 | Writer::~Writer() { 34 | } 35 | 36 | Status Writer::AddRecord(const Slice& slice) { 37 | const char* ptr = slice.data(); 38 | size_t left = slice.size(); 39 | 40 | // Fragment the record if necessary and emit it. 
Note that if slice 41 | // is empty, we still want to iterate once to emit a single 42 | // zero-length record 43 | Status s; 44 | // begin用来标识当前record是否是第一块 45 | // 一条完整记录最多被拆分成三块record 46 | bool begin = true; 47 | do { 48 | // 计算当前blcok还剩余多少空间,字节为单位 49 | const int leftover = kBlockSize - block_offset_; 50 | assert(leftover >= 0); 51 | // 如果剩余的空间连header都放不下了,那么直接把剩余进行填充,跳过 52 | if (leftover < kHeaderSize) { 53 | // Switch to a new block 54 | if (leftover > 0) { 55 | // Fill the trailer (literal below relies on kHeaderSize being 7) 56 | assert(kHeaderSize == 7); 57 | dest_->Append(Slice("\x00\x00\x00\x00\x00\x00", leftover)); 58 | } 59 | // 写满一个block,同时将block_offset置0 60 | // 下次写,block就空了,逻辑上相当与切换了一个新的block 61 | block_offset_ = 0; 62 | } 63 | 64 | // Invariant: we never leave < kHeaderSize bytes in a block. 65 | assert(kBlockSize - block_offset_ - kHeaderSize >= 0); 66 | 67 | const size_t avail = kBlockSize - block_offset_ - kHeaderSize; 68 | // 每次写入的大小,最大为avail,所以必然不会超出一个block 69 | // 如果left >= avail,则意味着被切分 70 | const size_t fragment_length = (left < avail) ? 
left : avail; 71 | 72 | RecordType type; 73 | // end用来标识当前record是否是最后一块 74 | const bool end = (left == fragment_length); 75 | if (begin && end) { 76 | type = kFullType; 77 | } else if (begin) { 78 | type = kFirstType; 79 | } else if (end) { 80 | type = kLastType; 81 | } else { 82 | type = kMiddleType; 83 | } 84 | 85 | // 写入文件 86 | s = EmitPhysicalRecord(type, ptr, fragment_length); 87 | ptr += fragment_length; 88 | left -= fragment_length; 89 | begin = false; 90 | } while (s.ok() && left > 0); 91 | return s; 92 | } 93 | 94 | // header 结构如下: 95 | // ------------------------------------------------------- 96 | // | 4 byte | 1 byte | 1 byte | 1 byte | 97 | // ------------------------------------------------------- 98 | // | crc | data szie 高8位 | data size 低8位 | type | 99 | // ------------------------------------------------------- 100 | // block size 最大32kb, 2^15 byte, 所以16bit存放n足矣 101 | 102 | Status Writer::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n) { 103 | assert(n <= 0xffff); // Must fit in two bytes 104 | assert(block_offset_ + kHeaderSize + n <= kBlockSize); 105 | 106 | // Format the header 107 | char buf[kHeaderSize]; 108 | buf[4] = static_cast(n & 0xff); 109 | buf[5] = static_cast(n >> 8); 110 | buf[6] = static_cast(t); 111 | 112 | // Compute the crc of the record type and the payload. 
113 | uint32_t crc = crc32c::Extend(type_crc_[t], ptr, n); 114 | crc = crc32c::Mask(crc); // Adjust for storage 115 | EncodeFixed32(buf, crc); 116 | 117 | // Write the header and the payload 118 | // 先写header,再写data 119 | Status s = dest_->Append(Slice(buf, kHeaderSize)); 120 | if (s.ok()) { 121 | s = dest_->Append(Slice(ptr, n)); 122 | if (s.ok()) { 123 | s = dest_->Flush(); 124 | } 125 | } 126 | block_offset_ += kHeaderSize + n; 127 | return s; 128 | } 129 | 130 | } // namespace log 131 | } // namespace leveldb 132 | -------------------------------------------------------------------------------- /db/log_writer.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_DB_LOG_WRITER_H_ 6 | #define STORAGE_LEVELDB_DB_LOG_WRITER_H_ 7 | 8 | #include 9 | #include "db/log_format.h" 10 | #include "leveldb/slice.h" 11 | #include "leveldb/status.h" 12 | 13 | namespace leveldb { 14 | 15 | class WritableFile; 16 | 17 | namespace log { 18 | 19 | class Writer { 20 | public: 21 | // Create a writer that will append data to "*dest". 22 | // "*dest" must be initially empty. 23 | // "*dest" must remain live while this Writer is in use. 24 | explicit Writer(WritableFile* dest); 25 | 26 | // Create a writer that will append data to "*dest". 27 | // "*dest" must have initial length "dest_length". 28 | // "*dest" must remain live while this Writer is in use. 29 | Writer(WritableFile* dest, uint64_t dest_length); 30 | 31 | ~Writer(); 32 | 33 | Status AddRecord(const Slice& slice); 34 | 35 | private: 36 | WritableFile* dest_; // linux 下为 PosixWritableFile 37 | int block_offset_; // Current offset in block 38 | 39 | // crc32c values for all supported record types. 
These are 40 | // pre-computed to reduce the overhead of computing the crc of the 41 | // record type stored in the header. 42 | uint32_t type_crc_[kMaxRecordType + 1]; 43 | 44 | Status EmitPhysicalRecord(RecordType type, const char* ptr, size_t length); 45 | 46 | // No copying allowed 47 | Writer(const Writer&); 48 | void operator=(const Writer&); 49 | }; 50 | 51 | } // namespace log 52 | } // namespace leveldb 53 | 54 | #endif // STORAGE_LEVELDB_DB_LOG_WRITER_H_ 55 | -------------------------------------------------------------------------------- /db/memtable.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_DB_MEMTABLE_H_ 6 | #define STORAGE_LEVELDB_DB_MEMTABLE_H_ 7 | 8 | #include 9 | #include "leveldb/db.h" 10 | #include "db/dbformat.h" 11 | #include "db/skiplist.h" 12 | #include "util/arena.h" 13 | 14 | namespace leveldb { 15 | 16 | class InternalKeyComparator; 17 | class Mutex; 18 | class MemTableIterator; 19 | 20 | class MemTable { 21 | public: 22 | // MemTables are reference counted. The initial reference count 23 | // is zero and the caller must call Ref() at least once. 24 | explicit MemTable(const InternalKeyComparator& comparator); 25 | 26 | // Increase reference count. 27 | void Ref() { ++refs_; } 28 | 29 | // Drop reference count. Delete if no more references exist. 30 | void Unref() { 31 | --refs_; 32 | assert(refs_ >= 0); 33 | if (refs_ <= 0) { 34 | delete this; 35 | } 36 | } 37 | 38 | // Returns an estimate of the number of bytes of data in use by this 39 | // data structure. It is safe to call when MemTable is being modified. 40 | size_t ApproximateMemoryUsage(); 41 | 42 | // Return an iterator that yields the contents of the memtable. 
43 | // 44 | // The caller must ensure that the underlying MemTable remains live 45 | // while the returned iterator is live. The keys returned by this 46 | // iterator are internal keys encoded by AppendInternalKey in the 47 | // db/format.{h,cc} module. 48 | Iterator* NewIterator(); 49 | 50 | // Add an entry into memtable that maps key to value at the 51 | // specified sequence number and with the specified type. 52 | // Typically value will be empty if type==kTypeDeletion. 53 | void Add(SequenceNumber seq, ValueType type, 54 | const Slice& key, 55 | const Slice& value); 56 | 57 | // If memtable contains a value for key, store it in *value and return true. 58 | // If memtable contains a deletion for key, store a NotFound() error 59 | // in *status and return true. 60 | // Else, return false. 61 | bool Get(const LookupKey& key, std::string* value, Status* s); 62 | 63 | private: 64 | ~MemTable(); // Private since only Unref() should be used to delete it 65 | 66 | struct KeyComparator { 67 | const InternalKeyComparator comparator; 68 | explicit KeyComparator(const InternalKeyComparator& c) : comparator(c) { } 69 | int operator()(const char* a, const char* b) const; 70 | }; 71 | friend class MemTableIterator; 72 | friend class MemTableBackwardIterator; 73 | 74 | typedef SkipList Table; 75 | 76 | KeyComparator comparator_; 77 | int refs_; 78 | Arena arena_; 79 | Table table_; 80 | 81 | // No copying allowed 82 | MemTable(const MemTable&); 83 | void operator=(const MemTable&); 84 | }; 85 | 86 | } // namespace leveldb 87 | 88 | #endif // STORAGE_LEVELDB_DB_MEMTABLE_H_ 89 | -------------------------------------------------------------------------------- /db/snapshot.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 
See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_DB_SNAPSHOT_H_ 6 | #define STORAGE_LEVELDB_DB_SNAPSHOT_H_ 7 | 8 | #include "db/dbformat.h" 9 | #include "leveldb/db.h" 10 | 11 | namespace leveldb { 12 | 13 | class SnapshotList; 14 | 15 | // Snapshots are kept in a doubly-linked list in the DB. 16 | // Each SnapshotImpl corresponds to a particular sequence number. 17 | class SnapshotImpl : public Snapshot { 18 | public: 19 | SequenceNumber number_; // const after creation 20 | 21 | private: 22 | friend class SnapshotList; 23 | 24 | // SnapshotImpl is kept in a doubly-linked circular list 25 | SnapshotImpl* prev_; 26 | SnapshotImpl* next_; 27 | 28 | SnapshotList* list_; // just for sanity checks 29 | }; 30 | 31 | class SnapshotList { 32 | public: 33 | SnapshotList() { 34 | list_.prev_ = &list_; 35 | list_.next_ = &list_; 36 | } 37 | 38 | bool empty() const { return list_.next_ == &list_; } 39 | SnapshotImpl* oldest() const { assert(!empty()); return list_.next_; } 40 | SnapshotImpl* newest() const { assert(!empty()); return list_.prev_; } 41 | 42 | const SnapshotImpl* New(SequenceNumber seq) { 43 | SnapshotImpl* s = new SnapshotImpl; 44 | s->number_ = seq; 45 | s->list_ = this; 46 | s->next_ = &list_; 47 | s->prev_ = list_.prev_; 48 | s->prev_->next_ = s; 49 | s->next_->prev_ = s; 50 | return s; 51 | } 52 | 53 | void Delete(const SnapshotImpl* s) { 54 | assert(s->list_ == this); 55 | s->prev_->next_ = s->next_; 56 | s->next_->prev_ = s->prev_; 57 | delete s; 58 | } 59 | 60 | private: 61 | // Dummy head of doubly-linked list of snapshots 62 | SnapshotImpl list_; 63 | }; 64 | 65 | } // namespace leveldb 66 | 67 | #endif // STORAGE_LEVELDB_DB_SNAPSHOT_H_ 68 | -------------------------------------------------------------------------------- /db/table_cache.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include "db/table_cache.h" 6 | 7 | #include "db/filename.h" 8 | #include "leveldb/env.h" 9 | #include "leveldb/table.h" 10 | #include "util/coding.h" 11 | 12 | namespace leveldb { 13 | 14 | // table_cache里存放的数据,key是fimenumber 15 | struct TableAndFile { 16 | RandomAccessFile* file; 17 | Table* table; 18 | }; 19 | 20 | static void DeleteEntry(const Slice& key, void* value) { 21 | TableAndFile* tf = reinterpret_cast(value); 22 | delete tf->table; 23 | delete tf->file; 24 | delete tf; 25 | } 26 | 27 | static void UnrefEntry(void* arg1, void* arg2) { 28 | Cache* cache = reinterpret_cast(arg1); 29 | Cache::Handle* h = reinterpret_cast(arg2); 30 | cache->Release(h); 31 | } 32 | 33 | TableCache::TableCache(const std::string& dbname, 34 | const Options* options, 35 | int entries) 36 | : env_(options->env), 37 | dbname_(dbname), 38 | options_(options), 39 | cache_(NewLRUCache(entries)) { 40 | } 41 | 42 | TableCache::~TableCache() { 43 | delete cache_; 44 | } 45 | 46 | Status TableCache::FindTable(uint64_t file_number, uint64_t file_size, 47 | Cache::Handle** handle) { 48 | Status s; 49 | // 构造出table_cache的key,即file_number 50 | char buf[sizeof(file_number)]; 51 | EncodeFixed64(buf, file_number); 52 | Slice key(buf, sizeof(buf)); 53 | // 这里的cache只是table cache,存放的是文件handle,table的指针 54 | // table里存放了sstable的index内容,以及指示block_cache的cache_id 55 | // 注意和block cache做区分 56 | // 如果返回的handle不为NULL,lookup内部会增加其引用计数 57 | // 所以,调用放在使用玩handle之后后需要减少其引用计数 58 | *handle = cache_->Lookup(key); 59 | // 如果 *handle != NULL,说明命中缓存,直接返回OK 60 | if (*handle == NULL) { 61 | std::string fname = TableFileName(dbname_, file_number); 62 | RandomAccessFile* file = NULL; 63 | Table* table = NULL; 64 | s = env_->NewRandomAccessFile(fname, &file); 65 | if (!s.ok()) { 66 | std::string old_fname = SSTTableFileName(dbname_, file_number); 67 | if 
(env_->NewRandomAccessFile(old_fname, &file).ok()) { 68 | s = Status::OK(); 69 | } 70 | } 71 | if (s.ok()) { 72 | // 一次文件读取, 读取index block 73 | s = Table::Open(*options_, file, file_size, &table); 74 | } 75 | 76 | if (!s.ok()) { 77 | assert(table == NULL); 78 | delete file; 79 | // We do not cache error results so that if the error is transient, 80 | // or somebody repairs the file, we recover automatically. 81 | } else { 82 | // table_cache里存放的是TableAndFile结构,此处的key是file_number 83 | TableAndFile* tf = new TableAndFile; 84 | tf->file = file; 85 | tf->table = table; 86 | // 把key, tf组织进一个LRUHandle中,插入LRU-cache内,然后返回这个LRUHandle 87 | // 返回handle同样会增加其引用计数,由调用方负责减小这个值 88 | // 如果这个值减小到0,会通过DeleteEnty来真正释放 89 | *handle = cache_->Insert(key, tf, 1, &DeleteEntry); 90 | } 91 | } 92 | return s; 93 | } 94 | 95 | Iterator* TableCache::NewIterator(const ReadOptions& options, 96 | uint64_t file_number, 97 | uint64_t file_size, 98 | Table** tableptr) { 99 | if (tableptr != NULL) { 100 | *tableptr = NULL; 101 | } 102 | 103 | Cache::Handle* handle = NULL; 104 | Status s = FindTable(file_number, file_size, &handle); 105 | if (!s.ok()) { 106 | return NewErrorIterator(s); 107 | } 108 | 109 | Table* table = reinterpret_cast(cache_->Value(handle))->table; 110 | Iterator* result = table->NewIterator(options); 111 | result->RegisterCleanup(&UnrefEntry, cache_, handle); 112 | if (tableptr != NULL) { 113 | *tableptr = table; 114 | } 115 | return result; 116 | } 117 | 118 | Status TableCache::Get(const ReadOptions& options, 119 | uint64_t file_number, 120 | uint64_t file_size, 121 | const Slice& k, 122 | void* arg, 123 | void (*saver)(void*, const Slice&, const Slice&)) { 124 | Cache::Handle* handle = NULL; 125 | // 第一次文件读取,加载 index 126 | Status s = FindTable(file_number, file_size, &handle); 127 | if (s.ok()) { 128 | // 避免在cache中再查找了,直接用上文中返回的handle,取table进行查找 129 | Table* t = reinterpret_cast(cache_->Value(handle))->table; 130 | // 如果cache里没有,则触发第二次文件读取,读取data block内容 131 | s = 
t->InternalGet(options, k, arg, saver); 132 | cache_->Release(handle); 133 | } 134 | return s; 135 | } 136 | 137 | void TableCache::Evict(uint64_t file_number) { 138 | char buf[sizeof(file_number)]; 139 | EncodeFixed64(buf, file_number); 140 | cache_->Erase(Slice(buf, sizeof(buf))); 141 | } 142 | 143 | } // namespace leveldb 144 | -------------------------------------------------------------------------------- /db/table_cache.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | // 5 | // Thread-safe (provides internal synchronization) 6 | 7 | #ifndef STORAGE_LEVELDB_DB_TABLE_CACHE_H_ 8 | #define STORAGE_LEVELDB_DB_TABLE_CACHE_H_ 9 | 10 | #include 11 | #include 12 | #include "db/dbformat.h" 13 | #include "leveldb/cache.h" 14 | #include "leveldb/table.h" 15 | #include "port/port.h" 16 | 17 | namespace leveldb { 18 | 19 | class Env; 20 | 21 | class TableCache { 22 | public: 23 | TableCache(const std::string& dbname, const Options* options, int entries); 24 | ~TableCache(); 25 | 26 | // Return an iterator for the specified file number (the corresponding 27 | // file length must be exactly "file_size" bytes). If "tableptr" is 28 | // non-NULL, also sets "*tableptr" to point to the Table object 29 | // underlying the returned iterator, or NULL if no Table object underlies 30 | // the returned iterator. The returned "*tableptr" object is owned by 31 | // the cache and should not be deleted, and is valid for as long as the 32 | // returned iterator is live. 
33 | Iterator* NewIterator(const ReadOptions& options, 34 | uint64_t file_number, 35 | uint64_t file_size, 36 | Table** tableptr = NULL); 37 | 38 | // If a seek to internal key "k" in specified file finds an entry, 39 | // call (*handle_result)(arg, found_key, found_value). 40 | Status Get(const ReadOptions& options, 41 | uint64_t file_number, 42 | uint64_t file_size, 43 | const Slice& k, 44 | void* arg, 45 | void (*handle_result)(void*, const Slice&, const Slice&)); 46 | 47 | // Evict any entry for the specified file number 48 | void Evict(uint64_t file_number); 49 | 50 | private: 51 | Env* const env_; 52 | const std::string dbname_; 53 | const Options* options_; 54 | Cache* cache_; 55 | 56 | Status FindTable(uint64_t file_number, uint64_t file_size, Cache::Handle**); 57 | }; 58 | 59 | } // namespace leveldb 60 | 61 | #endif // STORAGE_LEVELDB_DB_TABLE_CACHE_H_ 62 | -------------------------------------------------------------------------------- /db/version_edit.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | 5 | #ifndef STORAGE_LEVELDB_DB_VERSION_EDIT_H_ 6 | #define STORAGE_LEVELDB_DB_VERSION_EDIT_H_ 7 | 8 | #include <set> 9 | #include <utility> 10 | #include <vector> 11 | #include "db/dbformat.h" 12 | 13 | namespace leveldb { 14 | 15 | class VersionSet; 16 | 17 | // Metadata for one table file: file number, file size, smallest key, largest key. 18 | // An sstable file is named by its file number plus a fixed suffix. 19 | struct FileMetaData { 20 | int refs; 21 | int allowed_seeks; // Seeks allowed until compaction 22 | uint64_t number; // File number (assigned by VersionEdit::AddFile) 23 | uint64_t file_size; // File size in bytes 24 | InternalKey smallest; // Smallest internal key served by table 25 | InternalKey largest; // Largest internal key served by table 26 | // number is zero-initialized as well so a default-constructed entry never holds an indeterminate value. 27 | FileMetaData() : refs(0), allowed_seeks(1 << 30), number(0), file_size(0) { } 28 | }; 29 | 30 | class VersionEdit { 31 | public: 32 | VersionEdit() { Clear(); } 33 | ~VersionEdit() { } 34 | 35 | void Clear(); 36 | 37 | void SetComparatorName(const Slice& name) { 38 | has_comparator_ = true; 39 | comparator_ = name.ToString(); 40 | } 41 | void SetLogNumber(uint64_t num) { 42 | has_log_number_ = true; 43 | log_number_ = num; 44 | } 45 | void SetPrevLogNumber(uint64_t num) { 46 | has_prev_log_number_ = true; 47 | prev_log_number_ = num; 48 | } 49 | void SetNextFile(uint64_t num) { 50 | has_next_file_number_ = true; 51 | next_file_number_ = num; 52 | } 53 | void SetLastSequence(SequenceNumber seq) { 54 | has_last_sequence_ = true; 55 | last_sequence_ = seq; 56 | } 57 | void SetCompactPointer(int level, const InternalKey& key) { 58 | compact_pointers_.push_back(std::make_pair(level, key)); 59 | } 60 | 61 | // Add the file with the specified number at the specified level. 
62 | // REQUIRES: This version has not been saved (see VersionSet::SaveTo) 63 | // REQUIRES: "smallest" and "largest" are smallest and largest keys in file 64 | void AddFile(int level, uint64_t file, 65 | uint64_t file_size, 66 | const InternalKey& smallest, 67 | const InternalKey& largest) { 68 | FileMetaData f; 69 | f.number = file; 70 | f.file_size = file_size; 71 | f.smallest = smallest; 72 | f.largest = largest; 73 | new_files_.push_back(std::make_pair(level, f)); 74 | } 75 | 76 | // Delete the specified "file" from the specified "level". 77 | void DeleteFile(int level, uint64_t file) { 78 | deleted_files_.insert(std::make_pair(level, file)); 79 | } 80 | 81 | void EncodeTo(std::string* dst) const; 82 | Status DecodeFrom(const Slice& src); 83 | 84 | std::string DebugString() const; 85 | 86 | private: 87 | friend class VersionSet; 88 | 89 | // Set of (level, file number) pairs 90 | typedef std::set< std::pair<int, uint64_t> > DeletedFileSet; 91 | 92 | std::string comparator_; 93 | // The numbers below are all written to the manifest. 94 | 95 | // Number of the current binlog 96 | uint64_t log_number_; 97 | // Used by older versions of leveldb; no longer meaningful. 98 | // Only used when recovering a database written by an older leveldb version. 99 | uint64_t prev_log_number_; 100 | // In leveldb every file (manifest, db, binlog) has a number; VersionSet allocates them globally. 101 | uint64_t next_file_number_; 102 | SequenceNumber last_sequence_; 103 | bool has_comparator_; 104 | bool has_log_number_; 105 | bool has_prev_log_number_; 106 | bool has_next_file_number_; 107 | bool has_last_sequence_; 108 | 109 | std::vector< std::pair<int, InternalKey> > compact_pointers_; 110 | DeletedFileSet deleted_files_; 111 | std::vector< std::pair<int, FileMetaData> > new_files_; 112 | }; 113 | 114 | } // namespace leveldb 115 | 116 | #endif // STORAGE_LEVELDB_DB_VERSION_EDIT_H_ 117 | -------------------------------------------------------------------------------- /db/version_edit_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include "db/version_edit.h" 6 | #include "util/testharness.h" 7 | 8 | namespace leveldb { 9 | 10 | static void TestEncodeDecode(const VersionEdit& edit) { 11 | std::string encoded, encoded2; 12 | edit.EncodeTo(&encoded); 13 | VersionEdit parsed; 14 | Status s = parsed.DecodeFrom(encoded); 15 | ASSERT_TRUE(s.ok()) << s.ToString(); 16 | parsed.EncodeTo(&encoded2); 17 | ASSERT_EQ(encoded, encoded2); 18 | } 19 | 20 | class VersionEditTest { }; 21 | 22 | TEST(VersionEditTest, EncodeDecode) { 23 | static const uint64_t kBig = 1ull << 50; 24 | 25 | VersionEdit edit; 26 | for (int i = 0; i < 4; i++) { 27 | TestEncodeDecode(edit); 28 | edit.AddFile(3, kBig + 300 + i, kBig + 400 + i, 29 | InternalKey("foo", kBig + 500 + i, kTypeValue), 30 | InternalKey("zoo", kBig + 600 + i, kTypeDeletion)); 31 | edit.DeleteFile(4, kBig + 700 + i); 32 | edit.SetCompactPointer(i, InternalKey("x", kBig + 900 + i, kTypeValue)); 33 | } 34 | 35 | edit.SetComparatorName("foo"); 36 | edit.SetLogNumber(kBig + 100); 37 | edit.SetNextFile(kBig + 200); 38 | edit.SetLastSequence(kBig + 1000); 39 | TestEncodeDecode(edit); 40 | } 41 | 42 | } // namespace leveldb 43 | 44 | int main(int argc, char** argv) { 45 | return leveldb::test::RunAllTests(); 46 | } 47 | -------------------------------------------------------------------------------- /db/write_batch_internal.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | 5 | #ifndef STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_ 6 | #define STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_ 7 | 8 | #include "db/dbformat.h" 9 | #include "leveldb/write_batch.h" 10 | 11 | namespace leveldb { 12 | 13 | class MemTable; 14 | 15 | // WriteBatchInternal provides static methods for manipulating a 16 | // WriteBatch that we don't want in the public WriteBatch interface. 17 | class WriteBatchInternal { 18 | public: 19 | // Return the number of entries in the batch. 20 | static int Count(const WriteBatch* batch); 21 | 22 | // Set the count for the number of entries in the batch. 23 | static void SetCount(WriteBatch* batch, int n); 24 | 25 | // Return the sequence number for the start of this batch. 26 | static SequenceNumber Sequence(const WriteBatch* batch); 27 | 28 | // Store the specified number as the sequence number for the start of 29 | // this batch. 30 | static void SetSequence(WriteBatch* batch, SequenceNumber seq); 31 | 32 | static Slice Contents(const WriteBatch* batch) { 33 | return Slice(batch->rep_); 34 | } 35 | 36 | static size_t ByteSize(const WriteBatch* batch) { 37 | return batch->rep_.size(); 38 | } 39 | 40 | static void SetContents(WriteBatch* batch, const Slice& contents); 41 | 42 | static Status InsertInto(const WriteBatch* batch, MemTable* memtable); 43 | 44 | static void Append(WriteBatch* dst, const WriteBatch* src); 45 | }; 46 | 47 | } // namespace leveldb 48 | 49 | 50 | #endif // STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_ 51 | -------------------------------------------------------------------------------- /db/write_batch_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | 5 | #include "leveldb/db.h" 6 | 7 | #include "db/memtable.h" 8 | #include "db/write_batch_internal.h" 9 | #include "leveldb/env.h" 10 | #include "util/logging.h" 11 | #include "util/testharness.h" 12 | 13 | namespace leveldb { 14 | 15 | static std::string PrintContents(WriteBatch* b) { 16 | InternalKeyComparator cmp(BytewiseComparator()); 17 | MemTable* mem = new MemTable(cmp); 18 | mem->Ref(); 19 | std::string state; 20 | Status s = WriteBatchInternal::InsertInto(b, mem); 21 | int count = 0; 22 | Iterator* iter = mem->NewIterator(); 23 | for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { 24 | ParsedInternalKey ikey; 25 | ASSERT_TRUE(ParseInternalKey(iter->key(), &ikey)); 26 | switch (ikey.type) { 27 | case kTypeValue: 28 | state.append("Put("); 29 | state.append(ikey.user_key.ToString()); 30 | state.append(", "); 31 | state.append(iter->value().ToString()); 32 | state.append(")"); 33 | count++; 34 | break; 35 | case kTypeDeletion: 36 | state.append("Delete("); 37 | state.append(ikey.user_key.ToString()); 38 | state.append(")"); 39 | count++; 40 | break; 41 | } 42 | state.append("@"); 43 | state.append(NumberToString(ikey.sequence)); 44 | } 45 | delete iter; 46 | if (!s.ok()) { 47 | state.append("ParseError()"); 48 | } else if (count != WriteBatchInternal::Count(b)) { 49 | state.append("CountMismatch()"); 50 | } 51 | mem->Unref(); 52 | return state; 53 | } 54 | 55 | class WriteBatchTest { }; 56 | 57 | TEST(WriteBatchTest, Empty) { 58 | WriteBatch batch; 59 | ASSERT_EQ("", PrintContents(&batch)); 60 | ASSERT_EQ(0, WriteBatchInternal::Count(&batch)); 61 | } 62 | 63 | TEST(WriteBatchTest, Multiple) { 64 | WriteBatch batch; 65 | batch.Put(Slice("foo"), Slice("bar")); 66 | batch.Delete(Slice("box")); 67 | batch.Put(Slice("baz"), Slice("boo")); 68 | WriteBatchInternal::SetSequence(&batch, 100); 69 | ASSERT_EQ(100, WriteBatchInternal::Sequence(&batch)); 70 | ASSERT_EQ(3, WriteBatchInternal::Count(&batch)); 71 | ASSERT_EQ("Put(baz, boo)@102" 72 | 
"Delete(box)@101" 73 | "Put(foo, bar)@100", 74 | PrintContents(&batch)); 75 | } 76 | 77 | TEST(WriteBatchTest, Corruption) { 78 | WriteBatch batch; 79 | batch.Put(Slice("foo"), Slice("bar")); 80 | batch.Delete(Slice("box")); 81 | WriteBatchInternal::SetSequence(&batch, 200); 82 | Slice contents = WriteBatchInternal::Contents(&batch); 83 | WriteBatchInternal::SetContents(&batch, 84 | Slice(contents.data(),contents.size()-1)); 85 | ASSERT_EQ("Put(foo, bar)@200" 86 | "ParseError()", 87 | PrintContents(&batch)); 88 | } 89 | 90 | TEST(WriteBatchTest, Append) { 91 | WriteBatch b1, b2; 92 | WriteBatchInternal::SetSequence(&b1, 200); 93 | WriteBatchInternal::SetSequence(&b2, 300); 94 | WriteBatchInternal::Append(&b1, &b2); 95 | ASSERT_EQ("", 96 | PrintContents(&b1)); 97 | b2.Put("a", "va"); 98 | WriteBatchInternal::Append(&b1, &b2); 99 | ASSERT_EQ("Put(a, va)@200", 100 | PrintContents(&b1)); 101 | b2.Clear(); 102 | b2.Put("b", "vb"); 103 | WriteBatchInternal::Append(&b1, &b2); 104 | ASSERT_EQ("Put(a, va)@200" 105 | "Put(b, vb)@201", 106 | PrintContents(&b1)); 107 | b2.Delete("foo"); 108 | WriteBatchInternal::Append(&b1, &b2); 109 | ASSERT_EQ("Put(a, va)@200" 110 | "Put(b, vb)@202" 111 | "Put(b, vb)@201" 112 | "Delete(foo)@203", 113 | PrintContents(&b1)); 114 | } 115 | 116 | } // namespace leveldb 117 | 118 | int main(int argc, char** argv) { 119 | return leveldb::test::RunAllTests(); 120 | } 121 | -------------------------------------------------------------------------------- /doc/LevelDB日知录.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1Feng/leveldb-annotated/7c1ec47a848f67aafe2a8f1ec7c0ea8b12acdfc7/doc/LevelDB日知录.pdf -------------------------------------------------------------------------------- /doc/doc.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin-left: 0.5in; 3 | margin-right: 0.5in; 4 | background: white; 5 | color: black; 6 | } 7 
| 8 | h1 { 9 | margin-left: -0.2in; 10 | font-size: 14pt; 11 | } 12 | h2 { 13 | margin-left: -0in; 14 | font-size: 12pt; 15 | } 16 | h3 { 17 | margin-left: -0in; 18 | } 19 | h4 { 20 | margin-left: -0in; 21 | } 22 | hr { 23 | margin-left: -0in; 24 | } 25 | 26 | /* Definition lists: definition term bold */ 27 | dt { 28 | font-weight: bold; 29 | } 30 | 31 | address { 32 | text-align: center; 33 | } 34 | code,samp,var { 35 | color: blue; 36 | } 37 | kbd { 38 | color: #600000; 39 | } 40 | div.note p { 41 | float: right; 42 | width: 3in; 43 | margin-right: 0%; 44 | padding: 1px; 45 | border: 2px solid #6060a0; 46 | background-color: #fffff0; 47 | } 48 | 49 | ul { 50 | margin-top: -0em; 51 | margin-bottom: -0em; 52 | } 53 | 54 | ol { 55 | margin-top: -0em; 56 | margin-bottom: -0em; 57 | } 58 | 59 | UL.nobullets { 60 | list-style-type: none; 61 | list-style-image: none; 62 | margin-left: -1em; 63 | } 64 | 65 | p { 66 | margin: 1em 0 1em 0; 67 | padding: 0 0 0 0; 68 | } 69 | 70 | pre { 71 | line-height: 1.3em; 72 | padding: 0.4em 0 0.8em 0; 73 | margin: 0 0 0 0; 74 | border: 0 0 0 0; 75 | color: blue; 76 | } 77 | 78 | .datatable { 79 | margin-left: auto; 80 | margin-right: auto; 81 | margin-top: 2em; 82 | margin-bottom: 2em; 83 | border: 1px solid; 84 | } 85 | 86 | .datatable td,th { 87 | padding: 0 0.5em 0 0.5em; 88 | text-align: right; 89 | } 90 | -------------------------------------------------------------------------------- /doc/leveldb实现解析.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1Feng/leveldb-annotated/7c1ec47a848f67aafe2a8f1ec7c0ea8b12acdfc7/doc/leveldb实现解析.pdf -------------------------------------------------------------------------------- /doc/log_format.txt: -------------------------------------------------------------------------------- 1 | The log file contents are a sequence of 32KB blocks. The only 2 | exception is that the tail of the file may contain a partial block. 
3 | 4 | Each block consists of a sequence of records: 5 | block := record* trailer? 6 | record := 7 | checksum: uint32 // crc32c of type and data[] ; little-endian 8 | length: uint16 // little-endian 9 | type: uint8 // One of FULL, FIRST, MIDDLE, LAST 10 | data: uint8[length] 11 | 12 | A record never starts within the last six bytes of a block (since it 13 | won't fit). Any leftover bytes here form the trailer, which must 14 | consist entirely of zero bytes and must be skipped by readers. 15 | 16 | Aside: if exactly seven bytes are left in the current block, and a new 17 | non-zero length record is added, the writer must emit a FIRST record 18 | (which contains zero bytes of user data) to fill up the trailing seven 19 | bytes of the block and then emit all of the user data in subsequent 20 | blocks. 21 | 22 | More types may be added in the future. Some Readers may skip record 23 | types they do not understand, others may report that some data was 24 | skipped. 25 | 26 | FULL == 1 27 | FIRST == 2 28 | MIDDLE == 3 29 | LAST == 4 30 | 31 | The FULL record contains the contents of an entire user record. 32 | 33 | FIRST, MIDDLE, LAST are types used for user records that have been 34 | split into multiple fragments (typically because of block boundaries). 35 | FIRST is the type of the first fragment of a user record, LAST is the 36 | type of the last fragment of a user record, and MIDDLE is the type of 37 | all interior fragments of a user record. 38 | 39 | Example: consider a sequence of user records: 40 | A: length 1000 41 | B: length 97270 42 | C: length 8000 43 | A will be stored as a FULL record in the first block. 44 | 45 | B will be split into three fragments: first fragment occupies the rest 46 | of the first block, second fragment occupies the entirety of the 47 | second block, and the third fragment occupies a prefix of the third 48 | block. This will leave six bytes free in the third block, which will 49 | be left empty as the trailer. 
50 | 51 | C will be stored as a FULL record in the fourth block. 52 | 53 | =================== 54 | 55 | Some benefits over the recordio format: 56 | 57 | (1) We do not need any heuristics for resyncing - just go to next 58 | block boundary and scan. If there is a corruption, skip to the next 59 | block. As a side-benefit, we do not get confused when part of the 60 | contents of one log file are embedded as a record inside another log 61 | file. 62 | 63 | (2) Splitting at approximate boundaries (e.g., for mapreduce) is 64 | simple: find the next block boundary and skip records until we 65 | hit a FULL or FIRST record. 66 | 67 | (3) We do not need extra buffering for large records. 68 | 69 | Some downsides compared to recordio format: 70 | 71 | (1) No packing of tiny records. This could be fixed by adding a new 72 | record type, so it is a shortcoming of the current implementation, 73 | not necessarily the format. 74 | 75 | (2) No compression. Again, this could be fixed by adding new record types. 76 | -------------------------------------------------------------------------------- /doc/table_format.txt: -------------------------------------------------------------------------------- 1 | File format 2 | =========== 3 | 4 | <beginning_of_file> 5 | [data block 1] 6 | [data block 2] 7 | ... 8 | [data block N] 9 | [meta block 1] 10 | ... 11 | [meta block K] 12 | [metaindex block] 13 | [index block] 14 | [Footer] (fixed size; starts at file_size - sizeof(Footer)) 15 | <end_of_file> 16 | 17 | The file contains internal pointers. Each such pointer is called 18 | a BlockHandle and contains the following information: 19 | offset: varint64 20 | size: varint64 21 | See https://developers.google.com/protocol-buffers/docs/encoding#varints 22 | for an explanation of varint64 format. 23 | 24 | (1) The sequence of key/value pairs in the file is stored in sorted 25 | order and partitioned into a sequence of data blocks. These blocks 26 | come one after another at the beginning of the file. 
Each data block 27 | is formatted according to the code in block_builder.cc, and then 28 | optionally compressed. 29 | 30 | (2) After the data blocks we store a bunch of meta blocks. The 31 | supported meta block types are described below. More meta block types 32 | may be added in the future. Each meta block is again formatted using 33 | block_builder.cc and then optionally compressed. 34 | 35 | (3) A "metaindex" block. It contains one entry for every other meta 36 | block where the key is the name of the meta block and the value is a 37 | BlockHandle pointing to that meta block. 38 | 39 | (4) An "index" block. This block contains one entry per data block, 40 | where the key is a string >= last key in that data block and before 41 | the first key in the successive data block. The value is the 42 | BlockHandle for the data block. 43 | 44 | (5) At the very end of the file is a fixed length footer that contains 45 | the BlockHandle of the metaindex and index blocks as well as a magic number. 46 | metaindex_handle: char[p]; // Block handle for metaindex 47 | index_handle: char[q]; // Block handle for index 48 | padding: char[40-p-q]; // zeroed bytes to make fixed length 49 | // (40==2*BlockHandle::kMaxEncodedLength) 50 | magic: fixed64; // == 0xdb4775248b80fb57 (little-endian) 51 | 52 | "filter" Meta Block 53 | ------------------- 54 | 55 | If a "FilterPolicy" was specified when the database was opened, a 56 | filter block is stored in each table. The "metaindex" block contains 57 | an entry that maps from "filter.<N>" to the BlockHandle for the filter 58 | block where "<N>" is the string returned by the filter policy's 59 | "Name()" method. 60 | 61 | The filter block stores a sequence of filters, where filter i contains 62 | the output of FilterPolicy::CreateFilter() on all keys that are stored 63 | in a block whose file offset falls within the range 64 | 65 | [ i*base ... (i+1)*base-1 ] 66 | 67 | Currently, "base" is 2KB. 
So for example, if blocks X and Y start in 68 | the range [ 0KB .. 2KB-1 ], all of the keys in X and Y will be 69 | converted to a filter by calling FilterPolicy::CreateFilter(), and the 70 | resulting filter will be stored as the first filter in the filter 71 | block. 72 | 73 | The filter block is formatted as follows: 74 | 75 | [filter 0] 76 | [filter 1] 77 | [filter 2] 78 | ... 79 | [filter N-1] 80 | 81 | [offset of filter 0] : 4 bytes 82 | [offset of filter 1] : 4 bytes 83 | [offset of filter 2] : 4 bytes 84 | ... 85 | [offset of filter N-1] : 4 bytes 86 | 87 | [offset of beginning of offset array] : 4 bytes 88 | lg(base) : 1 byte 89 | 90 | The offset array at the end of the filter block allows efficient 91 | mapping from a data block offset to the corresponding filter. 92 | 93 | "stats" Meta Block 94 | ------------------ 95 | 96 | This meta block contains a bunch of stats. The key is the name 97 | of the statistic. The value contains the statistic. 98 | TODO(postrelease): record following stats. 99 | data size 100 | index size 101 | key size (uncompressed) 102 | value size (uncompressed) 103 | number of entries 104 | number of data blocks 105 | -------------------------------------------------------------------------------- /helpers/memenv/memenv.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_HELPERS_MEMENV_MEMENV_H_ 6 | #define STORAGE_LEVELDB_HELPERS_MEMENV_MEMENV_H_ 7 | 8 | namespace leveldb { 9 | 10 | class Env; 11 | 12 | // Returns a new environment that stores its data in memory and delegates 13 | // all non-file-storage tasks to base_env. The caller must delete the result 14 | // when it is no longer needed. 
15 | // *base_env must remain live while the result is in use. 16 | Env* NewMemEnv(Env* base_env); 17 | 18 | } // namespace leveldb 19 | 20 | #endif // STORAGE_LEVELDB_HELPERS_MEMENV_MEMENV_H_ 21 | -------------------------------------------------------------------------------- /include/leveldb/cache.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | // 5 | // A Cache is an interface that maps keys to values. It has internal 6 | // synchronization and may be safely accessed concurrently from 7 | // multiple threads. It may automatically evict entries to make room 8 | // for new entries. Values have a specified charge against the cache 9 | // capacity. For example, a cache where the values are variable 10 | // length strings, may use the length of the string as the charge for 11 | // the string. 12 | // 13 | // A builtin cache implementation with a least-recently-used eviction 14 | // policy is provided. Clients may use their own implementations if 15 | // they want something more sophisticated (like scan-resistance, a 16 | // custom eviction policy, variable cache sizing, etc.) 17 | 18 | #ifndef STORAGE_LEVELDB_INCLUDE_CACHE_H_ 19 | #define STORAGE_LEVELDB_INCLUDE_CACHE_H_ 20 | 21 | #include 22 | #include "leveldb/slice.h" 23 | 24 | namespace leveldb { 25 | 26 | class Cache; 27 | 28 | // Create a new cache with a fixed size capacity. This implementation 29 | // of Cache uses a least-recently-used eviction policy. 30 | extern Cache* NewLRUCache(size_t capacity); 31 | 32 | class Cache { 33 | public: 34 | Cache() { } 35 | 36 | // Destroys all existing entries by calling the "deleter" 37 | // function that was passed to the constructor. 
38 | virtual ~Cache(); 39 | 40 | // Opaque handle to an entry stored in the cache. 41 | struct Handle { }; 42 | 43 | // Insert a mapping from key->value into the cache and assign it 44 | // the specified charge against the total cache capacity. 45 | // 46 | // Returns a handle that corresponds to the mapping. The caller 47 | // must call this->Release(handle) when the returned mapping is no 48 | // longer needed. 49 | // 50 | // When the inserted entry is no longer needed, the key and 51 | // value will be passed to "deleter". 52 | virtual Handle* Insert(const Slice& key, void* value, size_t charge, 53 | void (*deleter)(const Slice& key, void* value)) = 0; 54 | 55 | // If the cache has no mapping for "key", returns NULL. 56 | // 57 | // Else return a handle that corresponds to the mapping. The caller 58 | // must call this->Release(handle) when the returned mapping is no 59 | // longer needed. 60 | virtual Handle* Lookup(const Slice& key) = 0; 61 | 62 | // Release a mapping returned by a previous Lookup(). 63 | // REQUIRES: handle must not have been released yet. 64 | // REQUIRES: handle must have been returned by a method on *this. 65 | virtual void Release(Handle* handle) = 0; 66 | 67 | // Return the value encapsulated in a handle returned by a 68 | // successful Lookup(). 69 | // REQUIRES: handle must not have been released yet. 70 | // REQUIRES: handle must have been returned by a method on *this. 71 | virtual void* Value(Handle* handle) = 0; 72 | 73 | // If the cache contains entry for key, erase it. Note that the 74 | // underlying entry will be kept around until all existing handles 75 | // to it have been released. 76 | virtual void Erase(const Slice& key) = 0; 77 | 78 | // Return a new numeric id. May be used by multiple clients who are 79 | // sharing the same cache to partition the key space. Typically the 80 | // client will allocate a new id at startup and prepend the id to 81 | // its cache keys. 
82 | virtual uint64_t NewId() = 0; 83 | 84 | // Remove all cache entries that are not actively in use. Memory-constrained 85 | // applications may wish to call this method to reduce memory usage. 86 | // Default implementation of Prune() does nothing. Subclasses are strongly 87 | // encouraged to override the default implementation. A future release of 88 | // leveldb may change Prune() to a pure abstract method. 89 | virtual void Prune() {} 90 | 91 | // Return an estimate of the combined charges of all elements stored in the 92 | // cache. 93 | virtual size_t TotalCharge() const = 0; 94 | 95 | private: 96 | void LRU_Remove(Handle* e); 97 | void LRU_Append(Handle* e); 98 | void Unref(Handle* e); 99 | 100 | struct Rep; 101 | Rep* rep_; 102 | 103 | // No copying allowed 104 | Cache(const Cache&); 105 | void operator=(const Cache&); 106 | }; 107 | 108 | } // namespace leveldb 109 | 110 | #endif // STORAGE_LEVELDB_INCLUDE_CACHE_H_ 111 | -------------------------------------------------------------------------------- /include/leveldb/comparator.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_INCLUDE_COMPARATOR_H_ 6 | #define STORAGE_LEVELDB_INCLUDE_COMPARATOR_H_ 7 | 8 | #include 9 | 10 | namespace leveldb { 11 | 12 | class Slice; 13 | 14 | // A Comparator object provides a total order across slices that are 15 | // used as keys in an sstable or a database. A Comparator implementation 16 | // must be thread-safe since leveldb may invoke its methods concurrently 17 | // from multiple threads. 18 | class Comparator { 19 | public: 20 | virtual ~Comparator(); 21 | 22 | // Three-way comparison. 
Returns value: 23 | // < 0 iff "a" < "b", 24 | // == 0 iff "a" == "b", 25 | // > 0 iff "a" > "b" 26 | virtual int Compare(const Slice& a, const Slice& b) const = 0; 27 | 28 | // The name of the comparator. Used to check for comparator 29 | // mismatches (i.e., a DB created with one comparator is 30 | // accessed using a different comparator). 31 | // 32 | // The client of this package should switch to a new name whenever 33 | // the comparator implementation changes in a way that will cause 34 | // the relative ordering of any two keys to change. 35 | // 36 | // Names starting with "leveldb." are reserved and should not be used 37 | // by any clients of this package. 38 | virtual const char* Name() const = 0; 39 | 40 | // Advanced functions: these are used to reduce the space requirements 41 | // for internal data structures like index blocks. 42 | 43 | // If *start < limit, changes *start to a short string in [start,limit). 44 | // Simple comparator implementations may return with *start unchanged, 45 | // i.e., an implementation of this method that does nothing is correct. 46 | virtual void FindShortestSeparator( 47 | std::string* start, 48 | const Slice& limit) const = 0; 49 | 50 | // Changes *key to a short string >= *key. 51 | // Simple comparator implementations may return with *key unchanged, 52 | // i.e., an implementation of this method that does nothing is correct. 53 | virtual void FindShortSuccessor(std::string* key) const = 0; 54 | }; 55 | 56 | // Return a builtin comparator that uses lexicographic byte-wise 57 | // ordering. The result remains the property of this module and 58 | // must not be deleted. 
59 | extern const Comparator* BytewiseComparator(); 60 | 61 | } // namespace leveldb 62 | 63 | #endif // STORAGE_LEVELDB_INCLUDE_COMPARATOR_H_ 64 | -------------------------------------------------------------------------------- /include/leveldb/dumpfile.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_INCLUDE_DUMPFILE_H_ 6 | #define STORAGE_LEVELDB_INCLUDE_DUMPFILE_H_ 7 | 8 | #include 9 | #include "leveldb/env.h" 10 | #include "leveldb/status.h" 11 | 12 | namespace leveldb { 13 | 14 | // Dump the contents of the file named by fname in text format to 15 | // *dst. Makes a sequence of dst->Append() calls; each call is passed 16 | // the newline-terminated text corresponding to a single item found 17 | // in the file. 18 | // 19 | // Returns a non-OK result if fname does not name a leveldb storage 20 | // file, or if the file cannot be read. 21 | Status DumpFile(Env* env, const std::string& fname, WritableFile* dst); 22 | 23 | } // namespace leveldb 24 | 25 | #endif // STORAGE_LEVELDB_INCLUDE_DUMPFILE_H_ 26 | -------------------------------------------------------------------------------- /include/leveldb/filter_policy.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | // 5 | // A database can be configured with a custom FilterPolicy object. 6 | // This object is responsible for creating a small filter from a set 7 | // of keys. 
These filters are stored in leveldb and are consulted 8 | // automatically by leveldb to decide whether or not to read some 9 | // information from disk. In many cases, a filter can cut down the 10 | // number of disk seeks from a handful to a single disk seek per 11 | // DB::Get() call. 12 | // 13 | // Most people will want to use the builtin bloom filter support (see 14 | // NewBloomFilterPolicy() below). 15 | 16 | #ifndef STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_ 17 | #define STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_ 18 | 19 | #include <string> 20 | 21 | namespace leveldb { 22 | 23 | class Slice; 24 | 25 | class FilterPolicy { 26 | public: 27 | virtual ~FilterPolicy(); 28 | 29 | // Return the name of this policy. Note that if the filter encoding 30 | // changes in an incompatible way, the name returned by this method 31 | // must be changed. Otherwise, old incompatible filters may be 32 | // passed to methods of this type. 33 | virtual const char* Name() const = 0; 34 | 35 | // keys[0,n-1] contains a list of keys (potentially with duplicates) 36 | // that are ordered according to the user supplied comparator. 37 | // Append a filter that summarizes keys[0,n-1] to *dst. 38 | // 39 | // Warning: do not change the initial contents of *dst. Instead, 40 | // append the newly constructed filter to *dst. 41 | virtual void CreateFilter(const Slice* keys, int n, std::string* dst) 42 | const = 0; 43 | 44 | // "filter" contains the data appended by a preceding call to 45 | // CreateFilter() on this class. This method must return true if 46 | // the key was in the list of keys passed to CreateFilter(). 47 | // This method may return true or false if the key was not on the 48 | // list, but it should aim to return false with a high probability. 49 | virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0; 50 | }; 51 | 52 | // Return a new filter policy that uses a bloom filter with approximately 53 | // the specified number of bits per key. 
A good value for bits_per_key 54 | // is 10, which yields a filter with ~ 1% false positive rate. 55 | // 56 | // Callers must delete the result after any database that is using the 57 | // result has been closed. 58 | // 59 | // Note: if you are using a custom comparator that ignores some parts 60 | // of the keys being compared, you must not use NewBloomFilterPolicy() 61 | // and must provide your own FilterPolicy that also ignores the 62 | // corresponding parts of the keys. For example, if the comparator 63 | // ignores trailing spaces, it would be incorrect to use a 64 | // FilterPolicy (like NewBloomFilterPolicy) that does not ignore 65 | // trailing spaces in keys. 66 | extern const FilterPolicy* NewBloomFilterPolicy(int bits_per_key); 67 | 68 | } 69 | 70 | #endif // STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_ 71 | -------------------------------------------------------------------------------- /include/leveldb/iterator.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | // 5 | // An iterator yields a sequence of key/value pairs from a source. 6 | // The following class defines the interface. Multiple implementations 7 | // are provided by this library. In particular, iterators are provided 8 | // to access the contents of a Table or a DB. 9 | // 10 | // Multiple threads can invoke const methods on an Iterator without 11 | // external synchronization, but if any of the threads may call a 12 | // non-const method, all threads accessing the same Iterator must use 13 | // external synchronization. 
14 | 15 | #ifndef STORAGE_LEVELDB_INCLUDE_ITERATOR_H_ 16 | #define STORAGE_LEVELDB_INCLUDE_ITERATOR_H_ 17 | 18 | #include "leveldb/slice.h" 19 | #include "leveldb/status.h" 20 | 21 | namespace leveldb { 22 | 23 | class Iterator { 24 | public: 25 | Iterator(); 26 | virtual ~Iterator(); 27 | 28 | // An iterator is either positioned at a key/value pair, or 29 | // not valid. This method returns true iff the iterator is valid. 30 | virtual bool Valid() const = 0; 31 | 32 | // Position at the first key in the source. The iterator is Valid() 33 | // after this call iff the source is not empty. 34 | virtual void SeekToFirst() = 0; 35 | 36 | // Position at the last key in the source. The iterator is 37 | // Valid() after this call iff the source is not empty. 38 | virtual void SeekToLast() = 0; 39 | 40 | // Position at the first key in the source that is at or past target. 41 | // The iterator is Valid() after this call iff the source contains 42 | // an entry that comes at or past target. 43 | virtual void Seek(const Slice& target) = 0; 44 | 45 | // Moves to the next entry in the source. After this call, Valid() is 46 | // true iff the iterator was not positioned at the last entry in the source. 47 | // REQUIRES: Valid() 48 | virtual void Next() = 0; 49 | 50 | // Moves to the previous entry in the source. After this call, Valid() is 51 | // true iff the iterator was not positioned at the first entry in source. 52 | // REQUIRES: Valid() 53 | virtual void Prev() = 0; 54 | 55 | // Return the key for the current entry. The underlying storage for 56 | // the returned slice is valid only until the next modification of 57 | // the iterator. 58 | // REQUIRES: Valid() 59 | virtual Slice key() const = 0; 60 | 61 | // Return the value for the current entry. The underlying storage for 62 | // the returned slice is valid only until the next modification of 63 | // the iterator. 
64 | // REQUIRES: Valid() 65 | virtual Slice value() const = 0; 66 | 67 | // If an error has occurred, return it. Else return an ok status. 68 | virtual Status status() const = 0; 69 | 70 | // Clients are allowed to register function/arg1/arg2 triples that 71 | // will be invoked when this iterator is destroyed. 72 | // 73 | // Note that unlike all of the preceding methods, this method is 74 | // not abstract and therefore clients should not override it. 75 | typedef void (*CleanupFunction)(void* arg1, void* arg2); 76 | void RegisterCleanup(CleanupFunction function, void* arg1, void* arg2); 77 | 78 | private: 79 | struct Cleanup { 80 | CleanupFunction function; 81 | void* arg1; 82 | void* arg2; 83 | Cleanup* next; 84 | }; 85 | Cleanup cleanup_; 86 | 87 | // No copying allowed 88 | Iterator(const Iterator&); 89 | void operator=(const Iterator&); 90 | }; 91 | 92 | // Return an empty iterator (yields nothing). 93 | extern Iterator* NewEmptyIterator(); 94 | 95 | // Return an empty iterator with the specified status. 96 | extern Iterator* NewErrorIterator(const Status& status); 97 | 98 | } // namespace leveldb 99 | 100 | #endif // STORAGE_LEVELDB_INCLUDE_ITERATOR_H_ 101 | -------------------------------------------------------------------------------- /include/leveldb/slice.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | // 5 | // Slice is a simple structure containing a pointer into some external 6 | // storage and a size. The user of a Slice must ensure that the slice 7 | // is not used after the corresponding external storage has been 8 | // deallocated. 
//
// Multiple threads can invoke const methods on a Slice without
// external synchronization, but if any of the threads may call a
// non-const method, all threads accessing the same Slice must use
// external synchronization.

#ifndef STORAGE_LEVELDB_INCLUDE_SLICE_H_
#define STORAGE_LEVELDB_INCLUDE_SLICE_H_

#include <assert.h>
#include <stddef.h>
#include <string.h>
#include <string>

namespace leveldb {

// Non-owning (pointer, length) view of a byte sequence.
class Slice {
 public:
  // Create an empty slice.
  Slice() : data_(""), size_(0) { }

  // Create a slice that refers to d[0,n-1].
  Slice(const char* d, size_t n) : data_(d), size_(n) { }

  // Create a slice that refers to the contents of "s"
  Slice(const std::string& s) : data_(s.data()), size_(s.size()) { }

  // Create a slice that refers to s[0,strlen(s)-1]
  Slice(const char* s) : data_(s), size_(strlen(s)) { }

  // Return a pointer to the beginning of the referenced data
  const char* data() const { return data_; }

  // Return the length (in bytes) of the referenced data
  size_t size() const { return size_; }

  // Return true iff the length of the referenced data is zero
  bool empty() const { return size_ == 0; }

  // Return the ith byte in the referenced data.
  // REQUIRES: n < size()
  char operator[](size_t n) const {
    assert(n < size());
    return data_[n];
  }

  // Change this slice to refer to an empty array
  void clear() { data_ = ""; size_ = 0; }

  // Drop the first "n" bytes from this slice.
  // REQUIRES: n <= size()
  void remove_prefix(size_t n) {
    assert(n <= size());
    data_ += n;
    size_ -= n;
  }

  // Return a string that contains the copy of the referenced data.
  std::string ToString() const { return std::string(data_, size_); }

  // Three-way comparison.  Returns value:
  //   <  0 iff "*this" <  "b",
  //   == 0 iff "*this" == "b",
  //   >  0 iff "*this" >  "b"
  int compare(const Slice& b) const;

  // Return true iff "x" is a prefix of "*this"
  bool starts_with(const Slice& x) const {
    return ((size_ >= x.size_) &&
            (memcmp(data_, x.data_, x.size_) == 0));
  }

 private:
  const char* data_;
  size_t size_;

  // Intentionally copyable
};

// Two slices are equal iff they have the same length and the same bytes.
inline bool operator==(const Slice& x, const Slice& y) {
  return ((x.size() == y.size()) &&
          (memcmp(x.data(), y.data(), x.size()) == 0));
}

inline bool operator!=(const Slice& x, const Slice& y) {
  return !(x == y);
}

// Compares the common prefix bytewise with memcmp; when the common
// prefix is equal, the shorter slice orders first.
inline int Slice::compare(const Slice& b) const {
  const size_t min_len = (size_ < b.size_) ? size_ : b.size_;
  int r = memcmp(data_, b.data_, min_len);
  if (r == 0) {
    if (size_ < b.size_) r = -1;
    else if (size_ > b.size_) r = +1;
  }
  return r;
}

}  // namespace leveldb


#endif  // STORAGE_LEVELDB_INCLUDE_SLICE_H_

// --------------------------------------------------------------------------------
// /include/leveldb/status.h:
// --------------------------------------------------------------------------------
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// A Status encapsulates the result of an operation.  It may indicate success,
// or it may indicate an error with an associated error message.
//
// Multiple threads can invoke const methods on a Status without
// external synchronization, but if any of the threads may call a
// non-const method, all threads accessing the same Status must use
// external synchronization.
12 | 13 | #ifndef STORAGE_LEVELDB_INCLUDE_STATUS_H_ 14 | #define STORAGE_LEVELDB_INCLUDE_STATUS_H_ 15 | 16 | #include 17 | #include "leveldb/slice.h" 18 | 19 | namespace leveldb { 20 | 21 | class Status { 22 | public: 23 | // Create a success status. 24 | Status() : state_(NULL) { } 25 | ~Status() { delete[] state_; } 26 | 27 | // Copy the specified status. 28 | Status(const Status& s); 29 | void operator=(const Status& s); 30 | 31 | // Return a success status. 32 | static Status OK() { return Status(); } 33 | 34 | // Return error status of an appropriate type. 35 | static Status NotFound(const Slice& msg, const Slice& msg2 = Slice()) { 36 | return Status(kNotFound, msg, msg2); 37 | } 38 | static Status Corruption(const Slice& msg, const Slice& msg2 = Slice()) { 39 | return Status(kCorruption, msg, msg2); 40 | } 41 | static Status NotSupported(const Slice& msg, const Slice& msg2 = Slice()) { 42 | return Status(kNotSupported, msg, msg2); 43 | } 44 | static Status InvalidArgument(const Slice& msg, const Slice& msg2 = Slice()) { 45 | return Status(kInvalidArgument, msg, msg2); 46 | } 47 | static Status IOError(const Slice& msg, const Slice& msg2 = Slice()) { 48 | return Status(kIOError, msg, msg2); 49 | } 50 | 51 | // Returns true iff the status indicates success. 52 | bool ok() const { return (state_ == NULL); } 53 | 54 | // Returns true iff the status indicates a NotFound error. 55 | bool IsNotFound() const { return code() == kNotFound; } 56 | 57 | // Corruption 这里是什么意思? 58 | // Returns true iff the status indicates a Corruption error. 59 | bool IsCorruption() const { return code() == kCorruption; } 60 | 61 | // Returns true iff the status indicates an IOError. 62 | bool IsIOError() const { return code() == kIOError; } 63 | 64 | // Returns true iff the status indicates a NotSupportedError. 65 | bool IsNotSupportedError() const { return code() == kNotSupported; } 66 | 67 | // Returns true iff the status indicates an InvalidArgument. 
68 | bool IsInvalidArgument() const { return code() == kInvalidArgument; } 69 | 70 | // Return a string representation of this status suitable for printing. 71 | // Returns the string "OK" for success. 72 | std::string ToString() const; 73 | 74 | private: 75 | // OK status has a NULL state_. Otherwise, state_ is a new[] array 76 | // of the following form: 77 | // state_[0..3] == length of message 78 | // state_[4] == code 79 | // state_[5..] == message 80 | // ------------------------------------------------------------------------- 81 | // | 4 byte | 1 byte | len1 byte | 1 byte | 1 byte | len2 byte | 82 | // ------------------------------------------------------------------------- 83 | // | length | code | msg1 | : | space | msg2 | 84 | // ------------------------------------------------------------------------- 85 | const char* state_; 86 | 87 | enum Code { 88 | kOk = 0, 89 | kNotFound = 1, 90 | kCorruption = 2, // 这个代表什么? 91 | kNotSupported = 3, 92 | kInvalidArgument = 4, 93 | kIOError = 5 94 | }; 95 | 96 | Code code() const { 97 | return (state_ == NULL) ? kOk : static_cast(state_[4]); 98 | } 99 | 100 | Status(Code code, const Slice& msg, const Slice& msg2); 101 | static const char* CopyState(const char* s); 102 | }; 103 | 104 | inline Status::Status(const Status& s) { 105 | state_ = (s.state_ == NULL) ? NULL : CopyState(s.state_); 106 | } 107 | inline void Status::operator=(const Status& s) { 108 | // The following condition catches both aliasing (when this == &s), 109 | // and the common case where both s and *this are ok. 110 | if (state_ != s.state_) { 111 | delete[] state_; 112 | state_ = (s.state_ == NULL) ? 
NULL : CopyState(s.state_); 113 | } 114 | } 115 | 116 | } // namespace leveldb 117 | 118 | #endif // STORAGE_LEVELDB_INCLUDE_STATUS_H_ 119 | -------------------------------------------------------------------------------- /include/leveldb/table.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_INCLUDE_TABLE_H_ 6 | #define STORAGE_LEVELDB_INCLUDE_TABLE_H_ 7 | 8 | #include 9 | #include "leveldb/iterator.h" 10 | 11 | namespace leveldb { 12 | 13 | class Block; 14 | class BlockHandle; 15 | class Footer; 16 | struct Options; 17 | class RandomAccessFile; 18 | struct ReadOptions; 19 | class TableCache; 20 | 21 | // A Table is a sorted map from strings to strings. Tables are 22 | // immutable and persistent. A Table may be safely accessed from 23 | // multiple threads without external synchronization. 24 | class Table { 25 | public: 26 | // Attempt to open the table that is stored in bytes [0..file_size) 27 | // of "file", and read the metadata entries necessary to allow 28 | // retrieving data from the table. 29 | // 30 | // If successful, returns ok and sets "*table" to the newly opened 31 | // table. The client should delete "*table" when no longer needed. 32 | // If there was an error while initializing the table, sets "*table" 33 | // to NULL and returns a non-ok status. Does not take ownership of 34 | // "*source", but the client must ensure that "source" remains live 35 | // for the duration of the returned table's lifetime. 36 | // 37 | // *file must remain live while this Table is in use. 
38 | static Status Open(const Options& options, 39 | RandomAccessFile* file, 40 | uint64_t file_size, 41 | Table** table); 42 | 43 | ~Table(); 44 | 45 | // Returns a new iterator over the table contents. 46 | // The result of NewIterator() is initially invalid (caller must 47 | // call one of the Seek methods on the iterator before using it). 48 | Iterator* NewIterator(const ReadOptions&) const; 49 | 50 | // Given a key, return an approximate byte offset in the file where 51 | // the data for that key begins (or would begin if the key were 52 | // present in the file). The returned value is in terms of file 53 | // bytes, and so includes effects like compression of the underlying data. 54 | // E.g., the approximate offset of the last key in the table will 55 | // be close to the file length. 56 | uint64_t ApproximateOffsetOf(const Slice& key) const; 57 | 58 | private: 59 | struct Rep; 60 | Rep* rep_; 61 | 62 | explicit Table(Rep* rep) { rep_ = rep; } 63 | static Iterator* BlockReader(void*, const ReadOptions&, const Slice&); 64 | 65 | // Calls (*handle_result)(arg, ...) with the entry found after a call 66 | // to Seek(key). May not make such a call if filter policy says 67 | // that key is not present. 68 | friend class TableCache; 69 | Status InternalGet( 70 | const ReadOptions&, const Slice& key, 71 | void* arg, 72 | void (*handle_result)(void* arg, const Slice& k, const Slice& v)); 73 | 74 | 75 | void ReadMeta(const Footer& footer); 76 | void ReadFilter(const Slice& filter_handle_value); 77 | 78 | // No copying allowed 79 | Table(const Table&); 80 | void operator=(const Table&); 81 | }; 82 | 83 | } // namespace leveldb 84 | 85 | #endif // STORAGE_LEVELDB_INCLUDE_TABLE_H_ 86 | -------------------------------------------------------------------------------- /include/leveldb/table_builder.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | // 5 | // TableBuilder provides the interface used to build a Table 6 | // (an immutable and sorted map from keys to values). 7 | // 8 | // Multiple threads can invoke const methods on a TableBuilder without 9 | // external synchronization, but if any of the threads may call a 10 | // non-const method, all threads accessing the same TableBuilder must use 11 | // external synchronization. 12 | 13 | #ifndef STORAGE_LEVELDB_INCLUDE_TABLE_BUILDER_H_ 14 | #define STORAGE_LEVELDB_INCLUDE_TABLE_BUILDER_H_ 15 | 16 | #include 17 | #include "leveldb/options.h" 18 | #include "leveldb/status.h" 19 | 20 | namespace leveldb { 21 | 22 | class BlockBuilder; 23 | class BlockHandle; 24 | class WritableFile; 25 | 26 | class TableBuilder { 27 | public: 28 | // Create a builder that will store the contents of the table it is 29 | // building in *file. Does not close the file. It is up to the 30 | // caller to close the file after calling Finish(). 31 | TableBuilder(const Options& options, WritableFile* file); 32 | 33 | // REQUIRES: Either Finish() or Abandon() has been called. 34 | ~TableBuilder(); 35 | 36 | // Change the options used by this builder. Note: only some of the 37 | // option fields can be changed after construction. If a field is 38 | // not allowed to change dynamically and its value in the structure 39 | // passed to the constructor is different from its value in the 40 | // structure passed to this method, this method will return an error 41 | // without changing any fields. 42 | Status ChangeOptions(const Options& options); 43 | 44 | // Add key,value to the table being constructed. 45 | // REQUIRES: key is after any previously added key according to comparator. 
46 | // REQUIRES: Finish(), Abandon() have not been called 47 | void Add(const Slice& key, const Slice& value); 48 | 49 | // Advanced operation: flush any buffered key/value pairs to file. 50 | // Can be used to ensure that two adjacent entries never live in 51 | // the same data block. Most clients should not need to use this method. 52 | // REQUIRES: Finish(), Abandon() have not been called 53 | void Flush(); 54 | 55 | // Return non-ok iff some error has been detected. 56 | Status status() const; 57 | 58 | // Finish building the table. Stops using the file passed to the 59 | // constructor after this function returns. 60 | // REQUIRES: Finish(), Abandon() have not been called 61 | Status Finish(); 62 | 63 | // Indicate that the contents of this builder should be abandoned. Stops 64 | // using the file passed to the constructor after this function returns. 65 | // If the caller is not going to call Finish(), it must call Abandon() 66 | // before destroying this builder. 67 | // REQUIRES: Finish(), Abandon() have not been called 68 | void Abandon(); 69 | 70 | // Number of calls to Add() so far. 71 | uint64_t NumEntries() const; 72 | 73 | // Size of the file generated so far. If invoked after a successful 74 | // Finish() call, returns the size of the final generated file. 
75 | uint64_t FileSize() const; 76 | 77 | private: 78 | bool ok() const { return status().ok(); } 79 | void WriteBlock(BlockBuilder* block, BlockHandle* handle); 80 | void WriteRawBlock(const Slice& data, CompressionType, BlockHandle* handle); 81 | 82 | struct Rep; 83 | Rep* rep_; 84 | 85 | // No copying allowed 86 | TableBuilder(const TableBuilder&); 87 | void operator=(const TableBuilder&); 88 | }; 89 | 90 | } // namespace leveldb 91 | 92 | #endif // STORAGE_LEVELDB_INCLUDE_TABLE_BUILDER_H_ 93 | -------------------------------------------------------------------------------- /include/leveldb/write_batch.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | // 5 | // WriteBatch holds a collection of updates to apply atomically to a DB. 6 | // 7 | // The updates are applied in the order in which they are added 8 | // to the WriteBatch. For example, the value of "key" will be "v3" 9 | // after the following batch is written: 10 | // 11 | // batch.Put("key", "v1"); 12 | // batch.Delete("key"); 13 | // batch.Put("key", "v2"); 14 | // batch.Put("key", "v3"); 15 | // 16 | // Multiple threads can invoke const methods on a WriteBatch without 17 | // external synchronization, but if any of the threads may call a 18 | // non-const method, all threads accessing the same WriteBatch must use 19 | // external synchronization. 20 | 21 | #ifndef STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_ 22 | #define STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_ 23 | 24 | #include 25 | #include "leveldb/status.h" 26 | 27 | namespace leveldb { 28 | 29 | class Slice; 30 | 31 | class WriteBatch { 32 | public: 33 | WriteBatch(); 34 | ~WriteBatch(); 35 | 36 | // Store the mapping "key->value" in the database. 
37 | void Put(const Slice& key, const Slice& value); 38 | 39 | // If the database contains a mapping for "key", erase it. Else do nothing. 40 | void Delete(const Slice& key); 41 | 42 | // Clear all updates buffered in this batch. 43 | void Clear(); 44 | 45 | // Support for iterating over the contents of a batch. 46 | class Handler { 47 | public: 48 | virtual ~Handler(); 49 | virtual void Put(const Slice& key, const Slice& value) = 0; 50 | virtual void Delete(const Slice& key) = 0; 51 | }; 52 | Status Iterate(Handler* handler) const; 53 | 54 | private: 55 | friend class WriteBatchInternal; 56 | 57 | std::string rep_; // See comment in write_batch.cc for the format of rep_ 58 | 59 | // Intentionally copyable 60 | }; 61 | 62 | } // namespace leveldb 63 | 64 | #endif // STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_ 65 | -------------------------------------------------------------------------------- /issues/issue178_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | // Test for issue 178: a manual compaction causes deleted data to reappear. 6 | #include 7 | #include 8 | #include 9 | 10 | #include "leveldb/db.h" 11 | #include "leveldb/write_batch.h" 12 | #include "util/testharness.h" 13 | 14 | namespace { 15 | 16 | const int kNumKeys = 1100000; 17 | 18 | std::string Key1(int i) { 19 | char buf[100]; 20 | snprintf(buf, sizeof(buf), "my_key_%d", i); 21 | return buf; 22 | } 23 | 24 | std::string Key2(int i) { 25 | return Key1(i) + "_xxx"; 26 | } 27 | 28 | class Issue178 { }; 29 | 30 | TEST(Issue178, Test) { 31 | // Get rid of any state from an old run. 32 | std::string dbpath = leveldb::test::TmpDir() + "/leveldb_cbug_test"; 33 | DestroyDB(dbpath, leveldb::Options()); 34 | 35 | // Open database. 
Disable compression since it affects the creation 36 | // of layers and the code below is trying to test against a very 37 | // specific scenario. 38 | leveldb::DB* db; 39 | leveldb::Options db_options; 40 | db_options.create_if_missing = true; 41 | db_options.compression = leveldb::kNoCompression; 42 | ASSERT_OK(leveldb::DB::Open(db_options, dbpath, &db)); 43 | 44 | // create first key range 45 | leveldb::WriteBatch batch; 46 | for (size_t i = 0; i < kNumKeys; i++) { 47 | batch.Put(Key1(i), "value for range 1 key"); 48 | } 49 | ASSERT_OK(db->Write(leveldb::WriteOptions(), &batch)); 50 | 51 | // create second key range 52 | batch.Clear(); 53 | for (size_t i = 0; i < kNumKeys; i++) { 54 | batch.Put(Key2(i), "value for range 2 key"); 55 | } 56 | ASSERT_OK(db->Write(leveldb::WriteOptions(), &batch)); 57 | 58 | // delete second key range 59 | batch.Clear(); 60 | for (size_t i = 0; i < kNumKeys; i++) { 61 | batch.Delete(Key2(i)); 62 | } 63 | ASSERT_OK(db->Write(leveldb::WriteOptions(), &batch)); 64 | 65 | // compact database 66 | std::string start_key = Key1(0); 67 | std::string end_key = Key1(kNumKeys - 1); 68 | leveldb::Slice least(start_key.data(), start_key.size()); 69 | leveldb::Slice greatest(end_key.data(), end_key.size()); 70 | 71 | // commenting out the line below causes the example to work correctly 72 | db->CompactRange(&least, &greatest); 73 | 74 | // count the keys 75 | leveldb::Iterator* iter = db->NewIterator(leveldb::ReadOptions()); 76 | size_t num_keys = 0; 77 | for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { 78 | num_keys++; 79 | } 80 | delete iter; 81 | ASSERT_EQ(kNumKeys, num_keys) << "Bad number of keys"; 82 | 83 | // close database 84 | delete db; 85 | DestroyDB(dbpath, leveldb::Options()); 86 | } 87 | 88 | } // anonymous namespace 89 | 90 | int main(int argc, char** argv) { 91 | return leveldb::test::RunAllTests(); 92 | } 93 | -------------------------------------------------------------------------------- /issues/issue200_test.cc: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | // Test for issue 200: when iterator switches direction from backward 6 | // to forward, the current key can be yielded unexpectedly if a new 7 | // mutation has been added just before the current key. 8 | 9 | #include "leveldb/db.h" 10 | #include "util/testharness.h" 11 | 12 | namespace leveldb { 13 | 14 | class Issue200 { }; 15 | 16 | TEST(Issue200, Test) { 17 | // Get rid of any state from an old run. 18 | std::string dbpath = test::TmpDir() + "/leveldb_issue200_test"; 19 | DestroyDB(dbpath, Options()); 20 | 21 | DB *db; 22 | Options options; 23 | options.create_if_missing = true; 24 | ASSERT_OK(DB::Open(options, dbpath, &db)); 25 | 26 | WriteOptions write_options; 27 | ASSERT_OK(db->Put(write_options, "1", "b")); 28 | ASSERT_OK(db->Put(write_options, "2", "c")); 29 | ASSERT_OK(db->Put(write_options, "3", "d")); 30 | ASSERT_OK(db->Put(write_options, "4", "e")); 31 | ASSERT_OK(db->Put(write_options, "5", "f")); 32 | 33 | ReadOptions read_options; 34 | Iterator *iter = db->NewIterator(read_options); 35 | 36 | // Add an element that should not be reflected in the iterator. 
37 | ASSERT_OK(db->Put(write_options, "25", "cd")); 38 | 39 | iter->Seek("5"); 40 | ASSERT_EQ(iter->key().ToString(), "5"); 41 | iter->Prev(); 42 | ASSERT_EQ(iter->key().ToString(), "4"); 43 | iter->Prev(); 44 | ASSERT_EQ(iter->key().ToString(), "3"); 45 | iter->Next(); 46 | ASSERT_EQ(iter->key().ToString(), "4"); 47 | iter->Next(); 48 | ASSERT_EQ(iter->key().ToString(), "5"); 49 | 50 | delete iter; 51 | delete db; 52 | DestroyDB(dbpath, options); 53 | } 54 | 55 | } // namespace leveldb 56 | 57 | int main(int argc, char** argv) { 58 | return leveldb::test::RunAllTests(); 59 | } 60 | -------------------------------------------------------------------------------- /port/README: -------------------------------------------------------------------------------- 1 | This directory contains interfaces and implementations that isolate the 2 | rest of the package from platform details. 3 | 4 | Code in the rest of the package includes "port.h" from this directory. 5 | "port.h" in turn includes a platform specific "port_.h" file 6 | that provides the platform specific implementation. 7 | 8 | See port_posix.h for an example of what must be provided in a platform 9 | specific header file. 10 | 11 | -------------------------------------------------------------------------------- /port/port.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_PORT_PORT_H_ 6 | #define STORAGE_LEVELDB_PORT_PORT_H_ 7 | 8 | #include 9 | 10 | // Include the appropriate platform specific file below. If you are 11 | // porting to a new platform, see "port_example.h" for documentation 12 | // of what the new port_.h file must provide. 
13 | #if defined(LEVELDB_PLATFORM_POSIX) 14 | # include "port/port_posix.h" 15 | #elif defined(LEVELDB_PLATFORM_CHROMIUM) 16 | # include "port/port_chromium.h" 17 | #endif 18 | 19 | #endif // STORAGE_LEVELDB_PORT_PORT_H_ 20 | -------------------------------------------------------------------------------- /port/port_example.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | // 5 | // This file contains the specification, but not the implementations, 6 | // of the types/operations/etc. that should be defined by a platform 7 | // specific port_.h file. Use this file as a reference for 8 | // how to port this package to a new platform. 9 | 10 | #ifndef STORAGE_LEVELDB_PORT_PORT_EXAMPLE_H_ 11 | #define STORAGE_LEVELDB_PORT_PORT_EXAMPLE_H_ 12 | 13 | namespace leveldb { 14 | namespace port { 15 | 16 | // TODO(jorlow): Many of these belong more in the environment class rather than 17 | // here. We should try moving them and see if it affects perf. 18 | 19 | // The following boolean constant must be true on a little-endian machine 20 | // and false otherwise. 21 | static const bool kLittleEndian = true /* or some other expression */; 22 | 23 | // ------------------ Threading ------------------- 24 | 25 | // A Mutex represents an exclusive lock. 26 | class Mutex { 27 | public: 28 | Mutex(); 29 | ~Mutex(); 30 | 31 | // Lock the mutex. Waits until other lockers have exited. 32 | // Will deadlock if the mutex is already locked by this thread. 33 | void Lock(); 34 | 35 | // Unlock the mutex. 36 | // REQUIRES: This mutex was locked by this thread. 37 | void Unlock(); 38 | 39 | // Optionally crash if this thread does not hold this mutex. 40 | // The implementation must be fast, especially if NDEBUG is 41 | // defined. 
The implementation is allowed to skip all checks. 42 | void AssertHeld(); 43 | }; 44 | 45 | class CondVar { 46 | public: 47 | explicit CondVar(Mutex* mu); 48 | ~CondVar(); 49 | 50 | // Atomically release *mu and block on this condition variable until 51 | // either a call to SignalAll(), or a call to Signal() that picks 52 | // this thread to wakeup. 53 | // REQUIRES: this thread holds *mu 54 | void Wait(); 55 | 56 | // If there are some threads waiting, wake up at least one of them. 57 | void Signal(); 58 | 59 | // Wake up all waiting threads. 60 | void SignallAll(); 61 | }; 62 | 63 | // Thread-safe initialization. 64 | // Used as follows: 65 | // static port::OnceType init_control = LEVELDB_ONCE_INIT; 66 | // static void Initializer() { ... do something ...; } 67 | // ... 68 | // port::InitOnce(&init_control, &Initializer); 69 | typedef intptr_t OnceType; 70 | #define LEVELDB_ONCE_INIT 0 71 | extern void InitOnce(port::OnceType*, void (*initializer)()); 72 | 73 | // A type that holds a pointer that can be read or written atomically 74 | // (i.e., without word-tearing.) 75 | class AtomicPointer { 76 | private: 77 | intptr_t rep_; 78 | public: 79 | // Initialize to arbitrary value 80 | AtomicPointer(); 81 | 82 | // Initialize to hold v 83 | explicit AtomicPointer(void* v) : rep_(v) { } 84 | 85 | // Read and return the stored pointer with the guarantee that no 86 | // later memory access (read or write) by this thread can be 87 | // reordered ahead of this read. 88 | void* Acquire_Load() const; 89 | 90 | // Set v as the stored pointer with the guarantee that no earlier 91 | // memory access (read or write) by this thread can be reordered 92 | // after this store. 93 | void Release_Store(void* v); 94 | 95 | // Read the stored pointer with no ordering guarantees. 96 | void* NoBarrier_Load() const; 97 | 98 | // Set va as the stored pointer with no ordering guarantees. 
99 | void NoBarrier_Store(void* v); 100 | }; 101 | 102 | // ------------------ Compression ------------------- 103 | 104 | // Store the snappy compression of "input[0,input_length-1]" in *output. 105 | // Returns false if snappy is not supported by this port. 106 | extern bool Snappy_Compress(const char* input, size_t input_length, 107 | std::string* output); 108 | 109 | // If input[0,input_length-1] looks like a valid snappy compressed 110 | // buffer, store the size of the uncompressed data in *result and 111 | // return true. Else return false. 112 | extern bool Snappy_GetUncompressedLength(const char* input, size_t length, 113 | size_t* result); 114 | 115 | // Attempt to snappy uncompress input[0,input_length-1] into *output. 116 | // Returns true if successful, false if the input is invalid lightweight 117 | // compressed data. 118 | // 119 | // REQUIRES: at least the first "n" bytes of output[] must be writable 120 | // where "n" is the result of a successful call to 121 | // Snappy_GetUncompressedLength. 122 | extern bool Snappy_Uncompress(const char* input_data, size_t input_length, 123 | char* output); 124 | 125 | // ------------------ Miscellaneous ------------------- 126 | 127 | // If heap profiling is not supported, returns false. 128 | // Else repeatedly calls (*func)(arg, data, n) and then returns true. 129 | // The concatenation of all "data[0,n-1]" fragments is the heap profile. 130 | extern bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg); 131 | 132 | } // namespace port 133 | } // namespace leveldb 134 | 135 | #endif // STORAGE_LEVELDB_PORT_PORT_EXAMPLE_H_ 136 | -------------------------------------------------------------------------------- /port/port_posix.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 
See the AUTHORS file for names of contributors. 4 | 5 | #include "port/port_posix.h" 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | namespace leveldb { 12 | namespace port { 13 | 14 | static void PthreadCall(const char* label, int result) { 15 | if (result != 0) { 16 | fprintf(stderr, "pthread %s: %s\n", label, strerror(result)); 17 | abort(); 18 | } 19 | } 20 | 21 | Mutex::Mutex() { PthreadCall("init mutex", pthread_mutex_init(&mu_, NULL)); } 22 | 23 | Mutex::~Mutex() { PthreadCall("destroy mutex", pthread_mutex_destroy(&mu_)); } 24 | 25 | void Mutex::Lock() { PthreadCall("lock", pthread_mutex_lock(&mu_)); } 26 | 27 | void Mutex::Unlock() { PthreadCall("unlock", pthread_mutex_unlock(&mu_)); } 28 | 29 | CondVar::CondVar(Mutex* mu) 30 | : mu_(mu) { 31 | PthreadCall("init cv", pthread_cond_init(&cv_, NULL)); 32 | } 33 | 34 | CondVar::~CondVar() { PthreadCall("destroy cv", pthread_cond_destroy(&cv_)); } 35 | 36 | void CondVar::Wait() { 37 | PthreadCall("wait", pthread_cond_wait(&cv_, &mu_->mu_)); 38 | } 39 | 40 | void CondVar::Signal() { 41 | PthreadCall("signal", pthread_cond_signal(&cv_)); 42 | } 43 | 44 | void CondVar::SignalAll() { 45 | PthreadCall("broadcast", pthread_cond_broadcast(&cv_)); 46 | } 47 | 48 | void InitOnce(OnceType* once, void (*initializer)()) { 49 | PthreadCall("once", pthread_once(once, initializer)); 50 | } 51 | 52 | } // namespace port 53 | } // namespace leveldb 54 | -------------------------------------------------------------------------------- /port/port_posix.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | // 5 | // See port_example.h for documentation for the following types/functions. 
6 | 7 | #ifndef STORAGE_LEVELDB_PORT_PORT_POSIX_H_ 8 | #define STORAGE_LEVELDB_PORT_PORT_POSIX_H_ 9 | 10 | #undef PLATFORM_IS_LITTLE_ENDIAN 11 | #if defined(OS_MACOSX) 12 | #include 13 | #if defined(__DARWIN_LITTLE_ENDIAN) && defined(__DARWIN_BYTE_ORDER) 14 | #define PLATFORM_IS_LITTLE_ENDIAN \ 15 | (__DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN) 16 | #endif 17 | #elif defined(OS_SOLARIS) 18 | #include 19 | #ifdef _LITTLE_ENDIAN 20 | #define PLATFORM_IS_LITTLE_ENDIAN true 21 | #else 22 | #define PLATFORM_IS_LITTLE_ENDIAN false 23 | #endif 24 | #elif defined(OS_FREEBSD) || defined(OS_OPENBSD) ||\ 25 | defined(OS_NETBSD) || defined(OS_DRAGONFLYBSD) 26 | #include 27 | #include 28 | #define PLATFORM_IS_LITTLE_ENDIAN (_BYTE_ORDER == _LITTLE_ENDIAN) 29 | #elif defined(OS_HPUX) 30 | #define PLATFORM_IS_LITTLE_ENDIAN false 31 | #elif defined(OS_ANDROID) 32 | // Due to a bug in the NDK x86 definition, 33 | // _BYTE_ORDER must be used instead of __BYTE_ORDER on Android. 34 | // See http://code.google.com/p/android/issues/detail?id=39824 35 | #include 36 | #define PLATFORM_IS_LITTLE_ENDIAN (_BYTE_ORDER == _LITTLE_ENDIAN) 37 | #else 38 | #include 39 | #endif 40 | 41 | #include 42 | #ifdef SNAPPY 43 | #include 44 | #endif 45 | #include 46 | #include 47 | #include "port/atomic_pointer.h" 48 | 49 | #ifndef PLATFORM_IS_LITTLE_ENDIAN 50 | #define PLATFORM_IS_LITTLE_ENDIAN (__BYTE_ORDER == __LITTLE_ENDIAN) 51 | #endif 52 | 53 | #if defined(OS_MACOSX) || defined(OS_SOLARIS) || defined(OS_FREEBSD) ||\ 54 | defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLYBSD) ||\ 55 | defined(OS_ANDROID) || defined(OS_HPUX) || defined(CYGWIN) 56 | // Use fread/fwrite/fflush on platforms without _unlocked variants 57 | #define fread_unlocked fread 58 | #define fwrite_unlocked fwrite 59 | #define fflush_unlocked fflush 60 | #endif 61 | 62 | #if defined(OS_MACOSX) || defined(OS_FREEBSD) ||\ 63 | defined(OS_OPENBSD) || defined(OS_DRAGONFLYBSD) 64 | // Use fsync() on platforms without 
fdatasync() 65 | #define fdatasync fsync 66 | #endif 67 | 68 | #if defined(OS_ANDROID) && __ANDROID_API__ < 9 69 | // fdatasync() was only introduced in API level 9 on Android. Use fsync() 70 | // when targeting older platforms. 71 | #define fdatasync fsync 72 | #endif 73 | 74 | namespace leveldb { 75 | namespace port { 76 | 77 | static const bool kLittleEndian = PLATFORM_IS_LITTLE_ENDIAN; 78 | #undef PLATFORM_IS_LITTLE_ENDIAN 79 | 80 | class CondVar; 81 | 82 | class Mutex { 83 | public: 84 | Mutex(); 85 | ~Mutex(); 86 | 87 | void Lock(); 88 | void Unlock(); 89 | void AssertHeld() { } 90 | 91 | private: 92 | friend class CondVar; 93 | pthread_mutex_t mu_; 94 | 95 | // No copying 96 | Mutex(const Mutex&); 97 | void operator=(const Mutex&); 98 | }; 99 | 100 | class CondVar { 101 | public: 102 | explicit CondVar(Mutex* mu); 103 | ~CondVar(); 104 | void Wait(); 105 | void Signal(); 106 | void SignalAll(); 107 | private: 108 | pthread_cond_t cv_; 109 | Mutex* mu_; 110 | }; 111 | 112 | typedef pthread_once_t OnceType; 113 | #define LEVELDB_ONCE_INIT PTHREAD_ONCE_INIT 114 | extern void InitOnce(OnceType* once, void (*initializer)()); 115 | 116 | inline bool Snappy_Compress(const char* input, size_t length, 117 | ::std::string* output) { 118 | #ifdef SNAPPY 119 | output->resize(snappy::MaxCompressedLength(length)); 120 | size_t outlen; 121 | snappy::RawCompress(input, length, &(*output)[0], &outlen); 122 | output->resize(outlen); 123 | return true; 124 | #endif 125 | 126 | return false; 127 | } 128 | 129 | inline bool Snappy_GetUncompressedLength(const char* input, size_t length, 130 | size_t* result) { 131 | #ifdef SNAPPY 132 | return snappy::GetUncompressedLength(input, length, result); 133 | #else 134 | return false; 135 | #endif 136 | } 137 | 138 | inline bool Snappy_Uncompress(const char* input, size_t length, 139 | char* output) { 140 | #ifdef SNAPPY 141 | return snappy::RawUncompress(input, length, output); 142 | #else 143 | return false; 144 | #endif 145 | } 146 | 
147 | inline bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg) { 148 | return false; 149 | } 150 | 151 | } // namespace port 152 | } // namespace leveldb 153 | 154 | #endif // STORAGE_LEVELDB_PORT_PORT_POSIX_H_ 155 | -------------------------------------------------------------------------------- /port/thread_annotations.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_PORT_THREAD_ANNOTATIONS_H_ 6 | #define STORAGE_LEVELDB_PORT_THREAD_ANNOTATIONS_H_ 7 | 8 | // Some environments provide custom macros to aid in static thread-safety 9 | // analysis. Provide empty definitions of such macros unless they are already 10 | // defined. 11 | 12 | #ifndef EXCLUSIVE_LOCKS_REQUIRED 13 | #define EXCLUSIVE_LOCKS_REQUIRED(...) 14 | #endif 15 | 16 | #ifndef SHARED_LOCKS_REQUIRED 17 | #define SHARED_LOCKS_REQUIRED(...) 18 | #endif 19 | 20 | #ifndef LOCKS_EXCLUDED 21 | #define LOCKS_EXCLUDED(...) 22 | #endif 23 | 24 | #ifndef LOCK_RETURNED 25 | #define LOCK_RETURNED(x) 26 | #endif 27 | 28 | #ifndef LOCKABLE 29 | #define LOCKABLE 30 | #endif 31 | 32 | #ifndef SCOPED_LOCKABLE 33 | #define SCOPED_LOCKABLE 34 | #endif 35 | 36 | #ifndef EXCLUSIVE_LOCK_FUNCTION 37 | #define EXCLUSIVE_LOCK_FUNCTION(...) 38 | #endif 39 | 40 | #ifndef SHARED_LOCK_FUNCTION 41 | #define SHARED_LOCK_FUNCTION(...) 42 | #endif 43 | 44 | #ifndef EXCLUSIVE_TRYLOCK_FUNCTION 45 | #define EXCLUSIVE_TRYLOCK_FUNCTION(...) 46 | #endif 47 | 48 | #ifndef SHARED_TRYLOCK_FUNCTION 49 | #define SHARED_TRYLOCK_FUNCTION(...) 50 | #endif 51 | 52 | #ifndef UNLOCK_FUNCTION 53 | #define UNLOCK_FUNCTION(...) 
54 | #endif 55 | 56 | #ifndef NO_THREAD_SAFETY_ANALYSIS 57 | #define NO_THREAD_SAFETY_ANALYSIS 58 | #endif 59 | 60 | #endif // STORAGE_LEVELDB_PORT_THREAD_ANNOTATIONS_H_ 61 | -------------------------------------------------------------------------------- /port/win/stdint.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | // MSVC didn't ship with this file until the 2010 version. 6 | 7 | #ifndef STORAGE_LEVELDB_PORT_WIN_STDINT_H_ 8 | #define STORAGE_LEVELDB_PORT_WIN_STDINT_H_ 9 | 10 | #if !defined(_MSC_VER) 11 | #error This file should only be included when compiling with MSVC. 12 | #endif 13 | 14 | // Define C99 equivalent types. 15 | typedef signed char int8_t; 16 | typedef signed short int16_t; 17 | typedef signed int int32_t; 18 | typedef signed long long int64_t; 19 | typedef unsigned char uint8_t; 20 | typedef unsigned short uint16_t; 21 | typedef unsigned int uint32_t; 22 | typedef unsigned long long uint64_t; 23 | 24 | #endif // STORAGE_LEVELDB_PORT_WIN_STDINT_H_ 25 | -------------------------------------------------------------------------------- /table/block.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | 5 | #ifndef STORAGE_LEVELDB_TABLE_BLOCK_H_ 6 | #define STORAGE_LEVELDB_TABLE_BLOCK_H_ 7 | 8 | #include 9 | #include 10 | #include "leveldb/iterator.h" 11 | 12 | namespace leveldb { 13 | 14 | struct BlockContents; 15 | class Comparator; 16 | 17 | // block的布局如下: 18 | // 19 | // ------------------------------low address ---------------->data_ 20 | // | record | | 21 | // ------------------- | 22 | // | record | | 23 | // ------------------- | 24 | // | ...... | | 25 | // ------------------- | 26 | // | record | | 27 | // --------------------------> data_ + restart_offset_ | 28 | // | restart[0] | | size_ 29 | // ------------------- | | 30 | // | restart[1] | | | 31 | // ------------------- n * 32 bit | 32 | // | ...... | | | 33 | // ------------------- | | 34 | // | restart[n-1] | | | 35 | // --------------------------- | 36 | // | num_restarts(n) | 32 bit | 37 | // ------------------------------high address------------------ 38 | // 39 | // restart_offset_ = size_ - (n + 1) * sizeof(uint32_t) 40 | // 41 | // record布局如下: 42 | // --------------------------------------------------------------------------------- 43 | // | VarInt | VarInt | VarInt | unshared_bytes | value_bytes | 44 | // -------------------------------------------------------------------------------- 45 | // | shared_bytes | unshared_bytes | value_bytes | unshared_key_data | value_data | 46 | // --------------------------------------------------------------------------------- 47 | // 48 | // 所谓shared_key_data,是因为sstable里的k/v是按照key有序排放的,所以采用了前缀压缩 49 | // 根据前一个record的key结合shared_bytes就可以构造出当前record的key了 50 | // 前缀压缩并不是连续的,可能是每几个一组可以符合压缩策略,所以需要restart记录的前缀压缩的重启点 51 | 52 | 53 | class Block { 54 | public: 55 | // Initialize the block with the specified contents. 
56 | explicit Block(const BlockContents& contents); 57 | 58 | ~Block(); 59 | 60 | size_t size() const { return size_; } 61 | Iterator* NewIterator(const Comparator* comparator); 62 | 63 | private: 64 | uint32_t NumRestarts() const; 65 | 66 | const char* data_; 67 | size_t size_; 68 | uint32_t restart_offset_; // Offset in data_ of restart array 69 | bool owned_; // Block owns data_[] 70 | 71 | // No copying allowed 72 | Block(const Block&); 73 | void operator=(const Block&); 74 | 75 | class Iter; 76 | }; 77 | 78 | } // namespace leveldb 79 | 80 | #endif // STORAGE_LEVELDB_TABLE_BLOCK_H_ 81 | -------------------------------------------------------------------------------- /table/block_builder.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | // 5 | // BlockBuilder generates blocks where keys are prefix-compressed: 6 | // 7 | // When we store a key, we drop the prefix shared with the previous 8 | // string. This helps reduce the space requirement significantly. 9 | // Furthermore, once every K keys, we do not apply the prefix 10 | // compression and store the entire key. We call this a "restart 11 | // point". The tail end of the block stores the offsets of all of the 12 | // restart points, and can be used to do a binary search when looking 13 | // for a particular key. Values are stored as-is (without compression) 14 | // immediately following the corresponding key. 15 | // 16 | // An entry for a particular key-value pair has the form: 17 | // shared_bytes: varint32 18 | // unshared_bytes: varint32 19 | // value_length: varint32 20 | // key_delta: char[unshared_bytes] 21 | // value: char[value_length] 22 | // shared_bytes == 0 for restart points. 
23 | // 24 | // The trailer of the block has the form: 25 | // restarts: uint32[num_restarts] 26 | // num_restarts: uint32 27 | // restarts[i] contains the offset within the block of the ith restart point. 28 | 29 | #include "table/block_builder.h" 30 | 31 | #include 32 | #include 33 | #include "leveldb/comparator.h" 34 | #include "leveldb/table_builder.h" 35 | #include "util/coding.h" 36 | 37 | namespace leveldb { 38 | 39 | BlockBuilder::BlockBuilder(const Options* options) 40 | : options_(options), 41 | restarts_(), 42 | counter_(0), 43 | finished_(false) { 44 | assert(options->block_restart_interval >= 1); 45 | restarts_.push_back(0); // First restart point is at offset 0 46 | } 47 | 48 | void BlockBuilder::Reset() { 49 | buffer_.clear(); 50 | restarts_.clear(); 51 | restarts_.push_back(0); // First restart point is at offset 0 52 | counter_ = 0; 53 | finished_ = false; 54 | last_key_.clear(); 55 | } 56 | 57 | size_t BlockBuilder::CurrentSizeEstimate() const { 58 | return (buffer_.size() + // Raw data buffer 59 | restarts_.size() * sizeof(uint32_t) + // Restart array 60 | sizeof(uint32_t)); // Restart array length 61 | } 62 | 63 | Slice BlockBuilder::Finish() { 64 | // Append restart array 65 | for (size_t i = 0; i < restarts_.size(); i++) { 66 | PutFixed32(&buffer_, restarts_[i]); 67 | } 68 | PutFixed32(&buffer_, restarts_.size()); 69 | finished_ = true; 70 | return Slice(buffer_); 71 | } 72 | 73 | void BlockBuilder::Add(const Slice& key, const Slice& value) { 74 | Slice last_key_piece(last_key_); 75 | assert(!finished_); 76 | assert(counter_ <= options_->block_restart_interval); 77 | // 如果buffer_不为空了,那么当前的key就需要被之前的key大,这一点需要caller来保证 78 | // 即,从小到大的往Block里写 79 | assert(buffer_.empty() // No values yet? 
80 | || options_->comparator->Compare(key, last_key_piece) > 0); 81 | size_t shared = 0; 82 | if (counter_ < options_->block_restart_interval) { 83 | // See how much sharing to do with previous string 84 | const size_t min_length = std::min(last_key_piece.size(), key.size()); 85 | while ((shared < min_length) && (last_key_piece[shared] == key[shared])) { 86 | shared++; 87 | } 88 | } else { 89 | // Restart compression 90 | restarts_.push_back(buffer_.size()); 91 | counter_ = 0; 92 | } 93 | const size_t non_shared = key.size() - shared; 94 | 95 | // Add "<shared><non_shared><value_size>" to buffer_ 96 | PutVarint32(&buffer_, shared); 97 | PutVarint32(&buffer_, non_shared); 98 | PutVarint32(&buffer_, value.size()); 99 | 100 | // Add string delta to buffer_ followed by value 101 | buffer_.append(key.data() + shared, non_shared); 102 | buffer_.append(value.data(), value.size()); 103 | 104 | // Update state 105 | last_key_.resize(shared); 106 | last_key_.append(key.data() + shared, non_shared); 107 | assert(Slice(last_key_) == key); 108 | counter_++; 109 | } 110 | 111 | } // namespace leveldb 112 | -------------------------------------------------------------------------------- /table/block_builder.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_TABLE_BLOCK_BUILDER_H_ 6 | #define STORAGE_LEVELDB_TABLE_BLOCK_BUILDER_H_ 7 | 8 | #include 9 | 10 | #include 11 | #include "leveldb/slice.h" 12 | 13 | namespace leveldb { 14 | 15 | struct Options; 16 | 17 | class BlockBuilder { 18 | public: 19 | explicit BlockBuilder(const Options* options); 20 | 21 | // Reset the contents as if the BlockBuilder was just constructed. 22 | void Reset(); 23 | 24 | // REQUIRES: Finish() has not been called since the last call to Reset(). 
25 | // REQUIRES: key is larger than any previously added key 26 | void Add(const Slice& key, const Slice& value); 27 | 28 | // Finish building the block and return a slice that refers to the 29 | // block contents. The returned slice will remain valid for the 30 | // lifetime of this builder or until Reset() is called. 31 | Slice Finish(); 32 | 33 | // Returns an estimate of the current (uncompressed) size of the block 34 | // we are building. 35 | size_t CurrentSizeEstimate() const; 36 | 37 | // Return true iff no entries have been added since the last Reset() 38 | bool empty() const { 39 | return buffer_.empty(); 40 | } 41 | 42 | private: 43 | const Options* options_; 44 | std::string buffer_; // Destination buffer 45 | std::vector<uint32_t> restarts_; // Restart points 46 | int counter_; // Number of entries emitted since restart 47 | bool finished_; // Has Finish() been called? 48 | std::string last_key_; 49 | 50 | // No copying allowed 51 | BlockBuilder(const BlockBuilder&); 52 | void operator=(const BlockBuilder&); 53 | }; 54 | 55 | } // namespace leveldb 56 | 57 | #endif // STORAGE_LEVELDB_TABLE_BLOCK_BUILDER_H_ 58 | -------------------------------------------------------------------------------- /table/filter_block.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include "table/filter_block.h" 6 | 7 | #include "leveldb/filter_policy.h" 8 | #include "util/coding.h" 9 | 10 | namespace leveldb { 11 | 12 | // See doc/table_format.txt for an explanation of the filter block format. 
13 | 14 | // Generate new filter every 2KB of data 15 | static const size_t kFilterBaseLg = 11; 16 | static const size_t kFilterBase = 1 << kFilterBaseLg; 17 | 18 | FilterBlockBuilder::FilterBlockBuilder(const FilterPolicy* policy) 19 | : policy_(policy) { 20 | } 21 | void FilterBlockBuilder::StartBlock(uint64_t block_offset) { 22 | // 一个block 非特殊情况下大于等于4kb 23 | // 不超过6kb时会成对生成数据,如果超过6kb不超过8kb,则会生成3个offset数据,依此类推 24 | // 例如第一个block就超过6kb时,会写入offset1 offset2 offset2 25 | // ------------------------------------------------------------------------ 26 | // | 0 | offset 1 | offset 1 | offset 2 | offset 2 | offset 2 | offset 3 | .... 27 | // ------------------------------------------------------------------------ 28 | // 其中offset2 - offset1 即第一个block的filter数据 29 | // 特殊情况下,例如sstable里的最后一个data block 大小不足4kb 30 | // 参见Finish(),通过GenerateFilter()写了start,然后end置为array_offset 31 | // 32 | // 以此保证的索引规则如下: 33 | // filter_offsets[block_offset/KFilterBase] 就是filter的start 34 | // filter_offsets[block_offset/KFilterBase + 1] 就是filter的end 35 | uint64_t filter_index = (block_offset / kFilterBase); 36 | assert(filter_index >= filter_offsets_.size()); 37 | while (filter_index > filter_offsets_.size()) { 38 | GenerateFilter(); 39 | } 40 | } 41 | 42 | void FilterBlockBuilder::AddKey(const Slice& key) { 43 | Slice k = key; 44 | start_.push_back(keys_.size()); 45 | keys_.append(k.data(), k.size()); 46 | } 47 | 48 | Slice FilterBlockBuilder::Finish() { 49 | if (!start_.empty()) { 50 | GenerateFilter(); 51 | } 52 | 53 | // Append array of per-filter offsets 54 | const uint32_t array_offset = result_.size(); 55 | for (size_t i = 0; i < filter_offsets_.size(); i++) { 56 | PutFixed32(&result_, filter_offsets_[i]); 57 | } 58 | 59 | PutFixed32(&result_, array_offset); 60 | result_.push_back(kFilterBaseLg); // Save encoding parameter in result 61 | return Slice(result_); 62 | } 63 | 64 | void FilterBlockBuilder::GenerateFilter() { 65 | const size_t num_keys = start_.size(); 66 | if (num_keys == 0) 
{ 67 | // Fast path if there are no keys for this filter 68 | filter_offsets_.push_back(result_.size()); 69 | return; 70 | } 71 | 72 | // Make list of keys from flattened key structure 73 | start_.push_back(keys_.size()); // Simplify length computation 74 | tmp_keys_.resize(num_keys); 75 | for (size_t i = 0; i < num_keys; i++) { 76 | const char* base = keys_.data() + start_[i]; 77 | size_t length = start_[i+1] - start_[i]; 78 | tmp_keys_[i] = Slice(base, length); 79 | } 80 | 81 | // Generate filter for current set of keys and append to result_. 82 | filter_offsets_.push_back(result_.size()); 83 | policy_->CreateFilter(&tmp_keys_[0], static_cast<int>(num_keys), &result_); 84 | 85 | tmp_keys_.clear(); 86 | keys_.clear(); 87 | start_.clear(); 88 | } 89 | 90 | FilterBlockReader::FilterBlockReader(const FilterPolicy* policy, 91 | const Slice& contents) 92 | : policy_(policy), 93 | data_(NULL), 94 | offset_(NULL), 95 | num_(0), 96 | base_lg_(0) { 97 | size_t n = contents.size(); 98 | if (n < 5) return; // 1 byte for base_lg_ and 4 for start of offset array 99 | base_lg_ = contents[n-1]; 100 | uint32_t last_word = DecodeFixed32(contents.data() + n - 5); 101 | if (last_word > n - 5) return; 102 | data_ = contents.data(); 103 | offset_ = data_ + last_word; 104 | num_ = (n - 5 - last_word) / 4; 105 | } 106 | 107 | bool FilterBlockReader::KeyMayMatch(uint64_t block_offset, const Slice& key) { 108 | // 这里是用右移动11位,上面StartBlock是除以2^11 109 | // 其实效果是一样的 110 | uint64_t index = block_offset >> base_lg_; 111 | if (index < num_) { 112 | uint32_t start = DecodeFixed32(offset_ + index*4); 113 | uint32_t limit = DecodeFixed32(offset_ + index*4 + 4); 114 | if (start <= limit && limit <= static_cast<size_t>(offset_ - data_)) { 115 | Slice filter = Slice(data_ + start, limit - start); 116 | return policy_->KeyMayMatch(key, filter); 117 | } else if (start == limit) { 118 | // Empty filters do not match any keys 119 | return false; 120 | } 121 | } 122 | return true; // Errors are treated as potential 
matches 123 | } 124 | 125 | } 126 | -------------------------------------------------------------------------------- /table/filter_block.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | // 5 | // A filter block is stored near the end of a Table file. It contains 6 | // filters (e.g., bloom filters) for all data blocks in the table combined 7 | // into a single filter block. 8 | 9 | #ifndef STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_ 10 | #define STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include "leveldb/slice.h" 17 | #include "util/hash.h" 18 | 19 | namespace leveldb { 20 | 21 | class FilterPolicy; 22 | 23 | // A FilterBlockBuilder is used to construct all of the filters for a 24 | // particular Table. It generates a single string which is stored as 25 | // a special block in the Table. 
26 | // 27 | // The sequence of calls to FilterBlockBuilder must match the regexp: 28 | // (StartBlock AddKey*)* Finish 29 | // @1Feng: 注意这个特殊的block 30 | class FilterBlockBuilder { 31 | public: 32 | explicit FilterBlockBuilder(const FilterPolicy*); 33 | 34 | void StartBlock(uint64_t block_offset); 35 | void AddKey(const Slice& key); 36 | Slice Finish(); 37 | 38 | private: 39 | void GenerateFilter(); 40 | 41 | const FilterPolicy* policy_; 42 | std::string keys_; // Flattened key contents 43 | std::vector<size_t> start_; // Starting index in keys_ of each key 44 | std::string result_; // Filter data computed so far 45 | std::vector<Slice> tmp_keys_; // policy_->CreateFilter() argument 46 | std::vector<uint32_t> filter_offsets_; 47 | 48 | // No copying allowed 49 | FilterBlockBuilder(const FilterBlockBuilder&); 50 | void operator=(const FilterBlockBuilder&); 51 | }; 52 | 53 | class FilterBlockReader { 54 | public: 55 | // REQUIRES: "contents" and *policy must stay live while *this is live. 56 | FilterBlockReader(const FilterPolicy* policy, const Slice& contents); 57 | bool KeyMayMatch(uint64_t block_offset, const Slice& key); 58 | 59 | private: 60 | const FilterPolicy* policy_; 61 | const char* data_; // Pointer to filter data (at block-start) 62 | const char* offset_; // Pointer to beginning of offset array (at block-end) 63 | size_t num_; // Number of entries in offset array 64 | size_t base_lg_; // Encoding parameter (see kFilterBaseLg in .cc file) 65 | }; 66 | 67 | } 68 | 69 | #endif // STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_ 70 | -------------------------------------------------------------------------------- /table/filter_block_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | 5 | #include "table/filter_block.h" 6 | 7 | #include "leveldb/filter_policy.h" 8 | #include "util/coding.h" 9 | #include "util/hash.h" 10 | #include "util/logging.h" 11 | #include "util/testharness.h" 12 | #include "util/testutil.h" 13 | 14 | namespace leveldb { 15 | 16 | // For testing: emit an array with one hash value per key 17 | class TestHashFilter : public FilterPolicy { 18 | public: 19 | virtual const char* Name() const { 20 | return "TestHashFilter"; 21 | } 22 | 23 | virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const { 24 | for (int i = 0; i < n; i++) { 25 | uint32_t h = Hash(keys[i].data(), keys[i].size(), 1); 26 | PutFixed32(dst, h); 27 | } 28 | } 29 | 30 | virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const { 31 | uint32_t h = Hash(key.data(), key.size(), 1); 32 | for (size_t i = 0; i + 4 <= filter.size(); i += 4) { 33 | if (h == DecodeFixed32(filter.data() + i)) { 34 | return true; 35 | } 36 | } 37 | return false; 38 | } 39 | }; 40 | 41 | class FilterBlockTest { 42 | public: 43 | TestHashFilter policy_; 44 | }; 45 | 46 | TEST(FilterBlockTest, EmptyBuilder) { 47 | FilterBlockBuilder builder(&policy_); 48 | Slice block = builder.Finish(); 49 | ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block)); 50 | FilterBlockReader reader(&policy_, block); 51 | ASSERT_TRUE(reader.KeyMayMatch(0, "foo")); 52 | ASSERT_TRUE(reader.KeyMayMatch(100000, "foo")); 53 | } 54 | 55 | TEST(FilterBlockTest, SingleChunk) { 56 | FilterBlockBuilder builder(&policy_); 57 | builder.StartBlock(100); 58 | builder.AddKey("foo"); 59 | builder.AddKey("bar"); 60 | builder.AddKey("box"); 61 | builder.StartBlock(200); 62 | builder.AddKey("box"); 63 | builder.StartBlock(300); 64 | builder.AddKey("hello"); 65 | Slice block = builder.Finish(); 66 | FilterBlockReader reader(&policy_, block); 67 | ASSERT_TRUE(reader.KeyMayMatch(100, "foo")); 68 | ASSERT_TRUE(reader.KeyMayMatch(100, "bar")); 69 | ASSERT_TRUE(reader.KeyMayMatch(100, "box")); 70 | 
ASSERT_TRUE(reader.KeyMayMatch(100, "hello")); 71 | ASSERT_TRUE(reader.KeyMayMatch(100, "foo")); 72 | ASSERT_TRUE(! reader.KeyMayMatch(100, "missing")); 73 | ASSERT_TRUE(! reader.KeyMayMatch(100, "other")); 74 | } 75 | 76 | TEST(FilterBlockTest, MultiChunk) { 77 | FilterBlockBuilder builder(&policy_); 78 | 79 | // First filter 80 | builder.StartBlock(0); 81 | builder.AddKey("foo"); 82 | builder.StartBlock(2000); 83 | builder.AddKey("bar"); 84 | 85 | // Second filter 86 | builder.StartBlock(3100); 87 | builder.AddKey("box"); 88 | 89 | // Third filter is empty 90 | 91 | // Last filter 92 | builder.StartBlock(9000); 93 | builder.AddKey("box"); 94 | builder.AddKey("hello"); 95 | 96 | Slice block = builder.Finish(); 97 | FilterBlockReader reader(&policy_, block); 98 | 99 | // Check first filter 100 | ASSERT_TRUE(reader.KeyMayMatch(0, "foo")); 101 | ASSERT_TRUE(reader.KeyMayMatch(2000, "bar")); 102 | ASSERT_TRUE(! reader.KeyMayMatch(0, "box")); 103 | ASSERT_TRUE(! reader.KeyMayMatch(0, "hello")); 104 | 105 | // Check second filter 106 | ASSERT_TRUE(reader.KeyMayMatch(3100, "box")); 107 | ASSERT_TRUE(! reader.KeyMayMatch(3100, "foo")); 108 | ASSERT_TRUE(! reader.KeyMayMatch(3100, "bar")); 109 | ASSERT_TRUE(! reader.KeyMayMatch(3100, "hello")); 110 | 111 | // Check third filter (empty) 112 | ASSERT_TRUE(! reader.KeyMayMatch(4100, "foo")); 113 | ASSERT_TRUE(! reader.KeyMayMatch(4100, "bar")); 114 | ASSERT_TRUE(! reader.KeyMayMatch(4100, "box")); 115 | ASSERT_TRUE(! reader.KeyMayMatch(4100, "hello")); 116 | 117 | // Check last filter 118 | ASSERT_TRUE(reader.KeyMayMatch(9000, "box")); 119 | ASSERT_TRUE(reader.KeyMayMatch(9000, "hello")); 120 | ASSERT_TRUE(! reader.KeyMayMatch(9000, "foo")); 121 | ASSERT_TRUE(! 
reader.KeyMayMatch(9000, "bar")); 122 | } 123 | 124 | } // namespace leveldb 125 | 126 | int main(int argc, char** argv) { 127 | return leveldb::test::RunAllTests(); 128 | } 129 | -------------------------------------------------------------------------------- /table/iterator.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include "leveldb/iterator.h" 6 | 7 | namespace leveldb { 8 | 9 | Iterator::Iterator() { 10 | cleanup_.function = NULL; 11 | cleanup_.next = NULL; 12 | } 13 | 14 | Iterator::~Iterator() { 15 | if (cleanup_.function != NULL) { 16 | (*cleanup_.function)(cleanup_.arg1, cleanup_.arg2); 17 | for (Cleanup* c = cleanup_.next; c != NULL; ) { 18 | (*c->function)(c->arg1, c->arg2); 19 | Cleanup* next = c->next; 20 | delete c; 21 | c = next; 22 | } 23 | } 24 | } 25 | 26 | void Iterator::RegisterCleanup(CleanupFunction func, void* arg1, void* arg2) { 27 | assert(func != NULL); 28 | Cleanup* c; 29 | if (cleanup_.function == NULL) { 30 | c = &cleanup_; 31 | } else { 32 | c = new Cleanup; 33 | c->next = cleanup_.next; 34 | cleanup_.next = c; 35 | } 36 | c->function = func; 37 | c->arg1 = arg1; 38 | c->arg2 = arg2; 39 | } 40 | 41 | namespace { 42 | class EmptyIterator : public Iterator { 43 | public: 44 | EmptyIterator(const Status& s) : status_(s) { } 45 | virtual bool Valid() const { return false; } 46 | virtual void Seek(const Slice& target) { } 47 | virtual void SeekToFirst() { } 48 | virtual void SeekToLast() { } 49 | virtual void Next() { assert(false); } 50 | virtual void Prev() { assert(false); } 51 | Slice key() const { assert(false); return Slice(); } 52 | Slice value() const { assert(false); return Slice(); } 53 | virtual Status status() const { return status_; } 54 | private: 55 | 
Status status_; 56 | }; 57 | } // namespace 58 | 59 | Iterator* NewEmptyIterator() { 60 | return new EmptyIterator(Status::OK()); 61 | } 62 | 63 | Iterator* NewErrorIterator(const Status& status) { 64 | return new EmptyIterator(status); 65 | } 66 | 67 | } // namespace leveldb 68 | -------------------------------------------------------------------------------- /table/iterator_wrapper.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_ 6 | #define STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_ 7 | 8 | #include "leveldb/iterator.h" 9 | #include "leveldb/slice.h" 10 | 11 | namespace leveldb { 12 | 13 | // An internal wrapper class with an interface similar to Iterator that 14 | // caches the Valid() and key() results for an underlying iterator. 15 | // This can help avoid virtual function calls and also gives better 16 | // cache locality. 17 | class IteratorWrapper { 18 | public: 19 | IteratorWrapper(): iter_(NULL), valid_(false) { } 20 | explicit IteratorWrapper(Iterator* iter): iter_(NULL) { 21 | Set(iter); 22 | } 23 | ~IteratorWrapper() { delete iter_; } 24 | Iterator* iter() const { return iter_; } 25 | 26 | // Takes ownership of "iter" and will delete it when destroyed, or 27 | // when Set() is invoked again. 
28 | void Set(Iterator* iter) { 29 | delete iter_; 30 | iter_ = iter; 31 | if (iter_ == NULL) { 32 | valid_ = false; 33 | } else { 34 | Update(); 35 | } 36 | } 37 | 38 | 39 | // Iterator interface methods 40 | bool Valid() const { return valid_; } 41 | Slice key() const { assert(Valid()); return key_; } 42 | Slice value() const { assert(Valid()); return iter_->value(); } 43 | // Methods below require iter() != NULL 44 | Status status() const { assert(iter_); return iter_->status(); } 45 | void Next() { assert(iter_); iter_->Next(); Update(); } 46 | void Prev() { assert(iter_); iter_->Prev(); Update(); } 47 | void Seek(const Slice& k) { assert(iter_); iter_->Seek(k); Update(); } 48 | void SeekToFirst() { assert(iter_); iter_->SeekToFirst(); Update(); } 49 | void SeekToLast() { assert(iter_); iter_->SeekToLast(); Update(); } 50 | 51 | private: 52 | void Update() { 53 | valid_ = iter_->Valid(); 54 | if (valid_) { 55 | key_ = iter_->key(); 56 | } 57 | } 58 | 59 | Iterator* iter_; 60 | bool valid_; 61 | Slice key_; 62 | }; 63 | 64 | } // namespace leveldb 65 | 66 | #endif // STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_ 67 | -------------------------------------------------------------------------------- /table/merger.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_TABLE_MERGER_H_ 6 | #define STORAGE_LEVELDB_TABLE_MERGER_H_ 7 | 8 | namespace leveldb { 9 | 10 | class Comparator; 11 | class Iterator; 12 | 13 | // Return an iterator that provided the union of the data in 14 | // children[0,n-1]. Takes ownership of the child iterators and 15 | // will delete them when the result iterator is deleted. 16 | // 17 | // The result does no duplicate suppression. 
I.e., if a particular 18 | // key is present in K child iterators, it will be yielded K times. 19 | // 20 | // REQUIRES: n >= 0 21 | extern Iterator* NewMergingIterator( 22 | const Comparator* comparator, Iterator** children, int n); 23 | 24 | } // namespace leveldb 25 | 26 | #endif // STORAGE_LEVELDB_TABLE_MERGER_H_ 27 | -------------------------------------------------------------------------------- /table/two_level_iterator.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_TABLE_TWO_LEVEL_ITERATOR_H_ 6 | #define STORAGE_LEVELDB_TABLE_TWO_LEVEL_ITERATOR_H_ 7 | 8 | #include "leveldb/iterator.h" 9 | 10 | namespace leveldb { 11 | 12 | struct ReadOptions; 13 | 14 | // Return a new two level iterator. A two-level iterator contains an 15 | // index iterator whose values point to a sequence of blocks where 16 | // each block is itself a sequence of key,value pairs. The returned 17 | // two-level iterator yields the concatenation of all key/value pairs 18 | // in the sequence of blocks. Takes ownership of "index_iter" and 19 | // will delete it when no longer needed. 20 | // 21 | // Uses a supplied function to convert an index_iter value into 22 | // an iterator over the contents of the corresponding block. 
23 | extern Iterator* NewTwoLevelIterator( 24 | Iterator* index_iter, 25 | Iterator* (*block_function)( 26 | void* arg, 27 | const ReadOptions& options, 28 | const Slice& index_value), 29 | void* arg, 30 | const ReadOptions& options); 31 | 32 | } // namespace leveldb 33 | 34 | #endif // STORAGE_LEVELDB_TABLE_TWO_LEVEL_ITERATOR_H_ 35 | -------------------------------------------------------------------------------- /util/arena.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include "util/arena.h" 6 | #include 7 | 8 | namespace leveldb { 9 | 10 | static const int kBlockSize = 4096; 11 | 12 | Arena::Arena() : memory_usage_(0) { 13 | alloc_ptr_ = NULL; // First allocation will allocate a block 14 | alloc_bytes_remaining_ = 0; 15 | } 16 | 17 | Arena::~Arena() { 18 | for (size_t i = 0; i < blocks_.size(); i++) { 19 | delete[] blocks_[i]; 20 | } 21 | } 22 | 23 | char* Arena::AllocateFallback(size_t bytes) { 24 | if (bytes > kBlockSize / 4) { 25 | // Object is more than a quarter of our block size. Allocate it separately 26 | // to avoid wasting too much space in leftover bytes. 27 | char* result = AllocateNewBlock(bytes); 28 | return result; 29 | } 30 | 31 | // We waste the remaining space in the current block. 32 | alloc_ptr_ = AllocateNewBlock(kBlockSize); 33 | alloc_bytes_remaining_ = kBlockSize; 34 | 35 | char* result = alloc_ptr_; 36 | alloc_ptr_ += bytes; 37 | alloc_bytes_remaining_ -= bytes; 38 | return result; 39 | } 40 | 41 | // @1Feng 42 | // 没看懂这个对齐规则是什么? 43 | char* Arena::AllocateAligned(size_t bytes) { 44 | const int align = (sizeof(void*) > 8) ? 
sizeof(void*) : 8; 45 | assert((align & (align-1)) == 0); // Pointer size should be a power of 2 46 | size_t current_mod = reinterpret_cast(alloc_ptr_) & (align-1); 47 | size_t slop = (current_mod == 0 ? 0 : align - current_mod); 48 | size_t needed = bytes + slop; 49 | char* result; 50 | if (needed <= alloc_bytes_remaining_) { 51 | result = alloc_ptr_ + slop; 52 | alloc_ptr_ += needed; 53 | alloc_bytes_remaining_ -= needed; 54 | } else { 55 | // AllocateFallback always returned aligned memory 56 | result = AllocateFallback(bytes); 57 | } 58 | assert((reinterpret_cast(result) & (align-1)) == 0); 59 | return result; 60 | } 61 | 62 | char* Arena::AllocateNewBlock(size_t block_bytes) { 63 | char* result = new char[block_bytes]; 64 | blocks_.push_back(result); 65 | memory_usage_.NoBarrier_Store( 66 | reinterpret_cast(MemoryUsage() + block_bytes + sizeof(char*))); 67 | return result; 68 | } 69 | 70 | } // namespace leveldb 71 | -------------------------------------------------------------------------------- /util/arena.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_UTIL_ARENA_H_ 6 | #define STORAGE_LEVELDB_UTIL_ARENA_H_ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "port/port.h" 13 | 14 | namespace leveldb { 15 | 16 | class Arena { 17 | public: 18 | Arena(); 19 | ~Arena(); 20 | 21 | // Return a pointer to a newly allocated memory block of "bytes" bytes. 22 | char* Allocate(size_t bytes); 23 | 24 | // Allocate memory with the normal alignment guarantees provided by malloc 25 | char* AllocateAligned(size_t bytes); 26 | 27 | // Returns an estimate of the total memory usage of data allocated 28 | // by the arena. 
29 | size_t MemoryUsage() const { 30 | return reinterpret_cast(memory_usage_.NoBarrier_Load()); 31 | } 32 | 33 | private: 34 | char* AllocateFallback(size_t bytes); 35 | char* AllocateNewBlock(size_t block_bytes); 36 | 37 | // Allocation state 38 | char* alloc_ptr_; 39 | size_t alloc_bytes_remaining_; 40 | 41 | // Array of new[] allocated memory blocks 42 | std::vector blocks_; 43 | 44 | // Total memory usage of the arena. 45 | port::AtomicPointer memory_usage_; 46 | 47 | // No copying allowed 48 | Arena(const Arena&); 49 | void operator=(const Arena&); 50 | }; 51 | 52 | inline char* Arena::Allocate(size_t bytes) { 53 | // The semantics of what to return are a bit messy if we allow 54 | // 0-byte allocations, so we disallow them here (we don't need 55 | // them for our internal use). 56 | assert(bytes > 0); 57 | if (bytes <= alloc_bytes_remaining_) { 58 | char* result = alloc_ptr_; 59 | alloc_ptr_ += bytes; 60 | alloc_bytes_remaining_ -= bytes; 61 | return result; 62 | } 63 | return AllocateFallback(bytes); 64 | } 65 | 66 | } // namespace leveldb 67 | 68 | #endif // STORAGE_LEVELDB_UTIL_ARENA_H_ 69 | -------------------------------------------------------------------------------- /util/arena_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | 5 | #include "util/arena.h" 6 | 7 | #include "util/random.h" 8 | #include "util/testharness.h" 9 | 10 | namespace leveldb { 11 | 12 | class ArenaTest { }; 13 | 14 | TEST(ArenaTest, Empty) { 15 | Arena arena; 16 | } 17 | 18 | TEST(ArenaTest, Simple) { 19 | std::vector > allocated; 20 | Arena arena; 21 | const int N = 100000; 22 | size_t bytes = 0; 23 | Random rnd(301); 24 | for (int i = 0; i < N; i++) { 25 | size_t s; 26 | if (i % (N / 10) == 0) { 27 | s = i; 28 | } else { 29 | s = rnd.OneIn(4000) ? rnd.Uniform(6000) : 30 | (rnd.OneIn(10) ? rnd.Uniform(100) : rnd.Uniform(20)); 31 | } 32 | if (s == 0) { 33 | // Our arena disallows size 0 allocations. 34 | s = 1; 35 | } 36 | char* r; 37 | if (rnd.OneIn(10)) { 38 | r = arena.AllocateAligned(s); 39 | } else { 40 | r = arena.Allocate(s); 41 | } 42 | 43 | for (size_t b = 0; b < s; b++) { 44 | // Fill the "i"th allocation with a known bit pattern 45 | r[b] = i % 256; 46 | } 47 | bytes += s; 48 | allocated.push_back(std::make_pair(s, r)); 49 | ASSERT_GE(arena.MemoryUsage(), bytes); 50 | if (i > N/10) { 51 | ASSERT_LE(arena.MemoryUsage(), bytes * 1.10); 52 | } 53 | } 54 | for (size_t i = 0; i < allocated.size(); i++) { 55 | size_t num_bytes = allocated[i].first; 56 | const char* p = allocated[i].second; 57 | for (size_t b = 0; b < num_bytes; b++) { 58 | // Check the "i"th allocation for the known bit pattern 59 | ASSERT_EQ(int(p[b]) & 0xff, i % 256); 60 | } 61 | } 62 | } 63 | 64 | } // namespace leveldb 65 | 66 | int main(int argc, char** argv) { 67 | return leveldb::test::RunAllTests(); 68 | } 69 | -------------------------------------------------------------------------------- /util/bloom.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | 5 | #include "leveldb/filter_policy.h" 6 | 7 | #include "leveldb/slice.h" 8 | #include "util/hash.h" 9 | 10 | namespace leveldb { 11 | 12 | namespace { 13 | static uint32_t BloomHash(const Slice& key) { 14 | return Hash(key.data(), key.size(), 0xbc9f1d34); 15 | } 16 | 17 | class BloomFilterPolicy : public FilterPolicy { 18 | private: 19 | size_t bits_per_key_; 20 | size_t k_; 21 | 22 | public: 23 | explicit BloomFilterPolicy(int bits_per_key) 24 | : bits_per_key_(bits_per_key) { 25 | // We intentionally round down to reduce probing cost a little bit 26 | k_ = static_cast(bits_per_key * 0.69); // 0.69 =~ ln(2) 27 | if (k_ < 1) k_ = 1; 28 | if (k_ > 30) k_ = 30; 29 | } 30 | 31 | virtual const char* Name() const { 32 | return "leveldb.BuiltinBloomFilter2"; 33 | } 34 | 35 | virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const { 36 | // Compute bloom filter size (in both bits and bytes) 37 | size_t bits = n * bits_per_key_; 38 | 39 | // For small n, we can see a very high false positive rate. Fix it 40 | // by enforcing a minimum bloom filter length. 41 | if (bits < 64) bits = 64; 42 | 43 | size_t bytes = (bits + 7) / 8; 44 | bits = bytes * 8; 45 | 46 | const size_t init_size = dst->size(); 47 | dst->resize(init_size + bytes, 0); 48 | dst->push_back(static_cast(k_)); // Remember # of probes in filter 49 | char* array = &(*dst)[init_size]; 50 | for (int i = 0; i < n; i++) { 51 | // Use double-hashing to generate a sequence of hash values. 52 | // See analysis in [Kirsch,Mitzenmacher 2006]. 
53 | uint32_t h = BloomHash(keys[i]); 54 | const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits 55 | for (size_t j = 0; j < k_; j++) { 56 | const uint32_t bitpos = h % bits; 57 | array[bitpos/8] |= (1 << (bitpos % 8)); 58 | h += delta; 59 | } 60 | } 61 | } 62 | 63 | virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const { 64 | const size_t len = bloom_filter.size(); 65 | if (len < 2) return false; 66 | 67 | const char* array = bloom_filter.data(); 68 | const size_t bits = (len - 1) * 8; 69 | 70 | // Use the encoded k so that we can read filters generated by 71 | // bloom filters created using different parameters. 72 | const size_t k = array[len-1]; 73 | if (k > 30) { 74 | // Reserved for potentially new encodings for short bloom filters. 75 | // Consider it a match. 76 | return true; 77 | } 78 | 79 | uint32_t h = BloomHash(key); 80 | const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits 81 | for (size_t j = 0; j < k; j++) { 82 | const uint32_t bitpos = h % bits; 83 | if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false; 84 | h += delta; 85 | } 86 | return true; 87 | } 88 | }; 89 | } 90 | 91 | const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) { 92 | return new BloomFilterPolicy(bits_per_key); 93 | } 94 | 95 | } // namespace leveldb 96 | -------------------------------------------------------------------------------- /util/bloom_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | 5 | #include "leveldb/filter_policy.h" 6 | 7 | #include "util/coding.h" 8 | #include "util/logging.h" 9 | #include "util/testharness.h" 10 | #include "util/testutil.h" 11 | 12 | namespace leveldb { 13 | 14 | static const int kVerbose = 1; 15 | 16 | static Slice Key(int i, char* buffer) { 17 | EncodeFixed32(buffer, i); 18 | return Slice(buffer, sizeof(uint32_t)); 19 | } 20 | 21 | class BloomTest { 22 | private: 23 | const FilterPolicy* policy_; 24 | std::string filter_; 25 | std::vector keys_; 26 | 27 | public: 28 | BloomTest() : policy_(NewBloomFilterPolicy(10)) { } 29 | 30 | ~BloomTest() { 31 | delete policy_; 32 | } 33 | 34 | void Reset() { 35 | keys_.clear(); 36 | filter_.clear(); 37 | } 38 | 39 | void Add(const Slice& s) { 40 | keys_.push_back(s.ToString()); 41 | } 42 | 43 | void Build() { 44 | std::vector key_slices; 45 | for (size_t i = 0; i < keys_.size(); i++) { 46 | key_slices.push_back(Slice(keys_[i])); 47 | } 48 | filter_.clear(); 49 | policy_->CreateFilter(&key_slices[0], static_cast(key_slices.size()), 50 | &filter_); 51 | keys_.clear(); 52 | if (kVerbose >= 2) DumpFilter(); 53 | } 54 | 55 | size_t FilterSize() const { 56 | return filter_.size(); 57 | } 58 | 59 | void DumpFilter() { 60 | fprintf(stderr, "F("); 61 | for (size_t i = 0; i+1 < filter_.size(); i++) { 62 | const unsigned int c = static_cast(filter_[i]); 63 | for (int j = 0; j < 8; j++) { 64 | fprintf(stderr, "%c", (c & (1 <KeyMayMatch(s, filter_); 75 | } 76 | 77 | double FalsePositiveRate() { 78 | char buffer[sizeof(int)]; 79 | int result = 0; 80 | for (int i = 0; i < 10000; i++) { 81 | if (Matches(Key(i + 1000000000, buffer))) { 82 | result++; 83 | } 84 | } 85 | return result / 10000.0; 86 | } 87 | }; 88 | 89 | TEST(BloomTest, EmptyFilter) { 90 | ASSERT_TRUE(! Matches("hello")); 91 | ASSERT_TRUE(! Matches("world")); 92 | } 93 | 94 | TEST(BloomTest, Small) { 95 | Add("hello"); 96 | Add("world"); 97 | ASSERT_TRUE(Matches("hello")); 98 | ASSERT_TRUE(Matches("world")); 99 | ASSERT_TRUE(! 
// Returns the next filter length to test after "length". The step grows with
// the magnitude of the current length: 1 below 10, 10 below 100, 100 below
// 1000, and 1000 from there on.
static int NextLength(int length) {
  int step = 1000;
  if (length < 10) {
    step = 1;
  } else if (length < 100) {
    step = 10;
  } else if (length < 1000) {
    step = 100;
  }
  return length + step;
}
2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | // 5 | // Endian-neutral encoding: 6 | // * Fixed-length numbers are encoded with least-significant byte first 7 | // * In addition we support variable length "varint" encoding 8 | // * Strings are encoded prefixed by their length in varint format 9 | 10 | #ifndef STORAGE_LEVELDB_UTIL_CODING_H_ 11 | #define STORAGE_LEVELDB_UTIL_CODING_H_ 12 | 13 | #include 14 | #include 15 | #include 16 | #include "leveldb/slice.h" 17 | #include "port/port.h" 18 | 19 | namespace leveldb { 20 | 21 | // Standard Put... routines append to a string 22 | extern void PutFixed32(std::string* dst, uint32_t value); 23 | extern void PutFixed64(std::string* dst, uint64_t value); 24 | extern void PutVarint32(std::string* dst, uint32_t value); 25 | extern void PutVarint64(std::string* dst, uint64_t value); 26 | extern void PutLengthPrefixedSlice(std::string* dst, const Slice& value); 27 | 28 | // Standard Get... routines parse a value from the beginning of a Slice 29 | // and advance the slice past the parsed value. 30 | extern bool GetVarint32(Slice* input, uint32_t* value); 31 | extern bool GetVarint64(Slice* input, uint64_t* value); 32 | extern bool GetLengthPrefixedSlice(Slice* input, Slice* result); 33 | 34 | // Pointer-based variants of GetVarint... These either store a value 35 | // in *v and return a pointer just past the parsed value, or return 36 | // NULL on error. These routines only look at bytes in the range 37 | // [p..limit-1] 38 | extern const char* GetVarint32Ptr(const char* p,const char* limit, uint32_t* v); 39 | extern const char* GetVarint64Ptr(const char* p,const char* limit, uint64_t* v); 40 | 41 | // Returns the length of the varint32 or varint64 encoding of "v" 42 | extern int VarintLength(uint64_t v); 43 | 44 | // Lower-level versions of Put... 
that write directly into a character buffer 45 | // REQUIRES: dst has enough space for the value being written 46 | extern void EncodeFixed32(char* dst, uint32_t value); 47 | extern void EncodeFixed64(char* dst, uint64_t value); 48 | 49 | // Lower-level versions of Put... that write directly into a character buffer 50 | // and return a pointer just past the last byte written. 51 | // REQUIRES: dst has enough space for the value being written 52 | extern char* EncodeVarint32(char* dst, uint32_t value); 53 | extern char* EncodeVarint64(char* dst, uint64_t value); 54 | 55 | // Lower-level versions of Get... that read directly from a character buffer 56 | // without any bounds checking. 57 | 58 | inline uint32_t DecodeFixed32(const char* ptr) { 59 | if (port::kLittleEndian) { 60 | // Load the raw bytes 61 | uint32_t result; 62 | memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load 63 | return result; 64 | } else { 65 | return ((static_cast(static_cast(ptr[0]))) 66 | | (static_cast(static_cast(ptr[1])) << 8) 67 | | (static_cast(static_cast(ptr[2])) << 16) 68 | | (static_cast(static_cast(ptr[3])) << 24)); 69 | } 70 | } 71 | 72 | inline uint64_t DecodeFixed64(const char* ptr) { 73 | if (port::kLittleEndian) { 74 | // Load the raw bytes 75 | uint64_t result; 76 | memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load 77 | return result; 78 | } else { 79 | uint64_t lo = DecodeFixed32(ptr); 80 | uint64_t hi = DecodeFixed32(ptr + 4); 81 | return (hi << 32) | lo; 82 | } 83 | } 84 | 85 | // Internal routine for use by fallback path of GetVarint32Ptr 86 | extern const char* GetVarint32PtrFallback(const char* p, 87 | const char* limit, 88 | uint32_t* value); 89 | inline const char* GetVarint32Ptr(const char* p, 90 | const char* limit, 91 | uint32_t* value) { 92 | if (p < limit) { 93 | uint32_t result = *(reinterpret_cast(p)); 94 | if ((result & 128) == 0) { 95 | *value = result; 96 | return p + 1; 97 | } 98 | } 99 | return 
GetVarint32PtrFallback(p, limit, value); 100 | } 101 | 102 | } // namespace leveldb 103 | 104 | #endif // STORAGE_LEVELDB_UTIL_CODING_H_ 105 | -------------------------------------------------------------------------------- /util/comparator.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include 6 | #include 7 | #include "leveldb/comparator.h" 8 | #include "leveldb/slice.h" 9 | #include "port/port.h" 10 | #include "util/logging.h" 11 | 12 | namespace leveldb { 13 | 14 | Comparator::~Comparator() { } 15 | 16 | namespace { 17 | class BytewiseComparatorImpl : public Comparator { 18 | public: 19 | BytewiseComparatorImpl() { } 20 | 21 | virtual const char* Name() const { 22 | return "leveldb.BytewiseComparator"; 23 | } 24 | 25 | virtual int Compare(const Slice& a, const Slice& b) const { 26 | return a.compare(b); 27 | } 28 | 29 | virtual void FindShortestSeparator( 30 | std::string* start, 31 | const Slice& limit) const { 32 | // Find length of common prefix 33 | size_t min_length = std::min(start->size(), limit.size()); 34 | size_t diff_index = 0; 35 | while ((diff_index < min_length) && 36 | ((*start)[diff_index] == limit[diff_index])) { 37 | diff_index++; 38 | } 39 | 40 | if (diff_index >= min_length) { 41 | // Do not shorten if one string is a prefix of the other 42 | } else { 43 | uint8_t diff_byte = static_cast((*start)[diff_index]); 44 | if (diff_byte < static_cast(0xff) && 45 | diff_byte + 1 < static_cast(limit[diff_index])) { 46 | (*start)[diff_index]++; 47 | start->resize(diff_index + 1); 48 | assert(Compare(*start, limit) < 0); 49 | } 50 | } 51 | } 52 | 53 | virtual void FindShortSuccessor(std::string* key) const { 54 | // Find first character that can be incremented 55 | size_t n = 
key->size(); 56 | for (size_t i = 0; i < n; i++) { 57 | const uint8_t byte = (*key)[i]; 58 | if (byte != static_cast(0xff)) { 59 | (*key)[i] = byte + 1; 60 | key->resize(i+1); 61 | return; 62 | } 63 | } 64 | // *key is a run of 0xffs. Leave it alone. 65 | } 66 | }; 67 | } // namespace 68 | 69 | static port::OnceType once = LEVELDB_ONCE_INIT; 70 | static const Comparator* bytewise; 71 | 72 | static void InitModule() { 73 | bytewise = new BytewiseComparatorImpl; 74 | } 75 | 76 | const Comparator* BytewiseComparator() { 77 | port::InitOnce(&once, InitModule); 78 | return bytewise; 79 | } 80 | 81 | } // namespace leveldb 82 | -------------------------------------------------------------------------------- /util/crc32c.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_UTIL_CRC32C_H_ 6 | #define STORAGE_LEVELDB_UTIL_CRC32C_H_ 7 | 8 | #include 9 | #include 10 | 11 | namespace leveldb { 12 | namespace crc32c { 13 | 14 | // Return the crc32c of concat(A, data[0,n-1]) where init_crc is the 15 | // crc32c of some string A. Extend() is often used to maintain the 16 | // crc32c of a stream of data. 17 | extern uint32_t Extend(uint32_t init_crc, const char* data, size_t n); 18 | 19 | // Return the crc32c of data[0,n-1] 20 | inline uint32_t Value(const char* data, size_t n) { 21 | return Extend(0, data, n); 22 | } 23 | 24 | static const uint32_t kMaskDelta = 0xa282ead8ul; 25 | 26 | // Return a masked representation of crc. 27 | // 28 | // Motivation: it is problematic to compute the CRC of a string that 29 | // contains embedded CRCs. Therefore we recommend that CRCs stored 30 | // somewhere (e.g., in files) should be masked before being stored. 
31 | inline uint32_t Mask(uint32_t crc) { 32 | // Rotate right by 15 bits and add a constant. 33 | return ((crc >> 15) | (crc << 17)) + kMaskDelta; 34 | } 35 | 36 | // Return the crc whose masked representation is masked_crc. 37 | inline uint32_t Unmask(uint32_t masked_crc) { 38 | uint32_t rot = masked_crc - kMaskDelta; 39 | return ((rot >> 17) | (rot << 15)); 40 | } 41 | 42 | } // namespace crc32c 43 | } // namespace leveldb 44 | 45 | #endif // STORAGE_LEVELDB_UTIL_CRC32C_H_ 46 | -------------------------------------------------------------------------------- /util/crc32c_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include "util/crc32c.h" 6 | #include "util/testharness.h" 7 | 8 | namespace leveldb { 9 | namespace crc32c { 10 | 11 | class CRC { }; 12 | 13 | TEST(CRC, StandardResults) { 14 | // From rfc3720 section B.4. 
15 | char buf[32]; 16 | 17 | memset(buf, 0, sizeof(buf)); 18 | ASSERT_EQ(0x8a9136aa, Value(buf, sizeof(buf))); 19 | 20 | memset(buf, 0xff, sizeof(buf)); 21 | ASSERT_EQ(0x62a8ab43, Value(buf, sizeof(buf))); 22 | 23 | for (int i = 0; i < 32; i++) { 24 | buf[i] = i; 25 | } 26 | ASSERT_EQ(0x46dd794e, Value(buf, sizeof(buf))); 27 | 28 | for (int i = 0; i < 32; i++) { 29 | buf[i] = 31 - i; 30 | } 31 | ASSERT_EQ(0x113fdb5c, Value(buf, sizeof(buf))); 32 | 33 | unsigned char data[48] = { 34 | 0x01, 0xc0, 0x00, 0x00, 35 | 0x00, 0x00, 0x00, 0x00, 36 | 0x00, 0x00, 0x00, 0x00, 37 | 0x00, 0x00, 0x00, 0x00, 38 | 0x14, 0x00, 0x00, 0x00, 39 | 0x00, 0x00, 0x04, 0x00, 40 | 0x00, 0x00, 0x00, 0x14, 41 | 0x00, 0x00, 0x00, 0x18, 42 | 0x28, 0x00, 0x00, 0x00, 43 | 0x00, 0x00, 0x00, 0x00, 44 | 0x02, 0x00, 0x00, 0x00, 45 | 0x00, 0x00, 0x00, 0x00, 46 | }; 47 | ASSERT_EQ(0xd9963a56, Value(reinterpret_cast(data), sizeof(data))); 48 | } 49 | 50 | TEST(CRC, Values) { 51 | ASSERT_NE(Value("a", 1), Value("foo", 3)); 52 | } 53 | 54 | TEST(CRC, Extend) { 55 | ASSERT_EQ(Value("hello world", 11), 56 | Extend(Value("hello ", 6), "world", 5)); 57 | } 58 | 59 | TEST(CRC, Mask) { 60 | uint32_t crc = Value("foo", 3); 61 | ASSERT_NE(crc, Mask(crc)); 62 | ASSERT_NE(crc, Mask(Mask(crc))); 63 | ASSERT_EQ(crc, Unmask(Mask(crc))); 64 | ASSERT_EQ(crc, Unmask(Unmask(Mask(Mask(crc))))); 65 | } 66 | 67 | } // namespace crc32c 68 | } // namespace leveldb 69 | 70 | int main(int argc, char** argv) { 71 | return leveldb::test::RunAllTests(); 72 | } 73 | -------------------------------------------------------------------------------- /util/env.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | 5 | #include "leveldb/env.h" 6 | 7 | namespace leveldb { 8 | 9 | Env::~Env() { 10 | } 11 | 12 | Status Env::NewAppendableFile(const std::string& fname, WritableFile** result) { 13 | return Status::NotSupported("NewAppendableFile", fname); 14 | } 15 | 16 | SequentialFile::~SequentialFile() { 17 | } 18 | 19 | RandomAccessFile::~RandomAccessFile() { 20 | } 21 | 22 | WritableFile::~WritableFile() { 23 | } 24 | 25 | Logger::~Logger() { 26 | } 27 | 28 | FileLock::~FileLock() { 29 | } 30 | 31 | void Log(Logger* info_log, const char* format, ...) { 32 | if (info_log != NULL) { 33 | va_list ap; 34 | va_start(ap, format); 35 | info_log->Logv(format, ap); 36 | va_end(ap); 37 | } 38 | } 39 | 40 | static Status DoWriteStringToFile(Env* env, const Slice& data, 41 | const std::string& fname, 42 | bool should_sync) { 43 | WritableFile* file; 44 | Status s = env->NewWritableFile(fname, &file); 45 | if (!s.ok()) { 46 | return s; 47 | } 48 | s = file->Append(data); 49 | if (s.ok() && should_sync) { 50 | s = file->Sync(); 51 | } 52 | if (s.ok()) { 53 | s = file->Close(); 54 | } 55 | delete file; // Will auto-close if we did not close above 56 | if (!s.ok()) { 57 | env->DeleteFile(fname); 58 | } 59 | return s; 60 | } 61 | 62 | Status WriteStringToFile(Env* env, const Slice& data, 63 | const std::string& fname) { 64 | return DoWriteStringToFile(env, data, fname, false); 65 | } 66 | 67 | Status WriteStringToFileSync(Env* env, const Slice& data, 68 | const std::string& fname) { 69 | return DoWriteStringToFile(env, data, fname, true); 70 | } 71 | 72 | Status ReadFileToString(Env* env, const std::string& fname, std::string* data) { 73 | data->clear(); 74 | SequentialFile* file; 75 | Status s = env->NewSequentialFile(fname, &file); 76 | if (!s.ok()) { 77 | return s; 78 | } 79 | static const int kBufferSize = 8192; 80 | char* space = new char[kBufferSize]; 81 | while (true) { 82 | Slice fragment; 83 | s = file->Read(kBufferSize, &fragment, space); 84 | if (!s.ok()) { 85 | break; 86 | } 87 
| data->append(fragment.data(), fragment.size()); 88 | if (fragment.empty()) { 89 | break; 90 | } 91 | } 92 | delete[] space; 93 | delete file; 94 | return s; 95 | } 96 | 97 | EnvWrapper::~EnvWrapper() { 98 | } 99 | 100 | } // namespace leveldb 101 | -------------------------------------------------------------------------------- /util/env_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include "leveldb/env.h" 6 | 7 | #include "port/port.h" 8 | #include "util/testharness.h" 9 | 10 | namespace leveldb { 11 | 12 | static const int kDelayMicros = 100000; 13 | 14 | class EnvPosixTest { 15 | private: 16 | port::Mutex mu_; 17 | std::string events_; 18 | 19 | public: 20 | Env* env_; 21 | EnvPosixTest() : env_(Env::Default()) { } 22 | }; 23 | 24 | static void SetBool(void* ptr) { 25 | reinterpret_cast(ptr)->NoBarrier_Store(ptr); 26 | } 27 | 28 | TEST(EnvPosixTest, RunImmediately) { 29 | port::AtomicPointer called (NULL); 30 | env_->Schedule(&SetBool, &called); 31 | Env::Default()->SleepForMicroseconds(kDelayMicros); 32 | ASSERT_TRUE(called.NoBarrier_Load() != NULL); 33 | } 34 | 35 | TEST(EnvPosixTest, RunMany) { 36 | port::AtomicPointer last_id (NULL); 37 | 38 | struct CB { 39 | port::AtomicPointer* last_id_ptr; // Pointer to shared slot 40 | uintptr_t id; // Order# for the execution of this callback 41 | 42 | CB(port::AtomicPointer* p, int i) : last_id_ptr(p), id(i) { } 43 | 44 | static void Run(void* v) { 45 | CB* cb = reinterpret_cast(v); 46 | void* cur = cb->last_id_ptr->NoBarrier_Load(); 47 | ASSERT_EQ(cb->id-1, reinterpret_cast(cur)); 48 | cb->last_id_ptr->Release_Store(reinterpret_cast(cb->id)); 49 | } 50 | }; 51 | 52 | // Schedule in different order than start time 53 | CB cb1(&last_id, 1); 
54 | CB cb2(&last_id, 2); 55 | CB cb3(&last_id, 3); 56 | CB cb4(&last_id, 4); 57 | env_->Schedule(&CB::Run, &cb1); 58 | env_->Schedule(&CB::Run, &cb2); 59 | env_->Schedule(&CB::Run, &cb3); 60 | env_->Schedule(&CB::Run, &cb4); 61 | 62 | Env::Default()->SleepForMicroseconds(kDelayMicros); 63 | void* cur = last_id.Acquire_Load(); 64 | ASSERT_EQ(4, reinterpret_cast(cur)); 65 | } 66 | 67 | struct State { 68 | port::Mutex mu; 69 | int val; 70 | int num_running; 71 | }; 72 | 73 | static void ThreadBody(void* arg) { 74 | State* s = reinterpret_cast(arg); 75 | s->mu.Lock(); 76 | s->val += 1; 77 | s->num_running -= 1; 78 | s->mu.Unlock(); 79 | } 80 | 81 | TEST(EnvPosixTest, StartThread) { 82 | State state; 83 | state.val = 0; 84 | state.num_running = 3; 85 | for (int i = 0; i < 3; i++) { 86 | env_->StartThread(&ThreadBody, &state); 87 | } 88 | while (true) { 89 | state.mu.Lock(); 90 | int num = state.num_running; 91 | state.mu.Unlock(); 92 | if (num == 0) { 93 | break; 94 | } 95 | Env::Default()->SleepForMicroseconds(kDelayMicros); 96 | } 97 | ASSERT_EQ(state.val, 3); 98 | } 99 | 100 | } // namespace leveldb 101 | 102 | int main(int argc, char** argv) { 103 | return leveldb::test::RunAllTests(); 104 | } 105 | -------------------------------------------------------------------------------- /util/filter_policy.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | 5 | #include "leveldb/filter_policy.h" 6 | 7 | namespace leveldb { 8 | 9 | FilterPolicy::~FilterPolicy() { } 10 | 11 | } // namespace leveldb 12 | -------------------------------------------------------------------------------- /util/hash.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include 6 | #include "util/coding.h" 7 | #include "util/hash.h" 8 | 9 | // The FALLTHROUGH_INTENDED macro can be used to annotate implicit fall-through 10 | // between switch labels. The real definition should be provided externally. 11 | // This one is a fallback version for unsupported compilers. 12 | #ifndef FALLTHROUGH_INTENDED 13 | #define FALLTHROUGH_INTENDED do { } while (0) 14 | #endif 15 | 16 | namespace leveldb { 17 | 18 | uint32_t Hash(const char* data, size_t n, uint32_t seed) { 19 | // Similar to murmur hash 20 | const uint32_t m = 0xc6a4a793; 21 | const uint32_t r = 24; 22 | const char* limit = data + n; 23 | uint32_t h = seed ^ (n * m); 24 | 25 | // Pick up four bytes at a time 26 | while (data + 4 <= limit) { 27 | uint32_t w = DecodeFixed32(data); 28 | data += 4; 29 | h += w; 30 | h *= m; 31 | h ^= (h >> 16); 32 | } 33 | 34 | // Pick up remaining bytes 35 | switch (limit - data) { 36 | case 3: 37 | h += static_cast(data[2]) << 16; 38 | FALLTHROUGH_INTENDED; 39 | case 2: 40 | h += static_cast(data[1]) << 8; 41 | FALLTHROUGH_INTENDED; 42 | case 1: 43 | h += static_cast(data[0]); 44 | h *= m; 45 | h ^= (h >> r); 46 | break; 47 | } 48 | return h; 49 | } 50 | 51 | 52 | } // namespace leveldb 53 | -------------------------------------------------------------------------------- /util/hash.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The 
LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | // 5 | // Simple hash function used for internal data structures 6 | 7 | #ifndef STORAGE_LEVELDB_UTIL_HASH_H_ 8 | #define STORAGE_LEVELDB_UTIL_HASH_H_ 9 | 10 | #include 11 | #include 12 | 13 | namespace leveldb { 14 | 15 | extern uint32_t Hash(const char* data, size_t n, uint32_t seed); 16 | 17 | } 18 | 19 | #endif // STORAGE_LEVELDB_UTIL_HASH_H_ 20 | -------------------------------------------------------------------------------- /util/hash_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include "util/hash.h" 6 | #include "util/testharness.h" 7 | 8 | namespace leveldb { 9 | 10 | class HASH { }; 11 | 12 | TEST(HASH, SignedUnsignedIssue) { 13 | const unsigned char data1[1] = {0x62}; 14 | const unsigned char data2[2] = {0xc3, 0x97}; 15 | const unsigned char data3[3] = {0xe2, 0x99, 0xa5}; 16 | const unsigned char data4[4] = {0xe1, 0x80, 0xb9, 0x32}; 17 | const unsigned char data5[48] = { 18 | 0x01, 0xc0, 0x00, 0x00, 19 | 0x00, 0x00, 0x00, 0x00, 20 | 0x00, 0x00, 0x00, 0x00, 21 | 0x00, 0x00, 0x00, 0x00, 22 | 0x14, 0x00, 0x00, 0x00, 23 | 0x00, 0x00, 0x04, 0x00, 24 | 0x00, 0x00, 0x00, 0x14, 25 | 0x00, 0x00, 0x00, 0x18, 26 | 0x28, 0x00, 0x00, 0x00, 27 | 0x00, 0x00, 0x00, 0x00, 28 | 0x02, 0x00, 0x00, 0x00, 29 | 0x00, 0x00, 0x00, 0x00, 30 | }; 31 | 32 | ASSERT_EQ(Hash(0, 0, 0xbc9f1d34), 0xbc9f1d34); 33 | ASSERT_EQ( 34 | Hash(reinterpret_cast(data1), sizeof(data1), 0xbc9f1d34), 35 | 0xef1345c4); 36 | ASSERT_EQ( 37 | Hash(reinterpret_cast(data2), sizeof(data2), 0xbc9f1d34), 38 | 0x5b663814); 39 | 
ASSERT_EQ( 40 | Hash(reinterpret_cast(data3), sizeof(data3), 0xbc9f1d34), 41 | 0x323c078f); 42 | ASSERT_EQ( 43 | Hash(reinterpret_cast(data4), sizeof(data4), 0xbc9f1d34), 44 | 0xed21633a); 45 | ASSERT_EQ( 46 | Hash(reinterpret_cast(data5), sizeof(data5), 0x12345678), 47 | 0xf333dabb); 48 | } 49 | 50 | } // namespace leveldb 51 | 52 | int main(int argc, char** argv) { 53 | return leveldb::test::RunAllTests(); 54 | } 55 | -------------------------------------------------------------------------------- /util/histogram.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_UTIL_HISTOGRAM_H_ 6 | #define STORAGE_LEVELDB_UTIL_HISTOGRAM_H_ 7 | 8 | #include 9 | 10 | namespace leveldb { 11 | 12 | class Histogram { 13 | public: 14 | Histogram() { } 15 | ~Histogram() { } 16 | 17 | void Clear(); 18 | void Add(double value); 19 | void Merge(const Histogram& other); 20 | 21 | std::string ToString() const; 22 | 23 | private: 24 | double min_; 25 | double max_; 26 | double num_; 27 | double sum_; 28 | double sum_squares_; 29 | 30 | enum { kNumBuckets = 154 }; 31 | static const double kBucketLimit[kNumBuckets]; 32 | double buckets_[kNumBuckets]; 33 | 34 | double Median() const; 35 | double Percentile(double p) const; 36 | double Average() const; 37 | double StandardDeviation() const; 38 | }; 39 | 40 | } // namespace leveldb 41 | 42 | #endif // STORAGE_LEVELDB_UTIL_HISTOGRAM_H_ 43 | -------------------------------------------------------------------------------- /util/logging.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include "util/logging.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "leveldb/env.h" 12 | #include "leveldb/slice.h" 13 | 14 | namespace leveldb { 15 | 16 | void AppendNumberTo(std::string* str, uint64_t num) { 17 | char buf[30]; 18 | snprintf(buf, sizeof(buf), "%llu", (unsigned long long) num); 19 | str->append(buf); 20 | } 21 | 22 | void AppendEscapedStringTo(std::string* str, const Slice& value) { 23 | for (size_t i = 0; i < value.size(); i++) { 24 | char c = value[i]; 25 | if (c >= ' ' && c <= '~') { 26 | str->push_back(c); 27 | } else { 28 | char buf[10]; 29 | snprintf(buf, sizeof(buf), "\\x%02x", 30 | static_cast(c) & 0xff); 31 | str->append(buf); 32 | } 33 | } 34 | } 35 | 36 | std::string NumberToString(uint64_t num) { 37 | std::string r; 38 | AppendNumberTo(&r, num); 39 | return r; 40 | } 41 | 42 | std::string EscapeString(const Slice& value) { 43 | std::string r; 44 | AppendEscapedStringTo(&r, value); 45 | return r; 46 | } 47 | 48 | bool ConsumeDecimalNumber(Slice* in, uint64_t* val) { 49 | uint64_t v = 0; 50 | int digits = 0; 51 | while (!in->empty()) { 52 | char c = (*in)[0]; 53 | if (c >= '0' && c <= '9') { 54 | ++digits; 55 | const int delta = (c - '0'); 56 | static const uint64_t kMaxUint64 = ~static_cast(0); 57 | if (v > kMaxUint64/10 || 58 | (v == kMaxUint64/10 && delta > kMaxUint64%10)) { 59 | // Overflow 60 | return false; 61 | } 62 | v = (v * 10) + delta; 63 | in->remove_prefix(1); 64 | } else { 65 | break; 66 | } 67 | } 68 | *val = v; 69 | return (digits > 0); 70 | } 71 | 72 | } // namespace leveldb 73 | -------------------------------------------------------------------------------- /util/logging.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | // 5 | // Must not be included from any .h files to avoid polluting the namespace 6 | // with macros. 7 | 8 | #ifndef STORAGE_LEVELDB_UTIL_LOGGING_H_ 9 | #define STORAGE_LEVELDB_UTIL_LOGGING_H_ 10 | 11 | #include 12 | #include 13 | #include 14 | #include "port/port.h" 15 | 16 | namespace leveldb { 17 | 18 | class Slice; 19 | class WritableFile; 20 | 21 | // Append a human-readable printout of "num" to *str 22 | extern void AppendNumberTo(std::string* str, uint64_t num); 23 | 24 | // Append a human-readable printout of "value" to *str. 25 | // Escapes any non-printable characters found in "value". 26 | extern void AppendEscapedStringTo(std::string* str, const Slice& value); 27 | 28 | // Return a human-readable printout of "num" 29 | extern std::string NumberToString(uint64_t num); 30 | 31 | // Return a human-readable version of "value". 32 | // Escapes any non-printable characters found in "value". 33 | extern std::string EscapeString(const Slice& value); 34 | 35 | // Parse a decimal number from the front of "*in" into "*val". On success, 36 | // advances "*in" past the consumed digits, sets "*val" to the numeric 37 | // value and returns true. Otherwise, returns false and leaves *in in an 38 | // unspecified state. 39 | extern bool ConsumeDecimalNumber(Slice* in, uint64_t* val); 40 | 41 | } // namespace leveldb 42 | 43 | #endif // STORAGE_LEVELDB_UTIL_LOGGING_H_ 44 | -------------------------------------------------------------------------------- /util/mutexlock.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | 5 | #ifndef STORAGE_LEVELDB_UTIL_MUTEXLOCK_H_ 6 | #define STORAGE_LEVELDB_UTIL_MUTEXLOCK_H_ 7 | 8 | #include "port/port.h" 9 | #include "port/thread_annotations.h" 10 | 11 | namespace leveldb { 12 | 13 | // Helper class that locks a mutex on construction and unlocks the mutex when 14 | // the destructor of the MutexLock object is invoked. 15 | // 16 | // Typical usage: 17 | // 18 | // void MyClass::MyMethod() { 19 | // MutexLock l(&mu_); // mu_ is an instance variable 20 | // ... some complex code, possibly with multiple return paths ... 21 | // } 22 | 23 | class SCOPED_LOCKABLE MutexLock { 24 | public: 25 | explicit MutexLock(port::Mutex *mu) EXCLUSIVE_LOCK_FUNCTION(mu) 26 | : mu_(mu) { 27 | this->mu_->Lock(); 28 | } 29 | ~MutexLock() UNLOCK_FUNCTION() { this->mu_->Unlock(); } 30 | 31 | private: 32 | port::Mutex *const mu_; 33 | // No copying allowed 34 | MutexLock(const MutexLock&); 35 | void operator=(const MutexLock&); 36 | }; 37 | 38 | } // namespace leveldb 39 | 40 | 41 | #endif // STORAGE_LEVELDB_UTIL_MUTEXLOCK_H_ 42 | -------------------------------------------------------------------------------- /util/options.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | 5 | #include "leveldb/options.h" 6 | 7 | #include "leveldb/comparator.h" 8 | #include "leveldb/env.h" 9 | 10 | namespace leveldb { 11 | 12 | Options::Options() 13 | : comparator(BytewiseComparator()), 14 | create_if_missing(false), 15 | error_if_exists(false), 16 | paranoid_checks(false), 17 | env(Env::Default()), 18 | info_log(NULL), 19 | write_buffer_size(4<<20), 20 | max_open_files(1000), 21 | block_cache(NULL), 22 | block_size(4096), 23 | block_restart_interval(16), 24 | compression(kSnappyCompression), 25 | reuse_logs(false), 26 | filter_policy(NULL) { 27 | } 28 | 29 | } // namespace leveldb 30 | -------------------------------------------------------------------------------- /util/posix_logger.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | // 5 | // Logger implementation that can be shared by all environments 6 | // where enough posix functionality is available. 7 | 8 | #ifndef STORAGE_LEVELDB_UTIL_POSIX_LOGGER_H_ 9 | #define STORAGE_LEVELDB_UTIL_POSIX_LOGGER_H_ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include "leveldb/env.h" 16 | 17 | namespace leveldb { 18 | 19 | class PosixLogger : public Logger { 20 | private: 21 | FILE* file_; 22 | uint64_t (*gettid_)(); // Return the thread id for the current thread 23 | public: 24 | PosixLogger(FILE* f, uint64_t (*gettid)()) : file_(f), gettid_(gettid) { } 25 | virtual ~PosixLogger() { 26 | fclose(file_); 27 | } 28 | virtual void Logv(const char* format, va_list ap) { 29 | const uint64_t thread_id = (*gettid_)(); 30 | 31 | // We try twice: the first time with a fixed-size stack allocated buffer, 32 | // and the second time with a much larger dynamically allocated buffer. 
33 | char buffer[500]; 34 | for (int iter = 0; iter < 2; iter++) { 35 | char* base; 36 | int bufsize; 37 | if (iter == 0) { 38 | bufsize = sizeof(buffer); 39 | base = buffer; 40 | } else { 41 | bufsize = 30000; 42 | base = new char[bufsize]; 43 | } 44 | char* p = base; 45 | char* limit = base + bufsize; 46 | 47 | struct timeval now_tv; 48 | gettimeofday(&now_tv, NULL); 49 | const time_t seconds = now_tv.tv_sec; 50 | struct tm t; 51 | localtime_r(&seconds, &t); 52 | p += snprintf(p, limit - p, 53 | "%04d/%02d/%02d-%02d:%02d:%02d.%06d %llx ", 54 | t.tm_year + 1900, 55 | t.tm_mon + 1, 56 | t.tm_mday, 57 | t.tm_hour, 58 | t.tm_min, 59 | t.tm_sec, 60 | static_cast(now_tv.tv_usec), 61 | static_cast(thread_id)); 62 | 63 | // Print the message 64 | if (p < limit) { 65 | va_list backup_ap; 66 | va_copy(backup_ap, ap); 67 | p += vsnprintf(p, limit - p, format, backup_ap); 68 | va_end(backup_ap); 69 | } 70 | 71 | // Truncate to available space if necessary 72 | if (p >= limit) { 73 | if (iter == 0) { 74 | continue; // Try again with larger buffer 75 | } else { 76 | p = limit - 1; 77 | } 78 | } 79 | 80 | // Add newline if necessary 81 | if (p == base || p[-1] != '\n') { 82 | *p++ = '\n'; 83 | } 84 | 85 | assert(p <= limit); 86 | fwrite(base, 1, p - base, file_); 87 | fflush(file_); 88 | if (base != buffer) { 89 | delete[] base; 90 | } 91 | break; 92 | } 93 | } 94 | }; 95 | 96 | } // namespace leveldb 97 | 98 | #endif // STORAGE_LEVELDB_UTIL_POSIX_LOGGER_H_ 99 | -------------------------------------------------------------------------------- /util/random.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | 5 | #ifndef STORAGE_LEVELDB_UTIL_RANDOM_H_ 6 | #define STORAGE_LEVELDB_UTIL_RANDOM_H_ 7 | 8 | #include 9 | 10 | namespace leveldb { 11 | 12 | // A very simple random number generator. Not especially good at 13 | // generating truly random bits, but good enough for our needs in this 14 | // package. 15 | class Random { 16 | private: 17 | uint32_t seed_; 18 | public: 19 | explicit Random(uint32_t s) : seed_(s & 0x7fffffffu) { 20 | // Avoid bad seeds: Next() would stay stuck at 0 or at 2^31-1 forever. 21 | if (seed_ == 0 || seed_ == 2147483647L) { 22 | seed_ = 1; 23 | } 24 | } 25 | uint32_t Next() { 26 | static const uint32_t M = 2147483647L; // 2^31-1 27 | static const uint64_t A = 16807; // bits 14, 8, 7, 5, 2, 1, 0 28 | // We are computing 29 | // seed_ = (seed_ * A) % M, where M = 2^31-1 30 | // 31 | // seed_ must not be zero or M, or else all subsequent computed values 32 | // will be zero or M respectively. For all other values, seed_ will end 33 | // up cycling through every number in [1,M-1] 34 | uint64_t product = seed_ * A; 35 | 36 | // Compute (product % M) using the fact that ((x << 31) % M) == x. 37 | seed_ = static_cast((product >> 31) + (product & M)); 38 | // The first reduction may overflow by 1 bit, so we may need to 39 | // repeat. seed_ == M is not possible; using > allows the faster 40 | // sign-bit-based test. 41 | if (seed_ > M) { 42 | seed_ -= M; 43 | } 44 | return seed_; 45 | } 46 | // Returns a uniformly distributed value in the range [0..n-1] 47 | // REQUIRES: n > 0 48 | uint32_t Uniform(int n) { return Next() % n; } 49 | 50 | // Randomly returns true ~"1/n" of the time, and false otherwise. 51 | // REQUIRES: n > 0 52 | bool OneIn(int n) { return (Next() % n) == 0; } 53 | 54 | // Skewed: pick "base" uniformly from range [0,max_log] and then 55 | // return "base" random bits. The effect is to pick a number in the 56 | // range [0,2^max_log-1] with exponential bias towards smaller numbers. 
57 | uint32_t Skewed(int max_log) { 58 | return Uniform(1 << Uniform(max_log + 1)); 59 | } 60 | }; 61 | 62 | } // namespace leveldb 63 | 64 | #endif // STORAGE_LEVELDB_UTIL_RANDOM_H_ 65 | -------------------------------------------------------------------------------- /util/status.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include 6 | #include "port/port.h" 7 | #include "leveldb/status.h" 8 | 9 | namespace leveldb { 10 | 11 | const char* Status::CopyState(const char* state) { 12 | uint32_t size; 13 | memcpy(&size, state, sizeof(size)); 14 | char* result = new char[size + 5]; 15 | memcpy(result, state, size + 5); 16 | return result; 17 | } 18 | 19 | Status::Status(Code code, const Slice& msg, const Slice& msg2) { 20 | assert(code != kOk); 21 | const uint32_t len1 = msg.size(); 22 | const uint32_t len2 = msg2.size(); 23 | const uint32_t size = len1 + (len2 ? 
(2 + len2) : 0); 24 | char* result = new char[size + 5]; 25 | memcpy(result, &size, sizeof(size)); 26 | result[4] = static_cast(code); 27 | memcpy(result + 5, msg.data(), len1); 28 | if (len2) { 29 | result[5 + len1] = ':'; 30 | result[6 + len1] = ' '; 31 | memcpy(result + 7 + len1, msg2.data(), len2); 32 | } 33 | state_ = result; 34 | } 35 | 36 | std::string Status::ToString() const { 37 | if (state_ == NULL) { 38 | return "OK"; 39 | } else { 40 | char tmp[30]; 41 | const char* type; 42 | switch (code()) { 43 | case kOk: 44 | type = "OK"; 45 | break; 46 | case kNotFound: 47 | type = "NotFound: "; 48 | break; 49 | case kCorruption: 50 | type = "Corruption: "; 51 | break; 52 | case kNotSupported: 53 | type = "Not implemented: "; 54 | break; 55 | case kInvalidArgument: 56 | type = "Invalid argument: "; 57 | break; 58 | case kIOError: 59 | type = "IO error: "; 60 | break; 61 | default: 62 | snprintf(tmp, sizeof(tmp), "Unknown code(%d): ", 63 | static_cast(code())); 64 | type = tmp; 65 | break; 66 | } 67 | std::string result(type); 68 | uint32_t length; 69 | memcpy(&length, state_, sizeof(length)); 70 | result.append(state_ + 5, length); 71 | return result; 72 | } 73 | } 74 | 75 | } // namespace leveldb 76 | -------------------------------------------------------------------------------- /util/testharness.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 
4 | 5 | #include "util/testharness.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace leveldb { 13 | namespace test { 14 | 15 | namespace { 16 | struct Test { 17 | const char* base; 18 | const char* name; 19 | void (*func)(); 20 | }; 21 | std::vector* tests; 22 | } 23 | 24 | bool RegisterTest(const char* base, const char* name, void (*func)()) { 25 | if (tests == NULL) { 26 | tests = new std::vector; 27 | } 28 | Test t; 29 | t.base = base; 30 | t.name = name; 31 | t.func = func; 32 | tests->push_back(t); 33 | return true; 34 | } 35 | 36 | int RunAllTests() { 37 | const char* matcher = getenv("LEVELDB_TESTS"); 38 | 39 | int num = 0; 40 | if (tests != NULL) { 41 | for (size_t i = 0; i < tests->size(); i++) { 42 | const Test& t = (*tests)[i]; 43 | if (matcher != NULL) { 44 | std::string name = t.base; 45 | name.push_back('.'); 46 | name.append(t.name); 47 | if (strstr(name.c_str(), matcher) == NULL) { 48 | continue; 49 | } 50 | } 51 | fprintf(stderr, "==== Test %s.%s\n", t.base, t.name); 52 | (*t.func)(); 53 | ++num; 54 | } 55 | } 56 | fprintf(stderr, "==== PASSED %d tests\n", num); 57 | return 0; 58 | } 59 | 60 | std::string TmpDir() { 61 | std::string dir; 62 | Status s = Env::Default()->GetTestDirectory(&dir); 63 | ASSERT_TRUE(s.ok()) << s.ToString(); 64 | return dir; 65 | } 66 | 67 | int RandomSeed() { 68 | const char* env = getenv("TEST_RANDOM_SEED"); 69 | int result = (env != NULL ? atoi(env) : 301); 70 | if (result <= 0) { 71 | result = 301; 72 | } 73 | return result; 74 | } 75 | 76 | } // namespace test 77 | } // namespace leveldb 78 | -------------------------------------------------------------------------------- /util/testharness.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 
See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_UTIL_TESTHARNESS_H_ 6 | #define STORAGE_LEVELDB_UTIL_TESTHARNESS_H_ 7 | 8 | #include 9 | #include 10 | #include 11 | #include "leveldb/env.h" 12 | #include "leveldb/slice.h" 13 | #include "util/random.h" 14 | 15 | namespace leveldb { 16 | namespace test { 17 | 18 | // Run some of the tests registered by the TEST() macro. If the 19 | // environment variable "LEVELDB_TESTS" is not set, runs all tests. 20 | // Otherwise, runs only the tests whose name contains the value of 21 | // "LEVELDB_TESTS" as a substring. E.g., suppose the tests are: 22 | // TEST(Foo, Hello) { ... } 23 | // TEST(Foo, World) { ... } 24 | // LEVELDB_TESTS=Hello will run the first test 25 | // LEVELDB_TESTS=o will run both tests 26 | // LEVELDB_TESTS=Junk will run no tests 27 | // 28 | // Returns 0 if all tests pass. 29 | // Dies or returns a non-zero value if some test fails. 30 | extern int RunAllTests(); 31 | 32 | // Return the directory to use for temporary storage. 33 | extern std::string TmpDir(); 34 | 35 | // Return a randomization seed for this run. Typically returns the 36 | // same number on repeated invocations of this binary, but automated 37 | // runs may be able to vary the seed. 38 | extern int RandomSeed(); 39 | 40 | // An instance of Tester is allocated to hold temporary state during 41 | // the execution of an assertion. 
42 | class Tester { 43 | private: 44 | bool ok_; 45 | const char* fname_; 46 | int line_; 47 | std::stringstream ss_; 48 | 49 | public: 50 | Tester(const char* f, int l) 51 | : ok_(true), fname_(f), line_(l) { 52 | } 53 | 54 | ~Tester() { 55 | if (!ok_) { 56 | fprintf(stderr, "%s:%d:%s\n", fname_, line_, ss_.str().c_str()); 57 | exit(1); 58 | } 59 | } 60 | 61 | Tester& Is(bool b, const char* msg) { 62 | if (!b) { 63 | ss_ << " Assertion failure " << msg; 64 | ok_ = false; 65 | } 66 | return *this; 67 | } 68 | 69 | Tester& IsOk(const Status& s) { 70 | if (!s.ok()) { 71 | ss_ << " " << s.ToString(); 72 | ok_ = false; 73 | } 74 | return *this; 75 | } 76 | 77 | #define BINARY_OP(name,op) \ 78 | template \ 79 | Tester& name(const X& x, const Y& y) { \ 80 | if (! (x op y)) { \ 81 | ss_ << " failed: " << x << (" " #op " ") << y; \ 82 | ok_ = false; \ 83 | } \ 84 | return *this; \ 85 | } 86 | 87 | BINARY_OP(IsEq, ==) 88 | BINARY_OP(IsNe, !=) 89 | BINARY_OP(IsGe, >=) 90 | BINARY_OP(IsGt, >) 91 | BINARY_OP(IsLe, <=) 92 | BINARY_OP(IsLt, <) 93 | #undef BINARY_OP 94 | 95 | // Attach the specified value to the error message if an error has occurred 96 | template 97 | Tester& operator<<(const V& value) { 98 | if (!ok_) { 99 | ss_ << " " << value; 100 | } 101 | return *this; 102 | } 103 | }; 104 | 105 | #define ASSERT_TRUE(c) ::leveldb::test::Tester(__FILE__, __LINE__).Is((c), #c) 106 | #define ASSERT_OK(s) ::leveldb::test::Tester(__FILE__, __LINE__).IsOk((s)) 107 | #define ASSERT_EQ(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsEq((a),(b)) 108 | #define ASSERT_NE(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsNe((a),(b)) 109 | #define ASSERT_GE(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsGe((a),(b)) 110 | #define ASSERT_GT(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsGt((a),(b)) 111 | #define ASSERT_LE(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsLe((a),(b)) 112 | #define ASSERT_LT(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsLt((a),(b)) 113 | 
114 | #define TCONCAT(a,b) TCONCAT1(a,b) 115 | #define TCONCAT1(a,b) a##b 116 | 117 | #define TEST(base,name) \ 118 | class TCONCAT(_Test_,name) : public base { \ 119 | public: \ 120 | void _Run(); \ 121 | static void _RunIt() { \ 122 | TCONCAT(_Test_,name) t; \ 123 | t._Run(); \ 124 | } \ 125 | }; \ 126 | bool TCONCAT(_Test_ignored_,name) = \ 127 | ::leveldb::test::RegisterTest(#base, #name, &TCONCAT(_Test_,name)::_RunIt); \ 128 | void TCONCAT(_Test_,name)::_Run() 129 | 130 | // Register the specified test. Typically not used directly, but 131 | // invoked via the macro expansion of TEST. 132 | extern bool RegisterTest(const char* base, const char* name, void (*func)()); 133 | 134 | 135 | } // namespace test 136 | } // namespace leveldb 137 | 138 | #endif // STORAGE_LEVELDB_UTIL_TESTHARNESS_H_ 139 | -------------------------------------------------------------------------------- /util/testutil.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #include "util/testutil.h" 6 | 7 | #include "util/random.h" 8 | 9 | namespace leveldb { 10 | namespace test { 11 | 12 | Slice RandomString(Random* rnd, int len, std::string* dst) { 13 | dst->resize(len); 14 | for (int i = 0; i < len; i++) { 15 | (*dst)[i] = static_cast(' ' + rnd->Uniform(95)); // ' ' .. '~' 16 | } 17 | return Slice(*dst); 18 | } 19 | 20 | std::string RandomKey(Random* rnd, int len) { 21 | // Make sure to generate a wide variety of characters so we 22 | // test the boundary conditions for short-key optimizations. 
23 | static const char kTestChars[] = { 24 | '\0', '\1', 'a', 'b', 'c', 'd', 'e', '\xfd', '\xfe', '\xff' 25 | }; 26 | std::string result; 27 | for (int i = 0; i < len; i++) { 28 | result += kTestChars[rnd->Uniform(sizeof(kTestChars))]; 29 | } 30 | return result; 31 | } 32 | 33 | 34 | extern Slice CompressibleString(Random* rnd, double compressed_fraction, 35 | size_t len, std::string* dst) { 36 | int raw = static_cast(len * compressed_fraction); 37 | if (raw < 1) raw = 1; 38 | std::string raw_data; 39 | RandomString(rnd, raw, &raw_data); 40 | 41 | // Duplicate the random data until we have filled "len" bytes 42 | dst->clear(); 43 | while (dst->size() < len) { 44 | dst->append(raw_data); 45 | } 46 | dst->resize(len); 47 | return Slice(*dst); 48 | } 49 | 50 | } // namespace test 51 | } // namespace leveldb 52 | -------------------------------------------------------------------------------- /util/testutil.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 | 5 | #ifndef STORAGE_LEVELDB_UTIL_TESTUTIL_H_ 6 | #define STORAGE_LEVELDB_UTIL_TESTUTIL_H_ 7 | 8 | #include "leveldb/env.h" 9 | #include "leveldb/slice.h" 10 | #include "util/random.h" 11 | 12 | namespace leveldb { 13 | namespace test { 14 | 15 | // Store in *dst a random string of length "len" and return a Slice that 16 | // references the generated data. 17 | extern Slice RandomString(Random* rnd, int len, std::string* dst); 18 | 19 | // Return a random key with the specified length that may contain interesting 20 | // characters (e.g. \x00, \xff, etc.). 
21 | extern std::string RandomKey(Random* rnd, int len); 22 | 23 | // Store in *dst a string of length "len" that will compress to 24 | // "len*compressed_fraction" bytes and return a Slice that references 25 | // the generated data. 26 | extern Slice CompressibleString(Random* rnd, double compressed_fraction, 27 | size_t len, std::string* dst); 28 | 29 | // A wrapper around Env::Default() that allows injection of errors: while writable_file_error_ is true, NewWritableFile() and NewAppendableFile() set *result to NULL, return an IOError and increment num_writable_file_errors_; otherwise they forward to the wrapped Env. 30 | class ErrorEnv : public EnvWrapper { 31 | public: 32 | bool writable_file_error_; 33 | int num_writable_file_errors_; 34 | 35 | ErrorEnv() : EnvWrapper(Env::Default()), 36 | writable_file_error_(false), 37 | num_writable_file_errors_(0) { } 38 | 39 | virtual Status NewWritableFile(const std::string& fname, 40 | WritableFile** result) { 41 | if (writable_file_error_) { 42 | ++num_writable_file_errors_; 43 | *result = NULL; 44 | return Status::IOError(fname, "fake error"); 45 | } 46 | return target()->NewWritableFile(fname, result); 47 | } 48 | 49 | virtual Status NewAppendableFile(const std::string& fname, 50 | WritableFile** result) { 51 | if (writable_file_error_) { 52 | ++num_writable_file_errors_; 53 | *result = NULL; 54 | return Status::IOError(fname, "fake error"); 55 | } 56 | return target()->NewAppendableFile(fname, result); 57 | } 58 | }; 59 | 60 | } // namespace test 61 | } // namespace leveldb 62 | 63 | #endif // STORAGE_LEVELDB_UTIL_TESTUTIL_H_ 64 | --------------------------------------------------------------------------------