├── .gitignore ├── bootstrap.sh ├── handler.h ├── globals.cc ├── dumplog.cc ├── binlog-msg.h ├── Makefile ├── globals.h ├── leveldbd.conf ├── README.md ├── logfile.h ├── master-slave.md ├── logdb.h ├── logfile.cc ├── binlog-msg.cc ├── leveldbd.cc ├── handler.cc └── logdb.cc /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | leveldbd* 3 | deps -------------------------------------------------------------------------------- /bootstrap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | test -e deps && exit 0 3 | mkdir -p deps; cd deps 4 | 5 | git clone https://github.com/google/snappy && 6 | cd snappy && ./autogen.sh && ./configure && make clean && make $1 || exit 1 7 | cd .. 8 | 9 | git clone https://github.com/google/leveldb && 10 | cd leveldb && make libleveldb.a $1 || exit 1 11 | cd .. 12 | 13 | git clone https://github.com/yedf/handy && 14 | cd handy && make libhandy.a || exit 1 15 | -------------------------------------------------------------------------------- /handler.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include "leveldb/db.h" 6 | #include "globals.h" 7 | #include "logdb.h" 8 | 9 | using namespace std; 10 | using namespace handy; 11 | 12 | int64_t getSize(Slice bkey, Slice ekey, leveldb::DB* db); 13 | 14 | void handleReq(EventBase& base, LogDb* db, const HttpConnPtr& con); 15 | Status decodeKvBody(Slice* body, Slice* key, Slice* value, bool* exist ); 16 | -------------------------------------------------------------------------------- /globals.cc: -------------------------------------------------------------------------------- 1 | #include "globals.h" 2 | 3 | Conf g_conf; 4 | int g_page_limit; 5 | int g_batch_count; 6 | int g_batch_size; 7 | int g_flush_slave_interval; 8 | 9 | void setGlobalConfig(Conf& conf) { 10 | g_page_limit = 
g_conf.getInteger("", "page_limit", 1000); 11 | g_batch_count = g_conf.getInteger("", "batch_count", 100*1000); 12 | g_batch_size = g_conf.getInteger("", "batch_size", 3); 13 | g_batch_size *= 1024*1024; 14 | g_flush_slave_interval = g_conf.getInteger("", "flush_slave_interval", 3); 15 | } 16 | 17 | -------------------------------------------------------------------------------- /dumplog.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "handler.h" 5 | #include 6 | #include 7 | #include "globals.h" 8 | #include "logdb.h" 9 | 10 | int main(int argc, const char* argv[]) { 11 | if (argc < 2) { 12 | printf("usage: %s \n", argv[0]); 13 | return 1; 14 | } 15 | Status st = LogDb::dumpFile(argv[1]); 16 | if (!st.ok()) { 17 | error("dumpfile error: %d %s", st.code(), st.msg()); 18 | return 1; 19 | } 20 | return 0; 21 | } 22 | 23 | -------------------------------------------------------------------------------- /binlog-msg.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include "leveldb/db.h" 6 | #include "globals.h" 7 | #include "logdb.h" 8 | 9 | using namespace std; 10 | using namespace handy; 11 | 12 | void addBinlogHeader(Slice bkey, Slice ekey, HttpRequest& req, HttpResponse& resp); 13 | void handleBinlog(LogDb* db, EventBase* base, const HttpConnPtr& con); 14 | void sendEmptyBinlog(EventBase* base, LogDb* db); 15 | void sendSyncReq(LogDb* db, EventBase* base, const HttpConnPtr& con); 16 | void processSyncResp(LogDb* db, const HttpConnPtr& con, EventBase* base); 17 | 18 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | $(shell ./bootstrap.sh 1>&2) 2 | CC=cc 3 | CXX=g++ 4 | CXXFLAGS= -DOS_LINUX -g -std=c++11 -Wall -I. 
-Ideps/handy -Ideps/leveldb/include 5 | LDFLAGS= -pthread deps/handy/libhandy.a deps/leveldb/libleveldb.a deps/snappy/.libs/libsnappy.a 6 | 7 | SOURCES = handler.cc globals.cc logdb.cc logfile.cc binlog-msg.cc 8 | 9 | PROGRAMS = leveldbd dumplog 10 | 11 | OBJECTS = $(SOURCES:.cc=.o) 12 | 13 | default: $(PROGRAMS) db2 14 | 15 | db2: leveldbd 16 | cp -f leveldbd db2 17 | 18 | clean: 19 | -rm -f $(PROGRAMS) 20 | -rm -f *.o 21 | 22 | $(PROGRAMS): $(OBJECTS) 23 | 24 | .cc.o: 25 | $(CXX) $(CXXFLAGS) -c $< -o $@ 26 | 27 | .c.o: 28 | $(CC) $(CFLAGS) -c $< -o $@ 29 | 30 | .cc: 31 | $(CXX) $< -o $@ $(OBJECTS) $(CXXFLAGS) $(LDFLAGS) 32 | -------------------------------------------------------------------------------- /globals.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "leveldb/db.h" 7 | 8 | using namespace std; 9 | using namespace handy; 10 | 11 | extern Conf g_conf; 12 | extern int g_page_limit; 13 | extern int g_batch_count; 14 | extern int g_batch_size; 15 | extern int g_flush_slave_interval; 16 | 17 | void setGlobalConfig(Conf& conf); 18 | inline leveldb::Slice convSlice(Slice s) { return leveldb::Slice(s.data(), s.size()); } 19 | inline Slice convSlice(leveldb::Slice s) { return Slice(s.data(), s.size()); } 20 | inline string addSlash(const string& dir) { if (dir.size() && dir[dir.size()-1] != '/') return dir + '/'; return dir; } 21 | 22 | struct ConvertStatus { 23 | Status st; 24 | ConvertStatus(const leveldb::Status& s){ if (!s.ok()) { st = Status(EINVAL, s.ToString()); } } 25 | operator Status () { return move(st); } 26 | }; 27 | 28 | 29 | -------------------------------------------------------------------------------- /leveldbd.conf: -------------------------------------------------------------------------------- 1 | #run the program as a daemon or not 2 | #default on 3 | daemon=off 4 | 5 | #default info 6 | loglevel=trace 7 | 8 | #db data 
directory 9 | dbdir=/root/ldbd 10 | 11 | #default leveldbd.log 12 | logfile= 13 | 14 | #read threads number 15 | #default 8 16 | read_threads=8 17 | 18 | #write threads number 19 | #default 1 20 | write_threads=1 21 | 22 | #program bind ipv4 addr 23 | #default 0.0.0.0 24 | bind = 0.0.0.0 25 | 26 | #program listen port 27 | #default 80 28 | port = 80 29 | 30 | #stat-server listen port 31 | #default 8080 32 | stat_port = 8080 33 | 34 | #limit records of a page 35 | #default 1000 36 | page_limit = 20 37 | 38 | #limit records of batch operation 39 | #default 100000 40 | batch_count = 100 41 | 42 | #limit size of a batch operation 43 | #unit MB 44 | #default 3 45 | batch_size = 1 46 | 47 | #limit size for binlog file 48 | #unit MB 49 | #default 0 do not write binlog 50 | binlog_size = 64 51 | 52 | #id of this db 53 | #no default 54 | dbid = 1 55 | 56 | #help file 57 | #default README 58 | help_file = README.md 59 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **leveldbd是一个nosql数据库,底层使用leveldb作为存储引擎,提供REST接口。** 2 | 3 | 提供的特性包括 4 | 5 | - 主从同步 6 | - 主主同步 7 | - snappy压缩 8 | - 范围查询 9 | - 批量读写 10 | - 易于管理 11 | - 内置状态查看与管理 12 | 13 | 使用了C++11,需要g++4.8 在ubuntu14上测试 14 | 15 | ##编译运行 16 | 17 | git clone https://github.com/yedf/leveldbd.git 18 | 19 | cd leveldbd 20 | 21 | make 22 | 23 | ./leveldbd 24 | 25 | ##主从复制 26 | 27 | https://github.com/yedf/leveldbd/blob/master/master-slave.md 28 | 29 | ##leveldbd REST接口 30 | 31 | ###Get 32 | 33 | curl localhost/d/key1 34 | 35 | 36 | ###Set 37 | 38 | curl -d"value1" localhost/d/key1 39 | 40 | 41 | ###Delete 42 | 43 | curl -X"DELETE" localhost/d/key1 44 | 45 | 46 | ###Navigate 网页方式进行浏览与管理 47 | 48 | localhost/nav-next/begin-key 49 | 50 | 51 | ###Batch-Get 52 | 53 | localhost/batch-get/ 54 | 55 | request body data format is key-format. 
(format details can be found at the end) 56 | 57 | response data format is kv-format 58 | 59 | 60 | ###Batch-Set 61 | 62 | curl -X"POST" localhost/batch-set/ 63 | 64 | request body data format is kv-format. 65 | 66 | 67 | ###Batch-Delete 68 | 69 | curl -X"DELETE" localhost/batch-delete/ 70 | 71 | request body data format is key-format 72 | 73 | 74 | ###Range-Get 75 | 76 | localhost/range-get/begin-key?end=end-key&inc=1 77 | 78 | query 'end' is optional and specifies the end key (excluded from the response); by default, reads until the end 79 | 80 | query 'inc' is optional and specifies whether the begin key should be included in the response. default 0 81 | 82 | response data format is kv-format. 83 | 84 | 85 | ###body format 86 | kv-format:'[key]\n[value len]\n[value]\n[key2]\n0\n\n[key3]\n-1\n\n[key4]...' 87 | 88 | value len: 89 | 90 | -1 indicates the key does not exist 91 | 92 | 0 value length is 0 93 | 94 | 95 | key-format: '[key1]\n[key2]...' 96 | -------------------------------------------------------------------------------- /logfile.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | using namespace std; 7 | using namespace handy; 8 | 9 | const int64_t LOG_MAGIC = 0x2323232323232323; 10 | 11 | //record format 12 | // magic len data padded 13 | // 8 8 len padded-to-8 14 | struct LogFile { 15 | LogFile(): fd_(-1) {} 16 | Status open(const string& name, bool readonly=true); 17 | Status append(Slice record); 18 | Status getRecord(int64_t* offset, Slice* data, string* scrach); 19 | Status batchRecord(int64_t offset, string* rec, int batchSize); 20 | Status sync(); 21 | int64_t size() { return lseek(fd_, 0, SEEK_END);} 22 | static Status decodeBinlogData(Slice* fileCont, Slice* record); 23 | 24 | int fd_; 25 | string name_; 26 | static size_t totalLen(size_t sz) { return (sz + 8 + 8+ 7) / 8 * 8; } 27 | }; 28 | 29 | struct SyncPos { 30 | string key; 31 | int64_t dataFinished; 32 | int64_t fileno; 33 | 
int64_t offset; 34 | SyncPos(): dataFinished(1), fileno(-1), offset(-1) {} 35 | string toString() { return util::format("%ld %ld %ld %s", fileno, offset, dataFinished, key.c_str()); } 36 | bool fromString(const string& pos, char delimiter) { 37 | return fromSlices(Slice(pos).split(delimiter)); 38 | } 39 | bool fromSlices(const vector& ss) { 40 | if (ss.size() < 3) { 41 | return false; 42 | } 43 | int i = 0; 44 | fileno = util::atoi(ss[i++].data()); 45 | offset = util::atoi(ss[i++].data()); 46 | dataFinished = util::atoi(ss[i++].data()); 47 | if (ss.size() > 3) { 48 | Slice keyln = ss[i++]; 49 | key = keyln.eatWord(); 50 | if (key.size() && key[0] == '#') { 51 | key.clear(); 52 | } 53 | } 54 | return true; 55 | } 56 | string toLines() { 57 | return util::format("%ld #binlog file no\n%ld #binlog offset\n%ld #data file finished flag\n%s #current key\n", 58 | fileno, offset, dataFinished, key.c_str()); 59 | } 60 | bool operator == (SyncPos& pos) { return key == pos.key && dataFinished == pos.dataFinished && fileno == pos.fileno && offset == pos.offset; } 61 | bool operator != (SyncPos& pos) { return !operator ==(pos); } 62 | }; 63 | -------------------------------------------------------------------------------- /master-slave.md: -------------------------------------------------------------------------------- 1 | - [master-config](#master-config) 2 | - [slave-config](#slave-config) 3 | - [slave-status](#slave-status) 4 | 5 | ##master-config 6 | ```sh 7 | #run the program as a daemon or not 8 | #default on 9 | daemon=off 10 | 11 | #default info 12 | loglevel=trace 13 | 14 | #db data directory 15 | dbdir=/root/ldbd 16 | 17 | #default leveldbd.log 18 | logfile= 19 | 20 | #read threads number 21 | #default 8 22 | read_threads=8 23 | 24 | #write threads number 25 | #default 1 26 | write_threads=1 27 | 28 | #program bind ipv4 addr 29 | #default 0.0.0.0 30 | bind = 0.0.0.0 31 | 32 | #program listen port 33 | #default 80 34 | port = 80 35 | 36 | #stat-server listen port 37 | 
#default 8080 38 | stat_port = 8080 39 | 40 | #limit records of a page 41 | #default 1000 42 | page_limit = 20 43 | 44 | #limit records of batch operation 45 | #default 100000 46 | batch_count = 100 47 | 48 | #limit size of a batch operation 49 | #unit MB 50 | #default 3 51 | batch_size = 1 52 | 53 | #limit size for binlog file 54 | #unit MB 55 | #default 0 do not write binlog 56 | binlog_size = 64 57 | 58 | #id of this db 59 | #no default 60 | dbid = 1 61 | 62 | #help file 63 | #default README 64 | help_file = README.md 65 | ``` 66 | 67 | ##slave-config 68 | ```sh 69 | #run the program as a daemon or not 70 | #default on 71 | daemon=off 72 | 73 | #default info 74 | loglevel=trace 75 | 76 | #db data directory 77 | dbdir=/root/ldbd2 78 | 79 | #default leveldbd.log 80 | logfile= 81 | 82 | #read threads number 83 | #default 8 84 | read_threads=8 85 | 86 | #write threads number 87 | #default 1 88 | write_threads=1 89 | 90 | #program bind ipv4 addr 91 | #default 0.0.0.0 92 | bind = 0.0.0.0 93 | 94 | #program listen port 95 | #default 80 96 | port = 81 97 | 98 | #stat-server listen port 99 | #default 8080 100 | stat_port = 8081 101 | 102 | #limit records of a page 103 | #default 1000 104 | page_limit = 20 105 | 106 | #limit records of batch operation 107 | #default 100000 108 | batch_count = 100 109 | 110 | #limit size of a batch operation 111 | #unit MB 112 | #default 3 113 | batch_size = 1 114 | 115 | #limit size for binlog file 116 | #unit MB 117 | #default 0 do not write binlog 118 | binlog_size = 0 119 | 120 | #id of this db 121 | #no default 122 | dbid = 2 123 | 124 | #help file 125 | #default README 126 | help_file = README.md 127 | ``` 128 | 129 | ##slave-status 130 | 131 | 修改slave-status的内容,文件位置/root/ldbd/slave-status, dbdir下的slave-status 132 | 133 | 注意,该文件记录了从数据库同步的位置,会被数据库修改 134 | 135 | slave-status内容为 136 | ```sh 137 | localhost #host 138 | 80 #port 139 | 1 #binlog file no 140 | 0 #binlog offset 141 | 0 #data file finished flag 142 | #current key 143 | 144 | 
``` -------------------------------------------------------------------------------- /logdb.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "leveldb/db.h" 10 | #include "leveldb/env.h" 11 | #include "globals.h" 12 | #include "logfile.h" 13 | 14 | struct FileName { 15 | static string binlogPrefix() { return "binlog-"; } 16 | static bool isBinlog(const std::string& name) { return Slice(name).starts_with(binlogPrefix()); } 17 | static int64_t binlogNum(const string& name); 18 | static string binlogFile(int64_t no) { return binlogPrefix().data()+util::format("%05d", no); } 19 | static string closedFile() { return "dbclosed.txt"; } 20 | static string slaveFile() { return "slave-status"; } 21 | }; 22 | 23 | enum BinlogOp { BinlogWrite=1, BinlogDelete, }; 24 | 25 | inline const char* strOp(BinlogOp op) { 26 | if (op == BinlogDelete) { 27 | return "Delete"; 28 | } else if (op == BinlogWrite) { 29 | return "Write"; 30 | } 31 | return "Unkown"; 32 | }; 33 | 34 | struct LogRecord { 35 | int dbid; 36 | time_t tm; 37 | Slice key; 38 | Slice value; 39 | BinlogOp op; 40 | LogRecord():dbid(0),tm(0) {} 41 | LogRecord(int dbid1, time_t tm1, Slice key1, Slice value1, BinlogOp op1): dbid(dbid1),tm(tm1), key(key1), value(value1), op(op1) {} 42 | Status encodeRecord(string* data); 43 | static Status decodeRecord(Slice data, LogRecord* rec); 44 | }; 45 | 46 | struct SlaveStatus { 47 | string host; 48 | int port; 49 | SyncPos pos; 50 | time_t lastSaved; 51 | bool changed; 52 | SlaveStatus():port(-1), lastSaved(time(NULL)), changed(0) {} 53 | bool isValid() { return pos.offset != -1; } 54 | }; 55 | 56 | struct LogDb: public mutex { 57 | LogDb():dbid_(-1), binlogSize_(0), lastFile_(0), curLog_(NULL), db_(NULL) { } 58 | Status init(Conf& conf); 59 | leveldb::DB* getdb() { return db_; } 60 | Status write(Slice key, Slice value); 61 | 
Status remove(Slice key); 62 | Status applyLog(Slice record); 63 | ~LogDb(); 64 | vector removeSlaveConnsLock() { lock_guard lk(*this); return move(slaveConns_); } 65 | SlaveStatus getSlaveStatusLock() { lock_guard lk(*this); return slaveStatus_; } 66 | Status updateSlaveStatusLock(SyncPos pos); 67 | Status fetchLogLock(int64_t* fileno, int64_t* offset, string* data, const HttpConnPtr& con); 68 | static Status dumpFile(const string& name); 69 | 70 | 71 | SlaveStatus slaveStatus_; 72 | string binlogDir_, dbdir_; 73 | int dbid_; 74 | int binlogSize_; 75 | int64_t lastFile_; 76 | LogFile* curLog_; 77 | leveldb::DB* db_; 78 | vector slaveConns_; 79 | 80 | Status getLog_(int64_t fileno, int64_t offset, string* rec); 81 | Status saveSlave_(); 82 | Status checkCurLog_(); 83 | Status applyRecord_(LogRecord& rec); 84 | Status operateDb_(LogRecord& rec); 85 | Status operateLog_(Slice data); 86 | Status loadLogs_(); 87 | Status loadSlave_(); 88 | }; 89 | -------------------------------------------------------------------------------- /logfile.cc: -------------------------------------------------------------------------------- 1 | #include "logfile.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | Status LogFile::open(const string& name, bool readonly) { 9 | name_ = name; 10 | Status st; 11 | int flag = O_RDWR|O_APPEND; 12 | if (!readonly) { 13 | flag |= O_CREAT; 14 | } 15 | fd_ = ::open(name.c_str(), flag, 0622); 16 | if (fd_ < 0) { 17 | st = Status::ioError("open", name); 18 | error("%s", st.toString().c_str()); 19 | } 20 | if (!readonly) { 21 | info("open logfile %s %s", name.c_str(), st.toString().c_str()); 22 | } 23 | return Status(); 24 | } 25 | 26 | Status LogFile::append(Slice record) { 27 | int padded = totalLen(record.size()); 28 | char* p = new char[padded]; 29 | memset(p, 0, padded); 30 | unique_ptr rel1(p); 31 | memcpy(p, &LOG_MAGIC, 8); 32 | *(size_t*)(p+8) = record.size(); 33 | memcpy(p+16, record.data(), record.size()); 34 | int w = 
::write(fd_, p, padded); 35 | if (w != padded) { 36 | Status st = Status::ioError("write", name_); 37 | error("%s", st.toString().c_str()); 38 | return st; 39 | } 40 | return Status(); 41 | } 42 | 43 | Status LogFile::getRecord(int64_t* offset, Slice* data, string* scrach) { 44 | int64_t head[2] = {0, 0} ; 45 | *data = Slice(); 46 | int r = pread(fd_, head, 16, *offset); 47 | if (r == 0) { 48 | return Status(); 49 | } 50 | int64_t magic = head[0], len = head[1]; 51 | int r2 = -1; 52 | if (r == 16 && magic == LOG_MAGIC) { 53 | int padded = totalLen(len); 54 | scrach->resize(padded-16); 55 | char* p = (char*)scrach->c_str(); 56 | r2 = pread(fd_, p, padded-16, *offset+16); 57 | if (r2 == padded-16) { 58 | *data = Slice(p, len); 59 | *offset += padded; 60 | return Status(); 61 | } 62 | } 63 | Status st = Status::fromFormat(EINVAL, "getrecord error r %d r2 %d len %ld magic %lx off %ld errno %d %s", 64 | r, r2, len, magic, *offset, errno, errstr()); 65 | error("%s", st.toString().c_str()); 66 | return st; 67 | } 68 | 69 | Status LogFile::sync() { 70 | int r = fsync(fd_); 71 | if (r<0) { 72 | Status st = Status::ioError("fsync", name_); 73 | error("%s", st.toString().c_str()); 74 | return st; 75 | } 76 | return Status(); 77 | } 78 | 79 | Status LogFile::batchRecord(int64_t offset, string* rec, int batchSize) { 80 | char* p = new char[batchSize]; 81 | unique_ptr rel1(p); 82 | int r = pread(fd_, p, batchSize, offset); 83 | Status st; 84 | if (r < 0) { 85 | st = Status::ioError("pread", name_); 86 | error("logfile batchRecord %s", st.toString().c_str()); 87 | return st; 88 | } 89 | if (r == 0) { 90 | return Status(); 91 | } 92 | char* pe = p + r; 93 | char* pb = p; 94 | int64_t magic = 0; 95 | int64_t len = 0; 96 | while (pb + 16 <= pe) { 97 | magic = *(int64_t*)pb; 98 | len = *(int64_t*)(pb+8); 99 | if (magic != LOG_MAGIC || len < 0) { 100 | error("logfile bad format magic %lx len %ld at %s %ld", 101 | magic, len, name_.c_str(), offset+pb-p); 102 | return 
Status::fromFormat(EINVAL, "bad format log file %s", name_.c_str()); 103 | } 104 | int64_t tlen = totalLen(len); 105 | if (pb + tlen > pe) { 106 | break; 107 | } 108 | pb += tlen; 109 | } 110 | if (pb == p) { 111 | error("log record invalid. readed %ld len %ld batch_size %d", pe-p, len, batchSize); 112 | return Status::fromFormat(EINVAL, "bad format"); 113 | } 114 | rec->clear(); 115 | rec->append(p, pb); 116 | return Status(); 117 | } 118 | 119 | Status LogFile::decodeBinlogData(Slice* fileCont, Slice* record) { 120 | Status inval = Status::fromFormat(EINVAL, "bad format for binlog resp"); 121 | if (fileCont->empty()) { 122 | error("empty fileCont"); 123 | return inval; 124 | } 125 | int64_t magic = *(int64_t*)fileCont->begin(); 126 | if (magic != LOG_MAGIC) { 127 | error("bad magic no in binlog data"); 128 | return inval; 129 | } 130 | int64_t len = *(int64_t*)(fileCont->begin()+8); 131 | int64_t tlen = totalLen(len); 132 | 133 | if (fileCont->begin()+tlen > fileCont->end()) { 134 | error("bad length in binlog resp tlen %ld body %ld", tlen, fileCont->size()); 135 | return inval; 136 | } 137 | *record = Slice(fileCont->begin()+16, len); 138 | *fileCont = Slice(fileCont->begin()+tlen, fileCont->end()); 139 | return Status(); 140 | } 141 | 142 | -------------------------------------------------------------------------------- /binlog-msg.cc: -------------------------------------------------------------------------------- 1 | #include "binlog-msg.h" 2 | #include "handler.h" 3 | 4 | void handleBinlog(LogDb* db, EventBase* base, const HttpConnPtr& con) { 5 | HttpRequest& req = con.getRequest(); 6 | string sf = req.getArg("f"); 7 | string soff = req.getArg("off"); 8 | if (sf.empty() || soff.empty()) { 9 | error("empty arg f or off"); 10 | con->close(); 11 | return; 12 | } 13 | SyncPos pos; 14 | pos.fileno = util::atoi(sf.c_str()); 15 | pos.offset = util::atoi(soff.c_str()); 16 | HttpResponse& resp = con.getResponse(); 17 | resp.headers["req-info"] = pos.toString(); 18 | 
SyncPos npos = pos; 19 | Status st = db->fetchLogLock(&npos.fileno, &npos.offset, &resp.body, con); 20 | if (!st.ok()) { 21 | con.getResponse().setStatus(500, st.toString()); 22 | base->safeCall([con]{con.sendResponse(); }); 23 | return; 24 | } else if (pos.fileno == npos.fileno && pos.offset == npos.offset) { 25 | return; 26 | } 27 | resp.headers["next-info"] = npos.toString(); 28 | info("binlog response req-info '%s' next-info '%s' body len %ld", 29 | resp.getHeader("req-info").c_str(), resp.getHeader("next-info").c_str(), resp.body.size()); 30 | base->safeCall([con]{con.sendResponse(); }); 31 | } 32 | 33 | void addBinlogHeader(Slice bkey, Slice ekey, HttpRequest& req, HttpResponse& resp) { 34 | string reqinfo = req.getHeader("req-info"); 35 | if (reqinfo.size()) { 36 | resp.headers["req-info"] = reqinfo; 37 | SyncPos pos; 38 | bool r = pos.fromString(reqinfo, ' '); 39 | if (!r) { 40 | error("sync pos decode failed %s", reqinfo.c_str()); 41 | return; 42 | } 43 | if (ekey.empty()) { 44 | pos.key.clear(); 45 | pos.dataFinished = 1; 46 | } else { 47 | pos.key = ekey; 48 | } 49 | resp.headers["next-info"] = pos.toString(); 50 | } 51 | } 52 | 53 | void sendEmptyBinlog(EventBase* base, LogDb* db) { 54 | vector conns = db->removeSlaveConnsLock(); 55 | for (auto& con: conns) { 56 | HttpResponse& resp = con.getResponse(); 57 | resp.headers["next-info"] = resp.headers["req-info"]; 58 | info("binlog response %s empty resp", resp.getHeader("req-info").c_str()); 59 | con.sendResponse(); 60 | } 61 | } 62 | 63 | void sendSyncReq(LogDb* db, EventBase* base, const HttpConnPtr& con) { 64 | SlaveStatus ss = db->getSlaveStatusLock(); 65 | HttpRequest& req = con.getRequest(); 66 | req.headers["req-info"] = ss.pos.toString(); 67 | if (!ss.pos.dataFinished) { 68 | req.query_uri = "/range-get/" + ss.pos.key; 69 | } else { 70 | req.query_uri = util::format("/binlog/?f=%05ld&off=%ld", ss.pos.fileno, ss.pos.offset); 71 | } 72 | debug("geting %s", req.query_uri.c_str()); 73 | 
base->safeCall([con] { con.sendRequest();}); 74 | } 75 | 76 | void processSyncResp(LogDb* db, const HttpConnPtr& con, EventBase* base) { 77 | bool isError = true; 78 | ExitCaller atend([&]{ if (isError) con->close(); else sendSyncReq(db, base, con); }); 79 | 80 | HttpResponse& res = con.getResponse(); 81 | if (res.status != 200) { 82 | error("response error. code %d", res.status); 83 | return; 84 | } 85 | Slice body = res.getBody(); 86 | string reqinfo = res.getHeader("req-info"); 87 | SyncPos pos; 88 | bool r = pos.fromString(reqinfo, ' '); 89 | if (!r) { 90 | error("unexpected header req-info '%s'", reqinfo.data()); 91 | return; 92 | } 93 | SlaveStatus ss = db->getSlaveStatusLock(); 94 | if (pos != ss.pos) { 95 | error("header req-info '%s' not match slave status '%s'", 96 | pos.toString().c_str(), ss.pos.toString().c_str()); 97 | return; 98 | } 99 | 100 | Status st; 101 | if (pos.dataFinished == 0) { //range-get resp 102 | Slice key, value; 103 | bool exist; 104 | while (body.size() && (st=decodeKvBody(&body, &key, &value, &exist), st.ok())) { 105 | st = db->write(key, value); 106 | if (!st.ok()) { 107 | break; 108 | } 109 | } 110 | } else { //binlog resp 111 | Slice record; 112 | while (body.size() && (st=LogFile::decodeBinlogData(&body, &record), st.ok())) { 113 | st = db->applyLog(record); 114 | if (!st.ok()) { 115 | break; 116 | } 117 | } 118 | } 119 | if (st.ok()) { 120 | string nextinfo = res.getHeader("next-info"); 121 | SyncPos pos; 122 | bool r = pos.fromString(nextinfo, ' '); 123 | if (!r) { 124 | error("unexpected header next-info %s", nextinfo.c_str()); 125 | con->close(); 126 | return; 127 | } else { 128 | st = db->updateSlaveStatusLock(pos); 129 | } 130 | } 131 | isError = false; 132 | } 133 | 134 | 135 | -------------------------------------------------------------------------------- /leveldbd.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "handler.h" 5 | 
#include 6 | #include 7 | #include "globals.h" 8 | #include "binlog-msg.h" 9 | 10 | void setupStatServer(StatServer& svr, EventBase& base, LogDb* db, const char* argv[]); 11 | void handleHttpReq(EventBase& base, LogDb* db, const HttpConnPtr& con, ThreadPool& rpool, ThreadPool& wpool); 12 | void processArgs(int argc, const char* argv[], Conf& conf); 13 | void httpConnectTo(ThreadPool* wpool, LogDb* db, EventBase* base, const string& ip, int port); 14 | 15 | int main(int argc, const char* argv[]) { 16 | string program = argv[0]; 17 | processArgs(argc, argv, g_conf); 18 | 19 | //setup log 20 | string logfile = g_conf.get("", "logfile", program+".log"); 21 | if (logfile.size()) { 22 | Logger::getLogger().setFileName(logfile.c_str()); 23 | } 24 | string loglevel = g_conf.get("", "loglevel", "INFO"); 25 | Logger::getLogger().setLogLevel(loglevel); 26 | 27 | info("program begin. loglevel %s", loglevel.c_str()); 28 | //setup thread pool 29 | ThreadPool readPool(g_conf.getInteger("", "read_threads", 8)); 30 | ThreadPool writePool(1); 31 | 32 | //setup db 33 | setGlobalConfig(g_conf); 34 | LogDb db; 35 | Status st = db.init(g_conf); 36 | fatalif(!st.ok(), "LogDb init failed. 
%s", st.msg()); 37 | 38 | //setup network 39 | string ip = g_conf.get("", "bind", ""); 40 | int port = g_conf.getInteger("", "port", 80); 41 | int stat_port = g_conf.getInteger("", "stat_port", 8080); 42 | EventBase base(1000); 43 | HttpServer leveldbd(&base); 44 | int r = leveldbd.bind(ip, port); 45 | exitif(r, "bind failed %d %s", errno, strerror(errno)); 46 | StatServer statsvr(&base); 47 | r = statsvr.bind(ip, stat_port); 48 | exitif(r, "bind failed %d %s", errno, strerror(errno)); 49 | leveldbd.onDefault([&](const HttpConnPtr& con) { 50 | handleHttpReq(base, &db, con, readPool, writePool); 51 | }); 52 | base.runAfter(3000, [&]{ sendEmptyBinlog(&base, &db); }, 5000); 53 | setupStatServer(statsvr, base, &db, argv); 54 | 55 | if (db.slaveStatus_.isValid()) { 56 | httpConnectTo(&writePool, &db, &base, db.slaveStatus_.host, db.slaveStatus_.port); 57 | } 58 | Signal::signal(SIGINT, [&]{base.exit(); }); 59 | base.loop(); 60 | readPool.exit().join(); 61 | writePool.exit().join(); 62 | return 0; 63 | } 64 | 65 | void handleHttpReq(EventBase& base, LogDb* db, const HttpConnPtr& con, ThreadPool& rpool, ThreadPool& wpool){ 66 | HttpRequest& req = con.getRequest(); 67 | ThreadPool* pool = req.method == "GET" && !Slice(req.uri).starts_with("/nav-")? 
&rpool: &wpool; 68 | pool->addTask([=, &base] { handleReq(base, db, con); }); 69 | } 70 | 71 | void httpConnectTo(ThreadPool* wpool, LogDb* db, EventBase* base, const string& ip, int port) { 72 | HttpConnPtr con = TcpConn::createConnection(base, ip, port, 200); 73 | con->onState([=](const TcpConnPtr& con) { 74 | TcpConn::State st = con->getState(); 75 | HttpConnPtr hcon = con; 76 | if (st == TcpConn::Connected) { 77 | wpool->addTask([=]{sendSyncReq(db, base, con); }); 78 | } else if (st == TcpConn::Failed || st == TcpConn::Closed) { 79 | base->runAfter(3000, [=]{ httpConnectTo(wpool, db, base, ip, port); }); 80 | } 81 | }); 82 | 83 | con.onHttpMsg([=](const HttpConnPtr& hcon) { 84 | wpool->addTask([=]{ 85 | processSyncResp(db, hcon, base); 86 | }); 87 | }); 88 | } 89 | 90 | void processArgs(int argc, const char* argv[], Conf& g_conf){ 91 | string config = argv[0] + string(".conf"); 92 | const char* usage = "usage: %s [-f config_file] [start|stop|restart]\n"; 93 | char* const* gv = (char* const*)argv; 94 | for (int ch=0; (ch=getopt(argc, gv, "f:h"))!= -1;) { 95 | switch(ch) { 96 | case 'f': 97 | config = optarg; 98 | break; 99 | case 'h': 100 | printf(usage, argv[0]); 101 | exit(0); 102 | break; 103 | default: 104 | printf("unknown option %c\n", ch); 105 | printf(usage, argv[0]); 106 | exit(1); 107 | } 108 | } 109 | string cmd = "start"; 110 | if (argc > optind) { 111 | cmd = argv[optind]; 112 | } 113 | if (argc > optind + 1 || (cmd != "start" && cmd != "stop" && cmd != "restart")) { 114 | printf(usage, argv[0]); 115 | exit(1); 116 | } 117 | int r = g_conf.parse(config.c_str()); 118 | if (r != 0) { 119 | printf("config %s parse error at line %d", config.c_str(), r); 120 | exit(1); 121 | } 122 | string pidfile = g_conf.get("", "pidfile", argv[0]+(string)".pid"); 123 | if (g_conf.getBoolean("", "daemon", true)) { 124 | Daemon::daemonProcess(cmd.c_str(), pidfile.c_str()); 125 | } 126 | } 127 | 128 | void setupStatServer(StatServer& svr, EventBase& base, LogDb* db, 
const char* argv[]) { 129 | svr.onState("loglevel", "log level for server", []{return Logger::getLogger().getLogLevelStr(); }); 130 | svr.onState("pid", "process id of server", [] { return getpid(); }); 131 | svr.onState("space", "total space of db kB", [db] { return getSize("/", "=", db->getdb())/1024; }); 132 | svr.onState("dbid", "dbid of this db", [db] { return db->dbid_; }); 133 | svr.onState("binlog-file", "current binlog file no of this db", [db] { return db->lastFile_; }); 134 | svr.onState("binlog-offset", "current binlog file offset", [db] { 135 | size_t sz = 0; 136 | Status st = file::getFileSize(db->binlogDir_+FileName::binlogFile(db->lastFile_), &sz); 137 | return sz; 138 | }); 139 | svr.onState("slave-current-key", "slave key of this db", [db] { return db->getSlaveStatusLock().pos.key; }); 140 | svr.onState("slave-file", "slave file of this db", [db] { return db->getSlaveStatusLock().pos.fileno; }); 141 | svr.onState("slave-offset", "slave offset of this db", [db] { return db->getSlaveStatusLock().pos.offset; }); 142 | svr.onCmd("lesslog", "set log to less detail", []{ Logger::getLogger().adjustLogLevel(-1); return "OK"; }); 143 | svr.onCmd("morelog", "set log to more detail", [] { Logger::getLogger().adjustLogLevel(1); return "OK"; }); 144 | svr.onCmd("restart", "restart program", [&] { 145 | base.safeCall([&]{ base.exit(); Daemon::changeTo(argv);}); 146 | return "restarting"; 147 | }); 148 | svr.onCmd("stop", "stop program", [&] { base.safeCall([&]{base.exit();}); return "stoping"; }); 149 | svr.onPageFile("config", "show config file", g_conf.filename); 150 | svr.onPageFile("help", "show help", g_conf.get("", "help_file", "README")); 151 | } 152 | 153 | -------------------------------------------------------------------------------- /handler.cc: -------------------------------------------------------------------------------- 1 | #include "handler.h" 2 | #include "binlog-msg.h" 3 | 4 | void addKvBody(Slice key, const Slice* value, string* body) { 5 | 
body->append(key.data(), key.size()); 6 | char buf[64]; 7 | int cn = snprintf(buf, sizeof buf, "\n%ld\n", value ? (int64_t)value->size() : -1); 8 | body->append(buf, cn); 9 | body->append(value->data(), value->size()); 10 | body->append("\n"); 11 | 12 | } 13 | 14 | Status decodeKvBody(Slice* body, Slice* key, Slice* value, bool* exists) { 15 | Status inval = Status::fromFormat(EINVAL, "bad format for range resp"); 16 | if (body->empty()) { 17 | error("empty body in decode"); 18 | return inval; 19 | } 20 | for (const char* p = body->begin(); p < body->end(); p++) { 21 | if (*p == '\n') { 22 | *key = Slice(body->begin(), p); 23 | p++; 24 | for (const char* pe = p; pe < body->end(); pe++) { 25 | if (*pe == '\n') { 26 | int64_t len = util::atoi(p, pe); 27 | pe++; 28 | if (pe + len > body->end()) { 29 | error("bad format for range resp"); 30 | return inval; 31 | } 32 | if (len == -1) { 33 | *exists = false; 34 | } else { 35 | *exists = true; 36 | *value = Slice(pe, pe+len); 37 | *body = Slice(pe+len+1, body->end()); 38 | } 39 | return Status(); 40 | } 41 | } 42 | } 43 | } 44 | error("bad format in range format, no line feed"); 45 | return inval; 46 | } 47 | 48 | void addKeyBody(Slice key, string* body) { 49 | body->append(key.data(), key.size()); 50 | *body += '\n'; 51 | } 52 | 53 | Status decodeKeyBody(Slice* body, Slice* key) { 54 | Status inval = Status::fromFormat(EINVAL, "bad format for range resp"); 55 | if (body->empty()) { 56 | error("empty body in decode"); 57 | return inval; 58 | } 59 | for (const char* p = body->begin(); p < body->end(); p++) { 60 | if (*p == '\n') { 61 | *key = Slice(body->begin(), p); 62 | *body = Slice(p+1, body->end()); 63 | return Status(); 64 | } 65 | } 66 | error("bad format in range format, no line feed"); 67 | return inval; 68 | } 69 | 70 | static void handleBatchGet(LogDb* db, HttpRequest& req, HttpResponse& resp) { 71 | Slice key; 72 | Status st; 73 | leveldb::DB* ldb = db->getdb(); 74 | Slice body = req.getBody(); 75 | while 
(body.size() && st.ok() && (st=decodeKeyBody(&body, &key), st.ok())) { 76 | string value; 77 | leveldb::Status s = ldb->Get(leveldb::ReadOptions(), convSlice(key), &value); 78 | if (s.ok()) { 79 | Slice v(value); 80 | addKvBody(key, &v, &resp.body); 81 | } else if (s.IsNotFound()) { 82 | addKvBody(key, NULL, &resp.body); 83 | } else { 84 | error("ldb error: %s", s.ToString().c_str()); 85 | st = (ConvertStatus)s; 86 | } 87 | } 88 | if (!st.ok()) { 89 | resp.setStatus(500, "Internal Error"); 90 | } 91 | } 92 | 93 | static void handleBatchSet(LogDb* db, HttpRequest& req, HttpResponse& resp) { 94 | Slice key, value; 95 | Status st; 96 | Slice body = req.getBody(); 97 | bool exists; 98 | while (body.size() && st.ok() && (st=decodeKvBody(&body, &key, &value, &exists), st.ok())) { 99 | st = db->write(key, value); 100 | } 101 | if (!st.ok()) { 102 | resp.setStatus(500, "Internal Error"); 103 | } 104 | } 105 | 106 | static void handleBatchDelete(LogDb* db, HttpRequest& req, HttpResponse& resp) { 107 | Slice key; 108 | Status st; 109 | Slice body = req.getBody(); 110 | while (body.size() && (st=decodeKeyBody(&body, &key), st.ok())) { 111 | st = db->remove(key); 112 | if (!st.ok()) { 113 | break; 114 | } 115 | } 116 | if (!st.ok()) { 117 | resp.setStatus(500, "Internal Error"); 118 | } 119 | } 120 | 121 | static void handleRangeGet(LogDb* db, HttpRequest& req, HttpResponse& resp) { 122 | leveldb::DB* ldb = db->getdb(); 123 | Slice uri = req.uri; 124 | Slice rget = "/range-get/"; 125 | if (!uri.starts_with(rget)) { 126 | resp.setNotFound(); 127 | return; 128 | } 129 | Slice bkey = uri.sub(rget.size()); 130 | Slice ekey = req.getArg("end"); 131 | if (ekey.empty()) { 132 | ekey = "\xff"; 133 | } 134 | bool inc = req.getArg("inc") == "1"; 135 | leveldb::Iterator* it = ldb->NewIterator(leveldb::ReadOptions()); 136 | unique_ptr rel1(it); 137 | int n = 0; 138 | leveldb::Slice lekey = convSlice(ekey); 139 | leveldb::Slice lbkey = convSlice(bkey); 140 | it->Seek(lbkey); 141 | if (!inc 
&& it->Valid() && it->key() == lbkey) { 142 | it->Next(); 143 | } 144 | Slice k1; 145 | for (; it->Valid(); it->Next()) { 146 | if (it->key().compare(lekey) >= 0) { 147 | break; 148 | } 149 | k1 = convSlice(it->key()); 150 | Slice v = convSlice(it->value()); 151 | addKvBody(k1, &v, &resp.body); 152 | if (++n >= g_batch_count || resp.body.size() >= (size_t)g_batch_size) { 153 | break; 154 | } 155 | } 156 | addBinlogHeader(bkey, k1, req, resp); 157 | } 158 | 159 | int64_t getSize(Slice bkey, Slice ekey, leveldb::DB* db) { 160 | leveldb::Range ra; 161 | ra.start = convSlice(bkey); 162 | ra.limit = convSlice(ekey); 163 | uint64_t sz = 0; 164 | db->GetApproximateSizes(&ra, 1, &sz); 165 | return (int64_t)sz; 166 | } 167 | 168 | static void handleNav(leveldb::DB* db, HttpRequest& req, HttpResponse& resp) { 169 | Slice uri = req.uri; 170 | Slice navn = "/nav-next/"; 171 | Slice navp = "/nav-prev/"; 172 | Slice navl = "/nav-prev="; 173 | int n = 0; 174 | leveldb::Iterator* it = db->NewIterator(leveldb::ReadOptions()); 175 | unique_ptr rel1(it); 176 | string ln; 177 | resp.body.append("first-page
"); 178 | if (uri.starts_with(navn)){ 179 | Slice pgkey = uri.sub(navn.size()); 180 | ln = util::format("prev-page
", 181 | (int)pgkey.size(), pgkey.data()); 182 | resp.body.append(ln); 183 | Slice key = pgkey; 184 | for (it->Seek(convSlice(pgkey)); it->Valid(); it->Next()) { 185 | key = convSlice(it->key()); 186 | ln = util::format("delete %.*s
", 187 | (int)uri.size(), uri.data(), (int)key.size(), key.data(), 188 | (int)key.size(), key.data(), (int)key.size(), key.data()); 189 | resp.body.append(ln); 190 | if (++n>=g_page_limit) { 191 | break; 192 | } 193 | } 194 | ln = util::format("next-page
", 195 | (int)key.size(), key.data()); 196 | resp.body.append(ln); 197 | } else if (uri.starts_with(navp)) { 198 | Slice pgkey = uri.sub(navp.size()); 199 | vector lns; 200 | ln = util::format("next-page
", 201 | (int)pgkey.size(), pgkey.data()); 202 | lns.push_back(ln); 203 | Slice key = pgkey; 204 | for (it->Seek(convSlice(pgkey)); it->Valid(); it->Prev()) { 205 | key = convSlice(it->key()); 206 | ln = util::format("delete %.*s
", 207 | (int)uri.size(), uri.data(), (int)key.size(), key.data(), 208 | (int)key.size(), key.data(), (int)key.size(), key.data()); 209 | lns.push_back(ln); 210 | if (++n>=g_page_limit) { 211 | break; 212 | } 213 | } 214 | ln = util::format("prev-page
", 215 | (int)key.size(), key.data()); 216 | lns.push_back(ln); 217 | for(auto it = lns.rbegin(); it != lns.rend(); it ++) { 218 | resp.body.append(*it); 219 | } 220 | } else if (uri == navl) { 221 | Slice key; 222 | vector lns; 223 | for (it->SeekToLast(); it->Valid(); it->Prev()) { 224 | key = convSlice(it->key()); 225 | if (lns.empty()) { 226 | ln = util::format("next-page
", 227 | (int)key.size(), key.data()); 228 | lns.push_back(ln); 229 | } 230 | ln = util::format("delete %.*s
", 231 | (int)uri.size(), uri.data(), (int)key.size(), key.data(), 232 | (int)key.size(), key.data(), (int)key.size(), key.data()); 233 | lns.push_back(ln); 234 | if (++n>=g_page_limit) { 235 | break; 236 | } 237 | } 238 | ln = util::format("prev-page
", 239 | (int)key.size(), key.data()); 240 | lns.push_back(ln); 241 | for(auto it = lns.rbegin(); it != lns.rend(); it ++) { 242 | resp.body.append(*it); 243 | } 244 | } else { 245 | resp.setNotFound(); 246 | return; 247 | } 248 | resp.body.append("last-page
"); 249 | } 250 | 251 | static void handleSize(leveldb::DB* db, HttpRequest& req, HttpResponse& resp) { 252 | Slice uri = req.uri; 253 | Slice pre = "/size/"; 254 | Slice bkey = uri.sub(pre.size()); 255 | Slice ekey = req.getArg("end"); 256 | if (ekey.empty()) { 257 | ekey = "\xff"; 258 | } 259 | int64_t sz = getSize(bkey, ekey, db); 260 | resp.body = util::format("%ld", sz); 261 | } 262 | 263 | void handleReq(EventBase& base, LogDb* db, const HttpConnPtr& con) { 264 | HttpRequest& req = con.getRequest(); 265 | Status mst; 266 | HttpResponse& resp = con.getResponse(); 267 | Slice uri = req.uri; 268 | Slice d = "/d/"; 269 | string value; 270 | leveldb::DB* ldb = db->getdb(); 271 | if (uri.starts_with(d)) { 272 | Slice localkey = uri.sub(d.size()); 273 | leveldb::Slice key = convSlice(localkey); 274 | if (key.empty()) { 275 | resp.setStatus(403, "empty key"); 276 | } else if (req.method == "GET") { 277 | leveldb::Status s = ldb->Get(leveldb::ReadOptions(), key, &value); 278 | if (s.ok()) { 279 | resp.body2 = value; 280 | } else if (s.IsNotFound()) { 281 | resp.setNotFound(); 282 | } else { 283 | mst = (ConvertStatus)s; 284 | } 285 | } else if (req.method == "POST") { 286 | mst = db->write(localkey, req.getBody()); 287 | } else if (req.method == "DELETE") { 288 | mst = db->remove(localkey); 289 | } else { 290 | resp.setStatus(403, "unknown method"); 291 | } 292 | if (!mst.ok()) { 293 | resp.setStatus(500, "Internal Error"); 294 | error("%.*s error %s", (int)req.method.size(), req.method.data(), 295 | mst.toString().c_str()); 296 | } 297 | } else if (uri.starts_with("/nav-")){ 298 | string dk = req.getArg("d"); 299 | if (dk.size()) { 300 | mst = db->remove("/"+dk); 301 | } 302 | if (!mst.ok()) { 303 | resp.setStatus(500, "Internal Error"); 304 | } else { 305 | handleNav(ldb, req, resp); 306 | } 307 | } else if (uri.starts_with("/batch-get/")) { 308 | handleBatchGet(db, req, resp); 309 | } else if (uri.starts_with("/batch-set/")) { 310 | handleBatchSet(db, req, resp); 
// Reads one value of type C from the raw bytes at p and advances p by
// sizeof(C). The bytes are copied with memcpy, so p does not need to be
// aligned for C. The caller must guarantee that sizeof(C) bytes are readable.
template <class C> C bin_readValue(char*& p) {
    C value{};
    memcpy(&value, p, sizeof(C));
    p += sizeof(C);
    return value;
}
| assert(p == data->c_str() + data->size()); 46 | return Status(); 47 | } 48 | 49 | Status LogRecord::decodeRecord(Slice data, LogRecord* rec){ 50 | Status es(EINVAL, "record length error"); 51 | char* p = (char*)data.data(); 52 | size_t isz = 4+8+4+4+4; 53 | if (data.size() < isz) { 54 | return es; 55 | } 56 | rec->dbid = bin_readValue(p); 57 | rec->tm = bin_readValue(p); 58 | rec->op = (BinlogOp)bin_readValue(p); 59 | size_t len = bin_readValue(p); 60 | if (data.size() < isz+len) { 61 | return es; 62 | } 63 | rec->key = Slice(p, len); 64 | p += len; 65 | size_t len2 = bin_readValue(p); 66 | if (data.size() != isz+len+len2) { 67 | return es; 68 | } 69 | rec->value = Slice(p, len2); 70 | p += len2; 71 | return Status(); 72 | } 73 | 74 | 75 | Status LogDb::dumpFile(const string& name) { 76 | 77 | LogFile lf; 78 | Status st = lf.open(name); 79 | if (st.ok()) { 80 | Slice rec; 81 | int64_t offset = 0; 82 | string scrach; 83 | LogRecord lr; 84 | int i = 0; 85 | for (;;) { 86 | st = lf.getRecord(&offset, &rec, &scrach); 87 | if (!st.ok() || rec.size() == 0) { 88 | break; 89 | } 90 | st = LogRecord::decodeRecord(rec, &lr); 91 | if (!st.ok()) { 92 | break; 93 | } 94 | printf("record %d: op %s time %ld %s key %.*s value %.*s\n", ++i, 95 | lr.op==BinlogWrite?"WRITE":"DELETE", (long)lr.tm, 96 | util::readableTime(lr.tm).c_str(), 97 | (int)lr.key.size(), lr.key.data(), 98 | (int)lr.value.size(), lr.value.data()); 99 | } 100 | } 101 | return st; 102 | } 103 | 104 | Status LogDb::init(Conf& conf) { 105 | 106 | dbdir_ = conf.get("", "dbdir", "ldbd"); 107 | dbdir_ = addSlash(dbdir_); 108 | Status s = file::createDir(dbdir_); 109 | if (!s.ok() && s.code() != EEXIST) { 110 | error("create dir failed: %s", s.toString().c_str()); 111 | return s; 112 | } 113 | leveldb::Options options; 114 | options.create_if_missing = true; 115 | s = (ConvertStatus)leveldb::DB::Open(options, dbdir_+"ldb", &db_); 116 | fatalif(!s.ok(), "leveldb open failed %s", s.msg()); 117 | 118 | if (s.ok()) { 119 
| s = loadSlave_(); 120 | } 121 | binlogSize_ = conf.getInteger("", "binlog_size", 0); 122 | binlogSize_ *= 1024*1024; 123 | if (binlogSize_ == 0) { 124 | return s; 125 | } 126 | binlogDir_ = dbdir_ + "binlog/"; 127 | dbid_ = conf.getInteger("", "dbid", 0); 128 | if (dbid_ <= 0) { 129 | s = Status::fromFormat(EINVAL, "dbid should be set a positive interger when binlog enabled"); 130 | error("%s", s.toString().c_str()); 131 | } 132 | if (s.ok()) { 133 | s = loadLogs_(); 134 | } 135 | return s; 136 | } 137 | 138 | Status LogDb::loadSlave_() { 139 | string filename = dbdir_ + FileName::slaveFile(); 140 | string cont; 141 | Status st = file::getContent(filename, cont); 142 | info("load file %s result %d", filename.c_str(), st.code()); 143 | if (st.code() == ENOENT) { 144 | return Status(); 145 | } 146 | Slice data = cont; 147 | vector lns = data.split('\n'); 148 | size_t c = 0; 149 | if (lns.size() > c) { 150 | slaveStatus_.host = lns[c].eatWord(); 151 | } 152 | if (lns.size() > ++c) { 153 | slaveStatus_.port = atoi(lns[c].data()); 154 | vector lns2; 155 | copy(lns.begin()+2, lns.end(), back_inserter(lns2)); 156 | bool r = slaveStatus_.pos.fromSlices(lns2); 157 | if (r) { 158 | return Status(); 159 | } 160 | } 161 | st = Status::fromFormat(EINVAL, "bad format for slave status"); 162 | error("%s", st.toString().c_str()); 163 | return st; 164 | } 165 | 166 | Status LogDb::loadLogs_() { 167 | file::createDir(binlogDir_); //ignore return value 168 | vector files; 169 | Status s = file::getChildren(binlogDir_, &files); 170 | if (!s.ok()) return s; 171 | vector logs; 172 | for(size_t i = 0; i < files.size(); i ++) { 173 | int64_t n = FileName::binlogNum(files[i]); 174 | if (n) { 175 | logs.push_back(n); 176 | } 177 | } 178 | sort(logs.begin(), logs.end()); 179 | if (logs.size()) { // remove last empty log file 180 | string lastfile = binlogDir_+FileName::binlogFile(logs.back()); 181 | uint64_t sz; 182 | Status s2 = file::getFileSize(lastfile, &sz); 183 | if (s2.ok() && sz == 
0) { 184 | logs.pop_back(); 185 | } 186 | } 187 | if (logs.size()) { 188 | lastFile_ = logs.back(); 189 | } 190 | string cfile = dbdir_ + FileName::closedFile().data(); 191 | string cont; 192 | s = file::getContent(cfile, cont); 193 | if (s.code() == ENOENT) { //ignore 194 | s = Status(); 195 | } else if (s.ok() && cont != "1" && lastFile_) { //not elegantly closed, redo last log record 196 | string lastfile = binlogDir_ + FileName::binlogFile(lastFile_); 197 | size_t fsz = 0; 198 | s = file::getFileSize(lastfile, &fsz); 199 | if (!s.ok()) { 200 | return s; 201 | } 202 | LogFile lf; 203 | s = lf.open(lastfile); 204 | if (!s.ok()) { 205 | return s; 206 | } 207 | int64_t offset = 0; 208 | Slice data; 209 | string scrach; 210 | for (;;) { 211 | s = lf.getRecord(&offset, &data, &scrach); 212 | if (!s.ok()) { 213 | return s; 214 | } 215 | if (data.size() == 0) { 216 | error("unexpected end of logfile offset %ld sz %ld dsz %ld ignored", 217 | offset, fsz, data.size()); 218 | break; 219 | } 220 | if (offset == (int64_t)fsz) { 221 | LogRecord lr; 222 | s = LogRecord::decodeRecord(data, &lr); 223 | if (!s.ok()) { 224 | return s; 225 | } 226 | if (lr.op == BinlogWrite) { 227 | s = (ConvertStatus)db_->Put(leveldb::WriteOptions(), convSlice(lr.key), convSlice(lr.value)); 228 | } else if (lr.op == BinlogDelete) { 229 | s = (ConvertStatus)db_->Delete(leveldb::WriteOptions(), convSlice(lr.key)); 230 | } else { 231 | s = Status::fromFormat(EINVAL, "unknown binlogOp %d", lr.op); 232 | error("%s", s.toString().c_str()); 233 | } 234 | break; 235 | } 236 | } 237 | } 238 | if (s.ok()) { 239 | s = file::writeContent(cfile, "0"); 240 | } 241 | checkCurLog_(); 242 | return s; 243 | } 244 | 245 | LogDb::~LogDb() { 246 | if (slaveStatus_.changed) { 247 | saveSlave_(); 248 | } 249 | delete curLog_; 250 | if (binlogDir_.size()) { 251 | file::writeContent(dbdir_ + FileName::closedFile(), "1"); 252 | } 253 | delete db_; 254 | } 255 | 256 | Status LogDb::checkCurLog_() { 257 | Status st; 258 | 
if (curLog_ && curLog_->size() > binlogSize_) { 259 | st = curLog_->sync(); 260 | if (!st.ok()) { 261 | return st; 262 | } 263 | delete curLog_; 264 | curLog_ = NULL; 265 | } 266 | if (curLog_ == NULL) { 267 | curLog_ = new LogFile(); 268 | st = curLog_->open(binlogDir_+FileName::binlogFile(lastFile_+1), false); 269 | if (st.ok()) { 270 | lastFile_ ++; 271 | } 272 | } 273 | return st; 274 | } 275 | 276 | Status LogDb::write(Slice key, Slice value) { 277 | debug("write %.*s value len %ld", (int)key.size(), key.data(), value.size()); 278 | LogRecord rec(dbid_, time(NULL), key, value, BinlogWrite); 279 | return applyRecord_(rec); 280 | } 281 | 282 | Status LogDb::remove(Slice key) { 283 | debug("remove %.*s", (int)key.size(), key.data()); 284 | LogRecord rec(dbid_, time(NULL), key, "", BinlogDelete); 285 | return applyRecord_(rec); 286 | } 287 | 288 | Status LogDb::applyLog(Slice record) { 289 | LogRecord rec; 290 | Status st = LogRecord::decodeRecord(record, &rec); 291 | debug("applying %d %ld %s %.*s %d", 292 | rec.dbid, rec.tm, strOp(rec.op), (int)rec.key.size(), rec.key.data(), (int)rec.value.size()); 293 | if (!st.ok() || rec.dbid == dbid_) { //ignore if dbid is self 294 | return st; 295 | } 296 | if (binlogDir_.size()) { 297 | st = operateLog_(record); 298 | if (!st.ok()) { 299 | return st; 300 | } 301 | st = operateDb_(rec); 302 | } else { 303 | st = operateDb_(rec); 304 | } 305 | return st; 306 | } 307 | 308 | Status LogDb::applyRecord_(LogRecord& rec) { 309 | Status st; 310 | if (binlogDir_.size()) { 311 | string data; 312 | st = rec.encodeRecord(&data); 313 | if (!st.ok()) { 314 | return st; 315 | } 316 | st = operateLog_(data); 317 | if (!st.ok()) { 318 | return st; 319 | } 320 | st = operateDb_(rec); 321 | } else { 322 | st = operateDb_(rec); 323 | } 324 | return st; 325 | } 326 | 327 | Status LogDb::operateDb_(LogRecord& rec) { 328 | if (rec.op == BinlogWrite) { 329 | return (ConvertStatus)db_->Put(leveldb::WriteOptions(), convSlice(rec.key), 
convSlice(rec.value)); 330 | } else if (rec.op == BinlogDelete) { 331 | return (ConvertStatus) db_->Delete(leveldb::WriteOptions(), convSlice(rec.key)); 332 | } 333 | Status st = Status::fromFormat(EINVAL, "unknown op in LogRecord %d", rec.op); 334 | error("%s", st.toString().c_str()); 335 | return st; 336 | } 337 | 338 | Status LogDb::operateLog_(Slice data) { 339 | Status s = checkCurLog_(); 340 | if (s.ok()) { 341 | s = curLog_->append(data); 342 | } 343 | vector conns = removeSlaveConnsLock(); 344 | for (auto& con: conns) { 345 | EventBase* base = con->getBase(); 346 | if (base) { 347 | handleBinlog(this, base, con); 348 | } else { 349 | error("connection closed, but sending response in operateLog"); 350 | } 351 | } 352 | return s; 353 | } 354 | 355 | Status LogDb::saveSlave_() { 356 | string cont = util::format("%s #host\n%d #port\n%s", 357 | slaveStatus_.host.c_str(), slaveStatus_.port, slaveStatus_.pos.toLines().c_str()); 358 | string fname = dbdir_ + FileName::slaveFile(); 359 | Status st = file::renameSave(fname, fname+".tmp", cont); 360 | if (!st.ok()) { 361 | error("save slave status failed %s", st.toString().c_str()); 362 | return st; 363 | } 364 | info("save slave staus ok '%s'", slaveStatus_.pos.toString().c_str()); 365 | slaveStatus_.changed = false; 366 | slaveStatus_.lastSaved = time(NULL); 367 | return Status(); 368 | } 369 | 370 | Status LogDb::fetchLogLock(int64_t* fileno, int64_t* offset, string* data, const HttpConnPtr& con) { 371 | if (binlogDir_.empty()) { 372 | return Status::fromFormat(EINVAL, "binlog dir empty"); 373 | } 374 | lock_guard lk(*this); 375 | if (*fileno == lastFile_ && *offset == curLog_->size()) { 376 | slaveConns_.push_back(con); 377 | return Status(); 378 | } 379 | if (*fileno > lastFile_ || (*fileno == lastFile_ && *offset > curLog_->size())) { 380 | error("qfile %ld qoff %ld larger than lastfile %ld off %ld while curlog==NULL", 381 | *fileno, *offset, lastFile_, curLog_->size()); 382 | return Status::fromFormat(EINVAL, 
"file offset not valid"); 383 | } 384 | Status st = getLog_(*fileno, *offset, data); 385 | if (!st.ok()) { //error 386 | error("db get log failed"); 387 | return st; 388 | } 389 | if (data->empty()) { 390 | ++*fileno; 391 | *offset = 0; 392 | } else { 393 | *offset += data->size(); 394 | } 395 | return Status(); 396 | } 397 | 398 | Status LogDb::getLog_(int64_t fileno, int64_t offset, string* rec) { 399 | LogFile nf; 400 | LogFile* lf = NULL; 401 | Status st; 402 | if (curLog_ && lastFile_ == fileno) { 403 | lf = curLog_; 404 | } else { 405 | lf = &nf; 406 | st = nf.open(binlogDir_+FileName::binlogFile(fileno)); 407 | } 408 | if (st.ok()) { 409 | st = lf->batchRecord(offset, rec, g_batch_size); 410 | } 411 | return st; 412 | } 413 | 414 | Status LogDb::updateSlaveStatusLock(SyncPos pos) { 415 | lock_guard lk(*this); 416 | SlaveStatus& ss = slaveStatus_; 417 | if (pos != ss.pos) { 418 | ss.pos = pos; 419 | ss.changed = true; 420 | time_t now = time(NULL); 421 | if (now - ss.lastSaved > g_flush_slave_interval) { 422 | return saveSlave_(); 423 | } 424 | } 425 | return Status(); 426 | } --------------------------------------------------------------------------------