├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── ducktime.cc └── notes.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 berthubert 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS = -O3 -Wall -ggdb 2 | 3 | DUCKINCLUDE=/home/ahu/git/duckdb/src/include 4 | DUCKLIBS=/home/ahu/git/duckdb/build/release/src 5 | 6 | 7 | CXXFLAGS:= -std=gnu++17 -Wall -O3 -ggdb -MMD -MP -fno-omit-frame-pointer -IIext/CLI11 \ 8 | -I${DUCKINCLUDE} 9 | 10 | 11 | # CXXFLAGS += -Wno-delete-non-virtual-dtor 12 | 13 | PROGRAMS = ducktime 14 | all: ${PROGRAMS} 15 | 16 | -include *.d 17 | 18 | clean: 19 | rm -f *~ *.o *.d 20 | 21 | ducktime: ducktime.o 22 | $(CXX) -std=gnu++17 $^ -o $@ -pthread -Wl,-rpath=${DUCKLIBS} ${DUCKLIBS}/libduckdb.so 23 | 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ducktime 2 | A toy DuckDB-based timeseries database. 3 | 4 | Currently the 'ducktime' binary does nothing but fill a table with somewhat 5 | plausible values. 6 | 7 | The goal of this program is to figure out how DuckDB behaves under various 8 | circumstances, and how to best feed it data. 9 | 10 | To compile, edit the Makefile with the location of your DuckDB tree, and run 11 | 'make'. 12 | 13 | To test, run: `./ducktime duckfilename 10000000`, and it will stream 10 14 | million rows into `duckfilename`. The data simulates some typical timeseries 15 | data. 16 | 17 | Status & Features 18 | ----------------- 19 | This is a toy. It might grow into something or not. Current features: 20 | 21 | * Schema is adjusted automatically when you add new fields 22 | * Native storage of integers, doubles, strings 23 | * Uses the DuckDB "Bulk" API 24 | 25 | I'm still exploring the best use of transactions and the Appender API to get the smoothest experience. 
26 | -------------------------------------------------------------------------------- /ducktime.cc: -------------------------------------------------------------------------------- 1 | #include "duckdb.hpp" 2 | #include "duckdb/main/appender.hpp" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | using namespace std; 9 | 10 | 11 | /** Class which you can give a DuckDB name and you can then 12 | stream data to it. Will create fields as needed */ 13 | class DuckTime 14 | { 15 | public: 16 | explicit DuckTime(string_view fname) : d_db(&fname[0]), d_con(d_db) 17 | { 18 | d_con.BeginTransaction(); 19 | } 20 | 21 | ~DuckTime() 22 | { 23 | cerr<<"Closing appender"<Close(); 26 | cerr<<"Committing transaction"<Flush(); 35 | } 36 | typedef std::variant var_t; 37 | //! store a datum. Tags could be indexed later. 38 | void addValue(const std::vector>& tags, std::string name, const initializer_list>& values, double t); 39 | 40 | private: 41 | // for each table, the known types 42 | std::map > > d_types; 43 | duckdb::DuckDB d_db; 44 | duckdb::Connection d_con; 45 | std::unique_ptr d_appender; 46 | }; 47 | using namespace duckdb; 48 | 49 | //! Get field names and types from a table 50 | vector > getSchema(Connection& con, string_view table) 51 | { 52 | auto stmt = con.Prepare("SELECT column_name, data_type FROM information_schema_columns() WHERE table_schema='main' AND table_name=? ORDER BY ordinal_position"); 53 | if(!stmt->success) { 54 | throw runtime_error("Unable to prepare query for schema retrieval: "+stmt->error); 55 | } 56 | cerr<<"Get schema prepare done, executing it for '"<Execute(&table[0]); // if you pass a string, it doesn't work? 58 | 59 | if(!res->success) { 60 | throw runtime_error("Unable to retrieve schema: "+stmt->error); 61 | } 62 | 63 | vector> ret; 64 | for(const auto& row : *res) { 65 | ret.push_back({row.GetValue(0), row.GetValue(1)}); 66 | } 67 | return ret; 68 | } 69 | 70 | 71 | //! 
Add a column to a table with a certain type 72 | void addColumn(Connection& con, string_view table, string_view name, string_view type) 73 | { 74 | // SECURITY PROBLEM - somehow we can't do prepared statements here 75 | auto stmt = con.Prepare("ALTER table "+string(table)+" add column \""+string(name)+ "\" "+string(type)); 76 | 77 | // would love to do this, but we can't: 78 | //auto stmt = con.Prepare("ALTER table ? add column ? ?"); 79 | if(!stmt->success) { 80 | throw std::runtime_error("Error preparing statement: "+stmt->error); 81 | } 82 | auto res = stmt->Execute(); 83 | if(!res->success) { 84 | throw std::runtime_error("Error executing statement: "+res->error); 85 | return; 86 | } 87 | } 88 | 89 | 90 | 91 | void DuckTime::addValue(const std::vector>& tags, std::string name, const initializer_list>& values, double tstamp) 92 | { 93 | auto& types = d_types[name]; 94 | if(types.empty()) { 95 | cerr<<"Have no type information for table \""<success) { 100 | throw std::runtime_error("Failed to create table '"+name+"': "+res->error); 101 | } 102 | 103 | } 104 | } 105 | bool addedSomething=false; 106 | // make sure all columns exist for the 'values' 107 | for(const auto& v : values) { 108 | if(auto iter = find_if(types.begin(), types.end(), [&v](const auto& a) { return a.first == v.first;} ); iter == types.end()) { 109 | cerr<<"Adding column "<(&v.second)) 119 | addColumn(d_con, name, v.first, "DOUBLE"); 120 | if(std::get_if(&v.second)) 121 | addColumn(d_con, name, v.first, "TEXT"); 122 | else 123 | addColumn(d_con, name, v.first, "INT64"); 124 | addedSomething=true; 125 | } 126 | } 127 | 128 | 129 | if(addedSomething) { 130 | cerr<<"Rereading schema because we added something"<(d_con, name.c_str()); 139 | } 140 | 141 | d_appender->BeginRow(); 142 | // these are all the fields this table has, each of them needs to be in the Appender 143 | for(const auto& t : types) { 144 | bool appended=false; 145 | if(t.first == "timestamp") { 146 | 
d_appender->Append((int64_t)tstamp); 147 | continue; 148 | } 149 | // consult the values 150 | for(const auto& v : values) { 151 | if(t.first == v.first) { 152 | std::visit([this](auto&& arg) { 153 | using T = std::decay_t; 154 | // cerr<<" Appending "<Append(arg); 156 | }, v.second); 157 | appended=true; 158 | break; 159 | } 160 | } 161 | // consult the tags 162 | for(const auto& v : tags) { 163 | if(t.first == v.first) { 164 | std::visit([this](auto&& arg) { 165 | using T = std::decay_t; 166 | // cerr<<" Appending TAG "<Append(arg); 168 | }, v.second); 169 | appended=true; 170 | break; 171 | } 172 | } 173 | 174 | 175 | if(!appended) { 176 | cerr<<"Did not get any data for column "<Append(nullptr); 178 | } 179 | } 180 | d_appender->EndRow(); 181 | 182 | } 183 | 184 | 185 | int main(int argc, char** argv) 186 | try 187 | { 188 | if(argc != 3) { 189 | cerr<<"Syntax: ducktime duckdbname number\nAdd 'number' items to the duckdbname database\n"; 190 | return EXIT_FAILURE; 191 | } 192 | DuckTime dt(argv[1]); 193 | int limit = atoi(argv[2]); 194 | struct timeval tv; 195 | for(int n=0; n < limit; ++n) { 196 | double t; 197 | gettimeofday(&tv, 0); 198 | t= tv.tv_sec * 1000 + tv.tv_usec/1000.0; 199 | dt.addValue({{"server", n % 16}}, "network", {{"in", n*1234}, {"out", n*321}}, t); 200 | 201 | if(!(n% (1<<20))) { 202 | cerr<<"Cycle time: " << 100.0*n/limit << "%"<