├── .travis.yml ├── Makefile ├── dsv_filter.hpp ├── dsv_filter_example.cpp ├── exprtk.hpp ├── readme.txt ├── strtk.hpp └── world_data.txt /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: bionic 2 | 3 | language: cpp 4 | 5 | sudo: required 6 | 7 | compiler: 8 | - gcc 9 | 10 | script: 11 | - make clean all 12 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # ****************************************************************** 3 | # * Delimiter Seperated Values Filter Library * 4 | # * * 5 | # * Author: Arash Partow (2004) * 6 | # * URL: http://www.partow.net/programming/dsvfilter/index.html * 7 | # * * 8 | # * Copyright notice: * 9 | # * Free use of the Delimiter Seperated Values Filter Library is * 10 | # * permitted under the guidelines of the MIT License. * 11 | # * http://www.opensource.org/licenses/MIT * 12 | # * * 13 | # ****************************************************************** 14 | # 15 | 16 | 17 | COMPILER = -c++ 18 | #COMPILER = -clang 19 | OPTIMIZATION_OPT = -O3 20 | BASE_OPTIONS = -ansi -pedantic-errors -Wall -Wextra -Werror -Wno-long-long 21 | #BASE_OPTIONS = -ansi -pedantic-errors -Wall -Wextra -Werror -Wno-long-long -Ddsv_filter_use_mmap 22 | OPTIONS = $(BASE_OPTIONS) $(OPTIMIZATION_OPT) -o 23 | LINKER_OPT = -L/usr/lib -lstdc++ -lm 24 | #LINKER_OPT = -L/usr/lib -lstdc++ -lboost_iostreams -lm 25 | 26 | BUILD_LIST+=dsv_filter_example 27 | 28 | all: $(BUILD_LIST) 29 | 30 | dsv_filter_example: dsv_filter_example.cpp dsv_filter.hpp exprtk.hpp strtk.hpp 31 | $(COMPILER) $(OPTIONS) dsv_filter_example dsv_filter_example.cpp $(LINKER_OPT) 32 | 33 | pgo: dsv_filter_example.cpp dsv_filter.hpp exprtk.hpp strtk.hpp 34 | $(COMPILER) $(BASE_OPTIONS) -O3 -march=native -fprofile-generate -o dsv_filter_example dsv_filter_example.cpp $(LINKER_OPT) 35 | ./dsv_filter_example 36 | $(COMPILER) $(BASE_OPTIONS) -O3 -march=native -fprofile-use -o dsv_filter_example dsv_filter_example.cpp $(LINKER_OPT) 37 | 38 | strip_bin: 39 | strip -s dsv_filter_example 40 | 41 | valgrind_check: 42 | valgrind --leak-check=full --show-reachable=yes --track-origins=yes ./dsv_filter_example 43 | 44 | clean: 45 | rm -f core.* *~ *.o *.bak *stackdump gmon.out *.gcda *.gcno *.gcnor *.gch 46 | -------------------------------------------------------------------------------- /dsv_filter.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ****************************************************************** 3 | * Delimiter Seperated Values Filter Library * 4 | * * 5 | * Author: Arash Partow (2004) * 6 | * URL: http://www.partow.net/programming/dsvfilter/index.html * 7 | * * 8 | * Copyright notice: * 9 | * Free use of the Delimiter Seperated Values Filter Library is * 10 | * permitted under the guidelines of the MIT License. * 11 | * http://www.opensource.org/licenses/MIT * 12 | * * 13 | ****************************************************************** 14 | */ 15 | 16 | 17 | #ifndef INCLUDE_DSV_FILTER_HPP 18 | #define INCLUDE_DSV_FILTER_HPP 19 | 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #define strtk_no_tr1_or_boost 26 | 27 | #include "exprtk.hpp" 28 | #include "strtk.hpp" 29 | 30 | 31 | #ifdef dsv_filter_use_mmap 32 | #include 33 | #endif 34 | 35 | 36 | class dsv_filter 37 | { 38 | public: 39 | 40 | struct column_properties 41 | { 42 | enum column_type 43 | { 44 | e_none = 0, 45 | e_string = 1, 46 | e_number = 2 47 | }; 48 | 49 | column_properties() 50 | : type(e_none), 51 | name (""), 52 | value_s(""), 53 | value_n(0.0), 54 | process(false) 55 | {} 56 | 57 | column_type type; 58 | std::string name; 59 | std::string value_s; 60 | double value_n; 61 | strtk::util::value value; 62 | bool process; 63 | }; 64 | 65 | dsv_filter() 66 | : file_name_ ("" ), 67 | input_delimiter_ (","), 68 | output_delimiter_("|") 69 | { 70 | symbol_table_.add_constants(); 71 | expression_.register_symbol_table(symbol_table_); 72 | } 73 | 74 | inline std::string file_name() const 75 | { 76 | return file_name_; 77 | } 78 | 79 | inline void set_input_delimiter(const std::string& input_delimiter) 80 | { 81 | input_delimiter_ = input_delimiter; 82 | } 83 | 84 | inline void set_output_delimiter(const std::string& output_delimiter) 85 | { 86 | output_delimiter_ = output_delimiter; 87 | } 88 | 89 | inline std::string input_delimiter() const 90 | { 91 | return input_delimiter_; 92 | } 93 | 94 | inline std::string output_delimiter() const 95 | { 96 | return output_delimiter_; 97 | } 98 | 99 | inline std::size_t column_count() const 100 | { 101 | return column_.size(); 102 | } 103 | 104 | inline std::size_t row_count() const 105 | { 106 | return grid_.row_count(); 107 | } 108 | 109 | inline const column_properties& column(const std::size_t& index) const 110 | { 111 | return column_[index]; 112 | } 113 | 114 | inline bool load(const std::string& file_name) 115 | { 116 | if (!strtk::fileio::file_exists(file_name)) 117 | return false; 118 | 119 | file_name_ = file_name; 120 | strtk::token_grid::options options; 121 | options.column_delimiters = input_delimiter_; 122 | 123 | #ifdef dsv_filter_use_mmap 124 | input_source.close(); 125 | input_source.open(file_name_); 126 | 127 | unsigned char* data = reinterpret_cast(const_cast(input_source.data())); 128 | 129 | if (!grid_.load(data,input_source.size(),options)) 130 | return false; 131 | #else 132 | if (!grid_.load(file_name_,options)) 133 | return false; 134 | #endif 135 | 136 | if (0 == grid_.row_count()) 137 | return false; 138 | else if (grid_.row_count() < 2) 139 | return false; 140 | else if (!process_column_header()) 141 | return false; 142 | 143 | return true; 144 | } 145 | 146 | inline bool add_filter(const std::string& filter_expression) 147 | { 148 | error_ = ""; 149 | parser_.dec().collect_variables() = true; 150 | 151 | if (!parser_.compile(filter_expression,expression_)) 152 | { 153 | error_ = "Error: " + parser_.error() + "\tFilter: " + filter_expression; 154 | 155 | return false; 156 | } 157 | 158 | // Only extract for processing, the column values 159 | // that are being used in the current expression. 160 | typedef exprtk::parser parser_t; 161 | typedef parser_t::dependent_entity_collector::symbol_t symbol_t; 162 | 163 | std::deque symbol_list; 164 | 165 | parser_.dec().symbols(symbol_list); 166 | 167 | for (std::size_t i = 0; i < column_.size(); ++i) 168 | { 169 | if (column_[i].name.empty()) 170 | continue; 171 | 172 | column_[i].process = false; 173 | 174 | for (std::size_t j = 0; j < symbol_list.size(); ++j) 175 | { 176 | if (strtk::imatch(symbol_list[j].first,column_[i].name)) 177 | { 178 | column_[i].process = true; 179 | break; 180 | } 181 | } 182 | } 183 | 184 | return true; 185 | } 186 | 187 | template class Sequence> 189 | inline bool row(const std::size_t& r, 190 | const Sequence& selected_column, 191 | std::string& row_result) 192 | { 193 | if (selected_column.size() != column_.size()) 194 | { 195 | error_ = "Error: number of selected columns larger than number of columns"; 196 | return false; 197 | } 198 | 199 | if (r >= grid_.row_count()) 200 | { 201 | strtk::build_string s; 202 | s << "Error: row[" << r << "] out of bounds."; 203 | error_ = s.to_str(); 204 | 205 | return false; 206 | } 207 | 208 | if (row_.index() != r) 209 | { 210 | row_ = grid_.row(r); 211 | } 212 | 213 | bool append_delimeter = false; 214 | 215 | for (std::size_t c = 0; c < column_.size(); ++c) 216 | { 217 | if (selected_column[c]) 218 | { 219 | if (append_delimeter) 220 | row_result.append(output_delimiter_); 221 | else 222 | append_delimeter = true; 223 | 224 | strtk::token_grid::range_t token = row_.token(c); 225 | row_result.append(token.first,token.second); 226 | } 227 | } 228 | 229 | return true; 230 | } 231 | 232 | inline std::string error() 233 | { 234 | return error_; 235 | } 236 | 237 | enum filter_result 238 | { 239 | e_error, 240 | e_match, 241 | e_mismatch 242 | }; 243 | 244 | inline filter_result operator[](const std::size_t& r) 245 | { 246 | row_ = grid_.row(r); 247 | 248 | for (std::size_t c = 0; c < column_.size(); ++c) 249 | { 250 | if (!column_[c].process) 251 | continue; 252 | else if (!row_.parse_with_index(c,column_[c].value)) 253 | { 254 | strtk::build_string s; 255 | s << "Error: Failed to process element at row/col["<< r << "," << c << "] value:" << row_.get(c); 256 | error_ = s.to_str(); 257 | 258 | return e_error; 259 | } 260 | } 261 | 262 | return (1.0 == expression_.value()) ? e_match : e_mismatch; 263 | } 264 | 265 | const strtk::token_grid& grid() const 266 | { 267 | return grid_; 268 | } 269 | 270 | private: 271 | 272 | inline bool process_column_header() 273 | { 274 | static const std::string string_id ("_s"); 275 | static const std::string number_id ("_n"); 276 | 277 | expression_.get_symbol_table().clear(); 278 | column_.clear(); 279 | column_.resize(grid_.row(0).size()); 280 | 281 | strtk::token_grid::row_type row = grid_.row(0); 282 | std::string col_name = ""; 283 | std::string col_suffix = ""; 284 | 285 | for (std::size_t i = 0; i < row.size(); ++i) 286 | { 287 | column_properties& column = column_[i]; 288 | 289 | column.process = false; 290 | 291 | col_name = row.get(i); 292 | col_suffix = (col_name.size() >= 2) ? strtk::text::remaining_string(col_name.size() - 2,col_name) : ""; 293 | col_name = col_name.substr(0,col_name.size() - 2); 294 | 295 | if (symbol_table_.symbol_exists(col_name)) 296 | { 297 | error_ = "Error: Redefinition of column " + col_name; 298 | return false; 299 | } 300 | else if (strtk::iends_with("_s",col_suffix)) 301 | { 302 | column.type = dsv_filter::column_properties::e_string; 303 | column.name = col_name; 304 | column.value = strtk::util::value(column.value_s); 305 | column.process = true; 306 | 307 | symbol_table_.add_stringvar(col_name,column.value_s); 308 | } 309 | else if (strtk::iends_with("_n",col_suffix)) 310 | { 311 | column.type = dsv_filter::column_properties::e_number; 312 | column.name = col_name; 313 | column.process = true; 314 | column.value = strtk::util::value(column.value_n); 315 | 316 | symbol_table_.add_variable(col_name,column.value_n); 317 | } 318 | } 319 | 320 | return true; 321 | } 322 | 323 | std::string file_name_; 324 | std::string input_delimiter_; 325 | std::string output_delimiter_; 326 | std::string error_; 327 | std::vector column_; 328 | strtk::token_grid grid_; 329 | exprtk::symbol_table symbol_table_; 330 | exprtk::parser parser_; 331 | exprtk::expression expression_; 332 | strtk::token_grid::row_type row_; 333 | 334 | #ifdef dsv_filter_use_mmap 335 | boost::iostreams::mapped_file_source input_source; 336 | #endif 337 | 338 | }; 339 | 340 | #endif 341 | -------------------------------------------------------------------------------- /dsv_filter_example.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ****************************************************************** 3 | * Delimiter Seperated Values Filter Library * 4 | * * 5 | * DSV Filter Example * 6 | * Author: Arash Partow (2004) * 7 | * URL: http://www.partow.net/programming/dsvfilter/index.html * 8 | * * 9 | * Copyright notice: * 10 | * Free use of the Delimiter Seperated Values Filter Library is * 11 | * permitted under the guidelines of the MIT License. * 12 | * http://www.opensource.org/licenses/MIT * 13 | * * 14 | ****************************************************************** 15 | */ 16 | 17 | 18 | /* 19 | Description: 20 | 21 | The following is a simple driver demonstrating the capabilities 22 | of the DSV Filter library. The example is capable of loading a 23 | CSV or DSV flat text file into memory, and then outputting a 24 | subset of rows based on a user specified filter expression. 25 | 26 | It is required that the first line of the input file be a header 27 | line, which uniquely names each of the columns. If a column is to 28 | be used within expressions, then the name of the column must end 29 | in either an "_n" or "_s" denoting the type of the column as 30 | being either a number or string respectively. 31 | 32 | Furthermore the driver supports selectively choosing which 33 | columns to output upon successfully filtering a row or simply to 34 | output the number of rows that have matched the specified filter 35 | expression. 36 | 37 | Example Filter 1: 38 | 39 | col3 == (col2 + col6) or col5 > col1 40 | 41 | Explanation: 42 | For each row/tuple select for output where the value of col3 is 43 | equal to the sum of the values of col2 and col6 or if the value 44 | of col5 is greater than col1. 45 | 46 | 47 | Example Filter 2: 48 | 49 | select col1,col8 | col3 > col2 + col6 50 | 51 | Explanation: 52 | For each row, select the columns named col1 and col8 for output 53 | where the value for col3 is greater than the sum of the values 54 | of col2 and col6. 55 | 56 | 57 | Example Filter 3: 58 | 59 | count | col3 <= (col2 - col6) and 'Grape' in col7 60 | 61 | Explanation: 62 | Return the number of rows where the value for col3 is less than 63 | or equal to the difference of the values between col2 and col6 64 | and where the string 'Grape' appears in col7. 65 | 66 | It should be noted that when declaring an expression or select 67 | list, that the column names are not case sensitive. Furthermore 68 | they should not include the "_s" or "_n" suffixes. 69 | 70 | The pipe symbol "|" indicates the separation of the select or 71 | count clause from the where-clause which defines the filter 72 | expression upon which every row will be evaluated against. 73 | 74 | 75 | Typical Usage: 76 | 77 | Step 1: Execute dsv_filter_example 78 | 79 | Step 2: Type 'help' at the prompt for a quick overview of the available commands 80 | 81 | Step 3: Set the desired input delimiter value by using the 'input_delimiter = ' command. 82 | By default it's pipe "|" 83 | 84 | Step 4: Load the data-store by using the 'load ' command 85 | 86 | Step 5: Type 'information' to view details about the data-store 87 | 88 | Step 6: Type 'list' to view all the queryable columns 89 | 90 | Step 7: Enter query and press 'enter' 91 | 92 | */ 93 | 94 | 95 | #include 96 | #include 97 | 98 | #include "dsv_filter.hpp" 99 | #include "strtk.hpp" 100 | 101 | 102 | void display_columns(const dsv_filter& filter); 103 | 104 | bool load_dsv(const std::string query, dsv_filter& filter); 105 | 106 | template class Sequence> 108 | void display_history(const Sequence& query_history); 109 | 110 | template class Sequence> 112 | void generate_results(const Sequence& selected_column, dsv_filter& filter, const bool count_only = false); 113 | 114 | template class Sequence> 116 | bool parse_query(std::string& query, Sequence& selected_column_list, bool& count_mode, dsv_filter& filter); 117 | 118 | bool set_output_delimiter(const std::string& query, dsv_filter& filter); 119 | 120 | bool set_input_delimiter(const std::string& query, dsv_filter& filter); 121 | 122 | template class Sequence> 124 | bool lookup_history(const Sequence& query_history, std::string& query); 125 | 126 | void information(const dsv_filter& filter); 127 | 128 | void print_help(); 129 | 130 | int main(int argc, char* argv[]) 131 | { 132 | //example usage: 133 | //dsv_filter_example 134 | //dsv_filter_example 135 | //dsv_filter_example 136 | 137 | std::string file_name = ""; 138 | std::string input_delimiter = "|"; 139 | std::string output_delimiter = "|"; 140 | 141 | if (2 == argc) 142 | { 143 | file_name = argv[1]; 144 | } 145 | else if (3 == argc) 146 | { 147 | file_name = argv[1]; 148 | input_delimiter = argv[2]; 149 | } 150 | else if (4 == argc) 151 | { 152 | file_name = argv[1]; 153 | input_delimiter = argv[2]; 154 | output_delimiter = argv[3]; 155 | } 156 | 157 | dsv_filter filter; 158 | 159 | filter.set_input_delimiter(input_delimiter); 160 | filter.set_output_delimiter(output_delimiter); 161 | 162 | if (!file_name.empty()) 163 | { 164 | if (filter.load(file_name)) 165 | std::cout << "Successfully loaded " << filter.file_name() << "\n"; 166 | else 167 | { 168 | std::cerr << "Error - Failed to load: " << file_name << std::endl; 169 | return 1; 170 | } 171 | } 172 | 173 | std::vector selected_column_list(filter.column_count(),true); 174 | 175 | std::deque query_history; 176 | std::string query; 177 | 178 | for ( ; ; ) 179 | { 180 | std::cout << "\nEnter query: "; 181 | std::getline(std::cin,query); 182 | 183 | strtk::remove_leading_trailing(" \t\n\r",query); 184 | 185 | if (query.empty()) 186 | continue; 187 | else if (query_history.empty() || (query_history.back() != query)) 188 | { 189 | query_history.push_back(query); 190 | } 191 | 192 | if (0 == strtk::ifind("exec",query)) 193 | { 194 | if (!lookup_history(query_history,query)) 195 | continue; 196 | } 197 | 198 | if (strtk::imatch(query,"exit") || strtk::imatch(query,"quit")) 199 | break; 200 | else if (strtk::imatch(query,"help")) 201 | { 202 | print_help(); 203 | continue; 204 | } 205 | else if (strtk::imatch(query,"list")) 206 | { 207 | display_columns(filter); 208 | continue; 209 | } 210 | else if (0 == strtk::ifind("load",query)) 211 | { 212 | load_dsv(query,filter); 213 | continue; 214 | } 215 | else if (strtk::imatch(query,"history")) 216 | { 217 | query_history.pop_back(); 218 | display_history(query_history); 219 | continue; 220 | } 221 | else if (0 == strtk::ifind("output_delimiter",query)) 222 | { 223 | set_output_delimiter(query,filter); 224 | continue; 225 | } 226 | else if (0 == strtk::ifind("input_delimiter",query)) 227 | { 228 | set_input_delimiter(query,filter); 229 | continue; 230 | } 231 | else if ((0 == strtk::ifind("information",query)) || (0 == strtk::ifind("info",query))) 232 | { 233 | information(filter); 234 | continue; 235 | } 236 | 237 | selected_column_list.resize(filter.column_count(),true); 238 | std::fill_n(selected_column_list.begin(),selected_column_list.size(),true); 239 | 240 | bool count_mode = false; 241 | 242 | if(!parse_query(query,selected_column_list,count_mode,filter)) 243 | continue; 244 | else if(filter.add_filter(query)) 245 | generate_results(selected_column_list,filter,count_mode); 246 | else 247 | { 248 | std::cout << filter.error() << std::endl; 249 | continue; 250 | } 251 | } 252 | 253 | return 0; 254 | } 255 | 256 | void display_columns(const dsv_filter& filter) 257 | { 258 | if (0 == filter.column_count()) 259 | std::cout << "No valid columns available.\n"; 260 | else 261 | { 262 | std::cout << "+--+----------------+---------+\n"; 263 | std::cout << "|# | Name | Type |\n"; 264 | std::cout << "+--+----------------+---------+\n"; 265 | std::size_t length = 0; 266 | 267 | for (std::size_t i = 0; i < filter.column_count(); ++i) 268 | { 269 | const dsv_filter::column_properties& column = filter.column(i); 270 | 271 | length = std::max(length,column.name.size()); 272 | } 273 | 274 | for (std::size_t i = 0; i < filter.column_count(); ++i) 275 | { 276 | const dsv_filter::column_properties& column = filter.column(i); 277 | 278 | std::cout << "|" << strtk::text::right_align(2,'0',i) << "| " 279 | << strtk::text::left_align(length,column.name); 280 | 281 | switch (column.type) 282 | { 283 | case dsv_filter::column_properties::e_string : std::cout << " | STRING |\n"; break; 284 | case dsv_filter::column_properties::e_number : std::cout << " | NUMBER |\n"; break; 285 | default : std::cout << " | UNKNOWN |\n"; break; 286 | } 287 | } 288 | 289 | std::cout << "+--+----------------+---------+\n"; 290 | } 291 | } 292 | 293 | bool load_dsv(const std::string query, dsv_filter& filter) 294 | { 295 | static strtk::ignore_token ignore; 296 | std::string file_name; 297 | 298 | if (!strtk::parse(query," \t",ignore,file_name)) 299 | return true; 300 | 301 | strtk::util::timer timer; 302 | timer.start(); 303 | 304 | if(!filter.load(file_name)) 305 | { 306 | std::cout << "Failed to load: " << file_name << std::endl; 307 | return false; 308 | } 309 | 310 | timer.stop(); 311 | 312 | printf("Successfully loaded: %s - Total time: %6.3fsec\n", 313 | file_name.c_str(), 314 | timer.time()); 315 | 316 | return true; 317 | } 318 | 319 | template class Sequence> 321 | void display_history(const Sequence& query_history) 322 | { 323 | for (std::size_t i = 0; i < query_history.size(); ++i) 324 | { 325 | std::cout << strtk::text::right_align(2,'0',i) << " " << query_history[i] << std::endl; 326 | } 327 | 328 | std::cout << "Number of queries: " << query_history.size() << std::endl; 329 | } 330 | 331 | 332 | template class Sequence> 334 | void generate_results(const Sequence& selected_column, dsv_filter& filter, const bool count_only) 335 | { 336 | strtk::util::timer timer; 337 | timer.start(); 338 | 339 | std::size_t result_count = 0; 340 | std::size_t row_count = filter.row_count(); 341 | dsv_filter::filter_result filter_result; 342 | 343 | std::string result; 344 | result.reserve(strtk::one_kilobyte); 345 | 346 | for (std::size_t r = 1; r < row_count; ++r) 347 | { 348 | filter_result = filter[r]; 349 | 350 | if (dsv_filter::e_match == filter_result) 351 | { 352 | if (!count_only) 353 | { 354 | result.clear(); 355 | 356 | if (!filter.row(r,selected_column,result)) 357 | { 358 | std::cout << filter.error() << std::endl; 359 | break; 360 | } 361 | 362 | std::cout << result << "\n"; 363 | } 364 | 365 | ++result_count; 366 | } 367 | else if (dsv_filter::e_error == filter_result) 368 | { 369 | std::cout << filter.error() << std::endl; 370 | break; 371 | } 372 | } 373 | 374 | timer.stop(); 375 | 376 | std::cout << "---------------------\n"; 377 | printf("Number of results: %d\nTime: %8.5fms\n", 378 | static_cast(result_count), 379 | timer.time() * 1000.0); 380 | } 381 | 382 | template class Sequence> 384 | bool parse_query(std::string& query, 385 | Sequence& selected_column_list, 386 | bool& count_mode, 387 | dsv_filter& filter) 388 | { 389 | std::deque sub_query; 390 | strtk::parse(query,"|",sub_query); 391 | 392 | strtk::remove_empty_strings(sub_query); 393 | 394 | if (2 == sub_query.size()) 395 | { 396 | if (0 == strtk::ifind("select",sub_query[0])) 397 | { 398 | std::fill_n(selected_column_list.begin(),selected_column_list.size(),false); 399 | 400 | std::deque selected_cols; 401 | 402 | strtk::parse(sub_query[0],", ",selected_cols); 403 | bool col_found = false; 404 | 405 | for (std::size_t i = 1; i < selected_cols.size(); ++i) 406 | { 407 | if (selected_cols[i].empty()) 408 | continue; 409 | 410 | col_found = false; 411 | 412 | for (std::size_t c = 0; c < filter.column_count(); ++c) 413 | { 414 | if (strtk::imatch(selected_cols[i],filter.column(c).name)) 415 | { 416 | selected_column_list[c] = true; 417 | col_found = true; 418 | 419 | break; 420 | } 421 | } 422 | 423 | if (!col_found) 424 | { 425 | std::cout << "Error - Invalid column: [" << selected_cols[i] << "]" << std::endl; 426 | return false; 427 | } 428 | } 429 | 430 | query = sub_query[1]; 431 | } 432 | else if (0 == strtk::ifind("count",sub_query[0])) 433 | { 434 | count_mode = true; 435 | query = sub_query[1]; 436 | } 437 | else 438 | return false; 439 | } 440 | else if (1 != sub_query.size()) 441 | return false; 442 | else 443 | query = sub_query[0]; 444 | 445 | return true; 446 | } 447 | 448 | bool set_output_delimiter(const std::string& query, dsv_filter& filter) 449 | { 450 | static const std::string preamble = "output_delimiter = "; 451 | 452 | if (0 != strtk::ifind("output_delimiter = ",query)) 453 | { 454 | std::cout << "Invalid format for command. eg: output_delimiter = |" << std::endl; 455 | return false; 456 | } 457 | 458 | std::string delimiter = strtk::text::remaining_string(preamble.size(),query); 459 | strtk::remove_leading_trailing(" '\"",delimiter); 460 | 461 | if (delimiter.empty()) 462 | return false; 463 | 464 | filter.set_output_delimiter(delimiter); 465 | std::cout << "Output delimiter set to: " << delimiter << std::endl; 466 | 467 | return true; 468 | } 469 | 470 | bool set_input_delimiter(const std::string& query, dsv_filter& filter) 471 | { 472 | static const std::string preamble = "input_delimiter = "; 473 | 474 | if (0 != strtk::ifind("input_delimiter = ",query)) 475 | { 476 | std::cout << "Invalid format for command. eg: input_delimiter = |" << std::endl; 477 | return false; 478 | } 479 | 480 | std::string delimiter = strtk::text::remaining_string(preamble.size(),query); 481 | strtk::remove_leading_trailing(" '\"",delimiter); 482 | 483 | if (delimiter.empty()) 484 | return false; 485 | 486 | filter.set_input_delimiter(delimiter); 487 | std::cout << "Input delimiter set to: " << delimiter << std::endl; 488 | 489 | return true; 490 | } 491 | 492 | template class Sequence> 494 | bool lookup_history(const Sequence& query_history, std::string& query) 495 | { 496 | if (query_history.empty()) 497 | return false; 498 | 499 | static strtk::ignore_token ignore; 500 | std::size_t query_index = 0; 501 | 502 | if (!strtk::parse(query," \t",ignore,query_index)) 503 | return false; 504 | else if (query_index >= query_history.size()) 505 | return false; 506 | 507 | query = query_history[query_index]; 508 | std::cout << "Query: " << query << std::endl; 509 | 510 | return true; 511 | } 512 | 513 | void information(const dsv_filter& filter) 514 | { 515 | std::cout << "\n"; 516 | std::cout << "Information\n"; 517 | std::cout << "-----------\n"; 518 | std::cout << "File: " << filter.file_name() << "\n"; 519 | std::cout << "Rows: " << filter.row_count() << "\n"; 520 | std::cout << "Columns: " << filter.column_count() << "\n"; 521 | std::cout << "Elements: " << filter.row_count() * filter.column_count() << "\n"; 522 | std::cout << "Input Delimiter: " << filter.input_delimiter() << "\n"; 523 | std::cout << "Output Delimiter: " << filter.output_delimiter() << "\n"; 524 | std::cout << "\n"; 525 | } 526 | 527 | void print_help() 528 | { 529 | std::cout << "\n"; 530 | std::cout << "DSV Filter Help\n"; 531 | std::cout << "Command Definition\n"; 532 | std::cout << "------------------------------------\n"; 533 | std::cout << "list List columns and their types\n"; 534 | std::cout << "history Display history of queries\n"; 535 | std::cout << "load Load DSV file\n"; 536 | std::cout << "exec Execute the i'th query found in history\n"; 537 | std::cout << "input_delimiter = Set the input delimiter\n"; 538 | std::cout << "output_delimiter = Set the output delimiter\n"; 539 | std::cout << "information Display details of DSV store\n"; 540 | std::cout << "\n"; 541 | } 542 | -------------------------------------------------------------------------------- /readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArashPartow/filter/19b0b935f02a8ef9f3c5aa150dc91402a6d69342/readme.txt -------------------------------------------------------------------------------- /world_data.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArashPartow/filter/19b0b935f02a8ef9f3c5aa150dc91402a6d69342/world_data.txt --------------------------------------------------------------------------------