├── .gitignore ├── DataFrameDoc.docx ├── README.md ├── DMScu ├── src │ ├── Makefile.Linux.GCC64 │ ├── Makefile.Linux.GCC64D │ ├── DMScu_MMapFile.cc │ ├── CommonMakefile.mk │ ├── filebase_tester.cc │ ├── mmfile_tester.cc │ ├── fixsizestr_tester.cc │ ├── DMScu_FileBase.cc │ └── DMScu_MMapBase.cc └── include │ ├── DMScu_MMapFile.h │ ├── DMScu_Exception.h │ ├── DMScu_FileDef.h │ └── DMScu_FixedSizeString.h ├── src ├── Makefile.Linux.GCC64D ├── Makefile.Linux.GCC64 ├── sample_data.csv ├── BaseContainer.cc ├── CommonMakefile.mk └── datasci_tester.cc ├── License └── include ├── DataFrame_opt.tcc ├── BaseContainer.h ├── DataFrame_read.tcc ├── BaseContainer.tcc ├── DataFrame_misc.tcc ├── DataFrame_set.tcc ├── DataFrame.tcc ├── DFVisitors.h └── DataFrame_get.tcc /.gitignore: -------------------------------------------------------------------------------- 1 | \bin 2 | \lib 3 | \obj 4 | -------------------------------------------------------------------------------- /DataFrameDoc.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D-K-E/DataFrame/master/DataFrameDoc.docx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DataFrame 2 | This is a C++ statistical library to provide an interface similar to Pandas package in Python 3 | -------------------------------------------------------------------------------- /DMScu/src/Makefile.Linux.GCC64: -------------------------------------------------------------------------------- 1 | ## Hossein Moein 2 | ## July 17 2009 3 | 4 | BUILD_PLATFORM = Linux.GCC64 5 | BUILD_DEFINE = Linux_GCC64 6 | 7 | # ----------------------------------------------------------------------------- 8 | 9 | CXX = /usr/bin/g++ 10 | 11 | INCLUDES = -I/usr/include/c++/7 -I/usr/include 12 | 13 | LFLAGS = -L/usr/lib/gcc/x86_64-linux-gnu/7 14 | CXXFLAGS = -O3 -std=c++17 
$(INCLUDES) $(DEFINES) 15 | 16 | PLATFORM_LIBS = -lpthread -lrt -ldl -lm -lnsl -lstdc++ 17 | 18 | # ----------------------------------------------------------------------------- 19 | 20 | include CommonMakefile.mk 21 | 22 | # ----------------------------------------------------------------------------- 23 | 24 | ## Local Variables: 25 | ## mode:Makefile 26 | ## tab-width:4 27 | ## End: 28 | -------------------------------------------------------------------------------- /DMScu/src/Makefile.Linux.GCC64D: -------------------------------------------------------------------------------- 1 | ## Hossein Moein 2 | ## July 17 2009 3 | 4 | BUILD_PLATFORM = Linux.GCC64D 5 | BUILD_DEFINE = Linux_GCC64 6 | 7 | # ----------------------------------------------------------------------------- 8 | 9 | CXX = /usr/bin/g++ 10 | 11 | INCLUDES = -I/usr/include/c++/7 -I/usr/include 12 | 13 | LFLAGS = -L/usr/lib/gcc/x86_64-linux-gnu/7 14 | CXXFLAGS = -g -std=c++17 $(INCLUDES) $(DEFINES) 15 | 16 | PLATFORM_LIBS = -lpthread -lrt -ldl -lm -lnsl -lstdc++ 17 | 18 | # ----------------------------------------------------------------------------- 19 | 20 | include CommonMakefile.mk 21 | 22 | # ----------------------------------------------------------------------------- 23 | 24 | ## Local Variables: 25 | ## mode:Makefile 26 | ## tab-width:4 27 | ## End: 28 | -------------------------------------------------------------------------------- /src/Makefile.Linux.GCC64D: -------------------------------------------------------------------------------- 1 | ## Hossein Moein 2 | ## September 12 2017 3 | 4 | BUILD_PLATFORM = Linux.GCC64D 5 | BUILD_DEFINE = Linux_GCC64 6 | 7 | # ----------------------------------------------------------------------------- 8 | 9 | CXX = /usr/bin/g++ 10 | 11 | INCLUDES = -I/usr/include/c++/7 -I/usr/include 12 | 13 | LFLAGS = -L/usr/lib/gcc/x86_64-linux-gnu/7 14 | CXXFLAGS = -g $(INCLUDES) $(DEFINES) -std=c++17 15 | 16 | PLATFORM_LIBS = -lpthread -lrt -ldl -lm -lnsl -lstdc++ 17 
| 18 | # ----------------------------------------------------------------------------- 19 | 20 | include CommonMakefile.mk 21 | 22 | # ----------------------------------------------------------------------------- 23 | 24 | ## Local Variables: 25 | ## mode:Makefile 26 | ## tab-width:4 27 | ## End: 28 | -------------------------------------------------------------------------------- /src/Makefile.Linux.GCC64: -------------------------------------------------------------------------------- 1 | ## Hossein Moein 2 | ## September 12 2017 3 | 4 | BUILD_PLATFORM = Linux.GCC64 5 | BUILD_DEFINE = Linux_GCC64 6 | 7 | # ----------------------------------------------------------------------------- 8 | 9 | CXX = /usr/bin/g++ 10 | 11 | INCLUDES = -I/usr/include/c++/7 -I/usr/include 12 | 13 | LFLAGS = -L/usr/lib/gcc/x86_64-linux-gnu/7 14 | CXXFLAGS = -O3 -fPIC $(INCLUDES) $(DEFINES) -std=c++17 15 | 16 | PLATFORM_LIBS = -lpthread -lrt -ldl -lm -lnsl -lstdc++ 17 | 18 | # ----------------------------------------------------------------------------- 19 | 20 | include CommonMakefile.mk 21 | 22 | # ----------------------------------------------------------------------------- 23 | 24 | ## Local Variables: 25 | ## mode:Makefile 26 | ## tab-width:4 27 | ## End: 28 | 29 | -------------------------------------------------------------------------------- /src/sample_data.csv: -------------------------------------------------------------------------------- 1 | # Test csv file 2 | # 3 | 4 | INDEX:123432,123433,123434,123435,123436,123441,123442,123448,123449,123450,123450,123450,123450,123450,123451,123451,123452,123452,123452,123453,123454,123455,123455,123456,123457,123458,123459,123460, 5 | ul_col::123450,123451,123452,123450,123455,123450,123449,123448,123451,123452,123452,123450,123455,123450,123454,123453,123456,123457,123458,123459,123460,123441,123442,123432,123433,123434,123435,123436, 6 | xint_col::35,36,40,45,46,33,34,8,7,1,4,6,12,14,2,9,3,10,11,20,15,5,13,22,23,24,25,30, 7 | 
str_col::XXXX10,XXXX11,XXXX01,XXXX02,XXXX03,XXXX6,XXXX7,Running fast,$15 increase,4% of something,3.4% of GDP,Market pulls back,Bonds vs. Equities,Here comes the sun,Description 4/5,C++14 development,This is bad,Some explanation,More strings,XXXX04,XXXX1,Market drops,Almost done,XXXX2,XXXX3,XXXX4,XXXX4,XXXX5, 8 | dbl_col::2.009,3.111,10,4.2222,5.3333,12,6.25,10,0.9999,1.2345,4.2345,3,8,3.3333,2.2345,4.25,3.2345,0.009,1.111,5.25,11,5.2345,2.2222,1.009,2.111,9,3.2222,4.3333, 9 | dbl_col_2::0.87865,-0.6999,0.4111,0.1902,-0.4888,0.2,0.1056,0.1,0.06743,0.998,0.15678,0.923,0.0111,-0.8888,0.3456,0.0056,0.056,0.07865,-0.9999,0.0456,0.14,0.00345,0.1002,0.078654,-0.8999,0.01119,0.8002,-0.9888, 10 | -------------------------------------------------------------------------------- /License: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017-2018, Hossein Moein 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /DMScu/include/DMScu_MMapFile.h: -------------------------------------------------------------------------------- 1 | // Hossein Moein 2 | // August 21, 2007 3 | // Copyright (C) 2017-2018 Hossein Moein 4 | // Distributed under the BSD Software License (see file License) 5 | 6 | #ifndef _INCLUDED_DMScu_MMapFile_h 7 | #define _INCLUDED_DMScu_MMapFile_h 0 8 | 9 | // ---------------------------------------------------------------------------- 10 | 11 | #include 12 | 13 | // ---------------------------------------------------------------------------- 14 | // Device class layered on DMScu_MMapBase; it always hands the base the _mmap_file_ device type. 15 | class DMScu_MMapFile : public DMScu_MMapBase { 16 | 17 | 18 | public: 19 | // Forwards file name, open mode, buffer size and permission bits to DMScu_MMapBase, then opens the device right away. 20 | inline DMScu_MMapFile (const char *file_name, OPEN_MODE open_mode, 21 | size_type buffer_size = 0LL, 22 | mode_t file_open_mode = 23 | S_IRUSR | S_IWUSR | S_IRGRP) 24 | : DMScu_MMapBase (file_name, open_mode, _mmap_file_, 25 | buffer_size, file_open_mode) { 26 | // NOTE: open() is virtual but is called during construction, so DMScu_MMapFile::open is the version that runs even for a further-derived class. 27 | _translate_open_mode (); 28 | open (); 29 | } 30 | 31 | virtual bool open (); 32 | virtual void unlink (); 33 | 34 | protected: 35 | // Sets the _in_use_ bit in _file_flags and returns true (presumably called by the base once the initial mapping exists -- confirm in DMScu_MMapBase). 36 | virtual bool _initial_map_posthook () { 37 | 38 | _file_flags |= _in_use_; 39 | return (true); 40 | } 41 | }; 42 | 43 | // ---------------------------------------------------------------------------- 44 | 45 | #undef _INCLUDED_DMScu_MMapFile_h 46 | #define _INCLUDED_DMScu_MMapFile_h 1 47 | #endif // _INCLUDED_DMScu_MMapFile_h 48 | 49 | // Local
Variables: 50 | // mode:C++ 51 | // tab-width:4 52 | // c-basic-offset:4 53 | // End: 54 | -------------------------------------------------------------------------------- /src/BaseContainer.cc: -------------------------------------------------------------------------------- 1 | // Hossein Moein 2 | // September 13, 2017 3 | // Copyright (C) 2017-2018 Hossein Moein 4 | // Distributed under the BSD Software License (see file License) 5 | 6 | #include 7 | 8 | // ---------------------------------------------------------------------------- 9 | 10 | namespace hmdf 11 | { 12 | // Default construction: reserve room for two entries in each callback list. 13 | HeteroVector::HeteroVector () { 14 | 15 | clear_functions_.reserve(2); 16 | copy_functions_.reserve(2); 17 | move_functions_.reserve(2); 18 | } 19 | 20 | // ---------------------------------------------------------------------------- 21 | // Both constructors delegate to assignment; std::move is required because the named parameter 'that' is an lvalue and would otherwise select the copy overload. 22 | HeteroVector::HeteroVector (const HeteroVector &that) { *this = that; } 23 | HeteroVector::HeteroVector (HeteroVector &&that) { *this = std::move(that); } 24 | 25 | // ---------------------------------------------------------------------------- 26 | // Copy assignment: clear(), copy the three callback lists from rhs, then call every copy callback with (rhs, *this). Self-assignment is a no-op. 27 | HeteroVector &HeteroVector::operator= (const HeteroVector &rhs) { 28 | 29 | if (&rhs != this) { 30 | clear(); 31 | clear_functions_ = rhs.clear_functions_; 32 | copy_functions_ = rhs.copy_functions_; 33 | move_functions_ = rhs.move_functions_; 34 | 35 | for (auto &&copy_function : copy_functions_) 36 | copy_function(rhs, *this); 37 | } 38 | 39 | return (*this); 40 | } 41 | 42 | // ---------------------------------------------------------------------------- 43 | // Move assignment: clear(), take over the three callback lists of rhs, then call every move callback with (rhs, *this). Self-assignment is a no-op. 44 | HeteroVector &HeteroVector::operator= (HeteroVector &&rhs) { 45 | 46 | if (&rhs != this) { 47 | clear(); 48 | clear_functions_ = std::move(rhs.clear_functions_); 49 | copy_functions_ = std::move(rhs.copy_functions_); 50 | move_functions_ = std::move(rhs.move_functions_); 51 | 52 | for (auto &&move_function : move_functions_) 53 | move_function(rhs, *this); 54 | } 55 | 56 | return (*this); 57 | } 58 | 59 | //
---------------------------------------------------------------------------- 60 | 61 | void HeteroVector::clear() { 62 | // Call every stored clear callback, passing this container. 63 | for (auto &&clear_func : clear_functions_) 64 | clear_func (*this); 65 | } 66 | 67 | } // namespace hmdf 68 | 69 | // ---------------------------------------------------------------------------- 70 | 71 | // Local Variables: 72 | // mode:C++ 73 | // tab-width:4 74 | // c-basic-offset:4 75 | // End: 76 | -------------------------------------------------------------------------------- /DMScu/include/DMScu_Exception.h: -------------------------------------------------------------------------------- 1 | // Hossein Moein 2 | // July 17 2009 3 | // Copyright (C) 2017-2018 Hossein Moein 4 | // Distributed under the BSD Software License (see file License) 5 | 6 | #ifndef _INCLUDED_DMScu_Exception_h 7 | #define _INCLUDED_DMScu_Exception_h 0 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | // ---------------------------------------------------------------------------- 15 | 16 | class DMScu_Exception : public std::runtime_error { 17 | 18 | public: 19 | 20 | typedef unsigned int size_type; 21 | 22 | inline DMScu_Exception (const char *desc, 23 | bool is_fatal = true, 24 | const char *filename = "", 25 | size_type line_number = 0) throw () 26 | : std::runtime_error (desc), 27 | line_number_ (line_number), 28 | is_fatal_ (is_fatal) { 29 | 30 | std::strncpy (filename_, filename, sizeof (filename_) - 1); 31 | filename_ [sizeof (filename_) - 1] = 0; 32 | } 33 | 34 | inline bool is_fatal () const throw () { return (is_fatal_); } 35 | inline const char *file_name () const throw () { return (filename_); } 36 | inline size_type line_number () const throw () { 37 | 38 | return (line_number_); 39 | } 40 | 41 | std::ostream &dump (std::ostream &os) const { 42 | 43 | os << "EXCEPTION THROWN:\n" 44 | << " Description : " << what () << "\n" 45 | << " File Name : " << filename_ << "\n" 46 | << " Line Number : " << line_number_ << "\n" 47 | << " Is
Fatal : " << (is_fatal_ ? "Yes" : "No") 48 | << std::endl; 49 | return os; 50 | } 51 | 52 | private: 53 | 54 | char filename_ [64]; 55 | const size_type line_number_; 56 | const bool is_fatal_; 57 | }; 58 | 59 | // ---------------------------------------------------------------------------- 60 | 61 | #undef _INCLUDED_DMScu_Exception_h 62 | #define _INCLUDED_DMScu_Exception_h 1 63 | #endif // _INCLUDED_DMScu_Exception_h 64 | 65 | // Local Variables: 66 | // mode:C++ 67 | // tab-width:4 68 | // c-basic-offset:4 69 | // End: 70 | -------------------------------------------------------------------------------- /DMScu/src/DMScu_MMapFile.cc: -------------------------------------------------------------------------------- 1 | // Hossein Moein 2 | // August 21, 2007 3 | // Copyright (C) 2017-2018 Hossein Moein 4 | // Distributed under the BSD Software License (see file License) 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | // ---------------------------------------------------------------------------- 13 | 14 | bool DMScu_MMapFile::open () { 15 | 16 | if (is_open ()) 17 | throw DMScu_Exception ("DMScu_MMapFile::open(): " 18 | "The device is already open"); 19 | 20 | if ((_file_desc = ::open (get_file_name (), _file_open_flags, 21 | _file_open_mode)) > 0) { 22 | struct stat stat_data; 23 | 24 | if (! 
::fstat (_file_desc, &stat_data)) 25 | _file_size = stat_data.st_size; 26 | else { 27 | DMScu_FixedSizeString<4095> err; 28 | 29 | err.printf ("DMScu_MMapFile::open(): ::fstat(): (%d) %s --- %s", 30 | errno, strerror (errno), get_file_name ()); 31 | 32 | ::close (_file_desc); 33 | _file_desc = 0; 34 | throw DMScu_Exception (err.c_str ()); 35 | } 36 | } 37 | else { 38 | DMScu_FixedSizeString<4095> err; 39 | 40 | err.printf ("DMScu_MMapFile::open(): ::open(): (%d) %s --- %s", 41 | errno, ::strerror (errno), get_file_name ()); 42 | 43 | _file_desc = 0; 44 | throw DMScu_Exception (err.c_str ()); 45 | } 46 | 47 | return (_initial_map (_file_size, _mmap_prot, _mmap_flags, _file_desc)); 48 | } 49 | 50 | // ---------------------------------------------------------------------------- 51 | 52 | void DMScu_MMapFile::unlink () { 53 | 54 | if (is_open ()) 55 | close (); 56 | 57 | if (::unlink (get_file_name ()) < 0) { 58 | DMScu_FixedSizeString<4095> err; 59 | 60 | err.printf ("DMScu_MMapFile::unlink(): ::unlink(): (%d) %s --- %s", 61 | errno, ::strerror (errno), get_file_name ()); 62 | throw DMScu_Exception (err.c_str ()); 63 | } 64 | 65 | return; 66 | } 67 | 68 | // ---------------------------------------------------------------------------- 69 | 70 | // Local Variables: 71 | // mode:C++ 72 | // tab-width:4 73 | // c-basic-offset:4 74 | // End: 75 | -------------------------------------------------------------------------------- /include/DataFrame_opt.tcc: -------------------------------------------------------------------------------- 1 | // Hossein Moein 2 | // September 25, 2017 3 | // Copyright (C) 2017-2018 Hossein Moein 4 | // Distributed under the BSD Software License (see file License) 5 | 6 | #include 7 | 8 | // ---------------------------------------------------------------------------- 9 | 10 | namespace hmdf 11 | { 12 | 13 | // ---------------------------------------------------------------------------- 14 | 15 | template class DS> 16 | template 17 | bool 
DataFrame::is_equal (const DataFrame &rhs) const { 18 | 19 | if (data_tb_.size() != rhs.data_tb_.size()) 20 | return (false); 21 | if (timestamps_ != rhs.timestamps_) 22 | return (false); 23 | 24 | for (const auto &iter : data_tb_) { 25 | equal_functor_ functor (iter.first.c_str(), *this); 26 | 27 | rhs.data_[iter.second].change(functor); 28 | if (! functor.result) 29 | return (false); 30 | } 31 | 32 | return (true); 33 | } 34 | 35 | // ---------------------------------------------------------------------------- 36 | // For every index value found in both frames, runs mod_by_idx_functor_ (built from rhs and the two matching positions) on each column of this frame; returns *this. 37 | template class DS> 38 | template 39 | DataFrame &DataFrame:: 40 | modify_by_idx (DataFrame &rhs, bool already_sorted) { 41 | // Unless the caller says both are already sorted, sort rhs and this frame first (rhs is modified too). 42 | if (! already_sorted) { 43 | rhs.sort(); 44 | sort(); 45 | } 46 | 47 | const size_type lhs_s { timestamps_.size() }; 48 | const size_type rhs_s { rhs.timestamps_.size() }; 49 | size_type lhs_i { 0 }; 50 | // lhs_i only moves forward and is range-checked before every timestamps_ read. 51 | for (size_type rhs_i = 0; rhs_i < rhs_s; ++rhs_i) { 52 | if (lhs_i >= lhs_s) 53 | break; 54 | while (lhs_i < lhs_s && timestamps_[lhs_i] < rhs.timestamps_[rhs_i]) 55 | lhs_i += 1; 56 | if (lhs_i >= lhs_s) break; // this frame's index is exhausted; no further match is possible 57 | if (timestamps_[lhs_i] == rhs.timestamps_[rhs_i]) { 58 | for (auto &iter : data_tb_) { 59 | mod_by_idx_functor_ functor (iter.first.c_str(), 60 | rhs, 61 | lhs_i, 62 | rhs_i); 63 | 64 | data_[iter.second].change(functor); 65 | } 66 | 67 | lhs_i += 1; 68 | } 69 | else if (timestamps_[lhs_i] < rhs.timestamps_[rhs_i]) break; 70 | } 71 | 72 | return (*this); 73 | } 74 | 75 | } // namespace hmdf 76 | 77 | // ---------------------------------------------------------------------------- 78 | 79 | // Local Variables: 80 | // mode:C++ 81 | // tab-width:4 82 | // c-basic-offset:4 83 | // End: 84 | -------------------------------------------------------------------------------- /src/CommonMakefile.mk: -------------------------------------------------------------------------------- 1 | ## Hossein Moein 2 | ## September 12 2017 3 | 4 | LOCAL_LIB_DIR = ../lib/$(BUILD_PLATFORM) 5 | LOCAL_BIN_DIR = ../bin/$(BUILD_PLATFORM) 6 | LOCAL_OBJ_DIR =
../obj/$(BUILD_PLATFORM) 7 | LOCAL_INCLUDE_DIR = ../include 8 | PROJECT_LIB_DIR = ../../lib/$(BUILD_PLATFORM) 9 | PROJECT_INCLUDE_DIR = ../../include 10 | 11 | # ----------------------------------------------------------------------------- 12 | 13 | SRCS = BaseContainer.cc datasci_tester.cc 14 | 15 | HEADERS = $(LOCAL_INCLUDE_DIR)/BaseContainer.h \ 16 | $(LOCAL_INCLUDE_DIR)/BaseContainer.tcc \ 17 | $(LOCAL_INCLUDE_DIR)/DataFrame.h \ 18 | $(LOCAL_INCLUDE_DIR)/DataFrame.tcc \ 19 | $(LOCAL_INCLUDE_DIR)/DataFrame_misc.tcc \ 20 | $(LOCAL_INCLUDE_DIR)/DataFrame_set.tcc \ 21 | $(LOCAL_INCLUDE_DIR)/DataFrame_get.tcc \ 22 | $(LOCAL_INCLUDE_DIR)/DataFrame_read.tcc \ 23 | $(LOCAL_INCLUDE_DIR)/DataFrame_opt.tcc \ 24 | $(LOCAL_INCLUDE_DIR)/DFVisitors.h 25 | 26 | LIB_NAME = DataSci 27 | TARGET_LIB = $(LOCAL_LIB_DIR)/lib$(LIB_NAME).a 28 | 29 | TARGETS += $(TARGET_LIB) $(LOCAL_BIN_DIR)/datasci_tester 30 | 31 | # ----------------------------------------------------------------------------- 32 | 33 | LFLAGS += -Bstatic -L$(LOCAL_LIB_DIR) -L$(PROJECT_LIB_DIR) 34 | 35 | LIBS = $(LFLAGS) -l$(LIB_NAME) -lDMScu $(PLATFORM_LIBS) 36 | INCLUDES += -I. 
-I$(LOCAL_INCLUDE_DIR) -I$(PROJECT_INCLUDE_DIR) 37 | DEFINES = -D_REENTRANT -DDMS_INCLUDE_SOURCE \ 38 | -DP_THREADS -D_POSIX_PTHREAD_SEMANTICS -DDMS_$(BUILD_DEFINE)__ 39 | 40 | # ----------------------------------------------------------------------------- 41 | 42 | # object file 43 | # 44 | LIB_OBJS = $(LOCAL_OBJ_DIR)/BaseContainer.o 45 | 46 | # ----------------------------------------------------------------------------- 47 | 48 | # set up C++ suffixes and relationship between .cc and .o files 49 | # 50 | .SUFFIXES: .cc 51 | 52 | $(LOCAL_OBJ_DIR)/%.o: %.cc 53 | $(CXX) $(CXXFLAGS) -c $< -o $@ 54 | 55 | .cc : 56 | $(CXX) $(CXXFLAGS) $< -o $@ -lm $(TLIB) -lg++ 57 | # $(CXX) $(CXXFLAGS) $< -o $@ -lm $(TLIB) 58 | 59 | # ----------------------------------------------------------------------------- 60 | 61 | all: PRE_BUILD $(TARGETS) 62 | 63 | PRE_BUILD: 64 | mkdir -p $(LOCAL_LIB_DIR) 65 | mkdir -p $(LOCAL_BIN_DIR) 66 | mkdir -p $(LOCAL_OBJ_DIR) 67 | mkdir -p $(PROJECT_LIB_DIR) 68 | mkdir -p $(PROJECT_INCLUDE_DIR) 69 | 70 | $(TARGET_LIB): $(LIB_OBJS) 71 | ar -clrs $(TARGET_LIB) $(LIB_OBJS) 72 | 73 | DATASCI_TESTER_OBJ = $(LOCAL_OBJ_DIR)/datasci_tester.o 74 | $(LOCAL_BIN_DIR)/datasci_tester: $(TARGET_LIB) $(DATASCI_TESTER_OBJ) 75 | $(CXX) -o $@ $(DATASCI_TESTER_OBJ) $(LIBS) 76 | 77 | # ----------------------------------------------------------------------------- 78 | 79 | depend: 80 | makedepend $(CXXFLAGS) -Y $(SRCS) 81 | 82 | clean: 83 | rm -f $(LIB_OBJS) $(TARGETS) $(DATASCI_TESTER_OBJ) 84 | 85 | clobber: 86 | rm -f $(LIB_OBJS) $(TARGETS) $(DATASCI_TESTER_OBJ) 87 | 88 | install_lib: 89 | cp -pf $(TARGET_LIB) $(PROJECT_LIB_DIR)/. 90 | 91 | install_hdr: 92 | cp -pf $(HEADERS) $(PROJECT_INCLUDE_DIR)/. 
93 | 94 | # ----------------------------------------------------------------------------- 95 | 96 | ## Local Variables: 97 | ## mode:Makefile 98 | ## tab-width:4 99 | ## End: 100 | -------------------------------------------------------------------------------- /DMScu/src/CommonMakefile.mk: -------------------------------------------------------------------------------- 1 | ## Hossein Moein 2 | ## July 17 2009 3 | 4 | LOCAL_LIB_DIR = ../lib/$(BUILD_PLATFORM) 5 | LOCAL_BIN_DIR = ../bin/$(BUILD_PLATFORM) 6 | LOCAL_OBJ_DIR = ../obj/$(BUILD_PLATFORM) 7 | LOCAL_INCLUDE_DIR = ../include 8 | PROJECT_LIB_DIR = ../../../lib/$(BUILD_PLATFORM) 9 | PROJECT_INCLUDE_DIR = ../../../include 10 | 11 | # ----------------------------------------------------------------------------- 12 | 13 | SRCS = 14 | 15 | HEADERS = $(LOCAL_INCLUDE_DIR)/DMScu_FixedSizeString.h \ 16 | $(LOCAL_INCLUDE_DIR)/DMScu_Exception.h \ 17 | $(LOCAL_INCLUDE_DIR)/DMScu_FileBase.h \ 18 | $(LOCAL_INCLUDE_DIR)/DMScu_FileDef.h \ 19 | $(LOCAL_INCLUDE_DIR)/DMScu_MMapBase.h \ 20 | $(LOCAL_INCLUDE_DIR)/DMScu_MMapFile.h 21 | 22 | LIB_NAME = DMScu 23 | TARGET_LIB = $(LOCAL_LIB_DIR)/lib$(LIB_NAME).a 24 | 25 | TARGETS += $(TARGET_LIB) \ 26 | $(LOCAL_BIN_DIR)/fixsizestr_tester \ 27 | $(LOCAL_BIN_DIR)/filebase_tester \ 28 | $(LOCAL_BIN_DIR)/mmfile_tester 29 | 30 | # ----------------------------------------------------------------------------- 31 | 32 | LFLAGS += -Bstatic -L$(LOCAL_LIB_DIR) -L$(PROJECT_LIB_DIR) 33 | 34 | LIBS = $(LFLAGS) -l$(LIB_NAME) $(PLATFORM_LIBS) 35 | INCLUDES += -I. 
-I$(LOCAL_INCLUDE_DIR) -I$(PROJECT_INCLUDE_DIR) 36 | DEFINES = -D_REENTRANT -DDMS_INCLUDE_SOURCE \ 37 | -DP_THREADS -D_POSIX_PTHREAD_SEMANTICS -DDMS_$(BUILD_DEFINE)__ 38 | 39 | # ----------------------------------------------------------------------------- 40 | 41 | # object file 42 | # 43 | LIB_OBJS = $(LOCAL_OBJ_DIR)/DMScu_FileBase.o \ 44 | $(LOCAL_OBJ_DIR)/DMScu_MMapBase.o \ 45 | $(LOCAL_OBJ_DIR)/DMScu_MMapFile.o 46 | 47 | # ----------------------------------------------------------------------------- 48 | 49 | # set up C++ suffixes and relationship between .cc and .o files 50 | # 51 | .SUFFIXES: .cc 52 | 53 | $(LOCAL_OBJ_DIR)/%.o: %.cc 54 | $(CXX) $(CXXFLAGS) -c $< -o $@ 55 | 56 | .cc : 57 | $(CXX) $(CXXFLAGS) $< -o $@ -lm $(TLIB) -lg++ 58 | 59 | # ----------------------------------------------------------------------------- 60 | 61 | all: PRE_BUILD $(TARGETS) 62 | 63 | PRE_BUILD: 64 | mkdir -p $(LOCAL_LIB_DIR) 65 | mkdir -p $(LOCAL_BIN_DIR) 66 | mkdir -p $(LOCAL_OBJ_DIR) 67 | mkdir -p $(PROJECT_LIB_DIR) 68 | mkdir -p $(PROJECT_INCLUDE_DIR) 69 | 70 | $(TARGET_LIB): $(LIB_OBJS) 71 | ar -clrs $(TARGET_LIB) $(LIB_OBJS) 72 | 73 | FIXSIZESTR_TESTER_OBJ = $(LOCAL_OBJ_DIR)/fixsizestr_tester.o 74 | $(LOCAL_BIN_DIR)/fixsizestr_tester: $(TARGET_LIB) $(FIXSIZESTR_TESTER_OBJ) 75 | $(CXX) -o $@ $(FIXSIZESTR_TESTER_OBJ) $(LIBS) 76 | 77 | FILEBASE_TESTER_OBJ = $(LOCAL_OBJ_DIR)/filebase_tester.o 78 | $(LOCAL_BIN_DIR)/filebase_tester: $(TARGET_LIB) $(FILEBASE_TESTER_OBJ) 79 | $(CXX) -o $@ $(FILEBASE_TESTER_OBJ) $(LIBS) 80 | 81 | MMFILE_TESTER_OBJ = $(LOCAL_OBJ_DIR)/mmfile_tester.o 82 | $(LOCAL_BIN_DIR)/mmfile_tester: $(TARGET_LIB) $(MMFILE_TESTER_OBJ) 83 | $(CXX) -o $@ $(MMFILE_TESTER_OBJ) $(LIBS) 84 | 85 | # ----------------------------------------------------------------------------- 86 | 87 | depend: 88 | makedepend $(CXXFLAGS) -Y $(SRCS) 89 | 90 | clean: 91 | rm -f $(LIB_OBJS) 92 | 93 | clobber: 94 | rm -f $(LIB_OBJS) $(TARGETS) $(FIXSIZESTR_TESTER_OBJ) \ 95 | 
$(FILEBASE_TESTER_OBJ) $(MMFILE_TESTER_OBJ) 96 | 97 | install_lib: 98 | cp -pf $(TARGET_LIB) $(PROJECT_LIB_DIR)/. 99 | 100 | install_hdr: 101 | cp -pf $(HEADERS) $(PROJECT_INCLUDE_DIR)/. 102 | 103 | # ----------------------------------------------------------------------------- 104 | 105 | ## Local Variables: 106 | ## mode:Makefile 107 | ## tab-width:4 108 | ## End: 109 | -------------------------------------------------------------------------------- /DMScu/include/DMScu_FileDef.h: -------------------------------------------------------------------------------- 1 | // Hossein Moein 2 | // September 21, 2007 3 | // Copyright (C) 2017-2018 Hossein Moein 4 | // Distributed under the BSD Software License (see file License) 5 | 6 | #ifndef _INCLUDED_DMScu_FileDef_h 7 | #define _INCLUDED_DMScu_FileDef_h 0 8 | 9 | // ---------------------------------------------------------------------------- 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | // ---------------------------------------------------------------------------- 17 | 18 | class DMScu_FileDef { 19 | 20 | public: 21 | 22 | enum OPEN_MODE { _read_ = 1, _write_ = 2, _append_ = 4, 23 | _bread_ = 8, _bwrite_ = 16, _bappend_ = 32 }; 24 | enum SEEK_TYPE { _seek_set_ = 1, _seek_cur_ = 2, _seek_end_ = 4 }; 25 | enum CLOSE_MODE { _normal_, _to_offset_ }; 26 | enum DEVICE_TYPE { _shared_memory_, _mmap_file_, _stream_file_ }; 27 | 28 | typedef unsigned long long int size_type; 29 | typedef unsigned int flag_type; 30 | 31 | static const size_type NOVAL = static_cast(-1); 32 | 33 | protected: 34 | 35 | static const size_type MIN_BUFFER_SIZE = 65536LL; 36 | static const size_type BUFFER_SIZE = MIN_BUFFER_SIZE * 16LL; 37 | 38 | public: 39 | 40 | inline void set_width (flag_type the_width) throw () { 41 | 42 | _width = the_width; 43 | } 44 | inline void set_precision (short pre) throw () { _precision = pre; } 45 | inline bool is_ok () const throw () { return (_good_flag); } 46 | inline bool is_eof () const throw () { 
47 | 48 | return (_current_offset >= _file_size); 49 | } 50 | inline short get_precision () const throw () { return (_precision); } 51 | inline size_type get_file_size () const throw () { 52 | 53 | return (_file_size); 54 | } 55 | inline DEVICE_TYPE get_device_type () const throw () { 56 | 57 | return (device_type_); 58 | } 59 | inline size_type tell () const throw () { 60 | 61 | return (_file_flags & _in_use_ 62 | ? _current_offset : static_cast(-1)); 63 | } 64 | inline const char *get_file_name () const throw () { 65 | 66 | return (file_name_.c_str ()); 67 | } 68 | 69 | inline DMScu_FileDef (const char *file_name, 70 | OPEN_MODE om, 71 | DEVICE_TYPE dt) throw () 72 | : _width (~0U), 73 | _file_flags (_not_in_use_), 74 | _precision (6), 75 | _good_flag (false), 76 | _current_offset (0), 77 | _file_size (0), 78 | open_mode_ (om), 79 | device_type_ (dt), 80 | file_name_ (file_name) { } 81 | 82 | protected: 83 | 84 | enum STATE { _written_ = 64, _touched_ = 128, _in_use_ = 256, 85 | _s_read_ = 1, _s_write_ = 2, _s_append_ = 4, 86 | _s_bread_ = 8, _s_bwrite_ = 16, _s_bappend_ = 32, 87 | _not_in_use_ = 0, _already_opened_ = 512 }; 88 | 89 | flag_type _width; 90 | flag_type _file_flags; 91 | short _precision; 92 | bool _good_flag; 93 | size_type _current_offset; 94 | size_type _file_size; 95 | 96 | inline OPEN_MODE _get_open_mode () const throw () { 97 | 98 | return (open_mode_); 99 | } 100 | 101 | inline static bool 102 | _is_in_list (const char this_char, const char *char_list) throw () { 103 | 104 | const char *str = char_list; 105 | 106 | while (*str) 107 | if (this_char ^ *str) 108 | str += 1; 109 | else 110 | return (true); 111 | 112 | return (false); 113 | } 114 | 115 | private: 116 | 117 | const OPEN_MODE open_mode_; 118 | const DEVICE_TYPE device_type_; 119 | const DMScu_FixedSizeString<2047> file_name_; 120 | 121 | // It's not gonna happen 122 | // 123 | DMScu_FileDef (); 124 | }; 125 | 126 | // 
---------------------------------------------------------------------------- 127 | 128 | #undef _INCLUDED_DMScu_FileDef_h 129 | #define _INCLUDED_DMScu_FileDef_h 1 130 | #endif // _INCLUDED_DMScu_FileDef_h 131 | 132 | // Local Variables: 133 | // mode:C++ 134 | // tab-width:4 135 | // c-basic-offset:4 136 | // End: 137 | -------------------------------------------------------------------------------- /include/BaseContainer.h: -------------------------------------------------------------------------------- 1 | // Hossein Moein 2 | // September 11, 2017 3 | // Copyright (C) 2017-2018 Hossein Moein 4 | // Distributed under the BSD Software License (see file License) 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | // #include 15 | 16 | // ---------------------------------------------------------------------------- 17 | 18 | namespace hmdf 19 | { 20 | 21 | // This class implements a heterogeneous vector. Its design and implementation 22 | // are partly inspired by Andy G's Blog at: 23 | // https://gieseanw.wordpress.com/2017/05/03/a-true-heterogeneous-container/ 24 | // 25 | struct HeteroVector { 26 | 27 | public: 28 | 29 | HeteroVector(); 30 | HeteroVector(const HeteroVector &that); 31 | HeteroVector(HeteroVector &&that); 32 | 33 | ~HeteroVector() { clear(); } 34 | 35 | HeteroVector &operator= (const HeteroVector &rhs); 36 | HeteroVector &operator= (HeteroVector &&rhs); 37 | 38 | template> 39 | V &get_vec(); 40 | template> 41 | const V &get_vec() const; 42 | 43 | template> 44 | void push_back(const T &v); 45 | template, class... Args> 46 | void emplace_back (Args &&... args); 47 | template, class... Args> 48 | void emplace (typename V::const_iterator pos, Args &&... 
args); 49 | 50 | template> 51 | void reserve (typename V::size_type r) { get_vec().reserve (r); } 52 | template> 53 | void shrink_to_fit () { get_vec().shrink_to_fit (); } 54 | 55 | template> 56 | typename V::size_type size () const { return (get_vec().size()); } 57 | 58 | void clear(); 59 | 60 | template> 61 | void resize(typename V::size_type count); 62 | template> 63 | void resize(typename V::size_type count, const T &v); 64 | 65 | template> 66 | void pop_back(); 67 | 68 | template> 69 | bool empty() const noexcept; 70 | 71 | template> 72 | T &at(typename V::size_type idx); 73 | template> 74 | const T &at(typename V::size_type idx) const; 75 | 76 | template> 77 | T &back(); 78 | template> 79 | const T &back() const; 80 | 81 | template> 82 | T &front(); 83 | template> 84 | const T &front() const; 85 | 86 | private: 87 | 88 | template> 89 | static std::unordered_map items_; 90 | 91 | std::vector> clear_functions_; 92 | std::vector> copy_functions_; 94 | std::vector> move_functions_; 96 | 97 | // Visitor stuff 98 | // 99 | template 100 | void visit_impl_help_ (T &visitor); 101 | template 102 | void visit_impl_help_ (T &visitor) const; 103 | 104 | template 105 | void sort_impl_help_ (T &functor); 106 | 107 | template 108 | void change_impl_help_ (T &functor); 109 | template 110 | void change_impl_help_ (T &functor) const; 111 | 112 | // Specific visit implementations 113 | // 114 | template class TLIST, class... TYPES> 115 | void visit_impl_ (T &&visitor, TLIST); 116 | template class TLIST, class... TYPES> 117 | void visit_impl_ (T &&visitor, TLIST) const; 118 | 119 | template class TLIST, class... TYPES> 120 | void sort_impl_ (T &&functor, TLIST); 121 | 122 | template class TLIST, class... TYPES> 123 | void change_impl_ (T &&functor, TLIST); 124 | template class TLIST, class... 
TYPES> 125 | void change_impl_ (T &&functor, TLIST) const; 126 | 127 | public: 128 | 129 | template 130 | struct type_list { }; 131 | 132 | template 133 | struct visitor_base { using types = type_list; }; 134 | 135 | template 136 | void visit (T &&visitor) { 137 | 138 | visit_impl_ (visitor, typename std::decay_t::types { }); 139 | } 140 | template 141 | void visit (T &&visitor) const { 142 | 143 | visit_impl_ (visitor, typename std::decay_t::types { }); 144 | } 145 | template 146 | void sort (T &&functor) { 147 | 148 | sort_impl_ (functor, typename std::decay_t::types { }); 149 | } 150 | template 151 | void change (T &&functor) { 152 | 153 | change_impl_ (functor, typename std::decay_t::types { }); 154 | } 155 | template 156 | void change (T &&functor) const { 157 | 158 | change_impl_ (functor, typename std::decay_t::types { }); 159 | } 160 | }; 161 | 162 | } // namespace hmdf 163 | 164 | // ---------------------------------------------------------------------------- 165 | 166 | # ifdef DMS_INCLUDE_SOURCE 167 | # include 168 | # endif // DMS_INCLUDE_SOURCE 169 | 170 | // ---------------------------------------------------------------------------- 171 | 172 | // Local Variables: 173 | // mode:C++ 174 | // tab-width:4 175 | // c-basic-offset:4 176 | // End: 177 | -------------------------------------------------------------------------------- /DMScu/src/filebase_tester.cc: -------------------------------------------------------------------------------- 1 | // Hossein Moein 2 | // September 25, 2007 3 | // Copyright (C) 2017-2018 Hossein Moein 4 | // Distributed under the BSD Software License (see file License) 5 | 6 | #include 7 | 8 | bool WriteFile (DMScu_FileBase &); 9 | bool ReadFile (DMScu_FileBase &); 10 | 11 | //----------------------------------------------------------------------------- 12 | 13 | int main (int argCnt, char *argVctr []) { 14 | 15 | // DMScu_FileBase fbase_file ("test.fbase", DMScu_FileBase::_bwrite_, 10000); 16 | DMScu_FileBase fbase_file 
("test.fbase", DMScu_FileBase::_write_, 10000); 17 | 18 | if (! fbase_file.is_ok ()) { 19 | printf ("Unable to create the fbase file\n"); 20 | return (EXIT_FAILURE); 21 | } 22 | 23 | if (WriteFile (fbase_file) < 0) { 24 | printf ("Error in writing\n"); 25 | return (EXIT_FAILURE); 26 | } 27 | 28 | fbase_file.close (); 29 | // DMScu_FileBase read_file ("test.fbase", DMScu_FileBase::_bread_); 30 | DMScu_FileBase read_file ("test.fbase", DMScu_FileBase::_read_); 31 | 32 | read_file.close (); 33 | read_file.open (); 34 | 35 | if (! read_file.is_ok ()) { 36 | printf ("Unable to open file for reading\n"); 37 | return (EXIT_FAILURE); 38 | } 39 | 40 | if (ReadFile (read_file) < 0) { 41 | printf ("Error in writing\n"); 42 | return (EXIT_FAILURE); 43 | } 44 | 45 | read_file.close (); 46 | read_file.open (); 47 | 48 | DMScu_FileBase write_file ("test.fbase", DMScu_FileBase::_append_, 10000); 49 | 50 | write_file.close (); 51 | write_file.open (); 52 | write_file.close (); 53 | write_file.open (); 54 | 55 | DMScu_FileBase read_file2 ("test.fbase", DMScu_FileBase::_read_); 56 | 57 | read_file2.close (); 58 | read_file2.open (); 59 | 60 | char buffer [2048]; 61 | 62 | read_file2.go_to_line (3); 63 | read_file2.get_line (buffer); 64 | std::cout << "Line 3 is: " << buffer << std::endl; 65 | write_file.go_to_line (8); 66 | write_file.get_line (buffer); 67 | std::cout << "Line 8 is: " << buffer << std::endl; 68 | write_file.go_to_line (0); 69 | write_file.get_line (buffer); 70 | std::cout << "Line 0 is: " << buffer << std::endl; 71 | read_file2.go_to_line (10); 72 | read_file2.get_line (buffer); 73 | std::cout << "Line 10 is: " << buffer << std::endl; 74 | read_file2.go_to_line (5); 75 | read_file2.get_line (buffer); 76 | std::cout << "Line 5 is: " << buffer << std::endl; 77 | read_file2.go_to_line (0); 78 | read_file2.get_line (buffer); 79 | std::cout << "Line 0 is: " << buffer << std::endl; 80 | read_file2.go_to_line (10); 81 | read_file2.get_line (buffer); 82 | std::cout << "Line 
10 is: " << buffer << std::endl; 83 | 84 | // Throw an exception 85 | // 86 | try { 87 | read_file2.go_to_line (1000); 88 | read_file2.get_line (buffer); 89 | std::cout << "Line 10000 is: " << buffer << std::endl; 90 | } 91 | catch (DMScu_Exception &ex) { 92 | std::cerr << "Expected EXCEPTION: " << ex.what () << std::endl; 93 | } 94 | 95 | read_file2.go_to_line (3); 96 | read_file2.get_line (buffer); 97 | std::cout << "Line 3 is: " << buffer << std::endl; 98 | read_file2.go_to_line (8); 99 | read_file2.get_line (buffer); 100 | std::cout << "Line 8 is: " << buffer << std::endl; 101 | read_file2.go_to_line (0); 102 | read_file2.get_line (buffer); 103 | std::cout << "Line 0 is: " << buffer << std::endl; 104 | read_file2.go_to_line (10); 105 | read_file2.get_line (buffer); 106 | std::cout << "Line 10 is: " << buffer << std::endl; 107 | read_file2.go_to_line (5); 108 | read_file2.get_line (buffer); 109 | std::cout << "Line 5 is: " << buffer << std::endl; 110 | read_file2.go_to_line (0); 111 | read_file2.get_line (buffer); 112 | std::cout << "Line 0 is: " << buffer << std::endl; 113 | read_file2.go_to_line (10); 114 | read_file2.get_line (buffer); 115 | std::cout << "Line 10 is: " << buffer << std::endl; 116 | 117 | read_file2.close (); 118 | write_file.unlink (); 119 | 120 | DMScu_FileBase c_file ("/export/home/moeinh/work/HITS/src/DMScu/src/" 121 | "DMScu_FileBase.cc", 122 | DMScu_FileBase::_read_, 10000); 123 | char line [1024]; 124 | 125 | while (! 
c_file.is_eof ()) { 126 | c_file.get_line (line); 127 | std::cout << line << std::endl; 128 | } 129 | 130 | return (EXIT_SUCCESS); 131 | } 132 | 133 | //----------------------------------------------------------------------------- 134 | 135 | bool WriteFile (DMScu_FileBase & fbase_file) { 136 | 137 | const double dvar = 1.67890128976584; 138 | 139 | fbase_file.set_precision (3); 140 | for (int index = 0; index < 1000; ++index) { 141 | fbase_file << dvar + index << '\n'; 142 | } 143 | 144 | return (true); 145 | } 146 | 147 | //----------------------------------------------------------------------------- 148 | 149 | bool ReadFile (DMScu_FileBase &fbase_file) { 150 | 151 | double var = 0.0; 152 | 153 | fbase_file.set_precision (3); 154 | while (! fbase_file.is_eof ()) { 155 | try { 156 | fbase_file >> var; 157 | fbase_file.get_char (); // Read the linefeed 158 | } 159 | catch (DMScu_Exception &ex) { 160 | std::cerr << "EXCEPTION: " << ex.what () << std::endl; 161 | return -1; 162 | } 163 | std::cout << var << std::endl; 164 | } 165 | 166 | // Just to see it throwing an exception 167 | // 168 | try { 169 | fbase_file >> var; 170 | } 171 | catch (DMScu_Exception &ex) { 172 | std::cerr << "Expected EXCEPTION: " << ex.what () << std::endl; 173 | return (true); 174 | } 175 | 176 | return (true); 177 | } 178 | 179 | //----------------------------------------------------------------------------- 180 | 181 | // Local Variables: 182 | // mode:C++ 183 | // tab-width:4 184 | // c-basic-offset:4 185 | // End: 186 | -------------------------------------------------------------------------------- /DMScu/src/mmfile_tester.cc: -------------------------------------------------------------------------------- 1 | // Hossein Moein 2 | // August 21, 2007 3 | 4 | #include 5 | 6 | #include 7 | 8 | int WriteFile (DMScu_MMapFile &); 9 | int ReadFile (DMScu_MMapFile &); 10 | 11 | //----------------------------------------------------------------------------- 12 | 13 | int main (int argCnt, 
char *argVctr []) { 14 | 15 | // DMScu_MMapFile mmap_file ("test.mmap", DMScu_MMapFile::_bwrite_, 10000); 16 | DMScu_MMapFile mmap_file ("test.mmap", DMScu_MMapFile::_write_, 10000); 17 | 18 | if (! mmap_file.is_ok ()) { 19 | printf ("Unable to create the mmap file\n"); 20 | return EXIT_FAILURE; 21 | } 22 | 23 | if (WriteFile (mmap_file) < 0) { 24 | printf ("Error in writing\n"); 25 | return EXIT_FAILURE; 26 | } 27 | 28 | mmap_file.close (); 29 | // DMScu_MMapFile read_file ("test.mmap", DMScu_MMapFile::_bread_); 30 | DMScu_MMapFile read_file ("test.mmap", DMScu_MMapFile::_read_); 31 | 32 | read_file.close (); 33 | read_file.open (); 34 | 35 | if (! read_file.is_ok ()) { 36 | printf ("Unable to open file for reading\n"); 37 | return EXIT_FAILURE; 38 | } 39 | 40 | if (ReadFile (read_file) < 0) { 41 | printf ("Error in writing\n"); 42 | return EXIT_FAILURE; 43 | } 44 | 45 | read_file.close (); 46 | read_file.open (); 47 | 48 | DMScu_MMapFile write_file ("test.mmap", DMScu_MMapFile::_append_, 10000); 49 | 50 | /* 51 | std::ifstream cifs ("/export/home/moeinh/work/HITS/src/DMScu/" 52 | "src/DMScu_MMapBase.cc"); 53 | std::ifstream hifs ("/export/home/moeinh/work/HITS/src/DMScu/" 54 | "include/DMScu_MMapBase.h"); 55 | 56 | write_file.close (); 57 | write_file.open (); 58 | write_file.close (); 59 | write_file.open (); 60 | 61 | write_file << cifs; 62 | write_file << hifs; 63 | 64 | cifs.close (); 65 | hifs.close (); 66 | write_file.close (); 67 | 68 | DMScu_MMapFile read_file2 ("test.mmap", DMScu_MMapFile::_read_); 69 | 70 | read_file2.close (); 71 | read_file2.open (); 72 | 73 | char buffer [2048]; 74 | 75 | read_file2.go_to_line (3); 76 | read_file2.get_line (buffer); 77 | std::cout << "Line 3 is: " << buffer << std::endl; 78 | read_file2.go_to_line (8); 79 | read_file2.get_line (buffer); 80 | std::cout << "Line 8 is: " << buffer << std::endl; 81 | read_file2.go_to_line (0); 82 | read_file2.get_line (buffer); 83 | std::cout << "Line 0 is: " << buffer << std::endl; 84 | 
read_file2.go_to_line (10); 85 | read_file2.get_line (buffer); 86 | std::cout << "Line 10 is: " << buffer << std::endl; 87 | read_file2.go_to_line (5); 88 | read_file2.get_line (buffer); 89 | std::cout << "Line 5 is: " << buffer << std::endl; 90 | read_file2.go_to_line (0); 91 | read_file2.get_line (buffer); 92 | std::cout << "Line 0 is: " << buffer << std::endl; 93 | read_file2.go_to_line (10); 94 | read_file2.get_line (buffer); 95 | std::cout << "Line 10 is: " << buffer << std::endl; 96 | 97 | // Throw an exception 98 | // 99 | try { 100 | read_file2.go_to_line (1000); 101 | read_file2.get_line (buffer); 102 | std::cout << "Line 10000 is: " << buffer << std::endl; 103 | } 104 | catch (DMScu_Exception &ex) { 105 | std::cerr << "EXCEPTION: " << ex.what () << std::endl; 106 | } 107 | 108 | std::cout << "Testing remap() ..." << std::endl; 109 | 110 | read_file2.remap (10000); 111 | read_file2.go_to_line (3); 112 | read_file2.get_line (buffer); 113 | std::cout << "Line 3 is: " << buffer << std::endl; 114 | read_file2.go_to_line (8); 115 | read_file2.get_line (buffer); 116 | std::cout << "Line 8 is: " << buffer << std::endl; 117 | read_file2.go_to_line (0); 118 | read_file2.get_line (buffer); 119 | std::cout << "Line 0 is: " << buffer << std::endl; 120 | read_file2.go_to_line (10); 121 | read_file2.get_line (buffer); 122 | std::cout << "Line 10 is: " << buffer << std::endl; 123 | read_file2.go_to_line (5); 124 | read_file2.get_line (buffer); 125 | std::cout << "Line 5 is: " << buffer << std::endl; 126 | read_file2.go_to_line (0); 127 | read_file2.get_line (buffer); 128 | std::cout << "Line 0 is: " << buffer << std::endl; 129 | read_file2.go_to_line (10); 130 | read_file2.get_line (buffer); 131 | std::cout << "Line 10 is: " << buffer << std::endl; 132 | 133 | std::cout << "System Page Size: " << DMScu_MMapBase::SYSTEM_PAGE_SIZE 134 | << std::endl; 135 | 136 | read_file2.close (); 137 | write_file.unlink (); 138 | */ 139 | return EXIT_SUCCESS; 140 | } 141 | 142 | 
//----------------------------------------------------------------------------- 143 | 144 | int WriteFile (DMScu_MMapFile & mmapFile) { 145 | 146 | const double dvar = 1.67890128976584; 147 | // const double dvar = 1.6; 148 | 149 | mmapFile.set_precision (3); 150 | for (int index = 0; index < 1000; ++index) 151 | // mmapFile << dvar + index; 152 | mmapFile << dvar + index << '\n'; 153 | 154 | return 0; 155 | } 156 | 157 | //----------------------------------------------------------------------------- 158 | 159 | int ReadFile (DMScu_MMapFile &mmapFile) { 160 | 161 | double var = 0.0; 162 | 163 | mmapFile.set_precision (3); 164 | while (! mmapFile.is_eof ()) { 165 | try { 166 | mmapFile >> var; 167 | } 168 | catch (DMScu_Exception &ex) { 169 | std::cerr << "EXCEPTION: " << ex.what () << std::endl; 170 | return -1; 171 | } 172 | std::cout << var << std::endl; 173 | } 174 | 175 | // Just to see it throwing an exception 176 | // 177 | // std::cerr << "This exception is supposed to happen:" << std::endl; 178 | // try { 179 | // mmapFile >> var; 180 | // } 181 | // catch (DMScu_Exception &ex) { 182 | // std::cerr << "EXCEPTION: " << ex.what () << std::endl; 183 | // return 0; 184 | // } 185 | 186 | return 0; 187 | } 188 | 189 | //----------------------------------------------------------------------------- 190 | 191 | // Local Variables: 192 | // mode:C++ 193 | // tab-width:4 194 | // c-basic-offset:4 195 | // End: 196 | -------------------------------------------------------------------------------- /include/DataFrame_read.tcc: -------------------------------------------------------------------------------- 1 | // Hossein Moein 2 | // September 12, 2017 3 | // Copyright (C) 2017-2018 Hossein Moein 4 | // Distributed under the BSD Software License (see file License) 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | // ---------------------------------------------------------------------------- 11 | 12 | namespace hmdf 13 | { 14 | 15 | #define gcc_likely(x) 
__builtin_expect(!!(x), 1) 16 | #define gcc_unlikely(x) __builtin_expect(!!(x), 0) 17 | 18 | template class DS> 19 | bool DataFrame::read (const char *file_name) { 20 | 21 | DMScu_MMapFile file (file_name, 22 | DMScu_MMapFile::_read_, 23 | DMScu_MMapBase::SYSTEM_PAGE_SIZE * 2); 24 | 25 | bool beg_line = true; 26 | char value[1024]; 27 | char type[64]; 28 | 29 | while (! file.is_eof ()) { 30 | char c = static_cast(file.get_char()); 31 | 32 | if (c == '#' || c == '\n' || c == '\0') { 33 | if (c == '#') 34 | while (! file.is_eof ()) { 35 | c = static_cast(file.get_char()); 36 | if (c == '\n') 37 | break; 38 | } 39 | continue; 40 | } 41 | file.put_back(); 42 | 43 | file.get_token(':', value); 44 | if (! ::strcmp(value, "INDEX")) { 45 | TSVec vec; 46 | 47 | while (! file.is_eof ()) { 48 | c = static_cast(file.get_char()); 49 | if (gcc_unlikely(c == '\n')) 50 | break; 51 | file.put_back(); 52 | file.get_token(',', value); 53 | vec.push_back(static_cast(atoll(value))); 54 | } 55 | load_index(std::forward(vec)); 56 | } 57 | else { 58 | c = static_cast(file.get_char()); 59 | if (c != '<') 60 | throw DataFrameError ("DataFrame::read(): ERROR: Expected " 61 | "'<' char to specify column type"); 62 | file.get_token('>', type); 63 | c = static_cast(file.get_char()); 64 | if (c != ':') 65 | throw DataFrameError ("DataFrame::read(): ERROR: Expected " 66 | "':' char to start column values"); 67 | 68 | if (! ::strcmp(type, "double")) { 69 | DS &vec = create_column(value); 70 | 71 | while (! file.is_eof ()) { 72 | c = static_cast(file.get_char()); 73 | if (gcc_unlikely(c == '\n')) 74 | break; 75 | file.put_back(); 76 | file.get_token(',', value); 77 | vec.push_back(atof(value)); 78 | } 79 | } 80 | else if (! ::strcmp(type, "int")) { 81 | DS &vec = create_column(value); 82 | 83 | while (! 
file.is_eof ()) { 84 | c = static_cast(file.get_char()); 85 | if (gcc_unlikely(c == '\n')) 86 | break; 87 | file.put_back(); 88 | file.get_token(',', value); 89 | vec.push_back(atoi(value)); 90 | } 91 | } 92 | else if (! ::strcmp(type, "uint")) { 93 | DS &vec = create_column(value); 94 | 95 | while (! file.is_eof ()) { 96 | c = static_cast(file.get_char()); 97 | if (gcc_unlikely(c == '\n')) 98 | break; 99 | file.put_back(); 100 | file.get_token(',', value); 101 | vec.push_back(static_cast(atol(value))); 102 | } 103 | } 104 | else if (! ::strcmp(type, "long")) { 105 | DS &vec = create_column(value); 106 | 107 | while (! file.is_eof ()) { 108 | c = static_cast(file.get_char()); 109 | if (gcc_unlikely(c == '\n')) 110 | break; 111 | file.put_back(); 112 | file.get_token(',', value); 113 | vec.push_back(atol(value)); 114 | } 115 | } 116 | else if (! ::strcmp(type, "ulong")) { 117 | DS &vec = create_column(value); 118 | 119 | while (! file.is_eof ()) { 120 | c = static_cast(file.get_char()); 121 | if (gcc_unlikely(c == '\n')) 122 | break; 123 | file.put_back(); 124 | file.get_token(',', value); 125 | vec.push_back(static_cast(atoll(value))); 126 | } 127 | } 128 | else if (! ::strcmp(type, "string")) { 129 | DS &vec = create_column(value); 130 | 131 | while (! file.is_eof ()) { 132 | c = static_cast(file.get_char()); 133 | if (gcc_unlikely(c == '\n')) 134 | break; 135 | file.put_back(); 136 | file.get_token(',', value); 137 | vec.push_back(value); 138 | } 139 | } 140 | else if (! 
::strcmp(type, "bool")) { 141 | DS &vec = create_column(value); 142 | } 143 | else 144 | throw DataFrameError ("DataFrame::read(): ERROR: Unknown " 145 | "column type"); 146 | } 147 | } 148 | 149 | file.close(); 150 | return(true); 151 | } 152 | 153 | // ---------------------------------------------------------------------------- 154 | 155 | template class DS> 156 | std::future DataFrame:: 157 | read_async (const char *file_name) { 158 | 159 | return (std::async(std::launch::async, &DataFrame::read, this, file_name)); 160 | } 161 | 162 | } // namespace hmdf 163 | 164 | // ---------------------------------------------------------------------------- 165 | 166 | // Local Variables: 167 | // mode:C++ 168 | // tab-width:4 169 | // c-basic-offset:4 170 | // End: 171 | -------------------------------------------------------------------------------- /include/BaseContainer.tcc: -------------------------------------------------------------------------------- 1 | // Hossein Moein 2 | // September 12, 2017 3 | // Copyright (C) 2017-2018 Hossein Moein 4 | // Distributed under the BSD Software License (see file License) 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | // ---------------------------------------------------------------------------- 11 | 12 | namespace hmdf 13 | { 14 | 15 | template 16 | std::unordered_map HeteroVector::items_; 17 | 18 | // ---------------------------------------------------------------------------- 19 | 20 | template 21 | V &HeteroVector::get_vec() { 22 | 23 | auto iter = items_.find (this); 24 | 25 | // don't have it yet, so create functions for copying and destroying 26 | if (iter == items_.end()) { 27 | clear_functions_.emplace_back ( 28 | [](HeteroVector &hv) { items_.erase(&hv); }); 29 | 30 | // if someone copies me, they need to call each 31 | // copy_function and pass themself 32 | copy_functions_.emplace_back ( 33 | [](const HeteroVector &from, HeteroVector &to) { 34 | items_[&to] = items_[&from]; 35 | }); 36 | 37 | 
move_functions_.emplace_back ( 38 | [](HeteroVector &from, HeteroVector &to) { 39 | items_[&to] = std::move(items_[&from]); 40 | }); 41 | 42 | iter = items_.emplace (this, V()).first; 43 | } 44 | 45 | return (iter->second); 46 | } 47 | 48 | // ---------------------------------------------------------------------------- 49 | 50 | template 51 | const V &HeteroVector::get_vec() const { 52 | 53 | return (const_cast(this)->get_vec()); 54 | } 55 | 56 | // ---------------------------------------------------------------------------- 57 | 58 | template 59 | void HeteroVector::push_back(const T &v) { get_vec().push_back (v); } 60 | 61 | // ---------------------------------------------------------------------------- 62 | 63 | template 64 | void HeteroVector::emplace_back (Args &&... args) { 65 | 66 | get_vec().emplace_back (std::forward(args)...); 67 | } 68 | 69 | // ---------------------------------------------------------------------------- 70 | 71 | template 72 | void HeteroVector::emplace (typename V::const_iterator pos, Args &&... 
args) { 73 | 74 | get_vec().emplace (pos, std::forward(args)...); 75 | } 76 | 77 | // ---------------------------------------------------------------------------- 78 | 79 | template 80 | void HeteroVector::visit_impl_help_ (T &visitor) { 81 | 82 | auto iter = items_.find (this); 83 | 84 | if (iter != items_.end()) 85 | for (auto &&element : iter->second) 86 | visitor(element); 87 | } 88 | 89 | // ---------------------------------------------------------------------------- 90 | 91 | template 92 | void HeteroVector::visit_impl_help_ (T &visitor) const { 93 | 94 | const auto citer = items_.find (this); 95 | 96 | if (citer != items_.end()) 97 | for (auto &&element : citer->second) 98 | visitor(element); 99 | } 100 | 101 | // ---------------------------------------------------------------------------- 102 | 103 | template 104 | void HeteroVector::sort_impl_help_ (T &functor) { 105 | 106 | auto iter = items_.find (this); 107 | 108 | if (iter != items_.end()) 109 | std::sort (iter->second.begin(), iter->second.end(), functor); 110 | } 111 | 112 | // ---------------------------------------------------------------------------- 113 | 114 | template 115 | void HeteroVector::change_impl_help_ (T &functor) { 116 | 117 | auto iter = items_.find (this); 118 | 119 | if (iter != items_.end()) 120 | functor(iter->second); 121 | } 122 | 123 | // ---------------------------------------------------------------------------- 124 | 125 | template 126 | void HeteroVector::change_impl_help_ (T &functor) const { 127 | 128 | const auto citer = items_.find (this); 129 | 130 | if (citer != items_.end()) 131 | functor(citer->second); 132 | } 133 | 134 | // ---------------------------------------------------------------------------- 135 | 136 | template class TLIST, class... TYPES> 137 | void HeteroVector::visit_impl_ (T &&visitor, TLIST) { 138 | 139 | // (..., visit_impl_help_, TYPES>(visitor)); // C++17 140 | using expander = int[]; 141 | (void) expander { 0, (visit_impl_help_(visitor), 0) ... 
}; 142 | } 143 | 144 | // ---------------------------------------------------------------------------- 145 | 146 | template class TLIST, class... TYPES> 147 | void HeteroVector::visit_impl_ (T &&visitor, TLIST) const { 148 | 149 | // (..., visit_impl_help_, TYPES>(visitor)); // C++17 150 | using expander = int[]; 151 | (void) expander { 0, (visit_impl_help_(visitor), 0) ... }; 152 | } 153 | 154 | // ---------------------------------------------------------------------------- 155 | 156 | template class TLIST, class... TYPES> 157 | void HeteroVector::sort_impl_ (T &&functor, TLIST) { 158 | 159 | using expander = int[]; 160 | (void) expander { 0, (sort_impl_help_(functor), 0) ... }; 161 | } 162 | 163 | // ---------------------------------------------------------------------------- 164 | 165 | template class TLIST, class... TYPES> 166 | void HeteroVector::change_impl_ (T &&functor, TLIST) { 167 | 168 | using expander = int[]; 169 | (void) expander { 0, (change_impl_help_(functor), 0) ... }; 170 | } 171 | 172 | // ---------------------------------------------------------------------------- 173 | 174 | template class TLIST, class... TYPES> 175 | void HeteroVector::change_impl_ (T &&functor, TLIST) const { 176 | 177 | using expander = int[]; 178 | (void) expander { 0, (change_impl_help_(functor), 0) ... 
}; 179 | } 180 | 181 | // ---------------------------------------------------------------------------- 182 | 183 | template 184 | void HeteroVector::resize(typename V::size_type count) { 185 | 186 | get_vec().resize (count); 187 | } 188 | 189 | // ---------------------------------------------------------------------------- 190 | 191 | template 192 | void HeteroVector::resize(typename V::size_type count, const T &v) { 193 | 194 | get_vec().resize (count, v); 195 | } 196 | 197 | // ---------------------------------------------------------------------------- 198 | 199 | template 200 | void HeteroVector::pop_back() { get_vec().pop_back (); } 201 | 202 | // ---------------------------------------------------------------------------- 203 | 204 | template 205 | bool HeteroVector::empty() const noexcept { 206 | 207 | return (get_vec().empty ()); 208 | } 209 | 210 | // ---------------------------------------------------------------------------- 211 | 212 | template 213 | T &HeteroVector::at(typename V::size_type idx) { 214 | 215 | return (get_vec().at (idx)); 216 | } 217 | 218 | // ---------------------------------------------------------------------------- 219 | 220 | template 221 | const T &HeteroVector::at(typename V::size_type idx) const { 222 | 223 | return (get_vec().at (idx)); 224 | } 225 | 226 | // ---------------------------------------------------------------------------- 227 | 228 | template 229 | T &HeteroVector::back() { return (get_vec().back ()); } 230 | 231 | // ---------------------------------------------------------------------------- 232 | 233 | template 234 | const T &HeteroVector::back() const { return (get_vec().back ()); } 235 | 236 | // ---------------------------------------------------------------------------- 237 | 238 | template 239 | T &HeteroVector::front() { return (get_vec().front ()); } 240 | 241 | // ---------------------------------------------------------------------------- 242 | 243 | template 244 | const T &HeteroVector::front() const 
{ return (get_vec().front ()); } 245 | 246 | } // namespace hmdf 247 | 248 | // ---------------------------------------------------------------------------- 249 | 250 | // Local Variables: 251 | // mode:C++ 252 | // tab-width:4 253 | // c-basic-offset:4 254 | // End: 255 | 256 | -------------------------------------------------------------------------------- /include/DataFrame_misc.tcc: -------------------------------------------------------------------------------- 1 | // Hossein Moein 2 | // September 12, 2017 3 | // Copyright (C) 2017-2018 Hossein Moein 4 | // Distributed under the BSD Software License (see file License) 5 | 6 | #include 7 | 8 | // ---------------------------------------------------------------------------- 9 | 10 | namespace hmdf 11 | { 12 | 13 | // ---------------------------------------------------------------------------- 14 | 15 | template class DS> 16 | template 17 | template 18 | void 19 | DataFrame::consistent_functor_::operator() (T &vec) const { 20 | 21 | using ValueType = 22 | typename std::remove_reference::type::value_type; 23 | 24 | vec.resize(size, ValueType()); 25 | } 26 | 27 | // ---------------------------------------------------------------------------- 28 | 29 | template class DS> 30 | template 31 | template 32 | void 33 | DataFrame::sort_functor_::operator() (T2 &vec) const { 34 | 35 | using VecType = typename std::remove_reference::type; 36 | using DataValueType = typename VecType::value_type; 37 | 38 | std::multimap tmp_map; 39 | const DataFrame::size_type idx_s = idx_vec.size(); 40 | 41 | for (size_t i = 0; i < idx_s; ++i) 42 | tmp_map.emplace(std::move(idx_vec[i]), std::move(vec[i])); 43 | vec.clear(); 44 | for (auto &iter : tmp_map) 45 | vec.emplace_back(std::move(iter.second)); 46 | 47 | return; 48 | } 49 | 50 | // ---------------------------------------------------------------------------- 51 | 52 | template class DS> 53 | template 54 | template 55 | void 56 | DataFrame::load_functor_::operator() (const T &vec) { 57 | 
58 | using VecType = typename std::remove_reference::type; 59 | using ValueType = typename VecType::value_type; 60 | 61 | df.load_column(name, 62 | vec.begin() + begin, 63 | vec.begin() + end, 64 | true); 65 | return; 66 | } 67 | 68 | // ---------------------------------------------------------------------------- 69 | 70 | template class DS> 71 | template 72 | template 73 | void 74 | DataFrame::add_col_functor_::operator() (const T &vec) { 75 | 76 | using VecType = typename std::remove_reference::type; 77 | using ValueType = typename VecType::value_type; 78 | 79 | df.create_column(name); 80 | return; 81 | } 82 | 83 | // ---------------------------------------------------------------------------- 84 | 85 | template class DS> 86 | template 87 | template 88 | void 89 | DataFrame::groupby_functor_::operator() (const T &vec) { 90 | 91 | for (std::size_t i = begin; i < end && i < vec.size(); ++i) 92 | functor (timestamp, name, vec[i]); 93 | 94 | if (! ::strcmp(name, "INDEX")) { 95 | TimeStamp v; 96 | 97 | functor.get_value(v); 98 | df.append_index(v); 99 | } 100 | else { 101 | using VecType = typename std::remove_reference::type; 102 | using ValueType = typename VecType::value_type; 103 | 104 | ValueType v; 105 | 106 | functor.get_value(v); 107 | df.append_column(name, v, false); 108 | } 109 | 110 | return; 111 | } 112 | 113 | // ---------------------------------------------------------------------------- 114 | 115 | template class DS> 116 | template 117 | template 118 | void 119 | DataFrame::bucket_functor_::operator() (const T &vec) { 120 | 121 | using VecType = typename std::remove_reference::type; 122 | using ValueType = typename VecType::value_type; 123 | 124 | const std::size_t ts_s = timestamps.size(); 125 | std::size_t marker = 0; 126 | 127 | if (df.timestamps_.empty()) 128 | for (std::size_t i = 0; i < ts_s; ++i) 129 | if (timestamps[i] - timestamps[marker] >= interval) { 130 | df.timestamps_.push_back(timestamps[i - 1]); 131 | marker = i; 132 | } 133 | 134 | 
for (std::size_t i = 0, marker = 0; i < ts_s; ++i) { 135 | if (timestamps[i] - timestamps[marker] >= interval) { 136 | ValueType v; 137 | 138 | functor.get_value(v); 139 | df.append_column(name, v, false); 140 | functor.reset(); 141 | marker = i; 142 | } 143 | functor (timestamps[i], name, vec[i]); 144 | } 145 | 146 | return; 147 | } 148 | 149 | // ---------------------------------------------------------------------------- 150 | 151 | template class DS> 152 | template 153 | template 154 | void 155 | DataFrame::print_functor_::operator() (const T &vec) { 156 | 157 | using VecType = typename std::remove_reference::type; 158 | using ValueType = typename VecType::value_type; 159 | 160 | if (! values_only) { 161 | os << name << ':'; 162 | if (typeid(ValueType) == typeid(double)) 163 | os << ":"; 164 | else if (typeid(ValueType) == typeid(int)) 165 | os << ":"; 166 | else if (typeid(ValueType) == typeid(unsigned int)) 167 | os << ":"; 168 | else if (typeid(ValueType) == typeid(long)) 169 | os << ":"; 170 | else if (typeid(ValueType) == typeid(unsigned long)) 171 | os << ":"; 172 | else if (typeid(ValueType) == typeid(std::string)) 173 | os << ":"; 174 | else if (typeid(ValueType) == typeid(bool)) 175 | os << ":"; 176 | else 177 | os << ":"; 178 | } 179 | for (std::size_t i = 0; i < vec.size(); ++i) 180 | os << vec[i] << ','; 181 | os << '\n'; 182 | 183 | return; 184 | } 185 | 186 | // ---------------------------------------------------------------------------- 187 | 188 | template class DS> 189 | template 190 | template 191 | void 192 | DataFrame:: 193 | equal_functor_::operator() (const DS &lhs_vec) { 194 | 195 | const auto &iter = df.data_tb_.find(name); 196 | 197 | if (iter == df.data_tb_.end()) { 198 | result = false; 199 | return; 200 | } 201 | 202 | const DataVec &hv = df.data_[iter->second]; 203 | const DS &rhs_vec = hv.get_vec>(); 204 | 205 | if (lhs_vec != rhs_vec) 206 | result = false; 207 | } 208 | 209 | // 
---------------------------------------------------------------------------- 210 | 211 | template class DS> 212 | template 213 | template 214 | void 215 | DataFrame:: 216 | mod_by_idx_functor_::operator() (DS &lhs_vec) const { 217 | 218 | const auto &iter = rhs_df.data_tb_.find(name); 219 | 220 | if (iter != rhs_df.data_tb_.end()) { 221 | const DS &rhs_vec = rhs_df.get_column(name); 222 | 223 | lhs_vec[lhs_idx] = rhs_vec[rhs_idx]; 224 | } 225 | } 226 | 227 | // ---------------------------------------------------------------------------- 228 | 229 | template class DS> 230 | template 231 | void 232 | DataFrame::for_each_in_tuple_ (const std::tuple &tu, 233 | F func, 234 | std::index_sequence) const { 235 | 236 | using expander = int[]; 237 | (void) expander { 0, (func(std::get(tu)), 0) ... }; 238 | } 239 | 240 | // ---------------------------------------------------------------------------- 241 | 242 | template class DS> 243 | template 244 | void 245 | DataFrame::for_each_in_tuple_ (std::tuple &tu, 246 | F func, 247 | std::index_sequence) { 248 | 249 | using expander = int[]; 250 | (void) expander { 0, (func(std::get(tu)), 0) ... 
}; 251 | } 252 | 253 | // ---------------------------------------------------------------------------- 254 | 255 | template class DS> 256 | template 257 | void 258 | DataFrame:: 259 | for_each_in_tuple_ (const std::tuple &tu, F func) const { 260 | 261 | for_each_in_tuple_(tu, func, std::make_index_sequence()); 262 | } 263 | 264 | // ---------------------------------------------------------------------------- 265 | 266 | template class DS> 267 | template 268 | void 269 | DataFrame::for_each_in_tuple_ (std::tuple &tu, F func) { 270 | 271 | for_each_in_tuple_(tu, func, std::make_index_sequence()); 272 | } 273 | 274 | } // namespace hmdf 275 | 276 | // ---------------------------------------------------------------------------- 277 | 278 | // Local Variables: 279 | // mode:C++ 280 | // tab-width:4 281 | // c-basic-offset:4 282 | // End: 283 | -------------------------------------------------------------------------------- /include/DataFrame_set.tcc: -------------------------------------------------------------------------------- 1 | // Hossein Moein 2 | // September 12, 2017 3 | // Copyright (C) 2017-2018 Hossein Moein 4 | // Distributed under the BSD Software License (see file License) 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | // ---------------------------------------------------------------------------- 11 | 12 | namespace hmdf 13 | { 14 | 15 | template class DS> 16 | template 17 | DS &DataFrame::create_column (const char *name) { 18 | 19 | if (! 
::strcmp(name, "INDEX")) 20 | throw DataFrameError ("DataFrame::create_column(): ERROR: " 21 | "Data column name cannot be 'INDEX'"); 22 | 23 | data_.emplace_back (DataVec()); 24 | data_tb_.emplace (name, data_.size() - 1); 25 | 26 | DataVec &hv = data_.back(); 27 | DS &vec = hv.get_vec>(); 28 | 29 | // vec.resize(timestamps_.size(), _get_nan()); 30 | return (vec); 31 | } 32 | 33 | // ---------------------------------------------------------------------------- 34 | 35 | template class DS> 36 | template 37 | typename DataFrame::size_type 38 | DataFrame::load_data (TSVec &&indices, Ts ... args) { 39 | 40 | size_type cnt = load_index(std::move(indices)); 41 | 42 | auto args_tuple = std::tuple(args ...); 43 | // const size_type tuple_size = 44 | // std::tuple_size::value; 45 | auto fc = 46 | [this, &cnt](auto &pa) mutable -> void { cnt += this->_load_pair(pa); }; 47 | 48 | for_each_in_tuple_ (args_tuple, fc); 49 | 50 | return (cnt); 51 | } 52 | 53 | // ---------------------------------------------------------------------------- 54 | 55 | template class DS> 56 | template 57 | typename DataFrame::size_type 58 | DataFrame::load_index(const ITR &begin, const ITR &end) { 59 | 60 | const size_type s = std::distance(begin, end); 61 | 62 | timestamps_.clear (); 63 | timestamps_.reserve (s); 64 | timestamps_.insert (timestamps_.begin (), begin, end); 65 | return (s); 66 | } 67 | 68 | // ---------------------------------------------------------------------------- 69 | 70 | template class DS> 71 | typename DataFrame::size_type 72 | DataFrame::load_index(TSVec &&idx) { 73 | 74 | timestamps_ = idx; 75 | return (timestamps_.size()); 76 | } 77 | 78 | // ---------------------------------------------------------------------------- 79 | 80 | template class DS> 81 | template 82 | typename DataFrame::size_type 83 | DataFrame::append_index(const ITR &begin, const ITR &end) { 84 | 85 | const size_type s = std::distance(begin, end); 86 | 87 | timestamps_.reserve (timestamps_.size() + s); 88 
| timestamps_.insert (timestamps_.end (), begin, end); 89 | return (s); 90 | } 91 | 92 | // ---------------------------------------------------------------------------- 93 | 94 | template class DS> 95 | typename DataFrame::size_type 96 | DataFrame::append_index(const TimeStamp &val) { 97 | 98 | timestamps_.push_back (val); 99 | return (1); 100 | } 101 | 102 | // ---------------------------------------------------------------------------- 103 | 104 | template class DS> 105 | template 106 | typename DataFrame::size_type 107 | DataFrame::load_column (const char *name, 108 | const ITR &begin, 109 | const ITR &end, 110 | bool pad_with_nan) { 111 | 112 | size_type s = std::distance(begin, end); 113 | const size_type idx_s = timestamps_.size(); 114 | 115 | if (s > idx_s) { 116 | char buffer [512]; 117 | 118 | sprintf (buffer, "DataFrame::load_column(): ERROR: " 119 | "data size of %lu is larger than index size of %lu", 120 | s, idx_s); 121 | throw InconsistentData (buffer); 122 | } 123 | 124 | const auto iter = data_tb_.find (name); 125 | DS *vec_ptr = nullptr; 126 | 127 | if (iter == data_tb_.end()) 128 | vec_ptr = &(create_column(name)); 129 | else { 130 | DataVec &hv = data_[iter->second]; 131 | 132 | vec_ptr = &(hv.get_vec>()); 133 | } 134 | 135 | vec_ptr->clear(); 136 | vec_ptr->reserve(idx_s); 137 | vec_ptr->insert (vec_ptr->begin (), begin, end); 138 | 139 | size_type ret_cnt = s; 140 | 141 | s = vec_ptr->size(); 142 | if (pad_with_nan && s < idx_s) { 143 | for (size_type i = 0; i < idx_s - s; ++i) { 144 | vec_ptr->push_back (std::move(_get_nan())); 145 | ret_cnt += 1; 146 | } 147 | } 148 | 149 | return (ret_cnt); 150 | } 151 | 152 | // ---------------------------------------------------------------------------- 153 | 154 | template class DS> 155 | template 156 | typename DataFrame::size_type 157 | DataFrame:: 158 | load_column (const char *name, DS &&data, bool pad_with_nan) { 159 | 160 | const size_type idx_s = timestamps_.size(); 161 | const size_type data_s = 
data.size(); 162 | 163 | if (data_s > idx_s) { 164 | char buffer [512]; 165 | 166 | sprintf (buffer, "DataFrame::load_column(): ERROR: " 167 | "data size of %lu is larger than index size of %lu", 168 | data_s, idx_s); 169 | throw InconsistentData (buffer); 170 | } 171 | 172 | size_type ret_cnt = data_s; 173 | 174 | if (pad_with_nan && data_s < idx_s) { 175 | for (size_type i = 0; i < idx_s - data_s; ++i) { 176 | data.push_back (std::move(_get_nan())); 177 | ret_cnt += 1; 178 | } 179 | } 180 | 181 | const auto iter = data_tb_.find (name); 182 | DS *vec_ptr = nullptr; 183 | 184 | if (iter == data_tb_.end()) 185 | vec_ptr = &(create_column(name)); 186 | else { 187 | DataVec &hv = data_[iter->second]; 188 | 189 | vec_ptr = &(hv.get_vec>()); 190 | } 191 | 192 | *vec_ptr = std::move(data); 193 | return (ret_cnt); 194 | } 195 | 196 | // ---------------------------------------------------------------------------- 197 | 198 | template class DS> 199 | template 200 | typename DataFrame::size_type 201 | DataFrame::_load_pair(std::pair &col_name_data) { 202 | 203 | return (load_column( 204 | col_name_data.first, // column name 205 | std::move(col_name_data.second), 206 | true)); 207 | } 208 | 209 | // ---------------------------------------------------------------------------- 210 | 211 | template class DS> 212 | template 213 | typename DataFrame::size_type 214 | DataFrame::append_column (const char *name, 215 | const ITR &begin, 216 | const ITR &end, 217 | bool pad_with_nan) { 218 | 219 | const auto iter = data_tb_.find (name); 220 | 221 | if (iter == data_tb_.end()) { 222 | char buffer [512]; 223 | 224 | sprintf (buffer, "DataFrame::append_column(): ERROR: " 225 | "Cannot find column '%s'", 226 | name); 227 | throw ColNotFound (buffer); 228 | } 229 | 230 | DataVec &hv = data_[iter->second]; 231 | DS &vec = hv.get_vec>(); 232 | 233 | size_type s = std::distance(begin, end) + vec.size (); 234 | const size_type idx_s = timestamps_.size(); 235 | 236 | if (s > idx_s) { 237 | char 
buffer [512]; 238 | 239 | sprintf (buffer, "DataFrame::append_column(): ERROR: " 240 | "data size of %lu is larger than index size of %lu", 241 | s, idx_s); 242 | throw InconsistentData (buffer); 243 | } 244 | 245 | vec.reserve (idx_s); 246 | vec.insert (vec.end (), begin, end); 247 | 248 | size_type ret_cnt = s; 249 | 250 | s = vec.size(); 251 | if (pad_with_nan && s < idx_s) { 252 | for (size_type i = 0; i < idx_s - s; ++i) { 253 | vec.push_back (std::move(_get_nan())); 254 | ret_cnt += 1; 255 | } 256 | } 257 | 258 | return (ret_cnt); 259 | } 260 | 261 | // ---------------------------------------------------------------------------- 262 | 263 | template class DS> 264 | template 265 | typename DataFrame::size_type 266 | DataFrame:: 267 | append_column (const char *name, const T &val, bool pad_with_nan) { 268 | 269 | const auto iter = data_tb_.find (name); 270 | 271 | if (iter == data_tb_.end()) { 272 | char buffer [512]; 273 | 274 | sprintf (buffer, "DataFrame::append_column(): ERROR: " 275 | "Cannot find column '%s'", 276 | name); 277 | throw ColNotFound (buffer); 278 | } 279 | 280 | DataVec &hv = data_[iter->second]; 281 | DS &vec = hv.get_vec>(); 282 | 283 | size_type s = 1; 284 | const size_type idx_s = timestamps_.size(); 285 | 286 | if (s > idx_s) { 287 | char buffer [512]; 288 | 289 | sprintf (buffer, "DataFrame::append_column(): ERROR: " 290 | "data size of %lu is larger than index size of %lu", 291 | s, idx_s); 292 | throw InconsistentData (buffer); 293 | } 294 | 295 | vec.reserve (idx_s); 296 | vec.push_back (val); 297 | 298 | size_type ret_cnt = s; 299 | 300 | s = vec.size(); 301 | if (pad_with_nan && s < idx_s) { 302 | for (size_type i = 0; i < idx_s - s; ++i) { 303 | vec.push_back (std::move(_get_nan())); 304 | ret_cnt += 1; 305 | } 306 | } 307 | 308 | return (ret_cnt); 309 | } 310 | 311 | } // namespace hmdf 312 | 313 | // ---------------------------------------------------------------------------- 314 | 315 | // Local Variables: 316 | // mode:C++ 
317 | // tab-width:4 318 | // c-basic-offset:4 319 | // End: 320 | -------------------------------------------------------------------------------- /include/DataFrame.tcc: -------------------------------------------------------------------------------- 1 | // Hossein Moein 2 | // September 12, 2017 3 | // Copyright (C) 2017-2018 Hossein Moein 4 | // Distributed under the BSD Software License (see file License) 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | // ---------------------------------------------------------------------------- 11 | 12 | namespace hmdf 13 | { 14 | 15 | template class DS> 16 | template 17 | inline constexpr T DataFrame::_get_nan() { 18 | 19 | if (std::numeric_limits::has_quiet_NaN) 20 | return (std::numeric_limits::quiet_NaN()); 21 | return (T()); 22 | } 23 | 24 | // ---------------------------------------------------------------------------- 25 | 26 | template class DS> 27 | template 28 | void DataFrame::make_consistent () { 29 | 30 | const size_type idx_s = timestamps_.size(); 31 | consistent_functor_ functor (idx_s); 32 | 33 | for (auto &iter : data_) 34 | iter.change(functor); 35 | } 36 | 37 | // ---------------------------------------------------------------------------- 38 | 39 | template class DS> 40 | template 41 | void DataFrame::sort(const char *by_name) { 42 | 43 | make_consistent(); 44 | 45 | if (by_name == nullptr) { 46 | sort_functor_ functor (timestamps_); 47 | 48 | for (auto &iter : data_) 49 | iter.change(functor); 50 | 51 | std::sort (timestamps_.begin(), timestamps_.end()); 52 | } 53 | else { 54 | const auto iter = data_tb_.find (by_name); 55 | 56 | if (iter == data_tb_.end()) { 57 | char buffer [512]; 58 | 59 | sprintf (buffer, "DataFrame::sort(): ERROR: " 60 | "Cannot find column '%s'", 61 | by_name); 62 | throw ColNotFound (buffer); 63 | } 64 | 65 | DataVec &hv = data_[iter->second]; 66 | DS &idx_vec = hv.get_vec>(); 67 | sort_functor_ functor (idx_vec); 68 | 69 | for (size_type i = 0; i < data_.size(); ++i) 70 | if 
(i != iter->second) 71 | data_[i].change(functor); 72 | functor(timestamps_); 73 | 74 | std::sort (idx_vec.begin(), idx_vec.end()); 75 | } 76 | 77 | return; 78 | } 79 | 80 | // ---------------------------------------------------------------------------- 81 | 82 | template class DS> 83 | template 84 | std::future DataFrame::sort_async(const char *by_name) { 85 | 86 | return (std::async(std::launch::async, 87 | &DataFrame::sort, 88 | this, by_name)); 89 | } 90 | 91 | // ---------------------------------------------------------------------------- 92 | 93 | template class DS> 94 | template 95 | DataFrame 96 | DataFrame:: 97 | groupby (F &&func, const char *gb_col_name, bool already_sorted) const { 98 | 99 | DataFrame tmp_df = *this; 100 | 101 | if (! already_sorted) 102 | if (gb_col_name == nullptr) tmp_df.sort(); 103 | else tmp_df.sort(gb_col_name); 104 | 105 | DataFrame df; 106 | 107 | for (const auto &iter : tmp_df.data_tb_) { 108 | add_col_functor_ functor (iter.first.c_str(), df); 109 | 110 | tmp_df.data_[iter.second].change(functor); 111 | } 112 | 113 | const size_type vec_size = tmp_df.timestamps_.size(); 114 | size_type marker = 0; 115 | 116 | if (gb_col_name == nullptr) { // Index 117 | for (size_type i = 0; i < vec_size; ++i) { 118 | if (tmp_df.timestamps_[i] != tmp_df.timestamps_[marker]) { 119 | df.append_index(tmp_df.timestamps_[marker]); 120 | for (const auto &iter : tmp_df.data_tb_) { 121 | groupby_functor_ functor( 122 | iter.first.c_str(), 123 | marker, 124 | i, 125 | tmp_df.timestamps_[marker], 126 | func, 127 | df); 128 | 129 | tmp_df.data_[iter.second].change(functor); 130 | func.reset(); 131 | } 132 | 133 | marker = i; 134 | } 135 | } 136 | if (marker < vec_size) { 137 | df.append_index(tmp_df.timestamps_[vec_size - 1]); 138 | for (const auto &iter : tmp_df.data_tb_) { 139 | groupby_functor_ functor( 140 | iter.first.c_str(), 141 | vec_size - 1, 142 | vec_size, 143 | tmp_df.timestamps_[vec_size - 1], 144 | func, 145 | df); 146 | 147 | 
tmp_df.data_[iter.second].change(functor); 148 | } 149 | } 150 | } 151 | else { // Non-index column 152 | const DS &gb_vec = tmp_df.get_column(gb_col_name); 153 | 154 | for (size_type i = 0; i < vec_size; ++i) { 155 | if (gb_vec[i] != gb_vec[marker]) { 156 | groupby_functor_ ts_functor( 157 | "INDEX", 158 | marker, 159 | i, 160 | tmp_df.timestamps_[marker], 161 | func, 162 | df); 163 | 164 | ts_functor(tmp_df.timestamps_); 165 | df.append_column(gb_col_name, gb_vec [marker], false); 166 | func.reset(); 167 | 168 | for (const auto &iter : tmp_df.data_tb_) { 169 | if (iter.first != gb_col_name) { 170 | groupby_functor_ functor( 171 | iter.first.c_str(), 172 | marker, 173 | i, 174 | tmp_df.timestamps_[marker], 175 | func, 176 | df); 177 | 178 | tmp_df.data_[iter.second].change(functor); 179 | func.reset(); 180 | } 181 | } 182 | 183 | marker = i; 184 | } 185 | } 186 | 187 | if (marker < vec_size) { 188 | groupby_functor_ ts_functor( 189 | "INDEX", 190 | vec_size - 1, 191 | vec_size, 192 | tmp_df.timestamps_[vec_size - 1], 193 | func, 194 | df); 195 | 196 | ts_functor(tmp_df.timestamps_); 197 | df.append_column(gb_col_name, gb_vec [vec_size - 1], false); 198 | func.reset(); 199 | 200 | for (const auto &iter : tmp_df.data_tb_) { 201 | if (iter.first != gb_col_name) { 202 | groupby_functor_ functor( 203 | iter.first.c_str(), 204 | vec_size - 1, 205 | vec_size, 206 | tmp_df.timestamps_[vec_size - 1], 207 | func, 208 | df); 209 | 210 | tmp_df.data_[iter.second].change(functor); 211 | } 212 | } 213 | } 214 | } 215 | 216 | return (df); 217 | } 218 | 219 | // ---------------------------------------------------------------------------- 220 | 221 | template class DS> 222 | template 223 | std::future> 224 | DataFrame:: 225 | groupby_async (F &&func, const char *gb_col_name, bool already_sorted) const { 226 | 227 | return (std::async(std::launch::async, 228 | &DataFrame::groupby, 229 | this, 230 | std::move(func), 231 | gb_col_name, 232 | already_sorted)); 233 | } 234 | 235 | // 
---------------------------------------------------------------------------- 236 | 237 | template class DS> 238 | template 239 | DataFrame 240 | DataFrame:: 241 | bucketize (F &&func, const TimeStamp &bucket_interval) const { 242 | 243 | DataFrame df; 244 | 245 | for (const auto &iter : data_tb_) { 246 | add_col_functor_ functor (iter.first.c_str(), df); 247 | 248 | data_[iter.second].change(functor); 249 | } 250 | 251 | for (const auto &iter : data_tb_) { 252 | bucket_functor_ functor( 253 | iter.first.c_str(), 254 | timestamps_, 255 | bucket_interval, 256 | func, 257 | df); 258 | 259 | data_[iter.second].change(functor); 260 | } 261 | 262 | return (df); 263 | } 264 | 265 | // ---------------------------------------------------------------------------- 266 | 267 | template class DS> 268 | template 269 | std::future> 270 | DataFrame:: 271 | bucketize_async (F &&func, const TimeStamp &bucket_interval) const { 272 | 273 | return (std::async(std::launch::async, 274 | &DataFrame::bucketize, 275 | this, 276 | std::move(func), 277 | std::cref(bucket_interval))); 278 | } 279 | 280 | // ---------------------------------------------------------------------------- 281 | 282 | template class DS> 283 | template 284 | bool DataFrame::write (S &o, bool values_only) const { 285 | 286 | if (! 
values_only) o << "INDEX:"; 287 | for (size_type i = 0; i < timestamps_.size(); ++i) 288 | o << timestamps_[i] << ','; 289 | o << '\n'; 290 | 291 | for (const auto &iter : data_tb_) { 292 | print_functor_ functor (iter.first.c_str(), values_only, o); 293 | 294 | data_[iter.second].change(functor); 295 | } 296 | 297 | o << std::endl; 298 | return (true); 299 | } 300 | 301 | // ---------------------------------------------------------------------------- 302 | 303 | template class DS> 304 | template 305 | std::future DataFrame:: 306 | write_async (S &o, bool values_only) const { 307 | 308 | return (std::async(std::launch::async, 309 | &DataFrame::write, 310 | this, 311 | std::ref(o), 312 | values_only)); 313 | } 314 | 315 | } // namespace hmdf 316 | 317 | // ---------------------------------------------------------------------------- 318 | 319 | // Local Variables: 320 | // mode:C++ 321 | // tab-width:4 322 | // c-basic-offset:4 323 | // End: 324 | -------------------------------------------------------------------------------- /DMScu/src/fixsizestr_tester.cc: -------------------------------------------------------------------------------- 1 | // Hossein Moein 2 | // July 17 2009 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | using namespace std; 13 | 14 | // ---------------------------------------------------------------------------- 15 | 16 | int main (int arg_cnt, char *arg_vctr []) { 17 | 18 | const size_t the_size = 32; 19 | DMScu_FixedSizeString the_str; 20 | DMScu_FixedSizeString the_str2 = the_str; 21 | DMScu_FixedSizeString the_str3 (the_str); 22 | 23 | cout << "The size is: " << the_size << endl; 24 | cout << "capacity(): " << the_str.capacity () << endl; 25 | cout << "size(): " << the_str.size () << endl; 26 | cout << "c_str(): '" << the_str.c_str () << "'" << endl; 27 | cout << "Size of DMScu_FixedSizeString<32>: " << sizeof the_str << endl; 28 | 29 | DMScu_VirtualString::const_pointer str_1 = "This is a 
test"; 30 | 31 | cout << "str_1: " << str_1 << endl; 32 | 33 | the_str = str_1; 34 | cout << "After the_str = str_1;\n" << the_str.c_str () << endl; 35 | cout << "size(): " << the_str.size () << endl; 36 | 37 | DMScu_VirtualString::const_pointer str_2 = "This string 2"; 38 | 39 | cout << "str_2: " << str_2 << endl; 40 | 41 | the_str = str_2; 42 | cout << "After the_str = str_2;\n" << the_str.c_str () << endl; 43 | cout << "size(): " << the_str.size () << endl; 44 | 45 | the_str = str_1; 46 | the_str += str_2; 47 | cout << "After the_str = str_1; " << endl 48 | << " the_str += str_2; " << endl 49 | << the_str.c_str () 50 | << endl; 51 | cout << "size(): " << the_str.size () << endl; 52 | cout << "Result of the_str == str_1;\n" << (the_str == str_1) << endl; 53 | cout << "Result of the_str != str_1;\n" << (the_str != str_1) << endl; 54 | 55 | the_str = str_1; 56 | cout << "After the_str = str_1; " << endl; 57 | cout << "Result of the_str == str_1;\n" << (the_str == str_1) << endl; 58 | cout << "Result of the_str != str_1;\n" << (the_str != str_1) << endl; 59 | 60 | the_str.printf ("%d %lf", 12, 20.4356); 61 | cout << "After the_str.printf (\"%d %lf\", 12, 20.4356);\n" 62 | << the_str.c_str () << endl; 63 | 64 | cout << "The 3rd char is: " << the_str [3] << endl; 65 | the_str [3] = 'X'; 66 | cout << "After the_str [3] = 'X'; The 3rd char is: " 67 | << the_str [3] << endl 68 | << the_str.c_str () << endl; 69 | 70 | cout << "Is the_str empty? " << the_str.empty () << endl; 71 | the_str.clear (); 72 | // the_str = ""; 73 | cout << "After clear(), is the_str empty? " << the_str.empty () << endl; 74 | 75 | DMScu_FixedSizeString the_str4 ("String passed to constructor"); 76 | 77 | cout << "After the_str4 (\"String passed to constructor\");\n" 78 | << the_str4.c_str () << endl; 79 | 80 | DMScu_FixedSizeString<28> str28 = "This is a 28 char string"; 81 | DMScu_FixedSizeString<64> str64 = "This is a 64 char string. 
" 82 | "I am going to make it longer than 28 chars."; 83 | DMScu_VirtualString &vstr28 = str28; 84 | DMScu_VirtualString &vstr64 = str64; 85 | 86 | cout << "\n\n-- Testing the abstract base class\n\n"; 87 | { 88 | cout << "vstr28 = '" << vstr28.c_str () << "'" << endl; 89 | cout << "vstr64 = '" << vstr64.c_str () << "'" << endl; 90 | } 91 | 92 | cout << "\n\n-- Testing the iterators on vstr64\n\n"; 93 | { 94 | for (DMScu_VirtualString::const_iterator itr = str64.begin (); 95 | itr != vstr64.end (); ++itr) 96 | cout << " '" << *itr << "'"; 97 | cout << endl; 98 | } 99 | 100 | cout << "\n\n-- Testing the comparison operators\n\n"; 101 | { 102 | DMScu_FixedSizeString<28> str1 = "AAAAx"; 103 | DMScu_FixedSizeString<18> str2 = "AAAA"; 104 | 105 | cout << "AAAAx > AAAA -> " << (str1 > str2) << endl; 106 | str1 = "Azzz"; 107 | str2 = "Bxxx"; 108 | cout << "Azzz > Bxxx -> " << (str1 > str2) << endl; 109 | str1 = "AAAA"; 110 | str2 = "AAAA"; 111 | cout << "AAAA > AAAA -> " << (str1 > str2) << endl; 112 | str1 = "AAAz"; 113 | str2 = "AAAx"; 114 | cout << "AAAz > AAAx -> " << (str1 > str2) << endl; 115 | } 116 | 117 | cout << "\n\n-- Testing the find methods\n\n"; 118 | { 119 | DMScu_FixedSizeString<28> str = "A.B.CDEFGHIJKLMN"; 120 | 121 | if (str.find ('.') != 1) { 122 | cout << "ERROR: str.find ('.') failed" << endl; 123 | return (-1); 124 | } 125 | if (str.find ('.', 2) != 3) { 126 | cout << "ERROR: str.find ('.', 2) failed" << endl; 127 | return (-1); 128 | } 129 | if (str.find ('.', 4) != DMScu_VirtualString::npos) { 130 | cout << "ERROR: str.find ('.', 4) failed" << endl; 131 | return (-1); 132 | } 133 | if (str.find ('X') != DMScu_VirtualString::npos) { 134 | cout << "ERROR: str.find ('X') failed" << endl; 135 | return (-1); 136 | } 137 | if (str.find ('.', 16) != DMScu_VirtualString::npos) { 138 | cout << "ERROR: str.find ('.', 16) failed" << endl; 139 | return (-1); 140 | } 141 | 142 | if (str.find ("HIJ", 4) != 9) { 143 | cout << "ERROR: str.find (\"HIJ\", 9) 
failed" << endl; 144 | return (-1); 145 | } 146 | if (str.find ("A.B.CD") != 0) { 147 | cout << "ERROR: str.find (\"A.B.CD\") failed" << endl; 148 | return (-1); 149 | } 150 | if (str.find ("LMN", 13) != 13) { 151 | cout << "ERROR: str.find (\"LMN\", 13) failed" << endl; 152 | return (-1); 153 | } 154 | if (str.find ("LMN") != 13) { 155 | cout << "ERROR: str.find (\"LMN\") failed" << endl; 156 | return (-1); 157 | } 158 | if (str.find ("XYZ") != DMScu_VirtualString::npos) { 159 | cout << "ERROR: str.find (\"XYZ\", 9) failed" << endl; 160 | return (-1); 161 | } 162 | 163 | cout << "SUCCESS: find method is working" << endl; 164 | } 165 | 166 | cout << "\n\n-- Testing the ncopy()\n\n"; 167 | { 168 | DMScu_FixedSizeString<8> str; 169 | 170 | str.ncopy ("123456", 8); 171 | cout << "It must be '123456' -- '" << str.c_str () 172 | << "'" << endl; 173 | 174 | str.ncopy ("123456789012", 5); 175 | cout << "It must be '12345' -- '" << str.c_str () 176 | << "'" << endl; 177 | 178 | } 179 | 180 | cout << "\n\n-- Testing the append_printf()\n\n"; 181 | { 182 | DMScu_FixedSizeString<1023> str; 183 | 184 | str = "This is a string: "; 185 | str.append_printf ("%s %d -- ", "This is appended", 1); 186 | str.append_printf ("%s %d.\n", "This is appended again", 2); 187 | cout << str.c_str () << endl; 188 | 189 | } 190 | 191 | // I just want to make sure that these statements will compile. 
192 | // 193 | str28.compare (vstr64); 194 | vstr64.compare (str28); 195 | if (str28 == vstr64); 196 | if (vstr64 == str28); 197 | 198 | cout << "\n\n-- Testing the replace()\n\n"; 199 | { 200 | DMScu_FixedSizeString<15> str = "USD/JPY"; 201 | std::string stdstr = "USD/JPY"; 202 | 203 | cout << "Original: " << str; 204 | str.replace (3, 1, ""); 205 | stdstr.replace (3, 1, ""); 206 | cout << " replace (3, 1, \"\"): " << str << " " << stdstr << endl; 207 | 208 | str = "USD/JPY"; 209 | stdstr = "USD/JPY"; 210 | cout << "Original: " << str; 211 | str.replace (3, 1, "\\"); 212 | stdstr.replace (3, 1, "\\"); 213 | cout << " replace (3, 1, \"\\\"): " << str << " " << stdstr << endl; 214 | 215 | str = "USD/JPY"; 216 | stdstr = "USD/JPY"; 217 | cout << "Original: " << str; 218 | str.replace (3, 3, "->>"); 219 | stdstr.replace (3, 3, "->>"); 220 | cout << " Rreplace (3, 3, \"->>\"): " << str << " " << stdstr << endl; 221 | 222 | str = "USD/JPY"; 223 | stdstr = "USD/JPY"; 224 | cout << "Original: " << str; 225 | str.replace (0, 1, ""); 226 | stdstr.replace (0, 1, ""); 227 | cout << " replace (0, 1, \"\"): " << str << " " << stdstr << endl; 228 | 229 | str = "USD/JPY"; 230 | stdstr = "USD/JPY"; 231 | cout << "Original: " << str; 232 | str.replace (0, 1, "S"); 233 | stdstr.replace (0, 1, "S"); 234 | cout << " replace (0, 1, \"S\"): " << str << " " << stdstr << endl; 235 | 236 | str = "USD/JPY"; 237 | stdstr = "USD/JPY"; 238 | cout << "Original: " << str; 239 | str.replace (0, 3, "->>"); 240 | stdstr.replace (0, 3, "->>"); 241 | cout << " replace (0, 3, \"->>\"): " << str << " " << stdstr << endl; 242 | 243 | str = "USD/JPY"; 244 | stdstr = "USD/JPY"; 245 | cout << "Original: " << str; 246 | str.replace (6, 1, ""); 247 | stdstr.replace (6, 1, ""); 248 | cout << " replace (6, 1, \"\"): " << str << " " << stdstr << endl; 249 | 250 | str = "USD/JPY"; 251 | stdstr = "USD/JPY"; 252 | cout << "Original: " << str; 253 | str.replace (6, 1, "P"); 254 | stdstr.replace (6, 1, "P"); 255 | 
cout << " replace (6, 1, \"P\"): " << str << " " << stdstr << endl; 256 | 257 | str = "USD/JPY"; 258 | stdstr = "USD/JPY"; 259 | cout << "Original: " << str; 260 | str.replace (6, 3, "->>"); 261 | stdstr.replace (6, 3, "->>"); 262 | cout << " replace (6, 3, \"->>\"): " << str << " " << stdstr << endl; 263 | 264 | str = "USD/JPY"; 265 | stdstr = "USD/JPY"; 266 | cout << "Original: " << str; 267 | str.replace (6, 1, "->>"); 268 | stdstr.replace (6, 1, "->>"); 269 | cout << " replace (6, 1, \"->>\"): " << str << " " << stdstr << endl; 270 | 271 | str = "USD/JPY"; 272 | stdstr = "USD/JPY"; 273 | cout << "Original: " << str; 274 | str.replace (5, 1, "->>"); 275 | stdstr.replace (5, 1, "->>"); 276 | cout << " replace (5, 1, \"->>\"): " << str << " " << stdstr << endl; 277 | 278 | str = "USD/JPY"; 279 | stdstr = "USD/JPY"; 280 | cout << "Original: " << str; 281 | str.replace (5, 3, "->>"); 282 | stdstr.replace (5, 3, "->>"); 283 | cout << " replace (5, 3, \"->>\"): " << str << " " << stdstr << endl; 284 | } 285 | 286 | cout << "\n\n-- Testing Performance\n\n"; 287 | { 288 | static const char *STRING = "The is a test string"; 289 | static const char *ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; 290 | char *str1 = ::strdup (STRING); 291 | char *str2 = ::strdup (STRING); 292 | DMScu_FixedSizeString<31> my_str1 = STRING; 293 | DMScu_FixedSizeString<31> my_str2 = STRING; 294 | std::string std_str1 = STRING; 295 | std::string std_str2 = STRING; 296 | int count = 0; 297 | const time_t start = ::time (NULL); 298 | 299 | for (int i = 0; i < 100000000; ++i) { 300 | str1 [10] = ALPHA [::rand () % 25]; 301 | str2 [10] = ALPHA [::rand () % 25]; 302 | 303 | if (my_str1 == my_str2) 304 | // if (std_str1 == std_str2) 305 | // if (! ::strcmp (str1, str2)) 306 | ++count; 307 | } 308 | 309 | const time_t end = ::time (NULL); 310 | cout << "String comparison took: " 311 | << end - start << " seconds." 
<< endl; 312 | } 313 | 314 | return EXIT_SUCCESS; 315 | } 316 | 317 | // ---------------------------------------------------------------------------- 318 | 319 | // Local Variables: 320 | // mode:C++ 321 | // tab-width:4 322 | // c-basic-offset:4 323 | // End: 324 | -------------------------------------------------------------------------------- /DMScu/include/DMScu_FixedSizeString.h: -------------------------------------------------------------------------------- 1 | // Hossein Moein 2 | // July 17 2009 3 | // Copyright (C) 2017-2018 Hossein Moein 4 | // Distributed under the BSD Software License (see file License) 5 | 6 | #ifndef _INCLUDED_DMScu_FixedSizeString_h 7 | #define _INCLUDED_DMScu_FixedSizeString_h 0 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | 17 | // ---------------------------------------------------------------------------- 18 | 19 | // This abstract base class makes it possible to pass different template 20 | // instances around as one type and to be able to assign them interchangeably. 21 | // The only penalty paid for having this base class is to carry around one 22 | // additional (pointer size) member. There shouldn't be any performace 23 | // penalty, since everything is still stack based and there is no virtuality. 24 | // 25 | // NOTE: DMScu_VirtualString MAKES NO BOUNDARY CHECKS. IT IS THE RESPONSIBILITY 26 | // OF THE PROGRAMMER TO TAKE CARE OF THAT. 
27 | // 28 | class DMScu_VirtualString { 29 | 30 | public: 31 | 32 | typedef unsigned int size_type; 33 | typedef char value_type; 34 | typedef value_type * pointer; 35 | typedef const value_type * const_pointer; 36 | typedef value_type & reference; 37 | typedef const value_type & const_reference; 38 | 39 | typedef pointer iterator; 40 | typedef const_pointer const_iterator; 41 | 42 | static const size_type npos = static_cast(-1); 43 | 44 | inline iterator begin () throw () { return (string_); } 45 | inline const_iterator begin () const throw () { return (string_); } 46 | 47 | // Unfortunately, the following two methods are not as cheap as they are 48 | // supposed to be. 49 | // 50 | inline iterator end () throw () { return (string_ + size ()); } 51 | inline const_iterator end () const throw () { 52 | 53 | return (string_ + size ()); 54 | } 55 | 56 | protected: 57 | 58 | inline DMScu_VirtualString (pointer str) throw () : string_ (str) { } 59 | 60 | public: 61 | 62 | // Assignment methods. 63 | // 64 | inline DMScu_VirtualString &operator = (const_pointer rhs) throw () { 65 | 66 | ::strcpy (string_, rhs); 67 | return (*this); 68 | } 69 | inline DMScu_VirtualString & 70 | operator = (const DMScu_VirtualString &rhs) throw () { 71 | 72 | return (*this = rhs.c_str ()); 73 | } 74 | inline DMScu_VirtualString & 75 | ncopy (const_pointer rhs, size_type len) throw () { 76 | 77 | ::strncpy (string_, rhs, len); 78 | string_ [len] = 0; 79 | return (*this); 80 | } 81 | 82 | // 83 | // Appending methods. 
84 | // 85 | 86 | inline DMScu_VirtualString &append (const_pointer rhs) throw () { 87 | 88 | ::strcat (string_, rhs); 89 | return (*this); 90 | } 91 | inline DMScu_VirtualString & 92 | append (const DMScu_VirtualString &rhs) throw () { 93 | 94 | return (append (rhs.c_str ())); 95 | } 96 | inline DMScu_VirtualString &operator += (const_pointer rhs) throw () { 97 | 98 | return (append (rhs)); 99 | } 100 | inline DMScu_VirtualString & 101 | operator += (const DMScu_VirtualString &rhs) throw () { 102 | 103 | return (append (rhs.c_str ())); 104 | } 105 | 106 | inline size_type 107 | find (const_reference token, size_type pos = 0) const throw () { 108 | 109 | size_type counter = 0; 110 | 111 | for (const_pointer itr = &(string_ [pos]); *itr; ++itr, ++counter) 112 | if (string_ [pos + counter] == token) 113 | return (pos + counter); 114 | 115 | return (npos); 116 | } 117 | inline size_type 118 | find (const_pointer token, size_type pos = 0) const throw () { 119 | 120 | const size_type token_len = ::strlen (token); 121 | const size_type self_len = size (); 122 | 123 | if ((token_len + pos) > self_len) 124 | return (npos); 125 | 126 | size_type counter = 0; 127 | 128 | for (const_pointer itr = &(string_ [pos]); 129 | itr + token_len - begin () <= self_len; ++itr, ++counter) 130 | if (! 
::strncmp (token, itr, token_len)) 131 | return (pos + counter); 132 | 133 | return (npos); 134 | } 135 | inline size_type find (const DMScu_VirtualString &token, 136 | size_type pos = 0) const throw () { 137 | 138 | const size_type len = size (); 139 | 140 | return (find (token.c_str (), pos)); 141 | } 142 | 143 | // Replaces the substring statring at pos with length n with s 144 | // 145 | inline DMScu_VirtualString & 146 | replace (size_type pos, size_type n, const_pointer s) { 147 | 148 | if (*s == 0) { 149 | size_type i = pos; 150 | 151 | for (; string_ [i]; ++i) 152 | string_ [i] = string_ [i + 1]; 153 | string_ [i] = string_ [i + 1]; 154 | } 155 | else { 156 | bool overwrote_null = false; 157 | size_type i = 0; 158 | 159 | while (s [i]) { 160 | if (string_ [i + pos] == 0) 161 | overwrote_null = 0; 162 | if (i >= n) 163 | string_ [i + pos + 1] = string_ [i + pos]; 164 | string_ [i + pos] = s [i]; 165 | ++i; 166 | } 167 | if (overwrote_null) 168 | string_ [i + pos] = 0; 169 | } 170 | 171 | return (*this); 172 | } 173 | 174 | inline int printf (const char *format_str, ...) throw () { 175 | 176 | va_list argument_ptr; 177 | 178 | va_start (argument_ptr, format_str); 179 | 180 | const int ret = ::vsprintf (string_, format_str, argument_ptr); 181 | 182 | va_end (argument_ptr); 183 | return (ret); 184 | } 185 | 186 | inline int append_printf (const char *format_str, ...) throw () { 187 | 188 | va_list argument_ptr; 189 | 190 | va_start (argument_ptr, format_str); 191 | 192 | const int ret = 193 | ::vsprintf (string_ + size (), format_str, argument_ptr); 194 | 195 | va_end (argument_ptr); 196 | return (ret); 197 | } 198 | 199 | // Comparison methods. 
200 | // 201 | inline int compare (const_pointer rhs) const throw () { 202 | 203 | return (::strcmp (string_, rhs)); 204 | } 205 | inline int compare (const DMScu_VirtualString &rhs) const throw () { 206 | 207 | return (compare (rhs.c_str ())); 208 | } 209 | 210 | inline bool operator == (const_pointer rhs) const throw () { 211 | 212 | return (compare (rhs) == 0); 213 | } 214 | inline bool 215 | operator == (const DMScu_VirtualString &rhs) const throw () { 216 | 217 | return (*this == rhs.c_str ()); 218 | } 219 | inline bool operator != (const_pointer rhs) const throw () { 220 | 221 | return (compare (rhs) != 0); 222 | } 223 | inline bool 224 | operator != (const DMScu_VirtualString &rhs) const throw () { 225 | 226 | return (*this != rhs.c_str ()); 227 | } 228 | inline bool operator > (const_pointer rhs) const throw () { 229 | 230 | return (compare (rhs) > 0); 231 | } 232 | inline bool 233 | operator > (const DMScu_VirtualString &rhs) const throw () { 234 | 235 | return (*this > rhs.c_str ()); 236 | } 237 | inline bool operator < (const_pointer rhs) const throw () { 238 | 239 | return (compare (rhs) < 0); 240 | } 241 | inline bool 242 | operator < (const DMScu_VirtualString &rhs) const throw () { 243 | 244 | return (*this < rhs.c_str ()); 245 | } 246 | 247 | // char based access methods. 248 | // 249 | inline const_reference operator [] (size_type index) const throw () { 250 | 251 | return (string_ [index]); 252 | } 253 | inline reference operator [] (size_type index) throw () { 254 | 255 | return (string_ [index]); 256 | } 257 | 258 | inline void clear () throw () { *string_ = 0; } 259 | 260 | // const utility methods. 261 | // 262 | inline const_pointer c_str () const throw () { return (string_); } 263 | inline const_pointer sub_c_str (size_type offset) const throw () { 264 | 265 | return (offset != npos ? 
string_ + offset : NULL); 266 | } 267 | inline size_type size () const throw () { return (::strlen(string_)); } 268 | inline bool empty () const throw () { return (*string_ == 0); } 269 | 270 | private: 271 | 272 | pointer string_; 273 | 274 | // The semantics of this class does not allow the following two 275 | // methods, therefore they are prohibited. 276 | // 277 | DMScu_VirtualString (); 278 | DMScu_VirtualString (const DMScu_VirtualString &); 279 | }; 280 | 281 | // ---------------------------------------------------------------------------- 282 | 283 | // This is a fixed size NTBS. Its sole purpose is performance. Most often 284 | // programmers use utility strings with known upper limit size. 285 | // DMScu_FixedSizeString makes no dynamic allocation/deallocation and is 286 | // strictly stack based. This will improve the performance of multi-threaded 287 | // applications that use a lot of utiltiy strings. 288 | // 289 | // NOTE: DMScu_FixedSizeString makes no boundary checks. It is the 290 | // responsibility of the programmer to take care of that. 291 | // 292 | template 293 | class DMScu_FixedSizeString : public DMScu_VirtualString { 294 | 295 | public: 296 | 297 | inline DMScu_FixedSizeString () throw () 298 | : DMScu_VirtualString (buffer_) { *buffer_ = 0; } 299 | inline DMScu_FixedSizeString (const_pointer str) throw () 300 | : DMScu_VirtualString (buffer_) { *this = str; } 301 | inline DMScu_FixedSizeString (const DMScu_FixedSizeString &that) 302 | throw () 303 | : DMScu_VirtualString (buffer_) { *this = that; } 304 | inline DMScu_FixedSizeString (const DMScu_VirtualString &that) throw () 305 | : DMScu_VirtualString (buffer_) { *this = that; } 306 | 307 | // Assignment methods which cannot be inherited or virtual. 
308 | // 309 | inline DMScu_FixedSizeString & 310 | operator = (const DMScu_FixedSizeString &rhs) throw () { 311 | 312 | ::strcpy (buffer_, rhs.buffer_); 313 | return (*this); 314 | } 315 | inline DMScu_FixedSizeString &operator = (const_pointer rhs) throw () { 316 | 317 | ::strncpy (buffer_, rhs, cu_SIZE); 318 | buffer_ [cu_SIZE] = 0; 319 | return (*this); 320 | } 321 | inline DMScu_FixedSizeString & 322 | operator = (const DMScu_VirtualString &rhs) throw () { 323 | 324 | *this = rhs.c_str (); 325 | return (*this); 326 | } 327 | 328 | static inline size_type capacity () throw () { return (cu_SIZE); } 329 | 330 | private: 331 | 332 | value_type buffer_ [cu_SIZE + 1]; 333 | }; 334 | 335 | // ---------------------------------------------------------------------------- 336 | 337 | inline std::ostream & 338 | operator << (std::ostream &lhs, const DMScu_VirtualString &rhs) { 339 | 340 | return (lhs << rhs.c_str ()); 341 | } 342 | 343 | // ---------------------------------------------------------------------------- 344 | 345 | #undef _INCLUDED_DMScu_FixedSizeString_h 346 | #define _INCLUDED_DMScu_FixedSizeString_h 1 347 | #endif // _INCLUDED_DMScu_FixedSizeString_h 348 | 349 | // Local Variables: 350 | // mode:C++ 351 | // tab-width:4 352 | // c-basic-offset:4 353 | // End: 354 | -------------------------------------------------------------------------------- /include/DFVisitors.h: -------------------------------------------------------------------------------- 1 | // Hossein Moein 2 | // September 22, 2017 3 | // Copyright (C) 2017-2018 Hossein Moein 4 | // Distributed under the BSD Software License (see file License) 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | // ---------------------------------------------------------------------------- 14 | 15 | namespace hmdf 16 | { 17 | 18 | template 19 | struct MeanVisitor { 20 | 21 | private: 22 | 23 | T mean_ { T(0) }; 24 | std::size_t cnt_ { 0 }; 25 | 26 | public: 27 | 28 | using 
value_type = T; 29 | 30 | inline void operator() (const TS_T &, const T &val) { 31 | 32 | mean_ += val; 33 | cnt_ +=1; 34 | } 35 | inline void reset () { mean_ = T(0); cnt_ = 0; } 36 | inline std::size_t get_count () const { return (cnt_); } 37 | inline T get_sum () const { return (mean_); } 38 | inline T get_value () const { return (mean_ / T(cnt_)); } 39 | }; 40 | 41 | // ---------------------------------------------------------------------------- 42 | 43 | template::value, T>::type> 47 | struct SumVisitor { 48 | 49 | private: 50 | 51 | T sum_ { T(0) }; 52 | 53 | public: 54 | 55 | using value_type = T; 56 | 57 | inline void operator() (const TS_T &, const T &val) { 58 | 59 | sum_ += val; 60 | } 61 | inline void reset () { sum_ = T(0); } 62 | inline T get_value () const { return (sum_); } 63 | }; 64 | 65 | // ---------------------------------------------------------------------------- 66 | 67 | template::value, T>::type> 71 | struct MaxVisitor { 72 | 73 | private: 74 | 75 | T max_ { std::numeric_limits::min() }; 76 | 77 | public: 78 | 79 | using value_type = T; 80 | 81 | inline void operator() (const TS_T &, const T &val) { 82 | 83 | if (val > max_) max_ = val; 84 | } 85 | inline void reset () { max_ = std::numeric_limits::min(); } 86 | inline T get_value () const { return (max_); } 87 | }; 88 | 89 | // ---------------------------------------------------------------------------- 90 | 91 | template::value, T>::type> 95 | struct MinVisitor { 96 | 97 | private: 98 | 99 | T min_ { std::numeric_limits::max() }; 100 | 101 | public: 102 | 103 | using value_type = T; 104 | 105 | inline void operator() (const TS_T &, const T &val) { 106 | 107 | if (val < min_) min_ = val; 108 | } 109 | inline void reset () { min_ = std::numeric_limits::max(); } 110 | inline T get_value () const { return (min_); } 111 | }; 112 | 113 | // ---------------------------------------------------------------------------- 114 | 115 | template::value, T>::type> 119 | struct CovVisitor { 120 | 121 | 
private: 122 | 123 | T total1_ { T(0) }; 124 | T total2_ { T(0) }; 125 | T dot_prod_ { T(0) }; 126 | T dot_prod1_ { T(0) }; 127 | T dot_prod2_ { T(0) }; 128 | std::size_t cnt_ { 0 }; 129 | const unsigned char b_; 130 | 131 | public: 132 | 133 | using value_type = T; 134 | 135 | explicit CovVisitor (std::size_t bias = 1) : b_ (bias) { } 136 | inline void operator() (const TS_T &, const T &val1, const T &val2) { 137 | 138 | total1_ += val1; 139 | total2_ += val2; 140 | dot_prod_ += (val1 * val2); 141 | dot_prod1_ += (val1 * val1); 142 | dot_prod2_ += (val2 * val2); 143 | cnt_ += 1; 144 | } 145 | inline void reset () { 146 | 147 | total1_ = total2_ = dot_prod_ = dot_prod1_ = dot_prod2_ = T(0); 148 | cnt_ = 0; 149 | } 150 | inline T get_value () const { 151 | 152 | return ((dot_prod_ - (total1_ * total2_) / T(cnt_)) / (T(cnt_) - T(1))); 153 | } 154 | 155 | inline T get_var1 () const { 156 | 157 | return((dot_prod1_ - (total1_ * total1_) / T(cnt_)) / (T(cnt_) - T(1))); 158 | } 159 | inline T get_var2 () const { 160 | 161 | return((dot_prod2_ - (total2_ * total2_) / T(cnt_)) / (T(cnt_) - T(1))); 162 | } 163 | }; 164 | 165 | // ---------------------------------------------------------------------------- 166 | 167 | template::value, T>::type> 171 | struct VarVisitor { 172 | 173 | private: 174 | 175 | CovVisitor cov_; 176 | 177 | public: 178 | 179 | using value_type = T; 180 | 181 | explicit VarVisitor (std::size_t bias = 1) : cov_ (bias) { } 182 | inline void operator() (const TS_T &idx, const T &val) { 183 | 184 | cov_ (idx, val, val); 185 | } 186 | inline void reset () { cov_.reset(); } 187 | inline T get_value () const { return (cov_.get_value()); } 188 | }; 189 | 190 | // ---------------------------------------------------------------------------- 191 | 192 | template::value, T>::type> 196 | struct StdVisitor { 197 | 198 | private: 199 | 200 | VarVisitor var_; 201 | 202 | public: 203 | 204 | using value_type = T; 205 | 206 | explicit StdVisitor (std::size_t bias = 1) : 
var_ (bias) { } 207 | inline void operator() (const TS_T &idx, const T &val) { 208 | 209 | var_ (idx, val); 210 | } 211 | inline void reset () { var_.reset(); } 212 | inline T get_value () const { return (::sqrt(var_.get_value())); } 213 | }; 214 | 215 | // ---------------------------------------------------------------------------- 216 | 217 | template::value, T>::type> 221 | struct CorrVisitor { 222 | 223 | private: 224 | 225 | CovVisitor cov_; 226 | 227 | public: 228 | 229 | using value_type = T; 230 | 231 | explicit CorrVisitor (std::size_t bias = 1) : cov_ (bias) { } 232 | inline void operator() (const TS_T &idx, const T &val1, const T &val2) { 233 | 234 | cov_ (idx, val1, val2); 235 | } 236 | inline void reset () { cov_.reset(); } 237 | inline T get_value () const { 238 | 239 | return (cov_.get_value() / 240 | (::sqrt(cov_.get_var1())* ::sqrt(cov_.get_var2()))); 241 | } 242 | }; 243 | 244 | // ---------------------------------------------------------------------------- 245 | 246 | template::value, T>::type> 250 | struct DotProdVisitor { 251 | 252 | private: 253 | 254 | T dot_prod_ { T(0) }; 255 | 256 | public: 257 | 258 | using value_type = T; 259 | 260 | inline void operator() (const TS_T &idx, const T &val1, const T &val2) { 261 | 262 | dot_prod_ += (val1 * val2); 263 | } 264 | inline void reset () { dot_prod_ = T(0); } 265 | inline T get_value () const { return (dot_prod_); } 266 | }; 267 | 268 | // ---------------------------------------------------------------------------- 269 | 270 | // One-pass stats calculation. 
271 | // 272 | template::value, T>::type> 276 | struct StatsVisitor { 277 | 278 | private: 279 | 280 | std::size_t n_ { 0 }; 281 | T m1_ { T(0) }; 282 | T m2_ { T(0) }; 283 | T m3_ { T(0) }; 284 | T m4_ { T(0) }; 285 | 286 | public: 287 | 288 | using value_type = T; 289 | 290 | inline void operator() (const TS_T &idx, const T &val) { 291 | 292 | T delta, delta_n, delta_n2, term1; 293 | std::size_t n1 = n_; 294 | 295 | n_ += 1; 296 | delta = val - m1_; 297 | delta_n = delta / T(n_); 298 | delta_n2 = delta_n * delta_n; 299 | term1 = delta * delta_n * T(n1); 300 | m1_ += delta_n; 301 | m4_ += term1 * delta_n2 * T(n_ * n_ - 3 * n_ + 3) + 302 | 6.0 * delta_n2 * m2_ - 303 | 4.0 * delta_n * m3_; 304 | m3_ += term1 * delta_n * T(n_ - 2) - 3.0 * delta_n * m2_; 305 | m2_ += term1; 306 | } 307 | inline void reset () { 308 | 309 | n_ = 0; 310 | m1_ = m2_ = m3_ = m4_ = T(0); 311 | } 312 | 313 | inline std::size_t get_count () const { return (n_); } 314 | inline T get_mean () const { return (m1_); } 315 | inline T get_variance () const { return (m2_ / (T(n_) - 1.0)); } 316 | inline T get_std () const { return (::sqrt(get_variance())); } 317 | inline T get_skew () const { 318 | 319 | return (::sqrt(n_) * m3_ / ::pow(m2_, 1.5)); 320 | } 321 | inline T get_kurtosis () const { 322 | 323 | return (T(n_) * m4_ / (m2_ * m2_) - 3.0); 324 | } 325 | }; 326 | 327 | // ---------------------------------------------------------------------------- 328 | 329 | // One pass simple linear regression 330 | // 331 | template::value, T>::type> 335 | struct SLRegressionVisitor { 336 | 337 | private: 338 | 339 | std::size_t n_ { 0 }; 340 | 341 | // Sum of the product of the difference between x and its mean and 342 | // the difference between y and its mean. 
343 | T s_xy_ { T(0) }; 344 | StatsVisitor x_stats_; 345 | StatsVisitor y_stats_; 346 | 347 | public: 348 | 349 | using value_type = T; 350 | 351 | inline void operator() (const TS_T &idx, const T &x, const T &y) { 352 | 353 | s_xy_ += (x_stats_.get_mean() - x) * 354 | (y_stats_.get_mean() - y) * 355 | T(n_) / T(n_ + 1); 356 | 357 | x_stats_(idx, x); 358 | y_stats_(idx, y); 359 | n_ += 1; 360 | } 361 | inline void reset () { 362 | 363 | n_ = 0; 364 | s_xy_ = T(0); 365 | x_stats_.reset(); 366 | y_stats_.reset(); 367 | } 368 | 369 | inline std::size_t get_count () const { return (n_); } 370 | inline T get_slope () const { 371 | 372 | // Sum of the squares of the difference between each x and 373 | // the mean x value. 374 | const T s_xx = x_stats_.get_variance() * T(n_ - 1); 375 | 376 | return (s_xy_ / s_xx); 377 | } 378 | inline T get_intercept () const { 379 | 380 | return (y_stats_.get_mean() - get_slope() * x_stats_.get_mean()); 381 | } 382 | inline T get_corr () const { 383 | 384 | const T t = x_stats_.get_std() * y_stats_.get_std(); 385 | 386 | return (s_xy_ / (T(n_ - 1) * t)); 387 | } 388 | }; 389 | 390 | // ---------------------------------------------------------------------------- 391 | 392 | struct GroupbySum 393 | : HeteroVector::visitor_base { 394 | 395 | private: 396 | 397 | int int_sum { 0 }; 398 | unsigned int uint_sum { 0 }; 399 | double double_sum { 0.0 }; 400 | long long_sum { 0 }; 401 | unsigned long ulong_sum { 0 }; 402 | std::string str_sum { }; 403 | 404 | public: 405 | 406 | template 407 | void 408 | operator() (const unsigned long &ts, const char *name, const T &datum) { 409 | 410 | return; 411 | } 412 | 413 | void reset () { 414 | 415 | int_sum = 0; 416 | uint_sum = 0; 417 | double_sum = 0.0; 418 | long_sum = 0; 419 | ulong_sum = 0; 420 | str_sum.clear(); 421 | } 422 | 423 | template void get_value (T &) const { return; } 424 | }; 425 | 426 | // ------------------------------------- 427 | 428 | template<> 429 | void GroupbySum:: 430 | 
operator() (const unsigned long &ts, 431 | const char *name, 432 | const int &datum) { int_sum += datum; } 433 | template<> 434 | void GroupbySum:: 435 | operator() (const unsigned long &ts, 436 | const char *name, 437 | const unsigned int &datum) { uint_sum += datum; } 438 | template<> 439 | void GroupbySum:: 440 | operator() (const unsigned long &ts, 441 | const char *name, 442 | const double &datum) { double_sum += datum; } 443 | template<> 444 | void GroupbySum:: 445 | operator() (const unsigned long &ts, 446 | const char *name, 447 | const long &datum) { long_sum += datum; } 448 | template<> 449 | void GroupbySum:: 450 | operator() (const unsigned long &ts, 451 | const char *name, 452 | const unsigned long &datum) { ulong_sum += datum; } 453 | template<> 454 | void GroupbySum:: 455 | operator() (const unsigned long &ts, 456 | const char *name, 457 | const std::string &datum) { 458 | 459 | if (str_sum.empty()) 460 | str_sum += datum; 461 | else { 462 | str_sum += '|'; 463 | str_sum += datum; 464 | } 465 | } 466 | 467 | // ------------------------------------- 468 | 469 | template<> 470 | void GroupbySum::get_value (int &v) const { v = int_sum; } 471 | template<> 472 | void GroupbySum:: 473 | get_value (unsigned int &v) const { v = uint_sum; } 474 | template<> 475 | void GroupbySum::get_value (double &v) const { v = double_sum; } 476 | template<> 477 | void GroupbySum::get_value (long &v) const { v = long_sum; } 478 | template<> 479 | void GroupbySum:: 480 | get_value(unsigned long &v) const { v = ulong_sum; } 481 | template<> 482 | void GroupbySum:: 483 | get_value (std::string &v) const { v = str_sum; } 484 | 485 | } // namespace hmdf 486 | 487 | // ---------------------------------------------------------------------------- 488 | 489 | // Local Variables: 490 | // mode:C++ 491 | // tab-width:4 492 | // c-basic-offset:4 493 | // End: 494 | -------------------------------------------------------------------------------- /include/DataFrame_get.tcc: 
-------------------------------------------------------------------------------- 1 | // Hossein Moein 2 | // September 12, 2017 3 | // Copyright (C) 2017-2018 Hossein Moein 4 | // Distributed under the BSD Software License (see file License) 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | // ---------------------------------------------------------------------------- 11 | 12 | namespace hmdf 13 | { 14 | 15 | template class DS> 16 | template 17 | DS &DataFrame::get_column (const char *name) { 18 | 19 | auto iter = data_tb_.find (name); 20 | 21 | if (iter == data_tb_.end()) { 22 | char buffer [512]; 23 | 24 | sprintf (buffer, "DataFrame::get_column(): ERROR: " 25 | "Cannot find column '%s'", 26 | name); 27 | throw ColNotFound (buffer); 28 | } 29 | 30 | DataVec &hv = data_[iter->second]; 31 | 32 | return (hv.get_vec>()); 33 | } 34 | 35 | // ---------------------------------------------------------------------------- 36 | 37 | template class DS> 38 | template 39 | const DS &DataFrame::get_column (const char *name) const { 40 | 41 | return (const_cast(this)->get_column(name)); 42 | } 43 | 44 | // ---------------------------------------------------------------------------- 45 | 46 | template class DS> 47 | template 48 | void DataFrame::multi_visit (Ts ... 
args) const { 49 | 50 | auto args_tuple = std::tuple(args ...); 51 | auto fc = [this](auto &pa) mutable -> void { 52 | 53 | auto &functor = *(pa.second); 54 | 55 | using T = 56 | typename std::remove_reference::type::value_type; 57 | using V = 58 | typename std::remove_const< 59 | typename std::remove_reference::type>::type; 60 | 61 | this->visit(pa.first, const_cast(functor)); 62 | }; 63 | 64 | for_each_in_tuple_ (args_tuple, fc); 65 | } 66 | 67 | // ---------------------------------------------------------------------------- 68 | 69 | template class DS> 70 | template 71 | V &DataFrame::visit (const char *name, V &visitor) const { 72 | 73 | const auto iter = data_tb_.find (name); 74 | 75 | if (iter == data_tb_.end()) { 76 | char buffer [512]; 77 | 78 | sprintf (buffer, 79 | "DataFrame::visit(1): ERROR: Cannot find column '%s'", 80 | name); 81 | throw ColNotFound (buffer); 82 | } 83 | 84 | const DataVec &hv = data_[iter->second]; 85 | const DS &vec = hv.get_vec>(); 86 | const size_type idx_s = timestamps_.size(); 87 | const size_type data_s = vec.size(); 88 | 89 | for (size_type i = 0; i < idx_s; ++i) 90 | visitor (timestamps_[i], i < data_s ? 
vec[i] : _get_nan()); 91 | 92 | return (visitor); 93 | } 94 | 95 | // ---------------------------------------------------------------------------- 96 | 97 | template class DS> 98 | template 99 | V &&DataFrame:: 100 | visit (const char *name1, const char *name2, V &&visitor) const { 101 | 102 | const auto iter1 = data_tb_.find (name1); 103 | const auto iter2 = data_tb_.find (name2); 104 | 105 | if (iter1 == data_tb_.end()) { 106 | char buffer [512]; 107 | 108 | sprintf (buffer, 109 | "DataFrame::visit(2): ERROR: Cannot find column '%s'", 110 | name1); 111 | throw ColNotFound (buffer); 112 | } 113 | if (iter2 == data_tb_.end()) { 114 | char buffer [512]; 115 | 116 | sprintf (buffer, 117 | "DataFrame::visit(2): ERROR: Cannot find column '%s'", 118 | name2); 119 | throw ColNotFound (buffer); 120 | } 121 | 122 | const DataVec &hv1 = data_[iter1->second]; 123 | const DataVec &hv2 = data_[iter2->second]; 124 | const DS &vec1 = hv1.get_vec>(); 125 | const DS &vec2 = hv2.get_vec>(); 126 | const size_type idx_s = timestamps_.size(); 127 | const size_type data_s1 = vec1.size(); 128 | const size_type data_s2 = vec2.size(); 129 | 130 | for (size_type i = 0; i < idx_s; ++i) 131 | visitor (timestamps_[i], 132 | i < data_s1 ? vec1[i] : _get_nan(), 133 | i < data_s2 ? 
vec2[i] : _get_nan()); 134 | 135 | return (visitor); 136 | } 137 | 138 | // ---------------------------------------------------------------------------- 139 | 140 | template class DS> 141 | template 142 | V &&DataFrame:: 143 | visit (const char *name1, 144 | const char *name2, 145 | const char *name3, 146 | V &&visitor) const { 147 | 148 | const auto iter1 = data_tb_.find (name1); 149 | const auto iter2 = data_tb_.find (name2); 150 | const auto iter3 = data_tb_.find (name3); 151 | 152 | if (iter1 == data_tb_.end()) { 153 | char buffer [512]; 154 | 155 | sprintf (buffer, 156 | "DataFrame::visit(3): ERROR: Cannot find column '%s'", 157 | name1); 158 | throw ColNotFound (buffer); 159 | } 160 | if (iter2 == data_tb_.end()) { 161 | char buffer [512]; 162 | 163 | sprintf (buffer, 164 | "DataFrame::visit(3): ERROR: Cannot find column '%s'", 165 | name2); 166 | throw ColNotFound (buffer); 167 | } 168 | if (iter3 == data_tb_.end()) { 169 | char buffer [512]; 170 | 171 | sprintf (buffer, 172 | "DataFrame::visit(3): ERROR: Cannot find column '%s'", 173 | name3); 174 | throw ColNotFound (buffer); 175 | } 176 | 177 | const DataVec &hv1 = data_[iter1->second]; 178 | const DataVec &hv2 = data_[iter2->second]; 179 | const DataVec &hv3 = data_[iter3->second]; 180 | const DS &vec1 = hv1.get_vec>(); 181 | const DS &vec2 = hv2.get_vec>(); 182 | const DS &vec3 = hv3.get_vec>(); 183 | const size_type idx_s = timestamps_.size(); 184 | const size_type data_s1 = vec1.size(); 185 | const size_type data_s2 = vec2.size(); 186 | const size_type data_s3 = vec3.size(); 187 | 188 | for (size_type i = 0; i < idx_s; ++i) 189 | visitor (timestamps_[i], 190 | i < data_s1 ? vec1[i] : _get_nan(), 191 | i < data_s2 ? vec2[i] : _get_nan(), 192 | i < data_s3 ? 
vec3[i] : _get_nan()); 193 | 194 | return (visitor); 195 | } 196 | 197 | // ---------------------------------------------------------------------------- 198 | 199 | template class DS> 200 | template 201 | V &&DataFrame:: 202 | visit (const char *name1, 203 | const char *name2, 204 | const char *name3, 205 | const char *name4, 206 | V &&visitor) const { 207 | 208 | const auto iter1 = data_tb_.find (name1); 209 | const auto iter2 = data_tb_.find (name2); 210 | const auto iter3 = data_tb_.find (name3); 211 | const auto iter4 = data_tb_.find (name4); 212 | 213 | if (iter1 == data_tb_.end()) { 214 | char buffer [512]; 215 | 216 | sprintf (buffer, 217 | "DataFrame::visit(4): ERROR: Cannot find column '%s'", 218 | name1); 219 | throw ColNotFound (buffer); 220 | } 221 | if (iter2 == data_tb_.end()) { 222 | char buffer [512]; 223 | 224 | sprintf (buffer, 225 | "DataFrame::visit(4): ERROR: Cannot find column '%s'", 226 | name2); 227 | throw ColNotFound (buffer); 228 | } 229 | if (iter3 == data_tb_.end()) { 230 | char buffer [512]; 231 | 232 | sprintf (buffer, 233 | "DataFrame::visit(4): ERROR: Cannot find column '%s'", 234 | name3); 235 | throw ColNotFound (buffer); 236 | } 237 | if (iter4 == data_tb_.end()) { 238 | char buffer [512]; 239 | 240 | sprintf (buffer, 241 | "DataFrame::visit(4): ERROR: Cannot find column '%s'", 242 | name4); 243 | throw ColNotFound (buffer); 244 | } 245 | 246 | const DataVec &hv1 = data_[iter1->second]; 247 | const DataVec &hv2 = data_[iter2->second]; 248 | const DataVec &hv3 = data_[iter3->second]; 249 | const DataVec &hv4 = data_[iter4->second]; 250 | const DS &vec1 = hv1.get_vec>(); 251 | const DS &vec2 = hv2.get_vec>(); 252 | const DS &vec3 = hv3.get_vec>(); 253 | const DS &vec4 = hv4.get_vec>(); 254 | const size_type idx_s = timestamps_.size(); 255 | const size_type data_s1 = vec1.size(); 256 | const size_type data_s2 = vec2.size(); 257 | const size_type data_s3 = vec3.size(); 258 | const size_type data_s4 = vec4.size(); 259 | 260 | for 
(size_type i = 0; i < idx_s; ++i) 261 | visitor (timestamps_[i], 262 | i < data_s1 ? vec1[i] : _get_nan(), 263 | i < data_s2 ? vec2[i] : _get_nan(), 264 | i < data_s3 ? vec3[i] : _get_nan(), 265 | i < data_s4 ? vec4[i] : _get_nan()); 266 | 267 | return (visitor); 268 | } 269 | 270 | // ---------------------------------------------------------------------------- 271 | 272 | template class DS> 273 | template 275 | V &&DataFrame:: 276 | visit (const char *name1, 277 | const char *name2, 278 | const char *name3, 279 | const char *name4, 280 | const char *name5, 281 | V &&visitor) const { 282 | 283 | const auto iter1 = data_tb_.find (name1); 284 | const auto iter2 = data_tb_.find (name2); 285 | const auto iter3 = data_tb_.find (name3); 286 | const auto iter4 = data_tb_.find (name4); 287 | const auto iter5 = data_tb_.find (name5); 288 | 289 | if (iter1 == data_tb_.end()) { 290 | char buffer [512]; 291 | 292 | sprintf (buffer, 293 | "DataFrame::visit(5): ERROR: Cannot find column '%s'", 294 | name1); 295 | throw ColNotFound (buffer); 296 | } 297 | if (iter2 == data_tb_.end()) { 298 | char buffer [512]; 299 | 300 | sprintf (buffer, 301 | "DataFrame::visit(5): ERROR: Cannot find column '%s'", 302 | name2); 303 | throw ColNotFound (buffer); 304 | } 305 | if (iter3 == data_tb_.end()) { 306 | char buffer [512]; 307 | 308 | sprintf (buffer, 309 | "DataFrame::visit(5): ERROR: Cannot find column '%s'", 310 | name3); 311 | throw ColNotFound (buffer); 312 | } 313 | if (iter4 == data_tb_.end()) { 314 | char buffer [512]; 315 | 316 | sprintf (buffer, 317 | "DataFrame::visit(5): ERROR: Cannot find column '%s'", 318 | name4); 319 | throw ColNotFound (buffer); 320 | } 321 | if (iter5 == data_tb_.end()) { 322 | char buffer [512]; 323 | 324 | sprintf (buffer, 325 | "DataFrame::visit(5): ERROR: Cannot find column '%s'", 326 | name5); 327 | throw ColNotFound (buffer); 328 | } 329 | 330 | const DataVec &hv1 = data_[iter1->second]; 331 | const DataVec &hv2 = data_[iter2->second]; 332 | const 
DataVec &hv3 = data_[iter3->second]; 333 | const DataVec &hv4 = data_[iter4->second]; 334 | const DataVec &hv5 = data_[iter5->second]; 335 | const DS &vec1 = hv1.get_vec>(); 336 | const DS &vec2 = hv2.get_vec>(); 337 | const DS &vec3 = hv3.get_vec>(); 338 | const DS &vec4 = hv4.get_vec>(); 339 | const DS &vec5 = hv5.get_vec>(); 340 | const size_type idx_s = timestamps_.size(); 341 | const size_type data_s1 = vec1.size(); 342 | const size_type data_s2 = vec2.size(); 343 | const size_type data_s3 = vec3.size(); 344 | const size_type data_s4 = vec4.size(); 345 | const size_type data_s5 = vec5.size(); 346 | 347 | for (size_type i = 0; i < idx_s; ++i) 348 | visitor (timestamps_[i], 349 | i < data_s1 ? vec1[i] : _get_nan(), 350 | i < data_s2 ? vec2[i] : _get_nan(), 351 | i < data_s3 ? vec3[i] : _get_nan(), 352 | i < data_s4 ? vec4[i] : _get_nan(), 353 | i < data_s5 ? vec5[i] : _get_nan()); 354 | 355 | return (visitor); 356 | } 357 | 358 | // ---------------------------------------------------------------------------- 359 | 360 | template class DS> 361 | template 362 | DataFrame 363 | DataFrame::get_data_by_idx (TS begin, TS end) const { 364 | 365 | const auto &lower = 366 | std::lower_bound (timestamps_.begin(), timestamps_.end(), begin); 367 | const auto &upper = 368 | std::upper_bound (timestamps_.begin(), timestamps_.end(), end); 369 | DataFrame df; 370 | 371 | if (lower != timestamps_.end()) { 372 | df.load_index(lower, upper); 373 | 374 | const size_type b_dist = std::distance(timestamps_.begin(), lower); 375 | const size_type e_dist = std::distance(timestamps_.begin(), 376 | upper < timestamps_.end() 377 | ? 
upper 378 | : timestamps_.end()); 379 | 380 | for (auto &iter : data_tb_) { 381 | load_functor_ functor (iter.first.c_str(), 382 | b_dist, 383 | e_dist, 384 | df); 385 | 386 | data_[iter.second].change(functor); 387 | } 388 | } 389 | 390 | return (df); 391 | } 392 | 393 | // ---------------------------------------------------------------------------- 394 | 395 | template class DS> 396 | template 397 | DataFrame 398 | DataFrame::get_data_by_loc (size_type begin, size_type end) const { 399 | 400 | DataFrame df; 401 | 402 | if (end < timestamps_.size() && begin <= end) { 403 | df.load_index(timestamps_.begin() + begin, timestamps_.begin() + end); 404 | 405 | for (auto &iter : data_tb_) { 406 | load_functor_ functor (iter.first.c_str(), 407 | begin, 408 | end, 409 | df); 410 | 411 | data_[iter.second].change(functor); 412 | } 413 | } 414 | 415 | return (df); 416 | } 417 | 418 | } // namespace hmdf 419 | 420 | // ---------------------------------------------------------------------------- 421 | 422 | // Local Variables: 423 | // mode:C++ 424 | // tab-width:4 425 | // c-basic-offset:4 426 | // End: 427 | -------------------------------------------------------------------------------- /DMScu/src/DMScu_FileBase.cc: -------------------------------------------------------------------------------- 1 | // Hossein Moein 2 | // September 21, 2007 3 | // Copyright (C) 2017-2018 Hossein Moein 4 | // Distributed under the BSD Software License (see file License) 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | // ---------------------------------------------------------------------------- 13 | 14 | bool DMScu_FileBase::_translate_open_mode () throw () { 15 | 16 | switch (_get_open_mode ()) { 17 | 18 | case _read_: 19 | _file_flags = _s_read_ | _in_use_; 20 | break; 21 | case _bread_: 22 | _file_flags = _s_bread_ | _in_use_; 23 | break; 24 | case _write_: 25 | _file_flags = _s_write_ | _s_read_ | _in_use_; 26 | break; 27 | case _bwrite_: 28 | _file_flags = 
_s_bwrite_ | _s_bread_ | _in_use_; 29 | break; 30 | case _append_: 31 | _file_flags = _s_append_ | _s_read_ | _in_use_; 32 | break; 33 | case _bappend_: 34 | _file_flags = _s_bappend_ | _s_bread_ | _in_use_; 35 | break; 36 | } 37 | 38 | return (true); 39 | } 40 | 41 | // ---------------------------------------------------------------------------- 42 | 43 | bool DMScu_FileBase::open () { 44 | 45 | if (is_open ()) 46 | throw DMScu_Exception ("DMScu_FileBase::open(): " 47 | "The device is already open"); 48 | 49 | const char *om = 50 | (_file_flags & _s_read_ && ! (_file_flags & _s_write_ || 51 | _file_flags & _s_append_) ? "r" 52 | : (_file_flags & _s_bread_ && ! (_file_flags & _s_bwrite_ || 53 | _file_flags & _s_bappend_) ? "rb" 54 | : (_file_flags & _s_write_ ? "w+" 55 | : (_file_flags & _s_bwrite_ ? "wb+" 56 | : (_file_flags & _s_append_ ? "a+" 57 | : (_file_flags & _s_bappend_ ? "ab+" 58 | : "r")))))); 59 | 60 | if (_file_flags & _already_opened_) 61 | if (_file_flags & _s_write_) 62 | om = "r+"; 63 | else if (_file_flags & _s_bwrite_) 64 | om = "rb+"; 65 | 66 | if ((stream_ = ::fopen (get_file_name (), om)) != NULL) { 67 | struct stat stat_data; 68 | 69 | if (! 
::stat (get_file_name (), &stat_data)) 70 | _file_size = stat_data.st_size; 71 | else { 72 | DMScu_FixedSizeString<4095> err; 73 | 74 | err.printf ("DMScu_FileBase::open(): ::stat(): (%d) %s --- %s", 75 | errno, strerror (errno), get_file_name ()); 76 | 77 | close (); 78 | stream_ = NULL; 79 | throw DMScu_Exception (err.c_str ()); 80 | } 81 | } 82 | else { 83 | DMScu_FixedSizeString<4095> err; 84 | 85 | err.printf ("DMScu_FileBase::open(): ::fopen(): (%d) %s --- %s (%s)", 86 | errno, strerror (errno), get_file_name (), om); 87 | throw DMScu_Exception (err.c_str ()); 88 | } 89 | 90 | _current_offset = 0; 91 | _set_buffer (buffer_size_); 92 | 93 | if (_file_flags & _s_append_ || _file_flags & _s_bappend_) { 94 | if (::fseek (stream_, 0, SEEK_END) < 0 ) { 95 | DMScu_FixedSizeString<2047> err; 96 | 97 | err.printf ("DMScu_FileBase::open(): ::fseek(): (%d) %s", 98 | errno, strerror (errno)); 99 | throw DMScu_Exception (err.c_str ()); 100 | } 101 | _current_offset = _file_size; 102 | } 103 | 104 | _file_flags |= _in_use_; 105 | _file_flags |= _already_opened_; 106 | _good_flag = true; 107 | return (true); 108 | } 109 | 110 | // ---------------------------------------------------------------------------- 111 | 112 | void DMScu_FileBase::_set_buffer (size_type bs) { 113 | 114 | buffer_size_ = bs; 115 | 116 | if (file_buffer_) { 117 | delete[] file_buffer_; 118 | file_buffer_ = NULL; 119 | } 120 | file_buffer_ = (buffer_size_ == 0) ? NULL : new char [buffer_size_]; 121 | // if (file_buffer_) 122 | // ::memset (file_buffer_, 0, buffer_size_); 123 | 124 | const int type = (buffer_size_ == 0) ? 
_IONBF : _IOFBF; 125 | 126 | if (::setvbuf (stream_, file_buffer_, type, buffer_size_) != 0) { 127 | DMScu_FixedSizeString<2047> err; 128 | 129 | err.printf ("DMScu_FileBase::_set_buffer(): ::setvbuf(): (%d) %s", 130 | errno, strerror (errno)); 131 | throw DMScu_Exception (err.c_str ()); 132 | } 133 | 134 | return; 135 | } 136 | 137 | // ---------------------------------------------------------------------------- 138 | 139 | DMScu_FileBase::size_type DMScu_FileBase:: 140 | read (void *data_ptr, size_type element_size, size_type element_count) 141 | throw () { 142 | 143 | size_type read_size = element_count; 144 | 145 | if (_current_offset + element_size * read_size < _file_size) 146 | _current_offset += element_size * read_size; 147 | else { 148 | read_size = (_file_size - _current_offset) / element_size; 149 | _current_offset = _file_size; 150 | } 151 | 152 | if (::fread (data_ptr, element_size, read_size, stream_) != read_size) { 153 | // DMScu_FixedSizeString<2047> err; 154 | 155 | // err.printf ("DMScu_FileBase::read(): ::fread(): (%d) %s", 156 | // errno, strerror (errno)); 157 | // throw DMScu_Exception (err.c_str ()); 158 | _good_flag = false; 159 | return (NOVAL); 160 | } 161 | 162 | _file_flags |= _touched_; 163 | return (read_size); 164 | } 165 | 166 | // ---------------------------------------------------------------------------- 167 | 168 | DMScu_FileBase::size_type DMScu_FileBase:: 169 | write (const void *data_ptr, size_type element_size, size_type element_count) { 170 | 171 | const flag_type flag = 172 | _s_bwrite_ | _s_bappend_ | _s_write_ | _s_append_; 173 | 174 | if (! 
(_file_flags & flag)) { 175 | DMScu_FixedSizeString<2047> err; 176 | 177 | err.printf ("DMScu_FileBase::write(): " 178 | "Bad file permission for the action requested."); 179 | throw DMScu_Exception (err.c_str ()); 180 | } 181 | 182 | if (::fwrite (data_ptr, 183 | element_size, 184 | element_count, 185 | stream_) != element_count) { 186 | DMScu_FixedSizeString<2047> err; 187 | 188 | err.printf ("DMScu_FileBase::write(): ::fwrite(): (%d) %s", 189 | errno, strerror (errno)); 190 | throw DMScu_Exception (err.c_str ()); 191 | } 192 | 193 | const size_type byte_count = element_size * element_count; 194 | const size_type growth = _current_offset + byte_count; 195 | 196 | if (growth > _file_size) 197 | _file_size += growth - _file_size; 198 | _current_offset += byte_count; 199 | _file_flags |= _written_; 200 | 201 | return (element_count); 202 | } 203 | 204 | // ---------------------------------------------------------------------------- 205 | 206 | int DMScu_FileBase::close () { 207 | 208 | if (::fclose (stream_) != 0) { 209 | DMScu_FixedSizeString<2047> err; 210 | 211 | err.printf ("DMScu_FileBase::close(): ::fclose(): (%d) %s", 212 | errno, strerror (errno)); 213 | throw DMScu_Exception (err.c_str ()); 214 | } 215 | 216 | stream_ = NULL; 217 | _file_flags &= ~_in_use_; 218 | _current_offset = 0; 219 | _file_size = 0; 220 | 221 | delete[] file_buffer_; 222 | file_buffer_ = NULL; 223 | 224 | return (0); 225 | } 226 | 227 | // ---------------------------------------------------------------------------- 228 | 229 | void DMScu_FileBase::unlink () { 230 | 231 | if (is_open ()) 232 | close (); 233 | 234 | if (::unlink (get_file_name ()) < 0) { 235 | DMScu_FixedSizeString<2047> err; 236 | 237 | err.printf ("DMScu_FileBase::unlink(): ::unlink(): (%d) %s", 238 | errno, strerror (errno)); 239 | throw DMScu_Exception (err.c_str ()); 240 | } 241 | 242 | return; 243 | } 244 | 245 | // ---------------------------------------------------------------------------- 246 | 247 | int 
DMScu_FileBase::put_back (unsigned char c) { 248 | 249 | if (_current_offset == 0) { 250 | DMScu_FixedSizeString<511> err; 251 | 252 | err.printf ("DMScu_FileBase::put_back(): " 253 | "Trying to pass the edge of the file. Under flow"); 254 | throw DMScu_Exception (err.c_str ()); 255 | } 256 | 257 | _current_offset -= 1; 258 | return (::ungetc (c, stream_)); 259 | } 260 | 261 | // ---------------------------------------------------------------------------- 262 | 263 | int DMScu_FileBase::get_char () throw () { 264 | 265 | const int rc = ::fgetc (stream_); 266 | 267 | _current_offset += (rc == EOF) ? 0 : 1; 268 | return (rc); 269 | } 270 | 271 | // ---------------------------------------------------------------------------- 272 | 273 | std::string DMScu_FileBase::get_string (const char *search_str) throw () { 274 | 275 | std::string slug; 276 | 277 | if (_file_flags & _s_bread_ || _file_flags & _s_bwrite_ || 278 | _file_flags & _s_bappend_) { 279 | return (slug); 280 | } 281 | 282 | // _current_offset is incremented in get_char() 283 | // 284 | while (_current_offset < _file_size) { 285 | const char c = static_cast(get_char ()); 286 | 287 | if (c == EOF) 288 | break; 289 | 290 | if (! 
_is_in_list (c , search_str)) { 291 | put_back (c); 292 | break; 293 | } 294 | slug += c; 295 | } 296 | 297 | return (slug); 298 | } 299 | 300 | // ---------------------------------------------------------------------------- 301 | 302 | std::string DMScu_FileBase::get_token (const char *delimit_str) throw () { 303 | 304 | std::string slug; 305 | 306 | if (_file_flags & _s_bread_ || _file_flags & _s_bwrite_ || 307 | _file_flags & _s_bappend_) { 308 | return (slug); 309 | } 310 | 311 | // _current_offset is incremented in get_char() 312 | // 313 | while (_current_offset < _file_size) { 314 | const char c = static_cast(get_char ()); 315 | 316 | if (c == EOF) 317 | break; 318 | 319 | if (_is_in_list (c , delimit_str)) 320 | break; 321 | slug += c; 322 | } 323 | 324 | return (slug); 325 | } 326 | 327 | // ---------------------------------------------------------------------------- 328 | 329 | DMScu_FileBase::size_type 330 | DMScu_FileBase::get_token (char delimit, char *buffer) throw () { 331 | 332 | if (_file_flags & _s_bread_ || _file_flags & _s_bwrite_ || 333 | _file_flags & _s_bappend_) { 334 | return (NOVAL); 335 | } 336 | 337 | size_type counter = 0; 338 | 339 | // _current_offset is incremented in get_char() 340 | // 341 | while (_current_offset < _file_size) { 342 | const char c = static_cast(get_char ()); 343 | 344 | if (c == EOF) 345 | break; 346 | 347 | if (c == delimit) 348 | break; 349 | 350 | buffer [counter] = c; 351 | counter += 1; 352 | } 353 | buffer [counter] = 0; 354 | 355 | return (counter); 356 | } 357 | 358 | // ---------------------------------------------------------------------------- 359 | 360 | int DMScu_FileBase::printf (const char *format_str, ...) 
throw () { 361 | 362 | va_list argument_ptr; 363 | 364 | va_start (argument_ptr, format_str); 365 | 366 | const int rc = ::vfprintf (stream_, format_str, argument_ptr); 367 | 368 | if (rc < 0) { 369 | _good_flag = false; 370 | return (-1); 371 | } 372 | 373 | va_end (argument_ptr); 374 | 375 | if (rc > 0) { 376 | _file_flags |= _written_; 377 | _current_offset += rc; 378 | _file_size = 379 | _current_offset > _file_size ? _current_offset : _file_size; 380 | } 381 | 382 | return (rc); 383 | } 384 | 385 | // ---------------------------------------------------------------------------- 386 | 387 | int DMScu_FileBase::put_char (int the_char) { 388 | 389 | const char tmp_char = the_char; 390 | 391 | if (write (&tmp_char, sizeof (char), 1) == 1) 392 | return (the_char); 393 | 394 | return (EOF); 395 | } 396 | 397 | // ---------------------------------------------------------------------------- 398 | 399 | int DMScu_FileBase::put_string (const char *the_str) throw () { 400 | 401 | return (printf ("%s", the_str)); 402 | } 403 | 404 | // ---------------------------------------------------------------------------- 405 | 406 | // Go to the 0-based line 407 | // 408 | DMScu_FileBase::size_type DMScu_FileBase::go_to_line (size_type line) { 409 | 410 | // It does not make sense to go to a particular line 411 | // in a binary file 412 | // 413 | if (_file_flags & _s_bwrite_ || _file_flags & _s_bappend_ || 414 | _file_flags & _s_bread_) { 415 | DMScu_FixedSizeString<2047> err; 416 | 417 | err.printf ("DMScu_FileBase::go_to_line(): " 418 | "Bad file permission for the action requested."); 419 | throw DMScu_Exception (err.c_str ()); 420 | } 421 | 422 | size_type counter = 0; 423 | 424 | seek (0, _seek_set_); 425 | while (_current_offset < _file_size && counter < line) { 426 | const char c = static_cast(get_char ()); 427 | 428 | if (c == EOF) 429 | break; 430 | else if (c == '\n') 431 | counter += 1; 432 | } 433 | 434 | return (_current_offset); 435 | } 436 | 437 | // 
---------------------------------------------------------------------------- 438 | 439 | int DMScu_FileBase::seek (size_type the_offset, SEEK_TYPE seek_type) 440 | throw () { // Just for maximum efficiency 441 | 442 | switch (seek_type) { 443 | 444 | case _seek_set_: 445 | { 446 | if (the_offset > _file_size) { 447 | _good_flag = false; 448 | return (-1); 449 | } 450 | break; 451 | } 452 | case _seek_cur_: 453 | { 454 | the_offset += _current_offset; 455 | if (the_offset > _file_size) { 456 | _good_flag = false; 457 | return (-1); 458 | } 459 | break; 460 | } 461 | case _seek_end_: 462 | { 463 | if (the_offset > _file_size) { 464 | _good_flag = false; 465 | return (-1); 466 | } 467 | else 468 | the_offset = _file_size - the_offset; 469 | break; 470 | } 471 | } 472 | 473 | if (::fseek (stream_, the_offset, SEEK_SET) < 0 ) { 474 | _good_flag = false; 475 | return (-1); 476 | } 477 | _current_offset = the_offset; 478 | 479 | return (0); 480 | } 481 | 482 | // ---------------------------------------------------------------------------- 483 | 484 | int DMScu_FileBase::truncate (size_type truncate_size) { 485 | 486 | const flag_type flag = 487 | _s_bwrite_ | _s_bappend_ | _s_write_ | _s_append_; 488 | 489 | if (! is_open () || ! (_file_flags & flag)) { 490 | DMScu_FixedSizeString<2047> err; 491 | 492 | err.printf ("DMScu_FileBase::truncate(): " 493 | "Bad file permission for the action requested."); 494 | throw DMScu_Exception (err.c_str ()); 495 | } 496 | 497 | if (::ftruncate (_get_file_desc (), truncate_size) < 0) { 498 | DMScu_FixedSizeString<2047> err; 499 | 500 | err.printf ("DMScu_FileBase::truncate(): ::ftruncate(): (%d) %s", 501 | errno, strerror (errno)); 502 | throw DMScu_Exception (err.c_str ()); 503 | } 504 | 505 | _file_size = truncate_size; 506 | seek (_current_offset > truncate_size ? 
truncate_size : _current_offset, 507 | _seek_set_); 508 | 509 | return (0); 510 | } 511 | 512 | // ---------------------------------------------------------------------------- 513 | 514 | // Local Variables: 515 | // mode:C++ 516 | // tab-width:4 517 | // c-basic-offset:4 518 | // End: 519 | -------------------------------------------------------------------------------- /src/datasci_tester.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | using namespace hmdf; 8 | 9 | // ----------------------------------------------------------------------------- 10 | 11 | struct my_visitor : HeteroVector::visitor_base { 12 | 13 | template 14 | void operator() (T &i) { i += i; std::cout << "-- " << i << std::endl;} 15 | }; 16 | 17 | // ----------------------------------------------------------------------------- 18 | 19 | struct sort_functor : HeteroVector::visitor_base { 20 | 21 | template 22 | bool operator() (const T &lhs, const T &rhs) { 23 | 24 | return (lhs < rhs); 25 | } 26 | }; 27 | 28 | // ----------------------------------------------------------------------------- 29 | 30 | struct change_functor : HeteroVector::visitor_base { 31 | 32 | void operator() (auto &val) { 33 | 34 | for (int i = 0; i < 10; ++i) 35 | // val.push_back( 36 | // DataFrame:: 37 | // _get_nan::type::value_type>()); 38 | val.push_back( 39 | typename std::remove_reference::type::value_type()); 40 | } 41 | }; 42 | 43 | // ----------------------------------------------------------------------------- 44 | 45 | int main(int argc, char *argv[]) { 46 | 47 | HeteroVector hv; 48 | HeteroVector hv2; 49 | HeteroVector hv3; 50 | 51 | const std::vector &int_vec = hv.get_vec(); 52 | 53 | hv.push_back (3); 54 | hv.emplace_back (4); 55 | hv.push_back (5); 56 | hv.emplace (int_vec.begin (), 10); 57 | hv.push_back (1); 58 | hv.push_back (0); 59 | 60 | hv.push_back (4.25); 61 | hv.push_back (5.6845); 62 | hv.push_back 
(6.1112); 63 | hv.push_back (1.05); 64 | hv.push_back (0.456783); 65 | hv.push_back (0.123); 66 | 67 | hv.push_back (std::string("str_1")); 68 | hv.push_back (std::string("str_2")); 69 | hv.push_back (std::string("str_3")); 70 | hv.push_back (std::string("abc")); 71 | hv.push_back (std::string("fas")); 72 | 73 | hv2 = hv; 74 | hv3 = std::move(hv2); 75 | 76 | const std::vector &dbl_vec = hv.get_vec(); 77 | 78 | for (const auto &iter : int_vec) 79 | std::cout << iter << std::endl; 80 | 81 | std::cout << std::endl; 82 | for (const auto &iter : dbl_vec) 83 | std::cout << iter << std::endl; 84 | 85 | my_visitor mv; 86 | 87 | std::cout << "Visiting ..." << std::endl; 88 | hv.visit(std::move(mv)); 89 | 90 | sort_functor sort_func; 91 | 92 | std::cout << "Sorting ..." << std::endl; 93 | hv.sort(std::move(sort_func)); 94 | std::cout << "Visiting ..." << std::endl; 95 | hv.visit(std::move(mv)); 96 | 97 | change_functor change_func; 98 | 99 | std::cout << "Changing ..." << std::endl; 100 | hv.change(std::move(change_func)); 101 | std::cout << "Visiting ..." 
<< std::endl; 102 | hv.visit(std::move(mv)); 103 | 104 | hv.resize(100, 5); 105 | hv.pop_back>(); 106 | hv.empty>(); 107 | hv.at>(5); 108 | hv.back>(); 109 | hv.front>(); 110 | 111 | // 112 | // ---------------------------------------- 113 | // 114 | 115 | std::cout << "\n\nNow testing Data Frame\n" << std::endl; 116 | 117 | typedef DataFrame MyDataFrame; 118 | 119 | MyDataFrame df; 120 | std::vector &col0 = 121 | df.create_column(static_cast("col_name")); 122 | 123 | std::vector intvec = { 1, 2, 3, 4, 5 }; 124 | std::vector dblvec = 125 | { 1.2345, 2.2345, 3.2345, 4.2345, 5.2345 }; 126 | std::vector dblvec2 = 127 | { 0.998, 0.3456, 0.056, 0.15678, 0.00345, 0.923, 0.06743, 0.1 }; 128 | std::vector strvec = 129 | { "Col_name", "Col_name", "Col_name", "Col_name", "Col_name" }; 130 | std::vector ulgvec = 131 | { 1UL, 2UL, 3UL, 4UL, 5UL, 8UL, 7UL, 6UL }; 132 | std::vector xulgvec = ulgvec; 133 | 134 | int rc = df.load_data(std::move(ulgvec), 135 | std::make_pair("int_col", intvec), 136 | std::make_pair("dbl_col", dblvec), 137 | std::make_pair("dbl_col_2", dblvec2), 138 | std::make_pair("str_col", strvec), 139 | std::make_pair("ul_col", xulgvec)); 140 | 141 | std::cout << "Return code " << rc 142 | << " should be " << 143 | ulgvec.size() + 144 | intvec.size() + 145 | dblvec.size() + 146 | dblvec2.size() + 147 | strvec.size() + 148 | ulgvec.size() 149 | << std::endl; 150 | 151 | df.load_index(ulgvec.begin(), ulgvec.end()); 152 | df.load_column("int_col", intvec.begin(), intvec.end(), true); 153 | df.load_column("str_col", strvec.begin(), strvec.end(), true); 154 | df.load_column("dbl_col", dblvec.begin(), dblvec.end(), true); 155 | df.load_column("dbl_col_2", dblvec2.begin(), dblvec2.end(), false); 156 | 157 | df.append_column("str_col", "Additional column"); 158 | df.append_column("dbl_col", 10.56); 159 | 160 | std::vector ivec = df.get_column ("int_col"); 161 | 162 | std::cout << "Data is: " << df.get_column ("dbl_col")[2] 163 | << std::endl; 164 | 165 | 
hmdf::MeanVisitor ivisitor; 166 | hmdf::MeanVisitor dvisitor; 167 | 168 | std::cout << "Integer average is: " 169 | << df.visit("int_col", ivisitor).get_value() 170 | << std::endl; 171 | std::cout << "Double average is: " 172 | << df.visit("dbl_col", dvisitor).get_value() 173 | << std::endl; 174 | 175 | df.get_column("dbl_col")[5] = 6.5; 176 | df.get_column("dbl_col")[6] = 7.5; 177 | df.get_column("dbl_col")[7] = 8.5; 178 | dvisitor.reset(); 179 | std::cout << "Double average is: " 180 | << df.visit("dbl_col", dvisitor).get_value() 181 | << std::endl; 182 | 183 | df.write(std::cout); 184 | 185 | std::cout << "Printing integer vector BEFORE making make_consistent ..." 186 | << std::endl; 187 | 188 | std::vector dvec = df.get_column ("dbl_col"); 189 | std::vector dvec2 = df.get_column ("dbl_col_2"); 190 | 191 | for (auto iter : dvec) 192 | std::cout << iter << " "; 193 | std::cout << std::endl; 194 | for (auto iter : dvec2) 195 | std::cout << iter << " "; 196 | std::cout << std::endl; 197 | 198 | df.make_consistent(); 199 | std::cout << "Printing integer vector AFTER making make_consistent ..." 200 | << std::endl; 201 | dvec = df.get_column ("dbl_col"); 202 | dvec2 = df.get_column ("dbl_col_2"); 203 | for (auto iter : dvec) 204 | std::cout << iter << " "; 205 | std::cout << std::endl; 206 | for (auto iter : dvec2) 207 | std::cout << iter << " "; 208 | std::cout << std::endl; 209 | 210 | df.sort(); 211 | std::cout << "Printing after sorting the index ..." << std::endl; 212 | dvec = df.get_column ("dbl_col"); 213 | dvec2 = df.get_column ("dbl_col_2"); 214 | for (auto iter : dvec) 215 | std::cout << iter << " "; 216 | std::cout << std::endl; 217 | for (auto iter : dvec2) 218 | std::cout << iter << " "; 219 | std::cout << std::endl; 220 | 221 | df.sort("dbl_col_2"); 222 | std::cout << "Printing after sorting the dbl_col_2 ..." 
<< std::endl; 223 | dvec = df.get_column ("dbl_col"); 224 | dvec2 = df.get_column ("dbl_col_2"); 225 | for (auto iter : dvec) 226 | std::cout << iter << " "; 227 | std::cout << std::endl; 228 | for (auto iter : dvec2) 229 | std::cout << iter << " "; 230 | std::cout << std::endl; 231 | 232 | MyDataFrame df2 = df.get_data_by_idx(3, 5); 233 | 234 | std::cout << "Printing the second df after get_data_by_idx() ..." 235 | << std::endl; 236 | dvec = df2.get_column ("dbl_col"); 237 | dvec2 = df2.get_column ("dbl_col_2"); 238 | for (auto iter : dvec) 239 | std::cout << iter << " "; 240 | std::cout << std::endl; 241 | for (auto iter : dvec2) 242 | std::cout << iter << " "; 243 | std::cout << std::endl; 244 | 245 | MyDataFrame df3 = df.get_data_by_loc(1, 2); 246 | 247 | std::cout << "Printing the second df after get_data_by_loc() ..." 248 | << std::endl; 249 | dvec = df3.get_column ("dbl_col"); 250 | dvec2 = df3.get_column ("dbl_col_2"); 251 | for (auto iter : dvec) 252 | std::cout << iter << " "; 253 | std::cout << std::endl; 254 | for (auto iter : dvec2) 255 | std::cout << iter << " "; 256 | std::cout << std::endl; 257 | 258 | hmdf::CorrVisitor corr_visitor; 259 | 260 | std::cout << "Correlation between dbl_col and dbl_col_2 is: " 261 | << df.visit("dbl_col", 262 | "dbl_col_2", 263 | corr_visitor).get_value() 264 | << std::endl; 265 | 266 | hmdf::StatsVisitor stats_visitor; 267 | 268 | df.visit("dbl_col", stats_visitor); 269 | std::cout << std::endl; 270 | dvec = df.get_column ("dbl_col"); 271 | for (auto iter : dvec) 272 | std::cout << iter << " "; 273 | std::cout << std::endl; 274 | std::cout << "Skewness of dbl_col is: " 275 | << stats_visitor.get_skew() 276 | << std::endl; 277 | std::cout << "Kurtosis of dbl_col is: " 278 | << stats_visitor.get_kurtosis() 279 | << std::endl; 280 | std::cout << "Mean of dbl_col is: " 281 | << stats_visitor.get_mean() 282 | << std::endl; 283 | std::cout << "Variamce of dbl_col is: " 284 | << stats_visitor.get_variance() 285 | << 
std::endl; 286 | 287 | std::cout <<"\nDoing simple linear regression between dbl_col and dbl_col_2" 288 | << std::endl; 289 | 290 | hmdf::SLRegressionVisitor slr_visitor; 291 | 292 | df.visit("dbl_col", "dbl_col_2", slr_visitor); 293 | std::cout << "Count of dbl_col and dbl_col_2 is: " 294 | << slr_visitor.get_count() << std::endl; 295 | std::cout << "Slope of dbl_col and dbl_col_2 is: " 296 | << slr_visitor.get_slope() << std::endl; 297 | std::cout << "Intercept of dbl_col and dbl_col_2 is: " 298 | << slr_visitor.get_intercept() << std::endl; 299 | std::cout << "Correlation of dbl_col and dbl_col_2 is: " 300 | << slr_visitor.get_corr() << std::endl; 301 | corr_visitor.reset(); 302 | std::cout << "Old correlation between dbl_col and dbl_col_2 is: " 303 | << df.visit("dbl_col", 304 | "dbl_col_2", 305 | corr_visitor).get_value() 306 | << std::endl; 307 | 308 | std::cout << "\nTesting GROUPBY:\n" << std::endl; 309 | 310 | std::vector ulgvec2 = 311 | { 123450, 123451, 123452, 123450, 123455, 123450, 123449, 312 | 123448, 123451, 123452, 123452, 123450, 123455, 123450, 313 | 123454, 123453, 123456, 123457, 123458, 123459, 123460, 314 | 123441, 123442, 123432, 123433, 123434, 123435, 123436 }; 315 | std::vector xulgvec2 = ulgvec2; 316 | std::vector intvec2 = 317 | { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 318 | 15, 20, 22, 23, 24, 25, 30, 33, 34, 35, 36, 40, 45, 46 }; 319 | std::vector xdblvec2 = 320 | { 1.2345, 2.2345, 3.2345, 4.2345, 5.2345, 3.0, 0.9999, 321 | 10.0, 4.25, 0.009, 1.111, 8.0, 2.2222, 3.3333, 322 | 11.0, 5.25, 1.009, 2.111, 9.0, 3.2222, 4.3333, 323 | 12.0, 6.25, 2.009, 3.111, 10.0, 4.2222, 5.3333 }; 324 | std::vector dblvec22 = 325 | { 0.998, 0.3456, 0.056, 0.15678, 0.00345, 0.923, 0.06743, 326 | 0.1, 0.0056, 0.07865, -0.9999, 0.0111, 0.1002, -0.8888, 327 | 0.14, 0.0456, 0.078654, -0.8999, 0.01119, 0.8002, -0.9888, 328 | 0.2, 0.1056, 0.87865, -0.6999, 0.4111, 0.1902, -0.4888 }; 329 | std::vector strvec2 = 330 | { "4% of something", "Description 
4/5", "This is bad", 331 | "3.4% of GDP", "Market drops", "Market pulls back", 332 | "$15 increase", "Running fast", "C++14 development", 333 | "Some explanation", "More strings", "Bonds vs. Equities", 334 | "Almost done", "Here comes the sun", "XXXX1", "XXXX04", 335 | "XXXX2", "XXXX3", "XXXX4", "XXXX4", "XXXX5", "XXXX6", 336 | "XXXX7", "XXXX10", "XXXX11", "XXXX01", "XXXX02", "XXXX03" }; 337 | 338 | MyDataFrame dfx; 339 | 340 | dfx.load_data(std::move(ulgvec2), 341 | std::make_pair("xint_col", intvec2), 342 | std::make_pair("dbl_col", xdblvec2), 343 | std::make_pair("dbl_col_2", dblvec22), 344 | std::make_pair("str_col", strvec2), 345 | std::make_pair("ul_col", xulgvec2)); 346 | dfx.write(std::cout); 347 | 348 | const MyDataFrame dfxx = 349 | dfx.groupby(GroupbySum()); 355 | 356 | dfxx.write(std::cout); 361 | 362 | const MyDataFrame dfxx2 = 363 | dfx.groupby(GroupbySum(), "str_col"); 369 | 370 | dfxx2.write(std::cout); 375 | 376 | std::future gb_fut = 377 | dfx.groupby_async(GroupbySum(), "dbl_col_2"); 383 | const MyDataFrame dfxx3 = gb_fut.get(); 384 | 385 | dfxx3.write(std::cout); 390 | 391 | std::cout << "\nTesting Async write\n" << std::endl; 392 | 393 | std::future fut = 394 | dfxx3.write_async(std::cout); 399 | 400 | fut.get(); 401 | 402 | std::cout << "\nTesting Bucketize()\n" << std::endl; 403 | 404 | std::future sort_fut = 405 | dfx.sort_async(); 406 | 407 | sort_fut.get(); 408 | dfx.write(std::cout); 413 | 414 | const MyDataFrame::TimeStamp interval = 4; 415 | std::future b_fut = 416 | dfx.bucketize_async(GroupbySum(), interval); 421 | const MyDataFrame buck_df = b_fut.get(); 422 | 423 | buck_df.write(std::cout, true); 428 | 429 | std::cout << "\nTesting read()\n" << std::endl; 430 | 431 | 432 | MyDataFrame df_read; 433 | std::future fut2 = 434 | df_read.read_async("/home/hossein/WindowsShare/Linux/" 435 | "src/DataSci/src/sample_data.csv"); 436 | 437 | fut2.get(); 438 | df_read.write(std::cout); 443 | 444 | std::cout << "\nTesting multi_visit()\n" << 
std::endl; 445 | 446 | hmdf::MeanVisitor ivisitor2; 447 | hmdf::MeanVisitor ulvisitor; 448 | hmdf::MeanVisitor dvisitor2; 449 | hmdf::MeanVisitor dvisitor22; 450 | 451 | dfx.multi_visit(std::make_pair("xint_col", &ivisitor2), 452 | std::make_pair("dbl_col", &dvisitor2), 453 | std::make_pair("dbl_col_2", &dvisitor22), 454 | std::make_pair("ul_col", &ulvisitor)); 455 | 456 | std::cout << "Integer average is: " << ivisitor2.get_value() 457 | << std::endl; 458 | std::cout << "Double average is: " << dvisitor2.get_value() 459 | << std::endl; 460 | std::cout << "Double2 average is: " << dvisitor22.get_value() 461 | << std::endl; 462 | std::cout << "ULong average is: " << ulvisitor.get_value() 463 | << std::endl; 464 | 465 | std::cout << "\nTesting constructors and assignments\n" << std::endl; 466 | 467 | MyDataFrame df_copy_con = dfx; 468 | 469 | std::cout << "These must be Equal: " 470 | << df_copy_con.is_equal(dfx) 474 | << std::endl; 475 | std::cout << "These must Not be Equal: " 476 | << df_copy_con.is_equal(dfxx) 480 | << std::endl; 481 | 482 | df_copy_con.get_column("dbl_col")[7] = 88.888888; 483 | std::cout << "Values in dfx, df_copy_con: " 484 | << dfx.get_column("dbl_col")[7] << ", " 485 | << df_copy_con.get_column("dbl_col")[7] 486 | << std::endl; 487 | std::cout << "After the change, these must Not be Equal: " 488 | << df_copy_con.is_equal(dfx) 492 | << std::endl; 493 | 494 | std::cout << "dfx before modify_by_idx()" << std::endl; 495 | dfx.write(std::cout); 500 | 501 | dfx.modify_by_idx(df_copy_con); 502 | std::cout << "dfx after modify_by_idx()" << std::endl; 503 | dfx.write(std::cout); 508 | dfx.modify_by_idx(df); 509 | std::cout << "dfx after modify_by_idx()" << std::endl; 510 | dfx.write(std::cout); 515 | 516 | return (0); 517 | } 518 | 519 | // ----------------------------------------------------------------------------- 520 | 521 | // Local Variables: 522 | // mode:C++ 523 | // tab-width:4 524 | // c-basic-offset:4 525 | // End: 526 | 
-------------------------------------------------------------------------------- /DMScu/src/DMScu_MMapBase.cc: -------------------------------------------------------------------------------- 1 | // Hossein Moein 2 | // August 21, 2007 3 | // Copyright (C) 2017-2018 Hossein Moein 4 | // Distributed under the BSD Software License (see file License) 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | // ---------------------------------------------------------------------------- 13 | 14 | const DMScu_MMapBase::size_type DMScu_MMapBase::SYSTEM_PAGE_SIZE = 15 | ::sysconf (_SC_PAGESIZE); 16 | 17 | // ---------------------------------------------------------------------------- 18 | 19 | bool DMScu_MMapBase::_translate_open_mode () throw () { 20 | 21 | switch (_get_open_mode ()) { 22 | 23 | case _read_: 24 | { 25 | _file_flags = _s_read_ | _in_use_; 26 | _mmap_mode = PROT_READ; 27 | _mmap_prot = PROT_READ; 28 | _file_open_flags = O_RDONLY | O_CREAT | O_SYNC; 29 | break; 30 | } 31 | case _bread_: 32 | { 33 | _file_flags = _s_bread_ | _in_use_; 34 | _mmap_mode = PROT_READ; 35 | _mmap_prot = PROT_READ; 36 | _file_open_flags = O_RDONLY | O_CREAT | O_SYNC; 37 | break; 38 | } 39 | case _write_: 40 | { 41 | _file_flags = _s_write_ | _s_read_ | _in_use_; 42 | _mmap_mode = PROT_WRITE | PROT_READ; 43 | _mmap_prot = PROT_WRITE | PROT_READ; 44 | _file_open_flags = O_RDWR | O_CREAT | O_SYNC; 45 | break; 46 | } 47 | case _bwrite_: 48 | { 49 | _file_flags = _s_bwrite_ | _s_bread_ | _in_use_; 50 | _mmap_mode = PROT_WRITE | PROT_READ; 51 | _mmap_prot = PROT_WRITE | PROT_READ; 52 | _file_open_flags = O_RDWR | O_CREAT | O_SYNC; 53 | break; 54 | } 55 | case _append_: 56 | { 57 | _file_flags = _s_append_ | _s_read_ | _in_use_; 58 | _mmap_mode = PROT_WRITE | PROT_READ; 59 | _mmap_prot = PROT_WRITE | PROT_READ; 60 | _file_open_flags = O_RDWR | O_APPEND | O_CREAT | O_SYNC; 61 | break; 62 | } 63 | case _bappend_: 64 | { 65 | _file_flags = _s_bappend_ | _s_bread_ | _in_use_; 66 
| _mmap_mode = PROT_WRITE | PROT_READ; 67 | _mmap_prot = PROT_WRITE | PROT_READ; 68 | _file_open_flags = O_RDWR | O_APPEND | O_CREAT | O_SYNC; 69 | break; 70 | } 71 | } 72 | 73 | return (true); 74 | } 75 | 76 | // ---------------------------------------------------------------------------- 77 | 78 | bool DMScu_MMapBase::_initial_map (size_type file_size, 79 | int mmap_prot, 80 | flag_type mmap_flags, 81 | int file_desc, 82 | off_t offset, 83 | void *start) throw () { 84 | 85 | if (file_size) { 86 | _mmap_ptr = 87 | ::mmap(start, file_size, mmap_prot, mmap_flags, file_desc, offset); 88 | 89 | if (_mmap_ptr != MAP_FAILED) { 90 | _mmap_size = file_size; 91 | _file_desc = file_desc; 92 | if (_get_open_mode () == _append_ || _get_open_mode() == _bappend_) 93 | _current_offset = _mmap_size; 94 | } 95 | else { 96 | ::close (file_desc); 97 | _file_desc = 0; 98 | 99 | return (_good_flag = false); 100 | } 101 | } 102 | 103 | if (_initial_map_posthook ()) 104 | return (_good_flag = true); 105 | 106 | return (_good_flag = false); 107 | } 108 | 109 | // ---------------------------------------------------------------------------- 110 | 111 | DMScu_MMapBase::size_type DMScu_MMapBase:: 112 | read (void *data_ptr, size_type data_size, size_type data_count) throw () { 113 | 114 | size_type read_size = data_size * data_count; 115 | const void *cpy_from = 116 | reinterpret_cast(_mmap_ptr) + _current_offset; 117 | 118 | if (_current_offset + read_size < _file_size) 119 | _current_offset += read_size; 120 | else { 121 | data_count = (_file_size - _current_offset) / data_size; 122 | read_size = data_size * data_count; 123 | _current_offset = _file_size; 124 | } 125 | 126 | ::memcpy (data_ptr, cpy_from, read_size); 127 | _file_flags |= _touched_; 128 | return (data_count); 129 | } 130 | 131 | // ---------------------------------------------------------------------------- 132 | 133 | DMScu_MMapBase::size_type DMScu_MMapBase:: 134 | write (const void *data_ptr, size_type data_size, 
size_type data_count) { 135 | 136 | const flag_type flag = 137 | _s_bwrite_ | _s_bappend_ | _s_write_ | _s_append_; 138 | 139 | if (! (_file_flags & flag)) { 140 | DMScu_FixedSizeString<2047> err; 141 | 142 | err.printf ("DMScu_MMapBase::write(): " 143 | "Bad file permission for the action requested."); 144 | throw DMScu_Exception (err.c_str ()); 145 | } 146 | 147 | const size_type byte_count = data_size * data_count; 148 | const size_type growth = _current_offset + byte_count; 149 | 150 | if (growth > _mmap_size) { 151 | const size_type tp_add = growth - _mmap_size; 152 | const size_type new_size = 153 | _mmap_size + 154 | ((! get_buffer_size ()) 155 | ? tp_add 156 | : (((tp_add / get_buffer_size ()) * get_buffer_size ()) + 157 | (tp_add % get_buffer_size() ? get_buffer_size () : 0))); 158 | 159 | const cu_AutoFileDesc desc_guard (*this); 160 | void *const tmp_mmap_ptr = 161 | ::mmap (NULL, new_size, _mmap_prot, _mmap_flags, _file_desc, 0); 162 | 163 | if (tmp_mmap_ptr == MAP_FAILED) { 164 | DMScu_FixedSizeString<2047> err; 165 | 166 | err.printf ("DMScu_MMapBase::write(): ::mmap(): (%d) %s", 167 | errno, ::strerror (errno)); 168 | throw DMScu_Exception (err.c_str ()); 169 | } 170 | 171 | if (_mmap_ptr) 172 | ::munmap (_mmap_ptr, _mmap_size); 173 | 174 | if (::ftruncate (_file_desc, new_size) < 0) { 175 | DMScu_FixedSizeString<2047> err; 176 | 177 | err.printf ("DMScu_MMapBase::write(): ::ftruncate(): (%d) %s", 178 | errno, ::strerror (errno)); 179 | 180 | throw DMScu_Exception (err.c_str ()); 181 | } 182 | 183 | _mmap_size = new_size; 184 | _mmap_ptr = tmp_mmap_ptr; 185 | } 186 | 187 | ::memcpy (reinterpret_cast(_mmap_ptr) + _current_offset, 188 | data_ptr, 189 | byte_count); 190 | 191 | _file_size += growth > _file_size ? 
growth - _file_size : 0; 192 | _file_flags |= _written_; 193 | _current_offset += byte_count; 194 | 195 | return (data_count); 196 | } 197 | 198 | // ---------------------------------------------------------------------------- 199 | 200 | int DMScu_MMapBase::close (CLOSE_MODE close_mode) { 201 | 202 | if (_file_flags & _in_use_) { 203 | const flag_type wflag = 204 | _s_bwrite_ | _s_write_ | _s_append_ | _s_bappend_; 205 | const size_type length = 206 | close_mode == _normal_ ? _file_size : _current_offset; 207 | 208 | if (_mmap_ptr && _file_size && _file_flags & wflag) 209 | ::msync (_mmap_ptr, length, MS_SYNC); 210 | 211 | if (_mmap_ptr && ::munmap (_mmap_ptr, _mmap_size) != 0) { 212 | DMScu_FixedSizeString<2047> err; 213 | 214 | err.printf ("DMScu_MMapBase::close(): ::munmap(): (%d) %s", 215 | errno, ::strerror (errno)); 216 | throw DMScu_Exception (err.c_str ()); 217 | } 218 | 219 | if (is_open ()) { 220 | if (_file_flags & wflag) { 221 | int res; 222 | res = ::ftruncate (_file_desc, length); 223 | res = ::fsync (_file_desc); 224 | } 225 | 226 | if (::close (_file_desc) != 0) { 227 | DMScu_FixedSizeString<2047> err; 228 | 229 | err.printf ("DMScu_MMapBase::close(): ::close(): (%d) %s", 230 | errno, ::strerror (errno)); 231 | throw DMScu_Exception (err.c_str ()); 232 | } 233 | 234 | _file_desc = 0; 235 | } 236 | 237 | _file_flags &= ~_in_use_; 238 | _current_offset = 0; 239 | _file_size = 0; 240 | _mmap_ptr = NULL; 241 | } 242 | else 243 | return (EOF); 244 | 245 | return (0); 246 | } 247 | 248 | // ---------------------------------------------------------------------------- 249 | 250 | int DMScu_MMapBase::put_back () { 251 | 252 | if (_current_offset == 0) { 253 | DMScu_FixedSizeString<2047> err; 254 | 255 | err.printf ("DMScu_MMapBase::put_back(): " 256 | "Trying to pass the edge of the file. 
Under flow"); 257 | throw DMScu_Exception (err.c_str ()); 258 | } 259 | 260 | return (*(reinterpret_cast(_mmap_ptr) + --_current_offset)); 261 | } 262 | 263 | // ---------------------------------------------------------------------------- 264 | 265 | int DMScu_MMapBase::get_char () throw () { 266 | 267 | if (_current_offset < _file_size) 268 | return (*(reinterpret_cast(_mmap_ptr) + _current_offset++)); 269 | else 270 | return (-1); 271 | } 272 | 273 | // ---------------------------------------------------------------------------- 274 | 275 | std::string DMScu_MMapBase::get_string (const char *search_str) throw () { 276 | 277 | std::string s; 278 | 279 | if (_file_flags & _s_bread_ || _file_flags & _s_bwrite_ || 280 | _file_flags & _s_bappend_) { 281 | return (s); 282 | } 283 | 284 | while (_current_offset < _file_size) { 285 | if (! _is_in_list ( 286 | *(reinterpret_cast(_mmap_ptr) + _current_offset), 287 | search_str)) { 288 | _current_offset += 1; 289 | break; 290 | } 291 | s.append (reinterpret_cast(_mmap_ptr) + _current_offset++, 1); 292 | } 293 | 294 | return (s); 295 | } 296 | 297 | // ---------------------------------------------------------------------------- 298 | 299 | std::string DMScu_MMapBase::get_token (const char *delimit_str) throw () { 300 | 301 | std::string s; 302 | 303 | if (_file_flags & _s_bread_ || _file_flags & _s_bwrite_ || 304 | _file_flags & _s_bappend_) 305 | return (s); 306 | 307 | while (_current_offset < _file_size) { 308 | if (_is_in_list ( 309 | *(reinterpret_cast(_mmap_ptr) + _current_offset), 310 | delimit_str)) { 311 | _current_offset += 1; 312 | break; 313 | } 314 | s.append (reinterpret_cast(_mmap_ptr) + _current_offset++, 1); 315 | } 316 | 317 | return (s); 318 | } 319 | 320 | // ---------------------------------------------------------------------------- 321 | 322 | DMScu_MMapBase::size_type 323 | DMScu_MMapBase::get_token (char delimit, char *buffer) throw () { 324 | 325 | if (_file_flags & _s_bread_ || _file_flags & 
_s_bwrite_ || 326 | _file_flags & _s_bappend_) 327 | return (NOVAL); 328 | 329 | flag_type counter = 0; 330 | 331 | while (_current_offset < _file_size) { 332 | if (*(reinterpret_cast(_mmap_ptr) + _current_offset) == 333 | delimit) { 334 | _current_offset += 1; 335 | break; 336 | } 337 | buffer [counter++] = 338 | *(reinterpret_cast(_mmap_ptr) + _current_offset++); 339 | } 340 | buffer [counter] = 0; 341 | 342 | return (counter); 343 | } 344 | 345 | // ---------------------------------------------------------------------------- 346 | 347 | inline bool DMScu_MMapBase::check_space_4_printf_ () throw () { 348 | 349 | const flag_type flag = _s_write_ | _s_append_ | _in_use_; 350 | 351 | if (_file_flags & flag) { 352 | const size_type buffer_size = 353 | get_buffer_size() > BUFFER_SIZE ? get_buffer_size () : BUFFER_SIZE; 354 | const size_type new_size = _mmap_size - _current_offset; 355 | 356 | if (new_size < MIN_BUFFER_SIZE) { 357 | truncate (_mmap_size + buffer_size); 358 | _file_size = _current_offset; 359 | } 360 | return (true); 361 | } 362 | 363 | return (false); 364 | } 365 | 366 | // ---------------------------------------------------------------------------- 367 | 368 | int DMScu_MMapBase::printf (const char *format_str, ...) throw () { 369 | 370 | int char_count = 0; 371 | va_list argument_ptr; 372 | 373 | if (check_space_4_printf_ ()) { 374 | va_start (argument_ptr, format_str); 375 | 376 | char_count = 377 | ::vsprintf (reinterpret_cast(_mmap_ptr) + _current_offset, 378 | format_str, 379 | argument_ptr); 380 | 381 | va_end (argument_ptr); 382 | if (char_count > 0) { 383 | _file_flags |= _written_; 384 | _current_offset += char_count; 385 | _file_size = 386 | _current_offset > _file_size ? 
_current_offset : _file_size; 387 | } 388 | } 389 | 390 | return (char_count); 391 | } 392 | 393 | // ---------------------------------------------------------------------------- 394 | 395 | int DMScu_MMapBase::put_char (int the_char) { 396 | 397 | const char tmp_char = the_char; 398 | 399 | write (&tmp_char, sizeof (char), 1); 400 | return (the_char); 401 | } 402 | 403 | // ---------------------------------------------------------------------------- 404 | 405 | int DMScu_MMapBase::put_string (const char *the_str) throw () { 406 | 407 | return (printf ("%s", the_str)); 408 | } 409 | 410 | // ---------------------------------------------------------------------------- 411 | 412 | int DMScu_MMapBase::remap (size_type offset, size_type map_size) { 413 | 414 | const cu_AutoFileDesc desc_guard (*this); 415 | struct stat stat_data; 416 | 417 | if (::fstat (_file_desc, &stat_data) < 0) { 418 | DMScu_FixedSizeString<2047> err; 419 | 420 | err.printf ("DMScu_MMapBase::remap(): ::fstat(): (%d) %s", 421 | errno, ::strerror (errno)); 422 | throw DMScu_Exception (err.c_str ()); 423 | } 424 | if (offset + map_size >= stat_data.st_size) { 425 | DMScu_FixedSizeString<2047> err; 426 | 427 | err.printf ("DMScu_MMapBase::remap(): offset %llu + map size %llu " 428 | ">= file size %llu", 429 | offset, map_size, 430 | static_cast(stat_data.st_size)); 431 | throw DMScu_Exception (err.c_str ()); 432 | } 433 | 434 | void *old_mmap_ptr = _mmap_ptr; 435 | 436 | if (_mmap_ptr) 437 | munmap (old_mmap_ptr, _mmap_size); 438 | 439 | _mmap_size = 0; 440 | _mmap_ptr = NULL; 441 | 442 | const size_type useable_offset = 443 | (offset / SYSTEM_PAGE_SIZE) * SYSTEM_PAGE_SIZE; 444 | const size_type size_to_do = 445 | (map_size > 0) ? 
map_size : stat_data.st_size - useable_offset; 446 | void *tmp_mmap_ptr = ::mmap (old_mmap_ptr, 447 | size_to_do, 448 | _mmap_prot, 449 | _mmap_flags, 450 | _file_desc, 451 | useable_offset); 452 | 453 | if (tmp_mmap_ptr != MAP_FAILED) { 454 | _mmap_size = size_to_do; 455 | _mmap_ptr = tmp_mmap_ptr; 456 | _current_offset = 457 | (_current_offset < useable_offset) 458 | ? 0 : (_current_offset > _mmap_size) 459 | ? _mmap_size : _current_offset; 460 | } 461 | else { 462 | DMScu_FixedSizeString<2047> err; 463 | 464 | err.printf ("DMScu_MMapBase::remap(): ::mmap(): (%d) %s", 465 | errno, ::strerror (errno)); 466 | throw DMScu_Exception (err.c_str ()); 467 | } 468 | 469 | return (0); 470 | } 471 | 472 | // ---------------------------------------------------------------------------- 473 | 474 | // Go to the 0-based line 475 | // 476 | DMScu_MMapBase::size_type DMScu_MMapBase::go_to_line (size_type line) { 477 | 478 | // It does not make sense to go to a particular line 479 | // in a binary file 480 | // 481 | if ((_file_flags & _s_bwrite_) || (_file_flags & _s_bappend_) || 482 | (_file_flags & _s_bread_)) { 483 | DMScu_FixedSizeString<2047> err; 484 | 485 | err.printf ("DMScu_MMapBase::go_to_line(): " 486 | "Bad file permission for the action requested."); 487 | throw DMScu_Exception (err.c_str ()); 488 | } 489 | 490 | const char *str = reinterpret_cast(_mmap_ptr); 491 | size_type curr_line = 0; 492 | size_type offset; 493 | 494 | for (offset = 0; offset < _file_size; ++offset) { 495 | if (curr_line == line) 496 | break; 497 | if (str [offset] == '\n') 498 | curr_line += 1; 499 | } 500 | 501 | seek (offset, _seek_set_); 502 | return (_current_offset); 503 | } 504 | 505 | // ---------------------------------------------------------------------------- 506 | 507 | int DMScu_MMapBase:: 508 | seek (size_type the_offset, SEEK_TYPE seek_type) throw () { 509 | 510 | switch (seek_type) { 511 | 512 | case _seek_set_: 513 | { 514 | if (the_offset <= _file_size) 515 | _current_offset 
= the_offset; 516 | else { 517 | _good_flag = false; 518 | return (-1); 519 | } 520 | break; 521 | } 522 | case _seek_cur_: 523 | { 524 | the_offset += _current_offset; 525 | if (the_offset <= _file_size) 526 | _current_offset = the_offset; 527 | else { 528 | _good_flag = false; 529 | return (-1); 530 | } 531 | break; 532 | } 533 | case _seek_end_: 534 | { 535 | if (the_offset <= _file_size) 536 | _current_offset = _file_size - the_offset; 537 | else { 538 | _good_flag = false; 539 | return (-1); 540 | } 541 | break; 542 | } 543 | } 544 | 545 | return (0); 546 | } 547 | 548 | // ---------------------------------------------------------------------------- 549 | 550 | int DMScu_MMapBase::set_flag (int mmap_prot, flag_type mmap_flags) { 551 | 552 | if ((_mmap_prot != mmap_prot) || (_mmap_flags != mmap_flags)) { 553 | _mmap_prot = mmap_prot; 554 | _mmap_flags = mmap_flags; 555 | if (_mmap_ptr) { 556 | munmap (_mmap_ptr, _mmap_size); 557 | _mmap_ptr = NULL; 558 | 559 | const cu_AutoFileDesc desc_guard (*this); 560 | 561 | if ((_mmap_ptr = ::mmap (NULL, 562 | _mmap_size, 563 | _mmap_prot, 564 | _mmap_flags, 565 | _file_desc, 566 | 0)) == MAP_FAILED) { 567 | DMScu_FixedSizeString<2047> err; 568 | 569 | err.printf ("DMScu_MMapBase::set_flag(): ::mmap(): (%d) %s", 570 | errno, ::strerror (errno)); 571 | 572 | throw DMScu_Exception (err.c_str ()); 573 | } 574 | } 575 | } 576 | 577 | return (0); 578 | } 579 | 580 | // ---------------------------------------------------------------------------- 581 | 582 | int DMScu_MMapBase::truncate (size_type truncate_size) { 583 | 584 | const flag_type flag = 585 | _s_bwrite_ | _s_bappend_ | _s_write_ | _s_append_; 586 | 587 | if (! 
(_file_flags & flag)) { 588 | DMScu_FixedSizeString<2047> err; 589 | 590 | err.printf ("DMScu_MMapBase::truncate(): " 591 | "Bad file permission for the action requested."); 592 | throw DMScu_Exception (err.c_str ()); 593 | } 594 | 595 | if (truncate_size == _file_size) 596 | return (0); 597 | 598 | void *old_mmap_ptr = NULL; 599 | 600 | if (_mmap_ptr) { 601 | old_mmap_ptr = _mmap_ptr; 602 | ::munmap (old_mmap_ptr, _mmap_size); 603 | } 604 | 605 | _mmap_size = 0; 606 | _mmap_ptr = NULL; 607 | 608 | const cu_AutoFileDesc desc_guard (*this); 609 | void *tmp_mmap_ptr = NULL; 610 | 611 | if (truncate_size != 0) { 612 | tmp_mmap_ptr = ::mmap (old_mmap_ptr, 613 | truncate_size, 614 | _mmap_prot, 615 | _mmap_flags, 616 | _file_desc, 617 | 0); 618 | 619 | if (tmp_mmap_ptr == MAP_FAILED) { 620 | DMScu_FixedSizeString<2047> err; 621 | 622 | err.printf ("DMScu_MMapBase::truncate(): ::mmap(): (%d) %s", 623 | errno, ::strerror (errno)); 624 | throw DMScu_Exception (err.c_str ()); 625 | } 626 | } 627 | 628 | if (::ftruncate (_file_desc, truncate_size) < 0) { 629 | DMScu_FixedSizeString<2047> err; 630 | 631 | err.printf ("DMScu_MMapBase::truncate(): ::ftruncate(): (%d) %s", 632 | errno, ::strerror (errno)); 633 | throw DMScu_Exception (err.c_str ()); 634 | } 635 | 636 | _file_size = truncate_size; 637 | _mmap_size = truncate_size; 638 | _mmap_ptr = tmp_mmap_ptr; 639 | _current_offset = 640 | _current_offset > truncate_size ? truncate_size : _current_offset; 641 | 642 | return (0); 643 | } 644 | 645 | // ---------------------------------------------------------------------------- 646 | 647 | DMScu_MMapBase &DMScu_MMapBase::operator << (std::ifstream &ifs) { 648 | 649 | char buffer [64 * 1024]; 650 | 651 | if ((_file_flags & _s_bwrite_) || (_file_flags & _s_bappend_)) { 652 | while (! ifs.eof ()) { 653 | ifs.read (buffer, sizeof (buffer)); 654 | write (buffer, ifs.gcount (), 1); 655 | } 656 | } 657 | else if ((_file_flags & _s_write_) || (_file_flags & _s_append_)) { 658 | while (! 
ifs.eof ()) { 659 | ifs.read (buffer, sizeof (buffer) - 1); 660 | buffer [ifs.gcount ()] = 0; 661 | printf ("%s", buffer); 662 | } 663 | } 664 | else { 665 | DMScu_FixedSizeString<2047> err; 666 | 667 | err.printf ("DMScu_MMapBase::<< (std::ifstream &): " 668 | "Bad file permission for the action requested."); 669 | throw DMScu_Exception (err.c_str ()); 670 | } 671 | 672 | return (*this); 673 | } 674 | 675 | // ---------------------------------------------------------------------------- 676 | 677 | DMScu_MMapBase &DMScu_MMapBase::operator << (const FILE &fref) { 678 | 679 | char buffer [64 * 1024]; 680 | 681 | if ((_file_flags & _s_bwrite_) || (_file_flags & _s_bappend_)) { 682 | while (! feof (const_cast(&fref))) { 683 | const int sread = 684 | fread (buffer, sizeof (buffer), 1, const_cast(&fref)); 685 | 686 | write (buffer, sread, 1); 687 | } 688 | } 689 | else if ((_file_flags & _s_write_) || (_file_flags & _s_append_)) { 690 | while (! feof (const_cast(&fref))) { 691 | const int sread = 692 | fread (buffer, sizeof (buffer - 1), 1, 693 | const_cast(&fref)); 694 | 695 | buffer [sread] = 0; 696 | printf ("%s", buffer); 697 | } 698 | } 699 | else { 700 | DMScu_FixedSizeString<2047> err; 701 | 702 | err.printf ("DMScu_MMapBase::<< (const FILE &): " 703 | "Bad file permission for the action requested."); 704 | throw DMScu_Exception (err.c_str ()); 705 | } 706 | 707 | return (*this); 708 | } 709 | 710 | // ---------------------------------------------------------------------------- 711 | 712 | // Local Variables: 713 | // mode:C++ 714 | // tab-width:4 715 | // c-basic-offset:4 716 | // End: 717 | --------------------------------------------------------------------------------