├── .gitignore ├── CMakeLists.txt ├── README.md ├── TinyNPY.cpp ├── TinyNPY.h └── main.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | 19 | # Compiled Static libraries 20 | *.lai 21 | *.la 22 | *.a 23 | *.lib 24 | 25 | # Executables 26 | *.exe 27 | *.out 28 | *.app 29 | 30 | # Custom 31 | *.tmp 32 | *.exif.txt 33 | *.exif.txt.txt 34 | .DS_Store 35 | CMakeSettings.json 36 | .vs/ 37 | .idea/ 38 | .vscode/ 39 | bin/ 40 | binaries/ 41 | build/ 42 | 43 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.1) 2 | 3 | project(TinyNPY) 4 | include(GNUInstallDirs) 5 | 6 | find_package(ZLIB REQUIRED) 7 | 8 | #CMAKE_BUILD_TOOL 9 | 10 | ################################ 11 | # set lib version here 12 | 13 | set(GENERIC_LIB_VERSION "1.0.0") 14 | set(GENERIC_LIB_SOVERSION "1") 15 | 16 | ################################ 17 | # Add definitions 18 | 19 | set(CMAKE_CXX_STANDARD 11) 20 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DDEBUG") 21 | 22 | ################################ 23 | # Add targets 24 | # By Default shared libray is being built 25 | # To build static libs also - Do cmake . -DBUILD_STATIC_LIBS:BOOL=ON 26 | # User can choose not to build shared library by using cmake -DBUILD_SHARED_LIBS:BOOL=OFF 27 | # To build only static libs use cmake . -DBUILD_SHARED_LIBS:BOOL=OFF -DBUILD_STATIC_LIBS:BOOL=ON 28 | # To build the demo binary, use cmake . 
-DBUILD_DEMO:BOOL=ON 29 | 30 | option(BUILD_SHARED_LIBS "build as shared library" ON) 31 | option(BUILD_STATIC_LIBS "build as static library" OFF) 32 | option(LINK_CRT_STATIC_LIBS "link CRT static library" OFF) 33 | option(BUILD_DEMO "build demo binary" ON) 34 | 35 | # set MSVC runtime linkage to static or dynamic 36 | # as in: https://stackoverflow.com/questions/10113017/setting-the-msvc-runtime-in-cmake 37 | macro(configure_runtime CRT_RUNTIME) 38 | if(MSVC) 39 | # Default to statically-linked runtime. 40 | if("${CRT_RUNTIME}" STREQUAL "") 41 | set(CRT_RUNTIME "static") 42 | endif() 43 | # Set compiler options. 44 | set(variables 45 | CMAKE_C_FLAGS_DEBUG 46 | CMAKE_C_FLAGS_MINSIZEREL 47 | CMAKE_C_FLAGS_RELEASE 48 | CMAKE_C_FLAGS_RELWITHDEBINFO 49 | CMAKE_CXX_FLAGS_DEBUG 50 | CMAKE_CXX_FLAGS_MINSIZEREL 51 | CMAKE_CXX_FLAGS_RELEASE 52 | CMAKE_CXX_FLAGS_RELWITHDEBINFO 53 | ) 54 | if(${CRT_RUNTIME} STREQUAL "static") 55 | message(STATUS "MSVC -> forcing use of statically-linked runtime.") 56 | foreach(variable ${variables}) 57 | if(${variable} MATCHES "/MD") 58 | string(REGEX REPLACE "/MD" "/MT" ${variable} "${${variable}}") 59 | endif() 60 | endforeach() 61 | else() 62 | message(STATUS "MSVC -> forcing use of dynamically-linked runtime.") 63 | foreach(variable ${variables}) 64 | if(${variable} MATCHES "/MT") 65 | string(REGEX REPLACE "/MT" "/MD" ${variable} "${${variable}}") 66 | endif() 67 | endforeach() 68 | endif() 69 | endif() 70 | endmacro() 71 | 72 | if(LINK_CRT_STATIC_LIBS) 73 | # set MSVC runtime linkage to static 74 | configure_runtime("static") 75 | endif() 76 | 77 | # to distinguish between debug and release lib 78 | set(CMAKE_DEBUG_POSTFIX "d") 79 | 80 | if(BUILD_SHARED_LIBS) 81 | add_library(TinyNPY SHARED TinyNPY.cpp TinyNPY.h) 82 | 83 | if(MSVC_VERSION GREATER 1300) 84 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4251") # needs to have dll-interface 85 | endif() 86 | 87 | target_link_libraries(TinyNPY PRIVATE ZLIB::ZLIB) 88 | 
set_target_properties(TinyNPY PROPERTIES 89 | COMPILE_DEFINITIONS "TINYNPY_EXPORT" 90 | VERSION "${GENERIC_LIB_VERSION}" 91 | SOVERSION "${GENERIC_LIB_SOVERSION}") 92 | 93 | 94 | if(DEFINED CMAKE_VERSION AND NOT "${CMAKE_VERSION}" VERSION_LESS "2.8.11") 95 | target_include_directories(TinyNPY PUBLIC 96 | $ 97 | $) 98 | 99 | if(MSVC) 100 | target_compile_definitions(TinyNPY PUBLIC _CRT_SECURE_NO_WARNINGS) 101 | endif() 102 | else() 103 | include_directories(${PROJECT_SOURCE_DIR}) 104 | 105 | if(MSVC) 106 | add_definitions(-D_CRT_SECURE_NO_WARNINGS) 107 | endif() 108 | endif() 109 | 110 | # export targets for find_package config mode 111 | export(TARGETS TinyNPY 112 | FILE ${CMAKE_BINARY_DIR}/${CMAKE_PROJECT_NAME}Targets.cmake) 113 | 114 | install(TARGETS TinyNPY 115 | EXPORT ${CMAKE_PROJECT_NAME}Targets 116 | RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} 117 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} 118 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) 119 | endif() 120 | 121 | if(BUILD_STATIC_LIBS) 122 | add_library(TinyNPYstatic STATIC TinyNPY.cpp TinyNPY.h) 123 | 124 | target_link_libraries(TinyNPYstatic PRIVATE ZLIB::ZLIB) 125 | set_target_properties(TinyNPYstatic PROPERTIES 126 | OUTPUT_NAME TinyNPY 127 | VERSION "${GENERIC_LIB_VERSION}" 128 | SOVERSION "${GENERIC_LIB_SOVERSION}") 129 | 130 | if(DEFINED CMAKE_VERSION AND NOT "${CMAKE_VERSION}" VERSION_LESS "2.8.11") 131 | target_include_directories(TinyNPYstatic PUBLIC 132 | $ 133 | $) 134 | 135 | if(MSVC) 136 | target_compile_definitions(TinyNPYstatic PUBLIC _CRT_SECURE_NO_WARNINGS) 137 | endif() 138 | else() 139 | include_directories(${PROJECT_SOURCE_DIR}) 140 | 141 | if(MSVC) 142 | add_definitions(-D_CRT_SECURE_NO_WARNINGS) 143 | endif() 144 | endif() 145 | 146 | # export targets for find_package config mode 147 | export(TARGETS TinyNPYstatic 148 | FILE ${CMAKE_BINARY_DIR}/${CMAKE_PROJECT_NAME}Targets.cmake) 149 | 150 | install(TARGETS TinyNPYstatic 151 | EXPORT ${CMAKE_PROJECT_NAME}Targets 152 | RUNTIME 
DESTINATION ${CMAKE_INSTALL_BINDIR} 153 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} 154 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) 155 | endif() 156 | # demo executable: links whichever library flavour is being built (the shared one is preferred when both are enabled) 157 | if(BUILD_DEMO) 158 | add_executable(TinyNPYdemo main.cpp) 159 | if(BUILD_SHARED_LIBS) 160 | target_link_libraries(TinyNPYdemo PRIVATE TinyNPY) # linking a target already orders the build, so add_dependencies is not needed 161 | target_compile_definitions(TinyNPYdemo PRIVATE TINYNPY_IMPORT) 162 | elseif(BUILD_STATIC_LIBS) 163 | target_link_libraries(TinyNPYdemo PRIVATE TinyNPYstatic ZLIB::ZLIB) # keyword signature throughout: plain and keyword forms must not be mixed on one target 164 | else() 165 | message(FATAL_ERROR "BUILD_DEMO requires BUILD_SHARED_LIBS or BUILD_STATIC_LIBS to be ON") 166 | endif() 167 | endif() 168 | 169 | install(FILES TinyNPY.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) 170 | 171 | foreach(p LIB INCLUDE) 172 | set(var CMAKE_INSTALL_${p}DIR) 173 | if(NOT IS_ABSOLUTE "${${var}}") 174 | set(${var} "${CMAKE_INSTALL_PREFIX}/${${var}}") 175 | endif() 176 | endforeach() 177 | 178 | file(WRITE 179 | ${CMAKE_BINARY_DIR}/${CMAKE_PROJECT_NAME}Config.cmake 180 | "include(\${CMAKE_CURRENT_LIST_DIR}/${CMAKE_PROJECT_NAME}Targets.cmake)\n") 181 | 182 | install(FILES 183 | ${CMAKE_BINARY_DIR}/${CMAKE_PROJECT_NAME}Config.cmake 184 | DESTINATION lib/cmake/${CMAKE_PROJECT_NAME}) 185 | 186 | install(EXPORT ${CMAKE_PROJECT_NAME}Targets 187 | DESTINATION lib/cmake/${CMAKE_PROJECT_NAME}) 188 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TinyNPY: Tiny C++ loader/exporter of python numpy array NPY/NPZ files 2 | 3 | ## Introduction 4 | 5 | TinyNPY is a tiny, lightweight C++ library for parsing Numpy array files in NPY and NPZ format. No third party dependencies are needed to parse NPY and uncompressed NPZ files, but [ZLIB](https://www.zlib.net) library is needed to parse compressed NPZ files. TinyNPY is easy to use, simply copy the two source files into your project. 
6 | 7 | ## Usage example 8 | 9 | ``` 10 | #include "TinyNPY.h" 11 | 12 | int main(int argc, const char** argv) 13 | { 14 | // read NPY array file 15 | NpyArray arr; 16 | const LPCSTR ret = arr.LoadNPY(argv[1]); 17 | 18 | // read NPZ arrays file: specific array 19 | //NpyArray arr; 20 | //const LPCSTR ret = arr.LoadNPZ(argv[1], "features"); 21 | 22 | // read NPZ arrays file: all arrays 23 | //NpyArray::npz_t arrays; 24 | //const LPCSTR ret = NpyArray::LoadNPZ(argv[1], arrays); 25 | //NpyArray& arr = arrays.begin()->second; 26 | 27 | if (ret != NULL) { 28 | std::cout << ret << " '" << argv[1] << "'\n"; 29 | return -2; 30 | } 31 | 32 | // print array metadata 33 | std::cout << "Number of values " << arr.NumValue() << "\n"; 34 | std::cout << "Size in bytes " << arr.SizeBytes() << "\n"; 35 | if (typeid(int) == arr.ValueType()) 36 | std::cout << "Value type int\n"; 37 | if (typeid(float) == arr.ValueType()) 38 | std::cout << "Value type float\n"; 39 | return EXIT_SUCCESS; 40 | } 41 | ``` 42 | See `main.cpp` for more details. 43 | 44 | ## Copyright 45 | 46 | Redistribution and use in source and binary forms, with or without 47 | modification, are permitted provided that the following conditions are met: 48 | 49 | - Redistributions of source code must retain the above copyright notice, 50 | this list of conditions and the following disclaimer. 51 | - Redistributions in binary form must reproduce the above copyright notice, 52 | this list of conditions and the following disclaimer in the documentation 53 | and/or other materials provided with the distribution. 54 | 55 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY EXPRESS 56 | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 57 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN 58 | NO EVENT SHALL THE FREEBSD PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 59 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 60 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 61 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 62 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 63 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 64 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 65 | -------------------------------------------------------------------------------- /TinyNPY.cpp: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////// 2 | // TinyNPY.cpp 3 | // 4 | // Copyright 2007 cDc@seacave 5 | // Distributed under the Boost Software License, Version 1.0 6 | // (See http://www.boost.org/LICENSE_1_0.txt) 7 | 8 | #include "TinyNPY.h" 9 | #include 10 | #include 11 | #include 12 | 13 | 14 | // D E F I N E S /////////////////////////////////////////////////// 15 | 16 | 17 | // S T R U C T S /////////////////////////////////////////////////// 18 | 19 | // Invoke a function when the object is destroyed, 20 | // typically at scope exit if the object is allocated on the stack 21 | template > 22 | class TScopeExitRun 23 | { 24 | public: 25 | TScopeExitRun(Functor f) : functor(f) {} 26 | ~TScopeExitRun() { functor(); } 27 | void Reset(Functor f) { functor = f; } 28 | 29 | protected: 30 | Functor functor; 31 | }; 32 | typedef class TScopeExitRun<> ScopeExitRun; 33 | /*----------------------------------------------------------------*/ 34 | 35 | 36 | // input 37 | LPCSTR NpyArray::ParseHeaderNPY(const std::string& header, shape_t& shape, size_t& wordSize, char& type, bool& fortranOrder) 38 | { 39 | ASSERT(header[header.size() - 1] == '\n'); 40 | 41 | // fortran order 42 | size_t loc1 = header.find("fortran_order"); 43 
| if (loc1 == std::string::npos) 44 | return "error: failed to find header keyword 'fortran_order'"; 45 | fortranOrder = (header.substr(loc1+16, 4) == "True"); 46 | 47 | // shape 48 | loc1 = header.find("("); 49 | size_t loc2 = header.find(")"); 50 | if (loc1 == std::string::npos || loc2 == std::string::npos) 51 | return "error: failed to find header keyword '(' or ')'"; 52 | 53 | shape.clear(); 54 | std::regex num_regex("[0-9][0-9]*"); 55 | std::smatch sm; 56 | std::string strShape = header.substr(loc1 + 1, loc2 - loc1 - 1); 57 | while (std::regex_search(strShape, sm, num_regex)) { 58 | shape.push_back(std::stoi(sm[0].str())); 59 | strShape = sm.suffix().str(); 60 | } 61 | 62 | // endian, word size, data type 63 | // byte order code | stands for not applicable. 64 | // not sure when this applies except for byte array 65 | loc1 = header.find("descr"); 66 | if (loc1 == std::string::npos) 67 | return "error: failed to find header keyword 'descr'"; 68 | loc1 += 9; 69 | const bool littleEndian = (header[loc1] == '<' || header[loc1] == '|'); 70 | ASSERT(littleEndian); 71 | type = header[loc1+1]; 72 | 73 | const std::string str_ws = header.substr(loc1+2); 74 | loc2 = str_ws.find("'"); 75 | wordSize = std::stoi(str_ws.substr(0, loc2)); 76 | return NULL; 77 | } 78 | 79 | LPCSTR NpyArray::ParseHeaderNPY(const uint8_t* buffer, shape_t& shape, size_t& wordSize, char& type, bool& fortranOrder) 80 | { 81 | if (buffer[0] != (uint8_t)0x93 || _tcsncmp(reinterpret_cast(buffer+1), "NUMPY", 5) != 0) 82 | return "error: invalid header id"; 83 | // parse the length of the header data 84 | uint32_t lenHeader, offset; 85 | ASSERT(buffer[7] >= 0); // minor version number of the file format 86 | if (buffer[6] > 1) { // major version number of the file format 87 | // little-endian unsigned int 88 | lenHeader = (uint32_t(buffer[11])<<24)|(uint32_t(buffer[10])<<16)|(uint32_t(buffer[9])<<8)|uint32_t(buffer[8]); 89 | offset = 12; 90 | } else { 91 | // little-endian unsigned short int 92 | 
lenHeader = (uint16_t(buffer[9])<<8)|uint16_t(buffer[8]); 93 | offset = 10; 94 | } 95 | const std::string header(reinterpret_cast(buffer+offset), lenHeader); 96 | return ParseHeaderNPY(header, shape, wordSize, type, fortranOrder); 97 | } 98 | 99 | LPCSTR NpyArray::ParseHeaderNPY(FILE* fp, shape_t& shape, size_t& wordSize, char& type, bool& fortranOrder) 100 | { 101 | char buffer[32]; 102 | if (fread(buffer, sizeof(char), 10, fp) != 10 || 103 | buffer[0] != (char)0x93 || _tcsncmp(buffer+1, "NUMPY", 5) != 0) 104 | return "error: invalid header id"; 105 | // parse the length of the header data 106 | uint32_t lenHeader; 107 | ASSERT(buffer[7] >= 0); // minor version number of the file format 108 | if (buffer[6] > 1) { // major version number of the file format 109 | // little-endian unsigned int 110 | fread(buffer+10, sizeof(char), 2, fp); 111 | lenHeader = (uint32_t(buffer[11])<<24)|(uint32_t(buffer[10])<<16)|(uint32_t(buffer[9])<<8)|uint32_t(buffer[8]); 112 | } else { 113 | // little-endian unsigned short int 114 | lenHeader = (uint16_t(buffer[9])<<8)|uint16_t(buffer[8]); 115 | } 116 | std::string header(lenHeader, '\0'); 117 | if (fread(&header[0], sizeof(char), lenHeader, fp) != lenHeader) 118 | return "error: invalid header"; 119 | return ParseHeaderNPY(header, shape, wordSize, type, fortranOrder); 120 | } 121 | 122 | LPCSTR NpyArray::ParseFooterZIP(FILE* fp, uint16_t& nrecs, size_t& globalHeaderSize, size_t& globalHeaderOffset) 123 | { 124 | char footer[32]; 125 | fseek(fp, -22, SEEK_END); 126 | if (fread(footer, sizeof(char), 22, fp) != 22) 127 | return "error: failed footer"; 128 | const uint16_t diskNo = *(uint16_t*)(footer+4); ASSERT(diskNo == 0); 129 | const uint16_t diskStart = *(uint16_t*)(footer+6); ASSERT(diskStart == 0); 130 | const uint16_t nrecsOnDisk = *(uint16_t*)(footer+8); 131 | nrecs = *(uint16_t*)(footer+10); ASSERT(nrecsOnDisk == nrecs); 132 | globalHeaderSize = *(uint32_t*)(footer+12); 133 | globalHeaderOffset = *(uint32_t*)(footer+16); 134 | 
const uint16_t lenComment = *(uint16_t*)(footer+20); ASSERT(lenComment == 0); 135 | return NULL; 136 | } 137 | 138 | LPCSTR NpyArray::LoadNPY(FILE* fp) 139 | { 140 | Release(); 141 | LPCSTR ret = ParseHeaderNPY(fp, shape, wordSize, type, fortranOrder); 142 | if (ret != NULL) 143 | return ret; 144 | init(); 145 | const size_t nread = fread(Data(), 1, SizeBytes(), fp); 146 | if (nread != SizeBytes()) 147 | return "error: failed fread"; 148 | return NULL; 149 | } 150 | 151 | LPCSTR NpyArray::LoadNPY(std::string filename) 152 | { 153 | FILE* fp = fopen(filename.c_str(), "rb"); 154 | if (!fp) 155 | return "error: unable to open file"; 156 | const ScopeExitRun closeFp([&]() { fclose(fp); }); 157 | return LoadNPY(fp); 158 | } 159 | 160 | LPCSTR NpyArray::LoadNPZ(FILE* fp, uint32_t comprBytes, uint32_t uncomprBytes) 161 | { 162 | std::vector bufferCompr(comprBytes); 163 | std::vector bufferUncompr(uncomprBytes); 164 | if (fread(bufferCompr.data(), 1, comprBytes, fp) != comprBytes) 165 | return "error: failed fread"; 166 | 167 | z_stream d_stream; 168 | d_stream.zalloc = Z_NULL; 169 | d_stream.zfree = Z_NULL; 170 | d_stream.opaque = Z_NULL; 171 | d_stream.avail_in = 0; 172 | d_stream.next_in = Z_NULL; 173 | int err = inflateInit2(&d_stream, -MAX_WBITS); 174 | if (err != Z_OK) 175 | return "error: can not init inflate"; 176 | 177 | d_stream.avail_in = comprBytes; 178 | d_stream.next_in = bufferCompr.data(); 179 | d_stream.avail_out = uncomprBytes; 180 | d_stream.next_out = bufferUncompr.data(); 181 | err = inflate(&d_stream, Z_FINISH); 182 | if (err != Z_STREAM_END && err != Z_OK) 183 | return "error: can not uncompress"; 184 | 185 | err = inflateEnd(&d_stream); 186 | 187 | LPCSTR ret = ParseHeaderNPY(bufferUncompr.data(), shape, wordSize, type, fortranOrder); 188 | if (ret != NULL) 189 | return ret; 190 | init(); 191 | const size_t offset = uncomprBytes - SizeBytes(); 192 | memcpy(Data(), bufferUncompr.data()+offset, SizeBytes()); 193 | return NULL; 194 | } 195 | 196 | 
LPCSTR NpyArray::LoadNPZ(std::string filename, std::string varname) 197 | { 198 | Release(); 199 | FILE* fp = fopen(filename.c_str(), "rb"); 200 | if (!fp) 201 | return "error: unable to open file"; 202 | const ScopeExitRun closeFp([&]() { fclose(fp); }); 203 | while (true) { 204 | const LPCSTR ret = LoadArrayNPZ(fp, varname, *this); 205 | if (ret == NULL && !IsEmpty()) 206 | return NULL; 207 | if (ret == (const char*)1) 208 | break; 209 | } 210 | return "error: variable name not found"; 211 | } 212 | 213 | LPCSTR NpyArray::LoadNPZ(std::string filename, npz_t& arrays) 214 | { 215 | FILE* fp = fopen(filename.c_str(), "rb"); 216 | if (!fp) 217 | return "error: unable to open file"; 218 | const ScopeExitRun closeFp([&]() { fclose(fp); }); 219 | while (true) { 220 | NpyArray arr; 221 | std::string varname; 222 | const LPCSTR ret = LoadArrayNPZ(fp, varname, arr); 223 | if (ret == (const char*)1) 224 | break; 225 | if (ret != NULL) 226 | return ret; 227 | arrays.emplace(varname, std::move(arr)); 228 | } 229 | return NULL; 230 | } 231 | 232 | LPCSTR NpyArray::LoadArrayNPZ(FILE* fp, std::string& varname, NpyArray& arr) 233 | { 234 | char localHeader[32]; 235 | if (fread(localHeader, sizeof(char), 30, fp) != 30) 236 | return "error: failed fread"; 237 | 238 | // if we've reached the global header, stop reading 239 | if (localHeader[2] != 0x03 || localHeader[3] != 0x04) 240 | return (const char*)1; 241 | 242 | // read in the variable name 243 | const uint16_t lenName = *reinterpret_cast(localHeader+26); 244 | std::string vname(lenName, ' '); 245 | if (fread(&vname[0], sizeof(char), lenName, fp) != lenName) 246 | return "error: failed fread"; 247 | 248 | // erase the lagging .npy 249 | vname.erase(vname.end()-4, vname.end()); 250 | 251 | // read in the extra field 252 | const uint16_t lenExtraField = *reinterpret_cast(localHeader+28); 253 | fseek(fp, lenExtraField, SEEK_CUR); // skip past the extra field 254 | 255 | if (varname.empty() || varname == vname) { 256 | // read 
current array 257 | if (varname.empty()) 258 | varname = vname; 259 | const uint16_t comprMethod = *reinterpret_cast(localHeader+8); 260 | if (comprMethod == 0) 261 | return arr.LoadNPY(fp); 262 | const uint32_t comprBytes = *reinterpret_cast(localHeader+18); 263 | const uint32_t uncomprBytes = *reinterpret_cast(localHeader+22); 264 | return arr.LoadNPZ(fp, comprBytes, uncomprBytes); 265 | } 266 | 267 | // skip current array data 268 | const uint32_t size = *reinterpret_cast(localHeader+22); 269 | fseek(fp, size, SEEK_CUR); 270 | return NULL; 271 | } 272 | /*----------------------------------------------------------------*/ 273 | 274 | 275 | 276 | // output 277 | std::vector NpyArray::CreateHeaderNPY(const shape_t& shape, char type, size_t wordSize) 278 | { 279 | std::vector dict; 280 | add(dict, "{'descr': '"); 281 | #if __BYTE_ORDER == __LITTLE_ENDIAN 282 | add(dict, '<'); 283 | #else 284 | add(dict, '>'); 285 | #endif 286 | add(dict, type); 287 | add(dict, std::to_string(wordSize)); 288 | add(dict, "', 'fortran_order': False, 'shape': ("); 289 | add(dict, std::to_string(shape[0])); 290 | for (size_t i = 1; i < shape.size(); i++) { 291 | add(dict, ", "); 292 | add(dict, std::to_string(shape[i])); 293 | } 294 | add(dict, "), }"); 295 | // pad with spaces so that preamble+dict is modulo 16 bytes 296 | // preamble is 10/12 bytes and dict needs to end with \n 297 | char ver = 1; 298 | size_t remainder = 16 - (10 + dict.size()) % 16; 299 | if (dict.size() + remainder > 65535) { 300 | ver = 2; 301 | remainder = 16 - (12 + dict.size()) % 16; 302 | } 303 | dict.insert(dict.end(), remainder, ' '); 304 | dict.back() = '\n'; 305 | 306 | std::vector header; 307 | add(header, (char)0x93); 308 | add(header, "NUMPY"); 309 | add(header, ver); // major version of numpy format 310 | add(header, (char)0); // minor version of numpy format 311 | if (ver == 1) 312 | add(header, (uint16_t)dict.size()); 313 | else 314 | add(header, (uint32_t)dict.size()); 315 | 
header.insert(header.end(), dict.begin(), dict.end()); 316 | return header; 317 | } 318 | 319 | LPCSTR NpyArray::SaveNPY(std::string filename, bool bAppend) const 320 | { 321 | FILE* fp; 322 | shape_t _shape; 323 | const shape_t* pShape; 324 | if (bAppend && (fp=fopen(filename.c_str(), "r+b")) != NULL) { 325 | // file exists, append to it; read the header, modify the array size 326 | char _type; 327 | size_t _wordSize; 328 | bool _fortranOrder; 329 | LPCSTR ret = ParseHeaderNPY(fp, _shape, _wordSize, _type, _fortranOrder); 330 | if (ret != NULL) 331 | return ret; 332 | ASSERT(!_fortranOrder); 333 | 334 | if (wordSize != _wordSize) 335 | return "error: npy_save word size"; 336 | if (shape.size() != _shape.size()) 337 | return "error: npy_save attempting to append mis-dimensioned data"; 338 | 339 | for (size_t i = 1; i < shape.size(); i++) { 340 | if (shape[i] != _shape[i]) 341 | return "error: npy_save attempting to append misshaped data"; 342 | } 343 | _shape[0] += shape[0]; 344 | pShape = &_shape; 345 | } else { 346 | // create a new file 347 | fp = fopen(filename.c_str(), "wb"); 348 | pShape = &shape; 349 | } 350 | if (!fp) 351 | return "error: unable to open file"; 352 | 353 | const std::vector header = CreateHeaderNPY(*pShape, std::abs(type), wordSize); 354 | 355 | fseek(fp, 0, SEEK_SET); 356 | fwrite(header.data(), sizeof(char), header.size(), fp); 357 | fseek(fp, 0, SEEK_END); 358 | fwrite(Data(), wordSize, numValues, fp); 359 | fclose(fp); 360 | return NULL; 361 | } 362 | 363 | LPCSTR NpyArray::SaveNPZ(std::string zipname, std::string varname, bool bAppend) const 364 | { 365 | FILE* fp; 366 | uint16_t nrecs = 0; 367 | size_t globalHeaderOffset = 0; 368 | std::vector globalHeader; 369 | if (bAppend && (fp=fopen(zipname.c_str(), "r+b")) != NULL) { 370 | // zip file exists, add a new NPY array to it; 371 | // first read the footer and parse the offset and size of the global header 372 | // then read and store the global header; 373 | // the new data will be 
written at the start of the global header, 374 | // then append the global header and footer below it 375 | size_t globalHeaderSize; 376 | LPCSTR ret = ParseFooterZIP(fp, nrecs, globalHeaderSize, globalHeaderOffset); 377 | if (ret != NULL) 378 | return ret; 379 | fseek(fp, (long)globalHeaderOffset, SEEK_SET); 380 | globalHeader.resize(globalHeaderSize); 381 | size_t res = fread(globalHeader.data(), sizeof(char), globalHeaderSize, fp); 382 | if (res != globalHeaderSize) 383 | return "error: header read error while adding to existing zip"; 384 | fseek(fp, (long)globalHeaderOffset, SEEK_SET); 385 | } else { 386 | fp = fopen(zipname.c_str(), "wb"); 387 | } 388 | if (!fp) 389 | return "error: unable to open file"; 390 | 391 | const std::vector npyHeader = CreateHeaderNPY(shape, std::abs(type), wordSize); 392 | const size_t nbytes = SizeBytes() + npyHeader.size(); 393 | 394 | // get the CRC of the data to be added 395 | uint32_t crc = crc32(0L, (uint8_t*)npyHeader.data(), (uLong)npyHeader.size()); 396 | crc = crc32(crc, Data(), (uLong)SizeBytes()); 397 | 398 | // append NPY extension 399 | varname += ".npy"; 400 | 401 | // build the local header 402 | std::vector localHeader; 403 | add(localHeader, "PK"); // first part of signature 404 | add(localHeader, (uint16_t)0x0403); // second part of signature 405 | add(localHeader, (uint16_t)20); // min version to extract 406 | add(localHeader, (uint16_t)0); // general purpose bit flag 407 | add(localHeader, (uint16_t)0); // compression method 408 | add(localHeader, (uint16_t)0); // file last mod time 409 | add(localHeader, (uint16_t)0); // file last mod date 410 | add(localHeader, (uint32_t)crc); // CRC 411 | add(localHeader, (uint32_t)nbytes); // compressed size 412 | add(localHeader, (uint32_t)nbytes); // uncompressed size 413 | add(localHeader, (uint16_t)varname.size()); // variable name length 414 | add(localHeader, (uint16_t)0); // extra field length 415 | add(localHeader, varname); 416 | 417 | // build global header 418 | 
add(globalHeader, "PK"); // first part of signature 419 | add(globalHeader, (uint16_t)0x0201); // second part of signature 420 | add(globalHeader, (uint16_t)20); // version made by 421 | globalHeader.insert(globalHeader.end(), localHeader.begin()+4, localHeader.begin()+30); 422 | add(globalHeader, (uint16_t)0); // file comment length 423 | add(globalHeader, (uint16_t)0); // disk number where file starts 424 | add(globalHeader, (uint16_t)0); // internal file attributes 425 | add(globalHeader, (uint32_t)0); // external file attributes 426 | add(globalHeader, (uint32_t)globalHeaderOffset); // relative offset of local file header, since it begins where the global header used to begin 427 | add(globalHeader, varname); 428 | 429 | // build footer 430 | std::vector footer; 431 | add(footer, "PK"); // first part of signature 432 | add(footer, (uint16_t)0x0605); // second part of signature 433 | add(footer, (uint16_t)0); // number of this disk 434 | add(footer, (uint16_t)0); // disk where footer starts 435 | add(footer, (uint16_t)(nrecs+1)); // number of records on this disk 436 | add(footer, (uint16_t)(nrecs+1)); // total number of records 437 | add(footer, (uint32_t)globalHeader.size()); // number of bytes of global headers 438 | add(footer, (uint32_t)(globalHeaderOffset + nbytes + localHeader.size())); // offset of start of global headers, since global header now starts after newly written array 439 | add(footer, (uint16_t)0); // zip file comment length 440 | 441 | // write everything 442 | fwrite(localHeader.data(), sizeof(char), localHeader.size(), fp); 443 | fwrite(npyHeader.data(), sizeof(char), npyHeader.size(), fp); 444 | fwrite(Data(), wordSize, numValues, fp); 445 | fwrite(globalHeader.data(), sizeof(char), globalHeader.size(), fp); 446 | fwrite(footer.data(), sizeof(char), footer.size(), fp); 447 | fclose(fp); 448 | return NULL; 449 | } 450 | /*----------------------------------------------------------------*/ 451 | 452 | 453 | 454 | // tools 455 | char 
NpyArray::getTypeChar(const std::type_info& t) 456 | { 457 | if (t == typeid(float)) return 'f'; 458 | if (t == typeid(double)) return 'f'; 459 | if (t == typeid(long double)) return 'f'; 460 | 461 | if (t == typeid(int)) return 'i'; 462 | if (t == typeid(char)) return 'i'; 463 | if (t == typeid(short)) return 'i'; 464 | if (t == typeid(long)) return 'i'; 465 | if (t == typeid(long long)) return 'i'; 466 | 467 | if (t == typeid(uint8_t)) return 'u'; 468 | if (t == typeid(unsigned short)) return 'u'; 469 | if (t == typeid(unsigned long)) return 'u'; 470 | if (t == typeid(unsigned long long)) return 'u'; 471 | if (t == typeid(unsigned int)) return 'u'; 472 | 473 | if (t == typeid(bool)) return 'b'; 474 | 475 | if (t == typeid(std::complex)) return 'c'; 476 | if (t == typeid(std::complex)) return 'c'; 477 | if (t == typeid(std::complex)) return 'c'; 478 | 479 | return '?'; 480 | } 481 | 482 | const std::type_info& NpyArray::getTypeInfo(char t, size_t s) 483 | { 484 | switch (t) { 485 | case 'f': switch (s) { 486 | case 4: return typeid(float); 487 | case 8: return typeid(double); 488 | case 16: return typeid(long double); 489 | } break; 490 | case 'i': switch (s) { 491 | case 1: return typeid(char); 492 | case 2: return typeid(short); 493 | case 4: return typeid(int); 494 | case 8: return typeid(long); 495 | case 16: return typeid(long long); 496 | } break; 497 | case 'u': switch (s) { 498 | case 1: return typeid(unsigned char); 499 | case 2: return typeid(unsigned short); 500 | case 4: return typeid(unsigned int); 501 | case 8: return typeid(unsigned long); 502 | case 16: return typeid(unsigned long long); 503 | } break; 504 | case 'c': switch (s) { 505 | case 8: return typeid(std::complex); 506 | case 16: return typeid(std::complex); 507 | case 32: return typeid(std::complex); 508 | } break; 509 | case 'b': return typeid(bool); 510 | } 511 | return typeid(void); 512 | } 513 | /*----------------------------------------------------------------*/ 514 | 
// --------------------------------------------------------------------------------
// /TinyNPY.h:
// --------------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////
// TinyNPY.h
//
// Read/write .npy and .npz python numpy array files.
//
// Copyright 2007 cDc@seacave
// Distributed under the Boost Software License, Version 1.0
// (See http://www.boost.org/LICENSE_1_0.txt)

#ifndef __SEACAVE_NPY_H__
#define __SEACAVE_NPY_H__


// I N C L U D E S /////////////////////////////////////////////////

// NOTE(review): the header names were stripped from this dump; the list below
// is rebuilt from the standard names this header actually uses - confirm
// against the repository.
#include <cstddef>      // size_t
#include <cstdint>      // uint8_t, uint16_t, uint32_t
#include <cstdio>       // FILE
#include <cstdlib>      // std::abs
#include <cstring>      // strlen, strncmp
#include <functional>   // std::multiplies
#include <map>
#include <numeric>      // std::accumulate
#include <string>
#include <typeinfo>     // std::type_info, typeid
#include <utility>      // std::move
#include <vector>


// D E F I N E S ///////////////////////////////////////////////////

#define TINYNPY_MAJOR_VERSION 1
#define TINYNPY_MINOR_VERSION 0
#define TINYNPY_PATCH_VERSION 0

// Symbol export/import decoration for the class below.
#ifdef _MSC_VER
#	ifdef TINYNPY_EXPORT
#		define TINYNPY_LIB __declspec(dllexport)
#	elif defined(TINYNPY_IMPORT)
#		define TINYNPY_LIB __declspec(dllimport)
#	else
#		define TINYNPY_LIB
#	endif
#elif __GNUC__ >= 4
#	define TINYNPY_LIB __attribute__((visibility("default")))
#else
#	define TINYNPY_LIB
#endif

// Fallbacks used when the including project does not provide these names.
#ifndef ASSERT
#define ASSERT(x)
#endif
#ifndef LPCSTR
typedef const char* LPCSTR;
#endif
#ifndef _tcslen
#define _tcslen strlen
#endif
#ifndef _tcsncmp
#define _tcsncmp strncmp
#endif


// S T R U C T S ///////////////////////////////////////////////////

// One n-dimensional array together with the metadata needed to read it from,
// or write it to, a .npy file or a .npz archive.
//
// Ownership convention (see OwnData()): a positive `type` means the buffer was
// allocated by this object and is released with delete[]; a non-positive
// `type` means the buffer is borrowed and is never freed here. The typed
// constructor stores the negated type character to mark borrowed data.
//
// The input/output functions return an LPCSTR; main.cpp treats NULL as success
// and prints any other value as an error message.
class TINYNPY_LIB NpyArray {
public:
    using shape_t = std::vector<size_t>;
    using npz_t = std::map<std::string, NpyArray>;

private:
    uint8_t* data;      // raw element bytes, NULL while empty
    shape_t shape;      // extent of every dimension
    size_t numValues;   // product of all extents
    size_t wordSize;    // size of one element in bytes
    char type;          // type character; the sign encodes ownership
    bool fortranOrder;  // true for column-major storage

public:
    // Empty array: no buffer, no shape.
    NpyArray() : data(NULL), numValues(0), wordSize(0), type(0), fortranOrder(false) {}

    // Wrap an existing buffer of T without copying it or taking ownership.
    template <typename T>
    NpyArray(const shape_t& _shape, T* _data, bool _fortranOrder=false)
        : data((uint8_t*)_data), shape(_shape), numValues(NumValue(shape)), wordSize(sizeof(T)), type(-getTypeChar(typeid(T))), fortranOrder(_fortranOrder) {}

    // Describe an array without a buffer; call Allocate() or SetData() next.
    NpyArray(const shape_t& _shape, size_t _wordSize, char _type, bool _fortranOrder=false)
        : data(NULL), shape(_shape), numValues(NumValue(shape)), wordSize(_wordSize), type(_type), fortranOrder(_fortranOrder) {}

    // Take over the buffer of arr; arr is left without a buffer.
    NpyArray(NpyArray&& arr)
        : data(arr.data), shape(std::move(arr.shape)), numValues(arr.numValues), wordSize(arr.wordSize), type(arr.type), fortranOrder(arr.fortranOrder) { arr.Clean(); }

    NpyArray(const NpyArray&) = delete;

    ~NpyArray() { if (OwnData()) delete[] data; }


    // True while no buffer is attached.
    bool IsEmpty() const {
        return data == NULL;
    }
    // True when the buffer is released by this object.
    bool OwnData() const {
        return type > 0;
    }
    // Allocate SizeBytes() uninitialized bytes for an owning, empty array.
    void Allocate() {
        ASSERT(data == NULL && numValues > 0 && OwnData());
        data = new uint8_t[SizeBytes()];
    }
    // Attach an external buffer without copying it. If OwnData() is true the
    // destructor and Release() will delete[] this pointer.
    void SetData(const uint8_t* _data) {
        ASSERT(data == NULL);
        data = const_cast<uint8_t*>(_data);
    }
    // Free the buffer when owned, then detach it.
    void Release() {
        if (OwnData())
            delete[] data;
        Clean();
    }
    // Detach the buffer without freeing it.
    void Clean() {
        data = NULL;
    }


    // Total size of the buffer in bytes.
    size_t SizeBytes() const {
        return numValues * wordSize;
    }
    // Size of a single element in bytes.
    size_t SizeValueBytes() const {
        return wordSize;
    }
    // Number of elements described by a shape; an empty shape yields 1.
    static size_t NumValue(const shape_t& shape) {
        return std::accumulate(shape.cbegin(), shape.cend(), size_t(1), std::multiplies<size_t>());
    }
    // Number of elements in this array.
    size_t NumValue() const {
        return numValues;
    }

    // C++ type matching the stored type character (sign removed) and word size.
    const std::type_info& ValueType() const {
        return getTypeInfo(static_cast<char>(std::abs(type)), wordSize);
    }

    const shape_t& Shape() const {
        return shape;
    }
    shape_t& Shape() {
        return shape;
    }

    char Type() const {
        return type;
    }
    char& Type() {
        return type;
    }

    bool ColMajor() const {
        return fortranOrder;
    }
    bool& ColMajor() {
        return fortranOrder;
    }

    // Typed view of the buffer; no check is made that T matches ValueType().
    template <typename T>
    const T* Data() const {
        return reinterpret_cast<const T*>(data);
    }
    template <typename T>
    T* Data() {
        return reinterpret_cast<T*>(data);
    }
    // Copy of all NumValue() elements, read as T.
    template <typename T>
    std::vector<T> DataVector() const {
        const T* first = Data<T>();
        return std::vector<T>(first, first + numValues);
    }


    // tools
    static char getTypeChar(const std::type_info& t);
    static const std::type_info& getTypeInfo(char t, size_t s);


    // input
    LPCSTR LoadNPY(FILE* fp);
    LPCSTR LoadNPY(std::string filename);
    LPCSTR LoadNPZ(FILE* fp, uint32_t compr_bytes, uint32_t uncompr_bytes);
    LPCSTR LoadNPZ(std::string filename, std::string varname);
    static LPCSTR LoadNPZ(std::string filename, npz_t& arrays);


    // output
    LPCSTR SaveNPY(std::string filename, bool bAppend=false) const;
    LPCSTR SaveNPZ(std::string zipname, std::string varname, bool bAppend=true) const;
    // Save a vector as a .npy file; an empty shape means one dimension of data.size().
    template <typename T>
    static LPCSTR SaveNPY(std::string filename, const std::vector<T>& data, shape_t shape=shape_t(), bool bAppend=false) {
        if (shape.empty())
            shape.push_back(data.size());
        // the typed constructor only borrows the vector's storage
        NpyArray arr(shape, const_cast<T*>(data.data()));
        return arr.SaveNPY(filename, bAppend);
    }
    // Save a vector as variable `varname` of a .npz archive; an empty shape
    // means one dimension of data.size().
    template <typename T>
    static LPCSTR SaveNPZ(std::string zipname, std::string varname, const std::vector<T>& data, shape_t shape=shape_t(), bool bAppend=true) {
        if (shape.empty())
            shape.push_back(data.size());
        // the typed constructor only borrows the vector's storage
        NpyArray arr(shape, const_cast<T*>(data.data()));
        return arr.SaveNPZ(zipname, varname, bAppend);
    }

private:
    // Recompute the element count from the shape and allocate the buffer.
    void init() {
        numValues = NumValue(shape);
        Allocate();
    }

    // input
    static LPCSTR ParseHeaderNPY(const std::string& header, shape_t& shape, size_t& wordSize, char& type, bool& fortranOrder);
    static LPCSTR ParseHeaderNPY(const uint8_t* buffer, shape_t& shape, size_t& wordSize, char& type, bool& fortranOrder);
    static LPCSTR ParseHeaderNPY(FILE* fp, shape_t& shape, size_t& wordSize, char& type, bool& fortranOrder);
    static LPCSTR ParseFooterZIP(FILE* fp, uint16_t& nrecs, size_t& global_header_size, size_t& global_header_offset);
    static LPCSTR LoadArrayNPZ(FILE* fp, std::string& varname, NpyArray& arr);

    // output
    static std::vector<char> CreateHeaderNPY(const shape_t& shape, char type, size_t wordSize);

    // Append the characters of a string (no terminator) to lhs.
    static std::vector<char>& add(std::vector<char>& lhs, const std::string& rhs) {
        lhs.insert(lhs.end(), rhs.cbegin(), rhs.cend());
        return lhs;
    }
    // Append the characters of a NUL-terminated string (no terminator) to lhs.
    static std::vector<char>& add(std::vector<char>& lhs, const char* rhs) {
        const size_t len = _tcslen(rhs);
        // make room for the new characters on top of what is already stored
        lhs.reserve(lhs.size() + len);
        lhs.insert(lhs.end(), rhs, rhs + len);
        return lhs;
    }
    // Append the object representation of rhs to lhs. The bytes are copied in
    // host memory order, so the result is little endian only on little-endian
    // hosts.
    template <typename T>
    static std::vector<char>& add(std::vector<char>& lhs, const T rhs) {
        const char* bytes = reinterpret_cast<const char*>(&rhs);
        lhs.insert(lhs.end(), bytes, bytes + sizeof(T));
        return lhs;
    }
};

#endif // __SEACAVE_NPY_H__

// --------------------------------------------------------------------------------
// /main.cpp:
// --------------------------------------------------------------------------------
// Defines the entry point for the console application.
2 | 3 | #ifdef _MSC_VER 4 | #include 5 | #endif 6 | #include "TinyNPY.h" 7 | #include // std::cout 8 | 9 | int main(int argc, const char** argv) 10 | { 11 | if (argc != 2) { 12 | std::cout << "Usage: TinyNPY \n"; 13 | return -1; 14 | } 15 | 16 | // read NPY array file 17 | NpyArray arr; 18 | const LPCSTR ret = arr.LoadNPY(argv[1]); 19 | 20 | // read NPZ arrays file: specific array 21 | //NpyArray arr; 22 | //const LPCSTR ret = arr.LoadNPZ(argv[1], "features"); 23 | 24 | // read NPZ arrays file: all arrays 25 | //NpyArray::npz_t arrays; 26 | //const LPCSTR ret = arr.LoadNPZ(argv[1], arrays); 27 | //NpyArray& arr = arrays.begin()->second; 28 | 29 | if (ret != NULL) { 30 | std::cout << ret << " '" << argv[1] << "'\n"; 31 | return -2; 32 | } 33 | 34 | // print array metadata 35 | std::cout << "Dimensions:"; 36 | for (size_t s: arr.Shape()) 37 | std::cout << " " << s; 38 | std::cout << "\n"; 39 | std::cout << "Number of values: " << arr.NumValue() << "\n"; 40 | std::cout << "Size in bytes: " << arr.SizeBytes() << "\n"; 41 | if (typeid(int) == arr.ValueType()) 42 | std::cout << "Value type: int\n"; 43 | if (typeid(float) == arr.ValueType()) 44 | std::cout << "Value type: float\n"; 45 | std::cout << "Values order: " << (arr.ColMajor() ? "col-major\n" : "row-major\n"); 46 | return EXIT_SUCCESS; 47 | } 48 | --------------------------------------------------------------------------------