├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── R ├── package.R └── read.R ├── io.Rproj ├── man └── read.Rd └── src ├── Makevars ├── Makevars.win ├── init.c ├── io └── read │ ├── MemoryMappedReader.h │ ├── posix │ ├── FileConnection.h │ └── MemoryMappedConnection.h │ ├── read.h │ └── windows │ ├── FileConnection.h │ └── MemoryMappedConnection.h └── read.cpp /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^io\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: io 2 | Version: 0.0.0.9000 3 | Title: File input/output 4 | Description: What the package does (one paragraph). 5 | Authors@R: c( 6 | person("Hadley", "Wickham", , "hadley@rstudio.com", role = c("aut", "cre")), 7 | person("Kevin", "Ushey", , "kevin@rstudio.com", role = c("aut")), 8 | person("RStudio", role = "cph") 9 | ) 10 | License: GPL-3 11 | Encoding: UTF-8 12 | LazyData: true 13 | ByteCompile: true 14 | URL: https://github.com/r-lib/io 15 | BugReports: https://github.com/r-lib/io/issues 16 | RoxygenNote: 6.0.1 17 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(read) 4 | export(read_bytes) 5 | export(read_lines) 6 | export(read_lines_bytes) 7 | useDynLib(io, .registration = TRUE, .fixes = "C_") 8 | -------------------------------------------------------------------------------- /R/package.R: -------------------------------------------------------------------------------- 1 | #' @useDynLib io, .registration = TRUE, .fixes = "C_" 2 | NULL 3 | -------------------------------------------------------------------------------- /R/read.R: -------------------------------------------------------------------------------- 1 | #' Read the Contents of a File 2 | #' 3 | #' Read the contents of a file into a string (or, in the case of 4 | #' \code{read_lines}, a vector of strings). 5 | #' 6 | #' @param path A file path. 7 | #' 8 | #' @name read 9 | #' @rdname read 10 | #' @export 11 | read <- function(path) { 12 | path <- normalizePath(path, mustWork = TRUE) 13 | .Call(C_io_read, path) 14 | } 15 | 16 | #' @name read 17 | #' @rdname read 18 | #' @export 19 | read_lines <- function(path) { 20 | path <- normalizePath(path, mustWork = TRUE) 21 | .Call(C_io_read_lines, path) 22 | } 23 | 24 | #' @name read 25 | #' @rdname read 26 | #' @export 27 | read_bytes <- function(path) { 28 | path <- normalizePath(path, mustWork = TRUE) 29 | .Call(C_io_read_bytes, path) 30 | } 31 | 32 | #' @name read 33 | #' @rdname read 34 | #' @export 35 | read_lines_bytes <- function(path) { 36 | path <- normalizePath(path, mustWork = TRUE) 37 | .Call(C_io_read_lines_bytes, path) 38 | } 39 | 40 | -------------------------------------------------------------------------------- /io.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: XeLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /man/read.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read.R 3 | \name{read} 4 | \alias{read} 5 | \alias{read} 6 | \alias{read_lines} 7 | \alias{read} 8 | \alias{read_bytes} 9 | \alias{read} 10 | \alias{read_lines_bytes} 11 | \title{Read the Contents of a File} 12 | \usage{ 13 | read(path) 14 | 15 | read_lines(path) 16 | 17 | read_bytes(path) 18 | 19 | read_lines_bytes(path) 20 | } 21 | \arguments{ 22 | \item{path}{A file path.} 23 | } 24 | \description{ 25 | Read the contents of a file into a string (or, in the case of 26 | \code{read_lines}, a vector of strings). 27 | } 28 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | PKG_CPPFLAGS += -DSTRICT_R_HEADERS 2 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | PKG_CPPFLAGS += -DSTRICT_R_HEADERS -DWIN32_LEAN_AND_MEAN 2 | -------------------------------------------------------------------------------- /src/init.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // for NULL 4 | #include 5 | 6 | /* .Call calls */ 7 | extern SEXP io_read(SEXP); 8 | extern SEXP io_read_bytes(SEXP); 9 | extern SEXP io_read_lines(SEXP); 10 | extern SEXP io_read_lines_bytes(SEXP); 11 | 12 | static const R_CallMethodDef CallEntries[] = { 13 | {"io_read", (DL_FUNC) &io_read, 1}, 14 | {"io_read_bytes", (DL_FUNC) &io_read_bytes, 1}, 15 | {"io_read_lines", (DL_FUNC) &io_read_lines, 1}, 16 | {"io_read_lines_bytes", (DL_FUNC) &io_read_lines_bytes, 1}, 17 | {NULL, NULL, 0} 18 | }; 19 | 20 | void R_init_io(DllInfo *dll) 21 | { 22 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 23 | R_useDynamicSymbols(dll, FALSE); 24 | } 25 | -------------------------------------------------------------------------------- /src/io/read/MemoryMappedReader.h: -------------------------------------------------------------------------------- 1 | #ifndef IO_READ_MEMORY_MAPPED_READER_H 2 | #define IO_READ_MEMORY_MAPPED_READER_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #ifndef _WIN32 9 | # include "posix/FileConnection.h" 10 | # include "posix/MemoryMappedConnection.h" 11 | #else 12 | # include "windows/FileConnection.h" 13 | # include "windows/MemoryMappedConnection.h" 14 | #endif 15 | 16 | namespace io { 17 | namespace detail { 18 | 19 | class MemoryMappedReader 20 | { 21 | public: 22 | 23 | class VectorReader 24 | { 25 | public: 26 | 27 | explicit VectorReader(std::vector* pData) 28 | : pData_(pData) 29 | { 30 | } 31 | 32 | template 33 | void operator()(const T& lhs, const T& rhs) 34 | { 35 | pData_->push_back(std::string(lhs, rhs)); 36 | } 37 | 38 | private: 39 | std::vector* pData_; 40 | }; 41 | 42 | static bool read(const char* path, std::string* pContent) 43 | { 44 | // Open file connection 45 | FileConnection conn(path); 46 | if (!conn.open()) 47 | return false; 48 | 49 | // Get size of file 50 | std::size_t size; 51 | if (!conn.size(&size)) 52 | return false; 53 | 54 | // Early return for empty files 55 | if (size == 0) 56 | return true; 57 | 58 | // mmap the file 59 | MemoryMappedConnection map(conn, size); 60 | if (!map.open()) 61 | return false; 62 | 63 | pContent->assign(map, size); 64 | return true; 65 | } 66 | 67 | template 68 | static bool read_lines(const char* path, F f) 69 | { 70 | FileConnection conn(path); 71 | if (!conn.open()) 72 | return false; 73 | 74 | // Get size of file 75 | std::size_t size; 76 | if (!conn.size(&size)) 77 | return false; 78 | 79 | // Early return for empty files 80 | if (size == 0) 81 | return true; 82 | 83 | // mmap the file 84 | MemoryMappedConnection map(conn, size); 85 | if (!map.open()) 86 | return false; 87 | 88 | // special case: just a '\n' 89 | bool endsWithNewline = map[size - 1] == '\n'; 90 | if (size == 1 && endsWithNewline) 91 | return true; 92 | 93 | // Search for newlines 94 | const char* lower = map; 95 | const char* upper = map; 96 | const char* end = map + size; 97 | while (true) 98 | { 99 | upper = std::find(lower, end, '\n'); 100 | if (upper == end) 101 | break; 102 | 103 | // Handle '\r\n' 104 | int CR = *(upper - 1) == '\r'; 105 | upper -= CR; 106 | 107 | // Pass to functor 108 | f(lower, upper); 109 | 110 | // Update 111 | lower = upper + 1 + CR; 112 | } 113 | 114 | // If this file ended with a newline, we're done 115 | if (endsWithNewline) 116 | return true; 117 | 118 | // Otherwise, consume one more string, then we're done 119 | f(lower, end); 120 | return true; 121 | } 122 | 123 | static bool read_lines(const char* path, std::vector* pContent) 124 | { 125 | VectorReader reader(pContent); 126 | return read_lines(path, reader); 127 | } 128 | 129 | }; 130 | 131 | } // namespace detail 132 | } // namespace io 133 | 134 | #endif /* IO_READ_MEMORY_MAPPED_READER_H */ 135 | -------------------------------------------------------------------------------- /src/io/read/posix/FileConnection.h: -------------------------------------------------------------------------------- 1 | #ifndef IO_READ_POSIX_FILE_CONNECTION_H 2 | #define IO_READ_POSIX_FILE_CONNECTION_H 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | namespace io { 11 | namespace detail { 12 | 13 | class FileConnection 14 | { 15 | public: 16 | 17 | typedef int FileDescriptor; 18 | 19 | FileConnection(const char* path, int flags = O_RDONLY) 20 | { 21 | fd_ = ::open(path, flags); 22 | } 23 | 24 | ~FileConnection() 25 | { 26 | if (open()) 27 | ::close(fd_); 28 | } 29 | 30 | bool open() 31 | { 32 | return fd_ != -1; 33 | } 34 | 35 | bool size(std::size_t* pSize) 36 | { 37 | struct stat info; 38 | if (::fstat(fd_, &info) == -1) 39 | return false; 40 | 41 | *pSize = info.st_size; 42 | return true; 43 | } 44 | 45 | operator FileDescriptor() const 46 | { 47 | return fd_; 48 | } 49 | 50 | private: 51 | FileDescriptor fd_; 52 | }; 53 | 54 | 55 | } // namespace detail 56 | } // namespace io 57 | 58 | #endif /* IO_READ_POSIX_FILE_CONNECTION_H */ 59 | -------------------------------------------------------------------------------- /src/io/read/posix/MemoryMappedConnection.h: -------------------------------------------------------------------------------- 1 | #ifndef IO_READ_POSIX_MEMORY_MAPPED_CONNECTION_H 2 | #define IO_READ_POSIX_MEMORY_MAPPED_CONNECTION_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace io { 9 | namespace detail { 10 | 11 | class MemoryMappedConnection 12 | { 13 | public: 14 | 15 | MemoryMappedConnection(int fd, std::size_t size) 16 | : size_(size) 17 | { 18 | #ifdef MAP_POPULATE 19 | map_ = (char*) ::mmap(0, size, PROT_READ, MAP_SHARED | MAP_POPULATE, fd, 0); 20 | #else 21 | map_ = (char*) ::mmap(0, size, PROT_READ, MAP_SHARED, fd, 0); 22 | #endif 23 | 24 | #if defined(POSIX_MADV_SEQUENTIAL) && defined(POSIX_MADV_WILLNEED) 25 | ::posix_madvise((void*) map_, size, POSIX_MADV_SEQUENTIAL | POSIX_MADV_WILLNEED); 26 | #endif 27 | } 28 | 29 | ~MemoryMappedConnection() 30 | { 31 | if (map_ != MAP_FAILED) 32 | ::munmap(map_, size_); 33 | } 34 | 35 | bool open() 36 | { 37 | return map_ != MAP_FAILED; 38 | } 39 | 40 | operator char*() const 41 | { 42 | return map_; 43 | } 44 | 45 | private: 46 | char* map_; 47 | std::size_t size_; 48 | }; 49 | 50 | } // namespace detail 51 | } // namespace io 52 | 53 | #endif /* IO_READ_POSIX_MEMORY_MAPPED_CONNECTION_H */ 54 | -------------------------------------------------------------------------------- /src/io/read/read.h: -------------------------------------------------------------------------------- 1 | #ifndef IO_READ_READ_H 2 | #define IO_READ_READ_H 3 | 4 | #include 5 | #include 6 | 7 | #include "MemoryMappedReader.h" 8 | 9 | namespace io { 10 | 11 | inline bool read(const std::string& absolutePath, std::string* pContent) 12 | { 13 | return detail::MemoryMappedReader::read(absolutePath.c_str(), pContent); 14 | } 15 | 16 | inline bool read_lines(const std::string& absolutePath, 17 | std::vector* pLines) 18 | { 19 | return detail::MemoryMappedReader::read_lines(absolutePath.c_str(), pLines); 20 | } 21 | 22 | } // namespace io 23 | 24 | #endif /* IO_READ_READ_H */ 25 | -------------------------------------------------------------------------------- /src/io/read/windows/FileConnection.h: -------------------------------------------------------------------------------- 1 | #ifndef IO_READ_WINDOWS_FILE_CONNECTION_H 2 | #define IO_READ_WINDOWS_FILE_CONNECTION_H 3 | 4 | #include 5 | 6 | #include 7 | 8 | namespace io { 9 | namespace detail { 10 | 11 | class FileConnection 12 | { 13 | public: 14 | typedef HANDLE FileDescriptor; 15 | 16 | FileConnection(const char* path, int flags = GENERIC_READ) 17 | { 18 | handle_ = ::CreateFile(path, flags, FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL); 19 | } 20 | 21 | ~FileConnection() 22 | { 23 | if (open()) 24 | ::CloseHandle(handle_); 25 | } 26 | 27 | bool open() 28 | { 29 | return handle_ != INVALID_HANDLE_VALUE; 30 | } 31 | 32 | bool size(std::size_t* pSize) 33 | { 34 | LARGE_INTEGER size; 35 | bool status = ::GetFileSizeEx(handle_, &size); 36 | if (status) 37 | *pSize = size.QuadPart; 38 | return status; 39 | } 40 | 41 | operator FileDescriptor() const 42 | { 43 | return handle_; 44 | } 45 | 46 | private: 47 | FileDescriptor handle_; 48 | }; 49 | 50 | } // namespace detail 51 | } // namespace io 52 | 53 | #endif /* IO_READ_WINDOWS_FILE_CONNECTION_H */ 54 | -------------------------------------------------------------------------------- /src/io/read/windows/MemoryMappedConnection.h: -------------------------------------------------------------------------------- 1 | #ifndef IO_READ_WINDOWS_MEMORY_MAPPED_CONNECTION_H 2 | #define IO_READ_WINDOWS_MEMORY_MAPPED_CONNECTION_H 3 | 4 | #include 5 | 6 | #include 7 | 8 | namespace io { 9 | namespace detail { 10 | 11 | class MemoryMappedConnection 12 | { 13 | public: 14 | 15 | MemoryMappedConnection(HANDLE handle, std::size_t size) 16 | : map_(NULL), size_(size) 17 | { 18 | handle_ = ::CreateFileMapping(handle, NULL, PAGE_READONLY, 0, 0, NULL); 19 | if (handle_ == NULL) 20 | return; 21 | 22 | map_ = (char*) ::MapViewOfFile(handle_, FILE_MAP_READ, 0, 0, size); 23 | } 24 | 25 | ~MemoryMappedConnection() 26 | { 27 | if (map_ != NULL) 28 | ::UnmapViewOfFile(map_); 29 | 30 | if (handle_ != NULL) 31 | ::CloseHandle(handle_); 32 | } 33 | 34 | bool open() 35 | { 36 | return map_ != NULL; 37 | } 38 | 39 | operator char*() const 40 | { 41 | return map_; 42 | } 43 | 44 | private: 45 | char* map_; 46 | std::size_t size_; 47 | HANDLE handle_; 48 | }; 49 | 50 | } // namespace detail 51 | } // namespace io 52 | 53 | #endif /* IO_READ_WINDOWS_MEMORY_MAPPED_CONNECTION_H */ 54 | -------------------------------------------------------------------------------- /src/read.cpp: -------------------------------------------------------------------------------- 1 | #include "io/read/read.h" 2 | 3 | #include 4 | 5 | #define R_NO_REMAP 6 | #include 7 | #include 8 | 9 | extern "C" SEXP io_read(SEXP absolutePathSEXP) 10 | { 11 | const char* absolutePath = CHAR(STRING_ELT(absolutePathSEXP, 0)); 12 | 13 | std::string contents; 14 | bool result = io::read(absolutePath, &contents); 15 | if (!result) 16 | { 17 | Rf_warning("Failed to read file"); 18 | return R_NilValue; 19 | } 20 | 21 | SEXP resultSEXP = PROTECT(Rf_allocVector(STRSXP, 1)); 22 | SET_STRING_ELT(resultSEXP, 0, Rf_mkCharLen(contents.c_str(), contents.size())); 23 | UNPROTECT(1); 24 | return resultSEXP; 25 | } 26 | 27 | extern "C" SEXP io_read_lines(SEXP absolutePathSEXP) 28 | { 29 | const char* absolutePath = CHAR(STRING_ELT(absolutePathSEXP, 0)); 30 | 31 | std::vector lines; 32 | bool result = io::read_lines(absolutePath, &lines); 33 | if (!result) 34 | { 35 | Rf_warning("Failed to read file"); 36 | return R_NilValue; 37 | } 38 | 39 | R_xlen_t n = (R_xlen_t) lines.size(); 40 | SEXP resultSEXP = PROTECT(Rf_allocVector(STRSXP, n)); 41 | for (R_xlen_t i = 0; i < n; ++i) 42 | SET_STRING_ELT(resultSEXP, i, Rf_mkCharLen(lines[i].c_str(), lines[i].size())); 43 | UNPROTECT(1); 44 | return resultSEXP; 45 | } 46 | 47 | extern "C" SEXP io_read_bytes(SEXP absolutePathSEXP) 48 | { 49 | const char* absolutePath = CHAR(STRING_ELT(absolutePathSEXP, 0)); 50 | 51 | std::string contents; 52 | bool result = io::read(absolutePath, &contents); 53 | if (!result) 54 | { 55 | Rf_warning("Failed to read file"); 56 | return R_NilValue; 57 | } 58 | 59 | SEXP resultSEXP = PROTECT(Rf_allocVector(RAWSXP, contents.size())); 60 | std::memcpy(RAW(resultSEXP), contents.c_str(), contents.size()); 61 | UNPROTECT(1); 62 | return resultSEXP; 63 | } 64 | 65 | extern "C" SEXP io_read_lines_bytes(SEXP absolutePathSEXP) 66 | { 67 | const char* absolutePath = CHAR(STRING_ELT(absolutePathSEXP, 0)); 68 | 69 | std::vector lines; 70 | bool result = io::read_lines(absolutePath, &lines); 71 | if (!result) 72 | { 73 | Rf_warning("Failed to read file"); 74 | return R_NilValue; 75 | } 76 | 77 | R_xlen_t n = (R_xlen_t) lines.size(); 78 | SEXP resultSEXP = PROTECT(Rf_allocVector(VECSXP, n)); 79 | for (R_xlen_t i = 0; i < n; ++i) 80 | { 81 | SEXP rawSEXP = Rf_allocVector(RAWSXP, lines[i].size()); 82 | std::memcpy(RAW(rawSEXP), lines[i].c_str(), lines[i].size()); 83 | SET_VECTOR_ELT(resultSEXP, i, rawSEXP); 84 | } 85 | UNPROTECT(1); 86 | return resultSEXP; 87 | } 88 | --------------------------------------------------------------------------------