├── .Rbuildignore
├── .gitignore
├── .travis.yml
├── DESCRIPTION
├── Makefile
├── NAMESPACE
├── R
    ├── colours.R
    ├── read.R
    ├── read_comments.R
    ├── read_rels.R
    ├── read_style.R
    ├── read_workbook.R
    ├── read_worksheet.R
    ├── readxl.R
    ├── rexcel-package.r
    ├── shared_strings.R
    └── utils.R
├── README.md
├── appveyor.yml
├── inst
    └── sheets
    │   ├── Ekaterinburg_IP_9-RESAVED.xlsx
    │   ├── Ekaterinburg_IP_9.xlsx
    │   ├── README.md
    │   ├── defined-names.xlsx
    │   ├── exp.xlsx
    │   ├── gabe.xlsx
    │   ├── gs-test-formula-formatting.xlsx
    │   ├── mini-gap.xlsx
    │   └── only_numbers.xlsx
├── internal
    ├── ekaterinburg.R
    ├── img
    │   ├── Kinsey-Male.jpg
    │   ├── Kinsey-Spreadsheet.jpg
    │   ├── doctorow_2016-May-11.jpg
    │   ├── enron-example.png
    │   ├── excel-reactivity.gif
    │   └── excel-reactivity.mov
    └── mini-gap
    │   ├── [Content_Types].xml
    │   ├── _rels
    │       ├── .rels
    │       └── rels.xml
    │   └── xl
    │       ├── _rels
    │           └── workbook.xml.rels
    │       ├── drawings
    │           ├── worksheetdrawing1.xml
    │           ├── worksheetdrawing2.xml
    │           ├── worksheetdrawing3.xml
    │           ├── worksheetdrawing4.xml
    │           └── worksheetdrawing5.xml
    │       ├── sharedStrings.xml
    │       ├── styles.xml
    │       ├── workbook.xml
    │       └── worksheets
    │           ├── _rels
    │               ├── sheet1.xml.rels
    │               ├── sheet2.xml.rels
    │               ├── sheet3.xml.rels
    │               ├── sheet4.xml.rels
    │               └── sheet5.xml.rels
    │           ├── sheet1.xml
    │           ├── sheet2.xml
    │           ├── sheet3.xml
    │           ├── sheet4.xml
    │           └── sheet5.xml
├── man
    ├── rexcel.Rd
    ├── rexcel_read.Rd
    ├── rexcel_read_workbook.Rd
    └── rexcel_readxl.Rd
├── rexcel.Rproj
└── tests
    ├── testthat.R
    └── testthat
        ├── helper-rexcel.R
        ├── test-colours.R
        ├── test-gs.R
        ├── test-readxl.R
        ├── test-rexcel-read-workbook.R
        ├── test-view.R
        └── test-xlsx-utils.R


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^Makefile$
4 | ^README.Rmd$
5 | ^.travis.yml$
6 | ^appveyor.yml$
7 | ^internal$
8 | inst/sheets/README.md
9 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .RData
 4 | example.xlsx
 5 | tests/testthat/readxl
 6 | tests/testthat/*.xlsx
 7 | inst/doc
 8 | ~$*.xlsx
 9 | tests/testthat/reference/readxl
10 | internal/2016-06-09_skype.md
11 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: r
2 | cache: packages
3 | warnings_are_errors: true
4 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: rexcel
 2 | Title: R/Excel Linen Driver
 3 | Version: 0.0.1
 4 | Author: Rich FitzJohn and Jenny Bryan
 5 | Maintainer: Rich FitzJohn <rich.fitzjohn@gmail.com>
 6 | Description: rexcel now is part of a complete sentence.
 7 | License: GPL-3
 8 | LazyData: true
 9 | URL: https://github.com/rsheets/rexcel
10 | BugReports: https://github.com/rsheets/rexcel/issues
11 | Imports:
12 |     cellranger (>= 1.1.0),
13 |     linen (>= 0.0.4),
14 |     progress,
15 |     tibble (>= 1.1),
16 |     xml2 (>= 1.0.0)
17 | Suggests:
18 |     testthat,
19 |     googlesheets (>= 0.2.0),
20 |     rprojroot,
21 |     readxl
22 | RoxygenNote: 5.0.1.9000
23 | Remotes:
24 |     hadley/xml2,
25 |     rsheets/linen
26 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | PACKAGE := $(shell grep '^Package:' DESCRIPTION | sed -E 's/^Package:[[:space:]]+//')
 2 | RSCRIPT = Rscript --no-init-file
 3 | 
 4 | all: install
 5 | 
 6 | test:
 7 | 	STORR_SKIP_DOWNLOADS=true make test_all
 8 | 
 9 | test_all:
10 | 	${RSCRIPT} -e 'library(methods); devtools::test()'
11 | 
12 | roxygen:
13 | 	@mkdir -p man
14 | 	${RSCRIPT} -e "library(methods); devtools::document()"
15 | 
16 | install:
17 | 	R CMD INSTALL .
18 | 
19 | build:
20 | 	R CMD build .
21 | 
22 | check:
23 | 	_R_CHECK_CRAN_INCOMING_=FALSE make check_all
24 | 
25 | check_all: build
26 | 	R CMD check --as-cran --no-manual `ls -1tr ${PACKAGE}*gz | tail -n1`
27 | 	@rm -f `ls -1tr ${PACKAGE}*gz | tail -n1`
28 | 	@rm -rf ${PACKAGE}.Rcheck
29 | 
30 | autodoc:
31 | 	${RSCRIPT} autodoc.R process
32 | 
33 | staticdocs:
34 | 	@mkdir -p inst/staticdocs
35 | 	${RSCRIPT} -e "library(methods); staticdocs::build_site()"
36 | 	rm -f vignettes/*.html
37 | 	@rmdir inst/staticdocs
38 | website: staticdocs
39 | 	./update_web.sh
40 | 
41 | # No real targets!
42 | .PHONY: all test document install vignettes
43 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 | 
3 | export(rexcel_read)
4 | export(rexcel_read_workbook)
5 | export(rexcel_readxl)
6 | 


--------------------------------------------------------------------------------
/R/colours.R:
--------------------------------------------------------------------------------
  1 | col_apply_tint <- function(col, tint) {
  2 |   if (length(tint) == 1L && length(col) > 1L) {
  3 |     tint <- rep(tint, length(col))
  4 |   }
  5 |   i <- tint < 0
  6 |   hsl <- rgb2hsl(grDevices::col2rgb(col))
  7 |   if (any(i)) {
  8 |     hsl[3L, i] <- hsl[3L, i] * (1 + tint)
  9 |   }
 10 |   i <- !i
 11 |   if (any(i)) {
 12 |     hsl[3L, i] <- hsl[3L, i] * (1 - tint) + tint
 13 |   }
 14 |   rgb2col(hsl2rgb(hsl))
 15 | }
 16 | 
 17 | ## NOTE: the spec is unfortunately a little vague about the
 18 | ## interpretation of the alpha channel; in the example colours
 19 | ## (p. 1763) they use 00 to indicate opacity but empirically (and
 20 | ## conventionally) FF is used.
 21 | argb2rgb <- function(x, opaque="FF") {
 22 |   a <- substr(x, 1L, 2L)
 23 |   rgb <- paste0("#", substr(x, 3L, 8L))
 24 |   if (a == opaque) rgb else paste0(rgb, a)
 25 | }
 26 | 
 27 | check_col_matrix <- function(m) {
 28 |   if (!is.matrix(m)) {
 29 |     if (length(m) == 3L) {
 30 |       m <- matrix(m, 3L, 1L)
 31 |     } else if (length(m) == 4L) {
 32 |       m <- matrix(m, 3L, 1L)
 33 |     } else {
 34 |       stop("Invalid input for m")
 35 |     }
 36 |   }
 37 |   if (!any(nrow(m) == c(3, 4))) {
 38 |     stop("Invalid input for m")
 39 |   }
 40 |   m
 41 | }
 42 | 
 43 | rgb2hsl <- function(m) {
 44 |   m <- check_col_matrix(m)
 45 |   if (length(m) == 0L) {
 46 |     nms <- c("h", "s", "l", if (nrow(m) == 4L) "a")
 47 |     ret <- matrix(numeric(), length(nms), 0)
 48 |     rownames(ret) <- nms
 49 |     return(ret)
 50 |   }
 51 |   m <- m / 255
 52 |   r <- apply(m[1:3, , drop=FALSE], 2, range)
 53 |   c_min <- r[1L, ]
 54 |   c_max <- r[2L, ]
 55 |   delta <- c_max - c_min
 56 | 
 57 |   l <- (c_max + c_min) / 2
 58 |   s <- delta / (1 - abs(2 * l - 1))
 59 | 
 60 |   r <- m[1L, ]
 61 |   g <- m[2L, ]
 62 |   b <- m[3L, ]
 63 | 
 64 |   i <- apply(m[1:3, , drop=FALSE], 2, which.max)
 65 |   h <- numeric(length(s))
 66 |   j <- i == 1L
 67 |   h[j] <- (g[j] - b[j]) / delta[j] %% 6
 68 |   j <- i == 2L
 69 |   h[j] <- (b[j] - r[j]) / delta[j] + 2
 70 |   j <- i == 3L
 71 |   h[j] <- (r[j] - g[j]) / delta[j] + 4
 72 | 
 73 |   h <- h / 6
 74 | 
 75 |   i <- delta == 0
 76 |   h[i] <- s[i] <- 0
 77 | 
 78 |   i <- h < 0
 79 |   h[i] <- h[i] %% 1
 80 | 
 81 |   if (nrow(m) == 4L) {
 82 |     rbind(h, s, l, a=m[4L, ])
 83 |   } else {
 84 |     rbind(h, s, l)
 85 |   }
 86 | }
 87 | 
 88 | hsl2rgb <- function(m) {
 89 |   m <- check_col_matrix(m)
 90 | 
 91 |   h <- m[1L, ] * 360 / 60
 92 |   s <- m[2L, ]
 93 |   l <- m[3L, ]
 94 | 
 95 |   C <- (1 - abs(2 * l - 1)) * s
 96 |   X <- C * (1 - abs(h %% 2 - 1))
 97 |   H <- floor(h)
 98 | 
 99 |   cx <- rbind(C, X)
100 |   rgb <- array(0, dim(m))
101 |   rgb[1:2,     H == 0] <- cx[,    H == 0]
102 |   rgb[1:2,     H == 1] <- cx[2:1, H == 1]
103 |   rgb[2:3,     H == 2] <- cx[,    H == 2]
104 |   rgb[2:3,     H == 3] <- cx[2:1, H == 3]
105 |   rgb[c(1, 3), H == 4] <- cx[2:1, H == 4]
106 |   rgb[c(1, 3), H == 5] <- cx[,    H == 5]
107 | 
108 |   mm <- l - C / 2
109 |   ret <- (rgb + rep(mm, each=3)) * 255
110 | 
111 |   rownames(ret) <- c("red", "green", "blue")
112 |   if (nrow(m) == 4L) {
113 |     ret <- rbind(ret, alpha=m[4L, ] * 255)
114 |   }
115 |   ret
116 | }
117 | 
118 | rgb2col <- function(m) {
119 |   grDevices::rgb(m[1, ], m[2, ], m[3, ],
120 |                  alpha = if (nrow(m) == 4L) m[4, ], maxColorValue = 255)
121 | }
122 | 


--------------------------------------------------------------------------------
/R/read.R:
--------------------------------------------------------------------------------
  1 | ##' This function does not get the data into a usable form but at
  2 | ##' least loads it up into R so we can poke about with it.  The
  3 | ##' resulting loaded data can distinguish between formulae and data,
  4 | ##' numbers and text.  Merged cells are detected.  Style information
  5 | ##' is included, though nothing is done with it yet.  A summary of the
  6 | ##' data is printed if you print the resulting object.
  7 | ##'
  8 | ##' @title Read an xlsx file that probably contains nontabular data
  9 | ##'
 10 | ##' @param path Path to the xlsx file to load.  xls files are not supported.
 11 | ##'
 12 | ##' @param sheet Sheet number (not name at this point).  Googlesheets
 13 | ##'   exported sheets are likely not to do the right thing.
 14 | ##'
 15 | ##' @return An \code{xlsx} object that can be printed.  Future methods
 16 | ##'   might do something sensible.  The structure is subject to
 17 | ##'   complete change so is not documented here.
 18 | ##' @export
 19 | rexcel_read <- function(path, sheet=1L) {
 20 |   rexcel_read_workbook(path, sheet, FALSE)$sheets[[1L]]
 21 | }
 22 | 
 23 | ##' Read an entire workbook
 24 | ##'
 25 | ##' @title Read an Excel workbook
 26 | ##'
 27 | ##' @param path Path to the xlsx file to load.  xls files are not supported.
 28 | ##'
 29 | ##' @param sheets Character or integer vector of sheets to read, or
 30 | ##'   \code{NULL} to read all sheets (the default)
 31 | ##'
 32 | ##' @param progress Display a progress bar?
 33 | ##' @export
 34 | rexcel_read_workbook <- function(path, sheets=NULL, progress=TRUE) {
 35 | 
 36 |   if (!is_xlsx(path)) {
 37 |     stop("`path` does not appear to point to valid xlsx:\n", path,
 38 |          call. = FALSE)
 39 |   }
 40 | 
 41 |   dat <- xlsx_read_workbook(path)
 42 | 
 43 |   if (is.null(sheets)) {
 44 |     sheets <- xlsx_sheet_names(dat)
 45 |   } else if (is.numeric(sheets)) {
 46 |     sheets <- xlsx_sheet_names(dat)[sheets]
 47 |   }
 48 | 
 49 |   p <- progress(paste0(basename(path), " [:bar] :current / :total"),
 50 |                 length(sheets), show=progress)
 51 |   p(0)
 52 | 
 53 |   ## Some of this will move into the worksheet and save some of the
 54 |   ## ugly options passinh here.
 55 |   strings <- xlsx_read_shared_strings(path)
 56 |   date_offset <- xlsx_date_offset(path)
 57 | 
 58 |   style_xlsx <- xlsx_read_style(path)
 59 |   lookup <- tibble::tibble(
 60 |     font    = style_xlsx$cell_xfs$font_id,
 61 |     fill    = style_xlsx$cell_xfs$fill_id,
 62 |     border  = style_xlsx$cell_xfs$border_id,
 63 |     num_fmt = style_xlsx$cell_xfs$num_fmt_id)
 64 | 
 65 |   ## This becomes read_number_formats?
 66 |   if (nrow(style_xlsx$num_fmts) > 0L) {
 67 |     n <- max(style_xlsx$num_fmts$num_format_id)
 68 |     fmt <- rep(NA_character_, n)
 69 |     fmt[seq_along(xlsx_format_codes())] <- xlsx_format_codes()
 70 |     fmt[style_xlsx$num_fmts$num_format_id] <- style_xlsx$num_fmts$format_code
 71 |     custom_date <- style_xlsx$num_fmts$num_format_id[
 72 |       grepl("[dmyhs]", style_xlsx$num_fmts$format_code)]
 73 |   } else {
 74 |     fmt <- xlsx_format_codes()
 75 |     custom_date <- integer(0)
 76 |   }
 77 |   is_date_time <- xlsx_is_date_time(seq_along(fmt), custom_date)
 78 |   num_fmt <- tibble::tibble(num_fmt = fmt, is_date_time = is_date_time)
 79 | 
 80 |   style <- linen::linen_style(lookup, font=style_xlsx$fonts,
 81 |                               fill=style_xlsx$fills,
 82 |                               border=style_xlsx$borders,
 83 |                               num_fmt=num_fmt)
 84 | 
 85 |   workbook <- linen::workbook(sheets, style, dat$defined_names)
 86 |   for (s in sheets) {
 87 |     p(1)
 88 |     rexcel_read_worksheet(path, s, workbook, dat, strings, style, date_offset)
 89 |   }
 90 | 
 91 |   workbook
 92 | }
 93 | 
 94 | ## The name here is a bit of a gong show, as is the general logic.  I
 95 | ## hope this will refine a bit over the next little bit.
 96 | rexcel_read_worksheet <- function(path, sheet, workbook,
 97 |                                   workbook_dat, strings, style, date_offset) {
 98 |   if (is.numeric(sheet)) {
 99 |     sheet_name <- workbook$names[[sheet]]
100 |   } else if (is.character(sheet)) {
101 |     sheet_name <- sheet
102 |   } else {
103 |     stop("Invalid input for sheet")
104 |   }
105 | 
106 |   target <- xlsx_internal_sheet_name(sheet, workbook_dat)
107 |   rels <- xlsx_read_rels(path, target)
108 | 
109 |   xml <- xlsx_read_sheet(path, sheet, workbook_dat)
110 |   ns <- xml2::xml_ns(xml)
111 | 
112 |   merged <- xlsx_read_merged(xml, ns)
113 |   view <- xlsx_ct_worksheet_views(xml, ns)
114 |   cols <- xlsx_ct_cols(xml, ns) # NOTE: not used yet
115 |   dat <- xlsx_parse_cells(xml, ns, strings, style, date_offset)
116 |   rows <- dat$rows
117 |   cells <- linen::cells(dat$cells$ref, dat$cells$style, dat$cells$type,
118 |                         dat$cells$value, dat$cells$formula)
119 | 
120 |   comments <- NULL
121 |   if (!is.null(rels)) {
122 |     path_comments <- rels$target_abs[rels$type == "comments"]
123 |     if (length(path_comments) == 1L) {
124 |       comments <- xlsx_read_comments(path, path_comments)
125 |     } else if (length(path_comments) > 1L) {
126 |       stop("CHECK THIS") # TODO: assertion.
127 |     }
128 |   }
129 | 
130 |   linen::worksheet(sheet_name, cols, rows, cells, merged, view, comments,
131 |                    workbook)
132 | }
133 | 
134 | xlsx_list_files <- function(path) {
135 |   ret <- tibble::as_tibble(utils::unzip(path, list = TRUE))
136 |   names(ret) <- tolower(names(ret))
137 |   ret[order(ret$name), ]
138 | }
139 | 
140 | xlsx_read_sheet <- function(path, sheet, workbook_dat) {
141 |   xml <- xlsx_read_file(path, xlsx_internal_sheet_name(sheet, workbook_dat))
142 |   stopifnot(xml2::xml_name(xml) == "worksheet")
143 |   xml
144 | }
145 | 
146 | xlsx_read_file <- function(path, file) {
147 |   tmp <- tempfile()
148 |   dir.create(tmp)
149 |   ## Oh boy more terrible default behaviour.
150 |   filename <- tryCatch(utils::unzip(path, file, exdir=tmp),
151 |                        warning=function(e) stop(e))
152 |   on.exit(unlink(tmp, recursive=TRUE))
153 |   xml2::read_xml(filename)
154 | }
155 | 
156 | xlsx_read_file_if_exists <- function(path, file, missing=NULL) {
157 |   ## TODO: Appropriate error handling here is difficult; we should
158 |   ## check that `path` exists, but by the time that this is called we
159 |   ## know that already.
160 |   tmp <- tempfile()
161 |   dir.create(tmp)
162 |   filename <- tryCatch(utils::unzip(path, file, exdir=tmp),
163 |                        warning=function(e) NULL,
164 |                        error=function(e) NULL)
165 |   if (is.null(filename)) {
166 |     missing
167 |   } else {
168 |     on.exit(unlink(tmp, recursive=TRUE))
169 |     xml2::read_xml(filename)
170 |   }
171 | }
172 | 
173 | ## sheetData: https://msdn.microsoft.com/EN-US/library/office/documentformat.openxml.spreadsheet.sheetdata.aspx
174 | ##
175 | ##   Nothing looks interesting in sheetData, and all elements must be
176 | ##   'row'.
177 | ##
178 | ## row: https://msdn.microsoft.com/EN-US/library/office/documentformat.openxml.spreadsheet.row.aspx
179 | ##   The only interesting attribute here is "hidden", but that
180 | ##   includes being collapsed by outline, so probably not that
181 | ##   interesting.
182 | ##
183 | ## cell: https://msdn.microsoft.com/EN-US/library/office/documentformat.openxml.spreadsheet.cell.aspx
184 | ##
185 | ##   Might contain
186 | ##     <f>: formula
187 | ##     <is> rich test inline
188 | ##     <v> value
189 | ##   Interesting attributes:
190 | ##     r: an A1 style reference to the locatiopn of this cell
191 | ##     s: the index of this cell's style (if colours are used as a guide)
192 | ##     t: type "an enumeration representing the cell's data type", the
193 | ##       only reference to which I can find is
194 | ##       http://mailman.vse.cz/pipermail/sc34wg4/attachments/20100428/3fc0a446/attachment-0001.pdf
195 | ##       - b: boolean
196 | ##       - d: date (ISO 8601)
197 | ##       -  e: error
198 | ##       - inlineStr: inline string in rich text format, with
199 | ##           contents in the 'is' element, not the 'v' element.
200 | ##       - n: number
201 | ##       - s: shared string
202 | ##       - str: formula string
203 | ##
204 | ## However, many numbers seem not to have a "t" attribute which is
205 | ## charming.
206 | ##
207 | ## NOTE: handling of formulae is potentially tricky as they can have an attribute "shared" which
208 | ##
209 | ## Blank cells have no children at all.
210 | ##
211 | ## See readxl/src/XlsxCell.h: XlsxCell::type()
212 | xlsx_parse_cells <- function(xml, ns, strings, style_data, date_offset) {
213 |   sheet_data <- xlsx_read_sheet_data(xml, ns, strings)
214 |   cells <- sheet_data$cells
215 |   rows <- sheet_data$rows
216 | 
217 |   is_date_time <- linen::style_lookup(style_data, idt = "num_fmt/is_date_time",
218 |                                       idx = cells$style)$idt
219 |   is_date_time[is.na(is_date_time)] <- FALSE
220 | 
221 |   ## TODO: could/should this not be done when 'cells' is first loaded?
222 |   type <- character(nrow(cells))
223 |   type[!is.na(cells$type) & cells$type == "b"] <- "bool"
224 |   type[!is.na(cells$type) & cells$type == "s" | cells$type == "str"] <- "text"
225 |   i <- is.na(cells$type) | cells$type == "n"
226 |   type[i] <- ifelse(is_date_time[i], "date", "number")
227 |   type[lengths(cells$value) == 0L] <- "blank"
228 |   cells$type <- type
229 | 
230 |   i <- type == "date"
231 |   cells$value[i] <-
232 |     as.list(as.POSIXct(unlist(cells$value[i]) * 86400, "UTC", date_offset))
233 | 
234 |   list(cells=cells, rows=rows)
235 | }
236 | 
237 | xlsx_sheet_names <- function(dat) {
238 |   if (is.character(dat)) {
239 |     dat <- xlsx_read_workbook(dat)
240 |   }
241 |   sheets <- dat$sheets
242 |   sheets$name[sheets$type == "worksheet" & sheets$state != "veryHidden"]
243 | }
244 | 
245 | ## Return the filename within the bundle
246 | xlsx_internal_sheet_name <- function(sheet, dat) {
247 |   if (length(sheet) != 1L) {
248 |     stop("'sheet' must be a scalar")
249 |   }
250 |   if (is.na(sheet)) {
251 |     stop("'sheet' must be non-missing")
252 |   }
253 | 
254 |   sheets <- dat$sheets
255 |   sheets <- sheets[sheets$type == "worksheet" & sheets$state != "veryHidden", ]
256 | 
257 |   if (is.character(sheet)) {
258 |     target <- sheets$target_abs[match(sheet, sheets$name)]
259 |   } else if (is.numeric(sheet)) {
260 |     target <- sheets$target_abs[[sheet]]
261 |   } else {
262 |     stop("invalid input")
263 |   }
264 |   target
265 | }
266 | 
267 | ## NOTE: Date handling will change a bit once I get the string parsing
268 | ## stuff entirely worked out.
269 | xlsx_date_offset <- function(path) {
270 |   ## See readxl/src/utils.h: dateOffset
271 |   ## See readxl/src/XlsxWorkbook.h: is1904
272 |   xml <- xlsx_read_file(path, "xl/workbook.xml")
273 |   ns <- xml2::xml_ns(xml)
274 |   xpath <- sprintf("/%s/%s/@date1904",
275 |                    xlsx_name("workbook", ns), xlsx_name("workbookPr", ns))
276 |   date1904 <- xml2::xml_find_first(xml, xpath, xml2::xml_ns(xml))
277 |   if (inherits(date1904, "xml_missing")) {
278 |     date_is_1904 <- FALSE
279 |   } else {
280 |     ## TODO: in theory we should do whatever atoi would allow here
281 |     ## (that's what Hadley uses in the C++) but I have a sheet that
282 |     ## contains this as "false".  So I'm trying this way for now.
283 |     value <- xml2::xml_text(date1904)
284 |     date_is_1904 <- value == "1" || value == "true"
285 |   }
286 |   if (date_is_1904) "1904-01-01" else "1899-12-30"
287 | }
288 | 
289 | xlsx_is_date_time <- function(id, custom) {
290 |   ## See readxl's src/CellType.h: isDateTime()
291 |   id %in% c(c(14:22, 27:36, 45:47, 50:58, 71:81), custom)
292 | }
293 | 


--------------------------------------------------------------------------------
/R/read_comments.R:
--------------------------------------------------------------------------------
 1 | ## [x] 18.7.1 author (Author) -- xlsx_ct_authors
 2 | ## [x] 18.7.2 authors (Authors) -- (in xlsx_ct_author)
 3 | ## [x] 18.7.3 comment (Comment) -- xlsx_ct_comment
 4 | ## [x] 18.7.4 commentList (List of Comments) -- xlsx_ct_comment_list
 5 | ## [ ] 18.7.5 commentPr (Comment Properties)
 6 | ## [x] 18.7.6 comments (Comments) -- xlsx_ct_comments
 7 | ## [x] 18.7.7 text (Comment Text) -- xlsx_ct_rst
 8 | xlsx_read_comments <- function(path, file) {
 9 |   xml <- xlsx_read_file(path, file)
10 |   xlsx_ct_comments(xml, xml2::xml_ns(xml))
11 | }
12 | 
13 | xlsx_ct_comments <- function(xml, ns) {
14 |   authors <- xlsx_ct_authors(xml, ns)
15 |   xlsx_ct_comment_list(xml, ns, authors)
16 | }
17 | 
18 | xlsx_ct_authors <- function(xml, ns) {
19 |   authors <-
20 |     xml2::xml_children(xml2::xml_find_first(xml, xlsx_name("authors", ns), ns))
21 |   vcapply(authors, xml2::xml_text)
22 | }
23 | 
24 | xlsx_ct_comment_list <- function(xml, ns, authors) {
25 |   process_container(xml, xlsx_name("commentList", ns), ns,
26 |                     xlsx_ct_comment, authors)
27 | }
28 | 
29 | xlsx_ct_comment <- function(x, ns, authors) {
30 |   at <- as.list(xml2::xml_attrs(x))
31 |   text <- xlsx_ct_rst(xml2::xml_find_first(x, xlsx_name("text", ns), ns), ns)
32 |   tibble::tibble(
33 |     ref = attr_character(at$ref),
34 |     author = authors[attr_integer(at$authorId) + 1L],
35 |     shape_id = attr_integer(at$shapeId),
36 |     text = text)
37 | }
38 | 


--------------------------------------------------------------------------------
/R/read_rels.R:
--------------------------------------------------------------------------------
 1 | 
 2 | xlsx_read_rels <- function(path, file) {
 3 |   xml <- xlsx_read_file_if_exists(path, xlsx_path_rels(file))
 4 |   if (is.null(xml)) {
 5 |     NULL
 6 |   } else {
 7 |     ## TODO: These are allowed to be external references I think; in
 8 |     ## which case the abs path here is not correct.
 9 |     ##
10 |     ## TODO: In the case where these are absolute references, the
11 |     ## relative references are incorrect, but doing this properly
12 |     ## requires some path arithmetic.
13 |     ##
14 |     ## NOTE: while this is an "absolute" path, we lack the initial slash...
15 |     ret <- rbind_df(lapply(xml2::xml_children(xml), xlsx_parse_relationship))
16 | 
17 |     target_abs <- ret$target
18 |     i <- grepl("^/", target_abs)
19 |     if (any(i)) {
20 |       target_abs[i] <- sub("^/", "", target_abs[i])
21 |     }
22 |     j <- !i
23 |     if (any(j)) {
24 |       target_abs[j] <- path_join(dirname(file), target_abs[j])
25 |     }
26 |     ret$target_abs <- target_abs
27 |     ret
28 |   }
29 | }
30 | 
31 | xlsx_parse_relationship <- function(x) {
32 |   at <- as.list(xml2::xml_attrs(x))
33 |   tibble::tibble(
34 |     id = attr_character(at$Id),
35 |     type = basename(at$Type),
36 |     target = at$Target)
37 | }
38 | 
39 | ## Part 2, 9.3.3, p. 24
40 | ## > A special naming convention is used for the Relationships
41 | ## > part. First, the Relationships part for a part in a given folder
42 | ## > in the name hierarchy is stored in a sub-folder called
43 | ## > “_rels”. Second, the name of the Relationships part is formed by
44 | ## > appending “.rels” to the name of the original part. Package
45 | ## > relationships are found in the package relationships part named
46 | ## > “/_rels/.rels”.
47 | xlsx_path_rels <- function(filename) {
48 |   file.path(dirname(filename), "_rels",
49 |             paste0(basename(filename), ".rels"))
50 | }
51 | 


--------------------------------------------------------------------------------
/R/read_style.R:
--------------------------------------------------------------------------------
  1 | ## Basically everything here follows from "Styles": section 18.8 (p. 1744).
  2 | ##
  3 | ## I have been fairly thorough at pulling things in, but it's not
  4 | ## really complete.  There are enumeration types that I have not
  5 | ## turned into factors (arguments go either way, really).  There are a
  6 | ## few things that are going to be very hard to deal with, too.
  7 | ##
  8 | ##   [x] 18.8.1  alignment (Alignment) -- xlsx_ct_alignment
  9 | ##   [x] 18.8.2  b (Bold) -- xlsx_ct_boolean_property
 10 | ##   [x] 18.8.3  bgColor (Background Color) -- xlsx_ct_color
 11 | ##   [x] 18.8.4  border (Border) -- xlsx_ct_border
 12 | ##   [x] 18.8.5  borders (Borders) -- xlsx_ct_borders
 13 | ##   [x] 18.8.6  bottom (Bottom Border) -- xlsx_ct_border_pr
 14 | ##   [x] 18.8.7  cellStyle (Cell Style) -- xlsx_ct_cell_style
 15 | ##   [x] 18.8.8  cellStyles (Cell Styles) -- xlsx_ct_cell_styles
 16 | ##   [x] 18.8.9  cellStyleXfs (Formatting Records) -- xlsx_ct_cell_style_xfs
 17 | ##   [x] 18.8.10 cellXfs (Cell Formats) -- xlsx_ct_cell_xfs
 18 | ##   [ ] 18.8.11 colors (Colors)
 19 | ##   [x] 18.8.12 condense (Condense) -- xlsx_ct_boolean_property
 20 | ##   [-] 18.8.13 diagonal (Diagonal) -- (xlsx_ct_border_pr)
 21 | ##   [ ] 18.8.14 dxf (Formatting)
 22 | ##   [ ] 18.8.15 dxfs (Formats)
 23 | ##   [x] 18.8.16 end (Trailing Edge Border) -- xlsx_ct_border_pr
 24 | ##   [x] 18.8.17 extend (Extend) -- xlsx_ct_boolean_property
 25 | ##   [x] 18.8.18 family (Font Family) -- xlsx_st_font_family
 26 | ##   [x] 18.8.19 fgColor (Foreground Color) -- xlsx_ct_color
 27 | ##   [x] 18.8.20 fill (Fill) -- xlsx_ct_fill
 28 | ##   [x] 18.8.21 fills (Fills) -- xlsx_ct_fills
 29 | ##   [x] 18.8.22 font (Font) -- xlsx_ct_font
 30 | ##   [x] 18.8.23 fonts (Fonts) -- xlsx_ct_fonts
 31 | ##   [x] 18.8.24 gradientFill (Gradient) -- xlsx_ct_gradient_fill
 32 | ##   [ ] 18.8.25 horizontal (Horizontal Inner Borders)
 33 | ##   [x] 18.8.26 i (Italic) -- xlsx_ct_boolean_property
 34 | ##   [x] 18.8.27 indexedColors (Color Indexes) -- xlsx_ct_indexed_colors
 35 | ##   [ ] 18.8.28 mruColors (MRU Colors)
 36 | ##   [x] 18.8.29 name (Font Name) -- (plain text handling in xlsx_ct_font)
 37 | ##   [x] 18.8.30 numFmt (Number Format) -- xlsx_ct_num_fmt
 38 | ##   [x] 18.8.31 numFmts (Number Formats) -- xlsx_ct_num_fmts
 39 | ##   [x] 18.8.32 patternFill (Pattern) -- xlsx_ct_pattern_fill
 40 | ##   [ ] 18.8.33 protection (Protection Properties)
 41 | ##   [x] 18.8.34 rgbColor (RGB Color) -- xlsx_ct_rgbcolor
 42 | ##   [x] 18.8.35 scheme (Scheme) -- (plain text handling in xlsx_ct_font)
 43 | ##   [x] 18.8.36 shadow (Shadow) -- xlsx_ct_boolean_property
 44 | ##   [x] 18.8.37 start (Leading Edge Border) -- xlsx_ct_border_pr
 45 | ##   [ ] 18.8.38 stop (Gradient Stop)
 46 | ##   [x] 18.8.39 styleSheet (Style Sheet) -- xlsx_read_style
 47 | ##   [ ] 18.8.40 tableStyle (Table Style)
 48 | ##   [ ] 18.8.41 tableStyleElement (Table Style)
 49 | ##   [ ] 18.8.42 tableStyles (Table Styles)
 50 | ##   [x] 18.8.43 top (Top Border) -- xlsx_ct_border_pr
 51 | ##   [ ] 18.8.44 vertical (Vertical Inner Border)
 52 | ##   [x] 18.8.45 xf (Format) -- xlsx_ct_xf
 53 | ##
 54 | ## Most elements at some point things get called a CT_<thing> (for
 55 | ## "Complex Type"), e.g., CT_Color; at that point the processing thing
 56 | ## is called xlsx_ct_boolean_property or similar.  Note that this
 57 | ## drives off the *type*, not off the element name.  These are often,
 58 | ## but not always, the same.
 59 | ##
 60 | ## There are no strong argument conventions either; if anything can
 61 | ## contain a colour the both theme and index are passed through, as
 62 | ## these contain information to convert colour types into an RGB
 63 | ## triplet.  If any Xpath query is used then the namespace is passed
 64 | ## along as ns.
 65 | ##
 66 | ## From the point of view of the rest of the package, the only entry
 67 | ## point to use is xlsx_read_style which will return a list of a great
 68 | ## many data.frames (the format here will get cleaned up soon).
 69 | ##
 70 | ## Some of the needed functions here come from the shared string table.
 71 | ##
 72 | ##
 73 | xlsx_read_style <- function(path) {
 74 |   xml <- xlsx_read_file(path, "xl/styles.xml")
 75 |   ns <- xml2::xml_ns(xml)
 76 | 
 77 |   theme <- xlsx_read_theme(path)
 78 |   index <- xlsx_ct_indexed_colors(xml, ns)
 79 | 
 80 |   fonts <- xlsx_ct_fonts(xml, ns, theme, index)
 81 |   fills <- xlsx_ct_fills(xml, ns, theme, index)
 82 |   borders <- xlsx_ct_borders(xml, ns, theme, index)
 83 | 
 84 |   cell_style_xfs <- xlsx_ct_cell_style_xfs(xml, ns)
 85 |   cell_xfs <- xlsx_ct_cell_xfs(xml, ns)
 86 |   cell_styles <- xlsx_ct_cell_styles(xml, ns)
 87 |   num_fmts <- xlsx_ct_num_fmts(xml, ns)
 88 | 
 89 |   list(fonts=fonts,
 90 |        fills=fills,
 91 |        borders=borders,
 92 |        cell_style_xfs=cell_style_xfs,
 93 |        cell_xfs=cell_xfs,
 94 |        cell_styles=cell_styles,
 95 |        num_fmts=num_fmts)
 96 | }
 97 | 
 98 | ## NOTE: this only reads the the colour information from the theme as
 99 | ## nothing else looks that exciting in there, really.
100 | xlsx_read_theme <- function(path) {
101 |   ## TODO: Strictly, the theme information should come from the
102 |   ## workbook.rels.xml file by looking to see which file has the
103 |   ## appropriate officeDocument/2006/relationships/theme entry, but this
104 |   ## should be fine for now.
105 |   ##
106 |   ## NOTE: MSDN suggests that this will always be theme1.xml for Excel
107 |   ## and only n>1 for PowerPoint.
108 |   xml <- xlsx_read_file_if_exists(path, "xl/theme/theme1.xml")
109 |   if (is.null(xml)) {
110 |     return(NULL)
111 |   }
112 |   ns <- xml2::xml_ns(xml)
113 |   tmp <- xml2::xml_find_first(xml, "/a:theme/a:themeElements/a:clrScheme", ns)
114 | 
115 |   ## Empirical ordering, based on one random website.  I have not
116 |   ## found the support for this in the actual spec yet and have seen a
117 |   ## few variants on the ordering listed there incl dk1/lt1/dk2/lt2/accent...
118 | 
119 |   nms <- c("lt1", "dk1", "lt2", "dk2",
120 |            paste0("accent", 1:6),
121 |            "hlink", "folHlink")
122 |   f <- function(x, xml, ns) {
123 |     tmp <- xml2::xml_find_first(xml, paste0(".//a:", x), ns)
124 |     nd <- xml2::xml_children(tmp)[[1L]]
125 |     at <- switch(xml2::xml_name(nd), sysClr="lastClr", srgbClr="val")
126 |     paste0("#", xml2::xml_attr(nd, at, ns))
127 |   }
128 |   pal <- vcapply(nms, f, xml, ns)
129 | 
130 |   list(palette=pal)
131 | }
132 | 
133 | ## 18.8.23 fonts
134 | xlsx_ct_fonts <- function(xml, ns, theme, index) {
135 |   process_container(xml, xlsx_name("fonts", ns), ns, xlsx_ct_font,
136 |                     theme, index)
137 | }
138 | 
139 | ## 18.8.22 font
140 | ##
141 | ## The link to the actual definition is broken, but p. 3930, l 3797
142 | ## looks good.  Beware of the similar but different CT_Font probably
143 | ## for Word's XML.
144 | ##
145 | ## Possible daughter elements (all optional but at most one of each present)
146 | ##
147 | ##   name (CT_FontName)
148 | ##   charset (CT_IntProperty)
149 | ##   family (CT_FontFamily)
150 | ##   b, i, strike, outline, shadow, condense, extend (CT_BooleanProperty)
151 | ##   color (CT_Color)
152 | ##   sz (CT_FontSize)
153 | ##   u (CT_UnderlineProperty)
154 | ##   vertAlign (CT_VerticalAlignFontProperty) - subscript / superscript
155 | ##   scheme (CT_FontScheme)
156 | ##
157 | ## Looks like horizontal alignment comes through with the xf element
158 | ## in cellxfs, but I think I ignore that at the moment.  Seems like an
159 | ## odd place tbh.
160 | ##
161 | ## Despite most elements being CT_*, most of this is just that if the
162 | ## element is present a "val" attribute is required.
163 | ##
164 | ## Note that some of the elements here are defined in the "Shared
165 | ## Strings" section of the spec.  Others I have not tracked down yet.
166 | xlsx_ct_font <- function(x, ns, theme, index) {
167 |   name <- xml2::xml_text(
168 |     xml2::xml_find_first(x, xlsx_name("name/@val", ns), ns))
169 |   ## ignoring charset
170 |   family <- xlsx_st_font_family(
171 |     xml2::xml_find_first(x, xlsx_name("family", ns), ns))
172 | 
173 |   b <- xlsx_ct_boolean_property(
174 |     xml2::xml_find_first(x, xlsx_name("b", ns), ns))
175 |   i <- xlsx_ct_boolean_property(
176 |     xml2::xml_find_first(x, xlsx_name("i", ns), ns))
177 |   strike <- xlsx_ct_boolean_property(
178 |     xml2::xml_find_first(x, xlsx_name("strike", ns), ns))
179 |   outline <- xlsx_ct_boolean_property(
180 |     xml2::xml_find_first(x, xlsx_name("outline", ns), ns))
181 |   shadow <- xlsx_ct_boolean_property(
182 |     xml2::xml_find_first(x, xlsx_name("shadow", ns), ns))
183 |   condense <- xlsx_ct_boolean_property(
184 |     xml2::xml_find_first(x, xlsx_name("condense", ns), ns))
185 |   extend <- xlsx_ct_boolean_property(
186 |     xml2::xml_find_first(x, xlsx_name("extend", ns), ns))
187 | 
188 |   color <- xlsx_ct_color(
189 |     xml2::xml_find_first(x, xlsx_name("color", ns), ns), theme, index)
190 |   sz <- xlsx_ct_font_size(
191 |     xml2::xml_find_first(x, xlsx_name("sz", ns), ns))
192 | 
193 |   u <- xlsx_ct_underline_property(
194 |     xml2::xml_find_first(x, xlsx_name("u", ns), ns))
195 |   ## This one here is either baseline, superscript or subscript.  So
196 |   ## probably not terribly useful and fairly confuse-able with
197 |   ## _actual_ vertical alignment.
198 |   ##   vertAlign <- xml2::xml_text(
199 |   ##     xml2::xml_find_first(x, xlsx_name("vertAlign/@val", ns), ns))
200 |   scheme <- xml2::xml_text(
201 |     xml2::xml_find_first(x, xlsx_name("scheme/@val", ns), ns))
202 | 
203 |   tibble::tibble(name, family,
204 |                  b, i, strike, outline, shadow, condense, extend,
205 |                  color, sz, u, scheme)
206 | }
207 | 
208 | ## 18.8.18 family
209 | xlsx_st_font_family <- function(f, missing=NA_character_) {
210 |   pos <- c(NA_character_, "Roman", "Swiss", "Modern", "Script", "Decorative",
211 |            rep("<<reserved>>", 9))
212 |   if (inherits(f, "xml_missing")) {
213 |     missing
214 |   } else {
215 |     pos[[as.integer(xml2::xml_attr(f, "val")) + 1L]]
216 |   }
217 | }
218 | 
219 | ## Used by a bunch of things.  The actual definition is on l 3751 of
220 | ## A.2 (p. 3929).  Note that the xsd defines that if the element is
221 | ## present but @val is empty it defaults to TRUE.
222 | xlsx_ct_boolean_property <- function(b, missing=FALSE) {
223 |   if (inherits(b, "xml_missing")) {
224 |     missing
225 |   } else {
226 |     val <- xml2::xml_attr(b, "val")
227 |     if (is.na(val)) TRUE else as.logical(as.integer(val))
228 |   }
229 | }
230 | 
231 | ## 18.8.21 fills
232 | xlsx_ct_fills <- function(xml, ns, theme, index) {
233 |   process_container(xml, xlsx_name("fills", ns), ns, xlsx_ct_fill, theme, index)
234 | }
235 | 
236 | ## 18.8.20 fill
237 | xlsx_ct_fill <- function(x, ns, theme, index) {
238 |   ## TODO: In the case where not all of these are "pattern" (i.e., we
239 |   ## have a gradient fill) this will not work correctly because we
240 |   ## need totally different things here.  I think what we'll return
241 |   ## there is type=gradient, and then a lookup to a gradient table, so
242 |   ## this will expand by one more column with gradient_id perhaps.
243 | 
244 |   ## The only options here, according to the xsd (A.2, p. 3925,
245 |   ## l. 3498) is a single element of patternFill or gradientFill
246 |   xk <- xml2::xml_children(x)[[1L]]
247 |   if (xml2::xml_name(xk) == "patternFill") {
248 |     xlsx_ct_pattern_fill(xk, ns, theme, index)
249 |   } else {
250 |     xlsx_ct_gradient_fill(xk, ns, theme, index)
251 |   }
252 | }
253 | 
254 | ## 18.8.32 patternFill
255 | xlsx_ct_pattern_fill <- function(x, ns, theme, index) {
256 |   pattern_type <- xml2::xml_attr(x, "patternType")
257 |   fg <- xlsx_ct_color(xml2::xml_find_first(x, xlsx_name("fgColor", ns), ns),
258 |                       theme, index)
259 |   bg <- xlsx_ct_color(xml2::xml_find_first(x, xlsx_name("bgColor", ns), ns),
260 |                       theme, index)
261 |   c(type="pattern", pattern_type=pattern_type, fg=fg, bg=bg)
262 | }
263 | 
264 | ## 18.8.24 gradientFill
265 | xlsx_ct_gradient_fill <- function(x, ns, theme, index) {
266 |   ## zero or more stop elements, plus attributes type, degree, left,
267 |   ## right, bottom, all of which are optional.  I think that
268 |   ## realistically we'll have to dump these into a separate lookup
269 |   ## table or something.
270 |   ##
271 |   ## It will be interesting to see what is used in the main corpus.
272 |   ## Even with the terrible things that people do to spreadsheets I'd
273 |   ## hope that this is not actually common.
274 |   stop("Ignoring gradient fill")
275 | }
276 | 
277 | ## 18.8.3  bgColor
278 | ## 18.8.19 fgColor
279 | xlsx_ct_color <- function(x, theme, index) {
280 |   if (inherits(x, "xml_missing")) {
281 |     NA_character_
282 |   } else {
283 |     ## The schema is vague on this point but let's make the assumption
284 |     ## that only one of the following is present:
285 |     ## auto, indexed, rgb, theme
286 |     tmp <- xml2::xml_attrs(x)
287 |     types <- c("auto", "indexed", "rgb", "theme")
288 |     i <- types %in% names(tmp)
289 |     if (!any(i)) {
290 |       return(NA_character_)
291 |     }
292 |     t <- types[i][[1L]]
293 |     v <- tmp[[t]]
294 |     ## TODO: I can't find any information indicating what "auto" means
295 |     ## in this context.  The spec says (at least for fgColor in
296 |     ## 18.8.19, p. 1757, but similar words are used elsewhere):
297 |     ##
298 |     ## > auto: A boolean value indicating the color is automatic and
299 |     ## > system color dependent.
300 |     ##
301 |     ## So it probably depends on exactly _where_ the colour is used
302 |     ## (e.g. if it tends to be a fg or a bg colour).  So I will return
303 |     ## "auto" I think, at least for now.  Probably I could return
304 |     ## "black" but that's going to be quite lossy.  This way I can
305 |     ## transform into a sensible colour at use.
306 |     col <- switch(
307 |       t,
308 |       auto="auto",
309 |       indexed=palette_color(as.integer(v) + 1L, index),
310 |       rgb=argb2rgb(v),
311 |       theme=palette_color(as.integer(v) + 1L, theme$palette))
312 |     if ("tint" %in% names(tmp)) {
313 |       col <- col_apply_tint(col, as.numeric(tmp[["tint"]]))
314 |     }
315 |     col
316 |   }
317 | }
318 | 
319 | ## 18.8.5  borders
320 | xlsx_ct_borders <- function(xml, ns, theme, index) {
321 |   process_container(xml, xlsx_name("borders", ns), ns,
322 |                     xlsx_ct_border, theme, index)
323 | }
324 | 
325 | ## 18.8.4  border
326 | ##
327 | ## See also
328 | ##   * 18.8.5 (p. 1750)
329 | ##   * A.2 l. 3460 (p. 3924)
330 | ##
331 | ## Unfortunately, note that the xsd talks about start / end but the
332 | ## *example* has begin / end.  And neither of them indicates what on
333 | ## earth these are for (though the text in the example suggests that
334 | ## end is the right border in that context).  In the sheets I am
335 | ## looking at I mostly see left / right / top / bottom / diagonal.
336 | xlsx_ct_border <- function(x, ns, theme, index) {
337 |   ## NOTE: I am skipping attributes diagonalUp and diagonalDown along
338 |   ## with the element diagonal - it's not the only bit of formatting
339 |   ## trivia we won't handle, but it's a fairly unusual thing to see, I
340 |   ## believe.
341 |   outline <- attr_bool(xml2::xml_attr(x, "outline"), FALSE)
342 | 
343 |   f <- function(path) {
344 |     xlsx_ct_border_pr(xml2::xml_find_first(x, path, ns), ns, theme, index)
345 |   }
346 | 
347 |   tmp <- list(list(outline = outline),
348 |               start = f(xlsx_name("start", ns)),
349 |               end = f(xlsx_name("end", ns)),
350 |               left = f(xlsx_name("left", ns)),
351 |               right = f(xlsx_name("right", ns)),
352 |               top = f(xlsx_name("top", ns)),
353 |               bottom = f(xlsx_name("bottom", ns)))
354 |   tmp <- unlist(tmp, FALSE)
355 |   names(tmp) <- sub(".", "_", names(tmp), fixed=TRUE)
356 |   tibble::as_tibble(tmp)
357 | }
358 | 
359 | ## style (ST_BorderStyle) can be one of (18.18.3, p. 2428):
360 | ##
361 | ##   * dashDot
362 | ##   * dashDotDot
363 | ##   * dashed
364 | ##   * dotted
365 | ##   * double
366 | ##   * hair
367 | ##   * medium
368 | ##   * mediumDashDot
369 | ##   * mediumDashDotDot
370 | ##   * mediumDashed
371 | ##   * none
372 | ##   * slantDashDot
373 | ##   * thick
374 | ##   * thin
375 | ##
376 | ## Note that the various combinations do not cross with one another.
377 | ##
378 | ## This handles:
379 | ##   * 18.8.6  bottom
380 | ##   * 18.8.16 end
381 | ##   * 18.8.37 start
382 | ##   * 18.8.43 top
383 | ## as well as left and right which aren't given section numbers in the spec.
384 | xlsx_ct_border_pr <- function(x, ns, theme, index) {
385 |   present <- !inherits(x, "xml_missing")
386 |   if (present) {
387 |     style <- xml2::xml_attr(x, "style")
388 |     color <- xlsx_ct_color(
389 |       xml2::xml_find_first(x, xlsx_name("color", ns), ns), theme, index)
390 |   } else {
391 |     color <- style <- NA_character_
392 |   }
393 |   list(present=present, style=style, color=color)
394 | }
395 | 
396 | ## 18.8.9  cellStyleXfs
397 | xlsx_ct_cell_style_xfs <- function(xml, ns) {
398 |   process_container(xml, xlsx_name("cellStyleXfs", ns), ns, xlsx_ct_xf,
399 |                     classes=TRUE)
400 | }
401 | 
402 | ## 18.8.10 cellXfs
403 | xlsx_ct_cell_xfs <- function(xml, ns) {
404 |   process_container(xml, xlsx_name("cellXfs", ns), ns, xlsx_ct_xf)
405 | }
406 | 
407 | ## 18.8.45 xf (format)
408 | xlsx_ct_xf <- function(x, ns) {
409 |   at <- xml_attrs_list(x)
410 | 
411 |   ## Booleans, indicating if things are applied:
412 |   apply_border <- attr_bool(at$applyBorder, FALSE)
413 |   apply_fill <- attr_bool(at$applyFill, FALSE)
414 |   apply_font <- attr_bool(at$applyFont, FALSE)
415 |   apply_number_format <- attr_bool(at$applyNumberFormat, FALSE)
416 | 
417 |   apply_alignment <- attr_bool(at$applyAlignment, FALSE)
418 | 
419 |   `%&&%` <- function(a, b) {
420 |     if (isTRUE(a)) b else NA_integer_
421 |   }
422 | 
423 |   xf <- tibble::tibble(
424 |     border_id  = apply_border        %&&% attr_integer(at$borderId) + 1L,
425 |     fill_id    = apply_fill          %&&% attr_integer(at$fillId)   + 1L,
426 |     font_id    = apply_font          %&&% attr_integer(at$fontId)   + 1L,
427 |     num_fmt_id = apply_number_format %&&% attr_integer(at$numFmtId) + 1L,
428 | 
429 |     ## Not really sure about these, but they don't hurt to keep around:
430 |     pivot_button = attr_bool(at$pivotButton, FALSE),
431 |     quote_prefix = attr_bool(at$quotePrefix, FALSE),
432 |     apply_protection = attr_bool(at$applyProtection, FALSE),
433 | 
434 |     ## This is a reference against cellStyleXfs
435 |     xf_id = attr_integer(at$xfId) + 1L)
436 | 
437 |   if (is.null(x)) {
438 |     alignment <- xlsx_ct_alignment(NULL, ns)
439 |   } else {
440 |     alignment <- xlsx_ct_alignment(
441 |       xml2::xml_find_first(x, xlsx_name("alignment", ns), ns))
442 |     if (!isTRUE(apply_alignment)) {
443 |       alignment[] <- lapply(alignment, as_na)
444 |     }
445 |   }
446 |   cbind(xf, alignment)
447 | }
448 | 
449 | ## 18.8.1  alignment
450 | ##
451 | ## horizontal: center | centerContinuous | distributed | fill |
452 | ##   general | justify | right
453 | ##
454 | ## vertical: bottom | center | distributed | justify | top
455 | xlsx_ct_alignment <- function(x, ns) {
456 |   at <- xml_attrs_list(x)
457 |   tibble::tibble(
458 |     horizontal=attr_character(at$horizontal),
459 |     vertical=attr_character(at$vertical),
460 |     indent=attr_integer(at$indent),
461 |     justify_last_line=attr_bool(at$justifyLastLine, FALSE),
462 |     reading_order=attr_integer(at$readingOrder),
463 |     ## relativeIndent [used only in a dxf element]
464 |     shrink_to_fit=attr_bool(at$shrinkToFit, FALSE),
465 |     text_rotation=attr_integer(at$text_rotation),
466 |     text_wrap=attr_bool(at$textWrap, FALSE))
467 | }
468 | 
469 | ## 18.8.8  cellStyles
470 | xlsx_ct_cell_styles <- function(xml, ns) {
471 |   process_container(xml, xlsx_name("cellStyles", ns), ns, xlsx_ct_cell_style,
472 |                     classes = TRUE)
473 | }
474 | 
475 | ## 18.8.7  cellStyle
476 | xlsx_ct_cell_style <- function(x, ns) {
477 |   ## NOTE: Getting this right is really hard because the Annex (G.2)
478 |   ## lists information about "built-in" styles but these vary with all
479 |   ## things like row position, but no actual information about the
480 |   ## styles is given in the annex.  So it's not really obvious what we
481 |   ## can do here.
482 | 
483 |   ## NOTE: This element can contain "extension list" elements which
484 |   ## are reserved for future use.  But we can skip that.
485 | 
486 |   ## NOTE: xfId: Zero-based index referencing an xf record in the
487 |   ## cellStyleXfs collection. This is used to determine the formatting
488 |   ## defined for this named cell style (this is converted to base1 for
489 |   ## use in R).
490 | 
491 |   at <- xml_attrs_list(x)
492 |   tibble::tibble(
493 |     builtin_id = attr_integer(at$builtinId) + 1L,
494 |     custom_builtin = attr_bool(at$customBuiltin, FALSE),
495 |     hidden = attr_bool(at$hidden, FALSE),
496 |     i_level = attr_integer(at$iLevel),
497 |     name = attr_character(at$name),
498 |     xf_id = attr_integer(at$xfId) + 1L)
499 | }
500 | 
501 | ## 18.8.31 numFmts
502 | xlsx_ct_num_fmts <- function(xml, ns) {
503 |   classes <- c(num_format_id="integer", format_code="character")
504 |   process_container(xml, xlsx_name("numFmts", ns), ns,
505 |                     xlsx_ct_num_fmt, classes = classes)
506 | }
507 | 
508 | ## 18.8.30 numFmt
509 | xlsx_ct_num_fmt <- function(x, ns) {
510 |   at <- as.list(xml2::xml_attrs(x))
511 |   tibble::tibble(
512 |     num_format_id = attr_integer(at$numFmtId) + 1L,
513 |     format_code = attr_character(at$formatCode))
514 | }
515 | 
516 | ## Below here is bits that may move around a bit; code for processing
517 | ## things out into values that R can understand, mostly for colours.
518 | ## We need to do the number formatting thing soon too.
519 | 
520 | ## These come from the ECMA Open XML definition, p 1763 (18.8.27).
521 | ## The spec describes this as a "legacy indexing scheme for colors
522 | ## that is still required for some records, and for backwards
523 | ## compatibility with legacy formats" but this seems to be far more
524 | ## widespread than that (and from things generated with Microsoft's
525 | ## current software I think).
526 | ##
527 | ## Indecies 64 and 65 (the 65th and 66th elements) should be treated
528 | ## specially as system foreground and background colour respectively, but
529 | xlsx_indexed_cols <- function() {
530 |   c("#000000", "#FFFFFF", "#FF0000", "#00FF00", "#0000FF",
531 |     "#FFFF00", "#FF00FF", "#00FFFF", "#000000", "#FFFFFF",
532 |     "#FF0000", "#00FF00", "#0000FF", "#FFFF00", "#FF00FF",
533 |     "#00FFFF", "#800000", "#008000", "#000080", "#808000",
534 |     "#800080", "#008080", "#C0C0C0", "#808080", "#9999FF",
535 |     "#993366", "#FFFFCC", "#CCFFFF", "#660066", "#FF8080",
536 |     "#0066CC", "#CCCCFF", "#000080", "#FF00FF", "#FFFF00",
537 |     "#00FFFF", "#800080", "#800000", "#008080", "#0000FF",
538 |     "#00CCFF", "#CCFFFF", "#CCFFCC", "#FFFF99", "#99CCFF",
539 |     "#FF99CC", "#CC99FF", "#FFCC99", "#3366FF", "#33CCCC",
540 |     "#99CC00", "#FFCC00", "#FF9900", "#FF6600", "#666699",
541 |     "#969696", "#003366", "#339966", "#003300", "#333300",
542 |     "#993300", "#993366", "#333399", "#333333",
543 |     ## Special:
544 |     "black", "white")
545 | }
546 | 
547 | ## See 18.8.30, p. 1767
548 | xlsx_format_codes <- function() {
549 |   ## "Ids not specified in the listing, such as 5, 6, 7, and 8, shall
550 |   ## follow the number format specified by the formatCode attribute."
551 |   c("General",
552 |     "0",
553 |     "0.00",
554 |     "#,##0",
555 |     "#,##0.00",
556 |     ## missing 4-8 incl
557 |     rep(NA, length(4:8)),
558 |     "0%",
559 |     "0.00%",
560 |     "0.00E+00",
561 |     "# ?/?",
562 |     "# ??/??",
563 |     "mm-dd-yy",
564 |     "d-mmm-yy",
565 |     "d-mmm mmm-yy",
566 |     "h:mm AM/PM",
567 |     "h:mm:ss AM/PM",
568 |     "h:mm",
569 |     "h:mm:ss",
570 |     "m/d/yy h:mm",
571 |     ## missing 23-36 incl
572 |     rep(NA, length(23:36)),
573 |     "#,##0 ;(#,##0)",
574 |     "#,##0 ;[Red](#,##0)",
575 |     "#,##0.00;(#,##0.00)",
576 |     "#,##0.00;[Red](#,##0.00)",
577 |     ## missing 41-44 incl
578 |     rep(NA, length(41:44)),
579 |     "mm:ss",
580 |     "[h]:mm:ss",
581 |     "mmss.0",
582 |     "##0.0E+0",
583 |     "@")
584 | }
585 | 
586 | ## See 18.18.55, p. 2462
587 | xlsx_pattern_type <- function() {
588 |   c(## Can process these two
589 |     "none",  # ignores both fgColor and bgColor
590 |     "solid", # renders only the fgColor
591 |     ## but not these:
592 |     "darkDown",
593 |     "darkGray",
594 |     "darkGrid",
595 |     "darkHorizontal",
596 |     "darkTrellis",
597 |     "darkUp",
598 |     "darkVertical",
599 |     "gray0625",
600 |     "gray125",
601 |     "lightDown",
602 |     "lightGray",
603 |     "lightGrid",
604 |     "lightHorizontal",
605 |     "lightTrellis",
606 |     "lightUp",
607 |     "lightVertical",
608 |     "mediumGray")
609 | }
610 | 
611 | ## 18.8.27 indexedColors (Color Indexes)
612 | xlsx_ct_indexed_colors <- function(xml, ns) {
613 |   ## TODO: I should replace this with the full correct path.
614 |   xpath <- paste0("//", xlsx_name("indexedColors", ns))
615 |   indexed_colors <- xml2::xml_find_first(xml, xpath, ns)
616 |   if (inherits(indexed_colors, "xml_missing")) {
617 |     indexed <- xlsx_indexed_cols()
618 |   } else {
619 |     ## NOTE, it seems here that "00" is used for full opacity, which
620 |     ## is charming.
621 |     indexed <- vcapply(xml2::xml_children(indexed_colors),
622 |                        xlsx_ct_rgbcolor, "00")
623 |     if (length(indexed) == 64) {
624 |       indexed <- c(indexed, "black", "white")
625 |     }
626 |   }
627 |   indexed
628 | }
629 | 
630 | ## 18.8.34 rgbColor (RGB Color)
631 | ##
632 | ## NOTE: the spec is unfortunately a little vague about the
633 | ## interpretation of the alpha channel; in the example colours
634 | ## (p. 1763) they use 00 to indicate opacity but empirically (and
635 | ## conventionally) FF is used.
636 | xlsx_ct_rgbcolor <- function(x, opaque="FF") {
637 |   argb2rgb(xml2::xml_attr(x, "rgb"), opaque)
638 | }
639 | 
640 | palette_color <- function(i, pal, err="black") {
641 |   if (i > length(pal)) err else pal[[i]]
642 | }
643 | 


--------------------------------------------------------------------------------
/R/read_workbook.R:
--------------------------------------------------------------------------------
 1 | xlsx_read_workbook <- function(path) {
 2 |   ## TODO: Consider what do do when rels is NULL; do we throw?
 3 |   rels <- xlsx_read_rels(path, "xl/workbook.xml")
 4 |   xml <- xlsx_read_file(path, "xl/workbook.xml")
 5 |   ns <- xml2::xml_ns(xml)
 6 | 
 7 |   defined_names <- xlsx_ct_external_defined_names(xml, ns)
 8 |   sheets <- xlsx_ct_sheets(xml, ns, rels)
 9 | 
10 |   list(rels=rels, sheets=sheets, defined_names=defined_names)
11 | }
12 | 
13 | xlsx_namespace <- function(ns) {
14 |   url <- "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
15 |   names(ns)[[match(url, ns)]]
16 | }
17 | xlsx_name <- function(name, ns) {
18 |   paste0(xlsx_namespace(ns), ":", name)
19 | }
20 | 
21 | ## 18.2.20 sheets
22 | xlsx_ct_sheets <- function(xml, ns, rels) {
23 |   ## Apparently new xml2 has some facilities for dealing with
24 |   ## namespaces which might make this easier.  Or break everything in
25 |   ## here.  Or perhaps a little of both.
26 |   dat <- process_container(xml, xlsx_name("sheets", ns), ns, xlsx_ct_sheet)
27 | 
28 |   if (is.null(rels)) {
29 |     stop("FIXME")
30 |   } else {
31 |     i <- match(dat$ref, rels$id)
32 |     dat <- cbind(dat, rels[i, -1L])
33 |   }
34 | 
35 |   dat
36 | }
37 | 
38 | ## 18.2.19 sheet
39 | xlsx_ct_sheet <- function(xml, ns) {
40 |   at <- as.list(xml2::xml_attrs(xml))
41 |   tibble::tibble(
42 |     name = attr_character(at$name),
43 |     sheet_id = attr_integer(at$sheetId),
44 |     state = attr_character(at$state, "visible"),
45 |     ref = attr_character(at[["id"]]))
46 | }
47 | 
48 | ## 18.14.6 definedName
49 | xlsx_ct_external_defined_names <- function(xml, ns) {
50 |   process_container(xml, xlsx_name("definedNames", ns), ns,
51 |                     xlsx_ct_external_defined_name, classes=TRUE)
52 | }
53 | 
54 | ## 18.14.5 definedName
55 | xlsx_ct_external_defined_name <- function(xml, ns) {
56 |   at <- xml_attrs_list(xml)
57 |   tibble::tibble(
58 |     name = attr_character(at$name),
59 |     refers_to = attr_character(at$refersTo),
60 |     sheet_id = attr_integer(at$sheetId))
61 | }
62 | 


--------------------------------------------------------------------------------
/R/read_worksheet.R:
--------------------------------------------------------------------------------
  1 | ## [ ] 18.3.1.1  anchor (Object Cell Anchor)
  2 | ## [ ] 18.3.1.2  autoFilter (AutoFilter Settings)
  3 | ## [ ] 18.3.1.3  brk (Break)
  4 | ## [x] 18.3.1.4  c (Cell) -- xlsx_ct_cell
  5 | ## [ ] 18.3.1.5  cellSmartTag (Cell Smart Tag)
  6 | ## [ ] 18.3.1.6  cellSmartTagPr (Smart Tag Properties)
  7 | ## [ ] 18.3.1.7  cellSmartTags (Cell Smart Tags)
  8 | ## [ ] 18.3.1.8  cellWatch (Cell Watch Item)
  9 | ## [ ] 18.3.1.9  cellWatches (Cell Watch Items)
 10 | ## [ ] 18.3.1.10 cfRule (Conditional Formatting Rule)
 11 | ## [ ] 18.3.1.11 cfvo (Conditional Format Value Object)
 12 | ## [ ] 18.3.1.12 chartsheet (Chart Sheet)
 13 | ## [x] 18.3.1.13 col (Column Width & Formatting) -- xlsx_ct_cols
 14 | ## [ ] 18.3.1.14 colBreaks (Vertical Page Breaks)
 15 | ## [ ] 18.3.1.15 color (Data Bar Color)
 16 | ## [ ] 18.3.1.16 colorScale (Color Scale)
 17 | ## [x] 18.3.1.17 cols (Column Information) -- xlsx_ct_cols
 18 | ## [ ] 18.3.1.18 conditionalFormatting (Conditional Formatting)
 19 | ## [ ] 18.3.1.19 control (Embedded Control)
 20 | ## [ ] 18.3.1.20 controlPr (Embedded Control Properties)
 21 | ## [ ] 18.3.1.21 controls (Embedded Controls)
 22 | ## [ ] 18.3.1.22 customPr (Custom Property)
 23 | ## [ ] 18.3.1.23 customProperties (Custom Properties)
 24 | ## [ ] 18.3.1.24 customSheetView (Custom Chart Sheet View)
 25 | ## [ ] 18.3.1.25 customSheetView (Custom Sheet View)
 26 | ## [ ] 18.3.1.26 customSheetViews (Custom Chart Sheet Views)
 27 | ## [ ] 18.3.1.27 customSheetViews (Custom Sheet Views)
 28 | ## [ ] 18.3.1.28 dataBar (Data Bar)
 29 | ## [ ] 18.3.1.29 dataConsolidate (Data Consolidate)
 30 | ## [ ] 18.3.1.30 dataRef (Data Consolidation Reference)
 31 | ## [ ] 18.3.1.31 dataRefs (Data Consolidation References)
 32 | ## [ ] 18.3.1.32 dataValidation (Data Validation)
 33 | ## [ ] 18.3.1.33 dataValidations (Data Validations)
 34 | ## [ ] 18.3.1.34 dialogsheet (Dialog Sheet)
 35 | ## [ ] 18.3.1.35 dimension (Worksheet Dimensions)
 36 | ## [ ] 18.3.1.36 drawing (Drawing)
 37 | ## [ ] 18.3.1.37 drawingHF (Drawing Reference in Header Footer)
 38 | ## [ ] 18.3.1.38 evenFooter (Even Page Footer)
 39 | ## [ ] 18.3.1.39 evenHeader (Even Page Header)
 40 | ## [ ] 18.3.1.40 f (Formula)
 41 | ## [ ] 18.3.1.41 firstFooter (First Page Footer)
 42 | ## [ ] 18.3.1.42 firstHeader (First Page Header)
 43 | ## [ ] 18.3.1.43 formula (Formula)
 44 | ## [ ] 18.3.1.44 formula1 (Formula 1)
 45 | ## [ ] 18.3.1.45 formula2 (Formula 2)
 46 | ## [ ] 18.3.1.46 headerFooter (Header Footer Settings)
 47 | ## [ ] 18.3.1.47 hyperlink (Hyperlink)
 48 | ## [ ] 18.3.1.48 hyperlinks (Hyperlinks)
 49 | ## [ ] 18.3.1.49 iconSet (Icon Set)
 50 | ## [ ] 18.3.1.50 ignoredError (Ignored Error)
 51 | ## [ ] 18.3.1.51 ignoredErrors (Ignored Errors)
 52 | ## [ ] 18.3.1.52 inputCells (Input Cells)
 53 | ## [ ] 18.3.1.53 is (Rich Text Inline)
 54 | ## [x] 18.3.1.54 mergeCell (Merged Cell) -- xlsx_ct_merge_cell
 55 | ## [x] 18.3.1.55 mergeCells (Merge Cells) -- xlsx_read_merged
 56 | ## [ ] 18.3.1.56 objectPr (Embedded Object Properties)
 57 | ## [ ] 18.3.1.57 oddFooter (Odd Page Footer)
 58 | ## [ ] 18.3.1.58 oddHeader (Odd Header)
 59 | ## [ ] 18.3.1.59 oleObject (Embedded Object)
 60 | ## [ ] 18.3.1.60 oleObjects (Embedded Objects)
 61 | ## [ ] 18.3.1.61 outlinePr (Outline Properties)
 62 | ## [ ] 18.3.1.62 pageMargins (Page Margins)
 63 | ## [ ] 18.3.1.63 pageSetup (Page Setup Settings)
 64 | ## [ ] 18.3.1.64 pageSetup (Chart Sheet Page Setup)
 65 | ## [ ] 18.3.1.65 pageSetUpPr (Page Setup Properties)
 66 | ## [x] 18.3.1.66 pane (View Pane) -- xlsx_ct_pane
 67 | ## [ ] 18.3.1.67 picture (Background Image)
 68 | ## [ ] 18.3.1.68 pivotArea (Pivot Area)
 69 | ## [ ] 18.3.1.69 pivotSelection (PivotTable Selection)
 70 | ## [ ] 18.3.1.70 printOptions (Print Options)
 71 | ## [ ] 18.3.1.71 protectedRange (Protected Range)
 72 | ## [ ] 18.3.1.72 protectedRanges (Protected Ranges)
 73 | ## [ ] 18.3.1.73 row (Row)
 74 | ## [ ] 18.3.1.74 rowBreaks (Horizontal Page Breaks (Row))
 75 | ## [ ] 18.3.1.75 scenario (Scenario)
 76 | ## [ ] 18.3.1.76 scenarios (Scenarios)
 77 | ## [ ] 18.3.1.77 securityDescriptor (Security Descriptor)
 78 | ## [ ] 18.3.1.78 selection (Selection)
 79 | ## [ ] 18.3.1.79 sheetCalcPr (Sheet Calculation Properties)
 80 | ## [x] 18.3.1.80 sheetData (Sheet Data) -- xlsx_read_sheet_data
 81 | ## [ ] 18.3.1.81 sheetFormatPr (Sheet Format Properties)
 82 | ## [ ] 18.3.1.82 sheetPr (Sheet Properties)
 83 | ## [ ] 18.3.1.83 sheetPr (Chart Sheet Properties)
 84 | ## [ ] 18.3.1.84 sheetProtection (Chart Sheet Protection)
 85 | ## [ ] 18.3.1.85 sheetProtection (Sheet Protection Options)
 86 | ## [-] 18.3.1.86 sheetView (Chart Sheet View)
 87 | ## [x] 18.3.1.87 sheetView (Worksheet View) -- xlsx_ct_worksheet_view
 88 | ## [x] 18.3.1.88 sheetViews (Sheet Views) -- xlsx_ct_worksheet_views
 89 | ## [-] 18.3.1.89 sheetViews (Chart Sheet Views)
 90 | ## [ ] 18.3.1.90 smartTags (Smart Tags)
 91 | ## [ ] 18.3.1.91 sortCondition (Sort Condition)
 92 | ## [ ] 18.3.1.92 sortState (Sort State)
 93 | ## [ ] 18.3.1.93 tabColor (Sheet Tab Color)
 94 | ## [ ] 18.3.1.94 tablePart (Table Part)
 95 | ## [ ] 18.3.1.95 tableParts (Table Parts)
 96 | ## [ ] 18.3.1.96 v (Cell Value)
 97 | ## [ ] 18.3.1.97 webPublishItem (Web Publishing Item)
 98 | ## [ ] 18.3.1.98 webPublishItems (Web Publishing Items)
 99 | ## [ ] 18.3.1.99 worksheet (Worksheet)
100 | 
101 | ## 18.3.1.17 cols
102 | xlsx_ct_cols <- function(xml, ns) {
103 |   classes <- c(best_fit="logical", collapsed="logical",
104 |                custom_width="logical", hidden="logical",
105 |                min="integer", max="integer", outline_level="integer",
106 |                style="integer", width="numeric")
107 |   process_container(xml, xlsx_name("cols", ns), ns, xlsx_ct_col,
108 |                     classes=classes)
109 | }
110 | 
111 | ## NOTE: width is a funny measurement that is a hybrid of pixels and
112 | ## character width:
113 | ##
114 | ## > Column width measured as the number of characters of the maximum
115 | ## > digit width of the numbers 0, 1, 2, ..., 9 as rendered in the
116 | ## > normal style's font. There are 4 pixels of margin padding (two on
117 | ## > each side), plus 1 pixel padding for the gridlines.
118 | ##
119 | ## But the actual calculation of how much space there is will vary
120 | ## with things like the border and padding thickness, boldness, etc.
121 | xlsx_ct_col <- function(xml, ns) {
122 |   at <- as.list(xml2::xml_attrs(xml))
123 |   tibble::tibble(
124 |     best_fit = attr_bool(at$bestFit, FALSE),
125 |     collapsed = attr_bool(at$collapsed, FALSE),
126 |     custom_width = attr_bool(at$customWidth, FALSE),
127 |     hidden = attr_bool(at$hidden, FALSE),
128 |     min = attr_integer(at$min),
129 |     max = attr_integer(at$max),
130 |     outline_level = attr_integer(at$outlineLevel),
131 |     ## phonetic = attr_bool(at$phonetic),
132 |     style = attr_integer(at$style), # for new cols only
133 |     width = attr_numeric(at$width))
134 | }
135 | 
136 | ## 18.3.1.55 mergeCells (Merge Cells)
137 | xlsx_read_merged <- function(xml, ns) {
138 |   merged <- xml2::xml_children(
139 |     xml2::xml_find_first(xml, xlsx_name("mergeCells", ns), ns))
140 |   lapply(merged, xlsx_ct_merge_cell)
141 | }
142 | 
143 | ## 18.3.1.54 mergeCell (Merged Cell)
144 | xlsx_ct_merge_cell <- function(x) {
145 |   cellranger::as.cell_limits(xml2::xml_attr(x, "ref"))
146 | }
147 | 
148 | ## 18.3.1.80 sheetData (Sheet Data)
149 | xlsx_read_sheet_data <- function(xml, ns, strings) {
150 |   rows <- xml2::xml_children(
151 |     xml2::xml_find_first(xml, xlsx_name("sheetData", ns), ns))
152 |   dat <- lapply(rows, xlsx_ct_row, ns, strings)
153 | 
154 |   cells <- rbind_df(unlist(lapply(dat, "[[", "cells"), FALSE),
155 |                     c(ref="character", style="integer", type="character",
156 |                       formula="character", value="list"))
157 |   rows <- rbind_df(lapply(dat, "[[", "row"),
158 |                    vcapply(xlsx_ct_row(NULL)$row, storage.mode))
159 | 
160 |   list(rows=rows, cells=cells)
161 | }
162 | 
163 | ## 18.3.1.73 row (Row)
164 | xlsx_ct_row <- function(xml, ns, strings) {
165 |   ## NOTE: the r attribute is optional and the <cell> 'r' attribute is
166 |   ## optional so it seems possible that we could end up unable to
167 |   ## determine where rows and cells are?  Unless the the fact that
168 |   ## rows are an xsd:sequence comes in to help?
169 |   at <- xml_attrs_list(xml)
170 | 
171 |   row <- list(
172 |     r = attr_integer(at$r),
173 |     spans = attr_character(at$spans),
174 |     s = attr_integer(at[["s"]]), # style
175 |     custom_format = attr_bool(at$customFormat, FALSE),
176 |     ht = attr_numeric(at$ht), # height -- vs <col> that uses "width" :-/
177 |     hidden = attr_bool(at$hidden, FALSE),
178 |     custom_height = attr_bool(at$customHeight),
179 |     outline_level = attr_integer(at$outlineLevel),
180 |     collapsed = attr_bool(at$collapsed, FALSE),
181 |     ## ph = attr_bool(at$ph, FALSE))
182 |     thick_top = attr_bool(at$thickTop, FALSE),
183 |     thick_bot = attr_bool(at$thickBot, FALSE))
184 | 
185 |   if (is.null(xml)) {
186 |     cells <- list()
187 |   } else {
188 |     cells <- lapply(xml2::xml_find_all(xml, xlsx_name("c", ns), ns),
189 |                     xlsx_ct_cell, ns, strings)
190 |   }
191 | 
192 |   list(row=row, cells=cells)
193 | }
194 | 
195 | ## 18.3.1.4  c (Cell)
196 | ##
197 | ## > While a cell can have a formula element f and a value element v,
198 | ## > when the cell's type t is inlineStr then only the element is is
199 | ## > allowed as a child element.
200 | ##
201 | ## Ignoring attributes:
202 | ## * cm metadata
203 | ## * vm metadat
204 | ## * ph phonetic information
205 | ##
206 | ## Cell types are enumerated in 18.18.11:
207 | ##
208 | ##   b: Boolean
209 | ##   d: Date (in the ISO 8601 format)
210 | ##   e: Error
211 | ##   inlineStr: Inline String (i.e., one not in the shared string
212 | ##              table. If this cell type is used, then the cell value
213 | ##              is in the is element rather than the v element in the
214 | ##              cell (c element).)
215 | ##   n: Number
216 | ##   s: Shared String
217 | ##   str: String (a formula string)
218 | xlsx_ct_cell <- function(xml, ns, strings) {
219 |   at <- as.list(xml2::xml_attrs(xml))
220 | 
221 |   type <- attr_character(at$t)
222 |   if (identical(type, "inlineStr")) { # avoid missingness
223 |     formula <- NA_character_
224 |     ## value here _must_ be present, so no conditional (vs below)
225 |     value <- xlsx_ct_rst(xml2::xml_find_first(xml, xlsx_name("is", ns), ns), ns)
226 |   } else {
227 |     formula <- xml2::xml_text(xml2::xml_find_first(xml, xlsx_name("f", ns), ns))
228 |     v <- xml2::xml_find_first(xml, xlsx_name("v", ns), ns)
229 |     value <- if (inherits(v, "xml_missing")) NULL else xml2::xml_text(v)
230 | 
231 |     ## String substitutions from the string table:
232 |     if (identical(type, "s")) {
233 |       value <- strings[as.integer(unlist(value)) + 1L]
234 |     } else if (identical(type, "b")) {
235 |       ## TODO: This is probably worth doing?
236 |       ## value <- as.logical(as.integer(value))
237 |       value <- as.numeric(value)
238 |     } else if (!is.null(value) &&
239 |                (is.na(type) || identical(type, "n") || identical(type, "d"))) {
240 |       ## TODO: Consider being more careful with the 'd' class here?
241 |       value <- as.numeric(value)
242 |     }
243 |   }
244 | 
245 |   list(
246 |     ref = attr_character(at$r),
247 |     style = attr_integer(at$s) + 1L,
248 |     type = type,
249 |     formula = formula,
250 |     value = value)
251 | }
252 | 
253 | ## 18.3.1.88 sheetViews
254 | xlsx_ct_worksheet_views <- function(xml, ns) {
255 |   xpath <- sprintf("./%s/%s",
256 |                    xlsx_name("sheetViews", ns), xlsx_name("sheetView", ns))
257 |   els <- xml2::xml_find_all(xml, xpath, ns)
258 |   if (length(els) == 0L) {
259 |     NULL
260 |   } else if (length(els) == 1L) {
261 |     xlsx_ct_worksheet_view(els[[1L]], ns)
262 |   } else {
263 |     tmp <- lapply(els, xlsx_ct_worksheet_view, ns)
264 |     empty <- vlapply(tmp, is.null)
265 |     if (all(empty)) {
266 |       NULL
267 |     } else {
268 |       ## TODO: check the Enron corpus;
269 |       ##    larry_campbell__21047__EOTT tanks tx NM.xlsx
270 |       ## for a file where this is the case.  It needs opening in Excel
271 |       ## to see what this resolves as.  Given we largely ignore these,
272 |       ## we'll take the first for now.
273 |       tmp[[which(!empty)[[1L]]]]
274 |     }
275 |   }
276 | }
277 | 
278 | ## 18.3.1.87 sheetView
279 | xlsx_ct_worksheet_view <- function(xml, ns) {
280 |   pane <- xml2::xml_find_first(xml, xlsx_name("pane", ns), ns)
281 |   if (inherits(pane, "xml_missing")) NULL else xlsx_ct_pane(pane)
282 | }
283 | 
284 | ## 18.3.1.66 pane (View Pane)
285 | xlsx_ct_pane <- function(xml, ns) {
286 |   at <- xml_attrs_list(xml)
287 |   ## x_split: Horizontal position of the split, in 1/20th of a point;
288 |   ##    0 if none.  If the pane is frozen, this value indicates the
289 |   ##    number of columns visible in the top pane.
290 |   ## y_split: Vertical position of the split, in 1/20th of a point;
291 |   ##    0 if none.  If the pane is frozen, this value indicates the
292 |   ##    number of columns visible in the left pane.
293 |   ## state: one of frozen / frozenSplit / split
294 |   state <- attr_character(at$state)
295 |   list(x_split = attr_numeric(at$xSplit, 0),
296 |        y_split = attr_numeric(at$ySplit, 0),
297 |        top_left = attr_character(at$topLeftCell),
298 |        state = state,
299 |        frozen = state != "split")
300 | }
301 | 


--------------------------------------------------------------------------------
/R/readxl.R:
--------------------------------------------------------------------------------
 1 | ##' Read an Excel spreadsheet the same way as readxl, but slower.
 2 | ##' Assumes a well behaved table of data.
 3 | ##' @title Read an Excel spreadsheet like readxl
 4 | ##' @param path Path to the xlsx file
 5 | ##' @param sheet Sheet name or an integer
 6 | ##' @param col_names TRUE (the default) indicating we should use the
 7 | ##'   first row as column names, FALSE, indicating we should generate
 8 | ##'   names (X1, X2, ..., Xn) or a character vector of names to apply.
 9 | ##' @param col_types Either NULL (the default) indicating we should
10 | ##'   guess the column types or a vector of column types (values must
11 | ##'   be "blank", "numeric", "date" or "text").
12 | ##' @param na Values indicating missing values (if different from
13 | ##'   blank).  Not yet used.
14 | ##' @param skip Number of rows to skip.
15 | ##' @export
16 | rexcel_readxl <- function(path, sheet=1L, col_names=TRUE,
17 |                           col_types=NULL, na="", skip=0) {
18 |   dat <- rexcel_read(path, sheet)
19 |   linen::worksheet_to_table(dat, col_names, col_types, na, skip)
20 | }
21 | 


--------------------------------------------------------------------------------
/R/rexcel-package.r:
--------------------------------------------------------------------------------
1 | #' rexcel.
2 | #'
3 | #' @name rexcel
4 | #' @docType package
5 | NULL
6 | 


--------------------------------------------------------------------------------
/R/shared_strings.R:
--------------------------------------------------------------------------------
 1 | ## Shared string table:
 2 | ##
 3 | ##   [-] 18.4.1  charset (Character Set) (ignored for now)
 4 | ##   [x] 18.4.2  outline (Outline) -- xlsx_ct_boolean_property
 5 | ##   [-] 18.4.3  phoneticPr (Phonetic Properties)
 6 | ##   [x] 18.4.4  r (Rich Text Run) -- xlsx_ct_rst
 7 | ##   [-] 18.4.5  rFont (Font)
 8 | ##   [-] 18.4.6  rPh (Phonetic Run)
 9 | ##   [-] 18.4.7  rPr (Run Properties)
10 | ##   [x] 18.4.8  si (String Item) -- xlsx_ct_rst
11 | ##   [x] 18.4.9  sst (Shared String Table) -- xlsx_read_shared_strings
12 | ##   [x] 18.4.10 strike (Strike Through) -- xlsx_ct_boolean_property
13 | ##   [x] 18.4.11 sz (Font Size) -- xlsx_ct_font_size
14 | ##   [x] 18.4.12 t (Text) -- in xlsx_ct_rst
15 | ##   [x] 18.4.13 u (Underline) -- xlsx_ct_underline_property
16 | ##   [-] 18.4.14 vertAlign (Vertical Alignment) (ignored for now)
17 | 
18 | ## If the format is <si>/<t> then we can just take the text values.
19 | ## Otherwise we'll have to parse out the RTF strings separately.
20 | xlsx_read_shared_strings <- function(path) {
21 |   xml <- xlsx_read_file_if_exists(path, "xl/sharedStrings.xml")
22 |   if (is.null(xml)) {
23 |     return(character(0))
24 |   }
25 |   vcapply(xml2::xml_children(xml), xlsx_ct_rst, xml2::xml_ns(xml))
26 | }
27 | 
28 | ## 18.4.8 si
29 | ##
30 | ## This is the core function that reads a string item (si).  The spec
31 | ## is a bit vague on this, but it seems most likely that the element
32 | ## can contain either a 't' or a bunch of 'r' elements, but not both.
33 | ##
34 | ## NOTE: Ignoring rPh and phoneticPr which might be part of this
35 | ## element.  Terribly anglocentric :(
36 | xlsx_ct_rst <- function(x, ns) {
37 |   t <- xml2::xml_find_first(x, xlsx_name("t", ns), ns)
38 |   r <- xml2::xml_find_all(x, xlsx_name("r", ns), ns)
39 |   if (length(r) == 0L) {
40 |     ## Treat as plain text.
41 |     ## 18.4.12 t -- ST_Xstring
42 |     ##
43 |     ## The only complication here is that we *might* contain the flag:
44 |     ## xml:space which is a W3C defined thing indicating if whitespace
45 |     ## is relevant.
46 |     str <- xml2::xml_text(t)
47 |   } else {
48 |     ## NOTE: we totally ignore sub-string formatting.
49 |     str <- if (inherits(t, "xml_missing")) "" else xml2::xml_text(t)
50 |     if (length(r) > 0L) {
51 |       str <- paste(c(str, xml2::xml_text(
52 |                             xml2::xml_find_all(r, xlsx_name("t", ns), ns))),
53 |                    collapse="")
54 |     }
55 |   }
56 | 
57 |   ## NOTE: I am still getting slightly different line endings to
58 |   ## readxl because I need to convert \n -> \r\n to match
59 |   ##
60 |   ## Unescape the strings (ST_Xstring) See 22.9.2.19 [p3786]
61 |   re <- "_x([[:xdigit:]]{4})_"
62 |   i <- regexpr(re, str, perl = TRUE)
63 |   len <- nchar(str)
64 |   while (i > 0) {
65 |     repl <- intToUtf8(as.integer(paste0("0x", substr(str, i + 2, i + 5))))
66 |     str <- sub(re, repl, str)
67 |     ## This bit of faffery stops an escaped '_' character being
68 |     ## counted as an unescaped '_' character, and is tested.  This
69 |     ## would be heaps easier in languages with char-by-char string
70 |     ## handling
71 |     j <- regexpr(re, substr(str, i + 1, len))
72 |     i <- if (j > 0) i + j else j
73 |   }
74 | 
75 |   str
76 | }
77 | 
78 | ## 18.4.11 sz (Font Size)
79 | xlsx_ct_font_size <- function(sz) {
80 |   as.numeric(xml2::xml_attr(sz, "val"))
81 | }
82 | 
83 | ## 18.4.13 u (Underline)
84 | xlsx_ct_underline_property <- function(u, missing="none") {
85 |   if (inherits(u, "xml_missing")) {
86 |     missing
87 |   } else {
88 |     val <- xml2::xml_attr(u, "val")
89 |     if (is.na(val)) "single" else val
90 |   }
91 | }
92 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
  1 | vlapply <- function(X, FUN, ...) {
  2 |   vapply(X, FUN, logical(1), ...)
  3 | }
  4 | viapply <- function(X, FUN, ...) {
  5 |   vapply(X, FUN, integer(1), ...)
  6 | }
  7 | vnapply <- function(X, FUN, ...) {
  8 |   vapply(X, FUN, numeric(1), ...)
  9 | }
 10 | vcapply <- function(X, FUN, ...) {
 11 |   vapply(X, FUN, character(1), ...)
 12 | }
 13 | 
 14 | attr_bool <- function(x, missing=NA) {
 15 |   if (is.null(x)) missing else as.logical(as.integer(x))
 16 | }
 17 | 
 18 | attr_integer <- function(x, missing=NA_integer_) {
 19 |   if (is.null(x)) missing else as.integer(x)
 20 | }
 21 | 
 22 | attr_numeric <- function(x, missing=NA_real_) {
 23 |   if (is.null(x)) missing else as.numeric(x)
 24 | }
 25 | 
 26 | attr_character <- function(x, missing=NA_character_) {
 27 |   if (is.null(x)) missing else x
 28 | }
 29 | 
 30 | `%||%` <- function(a, b) {
 31 |   if (is.null(a)) b else a
 32 | }
 33 | 
 34 | process_container <- function(xml, xpath, ns, fun, ..., classes=NULL) {
 35 |   els <- xml2::xml_children(xml2::xml_find_first(xml, xpath, ns))
 36 |   if (isTRUE(classes)) {
 37 |     if (length(els) == 0L) {
 38 |       classes <- vcapply(fun(NULL, ns, ...), storage.mode)
 39 |     } else {
 40 |       classes <- NULL
 41 |     }
 42 |   }
 43 |   rbind_df(lapply(els, fun, ns, ...), classes)
 44 | }
 45 | 
 46 | ## The function below is a faster version of
 47 | ##
 48 | ##   tibble::as_tibble(do.call("rbind", x, quote=TRUE))
 49 | ##
 50 | ## But it avoids constructing a very hard to validate, slow to run
 51 | ## function (on the order of a second), but it's not terrible nice to
 52 | ## look at or understand.
 53 | rbind_df <- function(x, classes=NULL) {
 54 |   if (length(x) == 0L) {
 55 |     return(empty_tibble(classes))
 56 |   }
 57 |   nms <- names(x[[1L]])
 58 |   xx <- unlist(x, FALSE)
 59 |   dim(xx) <- c(length(nms), length(x))
 60 |   if (is.null(classes)) {
 61 |     preserve <- logical(length(nms))
 62 |   } else {
 63 |     preserve <- classes == "list"
 64 |   }
 65 |   ul <- function(i, x) {
 66 |     if (preserve[[i]]) x else unlist(x)
 67 |   }
 68 |   tmp <- stats::setNames(lapply(seq_along(nms), function(i) ul(i, xx[i, ])), nms)
 69 |   tibble::as_tibble(tmp)
 70 | }
 71 | 
 72 | empty_tibble <- function(classes = NULL) {
 73 |   if (is.null(classes)) {
 74 |     ## NOTE: Once things settle down, this can be dropped.
 75 |     stop("deal with me")
 76 |     tibble::tibble()
 77 |   } else {
 78 |     tibble::as_tibble(lapply(classes, vector))
 79 |   }
 80 | }
 81 | 
 82 | progress <- function(fmt, total, ..., show=TRUE) {
 83 |   if (show && total > 0L) {
 84 |     pb <- progress::progress_bar$new(fmt, total=total)
 85 |     function(len=1) {
 86 |       invisible(pb$tick(len))
 87 |     }
 88 |   } else {
 89 |     function(...) {}
 90 |   }
 91 | }
 92 | 
 93 | path_join <- function(a, b) {
 94 |   na <- length(a)
 95 |   nb <- length(b)
 96 |   if (na == 1L && nb != 1L) {
 97 |     a <- rep_len(a, nb)
 98 |   } else if (nb == 1L && na != 1L) {
 99 |     b <- rep_len(b, na)
100 |   } else if (na != nb && na != 1L && nb != 1L) {
101 |     stop("Can't recycle vectors together")
102 |   }
103 | 
104 |   i <- regexpr("(\\.\\./)+", b)
105 |   len <- attr(i, "match.length", exact=TRUE)
106 |   j <- len > 0L
107 |   if (any(j)) {
108 |     b[j] <- substr(b[j], len[j] + 1L, nchar(b[j]))
109 |     len[j] <- len[j] / 3
110 | 
111 |     tmp <- strsplit(a[j], "/", fixed=TRUE)
112 |     for (k in seq_along(tmp)) {
113 |       ii <- length(tmp[[k]]) - len[j][[k]]
114 |       if (ii < 0L) {
115 |         ii <- 0L
116 |         ## TODO: this turns up once in the Enron corpus
117 |         ##   warning("Cannot resolve internal reference; above workbook?")
118 |       }
119 |       tmp[[k]] <- paste(tmp[[k]][seq_len(ii)], collapse="/")
120 |     }
121 |     a[j] <- unlist(tmp)
122 |   }
123 |   paste(a, b, sep="/")
124 | }
125 | 
126 | ## TODO: replace as.list(xml2::xml_attrs(...)) with this where NULL
127 | ## values are OK.
128 | xml_attrs_list <- function(x) {
129 |   if (is.null(x)) {
130 |     structure(list(), names=character())
131 |   } else {
132 |     as.list(xml2::xml_attrs(x))
133 |   }
134 | }
135 | 
136 | as_na <- function(x) {
137 |   ret <- NA
138 |   storage.mode(ret) <- storage.mode(x)
139 |   ret
140 | }
141 | 
142 | is_xlsx <- function(path) {
143 |   if (!file.exists(path)) {
144 |     stop("\n", path, "\ndoes not exist")
145 |   }
146 |   ## TO DO: verify it's a zip archive? only way I know is unix `file` command
147 |   ## http://officeopenxml.com/anatomyofOOXML-xlsx.php
148 |   ## https://msdn.microsoft.com/en-us/library/office/gg278316.aspx#MinWBScenario
149 |   files <- xlsx_list_files(path)
150 |   has_content_types <- "[Content_Types].xml" %in% files$name
151 |   has_rels <- "_rels/.rels" %in% files$name
152 |   has_workbook_xml <- "xl/workbook.xml" %in% files$name
153 |   has_sheet <- any(grepl("xl/worksheets/sheet[0-9]*.xml", files$name))
154 |   has_content_types && has_rels && has_workbook_xml && has_sheet
155 | }
156 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # rexcel
 2 | [![Build Status](https://travis-ci.org/rsheets/rexcel.svg?branch=master)](https://travis-ci.org/rsheets/rexcel)
 3 | 
 4 | **Warning: This project is an experiment; do not use for anything other than amusement/frustration purposes.**
 5 | 
 6 | ## Design
 7 | 
 8 | This package implements a **very slow**, but thorough, Excel (xlsx) reader.  If you have a rectangular region of cells to read you will be better off with another Excel reading package such as: [`readxl`](http://cran.r-project.org/package=readxl),
 9 | [`openxlsx`](http://cran.r-project.org/package=openxlsx),
10 | [`XLConnect`](http://cran.r-project.org/package=XLConnect),
11 | [`xlsx`](http://cran.r-project.org/package=xlsx),
12 | [`gdata`](http://cran.r-project.org/package=gdata), [`RODBC`](http://cran.r-project.org/package=RODBC), or possibly even
13 | [`excel.link`](http://cran.r-project.org/package=excel.link), [`WriteXLS`](http://cran.r-project.org/package=WriteXLS), [`table1xlsx`](http://cran.r-project.org/package=table1xlsx), [`tablaxlsx`](http://cran.r-project.org/package=tablaxlsx) (not clear how current these last 4 are). Mango Solutions has a nice review article, [R: the Excel Connection](http://www.mango-solutions.com/wp/2015/05/r-the-excel-connection/), in which they compare several of the above packages, with a special emphasis on those that can both read and write Excel files (XLConnect, xlsx, openxlsx, excel.link).
14 | 
15 | Compared with the above packages, `rexcel` tries to read all the data from an Excel sheet using [`linen`](https://github.com/rsheets/linen) as an intermediate representation in R. The eventual goal is to provide a common receptacle for detailed spreadsheet information from both Excel and Google Sheets.  Rather than trying to create a single data.frame in one shot, it allows access to data, formulae and formatting information.  Excel type information is preserved, especially for heterogeneous columns.  It has no non-R dependencies (e.g. on Perl or Java) and should run on any platform regardless of whether Excel is installed.
16 | 
17 | ## Installation
18 | 
19 | Requires the development version of `xml2`, as the newer version changes the behaviour of matching functions in fairly large ways.  For terminal printing we use the most recent copy of `crayon` which includes more accurate rendering.  And we use `linen` as the R spreadsheet representation.
20 | 
21 | ```r
22 | devtools::install_github("hadley/xml2")
23 | devtools::install_github("gaborcsardi/crayon")
24 | devtools::install_github("rsheets/linen")
25 | devtools::install_github("rsheets/rexcel")
26 | ```
27 | 
28 | ## Formatting preserved
29 | 
30 | * [x] Cell fill colour
31 | * [x] Cell patterns
32 | * [ ] Cell _gradients_
33 | * [x] Text colour
34 | * [x] Text bold, italic, underline, strikethrough, outline, shadow, condense, extend
35 | * [x] Text font
36 | * [x] Text size
37 | * [x] Text alignment (horizontal, vertical)
38 | * [x] Column/row visibility
39 | * [x] Column/row width/height
40 | * [ ] Styles applied at the column level (though the spec seems vague about if that's a real thing - compare p. 1600 and 1596)
41 | * [ ] Conditional formatting (the rule and the outcome)
42 | * [x] Borders (position and colour)
43 | * [ ] Numeric/date formatting, possibly also formatted text?
44 | * [ ] Table styles (e.g. for pivot tables)
45 | 
46 | Of particular concern:
47 | 
48 | > A cell can have both direct formatting (e.g., bold) and a cell style (e.g., Explanatory) applied to it. Therefore, both the cell style xf records and cell xf records shall be read to understand the full set of formatting applied to a cell.
49 | 
50 | (18.8.10, p.1754)
51 | 
52 | Also
53 | 
54 | > When the color palette is modified, the indexedColors collection is written. When a custom color has been selected, the mruColors collection is written.
55 | 
56 | So we should find a sheet that includes this and see what this looks like empirically.
57 | 
58 | ## Other worthwhile things to get
59 | 
60 | * [x] named ranges
61 | * [x] comments (author, ref, rich text, visibility)
62 | * [ ] graphs
63 | * [ ] other drawings
64 | * [ ] pivot tables
65 | * [ ] frozen rows / split panes
66 | * [ ] calculation chain
67 | * [ ] header/footer
68 | 
69 | ## The Excel XML Spec
70 | 
71 | Page and section numbers refer to the "ECMA Office Open XML Part 1 - Fundamentals and Markup Language Reference" document; a massive and fairly unweidly 5026 PDF!  The spreadsheet material is mostly in section 18 (pages 1518 - 2508).
72 | 
73 | ## Writing Excel files
74 | 
75 | Writing workbooks is not currently supported.  Before implementing it, we want to see how much information we can preserve while reading.  Once we know that, we can start working towards seeing how much can be written and how lossy a read/write cycle will be (it is likely to be *very* lossy as there is an enormous number of things that might be stored in a worksheet.
76 | 


--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
 1 | # DO NOT CHANGE the "init" and "install" sections below
 2 | 
 3 | # Download script file from GitHub
 4 | init:
 5 |   ps: |
 6 |         $ErrorActionPreference = "Stop"
 7 |         Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1"
 8 |         Import-Module '..\appveyor-tool.ps1'
 9 | 
10 | install:
11 |   ps: Bootstrap
12 | 
13 | # Adapt as necessary starting from here
14 | 
15 | build_script:
16 |   - travis-tool.sh install_deps
17 | 
18 | test_script:
19 |   - travis-tool.sh run_tests
20 | 
21 | on_failure:
22 |   - travis-tool.sh dump_logs
23 | 
24 | artifacts:
25 |   - path: '*.Rcheck\**\*.log'
26 |     name: Logs
27 | 
28 |   - path: '*.Rcheck\**\*.out'
29 |     name: Logs
30 | 
31 |   - path: '*.Rcheck\**\*.fail'
32 |     name: Logs
33 | 
34 |   - path: '*.Rcheck\**\*.Rout'
35 |     name: Logs
36 | 
37 |   - path: '\*_*.tar.gz'
38 |     name: Bits
39 | 
40 |   - path: '\*_*.zip'
41 |     name: Bits
42 | 


--------------------------------------------------------------------------------
/inst/sheets/Ekaterinburg_IP_9-RESAVED.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsheets/rexcel/e8dd5d30388b252c91d1cf4baa014a119280e57f/inst/sheets/Ekaterinburg_IP_9-RESAVED.xlsx


--------------------------------------------------------------------------------
/inst/sheets/Ekaterinburg_IP_9.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsheets/rexcel/e8dd5d30388b252c91d1cf4baa014a119280e57f/inst/sheets/Ekaterinburg_IP_9.xlsx


--------------------------------------------------------------------------------
/inst/sheets/README.md:
--------------------------------------------------------------------------------
 1 | Notes on where these sheets came from.
 2 | 
 3 | exp.xlsx: GitHub <https://github.com/hadley/readxl/issues/163>. OP [wrathematics](https://github.com/wrathematics) says data "was generated by and exported from JMP". Commenter [nacnudus](https://github.com/nacnudus) notes that "The cells don't have the `r` attribute that gives the cell address."
 4 | 
 5 | Ekaterinburg_IP_9.xlsx: GitHub <https://github.com/hadley/readxl/issues/80>. OP said:
 6 | 
 7 | > readxl version 0.1.0
 8 | > There is a problem with reading some xlsx files (not the ones generated by Google Docs - this is some other xlsx generating BI) please try the following:
 9 | 
10 |         download.file("https://raw.githubusercontent.com/e-kotov/tmp/master/Ekaterinburg_IP_9.xlsx",
11 |                       method = "curl", dest = "Ekaterinburg_IP_9.xlsx")
12 |         x <- read_excel("Ekaterinburg_IP_9.xlsx", skip = 5)
13 | 
14 | > It returns " Error: Couldn't find 'xl/worksheets/sheet1.xml'". As soon as the file in question is opened in Excel and re-saved, it is loaded with no problem.
15 | 
16 | Jenny used the above to download the original file (included here w/ OP's permission), then opened in Excel and did Save As .... to create
17 | `inst/sheets/Ekaterinburg_IP_9-RESAVED.xlsx` in order to compare the underlying XML.
18 | 


--------------------------------------------------------------------------------
/inst/sheets/defined-names.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsheets/rexcel/e8dd5d30388b252c91d1cf4baa014a119280e57f/inst/sheets/defined-names.xlsx


--------------------------------------------------------------------------------
/inst/sheets/exp.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsheets/rexcel/e8dd5d30388b252c91d1cf4baa014a119280e57f/inst/sheets/exp.xlsx


--------------------------------------------------------------------------------
/inst/sheets/gabe.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsheets/rexcel/e8dd5d30388b252c91d1cf4baa014a119280e57f/inst/sheets/gabe.xlsx


--------------------------------------------------------------------------------
/inst/sheets/gs-test-formula-formatting.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsheets/rexcel/e8dd5d30388b252c91d1cf4baa014a119280e57f/inst/sheets/gs-test-formula-formatting.xlsx


--------------------------------------------------------------------------------
/inst/sheets/mini-gap.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsheets/rexcel/e8dd5d30388b252c91d1cf4baa014a119280e57f/inst/sheets/mini-gap.xlsx


--------------------------------------------------------------------------------
/inst/sheets/only_numbers.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsheets/rexcel/e8dd5d30388b252c91d1cf4baa014a119280e57f/inst/sheets/only_numbers.xlsx


--------------------------------------------------------------------------------
/internal/ekaterinburg.R:
--------------------------------------------------------------------------------
 1 | ## origin story for Ekaterinburg_IP_9.xlsx, found in inst/sheets
 2 | 
 3 | ## https://github.com/hadley/readxl/issues/80
 4 | ## OP says:
 5 | ## readxl version 0.1.0
 6 | ## There is a problem with reading some xlsx files (not the ones generated by
 7 | ## Google Docs - this is some other xlsx generating BI) please try the
 8 | ## following:
 9 | ## download.file("https://raw.githubusercontent.com/e-kotov/tmp/master/Ekaterinburg_IP_9.xlsx",
10 | ##                method = "curl", dest = "Ekaterinburg_IP_9.xlsx")
11 | ## x <- read_excel("Ekaterinburg_IP_9.xlsx", skip = 5)
12 | ## It returns " Error: Couldn't find 'xl/worksheets/sheet1.xml'".
13 | ##
14 | ## As soon as the file in question is opened in Excel and re-saved, it is loaded
15 | ## with no problem.
16 | 
17 | download.file("https://raw.githubusercontent.com/e-kotov/tmp/master/Ekaterinburg_IP_9.xlsx",
18 |               method = "curl", dest = "Ekaterinburg_IP_9.xlsx")
19 | ## then moved to inst/sheets
20 | 
21 | ## opened xlsx in Excel and did Save As .... to create
22 | ## inst/sheets/Ekaterinburg_IP_9-RESAVED.xlsx
23 | ## in order to compare the underlying XML
24 | 


--------------------------------------------------------------------------------
/internal/img/Kinsey-Male.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsheets/rexcel/e8dd5d30388b252c91d1cf4baa014a119280e57f/internal/img/Kinsey-Male.jpg


--------------------------------------------------------------------------------
/internal/img/Kinsey-Spreadsheet.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsheets/rexcel/e8dd5d30388b252c91d1cf4baa014a119280e57f/internal/img/Kinsey-Spreadsheet.jpg


--------------------------------------------------------------------------------
/internal/img/doctorow_2016-May-11.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsheets/rexcel/e8dd5d30388b252c91d1cf4baa014a119280e57f/internal/img/doctorow_2016-May-11.jpg


--------------------------------------------------------------------------------
/internal/img/enron-example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsheets/rexcel/e8dd5d30388b252c91d1cf4baa014a119280e57f/internal/img/enron-example.png


--------------------------------------------------------------------------------
/internal/img/excel-reactivity.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsheets/rexcel/e8dd5d30388b252c91d1cf4baa014a119280e57f/internal/img/excel-reactivity.gif


--------------------------------------------------------------------------------
/internal/img/excel-reactivity.mov:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsheets/rexcel/e8dd5d30388b252c91d1cf4baa014a119280e57f/internal/img/excel-reactivity.mov


--------------------------------------------------------------------------------
/internal/mini-gap/[Content_Types].xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?><Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types"><Default ContentType="application/vnd.openxmlformats-package.relationships+xml" Extension="rels"/><Default ContentType="application/xml" Extension="xml"/><Override ContentType="application/vnd.openxmlformats-officedocument.drawing+xml" PartName="/xl/drawings/worksheetdrawing4.xml"/><Override ContentType="application/vnd.openxmlformats-officedocument.drawing+xml" PartName="/xl/drawings/worksheetdrawing2.xml"/><Override ContentType="application/vnd.openxmlformats-officedocument.drawing+xml" PartName="/xl/drawings/worksheetdrawing1.xml"/><Override ContentType="application/vnd.openxmlformats-officedocument.drawing+xml" PartName="/xl/drawings/worksheetdrawing3.xml"/><Override ContentType="application/vnd.openxmlformats-officedocument.drawing+xml" PartName="/xl/drawings/worksheetdrawing5.xml"/><Override ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml" PartName="/xl/styles.xml"/><Override ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml" PartName="/xl/sharedStrings.xml"/><Override ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml" PartName="/xl/workbook.xml"/><Override ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml" PartName="/xl/worksheets/sheet5.xml"/><Override ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml" PartName="/xl/worksheets/sheet3.xml"/><Override ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml" PartName="/xl/worksheets/sheet1.xml"/><Override ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml" PartName="/xl/worksheets/sheet4.xml"/><Override ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml" PartName="/xl/worksheets/sheet2.xml"/></Types>


--------------------------------------------------------------------------------
/internal/mini-gap/_rels/.rels:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?><Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="xl/workbook.xml"/></Relationships>


--------------------------------------------------------------------------------
/internal/mini-gap/_rels/rels.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?><Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="xl/workbook.xml"/></Relationships>


--------------------------------------------------------------------------------
/internal/mini-gap/xl/_rels/workbook.xml.rels:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?><Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId2" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings" Target="sharedStrings.xml"/><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles" Target="styles.xml"/><Relationship Id="rId4" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet3.xml"/><Relationship Id="rId3" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet4.xml"/><Relationship Id="rId6" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet1.xml"/><Relationship Id="rId5" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet5.xml"/><Relationship Id="rId7" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet2.xml"/></Relationships>


--------------------------------------------------------------------------------
/internal/mini-gap/xl/drawings/worksheetdrawing1.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?><xdr:wsDr xmlns:xdr="http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:c="http://schemas.openxmlformats.org/drawingml/2006/chart" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:dgm="http://schemas.openxmlformats.org/drawingml/2006/diagram"/>


--------------------------------------------------------------------------------
/internal/mini-gap/xl/drawings/worksheetdrawing2.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?><xdr:wsDr xmlns:xdr="http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:c="http://schemas.openxmlformats.org/drawingml/2006/chart" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:dgm="http://schemas.openxmlformats.org/drawingml/2006/diagram"/>


--------------------------------------------------------------------------------
/internal/mini-gap/xl/drawings/worksheetdrawing3.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?><xdr:wsDr xmlns:xdr="http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:c="http://schemas.openxmlformats.org/drawingml/2006/chart" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:dgm="http://schemas.openxmlformats.org/drawingml/2006/diagram"/>


--------------------------------------------------------------------------------
/internal/mini-gap/xl/drawings/worksheetdrawing4.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?><xdr:wsDr xmlns:xdr="http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:c="http://schemas.openxmlformats.org/drawingml/2006/chart" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:dgm="http://schemas.openxmlformats.org/drawingml/2006/diagram"/>


--------------------------------------------------------------------------------
/internal/mini-gap/xl/drawings/worksheetdrawing5.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?><xdr:wsDr xmlns:xdr="http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:c="http://schemas.openxmlformats.org/drawingml/2006/chart" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:dgm="http://schemas.openxmlformats.org/drawingml/2006/diagram"/>


--------------------------------------------------------------------------------
/internal/mini-gap/xl/sharedStrings.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?><sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="80" uniqueCount="33"><si><t>country</t></si><si><t>continent</t></si><si><t>year</t></si><si><t>lifeExp</t></si><si><t>pop</t></si><si><t>gdpPercap</t></si><si><t>Algeria</t></si><si><t>Africa</t></si><si><t>Angola</t></si><si><t>Albania</t></si><si><t>Europe</t></si><si><t>Benin</t></si><si><t>Austria</t></si><si><t>Argentina</t></si><si><t>Americas</t></si><si><t>Belgium</t></si><si><t>Australia</t></si><si><t>Oceania</t></si><si><t>Bolivia</t></si><si><t>Bosnia and Herzegovina</t></si><si><t>New Zealand</t></si><si><t>Bulgaria</t></si><si><t>Brazil</t></si><si><t>Canada</t></si><si><t>Afghanistan</t></si><si><t>Asia</t></si><si><t>Bahrain</t></si><si><t>Chile</t></si><si><t>Bangladesh</t></si><si><t>Botswana</t></si><si><t>Cambodia</t></si><si><t>China</t></si><si><t>Burkina Faso</t></si></sst>


--------------------------------------------------------------------------------
/internal/mini-gap/xl/styles.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?><styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"><fonts count="2"><font><sz val="10.0"/><color rgb="FF000000"/><name val="Arial"/></font><font/></fonts><fills count="2"><fill><patternFill patternType="none"/></fill><fill><patternFill patternType="lightGray"/></fill></fills><borders count="1"><border><left/><right/><top/><bottom/></border></borders><cellStyleXfs count="1"><xf borderId="0" fillId="0" fontId="0" numFmtId="0" applyAlignment="1" applyFont="1"/></cellStyleXfs><cellXfs count="2"><xf borderId="0" fillId="0" fontId="0" numFmtId="0" xfId="0" applyAlignment="1" applyFont="1"><alignment/></xf><xf borderId="0" fillId="0" fontId="1" numFmtId="0" xfId="0" applyAlignment="1" applyFont="1"><alignment/></xf></cellXfs><cellStyles count="1"><cellStyle xfId="0" name="Normal" builtinId="0"/></cellStyles><dxfs count="0"/></styleSheet>


--------------------------------------------------------------------------------
/internal/mini-gap/xl/workbook.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?><workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:mx="http://schemas.microsoft.com/office/mac/excel/2008/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:mv="urn:schemas-microsoft-com:mac:vml" xmlns:x14="http://schemas.microsoft.com/office/spreadsheetml/2009/9/main" xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac" xmlns:xm="http://schemas.microsoft.com/office/excel/2006/main"><workbookPr/><sheets><sheet state="visible" name="Africa" sheetId="1" r:id="rId3"/><sheet state="visible" name="Americas" sheetId="2" r:id="rId4"/><sheet state="visible" name="Asia" sheetId="3" r:id="rId5"/><sheet state="visible" name="Europe" sheetId="4" r:id="rId6"/><sheet state="visible" name="Oceania" sheetId="5" r:id="rId7"/></sheets><definedNames/><calcPr/></workbook>


--------------------------------------------------------------------------------
/internal/mini-gap/xl/worksheets/_rels/sheet1.xml.rels:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?><Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/drawing" Target="../drawings/worksheetdrawing1.xml"/></Relationships>


--------------------------------------------------------------------------------
/internal/mini-gap/xl/worksheets/_rels/sheet2.xml.rels:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?><Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/drawing" Target="../drawings/worksheetdrawing2.xml"/></Relationships>


--------------------------------------------------------------------------------
/internal/mini-gap/xl/worksheets/_rels/sheet3.xml.rels:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?><Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/drawing" Target="../drawings/worksheetdrawing3.xml"/></Relationships>


--------------------------------------------------------------------------------
/internal/mini-gap/xl/worksheets/_rels/sheet4.xml.rels:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?><Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/drawing" Target="../drawings/worksheetdrawing4.xml"/></Relationships>


--------------------------------------------------------------------------------
/internal/mini-gap/xl/worksheets/_rels/sheet5.xml.rels:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?><Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/drawing" Target="../drawings/worksheetdrawing5.xml"/></Relationships>


--------------------------------------------------------------------------------
/internal/mini-gap/xl/worksheets/sheet1.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?><worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:mx="http://schemas.microsoft.com/office/mac/excel/2008/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:mv="urn:schemas-microsoft-com:mac:vml" xmlns:x14="http://schemas.microsoft.com/office/spreadsheetml/2009/9/main" xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac" xmlns:xm="http://schemas.microsoft.com/office/excel/2006/main"><sheetViews><sheetView workbookViewId="0"/></sheetViews><sheetFormatPr customHeight="1" defaultColWidth="14.43" defaultRowHeight="15.75"/><sheetData><row r="1"><c r="A1" s="1" t="s"><v>0</v></c><c r="B1" s="1" t="s"><v>1</v></c><c r="C1" s="1" t="s"><v>2</v></c><c r="D1" s="1" t="s"><v>3</v></c><c r="E1" s="1" t="s"><v>4</v></c><c r="F1" s="1" t="s"><v>5</v></c></row><row r="2"><c r="A2" s="1" t="s"><v>9</v></c><c r="B2" s="1" t="s"><v>10</v></c><c r="C2" s="1"><v>1952.0</v></c><c r="D2" s="1"><v>55.23</v></c><c r="E2" s="1"><v>1282697.0</v></c><c r="F2" s="1"><v>1601.0561</v></c></row><row r="3"><c r="A3" s="1" t="s"><v>12</v></c><c r="B3" s="1" t="s"><v>10</v></c><c r="C3" s="1"><v>1952.0</v></c><c r="D3" s="1"><v>66.8</v></c><c r="E3" s="1"><v>6927772.0</v></c><c r="F3" s="1"><v>6137.0765</v></c></row><row r="4"><c r="A4" s="1" t="s"><v>15</v></c><c r="B4" s="1" t="s"><v>10</v></c><c r="C4" s="1"><v>1952.0</v></c><c r="D4" s="1"><v>68.0</v></c><c r="E4" s="1"><v>8730405.0</v></c><c r="F4" s="1"><v>8343.1051</v></c></row><row r="5"><c r="A5" s="1" t="s"><v>19</v></c><c r="B5" s="1" t="s"><v>10</v></c><c r="C5" s="1"><v>1952.0</v></c><c r="D5" s="1"><v>53.82</v></c><c r="E5" s="1"><v>2791000.0</v></c><c r="F5" s="1"><v>973.5332</v></c></row><row r="6"><c r="A6" s="1" t="s"><v>21</v></c><c r="B6" s="1" t="s"><v>10</v></c><c r="C6" s="1"><v>1952.0</v></c><c r="D6" s="1"><v>59.6</v></c><c r="E6" s="1"><v>7274900.0</v></c><c r="F6" s="1"><v>2444.2866</v></c></row></sheetData><drawing r:id="rId1"/></worksheet>


--------------------------------------------------------------------------------
/internal/mini-gap/xl/worksheets/sheet2.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?><worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:mx="http://schemas.microsoft.com/office/mac/excel/2008/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:mv="urn:schemas-microsoft-com:mac:vml" xmlns:x14="http://schemas.microsoft.com/office/spreadsheetml/2009/9/main" xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac" xmlns:xm="http://schemas.microsoft.com/office/excel/2006/main"><sheetViews><sheetView workbookViewId="0"/></sheetViews><sheetFormatPr customHeight="1" defaultColWidth="14.43" defaultRowHeight="15.75"/><sheetData><row r="1"><c r="A1" s="1" t="s"><v>0</v></c><c r="B1" s="1" t="s"><v>1</v></c><c r="C1" s="1" t="s"><v>2</v></c><c r="D1" s="1" t="s"><v>3</v></c><c r="E1" s="1" t="s"><v>4</v></c><c r="F1" s="1" t="s"><v>5</v></c></row><row r="2"><c r="A2" s="1" t="s"><v>16</v></c><c r="B2" s="1" t="s"><v>17</v></c><c r="C2" s="1"><v>1952.0</v></c><c r="D2" s="1"><v>69.12</v></c><c r="E2" s="1"><v>8691212.0</v></c><c r="F2" s="1"><v>10039.6</v></c></row><row r="3"><c r="A3" s="1" t="s"><v>20</v></c><c r="B3" s="1" t="s"><v>17</v></c><c r="C3" s="1"><v>1952.0</v></c><c r="D3" s="1"><v>69.39</v></c><c r="E3" s="1"><v>1994794.0</v></c><c r="F3" s="1"><v>10556.58</v></c></row><row r="4"><c r="A4" s="1" t="s"><v>16</v></c><c r="B4" s="1" t="s"><v>17</v></c><c r="C4" s="1"><v>1957.0</v></c><c r="D4" s="1"><v>70.33</v></c><c r="E4" s="1"><v>9712569.0</v></c><c r="F4" s="1"><v>10949.65</v></c></row><row r="5"><c r="A5" s="1" t="s"><v>20</v></c><c r="B5" s="1" t="s"><v>17</v></c><c r="C5" s="1"><v>1957.0</v></c><c r="D5" s="1"><v>70.26</v></c><c r="E5" s="1"><v>2229407.0</v></c><c r="F5" s="1"><v>12247.4</v></c></row><row r="6"><c r="A6" s="1" t="s"><v>16</v></c><c r="B6" s="1" t="s"><v>17</v></c><c r="C6" s="1"><v>1962.0</v></c><c r="D6" s="1"><v>70.93</v></c><c r="E6" s="1"><v>1.0794968E7</v></c><c r="F6" s="1"><v>12217.23</v></c></row></sheetData><drawing r:id="rId1"/></worksheet>


--------------------------------------------------------------------------------
/internal/mini-gap/xl/worksheets/sheet3.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?><worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:mx="http://schemas.microsoft.com/office/mac/excel/2008/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:mv="urn:schemas-microsoft-com:mac:vml" xmlns:x14="http://schemas.microsoft.com/office/spreadsheetml/2009/9/main" xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac" xmlns:xm="http://schemas.microsoft.com/office/excel/2006/main"><sheetViews><sheetView workbookViewId="0"/></sheetViews><sheetFormatPr customHeight="1" defaultColWidth="14.43" defaultRowHeight="15.75"/><sheetData><row r="1"><c r="A1" s="1" t="s"><v>0</v></c><c r="B1" s="1" t="s"><v>1</v></c><c r="C1" s="1" t="s"><v>2</v></c><c r="D1" s="1" t="s"><v>3</v></c><c r="E1" s="1" t="s"><v>4</v></c><c r="F1" s="1" t="s"><v>5</v></c></row><row r="2"><c r="A2" s="1" t="s"><v>13</v></c><c r="B2" s="1" t="s"><v>14</v></c><c r="C2" s="1"><v>1952.0</v></c><c r="D2" s="1"><v>62.485</v></c><c r="E2" s="1"><v>1.7876956E7</v></c><c r="F2" s="1"><v>5911.315</v></c></row><row r="3"><c r="A3" s="1" t="s"><v>18</v></c><c r="B3" s="1" t="s"><v>14</v></c><c r="C3" s="1"><v>1952.0</v></c><c r="D3" s="1"><v>40.414</v></c><c r="E3" s="1"><v>2883315.0</v></c><c r="F3" s="1"><v>2677.326</v></c></row><row r="4"><c r="A4" s="1" t="s"><v>22</v></c><c r="B4" s="1" t="s"><v>14</v></c><c r="C4" s="1"><v>1952.0</v></c><c r="D4" s="1"><v>50.917</v></c><c r="E4" s="1"><v>5.660256E7</v></c><c r="F4" s="1"><v>2108.944</v></c></row><row r="5"><c r="A5" s="1" t="s"><v>23</v></c><c r="B5" s="1" t="s"><v>14</v></c><c r="C5" s="1"><v>1952.0</v></c><c r="D5" s="1"><v>68.75</v></c><c r="E5" s="1"><v>1.4785584E7</v></c><c r="F5" s="1"><v>11367.161</v></c></row><row r="6"><c r="A6" s="1" t="s"><v>27</v></c><c r="B6" s="1" t="s"><v>14</v></c><c r="C6" s="1"><v>1952.0</v></c><c r="D6" s="1"><v>54.745</v></c><c r="E6" s="1"><v>6377619.0</v></c><c r="F6" s="1"><v>3939.979</v></c></row></sheetData><drawing r:id="rId1"/></worksheet>


--------------------------------------------------------------------------------
/internal/mini-gap/xl/worksheets/sheet4.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?><worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:mx="http://schemas.microsoft.com/office/mac/excel/2008/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:mv="urn:schemas-microsoft-com:mac:vml" xmlns:x14="http://schemas.microsoft.com/office/spreadsheetml/2009/9/main" xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac" xmlns:xm="http://schemas.microsoft.com/office/excel/2006/main"><sheetViews><sheetView workbookViewId="0"/></sheetViews><sheetFormatPr customHeight="1" defaultColWidth="14.43" defaultRowHeight="15.75"/><sheetData><row r="1"><c r="A1" s="1" t="s"><v>0</v></c><c r="B1" s="1" t="s"><v>1</v></c><c r="C1" s="1" t="s"><v>2</v></c><c r="D1" s="1" t="s"><v>3</v></c><c r="E1" s="1" t="s"><v>4</v></c><c r="F1" s="1" t="s"><v>5</v></c></row><row r="2"><c r="A2" s="1" t="s"><v>6</v></c><c r="B2" s="1" t="s"><v>7</v></c><c r="C2" s="1"><v>1952.0</v></c><c r="D2" s="1"><v>43.077</v></c><c r="E2" s="1"><v>9279525.0</v></c><c r="F2" s="1"><v>2449.0082</v></c></row><row r="3"><c r="A3" s="1" t="s"><v>8</v></c><c r="B3" s="1" t="s"><v>7</v></c><c r="C3" s="1"><v>1952.0</v></c><c r="D3" s="1"><v>30.015</v></c><c r="E3" s="1"><v>4232095.0</v></c><c r="F3" s="1"><v>3520.6103</v></c></row><row r="4"><c r="A4" s="1" t="s"><v>11</v></c><c r="B4" s="1" t="s"><v>7</v></c><c r="C4" s="1"><v>1952.0</v></c><c r="D4" s="1"><v>38.223</v></c><c r="E4" s="1"><v>1738315.0</v></c><c r="F4" s="1"><v>1062.7522</v></c></row><row r="5"><c r="A5" s="1" t="s"><v>29</v></c><c r="B5" s="1" t="s"><v>7</v></c><c r="C5" s="1"><v>1952.0</v></c><c r="D5" s="1"><v>47.622</v></c><c r="E5" s="1"><v>442308.0</v></c><c r="F5" s="1"><v>851.2411</v></c></row><row r="6"><c r="A6" s="1" t="s"><v>32</v></c><c r="B6" s="1" t="s"><v>7</v></c><c r="C6" s="1"><v>1952.0</v></c><c r="D6" s="1"><v>31.975</v></c><c r="E6" s="1"><v>4469979.0</v></c><c r="F6" s="1"><v>543.2552</v></c></row></sheetData><drawing r:id="rId1"/></worksheet>


--------------------------------------------------------------------------------
/internal/mini-gap/xl/worksheets/sheet5.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?><worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:mx="http://schemas.microsoft.com/office/mac/excel/2008/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:mv="urn:schemas-microsoft-com:mac:vml" xmlns:x14="http://schemas.microsoft.com/office/spreadsheetml/2009/9/main" xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac" xmlns:xm="http://schemas.microsoft.com/office/excel/2006/main"><sheetViews><sheetView workbookViewId="0"/></sheetViews><sheetFormatPr customHeight="1" defaultColWidth="14.43" defaultRowHeight="15.75"/><sheetData><row r="1"><c r="A1" s="1" t="s"><v>0</v></c><c r="B1" s="1" t="s"><v>1</v></c><c r="C1" s="1" t="s"><v>2</v></c><c r="D1" s="1" t="s"><v>3</v></c><c r="E1" s="1" t="s"><v>4</v></c><c r="F1" s="1" t="s"><v>5</v></c></row><row r="2"><c r="A2" s="1" t="s"><v>24</v></c><c r="B2" s="1" t="s"><v>25</v></c><c r="C2" s="1"><v>1952.0</v></c><c r="D2" s="1"><v>28.801</v></c><c r="E2" s="1"><v>8425333.0</v></c><c r="F2" s="1"><v>779.4453</v></c></row><row r="3"><c r="A3" s="1" t="s"><v>26</v></c><c r="B3" s="1" t="s"><v>25</v></c><c r="C3" s="1"><v>1952.0</v></c><c r="D3" s="1"><v>50.939</v></c><c r="E3" s="1"><v>120447.0</v></c><c r="F3" s="1"><v>9867.0848</v></c></row><row r="4"><c r="A4" s="1" t="s"><v>28</v></c><c r="B4" s="1" t="s"><v>25</v></c><c r="C4" s="1"><v>1952.0</v></c><c r="D4" s="1"><v>37.484</v></c><c r="E4" s="1"><v>4.6886859E7</v></c><c r="F4" s="1"><v>684.2442</v></c></row><row r="5"><c r="A5" s="1" t="s"><v>30</v></c><c r="B5" s="1" t="s"><v>25</v></c><c r="C5" s="1"><v>1952.0</v></c><c r="D5" s="1"><v>39.417</v></c><c r="E5" s="1"><v>4693836.0</v></c><c r="F5" s="1"><v>368.4693</v></c></row><row r="6"><c r="A6" s="1" t="s"><v>31</v></c><c r="B6" s="1" t="s"><v>25</v></c><c r="C6" s="1"><v>1952.0</v></c><c r="D6" s="1"><v>44.0</v></c><c r="E6" s="1"><v>5.56263527E8</v></c><c r="F6" s="1"><v>400.4486</v></c></row></sheetData><drawing r:id="rId1"/></worksheet>


--------------------------------------------------------------------------------
/man/rexcel.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rexcel-package.r
 3 | \docType{package}
 4 | \name{rexcel}
 5 | \alias{rexcel}
 6 | \alias{rexcel-package}
 7 | \title{rexcel.}
 8 | \description{
 9 | rexcel.
10 | }
11 | 
12 | 


--------------------------------------------------------------------------------
/man/rexcel_read.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/read.R
 3 | \name{rexcel_read}
 4 | \alias{rexcel_read}
 5 | \title{Read an xlsx file that probably contains nontabular data}
 6 | \usage{
 7 | rexcel_read(path, sheet = 1L)
 8 | }
 9 | \arguments{
10 | \item{path}{Path to the xlsx file to load.  xls files are not supported.}
11 | 
12 | \item{sheet}{Sheet number (not name at this point).  Googlesheets
13 | exported sheets are likely not to do the right thing.}
14 | }
15 | \value{
16 | An \code{xlsx} object that can be printed.  Future methods
17 |   might do something sensible.  The structure is subject to
18 |   complete change so is not documented here.
19 | }
20 | \description{
21 | This function does not get the data into a usable form but at
22 | least loads it up into R so we can poke about with it.  The
23 | resulting loaded data can distinguish between formulae and data,
24 | numbers and text.  Merged cells are detected.  Style information
25 | is included, though nothing is done with it yet.  A summary of the
26 | data is printed if you print the resulting object.
27 | }
28 | 
29 | 


--------------------------------------------------------------------------------
/man/rexcel_read_workbook.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/read.R
 3 | \name{rexcel_read_workbook}
 4 | \alias{rexcel_read_workbook}
 5 | \title{Read an Excel workbook}
 6 | \usage{
 7 | rexcel_read_workbook(path, sheets = NULL, progress = TRUE)
 8 | }
 9 | \arguments{
10 | \item{path}{Path to the xlsx file to load.  xls files are not supported.}
11 | 
12 | \item{sheets}{Character or integer vector of sheets to read, or
13 | \code{NULL} to read all sheets (the default)}
14 | 
15 | \item{progress}{Display a progress bar?}
16 | }
17 | \description{
18 | Read an entire workbook
19 | }
20 | 
21 | 


--------------------------------------------------------------------------------
/man/rexcel_readxl.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/readxl.R
 3 | \name{rexcel_readxl}
 4 | \alias{rexcel_readxl}
 5 | \title{Read an Excel spreadsheet like readxl}
 6 | \usage{
 7 | rexcel_readxl(path, sheet = 1L, col_names = TRUE, col_types = NULL,
 8 |   na = "", skip = 0)
 9 | }
10 | \arguments{
11 | \item{path}{Path to the xlsx file}
12 | 
13 | \item{sheet}{Sheet name or an integer}
14 | 
15 | \item{col_names}{TRUE (the default) indicating we should use the
16 | first row as column names, FALSE, indicating we should generate
17 | names (X1, X2, ..., Xn) or a character vector of names to apply.}
18 | 
19 | \item{col_types}{Either NULL (the default) indicating we should
20 | guess the column types or a vector of column types (values must
21 | be "blank", "numeric", "date" or "text").}
22 | 
23 | \item{na}{Values indicating missing values (if different from
24 | blank).  Not yet used.}
25 | 
26 | \item{skip}{Number of rows to skip.}
27 | }
28 | \description{
29 | Read an Excel spreadsheet the same way as readxl, but slower.
30 | Assumes a well behaved table of data.
31 | }
32 | 
33 | 


--------------------------------------------------------------------------------
/rexcel.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace,vignette
22 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(rexcel)
3 | 
4 | test_check("rexcel")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/helper-rexcel.R:
--------------------------------------------------------------------------------
 1 | here <- rprojroot::find_package_root_file
 2 | 
 3 | TEST_DIR <- here("tests", "testthat")
 4 | TEST_REF_DIR <- file.path(TEST_DIR, "reference")
 5 | if (!dir.exists(TEST_REF_DIR)) dir.create(TEST_REF_DIR)
 6 | 
 7 | ## Get readxl source in order to get tests and therefore test sheets.
 8 | get_readxl <- function(path = TEST_REF_DIR) {
 9 |   readxl_path <- file.path(path, "readxl")
10 |   if (dir.exists(readxl_path)) {
11 |     return(readxl_path)
12 |   }
13 |   if (!has_internet()) {
14 |     return(tempfile())
15 |   }
16 |   url <- "https://cran.rstudio.com/src/contrib/readxl_0.1.1.tar.gz"
17 |   dest <- tempfile()
18 |   tryCatch(download.file(url, dest),
19 |            error = function(e) skip(e$message))
20 |   on.exit(file.remove(dest))
21 |   untar(dest, exdir = path)
22 |   readxl_path
23 | }
24 | 
25 | ## This is not terrifically portable (windows does not support it, but
26 | ## it gives a more graceful behaviour on Linux / OSX when there is no
27 | ## internet)
28 | has_internet <- function() {
29 |   !is.null(suppressWarnings(nsl("www.google.com")))
30 | }
31 | 


--------------------------------------------------------------------------------
/tests/testthat/test-colours.R:
--------------------------------------------------------------------------------
 1 | context("colours")
 2 | 
 3 | test_that("rgb2hsl", {
 4 |   ## These are random colours, ground-truthed with
 5 |   ##   http://www.rapidtables.com/convert/color/rgb-to-hsl.htm
 6 |   ## Because that website returns things as [degrees, percent,
 7 |   ## percent] to 3sf I have a little wrapper here.
 8 |   f <- function(x) {
 9 |     round(x * c(360, 100, 100), c(0, 1, 1))
10 |   }
11 | 
12 |   expect_identical(f(rgb2hsl(c(106, 90, 205))), rbind(h=248, s=53.5, l=57.8))
13 | 
14 |   cols <- c("gray22", "gray92", "indianred4", "slateblue",
15 |             "dodgerblue4", "sienna2", "steelblue", "lightskyblue",
16 |             "lightcyan4", "burlywood4")
17 |   ans <- f(rgb2hsl(grDevices::col2rgb(cols)))
18 |   cmp <- rbind(h=c(0, 0, 0, 248, 210, 19, 207, 203, 180, 33),
19 |                s=c(0, 0, 41.1, 53.5, 79.4, 83.5, 44, 92, 6.8, 24.1),
20 |                l=c(22, 92.2, 38.6, 57.8, 30.4, 59.6, 49, 75.5, 51.2, 43.9))
21 |   expect_identical(ans, cmp)
22 | 
23 |   ## Alpha handling:
24 |   m <- rbind(grDevices::col2rgb(cols), alpha=runif(length(cols), 0, 255))
25 |   ans2 <- rgb2hsl(m)
26 |   expect_identical(ans2[1:3, ], rgb2hsl(grDevices::col2rgb(cols)))
27 |   expect_equal(ans2[4, ], m[4, ] / 255)
28 | 
29 |   expect_equal(dim(rgb2hsl(matrix(numeric(), 3, 0))), c(3, 0))
30 |   expect_equal(dim(rgb2hsl(matrix(numeric(), 4, 0))), c(4, 0))
31 | 
32 |   ## And the reverse
33 |   m <- grDevices::col2rgb(cols)
34 |   expect_equal(hsl2rgb(rgb2hsl(m)), m)
35 | 
36 |   ## Do them _all_
37 |   mm <- grDevices::col2rgb(colors())
38 |   expect_equal(hsl2rgb(rgb2hsl(mm)), mm)
39 | })
40 | 


--------------------------------------------------------------------------------
/tests/testthat/test-gs.R:
--------------------------------------------------------------------------------
 1 | context("googlesheets")
 2 | 
 3 | test_that("can read google sheets in right order", {
 4 |   skip_if_not_installed("googlesheets")
 5 |   filename <- system.file("mini-gap", "mini-gap.xlsx", package = "googlesheets")
 6 | 
 7 |   # expect_equal(xlsx_internal_sheet_name(filename, 1L),
 8 |   #              "xl/worksheets/sheet4.xml")
 9 | 
10 |   ## WHAT'S WRONG?
11 |   ## the interface of this function seems to have changed since this test was
12 |   ## written
13 | 
14 |   ## TO DO
15 |   ## replace with something that looks at the table returned in linen object
16 |   ## that maps worksheet number to target files
17 | })
18 | 


--------------------------------------------------------------------------------
/tests/testthat/test-readxl.R:
--------------------------------------------------------------------------------
 1 | ## Test reading against readxl
 2 | context("readxl")
 3 | 
 4 | files <- dir(file.path(get_readxl(), "tests", "testthat"),
 5 |              pattern = "\\.xlsx$", full.names = TRUE)
 6 | 
 7 | ## Rich says: I don't see the inline string version opening correctly in
 8 | ## numbers so this might be a little beyond our needs to open here.
 9 | files <- files[grep("^(inlineStr)", basename(files), invert = TRUE)]
10 | files <- setNames(files, basename(files))
11 | 
12 | for (f in files) {
13 |   test_that(basename(f), {
14 |     ## Rich says: the as.data.frame here works around something deeply weird
15 |     ## with all.equal and tbl_dfs
16 |     readxl <- as.data.frame(readxl::read_excel(f), stringsAsFactors = FALSE)
17 |     us <- as.data.frame(rexcel_readxl(f))
18 |     if (basename(f) == "new_line_errors.xlsx") {
19 |       ## NOTE: I think that xml2 is replacing \r\n -> \n which causes
20 |       ## the confusion here.  I'm pretty happy about this though as
21 |       ## \r\n is not very R-ish.
22 |       readxl$column_name[[1]] <-
23 |         gsub("\r\r", "\r", readxl$column_name[[1]], fixed = TRUE)
24 |     }
25 |     expect_equal(us, readxl, label = paste("our import of", basename(f)))
26 |   })
27 | }
28 | 


--------------------------------------------------------------------------------
/tests/testthat/test-rexcel-read-workbook.R:
--------------------------------------------------------------------------------
 1 | ## Can we read some universe of sheets without error?
 2 | ## Later we could store reference objects, but just seems frustrating now.
 3 | context("read workbooks")
 4 | 
 5 | sheets <- dir(system.file("sheets", package = "rexcel"),
 6 |               pattern = "\\.xlsx$", full.names = TRUE)
 7 | sheets <- setNames(sheets, basename(sheets))
 8 | 
 9 | ## Jenny: I propose we skip Ekaterinburg until we have a decent, general
10 | ## solution to the non-standard namespacing problem
11 | ## Jenny: I'm skipping both because even resaved one is large. In due course,
12 | ## we should create a scaled down version for testing.
13 | ## Jenny: also skipping exp.xlsx for now
14 | sheets <-
15 |   sheets[grep("^Ekaterinburg_IP_9|^exp", names(sheets), invert = TRUE)]
16 | 
17 | for (sh in sheets) {
18 |   test_that(basename(sh), {
19 |     expect_silent(rexcel_read_workbook(sh, progress = FALSE))
20 |   })
21 | }
22 | 
23 | test_that("read one sheet - by name", {
24 |   filename <- sheets[["mini-gap.xlsx"]]
25 |   d <- rexcel_read_workbook(filename, progress = FALSE)
26 |   for (s in d$names) {
27 |     expect_equal(d$sheets[[s]]$values(), rexcel_read(filename, s)$values())
28 |   }
29 | })
30 | 
31 | test_that("read one sheet - by index", {
32 |   filename <- sheets[["mini-gap.xlsx"]]
33 |   d <- rexcel_read_workbook(filename, progress = FALSE)
34 |   for (s in seq_along(d$names)) {
35 |     expect_equal(d$sheets[[s]]$values(), rexcel_read(filename, s)$values())
36 |   }
37 | })
38 | 


--------------------------------------------------------------------------------
/tests/testthat/test-view.R:
--------------------------------------------------------------------------------
 1 | context("view")
 2 | 
 3 | ## NOTE: These are really tests of *linen*
 4 | 
 5 | test_that("basic view", {
 6 |   filename <- here("inst", "sheets", "mini-gap.xlsx")
 7 |   d <- rexcel::rexcel_read(filename)
 8 |   tbl <- d$table()
 9 |   expect_equal(nrow(tbl), 5)
10 |   expect_equal(names(tbl)[[1]], "country")
11 | 
12 |   xr <- cellranger::cell_limits(c(4, 1), c(d$dim[[1]], d$dim[[2]]))
13 |   v <- linen::worksheet_view(d, xr)
14 |   expect_error(v$table(), "header information not convertable to col_names")
15 | 
16 |   v2 <- linen::worksheet_view(d, xr, header = letters[seq_len(d$dim[[1]])])
17 |   tbl_v2 <- v2$table()
18 |   expect_equal(nrow(tbl_v2), 3)
19 |   expect_equal(names(tbl_v2), letters[1:6])
20 | 
21 |   tmp <- unname(tbl[3:5, ])
22 |   rownames(tmp) <- NULL
23 |   expect_equal(unname(tbl_v2), tmp)
24 | })
25 | 


--------------------------------------------------------------------------------
/tests/testthat/test-xlsx-utils.R:
--------------------------------------------------------------------------------
 1 | context("xlsx utilities")
 2 | 
 3 | iris_path <- file.path(get_readxl(), "tests", "testthat", "iris-excel.xlsx")
 4 | 
 5 | test_that("xlsx is detected as such and vice versa", {
 6 | 
 7 |   expect_true(is_xlsx(iris_path))
 8 | 
 9 |   expect_error(is_xlsx("nonexistent_path"), "does not exist")
10 |   expect_error(is_xlsx(system.file(package = "linen")), "cannot be opened")
11 |   zf <- tempfile()
12 |   utils::zip(zf,
13 |              files = dir(system.file(package = "linen"), full.names = TRUE),
14 |              extras = "--quiet")
15 |   expect_false(is_xlsx(paste0(zf, ".zip")))
16 | })
17 | 
18 | test_that("xlsx files are listed", {
19 |   iris_files <- xlsx_list_files(iris_path)$name
20 |   ref <- c("_rels/.rels", "[Content_Types].xml", "docProps/app.xml",
21 |            "docProps/core.xml", "xl/_rels/workbook.xml.rels",
22 |            "xl/sharedStrings.xml", "xl/styles.xml", "xl/theme/theme1.xml",
23 |            "xl/workbook.xml", "xl/worksheets/sheet1.xml")
24 |   expect_identical(intersect(iris_files, ref), iris_files)
25 | })
26 | 
27 | 


--------------------------------------------------------------------------------