├── .Rbuildignore
├── .binder
    └── Dockerfile
├── .gitignore
├── .lintr
├── .nojekyll
├── .travis.yml
├── 01-introduction.Rmd
├── 02-set-up.Rmd
├── 03-programming.Rmd
├── 04-workflow.Rmd
├── 05-input-output.Rmd
├── 06-data-carpentry.Rmd
├── 07-performance.Rmd
├── 08-hardware.Rmd
├── 09-collaboration.Rmd
├── 10-learning.Rmd
├── DESCRIPTION
├── Dockerfile
├── LICENSE
├── Makefile
├── NAMESPACE
├── R
    └── .gitignore
├── README.md
├── _bookdown.yml
├── _output.yml
├── appendix.Rmd
├── code
    ├── 01-introduction_f3.R
    ├── 02-blas.R
    ├── 03-programming_f1.R
    ├── 03-programming_f3.R
    ├── 03-programming_f4.R
    ├── 03-programming_f5.R
    ├── 04-project-planning_f1.R
    ├── 04-project-planning_f2.R
    ├── 05-io_f1.R
    ├── 05-io_f2.R
    ├── 06-data-carpentry_f2.R
    ├── 07-performance_f3.R
    ├── 07-performance_f5.R
    ├── 07-performance_f6.R
    ├── 08-hardware_benchmarks.R
    ├── 08-hardware_cpu_speed.R
    ├── before_script.R
    ├── docstats.R
    └── initialise.R
├── css
    └── style.css
├── deploy.sh
├── efficientR.Rproj
├── extdata
    ├── .gitignore
    ├── 03-f5.Rds
    ├── 05-f1.RData
    ├── 05-f2.RData
    ├── 07-rcpp_comparison.RData
    ├── 07-which_comparison.RData
    ├── clock_speed.RData
    ├── co2.csv
    ├── co2.feather
    ├── cranlog.Rds
    ├── dplyr-verbs.csv
    ├── filesizes.Rds
    ├── idata-renamed.Rds
    ├── lnd_geo_df.Rds
    ├── lnd_simple.Rds
    ├── mean_comparison.RData
    ├── out-ice.Rds
    ├── package_list.csv
    ├── past_results.RData
    ├── pew.csv
    ├── pop_change.csv
    ├── res-datatable.Rds
    ├── res.Rds
    ├── res_rl_blas.Rds
    ├── res_rl_noblas.Rds
    ├── res_v.Rds
    ├── reshape-pew.csv
    ├── rtimes.Rds
    └── wtimes.Rds
├── figures
    ├── f0_front_scale.png
    ├── f0_full.png
    ├── f0_web.png
    ├── f1_1_800px-QWERTY-home-keys-position.png
    ├── f1_2_profvis-ice.png
    ├── f1_3_icesheet-change.png
    ├── f2_1_sysmon.png
    ├── f2_2_rstudio.png
    ├── f2_3_view.png
    ├── f4_2_DiagrammeR-gantt-book.png
    ├── f4_3_geosphere-badge.png
    ├── f4_4_geoPlot-badge.png
    ├── f5_3_rstudio-package-filepath-intellisense.png
    ├── f6_1_world_co2.png
    ├── f7_1_profvis.png
    ├── f7_2_profvis_monopoly.png
    ├── f7_4_profvis_monopoly.png
    ├── f8_1_3SDRAM-DIMMs.jpg
    ├── f8_2_627px-Laptop-hard-drive-exposed.jpg
    ├── f9_1_rstudio-git.png
    ├── f9_2_rstudio-githist.png
    ├── icesheet-change.png
    ├── icesheet-test.png
    └── pf10_1_package-autocompletion.png
├── images
    ├── note.png
    ├── tip.png
    └── warning.png
├── index.Rmd
├── packages.bib
├── preamble.tex
├── refs.bib
└── src
    ├── .gitignore
    ├── mean_cpp.cpp
    └── precision.cpp


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^extdata$
4 | .nojekyll
5 | .travis.yml
6 | 


--------------------------------------------------------------------------------
/.binder/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM rockerdev/binder:4.0.0
 2 | LABEL maintainer='Robin Lovelace, Colin Gillespie'
 3 | USER root
 4 | 
 5 | # Auto-get sysreqs
 6 | RUN wget https://github.com/csgillespie/efficientR/raw/master/DESCRIPTION \
 7 |   && Rscript -e "remotes::install_github('r-hub/sysreqs')" \
 8 |   && sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION', 'linux-x86_64-ubuntu-gcc'))") \
 9 |   && echo "$sysreqs"
10 | # Install efficientR package
11 | RUN Rscript -e "remotes::install_github('csgillespie/efficientR')"
12 | 
13 | # Copy the book across
14 | RUN wget https://github.com/csgillespie/efficientR/archive/master.zip \
15 |   && unzip master.zip \
16 |   && mv efficientR-master /home/rstudio/efficientR \
17 |   && chown -Rv rstudio /home/rstudio \
18 |   && rm master.zip
19 | 
20 | USER ${NB_USER}
21 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # History files
 2 | .Rhistory
 3 | .Rapp.history
 4 | # Example code in package build process
 5 | *-Ex.R
 6 | # RStudio files
 7 | .Rproj.user/
 8 | # produced vignettes
 9 | vignettes/*.html
10 | vignettes/*.pdf
11 | .Rproj.user
12 | *.html
13 | libs/
14 | *_cache
15 | *_files
16 | _main*
17 | _book/*
18 | preamble.log
19 | Rplots.pdf
20 | rsconnect
21 | land.sqlite3
22 | data/
23 | voc_voyages.xlsx
24 | 


--------------------------------------------------------------------------------
/.lintr:
--------------------------------------------------------------------------------
1 | linters: with_defaults(
2 |     assignment_linter = NULL,
3 |     object_name_linter = NULL,
4 |     commented_code_linter = NULL,
5 |     line_length_linter(100)
6 |   )
7 | 


--------------------------------------------------------------------------------
/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/.nojekyll


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: r
 2 | cache: packages
 3 | pandoc_version: 2.9.2.1
 4 | 
 5 | notifications:
 6 |   email:
 7 |     on_success: change
 8 |     on_failure: change
 9 | 
10 | r_github_packages:
11 |   - csgillespie/efficient
12 | 
13 | script:
14 |   - R CMD INSTALL ../efficientR/
15 |   - make html
16 | 
17 | after_success:
18 |   - test $TRAVIS_PULL_REQUEST == "false" && test $TRAVIS_BRANCH == "master" && bash deploy.sh
19 | 


--------------------------------------------------------------------------------
/08-hardware.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | knit: "bookdown::preview_chapter"
  3 | ---
  4 | 
  5 | ```{r, echo=FALSE}
  6 | rm(list=ls())
  7 | ```
  8 | 
  9 | # Efficient hardware {#hardware}
 10 | 
 11 | This chapter is odd for a book on R programming. It contains very little code,  and yet the chapter has the potential to speed up your algorithms by orders of magnitude. This chapter considers the impact that your computer has on your time.
 12 | 
 13 | Your hardware is crucial. It will not only determine how _fast_ you can solve your problem, but also whether you can even tackle the problem of interest.  This is because everything is loaded in RAM. Of course, having a more powerful computer costs money. The goal is to help you decide whether the benefits of upgrading your hardware are worth that extra cost.
 14 | 
 15 | We'll begin this chapter with a background section on computer storage and memory and how it is measured. Then we consider individual computer components,  before concluding with renting machines in the cloud.
 16 | 
 17 | ### Prerequisites {-}
 18 | 
 19 | This chapter will focus on assessing your hardware and the benefit of upgrading. We will use the **benchmarkme** package to quantify the effect of changing your CPU.
 20 | 
 21 | ```{r}
 22 | library("benchmarkme")
 23 | ```
 24 | 
 25 | ## Top 5 tips for efficient hardware
 26 | 
 27 | 1. Use the package **benchmarkme** to assess your CPU's number crunching ability: is it worth upgrading your hardware?
 28 | 1. If possible, add more RAM.
 29 | 1. Double check that you have installed a $64$-bit version of R.
 30 | 1. Cloud computing is a cost effective way of obtaining more compute power.
 31 | 1. A solid state drive typically won't have much impact on the speed of your R code, but will increase your overall productivity since I/O is much faster.
 32 | 
 33 | ## Background: what is a byte?
 34 | 
 35 | A computer cannot store "numbers" or "letters". The only thing a computer can store and work with is bits. A bit is binary, it is either a $0$ or a $1$. In fact from a physics perspective, a bit is just a blip of electricity that either is or isn't there.
 36 | 
 37 | In the past the ASCII character set dominated computing. This set defines $128$ characters including $0$ to $9$, upper and lower case alpha-numeric and a few control characters such as a new line. To store these characters required $7$ bits
 38 | since $2^7 = 128$, but $8$ bits were typically used for performance [reasons](http://stackoverflow.com/q/14690159/203420). Table \@ref(tab:ascii) gives the binary representation of the first few characters.
 39 | 
 40 | ```{r ascii, echo=FALSE}
 41 | dd = tibble::frame_data(
 42 |   ~"Bit representation", ~"Character",
 43 |   "$01000001$",  "A",
 44 |   "$01000010$" , "B",
 45 |   "$01000011$" , "C",
 46 |   "$01000100$" , "D",
 47 |   "$01000101$" , "E",
 48 |   "$01010010$" , "R"
 49 | )
 50 | knitr::kable(dd, caption="The bit representation of a few ASCII characters.")
 51 | ```
 52 | 
 53 | The limitation of only having $256$ characters led to the development of Unicode, a standard framework aimed at creating a single character set for every reasonable writing system. Typically, Unicode characters require sixteen bits of storage. 
 54 | 
 55 | Eight bits is one byte, or ASCII character. So two ASCII characters would use two bytes or $16$ bits. A pure text document containing $100$ characters would use $100$ bytes ($800$ bits). Note that mark-up, such as font information or meta-data, can impose a substantial memory overhead: an empty `.docx` file requires about $3,700$ bytes of storage.
 56 | 
 57 | When computer scientists first started to think about computer memory, they noticed that $2^{10} = 1024 \simeq 10^3$ and $2^{20} =1,048,576\simeq 10^6$, so they adopted the short hand of kilo- and mega-bytes. Of course, _everyone_ knew that it was just a short hand, and it was really a binary power. When computers became more wide spread, foolish people like you and me just assumed that kilo actually meant $10^3$ bytes.
 58 | 
 59 | Fortunately the IEEE Standards Board intervened and created conventional, internationally adopted definitions of the International System of Units (SI) prefixes. So a kilobyte (kB) is $10^3 = 1000$ bytes and a megabyte (MB) is $10^6$ bytes or $10^3$ kilobytes (see table 8.2). A petabyte is approximately $100$ million drawers filled with text. Astonishingly Google processes around $20$ petabytes of data every day. 
 60 | 
 61 | Factor | 	Name |	Symbol |	Origin| Derivation 
 62 | -------|-------|---------|--------|-----------
 63 | $2^{10}$ |  kibi  |  Ki |  Kilobinary: $(2^{10})^1$  |  Kilo: $(10^3)^1$ 
 64 | $2^{20}$ |  mebi  |  Mi |  Megabinary: $(2^{10})^2$  |  Mega: $(10^3)^2$ 
 65 | $2^{30}$ |  gibi  |  Gi |  Gigabinary: $(2^{10})^3$  |  Giga: $(10^3)^3$ 
 66 | $2^{40}$ |  tebi  |  Ti |  Terabinary: $(2^{10})^4$  |  Tera: $(10^3)^4$ 
 67 | $2^{50}$ |  pebi  |  Pi |  Petabinary: $(2^{10})^5$  |  Peta: $(10^3)^5$ 
 68 | 
 69 | Table 8.2: Data conversion table. Credit: [http://physics.nist.gov/cuu/Units/binary.html](http://physics.nist.gov/cuu/Units/binary.html)
 70 | 
 71 | Even though there is now an agreed standard for discussing memory, that doesn't mean that everyone follows it.
 72 | Microsoft Windows, for example, uses 1MB to mean $2^{20}$B. Even more confusing the capacity of a $1.44$MB floppy disk is a mixture, $1\text{MB} = 10^3 \times 2^{10}$B. Typically RAM is specified in kibibytes, but hard drive manufacturers follow the SI standard!
 73 | 
 74 | ## Random access memory: RAM {#ram}
 75 | 
 76 | Random access memory (RAM) is a type of computer memory that can be accessed randomly: any byte of memory can be accessed without touching the preceding bytes. RAM is found in computers, phones, tablets and even printers. The amount of RAM R has access to is incredibly important. Since R loads objects into RAM, the amount of RAM you have available can limit the size of data set you can analyse.
 77 | 
 78 | Even if the original data set is relatively small, your analysis can generate large objects. For example, suppose we want to perform standard cluster analysis. The built-in data set `USArrests` is a data frame with $50$ rows and $4$ columns. Each row corresponds to a state in the USA.
 79 | 
 80 | ```{r echo=2}
 81 | data("USArrests", package = "datasets")
 82 | head(USArrests, 3)
 83 | ```
 84 | 
 85 | If we want to group states that have similar crime statistics, a standard first step is to calculate the distance or similarity matrix
 86 | 
 87 | ```{r}
 88 | d = dist(USArrests)
 89 | ```
 90 | 
 91 | When we inspect the object size of the original data set and the distance object using the **pryr** package
 92 | 
 93 | ```{r}
 94 | pryr::object_size(USArrests)
 95 | pryr::object_size(d)
 96 | ```
 97 | 
 98 | ```{block, type="rmdnote"}
 99 | The distance object `d` is actually a vector that contains the distances in the upper triangular region. 
100 | ```
101 | 
102 | we have managed to create an object that is three times larger than the original data set. In fact the object `d` is a symmetric $n \times n$ matrix, where $n$ is the number of rows in `USArrests`. Clearly, as `n` increases the size of `d` increases at rate $O(n^2)$. So if our original data set contained $10,000$ records, the associated distance matrix would contain almost $10^8$ values. Of course since the matrix is symmetric, this corresponds to around $50$ million unique values.
103 | 
104 | ```{block, type="rmdtip"}
105 | A rough rule of thumb is that your RAM should be three times the size of your data set.
106 | ```
107 | 
108 | Another benefit of increasing the amount of onboard RAM is that the 'garbage collector', a process that runs periodically to free-up system memory occupied by R, is called less often. It is straightforward to determine how much RAM you have using the **benchmarkme** package
109 | 
110 | ```{r, results="hide"}
111 | benchmarkme::get_ram()
112 | #> 16.3 GB
113 | ```
114 | 
115 | <!-- https://en.wikipedia.org/wiki/DIMM -->
116 | ```{r 8-1, fig.cap="Three DIMM slots on a computer motherboard used for increasing the amount of available RAM. Credit: Wikimedia.org", echo=FALSE, out.width="100%"}
117 | knitr::include_graphics("figures/f8_1_3SDRAM-DIMMs.jpg")
118 | ```
119 | 
120 | It is sometimes possible to increase your computer's RAM. On a computer motherboard there are typically $2$ to $4$ RAM or memory slots. If you have free slots, then you can add more memory. RAM comes in the form of dual in-line memory modules (DIMMs) that can be slotted into the motherboard spaces (see figure \@ref(fig:8-1) for example).
121 | However it is common that all slots are already taken. This means that to upgrade your computer's memory, some or all of the DIMMs will have to be removed. To go from $8$GB to $16$GB, for example, you may have to discard the two $4$GB RAM cards and replace them with two $8$GB cards. Increasing your laptop/desktop from $4$GB to $16$GB or $32$GB is cheap and should definitely be considered. As R Core member Uwe Ligges states,
122 | 
123 | ```{r}
124 | fortunes::fortune(192)
125 | ```
126 | 
127 | It is a testament to the design of R that it is still relevant and its popularity is growing. Ross Ihaka, one of the originators of the R programming language, made a throw-away comment in 2003:
128 | 
129 | ```{r}
130 | fortunes::fortune(21)
131 | ```
132 | 
133 | Considering that a standard smart phone now contains $1$GB of RAM, the fact that R was designed for "basic" computers, but can scale across clusters is impressive.
134 | R's origins on computers with limited resources helps explain its efficiency at dealing with large datasets.
135 | 
136 | #### Exercises {-}
137 | 
138 | The following two exercises aim to help you determine if it is worthwhile upgrading your RAM.
139 | 
140 | 1. R loads everything into memory, i.e. your computer's RAM. How much RAM does your computer have?
141 | 2. Using your preferred search engine, how much does it cost to double the amount of available RAM on your system? 
142 | 
143 | ## Hard drives: HDD vs SSD
144 | 
145 | You are using R because you want to analyse data. 
146 | The data is typically stored on your hard drive; but not all hard drives are equal.
147 | Unless you have a fairly expensive laptop your computer probably has a standard hard disk drive (HDD). 
148 | HDDs were first introduced by IBM in 1956. Data is stored using magnetism on a rotating platter, as shown in Figure \@ref(fig:8-2). The faster the platter spins, the faster the HDD can perform. Many laptop drives spin at either $5400$RPM (Revolutions per Minute) or $7200$RPM. The major advantage of HDDs is that they are cheap, making a $1$TB laptop standard.
149 | 
150 | ```{block type="rmdnote"}
151 | In the authors' experience, having an SSD drive doesn't make **much** difference to R. However, the reduction in boot time and general tasks makes an SSD drive  a wonderful purchase.
152 | ```
153 | 
154 | ```{r 8-2, fig.cap='A standard 2.5" hard drive, found in most laptops. Credit: https://en.wikipedia.org/wiki/Hard\\_disk\\_drive', echo=FALSE}
155 | knitr::include_graphics("figures/f8_2_627px-Laptop-hard-drive-exposed.jpg")
156 | ```
157 | 
158 | Solid state drives (SSDs) can be thought of as large, but more sophisticated versions of USB sticks. They have no moving parts and information is stored in microchips. Since there are no moving parts, reading/writing is much quicker. SSDs have other benefits: they are quieter, allow faster boot time (no 'spin up' time) and require less power (more battery life).
159 | 
160 | The read/write speed for a standard HDD is usually in the region of $50-120$MB/s (usually closer to $50$MB/s). For SSDs, speeds are typically over $200$MB/s. For top-of-the-range models this can approach $500$MB/s. If you're wondering, read/write speeds for RAM are around $2-20$GB/s. So at best SSDs are at least one order of magnitude slower than RAM, but still faster than standard HDDs.
161 | 
162 | ```{block type="rmdtip"}
163 | If you are unsure what type of hard drive you have, then time how long your computer takes to reach the log-in screen. If it is less than five seconds, you probably have an SSD.  There are links on the book's website detailing more precise methods for each OS.
164 | ```
165 | 
166 | ## Operating systems: 32-bit or 64-bit
167 | 
168 | R comes in two versions: $32$-bit and $64$-bit. Your operating system also comes in two versions, $32$-bit and $64$-bit.  Ideally you want $64$-bit versions of both R and the operating system. Using a $32$-bit version of either has severe limitations on the amount of RAM R can access. So when we suggest that you should just buy more RAM, this assumes that you are using a $64$-bit operating system, with a $64$-bit version of R.
169 | 
170 | ```{block type="rmdnote"}
171 | If you are using an OS version from the last five years, it is unlikely to be a $32$-bit OS.
172 | ```
173 | 
174 | A $32$-bit machine can access at most only $4$GB of RAM. Although some CPUs offer solutions to this limitation, if you are running a $32$-bit operating system, then R is limited to around $3$GB RAM. If you are running a $64$-bit operating system, but only a $32$-bit version of R, then you have access to slightly more memory (but not much). Modern systems should run a $64$-bit operating system, with a $64$-bit version of R. Your memory limit is now measured as $8$ terabytes for Windows machines and $128$TB for Unix-based OSs.  An easy method for determining if you are running a $64$-bit version of R is to run
175 | 
176 | ```{r results="hide"}
177 | .Machine$sizeof.pointer
178 | ```
179 | 
180 | which will return $8$ if you are running a $64$-bit version of R.
181 | 
182 | To find precise details consult the R help pages `help("Memory-limits")` and `help("Memory")`.
183 | 
184 | #### Exercises {-}
185 | 
186 | These exercises aim to condense the previous section into the key points.
187 | 
188 | 1. Are you using $32$-bit or $64$-bit version of R?
189 | 2. If you are using Windows, what are the results of running the command `memory.limit()`?
190 | 
191 | ## Central processing unit (CPU)
192 | 
193 | The central processing unit (CPU), or the processor, is the brains of a computer. The CPU is responsible for performing numerical calculations. The faster the processor, the faster R will run. The clock speed (or clock rate, measured in hertz) is the frequency with which the CPU executes instructions. The faster the clock speed, the more instructions a CPU can execute in a second. CPU clock speed for a single CPU has been fairly static in the last couple of years, hovering around 3.4GHz (see figure \@ref(fig:8-3)).
194 | 
195 | ```{r 8-3, echo=FALSE, fig.width=6, fig.height=4, fig.cap="CPU clock speed. The data for this figure was collected from web-forum and wikipedia. It is intended to indicate general trends in CPU speed.", out.width="70%"} 
196 | local(source("code/08-hardware_cpu_speed.R", local = TRUE))
197 | ```
198 | 
199 | Unfortunately we can't simply use clock speeds to compare CPUs, since the internal architecture of a CPU plays a crucial role in determining the CPU performance. The R package **benchmarkme** provides functions for benchmarking your system and contains data from previous benchmarks. Figure \@ref(fig:8-4) shows the relative performance for over $150$ CPUs.
200 | 
201 | ```{r 8-4, echo=FALSE, fig.width=6, fig.height=4,  fig.cap="CPU benchmarks from the R package, **benchmarkme**. Each point represents an individual CPU result.", fig.keep="last", out.width="70%"}
202 | local(source("code/08-hardware_benchmarks.R", local = TRUE))
203 | ```
204 | 
205 | Running the benchmarks and comparing your CPU to others is straightforward using the **benchmarkme** package.
206 | After loading the package, we can benchmark your CPU
207 | 
208 | ```{r eval=FALSE}
209 | res = benchmark_std() 
210 | ```
211 | 
212 | and compare the results to other users
213 | 
214 | ```{r eval=FALSE}
215 | plot(res)
216 | # Upload your benchmarks for future users
217 | upload_results(res) 
218 | ```
219 | 
220 | You can get the model specifications of the top CPUs using `get_datatable(res)`. 
221 | 
222 | ## Cloud computing
223 | 
224 | Cloud computing uses networks of remote servers, instead of a local computer, to store and analyse data. It is now becoming increasingly popular to rent cloud computing resources.
225 | 
226 | ### Amazon EC2
227 | 
228 | Amazon Elastic Compute Cloud (EC2) is one of a number of providers of this service. EC2 makes it (relatively) easy to run R instances in the cloud. Users can configure the operating system, CPU, hard drive type, the amount of RAM and where your project is physically located.
229 | 
230 | If you want to run a server in the Amazon EC2 cloud, you have to select the system you are going to boot up. There is a vast array of pre-packaged system images. Some of these images are just basic operating systems, such as Debian or Ubuntu, which require further configuration. There is also an [Amazon machine image](http://www.louisaslett.com/RStudio_AMI/) that specifically targets R and RStudio.
231 | 
232 | #### Exercise {-}
233 | 
234 | To assess whether you should consider cloud computing, how much does it cost to rent a machine comparable to your laptop in the cloud?
235 | 


--------------------------------------------------------------------------------
/09-collaboration.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | knit: "bookdown::preview_chapter"
  3 | ---
  4 | 
  5 | ```{r, echo=FALSE}
  6 | rm(list=ls())
  7 | ```
  8 | 
  9 | ```{r echo=FALSE, message=FALSE}
 10 | library(dplyr)
 11 | ```
 12 | 
 13 | # Efficient collaboration {#collaboration}
 14 | 
 15 | Large projects inevitably involve many people. This poses risks but also opportunities for improving computational efficiency and productivity, especially if project collaborators are reading and committing code. This chapter provides guidance on how to minimise the risks and maximise the benefits of collaborative R programming. 
 16 | 
 17 | Collaborative working has a number of benefits. A team with a diverse skill set is usually stronger than a team with a very narrow focus. It makes sense to specialize: clearly defining roles such as statistician, front-end developer, system administrator and project manager will make your team stronger. Even if you are working alone, dividing the work into discrete branches in this way can be useful, as discussed in Chapter \@ref(workflow).
 18 | 
 19 | Collaborative programming provides an opportunity for people to review each other's code. This can be encouraged by using a uniform style with many comments, as described in Section \@ref(coding-style). Like using a clear style in human language, following a style guide has the additional advantage of making your code more understandable to others.
 20 | 
 21 | When working on complex programming projects with multiple inter-dependencies version control is essential. Even on small projects tracking the progress of your project's code-base has many advantages and makes collaboration much easier. Fortunately it is now easier than ever before to integrate version control into your project, using RStudio's interface to the version control software `git` and online code sharing websites such as GitHub. This is the subject of Section \@ref(version-control).
 22 | 
 23 | The final section, \@ref(code-review), addresses the question of working in a team and performing
 24 | code reviews.
 25 | 
 26 | ### Prerequisites {-}
 27 | 
 28 | This chapter deals with coding standards and techniques. The only packages required for this 
 29 | chapter are **lubridate** and **dplyr**. These packages are used to illustrate good practice.
 30 | 
 31 | ## Top 5 tips for efficient collaboration
 32 | 
 33 | 1. Have a consistent coding style. 
 34 | 1. Think carefully about your comments and keep them up to date.
 35 | 1. Use version control whenever possible.
 36 | 1. Use informative commit messages.
 37 | 1. Don't be afraid to elicit feedback from colleagues.
 38 | 
 39 | ## Coding style
 40 | 
 41 | To be a successful programmer you need to use a consistent programming style.
 42 | There is no single 'correct' style, but using multiple styles in the same project is wrong [@ba_aa_ath_state_2012]. To some extent good style is subjective and down to personal taste. There are, however, general principles that
 43 | most programmers agree on, such as:
 44 | 
 45 | - Use modular code;
 46 | - Comment your code;
 47 | - Don't Repeat Yourself (DRY);
 48 | - Be concise, clear and consistent.
 49 | 
 50 | Good coding style will make you more efficient even if you are the only person who reads it.
 51 | When your code is read by multiple readers or you are developing code with co-workers, having a consistent style is even more important. There are a number of R style guides online that are broadly similar. One example is [Hadley Wickham's style guide](http://adv-r.had.co.nz/Style.html). The style followed in this book is based on a combination of Hadley Wickham's guide and our own preferences (we follow Yihui Xie in preferring `=` to `<-` for assignment, for example).
 52 | 
 53 | In line with the principle of automation (automate any task that can save time by automating), the easiest way to improve your code is to ask your computer to do it, using RStudio.
 54 | 
 55 | ### Reformatting code with RStudio
 56 | 
 57 | RStudio can automatically clean up poorly indented and formatted code. To do this, select the lines that need to be formatted (e.g. via `Ctrl+A` to select the entire script) then automatically indent it with `Ctrl+I`. The shortcut `Ctrl+Shift+A` will reformat the code, adding spaces for maximum readability. An example is provided below.
 58 | 
 59 | ```{r, eval=FALSE}
 60 | # Poorly indented/formatted code
 61 | if(!exists("x")){
 62 | x=c(3,5)
 63 | y=x[2]}
 64 | ```
 65 | 
 66 | This code chunk works but is not pleasant to read. RStudio automatically indents the code after the `if` statement as follows.
 67 | 
 68 | ```{r}
 69 | # Automatically indented code (Ctrl+I in RStudio)
 70 | if(!exists("x")){
 71 |   x=c(3,5)
 72 |   y=x[2]}
 73 | ```
 74 | 
 75 | This is a start, but it's still not easy to read. This can be fixed in RStudio as illustrated below (these options can be seen in the Code menu, accessed with `Alt+C` on Windows/Linux computers).
 76 | 
 77 | ```{r}
 78 | # Automatically reformat the code (Ctrl+Shift+A in RStudio)
 79 | if(!exists("x")) {
 80 |   x = c(3, 5)
 81 |   y = x[2]
 82 | }
 83 | ```
 84 | 
 85 | Note that some aspects of style are subjective: we would not leave a space after the `if` and `)`. 
 86 | 
 87 | ### File names
 88 | 
 89 | File names should use the `.R` extension and should be lower case (e.g. `load.R`). Avoid spaces. Use a dash or underscore to separate words.
 90 | 
 91 | ```{r, engine='bash', eval=FALSE}
 92 | # Good names
 93 | normalise.R
 94 | load.R
 95 | # Bad names
 96 | Normalise.r
 97 | load data.R
 98 | ```
 99 | 
100 | Section 1.1 of [Writing R Extensions](https://cran.r-project.org/doc/manuals/r-release/R-exts.html#Package-structure) provides more detailed guidance on file names, such as avoiding non-English alphabetic characters since they cannot be guaranteed  to work across locales. While the guidelines are strict, the guidance aids in making your scripts more portable.
101 | 
102 | ### Loading packages
103 | 
104 | Library function calls should be at the top of your script. When loading an essential package,  use `library` instead of `require` since a missing package will then raise an error. If a package isn't essential,  use `require` and appropriately capture the warning raised.  Package names should be surrounded with speech marks. 
105 | 
106 | ```{r message=FALSE}
107 | # Good
108 | library("dplyr")
109 | # Non-standard evaluation
110 | library(dplyr)
111 | ```
112 | 
113 | Avoid listing every package you may need, instead just include the packages you actually use. If you find that you are loading many packages, consider putting all packages in a file called `packages.R` and using `source` appropriately. 
114 | 
115 | ### Commenting
116 | 
117 | Comments can greatly improve the efficiency of collaborative projects by helping everyone to understand what each line of code is doing. However comments should be used carefully: plastering your script with comments does not necessarily make it more efficient, and too many comments can be inefficient. Updating heavily commented code can be a pain, for example: not only will you have to change all the R code, you'll also have to rewrite or delete all the comments!
118 | 
119 | Ensure that your comments are meaningful. Avoid using verbose English to explain standard R code. The comment below, for example, adds no useful information because it is obvious by reading the code that `x` is being set to 1:
120 | 
121 | ```{r}
122 | # Setting x equal to 1
123 | x = 1
124 | ```
125 | 
126 | Instead, comments should provide context. Imagine `x` was being used as a counter (in which case it should probably have a more meaningful name, like `counter`, but we'll continue to use `x` for illustrative purposes). In that case the comment could explain your intention for its future use:
127 | 
128 | ```{r}
129 | # Initialize counter
130 | x = 1
131 | ```
132 | 
133 | The example above illustrates that comments are more useful if they provide context and explain the programmer's intention [@Mcconnell2004]. Each comment line should begin with a single hash (`#`), followed by a space. Comments can be toggled (turned on and off) in this way with `Ctrl+Shift+C` in RStudio. The double hash (`##`) can be reserved for R output. If you follow your comment with four dashes (`# ----`) RStudio will enable code folding until the next instance of this.
134 | 
135 | ### Object names
136 | 
137 | > "When I use a word," Humpty Dumpty said, in a rather scornful tone, 
138 | > "it means just what I choose it to mean - neither more nor less."
139 | >
140 | > * Lewis Carroll - Through the Looking Glass, Chapter 6.
141 | 
142 | It is important for objects and functions to be named consistently and sensibly. To take a silly example, imagine if all objects in your projects were called `x`, `xx`, `xxx` etc. The code would run fine. However, it would be hard for other people, and a future you, to figure out what was going on, especially when you got to the object `xxxxxxxxxx`!
143 | 
144 | For this reason, giving a clear and consistent name to your objects, especially if they are going to be used many times in your script, can boost project efficiency (if an object is only used once, its name is less important, a case where `x` could be acceptable). Following discussion in [@ba_aa_ath_state_2012] and elsewhere, we suggest an `underscore_separated` style for function and object names^[One notable exception is packages in Bioconductor, where variable names are `camelCase`. In this case, you should match the existing style.]. Unless you are creating an S3 object, avoid using a `.` in the name (this will help avoid confusing Python programmers!). Names should be concise yet meaningful. 
145 | 
146 | In functions the required arguments should always be first, followed by optional arguments. The special `...` argument should be last. If your argument has a boolean value, use `TRUE`/`FALSE` instead of `T`/`F` for clarity. 
147 | 
148 | ```{block type="rmdwarning"}
149 | It's tempting to use `T`/`F` as shortcuts. But it is easy to accidentally redefine these variables, e.g. `F = 10`.
150 | R raises an error if you try to redefine `TRUE`/`FALSE`.
151 | ```
152 | 
153 | While it's possible to write arguments that depend on other arguments, try to avoid using this idiom
154 | as it makes the default behaviour harder to understand. Typically it's easier to set an argument to have a default value of `NULL` and check its value using `is.null` than by using `missing`. 
155 | Where possible, avoid using names of existing functions. 
156 | 
157 | ### Example package
158 | 
159 | The `lubridate` package is a good example of a package that has a consistent naming system,  to make it easy for users to guess its features and behaviour. Dates are encoded in a variety of ways, but the `lubridate` package has a neat set of functions consisting of the three letters, **y**ear, **m**onth and **d**ay. For example,
160 | 
161 | ```{r results="hide", message=FALSE}
162 | library("lubridate")
163 | ymd("2012-01-02")
164 | dmy("02-01-2012")
165 | mdy("01-02-2012")
166 | ```
167 | 
168 | ### Assignment
169 | 
170 | The two most common ways of assigning values to objects in R are with `<-` and `=`.  In most (but not all) contexts, they can be used interchangeably. Regardless of which operator you prefer, consistency is key, particularly when working in a  group. In this book we use the `=` operator for assignment, as it's faster to type and more consistent with other languages.
171 | 
172 | The one place where a difference occurs is during function calls. Consider the following  piece of code used for timing random number generation
173 | 
174 | ```{r eval=FALSE, results="hide"}
175 | system.time(expr1 <- rnorm(10e5))
176 | system.time(expr2 = rnorm(10e5)) # error
177 | ```
178 | 
179 | The first line will run correctly __and__ create a variable called `expr1`. The second line will raise an error. When we use `=` in a function call, it changes from an  _assignment_ operator to an _argument passing_ operator. For further information about assignment, see `?assignOps`.
180 | 
181 | ### Spacing
182 | 
183 | Consistent spacing is an easy way of making your code more readable. Even a simple command such as `x = x + 1` takes a bit more time to understand when the spacing is removed, i.e. `x=x+1`. You should add a space around the operators `+`, `-`, `/` and `*`. Include a space around the assignment operators, `<-` and `=`. Additionally, add a space around any comparison operators such as `==` and `<`. The latter rule helps avoid bugs:
184 | 
185 | ```{r echo=2:5, results="hide"}
186 | x = 10
187 | # Bug. x now equals 1
188 | x[x<-1]
189 | # Correct. Selecting values less than -1
190 | x[x < -1]
191 | ```
192 | 
193 | The exceptions to the space rule are `:`, `::` and `:::`, as well as `$` and `@` symbols for selecting sub-parts of objects. As with English, add a space after a comma, e.g.
194 | 
195 | ```{r eval=FALSE}
196 | z[z$colA > 1990, ]
197 | ```
198 | 
199 | ### Indentation
200 | 
201 | Use two spaces to indent code. Never mix tabs and spaces. RStudio can automatically convert the tab character to spaces (see `Tools -> Global options -> Code`).
202 | 
203 | ### Curly braces
204 | 
205 | Consider the following code:
206 | 
207 | ```{r eval=FALSE}
208 | # Bad style, fails
209 | if(x < 5)
210 | { 
211 | y} 
212 | else {
213 |   x}
214 | ```
215 | 
216 | Typing this straight into R will result in an error.  An opening curly brace, `{` should not go on its own line and  should always be followed by a line break. A closing curly brace should always go on its own line (unless it’s followed by an `else`, in which case the `else` should go on the same line as the closing brace). The code inside curly braces should be indented (and RStudio will enforce this rule), as shown below.
217 | 
218 | ```{r, results="hide"}
219 | # Good style
220 | if(x < 5){
221 |   x
222 | } else {
223 |   y
224 | }
225 | ```
226 | 
227 | #### Exercises {-}
228 | 
229 | Look at the difference between your style and RStudio's based on a representative R script that you have written (see Section \@ref(coding-style)). What are the similarities? What are the differences? Are you consistent? Write these down and think about how you can use the results to improve your coding style.
230 | 
231 | ## Version control
232 | 
233 | When a project gets large, complicated or mission-critical it is important to keep track of how it evolves. In the same way that Dropbox saves a 'backup' of your files, version control systems keep a backup of your code. The only difference is that version control systems back-up your code *forever*. 
234 | 
235 | The version control system we recommend is Git, a command-line application created by Linus Torvalds, who also invented Linux.^[We recommend '10 Years of Git: An Interview with Git Creator Linus Torvalds' from
236 | [Linux.com](http://www.linux.com/news/featured-blogs/185-jennifer-cloer/821541-10-years-of-git-an-interview-with-git-creator-linus-torvalds) for more information on this topic.
237 | ] The easiest way to integrate your R projects with Git, if you're not accustomed to using a shell (e.g. the Unix command line), is with RStudio's Git tab, in the top right-hand window (see figure \@ref(fig:9-1)). This shows a number of files have been modified (as illustrated with the blue M symbol) and that some are new (as illustrated with the yellow ? symbol). Checking the tick-box will enable these files to be *committed*.
238 | 
239 | ### Commits
240 | 
241 | Commits are the basic units of version control. Keep your commits 'atomic': each one should only do one thing. Document your work with clear and concise commit messages, use the present tense, e.g.: 'Add analysis functions'.
242 | 
243 | Committing code only updates the files on your 'local' branch. To update the files stored on a remote server (e.g. on GitHub), you must 'push' the commit. This can be done using `git push` from a shell or using the green up arrow in RStudio, illustrated in figure \@ref(fig:9-1). The blue down arrow will 'pull' the latest version of the repository from the remote.^[For a more detailed account of this process, see
244 | [GitHub's help pages](https://help.github.com/).]
245 | 
246 | ```{r 9-1, fig.cap="The Git tab in RStudio", echo=FALSE, out.width="70%"}
247 | knitr::include_graphics("figures/f9_1_rstudio-git.png")
248 | ```
249 | 
250 | ### Git integration in RStudio
251 | 
252 | How can you enable this functionality on your installation of RStudio? RStudio can be a GUI for Git only if Git has been installed *and* RStudio can find it. You need a working installation of Git (e.g. installed through `apt-get install git` on Ubuntu/Debian or via [GitHub Desktop](https://help.github.com/desktop/guides/getting-started/installing-github-desktop/) for Mac and Windows). RStudio can be linked to your Git installation via Tools > Global Options, in the Git/SVN tab. This tab also provides a [link](https://support.rstudio.com/hc/en-us/articles/200532077) to a help page on RStudio/Git.
253 | 
254 | Once Git has been linked to your RStudio installation, it can be used to track changes in a new project by selecting `Create a git repository` when creating a new project. The tab illustrated in figure \@ref(fig:9-1) will appear, allowing functionality for interacting with Git via RStudio.
255 | 
256 | RStudio provides a useful GUI for navigating past commits. This allows you to see the entire history of your project. To navigate and view the details of past commits click on the Diff button in the Git pane, as illustrated in figure \@ref(fig:9-2).
257 | 
258 | ```{r 9-2, fig.cap="The Git history navigation interface", echo=FALSE, out.width="100%"}
259 | knitr::include_graphics("figures/f9_2_rstudio-githist.png")
260 | ```
261 | 
262 | ### GitHub
263 | 
264 | GitHub is an online platform that makes sharing your work and collaborative code easy. There are alternatives such as [GitLab](https://about.gitlab.com/). The focus here is on GitHub as it's by far the most popular among R developers. Also, through the command `devtools::install_github()`, preview versions of a package can be installed and updated in an instant. This makes 'GitHub packages' a great way to access the latest functionality. And GitHub makes it easy to get your work 'out there' to the world for efficiently collaborating with others, without the restraints placed on CRAN packages.
265 | 
266 | To install the GitHub version of the **benchmarkme** package, for example one would enter
267 | 
268 | ```{r, eval=FALSE}
269 | devtools::install_github("csgillespie/benchmarkme")
270 | ```
271 | 
272 | Note that `csgillespie` is the GitHub user and `benchmarkme` is the package name. Replacing `csgillespie` with `robinlovelace` in the above code would install Robin's version of the package. This is useful for fast collaboration with many people, but you must remember that GitHub packages will not update automatically with the command `update.packages` (see \@ref(updating-r-packages)).
273 | 
274 | ```{block, note-text, type='rmdwarning'}
275 | Warning: although GitHub is fantastic for collaboration, it can end up creating more problems than it solves if your collaborators are not git-literate. In one project, Robin eventually abandoned using GitHub to collaborate after his collaborator found it impossible to work with. More time was being spent debugging git/GitHub than actually working. Our advice therefore is to **never impose git** and always ensure that other lines of communication (e.g. phone calls, emails) are open as different people prefer different ways of communicating.
276 | ```
277 | 
278 | ### Branches, forks, pulls and clones
279 | 
280 | Git is a large program which takes a long time to learn in depth. However, getting to grips with the basics of some of its more advanced functions can make you a more efficient collaborator. Using and merging branches, for example, allows you to test new features in a self-contained environment before they are used in production (e.g. when shifting to an updated version of a package which is not backwards compatible). Instead of bogging you down with a comprehensive discussion of what is possible, this section cuts to the most important features for collaboration: branches, forks, fetches and clones. For a more detailed description of Git's powerful functionality, we recommend Jenny Bryan's [book](http://happygitwithr.com/), "Happy Git and GitHub for the useR".
281 | 
282 | Branches are distinct versions of your repository. Git allows you to jump seamlessly between different versions of your entire project. To create a new branch called test, you need to enter the shell and use the Git command line:
283 | 
284 | ```{r, engine='bash', eval=FALSE}
285 | git checkout -b test
286 | ```
287 | 
288 | This is the equivalent of entering two commands: `git branch test` to create the branch and then `git checkout test` to *checkout* that branch. Checkout means switch into that branch. Any changes will not affect your previous branch. In RStudio you can jump quickly between branches using the drop down menu in the top right of the Git pane. This is illustrated in figure \@ref(fig:9-1): see the `master` text followed by a down arrow. Clicking on this will allow you to select other branches.
289 | 
290 | Forks are like branches but they exist on other people's computers. You can fork a repository on GitHub easily, as described on the site's [help pages](https://help.github.com/articles/fork-a-repo/). If you want an exact copy of this repository (including the commit history) you can *clone* this fork to your computer using the command `git clone` or by using a Git GUI such as GitHub Desktop. This is preferable from a collaboration perspective compared to cloning the repository directly, because any changes can be pushed back online easily if you are working from your own fork. You cannot push to forks that you have not created. If you want your work to be incorporated into the original fork you can use a *pull request*. Note: if you don't need the project's entire commit history, you can simply download a zip file containing the latest version of the repository from GitHub (see at the top right of any GitHub repository).
291 | 
292 | A pull request (PR) is a mechanism on GitHub by which your code can be added to an existing project. One of the most useful features of a PR from a collaboration perspective is that it provides an opportunity for others to comment on your code, line by line, before it gets merged. This is all done online on GitHub, as discussed in [GitHub's online help](https://help.github.com/articles/merging-a-pull-request/). Following feedback, you may want to refactor code, written by you or others.
293 | 
294 | 
295 | ```{r, echo=FALSE}
296 | # Content to add
297 | # For this reason GitHub is the subject of the next section.
298 | ### Using GitHub
299 | ```
300 | 
301 | ## Code review {#code-review}
302 | 
303 | What is a code review?^[This section is being written with small teams in mind. Larger teams should consult a more detailed text on code review.] Simply put, when we have finished working on a piece of code, a colleague reviews our work and considers questions such as
304 | 
305 |  * Is the code correct and properly documented?
306 |  * Could the code be improved?
307 |  * Does the code conform to existing style guidelines?
308 |  * Are there any automated tests? If so, are they sufficient?
309 | 
310 | A good code review shares knowledge and best practice. 
311 | 
312 | A lightweight code review can take a variety of forms. For example, it could be  as simple as emailing round some code for comments, or "over the shoulder", where someone literally looks over your shoulder while coding. More formal techniques include paired programming where two developers work side by side on the same project.
313 | 
314 | Regardless of the review method being employed, there are a number of points to remember. First, as with all forms of feedback, be constructive. Rather than pointing out flaws, give suggested improvements. Closely related: give praise when appropriate. Second, if you are reviewing a piece of code set a time frame or the number of lines of code you will review. For example, you will spend one hour reviewing a piece of code, or a maximum of 400 lines. Third, a code review should be performed before the code is merged into a larger code base; fix mistakes as soon as possible.
315 | 
316 | Many R users don't work in a team or group; instead they work by themselves. Practically, there isn't anyone nearby to review their code. However there is still the option of an _unofficial_ code review. For example, if you have hosted code on an online repository such as GitHub, users will naturally give feedback on your code (especially if you make it clear that you welcome feedback). Another good place is StackOverflow (covered in detail in chapter \@ref(learning)). This site allows you to post answers to other users' questions. When you post an answer, if your code is unclear, this will be flagged in comments below your answer.
317 | 
318 | 


--------------------------------------------------------------------------------
/10-learning.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | knit: "bookdown::preview_chapter"
  3 | ---
  4 | 
  5 | ```{r, echo=FALSE}
  6 | rm(list=ls())
  7 | ```
  8 | 
  9 | # Efficient learning {#learning}
 10 | 
 11 | As with any vibrant open source software community, R is fast moving. This can be disorientating because it means that you can never 'finish' learning R. On the other hand, it makes R a fascinating subject: there is always more to learn. Even experienced R users keep finding new functionality that helps solve problems quicker and more elegantly. Therefore *learning how to learn* is one of the most important skills to have if you want to learn R *in depth*. We emphasise *depth* of learning because it is more efficient to learn something properly than to Google it repeatedly every time you forget how it works.
 12 | 
 13 | This chapter aims to equip you with concepts, guidance and tips that will accelerate your transition from an R *hacker* to an R *programmer*. This inevitably involves effective use of R's help, reading R source code, and use of online material.
 14 | 
 15 | ### Prerequisites {-}
 16 | 
 17 | The only package used in this section is **swirl**
 18 | 
 19 | ```{r}
 20 | library("swirl")
 21 | ```
 22 | 
 23 | ## Top 5 tips for efficient learning
 24 | 
 25 | 1. Use R's internal help, e.g. with `?`, `??`, `vignette()` and `apropos()`. Try **swirl**.
 26 | 1. Read about the latest developments in established outlets such as the *Journal of Statistical Software*, the *R Journal*, R lists and the 'blogosphere'.
 27 | 1. If stuck, ask for help! A clear question posted in an appropriate place, using reproducible code, should get a quick and enlightening answer.
 28 | 1. For more in-depth learning, nothing can beat immersive R books and tutorials. Do some research and decide which resources you should complete.
 29 | 1. One of the best ways to consolidate learning is to write-it-up and pass on the knowledge: telling the story of what you've learned will also help others.
 30 | 
 31 | ## Using R's internal help
 32 | 
 33 | Sometimes the best place to look for help is within R itself. Using R's help has 3 main advantages from an efficiency perspective: 1) it's faster to query R from inside your IDE than to switch context and search for help on a different platform (e.g. the internet which has countless distractions); 2) it works offline; 3) learning to read R's documentation (and source code) is a powerful skill in itself that will improve your R programming.
 34 | 
 35 | The main disadvantage of R's internal help is that it is terse and in some cases sparse. Do not expect to *always* be able to find the answer in R so be prepared to look elsewhere for more detailed help and context. From a learning perspective becoming acquainted with R's documentation is often better than finding out the solution from a different source: it was written by developers, largely for developers. Therefore with R documentation you learn about a function *from the horse's mouth*. R help also sometimes sheds light on a function's history, e.g. through references to academic papers.
 36 | 
 37 | As you look to learn about a topic or function in R, it is likely that you will have a search strategy of your own, ranging from broad to narrow:
 38 | 
 39 | 1. Searching R and installed packages for help on a specific *topic*.
 40 | 1. Reading-up on *packages* vignettes.
 41 | 1. Getting help on a specific *function*.
 42 | 1. Looking into the *source code*.
 43 | 
 44 | In many cases you may already have researched stages 1 and 2. Often you can stop at 3 and simply use the function without worrying exactly how it works. In every case, it is useful to be aware of this hierarchical approach to learning from R's internal help, so you can start with the 'Big Picture' (and avoid going down a misguided route early on) and then quickly focus in on the functions that are most related to your task. To illustrate this approach in action, imagine that you are interested in a specific topic: optimisation. The remainder of this section will work through the stages 1 to 4 outlined above as if we wanted to find out more about this topic, with occasional diversions from this topic to see how specific help functions work in more detail. The final method of learning from R's internal resources covered in this section is **swirl**, a package for interactive learning that we cover last.
 45 | 
 46 | ### Searching R for topics
 47 | 
 48 | A 'wide boundary' search for a topic in R will often begin with a search for instances of a keyword in the documentation and function names. Using the example of optimisation, one could start with a search for a text string related to the topic of interest:
 49 | 
 50 | ```{r, eval=FALSE}
 51 | # help.search("optim") # or, more concisely
 52 | ??optim
 53 | ```
 54 | 
 55 | Note that the `??` symbol is simply a useful shorthand version of the function `help.search()`.
 56 | It is sometimes useful to use the full function rather than the shorthand version, because that way you can specify a number of options.
 57 | To search for all help pages that mention the more specific terms "optimisation" or "optimization" (the US spelling), in the title or concept of the help pages, for example, the following command would be used:
 58 | 
 59 | ```{r, eval=FALSE}
 60 | help.search(pattern = "optimisation|optimization", fields = c("title", "concept"))
 61 | ```
 62 | 
 63 | This will return a shorter (and potentially more efficiently focussed) list of help pages than the wide-ranging `??optim` call.
 64 | To make the search even more specific, we can use the `package` argument to constrain the search to a single package.
 65 | This can be very useful when you know that a function exists in a specific package, but you cannot remember what it is called:
 66 | 
 67 | ```{r, eval=FALSE}
 68 | help.search(pattern = "optimisation|optimization", fields = c("title", "concept"), package = "stats")
 69 | ```
 70 | 
 71 | Another function for searching R is `apropos()`. It prints to the console any R objects (including 'hidden' functions, those beginning with `.` and datasets) whose name matches a given text string. Because it does not search R's documentation, it tends to return fewer results than `help.search()`. Its use and typical outputs can be seen from a couple of examples below:
 72 | 
 73 | ```{r}
 74 | apropos("optim")
 75 | apropos("lm")[1:6] # show only first six results
 76 | ```
 77 | 
 78 | To search *all R packages*, including those you have not installed locally, for a specific topic there are a number of options. For obvious reasons, this depends on having internet access. The most rudimentary way to see what packages are available from CRAN, if you are using RStudio, is to use its autocompletion functionality for package names. To take an example, if you are looking for a package for geospatial data analysis, you could do worse than to enter the text string `geo` as an argument into the package installation function (for example `install.packages(geo)`) and hitting `Tab` when the cursor is between the `o` and the `)` in the example. The resulting options are shown in the figure below: selecting one from the dropdown menu will result in it being completed with surrounding quote marks, as necessary.
 79 | 
 80 | ```{r, echo=FALSE, fig.cap="Package name autocompletion in action in RStudio for packages beginning with 'geo'.", out.width="100%"}
 81 | knitr::include_graphics("figures/pf10_1_package-autocompletion.png")
 82 | ```
 83 | 
 84 | ### Finding and using vignettes
 85 | 
 86 | Some packages contain vignettes. These are pieces of ['long-form' documentation](http://r-pkgs.had.co.nz/vignettes.html) that allow package authors to go into detail explaining how the package works [@Wickham_2015]. In general they are high quality. Because they can be used to illustrate real world use cases, vignettes can be the best way to understand functions in the context of broader explanations and longer examples than are provided in function help pages. Although many packages lack vignettes, they deserve a sub-section of their own because they can boost the efficiency with which package functions are used, in an integrated workflow.
 87 | 
 88 | ```{block, type="rmdnote"}
 89 | If you are frustrated because a certain package lacks a vignette, you can create one. This can be a great way of learning about and consolidating your knowledge of a package. To create a vignette, first download the source code of a package and then use `devtools::use_vignette()`. To add a vignette to the **efficient** package used in this book, for example, you could clone the repo, e.g. using the command `git clone git@github.com:csgillespie/efficient`. Once you have opened the repo as a project, e.g. in RStudio, you could create a vignette called "efficient-learning" with the following command: `use_vignette("efficient-learning")`.
 90 | ```
 91 | 
 92 | To browse any vignettes associated with a particular package, we can use the handy function `browseVignettes()`:
 93 | 
 94 | ```{r eval=FALSE}
 95 | browseVignettes(package = "benchmarkme")
 96 | ```
 97 | 
 98 | This is roughly equivalent to `vignette(package = "benchmarkme")` but opens a new page in a browser and lets you navigate all the vignettes in that particular package. For an overview of all vignettes available from R packages installed on your computer, try browsing all available vignettes with `browseVignettes()`. You may be surprised at how many hidden gems there are in there!
 99 | 
100 | How best to *use* vignettes depends on the vignette in question and your aims. In general you should expect to spend longer reading vignettes than other types of R documentation. The *Introduction to dplyr* vignette (opened with `vignette("introduction", package = "dplyr")`), for example, contains almost 4,000 words of prose and example code and outputs, illustrating how its functions work. We recommend working through the examples and typing the example code to 'learn by doing'.
101 | 
102 | Another way to learn from package vignettes is to view their source code. You can find where vignette source code lives by looking in the `vignettes/` folder of the package's source code: **dplyr**'s vignettes, for example, can be viewed (and edited) online at [github.com/hadley/dplyr/tree/master/vignettes](https://github.com/hadley/dplyr/tree/master/vignettes). A quick way to view a vignette's R code is with the `edit()` function:
103 | 
104 | ```{r, eval=FALSE}
105 | v = vignette("introduction", package = "dplyr")
106 | edit(v)
107 | ```
108 | 
109 | 
110 | ### Getting help on functions
111 | 
112 | All functions have help pages. These contain, at a minimum, a list of the input arguments and the nature of the output that can be expected. Once a function has been identified, e.g. using one of the methods outlined in Section \@ref(searching-r-for-topics), its *help page* can be displayed by prefixing the function name with `?`. Continuing with the previous example, the help page associated with the command `optim()` (for general purpose optimisation) can be invoked as follows:
113 | 
114 | ```{r eval=FALSE}
115 | # help("optim") # or, more concisely:
116 | ?optim
117 | ```
118 | 
119 | In general, help pages describe *what* functions do, not *how* they work. This is one of the reasons
120 | that function help pages are thought (by some) to be difficult to understand. In practice, 
121 | this means that the help page does not describe the underlying mathematics or algorithm in 
122 | detail; its aim is to describe the interface.
123 | 
124 | A help page is divided into a number of sections. 
125 | The help for `optim()` is typical, in that it has a title (General-purpose Optimization) followed by short Description, Usage and Arguments sections.
126 | The Description is usually just a sentence or two for explaining what it does. Usage shows the arguments that the function needs to work. And Arguments describes what kind of objects the function expects. Longer sections typically include Details and Examples, which provide some context and provide (usually reproducible) examples of how the function can be used, respectively. The typically short Value, References and See Also sections facilitate efficient learning by explaining what the output means, where you can find academic literature on the subject, and which functions are related.
127 | 
128 | `optim()` is a mature and heavily used function so it has a long help page: you'll probably be thankful to learn that not all help pages are this long!
129 | With so much potentially overwhelming information in a single help page, the placement of the short, dense sections at the beginning is efficient because it means you can understand the fundamentals of a function in few words.
130 | Learning how to read and quickly interpret such help pages will greatly help your ability to learn R. Take some time to study the help for `optim()` in detail.
131 | 
132 | It is worth discussing the contents of the Usage section in particular, because this contains information that may not be immediately obvious:
133 | 
134 | ```
135 | optim(par, fn, gr = NULL, ...,
136 |       method = c("Nelder-Mead", "BFGS", "CG", "L-BFGS-B", "SANN", "Brent"),
137 |       lower = -Inf, upper = Inf, control = list(), hessian = FALSE)
138 | ```
139 | 
140 | This contains two pieces of critical information: 1) the *essential* arguments which must be provided for the function to work (`par` and `fn` in this case, as `gr` has a default value) before the `...` symbol; and 2) *optional* arguments that control how the function works (`method`, `lower`, and `hessian` in this case). `...` are optional arguments whose values depend on the other arguments (which will be passed to the function represented by `fn` in this case). Let's see how this works in practice by trying to run `optim()` to find the minimum value of the function $y = x^4 - x^2$:
141 | 
142 | ```{r}
143 | fn = function(x) {
144 |   x^4 - x^2
145 | }
146 | optim(par = 0, fn = fn)
147 | ```
148 | 
149 | The results show that the minimum value of `fn(x)` is found when `x = 0.707..` ($\frac{1}{\sqrt{2}}$), with a minimum value of $-0.25$. It took $58$ iterations of the function call for `optim()` to converge on this value. Each of these output values is described in the Value section of the help pages.
150 | From the help pages, we could guess that providing the function call without specifying `par` (i.e. `optim(fn = fn)`) would fail, which indeed it does. 
151 | 
152 | The most _helpful_ section is often the Examples. These lie at the bottom of the help page and show precisely how the function works. You can either copy and paste the code, or actually run the example code using the `example` command (it is well worth running these examples due to the graphics produced):
153 | 
154 | ```{r eval=FALSE}
155 | example(optim)
156 | ```
157 | 
158 | ```{block, type="rmdnote"}
159 | When a package is added to CRAN, the example part of the documentation is run on all major platforms. This helps ensure that a package works on multiple systems.
160 | ```
161 | 
162 | Another useful section in the help file is `See Also:`. In the `optim()` help page, it links to `optimize()` which may be more appropriate for this use case.
163 | 
164 | ### Reading R source code
165 |  
166 | R is open source. This means that we can view the underlying source code and examine any function. Of course the code is complex, and diving straight into the source code won't help that much. However, watching the GitHub R source code [mirror](https://github.com/wch/r-source/) will allow you to monitor small changes that occur. This gives a nice entry point into a complex code base. Likewise, examining the source of small functions, such as `NCOL` is informative, e.g. `getFunction("NCOL")`
167 | 
168 | ```{block, type="rmdtip"}
169 | Subscribing to the R NEWS [blog](https://developer.r-project.org/blosxom.cgi/R-devel/NEWS/) is an easy way of keeping track of future changes.
170 | ```
171 | 
172 | Many R packages are developed in the open on GitHub or R-Forge. Select a few well known packages and examine their source. A good package to start with is **[drat](https://github.com/eddelbuettel/drat)**. This is a relatively simple package developed by Dirk Eddelbuettel (author of Rcpp) that only contains a few functions. It gives you an excellent pointer into software development by one of the key R package writers.
173 | 
174 | A shortcut for browsing R's source code is provided by the RStudio IDE: clicking on a function and then hitting `F2` will open its source code in the file editor. This works for both functions that exist in R and its packages and functions that you created yourself in another R script (so long as it is within your project directory).
175 | Although reading source code can be interesting in itself, it is probably best done in the context of a specific question, e.g. "how can I use a function name as an argument in my own function?" (looking at the source code of `apply()` may help here).
176 | 
177 | ### Swirl
178 | 
179 | **swirl** is an interactive teaching platform for R. It offers a number of extensions and, for the pioneering, the ability for others to create custom extensions. The learning curve and method will not work for everyone, but this package is worth flagging as a potent self teaching resource. In some ways **swirl** can be seen as the ultimate internal R help as it allows dedicated learning sessions, based on multiple choice questions, all within a usual R session. To enter the **swirl** world, just enter the following. The resultant instructions will explain the rest:
180 | 
181 | ```{r, eval=FALSE}
182 | library("swirl")
183 | swirl()
184 | ```
185 | 
186 | ## Online resources
187 | 
188 | The R community has a strong online presence, providing many resources for learning. Over time, there has fortunately been a tendency for R resources to become more user friendly and up-to-date. Many resources that have been on CRAN for many years are dated by now so it's more efficient to navigate directly to the most up-to-date and efficient-to-use resources.
189 | 
190 | 'Cheat sheets' are short documents summarising how to do certain things. [RStudio](http://www.rstudio.com/resources/cheatsheets/), for example, provides excellent cheat sheets on [**dplyr**](https://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf), [**rmarkdown**](https://www.rstudio.com/wp-content/uploads/2016/03/rmarkdown-cheatsheet-2.0.pdf) and the [RStudio IDE](https://www.rstudio.com/wp-content/uploads/2016/01/rstudio-IDE-cheatsheet.pdf) itself.
191 | 
192 | The R-project website contains six detailed [official manuals](https://cran.r-project.org/manuals.html), plus a giant pdf file containing documentation for all recommended packages. These include [An Introduction to R](https://cran.r-project.org/doc/manuals/r-release/R-intro.html), [The R language definition](https://cran.r-project.org/doc/manuals/r-release/R-lang.html) and [R Installation and Administration](https://cran.r-project.org/doc/manuals/r-release/R-admin.html), all of which are recommended for people wanting to improve their general R skills. If you are developing a package and want to submit it to CRAN, the [Writing R Extensions](https://cran.r-project.org/doc/manuals/r-release/R-exts.html) manual is recommended reading, although it has to some extent been superseded by @Wickham_2015, the source code of which is [available online](https://github.com/hadley/r-pkgs). While these manuals are long, they contain important information written by experienced R programmers.
193 | 
194 | For more domain-specific and up-to-date information on developments in R, we recommend checking out academic journals. The [R-journal](https://journal.r-project.org/) regularly publishes articles describing new R packages, as well as general programming hints. Similarly, the articles in the [Journal of Statistical Software](https://www.jstatsoft.org/) have a strong R bias. Publications in these journals are generally of very high quality and have been rigorously peer reviewed. However, they may be rather technical for R novices.
195 | 
196 | The wider community provides a much larger body of information, of more variable quality, than the official R resources. The [Contributed Documentation](https://cran.r-project.org/other-docs.html) page on R's home page contains dozens of tutorials and other resources on a wide range of topics. Some of these are excellent, although many are not kept up-to-date. An excellent resource for browsing R help pages online is provided by [rdocumentation.org](http://www.rdocumentation.org).
197 | 
198 | Lower grade but more frequently released information can be found on the 'blogosphere'. Central to this is  [R-bloggers](http://www.r-bloggers.com/), a blog aggregator of content contributed by bloggers who write about R (in English). It is a great way to get exposed to new and different packages. Similarly monitoring the _[#rstats](https://twitter.com/search?q=%23rstats)_ twitter tag keeps you up-to-date with the latest news.
199 | 
200 | There are also mailing lists, Google groups and the Stack Exchange Q & A sites. Before requesting help, read a few other questions to learn the format of the site. Make sure you search previous questions so you are not duplicating work. Perhaps the most important point is that people aren't under __any__ obligation to answer your question. One of the fantastic things about the open-source community is that you can ask questions and one of the core developers may answer your question for free; but remember, everyone is busy!
201 | 
202 | ### Stackoverflow
203 | 
204 | The number one place on the internet for getting help on programming is [Stackoverflow](http://www.stackoverflow.com). This website provides a platform for asking and answering questions. Through site membership, questions and answers are voted up or down. Users of Stackoverflow earn reputation points when their question or answer is up-voted. Anyone (with enough reputation) can edit a question or answer. This helps the content remain relevant.
205 | 
206 | Questions are tagged. The R questions can be found under the [R tag](http://stackoverflow.com/questions/tagged/r). The [R page](https://stackoverflow.com/tags/r/info) contains links to Official documentation, free resources, and various other links. Members of the Stackoverflow R community have tagged, using `r-faq`, a few questions that often crop up. 
207 | 
208 | ### Mailing lists and groups.
209 | 
210 | There are many mailing lists and Google groups focused on R and particular packages. The main list for getting help is `R-help`. This is a high volume mailing list, with around a dozen messages per day. A more technical mailing list is `R-devel`. This list is intended for questions and discussion about code development in R. The discussion on this list is very technical. However, it's a good place to be introduced to new ideas - but it's not the place to ask about these ideas! There are 
211 | many other special interest mailing [lists](https://www.r-project.org/mail.html) covering topics ranging from high performance computing to ecology. Many popular packages also have their own mailing list or Google group, e.g. **ggplot2** and **shiny**. The key piece of advice is: before mailing a list, read the relevant mailing archive and check that your message is appropriate.
212 | 
213 | ## Asking a question
214 | 
215 | A great way to get specific help on a difficult topic is to ask for help.
216 | However, asking a good question is not easy. Three common mistakes, and ways to avoid them, are outlined below:
217 | 
218 | 1. Asking a question that has already been asked: ensure you've properly searched for the answer before posting.
219 | 2. The answer to the question can be found in R's help: make sure you've properly read the relevant help pages before asking.
220 | 3. The question does not contain a reproducible example: create a simple version of your data, show the code you've tried, and display the result you are hoping for.
221 | 
222 | Your question should contain just enough information that your problem is clear and reproducible, while at the same time avoiding unnecessary details. Fortunately there is a StackOverflow question - [How to make a great R reproducible example?](http://stackoverflow.com/q/5963269/203420) that provides excellent guidance.
223 | Additional guides that explain how to create good programming questions are provided by [StackOverflow](https://stackoverflow.com/help/how-to-ask) and the [R mailing list posting guide](https://www.r-project.org/posting-guide.html). 
224 | 
225 | ### Minimal data set {-}
226 | 
227 | What is the smallest data set you can construct that will reproduce your issue? Your actual data set may contain $10^5$ rows and $10^4$ columns, but to get your idea across you might only need $4$ rows and $3$ columns. Making small example data sets is easy.  For example, to create a data frame with two numeric columns and a column of characters just use
228 | 
229 | ```{r echo=-3}
230 | set.seed(1)
231 | example_df = data.frame(x = rnorm(4), y = rnorm(4), z = sample(LETTERS, 4))
232 | ```
233 | 
234 | Note the call to `set.seed` ensures anyone who runs the code will get the same random number stream. Alternatively, you can use one of the many data sets that come with R - `library(help = "datasets")`. 
235 | 
236 | If creating an example data set isn't possible, then use `dput` on your actual data set. This will create an ASCII text representation of the object that will enable anyone to recreate the object
237 | 
238 | ```{r results="hide"}
239 | dput(example_df)
240 | #> structure(list(
241 | #>  x = c(-0.626453810742332, 0.183643324222082, -0.835628612410047, 1.59528080213779), 
242 | #>  y = c(0.329507771815361, -0.820468384118015, 0.487429052428485, 0.738324705129217), 
243 | #>  z = structure(c(3L, 4L, 1L, 2L), .Label = c("J", "R", "S", "Y"), class = "factor")), 
244 | #>  .Names = c("x", "y", "z"), row.names = c(NA, -4L), class = "data.frame")
245 | ```
246 | 
247 | ### Minimal example {-}
248 |  
249 | What you should not do, is simply copy and paste your entire function into your question. It's unlikely that your entire function doesn't work, so just simplify it to the bare minimum.  The aim is to target your actual issue.  Avoid copying and pasting large blocks of code; remove superfluous lines that are not part of the problem. Before asking your question, can you run your code in a clean R environment and reproduce your error?
250 | 
251 | ## Learning in depth
252 | 
253 | In the age of the internet and social media, many people feel lucky if they have time out to go for a walk, let alone sit down to read a book. However it is undeniable that learning R *in depth* is a time consuming activity. Reading a book or a large tutorial (and completing the practical examples contained within) may not be the most efficient way to solve a particular problem in the short term, but it can be one of the best ways to learn R programming properly, especially in the long-run.
254 | 
255 | In depth learning differs from shallow, incremental learning because rather than discovering how a specific function works, you find out how systems of functions work together. To take a metaphor from civil engineering, in depth learning is about building strong foundations, on which a wide range of buildings can be constructed. In depth learning can be highly efficient in the long run because it will pay back over many years, regardless of the domain-specific problem you want to use R to tackle. Shallow learning, to continue the metaphor, is more like erecting many temporary structures: they can solve a specific problem in the short term but they will not be durable. Flimsy dwellings can be swept away. Shallow memories can be forgotten.
256 | 
257 | Having established that time spent 'deep learning' can, counter-intuitively, be efficient, it is worth thinking about how to deep learn. This varies from person to person. It does not involve passively absorbing sacred information transmitted year after year by the 'R gods'. It is an active, participatory process. To ensure that memories are rapidly actionable you must 'learn by doing'. Learning from a cohesive, systematic and relatively comprehensive resource will help you to see the many interconnections between the different elements of R programming and how they can be combined for efficient work.
258 | 
259 | There are a number of such resources, including this book. Although the understandable tendency will be to use it incrementally, dipping in and out of different sections when different problems arise, we also recommend reading it systematically to see how the different elements of efficiency fit together. It is likely that as you work progressively through this book, in parallel with solving real world problems, you will realise that the solution is not to have the 'right' resource at hand but to be able to use the tools provided by R efficiently. Once you hit this level of proficiency, you should have the confidence to address most problems encountered from first principles. Over time, your 'first port of call' should move away from Google and even R's internal help to simply giving it a try: informed trial and error, intelligent experimentation, can be the best approach to both learning and solving problems quickly, once you are equipped with the tools to do so. That's why this is the last section in the book.
260 | 
261 | If you have already worked through all the examples in this book, or if you want to learn areas not covered in it, there are many excellent resources for extending and deepening your knowledge of R programming for fast and effective work, and to do new things with it. Because R is a large and ever-evolving language, there is no definitive list of resources for taking your R skills to new heights. However, the list below, in rough ascending order of difficulty and depth, should provide plenty of material and motivation for in depth learning of R.
262 | 
263 | 1. Free webinars and online courses provided by [RStudio](http://www.rstudio.com/resources/webinars/) and [DataCamp](https://www.datacamp.com/community/open-courses). Both organisations are well regarded and keep their content up-to-date, but there are likely other good sources of online courses. We recommend testing and pushing your abilities, rather than going over the same material covered in this book. 
264 | 1. *R for Data Science* [@grolemund_r_2016], a free book introducing many concepts and 'tidy' packages for working with data (a free online version is available from [r4ds.had.co.nz/](http://r4ds.had.co.nz/)).
265 | 1. *R programming for Data Science* [@peng_r_2014], which provides in depth coverage of analysis and visualisation of datasets.  
266 | 1. *Advanced R Programming* [@Wickham2014], an advanced book which looks at the internals of how R works (free from [adv-r.had.co.nz](http://adv-r.had.co.nz/)).
267 | 
268 | ## Spread the knowledge
269 | 
270 | The final thing to say on the topic of efficient learning relates to the [old](https://en.wikipedia.org/wiki/Docendo_discimus) (~2000 years old!) saying *docendo discimus*:
271 | 
272 | > **by teaching we learn**.
273 | 
274 | This means that passing on information is one of the best ways to consolidate your learning. It was largely by helping others to learn R that we became proficient R users.
275 | 
276 | Demand for R skills is growing, so there are many opportunities to teach R. Whether it's helping your colleague to use `apply()`, or writing a blog post on solving certain problems in R, teaching others R can be a rewarding experience. Furthermore, spreading the knowledge can be efficient: it will improve your own understanding of the language and benefit the entire community, providing a positive feedback to the movement towards open source software in data-driven computing.
277 | 
278 | Assuming you have completed reading the book, the only remaining thing to say is well done: you are now an efficient R programmer. We hope you direct your new found skills towards the greater good and pass on the wisdom to others along the way.
279 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Type: Package
 2 | Package: efficientR
 3 | Title: Efficient R Programming
 4 | Version: 0.1.1
 5 | Authors@R: 
 6 |     c(person(given = "Colin",
 7 |              family = "Gillespie",
 8 |              role = c("aut", "cre"),
 9 |              email = "csgillespie@gmail.com"),
10 |       person(given = "Robin",
11 |              family = "Lovelace",
12 |              role = "aut"))
13 | Maintainer: Colin Gillespie <csgillespie@gmail.com>
14 | Description: This is a dummy travis package used to build the
15 |     forthcoming O'Reilly book, Efficient R Programming. The description
16 |     file is used to keep track of the packages used within this book.
17 | Depends: 
18 |     assertive.reflection,
19 |     benchmarkme,
20 |     bookdown,
21 |     cranlogs,
22 |     data.table (>= 1.9.6),
23 |     dbplyr,
24 |     devtools (>= 1.12.0),
25 |     DiagrammeR,
26 |     dplyr,
27 |     drat,
28 |     efficient,
29 |     formatR,
30 |     fortunes,
31 |     geosphere,
32 |     ggmap,
33 |     ggplot2 (>= 2.1.0),
34 |     ggplot2movies,
35 |     knitr,
36 |     lubridate,
37 |     maps,
38 |     microbenchmark,
39 |     profvis (>= 0.3.2),
40 |     pryr,
41 |     Rcpp (>= 0.12.7),
42 |     readr,
43 |     reticulate,
44 |     RSQLite,
45 |     swirl,
46 |     tibble (>= 1.1.0),
47 |     tidyr
48 | LinkingTo: 
49 |     Rcpp
50 | Remotes:
51 |     csgillespie/efficient
52 | Encoding: UTF-8
53 | LazyData: TRUE
54 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM rocker/tidyverse
 2 | 
 3 | # System libraries required by the R packages installed below
 4 | RUN apt-get update \
 5 |  && apt-get install -y --no-install-recommends \
 6 | 	  libjpeg-dev \
 7 |  && apt-get clean \
 8 |  && rm -rf /var/lib/apt/lists/*
 9 | 
10 | # Install the R packages the book depends on (listed in DESCRIPTION)
11 | RUN R -e "remotes::install_github('csgillespie/efficientR')"
12 | # Fetch and build the book. `su` cannot switch user mid-RUN; chown is below
13 | RUN cd /home/rstudio && \
14 |   wget https://github.com/csgillespie/efficientR/archive/master.zip && \
15 |   unzip master.zip && \
16 |   mv efficientR-master /home/rstudio/efficientR && \
17 |   cd efficientR && \
18 |   make html
19 | RUN chown -Rv rstudio /home/rstudio/efficientR
20 | 
21 | # Install RStudio
22 | RUN wget https://s3.amazonaws.com/rstudio-ide-build/server/bionic/amd64/rstudio-server-1.3.938-amd64.deb
23 | RUN dpkg -i rstudio-server-*-amd64.deb && \
24 |   rm rstudio-server-*-amd64.deb
25 | 
26 | RUN echo '{' >> /etc/rstudio/rstudio-prefs.json
27 | RUN echo '    "rmd_chunk_output_inline": false' >> /etc/rstudio/rstudio-prefs.json
28 | RUN echo '}' >> /etc/rstudio/rstudio-prefs.json
29 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | This work is licensed under the Creative Commons Attribution-NonCommercial-NoDerivs 3.0 United States License. To view a copy of this license, visit http://creativecommons.org/licenses/by-nc-nd/3.0/us/ or send a letter to Creative Commons, 444 Castro Street, Suite 900, Mountain View, California, 94041, USA.


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Every target below is a command, not a file to be built
 2 | .PHONY: html build pdf md install deploy clean cleaner
 3 | 
 4 | # Render the gitbook (HTML) version, then copy assets into _book/
 5 | html:
 6 | 	Rscript -e 'bookdown::render_book("index.Rmd", output_format = "bookdown::gitbook", clean = FALSE)'
 7 | 	cp -fvr css/style.css _book/
 8 | 	cp -fvr images _book/
 9 | 	cp -fvr _main.utf8.md _book/main.md
10 | 
11 | # Render the HTML book and open it in a browser
12 | build:
13 | 	$(MAKE) html
14 | 	Rscript -e 'browseURL("_book/index.html")'
15 | 
16 | # Render the PDF version
17 | pdf:
18 | 	Rscript -e 'bookdown::render_book("index.Rmd", output_format = "bookdown::pdf_book")'
19 | 
20 | # Same as pdf but with clean=FALSE, so intermediate files are kept
21 | md:
22 | 	Rscript -e 'bookdown::render_book("index.Rmd", output_format = "bookdown::pdf_book",clean=FALSE)'
23 | 
24 | # Install this repository (and the packages in DESCRIPTION) from GitHub
25 | install:
26 | 	Rscript -e 'devtools::install_github("csgillespie/efficientR")'
27 | 
28 | ## For Colin, who keeps forgetting how to deploy.
29 | deploy:
30 | 	Rscript -e 'bookdown::publish_book(render="local", account="csgillespie")'
31 | 
32 | # Remove bookdown output and temporary files
33 | clean:
34 | 	Rscript -e "bookdown::clean_book(TRUE)"
35 | 	rm -fvr *.log Rplots.pdf _bookdown_files land.sqlite3
36 | 
37 | # clean, plus the rsconnect directory, LaTeX by-products and stray HTML
38 | cleaner:
39 | 	$(MAKE) clean && rm -fvr rsconnect
40 | 	rm -frv *.aux *.out  *.toc # Latex output
41 | 	rm -fvr *.html # rogue html files
42 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Dummy file


--------------------------------------------------------------------------------
/R/.gitignore:
--------------------------------------------------------------------------------
1 | RcppExports.R
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## Efficient R programming by Colin Gillespie and Robin Lovelace
 2 | 
 3 | [![Build Status](https://travis-ci.org/csgillespie/efficientR.png?branch=master)](https://travis-ci.org/csgillespie/efficientR) 
 4 | [![Launch Rstudio Binder](http://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/csgillespie/efficientR/master?urlpath=rstudio)
 5 | 
 6 | ![alt text](figures/f0_front_scale.png)
 7 | 
 8 | The text and code for the forthcoming O'Reilly book: [Efficient R programming](https://csgillespie.github.io/efficientR/). Pull requests and general comments are welcome.
 9 | 
10 | Get a hard copy from: [Amazon (UK)](https://alexa.design/2pmrqBj), [Amazon (USA)](https://alexa.design/2pmfpf4), [O'Reilly](http://shop.oreilly.com/product/0636920047995.do)
11 | 
12 | For details on how to build this book, please see the [Appendix](https://csgillespie.github.io/efficientR/building-the-book-from-source.html).
13 | 
14 | To run the book in a Docker container run the following commands
15 | 
16 | ```bash
17 | # from Docker hub
18 | # docker run -e PASSWORD=pw --rm -p 8786:8787 robinlovelace/efficientR
19 | # local version from inside this repo:
20 | docker build -t efficientr docker/rstudio-no-build
21 | docker run -e PASSWORD=pw --rm -p 8785:8787 efficientr
22 | ```
23 | 


--------------------------------------------------------------------------------
/_bookdown.yml:
--------------------------------------------------------------------------------
1 | before_chapter_script: ["code/before_script.R"]
2 | delete_merged_file: true
3 | 


--------------------------------------------------------------------------------
/_output.yml:
--------------------------------------------------------------------------------
 1 | bookdown::gitbook:
 2 |   css: [css/style.css,style.css] #A hacky fix to make it work with chapter preview
 3 |   config:
 4 |     toc:
 5 |       collapse: section
 6 |       before: |
 7 |         <li><a href="./">Efficient R programming</a></li>
 8 |       after: |
 9 |         <li><a href="http://www.jumpingrivers.com">Colin Gillespie</a></li>
10 |         <li><a href="http://www.robinlovelace.net">Robin Lovelace</a></li>
11 |     edit:
12 |       link: https://github.com/csgillespie/efficientR/edit/master/%s
13 |       text: "Edit"
14 | bookdown::pdf_book:
15 |   includes:
16 |     in_header: preamble.tex
17 |   keep_tex: yes
18 | 


--------------------------------------------------------------------------------
/appendix.Rmd:
--------------------------------------------------------------------------------
 1 | # (APPENDIX) Appendix {-} 
 2 | 
 3 | # Building the book from source
 4 | 
 5 | The complete source of the book is available [online](https://github.com/csgillespie/efficientR). To build the book:
 6 | 
 7 | 1. Install the latest version of R
 8 |     * If you are using RStudio, make sure that's up-to-date as well
 9 | 1. Install the book dependencies.
10 | 
11 |     ```{r eval=FALSE}
12 |     # Make sure you are using the latest version of `devtools`
13 |     # Older versions do not work.
14 |     devtools::install_github("csgillespie/efficientR")
15 |     ```
16 |     
17 | 1. Clone the efficientR [repository](https://github.com/csgillespie/efficientR)
18 |     * See Chapter \@ref(collaboration) on efficient collaboration for an introduction
19 |       to git and GitHub.
20 | 1. If you are using `RStudio`, open `index.Rmd` and click `Knit`.
21 |     * Alternatively (for mainly Linux users) you can use the bundled `Makefile`
22 | 
23 | ## Package dependencies 
24 | 
25 | The book uses datasets stored in the **efficient** GitHub package, which can be installed (after **devtools** has been installed) as follows:
26 | 
27 | ```{r eval=FALSE}
28 | # Installs package dependencies shown below
29 | devtools::install_github("csgillespie/efficient",
30 |                          args = "--with-keep.source")
31 | ```
32 | 
33 | The book depends on the following CRAN packages:
34 | 
35 | ```{r is_travis, echo=FALSE}
36 | ## Don't generate bib file on travis
37 | is_travis = Sys.getenv("TRAVIS")
38 | is_travis = (is_travis == "true")
39 | pkgs_df = read.csv("extdata/package_list.csv", header=TRUE)
40 | ```
41 | 
42 | ```{r echo=FALSE, warning=FALSE, message=FALSE, eval=(!is_travis)}
43 | desc = read.dcf("DESCRIPTION")
44 | headings = dimnames(desc)[[2]]
45 | fields = which(headings %in% c("Depends", "Imports", "Suggests"))
46 | pkgs = paste(desc[fields], collapse = ", ")
47 | pkgs = gsub("\n", " ", pkgs)
48 | pkgs = strsplit(pkgs, ",")[[1]]
49 | pkgs = gsub(" ", "", pkgs)
50 | pkgs = gsub("\\(.*\\)", "", pkgs) # Remove versions from packages
51 | ext_pkgs = c("rio", "feather")  # issues installing on travis
52 | to_install = !pkgs %in% rownames(installed.packages())
53 | 
54 | if(any(to_install)){
55 |   install.packages(pkgs[to_install])
56 | }
57 | 
58 | if(!all(ext_pkgs %in% rownames(installed.packages()))) {
59 |   message("Installing rio and feather")
60 |   install.packages(ext_pkgs, repos="https://cran.rstudio.com/")
61 | }
62 | 
63 | # Build the package table: name, title (with citation key) and version
64 | pkgs = c(pkgs, ext_pkgs)
65 | pkgs = pkgs[order(pkgs)]
66 | pkgs_df = data.frame(Name = pkgs, Title = NA, cite = NA, version = NA)
67 | for(i in seq_along(pkgs)){
68 |   f = system.file(package = pkgs[i], "DESCRIPTION")
69 |   # Use the first line starting with the Title field (NA if there is none)
70 |   title = readLines(f)
71 |   title = title[grep("^Title: ", title)][1]
72 |   pkgs_df$Title[i] = sub("^Title: ", "", title)
73 |   pkgs_df$cite[i] = paste0("[@R-", pkgs[i], "]")
74 |   pkgs_df$version[i] = as.character(packageVersion(pkgs[i]))
75 | }
76 | pkgs_df[,2] = paste(pkgs_df[,2], pkgs_df[,3])
77 | pkgs_df = pkgs_df[,-3]
78 | write.csv(pkgs_df, "extdata/package_list.csv", row.names = FALSE)
79 | knitr::write_bib(pkgs, file="packages.bib")
80 | ```
81 | 
82 | ```{r echo=FALSE, message=FALSE, warning=FALSE}
83 | knitr::kable(pkgs_df)
84 | ```
85 | 
86 | # References {-}
87 | 


--------------------------------------------------------------------------------
/code/01-introduction_f3.R:
--------------------------------------------------------------------------------
 1 | # Plot sea-ice outlines (sea_ice() with mo = 'Sep', pole = 'N') for one
 2 | # year per decade. Downloaded data are cached in extdata/out-ice.Rds.
 3 | 
 4 | # Loaded unconditionally: ggplot() and ggsave() are used even when the
 5 | # cached data already exist
 6 | library("ggplot2")
 7 | if(!file.exists("extdata/out-ice.Rds")) {
 8 |   library("rnoaa")
 9 |   yrs = seq(1985, 2019, by = 10)
10 |   out = sapply(yrs, function(x)
11 |     sea_ice(year = x, mo = 'Sep', pole = 'N'))
12 |   names(out) = yrs
13 |   # The directory may already exist; do not warn about it
14 |   dir.create("extdata", showWarnings = FALSE)
15 |   saveRDS(out, "extdata/out-ice.Rds")
16 | } else {
17 |   out = readRDS("extdata/out-ice.Rds")
18 | }
19 | # bind_rows() replaces the deprecated rbind_all(); `.id` stores the list
20 | # names (the years) in a Year column
21 | df = dplyr::bind_rows(out, .id = "Year")
22 | # NOTE(review): xlims is computed from lat but applied to the x axis (long),
23 | # and ylims from long but applied to the y axis (lat) -- confirm intended
24 | xlims = quantile(df$lat, probs = c(0.01, 0.90))
25 | ylims = quantile(df$long, probs = c(0.01, 0.99))
26 | ggplot(df, aes(long, lat, group = paste(group, Year))) +
27 |   geom_path() +
28 |   geom_polygon(aes(fill = Year), alpha = 0.3) +
29 |   xlim(xlims) +
30 |   ylim(ylims) +
31 |   scale_fill_brewer(type = "seq") +
32 |   theme_minimal() +
33 |   theme(axis.title = element_blank(),
34 |         axis.text = element_blank())
35 | ggsave("figures/icesheet-change.png")
36 | 


--------------------------------------------------------------------------------
/code/02-blas.R:
--------------------------------------------------------------------------------
 1 | # Violin plot of benchmark timings for an optimised versus the standard BLAS
 2 | source("code/initialise.R")
 3 | library("ggplot2")
 4 | 
 5 | # Read both result sets and tag each with the BLAS that produced it
 6 | res_blas = readRDS("extdata/res_rl_blas.Rds")
 7 | res_blas$blas = "Optimized"
 8 | res_noblas = readRDS("extdata/res_rl_noblas.Rds")
 9 | res_noblas$blas = "Standard"
10 | 
11 | # Stack the results and give the benchmark groups readable labels
12 | res_all = rbind(res_blas, res_noblas)
13 | res_all$test_group = factor(res_all$test_group)
14 | levels(res_all$test_group) = c("Programming", "Matrix calculation", "Matrix function")
15 | 
16 | # Base layer: one violin per benchmark group, dodged by BLAS type
17 | g = ggplot(res_all) +
18 |   geom_violin(aes(test_group, elapsed, fill = blas),
19 |               position = position_dodge(0.9))
20 | 
21 | # Labels, theme, fill colours and y-axis range
22 | g1 = g +
23 |   labs(title = "Performance gains with BLAS",
24 |        x = "Benchmark", y = "Elapsed time", colour = NULL, fill = NULL) +
25 |   theme(panel.grid.major.y = element_line(colour = "gray90"),
26 |         panel.grid.minor = element_line(colour = NA),
27 |         panel.grid.major.x = element_line(colour = NA),
28 |         plot.title = element_text(size = 12, face = "bold",
29 |                                   hjust = 1, vjust = 0),
30 |         panel.background = element_rect(fill = NA),
31 |         legend.background = element_rect(fill = NA),
32 |         legend.position = c(0.93, 0.92),
33 |         axis.ticks.x = element_line(linetype = "blank")) +
34 |   scale_fill_manual(values = c(get_col(2), get_col(9))) +
35 |   scale_y_continuous(limits = c(0, 12), expand = c(0, 0))
36 | print(g1)
37 | 


--------------------------------------------------------------------------------
/code/03-programming_f1.R:
--------------------------------------------------------------------------------
 1 | source("code/initialise.R")
 2 | 
 3 | # Shade the area between the x-axis and the curve `fun` on [xmin, xmax].
 4 | #   fun         function (or function name) evaluated at the x values
 5 | #   xmin, xmax  limits of the shaded region
 6 | #   length      number of points used to trace the curve
 7 | #   col         fill colour passed to polygon()
 8 | shade_under_curve <- function(fun, xmin, xmax, length=100, 
 9 |                               col="grey"){
10 |   # Spell out length.out rather than relying on partial argument matching
11 |   xvals <- seq(xmin, xmax, length.out=length)
12 |   dvals <- match.fun(fun)(xvals)
13 |   # Outline: along y = 0 from left to right, then back along the curve
14 |   polygon(c(xvals,rev(xvals)),c(rep(0,length),rev(dvals)),col=col, border=NA)
15 | }
16 | 
17 | # Colour used for the curve and the points, with adjustable alpha
18 | col=function(alpha=255) rgb(85,130,169, alpha=alpha, maxColorValue=255)
19 | par(mar=c(3,3,2,1), mgp=c(2,0.4,0), tck=-.01,
20 |     cex.axis=0.9, las=1, xaxs='i',yaxs='i')
21 | # Empty canvas covering the unit square
22 | plot(0, type="n", xlim=c(0, 1), ylim=c(0, 1), axes=FALSE, frame=FALSE, xlab="", ylab="")
23 | 
24 | shade_under_curve(function(x) x^2, 0, 1, col="grey95")
25 | abline(h=seq(0, 1, 0.2), lty=1, col="grey90")
26 | curve(x^2, 0,1, add=TRUE, lwd=4, col=col())
27 | axis(1, tick=FALSE,  col.axis="grey50", cex.axis = 0.8)
28 | axis(2, tick=FALSE,  col.axis="grey50", cex.axis = 0.8)
29 | 
30 | text(0.77, 0.7, "f(x)", col=col(), font=2, cex=1.2)
31 | text(0, 0.97, "Miss", pos=4, font=1, cex=1)
32 | text(1, 0.03, "Hit", pos=2, font=1, cex=1)
33 | 
34 | title("Monte Carlo integration", adj=1, 
35 |       cex.main=0.9, font.main=2, col.main="black")
36 | 
37 | # Fixed seed so the same points are drawn on every run
38 | set.seed(5)
39 | N = 40
40 | px = runif(N); py=runif(N)
41 | # Points below the curve are hits (opaque), points above are misses (faded)
42 | hit = py < px^2
43 | miss = py > px^2
44 | points(px[hit], py[hit], pch=21, bg=col(255), col="grey10")
45 | points(px[miss], py[miss], pch=21, bg=col(100), col="grey80")
46 | 


--------------------------------------------------------------------------------
/code/03-programming_f3.R:
--------------------------------------------------------------------------------
 1 | source("code/initialise.R")
 2 | data(movies, package = "ggplot2movies")
 3 | ratings = movies[, 7:16]
 4 | popular = apply(ratings, 1, nnet::which.is.max)
 5 | # Share of rows in which each of the ten selected columns holds the row maximum
 6 | tab = prop.table(table(popular))
 7 | plot(tab)
 8 | 
 9 | par(mar = c(3, 3, 2, 1), mgp = c(2, 0.4, 0), tck = -.01,
10 |     cex.axis = 0.9, las = 1, xaxs = "i", yaxs = "i")
11 | plot(tab, xlab = "Movie rating", ylab = "Proportion of votes", axes = FALSE, frame = FALSE,
12 |      lwd = 6, col = "steelblue", xlim = c(0.75, 10.25), ylim = c(0, 0.25),
13 |      panel.first = abline(h = seq(0, 0.25, 0.05), lty = 3, col = "grey80"))
14 | # Axis labels without tick marks, then a right-aligned title
15 | axis(2, tick = FALSE, col.axis = "grey50", cex.axis = 0.8)
16 | axis(1, 1:10, 1:10, tick = FALSE, col.axis = "grey50", cex.axis = 0.8)
17 | title("Voting preference", adj = 1,
18 |       cex.main = 0.9, font.main = 2, col.main = "black")
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/code/03-programming_f4.R:
--------------------------------------------------------------------------------
 1 | # dd = NULL
 2 | # for(i in seq(2, 4, length.out=12)) {
 3 | #   x = rnorm(10^i)
 4 | #   dd_tmp = rbenchmark::benchmark(my_mean(x), cmp_mean(x), mean(x), 
 5 | #                                  columns=c("test", "elapsed", "relative"),
 6 | #                                  order="relative", replications=5000)
 7 | #   dd_tmp$i = i
 8 | #   dd = rbind(dd, dd_tmp)
 9 | # }
10 | # dd$p = 10^dd$i
11 | # dir.create("data", showWarnings = FALSE)
12 | # mean_comparison = dd
13 | # save(mean_comparison, file="extdata/mean_comparison.RData")
14 | 
15 | 
16 | 
17 | 
18 | source("code/initialise.R")
19 | load(file = "extdata/mean_comparison.RData")
20 | # Plot the stored relative timings against sample size (log-scaled x-axis)
21 | dd = mean_comparison
22 | par(mar = c(3, 3, 2, 1), mgp = c(2, 0.4, 0), tck = -.01,
23 |     cex.axis = 0.9, las = 1, xaxs = "i", yaxs = "i")
24 | plot(0, type = "n", ylim = c(0, 200), xlim = c(10^2, 10^4), axes = FALSE, frame = FALSE,
25 |      xlab = "Sample size", ylab = "Relative timings",
26 |      log = "x")
27 | abline(h = seq(0, 200, 50), lty = 1, col = "grey90")
28 | # One line per benchmarked expression; integer colours index the palette set by initialise.R
29 | m1 = dd[dd$test == "mean(x)", ]
30 | lines(m1$p, m1$relative, lwd = 3, col = 3)
31 | m1 = dd[dd$test == "cmp_mean(x)", ]
32 | lines(m1$p, m1$relative, col = 2, lwd = 3)
33 | m1 = dd[dd$test == "my_mean(x)", ]
34 | lines(m1$p, m1$relative, col = 5, lwd = 3)
35 | 
36 | ## Axis labels
37 | axis(2, tick = FALSE, col.axis = "grey50", cex.axis = 0.8)
38 | axis(1, at = 10^(2:4), labels = expression(10^2, 10^3, 10^4),
39 |      tick = FALSE, col.axis = "grey50", cex.axis = 0.8)
40 | 
41 | ## Title
42 | title("Compiled vs non-compiled", adj = 1,
43 |       cex.main = 0.9, font.main = 2, col.main = "black")
44 | 
45 | ## Line labels
46 | text(1000, 90, "Pure R", col = 5, lwd = 2, font = 2)
47 | text(1000, 20, "Compiled R", col = 2, font = 2)
48 | text(8000, 10, "mean", col = 3, font = 2)
49 | 


--------------------------------------------------------------------------------
/code/03-programming_f5.R:
--------------------------------------------------------------------------------
 1 | library(microbenchmark)
 2 | library(ggplot2)
 3 | source("code/initialise.R")
 4 | # set.seed(1)
 5 | # x = matrix(rnorm(100000), ncol=1000)
 6 | # z = microbenchmark::microbenchmark(
 7 | #   apply(x, 2, function(i) mean(i)/sd(x)), 
 8 | #   {sd_x = sd(x); apply(x, 2, function(i) mean(i)/sd_x)},
 9 | #   times=500, unit="s"
10 | # )
11 | #saveRDS(z, "extdata/03-f5.Rds")
12 | 
13 | z = readRDS("extdata/03-f5.Rds")
14 | plot(z)
15 | z$time = z$time / 10^9  # microbenchmark times are in nanoseconds; convert to seconds
16 | z$expr = factor(z$expr, labels = c("Standard", "Cached"))
17 | # One violin per expression on a log10 time axis with power-of-ten tick labels
18 | g = ggplot(z) +
19 |   geom_violin(aes(expr, time), position = position_dodge(0.9), bg = get_col(3)) +
20 |   scale_y_continuous(limits = c(10^-3, 10^1), expand = c(0, 0), breaks = 10^(-3:1),
21 |                      trans = "log10",
22 |                      labels = expression(10^-3, 10^-2,
23 |                                          10^-1, 10^0, 10^1))
24 | g
25 | g1 = g + labs(title = "Performance gains with cached variables",
26 |               x = NULL, y = "Elapsed time (secs)", colour = NULL, fill = NULL) +
27 |   theme(panel.grid.major.y = element_line(colour = "gray90"),
28 |         panel.grid.minor = element_line(colour = NA),
29 |         panel.grid.major.x = element_line(colour = NA),
30 |         plot.title = element_text(size = 12,
31 |                                   face = "bold", hjust = 1, vjust = 0),
32 |         panel.background = element_rect(fill = NA),
33 |         legend.background = element_rect(fill = NA),
34 |         legend.position = c(0.93, 0.92),
35 |         axis.ticks.x = element_line(linetype = "blank"),
36 |         axis.ticks.y = element_line(linetype = "blank"))
37 | print(g1)


--------------------------------------------------------------------------------
/code/04-project-planning_f1.R:
--------------------------------------------------------------------------------
 1 | source("code/initialise.R")
 2 | # Stylised activity levels for three project phases, built from beta densities
 3 | x = seq(0, 1, length.out=100)
 4 | dd = data.frame(Phase = rep(c("Planning", "Programming", "Write-up"), each=length(x)), 
 5 |                 Time = c(dbeta(x, 2, 6), dbeta(x, 2, 2) + 0.2, dbeta(x, 6, 2)), 
 6 |                 x = c(x, x, x))
 7 | dd$x = dd$x * 4
 8 | # (x is rescaled from [0, 1] to the [0, 4] range used for the plot's x-axis)
 9 | 
10 | par(mar=c(3,3,2,1), mgp=c(2,0.4,0), tck=-.01,
11 |     cex.axis=0.9, las=1, xaxs='i',yaxs='i')
12 | plot(0.1, type="n", ylim=c(0, 3), xlim=c(0, 4), axes=FALSE, frame=FALSE, 
13 |      xlab=NA, ylab="Level of activity")
14 | abline(h=0:4, lty=1, col="grey80")
15 | # One curve plus a text label per phase
16 | lines(dd$x[dd$Phase=="Planning"], dd$Time[dd$Phase=="Planning"], 
17 |       col=get_col(2), lwd=2)
18 | text(1.1, 2.8, "Planning", col=get_col(2))
19 | 
20 | lines(dd$x[dd$Phase=="Programming"], dd$Time[dd$Phase=="Programming"], 
21 |       col=get_col(3), lwd=2)
22 | text(2, 1.8, "Programming", col=get_col(3))
23 | 
24 | 
25 | lines(dd$x[dd$Phase=="Write-up"], dd$Time[dd$Phase=="Write-up"], 
26 |       col=get_col(1), lwd=2)
27 | text(2.9, 2.8, "Write-up", col=get_col(1))
28 | axis(1, c(0), c("Start"), hadj=0, tick=FALSE,  col.axis="grey50", cex.axis = 0.8)
29 | axis(1, c(4), "End", hadj=1, tick=FALSE,  col.axis="grey50", cex.axis = 0.8)
30 | # ("End" now passes tick=FALSE like "Start", so neither end label draws a tick mark)
31 | title("Key project phases", adj=1, 
32 |       cex.main=0.9, font.main=2, col.main="black")
33 | mtext("Time", side=1, padj=2)
34 | 
35 | 


--------------------------------------------------------------------------------
/code/04-project-planning_f2.R:
--------------------------------------------------------------------------------
 1 | DiagrammeR::mermaid("gantt
 2 |         dateFormat  YYYY-MM-DD
 3 | 
 4 |         section 2 chapters
 5 |         Write Chapter 2           :done,    des1, 2015-10-15,2015-11-24
 6 |         Write Chapter 3           :done,    des2, 2015-10-15,2015-11-24
 7 |         Review Chapter 2          :done,  des3, 2015-12-03,2016-01-10
 8 |         Review Chapter 3          :active,  des4, 2015-12-03,2016-01-10
 9 | 
10 |         section 1st half
11 |         Write Chapter 4          :crit, active, 2015-12-19,2016-01-19
12 |         Write Chapter 6          :crit, active, 2015-12-19,2016-01-19
13 |         Review Chapter 4          :active, 2016-01-30,2016-03-01
14 |         Review Chapter 6          :active, 2016-01-30,2016-03-01
15 | 
16 |         section 2nd half
17 |         Write Chapter 5               :active, c5, 2016-03-01,2016-04-15
18 |         Write Chapter 7               :active, c7, 2016-03-01,2016-04-15
19 |         Write Chapter 1               :active, c1, 2016-03-08,2016-05-03
20 |         Write Chapter 8               :active, c8, 2016-03-20,2016-05-03
21 |         Technical review              :crit, active, rev1, 2016-05-03,2016-05-30
22 |         Complete final draft          :active, fin1, 2016-05-30,2016-06-28
23 | ") # Gantt chart of the book schedule; task ids (des1, c5, rev1, ...) are kept unique


--------------------------------------------------------------------------------
/code/05-io_f1.R:
--------------------------------------------------------------------------------
 1 | source("code/initialise.R")
 2 | library(ggplot2)
 3 | library(data.table)
 4 | library(readr)
 5 | library("microbenchmark")
 6 | 
 7 | # # Start corresponds to 0.1 MB
 8 | # rows = 10^(seq(3.445,6, length.out = 50))
 9 | # cols = 2 * 10^(0:2)
10 | # res = NULL
11 | # for(i in seq_along(rows)) {
12 | #   for(k in 1:80) {
13 | #     for(j in seq_along(cols)) {
14 | #       no_of_rows = floor(rows[i]/(10^(j-1)))
15 | #       m = matrix(runif(no_of_rows * cols[j]), nrow = no_of_rows, ncol = cols[j])
16 | #       fname = tempfile()
17 | #       write.csv(m, file = fname, row.names = FALSE)
18 | #       mb = microbenchmark(times = 10,
19 | #                           base_default = read.csv(fname),
20 | #                           readr_default = read_csv(fname),
21 | #                           fread_default = fread(fname)
22 | #       )
23 | # 
24 | # 
25 | #       tab = tapply(mb$time/1000, mb$expr, mean)
26 | #       res_tmp = data.frame(exp = names(tab),
27 | #                            time = as.vector(tab), rows = no_of_rows,
28 | #                            cols = cols[j])
29 | #       unlink(fname)
30 | #       res = rbind(res, res_tmp)
31 | #       save(res, file="04_tmp.RData")
32 | #     }
33 | #   }
34 | #   message(i)
35 | # }
36 | #save(res, file="extdata/04-f3.RData")
37 | load("extdata/05-f1.RData")
38 | res = aggregate(time ~ cols+rows+ exp, mean, data=res)
39 | res$MB = res$cols*res$rows*18/1000000 ## Approximate
40 | res$cells = paste(res$rows, res$cols)
41 | # Relative time: each reader's mean time divided by the fastest reader's time
42 | # for the same (rows, cols) combination. ave() returns the per-group minimum
43 | # aligned with the rows of `res`, so no loop over repeated group labels is needed.
44 | res$Time = res$time / ave(res$time, res$cells, FUN = min)
45 | 
46 | res$type = factor(res$exp, labels=c("base", "data.table","readr"))
47 | 
48 | library("ggplot2")
49 | res$facet_cols = paste("No of columns:", res$cols)
50 | res = res[res$MB >= 0.1,]
51 | g = ggplot(res, aes(MB, Time)) + 
52 |   geom_line(aes(colour = type, linetype=type), size=1) + 
53 |   facet_grid(~ facet_cols) +
54 |   scale_x_continuous(limits=c(min(res$MB),36), expand = c(0, 0), trans="log10") + 
55 |   theme(panel.grid.major.y = element_line(colour = "gray90"), 
56 |         panel.grid.minor = element_line(colour = NA), 
57 |         panel.grid.major.x = element_line(colour = NA), 
58 |         plot.title = element_text(size = 12, 
59 |                                   face = "bold", hjust = 1, vjust = 0), 
60 |         panel.background = element_rect(fill = NA), 
61 |         legend.background = element_rect(fill = NA), 
62 |         legend.position = c(0.95, 0.92), 
63 |         axis.ticks.x = element_line(linetype = "blank"),
64 |         axis.ticks.y = element_line(linetype = "blank"),
65 |         legend.text = element_text(size = 11), 
66 |         legend.key = element_rect(fill = NA)) +
67 |   ylab("Relative time") +  xlab("File size (MB)") + 
68 |   scale_colour_manual(values=c(get_col(2), get_col(3), get_col(4))) + 
69 |   scale_y_continuous(limits=c(0,15), expand = c(0, 0)) 
70 | g1 = g + theme(strip.background = element_rect(fill = "white"), 
71 |           strip.text = element_text( hjust = 0.95, face="bold")) + 
72 |   guides(colour = "none", linetype = "none")  # "none" replaces the deprecated FALSE
73 | 
74 | # Direct line labels, drawn in the first facet only, instead of a legend
75 | labels = tibble::tribble(
76 |   ~MB, ~Time, ~type, ~facet_cols,
77 |   6, 7, "base", paste("No of columns:", 2),
78 |   0.28, 1.5, "data.table", paste("No of columns:", 2),
79 |   0.21, 13, "readr", paste("No of columns:", 2)
80 | )
81 | 
82 | g2 = g1 + geom_text(data=labels, aes(color=type, label=type))
83 | print(g2)
84 | 
85 | 
86 | 
87 | 
88 | 
89 | 
90 | 
91 | 
92 | 


--------------------------------------------------------------------------------
/code/05-io_f2.R:
--------------------------------------------------------------------------------
  1 | source("code/initialise.R")
  2 | library(microbenchmark)
  3 | library(dplyr)
  4 | library(ggplot2)
  5 | # Benchmark CSV, feather and RDS I/O for 20-column data frames whose row counts are
  6 | # 10^seq(start, end, length.out); `times` = microbenchmark runs, `reps` = repeats per size.
  7 | io_data = function(start=1, end=6, length.out=10, times = 5, reps=10){
  8 |   library(feather)# avoid travis issues
  9 |   cols = 20
 10 |   # Returns one row per (size, repetition, format): exp, read_time, write_time, rows, size, rel
 11 |   rows = 10^(seq(start,end, length.out = length.out))
 12 |   res = list()
 13 |   # Every repetition contributes one 3-row data frame (base, feather, rds)
 14 |   for(i in seq_along(rows)) {
 15 |     no_of_rows = floor(rows[i])
 16 |     for(k in seq_len(reps)) {
 17 |       m = matrix(runif(no_of_rows * cols), nrow = no_of_rows, ncol = cols)
 18 |       m = as.data.frame(m)
 19 |       fname = replicate(3, tempfile())
 20 |       # The write benchmark also creates the files that the read benchmark loads
 21 |       mb_write = microbenchmark(times = times,
 22 |                                 base = write.csv(m, file = fname[1], row.names = FALSE),
 23 |                                 feather = write_feather(m, fname[2]),
 24 |                                 rds = saveRDS(m, fname[3]), unit="s")
 25 |       
 26 |       mb_read = microbenchmark(times = times,
 27 |                                base = read.csv(fname[1]),
 28 |                                feather = read_feather(fname[2]),
 29 |                                rds = readRDS(fname[3]), unit="s")
 30 |       
 31 |       # File sizes in the order csv, feather, rds; `rel` below is relative to the csv
 32 |       sizes = file.size(fname)
 33 |       
 34 |       # microbenchmark records nanoseconds, so /1000 gives mean times in microseconds
 35 |       tab_read = tapply(mb_read$time/1000, mb_read$expr, mean)
 36 |       tab_write = tapply(mb_write$time/1000, mb_write$expr, mean)
 37 |       res_tmp = data.frame(exp = names(tab_read),
 38 |                            read_time = as.vector(tab_read),
 39 |                            write_time = as.vector(tab_write),
 40 |                            rows = no_of_rows,
 41 |                            size=sizes, rel=sizes/sizes[1])
 42 |       # Fix: store every repetition (previously only the last one per size was kept)
 43 |       res[[length(res) + 1]] = res_tmp
 44 |       unlink(fname)
 45 |       gc()
 46 |     }
 47 |     message(i)
 48 |   }
 49 |   # rbind() of an empty list is NULL, matching the old result when nothing was run
 50 |   
 51 |   do.call(rbind, res)
 52 | }
 53 | 
 54 | # resa = io_data(1, 2, 10, times=5000, reps=200)
 55 | # resb = io_data(2, 3, 10, times=1000, reps=100)
 56 | # resc = io_data(3, 4, 10, times=50, reps=100)
 57 | # resd = io_data(4, 5, 10, times=50, reps=50)
 58 | # rese = io_data(5, 6, 10, times=50, reps=50)
 59 | # res = rbind(resa, resb, resc, resd)#, rese)
 60 | 
 61 | 
 62 | 
 63 | #saveRDS(res, file="extdata/05-f2.RData")
 64 | res = readRDS(file="extdata/05-f2.RData")
 65 | 
 66 | # Average the stored measurements for every (row count, file format) pair
 67 | 
 68 | 
 69 | res1 = group_by(res, rows, exp)
 70 | res2 = summarise(res1, 
 71 |                  "read_time" = mean(read_time),
 72 |                  "write_time" = mean(write_time), 
 73 |                  "no_rows" = mean(rows), 
 74 |                  "size"=mean(size), "rel"=mean(rel))
 75 | 
 76 | # Approximate CSV size in MB: 20 columns, presumably ~18 bytes per cell - TODO confirm
 77 | res2$MB = 20*res2$rows*18/10^6
 78 | 
 79 | res2$Read_Time = NA
 80 | res2$Write_Time = NA
 81 | # Scale times by the first row of each size group (presumably "base", i.e. CSV)
 82 | for(i in unique(res2$no_rows)){
 83 |   sel = res2$no_rows == i
 84 |   res2$Read_Time[sel] = res2$read_time[sel] / res2$read_time[sel][1]
 85 |   res2$Write_Time[sel] = res2$write_time[sel] / res2$write_time[sel][1]
 86 | }
 87 | # NOTE(review): file_type is recycled, so res2 rows must cycle base, feather, rds - confirm
 88 | dd_plotting = data.frame(MB = rep(res2$MB, 3), 
 89 |                          time = c(res2$rel, res2$Read_Time, res2$Write_Time), 
 90 |                          file_type=c("base", "feather","rds"), 
 91 |                          type=rep(c("File size", "Read time", "Write time"), each=nrow(res2)))
 92 | 
 93 | dd_plotting$type = factor(dd_plotting$type, levels=c("File size", "Write time", "Read time"))
 94 | 
 95 | 
 96 | g = ggplot(dd_plotting, aes(MB, time)) + 
 97 |   geom_line(aes(colour = file_type, linetype=file_type), size=1) + 
 98 |   facet_grid(~ type) +
 99 |   scale_x_continuous(limits=c(min(dd_plotting$MB),36), expand = c(0, 0), trans="log10") + 
100 |   theme(panel.grid.major.y = element_line(colour = "gray90"), 
101 |         panel.grid.minor = element_line(colour = NA), 
102 |         panel.grid.major.x = element_line(colour = NA), 
103 |         plot.title = element_text(size = 12, 
104 |                                   face = "bold", hjust = 1, vjust = 0), 
105 |         panel.background = element_rect(fill = NA), 
106 |         legend.background = element_rect(fill = NA), 
107 |         legend.position = c(0.95, 0.92), 
108 |         axis.ticks.x = element_line(linetype = "blank"),
109 |         axis.ticks.y = element_line(linetype = "blank"),
110 |         legend.text = element_text(size = 11), 
111 |         legend.key = element_rect(fill = NA)) +
112 |   ylab("Relative to CSV") +  xlab("CSV file size (MB)") + 
113 |   scale_colour_manual(values=c(get_col(2), get_col(3), get_col(4))) + 
114 |   scale_y_continuous(limits=c(0,1.05), expand = c(0, 0)) 
115 | g1 = g + theme(strip.background = element_rect(fill = "white"), 
116 |                strip.text = element_text( hjust = 0.95, face="bold")) + 
117 |   guides(colour = "none", linetype = "none")  # "none" replaces the deprecated FALSE
118 | 
119 | g1
120 | labels = tibble::tribble(
121 |   ~MB, ~time, ~file_type, ~type,
122 |   0.01, 0.97, "base", "File size",
123 |   0.012, 0.85, "feather", "File size",
124 |   0.01, 0.37, "rds", "File size"
125 | )
126 | 
127 | # Direct line labels in the "File size" facet instead of a legend
128 | g2 = g1 + geom_text(data=labels, aes(color=file_type, label=file_type))
129 | print(g2)
130 | 
131 | 
132 | 
133 | 
134 | 
135 | 
136 | 
137 | 
138 | 
139 | 
140 | 
141 | 
142 | 
143 | 
144 | 
145 | 
146 | 


--------------------------------------------------------------------------------
/code/06-data-carpentry_f2.R:
--------------------------------------------------------------------------------
 1 | source("code/initialise.R")
 2 | res = readRDS("extdata/res-datatable.Rds")
 3 | # Empty canvas first; grid, lines, labels and axes are layered on below
 4 | par(mar = c(3, 3, 2, 1), mgp = c(2, 0.4, 0), tck = -.01,
 5 |     cex.axis = 0.9, las = 1, xaxs = "i", yaxs = "i")
 6 | plot(0.1, type = "n", xlim = c(0, 400), ylim = c(0, 80), axes = FALSE, frame = FALSE,
 7 |     cex.axis = 0.9, las = 1, xaxs = "i", yaxs = "i",
 8 |     xlab = NA, ylab = "Time (relative to the fastest)")
 9 | mtext("Size (MB)", side = 1, padj = 2)
10 | 
11 | abline(h = seq(0, 80, by = 20), lty = 1, col = "grey90")
12 | 
13 | # One line per benchmarked method, each labelled at the right-hand edge (x = 400)
14 | lines(res$MB[res$exp == "base_sqrbrkt"], res$Time[res$exp == "base_sqrbrkt"],
15 |       col = get_col(1), lwd = 2)
16 | text(400, 72, "base", col = get_col(1), pos = 2)
17 | lines(res$MB[res$exp == "dplyr_filter"], res$Time[res$exp == "dplyr_filter"],
18 |       col = get_col(2), lwd = 2)
19 | text(400, 45, "dplyr", col = get_col(2), pos = 2)
20 | lines(res$MB[res$exp == "dt_key"], res$Time[res$exp == "dt_key"],
21 |       col = get_col(3), lwd = 2)
22 | text(400, 4, "DT:Key", col = get_col(3), pos = 2)
23 | lines(res$MB[res$exp == "dt_standard"], res$Time[res$exp == "dt_standard"],
24 |       col = get_col(4), lwd = 2)
25 | text(400, 16, "DT", col = get_col(4), pos = 2)
26 | 
27 | axis(1, seq(0, 400, 400), tick = FALSE, col.axis = "grey50", cex.axis = 0.8)
28 | axis(2, seq(0, 80, by = 20), tick = FALSE, col.axis = "grey50", cex.axis = 0.8)
29 | 
30 | 
31 | title("Subsetting comparison", adj = 1,
32 |       cex.main = 0.9, font.main = 2, col.main = "black")
33 | 


--------------------------------------------------------------------------------
/code/07-performance_f3.R:
--------------------------------------------------------------------------------
 1 | source("code/initialise.R")
 2 | # ## Original idea from https://github.com/csgillespie/efficientR/issues/121 and @adamryczkowski
 3 | # 
 4 | # n_rep=10 #Number of times to generate a test sample. This smooths the chart.
 5 | # dd = matrix(0, nrow=100, ncol=3)
 6 | # ns = 10^(seq(from=log10(10), to=log10(10^6), length.out =nrow(dd)))
 7 | # 
 8 | # tmp = matrix(0, nrow=n_rep, ncol=2)
 9 | # for (i in seq_len(nrow(dd)))
10 | # {
11 | #   for(j in seq_len(n_rep))
12 | #   {
13 | #     s = sample(c(TRUE, FALSE), ns[i], TRUE, prob=c(1-0.0001,0.0001))
14 | #     tmp[j,] = summary(
15 | #      microbenchmark::microbenchmark(
16 | #         which.max(s),
17 | #         which(s)[[1]])
18 | #       )[['median']]
19 | #   }
20 | #   dd[i,] = c(ns[i], colMeans(tmp))
21 | #   cat(".")
22 | # }
23 | #saveRDS(dd, file="extdata/07-which_comparison.RData")
24 | dd = readRDS(file = "extdata/07-which_comparison.RData")
25 | # y = column 3 / column 2; per the commented generator: which(s)[[1]] time / which.max(s) time
26 | par(mar = c(3, 3, 2, 1), mgp = c(2, 0.4, 0), tck = -.01,
27 |     cex.axis = 0.9, las = 1, xaxs = "i", yaxs = "i")
28 | plot(dd[, 1], dd[, 3] / dd[, 2], log = "xy",
29 |      pch = 21, xlim = c(9, 10^4), ylim = c(1, 10^2),
30 |      axes = FALSE, frame = FALSE,
31 |      ylab = "Relative speed", xlab = "Vector length",
32 |      bg = "steelblue", panel.first = {abline(h = 10^(0:2), lty = 1, col = "grey90");
33 |        abline(h = c(2, 5, 20, 50), lty = 1, col = "grey90")})
34 | 
35 | # Axis labels without tick marks; the x-axis shows powers of ten
36 | axis(2, tick = FALSE, col.axis = "grey50", cex.axis = 0.8)
37 | axis(1, at = 10^(1:4), labels = expression(10^1, 10^2, 10^3, 10^4), tick = FALSE,
38 |      col.axis = "grey50", cex.axis = 0.8)
39 | 
40 | # NOTE(review): title says which.min() but the commented benchmark times which.max() - confirm
41 | title("which.min() vs which()", adj = 1,
42 |       cex.main = 0.9, font.main = 2, col.main = "black")
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/code/07-performance_f5.R:
--------------------------------------------------------------------------------
 1 | source("code/initialise.R")
 2 | 
 3 | # Code to download logs of various packages
 4 | # # Not run to avoid cranlogs dependency for book
 5 | # dd = cranlogs::cran_downloads(packages = c("V8", "Rcpp", "rPython", "rJava"),
 6 | #                                from = "2013-01-01", to = "2020-09-01")
 7 | # dd$Downloads <- ave(
 8 | #   dd$count,
 9 | #   dd$package,
10 | #   FUN = function(x)
11 | #     zoo::rollmean(x, k = 30, na.pad = T)
12 | # )
13 | #  saveRDS(dd, "extdata/cranlog.Rds")
14 | 
15 | dd = readRDS("extdata/cranlog.Rds")
16 | dd = dd[dd$count > 0, ]
17 | # Split the download log by package
18 | v8 = dd[dd$package == "V8", ]
19 | rcpp = dd[dd$package == "Rcpp", ]
20 | rjava = dd[dd$package == "rJava", ]
21 | rpython = dd[dd$package == "rPython", ]
22 | 
23 | par(mar = c(3, 3, 2, 1), mgp = c(2, 0.4, 0), tck = -.01,
24 |     cex.axis = 0.9, las = 1, xaxs = "i", yaxs = "i")
25 | 
26 | # Blank graph
27 | start = as.Date("2013-01-01"); end = as.Date("2021-02-01")
28 | plot(rcpp$date[1], type = "n", xlim = c(start, end), ylim = c(10^0, 7 * 10^4),
29 |      axes = FALSE, frame.plot = FALSE,
30 |      xlab = "Time", ylab = "Downloads per day",
31 |      log = "y")
32 | abline(h = 10^(0:4), lty = 1, col = "grey80")
33 | 
34 | # Add R versions
35 | R_dates = as.Date(c("2013-04-03", "2014-04-03", "2015-04-03", "2016-04-03",
36 |                     "2017-04-03", "2018-04-03", "2019-04-03", "2020-04-03"))
37 | R_vers = c(paste0("R 3.", 0:6), "R 4.0")
38 | abline(v = R_dates, col = "grey90", lty = 2)
39 | text(R_dates, 55000, R_vers, pos = 4, col = "grey50", cex = 0.9)
40 | 
41 | # Draw one download curve per package
42 | lines(rcpp$date, rcpp$Downloads, lwd = 3, col = 3)
43 | lines(v8$date, v8$Downloads, lwd = 2, col = 4)
44 | lines(rjava$date, rjava$Downloads, lwd = 2, col = 2)
45 | lines(rpython$date, rpython$Downloads, lwd = 2, col = 1)
46 | 
47 | # Add axis
48 | axis(2, tick = FALSE, col.axis = "grey50", cex.axis = 0.8)
49 | axis(1, at = as.Date(paste0(seq(2013, 2021, by = 2), "-01-01")),
50 |      labels = seq(2013, 2021, by = 2),
51 |      tick = FALSE, col.axis = "grey50", cex.axis = 0.8)
52 | # (x-axis labels: 1 January of every second year, 2013 to 2021)
53 | 
54 | 
55 | 
56 | title("The rise of Rcpp", adj = 1,
57 |       cex.main = 0.9, font.main = 2, col.main = "black")
58 | 
59 | # Label lines
60 | text(as.Date("2020-06-01"), 6, "rPython", col = 1, font = 2, cex = 0.9)
61 | text(as.Date("2020-06-01"), 20000, "Rcpp", col = 3, font = 2, cex = 0.9)
62 | text(as.Date("2020-06-01"), 800, "V8", col = 4, font = 2, cex = 0.9)
63 | text(as.Date("2020-06-01"), 4500, "rJava", col = 2, font = 2, cex = 0.9)
64 | 


--------------------------------------------------------------------------------
/code/07-performance_f6.R:
--------------------------------------------------------------------------------
 1 | #save(z, file="extdata/07-rcpp_comparison.RData")
 2 | load("extdata/07-rcpp_comparison.RData")
 3 | source("code/initialise.R")
 4 | library(ggplot2)
 5 | # Order the benchmarked expressions and give them display names in one step
 6 | z$expr = factor(z$expr, levels=c("mean(x)",  "mean_c(x)", "com_mean_r(x)", "mean_r(x)"),
 7 |                 labels=c("Base",  "Rcpp", "Byte compiled R", "Pure R"))
 8 | # microbenchmark records nanoseconds, so /10^6 yields milliseconds (assumes z is unconverted)
 9 | z$time = z$time/10^6
10 | g = ggplot(z) +
11 |   geom_violin(aes(expr, time),position=position_dodge(0.9), bg=get_col(3)) +
12 |   scale_y_continuous(limits=c(10^-3, 10^1), expand=c(0, 0), breaks = 10^(-3:1),
13 |                      trans="log10",
14 |                      labels=c(expression(10^-3),expression(10^-2),
15 |                               expression(10^-1),expression(10^0),expression(10^1)))
16 | # Fix: the y label now states milliseconds, matching the division by 10^6 above
17 | g1 = g +  labs(title = "Performance gains with Rcpp",
18 |            x = NULL, y = "Elapsed time (ms)",colour = NULL, fill = NULL) +
19 |   theme(panel.grid.major.y = element_line(colour = "gray90"),
20 |         panel.grid.minor = element_line(colour = NA),
21 |         panel.grid.major.x = element_line(colour = NA),
22 |         plot.title = element_text(size = 12,
23 |                                   face = "bold", hjust = 1, vjust = 0),
24 |         panel.background = element_rect(fill = NA),
25 |         legend.background = element_rect(fill = NA),
26 |         legend.position = c(0.93, 0.92),
27 |         axis.ticks.x = element_line(linetype = "blank"),
28 |         axis.ticks.y = element_line(linetype = "blank"))
29 | print(g1)
30 | 


--------------------------------------------------------------------------------
/code/08-hardware_benchmarks.R:
--------------------------------------------------------------------------------
 1 | library("benchmarkme")
 2 | library(ggplot2)
 3 | # results = plot_past()
 4 | # times = sort(tapply(results$time, results$id, mean))
 5 | # times = times/min(times)
 6 | # past_results = data.frame(times, rank=1:length(times))
 7 | #save(past_results, file="extdata/past_results.RData")
 8 | load("extdata/past_results.RData")
 9 | source("code/initialise.R")
10 | par(mar = c(3, 3, 2, 1), mgp = c(2, 0.4, 0), tck = -.01,
11 |     cex.axis = 0.9, las = 1, xaxs = "i", yaxs = "i")
12 | plot(0.1, type = "n", ylim = c(0.95, 100), xlim = c(0, 158), axes = FALSE, frame = FALSE,
13 |      xlab = "Rank", ylab = "Relative time",
14 |      log = "y", panel.first = abline(h = c(1, 2, 5, 10, 20, 50, 100), lty = 1, col = "grey90"))
15 | points(past_results$rank, past_results$times,
16 |        pch = 21, bg = get_col(3, 240),
17 |        col = "grey90", cex = 0.9)
18 | 
19 | # The y-axis is log-scaled, so its labels are placed at 1, 2, 5, 10, 20, 50, 100
20 | ## Axis
21 | axis(1, tick = FALSE, col.axis = "grey50", cex.axis = 0.8)
22 | axis(2, c(1, 2, 5, 10, 20, 50, 100), c(1, 2, 5, 10, 20, 50, 100), tick = FALSE, col.axis = "grey50", cex.axis = 0.8)
23 | 
24 | ## Point out the slow CPU
25 | text(130, 50, "Intel Atom @ 1.66GHz", pos = 3, cex = 0.9)
26 | 
27 | ## Title
28 | title("CPU benchmarks", adj = 1,
29 |       cex.main = 0.9, font.main = 2, col.main = "black")
30 | 
31 | 
32 | 
33 | 


--------------------------------------------------------------------------------
/code/08-hardware_cpu_speed.R:
--------------------------------------------------------------------------------
 1 | source("code/initialise.R")
 2 | load("extdata/clock_speed.RData")
 3 | # Empty log-scaled canvas, then grid lines and a dashed reference line
 4 | par(mar = c(3, 3, 2, 1), mgp = c(2, 0.4, 0), tck = -.01,
 5 |     cex.axis = 0.9, las = 1, xaxs = "i", yaxs = "i")
 6 | plot(0.1, type = "n", ylim = c(0.9, 10000), xlim = c(1980, 2011), axes = FALSE, frame = FALSE,
 7 |      xlab = "", ylab = "Clock speed (MHz)",
 8 |      log = "y")
 9 | abline(h = 10^(-1:4), lty = 3, col = "grey80")
10 | abline(h = 3600, lty = 2, col = "grey20")
11 | # NOTE(review): the dashed line sits at 3600 MHz but is labelled "3.4 GHz" below - confirm
12 | axis(1, tick = FALSE, col.axis = "grey50", cex.axis = 0.8)
13 | axis(2, 10^c(0:4), c(expression(10^0), expression(10^1), expression(10^2), expression(10^3),
14 |                     expression(10^4)), tick = FALSE, col.axis = "grey50", cex.axis = 0.8)
15 | points(clock_speed$Year, clock_speed$MIPS, pch = 21, bg = get_col(3, 240),
16 |        col = "grey90", cex = 0.9)
17 | 
18 | text(1980, 5000, "3.4 GHz", col = "grey20", font.main = 2, cex = 1, pos = 4)
19 | # NOTE(review): font.main looks ineffective in text(); font = 2 may have been intended - confirm
20 | title("CPU clock speed", adj = 1,
21 |       cex.main = 0.9, font.main = 2, col.main = "black")
22 | 


--------------------------------------------------------------------------------
/code/before_script.R:
--------------------------------------------------------------------------------
 1 | ## methods has to be attached explicitly when running under Rscript
 2 | library("methods")
 3 | library("microbenchmark")
 4 | ## Load USArrests explicitly (CSG's setup does not load the datasets package)
 5 | data("USArrests", package = "datasets")
 6 | 
 7 | ## knitr chunk defaults
 8 | knitr::opts_chunk$set(comment = "#>", collapse = TRUE, cache = TRUE,
 9 |                       fig.align = "center", fig.pos = "t")
10 | 
11 | ## Fixed seed and compact printed output
12 | set.seed(2016)
13 | options(
14 |   digits = 3,
15 |   dplyr.print_min = 4,
16 |   dplyr.print_max = 4
17 | )


--------------------------------------------------------------------------------
/code/docstats.R:
--------------------------------------------------------------------------------
 1 | library(stringi)
 2 | library(dplyr)
 3 | 
 4 | # Word count of every R Markdown file in the working directory, as reported by
 5 | # stri_stats_latex(); the result is one row per file with columns Chapter and Words.
 6 | f = list.files(pattern = "\\.Rmd$")  # anchored: skip names that merely contain "Rmd"
 7 | 
 8 | doc_stats = tibble(
 9 |   Chapter = rep(NA_character_, length(f)),
10 |   Words = rep(NA_integer_, length(f)))
11 | 
12 | for(i in seq_along(f)){  # seq_along() does nothing when no file matches (1:0 would not)
13 |   doc_stats$Chapter[i] = f[i]
14 |   d = readLines(f[i])
15 |   s = stri_stats_latex(d)
16 |   doc_stats$Words[i] = s["Words"]
17 | }
18 | 
19 | doc_stats
20 | 
21 | # # code to rename chapters (run once)
22 | # oldnums = 5:10
23 | # newnums = oldnums + 1
24 | # oldnums[nchar(oldnums) == 1] = paste0("0", oldnums[nchar(oldnums) == 1])
25 | # newnums[nchar(newnums) == 1] = paste0("0", newnums[nchar(newnums) == 1])
26 | # i = 1
27 | # fnew = f
28 | # for(i in seq_along(oldnums)){
29 | #   j = grep(pattern = oldnums[i], f)
30 | #   fnew[j] = gsub(pattern = oldnums[i], replacement = newnums[i], x = f[j])
31 | #   file.rename(f[j], fnew[j])
32 | # }
33 | # 
34 | # fnew
35 | 
36 | 


--------------------------------------------------------------------------------
/code/initialise.R:
--------------------------------------------------------------------------------
 1 | alpha=255
 2 | 
 3 | # Book palette lookup: get_col(i) returns colour(s) number i (1-7) as hex strings
 4 | # built by rgb(); `alpha` is the 0-255 opacity passed to every rgb() call.
 5 | get_col = function(i, alpha=255) {
 6 |   red   = c(200, 105,  85, 204, 183, 131,  63)
 7 |   green = c( 79, 147, 130,  74, 110, 108, 142)
 8 |   blue  = c(178,  45, 169,  83,  39, 192,  96)
 9 |   one_colour = function(r, g, b) rgb(r, g, b, alpha=alpha, maxColorValue=255)
10 |   unlist(Map(one_colour, red, green, blue))[i]
11 | }
12 | 
13 | # The seven colours at the global `alpha` become the base-graphics palette, so
14 | # integer colours (col = 1, ..., 7) in plotting calls refer to them.
15 | colours = get_col(1:7, alpha)
16 | palette(colours)
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/css/style.css:
--------------------------------------------------------------------------------
  1 | .book .book-header h1 {
  2 |   opacity: 1;
  3 |   text-align: left;
  4 | }
  5 | /* Spacing and colour of the #header title, author and date */
  6 | #header .title {
  7 |   margin-bottom: 0;
  8 | }
  9 | #header h4.author {
 10 |   margin: 0;
 11 |   color: #666;
 12 | }
 13 | 
 14 | #header h4.author em {
 15 |   font-style: normal;
 16 | }
 17 | 
 18 | #header h4.date {
 19 |   margin: 0.5em;
 20 | }
 21 | /* #TOC rendered as a horizontal menu bar */
 22 | #TOC ul,
 23 | #TOC li,
 24 | #TOC span,
 25 | #TOC a {
 26 |   margin: 0;
 27 |   padding: 0;
 28 |   position: relative;
 29 | }
 30 | #TOC {
 31 |   line-height: 1;
 32 |   border-radius: 5px 5px 0 0;
 33 |   background: #141414; /* solid colour declared before the gradient */
 34 |   background: linear-gradient(to bottom, #333 0%, #141414 100%);
 35 |   border-bottom: 2px solid #0fa1e0;
 36 |   width: auto;
 37 | }
 38 | #TOC:after,
 39 | #TOC ul:after { /* clears the floated list items */
 40 |   content: '';
 41 |   display: block;
 42 |   clear: both;
 43 | }
 44 | #TOC a {
 45 |   background: #141414;
 46 |   background: linear-gradient(to bottom, #333 0%, #141414 100%);
 47 |   color: #fff;
 48 |   display: block;
 49 |   padding: 19px 20px;
 50 |   text-decoration: none;
 51 |   text-shadow: none;
 52 | }
 53 | #TOC ul {
 54 |   list-style: none;
 55 | }
 56 | #TOC > ul > li {
 57 |   display: inline-block;
 58 |   float: left;
 59 |   margin: 0;
 60 | }
 61 | #TOC > ul > li > a {
 62 |   color: #fff;
 63 | }
 64 | #TOC > ul > li:hover:after { /* zero-size box whose bottom border forms a triangle */
 65 |   content: '';
 66 |   display: block;
 67 |   width: 0;
 68 |   height: 0;
 69 |   position: absolute;
 70 |   left: 50%;
 71 |   bottom: 0;
 72 |   border-left: 10px solid transparent;
 73 |   border-right: 10px solid transparent;
 74 |   border-bottom: 10px solid #0fa1e0;
 75 |   margin-left: -10px;
 76 | }
 77 | #TOC > ul > li:first-child > a {
 78 |   border-radius: 5px 0 0 0;
 79 | }
 80 | #TOC.align-right > ul > li:first-child > a,
 81 | #TOC.align-center > ul > li:first-child > a {
 82 |   border-radius: 0;
 83 | }
 84 | #TOC.align-right > ul > li:last-child > a {
 85 |   border-radius: 0 5px 0 0;
 86 | }
 87 | #TOC > ul > li.active > a,
 88 | #TOC > ul > li:hover > a { /* darker background for the active or hovered entry */
 89 |   color: #fff;
 90 |   box-shadow: inset 0 0 3px #000;
 91 |   background: #070707;
 92 |   background: linear-gradient(to bottom, #262626 0%, #070707 100%);
 93 | }
 94 | #TOC .has-sub {
 95 |   z-index: 1;
 96 | }
 97 | #TOC .has-sub:hover > ul { /* reveal the nested list while its parent is hovered */
 98 |   display: block;
 99 | }
100 | #TOC .has-sub ul {
101 |   display: none;
102 |   position: absolute;
103 |   width: 200px;
104 |   top: 100%;
105 |   left: 0;
106 | }
107 | #TOC .has-sub ul li a {
108 |   background: #0fa1e0;
109 |   border-bottom: 1px dotted #31b7f1;
110 |   filter: none;
111 |   display: block;
112 |   line-height: 120%;
113 |   padding: 10px;
114 |   color: #fff;
115 | }
116 | #TOC .has-sub ul li:hover a {
117 |   background: #0c7fb0;
118 | }
119 | #TOC ul ul li:hover > a {
120 |   color: #ffffff;
121 | }
122 | #TOC .has-sub .has-sub:hover > ul {
123 |   display: block;
124 | }
125 | #TOC .has-sub .has-sub ul {
126 |   display: none;
127 |   position: absolute;
128 |   left: 100%;
129 |   top: 0;
130 | }
131 | #TOC .has-sub .has-sub ul li a {
132 |   background: #0c7fb0;
133 |   border-bottom: 1px dotted #31b7f1;
134 | }
135 | #TOC .has-sub .has-sub ul li a:hover {
136 |   background: #0a6d98;
137 | }
138 | #TOC ul ul li.last > a,
139 | #TOC ul ul li:last-child > a,
140 | #TOC ul ul ul li.last > a,
141 | #TOC ul ul ul li:last-child > a,
142 | #TOC .has-sub ul li:last-child > a,
143 | #TOC .has-sub ul li.last > a {
144 |   border-bottom: 0;
145 | }
146 | .rmdwarning, .rmdtip, .rmdnote {
147 |   margin-bottom: 10px;
148 |   background: #f5f5f5 5px center/3em no-repeat;
149 |   padding: 1em 1em 1em 4em;
150 | }
151 | /* Each callout class supplies the icon for the shared background layer. */
152 | .rmdnote {
153 |   background-image: url("images/note.png");
154 | }
155 | .rmdtip {
156 |   background-image: url("images/tip.png");
157 | }
158 | .rmdwarning {
159 |   background-image: url("images/warning.png");
160 | }


--------------------------------------------------------------------------------
/deploy.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Publishes the rendered book (_book/) to the gh-pages branch on GitHub.
 3 | # Reads GITHUB_PAT and TRAVIS_BUILD_NUMBER from the environment; `nounset`
 4 | # makes the script stop if either is missing, `errexit` if any step fails.
 5 | set -o errexit -o nounset
 6 | BASE_REPO=$PWD
 7 | 
 8 | # Check out gh-pages into ../gh-pages, copy the freshly built book over
 9 | # it, then commit and push the result.
10 | update_website() {
11 |   cd ..; mkdir gh-pages; cd gh-pages
12 |   git init
13 |   git config user.name "Colin Gillespie"
14 |   git config user.email "csgillespie@gmail.com"
15 |   git config --global push.default simple
16 |   # The access token is embedded in the remote URL, and stderr of the
17 |   # network commands (fetch, push) is written to err.txt.
18 |   # NOTE(review): presumably to keep the token out of the build log; confirm.
19 |   git remote add upstream "https://$GITHUB_PAT@github.com/csgillespie/efficientR.git"
20 |   git fetch --depth 1 upstream 2>err.txt
21 |   git checkout gh-pages
22 | 
23 |   # BASE_REPO is quoted so a checkout path containing spaces or glob
24 |   # characters is not word-split; the trailing /* stays unquoted to expand.
25 |   cp -fvr "$BASE_REPO"/_book/* .
26 |   git add *.html; git add libs/; git add figures/; git add style.css; git add images/;
27 |   git add _main_files/*; git add *.json; git add main.md
28 |   git commit -a -m "Updating book (${TRAVIS_BUILD_NUMBER})"
29 |   git status
30 |   git push 2>err.txt
31 |   cd ..
32 | }
33 | 
34 | update_website
35 | 


--------------------------------------------------------------------------------
/efficientR.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: knitr
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Website
19 | 


--------------------------------------------------------------------------------
/extdata/.gitignore:
--------------------------------------------------------------------------------
1 | voc_*
2 | co2.RData
3 | co2.Rds
4 | miniaa.Rds
5 | 


--------------------------------------------------------------------------------
/extdata/03-f5.Rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/extdata/03-f5.Rds


--------------------------------------------------------------------------------
/extdata/05-f1.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/extdata/05-f1.RData


--------------------------------------------------------------------------------
/extdata/05-f2.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/extdata/05-f2.RData


--------------------------------------------------------------------------------
/extdata/07-rcpp_comparison.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/extdata/07-rcpp_comparison.RData


--------------------------------------------------------------------------------
/extdata/07-which_comparison.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/extdata/07-which_comparison.RData


--------------------------------------------------------------------------------
/extdata/clock_speed.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/extdata/clock_speed.RData


--------------------------------------------------------------------------------
/extdata/co2.csv:
--------------------------------------------------------------------------------
  1 | "","time","co2"
  2 | "1",1959,315.42
  3 | "2",1959.08333333334,316.31
  4 | "3",1959.16666666668,316.5
  5 | "4",1959.25000000002,317.56
  6 | "5",1959.33333333336,318.13
  7 | "6",1959.4166666667,318
  8 | "7",1959.50000000004,316.39
  9 | "8",1959.58333333338,314.65
 10 | "9",1959.66666666672,313.68
 11 | "10",1959.75000000006,313.18
 12 | "11",1959.8333333334,314.66
 13 | "12",1959.91666666675,315.43
 14 | "13",1960.00000000009,316.27
 15 | "14",1960.08333333343,316.81
 16 | "15",1960.16666666677,317.42
 17 | "16",1960.25000000011,318.87
 18 | "17",1960.33333333345,319.87
 19 | "18",1960.41666666679,319.43
 20 | "19",1960.50000000013,318.01
 21 | "20",1960.58333333347,315.74
 22 | "21",1960.66666666681,314
 23 | "22",1960.75000000015,313.68
 24 | "23",1960.83333333349,314.84
 25 | "24",1960.91666666683,316.03
 26 | "25",1961.00000000017,316.73
 27 | "26",1961.08333333351,317.54
 28 | "27",1961.16666666685,318.38
 29 | "28",1961.25000000019,319.31
 30 | "29",1961.33333333353,320.42
 31 | "30",1961.41666666687,319.61
 32 | "31",1961.50000000021,318.42
 33 | "32",1961.58333333355,316.63
 34 | "33",1961.6666666669,314.83
 35 | "34",1961.75000000024,315.16
 36 | "35",1961.83333333358,315.94
 37 | "36",1961.91666666692,316.85
 38 | "37",1962.00000000026,317.78
 39 | "38",1962.0833333336,318.4
 40 | "39",1962.16666666694,319.53
 41 | "40",1962.25000000028,320.42
 42 | "41",1962.33333333362,320.85
 43 | "42",1962.41666666696,320.45
 44 | "43",1962.5000000003,319.45
 45 | "44",1962.58333333364,317.25
 46 | "45",1962.66666666698,316.11
 47 | "46",1962.75000000032,315.27
 48 | "47",1962.83333333366,316.53
 49 | "48",1962.916666667,317.53
 50 | "49",1963.00000000034,318.58
 51 | "50",1963.08333333368,318.92
 52 | "51",1963.16666666702,319.7
 53 | "52",1963.25000000036,321.22
 54 | "53",1963.3333333337,322.08
 55 | "54",1963.41666666704,321.31
 56 | "55",1963.50000000039,319.58
 57 | "56",1963.58333333373,317.61
 58 | "57",1963.66666666707,316.05
 59 | "58",1963.75000000041,315.83
 60 | "59",1963.83333333375,316.91
 61 | "60",1963.91666666709,318.2
 62 | "61",1964.00000000043,319.41
 63 | "62",1964.08333333377,320.07
 64 | "63",1964.16666666711,320.74
 65 | "64",1964.25000000045,321.4
 66 | "65",1964.33333333379,322.06
 67 | "66",1964.41666666713,321.73
 68 | "67",1964.50000000047,320.27
 69 | "68",1964.58333333381,318.54
 70 | "69",1964.66666666715,316.54
 71 | "70",1964.75000000049,316.71
 72 | "71",1964.83333333383,317.53
 73 | "72",1964.91666666717,318.55
 74 | "73",1965.00000000051,319.27
 75 | "74",1965.08333333385,320.28
 76 | "75",1965.16666666719,320.73
 77 | "76",1965.25000000054,321.97
 78 | "77",1965.33333333388,322
 79 | "78",1965.41666666722,321.71
 80 | "79",1965.50000000056,321.05
 81 | "80",1965.5833333339,318.71
 82 | "81",1965.66666666724,317.66
 83 | "82",1965.75000000058,317.14
 84 | "83",1965.83333333392,318.7
 85 | "84",1965.91666666726,319.25
 86 | "85",1966.0000000006,320.46
 87 | "86",1966.08333333394,321.43
 88 | "87",1966.16666666728,322.23
 89 | "88",1966.25000000062,323.54
 90 | "89",1966.33333333396,323.91
 91 | "90",1966.4166666673,323.59
 92 | "91",1966.50000000064,322.24
 93 | "92",1966.58333333398,320.2
 94 | "93",1966.66666666732,318.48
 95 | "94",1966.75000000066,317.94
 96 | "95",1966.833333334,319.63
 97 | "96",1966.91666666734,320.87
 98 | "97",1967.00000000069,322.17
 99 | "98",1967.08333333403,322.34
100 | "99",1967.16666666737,322.88
101 | "100",1967.25000000071,324.25
102 | "101",1967.33333333405,324.83
103 | "102",1967.41666666739,323.93
104 | "103",1967.50000000073,322.38
105 | "104",1967.58333333407,320.76
106 | "105",1967.66666666741,319.1
107 | "106",1967.75000000075,319.24
108 | "107",1967.83333333409,320.56
109 | "108",1967.91666666743,321.8
110 | "109",1968.00000000077,322.4
111 | "110",1968.08333333411,322.99
112 | "111",1968.16666666745,323.73
113 | "112",1968.25000000079,324.86
114 | "113",1968.33333333413,325.4
115 | "114",1968.41666666747,325.2
116 | "115",1968.50000000081,323.98
117 | "116",1968.58333333415,321.95
118 | "117",1968.66666666749,320.18
119 | "118",1968.75000000084,320.09
120 | "119",1968.83333333418,321.16
121 | "120",1968.91666666752,322.74
122 | "121",1969.00000000086,323.83
123 | "122",1969.0833333342,324.26
124 | "123",1969.16666666754,325.47
125 | "124",1969.25000000088,326.5
126 | "125",1969.33333333422,327.21
127 | "126",1969.41666666756,326.54
128 | "127",1969.5000000009,325.72
129 | "128",1969.58333333424,323.5
130 | "129",1969.66666666758,322.22
131 | "130",1969.75000000092,321.62
132 | "131",1969.83333333426,322.69
133 | "132",1969.9166666676,323.95
134 | "133",1970.00000000094,324.89
135 | "134",1970.08333333428,325.82
136 | "135",1970.16666666762,326.77
137 | "136",1970.25000000096,327.97
138 | "137",1970.3333333343,327.91
139 | "138",1970.41666666764,327.5
140 | "139",1970.50000000098,326.18
141 | "140",1970.58333333433,324.53
142 | "141",1970.66666666767,322.93
143 | "142",1970.75000000101,322.9
144 | "143",1970.83333333435,323.85
145 | "144",1970.91666666769,324.96
146 | "145",1971.00000000103,326.01
147 | "146",1971.08333333437,326.51
148 | "147",1971.16666666771,327.01
149 | "148",1971.25000000105,327.62
150 | "149",1971.33333333439,328.76
151 | "150",1971.41666666773,328.4
152 | "151",1971.50000000107,327.2
153 | "152",1971.58333333441,325.27
154 | "153",1971.66666666775,323.2
155 | "154",1971.75000000109,323.4
156 | "155",1971.83333333443,324.63
157 | "156",1971.91666666777,325.85
158 | "157",1972.00000000111,326.6
159 | "158",1972.08333333445,327.47
160 | "159",1972.16666666779,327.58
161 | "160",1972.25000000113,329.56
162 | "161",1972.33333333448,329.9
163 | "162",1972.41666666782,328.92
164 | "163",1972.50000000116,327.88
165 | "164",1972.5833333345,326.16
166 | "165",1972.66666666784,324.68
167 | "166",1972.75000000118,325.04
168 | "167",1972.83333333452,326.34
169 | "168",1972.91666666786,327.39
170 | "169",1973.0000000012,328.37
171 | "170",1973.08333333454,329.4
172 | "171",1973.16666666788,330.14
173 | "172",1973.25000000122,331.33
174 | "173",1973.33333333456,332.31
175 | "174",1973.4166666679,331.9
176 | "175",1973.50000000124,330.7
177 | "176",1973.58333333458,329.15
178 | "177",1973.66666666792,327.35
179 | "178",1973.75000000126,327.02
180 | "179",1973.8333333346,327.99
181 | "180",1973.91666666794,328.48
182 | "181",1974.00000000128,329.18
183 | "182",1974.08333333463,330.55
184 | "183",1974.16666666797,331.32
185 | "184",1974.25000000131,332.48
186 | "185",1974.33333333465,332.92
187 | "186",1974.41666666799,332.08
188 | "187",1974.50000000133,331.01
189 | "188",1974.58333333467,329.23
190 | "189",1974.66666666801,327.27
191 | "190",1974.75000000135,327.21
192 | "191",1974.83333333469,328.29
193 | "192",1974.91666666803,329.41
194 | "193",1975.00000000137,330.23
195 | "194",1975.08333333471,331.25
196 | "195",1975.16666666805,331.87
197 | "196",1975.25000000139,333.14
198 | "197",1975.33333333473,333.8
199 | "198",1975.41666666807,333.43
200 | "199",1975.50000000141,331.73
201 | "200",1975.58333333475,329.9
202 | "201",1975.66666666809,328.4
203 | "202",1975.75000000143,328.17
204 | "203",1975.83333333478,329.32
205 | "204",1975.91666666812,330.59
206 | "205",1976.00000000146,331.58
207 | "206",1976.0833333348,332.39
208 | "207",1976.16666666814,333.33
209 | "208",1976.25000000148,334.41
210 | "209",1976.33333333482,334.71
211 | "210",1976.41666666816,334.17
212 | "211",1976.5000000015,332.89
213 | "212",1976.58333333484,330.77
214 | "213",1976.66666666818,329.14
215 | "214",1976.75000000152,328.78
216 | "215",1976.83333333486,330.14
217 | "216",1976.9166666682,331.52
218 | "217",1977.00000000154,332.75
219 | "218",1977.08333333488,333.24
220 | "219",1977.16666666822,334.53
221 | "220",1977.25000000156,335.9
222 | "221",1977.3333333349,336.57
223 | "222",1977.41666666824,336.1
224 | "223",1977.50000000158,334.76
225 | "224",1977.58333333493,332.59
226 | "225",1977.66666666827,331.42
227 | "226",1977.75000000161,330.98
228 | "227",1977.83333333495,332.24
229 | "228",1977.91666666829,333.68
230 | "229",1978.00000000163,334.8
231 | "230",1978.08333333497,335.22
232 | "231",1978.16666666831,336.47
233 | "232",1978.25000000165,337.59
234 | "233",1978.33333333499,337.84
235 | "234",1978.41666666833,337.72
236 | "235",1978.50000000167,336.37
237 | "236",1978.58333333501,334.51
238 | "237",1978.66666666835,332.6
239 | "238",1978.75000000169,332.38
240 | "239",1978.83333333503,333.75
241 | "240",1978.91666666837,334.78
242 | "241",1979.00000000171,336.05
243 | "242",1979.08333333505,336.59
244 | "243",1979.16666666839,337.79
245 | "244",1979.25000000173,338.71
246 | "245",1979.33333333507,339.3
247 | "246",1979.41666666842,339.12
248 | "247",1979.50000000176,337.56
249 | "248",1979.5833333351,335.92
250 | "249",1979.66666666844,333.75
251 | "250",1979.75000000178,333.7
252 | "251",1979.83333333512,335.12
253 | "252",1979.91666666846,336.56
254 | "253",1980.0000000018,337.84
255 | "254",1980.08333333514,338.19
256 | "255",1980.16666666848,339.91
257 | "256",1980.25000000182,340.6
258 | "257",1980.33333333516,341.29
259 | "258",1980.4166666685,341
260 | "259",1980.50000000184,339.39
261 | "260",1980.58333333518,337.43
262 | "261",1980.66666666852,335.72
263 | "262",1980.75000000186,335.84
264 | "263",1980.8333333352,336.93
265 | "264",1980.91666666854,338.04
266 | "265",1981.00000000188,339.06
267 | "266",1981.08333333522,340.3
268 | "267",1981.16666666857,341.21
269 | "268",1981.25000000191,342.33
270 | "269",1981.33333333525,342.74
271 | "270",1981.41666666859,342.08
272 | "271",1981.50000000193,340.32
273 | "272",1981.58333333527,338.26
274 | "273",1981.66666666861,336.52
275 | "274",1981.75000000195,336.68
276 | "275",1981.83333333529,338.19
277 | "276",1981.91666666863,339.44
278 | "277",1982.00000000197,340.57
279 | "278",1982.08333333531,341.44
280 | "279",1982.16666666865,342.53
281 | "280",1982.25000000199,343.39
282 | "281",1982.33333333533,343.96
283 | "282",1982.41666666867,343.18
284 | "283",1982.50000000201,341.88
285 | "284",1982.58333333535,339.65
286 | "285",1982.66666666869,337.81
287 | "286",1982.75000000203,337.69
288 | "287",1982.83333333537,339.09
289 | "288",1982.91666666872,340.32
290 | "289",1983.00000000206,341.2
291 | "290",1983.0833333354,342.35
292 | "291",1983.16666666874,342.93
293 | "292",1983.25000000208,344.77
294 | "293",1983.33333333542,345.58
295 | "294",1983.41666666876,345.14
296 | "295",1983.5000000021,343.81
297 | "296",1983.58333333544,342.21
298 | "297",1983.66666666878,339.69
299 | "298",1983.75000000212,339.82
300 | "299",1983.83333333546,340.98
301 | "300",1983.9166666688,342.82
302 | "301",1984.00000000214,343.52
303 | "302",1984.08333333548,344.33
304 | "303",1984.16666666882,345.11
305 | "304",1984.25000000216,346.88
306 | "305",1984.3333333355,347.25
307 | "306",1984.41666666884,346.62
308 | "307",1984.50000000218,345.22
309 | "308",1984.58333333552,343.11
310 | "309",1984.66666666887,340.9
311 | "310",1984.75000000221,341.18
312 | "311",1984.83333333555,342.8
313 | "312",1984.91666666889,344.04
314 | "313",1985.00000000223,344.79
315 | "314",1985.08333333557,345.82
316 | "315",1985.16666666891,347.25
317 | "316",1985.25000000225,348.17
318 | "317",1985.33333333559,348.74
319 | "318",1985.41666666893,348.07
320 | "319",1985.50000000227,346.38
321 | "320",1985.58333333561,344.51
322 | "321",1985.66666666895,342.92
323 | "322",1985.75000000229,342.62
324 | "323",1985.83333333563,344.06
325 | "324",1985.91666666897,345.38
326 | "325",1986.00000000231,346.11
327 | "326",1986.08333333565,346.78
328 | "327",1986.16666666899,347.68
329 | "328",1986.25000000233,349.37
330 | "329",1986.33333333567,350.03
331 | "330",1986.41666666902,349.37
332 | "331",1986.50000000236,347.76
333 | "332",1986.5833333357,345.73
334 | "333",1986.66666666904,344.68
335 | "334",1986.75000000238,343.99
336 | "335",1986.83333333572,345.48
337 | "336",1986.91666666906,346.72
338 | "337",1987.0000000024,347.84
339 | "338",1987.08333333574,348.29
340 | "339",1987.16666666908,349.23
341 | "340",1987.25000000242,350.8
342 | "341",1987.33333333576,351.66
343 | "342",1987.4166666691,351.07
344 | "343",1987.50000000244,349.33
345 | "344",1987.58333333578,347.92
346 | "345",1987.66666666912,346.27
347 | "346",1987.75000000246,346.18
348 | "347",1987.8333333358,347.64
349 | "348",1987.91666666914,348.78
350 | "349",1988.00000000248,350.25
351 | "350",1988.08333333582,351.54
352 | "351",1988.16666666916,352.05
353 | "352",1988.25000000251,353.41
354 | "353",1988.33333333585,354.04
355 | "354",1988.41666666919,353.62
356 | "355",1988.50000000253,352.22
357 | "356",1988.58333333587,350.27
358 | "357",1988.66666666921,348.55
359 | "358",1988.75000000255,348.72
360 | "359",1988.83333333589,349.91
361 | "360",1988.91666666923,351.18
362 | "361",1989.00000000257,352.6
363 | "362",1989.08333333591,352.92
364 | "363",1989.16666666925,353.53
365 | "364",1989.25000000259,355.26
366 | "365",1989.33333333593,355.52
367 | "366",1989.41666666927,354.97
368 | "367",1989.50000000261,353.75
369 | "368",1989.58333333595,351.52
370 | "369",1989.66666666929,349.64
371 | "370",1989.75000000263,349.83
372 | "371",1989.83333333597,351.14
373 | "372",1989.91666666931,352.37
374 | "373",1990.00000000266,353.5
375 | "374",1990.083333336,354.55
376 | "375",1990.16666666934,355.23
377 | "376",1990.25000000268,356.04
378 | "377",1990.33333333602,357
379 | "378",1990.41666666936,356.07
380 | "379",1990.5000000027,354.67
381 | "380",1990.58333333604,352.76
382 | "381",1990.66666666938,350.82
383 | "382",1990.75000000272,351.04
384 | "383",1990.83333333606,352.69
385 | "384",1990.9166666694,354.07
386 | "385",1991.00000000274,354.59
387 | "386",1991.08333333608,355.63
388 | "387",1991.16666666942,357.03
389 | "388",1991.25000000276,358.48
390 | "389",1991.3333333361,359.22
391 | "390",1991.41666666944,358.12
392 | "391",1991.50000000278,356.06
393 | "392",1991.58333333612,353.92
394 | "393",1991.66666666946,352.05
395 | "394",1991.75000000281,352.11
396 | "395",1991.83333333615,353.64
397 | "396",1991.91666666949,354.89
398 | "397",1992.00000000283,355.88
399 | "398",1992.08333333617,356.63
400 | "399",1992.16666666951,357.72
401 | "400",1992.25000000285,359.07
402 | "401",1992.33333333619,359.58
403 | "402",1992.41666666953,359.17
404 | "403",1992.50000000287,356.94
405 | "404",1992.58333333621,354.92
406 | "405",1992.66666666955,352.94
407 | "406",1992.75000000289,353.23
408 | "407",1992.83333333623,354.09
409 | "408",1992.91666666957,355.33
410 | "409",1993.00000000291,356.63
411 | "410",1993.08333333625,357.1
412 | "411",1993.16666666959,358.32
413 | "412",1993.25000000293,359.41
414 | "413",1993.33333333627,360.23
415 | "414",1993.41666666961,359.55
416 | "415",1993.50000000296,357.53
417 | "416",1993.5833333363,355.48
418 | "417",1993.66666666964,353.67
419 | "418",1993.75000000298,353.95
420 | "419",1993.83333333632,355.3
421 | "420",1993.91666666966,356.78
422 | "421",1994.000000003,358.34
423 | "422",1994.08333333634,358.89
424 | "423",1994.16666666968,359.95
425 | "424",1994.25000000302,361.25
426 | "425",1994.33333333636,361.67
427 | "426",1994.4166666697,360.94
428 | "427",1994.50000000304,359.55
429 | "428",1994.58333333638,357.49
430 | "429",1994.66666666972,355.84
431 | "430",1994.75000000306,356
432 | "431",1994.8333333364,357.59
433 | "432",1994.91666666974,359.05
434 | "433",1995.00000000308,359.98
435 | "434",1995.08333333642,361.03
436 | "435",1995.16666666976,361.66
437 | "436",1995.25000000311,363.48
438 | "437",1995.33333333645,363.82
439 | "438",1995.41666666979,363.3
440 | "439",1995.50000000313,361.94
441 | "440",1995.58333333647,359.5
442 | "441",1995.66666666981,358.11
443 | "442",1995.75000000315,357.8
444 | "443",1995.83333333649,359.61
445 | "444",1995.91666666983,360.74
446 | "445",1996.00000000317,362.09
447 | "446",1996.08333333651,363.29
448 | "447",1996.16666666985,364.06
449 | "448",1996.25000000319,364.76
450 | "449",1996.33333333653,365.45
451 | "450",1996.41666666987,365.01
452 | "451",1996.50000000321,363.7
453 | "452",1996.58333333655,361.54
454 | "453",1996.66666666989,359.51
455 | "454",1996.75000000323,359.65
456 | "455",1996.83333333657,360.8
457 | "456",1996.91666666991,362.38
458 | "457",1997.00000000325,363.23
459 | "458",1997.0833333366,364.06
460 | "459",1997.16666666994,364.61
461 | "460",1997.25000000328,366.4
462 | "461",1997.33333333662,366.84
463 | "462",1997.41666666996,365.68
464 | "463",1997.5000000033,364.52
465 | "464",1997.58333333664,362.57
466 | "465",1997.66666666998,360.24
467 | "466",1997.75000000332,360.83
468 | "467",1997.83333333666,362.49
469 | "468",1997.91666667,364.34
470 | 


--------------------------------------------------------------------------------
/extdata/co2.feather:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/extdata/co2.feather


--------------------------------------------------------------------------------
/extdata/cranlog.Rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/extdata/cranlog.Rds


--------------------------------------------------------------------------------
/extdata/dplyr-verbs.csv:
--------------------------------------------------------------------------------
 1 | "dplyr function(s)","Description","Base R functions"
 2 | "filter(), slice()","Subset rows by attribute (filter) or position (slice)","subset(), ["
 3 | "arrange()","Return data ordered by variable(s)","order()"
 4 | "select()","Subset columns","subset(), [, [["
 5 | "rename()","Rename columns","colnames()"
 6 | "distinct()","Return unique rows","!duplicated()"
 7 | "mutate()","Create new variables (transmute drops existing variables)","transform(), [["
 8 | "summarise()","Collapse data into a single row","aggregate(), tapply()"
 9 | "sample_n()","Return a sample of the data","sample()"
10 | 


--------------------------------------------------------------------------------
/extdata/filesizes.Rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/extdata/filesizes.Rds


--------------------------------------------------------------------------------
/extdata/idata-renamed.Rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/extdata/idata-renamed.Rds


--------------------------------------------------------------------------------
/extdata/lnd_geo_df.Rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/extdata/lnd_geo_df.Rds


--------------------------------------------------------------------------------
/extdata/lnd_simple.Rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/extdata/lnd_simple.Rds


--------------------------------------------------------------------------------
/extdata/mean_comparison.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/extdata/mean_comparison.RData


--------------------------------------------------------------------------------
/extdata/out-ice.Rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/extdata/out-ice.Rds


--------------------------------------------------------------------------------
/extdata/package_list.csv:
--------------------------------------------------------------------------------
 1 | "Name","Title","version"
 2 | "assertive.reflection","Assertions for Checking the State of R [@R-assertive.reflection]","0.0.4"
 3 | "benchmarkme","Crowd Sourced System Benchmarks [@R-benchmarkme]","1.0.3"
 4 | "bookdown","Authoring Books and Technical Documents with R Markdown [@R-bookdown]","0.18"
 5 | "cranlogs","Download Logs from the 'RStudio' 'CRAN' Mirror [@R-cranlogs]","2.1.1"
 6 | "data.table","Extension of `data.frame` [@R-data.table]","1.12.8"
 7 | "dbplyr","A 'dplyr' Back End for Databases [@R-dbplyr]","1.4.3"
 8 | "devtools","Tools to Make Developing R Packages Easier [@R-devtools]","2.3.0"
 9 | "DiagrammeR","Graph/Network Visualization [@R-DiagrammeR]","1.0.5"
10 | "dplyr","A Grammar of Data Manipulation [@R-dplyr]","0.8.5"
11 | "drat","'Drat' R Archive Template [@R-drat]","0.1.5"
12 | "efficient","Becoming an Efficient R Programmer [@R-efficient]","0.1.3"
13 | "feather","R Bindings to the Feather 'API' [@R-feather]","0.3.5"
14 | "formatR","Format R Code Automatically [@R-formatR]","1.7"
15 | "fortunes","R Fortunes [@R-fortunes]","1.5.4"
16 | "geosphere","Spherical Trigonometry [@R-geosphere]","1.5.10"
17 | "ggmap","Spatial Visualization with ggplot2 [@R-ggmap]","3.0.0"
18 | "ggplot2","Create Elegant Data Visualisations Using the Grammar of Graphics [@R-ggplot2]","3.3.0"
19 | "ggplot2movies","Movies Data [@R-ggplot2movies]","0.0.1"
20 | "knitr","A General-Purpose Package for Dynamic Report Generation in R [@R-knitr]","1.28"
21 | "lubridate","Make Dealing with Dates a Little Easier [@R-lubridate]","1.7.8"
22 | "maps","Draw Geographical Maps [@R-maps]","3.3.0"
23 | "microbenchmark","Accurate Timing Functions [@R-microbenchmark]","1.4.7"
24 | "profvis","Interactive Visualizations for Profiling R Code [@R-profvis]","0.3.6"
25 | "pryr","Tools for Computing on the Language [@R-pryr]","0.1.4"
26 | "Rcpp","Seamless R and C++ Integration [@R-Rcpp]","1.0.4.6"
27 | "readr","Read Rectangular Text Data [@R-readr]","1.3.1"
28 | "reticulate","Interface to 'Python' [@R-reticulate]","1.15"
29 | "rio","A Swiss-Army Knife for Data I/O [@R-rio]","0.5.16"
30 | "RSQLite","'SQLite' Interface for R [@R-RSQLite]","2.2.0"
31 | "swirl","Learn R, in R [@R-swirl]","2.4.5"
32 | "tibble","Simple Data Frames [@R-tibble]","3.0.1"
33 | "tidyr","Tidy Messy Data [@R-tidyr]","1.0.2"
34 | 


--------------------------------------------------------------------------------
/extdata/past_results.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/extdata/past_results.RData


--------------------------------------------------------------------------------
/extdata/pew.csv:
--------------------------------------------------------------------------------
 1 | religion,<$10k,$10--20k,$20--30k,$30--40k,$40--50k,$50--75k,$75--100k,$100--150k,>150k
 2 | Agnostic,27,34,60,81,76,137,122,109,84
 3 | Atheist,12,27,37,52,35,70,73,59,74
 4 | Buddhist,27,21,30,34,33,58,62,39,53
 5 | Catholic,418,617,732,670,638,1116,949,792,633
 6 | Don’t know/refused (no information on religious affiliation),15,14,15,11,10,35,21,17,18
 7 | Evangelical Protestant Churches,575,869,1064,982,881,1486,949,723,414
 8 | Hindu,1,9,7,9,11,34,47,48,54
 9 | Historically Black Protestant Churches,228,244,236,238,197,223,131,81,78
10 | Jehovah's Witness,20,27,24,24,21,30,15,11,6
11 | Jewish,19,19,25,25,30,95,69,87,151
12 | Mainline Protestant Churches,289,495,619,655,651,1107,939,753,634
13 | Mormon,29,40,48,51,56,112,85,49,42
14 | Muslim,6,7,9,10,9,23,16,8,6
15 | Orthodox,13,17,23,32,32,47,38,42,46
16 | Other Christian,9,7,11,13,13,14,18,14,12
17 | Other Faiths,20,33,40,46,49,63,46,40,41
18 | Other World Religions,5,2,3,4,2,7,3,4,4
19 | Unaffiliated,217,299,374,365,341,528,407,321,258
20 | 


--------------------------------------------------------------------------------
/extdata/pop_change.csv:
--------------------------------------------------------------------------------
 1 | # POPULATION CHANGE DATA PROVIDED BY U.S. CENSUS.,,,,,,,,,,,,,,,,,,,,,,
 2 | # CHANGE EXPRESSED AS PERCENTAGE (0-100).,,,,,,,,,,,,,,,,,,,,,,
 3 | STATE_OR_REGION,1910_POPULATION,1920_POPULATION,1930_POPULATION,1940_POPULATION,1950_POPULATION,1960_POPULATION,1970_POPULATION,1980_POPULATION,1990_POPULATION,2000_POPULATION,2010_POPULATION,1910_CHANGE,1920_CHANGE,1930_CHANGE,1940_CHANGE,1950_CHANGE,1960_CHANGE,1970_CHANGE,1980_CHANGE,1990_CHANGE,2000_CHANGE,2010_CHANGE
 4 | United States,92228531,106021568,123202660,132165129,151325798,179323175,203211926,226545805,248709873,281421906,308745538,21,15,16.2,7.3,14.5,18.5,13.3,11.5,9.8,13.2,9.7
 5 | Northeast,25868573,29662053,34427091,35976777,39477986,44677819,49040703,49135283,50809229,53594378,55317240,22.9,14.7,16.1,4.5,9.7,13.2,9.8,0.2,3.4,5.5,3.2
 6 | Midwest,29888542,34019792,38594100,40143332,44460762,51619139,56571663,58865670,59668632,64392776,66927001,13.5,13.8,13.4,4,10.8,16.1,9.6,4.1,1.4,7.9,3.9
 7 | South,29389330,33125803,37857633,41665901,47197088,54973113,62795367,75372362,85445930,100236820,114555744,19.8,12.7,14.3,10.1,13.3,16.5,14.2,20,13.4,17.3,14.3
 8 | West,7082086,9213920,12323836,14379119,20189962,28053104,34804193,43172490,52786082,63197932,71945553,64.4,30.1,33.8,16.7,40.4,38.9,24.1,24,22.3,19.7,13.8
 9 | Alabama,2138093,2348174,2646248,2832961,3061743,3266740,3444165,3893888,4040587,4447100,4779736,16.9,9.8,12.7,7.1,8.1,6.7,5.4,13.1,3.8,10.1,7.5
10 | Alaska,64356,55036,59278,72524,128643,226167,300382,401851,550043,626932,710231,1.2,-14.5,7.7,22.3,77.4,75.8,32.8,33.8,36.9,14,13.3
11 | Arizona,204354,334162,435573,499261,749587,1302161,1770900,2718215,3665228,5130632,6392017,66.2,63.5,30.3,14.6,50.1,73.7,36,53.5,34.8,40,24.6
12 | Arkansas,1574449,1752204,1854482,1949387,1909511,1786272,1923295,2286435,2350725,2673400,2915918,20,11.3,5.8,5.1,-2,-6.5,7.7,18.9,2.8,13.7,9.1
13 | California,2377549,3426861,5677251,6907387,10586223,15717204,19953134,23667902,29760021,33871648,37253956,60.1,44.1,65.7,21.7,53.3,48.5,27,18.6,25.7,13.8,10
14 | Colorado,799024,939629,1035791,1123296,1325089,1753947,2207259,2889964,3294394,4301261,5029196,48,17.6,10.2,8.4,18,32.4,25.8,30.9,14,30.6,16.9
15 | Connecticut,1114756,1380631,1606903,1709242,2007280,2535234,3031709,3107576,3287116,3405565,3574097,22.7,23.9,16.4,6.4,17.4,26.3,19.6,2.5,5.8,3.6,4.9
16 | Delaware,202322,223003,238380,266505,318085,446292,548104,594338,666168,783600,897934,9.5,10.2,6.9,11.8,19.4,40.3,22.8,8.4,12.1,17.6,14.6
17 | District of Columbia,331069,437571,486869,663091,802178,763956,756510,638333,606900,572059,601723,18.8,32.2,11.3,36.2,21,-4.8,-1,-15.6,-4.9,-5.7,5.2
18 | Florida,752619,968470,1468211,1897414,2771305,4951560,6789443,9746324,12937926,15982378,18801310,42.4,28.7,51.6,29.2,46.1,78.7,37.1,43.6,32.7,23.5,17.6
19 | Georgia,2609121,2895832,2908506,3123723,3444578,3943116,4589575,5463105,6478216,8186453,9687653,17.7,11,0.4,7.4,10.3,14.5,16.4,19,18.6,26.4,18.3
20 | Hawaii,191909,255912,368336,423330,499794,632772,768561,964691,1108229,1211537,1360301,24.6,33.4,43.9,14.9,18.1,26.6,21.5,25.5,14.9,9.3,12.3
21 | Idaho,325594,431866,445032,524873,588637,667191,712567,943935,1006749,1293953,1567582,101.3,32.6,3,17.9,12.1,13.3,6.8,32.5,6.7,28.5,21.1
22 | Illinois,5638591,6485280,7630654,7897241,8712176,10081158,11113976,11426518,11430602,12419293,12830632,16.9,15,17.7,3.5,10.3,15.7,10.2,2.8,0,8.6,3.3
23 | Indiana,2700876,2930390,3238503,3427796,3934224,4662498,5193669,5490224,5544159,6080485,6483802,7.3,8.5,10.5,5.8,14.8,18.5,11.4,5.7,1,9.7,6.6
24 | Iowa,2224771,2404021,2470939,2538268,2621073,2757537,2824376,2913808,2776755,2926324,3046355,-0.3,8.1,2.8,2.7,3.3,5.2,2.4,3.2,-4.7,5.4,4.1
25 | Kansas,1690949,1769257,1880999,1801028,1905299,2178611,2246578,2363679,2477574,2688418,2853118,15,4.6,6.3,-4.3,5.8,14.3,3.1,5.2,4.8,8.5,6.1
26 | Kentucky,2289905,2416630,2614589,2845627,2944806,3038156,3218706,3660777,3685296,4041769,4339367,6.6,5.5,8.2,8.8,3.5,3.2,5.9,13.7,0.7,9.7,7.4
27 | Louisiana,1656388,1798509,2101593,2363880,2683516,3257022,3641306,4205900,4219973,4468976,4533372,19.9,8.6,16.9,12.5,13.5,21.4,11.8,15.5,0.3,5.9,1.4
28 | Maine,742371,768014,797423,847226,913774,969265,992048,1124660,1227928,1274923,1328361,6.9,3.5,3.8,6.2,7.9,6.1,2.4,13.4,9.2,3.8,4.2
29 | Maryland,1295346,1449661,1631526,1821244,2343001,3100689,3922399,4216975,4781468,5296486,5773552,9,11.9,12.5,11.6,28.6,32.3,26.5,7.5,13.4,10.8,9
30 | Massachusetts,3366416,3852356,4249614,4316721,4690514,5148578,5689170,5737037,6016425,6349097,6547629,20,14.4,10.3,1.6,8.7,9.8,10.5,0.8,4.9,5.5,3.1
31 | Michigan,2810173,3668412,4842325,5256106,6371766,7823194,8875083,9262078,9295297,9938444,9883640,16.1,30.5,32,8.5,21.2,22.8,13.4,4.4,0.4,6.9,-0.6
32 | Minnesota,2075708,2387125,2563953,2792300,2982483,3413864,3804971,4075970,4375099,4919479,5303925,18.5,15,7.4,8.9,6.8,14.5,11.5,7.1,7.3,12.4,7.8
33 | Mississippi,1797114,1790618,2009821,2183796,2178914,2178141,2216912,2520638,2573216,2844658,2967297,15.8,-0.4,12.2,8.7,-0.2,0,1.8,13.7,2.1,10.5,4.3
34 | Missouri,3293335,3404055,3629367,3784664,3954653,4319813,4676501,4916686,5117073,5595211,5988927,6,3.4,6.6,4.3,4.5,9.2,8.3,5.1,4.1,9.3,7
35 | Montana,376053,548889,537606,559456,591024,674767,694409,786690,799065,902195,989415,54.5,46,-2.1,4.1,5.6,14.2,2.9,13.3,1.6,12.9,9.7
36 | Nebraska,1192214,1296372,1377963,1315834,1325510,1411330,1483493,1569825,1578385,1711263,1826341,11.8,8.7,6.3,-4.5,0.7,6.5,5.1,5.8,0.5,8.4,6.7
37 | Nevada,81875,77407,91058,110247,160083,285278,488738,800493,1201833,1998257,2700551,93.4,-5.5,17.6,21.1,45.2,78.2,71.3,63.8,50.1,66.3,35.1
38 | New Hampshire,430572,443083,465293,491524,533242,606921,737681,920610,1109252,1235786,1316470,4.6,2.9,5,5.6,8.5,13.8,21.5,24.8,20.5,11.4,6.5
39 | New Jersey,2537167,3155900,4041334,4160165,4835329,6066782,7168164,7364823,7730188,8414350,8791894,34.7,24.4,28.1,2.9,16.2,25.5,18.2,2.7,5,8.9,4.5
40 | New Mexico,327301,360350,423317,531818,681187,951023,1016000,1302894,1515069,1819046,2059179,67.6,10.1,17.5,25.6,28.1,39.6,6.8,28.2,16.3,20.1,13.2
41 | New York,9113614,10385227,12588066,13479142,14830192,16782304,18236967,17558072,17990455,18976457,19378102,25.4,14,21.2,7.1,10,13.2,8.7,-3.7,2.5,5.5,2.1
42 | North Carolina,2206287,2559123,3170276,3571623,4061929,4556155,5082059,5881766,6628637,8049313,9535483,16.5,16,23.9,12.7,13.7,12.2,11.5,15.7,12.7,21.4,18.5
43 | North Dakota,577056,646872,680845,641935,619636,632446,617761,652717,638800,642200,672591,80.8,12.1,5.3,-5.7,-3.5,2.1,-2.3,5.7,-2.1,0.5,4.7
44 | Ohio,4767121,5759394,6646697,6907612,7946627,9706397,10652017,10797630,10847115,11353140,11536504,14.7,20.8,15.4,3.9,15,22.1,9.7,1.4,0.5,4.7,1.6
45 | Oklahoma,1657155,2028283,2396040,2336434,2233351,2328284,2559229,3025290,3145585,3450654,3751351,109.7,22.4,18.1,-2.5,-4.4,4.3,9.9,18.2,4,9.7,8.7
46 | Oregon,672765,783389,953786,1089684,1521341,1768687,2091385,2633105,2842321,3421399,3831074,62.7,16.4,21.8,14.2,39.6,16.3,18.2,25.9,7.9,20.4,12
47 | Pennsylvania,7665111,8720017,9631350,9900180,10498012,11319366,11793909,11863895,11881643,12281054,12702379,21.6,13.8,10.5,2.8,6,7.8,4.2,0.6,0.1,3.4,3.4
48 | Rhode Island,542610,604397,687497,713346,791896,859488,946725,947154,1003464,1048319,1052567,26.6,11.4,13.7,3.8,11,8.5,10.1,0,5.9,4.5,0.4
49 | South Carolina,1515400,1683724,1738765,1899804,2117027,2382594,2590516,3121820,3486703,4012012,4625364,13.1,11.1,3.3,9.3,11.4,12.5,8.7,20.5,11.7,15.1,15.3
50 | South Dakota,583888,636547,692849,642961,652740,680514,665507,690768,696004,754844,814180,45.4,9,8.8,-7.2,1.5,4.3,-2.2,3.8,0.8,8.5,7.9
51 | Tennessee,2184789,2337885,2616556,2915841,3291718,3567089,3923687,4591120,4877185,5689283,6346105,8.1,7,11.9,11.4,12.9,8.4,10,17,6.2,16.7,11.5
52 | Texas,3896542,4663228,5824715,6414824,7711194,9579677,11196730,14229191,16986510,20851820,25145561,27.8,19.7,24.9,10.1,20.2,24.2,16.9,27.1,19.4,22.8,20.6
53 | Utah,373351,449396,507847,550310,688862,890627,1059273,1461037,1722850,2233169,2763885,34.9,20.4,13,8.4,25.2,29.3,18.9,37.9,17.9,29.6,23.8
54 | Vermont,355956,352428,359611,359231,377747,389881,444330,511456,562758,608827,625741,3.6,-1,2,-0.1,5.2,3.2,14,15.1,10,8.2,2.8
55 | Virginia,2061612,2309187,2421851,2677773,3318680,3966949,4648494,5346818,6187358,7078515,8001024,11.2,12,4.9,10.6,23.9,19.5,17.2,15,15.7,14.4,13
56 | Washington,1141990,1356621,1563396,1736191,2378963,2853214,3409169,4132156,4866692,5894121,6724540,120.4,18.8,15.2,11.1,37,19.9,19.5,21.2,17.8,21.1,14.1
57 | West Virginia,1221119,1463701,1729205,1901974,2005552,1860421,1744237,1949644,1793477,1808344,1852994,27.4,19.9,18.1,10,5.4,-7.2,-6.2,11.8,-8,0.8,2.5
58 | Wisconsin,2333860,2632067,2939006,3137587,3434575,3951777,4417731,4705767,4891769,5363675,5686986,12.8,12.8,11.7,6.8,9.5,15.1,11.8,6.5,4,9.6,6
59 | Wyoming,145965,194402,225565,250742,290529,330066,332416,469557,453588,493782,563626,57.7,33.2,16,11.2,15.9,13.6,0.7,41.3,-3.4,8.9,14.1
60 | Puerto Rico,1118012,1299809,1543913,1869255,2210703,2349544,2712033,3196520,3522037,3808610,3725789,17.3,16.3,18.8,21.1,18.3,6.3,15.4,17.9,10.2,8.1,-2.2
61 | 


--------------------------------------------------------------------------------
/extdata/res-datatable.Rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/extdata/res-datatable.Rds


--------------------------------------------------------------------------------
/extdata/res.Rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/extdata/res.Rds


--------------------------------------------------------------------------------
/extdata/res_rl_blas.Rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/extdata/res_rl_blas.Rds


--------------------------------------------------------------------------------
/extdata/res_rl_noblas.Rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/extdata/res_rl_noblas.Rds


--------------------------------------------------------------------------------
/extdata/res_v.Rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/extdata/res_v.Rds


--------------------------------------------------------------------------------
/extdata/reshape-pew.csv:
--------------------------------------------------------------------------------
 1 | "","religion","<$10k","$10--20k","$20--30k","$30--40k","$40--50k","$50--75k","$75--100k","$100--150k",">150k","Don't know/refused"
 2 | "1","Agnostic",27,34,60,81,76,137,122,109,84,96
 3 | "2","Atheist",12,27,37,52,35,70,73,59,74,76
 4 | "3"," Buddhist ",27,21,30,34,33,58,62,39,53,54
 5 | "4"," Catholic ",418,617,732,670,638,1116,949,792,633,1489
 6 | "5"," Don’t know/refused (no information on religious affiliation) ",15,14,15,11,10,35,21,17,18,116
 7 | "6"," Evangelical Protestant Churches ",575,869,1064,982,881,1486,949,723,414,1529
 8 | "7"," Hindu ",1,9,7,9,11,34,47,48,54,37
 9 | "8"," Historically Black Protestant Churches ",228,244,236,238,197,223,131,81,78,339
10 | "9"," Jehovah's Witness ",20,27,24,24,21,30,15,11,6,37
11 | "10"," Jewish ",19,19,25,25,30,95,69,87,151,162
12 | "11"," Mainline Protestant Churches ",289,495,619,655,651,1107,939,753,634,1328
13 | "12"," Mormon ",29,40,48,51,56,112,85,49,42,69
14 | "13"," Muslim ",6,7,9,10,9,23,16,8,6,22
15 | "14"," Orthodox ",13,17,23,32,32,47,38,42,46,73
16 | "15"," Other Christian ",9,7,11,13,13,14,18,14,12,18
17 | "16"," Other Faiths ",20,33,40,46,49,63,46,40,41,71
18 | "17"," Other World Religions ",5,2,3,4,2,7,3,4,4,8
19 | "18"," Unaffiliated ",217,299,374,365,341,528,407,321,258,597
20 | 


--------------------------------------------------------------------------------
/extdata/rtimes.Rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/extdata/rtimes.Rds


--------------------------------------------------------------------------------
/extdata/wtimes.Rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/extdata/wtimes.Rds


--------------------------------------------------------------------------------
/figures/f0_front_scale.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/f0_front_scale.png


--------------------------------------------------------------------------------
/figures/f0_full.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/f0_full.png


--------------------------------------------------------------------------------
/figures/f0_web.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/f0_web.png


--------------------------------------------------------------------------------
/figures/f1_1_800px-QWERTY-home-keys-position.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/f1_1_800px-QWERTY-home-keys-position.png


--------------------------------------------------------------------------------
/figures/f1_2_profvis-ice.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/f1_2_profvis-ice.png


--------------------------------------------------------------------------------
/figures/f1_3_icesheet-change.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/f1_3_icesheet-change.png


--------------------------------------------------------------------------------
/figures/f2_1_sysmon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/f2_1_sysmon.png


--------------------------------------------------------------------------------
/figures/f2_2_rstudio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/f2_2_rstudio.png


--------------------------------------------------------------------------------
/figures/f2_3_view.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/f2_3_view.png


--------------------------------------------------------------------------------
/figures/f4_2_DiagrammeR-gantt-book.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/f4_2_DiagrammeR-gantt-book.png


--------------------------------------------------------------------------------
/figures/f4_3_geosphere-badge.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/f4_3_geosphere-badge.png


--------------------------------------------------------------------------------
/figures/f4_4_geoPlot-badge.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/f4_4_geoPlot-badge.png


--------------------------------------------------------------------------------
/figures/f5_3_rstudio-package-filepath-intellisense.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/f5_3_rstudio-package-filepath-intellisense.png


--------------------------------------------------------------------------------
/figures/f6_1_world_co2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/f6_1_world_co2.png


--------------------------------------------------------------------------------
/figures/f7_1_profvis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/f7_1_profvis.png


--------------------------------------------------------------------------------
/figures/f7_2_profvis_monopoly.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/f7_2_profvis_monopoly.png


--------------------------------------------------------------------------------
/figures/f7_4_profvis_monopoly.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/f7_4_profvis_monopoly.png


--------------------------------------------------------------------------------
/figures/f8_1_3SDRAM-DIMMs.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/f8_1_3SDRAM-DIMMs.jpg


--------------------------------------------------------------------------------
/figures/f8_2_627px-Laptop-hard-drive-exposed.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/f8_2_627px-Laptop-hard-drive-exposed.jpg


--------------------------------------------------------------------------------
/figures/f9_1_rstudio-git.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/f9_1_rstudio-git.png


--------------------------------------------------------------------------------
/figures/f9_2_rstudio-githist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/f9_2_rstudio-githist.png


--------------------------------------------------------------------------------
/figures/icesheet-change.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/icesheet-change.png


--------------------------------------------------------------------------------
/figures/icesheet-test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/icesheet-test.png


--------------------------------------------------------------------------------
/figures/pf10_1_package-autocompletion.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/figures/pf10_1_package-autocompletion.png


--------------------------------------------------------------------------------
/images/note.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/images/note.png


--------------------------------------------------------------------------------
/images/tip.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/images/tip.png


--------------------------------------------------------------------------------
/images/warning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csgillespie/efficientR/5e80d32421848c44e243c8c8edcd601081c762cd/images/warning.png


--------------------------------------------------------------------------------
/index.Rmd:
--------------------------------------------------------------------------------
 1 | --- 
 2 | title: "Efficient R programming"
 3 | author: ["Colin Gillespie", "Robin Lovelace"]
 4 | date: "`r Sys.Date()`"
 5 | knit: "bookdown::render_book"
 6 | site: bookdown::bookdown_site
 7 | documentclass: book
 8 | bibliography: [refs.bib, packages.bib]
 9 | biblio-style: apalike
10 | link-citations: yes
11 | twitter-handle: csgillespie
12 | cover-image: figures/f0_web.png
13 | description: "Efficient R Programming is about increasing the amount of work you 
14 |   can do with R in a given amount of time. It's about both computational and programmer efficiency."
15 | github-repo: csgillespie/efficientR
16 | url: 'https\://csgillespie.github.io/efficientR/'
17 | ---
18 | 
19 | # Welcome to Efficient R Programming {-}
20 | 
21 | ```{r echo=FALSE, out.width="33%"}
22 | knitr::include_graphics("figures/f0_web.png")
23 | ```
24 | 
25 | This is the [online version](https://csgillespie.github.io/efficientR/) of the O'Reilly book: [Efficient R programming](http://shop.oreilly.com/product/0636920047995.do). Pull requests and general comments are welcome.
26 | 
27 | Get a hard copy from: [Amazon (UK)](https://alexa.design/2pmrqBj), [Amazon (USA)](https://alexa.design/2pmfpf4), [O'Reilly](http://shop.oreilly.com/product/0636920047995.do)
28 | 
29 | ## Authors {-}
30 | 
31 | [Colin Gillespie](http://www.mas.ncl.ac.uk/~ncsg3/) is Senior Lecturer (Associate
32 | Professor) at Newcastle University, UK.
33 | He is an Executive Editor of the [R Journal](https://journal.r-project.org/board.html), with research interests including high performance
34 | statistical computing and Bayesian statistics. Colin founded the [Jumping Rivers](https://www.jumpingrivers.com/) consultancy and has been teaching R
35 | since 2005 at all levels, from beginning to advanced programming.
36 | 
37 | [Robin Lovelace](http://robinlovelace.net/) is Associate Professor at the Institute for Transport Studies ([ITS](http://www.its.leeds.ac.uk/)) and Leeds Institute for Data Analytics ([LIDA](http://lida.leeds.ac.uk/about-lida/contact/)), University of Leeds, UK. 
38 | His research focuses on geocomputation and reproducible data science for evidence-based policy-making. 
39 | Decarbonising the global economy while improving health and environmental outcomes is a major problem-solving challenge.
40 | Robin's research supports solutions by generating evidence and tools enabling evidence-based investment in efficient and healthy modes of transport at local, city and national scales. 
41 | Robin is the Lead Developer of the award-winning Propensity to Cycle Tool (publicly available at [www.pct.bike](https://www.pct.bike/)), convenor of the Transport Data Science module and workshop series, and co-author of popular [packages](https://cran.r-project.org/web/checks/check_results_rob00x_at_gmail.com.html), papers, and [books](https://www.google.com/search?tbm=bks&q=robin+lovelace), including [Geocomputation with R](https://geocompr.robinlovelace.net/).
42 | 
43 | # Preface {-}
44 | 
45 | *Efficient R Programming* is about increasing the amount of work you can do with R in a given amount of time. It's about both *computational* and *programmer* efficiency. There are many excellent R resources about topic areas such as visualisation [e.g. @chang2012r], data science [e.g. @grolemund_r_2016] and package development [e.g. @Wickham_2015]. There are even more resources on how to use R in particular domains, including Bayesian Statistics, Machine Learning and Geographic Information Systems. However, there are very few unified resources on how to simply make R work effectively. Hints, tips and decades of community knowledge on the subject are scattered across hundreds of internet pages, email threads and discussion forums, making it challenging for R users to understand how to write efficient code.
46 | 
47 | In our teaching we have found that this issue applies to beginners and experienced users alike. Whether it's a question of understanding how to use R's vector objects to avoid for loops, knowing how to set up your `.Rprofile` and `.Renviron` files or the ability to harness R's excellent C++ interface to do the 'heavy lifting', the concept of efficiency is key. The book aims to distill tips, warnings and 'tricks of the trade' down into a single, cohesive whole that will provide a useful resource to R programmers of all stripes for years to come.
48 | 
49 | The content of the book reflects the questions that our students, from a range of disciplines, skill levels and industries, have asked over the years to make their R work faster. How can I set up my system optimally for R programming work? How can one apply general principles from Computer Science (such as do not repeat yourself, DRY) to the specifics of an R script? How can R code be incorporated into an efficient workflow, including project inception, collaboration and write-up? And how can one learn quickly how to use new packages and functions?
50 | 
51 | The book answers each of these questions, and more, in 10 self-contained chapters. Each chapter starts simple and gets progressively more advanced, so there is something for everyone in each. While the more advanced topics such as parallel programming and C++ may not be immediately relevant to R beginners, the book helps to navigate R's famously steep learning curve with a commitment to starting slow and building on strong foundations. Thus even experienced R users are likely to find previously hidden gems of advice in the early parts of the chapters. "Why did no one tell me that before?" is a common exclamation we have heard while teaching this material.
52 | 
53 | Efficient programming should not be seen as an optional extra and the importance of efficiency grows with the size of projects and datasets. In fact, this book was devised while we were teaching a course on 'R for Big Data': it quickly became apparent that if you want to work with large datasets, your code must work efficiently. Even if you work with small datasets, efficient code that is both fast to write *and* run is a vital component of successful R projects. We found that the concept of efficient programming is important to all branches of the R community. Whether you are a sporadic user of R (e.g. for its unbeatable range of statistical packages), looking to develop a package, or working on a large collaborative project in which efficiency is mission-critical, code efficiency will have a major impact on your productivity.
54 | 
55 | Ultimately efficiency is about getting more output for less work input. To take the analogy of a car, would you rather drive 1000 km on a single tank (or a single charge of your batteries) or refuel a heavy, clunky and ugly car every 50 km? In the same way, efficient R code is better than inefficient R code in almost every way: it is easier to read, write, run, share and maintain. This book cannot provide all the answers about how to produce such code but it certainly can provide ideas, example code and tips to make a start in the right direction of travel.
56 | 
57 | 


--------------------------------------------------------------------------------
/packages.bib:
--------------------------------------------------------------------------------
  1 | @Manual{R-assertive.reflection,
  2 |   title = {assertive.reflection: Assertions for Checking the State of R},
  3 |   author = {Richard Cotton},
  4 |   year = {2016},
  5 |   note = {R package version 0.0-4},
  6 |   url = {https://CRAN.R-project.org/package=assertive.reflection},
  7 | }
  8 | @Manual{R-benchmarkme,
  9 |   title = {benchmarkme: Crowd Sourced System Benchmarks},
 10 |   author = {Colin Gillespie},
 11 |   year = {2019},
 12 |   note = {R package version 1.0.3},
 13 |   url = {https://github.com/csgillespie/benchmarkme},
 14 | }
 15 | @Manual{R-bookdown,
 16 |   title = {bookdown: Authoring Books and Technical Documents with R Markdown},
 17 |   author = {Yihui Xie},
 18 |   year = {2020},
 19 |   note = {R package version 0.18},
 20 |   url = {https://github.com/rstudio/bookdown},
 21 | }
 22 | @Manual{R-cranlogs,
 23 |   title = {cranlogs: Download Logs from the RStudio 'CRAN' Mirror},
 24 |   author = {Gábor Csárdi},
 25 |   year = {2019},
 26 |   note = {R package version 2.1.1},
 27 |   url = {https://CRAN.R-project.org/package=cranlogs},
 28 | }
 29 | @Manual{R-data.table,
 30 |   title = {data.table: Extension of `data.frame`},
 31 |   author = {Matt Dowle and Arun Srinivasan},
 32 |   year = {2019},
 33 |   note = {http://r-datatable.com, https://Rdatatable.gitlab.io/data.table,
 34 | https://github.com/Rdatatable/data.table},
 35 | }
 36 | @Manual{R-dbplyr,
 37 |   title = {dbplyr: A 'dplyr' Back End for Databases},
 38 |   author = {Hadley Wickham and Edgar Ruiz},
 39 |   year = {2020},
 40 |   note = {https://dbplyr.tidyverse.org/, https://github.com/tidyverse/dbplyr},
 41 | }
 42 | @Manual{R-devtools,
 43 |   title = {devtools: Tools to Make Developing R Packages Easier},
 44 |   author = {Hadley Wickham and Jim Hester and Winston Chang},
 45 |   year = {2020},
 46 |   note = {https://devtools.r-lib.org/, https://github.com/r-lib/devtools},
 47 | }
 48 | @Manual{R-DiagrammeR,
 49 |   title = {DiagrammeR: Graph/Network Visualization},
 50 |   author = {Richard Iannone},
 51 |   year = {2020},
 52 |   note = {R package version 1.0.5},
 53 |   url = {https://github.com/rich-iannone/DiagrammeR},
 54 | }
 55 | @Manual{R-dplyr,
 56 |   title = {dplyr: A Grammar of Data Manipulation},
 57 |   author = {Hadley Wickham and Romain François and Lionel Henry and Kirill Müller},
 58 |   year = {2020},
 59 |   note = {http://dplyr.tidyverse.org, https://github.com/tidyverse/dplyr},
 60 | }
 61 | @Manual{R-drat,
 62 |   title = {drat: 'Drat' R Archive Template},
 63 |   author = {Dirk Eddelbuettel with contributions by Carl Boettiger and Neal Fultz and Sebastian Gibb and Colin Gillespie and Jan Górecki and Matt Jones and Thomas Leeper and Steven Pav and Jan Schulz and Christoph Stepper.},
 64 |   year = {2019},
 65 |   note = {R package version 0.1.5},
 66 |   url = {http://dirk.eddelbuettel.com/code/drat.html},
 67 | }
 68 | @Manual{R-efficient,
 69 |   title = {efficient: Becoming an Efficient R Programmer},
 70 |   author = {Colin Gillespie and Robin Lovelace},
 71 |   year = {2020},
 72 |   note = {R package version 0.1.3},
 73 | }
 74 | @Manual{R-feather,
 75 |   title = {feather: R Bindings to the Feather 'API'},
 76 |   author = {Hadley Wickham},
 77 |   year = {2019},
 78 |   note = {R package version 0.3.5},
 79 |   url = {https://CRAN.R-project.org/package=feather},
 80 | }
 81 | @Manual{R-formatR,
 82 |   title = {formatR: Format R Code Automatically},
 83 |   author = {Yihui Xie},
 84 |   year = {2019},
 85 |   note = {R package version 1.7},
 86 |   url = {https://CRAN.R-project.org/package=formatR},
 87 | }
 88 | @Manual{R-fortunes,
 89 |   title = {fortunes: R Fortunes},
 90 |   author = {Achim Zeileis and the R community. Contributions (fortunes and/or code) by Torsten Hothorn and Peter Dalgaard and Uwe Ligges and Kevin Wright and Martin Maechler and Kjetil Brinchmann Halvorsen and Kurt Hornik and Duncan Murdoch and Andy Bunn and Ray Brownrigg and Roger Bivand and Spencer Graves and Jim Lemon and Christian Kleiber and David L. Reiner and Berton Gunter and Roger Koenker and Charles Berry and Marc Schwartz and Michael Dewey and Ben Bolker and Peter Dunn and Sarah Goslee and Simon Blomberg and Bill Venables and Roland Rau and Thomas Petzoldt and Rolf Turner and Mark Leeds and Emmanuel Charpentier and Chris Evans and Paolo Sonego and Peter Ehlers and Detlef Steuer and Tal Galili and Greg Snow and Brian D. Ripley and Michael Sumner and David Winsemius and Liviu Andronic and Brian Diggs and Matthieu Stigler and Michael Friendly and Dirk Eddelbuettel and Richard M. Heiberger and Patrick Burns and Dieter Menne and Andrie {de Vries} and Barry Rowlingson and Renaud Lancelot and R. Michael Weylandt and Jon Olav Skoien and Francois Morneau and Antony Unwin and Joshua Wiley and Terry Therneau and Bryan Hanson and Henrik Singmann and Eduard Szoecs and Gregor Passolt and John C. Nash.},
 91 |   year = {2016},
 92 |   note = {R package version 1.5-4},
 93 |   url = {https://CRAN.R-project.org/package=fortunes},
 94 | }
 95 | @Manual{R-geosphere,
 96 |   title = {geosphere: Spherical Trigonometry},
 97 |   author = {Robert J. Hijmans},
 98 |   year = {2019},
 99 |   note = {R package version 1.5-10},
100 |   url = {https://CRAN.R-project.org/package=geosphere},
101 | }
102 | @Manual{R-ggmap,
103 |   title = {ggmap: Spatial Visualization with ggplot2},
104 |   author = {David Kahle and Hadley Wickham and Scott Jackson},
105 |   year = {2019},
106 |   note = {R package version 3.0.0},
107 |   url = {https://CRAN.R-project.org/package=ggmap},
108 | }
109 | @Manual{R-ggplot2,
110 |   title = {ggplot2: Create Elegant Data Visualisations Using the Grammar of Graphics},
111 |   author = {Hadley Wickham and Winston Chang and Lionel Henry and Thomas Lin Pedersen and Kohske Takahashi and Claus Wilke and Kara Woo and Hiroaki Yutani and Dewey Dunnington},
112 |   year = {2020},
113 |   note = {R package version 3.3.0},
114 |   url = {https://CRAN.R-project.org/package=ggplot2},
115 | }
116 | @Manual{R-ggplot2movies,
117 |   title = {ggplot2movies: Movies Data},
118 |   author = {Hadley Wickham},
119 |   year = {2015},
120 |   note = {R package version 0.0.1},
121 |   url = {https://CRAN.R-project.org/package=ggplot2movies},
122 | }
123 | @Manual{R-knitr,
124 |   title = {knitr: A General-Purpose Package for Dynamic Report Generation in R},
125 |   author = {Yihui Xie},
126 |   year = {2020},
127 |   note = {R package version 1.28},
128 |   url = {https://yihui.org/knitr/},
129 | }
130 | @Manual{R-lubridate,
131 |   title = {lubridate: Make Dealing with Dates a Little Easier},
132 |   author = {Vitalie Spinu and Garrett Grolemund and Hadley Wickham},
133 |   year = {2020},
134 |   note = {http://lubridate.tidyverse.org,
135 | https://github.com/tidyverse/lubridate},
136 | }
137 | @Manual{R-maps,
138 |   title = {maps: Draw Geographical Maps},
139 |   author = {Original S code by Richard A. Becker and Allan R. Wilks. R version by Ray Brownrigg. Enhancements by Thomas P Minka and Alex Deckmyn.},
140 |   year = {2018},
141 |   note = {R package version 3.3.0},
142 |   url = {https://CRAN.R-project.org/package=maps},
143 | }
144 | @Manual{R-microbenchmark,
145 |   title = {microbenchmark: Accurate Timing Functions},
146 |   author = {Olaf Mersmann},
147 |   year = {2019},
148 |   note = {R package version 1.4-7},
149 |   url = {https://CRAN.R-project.org/package=microbenchmark},
150 | }
151 | @Manual{R-profvis,
152 |   title = {profvis: Interactive Visualizations for Profiling R Code},
153 |   author = {Winston Chang and Javier Luraschi and Timothy Mastny},
154 |   year = {2019},
155 |   note = {R package version 0.3.6},
156 |   url = {https://rstudio.github.io/profvis/},
157 | }
158 | @Manual{R-pryr,
159 |   title = {pryr: Tools for Computing on the Language},
160 |   author = {Hadley Wickham},
161 |   year = {2018},
162 |   note = {R package version 0.1.4},
163 |   url = {https://CRAN.R-project.org/package=pryr},
164 | }
165 | @Manual{R-Rcpp,
166 |   title = {Rcpp: Seamless R and C++ Integration},
167 |   author = {Dirk Eddelbuettel and Romain Francois and JJ Allaire and Kevin Ushey and Qiang Kou and Nathan Russell and Douglas Bates and John Chambers},
168 |   year = {2020},
169 |   note = {http://www.rcpp.org, http://dirk.eddelbuettel.com/code/rcpp.html,
170 | https://github.com/RcppCore/Rcpp},
171 | }
172 | @Manual{R-readr,
173 |   title = {readr: Read Rectangular Text Data},
174 |   author = {Hadley Wickham and Jim Hester and Romain Francois},
175 |   year = {2018},
176 |   note = {R package version 1.3.1},
177 |   url = {https://CRAN.R-project.org/package=readr},
178 | }
179 | @Manual{R-reticulate,
180 |   title = {reticulate: Interface to 'Python'},
181 |   author = {Kevin Ushey and JJ Allaire and Yuan Tang},
182 |   year = {2020},
183 |   note = {R package version 1.15},
184 |   url = {https://github.com/rstudio/reticulate},
185 | }
186 | @Manual{R-rio,
187 |   title = {rio: A Swiss-Army Knife for Data I/O},
188 |   author = {Chung-hong Chan and Thomas J. Leeper},
189 |   year = {2018},
190 |   note = {R package version 0.5.16},
191 |   url = {https://CRAN.R-project.org/package=rio},
192 | }
193 | @Manual{R-RSQLite,
194 |   title = {RSQLite: 'SQLite' Interface for R},
195 |   author = {Kirill Müller and Hadley Wickham and David A. James and Seth Falcon},
196 |   year = {2020},
197 |   note = {https://rsqlite.r-dbi.org, https://github.com/r-dbi/RSQLite},
198 | }
199 | @Manual{R-swirl,
200 |   title = {swirl: Learn R, in R},
201 |   author = {Sean Kross and Nick Carchedi and Bill Bauer and Gina Grdina},
202 |   year = {2020},
203 |   note = {R package version 2.4.5},
204 |   url = {http://swirlstats.com},
205 | }
206 | @Manual{R-tibble,
207 |   title = {tibble: Simple Data Frames},
208 |   author = {Kirill Müller and Hadley Wickham},
209 |   year = {2020},
210 |   note = {https://tibble.tidyverse.org/, https://github.com/tidyverse/tibble},
211 | }
212 | @Manual{R-tidyr,
213 |   title = {tidyr: Tidy Messy Data},
214 |   author = {Hadley Wickham and Lionel Henry},
215 |   year = {2020},
216 |   note = {https://tidyr.tidyverse.org, https://github.com/tidyverse/tidyr},
217 | }
218 | @Book{bookdown2016,
219 |   title = {bookdown: Authoring Books and Technical Documents with {R} Markdown},
220 |   author = {Yihui Xie},
221 |   publisher = {Chapman and Hall/CRC},
222 |   address = {Boca Raton, Florida},
223 |   year = {2016},
224 |   note = {ISBN 978-1138700109},
225 |   url = {https://github.com/rstudio/bookdown},
226 | }
227 | @Article{ggmap2013,
228 |   author = {David Kahle and Hadley Wickham},
229 |   title = {ggmap: Spatial Visualization with ggplot2},
230 |   journal = {The R Journal},
231 |   year = {2013},
232 |   volume = {5},
233 |   number = {1},
234 |   pages = {144--161},
235 |   url = {https://journal.r-project.org/archive/2013-1/kahle-wickham.pdf},
236 | }
237 | @Book{ggplot22016,
238 |   author = {Hadley Wickham},
239 |   title = {ggplot2: Elegant Graphics for Data Analysis},
240 |   publisher = {Springer-Verlag New York},
241 |   year = {2016},
242 |   isbn = {978-3-319-24277-4},
243 |   url = {https://ggplot2.tidyverse.org},
244 | }
245 | @Book{knitr2015,
246 |   title = {Dynamic Documents with {R} and knitr},
247 |   author = {Yihui Xie},
248 |   publisher = {Chapman and Hall/CRC},
249 |   address = {Boca Raton, Florida},
250 |   year = {2015},
251 |   edition = {2nd},
252 |   note = {ISBN 978-1498716963},
253 |   url = {https://yihui.org/knitr/},
254 | }
255 | @InCollection{knitr2014,
256 |   booktitle = {Implementing Reproducible Computational Research},
257 |   editor = {Victoria Stodden and Friedrich Leisch and Roger D. Peng},
258 |   title = {knitr: A Comprehensive Tool for Reproducible Research in {R}},
259 |   author = {Yihui Xie},
260 |   publisher = {Chapman and Hall/CRC},
261 |   year = {2014},
262 |   note = {ISBN 978-1466561595},
263 |   url = {http://www.crcpress.com/product/isbn/9781466561595},
264 | }
265 | @Article{lubridate2011,
266 |   title = {Dates and Times Made Easy with {lubridate}},
267 |   author = {Garrett Grolemund and Hadley Wickham},
268 |   journal = {Journal of Statistical Software},
269 |   year = {2011},
270 |   volume = {40},
271 |   number = {3},
272 |   pages = {1--25},
273 |   url = {http://www.jstatsoft.org/v40/i03/},
274 | }
275 | @Article{Rcpp2011,
276 |   title = {{Rcpp}: Seamless {R} and {C++} Integration},
277 |   author = {Dirk Eddelbuettel and Romain Fran\c{c}ois},
278 |   journal = {Journal of Statistical Software},
279 |   year = {2011},
280 |   volume = {40},
281 |   number = {8},
282 |   pages = {1--18},
283 |   url = {http://www.jstatsoft.org/v40/i08/},
284 |   doi = {10.18637/jss.v040.i08},
285 | }
286 | @Book{Rcpp2013,
287 |   title = {Seamless {R} and {C++} Integration with {Rcpp}},
288 |   author = {Dirk Eddelbuettel},
289 |   publisher = {Springer},
290 |   address = {New York},
291 |   year = {2013},
292 |   note = {ISBN 978-1-4614-6867-7},
293 |   doi = {10.1007/978-1-4614-6868-4},
294 | }
295 | @Article{Rcpp2017,
296 |   title = {{Extending \textit{R} with \textit{C++}: A Brief Introduction to \textit{Rcpp}}},
297 |   author = {Dirk Eddelbuettel and James Joseph Balamuta},
298 |   journal = {PeerJ Preprints},
299 |   year = {2017},
300 |   month = {aug},
301 |   volume = {5},
302 |   pages = {e3188v1},
303 |   issn = {2167-9843},
304 |   url = {https://doi.org/10.7287/peerj.preprints.3188v1},
305 |   doi = {10.7287/peerj.preprints.3188v1},
306 | }
307 | 


--------------------------------------------------------------------------------
/preamble.tex:
--------------------------------------------------------------------------------
 1 | \usepackage{booktabs}
 2 | \usepackage{longtable}
 3 | \usepackage{framed,color}
 4 | \definecolor{shadecolor}{RGB}{248,248,248}
 5 | 
 % XeTeX-only patch: every \includegraphics call is wrapped in \XeTeXLinkBox.
 % The original macro is saved as \SavedIncludeGraphics (using \LetLtxMacro
 % from letltxmacro) and the redefined \includegraphics forwards to it
 % through the helper \IncludeGraphicsAux.
 6 | \ifxetex
 7 |   \usepackage{letltxmacro}
 8 |   \setlength{\XeTeXLinkMargin}{1pt}
 9 |   \LetLtxMacro\SavedIncludeGraphics\includegraphics
10 |   \def\includegraphics#1#{% #1 catches optional stuff (star/opt. arg.)
11 |     \IncludeGraphicsAux{#1}%
12 |   }%
 % \IncludeGraphicsAux: #1 = whatever preceded the opening brace,
 % #2 = the braced argument; both are handed on to the saved macro.
13 |   \newcommand*{\IncludeGraphicsAux}[2]{%
14 |     \XeTeXLinkBox{%
15 |       \SavedIncludeGraphics#1{#2}%
16 |     }%
17 |   }%
18 | \fi
19 | 
% rmdblock{<name>}: a shaded* box containing an itemize list that starts with
% a single \item. The first-level bullet (\labelitemi) is redefined to the
% image images/<name>, set 3em wide with its aspect ratio kept, lowered by
% 0.7 of its height and given zero height and depth by \raisebox.
% The end code closes itemize and then shaded*.
20 | \newenvironment{rmdblock}[1]
21 |   {\begin{shaded*}
22 |   \begin{itemize}
23 |   \renewcommand{\labelitemi}{
24 |     \raisebox{-.7\height}[0pt][0pt]{
25 |       {\setkeys{Gin}{width=3em,keepaspectratio}\includegraphics{images/#1}}
26 |     }
27 |   }
28 |   \item
29 |   }
30 |   {
31 |   \end{itemize}
32 |   \end{shaded*}
33 |   }
% Shorthand environments: each one opens rmdblock with a fixed argument
% (note, tip, warning) and closes it again.
34 | \newenvironment{rmdnote}
35 |   {\begin{rmdblock}{note}}
36 |   {\end{rmdblock}}
37 | \newenvironment{rmdtip}
38 |   {\begin{rmdblock}{tip}}
39 |   {\end{rmdblock}}
40 | \newenvironment{rmdwarning}
41 |   {\begin{rmdblock}{warning}}
42 |   {\end{rmdblock}}


--------------------------------------------------------------------------------
/refs.bib:
--------------------------------------------------------------------------------
  1 |  
  2 | @book{berkun2005art,
  3 |   title =        {The art of project management},
  4 |   author =       {Berkun, Scott},
  5 |   year =         {2005},
  6 |   publisher =    {O'Reilly Media}
  7 | }
  8 | 
  9 | @book{Braun2007,
 10 |   title =        {A first course in statistical programming with R},
 11 |   author =       {Braun, John and Murdoch, Duncan J},
 12 |   volume =       {25},
 13 |   year =         {2007},
 14 |   publisher =    {Cambridge University Press Cambridge}
 15 | }
 16 | 
 17 | @book{Burns2011,
 18 |   title =        {The R inferno},
 19 |   author =       {Burns, Patrick},
 20 |   year =         {2011},
 21 |   publisher =    {Lulu.com}
 22 | }
 23 | 
 24 | @article{Codd1979,
 25 |   abstract =     {During the last three or four years several investigators have
 26 |                   been exploring “semantic models” for formatted databases. The
 27 |                   intent is to capture (in a more or less formal way) more of
 28 |                   the meaning of the data so that database design can become
 29 |                   more systematic and the database system itself can behave more
 30 |                   intelligently. Two major thrusts are clear: (I) the search for
 31 |                   meaningful units that are as small as possible--atomic
 32 |                   semantics; (2) the search for meaningful units that are larger
 33 |                   than the usual n-ary relation-molecular semantics. In this
 34 |                   paper we propose extensions to the relational model to support
 35 |                   certain atomic and molecular semantics. These extensions
 36 |                   represent a synthesis of many ideas from the published work in
 37 |                   semantic modeling plus the introduction of new rules for
 38 |                   insertion, update, and deletion, as well as new algebraic
 39 |                   operators.},
 40 |   author =       {Codd, E. F.},
 41 |   doi =          {10.1145/320107.320109},
 42 |   issn =         {03625915},
 43 |   journal =      {ACM Transactions on Database Systems},
 44 |   keywords =     {22,29,3,33,34,39,4,70,73,and phrases,base,conceptual
 45 |                   model,conceptual schema,cr categories,data model,data
 46 |                   semantics,database,database schema,entity
 47 |                   model,knowledge,knowledge representation,relation,relational
 48 |                   database,relational model,relational schema,semantic model},
 49 |   number =       {4},
 50 |   pages =        {397--434},
 51 |   title =        {{Extending the database relational model to capture more
 52 |                   meaning}},
 53 |   url =          {http://sites.google.com/site/eherrerao902/p397.pdf},
 54 |   volume =       {4},
 55 |   year =         {1979}
 56 | }
 57 | 
 58 | @misc{eddelbuettel2010benchmarking,
 59 |   title =        {Benchmarking single-and multi-core BLAS implementations and
 60 |                   GPUs for use with R},
 61 |   author =       {Eddelbuettel, Dirk},
 62 |   year =         {2010},
 63 |   publisher =    {Mathematica}
 64 | }
 65 | 
 66 | @article{Eddelbuettel2011,
 67 |   title =        {Rcpp: Seamless R and C++ integration},
 68 |   author =       {Eddelbuettel, Dirk and Fran{\c{c}}ois, Romain},
 69 |   journal =      {Journal of Statistical Software},
 70 |   volume =       {40},
 71 |   number =       {8},
 72 |   pages =        {1--18},
 73 |   year =         {2011}
 74 | }
 75 | 
 76 | @book{Eddelbuettel2013,
 77 |   title =        {Seamless R and C++ integration with Rcpp},
 78 |   author =       {Eddelbuettel, Dirk},
 79 |   year =         {2013},
 80 |   publisher =    {Springer}
 81 | }
 82 | 
 83 | @article{Eddelbuettel_2011,
 84 |   title =        {Rcpp: Seamless R and C++ integration},
 85 |   volume =       40,
 86 |   number =       8,
 87 |   journal =      {Journal of Statistical Software},
 88 |   author =       {Eddelbuettel, Dirk and François, Romain and Allaire, J. and
 89 |                   Chambers, John and Bates, Douglas and Ushey, Kevin},
 90 |   year =         2011,
 91 |   pages =        {1--18}
 92 | }
 93 | 
 94 | @article{Goldberg1991,
 95 |   title =        {What every computer scientist should know about floating-point
 96 |                   arithmetic},
 97 |   author =       {Goldberg, David},
 98 |   journal =      {ACM Computing Surveys (CSUR)},
 99 |   volume =       {23},
100 |   number =       {1},
101 |   pages =        {5--48},
102 |   year =         {1991},
103 |   publisher =    {ACM}
104 | }
105 | 
106 | @book{grolemund_r_2016,
107 |   title =        {R for {{Data Science}}},
108 |   isbn =         {978-1-4919-1039-9},
109 |   pagetotal =    {250},
110 |   timestamp =    {2016-02-14T19:42:29Z},
111 |   langid =       {english},
112 |   publisher =    {{O'Reilly Media}},
113 |   author =       {Grolemund, G. and Wickham, H.},
114 |   year =         {2016}
115 | }
116 | 
117 | @article{kersten2011researcher,
118 |   title =        {The researcher’s guide to the data deluge: Querying a
119 |                   scientific database in just a few seconds},
120 |   author =       {Kersten, Martin L and Idreos, Stratos and Manegold, Stefan and
121 |                   Liarou, Erietta and others},
122 |   journal =      {PVLDB Challenges and Visions},
123 |   volume =       {3},
124 |   year =         {2011}
125 | }
126 | 
127 | @article{lovelace1842translator,
128 |   title =        {Translator's notes to an article on Babbage's Analytical
129 |                   Engine},
130 |   author =       {Lovelace, Ada Countess},
131 |   journal =      {Scientific Memoirs},
132 |   volume =       {3},
133 |   pages =        {691--731},
134 |   year =         {1842}
135 | }
136 | @book{mccallum2011,
137 |   title={Parallel R},
138 |   author={McCallum, Ethan and Weston, Stephen},
139 |   year={2011},
140 |   publisher={{O'Reilly Media}}
141 | }
142 | @book{Mcconnell2004,
143 |   title =        {Code complete},
144 |   author =       {McConnell, Steve},
145 |   year =         {2004},
146 |   publisher =    {Pearson Education}
147 | }
148 | 
149 | @article{Miller1992,
150 |   title={Algorithm AS 274: Least squares routines to supplement those of Gentleman},
151 |   author={Miller, Alan J},
152 |   journal={Applied Statistics},
153 |   pages={458--478},
154 |   year={1992},
155 |   publisher={JSTOR}
156 | }
157 | 
158 | @article{PMBoK2000,
159 |   title =        {Guide to the Project Management Body of Knowledge},
160 |   journal =      {Project Management Institute, Pennsylvania USA},
161 |   author =       {PMBoK, A},
162 |   year =         {2000}
163 | }
164 | 
165 | @article{sekhon2006art,
166 |   title =        {The Art of Benchmarking: Evaluating the Performance of R on
167 |                   Linux and OS X},
168 |   author =       {Sekhon, Jasjeet S},
169 |   journal =      {The Political Methodologist},
170 |   volume =       {14},
171 |   number =       {1},
172 |   pages =        {15--19},
173 |   year =         {2006}
174 | }
175 | 
176 | @book{Spector_2008,
177 |   title =        {Data manipulation with R},
178 |   ISBN =         {0-387-74730-3},
179 |   publisher =    {Springer Science \& Business Media},
180 |   author =       {Spector, Phil},
181 |   year =         {2008}
182 | }
183 | 
184 | @article{visser_speeding_2015,
185 |   title =        {Speeding {{Up Ecological}} and {{Evolutionary Computations}}
186 |                   in {{R}}; {{Essentials}} of {{High Performance Computing}} for
187 |                   {{Biologists}}},
188 |   volume =       {11},
189 |   issn =         {1553-7358},
190 |   url =          {http://dx.plos.org/10.1371/journal.pcbi.1004140},
191 |   doi =          {10.1371/journal.pcbi.1004140},
192 |   timestamp =    {2016-02-06T22:24:02Z},
193 |   langid =       {english},
194 |   number =       {3},
195 |   journaltitle = {PLOS Computational Biology},
196 |   shortjournal = {PLOS Comput. Biol.},
197 |   author =       {Visser, Marco D. and McMahon, Sean M. and Merow, Cory and
198 |                   Dixon, Philip M. and Record, Sydne and Jongejans, Eelke},
199 |   editor =       {Ouellette, Francis},
200 |   urldate =      {2016-02-06},
201 |   date =         {2015-03-26},
202 |   pages =        {e1004140},
203 | }
204 | 
205 | @book{Wickham2014,
206 |   title =        {Advanced R},
207 |   author =       {Wickham, Hadley},
208 |   year =         {2014},
209 |   publisher =    {CRC Press}
210 | }
211 | 
212 | @article{Wickham_2014,
213 |   title =        {Tidy data},
214 |   volume =       {14},
215 |   ISSN =         {1548-7660},
216 |   number =       {5},
217 |   journal =      {The Journal of Statistical Software},
218 |   author =       {Wickham, Hadley},
219 |   year =         {2014}
220 | }
221 | 
222 | @book{Wickham_2015,
223 |   title =        {R Packages},
224 |   ISBN =         {978-1-4919-1056-6},
225 |   abstractNote = {Turn your R code into packages that others can easily download
226 |                   and use. This practical book shows you how to bundle reusable
227 |                   R functions, sample data, and documentation together by
228 |                   applying author Hadley Wickham’s package development
229 |                   philosophy. In the process, you’ll work with devtools,
230 |                   roxygen, and testthat, a set of R packages that automate
231 |                   common development tasks. Devtools encapsulates best practices
232 |                   that Hadley has learned from years of working with this
233 |                   programming language. Ideal for developers, data scientists,
234 |                   and programmers with various backgrounds, this book starts you
235 |                   with the basics and shows you how to improve your package
236 |                   writing over time. You’ll learn to focus on what you want your
237 |                   package to do, rather than think about package structure. Learn
238 |                   about the most useful components of an R package, including
239 |                   vignettes and unit tests. Automate anything you can, taking
240 |                   advantage of the years of development experience embodied in
241 |                   devtools. Get tips on good style, such as organizing functions
242 |                   into files. Streamline your development process with
243 |                   devtools. Learn the best way to submit your package to the
244 |                   Comprehensive R Archive Network (CRAN). Learn from a
245 |                   well-respected member of the R community who created 30 R
246 |                   packages, including ggplot2, dplyr, and tidyr},
247 |   publisher =    {{O'Reilly Media}},
248 |   author =       {Wickham, Hadley},
249 |   year =         {2015},
250 |   month =        {Mar}
251 | }
252 | 
253 | @book{xie2015dynamic,
254 |   title =        {Dynamic Documents with R and knitr},
255 |   author =       {Xie, Yihui},
256 |   volume =       {29},
257 |   year =         {2015},
258 |   publisher =    {CRC Press}
259 | }
260 | 
261 | 
262 | @book{cotton_learning_2013,
263 |   title = {Learning {{R}}},
264 |   isbn = {978-1-4493-5719-1},
265 |   abstract = {Learn how to perform data analysis with the R language and software environment, even if you have little or no programming experience. With the tutorials in this hands-on guide, you'll learn how to use the essential R tools you need to know to analyze data, including data types and programming concepts. The second half of Learning R shows you real data analysis in action by covering everything from importing data to publishing your results. Each chapter in the book includes a quiz on what you've learned, and concludes with exercises, most of which involve writing R code. Write a simple R program, and discover what the language can do. Use data types such as vectors, arrays, lists, data frames, and strings. Execute code conditionally or repeatedly with branches and loops. Apply R add-on packages, and package your own work for others. Learn how to clean data you import from a variety of sources. Understand data through visualization and summary statistics. Use statistical models to pass quantitative judgments about data and make predictions. Learn what to do when things go wrong while writing data analysis code},
266 |   language = {en},
267 |   timestamp = {2016-05-02T07:41:32Z},
268 |   publisher = {{O'Reilly Media}},
269 |   author = {Cotton, Richard},
270 |   month = sep,
271 |   year = {2013},
272 |   keywords = {Computers / Mathematical \& Statistical Software,Computers / Programming Languages / General,Mathematics / Probability \& Statistics / General}
273 | }
274 | 
275 | @book{chang2012r,
276 |   title={R graphics cookbook},
277 |   author={Chang, Winston},
278 |   year={2012},
279 |   publisher={{O'Reilly Media}}
280 | }
281 | 
282 | @article{jensen2011can,
283 |   title={Can worksite nutritional interventions improve productivity and firm profitability? A literature review},
284 |   author={Jensen, J{\o}rgen Dejg{\aa}rd},
285 |   journal={Perspectives in public health},
286 |   volume={131},
287 |   number={4},
288 |   pages={184--192},
289 |   year={2011},
290 |   publisher={SAGE Publications}
291 | }
292 | 
293 | @article{pereira2015impact,
294 |   title={The impact of onsite workplace health-enhancing physical activity interventions on worker productivity: a systematic review},
295 |   author={Pereira, Michelle Jessica and Coombes, Brooke Kaye and Comans, Tracy Anne and Johnston, Venerina},
296 |   journal={Occupational and environmental medicine},
297 |   volume={72},
298 |   number={6},
299 |   pages={401--412},
300 |   year={2015},
301 |   publisher={BMJ Publishing Group Ltd}
302 | }
303 | 
304 | @article{grant2013exploration,
305 |   title={An exploration of the psychological factors affecting remote e-worker's job effectiveness, well-being and work-life balance},
306 |   author={Grant, Christine A and Wallace, Louise M and Spurgeon, Peter C},
307 |   journal={Employee Relations},
308 |   volume={35},
309 |   number={5},
310 |   pages={527--546},
311 |   year={2013},
312 |   publisher={Emerald Group Publishing Limited}
313 | }
314 | 
315 | @book{janert2010data,
316 |   title={Data analysis with open source tools},
317 |   author={Janert, Philipp K},
318 |   year={2010},
319 |   publisher={{O'Reilly Media}}
320 | }
321 | 
322 | @article{kruchten2012technical,
323 |   title={Technical debt: from metaphor to theory and practice},
324 |   author={Kruchten, Philippe and Nord, Robert L and Ozkaya, Ipek},
325 |   journal={IEEE software},
326 |   number={6},
327 |   pages={18--21},
328 |   year={2012},
329 |   publisher={IEEE}
330 | }
331 | 
332 | @book{cotton_testing_2016,
333 | 	title = {Testing {R} {Code}},
334 | 	abstract = {The traditional user of R software is a statistician or data analyst, not a computer programmer. So they do not have the necessary skills to control the quality of their code. This book covers run-time testing (assertions) and development-time testing (unit tests) of R-code, along with tips for writ},
335 | 	urldate = {2016-06-05},
336 | 	author = {Cotton, Richard},
337 | 	month = dec,
338 | 	year = {2016}
339 | }
340 | 
341 | @book{lovelace_spatial_2016,
342 | 	title = {Spatial microsimulation with {R}},
343 | 	url = {https://www.crcpress.com/Spatial-Microsimulation-with-R/Lovelace-Dumont/9781498711548},
344 | 	publisher = {CRC Press},
345 | 	author = {Lovelace, Robin and Dumont, Morgane},
346 | 	year = {2016}
347 | }
348 | 
349 | @article{JSSv071i02,
350 |    author = {Dirk Eddelbuettel and Murray Stokely and Jeroen Ooms},
351 |    title = {RProtoBuf: Efficient Cross-Language Data Serialization in R},
352 |    journal = {Journal of Statistical Software},
353 |    volume = {71},
354 |    number = {2},
355 |    year = {2016},
356 |    keywords = {R; Rcpp; Protocol Buffers; serialization; cross-platform},
357 |    abstract = {Modern data collection and analysis pipelines often involve a sophisticated mix of applications written in general purpose and specialized programming languages. Many formats commonly used to import and export data between different programs or systems, such as CSV or JSON, are verbose, inefficient, not type-safe, or tied to a specific programming language. Protocol Buffers are a popular method of serializing structured data between applications  -  while remaining independent of programming languages or operating systems. They offer a unique combination of features, performance, and maturity that seems particularly well suited for data-driven applications and numerical computing. The RProtoBuf package provides a complete interface to Protocol Buffers from the R environment for statistical computing. This paper outlines the general class of data serialization requirements for statistical computing, describes the implementation of the RProtoBuf package, and illustrates its use with example applications in large-scale data collection pipelines and web services.},
358 |    issn = {1548-7660},
359 |    pages = {1--24},
360 |    doi = {10.18637/jss.v071.i02},
361 |    url = {https://www.jstatsoft.org/index.php/jss/article/view/v071i02}
362 | }
363 | @article{ba_aa_ath_state_2012,
364 | 	title = {The state of naming conventions in {R}},
365 | 	volume = {4},
366 | 	url = {https://journal.r-project.org/archive/2012-2/RJournal_2012-2_Baaaath.pdf},
367 | 	number = {2},
368 | 	urldate = {2016-07-15},
369 | 	journal = {The R Journal},
370 | 	author = {B{\aa}{\aa}th, Rasmus},
371 | 	year = {2012},
372 | 	pages = {74--75},
373 | 	file = {The State of Naming Conventions in R - RJournal_2012-2_Baaaath.pdf:/home/robin/.zotero/zotero/433bm2oj.default/zotero/storage/PR6UEE9W/RJournal_2012-2_Baaaath.pdf:application/pdf}
374 | }
375 | @article{chambers_object-oriented_2014,
376 | 	title = {Object-{Oriented} {Programming}, {Functional} {Programming} and {R}},
377 | 	volume = {29},
378 | 	issn = {0883-4237},
379 | 	url = {http://projecteuclid.org/euclid.ss/1408368569},
380 | 	doi = {10.1214/13-STS452},
381 | 	language = {en},
382 | 	number = {2},
383 | 	urldate = {2016-07-24},
384 | 	journal = {Statistical Science},
385 | 	author = {Chambers, John M.},
386 | 	month = may,
387 | 	year = {2014},
388 | 	pages = {167--180},
389 | 	file = {Object-Oriented Programming, Functional Programming and R - 1409.3531.pdf:/home/robin/.zotero/zotero/433bm2oj.default/zotero/storage/6XXGFDQA/1409.3531.pdf:application/pdf}
390 | }
391 | @book{peng_r_2014,
392 | 	title = {R {Programming} for {Data} {Science}},
393 | 	url = {https://leanpub.com/rprogramming},
394 | 	abstract = {Learn the fundamentals for R programming and gain the tools needed for doing data science.},
395 | 	urldate = {2016-09-13},
396 | 	publisher = {Leanpub},
397 | 	author = {Peng, Roger},
398 | 	month = feb,
399 | 	year = {2014},
400 | 	file = {Snapshot:/home/robin/.mozilla/firefox/k0kvn64l.default/zotero/storage/GFJKM45W/rprogramming.html:text/html}
401 | }
402 | @article{sanchez_handling_2013,
403 | 	title = {Handling and processing strings in {R}},
404 | 	url = {http://www.academia.edu/download/36290733/Handling_and_Processing_Strings_in_R.pdf},
405 | 	urldate = {2016-09-19},
406 | 	journal = {Trowchez Editions},
407 | 	author = {Sanchez, Gaston},
408 | 	year = {2013},
409 | 	file = {Handling_and_Processing_Strings_in_R.pdf:/home/robin/.mozilla/firefox/k0kvn64l.default/zotero/storage/ZM3ESV4F/Handling_and_Processing_Strings_in_R.pdf:application/pdf}
410 | }
411 | 
412 | @article{wickham2010stringr,
413 |   title={stringr: modern, consistent string processing},
414 |   author={Wickham, Hadley},
415 |   journal={The R Journal},
416 |   volume={2},
417 |   number={2},
418 |   pages={38--40},
419 |   year={2010}
420 | }
421 | 
422 | @article{team2016installation,
423 |   title={R Installation and Administration},
424 |   author={{R Core Team}},
425 |   journal={R Foundation for Statistical Computing},
426 | 	url = {https://cran.r-project.org/doc/manuals/r-release/R-admin.html},
427 |   year={2016}
428 | }
429 | 
430 | 


--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
1 | efficientR.so
2 | *.o
3 | RcppExports.cpp
4 | 


--------------------------------------------------------------------------------
/src/mean_cpp.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | using namespace Rcpp;
 3 | 
 4 | // [[Rcpp::export]]
 5 | double mean_cpp(NumericVector x){
 6 |   int i;
 7 |   int n = x.size();
 8 |   double mean = 0;
 9 | 
10 |   for(i=0; i<n; i++){
11 |     mean = mean + x[i]/n;
12 |   }
13 |   return mean;
14 | }
15 | 
16 | // [[Rcpp::export]]
17 | NumericVector res_cpp(NumericVector x, NumericVector y){
18 |   int i;
19 |   int n = x.size();
20 |   NumericVector residuals(n);
21 |   for(i=0; i<n; i++){
22 |     residuals[i] = pow(x[i] - y[i], 2);
23 |   }
24 |   return residuals;
25 | }
26 | 
27 | // [[Rcpp::export]]
28 | NumericVector res_sugar(NumericVector x, NumericVector y){
29 |   return pow(x-y, 2);
30 | }
31 | 


--------------------------------------------------------------------------------
/src/precision.cpp:
--------------------------------------------------------------------------------
 1 | /* Not used */
 2 | #include <Rcpp.h>
 3 | using namespace Rcpp;
 4 | 
 5 | // [[Rcpp::export]]
 6 | float test1() {
 7 |   float a = 1.0 / 81;
 8 |   float b = 0;
 9 |   for (int i = 0; i < 729; ++ i)
10 |     b = b + a;
11 |   return b;
12 | }
13 | 
// Double-precision accumulation demo.
//
// Adds 1.0 / 81 to a double total 729 times (729 = 9 * 81, so exact
// arithmetic would give 9) and returns the total.
//
// The loop body is braced and the return sits at function level: the
// return statement runs once, after the loop has finished, and the layout
// now shows that.
// [[Rcpp::export]]
double test2() {
  double a = 1.0 / 81;
  double b = 0;
  for (int i = 0; i < 729; ++i) {
    b += a;
  }
  return b;
}


--------------------------------------------------------------------------------