├── .devcontainer └── devcontainer.json ├── .gitignore ├── Dockerfile ├── README.md ├── notebook.ipynb ├── requirements.txt ├── setup-fte.R └── setup.R /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the 2 | // README at: https://github.com/devcontainers/templates/tree/main/src/python 3 | { 4 | "name": "Python 3", 5 | // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile 6 | "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye", 7 | "customizations": { 8 | "vscode": { 9 | "extensions": [ 10 | "GitHub.copilot", 11 | "ms-python.python", 12 | "ms-python.vscode-pylance", 13 | "cweijan.vscode-office", 14 | "ms-toolsai.jupyter" 15 | ] 16 | } 17 | }, 18 | 19 | // Features to add to the dev container. More info: https://containers.dev/features. 20 | // "features": {}, 21 | 22 | // Use 'forwardPorts' to make a list of ports inside the container available locally. 23 | // "forwardPorts": [], 24 | 25 | // Use 'onCreateCommand' to run commands when the container is created. 26 | "onCreateCommand": "sh setup.sh", 27 | 28 | // Configure tool-specific properties. 29 | // "customizations": {}, 30 | 31 | // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. 
32 | // "remoteUser": "root", 33 | 34 | // Minimum requirements for machine 35 | "hostRequirements": { 36 | "cpus": 4 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | .DS_Store 3 | .ipynb_checkpoints/ 4 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:1 2 | 3 | FROM python:bullseye 4 | ENV DEBIAN_FRONTEND noninteractive 5 | 6 | COPY . . 7 | 8 | RUN apt-get update 9 | 10 | RUN apt-get install -y r-base \ 11 | libharfbuzz-dev \ 12 | libfribidi-dev \ 13 | libfreetype6-dev \ 14 | libpng-dev \ 15 | libtiff5-dev \ 16 | libjpeg-dev 17 | 18 | RUN Rscript setup.R 19 | 20 | RUN pip install -r requirements.txt 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Jupyter Quickstart 2 | 3 | A Jupyter notebook that mixes Python3 and R. 4 | 5 | ## Example Notebook 6 | 7 | https://nbviewer.jupyter.org/github/dmil/jupyter-quickstart/blob/master/notebook.ipynb 8 | 9 | ## Requirements 10 | 11 | * Homebrew 12 | * Python 3 13 | 14 | ## Quickstart 15 | 16 | 1. Install some packages with [HomeBrew](https://brew.sh/) 17 | 18 | ```bash 19 | brew install r 20 | brew install libgit2 21 | ``` 22 | 23 | 2. Clone the repo and `cd` into the folder you cloned 24 | 25 | 3. Install R and python packages 26 | 27 | ```bash 28 | Rscript setup.R 29 | pip3 install -r requirements.txt 30 | ``` 31 | 32 | 4. Open the jupyter notebook 33 | 34 | ```bash 35 | jupyter notebook notebook.ipynb 36 | ``` 37 | 38 | ## Additional steps for FiveThirtyEight Writers 39 | 40 | See additional setup instructions inside `setup-fte.R`. 
41 | -------------------------------------------------------------------------------- /notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python and R\n", 8 | "\n", 9 | "This setup allows you to use *Python* and *R* in the same notebook.\n", 10 | "\n", 11 | "To set up a similar notebook, see quickstart instructions here:\n", 12 | "\n", 13 | "https://github.com/dmil/jupyter-quickstart\n", 14 | "\n", 15 | "Some thoughts on why I like this setup and how I use it at the [end](notebook.ipynb#Thoughts) of this notebook." 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 1, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "%load_ext rpy2.ipython\n", 25 | "%load_ext autoreload\n", 26 | "%autoreload 2\n", 27 | "\n", 28 | "%matplotlib inline \n", 29 | "from matplotlib import rcParams\n", 30 | "rcParams['figure.figsize'] = (16, 100)\n", 31 | "\n", 32 | "import warnings\n", 33 | "from rpy2.rinterface import RRuntimeWarning\n", 34 | "warnings.filterwarnings(\"ignore\") # Ignore all warnings\n", 35 | "# warnings.filterwarnings(\"ignore\", category=RRuntimeWarning) # Show some warnings\n", 36 | "\n", 37 | "import pandas as pd\n", 38 | "import numpy as np\n", 39 | "import matplotlib.pyplot as plt\n", 40 | "from IPython.display import display, HTML" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "data": { 50 | "application/javascript": [ 51 | "// Disable auto-scrolling\n", 52 | "IPython.OutputArea.prototype._should_scroll = function(lines) {\n", 53 | " return false;\n", 54 | "}\n" 55 | ], 56 | "text/plain": [ 57 | "" 58 | ] 59 | }, 60 | "metadata": {}, 61 | "output_type": "display_data" 62 | } 63 | ], 64 | "source": [ 65 | "%%javascript\n", 66 | "// Disable auto-scrolling\n", 67 | "IPython.OutputArea.prototype._should_scroll = 
function(lines) {\n", 68 | " return false;\n", 69 | "}" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "This is a Python notebook, but below is an R cell. The `%%R` at the top of the cell indicates that the code in this cell will be R code." 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 3, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "name": "stderr", 86 | "output_type": "stream", 87 | "text": [ 88 | "R[write to console]: Loading required package: tidyverse\n", 89 | "\n" 90 | ] 91 | }, 92 | { 93 | "name": "stdout", 94 | "output_type": "stream", 95 | "text": [ 96 | "── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──\n", 97 | "✔ ggplot2 3.4.1 ✔ purrr 1.0.1\n", 98 | "✔ tibble 3.1.8 ✔ dplyr 1.1.0\n", 99 | "✔ tidyr 1.3.0 ✔ stringr 1.5.0\n", 100 | "✔ readr 2.1.3 ✔ forcats 0.5.1\n", 101 | "── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──\n", 102 | "✖ dplyr::filter() masks stats::filter()\n", 103 | "✖ dplyr::lag() masks stats::lag()\n" 104 | ] 105 | } 106 | ], 107 | "source": [ 108 | "%%R\n", 109 | "\n", 110 | "# My commonly used R imports\n", 111 | "\n", 112 | "require('tidyverse')" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "Below is a Python cell, there is nothing at the top of the cell because it is python by default." 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 4, 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "data": { 129 | "text/html": [ 130 | "
\n", 131 | "\n", 144 | "\n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | "
AB
017
11041
2361
3797
48040
\n", 180 | "
" 181 | ], 182 | "text/plain": [ 183 | " A B\n", 184 | "0 1 7\n", 185 | "1 10 41\n", 186 | "2 36 1\n", 187 | "3 79 7\n", 188 | "4 80 40" 189 | ] 190 | }, 191 | "execution_count": 4, 192 | "metadata": {}, 193 | "output_type": "execute_result" 194 | } 195 | ], 196 | "source": [ 197 | "# EXAMPLE PYTHON CELL\n", 198 | "\n", 199 | "# create a dataframe of random numbers with two columns, A and B\n", 200 | "df = pd.DataFrame(\n", 201 | " np.random.randint(0,100,size=(100, 2)), columns=list('AB'))\n", 202 | "\n", 203 | "# display first 5 rows\n", 204 | "df.head(5)" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "Now another R cell, `%%R -i df` indicates that this is an R cell and imports the dataframe from Python. This is one way to pass data back and forth from R to Python. Another is to write a CSV file in Python and read it in R (or vice versa)" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 5, 217 | "metadata": {}, 218 | "outputs": [ 219 | { 220 | "data": { 221 | "image/png": "\n" 222 | }, 223 | "metadata": {}, 224 | "output_type": "display_data" 225 | } 226 | ], 227 | "source": [ 228 | "%%R -i df\n", 229 | "\n", 230 | "# Plotting using R\n", 231 | "plt <- ggplot(df) +\n", 232 | " geom_point(aes(A,B))\n", 233 | "\n", 234 | "plt" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "## Thoughts\n", 242 | "\n", 243 | "#### Why I like this setup\n", 244 | "- It lets me do some things in Python (hitting APIs, scraping, etc)\n", 245 | "- It lets do other things in R (statistical calculations, visualization, etc)\n", 246 | "\n", 247 | "#### How I use this setup\n", 248 | "- If things start to get complicated, I move Python functions into `.py` files in the repo and then call those functions from the notebook. 
This keeps the notebook clean.\n", 249 | "- While you can read the pandas dataframe directly into R by putting `%%R -i df` at the top of the cell, I prefer to save human-readable CSV files at the end of the python cells (usually _wide_ format), and then read those CSV files into the R cells and convert it to whatever format I need to visualize it (usually _long_ format). This allows me to:\n", 250 | " - send the CSV data files along with the article to my editor\n", 251 | " - file the R code + CSV files to the charts team and they can just run it and modify as needed\n", 252 | "\n", 253 | "#### Other thoughts about this setup\n", 254 | "- I have found it useful to learn R (or Python) while on deadline. When I was learning R it allowed me to switch back to Python if I was stuck on a step and the deadline was approaching, and then switch back to R to finish up.\n", 255 | "- Storing code away in functions in separate files let me keep the notebooks very clean and readable and proved to be a good way to annotate my process for the quantitative edit.\n", 256 | "- One-click pipeline, I could quickly change a variable and re-run an analysis with new data or a different query.\n", 257 | "\n" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [] 266 | } 267 | ], 268 | "metadata": { 269 | "kernelspec": { 270 | "display_name": "Python 3 (ipykernel)", 271 | "language": "python", 272 | "name": "python3" 273 | }, 274 | "language_info": { 275 | "codemirror_mode": { 276 | "name": "ipython", 277 | "version": 3 278 | }, 279 | "file_extension": ".py", 280 | "mimetype": "text/x-python", 281 | "name": "python", 282 | "nbconvert_exporter": "python", 283 | "pygments_lexer": "ipython3", 284 | "version": "3.9.7" 285 | } 286 | }, 287 | "nbformat": 4, 288 | "nbformat_minor": 4 289 | } 290 | -------------------------------------------------------------------------------- /requirements.txt: 
--------------------------------------------------------------------------------
1 | jupyter
2 | pandas
3 | scikit-learn
4 | rpy2
5 | tzlocal
6 | matplotlib
7 |
--------------------------------------------------------------------------------
/setup-fte.R:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env Rscript
2 |
3 | # Step 1: Make sure your computer has the FiveThirtyEight fonts installed
4 | # (ask someone on viz team for the fonts or find them inside our Google Drive)
5 |
6 | # Step 2: Make a GitHub personal access token
7 | # https://github.com/settings/tokens
8 | # make sure all boxes in the repo scope are checked.
9 |
10 | # Step 3: Insert the auth token below in place of **** and then run this script
11 |
12 | # library() stops with an error when a package is missing; require() would
13 | # only warn and return FALSE, leaving the calls below to fail obscurely.
14 | library(extrafont)
15 | library(devtools)
16 | install_github("fivethirtyeight/theme538", auth_token = "****")
17 | font_import(prompt = FALSE)
18 |
--------------------------------------------------------------------------------
/setup.R:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env Rscript
2 |
3 | # Installs the R packages used by the notebook and stops with an error
4 | # if any of them is not present in the library afterwards.
5 |
6 | # Install one or more packages from CRAN.
7 | #   pkg:   character vector of package names
8 | #   repos: CRAN mirror URL (defaults to the HTTPS cloud mirror)
9 | installPackage <- function(pkg, repos = "https://cloud.r-project.org") {
10 |   install.packages(pkg, repos = repos, verbose = TRUE)
11 | }
12 |
13 | # A list of packages to install, including tidyverse
14 | # (this includes ggplot2, dplyr, and several others...)
15 | # https://www.tidyverse.org/packages/
16 | libs <- c("tidyverse", "extrafont", "Cairo", "devtools", "gridExtra")
17 |
18 | # Apply the function above to the list of packages; invisible() keeps the
19 | # list of NULL return values from being auto-printed.
20 | invisible(lapply(libs, installPackage))
21 |
22 | # install.packages() only warns when a package fails to install, so look
23 | # each package up in the library instead of assuming success.
24 | # system.file() returns "" for a package that cannot be found.
25 | is_installed <- vapply(
26 |   libs,
27 |   function(pkg) nzchar(system.file(package = pkg)),
28 |   logical(1)
29 | )
30 |
31 | if (!all(is_installed)) {
32 |   stop(
33 |     "Failed to install: ", paste(libs[!is_installed], collapse = ", "),
34 |     call. = FALSE
35 |   )
36 | }
37 |
38 | # Print successful completion message (one line per package)
39 | writeLines(sprintf("Successfully installed: %s", libs))
40 |
--------------------------------------------------------------------------------