├── .Rprofile
├── .devcontainer
│   └── devcontainer.json
├── .editorconfig
├── .gitignore
├── .vscode
│   └── settings.json
├── LICENSE.md
├── README.md
├── _quarto.yml
├── babel.config.js
├── data
│   └── .gitignore
├── docs
│   ├── .gitignore
│   ├── _cleanup-knitr.qmd
│   ├── _setup-data-1.qmd
│   ├── _setup-knitr.qmd
│   ├── indexing.qmd
│   ├── info.qmd
│   ├── intro.qmd
│   ├── method_chaining.qmd
│   ├── tidy.qmd
│   └── timeseries.qmd
├── docusaurus.config.js
├── package-lock.json
├── package.json
├── renv.lock
├── renv
│   ├── .gitignore
│   └── activate.R
├── requirements.txt
├── sidebars.js
├── src
│   ├── css
│   │   └── custom.css
│   └── pages
│       └── index.module.css
└── static
    ├── .nojekyll
    └── img
        ├── favicon.ico
        └── logo.svg
/.Rprofile:
--------------------------------------------------------------------------------
1 | source("renv/activate.R")
2 |
--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 | "image": "ghcr.io/rocker-org/devcontainer/r-ver:4",
3 | "features": {
4 | "ghcr.io/rocker-org/devcontainer-features/quarto-cli": {
5 | "version": "1.5.57"
6 | },
7 | "ghcr.io/eitsupi/devcontainer-features/go-task": {},
8 | "ghcr.io/devcontainers/features/node": {},
9 | "ghcr.io/devcontainers/features/python": {},
10 | "ghcr.io/eitsupi/devcontainer-features/duckdb-cli": {},
11 | "ghcr.io/rocker-org/devcontainer-features/renv-cache": {}
12 | },
13 | "customizations": {
14 | "vscode": {
15 | "extensions": [
16 | "editorconfig.editorconfig",
17 | "ms-toolsai.jupyter",
18 | "prql-lang.prql-vscode"
19 | ]
20 | }
21 | },
22 | "updateContentCommand": {
23 | "r-packages": "R -q -e 'renv::restore()'",
24 | "python-packages": "python3 -m pip install jupyter jupyter-cache -r requirements.txt",
25 | "npm-packages": "npm install"
26 | },
27 | "containerEnv": {
28 | "NOT_CRAN": "true",
29 | // "RENV_CONFIG_PAK_ENABLED": "TRUE"
30 | "RENV_CONFIG_SANDBOX_ENABLED": "FALSE"
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*]
4 | indent_style = space
5 | indent_size = 2
6 | end_of_line = lf
7 | charset = utf-8
8 | trim_trailing_whitespace = false
9 | insert_final_newline = true
10 |
11 | [*.json]
12 | indent_style = tab
13 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Dependencies
2 | /node_modules
3 |
4 | # Production
5 | /build
6 |
7 | # Generated files
8 | .docusaurus
9 | .cache-loader
10 |
11 | # Misc
12 | .DS_Store
13 | .env.local
14 | .env.development.local
15 | .env.test.local
16 | .env.production.local
17 |
18 | npm-debug.log*
19 | yarn-debug.log*
20 | yarn-error.log*
21 |
22 | /.quarto/
23 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "jupyter.notebookFileRoot": "${workspaceFolder}",
3 | "python.formatting.provider": "black",
4 | "[r]": {
5 | "editor.tabSize": 2
6 | }
7 | }
8 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | Copyright (c) 2023 querying-with-prql authors
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Querying with PRQL
2 |
3 | This is a book-style website built with Quarto and Docusaurus.
4 |
5 | To build it, we need the Quarto CLI, Node.js, R, Python, and a lot of packages!
6 | Please check the [devcontainer.json](.devcontainer/devcontainer.json) file.
7 |
8 | Build with:
9 |
10 | ```sh
11 | quarto render --cache-refresh
12 | ```
13 |
14 | And deploy with:
15 |
16 | ```sh
17 | npm run deploy
18 | ```
19 |
20 | ## License
21 |
22 | Licensed under the MIT License.
23 |
--------------------------------------------------------------------------------
/_quarto.yml:
--------------------------------------------------------------------------------
1 | project:
2 | type: docusaurus
3 | execute-dir: project
4 | render:
5 | - /docs/info.qmd
6 | - /docs/[!_]*.qmd
7 |
8 | format:
9 | docusaurus-md:
10 | df-print: kable
11 | code-line-numbers: true
12 |
--------------------------------------------------------------------------------
/babel.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | presets: [require.resolve('@docusaurus/core/lib/babel/preset')],
3 | };
4 |
--------------------------------------------------------------------------------
/data/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 |
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | # mdx files and figures will be generated by Quarto CLI
2 | *.mdx
3 | *_files
4 |
5 | *_cache
6 |
--------------------------------------------------------------------------------
/docs/_cleanup-knitr.qmd:
--------------------------------------------------------------------------------
1 |
2 | ```{r}
3 | #| include: false
4 | DBI::dbDisconnect(con, shutdown = TRUE)
5 | ```
6 |
--------------------------------------------------------------------------------
/docs/_setup-data-1.qmd:
--------------------------------------------------------------------------------
1 | ## Preparing Data
2 |
3 | {{< include _setup-knitr.qmd >}}
4 |
5 | ### Download
6 |
7 | Download the data to be analyzed (a zipped CSV file) and write it to a Parquet file.
8 |
9 | This document uses R here, but we can do this in another language, or manually download and unzip
10 | the file and create the Parquet file (with the DuckDB CLI).
11 |
12 | :::{.panel-tabset}
13 |
14 | #### R
15 |
16 | ```{r}
17 | #| filename: R
18 | #| cache: false
19 | #| code-fold: true
20 | #| warning: false
21 | # Create "data" directory, download the zip file into the directory, and create a Parquet file.
22 | data_dir <- "data"
23 | dest <- file.path(data_dir, "flights.csv.zip")
24 | csv_name <- "On_Time_Reporting_Carrier_On_Time_Performance_(1987_present)_2022_1.csv"
25 | csv_path <- file.path(data_dir, csv_name)
26 | parquet_path <- file.path(data_dir, "flights.parquet")
27 |
28 | if (!fs::file_exists(parquet_path)) {
29 | if (!fs::file_exists(dest)) {
30 | fs::dir_create(data_dir)
31 | curl::curl_download(
32 | "https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_2022_1.zip",
33 | dest,
34 | handle = curl::new_handle(ssl_verifypeer = FALSE)
35 | )
36 | }
37 |
38 | unzip(dest, csv_name, exdir = data_dir)
39 | duckdb:::sql(glue::glue("COPY (FROM read_csv_auto('{csv_path}')) TO '{parquet_path}' (FORMAT PARQUET)"))
40 | }
41 | ```
42 |
43 | #### Shell
44 |
45 | This is a sample command to download the zipped CSV file and convert it to a Parquet file.
46 |
47 | :::{.callout-tip}
48 |
49 | On Windows, the `unzip` command is not available by default, so use something like
50 | `Expand-Archive -Path data/flights.csv.zip -DestinationPath data` in PowerShell instead.
51 |
52 | :::
53 |
54 | ```{.bash filename=Terminal}
55 | mkdir data
56 | curl -sL https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_2022_1.zip -o data/flights.csv.zip
57 | unzip -d data data/flights.csv.zip
58 | duckdb -c "COPY (FROM read_csv_auto('data/On_Time_Reporting_Carrier_On_Time_Performance_(1987_present)_2022_1.csv')) TO 'data/flights.parquet' (FORMAT PARQUET)"
59 | ```
60 |
61 | :::
62 |
63 | ### Load the Data
64 |
65 | After the Parquet file is ready,
66 | load it into a DuckDB (in-memory) database table, an R DataFrame, and a Python polars.LazyFrame.
67 |
68 | :::{.panel-tabset}
69 |
70 | #### DuckDB
71 |
72 | ```{r}
73 | #| filename: R
74 | #| cache: false
75 | #| include: false
76 | con <- DBI::dbConnect(duckdb::duckdb(), ":memory:")
77 | ```
78 |
79 | ```{glue_sql}
80 | #| filename: SQL
81 | #| cache: false
82 | #| warning: false
83 | CREATE TABLE tab AS SELECT * FROM 'data/flights.parquet'
84 | ```
85 |
86 | ```{glue_sql}
87 | #| filename: SQL
88 | FROM tab LIMIT 5
89 | ```
90 |
91 | #### R DataFrame
92 |
93 | ```{r}
94 | #| filename: R
95 | #| cache: false
96 | #| output: false
97 | library(dplyr, warn.conflicts = FALSE)
98 |
99 | df <- duckdb:::sql("FROM 'data/flights.parquet'")
100 | ```
101 |
102 | ```{r}
103 | #| filename: R
104 | df |> head(5)
105 | ```
106 |
107 | #### Python polars.LazyFrame
108 |
109 | ```{python}
110 | #| filename: Python
111 | #| cache: false
112 | #| output: false
113 | import polars as pl
114 |
115 | lf = pl.scan_parquet("data/flights.parquet")
116 | ```
117 |
118 | ```{python}
119 | #| filename: Python
120 | lf.fetch(5)
121 | ```
122 |
123 | :::
124 |
--------------------------------------------------------------------------------
/docs/_setup-knitr.qmd:
--------------------------------------------------------------------------------
1 | ```{r}
2 | #| include: false
3 | #| cache: false
4 | library(knitr)
5 | library(glue)
6 | library(prqlr)
7 | library(reticulate)
8 |
9 | # Set SQL code blocks print option
10 | knitr::opts_knit$set(sql.print = \(x) paste(knitr::kable(x, format = "markdown"), collapse = "\n"))
11 |
12 | # readr package option
13 | options(readr.show_col_types = FALSE)
14 | ```
15 |
16 | :::{.callout-important}
17 |
18 | Since the PRQL and SQL results shown on this page are converted to R DataFrames via knitr,
19 | the values have been converted from DuckDB types to R types.
20 | So `NULL` in DuckDB is shown as `NA`.
21 |
22 | :::
23 |
--------------------------------------------------------------------------------
/docs/indexing.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Selecting and Filtering
3 | description: Selecting columns and filtering rows from a table.
4 | engine: knitr
5 | knitr:
6 | opts_chunk:
7 | connection: con
8 | engine-opts:
9 | target: sql.duckdb
10 | use_glue: true
11 | execute:
12 | cache: true
13 | sidebar_position: 2
14 | ---
15 |
16 | :::{.callout-note}
17 |
18 | This page is based on the chapter ["Indexing (Or Lack Thereof)"](https://kevinheavey.github.io/modern-polars/indexing.html)
19 | of the Modern Polars book.
20 |
21 | :::
22 |
23 | {{< include _setup-data-1.qmd >}}
24 |
25 | ## Read the Data with PRQL
26 |
27 | Since PRQL cannot be used to create tables, data must be read from pre-created tables in a DB.
28 |
29 | But when PRQL is used on DuckDB, it is possible to treat Parquet and other files as tables,
30 | so that PRQL queries can be executed without creating tables.
31 |
32 | ```{prql}
33 | #| filename: PRQL
34 | from `{{parquet_path}}`
35 | take 5
36 | ```
37 |
38 | ## Selecting Columns and Slicing Rows
39 |
40 | For column selection, the syntax is much the same, but the syntax for row selection using position looks different.
41 |
42 | Note that PRQL, SQL, and R are 1-based, while Python is 0-based.
43 |
44 | :::{.panel-tabset}
45 |
46 | ### PRQL DuckDB
47 |
48 | ```{prql}
49 | #| filename: PRQL
50 | from tab
51 | select {Dest, Tail_Number}
52 | take 13..16
53 | ```
54 |
55 | ### SQL DuckDB
56 |
57 | ```{glue_sql}
58 | #| filename: SQL
59 | SELECT
60 | Dest,
61 | Tail_Number
62 | FROM tab
63 | LIMIT 4 OFFSET 12
64 | ```
65 |
66 | :::{.callout-tip}
67 |
68 | DuckDB allows a SQL query to start with `FROM`,
69 | so we can also write the query as follows:
70 |
71 | ```{.sql filename="SQL"}
72 | FROM tab
73 | SELECT
74 | Dest,
75 | Tail_Number
76 | LIMIT 4 OFFSET 12
77 | ```
78 |
79 | :::
80 |
81 | ### dplyr R
82 |
83 | ```{r}
84 | #| filename: R
85 | df |>
86 | select(Dest, Tail_Number) |>
87 | slice(13:16)
88 | ```
89 |
90 | ### Python Polars
91 |
92 | ```{python}
93 | #| filename: Python
94 | lf.select("Dest", "Tail_Number").slice(12, 4).collect()
95 | ```
96 |
97 | :::
98 |
99 | ## Filtering Rows
100 |
101 | :::{.panel-tabset}
102 |
103 | ### PRQL DuckDB
104 |
105 | ```{prql}
106 | #| filename: PRQL
107 | from tab
108 | filter (IATA_CODE_Reporting_Airline | in ["AA", "DL"])
109 | take 5
110 | ```
111 |
112 | ### SQL DuckDB
113 |
114 | ```{glue_sql}
115 | #| filename: SQL
116 | FROM tab
117 | WHERE IATA_CODE_Reporting_Airline IN ('AA', 'DL')
118 | LIMIT 5
119 | ```
120 |
121 | ### dplyr R
122 |
123 | ```{r}
124 | #| filename: R
125 | df |>
126 | filter(IATA_CODE_Reporting_Airline %in% c("AA", "DL")) |>
127 | head(5)
128 | ```
129 |
130 | ### Python Polars
131 |
132 | ```{python}
133 | #| filename: Python
134 | lf.filter(pl.col("IATA_CODE_Reporting_Airline").is_in(["AA", "DL"])).head(5).collect()
135 | ```
136 |
137 | :::
138 |
139 | ## Assigning
140 |
141 | :::{.panel-tabset}
142 |
143 | ### PRQL DuckDB
144 |
145 | ```{prql}
146 | #| filename: PRQL
147 | let f = from [
148 | {a = 1, b = 10},
149 | {a = 2, b = 20},
150 | {a = 3, b = 30},
151 | {a = 4, b = 40},
152 | {a = 5, b = 50},
153 | ]
154 |
155 | from f
156 | select {
157 | a, # Note: can't use `derive` here https://github.com/PRQL/prql/issues/3130
158 | b = case [
159 | a <= 3 => b // 10,
160 | true => b
161 | ]
162 | }
163 | ```
164 |
165 | ### SQL DuckDB
166 |
167 | ```{glue_sql}
168 | #| filename: SQL
169 | WITH table_0 AS (
170 | SELECT
171 | 1 AS a,
172 | 10 AS b
173 | UNION
174 | ALL
175 | SELECT
176 | 2 AS a,
177 | 20 AS b
178 | UNION
179 | ALL
180 | SELECT
181 | 3 AS a,
182 | 30 AS b
183 | UNION
184 | ALL
185 | SELECT
186 | 4 AS a,
187 | 40 AS b
188 | UNION
189 | ALL
190 | SELECT
191 | 5 AS a,
192 | 50 AS b
193 | ),
194 |
195 | f AS (
196 | SELECT
197 | a,
198 | b
199 | FROM
200 | table_0 AS table_1
201 | )
202 |
203 | SELECT
204 | a,
205 | CASE
206 | WHEN a <= 3 THEN b // 10
207 | ELSE b
208 | END AS b
209 | FROM
210 | f
211 | ```
212 |
213 | ### dplyr R
214 |
215 | ```{r}
216 | #| filename: R
217 | f <- dplyr::tibble(a = 1:5, b = a * 10)
218 |
219 | f |>
220 | mutate(
221 | b = case_when(
222 | a <= 3 ~ b %/% 10,
223 | .default = b
224 | )
225 | )
226 | ```
227 |
228 | ### Python Polars
229 |
230 | ```{python}
231 | #| filename: Python
232 | f = pl.DataFrame({"a": list(range(1, 6)), "b": list(range(10, 60, 10))})
233 |
234 | f.with_columns(
235 | b=pl.when(pl.col("a") <= 3).then(pl.col("b") // 10).otherwise(pl.col("b"))
236 | )
237 | ```
238 |
239 | :::
240 |
241 | {{< include _cleanup-knitr.qmd >}}
242 |
--------------------------------------------------------------------------------
/docs/info.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | engine: knitr
3 | output-file: _info.mdx
4 | ---
5 |
6 | :::{.callout-important}
7 |
8 | The versions we are currently using are as follows:
9 |
10 | - PRQL: `r prqlr::prql_version()`
11 | - DuckDB: `r duckdb:::sql("select version()")[1, 1]`
12 |
13 | :::
14 |
--------------------------------------------------------------------------------
/docs/intro.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Introduction
3 | jupyter: python3
4 | sidebar_position: 1
5 | slug: /
6 | ---
7 |
8 | This document is an attempt to do some typical table data manipulation
9 | using [PRQL](https://prql-lang.org/) and [DuckDB](https://duckdb.org/).
10 |
11 | PRQL is a modern, in-development language for data manipulation that can be compiled to SQL.
12 | DuckDB is a modern in-memory SQL OLAP ([very fast](https://duckdblabs.github.io/db-benchmark/))
13 | database management system.
14 |
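As a rough sketch of how the two fit together (not executed as part of this book, and assuming the
`prqlc` and `duckdb` Python packages plus the Parquet file prepared in later chapters), a PRQL query
can be compiled to SQL and then handed to DuckDB:

```{.python filename="Python"}
# A minimal sketch: compile PRQL to SQL with prqlc, then run the SQL on DuckDB.
import duckdb
import prqlc

prql_query = """
from `data/flights.parquet`
take 5
"""

sql_query = prqlc.compile(prql_query)  # returns the generated SQL as a string
print(sql_query)
print(duckdb.sql(sql_query))  # DuckDB reads the Parquet file via a replacement scan
```
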
15 | ## Motivation
16 |
17 | In the CLI, JavaScript, Python, R, etc., we can combine the PRQL compiler and DuckDB in various places
18 | to manipulate data with PRQL.
19 |
20 | At the same time, new features are being added to DuckDB's SQL on a constant basis,
21 | and the syntax is becoming more friendly.
22 |
23 | So, I thought that by comparing PRQL and the latest DuckDB SQL (and other query libraries),
24 | we could clarify the missing features of PRQL and highlight the latest features of DuckDB.
25 |
26 | ## Content and Credit
27 |
28 | The content of each data manipulation is based on
29 | the awesome [Modern Polars](https://kevinheavey.github.io/modern-polars/) book
30 | by [Kevin Heavey](https://github.com/kevinheavey).
31 | That book, in turn, has its origin in [Modern Pandas](https://tomaugspurger.github.io/posts/modern-1-intro/)
32 | by [Tom Augspurger](https://github.com/TomAugspurger).
33 |
34 | Each query is shown side by side with the original
35 | [Python Polars](https://pola-rs.github.io/polars/py-polars/html/reference/) one
36 | and the equivalent operations in PRQL, DuckDB SQL, and [dplyr](https://dplyr.tidyverse.org/) (R).
37 |
38 | Since this document focuses on the differences between the grammars of the different languages
39 | and does not go into detail about the processing itself,
40 | I suggest that you also check out the original excellent articles and book.
41 |
42 | :::{.content-hidden}
43 |
44 | Since Quarto cannot mix the knitr and jupyter engines in a single qmd file (even when using the include shortcode),
45 | the knitr document will be output to a separate md file and imported here using the Docusaurus MDX functionality.
46 |
47 | :::
48 |
49 |
50 | ```{=mdx}
51 | import VesionInfo from './_info.mdx';
52 |
53 | <VesionInfo />
54 | ```
55 |
56 | ## Running the Code on IPython
57 |
58 | As mentioned above, PRQL and DuckDB can run on a variety of languages.
59 | This section describes, as an example, the use of IPython for easy interactive execution of PRQL, SQL, and Polars.
60 |
61 | ### Setup
62 |
63 | This command installs the necessary Python packages.
64 |
65 | ```{.bash filename="Terminal"}
66 | python -m pip install pyprql polars[pyarrow]
67 | ```
68 |
69 | Then, we can start `ipython`.
70 |
71 | Run `load_ext` magic to activate PRQL magic (from [pyprql](https://pyprql.readthedocs.io/))
72 | and SQL magic (from [jupysql](https://jupysql.ploomber.io/)).
73 |
74 | ```{python}
75 | #| filename: IPython
76 | #| output: false
77 | %load_ext pyprql.magic
78 | %load_ext sql
79 | ```
80 |
81 | ```{python}
82 | #| filename: IPython
83 | #| include: false
84 | %config SqlMagic.displaycon = False
85 | %config SqlMagic.feedback = False
86 | ```
87 |
88 | Use SQL magic to connect to a DuckDB in-memory database.
89 | Note that PRQL magic shares the connection with SQL magic.
90 |
91 | ```{python}
92 | #| filename: IPython
93 | %sql duckdb://
94 | ```
95 |
96 | To fix the SQL dialect used inside PRQL magic, we need to set the target with config magic.
97 | By setting `"sql.duckdb"` here, we tell the PRQL compiler to generate SQL optimized for DuckDB.
98 |
99 | ```{python}
100 | #| filename: IPython
101 | %config PrqlMagic.target = "sql.duckdb"
102 | ```
103 |
104 | All available targets can be listed with the `prqlc.get_targets` function.
105 |
106 | ```{python}
107 | #| filename: IPython
108 | import prqlc
109 |
110 | print(prqlc.get_targets())
111 | ```
112 |
113 | ### Combine DuckDB and Polars
114 |
115 | By setting the `autopolars` config, the results of PRQL and SQL executions can be converted to polars.DataFrame.
116 |
117 | ```{python}
118 | #| filename: IPython
119 | #| output: false
120 | %config PrqlMagic.autopolars = True
121 | %config SqlMagic.autopolars = True
122 | ```
123 |
124 | Also, since DuckDB can execute queries against polars.DataFrame and polars.LazyFrame etc.,
125 | these can be referenced directly from PRQL or SQL, as shown below.
126 |
127 | ```{python}
128 | #| filename: IPython
129 | import polars as pl
130 |
131 | lf = pl.LazyFrame({"a": list(range(1, 6))})
132 | ```
133 |
134 | :::{.callout-important}
135 |
136 | With JupySQL 0.10.14 and DuckDB 1.1, the following setting is needed to scan all frames in Python.
137 |
138 | ```{python}
139 | #| filename: IPython
140 | #| output: false
141 | %%sql
142 | set python_scan_all_frames=true
143 | ```
144 |
145 | :::
146 |
147 | :::{.panel-tabset}
148 |
149 | #### PRQL DuckDB
150 |
151 | ```{python}
152 | #| filename: IPython
153 | %%prql
154 | from lf
155 | derive b = a * 5
156 | take 3
157 | ```
158 |
159 | #### SQL DuckDB
160 |
161 | ```{python}
162 | #| filename: IPython
163 | %%sql
164 | SELECT
165 | a,
166 | a * 5 AS b
167 | FROM lf
168 | LIMIT 3
169 | ```
170 |
171 | #### Python Polars
172 |
173 | ```{python}
174 | #| filename: IPython
175 | lf.with_columns(b=pl.col("a") * 5).head(3).collect()
176 | ```
177 |
178 | :::
179 |
180 | ## License
181 |
182 | This website's content is licensed under the MIT license.
183 |
--------------------------------------------------------------------------------
/docs/method_chaining.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Define Functions
3 | description: |
4 | To eliminate the need to copy and paste the same code,
5 | it is convenient to make the process a function.
6 | engine: knitr
7 | knitr:
8 | opts_chunk:
9 | connection: con
10 | engine-opts:
11 | target: sql.duckdb
12 | use_glue: true
13 | execute:
14 | cache: true
15 | sidebar_position: 3
16 | ---
17 |
18 | :::{.callout-note}
19 |
20 | This page is based on the chapter ["Method Chaining"](https://kevinheavey.github.io/modern-polars/method_chaining.html)
21 | of the Modern Polars book.
22 |
23 | :::
24 |
25 | To eliminate the need to copy and paste the same code, it is convenient to make the process a function.
26 |
27 | PRQL can chain functions with `|` (the pipe operator),
28 | DuckDB SQL can chain functions with `.` (DuckDB >= 0.8, [duckdb/duckdb#6725](https://github.com/duckdb/duckdb/pull/6725)),
29 | and R can chain functions with `|>` (the pipe operator, R >= 4.1).
30 | They are defined so that the previous value in the chain becomes the first (DuckDB SQL and R) or
31 | the last (PRQL) argument of the function.
32 |
33 | In Python, we can chain methods using `.`.
34 |
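As a quick illustration of these chaining styles outside the book's build (a sketch assuming the
`duckdb` and `polars` Python packages), DuckDB's `.` operator and Polars method chaining look like this:

```{.python filename="Python"}
# A small sketch of the chaining styles described above, run from Python.
import duckdb
import polars as pl

# DuckDB SQL: the value before `.` is passed as the first argument of the function.
print(duckdb.sql("SELECT ('Los Angeles, CA').regexp_replace(',.*', '') AS city"))

# Python Polars: ordinary method chaining with `.` on expressions and frames.
df = pl.DataFrame({"city": ["Los Angeles, CA", "New York, NY"]})
print(df.with_columns(pl.col("city").str.replace(",.*", "")))
```
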
35 | {{< include _setup-data-1.qmd >}}
36 |
37 | ## Functions
38 |
39 | Define some functions and see if they work.
40 |
41 | :::{.callout-important}
42 |
43 | PRQL currently does not allow functions to be stored in a separate session or file,
44 | and they must always be defined with the main query.
45 | ([PRQL/prql#1803](https://github.com/PRQL/prql/issues/1803))
46 |
47 | :::
48 |
49 | ### Extract City Names
50 |
51 | PRQL does not currently have the ability to apply a function to multiple columns at once.
52 | ([PRQL/prql#2386](https://github.com/PRQL/prql/issues/2386))
53 |
54 | The others can apply a function to multiple columns at once,
55 | but DuckDB SQL differs in that the output column names are changed and that behavior cannot be prevented.
56 |
57 | :::{.panel-tabset}
58 |
59 | #### PRQL DuckDB
60 |
61 | ```{r}
62 | #| echo: false
63 | .prql_func_extract_city_name <- r"-(let extract_city_name = col -> s"regexp_replace({col}, ',.*', '')")-"
64 | ```
65 |
66 | ```{prql}
67 | #| filename: PRQL
68 | {{.prql_func_extract_city_name}}
69 |
70 | from tab
71 | select {
72 | OriginCityName = extract_city_name OriginCityName,
73 | DestCityName = extract_city_name DestCityName
74 | }
75 | take 5
76 | ```
77 |
78 | #### SQL DuckDB
79 |
80 | ```{sql}
81 | #| filename: SQL
82 | #| cache: false
83 | CREATE MACRO extract_city_name(col) AS regexp_replace(col, ',.*', '')
84 | ```
85 |
86 | The function works as follows:
87 |
88 | ```{glue_sql}
89 | #| filename: SQL
90 | SELECT
91 | extract_city_name(COLUMNS(['OriginCityName', 'DestCityName']))
92 | FROM
93 | tab
94 | LIMIT 5
95 | ```
96 |
97 | #### dplyr R
98 |
99 | ```{r}
100 | #| filename: R
101 | #| cache: false
102 | extract_city_name <- function(col) stringr::str_remove(col, ",.*")
103 | ```
104 |
105 | The function works as follows:
106 |
107 | ```{r}
108 | #| filename: R
109 | df |>
110 | head(5) |>
111 | mutate(
112 | across(c(OriginCityName, DestCityName), extract_city_name),
113 | .keep = "none"
114 | )
115 | ```
116 |
117 | #### Python Polars
118 |
119 | ```{python}
120 | #| filename: Python
121 | #| cache: false
122 | def extract_city_name(cols: list[str]) -> pl.Expr:
123 | return pl.col(cols).str.replace(",.*", "")
124 | ```
125 |
126 | The function works as follows:
127 |
128 | ```{python}
129 | #| filename: Python
130 | lf.fetch(5).select(extract_city_name(["OriginCityName", "DestCityName"]))
131 | ```
132 |
133 | :::
134 |
135 | ### Timestamp Calculation
136 |
137 | PRQL and SQL can only define functions that return a single column.
138 |
139 | The column name (here `FlightDate`) cannot be referenced directly inside the DuckDB SQL function definition, so it is passed as an argument.
140 |
141 | :::{.panel-tabset}
142 |
143 | #### PRQL DuckDB
144 |
145 | ```{r}
146 | #| echo: false
147 | .prql_func_time_to_datetime <- r"(let time_to_datetime = string -> s"""
148 | FlightDate::TIMESTAMP +
149 | TRY_CAST(regexp_replace({string}, '^2400$', '0000').substr(1, 2).concat(' hours') AS INTERVAL) +
150 | TRY_CAST(regexp_replace({string}, '^2400$', '0000').substr(3, 2).concat(' minutes') AS INTERVAL)
151 | """)"
152 | ```
153 |
154 | ```{prql}
155 | #| filename: PRQL
156 | {{.prql_func_time_to_datetime}}
157 |
158 | from tab
159 | select {
160 | FlightDate,
161 | DepTimeOld = DepTime
162 | }
163 | derive {
164 | DepTime = (time_to_datetime DepTimeOld)
165 | }
166 | take 5
167 | ```
168 |
169 | #### SQL DuckDB
170 |
171 | ```{sql}
172 | #| filename: SQL
173 | #| cache: false
174 | CREATE MACRO time_to_datetime(date, string) AS
175 | date::TIMESTAMP +
176 | TRY_CAST(regexp_replace(string, '^2400$', '0000').substr(1, 2).concat(' hours') AS INTERVAL) +
177 | TRY_CAST(regexp_replace(string, '^2400$', '0000').substr(3, 2).concat(' minutes') AS INTERVAL)
178 | ```
179 |
180 | The function works as follows:
181 |
182 | ```{glue_sql}
183 | #| filename: SQL
184 | SELECT
185 | FlightDate,
186 | time_to_datetime(FlightDate, DepTime) AS DepTime,
187 | DepTime AS DepTimeOld
188 | FROM
189 | tab
190 | LIMIT 5
191 | ```
192 |
193 | #### dplyr R
194 |
195 | A function that returns a single column:
196 |
197 | ```{r}
198 | #| filename: R
199 | #| cache: false
200 | time_col <- function(col) {
201 | col |>
202 | case_match(
203 | "2400" ~ "0000",
204 | .default = col
205 | ) |>
206 | (\(x) {
207 | (stringr::str_sub(x, 1, 2) |> lubridate::hours()) +
208 | (stringr::str_sub(x, 3, 4) |> lubridate::minutes())
209 | })()
210 | }
211 | ```
212 |
213 | A function that returns a DataFrame:
214 |
215 | ```{r}
216 | #| filename: R
217 | #| cache: false
218 | time_to_datetime <- function(df, columns) {
219 | df |>
220 | mutate(across(all_of({{ columns }}), \(x) FlightDate + time_col(x)))
221 | }
222 | ```
223 |
224 | The function works as follows:
225 |
226 | ```{r}
227 | #| filename: R
228 | df |>
229 | head(5) |>
230 | select(FlightDate, DepTime) |>
231 | mutate(DepTimeOld = DepTime) |>
232 | time_to_datetime("DepTime")
233 | ```
234 |
235 | #### Python Polars
236 |
237 | A function that returns a single `polars.Expr`:
238 |
239 | ```{python}
240 | #| filename: Python
241 | #| cache: false
242 | def time_col(col: str) -> pl.Expr:
243 | col_expr = pl.col(col)
244 | return (
245 | pl.when(col_expr == "2400")
246 | .then(pl.lit("0000"))
247 | .otherwise(col_expr)
248 | .str.strptime(pl.Time, "%H%M", strict=False)
249 | .alias(col)
250 | )
251 | ```
252 |
253 | A function that returns a list of `polars.Expr`:
254 |
255 | ```{python}
256 | #| filename: Python
257 | #| cache: false
258 | def time_to_datetime(columns: list[str]) -> list[pl.Expr]:
259 | date_col = pl.col("FlightDate")
260 | return [date_col.dt.combine(time_col(col)).alias(col) for col in columns]
261 | ```
262 |
263 | The function works as follows:
264 |
265 | ```{python}
266 | #| filename: Python
267 | lf.fetch(5).select(["FlightDate", "DepTime"]).with_columns(
268 | DepTimeOld=pl.col("DepTime"), *time_to_datetime(["DepTime"])
269 | )
270 | ```
271 |
272 | :::
273 |
274 | ## Use Functions in the Query
275 |
276 | dplyr and Polars allow column names to be predefined as vectors or lists,
277 | which can then be referenced in the query.
278 |
279 | :::{.panel-tabset}
280 |
281 | ### PRQL DuckDB
282 |
283 | ```{prql}
284 | #| filename: PRQL
285 | {{.prql_func_extract_city_name}}
286 |
287 | {{.prql_func_time_to_datetime}}
288 |
289 | from tab
290 | select {
291 | Dest,
292 | Tail_Number,
293 | IATA_CODE_Reporting_Airline,
294 | CancellationCode,
295 | DepTime = time_to_datetime DepTime,
296 | ArrTime = time_to_datetime ArrTime,
297 | CRSArrTime = time_to_datetime CRSArrTime,
298 | CRSDepTime = time_to_datetime CRSDepTime,
299 | FlightDate,
300 | Flight_Number_Reporting_Airline,
301 | OriginCityName = extract_city_name OriginCityName,
302 | DestCityName = extract_city_name DestCityName,
303 | Origin,
304 | DepDelay
305 | }
306 | take 5
307 | ```
308 |
309 | ### SQL DuckDB
310 |
311 | ```{glue_sql}
312 | #| filename: SQL
313 | SELECT
314 | Dest,
315 | Tail_Number,
316 | IATA_CODE_Reporting_Airline,
317 | CancellationCode,
318 | time_to_datetime(
319 | FlightDate,
320 | COLUMNS([
321 | DepTime,
322 | ArrTime,
323 | CRSArrTime,
324 | CRSDepTime
325 | ])
326 | ),
327 | FlightDate,
328 | Flight_Number_Reporting_Airline,
329 | extract_city_name(COLUMNS([OriginCityName, DestCityName])),
330 | Origin,
331 | DepDelay
332 | FROM
333 | tab
334 | LIMIT 5
335 | ```
336 |
337 | ### dplyr R
338 |
339 | ```{r}
340 | #| filename: R
341 | category_cols <- c(
342 | "Dest",
343 | "Tail_Number",
344 | "IATA_CODE_Reporting_Airline",
345 | "CancellationCode"
346 | )
347 |
348 | time_cols <- c("DepTime", "ArrTime", "CRSArrTime", "CRSDepTime")
349 |
350 | cols <- c(
351 | category_cols,
352 | time_cols,
353 | c(
354 | "FlightDate",
355 | "Flight_Number_Reporting_Airline",
356 | "OriginCityName",
357 | "DestCityName",
358 | "Origin",
359 | "DepDelay"
360 | )
361 | )
362 |
363 | df |>
364 | select(all_of(cols)) |>
365 | mutate(
366 | across({{ category_cols }}, as.factor),
367 | across(c(OriginCityName, DestCityName), extract_city_name)
368 | ) |>
369 | time_to_datetime(time_cols) |>
370 | head(5)
371 | ```
372 |
373 | ### Python Polars
374 |
375 | ```{python}
376 | #| filename: Python
377 | category_cols = [
378 | "Dest",
379 | "Tail_Number",
380 | "IATA_CODE_Reporting_Airline",
381 | "CancellationCode",
382 | ]
383 |
384 | time_cols = ["DepTime", "ArrTime", "CRSArrTime", "CRSDepTime"]
385 |
386 | cols = (
387 | category_cols
388 | + time_cols
389 | + [
390 | "FlightDate",
391 | "Flight_Number_Reporting_Airline",
392 | "OriginCityName",
393 | "DestCityName",
394 | "Origin",
395 | "DepDelay",
396 | ]
397 | )
398 |
399 | lf.select(cols).with_columns(
400 | [
401 | pl.col(category_cols).cast(pl.Categorical),
402 | extract_city_name(["OriginCityName", "DestCityName"]),
403 | *time_to_datetime(time_cols),
404 | ]
405 | ).head(5).collect()
406 | ```
407 |
408 | :::
409 |
410 | {{< include _cleanup-knitr.qmd >}}
411 |
--------------------------------------------------------------------------------
/docs/tidy.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Reshaping and Tidy Data
3 | description: Make the data tidy.
4 | engine: knitr
5 | knitr:
6 | opts_chunk:
7 | connection: con
8 | engine-opts:
9 | target: sql.duckdb
10 | use_glue: true
11 | info_string: '{.sql filename="SQL"}'
12 | execute:
13 | cache: true
14 | sidebar_position: 4
15 | ---
16 |
17 | :::{.callout-note}
18 |
19 | This page is based on the chapter ["Reshaping and Tidy Data"](https://kevinheavey.github.io/modern-polars/tidy.html)
20 | of the Modern Polars book.
21 |
22 | :::
23 |
24 | ## Read the Data
25 |
26 | {{< include _setup-knitr.qmd >}}
27 |
28 | ### Download
29 |
30 | Download the data to be analyzed (tables on the website) and write it to CSV files.
31 |
32 | This document uses R to download the data from the source here,
33 | but we can also download and use the CSV files included in
34 | the [kevinheavey/modern-polars](https://github.com/kevinheavey/modern-polars/tree/master/data/nba) GitHub repository.
35 |
36 | :::{.panel-tabset}
37 |
38 | #### R
39 |
40 | ```{r}
41 | #| filename: R
42 | #| code-fold: true
43 | #| cache: false
44 | nba_dir <- file.path("data", "nba")
45 |
46 | months <- c(
47 | "october",
48 | "november",
49 | "december",
50 | "january",
51 | "february",
52 | "march",
53 | "april",
54 | "may",
55 | "june"
56 | )
57 |
58 | column_names <- c(
59 | date = "date",
60 | away_team = "visitor_neutral",
61 | away_points = "pts",
62 | home_team = "home_neutral",
63 | home_points = "pts_2"
64 | )
65 |
66 | .write_data <- function(month) {
67 | base_url <- "http://www.basketball-reference.com/leagues/NBA_2016_games-{month}.html"
68 |
69 | glue::glue(base_url, month = month) |>
70 | rvest::read_html() |>
71 | rvest::html_table() |>
72 | (\(x) x[[1]])() |> # TODO: Rewrite after R 4.3
73 | janitor::clean_names() |>
74 | dplyr::select(all_of(column_names)) |>
75 | dplyr::filter(date != "Playoffs") |>
76 | readr::write_csv(file.path(nba_dir, glue::glue("{month}.csv")))
77 | }
78 |
79 | if (!fs::dir_exists(nba_dir)) {
80 | fs::dir_create(nba_dir)
81 | months |>
82 | purrr::walk(.write_data)
83 | }
84 | ```
85 |
86 | #### Shell
87 |
88 | This is a sample command to download the CSV files from the `kevinheavey/modern-polars` GitHub repository.
89 |
90 | ```{r}
91 | #| results: asis
92 | #| echo: false
93 | base_command <- glue::glue("curl -sL https://github.com/kevinheavey/modern-polars/raw/87539190dde3e99d5e4c4f9957c78932a33075a0/data/nba/{{month}}.csv -o {nba_dir}/{{month}}.csv")
94 |
95 | commands <- glue::glue(base_command, month = months) |>
96 | stringr::str_c(collapse = "\n")
97 |
98 | cat(
99 | "```{.bash filename=Terminal}",
100 | glue::glue("mkdir {nba_dir}"),
101 | commands,
102 | "```",
103 | sep = "\n"
104 | )
105 | ```
106 |
107 | :::
108 |
109 | ### Load the Data
110 |
111 | After the CSV files are ready, load them into a DuckDB (in-memory) database table,
112 | an R DataFrame, and a Python polars.LazyFrame.
113 |
114 | :::{.panel-tabset}
115 |
116 | #### DuckDB
117 |
118 | ```{r }
119 | #| filename: R
120 | #| cache: false
121 | #| output: false
122 | #| echo: false
123 | con <- DBI::dbConnect(duckdb::duckdb(), ":memory:")
124 | ```
125 |
126 | ```{glue_sql}
127 | #| filename: SQL
128 | #| cache: false
129 | #| output: false
130 | CREATE TABLE tab AS FROM read_csv_auto('data/nba/*.csv')
131 | ```
132 |
133 | ```{glue_sql}
134 | #| filename: SQL
135 | FROM tab
136 | LIMIT 5
137 | ```
138 |
139 | #### R DataFrame
140 |
141 | ```{r}
142 | #| filename: R
143 | #| cache: false
144 | library(dplyr, warn.conflicts = FALSE)
145 |
146 | df <- readr::read_csv(
147 | fs::dir_ls("data/nba", glob = "*.csv")
148 | )
149 | ```
150 |
151 | ```{r}
152 | #| filename: R
153 | df |> head(5)
154 | ```
155 |
156 | #### Python polars.LazyFrame
157 |
158 | ```{python}
159 | #| filename: Python
160 | #| cache: false
161 | import polars as pl
162 |
163 | lf = pl.scan_csv("data/nba/*.csv")
164 | ```
165 |
166 | ```{python}
167 | #| filename: Python
168 | lf.head(5).collect()
169 | ```
170 |
171 | :::
172 |
173 | ## Cleaning {#sec-cleaning}
174 |
175 | Convert the `date` column to date type and delete rows containing missing values (`null`).
176 |
177 | PRQL does not have a "remove rows with missing values in any column" syntax
178 | ([PRQL/prql#2386](https://github.com/PRQL/prql/issues/2386)),
179 | but DuckDB SQL does (>= 0.8, [duckdb/duckdb#6621](https://github.com/duckdb/duckdb/pull/6621)), so it can be used.
180 |
181 | :::{.panel-tabset}
182 |
183 | ### PRQL DuckDB
184 |
185 | ```{prql}
186 | #| filename: PRQL
187 | #| label: prql_cleaning
188 | let games = (
189 | from tab
190 | filter s"COLUMNS(*) IS NOT NULL"
191 | derive date_new = (s"strptime(date, '%a, %b %d, %Y')" | as date)
192 | select !{this.date} # `this` points to refer to current relation
193 | sort date_new
194 | derive game_id = (row_number this)
195 | )
196 |
197 | from games
198 | take 5
199 | ```
200 |
201 | ### SQL DuckDB
202 |
203 | ```{sql}
204 | --| filename: SQL
205 | --| cache: false
206 | --| output: false
207 | CREATE TABLE games AS (
208 | WITH _tab1 AS (
209 | SELECT
210 | * REPLACE (strptime(date, '%a, %b %d, %Y')::date AS date)
211 | FROM tab
212 | WHERE COLUMNS(*) IS NOT NULL
213 | )
214 |
215 | SELECT
216 | row_number() OVER(ORDER BY date) AS game_id,
217 | *
218 | FROM _tab1
219 | ORDER BY date
220 | )
221 | ```
222 |
223 | ```{glue_sql}
224 | #| filename: SQL
225 | FROM games
226 | LIMIT 5
227 | ```
228 |
229 | ### dplyr R
230 |
231 | ```{r}
232 | #| filename: R
233 | games <- df |>
234 | filter(!if_any(everything(), is.na)) |> # Also can use `tidyr::drop_na`
235 | mutate(
236 | date = lubridate::parse_date_time(date, "%a, %b %d, %Y") |>
237 | lubridate::as_date()
238 | ) |>
239 | arrange(date) |>
240 | mutate(game_id = row_number(), .before = 1)
241 | ```
242 |
243 | ```{r}
244 | #| filename: R
245 | games |>
246 | head(5)
247 | ```
248 |
249 | ### Python Polars
250 |
251 | ```{python}
252 | #| filename: Python
253 | #| cache: false
254 | games = (
255 | lf.filter(~pl.any_horizontal(pl.all().is_null())) # Also can use `polars.LazyFrame.drop_nulls`
256 | .with_columns(
257 | pl.col("date").str.strptime(pl.Date, "%a, %b %d, %Y"),
258 | )
259 | .sort("date")
260 | .with_row_index("game_id")
261 | .collect()
262 | )
263 | ```
264 |
265 | ```{python}
266 | #| filename: Python
267 | games.head(5)
268 | ```
269 |
270 | :::
271 |
272 | Looking at the result tables, we notice that the PRQL result is different from the others:
273 | the column named `date` in the other results is named `date_new` in PRQL.
274 | This is because a different name is needed here; reusing the column name `date`
275 | would add a new column called `date:1`
276 | instead of updating the original `date` column.
277 |
278 | In DuckDB SQL, we can use [Replace Clause](https://duckdb.org/docs/sql/expressions/star#replace-clause)
279 | to update the original column with the same column name.
280 |
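As a standalone sketch of that clause (not part of this page's build, assuming the `duckdb` Python
package), `* REPLACE` rewrites a column in place while keeping all other columns:

```{.python filename="Python"}
# A minimal sketch of DuckDB's star REPLACE clause.
import duckdb

duckdb.sql("CREATE TABLE demo AS SELECT 'Sat, Oct 31, 2015' AS date, 1 AS game_id")

# The date column is replaced under its original name; game_id passes through unchanged.
print(duckdb.sql("""
    SELECT * REPLACE (strptime(date, '%a, %b %d, %Y')::DATE AS date)
    FROM demo
"""))
```
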
281 | The SQL generated by the PRQL compiler looks like this:
282 |
283 | ```{prql}
284 | #| connection: null
285 | #| echo: false
286 | <<prql_cleaning>>
287 | ```
288 |
289 | ## Tidy Data {#sec-tidy-data}
290 |
291 | :::{.callout-important}
292 |
293 | - PRQL does not yet support PIVOT and UNPIVOT. ([PRQL/prql#644](https://github.com/PRQL/prql/issues/644))
294 | - DuckDB SQL supports PIVOT and UNPIVOT >= 0.8. ([duckdb/duckdb#6387](https://github.com/duckdb/duckdb/pull/6387))
295 |
296 | :::
297 |
298 | ### Unpivot
299 |
300 | Transforms the data from wide format to long format.
301 | This transformation is called by names such as unpivot, pivot longer, and melt.
302 |
303 | :::{.panel-tabset}
304 |
305 | #### PRQL DuckDB
306 |
307 | :::{.callout-important}
308 |
309 | `games` in this query is defined in @sec-cleaning with SQL, not with PRQL.
310 |
311 | :::
312 |
313 | ```{prql}
314 | #| filename: PRQL
315 | from s"""
316 | SELECT *
317 | FROM (
318 | PIVOT_LONGER games
319 | ON away_team, home_team
320 | INTO
321 | NAME variable
322 | VALUE team
323 | )
324 | """
325 | group {team} (
326 | sort this.date
327 | derive rest = this.date - (this.date | lag 1) - 1
328 | )
329 | select !{away_points, home_points}
330 | filter rest != null
331 | sort game_id
332 | take 5
333 | ```
334 |
335 | #### SQL DuckDB
336 |
337 | ```{sql}
338 | --| filename: SQL
339 | --| cache: false
340 | --| output: false
341 | CREATE TABLE tidy AS (
342 | WITH _tab1 AS (
343 | PIVOT_LONGER games
344 | ON away_team, home_team
345 | INTO
346 | NAME variable
347 | VALUE team
348 | ),
349 |
350 | _tab2 AS (
351 | SELECT
352 | COLUMNS(x -> NOT suffix(x, '_points'))
353 | FROM _tab1
354 | ),
355 |
356 | _tab3 AS (
357 | SELECT
358 | *,
359 | date - lag(date) OVER (PARTITION BY team ORDER BY date) -1 AS rest
360 | FROM _tab2
361 | )
362 |
363 | SELECT *
364 | FROM _tab3
365 | WHERE rest IS NOT NULL
366 | ORDER BY game_id
367 | )
368 | ```
369 |
370 | ```{glue_sql}
371 | #| filename: SQL
372 | FROM tidy
373 | LIMIT 5
374 | ```
375 |
376 | #### dplyr R
377 |
378 | ```{r}
379 | #| filename: R
380 | tidy <- games |>
381 | tidyr::pivot_longer(
382 | cols = c(away_team, home_team),
383 | names_to = "variable",
384 | values_to = "team"
385 | ) |>
386 | select(!ends_with("_points")) |>
387 | arrange(game_id) |>
388 | mutate(
389 | rest = date - lag(date) - 1,
390 | .by = team
391 | ) |>
392 | filter(!is.na(rest))
393 | ```
394 |
395 | ```{r}
396 | #| filename: R
397 | tidy |>
398 | head(5)
399 | ```
400 |
401 | #### Python Polars
402 |
403 | ```{python}
404 | #| filename: Python
405 | #| cache: false
406 | tidy = (
407 | games.unpivot(
408 | index=["game_id", "date"],
409 | on=["away_team", "home_team"],
410 | value_name="team",
411 | )
412 | .sort("game_id")
413 | .with_columns(
414 | rest=(pl.col("date").diff().over("team").dt.total_days() - 1).cast(pl.Int8)
415 | )
416 | .drop_nulls("rest")
417 | )
418 | ```
419 |
420 | ```{python}
421 | #| filename: Python
422 | tidy.head(5)
423 | ```
424 |
425 | :::
426 |
427 | PRQL, SQL and dplyr remove unnecessary columns after UNPIVOT
428 | (columns that were automatically removed in the original Polars and Pandas example).
429 |
430 | ### Pivot
431 |
432 | Transforms the data from long format to wide format.
433 | This transformation is called by names such as pivot, pivot wider.
434 |
435 | :::{.panel-tabset}
436 |
437 | #### PRQL DuckDB
438 |
439 | :::{.callout-important}
440 |
441 | `tidy` in this query is defined in @sec-tidy-data with SQL,
442 | and `games` is defined in @sec-cleaning with SQL.
443 |
444 | :::
445 |
446 | ```{prql}
447 | #| filename: PRQL
448 | #| label: prql_tidy_nba_2
449 | from s"""
450 | SELECT *
451 | FROM (
452 | PIVOT_WIDER tidy ON variable USING FIRST(rest) GROUP BY (game_id, date)
453 | )
454 | """
455 | derive {
456 | away_rest = away_team,
457 | home_rest = home_team
458 | }
459 | select !{
460 | away_team,
461 | home_team
462 | }
463 | join side:left games (==game_id && ==date)
464 | derive {
465 | home_win = games.home_points > games.away_points,
466 | rest_spread = home_rest - away_rest
467 | }
468 | sort games.game_id
469 | take 5
470 | ```
471 |
472 | #### SQL DuckDB
473 |
474 | ```{sql}
475 | --| filename: SQL
476 | --| cache: false
477 | --| output: false
478 | CREATE TABLE by_game AS (
479 | WITH _tab1 AS (
480 | PIVOT_WIDER tidy ON variable USING FIRST(rest) GROUP BY (game_id, date)
481 | )
482 |
483 | SELECT
484 | * EXCLUDE(away_team, home_team),
485 | away_team AS away_rest,
486 | home_team AS home_rest
487 | FROM _tab1
488 | )
489 | ```
490 |
491 | ```{sql}
492 | --| filename: SQL
493 | --| cache: false
494 | --| output: false
495 | CREATE TABLE joined AS (
496 | SELECT
497 | *,
498 | home_points > away_points AS home_win,
499 | home_rest - away_rest AS rest_spread
500 | FROM by_game
501 | LEFT JOIN games USING (game_id, date)
502 | ORDER BY game_id
503 | )
504 | ```
505 |
506 | ```{glue_sql}
507 | #| filename: SQL
508 | FROM joined
509 | LIMIT 5
510 | ```
511 |
512 | #### dplyr R
513 |
514 | ```{r}
515 | #| filename: R
516 | by_game <- tidy |>
517 | tidyr::pivot_wider(
518 | id_cols = c("game_id", "date"),
519 | values_from = "rest",
520 | names_from = "variable"
521 | ) |>
522 | rename(
523 | away_rest = away_team,
524 | home_rest = home_team
525 | )
526 |
527 | joined <- by_game |>
528 | left_join(games, by = c("game_id", "date")) |>
529 | mutate(
530 | home_win = home_points > away_points,
531 | rest_spread = home_rest - away_rest
532 | )
533 | ```
534 |
535 | ```{r}
536 | #| filename: R
537 | joined |>
538 | head(5)
539 | ```
540 |
541 | #### Python Polars
542 |
543 | ```{python}
544 | #| filename: Python
545 | by_game = tidy.pivot(
546 | values="rest", index=["game_id", "date"], on="variable"
547 | ).rename({"away_team": "away_rest", "home_team": "home_rest"})
548 |
549 | joined = by_game.join(games, on=["game_id", "date"]).with_columns(
550 | home_win=pl.col("home_points") > pl.col("away_points"),
551 | rest_spread=pl.col("home_rest") - pl.col("away_rest"),
552 | )
553 | ```
554 |
555 | ```{python}
556 | #| filename: Python
557 | joined.head(5)
558 | ```
559 |
560 | :::
561 |
562 | There are more columns in the PRQL result than in the other results
563 | because the generated SQL does not use `USING` for joins ([PRQL/prql#1335](https://github.com/PRQL/prql/issues/1335)).
564 |
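As a small standalone sketch of the difference (not part of this page's build, assuming the `duckdb`
Python package):

```{.python filename="Python"}
# A minimal sketch: USING keeps a single copy of the join key, ON keeps both sides' copies.
import duckdb

duckdb.sql("CREATE TABLE l AS SELECT 1 AS id, 'x' AS left_val")
duckdb.sql("CREATE TABLE r AS SELECT 1 AS id, 'y' AS right_val")

print(duckdb.sql("SELECT * FROM l LEFT JOIN r USING (id)"))      # one id column
print(duckdb.sql("SELECT * FROM l LEFT JOIN r ON l.id = r.id"))  # id appears twice
```
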
565 | The SQL generated by the PRQL compiler looks like this:
566 |
567 | ```{prql}
568 | #| connection: null
569 | #| echo: false
570 | <<prql_tidy_nba_2>>
571 | ```
572 |
--------------------------------------------------------------------------------
/docs/timeseries.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: Timeseries
3 | description: Handling timeseries data.
4 | engine: knitr
5 | knitr:
6 | opts_chunk:
7 | connection: con
8 | engine-opts:
9 | target: sql.duckdb
10 | use_glue: true
11 | execute:
12 | cache: true
13 | sidebar_position: 5
14 | ---
15 |
16 | :::{.callout-note}
17 |
18 | This page is based on the chapter ["Timeseries"](https://kevinheavey.github.io/modern-polars/timeseries.html)
19 | of the Modern Polars book.
20 |
21 | :::
22 |
23 | ## Preparing Data
24 |
25 | {{< include _setup-knitr.qmd >}}
26 |
27 | ### Download
28 |
29 | Download the data from [Binance REST API](https://github.com/binance/binance-spot-api-docs/blob/master/rest-api.md)
30 | and write it to a Parquet file.
31 |
32 | This document uses R to download the data from the source here,
33 | but we can also download and use the Parquet file included in the
34 | [kevinheavey/modern-polars](https://github.com/kevinheavey/modern-polars/blob/master/data/ohlcv.pq) GitHub repository.
35 |
36 | :::{.panel-tabset}
37 |
38 | #### R
39 |
40 | ```{r}
41 | #| filename: R
42 | #| cache: false
43 | #| code-fold: true
44 | #| warning: false
45 | data_path <- "data/ohlcv.parquet"
46 |
47 | if (!fs::file_exists(data_path)) {
48 | fs::dir_create(fs::path_dir(data_path))
49 |
50 | .epoch_ms <- function(dt) {
51 | dt |>
52 | lubridate::as_datetime() |>
53 | (\(x) (as.integer(x) * 1000))()
54 | }
55 |
56 | .start <- lubridate::make_datetime(2021, 1, 1) |> .epoch_ms()
57 | .end <- lubridate::make_datetime(2022, 1, 1) |> .epoch_ms()
58 |
59 | .url <- glue::glue(
60 | "https://api.binance.com/api/v3/klines?symbol=BTCUSDT&",
61 | "interval=1d&startTime={.start}&endTime={.end}"
62 | )
63 |
64 | .res <- jsonlite::read_json(.url)
65 |
66 | time_col <- "time"
67 | ohlcv_cols <- c(
68 | "open",
69 | "high",
70 | "low",
71 | "close",
72 | "volume"
73 | )
74 | cols_to_use <- c(time_col, ohlcv_cols)
75 | cols <- c(cols_to_use, glue::glue("ignore_{i}", i = 1:6))
76 |
77 | df <- .res |>
78 | tibble::enframe(name = NULL) |>
79 | tidyr::unnest_wider(value, names_sep = "_") |>
80 | rlang::set_names({{ cols }}) |>
81 | dplyr::mutate(
82 | dplyr::across({{ time_col }}, \(x) lubridate::as_datetime(x / 1000) |> lubridate::as_date()),
83 | dplyr::across({{ ohlcv_cols }}, as.numeric),
84 | .keep = "none"
85 | )
86 |
87 | # Unlike the Python client, the duckdb R client does not (yet) have automatic DataFrame registration.
88 | # (duckdb/duckdb#6771)
89 | con_tmp <- DBI::dbConnect(duckdb::duckdb(), ":memory:")
90 | duckdb::duckdb_register(con_tmp, "df", df)
91 | duckdb:::sql(glue::glue("COPY df TO '{data_path}' (FORMAT PARQUET)"), con_tmp)
92 | DBI::dbDisconnect(con_tmp)
93 | }
94 | ```
95 |
96 | #### Shell
97 |
98 | This is a sample command to download the Parquet file from the `kevinheavey/modern-polars` GitHub repository.
99 |
100 | ```{.bash filename=Terminal}
101 | mkdir data
102 | curl -sL https://github.com/kevinheavey/modern-polars/raw/d67d6f95ce0de8aad5492c4497ac4c3e33d696e8/data/ohlcv.pq -o data/ohlcv.parquet
103 | ```
104 |
105 | :::
106 |
107 | ### Load the Data
108 |
109 | After the Parquet file is ready, load it into a DuckDB (in-memory) database table, an R DataFrame, and a Python polars.LazyFrame.
110 |
111 | :::{.panel-tabset}
112 |
113 | #### DuckDB
114 |
115 | ```{r}
116 | #| filename: R
117 | #| cache: false
118 | #| output: false
119 | #| echo: false
120 | con <- DBI::dbConnect(duckdb::duckdb(), ":memory:")
121 | ```
122 |
123 | ```{glue_sql}
124 | #| filename: SQL
125 | #| cache: false
126 | #| output: false
127 | CREATE TABLE tab AS FROM 'data/ohlcv.parquet'
128 | ```
129 |
130 | ```{glue_sql}
131 | #| filename: SQL
132 | FROM tab
133 | LIMIT 5
134 | ```
135 |
136 | #### R DataFrame
137 |
138 | ```{r}
139 | #| filename: R
140 | #| cache: false
141 | #| output: false
142 | library(dplyr, warn.conflicts = FALSE)
143 |
144 | df <- duckdb:::sql("FROM 'data/ohlcv.parquet'")
145 | ```
146 |
147 | ```{r}
148 | #| filename: R
149 | df |> head(5)
150 | ```
151 |
152 | #### Python polars.LazyFrame
153 |
154 | ```{python}
155 | #| filename: Python
156 | #| cache: false
157 | #| output: false
158 | import polars as pl
159 |
160 | lf = pl.scan_parquet("data/ohlcv.parquet")
161 | ```
162 |
163 | ```{python}
164 | #| filename: Python
165 | lf.fetch(5)
166 | ```
167 |
168 | :::
169 |
170 | ## Filtering
171 |
172 | :::{.panel-tabset}
173 |
174 | ### PRQL DuckDB
175 |
176 | ```{prql}
177 | #| filename: PRQL
178 | #| engine-opts:
179 | #| use_glue: false
180 | from tab
181 | filter s"date_part(['year', 'month'], time) = {{year: 2021, month: 2}}"
182 | take 5
183 | ```
184 |
185 | ### SQL DuckDB
186 |
187 | :::{.content-hidden}
188 |
189 | Because of a bug in knitr's sql engine,
190 | I want to use the following block as `glue_sql`.
191 | But in `glue_sql` code blocks, we should escape `{` and `}` as `{{` and `}}`,
192 | so I mark this block as `sql` and `cache: false`.
193 |
194 | :::
195 |
196 | ```{sql}
197 | --| filename: SQL
198 | --| cache.lazy: false
199 | FROM tab
200 | WHERE date_part(['year', 'month'], time) = {year: 2021, month: 2}
201 | LIMIT 5
202 | ```
203 |
204 | ### dplyr R
205 |
206 | ```{r}
207 | #| filename: R
208 | df |>
209 | filter(
210 | lubridate::floor_date(time, "month") == lubridate::make_datetime(2021, 2)
211 | ) |>
212 | head(5)
213 | ```
214 |
215 | ### Python Polars
216 |
217 | ```{python}
218 | #| filename: Python
219 | (
220 | lf.filter((pl.col("time").dt.year() == 2021) & (pl.col("time").dt.month() == 2))
221 | .head(5)
222 | .collect()
223 | )
224 | ```
225 |
226 | :::
227 |
228 | ## Downsampling
229 |
230 | It is important to note carefully how units such as `5 days` or `1 week` actually work.
231 | In other words, where each system starts counting `5 days` or `1 week` could be completely different.
232 |
233 | Here, we should note that `time_bucket` in DuckDB, `lubridate::floor_date` in R,
234 | and `group_by_dynamic` in Polars have completely different initial starting points by default.
235 |
236 | - The DuckDB function `time_bucket`'s origin defaults to `2000-01-03 00:00:00+00` for day or week intervals (see the sketch after this list).[^time_bucket]
237 | - In the R `lubridate::floor_date` function, the timestamp is floored using the number of days elapsed
238 | since the beginning of the month when `"5 days"` is specified as the `unit` argument.
239 |
240 | ```{r}
241 | #| filename: R
242 | lubridate::as_date(c("2023-01-31", "2023-02-01")) |>
243 | lubridate::floor_date("5 days")
244 | ```
245 |
246 | And when `"1 week"` to the `unit` argument, it is floored to the nearest week,
247 | Sunday through Saturday.
248 |
249 | ```{r}
250 | #| filename: R
251 | lubridate::as_date(c("2023-01-31", "2023-02-01")) |>
252 | lubridate::floor_date("1 week")
253 | ```
254 |
255 | To start from an arbitrary origin, all breaks must be specified as a vector in the unit argument.[^floor_date]
256 |
257 | ```{r}
258 | #| filename: R
259 | lubridate::as_date(c("2023-01-31", "2023-02-01")) |>
260 | lubridate::floor_date(lubridate::make_date(2023, 1, 31))
261 | ```
262 |
263 | - In Polars' `group_by_dynamic`, the `offset` parameter, which specifies the origin point, is negative `every` by default.[^group_by_dynamic]
264 |
265 | [^time_bucket]:
266 | [^floor_date]:
267 | [^group_by_dynamic]:
268 |
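The following sketch (not part of this page's build, assuming the `duckdb` Python package) shows the
effect of `time_bucket`'s default origin versus an explicit one:

```{.python filename="Python"}
# A minimal sketch: time_bucket counts 5-day buckets from 2000-01-03 unless an origin is given.
import duckdb

print(duckdb.sql("""
    SELECT
        time_bucket(INTERVAL '5 days', DATE '2023-01-31') AS default_origin,
        time_bucket(INTERVAL '5 days', DATE '2023-01-31', DATE '2023-01-31') AS explicit_origin
"""))
```
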
269 | :::{.panel-tabset}
270 |
271 | ### PRQL DuckDB
272 |
273 | ```{prql}
274 | #| filename: PRQL
275 | from tab
276 | derive {
277 | time_new = s"""
278 | time_bucket(INTERVAL '5 days', time, (FROM tab SELECT min(time)))
279 | """
280 | }
281 | group {time_new} (
282 | aggregate {
283 | open = average open,
284 | high = average high,
285 | low = average low,
286 | close = average close,
287 | volume = average volume
288 | }
289 | )
290 | sort time_new
291 | take 5
292 | ```
293 |
294 | ### SQL DuckDB
295 |
296 | ```{glue_sql}
297 | #| filename: SQL
298 | WITH _tab1 AS (
299 | FROM tab
300 | SELECT
301 | * REPLACE (time_bucket(INTERVAL '5 days', time, (FROM tab SELECT min(time)))) AS time
302 | )
303 |
304 | FROM _tab1
305 | SELECT
306 | time,
307 | avg(COLUMNS(x -> x NOT IN ('time')))
308 | GROUP BY time
309 | ORDER BY time
310 | LIMIT 5
311 | ```
312 |
313 | ### dplyr R
314 |
315 | ```{r}
316 | #| filename: R
317 | df |>
318 | mutate(
319 | time = time |>
320 | (\(x) lubridate::floor_date(x, seq(min(x), max(x), by = 5)))()
321 | ) |>
322 | summarise(across(everything(), mean), .by = time) |>
323 | head(5)
324 | ```
325 |
326 | ### Python Polars
327 |
328 | ```{python}
329 | #| filename: Python
330 | (
331 | lf.sort("time")
332 | .group_by_dynamic("time", every="5d")
333 | .agg(pl.col(pl.Float64).mean())
334 | .head(5)
335 | .collect()
336 | )
337 | ```
338 |
339 | :::
340 |
341 | :::{.panel-tabset}
342 |
343 | ### PRQL DuckDB
344 |
345 | ```{prql}
346 | #| filename: PRQL
347 | from tab
348 | derive {
349 | time_new = s"""
350 | time_bucket(INTERVAL '7 days', time, (FROM tab SELECT min(time)))
351 | """
352 | }
353 | group {time_new} (
354 | aggregate {
355 | open_mean = average open,
356 | high_mean = average high,
357 | low_mean = average low,
358 | close_mean = average close,
359 | volume_mean = average volume,
360 | open_sum = sum open,
361 | high_sum = sum high,
362 | low_sum = sum low,
363 | close_sum = sum close,
364 | volume_sum = sum volume
365 | }
366 | )
367 | sort time_new
368 | take 5
369 | ```
370 |
371 | ### SQL DuckDB
372 |
373 | ```{glue_sql}
374 | #| filename: SQL
375 | WITH _tab1 AS (
376 | FROM tab
377 | SELECT
378 | * REPLACE (time_bucket(INTERVAL '7 days', time, (FROM tab SELECT min(time)))) AS time
379 | )
380 |
381 | FROM _tab1
382 | SELECT
383 | time,
384 | avg(COLUMNS(x -> x NOT IN ('time'))),
385 | sum(COLUMNS(x -> x NOT IN ('time')))
386 | GROUP BY time
387 | ORDER BY time
388 | LIMIT 5
389 | ```
390 |
391 | ### dplyr R
392 |
393 | ```{r}
394 | #| filename: R
395 | df |>
396 | mutate(
397 | time = time |>
398 | (\(x) lubridate::floor_date(x, seq(min(x), max(x), by = 7)))()
399 | ) |>
400 | summarise(
401 | across(
402 | everything(),
403 | list(mean = mean, sum = sum),
404 | .names = "{.col}_{.fn}"
405 | ),
406 | .by = time
407 | ) |>
408 | head(5)
409 | ```
410 |
411 | ### Python Polars
412 |
413 | ```{python}
414 | #| filename: Python
415 | (
416 | lf.sort("time")
417 | .group_by_dynamic("time", every="1w")
418 | .agg(
419 | [
420 | pl.col(pl.Float64).mean().name.suffix("_mean"),
421 | pl.col(pl.Float64).sum().name.suffix("_sum"),
422 | ]
423 | )
424 | .head(5)
425 | .collect()
426 | )
427 | ```
428 |
429 | :::
430 |
431 | ## Upsampling
432 |
433 | Using a function like `generate_series` to generate sequential values and then joining them is a general-purpose approach.
434 |
435 | In R, we can also use dedicated functions like
436 | [`timetk::pad_by_time`](https://business-science.github.io/timetk/reference/pad_by_time.html).
437 |
438 | :::{.panel-tabset}
439 |
440 | ### PRQL DuckDB
441 |
442 | :::{.callout-important}
443 |
444 | This example does not work with prql-compiler 0.11.1.
445 | ([PRQL/prql#3129](https://github.com/PRQL/prql/issues/3129))
446 |
447 | :::
448 |
449 | ```{.prql filename=PRQL}
450 | let _tab1 = s"""
451 | SELECT
452 | generate_series(
453 | (SELECT min(time)),
454 | (SELECT max(time)),
455 | INTERVAL '6 hours'
456 | ).unnest() AS time
457 | FROM tab
458 | """
459 |
460 | from _tab1
461 | join side:left tab (==time)
462 | sort tab.time
463 | select !{tab.time}
464 | take 5
465 | ```
466 |
467 | ### SQL DuckDB
468 |
469 | ```{glue_sql}
470 | #| filename: SQL
471 | WITH _tab1 AS (
472 | SELECT
473 | generate_series(
474 | (FROM tab SELECT min(time)),
475 | (FROM tab SELECT max(time)),
476 | INTERVAL '6 hours'
477 | ).unnest() AS time
478 | )
479 |
480 | FROM _tab1
481 | LEFT JOIN tab USING (time)
482 | ORDER BY time
483 | LIMIT 5
484 | ```
485 |
486 | ### dplyr R
487 |
488 | ```{r}
489 | #| filename: R
490 | .grid <- df$time |>
491 | lubridate::as_datetime() |>
492 | (\(x) seq(min(x), max(x), by = "6 hours"))() |>
493 | tibble::tibble(time = _)
494 |
495 | .grid |>
496 | left_join(df, by = "time") |>
497 | head(5)
498 | ```
499 |
500 | ### Python Polars
501 |
502 | ```{python}
503 | #| filename: Python
504 | lf.collect().sort("time").upsample("time", every="6h").head(5)
505 | ```
506 |
507 | :::
508 |
509 | ## Window Functions
510 |
511 | Care is needed with how window functions calculate their results
512 | when the width of the window is less than the specified value.
513 |
514 | ### Moving Average, Cumulative Average {#sec-moving-ave}
515 |
516 | PRQL has a dedicated way of applying a window to the entire table.
517 | For the others, we use an individual function for each column.
518 |
519 | Base R has some window functions like `cumsum`, but none for the cumulative average.
520 | dplyr complements this with several functions, including `cummean`.
521 |
522 | Polars does not yet have a dedicated function to compute cumulative averages,
523 | so we must use cumulative sums to compute them.
524 |
525 | :::{.callout-note}
526 |
527 | [The original Modern Pandas post](https://tomaugspurger.github.io/posts/modern-7-timeseries/#rolling--expanding--ew)
528 | and [the Modern Polars book](https://kevinheavey.github.io/modern-polars/timeseries.html#rolling-expanding-ew)
529 | also include an exponentially weighted (EW) calculation example.
530 | DuckDB does not have a dedicated function for this, so it is omitted here.
531 |
532 | :::
533 |
534 | :::{.panel-tabset}
535 |
536 | #### PRQL DuckDB
537 |
538 | ```{prql}
539 | #| filename: PRQL
540 | from tab
541 | sort this.time
542 | window rolling:28 (
543 | derive {`28D MA` = average close}
544 | )
545 | window rows:..0 (
546 | derive {`Expanding Average` = average close}
547 | )
548 | select {
549 | this.time,
550 | Raw = close,
551 | `28D MA`,
552 | `Expanding Average`
553 | }
554 | take 26..30
555 | ```
556 |
557 | #### SQL DuckDB
558 |
559 | ```{glue_sql}
560 | #| filename: SQL
561 | FROM tab
562 | SELECT
563 | time,
564 | close AS "Raw",
565 | avg(close) OVER (
566 | ORDER BY time
567 | ROWS BETWEEN 27 PRECEDING AND CURRENT ROW
568 | ) AS "28D MA",
569 | avg(close) OVER (
570 | ORDER BY time
571 | ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
572 | ) AS "Expanding Average"
573 | LIMIT 5 OFFSET 25
574 | ```
575 |
576 | #### dplyr R
577 |
578 | ```{r}
579 | #| filename: R
580 | #| cache: false
581 | roll_and_expand <- df |>
582 | arrange(time) |>
583 | mutate(
584 | time,
585 | Raw = close,
586 | `28D MA` = close |>
587 | slider::slide_vec(mean, .before = 27, .complete = TRUE),
588 | `Expanding Average` = cummean(close),
589 | .keep = "none"
590 | )
591 | ```
592 |
593 | ```{r}
594 | #| filename: R
595 | roll_and_expand |>
596 | slice(26:30)
597 | ```
598 |
599 | #### Python Polars
600 |
601 | ```{python}
602 | #| filename: Python
603 | #| cache: false
604 | close = pl.col("close")
605 |
606 | roll_and_expand = lf.sort("time").select(
607 | [
608 | pl.col("time"),
609 | close.alias("Raw"),
610 | close.rolling_mean(28).alias("28D MA"),
611 | close.alias("Expanding Average").cum_sum() / (close.cum_count() + 1),
612 | ]
613 | )
614 | ```
615 |
616 | ```{python}
617 | #| filename: Python
618 | roll_and_expand.head(30).tail(5).collect()
619 | ```
620 |
621 | :::
622 |
623 | Here, DuckDB also calculates the average for the `28D MA` column even when the window contains fewer than 28 rows,
624 | whereas R's `slider::slide_vec(.complete = TRUE)` and Polars' `rolling_mean` return missing values in those cases.
625 | If we are using DuckDB and want to replace these incomplete-window results with `NULL`,
626 | we need to add further processing, as sketched below.
627 |
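    | A minimal sketch of that extra processing (not executed here, and mirroring the SQL
    | window specification above): count the rows that actually fall into each frame and
    | return `NULL` whenever the frame is incomplete.
    | 
    | ```{.sql filename=SQL}
    | FROM tab
    | SELECT
    |   time,
    |   -- keep the 28-row moving average only when the frame is complete;
    |   -- otherwise the CASE expression falls through to NULL
    |   CASE
    |     WHEN count(*) OVER (
    |       ORDER BY time
    |       ROWS BETWEEN 27 PRECEDING AND CURRENT ROW
    |     ) >= 28
    |     THEN avg(close) OVER (
    |       ORDER BY time
    |       ROWS BETWEEN 27 PRECEDING AND CURRENT ROW
    |     )
    |   END AS "28D MA"
    | ORDER BY time
    | LIMIT 5 OFFSET 25
    | ```
    | 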
628 | Plotting the results of dplyr shows the following.
629 |
630 | ```{r}
631 | #| filename: R
632 | #| label: plot-window-functions
633 | #| code-fold: true
634 | #| warning: false
635 | library(ggplot2)
636 |
637 | roll_and_expand |>
638 | tidyr::pivot_longer(cols = !time) |>
639 | ggplot(aes(time, value, colour = name)) +
640 | geom_line() +
641 | theme_linedraw() +
642 | labs(y = "Close ($)") +
643 | scale_x_date(
644 | date_breaks = "month",
645 | labels = scales::label_date_short()
646 | )
647 | ```
648 |
649 | ### Combining Rolling Aggregations
650 |
651 | :::{.panel-tabset}
652 |
653 | #### PRQL DuckDB
654 |
655 | ```{prql}
656 | #| filename: PRQL
657 | from tab
658 | sort this.time
659 | window rows:-15..14 (
660 | select {
661 | this.time,
662 | mean = average close,
663 | std = stddev close
664 | }
665 | )
666 | take 13..17
667 | ```
668 |
669 | #### SQL DuckDB
670 |
671 | ```{glue_sql}
672 | #| filename: SQL
673 | FROM tab
674 | SELECT
675 | time,
676 | avg(close) OVER (
677 | ORDER BY time
678 | ROWS BETWEEN 15 PRECEDING AND 14 FOLLOWING
679 | ) AS mean,
680 | stddev(close) OVER (
681 | ORDER BY time
682 | ROWS BETWEEN 15 PRECEDING AND 14 FOLLOWING
683 | ) AS std
684 | ORDER BY time
685 | LIMIT 5 OFFSET 12
686 | ```
687 |
688 | #### dplyr R
689 |
690 | ```{r}
691 | #| filename: R
692 | #| cache: false
693 | .slide_func <- function(.x, .fn) {
694 | slider::slide_vec(.x, .fn, .before = 15, .after = 14, .complete = TRUE)
695 | }
696 |
697 | mean_std <- df |>
698 | arrange(time) |>
699 | mutate(
700 | time,
701 | across(
702 | close,
703 | .fns = list(mean = \(x) .slide_func(x, mean), std = \(x) .slide_func(x, sd)),
704 | .names = "{.fn}"
705 | ),
706 | .keep = "none"
707 | )
708 | ```
709 |
710 | ```{r}
711 | #| filename: R
712 | mean_std |>
713 | slice(13:17)
714 | ```
715 |
716 | #### Python Polars
717 |
718 | ```{python}
719 | #| filename: Python
720 | #| cache: false
721 | mean_std = lf.sort("time").select(
722 | time=pl.col("time"),
723 | mean=pl.col("close").rolling_mean(30, center=True),
724 | std=pl.col("close").rolling_std(30, center=True),
725 | )
726 | ```
727 |
728 | ```{python}
729 | #| filename: Python
730 | mean_std.head(17).tail(5).collect()
731 | ```
732 |
733 | :::
734 |
735 | As in @sec-moving-ave, here too the DuckDB results differ from the others.
736 |
737 | Plotting the results of dplyr shows the following.
738 |
739 | ```{r}
740 | #| filename: R
741 | #| label: plot-rolling-combined
742 | #| code-fold: true
743 | #| warning: false
744 | library(ggplot2)
745 |
746 | mean_std |>
747 | ggplot(aes(time)) +
748 | geom_ribbon(
749 | aes(ymin = mean - std, ymax = mean + std),
750 | alpha = 0.3, fill = "blue"
751 | ) +
752 | geom_line(aes(y = mean), color = "blue") +
753 | theme_linedraw() +
754 | labs(y = "Close ($)") +
755 | scale_x_date(
756 | date_breaks = "month",
757 | labels = scales::label_date_short()
758 | )
759 | ```
760 |
761 | ## Timezones
762 |
763 | :::{.callout-important}
764 |
765 | In DuckDB, the icu extension is needed for time zone support.
766 | If the DuckDB client we are using does not bundle the extension, we need to install and load it.
767 |
768 | ```{sql}
769 | --| filename: SQL
770 | --| cache: false
771 | --| warning: false
772 | INSTALL 'icu'
773 | ```
774 |
775 | ```{sql}
776 | --| filename: SQL
777 | --| cache: false
778 | --| warning: false
779 | LOAD 'icu'
780 | ```
781 |
782 | :::
783 |
784 | :::{.panel-tabset}
785 |
786 | ### PRQL DuckDB
787 |
788 | ```{prql}
789 | #| filename: PRQL
790 | let timezone = tz col -> s"timezone({tz}, {col})"
791 |
792 | from tab
793 | derive {
794 | time_new = (this.time | timezone "UTC" | timezone "US/Eastern")
795 | }
796 | select !{this.time}
797 | take 5
798 | ```
799 |
800 | ### SQL DuckDB
801 |
802 | ```{glue_sql}
803 | #| filename: SQL
804 | FROM tab
805 | SELECT
806 | * REPLACE timezone('US/Eastern', timezone('UTC', time)) AS time
807 | LIMIT 5
808 | ```
809 |
810 | ### dplyr R
811 |
812 | ```{r}
813 | #| filename: R
814 | df |>
815 | mutate(
816 | time = time |>
817 | lubridate::force_tz("UTC") |>
818 | lubridate::with_tz("US/Eastern")
819 | ) |>
820 | head(5)
821 | ```
822 |
823 | ### Python Polars
824 |
825 | ```{python}
826 | #| filename: Python
827 | (
828 | lf.with_columns(
829 | pl.col("time")
830 | .cast(pl.Datetime)
831 | .dt.replace_time_zone("UTC")
832 | .dt.convert_time_zone("US/Eastern")
833 | )
834 | .head(5)
835 | .collect()
836 | )
837 | ```
838 |
839 | :::
840 |
841 | Note that each system may store time zone information in a different way.
842 | Here, the `time` column (and the `time_new` column) in the DuckDB results
843 | is of the TIMESTAMP type, which carries no time zone information.
844 |
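    | As a quick check (a minimal sketch, not executed here), DuckDB's `typeof` function can be
    | used to confirm that the converted column is a plain TIMESTAMP rather than
    | TIMESTAMP WITH TIME ZONE:
    | 
    | ```{.sql filename=SQL}
    | FROM tab
    | SELECT
    |   -- timezone('UTC', time) interprets the naive timestamp as UTC (TIMESTAMPTZ);
    |   -- the outer timezone() converts it back to a naive TIMESTAMP in US/Eastern local time
    |   typeof(timezone('US/Eastern', timezone('UTC', time))) AS time_type
    | LIMIT 1
    | ```
    | 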
--------------------------------------------------------------------------------
/docusaurus.config.js:
--------------------------------------------------------------------------------
1 | // @ts-check
2 | // Note: type annotations allow type checking and IDEs autocompletion
3 |
4 | const { themes } = require('prism-react-renderer');
5 | const lightTheme = themes.github;
6 | const darkTheme = themes.dracula;
7 |
8 | /** @type {import('@docusaurus/types').Config} */
9 | const config = {
10 | title: "Querying with PRQL",
11 | tagline: "Data transformation with PRQL and DuckDB",
12 | favicon: "img/favicon.ico",
13 |
14 | // Set the production url of your site here
15 | url: "https://eitsupi.github.io/",
16 |   // Set the /<baseUrl>/ pathname under which your site is served
17 |   // For GitHub pages deployment, it is often '/<projectName>/'
18 | baseUrl: "/querying-with-prql/",
19 |
20 | // GitHub pages deployment config.
21 | // If you aren't using GitHub pages, you don't need these.
22 | organizationName: "eitsupi", // Usually your GitHub org/user name.
23 | projectName: "querying-with-prql", // Usually your repo name.
24 |
25 | onBrokenLinks: "throw",
26 | onBrokenMarkdownLinks: "warn",
27 |
28 |   // Even if you don't use internationalization, you can use this field to set useful
29 | // metadata like html lang. For example, if your site is Chinese, you may want
30 | // to replace "en" with "zh-Hans".
31 | i18n: {
32 | defaultLocale: "en",
33 | locales: ["en"],
34 | },
35 |
36 | presets: [
37 | [
38 | "classic",
39 | /** @type {import('@docusaurus/preset-classic').Options} */
40 | ({
41 | docs: {
42 | routeBasePath: "/",
43 | sidebarPath: require.resolve("./sidebars.js"),
44 | // editUrl: 'https://github.com/eitsupi/querying-with-prql/tree/main/',
45 | },
46 | blog: false,
47 | theme: {
48 | customCss: require.resolve("./src/css/custom.css"),
49 | },
50 | }),
51 | ],
52 | ],
53 |
54 | themeConfig:
55 | /** @type {import('@docusaurus/preset-classic').ThemeConfig} */
56 | ({
57 | // Replace with your project's social card
58 | // image: 'img/docusaurus-social-card.jpg',
59 | navbar: {
60 |         title: "Querying with PRQL",
61 | logo: {
62 | alt: "Logo",
63 | src: "img/logo.svg",
64 | },
65 | items: [
66 | {
67 | href: "https://github.com/eitsupi/querying-with-prql",
68 | label: "GitHub",
69 | position: "right",
70 | },
71 | ],
72 | },
73 | footer: {
74 | style: "dark",
75 | links: [
76 | {
77 | title: "PRQL",
78 | items: [
79 | {
80 | label: "Website",
81 | href: "https://prql-lang.org/",
82 | },
83 | {
84 | label: "GitHub",
85 | href: "https://github.com/PRQL/prql",
86 | },
87 | {
88 | label: "Language book",
89 | href: "https://prql-lang.org/book/",
90 | },
91 | {
92 | label: "Playground",
93 | href: "https://prql-lang.org/playground/",
94 | },
95 | ],
96 | },
97 | ],
98 | copyright: `Copyright © ${new Date().getFullYear()} @eitsupi. Built with Quarto and Docusaurus.`,
99 | },
100 | prism: {
101 | additionalLanguages: ["bash", "python", "sql", "r", "elm"],
102 | },
103 | }),
104 | themes: [
105 | [
106 | require.resolve("@easyops-cn/docusaurus-search-local"),
107 | {
108 | hashed: true,
109 | docsRouteBasePath: "/",
110 | },
111 | ],
112 | ],
113 | };
114 |
115 | module.exports = config;
116 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "",
3 | "version": "0.0.0",
4 | "private": true,
5 | "scripts": {
6 | "docusaurus": "docusaurus",
7 | "start": "docusaurus start",
8 | "build": "docusaurus build",
9 | "swizzle": "docusaurus swizzle",
10 | "deploy": "docusaurus deploy",
11 | "clear": "docusaurus clear",
12 | "serve": "docusaurus serve",
13 | "write-translations": "docusaurus write-translations",
14 | "write-heading-ids": "docusaurus write-heading-ids"
15 | },
16 | "dependencies": {
17 | "@docusaurus/core": "^3.5.2",
18 | "@docusaurus/preset-classic": "^3.5.2",
19 | "@easyops-cn/docusaurus-search-local": "^0.44.5",
20 | "@mdx-js/react": "^3.0.1",
21 | "clsx": "^2.1.1",
22 | "prism-react-renderer": "^2.3.1",
23 | "react": "^18.3.1",
24 | "react-dom": "^18.3.1"
25 | },
26 | "devDependencies": {
27 | "@docusaurus/module-type-aliases": "^3.5.2",
28 | "@docusaurus/types": "^3.5.2"
29 | },
30 | "browserslist": {
31 | "production": [
32 | ">0.5%",
33 | "not dead",
34 | "not op_mini all"
35 | ],
36 | "development": [
37 | "last 1 chrome version",
38 | "last 1 firefox version",
39 | "last 1 safari version"
40 | ]
41 | },
42 | "engines": {
43 | "node": ">=18.0"
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/renv.lock:
--------------------------------------------------------------------------------
1 | {
2 | "R": {
3 | "Version": "4.4.1",
4 | "Repositories": [
5 | {
6 | "Name": "CRAN",
7 | "URL": "https://packagemanager.posit.co/cran/latest"
8 | }
9 | ]
10 | },
11 | "Packages": {
12 | "DBI": {
13 | "Package": "DBI",
14 | "Version": "1.2.3",
15 | "Source": "Repository",
16 | "Repository": "RSPM",
17 | "Requirements": [
18 | "R",
19 | "methods"
20 | ],
21 | "Hash": "065ae649b05f1ff66bb0c793107508f5"
22 | },
23 | "MASS": {
24 | "Package": "MASS",
25 | "Version": "7.3-61",
26 | "Source": "Repository",
27 | "Repository": "RSPM",
28 | "Requirements": [
29 | "R",
30 | "grDevices",
31 | "graphics",
32 | "methods",
33 | "stats",
34 | "utils"
35 | ],
36 | "Hash": "0cafd6f0500e5deba33be22c46bf6055"
37 | },
38 | "Matrix": {
39 | "Package": "Matrix",
40 | "Version": "1.7-0",
41 | "Source": "Repository",
42 | "Repository": "RSPM",
43 | "Requirements": [
44 | "R",
45 | "grDevices",
46 | "graphics",
47 | "grid",
48 | "lattice",
49 | "methods",
50 | "stats",
51 | "utils"
52 | ],
53 | "Hash": "1920b2f11133b12350024297d8a4ff4a"
54 | },
55 | "R6": {
56 | "Package": "R6",
57 | "Version": "2.5.1",
58 | "Source": "Repository",
59 | "Repository": "RSPM",
60 | "Requirements": [
61 | "R"
62 | ],
63 | "Hash": "470851b6d5d0ac559e9d01bb352b4021"
64 | },
65 | "RColorBrewer": {
66 | "Package": "RColorBrewer",
67 | "Version": "1.1-3",
68 | "Source": "Repository",
69 | "Repository": "RSPM",
70 | "Requirements": [
71 | "R"
72 | ],
73 | "Hash": "45f0398006e83a5b10b72a90663d8d8c"
74 | },
75 | "Rcpp": {
76 | "Package": "Rcpp",
77 | "Version": "1.0.13",
78 | "Source": "Repository",
79 | "Repository": "CRAN",
80 | "Requirements": [
81 | "methods",
82 | "utils"
83 | ],
84 | "Hash": "f27411eb6d9c3dada5edd444b8416675"
85 | },
86 | "RcppTOML": {
87 | "Package": "RcppTOML",
88 | "Version": "0.2.2",
89 | "Source": "Repository",
90 | "Repository": "RSPM",
91 | "Requirements": [
92 | "R",
93 | "Rcpp"
94 | ],
95 | "Hash": "c232938949fcd8126034419cc529333a"
96 | },
97 | "askpass": {
98 | "Package": "askpass",
99 | "Version": "1.2.1",
100 | "Source": "Repository",
101 | "Repository": "CRAN",
102 | "Requirements": [
103 | "sys"
104 | ],
105 | "Hash": "c39f4155b3ceb1a9a2799d700fbd4b6a"
106 | },
107 | "base64enc": {
108 | "Package": "base64enc",
109 | "Version": "0.1-3",
110 | "Source": "Repository",
111 | "Repository": "RSPM",
112 | "Requirements": [
113 | "R"
114 | ],
115 | "Hash": "543776ae6848fde2f48ff3816d0628bc"
116 | },
117 | "bit": {
118 | "Package": "bit",
119 | "Version": "4.5.0",
120 | "Source": "Repository",
121 | "Repository": "CRAN",
122 | "Requirements": [
123 | "R"
124 | ],
125 | "Hash": "5dc7b2677d65d0e874fc4aaf0e879987"
126 | },
127 | "bit64": {
128 | "Package": "bit64",
129 | "Version": "4.5.2",
130 | "Source": "Repository",
131 | "Repository": "CRAN",
132 | "Requirements": [
133 | "R",
134 | "bit",
135 | "methods",
136 | "stats",
137 | "utils"
138 | ],
139 | "Hash": "e84984bf5f12a18628d9a02322128dfd"
140 | },
141 | "bslib": {
142 | "Package": "bslib",
143 | "Version": "0.8.0",
144 | "Source": "Repository",
145 | "Repository": "CRAN",
146 | "Requirements": [
147 | "R",
148 | "base64enc",
149 | "cachem",
150 | "fastmap",
151 | "grDevices",
152 | "htmltools",
153 | "jquerylib",
154 | "jsonlite",
155 | "lifecycle",
156 | "memoise",
157 | "mime",
158 | "rlang",
159 | "sass"
160 | ],
161 | "Hash": "b299c6741ca9746fb227debcb0f9fb6c"
162 | },
163 | "cachem": {
164 | "Package": "cachem",
165 | "Version": "1.1.0",
166 | "Source": "Repository",
167 | "Repository": "RSPM",
168 | "Requirements": [
169 | "fastmap",
170 | "rlang"
171 | ],
172 | "Hash": "cd9a672193789068eb5a2aad65a0dedf"
173 | },
174 | "cli": {
175 | "Package": "cli",
176 | "Version": "3.6.3",
177 | "Source": "Repository",
178 | "Repository": "CRAN",
179 | "Requirements": [
180 | "R",
181 | "utils"
182 | ],
183 | "Hash": "b21916dd77a27642b447374a5d30ecf3"
184 | },
185 | "clipr": {
186 | "Package": "clipr",
187 | "Version": "0.8.0",
188 | "Source": "Repository",
189 | "Repository": "RSPM",
190 | "Requirements": [
191 | "utils"
192 | ],
193 | "Hash": "3f038e5ac7f41d4ac41ce658c85e3042"
194 | },
195 | "colorspace": {
196 | "Package": "colorspace",
197 | "Version": "2.1-1",
198 | "Source": "Repository",
199 | "Repository": "CRAN",
200 | "Requirements": [
201 | "R",
202 | "grDevices",
203 | "graphics",
204 | "methods",
205 | "stats"
206 | ],
207 | "Hash": "d954cb1c57e8d8b756165d7ba18aa55a"
208 | },
209 | "cpp11": {
210 | "Package": "cpp11",
211 | "Version": "0.5.0",
212 | "Source": "Repository",
213 | "Repository": "CRAN",
214 | "Requirements": [
215 | "R"
216 | ],
217 | "Hash": "91570bba75d0c9d3f1040c835cee8fba"
218 | },
219 | "crayon": {
220 | "Package": "crayon",
221 | "Version": "1.5.3",
222 | "Source": "Repository",
223 | "Repository": "RSPM",
224 | "Requirements": [
225 | "grDevices",
226 | "methods",
227 | "utils"
228 | ],
229 | "Hash": "859d96e65ef198fd43e82b9628d593ef"
230 | },
231 | "curl": {
232 | "Package": "curl",
233 | "Version": "5.2.3",
234 | "Source": "Repository",
235 | "Repository": "CRAN",
236 | "Requirements": [
237 | "R"
238 | ],
239 | "Hash": "d91263322a58af798f6cf3b13fd56dde"
240 | },
241 | "digest": {
242 | "Package": "digest",
243 | "Version": "0.6.37",
244 | "Source": "Repository",
245 | "Repository": "CRAN",
246 | "Requirements": [
247 | "R",
248 | "utils"
249 | ],
250 | "Hash": "33698c4b3127fc9f506654607fb73676"
251 | },
252 | "dplyr": {
253 | "Package": "dplyr",
254 | "Version": "1.1.4",
255 | "Source": "Repository",
256 | "Repository": "RSPM",
257 | "Requirements": [
258 | "R",
259 | "R6",
260 | "cli",
261 | "generics",
262 | "glue",
263 | "lifecycle",
264 | "magrittr",
265 | "methods",
266 | "pillar",
267 | "rlang",
268 | "tibble",
269 | "tidyselect",
270 | "utils",
271 | "vctrs"
272 | ],
273 | "Hash": "fedd9d00c2944ff00a0e2696ccf048ec"
274 | },
275 | "duckdb": {
276 | "Package": "duckdb",
277 | "Version": "1.1.0",
278 | "Source": "Repository",
279 | "Repository": "CRAN",
280 | "Requirements": [
281 | "DBI",
282 | "R",
283 | "methods",
284 | "utils"
285 | ],
286 | "Hash": "ac14e3bdcaab23293129b451fee02910"
287 | },
288 | "evaluate": {
289 | "Package": "evaluate",
290 | "Version": "1.0.0",
291 | "Source": "Repository",
292 | "Repository": "CRAN",
293 | "Requirements": [
294 | "R"
295 | ],
296 | "Hash": "6b567375113ceb7d9f800de4dd42218e"
297 | },
298 | "fansi": {
299 | "Package": "fansi",
300 | "Version": "1.0.6",
301 | "Source": "Repository",
302 | "Repository": "RSPM",
303 | "Requirements": [
304 | "R",
305 | "grDevices",
306 | "utils"
307 | ],
308 | "Hash": "962174cf2aeb5b9eea581522286a911f"
309 | },
310 | "farver": {
311 | "Package": "farver",
312 | "Version": "2.1.2",
313 | "Source": "Repository",
314 | "Repository": "RSPM",
315 | "Hash": "680887028577f3fa2a81e410ed0d6e42"
316 | },
317 | "fastmap": {
318 | "Package": "fastmap",
319 | "Version": "1.2.0",
320 | "Source": "Repository",
321 | "Repository": "RSPM",
322 | "Hash": "aa5e1cd11c2d15497494c5292d7ffcc8"
323 | },
324 | "fontawesome": {
325 | "Package": "fontawesome",
326 | "Version": "0.5.2",
327 | "Source": "Repository",
328 | "Repository": "RSPM",
329 | "Requirements": [
330 | "R",
331 | "htmltools",
332 | "rlang"
333 | ],
334 | "Hash": "c2efdd5f0bcd1ea861c2d4e2a883a67d"
335 | },
336 | "fs": {
337 | "Package": "fs",
338 | "Version": "1.6.4",
339 | "Source": "Repository",
340 | "Repository": "RSPM",
341 | "Requirements": [
342 | "R",
343 | "methods"
344 | ],
345 | "Hash": "15aeb8c27f5ea5161f9f6a641fafd93a"
346 | },
347 | "generics": {
348 | "Package": "generics",
349 | "Version": "0.1.3",
350 | "Source": "Repository",
351 | "Repository": "RSPM",
352 | "Requirements": [
353 | "R",
354 | "methods"
355 | ],
356 | "Hash": "15e9634c0fcd294799e9b2e929ed1b86"
357 | },
358 | "ggplot2": {
359 | "Package": "ggplot2",
360 | "Version": "3.5.1",
361 | "Source": "Repository",
362 | "Repository": "RSPM",
363 | "Requirements": [
364 | "MASS",
365 | "R",
366 | "cli",
367 | "glue",
368 | "grDevices",
369 | "grid",
370 | "gtable",
371 | "isoband",
372 | "lifecycle",
373 | "mgcv",
374 | "rlang",
375 | "scales",
376 | "stats",
377 | "tibble",
378 | "vctrs",
379 | "withr"
380 | ],
381 | "Hash": "44c6a2f8202d5b7e878ea274b1092426"
382 | },
383 | "glue": {
384 | "Package": "glue",
385 | "Version": "1.8.0",
386 | "Source": "Repository",
387 | "Repository": "CRAN",
388 | "Requirements": [
389 | "R",
390 | "methods"
391 | ],
392 | "Hash": "5899f1eaa825580172bb56c08266f37c"
393 | },
394 | "gtable": {
395 | "Package": "gtable",
396 | "Version": "0.3.5",
397 | "Source": "Repository",
398 | "Repository": "RSPM",
399 | "Requirements": [
400 | "R",
401 | "cli",
402 | "glue",
403 | "grid",
404 | "lifecycle",
405 | "rlang"
406 | ],
407 | "Hash": "e18861963cbc65a27736e02b3cd3c4a0"
408 | },
409 | "here": {
410 | "Package": "here",
411 | "Version": "1.0.1",
412 | "Source": "Repository",
413 | "Repository": "RSPM",
414 | "Requirements": [
415 | "rprojroot"
416 | ],
417 | "Hash": "24b224366f9c2e7534d2344d10d59211"
418 | },
419 | "highr": {
420 | "Package": "highr",
421 | "Version": "0.11",
422 | "Source": "Repository",
423 | "Repository": "RSPM",
424 | "Requirements": [
425 | "R",
426 | "xfun"
427 | ],
428 | "Hash": "d65ba49117ca223614f71b60d85b8ab7"
429 | },
430 | "hms": {
431 | "Package": "hms",
432 | "Version": "1.1.3",
433 | "Source": "Repository",
434 | "Repository": "RSPM",
435 | "Requirements": [
436 | "lifecycle",
437 | "methods",
438 | "pkgconfig",
439 | "rlang",
440 | "vctrs"
441 | ],
442 | "Hash": "b59377caa7ed00fa41808342002138f9"
443 | },
444 | "htmltools": {
445 | "Package": "htmltools",
446 | "Version": "0.5.8.1",
447 | "Source": "Repository",
448 | "Repository": "RSPM",
449 | "Requirements": [
450 | "R",
451 | "base64enc",
452 | "digest",
453 | "fastmap",
454 | "grDevices",
455 | "rlang",
456 | "utils"
457 | ],
458 | "Hash": "81d371a9cc60640e74e4ab6ac46dcedc"
459 | },
460 | "httr": {
461 | "Package": "httr",
462 | "Version": "1.4.7",
463 | "Source": "Repository",
464 | "Repository": "RSPM",
465 | "Requirements": [
466 | "R",
467 | "R6",
468 | "curl",
469 | "jsonlite",
470 | "mime",
471 | "openssl"
472 | ],
473 | "Hash": "ac107251d9d9fd72f0ca8049988f1d7f"
474 | },
475 | "isoband": {
476 | "Package": "isoband",
477 | "Version": "0.2.7",
478 | "Source": "Repository",
479 | "Repository": "RSPM",
480 | "Requirements": [
481 | "grid",
482 | "utils"
483 | ],
484 | "Hash": "0080607b4a1a7b28979aecef976d8bc2"
485 | },
486 | "janitor": {
487 | "Package": "janitor",
488 | "Version": "2.2.0",
489 | "Source": "Repository",
490 | "Repository": "RSPM",
491 | "Requirements": [
492 | "R",
493 | "dplyr",
494 | "hms",
495 | "lifecycle",
496 | "lubridate",
497 | "magrittr",
498 | "purrr",
499 | "rlang",
500 | "snakecase",
501 | "stringi",
502 | "stringr",
503 | "tidyr",
504 | "tidyselect"
505 | ],
506 | "Hash": "5baae149f1082f466df9d1442ba7aa65"
507 | },
508 | "jquerylib": {
509 | "Package": "jquerylib",
510 | "Version": "0.1.4",
511 | "Source": "Repository",
512 | "Repository": "RSPM",
513 | "Requirements": [
514 | "htmltools"
515 | ],
516 | "Hash": "5aab57a3bd297eee1c1d862735972182"
517 | },
518 | "jsonlite": {
519 | "Package": "jsonlite",
520 | "Version": "1.8.9",
521 | "Source": "Repository",
522 | "Repository": "CRAN",
523 | "Requirements": [
524 | "methods"
525 | ],
526 | "Hash": "4e993b65c2c3ffbffce7bb3e2c6f832b"
527 | },
528 | "knitr": {
529 | "Package": "knitr",
530 | "Version": "1.48",
531 | "Source": "Repository",
532 | "Repository": "CRAN",
533 | "Requirements": [
534 | "R",
535 | "evaluate",
536 | "highr",
537 | "methods",
538 | "tools",
539 | "xfun",
540 | "yaml"
541 | ],
542 | "Hash": "acf380f300c721da9fde7df115a5f86f"
543 | },
544 | "labeling": {
545 | "Package": "labeling",
546 | "Version": "0.4.3",
547 | "Source": "Repository",
548 | "Repository": "RSPM",
549 | "Requirements": [
550 | "graphics",
551 | "stats"
552 | ],
553 | "Hash": "b64ec208ac5bc1852b285f665d6368b3"
554 | },
555 | "lattice": {
556 | "Package": "lattice",
557 | "Version": "0.22-6",
558 | "Source": "Repository",
559 | "Repository": "RSPM",
560 | "Requirements": [
561 | "R",
562 | "grDevices",
563 | "graphics",
564 | "grid",
565 | "stats",
566 | "utils"
567 | ],
568 | "Hash": "cc5ac1ba4c238c7ca9fa6a87ca11a7e2"
569 | },
570 | "lifecycle": {
571 | "Package": "lifecycle",
572 | "Version": "1.0.4",
573 | "Source": "Repository",
574 | "Repository": "RSPM",
575 | "Requirements": [
576 | "R",
577 | "cli",
578 | "glue",
579 | "rlang"
580 | ],
581 | "Hash": "b8552d117e1b808b09a832f589b79035"
582 | },
583 | "lubridate": {
584 | "Package": "lubridate",
585 | "Version": "1.9.3",
586 | "Source": "Repository",
587 | "Repository": "RSPM",
588 | "Requirements": [
589 | "R",
590 | "generics",
591 | "methods",
592 | "timechange"
593 | ],
594 | "Hash": "680ad542fbcf801442c83a6ac5a2126c"
595 | },
596 | "magrittr": {
597 | "Package": "magrittr",
598 | "Version": "2.0.3",
599 | "Source": "Repository",
600 | "Repository": "RSPM",
601 | "Requirements": [
602 | "R"
603 | ],
604 | "Hash": "7ce2733a9826b3aeb1775d56fd305472"
605 | },
606 | "memoise": {
607 | "Package": "memoise",
608 | "Version": "2.0.1",
609 | "Source": "Repository",
610 | "Repository": "RSPM",
611 | "Requirements": [
612 | "cachem",
613 | "rlang"
614 | ],
615 | "Hash": "e2817ccf4a065c5d9d7f2cfbe7c1d78c"
616 | },
617 | "mgcv": {
618 | "Package": "mgcv",
619 | "Version": "1.9-1",
620 | "Source": "Repository",
621 | "Repository": "RSPM",
622 | "Requirements": [
623 | "Matrix",
624 | "R",
625 | "graphics",
626 | "methods",
627 | "nlme",
628 | "splines",
629 | "stats",
630 | "utils"
631 | ],
632 | "Hash": "110ee9d83b496279960e162ac97764ce"
633 | },
634 | "mime": {
635 | "Package": "mime",
636 | "Version": "0.12",
637 | "Source": "Repository",
638 | "Repository": "RSPM",
639 | "Requirements": [
640 | "tools"
641 | ],
642 | "Hash": "18e9c28c1d3ca1560ce30658b22ce104"
643 | },
644 | "munsell": {
645 | "Package": "munsell",
646 | "Version": "0.5.1",
647 | "Source": "Repository",
648 | "Repository": "RSPM",
649 | "Requirements": [
650 | "colorspace",
651 | "methods"
652 | ],
653 | "Hash": "4fd8900853b746af55b81fda99da7695"
654 | },
655 | "nlme": {
656 | "Package": "nlme",
657 | "Version": "3.1-166",
658 | "Source": "Repository",
659 | "Repository": "CRAN",
660 | "Requirements": [
661 | "R",
662 | "graphics",
663 | "lattice",
664 | "stats",
665 | "utils"
666 | ],
667 | "Hash": "ccbb8846be320b627e6aa2b4616a2ded"
668 | },
669 | "openssl": {
670 | "Package": "openssl",
671 | "Version": "2.2.2",
672 | "Source": "Repository",
673 | "Repository": "CRAN",
674 | "Requirements": [
675 | "askpass"
676 | ],
677 | "Hash": "d413e0fef796c9401a4419485f709ca1"
678 | },
679 | "pillar": {
680 | "Package": "pillar",
681 | "Version": "1.9.0",
682 | "Source": "Repository",
683 | "Repository": "RSPM",
684 | "Requirements": [
685 | "cli",
686 | "fansi",
687 | "glue",
688 | "lifecycle",
689 | "rlang",
690 | "utf8",
691 | "utils",
692 | "vctrs"
693 | ],
694 | "Hash": "15da5a8412f317beeee6175fbc76f4bb"
695 | },
696 | "pkgconfig": {
697 | "Package": "pkgconfig",
698 | "Version": "2.0.3",
699 | "Source": "Repository",
700 | "Repository": "RSPM",
701 | "Requirements": [
702 | "utils"
703 | ],
704 | "Hash": "01f28d4278f15c76cddbea05899c5d6f"
705 | },
706 | "png": {
707 | "Package": "png",
708 | "Version": "0.1-8",
709 | "Source": "Repository",
710 | "Repository": "RSPM",
711 | "Requirements": [
712 | "R"
713 | ],
714 | "Hash": "bd54ba8a0a5faded999a7aab6e46b374"
715 | },
716 | "prettyunits": {
717 | "Package": "prettyunits",
718 | "Version": "1.2.0",
719 | "Source": "Repository",
720 | "Repository": "RSPM",
721 | "Requirements": [
722 | "R"
723 | ],
724 | "Hash": "6b01fc98b1e86c4f705ce9dcfd2f57c7"
725 | },
726 | "progress": {
727 | "Package": "progress",
728 | "Version": "1.2.3",
729 | "Source": "Repository",
730 | "Repository": "RSPM",
731 | "Requirements": [
732 | "R",
733 | "R6",
734 | "crayon",
735 | "hms",
736 | "prettyunits"
737 | ],
738 | "Hash": "f4625e061cb2865f111b47ff163a5ca6"
739 | },
740 | "prqlr": {
741 | "Package": "prqlr",
742 | "Version": "0.9.0",
743 | "Source": "Repository",
744 | "Repository": "CRAN",
745 | "Requirements": [
746 | "R"
747 | ],
748 | "Hash": "d45578a066c904d8bc36d69197161d0b"
749 | },
750 | "purrr": {
751 | "Package": "purrr",
752 | "Version": "1.0.2",
753 | "Source": "Repository",
754 | "Repository": "RSPM",
755 | "Requirements": [
756 | "R",
757 | "cli",
758 | "lifecycle",
759 | "magrittr",
760 | "rlang",
761 | "vctrs"
762 | ],
763 | "Hash": "1cba04a4e9414bdefc9dcaa99649a8dc"
764 | },
765 | "rappdirs": {
766 | "Package": "rappdirs",
767 | "Version": "0.3.3",
768 | "Source": "Repository",
769 | "Repository": "RSPM",
770 | "Requirements": [
771 | "R"
772 | ],
773 | "Hash": "5e3c5dc0b071b21fa128676560dbe94d"
774 | },
775 | "readr": {
776 | "Package": "readr",
777 | "Version": "2.1.5",
778 | "Source": "Repository",
779 | "Repository": "RSPM",
780 | "Requirements": [
781 | "R",
782 | "R6",
783 | "cli",
784 | "clipr",
785 | "cpp11",
786 | "crayon",
787 | "hms",
788 | "lifecycle",
789 | "methods",
790 | "rlang",
791 | "tibble",
792 | "tzdb",
793 | "utils",
794 | "vroom"
795 | ],
796 | "Hash": "9de96463d2117f6ac49980577939dfb3"
797 | },
798 | "renv": {
799 | "Package": "renv",
800 | "Version": "1.0.10",
801 | "Source": "Repository",
802 | "Repository": "CRAN",
803 | "Requirements": [
804 | "utils"
805 | ],
806 | "Hash": "d0387d5687ec933dd7587efd4cfa2d85"
807 | },
808 | "reticulate": {
809 | "Package": "reticulate",
810 | "Version": "1.39.0",
811 | "Source": "Repository",
812 | "Repository": "CRAN",
813 | "Requirements": [
814 | "Matrix",
815 | "R",
816 | "Rcpp",
817 | "RcppTOML",
818 | "graphics",
819 | "here",
820 | "jsonlite",
821 | "methods",
822 | "png",
823 | "rappdirs",
824 | "rlang",
825 | "utils",
826 | "withr"
827 | ],
828 | "Hash": "e1a5d04397edc1580c5e0ed1dbdccf76"
829 | },
830 | "rlang": {
831 | "Package": "rlang",
832 | "Version": "1.1.4",
833 | "Source": "Repository",
834 | "Repository": "RSPM",
835 | "Requirements": [
836 | "R",
837 | "utils"
838 | ],
839 | "Hash": "3eec01f8b1dee337674b2e34ab1f9bc1"
840 | },
841 | "rmarkdown": {
842 | "Package": "rmarkdown",
843 | "Version": "2.28",
844 | "Source": "Repository",
845 | "Repository": "CRAN",
846 | "Requirements": [
847 | "R",
848 | "bslib",
849 | "evaluate",
850 | "fontawesome",
851 | "htmltools",
852 | "jquerylib",
853 | "jsonlite",
854 | "knitr",
855 | "methods",
856 | "tinytex",
857 | "tools",
858 | "utils",
859 | "xfun",
860 | "yaml"
861 | ],
862 | "Hash": "062470668513dcda416927085ee9bdc7"
863 | },
864 | "rprojroot": {
865 | "Package": "rprojroot",
866 | "Version": "2.0.4",
867 | "Source": "Repository",
868 | "Repository": "RSPM",
869 | "Requirements": [
870 | "R"
871 | ],
872 | "Hash": "4c8415e0ec1e29f3f4f6fc108bef0144"
873 | },
874 | "rvest": {
875 | "Package": "rvest",
876 | "Version": "1.0.4",
877 | "Source": "Repository",
878 | "Repository": "RSPM",
879 | "Requirements": [
880 | "R",
881 | "cli",
882 | "glue",
883 | "httr",
884 | "lifecycle",
885 | "magrittr",
886 | "rlang",
887 | "selectr",
888 | "tibble",
889 | "xml2"
890 | ],
891 | "Hash": "0bcf0c6f274e90ea314b812a6d19a519"
892 | },
893 | "sass": {
894 | "Package": "sass",
895 | "Version": "0.4.9",
896 | "Source": "Repository",
897 | "Repository": "RSPM",
898 | "Requirements": [
899 | "R6",
900 | "fs",
901 | "htmltools",
902 | "rappdirs",
903 | "rlang"
904 | ],
905 | "Hash": "d53dbfddf695303ea4ad66f86e99b95d"
906 | },
907 | "scales": {
908 | "Package": "scales",
909 | "Version": "1.3.0",
910 | "Source": "Repository",
911 | "Repository": "RSPM",
912 | "Requirements": [
913 | "R",
914 | "R6",
915 | "RColorBrewer",
916 | "cli",
917 | "farver",
918 | "glue",
919 | "labeling",
920 | "lifecycle",
921 | "munsell",
922 | "rlang",
923 | "viridisLite"
924 | ],
925 | "Hash": "c19df082ba346b0ffa6f833e92de34d1"
926 | },
927 | "selectr": {
928 | "Package": "selectr",
929 | "Version": "0.4-2",
930 | "Source": "Repository",
931 | "Repository": "RSPM",
932 | "Requirements": [
933 | "R",
934 | "R6",
935 | "methods",
936 | "stringr"
937 | ],
938 | "Hash": "3838071b66e0c566d55cc26bd6e27bf4"
939 | },
940 | "slider": {
941 | "Package": "slider",
942 | "Version": "0.3.1",
943 | "Source": "Repository",
944 | "Repository": "RSPM",
945 | "Requirements": [
946 | "R",
947 | "cli",
948 | "rlang",
949 | "vctrs",
950 | "warp"
951 | ],
952 | "Hash": "a584625e2b9e4fad4be135c8ea5c99aa"
953 | },
954 | "snakecase": {
955 | "Package": "snakecase",
956 | "Version": "0.11.1",
957 | "Source": "Repository",
958 | "Repository": "RSPM",
959 | "Requirements": [
960 | "R",
961 | "stringi",
962 | "stringr"
963 | ],
964 | "Hash": "58767e44739b76965332e8a4fe3f91f1"
965 | },
966 | "stringi": {
967 | "Package": "stringi",
968 | "Version": "1.8.4",
969 | "Source": "Repository",
970 | "Repository": "RSPM",
971 | "Requirements": [
972 | "R",
973 | "stats",
974 | "tools",
975 | "utils"
976 | ],
977 | "Hash": "39e1144fd75428983dc3f63aa53dfa91"
978 | },
979 | "stringr": {
980 | "Package": "stringr",
981 | "Version": "1.5.1",
982 | "Source": "Repository",
983 | "Repository": "RSPM",
984 | "Requirements": [
985 | "R",
986 | "cli",
987 | "glue",
988 | "lifecycle",
989 | "magrittr",
990 | "rlang",
991 | "stringi",
992 | "vctrs"
993 | ],
994 | "Hash": "960e2ae9e09656611e0b8214ad543207"
995 | },
996 | "sys": {
997 | "Package": "sys",
998 | "Version": "3.4.3",
999 | "Source": "Repository",
1000 | "Repository": "CRAN",
1001 | "Hash": "de342ebfebdbf40477d0758d05426646"
1002 | },
1003 | "tibble": {
1004 | "Package": "tibble",
1005 | "Version": "3.2.1",
1006 | "Source": "Repository",
1007 | "Repository": "RSPM",
1008 | "Requirements": [
1009 | "R",
1010 | "fansi",
1011 | "lifecycle",
1012 | "magrittr",
1013 | "methods",
1014 | "pillar",
1015 | "pkgconfig",
1016 | "rlang",
1017 | "utils",
1018 | "vctrs"
1019 | ],
1020 | "Hash": "a84e2cc86d07289b3b6f5069df7a004c"
1021 | },
1022 | "tidyr": {
1023 | "Package": "tidyr",
1024 | "Version": "1.3.1",
1025 | "Source": "Repository",
1026 | "Repository": "RSPM",
1027 | "Requirements": [
1028 | "R",
1029 | "cli",
1030 | "cpp11",
1031 | "dplyr",
1032 | "glue",
1033 | "lifecycle",
1034 | "magrittr",
1035 | "purrr",
1036 | "rlang",
1037 | "stringr",
1038 | "tibble",
1039 | "tidyselect",
1040 | "utils",
1041 | "vctrs"
1042 | ],
1043 | "Hash": "915fb7ce036c22a6a33b5a8adb712eb1"
1044 | },
1045 | "tidyselect": {
1046 | "Package": "tidyselect",
1047 | "Version": "1.2.1",
1048 | "Source": "Repository",
1049 | "Repository": "RSPM",
1050 | "Requirements": [
1051 | "R",
1052 | "cli",
1053 | "glue",
1054 | "lifecycle",
1055 | "rlang",
1056 | "vctrs",
1057 | "withr"
1058 | ],
1059 | "Hash": "829f27b9c4919c16b593794a6344d6c0"
1060 | },
1061 | "timechange": {
1062 | "Package": "timechange",
1063 | "Version": "0.3.0",
1064 | "Source": "Repository",
1065 | "Repository": "RSPM",
1066 | "Requirements": [
1067 | "R",
1068 | "cpp11"
1069 | ],
1070 | "Hash": "c5f3c201b931cd6474d17d8700ccb1c8"
1071 | },
1072 | "tinytex": {
1073 | "Package": "tinytex",
1074 | "Version": "0.53",
1075 | "Source": "Repository",
1076 | "Repository": "CRAN",
1077 | "Requirements": [
1078 | "xfun"
1079 | ],
1080 | "Hash": "9db859e8aabbb474293dde3097839420"
1081 | },
1082 | "tzdb": {
1083 | "Package": "tzdb",
1084 | "Version": "0.4.0",
1085 | "Source": "Repository",
1086 | "Repository": "RSPM",
1087 | "Requirements": [
1088 | "R",
1089 | "cpp11"
1090 | ],
1091 | "Hash": "f561504ec2897f4d46f0c7657e488ae1"
1092 | },
1093 | "utf8": {
1094 | "Package": "utf8",
1095 | "Version": "1.2.4",
1096 | "Source": "Repository",
1097 | "Repository": "RSPM",
1098 | "Requirements": [
1099 | "R"
1100 | ],
1101 | "Hash": "62b65c52671e6665f803ff02954446e9"
1102 | },
1103 | "vctrs": {
1104 | "Package": "vctrs",
1105 | "Version": "0.6.5",
1106 | "Source": "Repository",
1107 | "Repository": "RSPM",
1108 | "Requirements": [
1109 | "R",
1110 | "cli",
1111 | "glue",
1112 | "lifecycle",
1113 | "rlang"
1114 | ],
1115 | "Hash": "c03fa420630029418f7e6da3667aac4a"
1116 | },
1117 | "viridisLite": {
1118 | "Package": "viridisLite",
1119 | "Version": "0.4.2",
1120 | "Source": "Repository",
1121 | "Repository": "RSPM",
1122 | "Requirements": [
1123 | "R"
1124 | ],
1125 | "Hash": "c826c7c4241b6fc89ff55aaea3fa7491"
1126 | },
1127 | "vroom": {
1128 | "Package": "vroom",
1129 | "Version": "1.6.5",
1130 | "Source": "Repository",
1131 | "Repository": "RSPM",
1132 | "Requirements": [
1133 | "R",
1134 | "bit64",
1135 | "cli",
1136 | "cpp11",
1137 | "crayon",
1138 | "glue",
1139 | "hms",
1140 | "lifecycle",
1141 | "methods",
1142 | "progress",
1143 | "rlang",
1144 | "stats",
1145 | "tibble",
1146 | "tidyselect",
1147 | "tzdb",
1148 | "vctrs",
1149 | "withr"
1150 | ],
1151 | "Hash": "390f9315bc0025be03012054103d227c"
1152 | },
1153 | "warp": {
1154 | "Package": "warp",
1155 | "Version": "0.2.1",
1156 | "Source": "Repository",
1157 | "Repository": "RSPM",
1158 | "Requirements": [
1159 | "R"
1160 | ],
1161 | "Hash": "fea474d578b1cbcb696ae6ac8bdcc439"
1162 | },
1163 | "withr": {
1164 | "Package": "withr",
1165 | "Version": "3.0.1",
1166 | "Source": "Repository",
1167 | "Repository": "CRAN",
1168 | "Requirements": [
1169 | "R",
1170 | "grDevices",
1171 | "graphics"
1172 | ],
1173 | "Hash": "07909200e8bbe90426fbfeb73e1e27aa"
1174 | },
1175 | "xfun": {
1176 | "Package": "xfun",
1177 | "Version": "0.48",
1178 | "Source": "Repository",
1179 | "Repository": "CRAN",
1180 | "Requirements": [
1181 | "R",
1182 | "grDevices",
1183 | "stats",
1184 | "tools"
1185 | ],
1186 | "Hash": "89e455b87c84e227eb7f60a1b4e5fe1f"
1187 | },
1188 | "xml2": {
1189 | "Package": "xml2",
1190 | "Version": "1.3.6",
1191 | "Source": "Repository",
1192 | "Repository": "RSPM",
1193 | "Requirements": [
1194 | "R",
1195 | "cli",
1196 | "methods",
1197 | "rlang"
1198 | ],
1199 | "Hash": "1d0336142f4cd25d8d23cd3ba7a8fb61"
1200 | },
1201 | "yaml": {
1202 | "Package": "yaml",
1203 | "Version": "2.3.10",
1204 | "Source": "Repository",
1205 | "Repository": "CRAN",
1206 | "Hash": "51dab85c6c98e50a18d7551e9d49f76c"
1207 | }
1208 | }
1209 | }
1210 |
--------------------------------------------------------------------------------
/renv/.gitignore:
--------------------------------------------------------------------------------
1 | library/
2 | local/
3 | cellar/
4 | lock/
5 | python/
6 | sandbox/
7 | staging/
8 |
--------------------------------------------------------------------------------
/renv/activate.R:
--------------------------------------------------------------------------------
1 |
2 | local({
3 |
4 | # the requested version of renv
5 | version <- "1.0.10"
6 | attr(version, "sha") <- NULL
7 |
8 | # the project directory
9 | project <- Sys.getenv("RENV_PROJECT")
10 | if (!nzchar(project))
11 | project <- getwd()
12 |
13 | # use start-up diagnostics if enabled
14 | diagnostics <- Sys.getenv("RENV_STARTUP_DIAGNOSTICS", unset = "FALSE")
15 | if (diagnostics) {
16 | start <- Sys.time()
17 | profile <- tempfile("renv-startup-", fileext = ".Rprof")
18 | utils::Rprof(profile)
19 | on.exit({
20 | utils::Rprof(NULL)
21 | elapsed <- signif(difftime(Sys.time(), start, units = "auto"), digits = 2L)
22 | writeLines(sprintf("- renv took %s to run the autoloader.", format(elapsed)))
23 | writeLines(sprintf("- Profile: %s", profile))
24 | print(utils::summaryRprof(profile))
25 | }, add = TRUE)
26 | }
27 |
28 | # figure out whether the autoloader is enabled
29 | enabled <- local({
30 |
31 | # first, check config option
32 | override <- getOption("renv.config.autoloader.enabled")
33 | if (!is.null(override))
34 | return(override)
35 |
36 | # if we're being run in a context where R_LIBS is already set,
37 | # don't load -- presumably we're being run as a sub-process and
38 | # the parent process has already set up library paths for us
39 | rcmd <- Sys.getenv("R_CMD", unset = NA)
40 | rlibs <- Sys.getenv("R_LIBS", unset = NA)
41 | if (!is.na(rlibs) && !is.na(rcmd))
42 | return(FALSE)
43 |
44 | # next, check environment variables
45 | # TODO: prefer using the configuration one in the future
46 | envvars <- c(
47 | "RENV_CONFIG_AUTOLOADER_ENABLED",
48 | "RENV_AUTOLOADER_ENABLED",
49 | "RENV_ACTIVATE_PROJECT"
50 | )
51 |
52 | for (envvar in envvars) {
53 | envval <- Sys.getenv(envvar, unset = NA)
54 | if (!is.na(envval))
55 | return(tolower(envval) %in% c("true", "t", "1"))
56 | }
57 |
58 | # enable by default
59 | TRUE
60 |
61 | })
62 |
63 | # bail if we're not enabled
64 | if (!enabled) {
65 |
66 | # if we're not enabled, we might still need to manually load
67 | # the user profile here
68 | profile <- Sys.getenv("R_PROFILE_USER", unset = "~/.Rprofile")
69 | if (file.exists(profile)) {
70 | cfg <- Sys.getenv("RENV_CONFIG_USER_PROFILE", unset = "TRUE")
71 | if (tolower(cfg) %in% c("true", "t", "1"))
72 | sys.source(profile, envir = globalenv())
73 | }
74 |
75 | return(FALSE)
76 |
77 | }
78 |
79 | # avoid recursion
80 | if (identical(getOption("renv.autoloader.running"), TRUE)) {
81 | warning("ignoring recursive attempt to run renv autoloader")
82 | return(invisible(TRUE))
83 | }
84 |
85 | # signal that we're loading renv during R startup
86 | options(renv.autoloader.running = TRUE)
87 | on.exit(options(renv.autoloader.running = NULL), add = TRUE)
88 |
89 | # signal that we've consented to use renv
90 | options(renv.consent = TRUE)
91 |
92 | # load the 'utils' package eagerly -- this ensures that renv shims, which
93 | # mask 'utils' packages, will come first on the search path
94 | library(utils, lib.loc = .Library)
95 |
96 | # unload renv if it's already been loaded
97 | if ("renv" %in% loadedNamespaces())
98 | unloadNamespace("renv")
99 |
100 | # load bootstrap tools
101 | ansify <- function(text) {
102 | if (renv_ansify_enabled())
103 | renv_ansify_enhanced(text)
104 | else
105 | renv_ansify_default(text)
106 | }
107 |
108 | renv_ansify_enabled <- function() {
109 |
110 | override <- Sys.getenv("RENV_ANSIFY_ENABLED", unset = NA)
111 | if (!is.na(override))
112 | return(as.logical(override))
113 |
114 | pane <- Sys.getenv("RSTUDIO_CHILD_PROCESS_PANE", unset = NA)
115 | if (identical(pane, "build"))
116 | return(FALSE)
117 |
118 | testthat <- Sys.getenv("TESTTHAT", unset = "false")
119 | if (tolower(testthat) %in% "true")
120 | return(FALSE)
121 |
122 | iderun <- Sys.getenv("R_CLI_HAS_HYPERLINK_IDE_RUN", unset = "false")
123 | if (tolower(iderun) %in% "false")
124 | return(FALSE)
125 |
126 | TRUE
127 |
128 | }
129 |
130 | renv_ansify_default <- function(text) {
131 | text
132 | }
133 |
134 | renv_ansify_enhanced <- function(text) {
135 |
136 | # R help links
137 | pattern <- "`\\?(renv::(?:[^`])+)`"
138 | replacement <- "`\033]8;;ide:help:\\1\a?\\1\033]8;;\a`"
139 | text <- gsub(pattern, replacement, text, perl = TRUE)
140 |
141 | # runnable code
142 | pattern <- "`(renv::(?:[^`])+)`"
143 | replacement <- "`\033]8;;ide:run:\\1\a\\1\033]8;;\a`"
144 | text <- gsub(pattern, replacement, text, perl = TRUE)
145 |
146 | # return ansified text
147 | text
148 |
149 | }
150 |
151 | renv_ansify_init <- function() {
152 |
153 | envir <- renv_envir_self()
154 | if (renv_ansify_enabled())
155 | assign("ansify", renv_ansify_enhanced, envir = envir)
156 | else
157 | assign("ansify", renv_ansify_default, envir = envir)
158 |
159 | }
160 |
161 | `%||%` <- function(x, y) {
162 | if (is.null(x)) y else x
163 | }
164 |
165 | catf <- function(fmt, ..., appendLF = TRUE) {
166 |
167 | quiet <- getOption("renv.bootstrap.quiet", default = FALSE)
168 | if (quiet)
169 | return(invisible())
170 |
171 | msg <- sprintf(fmt, ...)
172 | cat(msg, file = stdout(), sep = if (appendLF) "\n" else "")
173 |
174 | invisible(msg)
175 |
176 | }
177 |
178 | header <- function(label,
179 | ...,
180 | prefix = "#",
181 | suffix = "-",
182 | n = min(getOption("width"), 78))
183 | {
184 | label <- sprintf(label, ...)
185 | n <- max(n - nchar(label) - nchar(prefix) - 2L, 8L)
186 | if (n <= 0)
187 | return(paste(prefix, label))
188 |
189 | tail <- paste(rep.int(suffix, n), collapse = "")
190 | paste0(prefix, " ", label, " ", tail)
191 |
192 | }
193 |
194 | heredoc <- function(text, leave = 0) {
195 |
196 | # remove leading, trailing whitespace
197 | trimmed <- gsub("^\\s*\\n|\\n\\s*$", "", text)
198 |
199 | # split into lines
200 | lines <- strsplit(trimmed, "\n", fixed = TRUE)[[1L]]
201 |
202 | # compute common indent
203 | indent <- regexpr("[^[:space:]]", lines)
204 | common <- min(setdiff(indent, -1L)) - leave
205 | text <- paste(substring(lines, common), collapse = "\n")
206 |
207 | # substitute in ANSI links for executable renv code
208 | ansify(text)
209 |
210 | }
211 |
212 | startswith <- function(string, prefix) {
213 | substring(string, 1, nchar(prefix)) == prefix
214 | }
215 |
216 | bootstrap <- function(version, library) {
217 |
218 | friendly <- renv_bootstrap_version_friendly(version)
219 | section <- header(sprintf("Bootstrapping renv %s", friendly))
220 | catf(section)
221 |
222 | # attempt to download renv
223 | catf("- Downloading renv ... ", appendLF = FALSE)
224 | withCallingHandlers(
225 | tarball <- renv_bootstrap_download(version),
226 | error = function(err) {
227 | catf("FAILED")
228 | stop("failed to download:\n", conditionMessage(err))
229 | }
230 | )
231 | catf("OK")
232 | on.exit(unlink(tarball), add = TRUE)
233 |
234 | # now attempt to install
235 | catf("- Installing renv ... ", appendLF = FALSE)
236 | withCallingHandlers(
237 | status <- renv_bootstrap_install(version, tarball, library),
238 | error = function(err) {
239 | catf("FAILED")
240 | stop("failed to install:\n", conditionMessage(err))
241 | }
242 | )
243 | catf("OK")
244 |
245 | # add empty line to break up bootstrapping from normal output
246 | catf("")
247 |
248 | return(invisible())
249 | }
250 |
251 | renv_bootstrap_tests_running <- function() {
252 | getOption("renv.tests.running", default = FALSE)
253 | }
254 |
255 | renv_bootstrap_repos <- function() {
256 |
257 | # get CRAN repository
258 | cran <- getOption("renv.repos.cran", "https://cloud.r-project.org")
259 |
260 | # check for repos override
261 | repos <- Sys.getenv("RENV_CONFIG_REPOS_OVERRIDE", unset = NA)
262 | if (!is.na(repos)) {
263 |
264 | # check for RSPM; if set, use a fallback repository for renv
265 | rspm <- Sys.getenv("RSPM", unset = NA)
266 | if (identical(rspm, repos))
267 | repos <- c(RSPM = rspm, CRAN = cran)
268 |
269 | return(repos)
270 |
271 | }
272 |
273 | # check for lockfile repositories
274 | repos <- tryCatch(renv_bootstrap_repos_lockfile(), error = identity)
275 | if (!inherits(repos, "error") && length(repos))
276 | return(repos)
277 |
278 | # retrieve current repos
279 | repos <- getOption("repos")
280 |
281 | # ensure @CRAN@ entries are resolved
282 | repos[repos == "@CRAN@"] <- cran
283 |
284 | # add in renv.bootstrap.repos if set
285 | default <- c(FALLBACK = "https://cloud.r-project.org")
286 | extra <- getOption("renv.bootstrap.repos", default = default)
287 | repos <- c(repos, extra)
288 |
289 | # remove duplicates that might've snuck in
290 | dupes <- duplicated(repos) | duplicated(names(repos))
291 | repos[!dupes]
292 |
293 | }
294 |
295 | renv_bootstrap_repos_lockfile <- function() {
296 |
297 | lockpath <- Sys.getenv("RENV_PATHS_LOCKFILE", unset = "renv.lock")
298 | if (!file.exists(lockpath))
299 | return(NULL)
300 |
301 | lockfile <- tryCatch(renv_json_read(lockpath), error = identity)
302 | if (inherits(lockfile, "error")) {
303 | warning(lockfile)
304 | return(NULL)
305 | }
306 |
307 | repos <- lockfile$R$Repositories
308 | if (length(repos) == 0)
309 | return(NULL)
310 |
311 | keys <- vapply(repos, `[[`, "Name", FUN.VALUE = character(1))
312 | vals <- vapply(repos, `[[`, "URL", FUN.VALUE = character(1))
313 | names(vals) <- keys
314 |
315 | return(vals)
316 |
317 | }
318 |
319 | renv_bootstrap_download <- function(version) {
320 |
321 | sha <- attr(version, "sha", exact = TRUE)
322 |
323 | methods <- if (!is.null(sha)) {
324 |
325 | # attempting to bootstrap a development version of renv
326 | c(
327 | function() renv_bootstrap_download_tarball(sha),
328 | function() renv_bootstrap_download_github(sha)
329 | )
330 |
331 | } else {
332 |
333 | # attempting to bootstrap a release version of renv
334 | c(
335 | function() renv_bootstrap_download_tarball(version),
336 | function() renv_bootstrap_download_cran_latest(version),
337 | function() renv_bootstrap_download_cran_archive(version)
338 | )
339 |
340 | }
341 |
342 | for (method in methods) {
343 | path <- tryCatch(method(), error = identity)
344 | if (is.character(path) && file.exists(path))
345 | return(path)
346 | }
347 |
348 | stop("All download methods failed")
349 |
350 | }
351 |
352 | renv_bootstrap_download_impl <- function(url, destfile) {
353 |
354 | mode <- "wb"
355 |
356 | # https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17715
357 | fixup <-
358 | Sys.info()[["sysname"]] == "Windows" &&
359 | substring(url, 1L, 5L) == "file:"
360 |
361 | if (fixup)
362 | mode <- "w+b"
363 |
364 | args <- list(
365 | url = url,
366 | destfile = destfile,
367 | mode = mode,
368 | quiet = TRUE
369 | )
370 |
371 | if ("headers" %in% names(formals(utils::download.file)))
372 | {
373 | headers <- renv_bootstrap_download_custom_headers(url)
374 | if (length(headers) && is.character(headers))
375 | args$headers <- headers
376 | }
377 |
378 | do.call(utils::download.file, args)
379 |
380 | }
381 |
382 | renv_bootstrap_download_custom_headers <- function(url) {
383 |
384 | headers <- getOption("renv.download.headers")
385 | if (is.null(headers))
386 | return(character())
387 |
388 | if (!is.function(headers))
389 | stopf("'renv.download.headers' is not a function")
390 |
391 | headers <- headers(url)
392 | if (length(headers) == 0L)
393 | return(character())
394 |
395 | if (is.list(headers))
396 | headers <- unlist(headers, recursive = FALSE, use.names = TRUE)
397 |
398 | ok <-
399 | is.character(headers) &&
400 | is.character(names(headers)) &&
401 | all(nzchar(names(headers)))
402 |
403 | if (!ok)
404 | stop("invocation of 'renv.download.headers' did not return a named character vector")
405 |
406 | headers
407 |
408 | }
409 |
410 | renv_bootstrap_download_cran_latest <- function(version) {
411 |
412 | spec <- renv_bootstrap_download_cran_latest_find(version)
413 | type <- spec$type
414 | repos <- spec$repos
415 |
416 | baseurl <- utils::contrib.url(repos = repos, type = type)
417 | ext <- if (identical(type, "source"))
418 | ".tar.gz"
419 | else if (Sys.info()[["sysname"]] == "Windows")
420 | ".zip"
421 | else
422 | ".tgz"
423 | name <- sprintf("renv_%s%s", version, ext)
424 | url <- paste(baseurl, name, sep = "/")
425 |
426 | destfile <- file.path(tempdir(), name)
427 | status <- tryCatch(
428 | renv_bootstrap_download_impl(url, destfile),
429 | condition = identity
430 | )
431 |
432 | if (inherits(status, "condition"))
433 | return(FALSE)
434 |
435 | # report success and return
436 | destfile
437 |
438 | }
439 |
440 | renv_bootstrap_download_cran_latest_find <- function(version) {
441 |
442 | # check whether binaries are supported on this system
443 | binary <-
444 | getOption("renv.bootstrap.binary", default = TRUE) &&
445 | !identical(.Platform$pkgType, "source") &&
446 | !identical(getOption("pkgType"), "source") &&
447 | Sys.info()[["sysname"]] %in% c("Darwin", "Windows")
448 |
449 | types <- c(if (binary) "binary", "source")
450 |
451 | # iterate over types + repositories
452 | for (type in types) {
453 | for (repos in renv_bootstrap_repos()) {
454 |
455 | # build arguments for utils::available.packages() call
456 | args <- list(type = type, repos = repos)
457 |
458 | # add custom headers if available -- note that
459 | # utils::available.packages() will pass this to download.file()
460 | if ("headers" %in% names(formals(utils::download.file)))
461 | {
462 | headers <- renv_bootstrap_download_custom_headers(url)
463 | if (length(headers) && is.character(headers))
464 | args$headers <- headers
465 | }
466 |
467 | # retrieve package database
468 | db <- tryCatch(
469 | as.data.frame(
470 | do.call(utils::available.packages, args),
471 | stringsAsFactors = FALSE
472 | ),
473 | error = identity
474 | )
475 |
476 | if (inherits(db, "error"))
477 | next
478 |
479 | # check for compatible entry
480 | entry <- db[db$Package %in% "renv" & db$Version %in% version, ]
481 | if (nrow(entry) == 0)
482 | next
483 |
484 | # found it; return spec to caller
485 | spec <- list(entry = entry, type = type, repos = repos)
486 | return(spec)
487 |
488 | }
489 | }
490 |
491 | # if we got here, we failed to find renv
492 | fmt <- "renv %s is not available from your declared package repositories"
493 | stop(sprintf(fmt, version))
494 |
495 | }
496 |
497 | renv_bootstrap_download_cran_archive <- function(version) {
498 |
499 | name <- sprintf("renv_%s.tar.gz", version)
500 | repos <- renv_bootstrap_repos()
501 | urls <- file.path(repos, "src/contrib/Archive/renv", name)
502 | destfile <- file.path(tempdir(), name)
503 |
504 | for (url in urls) {
505 |
506 | status <- tryCatch(
507 | renv_bootstrap_download_impl(url, destfile),
508 | condition = identity
509 | )
510 |
511 | if (identical(status, 0L))
512 | return(destfile)
513 |
514 | }
515 |
516 | return(FALSE)
517 |
518 | }
519 |
520 | renv_bootstrap_download_tarball <- function(version) {
521 |
522 | # if the user has provided the path to a tarball via
523 | # an environment variable, then use it
524 | tarball <- Sys.getenv("RENV_BOOTSTRAP_TARBALL", unset = NA)
525 | if (is.na(tarball))
526 | return()
527 |
528 | # allow directories
529 | if (dir.exists(tarball)) {
530 | name <- sprintf("renv_%s.tar.gz", version)
531 | tarball <- file.path(tarball, name)
532 | }
533 |
534 | # bail if it doesn't exist
535 | if (!file.exists(tarball)) {
536 |
537 | # let the user know we weren't able to honour their request
538 | fmt <- "- RENV_BOOTSTRAP_TARBALL is set (%s) but does not exist."
539 | msg <- sprintf(fmt, tarball)
540 | warning(msg)
541 |
542 | # bail
543 | return()
544 |
545 | }
546 |
547 | catf("- Using local tarball '%s'.", tarball)
548 | tarball
549 |
550 | }
551 |
552 | renv_bootstrap_github_token <- function() {
553 | for (envvar in c("GITHUB_TOKEN", "GITHUB_PAT", "GH_TOKEN")) {
554 | envval <- Sys.getenv(envvar, unset = NA)
555 | if (!is.na(envval))
556 | return(envval)
557 | }
558 | }
559 |
560 | renv_bootstrap_download_github <- function(version) {
561 |
562 | enabled <- Sys.getenv("RENV_BOOTSTRAP_FROM_GITHUB", unset = "TRUE")
563 | if (!identical(enabled, "TRUE"))
564 | return(FALSE)
565 |
566 | # prepare download options
567 | token <- renv_bootstrap_github_token()
568 | if (nzchar(Sys.which("curl")) && nzchar(token)) {
569 | fmt <- "--location --fail --header \"Authorization: token %s\""
570 | extra <- sprintf(fmt, token)
571 | saved <- options("download.file.method", "download.file.extra")
572 | options(download.file.method = "curl", download.file.extra = extra)
573 | on.exit(do.call(base::options, saved), add = TRUE)
574 | } else if (nzchar(Sys.which("wget")) && nzchar(token)) {
575 | fmt <- "--header=\"Authorization: token %s\""
576 | extra <- sprintf(fmt, token)
577 | saved <- options("download.file.method", "download.file.extra")
578 | options(download.file.method = "wget", download.file.extra = extra)
579 | on.exit(do.call(base::options, saved), add = TRUE)
580 | }
581 |
582 | url <- file.path("https://api.github.com/repos/rstudio/renv/tarball", version)
583 | name <- sprintf("renv_%s.tar.gz", version)
584 | destfile <- file.path(tempdir(), name)
585 |
586 | status <- tryCatch(
587 | renv_bootstrap_download_impl(url, destfile),
588 | condition = identity
589 | )
590 |
591 | if (!identical(status, 0L))
592 | return(FALSE)
593 |
594 | renv_bootstrap_download_augment(destfile)
595 |
596 | return(destfile)
597 |
598 | }
599 |
600 | # Add Sha to DESCRIPTION. This is a stopgap until #890, after which we
601 | # can use renv::install() to fully capture metadata.
602 | renv_bootstrap_download_augment <- function(destfile) {
603 | sha <- renv_bootstrap_git_extract_sha1_tar(destfile)
604 | if (is.null(sha)) {
605 | return()
606 | }
607 |
608 | # Untar
609 | tempdir <- tempfile("renv-github-")
610 | on.exit(unlink(tempdir, recursive = TRUE), add = TRUE)
611 | untar(destfile, exdir = tempdir)
612 | pkgdir <- dir(tempdir, full.names = TRUE)[[1]]
613 |
614 | # Modify description
615 | desc_path <- file.path(pkgdir, "DESCRIPTION")
616 | desc_lines <- readLines(desc_path)
617 | remotes_fields <- c(
618 | "RemoteType: github",
619 | "RemoteHost: api.github.com",
620 | "RemoteRepo: renv",
621 | "RemoteUsername: rstudio",
622 | "RemotePkgRef: rstudio/renv",
623 | paste("RemoteRef:", sha),
624 | paste("RemoteSha:", sha)
625 | )
626 | writeLines(c(desc_lines[desc_lines != ""], remotes_fields), con = desc_path)
627 |
628 | # Re-tar
629 | local({
630 | old <- setwd(tempdir)
631 | on.exit(setwd(old), add = TRUE)
632 |
633 | tar(destfile, compression = "gzip")
634 | })
635 | invisible()
636 | }
637 |
638 | # Extract the commit hash from a git archive. Git archives include the SHA1
639 | # hash as the comment field of the tarball pax extended header
640 | # (see https://www.kernel.org/pub/software/scm/git/docs/git-archive.html)
641 | # For GitHub archives this should be the first header after the default
642 | # 512-byte header.
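# An illustrative sketch of the bytes this function expects (assuming a
# GitHub-generated archive):
#   bytes   1..512 : pax global header block
#   bytes 513..563 : "52 comment=" (11 bytes) + 40-byte hexadecimal SHA1
# which is why 0x200 + 0x33 bytes are read below and only bytes
# 0x201..0x233 are kept.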
643 | renv_bootstrap_git_extract_sha1_tar <- function(bundle) {
644 |
645 | # open the bundle for reading
646 | # We use gzcon for everything because (from ?gzcon)
647 | # > Reading from a connection which does not supply a 'gzip' magic
648 | # > header is equivalent to reading from the original connection
649 | conn <- gzcon(file(bundle, open = "rb", raw = TRUE))
650 | on.exit(close(conn))
651 |
652 | # The default pax header is 512 bytes long and the first pax extended header
653 | # with the comment should be 51 bytes long
654 | # `52 comment=` (11 chars) + 40 byte SHA1 hash
655 | len <- 0x200 + 0x33
656 | res <- rawToChar(readBin(conn, "raw", n = len)[0x201:len])
657 |
658 | if (grepl("^52 comment=", res)) {
659 | sub("52 comment=", "", res)
660 | } else {
661 | NULL
662 | }
663 | }
664 |
665 | renv_bootstrap_install <- function(version, tarball, library) {
666 |
667 | # attempt to install it into project library
668 | dir.create(library, showWarnings = FALSE, recursive = TRUE)
669 | output <- renv_bootstrap_install_impl(library, tarball)
670 |
671 | # check for successful install
672 | status <- attr(output, "status")
673 | if (is.null(status) || identical(status, 0L))
674 | return(status)
675 |
676 | # an error occurred; report it
677 | header <- "installation of renv failed"
678 | lines <- paste(rep.int("=", nchar(header)), collapse = "")
679 | text <- paste(c(header, lines, output), collapse = "\n")
680 | stop(text)
681 |
682 | }
683 |
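# Roughly the command line being invoked (paths shown as placeholders; they
# are shell-quoted and expanded below):
#   R --vanilla CMD INSTALL --no-multiarch -l '<library>' '<tarball>'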
684 | renv_bootstrap_install_impl <- function(library, tarball) {
685 |
686 | # invoke using system2 so we can capture and report output
687 | bin <- R.home("bin")
688 | exe <- if (Sys.info()[["sysname"]] == "Windows") "R.exe" else "R"
689 | R <- file.path(bin, exe)
690 |
691 | args <- c(
692 | "--vanilla", "CMD", "INSTALL", "--no-multiarch",
693 | "-l", shQuote(path.expand(library)),
694 | shQuote(path.expand(tarball))
695 | )
696 |
697 | system2(R, args, stdout = TRUE, stderr = TRUE)
698 |
699 | }
700 |
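# Illustrative result: on a released R 4.3.x for 64-bit Linux this typically
# evaluates to something like "R-4.3/x86_64-pc-linux-gnu" (with any
# user-supplied prefix prepended).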
701 | renv_bootstrap_platform_prefix <- function() {
702 |
703 | # construct version prefix
704 | version <- paste(R.version$major, R.version$minor, sep = ".")
705 | prefix <- paste("R", numeric_version(version)[1, 1:2], sep = "-")
706 |
707 | # include SVN revision for development versions of R
708 | # (to avoid sharing platform-specific artefacts with released versions of R)
709 | devel <-
710 | identical(R.version[["status"]], "Under development (unstable)") ||
711 | identical(R.version[["nickname"]], "Unsuffered Consequences")
712 |
713 | if (devel)
714 | prefix <- paste(prefix, R.version[["svn rev"]], sep = "-r")
715 |
716 | # build list of path components
717 | components <- c(prefix, R.version$platform)
718 |
719 | # include prefix if provided by user
720 | prefix <- renv_bootstrap_platform_prefix_impl()
721 | if (!is.na(prefix) && nzchar(prefix))
722 | components <- c(prefix, components)
723 |
724 | # build prefix
725 | paste(components, collapse = "/")
726 |
727 | }
728 |
729 | renv_bootstrap_platform_prefix_impl <- function() {
730 |
731 | # if an explicit prefix has been supplied, use it
732 | prefix <- Sys.getenv("RENV_PATHS_PREFIX", unset = NA)
733 | if (!is.na(prefix))
734 | return(prefix)
735 |
736 | # if the user has requested an automatic prefix, generate it
737 | auto <- Sys.getenv("RENV_PATHS_PREFIX_AUTO", unset = NA)
738 | if (is.na(auto) && getRversion() >= "4.4.0")
739 | auto <- "TRUE"
740 |
741 | if (auto %in% c("TRUE", "True", "true", "1"))
742 | return(renv_bootstrap_platform_prefix_auto())
743 |
744 | # empty string on failure
745 | ""
746 |
747 | }
748 |
749 | renv_bootstrap_platform_prefix_auto <- function() {
750 |
751 | prefix <- tryCatch(renv_bootstrap_platform_os(), error = identity)
752 | if (inherits(prefix, "error") || prefix %in% "unknown") {
753 |
754 | msg <- paste(
755 | "failed to infer current operating system",
756 | "please file a bug report at https://github.com/rstudio/renv/issues",
757 | sep = "; "
758 | )
759 |
760 | warning(msg)
761 |
762 | }
763 |
764 | prefix
765 |
766 | }
767 |
768 | renv_bootstrap_platform_os <- function() {
769 |
770 | sysinfo <- Sys.info()
771 | sysname <- sysinfo[["sysname"]]
772 |
773 | # handle Windows + macOS up front
774 | if (sysname == "Windows")
775 | return("windows")
776 | else if (sysname == "Darwin")
777 | return("macos")
778 |
779 | # check for os-release files
780 | for (file in c("/etc/os-release", "/usr/lib/os-release"))
781 | if (file.exists(file))
782 | return(renv_bootstrap_platform_os_via_os_release(file, sysinfo))
783 |
784 | # check for redhat-release files
785 | if (file.exists("/etc/redhat-release"))
786 | return(renv_bootstrap_platform_os_via_redhat_release())
787 |
788 | "unknown"
789 |
790 | }
791 |
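# Illustrative result: an Ubuntu 22.04 os-release file (ID=ubuntu,
# UBUNTU_CODENAME=jammy) would typically yield "linux-ubuntu-jammy".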
792 | renv_bootstrap_platform_os_via_os_release <- function(file, sysinfo) {
793 |
794 | # read /etc/os-release
795 | release <- utils::read.table(
796 | file = file,
797 | sep = "=",
798 | quote = c("\"", "'"),
799 | col.names = c("Key", "Value"),
800 | comment.char = "#",
801 | stringsAsFactors = FALSE
802 | )
803 |
804 | vars <- as.list(release$Value)
805 | names(vars) <- release$Key
806 |
807 | # get os name
808 | os <- tolower(sysinfo[["sysname"]])
809 |
810 | # read id
811 | id <- "unknown"
812 | for (field in c("ID", "ID_LIKE")) {
813 | if (field %in% names(vars) && nzchar(vars[[field]])) {
814 | id <- vars[[field]]
815 | break
816 | }
817 | }
818 |
819 | # read version
820 | version <- "unknown"
821 | for (field in c("UBUNTU_CODENAME", "VERSION_CODENAME", "VERSION_ID", "BUILD_ID")) {
822 | if (field %in% names(vars) && nzchar(vars[[field]])) {
823 | version <- vars[[field]]
824 | break
825 | }
826 | }
827 |
828 | # join together
829 | paste(c(os, id, version), collapse = "-")
830 |
831 | }
832 |
833 | renv_bootstrap_platform_os_via_redhat_release <- function() {
834 |
835 | # read /etc/redhat-release
836 | contents <- readLines("/etc/redhat-release", warn = FALSE)
837 |
838 | # infer id
839 | id <- if (grepl("centos", contents, ignore.case = TRUE))
840 | "centos"
841 | else if (grepl("redhat", contents, ignore.case = TRUE))
842 | "redhat"
843 | else
844 | "unknown"
845 |
846 | # try to find a version component (very hacky)
847 | version <- "unknown"
848 |
849 | parts <- strsplit(contents, "[[:space:]]")[[1L]]
850 | for (part in parts) {
851 |
852 | nv <- tryCatch(numeric_version(part), error = identity)
853 | if (inherits(nv, "error"))
854 | next
855 |
856 | version <- nv[1, 1]
857 | break
858 |
859 | }
860 |
861 | paste(c("linux", id, version), collapse = "-")
862 |
863 | }
864 |
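# Illustrative result: a project at "/work/proj" maps to something like
# "proj-0f3a9c12", i.e. the basename plus the first 8 characters of an md5
# hash of the full project path (the hash shown here is made up).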
865 | renv_bootstrap_library_root_name <- function(project) {
866 |
867 | # use project name as-is if requested
868 | asis <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT_ASIS", unset = "FALSE")
869 | if (asis)
870 | return(basename(project))
871 |
872 | # otherwise, disambiguate based on project's path
873 | id <- substring(renv_bootstrap_hash_text(project), 1L, 8L)
874 | paste(basename(project), id, sep = "-")
875 |
876 | }
877 |
878 | renv_bootstrap_library_root <- function(project) {
879 |
880 | prefix <- renv_bootstrap_profile_prefix()
881 |
882 | path <- Sys.getenv("RENV_PATHS_LIBRARY", unset = NA)
883 | if (!is.na(path))
884 | return(paste(c(path, prefix), collapse = "/"))
885 |
886 | path <- renv_bootstrap_library_root_impl(project)
887 | if (!is.null(path)) {
888 | name <- renv_bootstrap_library_root_name(project)
889 | return(paste(c(path, prefix, name), collapse = "/"))
890 | }
891 |
892 | renv_bootstrap_paths_renv("library", project = project)
893 |
894 | }
895 |
896 | renv_bootstrap_library_root_impl <- function(project) {
897 |
898 | root <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT", unset = NA)
899 | if (!is.na(root))
900 | return(root)
901 |
902 | type <- renv_bootstrap_project_type(project)
903 | if (identical(type, "package")) {
904 | userdir <- renv_bootstrap_user_dir()
905 | return(file.path(userdir, "library"))
906 | }
907 |
908 | }
909 |
910 | renv_bootstrap_validate_version <- function(version, description = NULL) {
911 |
912 | # resolve description file
913 | #
914 | # avoid passing lib.loc to `packageDescription()` below, since R will
915 | # use the loaded version of the package by default anyhow. note that
916 | # this function should only be called after 'renv' is loaded
917 | # https://github.com/rstudio/renv/issues/1625
918 | description <- description %||% packageDescription("renv")
919 |
920 | # check whether requested version 'version' matches loaded version of renv
921 | sha <- attr(version, "sha", exact = TRUE)
922 | valid <- if (!is.null(sha))
923 | renv_bootstrap_validate_version_dev(sha, description)
924 | else
925 | renv_bootstrap_validate_version_release(version, description)
926 |
927 | if (valid)
928 | return(TRUE)
929 |
930 | # the loaded version of renv doesn't match the requested version;
931 | # give the user instructions on how to proceed
932 | dev <- identical(description[["RemoteType"]], "github")
933 | remote <- if (dev)
934 | paste("rstudio/renv", description[["RemoteSha"]], sep = "@")
935 | else
936 | paste("renv", description[["Version"]], sep = "@")
937 |
938 | # display both loaded version + sha if available
939 | friendly <- renv_bootstrap_version_friendly(
940 | version = description[["Version"]],
941 | sha = if (dev) description[["RemoteSha"]]
942 | )
943 |
944 | fmt <- heredoc("
945 | renv %1$s was loaded from project library, but this project is configured to use renv %2$s.
946 | - Use `renv::record(\"%3$s\")` to record renv %1$s in the lockfile.
947 | - Use `renv::restore(packages = \"renv\")` to install renv %2$s into the project library.
948 | ")
949 | catf(fmt, friendly, renv_bootstrap_version_friendly(version), remote)
950 |
951 | FALSE
952 |
953 | }
954 |
955 | renv_bootstrap_validate_version_dev <- function(version, description) {
956 | expected <- description[["RemoteSha"]]
957 | is.character(expected) && startswith(expected, version)
958 | }
959 |
960 | renv_bootstrap_validate_version_release <- function(version, description) {
961 | expected <- description[["Version"]]
962 | is.character(expected) && identical(expected, version)
963 | }
964 |
965 | renv_bootstrap_hash_text <- function(text) {
966 |
967 | hashfile <- tempfile("renv-hash-")
968 | on.exit(unlink(hashfile), add = TRUE)
969 |
970 | writeLines(text, con = hashfile)
971 | tools::md5sum(hashfile)
972 |
973 | }
974 |
975 | renv_bootstrap_load <- function(project, libpath, version) {
976 |
977 | # try to load renv from the project library
978 | if (!requireNamespace("renv", lib.loc = libpath, quietly = TRUE))
979 | return(FALSE)
980 |
981 | # warn if the version of renv loaded does not match
982 | renv_bootstrap_validate_version(version)
983 |
984 | # execute renv load hooks, if any
985 | hooks <- getHook("renv::autoload")
986 | for (hook in hooks)
987 | if (is.function(hook))
988 | tryCatch(hook(), error = warnify)
989 |
990 | # load the project
991 | renv::load(project)
992 |
993 | TRUE
994 |
995 | }
996 |
997 | renv_bootstrap_profile_load <- function(project) {
998 |
999 | # if RENV_PROFILE is already set, just use that
1000 | profile <- Sys.getenv("RENV_PROFILE", unset = NA)
1001 | if (!is.na(profile) && nzchar(profile))
1002 | return(profile)
1003 |
1004 | # check for a profile file (nothing to do if it doesn't exist)
1005 | path <- renv_bootstrap_paths_renv("profile", profile = FALSE, project = project)
1006 | if (!file.exists(path))
1007 | return(NULL)
1008 |
1009 | # read the profile, and set it if it exists
1010 | contents <- readLines(path, warn = FALSE)
1011 | if (length(contents) == 0L)
1012 | return(NULL)
1013 |
1014 | # set RENV_PROFILE
1015 | profile <- contents[[1L]]
1016 | if (!profile %in% c("", "default"))
1017 | Sys.setenv(RENV_PROFILE = profile)
1018 |
1019 | profile
1020 |
1021 | }
1022 |
1023 | renv_bootstrap_profile_prefix <- function() {
1024 | profile <- renv_bootstrap_profile_get()
1025 | if (!is.null(profile))
1026 | return(file.path("profiles", profile, "renv"))
1027 | }
1028 |
1029 | renv_bootstrap_profile_get <- function() {
1030 | profile <- Sys.getenv("RENV_PROFILE", unset = "")
1031 | renv_bootstrap_profile_normalize(profile)
1032 | }
1033 |
1034 | renv_bootstrap_profile_set <- function(profile) {
1035 | profile <- renv_bootstrap_profile_normalize(profile)
1036 | if (is.null(profile))
1037 | Sys.unsetenv("RENV_PROFILE")
1038 | else
1039 | Sys.setenv(RENV_PROFILE = profile)
1040 | }
1041 |
1042 | renv_bootstrap_profile_normalize <- function(profile) {
1043 |
1044 | if (is.null(profile) || profile %in% c("", "default"))
1045 | return(NULL)
1046 |
1047 | profile
1048 |
1049 | }
1050 |
1051 | renv_bootstrap_path_absolute <- function(path) {
1052 |
1053 | substr(path, 1L, 1L) %in% c("~", "/", "\\") || (
1054 | substr(path, 1L, 1L) %in% c(letters, LETTERS) &&
1055 | substr(path, 2L, 3L) %in% c(":/", ":\\")
1056 | )
1057 |
1058 | }
1059 |
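# Illustrative result: with the defaults, renv_bootstrap_paths_renv("profile",
# profile = FALSE, project = "/work/proj") evaluates to
# "/work/proj/renv/profile".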
1060 | renv_bootstrap_paths_renv <- function(..., profile = TRUE, project = NULL) {
1061 | renv <- Sys.getenv("RENV_PATHS_RENV", unset = "renv")
1062 | root <- if (renv_bootstrap_path_absolute(renv)) NULL else project
1063 | prefix <- if (profile) renv_bootstrap_profile_prefix()
1064 | components <- c(root, renv, prefix, ...)
1065 | paste(components, collapse = "/")
1066 | }
1067 |
1068 | renv_bootstrap_project_type <- function(path) {
1069 |
1070 | descpath <- file.path(path, "DESCRIPTION")
1071 | if (!file.exists(descpath))
1072 | return("unknown")
1073 |
1074 | desc <- tryCatch(
1075 | read.dcf(descpath, all = TRUE),
1076 | error = identity
1077 | )
1078 |
1079 | if (inherits(desc, "error"))
1080 | return("unknown")
1081 |
1082 | type <- desc$Type
1083 | if (!is.null(type))
1084 | return(tolower(type))
1085 |
1086 | package <- desc$Package
1087 | if (!is.null(package))
1088 | return("package")
1089 |
1090 | "unknown"
1091 |
1092 | }
1093 |
1094 | renv_bootstrap_user_dir <- function() {
1095 | dir <- renv_bootstrap_user_dir_impl()
1096 | path.expand(chartr("\\", "/", dir))
1097 | }
1098 |
1099 | renv_bootstrap_user_dir_impl <- function() {
1100 |
1101 | # use local override if set
1102 | override <- getOption("renv.userdir.override")
1103 | if (!is.null(override))
1104 | return(override)
1105 |
1106 | # use R_user_dir if available
1107 | tools <- asNamespace("tools")
1108 | if (is.function(tools$R_user_dir))
1109 | return(tools$R_user_dir("renv", "cache"))
1110 |
1111 | # try using our own backfill for older versions of R
1112 | envvars <- c("R_USER_CACHE_DIR", "XDG_CACHE_HOME")
1113 | for (envvar in envvars) {
1114 | root <- Sys.getenv(envvar, unset = NA)
1115 | if (!is.na(root))
1116 | return(file.path(root, "R/renv"))
1117 | }
1118 |
1119 | # use platform-specific default fallbacks
1120 | if (Sys.info()[["sysname"]] == "Windows")
1121 | file.path(Sys.getenv("LOCALAPPDATA"), "R/cache/R/renv")
1122 | else if (Sys.info()[["sysname"]] == "Darwin")
1123 | "~/Library/Caches/org.R-project.R/R/renv"
1124 | else
1125 | "~/.cache/R/renv"
1126 |
1127 | }
1128 |
1129 | renv_bootstrap_version_friendly <- function(version, shafmt = NULL, sha = NULL) {
1130 | sha <- sha %||% attr(version, "sha", exact = TRUE)
1131 | parts <- c(version, sprintf(shafmt %||% " [sha: %s]", substring(sha, 1L, 7L)))
1132 | paste(parts, collapse = "")
1133 | }
1134 |
1135 | renv_bootstrap_exec <- function(project, libpath, version) {
1136 | if (!renv_bootstrap_load(project, libpath, version))
1137 | renv_bootstrap_run(version, libpath)
1138 | }
1139 |
1140 | renv_bootstrap_run <- function(version, libpath) {
1141 |
1142 | # perform bootstrap
1143 | bootstrap(version, libpath)
1144 |
1145 | # exit early if we're just testing bootstrap
1146 | if (!is.na(Sys.getenv("RENV_BOOTSTRAP_INSTALL_ONLY", unset = NA)))
1147 | return(TRUE)
1148 |
1149 | # try again to load
1150 | if (requireNamespace("renv", lib.loc = libpath, quietly = TRUE)) {
1151 | return(renv::load(project = getwd()))
1152 | }
1153 |
1154 | # failed to download or load renv; warn the user
1155 | msg <- c(
1156 | "Failed to find an renv installation: the project will not be loaded.",
1157 | "Use `renv::activate()` to re-initialize the project."
1158 | )
1159 |
1160 | warning(paste(msg, collapse = "\n"), call. = FALSE)
1161 |
1162 | }
1163 |
1164 | renv_json_read <- function(file = NULL, text = NULL) {
1165 |
1166 | jlerr <- NULL
1167 |
1168 | # if jsonlite is loaded, use that instead
1169 | if ("jsonlite" %in% loadedNamespaces()) {
1170 |
1171 | json <- tryCatch(renv_json_read_jsonlite(file, text), error = identity)
1172 | if (!inherits(json, "error"))
1173 | return(json)
1174 |
1175 | jlerr <- json
1176 |
1177 | }
1178 |
1179 | # otherwise, fall back to the default JSON reader
1180 | json <- tryCatch(renv_json_read_default(file, text), error = identity)
1181 | if (!inherits(json, "error"))
1182 | return(json)
1183 |
1184 | # report an error
1185 | if (!is.null(jlerr))
1186 | stop(jlerr)
1187 | else
1188 | stop(json)
1189 |
1190 | }
1191 |
1192 | renv_json_read_jsonlite <- function(file = NULL, text = NULL) {
1193 | text <- paste(text %||% readLines(file, warn = FALSE), collapse = "\n")
1194 | jsonlite::fromJSON(txt = text, simplifyVector = FALSE)
1195 | }
1196 |
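# Illustrative example: '{"repo": "https://cran.r-project.org"}' is rewritten
# to roughly 'list("repo" = "<placeholder>")' before parsing; the URL (whose
# ':' would otherwise be rewritten to '=') is restored from the placeholder
# map after parsing.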
1197 | renv_json_read_default <- function(file = NULL, text = NULL) {
1198 |
1199 | # find strings in the JSON
1200 | text <- paste(text %||% readLines(file, warn = FALSE), collapse = "\n")
1201 | pattern <- '["](?:(?:\\\\.)|(?:[^"\\\\]))*?["]'
1202 | locs <- gregexpr(pattern, text, perl = TRUE)[[1]]
1203 |
1204 | # if any are found, replace them with placeholders
1205 | replaced <- text
1206 | strings <- character()
1207 | replacements <- character()
1208 |
1209 | if (!identical(c(locs), -1L)) {
1210 |
1211 | # get the string values
1212 | starts <- locs
1213 | ends <- locs + attr(locs, "match.length") - 1L
1214 | strings <- substring(text, starts, ends)
1215 |
1216 | # only keep those requiring escaping
1217 | strings <- grep("[[\\]{}:]", strings, perl = TRUE, value = TRUE)
1218 |
1219 | # compute replacements
1220 | replacements <- sprintf('"\032%i\032"', seq_along(strings))
1221 |
1222 | # replace the strings
1223 | mapply(function(string, replacement) {
1224 | replaced <<- sub(string, replacement, replaced, fixed = TRUE)
1225 | }, strings, replacements)
1226 |
1227 | }
1228 |
1229 | # transform the JSON into something the R parser understands
1230 | transformed <- replaced
1231 | transformed <- gsub("{}", "`names<-`(list(), character())", transformed, fixed = TRUE)
1232 | transformed <- gsub("[[{]", "list(", transformed, perl = TRUE)
1233 | transformed <- gsub("[]}]", ")", transformed, perl = TRUE)
1234 | transformed <- gsub(":", "=", transformed, fixed = TRUE)
1235 | text <- paste(transformed, collapse = "\n")
1236 |
1237 | # parse it
1238 | json <- parse(text = text, keep.source = FALSE, srcfile = NULL)[[1L]]
1239 |
1240 | # construct map between source strings, replaced strings
1241 | map <- as.character(parse(text = strings))
1242 | names(map) <- as.character(parse(text = replacements))
1243 |
1244 | # convert to list
1245 | map <- as.list(map)
1246 |
1247 | # remap strings in object
1248 | remapped <- renv_json_read_remap(json, map)
1249 |
1250 | # evaluate
1251 | eval(remapped, envir = baseenv())
1252 |
1253 | }
1254 |
1255 | renv_json_read_remap <- function(json, map) {
1256 |
1257 | # fix names
1258 | if (!is.null(names(json))) {
1259 | lhs <- match(names(json), names(map), nomatch = 0L)
1260 | rhs <- match(names(map), names(json), nomatch = 0L)
1261 | names(json)[rhs] <- map[lhs]
1262 | }
1263 |
1264 | # fix values
1265 | if (is.character(json))
1266 | return(map[[json]] %||% json)
1267 |
1268 | # handle true, false, null
1269 | if (is.name(json)) {
1270 | text <- as.character(json)
1271 | if (text == "true")
1272 | return(TRUE)
1273 | else if (text == "false")
1274 | return(FALSE)
1275 | else if (text == "null")
1276 | return(NULL)
1277 | }
1278 |
1279 | # recurse
1280 | if (is.recursive(json)) {
1281 | for (i in seq_along(json)) {
1282 | json[i] <- list(renv_json_read_remap(json[[i]], map))
1283 | }
1284 | }
1285 |
1286 | json
1287 |
1288 | }
1289 |
1290 | # load the renv profile, if any
1291 | renv_bootstrap_profile_load(project)
1292 |
1293 | # construct path to library root
1294 | root <- renv_bootstrap_library_root(project)
1295 |
1296 | # construct library prefix for platform
1297 | prefix <- renv_bootstrap_platform_prefix()
1298 |
1299 | # construct full libpath
1300 | libpath <- file.path(root, prefix)
1301 |
1302 | # run bootstrap code
1303 | renv_bootstrap_exec(project, libpath, version)
1304 |
1305 | invisible()
1306 |
1307 | })
1308 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | polars[pyarrow]==1.9.0
2 | pyprql==0.12.1
3 |
--------------------------------------------------------------------------------
/sidebars.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Creating a sidebar enables you to:
3 | - create an ordered group of docs
4 | - render a sidebar for each doc of that group
5 | - provide next/previous navigation
6 |
7 | The sidebars can be generated from the filesystem, or explicitly defined here.
8 |
9 | Create as many sidebars as you want.
10 | */
11 |
12 | // @ts-check
13 |
14 | /** @type {import('@docusaurus/plugin-content-docs').SidebarsConfig} */
15 | const sidebars = {
16 | // By default, Docusaurus generates a sidebar from the docs folder structure
17 | tutorialSidebar: [{type: 'autogenerated', dirName: '.'}],
18 |
19 | // But you can create a sidebar manually
20 | /*
21 | tutorialSidebar: [
22 | 'intro',
23 | 'hello',
24 | {
25 | type: 'category',
26 | label: 'Tutorial',
27 | items: ['tutorial-basics/create-a-document'],
28 | },
29 | ],
30 | */
31 | };
32 |
33 | module.exports = sidebars;
34 |
--------------------------------------------------------------------------------
/src/css/custom.css:
--------------------------------------------------------------------------------
1 | /**
2 | * Any CSS included here will be global. The classic template
3 | * bundles Infima by default. Infima is a CSS framework designed to
4 | * work well for content-centric websites.
5 | */
6 |
7 | /* You can override the default Infima variables here. */
8 | :root {
9 | --ifm-color-primary: #2e8555;
10 | --ifm-color-primary-dark: #29784c;
11 | --ifm-color-primary-darker: #277148;
12 | --ifm-color-primary-darkest: #205d3b;
13 | --ifm-color-primary-light: #33925d;
14 | --ifm-color-primary-lighter: #359962;
15 | --ifm-color-primary-lightest: #3cad6e;
16 | --ifm-code-font-size: 95%;
17 | --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.1);
18 | }
19 |
20 | /* For readability concerns, you should choose a lighter palette in dark mode. */
21 | [data-theme='dark'] {
22 | --ifm-color-primary: #25c2a0;
23 | --ifm-color-primary-dark: #21af90;
24 | --ifm-color-primary-darker: #1fa588;
25 | --ifm-color-primary-darkest: #1a8870;
26 | --ifm-color-primary-light: #29d5b0;
27 | --ifm-color-primary-lighter: #32d8b4;
28 | --ifm-color-primary-lightest: #4fddbf;
29 | --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.3);
30 | }
31 |
--------------------------------------------------------------------------------
/src/pages/index.module.css:
--------------------------------------------------------------------------------
1 | /**
2 | * CSS files with the .module.css suffix will be treated as CSS modules
3 | * and scoped locally.
4 | */
5 |
6 | .heroBanner {
7 | padding: 4rem 0;
8 | text-align: center;
9 | position: relative;
10 | overflow: hidden;
11 | }
12 |
13 | @media screen and (max-width: 996px) {
14 | .heroBanner {
15 | padding: 2rem;
16 | }
17 | }
18 |
19 | .buttons {
20 | display: flex;
21 | align-items: center;
22 | justify-content: center;
23 | }
24 |
--------------------------------------------------------------------------------
/static/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eitsupi/querying-with-prql/8c46b2c7eb7f56d659f05fe49b2ba9984ba75198/static/.nojekyll
--------------------------------------------------------------------------------
/static/img/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eitsupi/querying-with-prql/8c46b2c7eb7f56d659f05fe49b2ba9984ba75198/static/img/favicon.ico
--------------------------------------------------------------------------------
/static/img/logo.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------