├── .Rprofile ├── .devcontainer └── devcontainer.json ├── .editorconfig ├── .gitignore ├── .vscode └── settings.json ├── LICENSE.md ├── README.md ├── _quarto.yml ├── babel.config.js ├── data └── .gitignore ├── docs ├── .gitignore ├── _cleanup-knitr.qmd ├── _setup-data-1.qmd ├── _setup-knitr.qmd ├── indexing.qmd ├── info.qmd ├── intro.qmd ├── method_chaining.qmd ├── tidy.qmd └── timeseries.qmd ├── docusaurus.config.js ├── package-lock.json ├── package.json ├── renv.lock ├── renv ├── .gitignore └── activate.R ├── requirements.txt ├── sidebars.js ├── src ├── css │ └── custom.css └── pages │ └── index.module.css └── static ├── .nojekyll └── img ├── favicon.ico └── logo.svg /.Rprofile: -------------------------------------------------------------------------------- 1 | source("renv/activate.R") 2 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "image": "ghcr.io/rocker-org/devcontainer/r-ver:4", 3 | "features": { 4 | "ghcr.io/rocker-org/devcontainer-features/quarto-cli": { 5 | "version": "1.5.57" 6 | }, 7 | "ghcr.io/eitsupi/devcontainer-features/go-task": {}, 8 | "ghcr.io/devcontainers/features/node": {}, 9 | "ghcr.io/devcontainers/features/python": {}, 10 | "ghcr.io/eitsupi/devcontainer-features/duckdb-cli": {}, 11 | "ghcr.io/rocker-org/devcontainer-features/renv-cache": {} 12 | }, 13 | "customizations": { 14 | "vscode": { 15 | "extensions": [ 16 | "editorconfig.editorconfig", 17 | "ms-toolsai.jupyter", 18 | "prql-lang.prql-vscode" 19 | ] 20 | } 21 | }, 22 | "updateContentCommand": { 23 | "r-packages": "R -q -e 'renv::restore()'", 24 | "python-packages": "python3 -m pip install jupyter jupyter-cache -r requirements.txt", 25 | "npm-packages": "npm install" 26 | }, 27 | "containerEnv": { 28 | "NOT_CRAN": "true", 29 | // "RENV_CONFIG_PAK_ENABLED": "TRUE" 30 | "RENV_CONFIG_SANDBOX_ENABLED": "FALSE" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 2 6 | end_of_line = lf 7 | charset = utf-8 8 | trim_trailing_whitespace = false 9 | insert_final_newline = true 10 | 11 | [*.json] 12 | indent_style = tab 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | /node_modules 3 | 4 | # Production 5 | /build 6 | 7 | # Generated files 8 | .docusaurus 9 | .cache-loader 10 | 11 | # Misc 12 | .DS_Store 13 | .env.local 14 | .env.development.local 15 | .env.test.local 16 | .env.production.local 17 | 18 | npm-debug.log* 19 | yarn-debug.log* 20 | yarn-error.log* 21 | 22 | /.quarto/ 23 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "jupyter.notebookFileRoot": "${workspaceFolder}", 3 | "python.formatting.provider": "black", 4 | "[r]": { 5 | "editor.tabSize": 2 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2023 querying-with-prql authors 4 | 5 | Permission is hereby 
granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Quering with PRQL 2 | 3 | This is a book-style website built by Quarto and Docusaurus. 4 | 5 | To build, we need quarto cli, node.js, R, Python, and a lot of packages! 6 | Please check the [devcontainer.json](.devcontainer/devcontainer.json) file. 7 | 8 | Build by 9 | 10 | ```sh 11 | quarto render --cache-refresh 12 | ``` 13 | 14 | And deploy by 15 | 16 | ```sh 17 | npm run deploy 18 | ``` 19 | 20 | ## License 21 | 22 | Licensed under the MIT License. 23 | -------------------------------------------------------------------------------- /_quarto.yml: -------------------------------------------------------------------------------- 1 | project: 2 | type: docusaurus 3 | execute-dir: project 4 | render: 5 | - /docs/info.qmd 6 | - /docs/[!_]*.qmd 7 | 8 | format: 9 | docusaurus-md: 10 | df-print: kable 11 | code-line-numbers: true 12 | -------------------------------------------------------------------------------- /babel.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | presets: [require.resolve('@docusaurus/core/lib/babel/preset')], 3 | }; 4 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | # mdx files and figures will be generated by Quarto CLI 2 | *.mdx 3 | *_files 4 | 5 | *_cache 6 | -------------------------------------------------------------------------------- /docs/_cleanup-knitr.qmd: -------------------------------------------------------------------------------- 1 | 2 | ```{r} 3 | #| include: false 4 | DBI::dbDisconnect(con, shutdown = TRUE) 5 | ``` 6 | -------------------------------------------------------------------------------- /docs/_setup-data-1.qmd: -------------------------------------------------------------------------------- 1 | ## Preparing Data 2 | 3 | {{< include _setup-knitr.qmd >}} 4 | 5 | ### Download 6 | 7 | Download the data to be analysis (zipped CSV file) and write the data to a Parquet file. 
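As a rough sketch of what this step could look like in Python (an illustration only, not the approach used on this page; it assumes the `duckdb` Python package and reuses the same URL and file paths as the R and Shell tabs below):

```{.python filename="Python"}
# Hypothetical Python equivalent: fetch the zip, extract the CSV,
# and let DuckDB copy it into a Parquet file.
import pathlib
import urllib.request
import zipfile

import duckdb

data_dir = pathlib.Path("data")
data_dir.mkdir(exist_ok=True)

url = (
    "https://transtats.bts.gov/PREZIP/"
    "On_Time_Reporting_Carrier_On_Time_Performance_1987_present_2022_1.zip"
)
zip_path = data_dir / "flights.csv.zip"
csv_name = "On_Time_Reporting_Carrier_On_Time_Performance_(1987_present)_2022_1.csv"
parquet_path = data_dir / "flights.parquet"

if not parquet_path.exists():
    # Note: the R tab disables SSL verification for this host;
    # urllib may need similar special handling.
    urllib.request.urlretrieve(url, zip_path)
    with zipfile.ZipFile(zip_path) as z:
        z.extract(csv_name, data_dir)
    duckdb.sql(
        f"COPY (FROM read_csv_auto('{data_dir / csv_name}')) "
        f"TO '{parquet_path}' (FORMAT PARQUET)"
    )
```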
8 | 9 | This document uses R here, but we can do it in another language, or, manually download and unzip and 10 | create the Parquet file (with DuckDB CLI). 11 | 12 | :::{.panel-tabset} 13 | 14 | #### R 15 | 16 | ```{r} 17 | #| filename: R 18 | #| cache: false 19 | #| code-fold: true 20 | #| warning: false 21 | # Create "data" directory, download the zip file into the directory, and create a Parquet file. 22 | data_dir <- "data" 23 | dest <- file.path(data_dir, "flights.csv.zip") 24 | csv_name <- "On_Time_Reporting_Carrier_On_Time_Performance_(1987_present)_2022_1.csv" 25 | csv_path <- file.path(data_dir, csv_name) 26 | parquet_path <- file.path(data_dir, "flights.parquet") 27 | 28 | if (!fs::file_exists(parquet_path)) { 29 | if (!fs::file_exists(dest)) { 30 | fs::dir_create(data_dir) 31 | curl::curl_download( 32 | "https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_2022_1.zip", 33 | dest, 34 | handle = curl::new_handle(ssl_verifypeer = FALSE) 35 | ) 36 | } 37 | 38 | unzip(dest, csv_name, exdir = data_dir) 39 | duckdb:::sql(glue::glue("COPY (FROM read_csv_auto('{csv_path}')) TO '{parquet_path}' (FORMAT PARQUET)")) 40 | } 41 | ``` 42 | 43 | #### Shell 44 | 45 | This is a sample command to download the zipped CSV file and covert it to a Parquet file. 46 | 47 | :::{.callout-tip} 48 | 49 | On Windows, the `unzip` command is not available by default, so use something like 50 | `Expand-Archive -Path data/flights.csv.zip -DestinationPath data` in PowerShell insead. 51 | 52 | ::: 53 | 54 | ```{.bash filename=Terminal} 55 | mkdir data 56 | curl -sL https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_2022_1.zip -o data/flights.csv.zip 57 | unzip -d data data/flights.csv.zip 58 | duckdb -c "COPY (FROM read_csv_auto('data/On_Time_Reporting_Carrier_On_Time_Performance_(1987_present)_2022_1.csv')) TO 'data/flights.parquet' (FORMAT PARQUET)" 59 | ``` 60 | 61 | ::: 62 | 63 | ### Load the Data 64 | 65 | After the Parquet file is ready, 66 | load it into DuckDB (in-memory) database table, R DataFrame, and Python polars.LazyFrame. 
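Besides the tabs below, the same table can also be created from Python with the DuckDB client; a minimal sketch, assuming the `duckdb` Python package:

```{.python filename="Python"}
# Hypothetical sketch: load the Parquet file into an in-memory DuckDB
# database from Python instead of R.
import duckdb

con = duckdb.connect(":memory:")
con.sql("CREATE TABLE tab AS SELECT * FROM 'data/flights.parquet'")
con.sql("FROM tab LIMIT 5").show()
```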
67 | 68 | :::{.panel-tabset} 69 | 70 | #### DuckDB 71 | 72 | ```{r} 73 | #| filename: R 74 | #| cache: false 75 | #| include: false 76 | con <- DBI::dbConnect(duckdb::duckdb(), ":memory:") 77 | ``` 78 | 79 | ```{glue_sql} 80 | #| filename: SQL 81 | #| cache: false 82 | #| warning: false 83 | CREATE TABLE tab AS SELECT * FROM 'data/flights.parquet' 84 | ``` 85 | 86 | ```{glue_sql} 87 | #| filename: SQL 88 | FROM tab LIMIT 5 89 | ``` 90 | 91 | #### R DataFrame 92 | 93 | ```{r} 94 | #| filename: R 95 | #| cache: false 96 | #| output: false 97 | library(dplyr, warn.conflicts = FALSE) 98 | 99 | df <- duckdb:::sql("FROM 'data/flights.parquet'") 100 | ``` 101 | 102 | ```{r} 103 | #| filename: R 104 | df |> head(5) 105 | ``` 106 | 107 | #### Python polars.LazyFrame 108 | 109 | ```{python} 110 | #| filename: Python 111 | #| cache: false 112 | #| output: false 113 | import polars as pl 114 | 115 | lf = pl.scan_parquet("data/flights.parquet") 116 | ``` 117 | 118 | ```{python} 119 | #| filename: Python 120 | lf.fetch(5) 121 | ``` 122 | 123 | ::: 124 | -------------------------------------------------------------------------------- /docs/_setup-knitr.qmd: -------------------------------------------------------------------------------- 1 | ```{r} 2 | #| include: false 3 | #| cache: false 4 | library(knitr) 5 | library(glue) 6 | library(prqlr) 7 | library(reticulate) 8 | 9 | # Set SQL code blocks print option 10 | knitr::opts_knit$set(sql.print = \(x) paste(knitr::kable(x, format = "markdown"), collapse = "\n")) 11 | 12 | # readr package option 13 | options(readr.show_col_types = FALSE) 14 | ``` 15 | 16 | :::{.callout-important} 17 | 18 | Since the PRQL and SQL results shown on this page are after being converted to R DataFrame via knitr, 19 | they have been converted from DuckDB types to R types. 20 | So `NULL` in DuckDB is shown as `NA`. 21 | 22 | ::: 23 | -------------------------------------------------------------------------------- /docs/indexing.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: Selecting and Filtering 3 | description: Selecting columns and filtering rows from a table. 4 | engine: knitr 5 | knitr: 6 | opts_chunk: 7 | connection: con 8 | engine-opts: 9 | target: sql.duckdb 10 | use_glue: true 11 | execute: 12 | cache: true 13 | sidebar_position: 2 14 | --- 15 | 16 | :::{.callout-note} 17 | 18 | This page is based on the chapter ["Indexing (Or Lack Thereof)"](https://kevinheavey.github.io/modern-polars/indexing.html) 19 | of the Modern Polars book. 20 | 21 | ::: 22 | 23 | {{< include _setup-data-1.qmd >}} 24 | 25 | ## Read the Data with PRQL 26 | 27 | Since PRQL cannot be used to create tables, data must be read from pre-created tables in a DB. 28 | 29 | But when PRQL is used on DuckDB, it is possible to treat Parquet and other files as tables, 30 | so that PRQL queries can be executed without creating tables. 31 | 32 | ```{prql} 33 | #| filename: PRQL 34 | from `{{parquet_path}}` 35 | take 5 36 | ``` 37 | 38 | ## Selecting Columns and Slicing Rows 39 | 40 | For column selection, the syntax is much the same, but the syntax for row selection using position looks different. 41 | 42 | Note that PRQL, SQL, and R are 1-based, while Python is 0-based. 
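For example, the 13th through 16th rows are addressed as `take 13..16` in PRQL and `LIMIT 4 OFFSET 12` in SQL, while Polars counts from zero; a toy illustration (on a made-up frame, not the flights table):

```{.python filename="Python"}
# Toy illustration of 0-based slicing: rows 13-16 in 1-based terms
# correspond to offset 12 and length 4 in Polars.
import polars as pl

toy = pl.DataFrame({"row_1_based": range(1, 21)})
print(toy.slice(12, 4))  # rows 13, 14, 15, 16
```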
43 | 44 | :::{.panel-tabset} 45 | 46 | ### PRQL DuckDB 47 | 48 | ```{prql} 49 | #| filename: PRQL 50 | from tab 51 | select {Dest, Tail_Number} 52 | take 13..16 53 | ``` 54 | 55 | ### SQL DuckDB 56 | 57 | ```{glue_sql} 58 | #| filename: SQL 59 | SELECT 60 | Dest, 61 | Tail_Number 62 | FROM tab 63 | LIMIT 4 OFFSET 12 64 | ``` 65 | 66 | :::{.callout-tip} 67 | 68 | DuckDB allows SQL query starts with `FROM`, 69 | so we can also write the query as follows: 70 | 71 | ```{.sql filename="SQL"} 72 | FROM tab 73 | SELECT 74 | Dest, 75 | Tail_Number 76 | LIMIT 4 OFFSET 12 77 | ``` 78 | 79 | ::: 80 | 81 | ### dplyr R 82 | 83 | ```{r} 84 | #| filename: R 85 | df |> 86 | select(Dest, Tail_Number) |> 87 | slice(13:16) 88 | ``` 89 | 90 | ### Python Polars 91 | 92 | ```{python} 93 | #| filename: Python 94 | lf.select("Dest", "Tail_Number").slice(12, 4).collect() 95 | ``` 96 | 97 | ::: 98 | 99 | ## Filtering Rows 100 | 101 | :::{.panel-tabset} 102 | 103 | ### PRQL DuckDB 104 | 105 | ```{prql} 106 | #| filename: PRQL 107 | from tab 108 | filter (IATA_CODE_Reporting_Airline | in ["AA", "DL"]) 109 | take 5 110 | ``` 111 | 112 | ### SQL DuckDB 113 | 114 | ```{glue_sql} 115 | #| filename: SQL 116 | FROM tab 117 | WHERE IATA_CODE_Reporting_Airline IN ('AA', 'DL') 118 | LIMIT 5 119 | ``` 120 | 121 | ### dplyr R 122 | 123 | ```{r} 124 | #| filename: R 125 | df |> 126 | filter(IATA_CODE_Reporting_Airline %in% c("AA", "DL")) |> 127 | head(5) 128 | ``` 129 | 130 | ### Python Polars 131 | 132 | ```{python} 133 | #| filename: Python 134 | lf.filter(pl.col("IATA_CODE_Reporting_Airline").is_in(["AA", "DL"])).head(5).collect() 135 | ``` 136 | 137 | ::: 138 | 139 | ## Assigning 140 | 141 | :::{.panel-tabset} 142 | 143 | ### PRQL DuckDB 144 | 145 | ```{prql} 146 | #| filename: PRQL 147 | let f = from [ 148 | {a = 1, b = 10}, 149 | {a = 2, b = 20}, 150 | {a = 3, b = 30}, 151 | {a = 4, b = 40}, 152 | {a = 5, b = 50}, 153 | ] 154 | 155 | from f 156 | select { 157 | a, # Note: can't use `derive` here https://github.com/PRQL/prql/issues/3130 158 | b = case [ 159 | a <= 3 => b // 10, 160 | true => b 161 | ] 162 | } 163 | ``` 164 | 165 | ### SQL DuckDB 166 | 167 | ```{glue_sql} 168 | #| filename: SQL 169 | WITH table_0 AS ( 170 | SELECT 171 | 1 AS a, 172 | 10 AS b 173 | UNION 174 | ALL 175 | SELECT 176 | 2 AS a, 177 | 20 AS b 178 | UNION 179 | ALL 180 | SELECT 181 | 3 AS a, 182 | 30 AS b 183 | UNION 184 | ALL 185 | SELECT 186 | 4 AS a, 187 | 40 AS b 188 | UNION 189 | ALL 190 | SELECT 191 | 5 AS a, 192 | 50 AS b 193 | ), 194 | 195 | f AS ( 196 | SELECT 197 | a, 198 | b 199 | FROM 200 | table_0 AS table_1 201 | ) 202 | 203 | SELECT 204 | a, 205 | CASE 206 | WHEN a <= 3 THEN b // 10 207 | ELSE b 208 | END AS b 209 | FROM 210 | f 211 | ``` 212 | 213 | ### dplyr R 214 | 215 | ```{r} 216 | #| filename: R 217 | f <- dplyr::tibble(a = 1:5, b = a * 10) 218 | 219 | f |> 220 | mutate( 221 | b = case_when( 222 | a <= 3 ~ b %/% 10, 223 | .default = b 224 | ) 225 | ) 226 | ``` 227 | 228 | ### Python Polars 229 | 230 | ```{python} 231 | #| filename: Python 232 | f = pl.DataFrame({"a": list(range(1, 6)), "b": list(range(10, 60, 10))}) 233 | 234 | f.with_columns( 235 | b=pl.when(pl.col("a") <= 3).then(pl.col("b") // 10).otherwise(pl.col("b")) 236 | ) 237 | ``` 238 | 239 | ::: 240 | 241 | {{< include _cleanup-knitr.qmd >}} 242 | -------------------------------------------------------------------------------- /docs/info.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | engine: knitr 3 | output-file: 
_info.mdx 4 | --- 5 | 6 | :::{.callout-important} 7 | 8 | The versions we are currently using are as follows: 9 | 10 | - PRQL: `r prqlr::prql_version()` 11 | - DuckDB: `r duckdb:::sql("select version()")[1, 1]` 12 | 13 | ::: 14 | -------------------------------------------------------------------------------- /docs/intro.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: Introduction 3 | jupyter: python3 4 | sidebar_position: 1 5 | slug: / 6 | --- 7 | 8 | This document is an attempt to do some typical table data manipulation 9 | using [PRQL](https://prql-lang.org/) and [DuckDB](https://duckdb.org/). 10 | 11 | PRQL is a in development modern language for data manipulation, and can be compiled to SQL. 12 | DuckDB is a modern in-memory SQL OLAP ([very fast](https://duckdblabs.github.io/db-benchmark/)) 13 | database management system. 14 | 15 | ## Motivation 16 | 17 | CLI, JavaScript, Python, R, etc., we can combine PRQL compiler and DuckDB in various places 18 | to manipulate data with PRQL. 19 | 20 | At the same time, new features are being added to DuckDB's SQL on a constant basis, 21 | and the syntax is becoming more friendly. 22 | 23 | So, I thought that by comparing PRQL and the latest DuckDB SQL (and other query libraries), 24 | we could clarify the missing features of the PRQL and highlight the latest features of DuckDB. 25 | 26 | ## Content and Credit 27 | 28 | The content of each data manipulation is based on 29 | the awesome [Modern Polars](https://kevinheavey.github.io/modern-polars/) book 30 | by [Kevin Heavey](https://github.com/kevinheavey). 31 | And furthermore its origin is [Modern Pandas](https://tomaugspurger.github.io/posts/modern-1-intro/) 32 | by [Tom Augsburger](https://github.com/TomAugspurger). 33 | 34 | Each query is side-by-side with that of the original 35 | [Python Polars](https://pola-rs.github.io/polars/py-polars/html/reference/) one 36 | and the equivalent operations in PRQL, DuckDB SQL, and [dplyr](https://dplyr.tidyverse.org/) (R). 37 | 38 | Since this document focuses on the differences between the grammars of the different languages 39 | and does not go into detail about the processing details, 40 | I suggest that you also check out the original excellent artcles and book. 41 | 42 | :::{.content-hidden} 43 | 44 | Since Quarto cannot mix knitr and jupyter engines in a single qmd file (also if using include shortcode), 45 | the knitr document wll output to a separate md file and imported here using the Docusaurus mdx functionality. 46 | 47 | ::: 48 | 49 | 50 | ```{=mdx} 51 | import VesionInfo from './_info.mdx'; 52 | 53 | 54 | ``` 55 | 56 | ## Running the Code on IPython 57 | 58 | As mentioned above, PRQL and DuckDB can run on a variety of languages. 59 | This section describes, as an example, the use of IPython for easy interactive execution of PRQL, SQL, and Polars. 60 | 61 | ### Setup 62 | 63 | This command installs the necessary Python packages. 64 | 65 | ```{.bash filename="Terminal"} 66 | python -m pip install pyprql polars[pyarrow] 67 | ``` 68 | 69 | Then, we can start `ipython`. 70 | 71 | Run `load_ext` magic to activate PRQL magic (from [pyprql](https://pyprql.readthedocs.io/)) 72 | and SQL magic (from [jupysql](https://jupysql.ploomber.io/)). 
73 | 74 | ```{python} 75 | #| filename: IPython 76 | #| output: false 77 | %load_ext pyprql.magic 78 | %load_ext sql 79 | ``` 80 | 81 | ```{python} 82 | #| filename: IPython 83 | #| include: false 84 | %config SqlMagic.displaycon = False 85 | %config SqlMagic.feedback = False 86 | ``` 87 | 88 | Use SQL magic connects to DuckDB in-memory database. 89 | Note that PRQL magic shares the connection with SQL magic. 90 | 91 | ```{python} 92 | #| filename: IPython 93 | %sql duckdb:// 94 | ``` 95 | 96 | To fix the SQL dialect used inside PRQL magic, we need to set the target with config magic. 97 | By setting `"sql.duckdb"` here, we can specify to the PRQL compiler that it should generate optimized SQL for DuckDB. 98 | 99 | ```{python} 100 | #| filename: IPython 101 | %config PrqlMagic.target = "sql.duckdb" 102 | ``` 103 | 104 | All available targets can be found in the `prqlc.get_targets` function. 105 | 106 | ```{python} 107 | #| filename: IPython 108 | import prqlc 109 | 110 | print(prqlc.get_targets()) 111 | ``` 112 | 113 | ### Combine DuckDB and Polars 114 | 115 | By setting `autopolars` config, the results of PRQL and SQL executions can be converted to polars.DataFrame. 116 | 117 | ```{python} 118 | #| filename: IPython 119 | #| output: false 120 | %config PrqlMagic.autopolars = True 121 | %config SqlMagic.autopolars = True 122 | ``` 123 | 124 | Also, since DuckDB can execute queries against polars.DataFrame and polars.LazyFrame etc., 125 | these can be referenced directly from PRQL or SQL, as shown below. 126 | 127 | ```{python} 128 | #| filename: IPython 129 | import polars as pl 130 | 131 | lf = pl.LazyFrame({"a": list(range(1, 6))}) 132 | ``` 133 | 134 | :::{.callout-important} 135 | 136 | With JupySQL 0.10.14 and DuckDB 1.1, the following setting is needed to scan all frames in Python. 137 | 138 | ```{python} 139 | #| filename: IPython 140 | #| output: false 141 | %%sql 142 | set python_scan_all_frames=true 143 | ``` 144 | 145 | ::: 146 | 147 | :::{.panel-tabset} 148 | 149 | #### PRQL DuckDB 150 | 151 | ```{python} 152 | #| filename: IPython 153 | %%prql 154 | from lf 155 | derive b = a * 5 156 | take 3 157 | ``` 158 | 159 | #### SQL DuckDB 160 | 161 | ```{python} 162 | #| filename: IPython 163 | %%sql 164 | SELECT 165 | a, 166 | a * 5 AS b 167 | FROM lf 168 | LIMIT 3 169 | ``` 170 | 171 | #### Python Polars 172 | 173 | ```{python} 174 | #| filename: IPython 175 | lf.with_columns(b=pl.col("a") * 5).head(3).collect() 176 | ``` 177 | 178 | ::: 179 | 180 | ## License 181 | 182 | This website's content is licensed under the MIT license. 183 | -------------------------------------------------------------------------------- /docs/method_chaining.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: Define Functions 3 | description: | 4 | To eliminate the need to copy and paste the same code, 5 | it is convenient to make the process a function. 6 | engine: knitr 7 | knitr: 8 | opts_chunk: 9 | connection: con 10 | engine-opts: 11 | target: sql.duckdb 12 | use_glue: true 13 | execute: 14 | cache: true 15 | sidebar_position: 3 16 | --- 17 | 18 | :::{.callout-note} 19 | 20 | This page is based on the chapter ["Method Chaining"](https://kevinheavey.github.io/modern-polars/method_chaining.html) 21 | of the Modern Polars book. 22 | 23 | ::: 24 | 25 | To eliminate the need to copy and paste the same code, it is convenient to make the process a function. 
26 | 27 | PRQL can chain functions by `|` (pipe operator), 28 | DuckDB SQL can chain functions by `.` (DuckDB >= 0.8, [duckdb/duckdb#6725](https://github.com/duckdb/duckdb/pull/6725)), 29 | and R can chain functions by `|>` (pipe operator, R >= 4.1). 30 | They are defined so that the previous value in the chain becomes the first (DuckDB SQL and R) or 31 | the last (PRQL) parameter of the function. 32 | 33 | In Python, we can chain methods using `.`. 34 | 35 | {{< include _setup-data-1.qmd >}} 36 | 37 | ## Functions 38 | 39 | Define some functions and see if they work. 40 | 41 | :::{.callout-important} 42 | 43 | PRQL currently does not allow functions to be stored in a separate session or file, 44 | and they must always be defined with the main query. 45 | ([PRQL/prql#1803](https://github.com/PRQL/prql/issues/1803)) 46 | 47 | ::: 48 | 49 | ### Extract City Names 50 | 51 | PRQL does not currently have the ability to apply a function to multiple columns at once. 52 | ([PRQL/prql#2386](https://github.com/PRQL/prql/issues/2386)) 53 | 54 | The others can apply a function to multiple columns at once, 55 | but DuckDB SQL differs in that the column names are updated and that behavior cannot be prevented. 56 | 57 | :::{.panel-tabset} 58 | 59 | #### PRQL DuckDB 60 | 61 | ```{r} 62 | #| echo: false 63 | .prql_func_extract_city_name <- r"-(let extract_city_name = col -> s"regexp_replace({col}, ',.*', '')")-" 64 | ``` 65 | 66 | ```{prql} 67 | #| filename: PRQL 68 | {{.prql_func_extract_city_name}} 69 | 70 | from tab 71 | select { 72 | OriginCityName = extract_city_name OriginCityName, 73 | DestCityName = extract_city_name DestCityName 74 | } 75 | take 5 76 | ``` 77 | 78 | #### SQL DuckDB 79 | 80 | ```{sql} 81 | #| filename: SQL 82 | #| cache: false 83 | CREATE MACRO extract_city_name(col) AS regexp_replace(col, ',.*', '') 84 | ``` 85 | 86 | The function works as follows: 87 | 88 | ```{glue_sql} 89 | #| filename: SQL 90 | SELECT 91 | extract_city_name(COLUMNS(['OriginCityName', 'DestCityName'])) 92 | FROM 93 | tab 94 | LIMIT 5 95 | ``` 96 | 97 | #### dplyr R 98 | 99 | ```{r} 100 | #| filename: R 101 | #| cache: false 102 | extract_city_name <- function(col) stringr::str_remove(col, ",.*") 103 | ``` 104 | 105 | The function works as follows: 106 | 107 | ```{r} 108 | #| filename: R 109 | df |> 110 | head(5) |> 111 | mutate( 112 | across(c(OriginCityName, DestCityName), extract_city_name), 113 | .keep = "none" 114 | ) 115 | ``` 116 | 117 | #### Python Polars 118 | 119 | ```{python} 120 | #| filename: Python 121 | #| cache: false 122 | def extract_city_name(cols: list[str]) -> pl.Expr: 123 | return pl.col(cols).str.replace(",.*", "") 124 | ``` 125 | 126 | The function works as follows: 127 | 128 | ```{python} 129 | #| filename: Python 130 | lf.fetch(5).select(extract_city_name(["OriginCityName", "DestCityName"])) 131 | ``` 132 | 133 | ::: 134 | 135 | ### Timestamp Calculation 136 | 137 | PRQL and SQL can only define functions that return a single column. 138 | 139 | The column name (here `FlightDate`) cannot be used in the function definition of DuckDB SQL. 
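The transformation itself is the same in every tab: treat the departure time as an `HHMM` string, map the special value `2400` to `0000`, and add the hours and minutes to `FlightDate`. A small standalone Python sketch of that logic, on made-up values and kept separate from the table:

```{.python filename="Python"}
# Toy illustration of the HHMM-string-to-datetime logic used below.
from datetime import date, datetime, timedelta


def hhmm_to_datetime(flight_date: date, hhmm: str) -> datetime:
    # "2400" is not a valid time of day; normalize it to "0000" first,
    # matching the behavior of the queries below.
    hhmm = "0000" if hhmm == "2400" else hhmm
    hours, minutes = int(hhmm[:2]), int(hhmm[2:])
    return datetime.combine(flight_date, datetime.min.time()) + timedelta(
        hours=hours, minutes=minutes
    )


print(hhmm_to_datetime(date(2022, 1, 14), "1305"))  # 2022-01-14 13:05:00
print(hhmm_to_datetime(date(2022, 1, 14), "2400"))  # 2022-01-14 00:00:00
```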
140 | 141 | :::{.panel-tabset} 142 | 143 | #### PRQL DuckDB 144 | 145 | ```{r} 146 | #| echo: false 147 | .prql_func_time_to_datetime <- r"(let time_to_datetime = string -> s""" 148 | FlightDate::TIMESTAMP + 149 | TRY_CAST(regexp_replace({string}, '^2400$', '0000').substr(1, 2).concat(' hours') AS INTERVAL) + 150 | TRY_CAST(regexp_replace({string}, '^2400$', '0000').substr(3, 2).concat(' minutes') AS INTERVAL) 151 | """)" 152 | ``` 153 | 154 | ```{prql} 155 | #| filename: PRQL 156 | {{.prql_func_time_to_datetime}} 157 | 158 | from tab 159 | select { 160 | FlightDate, 161 | DepTimeOld = DepTime 162 | } 163 | derive { 164 | DepTime = (time_to_datetime DepTimeOld) 165 | } 166 | take 5 167 | ``` 168 | 169 | #### SQL DuckDB 170 | 171 | ```{sql} 172 | #| filename: SQL 173 | #| cache: false 174 | CREATE MACRO time_to_datetime(date, string) AS 175 | date::TIMESTAMP + 176 | TRY_CAST(regexp_replace(string, '^2400$', '0000').substr(1, 2).concat(' hours') AS INTERVAL) + 177 | TRY_CAST(regexp_replace(string, '^2400$', '0000').substr(3, 2).concat(' minutes') AS INTERVAL) 178 | ``` 179 | 180 | The function works as follows: 181 | 182 | ```{glue_sql} 183 | #| filename: SQL 184 | SELECT 185 | FlightDate, 186 | time_to_datetime(FlightDate, DepTime) AS DepTime, 187 | DepTime AS DepTimeOld 188 | FROM 189 | tab 190 | LIMIT 5 191 | ``` 192 | 193 | #### dplyr R 194 | 195 | A function returns a single column. 196 | 197 | ```{r} 198 | #| filename: R 199 | #| cache: false 200 | time_col <- function(col) { 201 | col |> 202 | case_match( 203 | "2400" ~ "0000", 204 | .default = col 205 | ) |> 206 | (\(x) { 207 | (stringr::str_sub(x, 1, 2) |> lubridate::hours()) + 208 | (stringr::str_sub(x, 3, 4) |> lubridate::minutes()) 209 | })() 210 | } 211 | ``` 212 | 213 | A function returns a DataFrame. 214 | 215 | ```{r} 216 | #| filename: R 217 | #| cache: false 218 | time_to_datetime <- function(df, columns) { 219 | df |> 220 | mutate(across(all_of({{ columns }}), \(x) FlightDate + time_col(x))) 221 | } 222 | ``` 223 | 224 | The function works as follows: 225 | 226 | ```{r} 227 | #| filename: R 228 | df |> 229 | head(5) |> 230 | select(FlightDate, DepTime) |> 231 | mutate(DepTimeOld = DepTime) |> 232 | time_to_datetime("DepTime") 233 | ``` 234 | 235 | #### Python Polars 236 | 237 | A function returns a single `polars.Expr`. 238 | 239 | ```{python} 240 | #| filename: Python 241 | #| cache: false 242 | def time_col(col: str) -> pl.Expr: 243 | col_expr = pl.col(col) 244 | return ( 245 | pl.when(col_expr == "2400") 246 | .then(pl.lit("0000")) 247 | .otherwise(col_expr) 248 | .str.strptime(pl.Time, "%H%M", strict=False) 249 | .alias(col) 250 | ) 251 | ``` 252 | 253 | A function returns a list of `polars.Expr`. 254 | 255 | ```{python} 256 | #| filename: Python 257 | #| cache: false 258 | def time_to_datetime(columns: list[str]) -> list[pl.Expr]: 259 | date_col = pl.col("FlightDate") 260 | return [date_col.dt.combine(time_col(col)).alias(col) for col in columns] 261 | ``` 262 | 263 | The function works as follows: 264 | 265 | ```{python} 266 | #| filename: Python 267 | lf.fetch(5).select(["FlightDate", "DepTime"]).with_columns( 268 | DepTimeOld=pl.col("DepTime"), *time_to_datetime(["DepTime"]) 269 | ) 270 | ``` 271 | 272 | ::: 273 | 274 | ## Use Functions in the Query 275 | 276 | dplyr and Polars allow column names to be predefined as vectors or lists, 277 | which can then be referenced in the query. 
278 | 279 | :::{.panel-tabset} 280 | 281 | ### PRQL DuckDB 282 | 283 | ```{prql} 284 | #| filename: PRQL 285 | {{.prql_func_extract_city_name}} 286 | 287 | {{.prql_func_time_to_datetime}} 288 | 289 | from tab 290 | select { 291 | Dest, 292 | Tail_Number, 293 | IATA_CODE_Reporting_Airline, 294 | CancellationCode, 295 | DepTime = time_to_datetime DepTime, 296 | ArrTime = time_to_datetime ArrTime, 297 | CRSArrTime = time_to_datetime CRSArrTime, 298 | CRSDepTime = time_to_datetime CRSDepTime, 299 | FlightDate, 300 | Flight_Number_Reporting_Airline, 301 | OriginCityName = extract_city_name OriginCityName, 302 | DestCityName = extract_city_name DestCityName, 303 | Origin, 304 | DepDelay 305 | } 306 | take 5 307 | ``` 308 | 309 | ### SQL DuckDB 310 | 311 | ```{glue_sql} 312 | #| filename: SQL 313 | SELECT 314 | Dest, 315 | Tail_Number, 316 | IATA_CODE_Reporting_Airline, 317 | CancellationCode, 318 | time_to_datetime( 319 | FlightDate, 320 | COLUMNS([ 321 | DepTime, 322 | ArrTime, 323 | CRSArrTime, 324 | CRSDepTime 325 | ]) 326 | ), 327 | FlightDate, 328 | Flight_Number_Reporting_Airline, 329 | extract_city_name(COLUMNS([OriginCityName, DestCityName])), 330 | Origin, 331 | DepDelay 332 | FROM 333 | tab 334 | LIMIT 5 335 | ``` 336 | 337 | ### dplyr R 338 | 339 | ```{r} 340 | #| filename: R 341 | category_cols <- c( 342 | "Dest", 343 | "Tail_Number", 344 | "IATA_CODE_Reporting_Airline", 345 | "CancellationCode" 346 | ) 347 | 348 | time_cols <- c("DepTime", "ArrTime", "CRSArrTime", "CRSDepTime") 349 | 350 | cols <- c( 351 | category_cols, 352 | time_cols, 353 | c( 354 | "FlightDate", 355 | "Flight_Number_Reporting_Airline", 356 | "OriginCityName", 357 | "DestCityName", 358 | "Origin", 359 | "DepDelay" 360 | ) 361 | ) 362 | 363 | df |> 364 | select(all_of(cols)) |> 365 | mutate( 366 | across({{ category_cols }}, as.factor), 367 | across(c(OriginCityName, DestCityName), extract_city_name) 368 | ) |> 369 | time_to_datetime(time_cols) |> 370 | head(5) 371 | ``` 372 | 373 | ### Python Polars 374 | 375 | ```{python} 376 | #| filename: Python 377 | category_cols = [ 378 | "Dest", 379 | "Tail_Number", 380 | "IATA_CODE_Reporting_Airline", 381 | "CancellationCode", 382 | ] 383 | 384 | time_cols = ["DepTime", "ArrTime", "CRSArrTime", "CRSDepTime"] 385 | 386 | cols = ( 387 | category_cols 388 | + time_cols 389 | + [ 390 | "FlightDate", 391 | "Flight_Number_Reporting_Airline", 392 | "OriginCityName", 393 | "DestCityName", 394 | "Origin", 395 | "DepDelay", 396 | ] 397 | ) 398 | 399 | lf.select(cols).with_columns( 400 | [ 401 | pl.col(category_cols).cast(pl.Categorical), 402 | extract_city_name(["OriginCityName", "DestCityName"]), 403 | *time_to_datetime(time_cols), 404 | ] 405 | ).head(5).collect() 406 | ``` 407 | 408 | ::: 409 | 410 | {{< include _cleanup-knitr.qmd >}} 411 | -------------------------------------------------------------------------------- /docs/tidy.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: Reshaping and Tidy Data 3 | description: Make the data tidy. 4 | engine: knitr 5 | knitr: 6 | opts_chunk: 7 | connection: con 8 | engine-opts: 9 | target: sql.duckdb 10 | use_glue: true 11 | info_string: '{.sql filename="SQL"}' 12 | execute: 13 | cache: true 14 | sidebar_position: 4 15 | --- 16 | 17 | :::{.callout-note} 18 | 19 | This page is based on the chapter ["Reshaping and Tidy Data"](https://kevinheavey.github.io/modern-polars/tidy.html) 20 | of the Modern Polars book. 
21 | 22 | ::: 23 | 24 | ## Read the Data 25 | 26 | {{< include _setup-knitr.qmd >}} 27 | 28 | ### Download 29 | 30 | Download the data to be analysis (tables on the website) and write the data to CSV files. 31 | 32 | This document uses R to download the data from the source here, 33 | but we can also download and use the CSV files included in 34 | the [kevinheavey/modern-polars](https://github.com/kevinheavey/modern-polars/tree/master/data/nba) GitHub repository. 35 | 36 | :::{.panel-tabset} 37 | 38 | #### R 39 | 40 | ```{r} 41 | #| filename: R 42 | #| code-fold: true 43 | #| cache: false 44 | nba_dir <- file.path("data", "nba") 45 | 46 | months <- c( 47 | "october", 48 | "november", 49 | "december", 50 | "january", 51 | "february", 52 | "march", 53 | "april", 54 | "may", 55 | "june" 56 | ) 57 | 58 | column_names <- c( 59 | date = "date", 60 | away_team = "visitor_neutral", 61 | away_points = "pts", 62 | home_team = "home_neutral", 63 | home_points = "pts_2" 64 | ) 65 | 66 | .write_data <- function(month) { 67 | base_url <- "http://www.basketball-reference.com/leagues/NBA_2016_games-{month}.html" 68 | 69 | glue::glue(base_url, month = month) |> 70 | rvest::read_html() |> 71 | rvest::html_table() |> 72 | (\(x) x[[1]])() |> # TODO: Rewrite after R 4.3 73 | janitor::clean_names() |> 74 | dplyr::select(all_of(column_names)) |> 75 | dplyr::filter(date != "Playoffs") |> 76 | readr::write_csv(file.path(nba_dir, glue::glue("{month}.csv"))) 77 | } 78 | 79 | if (!fs::dir_exists(nba_dir)) { 80 | fs::dir_create(nba_dir) 81 | months |> 82 | purrr::walk(.write_data) 83 | } 84 | ``` 85 | 86 | #### Shell 87 | 88 | This is a sample command to download the CSV files from the `kevinheavey/modern-polars` GitHub repository. 89 | 90 | ```{r} 91 | #| results: asis 92 | #| echo: false 93 | base_command <- glue::glue("curl -sL https://github.com/kevinheavey/modern-polars/raw/87539190dde3e99d5e4c4f9957c78932a33075a0/data/nba/{{month}}.csv -o {nba_dir}/{{month}}.csv") 94 | 95 | commands <- glue::glue(base_command, month = months) |> 96 | stringr::str_c(collapse = "\n") 97 | 98 | cat( 99 | "```{.bash filename=Terminal}", 100 | glue::glue("mkdir {nba_dir}"), 101 | commands, 102 | "```", 103 | sep = "\n" 104 | ) 105 | ``` 106 | 107 | ::: 108 | 109 | ### Load the Data 110 | 111 | After the CSV files are ready, load these into DuckDB (in-memory) database table, 112 | R DataFrame, and Python polars.LazyFrame. 
113 | 114 | :::{.panel-tabset} 115 | 116 | #### DuckDB 117 | 118 | ```{r } 119 | #| filename: R 120 | #| cache: false 121 | #| output: false 122 | #| echo: false 123 | con <- DBI::dbConnect(duckdb::duckdb(), ":memory:") 124 | ``` 125 | 126 | ```{glue_sql} 127 | #| filename: SQL 128 | #| cache: false 129 | #| output: false 130 | CREATE TABLE tab AS FROM read_csv_auto('data/nba/*.csv') 131 | ``` 132 | 133 | ```{glue_sql} 134 | #| filename: SQL 135 | FROM tab 136 | LIMIT 5 137 | ``` 138 | 139 | #### R DataFrame 140 | 141 | ```{r} 142 | #| filename: R 143 | #| cache: false 144 | library(dplyr, warn.conflicts = FALSE) 145 | 146 | df <- readr::read_csv( 147 | fs::dir_ls("data/nba", glob = "*.csv") 148 | ) 149 | ``` 150 | 151 | ```{r} 152 | #| filename: R 153 | df |> head(5) 154 | ``` 155 | 156 | #### Python polars.LazyFrame 157 | 158 | ```{python} 159 | #| filename: Python 160 | #| cache: false 161 | import polars as pl 162 | 163 | lf = pl.scan_csv("data/nba/*.csv") 164 | ``` 165 | 166 | ```{python} 167 | #| filename: Python 168 | lf.head(5).collect() 169 | ``` 170 | 171 | ::: 172 | 173 | ## Cleaning {#sec-cleaning} 174 | 175 | Convert the `date` column to date type and delete rows containing missing values (`null`). 176 | 177 | PRQL does not have a "remove rows with missing values in any column" syntax 178 | ([PRQL/prql#2386](https://github.com/PRQL/prql/issues/2386)), 179 | but DuckDB SQL does (>= 0.8, [duckdb/duckdb#6621](https://github.com/duckdb/duckdb/pull/6621)), so it can be used. 180 | 181 | :::{.panel-tabset} 182 | 183 | ### PRQL DuckDB 184 | 185 | ```{prql} 186 | #| filename: PRQL 187 | #| label: prql_cleaning 188 | let games = ( 189 | from tab 190 | filter s"COLUMNS(*) IS NOT NULL" 191 | derive date_new = (s"strptime(date, '%a, %b %d, %Y')" | as date) 192 | select !{this.date} # `this` points to refer to current relation 193 | sort date_new 194 | derive game_id = (row_number this) 195 | ) 196 | 197 | from games 198 | take 5 199 | ``` 200 | 201 | ### SQL DuckDB 202 | 203 | ```{sql} 204 | --| filename: SQL 205 | --| cache: false 206 | --| output: false 207 | CREATE TABLE games AS ( 208 | WITH _tab1 AS ( 209 | SELECT 210 | * REPLACE (strptime(date, '%a, %b %d, %Y')::date AS date) 211 | FROM tab 212 | WHERE COLUMNS(*) IS NOT NULL 213 | ) 214 | 215 | SELECT 216 | row_number() OVER(ORDER BY date) AS game_id, 217 | * 218 | FROM _tab1 219 | ORDER BY date 220 | ) 221 | ``` 222 | 223 | ```{glue_sql} 224 | #| filename: SQL 225 | FROM games 226 | LIMIT 5 227 | ``` 228 | 229 | ### dplyr R 230 | 231 | ```{r} 232 | #| filename: R 233 | games <- df |> 234 | filter(!if_any(everything(), is.na)) |> # Also can use `tidyr::drop_na` 235 | mutate( 236 | date = lubridate::parse_date_time(date, "%a, %b %d, %Y") |> 237 | lubridate::as_date() 238 | ) |> 239 | arrange(date) |> 240 | mutate(game_id = row_number(), .before = 1) 241 | ``` 242 | 243 | ```{r} 244 | #| filename: R 245 | games |> 246 | head(5) 247 | ``` 248 | 249 | ### Python Polars 250 | 251 | ```{python} 252 | #| filename: Python 253 | #| cache: false 254 | games = ( 255 | lf.filter(~pl.any_horizontal(pl.all().is_null())) # Also can use `polars.LazyFrame.drop_nulls` 256 | .with_columns( 257 | pl.col("date").str.strptime(pl.Date, "%a, %b %d, %Y"), 258 | ) 259 | .sort("date") 260 | .with_row_index("game_id") 261 | .collect() 262 | ) 263 | ``` 264 | 265 | ```{python} 266 | #| filename: Python 267 | games.head(5) 268 | ``` 269 | 270 | ::: 271 | 272 | Looking at the result tables, we notice that the PRQL result is different from the other results; 273 | 
A column named `date` in other results is named `date_new` in PRQL. 274 | This is because another name is needed to avoid the behavior that 275 | using the column name `date` here would add a new column called `date:1` 276 | instead of updating the original `date` column. 277 | 278 | In DuckDB SQL, we can use [Replace Clause](https://duckdb.org/docs/sql/expressions/star#replace-clause) 279 | to update the original column with the same column name. 280 | 281 | The SQL generated by the PRQL compiler looks like this: 282 | 283 | ```{prql} 284 | #| connection: null 285 | #| echo: false 286 | <> 287 | ``` 288 | 289 | ## Tidy Data {#sec-tidy-data} 290 | 291 | :::{.callout-important} 292 | 293 | - PRQL does not yet support PIVOT and UNPIVOT. ([PRQL/prql#644](https://github.com/PRQL/prql/issues/644)) 294 | - DuckDB SQL supports PIVOT and UNPIVOT >= 0.8. ([duckdb/duckdb#6387](https://github.com/duckdb/duckdb/pull/6387)) 295 | 296 | ::: 297 | 298 | ### Unpivot 299 | 300 | Transforms the data from wide format to long format. 301 | This transformation is called by names such as unpivot, pivot longer, and melt. 302 | 303 | :::{.panel-tabset} 304 | 305 | #### PRQL DuckDB 306 | 307 | :::{.callout-important} 308 | 309 | `games` in this query is defiened in @sec-cleaning with SQL, not with PRQL. 310 | 311 | ::: 312 | 313 | ```{prql} 314 | #| filename: PRQL 315 | from s""" 316 | SELECT * 317 | FROM ( 318 | PIVOT_LONGER games 319 | ON away_team, home_team 320 | INTO 321 | NAME variable 322 | VALUE team 323 | ) 324 | """ 325 | group {team} ( 326 | sort this.date 327 | derive rest = this.date - (this.date | lag 1) - 1 328 | ) 329 | select !{away_points, home_points} 330 | filter rest != null 331 | sort game_id 332 | take 5 333 | ``` 334 | 335 | #### SQL DuckDB 336 | 337 | ```{sql} 338 | --| filename: SQL 339 | --| cache: false 340 | --| output: false 341 | CREATE TABLE tidy AS ( 342 | WITH _tab1 AS ( 343 | PIVOT_LONGER games 344 | ON away_team, home_team 345 | INTO 346 | NAME variable 347 | VALUE team 348 | ), 349 | 350 | _tab2 AS ( 351 | SELECT 352 | COLUMNS(x -> NOT suffix(x, '_points')) 353 | FROM _tab1 354 | ), 355 | 356 | _tab3 AS ( 357 | SELECT 358 | *, 359 | date - lag(date) OVER (PARTITION BY team ORDER BY date) -1 AS rest 360 | FROM _tab2 361 | ) 362 | 363 | SELECT * 364 | FROM _tab3 365 | WHERE rest IS NOT NULL 366 | ORDER BY game_id 367 | ) 368 | ``` 369 | 370 | ```{glue_sql} 371 | #| filename: SQL 372 | FROM tidy 373 | LIMIT 5 374 | ``` 375 | 376 | #### dplyr R 377 | 378 | ```{r} 379 | #| filename: R 380 | tidy <- games |> 381 | tidyr::pivot_longer( 382 | cols = c(away_team, home_team), 383 | names_to = "variable", 384 | values_to = "team" 385 | ) |> 386 | select(!ends_with("_points")) |> 387 | arrange(game_id) |> 388 | mutate( 389 | rest = date - lag(date) - 1, 390 | .by = team 391 | ) |> 392 | filter(!is.na(rest)) 393 | ``` 394 | 395 | ```{r} 396 | #| filename: R 397 | tidy |> 398 | head(5) 399 | ``` 400 | 401 | #### Python Polars 402 | 403 | ```{python} 404 | #| filename: Python 405 | #| cache: false 406 | tidy = ( 407 | games.unpivot( 408 | index=["game_id", "date"], 409 | on=["away_team", "home_team"], 410 | value_name="team", 411 | ) 412 | .sort("game_id") 413 | .with_columns( 414 | rest=(pl.col("date").diff().over("team").dt.total_days() - 1).cast(pl.Int8) 415 | ) 416 | .drop_nulls("rest") 417 | ) 418 | ``` 419 | 420 | ```{python} 421 | #| filename: Python 422 | tidy.head(5) 423 | ``` 424 | 425 | ::: 426 | 427 | PRQL, SQL and dplyr remove unnecessary columns after UNPIVOT 428 | (columns that 
were automatically removed in the original Polars and Pandas example). 429 | 430 | ### Pivot 431 | 432 | Transforms the data from long format to wide format. 433 | This transformation is called by names such as pivot, pivot wider. 434 | 435 | :::{.panel-tabset} 436 | 437 | #### PRQL DuckDB 438 | 439 | :::{.callout-important} 440 | 441 | `tidy` in this query is defiened in @sec-tidy-data with SQL, 442 | and `games` is defiened in @sec-cleaning with SQL. 443 | 444 | ::: 445 | 446 | ```{prql} 447 | #| filename: PRQL 448 | #| label: prql_tidy_nba_2 449 | from s""" 450 | SELECT * 451 | FROM ( 452 | PIVOT_WIDER tidy ON variable USING FIRST(rest) GROUP BY (game_id, date) 453 | ) 454 | """ 455 | derive { 456 | away_rest = away_team, 457 | home_rest = home_team 458 | } 459 | select !{ 460 | away_team, 461 | home_team 462 | } 463 | join side:left games (==game_id && ==date) 464 | derive { 465 | home_win = games.home_points > games.away_points, 466 | rest_spread = home_rest - away_rest 467 | } 468 | sort games.game_id 469 | take 5 470 | ``` 471 | 472 | #### SQL DuckDB 473 | 474 | ```{sql} 475 | --| filename: SQL 476 | --| cache: false 477 | --| output: false 478 | CREATE TABLE by_game AS ( 479 | WITH _tab1 AS ( 480 | PIVOT_WIDER tidy ON variable USING FIRST(rest) GROUP BY (game_id, date) 481 | ) 482 | 483 | SELECT 484 | * EXCLUDE(away_team, home_team), 485 | away_team AS away_rest, 486 | home_team AS home_rest 487 | FROM _tab1 488 | ) 489 | ``` 490 | 491 | ```{sql} 492 | --| filename: SQL 493 | --| cache: false 494 | --| output: false 495 | CREATE TABLE joined AS ( 496 | SELECT 497 | *, 498 | home_points > away_points AS home_win, 499 | home_rest - away_rest AS rest_spread 500 | FROM by_game 501 | LEFT JOIN games USING (game_id, date) 502 | ORDER BY game_id 503 | ) 504 | ``` 505 | 506 | ```{glue_sql} 507 | #| filename: SQL 508 | FROM joined 509 | LIMIT 5 510 | ``` 511 | 512 | #### dplyr R 513 | 514 | ```{r} 515 | #| filename: R 516 | by_game <- tidy |> 517 | tidyr::pivot_wider( 518 | id_cols = c("game_id", "date"), 519 | values_from = "rest", 520 | names_from = "variable" 521 | ) |> 522 | rename( 523 | away_rest = away_team, 524 | home_rest = home_team 525 | ) 526 | 527 | joined <- by_game |> 528 | left_join(games, by = c("game_id", "date")) |> 529 | mutate( 530 | home_win = home_points > away_points, 531 | rest_spread = home_rest - away_rest 532 | ) 533 | ``` 534 | 535 | ```{r} 536 | #| filename: R 537 | joined |> 538 | head(5) 539 | ``` 540 | 541 | #### Python Polars 542 | 543 | ```{python} 544 | #| filename: Python 545 | by_game = tidy.pivot( 546 | values="rest", index=["game_id", "date"], on="variable" 547 | ).rename({"away_team": "away_rest", "home_team": "home_rest"}) 548 | 549 | joined = by_game.join(games, on=["game_id", "date"]).with_columns( 550 | home_win=pl.col("home_points") > pl.col("away_points"), 551 | rest_spread=pl.col("home_rest") - pl.col("away_rest"), 552 | ) 553 | ``` 554 | 555 | ```{python} 556 | #| filename: Python 557 | joined.head(5) 558 | ``` 559 | 560 | ::: 561 | 562 | There are more columns in the PRQL result than in the other results. 563 | Because the output SQL is not using `USING` for joins ([PRQL/prql#1335](https://github.com/PRQL/prql/issues/1335)). 
564 | 565 | The SQL generated by the PRQL compiler looks like this: 566 | 567 | ```{prql} 568 | #| connection: null 569 | #| echo: false 570 | <> 571 | ``` 572 | -------------------------------------------------------------------------------- /docs/timeseries.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: Timeseries 3 | description: Handling timeseries data. 4 | engine: knitr 5 | knitr: 6 | opts_chunk: 7 | connection: con 8 | engine-opts: 9 | target: sql.duckdb 10 | use_glue: true 11 | execute: 12 | cache: true 13 | sidebar_position: 5 14 | --- 15 | 16 | :::{.callout-note} 17 | 18 | This page is based on the chapter ["Timeseries"](https://kevinheavey.github.io/modern-polars/timeseries.html) 19 | of the Modern Polars book. 20 | 21 | ::: 22 | 23 | ## Preparing Data 24 | 25 | {{< include _setup-knitr.qmd >}} 26 | 27 | ### Download 28 | 29 | Download the data from [Binance REST API](https://github.com/binance/binance-spot-api-docs/blob/master/rest-api.md) 30 | and write it to a Parquet file. 31 | 32 | This document uses R to download the data from the source here, 33 | but we can also download and use the Parquet file included in the 34 | [kevinheavey/modern-polars](https://github.com/kevinheavey/modern-polars/blob/master/data/ohlcv.pq) GitHub repository. 35 | 36 | :::{.panel-tabset} 37 | 38 | #### R 39 | 40 | ```{r} 41 | #| filename: R 42 | #| cache: false 43 | #| code-fold: true 44 | #| warning: false 45 | data_path <- "data/ohlcv.parquet" 46 | 47 | if (!fs::file_exists(data_path)) { 48 | fs::dir_create(fs::path_dir(data_path)) 49 | 50 | .epoch_ms <- function(dt) { 51 | dt |> 52 | lubridate::as_datetime() |> 53 | (\(x) (as.integer(x) * 1000))() 54 | } 55 | 56 | .start <- lubridate::make_datetime(2021, 1, 1) |> .epoch_ms() 57 | .end <- lubridate::make_datetime(2022, 1, 1) |> .epoch_ms() 58 | 59 | .url <- glue::glue( 60 | "https://api.binance.com/api/v3/klines?symbol=BTCUSDT&", 61 | "interval=1d&startTime={.start}&endTime={.end}" 62 | ) 63 | 64 | .res <- jsonlite::read_json(.url) 65 | 66 | time_col <- "time" 67 | ohlcv_cols <- c( 68 | "open", 69 | "high", 70 | "low", 71 | "close", 72 | "volume" 73 | ) 74 | cols_to_use <- c(time_col, ohlcv_cols) 75 | cols <- c(cols_to_use, glue::glue("ignore_{i}", i = 1:6)) 76 | 77 | df <- .res |> 78 | tibble::enframe(name = NULL) |> 79 | tidyr::unnest_wider(value, names_sep = "_") |> 80 | rlang::set_names({{ cols }}) |> 81 | dplyr::mutate( 82 | dplyr::across({{ time_col }}, \(x) lubridate::as_datetime(x / 1000) |> lubridate::as_date()), 83 | dplyr::across({{ ohlcv_cols }}, as.numeric), 84 | .keep = "none" 85 | ) 86 | 87 | # Unlike the Python client, the duckdb R client does not (yet) have automatic DataFrame registration. 88 | # (duckdb/duckdb#6771) 89 | con_tmp <- DBI::dbConnect(duckdb::duckdb(), ":memory:") 90 | duckdb::duckdb_register(con_tmp, "df", df) 91 | duckdb:::sql(glue::glue("COPY df TO '{data_path}' (FORMAT PARQUET)"), con_tmp) 92 | DBI::dbDisconnect(con_tmp) 93 | } 94 | ``` 95 | 96 | #### Shell 97 | 98 | This is a sample command to download the Parquet file from the `kevinheavey/modern-polars` GitHub repository. 
99 | 100 | ```{.bash filename=Terminal} 101 | mkdir data 102 | curl -sL https://github.com/kevinheavey/modern-polars/raw/d67d6f95ce0de8aad5492c4497ac4c3e33d696e8/data/ohlcv.pq -o data/ohlcv.parquet 103 | ``` 104 | 105 | ::: 106 | 107 | ### Load the Data 108 | 109 | After the Parquet file is ready, load that into DuckDB (in-memory) database table, R DataFrame, and Python polars.LazyFrame. 110 | 111 | :::{.panel-tabset} 112 | 113 | #### DuckDB 114 | 115 | ```{r} 116 | #| filename: R 117 | #| cache: false 118 | #| output: false 119 | #| echo: false 120 | con <- DBI::dbConnect(duckdb::duckdb(), ":memory:") 121 | ``` 122 | 123 | ```{glue_sql} 124 | #| filename: SQL 125 | #| cache: false 126 | #| output: false 127 | CREATE TABLE tab AS FROM 'data/ohlcv.parquet' 128 | ``` 129 | 130 | ```{glue_sql} 131 | #| filename: SQL 132 | FROM tab 133 | LIMIT 5 134 | ``` 135 | 136 | #### R DataFrame 137 | 138 | ```{r} 139 | #| filename: R 140 | #| cache: false 141 | #| output: false 142 | library(dplyr, warn.conflicts = FALSE) 143 | 144 | df <- duckdb:::sql("FROM 'data/ohlcv.parquet'") 145 | ``` 146 | 147 | ```{r} 148 | #| filename: R 149 | df |> head(5) 150 | ``` 151 | 152 | #### Python polars.LazyFrame 153 | 154 | ```{python} 155 | #| filename: Python 156 | #| cache: false 157 | #| output: false 158 | import polars as pl 159 | 160 | lf = pl.scan_parquet("data/ohlcv.parquet") 161 | ``` 162 | 163 | ```{python} 164 | #| filename: Python 165 | lf.fetch(5) 166 | ``` 167 | 168 | ::: 169 | 170 | ## Filtering 171 | 172 | :::{.panel-tabset} 173 | 174 | ### PRQL DuckDB 175 | 176 | ```{prql} 177 | #| filename: PRQL 178 | #| engine-opts: 179 | #| use_glue: false 180 | from tab 181 | filter s"date_part(['year', 'month'], time) = {{year: 2021, month: 2}}" 182 | take 5 183 | ``` 184 | 185 | ### SQL DuckDB 186 | 187 | :::{.content-hidden} 188 | 189 | Because of the bug of kintr's sql engine , 190 | I want to use the following block as `glue_sql`. 191 | But in `glue_sql` code blocks, we should escape `{` and `}` as `{{` and `}}`, 192 | so I mark this block as `sql` and `cache: false`. 193 | 194 | ::: 195 | 196 | ```{sql} 197 | --| filename: SQL 198 | --| cache.lazy: false 199 | FROM tab 200 | WHERE date_part(['year', 'month'], time) = {year: 2021, month: 2} 201 | LIMIT 5 202 | ``` 203 | 204 | ### dplyr R 205 | 206 | ```{r} 207 | #| filename: R 208 | df |> 209 | filter( 210 | lubridate::floor_date(time, "month") == lubridate::make_datetime(2021, 2) 211 | ) |> 212 | head(5) 213 | ``` 214 | 215 | ### Python Polars 216 | 217 | ```{python} 218 | #| filename: Python 219 | ( 220 | lf.filter((pl.col("time").dt.year() == 2021) & (pl.col("time").dt.month() == 2)) 221 | .head(5) 222 | .collect() 223 | ) 224 | ``` 225 | 226 | ::: 227 | 228 | ## Downsampling 229 | 230 | It is important to note carefully how units such as `5 days` or `1 week` actually work. 231 | In other words, where to start counting `5 days` or `1 week` could be completely different in each system. 232 | 233 | Here, we should note that `time_bucket` in DuckDB, `lubridate::floor_date` in R, 234 | and `group_by_dynamic` in Polars have completely different initial starting points by default. 235 | 236 | - The DuckDB function `time_bucket`'s origin defaults to `2000-01-03 00:00:00+00` for days or weeks interval.[^time_bucket] 237 | - In the R `lubridate::floor_date` function, timestamp is floored using the number of days elapsed 238 | since the beginning of every month when specifying `"5 days"` to the `unit` argument. 
239 | 240 | ```{r} 241 | #| filename: R 242 | lubridate::as_date(c("2023-01-31", "2023-02-01")) |> 243 | lubridate::floor_date("5 days") 244 | ``` 245 | 246 | And when `"1 week"` to the `unit` argument, it is floored to the nearest week, 247 | Sunday through Saturday. 248 | 249 | ```{r} 250 | #| filename: R 251 | lubridate::as_date(c("2023-01-31", "2023-02-01")) |> 252 | lubridate::floor_date("1 week") 253 | ``` 254 | 255 | To start from an arbitrary origin, all breaks must be specified as a vector in the unit argument.[^floor_date] 256 | 257 | ```{r} 258 | #| filename: R 259 | lubridate::as_date(c("2023-01-31", "2023-02-01")) |> 260 | lubridate::floor_date(lubridate::make_date(2023, 1, 31)) 261 | ``` 262 | 263 | - `group_by_dynamic` of Polars, the `offset` parameter to specify the origin point, is negative `every` by default.[^group_by_dynamic] 264 | 265 | [^time_bucket]: 266 | [^floor_date]: 267 | [^group_by_dynamic]: 268 | 269 | :::{.panel-tabset} 270 | 271 | ### PRQL DuckDB 272 | 273 | ```{prql} 274 | #| filename: PRQL 275 | from tab 276 | derive { 277 | time_new = s""" 278 | time_bucket(INTERVAL '5 days', time, (FROM tab SELECT min(time))) 279 | """ 280 | } 281 | group {time_new} ( 282 | aggregate { 283 | open = average open, 284 | high = average high, 285 | low = average low, 286 | close = average close, 287 | volume = average volume 288 | } 289 | ) 290 | sort time_new 291 | take 5 292 | ``` 293 | 294 | ### SQL DuckDB 295 | 296 | ```{glue_sql} 297 | #| filename: SQL 298 | WITH _tab1 AS ( 299 | FROM tab 300 | SELECT 301 | * REPLACE (time_bucket(INTERVAL '5 days', time, (FROM tab SELECT min(time)))) AS time 302 | ) 303 | 304 | FROM _tab1 305 | SELECT 306 | time, 307 | avg(COLUMNS(x -> x NOT IN ('time'))) 308 | GROUP BY time 309 | ORDER BY time 310 | LIMIT 5 311 | ``` 312 | 313 | ### dplyr R 314 | 315 | ```{r} 316 | #| filename: R 317 | df |> 318 | mutate( 319 | time = time |> 320 | (\(x) lubridate::floor_date(x, seq(min(x), max(x), by = 5)))() 321 | ) |> 322 | summarise(across(everything(), mean), .by = time) |> 323 | head(5) 324 | ``` 325 | 326 | ### Python Polars 327 | 328 | ```{python} 329 | #| filename: Python 330 | ( 331 | lf.sort("time") 332 | .group_by_dynamic("time", every="5d") 333 | .agg(pl.col(pl.Float64).mean()) 334 | .head(5) 335 | .collect() 336 | ) 337 | ``` 338 | 339 | ::: 340 | 341 | :::{.panel-tabset} 342 | 343 | ### PRQL DuckDB 344 | 345 | ```{prql} 346 | #| filename: PRQL 347 | from tab 348 | derive { 349 | time_new = s""" 350 | time_bucket(INTERVAL '7 days', time, (FROM tab SELECT min(time))) 351 | """ 352 | } 353 | group {time_new} ( 354 | aggregate { 355 | open_mean = average open, 356 | high_mean = average high, 357 | low_mean = average low, 358 | close_mean = average close, 359 | volume_mean = average volume, 360 | open_sum = sum open, 361 | high_sum = sum high, 362 | low_sum = sum low, 363 | close_sum = sum close, 364 | volume_sum = sum volume 365 | } 366 | ) 367 | sort time_new 368 | take 5 369 | ``` 370 | 371 | ### SQL DukcDB 372 | 373 | ```{glue_sql} 374 | #| filename: SQL 375 | WITH _tab1 AS ( 376 | FROM tab 377 | SELECT 378 | * REPLACE (time_bucket(INTERVAL '7 days', time, (FROM tab SELECT min(time)))) AS time 379 | ) 380 | 381 | FROM _tab1 382 | SELECT 383 | time, 384 | avg(COLUMNS(x -> x NOT IN ('time'))), 385 | sum(COLUMNS(x -> x NOT IN ('time'))) 386 | GROUP BY time 387 | ORDER BY time 388 | LIMIT 5 389 | ``` 390 | 391 | ### dplyr R 392 | 393 | ```{r} 394 | #| filename: R 395 | df |> 396 | mutate( 397 | time = time |> 398 | (\(x) 
lubridate::floor_date(x, seq(min(x), max(x), by = 7)))() 399 | ) |> 400 | summarise( 401 | across( 402 | everything(), 403 | list(mean = mean, sum = sum), 404 | .names = "{.col}_{.fn}" 405 | ), 406 | .by = time 407 | ) |> 408 | head(5) 409 | ``` 410 | 411 | ### Python Polars 412 | 413 | ```{python} 414 | #| filename: Python 415 | ( 416 | lf.sort("time") 417 | .group_by_dynamic("time", every="1w") 418 | .agg( 419 | [ 420 | pl.col(pl.Float64).mean().name.suffix("_mean"), 421 | pl.col(pl.Float64).sum().name.suffix("_sum"), 422 | ] 423 | ) 424 | .head(5) 425 | .collect() 426 | ) 427 | ``` 428 | 429 | ::: 430 | 431 | ## Upsampling 432 | 433 | The way to use a function like `generate_series` to generate sequential values and then join them is general-purpose. 434 | 435 | In R, we can also use dedicated functions like 436 | [`timetk::pad_by_time`](https://business-science.github.io/timetk/reference/pad_by_time.html). 437 | 438 | :::{.panel-tabset} 439 | 440 | ### PRQL DuckDB 441 | 442 | :::{.callout-important} 443 | 444 | This example does not work with prql-compiler 0.11.1. 445 | ([PRQL/prql#3129](https://github.com/PRQL/prql/issues/3129)) 446 | 447 | ::: 448 | 449 | ```{.prql filename=PRQL} 450 | let _tab1 = s""" 451 | SELECT 452 | generate_series( 453 | (SELECT min(time)), 454 | (SELECT max(time)), 455 | INTERVAL '6 hours' 456 | ).unnest() AS time 457 | FROM tab 458 | """ 459 | 460 | from _tab1 461 | join side:left tab (==time) 462 | sort tab.time 463 | select !{tab.time} 464 | take 5 465 | ``` 466 | 467 | ### SQL DuckDB 468 | 469 | ```{glue_sql} 470 | #| filename: SQL 471 | WITH _tab1 AS ( 472 | SELECT 473 | generate_series( 474 | (FROM tab SELECT min(time)), 475 | (FROM tab SELECT max(time)), 476 | INTERVAL '6 hours' 477 | ).unnest() AS time 478 | ) 479 | 480 | FROM _tab1 481 | LEFT JOIN tab USING (time) 482 | ORDER BY time 483 | LIMIT 5 484 | ``` 485 | 486 | ### dplyr R 487 | 488 | ```{r} 489 | #| filename: R 490 | .grid <- df$time |> 491 | lubridate::as_datetime() |> 492 | (\(x) seq(min(x), max(x), by = "6 hours"))() |> 493 | tibble::tibble(time = _) 494 | 495 | .grid |> 496 | left_join(df, by = "time") |> 497 | head(5) 498 | ``` 499 | 500 | ### Python Polars 501 | 502 | ```{python} 503 | #| filename: Python 504 | lf.collect().sort("time").upsample("time", every="6h").head(5) 505 | ``` 506 | 507 | ::: 508 | 509 | ## Window Functions 510 | 511 | It is necessary to be careful how the Window function calculates 512 | if the width of the window is less than the specified value. 513 | 514 | ### Moving Average, Cumulative Avarage {#sec-moving-ave} 515 | 516 | PRQL has a dedicated way of applying the window to the entire table. 517 | For the others, use a individual function for each column. 518 | 519 | In R, base R have some window functions like `cumsum`, but none like cumulative avarage. 520 | dplyr complements this with several functions, including `cummean`. 521 | 522 | Polars does not yet have a dedicated function to compute cumulative averages, 523 | so we must use cumulative sums to compute them. 524 | 525 | :::{.callout-note} 526 | 527 | [The original Modern Pandas post](https://tomaugspurger.github.io/posts/modern-7-timeseries/#rolling--expanding--ew) 528 | and [the Modern Polars book](https://kevinheavey.github.io/modern-polars/timeseries.html#rolling-expanding-ew) 529 | have a exponentially weighted (EW) calculation example in addition. 530 | But DuckDB does not have a dedicated function to do this, so it is omitted here. 
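For reference, Polars itself does offer exponentially weighted aggregations; a minimal sketch, assuming `Expr.ewm_mean` is available in the installed Polars version:

```{.python filename="Python"}
# Hypothetical sketch of an exponentially weighted moving average in Polars;
# it is not part of the comparison because DuckDB has no direct equivalent.
import polars as pl

ew = pl.scan_parquet("data/ohlcv.parquet").select(
    pl.col("time"),
    pl.col("close").alias("Raw"),
    pl.col("close").ewm_mean(span=10).alias("10D EWMA"),
)
print(ew.head(5).collect())
```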
531 | 
532 | :::
533 | 
534 | :::{.panel-tabset}
535 | 
536 | #### PRQL DuckDB
537 | 
538 | ```{prql}
539 | #| filename: PRQL
540 | from tab
541 | sort this.time
542 | window rolling:28 (
543 |   derive {`28D MA` = average close}
544 | )
545 | window rows:..0 (
546 |   derive {`Expanding Average` = average close}
547 | )
548 | select {
549 |   this.time,
550 |   Raw = close,
551 |   `28D MA`,
552 |   `Expanding Average`
553 | }
554 | take 26..30
555 | ```
556 | 
557 | #### SQL DuckDB
558 | 
559 | ```{glue_sql}
560 | #| filename: SQL
561 | FROM tab
562 | SELECT
563 |   time,
564 |   close AS "Raw",
565 |   avg(close) OVER (
566 |     ORDER BY time
567 |     ROWS BETWEEN 27 PRECEDING AND CURRENT ROW
568 |   ) AS "28D MA",
569 |   avg(close) OVER (
570 |     ORDER BY time
571 |     ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
572 |   ) AS "Expanding Average"
573 | LIMIT 5 OFFSET 25
574 | ```
575 | 
576 | #### dplyr R
577 | 
578 | ```{r}
579 | #| filename: R
580 | #| cache: false
581 | roll_and_expand <- df |>
582 |   arrange(time) |>
583 |   mutate(
584 |     time,
585 |     Raw = close,
586 |     `28D MA` = close |>
587 |       slider::slide_vec(mean, .before = 27, .complete = TRUE),
588 |     `Expanding Average` = cummean(close),
589 |     .keep = "none"
590 |   )
591 | ```
592 | 
593 | ```{r}
594 | #| filename: R
595 | roll_and_expand |>
596 |   slice(26:30)
597 | ```
598 | 
599 | #### Python Polars
600 | 
601 | ```{python}
602 | #| filename: Python
603 | #| cache: false
604 | close = pl.col("close")
605 | 
606 | roll_and_expand = lf.sort("time").select(
607 |     [
608 |         pl.col("time"),
609 |         close.alias("Raw"),
610 |         close.rolling_mean(28).alias("28D MA"),
611 |         close.alias("Expanding Average").cum_sum() / (close.cum_count() + 1),
612 |     ]
613 | )
614 | ```
615 | 
616 | ```{python}
617 | #| filename: Python
618 | roll_and_expand.head(30).tail(5).collect()
619 | ```
620 | 
621 | :::
622 | 
623 | Here, for the `28D MA` column, DuckDB also calculates an average when the window contains fewer than 28 rows,
624 | whereas R's `slider::slide_vec(.complete = TRUE)` and Polars' `rolling_mean` leave those rows as missing values.
625 | If we are using DuckDB and want those incomplete windows to be `NULL` instead,
626 | we have to add further processing ourselves, as in the sketch below.
627 | 
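A minimal sketch of such processing (display only, not part of the comparison above) is to count
the rows in the same frame and return the average only when the window is complete:

```{.sql filename=SQL}
FROM tab
SELECT
  time,
  close AS "Raw",
  -- return the average only when the frame already holds the full 28 rows,
  -- otherwise fall through to NULL like slider and Polars do
  CASE
    WHEN count(*) OVER (
      ORDER BY time
      ROWS BETWEEN 27 PRECEDING AND CURRENT ROW
    ) >= 28
    THEN avg(close) OVER (
      ORDER BY time
      ROWS BETWEEN 27 PRECEDING AND CURRENT ROW
    )
  END AS "28D MA"
ORDER BY time
LIMIT 5 OFFSET 25
```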
628 | Plotting the results of dplyr shows the following.
629 | 
630 | ```{r}
631 | #| filename: R
632 | #| label: plot-window-functions
633 | #| code-fold: true
634 | #| warning: false
635 | library(ggplot2)
636 | 
637 | roll_and_expand |>
638 |   tidyr::pivot_longer(cols = !time) |>
639 |   ggplot(aes(time, value, colour = name)) +
640 |   geom_line() +
641 |   theme_linedraw() +
642 |   labs(y = "Close ($)") +
643 |   scale_x_date(
644 |     date_breaks = "month",
645 |     labels = scales::label_date_short()
646 |   )
647 | ```
648 | 
649 | ### Combining Rolling Aggregations
650 | 
651 | :::{.panel-tabset}
652 | 
653 | #### PRQL DuckDB
654 | 
655 | ```{prql}
656 | #| filename: PRQL
657 | from tab
658 | sort this.time
659 | window rows:-15..14 (
660 |   select {
661 |     this.time,
662 |     mean = average close,
663 |     std = stddev close
664 |   }
665 | )
666 | take 13..17
667 | ```
668 | 
669 | #### SQL DuckDB
670 | 
671 | ```{glue_sql}
672 | #| filename: SQL
673 | FROM tab
674 | SELECT
675 |   time,
676 |   avg(close) OVER (
677 |     ORDER BY time
678 |     ROWS BETWEEN 15 PRECEDING AND 14 FOLLOWING
679 |   ) AS mean,
680 |   stddev(close) OVER (
681 |     ORDER BY time
682 |     ROWS BETWEEN 15 PRECEDING AND 14 FOLLOWING
683 |   ) AS std
684 | ORDER BY time
685 | LIMIT 5 OFFSET 12
686 | ```
687 | 
688 | #### dplyr R
689 | 
690 | ```{r}
691 | #| filename: R
692 | #| cache: false
693 | .slide_func <- function(.x, .fn) {
694 |   slider::slide_vec(.x, .fn, .before = 15, .after = 14, .complete = TRUE)
695 | }
696 | 
697 | mean_std <- df |>
698 |   arrange(time) |>
699 |   mutate(
700 |     time,
701 |     across(
702 |       close,
703 |       .fns = list(mean = \(x) .slide_func(x, mean), std = \(x) .slide_func(x, sd)),
704 |       .names = "{.fn}"
705 |     ),
706 |     .keep = "none"
707 |   )
708 | ```
709 | 
710 | ```{r}
711 | #| filename: R
712 | mean_std |>
713 |   slice(13:17)
714 | ```
715 | 
716 | #### Python Polars
717 | 
718 | ```{python}
719 | #| filename: Python
720 | #| cache: false
721 | mean_std = lf.sort("time").select(
722 |     time=pl.col("time"),
723 |     mean=pl.col("close").rolling_mean(30, center=True),
724 |     std=pl.col("close").rolling_std(30, center=True),
725 | )
726 | ```
727 | 
728 | ```{python}
729 | #| filename: Python
730 | mean_std.head(17).tail(5).collect()
731 | ```
732 | 
733 | :::
734 | 
735 | As in @sec-moving-ave, here too the DuckDB results differ from the others.
736 | 
737 | Plotting the results of dplyr shows the following.
738 | 
739 | ```{r}
740 | #| filename: R
741 | #| label: plot-rolling-combined
742 | #| code-fold: true
743 | #| warning: false
744 | library(ggplot2)
745 | 
746 | mean_std |>
747 |   ggplot(aes(time)) +
748 |   geom_ribbon(
749 |     aes(ymin = mean - std, ymax = mean + std),
750 |     alpha = 0.3, fill = "blue"
751 |   ) +
752 |   geom_line(aes(y = mean), color = "blue") +
753 |   theme_linedraw() +
754 |   labs(y = "Close ($)") +
755 |   scale_x_date(
756 |     date_breaks = "month",
757 |     labels = scales::label_date_short()
758 |   )
759 | ```
760 | 
761 | ## Timezones
762 | 
763 | :::{.callout-important}
764 | 
765 | In DuckDB, the icu extension is needed for time zone support.
766 | If the DuckDB client we are using does not bundle the extension, we need to install and load it.
767 | 
768 | ```{sql}
769 | --| filename: SQL
770 | --| cache: false
771 | --| warning: false
772 | INSTALL 'icu'
773 | ```
774 | 
775 | ```{sql}
776 | --| filename: SQL
777 | --| cache: false
778 | --| warning: false
779 | LOAD 'icu'
780 | ```
781 | 
782 | :::
783 | 
784 | :::{.panel-tabset}
785 | 
786 | ### PRQL DuckDB
787 | 
788 | ```{prql}
789 | #| filename: PRQL
790 | let timezone = tz col -> s"timezone({tz}, {col})"
791 | 
792 | from tab
793 | derive {
794 |   time_new = (this.time | timezone "UTC" | timezone "US/Eastern")
795 | }
796 | select !{this.time}
797 | take 5
798 | ```
799 | 
800 | ### SQL DuckDB
801 | 
802 | ```{glue_sql}
803 | #| filename: SQL
804 | FROM tab
805 | SELECT
806 |   * REPLACE timezone('US/Eastern', timezone('UTC', time)) AS time
807 | LIMIT 5
808 | ```
809 | 
810 | ### dplyr R
811 | 
812 | ```{r}
813 | #| filename: R
814 | df |>
815 |   mutate(
816 |     time = time |>
817 |       lubridate::force_tz("UTC") |>
818 |       lubridate::with_tz("US/Eastern")
819 |   ) |>
820 |   head(5)
821 | ```
822 | 
823 | ### Python Polars
824 | 
825 | ```{python}
826 | #| filename: Python
827 | (
828 |     lf.with_columns(
829 |         pl.col("time")
830 |         .cast(pl.Datetime)
831 |         .dt.replace_time_zone("UTC")
832 |         .dt.convert_time_zone("US/Eastern")
833 |     )
834 |     .head(5)
835 |     .collect()
836 | )
837 | ```
838 | 
839 | :::
840 | 
841 | Note that each system may store time zone information in a different way.
842 | Here, the `time` column (and the `time_new` column) in the DuckDB results
843 | is of the TIMESTAMP type, which carries no time zone information.
844 | 
-------------------------------------------------------------------------------- /docusaurus.config.js: --------------------------------------------------------------------------------
1 | // @ts-check
2 | // Note: type annotations allow type checking and IDEs autocompletion
3 | 
4 | const { themes } = require('prism-react-renderer');
5 | const lightTheme = themes.github;
6 | const darkTheme = themes.dracula;
7 | 
8 | /** @type {import('@docusaurus/types').Config} */
9 | const config = {
10 |   title: "Querying with PRQL",
11 |   tagline: "Data transformation with PRQL and DuckDB",
12 |   favicon: "img/favicon.ico",
13 | 
14 |   // Set the production url of your site here
15 |   url: "https://eitsupi.github.io/",
16 |   // Set the /<baseUrl>/ pathname under which your site is served
17 |   // For GitHub pages deployment, it is often '/<projectName>/'
18 |   baseUrl: "/querying-with-prql/",
19 | 
20 |   // GitHub pages deployment config.
21 |   // If you aren't using GitHub pages, you don't need these.
22 |   organizationName: "eitsupi", // Usually your GitHub org/user name.
23 |   projectName: "querying-with-prql", // Usually your repo name.
24 | 
25 |   onBrokenLinks: "throw",
26 |   onBrokenMarkdownLinks: "warn",
27 | 
28 |   // Even if you don't use internalization, you can use this field to set useful
29 |   // metadata like html lang. For example, if your site is Chinese, you may want
30 |   // to replace "en" with "zh-Hans".
31 | i18n: { 32 | defaultLocale: "en", 33 | locales: ["en"], 34 | }, 35 | 36 | presets: [ 37 | [ 38 | "classic", 39 | /** @type {import('@docusaurus/preset-classic').Options} */ 40 | ({ 41 | docs: { 42 | routeBasePath: "/", 43 | sidebarPath: require.resolve("./sidebars.js"), 44 | // editUrl: 'https://github.com/eitsupi/querying-with-prql/tree/main/', 45 | }, 46 | blog: false, 47 | theme: { 48 | customCss: require.resolve("./src/css/custom.css"), 49 | }, 50 | }), 51 | ], 52 | ], 53 | 54 | themeConfig: 55 | /** @type {import('@docusaurus/preset-classic').ThemeConfig} */ 56 | ({ 57 | // Replace with your project's social card 58 | // image: 'img/docusaurus-social-card.jpg', 59 | navbar: { 60 | title: "Quering with PRQL", 61 | logo: { 62 | alt: "Logo", 63 | src: "img/logo.svg", 64 | }, 65 | items: [ 66 | { 67 | href: "https://github.com/eitsupi/querying-with-prql", 68 | label: "GitHub", 69 | position: "right", 70 | }, 71 | ], 72 | }, 73 | footer: { 74 | style: "dark", 75 | links: [ 76 | { 77 | title: "PRQL", 78 | items: [ 79 | { 80 | label: "Website", 81 | href: "https://prql-lang.org/", 82 | }, 83 | { 84 | label: "GitHub", 85 | href: "https://github.com/PRQL/prql", 86 | }, 87 | { 88 | label: "Language book", 89 | href: "https://prql-lang.org/book/", 90 | }, 91 | { 92 | label: "Playground", 93 | href: "https://prql-lang.org/playground/", 94 | }, 95 | ], 96 | }, 97 | ], 98 | copyright: `Copyright © ${new Date().getFullYear()} @eitsupi. Built with Quarto and Docusaurus.`, 99 | }, 100 | prism: { 101 | additionalLanguages: ["bash", "python", "sql", "r", "elm"], 102 | }, 103 | }), 104 | themes: [ 105 | [ 106 | require.resolve("@easyops-cn/docusaurus-search-local"), 107 | { 108 | hashed: true, 109 | docsRouteBasePath: "/", 110 | }, 111 | ], 112 | ], 113 | }; 114 | 115 | module.exports = config; 116 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "", 3 | "version": "0.0.0", 4 | "private": true, 5 | "scripts": { 6 | "docusaurus": "docusaurus", 7 | "start": "docusaurus start", 8 | "build": "docusaurus build", 9 | "swizzle": "docusaurus swizzle", 10 | "deploy": "docusaurus deploy", 11 | "clear": "docusaurus clear", 12 | "serve": "docusaurus serve", 13 | "write-translations": "docusaurus write-translations", 14 | "write-heading-ids": "docusaurus write-heading-ids" 15 | }, 16 | "dependencies": { 17 | "@docusaurus/core": "^3.5.2", 18 | "@docusaurus/preset-classic": "^3.5.2", 19 | "@easyops-cn/docusaurus-search-local": "^0.44.5", 20 | "@mdx-js/react": "^3.0.1", 21 | "clsx": "^2.1.1", 22 | "prism-react-renderer": "^2.3.1", 23 | "react": "^18.3.1", 24 | "react-dom": "^18.3.1" 25 | }, 26 | "devDependencies": { 27 | "@docusaurus/module-type-aliases": "^3.5.2", 28 | "@docusaurus/types": "^3.5.2" 29 | }, 30 | "browserslist": { 31 | "production": [ 32 | ">0.5%", 33 | "not dead", 34 | "not op_mini all" 35 | ], 36 | "development": [ 37 | "last 1 chrome version", 38 | "last 1 firefox version", 39 | "last 1 safari version" 40 | ] 41 | }, 42 | "engines": { 43 | "node": ">=18.0" 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /renv.lock: -------------------------------------------------------------------------------- 1 | { 2 | "R": { 3 | "Version": "4.4.1", 4 | "Repositories": [ 5 | { 6 | "Name": "CRAN", 7 | "URL": "https://packagemanager.posit.co/cran/latest" 8 | } 9 | ] 10 | }, 11 | "Packages": { 12 | "DBI": { 13 | 
"Package": "DBI", 14 | "Version": "1.2.3", 15 | "Source": "Repository", 16 | "Repository": "RSPM", 17 | "Requirements": [ 18 | "R", 19 | "methods" 20 | ], 21 | "Hash": "065ae649b05f1ff66bb0c793107508f5" 22 | }, 23 | "MASS": { 24 | "Package": "MASS", 25 | "Version": "7.3-61", 26 | "Source": "Repository", 27 | "Repository": "RSPM", 28 | "Requirements": [ 29 | "R", 30 | "grDevices", 31 | "graphics", 32 | "methods", 33 | "stats", 34 | "utils" 35 | ], 36 | "Hash": "0cafd6f0500e5deba33be22c46bf6055" 37 | }, 38 | "Matrix": { 39 | "Package": "Matrix", 40 | "Version": "1.7-0", 41 | "Source": "Repository", 42 | "Repository": "RSPM", 43 | "Requirements": [ 44 | "R", 45 | "grDevices", 46 | "graphics", 47 | "grid", 48 | "lattice", 49 | "methods", 50 | "stats", 51 | "utils" 52 | ], 53 | "Hash": "1920b2f11133b12350024297d8a4ff4a" 54 | }, 55 | "R6": { 56 | "Package": "R6", 57 | "Version": "2.5.1", 58 | "Source": "Repository", 59 | "Repository": "RSPM", 60 | "Requirements": [ 61 | "R" 62 | ], 63 | "Hash": "470851b6d5d0ac559e9d01bb352b4021" 64 | }, 65 | "RColorBrewer": { 66 | "Package": "RColorBrewer", 67 | "Version": "1.1-3", 68 | "Source": "Repository", 69 | "Repository": "RSPM", 70 | "Requirements": [ 71 | "R" 72 | ], 73 | "Hash": "45f0398006e83a5b10b72a90663d8d8c" 74 | }, 75 | "Rcpp": { 76 | "Package": "Rcpp", 77 | "Version": "1.0.13", 78 | "Source": "Repository", 79 | "Repository": "CRAN", 80 | "Requirements": [ 81 | "methods", 82 | "utils" 83 | ], 84 | "Hash": "f27411eb6d9c3dada5edd444b8416675" 85 | }, 86 | "RcppTOML": { 87 | "Package": "RcppTOML", 88 | "Version": "0.2.2", 89 | "Source": "Repository", 90 | "Repository": "RSPM", 91 | "Requirements": [ 92 | "R", 93 | "Rcpp" 94 | ], 95 | "Hash": "c232938949fcd8126034419cc529333a" 96 | }, 97 | "askpass": { 98 | "Package": "askpass", 99 | "Version": "1.2.1", 100 | "Source": "Repository", 101 | "Repository": "CRAN", 102 | "Requirements": [ 103 | "sys" 104 | ], 105 | "Hash": "c39f4155b3ceb1a9a2799d700fbd4b6a" 106 | }, 107 | "base64enc": { 108 | "Package": "base64enc", 109 | "Version": "0.1-3", 110 | "Source": "Repository", 111 | "Repository": "RSPM", 112 | "Requirements": [ 113 | "R" 114 | ], 115 | "Hash": "543776ae6848fde2f48ff3816d0628bc" 116 | }, 117 | "bit": { 118 | "Package": "bit", 119 | "Version": "4.5.0", 120 | "Source": "Repository", 121 | "Repository": "CRAN", 122 | "Requirements": [ 123 | "R" 124 | ], 125 | "Hash": "5dc7b2677d65d0e874fc4aaf0e879987" 126 | }, 127 | "bit64": { 128 | "Package": "bit64", 129 | "Version": "4.5.2", 130 | "Source": "Repository", 131 | "Repository": "CRAN", 132 | "Requirements": [ 133 | "R", 134 | "bit", 135 | "methods", 136 | "stats", 137 | "utils" 138 | ], 139 | "Hash": "e84984bf5f12a18628d9a02322128dfd" 140 | }, 141 | "bslib": { 142 | "Package": "bslib", 143 | "Version": "0.8.0", 144 | "Source": "Repository", 145 | "Repository": "CRAN", 146 | "Requirements": [ 147 | "R", 148 | "base64enc", 149 | "cachem", 150 | "fastmap", 151 | "grDevices", 152 | "htmltools", 153 | "jquerylib", 154 | "jsonlite", 155 | "lifecycle", 156 | "memoise", 157 | "mime", 158 | "rlang", 159 | "sass" 160 | ], 161 | "Hash": "b299c6741ca9746fb227debcb0f9fb6c" 162 | }, 163 | "cachem": { 164 | "Package": "cachem", 165 | "Version": "1.1.0", 166 | "Source": "Repository", 167 | "Repository": "RSPM", 168 | "Requirements": [ 169 | "fastmap", 170 | "rlang" 171 | ], 172 | "Hash": "cd9a672193789068eb5a2aad65a0dedf" 173 | }, 174 | "cli": { 175 | "Package": "cli", 176 | "Version": "3.6.3", 177 | "Source": "Repository", 178 | "Repository": "CRAN", 179 | 
"Requirements": [ 180 | "R", 181 | "utils" 182 | ], 183 | "Hash": "b21916dd77a27642b447374a5d30ecf3" 184 | }, 185 | "clipr": { 186 | "Package": "clipr", 187 | "Version": "0.8.0", 188 | "Source": "Repository", 189 | "Repository": "RSPM", 190 | "Requirements": [ 191 | "utils" 192 | ], 193 | "Hash": "3f038e5ac7f41d4ac41ce658c85e3042" 194 | }, 195 | "colorspace": { 196 | "Package": "colorspace", 197 | "Version": "2.1-1", 198 | "Source": "Repository", 199 | "Repository": "CRAN", 200 | "Requirements": [ 201 | "R", 202 | "grDevices", 203 | "graphics", 204 | "methods", 205 | "stats" 206 | ], 207 | "Hash": "d954cb1c57e8d8b756165d7ba18aa55a" 208 | }, 209 | "cpp11": { 210 | "Package": "cpp11", 211 | "Version": "0.5.0", 212 | "Source": "Repository", 213 | "Repository": "CRAN", 214 | "Requirements": [ 215 | "R" 216 | ], 217 | "Hash": "91570bba75d0c9d3f1040c835cee8fba" 218 | }, 219 | "crayon": { 220 | "Package": "crayon", 221 | "Version": "1.5.3", 222 | "Source": "Repository", 223 | "Repository": "RSPM", 224 | "Requirements": [ 225 | "grDevices", 226 | "methods", 227 | "utils" 228 | ], 229 | "Hash": "859d96e65ef198fd43e82b9628d593ef" 230 | }, 231 | "curl": { 232 | "Package": "curl", 233 | "Version": "5.2.3", 234 | "Source": "Repository", 235 | "Repository": "CRAN", 236 | "Requirements": [ 237 | "R" 238 | ], 239 | "Hash": "d91263322a58af798f6cf3b13fd56dde" 240 | }, 241 | "digest": { 242 | "Package": "digest", 243 | "Version": "0.6.37", 244 | "Source": "Repository", 245 | "Repository": "CRAN", 246 | "Requirements": [ 247 | "R", 248 | "utils" 249 | ], 250 | "Hash": "33698c4b3127fc9f506654607fb73676" 251 | }, 252 | "dplyr": { 253 | "Package": "dplyr", 254 | "Version": "1.1.4", 255 | "Source": "Repository", 256 | "Repository": "RSPM", 257 | "Requirements": [ 258 | "R", 259 | "R6", 260 | "cli", 261 | "generics", 262 | "glue", 263 | "lifecycle", 264 | "magrittr", 265 | "methods", 266 | "pillar", 267 | "rlang", 268 | "tibble", 269 | "tidyselect", 270 | "utils", 271 | "vctrs" 272 | ], 273 | "Hash": "fedd9d00c2944ff00a0e2696ccf048ec" 274 | }, 275 | "duckdb": { 276 | "Package": "duckdb", 277 | "Version": "1.1.0", 278 | "Source": "Repository", 279 | "Repository": "CRAN", 280 | "Requirements": [ 281 | "DBI", 282 | "R", 283 | "methods", 284 | "utils" 285 | ], 286 | "Hash": "ac14e3bdcaab23293129b451fee02910" 287 | }, 288 | "evaluate": { 289 | "Package": "evaluate", 290 | "Version": "1.0.0", 291 | "Source": "Repository", 292 | "Repository": "CRAN", 293 | "Requirements": [ 294 | "R" 295 | ], 296 | "Hash": "6b567375113ceb7d9f800de4dd42218e" 297 | }, 298 | "fansi": { 299 | "Package": "fansi", 300 | "Version": "1.0.6", 301 | "Source": "Repository", 302 | "Repository": "RSPM", 303 | "Requirements": [ 304 | "R", 305 | "grDevices", 306 | "utils" 307 | ], 308 | "Hash": "962174cf2aeb5b9eea581522286a911f" 309 | }, 310 | "farver": { 311 | "Package": "farver", 312 | "Version": "2.1.2", 313 | "Source": "Repository", 314 | "Repository": "RSPM", 315 | "Hash": "680887028577f3fa2a81e410ed0d6e42" 316 | }, 317 | "fastmap": { 318 | "Package": "fastmap", 319 | "Version": "1.2.0", 320 | "Source": "Repository", 321 | "Repository": "RSPM", 322 | "Hash": "aa5e1cd11c2d15497494c5292d7ffcc8" 323 | }, 324 | "fontawesome": { 325 | "Package": "fontawesome", 326 | "Version": "0.5.2", 327 | "Source": "Repository", 328 | "Repository": "RSPM", 329 | "Requirements": [ 330 | "R", 331 | "htmltools", 332 | "rlang" 333 | ], 334 | "Hash": "c2efdd5f0bcd1ea861c2d4e2a883a67d" 335 | }, 336 | "fs": { 337 | "Package": "fs", 338 | "Version": "1.6.4", 339 | "Source": 
"Repository", 340 | "Repository": "RSPM", 341 | "Requirements": [ 342 | "R", 343 | "methods" 344 | ], 345 | "Hash": "15aeb8c27f5ea5161f9f6a641fafd93a" 346 | }, 347 | "generics": { 348 | "Package": "generics", 349 | "Version": "0.1.3", 350 | "Source": "Repository", 351 | "Repository": "RSPM", 352 | "Requirements": [ 353 | "R", 354 | "methods" 355 | ], 356 | "Hash": "15e9634c0fcd294799e9b2e929ed1b86" 357 | }, 358 | "ggplot2": { 359 | "Package": "ggplot2", 360 | "Version": "3.5.1", 361 | "Source": "Repository", 362 | "Repository": "RSPM", 363 | "Requirements": [ 364 | "MASS", 365 | "R", 366 | "cli", 367 | "glue", 368 | "grDevices", 369 | "grid", 370 | "gtable", 371 | "isoband", 372 | "lifecycle", 373 | "mgcv", 374 | "rlang", 375 | "scales", 376 | "stats", 377 | "tibble", 378 | "vctrs", 379 | "withr" 380 | ], 381 | "Hash": "44c6a2f8202d5b7e878ea274b1092426" 382 | }, 383 | "glue": { 384 | "Package": "glue", 385 | "Version": "1.8.0", 386 | "Source": "Repository", 387 | "Repository": "CRAN", 388 | "Requirements": [ 389 | "R", 390 | "methods" 391 | ], 392 | "Hash": "5899f1eaa825580172bb56c08266f37c" 393 | }, 394 | "gtable": { 395 | "Package": "gtable", 396 | "Version": "0.3.5", 397 | "Source": "Repository", 398 | "Repository": "RSPM", 399 | "Requirements": [ 400 | "R", 401 | "cli", 402 | "glue", 403 | "grid", 404 | "lifecycle", 405 | "rlang" 406 | ], 407 | "Hash": "e18861963cbc65a27736e02b3cd3c4a0" 408 | }, 409 | "here": { 410 | "Package": "here", 411 | "Version": "1.0.1", 412 | "Source": "Repository", 413 | "Repository": "RSPM", 414 | "Requirements": [ 415 | "rprojroot" 416 | ], 417 | "Hash": "24b224366f9c2e7534d2344d10d59211" 418 | }, 419 | "highr": { 420 | "Package": "highr", 421 | "Version": "0.11", 422 | "Source": "Repository", 423 | "Repository": "RSPM", 424 | "Requirements": [ 425 | "R", 426 | "xfun" 427 | ], 428 | "Hash": "d65ba49117ca223614f71b60d85b8ab7" 429 | }, 430 | "hms": { 431 | "Package": "hms", 432 | "Version": "1.1.3", 433 | "Source": "Repository", 434 | "Repository": "RSPM", 435 | "Requirements": [ 436 | "lifecycle", 437 | "methods", 438 | "pkgconfig", 439 | "rlang", 440 | "vctrs" 441 | ], 442 | "Hash": "b59377caa7ed00fa41808342002138f9" 443 | }, 444 | "htmltools": { 445 | "Package": "htmltools", 446 | "Version": "0.5.8.1", 447 | "Source": "Repository", 448 | "Repository": "RSPM", 449 | "Requirements": [ 450 | "R", 451 | "base64enc", 452 | "digest", 453 | "fastmap", 454 | "grDevices", 455 | "rlang", 456 | "utils" 457 | ], 458 | "Hash": "81d371a9cc60640e74e4ab6ac46dcedc" 459 | }, 460 | "httr": { 461 | "Package": "httr", 462 | "Version": "1.4.7", 463 | "Source": "Repository", 464 | "Repository": "RSPM", 465 | "Requirements": [ 466 | "R", 467 | "R6", 468 | "curl", 469 | "jsonlite", 470 | "mime", 471 | "openssl" 472 | ], 473 | "Hash": "ac107251d9d9fd72f0ca8049988f1d7f" 474 | }, 475 | "isoband": { 476 | "Package": "isoband", 477 | "Version": "0.2.7", 478 | "Source": "Repository", 479 | "Repository": "RSPM", 480 | "Requirements": [ 481 | "grid", 482 | "utils" 483 | ], 484 | "Hash": "0080607b4a1a7b28979aecef976d8bc2" 485 | }, 486 | "janitor": { 487 | "Package": "janitor", 488 | "Version": "2.2.0", 489 | "Source": "Repository", 490 | "Repository": "RSPM", 491 | "Requirements": [ 492 | "R", 493 | "dplyr", 494 | "hms", 495 | "lifecycle", 496 | "lubridate", 497 | "magrittr", 498 | "purrr", 499 | "rlang", 500 | "snakecase", 501 | "stringi", 502 | "stringr", 503 | "tidyr", 504 | "tidyselect" 505 | ], 506 | "Hash": "5baae149f1082f466df9d1442ba7aa65" 507 | }, 508 | "jquerylib": { 509 | 
"Package": "jquerylib", 510 | "Version": "0.1.4", 511 | "Source": "Repository", 512 | "Repository": "RSPM", 513 | "Requirements": [ 514 | "htmltools" 515 | ], 516 | "Hash": "5aab57a3bd297eee1c1d862735972182" 517 | }, 518 | "jsonlite": { 519 | "Package": "jsonlite", 520 | "Version": "1.8.9", 521 | "Source": "Repository", 522 | "Repository": "CRAN", 523 | "Requirements": [ 524 | "methods" 525 | ], 526 | "Hash": "4e993b65c2c3ffbffce7bb3e2c6f832b" 527 | }, 528 | "knitr": { 529 | "Package": "knitr", 530 | "Version": "1.48", 531 | "Source": "Repository", 532 | "Repository": "CRAN", 533 | "Requirements": [ 534 | "R", 535 | "evaluate", 536 | "highr", 537 | "methods", 538 | "tools", 539 | "xfun", 540 | "yaml" 541 | ], 542 | "Hash": "acf380f300c721da9fde7df115a5f86f" 543 | }, 544 | "labeling": { 545 | "Package": "labeling", 546 | "Version": "0.4.3", 547 | "Source": "Repository", 548 | "Repository": "RSPM", 549 | "Requirements": [ 550 | "graphics", 551 | "stats" 552 | ], 553 | "Hash": "b64ec208ac5bc1852b285f665d6368b3" 554 | }, 555 | "lattice": { 556 | "Package": "lattice", 557 | "Version": "0.22-6", 558 | "Source": "Repository", 559 | "Repository": "RSPM", 560 | "Requirements": [ 561 | "R", 562 | "grDevices", 563 | "graphics", 564 | "grid", 565 | "stats", 566 | "utils" 567 | ], 568 | "Hash": "cc5ac1ba4c238c7ca9fa6a87ca11a7e2" 569 | }, 570 | "lifecycle": { 571 | "Package": "lifecycle", 572 | "Version": "1.0.4", 573 | "Source": "Repository", 574 | "Repository": "RSPM", 575 | "Requirements": [ 576 | "R", 577 | "cli", 578 | "glue", 579 | "rlang" 580 | ], 581 | "Hash": "b8552d117e1b808b09a832f589b79035" 582 | }, 583 | "lubridate": { 584 | "Package": "lubridate", 585 | "Version": "1.9.3", 586 | "Source": "Repository", 587 | "Repository": "RSPM", 588 | "Requirements": [ 589 | "R", 590 | "generics", 591 | "methods", 592 | "timechange" 593 | ], 594 | "Hash": "680ad542fbcf801442c83a6ac5a2126c" 595 | }, 596 | "magrittr": { 597 | "Package": "magrittr", 598 | "Version": "2.0.3", 599 | "Source": "Repository", 600 | "Repository": "RSPM", 601 | "Requirements": [ 602 | "R" 603 | ], 604 | "Hash": "7ce2733a9826b3aeb1775d56fd305472" 605 | }, 606 | "memoise": { 607 | "Package": "memoise", 608 | "Version": "2.0.1", 609 | "Source": "Repository", 610 | "Repository": "RSPM", 611 | "Requirements": [ 612 | "cachem", 613 | "rlang" 614 | ], 615 | "Hash": "e2817ccf4a065c5d9d7f2cfbe7c1d78c" 616 | }, 617 | "mgcv": { 618 | "Package": "mgcv", 619 | "Version": "1.9-1", 620 | "Source": "Repository", 621 | "Repository": "RSPM", 622 | "Requirements": [ 623 | "Matrix", 624 | "R", 625 | "graphics", 626 | "methods", 627 | "nlme", 628 | "splines", 629 | "stats", 630 | "utils" 631 | ], 632 | "Hash": "110ee9d83b496279960e162ac97764ce" 633 | }, 634 | "mime": { 635 | "Package": "mime", 636 | "Version": "0.12", 637 | "Source": "Repository", 638 | "Repository": "RSPM", 639 | "Requirements": [ 640 | "tools" 641 | ], 642 | "Hash": "18e9c28c1d3ca1560ce30658b22ce104" 643 | }, 644 | "munsell": { 645 | "Package": "munsell", 646 | "Version": "0.5.1", 647 | "Source": "Repository", 648 | "Repository": "RSPM", 649 | "Requirements": [ 650 | "colorspace", 651 | "methods" 652 | ], 653 | "Hash": "4fd8900853b746af55b81fda99da7695" 654 | }, 655 | "nlme": { 656 | "Package": "nlme", 657 | "Version": "3.1-166", 658 | "Source": "Repository", 659 | "Repository": "CRAN", 660 | "Requirements": [ 661 | "R", 662 | "graphics", 663 | "lattice", 664 | "stats", 665 | "utils" 666 | ], 667 | "Hash": "ccbb8846be320b627e6aa2b4616a2ded" 668 | }, 669 | "openssl": { 670 | 
"Package": "openssl", 671 | "Version": "2.2.2", 672 | "Source": "Repository", 673 | "Repository": "CRAN", 674 | "Requirements": [ 675 | "askpass" 676 | ], 677 | "Hash": "d413e0fef796c9401a4419485f709ca1" 678 | }, 679 | "pillar": { 680 | "Package": "pillar", 681 | "Version": "1.9.0", 682 | "Source": "Repository", 683 | "Repository": "RSPM", 684 | "Requirements": [ 685 | "cli", 686 | "fansi", 687 | "glue", 688 | "lifecycle", 689 | "rlang", 690 | "utf8", 691 | "utils", 692 | "vctrs" 693 | ], 694 | "Hash": "15da5a8412f317beeee6175fbc76f4bb" 695 | }, 696 | "pkgconfig": { 697 | "Package": "pkgconfig", 698 | "Version": "2.0.3", 699 | "Source": "Repository", 700 | "Repository": "RSPM", 701 | "Requirements": [ 702 | "utils" 703 | ], 704 | "Hash": "01f28d4278f15c76cddbea05899c5d6f" 705 | }, 706 | "png": { 707 | "Package": "png", 708 | "Version": "0.1-8", 709 | "Source": "Repository", 710 | "Repository": "RSPM", 711 | "Requirements": [ 712 | "R" 713 | ], 714 | "Hash": "bd54ba8a0a5faded999a7aab6e46b374" 715 | }, 716 | "prettyunits": { 717 | "Package": "prettyunits", 718 | "Version": "1.2.0", 719 | "Source": "Repository", 720 | "Repository": "RSPM", 721 | "Requirements": [ 722 | "R" 723 | ], 724 | "Hash": "6b01fc98b1e86c4f705ce9dcfd2f57c7" 725 | }, 726 | "progress": { 727 | "Package": "progress", 728 | "Version": "1.2.3", 729 | "Source": "Repository", 730 | "Repository": "RSPM", 731 | "Requirements": [ 732 | "R", 733 | "R6", 734 | "crayon", 735 | "hms", 736 | "prettyunits" 737 | ], 738 | "Hash": "f4625e061cb2865f111b47ff163a5ca6" 739 | }, 740 | "prqlr": { 741 | "Package": "prqlr", 742 | "Version": "0.9.0", 743 | "Source": "Repository", 744 | "Repository": "CRAN", 745 | "Requirements": [ 746 | "R" 747 | ], 748 | "Hash": "d45578a066c904d8bc36d69197161d0b" 749 | }, 750 | "purrr": { 751 | "Package": "purrr", 752 | "Version": "1.0.2", 753 | "Source": "Repository", 754 | "Repository": "RSPM", 755 | "Requirements": [ 756 | "R", 757 | "cli", 758 | "lifecycle", 759 | "magrittr", 760 | "rlang", 761 | "vctrs" 762 | ], 763 | "Hash": "1cba04a4e9414bdefc9dcaa99649a8dc" 764 | }, 765 | "rappdirs": { 766 | "Package": "rappdirs", 767 | "Version": "0.3.3", 768 | "Source": "Repository", 769 | "Repository": "RSPM", 770 | "Requirements": [ 771 | "R" 772 | ], 773 | "Hash": "5e3c5dc0b071b21fa128676560dbe94d" 774 | }, 775 | "readr": { 776 | "Package": "readr", 777 | "Version": "2.1.5", 778 | "Source": "Repository", 779 | "Repository": "RSPM", 780 | "Requirements": [ 781 | "R", 782 | "R6", 783 | "cli", 784 | "clipr", 785 | "cpp11", 786 | "crayon", 787 | "hms", 788 | "lifecycle", 789 | "methods", 790 | "rlang", 791 | "tibble", 792 | "tzdb", 793 | "utils", 794 | "vroom" 795 | ], 796 | "Hash": "9de96463d2117f6ac49980577939dfb3" 797 | }, 798 | "renv": { 799 | "Package": "renv", 800 | "Version": "1.0.10", 801 | "Source": "Repository", 802 | "Repository": "CRAN", 803 | "Requirements": [ 804 | "utils" 805 | ], 806 | "Hash": "d0387d5687ec933dd7587efd4cfa2d85" 807 | }, 808 | "reticulate": { 809 | "Package": "reticulate", 810 | "Version": "1.39.0", 811 | "Source": "Repository", 812 | "Repository": "CRAN", 813 | "Requirements": [ 814 | "Matrix", 815 | "R", 816 | "Rcpp", 817 | "RcppTOML", 818 | "graphics", 819 | "here", 820 | "jsonlite", 821 | "methods", 822 | "png", 823 | "rappdirs", 824 | "rlang", 825 | "utils", 826 | "withr" 827 | ], 828 | "Hash": "e1a5d04397edc1580c5e0ed1dbdccf76" 829 | }, 830 | "rlang": { 831 | "Package": "rlang", 832 | "Version": "1.1.4", 833 | "Source": "Repository", 834 | "Repository": "RSPM", 835 | "Requirements": 
[ 836 | "R", 837 | "utils" 838 | ], 839 | "Hash": "3eec01f8b1dee337674b2e34ab1f9bc1" 840 | }, 841 | "rmarkdown": { 842 | "Package": "rmarkdown", 843 | "Version": "2.28", 844 | "Source": "Repository", 845 | "Repository": "CRAN", 846 | "Requirements": [ 847 | "R", 848 | "bslib", 849 | "evaluate", 850 | "fontawesome", 851 | "htmltools", 852 | "jquerylib", 853 | "jsonlite", 854 | "knitr", 855 | "methods", 856 | "tinytex", 857 | "tools", 858 | "utils", 859 | "xfun", 860 | "yaml" 861 | ], 862 | "Hash": "062470668513dcda416927085ee9bdc7" 863 | }, 864 | "rprojroot": { 865 | "Package": "rprojroot", 866 | "Version": "2.0.4", 867 | "Source": "Repository", 868 | "Repository": "RSPM", 869 | "Requirements": [ 870 | "R" 871 | ], 872 | "Hash": "4c8415e0ec1e29f3f4f6fc108bef0144" 873 | }, 874 | "rvest": { 875 | "Package": "rvest", 876 | "Version": "1.0.4", 877 | "Source": "Repository", 878 | "Repository": "RSPM", 879 | "Requirements": [ 880 | "R", 881 | "cli", 882 | "glue", 883 | "httr", 884 | "lifecycle", 885 | "magrittr", 886 | "rlang", 887 | "selectr", 888 | "tibble", 889 | "xml2" 890 | ], 891 | "Hash": "0bcf0c6f274e90ea314b812a6d19a519" 892 | }, 893 | "sass": { 894 | "Package": "sass", 895 | "Version": "0.4.9", 896 | "Source": "Repository", 897 | "Repository": "RSPM", 898 | "Requirements": [ 899 | "R6", 900 | "fs", 901 | "htmltools", 902 | "rappdirs", 903 | "rlang" 904 | ], 905 | "Hash": "d53dbfddf695303ea4ad66f86e99b95d" 906 | }, 907 | "scales": { 908 | "Package": "scales", 909 | "Version": "1.3.0", 910 | "Source": "Repository", 911 | "Repository": "RSPM", 912 | "Requirements": [ 913 | "R", 914 | "R6", 915 | "RColorBrewer", 916 | "cli", 917 | "farver", 918 | "glue", 919 | "labeling", 920 | "lifecycle", 921 | "munsell", 922 | "rlang", 923 | "viridisLite" 924 | ], 925 | "Hash": "c19df082ba346b0ffa6f833e92de34d1" 926 | }, 927 | "selectr": { 928 | "Package": "selectr", 929 | "Version": "0.4-2", 930 | "Source": "Repository", 931 | "Repository": "RSPM", 932 | "Requirements": [ 933 | "R", 934 | "R6", 935 | "methods", 936 | "stringr" 937 | ], 938 | "Hash": "3838071b66e0c566d55cc26bd6e27bf4" 939 | }, 940 | "slider": { 941 | "Package": "slider", 942 | "Version": "0.3.1", 943 | "Source": "Repository", 944 | "Repository": "RSPM", 945 | "Requirements": [ 946 | "R", 947 | "cli", 948 | "rlang", 949 | "vctrs", 950 | "warp" 951 | ], 952 | "Hash": "a584625e2b9e4fad4be135c8ea5c99aa" 953 | }, 954 | "snakecase": { 955 | "Package": "snakecase", 956 | "Version": "0.11.1", 957 | "Source": "Repository", 958 | "Repository": "RSPM", 959 | "Requirements": [ 960 | "R", 961 | "stringi", 962 | "stringr" 963 | ], 964 | "Hash": "58767e44739b76965332e8a4fe3f91f1" 965 | }, 966 | "stringi": { 967 | "Package": "stringi", 968 | "Version": "1.8.4", 969 | "Source": "Repository", 970 | "Repository": "RSPM", 971 | "Requirements": [ 972 | "R", 973 | "stats", 974 | "tools", 975 | "utils" 976 | ], 977 | "Hash": "39e1144fd75428983dc3f63aa53dfa91" 978 | }, 979 | "stringr": { 980 | "Package": "stringr", 981 | "Version": "1.5.1", 982 | "Source": "Repository", 983 | "Repository": "RSPM", 984 | "Requirements": [ 985 | "R", 986 | "cli", 987 | "glue", 988 | "lifecycle", 989 | "magrittr", 990 | "rlang", 991 | "stringi", 992 | "vctrs" 993 | ], 994 | "Hash": "960e2ae9e09656611e0b8214ad543207" 995 | }, 996 | "sys": { 997 | "Package": "sys", 998 | "Version": "3.4.3", 999 | "Source": "Repository", 1000 | "Repository": "CRAN", 1001 | "Hash": "de342ebfebdbf40477d0758d05426646" 1002 | }, 1003 | "tibble": { 1004 | "Package": "tibble", 1005 | "Version": "3.2.1", 
1006 | "Source": "Repository", 1007 | "Repository": "RSPM", 1008 | "Requirements": [ 1009 | "R", 1010 | "fansi", 1011 | "lifecycle", 1012 | "magrittr", 1013 | "methods", 1014 | "pillar", 1015 | "pkgconfig", 1016 | "rlang", 1017 | "utils", 1018 | "vctrs" 1019 | ], 1020 | "Hash": "a84e2cc86d07289b3b6f5069df7a004c" 1021 | }, 1022 | "tidyr": { 1023 | "Package": "tidyr", 1024 | "Version": "1.3.1", 1025 | "Source": "Repository", 1026 | "Repository": "RSPM", 1027 | "Requirements": [ 1028 | "R", 1029 | "cli", 1030 | "cpp11", 1031 | "dplyr", 1032 | "glue", 1033 | "lifecycle", 1034 | "magrittr", 1035 | "purrr", 1036 | "rlang", 1037 | "stringr", 1038 | "tibble", 1039 | "tidyselect", 1040 | "utils", 1041 | "vctrs" 1042 | ], 1043 | "Hash": "915fb7ce036c22a6a33b5a8adb712eb1" 1044 | }, 1045 | "tidyselect": { 1046 | "Package": "tidyselect", 1047 | "Version": "1.2.1", 1048 | "Source": "Repository", 1049 | "Repository": "RSPM", 1050 | "Requirements": [ 1051 | "R", 1052 | "cli", 1053 | "glue", 1054 | "lifecycle", 1055 | "rlang", 1056 | "vctrs", 1057 | "withr" 1058 | ], 1059 | "Hash": "829f27b9c4919c16b593794a6344d6c0" 1060 | }, 1061 | "timechange": { 1062 | "Package": "timechange", 1063 | "Version": "0.3.0", 1064 | "Source": "Repository", 1065 | "Repository": "RSPM", 1066 | "Requirements": [ 1067 | "R", 1068 | "cpp11" 1069 | ], 1070 | "Hash": "c5f3c201b931cd6474d17d8700ccb1c8" 1071 | }, 1072 | "tinytex": { 1073 | "Package": "tinytex", 1074 | "Version": "0.53", 1075 | "Source": "Repository", 1076 | "Repository": "CRAN", 1077 | "Requirements": [ 1078 | "xfun" 1079 | ], 1080 | "Hash": "9db859e8aabbb474293dde3097839420" 1081 | }, 1082 | "tzdb": { 1083 | "Package": "tzdb", 1084 | "Version": "0.4.0", 1085 | "Source": "Repository", 1086 | "Repository": "RSPM", 1087 | "Requirements": [ 1088 | "R", 1089 | "cpp11" 1090 | ], 1091 | "Hash": "f561504ec2897f4d46f0c7657e488ae1" 1092 | }, 1093 | "utf8": { 1094 | "Package": "utf8", 1095 | "Version": "1.2.4", 1096 | "Source": "Repository", 1097 | "Repository": "RSPM", 1098 | "Requirements": [ 1099 | "R" 1100 | ], 1101 | "Hash": "62b65c52671e6665f803ff02954446e9" 1102 | }, 1103 | "vctrs": { 1104 | "Package": "vctrs", 1105 | "Version": "0.6.5", 1106 | "Source": "Repository", 1107 | "Repository": "RSPM", 1108 | "Requirements": [ 1109 | "R", 1110 | "cli", 1111 | "glue", 1112 | "lifecycle", 1113 | "rlang" 1114 | ], 1115 | "Hash": "c03fa420630029418f7e6da3667aac4a" 1116 | }, 1117 | "viridisLite": { 1118 | "Package": "viridisLite", 1119 | "Version": "0.4.2", 1120 | "Source": "Repository", 1121 | "Repository": "RSPM", 1122 | "Requirements": [ 1123 | "R" 1124 | ], 1125 | "Hash": "c826c7c4241b6fc89ff55aaea3fa7491" 1126 | }, 1127 | "vroom": { 1128 | "Package": "vroom", 1129 | "Version": "1.6.5", 1130 | "Source": "Repository", 1131 | "Repository": "RSPM", 1132 | "Requirements": [ 1133 | "R", 1134 | "bit64", 1135 | "cli", 1136 | "cpp11", 1137 | "crayon", 1138 | "glue", 1139 | "hms", 1140 | "lifecycle", 1141 | "methods", 1142 | "progress", 1143 | "rlang", 1144 | "stats", 1145 | "tibble", 1146 | "tidyselect", 1147 | "tzdb", 1148 | "vctrs", 1149 | "withr" 1150 | ], 1151 | "Hash": "390f9315bc0025be03012054103d227c" 1152 | }, 1153 | "warp": { 1154 | "Package": "warp", 1155 | "Version": "0.2.1", 1156 | "Source": "Repository", 1157 | "Repository": "RSPM", 1158 | "Requirements": [ 1159 | "R" 1160 | ], 1161 | "Hash": "fea474d578b1cbcb696ae6ac8bdcc439" 1162 | }, 1163 | "withr": { 1164 | "Package": "withr", 1165 | "Version": "3.0.1", 1166 | "Source": "Repository", 1167 | "Repository": "CRAN", 1168 | 
"Requirements": [ 1169 | "R", 1170 | "grDevices", 1171 | "graphics" 1172 | ], 1173 | "Hash": "07909200e8bbe90426fbfeb73e1e27aa" 1174 | }, 1175 | "xfun": { 1176 | "Package": "xfun", 1177 | "Version": "0.48", 1178 | "Source": "Repository", 1179 | "Repository": "CRAN", 1180 | "Requirements": [ 1181 | "R", 1182 | "grDevices", 1183 | "stats", 1184 | "tools" 1185 | ], 1186 | "Hash": "89e455b87c84e227eb7f60a1b4e5fe1f" 1187 | }, 1188 | "xml2": { 1189 | "Package": "xml2", 1190 | "Version": "1.3.6", 1191 | "Source": "Repository", 1192 | "Repository": "RSPM", 1193 | "Requirements": [ 1194 | "R", 1195 | "cli", 1196 | "methods", 1197 | "rlang" 1198 | ], 1199 | "Hash": "1d0336142f4cd25d8d23cd3ba7a8fb61" 1200 | }, 1201 | "yaml": { 1202 | "Package": "yaml", 1203 | "Version": "2.3.10", 1204 | "Source": "Repository", 1205 | "Repository": "CRAN", 1206 | "Hash": "51dab85c6c98e50a18d7551e9d49f76c" 1207 | } 1208 | } 1209 | } 1210 | -------------------------------------------------------------------------------- /renv/.gitignore: -------------------------------------------------------------------------------- 1 | library/ 2 | local/ 3 | cellar/ 4 | lock/ 5 | python/ 6 | sandbox/ 7 | staging/ 8 | -------------------------------------------------------------------------------- /renv/activate.R: -------------------------------------------------------------------------------- 1 | 2 | local({ 3 | 4 | # the requested version of renv 5 | version <- "1.0.10" 6 | attr(version, "sha") <- NULL 7 | 8 | # the project directory 9 | project <- Sys.getenv("RENV_PROJECT") 10 | if (!nzchar(project)) 11 | project <- getwd() 12 | 13 | # use start-up diagnostics if enabled 14 | diagnostics <- Sys.getenv("RENV_STARTUP_DIAGNOSTICS", unset = "FALSE") 15 | if (diagnostics) { 16 | start <- Sys.time() 17 | profile <- tempfile("renv-startup-", fileext = ".Rprof") 18 | utils::Rprof(profile) 19 | on.exit({ 20 | utils::Rprof(NULL) 21 | elapsed <- signif(difftime(Sys.time(), start, units = "auto"), digits = 2L) 22 | writeLines(sprintf("- renv took %s to run the autoloader.", format(elapsed))) 23 | writeLines(sprintf("- Profile: %s", profile)) 24 | print(utils::summaryRprof(profile)) 25 | }, add = TRUE) 26 | } 27 | 28 | # figure out whether the autoloader is enabled 29 | enabled <- local({ 30 | 31 | # first, check config option 32 | override <- getOption("renv.config.autoloader.enabled") 33 | if (!is.null(override)) 34 | return(override) 35 | 36 | # if we're being run in a context where R_LIBS is already set, 37 | # don't load -- presumably we're being run as a sub-process and 38 | # the parent process has already set up library paths for us 39 | rcmd <- Sys.getenv("R_CMD", unset = NA) 40 | rlibs <- Sys.getenv("R_LIBS", unset = NA) 41 | if (!is.na(rlibs) && !is.na(rcmd)) 42 | return(FALSE) 43 | 44 | # next, check environment variables 45 | # TODO: prefer using the configuration one in the future 46 | envvars <- c( 47 | "RENV_CONFIG_AUTOLOADER_ENABLED", 48 | "RENV_AUTOLOADER_ENABLED", 49 | "RENV_ACTIVATE_PROJECT" 50 | ) 51 | 52 | for (envvar in envvars) { 53 | envval <- Sys.getenv(envvar, unset = NA) 54 | if (!is.na(envval)) 55 | return(tolower(envval) %in% c("true", "t", "1")) 56 | } 57 | 58 | # enable by default 59 | TRUE 60 | 61 | }) 62 | 63 | # bail if we're not enabled 64 | if (!enabled) { 65 | 66 | # if we're not enabled, we might still need to manually load 67 | # the user profile here 68 | profile <- Sys.getenv("R_PROFILE_USER", unset = "~/.Rprofile") 69 | if (file.exists(profile)) { 70 | cfg <- Sys.getenv("RENV_CONFIG_USER_PROFILE", 
unset = "TRUE") 71 | if (tolower(cfg) %in% c("true", "t", "1")) 72 | sys.source(profile, envir = globalenv()) 73 | } 74 | 75 | return(FALSE) 76 | 77 | } 78 | 79 | # avoid recursion 80 | if (identical(getOption("renv.autoloader.running"), TRUE)) { 81 | warning("ignoring recursive attempt to run renv autoloader") 82 | return(invisible(TRUE)) 83 | } 84 | 85 | # signal that we're loading renv during R startup 86 | options(renv.autoloader.running = TRUE) 87 | on.exit(options(renv.autoloader.running = NULL), add = TRUE) 88 | 89 | # signal that we've consented to use renv 90 | options(renv.consent = TRUE) 91 | 92 | # load the 'utils' package eagerly -- this ensures that renv shims, which 93 | # mask 'utils' packages, will come first on the search path 94 | library(utils, lib.loc = .Library) 95 | 96 | # unload renv if it's already been loaded 97 | if ("renv" %in% loadedNamespaces()) 98 | unloadNamespace("renv") 99 | 100 | # load bootstrap tools 101 | ansify <- function(text) { 102 | if (renv_ansify_enabled()) 103 | renv_ansify_enhanced(text) 104 | else 105 | renv_ansify_default(text) 106 | } 107 | 108 | renv_ansify_enabled <- function() { 109 | 110 | override <- Sys.getenv("RENV_ANSIFY_ENABLED", unset = NA) 111 | if (!is.na(override)) 112 | return(as.logical(override)) 113 | 114 | pane <- Sys.getenv("RSTUDIO_CHILD_PROCESS_PANE", unset = NA) 115 | if (identical(pane, "build")) 116 | return(FALSE) 117 | 118 | testthat <- Sys.getenv("TESTTHAT", unset = "false") 119 | if (tolower(testthat) %in% "true") 120 | return(FALSE) 121 | 122 | iderun <- Sys.getenv("R_CLI_HAS_HYPERLINK_IDE_RUN", unset = "false") 123 | if (tolower(iderun) %in% "false") 124 | return(FALSE) 125 | 126 | TRUE 127 | 128 | } 129 | 130 | renv_ansify_default <- function(text) { 131 | text 132 | } 133 | 134 | renv_ansify_enhanced <- function(text) { 135 | 136 | # R help links 137 | pattern <- "`\\?(renv::(?:[^`])+)`" 138 | replacement <- "`\033]8;;ide:help:\\1\a?\\1\033]8;;\a`" 139 | text <- gsub(pattern, replacement, text, perl = TRUE) 140 | 141 | # runnable code 142 | pattern <- "`(renv::(?:[^`])+)`" 143 | replacement <- "`\033]8;;ide:run:\\1\a\\1\033]8;;\a`" 144 | text <- gsub(pattern, replacement, text, perl = TRUE) 145 | 146 | # return ansified text 147 | text 148 | 149 | } 150 | 151 | renv_ansify_init <- function() { 152 | 153 | envir <- renv_envir_self() 154 | if (renv_ansify_enabled()) 155 | assign("ansify", renv_ansify_enhanced, envir = envir) 156 | else 157 | assign("ansify", renv_ansify_default, envir = envir) 158 | 159 | } 160 | 161 | `%||%` <- function(x, y) { 162 | if (is.null(x)) y else x 163 | } 164 | 165 | catf <- function(fmt, ..., appendLF = TRUE) { 166 | 167 | quiet <- getOption("renv.bootstrap.quiet", default = FALSE) 168 | if (quiet) 169 | return(invisible()) 170 | 171 | msg <- sprintf(fmt, ...) 172 | cat(msg, file = stdout(), sep = if (appendLF) "\n" else "") 173 | 174 | invisible(msg) 175 | 176 | } 177 | 178 | header <- function(label, 179 | ..., 180 | prefix = "#", 181 | suffix = "-", 182 | n = min(getOption("width"), 78)) 183 | { 184 | label <- sprintf(label, ...) 
185 | n <- max(n - nchar(label) - nchar(prefix) - 2L, 8L) 186 | if (n <= 0) 187 | return(paste(prefix, label)) 188 | 189 | tail <- paste(rep.int(suffix, n), collapse = "") 190 | paste0(prefix, " ", label, " ", tail) 191 | 192 | } 193 | 194 | heredoc <- function(text, leave = 0) { 195 | 196 | # remove leading, trailing whitespace 197 | trimmed <- gsub("^\\s*\\n|\\n\\s*$", "", text) 198 | 199 | # split into lines 200 | lines <- strsplit(trimmed, "\n", fixed = TRUE)[[1L]] 201 | 202 | # compute common indent 203 | indent <- regexpr("[^[:space:]]", lines) 204 | common <- min(setdiff(indent, -1L)) - leave 205 | text <- paste(substring(lines, common), collapse = "\n") 206 | 207 | # substitute in ANSI links for executable renv code 208 | ansify(text) 209 | 210 | } 211 | 212 | startswith <- function(string, prefix) { 213 | substring(string, 1, nchar(prefix)) == prefix 214 | } 215 | 216 | bootstrap <- function(version, library) { 217 | 218 | friendly <- renv_bootstrap_version_friendly(version) 219 | section <- header(sprintf("Bootstrapping renv %s", friendly)) 220 | catf(section) 221 | 222 | # attempt to download renv 223 | catf("- Downloading renv ... ", appendLF = FALSE) 224 | withCallingHandlers( 225 | tarball <- renv_bootstrap_download(version), 226 | error = function(err) { 227 | catf("FAILED") 228 | stop("failed to download:\n", conditionMessage(err)) 229 | } 230 | ) 231 | catf("OK") 232 | on.exit(unlink(tarball), add = TRUE) 233 | 234 | # now attempt to install 235 | catf("- Installing renv ... ", appendLF = FALSE) 236 | withCallingHandlers( 237 | status <- renv_bootstrap_install(version, tarball, library), 238 | error = function(err) { 239 | catf("FAILED") 240 | stop("failed to install:\n", conditionMessage(err)) 241 | } 242 | ) 243 | catf("OK") 244 | 245 | # add empty line to break up bootstrapping from normal output 246 | catf("") 247 | 248 | return(invisible()) 249 | } 250 | 251 | renv_bootstrap_tests_running <- function() { 252 | getOption("renv.tests.running", default = FALSE) 253 | } 254 | 255 | renv_bootstrap_repos <- function() { 256 | 257 | # get CRAN repository 258 | cran <- getOption("renv.repos.cran", "https://cloud.r-project.org") 259 | 260 | # check for repos override 261 | repos <- Sys.getenv("RENV_CONFIG_REPOS_OVERRIDE", unset = NA) 262 | if (!is.na(repos)) { 263 | 264 | # check for RSPM; if set, use a fallback repository for renv 265 | rspm <- Sys.getenv("RSPM", unset = NA) 266 | if (identical(rspm, repos)) 267 | repos <- c(RSPM = rspm, CRAN = cran) 268 | 269 | return(repos) 270 | 271 | } 272 | 273 | # check for lockfile repositories 274 | repos <- tryCatch(renv_bootstrap_repos_lockfile(), error = identity) 275 | if (!inherits(repos, "error") && length(repos)) 276 | return(repos) 277 | 278 | # retrieve current repos 279 | repos <- getOption("repos") 280 | 281 | # ensure @CRAN@ entries are resolved 282 | repos[repos == "@CRAN@"] <- cran 283 | 284 | # add in renv.bootstrap.repos if set 285 | default <- c(FALLBACK = "https://cloud.r-project.org") 286 | extra <- getOption("renv.bootstrap.repos", default = default) 287 | repos <- c(repos, extra) 288 | 289 | # remove duplicates that might've snuck in 290 | dupes <- duplicated(repos) | duplicated(names(repos)) 291 | repos[!dupes] 292 | 293 | } 294 | 295 | renv_bootstrap_repos_lockfile <- function() { 296 | 297 | lockpath <- Sys.getenv("RENV_PATHS_LOCKFILE", unset = "renv.lock") 298 | if (!file.exists(lockpath)) 299 | return(NULL) 300 | 301 | lockfile <- tryCatch(renv_json_read(lockpath), error = identity) 302 | if 
(inherits(lockfile, "error")) { 303 | warning(lockfile) 304 | return(NULL) 305 | } 306 | 307 | repos <- lockfile$R$Repositories 308 | if (length(repos) == 0) 309 | return(NULL) 310 | 311 | keys <- vapply(repos, `[[`, "Name", FUN.VALUE = character(1)) 312 | vals <- vapply(repos, `[[`, "URL", FUN.VALUE = character(1)) 313 | names(vals) <- keys 314 | 315 | return(vals) 316 | 317 | } 318 | 319 | renv_bootstrap_download <- function(version) { 320 | 321 | sha <- attr(version, "sha", exact = TRUE) 322 | 323 | methods <- if (!is.null(sha)) { 324 | 325 | # attempting to bootstrap a development version of renv 326 | c( 327 | function() renv_bootstrap_download_tarball(sha), 328 | function() renv_bootstrap_download_github(sha) 329 | ) 330 | 331 | } else { 332 | 333 | # attempting to bootstrap a release version of renv 334 | c( 335 | function() renv_bootstrap_download_tarball(version), 336 | function() renv_bootstrap_download_cran_latest(version), 337 | function() renv_bootstrap_download_cran_archive(version) 338 | ) 339 | 340 | } 341 | 342 | for (method in methods) { 343 | path <- tryCatch(method(), error = identity) 344 | if (is.character(path) && file.exists(path)) 345 | return(path) 346 | } 347 | 348 | stop("All download methods failed") 349 | 350 | } 351 | 352 | renv_bootstrap_download_impl <- function(url, destfile) { 353 | 354 | mode <- "wb" 355 | 356 | # https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17715 357 | fixup <- 358 | Sys.info()[["sysname"]] == "Windows" && 359 | substring(url, 1L, 5L) == "file:" 360 | 361 | if (fixup) 362 | mode <- "w+b" 363 | 364 | args <- list( 365 | url = url, 366 | destfile = destfile, 367 | mode = mode, 368 | quiet = TRUE 369 | ) 370 | 371 | if ("headers" %in% names(formals(utils::download.file))) 372 | { 373 | headers <- renv_bootstrap_download_custom_headers(url) 374 | if (length(headers) && is.character(headers)) 375 | args$headers <- headers 376 | } 377 | 378 | do.call(utils::download.file, args) 379 | 380 | } 381 | 382 | renv_bootstrap_download_custom_headers <- function(url) { 383 | 384 | headers <- getOption("renv.download.headers") 385 | if (is.null(headers)) 386 | return(character()) 387 | 388 | if (!is.function(headers)) 389 | stopf("'renv.download.headers' is not a function") 390 | 391 | headers <- headers(url) 392 | if (length(headers) == 0L) 393 | return(character()) 394 | 395 | if (is.list(headers)) 396 | headers <- unlist(headers, recursive = FALSE, use.names = TRUE) 397 | 398 | ok <- 399 | is.character(headers) && 400 | is.character(names(headers)) && 401 | all(nzchar(names(headers))) 402 | 403 | if (!ok) 404 | stop("invocation of 'renv.download.headers' did not return a named character vector") 405 | 406 | headers 407 | 408 | } 409 | 410 | renv_bootstrap_download_cran_latest <- function(version) { 411 | 412 | spec <- renv_bootstrap_download_cran_latest_find(version) 413 | type <- spec$type 414 | repos <- spec$repos 415 | 416 | baseurl <- utils::contrib.url(repos = repos, type = type) 417 | ext <- if (identical(type, "source")) 418 | ".tar.gz" 419 | else if (Sys.info()[["sysname"]] == "Windows") 420 | ".zip" 421 | else 422 | ".tgz" 423 | name <- sprintf("renv_%s%s", version, ext) 424 | url <- paste(baseurl, name, sep = "/") 425 | 426 | destfile <- file.path(tempdir(), name) 427 | status <- tryCatch( 428 | renv_bootstrap_download_impl(url, destfile), 429 | condition = identity 430 | ) 431 | 432 | if (inherits(status, "condition")) 433 | return(FALSE) 434 | 435 | # report success and return 436 | destfile 437 | 438 | } 439 | 440 | 
renv_bootstrap_download_cran_latest_find <- function(version) { 441 | 442 | # check whether binaries are supported on this system 443 | binary <- 444 | getOption("renv.bootstrap.binary", default = TRUE) && 445 | !identical(.Platform$pkgType, "source") && 446 | !identical(getOption("pkgType"), "source") && 447 | Sys.info()[["sysname"]] %in% c("Darwin", "Windows") 448 | 449 | types <- c(if (binary) "binary", "source") 450 | 451 | # iterate over types + repositories 452 | for (type in types) { 453 | for (repos in renv_bootstrap_repos()) { 454 | 455 | # build arguments for utils::available.packages() call 456 | args <- list(type = type, repos = repos) 457 | 458 | # add custom headers if available -- note that 459 | # utils::available.packages() will pass this to download.file() 460 | if ("headers" %in% names(formals(utils::download.file))) 461 | { 462 | headers <- renv_bootstrap_download_custom_headers(url) 463 | if (length(headers) && is.character(headers)) 464 | args$headers <- headers 465 | } 466 | 467 | # retrieve package database 468 | db <- tryCatch( 469 | as.data.frame( 470 | do.call(utils::available.packages, args), 471 | stringsAsFactors = FALSE 472 | ), 473 | error = identity 474 | ) 475 | 476 | if (inherits(db, "error")) 477 | next 478 | 479 | # check for compatible entry 480 | entry <- db[db$Package %in% "renv" & db$Version %in% version, ] 481 | if (nrow(entry) == 0) 482 | next 483 | 484 | # found it; return spec to caller 485 | spec <- list(entry = entry, type = type, repos = repos) 486 | return(spec) 487 | 488 | } 489 | } 490 | 491 | # if we got here, we failed to find renv 492 | fmt <- "renv %s is not available from your declared package repositories" 493 | stop(sprintf(fmt, version)) 494 | 495 | } 496 | 497 | renv_bootstrap_download_cran_archive <- function(version) { 498 | 499 | name <- sprintf("renv_%s.tar.gz", version) 500 | repos <- renv_bootstrap_repos() 501 | urls <- file.path(repos, "src/contrib/Archive/renv", name) 502 | destfile <- file.path(tempdir(), name) 503 | 504 | for (url in urls) { 505 | 506 | status <- tryCatch( 507 | renv_bootstrap_download_impl(url, destfile), 508 | condition = identity 509 | ) 510 | 511 | if (identical(status, 0L)) 512 | return(destfile) 513 | 514 | } 515 | 516 | return(FALSE) 517 | 518 | } 519 | 520 | renv_bootstrap_download_tarball <- function(version) { 521 | 522 | # if the user has provided the path to a tarball via 523 | # an environment variable, then use it 524 | tarball <- Sys.getenv("RENV_BOOTSTRAP_TARBALL", unset = NA) 525 | if (is.na(tarball)) 526 | return() 527 | 528 | # allow directories 529 | if (dir.exists(tarball)) { 530 | name <- sprintf("renv_%s.tar.gz", version) 531 | tarball <- file.path(tarball, name) 532 | } 533 | 534 | # bail if it doesn't exist 535 | if (!file.exists(tarball)) { 536 | 537 | # let the user know we weren't able to honour their request 538 | fmt <- "- RENV_BOOTSTRAP_TARBALL is set (%s) but does not exist." 
539 | msg <- sprintf(fmt, tarball) 540 | warning(msg) 541 | 542 | # bail 543 | return() 544 | 545 | } 546 | 547 | catf("- Using local tarball '%s'.", tarball) 548 | tarball 549 | 550 | } 551 | 552 | renv_bootstrap_github_token <- function() { 553 | for (envvar in c("GITHUB_TOKEN", "GITHUB_PAT", "GH_TOKEN")) { 554 | envval <- Sys.getenv(envvar, unset = NA) 555 | if (!is.na(envval)) 556 | return(envval) 557 | } 558 | } 559 | 560 | renv_bootstrap_download_github <- function(version) { 561 | 562 | enabled <- Sys.getenv("RENV_BOOTSTRAP_FROM_GITHUB", unset = "TRUE") 563 | if (!identical(enabled, "TRUE")) 564 | return(FALSE) 565 | 566 | # prepare download options 567 | token <- renv_bootstrap_github_token() 568 | if (nzchar(Sys.which("curl")) && nzchar(token)) { 569 | fmt <- "--location --fail --header \"Authorization: token %s\"" 570 | extra <- sprintf(fmt, token) 571 | saved <- options("download.file.method", "download.file.extra") 572 | options(download.file.method = "curl", download.file.extra = extra) 573 | on.exit(do.call(base::options, saved), add = TRUE) 574 | } else if (nzchar(Sys.which("wget")) && nzchar(token)) { 575 | fmt <- "--header=\"Authorization: token %s\"" 576 | extra <- sprintf(fmt, token) 577 | saved <- options("download.file.method", "download.file.extra") 578 | options(download.file.method = "wget", download.file.extra = extra) 579 | on.exit(do.call(base::options, saved), add = TRUE) 580 | } 581 | 582 | url <- file.path("https://api.github.com/repos/rstudio/renv/tarball", version) 583 | name <- sprintf("renv_%s.tar.gz", version) 584 | destfile <- file.path(tempdir(), name) 585 | 586 | status <- tryCatch( 587 | renv_bootstrap_download_impl(url, destfile), 588 | condition = identity 589 | ) 590 | 591 | if (!identical(status, 0L)) 592 | return(FALSE) 593 | 594 | renv_bootstrap_download_augment(destfile) 595 | 596 | return(destfile) 597 | 598 | } 599 | 600 | # Add Sha to DESCRIPTION. This is stop gap until #890, after which we 601 | # can use renv::install() to fully capture metadata. 602 | renv_bootstrap_download_augment <- function(destfile) { 603 | sha <- renv_bootstrap_git_extract_sha1_tar(destfile) 604 | if (is.null(sha)) { 605 | return() 606 | } 607 | 608 | # Untar 609 | tempdir <- tempfile("renv-github-") 610 | on.exit(unlink(tempdir, recursive = TRUE), add = TRUE) 611 | untar(destfile, exdir = tempdir) 612 | pkgdir <- dir(tempdir, full.names = TRUE)[[1]] 613 | 614 | # Modify description 615 | desc_path <- file.path(pkgdir, "DESCRIPTION") 616 | desc_lines <- readLines(desc_path) 617 | remotes_fields <- c( 618 | "RemoteType: github", 619 | "RemoteHost: api.github.com", 620 | "RemoteRepo: renv", 621 | "RemoteUsername: rstudio", 622 | "RemotePkgRef: rstudio/renv", 623 | paste("RemoteRef: ", sha), 624 | paste("RemoteSha: ", sha) 625 | ) 626 | writeLines(c(desc_lines[desc_lines != ""], remotes_fields), con = desc_path) 627 | 628 | # Re-tar 629 | local({ 630 | old <- setwd(tempdir) 631 | on.exit(setwd(old), add = TRUE) 632 | 633 | tar(destfile, compression = "gzip") 634 | }) 635 | invisible() 636 | } 637 | 638 | # Extract the commit hash from a git archive. Git archives include the SHA1 639 | # hash as the comment field of the tarball pax extended header 640 | # (see https://www.kernel.org/pub/software/scm/git/docs/git-archive.html) 641 | # For GitHub archives this should be the first header after the default one 642 | # (512 byte) header. 
643 | renv_bootstrap_git_extract_sha1_tar <- function(bundle) { 644 | 645 | # open the bundle for reading 646 | # We use gzcon for everything because (from ?gzcon) 647 | # > Reading from a connection which does not supply a 'gzip' magic 648 | # > header is equivalent to reading from the original connection 649 | conn <- gzcon(file(bundle, open = "rb", raw = TRUE)) 650 | on.exit(close(conn)) 651 | 652 | # The default pax header is 512 bytes long and the first pax extended header 653 | # with the comment should be 51 bytes long 654 | # `52 comment=` (11 chars) + 40 byte SHA1 hash 655 | len <- 0x200 + 0x33 656 | res <- rawToChar(readBin(conn, "raw", n = len)[0x201:len]) 657 | 658 | if (grepl("^52 comment=", res)) { 659 | sub("52 comment=", "", res) 660 | } else { 661 | NULL 662 | } 663 | } 664 | 665 | renv_bootstrap_install <- function(version, tarball, library) { 666 | 667 | # attempt to install it into project library 668 | dir.create(library, showWarnings = FALSE, recursive = TRUE) 669 | output <- renv_bootstrap_install_impl(library, tarball) 670 | 671 | # check for successful install 672 | status <- attr(output, "status") 673 | if (is.null(status) || identical(status, 0L)) 674 | return(status) 675 | 676 | # an error occurred; report it 677 | header <- "installation of renv failed" 678 | lines <- paste(rep.int("=", nchar(header)), collapse = "") 679 | text <- paste(c(header, lines, output), collapse = "\n") 680 | stop(text) 681 | 682 | } 683 | 684 | renv_bootstrap_install_impl <- function(library, tarball) { 685 | 686 | # invoke using system2 so we can capture and report output 687 | bin <- R.home("bin") 688 | exe <- if (Sys.info()[["sysname"]] == "Windows") "R.exe" else "R" 689 | R <- file.path(bin, exe) 690 | 691 | args <- c( 692 | "--vanilla", "CMD", "INSTALL", "--no-multiarch", 693 | "-l", shQuote(path.expand(library)), 694 | shQuote(path.expand(tarball)) 695 | ) 696 | 697 | system2(R, args, stdout = TRUE, stderr = TRUE) 698 | 699 | } 700 | 701 | renv_bootstrap_platform_prefix <- function() { 702 | 703 | # construct version prefix 704 | version <- paste(R.version$major, R.version$minor, sep = ".") 705 | prefix <- paste("R", numeric_version(version)[1, 1:2], sep = "-") 706 | 707 | # include SVN revision for development versions of R 708 | # (to avoid sharing platform-specific artefacts with released versions of R) 709 | devel <- 710 | identical(R.version[["status"]], "Under development (unstable)") || 711 | identical(R.version[["nickname"]], "Unsuffered Consequences") 712 | 713 | if (devel) 714 | prefix <- paste(prefix, R.version[["svn rev"]], sep = "-r") 715 | 716 | # build list of path components 717 | components <- c(prefix, R.version$platform) 718 | 719 | # include prefix if provided by user 720 | prefix <- renv_bootstrap_platform_prefix_impl() 721 | if (!is.na(prefix) && nzchar(prefix)) 722 | components <- c(prefix, components) 723 | 724 | # build prefix 725 | paste(components, collapse = "/") 726 | 727 | } 728 | 729 | renv_bootstrap_platform_prefix_impl <- function() { 730 | 731 | # if an explicit prefix has been supplied, use it 732 | prefix <- Sys.getenv("RENV_PATHS_PREFIX", unset = NA) 733 | if (!is.na(prefix)) 734 | return(prefix) 735 | 736 | # if the user has requested an automatic prefix, generate it 737 | auto <- Sys.getenv("RENV_PATHS_PREFIX_AUTO", unset = NA) 738 | if (is.na(auto) && getRversion() >= "4.4.0") 739 | auto <- "TRUE" 740 | 741 | if (auto %in% c("TRUE", "True", "true", "1")) 742 | return(renv_bootstrap_platform_prefix_auto()) 743 | 744 | # empty string on 
failure 745 | "" 746 | 747 | } 748 | 749 | renv_bootstrap_platform_prefix_auto <- function() { 750 | 751 | prefix <- tryCatch(renv_bootstrap_platform_os(), error = identity) 752 | if (inherits(prefix, "error") || prefix %in% "unknown") { 753 | 754 | msg <- paste( 755 | "failed to infer current operating system", 756 | "please file a bug report at https://github.com/rstudio/renv/issues", 757 | sep = "; " 758 | ) 759 | 760 | warning(msg) 761 | 762 | } 763 | 764 | prefix 765 | 766 | } 767 | 768 | renv_bootstrap_platform_os <- function() { 769 | 770 | sysinfo <- Sys.info() 771 | sysname <- sysinfo[["sysname"]] 772 | 773 | # handle Windows + macOS up front 774 | if (sysname == "Windows") 775 | return("windows") 776 | else if (sysname == "Darwin") 777 | return("macos") 778 | 779 | # check for os-release files 780 | for (file in c("/etc/os-release", "/usr/lib/os-release")) 781 | if (file.exists(file)) 782 | return(renv_bootstrap_platform_os_via_os_release(file, sysinfo)) 783 | 784 | # check for redhat-release files 785 | if (file.exists("/etc/redhat-release")) 786 | return(renv_bootstrap_platform_os_via_redhat_release()) 787 | 788 | "unknown" 789 | 790 | } 791 | 792 | renv_bootstrap_platform_os_via_os_release <- function(file, sysinfo) { 793 | 794 | # read /etc/os-release 795 | release <- utils::read.table( 796 | file = file, 797 | sep = "=", 798 | quote = c("\"", "'"), 799 | col.names = c("Key", "Value"), 800 | comment.char = "#", 801 | stringsAsFactors = FALSE 802 | ) 803 | 804 | vars <- as.list(release$Value) 805 | names(vars) <- release$Key 806 | 807 | # get os name 808 | os <- tolower(sysinfo[["sysname"]]) 809 | 810 | # read id 811 | id <- "unknown" 812 | for (field in c("ID", "ID_LIKE")) { 813 | if (field %in% names(vars) && nzchar(vars[[field]])) { 814 | id <- vars[[field]] 815 | break 816 | } 817 | } 818 | 819 | # read version 820 | version <- "unknown" 821 | for (field in c("UBUNTU_CODENAME", "VERSION_CODENAME", "VERSION_ID", "BUILD_ID")) { 822 | if (field %in% names(vars) && nzchar(vars[[field]])) { 823 | version <- vars[[field]] 824 | break 825 | } 826 | } 827 | 828 | # join together 829 | paste(c(os, id, version), collapse = "-") 830 | 831 | } 832 | 833 | renv_bootstrap_platform_os_via_redhat_release <- function() { 834 | 835 | # read /etc/redhat-release 836 | contents <- readLines("/etc/redhat-release", warn = FALSE) 837 | 838 | # infer id 839 | id <- if (grepl("centos", contents, ignore.case = TRUE)) 840 | "centos" 841 | else if (grepl("redhat", contents, ignore.case = TRUE)) 842 | "redhat" 843 | else 844 | "unknown" 845 | 846 | # try to find a version component (very hacky) 847 | version <- "unknown" 848 | 849 | parts <- strsplit(contents, "[[:space:]]")[[1L]] 850 | for (part in parts) { 851 | 852 | nv <- tryCatch(numeric_version(part), error = identity) 853 | if (inherits(nv, "error")) 854 | next 855 | 856 | version <- nv[1, 1] 857 | break 858 | 859 | } 860 | 861 | paste(c("linux", id, version), collapse = "-") 862 | 863 | } 864 | 865 | renv_bootstrap_library_root_name <- function(project) { 866 | 867 | # use project name as-is if requested 868 | asis <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT_ASIS", unset = "FALSE") 869 | if (asis) 870 | return(basename(project)) 871 | 872 | # otherwise, disambiguate based on project's path 873 | id <- substring(renv_bootstrap_hash_text(project), 1L, 8L) 874 | paste(basename(project), id, sep = "-") 875 | 876 | } 877 | 878 | renv_bootstrap_library_root <- function(project) { 879 | 880 | prefix <- renv_bootstrap_profile_prefix() 881 | 882 | path <- 
Sys.getenv("RENV_PATHS_LIBRARY", unset = NA) 883 | if (!is.na(path)) 884 | return(paste(c(path, prefix), collapse = "/")) 885 | 886 | path <- renv_bootstrap_library_root_impl(project) 887 | if (!is.null(path)) { 888 | name <- renv_bootstrap_library_root_name(project) 889 | return(paste(c(path, prefix, name), collapse = "/")) 890 | } 891 | 892 | renv_bootstrap_paths_renv("library", project = project) 893 | 894 | } 895 | 896 | renv_bootstrap_library_root_impl <- function(project) { 897 | 898 | root <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT", unset = NA) 899 | if (!is.na(root)) 900 | return(root) 901 | 902 | type <- renv_bootstrap_project_type(project) 903 | if (identical(type, "package")) { 904 | userdir <- renv_bootstrap_user_dir() 905 | return(file.path(userdir, "library")) 906 | } 907 | 908 | } 909 | 910 | renv_bootstrap_validate_version <- function(version, description = NULL) { 911 | 912 | # resolve description file 913 | # 914 | # avoid passing lib.loc to `packageDescription()` below, since R will 915 | # use the loaded version of the package by default anyhow. note that 916 | # this function should only be called after 'renv' is loaded 917 | # https://github.com/rstudio/renv/issues/1625 918 | description <- description %||% packageDescription("renv") 919 | 920 | # check whether requested version 'version' matches loaded version of renv 921 | sha <- attr(version, "sha", exact = TRUE) 922 | valid <- if (!is.null(sha)) 923 | renv_bootstrap_validate_version_dev(sha, description) 924 | else 925 | renv_bootstrap_validate_version_release(version, description) 926 | 927 | if (valid) 928 | return(TRUE) 929 | 930 | # the loaded version of renv doesn't match the requested version; 931 | # give the user instructions on how to proceed 932 | dev <- identical(description[["RemoteType"]], "github") 933 | remote <- if (dev) 934 | paste("rstudio/renv", description[["RemoteSha"]], sep = "@") 935 | else 936 | paste("renv", description[["Version"]], sep = "@") 937 | 938 | # display both loaded version + sha if available 939 | friendly <- renv_bootstrap_version_friendly( 940 | version = description[["Version"]], 941 | sha = if (dev) description[["RemoteSha"]] 942 | ) 943 | 944 | fmt <- heredoc(" 945 | renv %1$s was loaded from project library, but this project is configured to use renv %2$s. 946 | - Use `renv::record(\"%3$s\")` to record renv %1$s in the lockfile. 947 | - Use `renv::restore(packages = \"renv\")` to install renv %2$s into the project library. 
948 | ") 949 | catf(fmt, friendly, renv_bootstrap_version_friendly(version), remote) 950 | 951 | FALSE 952 | 953 | } 954 | 955 | renv_bootstrap_validate_version_dev <- function(version, description) { 956 | expected <- description[["RemoteSha"]] 957 | is.character(expected) && startswith(expected, version) 958 | } 959 | 960 | renv_bootstrap_validate_version_release <- function(version, description) { 961 | expected <- description[["Version"]] 962 | is.character(expected) && identical(expected, version) 963 | } 964 | 965 | renv_bootstrap_hash_text <- function(text) { 966 | 967 | hashfile <- tempfile("renv-hash-") 968 | on.exit(unlink(hashfile), add = TRUE) 969 | 970 | writeLines(text, con = hashfile) 971 | tools::md5sum(hashfile) 972 | 973 | } 974 | 975 | renv_bootstrap_load <- function(project, libpath, version) { 976 | 977 | # try to load renv from the project library 978 | if (!requireNamespace("renv", lib.loc = libpath, quietly = TRUE)) 979 | return(FALSE) 980 | 981 | # warn if the version of renv loaded does not match 982 | renv_bootstrap_validate_version(version) 983 | 984 | # execute renv load hooks, if any 985 | hooks <- getHook("renv::autoload") 986 | for (hook in hooks) 987 | if (is.function(hook)) 988 | tryCatch(hook(), error = warnify) 989 | 990 | # load the project 991 | renv::load(project) 992 | 993 | TRUE 994 | 995 | } 996 | 997 | renv_bootstrap_profile_load <- function(project) { 998 | 999 | # if RENV_PROFILE is already set, just use that 1000 | profile <- Sys.getenv("RENV_PROFILE", unset = NA) 1001 | if (!is.na(profile) && nzchar(profile)) 1002 | return(profile) 1003 | 1004 | # check for a profile file (nothing to do if it doesn't exist) 1005 | path <- renv_bootstrap_paths_renv("profile", profile = FALSE, project = project) 1006 | if (!file.exists(path)) 1007 | return(NULL) 1008 | 1009 | # read the profile, and set it if it exists 1010 | contents <- readLines(path, warn = FALSE) 1011 | if (length(contents) == 0L) 1012 | return(NULL) 1013 | 1014 | # set RENV_PROFILE 1015 | profile <- contents[[1L]] 1016 | if (!profile %in% c("", "default")) 1017 | Sys.setenv(RENV_PROFILE = profile) 1018 | 1019 | profile 1020 | 1021 | } 1022 | 1023 | renv_bootstrap_profile_prefix <- function() { 1024 | profile <- renv_bootstrap_profile_get() 1025 | if (!is.null(profile)) 1026 | return(file.path("profiles", profile, "renv")) 1027 | } 1028 | 1029 | renv_bootstrap_profile_get <- function() { 1030 | profile <- Sys.getenv("RENV_PROFILE", unset = "") 1031 | renv_bootstrap_profile_normalize(profile) 1032 | } 1033 | 1034 | renv_bootstrap_profile_set <- function(profile) { 1035 | profile <- renv_bootstrap_profile_normalize(profile) 1036 | if (is.null(profile)) 1037 | Sys.unsetenv("RENV_PROFILE") 1038 | else 1039 | Sys.setenv(RENV_PROFILE = profile) 1040 | } 1041 | 1042 | renv_bootstrap_profile_normalize <- function(profile) { 1043 | 1044 | if (is.null(profile) || profile %in% c("", "default")) 1045 | return(NULL) 1046 | 1047 | profile 1048 | 1049 | } 1050 | 1051 | renv_bootstrap_path_absolute <- function(path) { 1052 | 1053 | substr(path, 1L, 1L) %in% c("~", "/", "\\") || ( 1054 | substr(path, 1L, 1L) %in% c(letters, LETTERS) && 1055 | substr(path, 2L, 3L) %in% c(":/", ":\\") 1056 | ) 1057 | 1058 | } 1059 | 1060 | renv_bootstrap_paths_renv <- function(..., profile = TRUE, project = NULL) { 1061 | renv <- Sys.getenv("RENV_PATHS_RENV", unset = "renv") 1062 | root <- if (renv_bootstrap_path_absolute(renv)) NULL else project 1063 | prefix <- if (profile) renv_bootstrap_profile_prefix() 1064 | components 
<- c(root, renv, prefix, ...) 1065 | paste(components, collapse = "/") 1066 | } 1067 | 1068 | renv_bootstrap_project_type <- function(path) { 1069 | 1070 | descpath <- file.path(path, "DESCRIPTION") 1071 | if (!file.exists(descpath)) 1072 | return("unknown") 1073 | 1074 | desc <- tryCatch( 1075 | read.dcf(descpath, all = TRUE), 1076 | error = identity 1077 | ) 1078 | 1079 | if (inherits(desc, "error")) 1080 | return("unknown") 1081 | 1082 | type <- desc$Type 1083 | if (!is.null(type)) 1084 | return(tolower(type)) 1085 | 1086 | package <- desc$Package 1087 | if (!is.null(package)) 1088 | return("package") 1089 | 1090 | "unknown" 1091 | 1092 | } 1093 | 1094 | renv_bootstrap_user_dir <- function() { 1095 | dir <- renv_bootstrap_user_dir_impl() 1096 | path.expand(chartr("\\", "/", dir)) 1097 | } 1098 | 1099 | renv_bootstrap_user_dir_impl <- function() { 1100 | 1101 | # use local override if set 1102 | override <- getOption("renv.userdir.override") 1103 | if (!is.null(override)) 1104 | return(override) 1105 | 1106 | # use R_user_dir if available 1107 | tools <- asNamespace("tools") 1108 | if (is.function(tools$R_user_dir)) 1109 | return(tools$R_user_dir("renv", "cache")) 1110 | 1111 | # try using our own backfill for older versions of R 1112 | envvars <- c("R_USER_CACHE_DIR", "XDG_CACHE_HOME") 1113 | for (envvar in envvars) { 1114 | root <- Sys.getenv(envvar, unset = NA) 1115 | if (!is.na(root)) 1116 | return(file.path(root, "R/renv")) 1117 | } 1118 | 1119 | # use platform-specific default fallbacks 1120 | if (Sys.info()[["sysname"]] == "Windows") 1121 | file.path(Sys.getenv("LOCALAPPDATA"), "R/cache/R/renv") 1122 | else if (Sys.info()[["sysname"]] == "Darwin") 1123 | "~/Library/Caches/org.R-project.R/R/renv" 1124 | else 1125 | "~/.cache/R/renv" 1126 | 1127 | } 1128 | 1129 | renv_bootstrap_version_friendly <- function(version, shafmt = NULL, sha = NULL) { 1130 | sha <- sha %||% attr(version, "sha", exact = TRUE) 1131 | parts <- c(version, sprintf(shafmt %||% " [sha: %s]", substring(sha, 1L, 7L))) 1132 | paste(parts, collapse = "") 1133 | } 1134 | 1135 | renv_bootstrap_exec <- function(project, libpath, version) { 1136 | if (!renv_bootstrap_load(project, libpath, version)) 1137 | renv_bootstrap_run(version, libpath) 1138 | } 1139 | 1140 | renv_bootstrap_run <- function(version, libpath) { 1141 | 1142 | # perform bootstrap 1143 | bootstrap(version, libpath) 1144 | 1145 | # exit early if we're just testing bootstrap 1146 | if (!is.na(Sys.getenv("RENV_BOOTSTRAP_INSTALL_ONLY", unset = NA))) 1147 | return(TRUE) 1148 | 1149 | # try again to load 1150 | if (requireNamespace("renv", lib.loc = libpath, quietly = TRUE)) { 1151 | return(renv::load(project = getwd())) 1152 | } 1153 | 1154 | # failed to download or load renv; warn the user 1155 | msg <- c( 1156 | "Failed to find an renv installation: the project will not be loaded.", 1157 | "Use `renv::activate()` to re-initialize the project." 1158 | ) 1159 | 1160 | warning(paste(msg, collapse = "\n"), call. 
= FALSE) 1161 | 1162 | } 1163 | 1164 | renv_json_read <- function(file = NULL, text = NULL) { 1165 | 1166 | jlerr <- NULL 1167 | 1168 | # if jsonlite is loaded, use that instead 1169 | if ("jsonlite" %in% loadedNamespaces()) { 1170 | 1171 | json <- tryCatch(renv_json_read_jsonlite(file, text), error = identity) 1172 | if (!inherits(json, "error")) 1173 | return(json) 1174 | 1175 | jlerr <- json 1176 | 1177 | } 1178 | 1179 | # otherwise, fall back to the default JSON reader 1180 | json <- tryCatch(renv_json_read_default(file, text), error = identity) 1181 | if (!inherits(json, "error")) 1182 | return(json) 1183 | 1184 | # report an error 1185 | if (!is.null(jlerr)) 1186 | stop(jlerr) 1187 | else 1188 | stop(json) 1189 | 1190 | } 1191 | 1192 | renv_json_read_jsonlite <- function(file = NULL, text = NULL) { 1193 | text <- paste(text %||% readLines(file, warn = FALSE), collapse = "\n") 1194 | jsonlite::fromJSON(txt = text, simplifyVector = FALSE) 1195 | } 1196 | 1197 | renv_json_read_default <- function(file = NULL, text = NULL) { 1198 | 1199 | # find strings in the JSON 1200 | text <- paste(text %||% readLines(file, warn = FALSE), collapse = "\n") 1201 | pattern <- '["](?:(?:\\\\.)|(?:[^"\\\\]))*?["]' 1202 | locs <- gregexpr(pattern, text, perl = TRUE)[[1]] 1203 | 1204 | # if any are found, replace them with placeholders 1205 | replaced <- text 1206 | strings <- character() 1207 | replacements <- character() 1208 | 1209 | if (!identical(c(locs), -1L)) { 1210 | 1211 | # get the string values 1212 | starts <- locs 1213 | ends <- locs + attr(locs, "match.length") - 1L 1214 | strings <- substring(text, starts, ends) 1215 | 1216 | # only keep those requiring escaping 1217 | strings <- grep("[[\\]{}:]", strings, perl = TRUE, value = TRUE) 1218 | 1219 | # compute replacements 1220 | replacements <- sprintf('"\032%i\032"', seq_along(strings)) 1221 | 1222 | # replace the strings 1223 | mapply(function(string, replacement) { 1224 | replaced <<- sub(string, replacement, replaced, fixed = TRUE) 1225 | }, strings, replacements) 1226 | 1227 | } 1228 | 1229 | # transform the JSON into something the R parser understands 1230 | transformed <- replaced 1231 | transformed <- gsub("{}", "`names<-`(list(), character())", transformed, fixed = TRUE) 1232 | transformed <- gsub("[[{]", "list(", transformed, perl = TRUE) 1233 | transformed <- gsub("[]}]", ")", transformed, perl = TRUE) 1234 | transformed <- gsub(":", "=", transformed, fixed = TRUE) 1235 | text <- paste(transformed, collapse = "\n") 1236 | 1237 | # parse it 1238 | json <- parse(text = text, keep.source = FALSE, srcfile = NULL)[[1L]] 1239 | 1240 | # construct map between source strings, replaced strings 1241 | map <- as.character(parse(text = strings)) 1242 | names(map) <- as.character(parse(text = replacements)) 1243 | 1244 | # convert to list 1245 | map <- as.list(map) 1246 | 1247 | # remap strings in object 1248 | remapped <- renv_json_read_remap(json, map) 1249 | 1250 | # evaluate 1251 | eval(remapped, envir = baseenv()) 1252 | 1253 | } 1254 | 1255 | renv_json_read_remap <- function(json, map) { 1256 | 1257 | # fix names 1258 | if (!is.null(names(json))) { 1259 | lhs <- match(names(json), names(map), nomatch = 0L) 1260 | rhs <- match(names(map), names(json), nomatch = 0L) 1261 | names(json)[rhs] <- map[lhs] 1262 | } 1263 | 1264 | # fix values 1265 | if (is.character(json)) 1266 | return(map[[json]] %||% json) 1267 | 1268 | # handle true, false, null 1269 | if (is.name(json)) { 1270 | text <- as.character(json) 1271 | if (text == "true") 1272 | 
return(TRUE) 1273 | else if (text == "false") 1274 | return(FALSE) 1275 | else if (text == "null") 1276 | return(NULL) 1277 | } 1278 | 1279 | # recurse 1280 | if (is.recursive(json)) { 1281 | for (i in seq_along(json)) { 1282 | json[i] <- list(renv_json_read_remap(json[[i]], map)) 1283 | } 1284 | } 1285 | 1286 | json 1287 | 1288 | } 1289 | 1290 | # load the renv profile, if any 1291 | renv_bootstrap_profile_load(project) 1292 | 1293 | # construct path to library root 1294 | root <- renv_bootstrap_library_root(project) 1295 | 1296 | # construct library prefix for platform 1297 | prefix <- renv_bootstrap_platform_prefix() 1298 | 1299 | # construct full libpath 1300 | libpath <- file.path(root, prefix) 1301 | 1302 | # run bootstrap code 1303 | renv_bootstrap_exec(project, libpath, version) 1304 | 1305 | invisible() 1306 | 1307 | }) 1308 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | polars[pyarrow]==1.9.0 2 | pyprql==0.12.1 3 | -------------------------------------------------------------------------------- /sidebars.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Creating a sidebar enables you to: 3 | - create an ordered group of docs 4 | - render a sidebar for each doc of that group 5 | - provide next/previous navigation 6 | 7 | The sidebars can be generated from the filesystem, or explicitly defined here. 8 | 9 | Create as many sidebars as you want. 10 | */ 11 | 12 | // @ts-check 13 | 14 | /** @type {import('@docusaurus/plugin-content-docs').SidebarsConfig} */ 15 | const sidebars = { 16 | // By default, Docusaurus generates a sidebar from the docs folder structure 17 | tutorialSidebar: [{type: 'autogenerated', dirName: '.'}], 18 | 19 | // But you can create a sidebar manually 20 | /* 21 | tutorialSidebar: [ 22 | 'intro', 23 | 'hello', 24 | { 25 | type: 'category', 26 | label: 'Tutorial', 27 | items: ['tutorial-basics/create-a-document'], 28 | }, 29 | ], 30 | */ 31 | }; 32 | 33 | module.exports = sidebars; 34 | -------------------------------------------------------------------------------- /src/css/custom.css: -------------------------------------------------------------------------------- 1 | /** 2 | * Any CSS included here will be global. The classic template 3 | * bundles Infima by default. Infima is a CSS framework designed to 4 | * work well for content-centric websites. 5 | */ 6 | 7 | /* You can override the default Infima variables here. */ 8 | :root { 9 | --ifm-color-primary: #2e8555; 10 | --ifm-color-primary-dark: #29784c; 11 | --ifm-color-primary-darker: #277148; 12 | --ifm-color-primary-darkest: #205d3b; 13 | --ifm-color-primary-light: #33925d; 14 | --ifm-color-primary-lighter: #359962; 15 | --ifm-color-primary-lightest: #3cad6e; 16 | --ifm-code-font-size: 95%; 17 | --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.1); 18 | } 19 | 20 | /* For readability concerns, you should choose a lighter palette in dark mode. 
*/ 21 | [data-theme='dark'] { 22 | --ifm-color-primary: #25c2a0; 23 | --ifm-color-primary-dark: #21af90; 24 | --ifm-color-primary-darker: #1fa588; 25 | --ifm-color-primary-darkest: #1a8870; 26 | --ifm-color-primary-light: #29d5b0; 27 | --ifm-color-primary-lighter: #32d8b4; 28 | --ifm-color-primary-lightest: #4fddbf; 29 | --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.3); 30 | } 31 | -------------------------------------------------------------------------------- /src/pages/index.module.css: -------------------------------------------------------------------------------- 1 | /** 2 | * CSS files with the .module.css suffix will be treated as CSS modules 3 | * and scoped locally. 4 | */ 5 | 6 | .heroBanner { 7 | padding: 4rem 0; 8 | text-align: center; 9 | position: relative; 10 | overflow: hidden; 11 | } 12 | 13 | @media screen and (max-width: 996px) { 14 | .heroBanner { 15 | padding: 2rem; 16 | } 17 | } 18 | 19 | .buttons { 20 | display: flex; 21 | align-items: center; 22 | justify-content: center; 23 | } 24 | -------------------------------------------------------------------------------- /static/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eitsupi/querying-with-prql/8c46b2c7eb7f56d659f05fe49b2ba9984ba75198/static/.nojekyll -------------------------------------------------------------------------------- /static/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eitsupi/querying-with-prql/8c46b2c7eb7f56d659f05fe49b2ba9984ba75198/static/img/favicon.ico -------------------------------------------------------------------------------- /static/img/logo.svg: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------------